Frank-Wolfe Graph Matching Methods — graph_match

Match two given graphs, returns a list of graph matching results, including matching correspondence vector of \(G_2\) with respect to \(G_1\), doubly stochastic matrix and permutation matrix.

Usage

graph_match_convex(
  A,
  B,
  seeds = NULL,
  similarity = NULL,
  start = "bari",
  max_iter = 100,
  tol = 1e-05,
  lap_method = NULL
)

graph_match_indefinite(
  A,
  B,
  seeds = NULL,
  similarity = NULL,
  start = "bari",
  max_iter = 20,
  lap_method = NULL
)

graph_match_PATH(
  A,
  B,
  seeds = NULL,
  similarity = NULL,
  epsilon = 1,
  tol = 1e-05,
  max_iter = 20,
  lap_method = NULL
)

Arguments

A: A matrix, igraph object, or list of either.
B: A matrix, igraph object, or list of either.
seeds: A vector of integers or logicals, a matrix or a data frame. If the seed pairs have the same indices in both graphs then seeds can be a vector. If not, seeds must be a matrix or a data frame, with the first column being the indices of \(G_1\) and the second column being the corresponding indices of \(G_2\).
similarity: A matrix. An n-by-n matrix containing vertex similarities.
start: A matrix or a character. Any nns-by-nns matrix or character value like "bari", "rds" or "convex" to initialize the starting matrix.
max_iter: A number. Maximum number of replacing matches.
tol: A number. Tolerance of edge disagreements.
lap_method: Choice for lap method. One of "lapjv", "lapmod", or "clue".
epsilon: A small number

Value

graph_match_indefinite, graph_match_convex and graph_match_PATH

return an object of class "graphMatch" which is a list containing the following components:

corr_A: matching correspondence in \(G_1\)
corr_B: matching correspondence in \(G_2\)
soft: the doubly stochastic matrix from the last iteration with which one can extract more than one matching candidates
iter: number of iterations until convergence or reaches the max_iter
max_iter: Maximum number of replacing matches
lap_method: Choice for solving the LAP
seeds: a vector of logicals indicating if the corresponding vertex is a seed

References

Y. Aflalo and A. Bronstein and R. Kimmel (2014), On convex relaxation of graph isomorphism. Proceedings of the National Academy of Sciences, pages 2942-2947.

V. Lyzinski and D. E. Fishkind and M. Fiori and J. T. Vogelstein and C. E. Priebe and G. Sapiro (2016), Graph Matching: Relax at Your Own Risk. IEEE TPAMI, pages 60-73.

V. Lyzinski and D. E. Fishkind and C. E. Priebe (2014), Seeded Graph Matching for Correlated Erdos-Renyi Graphs.J. Mach. Learn. Res., pages 3513-3540.

M. Zaslavskiy, F. Bach and J. Vert (2009), A Path following algorithm for the graph matching problem. IEEE Trans Pattern Anal Mach Intell, pages 2227-2242.

Examples

# \donttest{
cgnp_pair <- sample_correlated_gnp_pair(n = 10, corr =  0.9, p =  0.5)
g1 <- cgnp_pair$graph1
g2 <- cgnp_pair$graph2
# match G_1 & G_2 with no seeds
gm(g1, g2, method = "convex", max_iter = 10)
#> Warning: Frank-Wolfe iterations reach the maximum iteration, convergence may not occur.
#> gm(A = g1, B = g2, method = "convex", max_iter = 10)
#> 
#> Match (10 x 10):
#>    corr_A corr_B
#> 1       1      7
#> 2       2      2
#> 3       3      3
#> 4       4      4
#> 5       5      5
#> 6       6      6
#> 7       7      1
#> 8       8      9
#> 9       9      8
#> 10     10     10
seeds <- 1:10 <= 3
gm(g1, g2, seeds, method = "convex", max_iter = 10)
#> Warning: Frank-Wolfe iterations reach the maximum iteration, convergence may not occur.
#> gm(A = g1, B = g2, seeds = seeds, method = "convex", max_iter = 10)
#> 
#> Match (10 x 10):
#>    corr_A corr_B
#> 1       1      1
#> 2       2      2
#> 3       3      3
#> 4       4      4
#> 5       5      5
#> 6       6      6
#> 7       7      7
#> 8       8      8
#> 9       9      9
#> 10     10     10
# }



# match G_1 & G_2 with some known node pairs as seeds
cgnp_pair <- sample_correlated_gnp_pair(n = 10, corr =  0.3, p =  0.5)
g1 <- cgnp_pair$graph1
g2 <- cgnp_pair$graph2
seeds <- 1:10 <= 3
GM_bari <- gm(g1, g2, seeds, method = "indefinite", start = "bari")
GM_bari
#> gm(A = g1, B = g2, seeds = seeds, method = "indefinite", start = "bari")
#> 
#> Match (10 x 10):
#>    corr_A corr_B
#> 1       1      1
#> 2       2      2
#> 3       3      3
#> 4       4      4
#> 5       5      7
#> 6       6      6
#> 7       7      8
#> 8       8      5
#> 9       9      9
#> 10     10     10
GM_bari[!GM_bari$seeds] # matching correspondence for non-seeds
#>    corr_A corr_B
#> 4       4      4
#> 5       5      7
#> 6       6      6
#> 7       7      8
#> 8       8      5
#> 9       9      9
#> 10     10     10

summary(GM_bari, g1, g2, true_label = 1:10)
#> Call: gm(A = g1, B = g2, seeds = seeds, method = "indefinite", start = "bari")
#> 
#> # Matches: 7
#> # True Matches:  4, # Seeds:  3, # Vertices:  10, 10
#>                        
#>   common_edges 16.00000
#>  missing_edges  3.00000
#>    extra_edges 10.00000
#>          fnorm  5.09902

# match G_1 & G_2 with some incorrect seeds
hard_seeds <- matrix(c(4,6,5,4),2)
seeds <- rbind(as.matrix(check_seeds(seeds, nv = 10)$seeds),hard_seeds)
GM_badseed <- gm(g1, g2, seeds, method = "indefinite")

GM_badseed[] # get the corresponding permutation matrix
#> 10 x 10 sparse Matrix of class "dgCMatrix"
#>                          
#>  [1,] 1 . . . . . . . . .
#>  [2,] . 1 . . . . . . . .
#>  [3,] . . 1 . . . . . . .
#>  [4,] . . . . 1 . . . . .
#>  [5,] . . . . . . . . . 1
#>  [6,] . . . 1 . . . . . .
#>  [7,] . . . . . . 1 . . .
#>  [8,] . . . . . 1 . . . .
#>  [9,] . . . . . . . . 1 .
#> [10,] . . . . . . . 1 . .
GM_badseed %*% g2 # permute the second graph according to match result: PBP^T
#> IGRAPH 8771406 UN-- 10 26 -- Erdos-Renyi (gnp) graph
#> + attr: name_1 (g/c), name_2 (g/c), type_1 (g/c), type_2 (g/c), loops_1
#> | (g/l), loops_2 (g/l), p_1 (g/n), p_2 (g/n), name (g/c), type (g/c),
#> | loops (g/l), p (g/n), name (v/n)
#> + edges from 8771406 (vertex names):
#>  [1]  8-- 9  7--10  7-- 9  8-- 7  6-- 9  6-- 7 10-- 5  9-- 5  8-- 5  7-- 5
#> [11]  6-- 5  4--10  4-- 9  6-- 4  3-- 9  3-- 8  3-- 6  3-- 5  2--10  2-- 8
#> [21]  2-- 4  2-- 3  1-- 8  1-- 7  1-- 4  1-- 2
GM_badseed$soft # doubly stochastic matrix from the last step of Frank-Wolfe iterations
#> 10 x 10 sparse Matrix of class "dgCMatrix"
#>                          
#>  [1,] 1 . . . . . . . . .
#>  [2,] . 1 . . . . . . . .
#>  [3,] . . 1 . . . . . . .
#>  [4,] . . . . 1 . . . . .
#>  [5,] . . . . . . . . . 1
#>  [6,] . . . 1 . . . . . .
#>  [7,] . . . . . . 1 . . .
#>  [8,] . . . . . 1 . . . .
#>  [9,] . . . . . . . . 1 .
#> [10,] . . . . . . . 1 . .
GM_badseed$iter # number of iterations
#> [1] 3
GM_badseed$max_iter # preset maximum number of iterations: 20
#> [1] 20

# match two multi-layer graphs
gp_list <- replicate(3, sample_correlated_gnp_pair(20, .3, .5), simplify = FALSE)
A <- lapply(gp_list, function(gp)gp[[1]])
B <- lapply(gp_list, function(gp)gp[[2]])

match_multi_layer <- gm(A, B, seeds = 1:10, method = "indefinite", start = "bari", max_iter = 20)
summary(match_multi_layer, A, B)
#> Call: gm(A = A, B = B, seeds = 1:10, method = "indefinite", start = "bari", 
#>     max_iter = 20)
#> 
#> # Matches: 10, # Seeds:  10, # Vertices:  20, 20
#>          layer       1        2        3
#>   common_edges 65.0000 60.00000 69.00000
#>  missing_edges 37.0000 30.00000 34.00000
#>    extra_edges 31.0000 30.00000 35.00000
#>          fnorm 11.6619 10.95445 11.74734

# match G_1 & G_2 using PATH algorithm
gm(g1, g2, method = "PATH")
#> Warning: Frank-Wolfe iterations reach the maximum iteration, convergence may not occur.
#> gm(A = g1, B = g2, method = "PATH")
#> 
#> Match (10 x 10):
#>    corr_A corr_B
#> 1       1      4
#> 2       2      1
#> 3       3      9
#> 4       4     10
#> 5       5      5
#> 6       6      3
#> 7       7      6
#> 8       8      2
#> 9       9      8
#> 10     10      7