
Commit 80ba68b

Implement google_matrix and binary operators (#62)
* Implement `google_matrix` and binary operators
* Also implement `floyd_warshall_numpy`
* Remove `floyd_warshall_numpy` from "core" (still in "nxapi")
1 parent 17f19ba commit 80ba68b

13 files changed, +401 -7 lines changed


Diff for: README.md

+10
@@ -156,8 +156,17 @@ dispatch pattern shown above.
   - isolates
   - number_of_isolates
 - Link Analysis
+  - google_matrix
   - hits
   - pagerank
+- Operators
+  - compose
+  - difference
+  - disjoint_union
+  - full_join
+  - intersection
+  - symmetric_difference
+  - union
 - Reciprocity
   - overall_reciprocity
   - reciprocity
@@ -168,6 +177,7 @@ dispatch pattern shown above.
   - all_pairs_bellman_ford_path_length
   - all_pairs_shortest_path_length
   - floyd_warshall
+  - floyd_warshall_numpy
   - floyd_warshall_predecessor_and_distance
   - has_path
   - negative_edge_cycle
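For orientation, a minimal usage sketch of the newly listed entries. The random graph, the `from_networkx` conversion, and the import paths are assumptions based on the package's existing examples, not part of this diff:

```python
import networkx as nx

from graphblas_algorithms import Graph
from graphblas_algorithms.algorithms import google_matrix, pagerank

# Build a small NetworkX graph and wrap it as a GraphBLAS-backed Graph
nxg = nx.erdos_renyi_graph(50, 0.1, seed=42)
G = Graph.from_networkx(nxg)

pr = pagerank(G)      # existing entry: graphblas Vector of ranks
M = google_matrix(G)  # new entry: graphblas Matrix (see pagerank_alg.py below)
```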

Diff for: graphblas_algorithms/algorithms/__init__.py

+1
@@ -10,6 +10,7 @@
 from .dominating import *
 from .isolate import *
 from .link_analysis import *
+from .operators import *
 from .reciprocity import *
 from .regular import *
 from .shortest_paths import *

Diff for: graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py

+65 -2
@@ -1,11 +1,12 @@
-from graphblas import Vector
+import numpy as np
+from graphblas import Matrix, Vector, binary, monoid
 from graphblas.semiring import plus_first, plus_times
 
 from graphblas_algorithms import Graph
 from graphblas_algorithms.algorithms._helpers import is_converged
 from graphblas_algorithms.algorithms.exceptions import ConvergenceFailure
 
-__all__ = ["pagerank"]
+__all__ = ["pagerank", "google_matrix"]
 
 
 def pagerank(
@@ -98,3 +99,65 @@ def pagerank(
             x.name = name
             return x
     raise ConvergenceFailure(max_iter)
+
+
+def google_matrix(
+    G: Graph,
+    alpha=0.85,
+    personalization=None,
+    nodelist=None,
+    dangling=None,
+    name="google_matrix",
+) -> Matrix:
+    A = G._A
+    ids = G.list_to_ids(nodelist)
+    if ids is not None:
+        ids = np.array(ids, np.uint64)
+        A = A[ids, ids].new(float, name=name)
+    else:
+        A = A.dup(float, name=name)
+    N = A.nrows
+    if N == 0:
+        return A
+
+    # Personalization vector or scalar
+    if personalization is None:
+        p = 1.0 / N
+    else:
+        if ids is not None:
+            personalization = personalization[ids].new(name="personalization")
+        denom = personalization.reduce().get(0)
+        if denom == 0:
+            raise ZeroDivisionError("personalization sums to 0")
+        p = (personalization / denom).new(mask=personalization.V, name="p")
+
+    if ids is None or len(ids) == len(G):
+        nonempty_rows = G.get_property("any_rowwise+")  # XXX: What about self-edges?
+    else:
+        nonempty_rows = A.reduce_rowwise(monoid.any).new(name="nonempty_rows")
+
+    is_dangling = nonempty_rows.nvals < N
+    if is_dangling:
+        empty_rows = (~nonempty_rows.S).new(name="empty_rows")
+        if dangling is not None:
+            if ids is not None:
+                dangling = dangling[ids].new(name="dangling")
+            dangling_weights = (1.0 / dangling.reduce().get(0) * dangling).new(
+                mask=dangling.V, name="dangling_weights"
+            )
+            A << binary.first(empty_rows.outer(dangling_weights) | A)
+        elif personalization is None:
+            A << binary.first((p * empty_rows) | A)
+        else:
+            A << binary.first(empty_rows.outer(p) | A)
+
+    scale = A.reduce_rowwise(monoid.plus).new(float)
+    scale << alpha / scale
+    A << scale * A
+    p *= 1 - alpha
+    if personalization is None:
+        # Add a scalar everywhere, which makes A dense
+        A(binary.plus)[:, :] = p
+    else:
+        A << A + p
+    return A
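In NetworkX terms, `google_matrix` builds the matrix M = alpha * S + (1 - alpha) * 1 p^T, where S is the row-stochastic adjacency (dangling rows replaced by the dangling or personalization distribution) and p is the personalization vector; the code above does the same with masked GraphBLAS operations. A rough cross-check one might run against NetworkX (graph choice and tolerance are illustrative, not from the commit):

```python
import networkx as nx
import numpy as np

from graphblas_algorithms import Graph
from graphblas_algorithms.algorithms import google_matrix

nxg = nx.karate_club_graph()
expected = np.asarray(nx.google_matrix(nxg, alpha=0.85))

G = Graph.from_networkx(nxg)
got = google_matrix(G, alpha=0.85)  # graphblas Matrix, dense when personalization is None

# `to_dense` is the same Matrix method convert_to_nx uses in interface.py below
np.testing.assert_allclose(got.to_dense(fill_value=0.0), expected, rtol=1e-12)
```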
Diff for: graphblas_algorithms/algorithms/operators/__init__.py

+1
@@ -0,0 +1 @@
+from .binary import *

Diff for: graphblas_algorithms/algorithms/operators/binary.py

+156
@@ -0,0 +1,156 @@
+import numpy as np
+from graphblas import Matrix, binary, dtypes, unary
+
+from ..exceptions import GraphBlasAlgorithmException
+
+__all__ = [
+    "compose",
+    "difference",
+    "disjoint_union",
+    "full_join",
+    "intersection",
+    "symmetric_difference",
+    "union",
+]
+
+
+def union(G, H, rename=(), *, name="union"):
+    if G.is_multigraph() != H.is_multigraph():
+        raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.")
+    if G.is_multigraph():
+        raise NotImplementedError("Not yet implemented for multigraphs")
+    if rename:
+        prefix = rename[0]
+        if prefix is not None:
+            G = type(G)(
+                G._A, key_to_id={f"{prefix}{key}": val for key, val in G._key_to_id.items()}
+            )
+        if len(rename) > 1:
+            prefix = rename[1]
+            if prefix is not None:
+                H = type(H)(
+                    H._A, key_to_id={f"{prefix}{key}": val for key, val in H._key_to_id.items()}
+                )
+    A = G._A
+    B = H._A
+    if not G._key_to_id.keys().isdisjoint(H._key_to_id.keys()):
+        raise GraphBlasAlgorithmException("The node sets of the graphs are not disjoint.")
+    C = Matrix(dtypes.unify(A.dtype, B.dtype), A.nrows + B.nrows, A.ncols + B.ncols, name=name)
+    C[: A.nrows, : A.ncols] = A
+    C[A.nrows :, A.ncols :] = B
+    offset = A.nrows
+    key_to_id = {key: val + offset for key, val in H._key_to_id.items()}
+    key_to_id.update(G._key_to_id)
+    return type(G)(C, key_to_id=key_to_id)
+
+
+def disjoint_union(G, H, *, name="disjoint_union"):
+    if G.is_multigraph() != H.is_multigraph():
+        raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.")
+    if G.is_multigraph():
+        raise NotImplementedError("Not yet implemented for multigraphs")
+    A = G._A
+    B = H._A
+    C = Matrix(dtypes.unify(A.dtype, B.dtype), A.nrows + B.nrows, A.ncols + B.ncols, name=name)
+    C[: A.nrows, : A.ncols] = A
+    C[A.nrows :, A.ncols :] = B
+    return type(G)(C)
+
+
+def intersection(G, H, *, name="intersection"):
+    if G.is_multigraph() != H.is_multigraph():
+        raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.")
+    if G.is_multigraph():
+        raise NotImplementedError("Not yet implemented for multigraphs")
+    keys = sorted(G._key_to_id.keys() & H._key_to_id.keys(), key=G._key_to_id.__getitem__)
+    ids = np.array(G.list_to_ids(keys), np.uint64)
+    A = G._A[ids, ids].new()
+    ids = np.array(H.list_to_ids(keys), np.uint64)
+    B = H._A[ids, ids].new(dtypes.unify(A.dtype, H._A.dtype), mask=A.S, name=name)
+    B << unary.one(B)
+    return type(G)(B, key_to_id=dict(zip(keys, range(len(keys)))))
+
+
+def difference(G, H, *, name="difference"):
+    if G.is_multigraph() != H.is_multigraph():
+        raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.")
+    if G.is_multigraph():
+        raise NotImplementedError("Not yet implemented for multigraphs")
+    if G._key_to_id.keys() != H._key_to_id.keys():
+        raise GraphBlasAlgorithmException("Node sets of graphs not equal")
+    A = G._A
+    if G._key_to_id == H._key_to_id:
+        B = H._A
+    else:
+        # Need to perform a permutation
+        keys = sorted(G._key_to_id, key=G._key_to_id.__getitem__)
+        ids = np.array(H.list_to_ids(keys), np.uint64)
+        B = H._A[ids, ids].new()
+    C = unary.one(A).new(mask=~B.S, name=name)
+    return type(G)(C, key_to_id=G._key_to_id)
+
+
+def symmetric_difference(G, H, *, name="symmetric_difference"):
+    if G.is_multigraph() != H.is_multigraph():
+        raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.")
+    if G.is_multigraph():
+        raise NotImplementedError("Not yet implemented for multigraphs")
+    if G._key_to_id.keys() != H._key_to_id.keys():
+        raise GraphBlasAlgorithmException("Node sets of graphs not equal")
+    A = G._A
+    if G._key_to_id == H._key_to_id:
+        B = H._A
+    else:
+        # Need to perform a permutation
+        keys = sorted(G._key_to_id, key=G._key_to_id.__getitem__)
+        ids = np.array(H.list_to_ids(keys), np.uint64)
+        B = H._A[ids, ids].new()
+    Mask = binary.pair[bool](A & B).new(name="mask")
+    C = binary.pair(A | B, left_default=True, right_default=True).new(mask=~Mask.S, name=name)
+    return type(G)(C, key_to_id=G._key_to_id)
+
+
+def compose(G, H, *, name="compose"):
+    if G.is_multigraph() != H.is_multigraph():
+        raise GraphBlasAlgorithmException("All graphs must be graphs or multigraphs.")
+    if G.is_multigraph():
+        raise NotImplementedError("Not yet implemented for multigraphs")
+    A = G._A
+    B = H._A
+    if G._key_to_id.keys() == H._key_to_id.keys():
+        if G._key_to_id != H._key_to_id:
+            # Need to perform a permutation
+            keys = sorted(G._key_to_id, key=G._key_to_id.__getitem__)
+            ids = np.array(H.list_to_ids(keys), np.uint64)
+            B = B[ids, ids].new()
+        C = binary.second(A | B).new(name=name)
+        key_to_id = G._key_to_id
+    else:
+        keys = sorted(G._key_to_id.keys() & H._key_to_id.keys(), key=G._key_to_id.__getitem__)
+        B = H._A
+        C = Matrix(
+            dtypes.unify(A.dtype, B.dtype),
+            A.nrows + B.nrows - len(keys),
+            A.ncols + B.ncols - len(keys),
+            name=name,
+        )
+        C[: A.nrows, : A.ncols] = A
+        ids1 = np.array(G.list_to_ids(keys), np.uint64)
+        ids2 = np.array(H.list_to_ids(keys), np.uint64)
+        C[ids1, ids1] = B[ids2, ids2]
+        newkeys = sorted(H._key_to_id.keys() - G._key_to_id.keys(), key=H._key_to_id.__getitem__)
+        ids = np.array(H.list_to_ids(newkeys), np.uint64)
+        C[A.nrows :, A.ncols :] = B[ids, ids]
+        # Now make new `key_to_id`
+        ids += A.nrows
+        key_to_id = dict(zip(newkeys, ids.tolist()))
+        key_to_id.update(G._key_to_id)
+    return type(G)(C, key_to_id=key_to_id)
+
+
+def full_join(G, H, rename=(), *, name="full_join"):
+    rv = union(G, H, rename, name=name)
+    nrows, ncols = G._A.shape
+    rv._A[:nrows, ncols:] = True
+    rv._A[nrows:, :ncols] = True
+    return rv
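A short sketch of how the new operators are called on `Graph` objects (graph construction and import paths are assumed from the rest of the package, not shown in this diff). As in NetworkX, `union` and `full_join` require disjoint node sets unless `rename` prefixes are given, while `compose` accepts overlapping node sets and `difference`/`symmetric_difference` require identical node sets:

```python
import networkx as nx

from graphblas_algorithms import Graph
from graphblas_algorithms.algorithms import (
    compose,
    difference,
    full_join,
    intersection,
    union,
)

G = Graph.from_networkx(nx.path_graph([0, 1, 2]))  # edges 0-1, 1-2
H = Graph.from_networkx(nx.path_graph([2, 3, 4]))  # edges 2-3, 3-4 (shares node 2 with G)

C = compose(G, H)                         # node/edge union; overlapping nodes allowed
U = union(G, H, rename=("g-", "h-"))      # disjoint union of relabeled copies: "g-0", ..., "h-4"
F = full_join(G, H, rename=("g-", "h-"))  # union plus every edge between the two node sets

# difference requires identical node sets; intersection keeps only shared nodes/edges
K5 = Graph.from_networkx(nx.complete_graph(5))
C5 = Graph.from_networkx(nx.cycle_graph(5))
D = difference(K5, C5)    # edges of K5 not in C5
I = intersection(K5, C5)  # edges common to both
```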

Diff for: graphblas_algorithms/algorithms/shortest_paths/dense.py

+12 -1
@@ -1,14 +1,18 @@
 from graphblas import Matrix, Vector, binary, indexunary, replace, select
 from graphblas.semiring import any_plus, any_second
 
+from ..exceptions import GraphBlasAlgorithmException
+
 __all__ = ["floyd_warshall", "floyd_warshall_predecessor_and_distance"]
 
 
 def floyd_warshall(G, is_weighted=False):
     return floyd_warshall_predecessor_and_distance(G, is_weighted, compute_predecessors=False)[1]
 
 
-def floyd_warshall_predecessor_and_distance(G, is_weighted=False, *, compute_predecessors=True):
+def floyd_warshall_predecessor_and_distance(
+    G, is_weighted=False, *, compute_predecessors=True, permutation=None
+):
     # By using `offdiag` instead of `G._A`, we ensure that D will not become dense.
     # Dense D may be better at times, but not including the diagonal will result in less work.
     # Typically, Floyd-Warshall algorithms sets the diagonal of D to 0 at the beginning.
@@ -19,6 +23,13 @@ def floyd_warshall_predecessor_and_distance(G, is_weighted=False, *, compute_pre
         nonempty_nodes = binary.pair(row_degrees & column_degrees).new(name="nonempty_nodes")
     else:
         A, nonempty_nodes = G.get_properties("U- degrees-")
+    if permutation is not None:
+        if len(permutation) != nonempty_nodes.size:
+            raise GraphBlasAlgorithmException(
+                "permutation must contain every node in G with no repeats."
+            )
+        A = A[permutation, permutation].new()
+        nonempty_nodes = nonempty_nodes[permutation].new(name="nonempty_nodes")
 
     if A.dtype == bool or not is_weighted:
         dtype = int
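The new `permutation` keyword reorders the rows/columns of the distance (and predecessor) matrices before the iteration, which is presumably how the nxapi `floyd_warshall_numpy` wrapper honors its `nodelist` argument (that wrapper is not shown in this view). A minimal sketch of calling the core function directly, with an illustrative graph and permutation:

```python
import networkx as nx

from graphblas_algorithms import Graph
from graphblas_algorithms.algorithms import floyd_warshall_predecessor_and_distance

G = Graph.from_networkx(nx.cycle_graph(4))  # nodes 0..3

# Reverse the node order; the permutation must list every node exactly once
P, D = floyd_warshall_predecessor_and_distance(G, permutation=[3, 2, 1, 0])
dist = D.to_dense(fill_value=0)  # 4x4 distances in the permuted order (diagonal is implicit 0)
```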

Diff for: graphblas_algorithms/interface.py

+34 -2
@@ -51,7 +51,16 @@ class Dispatcher:
     number_of_isolates = nxapi.isolate.number_of_isolates
     # Link Analysis
     hits = nxapi.link_analysis.hits_alg.hits
+    google_matrix = nxapi.link_analysis.pagerank_alg.google_matrix
     pagerank = nxapi.link_analysis.pagerank_alg.pagerank
+    # Operators
+    compose = nxapi.operators.binary.compose
+    difference = nxapi.operators.binary.difference
+    disjoint_union = nxapi.operators.binary.disjoint_union
+    full_join = nxapi.operators.binary.full_join
+    intersection = nxapi.operators.binary.intersection
+    symmetric_difference = nxapi.operators.binary.symmetric_difference
+    union = nxapi.operators.binary.union
     # Reciprocity
     overall_reciprocity = nxapi.overall_reciprocity
     reciprocity = nxapi.reciprocity
@@ -60,6 +69,7 @@ class Dispatcher:
     is_regular = nxapi.regular.is_regular
     # Shortest Paths
     floyd_warshall = nxapi.shortest_paths.dense.floyd_warshall
+    floyd_warshall_numpy = nxapi.shortest_paths.dense.floyd_warshall_numpy
     floyd_warshall_predecessor_and_distance = (
         nxapi.shortest_paths.dense.floyd_warshall_predecessor_and_distance
     )
@@ -112,10 +122,14 @@ def convert_from_nx(graph, weight=None, *, name=None):
 
     @staticmethod
     def convert_to_nx(obj, *, name=None):
+        from graphblas import Matrix
+
         from .classes import Graph
 
         if isinstance(obj, Graph):
             obj = obj.to_networkx()
+        elif isinstance(obj, Matrix):
+            obj = obj.to_dense(fill_value=False)
         return obj
 
     @staticmethod
@@ -127,8 +141,11 @@ def on_start_tests(items):
 
         def key(testpath):
             filename, path = testpath.split(":")
-            classname, testname = path.split(".")
-            return (testname, frozenset({classname, filename}))
+            *names, testname = path.split(".")
+            if names:
+                [classname] = names
+                return (testname, frozenset({classname, filename}))
+            return (testname, frozenset({filename}))
 
         # Reasons to skip tests
         multi_attributed = "unable to handle multi-attributed graphs"
@@ -140,7 +157,22 @@ def key(testpath):
             key("test_mst.py:TestBoruvka.test_attributes"): multi_attributed,
             key("test_mst.py:TestBoruvka.test_weight_attribute"): multi_attributed,
             key("test_dense.py:TestFloyd.test_zero_weight"): multidigraph,
+            key("test_dense_numpy.py:test_zero_weight"): multidigraph,
             key("test_weighted.py:TestBellmanFordAndGoldbergRadzik.test_multigraph"): multigraph,
+            key("test_binary.py:test_compose_multigraph"): multigraph,
+            key("test_binary.py:test_difference_multigraph_attributes"): multigraph,
+            key("test_binary.py:test_disjoint_union_multigraph"): multigraph,
+            key("test_binary.py:test_full_join_multigraph"): multigraph,
+            key("test_binary.py:test_intersection_multigraph_attributes"): multigraph,
+            key(
+                "test_binary.py:test_intersection_multigraph_attributes_node_set_different"
+            ): multigraph,
+            key("test_binary.py:test_symmetric_difference_multigraph"): multigraph,
+            key("test_binary.py:test_union_attributes"): multi_attributed,
+            # TODO: move failing assertion from `test_union_and_compose`
+            key("test_binary.py:test_union_and_compose"): multi_attributed,
+            key("test_binary.py:test_union_multigraph"): multigraph,
+            key("test_vf2pp.py:test_custom_multigraph4_different_labels"): multigraph,
         }
         for item in items:
             kset = set(item.keywords)
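The `key` helper previously assumed every NetworkX test path contained a class name (`file.py:Class.test`); the new `test_binary.py` and `test_dense_numpy.py` skips are module-level tests (`file.py:test`), which the rewritten version handles. A standalone copy of the same logic for illustration, with outputs shown as comments:

```python
def key(testpath):
    filename, path = testpath.split(":")
    *names, testname = path.split(".")
    if names:
        [classname] = names
        return (testname, frozenset({classname, filename}))
    return (testname, frozenset({filename}))

key("test_dense.py:TestFloyd.test_zero_weight")
# -> ('test_zero_weight', frozenset({'TestFloyd', 'test_dense.py'}))
key("test_binary.py:test_union_multigraph")
# -> ('test_union_multigraph', frozenset({'test_binary.py'}))
```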
