Skip to content

Commit e30c0b2

Browse files
authored
Add triangles (#3)
* Add `triangles` I think the tests could be improved. Some of the NetworkX tests get coverage, but only compare to 0 triangles. Also, we should test more with self-edges. There may be better ways to compute triangles for: - all nodes - a subset of nodes - a single node There are *a lot* of different ways to compute triangles, so this could be explored further in the future. I hope the current PR is competitive. * Handle and test triangle count with self-edges * Add `has_self_edges=True` argument for single triangle count * Tiny improvement * Add transitivity * Begin clustering coefficient; also, make computing properties easier. * Better handling of properties * Implement clustering for undirected, unweighted graphs * Add `average_clustering` and helper functions to make things cleaner
1 parent 441d5be commit e30c0b2

File tree

8 files changed

+338
-31
lines changed

8 files changed

+338
-31
lines changed

README.md

+1-2
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@
33
[![pypi](https://img.shields.io/pypi/v/graphblas-algorithms.svg)](https://pypi.python.org/pypi/graphblas-algorithms/)
44
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/python-graphblas/graphblas-algorithms/blob/main/LICENSE)
55
[![Tests](https://github.com/python-graphblas/graphblas-algorithms/workflows/Tests/badge.svg?branch=main)](https://github.com/python-graphblas/graphblas-algorithms/actions)
6-
[![Coverage](https://coveralls.io/repos/python-graphblas/graphblas-algorithms/badge.svg?branch=main)](https://coveralls.io/r/python-graphblas/graphblas-algorithms)
7-
[![Code style](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
6+
<!--- [![Coverage](https://coveralls.io/repos/python-graphblas/graphblas-algorithms/badge.svg?branch=main)](https://coveralls.io/r/python-graphblas/graphblas-algorithms) --->
87
<!--- [![conda-forge](https://img.shields.io/conda/vn/conda-forge/graphblas-algorithms.svg)](https://anaconda.org/conda-forge/graphblas-algorithms) --->
98
<!--- [![Docs](https://readthedocs.org/projects/graphblas-algorithms/badge/?version=latest)](https://graphblas-algorithms.readthedocs.io/en/latest/) --->
109

graphblas_algorithms/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from . import _version
2+
from .cluster import average_clustering, clustering, transitivity, triangles # noqa
23
from .link_analysis import pagerank # noqa
34

45
__version__ = _version.get_versions()["version"]

graphblas_algorithms/_utils.py

+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import graphblas as gb
2+
from graphblas import Vector, binary
3+
4+
5+
def graph_to_adjacency(G, weight=None, dtype=None, *, name=None):
    """Build a GraphBLAS adjacency matrix from a networkx graph.

    Returns a 2-tuple ``(A, key_to_id)`` where ``key_to_id`` maps each node
    key to its row/column index in ``A``.
    """
    key_to_id = {node: index for index, node in enumerate(G)}
    adjacency = gb.io.from_networkx(G, nodelist=key_to_id, weight=weight, dtype=dtype, name=name)
    return adjacency, key_to_id
9+
10+
11+
def dict_to_vector(d, key_to_id, *, size=None, dtype=None, name=None):
    """Build a GraphBLAS Vector from a mapping of node key -> value.

    Passes ``None`` through unchanged; ``size`` defaults to ``len(key_to_id)``.
    """
    if d is None:
        return None
    if size is None:
        size = len(key_to_id)
    # Translate node keys to integer ids, keeping values paired with them.
    pairs = ((key_to_id[key], value) for key, value in d.items())
    indices, values = zip(*pairs)
    return Vector.from_values(indices, values, size=size, dtype=dtype, name=name)
18+
19+
20+
def list_to_vector(nodes, key_to_id, *, size=None, name=None):
    """Build a boolean Vector that is True at each node listed in ``nodes``.

    Returns ``(vector, id_to_key)``; both are ``None`` when ``nodes`` is None.
    ``id_to_key`` maps the integer id of each listed node back to its key.
    """
    if nodes is None:
        return None, None
    if size is None:
        size = len(key_to_id)
    id_to_key = {key_to_id[node]: node for node in nodes}
    vector = Vector.from_values(list(id_to_key), True, size=size, dtype=bool, name=name)
    return vector, id_to_key
28+
29+
30+
def list_to_mask(nodes, key_to_id, *, size=None, name="mask"):
    """Like ``list_to_vector``, but return a structural mask (``.S``) instead.

    Returns ``(mask, id_to_key)``; both are ``None`` when ``nodes`` is None.
    """
    if nodes is None:
        return None, None
    vector, id_to_key = list_to_vector(nodes, key_to_id, size=size, name=name)
    return vector.S, id_to_key
35+
36+
37+
def vector_to_dict(v, key_to_id, id_to_key=None, *, mask=None, fillvalue=None):
    """Convert Vector ``v`` to a dict keyed by the original node keys.

    Warning: when ``fillvalue`` is given and entries are missing, ``v`` is
    mutated in place to fill them before conversion.
    """
    if id_to_key is None:
        id_to_key = {index: key for key, index in key_to_id.items()}
    if mask is not None:
        # Fill only the positions selected by the mask that are still empty.
        if fillvalue is not None and v.nvals < mask.parent.nvals:
            v(mask, binary.first) << fillvalue
    elif fillvalue is not None and v.nvals < v.size:
        v(mask=~v.S) << fillvalue
    indices, values = v.to_values(sort=False)
    return {id_to_key[index]: value for index, value in zip(indices, values)}

graphblas_algorithms/cluster.py

+180
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
import graphblas as gb
2+
import networkx as nx
3+
from graphblas import Matrix, agg, select
4+
from graphblas.semiring import any_pair, plus_pair
5+
from networkx import average_clustering as _nx_average_clustering
6+
from networkx import clustering as _nx_clustering
7+
from networkx.utils import not_implemented_for
8+
9+
from ._utils import graph_to_adjacency, list_to_mask, vector_to_dict
10+
11+
12+
def get_properties(G, names, *, L=None, U=None, degrees=None, has_self_edges=True):
    """Calculate properties of undirected graph.

    ``names`` is a list of property names, or a single string of names
    separated by commas and/or spaces.  Known names: "L" (strictly lower
    triangle), "U" (strictly upper triangle), "degrees", "has_self_edges".
    Returns a single property, or a list when more than one was requested.
    """
    if isinstance(names, str):
        # Accept commas and/or spaces as separators; drop empty tokens.
        names = [token for token in names.replace(" ", ",").split(",") if token]
    results = []
    for name in names:
        if name == "L":
            if L is None:
                L = select.tril(G, -1).new(name="L")
            results.append(L)
        elif name == "U":
            if U is None:
                U = select.triu(G, 1).new(name="U")
            results.append(U)
        elif name == "degrees":
            if degrees is None:
                degrees = get_degrees(G, L=L, U=U, has_self_edges=has_self_edges)
            results.append(degrees)
        elif name == "has_self_edges":
            # Only compute when it is cheap (a triangle is already available);
            # otherwise report the caller-provided value.
            if L is not None:
                has_self_edges = G.nvals > 2 * L.nvals
            elif U is not None:
                has_self_edges = G.nvals > 2 * U.nvals
            results.append(has_self_edges)
        else:
            raise ValueError(f"Unknown property name: {name}")
    if len(results) == 1:
        return results[0]
    return results
43+
44+
45+
def get_degrees(G, mask=None, *, L=None, U=None, has_self_edges=True):
    """Compute node degrees of undirected ``G``, never counting self-edges."""
    # When a triangle is already available, self-edge presence is cheap to test.
    if L is not None:
        has_self_edges = G.nvals > 2 * L.nvals
    elif U is not None:
        has_self_edges = G.nvals > 2 * U.nvals
    if not has_self_edges:
        # No diagonal entries: a plain row count is the degree.
        return G.reduce_rowwise(agg.count).new(mask=mask, name="degrees")
    if L is None or U is None:
        L, U = get_properties(G, "L U", L=L, U=U)
    # Count strictly-lower and strictly-upper entries separately so the
    # diagonal (self-edges) never contributes.
    lower_counts = L.reduce_rowwise(agg.count).new(mask=mask)
    upper_counts = U.reduce_rowwise(agg.count).new(mask=mask)
    return (lower_counts + upper_counts).new(name="degrees")
59+
60+
61+
def single_triangle_core(G, index, *, L=None, has_self_edges=True):
    """Count the triangles through the single node ``index`` of undirected ``G``.

    ``L`` is an optional precomputed strictly-lower triangle of ``G``;
    ``has_self_edges=False`` skips self-edge handling entirely.
    """
    # One-hot matrix: multiplying by M.T extracts column ``index`` of G.T.
    M = Matrix(bool, G.nrows, G.ncols)
    M[index, index] = True
    C = any_pair(G.T @ M.T).new(name="C")  # select.coleq(G.T, index)
    has_self_edges = get_properties(G, "has_self_edges", L=L, has_self_edges=has_self_edges)
    if has_self_edges:
        del C[index, index]  # Ignore self-edges
    # R: the neighbors of ``index`` laid out as a row, for the wedge product below.
    R = C.T.new(name="R")
    if has_self_edges:
        # Pretty much all the time is spent here taking TRIL, which is used to ignore self-edges
        L = get_properties(G, "L", L=L)
        return plus_pair(L @ R.T).new(mask=C.S).reduce_scalar(allow_empty=False).value
    else:
        # Full adjacency counts each triangle twice, hence the // 2.
        return plus_pair(G @ R.T).new(mask=C.S).reduce_scalar(allow_empty=False).value // 2
75+
76+
77+
def triangles_core(G, mask=None, *, L=None, U=None):
    """Count triangles per node for undirected ``G``; self-edges are ignored."""
    L, U = get_properties(G, "L U", L=L, U=U)
    # Wedges among strictly-lower entries; masking by L.S keeps only edges.
    C = plus_pair(L @ L.T).new(mask=L.S)
    # Each node's triangles come from three orientations of the counted wedges.
    from_rows = C.reduce_rowwise().new(mask=mask)
    from_columns = C.reduce_columnwise().new(mask=mask)
    from_upper = plus_pair(U @ L.T).new(mask=U.S).reduce_rowwise().new(mask=mask)
    return (from_rows + from_columns + from_upper).new(name="triangles")
86+
87+
88+
@not_implemented_for("directed")
def triangles(G, nodes=None):
    """Count triangles for all nodes, a subset (``nodes``), or a single node.

    Mirrors ``networkx.triangles``: a dict keyed by node, or a plain count
    when ``nodes`` is itself a node of ``G``.
    """
    if len(G) == 0:
        return {}
    A, key_to_id = graph_to_adjacency(G, dtype=bool)
    if nodes in G:
        # A single node was given: return its triangle count as a number.
        return single_triangle_core(A, key_to_id[nodes])
    mask, id_to_key = list_to_mask(nodes, key_to_id)
    counts = triangles_core(A, mask=mask)
    return vector_to_dict(counts, key_to_id, id_to_key, mask=mask, fillvalue=0)
98+
99+
100+
def total_triangles_core(G, *, L=None, U=None):
    """Total number of triangles in undirected ``G`` (self-edges ignored)."""
    # SandiaDot method: usually the fastest on large graphs.  For smaller
    # graphs, the Sandia method is often faster: plus_pair(L @ L).new(mask=L.S)
    L, U = get_properties(G, "L U", L=L, U=U)
    wedges = plus_pair(L @ U.T).new(mask=L.S)
    return wedges.reduce_scalar(allow_empty=False).value
105+
106+
107+
def transitivity_core(G, *, L=None, U=None, degrees=None):
    """Transitivity of ``G``: 3 * triangles / number of connected triads."""
    L, U = get_properties(G, "L U", L=L, U=U)
    triangle_total = total_triangles_core(G, L=L, U=U)
    if triangle_total == 0:
        return 0
    degrees = get_properties(G, "degrees", L=L, U=U, degrees=degrees)
    triads = (degrees * (degrees - 1)).reduce().value
    # 3 triads per triangle, and d*(d-1) double-counts each unordered triad,
    # hence the combined factor of 6.
    return 6 * triangle_total / triads
115+
116+
117+
@not_implemented_for("directed")  # Should we implement it for directed?
def transitivity(G):
    """Return the transitivity (global clustering coefficient) of graph ``G``.

    Returns 0 for the empty graph, matching ``networkx.transitivity``.
    """
    if len(G) == 0:
        return 0
    # Consistency fix: use the shared `graph_to_adjacency` helper like the
    # other wrappers (`triangles`, `clustering`) instead of calling
    # gb.io.from_networkx directly.  Node order is identical either way, so
    # behavior is unchanged; the node mapping is simply unused here.
    A, _ = graph_to_adjacency(G, dtype=bool)
    return transitivity_core(A)
123+
124+
125+
def clustering_core(G, mask=None, *, L=None, U=None, degrees=None):
    """Per-node clustering coefficient: 2 * triangles / (d * (d - 1)).

    ``degrees`` may be supplied precomputed to avoid recomputation.
    """
    L, U = get_properties(G, "L U", L=L, U=U)
    tri = triangles_core(G, mask=mask, L=L, U=U)
    if degrees is None:
        # Fix: the `degrees` argument was previously accepted but ignored —
        # degrees were always recomputed.  Honor a precomputed value.
        degrees = get_degrees(G, mask=mask, L=L, U=U)
    denom = degrees * (degrees - 1)
    return (2 * tri / denom).new(name="clustering")
131+
132+
133+
def single_clustering_core(G, index, *, L=None, degrees=None, has_self_edges=True):
    """Clustering coefficient of the single node ``index`` in undirected ``G``."""
    has_self_edges = get_properties(G, "has_self_edges", L=L, has_self_edges=has_self_edges)
    tri = single_triangle_core(G, index, L=L, has_self_edges=has_self_edges)
    if tri == 0:
        # Zero triangles also covers degree < 2, avoiding a zero denominator.
        return 0
    if degrees is not None:
        # Caller-supplied degrees; assumed to already exclude self-edges
        # (as `get_degrees` produces) — no adjustment applied here.
        degrees = degrees[index].value
    else:
        row = G[index, :].new()
        degrees = row.reduce(agg.count).value
        # A self-edge adds one entry to the row; remove it from the degree.
        if has_self_edges and row[index].value is not None:
            degrees -= 1
    denom = degrees * (degrees - 1)
    return 2 * tri / denom
147+
148+
149+
def clustering(G, nodes=None, weight=None):
    """Clustering coefficients for all nodes, a subset, or a single node.

    Directed or weighted graphs are delegated to networkx for now.
    """
    if len(G) == 0:
        return {}
    if isinstance(G, nx.DiGraph) or weight is not None:
        # TODO: Not yet implemented. Clustering implemented only for undirected and unweighted.
        return _nx_clustering(G, nodes=nodes, weight=weight)
    A, key_to_id = graph_to_adjacency(G, weight=weight)
    if nodes in G:
        # A single node was given: return its coefficient as a number.
        return single_clustering_core(A, key_to_id[nodes])
    mask, id_to_key = list_to_mask(nodes, key_to_id)
    coefficients = clustering_core(A, mask=mask)
    return vector_to_dict(coefficients, key_to_id, id_to_key, mask=mask, fillvalue=0.0)
161+
162+
163+
def average_clustering_core(G, mask=None, count_zeros=True, *, L=None, U=None, degrees=None):
    """Mean clustering coefficient over the (optionally masked) nodes."""
    coefficients = clustering_core(G, mask=mask, L=L, U=U, degrees=degrees)
    total = coefficients.reduce(allow_empty=False).value
    if not count_zeros:
        # Average only over nodes that have a stored coefficient.
        return total / coefficients.nvals
    if mask is not None:
        # Average over every node selected by the mask.
        return total / mask.parent.nvals
    return total / coefficients.size
172+
173+
174+
def average_clustering(G, nodes=None, weight=None, count_zeros=True):
    """Average clustering coefficient of ``G``, optionally over ``nodes``.

    Empty, directed, or weighted graphs are delegated to networkx for now.
    """
    if len(G) == 0 or isinstance(G, nx.DiGraph) or weight is not None:
        # TODO: Not yet implemented. Clustering implemented only for undirected and unweighted.
        return _nx_average_clustering(G, nodes=nodes, weight=weight, count_zeros=count_zeros)
    A, key_to_id = graph_to_adjacency(G, weight=weight)
    mask, _ = list_to_mask(nodes, key_to_id)
    return average_clustering_core(A, mask=mask, count_zeros=count_zeros)

graphblas_algorithms/link_analysis.py

+13-27
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
from collections import OrderedDict
21
from warnings import warn
32

4-
import graphblas as gb
53
import networkx as nx
64
from graphblas import Vector, binary, unary
75
from graphblas.semiring import plus_first, plus_times
86

7+
from ._utils import dict_to_vector, graph_to_adjacency, vector_to_dict
8+
99

1010
def pagerank_core(
1111
A,
@@ -44,7 +44,7 @@ def pagerank_core(
4444
# Inverse of row_degrees
4545
# Fold alpha constant into S
4646
if row_degrees is None:
47-
S = A.reduce_rowwise().new(float, name="S")
47+
S = A.reduce_rowwise().new(float, name="S") # XXX: What about self-edges
4848
S << alpha / S
4949
else:
5050
S = (alpha / row_degrees).new(name="S")
@@ -119,26 +119,15 @@ def pagerank(
119119
N = len(G)
120120
if N == 0:
121121
return {}
122-
node_ids = OrderedDict((k, i) for i, k in enumerate(G))
123-
A = gb.io.from_networkx(G, nodelist=node_ids, weight=weight, dtype=float)
124-
125-
x = p = dangling_weights = None
126-
# Initial vector (we'll normalize later)
127-
if nstart is not None:
128-
indices, values = zip(*((node_ids[key], val) for key, val in nstart.items()))
129-
x = Vector.from_values(indices, values, size=N, dtype=float, name="nstart")
130-
# Personalization vector (we'll normalize later)
131-
if personalization is not None:
132-
indices, values = zip(*((node_ids[key], val) for key, val in personalization.items()))
133-
p = Vector.from_values(indices, values, size=N, dtype=float, name="personalization")
134-
# Dangling nodes (we'll normalize later)
135-
row_degrees = A.reduce_rowwise().new(name="row_degrees")
136-
if dangling is not None:
137-
if row_degrees.nvals < N: # is_dangling
138-
indices, values = zip(*((node_ids[key], val) for key, val in dangling.items()))
139-
dangling_weights = Vector.from_values(
140-
indices, values, size=N, dtype=float, name="dangling"
141-
)
122+
A, key_to_id = graph_to_adjacency(G, weight=weight, dtype=float)
123+
# We'll normalize initial, personalization, and dangling vectors later
124+
x = dict_to_vector(nstart, key_to_id, dtype=float, name="nstart")
125+
p = dict_to_vector(personalization, key_to_id, dtype=float, name="personalization")
126+
row_degrees = A.reduce_rowwise().new(name="row_degrees") # XXX: What about self-edges?
127+
if dangling is not None and row_degrees.nvals < N:
128+
dangling_weights = dict_to_vector(dangling, key_to_id, dtype=float, name="dangling")
129+
else:
130+
dangling_weights = None
142131
result = pagerank_core(
143132
A,
144133
alpha=alpha,
@@ -149,7 +138,4 @@ def pagerank(
149138
dangling=dangling_weights,
150139
row_degrees=row_degrees,
151140
)
152-
if result.nvals != N:
153-
# Not likely, but fill with 0 just in case
154-
result(mask=~result.S) << 0
155-
return dict(zip(node_ids, result.to_values()[1]))
141+
return vector_to_dict(result, key_to_id, fillvalue=0.0)

0 commit comments

Comments
 (0)