Skip to content

Commit e30c0b2

Browse files
authored
Add triangles (#3)
* Add `triangles` I think the tests could be improved. Some of the NetworkX tests get coverage, but only compare to 0 triangles. Also, we should test more with self-edges. There may be better ways to compute triangles for: - all nodes - a subset of nodes - a single node There are *a lot* of different ways to compute triangles, so this could be explored further in the future. I hope the current PR is competitive. * Handle and test triangle count with self-edges * Add `has_self_edges=True` argument for single triangle count * Tiny improvement * Add transitivity * Begin clustering coefficient; also, make computing properties easier. * Better handling of properties * Implement clustering for undirected, unweighted graphs * Add `average_clustering` and helper functions to make things cleaner
1 parent 441d5be commit e30c0b2

File tree

8 files changed

+338
-31
lines changed

8 files changed

+338
-31
lines changed

README.md

+1-2
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@
33
[![pypi](https://img.shields.io/pypi/v/graphblas-algorithms.svg)](https://pypi.python.org/pypi/graphblas-algorithms/)
44
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/python-graphblas/graphblas-algorithms/blob/main/LICENSE)
55
[![Tests](https://github.com/python-graphblas/graphblas-algorithms/workflows/Tests/badge.svg?branch=main)](https://github.com/python-graphblas/graphblas-algorithms/actions)
6-
[![Coverage](https://coveralls.io/repos/python-graphblas/graphblas-algorithms/badge.svg?branch=main)](https://coveralls.io/r/python-graphblas/graphblas-algorithms)
7-
[![Code style](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
6+
<!--- [![Coverage](https://coveralls.io/repos/python-graphblas/graphblas-algorithms/badge.svg?branch=main)](https://coveralls.io/r/python-graphblas/graphblas-algorithms) --->
87
<!--- [![conda-forge](https://img.shields.io/conda/vn/conda-forge/graphblas-algorithms.svg)](https://anaconda.org/conda-forge/graphblas-algorithms) --->
98
<!--- [![Docs](https://readthedocs.org/projects/graphblas-algorithms/badge/?version=latest)](https://graphblas-algorithms.readthedocs.io/en/latest/) --->
109

graphblas_algorithms/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from . import _version
2+
from .cluster import average_clustering, clustering, transitivity, triangles # noqa
23
from .link_analysis import pagerank # noqa
34

45
__version__ = _version.get_versions()["version"]

graphblas_algorithms/_utils.py

+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import graphblas as gb
2+
from graphblas import Vector, binary
3+
4+
5+
def graph_to_adjacency(G, weight=None, dtype=None, *, name=None):
    """Build a GraphBLAS adjacency matrix from a networkx graph.

    Returns a 2-tuple ``(A, key_to_id)`` where ``key_to_id`` maps each node
    key to its row/column index in ``A``.
    """
    key_to_id = {node: index for index, node in enumerate(G)}
    adjacency = gb.io.from_networkx(G, nodelist=key_to_id, weight=weight, dtype=dtype, name=name)
    return adjacency, key_to_id
9+
10+
11+
def dict_to_vector(d, key_to_id, *, size=None, dtype=None, name=None):
    """Build a GraphBLAS Vector from a mapping of node key -> value.

    Passes ``None`` through unchanged; ``size`` defaults to ``len(key_to_id)``.
    """
    if d is None:
        return None
    if size is None:
        size = len(key_to_id)
    # Translate node keys to integer ids, keeping values paired with them.
    pairs = ((key_to_id[key], value) for key, value in d.items())
    indices, values = zip(*pairs)
    return Vector.from_values(indices, values, size=size, dtype=dtype, name=name)
18+
19+
20+
def list_to_vector(nodes, key_to_id, *, size=None, name=None):
    """Build a boolean Vector that is True at each node listed in ``nodes``.

    Returns ``(vector, id_to_key)``; both are ``None`` when ``nodes`` is None.
    ``id_to_key`` maps the integer id of each listed node back to its key.
    """
    if nodes is None:
        return None, None
    if size is None:
        size = len(key_to_id)
    id_to_key = {key_to_id[node]: node for node in nodes}
    vector = Vector.from_values(list(id_to_key), True, size=size, dtype=bool, name=name)
    return vector, id_to_key
28+
29+
30+
def list_to_mask(nodes, key_to_id, *, size=None, name="mask"):
    """Like ``list_to_vector``, but return a structural mask (``.S``) instead.

    Returns ``(mask, id_to_key)``; both are ``None`` when ``nodes`` is None.
    """
    if nodes is None:
        return None, None
    vector, id_to_key = list_to_vector(nodes, key_to_id, size=size, name=name)
    return vector.S, id_to_key
35+
36+
37+
def vector_to_dict(v, key_to_id, id_to_key=None, *, mask=None, fillvalue=None):
    """Convert Vector ``v`` to a dict keyed by the original node keys.

    Warning: when ``fillvalue`` is given and entries are missing, ``v`` is
    mutated in place to fill them before conversion.
    """
    if id_to_key is None:
        id_to_key = {index: key for key, index in key_to_id.items()}
    if mask is not None:
        # Fill only the positions selected by the mask that are still empty.
        if fillvalue is not None and v.nvals < mask.parent.nvals:
            v(mask, binary.first) << fillvalue
    elif fillvalue is not None and v.nvals < v.size:
        v(mask=~v.S) << fillvalue
    indices, values = v.to_values(sort=False)
    return {id_to_key[index]: value for index, value in zip(indices, values)}

graphblas_algorithms/cluster.py

+180
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
import graphblas as gb
2+
import networkx as nx
3+
from graphblas import Matrix, agg, select
4+
from graphblas.semiring import any_pair, plus_pair
5+
from networkx import average_clustering as _nx_average_clustering
6+
from networkx import clustering as _nx_clustering
7+
from networkx.utils import not_implemented_for
8+
9+
from ._utils import graph_to_adjacency, list_to_mask, vector_to_dict
10+
11+
12+
def get_properties(G, names, *, L=None, U=None, degrees=None, has_self_edges=True):
    """Calculate properties of undirected graph.

    ``names`` is a list of property names, or a single string of names
    separated by commas and/or spaces.  Known names: "L" (strictly lower
    triangle), "U" (strictly upper triangle), "degrees", "has_self_edges".
    Returns a single property, or a list when more than one was requested.
    """
    if isinstance(names, str):
        # Accept commas and/or spaces as separators; drop empty tokens.
        names = [token for token in names.replace(" ", ",").split(",") if token]
    results = []
    for name in names:
        if name == "L":
            if L is None:
                L = select.tril(G, -1).new(name="L")
            results.append(L)
        elif name == "U":
            if U is None:
                U = select.triu(G, 1).new(name="U")
            results.append(U)
        elif name == "degrees":
            if degrees is None:
                degrees = get_degrees(G, L=L, U=U, has_self_edges=has_self_edges)
            results.append(degrees)
        elif name == "has_self_edges":
            # Only compute when it is cheap (a triangle is already available);
            # otherwise report the caller-provided value.
            if L is not None:
                has_self_edges = G.nvals > 2 * L.nvals
            elif U is not None:
                has_self_edges = G.nvals > 2 * U.nvals
            results.append(has_self_edges)
        else:
            raise ValueError(f"Unknown property name: {name}")
    if len(results) == 1:
        return results[0]
    return results
43+
44+
45+
def get_degrees(G, mask=None, *, L=None, U=None, has_self_edges=True):
    """Compute node degrees of undirected ``G``, never counting self-edges."""
    # When a triangle is already available, self-edge presence is cheap to test.
    if L is not None:
        has_self_edges = G.nvals > 2 * L.nvals
    elif U is not None:
        has_self_edges = G.nvals > 2 * U.nvals
    if not has_self_edges:
        # No diagonal entries: a plain row count is the degree.
        return G.reduce_rowwise(agg.count).new(mask=mask, name="degrees")
    if L is None or U is None:
        L, U = get_properties(G, "L U", L=L, U=U)
    # Count strictly-lower and strictly-upper entries separately so the
    # diagonal (self-edges) never contributes.
    lower_counts = L.reduce_rowwise(agg.count).new(mask=mask)
    upper_counts = U.reduce_rowwise(agg.count).new(mask=mask)
    return (lower_counts + upper_counts).new(name="degrees")
59+
60+
61+
def single_triangle_core(G, index, *, L=None, has_self_edges=True):
    """Count the triangles through the single node ``index`` of undirected ``G``.

    ``L`` is an optional precomputed strictly-lower triangle of ``G``;
    ``has_self_edges=False`` skips self-edge handling entirely.
    """
    # One-hot matrix: multiplying by M.T extracts column ``index`` of G.T.
    M = Matrix(bool, G.nrows, G.ncols)
    M[index, index] = True
    C = any_pair(G.T @ M.T).new(name="C")  # select.coleq(G.T, index)
    has_self_edges = get_properties(G, "has_self_edges", L=L, has_self_edges=has_self_edges)
    if has_self_edges:
        del C[index, index]  # Ignore self-edges
    # R: the neighbors of ``index`` laid out as a row, for the wedge product below.
    R = C.T.new(name="R")
    if has_self_edges:
        # Pretty much all the time is spent here taking TRIL, which is used to ignore self-edges
        L = get_properties(G, "L", L=L)
        return plus_pair(L @ R.T).new(mask=C.S).reduce_scalar(allow_empty=False).value
    else:
        # Full adjacency counts each triangle twice, hence the // 2.
        return plus_pair(G @ R.T).new(mask=C.S).reduce_scalar(allow_empty=False).value // 2
75+
76+
77+
def triangles_core(G, mask=None, *, L=None, U=None):
    """Count triangles per node for undirected ``G``; self-edges are ignored."""
    L, U = get_properties(G, "L U", L=L, U=U)
    # Wedges among strictly-lower entries; masking by L.S keeps only edges.
    C = plus_pair(L @ L.T).new(mask=L.S)
    # Each node's triangles come from three orientations of the counted wedges.
    from_rows = C.reduce_rowwise().new(mask=mask)
    from_columns = C.reduce_columnwise().new(mask=mask)
    from_upper = plus_pair(U @ L.T).new(mask=U.S).reduce_rowwise().new(mask=mask)
    return (from_rows + from_columns + from_upper).new(name="triangles")
86+
87+
88+
@not_implemented_for("directed")
def triangles(G, nodes=None):
    """Count triangles for all nodes, a subset (``nodes``), or a single node.

    Mirrors ``networkx.triangles``: a dict keyed by node, or a plain count
    when ``nodes`` is itself a node of ``G``.
    """
    if len(G) == 0:
        return {}
    A, key_to_id = graph_to_adjacency(G, dtype=bool)
    if nodes in G:
        # A single node was given: return its triangle count as a number.
        return single_triangle_core(A, key_to_id[nodes])
    mask, id_to_key = list_to_mask(nodes, key_to_id)
    counts = triangles_core(A, mask=mask)
    return vector_to_dict(counts, key_to_id, id_to_key, mask=mask, fillvalue=0)
98+
99+
100+
def total_triangles_core(G, *, L=None, U=None):
    """Total number of triangles in undirected ``G`` (self-edges ignored)."""
    # SandiaDot method: usually the fastest on large graphs.  For smaller
    # graphs, the Sandia method is often faster: plus_pair(L @ L).new(mask=L.S)
    L, U = get_properties(G, "L U", L=L, U=U)
    wedges = plus_pair(L @ U.T).new(mask=L.S)
    return wedges.reduce_scalar(allow_empty=False).value
105+
106+
107+
def transitivity_core(G, *, L=None, U=None, degrees=None):
    """Transitivity of ``G``: 3 * triangles / number of connected triads."""
    L, U = get_properties(G, "L U", L=L, U=U)
    triangle_total = total_triangles_core(G, L=L, U=U)
    if triangle_total == 0:
        return 0
    degrees = get_properties(G, "degrees", L=L, U=U, degrees=degrees)
    triads = (degrees * (degrees - 1)).reduce().value
    # 3 triads per triangle, and d*(d-1) double-counts each unordered triad,
    # hence the combined factor of 6.
    return 6 * triangle_total / triads
115+
116+
117+
@not_implemented_for("directed")  # Should we implement it for directed?
def transitivity(G):
    """Return the transitivity (global clustering coefficient) of graph ``G``.

    Returns 0 for the empty graph, matching ``networkx.transitivity``.
    """
    if len(G) == 0:
        return 0
    # Consistency fix: use the shared `graph_to_adjacency` helper like the
    # other wrappers (`triangles`, `clustering`) instead of calling
    # gb.io.from_networkx directly.  Node order is identical either way, so
    # behavior is unchanged; the node mapping is simply unused here.
    A, _ = graph_to_adjacency(G, dtype=bool)
    return transitivity_core(A)
123+
124+
125+
def clustering_core(G, mask=None, *, L=None, U=None, degrees=None):
    """Per-node clustering coefficient: 2 * triangles / (d * (d - 1)).

    ``degrees`` may be supplied precomputed to avoid recomputation.
    """
    L, U = get_properties(G, "L U", L=L, U=U)
    tri = triangles_core(G, mask=mask, L=L, U=U)
    if degrees is None:
        # Fix: the `degrees` argument was previously accepted but ignored —
        # degrees were always recomputed.  Honor a precomputed value.
        degrees = get_degrees(G, mask=mask, L=L, U=U)
    denom = degrees * (degrees - 1)
    return (2 * tri / denom).new(name="clustering")
131+
132+
133+
def single_clustering_core(G, index, *, L=None, degrees=None, has_self_edges=True):
    """Clustering coefficient of the single node ``index`` in undirected ``G``."""
    has_self_edges = get_properties(G, "has_self_edges", L=L, has_self_edges=has_self_edges)
    tri = single_triangle_core(G, index, L=L, has_self_edges=has_self_edges)
    if tri == 0:
        # Zero triangles also covers degree < 2, avoiding a zero denominator.
        return 0
    if degrees is not None:
        # Caller-supplied degrees; assumed to already exclude self-edges
        # (as `get_degrees` produces) — no adjustment applied here.
        degrees = degrees[index].value
    else:
        row = G[index, :].new()
        degrees = row.reduce(agg.count).value
        # A self-edge adds one entry to the row; remove it from the degree.
        if has_self_edges and row[index].value is not None:
            degrees -= 1
    denom = degrees * (degrees - 1)
    return 2 * tri / denom
147+
148+
149+
def clustering(G, nodes=None, weight=None):
    """Clustering coefficients for all nodes, a subset, or a single node.

    Directed or weighted graphs are delegated to networkx for now.
    """
    if len(G) == 0:
        return {}
    if isinstance(G, nx.DiGraph) or weight is not None:
        # TODO: Not yet implemented. Clustering implemented only for undirected and unweighted.
        return _nx_clustering(G, nodes=nodes, weight=weight)
    A, key_to_id = graph_to_adjacency(G, weight=weight)
    if nodes in G:
        # A single node was given: return its coefficient as a number.
        return single_clustering_core(A, key_to_id[nodes])
    mask, id_to_key = list_to_mask(nodes, key_to_id)
    coefficients = clustering_core(A, mask=mask)
    return vector_to_dict(coefficients, key_to_id, id_to_key, mask=mask, fillvalue=0.0)
161+
162+
163+
def average_clustering_core(G, mask=None, count_zeros=True, *, L=None, U=None, degrees=None):
    """Mean clustering coefficient over the (optionally masked) nodes."""
    coefficients = clustering_core(G, mask=mask, L=L, U=U, degrees=degrees)
    total = coefficients.reduce(allow_empty=False).value
    if not count_zeros:
        # Average only over nodes that have a stored coefficient.
        return total / coefficients.nvals
    if mask is not None:
        # Average over every node selected by the mask.
        return total / mask.parent.nvals
    return total / coefficients.size
172+
173+
174+
def average_clustering(G, nodes=None, weight=None, count_zeros=True):
    """Average clustering coefficient of ``G``, optionally over ``nodes``.

    Empty, directed, or weighted graphs are delegated to networkx for now.
    """
    if len(G) == 0 or isinstance(G, nx.DiGraph) or weight is not None:
        # TODO: Not yet implemented. Clustering implemented only for undirected and unweighted.
        return _nx_average_clustering(G, nodes=nodes, weight=weight, count_zeros=count_zeros)
    A, key_to_id = graph_to_adjacency(G, weight=weight)
    mask, _ = list_to_mask(nodes, key_to_id)
    return average_clustering_core(A, mask=mask, count_zeros=count_zeros)

graphblas_algorithms/link_analysis.py

+13-27
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
from collections import OrderedDict
21
from warnings import warn
32

4-
import graphblas as gb
53
import networkx as nx
64
from graphblas import Vector, binary, unary
75
from graphblas.semiring import plus_first, plus_times
86

7+
from ._utils import dict_to_vector, graph_to_adjacency, vector_to_dict
8+
99

1010
def pagerank_core(
1111
A,
@@ -44,7 +44,7 @@ def pagerank_core(
4444
# Inverse of row_degrees
4545
# Fold alpha constant into S
4646
if row_degrees is None:
47-
S = A.reduce_rowwise().new(float, name="S")
47+
S = A.reduce_rowwise().new(float, name="S") # XXX: What about self-edges
4848
S << alpha / S
4949
else:
5050
S = (alpha / row_degrees).new(name="S")
@@ -119,26 +119,15 @@ def pagerank(
119119
N = len(G)
120120
if N == 0:
121121
return {}
122-
node_ids = OrderedDict((k, i) for i, k in enumerate(G))
123-
A = gb.io.from_networkx(G, nodelist=node_ids, weight=weight, dtype=float)
124-
125-
x = p = dangling_weights = None
126-
# Initial vector (we'll normalize later)
127-
if nstart is not None:
128-
indices, values = zip(*((node_ids[key], val) for key, val in nstart.items()))
129-
x = Vector.from_values(indices, values, size=N, dtype=float, name="nstart")
130-
# Personalization vector (we'll normalize later)
131-
if personalization is not None:
132-
indices, values = zip(*((node_ids[key], val) for key, val in personalization.items()))
133-
p = Vector.from_values(indices, values, size=N, dtype=float, name="personalization")
134-
# Dangling nodes (we'll normalize later)
135-
row_degrees = A.reduce_rowwise().new(name="row_degrees")
136-
if dangling is not None:
137-
if row_degrees.nvals < N: # is_dangling
138-
indices, values = zip(*((node_ids[key], val) for key, val in dangling.items()))
139-
dangling_weights = Vector.from_values(
140-
indices, values, size=N, dtype=float, name="dangling"
141-
)
122+
A, key_to_id = graph_to_adjacency(G, weight=weight, dtype=float)
123+
# We'll normalize initial, personalization, and dangling vectors later
124+
x = dict_to_vector(nstart, key_to_id, dtype=float, name="nstart")
125+
p = dict_to_vector(personalization, key_to_id, dtype=float, name="personalization")
126+
row_degrees = A.reduce_rowwise().new(name="row_degrees") # XXX: What about self-edges?
127+
if dangling is not None and row_degrees.nvals < N:
128+
dangling_weights = dict_to_vector(dangling, key_to_id, dtype=float, name="dangling")
129+
else:
130+
dangling_weights = None
142131
result = pagerank_core(
143132
A,
144133
alpha=alpha,
@@ -149,7 +138,4 @@ def pagerank(
149138
dangling=dangling_weights,
150139
row_degrees=row_degrees,
151140
)
152-
if result.nvals != N:
153-
# Not likely, but fill with 0 just in case
154-
result(mask=~result.S) << 0
155-
return dict(zip(node_ids, result.to_values()[1]))
141+
return vector_to_dict(result, key_to_id, fillvalue=0.0)

0 commit comments

Comments
 (0)