Skip to content

Commit 3cc4180

Browse files
authored
Implement .generators.ego.ego_graph (#61)
* Implement `.generators.ego.ego_graph`. Also, clean up the shared BFS functions and move them to `_bfs.py`. * Use external images in the README so they render on PyPI. * Support and test against Python 3.11. * Change development status to Beta (was Alpha).
1 parent 80ba68b commit 3cc4180

27 files changed

+289
-240
lines changed

Diff for: .github/workflows/test.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ jobs:
1515
fail-fast: true
1616
matrix:
1717
os: ["ubuntu-latest", "macos-latest", "windows-latest"]
18-
python-version: ["3.8", "3.9", "3.10"]
18+
python-version: ["3.8", "3.9", "3.10", "3.11"]
1919
steps:
2020
- name: Checkout
2121
uses: actions/checkout@v3

Diff for: .pre-commit-config.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ repos:
5555
- id: black
5656
# - id: black-jupyter
5757
- repo: https://github.com/charliermarsh/ruff-pre-commit
58-
rev: v0.0.263
58+
rev: v0.0.264
5959
hooks:
6060
- id: ruff
6161
args: [--fix-only, --show-fixes]
@@ -81,7 +81,7 @@ repos:
8181
additional_dependencies: [tomli]
8282
files: ^(graphblas_algorithms|docs)/
8383
- repo: https://github.com/charliermarsh/ruff-pre-commit
84-
rev: v0.0.263
84+
rev: v0.0.264
8585
hooks:
8686
- id: ruff
8787
# `pyroma` may help keep our package standards up to date if best practices change.

Diff for: MANIFEST.in

-3
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,3 @@ include setup.py
44
include README.md
55
include LICENSE
66
include MANIFEST.in
7-
docs/_static/img/logo-name-medium.png
8-
docs/_static/img/graphblas-vs-igraph.png
9-
docs/_static/img/graphblas-vs-networkx.png

Diff for: README.md

+5-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
![GraphBLAS Algorithms](docs/_static/img/logo-name-medium.svg)
1+
![GraphBLAS Algorithms](https://raw.githubusercontent.com/python-graphblas/graphblas-algorithms/main/docs/_static/img/logo-name-medium.svg)
22
<br>
33
[![conda-forge](https://img.shields.io/conda/vn/conda-forge/graphblas-algorithms.svg)](https://anaconda.org/conda-forge/graphblas-algorithms)
44
[![pypi](https://img.shields.io/pypi/v/graphblas-algorithms.svg)](https://pypi.python.org/pypi/graphblas-algorithms/)
@@ -21,9 +21,9 @@ Why use GraphBLAS Algorithms? Because it is *fast*, *flexible*, and *familiar* b
2121
Are we missing any [algorithms](#Plugin-Algorithms) that you want?
2222
[Please let us know!](https://github.com/python-graphblas/graphblas-algorithms/issues)
2323
<br>
24-
<img src="docs/_static/img/graphblas-vs-networkx.png" alt="GraphBLAS vs NetworkX" title="Even faster than scipy.sparse!" width="640" />
24+
<img src="https://raw.githubusercontent.com/python-graphblas/graphblas-algorithms/main/docs/_static/img/graphblas-vs-networkx.png" alt="GraphBLAS vs NetworkX" title="Even faster than scipy.sparse!" width="640" />
2525
<br>
26-
<img src="docs/_static/img/graphblas-vs-igraph.png" alt="GraphBLAS vs igraph" title="igraph may use different algorithms for PageRank" width="600" />
26+
<img src="https://raw.githubusercontent.com/python-graphblas/graphblas-algorithms/main/docs/_static/img/graphblas-vs-igraph.png" alt="GraphBLAS vs igraph" title="igraph may use different algorithms for PageRank" width="600" />
2727

2828
### Installation
2929
```
@@ -151,6 +151,8 @@ dispatch pattern shown above.
151151
- descendants
152152
- Dominating
153153
- is_dominating_set
154+
- Generators
155+
- ego_graph
154156
- Isolate
155157
- is_isolate
156158
- isolates

Diff for: graphblas_algorithms/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from .classes import *
44

55
from .algorithms import * # isort:skip
6+
from .generators import * # isort:skip
67

78
try:
89
__version__ = importlib.metadata.version("graphblas-algorithms")

Diff for: graphblas_algorithms/algorithms/_bfs.py

+150
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
"""BFS routines used by other algorithms"""
2+
3+
import numpy as np
4+
from graphblas import Matrix, Vector, binary, replace, unary
5+
from graphblas.semiring import any_pair
6+
7+
8+
def _get_cutoff(n, cutoff):
9+
if cutoff is None or cutoff >= n:
10+
return n # Everything
11+
return cutoff + 1 # Inclusive
12+
13+
14+
def _plain_bfs(G, source, *, cutoff=None):
    """Mark every node reached by a breadth-first search from ``source``.

    Parameters
    ----------
    G : graph object
        Must provide ``_key_to_id`` and ``get_property("offdiag")``; the
        "offdiag" matrix is used as the adjacency matrix.
    source : node key
        Starting node, looked up in ``G._key_to_id``.
    cutoff : int or None, keyword-only
        Maximum number of expansion steps; ``None`` means no limit
        (see ``_get_cutoff``).

    Returns
    -------
    Vector
        Boolean vector of size ``A.nrows`` with ``True`` stored at the ids of
        the source and of every node reached.
    """
    src_id = G._key_to_id[source]
    adjacency = G.get_property("offdiag")
    num_nodes = adjacency.nrows
    visited = Vector(bool, num_nodes, name="bfs_plain")
    frontier = Vector(bool, num_nodes, name="q")
    visited[src_id] = True
    frontier[src_id] = True
    expand = any_pair[bool]
    stop = _get_cutoff(num_nodes, cutoff)
    # Same number of rounds as `range(1, stop)`; the level number is unused here.
    for _ in range(stop - 1):
        # Advance the frontier one hop, keeping only not-yet-visited positions.
        frontier(~visited.S, replace) << expand(frontier @ adjacency)
        if frontier.nvals == 0:
            # Nothing new was reached, so the search is complete.
            break
        visited(frontier.S) << True
    return visited
30+
31+
32+
def _bfs_level(G, source, cutoff=None, *, transpose=False, dtype=int):
    """Compute the BFS level of each node reached from ``source``.

    Parameters
    ----------
    G : graph object
        Must provide ``_key_to_id``, ``get_property("offdiag")`` and
        ``is_directed()``.
    source : node key
        Starting node, looked up in ``G._key_to_id``.
    cutoff : int or None
        Deepest level to record, inclusive; ``None`` means no limit
        (see ``_get_cutoff``).
    transpose : bool, keyword-only
        When true and ``G`` is directed, the search runs on the transposed
        matrix instead.
    dtype : keyword-only
        Dtype of the returned levels. ``bool`` is replaced by ``int``.

    Returns
    -------
    Vector
        Vector of size ``A.nrows`` holding ``0`` at the source and the round
        number ``i`` at each node first reached in round ``i``.
    """
    # Levels are stored as integers, so a bool dtype is swapped for int.
    level_dtype = int if dtype == bool else dtype
    src_id = G._key_to_id[source]
    adjacency = G.get_property("offdiag")
    if transpose and G.is_directed():
        adjacency = adjacency.T  # TODO: should we use "AT" instead?
    num_nodes = adjacency.nrows
    levels = Vector(level_dtype, num_nodes, name="bfs_level")
    frontier = Vector(bool, num_nodes, name="q")
    levels[src_id] = 0
    frontier[src_id] = True
    expand = any_pair[bool]
    stop = _get_cutoff(num_nodes, cutoff)
    for depth in range(1, stop):
        # Advance the frontier one hop, keeping only positions without a level yet.
        frontier(~levels.S, replace) << expand(frontier @ adjacency)
        if frontier.nvals == 0:
            break
        levels(frontier.S) << depth
    return levels
52+
53+
54+
def _bfs_levels(G, nodes, cutoff=None, *, dtype=int):
    """Compute BFS levels from several sources at once.

    Parameters
    ----------
    G : graph object
        Must provide ``get_property("offdiag")`` and ``list_to_ids``.
    nodes : iterable of node keys or None
        Sources to search from. ``None`` starts one search per position on
        the diagonal, i.e. from every node.
    cutoff : int or None
        Deepest level to record, inclusive; ``None`` means no limit
        (see ``_get_cutoff``).
    dtype : keyword-only
        Dtype of the returned levels. ``bool`` is replaced by ``int``.

    Returns
    -------
    Matrix
        One row per source. Each row holds ``0`` at the source itself and the
        round number ``i`` at each node first reached in round ``i``.
    """
    # Levels are stored as integers, so a bool dtype is swapped for int.
    level_dtype = int if dtype == bool else dtype
    adjacency = G.get_property("offdiag")
    num_nodes = adjacency.nrows
    if nodes is not None:
        ids = G.list_to_ids(nodes)
        num_sources = len(ids)
        # Row r starts with a single 0 in the column of the r-th source.
        levels = Matrix.from_coo(
            np.arange(num_sources, dtype=np.uint64),
            ids,
            0,
            level_dtype,
            nrows=num_sources,
            ncols=num_nodes,
            name="bfs_levels",
        )
    else:
        # TODO: `D = Vector.from_scalar(0, n, dtype).diag()`
        zeros = Vector(level_dtype, num_nodes, name="bfs_levels_vector")
        zeros << 0
        levels = zeros.diag(name="bfs_levels")
    frontier = unary.one[bool](levels).new(name="Q")
    expand = any_pair[bool]
    stop = _get_cutoff(num_nodes, cutoff)
    for depth in range(1, stop):
        # Advance every row's frontier one hop, skipping positions already levelled.
        frontier(~levels.S, replace) << expand(frontier @ adjacency)
        if frontier.nvals == 0:
            break
        levels(frontier.S) << depth
    return levels
84+
85+
86+
# TODO: benchmark this and the version commented out below
87+
def _plain_bfs_bidirectional(G, source):
88+
# Bi-directional BFS w/o symmetrizing the adjacency matrix
89+
index = G._key_to_id[source]
90+
A = G.get_property("offdiag")
91+
# XXX: should we use `AT` if available?
92+
n = A.nrows
93+
v = Vector(bool, n, name="bfs_plain")
94+
q_out = Vector(bool, n, name="q_out")
95+
q_in = Vector(bool, n, name="q_in")
96+
v[index] = True
97+
q_in[index] = True
98+
any_pair_bool = any_pair[bool]
99+
is_out_empty = True
100+
is_in_empty = False
101+
for _i in range(1, n):
102+
# Traverse out-edges from the most recent `q_in` and `q_out`
103+
if is_out_empty:
104+
q_out(~v.S) << any_pair_bool(q_in @ A)
105+
else:
106+
q_out << binary.any(q_out | q_in)
107+
q_out(~v.S, replace) << any_pair_bool(q_out @ A)
108+
is_out_empty = q_out.nvals == 0
109+
if not is_out_empty:
110+
v(q_out.S) << True
111+
elif is_in_empty:
112+
break
113+
# Traverse in-edges from the most recent `q_in` and `q_out`
114+
if is_in_empty:
115+
q_in(~v.S) << any_pair_bool(A @ q_out)
116+
else:
117+
q_in << binary.any(q_out | q_in)
118+
q_in(~v.S, replace) << any_pair_bool(A @ q_in)
119+
is_in_empty = q_in.nvals == 0
120+
if not is_in_empty:
121+
v(q_in.S) << True
122+
elif is_out_empty:
123+
break
124+
return v
125+
126+
127+
"""
128+
def _plain_bfs_bidirectional(G, source):
129+
# Bi-directional BFS w/o symmetrizing the adjacency matrix
130+
index = G._key_to_id[source]
131+
A = G.get_property("offdiag")
132+
n = A.nrows
133+
v = Vector(bool, n, name="bfs_plain")
134+
q = Vector(bool, n, name="q")
135+
q2 = Vector(bool, n, name="q_2")
136+
v[index] = True
137+
q[index] = True
138+
any_pair_bool = any_pair[bool]
139+
for _i in range(1, n):
140+
q2(~v.S, replace) << any_pair_bool(q @ A)
141+
v(q2.S) << True
142+
q(~v.S, replace) << any_pair_bool(A @ q)
143+
if q.nvals == 0:
144+
if q2.nvals == 0:
145+
break
146+
q, q2 = q2, q
147+
elif q2.nvals != 0:
148+
q << binary.any(q | q2)
149+
return v
150+
"""

Diff for: graphblas_algorithms/algorithms/centrality/eigenvector.py

+2-6
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,7 @@
11
from graphblas import Vector
22

3-
from graphblas_algorithms.algorithms._helpers import is_converged, normalize
4-
from graphblas_algorithms.algorithms.exceptions import (
5-
ConvergenceFailure,
6-
GraphBlasAlgorithmException,
7-
PointlessConcept,
8-
)
3+
from .._helpers import is_converged, normalize
4+
from ..exceptions import ConvergenceFailure, GraphBlasAlgorithmException, PointlessConcept
95

106
__all__ = ["eigenvector_centrality"]
117

Diff for: graphblas_algorithms/algorithms/centrality/katz.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,8 @@
22
from graphblas.core.utils import output_type
33
from graphblas.semiring import plus_first, plus_times
44

5-
from graphblas_algorithms.algorithms._helpers import is_converged, normalize
6-
from graphblas_algorithms.algorithms.exceptions import (
7-
ConvergenceFailure,
8-
GraphBlasAlgorithmException,
9-
)
5+
from .._helpers import is_converged, normalize
6+
from ..exceptions import ConvergenceFailure, GraphBlasAlgorithmException
107

118
__all__ = ["katz_centrality"]
129

+2-21
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
1-
from graphblas import Vector, replace
2-
from graphblas.semiring import any_pair
3-
4-
from graphblas_algorithms.algorithms.exceptions import PointlessConcept
1+
from .._bfs import _plain_bfs
2+
from ..exceptions import PointlessConcept
53

64

75
def is_connected(G):
@@ -12,20 +10,3 @@ def is_connected(G):
1210

1311
def node_connected_component(G, n):
    """Return the result of a plain BFS over ``G`` started at node ``n``.

    The value is whatever ``_plain_bfs(G, n)`` produces, passed through
    unchanged.
    """
    component = _plain_bfs(G, n)
    return component
15-
16-
17-
def _plain_bfs(G, source):
18-
index = G._key_to_id[source]
19-
A = G.get_property("offdiag")
20-
n = A.nrows
21-
v = Vector(bool, n, name="bfs_plain")
22-
q = Vector(bool, n, name="q")
23-
v[index] = True
24-
q[index] = True
25-
any_pair_bool = any_pair[bool]
26-
for _i in range(1, n):
27-
q(~v.S, replace) << any_pair_bool(q @ A)
28-
if q.nvals == 0:
29-
break
30-
v(q.S) << True
31-
return v
Original file line numberDiff line numberDiff line change
@@ -1,77 +1,8 @@
1-
from graphblas import Vector, binary, replace
2-
from graphblas.semiring import any_pair
3-
4-
from graphblas_algorithms.algorithms.exceptions import PointlessConcept
1+
from .._bfs import _plain_bfs_bidirectional
2+
from ..exceptions import PointlessConcept
53

64

75
def is_weakly_connected(G):
86
if len(G) == 0:
97
raise PointlessConcept("Connectivity is undefined for the null graph.")
10-
return _plain_bfs(G, next(iter(G))).nvals == len(G)
11-
12-
13-
# TODO: benchmark this and the version commented out below
14-
def _plain_bfs(G, source):
15-
# Bi-directional BFS w/o symmetrizing the adjacency matrix
16-
index = G._key_to_id[source]
17-
A = G.get_property("offdiag")
18-
# XXX: should we use `AT` if available?
19-
n = A.nrows
20-
v = Vector(bool, n, name="bfs_plain")
21-
q_out = Vector(bool, n, name="q_out")
22-
q_in = Vector(bool, n, name="q_in")
23-
v[index] = True
24-
q_in[index] = True
25-
any_pair_bool = any_pair[bool]
26-
is_out_empty = True
27-
is_in_empty = False
28-
for _i in range(1, n):
29-
# Traverse out-edges from the most recent `q_in` and `q_out`
30-
if is_out_empty:
31-
q_out(~v.S) << any_pair_bool(q_in @ A)
32-
else:
33-
q_out << binary.any(q_out | q_in)
34-
q_out(~v.S, replace) << any_pair_bool(q_out @ A)
35-
is_out_empty = q_out.nvals == 0
36-
if not is_out_empty:
37-
v(q_out.S) << True
38-
elif is_in_empty:
39-
break
40-
# Traverse in-edges from the most recent `q_in` and `q_out`
41-
if is_in_empty:
42-
q_in(~v.S) << any_pair_bool(A @ q_out)
43-
else:
44-
q_in << binary.any(q_out | q_in)
45-
q_in(~v.S, replace) << any_pair_bool(A @ q_in)
46-
is_in_empty = q_in.nvals == 0
47-
if not is_in_empty:
48-
v(q_in.S) << True
49-
elif is_out_empty:
50-
break
51-
return v
52-
53-
54-
"""
55-
def _plain_bfs(G, source):
56-
# Bi-directional BFS w/o symmetrizing the adjacency matrix
57-
index = G._key_to_id[source]
58-
A = G.get_property("offdiag")
59-
n = A.nrows
60-
v = Vector(bool, n, name="bfs_plain")
61-
q = Vector(bool, n, name="q")
62-
q2 = Vector(bool, n, name="q_2")
63-
v[index] = True
64-
q[index] = True
65-
any_pair_bool = any_pair[bool]
66-
for _i in range(1, n):
67-
q2(~v.S, replace) << any_pair_bool(q @ A)
68-
v(q2.S) << True
69-
q(~v.S, replace) << any_pair_bool(A @ q)
70-
if q.nvals == 0:
71-
if q2.nvals == 0:
72-
break
73-
q, q2 = q2, q
74-
elif q2.nvals != 0:
75-
q << binary.any(q | q2)
76-
return v
77-
"""
8+
return _plain_bfs_bidirectional(G, next(iter(G))).nvals == len(G)

Diff for: graphblas_algorithms/algorithms/core.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
from graphblas import Matrix, monoid, replace, select, semiring
22

3-
from graphblas_algorithms.classes.graph import Graph
3+
from graphblas_algorithms import Graph
44

55
__all__ = ["k_truss"]
66

77

88
def k_truss(G: Graph, k) -> Graph:
9+
# TODO: should we have an option to keep the output matrix the same size?
910
# Ignore self-edges
1011
S = G.get_property("offdiag")
1112

@@ -32,6 +33,5 @@ def k_truss(G: Graph, k) -> Graph:
3233
Ktruss = C[indices, indices].new()
3334

3435
# Convert back to networkx graph with correct node ids
35-
keys = G.list_to_keys(indices)
36-
key_to_id = dict(zip(keys, range(len(indices))))
36+
key_to_id = G.renumber_key_to_id(indices.tolist())
3737
return Graph(Ktruss, key_to_id=key_to_id)

Diff for: graphblas_algorithms/algorithms/link_analysis/hits_alg.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from graphblas import Vector
22

3-
from graphblas_algorithms.algorithms._helpers import is_converged, normalize
4-
from graphblas_algorithms.algorithms.exceptions import ConvergenceFailure
3+
from .._helpers import is_converged, normalize
4+
from ..exceptions import ConvergenceFailure
55

66
__all__ = ["hits"]
77

0 commit comments

Comments
 (0)