Skip to content

Commit ca797fc

Browse files
Merge pull request #160 from tigergraph/GML-1812-specialized_testing
Gml 1812 specialized testing
2 parents 8514ecc + 362024f commit ca797fc

12 files changed

+290
-54
lines changed

tests/data/baseline/ml/fastRP.json.gz

204 KB
Binary file not shown.

tests/run.sh

+3-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
clear
2-
python3 test/create_baseline.py &&
3-
python3 test/setup.py &&
4-
pytest test/test_centrality.py::TestCentrality
5-
# pytest
2+
python3 test/setup.py &&
3+
python3 test/baseline/create_baselines.py &&
4+
pytest test/test_centrality.py test/test_ml.py

tests/test/baseline/__init__.py

Whitespace-only changes.

tests/test/baseline/algos/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
from .degree_cent import *
2+
from .fastrp import fastrp_wrapper as fastrp
+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
from collections import Counter
2+
3+
import networkx as nx
4+
5+
6+
def run_degree_baseline_complete(g: nx.Graph, _):
    """Compute the degree-centrality baseline for a graph with self-edges.

    Every degree reported by ``g.degree()`` is reduced by one and then
    multiplied by ``1 / (len(g) - 1)``.

    Args:
        g: Graph to score; only ``len(g)`` and ``g.degree()`` are used.
        _: Ignored. Present so the signature matches ``run_degree_baseline``.

    Returns:
        A single-element list ``[{"top_scores": [...]}]`` whose inner list
        holds one ``{"Vertex_ID": ..., "score": ...}`` dict per vertex, in
        the order ``g.degree()`` yields them.
    """
    scale_factor = 1.0 / (len(g) - 1.0)

    # Subtract one from each degree because nx double counts the self-edge.
    normalized = {
        vertex: (degree - 1) * scale_factor for vertex, degree in g.degree()
    }

    scores = [
        {"Vertex_ID": vertex, "score": value}
        for vertex, value in normalized.items()
    ]
    return [{"top_scores": scores}]
18+
19+
20+
def run_degree_baseline(g: nx.Graph, metric):
    """Score a graph with ``metric`` and wrap the result in the baseline format.

    Args:
        g: Graph handed unchanged to ``metric``.
        metric: Callable taking the graph and returning a mapping of vertex
            id to score (anything exposing ``.items()``).

    Returns:
        A single-element list ``[{"top_scores": [...]}]`` whose inner list
        holds one ``{"Vertex_ID": ..., "score": ...}`` dict per mapping entry,
        in the mapping's iteration order.
    """
    scores = metric(g)
    entries = [
        {"Vertex_ID": vertex, "score": value} for vertex, value in scores.items()
    ]
    return [{"top_scores": entries}]
29+
30+
31+
def weighted_deg_cent(
    g: nx.Graph,
    dir: str = "",
):
    """Sum edge weights per vertex to get a weighted degree.

    Args:
        g: Graph whose ``edges`` are iterated; each edge's weight is read
            from ``g.get_edge_data(u, v)["weight"]``.
        dir: ``"in"`` credits the weight to the edge's second endpoint only,
            ``"out"`` to the first endpoint only; any other value credits
            both endpoints.

    Returns:
        A ``Counter`` mapping vertex to its accumulated weight.
    """
    totals = Counter()
    for edge in g.edges:
        source, target = edge[0], edge[1]
        weight = g.get_edge_data(source, target)["weight"]
        if dir == "in":
            totals[target] += weight
        elif dir == "out":
            totals[source] += weight
        else:
            totals[source] += weight
            totals[target] += weight
    return totals

tests/test/baseline/algos/fastrp.py

+107
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
# source: https://github.com/GTmac/FastRP/blob/master/fastrp.py
2+
3+
import numpy as np
4+
from scipy.sparse import csc_matrix, csr_matrix, spdiags
5+
from sklearn import random_projection
6+
from sklearn.preprocessing import normalize, scale
7+
8+
9+
# projection method: choose from Gaussian and Sparse
10+
# input matrix: choose from adjacency and transition matrix
11+
# alpha adjusts the weighting of nodes according to their degree
12+
def fastrp_projection(
    A, q=3, dim=128, projection_method="gaussian", input_matrix="adj", alpha=None
):
    """Randomly project the input matrix and its successive powers.

    Args:
        A: Adjacency matrix (the surrounding comments state that ``A`` is
            always the adjacency matrix). Only ``A.shape[0]`` and its row
            sums are read directly here.
        q: Number of matrices to return. The first is the projection of the
            input matrix; each later one is the input matrix multiplied by
            the previous entry.
        dim: ``n_components`` passed to the random projection.
        projection_method: ``"gaussian"`` or ``"sparse"``.
        input_matrix: ``"adj"`` to project ``A`` itself, or ``"trans"`` to
            project ``A`` with each row divided by its row sum.
        alpha: When not ``None``, the fitted projection components are
            right-multiplied by ``diag(row_sum ** alpha)`` before projecting.

    Returns:
        A list of projected matrices; it has ``q`` entries when ``q >= 1``.

    Raises:
        AssertionError: If ``input_matrix`` or ``projection_method`` is not
            one of the accepted values.
    """
    assert input_matrix == "adj" or input_matrix == "trans"
    assert projection_method == "gaussian" or projection_method == "sparse"

    # N is needed by the alpha re-weighting below for both input_matrix
    # choices, so compute it up front. It used to be bound only in the
    # "trans" branch, making "adj" combined with alpha raise NameError.
    N = A.shape[0]

    if input_matrix == "adj":
        M = A
    else:
        # Divide every row of A by its row sum.
        normalizer = spdiags(np.squeeze(1.0 / csc_matrix.sum(A, axis=1)), 0, N, N)
        M = normalizer @ A

    # random_state is fixed so repeated runs produce the same projection.
    if projection_method == "gaussian":
        # Gaussian projection matrix
        transformer = random_projection.GaussianRandomProjection(
            n_components=dim, random_state=42
        )
    else:
        # Sparse projection matrix
        transformer = random_projection.SparseRandomProjection(
            n_components=dim, random_state=42
        )
    Y = transformer.fit(M)

    # Random projection for A
    if alpha is not None:
        # The transform below uses these re-weighted components because the
        # assignment goes through the object returned by fit().
        Y.components_ = Y.components_ @ spdiags(
            np.squeeze(np.power(csc_matrix.sum(A, axis=1), alpha)), 0, N, N
        )
    cur_U = transformer.transform(M)
    U_list = [cur_U]

    for _ in range(2, q + 1):
        cur_U = M @ cur_U
        U_list.append(cur_U)
    return U_list
47+
48+
49+
# When weights is None, concatenate the embeddings from different powers of A instead of linearly combining them
50+
def fastrp_merge(U_list, weights, normalization=False):
    """Merge the per-power projections into a single embedding matrix.

    Args:
        U_list: Non-empty list of matrices. When the first entry is exactly
            a ``csc_matrix``, every entry is converted with ``.todense()``.
        weights: ``None`` to concatenate the matrices column-wise, otherwise
            one multiplier per matrix for a weighted sum.
        normalization: When true, L2-normalize each matrix row-wise before
            merging.

    Returns:
        The column-wise concatenation when ``weights`` is ``None``; otherwise
        the weighted sum after being passed through ``scale``.
    """
    if type(U_list[0]) is csc_matrix:
        matrices = [sparse_U.todense() for sparse_U in U_list]
    else:
        matrices = U_list

    if normalization:
        matrices = [normalize(matrix, norm="l2", axis=1) for matrix in matrices]

    if weights is None:
        return np.concatenate(matrices, axis=1)

    combined = np.zeros_like(matrices[0])
    for matrix, weight in zip(matrices, weights):
        combined += matrix * weight

    # scale() is applied to a dense array, so densify an exact csr_matrix first.
    if type(combined) is csr_matrix:
        return scale(combined.toarray())
    return scale(combined)
68+
69+
70+
# A is always the adjacency matrix
71+
# the choice between adj matrix and trans matrix is decided in the conf
72+
def fastrp_wrapper(A, conf):
    """Run the projection and merge steps using settings from ``conf``.

    Args:
        A: Matrix forwarded to ``fastrp_projection``.
        conf: Mapping with the keys ``"weights"``, ``"dim"``,
            ``"projection_method"``, ``"input_matrix"``, ``"alpha"`` and
            ``"normalization"``. The number of weights sets how many
            projected powers are computed.

    Returns:
        The merged embedding matrix produced by ``fastrp_merge``.
    """
    weights = conf["weights"]
    projections = fastrp_projection(
        A,
        q=len(weights),
        dim=conf["dim"],
        projection_method=conf["projection_method"],
        input_matrix=conf["input_matrix"],
        alpha=conf["alpha"],
    )
    return fastrp_merge(projections, weights, conf["normalization"])
83+
84+
85+
def get_emb_filename(prefix, conf):
    """Build the ``.mat`` filename that encodes an embedding configuration.

    Args:
        prefix: Leading part of the filename.
        conf: Mapping with the required keys ``"dim"``,
            ``"projection_method"``, ``"input_matrix"``, ``"normalization"``
            and ``"weights"`` (a sequence or ``None``). The keys ``"alpha"``
            and ``"C"`` are optional.

    Returns:
        A string of the form
        ``<prefix>-dim=..,projection_method=..,input_matrix=..,``
        ``normalization=..,weights=..,alpha=..,C=...mat``. A missing
        ``"alpha"`` renders as an empty string and a missing ``"C"`` as
        ``"1.0"``.
    """
    weights = conf["weights"]
    weights_part = ",".join(map(str, weights)) if weights is not None else "None"
    alpha_part = str(conf["alpha"]) if "alpha" in conf else ""
    # Guard on "C" itself. The old guard tested "alpha", which raised
    # KeyError when alpha was set without C and ignored C when alpha was absent.
    c_part = str(conf["C"]) if "C" in conf else "1.0"

    fields = [
        f"dim={conf['dim']}",
        f"projection_method={conf['projection_method']}",
        f"input_matrix={conf['input_matrix']}",
        f"normalization={conf['normalization']}",
        f"weights={weights_part}",
        f"alpha={alpha_part}",
        f"C={c_part}",
    ]
    return f"{prefix}-" + ",".join(fields) + ".mat"
+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
import degree_cent_baseline
2+
import fast_rp_baseline
3+
4+
if __name__ == "__main__":
    # Run each baseline generator in turn through the run() entry point of
    # the modules imported at the top of this script.
    degree_cent_baseline.run()
    fast_rp_baseline.run()

tests/test/create_baseline.py renamed to tests/test/baseline/degree_cent_baseline.py

+2-45
Original file line numberDiff line numberDiff line change
@@ -1,59 +1,16 @@
11
import csv
22
import json
3-
from collections import Counter
43
from functools import partial
54

65
import networkx as nx
76
import numpy as np
7+
from algos import run_degree_baseline, run_degree_baseline_complete, weighted_deg_cent
88
from tqdm import tqdm
99

1010
data_path_root = "data/"
1111
baseline_path_root = f"{data_path_root}/baseline/"
1212

1313

14-
def weighted_deg_cent(
15-
g: nx.Graph,
16-
dir: str = "",
17-
):
18-
res = Counter()
19-
for e in g.edges:
20-
a = g.get_edge_data(e[0], e[1])["weight"]
21-
match dir:
22-
case "in":
23-
res[e[1]] += a
24-
case "out":
25-
res[e[0]] += a
26-
case _:
27-
res[e[0]] += a
28-
res[e[1]] += a
29-
return res
30-
31-
32-
def run_degree_baseline_complete(g: nx.Graph, _):
33-
s = 1.0 / (len(g) - 1.0)
34-
35-
# d-1 because nx will double count the self-edge
36-
res = {n: (d - 1) * s for n, d in g.degree()}
37-
38-
out = []
39-
for k, v in res.items():
40-
out.append({"Vertex_ID": k, "score": v})
41-
42-
out = [{"top_scores": out}]
43-
return out
44-
45-
46-
def run_degree_baseline(g: nx.Graph, metric):
47-
res = metric(g)
48-
49-
out = []
50-
for k, v in res.items():
51-
out.append({"Vertex_ID": k, "score": v})
52-
53-
out = [{"top_scores": out}]
54-
return out
55-
56-
5714
def create_graph(edges, weights=False, directed=False):
5815
if directed:
5916
g = nx.DiGraph()
@@ -90,7 +47,7 @@ def create_degree_baseline(paths):
9047
json.dump(res, f) # , indent=2)
9148

9249

93-
if __name__ == "__main__":
50+
def run():
9451
# (data, output_path, fun, metric)
9552
paths = [
9653
# unweighted
+44
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import gzip
2+
import json
3+
4+
import networkx as nx
5+
import numpy as np
6+
import pandas as pd
7+
from algos import fastrp
8+
from dotenv import load_dotenv
9+
from pyTigerGraph.datasets import Datasets
10+
11+
load_dotenv()
12+
data_path_root = "data"
13+
baseline_path_root = f"{data_path_root}/baseline"
14+
15+
16+
def run(ds_name="Cora"):
    """Generate the fastRP embedding baseline and store it as gzipped JSON.

    Reads ``<tmp_dir>/<ds_name>/edges.csv`` for the named dataset, builds an
    undirected graph from it, computes embeddings with ``fastrp`` and writes
    a ``{node_id: vector}`` mapping to
    ``<baseline_path_root>/ml/fastRP.json.gz``.

    Args:
        ds_name: Name passed to ``Datasets`` and used as the sub-directory
            that holds ``edges.csv``.
    """
    dataset = Datasets(ds_name)
    edge_frame = pd.read_csv(dataset.tmp_dir + f"/{ds_name}/edges.csv", header=None)
    edge_frame.columns = ["src", "tgt"]

    graph = nx.Graph()
    graph.add_edges_from(edge_frame.to_numpy())

    # Sort the node ids so the adjacency rows, and therefore the embedding
    # rows, follow a fixed order.
    node_ids = sorted(graph.nodes)
    adjacency = nx.adjacency_matrix(graph, nodelist=node_ids)

    conf = {
        "weights": [1, 2, 4],
        "dim": 8,
        # "projection_method": "sparse",
        "projection_method": "gaussian",
        "input_matrix": "trans",
        "alpha": -0.628,
        "normalization": False,
    }
    vecs = fastrp(adjacency, conf)

    assert len(vecs) == len(node_ids)

    baseline_file = f"{baseline_path_root}/ml/fastRP.json.gz"
    res = {str(node_id): list(vec) for node_id, vec in zip(node_ids, vecs)}
    with gzip.open(baseline_file, "wb") as f:
        f.write(json.dumps(res).encode())

    # Read the file back so a corrupt write fails here; the parsed value is
    # not used.
    with gzip.open(baseline_file, "rb") as f:
        json.load(f)

tests/test/setup.py

+23-2
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,31 @@
11
import json
22
import os
33
import re
4+
import time
45

56
import pyTigerGraph as tg
6-
import util
77
from dotenv import load_dotenv
88
from pyTigerGraph.datasets import Datasets
9-
from tqdm import tqdm
9+
from tqdm import tqdm, trange
10+
11+
import util
1012

1113
load_dotenv()
1214
graph_name = "graph_algorithms_testing"
1315
pattern = re.compile(r'"name":\s*"tg_.*"')
1416

17+
18+
def add_reverse_edge(ds: Datasets):
    """Rewrite the dataset's schema script so the ``Cite`` edge has a reverse edge.

    The file ``<ds.tmp_dir>/<ds.name>/create_schema.gsql`` is read, the
    ``ADD DIRECTED EDGE Cite (...)`` statement gets a
    ``WITH REVERSE_EDGE="reverse_Cite"`` clause, and the file is written back
    in place. A file that does not contain the exact original statement
    (for example one that was already patched) is written back unchanged.

    Args:
        ds: Dataset descriptor; only ``ds.tmp_dir`` and ``ds.name`` are read.
    """
    # Build the path from the argument. The old code read the module-level
    # ``dataset`` variable, which exists only when this file runs as a script.
    schema_path = f"{ds.tmp_dir}/{ds.name}/create_schema.gsql"

    with open(schema_path) as f:
        schema: str = f.read()

    schema = schema.replace(
        "ADD DIRECTED EDGE Cite (from Paper, to Paper, time Int, is_train Bool, is_val Bool);",
        'ADD DIRECTED EDGE Cite (from Paper, to Paper, time Int, is_train Bool, is_val Bool) WITH REVERSE_EDGE="reverse_Cite";',
    )

    with open(schema_path, "w") as f:
        f.write(schema)
27+
28+
1529
if __name__ == "__main__":
1630
host_name = os.getenv("HOST_NAME")
1731
user_name = os.getenv("USER_NAME")
@@ -28,9 +42,14 @@
2842
if res["error"]:
2943
exit(1)
3044
# load the data
45+
dataset = Datasets("Cora")
46+
add_reverse_edge(dataset)
47+
conn.ingestDataset(dataset, getToken=True)
48+
3149
dataset = Datasets("graph_algorithms_testing")
3250
conn.ingestDataset(dataset, getToken=True)
3351

52+
conn.graphname = graph_name
3453
# install the queries
3554
feat = conn.gds.featurizer()
3655
installed_queries = util.get_installed_queries(conn)
@@ -43,3 +62,5 @@
4362
print(q)
4463
feat.installAlgorithm(q)
4564

65+
for _ in trange(30, desc="Sleeping while data loads"):
66+
time.sleep(1)

0 commit comments

Comments
 (0)