
Commit 7bf4d73

Merge pull request #170 from tigergraph/GML-1836-template_queries
Gml 1836 template queries
2 parents 54cecf8 + 2070416 commit 7bf4d73

File tree

21 files changed: +791 -721 lines

.gitignore (+2)
@@ -14,3 +14,5 @@ __pycache__
 .env
 *.xml
 pyrightconfig.json
+l
+*.ipynb

GDBMS_ALGO/centrality/degree_cent.gsql (+6 -2)
@@ -57,10 +57,14 @@ CREATE TEMPLATE QUERY GDBMS_ALGO.centrality.degree_cent(SET<STRING> v_type_set,
     FROM all:s
     ACCUM
         IF in_degree THEN
-            s.@sum_degree_score += s.outdegree(reverse_e_type_set)
+            FOREACH e_type IN reverse_e_type_set DO
+                s.@sum_degree_score += s.outdegree(e_type)
+            END
         END,
         IF out_degree THEN
-            s.@sum_degree_score += s.outdegree(e_type_set)
+            FOREACH e_type IN e_type_set DO
+                s.@sum_degree_score += s.outdegree(e_type)
+            END
        END
    POST-ACCUM
        IF normalize THEN
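
The degree_cent change swaps a single outdegree(reverse_e_type_set) call for a FOREACH that adds outdegree(e_type) one edge type at a time, so the query accumulates the same total degree per vertex. A minimal Python/networkx sketch of that equivalence on a toy multigraph (the graph, edge types, and helper function below are invented for illustration and are not part of this commit):

import networkx as nx

# Toy multigraph with typed edges; the vertex names and the
# "Friend"/"Coworker" types are assumptions made for this sketch.
g = nx.MultiGraph()
g.add_edge("A", "B", e_type="Friend")
g.add_edge("A", "C", e_type="Coworker")
g.add_edge("A", "B", e_type="Coworker")

edge_types = {"Friend", "Coworker"}

def degree_for_type(graph, node, e_type):
    # count edges incident to `node` whose type matches e_type
    return sum(1 for _, _, d in graph.edges(node, data=True) if d["e_type"] == e_type)

# Summing per-type degrees (the new FOREACH form) matches the total
# degree over all edge types (the old set-based call).
per_type_sum = sum(degree_for_type(g, "A", t) for t in edge_types)
assert per_type_sum == g.degree("A") == 3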

GDBMS_ALGO/centrality/pagerank.gsql (+4 -4)
@@ -4,11 +4,11 @@ CREATE TEMPLATE QUERY GDBMS_ALGO.centrality.pagerank (STRING v_type, STRING e_ty
     BOOL display_edges = FALSE) SYNTAX V1 {
 
 /*
-    First Author: <First Author Name>
-    First Commit Date: <First Commit Date>
+    First Author: unk
+    First Commit Date: unk
 
-    Recent Author: <Recent Commit Author Name>
-    Recent Commit Date: <Recent Commit Date>
+    Recent Author: Rob Rossmiller
+    Recent Commit Date: Rob Rossmiller
 
 
 Repository:
@@ -1,14 +1,10 @@
-CREATE QUERY tg_pagerank (STRING v_type, STRING e_type,
-        FLOAT max_change=0.001, INT maximum_iteration=25, FLOAT damping=0.85, INT top_k = 100,
-        BOOL print_results = TRUE, STRING result_attribute = "", STRING file_path = "",
-        BOOL display_edges = FALSE) SYNTAX V1 {
-
+CREATE QUERY tg_pagerank (STRING v_type, STRING e_type, FLOAT max_change=0.001, INT maximum_iteration=25, FLOAT damping=0.85, INT top_k = 100, BOOL normalize=TRUE, BOOL print_results = TRUE, STRING result_attribute = "", STRING file_path = "", BOOL display_edges = FALSE) SYNTAX V1 {
 /*
-    First Author: <First Author Name>
-    First Commit Date: <First Commit Date>
+    First Author: <unk>
+    First Commit Date: <unk>
 
-    Recent Author: <Recent Commit Author Name>
-    Recent Commit Date: <Recent Commit Date>
+    Recent Author: Rob Rossmiller
+    Recent Commit Date: Sept 4, 2024
 
 
 Repository:
@@ -45,7 +41,7 @@ CREATE QUERY tg_pagerank (STRING v_type, STRING e_type,
     file_path:
         file to write CSV output to
     top_k:
-        #top scores to output
+        //top scores to output
     display_edges:
        output edges for visualization
    max_change:
@@ -54,65 +50,71 @@ CREATE QUERY tg_pagerank (STRING v_type, STRING e_type,
         importance of traversal vs. random teleport
 */
 
-    TYPEDEF TUPLE<VERTEX Vertex_ID, FLOAT score> Vertex_Score;
-    HeapAccum<Vertex_Score>(top_k, score DESC) @@top_scores_heap;
-    SetAccum<VERTEX> @@top_vertices; # vertices with top score
-    MaxAccum<FLOAT> @@max_diff = 9999; # max score change in an iteration
-    SumAccum<FLOAT> @sum_recvd_score = 0; # sum of scores each vertex receives FROM neighbors
-    SumAccum<FLOAT> @sum_score = 1; # initial score for every vertex is 1.
-    SetAccum<EDGE> @@edge_set; # list of all edges, if display is needed
-    FILE f (file_path);
-
-    # PageRank iterations
-    Start = {v_type}; # Start with all vertices of specified type(s)
-    WHILE @@max_diff > max_change
-            LIMIT maximum_iteration DO
+    TYPEDEF TUPLE<VERTEX Vertex_ID, FLOAT score> Vertex_Score;
+    HeapAccum<Vertex_Score>(top_k, score DESC) @@top_scores_heap;
+    SetAccum<VERTEX> @@top_vertices; // vertices with top score
+    MaxAccum<FLOAT> @@max_diff = 9999; // max score change in an iteration
+    SumAccum<FLOAT> @sum_recvd_score = 0; // sum of scores each vertex receives FROM neighbors
+    SumAccum<FLOAT> @sum_score = 1; // initial score for every vertex is 1.
+    SetAccum<EDGE> @@edge_set; // list of all edges, if display is needed
+    FILE f (file_path);
+    INT N=1;
+
+
+    // PageRank iterations
+    Start = {v_type}; // Start with all vertices of specified type(s)
+    IF normalize THEN
+        N = Start.size();
+        tmp = SELECT s FROM Start:s
+              ACCUM s.@sum_score = 1.0/N;
+    END;
+
+    WHILE @@max_diff > max_change LIMIT maximum_iteration DO
         @@max_diff = 0;
-        V = SELECT s
-            FROM Start:s -(e_type:e)- v_type:t
-            ACCUM
-                t.@sum_recvd_score += s.@sum_score/(s.outdegree(e_type))
-            POST-ACCUM
-                s.@sum_score = (1.0-damping) + damping * s.@sum_recvd_score,
-                s.@sum_recvd_score = 0,
-                @@max_diff += abs(s.@sum_score - s.@sum_score');
-    END; # END WHILE loop
-
-    # Output
-    IF file_path != "" THEN
-        f.println("Vertex_ID", "PageRank");
-    END;
-    V = SELECT s
-        FROM Start:s
-        POST-ACCUM
-            IF result_attribute != "" THEN
-                s.setAttr(result_attribute, s.@sum_score)
-            END,
-
-            IF file_path != "" THEN
-                f.println(s, s.@sum_score)
-            END,
-
-            IF print_results THEN
-                @@top_scores_heap += Vertex_Score(s, s.@sum_score)
-            END;
 
-    IF print_results THEN
-        PRINT @@top_scores_heap;
-        IF display_edges THEN
-
-            FOREACH vert IN @@top_scores_heap DO
-                @@top_vertices += vert.Vertex_ID;
+        V = SELECT s FROM Start:s -(e_type:e)- v_type:t
+            ACCUM t.@sum_recvd_score += s.@sum_score/(s.outdegree(e_type))
+            POST-ACCUM
+                s.@sum_score = (1.0-damping)/N + damping * s.@sum_recvd_score,
+                s.@sum_recvd_score = 0,
+                @@max_diff += abs(s.@sum_score - s.@sum_score');
+    END;
+
+    // Output
+    IF file_path != "" THEN
+        f.println("Vertex_ID", "PageRank");
+    END;
+
+    V = SELECT s FROM Start:s
+        POST-ACCUM
+            IF result_attribute != "" THEN
+                s.setAttr(result_attribute, s.@sum_score)
+            END,
+
+            IF file_path != "" THEN
+                f.println(s, s.@sum_score)
+            END,
+
+            IF print_results THEN
+                @@top_scores_heap += Vertex_Score(s, s.@sum_score)
+            END;
+
+    IF print_results THEN
+        PRINT @@top_scores_heap;
+        IF display_edges THEN
+
+            FOREACH vert IN @@top_scores_heap DO
+                @@top_vertices += vert.Vertex_ID;
+            END;
+
+            Top = {@@top_vertices};
+            Top = SELECT s
+                  FROM Top:s -(e_type:e)- v_type:t
+                  WHERE @@top_vertices.contains(t)
+                  ACCUM @@edge_set += e;
+
+            PRINT @@edge_set;
+            PRINT Top;
         END;
-
-        Top = {@@top_vertices};
-        Top = SELECT s
-              FROM Top:s -(e_type:e)- v_type:t
-              WHERE @@top_vertices.contains(t)
-              ACCUM @@edge_set += e;
-
-        PRINT @@edge_set;
-        PRINT Top;
-    END;
-    END;
+    END;
 }
@@ -0,0 +1 @@
+[{"@@top_scores_heap": [{"Vertex_ID": "A", "score": 0.125}, {"Vertex_ID": "B", "score": 0.125}, {"Vertex_ID": "C", "score": 0.125}, {"Vertex_ID": "D", "score": 0.125}, {"Vertex_ID": "E", "score": 0.125}, {"Vertex_ID": "F", "score": 0.125}, {"Vertex_ID": "G", "score": 0.125}, {"Vertex_ID": "H", "score": 0.125}]}]

tests/data/baseline/ml/fastRP.json.gz (333 Bytes, binary file not shown)

tests/run.sh (+5 -2)
@@ -1,4 +1,7 @@
 clear
 python3 test/setup.py &&
-python3 test/baseline/create_baselines.py &&
-pytest test/test_centrality.py #test/test_ml.py
+# pytest test/test_centrality.py::TestCentrality::test_degree_centrality1 #test/test_ml.py
+# pytest test/test_centrality.py::TestCentrality::test_pagerank
+# pytest test/test_ml.py
+pytest test/test_centrality.py::TestCentrality
+echo 'done'

tests/test/baseline/algos/__init__.py (+3 -2)
@@ -1,2 +1,3 @@
-from .degree_cent import *
-from .fastrp import fastrp_wrapper as fastrp
+from .degree_centrality import DegreeCentralityBaseline
+from .fastrp import FastRPBaseline
+from .pagerank import PagerankBaseline

tests/test/baseline/algos/base.py (+7)
@@ -0,0 +1,7 @@
+from abc import ABC, abstractmethod
+
+
+class Baseline(ABC):
+    @abstractmethod
+    def run(self):
+        ...
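
Baseline subclasses implement run(); this commit's __init__.py imports DegreeCentralityBaseline, FastRPBaseline, and PagerankBaseline from sibling modules that are not displayed in this diff. A hypothetical subclass, only to illustrate the shape of the interface (the class name, constructor, and return format below are assumptions, not code from this commit):

import networkx as nx

from .base import Baseline  # assumes this file sits next to base.py in tests/test/baseline/algos/


class DegreeBaseline(Baseline):
    """Hypothetical example; the real baselines live in degree_centrality.py,
    fastrp.py, and pagerank.py, which this diff does not show."""

    def __init__(self, graph: nx.Graph):
        self.graph = graph

    def run(self):
        # return one record per vertex, mirroring the JSON baseline format above
        return [{"Vertex_ID": v, "score": d} for v, d in self.graph.degree()]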

tests/test/baseline/algos/common.py (+42)
@@ -0,0 +1,42 @@
+import csv
+import json
+
+import networkx as nx
+import numpy as np
+from tqdm import tqdm
+
+
+def create_graph(edges, weights=False, directed=False) -> nx.Graph:
+    if directed:
+        g = nx.DiGraph()
+    else:
+        g = nx.Graph()
+    if weights:
+        # make weights floats
+        edges = [[a, b, float(c)] for a, b, c in edges]
+        g.add_weighted_edges_from(edges)
+    else:
+        g.add_edges_from(edges)
+    return g
+
+
+def create_baseline(paths, algo):
+    t = tqdm(paths, desc=f"Creating {algo} baselines")
+    for p, out_path, fn, m in t:
+        t.set_postfix_str(out_path.split("/")[-1].split(".")[0])
+        with open(p) as f:
+            edges = np.array(list(csv.reader(f)))
+
+        directed = True if "Directed" in out_path else False
+        weights = True if "Weighted" in out_path else False
+        g = create_graph(edges, weights, directed)
+
+        # from matplotlib import pyplot as plt
+        # pos = nx.drawing.layout.kamada_kawai_layout(g)
+        # nx.draw(g, pos)
+        # nx.draw_networkx_labels(g, pos, {n: n for n in g.nodes})
+        # plt.savefig(f"{out_path.split('/')[-1]}.png")
+
+        res = fn(g, m)
+        with open(out_path, "w") as f:
+            json.dump(res, f)  # , indent=2)
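
create_baseline expects each entry of paths to be a 4-tuple: an edge-list CSV to read, an output JSON path to write, a baseline function called as fn(g, m), and a metric argument m; whether the graph is built directed or weighted is inferred from "Directed" or "Weighted" appearing in the output path. A hypothetical call, with made-up file names and a made-up baseline function (only the 4-tuple shape and the fn(g, m) convention are taken from the code above):

import networkx as nx

from common import create_baseline  # import path is an assumption; the module is tests/test/baseline/algos/common.py

def degree_baseline(g: nx.Graph, metric: str):
    # made-up baseline fn following the fn(g, m) calling convention
    return [{"Vertex_ID": v, "score": d} for v, d in g.degree()]

paths = [
    # (edge CSV to read, JSON baseline to write, baseline fn, metric label)
    ("data/edges/Line_Undirected.csv", "baseline/degree/Line_Undirected.json", degree_baseline, "degree"),
]
create_baseline(paths, "degree")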

tests/test/baseline/algos/degree_cent.py (-46): this file was deleted.
