Skip to content

Commit 8383057

Browse files
authored
Merge pull request #138 from tigergraph/3.8-migrate
3.8 Publish
2 parents 3d5d022 + 7a0a424 commit 8383057

File tree

123 files changed

+7490
-1838
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

123 files changed

+7490
-1838
lines changed

.DS_Store

6 KB
Binary file not shown.

.github/auto_request_review.yml

+7-7
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,17 @@ reviewers:
99
# Reviewer groups each of which has a list of GitHub usernames
1010
groups:
1111
group1:
12-
- YimingPan-Code
13-
- Boyu1997
14-
group2:
12+
- yimingpantg
13+
- TannerW
1514
- wyatt-joyner-tg
16-
- tommytgraph
17-
- harshadindigal
15+
- parkererickson
16+
- lennessyy
17+
- a-m-thomas
1818
secondary:
19-
- YimingPan-Code
19+
- yimingpantg
2020
- wyatt-joyner-tg
2121
tertiary:
22-
- xchang2014
22+
- xinyuchtg
2323
- victor-gsl
2424

2525
files:
+113
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
CREATE TEMPLATE QUERY GDBMS_ALGO.centrality.article_rank (STRING v_type, STRING e_type,
  FLOAT max_change = 0.001, INT maximum_iteration = 25, FLOAT damping = 0.85, INT top_k = 100,
  BOOL print_results = TRUE, STRING result_attribute = "", STRING file_path = "") SYNTAX V1 {

  /*
    First Author: karimsaraipour
    First Commit Date: Oct 4, 2021

    Recent Author: abdullaTigerGraph
    Recent Commit Date: Apr 4, 2022

    Repository:
        https://github.com/tigergraph/gsql-graph-algorithms/tree/master/algorithms/Centrality

    Maturity:
        Production

    Description:
        Compute the ArticleRank score for each vertex of type v_type.

        Let avg_out be the average e_type out-degree over all v_type vertices.
        In each iteration every vertex s sends
            score(s) / (avg_out + outdegree(s))
        along each of its e_type edges, and then updates its own score to
            score = (1 - damping) + damping * avg_out * sum(received scores).

        The algorithm stops when either of the following is true:
          a) it has run maximum_iteration iterations;
          b) the largest score change of any vertex in the last iteration
             is <= max_change.

        This query supports only a single vertex type and a single edge type
        for the time being (8/13/2020).

    Publications:
        https://www.emerald.com/insight/content/doi/10.1108/00012530911005544/full/html

    TigerGraph Documentation:
        https://docs.tigergraph.com/graph-ml/current/centrality-algorithms/article-rank

    Parameters:
        v_type:
            vertex type to traverse
        e_type:
            edge type to traverse
        max_change:
            max allowed score change between iterations to achieve convergence
        maximum_iteration:
            max number of iterations
        damping:
            importance of traversal vs. random teleport
        top_k:
            number of top scores kept for the printed output
        print_results:
            if TRUE, print the top_k scores as JSON output
        result_attribute:
            name of the vertex attribute to store the score in; the value
            written is the FLOAT score. Nothing is stored when empty.
        file_path:
            file to write CSV output to. Nothing is written when empty.
  */

  TYPEDEF TUPLE<VERTEX Vertex_ID, FLOAT score> Vertex_Score;
  HeapAccum<Vertex_Score>(top_k, score DESC) @@top_scores_heap;
  MaxAccum<FLOAT> @@max_diff = 9999;      # max score change in an iteration; starts high so the loop is entered
  SumAccum<FLOAT> @sum_recvd_score = 0;   # sum of scores each vertex receives from its neighbors
  # NOTE(review): the initial score is the literal 0.15, which equals
  # (1 - damping) only for the default damping = 0.85.
  SumAccum<FLOAT> @sum_score = 0.15;      # initial score of every vertex
  AvgAccum @@avg_out;                     # average e_type out-degree over the start set
  SumAccum<INT> @sum_out_degree;          # e_type out-degree of each vertex
  FILE f (file_path);

  # Collect per-vertex out-degree and the global average out-degree
  Start = {v_type};                       # start with all vertices of the specified type
  Start = SELECT s
          FROM Start:s
          ACCUM
              s.@sum_out_degree += s.outdegree(e_type),
              @@avg_out += s.outdegree(e_type);

  # ArticleRank iterations
  WHILE @@max_diff > max_change
      LIMIT maximum_iteration DO
      @@max_diff = 0;

      V = SELECT s
          FROM Start:s -(e_type:e)- v_type:t
          ACCUM
              t.@sum_recvd_score += s.@sum_score/(@@avg_out+s.@sum_out_degree)
          POST-ACCUM
              s.@sum_score = (1.0-damping) + damping * s.@sum_recvd_score*@@avg_out,
              s.@sum_recvd_score = 0,
              # s.@sum_score' is the value held before this iteration's update
              @@max_diff += abs(s.@sum_score - s.@sum_score');

  END; # END WHILE loop

  # Output
  IF file_path != "" THEN
      f.println("Vertex_ID", "article Rank");
  END;

  V = SELECT s
      FROM Start:s
      POST-ACCUM
          IF result_attribute != "" THEN
              s.setAttr(result_attribute, s.@sum_score)
          END,

          IF file_path != "" THEN
              f.println(s, s.@sum_score)
          END,

          IF print_results THEN
              @@top_scores_heap += Vertex_Score(s, s.@sum_score)
          END;

  IF print_results THEN
      PRINT @@top_scores_heap;
  END;
}
+200
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
CREATE TEMPLATE QUERY GDBMS_ALGO.centrality.betweenness_cent(SET<STRING> v_type_set, SET<STRING> e_type_set, STRING reverse_e_type,INT max_hops = 10,
  INT top_k = 100, BOOL print_results = True, STRING result_attribute = "",
  STRING file_path = "", BOOL display_edges = FALSE) SYNTAX V1 {

  /*
    First Author: karimsaraipour
    First Commit Date: Sep 2, 2021

    Recent Author: Boyu Jiang
    Recent Commit Date: Mar 14, 2022

    Repository:
        https://github.com/tigergraph/gsql-graph-algorithms/tree/master/algorithms/Centrality

    Maturity:
        Production

    Description:
        Compute Betweenness Centrality for each VERTEX.
        Use multi-source BFS: the vertices are processed in batches, every
        source vertex of a batch owns one bit of an INT bitmask, and all
        sources of the batch are expanded together.

    Publications:
        http://www.vldb.org/pvldb/vol8/p449-then.pdf

    TigerGraph Documentation:
        https://docs.tigergraph.com/graph-ml/current/centrality-algorithms/betweenness-centrality

    Parameters:
        v_type_set:
            vertex types to traverse
        e_type_set:
            edge types to traverse; in this query they are only used to
            collect the edges printed when display_edges is TRUE (the BFS
            itself follows reverse_e_type)
        reverse_e_type:
            reverse edge type in directed graph, in undirected graph set reverse_e_type=e_type_set
        max_hops:
            look only this far from each vertex
        top_k:
            report only this many top scores
        print_results:
            If True, print JSON output
        result_attribute:
            name of the vertex attribute to store the score in; the value
            written is the FLOAT score. Nothing is stored when empty.
        file_path:
            file to write CSV output to. Nothing is written when empty.
        display_edges:
            If True, output edges for visualization. Only takes effect when
            print_results is also True.
  */

  TYPEDEF TUPLE<VERTEX Vertex_ID, FLOAT score> Vertex_Score;      # tuple to store betweenness centrality score
  HeapAccum<Vertex_Score>(top_k, score DESC) @@top_scores_heap;   # heap to store top K scores
  SumAccum<INT> @@sum_curr_dist;                                  # current BFS distance
  BitwiseOrAccum @bitwise_or_visit_next;                          # use bitmasks instead of SetAccum
  BitwiseOrAccum @bitwise_or_seen;
  BitwiseOrAccum @bitwise_or_visit;
  # Next per-batch bit index to hand out. Starts at 0, the same value it is
  # reset to after every batch, so every batch numbers its sources 0..k-1.
  SumAccum<INT> @@sum_count = 0;
  SumAccum<INT> @sum_id;                                          # per-batch bit index of the vertex
  SetAccum<INT> @@batch_set;                                      # internal ids of the current batch
  MapAccum<INT,INT> @@map;                                        # internal id -> per-batch bit index
  SetAccum<EDGE> @@edge_set;
  SumAccum<FLOAT> @sum_delta = 0;                                 # accumulated betweenness score
  MapAccum<INT,BitwiseOrAccum> @times_map;                        # distance -> sources that reach the vertex at that distance
  MapAccum<INT,SumAccum<INT>> @sigma_map;                         # distance -> path count at that distance

  FILE f (file_path);
  INT num_vert;
  INT batch_number;

  # Compute betweenness
  all = {v_type_set};
  num_vert = all.size();

  # Every source in a batch needs its own bit in an INT bitmask, and the
  # "> 0" tests below need the sign bit to stay clear. Round the batch count
  # UP so that a batch holds about 60 vertices at most; flooring would allow
  # up to ~119 vertices in one batch and make bit indices collide.
  # NOTE(review): this still assumes getvid() values are spread evenly
  # modulo batch_number - confirm for sparse/non-contiguous internal ids.
  batch_number = (num_vert + 59) / 60;

  IF batch_number == 0 THEN
      batch_number = 1;
  END;

  # Process one batch of source vertices at a time
  FOREACH i IN RANGE[0, batch_number-1] DO
      Current = SELECT s
                FROM all:s
                WHERE getvid(s)%batch_number == i
                POST-ACCUM
                    @@map+=(getvid(s)->0),
                    @@batch_set+=getvid(s);

      # give every vertex of the batch a unique bit index, counting up from 0
      FOREACH ver in @@batch_set DO
          @@map += (ver->@@sum_count); @@sum_count += 1;
      END;

      Start = SELECT s
              FROM Current:s
              POST-ACCUM
                  s.@sum_id=@@map.get(getvid(s));

      # set initial seen and visit: each source has seen only itself
      Start = SELECT s
              FROM Current:s
              POST-ACCUM
                  s.@bitwise_or_seen = 1<<s.@sum_id,
                  s.@bitwise_or_visit = s.@bitwise_or_seen,
                  s.@sigma_map += (0->1),
                  s.@times_map += (0->s.@bitwise_or_visit);

      @@batch_set.clear();
      @@map.clear();
      @@sum_count=0;

      # Forward pass: multi-source BFS, at most max_hops levels
      WHILE (Start.size() > 0) LIMIT max_hops DO
          @@sum_curr_dist+=1;

          Start = SELECT t
                  FROM Start:s -(reverse_e_type:e)-v_type_set:t
                  WHERE s.@bitwise_or_visit&-t.@bitwise_or_seen-1>0 AND s!=t # -t.@seen-1 is the bitwise complement of t.@seen
                  ACCUM # update visit_next
                      INT c = s.@bitwise_or_visit&-t.@bitwise_or_seen-1,
                      IF c>0 THEN
                          t.@bitwise_or_visit_next+=c,
                          t.@bitwise_or_seen+=c
                      END,
                      t.@sigma_map+=(@@sum_curr_dist->s.@sigma_map.get(@@sum_curr_dist-1)) # set sigma based on depth
                  POST-ACCUM
                      t.@bitwise_or_visit=t.@bitwise_or_visit_next,
                      t.@times_map+=(@@sum_curr_dist->t.@bitwise_or_visit),
                      t.@bitwise_or_visit_next=0;
      END;

      @@sum_curr_dist+=-1;

      # Backward pass: start from the vertices recorded at the current distance
      Start = SELECT s
              FROM all:s
              WHERE s.@sigma_map.get(@@sum_curr_dist)!=0;

      WHILE (Start.size()>0) LIMIT max_hops DO
          @@sum_curr_dist+=-1;
          Start = SELECT t
                  FROM Start:s -(reverse_e_type:e)- v_type_set:t
                  WHERE t.@times_map.get(@@sum_curr_dist)&s.@times_map.get(@@sum_curr_dist+1)!=0
                  ACCUM
                      # NOTE(review): Brandes' dependency accumulation is usually
                      # written sigma_t / sigma_s * (1 + delta_s); here (1 + delta_s)
                      # sits in the denominator - confirm this is intended.
                      FLOAT currValue=t.@sigma_map.get(@@sum_curr_dist)/(s.@sigma_map.get(@@sum_curr_dist+1)*(1+s.@sum_delta)),
                      INT r=t.@times_map.get(@@sum_curr_dist)&s.@times_map.get(@@sum_curr_dist+1),
                      INT plus=0,
                      WHILE r>0 DO
                          r=r&(r-1),plus=plus+1 # r&(r-1) clears the lowest set bit: counts the 1 bits, same as SetAccum size()
                      END,
                      FLOAT value = currValue*plus/2.0,
                      t.@sum_delta+=value;

          Start = SELECT s
                  FROM all:s
                  WHERE s.@sigma_map.get(@@sum_curr_dist)!=0;
      END;

      # Reset per-batch state before the next batch
      @@sum_curr_dist=0;
      Start = SELECT s
              FROM all:s
              POST-ACCUM
                  s.@bitwise_or_seen=0,
                  s.@bitwise_or_visit=0,
                  s.@sigma_map.clear(),
                  s.@times_map.clear();
  END;

  # Output
  IF file_path != "" THEN
      f.println("Vertex_ID", "Betweenness");
  END;

  Start = SELECT s
          FROM all:s
          POST-ACCUM
              IF result_attribute != "" THEN
                  s.setAttr(result_attribute, s.@sum_delta)
              END,

              IF print_results THEN
                  @@top_scores_heap += Vertex_Score(s, s.@sum_delta)
              END,

              IF file_path != "" THEN
                  f.println(s, s.@sum_delta)
              END;

  IF print_results THEN
      PRINT @@top_scores_heap AS top_scores;

      # edge output is nested here, so it is skipped when print_results is FALSE
      IF display_edges THEN
          PRINT Start[Start.@sum_delta];

          Start = SELECT s
                  FROM Start:s -(e_type_set:e)-:t
                  ACCUM
                      @@edge_set += e;

          PRINT @@edge_set;

      END;
  END;

}

0 commit comments

Comments
 (0)