Skip to content

Commit 4015287

Browse files
[ALGOS-158] fix(add): add template algorithm; (#40)
* Create article_rank.gsql * Update and rename closeness.gsql to closeness_cent.gsql * Create betweenness_cent.gsql * Create degree_cent.gsql * Create weighted_degree_cent.gsql * Create eigenvector_cent.gsql * Create harmonic_cent * Create pagerank.gsql * Create pagerank_wt.gsql * Create pagerank_pers_ap_batch.gsql * Create pagerank_pers.gsql * Update and rename knn.gsql to knn_cosine_ss.gsql * Create greedy_graph_coloring.gsql * Create maximal_indep_set.gsql * Create maximal_indep_set_random.gsql * Update louvain.gsql * Create wcc.gsql * Create wcc_small_world.gsql * Create scc.gsql * Create scc_small_world.gsql * Create kcore.gsql * Create label_prop.gsql * Create lcc.gsql * Create map_equation.gsql * Create slpa.gsql * Create tri_count.gsql * Create tri_count_fast.gsql * Update and rename weisfeiler_lehman to weisfeiler_lehman.gsql * Create embedding_cosine_similarity.gsql * Update cycle_detection.gsql * Create cycle_detection_batch.gsql * Create cycle_detection_count.gsql * Create astar.gsql * Create bfs.gsql * Create cycle_component.gsql * Update astar.gsql * Update bfs.gsql * Update cycle_detection.gsql * Update cycle_detection_batch.gsql * Update cycle_detection_batch.gsql * Update cycle_detection_batch.gsql * Update embedding_cosine_similarity.gsql * Update weisfeiler_lehman.gsql * Update kcore.gsql * Update label_prop.gsql * Update lcc.gsql * Update louvain.gsql * Update map_equation.gsql * Update scc.gsql * Update scc_small_world.gsql * Update slpa.gsql * Update tri_count.gsql * Update tri_count_fast.gsql * Update wcc.gsql * Update wcc_small_world.gsql * Update wcc_small_world.gsql * Update greedy_graph_coloring.gsql * Update knn_cosine_ss.gsql * Update maximal_indep_set.gsql * Update maximal_indep_set_random.gsql * Update article_rank.gsql * Update betweenness_cent.gsql * Update closeness_cent.gsql * Update degree_cent.gsql * Update eigenvector_cent.gsql * Update harmonic_cent * Update pagerank.gsql * Update pagerank_pers.gsql * Update 
pagerank_pers_ap_batch.gsql * Update pagerank_wt.gsql * Update pagerank_wt.gsql * Update weighted_degree_cent.gsql * Create max_BFS_depth.gsql * Create estimate_diameter.gsql * Create maxflow.gsql * Create msf.gsql * Create mst.gsql * Create all_path.gsql * Create all_path_bidirection.gsql * Create shortest_ss_no_wt.gsql * Create shortest_ss_any_wt.gsql * Create shortest_ss_pos_wt.gsql * Create shortest_ss_pos_wt_tb.gsql * Update cosine_nbor_ss.gsql * Create cosine_nbor_ap_batch * Create jaccard_nbor_ss.gsql * Create jaccard_nbor_ap_batch.gsql * Update same_community.gsql * Create total_neighbors.gsql * Create resource_allocation * Create preferential_attachment.gsql * Create common_neighbors.gsql * Create adamic_adar * Delete mst.gsql * Delete estimate_diameter.gsql * Update cycle_detection_batch.gsql * Update max_BFS_depth.gsql * Delete shortest_ss_any_wt.gsql * Update cycle_detection_batch.gsql * Update cycle_detection_batch.gsql * Delete map_equation.gsql * Delete scc.gsql * Update tri_count.gsql * Update tri_count_fast.gsql * Delete kcore.gsql * Delete all_path.gsql * Delete all_path_bidirection.gsql * Delete maximal_indep_set.gsql * Delete maximal_indep_set_random.gsql * Update tg_algo_all_path.yml * Update tg_algo_all_path_bidirection.yml * Create all_path_bidirection.gsql * Create all_path.gsql * Delete eigenvector_cent.gsql * Delete pagerank_pers.gsql * Delete pagerank_pers_ap_batch.gsql * Delete knn_cosine_ss.gsql * Delete wcc_small_world.gsql * Delete embedding_cosine_similarity.gsql * Delete astar.gsql * Delete all_path_bidirection.gsql * Delete maxflow.gsql * Delete max_BFS_depth.gsql * Delete msf.gsql * Delete shortest_ss_no_wt.gsql * Delete shortest_ss_pos_wt.gsql * Delete shortest_ss_pos_wt_tb.gsql * Delete cosine_nbor_ss.gsql * Delete jaccard_nbor_ss.gsql * Delete common_neighbors.gsql * Delete preferential_attachment.gsql * Delete same_community.gsql * Delete total_neighbors.gsql Co-authored-by: Yiming Pan <[email protected]>
1 parent cc28461 commit 4015287

35 files changed

+3215
-7
lines changed
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
CREATE TEMPLATE QUERY GDBMS_ALGO.centrality.article_rank (STRING v_type, STRING e_type,
  FLOAT max_change = 0.001, INT maximum_iteration = 25, FLOAT damping = 0.85, INT top_k = 100,
  BOOL print_results = TRUE, STRING result_attribute = "", STRING file_path = "") SYNTAX V1 {

  /*
    First Author: karimsaraipour
    First Commit Date: Oct 4, 2021

    Recent Author: abdullaTigerGraph
    Recent Commit Date: Apr 4, 2022

    Repository:
        https://github.com/tigergraph/gsql-graph-algorithms/tree/master/algorithms/Centrality

    Maturity:
        Production

    Description:
        Compute the Article Rank score for each vertex in the graph.
        Let avg_out be the average out-degree (over e_type) of all v_type vertices.
        In each iteration every vertex s sends
            score(s) / (avg_out + outdegree(s))
        to each neighbor, and every vertex then updates its own score to
            (1 - damping) + damping * avg_out * sum(received values).
        The iteration stops when either of the following is true:
          a) maximum_iteration iterations have been run;
          b) the largest score change of any vertex compared to the previous
             iteration is <= max_change.

        This query currently accepts a single vertex type and a single edge type
        (as of 8/13/2020).

    Publications:
        https://www.emerald.com/insight/content/doi/10.1108/00012530911005544/full/html

    TigerGraph Documentation:
        https://docs.tigergraph.com/graph-ml/current/centrality-algorithms/article-rank

    Parameters:
        v_type:
            vertex type to traverse
        e_type:
            edge type to traverse
        max_change:
            max allowed change between iterations to achieve convergence
        maximum_iteration:
            max number of iterations
        damping:
            importance of traversal vs. random teleport
        top_k:
            number of top scores kept for the printed output
        print_results:
            if TRUE, print the top_k scores as JSON output
        result_attribute:
            FLOAT attribute to store the score to (skipped when empty)
        file_path:
            file to write CSV output to (skipped when empty)
  */

  TYPEDEF TUPLE<VERTEX Vertex_ID, FLOAT score> Vertex_Score;
  HeapAccum<Vertex_Score>(top_k, score DESC) @@top_scores_heap;
  MaxAccum<FLOAT> @@max_diff = 9999;     # max score change in an iteration; starts large so the loop is entered
  SumAccum<FLOAT> @sum_recvd_score = 0;  # sum of the values a vertex receives from its neighbors
  SumAccum<FLOAT> @sum_score = 0.15;     # initial score; equals 1 - damping only for the default damping of 0.85
  AvgAccum @@avg_out;                    # average out-degree over all start vertices
  SumAccum<INT> @sum_out_degree;         # out-degree of each vertex over e_type
  FILE f (file_path);

  # Collect per-vertex out-degree and the global average out-degree
  Start = {v_type};  # start with all vertices of the specified type
  Start = SELECT s
          FROM Start:s
          ACCUM
              s.@sum_out_degree += s.outdegree(e_type),
              @@avg_out += s.outdegree(e_type);

  # Article Rank iterations
  WHILE @@max_diff > max_change
      LIMIT maximum_iteration DO
      @@max_diff = 0;

      V = SELECT s
          FROM Start:s -(e_type:e)- v_type:t
          ACCUM
              t.@sum_recvd_score += s.@sum_score/(@@avg_out+s.@sum_out_degree)
          POST-ACCUM
              s.@sum_score = (1.0-damping) + damping * s.@sum_recvd_score*@@avg_out,
              s.@sum_recvd_score = 0,
              # s.@sum_score' is the value the accumulator held before this iteration
              @@max_diff += abs(s.@sum_score - s.@sum_score');

  END;  # END WHILE loop

  # Output
  IF file_path != "" THEN
      f.println("Vertex_ID", "article Rank");
  END;

  V = SELECT s
      FROM Start:s
      POST-ACCUM
          IF result_attribute != "" THEN
              s.setAttr(result_attribute, s.@sum_score)
          END,

          IF file_path != "" THEN
              f.println(s, s.@sum_score)
          END,

          IF print_results THEN
              @@top_scores_heap += Vertex_Score(s, s.@sum_score)
          END;

  IF print_results THEN
      PRINT @@top_scores_heap;
  END;
}
Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
CREATE TEMPLATE QUERY GDBMS_ALGO.centrality.betweenness_cent(SET<STRING> v_type_set, SET<STRING> e_type_set, STRING reverse_e_type,INT max_hops = 10,
  INT top_k = 100, BOOL print_results = True, STRING result_attribute = "",
  STRING file_path = "", BOOL display_edges = FALSE) SYNTAX V1 {

  /*
    First Author: karimsaraipour
    First Commit Date: Sep 2, 2021

    Recent Author: Boyu Jiang
    Recent Commit Date: Mar 14, 2022

    Repository:
        https://github.com/tigergraph/gsql-graph-algorithms/tree/master/algorithms/Centrality

    Maturity:
        Production

    Description:
        Compute Betweenness Centrality for each vertex.
        Uses multi-source BFS: the vertices are split into batches, every source
        in a batch owns one bit position, and the BFS frontiers of all sources in
        the batch are propagated together as bitmasks. A forward pass records
        per-depth frontier masks and path counts; a backward pass accumulates the
        dependency score of each vertex into @sum_delta.

    Publications:
        http://www.vldb.org/pvldb/vol8/p449-then.pdf

    TigerGraph Documentation:
        https://docs.tigergraph.com/graph-ml/current/centrality-algorithms/betweenness-centrality

    Parameters:
        v_type_set:
            vertex types to traverse
        e_type_set:
            edge types; in this query they are only used to collect the edges
            that are printed when display_edges is TRUE
        reverse_e_type:
            edge type followed by both BFS passes; reverse edge type in a directed
            graph, in an undirected graph set reverse_e_type=e_type_set
        max_hops:
            look only this far from each vertex
        top_k:
            report only this many top scores
        print_results:
            if TRUE, print JSON output
        result_attribute:
            FLOAT attribute to store results to (skipped when empty)
        file_path:
            file to write CSV output to (skipped when empty)
        display_edges:
            if TRUE (and print_results is TRUE), output edges for visualization
  */

  TYPEDEF TUPLE<VERTEX Vertex_ID, FLOAT score> Vertex_Score;     # tuple to store a betweenness centrality score
  HeapAccum<Vertex_Score>(top_k, score DESC) @@top_scores_heap;  # heap to store the top K scores
  SumAccum<INT> @@sum_curr_dist;                 # current BFS depth
  BitwiseOrAccum @bitwise_or_visit_next;         # bitmasks are used instead of a SetAccum of sources
  BitwiseOrAccum @bitwise_or_seen;               # sources that have already reached this vertex
  BitwiseOrAccum @bitwise_or_visit;              # sources whose frontier is currently at this vertex
  SumAccum<INT> @@sum_count = 0;                 # next free bit position; 0-based, matching the reset after each batch
  SumAccum<INT> @sum_id;                         # bit position assigned to this vertex as a source
  SetAccum<INT> @@batch_set;                     # internal ids of the sources in the current batch
  MapAccum<INT,INT> @@map;                       # internal id -> bit position for the current batch
  SetAccum<EDGE> @@edge_set;                     # edges collected for display
  SumAccum<FLOAT> @sum_delta = 0;                # accumulated betweenness score
  MapAccum<INT,BitwiseOrAccum> @times_map;       # depth -> mask of sources whose frontier hit this vertex at that depth
  MapAccum<INT,SumAccum<INT>> @sigma_map;        # depth -> path count accumulated at that depth

  FILE f (file_path);
  INT num_vert;
  INT batch_number;

  # Compute betweenness
  all = {v_type_set};
  num_vert = all.size();
  # Ceiling division: every source in a batch needs its own bit of a 64-bit mask,
  # so the average batch must not exceed 60 sources. Flooring here would let a
  # batch average up to 119 sources.
  batch_number = (num_vert+59)/60;

  IF batch_number == 0 THEN
      batch_number = 1;
  END;

  # Run one multi-source BFS (forward + backward pass) per batch of sources
  FOREACH i IN RANGE[0, batch_number-1] DO
      Current = SELECT s
                FROM all:s
                WHERE getvid(s)%batch_number == i
                POST-ACCUM
                    @@map+=(getvid(s)->0),
                    @@batch_set+=getvid(s);

      # assign each source of the batch its own bit position, counting up from 0
      FOREACH ver in @@batch_set DO
          @@map += (ver->@@sum_count); @@sum_count += 1;
      END;

      Start = SELECT s
              FROM Current:s
              POST-ACCUM
                  s.@sum_id=@@map.get(getvid(s));

      # set initial seen and visit masks: each source has seen only itself
      Start = SELECT s
              FROM Current:s
              POST-ACCUM
                  s.@bitwise_or_seen = 1<<s.@sum_id,
                  s.@bitwise_or_visit = s.@bitwise_or_seen,
                  s.@sigma_map += (0->1),
                  s.@times_map += (0->s.@bitwise_or_visit);

      @@batch_set.clear();
      @@map.clear();
      @@sum_count=0;

      # Forward pass: expand all frontiers of the batch one hop per iteration
      WHILE (Start.size() > 0) LIMIT max_hops DO
          @@sum_curr_dist+=1;

          Start = SELECT t
                  FROM Start:s -(reverse_e_type:e)-v_type_set:t
                  # -x-1 is the bitwise complement of x, so this selects the sources
                  # in the frontier at s that have not reached t yet
                  WHERE s.@bitwise_or_visit&-t.@bitwise_or_seen-1>0 AND s!=t
                  ACCUM
                      # update the next frontier mask and the seen mask of t
                      INT c = s.@bitwise_or_visit&-t.@bitwise_or_seen-1,
                      IF c>0 THEN
                          t.@bitwise_or_visit_next+=c,
                          t.@bitwise_or_seen+=c
                      END,
                      t.@sigma_map+=(@@sum_curr_dist->s.@sigma_map.get(@@sum_curr_dist-1)) # set sigma based on depth
                  POST-ACCUM
                      t.@bitwise_or_visit=t.@bitwise_or_visit_next,
                      t.@times_map+=(@@sum_curr_dist->t.@bitwise_or_visit),
                      t.@bitwise_or_visit_next=0;
      END;

      # step back to the deepest level recorded by the forward pass
      @@sum_curr_dist+=-1;

      Start = SELECT s
              FROM all:s
              WHERE s.@sigma_map.get(@@sum_curr_dist)!=0;

      # Backward pass: walk the levels from deepest to shallowest and accumulate scores
      WHILE (Start.size()>0) LIMIT max_hops DO
          @@sum_curr_dist+=-1;
          Start = SELECT t
                  FROM Start:s -(reverse_e_type:e)- v_type_set:t
                  WHERE t.@times_map.get(@@sum_curr_dist)&s.@times_map.get(@@sum_curr_dist+1)!=0
                  ACCUM
                      # NOTE(review): the usual dependency accumulation is
                      # sigma_t/sigma_s*(1+delta_s); here (1+delta_s) is part of
                      # the denominator - confirm this is intended.
                      FLOAT currValue=t.@sigma_map.get(@@sum_curr_dist)/(s.@sigma_map.get(@@sum_curr_dist+1)*(1+s.@sum_delta)),
                      INT r=t.@times_map.get(@@sum_curr_dist)&s.@times_map.get(@@sum_curr_dist+1),
                      INT plus=0,
                      # count the set bits of r, i.e. the number of sources shared by both masks
                      WHILE r>0 DO
                          r=r&(r-1),plus=plus+1
                      END,
                      FLOAT value = currValue*plus/2.0,
                      t.@sum_delta+=value;

          Start = SELECT s
                  FROM all:s
                  WHERE s.@sigma_map.get(@@sum_curr_dist)!=0;
      END;

      # reset the per-batch state before the next batch; @sum_delta is kept
      @@sum_curr_dist=0;
      Start = SELECT s
              FROM all:s
              POST-ACCUM
                  s.@bitwise_or_seen=0,
                  s.@bitwise_or_visit=0,
                  s.@sigma_map.clear(),
                  s.@times_map.clear();
  END;

  # Output
  IF file_path != "" THEN
      f.println("Vertex_ID", "Betweenness");
  END;

  Start = SELECT s
          FROM all:s
          POST-ACCUM
              IF result_attribute != "" THEN
                  s.setAttr(result_attribute, s.@sum_delta)
              END,

              IF print_results THEN
                  @@top_scores_heap += Vertex_Score(s, s.@sum_delta)
              END,

              IF file_path != "" THEN
                  f.println(s, s.@sum_delta)
              END;

  IF print_results THEN
      PRINT @@top_scores_heap AS top_scores;

      IF display_edges THEN
          PRINT Start[Start.@sum_delta];

          Start = SELECT s
                  FROM Start:s -(e_type_set:e)-:t
                  ACCUM
                      @@edge_set += e;

          PRINT @@edge_set;

      END;
  END;

}

GDBMS_ALGO/centrality/closeness.gsql

Lines changed: 0 additions & 1 deletion
This file was deleted.

0 commit comments

Comments
 (0)