-
Notifications
You must be signed in to change notification settings - Fork 72
/
Copy pathtg_harmonic_cent.gsql
169 lines (148 loc) · 5.66 KB
/
tg_harmonic_cent.gsql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
CREATE DISTRIBUTED QUERY tg_harmonic_cent(SET<STRING> v_type_set, SET<STRING> e_type_set, SET<STRING> reverse_e_type_set,INT max_hops = 10,
INT top_k = 100, BOOL wf = TRUE, BOOL print_results = True, STRING result_attribute = "",
STRING file_path = "", BOOL display_edges = FALSE) SYNTAX V1 {
/*
First Author: <First Author Name>
First Commit Date: <First Commit Date>
Recent Author: <Recent Commit Author Name>
Recent Commit Date: <Recent Commit Date>
Repository:
https://github.com/tigergraph/gsql-graph-algorithms/tree/master/algorithms/Centrality
Maturity:
Production
Description:
Compute Harmonic Centrality for each VERTEX.
Use multi-source BFS.
Publications:
https://arxiv.org/pdf/cond-mat/0008357.pdf
TigerGraph Documentation:
https://docs.tigergraph.com/graph-ml/current/centrality-algorithms/harmonic-centrality
Parameters:
v_type_set:
vertex types to traverse
print_results:
If True, print JSON output
e_type_set:
edge types to traverse
result_attribute:
INT attribute to store results to
reverse_e_type_set:
reverse edge type in directed graph, in undirected graph set reverse_e_type_set=e_type_set
max_hops:
look only this far from each vertex
file_path:
file to write CSV output to
top_k:
report only this many top scores
display_edges:
If True, output edges for visualization
wf:
If True, enable Wasserman and Faust normalization factor for multi-component graphs
*/
TYPEDEF TUPLE<VERTEX Vertex_ID, FLOAT score> Vertex_Score; #tuple to store harmonic centrality score
HeapAccum<Vertex_Score>(top_k, score DESC) @@top_scores_heap; #heap to store top K score
SumAccum<INT> @@sum_curr_dist; #current distance
BitwiseOrAccum @bitwise_or_visit_next; #use bitwise instead of setAccum
SumAccum<FLOAT> @sum_res; #Result, sum of distance
SumAccum<INT> @sum_size; #get graph size
SumAccum<FLOAT> @sum_score;
BitwiseOrAccum @bitwise_or_seen;
BitwiseOrAccum @bitwise_or_visit;
SumAccum<INT> @@sum_count = 1;#used to set unique ID
SumAccum<INT> @sum_id; #store the unique ID
SetAccum<INT> @@batch_set; #used to set unique ID
MapAccum<INT,INT> @@map; #used to set unique ID
SetAccum<EDGE> @@edge_set;
INT empty=0;
FILE f (file_path);
INT num_vert;
INT batch_number;
# Compute harmonic
all = {v_type_set};
num_vert = all.size();
batch_number = num_vert/60;
IF batch_number==0 THEN
batch_number=1;
END;
#Calculate the sum of distance to other vertex for each vertex
FOREACH i IN RANGE[0, batch_number-1] DO
Start = SELECT s
FROM all:s
WHERE getvid(s)%batch_number == i
POST-ACCUM @@map+=(getvid(s)->0),
@@batch_set+=getvid(s);
FOREACH ver in @@batch_set DO
@@map+=(ver->@@sum_count); @@sum_count+=1;
END; #set a unique ID for each vertex, ID from 1-63
Start = SELECT s
FROM Start:s
POST-ACCUM s.@sum_id=@@map.get(getvid(s));
Start = Select s
FROM Start:s
POST-ACCUM s.@bitwise_or_seen=1<<s.@sum_id,
s.@bitwise_or_visit=1<<s.@sum_id; # set initial seen and visit s.@seen1 s.@seen2
@@batch_set.clear();
@@map.clear();
@@sum_count=0;
WHILE (Start.size() > 0) LIMIT max_hops DO
@@sum_curr_dist+=1;
Start = SELECT t
FROM Start:s -(reverse_e_type_set:e)-v_type_set:t
WHERE s.@bitwise_or_visit&-t.@bitwise_or_seen-1>0 and s!=t #use -t.@seen-1 to get the trverse of t.@seen
ACCUM
INT c = s.@bitwise_or_visit&-t.@bitwise_or_seen-1,
IF c>0 THEN
t.@bitwise_or_visit_next+=c,
t.@bitwise_or_seen+=c
END
POST-ACCUM
t.@bitwise_or_visit=t.@bitwise_or_visit_next,
INT r = t.@bitwise_or_visit_next,
WHILE r>0 DO
r=r&(r-1),
t.@sum_res+=1.0/@@sum_curr_dist*1.0,
t.@sum_size+=1 #count how many 1 in the number, same as setAccum,size()
END,
t.@bitwise_or_visit_next=0;
END;
@@sum_curr_dist=0;
Start = SELECT s
FROM all:s
POST-ACCUM s.@bitwise_or_seen=0,
s.@bitwise_or_visit=0;
END;
Start = SELECT s
FROM all:s
# Calculate harmonic Centrality for each vertex
WHERE s.@sum_res>0
POST-ACCUM
IF wf THEN
s.@sum_score = s.@sum_res*1.0/s.@sum_size*1.0
ELSE
s.@sum_score = s.@sum_res*1.0
END,
IF result_attribute != "" THEN
s.setAttr(result_attribute, s.@sum_score)
END,
IF print_results THEN
@@top_scores_heap += Vertex_Score(s, s.@sum_score)
END,
IF file_path != "" THEN
f.println(s, s.@sum_score)
END;
#test
#Output
IF file_path != "" THEN
f.println("Vertex_ID", "Harmonic");
END;
IF print_results THEN
PRINT @@top_scores_heap AS top_scores;
IF display_edges THEN
PRINT Start[Start.@sum_score];
Start = SELECT s
FROM Start:s -(e_type_set:e)-:t
ACCUM @@edge_set += e;
PRINT @@edge_set;
END;
END;
}