-
Notifications
You must be signed in to change notification settings - Fork 72
/
Copy pathtg_louvain.gsql
246 lines (223 loc) · 9.03 KB
/
tg_louvain.gsql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
CREATE DISTRIBUTED QUERY tg_louvain(SET<STRING> v_type_set, SET<STRING> e_type_set, STRING weight_attribute = "weight", INT maximum_iteration = 10,
STRING result_attribute = "cid", STRING file_path = "", BOOL print_stats = FALSE) SYNTAX V1 {
/*
First Author: <First Author Name>
First Commit Date: <First Commit Date>
Recent Author: <Recent Commit Author Name>
Recent Commit Date: <Recent Commit Date>
Repository:
https://github.com/tigergraph/gsql-graph-algorithms/tree/master/algorithms/Community
Maturity:
Production
Description:
louvain community detection algorithm
add keyword DISTRIBUTED for cluster environment
Publications:
NA
TigerGraph Documentation:
https://docs.tigergraph.com/graph-ml/current/community-algorithms/louvain
Parameters:
v_type_set:
vertex types to traverse
e_type_set:
edge types to traverse
weight_attribute:
attribute name for edge weights (use an empty string if the graph is unweighted)
* note
when there is a weight attribute mismatch, there may not be an explicit error message
maximum_iteration:
maximum iteration of louvain optimization
result_attribute:
attribute name to assign community id results to; use empty string to skip
file_path:
file path to write CSV output to; use empty string to skip
print_stats:
If True, print louvain execution info
*/
TYPEDEF TUPLE <FLOAT deltaQ, FLOAT weight, VERTEX cc> move;
SumAccum<FLOAT> @sum_ac; #sum of the degrees of all the vertices in community C of the vertex
ListAccum<VERTEX> @cc_list; #the community center
SumAccum<FLOAT> @sum_weight; # total weight incident to this vertex
SumAccum<FLOAT> @sum_cc_weight; # total weight incident to the cc vertex
MapAccum<VERTEX,SumAccum<FLOAT>> @A_map; #A[c]: sum of the edge weights for the edges in community c
MaxAccum<move> @max_best_move; # highest dQ, highest -Outdegree, highest cc
ListAccum<VERTEX> @cm_list; #community member list
SumAccum<FLOAT> @@sum_m; # total edge weight
SumAccum<INT> @sum_outdegree; # helper variable for outdegree calculation
SumAccum<INT> @@sum_cc_change;
MapAccum<INT, SumAccum<INT>> @@community_map;
MapAccum<INT, SumAccum<INT>> @@community_size_count;
FILE f(file_path);
// initialize
Start = {v_type_set};
Start = SELECT s
FROM Start:s -(e_type_set:e)- :t
ACCUM
@@sum_m += e.getAttr(weight_attribute, "FLOAT")*0.5,
s.@sum_weight += e.getAttr(weight_attribute, "FLOAT")*1.0,
s.@sum_cc_weight += e.getAttr(weight_attribute, "FLOAT")*1.0,
s.@sum_outdegree += 1
// mark @cc only for vertices with more than 1 neighbors
// and only the marked vertices will participate in the actual louvain algorithm
// the unmorked vertices will be resolved by the vertex following heuristic
POST-ACCUM
IF s.@sum_outdegree > 1 THEN
s.@cc_list += s
END;
IF print_stats THEN
PRINT Start.size() AS AllVertexCount;
END;
// special @cc update in the first iteration
Start = SELECT t
FROM Start:s -(e_type_set:e)- :t
WHERE s.@sum_outdegree > 1 AND t.@sum_outdegree > 1
ACCUM
t.@max_best_move += move(e.getAttr(weight_attribute, "FLOAT")*1.0 + @@sum_m*t.@sum_weight *
(t.@sum_weight - s.@sum_weight), -s.@sum_cc_weight, s.@cc_list.get(0))
POST-ACCUM
IF t.@max_best_move.deltaQ > 0 THEN
IF -t.@max_best_move.weight < t.@sum_cc_weight THEN
t.@cc_list.clear(),
t.@cc_list += t.@max_best_move.cc,
t.@sum_cc_weight = -t.@max_best_move.weight,
@@sum_cc_change += 1
ELSE
IF -t.@max_best_move.weight == t.@sum_cc_weight AND getvid(t) < getvid(t.@max_best_move.cc) THEN
t.@cc_list.clear(),
t.@cc_list += t.@max_best_move.cc,
t.@sum_cc_weight = -t.@max_best_move.weight,
@@sum_cc_change += 1
END
END
END;
IF print_stats THEN
PRINT @@sum_cc_change AS InitChangeCount;
END;
// main loop
WHILE @@sum_cc_change > 0 LIMIT maximum_iteration DO
// initialize for iteration
@@sum_cc_change = 0;
Start = SELECT s
FROM Start:s
WHERE s.@sum_outdegree > 1
POST-ACCUM
s.@sum_ac = 0,
s.@cm_list.clear(),
s.@A_map.clear();
Start = SELECT s
FROM Start:s
ACCUM
FOREACH v IN s.@cc_list DO
CASE WHEN getvid(v) != -1 THEN
v.@cm_list += s
END
END;
Start = SELECT s
FROM Start:s -(e_type_set:e)- :t
WHERE t.@sum_outdegree > 1
ACCUM
s.@A_map += (t.@cc_list.get(0) -> e.getAttr(weight_attribute, "FLOAT")*1.0);
Start = SELECT s
FROM Start:s
ACCUM
FOREACH v IN s.@cc_list DO
CASE WHEN getvid(v) != -1 THEN
v.@sum_ac += s.@sum_weight
END
END;
Start = SELECT s
FROM Start:s
ACCUM
FOREACH v IN s.@cm_list DO
CASE WHEN getvid(v) != -1 THEN
v.@sum_ac = s.@sum_ac
END
END;
// compute @max_dQ
Start = SELECT s
FROM Start:s -(e_type_set:e)- :t
WHERE t.@sum_outdegree > 1
ACCUM
INT A_s = 0,
IF s.@A_map.containsKey(s) THEN
A_s = s.@A_map.get(s)
END,
s.@max_best_move += move(s.@A_map.get(t.@cc_list.get(0)) - A_s +
1/@@sum_m*s.@sum_weight*(s.@sum_ac-t.@sum_ac), -t.@sum_cc_weight, t.@cc_list.get(0))
POST-ACCUM
IF s.@max_best_move.deltaQ > 0 THEN
IF -s.@max_best_move.weight < s.@sum_cc_weight THEN // smallest best_move weight < current weight
s.@cc_list.clear(),
s.@cc_list += s.@max_best_move.cc,
s.@sum_cc_weight = -s.@max_best_move.weight,
@@sum_cc_change += 1
ELSE
IF -s.@max_best_move.weight == s.@sum_cc_weight AND getvid(s.@cc_list.get(0)) < getvid(s.@max_best_move.cc) THEN
s.@cc_list.clear(),
s.@cc_list += s.@max_best_move.cc,
s.@sum_cc_weight = -s.@max_best_move.weight,
@@sum_cc_change += 1
END
END
END;
IF print_stats THEN
PRINT @@sum_cc_change AS IterChangeCount;
END;
END;
// process node with outdegree=1
// follow the vertex to its neighbor's community
// if the neighbor also have outdegree=1, mark the two vertices as one community
Start = {v_type_set};
Start = SELECT s
FROM Start:s -(e_type_set:e)- :t
WHERE s.@sum_outdegree == 1 AND t.@sum_outdegree != 1
ACCUM
s.@cc_list += t.@cc_list.get(0);
IF print_stats THEN
PRINT Start.size() AS VertexFollowedToCommunity;
END;
Start = {v_type_set};
Start = SELECT s
FROM Start:s -(e_type_set:e)- :t
WHERE s.@sum_outdegree == 1 AND t.@sum_outdegree == 1
ACCUM
IF getvid(s) <= getvid(t) THEN
s.@cc_list += s
ELSE
s.@cc_list += t
END;
IF print_stats THEN
PRINT Start.size() AS VertexFollowedToVertex;
END;
// process node with outdegree=0
// assign them to communities containing only itself
Start = {v_type_set};
Start = SELECT s
FROM Start:s
WHERE s.@sum_outdegree == 0
ACCUM
s.@cc_list += s;
IF print_stats THEN
PRINT Start.size() AS VertexAssignedToItself;
END;
// save result
Start = {v_type_set};
Start = SELECT s
FROM Start:s
POST-ACCUM
IF result_attribute != "" THEN
s.setAttr(result_attribute, getvid(s.@cc_list.get(0)))
END,
IF file_path != "" THEN
f.println(s, getvid(s.@cc_list.get(0)))
END;
// print result satistic
IF print_stats THEN
Start = SELECT s
FROM Start:s
WHERE s.@cc_list.size() > 0
POST-ACCUM
@@community_map += (getvid(s.@cc_list.get(0)) -> 1);
PRINT @@community_map.size() AS FinalCommunityCount;
END;
}