CREATE TEMPLATE QUERY GDBMS_ALGO.community.label_prop(
  SET<STRING> v_type_set,
  SET<STRING> e_type_set,
  UINT maximum_iteration = 10,
  UINT sample_edge_num = 1000,
  UINT batch_num = 12,
  INT print_limit,
  BOOL print_results = TRUE,
  STRING result_attribute = "",
  STRING file_path = ""
) FOR GRAPH MyGraph SYNTAX V1 {

  /*
    First Commit Date: 2024-07-15

    Recent Commit Date: 2024-07-16

    Repository:
      https://github.com/tigergraph/gsql-graph-algorithms/tree/master/algorithms/Community

    Maturity:
      Production
      (NOTE(review): the "Maturity:" label line was not visible in the reviewed
      view and is restored by convention - confirm.)

    Description:
      This query partitions vertices into communities using the Label Propagation method.
      Every vertex starts in its own community (identified by the vertex itself). In the
      first iteration a vertex adopts the community of its highest-degree neighbor whose
      degree exceeds its own; in later iterations it adopts the community that is most
      frequent among its (possibly sampled) neighbors. Only neighbors of vertices that
      changed community are re-examined in the next iteration. Iteration stops when no
      candidates remain or maximum_iteration is reached.
      Vertices that have no edge of a type in e_type_set keep their own community.

    TigerGraph Documentation:
      https://docs.tigergraph.com/graph-ml/current/community-algorithms/label-propagation

    Parameters:
      v_type_set:
        The set of vertex types to traverse.
      e_type_set:
        The set of edge types to traverse.
      maximum_iteration:
        The maximum number of iterations for the algorithm.
      sample_edge_num:
        The number of edges to sample for super nodes, i.e. vertices whose outdegree
        over e_type_set exceeds this value.
      batch_num:
        The number of batches. Using batches reduces memory consumption.
        A value of 0 is treated as 1 (no batching).
      print_limit:
        If >= 0, the maximum number of vertices to output to JSON.
      print_results:
        If True, output JSON to standard output. WARNING: Avoid printing results for large datasets.
      result_attribute:
        If not empty, store community ID values (INT) in this attribute. The value stored
        is the internal vertex ID (getvid) of the vertex that identifies the community.
      file_path:
        If not empty, write CSV output to this file. With a single vertex type each row is
        (vertex id, community vertex); otherwise each row is
        (vertex type, vertex, community vertex type, community vertex).

    NOTE(review): the signature binds this template to a graph named MyGraph - confirm
    that a fixed graph name is intended for a library template.
  */

  TYPEDEF TUPLE <DOUBLE score, VERTEX community> MoveScore;
  MinAccum<VERTEX> @community_id;                           // Community of the vertex, identified by a member vertex
  SumAccum<INT> @vid;                                       // Vertex's internal ID (getvid)
  SumAccum<INT> @batch_id;                                  // Batch the vertex belongs to: @vid modulo the batch count
  SumAccum<INT> @degree;                                    // Outdegree of the vertex over e_type_set
  SumAccum<INT> @@vertex_num;                               // Batching threshold: vertex count divided by the batch count
  MapAccum<VERTEX, SumAccum<DOUBLE>> @community_k_in_map;   // Community -> number of (sampled) neighbors in that community
  MaxAccum<MoveScore> @best_move;                           // Highest-scoring move seen for the vertex
  MaxAccum<DOUBLE> @@min_double;                            // Never written; its initial value is used as the score when resetting @best_move
  OrAccum @to_change_community;                             // TRUE when the vertex should switch to @best_move.community
  MapAccum<VERTEX, INT> @@comm_sizes_map;                   // Community -> number of member vertices
  FILE f(file_path);                                        // Output file handle

  // batch_num is unsigned, so 0 is a legal argument; clamp it so the modulo,
  // the division and the batch loop below never operate on a zero batch count.
  INT effective_batch_num = batch_num;
  IF effective_batch_num < 1 THEN
    effective_batch_num = 1;
  END;

  // Initialization.
  // A vertex-only selection is used on purpose: an edge pattern here would skip
  // vertices that have no edge in e_type_set, leaving their @community_id unset
  // even though the output stage below reports every vertex in All_Nodes.
  All_Nodes = {v_type_set};
  Tmp = SELECT s
        FROM All_Nodes:s
        POST-ACCUM
          s.@community_id = s,
          s.@vid = getvid(s),
          s.@batch_id = s.@vid % effective_batch_num,
          s.@degree = s.outdegree(e_type_set);
  @@vertex_num = All_Nodes.size();
  @@vertex_num = @@vertex_num / effective_batch_num;

  // Label propagation
  INT hop = 0;
  Candidates = All_Nodes;
  WHILE Candidates.size() > 0 AND hop < maximum_iteration DO
    hop = hop + 1;

    // Find the best move for every candidate
    IF hop == 1 THEN
      // First iteration: every community has size one, so score a neighbor's
      // community by the neighbor's degree and only look at higher-degree neighbors.
      ChangedNodes = SELECT s
                     FROM Candidates:s -(e_type_set:e)- :t
                     WHERE s.@degree < t.@degree
                     ACCUM s.@best_move += MoveScore(t.@degree, t.@community_id)
                     POST-ACCUM
                       IF s.@best_move.community != s.@community_id THEN
                         s.@to_change_community = TRUE
                       END
                     HAVING s.@to_change_community == TRUE;
    ELSE
      // Remaining iterations: score a community by how many neighbors belong to it.
      IF Candidates.size() < @@vertex_num OR effective_batch_num == 1 THEN
        // Few candidates (or batching disabled): process them in a single pass.
        ChangedNodes = SELECT s
                       FROM Candidates:s -(e_type_set:e)- :t
                       SAMPLE sample_edge_num EDGE WHEN s.outdegree(e_type_set) > sample_edge_num
                       ACCUM s.@community_k_in_map += (t.@community_id -> 1)
                       POST-ACCUM
                         s.@best_move = MoveScore(@@min_double, s),  // Reset best move
                         FOREACH (community_id, k_in) IN s.@community_k_in_map DO
                           s.@best_move += MoveScore(k_in, community_id)
                         END,
                         IF s.@best_move.community != s.@community_id THEN
                           s.@to_change_community = TRUE
                         END,
                         s.@community_k_in_map.clear()  // Release the per-vertex map once it has been scored
                       HAVING s.@to_change_community == TRUE;
      ELSE
        // Many candidates: process one batch at a time so that only one batch's
        // per-vertex community maps are populated at once.
        ChangedNodes = {};
        FOREACH batch_id IN RANGE[0, effective_batch_num - 1] DO
          Nodes = SELECT s
                  FROM Candidates:s
                  WHERE s.@batch_id == batch_id;
          Nodes = SELECT s
                  FROM Nodes:s -(e_type_set:e)- :t
                  SAMPLE sample_edge_num EDGE WHEN s.outdegree(e_type_set) > sample_edge_num
                  ACCUM s.@community_k_in_map += (t.@community_id -> 1)
                  POST-ACCUM
                    s.@best_move = MoveScore(@@min_double, s),  // Reset best move
                    FOREACH (community_id, k_in) IN s.@community_k_in_map DO
                      s.@best_move += MoveScore(k_in, community_id)
                    END,
                    IF s.@best_move.community != s.@community_id THEN
                      s.@to_change_community = TRUE
                    END,
                    s.@community_k_in_map.clear()  // Release the per-vertex map once it has been scored
                  HAVING s.@to_change_community == TRUE;
          ChangedNodes = ChangedNodes UNION Nodes;
        END;
      END;
    END;

    // Handle nodes that swap communities: when two adjacent vertices each want to
    // move into the other's community, cancel the move of the one with the lower
    // score (ties within 1e-11 are broken by cancelling the larger internal ID).
    SwapNodes = SELECT s
                FROM ChangedNodes:s -(e_type_set:e)- :t
                WHERE s.@best_move.community == t.@community_id
                  AND t.@to_change_community == TRUE
                  AND t.@best_move.community == s.@community_id
                  AND (s.@best_move.score < t.@best_move.score
                       OR (abs(s.@best_move.score - t.@best_move.score) < 0.00000000001
                           AND s.@vid > t.@vid))
                POST-ACCUM
                  s.@to_change_community = FALSE;
    ChangedNodes = ChangedNodes MINUS SwapNodes;

    // Update community IDs and clear the change flag for the next iteration
    ChangedNodes = SELECT s
                   FROM ChangedNodes:s
                   POST-ACCUM
                     s.@community_id = s.@best_move.community,
                     s.@to_change_community = FALSE;

    // Find candidates for the next iteration: neighbors of changed vertices that
    // are now in a different community than the changed vertex.
    Candidates = SELECT t
                 FROM ChangedNodes:s -(e_type_set:e)- :t
                 WHERE t.@community_id != s.@community_id;
  END;

  // Output results
  Nodes = SELECT s
          FROM All_Nodes:s
          POST-ACCUM
            IF result_attribute != "" THEN
              s.setAttr(result_attribute, getvid(s.@community_id))
            END,
            IF print_results THEN
              @@comm_sizes_map += (s.@community_id -> 1)
            END,
            IF file_path != "" THEN
              IF v_type_set.size() == 1 THEN
                f.println(s.id, s.@community_id)
              ELSE
                // With several vertex types, include the types alongside both vertices
                VERTEX node = s.@community_id,
                f.println(s.type, s, node.type, node)
              END
            END
          LIMIT print_limit;

  // Print results if print_results is True
  IF print_results THEN
    PRINT @@comm_sizes_map;
    PRINT Nodes[Nodes.@community_id];
  END;
}
0 commit comments