-
Notifications
You must be signed in to change notification settings - Fork 72
/
Copy pathtri_count_small_world.gsql
206 lines (179 loc) · 8.18 KB
/
tri_count_small_world.gsql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
CREATE TEMPLATE QUERY GDBMS_ALGO.community.tri_count_small_world(
  SET<STRING> v_type_set,
  SET<STRING> e_type_set,
  UINT supernode_min_degree = 100000,
  UINT threshold = 100000
) SYNTAX V1 {
  /*
    First Author: [email protected]
    First Commit Date: 2024-07-18

    Recent Author: [email protected]
    Recent Commit Date: 2024-07-18

    Repository:
      https://github.com/tigergraph/gsql-graph-algorithms/tree/master/algorithms/Community

    Maturity:
      Production

    Description:
      This query computes the total number of triangles in the graph.
      It is optimized for small-world graphs to save memory.

      The work is split into phases:
        1. Compute a per-vertex degree product and select pivot candidates.
        2. Process supernodes in small batches.
        3. Process the remaining pivot candidates with a BFS starting from the
           candidate with the largest degree product.
        4. Process every vertex that is still unvisited in a single pass.
      The result is printed as "num_triangles".

    Publications:
      NA

    TigerGraph Documentation:
      https://docs.tigergraph.com/graph-ml/current/community-algorithms/triangle-counting

    Parameters:
      v_type_set:
        The set of vertex types to traverse.
      e_type_set:
        The set of edge types to traverse.
      supernode_min_degree:
        The minimum degree for a vertex to be considered a supernode.
        The default value is 100000.
      threshold:
        The threshold for choosing initial pivot vertices. Only vertices whose product of indegree
        and outdegree is at least this threshold will be considered candidates for the pivot vertex.
        The default value is 100000.
  */

  SumAccum<INT> @sum_outdegree;             // Accumulator for the outdegree
  SumAccum<INT> @sum_indegree;              // Accumulator for the indegree
  SumAccum<INT> @sum_degree_product;        // Accumulator for the product of outdegree and indegree
  OrAccum @or_in_frontier;                  // Flag to check if the vertex is in the frontier
  OrAccum @or_tri_counted;                  // Flag to check if all triangles that contain the vertex have been counted
  OrAccum @or_is_neighbor;                  // Flag to check if the vertex is a neighbor of nodes in the frontier
  SetAccum<VERTEX> @set_nodes_in_frontier;  // Set of nodes in the frontier
  SumAccum<INT> @@sum_tri_count;            // The count of triangles

  // -------------------- 1. Initialization --------------------
  // Calculate the product of indegree and outdegree,
  // and filter vertices with a product no less than the threshold.
  // Both degree accumulators receive +1 for every traversed edge (self-loops
  // excluded), so they hold the same value and the product is its square.
  AllNodes = {v_type_set};
  PivotCandidates = SELECT s
                    FROM AllNodes:s-(e_type_set)-v_type_set:t
                    WHERE s != t
                    ACCUM s.@sum_outdegree += 1,
                          s.@sum_indegree += 1
                    POST-ACCUM (s)
                          s.@sum_degree_product = s.@sum_indegree * s.@sum_outdegree
                    HAVING s.@sum_degree_product >= threshold;

  // -------------------- 2. Handle the supernodes --------------------
  // Count the number of triangles for supernodes
  SuperNodes = SELECT s
               FROM PivotCandidates:s
               WHERE s.@sum_outdegree >= supernode_min_degree;

  WHILE SuperNodes.size() > 0 DO
    // Select some supernodes (at most 10 per iteration) and set them as frontier
    Nodes = SELECT s
            FROM SuperNodes:s
            LIMIT 10;
    Nodes = SELECT s
            FROM Nodes:s
            POST-ACCUM (s)
                  s.@or_in_frontier += TRUE;

    // Find neighbors of nodes in the frontier.
    // Each neighbor t records the frontier vertices it is adjacent to.
    Neighbors = SELECT t
                FROM Nodes:s-(e_type_set)-v_type_set:t
                WHERE s != t
                      AND t.@or_tri_counted == FALSE     // Don't visit nodes whose all triangles have been counted
                      AND (t.@or_in_frontier == FALSE    // Neighbor not in the frontier
                           OR getvid(s) > getvid(t))     // If neighbor is in the frontier, only consider one direction
                ACCUM t.@set_nodes_in_frontier += s
                POST-ACCUM (t)
                      t.@or_is_neighbor += TRUE;

    // Calculate the number of triangles involving nodes in the frontier:
    // for an edge between two neighbors, every frontier vertex recorded by
    // both endpoints closes a triangle.
    Tmp = SELECT s
          FROM Neighbors:s-(e_type_set)-v_type_set:t
          WHERE getvid(s) > getvid(t)                    // Traverse only one direction of the undirected edge
                AND t.@or_is_neighbor == TRUE
          ACCUM @@sum_tri_count += COUNT(s.@set_nodes_in_frontier INTERSECT t.@set_nodes_in_frontier);

    // Reset variables for nodes in the frontier and mark them as done
    Nodes = SELECT s
            FROM Nodes:s
            POST-ACCUM (s)
                  s.@or_in_frontier = FALSE,
                  s.@or_tri_counted = TRUE;

    // Reset variables for the next iteration
    Neighbors = SELECT s
                FROM Neighbors:s
                POST-ACCUM (s)
                      s.@or_is_neighbor = FALSE,
                      s.@set_nodes_in_frontier.clear();

    // Remove visited vertices from the SuperNodes set
    SuperNodes = SuperNodes MINUS Nodes;
    PivotCandidates = PivotCandidates MINUS Nodes;
  END;

  // -------------------- 3. Handle nodes in large WCCs --------------------
  // Count the number of triangles for nodes in large WCCs
  WHILE PivotCandidates.size() > 0 DO
    // Select the initial pivot vertex with the largest product of indegree and outdegree
    Nodes = SELECT s
            FROM PivotCandidates:s
            ORDER BY s.@sum_degree_product DESC
            LIMIT 1;
    Nodes = SELECT s
            FROM Nodes:s
            POST-ACCUM (s)
                  s.@or_in_frontier += TRUE;

    // Use BFS to find all elements in its connected component
    WHILE Nodes.size() > 0 DO
      // Find neighbors of nodes in the frontier.
      // Each neighbor t records the frontier vertices it is adjacent to.
      Neighbors = SELECT t
                  FROM Nodes:s-(e_type_set)-v_type_set:t
                  WHERE s != t
                        AND t.@or_tri_counted == FALSE   // Don't visit nodes whose all triangles have been counted
                        AND (t.@or_in_frontier == FALSE  // Neighbor not in the frontier
                             OR getvid(s) > getvid(t))   // If neighbor is in the frontier, only consider one direction
                  ACCUM t.@set_nodes_in_frontier += s
                  POST-ACCUM (t)
                        t.@or_is_neighbor += TRUE;

      // Calculate the number of triangles involving nodes in the frontier
      Tmp = SELECT s
            FROM Neighbors:s-(e_type_set)-v_type_set:t
            WHERE getvid(s) > getvid(t)                  // Traverse only one direction of the undirected edge
                  AND t.@or_is_neighbor == TRUE
            ACCUM @@sum_tri_count += COUNT(s.@set_nodes_in_frontier INTERSECT t.@set_nodes_in_frontier);

      // Reset variables for nodes in the frontier and mark them as done
      Nodes = SELECT s
              FROM Nodes:s
              POST-ACCUM (s)
                    s.@or_in_frontier = FALSE,
                    s.@or_tri_counted = TRUE;

      // Reset variables for the next iteration
      Neighbors = SELECT s
                  FROM Neighbors:s
                  POST-ACCUM (s)
                        s.@or_is_neighbor = FALSE,
                        s.@set_nodes_in_frontier.clear();

      // Use BFS to visit the next frontier: the not-yet-counted neighbors
      // of the frontier that was just processed.
      Nodes = SELECT t
              FROM Nodes:s-(e_type_set)-v_type_set:t
              WHERE t.@or_tri_counted == FALSE
              POST-ACCUM (t)
                    t.@or_in_frontier += TRUE;
    END;

    // Remove visited vertices from the PivotCandidates set
    PivotCandidates = SELECT s
                      FROM PivotCandidates:s
                      WHERE s.@or_tri_counted == FALSE;
  END;

  // -------------------- 4. Handle nodes in small WCCs --------------------
  // For remaining vertices in small WCCs
  Nodes = SELECT s
          FROM AllNodes:s
          WHERE s.@or_tri_counted == FALSE;

  // Build neighbor sets manually: every remaining vertex t collects only its
  // remaining neighbors s that have a larger internal ID than t.
  // The accumulator @set_nodes_in_frontier is reused here as a plain neighbor set.
  Tmp = SELECT t
        FROM Nodes:s-(e_type_set)-v_type_set:t
        WHERE t.@or_tri_counted == FALSE
              AND getvid(s) > getvid(t)
        ACCUM t.@set_nodes_in_frontier += s
        POST-ACCUM (t)
              t.@or_is_neighbor = TRUE;

  // Compute the intersection of neighbor sets to count triangles.
  // For an edge (s, t) with getvid(s) > getvid(t), the intersection holds the
  // common neighbors whose ID is larger than both, so a triangle is counted
  // only on the edge joining its two smallest-ID vertices.
  // The target is restricted to v_type_set like every other traversal in this query.
  Tmp = SELECT t
        FROM Nodes:s-(e_type_set)-v_type_set:t
        WHERE getvid(s) > getvid(t)
              AND t.@or_is_neighbor == TRUE
        ACCUM @@sum_tri_count += COUNT(s.@set_nodes_in_frontier INTERSECT t.@set_nodes_in_frontier);

  // -------------------- 5. Output --------------------
  // Output the results
  PRINT @@sum_tri_count AS num_triangles;
}