Skip to content

Commit afd48bc

Browse files
committed
[ALGOS-268] feat(algos): improve triangle count algorithm;
1 parent 040463f commit afd48bc

File tree

3 files changed

+240
-89
lines changed

3 files changed

+240
-89
lines changed

GDBMS_ALGO/community/tri_count.gsql

Lines changed: 34 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
1-
CREATE TEMPLATE QUERY GDBMS_ALGO.community.tri_count(STRING v_type, STRING e_type) SYNTAX V1 {
1+
CREATE TEMPLATE QUERY GDBMS_ALGO.community.tri_count(
2+
SET<STRING> v_type_set,
3+
SET<STRING> e_type_set
4+
) SYNTAX V1 {
25

3-
/*
4-
First Author: <First Author Name>
5-
First Commit Date: <First Commit Date>
6-
7-
Recent Author: <Recent Commit Author Name>
8-
Recent Commit Date: <Recent Commit Date>
6+
/*
7+
First Author: [email protected]
8+
First Commit Date: 2024-07-17
99

10+
Recent Author: [email protected]
11+
Recent Commit Date: 2024-07-17
1012

1113
Repository:
1214
https://github.com/tigergraph/gsql-graph-algorithms/tree/master/algorithms/Community
@@ -15,7 +17,7 @@ CREATE TEMPLATE QUERY GDBMS_ALGO.community.tri_count(STRING v_type, STRING e_typ
1517
Production
1618

1719
Description:
18-
This algorithm uses the classic edge-iterator method to count triangles. It is slower than the fast version, but uses less memory.
20+
This query computes the total number of triangles in the graph.
1921

2022
Publications:
2123
NA
@@ -24,31 +26,30 @@ CREATE TEMPLATE QUERY GDBMS_ALGO.community.tri_count(STRING v_type, STRING e_typ
2426
https://docs.tigergraph.com/graph-ml/current/community-algorithms/triangle-counting
2527

2628
Parameters:
27-
v_type:
28-
Vertex type to count
29-
e_type:
30-
Edge type to traverse
29+
v_type_set:
30+
The set of vertex types to traverse.
31+
e_type_set:
32+
The set of edge types to traverse.
3133
*/
3234

33-
# Compute the total number of triangles in the GRAPH. No input parameters are needed.
34-
SumAccum<INT> @@sum_cnt;
35-
SetAccum<VERTEX> @self_set;
36-
37-
all = {v_type};
38-
all = SELECT s
39-
FROM all:s
40-
ACCUM s.@self_set += s;
41-
42-
# For each edge e, the number of triangles that contain e is equivalent
43-
# to the number of common neighbors between vertices s and t
44-
45-
tmp = SELECT t
46-
FROM all:s -(e_type:e) -:t
47-
WHERE getvid(s) > getvid(t)
48-
ACCUM INT c1 = COUNT(s.neighbors(e_type) MINUS s.@self_set),
49-
INT c2 = COUNT((s.neighbors(e_type) MINUS s.@self_set) MINUS (t.neighbors(e_type) MINUS t.@self_set)),
50-
@@sum_cnt += c1-c2;
51-
52-
# Each triangle is counted 3 times for each edge, so final result is divided by 3
53-
PRINT @@sum_cnt/3 AS num_triangles;
35+
SumAccum<INT> @@sum_tri_count;
36+
SetAccum<VERTEX> @set_neighbors;
37+
Nodes = {v_type_set};
38+
39+
// Build neighbor sets manually, only for vertices with smaller IDs in the triangle.
40+
// This ensures that only two of the three vertices in a triangle will build neighbor sets.
41+
Tmp = SELECT t
42+
FROM Nodes:s-(e_type_set)- v_type_set:t
43+
WHERE getvid(s) > getvid(t)
44+
ACCUM t.@set_neighbors += s;
45+
46+
// Compute the intersection of neighbor sets to count triangles.
47+
// This step ensures that each triangle is counted only once.
48+
Tmp = SELECT t
49+
FROM Nodes:s-(e_type_set)- :t
50+
WHERE getvid(s) > getvid(t)
51+
ACCUM @@sum_tri_count += COUNT(s.@set_neighbors INTERSECT t.@set_neighbors);
52+
53+
// Output the results
54+
PRINT @@sum_tri_count AS num_triangles;
5455
}

GDBMS_ALGO/community/tri_count_fast.gsql

Lines changed: 0 additions & 56 deletions
This file was deleted.
Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
CREATE TEMPLATE QUERY GDBMS_ALGO.community.tri_count_small_world(
2+
SET<STRING> v_type_set,
3+
SET<STRING> e_type_set,
4+
UINT supernode_min_degree = 100000,
5+
UINT threshold = 100000
6+
) SYNTAX V1 {
7+
8+
/*
9+
First Author: [email protected]
10+
First Commit Date: 2024-07-18
11+
12+
Recent Author: [email protected]
13+
Recent Commit Date: 2024-07-18
14+
15+
Repository:
16+
https://github.com/tigergraph/gsql-graph-algorithms/tree/master/algorithms/Community
17+
18+
Maturity:
19+
Production
20+
21+
Description:
22+
This query computes the total number of triangles in the graph.
23+
It is optimized for small-world graphs to save memory.
24+
25+
Publications:
26+
NA
27+
28+
TigerGraph Documentation:
29+
https://docs.tigergraph.com/graph-ml/current/community-algorithms/triangle-counting
30+
31+
Parameters:
32+
v_type_set:
33+
The set of vertex types to traverse.
34+
e_type_set:
35+
The set of edge types to traverse.
36+
supernode_min_degree:
37+
The minimum degree for a vertex to be considered a supernode.
38+
The default value is 100000.
39+
threshold:
40+
The threshold for choosing initial pivot vertices. Only vertices whose product of indegree
41+
and outdegree is at least this threshold will be considered candidates for the pivot vertex.
42+
The default value is 100000.
43+
*/
44+
45+
SumAccum<INT> @sum_outdegree; // Accumulator for the outdegree
46+
SumAccum<INT> @sum_indegree; // Accumulator for the indegree
47+
SumAccum<INT> @sum_degree_product; // Accumulator for the product of outdegree and indegree
48+
OrAccum @or_in_frontier; // Flag to check if the vertex is in the frontier
49+
OrAccum @or_tri_counted; // Flag to check if all triangles that contain the vertex have been counted
50+
OrAccum @or_is_neighbor; // Flag to check if the vertex is a neighbor of nodes in the frontier
51+
SetAccum<VERTEX> @set_nodes_in_frontier; // Set of nodes in the frontier
52+
SumAccum<INT> @@sum_tri_count; // The count of triangles
53+
54+
// -------------------- 1. Initialization --------------------
55+
// Calculate the product of indegree and outdegree,
56+
// and keep only the vertices whose product is no less than the threshold
57+
AllNodes = {v_type_set};
58+
PivotCandidates = SELECT s
59+
FROM AllNodes:s-(e_type_set)-v_type_set:t
60+
WHERE s != t
61+
ACCUM s.@sum_outdegree += 1,
62+
s.@sum_indegree += 1
63+
POST-ACCUM (s)
64+
s.@sum_degree_product = s.@sum_indegree * s.@sum_outdegree
65+
HAVING s.@sum_degree_product >= threshold;
66+
67+
// -------------------- 2. Handle the supernodes --------------------
68+
// Count the number of triangles for supernodes
69+
SuperNodes = SELECT s
70+
FROM PivotCandidates:s
71+
WHERE s.@sum_outdegree >= supernode_min_degree;
72+
WHILE SuperNodes.size() > 0 DO
73+
// Select some supernodes and set them as frontier
74+
Nodes = SELECT s
75+
FROM SuperNodes:s
76+
LIMIT 10;
77+
Nodes = SELECT s
78+
FROM Nodes:s
79+
POST-ACCUM (s)
80+
s.@or_in_frontier += TRUE;
81+
82+
// Find neighbors of nodes in the frontier
83+
Neighbors = SELECT t
84+
FROM Nodes:s-(e_type_set)-v_type_set:t
85+
WHERE s != t
86+
AND t.@or_tri_counted == FALSE // Skip nodes whose triangles have all been counted
87+
AND (t.@or_in_frontier == FALSE // Neighbor not in the frontier
88+
OR getvid(s) > getvid(t)) // If neighbor is in the frontier, only consider one direction
89+
ACCUM t.@set_nodes_in_frontier += s
90+
POST-ACCUM (t)
91+
t.@or_is_neighbor += TRUE;
92+
93+
// Calculate the number of triangles involving nodes in the frontier
94+
Tmp = SELECT s
95+
FROM Neighbors:s-(e_type_set)-v_type_set:t
96+
WHERE getvid(s) > getvid(t) // Traverse only one direction of the undirected edge
97+
AND t.@or_is_neighbor == TRUE
98+
ACCUM @@sum_tri_count += COUNT(s.@set_nodes_in_frontier INTERSECT t.@set_nodes_in_frontier);
99+
100+
// Reset variables for nodes in the frontier
101+
Nodes = SELECT s
102+
FROM Nodes:s
103+
POST-ACCUM (s)
104+
s.@or_in_frontier = FALSE,
105+
s.@or_tri_counted = TRUE;
106+
107+
// Reset variables for the next iteration
108+
Neighbors = SELECT s
109+
FROM Neighbors:s
110+
POST-ACCUM (s)
111+
s.@or_is_neighbor = FALSE,
112+
s.@set_nodes_in_frontier.clear();
113+
114+
// Remove visited vertices from the SuperNodes set
115+
SuperNodes = SuperNodes MINUS Nodes;
116+
PivotCandidates = PivotCandidates MINUS Nodes;
117+
END;
118+
119+
// -------------------- 3. Handle nodes in large WCCs --------------------
120+
// Count the number of triangles for nodes in large weakly connected components (WCCs)
121+
WHILE PivotCandidates.size() > 0 DO
122+
// Select the initial pivot vertex with the largest product of indegree and outdegree
123+
Nodes = SELECT s
124+
FROM PivotCandidates:s
125+
ORDER BY s.@sum_degree_product DESC
126+
LIMIT 1;
127+
Nodes = SELECT s
128+
FROM Nodes:s
129+
POST-ACCUM (s)
130+
s.@or_in_frontier += TRUE;
131+
132+
// Use BFS to find all elements in its connected component
133+
WHILE Nodes.size() > 0 DO
134+
// Find neighbors of nodes in the frontier
135+
Neighbors = SELECT t
136+
FROM Nodes:s-(e_type_set)-v_type_set:t
137+
WHERE s != t
138+
AND t.@or_tri_counted == FALSE // Skip nodes whose triangles have all been counted
139+
AND (t.@or_in_frontier == FALSE // Neighbor not in the frontier
140+
OR getvid(s) > getvid(t)) // If neighbor is in the frontier, only consider one direction
141+
ACCUM t.@set_nodes_in_frontier += s
142+
POST-ACCUM (t)
143+
t.@or_is_neighbor += TRUE;
144+
145+
// Calculate the number of triangles involving nodes in the frontier
146+
Tmp = SELECT s
147+
FROM Neighbors:s-(e_type_set)-v_type_set:t
148+
WHERE getvid(s) > getvid(t) // Traverse only one direction of the undirected edge
149+
AND t.@or_is_neighbor == TRUE
150+
ACCUM @@sum_tri_count += COUNT(s.@set_nodes_in_frontier INTERSECT t.@set_nodes_in_frontier);
151+
152+
// Reset variables for nodes in the frontier
153+
Nodes = SELECT s
154+
FROM Nodes:s
155+
POST-ACCUM (s)
156+
s.@or_in_frontier = FALSE,
157+
s.@or_tri_counted = TRUE;
158+
159+
// Reset variables for the next iteration
160+
Neighbors = SELECT s
161+
FROM Neighbors:s
162+
POST-ACCUM (s)
163+
s.@or_is_neighbor = FALSE,
164+
s.@set_nodes_in_frontier.clear();
165+
166+
// Use BFS to visit the next frontier
167+
Nodes = SELECT t
168+
FROM Nodes:s-(e_type_set:e)-v_type_set:t
169+
WHERE t.@or_tri_counted == FALSE
170+
POST-ACCUM t.@or_in_frontier += TRUE;
171+
END;
172+
173+
// Remove visited vertices from the PivotCandidates set
174+
PivotCandidates = SELECT s
175+
FROM PivotCandidates:s
176+
WHERE s.@or_tri_counted == FALSE;
177+
END;
178+
179+
// -------------------- 4. Handle nodes in small WCCs --------------------
180+
// For remaining vertices in small WCCs
181+
Nodes = SELECT s
182+
FROM AllNodes:s
183+
WHERE s.@or_tri_counted == FALSE;
184+
185+
// Build neighbor sets manually, only for vertices with smaller IDs in the triangle.
186+
// This ensures that only two of the three vertices in a triangle will build neighbor sets.
187+
Tmp = SELECT t
188+
FROM Nodes:s-(e_type_set)-v_type_set:t
189+
WHERE t.@or_tri_counted == FALSE
190+
AND getvid(s) > getvid(t)
191+
ACCUM t.@set_nodes_in_frontier += s
192+
POST-ACCUM
193+
t.@or_is_neighbor = TRUE;
194+
195+
// Compute the intersection of neighbor sets to count triangles.
196+
// This step ensures that each triangle is counted only once.
197+
Tmp = SELECT t
198+
FROM Nodes:s-(e_type_set)-:t
199+
WHERE getvid(s) > getvid(t)
200+
AND t.@or_is_neighbor == TRUE
201+
ACCUM @@sum_tri_count += COUNT(s.@set_nodes_in_frontier INTERSECT t.@set_nodes_in_frontier);
202+
203+
// -------------------- 5. Output --------------------
204+
// Output the results
205+
PRINT @@sum_tri_count AS num_triangles;
206+
}

0 commit comments

Comments
 (0)