diff --git a/GDBMS_ALGO/community/tri_count.gsql b/GDBMS_ALGO/community/tri_count.gsql
index a5e704d9..27f92d2e 100644
--- a/GDBMS_ALGO/community/tri_count.gsql
+++ b/GDBMS_ALGO/community/tri_count.gsql
@@ -1,12 +1,14 @@
-CREATE TEMPLATE QUERY GDBMS_ALGO.community.tri_count(STRING v_type, STRING e_type) SYNTAX V1 {
+CREATE TEMPLATE QUERY GDBMS_ALGO.community.tri_count(
+  SET<STRING> v_type_set,
+  SET<STRING> e_type_set
+) SYNTAX V1 {
 
-/*
-    First Author:
-    First Commit Date:
-
-    Recent Author:
-    Recent Commit Date:
+  /*
+    First Author: xuanlei.lin@tigergraph.com
+    First Commit Date: 2024-07-17
 
+    Recent Author: xuanlei.lin@tigergraph.com
+    Recent Commit Date: 2024-07-17
 
     Repository:
       https://github.com/tigergraph/gsql-graph-algorithms/tree/master/algorithms/Community
@@ -15,7 +17,7 @@ CREATE TEMPLATE QUERY GDBMS_ALGO.community.tri_count(STRING v_type, STRING e_typ
       Production
 
     Description:
-      This algorithm uses the classic edge-iterator method to count triangles. It is slower than the fast version, but uses less memory.
+      This query computes the total number of triangles in the graph.
 
     Publications:
       NA
@@ -24,31 +26,30 @@ CREATE TEMPLATE QUERY GDBMS_ALGO.community.tri_count(STRING v_type, STRING e_typ
       https://docs.tigergraph.com/graph-ml/current/community-algorithms/triangle-counting
 
     Parameters:
-      v_type:
-        Vertex type to count
-      e_type:
-        Edge type to traverse
+      v_type_set:
+        The set of vertex types to traverse.
+      e_type_set:
+        The set of edge types to traverse.
   */
 
-# Compute the total number of triangles in the GRAPH. No input parameters are needed.
-SumAccum @@sum_cnt;
-SetAccum @self_set;
-
-all = {v_type};
-all = SELECT s
-      FROM all:s
-      ACCUM s.@self_set += s;
-
-# For each edge e, the number of triangles that contain e is equivalent
-# to the number of common neighbors between vertices s and t
-
-tmp = SELECT t
-      FROM all:s -(e_type:e) -:t
-      WHERE getvid(s) > getvid(t)
-      ACCUM INT c1 = COUNT(s.neighbors(e_type) MINUS s.@self_set),
-            INT c2 = COUNT((s.neighbors(e_type) MINUS s.@self_set) MINUS (t.neighbors(e_type) MINUS t.@self_set)),
-            @@sum_cnt += c1-c2;
-
-# Each triangle is counted 3 times for each edge, so final result is divided by 3
-PRINT @@sum_cnt/3 AS num_triangles;
+  SumAccum<INT> @@sum_tri_count;     // Running total of triangles found
+  SetAccum<VERTEX> @set_neighbors;   // Per vertex: its neighbors that have a larger internal ID
+  Nodes = {v_type_set};
+
+  // Each vertex t collects only the neighbors s with getvid(s) > getvid(t), so the
+  // highest-ID vertex of a triangle never stores the other two vertices of that triangle.
+  Tmp = SELECT t
+        FROM Nodes:s-(e_type_set)- v_type_set:t
+        WHERE getvid(s) > getvid(t)
+        ACCUM t.@set_neighbors += s;
+
+  // For every edge (s, t) with getvid(s) > getvid(t), count the vertices present in both sets.
+  // A triangle is found only from the edge joining its two lowest-ID vertices (assumes one traversed edge per vertex pair).
+  Tmp = SELECT t
+        FROM Nodes:s-(e_type_set)- :t
+        WHERE getvid(s) > getvid(t)
+        ACCUM @@sum_tri_count += COUNT(s.@set_neighbors INTERSECT t.@set_neighbors);
+
+  // Output the results
+  PRINT @@sum_tri_count AS num_triangles;
 }
diff --git a/GDBMS_ALGO/community/tri_count_fast.gsql b/GDBMS_ALGO/community/tri_count_fast.gsql
deleted file mode 100644
index 5cae093d..00000000
--- a/GDBMS_ALGO/community/tri_count_fast.gsql
+++ /dev/null
@@ -1,56 +0,0 @@
-CREATE TEMPLATE QUERY GDBMS_ALGO.community.tri_count_fast(STRING v_type, STRING e_type) SYNTAX V1 {
-
-  /*
-    First Author:
-    First Commit Date:
-
-    Recent Author:
-    Recent Commit Date:
-
-
-    Repository:
-      https://github.com/tigergraph/gsql-graph-algorithms/tree/master/algorithms/Community
-
-    Maturity:
-      Production
-
-    Description:
-      The fast version of the Triangle Counting algorithm is faster than the standard version, but uses some additional memory.
-
-    Publications:
-      NA
-
-    TigerGraph Documentation:
-      https://docs.tigergraph.com/graph-ml/current/community-algorithms/triangle-counting
-
-    Parameters:
-      v_type:
-        Vertex type to count
-      e_type:
-        Edge type to traverse
-  */
-
-# Compute the total number of triangles in the graph
-# This algorithm is faster than tri_count but uses additional memory for temporary storage
-SumAccum @@sum_cnt;
-SetAccum @neighbors_set;
-//SumAccum @sum_outdegree;
-all = {v_type};
-
-# We build up our neighbor lists manually because we'll only build them up on the 2 smaller vertices on a triangle.
-
-tmp = SELECT t
-      FROM all:s-(e_type)- v_type:t
-      WHERE getvid(s) > getvid(t)
-      ACCUM t.@neighbors_set += s;
-
-# Here we compute the intersection for 2 points on the triangle.
-tmp = SELECT t
-      FROM all:s-(e_type)- :t
-      WHERE getvid(s) > getvid(t)
-      ACCUM @@sum_cnt += COUNT(s.@neighbors_set INTERSECT t.@neighbors_set);
-
-# print result
-PRINT @@sum_cnt AS num_triangles;
-
-}
diff --git a/GDBMS_ALGO/community/tri_count_small_world.gsql b/GDBMS_ALGO/community/tri_count_small_world.gsql
new file mode 100644
index 00000000..6b8611eb
--- /dev/null
+++ b/GDBMS_ALGO/community/tri_count_small_world.gsql
@@ -0,0 +1,206 @@
+CREATE TEMPLATE QUERY GDBMS_ALGO.community.tri_count_small_world(
+  SET<STRING> v_type_set,
+  SET<STRING> e_type_set,
+  UINT supernode_min_degree = 100000,
+  UINT threshold = 100000
+) SYNTAX V1 {
+
+  /*
+    First Author: xuanlei.lin@tigergraph.com
+    First Commit Date: 2024-07-18
+
+    Recent Author: xuanlei.lin@tigergraph.com
+    Recent Commit Date: 2024-07-18
+
+    Repository:
+      https://github.com/tigergraph/gsql-graph-algorithms/tree/master/algorithms/Community
+
+    Maturity:
+      Production
+
+    Description:
+      This query computes the total number of triangles in the graph.
+      It is optimized for small-world graphs to save memory.
+
+    Publications:
+      NA
+
+    TigerGraph Documentation:
+      https://docs.tigergraph.com/graph-ml/current/community-algorithms/triangle-counting
+
+    Parameters:
+      v_type_set:
+        The set of vertex types to traverse.
+      e_type_set:
+        The set of edge types to traverse.
+      supernode_min_degree:
+        The minimum degree for a pivot candidate (see threshold) to be handled as a supernode.
+        The default value is 100000.
+      threshold:
+        The threshold for choosing initial pivot vertices. Only vertices whose product of indegree
+        and outdegree is at least this threshold will be considered candidates for the pivot vertex.
+        The default value is 100000.
+  */
+
+  SumAccum<INT> @sum_outdegree;               // Number of traversed incident edges (self-loops excluded)
+  SumAccum<INT> @sum_indegree;                // Incremented together with @sum_outdegree, so it holds the same value
+  SumAccum<INT> @sum_degree_product;          // Accumulator for the product of outdegree and indegree
+  OrAccum @or_in_frontier;                    // Flag to check if the vertex is in the frontier
+  OrAccum @or_tri_counted;                    // Flag to check if all triangles that contain the vertex have been counted
+  OrAccum @or_is_neighbor;                    // Flag to check if the vertex is a neighbor of nodes in the frontier
+  SetAccum<VERTEX> @set_nodes_in_frontier;    // Set of adjacent nodes in the frontier (reused as the neighbor set in step 4)
+  SumAccum<INT> @@sum_tri_count;              // The count of triangles
+
+  // -------------------- 1. Initialization --------------------
+  // Calculate the product of indegree and outdegree,
+  // and keep the vertices whose product is no less than the threshold
+  AllNodes = {v_type_set};
+  PivotCandidates = SELECT s
+                    FROM AllNodes:s-(e_type_set)-v_type_set:t
+                    WHERE s != t
+                    ACCUM s.@sum_outdegree += 1,
+                          s.@sum_indegree += 1
+                    POST-ACCUM (s)
+                      s.@sum_degree_product = s.@sum_indegree * s.@sum_outdegree
+                    HAVING s.@sum_degree_product >= threshold;
+
+  // -------------------- 2. Handle the supernodes --------------------
+  // Count the number of triangles for supernodes
+  SuperNodes = SELECT s
+               FROM PivotCandidates:s
+               WHERE s.@sum_outdegree >= supernode_min_degree;
+  WHILE SuperNodes.size() > 0 DO
+    // Take a batch of at most 10 supernodes and set them as the frontier
+    Nodes = SELECT s
+            FROM SuperNodes:s
+            LIMIT 10;
+    Nodes = SELECT s
+            FROM Nodes:s
+            POST-ACCUM (s)
+              s.@or_in_frontier += TRUE;
+
+    // Find neighbors of nodes in the frontier
+    Neighbors = SELECT t
+                FROM Nodes:s-(e_type_set)-v_type_set:t
+                WHERE s != t
+                  AND t.@or_tri_counted == FALSE      // Skip nodes whose triangles have all been counted
+                  AND (t.@or_in_frontier == FALSE     // Neighbor not in the frontier
+                       OR getvid(s) > getvid(t))      // If neighbor is in the frontier, only consider one direction
+                ACCUM t.@set_nodes_in_frontier += s
+                POST-ACCUM (t)
+                  t.@or_is_neighbor += TRUE;
+
+    // Calculate the number of triangles involving nodes in the frontier
+    Tmp = SELECT s
+          FROM Neighbors:s-(e_type_set)-v_type_set:t
+          WHERE getvid(s) > getvid(t)                 // Traverse only one direction of the undirected edge
+            AND t.@or_is_neighbor == TRUE
+          ACCUM @@sum_tri_count += COUNT(s.@set_nodes_in_frontier INTERSECT t.@set_nodes_in_frontier);
+
+    // Mark the frontier nodes as done: leave the frontier, all their triangles are counted
+    Nodes = SELECT s
+            FROM Nodes:s
+            POST-ACCUM (s)
+              s.@or_in_frontier = FALSE,
+              s.@or_tri_counted = TRUE;
+
+    // Reset variables for the next iteration
+    Neighbors = SELECT s
+                FROM Neighbors:s
+                POST-ACCUM (s)
+                  s.@or_is_neighbor = FALSE,
+                  s.@set_nodes_in_frontier.clear();
+
+    // Remove visited vertices from the SuperNodes and PivotCandidates sets
+    SuperNodes = SuperNodes MINUS Nodes;
+    PivotCandidates = PivotCandidates MINUS Nodes;
+  END;
+
+  // -------------------- 3. Handle nodes in large WCCs --------------------
+  // Count the number of triangles for nodes in large WCCs
+  WHILE PivotCandidates.size() > 0 DO
+    // Select the initial pivot vertex with the largest product of indegree and outdegree
+    Nodes = SELECT s
+            FROM PivotCandidates:s
+            ORDER BY s.@sum_degree_product DESC
+            LIMIT 1;
+    Nodes = SELECT s
+            FROM Nodes:s
+            POST-ACCUM (s)
+              s.@or_in_frontier += TRUE;
+
+    // Use BFS to find all elements in its connected component
+    WHILE Nodes.size() > 0 DO
+      // Find neighbors of nodes in the frontier
+      Neighbors = SELECT t
+                  FROM Nodes:s-(e_type_set)-v_type_set:t
+                  WHERE s != t
+                    AND t.@or_tri_counted == FALSE    // Skip nodes whose triangles have all been counted
+                    AND (t.@or_in_frontier == FALSE   // Neighbor not in the frontier
+                         OR getvid(s) > getvid(t))    // If neighbor is in the frontier, only consider one direction
+                  ACCUM t.@set_nodes_in_frontier += s
+                  POST-ACCUM (t)
+                    t.@or_is_neighbor += TRUE;
+
+      // Calculate the number of triangles involving nodes in the frontier
+      Tmp = SELECT s
+            FROM Neighbors:s-(e_type_set)-v_type_set:t
+            WHERE getvid(s) > getvid(t)               // Traverse only one direction of the undirected edge
+              AND t.@or_is_neighbor == TRUE
+            ACCUM @@sum_tri_count += COUNT(s.@set_nodes_in_frontier INTERSECT t.@set_nodes_in_frontier);
+
+      // Mark the frontier nodes as done: leave the frontier, all their triangles are counted
+      Nodes = SELECT s
+              FROM Nodes:s
+              POST-ACCUM (s)
+                s.@or_in_frontier = FALSE,
+                s.@or_tri_counted = TRUE;
+
+      // Reset variables for the next iteration
+      Neighbors = SELECT s
+                  FROM Neighbors:s
+                  POST-ACCUM (s)
+                    s.@or_is_neighbor = FALSE,
+                    s.@set_nodes_in_frontier.clear();
+
+      // Use BFS to visit the next frontier: uncounted neighbors of the nodes just finished
+      Nodes = SELECT t
+              FROM Nodes:s-(e_type_set:e)-v_type_set:t
+              WHERE t.@or_tri_counted == FALSE
+              POST-ACCUM t.@or_in_frontier += TRUE;
+    END;
+
+    // Remove visited vertices from the PivotCandidates set
+    PivotCandidates = SELECT s
+                      FROM PivotCandidates:s
+                      WHERE s.@or_tri_counted == FALSE;
+  END;
+
+  // -------------------- 4. Handle nodes in small WCCs --------------------
+  // For remaining vertices in small WCCs
+  Nodes = SELECT s
+          FROM AllNodes:s
+          WHERE s.@or_tri_counted == FALSE;
+
+  // Each remaining vertex t collects only its remaining neighbors s with getvid(s) > getvid(t),
+  // so the highest-ID vertex of a triangle never stores the other two vertices of that triangle.
+  Tmp = SELECT t
+        FROM Nodes:s-(e_type_set)-v_type_set:t
+        WHERE t.@or_tri_counted == FALSE
+          AND getvid(s) > getvid(t)
+        ACCUM t.@set_nodes_in_frontier += s
+        POST-ACCUM
+          t.@or_is_neighbor = TRUE;
+
+  // For every edge (s, t) with getvid(s) > getvid(t), count the vertices present in both sets.
+  // A triangle is found only from the edge joining its two lowest-ID vertices (assumes one traversed edge per vertex pair).
+  Tmp = SELECT t
+        FROM Nodes:s-(e_type_set)-:t
+        WHERE getvid(s) > getvid(t)
+          AND t.@or_is_neighbor == TRUE
+        ACCUM @@sum_tri_count += COUNT(s.@set_nodes_in_frontier INTERSECT t.@set_nodes_in_frontier);
+
+  // -------------------- 5. Output --------------------
+  // Output the results
+  PRINT @@sum_tri_count AS num_triangles;
+}