|
CREATE TEMPLATE QUERY GDBMS_ALGO.community.tri_count_small_world(
  SET<STRING> v_type_set,
  SET<STRING> e_type_set,
  UINT supernode_min_degree = 100000,
  UINT threshold = 100000
) SYNTAX V1 {

  /*
    First Commit Date: 2024-07-18

    Recent Commit Date: 2024-07-18

    Repository:
      https://github.com/tigergraph/gsql-graph-algorithms/tree/master/algorithms/Community

    Maturity:
      Production

    Description:
      This query computes the total number of triangles in the graph.
      It is optimized for small-world graphs to save memory.

      The work is split into three counting phases. A vertex is flagged as
      "counted" once every triangle containing it has been added to the total,
      and counted vertices are skipped by all later traversals:
        1. Supernodes are processed in batches of at most 10 vertices.
        2. Starting from the remaining pivot candidate with the largest degree
           product, a BFS processes its connected component one frontier at a
           time; this repeats until no pivot candidate is left.
        3. All vertices that are still not counted are processed in one pass.

    Publications:
      NA

    TigerGraph Documentation:
      https://docs.tigergraph.com/graph-ml/current/community-algorithms/triangle-counting

    Parameters:
      v_type_set:
        The set of vertex types to traverse.
      e_type_set:
        The set of edge types to traverse.
      supernode_min_degree:
        The minimum degree for a vertex to be considered a supernode.
        Supernodes are picked among the pivot candidates, so a supernode must
        also satisfy `threshold`.
        The default value is 100000.
      threshold:
        The threshold for choosing initial pivot vertices. Only vertices whose product of indegree
        and outdegree is greater than or equal to this threshold will be considered candidates
        for the pivot vertex.
        The default value is 100000.

    Output:
      num_triangles: the total number of triangles counted.
  */

  SumAccum<INT> @sum_outdegree;               // Accumulator for the outdegree
  SumAccum<INT> @sum_indegree;                // Accumulator for the indegree
  SumAccum<INT> @sum_degree_product;          // Accumulator for the product of outdegree and indegree
  OrAccum @or_in_frontier;                    // Flag to check if the vertex is in the frontier
  OrAccum @or_tri_counted;                    // Flag to check if all triangles that contain the vertex have been counted
  OrAccum @or_is_neighbor;                    // Flag to check if the vertex is a neighbor of nodes in the frontier
  SetAccum<VERTEX> @set_nodes_in_frontier;    // Frontier nodes adjacent to this vertex (section 4 reuses it for larger-id neighbors)
  SumAccum<INT> @@sum_tri_count;              // The count of triangles

  // -------------------- 1. Initialization --------------------
  // Calculate the product of indegree and outdegree,
  // and filter vertices with a product no less than the threshold.
  // Both counters are incremented once per traversed edge (self-loops excluded),
  // so they hold the same value and the product is that edge count squared.
  AllNodes = {v_type_set};
  PivotCandidates = SELECT s
                    FROM AllNodes:s-(e_type_set)-v_type_set:t
                    WHERE s != t
                    ACCUM s.@sum_outdegree += 1,
                          s.@sum_indegree += 1
                    POST-ACCUM (s)
                          s.@sum_degree_product = s.@sum_indegree * s.@sum_outdegree
                    HAVING s.@sum_degree_product >= threshold;

  // -------------------- 2. Handle the supernodes --------------------
  // Count the number of triangles for supernodes
  SuperNodes = SELECT s
               FROM PivotCandidates:s
               WHERE s.@sum_outdegree >= supernode_min_degree;
  WHILE SuperNodes.size() > 0 DO
    // Select some supernodes and set them as frontier.
    // The flag is set in a second statement so that only the vertices
    // that survived the LIMIT are marked.
    Nodes = SELECT s
            FROM SuperNodes:s
            LIMIT 10;
    Nodes = SELECT s
            FROM Nodes:s
            POST-ACCUM (s)
                  s.@or_in_frontier += TRUE;

    // Find neighbors of nodes in the frontier
    Neighbors = SELECT t
                FROM Nodes:s-(e_type_set)-v_type_set:t
                WHERE s != t
                      AND t.@or_tri_counted == FALSE      // Don't visit nodes whose all triangles have been counted
                      AND (t.@or_in_frontier == FALSE     // Neighbor not in the frontier
                           OR getvid(s) > getvid(t))      // If neighbor is in the frontier, only consider one direction
                ACCUM t.@set_nodes_in_frontier += s
                POST-ACCUM (t)
                      t.@or_is_neighbor += TRUE;

    // Calculate the number of triangles involving nodes in the frontier:
    // for an edge between two neighbors, every frontier node recorded on
    // both endpoints closes one triangle.
    Tmp = SELECT s
          FROM Neighbors:s-(e_type_set)-v_type_set:t
          WHERE getvid(s) > getvid(t)                     // Traverse only one direction of the undirected edge
                AND t.@or_is_neighbor == TRUE
          ACCUM @@sum_tri_count += COUNT(s.@set_nodes_in_frontier INTERSECT t.@set_nodes_in_frontier);

    // Reset variables for nodes in the frontier
    Nodes = SELECT s
            FROM Nodes:s
            POST-ACCUM (s)
                  s.@or_in_frontier = FALSE,
                  s.@or_tri_counted = TRUE;

    // Reset variables for the next iteration
    Neighbors = SELECT s
                FROM Neighbors:s
                POST-ACCUM (s)
                      s.@or_is_neighbor = FALSE,
                      s.@set_nodes_in_frontier.clear();

    // Remove visited vertices from the SuperNodes set
    SuperNodes = SuperNodes MINUS Nodes;
    PivotCandidates = PivotCandidates MINUS Nodes;
  END;

  // -------------------- 3. Handle nodes in large WCCs --------------------
  // Count the number of triangles for nodes in large WCCs
  WHILE PivotCandidates.size() > 0 DO
    // Select the initial pivot vertex with the largest product of indegree and outdegree
    Nodes = SELECT s
            FROM PivotCandidates:s
            ORDER BY s.@sum_degree_product DESC
            LIMIT 1;
    Nodes = SELECT s
            FROM Nodes:s
            POST-ACCUM (s)
                  s.@or_in_frontier += TRUE;

    // Use BFS to find all elements in its connected component
    WHILE Nodes.size() > 0 DO
      // Find neighbors of nodes in the frontier
      Neighbors = SELECT t
                  FROM Nodes:s-(e_type_set)-v_type_set:t
                  WHERE s != t
                        AND t.@or_tri_counted == FALSE    // Don't visit nodes whose all triangles have been counted
                        AND (t.@or_in_frontier == FALSE   // Neighbor not in the frontier
                             OR getvid(s) > getvid(t))    // If neighbor is in the frontier, only consider one direction
                  ACCUM t.@set_nodes_in_frontier += s
                  POST-ACCUM (t)
                        t.@or_is_neighbor += TRUE;

      // Calculate the number of triangles involving nodes in the frontier
      Tmp = SELECT s
            FROM Neighbors:s-(e_type_set)-v_type_set:t
            WHERE getvid(s) > getvid(t)                   // Traverse only one direction of the undirected edge
                  AND t.@or_is_neighbor == TRUE
            ACCUM @@sum_tri_count += COUNT(s.@set_nodes_in_frontier INTERSECT t.@set_nodes_in_frontier);

      // Reset variables for nodes in the frontier
      Nodes = SELECT s
              FROM Nodes:s
              POST-ACCUM (s)
                    s.@or_in_frontier = FALSE,
                    s.@or_tri_counted = TRUE;

      // Reset variables for the next iteration
      Neighbors = SELECT s
                  FROM Neighbors:s
                  POST-ACCUM (s)
                        s.@or_is_neighbor = FALSE,
                        s.@set_nodes_in_frontier.clear();

      // Use BFS to visit the next frontier: the neighbors of the current
      // frontier that have not been counted yet.
      Nodes = SELECT t
              FROM Nodes:s-(e_type_set)-v_type_set:t
              WHERE t.@or_tri_counted == FALSE
              POST-ACCUM t.@or_in_frontier += TRUE;
    END;

    // Remove visited vertices from the PivotCandidates set
    PivotCandidates = SELECT s
                      FROM PivotCandidates:s
                      WHERE s.@or_tri_counted == FALSE;
  END;

  // -------------------- 4. Handle nodes in small WCCs --------------------
  // For remaining vertices in small WCCs
  Nodes = SELECT s
          FROM AllNodes:s
          WHERE s.@or_tri_counted == FALSE;

  // Build neighbor sets manually: every remaining vertex t records its
  // remaining neighbors that have a larger internal id than t.
  // The vertex with the largest id in a triangle therefore records neither
  // of the other two corners.
  Tmp = SELECT t
        FROM Nodes:s-(e_type_set)-v_type_set:t
        WHERE t.@or_tri_counted == FALSE
              AND getvid(s) > getvid(t)
        ACCUM t.@set_nodes_in_frontier += s
        POST-ACCUM
              t.@or_is_neighbor = TRUE;

  // Compute the intersection of neighbor sets to count triangles.
  // Each edge is visited in one direction only and the intersection contains
  // only vertices with an id larger than both endpoints, so each triangle is
  // counted only once (on the edge joining its two smallest ids).
  Tmp = SELECT t
        FROM Nodes:s-(e_type_set)-v_type_set:t
        WHERE getvid(s) > getvid(t)
              AND t.@or_is_neighbor == TRUE
        ACCUM @@sum_tri_count += COUNT(s.@set_nodes_in_frontier INTERSECT t.@set_nodes_in_frontier);

  // -------------------- 5. Output --------------------
  // Output the results
  PRINT @@sum_tri_count AS num_triangles;
}
0 commit comments