Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ALGOS-tmp] feat(algos): Improve the LCC and Triangle Count Algorithm; #165

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
184 changes: 103 additions & 81 deletions GDBMS_ALGO/community/lcc.gsql
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
CREATE TEMPLATE QUERY GDBMS_ALGO.community.lcc (STRING v_type, STRING e_type,INT top_k=100,BOOL print_results = True, STRING result_attribute = "",
STRING file_path = "", BOOL display_edges = FALSE) SYNTAX V1 {
CREATE TEMPLATE QUERY GDBMS_ALGO.community.lcc (
SET<STRING> v_type_set,
SET<STRING> e_type_set,
UINT top_k = 100,
BOOL print_results = FALSE,
STRING result_attribute = "",
STRING file_path = "",
BOOL display_edges = FALSE
) SYNTAX V1 {

/*
First Author: <First Author Name>
First Commit Date: <First Commit Date>

Recent Author: <Recent Commit Author Name>
Recent Commit Date: <Recent Commit Date>
First Author: [email protected]
First Commit Date: 2024-07-15

Recent Author: [email protected]
Recent Commit Date: 2024-07-15

Repository:
https://github.com/tigergraph/gsql-graph-algorithms/tree/master/algorithms/Community
Expand All @@ -16,10 +22,9 @@ CREATE TEMPLATE QUERY GDBMS_ALGO.community.lcc (STRING v_type, STRING e_type,INT
Production

Description:
The Local Clustering Coefficient algorithm computes the local clustering coefficient
for each node in the graph.
lcc = Number_triangles/((n-1)n/2)
Here n is the outdegree of the vertex.
This query computes the Local Clustering Coefficient (LCC) for each node in the graph.
LCC = Number_of_triangles / ((n-1) * n / 2)
where n is the outdegree of the vertex.

Publications:
NA
Expand All @@ -28,83 +33,100 @@ CREATE TEMPLATE QUERY GDBMS_ALGO.community.lcc (STRING v_type, STRING e_type,INT
https://docs.tigergraph.com/graph-ml/current/community-algorithms/local-clustering-coefficient

Parameters:
v_type:
vertex types to traverse
v_type_set:
The set of vertex types to traverse.
e_type_set:
The set of edge types to traverse.
top_k:
Number of top scores to report.
print_results:
If True, print JSON output
e_type:
edge types to traverse
If True, print JSON output.
result_attribute:
INT attribute to store results to
top_k:
report only this many top scores
Name of the vertex attribute to store the LCC value in; leave empty to skip storing.
file_path:
file to write CSV output to
File to write CSV output to.
display_edges:
If True, output edges for visualization
If True, output edges for visualization. WARNING: Avoid displaying edges for large datasets.
*/
TYPEDEF TUPLE<VERTEX Vertex_ID, FLOAT score> Vertex_Score;
HeapAccum<Vertex_Score>(top_k, score DESC) @@top_scores_heap;
SumAccum<FLOAT> @sum_tri; #number of trangles
SumAccum<FLOAT> @sum_lcc; #lcc value
SetAccum<int> @neighbors_set; #neighbors set
OrAccum @or_self_con; #check if the vertex is self-connect
SetAccum<EDGE> @@edge_set;
FILE f (file_path);
# Here we compute the intersection for 2 points on the triangle.
Start = {v_type};
Start = SELECT s
FROM Start:s-(e_type)-v_type:t
ACCUM
IF getvid(s) != getvid(t) THEN
t.@neighbors_set += getvid(s)
ELSE
t.@or_self_con+=TRUE
END;# check id the vertex is self-connect
Start = SELECT s
FROM Start:s-(e_type)-v_type:t
WHERE s.outdegree(e_type)>1
ACCUM
s.@sum_tri+=COUNT((s.@neighbors_set INTERSECT t.@neighbors_set))
POST-ACCUM
IF s.@or_self_con AND s.outdegree(e_type)<3 THEN
s.@sum_lcc+=0
ELSE IF s.@or_self_con AND s.outdegree(e_type)>2 THEN
s.@sum_lcc+= (((s.@sum_tri+1-s.outdegree(e_type)))/((s.outdegree(e_type)-2)*(s.outdegree(e_type)-1)))
ELSE
s.@sum_lcc+= ((s.@sum_tri)/((s.outdegree(e_type)-0)*(s.outdegree(e_type)-1)))

TYPEDEF TUPLE<VERTEX Vertex_ID, FLOAT score> Vertex_Score; // Define a tuple for storing vertex scores
HeapAccum<Vertex_Score>(top_k, score DESC) @@top_scores_heap; // Heap to store top-k scores
SumAccum<INT> @sum_outdegree; // Accumulator for the outdegree of vertices
SetAccum<VERTEX> @set_nodes_in_frontier; // Set to store nodes in the frontier
MapAccum<VERTEX, INT> @map_node_tri_count; // Map: node in the frontier -> triangle count
SumAccum<INT> @sum_tri_count; // Accumulator for the count of triangles
SumAccum<FLOAT> @sum_lcc; // Accumulator for the LCC value
SetAccum<EDGE> @@edge_set; // Set of edges for visualization
FILE f (file_path); // File to write results to

// Calculate the outdegree for each vertex
AllNodes = {v_type_set};
Nodes = SELECT s
FROM AllNodes:s-(e_type_set)-v_type_set:t
WHERE s != t
ACCUM s.@sum_outdegree += 1;

// Find neighbors and prepare for triangle counting
Neighbors = SELECT t
FROM Nodes:s-(e_type_set)-v_type_set:t
WHERE getvid(s) > getvid(t)
ACCUM t.@set_nodes_in_frontier += s;

// Calculate the number of triangles involving nodes in the frontier
Tmp = SELECT s
FROM Neighbors:s-(e_type_set)-v_type_set:t
WHERE getvid(s) > getvid(t)
ACCUM FOREACH node_in_frontier IN s.@set_nodes_in_frontier INTERSECT t.@set_nodes_in_frontier DO
s.@map_node_tri_count += (node_in_frontier -> 1),
// Increment triangle count for s and t
s.@sum_tri_count += 1,
t.@sum_tri_count += 1
END;

#output
Start = SELECT s
FROM Start:s
# Store, collect, and/or write the LCC value of each vertex
POST-ACCUM
IF result_attribute != "" THEN
s.setAttr(result_attribute, s.@sum_lcc)
END,
IF print_results THEN
@@top_scores_heap += Vertex_Score(s, s.@sum_lcc)
END,
IF file_path != "" THEN
f.println(s, s.@sum_lcc)
END;

IF file_path != "" THEN
f.println("Vertex_ID", "lcc");
END;

// Sum up the triangle counts for nodes in the frontier
Tmp = SELECT s
FROM Tmp:s-(e_type_set)-v_type_set:t
WHERE s.@map_node_tri_count.containsKey(t)
ACCUM t.@sum_tri_count += s.@map_node_tri_count.get(t);

// Calculate the LCC for nodes in the frontier
Nodes = SELECT s
FROM Nodes:s
POST-ACCUM (s)
IF s.@sum_outdegree > 1 THEN
s.@sum_lcc = s.@sum_tri_count * 2.0 / (s.@sum_outdegree * (s.@sum_outdegree - 1))
END;

// Reset variables
Neighbors = SELECT s
FROM Neighbors:s
POST-ACCUM (s)
s.@map_node_tri_count.clear(),
s.@set_nodes_in_frontier.clear();

// Output results
AllNodes = SELECT s
FROM AllNodes:s
POST-ACCUM
IF result_attribute != "" THEN
s.setAttr(result_attribute, s.@sum_lcc)
END,
IF print_results THEN
@@top_scores_heap += Vertex_Score(s, s.@sum_lcc)
END,
IF file_path != "" THEN
f.println(s, s.@sum_lcc)
END;

// Print results if print_results is True
IF print_results THEN
PRINT @@top_scores_heap AS top_scores;
IF display_edges THEN
PRINT Start[Start.@sum_lcc];
Start = SELECT s
FROM Start:s -(e_type:e)-:t
ACCUM @@edge_set += e;
PRINT @@edge_set;
END;
PRINT @@top_scores_heap AS top_scores;
IF display_edges THEN
PRINT AllNodes[AllNodes.@sum_lcc];
AllNodes = SELECT s
FROM AllNodes:s -(e_type_set:e)-:t
ACCUM @@edge_set += e;
PRINT @@edge_set;
END;
END;
}
Loading
Loading