-
Notifications
You must be signed in to change notification settings - Fork 72
/
Copy pathtg_label_prop.gsql
105 lines (88 loc) · 3.43 KB
/
tg_label_prop.gsql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
CREATE DISTRIBUTED QUERY tg_label_prop (SET<STRING> v_type_set, SET<STRING> e_type_set, INT maximum_iteration, INT print_limit,
  BOOL print_results = TRUE, STRING file_path = "", STRING result_attribute = "") SYNTAX V1 {
  /*
    First Author: <First Author Name>
    First Commit Date: <First Commit Date>

    Recent Author: <Recent Commit Author Name>
    Recent Commit Date: <Recent Commit Date>

    Repository:
      https://github.com/tigergraph/gsql-graph-algorithms/tree/master/algorithms/Community

    Maturity:
      Production

    Description:
      Partition the vertices into communities, according to the Label Propagation method.
      Indicate community membership by assigning each vertex a community ID.

      Every vertex starts with its own internal vertex ID as label. In each round every
      vertex tallies the labels of its neighbors and adopts the most frequent one when that
      label differs from its current label and its tally exceeds the tally recorded the
      last time the vertex changed label. The loop stops when a round changes no label or
      after maximum_iteration rounds.

    Publications:
      NA

    TigerGraph Documentation:
      https://docs.tigergraph.com/graph-ml/current/community-algorithms/label-propagation

    Parameters:
      v_type_set:
        Names of vertex types to use
      e_type_set:
        Names of edge types to use
      maximum_iteration:
        Maximum number of iterations of the algorithm
      print_limit:
        If >=0, max number of vertices to output to JSON.
      print_results:
        If True, output JSON to standard output
      file_path:
        If not empty, write output to this file.
      result_attribute:
        If not empty, store community id values (INT) to this attribute
  */

  OrAccum @@or_changed = true;            # true while the last round changed at least one label
  MapAccum<INT, INT> @map;                # <communityId, numNeighbors> tally for the current round
  MapAccum<INT, INT> @@comm_sizes_map;    # <communityId, members>
  SumAccum<INT> @sum_label, @sum_num;     # current label / tally of that label when it was adopted
  FILE f (file_path);

  Start = {v_type_set};

  # Assign unique labels to each vertex
  Start = SELECT s
          FROM Start:s
          ACCUM s.@sum_label = getvid(s);

  # Propagate labels to neighbors until labels converge or the max iterations is reached
  WHILE @@or_changed == true LIMIT maximum_iteration DO
    @@or_changed = false;
    Start = SELECT s
            FROM Start:s -(e_type_set:e)- :t
            ACCUM t.@map += (s.@sum_label -> 1)  # count the occurrences of neighbor's labels
            POST-ACCUM
              INT max_v = 0,
              INT label = 0,
              # Iterate over the map to get the neighbor label that occurs most often
              FOREACH (k,v) IN t.@map DO
                CASE WHEN v > max_v THEN
                  max_v = v,
                  label = k
                END
              END,
              # When the neighbor search finds a label AND it is a new label
              # AND the label's count has increased, update the label.
              # NOTE(review): 0 doubles as the "no label found" sentinel; if getvid() can
              # return 0, that vertex's label can never be adopted by others - confirm.
              CASE WHEN label != 0 AND t.@sum_label != label AND max_v > t.@sum_num THEN
                @@or_changed += true,
                t.@sum_label = label,
                t.@sum_num = max_v
              END,
              # Reset the tally so the next round counts only the neighbors' current labels;
              # without this, counts from earlier rounds would leak into later votes.
              t.@map.clear();
  END;

  # Emit the final labels over the full vertex set
  Start = {v_type_set};
  Start = SELECT s
          FROM Start:s
          POST-ACCUM
            # Persist the community id on the vertex when an attribute name was given
            IF result_attribute != "" THEN
              s.setAttr(result_attribute, s.@sum_label)
            END,

            # One line per vertex: vertex, community id
            IF file_path != "" THEN
              f.println(s, s.@sum_label)
            END,

            # Community size histogram, only needed for the JSON output
            IF print_results THEN
              @@comm_sizes_map += (s.@sum_label -> 1)
            END
          # NOTE(review): the header says the cap applies only when print_limit >= 0;
          # confirm how LIMIT treats a negative value.
          LIMIT print_limit;

  IF print_results THEN
    PRINT @@comm_sizes_map;
    PRINT Start[Start.@sum_label];
  END;
}