-
Notifications
You must be signed in to change notification settings - Fork 72
/
Copy pathtg_msf.gsql
199 lines (177 loc) · 6.95 KB
/
tg_msf.gsql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
CREATE DISTRIBUTED QUERY tg_msf (SET<STRING> v_type_set, SET<STRING> e_type_set, STRING weight_attribute, STRING weight_type,
BOOL print_results = TRUE, STRING result_attribute = "", STRING file_path = "") SYNTAX V1 {
/*
First Author: <First Author Name>
First Commit Date: <First Commit Date>
Recent Author: <Recent Commit Author Name>
Recent Commit Date: <Recent Commit Date>
Repository:
https://github.com/tigergraph/gsql-graph-algorithms/tree/master/algorithms/Path
Maturity:
production
Description:
This query identifies minimum spanning trees using the algorithm in section 6.2 of Qin et al. 2014
Publications:
http://www-std1.se.cuhk.edu.hk/~hcheng/paper/SIGMOD2014qin.pdf
TigerGraph Documentation:
https://docs.tigergraph.com/graph-ml/current/pathfinding-algorithms/minimum-spanning-forest
Parameters:
v_type_set:
vertex types to traverse
print_results:
If True, print JSON output
e_type_set:
edge types to traverse
result_attribute:
INT attribute to store results to
weight_attribute:
attribute for edge weights
file_path:
file to write CSV output to
weight_type:
weight data type (INT,FLOAT,DOUBLE)
*/
TYPEDEF TUPLE <FLOAT weight, VERTEX from_v, VERTEX to_v, EDGE e, INT vid> EDGE_WEIGHT;
MapAccum<VERTEX, VERTEX> @@parents_map;
MapAccum<VERTEX, AndAccum<BOOL>> @@star_map;
SumAccum<INT> @@sum_parent_changed_count;
SetAccum<EDGE> @@result_set;
SetAccum<EDGE_WEIGHT> @@mst_set;
HeapAccum<EDGE_WEIGHT>(1, weight ASC, to_v ASC, vid ASC) @ew_heap;
MinAccum<VERTEX> @min_parent; # Given a vertex v, we need to be able to send its outgoing edge info to its parent, which is only posible if we store the parent in a local accumulator.
OrAccum @or_ignore;
FILE f (file_path);
# Check weight_type parameter
IF weight_type NOT IN ("INT", "FLOAT", "DOUBLE") THEN
PRINT "weight_type must be INT, FLOAT, or DOUBLE" AS errMsg;
RETURN;
END;
all_v = {v_type_set};
### FOREST INITIALIZATION ###
# For each node v, let parent p(v) = neighbor of v connected via the least-weighted edge.
all_v = SELECT v
FROM all_v:v -(e_type_set:e)- :u
ACCUM
CASE weight_type
WHEN "INT" THEN
v.@ew_heap += EDGE_WEIGHT(e.getAttr(weight_attribute, "INT"), v,u,e, getvid(u))
WHEN "FLOAT" THEN
v.@ew_heap += EDGE_WEIGHT(e.getAttr(weight_attribute, "FLOAT"), v,u,e, getvid(u))
WHEN "DOUBLE" THEN
v.@ew_heap += EDGE_WEIGHT(e.getAttr(weight_attribute, "DOUBLE"), v,u,e, getvid(u))
END
POST-ACCUM
@@parents_map += (v -> v.@ew_heap.top().to_v),
@@sum_parent_changed_count += 1;
WHILE @@sum_parent_changed_count > 0 DO
### BREAK CYCLES ###
all_v = SELECT v
FROM all_v:v
POST-ACCUM v.@or_ignore = false;
all_v = SELECT v
FROM all_v:v
POST-ACCUM
VERTEX p = @@parents_map.get(v),
VERTEX gp = @@parents_map.get(p),
IF v != p AND v == gp THEN
IF (getvid(v) < getvid(p)) THEN
@@parents_map += (v -> v),
v.@or_ignore = TRUE
END
END;
# only add edges to MST after breaking cycles to avoid double counting edges
add_edges = SELECT v
FROM all_v:v
WHERE v.@or_ignore == false AND v.@ew_heap.size() > 0
POST-ACCUM
IF file_path != "" THEN
@@mst_set += v.@ew_heap.top()
END,
IF print_results OR result_attribute != "" THEN
@@result_set += v.@ew_heap.top().e
END;
### UPDATE PARENT POINTERS ###
@@sum_parent_changed_count = 0;
all_v = SELECT v
FROM all_v:v
POST-ACCUM
VERTEX p = @@parents_map.get(v),
VERTEX gp = @@parents_map.get(p),
IF (p != gp) THEN
@@sum_parent_changed_count += 1,
@@parents_map += (v -> gp)
END;
IF @@sum_parent_changed_count == 0 THEN
BREAK;
END;
### STAR DETECTION ###
@@star_map.clear();
# Rule 1: Let s(v) = 1 if p(v) = p(p(v))
# Only root and depth 1 vertices will have s(v) = 1. Everything else will have s(v) = 0.
all_v = SELECT v
FROM all_v:v
POST-ACCUM
VERTEX parent = @@parents_map.get(v),
IF parent == @@parents_map.get(parent) THEN
@@star_map += (v -> true)
ELSE
@@star_map += (v -> false)
END;
# Rule 2: If s(v) = 1 but v has a grandchild u such that s(u) = 0, then s(v) = 0. This will end up updating root vertices.
not_star_roots = SELECT u
FROM all_v:u
WHERE @@star_map.get(u) == false
POST-ACCUM
@@star_map += (@@parents_map.get(@@parents_map.get(u)) -> false);
# Rule 3: If s(p(v)) = 0, then s(v) = 0. This will end up updating vertices at depth 1 of trees.
not_star_depth1 = SELECT u
FROM all_v:u
WHERE @@star_map.get(@@parents_map.get(u)) == false
POST-ACCUM
@@star_map += (u -> false);
### STAR HOOKING ###
# First, we need to clear each vertex's heap and reset the local @parent.
all_v = SELECT v
FROM all_v:v
POST-ACCUM
v.@ew_heap.clear(),
v.@min_parent = @@parents_map.get(v);
star_nodes = SELECT v
FROM all_v:v -(e_type_set:e)- :u
WHERE @@star_map.get(v) == true AND v.@min_parent != u.@min_parent
ACCUM
VERTEX parent = v.@min_parent,
CASE weight_type
WHEN "INT" THEN
parent.@ew_heap += EDGE_WEIGHT(e.getAttr(weight_attribute, "INT"), v,u,e, getvid(u))
WHEN "FLOAT" THEN
parent.@ew_heap += EDGE_WEIGHT(e.getAttr(weight_attribute, "FLOAT"), v,u,e, getvid(u))
WHEN "DOUBLE" THEN
parent.@ew_heap += EDGE_WEIGHT(e.getAttr(weight_attribute, "DOUBLE"), v,u,e, getvid(u))
END;
updated_star_roots = SELECT v
FROM all_v:v
WHERE @@star_map.get(v) == true AND @@parents_map.get(v) == v AND v.@ew_heap.size() > 0
POST-ACCUM
@@parents_map += (v -> @@parents_map.get(v.@ew_heap.top().to_v));
END;
IF result_attribute != "" THEN
all_v = SELECT v
FROM all_v:v -(e_type_set:e)- :u
ACCUM
IF e IN @@result_set THEN
e.setAttr(result_attribute, TRUE)
ELSE
e.setAttr(result_attribute, FALSE)
END;
END;
IF print_results THEN
PRINT @@result_set;
END;
IF file_path != "" THEN
f.println("From", "To", "Weight");
FOREACH e IN @@mst_set DO
f.println(e.from_v, e.to_v, e.weight);
END;
END;
}