
Commit 8de11de

update public repo

1 parent 564391c commit 8de11de

File tree: 18 files changed, +133 -136 lines

algorithms/Centrality/closeness/exact/tg_closeness_cent.gsql

+22 -20

@@ -1,4 +1,4 @@
-CREATE QUERY tg_closeness_cent(SET<STRING> v_type, SET<STRING> e_type, SET<STRING> re_type,INT max_hops = 10,
+CREATE QUERY tg_closeness_cent(SET<STRING> v_type, SET<STRING> e_type, STRING re_type,INT max_hops = 10,
   INT top_k = 100, BOOL wf = TRUE, BOOL print_accum = True, STRING result_attr = "",
   STRING file_path = "", BOOL display_edges = FALSE) SYNTAX V1 {

@@ -31,7 +31,7 @@ CREATE QUERY tg_closeness_cent(SET<STRING> v_type, SET<STRING> e_type, SET<STRIN
   FILE f (file_path);
   INT num_vert;
   INT batch_number;
-  # Compute closeness
+  # Compute closeness
   all = {v_type};

   num_vert = all.size();
@@ -44,23 +44,23 @@ CREATE QUERY tg_closeness_cent(SET<STRING> v_type, SET<STRING> e_type, SET<STRIN
          FROM all:s
          WHERE getvid(s)%batch_number == i
          POST-ACCUM
-            @@map+=(getvid(s)->0),
+            @@map+=(getvid(s)->0),
             @@batch_set+=getvid(s);

   FOREACH ver in @@batch_set DO
       @@map+=(ver->@@sum_count);
-      @@sum_count+=1;
+      @@sum_count+=1;
   END; #set a unique ID for each vertex, ID from 1-63

   Start = SELECT s
           FROM Start:s
           POST-ACCUM
-              s.@sum_id=@@map.get(getvid(s));
+              s.@sum_id=@@map.get(getvid(s));

   Start = Select s
           FROM Start:s
           POST-ACCUM
-              s.@bitwise_or_seen=1<<s.@sum_id,
+              s.@bitwise_or_seen=1<<s.@sum_id,
               s.@bitwise_or_visit=1<<s.@sum_id; # set initial seen and visit s.@seen1 s.@seen2
   @@batch_set.clear();
   @@map.clear();
@@ -92,13 +92,13 @@ CREATE QUERY tg_closeness_cent(SET<STRING> v_type, SET<STRING> e_type, SET<STRIN
   END;

   #Output
-  IF file_path != "" THEN
-      f.println("Vertex_ID", "Closeness");
-  END;
-
+  IF file_path != "" THEN
+      f.println("Vertex_ID", "Closeness");
+  END;
+
   Start = SELECT s
           FROM all:s
-          # Calculate Closeness Centrality for each vertex
+          # Calculate Closeness Centrality for each vertex
           WHERE s.@sum_res>0
           POST-ACCUM
               IF wf THEN
@@ -107,15 +107,15 @@ CREATE QUERY tg_closeness_cent(SET<STRING> v_type, SET<STRING> e_type, SET<STRIN
                   s.@sum_score = s.@sum_size*1.0/s.@sum_res*1.0
               END,

-              IF result_attr != "" THEN
+              IF result_attr != "" THEN
                   s.setAttr(result_attr, s.@sum_score)
               END,

-              IF print_accum THEN
+              IF print_accum THEN
                   @@top_scores_heap += Vertex_Score(s, s.@sum_score)
               END,

-              IF file_path != "" THEN
+              IF file_path != "" THEN
                   f.println(s, s.@sum_score)
               END;
   #test
@@ -128,12 +128,14 @@ CREATE QUERY tg_closeness_cent(SET<STRING> v_type, SET<STRING> e_type, SET<STRIN
       @@top_scores_heap += Vertex_Score(s, -1);
       PRINT @@top_scores_heap AS top_scores;
       IF display_edges THEN
-          PRINT Start[Start.@sum_score];
-          Start = SELECT s
-                  FROM Start:s -(e_type:e)-:t
-                  ACCUM
-                      @@edge_set += e;
-          PRINT @@edge_set;
+          Start = SELECT s
+                  FROM all:s
+                  WHERE s.@sum_res>0;
+          PRINT Start[Start.@sum_score];
+          Start = SELECT s
+                  FROM Start:s -(e_type:e)-:t
+                  ACCUM @@edge_set += e;
+          PRINT @@edge_set;
       END;
   END;
}
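
For reference, a minimal Python sketch of the score this query computes (not the GSQL itself; the adjacency dict, vertex ids, and helper name are made up for illustration): BFS from each source, then score = (vertices reached) / (sum of distances), which is roughly what @sum_size / @sum_res expresses above, and the wf flag appears to apply the usual Wasserman-Faust scaling by (reached) / (n - 1).

```python
from collections import deque

def closeness(adj, n, wf=True):
    scores = {}
    for src in adj:
        # BFS distances from src.
        dist = {src: 0}
        queue = deque([src])
        while queue:
            u = queue.popleft()
            for v in adj[u]:
                if v not in dist:
                    dist[v] = dist[u] + 1
                    queue.append(v)
        total = sum(dist.values())
        reachable = len(dist) - 1           # excludes src itself
        if total == 0:
            continue                        # isolated vertex: no score
        score = reachable / total
        if wf:
            score *= reachable / (n - 1)    # Wasserman-Faust correction (assumed)
        scores[src] = score
    return scores

adj = {1: [2], 2: [1, 3], 3: [2]}
print(closeness(adj, n=3))                  # middle vertex 2 scores highest
```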

algorithms/Centrality/degree/weighted/tg_algo_weighted_degree_cent.yml

+2 -2

@@ -7,10 +7,10 @@
 # [x|y...|z] : file/directory x, y, or z

 algorithm:
-  name: weighted degree cent
+  name: Weighted Degree Centrality
   filename: "tg_weighted_degree_cent.gsql"
   sha_id: 4569e4faa05d46caa2c333636fe37a2fb85da4dc
-  description: "need to add"
+  description: "Calculates the weighted degree centrality for each vertex in a graph."
   version: lib3.0
   include: true

algorithms/Centrality/degree/weighted/tg_weighted_degree_cent.gsql

+11 -11

@@ -38,20 +38,20 @@ CREATE QUERY tg_weighted_degree_cent(STRING v_type, STRING e_type, STRING re_typ

   Start = SELECT s
           FROM all:s
-          POST-ACCUM
-              IF result_attr != "" THEN
-                  s.setAttr(result_attr, s.@sum_degree_score)
-              END,
+          POST-ACCUM
+              IF result_attr != "" THEN
+                  s.setAttr(result_attr, s.@sum_degree_score)
+              END,

-              IF print_accum THEN
-                  @@top_scores_heap += Vertex_Score(s, s.@sum_degree_score)
-              END,
+              IF print_accum THEN
+                  @@top_scores_heap += Vertex_Score(s, s.@sum_degree_score)
+              END,

-              IF file_path != "" THEN
-                  f.println(s, s.@sum_degree_score)
-              END;
+              IF file_path != "" THEN
+                  f.println(s, s.@sum_degree_score)
+              END;

   IF print_accum THEN
       PRINT @@top_scores_heap AS top_scores;
   END;
-}
+}
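
As a plain-Python illustration of the description added in the YAML above (the edge list and names here are hypothetical, not part of the library), a vertex's weighted degree centrality is simply the sum of the weights on its incident edges:

```python
from collections import defaultdict

def weighted_degree(edges):
    # Score of a vertex = sum of the weights on its incident edges.
    score = defaultdict(float)
    for u, v, w in edges:
        score[u] += w
        score[v] += w
    return dict(score)

print(weighted_degree([("a", "b", 2.0), ("a", "c", 0.5)]))
# {'a': 2.5, 'b': 2.0, 'c': 0.5}
```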

algorithms/Centrality/eigenvector/tg_eigenvector_cent.gsql

+10 -10

@@ -16,7 +16,7 @@ CREATE QUERY tg_eigenvector_cent(SET<STRING> v_type, SET<STRING> e_type, INT max
   HeapAccum<Vertex_Score>(top_k, score DESC) @@top_scores_heap;
   SumAccum<FLOAT> @@sum_squares_eigen_values;
   SumAccum<FLOAT> @sum_received_value;
-  SumAccum<FLOAT> @sum_eigen_value = 1;
+  SumAccum<FLOAT> @sum_eigen_value = 1.0;
   SumAccum<FLOAT> @@sum_cur_norm_values;
   SumAccum<FLOAT> @@sum_prev_norm_values;
   FLOAT conv_value = 9999;
@@ -27,21 +27,21 @@ CREATE QUERY tg_eigenvector_cent(SET<STRING> v_type, SET<STRING> e_type, INT max
       @@sum_cur_norm_values = 0;
       V = SELECT s
           FROM Start:s - (e_type:e) - v_type:t
-          ACCUM t.@sum_received_value += s.@sum_eigen_value
+          ACCUM t.@sum_received_value += s.@sum_eigen_value;
+      V = SELECT s
+          FROM Start:s
           POST-ACCUM s.@sum_eigen_value = s.@sum_received_value,
                      @@sum_squares_eigen_values += s.@sum_eigen_value * s.@sum_eigen_value,
                      s.@sum_received_value = 0;
-      p = SELECT s
-          FROM V:s
-          LIMIT 10;

       V = SELECT s
           FROM V:s
           POST-ACCUM s.@sum_eigen_value = s.@sum_eigen_value / sqrt(@@sum_squares_eigen_values),
                      @@sum_cur_norm_values += s.@sum_eigen_value;
+
       conv_value = abs(@@sum_cur_norm_values - @@sum_prev_norm_values);
       @@sum_prev_norm_values = @@sum_cur_norm_values;
-
+
   END;
   #Output
   IF file_path != "" THEN
@@ -53,16 +53,16 @@ CREATE QUERY tg_eigenvector_cent(SET<STRING> v_type, SET<STRING> e_type, INT max
           IF s.@sum_eigen_value==1.0 THEN
               s.@sum_eigen_value+=-1
           END
-          POST-ACCUM
-              IF result_attr != "" THEN
+          POST-ACCUM
+              IF result_attr != "" THEN
                   s.setAttr(result_attr, s.@sum_eigen_value)
               END,

-              IF print_accum THEN
+              IF print_accum THEN
                   @@top_scores_heap += Vertex_Score(s, s.@sum_eigen_value)
               END,

-              IF file_path != "" THEN
+              IF file_path != "" THEN
                   f.println(s, s.@sum_eigen_value)
               END;

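
The loop shown in this diff is essentially a power iteration. A minimal Python sketch of the same scheme (illustrative only, with a made-up adjacency dict) shows the neighbor-sum step, the square-root-of-sum-of-squares normalization, and the convergence test on the change in the summed values, mirroring conv_value above:

```python
import math

def eigenvector_centrality(adj, max_iter=100, tol=1e-6):
    values = {v: 1.0 for v in adj}        # every vertex starts at 1.0
    prev_sum = 0.0
    for _ in range(max_iter):
        received = {v: 0.0 for v in adj}
        for s, nbrs in adj.items():       # each vertex sends its value to its neighbors
            for t in nbrs:
                received[t] += values[s]
        norm = math.sqrt(sum(x * x for x in received.values())) or 1.0
        values = {v: x / norm for v, x in received.items()}
        cur_sum = sum(values.values())
        if abs(cur_sum - prev_sum) < tol:  # same kind of convergence check as conv_value
            break
        prev_sum = cur_sum
    return values

adj = {1: [2, 3], 2: [1, 3], 3: [1, 2]}
print(eigenvector_centrality(adj))        # symmetric triangle: all scores equal
```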

(filename not shown)

@@ -1,2 +1,2 @@
 ---
-description: "Centrality algorithms calculate the 'importance' of each vertex given a particular metric. These metrics generally revolve around density of a vertex's connectivity or the importance of that vertex to the general connectivity of the entire graph. Some widely used examples include Betweenness Centrality, which produces scores for vertices based on the number of shortest paths that they appear in and Closeness Centrality, which measures importance inversely proportional to how 'far' the vertex is away from every other vertex."
+description: "Centrality algorithms calculate the centrality of each vertex given a particular metric."

algorithms/Community/louvain/tg_louvain.gsql

+1 -1

@@ -8,7 +8,7 @@ CREATE QUERY tg_louvain(SET<STRING> v_type, SET<STRING> e_type, STRING wt_attr =
   Parameters:
     v_type: vertex types to traverse
     e_type: edge types to traverse
-    wt_attr: attribute name for edge weights use empty string if graph is unweighted
+    wt_attr: attribute name for edge weights use empty string is graph is unweighted
     wt_attr type is hardcoded to FLOAT INT or DOUBLE can be supported by changing all `e.getAttr(wt_attr, "FLOAT")`
     to `e.getAttr(wt_attr, "INT")` or `e.getAttr(wt_attr, "DOUBLE")`
     * note: when there is a weight attribute missmatch, there may not be an explicit error message

algorithms/Community/triangle_counting/fast/tg_algo_tri_count_fast.yml

+1 -1

@@ -12,4 +12,4 @@
   sha_id: ed6ea869749977cc0f3df71225d7325fb81c9767
   description: "The fast version of the Triangle Counting algorithm is faster than the standard version, but uses some additional memory."
   version: lib3.0
-  include: false
+  include: true

algorithms/Community/triangle_counting/fast/tg_tri_count_fast.gsql

+6 -8

@@ -4,25 +4,23 @@ CREATE QUERY tg_tri_count_fast(STRING v_type, STRING e_type) SYNTAX V1 {
   # This algorithm is faster than tri_count but uses additional memory for temporary storage
   SumAccum<INT> @@sum_cnt;
   SetAccum<VERTEX> @neighbors_set;
-  SumAccum<INT> @sum_outdegree;
+  //SumAccum<INT> @sum_outdegree;
   all = {v_type};

   # We build up our neighbor lists manually because we'll only build them up on the 2 smaller vertices on a triangle.
-  tmp = SELECT t
-        FROM all:s-(e_type)- v_type:t
-        ACCUM s.@sum_outdegree+=1;
+
   tmp = SELECT t
         FROM all:s-(e_type)- v_type:t
-        WHERE s.@sum_outdegree > t.@sum_outdegree OR (s.@sum_outdegree == t.@sum_outdegree AND getvid(s) > getvid(t))
+        WHERE getvid(s) > getvid(t)
         ACCUM t.@neighbors_set += s;

   # Here we compute the intersection for 2 points on the triangle.
   tmp = SELECT t
         FROM all:s-((e_type))- :t
-        WHERE s != t
+        WHERE getvid(s) > getvid(t)
         ACCUM @@sum_cnt += COUNT(s.@neighbors_set INTERSECT t.@neighbors_set);

-  # Divide by 2 because every triangle was counted twice
-  PRINT @@sum_cnt/2 AS num_triangles;
+  # print result
+  PRINT @@sum_cnt AS num_triangles;

 }
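
The revised query orients every undirected edge by vertex id, stores only higher-id neighbors, and intersects the two endpoints' sets, so each triangle is counted exactly once and the old divide-by-two step is no longer needed. A minimal Python sketch of the same counting idea (the edge list and function name are illustrative, not the GSQL):

```python
from collections import defaultdict

def triangle_count(edges):
    # edges: each undirected edge listed once, as a pair of comparable ids
    larger = defaultdict(set)              # neighbors with a larger id
    for u, v in edges:
        if u != v:
            larger[min(u, v)].add(max(u, v))
    count = 0
    for u, v in edges:
        if u != v:
            # each triangle is counted exactly once: from the edge joining its two
            # smallest vertices, whose larger-id neighbor sets share the third vertex
            count += len(larger[min(u, v)] & larger[max(u, v)])
    return count

print(triangle_count([(1, 2), (2, 3), (1, 3), (3, 4)]))  # 1
```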

algorithms/GraphML/Embeddings/FastRP/tg_fastRP.gsql

+4 -4

@@ -1,7 +1,7 @@
 CREATE QUERY tg_fastRP(SET<STRING> v_type, SET<STRING> e_type,
   STRING weights, FLOAT beta, INT k, INT reduced_dim,
   INT sampling_constant, INT random_seed,
-  BOOL print_accum=FALSE, STRING result_attr="") SYNTAX v1{
+  BOOL print_accum=FALSE, STRING result_attr="") {
 /*
 This query produces an embedding for each vertex.
 Assumes that tg_fastRP_preprocessing has been run first.
@@ -123,9 +123,9 @@ CREATE QUERY tg_fastRP(SET<STRING> v_type, SET<STRING> e_type,

   IF print_accum THEN
     res = SELECT a FROM verts:a;
-    PRINT res[res.@final_embedding_list];
+    PRINT res[res.@final_embedding_arr];
   END;
   IF result_attr != "" THEN
-    storeEmbeddings = SELECT s FROM verts:s POST-ACCUM s.fastrp_embedding = s.@final_embedding_list;
+    storeEmbeddings = SELECT s FROM verts:s POST-ACCUM s.embedding = s.@final_embedding_list;
   END;
-}
+}
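
For context, a rough Python sketch of the FastRP idea behind this query (illustrative only: it uses a very sparse random projection and per-hop neighbor averaging, omits the query's degree normalization via beta and any per-hop rescaling, and all names, defaults, and the toy graph are made up; only sampling_constant, the hop weights, and the random seed loosely correspond to the query's parameters):

```python
import random
from collections import defaultdict

def fastrp(adj, dim=8, hop_weights=(0.0, 1.0, 1.0), sampling_constant=3, seed=42):
    rng = random.Random(seed)
    s = sampling_constant
    # Very sparse random projection: entries are +/-sqrt(s) with probability 1/s, else 0.
    emb = {
        v: [rng.choice([s ** 0.5, -(s ** 0.5)]) if rng.random() < 1.0 / s else 0.0
            for _ in range(dim)]
        for v in adj
    }
    out = {v: [0.0] * dim for v in adj}
    for weight in hop_weights:
        # One propagation step: each vertex becomes the mean of its neighbors.
        emb = {
            v: [sum(emb[u][i] for u in nbrs) / max(len(nbrs), 1) for i in range(dim)]
            for v, nbrs in adj.items()
        }
        # Mix this hop into the final embedding with its per-hop weight.
        for v in adj:
            for i in range(dim):
                out[v][i] += weight * emb[v][i]
    return out

adj = {1: [2], 2: [1, 3], 3: [2]}
print(fastrp(adj)[1])
```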

algorithms/GraphML/Embeddings/Node2Vec/README.md

+8 -34

@@ -1,50 +1,24 @@
 # Node2Vec

-Node2Vec is a vertex embedding algorithm proposed in [node2vec: Scalable Feature Learning for Networks](https://arxiv.org/abs/1607.00653?context=cs). TigerGraph splits the computation into two parts: the random walk process and the embedding training process. Assuming that you are using version 3.6 or greater of the TigerGraph database, ignore the UDF install instructions.
-
 ## [TigerGraph Node2Vec Documentation](https://docs.tigergraph.com/graph-ml/current/node-embeddings/node2vec)

 ## Instructions

-### Random Walk Process Install
-There are two different random walk processes to choose from. The first is regular random walks, implemented in `tg_random_walk.gsql`. This is equivalent to setting `p` and `q` parameters of Node2Vec both to 1, which is also equivalent to the [DeepWalk](https://arxiv.org/pdf/1403.6652.pdf) paper. This version is more performant than `tg_weighted_random_walk.gsql`, due to the less computation that is needed. If the graph is large, you may want to batch the random walk process to reduce memory consumption. Use `tg_random_walk_batch.gsql` if this is desired.
-
-The second option is weighted random walk, as described in the Node2Vec paper. This is implemented in the `tg_weighted_random_walk_sub.gsql` and `tg_weighted_random_walk.gsql`. If your TigerGraph database version is below 3.6, see the UDF installation instructions below. If the graph is large, you may want to batch the random walk process to reduce memory consumption. Use `tg_weighted_random_walk_batch.gsql` with `tg_weighted_random_walk_sub.gsql` if desired.
-
-**To install the un-weighted random walk:** copy the algorithm from `tg_random_walk.gsql` and install it on the database using the standard query install process.
-
-**To install the weighted random walk:** copy `tg_weighted_random_walk_sub.gsql` and install. Then copy and install `tg_weighted_random_walk.gsql`.
-
-### Node2Vec Embedding Install
-Once the random walks have been generated, we can use the output to train the Node2Vec model. To install, make sure the proper UDFs are installed. If you are using a TigerGraph database of version 3.6 or greater, the UDFs are pre-installed.
-
-**To install Node2Vec query:** copy the query from `tg_node2vec.gsql` and install on the database.
-
 ### Preliminary Notes
-Vim is the text editor of choice in this README, any other text editors such as Emacs or Nano will suffice in the commands listed below
+** Vim is the text editor of choice in this README, any other text editors such as Emacs or Nano will suffice in the commands listed below
 \
-`<TGversion>` should be replaced with your current Tigergraph version number
-
-### UDF installation
-
-#### Weighted Random Walk UDF install
-If you are using `tg_weighted_random_walk_sub.gsql`, then you will need to install the `tg_random_udf.cpp`. **The code defined in `tg_random_udf.cpp` should be pasted inside the `UDIMPL`f namespace inside of `ExprFunctions.hpp`.
-```bash
-# open file and paste code
-
-$ vim ~/tigergraph/app/<TGversion>/dev/gdk/gsql/src/QueryUdf/ExprFunctions.hpp
-```
+** `<TGversion>` should be replaced with your current Tigergraph version number

-#### Node2Vec UDF install
-`tg_node2vec_sub()` is a UDF that is called in `tg_node2vec.gsql`. \
-**The code defined in `tg_node2vec_sub.cpp` should be pasted inside the `UDIMPL` namespace inside of `ExprFunctions.hpp`
+### Getting UDF
+`node2vec()` is a user-defined function utilized in `node2vec_query.gsql` \
+**The code defined in `UDF` should be pasted inside the `UDIMPL` namespace inside of `ExprFunctions.hpp`
 ```bash
 # open file and paste code

 $ vim ~/tigergraph/app/<TGversion>/dev/gdk/gsql/src/QueryUdf/ExprFunctions.hpp
 ```

-##### Getting Word2vec file
+### Getting Word2vec file
 There are multiple options to get `word2vec.h`
 1. Download/Copy `word2vec.h` file into `~/tigergraph/app/<TGversion>/dev/gdk/gsdk/include` directory
 2. Create the file and copy the code from `word2vec.h` and paste it into the newly created file (steps shown below)
@@ -56,7 +30,7 @@ $ cd ~/tigergraph/app/<TGversion>/dev/gdk/gsdk/include/
 $ vim word2vec.h
 ```

-##### Including word2vec
+### Including word2vec
 The newly created `word2vec.h` needs to be included in the `ExprUtil.hpp` file
 ```bash
 $ vim ~/tigergraph/app/<TGversion>/dev/gdk/gsql/src/QueryUdf/ExprUtil.hpp
@@ -86,7 +60,7 @@ $ PUT ExprFunctions from "/home/tigergraph/tigergraph/app/<TGversion>/dev/gdk/gs
 ### Running Queries
 ** The following instructions can be done with GraphStudio or GSQL terminal
 1. Install the `random_walk` query
-2. Run query `random_walk` with desired parameters. Visit https://docs.tigergraph.com/graph-ml/current/node-embeddings/node2vec for a description of the random walk query parameters. Make sure that TigerGraph has the correct permissions to write to the output directory you specify.
+2. Run query `random_walk` with desired parameters. Visit https://docs.tigergraph.com/tigergraph-platform-overview/graph-algorithm-library#parameters for a description of the random walk query parameters
 3. (optional) Inspect output of random_walk query
 ```bash
 # For the default filepath parameter
algorithms/GraphML/Embeddings/weisfeiler_lehman/tg_algo_weisfeiler_lehman.yml

+2 -2

@@ -7,9 +7,9 @@
 # [x|y...|z] : file/directory x, y, or z

 algorithm:
-  name: weisfeiler lehman
+  name: Weisfeiler-Lehman
   filename: "tg_weisfeiler_lehman.gsql"
   sha_id: 4569e4faa05d46caa2c333636fe37a2fb85da4dc
-  description: "need to add"
+  description: "This algorithm finds corresponding vertices in isomorphic subgraphs in a graph."
   version: lib3.0
   include: true
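
For context, a minimal Python sketch of Weisfeiler-Lehman color refinement, the idea behind this algorithm (the toy graph and helper name are illustrative, not the library's GSQL): each vertex repeatedly rehashes its own label together with the sorted multiset of its neighbors' labels, and vertices that keep receiving matching labels play the same structural role, such as corresponding vertices of isomorphic subgraphs.

```python
def wl_labels(adj, rounds=3):
    labels = {v: 0 for v in adj}            # start with one shared label
    for _ in range(rounds):
        signatures = {
            v: (labels[v], tuple(sorted(labels[u] for u in adj[v])))
            for v in adj
        }
        # Compress each distinct signature to a small integer label.
        compress = {sig: i for i, sig in enumerate(sorted(set(signatures.values())))}
        labels = {v: compress[signatures[v]] for v in adj}
    return labels

# Two disjoint stars: the two centers end up with one label, the leaves with another.
adj = {1: [2, 3], 2: [1], 3: [1], 4: [5, 6], 5: [4], 6: [4]}
print(wl_labels(adj))
```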

algorithms/Path/cycle_component/tg_algo_cycle_component.yml

+3 -3

@@ -7,9 +7,9 @@
 # [x|y...|z] : file/directory x, y, or z

 algorithm:
-  name: cycle component
+  name: Cycle Component
   filename: "tg_cycle_component.gsql"
   sha_id: 4569e4faa05d46caa2c333636fe37a2fb85da4dc
-  description: "need to add"
+  description: "Finds components which are on cycles without printing the entire cycle."
   version: lib3.0
-  include: false
+  include: true
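
As a point of reference for the description above (not the library's tg_cycle_component implementation, whose approach may differ): in a directed graph a vertex lies on a cycle exactly when it can reach itself through at least one edge. A naive Python check of that property (helper name and toy graph are made up; real implementations would use strongly connected components instead of one search per vertex):

```python
from collections import deque

def on_cycle(graph, v):
    # v lies on a directed cycle iff some successor can reach v again.
    seen, frontier = set(), deque(graph.get(v, []))
    while frontier:
        node = frontier.popleft()
        if node == v:
            return True
        if node in seen:
            continue
        seen.add(node)
        frontier.extend(graph.get(node, []))
    return False

graph = {1: [2], 2: [3], 3: [1, 4], 4: []}
print([v for v in graph if on_cycle(graph, v)])  # [1, 2, 3]
```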
