
Commit 8de11de

update public repo

1 parent 564391c commit 8de11de

File tree: 18 files changed, +133 -136 lines

algorithms/Centrality/closeness/exact/tg_closeness_cent.gsql

+22 -20

@@ -1,4 +1,4 @@
-CREATE QUERY tg_closeness_cent(SET<STRING> v_type, SET<STRING> e_type, SET<STRING> re_type,INT max_hops = 10,
+CREATE QUERY tg_closeness_cent(SET<STRING> v_type, SET<STRING> e_type, STRING re_type,INT max_hops = 10,
   INT top_k = 100, BOOL wf = TRUE, BOOL print_accum = True, STRING result_attr = "",
   STRING file_path = "", BOOL display_edges = FALSE) SYNTAX V1 {

@@ -31,7 +31,7 @@ CREATE QUERY tg_closeness_cent(SET<STRING> v_type, SET<STRING> e_type, SET<STRIN
   FILE f (file_path);
   INT num_vert;
   INT batch_number;
-  # Compute closeness
+  # Compute closeness
   all = {v_type};

   num_vert = all.size();
@@ -44,23 +44,23 @@ CREATE QUERY tg_closeness_cent(SET<STRING> v_type, SET<STRING> e_type, SET<STRIN
          FROM all:s
          WHERE getvid(s)%batch_number == i
          POST-ACCUM
-            @@map+=(getvid(s)->0),
+            @@map+=(getvid(s)->0),
             @@batch_set+=getvid(s);

   FOREACH ver in @@batch_set DO
       @@map+=(ver->@@sum_count);
-      @@sum_count+=1;
+      @@sum_count+=1;
   END; #set a unique ID for each vertex, ID from 1-63

   Start = SELECT s
           FROM Start:s
           POST-ACCUM
-              s.@sum_id=@@map.get(getvid(s));
+              s.@sum_id=@@map.get(getvid(s));

   Start = Select s
           FROM Start:s
           POST-ACCUM
-              s.@bitwise_or_seen=1<<s.@sum_id,
+              s.@bitwise_or_seen=1<<s.@sum_id,
               s.@bitwise_or_visit=1<<s.@sum_id; # set initial seen and visit s.@seen1 s.@seen2
   @@batch_set.clear();
   @@map.clear();
@@ -92,13 +92,13 @@ CREATE QUERY tg_closeness_cent(SET<STRING> v_type, SET<STRING> e_type, SET<STRIN
   END;

   #Output
-  IF file_path != "" THEN
-      f.println("Vertex_ID", "Closeness");
-  END;
-
+  IF file_path != "" THEN
+      f.println("Vertex_ID", "Closeness");
+  END;
+
   Start = SELECT s
           FROM all:s
-          # Calculate Closeness Centrality for each vertex
+          # Calculate Closeness Centrality for each vertex
           WHERE s.@sum_res>0
           POST-ACCUM
               IF wf THEN
@@ -107,15 +107,15 @@ CREATE QUERY tg_closeness_cent(SET<STRING> v_type, SET<STRING> e_type, SET<STRIN
                   s.@sum_score = s.@sum_size*1.0/s.@sum_res*1.0
               END,

-              IF result_attr != "" THEN
+              IF result_attr != "" THEN
                   s.setAttr(result_attr, s.@sum_score)
               END,

-              IF print_accum THEN
+              IF print_accum THEN
                   @@top_scores_heap += Vertex_Score(s, s.@sum_score)
               END,

-              IF file_path != "" THEN
+              IF file_path != "" THEN
                   f.println(s, s.@sum_score)
               END;
   #test
@@ -128,12 +128,14 @@ CREATE QUERY tg_closeness_cent(SET<STRING> v_type, SET<STRING> e_type, SET<STRIN
       @@top_scores_heap += Vertex_Score(s, -1);
       PRINT @@top_scores_heap AS top_scores;
       IF display_edges THEN
-          PRINT Start[Start.@sum_score];
-          Start = SELECT s
-                  FROM Start:s -(e_type:e)-:t
-                  ACCUM
-                      @@edge_set += e;
-          PRINT @@edge_set;
+          Start = SELECT s
+                  FROM all:s
+                  WHERE s.@sum_res>0;
+          PRINT Start[Start.@sum_score];
+          Start = SELECT s
+                  FROM Start:s -(e_type:e)-:t
+                  ACCUM @@edge_set += e;
+          PRINT @@edge_set;
       END;
   END;
}
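
For reference, a minimal Python sketch of the score this query computes (not the GSQL itself; the adjacency dict, vertex ids, and helper name are made up for illustration): BFS from each source, then score = (vertices reached) / (sum of distances), which is roughly what @sum_size / @sum_res expresses above, and the wf flag appears to apply the usual Wasserman-Faust scaling by (reached) / (n - 1).

```python
from collections import deque

def closeness(adj, n, wf=True):
    scores = {}
    for src in adj:
        # BFS distances from src.
        dist = {src: 0}
        queue = deque([src])
        while queue:
            u = queue.popleft()
            for v in adj[u]:
                if v not in dist:
                    dist[v] = dist[u] + 1
                    queue.append(v)
        total = sum(dist.values())
        reachable = len(dist) - 1           # excludes src itself
        if total == 0:
            continue                        # isolated vertex: no score
        score = reachable / total
        if wf:
            score *= reachable / (n - 1)    # Wasserman-Faust correction (assumed)
        scores[src] = score
    return scores

adj = {1: [2], 2: [1, 3], 3: [2]}
print(closeness(adj, n=3))                  # middle vertex 2 scores highest
```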

algorithms/Centrality/degree/weighted/tg_algo_weighted_degree_cent.yml

+2 -2

@@ -7,10 +7,10 @@
 # [x|y...|z] : file/directory x, y, or z

 algorithm:
-  name: weighted degree cent
+  name: Weighted Degree Centrality
   filename: "tg_weighted_degree_cent.gsql"
   sha_id: 4569e4faa05d46caa2c333636fe37a2fb85da4dc
-  description: "need to add"
+  description: "Calculates the weighted degree centrality for each vertex in a graph."
   version: lib3.0
   include: true

algorithms/Centrality/degree/weighted/tg_weighted_degree_cent.gsql

+11 -11

@@ -38,20 +38,20 @@ CREATE QUERY tg_weighted_degree_cent(STRING v_type, STRING e_type, STRING re_typ

   Start = SELECT s
           FROM all:s
-          POST-ACCUM
-              IF result_attr != "" THEN
-                  s.setAttr(result_attr, s.@sum_degree_score)
-              END,
+          POST-ACCUM
+              IF result_attr != "" THEN
+                  s.setAttr(result_attr, s.@sum_degree_score)
+              END,

-              IF print_accum THEN
-                  @@top_scores_heap += Vertex_Score(s, s.@sum_degree_score)
-              END,
+              IF print_accum THEN
+                  @@top_scores_heap += Vertex_Score(s, s.@sum_degree_score)
+              END,

-              IF file_path != "" THEN
-                  f.println(s, s.@sum_degree_score)
-              END;
+              IF file_path != "" THEN
+                  f.println(s, s.@sum_degree_score)
+              END;

   IF print_accum THEN
       PRINT @@top_scores_heap AS top_scores;
   END;
-}
+}
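
As a plain-Python illustration of the description added in the YAML above (the edge list and names here are hypothetical, not part of the library), a vertex's weighted degree centrality is simply the sum of the weights on its incident edges:

```python
from collections import defaultdict

def weighted_degree(edges):
    # Score of a vertex = sum of the weights on its incident edges.
    score = defaultdict(float)
    for u, v, w in edges:
        score[u] += w
        score[v] += w
    return dict(score)

print(weighted_degree([("a", "b", 2.0), ("a", "c", 0.5)]))
# {'a': 2.5, 'b': 2.0, 'c': 0.5}
```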

algorithms/Centrality/eigenvector/tg_eigenvector_cent.gsql

+10 -10

@@ -16,7 +16,7 @@ CREATE QUERY tg_eigenvector_cent(SET<STRING> v_type, SET<STRING> e_type, INT max
   HeapAccum<Vertex_Score>(top_k, score DESC) @@top_scores_heap;
   SumAccum<FLOAT> @@sum_squares_eigen_values;
   SumAccum<FLOAT> @sum_received_value;
-  SumAccum<FLOAT> @sum_eigen_value = 1;
+  SumAccum<FLOAT> @sum_eigen_value = 1.0;
   SumAccum<FLOAT> @@sum_cur_norm_values;
   SumAccum<FLOAT> @@sum_prev_norm_values;
   FLOAT conv_value = 9999;
@@ -27,21 +27,21 @@ CREATE QUERY tg_eigenvector_cent(SET<STRING> v_type, SET<STRING> e_type, INT max
       @@sum_cur_norm_values = 0;
       V = SELECT s
           FROM Start:s - (e_type:e) - v_type:t
-          ACCUM t.@sum_received_value += s.@sum_eigen_value
+          ACCUM t.@sum_received_value += s.@sum_eigen_value;
+      V = SELECT s
+          FROM Start:s
           POST-ACCUM s.@sum_eigen_value = s.@sum_received_value,
                      @@sum_squares_eigen_values += s.@sum_eigen_value * s.@sum_eigen_value,
                      s.@sum_received_value = 0;
-      p = SELECT s
-          FROM V:s
-          LIMIT 10;

       V = SELECT s
           FROM V:s
           POST-ACCUM s.@sum_eigen_value = s.@sum_eigen_value / sqrt(@@sum_squares_eigen_values),
                      @@sum_cur_norm_values += s.@sum_eigen_value;
+
       conv_value = abs(@@sum_cur_norm_values - @@sum_prev_norm_values);
       @@sum_prev_norm_values = @@sum_cur_norm_values;
-
+
   END;
   #Output
   IF file_path != "" THEN
@@ -53,16 +53,16 @@ CREATE QUERY tg_eigenvector_cent(SET<STRING> v_type, SET<STRING> e_type, INT max
           IF s.@sum_eigen_value==1.0 THEN
               s.@sum_eigen_value+=-1
           END
-          POST-ACCUM
-              IF result_attr != "" THEN
+          POST-ACCUM
+              IF result_attr != "" THEN
                   s.setAttr(result_attr, s.@sum_eigen_value)
               END,

-              IF print_accum THEN
+              IF print_accum THEN
                   @@top_scores_heap += Vertex_Score(s, s.@sum_eigen_value)
               END,

-              IF file_path != "" THEN
+              IF file_path != "" THEN
                   f.println(s, s.@sum_eigen_value)
               END;

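
The loop shown in this diff is essentially a power iteration. A minimal Python sketch of the same scheme (illustrative only, with a made-up adjacency dict) shows the neighbor-sum step, the square-root-of-sum-of-squares normalization, and the convergence test on the change in the summed values, mirroring conv_value above:

```python
import math

def eigenvector_centrality(adj, max_iter=100, tol=1e-6):
    values = {v: 1.0 for v in adj}        # every vertex starts at 1.0
    prev_sum = 0.0
    for _ in range(max_iter):
        received = {v: 0.0 for v in adj}
        for s, nbrs in adj.items():       # each vertex sends its value to its neighbors
            for t in nbrs:
                received[t] += values[s]
        norm = math.sqrt(sum(x * x for x in received.values())) or 1.0
        values = {v: x / norm for v, x in received.items()}
        cur_sum = sum(values.values())
        if abs(cur_sum - prev_sum) < tol:  # same kind of convergence check as conv_value
            break
        prev_sum = cur_sum
    return values

adj = {1: [2, 3], 2: [1, 3], 3: [1, 2]}
print(eigenvector_centrality(adj))        # symmetric triangle: all scores equal
```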

(filename not shown)

@@ -1,2 +1,2 @@
 ---
-description: "Centrality algorithms calculate the 'importance' of each vertex given a particular metric. These metrics generally revolve around density of a vertex's connectivity or the importance of that vertex to the general connectivity of the entire graph. Some widely used examples include Betweenness Centrality, which produces scores for vertices based on the number of shortest paths that they appear in and Closeness Centrality, which measures importance inversely proportional to how 'far' the vertex is away from every other vertex."
+description: "Centrality algorithms calculate the centrality of each vertex given a particular metric."

algorithms/Community/louvain/tg_louvain.gsql

+1 -1

@@ -8,7 +8,7 @@ CREATE QUERY tg_louvain(SET<STRING> v_type, SET<STRING> e_type, STRING wt_attr =
   Parameters:
     v_type: vertex types to traverse
     e_type: edge types to traverse
-    wt_attr: attribute name for edge weights use empty string if graph is unweighted
+    wt_attr: attribute name for edge weights use empty string is graph is unweighted
     wt_attr type is hardcoded to FLOAT INT or DOUBLE can be supported by changing all `e.getAttr(wt_attr, "FLOAT")`
     to `e.getAttr(wt_attr, "INT")` or `e.getAttr(wt_attr, "DOUBLE")`
     * note: when there is a weight attribute missmatch, there may not be an explicit error message

algorithms/Community/triangle_counting/fast/tg_algo_tri_count_fast.yml

+1 -1

@@ -12,4 +12,4 @@
   sha_id: ed6ea869749977cc0f3df71225d7325fb81c9767
   description: "The fast version of the Triangle Counting algorithm is faster than the standard version, but uses some additional memory."
   version: lib3.0
-  include: false
+  include: true

algorithms/Community/triangle_counting/fast/tg_tri_count_fast.gsql

+6 -8

@@ -4,25 +4,23 @@ CREATE QUERY tg_tri_count_fast(STRING v_type, STRING e_type) SYNTAX V1 {
   # This algorithm is faster than tri_count but uses additional memory for temporary storage
   SumAccum<INT> @@sum_cnt;
   SetAccum<VERTEX> @neighbors_set;
-  SumAccum<INT> @sum_outdegree;
+  //SumAccum<INT> @sum_outdegree;
   all = {v_type};

   # We build up our neighbor lists manually because we'll only build them up on the 2 smaller vertices on a triangle.
-  tmp = SELECT t
-        FROM all:s-(e_type)- v_type:t
-        ACCUM s.@sum_outdegree+=1;
+
   tmp = SELECT t
         FROM all:s-(e_type)- v_type:t
-        WHERE s.@sum_outdegree > t.@sum_outdegree OR (s.@sum_outdegree == t.@sum_outdegree AND getvid(s) > getvid(t))
+        WHERE getvid(s) > getvid(t)
         ACCUM t.@neighbors_set += s;

   # Here we compute the intersection for 2 points on the triangle.
   tmp = SELECT t
         FROM all:s-((e_type))- :t
-        WHERE s != t
+        WHERE getvid(s) > getvid(t)
         ACCUM @@sum_cnt += COUNT(s.@neighbors_set INTERSECT t.@neighbors_set);

-  # Divide by 2 because every triangle was counted twice
-  PRINT @@sum_cnt/2 AS num_triangles;
+  # print result
+  PRINT @@sum_cnt AS num_triangles;

 }
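
The revised query orients every undirected edge by vertex id, stores only higher-id neighbors, and intersects the two endpoints' sets, so each triangle is counted exactly once and the old divide-by-two step is no longer needed. A minimal Python sketch of the same counting idea (the edge list and function name are illustrative, not the GSQL):

```python
from collections import defaultdict

def triangle_count(edges):
    # edges: each undirected edge listed once, as a pair of comparable ids
    larger = defaultdict(set)              # neighbors with a larger id
    for u, v in edges:
        if u != v:
            larger[min(u, v)].add(max(u, v))
    count = 0
    for u, v in edges:
        if u != v:
            # each triangle is counted exactly once: from the edge joining its two
            # smallest vertices, whose larger-id neighbor sets share the third vertex
            count += len(larger[min(u, v)] & larger[max(u, v)])
    return count

print(triangle_count([(1, 2), (2, 3), (1, 3), (3, 4)]))  # 1
```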

algorithms/GraphML/Embeddings/FastRP/tg_fastRP.gsql

+4 -4

@@ -1,7 +1,7 @@
 CREATE QUERY tg_fastRP(SET<STRING> v_type, SET<STRING> e_type,
   STRING weights, FLOAT beta, INT k, INT reduced_dim,
   INT sampling_constant, INT random_seed,
-  BOOL print_accum=FALSE, STRING result_attr="") SYNTAX v1{
+  BOOL print_accum=FALSE, STRING result_attr="") {
 /*
 This query produces an embedding for each vertex.
 Assumes that tg_fastRP_preprocessing has been run first.
@@ -123,9 +123,9 @@ CREATE QUERY tg_fastRP(SET<STRING> v_type, SET<STRING> e_type,

   IF print_accum THEN
     res = SELECT a FROM verts:a;
-    PRINT res[res.@final_embedding_list];
+    PRINT res[res.@final_embedding_arr];
   END;
   IF result_attr != "" THEN
-    storeEmbeddings = SELECT s FROM verts:s POST-ACCUM s.fastrp_embedding = s.@final_embedding_list;
+    storeEmbeddings = SELECT s FROM verts:s POST-ACCUM s.embedding = s.@final_embedding_list;
   END;
-}
+}
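
For context, a rough Python sketch of the FastRP idea behind this query (illustrative only: it uses a very sparse random projection and per-hop neighbor averaging, omits the query's degree normalization via beta and any per-hop rescaling, and all names, defaults, and the toy graph are made up; only sampling_constant, the hop weights, and the random seed loosely correspond to the query's parameters):

```python
import random
from collections import defaultdict

def fastrp(adj, dim=8, hop_weights=(0.0, 1.0, 1.0), sampling_constant=3, seed=42):
    rng = random.Random(seed)
    s = sampling_constant
    # Very sparse random projection: entries are +/-sqrt(s) with probability 1/s, else 0.
    emb = {
        v: [rng.choice([s ** 0.5, -(s ** 0.5)]) if rng.random() < 1.0 / s else 0.0
            for _ in range(dim)]
        for v in adj
    }
    out = {v: [0.0] * dim for v in adj}
    for weight in hop_weights:
        # One propagation step: each vertex becomes the mean of its neighbors.
        emb = {
            v: [sum(emb[u][i] for u in nbrs) / max(len(nbrs), 1) for i in range(dim)]
            for v, nbrs in adj.items()
        }
        # Mix this hop into the final embedding with its per-hop weight.
        for v in adj:
            for i in range(dim):
                out[v][i] += weight * emb[v][i]
    return out

adj = {1: [2], 2: [1, 3], 3: [2]}
print(fastrp(adj)[1])
```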

algorithms/GraphML/Embeddings/Node2Vec/README.md

+8 -34

@@ -1,50 +1,24 @@
 # Node2Vec

-Node2Vec is a vertex embedding algorithm proposed in [node2vec: Scalable Feature Learning for Networks](https://arxiv.org/abs/1607.00653?context=cs). TigerGraph splits the computation into two parts: the random walk process and the embedding training process. Assuming that you are using version 3.6 or greater of the TigerGraph database, ignore the UDF install instructions.
-
 ## [TigerGraph Node2Vec Documentation](https://docs.tigergraph.com/graph-ml/current/node-embeddings/node2vec)

 ## Instructions

-### Random Walk Process Install
-There are two different random walk processes to choose from. The first is regular random walks, implemented in `tg_random_walk.gsql`. This is equivalent to setting `p` and `q` parameters of Node2Vec both to 1, which is also equivalent to the [DeepWalk](https://arxiv.org/pdf/1403.6652.pdf) paper. This version is more performant than `tg_weighted_random_walk.gsql`, due to the less computation that is needed. If the graph is large, you may want to batch the random walk process to reduce memory consumption. Use `tg_random_walk_batch.gsql` if this is desired.
-
-The second option is weighted random walk, as described in the Node2Vec paper. This is implemented in the `tg_weighted_random_walk_sub.gsql` and `tg_weighted_random_walk.gsql`. If your TigerGraph database version is below 3.6, see the UDF installation instructions below. If the graph is large, you may want to batch the random walk process to reduce memory consumption. Use `tg_weighted_random_walk_batch.gsql` with `tg_weighted_random_walk_sub.gsql` if desired.
-
-**To install the un-weighted random walk:** copy the algorithm from `tg_random_walk.gsql` and install it on the database using the standard query install process.
-
-**To install the weighted random walk:** copy `tg_weighted_random_walk_sub.gsql` and install. Then copy and install `tg_weighted_random_walk.gsql`.
-
-### Node2Vec Embedding Install
-Once the random walks have been generated, we can use the output to train the Node2Vec model. To install, make sure the proper UDFs are installed. If you are using a TigerGraph database of version 3.6 or greater, the UDFs are pre-installed.
-
-**To install Node2Vec query:** copy the query from `tg_node2vec.gsql` and install on the database.
-
 ### Preliminary Notes
-Vim is the text editor of choice in this README, any other text editors such as Emacs or Nano will suffice in the commands listed below
+** Vim is the text editor of choice in this README, any other text editors such as Emacs or Nano will suffice in the commands listed below
 \
-`<TGversion>` should be replaced with your current Tigergraph version number
-
-### UDF installation
-
-#### Weighted Random Walk UDF install
-If you are using `tg_weighted_random_walk_sub.gsql`, then you will need to install the `tg_random_udf.cpp`. **The code defined in `tg_random_udf.cpp` should be pasted inside the `UDIMPL`f namespace inside of `ExprFunctions.hpp`.
-```bash
-# open file and paste code
-
-$ vim ~/tigergraph/app/<TGversion>/dev/gdk/gsql/src/QueryUdf/ExprFunctions.hpp
-```
+** `<TGversion>` should be replaced with your current Tigergraph version number

-#### Node2Vec UDF install
-`tg_node2vec_sub()` is a UDF that is called in `tg_node2vec.gsql`. \
-**The code defined in `tg_node2vec_sub.cpp` should be pasted inside the `UDIMPL` namespace inside of `ExprFunctions.hpp`
+### Getting UDF
+`node2vec()` is a user-defined function utilized in `node2vec_query.gsql` \
+**The code defined in `UDF` should be pasted inside the `UDIMPL` namespace inside of `ExprFunctions.hpp`
 ```bash
 # open file and paste code

 $ vim ~/tigergraph/app/<TGversion>/dev/gdk/gsql/src/QueryUdf/ExprFunctions.hpp
 ```

-##### Getting Word2vec file
+### Getting Word2vec file
 There are multiple options to get `word2vec.h`
 1. Download/Copy `word2vec.h` file into `~/tigergraph/app/<TGversion>/dev/gdk/gsdk/include` directory
 2. Create the file and copy the code from `word2vec.h` and paste it into the newly created file (steps shown below)
@@ -56,7 +30,7 @@ $ cd ~/tigergraph/app/<TGversion>/dev/gdk/gsdk/include/
 $ vim word2vec.h
 ```

-##### Including word2vec
+### Including word2vec
 The newly created `word2vec.h` needs to be included in the `ExprUtil.hpp` file
 ```bash
 $ vim ~/tigergraph/app/<TGversion>/dev/gdk/gsql/src/QueryUdf/ExprUtil.hpp
@@ -86,7 +60,7 @@ $ PUT ExprFunctions from "/home/tigergraph/tigergraph/app/<TGversion>/dev/gdk/gs
 ### Running Queries
 ** The following instructions can be done with GraphStudio or GSQL terminal
 1. Install the `random_walk` query
-2. Run query `random_walk` with desired parameters. Visit https://docs.tigergraph.com/graph-ml/current/node-embeddings/node2vec for a description of the random walk query parameters. Make sure that TigerGraph has the correct permissions to write to the output directory you specify.
+2. Run query `random_walk` with desired parameters. Visit https://docs.tigergraph.com/tigergraph-platform-overview/graph-algorithm-library#parameters for a description of the random walk query parameters
 3. (optional) Inspect output of random_walk query
 ```bash
 # For the default filepath parameter
algorithms/GraphML/Embeddings/weisfeiler_lehman/tg_algo_weisfeiler_lehman.yml

+2 -2

@@ -7,9 +7,9 @@
 # [x|y...|z] : file/directory x, y, or z

 algorithm:
-  name: weisfeiler lehman
+  name: Weisfeiler-Lehman
   filename: "tg_weisfeiler_lehman.gsql"
   sha_id: 4569e4faa05d46caa2c333636fe37a2fb85da4dc
-  description: "need to add"
+  description: "This algorithm finds corresponding vertices in isomorphic subgraphs in a graph."
   version: lib3.0
   include: true
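
For context, a minimal Python sketch of Weisfeiler-Lehman color refinement, the idea behind this algorithm (the toy graph and helper name are illustrative, not the library's GSQL): each vertex repeatedly rehashes its own label together with the sorted multiset of its neighbors' labels, and vertices that keep receiving matching labels play the same structural role, such as corresponding vertices of isomorphic subgraphs.

```python
def wl_labels(adj, rounds=3):
    labels = {v: 0 for v in adj}            # start with one shared label
    for _ in range(rounds):
        signatures = {
            v: (labels[v], tuple(sorted(labels[u] for u in adj[v])))
            for v in adj
        }
        # Compress each distinct signature to a small integer label.
        compress = {sig: i for i, sig in enumerate(sorted(set(signatures.values())))}
        labels = {v: compress[signatures[v]] for v in adj}
    return labels

# Two disjoint stars: the two centers end up with one label, the leaves with another.
adj = {1: [2, 3], 2: [1], 3: [1], 4: [5, 6], 5: [4], 6: [4]}
print(wl_labels(adj))
```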

algorithms/Path/cycle_component/tg_algo_cycle_component.yml

+3 -3

@@ -7,9 +7,9 @@
 # [x|y...|z] : file/directory x, y, or z

 algorithm:
-  name: cycle component
+  name: Cycle Component
   filename: "tg_cycle_component.gsql"
   sha_id: 4569e4faa05d46caa2c333636fe37a2fb85da4dc
-  description: "need to add"
+  description: "Finds components which are on cycles without printing the entire cycle."
   version: lib3.0
-  include: false
+  include: true
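
As a point of reference for the description above (not the library's tg_cycle_component implementation, whose approach may differ): in a directed graph a vertex lies on a cycle exactly when it can reach itself through at least one edge. A naive Python check of that property (helper name and toy graph are made up; real implementations would use strongly connected components instead of one search per vertex):

```python
from collections import deque

def on_cycle(graph, v):
    # v lies on a directed cycle iff some successor can reach v again.
    seen, frontier = set(), deque(graph.get(v, []))
    while frontier:
        node = frontier.popleft()
        if node == v:
            return True
        if node in seen:
            continue
        seen.add(node)
        frontier.extend(graph.get(node, []))
    return False

graph = {1: [2], 2: [3], 3: [1, 4], 4: []}
print([v for v in graph if on_cycle(graph, v)])  # [1, 2, 3]
```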
