diff --git a/.gitignore b/.gitignore
index 86b0401..4922a64 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,8 @@ target/*
/.gitignore
/.settings
/.classpath
+/metrics
+/storage
+/results
+/.idea/
+/graphdb-benchmarks.iml
diff --git a/README.md b/README.md
index d1a1956..d39b4c6 100644
--- a/README.md
+++ b/README.md
@@ -1,30 +1,71 @@
graphdb-benchmarks
==================
-The project graphdb-benchmarks is a benchmark between popular graph dataases. Currently the framework supports [Titan](http://thinkaurelius.github.io/titan/), [OrientDB](http://www.orientechnologies.com/orientdb/), [Neo4j](http://neo4j.com/) and [Sparksee](http://www.sparsity-technologies.com/). The purpose of this benchmark is to examine the performance of each graph database in terms of execution time. The benchmark is composed of four workloads, Clustering, Massive Insertion, Single Insertion and Query Workload. Every workload has been designed to simulate common operations in graph database systems.
-
-- *Clustering Workload (CW)*: CW consists of a well-known community detection algorithm for modularity optimization, the Louvain Method. We adapt the algorithm on top of the benchmarked graph databases and employ cache techniques to take advantage of both graph database capabilities and in-memory execution speed. We measure the time the algorithm needs to converge.
-- *Massive Insertion Workload (MIW)*: we create the graph database and configure it for massive loading, then we populate it with a particular dataset. We measure the time for the creation of the whole graph.
-- *Single Insertion Workload (SIW)*: we create the graph database and load it with a particular dataset. Every object insertion (node or edge) is committed directly and the graph is constructed incrementally. We measure the insertion time per block, which consists of one thousand edges and the nodes that appear during the insertion of these edges.
+The project graphdb-benchmarks is a benchmark comparing popular graph databases.
+Currently the framework supports [Titan](http://thinkaurelius.github.io/titan/),
+[OrientDB](http://www.orientechnologies.com/orientdb/), and [Neo4j](http://neo4j.com/).
+The purpose of this benchmark is to examine the performance of each graph database in
+terms of execution time. The benchmark is composed of four workloads: Clustering, Massive
+Insertion, Single Insertion and Query Workload. Every workload has been designed to
+simulate common operations in graph database systems.
+
+- *Clustering Workload (CW)*: CW consists of a well-known community detection algorithm
+for modularity optimization, the Louvain Method. We adapt the algorithm on top of the
+benchmarked graph databases and employ cache techniques to take advantage of both graph
+database capabilities and in-memory execution speed. We measure the time the algorithm
+needs to converge.
+- *Massive Insertion Workload (MIW)*: we create the graph database and configure it for
+massive loading, then we populate it with a particular data set. We measure the time for
+the creation of the whole graph.
+- *Single Insertion Workload (SIW)*: we create the graph database and load it with a
+particular data set. Every object insertion (node or edge) is committed directly and
+the graph is constructed incrementally. We measure the insertion time per block, which
+consists of one thousand edges and the nodes that appear during the insertion of these
+edges.
- *Query Workload (QW)*: we execute three common queries:
- * FindNeighbours (FN): finds the neighbours of all nodes.
+ * FindNeighbours (FN): finds the neighbors of all nodes.
* FindAdjacentNodes (FA): finds the adjacent nodes of all edges.
- * FindShortestPath (FS): finds the shortest path between the first node and 100 randomly picked nodes.
+ * FindShortestPath (FS): finds the shortest path between a random node and 100 other random nodes.
Here we measure the execution time of each query.
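+
+For illustration, a FindNeighbours-style traversal expressed against the TinkerPop 3
+structure API might look like the following sketch (TinkerGraph is only a stand-in here;
+the benchmark's own query implementations live in the GraphDatabase classes):
+
+```java
+import org.apache.tinkerpop.gremlin.structure.Direction;
+import org.apache.tinkerpop.gremlin.structure.Graph;
+import org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerGraph;
+
+public final class FindNeighboursSketch {
+    public static void main(String[] args) {
+        Graph graph = TinkerGraph.open(); // stand-in for Titan, Neo4j or OrientDB
+        // FN: for every vertex, walk its "similar" edges and touch each neighbour
+        graph.vertices().forEachRemaining(v ->
+            v.vertices(Direction.BOTH, "similar").forEachRemaining(neighbour -> neighbour.id()));
+    }
+}
+```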
-For our evaluation we use both synthetic and real data. More specifically, we execute MIW, SIW and QW with real data derived from the SNAP dataset collection ([Enron Dataset](http://snap.stanford.edu/data/email-Enron.html), [Amazon dataset](http://snap.stanford.edu/data/amazon0601.html), [Youtube dataset](http://snap.stanford.edu/data/com-Youtube.html) and [LiveJournal dataset](http://snap.stanford.edu/data/com-LiveJournal.html)). On the other hand, with the CW we use synthetic data generated with the [LFR-Benchmark generator](https://sites.google.com/site/andrealancichinetti/files) which produces networks with power-law degree distribution and implanted communities within the network. The synthetic data can be downloaded form [here](http://figshare.com/articles/Synthetic_Data_for_graphdb_benchmark/1221760).
-
-For further information about the study please refer to the [published paper](http://link.springer.com/chapter/10.1007/978-3-319-10518-5_1) on Springer site and the presentation on [Slideshare](http://www.slideshare.net/sympapadopoulos/adbis2014-presentation).
-
-**Note 1:** The published paper contains the experimental study of Titan, OrientDB and Neo4j. After the publication we included the Sparksee graph database.
-
-**Note 2:** After the very useful comments and contributions of OrientDB developers, we updated the benchmark implementations and re-run the experiments. We have updated the initial presentation with the new results and uploaded a new version of the paper in the following [link](http://mklab.iti.gr/files/beis_adbis2014_corrected.pdf).
-
-**Note 3:** Alexander Patrikalakis, a software developer at Amazon Web Services, refactored the benchmark, added support for Blueprints 2.5 and added support for the DynamoDB Storage Backend for Titan.
+For our evaluation we use both synthetic and real data. More specifically, we execute
+MIW, SIW and QW with real data derived from the SNAP data set collection
+([Enron data set](http://snap.stanford.edu/data/email-Enron.html),
+[Amazon data set](http://snap.stanford.edu/data/amazon0601.html),
+[Youtube data set](http://snap.stanford.edu/data/com-Youtube.html) and
+[LiveJournal data set](http://snap.stanford.edu/data/com-LiveJournal.html)). For the CW,
+on the other hand, we use synthetic data generated with the
+[LFR-Benchmark generator](https://sites.google.com/site/andrealancichinetti/files), which
+produces networks with a power-law degree distribution and implanted communities within the
+network. The synthetic data can be downloaded from
+[here](http://figshare.com/articles/Synthetic_Data_for_graphdb_benchmark/1221760).
+
+For further information about the study, please refer to the
+[published paper](http://link.springer.com/chapter/10.1007/978-3-319-10518-5_1) on the
+Springer site and the presentation on
+[Slideshare](http://www.slideshare.net/sympapadopoulos/adbis2014-presentation).
+
+**Note 1:** The published paper contains the experimental study of Titan, OrientDB and Neo4j.
+After the publication we included the Sparksee graph database. Sparksee does not implement TinkerPop 3 yet.
+
+**Note 2:** After the very useful comments and contributions of OrientDB developers, we
+updated the benchmark implementations and re-ran the experiments. We have updated the
+initial presentation with the new results and uploaded a new version of the paper in the
+following [link](http://mklab.iti.gr/files/beis_adbis2014_corrected.pdf).
+
+**Note 3:** Alexander Patrikalakis, a software developer at Amazon Web Services, refactored
+the benchmark and added support for Apache TinkerPop 3, the DynamoDB Storage Backend for
+Titan, and the Tupl Storage Backend for Titan.
Instructions
------------
-To run the project at first you have to choose one of the aforementioned datasets. Of course you can select any dataset, but because there is not any utility class to convert the dataset in the appropriate format (for now), the format of the data must be identical with the tested datasets. The input parameters are configured from the src/test/resources/input.properties file. Please follow the instructions in this file to select the correct parameters. Then, run `mvn dependency:copy-dependencies && mvn test -Pbench` to execute the benchmarking run.
+To run the project, first choose one of the aforementioned data sets. Of course you can
+select any data set, but because there is no utility class (yet) to convert a data set
+into the appropriate format, the format of the data must be identical to that of the
+tested data sets. The input parameters are configured in the
+src/test/resources/input.properties file. Please follow the instructions in this file
+to select the correct parameters. Then run `mvn install && mvn test -Pbench` to execute
+the benchmarking run.
Results
-------
@@ -88,32 +129,82 @@ Below we list the results of the CW for graphs with 1,000, 5,000, 10,0000, 20,00
####MIW & QW results
Below we list the results of MIW and QW for each dataset.
-
-| Dataset | Workload | Titan | OrientDB | Neo4j |
-| ------- | -------- | ----- | -------- | ----- |
-| EN | MIW |9.36 |62.77 |**6.77** |
-| AM | MIW |34.00 |97.00 |**10.61** |
-| YT | MIW |104.27 |252.15 |**24.69** |
-| LJ | MIW |663.03 |9416.74 |**349.55**|
-| |
-| EN | QW-FN |1.87 |**0.56** |0.95 |
-| AM | QW-FN |6.47 |3.50 |**1.85** |
-| YT | QW-FN |20.71 |9.34 |**4.51** |
-| LJ | QW-FN |213.41 |303.09 |**47.07** |
-| |
-| EN | QW-FA |3.78 |0.71 |**0.16** |
-| AM | QW-FA |13.77 |2.30 |**0.36** |
-| YT | QW-FA |42.82 |6.15 |**1.46** |
-| LJ | QW-FA |460.25 |518.12 |**16.53** |
-| |
-| EN | QW-FS |1.63 |3.09 |**0.16** |
-| AM | QW-FS |0.12 |83.29 |**0.302** |
-| YT | QW-FS |24.87 |23.47 |**0.08** |
-| LJ | QW-FS |123.50 |86.87 |**18.13** |
-
+The results are measured in seconds.
+
+| Dataset | Workload | Titan-BDB | Titan-Tupl | Neo4j |
+| ------- | -------- | ---------- | ---------- | ----------- |
+| 1k | QW-FA | 0.331 | 0.104 | **0.043** |
+| 5k | QW-FA | 2.235 | 0.645 | **0.203** |
+| 10k | QW-FA | 5.059 | 1.182 | **0.389** |
+| EN | QW-FA | 5.842 | 1.653 | **0.403** |
+| 20k | QW-FA | 10.568 | 2.521 | **0.826** |
+| 30k | QW-FA | 18.356 | 4.638 | **1.383** |
+| 40k | QW-FA | 27.907 | 7.107 | **2.010** |
+| 50k | QW-FA | 34.284 | 9.521 | **2.472** |
+| AM | QW-FA | 61.811 | 19.015 | **3.413** |
+| | | | | |
+| 1k | QW-FN | 0.607 | 0.229 | **0.131** |
+| 5k | QW-FN | 2.850 | 0.964 | **0.626** |
+| 10k | QW-FN | 5.960 | 2.063 | **1.349** |
+| EN | QW-FN | 7.711 | 3.915 | **1.633** |
+| 20k | QW-FN | 12.861 | 5.218 | **2.841** |
+| 30k | QW-FN | 21.816 | 8.340 | **4.603** |
+| 40k | QW-FN | 31.187 | 11.632 | **7.272** |
+| 50k | QW-FN | 41.175 | 14.742 | **8.489** |
+| AM | QW-FN | 76.562 | 28.242 | **12.466** |
+| | | | | |
+| 1k | QW-FS | 2.932 | 2.555 | |
+| 5k | QW-FS | 18.743 | 17.995 | |
+| 10k | QW-FS | 31.006 | 30.289 | |
+| EN | QW-FS | | | |
+| 20k | QW-FS | 122.864 | 122.204 | |
+| 30k | QW-FS | 136.276 | 124.886 | |
+| 40k | QW-FS | 276.389 | 261.699 | |
+| 50k | QW-FS | 339.146 | 310.307 | |
+| AM | QW-FS | | | |
+| | | | | |
+| 1k | MIW | 1.204 | 0.696 | **0.481** |
+| 5k | MIW | 4.293 | 2.755 | **1.239** |
+| 10k | MIW | 8.291 | 5.707 | **2.334** |
+| EN | MIW | 9.858 | 6.960 | **2.401** |
+| 20k | MIW | 16.872 | 11.829 | **4.511** |
+| 30k | MIW | 29.851 | 20.081 | **8.767** |
+| 40k | MIW | 44.257 | 34.078 | **12.761** |
+| 50k | MIW | 57.001 | 35.008 | **15.755** |
+| AM | MIW | 98.405 | 64.286 | **23.867** |
+
+Note: the Find Shortest Path benchmark is currently broken, so I did not update the
+QW-FS numbers. Also, OrientDB's TinkerPop 3 implementation is not official yet, so I
+did not run numbers for OrientDB either.
+These benchmarks were performed on the RAM disk (/dev/shm) of an m4.10xlarge instance
+with a maximum heap size of 32 GiB.
+
+I also analyzed the storage footprint of each of these databases.
+I conclude that the storage footprint in MiB of all of them is linear in the
+number of vertices and edges.
+
+| Dataset | Vertices | Edges | Titan-BDB | Titan-Tupl | Neo4j |
+| ------- | -------- | ------- | --------- | ---------- | --------- |
+| 1k | 1000 | 15160 | 1.7 | **0.9** | 1.0 |
+| 5k | 5000 | 148198 | 16.0 | 7.5 | **5.7** |
+| 10k | 10000 | 360632 | 38.4 | 18.3 | **13.1** |
+| EN | 36692 | 367666 | 43.8 | 21.4 | **15.7** |
+| 20k | 20000 | 778900 | 83.8 | 42.0 | **27.7** |
+| 30k | 30000 | 1332020 | 145.4 | 73.8 | **46.8** |
+| 40k | 40000 | 2013894 | 221.2 | 111.3 | **70.2** |
+| 50k | 50000 | 2512092 | 277.0 | 138.1 | **87.3** |
+| AM | 403394 | 3387392 | 441.2 | 213.3 | **147.4** |
+
+Applying a least-squares fit to these data points yields the following estimates of the
+space in KiB required to store each vertex and edge (assuming no labels and no properties).
+A sketch of such a fit follows the table.
+
+| KiB on disk | Titan-BDB | Titan-Tupl | Neo4j |
+| ------------- | --------- | ---------- | --------- |
+| Per vertex | 0.212 | **0.082** | 0.090 |
+| Per edge | 0.108 | 0.055 | **0.034** |
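+
+The sketch below is an assumption-laden illustration, not the exact script used: it fits
+size ≈ a*vertices + b*edges (no intercept) to the Titan-BDB column of the table above by
+solving the 2x2 normal equations.
+
+```java
+// Hypothetical sketch: least-squares estimate of KiB per vertex and KiB per edge.
+public final class StorageFootprintFit {
+    public static void main(String[] args) {
+        double[] vertices = {1000, 5000, 10000, 36692, 20000, 30000, 40000, 50000, 403394};
+        double[] edges = {15160, 148198, 360632, 367666, 778900, 1332020, 2013894, 2512092, 3387392};
+        double[] sizeMiB = {1.7, 16.0, 38.4, 43.8, 83.8, 145.4, 221.2, 277.0, 441.2}; // Titan-BDB column
+        double svv = 0, sve = 0, see = 0, svy = 0, sey = 0;
+        for (int i = 0; i < vertices.length; i++) {
+            double y = sizeMiB[i] * 1024.0; // MiB -> KiB
+            svv += vertices[i] * vertices[i];
+            sve += vertices[i] * edges[i];
+            see += edges[i] * edges[i];
+            svy += vertices[i] * y;
+            sey += edges[i] * y;
+        }
+        // solve [svv sve; sve see] * [a; b] = [svy; sey]
+        double det = svv * see - sve * sve;
+        double kibPerVertex = (see * svy - sve * sey) / det;
+        double kibPerEdge = (svv * sey - sve * svy) / det;
+        System.out.printf("KiB per vertex = %.3f, KiB per edge = %.3f%n", kibPerVertex, kibPerEdge);
+    }
+}
+```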
####SIW results
-Below we list the results of SIW for each dataset.
+Below we list the results of SIW for each data set.

+
+ 1.10.57
-
- org.antlr
- antlr-runtime
- 3.2
-
com.google.guava
guava
- 14.0.1
-
-
- colt
- colt
- 1.2.0
-
-
- commons-codec
- commons-codec
- 1.7
+ 18.0
org.apache.commons
@@ -110,53 +98,34 @@
org.apache.logging.log4j
- log4j-core
+ log4j-1.2-api
${log4j2.version}
- org.apache.geronimo.specs
- geronimo-jta_1.1_spec
- 1.1.1
-
-
- com.tinkerpop.gremlin
- gremlin-groovy
- ${blueprints.version}
-
-
- com.tinkerpop.gremlin
- gremlin-java
- ${blueprints.version}
-
-
- com.github.stephenc.high-scale-lib
- high-scale-lib
- 1.1.2
-
-
- com.carrotsearch
- hppc
- 0.4.2
-
-
- com.sleepycat
- je
- 5.0.73
+ org.apache.logging.log4j
+ log4j-core
+ ${log4j2.version}
- net.java.dev.jna
- jna
- 4.0.0
+ org.slf4j
+ slf4j-log4j12
+ 1.7.12
org.apache.lucene
lucene-core
3.6.2
+
+ org.apache.tinkerpop
+ neo4j-gremlin
+ ${tinkerpop.version}
+
+
org.neo4j
- neo4j-cypher
- ${neo4j.version}
+ neo4j-tinkerpop-api-impl
+ 0.3-2.3.2
org.neo4j
@@ -164,45 +133,9 @@
${neo4j.version}
- com.tinkerpop.blueprints
- blueprints-neo4j2-graph
- ${blueprints.version}
-
-
- ch.qos.logback
- logback-classic
-
-
-
-
- com.orientechnologies
- orientdb-graphdb
- ${orientdb.version}
-
-
- com.tinkerpop
- pipes
- ${blueprints.version}
-
-
- org.slf4j
- slf4j-api
- 1.7.5
-
-
- org.slf4j
- slf4j-log4j12
- 1.7.5
-
-
- org.iq80.snappy
- snappy
- 0.3
-
-
- com.spatial4j
- spatial4j
- 0.3
+ org.apache.tinkerpop
+ gremlin-core
+ ${tinkerpop.version}
com.thinkaurelius.titan
@@ -213,25 +146,12 @@
com.thinkaurelius.titan
titan-cassandra
${titan.version}
-
com.thinkaurelius.titan
titan-hbase
${titan.version}
-
- com.thinkaurelius.titan
- titan-core
- ${titan.version}
-
org.apache.hbase
hbase-client
@@ -239,29 +159,24 @@
com.amazonaws
- dynamodb-titan054-storage-backend
+ dynamodb-titan100-storage-backend
${dynamodb.titan.version}
- com.sparsity
- sparkseejava
- 5.0.0
-
-
- com.tinkerpop.blueprints
- blueprints-sparksee-graph
- ${blueprints.version}
+ jp.classmethod
+ tupl-titan100-storage-backend
+ 1.0.1
junit
junit
- 4.11
+ 4.12
test
- com.codahale.metrics
- metrics-core
- ${metrics.version}
+ com.github.amcp
+ orientdb-gremlin
+ ${tinkerpop.version}.0
@@ -342,16 +257,16 @@
org.apache.maven.plugins
maven-dependency-plugin
- 2.2
+ ${dependency.plugin.version}
copy-dependencies
- package
+ process-test-resources
copy-dependencies
- ${project.build.directory}/dependency
+ ${project.build.directory}/dependencies
false
false
true
@@ -396,7 +311,7 @@
maven-assembly-plugin
- 2.5.3
+ ${maven.assembly.version}
src/assembly/component.xml
@@ -432,6 +347,7 @@
**/GraphDatabaseBenchmarkTest.java
+ -Xmx32g -ea
false
${basedir}/src/test/resources/META-INF/log4j2.xml
@@ -444,22 +360,23 @@
+
+
+ jitpack.io
+ https://jitpack.io
+
+
- org.apache.httpcomponents
- httpclient
- 4.3.6
-
-
- org.apache.httpcomponents
- httpcore
- 4.3.3
+ com.amazonaws
+ aws-java-sdk-dynamodb
+ ${aws.java.sdk.version}
- joda-time
- joda-time
- 2.8.1
+ com.amazonaws
+ DynamoDBLocal
+ ${aws.java.sdk.version}
diff --git a/src/main/java/eu/socialsensor/benchmarks/ClusteringBenchmark.java b/src/main/java/eu/socialsensor/benchmarks/ClusteringBenchmark.java
index 0d7bdd1..208f428 100644
--- a/src/main/java/eu/socialsensor/benchmarks/ClusteringBenchmark.java
+++ b/src/main/java/eu/socialsensor/benchmarks/ClusteringBenchmark.java
@@ -35,24 +35,12 @@
public class ClusteringBenchmark extends BenchmarkBase implements RequiresGraphData
{
private static final Logger LOG = LogManager.getLogger();
- private final List<Integer> cacheValues;
+ private final List<Integer> cachePercentages;
public ClusteringBenchmark(BenchmarkConfiguration config)
{
super(config, BenchmarkType.CLUSTERING);
- this.cacheValues = new ArrayList();
- if (config.getCacheValues() == null)
- {
- int cacheValueMultiplier = (int) config.getCacheIncrementFactor().intValue() * config.getNodesCount();
- for (int i = 1; i <= config.getCacheValuesCount(); i++)
- {
- cacheValues.add(i * cacheValueMultiplier);
- }
- }
- else
- {
- cacheValues.addAll(config.getCacheValues());
- }
+ this.cachePercentages = new ArrayList(config.getCachePercentages());
}
@Override
@@ -93,27 +81,26 @@ public void startBenchmarkInternal()
private SortedMap clusteringBenchmark(GraphDatabaseType type) throws ExecutionException
{
- GraphDatabase<?,?,?,?> graphDatabase = Utils.createDatabaseInstance(bench, type);
- graphDatabase.open();
+ GraphDatabase<?,?,?,?> graphDatabase = Utils.createDatabaseInstance(bench, type, false /*batchLoading*/);
SortedMap timeMap = new TreeMap();
- for (int cacheSize : cacheValues)
+ for (int cachePercentage : cachePercentages)
{
LOG.info("Graph Database: " + type.getShortname() + ", Dataset: " + bench.getDataset().getName()
- + ", Cache Size: " + cacheSize);
+ + ", Cache Size: " + cachePercentage);
- Stopwatch watch = new Stopwatch();
- watch.start();
- LouvainMethod louvainMethodCache = new LouvainMethod(graphDatabase, cacheSize, bench.randomizedClustering());
+ Stopwatch watch = Stopwatch.createStarted();
+ LouvainMethod louvainMethodCache = new LouvainMethod(graphDatabase, cachePercentage,
+ bench.randomizedClustering() ? bench.getRandom() : null);
louvainMethodCache.computeModularity();
- timeMap.put(cacheSize, watch.elapsed(TimeUnit.MILLISECONDS) / 1000.0);
+ timeMap.put(cachePercentage, watch.elapsed(TimeUnit.MILLISECONDS) / 1000.0);
// evaluation with NMI
- Map<Integer, List<Integer>> predictedCommunities = graphDatabase.mapCommunities(louvainMethodCache.getN());
+ Map<Integer, List<Integer>> predictedCommunities = graphDatabase.mapCommunities(louvainMethodCache.getNodeCount());
 Map<Integer, List<Integer>> actualCommunities = mapNodesToCommunities(Utils.readTabulatedLines(
bench.getActualCommunitiesFile(), 4 /* numberOfLinesToSkip */));
Metrics metrics = new Metrics();
- double NMI = metrics.normalizedMutualInformation(bench.getNodesCount(), actualCommunities,
+ double NMI = metrics.normalizedMutualInformation(louvainMethodCache.getNodeCount(), actualCommunities,
predictedCommunities);
LOG.info("NMI value: " + NMI);
}
diff --git a/src/main/java/eu/socialsensor/benchmarks/DeleteGraphBenchmark.java b/src/main/java/eu/socialsensor/benchmarks/DeleteGraphBenchmark.java
index 2c00564..7af3f50 100644
--- a/src/main/java/eu/socialsensor/benchmarks/DeleteGraphBenchmark.java
+++ b/src/main/java/eu/socialsensor/benchmarks/DeleteGraphBenchmark.java
@@ -24,8 +24,7 @@ public DeleteGraphBenchmark(BenchmarkConfiguration bench)
@Override
public void benchmarkOne(GraphDatabaseType type, int scenarioNumber)
{
- Stopwatch watch = new Stopwatch();
- watch.start();
+ Stopwatch watch = Stopwatch.createStarted();
Utils.deleteDatabase(type, bench);
times.get(type).add((double) watch.elapsed(TimeUnit.MILLISECONDS));
}
diff --git a/src/main/java/eu/socialsensor/benchmarks/FindNeighboursOfAllNodesBenchmark.java b/src/main/java/eu/socialsensor/benchmarks/FindNeighboursOfAllNodesBenchmark.java
index 3c24ffa..dd041bf 100644
--- a/src/main/java/eu/socialsensor/benchmarks/FindNeighboursOfAllNodesBenchmark.java
+++ b/src/main/java/eu/socialsensor/benchmarks/FindNeighboursOfAllNodesBenchmark.java
@@ -26,10 +26,8 @@ public FindNeighboursOfAllNodesBenchmark(BenchmarkConfiguration config)
@Override
public void benchmarkOne(GraphDatabaseType type, int scenarioNumber)
{
- GraphDatabase<?,?,?,?> graphDatabase = Utils.createDatabaseInstance(bench, type);
- graphDatabase.open();
- Stopwatch watch = new Stopwatch();
- watch.start();
+ GraphDatabase<?,?,?,?> graphDatabase = Utils.createDatabaseInstance(bench, type, false /*batchLoading*/);
+ Stopwatch watch = Stopwatch.createStarted();
graphDatabase.findAllNodeNeighbours();
graphDatabase.shutdown();
times.get(type).add((double) watch.elapsed(TimeUnit.MILLISECONDS));
diff --git a/src/main/java/eu/socialsensor/benchmarks/FindNodesOfAllEdgesBenchmark.java b/src/main/java/eu/socialsensor/benchmarks/FindNodesOfAllEdgesBenchmark.java
index a201e42..6de1e84 100644
--- a/src/main/java/eu/socialsensor/benchmarks/FindNodesOfAllEdgesBenchmark.java
+++ b/src/main/java/eu/socialsensor/benchmarks/FindNodesOfAllEdgesBenchmark.java
@@ -26,10 +26,8 @@ public FindNodesOfAllEdgesBenchmark(BenchmarkConfiguration config)
@Override
public void benchmarkOne(GraphDatabaseType type, int scenarioNumber)
{
- GraphDatabase<?,?,?,?> graphDatabase = Utils.createDatabaseInstance(bench, type);
- graphDatabase.open();
- Stopwatch watch = new Stopwatch();
- watch.start();
+ GraphDatabase<?,?,?,?> graphDatabase = Utils.createDatabaseInstance(bench, type, false /*batchLoading*/);
+ Stopwatch watch = Stopwatch.createStarted();
graphDatabase.findNodesOfAllEdges();
graphDatabase.shutdown();
times.get(type).add((double) watch.elapsed(TimeUnit.MILLISECONDS));
diff --git a/src/main/java/eu/socialsensor/benchmarks/FindShortestPathBenchmark.java b/src/main/java/eu/socialsensor/benchmarks/FindShortestPathBenchmark.java
index bc78ce0..cc4fa04 100644
--- a/src/main/java/eu/socialsensor/benchmarks/FindShortestPathBenchmark.java
+++ b/src/main/java/eu/socialsensor/benchmarks/FindShortestPathBenchmark.java
@@ -7,6 +7,7 @@
import eu.socialsensor.main.GraphDatabaseType;
import eu.socialsensor.utils.Utils;
+import java.util.List;
import java.util.Set;
import java.util.concurrent.TimeUnit;
@@ -21,23 +22,17 @@
public class FindShortestPathBenchmark extends PermutingBenchmarkBase implements RequiresGraphData
{
- private final Set generatedNodes;
-
public FindShortestPathBenchmark(BenchmarkConfiguration config)
{
super(config, BenchmarkType.FIND_SHORTEST_PATH);
- generatedNodes = DatasetFactory.getInstance().getDataset(config.getDataset())
- .generateRandomNodes(config.getRandomNodes());
}
@Override
public void benchmarkOne(GraphDatabaseType type, int scenarioNumber)
{
- GraphDatabase<?,?,?,?> graphDatabase = Utils.createDatabaseInstance(bench, type);
- graphDatabase.open();
- Stopwatch watch = new Stopwatch();
- watch.start();
- graphDatabase.shortestPaths(generatedNodes);
+ GraphDatabase<?,?,?,?> graphDatabase = Utils.createDatabaseInstance(bench, type, false /*batchLoading*/);
+ Stopwatch watch = Stopwatch.createStarted();
+ graphDatabase.shortestPaths();
graphDatabase.shutdown();
times.get(type).add((double) watch.elapsed(TimeUnit.MILLISECONDS));
}
diff --git a/src/main/java/eu/socialsensor/benchmarks/MassiveInsertionBenchmark.java b/src/main/java/eu/socialsensor/benchmarks/MassiveInsertionBenchmark.java
index 3bfb4d8..fcf30cd 100644
--- a/src/main/java/eu/socialsensor/benchmarks/MassiveInsertionBenchmark.java
+++ b/src/main/java/eu/socialsensor/benchmarks/MassiveInsertionBenchmark.java
@@ -33,15 +33,10 @@ public MassiveInsertionBenchmark(BenchmarkConfiguration config)
public void benchmarkOne(GraphDatabaseType type, int scenarioNumber)
{
logger.debug("Creating database instance for type " + type.getShortname());
- GraphDatabase<?,?,?,?> graphDatabase = Utils.createDatabaseInstance(bench, type);
+ GraphDatabase<?,?,?,?> graphDatabase = Utils.createDatabaseInstance(bench, type, true /*batchLoading*/);
logger.debug("Prepare database instance for type {} for massive loading", type.getShortname());
- // the following step includes provisioning in managed database
- // services. do not measure this time as
- // it is not related to the action of inserting.
- graphDatabase.createGraphForMassiveLoad();
logger.debug("Massive load graph in database type {}", type.getShortname());
- Stopwatch watch = new Stopwatch();
- watch.start();
+ Stopwatch watch = Stopwatch.createStarted();
graphDatabase.massiveModeLoading(bench.getDataset());
logger.debug("Shutdown massive graph in database type {}", type.getShortname());
graphDatabase.shutdownMassiveGraph();
diff --git a/src/main/java/eu/socialsensor/benchmarks/PermutingBenchmarkBase.java b/src/main/java/eu/socialsensor/benchmarks/PermutingBenchmarkBase.java
index 9b857ae..9d3cc4b 100644
--- a/src/main/java/eu/socialsensor/benchmarks/PermutingBenchmarkBase.java
+++ b/src/main/java/eu/socialsensor/benchmarks/PermutingBenchmarkBase.java
@@ -38,8 +38,6 @@ protected PermutingBenchmarkBase(BenchmarkConfiguration bench, BenchmarkType typ
@Override
public void startBenchmarkInternal()
{
- LOG.info(String.format("Executing %s Benchmark . . . .", type.longname()));
-
if (bench.permuteBenchmarks())
{
PermutationIterator iter = new PermutationIterator(
@@ -57,7 +55,7 @@ public void startBenchmarkInternal()
startBenchmarkInternalOnePermutation(bench.getSelectedDatabases(), 1);
}
- LOG.info(String.format("%s Benchmark finished", type.longname()));
+ LOG.info(String.format("%s Benchmark Finished", type.longname()));
post();
}
diff --git a/src/main/java/eu/socialsensor/benchmarks/SingleInsertionBenchmark.java b/src/main/java/eu/socialsensor/benchmarks/SingleInsertionBenchmark.java
index f2cd557..f0a6a5b 100644
--- a/src/main/java/eu/socialsensor/benchmarks/SingleInsertionBenchmark.java
+++ b/src/main/java/eu/socialsensor/benchmarks/SingleInsertionBenchmark.java
@@ -47,8 +47,7 @@ public void post()
@Override
public void benchmarkOne(GraphDatabaseType type, int scenarioNumber)
{
- GraphDatabase<?,?,?,?> graphDatabase = Utils.createDatabaseInstance(bench, type);
- graphDatabase.createGraphForSingleLoad();
+ GraphDatabase<?,?,?,?> graphDatabase = Utils.createDatabaseInstance(bench, type, false /*batchLoading*/);
graphDatabase.singleModeLoading(bench.getDataset(), bench.getResultsPath(), scenarioNumber);
graphDatabase.shutdown();
}
diff --git a/src/main/java/eu/socialsensor/clustering/Cache.java b/src/main/java/eu/socialsensor/clustering/Cache.java
index 5f76615..51ad50c 100644
--- a/src/main/java/eu/socialsensor/clustering/Cache.java
+++ b/src/main/java/eu/socialsensor/clustering/Cache.java
@@ -36,8 +36,10 @@ public class Cache
LoadingCache nodeToCommunityMap; // key=nodeId
// value=communityId
- public Cache(final GraphDatabase<?,?,?,?> graphDatabase, int cacheSize) throws ExecutionException
+ public Cache(final GraphDatabase<?,?,?,?> graphDatabase, int cachePercentage, int nodeCount) throws ExecutionException
{
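+ // interpret cachePercentage as a percentage of the node count and clamp the
+ // resulting cache size to the range [0, nodeCount]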
+ final int cacheSize = Math.max(0, Math.min(nodeCount,
+ Math.round(((float) cachePercentage) / 100.0f * nodeCount)));
nodeNeighbours = CacheBuilder.newBuilder().maximumSize(cacheSize)
 .build(new CacheLoader<Integer, Set<Integer>>() {
 public Set<Integer> load(Integer nodeId)
diff --git a/src/main/java/eu/socialsensor/clustering/LouvainMethod.java b/src/main/java/eu/socialsensor/clustering/LouvainMethod.java
index 8b05957..fd5ea51 100644
--- a/src/main/java/eu/socialsensor/clustering/LouvainMethod.java
+++ b/src/main/java/eu/socialsensor/clustering/LouvainMethod.java
@@ -17,32 +17,32 @@
*/
public class LouvainMethod
{
- boolean isRandomized;
+ private final Random random;
private double resolution = 1.0;
private double graphWeightSum;
- private int N;
+ private int nodeCount;
private List communityWeights;
private boolean communityUpdate = false;
 GraphDatabase<?,?,?,?> graphDatabase;
 Cache cache;
- public LouvainMethod(GraphDatabase<?,?,?,?> graphDatabase, int cacheSize, boolean isRandomized) throws ExecutionException
+ public LouvainMethod(GraphDatabase<?,?,?,?> graphDatabase, int cachePercentage, Random random) throws ExecutionException
{
this.graphDatabase = graphDatabase;
- this.isRandomized = isRandomized;
+ this.random = random;
initialize();
- cache = new Cache(graphDatabase, cacheSize);
+ cache = new Cache(graphDatabase, cachePercentage, nodeCount);
}
private void initialize()
{
- this.N = this.graphDatabase.getNodeCount();// this step takes a long
+ this.nodeCount = this.graphDatabase.getNodeCount();// this step takes a long
// time on dynamodb.
this.graphWeightSum = this.graphDatabase.getGraphWeightSum() / 2;
- this.communityWeights = new ArrayList(this.N);
- for (int i = 0; i < this.N; i++)
+ this.communityWeights = new ArrayList(this.nodeCount);
+ for (int i = 0; i < this.nodeCount; i++)
{
this.communityWeights.add(0.0);
}
@@ -52,7 +52,6 @@ private void initialize()
public void computeModularity() throws ExecutionException
{
- Random rand = new Random();
boolean someChange = true;
while (someChange)
{
@@ -62,12 +61,12 @@ public void computeModularity() throws ExecutionException
{
localChange = false;
int start = 0;
- if (this.isRandomized)
+ if (null != this.random)
{
- start = Math.abs(rand.nextInt()) % this.N;
+ start = Math.abs(random.nextInt()) % this.nodeCount;
}
int step = 0;
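+ // visit every node exactly once, wrapping around from the (possibly randomized) start index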
- for (int i = start; step < this.N; i = (i + 1) % this.N)
+ for (int i = start; step < this.nodeCount; i = (i + 1) % this.nodeCount)
{
step++;
int bestCommunity = updateBestCommunity(i);
@@ -137,18 +136,18 @@ private double q(int nodeCommunity, int community) throws ExecutionException
public void zoomOut()
{
- this.N = this.graphDatabase.reInitializeCommunities();
+ this.nodeCount = this.graphDatabase.reInitializeCommunities();
this.cache.reInitializeCommunities();
- this.communityWeights = new ArrayList(this.N);
- for (int i = 0; i < this.N; i++)
+ this.communityWeights = new ArrayList(this.nodeCount);
+ for (int i = 0; i < this.nodeCount; i++)
{
this.communityWeights.add(graphDatabase.getCommunityWeight(i));
}
}
- public int getN()
+ public int getNodeCount()
{
- return this.N;
+ return this.nodeCount;
}
}
\ No newline at end of file
diff --git a/src/main/java/eu/socialsensor/dataset/Dataset.java b/src/main/java/eu/socialsensor/dataset/Dataset.java
index 47dbb1b..6034f3e 100644
--- a/src/main/java/eu/socialsensor/dataset/Dataset.java
+++ b/src/main/java/eu/socialsensor/dataset/Dataset.java
@@ -1,11 +1,7 @@
package eu.socialsensor.dataset;
import java.io.File;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Set;
+import java.util.*;
import org.apache.commons.math3.util.MathArrays;
@@ -19,37 +15,32 @@
 public class Dataset implements Iterable<List<String>>
{
 private final List<List<String>> data;
+ private final List<Integer> generatedNodes;
- public Dataset(File datasetFile)
+ public Dataset(File datasetFile, Random random, int randomNodeSetSize)
{
data = Utils.readTabulatedLines(datasetFile, 4 /* numberOfLinesToSkip */);
- }
-
- public Set generateRandomNodes(int numRandomNodes)
- {
- Set nodes = new HashSet();
- for (List line : data.subList(4, data.size()))
- {
- for (String nodeId : line)
- {
- nodes.add(nodeId.trim());
- }
+ final Set<Integer> nodes = new HashSet<>();
+ //read node strings and convert to Integers and add to HashSet
+ data.stream().forEach(line -> { //TODO evaluate parallelStream
+ line.stream().forEach(nodeId -> {
+ nodes.add(Integer.valueOf(nodeId.trim()));
+ });
+ });
+ if(randomNodeSetSize > nodes.size()) {
+ throw new IllegalArgumentException("cannot select more random nodes than there are unique nodes in dataset");
}
- List nodeList = new ArrayList(nodes);
- int[] nodeIndexList = new int[nodeList.size()];
- for (int i = 0; i < nodeList.size(); i++)
- {
- nodeIndexList[i] = i;
- }
- MathArrays.shuffle(nodeIndexList);
+ //shuffle
+ final List<Integer> nodeList = new ArrayList<>(nodes);
+ Collections.shuffle(nodeList, random);
- Set generatedNodes = new HashSet();
- for (int i = 0; i < numRandomNodes; i++)
- {
- generatedNodes.add(Integer.valueOf(nodeList.get(nodeIndexList[i])));
+ //choose randomNodeSetSize of them
+ generatedNodes = new ArrayList(randomNodeSetSize);
+ Iterator<Integer> it = nodeList.iterator();
+ while(generatedNodes.size() < randomNodeSetSize) {
+ generatedNodes.add(it.next());
}
- return generatedNodes;
}
@Override
@@ -57,4 +48,11 @@ public Iterator> iterator()
{
return data.iterator();
}
+
+ public List<List<String>> getList() {
+ return new ArrayList<List<String>>(data);
+ }
+ public List<Integer> getRandomNodes() {
+ return generatedNodes;
+ }
}
diff --git a/src/main/java/eu/socialsensor/dataset/DatasetFactory.java b/src/main/java/eu/socialsensor/dataset/DatasetFactory.java
index f34475c..825a9a5 100644
--- a/src/main/java/eu/socialsensor/dataset/DatasetFactory.java
+++ b/src/main/java/eu/socialsensor/dataset/DatasetFactory.java
@@ -3,6 +3,7 @@
import java.io.File;
import java.util.HashMap;
import java.util.Map;
+import java.util.Random;
/**
*
@@ -28,11 +29,18 @@ public static DatasetFactory getInstance()
return theInstance;
}
- public Dataset getDataset(File datasetFile)
+ public Dataset getDataset(File datasetFile) {
+ if (!datasetMap.containsKey(datasetFile))
+ {
+ throw new IllegalArgumentException("no mapping for data file " + datasetFile.getAbsolutePath());
+ }
+ return datasetMap.get(datasetFile);
+ }
+ public Dataset createAndGetDataset(File datasetFile, Random random, int randomNodeSetSize)
{
if (!datasetMap.containsKey(datasetFile))
{
- datasetMap.put(datasetFile, new Dataset(datasetFile));
+ datasetMap.put(datasetFile, new Dataset(datasetFile, random, randomNodeSetSize));
}
return datasetMap.get(datasetFile);
diff --git a/src/main/java/eu/socialsensor/graphdatabases/DepthPredicate.java b/src/main/java/eu/socialsensor/graphdatabases/DepthPredicate.java
new file mode 100644
index 0000000..bb71946
--- /dev/null
+++ b/src/main/java/eu/socialsensor/graphdatabases/DepthPredicate.java
@@ -0,0 +1,28 @@
+package eu.socialsensor.graphdatabases;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.apache.tinkerpop.gremlin.process.traversal.Traverser;
+import org.apache.tinkerpop.gremlin.structure.T;
+
+import java.util.function.Predicate;
+
+/**
+ * Depth predicate for shortest path
+ *
+ * @author Alexander Patrikalakis
+ */
+public class DepthPredicate implements Predicate<Traverser<T>> {
+ private static final Logger LOG = LogManager.getLogger();
+ private final int hops;
+
+ public DepthPredicate(int hops) {
+ this.hops = hops;
+ }
+
+ @Override
+ public boolean test(Traverser<T> it) {
+ LOG.trace("testing {}", it.path());
+ return it.path().size() <= hops;
+ }
+}
diff --git a/src/main/java/eu/socialsensor/graphdatabases/GraphDatabase.java b/src/main/java/eu/socialsensor/graphdatabases/GraphDatabase.java
index 0d47887..2df41ac 100644
--- a/src/main/java/eu/socialsensor/graphdatabases/GraphDatabase.java
+++ b/src/main/java/eu/socialsensor/graphdatabases/GraphDatabase.java
@@ -36,22 +36,6 @@ public interface GraphDatabase
- public void shortestPaths(Set<Integer> nodes);
+ public void shortestPaths();
/**
* Execute findShortestPaths query from the Query interface
- *
- * @param nodes
+ *
+ * @param fromNode
+ * @param toNode
* any number of random nodes
*/
- public void shortestPath(final VertexType fromNode, Integer node);
+ public void shortestPath(final VertexType fromNode, Integer toNode);
/**
* @return the number of nodes
@@ -214,11 +190,4 @@ public interface GraphDatabase
 public Map<Integer, List<Integer>> mapCommunities(int numberOfCommunities);
-
- /**
- *
- * @param nodeId
- * @return return true if node exist, false if not
- */
- public boolean nodeExists(int nodeId);
}
diff --git a/src/main/java/eu/socialsensor/graphdatabases/GraphDatabaseBase.java b/src/main/java/eu/socialsensor/graphdatabases/GraphDatabaseBase.java
index d4992e0..86ed39e 100644
--- a/src/main/java/eu/socialsensor/graphdatabases/GraphDatabaseBase.java
+++ b/src/main/java/eu/socialsensor/graphdatabases/GraphDatabaseBase.java
@@ -1,27 +1,31 @@
package eu.socialsensor.graphdatabases;
import java.io.File;
+import java.util.Iterator;
+import java.util.List;
import java.util.Set;
-import org.neo4j.graphdb.Transaction;
-import org.neo4j.kernel.GraphDatabaseAPI;
+import org.apache.commons.logging.Log;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.Timer;
+import com.google.common.base.Preconditions;
import eu.socialsensor.main.GraphDatabaseBenchmark;
import eu.socialsensor.main.GraphDatabaseType;
-@SuppressWarnings("deprecation")
public abstract class GraphDatabaseBase implements GraphDatabase
{
+ private static final Logger LOG = LogManager.getLogger();
public static final String SIMILAR = "similar";
public static final String QUERY_CONTEXT = ".eu.socialsensor.query.";
public static final String NODE_ID = "nodeId";
public static final String NODE_COMMUNITY = "nodeCommunity";
public static final String COMMUNITY = "community";
+ public static final String NODE_LABEL = "node";
protected final File dbStorageDirectory;
- protected final MetricRegistry metrics = new MetricRegistry();
protected final GraphDatabaseType type;
private final Timer nextVertexTimes;
private final Timer getNeighborsOfVertexTimes;
@@ -29,8 +33,11 @@ public abstract class GraphDatabaseBase
+ protected final List<Integer> randomNodes;
+ protected final int maxHops;
- protected GraphDatabaseBase(GraphDatabaseType type, File dbStorageDirectory)
+ protected GraphDatabaseBase(GraphDatabaseType type, File dbStorageDirectory, List<Integer> randomNodes,
+ int shortestPathMaxHops)
{
this.type = type;
final String queryTypeContext = type.getShortname() + QUERY_CONTEXT;
@@ -40,6 +47,8 @@ protected GraphDatabaseBase(GraphDatabaseType type, File dbStorageDirectory)
this.getOtherVertexFromEdgeTimes = GraphDatabaseBenchmark.metrics.timer(queryTypeContext + "getOtherVertexFromEdge");
this.getAllEdgesTimes = GraphDatabaseBenchmark.metrics.timer(queryTypeContext + "getAllEdges");
this.shortestPathTimes = GraphDatabaseBenchmark.metrics.timer(queryTypeContext + "shortestPath");
+ this.randomNodes = randomNodes;
+ this.maxHops = shortestPathMaxHops;
this.dbStorageDirectory = dbStorageDirectory;
if (!this.dbStorageDirectory.exists())
@@ -50,120 +59,92 @@ protected GraphDatabaseBase(GraphDatabaseType type, File dbStorageDirectory)
@Override
public void findAllNodeNeighbours() {
- //get the iterator
- Object tx = null;
- if(GraphDatabaseType.NEO4J == type) { //TODO fix this
- tx = ((Neo4jGraphDatabase) this).neo4jGraph.beginTx();
- }
- try {
- VertexIteratorType vertexIterator = this.getVertexIterator();
- while(vertexIteratorHasNext(vertexIterator)) {
- VertexType vertex;
- Timer.Context ctxt = nextVertexTimes.time();
+ long nodeDegreeSum = 0;
+ VertexIteratorType vertexIterator = this.getVertexIterator();
+ while(vertexIteratorHasNext(vertexIterator)) {
+ VertexType vertex;
+ Timer.Context ctxt = nextVertexTimes.time();
+ try {
+ vertex = nextVertex(vertexIterator);
+ } finally {
+ ctxt.stop();
+ }
+
+ final EdgeIteratorType edgeNeighborIterator;
+ ctxt = getNeighborsOfVertexTimes.time();
+ try {
+ //gets forward and reverse edges.
+ edgeNeighborIterator = this.getNeighborsOfVertex(vertex);
+ } finally {
+ ctxt.stop();
+ }
+ while(edgeIteratorHasNext(edgeNeighborIterator)) {
+ EdgeType edge;
+ ctxt = nextEdgeTimes.time();
try {
- vertex = nextVertex(vertexIterator);
+ edge = nextEdge(edgeNeighborIterator);
} finally {
ctxt.stop();
}
-
- final EdgeIteratorType edgeNeighborIterator;
- ctxt = getNeighborsOfVertexTimes.time();
+ @SuppressWarnings("unused")
+ Object other;
+ ctxt = getOtherVertexFromEdgeTimes.time();
try {
- edgeNeighborIterator = this.getNeighborsOfVertex(vertex);
+ other = getOtherVertexFromEdge(edge, vertex);
} finally {
ctxt.stop();
}
- while(edgeIteratorHasNext(edgeNeighborIterator)) {
- EdgeType edge;
- ctxt = nextEdgeTimes.time();
- try {
- edge = nextEdge(edgeNeighborIterator);
- } finally {
- ctxt.stop();
- }
- @SuppressWarnings("unused")
- Object other;
- ctxt = getOtherVertexFromEdgeTimes.time();
- try {
- other = getOtherVertexFromEdge(edge, vertex);
- } finally {
- ctxt.stop();
- }
- }
- this.cleanupEdgeIterator(edgeNeighborIterator);
- }
- this.cleanupVertexIterator(vertexIterator);
- if(this instanceof Neo4jGraphDatabase) {
- ((Transaction) tx).success();
- }
- } finally {//TODO fix this
- if(GraphDatabaseType.NEO4J == type) {
- ((Transaction) tx).finish();
+ nodeDegreeSum++;
}
+ this.cleanupEdgeIterator(edgeNeighborIterator);
}
+ this.cleanupVertexIterator(vertexIterator);
+ LOG.debug("The sum of node degrees was " + nodeDegreeSum);
}
@Override
public void findNodesOfAllEdges() {
- Object tx = null;
- if(GraphDatabaseType.NEO4J == type) {//TODO fix this
- tx = ((GraphDatabaseAPI) ((Neo4jGraphDatabase) this).neo4jGraph).tx().unforced().begin();
- }
+ int edges = 0;
+ EdgeIteratorType edgeIterator;
+ Timer.Context ctxt = getAllEdgesTimes.time();
try {
-
- EdgeIteratorType edgeIterator;
- Timer.Context ctxt = getAllEdgesTimes.time();
+ edgeIterator = this.getAllEdges();
+ } finally {
+ ctxt.stop();
+ }
+
+ while(edgeIteratorHasNext(edgeIterator)) {
+ EdgeType edge;
+ ctxt = nextEdgeTimes.time();
try {
- edgeIterator = this.getAllEdges();
+ edge = nextEdge(edgeIterator);
} finally {
ctxt.stop();
}
-
- while(edgeIteratorHasNext(edgeIterator)) {
- EdgeType edge;
- ctxt = nextEdgeTimes.time();
- try {
- edge = nextEdge(edgeIterator);
- } finally {
- ctxt.stop();
- }
- @SuppressWarnings("unused")
- VertexType source = this.getSrcVertexFromEdge(edge);
- @SuppressWarnings("unused")
- VertexType destination = this.getDestVertexFromEdge(edge);
- }
- } finally {//TODO fix this
- if(GraphDatabaseType.NEO4J == type) {
- ((Transaction) tx).close();
- }
+ @SuppressWarnings("unused")
+ VertexType source = this.getSrcVertexFromEdge(edge);
+ @SuppressWarnings("unused")
+ VertexType destination = this.getDestVertexFromEdge(edge);
+ edges++;
}
+ LOG.debug("Counted " + edges + " edges");
}
@Override
- public void shortestPaths(Set nodes) {
- Object tx = null;
- if(GraphDatabaseType.NEO4J == type) {//TODO fix this
- tx = ((Neo4jGraphDatabase) this).neo4jGraph.beginTx();
- }
- try {
- //TODO(amcp) change this to use 100+1 random node list and then to use a sublist instead of always choosing node # 1
- VertexType from = getVertex(1);
- Timer.Context ctxt;
- for(Integer i : nodes) {
- //time this
- ctxt = shortestPathTimes.time();
- try {
- shortestPath(from, i);
- } finally {
- ctxt.stop();
- }
- }
- if(this instanceof Neo4jGraphDatabase) {
- ((Transaction) tx).success();
- }
- } finally {//TODO fix this
- if(GraphDatabaseType.NEO4J == type) {
- ((Transaction) tx).finish();
+ public void shortestPaths() {
+ //the first of the pre-shuffled random nodes is the source; the rest are the destinations
+ final Iterator<Integer> it = randomNodes.iterator();
+ Preconditions.checkArgument(it.hasNext());
+ final VertexType from = getVertex(it.next());
+ Timer.Context ctxt;
+ while(it.hasNext()) {
+ final Integer i = it.next();
+ //time this
+ ctxt = shortestPathTimes.time();
+ try {
+ shortestPath(from, i);
+ } finally {
+ ctxt.stop();
}
}
}
diff --git a/src/main/java/eu/socialsensor/graphdatabases/Neo4jGraphDatabase.java b/src/main/java/eu/socialsensor/graphdatabases/Neo4jGraphDatabase.java
index 61dc1f2..7e96c5d 100644
--- a/src/main/java/eu/socialsensor/graphdatabases/Neo4jGraphDatabase.java
+++ b/src/main/java/eu/socialsensor/graphdatabases/Neo4jGraphDatabase.java
@@ -1,6 +1,7 @@
package eu.socialsensor.graphdatabases;
-import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
import eu.socialsensor.insert.Insertion;
import eu.socialsensor.insert.Neo4jMassiveInsertion;
@@ -9,29 +10,31 @@
import eu.socialsensor.main.GraphDatabaseType;
import eu.socialsensor.utils.Utils;
-import org.neo4j.graphalgo.GraphAlgoFactory;
-import org.neo4j.graphalgo.PathFinder;
+import org.apache.tinkerpop.gremlin.neo4j.structure.Neo4jGraph;
+import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal;
+import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource;
+import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__;
import org.neo4j.graphdb.Direction;
import org.neo4j.graphdb.DynamicLabel;
import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.graphdb.Label;
import org.neo4j.graphdb.Node;
-import org.neo4j.graphdb.Path;
import org.neo4j.graphdb.Relationship;
import org.neo4j.graphdb.RelationshipType;
-import org.neo4j.graphdb.ResourceIterable;
+import org.neo4j.graphdb.ResourceIterator;
import org.neo4j.graphdb.Transaction;
-import org.neo4j.graphdb.factory.GraphDatabaseFactory;
+import org.neo4j.graphdb.schema.IndexCreator;
+import org.neo4j.graphdb.schema.IndexDefinition;
import org.neo4j.graphdb.schema.Schema;
import org.neo4j.helpers.collection.IteratorUtil;
-import org.neo4j.kernel.GraphDatabaseAPI;
-import org.neo4j.kernel.TransactionBuilder;
-import org.neo4j.kernel.Traversal;
+import org.neo4j.kernel.api.exceptions.index.ExceptionDuringFlipKernelException;
+import org.neo4j.tinkerpop.api.impl.Neo4jGraphAPIImpl;
import org.neo4j.tooling.GlobalGraphOperations;
import org.neo4j.unsafe.batchinsert.BatchInserter;
import org.neo4j.unsafe.batchinsert.BatchInserters;
import java.io.File;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
@@ -49,84 +52,80 @@
*/
 public class Neo4jGraphDatabase extends GraphDatabaseBase<Iterator<Node>, Iterator<Relationship>, Node, Relationship>
{
- protected GraphDatabaseService neo4jGraph = null;
- private Schema schema = null;
+ private final GraphDatabaseService neo4jGraph;
+ private final Neo4jGraph neo4jTp;
+ private final Schema schema;
+ private final BatchInserter inserter;
- private BatchInserter inserter = null;
-
- public static enum RelTypes implements RelationshipType
+ public enum RelTypes implements RelationshipType
{
SIMILAR
}
public static Label NODE_LABEL = DynamicLabel.label("Node");
- public Neo4jGraphDatabase(File dbStorageDirectoryIn)
- {
- super(GraphDatabaseType.NEO4J, dbStorageDirectoryIn);
- }
-
- @Override
- public void open()
- {
- neo4jGraph = new GraphDatabaseFactory().newEmbeddedDatabase(dbStorageDirectory.getAbsolutePath());
- try (final Transaction tx = beginUnforcedTransaction())
- {
- try
+ public Neo4jGraphDatabase(File dbStorageDirectoryIn, boolean batchLoading, List<Integer> randomNodes, int shortestPathMaxHops)
+ {
+ super(GraphDatabaseType.NEO4J, dbStorageDirectoryIn, randomNodes, shortestPathMaxHops);
+ if(batchLoading) {
+ neo4jGraph = null;
+ neo4jTp = null;
+ schema = null;
+
+ Map config = new HashMap();
+ config.put("cache_type", "none");
+ config.put("use_memory_mapped_buffers", "true");
+ config.put("neostore.nodestore.db.mapped_memory", "200M");
+ config.put("neostore.relationshipstore.db.mapped_memory", "1000M");
+ config.put("neostore.propertystore.db.mapped_memory", "250M");
+ config.put("neostore.propertystore.db.strings.mapped_memory", "250M");
+
+ try {
+ //the BatchInserters are deprecated and will become private in a future release.
+ inserter = BatchInserters.inserter(dbStorageDirectory, config);
+ } catch (IOException e) {
+ throw new IllegalStateException("unable to create batch inserter in dir " + dbStorageDirectory);
+ }
+ inserter.createDeferredSchemaIndex(NODE_LABEL).on(NODE_ID).create();
+ inserter.createDeferredSchemaIndex(NODE_LABEL).on(COMMUNITY).create();
+ inserter.createDeferredSchemaIndex(NODE_LABEL).on(NODE_COMMUNITY).create();
+ } else {
+ inserter = null;
+ neo4jTp = Neo4jGraph.open(dbStorageDirectory.getAbsolutePath());
+ neo4jGraph = ((Neo4jGraphAPIImpl) neo4jTp.getBaseGraph()).getGraphDatabase();
+ try (final Transaction tx = neo4jGraph.beginTx())
{
- neo4jGraph.schema().awaitIndexesOnline(10l, TimeUnit.MINUTES);
+ schema = neo4jGraph.schema();
+ if(!schemaHasIndexOnVertexLabelProperty(NODE_LABEL.name(), NODE_ID)) {
+ schema.indexFor(NODE_LABEL).on(NODE_ID).create();
+ }
+ if(!schemaHasIndexOnVertexLabelProperty(NODE_LABEL.name(), COMMUNITY)) {
+ schema.indexFor(NODE_LABEL).on(COMMUNITY).create();
+ }
+ if(!schemaHasIndexOnVertexLabelProperty(NODE_LABEL.name(), NODE_COMMUNITY)) {
+ schema.indexFor(NODE_LABEL).on(NODE_COMMUNITY).create();
+ }
+ schema.awaitIndexesOnline(10l, TimeUnit.MINUTES);
tx.success();
}
- catch (Exception e)
- {
- tx.failure();
- throw new BenchmarkingException("unknown error", e);
- }
}
}
- @Override
- public void createGraphForSingleLoad()
- {
- neo4jGraph = new GraphDatabaseFactory().newEmbeddedDatabase(dbStorageDirectory.getAbsolutePath());
- try (final Transaction tx = beginUnforcedTransaction())
- {
- try
- {
- schema = neo4jGraph.schema();
- schema.indexFor(NODE_LABEL).on(NODE_ID).create();
- schema.indexFor(NODE_LABEL).on(COMMUNITY).create();
- schema.indexFor(NODE_LABEL).on(NODE_COMMUNITY).create();
- tx.success();
+ private boolean schemaHasIndexOnVertexLabelProperty(String label, String propertyName) {
+ final List targetPropertyList = Lists.newArrayList(propertyName);
+ for(IndexDefinition def : schema.getIndexes()) {
+ if(!def.getLabel().name().equals(label)) {
+ continue;
}
- catch (Exception e)
- {
- tx.failure();
- throw new BenchmarkingException("unknown error", e);
+ //the label is the same here
+ final List definitionProps = Lists.newArrayList(def.getPropertyKeys());
+ if(definitionProps.equals(targetPropertyList)) {
+ return true;
+ } else {
+ continue; //keep looking
}
}
- }
-
- @Override
- public void createGraphForMassiveLoad()
- {
- Map config = new HashMap();
- config.put("cache_type", "none");
- config.put("use_memory_mapped_buffers", "true");
- config.put("neostore.nodestore.db.mapped_memory", "200M");
- config.put("neostore.relationshipstore.db.mapped_memory", "1000M");
- config.put("neostore.propertystore.db.mapped_memory", "250M");
- config.put("neostore.propertystore.db.strings.mapped_memory", "250M");
-
- inserter = BatchInserters.inserter(dbStorageDirectory.getAbsolutePath(), config);
- createDeferredSchema();
- }
-
- private void createDeferredSchema()
- {
- inserter.createDeferredSchemaIndex(NODE_LABEL).on(NODE_ID).create();
- inserter.createDeferredSchemaIndex(NODE_LABEL).on(COMMUNITY).create();
- inserter.createDeferredSchemaIndex(NODE_LABEL).on(NODE_COMMUNITY).create();
+ return false;
}
@Override
@@ -181,37 +180,84 @@ public void shutdownMassiveGraph()
{
throw new BenchmarkingException("could not remove lock");
}
+ }
- inserter = null;
+ @Override
+ public void shortestPaths() {
+ try (Transaction tx = neo4jGraph.beginTx()) {
+ try {
+ super.shortestPaths();
+ tx.success();
+ } catch(Exception e) {
+ tx.failure();
+ }
+ }
}
@Override
- public void shortestPath(Node n1, Integer i)
- {
- PathFinder finder
- = GraphAlgoFactory.shortestPath(Traversal.expanderForTypes(Neo4jGraphDatabase.RelTypes.SIMILAR), 5);
- Node n2 = getVertex(i);
- Path path = finder.findSinglePath(n1, n2);
+ public void findNodesOfAllEdges() {
+ try (Transaction tx = neo4jGraph.beginTx()) {
+ try {
+ super.findNodesOfAllEdges();
+ tx.success();
+ } catch(Exception e) {
+ tx.failure();
+ }
+ }
+ }
- @SuppressWarnings("unused")
- int length = 0;
- if (path != null)
- {
- length = path.length();
+ @Override
+ public void findAllNodeNeighbours() {
+ try (Transaction tx = neo4jGraph.beginTx()) {
+ try{
+ super.findAllNodeNeighbours();
+ tx.success();
+ } catch(Exception e) {
+ tx.failure();
+ }
}
}
- //TODO can unforced option be pulled into configuration?
- private Transaction beginUnforcedTransaction() {
- final TransactionBuilder builder = ((GraphDatabaseAPI) neo4jGraph).tx().unforced();
- return builder.begin();
+ @Override
+ public void shortestPath(Node n1, Integer i)
+ {
+// PathFinder finder
+// = GraphAlgoFactory.shortestPath(PathExpanders.forType(Neo4jGraphDatabase.RelTypes.SIMILAR), maxHops);
+// Node n2 = getVertex(i);
+// Path path = finder.findSinglePath(n1, n2);
+//
+// @SuppressWarnings("unused")
+// int length = 0;
+// if (path != null)
+// {
+// length = path.length();
+// }
+ final GraphTraversalSource g = neo4jTp.traversal();
+ final DepthPredicate maxDepth = new DepthPredicate(maxHops);
+ final Integer fromNodeId = (Integer) n1.getProperty(NODE_ID);
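+ // find one simple path from the source vertex to vertex i over outgoing "similar" edges;
+ // the DepthPredicate in the until() step only accepts paths within maxHops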
+ final GraphTraversal<?, org.apache.tinkerpop.gremlin.process.traversal.Path> t =
+ g.V().has(NODE_ID, fromNodeId)
+ .repeat(
+ __.out(SIMILAR)
+ .simplePath())
+ .until(
+ __.has(NODE_ID, i)
+ .and(__.filter( maxDepth ))
+ )
+ .limit(1)
+ .path();
+
+ t.tryNext()
+ .ifPresent( it -> {
+ final int pathSize = it.size();
+ });
}
@Override
public int getNodeCount()
{
int nodeCount = 0;
- try (final Transaction tx = beginUnforcedTransaction())
+ try (final Transaction tx = neo4jGraph.beginTx())
{
try
{
@@ -232,12 +278,11 @@ public int getNodeCount()
public Set getNeighborsIds(int nodeId)
{
Set neighbors = new HashSet();
- try (final Transaction tx = beginUnforcedTransaction())
+ try (final Transaction tx = neo4jGraph.beginTx())
{
try
{
- Node n = neo4jGraph.findNodesByLabelAndProperty(NODE_LABEL, NODE_ID, String.valueOf(nodeId)).iterator()
- .next();
+ Node n = neo4jGraph.findNodes(NODE_LABEL, NODE_ID, String.valueOf(nodeId)).next();
for (Relationship relationship : n.getRelationships(RelTypes.SIMILAR, Direction.OUTGOING))
{
Node neighbour = relationship.getOtherNode(n);
@@ -260,12 +305,11 @@ public Set getNeighborsIds(int nodeId)
public double getNodeWeight(int nodeId)
{
double weight = 0;
- try (final Transaction tx = beginUnforcedTransaction())
+ try (final Transaction tx = neo4jGraph.beginTx())
{
try
{
- Node n = neo4jGraph.findNodesByLabelAndProperty(NODE_LABEL, NODE_ID, String.valueOf(nodeId)).iterator()
- .next();
+ Node n = neo4jGraph.findNodes(NODE_LABEL, NODE_ID, String.valueOf(nodeId)).next();
weight = getNodeOutDegree(n);
tx.success();
}
@@ -297,7 +341,7 @@ public void initCommunityProperty()
int communityCounter = 0;
// maybe commit changes every 1000 transactions?
- try (final Transaction tx = beginUnforcedTransaction())
+ try (final Transaction tx = neo4jGraph.beginTx())
{
try
{
@@ -321,14 +365,15 @@ public void initCommunityProperty()
public Set getCommunitiesConnectedToNodeCommunities(int nodeCommunities)
{
Set communities = new HashSet();
- try (final Transaction tx = beginUnforcedTransaction())
+ try (final Transaction tx = neo4jGraph.beginTx())
{
try
{
- ResourceIterable nodes = neo4jGraph.findNodesByLabelAndProperty(Neo4jGraphDatabase.NODE_LABEL,
+ ResourceIterator nodes = neo4jGraph.findNodes(Neo4jGraphDatabase.NODE_LABEL,
NODE_COMMUNITY, nodeCommunities);
- for (Node n : nodes)
+ while (nodes.hasNext())
{
+ final Node n = nodes.next();
for (Relationship r : n.getRelationships(RelTypes.SIMILAR, Direction.OUTGOING))
{
Node neighbour = r.getOtherNode(n);
@@ -352,13 +397,14 @@ public Set getCommunitiesConnectedToNodeCommunities(int nodeCommunities
public Set getNodesFromCommunity(int community)
{
Set nodes = new HashSet();
- try (final Transaction tx = beginUnforcedTransaction())
+ try (final Transaction tx = neo4jGraph.beginTx())
{
try
{
- ResourceIterable iter = neo4jGraph.findNodesByLabelAndProperty(NODE_LABEL, COMMUNITY, community);
- for (Node n : iter)
+ ResourceIterator iter = neo4jGraph.findNodes(NODE_LABEL, COMMUNITY, community);
+ while (iter.hasNext())
{
+ final Node n = iter.next();
String nodeIdString = (String) (n.getProperty(NODE_ID));
nodes.add(Integer.valueOf(nodeIdString));
}
@@ -378,14 +424,15 @@ public Set getNodesFromNodeCommunity(int nodeCommunity)
{
Set nodes = new HashSet();
- try (final Transaction tx = beginUnforcedTransaction())
+ try (final Transaction tx = neo4jGraph.beginTx())
{
try
{
- ResourceIterable iter = neo4jGraph.findNodesByLabelAndProperty(NODE_LABEL, NODE_COMMUNITY,
+ ResourceIterator iter = neo4jGraph.findNodes(NODE_LABEL, NODE_COMMUNITY,
nodeCommunity);
- for (Node n : iter)
+ while (iter.hasNext())
{
+ final Node n = iter.next();
String nodeIdString = (String) (n.getProperty(NODE_ID));
nodes.add(Integer.valueOf(nodeIdString));
}
@@ -405,21 +452,23 @@ public Set getNodesFromNodeCommunity(int nodeCommunity)
public double getEdgesInsideCommunity(int nodeCommunity, int communityNodes)
{
double edges = 0;
- try (final Transaction tx = beginUnforcedTransaction())
+ try (final Transaction tx = neo4jGraph.beginTx())
{
try
{
- ResourceIterable nodes = neo4jGraph.findNodesByLabelAndProperty(NODE_LABEL, NODE_COMMUNITY,
+ ResourceIterator nodes = neo4jGraph.findNodes(NODE_LABEL, NODE_COMMUNITY,
nodeCommunity);
- ResourceIterable comNodes = neo4jGraph.findNodesByLabelAndProperty(NODE_LABEL, COMMUNITY,
+ ResourceIterator comNodes = neo4jGraph.findNodes(NODE_LABEL, COMMUNITY,
communityNodes);
- for (Node node : nodes)
+ final Set comNodeSet = Sets.newHashSet(comNodes);
+ while (nodes.hasNext())
{
+ final Node node = nodes.next();
Iterable relationships = node.getRelationships(RelTypes.SIMILAR, Direction.OUTGOING);
for (Relationship r : relationships)
{
Node neighbor = r.getOtherNode(node);
- if (Iterables.contains(comNodes, neighbor))
+ if (comNodeSet.contains(neighbor))
{
edges++;
}
@@ -441,14 +490,14 @@ public double getEdgesInsideCommunity(int nodeCommunity, int communityNodes)
public double getCommunityWeight(int community)
{
double communityWeight = 0;
- try (final Transaction tx = beginUnforcedTransaction())
+ try (final Transaction tx = neo4jGraph.beginTx())
{
try
{
- ResourceIterable<Node> iter = neo4jGraph.findNodesByLabelAndProperty(NODE_LABEL, COMMUNITY, community);
- if (Iterables.size(iter) > 1)
+ List<Node> nodes = Lists.newArrayList(neo4jGraph.findNodes(NODE_LABEL, COMMUNITY, community));
+ if (nodes.size() > 1)
{
- for (Node n : iter)
+ for (Node n : nodes)
{
communityWeight += getNodeOutDegree(n);
}
@@ -469,15 +518,15 @@ public double getCommunityWeight(int community)
public double getNodeCommunityWeight(int nodeCommunity)
{
double nodeCommunityWeight = 0;
- try (final Transaction tx = beginUnforcedTransaction())
+ try (final Transaction tx = neo4jGraph.beginTx())
{
try
{
- ResourceIterable<Node> iter = neo4jGraph.findNodesByLabelAndProperty(NODE_LABEL, NODE_COMMUNITY,
- nodeCommunity);
- if (Iterables.size(iter) > 1)
+ List<Node> nodes = Lists.newArrayList(neo4jGraph.findNodes(NODE_LABEL, NODE_COMMUNITY,
+ nodeCommunity));
+ if (nodes.size() > 1)
{
- for (Node n : iter)
+ for (Node n : nodes)
{
nodeCommunityWeight += getNodeOutDegree(n);
}
@@ -497,14 +546,15 @@ public double getNodeCommunityWeight(int nodeCommunity)
@Override
public void moveNode(int nodeCommunity, int toCommunity)
{
- try (final Transaction tx = beginUnforcedTransaction())
+ try (final Transaction tx = neo4jGraph.beginTx())
{
try
{
- ResourceIterable<Node> fromIter = neo4jGraph.findNodesByLabelAndProperty(NODE_LABEL, NODE_COMMUNITY,
+ ResourceIterator<Node> fromIter = neo4jGraph.findNodes(NODE_LABEL, NODE_COMMUNITY,
nodeCommunity);
- for (Node node : fromIter)
+ while (fromIter.hasNext())
{
+ final Node node = fromIter.next();
node.setProperty(COMMUNITY, toCommunity);
}
tx.success();
@@ -522,7 +572,7 @@ public double getGraphWeightSum()
{
int edgeCount = 0;
- try (final Transaction tx = beginUnforcedTransaction())
+ try (final Transaction tx = neo4jGraph.beginTx())
{
try
{
@@ -545,7 +595,7 @@ public int reInitializeCommunities()
Map<Integer, Integer> initCommunities = new HashMap<Integer, Integer>();
int communityCounter = 0;
- try (final Transaction tx = beginUnforcedTransaction())
+ try (final Transaction tx = neo4jGraph.beginTx())
{
try
{
@@ -578,12 +628,11 @@ public int getCommunity(int nodeCommunity)
{
Integer community = 0;
- try (final Transaction tx = beginUnforcedTransaction())
+ try (final Transaction tx = neo4jGraph.beginTx())
{
try
{
- Node node = neo4jGraph.findNodesByLabelAndProperty(NODE_LABEL, NODE_COMMUNITY, nodeCommunity).iterator()
- .next();
+ final Node node = neo4jGraph.findNodes(NODE_LABEL, NODE_COMMUNITY, nodeCommunity).next();
community = (Integer) (node.getProperty(COMMUNITY));
tx.success();
}
@@ -601,13 +650,12 @@ public int getCommunity(int nodeCommunity)
public int getCommunityFromNode(int nodeId)
{
Integer community = 0;
- try (final Transaction tx = beginUnforcedTransaction())
+ try (final Transaction tx = neo4jGraph.beginTx())
{
try
{
// Node node = nodeIndex.get(NODE_ID, nodeId).getSingle();
- Node node = neo4jGraph.findNodesByLabelAndProperty(NODE_LABEL, NODE_ID, String.valueOf(nodeId)).iterator()
- .next();
+ final Node node = neo4jGraph.findNodes(NODE_LABEL, NODE_ID, String.valueOf(nodeId)).next();
community = (Integer) (node.getProperty(COMMUNITY));
tx.success();
}
@@ -626,13 +674,14 @@ public int getCommunitySize(int community)
{
Set<Integer> nodeCommunities = new HashSet<Integer>();
- try (final Transaction tx = beginUnforcedTransaction())
+ try (final Transaction tx = neo4jGraph.beginTx())
{
try
{
- ResourceIterable<Node> nodes = neo4jGraph.findNodesByLabelAndProperty(NODE_LABEL, COMMUNITY, community);
- for (Node n : nodes)
+ ResourceIterator<Node> nodes = neo4jGraph.findNodes(NODE_LABEL, COMMUNITY, community);
+ while (nodes.hasNext())
{
+ final Node n = nodes.next();
Integer nodeCommunity = (Integer) (n.getProperty(COMMUNITY));
nodeCommunities.add(nodeCommunity);
}
@@ -653,16 +702,17 @@ public Map<Integer, List<Integer>> mapCommunities(int numberOfCommunities)
{
Map<Integer, List<Integer>> communities = new HashMap<Integer, List<Integer>>();
- try (final Transaction tx = beginUnforcedTransaction())
+ try (final Transaction tx = neo4jGraph.beginTx())
{
try
{
for (int i = 0; i < numberOfCommunities; i++)
{
- ResourceIterable<Node> nodesIter = neo4jGraph.findNodesByLabelAndProperty(NODE_LABEL, COMMUNITY, i);
+ ResourceIterator<Node> nodesIter = neo4jGraph.findNodes(NODE_LABEL, COMMUNITY, i);
List<Integer> nodes = new ArrayList<Integer>();
- for (Node n : nodesIter)
+ while (nodesIter.hasNext())
{
+ final Node n = nodesIter.next();
String nodeIdString = (String) (n.getProperty(NODE_ID));
nodes.add(Integer.valueOf(nodeIdString));
}
@@ -680,30 +730,6 @@ public Map<Integer, List<Integer>> mapCommunities(int numberOfCommunities)
return communities;
}
- @Override
- public boolean nodeExists(int nodeId)
- {
- try (final Transaction tx = beginUnforcedTransaction())
- {
- try
- {
- ResourceIterable<Node> nodesIter = neo4jGraph.findNodesByLabelAndProperty(NODE_LABEL, NODE_ID, nodeId);
- if (nodesIter.iterator().hasNext())
- {
- tx.success();
- return true;
- }
- tx.success();
- }
- catch (Exception e)
- {
- tx.failure();
- throw new BenchmarkingException("unable to determine if node exists", e);
- }
- }
- return false;
- }
-
@Override
public Iterator<Node> getVertexIterator()
{
@@ -779,9 +805,16 @@ public Node nextVertex(Iterator it)
@Override
public Node getVertex(Integer i)
{
- // note, this probably should be run in the context of an active transaction.
- return neo4jGraph.findNodesByLabelAndProperty(Neo4jGraphDatabase.NODE_LABEL, NODE_ID, i).iterator()
- .next();
+ Node result = null;
+ try (final Transaction tx = neo4jGraph.beginTx()) {
+ try {
+ result = neo4jGraph.findNodes(Neo4jGraphDatabase.NODE_LABEL, NODE_ID, i).next();
+ tx.success();
+ } catch(Exception e) {
+ tx.failure();
+ }
+ }
+ return result;
}
}
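For reference, the Neo4jGraphDatabase hunks above all apply the same migration: the Neo4j 2.x lookup findNodesByLabelAndProperty(...), which returned a ResourceIterable<Node>, is replaced by findNodes(...), which returns a ResourceIterator<Node> and is drained with hasNext()/next() inside an explicit neo4jGraph.beginTx() transaction. The sketch below is illustrative only (not part of the patch) and assumes the neo4jGraph field and the NODE_LABEL/COMMUNITY constants already defined in this class.

    import java.util.ArrayList;
    import java.util.List;
    import org.neo4j.graphdb.Node;
    import org.neo4j.graphdb.ResourceIterator;
    import org.neo4j.graphdb.Transaction;

    // Illustrative sketch (not part of the patch): look up nodes by label and
    // property with the post-migration API, inside an explicit transaction.
    private List<Node> nodesInCommunity(int community)
    {
        final List<Node> result = new ArrayList<Node>();
        try (final Transaction tx = neo4jGraph.beginTx())
        {
            // findNodes(...) returns a ResourceIterator<Node>, so it is consumed
            // with hasNext()/next() rather than a for-each over a ResourceIterable.
            ResourceIterator<Node> nodes = neo4jGraph.findNodes(NODE_LABEL, COMMUNITY, community);
            while (nodes.hasNext())
            {
                result.add(nodes.next());
            }
            tx.success();
        }
        return result;
    }
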
diff --git a/src/main/java/eu/socialsensor/graphdatabases/OrientGraphDatabase.java b/src/main/java/eu/socialsensor/graphdatabases/OrientGraphDatabase.java
index ded6e27..edd59af 100644
--- a/src/main/java/eu/socialsensor/graphdatabases/OrientGraphDatabase.java
+++ b/src/main/java/eu/socialsensor/graphdatabases/OrientGraphDatabase.java
@@ -1,24 +1,8 @@
package eu.socialsensor.graphdatabases;
-import com.google.common.collect.Iterables;
-import com.orientechnologies.common.collection.OMultiCollectionIterator;
-import com.orientechnologies.common.util.OCallable;
-import com.orientechnologies.orient.core.command.OBasicCommandContext;
+import com.google.common.collect.Iterators;
import com.orientechnologies.orient.core.config.OGlobalConfiguration;
-import com.orientechnologies.orient.core.id.ORID;
import com.orientechnologies.orient.core.metadata.schema.OType;
-import com.orientechnologies.orient.graph.sql.functions.OSQLFunctionShortestPath;
-import com.tinkerpop.blueprints.Direction;
-import com.tinkerpop.blueprints.Edge;
-import com.tinkerpop.blueprints.Parameter;
-import com.tinkerpop.blueprints.Vertex;
-import com.tinkerpop.blueprints.impls.orient.OrientBaseGraph;
-import com.tinkerpop.blueprints.impls.orient.OrientEdgeType;
-import com.tinkerpop.blueprints.impls.orient.OrientGraph;
-import com.tinkerpop.blueprints.impls.orient.OrientGraphFactory;
-import com.tinkerpop.blueprints.impls.orient.OrientGraphNoTx;
-import com.tinkerpop.blueprints.impls.orient.OrientVertex;
-import com.tinkerpop.blueprints.impls.orient.OrientVertexType;
import eu.socialsensor.insert.Insertion;
import eu.socialsensor.insert.OrientMassiveInsertion;
@@ -35,9 +19,22 @@
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.function.Consumer;
+
+import org.apache.commons.configuration.Configuration;
+import org.apache.commons.configuration.PropertiesConfiguration;
+import org.apache.tinkerpop.gremlin.orientdb.OrientGraph;
+import org.apache.tinkerpop.gremlin.orientdb.OrientGraphFactory;
+import org.apache.tinkerpop.gremlin.orientdb.OrientVertex;
+import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal;
+import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource;
+import org.apache.tinkerpop.gremlin.structure.Direction;
+import org.apache.tinkerpop.gremlin.structure.Edge;
+import org.apache.tinkerpop.gremlin.structure.Vertex;
/**
- * OrientDB graph database implementation
+ * OrientDB graph database implementation.
+ * TODO(amcp) replace with the official OrientDB implementation when available.
*
* @author sotbeis, sotbeis@iti.gr
* @author Alexander Patrikalakis
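The OrientGraphDatabase hunks below follow an analogous pattern: Blueprints-style graph.getVertices(key, value) lookups are rewritten as TinkerPop 3 traversals. A minimal sketch of that pattern, assuming the org.apache.tinkerpop.gremlin.orientdb.OrientGraph dependency imported in this file (illustrative only, not part of the patch):

    import java.util.List;
    import org.apache.tinkerpop.gremlin.orientdb.OrientGraph;
    import org.apache.tinkerpop.gremlin.structure.Vertex;

    // Illustrative sketch (not part of the patch): filter vertices by a property
    // value with a Gremlin traversal instead of the removed getVertices(key, value).
    static List<Vertex> verticesWithProperty(OrientGraph graph, String key, Object value)
    {
        // V().has(key, value) selects matching vertices; toList() materializes
        // them so the result can be iterated more than once.
        return graph.traversal().V().has(key, value).toList();
    }
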
@@ -45,37 +42,16 @@
public class OrientGraphDatabase extends GraphDatabaseBase<Iterator<Vertex>, Iterator<Edge>, Vertex, Edge>
{
- private OrientGraph graph = null;
- private boolean useLightWeightEdges;
+ public static final String UNIQUE_HASH_INDEX = "UNIQUE_HASH_INDEX";
+ public static final String NOTUNIQUE_HASH_INDEX = "NOTUNIQUE_HASH_INDEX";
+ private final OrientGraph graph;
- //
+ @SuppressWarnings("deprecation")
public OrientGraphDatabase(BenchmarkConfiguration config, File dbStorageDirectoryIn)
{
- super(GraphDatabaseType.ORIENT_DB, dbStorageDirectoryIn);
+ super(GraphDatabaseType.ORIENT_DB, dbStorageDirectoryIn, config.getRandomNodeList(),
+ config.getShortestPathMaxHops());
OGlobalConfiguration.STORAGE_COMPRESSION_METHOD.setValue("nothing");
- this.useLightWeightEdges = config.orientLightweightEdges() == null ? true : config.orientLightweightEdges()
- .booleanValue();
- }
-
- @Override
- public void open()
- {
- graph = getGraph(dbStorageDirectory);
- }
-
- @SuppressWarnings("deprecation")
- @Override
- public void createGraphForSingleLoad()
- {
- OGlobalConfiguration.STORAGE_KEEP_OPEN.setValue(false);
- graph = getGraph(dbStorageDirectory);
- createSchema();
- }
-
- @SuppressWarnings("deprecation")
- @Override
- public void createGraphForMassiveLoad()
- {
OGlobalConfiguration.STORAGE_KEEP_OPEN.setValue(false);
graph = getGraph(dbStorageDirectory);
createSchema();
@@ -84,7 +60,7 @@ public void createGraphForMassiveLoad()
@Override
public void massiveModeLoading(File dataPath)
{
- OrientMassiveInsertion orientMassiveInsertion = new OrientMassiveInsertion(this.graph.getRawGraph().getURL());
+ OrientMassiveInsertion orientMassiveInsertion = new OrientMassiveInsertion(graph);
orientMassiveInsertion.createGraph(dataPath, 0 /* scenarioNumber */);
}
@@ -98,19 +74,19 @@ public void singleModeLoading(File dataPath, File resultsPath, int scenarioNumbe
@Override
public void shutdown()
{
- if (graph == null)
+ try
{
- return;
+ graph.close();
+ } catch(Exception e) {
+ throw new IllegalStateException("unable to close graph", e);
}
- graph.shutdown();
- graph = null;
}
@Override
public void delete()
{
- OrientGraphNoTx g = new OrientGraphNoTx("plocal:" + dbStorageDirectory.getAbsolutePath());
- g.drop();
+ OrientGraph g = getGraph(dbStorageDirectory);
+ g.getRawDatabase().drop();
Utils.deleteRecursively(dbStorageDirectory);
}
@@ -124,78 +100,79 @@ public void shutdownMassiveGraph()
@Override
public void shortestPath(final Vertex v1, Integer i)
{
+ @SuppressWarnings("unused")
final OrientVertex v2 = (OrientVertex) getVertex(i);
- List result = (List) new OSQLFunctionShortestPath().execute(graph,
- null, null, new Object[] { ((OrientVertex) v1).getRecord(), v2.getRecord(), Direction.OUT, 5 },
- new OBasicCommandContext());
-
- result.size();
+// List result = (List) new OSQLFunctionShortestPath().execute(graph,
+// null, null, new Object[] { ((OrientVertex) v1).getRecord(), v2.getRecord(), Direction.OUT, maxHops },
+// new OBasicCommandContext());
+//
+// result.size();
}
@Override
public int getNodeCount()
{
- return (int) graph.countVertices();
+ return graph.traversal().V().count().toList().get(0).intValue();
}
@Override
public Set getNeighborsIds(int nodeId)
{
- Set<Integer> neighbours = new HashSet<Integer>();
- Vertex vertex = graph.getVertices(NODE_ID, nodeId).iterator().next();
- for (Vertex v : vertex.getVertices(Direction.IN, SIMILAR))
- {
- Integer neighborId = v.getProperty(NODE_ID);
- neighbours.add(neighborId);
- }
+ final Set<Integer> neighbours = new HashSet<Integer>();
+ final Vertex vertex = getVertex(nodeId);
+ vertex.vertices(Direction.IN, SIMILAR).forEachRemaining(new Consumer<Vertex>() {
+ @Override
+ public void accept(Vertex t) {
+ Integer neighborId = (Integer) t.property(NODE_ID).value();
+ neighbours.add(neighborId);
+ }
+ });
return neighbours;
}
@Override
public double getNodeWeight(int nodeId)
{
- Vertex vertex = graph.getVertices(NODE_ID, nodeId).iterator().next();
+ Vertex vertex = getVertex(nodeId);
double weight = getNodeOutDegree(vertex);
return weight;
}
public double getNodeInDegree(Vertex vertex)
{
- @SuppressWarnings("rawtypes")
- OMultiCollectionIterator result = (OMultiCollectionIterator) vertex.getVertices(Direction.IN, SIMILAR);
- return (double) result.size();
+ return (double) Iterators.size(vertex.edges(Direction.IN, SIMILAR));
}
public double getNodeOutDegree(Vertex vertex)
{
- @SuppressWarnings("rawtypes")
- OMultiCollectionIterator result = (OMultiCollectionIterator) vertex.getVertices(Direction.OUT, SIMILAR);
- return (double) result.size();
+ return (double) Iterators.size(vertex.edges(Direction.OUT, SIMILAR));
}
@Override
public void initCommunityProperty()
{
int communityCounter = 0;
- for (Vertex v : graph.getVertices())
+ for (Vertex v : graph.traversal().V().toList())
{
- ((OrientVertex) v).setProperties(NODE_COMMUNITY, communityCounter, COMMUNITY, communityCounter);
- ((OrientVertex) v).save();
+ v.property(NODE_COMMUNITY, communityCounter);
+ v.property(COMMUNITY, communityCounter);
communityCounter++;
}
}
@Override
- public Set<Integer> getCommunitiesConnectedToNodeCommunities(int nodeCommunities)
+ public Set<Integer> getCommunitiesConnectedToNodeCommunities(int nodeCommunity)
{
Set<Integer> communities = new HashSet<Integer>();
- Iterable<Vertex> vertices = graph.getVertices(NODE_COMMUNITY, nodeCommunities);
- for (Vertex vertex : vertices)
+
+ for (Vertex vertex : graph.traversal().V().has(NODE_COMMUNITY, nodeCommunity).toList())
{
- for (Vertex v : vertex.getVertices(Direction.OUT, SIMILAR))
+ final Iterator<Vertex> it = vertex.vertices(Direction.OUT, SIMILAR);
+ for (Vertex v; it.hasNext();)
{
- int community = v.getProperty(COMMUNITY);
+ v = it.next();
+ int community = (Integer) v.property(COMMUNITY).value();
if (!communities.contains(community))
{
communities.add(community);
@@ -209,10 +186,9 @@ public Set getCommunitiesConnectedToNodeCommunities(int nodeCommunities
public Set<Integer> getNodesFromCommunity(int community)
{
Set<Integer> nodes = new HashSet<Integer>();
- Iterable<Vertex> iter = graph.getVertices(COMMUNITY, community);
- for (Vertex v : iter)
+ for (Vertex v : graph.traversal().V().has(COMMUNITY, community).toList())
{
- Integer nodeId = v.getProperty(NODE_ID);
+ Integer nodeId = (Integer) v.property(NODE_ID).value();
nodes.add(nodeId);
}
return nodes;
@@ -222,10 +198,9 @@ public Set getNodesFromCommunity(int community)
public Set<Integer> getNodesFromNodeCommunity(int nodeCommunity)
{
Set<Integer> nodes = new HashSet<Integer>();
- Iterable<Vertex> iter = graph.getVertices("nodeCommunity", nodeCommunity);
- for (Vertex v : iter)
+ for (Vertex v : graph.traversal().V().has(NODE_COMMUNITY, nodeCommunity).toList())
{
- Integer nodeId = v.getProperty(NODE_ID);
+ Integer nodeId = (Integer) v.property(NODE_ID).value();
nodes.add(nodeId);
}
return nodes;
@@ -235,14 +210,14 @@ public Set getNodesFromNodeCommunity(int nodeCommunity)
public double getEdgesInsideCommunity(int vertexCommunity, int communityVertices)
{
double edges = 0;
- Iterable<Vertex> vertices = graph.getVertices(NODE_COMMUNITY, vertexCommunity);
- Iterable<Vertex> comVertices = graph.getVertices(COMMUNITY, communityVertices);
- for (Vertex vertex : vertices)
+ Set<Vertex> comVertices = graph.traversal().V().has(COMMUNITY, communityVertices).toSet();
+ for (Vertex vertex : graph.traversal().V().has(NODE_COMMUNITY, vertexCommunity).toList())
{
- for (Vertex v : vertex.getVertices(Direction.OUT, SIMILAR))
+ Iterator<Vertex> it = vertex.vertices(Direction.OUT, SIMILAR);
+ for (Vertex v; it.hasNext();)
{
- if (Iterables.contains(comVertices, v))
- {
+ v = it.next();
+ if(comVertices.contains(v)) {
edges++;
}
}
@@ -254,13 +229,13 @@ public double getEdgesInsideCommunity(int vertexCommunity, int communityVertices
public double getCommunityWeight(int community)
{
double communityWeight = 0;
- Iterable<Vertex> iter = graph.getVertices(COMMUNITY, community);
- if (Iterables.size(iter) > 1)
+ final List<Vertex> list = graph.traversal().V().has(COMMUNITY, community).toList();
+ if (list.size() <= 1) {
+ return communityWeight;
+ }
+ for (Vertex vertex : list)
{
- for (Vertex vertex : iter)
- {
- communityWeight += getNodeOutDegree(vertex);
- }
+ communityWeight += getNodeOutDegree(vertex);
}
return communityWeight;
}
@@ -269,8 +244,7 @@ public double getCommunityWeight(int community)
public double getNodeCommunityWeight(int nodeCommunity)
{
double nodeCommunityWeight = 0;
- Iterable<Vertex> iter = graph.getVertices(NODE_COMMUNITY, nodeCommunity);
- for (Vertex vertex : iter)
+ for (Vertex vertex : graph.traversal().V().has(NODE_COMMUNITY, nodeCommunity).toList())
{
nodeCommunityWeight += getNodeOutDegree(vertex);
}
@@ -280,22 +254,17 @@ public double getNodeCommunityWeight(int nodeCommunity)
@Override
public void moveNode(int nodeCommunity, int toCommunity)
{
- Iterable<Vertex> fromIter = graph.getVertices(NODE_COMMUNITY, nodeCommunity);
- for (Vertex vertex : fromIter)
+ for (Vertex vertex : graph.traversal().V().has(NODE_COMMUNITY, nodeCommunity).toList())
{
- vertex.setProperty(COMMUNITY, toCommunity);
+ vertex.property(COMMUNITY, toCommunity);
}
}
@Override
public double getGraphWeightSum()
{
- long edges = 0;
- for (Vertex o : graph.getVertices())
- {
- edges += ((OrientVertex) o).countEdges(Direction.OUT, SIMILAR);
- }
- return (double) edges;
+ final Iterator<Edge> edges = graph.edges();
+ return (double) Iterators.size(edges);
}
@Override
@@ -303,17 +272,19 @@ public int reInitializeCommunities()
{
Map<Integer, Integer> initCommunities = new HashMap<Integer, Integer>();
int communityCounter = 0;
- for (Vertex v : graph.getVertices())
+ Iterator<Vertex> it = graph.vertices();
+ for (Vertex v; it.hasNext();)
{
- int communityId = v.getProperty(COMMUNITY);
+ v = it.next();
+ int communityId = (Integer) v.property(COMMUNITY).value();
if (!initCommunities.containsKey(communityId))
{
initCommunities.put(communityId, communityCounter);
communityCounter++;
}
int newCommunityId = initCommunities.get(communityId);
- ((OrientVertex) v).setProperties(COMMUNITY, newCommunityId, NODE_COMMUNITY, newCommunityId);
- ((OrientVertex) v).save();
+ v.property(COMMUNITY, newCommunityId);
+ v.property(NODE_COMMUNITY, newCommunityId);
}
return communityCounter;
}
@@ -321,30 +292,25 @@ public int reInitializeCommunities()
@Override
public int getCommunity(int nodeCommunity)
{
- final Iterator<Vertex> result = graph.getVertices(NODE_COMMUNITY, nodeCommunity).iterator();
- if (!result.hasNext())
- throw new IllegalArgumentException("node community not found: " + nodeCommunity);
-
- Vertex vertex = result.next();
- int community = vertex.getProperty(COMMUNITY);
+ Vertex vertex = graph.traversal().V().has(NODE_COMMUNITY, nodeCommunity).next();
+ int community = (Integer) vertex.property(COMMUNITY).value();
return community;
}
@Override
public int getCommunityFromNode(int nodeId)
{
- Vertex vertex = graph.getVertices(NODE_ID, nodeId).iterator().next();
- return vertex.getProperty(COMMUNITY);
+ Vertex vertex = getVertex(nodeId);
+ return (Integer) vertex.property(COMMUNITY).value();
}
@Override
public int getCommunitySize(int community)
{
- Iterable<Vertex> vertices = graph.getVertices(COMMUNITY, community);
Set<Integer> nodeCommunities = new HashSet<Integer>();
- for (Vertex v : vertices)
+ for (Vertex v : graph.traversal().V().has(COMMUNITY, community).toList())
{
- int nodeCommunity = v.getProperty(NODE_COMMUNITY);
+ int nodeCommunity = (Integer) v.property(NODE_COMMUNITY).value();
if (!nodeCommunities.contains(nodeCommunity))
{
nodeCommunities.add(nodeCommunity);
@@ -359,11 +325,11 @@ public Map<Integer, List<Integer>> mapCommunities(int numberOfCommunities)
Map<Integer, List<Integer>> communities = new HashMap<Integer, List<Integer>>();
for (int i = 0; i < numberOfCommunities; i++)
{
- Iterator<Vertex> verticesIter = graph.getVertices(COMMUNITY, i).iterator();
+ GraphTraversal<Vertex, Vertex> t = graph.traversal().V().has(COMMUNITY, i);
List<Integer> vertices = new ArrayList<Integer>();
- while (verticesIter.hasNext())
+ while (t.hasNext())
{
- Integer nodeId = verticesIter.next().getProperty(NODE_ID);
+ Integer nodeId = (Integer) t.next().property(NODE_ID).value();
vertices.add(nodeId);
}
communities.put(i, vertices);
@@ -373,59 +339,40 @@ public Map<Integer, List<Integer>> mapCommunities(int numberOfCommunities)
protected void createSchema()
{
- graph.executeOutsideTx(new OCallable