From 80df67f8eda89678984136428b4a2da0dc09493b Mon Sep 17 00:00:00 2001 From: Alexander Patrikalakis Date: Fri, 4 Mar 2016 00:41:01 +0900 Subject: [PATCH 01/26] removed unused files --- .../graphdatabases/SparkseeGraphDatabase.java | 606 ------------------ .../insert/OrientAbstractInsertion.java | 78 --- .../insert/SparkseeMassiveInsertion.java | 43 -- .../insert/SparkseeSingleInsertion.java | 42 -- .../eu/socialsensor/query/Neo4jQuery.java | 119 ---- .../eu/socialsensor/query/OrientQuery.java | 66 -- .../java/eu/socialsensor/query/Query.java | 30 - .../eu/socialsensor/query/SparkseeQuery.java | 85 --- .../eu/socialsensor/query/TitanQuery.java | 109 ---- 9 files changed, 1178 deletions(-) delete mode 100644 src/main/java/eu/socialsensor/graphdatabases/SparkseeGraphDatabase.java delete mode 100644 src/main/java/eu/socialsensor/insert/OrientAbstractInsertion.java delete mode 100644 src/main/java/eu/socialsensor/insert/SparkseeMassiveInsertion.java delete mode 100644 src/main/java/eu/socialsensor/insert/SparkseeSingleInsertion.java delete mode 100644 src/main/java/eu/socialsensor/query/Neo4jQuery.java delete mode 100644 src/main/java/eu/socialsensor/query/OrientQuery.java delete mode 100644 src/main/java/eu/socialsensor/query/Query.java delete mode 100644 src/main/java/eu/socialsensor/query/SparkseeQuery.java delete mode 100644 src/main/java/eu/socialsensor/query/TitanQuery.java diff --git a/src/main/java/eu/socialsensor/graphdatabases/SparkseeGraphDatabase.java b/src/main/java/eu/socialsensor/graphdatabases/SparkseeGraphDatabase.java deleted file mode 100644 index c2a604a..0000000 --- a/src/main/java/eu/socialsensor/graphdatabases/SparkseeGraphDatabase.java +++ /dev/null @@ -1,606 +0,0 @@ -package eu.socialsensor.graphdatabases; - -import java.io.File; -import java.io.FileNotFoundException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import com.sparsity.sparksee.algorithms.SinglePairShortestPathBFS; -import com.sparsity.sparksee.gdb.AttributeKind; -import com.sparsity.sparksee.gdb.Condition; -import com.sparsity.sparksee.gdb.DataType; -import com.sparsity.sparksee.gdb.Database; -import com.sparsity.sparksee.gdb.EdgeData; -import com.sparsity.sparksee.gdb.EdgesDirection; -import com.sparsity.sparksee.gdb.Graph; -import com.sparsity.sparksee.gdb.Objects; -import com.sparsity.sparksee.gdb.ObjectsIterator; -import com.sparsity.sparksee.gdb.Session; -import com.sparsity.sparksee.gdb.Sparksee; -import com.sparsity.sparksee.gdb.SparkseeConfig; -import com.sparsity.sparksee.gdb.Value; - -import eu.socialsensor.insert.Insertion; -import eu.socialsensor.insert.SparkseeMassiveInsertion; -import eu.socialsensor.insert.SparkseeSingleInsertion; -import eu.socialsensor.main.BenchmarkConfiguration; -import eu.socialsensor.main.BenchmarkingException; -import eu.socialsensor.main.GraphDatabaseType; -import eu.socialsensor.utils.Utils; - -/** - * Sparksee graph database implementation - * - * @author sotbeis, sotbeis@iti.gr - * @author Alexander Patrikalakis - */ -public class SparkseeGraphDatabase extends GraphDatabaseBase -{ - public static final String NODE = "node"; - - public static final String INSERTION_TIMES_OUTPUT_PATH = "data/sparksee.insertion.times"; - - private final String sparkseeLicenseKey; - - private boolean readOnly = false; - - double totalWeight; - - private SparkseeConfig sparkseeConfig; - private Sparksee sparksee; - private Database database; - private Session session; - 
private Graph sparkseeGraph; - - public static int NODE_ATTRIBUTE; - public static int COMMUNITY_ATTRIBUTE; - public static int NODE_COMMUNITY_ATTRIBUTE; - - public static int NODE_TYPE; - - public static int EDGE_TYPE; - - Value value = new Value(); - - public SparkseeGraphDatabase(BenchmarkConfiguration config, File dbStorageDirectoryIn) - { - super(GraphDatabaseType.SPARKSEE, dbStorageDirectoryIn); - this.sparkseeLicenseKey = config.getSparkseeLicenseKey(); - } - - @Override - public void open() - { - sparkseeConfig = new SparkseeConfig(); - sparkseeConfig.setLicense(sparkseeLicenseKey); - sparksee = new Sparksee(sparkseeConfig); - try - { - this.database = sparksee.open(getDbFile(dbStorageDirectory), readOnly); - } - catch (FileNotFoundException e) - { - throw new BenchmarkingException("unable to open the db storage directory for sparksee", e); - } - this.session = database.newSession(); - this.sparkseeGraph = session.getGraph(); - createSchema(); - } - - private String getDbFile(File dbPath) - { - return new File(dbPath, "SparkseeDB.gdb").getAbsolutePath(); - } - - @Override - public void createGraphForSingleLoad() - { - try - { - dbStorageDirectory.mkdirs(); - sparkseeConfig = new SparkseeConfig(); - sparkseeConfig.setLicense(sparkseeLicenseKey); - sparksee = new Sparksee(sparkseeConfig); - database = sparksee.create(getDbFile(dbStorageDirectory), "SparkseeDB"); - session = database.newSession(); - sparkseeGraph = session.getGraph(); - createSchema(); - } - catch (FileNotFoundException e) - { - e.printStackTrace(); - } - - } - - @Override - public void createGraphForMassiveLoad() - { - // maybe some more configuration? - try - { - dbStorageDirectory.mkdirs(); - sparkseeConfig = new SparkseeConfig(); - sparkseeConfig.setLicense(sparkseeLicenseKey); - sparksee = new Sparksee(sparkseeConfig); - database = sparksee.create(getDbFile(dbStorageDirectory), "SparkseeDB"); - session = database.newSession(); - sparkseeGraph = session.getGraph(); - createSchema(); - } - catch (FileNotFoundException e) - { - e.printStackTrace(); - } - } - - private void createSchema() - { - NODE_TYPE = sparkseeGraph.newNodeType(NODE); - NODE_ATTRIBUTE = sparkseeGraph.newAttribute(NODE_TYPE, NODE_ID, DataType.String, AttributeKind.Unique); - EDGE_TYPE = sparkseeGraph.newEdgeType(SIMILAR, true, false); - COMMUNITY_ATTRIBUTE = sparkseeGraph.newAttribute(NODE_TYPE, COMMUNITY, DataType.Integer, - AttributeKind.Indexed); - NODE_COMMUNITY_ATTRIBUTE = sparkseeGraph.newAttribute(NODE_TYPE, NODE_COMMUNITY, DataType.Integer, - AttributeKind.Indexed); - } - - @Override - public void massiveModeLoading(File dataPath) - { - Insertion sparkseeMassiveInsertion = new SparkseeMassiveInsertion(session); - sparkseeMassiveInsertion.createGraph(dataPath, 0 /* scenarioNumber */); - } - - @Override - public void singleModeLoading(File dataPath, File resultsPath, int scenarioNumber) - { - Insertion sparkseeSingleInsertion = new SparkseeSingleInsertion(this.session, resultsPath); - sparkseeSingleInsertion.createGraph(dataPath, scenarioNumber); - } - - @Override - public void shutdown() - { - if (session != null) - { - session.close(); - session = null; - database.close(); - database = null; - sparksee.close(); - sparksee = null; - } - - } - - @Override - public void shutdownMassiveGraph() - { - shutdown(); - } - - @Override - public void delete() - { - Utils.deleteRecursively(dbStorageDirectory); - } - - @Override - public void shortestPath(final Long srcNodeID, Integer i) - { - @SuppressWarnings("unused") - double length = 0; - int 
nodeType = sparkseeGraph.findType(NODE); - int edgeType = sparkseeGraph.findType(SIMILAR); - - long dstNodeID = getVertex(i); - SinglePairShortestPathBFS shortestPathBFS = new SinglePairShortestPathBFS(session, srcNodeID, dstNodeID); - shortestPathBFS.addNodeType(nodeType); - shortestPathBFS.addEdgeType(edgeType, EdgesDirection.Outgoing); - shortestPathBFS.setMaximumHops(4); - shortestPathBFS.run(); - if (shortestPathBFS.exists()) - { - length = shortestPathBFS.getCost(); - } - shortestPathBFS.close(); - } - - @Override - public int getNodeCount() - { - return (int) sparkseeGraph.countNodes(); - } - - @Override - public Set getNeighborsIds(int nodeId) - { - Set neighbors = new HashSet(); - long nodeID = sparkseeGraph.findObject(NODE_ATTRIBUTE, value.setString(String.valueOf(nodeId))); - Objects neighborsObjects = sparkseeGraph.neighbors(nodeID, EDGE_TYPE, EdgesDirection.Outgoing); - ObjectsIterator neighborsIter = neighborsObjects.iterator(); - while (neighborsIter.hasNext()) - { - long neighborID = neighborsIter.next(); - Value neighborNodeID = sparkseeGraph.getAttribute(neighborID, NODE_ATTRIBUTE); - neighbors.add(Integer.valueOf(neighborNodeID.getString())); - } - neighborsIter.close(); - neighborsObjects.close(); - return neighbors; - } - - @Override - public double getNodeWeight(int nodeId) - { - long nodeID = sparkseeGraph.findObject(NODE_ATTRIBUTE, value.setString(String.valueOf(nodeId))); - return getNodeOutDegree(nodeID); - } - - public double getNodeInDegree(long node) - { - long inDegree = sparkseeGraph.degree(node, EDGE_TYPE, EdgesDirection.Ingoing); - return (double) inDegree; - } - - public double getNodeOutDegree(long node) - { - long outDegree = sparkseeGraph.degree(node, EDGE_TYPE, EdgesDirection.Outgoing); - return (double) outDegree; - } - - @Override - public void initCommunityProperty() - { - int communityCounter = 0; - // basic or indexed attribute? 
- Objects nodes = sparkseeGraph.select(NODE_TYPE); - ObjectsIterator nodesIter = nodes.iterator(); - while (nodesIter.hasNext()) - { - long nodeID = nodesIter.next(); - sparkseeGraph.setAttribute(nodeID, COMMUNITY_ATTRIBUTE, value.setInteger(communityCounter)); - sparkseeGraph.setAttribute(nodeID, NODE_COMMUNITY_ATTRIBUTE, value.setInteger(communityCounter)); - communityCounter++; - } - nodesIter.close(); - nodes.close(); - } - - @Override - public Set getCommunitiesConnectedToNodeCommunities(int nodeCommunities) - { - Set communities = new HashSet(); - Objects nodes = sparkseeGraph.select(NODE_COMMUNITY_ATTRIBUTE, Condition.Equal, - value.setInteger(nodeCommunities)); - ObjectsIterator nodesIter = nodes.iterator(); - while (nodesIter.hasNext()) - { - long nodeID = nodesIter.next(); - Objects neighbors = sparkseeGraph.neighbors(nodeID, EDGE_TYPE, EdgesDirection.Outgoing); - ObjectsIterator neighborsIter = neighbors.iterator(); - while (neighborsIter.hasNext()) - { - long neighborID = neighborsIter.next(); - Value community = sparkseeGraph.getAttribute(neighborID, COMMUNITY_ATTRIBUTE); - communities.add(community.getInteger()); - } - neighborsIter.close(); - neighbors.close(); - } - nodesIter.close(); - nodes.close(); - return communities; - } - - @Override - public Set getNodesFromCommunity(int community) - { - Set nodesFromCommunity = new HashSet(); - Objects nodes = sparkseeGraph.select(COMMUNITY_ATTRIBUTE, Condition.Equal, value.setInteger(community)); - ObjectsIterator nodesIter = nodes.iterator(); - while (nodesIter.hasNext()) - { - Value nodeId = sparkseeGraph.getAttribute(nodesIter.next(), NODE_ATTRIBUTE); - nodesFromCommunity.add(Integer.valueOf(nodeId.getString())); - } - nodesIter.close(); - nodes.close(); - return nodesFromCommunity; - } - - @Override - public Set getNodesFromNodeCommunity(int nodeCommunity) - { - Set nodesFromNodeCommunity = new HashSet(); - Objects nodes = sparkseeGraph - .select(NODE_COMMUNITY_ATTRIBUTE, Condition.Equal, value.setInteger(nodeCommunity)); - ObjectsIterator nodesIter = nodes.iterator(); - while (nodesIter.hasNext()) - { - Value nodeId = sparkseeGraph.getAttribute(nodesIter.next(), NODE_ATTRIBUTE); - nodesFromNodeCommunity.add(Integer.valueOf(nodeId.getString())); - } - nodesIter.close(); - nodes.close(); - return nodesFromNodeCommunity; - } - - @Override - public double getEdgesInsideCommunity(int nodeCommunity, int communityNode) - { - double edges = 0; - Objects nodesFromNodeCommunitiy = sparkseeGraph.select(NODE_COMMUNITY_ATTRIBUTE, Condition.Equal, - value.setInteger(nodeCommunity)); - Objects nodesFromCommunity = sparkseeGraph.select(COMMUNITY_ATTRIBUTE, Condition.Equal, - value.setInteger(communityNode)); - ObjectsIterator nodesFromNodeCommunityIter = nodesFromNodeCommunitiy.iterator(); - while (nodesFromNodeCommunityIter.hasNext()) - { - long nodeID = nodesFromNodeCommunityIter.next(); - Objects neighbors = sparkseeGraph.neighbors(nodeID, EDGE_TYPE, EdgesDirection.Outgoing); - ObjectsIterator neighborsIter = neighbors.iterator(); - while (neighborsIter.hasNext()) - { - if (nodesFromCommunity.contains(neighborsIter.next())) - { - edges++; - } - } - neighborsIter.close(); - neighbors.close(); - } - nodesFromNodeCommunityIter.close(); - nodesFromCommunity.close(); - nodesFromNodeCommunitiy.close(); - return edges; - } - - @Override - public double getCommunityWeight(int community) - { - double communityWeight = 0; - Objects nodesFromCommunity = sparkseeGraph.select(COMMUNITY_ATTRIBUTE, Condition.Equal, - value.setInteger(community)); - 
ObjectsIterator nodesFromCommunityIter = nodesFromCommunity.iterator(); - if (nodesFromCommunity.size() > 1) - { - while (nodesFromCommunityIter.hasNext()) - { - communityWeight += getNodeOutDegree(nodesFromCommunityIter.next()); - } - } - nodesFromCommunityIter.close(); - nodesFromCommunity.close(); - return communityWeight; - } - - @Override - public double getNodeCommunityWeight(int nodeCommunity) - { - double nodeCommunityWeight = 0; - Objects nodesFromNodeCommunity = sparkseeGraph.select(NODE_COMMUNITY_ATTRIBUTE, Condition.Equal, - value.setInteger(nodeCommunity)); - ObjectsIterator nodesFromNodeCommunityIter = nodesFromNodeCommunity.iterator(); - if (nodesFromNodeCommunity.size() > 1) - { - while (nodesFromNodeCommunityIter.hasNext()) - { - nodeCommunityWeight += getNodeOutDegree(nodesFromNodeCommunityIter.next()); - } - } - nodesFromNodeCommunityIter.close(); - nodesFromNodeCommunity.close(); - return nodeCommunityWeight; - } - - @Override - public void moveNode(int nodeCommunity, int toCommunity) - { - Objects fromNodes = sparkseeGraph.select(NODE_COMMUNITY_ATTRIBUTE, Condition.Equal, - value.setInteger(nodeCommunity)); - ObjectsIterator fromNodesIter = fromNodes.iterator(); - while (fromNodesIter.hasNext()) - { - sparkseeGraph.setAttribute(fromNodesIter.next(), COMMUNITY_ATTRIBUTE, value.setInteger(toCommunity)); - } - fromNodesIter.close(); - fromNodes.close(); - } - - @Override - public double getGraphWeightSum() - { - return (double) sparkseeGraph.countEdges(); - } - - @Override - public int reInitializeCommunities() - { - Map initCommunities = new HashMap(); - int communityCounter = 0; - Objects nodes = sparkseeGraph.select(NODE_TYPE); - ObjectsIterator nodesIter = nodes.iterator(); - while (nodesIter.hasNext()) - { - long nodeID = nodesIter.next(); - Value communityId = sparkseeGraph.getAttribute(nodeID, COMMUNITY_ATTRIBUTE); - if (!initCommunities.containsKey(communityId.getInteger())) - { - initCommunities.put(communityId.getInteger(), communityCounter); - communityCounter++; - } - int newCommunityId = initCommunities.get(communityId.getInteger()); - sparkseeGraph.setAttribute(nodeID, COMMUNITY_ATTRIBUTE, value.setInteger(newCommunityId)); - sparkseeGraph.setAttribute(nodeID, NODE_COMMUNITY_ATTRIBUTE, value.setInteger(newCommunityId)); - } - nodesIter.close(); - nodes.close(); - return communityCounter; - } - - @Override - public int getCommunity(int nodeCommunity) - { - long nodeID = sparkseeGraph.findObject(NODE_COMMUNITY_ATTRIBUTE, value.setInteger(nodeCommunity)); - Value communityId = sparkseeGraph.getAttribute(nodeID, COMMUNITY_ATTRIBUTE); - return communityId.getInteger(); - } - - @Override - public int getCommunityFromNode(int nodeId) - { - long nodeID = sparkseeGraph.findObject(NODE_ATTRIBUTE, value.setString(String.valueOf(nodeId))); - Value communityId = sparkseeGraph.getAttribute(nodeID, COMMUNITY_ATTRIBUTE); - return communityId.getInteger(); - } - - @Override - public int getCommunitySize(int community) - { - Objects nodesFromCommunities = sparkseeGraph.select(COMMUNITY_ATTRIBUTE, Condition.Equal, - value.setInteger(community)); - ObjectsIterator nodesFromCommunitiesIter = nodesFromCommunities.iterator(); - Set nodeCommunities = new HashSet(); - while (nodesFromCommunitiesIter.hasNext()) - { - Value nodeCommunityId = sparkseeGraph.getAttribute(nodesFromCommunitiesIter.next(), - NODE_COMMUNITY_ATTRIBUTE); - nodeCommunities.add(nodeCommunityId.getInteger()); - } - nodesFromCommunitiesIter.close(); - nodesFromCommunities.close(); - return nodeCommunities.size(); 
- } - - @Override - public Map> mapCommunities(int numberOfCommunities) - { - Map> communities = new HashMap>(); - for (int i = 0; i < numberOfCommunities; i++) - { - Objects nodesFromCommunity = sparkseeGraph - .select(COMMUNITY_ATTRIBUTE, Condition.Equal, value.setInteger(i)); - ObjectsIterator nodesFromCommunityIter = nodesFromCommunity.iterator(); - List nodes = new ArrayList(); - while (nodesFromCommunityIter.hasNext()) - { - Value nodeId = sparkseeGraph.getAttribute(nodesFromCommunityIter.next(), NODE_ATTRIBUTE); - nodes.add(Integer.valueOf(nodeId.getString())); - } - communities.put(i, nodes); - nodesFromCommunityIter.close(); - nodesFromCommunity.close(); - } - return communities; - } - - @Override - public boolean nodeExists(int nodeId) - { - Objects nodes = sparkseeGraph.select(NODE_ATTRIBUTE, Condition.Equal, value.setInteger(nodeId)); - ObjectsIterator nodesIter = nodes.iterator(); - if (nodesIter.hasNext()) - { - nodesIter.close(); - nodes.close(); - return true; - } - nodesIter.close(); - nodes.close(); - return false; - } - - @Override - public ObjectsIterator getVertexIterator() - { - final int nodeType = sparkseeGraph.findType(NODE); - final Objects objects = sparkseeGraph.select(nodeType); - return objects.iterator(); - } - - @Override - public ObjectsIterator getNeighborsOfVertex(Long v) - { - final int edgeType = sparkseeGraph.findType(SIMILAR); - final Objects neighbors = sparkseeGraph.neighbors(v, edgeType, EdgesDirection.Any); - return neighbors.iterator(); - } - - @Override - public void cleanupVertexIterator(ObjectsIterator it) - { - it.close(); - } - - @Override - public Long getOtherVertexFromEdge(Long r, Long oneVertex) - { - return r; //pass through - } - - @Override - public ObjectsIterator getAllEdges() - { - int edgeType = sparkseeGraph.findType(SIMILAR); - Objects objects = sparkseeGraph.select(edgeType); - return objects.iterator(); - } - - @Override - public Long getSrcVertexFromEdge(Long edge) - { - EdgeData edgeData = sparkseeGraph.getEdgeData(edge); - return edgeData.getTail(); - } - - @Override - public Long getDestVertexFromEdge(Long edge) - { - EdgeData edgeData = sparkseeGraph.getEdgeData(edge); - return edgeData.getHead(); - } - - @Override - public boolean edgeIteratorHasNext(ObjectsIterator it) - { - return it.hasNext(); - } - - @Override - public Long nextEdge(ObjectsIterator it) - { - return it.next(); - } - - @Override - public void cleanupEdgeIterator(ObjectsIterator it) - { - it.close(); - } - - @Override - public boolean vertexIteratorHasNext(ObjectsIterator it) - { - return it.hasNext(); - } - - @Override - public Long nextVertex(ObjectsIterator it) - { - return it.next(); - } - - @Override - public Long getVertex(Integer i) - { - int nodeType = sparkseeGraph.findType(NODE); - int nodeAttribute = sparkseeGraph.findAttribute(nodeType, NODE_ID); - return sparkseeGraph.findObject(nodeAttribute, value.setInteger(i)); - } -} diff --git a/src/main/java/eu/socialsensor/insert/OrientAbstractInsertion.java b/src/main/java/eu/socialsensor/insert/OrientAbstractInsertion.java deleted file mode 100644 index af6ecdd..0000000 --- a/src/main/java/eu/socialsensor/insert/OrientAbstractInsertion.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * - * * Copyright 2014 Orient Technologies LTD (info(at)orientechnologies.com) - * * - * * Licensed under the Apache License, Version 2.0 (the "License"); - * * you may not use this file except in compliance with the License. 
- * * You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, software - * * distributed under the License is distributed on an "AS IS" BASIS, - * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * * See the License for the specific language governing permissions and - * * limitations under the License. - * * - * * For more information: http://www.orientechnologies.com - * - */ - -package eu.socialsensor.insert; - -import org.apache.log4j.Logger; - -import com.orientechnologies.orient.core.db.record.OIdentifiable; -import com.orientechnologies.orient.core.index.OIndex; -import com.tinkerpop.blueprints.TransactionalGraph; -import com.tinkerpop.blueprints.Vertex; -import com.tinkerpop.blueprints.impls.orient.OrientExtendedGraph; -import com.tinkerpop.blueprints.impls.orient.asynch.OrientGraphAsynch; - -/** - * Implementation of single Insertion in OrientDB graph database - * - * @author sotbeis - * @email sotbeis@iti.gr - * - */ -public abstract class OrientAbstractInsertion implements Insertion { - - public static String INSERTION_TIMES_OUTPUT_PATH = null; - - protected OrientExtendedGraph orientGraph = null; - protected Logger logger = Logger.getLogger(OrientAbstractInsertion.class); - - protected OIndex index; - - public OrientAbstractInsertion(OrientExtendedGraph orientGraph) { - this.orientGraph = orientGraph; - } - - protected Vertex getOrCreate(final String value) { - final int key = Integer.parseInt(value); - - Vertex v; - if (orientGraph instanceof OrientGraphAsynch) { - v = ((OrientGraphAsynch) orientGraph).addOrUpdateVertex(key, "nodeId", key); - } - else { - if (index == null) { - index = orientGraph.getRawGraph().getMetadata().getIndexManager().getIndex("V.nodeId"); - } - - final OIdentifiable rec = (OIdentifiable) index.get(key); - if (rec != null) { - return orientGraph.getVertex(rec); - } - - v = orientGraph.addVertex(key, "nodeId", key); - - if (orientGraph instanceof TransactionalGraph) { - ((TransactionalGraph) orientGraph).commit(); - } - } - - return v; - } -} diff --git a/src/main/java/eu/socialsensor/insert/SparkseeMassiveInsertion.java b/src/main/java/eu/socialsensor/insert/SparkseeMassiveInsertion.java deleted file mode 100644 index 10ef522..0000000 --- a/src/main/java/eu/socialsensor/insert/SparkseeMassiveInsertion.java +++ /dev/null @@ -1,43 +0,0 @@ -package eu.socialsensor.insert; - -import com.sparsity.sparksee.gdb.Graph; -import com.sparsity.sparksee.gdb.Session; -import com.sparsity.sparksee.gdb.Value; - -import eu.socialsensor.graphdatabases.SparkseeGraphDatabase; -import eu.socialsensor.main.GraphDatabaseType; - -public class SparkseeMassiveInsertion extends InsertionBase implements Insertion -{ - private final Session session; - private final Graph sparkseeGraph; - private int operations; - - public SparkseeMassiveInsertion(Session session) - { - super(GraphDatabaseType.SPARKSEE, null /* resultsPath */); - this.session = session; - this.sparkseeGraph = session.getGraph(); - this.operations = 0; - } - - @Override - public Long getOrCreate(String value) - { - Value sparkseeValue = new Value(); - return sparkseeGraph.findOrCreateObject(SparkseeGraphDatabase.NODE_ATTRIBUTE, sparkseeValue.setString(value)); - } - - @Override - public void relateNodes(Long src, Long dest) - { - sparkseeGraph.newEdge(SparkseeGraphDatabase.EDGE_TYPE, src, dest); - operations++; - if (operations == 10000) - { - session.commit(); - session.begin(); 
- operations = 0; - } - } -} diff --git a/src/main/java/eu/socialsensor/insert/SparkseeSingleInsertion.java b/src/main/java/eu/socialsensor/insert/SparkseeSingleInsertion.java deleted file mode 100644 index 1244df9..0000000 --- a/src/main/java/eu/socialsensor/insert/SparkseeSingleInsertion.java +++ /dev/null @@ -1,42 +0,0 @@ -package eu.socialsensor.insert; - -import java.io.File; - -import com.sparsity.sparksee.gdb.Graph; -import com.sparsity.sparksee.gdb.Session; -import com.sparsity.sparksee.gdb.Value; - -import eu.socialsensor.graphdatabases.SparkseeGraphDatabase; -import eu.socialsensor.main.GraphDatabaseType; - -public class SparkseeSingleInsertion extends InsertionBase -{ - private final Session session; - private final Graph sparkseeGraph; - - Value value = new Value(); - - public SparkseeSingleInsertion(Session session, File resultsPath) - { - // no temp files for massive load insert - super(GraphDatabaseType.SPARKSEE, resultsPath); - this.session = session; - this.sparkseeGraph = session.getGraph(); - } - - @Override - public Long getOrCreate(String value) - { - Value sparkseeValue = new Value(); - return sparkseeGraph.findOrCreateObject(SparkseeGraphDatabase.NODE_ATTRIBUTE, sparkseeValue.setString(value)); - } - - @Override - public void relateNodes(Long src, Long dest) - { - session.begin(); - sparkseeGraph.newEdge(SparkseeGraphDatabase.EDGE_TYPE, src, dest); - session.commit(); - } - -} diff --git a/src/main/java/eu/socialsensor/query/Neo4jQuery.java b/src/main/java/eu/socialsensor/query/Neo4jQuery.java deleted file mode 100644 index c971b79..0000000 --- a/src/main/java/eu/socialsensor/query/Neo4jQuery.java +++ /dev/null @@ -1,119 +0,0 @@ -package eu.socialsensor.query; - -import org.neo4j.graphalgo.GraphAlgoFactory; -import org.neo4j.graphalgo.PathFinder; -import org.neo4j.graphdb.Direction; -import org.neo4j.graphdb.GraphDatabaseService; -import org.neo4j.graphdb.Node; -import org.neo4j.graphdb.Path; -import org.neo4j.graphdb.Relationship; -import org.neo4j.graphdb.Transaction; -import org.neo4j.graphdb.factory.GraphDatabaseFactory; -import org.neo4j.kernel.GraphDatabaseAPI; -import org.neo4j.kernel.Traversal; -import org.neo4j.tooling.GlobalGraphOperations; - -import eu.socialsensor.benchmarks.FindShortestPathBenchmark; -import eu.socialsensor.graphdatabases.Neo4jGraphDatabase; -import eu.socialsensor.main.GraphDatabaseBenchmark; - -/** - * Query implementation for Neo4j graph database - * - * @author sotbeis - * @email sotbeis@iti.gr - */ -@SuppressWarnings("deprecation") -public class Neo4jQuery implements Query { - - private GraphDatabaseService neo4jGraph = null; - - public static void main(String args[]) { - for(int i = 0; i < 5; i++) { - GraphDatabaseService neo4jGraph = new GraphDatabaseFactory().newEmbeddedDatabase(GraphDatabaseBenchmark.NEO4JDB_PATH); - Neo4jQuery neo4jQuery = new Neo4jQuery(neo4jGraph); - neo4jQuery.findNeighborsOfAllNodes(); - neo4jGraph.shutdown(); - } - } - - public Neo4jQuery(GraphDatabaseService neo4jGraph) { - this.neo4jGraph = neo4jGraph; - } - - @Override - public void findNeighborsOfAllNodes() { - Transaction tx = null; - try { - tx = neo4jGraph.beginTx(); - for(Node n : GlobalGraphOperations.at(neo4jGraph).getAllNodes()) { - for(Relationship relationship : n.getRelationships(Neo4jGraphDatabase.RelTypes.SIMILAR, Direction.BOTH)) { - @SuppressWarnings("unused") - Node neighbour = relationship.getOtherNode(n); - } - } - tx.success(); - } - catch(Exception e) { - - } - finally { - if(tx != null) { - tx.close(); - } - } - } - - @Override - 
public void findNodesOfAllEdges() { - Transaction tx = null; - try { - tx = ((GraphDatabaseAPI)neo4jGraph).tx().unforced().begin(); - for(Relationship r : GlobalGraphOperations.at(neo4jGraph).getAllRelationships()) { - @SuppressWarnings("unused") - Node startNode = r.getStartNode(); - @SuppressWarnings("unused") - Node endNode = r.getEndNode(); - } - } - catch(Exception e) { - - } - finally { - if(tx != null) { - tx.close(); - } - } - } - - @Override - public void findShortestPaths() { - Transaction tx = null; - try { - tx = neo4jGraph.beginTx(); - PathFinder finder = GraphAlgoFactory.shortestPath(Traversal.expanderForTypes(Neo4jGraphDatabase.RelTypes.SIMILAR),5); - Node n1 = neo4jGraph.findNodesByLabelAndProperty(Neo4jGraphDatabase.NODE_LABEL, "nodeId", "1").iterator().next(); - for(int i : FindShortestPathBenchmark.generatedNodes) { - Node n2 = neo4jGraph.findNodesByLabelAndProperty(Neo4jGraphDatabase.NODE_LABEL, "nodeId", String.valueOf(i)).iterator().next(); - Path path = finder.findSinglePath(n1, n2); - @SuppressWarnings("unused") - int length = 0; - if(path != null) { - length = path.length(); - } - } - tx.success(); - } - catch(Exception e) { - - } - finally { - if(tx != null) { - tx.close(); - } - - } - } - - -} diff --git a/src/main/java/eu/socialsensor/query/OrientQuery.java b/src/main/java/eu/socialsensor/query/OrientQuery.java deleted file mode 100644 index b2cfa9a..0000000 --- a/src/main/java/eu/socialsensor/query/OrientQuery.java +++ /dev/null @@ -1,66 +0,0 @@ -package eu.socialsensor.query; - -import java.util.List; - -import com.orientechnologies.orient.core.command.OBasicCommandContext; -import com.orientechnologies.orient.core.db.record.OIdentifiable; -import com.orientechnologies.orient.graph.sql.functions.OSQLFunctionShortestPath; -import com.tinkerpop.blueprints.Direction; -import com.tinkerpop.blueprints.Vertex; -import com.tinkerpop.blueprints.impls.orient.OrientExtendedGraph; -import com.tinkerpop.blueprints.impls.orient.OrientVertex; -import eu.socialsensor.benchmarks.FindShortestPathBenchmark; - -/** - * Query implementation for OrientDB graph database - * - * @author sotbeis - * @email sotbeis@iti.gr - */ -public class OrientQuery implements Query { - private OrientExtendedGraph orientGraph = null; - - public OrientQuery(OrientExtendedGraph orientGraph) { - this.orientGraph = orientGraph; - } - - public static void main(String args[]) { - } - - @Override - public void findNeighborsOfAllNodes() { - for (Vertex v : orientGraph.getVertices()) { - for (Vertex vv : v.getVertices(Direction.BOTH, "similar")) { - } - } - } - - @Override - public void findNodesOfAllEdges() { - for (Vertex v : orientGraph.getVertices()) { - for (Vertex vv : v.getVertices(Direction.BOTH)) { - } - } - } - - @Override - public void findShortestPaths() { - for (int k = 0; k < 3; ++k) { - final long start = System.currentTimeMillis(); - - OrientVertex v1 = (OrientVertex) orientGraph.getVertices("nodeId", 1).iterator().next(); - for (int i : FindShortestPathBenchmark.generatedNodes) { - final OrientVertex v2 = (OrientVertex) orientGraph.getVertices("nodeId", i).iterator().next(); - - List result = (List) new OSQLFunctionShortestPath().execute(orientGraph, null, null, - new Object[] { v1.getRecord(), v2.getRecord(), Direction.OUT, 5 }, new OBasicCommandContext()); - - int length = result.size(); - - System.out.printf("\nORIENTDB SP(%s,%s): %d", 1, i, length); - } - - System.out.printf("\nORIENTDB TOTAL: " + (System.currentTimeMillis() - start)); - } - } -} diff --git 
a/src/main/java/eu/socialsensor/query/Query.java b/src/main/java/eu/socialsensor/query/Query.java deleted file mode 100644 index 8bfc77c..0000000 --- a/src/main/java/eu/socialsensor/query/Query.java +++ /dev/null @@ -1,30 +0,0 @@ -package eu.socialsensor.query; - -/** - * Represents the queries for each graph database - * - * @author sotbeis - * @email sotbeis@iti.gr - * - */ -public interface Query { - - /** - * Iterates over the nodes and finds the neighbours - * of each node - */ - public void findNeighborsOfAllNodes(); - - /** - * Iterates over the edges and finds the adjacent - * nodes of each edge - */ - public void findNodesOfAllEdges(); - - /** - * Finds the shortest path between the first node - * and 100 randomly picked nodes - */ - public void findShortestPaths(); - -} diff --git a/src/main/java/eu/socialsensor/query/SparkseeQuery.java b/src/main/java/eu/socialsensor/query/SparkseeQuery.java deleted file mode 100644 index 32eecd7..0000000 --- a/src/main/java/eu/socialsensor/query/SparkseeQuery.java +++ /dev/null @@ -1,85 +0,0 @@ -package eu.socialsensor.query; - -import com.sparsity.sparksee.algorithms.SinglePairShortestPathBFS; -import com.sparsity.sparksee.gdb.EdgeData; -import com.sparsity.sparksee.gdb.EdgesDirection; -import com.sparsity.sparksee.gdb.Graph; -import com.sparsity.sparksee.gdb.Objects; -import com.sparsity.sparksee.gdb.ObjectsIterator; -import com.sparsity.sparksee.gdb.Session; -import com.sparsity.sparksee.gdb.Value; - -import eu.socialsensor.benchmarks.FindShortestPathBenchmark; - -/** - * Query implementation for Sparksee graph database - * - * @author sotbeis - * @email sotbeis@iti.gr - */ -public class SparkseeQuery implements Query { - - private Session session; - private Graph sparkseeGraph; - - public SparkseeQuery(Session session) { - this.session = session; - this.sparkseeGraph = session.getGraph(); - } - - @Override - public void findNeighborsOfAllNodes() { - int nodeType = sparkseeGraph.findType("node"); - int edgeType = sparkseeGraph.findType("similar"); - Objects objects = sparkseeGraph.select(nodeType); - ObjectsIterator iter = objects.iterator(); - while(iter.hasNext()) { - long nodeID = iter.next(); - Objects neighbors = sparkseeGraph.neighbors(nodeID, edgeType, EdgesDirection.Any); - neighbors.close(); - } - iter.close(); - objects.close(); - } - - @Override - public void findNodesOfAllEdges() { - int edgeType = sparkseeGraph.findType("similar"); - Objects objects = sparkseeGraph.select(edgeType); - ObjectsIterator iter = objects.iterator(); - while(iter.hasNext()) { - long edgeID = iter.next(); - EdgeData edge = sparkseeGraph.getEdgeData(edgeID); - @SuppressWarnings("unused") - long srcNodeID = edge.getHead(); - @SuppressWarnings("unused") - long dstNodeID = edge.getTail(); - } - iter.close(); - objects.close(); - } - - @Override - public void findShortestPaths() { - @SuppressWarnings("unused") - double length = 0; - int nodeType = sparkseeGraph.findType("node"); - int nodeAttribute = sparkseeGraph.findAttribute(nodeType, "nodeId"); - int edgeType = sparkseeGraph.findType("similar"); - Value value = new Value(); - long srcNodeID = sparkseeGraph.findObject(nodeAttribute, value.setString("1")); - for(int i : FindShortestPathBenchmark.generatedNodes) { - long dstNodeID = sparkseeGraph.findObject(nodeAttribute, value.setString(String.valueOf(i))); - SinglePairShortestPathBFS shortestPathBFS = new SinglePairShortestPathBFS(session, srcNodeID, dstNodeID); - shortestPathBFS.addNodeType(nodeType); - shortestPathBFS.addEdgeType(edgeType, 
EdgesDirection.Outgoing); - shortestPathBFS.setMaximumHops(4); - shortestPathBFS.run(); - if(shortestPathBFS.exists()) { - length = shortestPathBFS.getCost(); - } - shortestPathBFS.close(); - } - } - -} diff --git a/src/main/java/eu/socialsensor/query/TitanQuery.java b/src/main/java/eu/socialsensor/query/TitanQuery.java deleted file mode 100644 index 278799c..0000000 --- a/src/main/java/eu/socialsensor/query/TitanQuery.java +++ /dev/null @@ -1,109 +0,0 @@ -package eu.socialsensor.query; - -import java.io.FileNotFoundException; -import java.io.PrintWriter; -import java.util.Iterator; -import java.util.List; - -import com.thinkaurelius.titan.core.TitanEdge; -import com.thinkaurelius.titan.core.TitanFactory; -import com.thinkaurelius.titan.core.TitanGraph; -import com.tinkerpop.blueprints.Direction; -import com.tinkerpop.blueprints.Edge; -import com.tinkerpop.blueprints.Vertex; -import com.tinkerpop.gremlin.java.GremlinPipeline; -import com.tinkerpop.pipes.PipeFunction; -import com.tinkerpop.pipes.branch.LoopPipe.LoopBundle; - -import eu.socialsensor.benchmarks.FindShortestPathBenchmark; -import eu.socialsensor.main.GraphDatabaseBenchmark; - -/** - * Query implementation for Titan graph database - * - * @author sotbeis - * @email sotbeis@iti.gr - */ - -public class TitanQuery implements Query { - - private TitanGraph titanGraph = null; - - public static void main(String args[]) { -// GraphDatabase graph = new TitanGraphDatabase(); -// graph.createGraphForMassiveLoad(GraphDatabaseBenchmark.TITANDB_PATH); -// graph.massiveModeLoading("./data/youtubeEdges.txt"); -// graph.shutdownMassiveGraph(); - - TitanQuery titanQuery = new TitanQuery(); - titanQuery.findNodesOfAllEdges(); - } - - public TitanQuery(TitanGraph titanGraph) { - this.titanGraph = titanGraph; - } - - public TitanQuery() { - this.titanGraph = TitanFactory.build() - .set("storage.backend", "berkeleyje") - .set("storage.transactions", false) - .set("storage.directory", GraphDatabaseBenchmark.TITANDB_PATH) - .open(); - } - - @Override - @SuppressWarnings("unused") - public void findNeighborsOfAllNodes() { - for (Vertex v : titanGraph.getVertices()) { - for (Vertex vv : v.getVertices(Direction.BOTH, "similar")) { - } - } - } - - @Override - @SuppressWarnings("unused") - public void findNodesOfAllEdges() { - - try { - PrintWriter writer = new PrintWriter("orient"); - - for(Edge e : titanGraph.getEdges()) { - Vertex srcVertex = e.getVertex(Direction.OUT); - Vertex dstVertex = e.getVertex(Direction.IN); - - writer.println(srcVertex.getProperty("nodeId") + "\t" + dstVertex.getProperty("nodeId")); - } - - writer.close(); - - } catch (FileNotFoundException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - - } - - @Override - public void findShortestPaths() { - Vertex v1 = titanGraph.getVertices("nodeId", 1).iterator().next(); - - for(int i : FindShortestPathBenchmark.generatedNodes) { - final Vertex v2 = titanGraph.getVertices("nodeId", i).iterator().next(); - @SuppressWarnings("rawtypes") - final GremlinPipeline pathPipe = new GremlinPipeline(v1) - .as("similar") - .out("similar") - .loop("similar", new PipeFunction, Boolean>() { - //@Override - public Boolean compute(LoopBundle bundle) { - return bundle.getLoops() < 5 && !bundle.getObject().equals(v2); - } - }) - .path(); - @SuppressWarnings("unused") - int length = pathPipe.iterator().next().size(); - - } - } - -} From cd6e9bb74fe0892d0541f364d733762b22e531e6 Mon Sep 17 00:00:00 2001 From: Alexander Patrikalakis Date: Fri, 4 Mar 2016 01:20:53 +0900 Subject: [PATCH 
02/26] tp3 support, builds --- pom.xml | 161 ++--------- .../graphdatabases/OrientGraphDatabase.java | 253 ++++++++--------- .../graphdatabases/TitanGraphDatabase.java | 260 +++++++----------- .../eu/socialsensor/insert/InsertionBase.java | 2 + .../insert/OrientMassiveInsertion.java | 46 ++-- .../insert/OrientSingleInsertion.java | 64 ++--- .../insert/TitanMassiveInsertion.java | 29 +- .../insert/TitanSingleInsertion.java | 41 ++- .../main/BenchmarkConfiguration.java | 24 -- .../java/eu/socialsensor/utils/Utils.java | 5 - 10 files changed, 303 insertions(+), 582 deletions(-) diff --git a/pom.xml b/pom.xml index 7a8acfb..d31b5ac 100644 --- a/pom.xml +++ b/pom.xml @@ -50,39 +50,22 @@ - 2.6.0 - 2.1.0 - 0.5.4 + 3.0.1-incubating + 1.0.0 0.98.8-hadoop2 2.0.1 1.0.0 2.1 2.18.1 1.8 - 3.0.0-BETA3 - - org.antlr - antlr-runtime - 3.2 - com.google.guava guava 14.0.1 - - colt - colt - 1.2.0 - - - commons-codec - commons-codec - 1.7 - org.apache.commons commons-collections4 @@ -113,41 +96,6 @@ log4j-core ${log4j2.version} - - org.apache.geronimo.specs - geronimo-jta_1.1_spec - 1.1.1 - - - com.tinkerpop.gremlin - gremlin-groovy - ${blueprints.version} - - - com.tinkerpop.gremlin - gremlin-java - ${blueprints.version} - - - com.github.stephenc.high-scale-lib - high-scale-lib - 1.1.2 - - - com.carrotsearch - hppc - 0.4.2 - - - com.sleepycat - je - 5.0.73 - - - net.java.dev.jna - jna - 4.0.0 - org.apache.lucene lucene-core @@ -164,45 +112,9 @@ ${neo4j.version} - com.tinkerpop.blueprints - blueprints-neo4j2-graph - ${blueprints.version} - - - ch.qos.logback - logback-classic - - - - - com.orientechnologies - orientdb-graphdb - ${orientdb.version} - - - com.tinkerpop - pipes - ${blueprints.version} - - - org.slf4j - slf4j-api - 1.7.5 - - - org.slf4j - slf4j-log4j12 - 1.7.5 - - - org.iq80.snappy - snappy - 0.3 - - - com.spatial4j - spatial4j - 0.3 + org.apache.tinkerpop + gremlin-core + ${tinkerpop.version} com.thinkaurelius.titan @@ -213,55 +125,27 @@ com.thinkaurelius.titan titan-cassandra ${titan.version} - com.thinkaurelius.titan titan-hbase ${titan.version} - - com.thinkaurelius.titan - titan-core - ${titan.version} - - - org.apache.hbase - hbase-client - ${hbase.version} - com.amazonaws - dynamodb-titan054-storage-backend + dynamodb-titan100-storage-backend ${dynamodb.titan.version} - - com.sparsity - sparkseejava - 5.0.0 - - - com.tinkerpop.blueprints - blueprints-sparksee-graph - ${blueprints.version} - junit junit - 4.11 + 4.12 test - com.codahale.metrics - metrics-core - ${metrics.version} + com.github.amcp + orientdb-gremlin + ${tinkerpop.version}.0 @@ -444,23 +328,10 @@ - - - - org.apache.httpcomponents - httpclient - 4.3.6 - - - org.apache.httpcomponents - httpcore - 4.3.3 - - - joda-time - joda-time - 2.8.1 - - - + + + jitpack.io + https://jitpack.io + + diff --git a/src/main/java/eu/socialsensor/graphdatabases/OrientGraphDatabase.java b/src/main/java/eu/socialsensor/graphdatabases/OrientGraphDatabase.java index ded6e27..8fd7ade 100644 --- a/src/main/java/eu/socialsensor/graphdatabases/OrientGraphDatabase.java +++ b/src/main/java/eu/socialsensor/graphdatabases/OrientGraphDatabase.java @@ -1,24 +1,8 @@ package eu.socialsensor.graphdatabases; import com.google.common.collect.Iterables; -import com.orientechnologies.common.collection.OMultiCollectionIterator; -import com.orientechnologies.common.util.OCallable; -import com.orientechnologies.orient.core.command.OBasicCommandContext; +import com.google.common.collect.Iterators; import com.orientechnologies.orient.core.config.OGlobalConfiguration; 
-import com.orientechnologies.orient.core.id.ORID; -import com.orientechnologies.orient.core.metadata.schema.OType; -import com.orientechnologies.orient.graph.sql.functions.OSQLFunctionShortestPath; -import com.tinkerpop.blueprints.Direction; -import com.tinkerpop.blueprints.Edge; -import com.tinkerpop.blueprints.Parameter; -import com.tinkerpop.blueprints.Vertex; -import com.tinkerpop.blueprints.impls.orient.OrientBaseGraph; -import com.tinkerpop.blueprints.impls.orient.OrientEdgeType; -import com.tinkerpop.blueprints.impls.orient.OrientGraph; -import com.tinkerpop.blueprints.impls.orient.OrientGraphFactory; -import com.tinkerpop.blueprints.impls.orient.OrientGraphNoTx; -import com.tinkerpop.blueprints.impls.orient.OrientVertex; -import com.tinkerpop.blueprints.impls.orient.OrientVertexType; import eu.socialsensor.insert.Insertion; import eu.socialsensor.insert.OrientMassiveInsertion; @@ -35,6 +19,18 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.function.Consumer; + +import org.apache.commons.configuration.Configuration; +import org.apache.commons.configuration.PropertiesConfiguration; +import org.apache.tinkerpop.gremlin.orientdb.OrientGraph; +import org.apache.tinkerpop.gremlin.orientdb.OrientGraphFactory; +import org.apache.tinkerpop.gremlin.orientdb.OrientVertex; +import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; +import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource; +import org.apache.tinkerpop.gremlin.structure.Direction; +import org.apache.tinkerpop.gremlin.structure.Edge; +import org.apache.tinkerpop.gremlin.structure.Vertex; /** * OrientDB graph database implementation @@ -45,16 +41,14 @@ public class OrientGraphDatabase extends GraphDatabaseBase, Iterator, Vertex, Edge> { + //to look up the existence of indexes in OrientDB, you need to have vertex labels. + public static final String NODE_LABEL = "NODE"; private OrientGraph graph = null; - private boolean useLightWeightEdges; - // public OrientGraphDatabase(BenchmarkConfiguration config, File dbStorageDirectoryIn) { super(GraphDatabaseType.ORIENT_DB, dbStorageDirectoryIn); OGlobalConfiguration.STORAGE_COMPRESSION_METHOD.setValue("nothing"); - this.useLightWeightEdges = config.orientLightweightEdges() == null ? 
true : config.orientLightweightEdges() - .booleanValue(); } @Override @@ -84,7 +78,7 @@ public void createGraphForMassiveLoad() @Override public void massiveModeLoading(File dataPath) { - OrientMassiveInsertion orientMassiveInsertion = new OrientMassiveInsertion(this.graph.getRawGraph().getURL()); + OrientMassiveInsertion orientMassiveInsertion = new OrientMassiveInsertion(graph); orientMassiveInsertion.createGraph(dataPath, 0 /* scenarioNumber */); } @@ -102,15 +96,20 @@ public void shutdown() { return; } - graph.shutdown(); + try + { + graph.close(); + } catch(Exception e) { + throw new IllegalStateException("unable to close graph", e); + } graph = null; } @Override public void delete() { - OrientGraphNoTx g = new OrientGraphNoTx("plocal:" + dbStorageDirectory.getAbsolutePath()); - g.drop(); + OrientGraph g = getGraph(dbStorageDirectory); + g.getRawDatabase().drop(); Utils.deleteRecursively(dbStorageDirectory); } @@ -126,76 +125,77 @@ public void shortestPath(final Vertex v1, Integer i) { final OrientVertex v2 = (OrientVertex) getVertex(i); - List result = (List) new OSQLFunctionShortestPath().execute(graph, - null, null, new Object[] { ((OrientVertex) v1).getRecord(), v2.getRecord(), Direction.OUT, 5 }, - new OBasicCommandContext()); - - result.size(); + //TODO(amcp) need to do something about the number 5 +// List result = (List) new OSQLFunctionShortestPath().execute(graph, +// null, null, new Object[] { ((OrientVertex) v1).getRecord(), v2.getRecord(), Direction.OUT, 5 }, +// new OBasicCommandContext()); +// +// result.size(); } @Override public int getNodeCount() { - return (int) graph.countVertices(); + return graph.traversal().V().count().toList().get(0).intValue(); } @Override public Set getNeighborsIds(int nodeId) { - Set neighbours = new HashSet(); - Vertex vertex = graph.getVertices(NODE_ID, nodeId).iterator().next(); - for (Vertex v : vertex.getVertices(Direction.IN, SIMILAR)) - { - Integer neighborId = v.getProperty(NODE_ID); - neighbours.add(neighborId); - } + final Set neighbours = new HashSet(); + final Vertex vertex = getVertex(nodeId); + vertex.vertices(Direction.IN, SIMILAR).forEachRemaining(new Consumer() { + @Override + public void accept(Vertex t) { + Integer neighborId = (Integer) t.property(NODE_ID).value(); + neighbours.add(neighborId); + } + }); return neighbours; } @Override public double getNodeWeight(int nodeId) { - Vertex vertex = graph.getVertices(NODE_ID, nodeId).iterator().next(); + Vertex vertex = getVertex(nodeId); double weight = getNodeOutDegree(vertex); return weight; } public double getNodeInDegree(Vertex vertex) { - @SuppressWarnings("rawtypes") - OMultiCollectionIterator result = (OMultiCollectionIterator) vertex.getVertices(Direction.IN, SIMILAR); - return (double) result.size(); + return (double) Iterators.size(vertex.edges(Direction.IN, SIMILAR)); } public double getNodeOutDegree(Vertex vertex) { - @SuppressWarnings("rawtypes") - OMultiCollectionIterator result = (OMultiCollectionIterator) vertex.getVertices(Direction.OUT, SIMILAR); - return (double) result.size(); + return (double) Iterators.size(vertex.edges(Direction.OUT, SIMILAR)); } @Override public void initCommunityProperty() { int communityCounter = 0; - for (Vertex v : graph.getVertices()) + for (Vertex v : graph.traversal().V().toList()) { - ((OrientVertex) v).setProperties(NODE_COMMUNITY, communityCounter, COMMUNITY, communityCounter); - ((OrientVertex) v).save(); + v.property(NODE_COMMUNITY, communityCounter); + v.property(COMMUNITY, communityCounter); communityCounter++; } } 
@Override - public Set getCommunitiesConnectedToNodeCommunities(int nodeCommunities) + public Set getCommunitiesConnectedToNodeCommunities(int nodeCommunity) { Set communities = new HashSet(); - Iterable vertices = graph.getVertices(NODE_COMMUNITY, nodeCommunities); - for (Vertex vertex : vertices) + + for (Vertex vertex : graph.traversal().V().has(NODE_COMMUNITY, nodeCommunity).toList()) { - for (Vertex v : vertex.getVertices(Direction.OUT, SIMILAR)) + final Iterator it = vertex.vertices(Direction.OUT, SIMILAR); + for (Vertex v; it.hasNext();) { - int community = v.getProperty(COMMUNITY); + v = it.next(); + int community = (Integer) v.property(COMMUNITY).value(); if (!communities.contains(community)) { communities.add(community); @@ -209,10 +209,9 @@ public Set getCommunitiesConnectedToNodeCommunities(int nodeCommunities public Set getNodesFromCommunity(int community) { Set nodes = new HashSet(); - Iterable iter = graph.getVertices(COMMUNITY, community); - for (Vertex v : iter) + for (Vertex v : graph.traversal().V().has(COMMUNITY, community).toList()) { - Integer nodeId = v.getProperty(NODE_ID); + Integer nodeId = (Integer) v.property(NODE_ID).value(); nodes.add(nodeId); } return nodes; @@ -222,10 +221,9 @@ public Set getNodesFromCommunity(int community) public Set getNodesFromNodeCommunity(int nodeCommunity) { Set nodes = new HashSet(); - Iterable iter = graph.getVertices("nodeCommunity", nodeCommunity); - for (Vertex v : iter) + for (Vertex v : graph.traversal().V().has(NODE_COMMUNITY, nodeCommunity).toList()) { - Integer nodeId = v.getProperty(NODE_ID); + Integer nodeId = (Integer) v.property(NODE_ID).value(); nodes.add(nodeId); } return nodes; @@ -235,14 +233,14 @@ public Set getNodesFromNodeCommunity(int nodeCommunity) public double getEdgesInsideCommunity(int vertexCommunity, int communityVertices) { double edges = 0; - Iterable vertices = graph.getVertices(NODE_COMMUNITY, vertexCommunity); - Iterable comVertices = graph.getVertices(COMMUNITY, communityVertices); - for (Vertex vertex : vertices) + Set comVertices = graph.traversal().V().has(COMMUNITY, communityVertices).toSet(); + for (Vertex vertex : graph.traversal().V().has(NODE_COMMUNITY, vertexCommunity).toList()) { - for (Vertex v : vertex.getVertices(Direction.OUT, SIMILAR)) + Iterator it = vertex.vertices(Direction.OUT, SIMILAR); + for (Vertex v; it.hasNext();) { - if (Iterables.contains(comVertices, v)) - { + v = it.next(); + if(comVertices.contains(v)) { edges++; } } @@ -254,13 +252,13 @@ public double getEdgesInsideCommunity(int vertexCommunity, int communityVertices public double getCommunityWeight(int community) { double communityWeight = 0; - Iterable iter = graph.getVertices(COMMUNITY, community); - if (Iterables.size(iter) > 1) + final List list = graph.traversal().V().has(COMMUNITY, community).toList(); + if (list.size() <= 1) { + return communityWeight; + } + for (Vertex vertex : list) { - for (Vertex vertex : iter) - { - communityWeight += getNodeOutDegree(vertex); - } + communityWeight += getNodeOutDegree(vertex); } return communityWeight; } @@ -269,8 +267,7 @@ public double getCommunityWeight(int community) public double getNodeCommunityWeight(int nodeCommunity) { double nodeCommunityWeight = 0; - Iterable iter = graph.getVertices(NODE_COMMUNITY, nodeCommunity); - for (Vertex vertex : iter) + for (Vertex vertex : graph.traversal().V().has(NODE_COMMUNITY, nodeCommunity).toList()) { nodeCommunityWeight += getNodeOutDegree(vertex); } @@ -280,22 +277,17 @@ public double getNodeCommunityWeight(int nodeCommunity) 
@Override public void moveNode(int nodeCommunity, int toCommunity) { - Iterable fromIter = graph.getVertices(NODE_COMMUNITY, nodeCommunity); - for (Vertex vertex : fromIter) + for (Vertex vertex : graph.traversal().V().has(NODE_COMMUNITY, nodeCommunity).toList()) { - vertex.setProperty(COMMUNITY, toCommunity); + vertex.property(COMMUNITY, toCommunity); } } @Override public double getGraphWeightSum() { - long edges = 0; - for (Vertex o : graph.getVertices()) - { - edges += ((OrientVertex) o).countEdges(Direction.OUT, SIMILAR); - } - return (double) edges; + final Iterator edges = graph.edges(); + return (double) Iterators.size(edges); } @Override @@ -303,17 +295,19 @@ public int reInitializeCommunities() { Map initCommunities = new HashMap(); int communityCounter = 0; - for (Vertex v : graph.getVertices()) + Iterator it = graph.vertices(); + for (Vertex v; it.hasNext();) { - int communityId = v.getProperty(COMMUNITY); + v = it.next(); + int communityId = (Integer) v.property(COMMUNITY).value(); if (!initCommunities.containsKey(communityId)) { initCommunities.put(communityId, communityCounter); communityCounter++; } int newCommunityId = initCommunities.get(communityId); - ((OrientVertex) v).setProperties(COMMUNITY, newCommunityId, NODE_COMMUNITY, newCommunityId); - ((OrientVertex) v).save(); + v.property(COMMUNITY, newCommunityId); + v.property(NODE_COMMUNITY, newCommunityId); } return communityCounter; } @@ -321,30 +315,25 @@ public int reInitializeCommunities() @Override public int getCommunity(int nodeCommunity) { - final Iterator result = graph.getVertices(NODE_COMMUNITY, nodeCommunity).iterator(); - if (!result.hasNext()) - throw new IllegalArgumentException("node community not found: " + nodeCommunity); - - Vertex vertex = result.next(); - int community = vertex.getProperty(COMMUNITY); + Vertex vertex = graph.traversal().V().has(NODE_COMMUNITY, nodeCommunity).next(); + int community = (Integer) vertex.property(COMMUNITY).value(); return community; } @Override public int getCommunityFromNode(int nodeId) { - Vertex vertex = graph.getVertices(NODE_ID, nodeId).iterator().next(); - return vertex.getProperty(COMMUNITY); + Vertex vertex = getVertex(nodeId); + return (Integer) vertex.property(COMMUNITY).value(); } @Override public int getCommunitySize(int community) { - Iterable vertices = graph.getVertices(COMMUNITY, community); Set nodeCommunities = new HashSet(); - for (Vertex v : vertices) + for (Vertex v : graph.traversal().V().has(COMMUNITY, community).toList()) { - int nodeCommunity = v.getProperty(NODE_COMMUNITY); + int nodeCommunity = (Integer) v.property(NODE_COMMUNITY).value(); if (!nodeCommunities.contains(nodeCommunity)) { nodeCommunities.add(nodeCommunity); @@ -359,11 +348,11 @@ public Map> mapCommunities(int numberOfCommunities) Map> communities = new HashMap>(); for (int i = 0; i < numberOfCommunities; i++) { - Iterator verticesIter = graph.getVertices(COMMUNITY, i).iterator(); + GraphTraversal t = graph.traversal().V().has(COMMUNITY, i); List vertices = new ArrayList(); - while (verticesIter.hasNext()) + while (t.hasNext()) { - Integer nodeId = verticesIter.next().getProperty(NODE_ID); + Integer nodeId = (Integer) t.next().property(NODE_ID).value(); vertices.add(nodeId); } communities.put(i, vertices); @@ -373,59 +362,45 @@ public Map> mapCommunities(int numberOfCommunities) protected void createSchema() { - graph.executeOutsideTx(new OCallable() { - @SuppressWarnings({ "unchecked", "rawtypes" }) - @Override - public Object call(final OrientBaseGraph g) - { - OrientVertexType 
v = g.getVertexBaseType(); - if(!v.existsProperty(NODE_ID)) { // TODO fix schema detection hack later - v.createProperty(NODE_ID, OType.INTEGER); - g.createKeyIndex(NODE_ID, Vertex.class, new Parameter("type", "UNIQUE_HASH_INDEX"), new Parameter( - "keytype", "INTEGER")); - - v.createEdgeProperty(Direction.OUT, SIMILAR, OType.LINKBAG); - v.createEdgeProperty(Direction.IN, SIMILAR, OType.LINKBAG); - OrientEdgeType similar = g.createEdgeType(SIMILAR); - similar.createProperty("out", OType.LINK, v); - similar.createProperty("in", OType.LINK, v); - g.createKeyIndex(COMMUNITY, Vertex.class, new Parameter("type", "NOTUNIQUE_HASH_INDEX"), - new Parameter("keytype", "INTEGER")); - g.createKeyIndex(NODE_COMMUNITY, Vertex.class, new Parameter("type", "NOTUNIQUE_HASH_INDEX"), - new Parameter("keytype", "INTEGER")); - } + createIndex(NODE_ID, NODE_LABEL, "UNIQUE_HASH_INDEX", "INTEGER"); + createIndex(COMMUNITY, null /*label*/, "NOTUNIQUE_HASH_INDEX", "INTEGER"); + createIndex(NODE_COMMUNITY, null /*label*/, "NOTUNIQUE_HASH_INDEX", "INTEGER"); + } - return null; - } - }); + private void createIndex(String key, String label, String type, String keytype) { + if(graph.getVertexIndexedKeys(label).contains(NODE_ID)) { + return; + } + final Configuration nodeIdIndexConfig = new PropertiesConfiguration(); + nodeIdIndexConfig.addProperty("type", type); + nodeIdIndexConfig.addProperty("keytype", keytype); + graph.createVertexIndex(NODE_ID, label, nodeIdIndexConfig); } private OrientGraph getGraph(final File dbPath) { - OrientGraph g; - OrientGraphFactory graphFactory = new OrientGraphFactory("plocal:" + dbPath.getAbsolutePath()); - g = graphFactory.getTx();//.setUseLog(false); - ((OrientGraph) g).setUseLightweightEdges(this.useLightWeightEdges); - return g; + Configuration config = new PropertiesConfiguration(); + config.setProperty(OrientGraph.CONFIG_URL, "plocal:" + dbPath.getAbsolutePath()); + final OrientGraphFactory graphFactory = new OrientGraphFactory(config); + return graphFactory.getTx(); } @Override public boolean nodeExists(int nodeId) { - Iterable iter = graph.getVertices(NODE_ID, nodeId); - return iter.iterator().hasNext(); + return graph.traversal().V().has(NODE_ID, nodeId).hasNext(); } @Override public Iterator getVertexIterator() { - return graph.getVertices().iterator(); + return graph.vertices(); } @Override public Iterator getNeighborsOfVertex(Vertex v) { - return v.getEdges(Direction.BOTH, SIMILAR).iterator(); + return v.edges(Direction.BOTH, SIMILAR); } @Override @@ -437,25 +412,25 @@ public void cleanupVertexIterator(Iterator it) @Override public Vertex getOtherVertexFromEdge(Edge edge, Vertex oneVertex) { - return edge.getVertex(Direction.IN).equals(oneVertex) ? edge.getVertex(Direction.OUT) : edge.getVertex(Direction.IN); + return edge.inVertex().equals(oneVertex) ? 
edge.outVertex() : edge.inVertex(); } @Override public Iterator getAllEdges() { - return graph.getEdges().iterator(); + return graph.edges(); } @Override public Vertex getSrcVertexFromEdge(Edge edge) { - return edge.getVertex(Direction.IN); + return edge.outVertex(); } @Override public Vertex getDestVertexFromEdge(Edge edge) { - return edge.getVertex(Direction.OUT); + return edge.inVertex(); } @Override @@ -491,6 +466,8 @@ public Vertex nextVertex(Iterator it) @Override public Vertex getVertex(Integer i) { - return graph.getVertices(NODE_ID, i).iterator().next(); + final GraphTraversalSource g = graph.traversal(); + final Vertex vertex = g.V().has(NODE_ID, i).next(); + return vertex; } } diff --git a/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java b/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java index af9f440..f13d28e 100644 --- a/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java +++ b/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java @@ -14,6 +14,14 @@ import org.apache.commons.configuration.Configuration; import org.apache.commons.configuration.MapConfiguration; +import org.apache.tinkerpop.gremlin.process.traversal.Path; +import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; +import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource; +import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__; +import org.apache.tinkerpop.gremlin.structure.Direction; +import org.apache.tinkerpop.gremlin.structure.Edge; +import org.apache.tinkerpop.gremlin.structure.Property; +import org.apache.tinkerpop.gremlin.structure.Vertex; import com.amazon.titan.diskstorage.dynamodb.BackendDataModel; import com.amazon.titan.diskstorage.dynamodb.Client; @@ -24,7 +32,7 @@ import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient; import com.amazonaws.services.dynamodbv2.model.CreateTableRequest; import com.amazonaws.services.dynamodbv2.model.ResourceInUseException; -import com.google.common.collect.Iterables; +import com.google.common.collect.Iterators; import com.thinkaurelius.titan.core.Multiplicity; import com.thinkaurelius.titan.core.PropertyKey; import com.thinkaurelius.titan.core.TitanFactory; @@ -32,14 +40,6 @@ import com.thinkaurelius.titan.core.schema.TitanManagement; import com.thinkaurelius.titan.core.util.TitanCleanup; import com.thinkaurelius.titan.graphdb.configuration.GraphDatabaseConfiguration; -import com.tinkerpop.blueprints.Direction; -import com.tinkerpop.blueprints.Edge; -import com.tinkerpop.blueprints.Vertex; -import com.tinkerpop.blueprints.util.wrappers.batch.BatchGraph; -import com.tinkerpop.blueprints.util.wrappers.batch.VertexIDType; -import com.tinkerpop.gremlin.java.GremlinPipeline; -import com.tinkerpop.pipes.PipeFunction; -import com.tinkerpop.pipes.branch.LoopPipe.LoopBundle; import eu.socialsensor.insert.Insertion; import eu.socialsensor.insert.TitanMassiveInsertion; @@ -60,8 +60,7 @@ public class TitanGraphDatabase extends GraphDatabaseBase, Iter double totalWeight; - private TitanGraph titanGraph; - private BatchGraph batchGraph; + private TitanGraph graph; public final BenchmarkConfiguration config; public TitanGraphDatabase(GraphDatabaseType type, BenchmarkConfiguration config, File dbStorageDirectory) @@ -237,7 +236,7 @@ private void open(boolean batchLoading) } client.shutdown(); } - titanGraph = buildTitanGraph(type, dbStorageDirectory, config, batchLoading); + graph = buildTitanGraph(type, dbStorageDirectory, config, 
batchLoading); } @Override @@ -252,36 +251,32 @@ public void createGraphForMassiveLoad() { open(true /* batchLoading */); createSchema(); - - batchGraph = new BatchGraph(titanGraph, VertexIDType.NUMBER, 100000 /* bufferSize */); - batchGraph.setVertexIdKey(NODE_ID); - batchGraph.setLoadingFromScratch(true /* fromScratch */); } @Override public void massiveModeLoading(File dataPath) { - Insertion titanMassiveInsertion = new TitanMassiveInsertion(this.batchGraph, type); + Insertion titanMassiveInsertion = new TitanMassiveInsertion(this.graph, type); titanMassiveInsertion.createGraph(dataPath, 0 /* scenarioNumber */); } @Override public void singleModeLoading(File dataPath, File resultsPath, int scenarioNumber) { - Insertion titanSingleInsertion = new TitanSingleInsertion(this.titanGraph, type, resultsPath); + Insertion titanSingleInsertion = new TitanSingleInsertion(this.graph, type, resultsPath); titanSingleInsertion.createGraph(dataPath, scenarioNumber); } @Override public void shutdown() { - if (titanGraph == null) + if (graph == null) { return; } try { - titanGraph.shutdown(); + graph.close(); } catch (IOError e) { @@ -289,26 +284,26 @@ public void shutdown() System.err.println("Failed to shutdown titan graph: " + e.getMessage()); } - titanGraph = null; + graph = null; } @Override public void delete() { - titanGraph = buildTitanGraph(type, dbStorageDirectory, config, false /* batchLoading */); + graph = buildTitanGraph(type, dbStorageDirectory, config, false /* batchLoading */); try { - titanGraph.shutdown(); + graph.close(); } catch (IOError e) { // TODO Fix issue in shutting down titan-cassandra-embedded System.err.println("Failed to shutdown titan graph: " + e.getMessage()); } - TitanCleanup.clear(titanGraph); + TitanCleanup.clear(graph); try { - titanGraph.shutdown(); + graph.close(); } catch (IOError e) { @@ -321,74 +316,43 @@ public void delete() @Override public void shutdownMassiveGraph() { - if (titanGraph == null) + if (graph == null) { return; } - try - { - batchGraph.shutdown(); - } - catch (IOError e) - { - // TODO Fix issue in shutting down titan-cassandra-embedded - System.err.println("Failed to shutdown batch graph: " + e.getMessage()); - } - try - { - titanGraph.shutdown(); - } - catch (IOError e) - { - // TODO Fix issue in shutting down titan-cassandra-embedded - System.err.println("Failed to shutdown titan graph: " + e.getMessage()); - } - batchGraph = null; - titanGraph = null; + shutdown(); } @Override public void shortestPath(final Vertex fromNode, Integer node) { - final Vertex v2 = titanGraph.getVertices(NODE_ID, node).iterator().next(); - @SuppressWarnings("rawtypes") - final GremlinPipeline pathPipe = new GremlinPipeline(fromNode).as(SIMILAR) - .out(SIMILAR).loop(SIMILAR, new PipeFunction, Boolean>() { - // @Override - public Boolean compute(LoopBundle bundle) - { - return bundle.getLoops() < 5 && !bundle.getObject().equals(v2); - } - }).path(); - @SuppressWarnings("unused") - int length = pathPipe.iterator().next().size(); + final GraphTraversalSource g = graph.traversal(); + final Vertex toNode = g.V().has(NODE_ID, node).next(); + //TODO(amcp) how to limit depth to 5? 
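One way to address the depth TODO above (a sketch only, not part of this patch, assuming standard TinkerPop 3 step semantics): bound the repeat() with times(5), emit() only traversers that have reached the target, and filter out the depth-5 frontier that never arrived.

    // sketch: cap the search at five hops; emit() captures hits found on earlier hops,
    // the trailing filter drops traversers that exhausted the loop without reaching toNode
    List<Path> paths = g.V(fromNode)
        .repeat(__.both().simplePath())
        .emit(__.is(toNode))
        .times(5)
        .filter(__.is(toNode))
        .limit(1)
        .path()
        .toList();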
+ List paths = g.V(fromNode).repeat(__.both().simplePath()).until(__.is(toNode)).limit(1).path().toList(); + + for(Path path : paths) { + path.size(); + } } @Override public int getNodeCount() { - long nodeCount = new GremlinPipeline(titanGraph).V().count(); + final GraphTraversalSource g = graph.traversal(); + final long nodeCount = g.V().count().toList().get(0); return (int) nodeCount; } @Override public Set getNeighborsIds(int nodeId) { - // Set neighbours = new HashSet(); - // Vertex vertex = titanGraph.getVertices("nodeId", - // nodeId).iterator().next(); - // for (Vertex v : vertex.getVertices(Direction.IN, SIMILAR)) { - // Integer neighborId = v.getProperty("nodeId"); - // neighbours.add(neighborId); - // } - // return neighbours; + final Vertex vertex = getVertex(nodeId); Set neighbors = new HashSet(); - Vertex vertex = titanGraph.getVertices(NODE_ID, nodeId).iterator().next(); - GremlinPipeline pipe = new GremlinPipeline(vertex).out(SIMILAR); - Iterator iter = pipe.iterator(); + Iterator iter = vertex.vertices(Direction.OUT, SIMILAR); while (iter.hasNext()) { - Integer neighborId = iter.next().getProperty(NODE_ID); + Integer neighborId = Integer.valueOf(iter.next().property(NODE_ID).value().toString()); neighbors.add(neighborId); } return neighbors; @@ -397,37 +361,29 @@ public Set getNeighborsIds(int nodeId) @Override public double getNodeWeight(int nodeId) { - Vertex vertex = titanGraph.getVertices(NODE_ID, nodeId).iterator().next(); + Vertex vertex = getVertex(nodeId); double weight = getNodeOutDegree(vertex); return weight; } public double getNodeInDegree(Vertex vertex) { - // Iterable result = vertex.getVertices(Direction.IN, - // SIMILAR); - // return (double)Iterables.size(result); - GremlinPipeline pipe = new GremlinPipeline(vertex).in(SIMILAR); - return (double) pipe.count(); + return (double) Iterators.size(vertex.edges(Direction.IN, SIMILAR)); } public double getNodeOutDegree(Vertex vertex) { - // Iterable result = vertex.getVertices(Direction.OUT, - // SIMILAR); - // return (double)Iterables.size(result); - GremlinPipeline pipe = new GremlinPipeline(vertex).out(SIMILAR); - return (double) pipe.count(); + return (double) Iterators.size(vertex.edges(Direction.OUT, SIMILAR)); } @Override public void initCommunityProperty() { int communityCounter = 0; - for (Vertex v : titanGraph.getVertices()) + for (Vertex v : graph.traversal().V().toList()) { - v.setProperty(NODE_COMMUNITY, communityCounter); - v.setProperty(COMMUNITY, communityCounter); + v.property(NODE_COMMUNITY, communityCounter); + v.property(COMMUNITY, communityCounter); communityCounter++; } } @@ -436,22 +392,11 @@ public void initCommunityProperty() public Set getCommunitiesConnectedToNodeCommunities(int nodeCommunities) { Set communities = new HashSet(); - Iterable vertices = titanGraph.getVertices(NODE_COMMUNITY, nodeCommunities); - for (Vertex vertex : vertices) + final GraphTraversalSource g = graph.traversal(); + + for (Property p : g.V().has(NODE_COMMUNITY, nodeCommunities).out(SIMILAR).properties(COMMUNITY).toSet()) { - // for(Vertex v : vertex.getVertices(Direction.OUT, SIMILAR)) { - // int community = v.getProperty("community"); - // if(!communities.contains(community)) { - // communities.add(community); - // } - // } - GremlinPipeline pipe = new GremlinPipeline(vertex).out(SIMILAR); - Iterator iter = pipe.iterator(); - while (iter.hasNext()) - { - int community = iter.next().getProperty(COMMUNITY); - communities.add(community); - } + communities.add((Integer) p.value()); } return communities; } @@ 
-459,11 +404,11 @@ public Set getCommunitiesConnectedToNodeCommunities(int nodeCommunities @Override public Set getNodesFromCommunity(int community) { + final GraphTraversalSource g = graph.traversal(); Set nodes = new HashSet(); - Iterable iter = titanGraph.getVertices(COMMUNITY, community); - for (Vertex v : iter) + for (Vertex v : g.V().has(COMMUNITY, community).toList()) { - Integer nodeId = v.getProperty(NODE_ID); + Integer nodeId = (Integer) v.property(NODE_ID).value(); nodes.add(nodeId); } return nodes; @@ -473,11 +418,11 @@ public Set getNodesFromCommunity(int community) public Set getNodesFromNodeCommunity(int nodeCommunity) { Set nodes = new HashSet(); - Iterable iter = titanGraph.getVertices(NODE_COMMUNITY, nodeCommunity); - for (Vertex v : iter) + final GraphTraversalSource g = graph.traversal(); + + for (Property property : g.V().has(NODE_COMMUNITY, nodeCommunity).properties(NODE_ID).toList()) { - Integer nodeId = v.getProperty(NODE_ID); - nodes.add(nodeId); + nodes.add((Integer) property.value()); } return nodes; } @@ -486,25 +431,17 @@ public Set getNodesFromNodeCommunity(int nodeCommunity) public double getEdgesInsideCommunity(int vertexCommunity, int communityVertices) { double edges = 0; - Iterable vertices = titanGraph.getVertices(NODE_COMMUNITY, vertexCommunity); - Iterable comVertices = titanGraph.getVertices(COMMUNITY, communityVertices); - for (Vertex vertex : vertices) + Set comVertices = graph.traversal().V().has(COMMUNITY, communityVertices).toSet(); + for (Vertex vertex : graph.traversal().V().has(NODE_COMMUNITY, vertexCommunity).toList()) { - for (Vertex v : vertex.getVertices(Direction.OUT, SIMILAR)) + Iterator it = vertex.vertices(Direction.OUT, SIMILAR); + for (Vertex v; it.hasNext();) { - if (Iterables.contains(comVertices, v)) - { + v = it.next(); + if(comVertices.contains(v)) { edges++; } } - // GremlinPipeline pipe = new - // GremlinPipeline(vertex).out(SIMILAR); - // Iterator iter = pipe.iterator(); - // while(iter.hasNext()) { - // if(Iterables.contains(comVertices, iter.next())){ - // edges++; - // } - // } } return edges; } @@ -513,13 +450,13 @@ public double getEdgesInsideCommunity(int vertexCommunity, int communityVertices public double getCommunityWeight(int community) { double communityWeight = 0; - Iterable iter = titanGraph.getVertices(COMMUNITY, community); - if (Iterables.size(iter) > 1) + final List list = graph.traversal().V().has(COMMUNITY, community).toList(); + if (list.size() <= 1) { + return communityWeight; + } + for (Vertex vertex : list) { - for (Vertex vertex : iter) - { - communityWeight += getNodeOutDegree(vertex); - } + communityWeight += getNodeOutDegree(vertex); } return communityWeight; } @@ -528,8 +465,7 @@ public double getCommunityWeight(int community) public double getNodeCommunityWeight(int nodeCommunity) { double nodeCommunityWeight = 0; - Iterable iter = titanGraph.getVertices(NODE_COMMUNITY, nodeCommunity); - for (Vertex vertex : iter) + for (Vertex vertex : graph.traversal().V().has(NODE_COMMUNITY, nodeCommunity).toList()) { nodeCommunityWeight += getNodeOutDegree(vertex); } @@ -539,18 +475,17 @@ public double getNodeCommunityWeight(int nodeCommunity) @Override public void moveNode(int nodeCommunity, int toCommunity) { - Iterable fromIter = titanGraph.getVertices(NODE_COMMUNITY, nodeCommunity); - for (Vertex vertex : fromIter) + for (Vertex vertex : graph.traversal().V().has(NODE_COMMUNITY, nodeCommunity).toList()) { - vertex.setProperty(COMMUNITY, toCommunity); + vertex.property(COMMUNITY, toCommunity); } } @Override 
public double getGraphWeightSum() { - Iterable edges = titanGraph.getEdges(); - return (double) Iterables.size(edges); + final Iterator edges = graph.edges(); + return (double) Iterators.size(edges); } @Override @@ -558,17 +493,19 @@ public int reInitializeCommunities() { Map initCommunities = new HashMap(); int communityCounter = 0; - for (Vertex v : titanGraph.getVertices()) + Iterator it = graph.vertices(); + for (Vertex v; it.hasNext();) { - int communityId = v.getProperty(COMMUNITY); + v = it.next(); + int communityId = (Integer) v.property(COMMUNITY).value(); if (!initCommunities.containsKey(communityId)) { initCommunities.put(communityId, communityCounter); communityCounter++; } int newCommunityId = initCommunities.get(communityId); - v.setProperty(COMMUNITY, newCommunityId); - v.setProperty(NODE_COMMUNITY, newCommunityId); + v.property(COMMUNITY, newCommunityId); + v.property(NODE_COMMUNITY, newCommunityId); } return communityCounter; } @@ -576,26 +513,25 @@ public int reInitializeCommunities() @Override public int getCommunity(int nodeCommunity) { - Vertex vertex = titanGraph.getVertices(NODE_COMMUNITY, nodeCommunity).iterator().next(); - int community = vertex.getProperty(COMMUNITY); + Vertex vertex = graph.traversal().V().has(NODE_COMMUNITY, nodeCommunity).next(); + int community = (Integer) vertex.property(COMMUNITY).value(); return community; } @Override public int getCommunityFromNode(int nodeId) { - Vertex vertex = titanGraph.getVertices(NODE_ID, nodeId).iterator().next(); - return vertex.getProperty(COMMUNITY); + Vertex vertex = getVertex(nodeId); + return (Integer) vertex.property(COMMUNITY).value(); } @Override public int getCommunitySize(int community) { - Iterable vertices = titanGraph.getVertices(COMMUNITY, community); Set nodeCommunities = new HashSet(); - for (Vertex v : vertices) + for (Vertex v : graph.traversal().V().has(COMMUNITY, community).toList()) { - int nodeCommunity = v.getProperty(NODE_COMMUNITY); + int nodeCommunity = (Integer) v.property(NODE_COMMUNITY).value(); if (!nodeCommunities.contains(nodeCommunity)) { nodeCommunities.add(nodeCommunity); @@ -610,11 +546,11 @@ public Map> mapCommunities(int numberOfCommunities) Map> communities = new HashMap>(); for (int i = 0; i < numberOfCommunities; i++) { - Iterator verticesIter = titanGraph.getVertices(COMMUNITY, i).iterator(); + GraphTraversal t = graph.traversal().V().has(COMMUNITY, i); List vertices = new ArrayList(); - while (verticesIter.hasNext()) + while (t.hasNext()) { - Integer nodeId = verticesIter.next().getProperty(NODE_ID); + Integer nodeId = (Integer) t.next().property(NODE_ID).value(); vertices.add(nodeId); } communities.put(i, vertices); @@ -624,23 +560,22 @@ public Map> mapCommunities(int numberOfCommunities) private void createSchema() { - final TitanManagement mgmt = titanGraph.getManagementSystem(); - if (!titanGraph.getIndexedKeys(Vertex.class).contains(NODE_ID)) + final TitanManagement mgmt = graph.openManagement(); + if (null == mgmt.getGraphIndex(NODE_ID)) { final PropertyKey key = mgmt.makePropertyKey(NODE_ID).dataType(Integer.class).make(); mgmt.buildIndex(NODE_ID, Vertex.class).addKey(key).unique().buildCompositeIndex(); } - if (!titanGraph.getIndexedKeys(Vertex.class).contains(COMMUNITY)) + if (null == mgmt.getGraphIndex(COMMUNITY)) { final PropertyKey key = mgmt.makePropertyKey(COMMUNITY).dataType(Integer.class).make(); mgmt.buildIndex(COMMUNITY, Vertex.class).addKey(key).buildCompositeIndex(); } - if (!titanGraph.getIndexedKeys(Vertex.class).contains(NODE_COMMUNITY)) + if (null 
== mgmt.getGraphIndex(NODE_COMMUNITY)) { final PropertyKey key = mgmt.makePropertyKey(NODE_COMMUNITY).dataType(Integer.class).make(); mgmt.buildIndex(NODE_COMMUNITY, Vertex.class).addKey(key).buildCompositeIndex(); } - if (mgmt.getEdgeLabel(SIMILAR) == null) { mgmt.makeEdgeLabel(SIMILAR).multiplicity(Multiplicity.MULTI).directed().make(); @@ -651,20 +586,19 @@ private void createSchema() @Override public boolean nodeExists(int nodeId) { - Iterable iter = titanGraph.getVertices(NODE_ID, nodeId); - return iter.iterator().hasNext(); + return graph.traversal().V().has(NODE_ID, nodeId).hasNext(); } @Override public Iterator getVertexIterator() { - return titanGraph.getVertices().iterator(); + return graph.vertices(); } @Override public Iterator getNeighborsOfVertex(Vertex v) { - return v.getEdges(Direction.BOTH, SIMILAR).iterator(); + return v.edges(Direction.BOTH, SIMILAR); } @Override @@ -676,25 +610,25 @@ public void cleanupVertexIterator(Iterator it) @Override public Vertex getOtherVertexFromEdge(Edge edge, Vertex oneVertex) { - return edge.getVertex(Direction.IN).equals(oneVertex) ? edge.getVertex(Direction.OUT) : edge.getVertex(Direction.IN); + return edge.inVertex().equals(oneVertex) ? edge.outVertex() : edge.inVertex(); } @Override public Iterator getAllEdges() { - return titanGraph.getEdges().iterator(); + return graph.edges(); } @Override public Vertex getSrcVertexFromEdge(Edge edge) { - return edge.getVertex(Direction.IN); + return edge.outVertex(); } @Override public Vertex getDestVertexFromEdge(Edge edge) { - return edge.getVertex(Direction.OUT); + return edge.inVertex(); } @Override @@ -730,6 +664,8 @@ public Vertex nextVertex(Iterator it) @Override public Vertex getVertex(Integer i) { - return titanGraph.getVertices(NODE_ID, i.intValue()).iterator().next(); + final GraphTraversalSource g = graph.traversal(); + final Vertex vertex = g.V().has(NODE_ID, i).next(); + return vertex; } } diff --git a/src/main/java/eu/socialsensor/insert/InsertionBase.java b/src/main/java/eu/socialsensor/insert/InsertionBase.java index d787f4a..564d66c 100644 --- a/src/main/java/eu/socialsensor/insert/InsertionBase.java +++ b/src/main/java/eu/socialsensor/insert/InsertionBase.java @@ -30,6 +30,8 @@ public abstract class InsertionBase implements Insertion { private static final Logger logger = LogManager.getLogger(); public static final String INSERTION_CONTEXT = ".eu.socialsensor.insertion."; + public static final String SIMILAR = "similar"; + public static final String NODEID = "nodeId"; private final Timer getOrCreateTimes; private final Timer relateNodesTimes; diff --git a/src/main/java/eu/socialsensor/insert/OrientMassiveInsertion.java b/src/main/java/eu/socialsensor/insert/OrientMassiveInsertion.java index accf746..14cf4c5 100644 --- a/src/main/java/eu/socialsensor/insert/OrientMassiveInsertion.java +++ b/src/main/java/eu/socialsensor/insert/OrientMassiveInsertion.java @@ -1,9 +1,10 @@ package eu.socialsensor.insert; -import com.orientechnologies.orient.core.config.OGlobalConfiguration; -import com.orientechnologies.orient.graph.batch.OGraphBatchInsertBasic; -import com.tinkerpop.blueprints.impls.orient.OrientGraphNoTx; +import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; +import org.apache.tinkerpop.gremlin.structure.Graph; +import org.apache.tinkerpop.gremlin.structure.Vertex; +import eu.socialsensor.graphdatabases.OrientGraphDatabase; import eu.socialsensor.main.GraphDatabaseType; /** @@ -13,40 +14,35 @@ * @author Alexander Patrikalakis * */ -public class 
OrientMassiveInsertion extends InsertionBase implements Insertion +public class OrientMassiveInsertion extends InsertionBase implements Insertion { - private static final int ESTIMATED_ENTRIES = 1000000; - private static final int AVERAGE_NUMBER_OF_EDGES_PER_NODE = 40; - private static final int NUMBER_OF_ORIENT_CLUSTERS = 16; - private final OGraphBatchInsertBasic graph; + private final Graph graph; - public OrientMassiveInsertion(final String url) + public OrientMassiveInsertion(Graph graph) { super(GraphDatabaseType.ORIENT_DB, null /* resultsPath */); - OGlobalConfiguration.ENVIRONMENT_CONCURRENT.setValue(false); - OrientGraphNoTx transactionlessGraph = new OrientGraphNoTx(url); - for (int i = 0; i < NUMBER_OF_ORIENT_CLUSTERS; ++i) - { - transactionlessGraph.getVertexBaseType().addCluster("v_" + i); - transactionlessGraph.getEdgeBaseType().addCluster("e_" + i); - } - transactionlessGraph.shutdown(); + this.graph = graph; + } - graph = new OGraphBatchInsertBasic(url); - graph.setAverageEdgeNumberPerNode(AVERAGE_NUMBER_OF_EDGES_PER_NODE); - graph.setEstimatedEntries(ESTIMATED_ENTRIES); - graph.begin(); + @Override + protected Vertex getOrCreate(String value) + { + final Integer intValue = Integer.valueOf(value); + final GraphTraversal traversal = graph.traversal().V().has(NODEID, intValue); + final Vertex vertex = traversal.hasNext() ? traversal.next() : graph.addVertex(OrientGraphDatabase.NODE_LABEL); + vertex.property(NODEID, intValue); + return vertex; } @Override - protected Long getOrCreate(String value) + protected void relateNodes(Vertex src, Vertex dest) { - return Long.parseLong(value); + src.addEdge(SIMILAR, dest); } @Override - protected void relateNodes(Long src, Long dest) + protected void post() { - graph.createEdge(src, dest); + graph.tx().commit(); } } diff --git a/src/main/java/eu/socialsensor/insert/OrientSingleInsertion.java b/src/main/java/eu/socialsensor/insert/OrientSingleInsertion.java index 6fa617e..6fa63f1 100644 --- a/src/main/java/eu/socialsensor/insert/OrientSingleInsertion.java +++ b/src/main/java/eu/socialsensor/insert/OrientSingleInsertion.java @@ -2,12 +2,11 @@ import java.io.File; -import com.orientechnologies.orient.core.db.record.OIdentifiable; -import com.orientechnologies.orient.core.index.OIndex; -import com.tinkerpop.blueprints.TransactionalGraph; -import com.tinkerpop.blueprints.Vertex; -import com.tinkerpop.blueprints.impls.orient.OrientGraph; +import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; +import org.apache.tinkerpop.gremlin.structure.Graph; +import org.apache.tinkerpop.gremlin.structure.Vertex; +import eu.socialsensor.graphdatabases.OrientGraphDatabase; import eu.socialsensor.main.GraphDatabaseType; /** @@ -19,58 +18,35 @@ */ public final class OrientSingleInsertion extends InsertionBase { - protected final OrientGraph orientGraph; - protected final OIndex index; + protected final Graph graph; - public OrientSingleInsertion(OrientGraph orientGraph, File resultsPath) + public OrientSingleInsertion(Graph graph, File resultsPath) { super(GraphDatabaseType.ORIENT_DB, resultsPath); - this.orientGraph = orientGraph; - this.index = this.orientGraph.getRawGraph().getMetadata().getIndexManager().getIndex("V.nodeId"); + this.graph = graph; } @Override protected void relateNodes(Vertex src, Vertex dest) { - orientGraph.addEdge(null, src, dest, "similar"); - - // TODO why commit twice? is this a nested transaction? 
- if (orientGraph instanceof TransactionalGraph) - { - ((TransactionalGraph) orientGraph).commit(); - ((TransactionalGraph) orientGraph).commit(); - } - } - - @Override - protected Vertex getOrCreate(final String value) - { - final int key = Integer.parseInt(value); - - Vertex v; - final OIdentifiable rec = (OIdentifiable) index.get(key); - if (rec != null) + try { - return orientGraph.getVertex(rec); + src.addEdge(SIMILAR, dest); + graph.tx().commit(); } - - v = orientGraph.addVertex(key, "nodeId", key); - - if (orientGraph instanceof TransactionalGraph) + catch (Exception e) { - ((TransactionalGraph) orientGraph).commit(); + graph.tx().rollback(); } - - return v; } - @Override - protected void post() - { - super.post(); - if (orientGraph instanceof TransactionalGraph) - { - ((TransactionalGraph) orientGraph).commit(); - } + protected Vertex getOrCreate(final String value) { + final Integer intValue = Integer.valueOf(value); + final GraphTraversal traversal = graph.traversal().V().has(NODEID, intValue); + final Vertex vertex = traversal.hasNext() ? traversal.next() : graph.addVertex(OrientGraphDatabase.NODE_LABEL); + vertex.property(NODEID, intValue); + graph.tx().commit(); + return vertex; } + } diff --git a/src/main/java/eu/socialsensor/insert/TitanMassiveInsertion.java b/src/main/java/eu/socialsensor/insert/TitanMassiveInsertion.java index 95a8ac6..8fa0630 100644 --- a/src/main/java/eu/socialsensor/insert/TitanMassiveInsertion.java +++ b/src/main/java/eu/socialsensor/insert/TitanMassiveInsertion.java @@ -1,9 +1,8 @@ package eu.socialsensor.insert; -import com.thinkaurelius.titan.core.TitanGraph; -import com.thinkaurelius.titan.core.util.TitanId; -import com.tinkerpop.blueprints.Vertex; -import com.tinkerpop.blueprints.util.wrappers.batch.BatchGraph; +import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; +import org.apache.tinkerpop.gremlin.structure.Graph; +import org.apache.tinkerpop.gremlin.structure.Vertex; import eu.socialsensor.main.GraphDatabaseType; @@ -16,32 +15,32 @@ */ public class TitanMassiveInsertion extends InsertionBase { - private final BatchGraph batchGraph; + private final Graph graph; - public TitanMassiveInsertion(BatchGraph batchGraph, GraphDatabaseType type) + public TitanMassiveInsertion(Graph graph, GraphDatabaseType type) { super(type, null /* resultsPath */); // no temp files for massive load // insert - this.batchGraph = batchGraph; + this.graph = graph; } @Override public Vertex getOrCreate(String value) { Integer intVal = Integer.valueOf(value); - final long titanVertexId = TitanId.toVertexId(intVal); - Vertex vertex = batchGraph.getVertex(titanVertexId); - if (vertex == null) - { - vertex = batchGraph.addVertex(titanVertexId); - vertex.setProperty("nodeId", intVal); - } + final GraphTraversal t = graph.traversal().V().has(NODEID, intVal); + final Vertex vertex = t.hasNext() ? 
t.next() : graph.addVertex(NODEID, intVal); return vertex; } @Override public void relateNodes(Vertex src, Vertex dest) { - src.addEdge("similar", dest); + src.addEdge(SIMILAR, dest); + } + + @Override + protected void post() { + graph.tx().commit(); } } diff --git a/src/main/java/eu/socialsensor/insert/TitanSingleInsertion.java b/src/main/java/eu/socialsensor/insert/TitanSingleInsertion.java index b7dfe16..2a37339 100644 --- a/src/main/java/eu/socialsensor/insert/TitanSingleInsertion.java +++ b/src/main/java/eu/socialsensor/insert/TitanSingleInsertion.java @@ -2,10 +2,10 @@ import java.io.File; -import com.thinkaurelius.titan.core.TitanGraph; -import com.thinkaurelius.titan.core.util.TitanId; -import com.tinkerpop.blueprints.Compare; -import com.tinkerpop.blueprints.Vertex; +import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; +import org.apache.tinkerpop.gremlin.structure.Graph; +import org.apache.tinkerpop.gremlin.structure.Transaction; +import org.apache.tinkerpop.gremlin.structure.Vertex; import eu.socialsensor.main.GraphDatabaseType; @@ -18,44 +18,37 @@ */ public class TitanSingleInsertion extends InsertionBase { - private final TitanGraph titanGraph; + private final Graph graph; - public TitanSingleInsertion(TitanGraph titanGraph, GraphDatabaseType type, File resultsPath) + public TitanSingleInsertion(Graph titanGraph, GraphDatabaseType type, File resultsPath) { super(type, resultsPath); - this.titanGraph = titanGraph; + this.graph = titanGraph; } @Override public Vertex getOrCreate(String value) { - Integer intValue = Integer.valueOf(value); - final Vertex v; - if (titanGraph.query().has("nodeId", Compare.EQUAL, intValue).vertices().iterator().hasNext()) - { - v = (Vertex) titanGraph.query().has("nodeId", Compare.EQUAL, intValue).vertices().iterator().next(); - } - else - { - final long titanVertexId = TitanId.toVertexId(intValue); - v = titanGraph.addVertex(titanVertexId); - v.setProperty("nodeId", intValue); - titanGraph.commit(); - } - return v; + final Transaction tx = graph.tx(); + final Integer intValue = Integer.valueOf(value); + final GraphTraversal traversal = graph.traversal().V().has(NODEID, intValue); + final Vertex vertex = traversal.hasNext() ? traversal.next() : graph.addVertex(NODEID, intValue); + tx.commit(); + return vertex; } @Override public void relateNodes(Vertex src, Vertex dest) { + final Transaction tx = graph.tx(); try { - titanGraph.addEdge(null, src, dest, "similar"); - titanGraph.commit(); + src.addEdge(SIMILAR, dest); + tx.commit(); } catch (Exception e) { - titanGraph.rollback(); //TODO(amcp) why can this happen? doesn't this indicate illegal state? 
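The rollback branch here is not necessarily an illegal state: if several writers touch the same vertices, or the storage backend fails transiently, Titan can abort a commit with a recoverable exception, and the usual remedy is to roll back and retry. A minimal retry sketch (hypothetical, not part of the patch; it assumes the vertex references remain usable in the next transaction):

    // retry the edge write a few times before giving up on a transient commit failure
    for (int attempt = 0; attempt < 3; attempt++) {
        try {
            src.addEdge(SIMILAR, dest);
            graph.tx().commit();
            return;
        } catch (Exception e) {
            graph.tx().rollback();
        }
    }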
+ tx.rollback(); } } } diff --git a/src/main/java/eu/socialsensor/main/BenchmarkConfiguration.java b/src/main/java/eu/socialsensor/main/BenchmarkConfiguration.java index 768db9b..26e907f 100644 --- a/src/main/java/eu/socialsensor/main/BenchmarkConfiguration.java +++ b/src/main/java/eu/socialsensor/main/BenchmarkConfiguration.java @@ -25,12 +25,6 @@ */ public class BenchmarkConfiguration { - // orientdb Configuration - private static final String LIGHTWEIGHT_EDGES = "lightweight-edges"; - - // Sparksee / DEX configuration - private static final String LICENSE_KEY = "license-key"; - // Titan specific configuration private static final String TITAN = "titan"; private static final String BUFFER_SIZE = GraphDatabaseConfiguration.BUFFER_SIZE.getName(); @@ -87,8 +81,6 @@ public class BenchmarkConfiguration private final long dynamodbTps; private final BackendDataModel dynamodbDataModel; private final boolean dynamodbConsistentRead; - private final Boolean orientLightweightEdges; - private final String sparkseeLicenseKey; // shortest path private final int randomNodes; @@ -164,12 +156,6 @@ public BenchmarkConfiguration(Configuration appconfig) this.dynamodbEndpoint = dynamodb.containsKey(ENDPOINT) ? dynamodb.getString(ENDPOINT) : null; this.dynamodbTablePrefix = dynamodb.containsKey(TABLE_PREFIX) ? dynamodb.getString(TABLE_PREFIX) : Constants.DYNAMODB_TABLE_PREFIX.getDefaultValue(); - Configuration orient = socialsensor.subset("orient"); - orientLightweightEdges = orient.containsKey(LIGHTWEIGHT_EDGES) ? orient.getBoolean(LIGHTWEIGHT_EDGES) : null; - - Configuration sparksee = socialsensor.subset("sparksee"); - sparkseeLicenseKey = sparksee.containsKey(LICENSE_KEY) ? sparksee.getString(LICENSE_KEY) : null; - Configuration titan = socialsensor.subset(TITAN); //TODO(amcp) move dynamodb ns into titan bufferSize = titan.getInt(BUFFER_SIZE, GraphDatabaseConfiguration.BUFFER_SIZE.getDefaultValue()); blocksize = titan.getInt(IDS_BLOCKSIZE, GraphDatabaseConfiguration.IDS_BLOCK_SIZE.getDefaultValue()); @@ -360,16 +346,6 @@ public File getActualCommunitiesFile() return actualCommunities; } - public Boolean orientLightweightEdges() - { - return orientLightweightEdges; - } - - public String getSparkseeLicenseKey() - { - return sparkseeLicenseKey; - } - public boolean permuteBenchmarks() { return permuteBenchmarks; diff --git a/src/main/java/eu/socialsensor/utils/Utils.java b/src/main/java/eu/socialsensor/utils/Utils.java index cfaf3e2..65e4cc7 100644 --- a/src/main/java/eu/socialsensor/utils/Utils.java +++ b/src/main/java/eu/socialsensor/utils/Utils.java @@ -23,7 +23,6 @@ import eu.socialsensor.graphdatabases.GraphDatabase; import eu.socialsensor.graphdatabases.Neo4jGraphDatabase; import eu.socialsensor.graphdatabases.OrientGraphDatabase; -import eu.socialsensor.graphdatabases.SparkseeGraphDatabase; import eu.socialsensor.graphdatabases.TitanGraphDatabase; import eu.socialsensor.main.BenchmarkConfiguration; import eu.socialsensor.main.BenchmarkingException; @@ -197,10 +196,6 @@ else if (GraphDatabaseType.ORIENT_DB == type) { graphDatabase = new OrientGraphDatabase(config, dbStorageDirectory); } - else if (GraphDatabaseType.SPARKSEE == type) - { - graphDatabase = new SparkseeGraphDatabase(config, dbStorageDirectory); - } else { // For safety, will handle the null case From b1450323f7569e0900887ace78fea7f5c8fd81d8 Mon Sep 17 00:00:00 2001 From: Alexander Patrikalakis Date: Fri, 4 Mar 2016 02:24:06 +0900 Subject: [PATCH 03/26] upgraded neo4j to 2.3.2 --- pom.xml | 12 ++- .../graphdatabases/GraphDatabaseBase.java 
| 7 +- .../graphdatabases/Neo4jGraphDatabase.java | 101 +++++++++--------- .../graphdatabases/OrientGraphDatabase.java | 2 +- .../insert/Neo4jSingleInsertion.java | 11 +- 5 files changed, 69 insertions(+), 64 deletions(-) diff --git a/pom.xml b/pom.xml index d31b5ac..0edf869 100644 --- a/pom.xml +++ b/pom.xml @@ -53,7 +53,7 @@ 3.0.1-incubating 1.0.0 0.98.8-hadoop2 - 2.0.1 + 2.3.2 1.0.0 2.1 2.18.1 @@ -101,10 +101,16 @@ lucene-core 3.6.2 + + org.apache.tinkerpop + neo4j-gremlin + ${tinkerpop.version} + + org.neo4j - neo4j-cypher - ${neo4j.version} + neo4j-tinkerpop-api-impl + 0.3-2.3.2 org.neo4j diff --git a/src/main/java/eu/socialsensor/graphdatabases/GraphDatabaseBase.java b/src/main/java/eu/socialsensor/graphdatabases/GraphDatabaseBase.java index d4992e0..b47fa95 100644 --- a/src/main/java/eu/socialsensor/graphdatabases/GraphDatabaseBase.java +++ b/src/main/java/eu/socialsensor/graphdatabases/GraphDatabaseBase.java @@ -4,7 +4,6 @@ import java.util.Set; import org.neo4j.graphdb.Transaction; -import org.neo4j.kernel.GraphDatabaseAPI; import com.codahale.metrics.MetricRegistry; import com.codahale.metrics.Timer; @@ -105,9 +104,9 @@ public void findAllNodeNeighbours() { @Override public void findNodesOfAllEdges() { - Object tx = null; + Transaction tx = null; if(GraphDatabaseType.NEO4J == type) {//TODO fix this - tx = ((GraphDatabaseAPI) ((Neo4jGraphDatabase) this).neo4jGraph).tx().unforced().begin(); + tx = ((Neo4jGraphDatabase) this).neo4jGraph.beginTx(); } try { @@ -134,7 +133,7 @@ public void findNodesOfAllEdges() { } } finally {//TODO fix this if(GraphDatabaseType.NEO4J == type) { - ((Transaction) tx).close(); + tx.close(); } } } diff --git a/src/main/java/eu/socialsensor/graphdatabases/Neo4jGraphDatabase.java b/src/main/java/eu/socialsensor/graphdatabases/Neo4jGraphDatabase.java index 61dc1f2..eeb8a66 100644 --- a/src/main/java/eu/socialsensor/graphdatabases/Neo4jGraphDatabase.java +++ b/src/main/java/eu/socialsensor/graphdatabases/Neo4jGraphDatabase.java @@ -1,6 +1,7 @@ package eu.socialsensor.graphdatabases; -import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; import eu.socialsensor.insert.Insertion; import eu.socialsensor.insert.Neo4jMassiveInsertion; @@ -17,21 +18,20 @@ import org.neo4j.graphdb.Label; import org.neo4j.graphdb.Node; import org.neo4j.graphdb.Path; +import org.neo4j.graphdb.PathExpanders; import org.neo4j.graphdb.Relationship; import org.neo4j.graphdb.RelationshipType; -import org.neo4j.graphdb.ResourceIterable; +import org.neo4j.graphdb.ResourceIterator; import org.neo4j.graphdb.Transaction; import org.neo4j.graphdb.factory.GraphDatabaseFactory; import org.neo4j.graphdb.schema.Schema; import org.neo4j.helpers.collection.IteratorUtil; -import org.neo4j.kernel.GraphDatabaseAPI; -import org.neo4j.kernel.TransactionBuilder; -import org.neo4j.kernel.Traversal; import org.neo4j.tooling.GlobalGraphOperations; import org.neo4j.unsafe.batchinsert.BatchInserter; import org.neo4j.unsafe.batchinsert.BatchInserters; import java.io.File; +import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; @@ -69,7 +69,7 @@ public Neo4jGraphDatabase(File dbStorageDirectoryIn) @Override public void open() { - neo4jGraph = new GraphDatabaseFactory().newEmbeddedDatabase(dbStorageDirectory.getAbsolutePath()); + neo4jGraph = new GraphDatabaseFactory().newEmbeddedDatabase(dbStorageDirectory); try (final Transaction tx = beginUnforcedTransaction()) { try @@ -88,7 +88,7 @@ public 
void open() @Override public void createGraphForSingleLoad() { - neo4jGraph = new GraphDatabaseFactory().newEmbeddedDatabase(dbStorageDirectory.getAbsolutePath()); + neo4jGraph = new GraphDatabaseFactory().newEmbeddedDatabase(dbStorageDirectory); try (final Transaction tx = beginUnforcedTransaction()) { try @@ -118,7 +118,11 @@ public void createGraphForMassiveLoad() config.put("neostore.propertystore.db.mapped_memory", "250M"); config.put("neostore.propertystore.db.strings.mapped_memory", "250M"); - inserter = BatchInserters.inserter(dbStorageDirectory.getAbsolutePath(), config); + try { + inserter = BatchInserters.inserter(dbStorageDirectory, config); + } catch (IOException e) { + throw new IllegalStateException("unable to create batch inserter in dir " + dbStorageDirectory); + } createDeferredSchema(); } @@ -189,7 +193,7 @@ public void shutdownMassiveGraph() public void shortestPath(Node n1, Integer i) { PathFinder finder - = GraphAlgoFactory.shortestPath(Traversal.expanderForTypes(Neo4jGraphDatabase.RelTypes.SIMILAR), 5); + = GraphAlgoFactory.shortestPath(PathExpanders.forType(Neo4jGraphDatabase.RelTypes.SIMILAR), 5); Node n2 = getVertex(i); Path path = finder.findSinglePath(n1, n2); @@ -201,10 +205,8 @@ public void shortestPath(Node n1, Integer i) } } - //TODO can unforced option be pulled into configuration? private Transaction beginUnforcedTransaction() { - final TransactionBuilder builder = ((GraphDatabaseAPI) neo4jGraph).tx().unforced(); - return builder.begin(); + return neo4jGraph.beginTx(); } @Override @@ -236,8 +238,7 @@ public Set getNeighborsIds(int nodeId) { try { - Node n = neo4jGraph.findNodesByLabelAndProperty(NODE_LABEL, NODE_ID, String.valueOf(nodeId)).iterator() - .next(); + Node n = neo4jGraph.findNodes(NODE_LABEL, NODE_ID, String.valueOf(nodeId)).next(); for (Relationship relationship : n.getRelationships(RelTypes.SIMILAR, Direction.OUTGOING)) { Node neighbour = relationship.getOtherNode(n); @@ -264,8 +265,7 @@ public double getNodeWeight(int nodeId) { try { - Node n = neo4jGraph.findNodesByLabelAndProperty(NODE_LABEL, NODE_ID, String.valueOf(nodeId)).iterator() - .next(); + Node n = neo4jGraph.findNodes(NODE_LABEL, NODE_ID, String.valueOf(nodeId)).next(); weight = getNodeOutDegree(n); tx.success(); } @@ -325,10 +325,11 @@ public Set getCommunitiesConnectedToNodeCommunities(int nodeCommunities { try { - ResourceIterable nodes = neo4jGraph.findNodesByLabelAndProperty(Neo4jGraphDatabase.NODE_LABEL, + ResourceIterator nodes = neo4jGraph.findNodes(Neo4jGraphDatabase.NODE_LABEL, NODE_COMMUNITY, nodeCommunities); - for (Node n : nodes) + while (nodes.hasNext()) { + final Node n = nodes.next(); for (Relationship r : n.getRelationships(RelTypes.SIMILAR, Direction.OUTGOING)) { Node neighbour = r.getOtherNode(n); @@ -356,9 +357,10 @@ public Set getNodesFromCommunity(int community) { try { - ResourceIterable iter = neo4jGraph.findNodesByLabelAndProperty(NODE_LABEL, COMMUNITY, community); - for (Node n : iter) + ResourceIterator iter = neo4jGraph.findNodes(NODE_LABEL, COMMUNITY, community); + while (iter.hasNext()) { + final Node n = iter.next(); String nodeIdString = (String) (n.getProperty(NODE_ID)); nodes.add(Integer.valueOf(nodeIdString)); } @@ -382,10 +384,11 @@ public Set getNodesFromNodeCommunity(int nodeCommunity) { try { - ResourceIterable iter = neo4jGraph.findNodesByLabelAndProperty(NODE_LABEL, NODE_COMMUNITY, + ResourceIterator iter = neo4jGraph.findNodes(NODE_LABEL, NODE_COMMUNITY, nodeCommunity); - for (Node n : iter) + while (iter.hasNext()) { + final Node n = 
iter.next(); String nodeIdString = (String) (n.getProperty(NODE_ID)); nodes.add(Integer.valueOf(nodeIdString)); } @@ -409,17 +412,19 @@ public double getEdgesInsideCommunity(int nodeCommunity, int communityNodes) { try { - ResourceIterable nodes = neo4jGraph.findNodesByLabelAndProperty(NODE_LABEL, NODE_COMMUNITY, + ResourceIterator nodes = neo4jGraph.findNodes(NODE_LABEL, NODE_COMMUNITY, nodeCommunity); - ResourceIterable comNodes = neo4jGraph.findNodesByLabelAndProperty(NODE_LABEL, COMMUNITY, + ResourceIterator comNodes = neo4jGraph.findNodes(NODE_LABEL, COMMUNITY, communityNodes); - for (Node node : nodes) + final Set comNodeSet = Sets.newHashSet(comNodes); + while (nodes.hasNext()) { + final Node node = nodes.next(); Iterable relationships = node.getRelationships(RelTypes.SIMILAR, Direction.OUTGOING); for (Relationship r : relationships) { Node neighbor = r.getOtherNode(node); - if (Iterables.contains(comNodes, neighbor)) + if (comNodeSet.contains(neighbor)) { edges++; } @@ -445,10 +450,10 @@ public double getCommunityWeight(int community) { try { - ResourceIterable iter = neo4jGraph.findNodesByLabelAndProperty(NODE_LABEL, COMMUNITY, community); - if (Iterables.size(iter) > 1) + List nodes = Lists.newArrayList(neo4jGraph.findNodes(NODE_LABEL, COMMUNITY, community)); + if (nodes.size() > 1) { - for (Node n : iter) + for (Node n : nodes) { communityWeight += getNodeOutDegree(n); } @@ -473,11 +478,11 @@ public double getNodeCommunityWeight(int nodeCommunity) { try { - ResourceIterable iter = neo4jGraph.findNodesByLabelAndProperty(NODE_LABEL, NODE_COMMUNITY, - nodeCommunity); - if (Iterables.size(iter) > 1) + List nodes = Lists.newArrayList(neo4jGraph.findNodes(NODE_LABEL, NODE_COMMUNITY, + nodeCommunity)); + if (nodes.size() > 1) { - for (Node n : iter) + for (Node n : nodes) { nodeCommunityWeight += getNodeOutDegree(n); } @@ -501,10 +506,11 @@ public void moveNode(int nodeCommunity, int toCommunity) { try { - ResourceIterable fromIter = neo4jGraph.findNodesByLabelAndProperty(NODE_LABEL, NODE_COMMUNITY, + ResourceIterator fromIter = neo4jGraph.findNodes(NODE_LABEL, NODE_COMMUNITY, nodeCommunity); - for (Node node : fromIter) + while (fromIter.hasNext()) { + final Node node = fromIter.next(); node.setProperty(COMMUNITY, toCommunity); } tx.success(); @@ -582,8 +588,7 @@ public int getCommunity(int nodeCommunity) { try { - Node node = neo4jGraph.findNodesByLabelAndProperty(NODE_LABEL, NODE_COMMUNITY, nodeCommunity).iterator() - .next(); + final Node node = neo4jGraph.findNodes(NODE_LABEL, NODE_COMMUNITY, nodeCommunity).next(); community = (Integer) (node.getProperty(COMMUNITY)); tx.success(); } @@ -606,8 +611,7 @@ public int getCommunityFromNode(int nodeId) try { // Node node = nodeIndex.get(NODE_ID, nodeId).getSingle(); - Node node = neo4jGraph.findNodesByLabelAndProperty(NODE_LABEL, NODE_ID, String.valueOf(nodeId)).iterator() - .next(); + final Node node = neo4jGraph.findNodes(NODE_LABEL, NODE_ID, String.valueOf(nodeId)).next(); community = (Integer) (node.getProperty(COMMUNITY)); tx.success(); } @@ -630,9 +634,10 @@ public int getCommunitySize(int community) { try { - ResourceIterable nodes = neo4jGraph.findNodesByLabelAndProperty(NODE_LABEL, COMMUNITY, community); - for (Node n : nodes) + ResourceIterator nodes = neo4jGraph.findNodes(NODE_LABEL, COMMUNITY, community); + while (nodes.hasNext()) { + final Node n = nodes.next(); Integer nodeCommunity = (Integer) (n.getProperty(COMMUNITY)); nodeCommunities.add(nodeCommunity); } @@ -659,10 +664,11 @@ public Map> mapCommunities(int 
numberOfCommunities) { for (int i = 0; i < numberOfCommunities; i++) { - ResourceIterable nodesIter = neo4jGraph.findNodesByLabelAndProperty(NODE_LABEL, COMMUNITY, i); + ResourceIterator nodesIter = neo4jGraph.findNodes(NODE_LABEL, COMMUNITY, i); List nodes = new ArrayList(); - for (Node n : nodesIter) + while (nodesIter.hasNext()) { + final Node n = nodesIter.next(); String nodeIdString = (String) (n.getProperty(NODE_ID)); nodes.add(Integer.valueOf(nodeIdString)); } @@ -687,8 +693,8 @@ public boolean nodeExists(int nodeId) { try { - ResourceIterable nodesIter = neo4jGraph.findNodesByLabelAndProperty(NODE_LABEL, NODE_ID, nodeId); - if (nodesIter.iterator().hasNext()) + ResourceIterator nodesIter = neo4jGraph.findNodes(NODE_LABEL, NODE_ID, nodeId); + if (nodesIter.hasNext()) { tx.success(); return true; @@ -779,9 +785,8 @@ public Node nextVertex(Iterator it) @Override public Node getVertex(Integer i) { - // note, this probably should be run in the context of an active transaction. - return neo4jGraph.findNodesByLabelAndProperty(Neo4jGraphDatabase.NODE_LABEL, NODE_ID, i).iterator() - .next(); + // TODO(amcp) check, this probably should be run in the context of an active transaction. + return neo4jGraph.findNodes(Neo4jGraphDatabase.NODE_LABEL, NODE_ID, i).next(); } } diff --git a/src/main/java/eu/socialsensor/graphdatabases/OrientGraphDatabase.java b/src/main/java/eu/socialsensor/graphdatabases/OrientGraphDatabase.java index 8fd7ade..fc9d6e6 100644 --- a/src/main/java/eu/socialsensor/graphdatabases/OrientGraphDatabase.java +++ b/src/main/java/eu/socialsensor/graphdatabases/OrientGraphDatabase.java @@ -1,6 +1,5 @@ package eu.socialsensor.graphdatabases; -import com.google.common.collect.Iterables; import com.google.common.collect.Iterators; import com.orientechnologies.orient.core.config.OGlobalConfiguration; @@ -123,6 +122,7 @@ public void shutdownMassiveGraph() @Override public void shortestPath(final Vertex v1, Integer i) { + @SuppressWarnings("unused") final OrientVertex v2 = (OrientVertex) getVertex(i); //TODO(amcp) need to do something about the number 5 diff --git a/src/main/java/eu/socialsensor/insert/Neo4jSingleInsertion.java b/src/main/java/eu/socialsensor/insert/Neo4jSingleInsertion.java index 7ba92f7..086b977 100644 --- a/src/main/java/eu/socialsensor/insert/Neo4jSingleInsertion.java +++ b/src/main/java/eu/socialsensor/insert/Neo4jSingleInsertion.java @@ -4,12 +4,10 @@ import java.util.HashMap; import java.util.Map; -import org.neo4j.cypher.javacompat.ExecutionEngine; import org.neo4j.graphdb.GraphDatabaseService; import org.neo4j.graphdb.Node; import org.neo4j.graphdb.ResourceIterator; import org.neo4j.graphdb.Transaction; -import org.neo4j.kernel.GraphDatabaseAPI; import eu.socialsensor.graphdatabases.Neo4jGraphDatabase; import eu.socialsensor.main.BenchmarkingException; @@ -22,31 +20,28 @@ * @author Alexander Patrikalakis * */ -@SuppressWarnings("deprecation") public class Neo4jSingleInsertion extends InsertionBase { private final GraphDatabaseService neo4jGraph; - private final ExecutionEngine engine; public Neo4jSingleInsertion(GraphDatabaseService neo4jGraph, File resultsPath) { super(GraphDatabaseType.NEO4J, resultsPath); this.neo4jGraph = neo4jGraph; - engine = new ExecutionEngine(this.neo4jGraph); } public Node getOrCreate(String nodeId) { Node result = null; - try(final Transaction tx = ((GraphDatabaseAPI) neo4jGraph).tx().unforced().begin()) + try(final Transaction tx = neo4jGraph.beginTx()) { try { String queryString = "MERGE (n:Node {nodeId: {nodeId}}) RETURN n"; Map 
parameters = new HashMap(); parameters.put("nodeId", nodeId); - ResourceIterator resultIterator = engine.execute(queryString, parameters).columnAs("n"); + ResourceIterator resultIterator = neo4jGraph.execute(queryString, parameters).columnAs("n"); result = resultIterator.next(); tx.success(); } @@ -63,7 +58,7 @@ public Node getOrCreate(String nodeId) @Override public void relateNodes(Node src, Node dest) { - try (final Transaction tx = ((GraphDatabaseAPI) neo4jGraph).tx().unforced().begin()) + try (final Transaction tx = neo4jGraph.beginTx()) { try { From 0bb31e04ae65d81453f630f333dea591c5d89c7a Mon Sep 17 00:00:00 2001 From: Alexander Patrikalakis Date: Fri, 4 Mar 2016 02:32:02 +0900 Subject: [PATCH 04/26] updated some sparksee references --- README.md | 6 +++--- src/main/java/eu/socialsensor/main/GraphDatabaseType.java | 5 ++--- src/test/resources/META-INF/input.properties | 1 - 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index d1a1956..9c1bf17 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ graphdb-benchmarks ================== -The project graphdb-benchmarks is a benchmark between popular graph dataases. Currently the framework supports [Titan](http://thinkaurelius.github.io/titan/), [OrientDB](http://www.orientechnologies.com/orientdb/), [Neo4j](http://neo4j.com/) and [Sparksee](http://www.sparsity-technologies.com/). The purpose of this benchmark is to examine the performance of each graph database in terms of execution time. The benchmark is composed of four workloads, Clustering, Massive Insertion, Single Insertion and Query Workload. Every workload has been designed to simulate common operations in graph database systems. +The project graphdb-benchmarks is a benchmark between popular graph databases. Currently the framework supports [Titan](http://thinkaurelius.github.io/titan/), [OrientDB](http://www.orientechnologies.com/orientdb/), [Neo4j](http://neo4j.com/). The purpose of this benchmark is to examine the performance of each graph database in terms of execution time. The benchmark is composed of four workloads, Clustering, Massive Insertion, Single Insertion and Query Workload. Every workload has been designed to simulate common operations in graph database systems. - *Clustering Workload (CW)*: CW consists of a well-known community detection algorithm for modularity optimization, the Louvain Method. We adapt the algorithm on top of the benchmarked graph databases and employ cache techniques to take advantage of both graph database capabilities and in-memory execution speed. We measure the time the algorithm needs to converge. - *Massive Insertion Workload (MIW)*: we create the graph database and configure it for massive loading, then we populate it with a particular dataset. We measure the time for the creation of the whole graph. @@ -16,11 +16,11 @@ For our evaluation we use both synthetic and real data. More specifically, we ex For further information about the study please refer to the [published paper](http://link.springer.com/chapter/10.1007/978-3-319-10518-5_1) on Springer site and the presentation on [Slideshare](http://www.slideshare.net/sympapadopoulos/adbis2014-presentation). -**Note 1:** The published paper contains the experimental study of Titan, OrientDB and Neo4j. After the publication we included the Sparksee graph database. +**Note 1:** The published paper contains the experimental study of Titan, OrientDB and Neo4j. After the publication we included the Sparksee graph database. 
Sparksee does not implement TinkerPop 3 yet. **Note 2:** After the very useful comments and contributions of OrientDB developers, we updated the benchmark implementations and re-run the experiments. We have updated the initial presentation with the new results and uploaded a new version of the paper in the following [link](http://mklab.iti.gr/files/beis_adbis2014_corrected.pdf). -**Note 3:** Alexander Patrikalakis, a software developer at Amazon Web Services, refactored the benchmark, added support for Blueprints 2.5 and added support for the DynamoDB Storage Backend for Titan. +**Note 3:** Alexander Patrikalakis, a software developer at Amazon Web Services, refactored the benchmark, added support for Apache TinkerPop 3 and added support for the DynamoDB Storage Backend for Titan. Instructions ------------ diff --git a/src/main/java/eu/socialsensor/main/GraphDatabaseType.java b/src/main/java/eu/socialsensor/main/GraphDatabaseType.java index 4e167e7..84b353a 100644 --- a/src/main/java/eu/socialsensor/main/GraphDatabaseType.java +++ b/src/main/java/eu/socialsensor/main/GraphDatabaseType.java @@ -17,10 +17,9 @@ public enum GraphDatabaseType TITAN_CASSANDRA("Titan", "cassandra", "tc"), TITAN_CASSANDRA_EMBEDDED("TitanEmbedded", "embeddedcassandra", "tce"), TITAN_HBASE("Titan", "hbase", "thb"), - TITAN_PERSISTIT("TitanEmbedded", "persistit", "tp"), + TITAN_PERSISTIT("TitanEmbedded", "inmemory", "tp"), ORIENT_DB("OrientDB", null, "orient"), - NEO4J("Neo4j", null, "neo4j"), - SPARKSEE("Sparksee", null, "sparksee"); + NEO4J("Neo4j", null, "neo4j"); private final String backend; private final String api; diff --git a/src/test/resources/META-INF/input.properties b/src/test/resources/META-INF/input.properties index 7c271b5..47e8ca8 100644 --- a/src/test/resources/META-INF/input.properties +++ b/src/test/resources/META-INF/input.properties @@ -25,7 +25,6 @@ eu.socialsensor.databases=tddb #eu.socialsensor.databases=tp #eu.socialsensor.databases=orient #eu.socialsensor.databases=neo4j -#eu.socialsensor.databases=sparksee # Database specific options # Titan options From f781c180d7f7581c3713ef1c9514676ef0611dbd Mon Sep 17 00:00:00 2001 From: Alexander Patrikalakis Date: Fri, 4 Mar 2016 09:55:29 +0900 Subject: [PATCH 05/26] added TODOs for official OrientDB TP3 support --- .../eu/socialsensor/graphdatabases/OrientGraphDatabase.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/eu/socialsensor/graphdatabases/OrientGraphDatabase.java b/src/main/java/eu/socialsensor/graphdatabases/OrientGraphDatabase.java index fc9d6e6..765fce7 100644 --- a/src/main/java/eu/socialsensor/graphdatabases/OrientGraphDatabase.java +++ b/src/main/java/eu/socialsensor/graphdatabases/OrientGraphDatabase.java @@ -32,7 +32,8 @@ import org.apache.tinkerpop.gremlin.structure.Vertex; /** - * OrientDB graph database implementation + * OrientDB graph database implementation. + * TODO(amcp) replace with the official OrientDB implementation when available. * * @author sotbeis, sotbeis@iti.gr * @author Alexander Patrikalakis @@ -381,6 +382,7 @@ private OrientGraph getGraph(final File dbPath) { Configuration config = new PropertiesConfiguration(); config.setProperty(OrientGraph.CONFIG_URL, "plocal:" + dbPath.getAbsolutePath()); + // TODO(amcp) replace with the official OrientDB implementation when available. 
final OrientGraphFactory graphFactory = new OrientGraphFactory(config); return graphFactory.getTx(); } From ac8e3a1842a5b28cdf1a26349e9246dcf232178e Mon Sep 17 00:00:00 2001 From: Alexander Patrikalakis Date: Fri, 4 Mar 2016 21:49:43 +0900 Subject: [PATCH 06/26] fixed guava version issue --- README.md | 2 +- pom.xml | 21 ++++++++++++------ .../benchmarks/ClusteringBenchmark.java | 3 +-- .../benchmarks/DeleteGraphBenchmark.java | 3 +-- .../FindNeighboursOfAllNodesBenchmark.java | 3 +-- .../FindNodesOfAllEdgesBenchmark.java | 3 +-- .../benchmarks/FindShortestPathBenchmark.java | 3 +-- .../benchmarks/MassiveInsertionBenchmark.java | 3 +-- .../graphdatabases/OrientGraphDatabase.java | 5 ++--- .../eu/socialsensor/insert/InsertionBase.java | 7 ++---- .../socialsensor/main/GraphDatabaseType.java | 2 +- src/test/resources/META-INF/input.properties | 22 +++++++------------ 12 files changed, 34 insertions(+), 43 deletions(-) diff --git a/README.md b/README.md index 9c1bf17..5fbc4ca 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ For further information about the study please refer to the [published paper](ht Instructions ------------ -To run the project at first you have to choose one of the aforementioned datasets. Of course you can select any dataset, but because there is not any utility class to convert the dataset in the appropriate format (for now), the format of the data must be identical with the tested datasets. The input parameters are configured from the src/test/resources/input.properties file. Please follow the instructions in this file to select the correct parameters. Then, run `mvn dependency:copy-dependencies && mvn test -Pbench` to execute the benchmarking run. +To run the project at first you have to choose one of the aforementioned datasets. Of course you can select any dataset, but because there is not any utility class to convert the dataset in the appropriate format (for now), the format of the data must be identical with the tested datasets. The input parameters are configured from the src/test/resources/input.properties file. Please follow the instructions in this file to select the correct parameters. Then, run `mvn install && mvn test -Pbench` to execute the benchmarking run. 
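For illustration only (key names and values copied from the sample src/test/resources/META-INF/input.properties shown elsewhere in this patch series; adjust dataset paths and database short names to your setup), a minimal selection of input parameters might look like:

    eu.socialsensor.dataset=data/network1000.dat
    eu.socialsensor.actual-communities=data/community1000.dat
    eu.socialsensor.database-storage-directory=storage
    eu.socialsensor.databases=neo4j
    eu.socialsensor.benchmarks=MASSIVE_INSERTION
    eu.socialsensor.benchmarks=FIND_NEIGHBOURS
    eu.socialsensor.benchmarks=FIND_ADJACENT_NODES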
Results ------- diff --git a/pom.xml b/pom.xml index 0edf869..28e8dff 100644 --- a/pom.xml +++ b/pom.xml @@ -52,11 +52,13 @@ 3.0.1-incubating 1.0.0 - 0.98.8-hadoop2 + 1.0.2 2.3.2 1.0.0 2.1 - 2.18.1 + 2.10 + 2.19.1 + 2.6 1.8 @@ -64,7 +66,7 @@ com.google.guava guava - 14.0.1 + 18.0 org.apache.commons @@ -137,6 +139,11 @@ titan-hbase ${titan.version} + + org.apache.hbase + hbase-client + ${hbase.version} + com.amazonaws dynamodb-titan100-storage-backend @@ -232,16 +239,16 @@ org.apache.maven.plugins maven-dependency-plugin - 2.2 + ${dependency.plugin.version} copy-dependencies - package + process-test-resources copy-dependencies - ${project.build.directory}/dependency + ${project.build.directory}/dependencies false false true @@ -286,7 +293,7 @@ maven-assembly-plugin - 2.5.3 + ${maven.assembly.version} src/assembly/component.xml diff --git a/src/main/java/eu/socialsensor/benchmarks/ClusteringBenchmark.java b/src/main/java/eu/socialsensor/benchmarks/ClusteringBenchmark.java index 0d7bdd1..9bc2c65 100644 --- a/src/main/java/eu/socialsensor/benchmarks/ClusteringBenchmark.java +++ b/src/main/java/eu/socialsensor/benchmarks/ClusteringBenchmark.java @@ -102,8 +102,7 @@ private SortedMap clusteringBenchmark(GraphDatabaseType type) t LOG.info("Graph Database: " + type.getShortname() + ", Dataset: " + bench.getDataset().getName() + ", Cache Size: " + cacheSize); - Stopwatch watch = new Stopwatch(); - watch.start(); + Stopwatch watch = Stopwatch.createStarted(); LouvainMethod louvainMethodCache = new LouvainMethod(graphDatabase, cacheSize, bench.randomizedClustering()); louvainMethodCache.computeModularity(); timeMap.put(cacheSize, watch.elapsed(TimeUnit.MILLISECONDS) / 1000.0); diff --git a/src/main/java/eu/socialsensor/benchmarks/DeleteGraphBenchmark.java b/src/main/java/eu/socialsensor/benchmarks/DeleteGraphBenchmark.java index 2c00564..7af3f50 100644 --- a/src/main/java/eu/socialsensor/benchmarks/DeleteGraphBenchmark.java +++ b/src/main/java/eu/socialsensor/benchmarks/DeleteGraphBenchmark.java @@ -24,8 +24,7 @@ public DeleteGraphBenchmark(BenchmarkConfiguration bench) @Override public void benchmarkOne(GraphDatabaseType type, int scenarioNumber) { - Stopwatch watch = new Stopwatch(); - watch.start(); + Stopwatch watch = Stopwatch.createStarted(); Utils.deleteDatabase(type, bench); times.get(type).add((double) watch.elapsed(TimeUnit.MILLISECONDS)); } diff --git a/src/main/java/eu/socialsensor/benchmarks/FindNeighboursOfAllNodesBenchmark.java b/src/main/java/eu/socialsensor/benchmarks/FindNeighboursOfAllNodesBenchmark.java index 3c24ffa..4b690bf 100644 --- a/src/main/java/eu/socialsensor/benchmarks/FindNeighboursOfAllNodesBenchmark.java +++ b/src/main/java/eu/socialsensor/benchmarks/FindNeighboursOfAllNodesBenchmark.java @@ -28,8 +28,7 @@ public void benchmarkOne(GraphDatabaseType type, int scenarioNumber) { GraphDatabase graphDatabase = Utils.createDatabaseInstance(bench, type); graphDatabase.open(); - Stopwatch watch = new Stopwatch(); - watch.start(); + Stopwatch watch = Stopwatch.createStarted(); graphDatabase.findAllNodeNeighbours(); graphDatabase.shutdown(); times.get(type).add((double) watch.elapsed(TimeUnit.MILLISECONDS)); diff --git a/src/main/java/eu/socialsensor/benchmarks/FindNodesOfAllEdgesBenchmark.java b/src/main/java/eu/socialsensor/benchmarks/FindNodesOfAllEdgesBenchmark.java index a201e42..69a33e6 100644 --- a/src/main/java/eu/socialsensor/benchmarks/FindNodesOfAllEdgesBenchmark.java +++ b/src/main/java/eu/socialsensor/benchmarks/FindNodesOfAllEdgesBenchmark.java @@ -28,8 +28,7 @@ 
public void benchmarkOne(GraphDatabaseType type, int scenarioNumber) { GraphDatabase graphDatabase = Utils.createDatabaseInstance(bench, type); graphDatabase.open(); - Stopwatch watch = new Stopwatch(); - watch.start(); + Stopwatch watch = Stopwatch.createStarted(); graphDatabase.findNodesOfAllEdges(); graphDatabase.shutdown(); times.get(type).add((double) watch.elapsed(TimeUnit.MILLISECONDS)); diff --git a/src/main/java/eu/socialsensor/benchmarks/FindShortestPathBenchmark.java b/src/main/java/eu/socialsensor/benchmarks/FindShortestPathBenchmark.java index bc78ce0..1084939 100644 --- a/src/main/java/eu/socialsensor/benchmarks/FindShortestPathBenchmark.java +++ b/src/main/java/eu/socialsensor/benchmarks/FindShortestPathBenchmark.java @@ -35,8 +35,7 @@ public void benchmarkOne(GraphDatabaseType type, int scenarioNumber) { GraphDatabase graphDatabase = Utils.createDatabaseInstance(bench, type); graphDatabase.open(); - Stopwatch watch = new Stopwatch(); - watch.start(); + Stopwatch watch = Stopwatch.createStarted(); graphDatabase.shortestPaths(generatedNodes); graphDatabase.shutdown(); times.get(type).add((double) watch.elapsed(TimeUnit.MILLISECONDS)); diff --git a/src/main/java/eu/socialsensor/benchmarks/MassiveInsertionBenchmark.java b/src/main/java/eu/socialsensor/benchmarks/MassiveInsertionBenchmark.java index 3bfb4d8..dbe065b 100644 --- a/src/main/java/eu/socialsensor/benchmarks/MassiveInsertionBenchmark.java +++ b/src/main/java/eu/socialsensor/benchmarks/MassiveInsertionBenchmark.java @@ -40,8 +40,7 @@ public void benchmarkOne(GraphDatabaseType type, int scenarioNumber) // it is not related to the action of inserting. graphDatabase.createGraphForMassiveLoad(); logger.debug("Massive load graph in database type {}", type.getShortname()); - Stopwatch watch = new Stopwatch(); - watch.start(); + Stopwatch watch = Stopwatch.createStarted(); graphDatabase.massiveModeLoading(bench.getDataset()); logger.debug("Shutdown massive graph in database type {}", type.getShortname()); graphDatabase.shutdownMassiveGraph(); diff --git a/src/main/java/eu/socialsensor/graphdatabases/OrientGraphDatabase.java b/src/main/java/eu/socialsensor/graphdatabases/OrientGraphDatabase.java index 765fce7..d2db83e 100644 --- a/src/main/java/eu/socialsensor/graphdatabases/OrientGraphDatabase.java +++ b/src/main/java/eu/socialsensor/graphdatabases/OrientGraphDatabase.java @@ -364,8 +364,8 @@ public Map> mapCommunities(int numberOfCommunities) protected void createSchema() { createIndex(NODE_ID, NODE_LABEL, "UNIQUE_HASH_INDEX", "INTEGER"); - createIndex(COMMUNITY, null /*label*/, "NOTUNIQUE_HASH_INDEX", "INTEGER"); - createIndex(NODE_COMMUNITY, null /*label*/, "NOTUNIQUE_HASH_INDEX", "INTEGER"); + createIndex(COMMUNITY, NODE_LABEL, "NOTUNIQUE_HASH_INDEX", "INTEGER"); + createIndex(NODE_COMMUNITY, NODE_LABEL, "NOTUNIQUE_HASH_INDEX", "INTEGER"); } private void createIndex(String key, String label, String type, String keytype) { @@ -375,7 +375,6 @@ private void createIndex(String key, String label, String type, String keytype) final Configuration nodeIdIndexConfig = new PropertiesConfiguration(); nodeIdIndexConfig.addProperty("type", type); nodeIdIndexConfig.addProperty("keytype", keytype); - graph.createVertexIndex(NODE_ID, label, nodeIdIndexConfig); } private OrientGraph getGraph(final File dbPath) diff --git a/src/main/java/eu/socialsensor/insert/InsertionBase.java b/src/main/java/eu/socialsensor/insert/InsertionBase.java index 564d66c..94bd7f6 100644 --- a/src/main/java/eu/socialsensor/insert/InsertionBase.java +++ 
b/src/main/java/eu/socialsensor/insert/InsertionBase.java @@ -85,9 +85,7 @@ public final void createGraph(File datasetFile, int scenarioNumber) Dataset dataset = DatasetFactory.getInstance().getDataset(datasetFile); T srcNode, dstNode; - Stopwatch thousandWatch = new Stopwatch(), watch = new Stopwatch(); - thousandWatch.start(); - watch.start(); + Stopwatch thousandWatch = Stopwatch.createStarted(), watch = Stopwatch.createStarted(); int i = 4; for (List line : dataset) { @@ -116,8 +114,7 @@ public final void createGraph(File datasetFile, int scenarioNumber) { insertionTimes.add((double) thousandWatch.elapsed(TimeUnit.MILLISECONDS)); thousandWatch.stop(); - thousandWatch = new Stopwatch(); - thousandWatch.start(); + thousandWatch = Stopwatch.createStarted(); } i++; } diff --git a/src/main/java/eu/socialsensor/main/GraphDatabaseType.java b/src/main/java/eu/socialsensor/main/GraphDatabaseType.java index 84b353a..c3b52d6 100644 --- a/src/main/java/eu/socialsensor/main/GraphDatabaseType.java +++ b/src/main/java/eu/socialsensor/main/GraphDatabaseType.java @@ -17,7 +17,7 @@ public enum GraphDatabaseType TITAN_CASSANDRA("Titan", "cassandra", "tc"), TITAN_CASSANDRA_EMBEDDED("TitanEmbedded", "embeddedcassandra", "tce"), TITAN_HBASE("Titan", "hbase", "thb"), - TITAN_PERSISTIT("TitanEmbedded", "inmemory", "tp"), + TITAN_PERSISTIT("TitanEmbedded", "inmemory", "ti"), ORIENT_DB("OrientDB", null, "orient"), NEO4J("Neo4j", null, "neo4j"); diff --git a/src/test/resources/META-INF/input.properties b/src/test/resources/META-INF/input.properties index 47e8ca8..b96a9c3 100644 --- a/src/test/resources/META-INF/input.properties +++ b/src/test/resources/META-INF/input.properties @@ -4,8 +4,8 @@ #eu.socialsensor.dataset=data/Amazon0601.txt #eu.socialsensor.dataset=data/com-lj.ungraph.txt #can change the number in the filename of the synthetic datasets to 1000, 5000, 10000, 20000, 30000, 40000, 50000 -eu.socialsensor.dataset=data/network1000.dat -eu.socialsensor.actual-communities=data/community1000.dat +eu.socialsensor.dataset=data/network5000.dat +eu.socialsensor.actual-communities=data/community5000.dat eu.socialsensor.database-storage-directory=storage # Sample meters this frequently (milliseconds) @@ -18,13 +18,13 @@ eu.socialsensor.metrics.csv.directory=metrics # Choose which databases you want to in the benchmark by removing the comments. # Available dbs are: eu.socialsensor.databases=tbdb -eu.socialsensor.databases=tddb +#eu.socialsensor.databases=tddb #eu.socialsensor.databases=tc #eu.socialsensor.databases=thb #eu.socialsensor.databases=tce -#eu.socialsensor.databases=tp -#eu.socialsensor.databases=orient -#eu.socialsensor.databases=neo4j +#eu.socialsensor.databases=ti +eu.socialsensor.databases=orient +eu.socialsensor.databases=neo4j # Database specific options # Titan options @@ -61,12 +61,6 @@ eu.socialsensor.dynamodb.credentials.constructor-args= eu.socialsensor.dynamodb.endpoint=http://127.0.0.1:4567 #eu.socialsensor.dynamodb.endpoint=https://dynamodb.us-east-1.amazonaws.com -# OrientDB options -eu.socialsensor.orient.lightweight-edges=true - -# Sparksee options -eu.socialsensor.sparksee.license-key=DEADBEEF - # The following five benchmarks are permutable (that is, the suite can run them # many times in different database order). 
To turn on permutations, set # eu.socialsensor.permute-benchmarks=true @@ -78,8 +72,8 @@ eu.socialsensor.benchmarks=MASSIVE_INSERTION #eu.socialsensor.benchmarks=SINGLE_INSERTION eu.socialsensor.benchmarks=FIND_NEIGHBOURS eu.socialsensor.benchmarks=FIND_ADJACENT_NODES -eu.socialsensor.benchmarks=FIND_SHORTEST_PATH -eu.socialsensor.shortest-path-random-nodes=100 +#eu.socialsensor.benchmarks=FIND_SHORTEST_PATH +#eu.socialsensor.shortest-path-random-nodes=100 # The clustering benchmark is not permutable even if eu.socialsensor.permute-benchmarks=true #eu.socialsensor.benchmarks=CLUSTERING From 4ea3a4e36628ea269ec7a2c625fff21749b7b1bd Mon Sep 17 00:00:00 2001 From: Alexander Patrikalakis Date: Sat, 5 Mar 2016 00:44:02 +0900 Subject: [PATCH 07/26] set label correctly. create orient index. enable bdb. --- .gitignore | 3 +++ .../graphdatabases/GraphDatabaseBase.java | 1 + .../graphdatabases/OrientGraphDatabase.java | 14 ++++++++------ .../graphdatabases/TitanGraphDatabase.java | 6 ++++++ .../java/eu/socialsensor/insert/InsertionBase.java | 6 ++++-- .../insert/OrientMassiveInsertion.java | 6 +++--- .../socialsensor/insert/OrientSingleInsertion.java | 7 +++---- .../socialsensor/insert/TitanMassiveInsertion.java | 5 +++-- .../socialsensor/insert/TitanSingleInsertion.java | 8 ++++---- .../socialsensor/main/GraphDatabaseBenchmark.java | 3 +++ src/test/resources/META-INF/input.properties | 2 +- 11 files changed, 39 insertions(+), 22 deletions(-) diff --git a/.gitignore b/.gitignore index 86b0401..0415b5c 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,6 @@ target/* /.gitignore /.settings /.classpath +/metrics +/storage +/results diff --git a/src/main/java/eu/socialsensor/graphdatabases/GraphDatabaseBase.java b/src/main/java/eu/socialsensor/graphdatabases/GraphDatabaseBase.java index b47fa95..64f00fe 100644 --- a/src/main/java/eu/socialsensor/graphdatabases/GraphDatabaseBase.java +++ b/src/main/java/eu/socialsensor/graphdatabases/GraphDatabaseBase.java @@ -19,6 +19,7 @@ public abstract class GraphDatabaseBase, Iterator, Vertex, Edge> { - //to look up the existence of indexes in OrientDB, you need to have vertex labels. 
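Editor's note on the timer hunks above: ClusteringBenchmark, DeleteGraphBenchmark, the Find* benchmarks, MassiveInsertionBenchmark and InsertionBase all move from the two-step new Stopwatch(); watch.start(); to the static factory Stopwatch.createStarted(). That change goes hand in hand with the Guava bump in the pom (14.0.1 to 18.0), where the public Stopwatch constructors were deprecated and eventually removed. A minimal, self-contained sketch of the timing pattern the benchmarks now share (class and workload names here are illustrative, not from the project):

import java.util.concurrent.TimeUnit;
import com.google.common.base.Stopwatch;

public class StopwatchTimingSketch {
    public static void main(String[] args) throws InterruptedException {
        Stopwatch watch = Stopwatch.createStarted(); // created and started in one call
        Thread.sleep(50);                            // stand-in for the measured workload
        // the benchmarks record elapsed milliseconds as a double, sometimes scaled to seconds
        double millis = (double) watch.elapsed(TimeUnit.MILLISECONDS);
        System.out.println("elapsed: " + millis + " ms");
    }
}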
- public static final String NODE_LABEL = "NODE"; + public static final String UNIQUE_HASH_INDEX = "UNIQUE_HASH_INDEX"; + public static final String NOTUNIQUE_HASH_INDEX = "NOTUNIQUE_HASH_INDEX"; private OrientGraph graph = null; public OrientGraphDatabase(BenchmarkConfiguration config, File dbStorageDirectoryIn) @@ -363,18 +364,19 @@ public Map> mapCommunities(int numberOfCommunities) protected void createSchema() { - createIndex(NODE_ID, NODE_LABEL, "UNIQUE_HASH_INDEX", "INTEGER"); - createIndex(COMMUNITY, NODE_LABEL, "NOTUNIQUE_HASH_INDEX", "INTEGER"); - createIndex(NODE_COMMUNITY, NODE_LABEL, "NOTUNIQUE_HASH_INDEX", "INTEGER"); + createIndex(NODE_ID, NODE_LABEL, UNIQUE_HASH_INDEX, OType.INTEGER); + createIndex(COMMUNITY, NODE_LABEL, NOTUNIQUE_HASH_INDEX, OType.INTEGER); + createIndex(NODE_COMMUNITY, NODE_LABEL, NOTUNIQUE_HASH_INDEX, OType.INTEGER); } - private void createIndex(String key, String label, String type, String keytype) { + private void createIndex(String key, String label, String type, OType keytype) { if(graph.getVertexIndexedKeys(label).contains(NODE_ID)) { return; } final Configuration nodeIdIndexConfig = new PropertiesConfiguration(); nodeIdIndexConfig.addProperty("type", type); nodeIdIndexConfig.addProperty("keytype", keytype); + graph.createVertexIndex(key, label, nodeIdIndexConfig); } private OrientGraph getGraph(final File dbPath) diff --git a/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java b/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java index f13d28e..367a354 100644 --- a/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java +++ b/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java @@ -38,6 +38,7 @@ import com.thinkaurelius.titan.core.TitanFactory; import com.thinkaurelius.titan.core.TitanGraph; import com.thinkaurelius.titan.core.schema.TitanManagement; +import com.thinkaurelius.titan.core.schema.VertexLabelMaker; import com.thinkaurelius.titan.core.util.TitanCleanup; import com.thinkaurelius.titan.graphdb.configuration.GraphDatabaseConfiguration; @@ -561,6 +562,10 @@ public Map> mapCommunities(int numberOfCommunities) private void createSchema() { final TitanManagement mgmt = graph.openManagement(); + if(!mgmt.containsVertexLabel(NODE_LABEL)) { + final VertexLabelMaker maker = mgmt.makeVertexLabel(NODE_LABEL); + maker.make(); + } if (null == mgmt.getGraphIndex(NODE_ID)) { final PropertyKey key = mgmt.makePropertyKey(NODE_ID).dataType(Integer.class).make(); @@ -581,6 +586,7 @@ private void createSchema() mgmt.makeEdgeLabel(SIMILAR).multiplicity(Multiplicity.MULTI).directed().make(); } mgmt.commit(); + graph.tx().commit(); } @Override diff --git a/src/main/java/eu/socialsensor/insert/InsertionBase.java b/src/main/java/eu/socialsensor/insert/InsertionBase.java index 94bd7f6..13b48a3 100644 --- a/src/main/java/eu/socialsensor/insert/InsertionBase.java +++ b/src/main/java/eu/socialsensor/insert/InsertionBase.java @@ -14,6 +14,7 @@ import eu.socialsensor.benchmarks.SingleInsertionBenchmark; import eu.socialsensor.dataset.Dataset; import eu.socialsensor.dataset.DatasetFactory; +import eu.socialsensor.graphdatabases.GraphDatabaseBase; import eu.socialsensor.main.GraphDatabaseBenchmark; import eu.socialsensor.main.GraphDatabaseType; import eu.socialsensor.utils.Utils; @@ -30,8 +31,9 @@ public abstract class InsertionBase implements Insertion { private static final Logger logger = LogManager.getLogger(); public static final String INSERTION_CONTEXT = ".eu.socialsensor.insertion."; - public static 
final String SIMILAR = "similar"; - public static final String NODEID = "nodeId"; + public static final String SIMILAR = GraphDatabaseBase.SIMILAR; + public static final String NODEID = GraphDatabaseBase.NODE_ID; + public static final String NODE_LABEL = GraphDatabaseBase.NODE_LABEL; private final Timer getOrCreateTimes; private final Timer relateNodesTimes; diff --git a/src/main/java/eu/socialsensor/insert/OrientMassiveInsertion.java b/src/main/java/eu/socialsensor/insert/OrientMassiveInsertion.java index 14cf4c5..322dbc2 100644 --- a/src/main/java/eu/socialsensor/insert/OrientMassiveInsertion.java +++ b/src/main/java/eu/socialsensor/insert/OrientMassiveInsertion.java @@ -2,6 +2,7 @@ import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; import org.apache.tinkerpop.gremlin.structure.Graph; +import org.apache.tinkerpop.gremlin.structure.T; import org.apache.tinkerpop.gremlin.structure.Vertex; import eu.socialsensor.graphdatabases.OrientGraphDatabase; @@ -28,9 +29,8 @@ public OrientMassiveInsertion(Graph graph) protected Vertex getOrCreate(String value) { final Integer intValue = Integer.valueOf(value); - final GraphTraversal traversal = graph.traversal().V().has(NODEID, intValue); - final Vertex vertex = traversal.hasNext() ? traversal.next() : graph.addVertex(OrientGraphDatabase.NODE_LABEL); - vertex.property(NODEID, intValue); + final GraphTraversal traversal = graph.traversal().V().hasLabel(NODE_LABEL).has(NODEID, intValue); + final Vertex vertex = traversal.hasNext() ? traversal.next() : graph.addVertex(T.label, OrientGraphDatabase.NODE_LABEL, NODEID, intValue); return vertex; } diff --git a/src/main/java/eu/socialsensor/insert/OrientSingleInsertion.java b/src/main/java/eu/socialsensor/insert/OrientSingleInsertion.java index 6fa63f1..a746ceb 100644 --- a/src/main/java/eu/socialsensor/insert/OrientSingleInsertion.java +++ b/src/main/java/eu/socialsensor/insert/OrientSingleInsertion.java @@ -4,9 +4,9 @@ import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; import org.apache.tinkerpop.gremlin.structure.Graph; +import org.apache.tinkerpop.gremlin.structure.T; import org.apache.tinkerpop.gremlin.structure.Vertex; -import eu.socialsensor.graphdatabases.OrientGraphDatabase; import eu.socialsensor.main.GraphDatabaseType; /** @@ -42,9 +42,8 @@ protected void relateNodes(Vertex src, Vertex dest) protected Vertex getOrCreate(final String value) { final Integer intValue = Integer.valueOf(value); - final GraphTraversal traversal = graph.traversal().V().has(NODEID, intValue); - final Vertex vertex = traversal.hasNext() ? traversal.next() : graph.addVertex(OrientGraphDatabase.NODE_LABEL); - vertex.property(NODEID, intValue); + final GraphTraversal traversal = graph.traversal().V().hasLabel(NODE_LABEL).has(NODEID, intValue); + final Vertex vertex = traversal.hasNext() ? 
traversal.next() : graph.addVertex(T.label, NODE_LABEL, NODEID, intValue); graph.tx().commit(); return vertex; } diff --git a/src/main/java/eu/socialsensor/insert/TitanMassiveInsertion.java b/src/main/java/eu/socialsensor/insert/TitanMassiveInsertion.java index 8fa0630..1575ba2 100644 --- a/src/main/java/eu/socialsensor/insert/TitanMassiveInsertion.java +++ b/src/main/java/eu/socialsensor/insert/TitanMassiveInsertion.java @@ -2,6 +2,7 @@ import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; import org.apache.tinkerpop.gremlin.structure.Graph; +import org.apache.tinkerpop.gremlin.structure.T; import org.apache.tinkerpop.gremlin.structure.Vertex; import eu.socialsensor.main.GraphDatabaseType; @@ -28,8 +29,8 @@ public TitanMassiveInsertion(Graph graph, GraphDatabaseType type) public Vertex getOrCreate(String value) { Integer intVal = Integer.valueOf(value); - final GraphTraversal t = graph.traversal().V().has(NODEID, intVal); - final Vertex vertex = t.hasNext() ? t.next() : graph.addVertex(NODEID, intVal); + final GraphTraversal t = graph.traversal().V().hasLabel(NODE_LABEL).has(NODEID, intVal); + final Vertex vertex = t.hasNext() ? t.next() : graph.addVertex(T.label, NODE_LABEL, NODEID, intVal); return vertex; } diff --git a/src/main/java/eu/socialsensor/insert/TitanSingleInsertion.java b/src/main/java/eu/socialsensor/insert/TitanSingleInsertion.java index 2a37339..2e39a4a 100644 --- a/src/main/java/eu/socialsensor/insert/TitanSingleInsertion.java +++ b/src/main/java/eu/socialsensor/insert/TitanSingleInsertion.java @@ -4,6 +4,7 @@ import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; import org.apache.tinkerpop.gremlin.structure.Graph; +import org.apache.tinkerpop.gremlin.structure.T; import org.apache.tinkerpop.gremlin.structure.Transaction; import org.apache.tinkerpop.gremlin.structure.Vertex; @@ -29,11 +30,10 @@ public TitanSingleInsertion(Graph titanGraph, GraphDatabaseType type, File resul @Override public Vertex getOrCreate(String value) { - final Transaction tx = graph.tx(); final Integer intValue = Integer.valueOf(value); - final GraphTraversal traversal = graph.traversal().V().has(NODEID, intValue); - final Vertex vertex = traversal.hasNext() ? traversal.next() : graph.addVertex(NODEID, intValue); - tx.commit(); + final GraphTraversal traversal = graph.traversal().V().hasLabel(NODE_LABEL).has(NODEID, intValue); + final Vertex vertex = traversal.hasNext() ? 
traversal.next() : graph.addVertex(T.label, NODE_LABEL, NODEID, intValue); + graph.tx().commit(); return vertex; } diff --git a/src/main/java/eu/socialsensor/main/GraphDatabaseBenchmark.java b/src/main/java/eu/socialsensor/main/GraphDatabaseBenchmark.java index 395ebc5..b26b871 100644 --- a/src/main/java/eu/socialsensor/main/GraphDatabaseBenchmark.java +++ b/src/main/java/eu/socialsensor/main/GraphDatabaseBenchmark.java @@ -75,6 +75,9 @@ public GraphDatabaseBenchmark(String inputPath) throws IllegalArgumentException } config = new BenchmarkConfiguration(appconfig); if(config.publishCsvMetrics()) { + if(!config.getCsvDir().mkdirs()) { + throw new IllegalArgumentException("unable to prepare metrics directory " + config.getCsvDir().getAbsolutePath()); + } final CsvReporter reporter = CsvReporter.forRegistry(metrics) .formatFor(Locale.US) .convertRatesTo(TimeUnit.SECONDS) diff --git a/src/test/resources/META-INF/input.properties b/src/test/resources/META-INF/input.properties index b96a9c3..7d843d5 100644 --- a/src/test/resources/META-INF/input.properties +++ b/src/test/resources/META-INF/input.properties @@ -24,7 +24,7 @@ eu.socialsensor.databases=tbdb #eu.socialsensor.databases=tce #eu.socialsensor.databases=ti eu.socialsensor.databases=orient -eu.socialsensor.databases=neo4j +#eu.socialsensor.databases=neo4j # Database specific options # Titan options From e2f0d5fe5a8e25e90dcadd8992beec073a93d296 Mon Sep 17 00:00:00 2001 From: Alexander Patrikalakis Date: Sat, 5 Mar 2016 21:56:01 -0800 Subject: [PATCH 08/26] fixed titan vertex id creation. ran synthetic data sets and updated results. --- README.md | 83 +++++--- .../benchmarks/ClusteringBenchmark.java | 3 +- .../FindNeighboursOfAllNodesBenchmark.java | 3 +- .../FindNodesOfAllEdgesBenchmark.java | 3 +- .../benchmarks/FindShortestPathBenchmark.java | 3 +- .../benchmarks/MassiveInsertionBenchmark.java | 6 +- .../benchmarks/PermutingBenchmarkBase.java | 2 - .../benchmarks/SingleInsertionBenchmark.java | 3 +- .../java/eu/socialsensor/dataset/Dataset.java | 4 + .../graphdatabases/GraphDatabase.java | 31 --- .../graphdatabases/GraphDatabaseBase.java | 159 ++++++-------- .../graphdatabases/Neo4jGraphDatabase.java | 196 ++++++++---------- .../graphdatabases/OrientGraphDatabase.java | 35 +--- .../graphdatabases/TitanGraphDatabase.java | 143 ++++--------- .../eu/socialsensor/insert/InsertionBase.java | 18 +- .../insert/Neo4jMassiveInsertion.java | 2 +- .../insert/TitanMassiveInsertion.java | 53 ++++- .../insert/TitanSingleInsertion.java | 17 +- .../main/GraphDatabaseBenchmark.java | 2 +- .../java/eu/socialsensor/utils/Utils.java | 16 +- .../main/GraphDatabaseBenchmarkTest.java | 4 + src/test/resources/META-INF/input.properties | 13 +- 22 files changed, 349 insertions(+), 450 deletions(-) diff --git a/README.md b/README.md index 5fbc4ca..6cb236b 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,18 @@ graphdb-benchmarks ================== -The project graphdb-benchmarks is a benchmark between popular graph databases. Currently the framework supports [Titan](http://thinkaurelius.github.io/titan/), [OrientDB](http://www.orientechnologies.com/orientdb/), [Neo4j](http://neo4j.com/). The purpose of this benchmark is to examine the performance of each graph database in terms of execution time. The benchmark is composed of four workloads, Clustering, Massive Insertion, Single Insertion and Query Workload. Every workload has been designed to simulate common operations in graph database systems. 
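Editor's note on the insertion hunks in the patch above: OrientMassiveInsertion, OrientSingleInsertion, TitanMassiveInsertion and TitanSingleInsertion all converge on one TinkerPop 3 idiom, namely look the vertex up by label plus nodeId, and only if the traversal is empty add a new vertex with the label and the id property supplied together to addVertex rather than setting the property afterwards. A minimal sketch of that idiom, assuming a plain TinkerPop Graph and the label/property naming used in the project:

import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal;
import org.apache.tinkerpop.gremlin.structure.Graph;
import org.apache.tinkerpop.gremlin.structure.T;
import org.apache.tinkerpop.gremlin.structure.Vertex;

final class GetOrCreateSketch {
    // returns the existing vertex with this id, or creates it with label and id in one call
    static Vertex getOrCreate(Graph graph, String label, String idKey, Integer id) {
        GraphTraversal<Vertex, Vertex> t = graph.traversal().V().hasLabel(label).has(idKey, id);
        return t.hasNext()
                ? t.next()
                : graph.addVertex(T.label, label, idKey, id); // label and property set in one mutation
    }
}

Supplying the property inside addVertex avoids a second mutation on the new vertex, which matters most in the single-insertion workload where every change is committed on its own.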
+The project graphdb-benchmarks is a benchmark between popular graph databases. Currently the framework supports [Titan](http://thinkaurelius.github.io/titan/), [OrientDB](http://www.orientechnologies.com/orientdb/), and [Neo4j](http://neo4j.com/). The purpose of this benchmark is to examine the performance of each graph database in terms of execution time. The benchmark is composed of four workloads, Clustering, Massive Insertion, Single Insertion and Query Workload. Every workload has been designed to simulate common operations in graph database systems. - *Clustering Workload (CW)*: CW consists of a well-known community detection algorithm for modularity optimization, the Louvain Method. We adapt the algorithm on top of the benchmarked graph databases and employ cache techniques to take advantage of both graph database capabilities and in-memory execution speed. We measure the time the algorithm needs to converge. -- *Massive Insertion Workload (MIW)*: we create the graph database and configure it for massive loading, then we populate it with a particular dataset. We measure the time for the creation of the whole graph. -- *Single Insertion Workload (SIW)*: we create the graph database and load it with a particular dataset. Every object insertion (node or edge) is committed directly and the graph is constructed incrementally. We measure the insertion time per block, which consists of one thousand edges and the nodes that appear during the insertion of these edges. +- *Massive Insertion Workload (MIW)*: we create the graph database and configure it for massive loading, then we populate it with a particular data set. We measure the time for the creation of the whole graph. +- *Single Insertion Workload (SIW)*: we create the graph database and load it with a particular data set. Every object insertion (node or edge) is committed directly and the graph is constructed incrementally. We measure the insertion time per block, which consists of one thousand edges and the nodes that appear during the insertion of these edges. - *Query Workload (QW)*: we execute three common queries: - * FindNeighbours (FN): finds the neighbours of all nodes. + * FindNeighbours (FN): finds the neighbors of all nodes. * FindAdjacentNodes (FA): finds the adjacent nodes of all edges. * FindShortestPath (FS): finds the shortest path between the first node and 100 randomly picked nodes. Here we measure the execution time of each query. -For our evaluation we use both synthetic and real data. More specifically, we execute MIW, SIW and QW with real data derived from the SNAP dataset collection ([Enron Dataset](http://snap.stanford.edu/data/email-Enron.html), [Amazon dataset](http://snap.stanford.edu/data/amazon0601.html), [Youtube dataset](http://snap.stanford.edu/data/com-Youtube.html) and [LiveJournal dataset](http://snap.stanford.edu/data/com-LiveJournal.html)). On the other hand, with the CW we use synthetic data generated with the [LFR-Benchmark generator](https://sites.google.com/site/andrealancichinetti/files) which produces networks with power-law degree distribution and implanted communities within the network. The synthetic data can be downloaded form [here](http://figshare.com/articles/Synthetic_Data_for_graphdb_benchmark/1221760). +For our evaluation we use both synthetic and real data. 
More specifically, we execute MIW, SIW and QW with real data derived from the SNAP data set collection ([Enron data set](http://snap.stanford.edu/data/email-Enron.html), [Amazon data set](http://snap.stanford.edu/data/amazon0601.html), [Youtube data set](http://snap.stanford.edu/data/com-Youtube.html) and [LiveJournal data set](http://snap.stanford.edu/data/com-LiveJournal.html)). On the other hand, with the CW we use synthetic data generated with the [LFR-Benchmark generator](https://sites.google.com/site/andrealancichinetti/files) which produces networks with power-law degree distribution and implanted communities within the network. The synthetic data can be downloaded from [here](http://figshare.com/articles/Synthetic_Data_for_graphdb_benchmark/1221760). For further information about the study please refer to the [published paper](http://link.springer.com/chapter/10.1007/978-3-319-10518-5_1) on Springer site and the presentation on [Slideshare](http://www.slideshare.net/sympapadopoulos/adbis2014-presentation). @@ -24,7 +24,7 @@ For further information about the study please refer to the [published paper](ht Instructions ------------ -To run the project at first you have to choose one of the aforementioned datasets. Of course you can select any dataset, but because there is not any utility class to convert the dataset in the appropriate format (for now), the format of the data must be identical with the tested datasets. The input parameters are configured from the src/test/resources/input.properties file. Please follow the instructions in this file to select the correct parameters. Then, run `mvn install && mvn test -Pbench` to execute the benchmarking run. +To run the project at first you have to choose one of the aforementioned data sets. Of course you can select any data set, but because there is not any utility class to convert the data set in the appropriate format (for now), the format of the data must be identical with the tested data sets. The input parameters are configured from the src/test/resources/input.properties file. Please follow the instructions in this file to select the correct parameters. Then, run `mvn install && mvn test -Pbench` to execute the benchmarking run. Results ------- @@ -88,32 +88,55 @@ Below we list the results of the CW for graphs with 1,000, 5,000, 10,0000, 20,00 ####MIW & QW results Below we list the results of MIW and QW for each dataset. - -| Dataset | Workload | Titan | OrientDB | Neo4j | -| ------- | -------- | ----- | -------- | ----- | -| EN | MIW |9.36 |62.77 |**6.77** | -| AM | MIW |34.00 |97.00 |**10.61** | -| YT | MIW |104.27 |252.15 |**24.69** | -| LJ | MIW |663.03 |9416.74 |**349.55**| -| | -| EN | QW-FN |1.87 |**0.56** |0.95 | -| AM | QW-FN |6.47 |3.50 |**1.85** | -| YT | QW-FN |20.71 |9.34 |**4.51** | -| LJ | QW-FN |213.41 |303.09 |**47.07** | -| | -| EN | QW-FA |3.78 |0.71 |**0.16** | -| AM | QW-FA |13.77 |2.30 |**0.36** | -| YT | QW-FA |42.82 |6.15 |**1.46** | -| LJ | QW-FA |460.25 |518.12 |**16.53** | -| | -| EN | QW-FS |1.63 |3.09 |**0.16** | -| AM | QW-FS |0.12 |83.29 |**0.302** | -| YT | QW-FS |24.87 |23.47 |**0.08** | -| LJ | QW-FS |123.50 |86.87 |**18.13** | - +The results are measured in seconds. 
+ +| Dataset | Workload | Titan-BDB (new) | Neo4j (new) | +| ------- | -------- | ---------------- | ----------- | +| EN | QW-FA | 5.235 | **0.311** | +| AM | QW-FA | __13.770__ | 1.730 | +| YT | QW-FA | __42.820__ | 1.512 | +| LJ | QW-FA | __460.250__ | __16.530__ | +| 1k | QW-FA | 0.348 | **0.031** | +| 5k | QW-FA | 1.935 | **0.115** | +| 10k | QW-FA | 4.056 | **0.225** | +| 20k | QW-FA | 10.212 | **0.590** | +| 30k | QW-FA | 20.108 | **0.793** | +| 40k | QW-FA | 27.879 | **1.145** | +| 50k | QW-FA | 35.398 | **1.376** | +| | | | | +| EN | QW-FN | 6.411 | **0.720** | +| AM | QW-FN | __6.470__ | 5.788 | +| YT | QW-FN | __20.710__ | 5.567 | +| LJ | QW-FN | __213.410__ | __47.070__ | +| 1k | QW-FN | 0.614 | **0.103** | +| 5k | QW-FN | 2.548 | **0.329** | +| 10k | QW-FN | 5.643 | **0.636** | +| 20k | QW-FN | 12.717 | **1.333** | +| 30k | QW-FN | 21.015 | **2.290** | +| 40k | QW-FN | 32.035 | **3.708** | +| 50k | QW-FN | 42.97 | **6.465** | +| | | | | +| EN | MIW | 9.514 | **1.970** | +| AM | MIW | __34.000__ | 16.464 | +| YT | MIW | __104.270__ | 19.810 | +| LJ | MIW | __663.030__ | __349.550__ | +| 1k | MIW | 1.207 | **0.525** | +| 5k | MIW | 3.701 | **1.235** | +| 10k | MIW | 7.520 | **1.821** | +| 20k | MIW | 15.816 | **3.580** | +| 30k | MIW | 29.423 | **5.912** | +| 40k | MIW | 44.702 | **9.097** | +| 50k | MIW | 57.315 | **11.687** | + +Note, Find Shortest Path benchmark is currently broken. +Consequently, I did not update the QW-FS numbers. +Also, OrientDB's TP3 implementation is not official yet, +so I did not run numbers for OrientDB as well. +Finally, bold numbers indicate the fastest performer and italics indicate +tests that have not been run for updated results yet. ####SIW results -Below we list the results of SIW for each dataset. +Below we list the results of SIW for each data set. 
![siw_benchmark_updated](https://cloud.githubusercontent.com/assets/8163869/12272282/62b1c9f4-b914-11e5-85be-efd3f58e1e05.png) + + 1.10.57 @@ -149,6 +152,11 @@ dynamodb-titan100-storage-backend ${dynamodb.titan.version} + + jp.classmethod + tupl-titan100-storage-backend + 1.0.0 + junit junit @@ -329,6 +337,7 @@ **/GraphDatabaseBenchmarkTest.java + -Xmx32g false ${basedir}/src/test/resources/META-INF/log4j2.xml @@ -347,4 +356,18 @@ https://jitpack.io + + + + com.amazonaws + aws-java-sdk-dynamodb + ${aws.java.sdk.version} + + + com.amazonaws + DynamoDBLocal + ${aws.java.sdk.version} + + + diff --git a/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java b/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java index 7372e12..42fbef0 100644 --- a/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java +++ b/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java @@ -48,6 +48,7 @@ import eu.socialsensor.main.BenchmarkConfiguration; import eu.socialsensor.main.GraphDatabaseType; import eu.socialsensor.utils.Utils; +import jp.classmethod.titan.diskstorage.tupl.TuplStoreManager; /** * Titan graph database implementation @@ -149,6 +150,12 @@ private static final StandardTitanGraph buildTitanGraph(GraphDatabaseType type, storage.addProperty(GraphDatabaseConfiguration.STORAGE_HOSTS.getName(), "localhost"); } + else if (GraphDatabaseType.TITAN_TUPL == type) + { + final Configuration tupl = storage.subset(TuplStoreManager.TUPL_NS.getName()); + tupl.addProperty(TuplStoreManager.TUPL_PREFIX.getName(), "tupldb"); + tupl.addProperty(TuplStoreManager.TUPL_MIN_CACHE_SIZE.getName(), Long.toString(bench.getTuplMinCacheSize())); + } else if (GraphDatabaseType.TITAN_DYNAMODB == type) { final Configuration dynamodb = storage.subset("dynamodb"); diff --git a/src/main/java/eu/socialsensor/insert/InsertionBase.java b/src/main/java/eu/socialsensor/insert/InsertionBase.java index d18fc5d..338f26f 100644 --- a/src/main/java/eu/socialsensor/insert/InsertionBase.java +++ b/src/main/java/eu/socialsensor/insert/InsertionBase.java @@ -121,6 +121,7 @@ public final void createGraph(File datasetFile, int scenarioNumber) } }); post(); + logger.info("Edges: " + i.get()); insertionTimes.add((double) watch.elapsed(TimeUnit.MILLISECONDS)); if (single) diff --git a/src/main/java/eu/socialsensor/insert/TitanMassiveInsertion.java b/src/main/java/eu/socialsensor/insert/TitanMassiveInsertion.java index 6c7b91a..c461f3f 100644 --- a/src/main/java/eu/socialsensor/insert/TitanMassiveInsertion.java +++ b/src/main/java/eu/socialsensor/insert/TitanMassiveInsertion.java @@ -2,6 +2,8 @@ import java.util.Map; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.apache.tinkerpop.gremlin.structure.Vertex; import com.google.common.base.Preconditions; @@ -22,6 +24,7 @@ */ public abstract class TitanMassiveInsertion extends InsertionBase { + private static final Logger logger = LogManager.getLogger(); protected final StandardTitanGraph graph; protected final StandardTitanTx tx; @@ -47,6 +50,7 @@ public void relateNodes(Vertex src, Vertex dest) @Override protected void post() { + logger.info("vertices: " + vertexCache.size()); tx.commit(); //mutation work is done here Preconditions.checkState(graph.getOpenTransactions().isEmpty()); } diff --git a/src/main/java/eu/socialsensor/main/BenchmarkConfiguration.java b/src/main/java/eu/socialsensor/main/BenchmarkConfiguration.java index f7ccf0f..27965ce 100644 --- 
a/src/main/java/eu/socialsensor/main/BenchmarkConfiguration.java +++ b/src/main/java/eu/socialsensor/main/BenchmarkConfiguration.java @@ -17,6 +17,7 @@ import com.thinkaurelius.titan.graphdb.configuration.GraphDatabaseConfiguration; import eu.socialsensor.dataset.DatasetFactory; +import jp.classmethod.titan.diskstorage.tupl.TuplStoreManager; /** * @@ -105,6 +106,7 @@ public class BenchmarkConfiguration private final boolean dynamodbPrecreateTables; private final String dynamodbTablePrefix; private final boolean customIds; + private final long tuplMinCacheSize; public String getDynamodbCredentialsFqClassName() { @@ -164,6 +166,9 @@ public BenchmarkConfiguration(Configuration appconfig) pageSize = titan.getInt(PAGE_SIZE, GraphDatabaseConfiguration.PAGE_SIZE.getDefaultValue()); customIds = titan.getBoolean(CUSTOM_IDS, false /*default*/); + final Configuration tupl = socialsensor.subset("tupl"); + tuplMinCacheSize = tupl.getLong(TuplStoreManager.TUPL_MIN_CACHE_SIZE.getName(), TuplStoreManager.TUPL_MIN_CACHE_SIZE.getDefaultValue()); + // database storage directory if (!socialsensor.containsKey(DATABASE_STORAGE_DIRECTORY)) { @@ -442,4 +447,8 @@ public boolean publishGraphiteMetrics() public boolean isCustomIds() { return customIds; } + + public long getTuplMinCacheSize() { + return tuplMinCacheSize; + } } diff --git a/src/main/java/eu/socialsensor/main/GraphDatabaseType.java b/src/main/java/eu/socialsensor/main/GraphDatabaseType.java index c3b52d6..9702912 100644 --- a/src/main/java/eu/socialsensor/main/GraphDatabaseType.java +++ b/src/main/java/eu/socialsensor/main/GraphDatabaseType.java @@ -14,6 +14,7 @@ public enum GraphDatabaseType { TITAN_BERKELEYDB("Titan", "berkeleyje", "tbdb"), TITAN_DYNAMODB("Titan", "com.amazon.titan.diskstorage.dynamodb.DynamoDBStoreManager", "tddb"), + TITAN_TUPL("Titan", "jp.classmethod.titan.diskstorage.tupl.TuplStoreManager", "ttupl"), TITAN_CASSANDRA("Titan", "cassandra", "tc"), TITAN_CASSANDRA_EMBEDDED("TitanEmbedded", "embeddedcassandra", "tce"), TITAN_HBASE("Titan", "hbase", "thb"), @@ -34,6 +35,7 @@ public enum GraphDatabaseType STRING_REP_MAP.put(db.getShortname(), db); } TITAN_FLAVORS.add(TITAN_BERKELEYDB); + TITAN_FLAVORS.add(TITAN_TUPL); TITAN_FLAVORS.add(TITAN_DYNAMODB); TITAN_FLAVORS.add(TITAN_CASSANDRA); TITAN_FLAVORS.add(TITAN_CASSANDRA_EMBEDDED); diff --git a/src/test/resources/META-INF/input.properties b/src/test/resources/META-INF/input.properties index dde0fab..676e9b6 100644 --- a/src/test/resources/META-INF/input.properties +++ b/src/test/resources/META-INF/input.properties @@ -1,16 +1,18 @@ # Choose which data sets you want to include in the benchmark by removing the contents. 
+#Enron #eu.socialsensor.dataset=data/Email-Enron.txt -#eu.socialsensor.dataset=data/com-youtube.ungraph.txt -#eu.socialsensor.dataset=data/Amazon0601.txt -#eu.socialsensor.dataset=data/com-lj.ungraph.txt -#can change the number in the filename of the synthetic datasets to 1000, 5000, 10000, 20000, 30000, 40000, 50000 -#eu.socialsensor.dataset=data/Email-Enron.txt +#Amazon #eu.socialsensor.dataset=data/Amazon0601.txt +#YouTube #eu.socialsensor.dataset=data/com-youtube.ungraph.txt #eu.socialsensor.actual-communities=data/com-youtube.all.cmty.txt - -eu.socialsensor.dataset=data/network30000.dat -eu.socialsensor.actual-communities=data/community30000.dat +#LiveJournal +#eu.socialsensor.dataset=data/com-lj.ungraph.txt +#eu.socialsensor.actual-communities=com-lj.all.cmty.txt +#Synthetic +#can change the number in the filename of the synthetic datasets to 1000, 5000, 10000, 20000, 30000, 40000, 50000 +eu.socialsensor.dataset=data/network10000.dat +#eu.socialsensor.actual-communities=data/community50000.dat eu.socialsensor.database-storage-directory=storage # Sample meters this frequently (milliseconds) @@ -23,13 +25,14 @@ eu.socialsensor.metrics.csv.directory=metrics # Choose which databases you want to in the benchmark by removing the comments. # Available dbs are: eu.socialsensor.databases=tbdb +eu.socialsensor.databases=ttupl #eu.socialsensor.databases=tddb #eu.socialsensor.databases=tc #eu.socialsensor.databases=thb #eu.socialsensor.databases=tce #eu.socialsensor.databases=ti #eu.socialsensor.databases=orient -#eu.socialsensor.databases=neo4j +eu.socialsensor.databases=neo4j # Database specific options # Titan options @@ -66,6 +69,7 @@ eu.socialsensor.dynamodb.credentials.constructor-args= # or the https endpoint of a production region of the DynamoDB service. eu.socialsensor.dynamodb.endpoint=http://127.0.0.1:4567 #eu.socialsensor.dynamodb.endpoint=https://dynamodb.us-east-1.amazonaws.com +eu.socialsensor.tupl.min-cache-size=1000000000 # The following five benchmarks are permutable (that is, the suite can run them # many times in different database order). To turn on permutations, set @@ -76,8 +80,8 @@ eu.socialsensor.permute-benchmarks=false # workload and then query/clustering workloads afterward. 
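Editor's note on the new eu.socialsensor.tupl.min-cache-size property above: it is read by the BenchmarkConfiguration change shown earlier in the series through a commons-configuration subset of the benchmark namespace. A rough sketch of that lookup pattern, with a hypothetical fallback value standing in for the store manager's own default:

import org.apache.commons.configuration.Configuration;
import org.apache.commons.configuration.PropertiesConfiguration;

final class TuplOptionSketch {
    // "socialsensor" is the configuration subset already stripped of the "eu.socialsensor." prefix
    static long minCacheSize(Configuration socialsensor) {
        Configuration tupl = socialsensor.subset("tupl");
        return tupl.getLong("min-cache-size", 1_000_000_000L /* hypothetical fallback */);
    }

    public static void main(String[] args) {
        Configuration conf = new PropertiesConfiguration();
        conf.addProperty("tupl.min-cache-size", 2_000_000_000L);
        System.out.println(minCacheSize(conf)); // prints 2000000000
    }
}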
eu.socialsensor.benchmarks=MASSIVE_INSERTION #eu.socialsensor.benchmarks=SINGLE_INSERTION -#eu.socialsensor.benchmarks=FIND_NEIGHBOURS -#eu.socialsensor.benchmarks=FIND_ADJACENT_NODES +eu.socialsensor.benchmarks=FIND_NEIGHBOURS +eu.socialsensor.benchmarks=FIND_ADJACENT_NODES #eu.socialsensor.benchmarks=FIND_SHORTEST_PATH #eu.socialsensor.shortest-path-random-nodes=100 @@ -99,7 +103,7 @@ eu.socialsensor.cache-values=150 #eu.socialsensor.cache-values-count=6 # This benchmark measures the time it takes to delete the database -eu.socialsensor.benchmarks=DELETION +#eu.socialsensor.benchmarks=DELETION # Results folder path eu.socialsensor.results-path=results From 73c073ff27d46740af638f17c4e3e70bcef884cb Mon Sep 17 00:00:00 2001 From: Alexander Patrikalakis Date: Sun, 3 Apr 2016 02:39:37 +0900 Subject: [PATCH 11/26] enable direct page access and set checkpoint size threshold to zero --- .../eu/socialsensor/graphdatabases/TitanGraphDatabase.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java b/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java index 42fbef0..64f9a9f 100644 --- a/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java +++ b/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java @@ -154,7 +154,11 @@ else if (GraphDatabaseType.TITAN_TUPL == type) { final Configuration tupl = storage.subset(TuplStoreManager.TUPL_NS.getName()); tupl.addProperty(TuplStoreManager.TUPL_PREFIX.getName(), "tupldb"); + tupl.addProperty(TuplStoreManager.TUPL_DIRECT_PAGE_ACCESS.getName(), Boolean.TRUE.toString()); tupl.addProperty(TuplStoreManager.TUPL_MIN_CACHE_SIZE.getName(), Long.toString(bench.getTuplMinCacheSize())); + final Configuration checkpoint = tupl.subset(TuplStoreManager.TUPL_CHECKPOINT_NS.getName()); + //TODO make this conditioned on running the Massive Insertion Workload + checkpoint.addProperty(TuplStoreManager.TUPL_CHECKPOINT_SIZE_THRESHOLD.getName(), 0); } else if (GraphDatabaseType.TITAN_DYNAMODB == type) { From 3c1d48e2dd24d71a9937f25f5313904cefb04232 Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Sat, 2 Apr 2016 18:07:28 +0000 Subject: [PATCH 12/26] updated for tupl at 1.3-SNAPSHOT, direct page access --- README.md | 4 ++-- pom.xml | 2 +- .../eu/socialsensor/graphdatabases/TitanGraphDatabase.java | 2 +- src/test/resources/META-INF/input.properties | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 863bd02..d4be8ab 100644 --- a/README.md +++ b/README.md @@ -99,7 +99,7 @@ The results are measured in seconds. | 20k | QW-FA | 10.568 | 2.521 | **0.826** | | 30k | QW-FA | 18.356 | 4.638 | **1.383** | | 40k | QW-FA | 27.907 | 7.107 | **2.010** | -| 50k | QW-FA | 34.284 | 9.716 | **2.472** | +| 50k | QW-FA | 34.284 | 9.481 | **2.472** | | AM | QW-FA | 61.811 | 19.015 | **3.413** | | | | | | | | 1k | QW-FN | 0.607 | 0.229 | **0.131** | @@ -119,7 +119,7 @@ The results are measured in seconds. | 20k | MIW | 17.011 | 12.711 | **4.511** | | 30k | MIW | 30.252 | 19.929 | **8.767** | | 40k | MIW | 44.450 | 31.763 | **12.761** | -| 50k | MIW | 57.001 | 36.281 | **15.755** | +| 50k | MIW | 57.001 | 35.116 | **15.755** | | AM | MIW | 98.405 | 64.286 | **23.867** | Note, Find Shortest Path benchmark is currently broken. 
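Editor's note on patch 11 above: it tunes the Tupl backend for the massive-insertion run by enabling direct page access and zeroing the checkpoint size threshold, on top of the prefix and minimum cache size wired up earlier. The options hang off the Titan storage namespace through nested commons-configuration subsets, using the TuplStoreManager constants named in the diff. A hedged sketch of how those pieces fit together; the storage subset and the cache size are assumed to come from buildTitanGraph and BenchmarkConfiguration as above:

import org.apache.commons.configuration.Configuration;
import jp.classmethod.titan.diskstorage.tupl.TuplStoreManager;

final class TuplTuningSketch {
    // "storage" is the storage-namespace subset of the Titan configuration,
    // "minCacheSize" is the value of eu.socialsensor.tupl.min-cache-size
    static void applyTuplOptions(Configuration storage, long minCacheSize) {
        final Configuration tupl = storage.subset(TuplStoreManager.TUPL_NS.getName());
        tupl.addProperty(TuplStoreManager.TUPL_PREFIX.getName(), "tupldb");
        tupl.addProperty(TuplStoreManager.TUPL_DIRECT_PAGE_ACCESS.getName(), Boolean.TRUE.toString());
        tupl.addProperty(TuplStoreManager.TUPL_MIN_CACHE_SIZE.getName(), Long.toString(minCacheSize));

        final Configuration checkpoint = tupl.subset(TuplStoreManager.TUPL_CHECKPOINT_NS.getName());
        // zero presumably makes the scheduled checkpoint run even when little redo has accumulated
        checkpoint.addProperty(TuplStoreManager.TUPL_CHECKPOINT_SIZE_THRESHOLD.getName(), 0);
    }
}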
diff --git a/pom.xml b/pom.xml index 7b4f979..bcff834 100644 --- a/pom.xml +++ b/pom.xml @@ -155,7 +155,7 @@ jp.classmethod tupl-titan100-storage-backend - 1.0.0 + 1.0.1 junit diff --git a/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java b/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java index 64f9a9f..928e461 100644 --- a/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java +++ b/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java @@ -158,7 +158,7 @@ else if (GraphDatabaseType.TITAN_TUPL == type) tupl.addProperty(TuplStoreManager.TUPL_MIN_CACHE_SIZE.getName(), Long.toString(bench.getTuplMinCacheSize())); final Configuration checkpoint = tupl.subset(TuplStoreManager.TUPL_CHECKPOINT_NS.getName()); //TODO make this conditioned on running the Massive Insertion Workload - checkpoint.addProperty(TuplStoreManager.TUPL_CHECKPOINT_SIZE_THRESHOLD.getName(), 0); + //checkpoint.addProperty(TuplStoreManager.TUPL_CHECKPOINT_SIZE_THRESHOLD.getName(), 0); } else if (GraphDatabaseType.TITAN_DYNAMODB == type) { diff --git a/src/test/resources/META-INF/input.properties b/src/test/resources/META-INF/input.properties index 676e9b6..b1b6fe7 100644 --- a/src/test/resources/META-INF/input.properties +++ b/src/test/resources/META-INF/input.properties @@ -11,7 +11,7 @@ #eu.socialsensor.actual-communities=com-lj.all.cmty.txt #Synthetic #can change the number in the filename of the synthetic datasets to 1000, 5000, 10000, 20000, 30000, 40000, 50000 -eu.socialsensor.dataset=data/network10000.dat +eu.socialsensor.dataset=data/network50000.dat #eu.socialsensor.actual-communities=data/community50000.dat eu.socialsensor.database-storage-directory=storage @@ -24,7 +24,7 @@ eu.socialsensor.metrics.csv.directory=metrics # Choose which databases you want to in the benchmark by removing the comments. 
# Available dbs are: -eu.socialsensor.databases=tbdb +#eu.socialsensor.databases=tbdb eu.socialsensor.databases=ttupl #eu.socialsensor.databases=tddb #eu.socialsensor.databases=tc From 2d2ceebe91e1e51a1b0b011c77bb4433585f865c Mon Sep 17 00:00:00 2001 From: Alexander Patrikalakis Date: Sun, 3 Apr 2016 03:32:55 +0900 Subject: [PATCH 13/26] map data files in memory for tupl --- .../java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java b/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java index 928e461..813663a 100644 --- a/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java +++ b/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java @@ -156,6 +156,7 @@ else if (GraphDatabaseType.TITAN_TUPL == type) tupl.addProperty(TuplStoreManager.TUPL_PREFIX.getName(), "tupldb"); tupl.addProperty(TuplStoreManager.TUPL_DIRECT_PAGE_ACCESS.getName(), Boolean.TRUE.toString()); tupl.addProperty(TuplStoreManager.TUPL_MIN_CACHE_SIZE.getName(), Long.toString(bench.getTuplMinCacheSize())); + tupl.addProperty(TuplStoreManager.TUPL_MAP_DATA_FILES.getName(), Boolean.TRUE.toString()); final Configuration checkpoint = tupl.subset(TuplStoreManager.TUPL_CHECKPOINT_NS.getName()); //TODO make this conditioned on running the Massive Insertion Workload //checkpoint.addProperty(TuplStoreManager.TUPL_CHECKPOINT_SIZE_THRESHOLD.getName(), 0); From fadc19f81e31f8c29182fa86fd02208be7eba82e Mon Sep 17 00:00:00 2001 From: Alexander Patrikalakis Date: Sat, 2 Apr 2016 18:37:07 +0000 Subject: [PATCH 14/26] updated benchmark results --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index d4be8ab..c889311 100644 --- a/README.md +++ b/README.md @@ -99,7 +99,7 @@ The results are measured in seconds. | 20k | QW-FA | 10.568 | 2.521 | **0.826** | | 30k | QW-FA | 18.356 | 4.638 | **1.383** | | 40k | QW-FA | 27.907 | 7.107 | **2.010** | -| 50k | QW-FA | 34.284 | 9.481 | **2.472** | +| 50k | QW-FA | 34.284 | 9.521 | **2.472** | | AM | QW-FA | 61.811 | 19.015 | **3.413** | | | | | | | | 1k | QW-FN | 0.607 | 0.229 | **0.131** | @@ -109,7 +109,7 @@ The results are measured in seconds. | 20k | QW-FN | 12.861 | 5.218 | **2.841** | | 30k | QW-FN | 21.816 | 8.340 | **4.603** | | 40k | QW-FN | 31.187 | 11.632 | **7.272** | -| 50k | QW-FN | 41.175 | 14.246 | **8.489** | +| 50k | QW-FN | 41.175 | 14.742 | **8.489** | | AM | QW-FN | 76.562 | 28.242 | **12.466** | | | | | | | | 1k | MIW | 1.167 | 0.673 | **0.481** | @@ -119,7 +119,7 @@ The results are measured in seconds. | 20k | MIW | 17.011 | 12.711 | **4.511** | | 30k | MIW | 30.252 | 19.929 | **8.767** | | 40k | MIW | 44.450 | 31.763 | **12.761** | -| 50k | MIW | 57.001 | 35.116 | **15.755** | +| 50k | MIW | 57.001 | 35.008 | **15.755** | | AM | MIW | 98.405 | 64.286 | **23.867** | Note, Find Shortest Path benchmark is currently broken. 
From 322208855e8c5ad8d4d5f8372024d0d54c264a31 Mon Sep 17 00:00:00 2001 From: Alexander Patrikalakis Date: Thu, 7 Apr 2016 15:19:11 +0900 Subject: [PATCH 15/26] shortest path QW compiles, run enron --- .gitignore | 2 ++ .../graphdatabases/GraphDatabaseBase.java | 2 ++ .../graphdatabases/TitanGraphDatabase.java | 28 +++++++++---------- .../insert/TitanMassiveCustomIds.java | 2 +- src/test/resources/META-INF/input.properties | 10 +++---- 5 files changed, 23 insertions(+), 21 deletions(-) diff --git a/.gitignore b/.gitignore index 0415b5c..4922a64 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,5 @@ target/* /metrics /storage /results +/.idea/ +/graphdb-benchmarks.iml diff --git a/src/main/java/eu/socialsensor/graphdatabases/GraphDatabaseBase.java b/src/main/java/eu/socialsensor/graphdatabases/GraphDatabaseBase.java index 784aa38..91a2278 100644 --- a/src/main/java/eu/socialsensor/graphdatabases/GraphDatabaseBase.java +++ b/src/main/java/eu/socialsensor/graphdatabases/GraphDatabaseBase.java @@ -137,6 +137,8 @@ public void shortestPaths(Set nodes) { ctxt = shortestPathTimes.time(); try { shortestPath(from, i); + } catch(Exception e) { + e.printStackTrace(); } finally { ctxt.stop(); } diff --git a/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java b/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java index 813663a..a535ab9 100644 --- a/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java +++ b/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java @@ -1,14 +1,7 @@ package eu.socialsensor.graphdatabases; import java.io.File; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; import java.util.concurrent.TimeUnit; import org.apache.commons.configuration.Configuration; @@ -17,6 +10,7 @@ import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__; +import org.apache.tinkerpop.gremlin.process.traversal.util.FastNoSuchElementException; import org.apache.tinkerpop.gremlin.structure.Direction; import org.apache.tinkerpop.gremlin.structure.Edge; import org.apache.tinkerpop.gremlin.structure.Property; @@ -288,12 +282,16 @@ public void shortestPath(final Vertex fromNode, Integer node) { final GraphTraversalSource g = graph.traversal(); final Vertex toNode = getVertex(node); - //TODO(amcp) how to limit depth to 5? 
- List paths = g.V(fromNode).repeat(__.both().simplePath()).until(__.is(toNode)).limit(1).path().toList(); - - for(Path path : paths) { - path.size(); - } + // repeat the contained traversal + // map from this vertex to inV on SIMILAR edges without looping + // until you map to the target toNode and the path is six vertices long or less + // only return one path + GraphTraversal t = + g.V(fromNode).repeat(__.both().simplePath()).until(__.is(toNode).and(__.filter(it -> it.path().size() <= 6))) + .limit(1).path(); + //when the size of the path in the traverser object is six, that means this traverser made 5 hops from the + //fromNode, a total of 6 vertices + t.tryNext().ifPresent( it -> it.size()); } @Override @@ -624,7 +622,7 @@ public Vertex nextVertex(Iterator it) public Vertex getVertex(Integer i) { final GraphTraversalSource g = graph.traversal(); - final Vertex vertex = g.V(T.label, NODE_LABEL).has(NODE_ID, i).next(); + final Vertex vertex = g.V().has(NODE_ID, i).next(); return vertex; } } diff --git a/src/main/java/eu/socialsensor/insert/TitanMassiveCustomIds.java b/src/main/java/eu/socialsensor/insert/TitanMassiveCustomIds.java index 7af5185..aacc025 100644 --- a/src/main/java/eu/socialsensor/insert/TitanMassiveCustomIds.java +++ b/src/main/java/eu/socialsensor/insert/TitanMassiveCustomIds.java @@ -26,7 +26,7 @@ public Vertex getOrCreate(String value) { final Long longVal = Long.valueOf(value); //the value used in data files //the value used in data files sometimes is zero so add one for the purposes of generating ids - final Long longPositiveVal = Long.valueOf(value) + 1; + final Long longPositiveVal = longVal + 1; //send everything to partition 1 by adding 1 final long titanVertexId = TitanId.toVertexId((longPositiveVal << 1) + 1 /*move over 1 bit for 2 partitions (2^1 = 2)*/); diff --git a/src/test/resources/META-INF/input.properties b/src/test/resources/META-INF/input.properties index b1b6fe7..25e833d 100644 --- a/src/test/resources/META-INF/input.properties +++ b/src/test/resources/META-INF/input.properties @@ -1,6 +1,6 @@ # Choose which data sets you want to include in the benchmark by removing the contents. 
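Editor's note on the TitanMassiveCustomIds hunk above: it shows how the benchmark turns a node id from the data file into a user-supplied Titan vertex id. The raw id is shifted up by one (ids in the data files can be zero, Titan ids must be positive), a low bit is appended to pin every vertex to one of two id partitions, and TitanId.toVertexId converts the result into Titan's internal id format. A compact sketch of that arithmetic; the TitanId package below is assumed from the project's imports:

import com.thinkaurelius.titan.core.util.TitanId; // assumed location of the id utility used above

final class CustomIdSketch {
    // maps a data-file id to a Titan vertex id, sending everything to partition 1 of 2
    static long toTitanVertexId(long fileId) {
        long positive = fileId + 1;              // ids in the data files start at zero
        long partitioned = (positive << 1) + 1;  // 1 bit for 2 partitions, low bit selects partition 1
        return TitanId.toVertexId(partitioned);
    }
}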
#Enron -#eu.socialsensor.dataset=data/Email-Enron.txt +eu.socialsensor.dataset=data/Email-Enron.txt #Amazon #eu.socialsensor.dataset=data/Amazon0601.txt #YouTube @@ -11,7 +11,7 @@ #eu.socialsensor.actual-communities=com-lj.all.cmty.txt #Synthetic #can change the number in the filename of the synthetic datasets to 1000, 5000, 10000, 20000, 30000, 40000, 50000 -eu.socialsensor.dataset=data/network50000.dat +#eu.socialsensor.dataset=data/network10000.dat #eu.socialsensor.actual-communities=data/community50000.dat eu.socialsensor.database-storage-directory=storage @@ -32,7 +32,7 @@ eu.socialsensor.databases=ttupl #eu.socialsensor.databases=tce #eu.socialsensor.databases=ti #eu.socialsensor.databases=orient -eu.socialsensor.databases=neo4j +#eu.socialsensor.databases=neo4j # Database specific options # Titan options @@ -82,8 +82,8 @@ eu.socialsensor.benchmarks=MASSIVE_INSERTION #eu.socialsensor.benchmarks=SINGLE_INSERTION eu.socialsensor.benchmarks=FIND_NEIGHBOURS eu.socialsensor.benchmarks=FIND_ADJACENT_NODES -#eu.socialsensor.benchmarks=FIND_SHORTEST_PATH -#eu.socialsensor.shortest-path-random-nodes=100 +eu.socialsensor.benchmarks=FIND_SHORTEST_PATH +eu.socialsensor.shortest-path-random-nodes=100 # The clustering benchmark is not permutable even if eu.socialsensor.permute-benchmarks=true #eu.socialsensor.benchmarks=CLUSTERING From 9788031429f713dc48d9c8d9355106b979d5d4f3 Mon Sep 17 00:00:00 2001 From: Alexander Patrikalakis Date: Thu, 7 Apr 2016 18:23:36 +0900 Subject: [PATCH 16/26] reverted to old target node selection logic for QW-SP --- .../benchmarks/FindShortestPathBenchmark.java | 3 +- .../java/eu/socialsensor/dataset/Dataset.java | 60 ++++++++++--------- .../graphdatabases/GraphDatabase.java | 2 +- .../graphdatabases/GraphDatabaseBase.java | 10 ++-- .../graphdatabases/TitanGraphDatabase.java | 51 +++++++++++++--- src/test/resources/META-INF/input.properties | 6 +- 6 files changed, 87 insertions(+), 45 deletions(-) diff --git a/src/main/java/eu/socialsensor/benchmarks/FindShortestPathBenchmark.java b/src/main/java/eu/socialsensor/benchmarks/FindShortestPathBenchmark.java index daf70e2..b8d4630 100644 --- a/src/main/java/eu/socialsensor/benchmarks/FindShortestPathBenchmark.java +++ b/src/main/java/eu/socialsensor/benchmarks/FindShortestPathBenchmark.java @@ -7,6 +7,7 @@ import eu.socialsensor.main.GraphDatabaseType; import eu.socialsensor.utils.Utils; +import java.util.List; import java.util.Set; import java.util.concurrent.TimeUnit; @@ -21,7 +22,7 @@ public class FindShortestPathBenchmark extends PermutingBenchmarkBase implements RequiresGraphData { - private final Set generatedNodes; + private final List generatedNodes; public FindShortestPathBenchmark(BenchmarkConfiguration config) { diff --git a/src/main/java/eu/socialsensor/dataset/Dataset.java b/src/main/java/eu/socialsensor/dataset/Dataset.java index 103b99c..ca19dcf 100644 --- a/src/main/java/eu/socialsensor/dataset/Dataset.java +++ b/src/main/java/eu/socialsensor/dataset/Dataset.java @@ -1,11 +1,7 @@ package eu.socialsensor.dataset; import java.io.File; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Set; +import java.util.*; import org.apache.commons.math3.util.MathArrays; @@ -25,31 +21,39 @@ public Dataset(File datasetFile) data = Utils.readTabulatedLines(datasetFile, 4 /* numberOfLinesToSkip */); } - public Set generateRandomNodes(int numRandomNodes) + public List generateRandomNodes(int numRandomNodes) { - Set nodes = new HashSet(); - for (List 
line : data.subList(4, data.size())) - { - for (String nodeId : line) - { - nodes.add(nodeId.trim()); - } +// Set nodes = new HashSet(); +// for (List line : data.subList(4, data.size())) +// { +// for (String nodeId : line) +// { +// nodes.add(nodeId.trim()); +// } +// } +// +// List nodeList = new ArrayList(nodes); +// int[] nodeIndexList = new int[nodeList.size()]; +// for (int i = 0; i < nodeList.size(); i++) +// { +// nodeIndexList[i] = i; +// } +// MathArrays.shuffle(nodeIndexList); +// +// Set generatedNodes = new HashSet(); +// for (int i = 0; i < numRandomNodes; i++) +// { +// generatedNodes.add(Integer.valueOf(nodeList.get(nodeIndexList[i]))); +// } + //Use old logic for now + final int max = 1000; + final int min = 2; + final Random rand = new Random(17); + final Set generatedNodes = new HashSet<>(); + while(generatedNodes.size() < numRandomNodes + 1) { //generate one more so that we can + generatedNodes.add(rand.nextInt((max - min) +1) + min); } - - List nodeList = new ArrayList(nodes); - int[] nodeIndexList = new int[nodeList.size()]; - for (int i = 0; i < nodeList.size(); i++) - { - nodeIndexList[i] = i; - } - MathArrays.shuffle(nodeIndexList); - - Set generatedNodes = new HashSet(); - for (int i = 0; i < numRandomNodes; i++) - { - generatedNodes.add(Integer.valueOf(nodeList.get(nodeIndexList[i]))); - } - return generatedNodes; + return new LinkedList<>(generatedNodes); } @Override diff --git a/src/main/java/eu/socialsensor/graphdatabases/GraphDatabase.java b/src/main/java/eu/socialsensor/graphdatabases/GraphDatabase.java index bd9cdd4..4264b8f 100644 --- a/src/main/java/eu/socialsensor/graphdatabases/GraphDatabase.java +++ b/src/main/java/eu/socialsensor/graphdatabases/GraphDatabase.java @@ -76,7 +76,7 @@ public interface GraphDatabase nodes); + public void shortestPaths(List nodes); /** * Execute findShortestPaths query from the Query interface diff --git a/src/main/java/eu/socialsensor/graphdatabases/GraphDatabaseBase.java b/src/main/java/eu/socialsensor/graphdatabases/GraphDatabaseBase.java index 91a2278..7c3f1f5 100644 --- a/src/main/java/eu/socialsensor/graphdatabases/GraphDatabaseBase.java +++ b/src/main/java/eu/socialsensor/graphdatabases/GraphDatabaseBase.java @@ -2,6 +2,7 @@ import java.io.File; import java.util.Iterator; +import java.util.List; import java.util.Set; import org.apache.logging.log4j.LogManager; @@ -125,20 +126,19 @@ public void findNodesOfAllEdges() { } @Override - public void shortestPaths(Set nodes) { + public void shortestPaths(List nodes) { //randomness of selected node comes from the hashing function of hash set final Iterator it = nodes.iterator(); Preconditions.checkArgument(it.hasNext()); final VertexType from = getVertex(it.next()); - it.remove();//now the set has 99 nodes + it.remove();//now the set has n-1 nodes Timer.Context ctxt; - for(Integer i : nodes) { + while(it.hasNext()) { + final Integer i = it.next(); //time this ctxt = shortestPathTimes.time(); try { shortestPath(from, i); - } catch(Exception e) { - e.printStackTrace(); } finally { ctxt.stop(); } diff --git a/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java b/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java index a535ab9..1260268 100644 --- a/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java +++ b/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java @@ -1,16 +1,25 @@ package eu.socialsensor.graphdatabases; import java.io.File; -import java.util.*; +import java.util.Set; +import java.util.ArrayList; 
+import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; import java.util.concurrent.TimeUnit; +import com.google.common.base.Stopwatch; import org.apache.commons.configuration.Configuration; import org.apache.commons.configuration.MapConfiguration; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.apache.tinkerpop.gremlin.process.traversal.Path; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__; -import org.apache.tinkerpop.gremlin.process.traversal.util.FastNoSuchElementException; import org.apache.tinkerpop.gremlin.structure.Direction; import org.apache.tinkerpop.gremlin.structure.Edge; import org.apache.tinkerpop.gremlin.structure.Property; @@ -44,6 +53,8 @@ import eu.socialsensor.utils.Utils; import jp.classmethod.titan.diskstorage.tupl.TuplStoreManager; +import static org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource.computer; + /** * Titan graph database implementation * @@ -52,6 +63,7 @@ */ public class TitanGraphDatabase extends GraphDatabaseBase, Iterator, Vertex, Edge> { + private static final Logger LOG = LogManager.getLogger(); public static final String INSERTION_TIMES_OUTPUT_PATH = "data/titan.insertion.times"; double totalWeight; @@ -282,16 +294,41 @@ public void shortestPath(final Vertex fromNode, Integer node) { final GraphTraversalSource g = graph.traversal(); final Vertex toNode = getVertex(node); + final Object toNodeId = toNode.id(); + LOG.debug("from @" + fromNode.value(NODE_ID) + "," + fromNode.id() + + " to @" + toNode.value(NODE_ID) + "," + toNode.id() + " "); + final Stopwatch watch = Stopwatch.createStarted(); // repeat the contained traversal // map from this vertex to inV on SIMILAR edges without looping // until you map to the target toNode and the path is six vertices long or less // only return one path +//g.V().has("nodeId", 14597).repeat(both().simplePath()).until(id().is(241640).and().filter {it.path().size() <= 6}).limit(1).path() GraphTraversal t = - g.V(fromNode).repeat(__.both().simplePath()).until(__.is(toNode).and(__.filter(it -> it.path().size() <= 6))) - .limit(1).path(); - //when the size of the path in the traverser object is six, that means this traverser made 5 hops from the - //fromNode, a total of 6 vertices - t.tryNext().ifPresent( it -> it.size()); + g.V().has(NODE_ID, fromNode.value(NODE_ID)) + .repeat( + __.both() + .simplePath()) + .until( + __.id().is(toNodeId) + .and( + __.filter(it -> { +//when the size of the path in the traverser object is six, that means this traverser made 5 hops from the +//fromNode, a total of 6 vertices + return it.path().size() <= 6; + })) + ) + .limit(1) + .path(); + + t.tryNext() + .ifPresent( it -> { + final int pathSize = it.size(); + final long elapsed = watch.elapsed(TimeUnit.MILLISECONDS); + watch.stop(); + LOG.debug("took " + elapsed + " ms, " + pathSize + ": " + it.toString()); + }); + + } @Override diff --git a/src/test/resources/META-INF/input.properties b/src/test/resources/META-INF/input.properties index 25e833d..c7000fb 100644 --- a/src/test/resources/META-INF/input.properties +++ b/src/test/resources/META-INF/input.properties @@ -78,10 +78,10 @@ eu.socialsensor.permute-benchmarks=false # Choose which benchmark you want to run by removing 
the comments. Choose one Insertion # workload and then query/clustering workloads afterward. -eu.socialsensor.benchmarks=MASSIVE_INSERTION +#eu.socialsensor.benchmarks=MASSIVE_INSERTION #eu.socialsensor.benchmarks=SINGLE_INSERTION -eu.socialsensor.benchmarks=FIND_NEIGHBOURS -eu.socialsensor.benchmarks=FIND_ADJACENT_NODES +#eu.socialsensor.benchmarks=FIND_NEIGHBOURS +#eu.socialsensor.benchmarks=FIND_ADJACENT_NODES eu.socialsensor.benchmarks=FIND_SHORTEST_PATH eu.socialsensor.shortest-path-random-nodes=100 From 572349f0e90f686726454e052cc65206d1e84ac3 Mon Sep 17 00:00:00 2001 From: Alexander Patrikalakis Date: Thu, 7 Apr 2016 18:25:04 +0900 Subject: [PATCH 17/26] compiles --- .../java/eu/socialsensor/graphdatabases/Neo4jGraphDatabase.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/eu/socialsensor/graphdatabases/Neo4jGraphDatabase.java b/src/main/java/eu/socialsensor/graphdatabases/Neo4jGraphDatabase.java index 45f5de9..0e895d6 100644 --- a/src/main/java/eu/socialsensor/graphdatabases/Neo4jGraphDatabase.java +++ b/src/main/java/eu/socialsensor/graphdatabases/Neo4jGraphDatabase.java @@ -183,7 +183,7 @@ public void shutdownMassiveGraph() } @Override - public void shortestPaths(Set nodes) { + public void shortestPaths(List nodes) { try (Transaction tx = ((Neo4jGraphDatabase) this).neo4jGraph.beginTx()) { super.shortestPaths(nodes); } From 23c84f21d7d541a076f7674c22f2214b398ad03f Mon Sep 17 00:00:00 2001 From: Alexander Patrikalakis Date: Fri, 8 Apr 2016 00:40:55 +0900 Subject: [PATCH 18/26] progress on the shortest path tp3 query workload --- .../graphdatabases/TitanGraphDatabase.java | 28 +++++++++---------- .../insert/TitanMassiveCustomIds.java | 6 ++-- src/test/resources/META-INF/input.properties | 2 +- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java b/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java index 1260268..a703192 100644 --- a/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java +++ b/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java @@ -290,31 +290,27 @@ public void shutdownMassiveGraph() } @Override - public void shortestPath(final Vertex fromNode, Integer node) + public void shortestPath(final Vertex fromNode, Integer targetNode) { final GraphTraversalSource g = graph.traversal(); - final Vertex toNode = getVertex(node); - final Object toNodeId = toNode.id(); - LOG.debug("from @" + fromNode.value(NODE_ID) + "," + fromNode.id() + - " to @" + toNode.value(NODE_ID) + "," + toNode.id() + " "); final Stopwatch watch = Stopwatch.createStarted(); // repeat the contained traversal // map from this vertex to inV on SIMILAR edges without looping // until you map to the target toNode and the path is six vertices long or less // only return one path -//g.V().has("nodeId", 14597).repeat(both().simplePath()).until(id().is(241640).and().filter {it.path().size() <= 6}).limit(1).path() +//g.V().has("nodeId", 775).repeat(both().simplePath()).until(has('nodeId', 990).and().filter {it.path().size() <= 5}).limit(1).path().by('nodeId') GraphTraversal t = g.V().has(NODE_ID, fromNode.value(NODE_ID)) .repeat( __.both() .simplePath()) .until( - __.id().is(toNodeId) + __.has(NODE_ID, targetNode) .and( __.filter(it -> { -//when the size of the path in the traverser object is six, that means this traverser made 5 hops from the -//fromNode, a total of 6 vertices - return it.path().size() <= 6; +//when the size of the path in the 
traverser object is six, that means this traverser made 4 hops from the +//fromNode, a total of 5 vertices + return it.path().size() <= 5; })) ) .limit(1) @@ -322,10 +318,14 @@ public void shortestPath(final Vertex fromNode, Integer node) t.tryNext() .ifPresent( it -> { - final int pathSize = it.size(); - final long elapsed = watch.elapsed(TimeUnit.MILLISECONDS); - watch.stop(); - LOG.debug("took " + elapsed + " ms, " + pathSize + ": " + it.toString()); + final int pathSize = it.size(); + final long elapsed = watch.elapsed(TimeUnit.MILLISECONDS); + watch.stop(); + if(elapsed > 200) { //threshold for debugging + LOG.info("from @ " + fromNode.value(NODE_ID) + + " to @ " + targetNode.toString() + + " took " + elapsed + " ms, " + pathSize + ": " + it.toString()); + } }); diff --git a/src/main/java/eu/socialsensor/insert/TitanMassiveCustomIds.java b/src/main/java/eu/socialsensor/insert/TitanMassiveCustomIds.java index aacc025..348863e 100644 --- a/src/main/java/eu/socialsensor/insert/TitanMassiveCustomIds.java +++ b/src/main/java/eu/socialsensor/insert/TitanMassiveCustomIds.java @@ -35,11 +35,11 @@ public Vertex getOrCreate(String value) // final GraphTraversal t = tx.traversal().V().has(NODEID, longVal); //add to cache for first time - if(!vertexCache.containsKey(longPositiveVal)) { + if(!vertexCache.containsKey(longVal)) { final TitanVertex vertex = tx.addVertex(titanVertexId, nodeLabel /*vertexLabel*/); vertex.property(NODEID, longVal); - vertexCache.put(longPositiveVal, vertex); + vertexCache.put(longVal, vertex); } - return vertexCache.get(longPositiveVal); + return vertexCache.get(longVal); } } \ No newline at end of file diff --git a/src/test/resources/META-INF/input.properties b/src/test/resources/META-INF/input.properties index c7000fb..30899d5 100644 --- a/src/test/resources/META-INF/input.properties +++ b/src/test/resources/META-INF/input.properties @@ -78,7 +78,7 @@ eu.socialsensor.permute-benchmarks=false # Choose which benchmark you want to run by removing the comments. Choose one Insertion # workload and then query/clustering workloads afterward. 
-#eu.socialsensor.benchmarks=MASSIVE_INSERTION +eu.socialsensor.benchmarks=MASSIVE_INSERTION #eu.socialsensor.benchmarks=SINGLE_INSERTION #eu.socialsensor.benchmarks=FIND_NEIGHBOURS #eu.socialsensor.benchmarks=FIND_ADJACENT_NODES From d7214137d49b423b323bb62c02648603102f04c8 Mon Sep 17 00:00:00 2001 From: Alexander Patrikalakis Date: Fri, 8 Apr 2016 01:28:13 +0900 Subject: [PATCH 19/26] sped up shortest path traversal by a factor of five times --- .../eu/socialsensor/graphdatabases/TitanGraphDatabase.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java b/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java index a703192..ab2bb27 100644 --- a/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java +++ b/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java @@ -298,11 +298,11 @@ public void shortestPath(final Vertex fromNode, Integer targetNode) // map from this vertex to inV on SIMILAR edges without looping // until you map to the target toNode and the path is six vertices long or less // only return one path -//g.V().has("nodeId", 775).repeat(both().simplePath()).until(has('nodeId', 990).and().filter {it.path().size() <= 5}).limit(1).path().by('nodeId') +//g.V().has("nodeId", 775).repeat(out('similar').simplePath()).until(has('nodeId', 990).and().filter {it.path().size() <= 5}).limit(1).path().by('nodeId') GraphTraversal t = g.V().has(NODE_ID, fromNode.value(NODE_ID)) .repeat( - __.both() + __.out(SIMILAR) .simplePath()) .until( __.has(NODE_ID, targetNode) From 4a89379dec805d0d3e45a1e82444fb6eb3259e70 Mon Sep 17 00:00:00 2001 From: Alexander Patrikalakis Date: Fri, 8 Apr 2016 08:42:59 +0900 Subject: [PATCH 20/26] use the same list of nodes for each graph database --- .../benchmarks/FindShortestPathBenchmark.java | 3 +- .../java/eu/socialsensor/dataset/Dataset.java | 35 --------------- .../main/BenchmarkConfiguration.java | 45 +++++++++++++++---- 3 files changed, 38 insertions(+), 45 deletions(-) diff --git a/src/main/java/eu/socialsensor/benchmarks/FindShortestPathBenchmark.java b/src/main/java/eu/socialsensor/benchmarks/FindShortestPathBenchmark.java index b8d4630..85ec63e 100644 --- a/src/main/java/eu/socialsensor/benchmarks/FindShortestPathBenchmark.java +++ b/src/main/java/eu/socialsensor/benchmarks/FindShortestPathBenchmark.java @@ -27,8 +27,7 @@ public class FindShortestPathBenchmark extends PermutingBenchmarkBase implements public FindShortestPathBenchmark(BenchmarkConfiguration config) { super(config, BenchmarkType.FIND_SHORTEST_PATH); - generatedNodes = DatasetFactory.getInstance().getDataset(config.getDataset()) - .generateRandomNodes(config.getRandomNodes()); + generatedNodes = config.getRandomNodeList(); } @Override diff --git a/src/main/java/eu/socialsensor/dataset/Dataset.java b/src/main/java/eu/socialsensor/dataset/Dataset.java index ca19dcf..aec0d9d 100644 --- a/src/main/java/eu/socialsensor/dataset/Dataset.java +++ b/src/main/java/eu/socialsensor/dataset/Dataset.java @@ -21,41 +21,6 @@ public Dataset(File datasetFile) data = Utils.readTabulatedLines(datasetFile, 4 /* numberOfLinesToSkip */); } - public List generateRandomNodes(int numRandomNodes) - { -// Set nodes = new HashSet(); -// for (List line : data.subList(4, data.size())) -// { -// for (String nodeId : line) -// { -// nodes.add(nodeId.trim()); -// } -// } -// -// List nodeList = new ArrayList(nodes); -// int[] nodeIndexList = new int[nodeList.size()]; -// for (int i = 0; i < 
nodeList.size(); i++) -// { -// nodeIndexList[i] = i; -// } -// MathArrays.shuffle(nodeIndexList); -// -// Set generatedNodes = new HashSet(); -// for (int i = 0; i < numRandomNodes; i++) -// { -// generatedNodes.add(Integer.valueOf(nodeList.get(nodeIndexList[i]))); -// } - //Use old logic for now - final int max = 1000; - final int min = 2; - final Random rand = new Random(17); - final Set generatedNodes = new HashSet<>(); - while(generatedNodes.size() < numRandomNodes + 1) { //generate one more so that we can - generatedNodes.add(rand.nextInt((max - min) +1) + min); - } - return new LinkedList<>(generatedNodes); - } - @Override public Iterator> iterator() { diff --git a/src/main/java/eu/socialsensor/main/BenchmarkConfiguration.java b/src/main/java/eu/socialsensor/main/BenchmarkConfiguration.java index 27965ce..5b6f8b1 100644 --- a/src/main/java/eu/socialsensor/main/BenchmarkConfiguration.java +++ b/src/main/java/eu/socialsensor/main/BenchmarkConfiguration.java @@ -1,12 +1,7 @@ package eu.socialsensor.main; import java.io.File; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import java.util.SortedSet; -import java.util.TreeSet; +import java.util.*; import org.apache.commons.configuration.Configuration; import org.apache.commons.math3.util.CombinatoricsUtils; @@ -107,6 +102,7 @@ public class BenchmarkConfiguration private final String dynamodbTablePrefix; private final boolean customIds; private final long tuplMinCacheSize; + private final List randomNodeList; public String getDynamodbCredentialsFqClassName() { @@ -122,6 +118,9 @@ public String getDynamodbEndpoint() { return dynamodbEndpoint; } + public List getRandomNodeList() { + return randomNodeList; + } public BenchmarkConfiguration(Configuration appconfig) { @@ -178,6 +177,38 @@ public BenchmarkConfiguration(Configuration appconfig) dataset = validateReadableFile(socialsensor.getString(DATASET), DATASET); // load the dataset + // Set nodes = new HashSet(); +// for (List line : data.subList(4, data.size())) +// { +// for (String nodeId : line) +// { +// nodes.add(nodeId.trim()); +// } +// } +// +// List nodeList = new ArrayList(nodes); +// int[] nodeIndexList = new int[nodeList.size()]; +// for (int i = 0; i < nodeList.size(); i++) +// { +// nodeIndexList[i] = i; +// } +// MathArrays.shuffle(nodeIndexList); +// +// Set generatedNodes = new HashSet(); +// for (int i = 0; i < numRandomNodes; i++) +// { +// generatedNodes.add(Integer.valueOf(nodeList.get(nodeIndexList[i]))); +// } + //Use old logic for now + randomNodes = socialsensor.getInteger(RANDOM_NODES, new Integer(100)); + final int max = 1000; + final int min = 2; + final Random rand = new Random(17); + final Set generatedNodes = new HashSet<>(); + while(generatedNodes.size() < randomNodes + 1) { //generate one more so that we can + generatedNodes.add(rand.nextInt((max - min) +1) + min); + } + randomNodeList = new LinkedList<>(generatedNodes); DatasetFactory.getInstance().getDataset(dataset); if (!socialsensor.containsKey(PERMUTE_BENCHMARKS)) @@ -215,8 +246,6 @@ public BenchmarkConfiguration(Configuration appconfig) throw new IllegalArgumentException("unable to write to results directory"); } - randomNodes = socialsensor.getInteger(RANDOM_NODES, new Integer(100)); - if (this.benchmarkTypes.contains(BenchmarkType.CLUSTERING)) { if (!socialsensor.containsKey(NODES_COUNT)) From 5a56f7ab074308bac0d9414f42fea73d961a0aee Mon Sep 17 00:00:00 2001 From: Alexander Patrikalakis Date: Fri, 8 Apr 2016 19:31:34 +0900 Subject: [PATCH 
21/26] externalized random number generator seed. --- .../benchmarks/ClusteringBenchmark.java | 3 +- .../benchmarks/FindShortestPathBenchmark.java | 5 +- .../clustering/LouvainMethod.java | 11 ++- .../java/eu/socialsensor/dataset/Dataset.java | 27 +++++++- .../socialsensor/dataset/DatasetFactory.java | 12 +++- .../graphdatabases/GraphDatabase.java | 12 ++-- .../graphdatabases/GraphDatabaseBase.java | 10 +-- .../graphdatabases/Neo4jGraphDatabase.java | 8 +-- .../graphdatabases/OrientGraphDatabase.java | 2 +- .../graphdatabases/TitanGraphDatabase.java | 2 +- .../eu/socialsensor/insert/InsertionBase.java | 2 +- .../main/BenchmarkConfiguration.java | 67 +++++-------------- .../java/eu/socialsensor/utils/Utils.java | 2 +- src/test/resources/META-INF/input.properties | 8 +-- 14 files changed, 85 insertions(+), 86 deletions(-) diff --git a/src/main/java/eu/socialsensor/benchmarks/ClusteringBenchmark.java b/src/main/java/eu/socialsensor/benchmarks/ClusteringBenchmark.java index 9c1f48c..efbc41e 100644 --- a/src/main/java/eu/socialsensor/benchmarks/ClusteringBenchmark.java +++ b/src/main/java/eu/socialsensor/benchmarks/ClusteringBenchmark.java @@ -102,7 +102,8 @@ private SortedMap clusteringBenchmark(GraphDatabaseType type) t + ", Cache Size: " + cacheSize); Stopwatch watch = Stopwatch.createStarted(); - LouvainMethod louvainMethodCache = new LouvainMethod(graphDatabase, cacheSize, bench.randomizedClustering()); + LouvainMethod louvainMethodCache = new LouvainMethod(graphDatabase, cacheSize, + bench.randomizedClustering() ? bench.getRandom() : null); louvainMethodCache.computeModularity(); timeMap.put(cacheSize, watch.elapsed(TimeUnit.MILLISECONDS) / 1000.0); diff --git a/src/main/java/eu/socialsensor/benchmarks/FindShortestPathBenchmark.java b/src/main/java/eu/socialsensor/benchmarks/FindShortestPathBenchmark.java index 85ec63e..cc4fa04 100644 --- a/src/main/java/eu/socialsensor/benchmarks/FindShortestPathBenchmark.java +++ b/src/main/java/eu/socialsensor/benchmarks/FindShortestPathBenchmark.java @@ -22,12 +22,9 @@ public class FindShortestPathBenchmark extends PermutingBenchmarkBase implements RequiresGraphData { - private final List generatedNodes; - public FindShortestPathBenchmark(BenchmarkConfiguration config) { super(config, BenchmarkType.FIND_SHORTEST_PATH); - generatedNodes = config.getRandomNodeList(); } @Override @@ -35,7 +32,7 @@ public void benchmarkOne(GraphDatabaseType type, int scenarioNumber) { GraphDatabase graphDatabase = Utils.createDatabaseInstance(bench, type, false /*batchLoading*/); Stopwatch watch = Stopwatch.createStarted(); - graphDatabase.shortestPaths(generatedNodes); + graphDatabase.shortestPaths(); graphDatabase.shutdown(); times.get(type).add((double) watch.elapsed(TimeUnit.MILLISECONDS)); } diff --git a/src/main/java/eu/socialsensor/clustering/LouvainMethod.java b/src/main/java/eu/socialsensor/clustering/LouvainMethod.java index 8b05957..a05a6da 100644 --- a/src/main/java/eu/socialsensor/clustering/LouvainMethod.java +++ b/src/main/java/eu/socialsensor/clustering/LouvainMethod.java @@ -17,7 +17,7 @@ */ public class LouvainMethod { - boolean isRandomized; + private final Random random; private double resolution = 1.0; private double graphWeightSum; private int N; @@ -27,10 +27,10 @@ public class LouvainMethod GraphDatabase graphDatabase; Cache cache; - public LouvainMethod(GraphDatabase graphDatabase, int cacheSize, boolean isRandomized) throws ExecutionException + public LouvainMethod(GraphDatabase graphDatabase, int cacheSize, Random random) throws 
ExecutionException { this.graphDatabase = graphDatabase; - this.isRandomized = isRandomized; + this.random = random; initialize(); cache = new Cache(graphDatabase, cacheSize); } @@ -52,7 +52,6 @@ private void initialize() public void computeModularity() throws ExecutionException { - Random rand = new Random(); boolean someChange = true; while (someChange) { @@ -62,9 +61,9 @@ public void computeModularity() throws ExecutionException { localChange = false; int start = 0; - if (this.isRandomized) + if (null != this.random) { - start = Math.abs(rand.nextInt()) % this.N; + start = Math.abs(random.nextInt()) % this.N; } int step = 0; for (int i = start; step < this.N; i = (i + 1) % this.N) diff --git a/src/main/java/eu/socialsensor/dataset/Dataset.java b/src/main/java/eu/socialsensor/dataset/Dataset.java index aec0d9d..f9ae36b 100644 --- a/src/main/java/eu/socialsensor/dataset/Dataset.java +++ b/src/main/java/eu/socialsensor/dataset/Dataset.java @@ -15,10 +15,32 @@ public class Dataset implements Iterable> { private final List> data; + private final List generatedNodes; - public Dataset(File datasetFile) + public Dataset(File datasetFile, Random random, int randomNodeSetSize) { data = Utils.readTabulatedLines(datasetFile, 4 /* numberOfLinesToSkip */); + final Set nodes = new HashSet<>(); + //read node strings and convert to Integers and add to HashSet + data.stream().forEach(line -> { //TODO evaluate parallelStream + line.stream().forEach(nodeId -> { + nodes.add(Integer.valueOf(nodeId.trim())); + }); + }); + if(randomNodeSetSize > nodes.size()) { + throw new IllegalArgumentException("cant select more random nodes than there are unique nodes in dataset"); + } + + //shuffle + final List nodeList = new ArrayList<>(nodes); + Collections.shuffle(nodeList); + + //choose randomNodeSetSize of them + generatedNodes = new ArrayList(randomNodeSetSize); + Iterator it = nodeList.iterator(); + while(generatedNodes.size() < randomNodeSetSize) { + generatedNodes.add(it.next()); + } } @Override @@ -30,4 +52,7 @@ public Iterator> iterator() public List> getList() { return new ArrayList>(data); } + public List getRandomNodes() { + return generatedNodes; + } } diff --git a/src/main/java/eu/socialsensor/dataset/DatasetFactory.java b/src/main/java/eu/socialsensor/dataset/DatasetFactory.java index f34475c..825a9a5 100644 --- a/src/main/java/eu/socialsensor/dataset/DatasetFactory.java +++ b/src/main/java/eu/socialsensor/dataset/DatasetFactory.java @@ -3,6 +3,7 @@ import java.io.File; import java.util.HashMap; import java.util.Map; +import java.util.Random; /** * @@ -28,11 +29,18 @@ public static DatasetFactory getInstance() return theInstance; } - public Dataset getDataset(File datasetFile) + public Dataset getDataset(File datasetFile) { + if (!datasetMap.containsKey(datasetFile)) + { + throw new IllegalArgumentException("no mapping for data file " + datasetFile.getAbsolutePath()); + } + return datasetMap.get(datasetFile); + } + public Dataset createAndGetDataset(File datasetFile, Random random, int randomNodeSetSize) { if (!datasetMap.containsKey(datasetFile)) { - datasetMap.put(datasetFile, new Dataset(datasetFile)); + datasetMap.put(datasetFile, new Dataset(datasetFile, random, randomNodeSetSize)); } return datasetMap.get(datasetFile); diff --git a/src/main/java/eu/socialsensor/graphdatabases/GraphDatabase.java b/src/main/java/eu/socialsensor/graphdatabases/GraphDatabase.java index 4264b8f..2df41ac 100644 --- a/src/main/java/eu/socialsensor/graphdatabases/GraphDatabase.java +++ 
b/src/main/java/eu/socialsensor/graphdatabases/GraphDatabase.java @@ -72,19 +72,19 @@ public interface GraphDatabase nodes); + public void shortestPaths(); /** * Execute findShortestPaths query from the Query interface - * - * @param nodes + * + * @param fromNode + * @param toNode * any number of random nodes */ - public void shortestPath(final VertexType fromNode, Integer node); + public void shortestPath(final VertexType fromNode, Integer toNode); /** * @return the number of nodes diff --git a/src/main/java/eu/socialsensor/graphdatabases/GraphDatabaseBase.java b/src/main/java/eu/socialsensor/graphdatabases/GraphDatabaseBase.java index 7c3f1f5..83aca1b 100644 --- a/src/main/java/eu/socialsensor/graphdatabases/GraphDatabaseBase.java +++ b/src/main/java/eu/socialsensor/graphdatabases/GraphDatabaseBase.java @@ -25,7 +25,6 @@ public abstract class GraphDatabaseBase randomNodes; - protected GraphDatabaseBase(GraphDatabaseType type, File dbStorageDirectory) + protected GraphDatabaseBase(GraphDatabaseType type, File dbStorageDirectory, List randomNodes) { this.type = type; final String queryTypeContext = type.getShortname() + QUERY_CONTEXT; @@ -44,6 +44,7 @@ protected GraphDatabaseBase(GraphDatabaseType type, File dbStorageDirectory) this.getOtherVertexFromEdgeTimes = GraphDatabaseBenchmark.metrics.timer(queryTypeContext + "getOtherVertexFromEdge"); this.getAllEdgesTimes = GraphDatabaseBenchmark.metrics.timer(queryTypeContext + "getAllEdges"); this.shortestPathTimes = GraphDatabaseBenchmark.metrics.timer(queryTypeContext + "shortestPath"); + this.randomNodes = randomNodes; this.dbStorageDirectory = dbStorageDirectory; if (!this.dbStorageDirectory.exists()) @@ -126,12 +127,11 @@ public void findNodesOfAllEdges() { } @Override - public void shortestPaths(List nodes) { + public void shortestPaths() { //randomness of selected node comes from the hashing function of hash set - final Iterator it = nodes.iterator(); + final Iterator it = randomNodes.iterator(); Preconditions.checkArgument(it.hasNext()); final VertexType from = getVertex(it.next()); - it.remove();//now the set has n-1 nodes Timer.Context ctxt; while(it.hasNext()) { final Integer i = it.next(); diff --git a/src/main/java/eu/socialsensor/graphdatabases/Neo4jGraphDatabase.java b/src/main/java/eu/socialsensor/graphdatabases/Neo4jGraphDatabase.java index 0e895d6..02226c8 100644 --- a/src/main/java/eu/socialsensor/graphdatabases/Neo4jGraphDatabase.java +++ b/src/main/java/eu/socialsensor/graphdatabases/Neo4jGraphDatabase.java @@ -62,9 +62,9 @@ public static enum RelTypes implements RelationshipType public static Label NODE_LABEL = DynamicLabel.label("Node"); - public Neo4jGraphDatabase(File dbStorageDirectoryIn, boolean batchLoading) + public Neo4jGraphDatabase(File dbStorageDirectoryIn, boolean batchLoading, List randomNodes) { - super(GraphDatabaseType.NEO4J, dbStorageDirectoryIn); + super(GraphDatabaseType.NEO4J, dbStorageDirectoryIn, randomNodes); if(batchLoading) { neo4jGraph = null; @@ -183,9 +183,9 @@ public void shutdownMassiveGraph() } @Override - public void shortestPaths(List nodes) { + public void shortestPaths() { try (Transaction tx = ((Neo4jGraphDatabase) this).neo4jGraph.beginTx()) { - super.shortestPaths(nodes); + super.shortestPaths(); } } diff --git a/src/main/java/eu/socialsensor/graphdatabases/OrientGraphDatabase.java b/src/main/java/eu/socialsensor/graphdatabases/OrientGraphDatabase.java index c6b981d..e75a3e8 100644 --- a/src/main/java/eu/socialsensor/graphdatabases/OrientGraphDatabase.java +++ 
b/src/main/java/eu/socialsensor/graphdatabases/OrientGraphDatabase.java @@ -49,7 +49,7 @@ public class OrientGraphDatabase extends GraphDatabaseBase, Ite @SuppressWarnings("deprecation") public OrientGraphDatabase(BenchmarkConfiguration config, File dbStorageDirectoryIn) { - super(GraphDatabaseType.ORIENT_DB, dbStorageDirectoryIn); + super(GraphDatabaseType.ORIENT_DB, dbStorageDirectoryIn, config.getRandomNodeList()); OGlobalConfiguration.STORAGE_COMPRESSION_METHOD.setValue("nothing"); OGlobalConfiguration.STORAGE_KEEP_OPEN.setValue(false); graph = getGraph(dbStorageDirectory); diff --git a/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java b/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java index ab2bb27..04ac741 100644 --- a/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java +++ b/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java @@ -74,7 +74,7 @@ public class TitanGraphDatabase extends GraphDatabaseBase, Iter public TitanGraphDatabase(GraphDatabaseType type, BenchmarkConfiguration config, File dbStorageDirectory, boolean batchLoading) { - super(type, dbStorageDirectory); + super(type, dbStorageDirectory, config.getRandomNodeList()); this.config = config; if (!GraphDatabaseType.TITAN_FLAVORS.contains(type)) { diff --git a/src/main/java/eu/socialsensor/insert/InsertionBase.java b/src/main/java/eu/socialsensor/insert/InsertionBase.java index 338f26f..3ca52c5 100644 --- a/src/main/java/eu/socialsensor/insert/InsertionBase.java +++ b/src/main/java/eu/socialsensor/insert/InsertionBase.java @@ -85,7 +85,7 @@ public final void createGraph(File datasetFile, int scenarioNumber) { logger.info("Loading data in {} mode in {} database . . . .", single ? "single" : "massive", type.name()); - Dataset dataset = DatasetFactory.getInstance().getDataset(datasetFile); + final Dataset dataset = DatasetFactory.getInstance().getDataset(datasetFile); Stopwatch thousandWatch = Stopwatch.createStarted(), watch = Stopwatch.createStarted(); diff --git a/src/main/java/eu/socialsensor/main/BenchmarkConfiguration.java b/src/main/java/eu/socialsensor/main/BenchmarkConfiguration.java index 5b6f8b1..1568303 100644 --- a/src/main/java/eu/socialsensor/main/BenchmarkConfiguration.java +++ b/src/main/java/eu/socialsensor/main/BenchmarkConfiguration.java @@ -53,6 +53,7 @@ public class BenchmarkConfiguration private static final String CACHE_VALUES_COUNT = "cache-values-count"; private static final String PERMUTE_BENCHMARKS = "permute-benchmarks"; private static final String RANDOM_NODES = "shortest-path-random-nodes"; + private static final String RANDOM_SEED = "random-seed"; private static final Set metricsReporters = new HashSet(); static { @@ -80,7 +81,7 @@ public class BenchmarkConfiguration private final boolean dynamodbConsistentRead; // shortest path - private final int randomNodes; + private final int numShortestPathRandomNodes; // clustering private final Boolean randomizedClustering; @@ -102,7 +103,8 @@ public class BenchmarkConfiguration private final String dynamodbTablePrefix; private final boolean customIds; private final long tuplMinCacheSize; - private final List randomNodeList; + + private final Random random; public String getDynamodbCredentialsFqClassName() { @@ -118,9 +120,6 @@ public String getDynamodbEndpoint() { return dynamodbEndpoint; } - public List getRandomNodeList() { - return randomNodeList; - } public BenchmarkConfiguration(Configuration appconfig) { @@ -176,40 +175,11 @@ public BenchmarkConfiguration(Configuration 
appconfig) dbStorageDirectory = new File(socialsensor.getString(DATABASE_STORAGE_DIRECTORY)); dataset = validateReadableFile(socialsensor.getString(DATASET), DATASET); + // load the dataset - // Set nodes = new HashSet(); -// for (List line : data.subList(4, data.size())) -// { -// for (String nodeId : line) -// { -// nodes.add(nodeId.trim()); -// } -// } -// -// List nodeList = new ArrayList(nodes); -// int[] nodeIndexList = new int[nodeList.size()]; -// for (int i = 0; i < nodeList.size(); i++) -// { -// nodeIndexList[i] = i; -// } -// MathArrays.shuffle(nodeIndexList); -// -// Set generatedNodes = new HashSet(); -// for (int i = 0; i < numRandomNodes; i++) -// { -// generatedNodes.add(Integer.valueOf(nodeList.get(nodeIndexList[i]))); -// } - //Use old logic for now - randomNodes = socialsensor.getInteger(RANDOM_NODES, new Integer(100)); - final int max = 1000; - final int min = 2; - final Random rand = new Random(17); - final Set generatedNodes = new HashSet<>(); - while(generatedNodes.size() < randomNodes + 1) { //generate one more so that we can - generatedNodes.add(rand.nextInt((max - min) +1) + min); - } - randomNodeList = new LinkedList<>(generatedNodes); - DatasetFactory.getInstance().getDataset(dataset); + random = new Random(socialsensor.getInt(RANDOM_SEED, 17 /*default*/)); + numShortestPathRandomNodes = socialsensor.getInteger(RANDOM_NODES, new Integer(101)); + DatasetFactory.getInstance().createAndGetDataset(dataset, random, numShortestPathRandomNodes); if (!socialsensor.containsKey(PERMUTE_BENCHMARKS)) { @@ -393,26 +363,18 @@ public int getScenarios() return scenarios; } - private static final File validateReadableFile(String fileName, String fileType) - { + private static final File validateReadableFile(String fileName, String fileType) { File file = new File(fileName); - if (!file.exists()) - { + if (!file.exists()) { throw new IllegalArgumentException(String.format("the %s does not exist", fileType)); } - if (!(file.isFile() && file.canRead())) - { + if (!(file.isFile() && file.canRead())) { throw new IllegalArgumentException(String.format("the %s must be a file that this user can read", fileType)); } return file; } - public int getRandomNodes() - { - return randomNodes; - } - public long getCsvReportingInterval() { return csvReportingInterval; @@ -480,4 +442,11 @@ public boolean isCustomIds() { public long getTuplMinCacheSize() { return tuplMinCacheSize; } + + public Random getRandom() { + return random; + } + public List getRandomNodeList() { + return DatasetFactory.getInstance().getDataset(this.dataset).getRandomNodes(); + } } diff --git a/src/main/java/eu/socialsensor/utils/Utils.java b/src/main/java/eu/socialsensor/utils/Utils.java index d2541cf..44de6ff 100644 --- a/src/main/java/eu/socialsensor/utils/Utils.java +++ b/src/main/java/eu/socialsensor/utils/Utils.java @@ -191,7 +191,7 @@ public static final GraphDatabase createDatabaseInstance(BenchmarkConfi } else if (GraphDatabaseType.NEO4J == type) { - graphDatabase = new Neo4jGraphDatabase(dbStorageDirectory, batchLoading); + graphDatabase = new Neo4jGraphDatabase(dbStorageDirectory, batchLoading, config.getRandomNodeList()); } else if (GraphDatabaseType.ORIENT_DB == type) { diff --git a/src/test/resources/META-INF/input.properties b/src/test/resources/META-INF/input.properties index 30899d5..448e9fc 100644 --- a/src/test/resources/META-INF/input.properties +++ b/src/test/resources/META-INF/input.properties @@ -24,7 +24,7 @@ eu.socialsensor.metrics.csv.directory=metrics # Choose which databases you want to in the 
benchmark by removing the comments. # Available dbs are: -#eu.socialsensor.databases=tbdb +eu.socialsensor.databases=tbdb eu.socialsensor.databases=ttupl #eu.socialsensor.databases=tddb #eu.socialsensor.databases=tc @@ -80,9 +80,9 @@ eu.socialsensor.permute-benchmarks=false # workload and then query/clustering workloads afterward. eu.socialsensor.benchmarks=MASSIVE_INSERTION #eu.socialsensor.benchmarks=SINGLE_INSERTION -#eu.socialsensor.benchmarks=FIND_NEIGHBOURS -#eu.socialsensor.benchmarks=FIND_ADJACENT_NODES -eu.socialsensor.benchmarks=FIND_SHORTEST_PATH +eu.socialsensor.benchmarks=FIND_NEIGHBOURS +eu.socialsensor.benchmarks=FIND_ADJACENT_NODES +#eu.socialsensor.benchmarks=FIND_SHORTEST_PATH eu.socialsensor.shortest-path-random-nodes=100 # The clustering benchmark is not permutable even if eu.socialsensor.permute-benchmarks=true From 432efd0dbb2e4e471193b59ab6dd74a8ba0f2dda Mon Sep 17 00:00:00 2001 From: Alexander Patrikalakis Date: Fri, 8 Apr 2016 22:29:28 +0900 Subject: [PATCH 22/26] plumb configuration for qw-fs max hops, made depth predicate class --- pom.xml | 12 +++++- .../benchmarks/PermutingBenchmarkBase.java | 2 +- .../java/eu/socialsensor/dataset/Dataset.java | 2 +- .../graphdatabases/GraphDatabaseBase.java | 6 ++- .../graphdatabases/Neo4jGraphDatabase.java | 6 +-- .../graphdatabases/OrientGraphDatabase.java | 6 +-- .../graphdatabases/TitanGraphDatabase.java | 38 ++++++++++++------- .../eu/socialsensor/insert/InsertionBase.java | 2 +- .../insert/TitanMassiveInsertion.java | 2 +- .../main/BenchmarkConfiguration.java | 7 ++++ .../java/eu/socialsensor/utils/Utils.java | 3 +- .../main/GraphDatabaseBenchmarkTest.java | 1 + src/test/resources/META-INF/input.properties | 14 ++++--- src/test/resources/META-INF/log4j.properties | 9 ----- src/test/resources/META-INF/log4j2.xml | 26 +++++++------ 15 files changed, 83 insertions(+), 53 deletions(-) delete mode 100644 src/test/resources/META-INF/log4j.properties diff --git a/pom.xml b/pom.xml index bcff834..3d468ef 100644 --- a/pom.xml +++ b/pom.xml @@ -96,11 +96,21 @@ log4j-api ${log4j2.version} + + org.apache.logging.log4j + log4j-1.2-api + ${log4j2.version} + org.apache.logging.log4j log4j-core ${log4j2.version} + + org.slf4j + slf4j-log4j12 + 1.7.12 + org.apache.lucene lucene-core @@ -337,7 +347,7 @@ **/GraphDatabaseBenchmarkTest.java - -Xmx32g + -Xmx32g -ea false ${basedir}/src/test/resources/META-INF/log4j2.xml diff --git a/src/main/java/eu/socialsensor/benchmarks/PermutingBenchmarkBase.java b/src/main/java/eu/socialsensor/benchmarks/PermutingBenchmarkBase.java index 8ea4bca..9d3cc4b 100644 --- a/src/main/java/eu/socialsensor/benchmarks/PermutingBenchmarkBase.java +++ b/src/main/java/eu/socialsensor/benchmarks/PermutingBenchmarkBase.java @@ -55,7 +55,7 @@ public void startBenchmarkInternal() startBenchmarkInternalOnePermutation(bench.getSelectedDatabases(), 1); } - LOG.info(String.format("%s Benchmark finished", type.longname())); + LOG.info(String.format("%s Benchmark Finished", type.longname())); post(); } diff --git a/src/main/java/eu/socialsensor/dataset/Dataset.java b/src/main/java/eu/socialsensor/dataset/Dataset.java index f9ae36b..6034f3e 100644 --- a/src/main/java/eu/socialsensor/dataset/Dataset.java +++ b/src/main/java/eu/socialsensor/dataset/Dataset.java @@ -33,7 +33,7 @@ public Dataset(File datasetFile, Random random, int randomNodeSetSize) //shuffle final List nodeList = new ArrayList<>(nodes); - Collections.shuffle(nodeList); + Collections.shuffle(nodeList, random); //choose randomNodeSetSize of them generatedNodes = new 
ArrayList(randomNodeSetSize); diff --git a/src/main/java/eu/socialsensor/graphdatabases/GraphDatabaseBase.java b/src/main/java/eu/socialsensor/graphdatabases/GraphDatabaseBase.java index 83aca1b..86ed39e 100644 --- a/src/main/java/eu/socialsensor/graphdatabases/GraphDatabaseBase.java +++ b/src/main/java/eu/socialsensor/graphdatabases/GraphDatabaseBase.java @@ -5,6 +5,7 @@ import java.util.List; import java.util.Set; +import org.apache.commons.logging.Log; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -33,8 +34,10 @@ public abstract class GraphDatabaseBase randomNodes; + protected final int maxHops; - protected GraphDatabaseBase(GraphDatabaseType type, File dbStorageDirectory, List randomNodes) + protected GraphDatabaseBase(GraphDatabaseType type, File dbStorageDirectory, List randomNodes, + int shortestPathMaxHops) { this.type = type; final String queryTypeContext = type.getShortname() + QUERY_CONTEXT; @@ -45,6 +48,7 @@ protected GraphDatabaseBase(GraphDatabaseType type, File dbStorageDirectory, Lis this.getAllEdgesTimes = GraphDatabaseBenchmark.metrics.timer(queryTypeContext + "getAllEdges"); this.shortestPathTimes = GraphDatabaseBenchmark.metrics.timer(queryTypeContext + "shortestPath"); this.randomNodes = randomNodes; + this.maxHops = shortestPathMaxHops; this.dbStorageDirectory = dbStorageDirectory; if (!this.dbStorageDirectory.exists()) diff --git a/src/main/java/eu/socialsensor/graphdatabases/Neo4jGraphDatabase.java b/src/main/java/eu/socialsensor/graphdatabases/Neo4jGraphDatabase.java index 02226c8..075d7ae 100644 --- a/src/main/java/eu/socialsensor/graphdatabases/Neo4jGraphDatabase.java +++ b/src/main/java/eu/socialsensor/graphdatabases/Neo4jGraphDatabase.java @@ -62,9 +62,9 @@ public static enum RelTypes implements RelationshipType public static Label NODE_LABEL = DynamicLabel.label("Node"); - public Neo4jGraphDatabase(File dbStorageDirectoryIn, boolean batchLoading, List randomNodes) + public Neo4jGraphDatabase(File dbStorageDirectoryIn, boolean batchLoading, List randomNodes, int shortestPathMaxHops) { - super(GraphDatabaseType.NEO4J, dbStorageDirectoryIn, randomNodes); + super(GraphDatabaseType.NEO4J, dbStorageDirectoryIn, randomNodes, shortestPathMaxHops); if(batchLoading) { neo4jGraph = null; @@ -207,7 +207,7 @@ public void findAllNodeNeighbours() { public void shortestPath(Node n1, Integer i) { PathFinder finder - = GraphAlgoFactory.shortestPath(PathExpanders.forType(Neo4jGraphDatabase.RelTypes.SIMILAR), 5); + = GraphAlgoFactory.shortestPath(PathExpanders.forType(Neo4jGraphDatabase.RelTypes.SIMILAR), maxHops); Node n2 = getVertex(i); Path path = finder.findSinglePath(n1, n2); diff --git a/src/main/java/eu/socialsensor/graphdatabases/OrientGraphDatabase.java b/src/main/java/eu/socialsensor/graphdatabases/OrientGraphDatabase.java index e75a3e8..edd59af 100644 --- a/src/main/java/eu/socialsensor/graphdatabases/OrientGraphDatabase.java +++ b/src/main/java/eu/socialsensor/graphdatabases/OrientGraphDatabase.java @@ -49,7 +49,8 @@ public class OrientGraphDatabase extends GraphDatabaseBase, Ite @SuppressWarnings("deprecation") public OrientGraphDatabase(BenchmarkConfiguration config, File dbStorageDirectoryIn) { - super(GraphDatabaseType.ORIENT_DB, dbStorageDirectoryIn, config.getRandomNodeList()); + super(GraphDatabaseType.ORIENT_DB, dbStorageDirectoryIn, config.getRandomNodeList(), + config.getShortestPathMaxHops()); OGlobalConfiguration.STORAGE_COMPRESSION_METHOD.setValue("nothing"); OGlobalConfiguration.STORAGE_KEEP_OPEN.setValue(false); 
graph = getGraph(dbStorageDirectory); @@ -102,9 +103,8 @@ public void shortestPath(final Vertex v1, Integer i) @SuppressWarnings("unused") final OrientVertex v2 = (OrientVertex) getVertex(i); - //TODO(amcp) need to do something about the number 5 // List result = (List) new OSQLFunctionShortestPath().execute(graph, -// null, null, new Object[] { ((OrientVertex) v1).getRecord(), v2.getRecord(), Direction.OUT, 5 }, +// null, null, new Object[] { ((OrientVertex) v1).getRecord(), v2.getRecord(), Direction.OUT, maxHops }, // new OBasicCommandContext()); // // result.size(); diff --git a/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java b/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java index 04ac741..039c273 100644 --- a/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java +++ b/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java @@ -10,6 +10,7 @@ import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; +import java.util.function.Predicate; import com.google.common.base.Stopwatch; import org.apache.commons.configuration.Configuration; @@ -17,6 +18,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.tinkerpop.gremlin.process.traversal.Path; +import org.apache.tinkerpop.gremlin.process.traversal.Traverser; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__; @@ -64,9 +66,6 @@ public class TitanGraphDatabase extends GraphDatabaseBase, Iterator, Vertex, Edge> { private static final Logger LOG = LogManager.getLogger(); - public static final String INSERTION_TIMES_OUTPUT_PATH = "data/titan.insertion.times"; - - double totalWeight; private final StandardTitanGraph graph; private final BenchmarkConfiguration config; @@ -74,7 +73,7 @@ public class TitanGraphDatabase extends GraphDatabaseBase, Iter public TitanGraphDatabase(GraphDatabaseType type, BenchmarkConfiguration config, File dbStorageDirectory, boolean batchLoading) { - super(type, dbStorageDirectory, config.getRandomNodeList()); + super(type, dbStorageDirectory, config.getRandomNodeList(), config.getShortestPathMaxHops()); this.config = config; if (!GraphDatabaseType.TITAN_FLAVORS.contains(type)) { @@ -289,6 +288,19 @@ public void shutdownMassiveGraph() shutdown(); } + public class DepthPredicate implements Predicate> { + private final int hops; + public DepthPredicate(int hops) { + this.hops = hops; + } + + @Override + public boolean test(Traverser it) { + LOG.trace("testing {}", it.path()); + return it.path().size() <= hops; + } + } + @Override public void shortestPath(final Vertex fromNode, Integer targetNode) { @@ -299,19 +311,17 @@ public void shortestPath(final Vertex fromNode, Integer targetNode) // until you map to the target toNode and the path is six vertices long or less // only return one path //g.V().has("nodeId", 775).repeat(out('similar').simplePath()).until(has('nodeId', 990).and().filter {it.path().size() <= 5}).limit(1).path().by('nodeId') - GraphTraversal t = - g.V().has(NODE_ID, fromNode.value(NODE_ID)) + final DepthPredicate maxDepth = new DepthPredicate(maxHops); + final Integer fromNodeId = fromNode.value(NODE_ID); + LOG.trace("finding path from {} to {} max hops {}", fromNodeId, targetNode, maxHops); + final GraphTraversal t = + g.V().has(NODE_ID, fromNodeId) .repeat( __.out(SIMILAR) 
.simplePath()) .until( __.has(NODE_ID, targetNode) - .and( - __.filter(it -> { -//when the size of the path in the traverser object is six, that means this traverser made 4 hops from the -//fromNode, a total of 5 vertices - return it.path().size() <= 5; - })) + .and(__.filter( maxDepth )) ) .limit(1) .path(); @@ -321,8 +331,8 @@ public void shortestPath(final Vertex fromNode, Integer targetNode) final int pathSize = it.size(); final long elapsed = watch.elapsed(TimeUnit.MILLISECONDS); watch.stop(); - if(elapsed > 200) { //threshold for debugging - LOG.info("from @ " + fromNode.value(NODE_ID) + + if(elapsed > 500) { //threshold for debugging + LOG.warn("from @ " + fromNode.value(NODE_ID) + " to @ " + targetNode.toString() + " took " + elapsed + " ms, " + pathSize + ": " + it.toString()); } diff --git a/src/main/java/eu/socialsensor/insert/InsertionBase.java b/src/main/java/eu/socialsensor/insert/InsertionBase.java index 3ca52c5..9a19eb3 100644 --- a/src/main/java/eu/socialsensor/insert/InsertionBase.java +++ b/src/main/java/eu/socialsensor/insert/InsertionBase.java @@ -121,7 +121,7 @@ public final void createGraph(File datasetFile, int scenarioNumber) } }); post(); - logger.info("Edges: " + i.get()); + logger.trace("Edges: " + i.get()); insertionTimes.add((double) watch.elapsed(TimeUnit.MILLISECONDS)); if (single) diff --git a/src/main/java/eu/socialsensor/insert/TitanMassiveInsertion.java b/src/main/java/eu/socialsensor/insert/TitanMassiveInsertion.java index c461f3f..3e05a36 100644 --- a/src/main/java/eu/socialsensor/insert/TitanMassiveInsertion.java +++ b/src/main/java/eu/socialsensor/insert/TitanMassiveInsertion.java @@ -50,7 +50,7 @@ public void relateNodes(Vertex src, Vertex dest) @Override protected void post() { - logger.info("vertices: " + vertexCache.size()); + logger.trace("vertices: " + vertexCache.size()); tx.commit(); //mutation work is done here Preconditions.checkState(graph.getOpenTransactions().isEmpty()); } diff --git a/src/main/java/eu/socialsensor/main/BenchmarkConfiguration.java b/src/main/java/eu/socialsensor/main/BenchmarkConfiguration.java index 1568303..47fc3ed 100644 --- a/src/main/java/eu/socialsensor/main/BenchmarkConfiguration.java +++ b/src/main/java/eu/socialsensor/main/BenchmarkConfiguration.java @@ -54,6 +54,7 @@ public class BenchmarkConfiguration private static final String PERMUTE_BENCHMARKS = "permute-benchmarks"; private static final String RANDOM_NODES = "shortest-path-random-nodes"; private static final String RANDOM_SEED = "random-seed"; + private static final String MAX_HOPS = "shortest-path-max-hops"; private static final Set metricsReporters = new HashSet(); static { @@ -103,6 +104,7 @@ public class BenchmarkConfiguration private final String dynamodbTablePrefix; private final boolean customIds; private final long tuplMinCacheSize; + private final int shortestPathMaxHops; private final Random random; @@ -179,6 +181,7 @@ public BenchmarkConfiguration(Configuration appconfig) // load the dataset random = new Random(socialsensor.getInt(RANDOM_SEED, 17 /*default*/)); numShortestPathRandomNodes = socialsensor.getInteger(RANDOM_NODES, new Integer(101)); + shortestPathMaxHops = socialsensor.getInteger(MAX_HOPS, 5); DatasetFactory.getInstance().createAndGetDataset(dataset, random, numShortestPathRandomNodes); if (!socialsensor.containsKey(PERMUTE_BENCHMARKS)) @@ -449,4 +452,8 @@ public Random getRandom() { public List getRandomNodeList() { return DatasetFactory.getInstance().getDataset(this.dataset).getRandomNodes(); } + + public int 
getShortestPathMaxHops() { + return shortestPathMaxHops; + } } diff --git a/src/main/java/eu/socialsensor/utils/Utils.java b/src/main/java/eu/socialsensor/utils/Utils.java index 44de6ff..066a391 100644 --- a/src/main/java/eu/socialsensor/utils/Utils.java +++ b/src/main/java/eu/socialsensor/utils/Utils.java @@ -191,7 +191,8 @@ public static final GraphDatabase createDatabaseInstance(BenchmarkConfi } else if (GraphDatabaseType.NEO4J == type) { - graphDatabase = new Neo4jGraphDatabase(dbStorageDirectory, batchLoading, config.getRandomNodeList()); + graphDatabase = new Neo4jGraphDatabase(dbStorageDirectory, batchLoading, config.getRandomNodeList(), + config.getShortestPathMaxHops()); } else if (GraphDatabaseType.ORIENT_DB == type) { diff --git a/src/test/java/eu/socialsensor/main/GraphDatabaseBenchmarkTest.java b/src/test/java/eu/socialsensor/main/GraphDatabaseBenchmarkTest.java index c79ff99..0be2968 100644 --- a/src/test/java/eu/socialsensor/main/GraphDatabaseBenchmarkTest.java +++ b/src/test/java/eu/socialsensor/main/GraphDatabaseBenchmarkTest.java @@ -9,6 +9,7 @@ public class GraphDatabaseBenchmarkTest public void testGraphDatabaseBenchmark() { GraphDatabaseBenchmark bench = new GraphDatabaseBenchmark(null /* inputPath */); + try { bench.run(); diff --git a/src/test/resources/META-INF/input.properties b/src/test/resources/META-INF/input.properties index 448e9fc..1f90575 100644 --- a/src/test/resources/META-INF/input.properties +++ b/src/test/resources/META-INF/input.properties @@ -1,6 +1,6 @@ # Choose which data sets you want to include in the benchmark by removing the contents. #Enron -eu.socialsensor.dataset=data/Email-Enron.txt +#eu.socialsensor.dataset=data/Email-Enron.txt #Amazon #eu.socialsensor.dataset=data/Amazon0601.txt #YouTube @@ -11,7 +11,7 @@ eu.socialsensor.dataset=data/Email-Enron.txt #eu.socialsensor.actual-communities=com-lj.all.cmty.txt #Synthetic #can change the number in the filename of the synthetic datasets to 1000, 5000, 10000, 20000, 30000, 40000, 50000 -#eu.socialsensor.dataset=data/network10000.dat +eu.socialsensor.dataset=data/network5000.dat #eu.socialsensor.actual-communities=data/community50000.dat eu.socialsensor.database-storage-directory=storage @@ -24,7 +24,7 @@ eu.socialsensor.metrics.csv.directory=metrics # Choose which databases you want to in the benchmark by removing the comments. # Available dbs are: -eu.socialsensor.databases=tbdb +#eu.socialsensor.databases=tbdb eu.socialsensor.databases=ttupl #eu.socialsensor.databases=tddb #eu.socialsensor.databases=tc @@ -40,7 +40,8 @@ eu.socialsensor.titan.custom-ids=true # page-size - Number of results to pull when iterating over a storage backend (default 100) eu.socialsensor.titan.page-size=100 # to disable buffering on mutations, set to zero. Default 1024. 
This will set the queue size as well -eu.socialsensor.titan.buffer-size=10000 +# use max int as buffer size +eu.socialsensor.titan.buffer-size=2147483647 # id block size default 10000 eu.socialsensor.titan.ids.block-size=10000 # Titan DynamoDB options @@ -82,8 +83,9 @@ eu.socialsensor.benchmarks=MASSIVE_INSERTION #eu.socialsensor.benchmarks=SINGLE_INSERTION eu.socialsensor.benchmarks=FIND_NEIGHBOURS eu.socialsensor.benchmarks=FIND_ADJACENT_NODES -#eu.socialsensor.benchmarks=FIND_SHORTEST_PATH -eu.socialsensor.shortest-path-random-nodes=100 +eu.socialsensor.benchmarks=FIND_SHORTEST_PATH +eu.socialsensor.shortest-path-random-nodes=101 +eu.socialsensor.shortest-path-max-hops=5 # The clustering benchmark is not permutable even if eu.socialsensor.permute-benchmarks=true #eu.socialsensor.benchmarks=CLUSTERING diff --git a/src/test/resources/META-INF/log4j.properties b/src/test/resources/META-INF/log4j.properties deleted file mode 100644 index 52b7745..0000000 --- a/src/test/resources/META-INF/log4j.properties +++ /dev/null @@ -1,9 +0,0 @@ -log4j.rootLogger=info, stdout -#log4j.logger.com.amazon.titan=trace -#log4j.logger.com.thinkaurelius=debug -log4j.logger.com.amazonaws=off -log4j.appender.stdout=org.apache.logging.log4j.core.appender.ConsoleAppender -log4j.appender.stdout.layout=org.apache.logging.log4j.core.layout.PatternLayout - -# Pattern to output the caller's file name and line number. -log4j.appender.stdout.layout.ConversionPattern=%d (%t) [%5p] (%F:%L) - %m%n diff --git a/src/test/resources/META-INF/log4j2.xml b/src/test/resources/META-INF/log4j2.xml index 37eb594..3528d61 100644 --- a/src/test/resources/META-INF/log4j2.xml +++ b/src/test/resources/META-INF/log4j2.xml @@ -1,13 +1,17 @@ - - - - - - - - - - - + + + + + + + + + + + + + + + From e08b4bd12cb86f96c911222cfa2ba345786fa640 Mon Sep 17 00:00:00 2001 From: Alexander Patrikalakis Date: Sat, 9 Apr 2016 10:07:24 +0900 Subject: [PATCH 23/26] started adding shortest path results; updated some MIW --- README.md | 91 +++++++++++++++---- .../graphdatabases/TitanGraphDatabase.java | 2 +- 2 files changed, 72 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index c889311..c282262 100644 --- a/README.md +++ b/README.md @@ -1,30 +1,71 @@ graphdb-benchmarks ================== -The project graphdb-benchmarks is a benchmark between popular graph databases. Currently the framework supports [Titan](http://thinkaurelius.github.io/titan/), [OrientDB](http://www.orientechnologies.com/orientdb/), and [Neo4j](http://neo4j.com/). The purpose of this benchmark is to examine the performance of each graph database in terms of execution time. The benchmark is composed of four workloads, Clustering, Massive Insertion, Single Insertion and Query Workload. Every workload has been designed to simulate common operations in graph database systems. - -- *Clustering Workload (CW)*: CW consists of a well-known community detection algorithm for modularity optimization, the Louvain Method. We adapt the algorithm on top of the benchmarked graph databases and employ cache techniques to take advantage of both graph database capabilities and in-memory execution speed. We measure the time the algorithm needs to converge. -- *Massive Insertion Workload (MIW)*: we create the graph database and configure it for massive loading, then we populate it with a particular data set. We measure the time for the creation of the whole graph. -- *Single Insertion Workload (SIW)*: we create the graph database and load it with a particular data set. 
Every object insertion (node or edge) is committed directly and the graph is constructed incrementally. We measure the insertion time per block, which consists of one thousand edges and the nodes that appear during the insertion of these edges. +The project graphdb-benchmarks is a benchmark between popular graph databases. +Currently the framework supports [Titan](http://thinkaurelius.github.io/titan/), +[OrientDB](http://www.orientechnologies.com/orientdb/), and [Neo4j](http://neo4j.com/). +The purpose of this benchmark is to examine the performance of each graph database in +terms of execution time. The benchmark is composed of four workloads, Clustering, Massive +Insertion, Single Insertion and Query Workload. Every workload has been designed to +simulate common operations in graph database systems. + +- *Clustering Workload (CW)*: CW consists of a well-known community detection algorithm +for modularity optimization, the Louvain Method. We adapt the algorithm on top of the +benchmarked graph databases and employ cache techniques to take advantage of both graph +database capabilities and in-memory execution speed. We measure the time the algorithm +needs to converge. +- *Massive Insertion Workload (MIW)*: we create the graph database and configure it for +massive loading, then we populate it with a particular data set. We measure the time for +the creation of the whole graph. +- *Single Insertion Workload (SIW)*: we create the graph database and load it with a +particular data set. Every object insertion (node or edge) is committed directly and +the graph is constructed incrementally. We measure the insertion time per block, which +consists of one thousand edges and the nodes that appear during the insertion of these +edges. - *Query Workload (QW)*: we execute three common queries: * FindNeighbours (FN): finds the neighbors of all nodes. * FindAdjacentNodes (FA): finds the adjacent nodes of all edges. - * FindShortestPath (FS): finds the shortest path between the first node and 100 randomly picked nodes. + * FindShortestPath (FS): finds the shortest path between a random node and 100 other random nodes. Here we measure the execution time of each query. -For our evaluation we use both synthetic and real data. More specifically, we execute MIW, SIW and QW with real data derived from the SNAP data set collection ([Enron data set](http://snap.stanford.edu/data/email-Enron.html), [Amazon data set](http://snap.stanford.edu/data/amazon0601.html), [Youtube data set](http://snap.stanford.edu/data/com-Youtube.html) and [LiveJournal data set](http://snap.stanford.edu/data/com-LiveJournal.html)). On the other hand, with the CW we use synthetic data generated with the [LFR-Benchmark generator](https://sites.google.com/site/andrealancichinetti/files) which produces networks with power-law degree distribution and implanted communities within the network. The synthetic data can be downloaded from [here](http://figshare.com/articles/Synthetic_Data_for_graphdb_benchmark/1221760). - -For further information about the study please refer to the [published paper](http://link.springer.com/chapter/10.1007/978-3-319-10518-5_1) on Springer site and the presentation on [Slideshare](http://www.slideshare.net/sympapadopoulos/adbis2014-presentation). - -**Note 1:** The published paper contains the experimental study of Titan, OrientDB and Neo4j. After the publication we included the Sparksee graph database. Sparksee does not implement TinkerPop 3 yet. 
-
-**Note 2:** After the very useful comments and contributions of OrientDB developers, we updated the benchmark implementations and re-run the experiments. We have updated the initial presentation with the new results and uploaded a new version of the paper in the following [link](http://mklab.iti.gr/files/beis_adbis2014_corrected.pdf).
-
-**Note 3:** Alexander Patrikalakis, a software developer at Amazon Web Services, refactored the benchmark, added support for Apache TinkerPop 3 and added support for the DynamoDB Storage Backend for Titan.
+For our evaluation we use both synthetic and real data. More specifically, we execute
+MIW, SIW and QW with real data derived from the SNAP data set collection
+([Enron data set](http://snap.stanford.edu/data/email-Enron.html),
+[Amazon data set](http://snap.stanford.edu/data/amazon0601.html),
+[Youtube data set](http://snap.stanford.edu/data/com-Youtube.html) and
+[LiveJournal data set](http://snap.stanford.edu/data/com-LiveJournal.html)). On the
+other hand, for the CW we use synthetic data generated with the
+[LFR-Benchmark generator](https://sites.google.com/site/andrealancichinetti/files), which
+produces networks with a power-law degree distribution and implanted communities. The
+synthetic data can be downloaded from
+[here](http://figshare.com/articles/Synthetic_Data_for_graphdb_benchmark/1221760).
+
+For further information about the study please refer to the
+[published paper](http://link.springer.com/chapter/10.1007/978-3-319-10518-5_1) on the
+Springer site and the presentation on
+[Slideshare](http://www.slideshare.net/sympapadopoulos/adbis2014-presentation).
+
+**Note 1:** The published paper contains the experimental study of Titan, OrientDB and Neo4j.
+After the publication we included the Sparksee graph database. Sparksee does not implement TinkerPop 3 yet.
+
+**Note 2:** After the very useful comments and contributions of the OrientDB developers, we
+updated the benchmark implementations and re-ran the experiments. We have updated the
+initial presentation with the new results and uploaded a new version of the paper at the
+following [link](http://mklab.iti.gr/files/beis_adbis2014_corrected.pdf).
+
+**Note 3:** Alexander Patrikalakis, a software developer at Amazon Web Services, refactored
+the benchmark, added support for Apache TinkerPop 3, and added support for the DynamoDB
+Storage Backend and the Tupl Storage Backend for Titan.
 
 Instructions
 ------------
-To run the project at first you have to choose one of the aforementioned data sets. Of course you can select any data set, but because there is not any utility class to convert the data set in the appropriate format (for now), the format of the data must be identical with the tested data sets. The input parameters are configured from the src/test/resources/input.properties file. Please follow the instructions in this file to select the correct parameters. Then, run `mvn install && mvn test -Pbench` to execute the benchmarking run.
+To run the project, you first have to choose one of the aforementioned data sets. You can
+select any data set, but because there is not yet a utility class to convert a data set
+into the appropriate format, the format of the data must be identical to that of the
+tested data sets. The input parameters are configured in the
+src/test/resources/input.properties file. Please follow the instructions in this file
+to select the correct parameters. Then, run `mvn install && mvn test -Pbench` to execute
+the benchmarking run.
 
 Results
 -------
@@ -112,11 +153,21 @@ The results are measured in seconds.
 | 50k | QW-FN | 41.175 | 14.742 | **8.489** |
 | AM | QW-FN | 76.562 | 28.242 | **12.466** |
 | | | | | |
-| 1k | MIW | 1.167 | 0.673 | **0.481** |
-| 5k | MIW | 4.276 | 2.918 | **1.239** |
-| 10k | MIW | 8.247 | 5.659 | **2.334** |
+| 1k | QW-FS | 2.932 | 2.555 | |
+| 5k | QW-FS | 18.743 | 17.995 | |
+| 10k | QW-FS | 31.006 | 30.289 | |
+| EN | QW-FS | | | |
+| 20k | QW-FS | 122.864 | 122.204 | |
+| 30k | QW-FS | 21.816 | 8.340 | |
+| 40k | QW-FS | 31.187 | 11.632 | |
+| 50k | QW-FS | 41.175 | 14.742 | |
+| AM | QW-FS | 76.562 | 28.242 | |
+| | | | | |
+| 1k | MIW | 1.204 | 0.696 | **0.481** |
+| 5k | MIW | 4.293 | 2.755 | **1.239** |
+| 10k | MIW | 8.291 | 5.707 | **2.334** |
 | EN | MIW | 9.858 | 6.960 | **2.401** |
-| 20k | MIW | 17.011 | 12.711 | **4.511** |
+| 20k | MIW | 16.872 | 11.829 | **4.511** |
 | 30k | MIW | 30.252 | 19.929 | **8.767** |
 | 40k | MIW | 44.450 | 31.763 | **12.761** |
 | 50k | MIW | 57.001 | 35.008 | **15.755** |
diff --git a/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java b/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java
index 039c273..4826a61 100644
--- a/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java
+++ b/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java
@@ -331,7 +331,7 @@ public void shortestPath(final Vertex fromNode, Integer targetNode)
         final int pathSize = it.size();
         final long elapsed = watch.elapsed(TimeUnit.MILLISECONDS);
         watch.stop();
-        if(elapsed > 500) { //threshold for debugging
+        if(elapsed > 2000) { //threshold for debugging
         LOG.warn("from @ " + fromNode.value(NODE_ID) + " to @ " + targetNode.toString() +
         " took " + elapsed + " ms, " + pathSize + ": " + it.toString());

From 44d95b04e7d7be691e3f26583e1eb8d4f4b37f5e Mon Sep 17 00:00:00 2001
From: Alexander Patrikalakis
Date: Sat, 9 Apr 2016 11:32:55 +0900
Subject: [PATCH 24/26] updated synthetic dataset QW-FS numbers, implemented neo shortest path

---
 README.md | 12 ++--
 .../graphdatabases/DepthPredicate.java | 28 +++++++++
 .../graphdatabases/Neo4jGraphDatabase.java | 58 +++++++++++++------
 .../graphdatabases/TitanGraphDatabase.java | 22 -------
 4 files changed, 74 insertions(+), 46 deletions(-)
 create mode 100644 src/main/java/eu/socialsensor/graphdatabases/DepthPredicate.java

diff --git a/README.md b/README.md
index c282262..d39b4c6 100644
--- a/README.md
+++ b/README.md
@@ -158,18 +158,18 @@ The results are measured in seconds.
| 10k | QW-FS | 31.006 | 30.289 | | | EN | QW-FS | | | | | 20k | QW-FS | 122.864 | 122.204 | | -| 30k | QW-FS | 21.816 | 8.340 | | -| 40k | QW-FS | 31.187 | 11.632 | | -| 50k | QW-FS | 41.175 | 14.742 | | -| AM | QW-FS | 76.562 | 28.242 | | +| 30k | QW-FS | 136.276 | 124.886 | | +| 40k | QW-FS | 276.389 | 261.699 | | +| 50k | QW-FS | 339.146 | 310.307 | | +| AM | QW-FS | | | | | | | | | | | 1k | MIW | 1.204 | 0.696 | **0.481** | | 5k | MIW | 4.293 | 2.755 | **1.239** | | 10k | MIW | 8.291 | 5.707 | **2.334** | | EN | MIW | 9.858 | 6.960 | **2.401** | | 20k | MIW | 16.872 | 11.829 | **4.511** | -| 30k | MIW | 30.252 | 19.929 | **8.767** | -| 40k | MIW | 44.450 | 31.763 | **12.761** | +| 30k | MIW | 29.851 | 20.081 | **8.767** | +| 40k | MIW | 44.257 | 34.078 | **12.761** | | 50k | MIW | 57.001 | 35.008 | **15.755** | | AM | MIW | 98.405 | 64.286 | **23.867** | diff --git a/src/main/java/eu/socialsensor/graphdatabases/DepthPredicate.java b/src/main/java/eu/socialsensor/graphdatabases/DepthPredicate.java new file mode 100644 index 0000000..bb71946 --- /dev/null +++ b/src/main/java/eu/socialsensor/graphdatabases/DepthPredicate.java @@ -0,0 +1,28 @@ +package eu.socialsensor.graphdatabases; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.tinkerpop.gremlin.process.traversal.Traverser; +import org.apache.tinkerpop.gremlin.structure.T; + +import java.util.function.Predicate; + +/** + * Depth predicate for shortest path + * + * @author Alexander Patrikalakis + */ +public class DepthPredicate implements Predicate> { + private static final Logger LOG = LogManager.getLogger(); + private final int hops; + + public DepthPredicate(int hops) { + this.hops = hops; + } + + @Override + public boolean test(Traverser it) { + LOG.trace("testing {}", it.path()); + return it.path().size() <= hops; + } +} diff --git a/src/main/java/eu/socialsensor/graphdatabases/Neo4jGraphDatabase.java b/src/main/java/eu/socialsensor/graphdatabases/Neo4jGraphDatabase.java index 075d7ae..8327b80 100644 --- a/src/main/java/eu/socialsensor/graphdatabases/Neo4jGraphDatabase.java +++ b/src/main/java/eu/socialsensor/graphdatabases/Neo4jGraphDatabase.java @@ -10,23 +10,23 @@ import eu.socialsensor.main.GraphDatabaseType; import eu.socialsensor.utils.Utils; -import org.neo4j.graphalgo.GraphAlgoFactory; -import org.neo4j.graphalgo.PathFinder; +import org.apache.tinkerpop.gremlin.neo4j.structure.Neo4jGraph; +import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; +import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource; +import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__; import org.neo4j.graphdb.Direction; import org.neo4j.graphdb.DynamicLabel; import org.neo4j.graphdb.GraphDatabaseService; import org.neo4j.graphdb.Label; import org.neo4j.graphdb.Node; -import org.neo4j.graphdb.Path; -import org.neo4j.graphdb.PathExpanders; import org.neo4j.graphdb.Relationship; import org.neo4j.graphdb.RelationshipType; import org.neo4j.graphdb.ResourceIterator; import org.neo4j.graphdb.Transaction; -import org.neo4j.graphdb.factory.GraphDatabaseFactory; import org.neo4j.graphdb.schema.IndexDefinition; import org.neo4j.graphdb.schema.Schema; import org.neo4j.helpers.collection.IteratorUtil; +import org.neo4j.tinkerpop.api.impl.Neo4jGraphAPIImpl; import org.neo4j.tooling.GlobalGraphOperations; import org.neo4j.unsafe.batchinsert.BatchInserter; import org.neo4j.unsafe.batchinsert.BatchInserters; @@ -51,11 +51,12 @@ public class 
Neo4jGraphDatabase extends GraphDatabaseBase, Iterator, Node, Relationship> { private final GraphDatabaseService neo4jGraph; + private final Neo4jGraph neo4jTp; private final Schema schema; private BatchInserter inserter = null; - public static enum RelTypes implements RelationshipType + public enum RelTypes implements RelationshipType { SIMILAR } @@ -68,6 +69,7 @@ public Neo4jGraphDatabase(File dbStorageDirectoryIn, boolean batchLoading, List< if(batchLoading) { neo4jGraph = null; + neo4jTp = null; schema = null; Map config = new HashMap(); @@ -88,7 +90,8 @@ public Neo4jGraphDatabase(File dbStorageDirectoryIn, boolean batchLoading, List< inserter.createDeferredSchemaIndex(NODE_LABEL).on(COMMUNITY).create(); inserter.createDeferredSchemaIndex(NODE_LABEL).on(NODE_COMMUNITY).create(); } else { - neo4jGraph = new GraphDatabaseFactory().newEmbeddedDatabase(dbStorageDirectory); + neo4jTp = Neo4jGraph.open(dbStorageDirectory.getAbsolutePath()); + neo4jGraph = ((Neo4jGraphAPIImpl) neo4jTp.getBaseGraph()).getGraphDatabase(); try (final Transaction tx = neo4jGraph.beginTx()) { schema = neo4jGraph.schema(); @@ -206,17 +209,36 @@ public void findAllNodeNeighbours() { @Override public void shortestPath(Node n1, Integer i) { - PathFinder finder - = GraphAlgoFactory.shortestPath(PathExpanders.forType(Neo4jGraphDatabase.RelTypes.SIMILAR), maxHops); - Node n2 = getVertex(i); - Path path = finder.findSinglePath(n1, n2); - - @SuppressWarnings("unused") - int length = 0; - if (path != null) - { - length = path.length(); - } +// PathFinder finder +// = GraphAlgoFactory.shortestPath(PathExpanders.forType(Neo4jGraphDatabase.RelTypes.SIMILAR), maxHops); +// Node n2 = getVertex(i); +// Path path = finder.findSinglePath(n1, n2); +// +// @SuppressWarnings("unused") +// int length = 0; +// if (path != null) +// { +// length = path.length(); +// } + final GraphTraversalSource g = neo4jTp.traversal(); + final DepthPredicate maxDepth = new DepthPredicate(maxHops); + final Integer fromNodeId = (Integer) n1.getProperty(NODE_ID); + final GraphTraversal t = + g.V().has(NODE_ID, fromNodeId) + .repeat( + __.out(SIMILAR) + .simplePath()) + .until( + __.has(NODE_ID, i) + .and(__.filter( maxDepth )) + ) + .limit(1) + .path(); + + t.tryNext() + .ifPresent( it -> { + final int pathSize = it.size(); + }); } @Override diff --git a/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java b/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java index 4826a61..bc754df 100644 --- a/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java +++ b/src/main/java/eu/socialsensor/graphdatabases/TitanGraphDatabase.java @@ -10,7 +10,6 @@ import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; -import java.util.function.Predicate; import com.google.common.base.Stopwatch; import org.apache.commons.configuration.Configuration; @@ -18,7 +17,6 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.tinkerpop.gremlin.process.traversal.Path; -import org.apache.tinkerpop.gremlin.process.traversal.Traverser; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__; @@ -288,29 +286,11 @@ public void shutdownMassiveGraph() shutdown(); } - public class DepthPredicate implements Predicate> { - private final int hops; - public DepthPredicate(int hops) { - this.hops = hops; 
- } - - @Override - public boolean test(Traverser it) { - LOG.trace("testing {}", it.path()); - return it.path().size() <= hops; - } - } - @Override public void shortestPath(final Vertex fromNode, Integer targetNode) { final GraphTraversalSource g = graph.traversal(); final Stopwatch watch = Stopwatch.createStarted(); - // repeat the contained traversal - // map from this vertex to inV on SIMILAR edges without looping - // until you map to the target toNode and the path is six vertices long or less - // only return one path -//g.V().has("nodeId", 775).repeat(out('similar').simplePath()).until(has('nodeId', 990).and().filter {it.path().size() <= 5}).limit(1).path().by('nodeId') final DepthPredicate maxDepth = new DepthPredicate(maxHops); final Integer fromNodeId = fromNode.value(NODE_ID); LOG.trace("finding path from {} to {} max hops {}", fromNodeId, targetNode, maxHops); @@ -337,8 +317,6 @@ public void shortestPath(final Vertex fromNode, Integer targetNode) " took " + elapsed + " ms, " + pathSize + ": " + it.toString()); } }); - - } @Override From 5180eb45168ae3caa716e548f10311ed8200cd08 Mon Sep 17 00:00:00 2001 From: Alexander Patrikalakis Date: Sat, 9 Apr 2016 11:59:42 +0900 Subject: [PATCH 25/26] some finer neo4j transaction handling --- .../graphdatabases/Neo4jGraphDatabase.java | 50 +++++++++++++------ src/test/resources/META-INF/input.properties | 4 +- 2 files changed, 37 insertions(+), 17 deletions(-) diff --git a/src/main/java/eu/socialsensor/graphdatabases/Neo4jGraphDatabase.java b/src/main/java/eu/socialsensor/graphdatabases/Neo4jGraphDatabase.java index 8327b80..7e96c5d 100644 --- a/src/main/java/eu/socialsensor/graphdatabases/Neo4jGraphDatabase.java +++ b/src/main/java/eu/socialsensor/graphdatabases/Neo4jGraphDatabase.java @@ -23,9 +23,11 @@ import org.neo4j.graphdb.RelationshipType; import org.neo4j.graphdb.ResourceIterator; import org.neo4j.graphdb.Transaction; +import org.neo4j.graphdb.schema.IndexCreator; import org.neo4j.graphdb.schema.IndexDefinition; import org.neo4j.graphdb.schema.Schema; import org.neo4j.helpers.collection.IteratorUtil; +import org.neo4j.kernel.api.exceptions.index.ExceptionDuringFlipKernelException; import org.neo4j.tinkerpop.api.impl.Neo4jGraphAPIImpl; import org.neo4j.tooling.GlobalGraphOperations; import org.neo4j.unsafe.batchinsert.BatchInserter; @@ -53,8 +55,7 @@ public class Neo4jGraphDatabase extends GraphDatabaseBase, Iterat private final GraphDatabaseService neo4jGraph; private final Neo4jGraph neo4jTp; private final Schema schema; - - private BatchInserter inserter = null; + private final BatchInserter inserter; public enum RelTypes implements RelationshipType { @@ -66,7 +67,6 @@ public enum RelTypes implements RelationshipType public Neo4jGraphDatabase(File dbStorageDirectoryIn, boolean batchLoading, List randomNodes, int shortestPathMaxHops) { super(GraphDatabaseType.NEO4J, dbStorageDirectoryIn, randomNodes, shortestPathMaxHops); - if(batchLoading) { neo4jGraph = null; neo4jTp = null; @@ -90,6 +90,7 @@ public Neo4jGraphDatabase(File dbStorageDirectoryIn, boolean batchLoading, List< inserter.createDeferredSchemaIndex(NODE_LABEL).on(COMMUNITY).create(); inserter.createDeferredSchemaIndex(NODE_LABEL).on(NODE_COMMUNITY).create(); } else { + inserter = null; neo4jTp = Neo4jGraph.open(dbStorageDirectory.getAbsolutePath()); neo4jGraph = ((Neo4jGraphAPIImpl) neo4jTp.getBaseGraph()).getGraphDatabase(); try (final Transaction tx = neo4jGraph.beginTx()) @@ -107,8 +108,6 @@ public Neo4jGraphDatabase(File dbStorageDirectoryIn, boolean batchLoading, 
List< schema.awaitIndexesOnline(10l, TimeUnit.MINUTES); tx.success(); } - - inserter = null; } } @@ -181,28 +180,41 @@ public void shutdownMassiveGraph() { throw new BenchmarkingException("could not remove lock"); } - - inserter = null; } @Override public void shortestPaths() { - try (Transaction tx = ((Neo4jGraphDatabase) this).neo4jGraph.beginTx()) { - super.shortestPaths(); + try (Transaction tx = neo4jGraph.beginTx()) { + try { + super.shortestPaths(); + tx.success(); + } catch(Exception e) { + tx.failure(); + } } } @Override public void findNodesOfAllEdges() { - try (Transaction tx = ((Neo4jGraphDatabase) this).neo4jGraph.beginTx()) { - super.findNodesOfAllEdges(); + try (Transaction tx = neo4jGraph.beginTx()) { + try { + super.findNodesOfAllEdges(); + tx.success(); + } catch(Exception e) { + tx.failure(); + } } } @Override public void findAllNodeNeighbours() { - try (Transaction tx = ((Neo4jGraphDatabase) this).neo4jGraph.beginTx()) { - super.findAllNodeNeighbours(); + try (Transaction tx = neo4jGraph.beginTx()) { + try{ + super.findAllNodeNeighbours(); + tx.success(); + } catch(Exception e) { + tx.failure(); + } } } @@ -793,8 +805,16 @@ public Node nextVertex(Iterator it) @Override public Node getVertex(Integer i) { - // TODO(amcp) check, this probably should be run in the context of an active transaction. - return neo4jGraph.findNodes(Neo4jGraphDatabase.NODE_LABEL, NODE_ID, i).next(); + Node result = null; + try (final Transaction tx = neo4jGraph.beginTx()) { + try { + result = neo4jGraph.findNodes(Neo4jGraphDatabase.NODE_LABEL, NODE_ID, i).next(); + tx.success(); + } catch(Exception e) { + tx.failure(); + } + } + return result; } } diff --git a/src/test/resources/META-INF/input.properties b/src/test/resources/META-INF/input.properties index 1f90575..5aaced6 100644 --- a/src/test/resources/META-INF/input.properties +++ b/src/test/resources/META-INF/input.properties @@ -11,7 +11,7 @@ #eu.socialsensor.actual-communities=com-lj.all.cmty.txt #Synthetic #can change the number in the filename of the synthetic datasets to 1000, 5000, 10000, 20000, 30000, 40000, 50000 -eu.socialsensor.dataset=data/network5000.dat +eu.socialsensor.dataset=data/network1000.dat #eu.socialsensor.actual-communities=data/community50000.dat eu.socialsensor.database-storage-directory=storage @@ -24,7 +24,7 @@ eu.socialsensor.metrics.csv.directory=metrics # Choose which databases you want to in the benchmark by removing the comments. 
# Available dbs are: -#eu.socialsensor.databases=tbdb +eu.socialsensor.databases=tbdb eu.socialsensor.databases=ttupl #eu.socialsensor.databases=tddb #eu.socialsensor.databases=tc From c3cf59502bf2f6c51daff8d5a850bdb582629ff7 Mon Sep 17 00:00:00 2001 From: Alexander Patrikalakis Date: Sat, 9 Apr 2016 12:33:34 +0900 Subject: [PATCH 26/26] only accept cache percentages for clustering analysis --- .../benchmarks/ClusteringBenchmark.java | 28 +++------- .../eu/socialsensor/clustering/Cache.java | 4 +- .../clustering/LouvainMethod.java | 26 ++++----- .../main/BenchmarkConfiguration.java | 54 ++++--------------- src/test/resources/META-INF/input.properties | 14 ++--- 5 files changed, 41 insertions(+), 85 deletions(-) diff --git a/src/main/java/eu/socialsensor/benchmarks/ClusteringBenchmark.java b/src/main/java/eu/socialsensor/benchmarks/ClusteringBenchmark.java index efbc41e..208f428 100644 --- a/src/main/java/eu/socialsensor/benchmarks/ClusteringBenchmark.java +++ b/src/main/java/eu/socialsensor/benchmarks/ClusteringBenchmark.java @@ -35,24 +35,12 @@ public class ClusteringBenchmark extends BenchmarkBase implements RequiresGraphData { private static final Logger LOG = LogManager.getLogger(); - private final List cacheValues; + private final List cachePercentages; public ClusteringBenchmark(BenchmarkConfiguration config) { super(config, BenchmarkType.CLUSTERING); - this.cacheValues = new ArrayList(); - if (config.getCacheValues() == null) - { - int cacheValueMultiplier = (int) config.getCacheIncrementFactor().intValue() * config.getNodesCount(); - for (int i = 1; i <= config.getCacheValuesCount(); i++) - { - cacheValues.add(i * cacheValueMultiplier); - } - } - else - { - cacheValues.addAll(config.getCacheValues()); - } + this.cachePercentages = new ArrayList(config.getCachePercentages()); } @Override @@ -96,23 +84,23 @@ private SortedMap clusteringBenchmark(GraphDatabaseType type) t GraphDatabase graphDatabase = Utils.createDatabaseInstance(bench, type, false /*batchLoading*/); SortedMap timeMap = new TreeMap(); - for (int cacheSize : cacheValues) + for (int cachePercentage : cachePercentages) { LOG.info("Graph Database: " + type.getShortname() + ", Dataset: " + bench.getDataset().getName() - + ", Cache Size: " + cacheSize); + + ", Cache Size: " + cachePercentage); Stopwatch watch = Stopwatch.createStarted(); - LouvainMethod louvainMethodCache = new LouvainMethod(graphDatabase, cacheSize, + LouvainMethod louvainMethodCache = new LouvainMethod(graphDatabase, cachePercentage, bench.randomizedClustering() ? 
bench.getRandom() : null); louvainMethodCache.computeModularity(); - timeMap.put(cacheSize, watch.elapsed(TimeUnit.MILLISECONDS) / 1000.0); + timeMap.put(cachePercentage, watch.elapsed(TimeUnit.MILLISECONDS) / 1000.0); // evaluation with NMI - Map> predictedCommunities = graphDatabase.mapCommunities(louvainMethodCache.getN()); + Map> predictedCommunities = graphDatabase.mapCommunities(louvainMethodCache.getNodeCount()); Map> actualCommunities = mapNodesToCommunities(Utils.readTabulatedLines( bench.getActualCommunitiesFile(), 4 /* numberOfLinesToSkip */)); Metrics metrics = new Metrics(); - double NMI = metrics.normalizedMutualInformation(bench.getNodesCount(), actualCommunities, + double NMI = metrics.normalizedMutualInformation(louvainMethodCache.getNodeCount(), actualCommunities, predictedCommunities); LOG.info("NMI value: " + NMI); } diff --git a/src/main/java/eu/socialsensor/clustering/Cache.java b/src/main/java/eu/socialsensor/clustering/Cache.java index 5f76615..51ad50c 100644 --- a/src/main/java/eu/socialsensor/clustering/Cache.java +++ b/src/main/java/eu/socialsensor/clustering/Cache.java @@ -36,8 +36,10 @@ public class Cache LoadingCache nodeToCommunityMap; // key=nodeId // value=communityId - public Cache(final GraphDatabase graphDatabase, int cacheSize) throws ExecutionException + public Cache(final GraphDatabase graphDatabase, int cachePercentage, int nodeCount) throws ExecutionException { + final int cacheSize = Math.max(0, Math.min(nodeCount, + Math.round(((float) cachePercentage) / 100.0f * nodeCount))); nodeNeighbours = CacheBuilder.newBuilder().maximumSize(cacheSize) .build(new CacheLoader>() { public Set load(Integer nodeId) diff --git a/src/main/java/eu/socialsensor/clustering/LouvainMethod.java b/src/main/java/eu/socialsensor/clustering/LouvainMethod.java index a05a6da..fd5ea51 100644 --- a/src/main/java/eu/socialsensor/clustering/LouvainMethod.java +++ b/src/main/java/eu/socialsensor/clustering/LouvainMethod.java @@ -20,29 +20,29 @@ public class LouvainMethod private final Random random; private double resolution = 1.0; private double graphWeightSum; - private int N; + private int nodeCount; private List communityWeights; private boolean communityUpdate = false; GraphDatabase graphDatabase; Cache cache; - public LouvainMethod(GraphDatabase graphDatabase, int cacheSize, Random random) throws ExecutionException + public LouvainMethod(GraphDatabase graphDatabase, int cachePercentage, Random random) throws ExecutionException { this.graphDatabase = graphDatabase; this.random = random; initialize(); - cache = new Cache(graphDatabase, cacheSize); + cache = new Cache(graphDatabase, cachePercentage, nodeCount); } private void initialize() { - this.N = this.graphDatabase.getNodeCount();// this step takes a long + this.nodeCount = this.graphDatabase.getNodeCount();// this step takes a long // time on dynamodb. 
this.graphWeightSum = this.graphDatabase.getGraphWeightSum() / 2; - this.communityWeights = new ArrayList(this.N); - for (int i = 0; i < this.N; i++) + this.communityWeights = new ArrayList(this.nodeCount); + for (int i = 0; i < this.nodeCount; i++) { this.communityWeights.add(0.0); } @@ -63,10 +63,10 @@ public void computeModularity() throws ExecutionException int start = 0; if (null != this.random) { - start = Math.abs(random.nextInt()) % this.N; + start = Math.abs(random.nextInt()) % this.nodeCount; } int step = 0; - for (int i = start; step < this.N; i = (i + 1) % this.N) + for (int i = start; step < this.nodeCount; i = (i + 1) % this.nodeCount) { step++; int bestCommunity = updateBestCommunity(i); @@ -136,18 +136,18 @@ private double q(int nodeCommunity, int community) throws ExecutionException public void zoomOut() { - this.N = this.graphDatabase.reInitializeCommunities(); + this.nodeCount = this.graphDatabase.reInitializeCommunities(); this.cache.reInitializeCommunities(); - this.communityWeights = new ArrayList(this.N); - for (int i = 0; i < this.N; i++) + this.communityWeights = new ArrayList(this.nodeCount); + for (int i = 0; i < this.nodeCount; i++) { this.communityWeights.add(graphDatabase.getCommunityWeight(i)); } } - public int getN() + public int getNodeCount() { - return this.N; + return this.nodeCount; } } \ No newline at end of file diff --git a/src/main/java/eu/socialsensor/main/BenchmarkConfiguration.java b/src/main/java/eu/socialsensor/main/BenchmarkConfiguration.java index 47fc3ed..2f64191 100644 --- a/src/main/java/eu/socialsensor/main/BenchmarkConfiguration.java +++ b/src/main/java/eu/socialsensor/main/BenchmarkConfiguration.java @@ -46,11 +46,8 @@ public class BenchmarkConfiguration private static final String DATASET = "dataset"; private static final String DATABASE_STORAGE_DIRECTORY = "database-storage-directory"; private static final String ACTUAL_COMMUNITIES = "actual-communities"; - private static final String NODES_COUNT = "nodes-count"; private static final String RANDOMIZE_CLUSTERING = "randomize-clustering"; - private static final String CACHE_VALUES = "cache-values"; - private static final String CACHE_INCREMENT_FACTOR = "cache-increment-factor"; - private static final String CACHE_VALUES_COUNT = "cache-values-count"; + private static final String CACHE_PERCENTAGES = "cache-percentages"; private static final String PERMUTE_BENCHMARKS = "permute-benchmarks"; private static final String RANDOM_NODES = "shortest-path-random-nodes"; private static final String RANDOM_SEED = "random-seed"; @@ -86,10 +83,9 @@ public class BenchmarkConfiguration // clustering private final Boolean randomizedClustering; - private final Integer nodesCount; private final Integer cacheValuesCount; private final Double cacheIncrementFactor; - private final List cacheValues; + private final List cachePercentages; private final File actualCommunities; private final boolean permuteBenchmarks; private final int scenarios; @@ -221,12 +217,6 @@ public BenchmarkConfiguration(Configuration appconfig) if (this.benchmarkTypes.contains(BenchmarkType.CLUSTERING)) { - if (!socialsensor.containsKey(NODES_COUNT)) - { - throw new IllegalArgumentException("the CW benchmark requires nodes-count integer in config"); - } - nodesCount = socialsensor.getInt(NODES_COUNT); - if (!socialsensor.containsKey(RANDOMIZE_CLUSTERING)) { throw new IllegalArgumentException("the CW benchmark requires randomize-clustering bool in config"); @@ -239,49 +229,30 @@ public BenchmarkConfiguration(Configuration appconfig) } 
actualCommunities = validateReadableFile(socialsensor.getString(ACTUAL_COMMUNITIES), ACTUAL_COMMUNITIES); - final boolean notGenerating = socialsensor.containsKey(CACHE_VALUES); + final boolean notGenerating = socialsensor.containsKey(CACHE_PERCENTAGES); if (notGenerating) { - List objects = socialsensor.getList(CACHE_VALUES); - cacheValues = new ArrayList(objects.size()); + List objects = socialsensor.getList(CACHE_PERCENTAGES); + cachePercentages = new ArrayList(objects.size()); cacheValuesCount = null; cacheIncrementFactor = null; for (Object o : objects) { - cacheValues.add(Integer.valueOf(o.toString())); + cachePercentages.add(Integer.valueOf(o.toString())); } } - else if (socialsensor.containsKey(CACHE_VALUES_COUNT) && socialsensor.containsKey(CACHE_INCREMENT_FACTOR)) - { - cacheValues = null; - // generate the cache values with parameters - if (!socialsensor.containsKey(CACHE_VALUES_COUNT)) - { - throw new IllegalArgumentException( - "the CW benchmark requires cache-values-count int in config when cache-values not specified"); - } - cacheValuesCount = socialsensor.getInt(CACHE_VALUES_COUNT); - - if (!socialsensor.containsKey(CACHE_INCREMENT_FACTOR)) - { - throw new IllegalArgumentException( - "the CW benchmark requires cache-increment-factor int in config when cache-values not specified"); - } - cacheIncrementFactor = socialsensor.getDouble(CACHE_INCREMENT_FACTOR); - } else { throw new IllegalArgumentException( - "when doing CW benchmark, must provide cache-values or parameters to generate them"); + "when doing CW benchmark, must provide cache-percentages"); } } else { randomizedClustering = null; - nodesCount = null; cacheValuesCount = null; cacheIncrementFactor = null; - cacheValues = null; + cachePercentages = null; actualCommunities = null; } } @@ -331,11 +302,6 @@ public Boolean randomizedClustering() return randomizedClustering; } - public Integer getNodesCount() - { - return nodesCount; - } - public Integer getCacheValuesCount() { return cacheValuesCount; @@ -346,9 +312,9 @@ public Double getCacheIncrementFactor() return cacheIncrementFactor; } - public List getCacheValues() + public List getCachePercentages() { - return cacheValues; + return cachePercentages; } public File getActualCommunitiesFile() diff --git a/src/test/resources/META-INF/input.properties b/src/test/resources/META-INF/input.properties index 5aaced6..44801df 100644 --- a/src/test/resources/META-INF/input.properties +++ b/src/test/resources/META-INF/input.properties @@ -12,7 +12,7 @@ #Synthetic #can change the number in the filename of the synthetic datasets to 1000, 5000, 10000, 20000, 30000, 40000, 50000 eu.socialsensor.dataset=data/network1000.dat -#eu.socialsensor.actual-communities=data/community50000.dat +#eu.socialsensor.actual-communities=data/community1000.dat eu.socialsensor.database-storage-directory=storage # Sample meters this frequently (milliseconds) @@ -93,12 +93,12 @@ eu.socialsensor.randomize-clustering=false eu.socialsensor.nodes-count=1000 # Choose the cache values you want run the CW benchmark, or have them generated. 
To choose:
-eu.socialsensor.cache-values=25
-eu.socialsensor.cache-values=50
-eu.socialsensor.cache-values=75
-eu.socialsensor.cache-values=100
-eu.socialsensor.cache-values=125
-eu.socialsensor.cache-values=150
+eu.socialsensor.cache-percentages=5
+eu.socialsensor.cache-percentages=10
+eu.socialsensor.cache-percentages=15
+eu.socialsensor.cache-percentages=20
+eu.socialsensor.cache-percentages=25
+eu.socialsensor.cache-percentages=30
 
 # To have the cache values generated for the CW benchmark.
 #eu.socialsensor.cache-increment-factor=1
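With patch 26, the clustering cache is sized as a percentage of the data set's node count rather than as an absolute entry count: the `Cache` constructor above clamps `cachePercentage / 100 * nodeCount` into `[0, nodeCount]` and hands the result to Guava's `CacheBuilder.maximumSize`. A minimal sketch of that computation with illustrative values and a placeholder loader (the real benchmark loads node neighbours from the graph database under test):

```java
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;

import java.util.Collections;
import java.util.Set;

public class CachePercentageSketch {
    // Same clamped conversion the patched Cache constructor performs:
    // a percentage of the node count, kept within [0, nodeCount].
    static int cacheSize(int cachePercentage, int nodeCount) {
        return Math.max(0, Math.min(nodeCount,
                Math.round(((float) cachePercentage) / 100.0f * nodeCount)));
    }

    public static void main(String[] args) {
        int nodeCount = 5000;      // e.g. the network5000.dat synthetic data set
        int cachePercentage = 20;  // one of the eu.socialsensor.cache-percentages values
        // 20% of 5000 nodes -> room for 1000 cached neighbour sets
        LoadingCache<Integer, Set<Integer>> nodeNeighbours = CacheBuilder.newBuilder()
                .maximumSize(cacheSize(cachePercentage, nodeCount))
                .build(new CacheLoader<Integer, Set<Integer>>() {
                    @Override
                    public Set<Integer> load(Integer nodeId) {
                        // placeholder: the benchmark's Cache loads neighbours from the graph database
                        return Collections.emptySet();
                    }
                });
        System.out.println("maximum cache entries: " + cacheSize(cachePercentage, nodeCount));
        // nodeNeighbours.getUnchecked(42) would populate and cache the entry for node 42
    }
}
```

Expressed this way, `cache-percentages=20` means room for one fifth of the nodes whether the data set is network1000.dat or the 50k synthetic network, which is presumably what keeps CW runs comparable across data set sizes.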