You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tinkerpop.apache.org by ok...@apache.org on 2016/01/07 17:43:12 UTC

incubator-tinkerpop git commit: added a Storage test case to ensure that residual data for Persist.NOTHING is consistent for both Giraph and Spark. Giraph always left behind sideEffects (memory) on disk. Spark doesn't. Decided NOTHING means destroy persi

Repository: incubator-tinkerpop
Updated Branches:
  refs/heads/TINKERPOP-1033 a7db52bda -> cc7cfffe4


added a Storage test case to ensure that residual data for Persist.NOTHING is consistent for both Giraph and Spark. Giraph always left behind sideEffects (memory) on disk. Spark doesn't. Decided NOTHING means destroy persisted memory as well. For 3.2.0, we need to have contracts for all this specified. Made a ticket. Rebuilt docs and ran test suites, all is still golden.


Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/cc7cfffe
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/cc7cfffe
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/cc7cfffe

Branch: refs/heads/TINKERPOP-1033
Commit: cc7cfffe47ae284ee6ecd67afffb34a6fe3b0b42
Parents: a7db52b
Author: Marko A. Rodriguez <ok...@gmail.com>
Authored: Thu Jan 7 09:43:07 2016 -0700
Committer: Marko A. Rodriguez <ok...@gmail.com>
Committed: Thu Jan 7 09:43:07 2016 -0700

----------------------------------------------------------------------
 .../process/computer/GiraphGraphComputer.java   |  6 +++---
 .../computer/GiraphHadoopGraphProvider.java     |  6 +++---
 .../structure/io/AbstractStorageCheck.java      | 22 ++++++++++++++++++++
 .../structure/io/FileSystemStorageCheck.java    |  8 +++++++
 .../structure/io/SparkContextStorageCheck.java  | 11 +++++++---
 5 files changed, 44 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/cc7cfffe/giraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/giraph/process/computer/GiraphGraphComputer.java
----------------------------------------------------------------------
diff --git a/giraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/giraph/process/computer/GiraphGraphComputer.java b/giraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/giraph/process/computer/GiraphGraphComputer.java
index dfe8e8c..82b3ec1 100644
--- a/giraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/giraph/process/computer/GiraphGraphComputer.java
+++ b/giraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/giraph/process/computer/GiraphGraphComputer.java
@@ -120,7 +120,7 @@ public final class GiraphGraphComputer extends AbstractHadoopGraphComputer imple
         return ComputerSubmissionHelper.runWithBackgroundThread(this::submitWithExecutor, "GiraphSubmitter");
     }
 
-    private Future<ComputerResult> submitWithExecutor(Executor exec) {
+    private Future<ComputerResult> submitWithExecutor(final Executor exec) {
         final long startTime = System.currentTimeMillis();
         final Configuration apacheConfiguration = ConfUtil.makeApacheConfiguration(this.giraphConfiguration);
         return CompletableFuture.<ComputerResult>supplyAsync(() -> {
@@ -203,9 +203,9 @@ public final class GiraphGraphComputer extends AbstractHadoopGraphComputer imple
                 MapReduceHelper.executeMapReduceJob(mapReduce, this.memory, this.giraphConfiguration);
             }
 
-            // if no persistence, delete the graph output
+            // if no persistence, delete the graph and memory output
             if (this.persist.equals(Persist.NOTHING))
-                storage.rm(Constants.getGraphLocation(this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION)));
+                storage.rm(this.giraphConfiguration.get(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION));
         } catch (final Exception e) {
             throw new IllegalStateException(e.getMessage(), e);
         }

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/cc7cfffe/giraph-gremlin/src/test/java/org/apache/tinkerpop/gremlin/giraph/process/computer/GiraphHadoopGraphProvider.java
----------------------------------------------------------------------
diff --git a/giraph-gremlin/src/test/java/org/apache/tinkerpop/gremlin/giraph/process/computer/GiraphHadoopGraphProvider.java b/giraph-gremlin/src/test/java/org/apache/tinkerpop/gremlin/giraph/process/computer/GiraphHadoopGraphProvider.java
index 9257006..07b7897 100644
--- a/giraph-gremlin/src/test/java/org/apache/tinkerpop/gremlin/giraph/process/computer/GiraphHadoopGraphProvider.java
+++ b/giraph-gremlin/src/test/java/org/apache/tinkerpop/gremlin/giraph/process/computer/GiraphHadoopGraphProvider.java
@@ -39,13 +39,13 @@ public final class GiraphHadoopGraphProvider extends HadoopGraphProvider {
     @Override
     public Map<String, Object> getBaseConfiguration(final String graphName, final Class<?> test, final String testMethodName, final LoadGraphWith.GraphData loadGraphWith) {
         final Map<String, Object> config = super.getBaseConfiguration(graphName, test, testMethodName, loadGraphWith);
-        config.put("mapreduce.job.reduces", 4);
+        config.put("mapreduce.job.reduces", 2);
         /// giraph configuration
         config.put(GiraphConstants.LOCAL_TEST_MODE.getKey(), true); // local testing can only spawn one worker
         config.put(GiraphConstants.MIN_WORKERS, 1);
         config.put(GiraphConstants.MAX_WORKERS, 1);
         config.put(GiraphConstants.SPLIT_MASTER_WORKER.getKey(), false);
-        config.put(GiraphConstants.ZOOKEEPER_SERVER_PORT.getKey(), 2181);  // you must have a local zookeeper running on this port
+        config.put(GiraphConstants.ZOOKEEPER_IS_EXTERNAL.getKey(), false);
         config.put(GiraphConstants.NETTY_SERVER_USE_EXECUTION_HANDLER.getKey(), false); // this prevents so many integration tests running out of threads
         config.put(GiraphConstants.NETTY_CLIENT_USE_EXECUTION_HANDLER.getKey(), false); // this prevents so many integration tests running out of threads
         config.put(GiraphConstants.NETTY_USE_DIRECT_MEMORY.getKey(), true);
@@ -53,7 +53,7 @@ public final class GiraphHadoopGraphProvider extends HadoopGraphProvider {
         config.put(GiraphConstants.NUM_COMPUTE_THREADS.getKey(), 2);
         config.put(GiraphConstants.MAX_MASTER_SUPERSTEP_WAIT_MSECS.getKey(), TimeUnit.MINUTES.toMillis(60L));
         config.put(GiraphConstants.VERTEX_OUTPUT_FORMAT_THREAD_SAFE.getKey(), false);
-        config.put(GiraphConstants.NUM_OUTPUT_THREADS.getKey(), 2);
+        config.put(GiraphConstants.NUM_OUTPUT_THREADS.getKey(), 1);
         return config;
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/cc7cfffe/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/structure/io/AbstractStorageCheck.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/structure/io/AbstractStorageCheck.java b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/structure/io/AbstractStorageCheck.java
index bec9c72..1a73093 100644
--- a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/structure/io/AbstractStorageCheck.java
+++ b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/structure/io/AbstractStorageCheck.java
@@ -24,9 +24,14 @@ import org.apache.tinkerpop.gremlin.hadoop.Constants;
 import org.apache.tinkerpop.gremlin.process.computer.ComputerResult;
 import org.apache.tinkerpop.gremlin.process.computer.clustering.peerpressure.ClusterCountMapReduce;
 import org.apache.tinkerpop.gremlin.process.computer.clustering.peerpressure.PeerPressureVertexProgram;
+import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal;
+import org.apache.tinkerpop.gremlin.structure.Graph;
+import org.apache.tinkerpop.gremlin.structure.Vertex;
 import org.apache.tinkerpop.gremlin.structure.io.Storage;
 import org.apache.tinkerpop.gremlin.util.iterator.IteratorUtils;
 
+import java.util.Map;
+
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
@@ -120,4 +125,21 @@ public abstract class AbstractStorageCheck extends AbstractGremlinTest {
         assertEquals(1, IteratorUtils.count(storage.head(outputLocation, "clusterCount", outputMemoryParserClass)));
         assertEquals(1, IteratorUtils.count(storage.head(newOutputLocation, "clusterCount", outputMemoryParserClass)));
     }
+
+    public void checkResidualDataInStorage(final Storage storage, final String outputLocation) throws Exception {
+        final GraphTraversal<Vertex, Long> traversal = g.V().both("knows").groupCount("m").by("age").count();
+        assertEquals(4l, traversal.next().longValue());
+        assertFalse(storage.exists(outputLocation));
+        assertFalse(storage.exists(Constants.getMemoryLocation(outputLocation, "m")));
+        assertFalse(storage.exists(Constants.getMemoryLocation(outputLocation, Graph.Hidden.hide("reducing"))));
+        assertFalse(storage.exists(Constants.getGraphLocation(outputLocation)));
+        ///
+        assertEquals(3, traversal.asAdmin().getSideEffects().<Map<Integer, Long>>get("m").get().size());
+        assertEquals(1, traversal.asAdmin().getSideEffects().<Map<Integer, Long>>get("m").get().get(27).longValue());
+        assertEquals(2, traversal.asAdmin().getSideEffects().<Map<Integer, Long>>get("m").get().get(29).longValue());
+        assertEquals(1, traversal.asAdmin().getSideEffects().<Map<Integer, Long>>get("m").get().get(32).longValue());
+        ///
+        assertEquals(4l, traversal.asAdmin().getSideEffects().<Long>get(Graph.Hidden.hide("reducing")).get().longValue());
+
+    }
 }

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/cc7cfffe/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/structure/io/FileSystemStorageCheck.java
----------------------------------------------------------------------
diff --git a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/structure/io/FileSystemStorageCheck.java b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/structure/io/FileSystemStorageCheck.java
index 1b2c04e..846582e 100644
--- a/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/structure/io/FileSystemStorageCheck.java
+++ b/hadoop-gremlin/src/test/java/org/apache/tinkerpop/gremlin/hadoop/structure/io/FileSystemStorageCheck.java
@@ -70,6 +70,14 @@ public class FileSystemStorageCheck extends AbstractStorageCheck {
 
     }
 
+    @Test
+    @LoadGraphWith(LoadGraphWith.GraphData.MODERN)
+    public void shouldNotHaveResidualDataInStorage() throws Exception {
+        final Storage storage = FileSystemStorage.open(ConfUtil.makeHadoopConfiguration(graph.configuration()));
+        final String outputLocation = graph.configuration().getString(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION);
+        super.checkResidualDataInStorage(storage, outputLocation);
+    }
+
     private static void deleteDirectory(final String location) throws IOException {
         // TestHelper creates the directory and we need it not to exist
         assertTrue(new File(location).isDirectory());

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/cc7cfffe/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/structure/io/SparkContextStorageCheck.java
----------------------------------------------------------------------
diff --git a/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/structure/io/SparkContextStorageCheck.java b/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/structure/io/SparkContextStorageCheck.java
index 9d9fa37..c5746b6 100644
--- a/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/structure/io/SparkContextStorageCheck.java
+++ b/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/structure/io/SparkContextStorageCheck.java
@@ -27,8 +27,6 @@ import org.apache.tinkerpop.gremlin.structure.io.Storage;
 import org.junit.Before;
 import org.junit.Test;
 
-import static org.junit.Assert.assertFalse;
-
 /**
  * @author Marko A. Rodriguez (http://markorodriguez.com)
  */
@@ -46,7 +44,6 @@ public class SparkContextStorageCheck extends AbstractStorageCheck {
     public void shouldSupportHeadMethods() throws Exception {
         final Storage storage = SparkContextStorage.open("local[4]");
         final String outputLocation = graph.configuration().getString(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION);
-        assertFalse(storage.exists(outputLocation));
         super.checkHeadMethods(storage, graph.configuration().getString(Constants.GREMLIN_HADOOP_INPUT_LOCATION), outputLocation, PersistedInputRDD.class, PersistedInputRDD.class);
     }
 
@@ -66,4 +63,12 @@ public class SparkContextStorageCheck extends AbstractStorageCheck {
         final String newOutputLocation = "new-location-for-copy";
         super.checkCopyMethods(storage, outputLocation, newOutputLocation, PersistedInputRDD.class, PersistedInputRDD.class);
     }
+
+    @Test
+    @LoadGraphWith(LoadGraphWith.GraphData.MODERN)
+    public void shouldNotHaveResidualDataInStorage() throws Exception {
+        final Storage storage = SparkContextStorage.open("local[4]");
+        final String outputLocation = graph.configuration().getString(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION);
+        super.checkResidualDataInStorage(storage, outputLocation);
+    }
 }
\ No newline at end of file