You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tinkerpop.apache.org by sp...@apache.org on 2017/01/03 15:44:11 UTC

[02/50] [abbrv] tinkerpop git commit: Limit JVM system props passed around in Spark jobs

Limit JVM system props passed around in Spark jobs

Prior to this commit, SparkGraphComputer (SGC) indiscriminately set
every gremlin.* and spark.* entry of the job config as a JVM system
property on Spark executors.  It didn't account for the fact that
some config values (e.g. spark.job.description) could contain spaces.
Such a value was not quoted, which led to Spark workers failing to
start because part of the unquoted value was erroneously interpreted
as the JVM main class.

This commit makes SGC pass only two config settings as system props
(each emitted with a "tinkerpop." prefix):

 * gremlin.io.registry
 * gremlin.io.kryoShimService


Project: http://git-wip-us.apache.org/repos/asf/tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/tinkerpop/commit/f7b71b37
Tree: http://git-wip-us.apache.org/repos/asf/tinkerpop/tree/f7b71b37
Diff: http://git-wip-us.apache.org/repos/asf/tinkerpop/diff/f7b71b37

Branch: refs/heads/TINKERPOP-1130
Commit: f7b71b376fef46327b25bc4800860cfb404c8613
Parents: 18eae6d
Author: Dan LaRocque <da...@hopcount.org>
Authored: Thu Nov 17 11:33:58 2016 -0500
Committer: Marko A. Rodriguez <ok...@gmail.com>
Committed: Tue Nov 29 04:58:19 2016 -0700

----------------------------------------------------------------------
 .../spark/process/computer/SparkGraphComputer.java | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/f7b71b37/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java
----------------------------------------------------------------------
diff --git a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java
index 4e74088..ee3ebe1 100644
--- a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java
+++ b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java
@@ -71,11 +71,16 @@ import org.apache.tinkerpop.gremlin.spark.structure.io.PersistedOutputRDD;
 import org.apache.tinkerpop.gremlin.spark.structure.io.SparkContextStorage;
 import org.apache.tinkerpop.gremlin.spark.structure.io.gryo.kryoshim.unshaded.UnshadedKryoShimService;
 import org.apache.tinkerpop.gremlin.structure.Direction;
+import org.apache.tinkerpop.gremlin.structure.io.IoRegistry;
 import org.apache.tinkerpop.gremlin.structure.io.Storage;
 import org.apache.tinkerpop.gremlin.structure.io.gryo.kryoshim.KryoShimServiceLoader;
 
 import java.io.File;
 import java.io.IOException;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Set;
 import java.util.concurrent.Executor;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
@@ -91,6 +96,16 @@ public final class SparkGraphComputer extends AbstractHadoopGraphComputer {
     private boolean workersSet = false;
     private final ThreadFactory threadFactoryBoss = new BasicThreadFactory.Builder().namingPattern(SparkGraphComputer.class.getSimpleName() + "-boss").build();
 
+    private static final Set<String> KEYS_PASSED_IN_JVM_SYSTEM_PROPERTIES;
+
+    static
+    {
+        Set<String> s = new HashSet<>();
+        s.add(KryoShimServiceLoader.KRYO_SHIM_SERVICE);
+        s.add(IoRegistry.IO_REGISTRY);
+        KEYS_PASSED_IN_JVM_SYSTEM_PROPERTIES = Collections.unmodifiableSet(s);
+    }
+
     /**
      * An {@code ExecutorService} that schedules up background work. Since a {@link GraphComputer} is only used once
      * for a {@link VertexProgram} a single threaded executor is sufficient.
@@ -146,7 +161,7 @@ public final class SparkGraphComputer extends AbstractHadoopGraphComputer {
             ///////////
             final StringBuilder params = new StringBuilder();
             this.sparkConfiguration.getKeys().forEachRemaining(key -> {
-                if (key.startsWith("gremlin") || key.startsWith("spark")) {
+                if (KEYS_PASSED_IN_JVM_SYSTEM_PROPERTIES.contains(key)) {
                     params.append(" -D").append("tinkerpop.").append(key).append("=").append(this.sparkConfiguration.getProperty(key));
                     System.setProperty("tinkerpop." + key, this.sparkConfiguration.getProperty(key).toString());
                 }