You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tinkerpop.apache.org by sp...@apache.org on 2017/01/03 15:44:16 UTC
[07/50] [abbrv] tinkerpop git commit: merged Spark 2.0.0 work.
Massive undertaking that provided us performance improvements. Thanks @dalaro
and @yucx.
merged Spark 2.0.0 work. Massive undertaking that provided us performance improvements. Thanks @dalaro and @yucx.
Project: http://git-wip-us.apache.org/repos/asf/tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/tinkerpop/commit/3fc700fd
Tree: http://git-wip-us.apache.org/repos/asf/tinkerpop/tree/3fc700fd
Diff: http://git-wip-us.apache.org/repos/asf/tinkerpop/diff/3fc700fd
Branch: refs/heads/TINKERPOP-1130
Commit: 3fc700fdc19a6cb44d57aecf457a00b8eba0346a
Parents: 77bbb42 b8a2452
Author: Marko A. Rodriguez <ok...@gmail.com>
Authored: Tue Jan 3 07:43:41 2017 -0700
Committer: Marko A. Rodriguez <ok...@gmail.com>
Committed: Tue Jan 3 07:43:41 2017 -0700
----------------------------------------------------------------------
CHANGELOG.asciidoc | 4 +
docs/src/upgrade/release-3.3.x.asciidoc | 22 +++
giraph-gremlin/pom.xml | 4 +-
.../process/computer/GiraphGraphComputer.java | 2 +
.../giraph/GiraphGremlinIntegrateTest.java | 33 +++++
.../gremlin/giraph/GiraphGremlinSuite.java | 35 +++++
.../structure/io/GiraphIoRegistryCheck.java | 59 ++++++++
.../gremlin/structure/io/IoRegistry.java | 3 +
.../tinkerpop/gremlin/structure/io/Mapper.java | 19 +++
.../gremlin/structure/io/gryo/GryoMapper.java | 115 ++++++++++++++-
.../gremlin/structure/io/gryo/GryoPool.java | 56 +-------
.../io/gryo/kryoshim/KryoShimService.java | 12 +-
.../io/gryo/kryoshim/KryoShimServiceLoader.java | 139 +++++++++----------
.../gryo/kryoshim/shaded/ShadedKryoAdapter.java | 18 +--
.../shaded/ShadedSerializerAdapter.java | 6 +-
.../structure/io/util/IoRegistryHelper.java | 88 ++++++++++++
.../tinkerpop/gremlin/util/SystemUtil.java | 55 ++++++++
.../gremlin/structure/io/gryo/GryoPoolTest.java | 25 ++--
.../tinkerpop/gremlin/util/SystemUtilTest.java | 89 ++++++++++++
.../gremlin/process/ProcessComputerSuite.java | 2 +-
hadoop-gremlin/conf/hadoop-graphson.properties | 2 +
.../conf/hadoop-grateful-gryo.properties | 25 ++--
hadoop-gremlin/conf/hadoop-gryo.properties | 6 +-
hadoop-gremlin/conf/hadoop-script.properties | 5 +-
hadoop-gremlin/pom.xml | 24 ++--
.../tinkerpop/gremlin/hadoop/Constants.java | 2 +
.../structure/io/HadoopPoolShimService.java | 52 +++----
.../hadoop/structure/io/HadoopPools.java | 23 ++-
.../structure/io/HadoopPoolsConfigurable.java | 2 +-
.../io/graphson/GraphSONRecordReader.java | 13 +-
.../io/graphson/GraphSONRecordWriter.java | 12 +-
.../structure/io/gryo/GryoRecordReader.java | 13 +-
.../structure/io/gryo/GryoRecordWriter.java | 14 +-
.../gremlin/hadoop/HadoopGraphProvider.java | 21 ++-
.../structure/io/AbstractIoRegistryCheck.java | 122 ++++++++++++++++
.../GraphSONRecordReaderWriterTest.java | 2 +-
.../hadoop/structure/io/gryo/ToyIoRegistry.java | 70 ++++++++++
.../hadoop/structure/io/gryo/ToyPoint.java | 113 +++++++++++++++
.../hadoop/structure/io/gryo/ToyTriangle.java | 120 ++++++++++++++++
spark-gremlin/pom.xml | 56 +++++++-
.../io/gryo/CompactBufferSerializer.groovy | 2 -
.../spark/process/computer/SparkExecutor.java | 18 +--
.../process/computer/SparkGraphComputer.java | 48 ++++++-
.../SparkStarBarrierInterceptor.java | 10 +-
.../structure/io/gryo/GryoRegistrator.java | 30 +++-
.../spark/structure/io/gryo/GryoSerializer.java | 102 ++++++++------
.../io/gryo/IoRegistryAwareKryoSerializer.java | 110 ---------------
.../io/gryo/ObjectWritableSerializer.java | 1 -
.../structure/io/gryo/Tuple2Serializer.java | 2 -
.../structure/io/gryo/Tuple3Serializer.java | 3 -
.../io/gryo/VertexWritableSerializer.java | 1 -
.../io/gryo/WrappedArraySerializer.java | 1 -
.../unshaded/UnshadedKryoShimService.java | 109 +++++----------
.../unshaded/UnshadedSerializerAdapter.java | 9 +-
.../gremlin/spark/AbstractSparkTest.java | 11 +-
.../spark/SparkGremlinGryoSerializerTest.java | 33 +++++
.../gremlin/spark/SparkGremlinSuite.java | 3 +-
...tratorGraphComputerProcessIntegrateTest.java | 33 -----
...alizerGraphComputerProcessIntegrateTest.java | 33 +++++
...SparkHadoopGraphGryoRegistratorProvider.java | 52 -------
.../SparkHadoopGraphGryoSerializerProvider.java | 46 ++++++
.../computer/SparkHadoopGraphProvider.java | 28 +++-
.../structure/io/SparkContextStorageCheck.java | 11 +-
.../structure/io/SparkIoRegistryCheck.java | 64 +++++++++
64 files changed, 1621 insertions(+), 622 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/3fc700fd/CHANGELOG.asciidoc
----------------------------------------------------------------------
diff --cc CHANGELOG.asciidoc
index c233f0d,c08fecd..31e4a92
--- a/CHANGELOG.asciidoc
+++ b/CHANGELOG.asciidoc
@@@ -47,6 -46,10 +47,10 @@@ TinkerPop 3.3.0 (Release Date: NOT OFFI
* Removed all performance tests that were not part of `gremlin-benchmark`.
* Removed dependency on `junit-benchmarks` and its related reference to `h2`.
* Moved the source for the "home page" into the repository under `/site` so that it is easier to accept contributions.
-* Bumped to support Spark 2.0.0.
++* Bumped to support Spark 2.0.0 with Scala 2.11.
+ * Added `UnshadedKryoShimService` as the new default serializer model for `SparkGraphComputer`.
+ * `GryoRegistrator` is more efficient than the previous `GryoSerializer` model in `SparkGraphComputer`.
+ * Added support for `IoRegistry` custom serialization in Spark/Giraph and provided a general `hadoop-gremlin` test suite.
* Replaced term `REST` with `HTTP` to remove any confusion as to the design of the API.
* Moved `gremlin-benchmark` under `gremlin-tools` module.
* Added `gremlin-tools` and its submodule `gremlin-coverage`.
http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/3fc700fd/docs/src/upgrade/release-3.3.x.asciidoc
----------------------------------------------------------------------
diff --cc docs/src/upgrade/release-3.3.x.asciidoc
index 542ff89,335696d..30a404f
--- a/docs/src/upgrade/release-3.3.x.asciidoc
+++ b/docs/src/upgrade/release-3.3.x.asciidoc
@@@ -147,3 -140,23 +147,25 @@@ Gremlin-server.ba
^^^^^^^^^^^^^^^^^^
The switch name has changed for installing dependencies. `-i` has been deprecated and replaced by `install`.
+
+ SparkGraphComputer GryoRegistrator
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ Historically, `SparkGraphComputer` has used `GryoSerializer` to handle the serialization of objects in Spark. It
+ exists because TinkerPop uses a shaded version of Kryo and thus couldn't use the standard `KryoSerializer`-model
+ provided by Spark. However, a "shim model" was created which allows for the shaded and unshaded versions of Kryo to
+ interact with one another. To this end, `KryoSerializer` can now be used with a `GryoRegistrator`. The properties file
+ for a `SparkGraphComputer` now looks as follows:
+
+ ```
+ spark.serializer=org.apache.spark.serializer.KryoSerializer
+ spark.kryo.registrator=org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoRegistrator
+ ```
+
+ If the old `GryoSerializer` model is desired, then the properties file should simply look as before:
+
+ ```
+ spark.serializer=org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoSerializer
-```
++```
++
++See: link:https://issues.apache.org/jira/browse/TINKERPOP-1389
http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/3fc700fd/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/structure/io/gryo/GryoMapper.java
----------------------------------------------------------------------
diff --cc gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/structure/io/gryo/GryoMapper.java
index c42a4fc,7b3a6b4..48b670c
--- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/structure/io/gryo/GryoMapper.java
+++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/structure/io/gryo/GryoMapper.java
@@@ -18,13 -18,100 +18,15 @@@
*/
package org.apache.tinkerpop.gremlin.structure.io.gryo;
+ import org.apache.commons.lang.builder.ToStringBuilder;
-import org.apache.tinkerpop.gremlin.process.computer.GraphFilter;
-import org.apache.tinkerpop.gremlin.process.computer.MapReduce;
-import org.apache.tinkerpop.gremlin.process.computer.traversal.strategy.decoration.VertexProgramStrategy;
-import org.apache.tinkerpop.gremlin.process.computer.traversal.strategy.optimization.GraphFilterStrategy;
-import org.apache.tinkerpop.gremlin.process.computer.util.MapMemory;
-import org.apache.tinkerpop.gremlin.process.remote.traversal.DefaultRemoteTraverser;
-import org.apache.tinkerpop.gremlin.process.traversal.Bytecode;
-import org.apache.tinkerpop.gremlin.process.traversal.Contains;
-import org.apache.tinkerpop.gremlin.process.traversal.Operator;
-import org.apache.tinkerpop.gremlin.process.traversal.Order;
-import org.apache.tinkerpop.gremlin.process.traversal.P;
-import org.apache.tinkerpop.gremlin.process.traversal.Path;
-import org.apache.tinkerpop.gremlin.process.traversal.Pop;
-import org.apache.tinkerpop.gremlin.process.traversal.SackFunctions;
-import org.apache.tinkerpop.gremlin.process.traversal.Scope;
-import org.apache.tinkerpop.gremlin.process.traversal.step.TraversalOptionParent;
-import org.apache.tinkerpop.gremlin.process.traversal.step.filter.RangeGlobalStep;
-import org.apache.tinkerpop.gremlin.process.traversal.step.map.FoldStep;
-import org.apache.tinkerpop.gremlin.process.traversal.step.map.GroupCountStep;
-import org.apache.tinkerpop.gremlin.process.traversal.step.map.GroupStep;
-import org.apache.tinkerpop.gremlin.process.traversal.step.map.GroupStepV3d0;
-import org.apache.tinkerpop.gremlin.process.traversal.step.map.MatchStep;
-import org.apache.tinkerpop.gremlin.process.traversal.step.map.MeanGlobalStep;
-import org.apache.tinkerpop.gremlin.process.traversal.step.map.OrderGlobalStep;
-import org.apache.tinkerpop.gremlin.process.traversal.step.map.TreeStep;
-import org.apache.tinkerpop.gremlin.process.traversal.step.util.BulkSet;
-import org.apache.tinkerpop.gremlin.process.traversal.step.util.ProfileStep;
-import org.apache.tinkerpop.gremlin.process.traversal.step.util.Tree;
-import org.apache.tinkerpop.gremlin.process.traversal.strategy.decoration.ConnectiveStrategy;
-import org.apache.tinkerpop.gremlin.process.traversal.strategy.decoration.HaltedTraverserStrategy;
-import org.apache.tinkerpop.gremlin.process.traversal.strategy.decoration.PartitionStrategy;
-import org.apache.tinkerpop.gremlin.process.traversal.strategy.decoration.SubgraphStrategy;
-import org.apache.tinkerpop.gremlin.process.traversal.strategy.finalization.MatchAlgorithmStrategy;
-import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.AdjacentToIncidentStrategy;
-import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.FilterRankingStrategy;
-import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.IdentityRemovalStrategy;
-import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.IncidentToAdjacentStrategy;
-import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.InlineFilterStrategy;
-import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.LazyBarrierStrategy;
-import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.MatchPredicateStrategy;
-import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.OrderLimitStrategy;
-import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.PathProcessorStrategy;
-import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.PathRetractionStrategy;
-import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.RangeByIsCountStrategy;
-import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.RepeatUnrollStrategy;
-import org.apache.tinkerpop.gremlin.process.traversal.strategy.verification.LambdaRestrictionStrategy;
-import org.apache.tinkerpop.gremlin.process.traversal.strategy.verification.ReadOnlyStrategy;
-import org.apache.tinkerpop.gremlin.process.traversal.traverser.B_LP_O_P_S_SE_SL_Traverser;
-import org.apache.tinkerpop.gremlin.process.traversal.traverser.B_LP_O_S_SE_SL_Traverser;
-import org.apache.tinkerpop.gremlin.process.traversal.traverser.B_O_S_SE_SL_Traverser;
-import org.apache.tinkerpop.gremlin.process.traversal.traverser.B_O_Traverser;
-import org.apache.tinkerpop.gremlin.process.traversal.traverser.LP_O_OB_P_S_SE_SL_Traverser;
-import org.apache.tinkerpop.gremlin.process.traversal.traverser.LP_O_OB_S_SE_SL_Traverser;
-import org.apache.tinkerpop.gremlin.process.traversal.traverser.O_OB_S_SE_SL_Traverser;
-import org.apache.tinkerpop.gremlin.process.traversal.traverser.O_Traverser;
-import org.apache.tinkerpop.gremlin.process.traversal.traverser.util.TraverserSet;
-import org.apache.tinkerpop.gremlin.process.traversal.util.AndP;
-import org.apache.tinkerpop.gremlin.process.traversal.util.DefaultTraversalMetrics;
-import org.apache.tinkerpop.gremlin.process.traversal.util.ImmutableMetrics;
-import org.apache.tinkerpop.gremlin.process.traversal.util.MutableMetrics;
-import org.apache.tinkerpop.gremlin.process.traversal.util.OrP;
-import org.apache.tinkerpop.gremlin.process.traversal.util.TraversalExplanation;
-import org.apache.tinkerpop.gremlin.structure.Column;
-import org.apache.tinkerpop.gremlin.structure.Direction;
-import org.apache.tinkerpop.gremlin.structure.Edge;
import org.apache.tinkerpop.gremlin.structure.Graph;
-import org.apache.tinkerpop.gremlin.structure.Property;
-import org.apache.tinkerpop.gremlin.structure.T;
-import org.apache.tinkerpop.gremlin.structure.Vertex;
-import org.apache.tinkerpop.gremlin.structure.VertexProperty;
import org.apache.tinkerpop.gremlin.structure.io.IoRegistry;
import org.apache.tinkerpop.gremlin.structure.io.Mapper;
import org.apache.tinkerpop.gremlin.structure.io.gryo.kryoshim.SerializerShim;
+ import org.apache.tinkerpop.gremlin.structure.io.gryo.kryoshim.shaded.ShadedSerializerAdapter;
-import org.apache.tinkerpop.gremlin.structure.util.detached.DetachedEdge;
-import org.apache.tinkerpop.gremlin.structure.util.detached.DetachedPath;
-import org.apache.tinkerpop.gremlin.structure.util.detached.DetachedProperty;
-import org.apache.tinkerpop.gremlin.structure.util.detached.DetachedVertex;
-import org.apache.tinkerpop.gremlin.structure.util.detached.DetachedVertexProperty;
-import org.apache.tinkerpop.gremlin.structure.util.reference.ReferenceEdge;
-import org.apache.tinkerpop.gremlin.structure.util.reference.ReferencePath;
-import org.apache.tinkerpop.gremlin.structure.util.reference.ReferenceProperty;
-import org.apache.tinkerpop.gremlin.structure.util.reference.ReferenceVertex;
-import org.apache.tinkerpop.gremlin.structure.util.reference.ReferenceVertexProperty;
-import org.apache.tinkerpop.gremlin.structure.util.star.StarGraph;
-import org.apache.tinkerpop.gremlin.structure.util.star.StarGraphSerializer;
-import org.apache.tinkerpop.gremlin.util.function.HashSetSupplier;
-import org.apache.tinkerpop.gremlin.util.function.Lambda;
import org.apache.tinkerpop.shaded.kryo.ClassResolver;
import org.apache.tinkerpop.shaded.kryo.Kryo;
-import org.apache.tinkerpop.shaded.kryo.KryoSerializable;
import org.apache.tinkerpop.shaded.kryo.Serializer;
-import org.apache.tinkerpop.shaded.kryo.serializers.JavaSerializer;
import org.apache.tinkerpop.shaded.kryo.util.DefaultStreamFactory;
import org.apache.tinkerpop.shaded.kryo.util.MapReferenceResolver;
import org.javatuples.Pair;
@@@ -155,7 -451,22 +157,8 @@@ public final class GryoMapper implement
private boolean referenceTracking = true;
private Supplier<ClassResolver> classResolver = GryoClassResolver::new;
- private Builder() { }
+ private Builder() {
- // Validate the default registrations
- // For justification of these default registration rules, see TinkerPopKryoRegistrator
- for (TypeRegistration<?> tr : typeRegistrations) {
- if (tr.hasSerializer() /* no serializer is acceptable */ &&
- null == tr.getSerializerShim() /* a shim serializer is acceptable */ &&
- !(tr.getShadedSerializer() instanceof JavaSerializer) /* shaded JavaSerializer is acceptable */) {
- // everything else is invalid
- final String msg = String.format("The default GryoMapper type registration %s is invalid. " +
- "It must supply either an implementation of %s or %s, but supplies neither. " +
- "This is probably a bug in GryoMapper's default serialization class registrations.", tr,
- SerializerShim.class.getCanonicalName(), JavaSerializer.class.getCanonicalName());
- throw new IllegalStateException(msg);
- }
- }
+ }
/**
* {@inheritDoc}
@@@ -304,4 -607,112 +309,112 @@@
typeRegistrations.add(newRegistrationBuilder.apply(registrationId));
}
}
+
+ private static class GryoTypeReg<T> implements TypeRegistration<T> {
+
+ private final Class<T> clazz;
+ private final Serializer<T> shadedSerializer;
+ private final SerializerShim<T> serializerShim;
+ private final Function<Kryo, Serializer> functionOfShadedKryo;
+ private final int id;
+
+ private GryoTypeReg(final Class<T> clazz,
+ final Serializer<T> shadedSerializer,
+ final SerializerShim<T> serializerShim,
+ final Function<Kryo, Serializer> functionOfShadedKryo,
+ final int id) {
+ this.clazz = clazz;
+ this.shadedSerializer = shadedSerializer;
+ this.serializerShim = serializerShim;
+ this.functionOfShadedKryo = functionOfShadedKryo;
+ this.id = id;
+
+ int serializerCount = 0;
+ if (null != this.shadedSerializer)
+ serializerCount++;
+ if (null != this.serializerShim)
+ serializerCount++;
+ if (null != this.functionOfShadedKryo)
+ serializerCount++;
+
+ if (1 < serializerCount) {
+ final String msg = String.format(
+ "GryoTypeReg accepts at most one kind of serializer, but multiple " +
+ "serializers were supplied for class %s (id %s). " +
+ "Shaded serializer: %s. Shim serializer: %s. Shaded serializer function: %s.",
+ this.clazz.getCanonicalName(), id,
+ this.shadedSerializer, this.serializerShim, this.functionOfShadedKryo);
+ throw new IllegalArgumentException(msg);
+ }
+ }
+
+ private static <T> GryoTypeReg<T> of(final Class<T> clazz, final int id) {
+ return new GryoTypeReg<>(clazz, null, null, null, id);
+ }
+
+ private static <T> GryoTypeReg<T> of(final Class<T> clazz, final int id, final Serializer<T> shadedSerializer) {
+ return new GryoTypeReg<>(clazz, shadedSerializer, null, null, id);
+ }
+
+ private static <T> GryoTypeReg<T> of(final Class<T> clazz, final int id, final SerializerShim<T> serializerShim) {
+ return new GryoTypeReg<>(clazz, null, serializerShim, null, id);
+ }
+
+ private static <T> GryoTypeReg<T> of(final Class clazz, final int id, final Function<Kryo, Serializer> fct) {
+ return new GryoTypeReg<>(clazz, null, null, fct, id);
+ }
+
+ @Override
+ public Serializer<T> getShadedSerializer() {
+ return shadedSerializer;
+ }
+
+ @Override
+ public SerializerShim<T> getSerializerShim() {
+ return serializerShim;
+ }
+
+ @Override
+ public Function<Kryo, Serializer> getFunctionOfShadedKryo() {
+ return functionOfShadedKryo;
+ }
+
+ @Override
+ public Class<T> getTargetClass() {
+ return clazz;
+ }
+
+ @Override
+ public int getId() {
+ return id;
+ }
+
+ @Override
+ public Kryo registerWith(final Kryo kryo) {
+ if (null != functionOfShadedKryo)
+ kryo.register(clazz, functionOfShadedKryo.apply(kryo), id);
+ else if (null != shadedSerializer)
+ kryo.register(clazz, shadedSerializer, id);
+ else if (null != serializerShim)
+ kryo.register(clazz, new ShadedSerializerAdapter<>(serializerShim), id);
+ else {
+ kryo.register(clazz, kryo.getDefaultSerializer(clazz), id);
+ // Surprisingly, the preceding call is not equivalent to
+ // kryo.register(clazz, id);
+ }
+
+ return kryo;
+ }
+
+ @Override
+ public String toString() {
+ return new ToStringBuilder(this)
+ .append("targetClass", clazz)
+ .append("id", id)
+ .append("shadedSerializer", shadedSerializer)
+ .append("serializerShim", serializerShim)
+ .append("functionOfShadedKryo", functionOfShadedKryo)
+ .toString();
+ }
+ }
-}
+}
http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/3fc700fd/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/ProcessComputerSuite.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tinkerpop/blob/3fc700fd/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/structure/io/gryo/kryoshim/unshaded/UnshadedKryoShimService.java
----------------------------------------------------------------------