You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tinkerpop.apache.org by sp...@apache.org on 2022/01/14 02:00:02 UTC

[tinkerpop] 02/02: wip

This is an automated email from the ASF dual-hosted git repository.

spmallette pushed a commit to branch TINKERPOP-2681
in repository https://gitbox.apache.org/repos/asf/tinkerpop.git

commit 0b5ee8da7e5ffbe278560c9d7f36e8d42413502a
Author: Stephen Mallette <st...@amazon.com>
AuthorDate: Thu Jan 13 20:57:57 2022 -0500

    wip
---
 .../traversal/step/map/MergeVertexStep.java        | 32 +++++---
 .../traversal/step/map/TinkerMergeVertexStep.java  | 87 ++++++++++++++++++++++
 .../TinkerMergeVertexStepStrategy.java             | 53 +++++++++++++
 .../gremlin/tinkergraph/structure/TinkerGraph.java |  4 +-
 4 files changed, 166 insertions(+), 10 deletions(-)

diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MergeVertexStep.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MergeVertexStep.java
index ce77200..319db5d 100644
--- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MergeVertexStep.java
+++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/MergeVertexStep.java
@@ -127,7 +127,7 @@ public class MergeVertexStep<S> extends FlatMapStep<S, Vertex> implements Mutati
 
     @Override
     public Parameters getParameters() {
-        // merge doesn't take foldups of property() calls. those need to get treated as regular old PropertyStep
+        // merge doesn't take fold ups of property() calls. those need to get treated as regular old PropertyStep
         // instances. not sure if this should support with() though.....none of the other Mutating steps do.
         return null;
     }
@@ -143,15 +143,19 @@ public class MergeVertexStep<S> extends FlatMapStep<S, Vertex> implements Mutati
         return super.processNextStart();
     }
 
-    @Override
-    protected Iterator<Vertex> flatMap(final Traverser.Admin<S> traverser) {
-        final Map<Object,Object> searchCreate = TraversalUtil.apply(traverser, searchCreateTraversal);
-
+    /**
+     * Use the {@code Map} of search criteria to most efficiently return a {@code Stream<Vertex>} of matching elements.
+     * Providers might override this method when extending this step to provide their own optimized mechanisms for
+     * matching the list of vertices. This implementation is only optimized for the {@link T#id} so any other usage
+     * will simply be in-memory filtering which could be slow.
+     */
+    protected Stream<Vertex> createSearchStream(final Map<Object,Object> search) {
         final Graph graph = this.getTraversal().getGraph().get();
+
         Stream<Vertex> stream;
         // prioritize lookup by id
-        if (searchCreate.containsKey(T.id))
-            stream = IteratorUtils.stream(graph.vertices(searchCreate.get(T.id)));
+        if (search.containsKey(T.id))
+            stream = IteratorUtils.stream(graph.vertices(search.get(T.id)));
         else
             stream = IteratorUtils.stream(graph.vertices());
 
@@ -159,7 +163,7 @@ public class MergeVertexStep<S> extends FlatMapStep<S, Vertex> implements Mutati
         // for other Mutation steps
         stream = stream.filter(v -> {
             // try to match on all search criteria skipping T.id as it was handled above
-            return searchCreate.entrySet().stream().filter(kv -> kv.getKey() != T.id).allMatch(kv -> {
+            return search.entrySet().stream().filter(kv -> kv.getKey() != T.id).allMatch(kv -> {
                 if (kv.getKey() == T.label) {
                     return v.label().equals(kv.getValue());
                 } else {
@@ -167,7 +171,17 @@ public class MergeVertexStep<S> extends FlatMapStep<S, Vertex> implements Mutati
                     return vp.isPresent() && kv.getValue().equals(vp.value());
                 }
             });
-        }).map(v -> {
+        });
+
+        return stream;
+    }
+
+    @Override
+    protected Iterator<Vertex> flatMap(final Traverser.Admin<S> traverser) {
+        final Map<Object,Object> searchCreate = TraversalUtil.apply(traverser, searchCreateTraversal);
+
+        Stream<Vertex> stream = createSearchStream(searchCreate);
+        stream = stream.map(v -> {
             // if no onMatch is defined then there is no update - return the vertex unchanged
             if (null == onMatchTraversal) return v;
 
diff --git a/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/process/traversal/step/map/TinkerMergeVertexStep.java b/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/process/traversal/step/map/TinkerMergeVertexStep.java
new file mode 100644
index 0000000..5876879
--- /dev/null
+++ b/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/process/traversal/step/map/TinkerMergeVertexStep.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.tinkerpop.gremlin.tinkergraph.process.traversal.step.map;
+
+import org.apache.tinkerpop.gremlin.process.traversal.Merge;
+import org.apache.tinkerpop.gremlin.process.traversal.Traversal;
+import org.apache.tinkerpop.gremlin.process.traversal.step.map.MergeVertexStep;
+import org.apache.tinkerpop.gremlin.structure.Graph;
+import org.apache.tinkerpop.gremlin.structure.T;
+import org.apache.tinkerpop.gremlin.structure.Vertex;
+import org.apache.tinkerpop.gremlin.structure.VertexProperty;
+import org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerGraph;
+import org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerHelper;
+import org.apache.tinkerpop.gremlin.util.iterator.IteratorUtils;
+
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Stream;
+
+/**
+ * Optimizes {@code mergeV()} searches by attempting to use an index where possible.
+ */
+public class TinkerMergeVertexStep<S> extends MergeVertexStep<S> {
+    public TinkerMergeVertexStep(final MergeVertexStep step) {
+        super(step.getTraversal(), step.isStart(), step.getSearchCreateTraversal());
+        if (step.getOnMatchTraversal() != null) this.addChildOption(Merge.onMatch, step.getOnMatchTraversal());
+        if (step.getOnCreateTraversal() != null) this.addChildOption(Merge.onCreate, step.getOnCreateTraversal());
+    }
+
+    @Override
+    protected Stream<Vertex> createSearchStream(final Map<Object, Object> search) {
+        final TinkerGraph graph = (TinkerGraph) this.getTraversal().getGraph().get();
+        Optional<String> firstIndex = Optional.empty();
+
+        Stream<Vertex> stream;
+        // prioritize lookup by id but otherwise attempt an index lookup
+        if (search.containsKey(T.id)) {
+            stream = IteratorUtils.stream(graph.vertices(search.get(T.id)));
+        } else {
+            // look for the first index we can find - that's the lucky winner. may or may not be the most selective
+            final Set<String> indexedKeys = graph.getIndexedKeys(Vertex.class);
+            firstIndex = search.keySet().stream().
+                    filter(k -> k instanceof String).
+                    map(k -> (String) k).
+                    filter(indexedKeys::contains).findFirst();
+
+            // use the index if possible otherwise just in memory filter
+            stream = firstIndex.map(s -> TinkerHelper.queryVertexIndex(graph, s, search.get(s)).stream().map(v -> (Vertex) v)).
+                    orElseGet(() -> IteratorUtils.stream(graph.vertices()));
+        }
+
+        final Optional<String> indexUsed = firstIndex;
+        stream = stream.filter(v -> {
+            // try to match on all search criteria skipping T.id as it was handled above
+            return search.entrySet().stream().filter(kv -> {
+                final Object k = kv.getKey();
+                return k != T.id && !(indexUsed.isPresent() && indexUsed.get().equals(k));
+            }).allMatch(kv -> {
+                if (kv.getKey() == T.label) {
+                    return v.label().equals(kv.getValue());
+                } else {
+                    final VertexProperty<Object> vp = v.property(kv.getKey().toString());
+                    return vp.isPresent() && kv.getValue().equals(vp.value());
+                }
+            });
+        });
+
+        return stream;
+    }
+}
diff --git a/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/process/traversal/strategy/optimization/TinkerMergeVertexStepStrategy.java b/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/process/traversal/strategy/optimization/TinkerMergeVertexStepStrategy.java
new file mode 100644
index 0000000..e5806ea
--- /dev/null
+++ b/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/process/traversal/strategy/optimization/TinkerMergeVertexStepStrategy.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.tinkerpop.gremlin.tinkergraph.process.traversal.strategy.optimization;
+
+import org.apache.tinkerpop.gremlin.process.traversal.Traversal;
+import org.apache.tinkerpop.gremlin.process.traversal.TraversalStrategy;
+import org.apache.tinkerpop.gremlin.process.traversal.step.map.MergeVertexStep;
+import org.apache.tinkerpop.gremlin.process.traversal.strategy.AbstractTraversalStrategy;
+import org.apache.tinkerpop.gremlin.process.traversal.util.TraversalHelper;
+import org.apache.tinkerpop.gremlin.tinkergraph.process.traversal.step.map.TinkerMergeVertexStep;
+
+/**
+ * Optimizes {@code mergeV()} search lookups by using {@link TinkerMergeVertexStep}.
+ */
+public final class TinkerMergeVertexStepStrategy extends AbstractTraversalStrategy<TraversalStrategy.ProviderOptimizationStrategy>
+        implements TraversalStrategy.ProviderOptimizationStrategy {
+
+    private static final TinkerMergeVertexStepStrategy INSTANCE = new TinkerMergeVertexStepStrategy();
+
+    private TinkerMergeVertexStepStrategy() {
+    }
+
+    @Override
+    public void apply(final Traversal.Admin<?, ?> traversal) {
+        if (TraversalHelper.onGraphComputer(traversal))
+            return;
+
+        for (final MergeVertexStep originalMergeVertexStep : TraversalHelper.getStepsOfClass(MergeVertexStep.class, traversal)) {
+            final TinkerMergeVertexStep tinkerMergeVertexStep = new TinkerMergeVertexStep(originalMergeVertexStep);
+            TraversalHelper.replaceStep(originalMergeVertexStep, tinkerMergeVertexStep, traversal);
+        }
+    }
+
+    public static TinkerMergeVertexStepStrategy instance() {
+        return INSTANCE;
+    }
+}
diff --git a/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerGraph.java b/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerGraph.java
index b9dda16..514d008 100644
--- a/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerGraph.java
+++ b/tinkergraph-gremlin/src/main/java/org/apache/tinkerpop/gremlin/tinkergraph/structure/TinkerGraph.java
@@ -39,6 +39,7 @@ import org.apache.tinkerpop.gremlin.tinkergraph.process.computer.TinkerGraphComp
 import org.apache.tinkerpop.gremlin.tinkergraph.process.computer.TinkerGraphComputerView;
 import org.apache.tinkerpop.gremlin.tinkergraph.process.traversal.strategy.optimization.TinkerGraphCountStrategy;
 import org.apache.tinkerpop.gremlin.tinkergraph.process.traversal.strategy.optimization.TinkerGraphStepStrategy;
+import org.apache.tinkerpop.gremlin.tinkergraph.process.traversal.strategy.optimization.TinkerMergeVertexStepStrategy;
 import org.apache.tinkerpop.gremlin.util.iterator.IteratorUtils;
 
 import java.io.File;
@@ -72,7 +73,8 @@ public final class TinkerGraph implements Graph {
     static {
         TraversalStrategies.GlobalCache.registerStrategies(TinkerGraph.class, TraversalStrategies.GlobalCache.getStrategies(Graph.class).clone().addStrategies(
                 TinkerGraphStepStrategy.instance(),
-                TinkerGraphCountStrategy.instance()));
+                TinkerGraphCountStrategy.instance(),
+                TinkerMergeVertexStepStrategy.instance()));
     }
 
     private static final Configuration EMPTY_CONFIGURATION = new BaseConfiguration() {{