You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tinkerpop.apache.org by sp...@apache.org on 2021/01/05 02:09:51 UTC

[tinkerpop] 01/01: TINKERPOP-2481 Installed IdentityRemovalStrategy

This is an automated email from the ASF dual-hosted git repository.

spmallette pushed a commit to branch TINKERPOP-2481
in repository https://gitbox.apache.org/repos/asf/tinkerpop.git

commit 0e75c645582380ec686f91eeb9b8ba663609175d
Author: Stephen Mallette <st...@amazon.com>
AuthorDate: Mon Jan 4 21:07:22 2021 -0500

    TINKERPOP-2481 Installed IdentityRemovalStrategy
    
    This strategy was always present but never installed. Research for the reasoning is written up on the JIRA for this issue. Needed to change the Step interface to allow for a way to access start objects without iterating them to get rid of a bit of hackery in BranchStep which needed access to such information without actually processing its child traversals.
---
 CHANGELOG.asciidoc                                 |  2 ++
 .../tinkerpop/gremlin/process/traversal/Step.java  |  9 ++++++
 .../process/traversal/TraversalStrategies.java     |  2 ++
 .../process/traversal/step/branch/BranchStep.java  | 19 +++--------
 .../process/traversal/step/util/AbstractStep.java  |  5 +++
 .../process/traversal/step/util/EmptyStep.java     |  5 +++
 .../optimization/IdentityRemovalStrategy.java      | 21 ++++++++++--
 .../gremlin/process/traversal/TraversalTest.java   |  5 +++
 .../optimization/IdentityRemovalStrategyTest.java  | 37 +++++++++++++---------
 .../EarlyLimitStrategyProcessTest.java             |  9 +++---
 10 files changed, 79 insertions(+), 35 deletions(-)

diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc
index 191355f..4af15ba 100644
--- a/CHANGELOG.asciidoc
+++ b/CHANGELOG.asciidoc
@@ -38,6 +38,8 @@ This release also includes changes from <<release-3-4-3, 3.4.3>>.
 * Added `Grouping` step interface.
 * Added `TraversalParent.replaceTraversal()` which can replace a direct child traversal.
 * Added `ByModulatorOptimizationStrategy` which replaces certain standard traversals w/ optimized traversals (e.g. `TokenTraversal`).
+* Improved `IdentityRemovalStrategy` by accounting for `EndStep` situations.
+* Added `IdentityRemovalStrategy` to the standard list of `TraversalStrategies`.
 * Refactored `MapStep` to move its logic to `ScalarMapStep` so that the old behavior could be preserved while allow other implementations to have more flexibility.
 * Modified TinkerGraph to support `null` property values and can be configured to disable that feature.
 * Modified `null` handling in mutations to be consistent for a new `Vertex` as well as update to an existing one.
diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/Step.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/Step.java
index a216783..6bc34f5 100644
--- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/Step.java
+++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/Step.java
@@ -19,6 +19,7 @@
 package org.apache.tinkerpop.gremlin.process.traversal;
 
 import org.apache.tinkerpop.gremlin.process.traversal.step.util.EmptyStep;
+import org.apache.tinkerpop.gremlin.process.traversal.step.util.ReducingBarrierStep;
 import org.apache.tinkerpop.gremlin.process.traversal.traverser.TraverserRequirement;
 
 import java.io.Serializable;
@@ -54,6 +55,14 @@ public interface Step<S, E> extends Iterator<Traverser.Admin<E>>, Serializable,
     public void addStart(final Traverser.Admin<S> start);
 
     /**
+     * Determines if starts objects are present without iterating forward. This function has special applicability
+     * around {@link ReducingBarrierStep} implementations where they always return {@code true} for calls to
+     * {@link #hasNext()}. Using this function gives insight to what the step itself is holding in its iterator without
+     * performing any sort of processing on the step itself.
+     */
+    public boolean hasStarts();
+
+    /**
      * Set the step that is previous to the current step.
      * Used for linking steps together to form a function chain.
      *
diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/TraversalStrategies.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/TraversalStrategies.java
index 63842b0..4f5430c 100644
--- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/TraversalStrategies.java
+++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/TraversalStrategies.java
@@ -29,6 +29,7 @@ import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.Coun
 import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.EarlyLimitStrategy;
 import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.ByModulatorOptimizationStrategy;
 import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.FilterRankingStrategy;
+import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.IdentityRemovalStrategy;
 import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.IncidentToAdjacentStrategy;
 import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.InlineFilterStrategy;
 import org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization.LazyBarrierStrategy;
@@ -217,6 +218,7 @@ public interface TraversalStrategies extends Serializable, Cloneable, Iterable<T
         static {
             final TraversalStrategies graphStrategies = new DefaultTraversalStrategies();
             graphStrategies.addStrategies(
+                    IdentityRemovalStrategy.instance(),
                     ConnectiveStrategy.instance(),
                     EarlyLimitStrategy.instance(),
                     InlineFilterStrategy.instance(),
diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/branch/BranchStep.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/branch/BranchStep.java
index d92ce2d..361ed54 100644
--- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/branch/BranchStep.java
+++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/branch/BranchStep.java
@@ -18,15 +18,12 @@
  */
 package org.apache.tinkerpop.gremlin.process.traversal.step.branch;
 
-import org.apache.tinkerpop.gremlin.process.traversal.P;
 import org.apache.tinkerpop.gremlin.process.traversal.Traversal;
 import org.apache.tinkerpop.gremlin.process.traversal.Traverser;
 import org.apache.tinkerpop.gremlin.process.traversal.lambda.PredicateTraversal;
 import org.apache.tinkerpop.gremlin.process.traversal.step.Barrier;
 import org.apache.tinkerpop.gremlin.process.traversal.step.TraversalOptionParent;
-import org.apache.tinkerpop.gremlin.process.traversal.step.sideEffect.IdentityStep;
 import org.apache.tinkerpop.gremlin.process.traversal.step.util.ComputerAwareStep;
-import org.apache.tinkerpop.gremlin.process.traversal.step.util.ReducingBarrierStep;
 import org.apache.tinkerpop.gremlin.process.traversal.traverser.TraverserRequirement;
 import org.apache.tinkerpop.gremlin.process.traversal.util.FastNoSuchElementException;
 import org.apache.tinkerpop.gremlin.process.traversal.util.TraversalHelper;
@@ -41,7 +38,6 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
-import java.util.function.Predicate;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
@@ -82,10 +78,7 @@ public class BranchStep<S, E, M> extends ComputerAwareStep<S, E> implements Trav
             }
             this.traversalOptions.add(Pair.with(pickOptionTraversal, traversalOption));
         }
-        // adding an IdentityStep acts as a placeholder when reducing barriers get in the way - see the
-        // standardAlgorithm() method for more information.
-        if (TraversalHelper.hasStepOfAssignableClass(ReducingBarrierStep.class, traversalOption))
-            traversalOption.addStep(0, new IdentityStep(traversalOption));
+
         traversalOption.addStep(new EndStep(traversalOption));
 
         if (!this.hasBarrier && !TraversalHelper.getStepsOfAssignableClassRecursively(Barrier.class, traversalOption).isEmpty())
@@ -117,13 +110,11 @@ public class BranchStep<S, E, M> extends ComputerAwareStep<S, E> implements Trav
                 // this block is ignored on the first pass through the while(true) giving the opportunity for
                 // the traversalOptions to be prepared. Iterate all of them and simply return the ones that yield
                 // results. applyCurrentTraverser() will have only injected the current traverser into the options
-                // that met the choice requirements.  Note that in addGlobalChildOption an IdentityStep was added to
-                // be a holder for that current traverser. That allows us to check that first step for an injected
-                // traverser as part of the condition for using that traversal option in the output. This is necessary
-                // because barriers like fold(), max(), etc. will always return true for hasNext() even if a traverser
-                // was not seeded in applyCurrentTraverser().
+                // that met the choice requirements.
                 for (final Traversal.Admin<S, E> option : getGlobalChildren()) {
-                    if (option.getStartStep().hasNext() && option.hasNext())
+                    // checking hasStarts() first on the step in case there is a ReducingBarrierStep which will
+                    // always return true for hasNext()
+                    if (option.getStartStep().hasStarts() && option.hasNext())
                         return option.getEndStep();
                 }
             }
diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/util/AbstractStep.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/util/AbstractStep.java
index 54fa197..2073e34 100644
--- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/util/AbstractStep.java
+++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/util/AbstractStep.java
@@ -98,6 +98,11 @@ public abstract class AbstractStep<S, E> implements Step<S, E> {
     }
 
     @Override
+    public boolean hasStarts() {
+        return this.starts.hasNext();
+    }
+
+    @Override
     public void setPreviousStep(final Step<?, S> step) {
         this.previousStep = step;
     }
diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/util/EmptyStep.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/util/EmptyStep.java
index b377c81..704cce3 100644
--- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/util/EmptyStep.java
+++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/util/EmptyStep.java
@@ -50,6 +50,11 @@ public final class EmptyStep<S, E> implements Step<S, E>, TraversalParent {
     }
 
     @Override
+    public boolean hasStarts() {
+        return false;
+    }
+
+    @Override
     public void addStart(final Traverser.Admin<S> start) {
 
     }
diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/IdentityRemovalStrategy.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/IdentityRemovalStrategy.java
index 3c4a875..ca6657d 100644
--- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/IdentityRemovalStrategy.java
+++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/IdentityRemovalStrategy.java
@@ -21,10 +21,13 @@ package org.apache.tinkerpop.gremlin.process.traversal.strategy.optimization;
 import org.apache.tinkerpop.gremlin.process.traversal.Traversal;
 import org.apache.tinkerpop.gremlin.process.traversal.TraversalStrategy;
 import org.apache.tinkerpop.gremlin.process.traversal.step.sideEffect.IdentityStep;
+import org.apache.tinkerpop.gremlin.process.traversal.step.util.ComputerAwareStep;
 import org.apache.tinkerpop.gremlin.process.traversal.step.util.EmptyStep;
 import org.apache.tinkerpop.gremlin.process.traversal.strategy.AbstractTraversalStrategy;
 import org.apache.tinkerpop.gremlin.process.traversal.util.TraversalHelper;
 
+import java.util.List;
+
 /**
  * {@code IdentityRemovalStrategy} looks for {@link IdentityStep} instances and removes them.
  * If the identity step is labeled, its labels are added to the previous step.
@@ -46,13 +49,27 @@ public final class IdentityRemovalStrategy extends AbstractTraversalStrategy<Tra
 
     @Override
     public void apply(final Traversal.Admin<?, ?> traversal) {
+        // if there is just one step we would keep the step whether it was identity() or not.
         if (traversal.getSteps().size() <= 1)
             return;
 
         for (final IdentityStep<?> identityStep : TraversalHelper.getStepsOfClass(IdentityStep.class, traversal)) {
+            // with no labels on the identity() it can just be dropped. if there are labels then they should be
+            // moved to the previous step. if there is no previous step then this is a start of a labelled traversal
+            // and is kept
             if (identityStep.getLabels().isEmpty() || !(identityStep.getPreviousStep() instanceof EmptyStep)) {
-                TraversalHelper.copyLabels(identityStep, identityStep.getPreviousStep(), false);
-                traversal.removeStep(identityStep);
+
+                // for branch()/union() type steps an EndStep gets added which would lead to something like:
+                // [UnionStep([[VertexStep(OUT,vertex), EndStep], [EndStep], [VertexStep(OUT,vertex), EndStep]])]
+                // if the identity() was removed. seems to make sense to account for that case so that the traversal
+                // gets to be:
+                // [UnionStep([[VertexStep(OUT,vertex), EndStep], [IdentityStep, EndStep], [VertexStep(OUT,vertex), EndStep]])]
+                // EndStep seems to just behave like a identity() in the above case, but perhaps it is more consistent
+                // to keep the identity() placeholder rather than a step that doesn't actually exist
+                if (!(identityStep.getNextStep() instanceof ComputerAwareStep.EndStep && traversal.getSteps().size() == 2)) {
+                    TraversalHelper.copyLabels(identityStep, identityStep.getPreviousStep(), false);
+                    traversal.removeStep(identityStep);
+                }
             }
         }
     }
diff --git a/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/TraversalTest.java b/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/TraversalTest.java
index 90eab14..9a60216 100644
--- a/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/TraversalTest.java
+++ b/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/TraversalTest.java
@@ -179,6 +179,11 @@ public class TraversalTest {
         }
 
         @Override
+        public boolean hasStarts() {
+            return false;
+        }
+
+        @Override
         public void setPreviousStep(final Step step) {
 
         }
diff --git a/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/IdentityRemovalStrategyTest.java b/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/IdentityRemovalStrategyTest.java
index eb791af..b10a447 100644
--- a/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/IdentityRemovalStrategyTest.java
+++ b/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/IdentityRemovalStrategyTest.java
@@ -28,6 +28,9 @@ import org.junit.runners.Parameterized;
 
 import java.util.Arrays;
 
+import static org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__.inE;
+import static org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__.outE;
+import static org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__.union;
 import static org.junit.Assert.assertEquals;
 
 /**
@@ -42,32 +45,36 @@ public class IdentityRemovalStrategyTest {
     @Parameterized.Parameter(value = 1)
     public Traversal optimized;
 
-
-    private void applyIdentityRemovalStrategy(final Traversal traversal) {
-        final TraversalStrategies strategies = new DefaultTraversalStrategies();
-        strategies.addStrategies(IdentityRemovalStrategy.instance());
-        traversal.asAdmin().setStrategies(strategies);
-        traversal.asAdmin().applyStrategies();
-
-    }
-
-    @Test
-    public void doTest() {
-        applyIdentityRemovalStrategy(original);
-        assertEquals(optimized, original);
-    }
-
     @Parameterized.Parameters(name = "{0}")
     public static Iterable<Object[]> generateTestParameters() {
 
         return Arrays.asList(new Traversal[][]{
                 {__.identity(), __.identity()},
                 {__.identity().out(), __.out()},
+                {__.identity().out().identity().identity(), __.out()},
+                {__.match(__.as("a").out("knows").identity().as("b"),__.as("b").identity()).identity(), __.match(__.as("a").out("knows").as("b"),__.as("b"))},
+                {__.union(__.out().identity(), __.identity(), __.out()), __.union(__.out(), __.identity(), __.out())},
+                {__.choose(__.out().identity(), __.identity(), __.out("knows")), __.choose(__.out(), __.identity(), __.out("knows"))},
                 {__.identity().out().identity(), __.out()},
                 {__.identity().as("a").out().identity(), __.identity().as("a").out()},
                 {__.identity().as("a").out().identity().as("b"), __.identity().as("a").out().as("b")},
                 {__.identity().as("a").out().in().identity().identity().as("b").identity().out(), __.identity().as("a").out().in().as("b").out()},
                 {__.out().identity().as("a").out().in().identity().identity().as("b").identity().out(), __.out().as("a").out().in().as("b").out()},
+                {__.V(1, 2).local(union(outE().count(), inE().count(), (Traversal) outE().values("weight").sum())), __.V(1, 2).local(union(outE().count(), inE().count(), (Traversal) outE().values("weight").sum()))}
         });
     }
+
+    @Test
+    public void doTest() {
+        applyIdentityRemovalStrategy(original);
+        assertEquals(optimized, original);
+    }
+
+    private void applyIdentityRemovalStrategy(final Traversal traversal) {
+        final TraversalStrategies strategies = new DefaultTraversalStrategies();
+        strategies.addStrategies(IdentityRemovalStrategy.instance());
+        traversal.asAdmin().setStrategies(strategies);
+        traversal.asAdmin().applyStrategies();
+
+    }
 }
diff --git a/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/EarlyLimitStrategyProcessTest.java b/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/EarlyLimitStrategyProcessTest.java
index 3806f03..a7e955b 100644
--- a/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/EarlyLimitStrategyProcessTest.java
+++ b/gremlin-test/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/strategy/optimization/EarlyLimitStrategyProcessTest.java
@@ -35,8 +35,9 @@ import java.util.List;
 import java.util.Map;
 
 import static org.apache.tinkerpop.gremlin.LoadGraphWith.GraphData.GRATEFUL;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.is;
 import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
 import static org.junit.Assume.assumeTrue;
 
 /**
@@ -92,10 +93,10 @@ public class EarlyLimitStrategyProcessTest extends AbstractGremlinProcessTest {
 
         if (t.asAdmin().getStrategies().getStrategy(EarlyLimitStrategy.class).isPresent()) {
             assertEquals(10, metrics.getMetrics().size());
-            assertTrue(metrics.getMetrics(5).getName().endsWith("@[d]"));
+            assertThat(metrics.getMetrics(5).getName().endsWith("@[d]"), is(true));
             assertEquals("RangeGlobalStep(0,1)", metrics.getMetrics(6).getName());
             assertEquals("PathStep@[e]", metrics.getMetrics(7).getName());
-            assertTrue(metrics.getMetrics(7).getCounts().values().stream().allMatch(x -> x == 1L));
+            assertThat(metrics.getMetrics(7).getCounts().values().stream().allMatch(x -> x == 1L), is(true));
         } else {
             assertEquals(11, metrics.getMetrics().size());
             assertEquals("RangeGlobalStep(0,5)@[d]", metrics.getMetrics(6).getName());
@@ -105,7 +106,7 @@ public class EarlyLimitStrategyProcessTest extends AbstractGremlinProcessTest {
             // forward pulling an extra traverser, so pretty sure the correct assertion is to match what happens with
             // EarlyLimitStrategy above. i'm not even sure there is a point to asserting this anymore as the original
             // intent does not appear clear to me, but i will leave it for now.
-            assertTrue(metrics.getMetrics(7).getCounts().values().stream().allMatch(x -> x == 1L));
+            assertThat(metrics.getMetrics(7).getCounts().values().stream().allMatch(x -> x == 1L), is(true));
         }
     }
 }
\ No newline at end of file