You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tinkerpop.apache.org by ok...@apache.org on 2016/01/19 21:31:39 UTC

incubator-tinkerpop git commit: Fixed a very inefficient implementation of SampleLocalStep. Added some nice test cases to SampleLocalStepTest and SampleGlobalStepTest. I think we should use the XXXStepTest methods more for checking things and not the XXX

Repository: incubator-tinkerpop
Updated Branches:
  refs/heads/master 9eddd6a3a -> d54d666a6


Fixed a very inefficient implementation of SampleLocalStep. Added some nice test cases to SampleLocalStepTest and SampleGlobalStepTest. I think we should use the XXXStepTest methods more for checking things and not the XXXTests which are way more involved. CTR.


Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/d54d666a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/d54d666a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/d54d666a

Branch: refs/heads/master
Commit: d54d666a642bbd94d3240bfabcce909043336470
Parents: 9eddd6a
Author: Marko A. Rodriguez <ok...@gmail.com>
Authored: Tue Jan 19 13:31:44 2016 -0700
Committer: Marko A. Rodriguez <ok...@gmail.com>
Committed: Tue Jan 19 13:31:44 2016 -0700

----------------------------------------------------------------------
 CHANGELOG.asciidoc                              |  1 +
 .../traversal/step/map/SampleLocalStep.java     | 68 ++++++++------------
 .../step/filter/SampleGlobalStepTest.java       | 22 +++++++
 .../traversal/step/map/SampleLocalStepTest.java | 21 ++++++
 4 files changed, 70 insertions(+), 42 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/d54d666a/CHANGELOG.asciidoc
----------------------------------------------------------------------
diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc
index 0a14e50..c3846b0 100644
--- a/CHANGELOG.asciidoc
+++ b/CHANGELOG.asciidoc
@@ -26,6 +26,7 @@ image::https://raw.githubusercontent.com/apache/incubator-tinkerpop/master/docs/
 TinkerPop 3.1.1 (NOT OFFICIALLY RELEASED YET)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
+* Optimized a very inefficient implementation of `SampleLocalStep`.
 * Reduced the complexity and execution time of all `AbstractLambdaTraversal` instances.
 * `DefaultTraversal` has a well defined `hashCode()` and `equals()`.
 * Added serializers to Gryo for `java.time` related classes.

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/d54d666a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/SampleLocalStep.java
----------------------------------------------------------------------
diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/SampleLocalStep.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/SampleLocalStep.java
index fc67575..5f83c53 100644
--- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/SampleLocalStep.java
+++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/SampleLocalStep.java
@@ -21,14 +21,20 @@ package org.apache.tinkerpop.gremlin.process.traversal.step.map;
 
 import org.apache.tinkerpop.gremlin.process.traversal.Traversal;
 import org.apache.tinkerpop.gremlin.process.traversal.Traverser;
-import org.apache.tinkerpop.gremlin.process.traversal.step.util.BulkSet;
 import org.apache.tinkerpop.gremlin.process.traversal.traverser.TraverserRequirement;
-import org.apache.tinkerpop.gremlin.process.traversal.util.FastNoSuchElementException;
 
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
 
 /**
  * @author Daniel Kuppitz (http://gremlin.guru)
+ * @author Marko A. Rodriguez (http://markorodriguez.com)
  */
 public final class SampleLocalStep<S> extends MapStep<S, S> {
 
@@ -48,56 +54,34 @@ public final class SampleLocalStep<S> extends MapStep<S, S> {
             return mapMap((Map) start);
         } else if (start instanceof Collection) {
             return mapCollection((Collection) start);
-        } else if (RANDOM.nextDouble() > 0.5d) {
-            throw FastNoSuchElementException.instance();
+        } else {
+            return start;
         }
-        return start;
     }
 
     private S mapCollection(final Collection collection) {
-        final int size = collection.size();
-        if (size <= this.amountToSample) {
+        if (collection.size() <= this.amountToSample)
             return (S) collection;
+
+        final List<S> original = new ArrayList<>(collection);
+        final List<S> target = new ArrayList<>();
+        while (target.size() < this.amountToSample) {
+            target.add(original.remove(RANDOM.nextInt(original.size())));
         }
-        final double individualWeight = 1.0d / size;
-        final Collection result = new BulkSet();
-        double runningWeight = 0.0d;
-        int runningAmountToSample = 0;
-        while (runningAmountToSample < this.amountToSample) {
-            for (final Object item : collection) {
-                runningWeight = runningWeight + individualWeight;
-                if (RANDOM.nextDouble() <= (runningWeight / size)) {
-                    result.add(item);
-                    if (++runningAmountToSample == this.amountToSample) {
-                        break;
-                    }
-                }
-            }
-        }
-        return (S) result;
+        return (S) target;
     }
 
     private S mapMap(final Map map) {
-        final int size = map.size();
-        if (size <= this.amountToSample) {
+        if (map.size() <= this.amountToSample)
             return (S) map;
+
+        final List<Map.Entry> original = new ArrayList<>(map.entrySet());
+        final Map target = new LinkedHashMap<>(this.amountToSample);
+        while (target.size() < this.amountToSample) {
+            final Map.Entry entry = original.remove(RANDOM.nextInt(original.size()));
+            target.put(entry.getKey(), entry.getValue());
         }
-        final double individualWeight = 1.0d / size;
-        final Map result = new HashMap(this.amountToSample);
-        double runningWeight = 0.0d;
-        while (result.size() < this.amountToSample) {
-            for (final Object obj : map.entrySet()) {
-                runningWeight = runningWeight + individualWeight;
-                final Map.Entry entry = (Map.Entry) obj;
-                if (RANDOM.nextDouble() <= (runningWeight / size)) {
-                    result.put(entry.getKey(), entry.getValue());
-                    if (result.size() == this.amountToSample) {
-                        break;
-                    }
-                }
-            }
-        }
-        return (S) result;
+        return (S) target;
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/d54d666a/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/step/filter/SampleGlobalStepTest.java
----------------------------------------------------------------------
diff --git a/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/step/filter/SampleGlobalStepTest.java b/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/step/filter/SampleGlobalStepTest.java
index 0721e82..3d032ee 100644
--- a/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/step/filter/SampleGlobalStepTest.java
+++ b/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/step/filter/SampleGlobalStepTest.java
@@ -18,13 +18,20 @@
  */
 package org.apache.tinkerpop.gremlin.process.traversal.step.filter;
 
+import org.apache.tinkerpop.gremlin.process.traversal.Scope;
 import org.apache.tinkerpop.gremlin.process.traversal.Traversal;
 import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__;
 import org.apache.tinkerpop.gremlin.process.traversal.step.StepTest;
+import org.junit.Test;
 
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashSet;
 import java.util.List;
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
 /**
  * @author Daniel Kuppitz (http://gremlin.guru)
  */
@@ -37,4 +44,19 @@ public class SampleGlobalStepTest extends StepTest {
                 __.sample(10)
         );
     }
+
+    @Test
+    public void shouldSelectSubsetsCorrectly() {
+        final List<Integer> list = new ArrayList<>();
+        for (int i = 0; i < 100; i++) {
+            list.add(i);
+        }
+        for(int i=0; i<100; i++) {
+            List<Integer> result;
+            result = (List) __.inject(list).unfold().sample(i).toList();
+            assertEquals(i, result.size());
+            assertEquals(i, new HashSet<>(result).size());
+            assertTrue(list.containsAll(result));
+        }
+    }
 }

http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/d54d666a/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/SampleLocalStepTest.java
----------------------------------------------------------------------
diff --git a/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/SampleLocalStepTest.java b/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/SampleLocalStepTest.java
index 91e20db..b9dc5a6 100644
--- a/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/SampleLocalStepTest.java
+++ b/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/SampleLocalStepTest.java
@@ -22,10 +22,16 @@ import org.apache.tinkerpop.gremlin.process.traversal.Scope;
 import org.apache.tinkerpop.gremlin.process.traversal.Traversal;
 import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__;
 import org.apache.tinkerpop.gremlin.process.traversal.step.StepTest;
+import org.junit.Test;
 
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashSet;
 import java.util.List;
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
 /**
  * @author Daniel Kuppitz (http://gremlin.guru)
  */
@@ -38,4 +44,19 @@ public class SampleLocalStepTest extends StepTest {
                 __.sample(Scope.local, 10)
         );
     }
+
+    @Test
+    public void shouldSelectSubsetsCorrectly() {
+        final List<Integer> list = new ArrayList<>();
+        for (int i = 0; i < 100; i++) {
+            list.add(i);
+        }
+        for(int i=0; i<100; i++) {
+            List<Integer> result;
+            result = __.inject(list).sample(Scope.local, i).next();
+            assertEquals(i, result.size());
+            assertEquals(i, new HashSet<>(result).size());
+            assertTrue(list.containsAll(result));
+        }
+    }
 }