You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tinkerpop.apache.org by ok...@apache.org on 2016/01/19 21:31:39 UTC
incubator-tinkerpop git commit: Fixed a very inefficient
implementation of SampleLocalStep. Added some nice test cases to
SampleLocalStepTest and SampleGlobalStepTest. I think we should use the
XXXStepTest methods more for checking things and not the XXX
Repository: incubator-tinkerpop
Updated Branches:
refs/heads/master 9eddd6a3a -> d54d666a6
Fixed a very inefficient implementation of SampleLocalStep. Added some nice test cases to SampleLocalStepTest and SampleGlobalStepTest. I think we should use the XXXStepTest methods more for checking things and not the XXXTests which are way more involved. CTR.
Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/d54d666a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/d54d666a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/d54d666a
Branch: refs/heads/master
Commit: d54d666a642bbd94d3240bfabcce909043336470
Parents: 9eddd6a
Author: Marko A. Rodriguez <ok...@gmail.com>
Authored: Tue Jan 19 13:31:44 2016 -0700
Committer: Marko A. Rodriguez <ok...@gmail.com>
Committed: Tue Jan 19 13:31:44 2016 -0700
----------------------------------------------------------------------
CHANGELOG.asciidoc | 1 +
.../traversal/step/map/SampleLocalStep.java | 68 ++++++++------------
.../step/filter/SampleGlobalStepTest.java | 22 +++++++
.../traversal/step/map/SampleLocalStepTest.java | 21 ++++++
4 files changed, 70 insertions(+), 42 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/d54d666a/CHANGELOG.asciidoc
----------------------------------------------------------------------
diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc
index 0a14e50..c3846b0 100644
--- a/CHANGELOG.asciidoc
+++ b/CHANGELOG.asciidoc
@@ -26,6 +26,7 @@ image::https://raw.githubusercontent.com/apache/incubator-tinkerpop/master/docs/
TinkerPop 3.1.1 (NOT OFFICIALLY RELEASED YET)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+* Optimized a very inefficient implementation of `SampleLocalStep`.
* Reduced the complexity and execution time of all `AbstractLambdaTraversal` instances.
* `DefaultTraversal` has a well defined `hashCode()` and `equals()`.
* Added serializers to Gryo for `java.time` related classes.
http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/d54d666a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/SampleLocalStep.java
----------------------------------------------------------------------
diff --git a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/SampleLocalStep.java b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/SampleLocalStep.java
index fc67575..5f83c53 100644
--- a/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/SampleLocalStep.java
+++ b/gremlin-core/src/main/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/SampleLocalStep.java
@@ -21,14 +21,20 @@ package org.apache.tinkerpop.gremlin.process.traversal.step.map;
import org.apache.tinkerpop.gremlin.process.traversal.Traversal;
import org.apache.tinkerpop.gremlin.process.traversal.Traverser;
-import org.apache.tinkerpop.gremlin.process.traversal.step.util.BulkSet;
import org.apache.tinkerpop.gremlin.process.traversal.traverser.TraverserRequirement;
-import org.apache.tinkerpop.gremlin.process.traversal.util.FastNoSuchElementException;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
/**
* @author Daniel Kuppitz (http://gremlin.guru)
+ * @author Marko A. Rodriguez (http://markorodriguez.com)
*/
public final class SampleLocalStep<S> extends MapStep<S, S> {
@@ -48,56 +54,34 @@ public final class SampleLocalStep<S> extends MapStep<S, S> {
return mapMap((Map) start);
} else if (start instanceof Collection) {
return mapCollection((Collection) start);
- } else if (RANDOM.nextDouble() > 0.5d) {
- throw FastNoSuchElementException.instance();
+ } else {
+ return start;
}
- return start;
}
private S mapCollection(final Collection collection) {
- final int size = collection.size();
- if (size <= this.amountToSample) {
+ if (collection.size() <= this.amountToSample)
return (S) collection;
+
+ final List<S> original = new ArrayList<>(collection);
+ final List<S> target = new ArrayList<>();
+ while (target.size() < this.amountToSample) {
+ target.add(original.remove(RANDOM.nextInt(original.size())));
}
- final double individualWeight = 1.0d / size;
- final Collection result = new BulkSet();
- double runningWeight = 0.0d;
- int runningAmountToSample = 0;
- while (runningAmountToSample < this.amountToSample) {
- for (final Object item : collection) {
- runningWeight = runningWeight + individualWeight;
- if (RANDOM.nextDouble() <= (runningWeight / size)) {
- result.add(item);
- if (++runningAmountToSample == this.amountToSample) {
- break;
- }
- }
- }
- }
- return (S) result;
+ return (S) target;
}
private S mapMap(final Map map) {
- final int size = map.size();
- if (size <= this.amountToSample) {
+ if (map.size() <= this.amountToSample)
return (S) map;
+
+ final List<Map.Entry> original = new ArrayList<>(map.entrySet());
+ final Map target = new LinkedHashMap<>(this.amountToSample);
+ while (target.size() < this.amountToSample) {
+ final Map.Entry entry = original.remove(RANDOM.nextInt(original.size()));
+ target.put(entry.getKey(), entry.getValue());
}
- final double individualWeight = 1.0d / size;
- final Map result = new HashMap(this.amountToSample);
- double runningWeight = 0.0d;
- while (result.size() < this.amountToSample) {
- for (final Object obj : map.entrySet()) {
- runningWeight = runningWeight + individualWeight;
- final Map.Entry entry = (Map.Entry) obj;
- if (RANDOM.nextDouble() <= (runningWeight / size)) {
- result.put(entry.getKey(), entry.getValue());
- if (result.size() == this.amountToSample) {
- break;
- }
- }
- }
- }
- return (S) result;
+ return (S) target;
}
@Override
http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/d54d666a/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/step/filter/SampleGlobalStepTest.java
----------------------------------------------------------------------
diff --git a/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/step/filter/SampleGlobalStepTest.java b/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/step/filter/SampleGlobalStepTest.java
index 0721e82..3d032ee 100644
--- a/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/step/filter/SampleGlobalStepTest.java
+++ b/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/step/filter/SampleGlobalStepTest.java
@@ -18,13 +18,20 @@
*/
package org.apache.tinkerpop.gremlin.process.traversal.step.filter;
+import org.apache.tinkerpop.gremlin.process.traversal.Scope;
import org.apache.tinkerpop.gremlin.process.traversal.Traversal;
import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__;
import org.apache.tinkerpop.gremlin.process.traversal.step.StepTest;
+import org.junit.Test;
+import java.util.ArrayList;
import java.util.Arrays;
+import java.util.HashSet;
import java.util.List;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
/**
* @author Daniel Kuppitz (http://gremlin.guru)
*/
@@ -37,4 +44,19 @@ public class SampleGlobalStepTest extends StepTest {
__.sample(10)
);
}
+
+ @Test
+ public void shouldSelectSubsetsCorrectly() {
+ final List<Integer> list = new ArrayList<>();
+ for (int i = 0; i < 100; i++) {
+ list.add(i);
+ }
+ for(int i=0; i<100; i++) {
+ List<Integer> result;
+ result = (List) __.inject(list).unfold().sample(i).toList();
+ assertEquals(i, result.size());
+ assertEquals(i, new HashSet<>(result).size());
+ assertTrue(list.containsAll(result));
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/d54d666a/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/SampleLocalStepTest.java
----------------------------------------------------------------------
diff --git a/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/SampleLocalStepTest.java b/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/SampleLocalStepTest.java
index 91e20db..b9dc5a6 100644
--- a/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/SampleLocalStepTest.java
+++ b/gremlin-core/src/test/java/org/apache/tinkerpop/gremlin/process/traversal/step/map/SampleLocalStepTest.java
@@ -22,10 +22,16 @@ import org.apache.tinkerpop.gremlin.process.traversal.Scope;
import org.apache.tinkerpop.gremlin.process.traversal.Traversal;
import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__;
import org.apache.tinkerpop.gremlin.process.traversal.step.StepTest;
+import org.junit.Test;
+import java.util.ArrayList;
import java.util.Arrays;
+import java.util.HashSet;
import java.util.List;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
/**
* @author Daniel Kuppitz (http://gremlin.guru)
*/
@@ -38,4 +44,19 @@ public class SampleLocalStepTest extends StepTest {
__.sample(Scope.local, 10)
);
}
+
+ @Test
+ public void shouldSelectSubsetsCorrectly() {
+ final List<Integer> list = new ArrayList<>();
+ for (int i = 0; i < 100; i++) {
+ list.add(i);
+ }
+ for(int i=0; i<100; i++) {
+ List<Integer> result;
+ result = __.inject(list).sample(Scope.local, i).next();
+ assertEquals(i, result.size());
+ assertEquals(i, new HashSet<>(result).size());
+ assertTrue(list.containsAll(result));
+ }
+ }
}