You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2016/03/25 20:41:31 UTC
[1/2] lucene-solr:branch_6_0: RandomCodec: remove extra BKDWriter
params
Repository: lucene-solr
Updated Branches:
refs/heads/branch_6_0 5733f05c7 -> 00d0b52d4
RandomCodec: remove extra BKDWriter params
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/00d0b52d
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/00d0b52d
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/00d0b52d
Branch: refs/heads/branch_6_0
Commit: 00d0b52d4162436e1a45b2311198de0ca13fe442
Parents: 2590551
Author: Mike McCandless <mi...@apache.org>
Authored: Fri Mar 25 15:42:54 2016 -0400
Committer: Mike McCandless <mi...@apache.org>
Committed: Fri Mar 25 15:43:01 2016 -0400
----------------------------------------------------------------------
.../src/java/org/apache/lucene/index/RandomCodec.java | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/00d0b52d/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java b/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
index af0aade..81f1d4e 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
@@ -119,8 +119,6 @@ public class RandomCodec extends AssertingCodec {
fieldInfo.getPointNumBytes(),
maxPointsInLeafNode,
maxMBSortInHeap,
- values.size(fieldInfo.name),
- singleValuePerDoc,
bkdSplitRandomSeed ^ fieldInfo.name.hashCode())) {
values.intersect(fieldInfo.name, new IntersectVisitor() {
@Override
@@ -276,8 +274,8 @@ public class RandomCodec extends AssertingCodec {
public RandomlySplittingBKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix, int numDims,
int bytesPerDim, int maxPointsInLeafNode, double maxMBSortInHeap,
- long totalPointCount, boolean singleValuePerDoc, int randomSeed) throws IOException {
- super(maxDoc, tempDir, tempFileNamePrefix, numDims, bytesPerDim, maxPointsInLeafNode, maxMBSortInHeap, totalPointCount, singleValuePerDoc);
+ int randomSeed) throws IOException {
+ super(maxDoc, tempDir, tempFileNamePrefix, numDims, bytesPerDim, maxPointsInLeafNode, maxMBSortInHeap);
this.random = new Random(randomSeed);
}
[2/2] lucene-solr:branch_6_0: randomize how BKDWriter splits in
RandomCodec so we exercise geo shape APIs with more exotic rectangles
Posted by mi...@apache.org.
randomize how BKDWriter splits in RandomCodec so we exercise geo shape APIs with more exotic rectangles
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/25905517
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/25905517
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/25905517
Branch: refs/heads/branch_6_0
Commit: 259055176c16ca51da89058004b9defd2b526126
Parents: 5733f05
Author: Mike McCandless <mi...@apache.org>
Authored: Fri Mar 25 15:40:16 2016 -0400
Committer: Mike McCandless <mi...@apache.org>
Committed: Fri Mar 25 15:43:01 2016 -0400
----------------------------------------------------------------------
.../codecs/lucene60/Lucene60PointsWriter.java | 4 +-
.../org/apache/lucene/util/bkd/BKDWriter.java | 3 +-
.../org/apache/lucene/index/RandomCodec.java | 68 +++++++++++++++++++-
3 files changed, 70 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/25905517/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java
index 3d09c45..de98701 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsWriter.java
@@ -42,8 +42,8 @@ import org.apache.lucene.util.bkd.BKDWriter;
/** Writes dimensional values */
public class Lucene60PointsWriter extends PointsWriter implements Closeable {
- final IndexOutput dataOut;
- final Map<String,Long> indexFPs = new HashMap<>();
+ protected final IndexOutput dataOut;
+ protected final Map<String,Long> indexFPs = new HashMap<>();
final SegmentWriteState writeState;
final int maxPointsInLeafNode;
final double maxMBSortInHeap;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/25905517/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
index e03b0d7..c9c40c7 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
@@ -997,8 +997,7 @@ public class BKDWriter implements Closeable {
return true;
}
- // TODO: make this protected when we want to subclass to play with different splitting criteria
- private int split(byte[] minPackedValue, byte[] maxPackedValue) {
+ protected int split(byte[] minPackedValue, byte[] maxPackedValue) {
// Find which dim has the largest span so we can split on it:
int splitDim = -1;
for(int dim=0;dim<numDims;dim++) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/25905517/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java b/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
index 662001e..af0aade 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
@@ -52,8 +52,11 @@ import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
import org.apache.lucene.codecs.simpletext.SimpleTextDocValuesFormat;
import org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat;
+import org.apache.lucene.index.PointValues.IntersectVisitor;
+import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
+import org.apache.lucene.util.bkd.BKDWriter;
/**
* Codec that assigns per-field random postings formats.
@@ -93,13 +96,55 @@ public class RandomCodec extends AssertingCodec {
// TODO: improve how we randomize this...
private final int maxPointsInLeafNode;
private final double maxMBSortInHeap;
+ private final int bkdSplitRandomSeed;
@Override
public PointsFormat pointsFormat() {
return new AssertingPointsFormat(new PointsFormat() {
@Override
public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
- return new Lucene60PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap);
+
+ // Randomize how BKDWriter chooses its splits:
+
+ return new Lucene60PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap) {
+ @Override
+ public void writeField(FieldInfo fieldInfo, PointsReader values) throws IOException {
+
+ boolean singleValuePerDoc = values.size(fieldInfo.name) == values.getDocCount(fieldInfo.name);
+
+ try (BKDWriter writer = new RandomlySplittingBKDWriter(writeState.segmentInfo.maxDoc(),
+ writeState.directory,
+ writeState.segmentInfo.name,
+ fieldInfo.getPointDimensionCount(),
+ fieldInfo.getPointNumBytes(),
+ maxPointsInLeafNode,
+ maxMBSortInHeap,
+ values.size(fieldInfo.name),
+ singleValuePerDoc,
+ bkdSplitRandomSeed ^ fieldInfo.name.hashCode())) {
+ values.intersect(fieldInfo.name, new IntersectVisitor() {
+ @Override
+ public void visit(int docID) {
+ throw new IllegalStateException();
+ }
+
+ public void visit(int docID, byte[] packedValue) throws IOException {
+ writer.add(packedValue, docID);
+ }
+
+ @Override
+ public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
+ return PointValues.Relation.CELL_CROSSES_QUERY;
+ }
+ });
+
+ // We could have 0 points on merge since all docs with dimensional fields may be deleted:
+ if (writer.getPointCount() > 0) {
+ indexFPs.put(fieldInfo.name, writer.finish(dataOut));
+ }
+ }
+ }
+ };
}
@Override
@@ -152,6 +197,7 @@ public class RandomCodec extends AssertingCodec {
maxPointsInLeafNode = TestUtil.nextInt(random, 16, 2048);
maxMBSortInHeap = 4.0 + (3*random.nextDouble());
+ bkdSplitRandomSeed = random.nextInt();
add(avoidCodecs,
TestUtil.getDefaultPostingsFormat(minItemsPerBlock, maxItemsPerBlock),
@@ -221,4 +267,24 @@ public class RandomCodec extends AssertingCodec {
", maxPointsInLeafNode=" + maxPointsInLeafNode +
", maxMBSortInHeap=" + maxMBSortInHeap;
}
+
+ /** Just like {@link BKDWriter} except it evilly picks random ways to split cells on
+ * recursion to try to provoke geo APIs that get upset at fun rectangles. */
+ private static class RandomlySplittingBKDWriter extends BKDWriter {
+
+ final Random random;
+
+ public RandomlySplittingBKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix, int numDims,
+ int bytesPerDim, int maxPointsInLeafNode, double maxMBSortInHeap,
+ long totalPointCount, boolean singleValuePerDoc, int randomSeed) throws IOException {
+ super(maxDoc, tempDir, tempFileNamePrefix, numDims, bytesPerDim, maxPointsInLeafNode, maxMBSortInHeap, totalPointCount, singleValuePerDoc);
+ this.random = new Random(randomSeed);
+ }
+
+ @Override
+ protected int split(byte[] minPackedValue, byte[] maxPackedValue) {
+ // BKD normally splits on the widest dimension, to try to make cells as squarish as possible, but we just pick a random one ;)
+ return random.nextInt(numDims);
+ }
+ }
}