You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2016/06/11 15:49:32 UTC
[16/21] lucene-solr:branch_6x: LUCENE_6766: add missing first/last tests
LUCENE_6766: add missing first/last tests
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/dc5c5126
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/dc5c5126
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/dc5c5126
Branch: refs/heads/branch_6x
Commit: dc5c5126a2fb6cb97c2d3883de4b72d08082e779
Parents: c26bb87
Author: Mike McCandless <mi...@apache.org>
Authored: Sun May 8 06:41:55 2016 -0400
Committer: Mike McCandless <mi...@apache.org>
Committed: Sat Jun 11 11:48:40 2016 -0400
----------------------------------------------------------------------
.../lucene62/Lucene62SegmentInfoFormat.java | 2 +-
.../org/apache/lucene/index/MultiSorter.java | 12 +-
.../org/apache/lucene/index/SegmentMerger.java | 7 +
.../apache/lucene/index/TestIndexSorting.java | 381 ++++++++++++++++++-
4 files changed, 383 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/dc5c5126/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java
index 762b2c0..da19594 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java
@@ -299,7 +299,7 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
break;
case FLOAT:
output.writeByte((byte) 1);
- output.writeLong(Float.floatToIntBits(((Float) missingValue).floatValue()));
+ output.writeInt(Float.floatToIntBits(((Float) missingValue).floatValue()));
break;
default:
throw new IllegalStateException("Unexpected sort type: " + sortField.getType());
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/dc5c5126/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java b/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
index 39ef8d8..1c67fd5 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
@@ -136,9 +136,9 @@ final class MultiSorter {
}
final int missingOrd;
if (sortField.getMissingValue() == SortField.STRING_LAST) {
- missingOrd = Integer.MIN_VALUE;
- } else {
missingOrd = Integer.MAX_VALUE;
+ } else {
+ missingOrd = Integer.MIN_VALUE;
}
final int reverseMul;
@@ -180,10 +180,10 @@ final class MultiSorter {
reverseMul = 1;
}
- final int missingValue;
+ final long missingValue;
if (sortField.getMissingValue() != null) {
- missingValue = (Integer) sortField.getMissingValue();
+ missingValue = (Long) sortField.getMissingValue();
} else {
missingValue = 0;
}
@@ -193,14 +193,14 @@ final class MultiSorter {
public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) {
long valueA;
if (docsWithFields.get(readerIndexA).get(docIDA)) {
- valueA = (int) values.get(readerIndexA).get(docIDA);
+ valueA = values.get(readerIndexA).get(docIDA);
} else {
valueA = missingValue;
}
long valueB;
if (docsWithFields.get(readerIndexB).get(docIDB)) {
- valueB = (int) values.get(readerIndexB).get(docIDB);
+ valueB = values.get(readerIndexB).get(docIDB);
} else {
valueB = missingValue;
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/dc5c5126/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
index b0d9bcf..0cc1823 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
@@ -48,6 +48,8 @@ final class SegmentMerger {
final MergeState mergeState;
private final FieldInfos.Builder fieldInfosBuilder;
+ // nocommit make sure infoStream states per-segment-being-merged if they are already sorted
+
// note, just like in codec apis Directory 'dir' is NOT the same as segmentInfo.dir!!
SegmentMerger(List<CodecReader> readers, SegmentInfo segmentInfo, InfoStream infoStream, Directory dir,
FieldInfos.FieldNumbers fieldNumbers, IOContext context) throws IOException {
@@ -59,6 +61,11 @@ final class SegmentMerger {
this.codec = segmentInfo.getCodec();
this.context = context;
this.fieldInfosBuilder = new FieldInfos.Builder(fieldNumbers);
+ if (mergeState.infoStream.isEnabled("SM")) {
+ if (segmentInfo.getIndexSort() != null) {
+ mergeState.infoStream.message("SM", "index sort during merge: " + segmentInfo.getIndexSort());
+ }
+ }
}
/** True if any merging should happen */
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/dc5c5126/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
index 278aadc..3eb30ec 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
@@ -118,6 +118,76 @@ public class TestIndexSorting extends LuceneTestCase {
dir.close();
}
+ public void testMissingStringFirst() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("foo", SortField.Type.STRING);
+ sortField.setMissingValue(SortField.STRING_FIRST);
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new SortedDocValuesField("foo", new BytesRef("zzz")));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ w.addDocument(new Document());
+ w.commit();
+
+ doc = new Document();
+ doc.add(new SortedDocValuesField("foo", new BytesRef("mmm")));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ SortedDocValues values = leaf.getSortedDocValues("foo");
+ assertEquals(-1, values.getOrd(0));
+ assertEquals("mmm", values.get(1).utf8ToString());
+ assertEquals("zzz", values.get(2).utf8ToString());
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMissingStringLast() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("foo", SortField.Type.STRING);
+ sortField.setMissingValue(SortField.STRING_LAST);
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new SortedDocValuesField("foo", new BytesRef("zzz")));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ w.addDocument(new Document());
+ w.commit();
+
+ doc = new Document();
+ doc.add(new SortedDocValuesField("foo", new BytesRef("mmm")));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ SortedDocValues values = leaf.getSortedDocValues("foo");
+ assertEquals("mmm", values.get(0).utf8ToString());
+ assertEquals("zzz", values.get(1).utf8ToString());
+ assertEquals(-1, values.getOrd(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
public void testBasicLong() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
@@ -152,6 +222,80 @@ public class TestIndexSorting extends LuceneTestCase {
dir.close();
}
+ public void testMissingLongFirst() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("foo", SortField.Type.LONG);
+ sortField.setMissingValue(Long.valueOf(Long.MIN_VALUE));
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 18));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ w.addDocument(new Document());
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 7));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ Bits docsWithField = leaf.getDocsWithField("foo");
+ assertEquals(0, values.get(0));
+ assertFalse(docsWithField.get(0));
+ assertEquals(7, values.get(1));
+ assertEquals(18, values.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMissingLongLast() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("foo", SortField.Type.LONG);
+ sortField.setMissingValue(Long.valueOf(Long.MAX_VALUE));
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 18));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ w.addDocument(new Document());
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 7));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ Bits docsWithField = leaf.getDocsWithField("foo");
+ assertEquals(7, values.get(0));
+ assertEquals(18, values.get(1));
+ assertEquals(0, values.get(2));
+ assertFalse(docsWithField.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
public void testBasicInt() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
@@ -186,6 +330,80 @@ public class TestIndexSorting extends LuceneTestCase {
dir.close();
}
+ public void testMissingIntFirst() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("foo", SortField.Type.INT);
+ sortField.setMissingValue(Integer.valueOf(Integer.MIN_VALUE));
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 18));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ w.addDocument(new Document());
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 7));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ Bits docsWithField = leaf.getDocsWithField("foo");
+ assertEquals(0, values.get(0));
+ assertFalse(docsWithField.get(0));
+ assertEquals(7, values.get(1));
+ assertEquals(18, values.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMissingIntLast() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("foo", SortField.Type.INT);
+ sortField.setMissingValue(Integer.valueOf(Integer.MAX_VALUE));
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 18));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ w.addDocument(new Document());
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 7));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ Bits docsWithField = leaf.getDocsWithField("foo");
+ assertEquals(7, values.get(0));
+ assertEquals(18, values.get(1));
+ assertEquals(0, values.get(2));
+ assertFalse(docsWithField.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
public void testBasicDouble() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
@@ -220,6 +438,80 @@ public class TestIndexSorting extends LuceneTestCase {
dir.close();
}
+ public void testMissingDoubleFirst() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("foo", SortField.Type.DOUBLE);
+ sortField.setMissingValue(Double.NEGATIVE_INFINITY);
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new DoubleDocValuesField("foo", 18.0));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ w.addDocument(new Document());
+ w.commit();
+
+ doc = new Document();
+ doc.add(new DoubleDocValuesField("foo", 7.0));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ Bits docsWithField = leaf.getDocsWithField("foo");
+ assertEquals(0.0, Double.longBitsToDouble(values.get(0)), 0.0);
+ assertFalse(docsWithField.get(0));
+ assertEquals(7.0, Double.longBitsToDouble(values.get(1)), 0.0);
+ assertEquals(18.0, Double.longBitsToDouble(values.get(2)), 0.0);
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMissingDoubleLast() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("foo", SortField.Type.DOUBLE);
+ sortField.setMissingValue(Double.POSITIVE_INFINITY);
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new DoubleDocValuesField("foo", 18.0));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ w.addDocument(new Document());
+ w.commit();
+
+ doc = new Document();
+ doc.add(new DoubleDocValuesField("foo", 7.0));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ Bits docsWithField = leaf.getDocsWithField("foo");
+ assertEquals(7.0, Double.longBitsToDouble(values.get(0)), 0.0);
+ assertEquals(18.0, Double.longBitsToDouble(values.get(1)), 0.0);
+ assertEquals(0.0, Double.longBitsToDouble(values.get(2)), 0.0);
+ assertFalse(docsWithField.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
public void testBasicFloat() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
@@ -254,7 +546,82 @@ public class TestIndexSorting extends LuceneTestCase {
dir.close();
}
- public void testSortOnMerge(boolean withDeletes) throws IOException {
+ public void testMissingFloatFirst() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("foo", SortField.Type.FLOAT);
+ sortField.setMissingValue(Float.NEGATIVE_INFINITY);
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new FloatDocValuesField("foo", 18.0f));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ w.addDocument(new Document());
+ w.commit();
+
+ doc = new Document();
+ doc.add(new FloatDocValuesField("foo", 7.0f));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ Bits docsWithField = leaf.getDocsWithField("foo");
+ assertEquals(0.0f, Float.intBitsToFloat((int) values.get(0)), 0.0f);
+ assertFalse(docsWithField.get(0));
+ assertEquals(7.0f, Float.intBitsToFloat((int) values.get(1)), 0.0f);
+ assertEquals(18.0f, Float.intBitsToFloat((int) values.get(2)), 0.0f);
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMissingFloatLast() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("foo", SortField.Type.FLOAT);
+ sortField.setMissingValue(Float.POSITIVE_INFINITY);
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new FloatDocValuesField("foo", 18.0f));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ w.addDocument(new Document());
+ w.commit();
+
+ doc = new Document();
+ doc.add(new FloatDocValuesField("foo", 7.0f));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ Bits docsWithField = leaf.getDocsWithField("foo");
+ assertEquals(7.0f, Float.intBitsToFloat((int) values.get(0)), 0.0f);
+ assertEquals(18.0f, Float.intBitsToFloat((int) values.get(1)), 0.0f);
+ assertEquals(0.0f, Float.intBitsToFloat((int) values.get(2)), 0.0f);
+ assertFalse(docsWithField.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testRandom1() throws IOException {
+ boolean withDeletes = random().nextBoolean();
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG));
@@ -323,14 +690,6 @@ public class TestIndexSorting extends LuceneTestCase {
dir.close();
}
- public void testSortOnMerge() throws IOException {
- testSortOnMerge(false);
- }
-
- public void testSortOnMergeWithDeletes() throws IOException {
- testSortOnMerge(true);
- }
-
static class UpdateRunnable implements Runnable {
private final int numDocs;
@@ -666,9 +1025,7 @@ public class TestIndexSorting extends LuceneTestCase {
}
}
- // nocommit testrandom1 with deletions
-
- public void testRandom1() throws Exception {
+ public void testRandom2() throws Exception {
int numDocs = atLeast(100);
FieldType POSITIONS_TYPE = new FieldType(TextField.TYPE_NOT_STORED);