You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2016/11/16 15:37:56 UTC
[1/3] lucene-solr:branch_6x: LUCENE-7537: fix some 6.x backport issues
Repository: lucene-solr
Updated Branches:
refs/heads/branch_6x 696238118 -> 64b9eefaa
LUCENE-7537: fix some 6.x backport issues
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/64b9eefa
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/64b9eefa
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/64b9eefa
Branch: refs/heads/branch_6x
Commit: 64b9eefaa931b4fc8b2345e2307eff4a317e3450
Parents: e357f95
Author: Mike McCandless <mi...@apache.org>
Authored: Wed Nov 16 10:35:50 2016 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Wed Nov 16 10:37:02 2016 -0500
----------------------------------------------------------------------
.../apache/lucene/codecs/DocValuesConsumer.java | 22 +++++++++++----
.../org/apache/lucene/index/MultiDocValues.java | 29 +++++++++++++++++++-
.../org/apache/lucene/index/MultiSorter.java | 11 +++++---
.../apache/lucene/index/TestIndexSorting.java | 27 ++++++++++++++----
.../asserting/AssertingDocValuesFormat.java | 2 +-
5 files changed, 74 insertions(+), 17 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/64b9eefa/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
index 427b520..defe438 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
@@ -221,21 +221,26 @@ public abstract class DocValuesConsumer implements Closeable {
mergeSortedSetField(mergeFieldInfo, mergeState, toMerge);
} else if (type == DocValuesType.SORTED_NUMERIC) {
List<SortedNumericDocValues> toMerge = new ArrayList<>();
+ List<SortedNumericDocValues> toMerge2 = new ArrayList<>();
for (int i=0;i<mergeState.docValuesProducers.length;i++) {
SortedNumericDocValues values = null;
+ SortedNumericDocValues values2 = null;
DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i];
if (docValuesProducer != null) {
FieldInfo fieldInfo = mergeState.fieldInfos[i].fieldInfo(mergeFieldInfo.name);
if (fieldInfo != null && fieldInfo.getDocValuesType() == DocValuesType.SORTED_NUMERIC) {
values = docValuesProducer.getSortedNumeric(fieldInfo);
+ values2 = docValuesProducer.getSortedNumeric(fieldInfo);
}
}
if (values == null) {
values = DocValues.emptySortedNumeric(mergeState.maxDocs[i]);
+ values2 = values;
}
toMerge.add(values);
+ toMerge2.add(values2);
}
- mergeSortedNumericField(mergeFieldInfo, mergeState, toMerge);
+ mergeSortedNumericField(mergeFieldInfo, mergeState, toMerge, toMerge2);
} else {
throw new AssertionError("type=" + type);
}
@@ -445,6 +450,11 @@ public abstract class DocValuesConsumer implements Closeable {
return docID;
}
}
+
+ @Override
+ public String toString() {
+ return "SortedNumericDocValuesSub values=" + values + " docID=" + docID + " mappedDocID=" + mappedDocID;
+ }
}
/**
@@ -452,8 +462,10 @@ public abstract class DocValuesConsumer implements Closeable {
* <p>
* The default implementation calls {@link #addSortedNumericField}, passing
* iterables that filter deleted documents.
+ * <p>
+ * We require two <code>toMerge</code> lists because each segment's values are consumed by two iterations that advance concurrently, so each iteration needs its own independent instance.
*/
- public void mergeSortedNumericField(FieldInfo fieldInfo, final MergeState mergeState, List<SortedNumericDocValues> toMerge) throws IOException {
+ public void mergeSortedNumericField(FieldInfo fieldInfo, final MergeState mergeState, List<SortedNumericDocValues> toMerge, List<SortedNumericDocValues> toMerge2) throws IOException {
addSortedNumericField(fieldInfo,
// doc -> value count
@@ -514,9 +526,9 @@ public abstract class DocValuesConsumer implements Closeable {
public Iterator<Number> iterator() {
// We must make a new DocIDMerger for each iterator:
List<SortedNumericDocValuesSub> subs = new ArrayList<>();
- assert mergeState.docMaps.length == toMerge.size();
- for(int i=0;i<toMerge.size();i++) {
- subs.add(new SortedNumericDocValuesSub(mergeState.docMaps[i], toMerge.get(i), mergeState.maxDocs[i]));
+ assert mergeState.docMaps.length == toMerge2.size();
+ for(int i=0;i<toMerge2.size();i++) {
+ subs.add(new SortedNumericDocValuesSub(mergeState.docMaps[i], toMerge2.get(i), mergeState.maxDocs[i]));
}
final DocIDMerger<SortedNumericDocValuesSub> docIDMerger = new DocIDMerger<>(subs, mergeState.segmentInfo.getIndexSort() != null);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/64b9eefa/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java b/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
index af4dcfc..8fa4a59 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
@@ -143,7 +143,7 @@ public class MultiDocValues {
};
}
}
-
+
/** Returns a Bits for a reader's docsWithField (potentially merging on-the-fly)
* <p>
* This is a slow way to access this bitset. Instead, access them per-segment
@@ -330,6 +330,33 @@ public class MultiDocValues {
return new MultiSortedDocValues(values, starts, mapping);
}
}
+
+ /** Expert: returns a SortedDocValues from an array of the leaf readers' sorted doc values (potentially doing extremely slow things).
+ * <p>
+ * This is an extremely slow way to access sorted values. Instead, access them per-segment
+ * with {@link LeafReader#getSortedDocValues(String)}
+ * </p>
+ */
+ public static SortedDocValues getSortedValues(IndexReader r, final SortedDocValues[] leafValues, final int[] docStarts) throws IOException {
+ final List<LeafReaderContext> leaves = r.leaves();
+ final int size = leaves.size();
+
+ if (leafValues.length != size) {
+ throw new IllegalArgumentException("leafValues must match the number of leaves; got leafValues.length=" + leafValues.length + " vs leaves.size()=" + leaves.size());
+ }
+ if (docStarts.length != size+1) {
+ throw new IllegalArgumentException("docStarts must match the number of leaves, plus one; got docStarts.length=" + docStarts.length + " vs leaves.size()=" + leaves.size());
+ }
+
+ if (leafValues.length == 0) {
+ return null;
+ } else if (leafValues.length == 1) {
+ return leafValues[0];
+ }
+
+ OrdinalMap mapping = OrdinalMap.build(r.getCoreCacheKey(), leafValues, PackedInts.DEFAULT);
+ return new MultiSortedDocValues(leafValues, docStarts, mapping);
+ }
/** Returns a SortedSetDocValues for a reader's docvalues (potentially doing extremely slow things).
* <p>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/64b9eefa/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java b/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
index 7c2c3be..4e461e3 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
@@ -128,18 +128,21 @@ final class MultiSorter {
final int reverseMul = sortField.getReverse() ? -1 : 1;
final SortField.Type sortType = Sorter.getSortFieldType(sortField);
- switch(sortField.getType()) {
+ switch(sortType) {
case STRING:
{
// this uses the efficient segment-local ordinal map:
MultiReader multiReader = new MultiReader(readers.toArray(new LeafReader[readers.size()]));
- final SortedDocValues sorted = MultiDocValues.getSortedValues(multiReader, sortField.getField());
- final int[] docStarts = new int[readers.size()];
+ final int[] docStarts = new int[readers.size()+1];
List<LeafReaderContext> leaves = multiReader.leaves();
+ final SortedDocValues[] leafValues = new SortedDocValues[readers.size()];
for(int i=0;i<readers.size();i++) {
+ leafValues[i] = Sorter.getOrWrapSorted(readers.get(i), sortField);
docStarts[i] = leaves.get(i).docBase;
}
+ docStarts[readers.size()] = multiReader.maxDoc();
+ final SortedDocValues sorted = MultiDocValues.getSortedValues(multiReader, leafValues, docStarts);
final int missingOrd;
if (sortField.getMissingValue() == SortField.STRING_LAST) {
missingOrd = sortField.getReverse() ? Integer.MIN_VALUE : Integer.MAX_VALUE;
@@ -176,7 +179,7 @@ final class MultiSorter {
if (sortField.getMissingValue() != null) {
missingValue = (Long) sortField.getMissingValue();
} else {
- missingValue = 0;
+ missingValue = 0l;
}
return new CrossReaderComparator() {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/64b9eefa/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
index e25558d..ae8756b 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
@@ -411,6 +411,9 @@ public class TestIndexSorting extends LuceneTestCase {
assertFalse(docsWithField.get(0));
assertEquals(7, values.get(1));
assertEquals(18, values.get(2));
+ r.close();
+ w.close();
+ dir.close();
}
public void testMissingMultiValuedLongFirst() throws Exception {
@@ -1016,9 +1019,9 @@ public class TestIndexSorting extends LuceneTestCase {
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("foo");
- assertEquals(-1.0, Double.longBitsToDouble(values.get(0)), 0.0);
- assertEquals(7.0, Double.longBitsToDouble(values.get(1)), 0.0);
- assertEquals(18.0, Double.longBitsToDouble(values.get(2)), 0.0);
+ assertEquals(-1.0, Float.intBitsToFloat((int) values.get(0)), 0.0);
+ assertEquals(7.0, Float.intBitsToFloat((int) values.get(1)), 0.0);
+ assertEquals(18.0, Float.intBitsToFloat((int) values.get(2)), 0.0);
r.close();
w.close();
dir.close();
@@ -1298,11 +1301,21 @@ public class TestIndexSorting extends LuceneTestCase {
IndexWriter w = new IndexWriter(dir, iwc);
final int numDocs = atLeast(1000);
final FixedBitSet deleted = new FixedBitSet(numDocs);
+ if (VERBOSE) {
+ System.out.println("TEST: " + numDocs + " docs");
+ }
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
int num = random().nextInt(10);
+ if (VERBOSE) {
+ System.out.println("doc id=" + i + " count=" + num);
+ }
for (int j = 0; j < num; j++) {
- doc.add(new SortedNumericDocValuesField("foo", random().nextInt(2000)));
+ int n = random().nextInt(2000);
+ if (VERBOSE) {
+ System.out.println(" " + n);
+ }
+ doc.add(new SortedNumericDocValuesField("foo", n));
}
doc.add(new StringField("id", Integer.toString(i), Store.YES));
doc.add(new NumericDocValuesField("id", i));
@@ -1315,6 +1328,9 @@ public class TestIndexSorting extends LuceneTestCase {
final int id = TestUtil.nextInt(random(), 0, i);
deleted.set(id);
w.deleteDocuments(new Term("id", Integer.toString(id)));
+ if (VERBOSE) {
+ System.out.println(" delete doc id=" + id);
+ }
}
}
@@ -1329,8 +1345,7 @@ public class TestIndexSorting extends LuceneTestCase {
} else {
assertEquals(1, topDocs.totalHits);
NumericDocValues values = MultiDocValues.getNumericValues(reader, "id");
- assertEquals(topDocs.scoreDocs[0].doc, values.advance(topDocs.scoreDocs[0].doc));
- assertEquals(i, values.longValue());
+ assertEquals(i, MultiDocValues.getNumericValues(reader, "id").get(topDocs.scoreDocs[0].doc));
Document document = reader.document(topDocs.scoreDocs[0].doc);
assertEquals(Integer.toString(i), document.get("id"));
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/64b9eefa/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java
index e0af9a1..0a54e2e 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingDocValuesFormat.java
@@ -149,7 +149,7 @@ public class AssertingDocValuesFormat extends DocValuesFormat {
Number next = valueIterator.next();
assert next != null;
long nextValue = next.longValue();
- assert nextValue >= previous;
+ assert nextValue >= previous: "nextValue=" + nextValue + " vs previous=" + previous;
previous = nextValue;
}
}
[2/3] lucene-solr:branch_6x: LUCENE-7537: Index time sorting now
supports multi-valued sorts using selectors (MIN, MAX, etc.)
Posted by mi...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e357f957/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
index 3c457f3..e25558d 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
@@ -61,6 +61,8 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.SortedNumericSortField;
+import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.TopDocs;
@@ -111,6 +113,46 @@ public class TestIndexSorting extends LuceneTestCase {
dir.close();
}
+ public void testBasicMultiValuedString() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ Sort indexSort = new Sort(new SortedSetSortField("foo", false));
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("id", 3));
+ doc.add(new SortedSetDocValuesField("foo", new BytesRef("zzz")));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("id", 1));
+ doc.add(new SortedSetDocValuesField("foo", new BytesRef("aaa")));
+ doc.add(new SortedSetDocValuesField("foo", new BytesRef("zzz")));
+ doc.add(new SortedSetDocValuesField("foo", new BytesRef("bcg")));
+ w.addDocument(doc);
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("id", 2));
+ doc.add(new SortedSetDocValuesField("foo", new BytesRef("mmm")));
+ doc.add(new SortedSetDocValuesField("foo", new BytesRef("pppp")));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("id");
+ assertEquals(1l, values.get(0));
+ assertEquals(2l, values.get(1));
+ assertEquals(3l, values.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
public void testMissingStringFirst() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
@@ -146,6 +188,48 @@ public class TestIndexSorting extends LuceneTestCase {
dir.close();
}
+ public void testMissingMultiValuedStringFirst() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortedSetSortField("foo", false);
+ sortField.setMissingValue(SortField.STRING_FIRST);
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("id", 3));
+ doc.add(new SortedSetDocValuesField("foo", new BytesRef("zzz")));
+ doc.add(new SortedSetDocValuesField("foo", new BytesRef("zzza")));
+ doc.add(new SortedSetDocValuesField("foo", new BytesRef("zzzd")));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ doc = new Document();
+ doc.add(new NumericDocValuesField("id", 1));
+ w.addDocument(doc);
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("id", 2));
+ doc.add(new SortedSetDocValuesField("foo", new BytesRef("mmm")));
+ doc.add(new SortedSetDocValuesField("foo", new BytesRef("nnnn")));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("id");
+ assertEquals(1l, values.get(0));
+ assertEquals(2l, values.get(1));
+ assertEquals(3l, values.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
public void testMissingStringLast() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
@@ -181,6 +265,47 @@ public class TestIndexSorting extends LuceneTestCase {
dir.close();
}
+ public void testMissingMultiValuedStringLast() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortedSetSortField("foo", false);
+ sortField.setMissingValue(SortField.STRING_LAST);
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("id", 2));
+ doc.add(new SortedSetDocValuesField("foo", new BytesRef("zzz")));
+ doc.add(new SortedSetDocValuesField("foo", new BytesRef("zzzd")));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ doc = new Document();
+ doc.add(new NumericDocValuesField("id", 3));
+ w.addDocument(doc);
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("id", 1));
+ doc.add(new SortedSetDocValuesField("foo", new BytesRef("mmm")));
+ doc.add(new SortedSetDocValuesField("foo", new BytesRef("ppp")));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("id");
+ assertEquals(1l, values.get(0));
+ assertEquals(2l, values.get(1));
+ assertEquals(3l, values.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
public void testBasicLong() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
@@ -215,6 +340,45 @@ public class TestIndexSorting extends LuceneTestCase {
dir.close();
}
+ public void testBasicMultiValuedLong() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ Sort indexSort = new Sort(new SortedNumericSortField("foo", SortField.Type.LONG));
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("id", 3));
+ doc.add(new SortedNumericDocValuesField("foo", 18));
+ doc.add(new SortedNumericDocValuesField("foo", 35));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("id", 1));
+ doc.add(new SortedNumericDocValuesField("foo", -1));
+ w.addDocument(doc);
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("id", 2));
+ doc.add(new SortedNumericDocValuesField("foo", 7));
+ doc.add(new SortedNumericDocValuesField("foo", 22));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("id");
+ assertEquals(1, values.get(0));
+ assertEquals(2, values.get(1));
+ assertEquals(3, values.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
public void testMissingLongFirst() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
@@ -247,21 +411,439 @@ public class TestIndexSorting extends LuceneTestCase {
assertFalse(docsWithField.get(0));
assertEquals(7, values.get(1));
assertEquals(18, values.get(2));
+ }
+
+ public void testMissingMultiValuedLongFirst() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortedNumericSortField("foo", SortField.Type.LONG);
+ sortField.setMissingValue(Long.valueOf(Long.MIN_VALUE));
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("id", 3));
+ doc.add(new SortedNumericDocValuesField("foo", 18));
+ doc.add(new SortedNumericDocValuesField("foo", 27));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ doc = new Document();
+ doc.add(new NumericDocValuesField("id", 1));
+ w.addDocument(doc);
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("id", 2));
+ doc.add(new SortedNumericDocValuesField("foo", 7));
+ doc.add(new SortedNumericDocValuesField("foo", 24));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("id");
+ assertEquals(1, values.get(0));
+ assertEquals(2, values.get(1));
+ assertEquals(3, values.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMissingLongLast() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("foo", SortField.Type.LONG);
+ sortField.setMissingValue(Long.valueOf(Long.MAX_VALUE));
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 18));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ w.addDocument(new Document());
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 7));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ assertEquals(7, values.get(0));
+ assertEquals(18, values.get(1));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMissingMultiValuedLongLast() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortedNumericSortField("foo", SortField.Type.LONG);
+ sortField.setMissingValue(Long.valueOf(Long.MAX_VALUE));
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("id", 2));
+ doc.add(new SortedNumericDocValuesField("foo", 18));
+ doc.add(new SortedNumericDocValuesField("foo", 65));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ doc = new Document();
+ doc.add(new NumericDocValuesField("id", 3));
+ w.addDocument(doc);
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("id", 1));
+ doc.add(new SortedNumericDocValuesField("foo", 7));
+ doc.add(new SortedNumericDocValuesField("foo", 34));
+ doc.add(new SortedNumericDocValuesField("foo", 74));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("id");
+ assertEquals(1, values.get(0));
+ assertEquals(2, values.get(1));
+ assertEquals(3, values.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testBasicInt() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ Sort indexSort = new Sort(new SortField("foo", SortField.Type.INT));
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 18));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("foo", -1));
+ w.addDocument(doc);
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 7));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ assertEquals(-1, values.get(0));
+ assertEquals(7, values.get(1));
+ assertEquals(18, values.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testBasicMultiValuedInt() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ Sort indexSort = new Sort(new SortedNumericSortField("foo", SortField.Type.INT));
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("id", 3));
+ doc.add(new SortedNumericDocValuesField("foo", 18));
+ doc.add(new SortedNumericDocValuesField("foo", 34));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("id", 1));
+ doc.add(new SortedNumericDocValuesField("foo", -1));
+ doc.add(new SortedNumericDocValuesField("foo", 34));
+ w.addDocument(doc);
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("id", 2));
+ doc.add(new SortedNumericDocValuesField("foo", 7));
+ doc.add(new SortedNumericDocValuesField("foo", 22));
+ doc.add(new SortedNumericDocValuesField("foo", 27));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("id");
+ assertEquals(1, values.get(0));
+ assertEquals(2, values.get(1));
+ assertEquals(3, values.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMissingIntFirst() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("foo", SortField.Type.INT);
+ sortField.setMissingValue(Integer.valueOf(Integer.MIN_VALUE));
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 18));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ w.addDocument(new Document());
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 7));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ Bits docsWithField = leaf.getDocsWithField("foo");
+ assertEquals(0, values.get(0));
+ assertFalse(docsWithField.get(0));
+ assertEquals(7, values.get(1));
+ assertEquals(18, values.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMissingMultiValuedIntFirst() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortedNumericSortField("foo", SortField.Type.INT);
+ sortField.setMissingValue(Integer.valueOf(Integer.MIN_VALUE));
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("id", 3));
+ doc.add(new SortedNumericDocValuesField("foo", 18));
+ doc.add(new SortedNumericDocValuesField("foo", 187667));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ // missing
+ doc = new Document();
+ doc.add(new NumericDocValuesField("id", 1));
+ w.addDocument(doc);
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("id", 2));
+ doc.add(new SortedNumericDocValuesField("foo", 7));
+ doc.add(new SortedNumericDocValuesField("foo", 34));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("id");
+ assertEquals(1, values.get(0));
+ assertEquals(2, values.get(1));
+ assertEquals(3, values.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMissingIntLast() throws Exception { // INT index sort where a doc without "foo" must end up last
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortField("foo", SortField.Type.INT);
+ sortField.setMissingValue(Integer.valueOf(Integer.MAX_VALUE)); // docs without "foo" are given Integer.MAX_VALUE as their sort value
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 18));
+ w.addDocument(doc);
+ // commit so that we get more than one segment and forceMerge actually merges; we only get a sorted segment by merging:
+ w.commit();
+
+ // this document has no "foo" value at all
+ w.addDocument(new Document());
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 7));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ Bits docsWithField = leaf.getDocsWithField("foo");
+ assertEquals(7, values.get(0)); // expected order after the merge: 7, 18, then the doc without a value
+ assertEquals(18, values.get(1));
+ assertFalse(docsWithField.get(2)); // the last doc is the one that never had "foo"
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMissingMultiValuedIntLast() throws Exception { // multi-valued INT index sort where a doc without "foo" must end up last
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ SortField sortField = new SortedNumericSortField("foo", SortField.Type.INT);
+ sortField.setMissingValue(Integer.valueOf(Integer.MAX_VALUE)); // docs without "foo" are given Integer.MAX_VALUE as their sort value
+ Sort indexSort = new Sort(sortField);
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("id", 2)); // "id" records the position this doc is expected to take after sorting (1-based)
+ doc.add(new SortedNumericDocValuesField("foo", 18));
+ doc.add(new SortedNumericDocValuesField("foo", 6372));
+ w.addDocument(doc);
+ // commit so that we get more than one segment and forceMerge actually merges; we only get a sorted segment by merging:
+ w.commit();
+
+ // this document has an "id" but no "foo" value
+ doc = new Document();
+ doc.add(new NumericDocValuesField("id", 3));
+ w.addDocument(doc);
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("id", 1));
+ doc.add(new SortedNumericDocValuesField("foo", 7));
+ doc.add(new SortedNumericDocValuesField("foo", 8));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("id"); // check the order through "id" rather than the multi-valued sort field
+ assertEquals(1, values.get(0));
+ assertEquals(2, values.get(1));
+ assertEquals(3, values.get(2)); // the doc without "foo" comes last
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testBasicDouble() throws Exception { // DOUBLE index sort over three single-doc segments, no missing values
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ Sort indexSort = new Sort(new SortField("foo", SortField.Type.DOUBLE));
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new DoubleDocValuesField("foo", 18.0));
+ w.addDocument(doc);
+ // commit so that we get more than one segment and forceMerge actually merges; we only get a sorted segment by merging:
+ w.commit();
+
+ doc = new Document();
+ doc.add(new DoubleDocValuesField("foo", -1.0));
+ w.addDocument(doc);
+ w.commit();
+
+ doc = new Document();
+ doc.add(new DoubleDocValuesField("foo", 7.0));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo"); // values are read back as long bits and decoded with longBitsToDouble below
+ assertEquals(-1.0, Double.longBitsToDouble(values.get(0)), 0.0); // expected ascending order: -1.0, 7.0, 18.0
+ assertEquals(7.0, Double.longBitsToDouble(values.get(1)), 0.0);
+ assertEquals(18.0, Double.longBitsToDouble(values.get(2)), 0.0);
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testBasicMultiValuedDouble() throws Exception { // multi-valued DOUBLE index sort, every doc has two "foo" values
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ Sort indexSort = new Sort(new SortedNumericSortField("foo", SortField.Type.DOUBLE));
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("id", 3)); // "id" records the position this doc is expected to take after sorting (1-based)
+ doc.add(new SortedNumericDocValuesField("foo", NumericUtils.doubleToSortableLong(7.54))); // doubles are stored via their sortable-long encoding
+ doc.add(new SortedNumericDocValuesField("foo", NumericUtils.doubleToSortableLong(27.0)));
+ w.addDocument(doc);
+ // commit so that we get more than one segment and forceMerge actually merges; we only get a sorted segment by merging:
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("id", 1));
+ doc.add(new SortedNumericDocValuesField("foo", NumericUtils.doubleToSortableLong(-1.0)));
+ doc.add(new SortedNumericDocValuesField("foo", NumericUtils.doubleToSortableLong(0.0)));
+ w.addDocument(doc);
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("id", 2));
+ doc.add(new SortedNumericDocValuesField("foo", NumericUtils.doubleToSortableLong(7.0)));
+ doc.add(new SortedNumericDocValuesField("foo", NumericUtils.doubleToSortableLong(7.67)));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("id"); // check the order through "id" rather than the multi-valued sort field
+ assertEquals(1, values.get(0));
+ assertEquals(2, values.get(1));
+ assertEquals(3, values.get(2));
r.close();
w.close();
dir.close();
}
- public void testMissingLongLast() throws Exception {
+ public void testMissingDoubleFirst() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
- SortField sortField = new SortField("foo", SortField.Type.LONG);
- sortField.setMissingValue(Long.valueOf(Long.MAX_VALUE));
+ SortField sortField = new SortField("foo", SortField.Type.DOUBLE);
+ sortField.setMissingValue(Double.NEGATIVE_INFINITY);
Sort indexSort = new Sort(sortField);
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
- doc.add(new NumericDocValuesField("foo", 18));
+ doc.add(new DoubleDocValuesField("foo", 18.0));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
@@ -271,7 +853,7 @@ public class TestIndexSorting extends LuceneTestCase {
w.commit();
doc = new Document();
- doc.add(new NumericDocValuesField("foo", 7));
+ doc.add(new DoubleDocValuesField("foo", 7.0));
w.addDocument(doc);
w.forceMerge(1);
@@ -280,59 +862,66 @@ public class TestIndexSorting extends LuceneTestCase {
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("foo");
Bits docsWithField = leaf.getDocsWithField("foo");
- assertEquals(7, values.get(0));
- assertEquals(18, values.get(1));
- assertEquals(0, values.get(2));
- assertFalse(docsWithField.get(2));
+ assertEquals(0.0, Double.longBitsToDouble(values.get(0)), 0.0);
+ assertFalse(docsWithField.get(0));
+ assertEquals(7.0, Double.longBitsToDouble(values.get(1)), 0.0);
+ assertEquals(18.0, Double.longBitsToDouble(values.get(2)), 0.0);
r.close();
w.close();
dir.close();
}
- public void testBasicInt() throws Exception {
+ public void testMissingMultiValuedDoubleFirst() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
- Sort indexSort = new Sort(new SortField("foo", SortField.Type.INT));
+ SortField sortField = new SortedNumericSortField("foo", SortField.Type.DOUBLE);
+ sortField.setMissingValue(Double.NEGATIVE_INFINITY);
+ Sort indexSort = new Sort(sortField);
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
- doc.add(new NumericDocValuesField("foo", 18));
+ doc.add(new NumericDocValuesField("id", 3));
+ doc.add(new SortedNumericDocValuesField("foo", NumericUtils.doubleToSortableLong(18.0)));
+ doc.add(new SortedNumericDocValuesField("foo", NumericUtils.doubleToSortableLong(18.76)));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
+ // missing
doc = new Document();
- doc.add(new NumericDocValuesField("foo", -1));
+ doc.add(new NumericDocValuesField("id", 1));
w.addDocument(doc);
w.commit();
doc = new Document();
- doc.add(new NumericDocValuesField("foo", 7));
+ doc.add(new NumericDocValuesField("id", 2));
+ doc.add(new SortedNumericDocValuesField("foo", NumericUtils.doubleToSortableLong(7.0)));
+ doc.add(new SortedNumericDocValuesField("foo", NumericUtils.doubleToSortableLong(70.0)));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
- NumericDocValues values = leaf.getNumericDocValues("foo");
- assertEquals(-1, values.get(0));
- assertEquals(7, values.get(1));
- assertEquals(18, values.get(2));
+ NumericDocValues values = leaf.getNumericDocValues("id");
+ assertEquals(1, values.get(0));
+ assertEquals(2, values.get(1));
+ assertEquals(3, values.get(2));
r.close();
w.close();
dir.close();
}
- public void testMissingIntFirst() throws Exception {
+ public void testMissingDoubleLast() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
- SortField sortField = new SortField("foo", SortField.Type.INT);
- sortField.setMissingValue(Integer.valueOf(Integer.MIN_VALUE));
+ SortField sortField = new SortField("foo", SortField.Type.DOUBLE);
+ sortField.setMissingValue(Double.POSITIVE_INFINITY);
Sort indexSort = new Sort(sortField);
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
- doc.add(new NumericDocValuesField("foo", 18));
+ doc.add(new DoubleDocValuesField("foo", 18.0));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
@@ -342,7 +931,7 @@ public class TestIndexSorting extends LuceneTestCase {
w.commit();
doc = new Document();
- doc.add(new NumericDocValuesField("foo", 7));
+ doc.add(new DoubleDocValuesField("foo", 7.0));
w.addDocument(doc);
w.forceMerge(1);
@@ -351,71 +940,75 @@ public class TestIndexSorting extends LuceneTestCase {
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("foo");
Bits docsWithField = leaf.getDocsWithField("foo");
- assertEquals(0, values.get(0));
- assertFalse(docsWithField.get(0));
- assertEquals(7, values.get(1));
- assertEquals(18, values.get(2));
+ assertEquals(7.0, Double.longBitsToDouble(values.get(0)), 0.0);
+ assertEquals(18.0, Double.longBitsToDouble(values.get(1)), 0.0);
+ assertEquals(0.0, Double.longBitsToDouble(values.get(2)), 0.0);
+ assertFalse(docsWithField.get(2));
r.close();
w.close();
dir.close();
}
- public void testMissingIntLast() throws Exception {
+ public void testMissingMultiValuedDoubleLast() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
- SortField sortField = new SortField("foo", SortField.Type.INT);
- sortField.setMissingValue(Integer.valueOf(Integer.MAX_VALUE));
+ SortField sortField = new SortedNumericSortField("foo", SortField.Type.DOUBLE);
+ sortField.setMissingValue(Double.POSITIVE_INFINITY);
Sort indexSort = new Sort(sortField);
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
- doc.add(new NumericDocValuesField("foo", 18));
+ doc.add(new NumericDocValuesField("id", 2));
+ doc.add(new SortedNumericDocValuesField("foo", NumericUtils.doubleToSortableLong(18.0)));
+ doc.add(new SortedNumericDocValuesField("foo", NumericUtils.doubleToSortableLong(8262.0)));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
// missing
- w.addDocument(new Document());
+ doc = new Document();
+ doc.add(new NumericDocValuesField("id", 3));
+ w.addDocument(doc);
w.commit();
doc = new Document();
- doc.add(new NumericDocValuesField("foo", 7));
+ doc.add(new NumericDocValuesField("id", 1));
+ doc.add(new SortedNumericDocValuesField("foo", NumericUtils.doubleToSortableLong(7.0)));
+ doc.add(new SortedNumericDocValuesField("foo", NumericUtils.doubleToSortableLong(7.87)));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
- NumericDocValues values = leaf.getNumericDocValues("foo");
- Bits docsWithField = leaf.getDocsWithField("foo");
- assertEquals(7, values.get(0));
- assertEquals(18, values.get(1));
- assertEquals(0, values.get(2));
- assertFalse(docsWithField.get(2));
+ NumericDocValues values = leaf.getNumericDocValues("id");
+ assertEquals(1, values.get(0));
+ assertEquals(2, values.get(1));
+ assertEquals(3, values.get(2));
r.close();
w.close();
dir.close();
}
- public void testBasicDouble() throws Exception {
+ public void testBasicFloat() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
- Sort indexSort = new Sort(new SortField("foo", SortField.Type.DOUBLE));
+ Sort indexSort = new Sort(new SortField("foo", SortField.Type.FLOAT));
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
- doc.add(new DoubleDocValuesField("foo", 18.0));
+ doc.add(new FloatDocValuesField("foo", 18.0f));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
doc = new Document();
- doc.add(new DoubleDocValuesField("foo", -1.0));
+ doc.add(new FloatDocValuesField("foo", -1.0f));
w.addDocument(doc);
w.commit();
doc = new Document();
- doc.add(new DoubleDocValuesField("foo", 7.0));
+ doc.add(new FloatDocValuesField("foo", 7.0f));
w.addDocument(doc);
w.forceMerge(1);
@@ -431,53 +1024,55 @@ public class TestIndexSorting extends LuceneTestCase {
dir.close();
}
- public void testMissingDoubleFirst() throws Exception {
+ public void testBasicMultiValuedFloat() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
- SortField sortField = new SortField("foo", SortField.Type.DOUBLE);
- sortField.setMissingValue(Double.NEGATIVE_INFINITY);
- Sort indexSort = new Sort(sortField);
+ Sort indexSort = new Sort(new SortedNumericSortField("foo", SortField.Type.FLOAT));
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
- doc.add(new DoubleDocValuesField("foo", 18.0));
+ doc.add(new NumericDocValuesField("id", 3));
+ doc.add(new SortedNumericDocValuesField("foo", NumericUtils.floatToSortableInt(18.0f)));
+ doc.add(new SortedNumericDocValuesField("foo", NumericUtils.floatToSortableInt(29.0f)));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
- // missing
- w.addDocument(new Document());
+ doc = new Document();
+ doc.add(new NumericDocValuesField("id", 1));
+ doc.add(new SortedNumericDocValuesField("foo", NumericUtils.floatToSortableInt(-1.0f)));
+ doc.add(new SortedNumericDocValuesField("foo", NumericUtils.floatToSortableInt(34.0f)));
+ w.addDocument(doc);
w.commit();
doc = new Document();
- doc.add(new DoubleDocValuesField("foo", 7.0));
+ doc.add(new NumericDocValuesField("id", 2));
+ doc.add(new SortedNumericDocValuesField("foo", NumericUtils.floatToSortableInt(7.0f)));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
- NumericDocValues values = leaf.getNumericDocValues("foo");
- Bits docsWithField = leaf.getDocsWithField("foo");
- assertEquals(0.0, Double.longBitsToDouble(values.get(0)), 0.0);
- assertFalse(docsWithField.get(0));
- assertEquals(7.0, Double.longBitsToDouble(values.get(1)), 0.0);
- assertEquals(18.0, Double.longBitsToDouble(values.get(2)), 0.0);
+ NumericDocValues values = leaf.getNumericDocValues("id");
+ assertEquals(1, values.get(0));
+ assertEquals(2, values.get(1));
+ assertEquals(3, values.get(2));
r.close();
w.close();
dir.close();
}
- public void testMissingDoubleLast() throws Exception {
+ public void testMissingFloatFirst() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
- SortField sortField = new SortField("foo", SortField.Type.DOUBLE);
- sortField.setMissingValue(Double.POSITIVE_INFINITY);
+ SortField sortField = new SortField("foo", SortField.Type.FLOAT);
+ sortField.setMissingValue(Float.NEGATIVE_INFINITY);
Sort indexSort = new Sort(sortField);
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
- doc.add(new DoubleDocValuesField("foo", 18.0));
+ doc.add(new FloatDocValuesField("foo", 18.0f));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
@@ -487,7 +1082,7 @@ public class TestIndexSorting extends LuceneTestCase {
w.commit();
doc = new Document();
- doc.add(new DoubleDocValuesField("foo", 7.0));
+ doc.add(new FloatDocValuesField("foo", 7.0f));
w.addDocument(doc);
w.forceMerge(1);
@@ -496,54 +1091,61 @@ public class TestIndexSorting extends LuceneTestCase {
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("foo");
Bits docsWithField = leaf.getDocsWithField("foo");
- assertEquals(7.0, Double.longBitsToDouble(values.get(0)), 0.0);
- assertEquals(18.0, Double.longBitsToDouble(values.get(1)), 0.0);
- assertEquals(0.0, Double.longBitsToDouble(values.get(2)), 0.0);
- assertFalse(docsWithField.get(2));
+ assertEquals(0.0f, Float.intBitsToFloat((int) values.get(0)), 0.0f);
+ assertFalse(docsWithField.get(0));
+ assertEquals(7.0f, Float.intBitsToFloat((int) values.get(1)), 0.0f);
+ assertEquals(18.0f, Float.intBitsToFloat((int) values.get(2)), 0.0f);
r.close();
w.close();
dir.close();
}
- public void testBasicFloat() throws Exception {
+ public void testMissingMultiValuedFloatFirst() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
- Sort indexSort = new Sort(new SortField("foo", SortField.Type.FLOAT));
+ SortField sortField = new SortedNumericSortField("foo", SortField.Type.FLOAT);
+ sortField.setMissingValue(Float.NEGATIVE_INFINITY);
+ Sort indexSort = new Sort(sortField);
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
- doc.add(new FloatDocValuesField("foo", 18.0f));
+ doc.add(new NumericDocValuesField("id", 3));
+ doc.add(new SortedNumericDocValuesField("foo", NumericUtils.floatToSortableInt(18.0f)));
+ doc.add(new SortedNumericDocValuesField("foo", NumericUtils.floatToSortableInt(726.0f)));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
+ // missing
doc = new Document();
- doc.add(new FloatDocValuesField("foo", -1.0f));
+ doc.add(new NumericDocValuesField("id", 1));
w.addDocument(doc);
w.commit();
doc = new Document();
- doc.add(new FloatDocValuesField("foo", 7.0f));
+ doc.add(new NumericDocValuesField("id", 2));
+ doc.add(new SortedNumericDocValuesField("foo", NumericUtils.floatToSortableInt(7.0f)));
+ doc.add(new SortedNumericDocValuesField("foo", NumericUtils.floatToSortableInt(18.0f)));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
- NumericDocValues values = leaf.getNumericDocValues("foo");
- assertEquals(-1.0f, Float.intBitsToFloat((int) values.get(0)), 0.0f);
- assertEquals(7.0f, Float.intBitsToFloat((int) values.get(1)), 0.0f);
- assertEquals(18.0f, Float.intBitsToFloat((int) values.get(2)), 0.0f);
+ NumericDocValues values = leaf.getNumericDocValues("id");
+ assertEquals(1, values.get(0));
+ assertEquals(2, values.get(1));
+ assertEquals(3, values.get(2));
r.close();
w.close();
dir.close();
}
- public void testMissingFloatFirst() throws Exception {
+ public void testMissingFloatLast() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
SortField sortField = new SortField("foo", SortField.Type.FLOAT);
- sortField.setMissingValue(Float.NEGATIVE_INFINITY);
+ sortField.setMissingValue(Float.POSITIVE_INFINITY);
Sort indexSort = new Sort(sortField);
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
@@ -567,47 +1169,51 @@ public class TestIndexSorting extends LuceneTestCase {
assertEquals(3, leaf.maxDoc());
NumericDocValues values = leaf.getNumericDocValues("foo");
Bits docsWithField = leaf.getDocsWithField("foo");
- assertEquals(0.0f, Float.intBitsToFloat((int) values.get(0)), 0.0f);
- assertFalse(docsWithField.get(0));
- assertEquals(7.0f, Float.intBitsToFloat((int) values.get(1)), 0.0f);
- assertEquals(18.0f, Float.intBitsToFloat((int) values.get(2)), 0.0f);
+ assertEquals(7.0f, Float.intBitsToFloat((int) values.get(0)), 0.0f);
+ assertEquals(18.0f, Float.intBitsToFloat((int) values.get(1)), 0.0f);
+ assertEquals(0.0f, Float.intBitsToFloat((int) values.get(2)), 0.0f);
+ assertFalse(docsWithField.get(2));
r.close();
w.close();
dir.close();
}
- public void testMissingFloatLast() throws Exception {
+ public void testMissingMultiValuedFloatLast() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
- SortField sortField = new SortField("foo", SortField.Type.FLOAT);
+ SortField sortField = new SortedNumericSortField("foo", SortField.Type.FLOAT);
sortField.setMissingValue(Float.POSITIVE_INFINITY);
Sort indexSort = new Sort(sortField);
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
- doc.add(new FloatDocValuesField("foo", 18.0f));
+ doc.add(new NumericDocValuesField("id", 2));
+ doc.add(new SortedNumericDocValuesField("foo", NumericUtils.floatToSortableInt(726.0f)));
+ doc.add(new SortedNumericDocValuesField("foo", NumericUtils.floatToSortableInt(18.0f)));
w.addDocument(doc);
// so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
w.commit();
// missing
- w.addDocument(new Document());
+ doc = new Document();
+ doc.add(new NumericDocValuesField("id", 3));
+ w.addDocument(doc);
w.commit();
doc = new Document();
- doc.add(new FloatDocValuesField("foo", 7.0f));
+ doc.add(new NumericDocValuesField("id", 1));
+ doc.add(new SortedNumericDocValuesField("foo", NumericUtils.floatToSortableInt(12.67f)));
+ doc.add(new SortedNumericDocValuesField("foo", NumericUtils.floatToSortableInt(7.0f)));
w.addDocument(doc);
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
LeafReader leaf = getOnlyLeafReader(r);
assertEquals(3, leaf.maxDoc());
- NumericDocValues values = leaf.getNumericDocValues("foo");
- Bits docsWithField = leaf.getDocsWithField("foo");
- assertEquals(7.0f, Float.intBitsToFloat((int) values.get(0)), 0.0f);
- assertEquals(18.0f, Float.intBitsToFloat((int) values.get(1)), 0.0f);
- assertEquals(0.0f, Float.intBitsToFloat((int) values.get(2)), 0.0f);
- assertFalse(docsWithField.get(2));
+ NumericDocValues values = leaf.getNumericDocValues("id");
+ assertEquals(1, values.get(0));
+ assertEquals(2, values.get(1));
+ assertEquals(3, values.get(2));
r.close();
w.close();
dir.close();
@@ -683,6 +1289,58 @@ public class TestIndexSorting extends LuceneTestCase {
dir.close();
}
+ public void testMultiValuedRandom1() throws IOException { // random adds, deletes and merges under a multi-valued LONG index sort
+ boolean withDeletes = random().nextBoolean(); // NOTE(review): never read below; kept so the random sequence is unchanged
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ Sort indexSort = new Sort(new SortedNumericSortField("foo", SortField.Type.LONG));
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ final int numDocs = atLeast(1000);
+ final FixedBitSet deleted = new FixedBitSet(numDocs); // bit i is set once the doc with id i has been deleted
+ for (int i = 0; i < numDocs; ++i) {
+ Document doc = new Document();
+ int num = random().nextInt(10); // 0..9 values, so some docs have no "foo" at all
+ for (int j = 0; j < num; j++) {
+ doc.add(new SortedNumericDocValuesField("foo", random().nextInt(2000)));
+ }
+ doc.add(new StringField("id", Integer.toString(i), Store.YES)); // indexed + stored id, used for deletes and lookups
+ doc.add(new NumericDocValuesField("id", i)); // same id again as doc values, verified below
+ w.addDocument(doc);
+ if (random().nextInt(5) == 0) {
+ w.getReader().close();
+ } else if (random().nextInt(30) == 0) {
+ w.forceMerge(2);
+ } else if (random().nextInt(4) == 0) {
+ final int id = TestUtil.nextInt(random(), 0, i);
+ deleted.set(id);
+ w.deleteDocuments(new Term("id", Integer.toString(id)));
+ }
+ }
+
+ DirectoryReader reader = w.getReader();
+ // Now check that the index is consistent: deleted ids are gone, live ids resolve to matching doc values and stored fields
+ IndexSearcher searcher = newSearcher(reader);
+ for (int i = 0; i < numDocs; ++i) {
+ TermQuery termQuery = new TermQuery(new Term("id", Integer.toString(i)));
+ final TopDocs topDocs = searcher.search(termQuery, 1);
+ if (deleted.get(i)) {
+ assertEquals(0, topDocs.totalHits);
+ } else {
+ assertEquals(1, topDocs.totalHits);
+ // doc values are random-access here (values.get(docID)), matching every other test in this class
+ NumericDocValues values = MultiDocValues.getNumericValues(reader, "id");
+ assertEquals(i, values.get(topDocs.scoreDocs[0].doc));
+ Document document = reader.document(topDocs.scoreDocs[0].doc);
+ assertEquals(Integer.toString(i), document.get("id"));
+ }
+ }
+
+ reader.close();
+ w.close();
+ dir.close();
+ }
+
static class UpdateRunnable implements Runnable {
private final int numDocs;
@@ -746,6 +1404,7 @@ public class TestIndexSorting extends LuceneTestCase {
final int numDocs = atLeast(100);
Thread[] threads = new Thread[2];
+
final AtomicInteger updateCount = new AtomicInteger(atLeast(1000));
final CountDownLatch latch = new CountDownLatch(1);
for (int i = 0; i < threads.length; ++i) {
@@ -975,13 +1634,13 @@ public class TestIndexSorting extends LuceneTestCase {
}
static final class NormsSimilarity extends Similarity {
-
+
private final Similarity in;
-
+
public NormsSimilarity(Similarity in) {
this.in = in;
}
-
+
@Override
public long computeNorm(FieldInvertState state) {
if (state.getName().equals("norms")) {
@@ -990,39 +1649,39 @@ public class TestIndexSorting extends LuceneTestCase {
return in.computeNorm(state);
}
}
-
+
@Override
public SimWeight computeWeight(CollectionStatistics collectionStats, TermStatistics... termStats) {
return in.computeWeight(collectionStats, termStats);
}
-
+
@Override
public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException {
return in.simScorer(weight, context);
}
-
+
}
-
+
static final class PositionsTokenStream extends TokenStream {
-
+
private final CharTermAttribute term;
private final PayloadAttribute payload;
private final OffsetAttribute offset;
-
+
private int pos, off;
-
+
public PositionsTokenStream() {
term = addAttribute(CharTermAttribute.class);
payload = addAttribute(PayloadAttribute.class);
offset = addAttribute(OffsetAttribute.class);
}
-
+
@Override
public boolean incrementToken() throws IOException {
if (pos == 0) {
return false;
}
-
+
clearAttributes();
term.append("#all#");
payload.setPayload(new BytesRef(Integer.toString(pos)));
@@ -1031,7 +1690,7 @@ public class TestIndexSorting extends LuceneTestCase {
++off;
return true;
}
-
+
void setId(int id) {
pos = id / 10 + 1;
off = 0;
@@ -1072,10 +1731,10 @@ public class TestIndexSorting extends LuceneTestCase {
doc.add(norms);
doc.add(new BinaryDocValuesField("binary", new BytesRef(Integer.toString(id))));
doc.add(new SortedDocValuesField("sorted", new BytesRef(Integer.toString(id))));
- doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef(Integer.toString(id))));
- doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef(Integer.toString(id + 1))));
- doc.add(new SortedNumericDocValuesField("sorted_numeric", id));
- doc.add(new SortedNumericDocValuesField("sorted_numeric", id + 1));
+ doc.add(new SortedSetDocValuesField("multi_valued_string", new BytesRef(Integer.toString(id))));
+ doc.add(new SortedSetDocValuesField("multi_valued_string", new BytesRef(Integer.toString(id + 1))));
+ doc.add(new SortedNumericDocValuesField("multi_valued_numeric", id));
+ doc.add(new SortedNumericDocValuesField("multi_valued_numeric", id + 1));
doc.add(new Field("term_vectors", Integer.toString(id), TERM_VECTORS_TYPE));
byte[] bytes = new byte[4];
NumericUtils.intToSortableBytes(id, bytes, 0);
@@ -1088,7 +1747,7 @@ public class TestIndexSorting extends LuceneTestCase {
// We add document alread in ID order for the first writer:
Directory dir1 = newFSDirectory(createTempDir());
-
+
Random random1 = new Random(seed);
IndexWriterConfig iwc1 = newIndexWriterConfig(random1, a);
iwc1.setSimilarity(new NormsSimilarity(iwc1.getSimilarity())); // for testing norms field
@@ -1105,7 +1764,7 @@ public class TestIndexSorting extends LuceneTestCase {
// We shuffle documents, but set index sort, for the second writer:
Directory dir2 = newFSDirectory(createTempDir());
-
+
Random random2 = new Random(seed);
IndexWriterConfig iwc2 = newIndexWriterConfig(random2, a);
iwc2.setSimilarity(new NormsSimilarity(iwc2.getSimilarity())); // for testing norms field
@@ -1146,10 +1805,16 @@ public class TestIndexSorting extends LuceneTestCase {
private static final class RandomDoc {
public final int id;
public final int intValue;
+ public final int[] intValues;
public final long longValue;
+ public final long[] longValues;
public final float floatValue;
+ public final float[] floatValues;
public final double doubleValue;
+ public final double[] doubleValues;
public final byte[] bytesValue;
+ public final byte[][] bytesValues;
+
public RandomDoc(int id) {
this.id = id;
@@ -1159,16 +1824,28 @@ public class TestIndexSorting extends LuceneTestCase {
doubleValue = random().nextDouble();
bytesValue = new byte[TestUtil.nextInt(random(), 1, 50)];
random().nextBytes(bytesValue);
+
+ int numValues = random().nextInt(10);
+ intValues = new int[numValues];
+ longValues = new long[numValues];
+ floatValues = new float[numValues];
+ doubleValues = new double[numValues];
+ bytesValues = new byte[numValues][];
+ for (int i = 0; i < numValues; i++) { // one random entry per multi-valued slot
+ intValues[i] = random().nextInt();
+ longValues[i] = random().nextLong();
+ floatValues[i] = random().nextFloat();
+ doubleValues[i] = random().nextDouble();
+ bytesValues[i] = new byte[TestUtil.nextInt(random(), 1, 50)];
+ random().nextBytes(bytesValues[i]); // fill the array just allocated; filling bytesValue instead left every entry all zeros
+ }
}
}
- private static Sort randomSort() {
- int numFields = TestUtil.nextInt(random(), 1, 3);
- SortField[] sortFields = new SortField[numFields];
- for(int i=0;i<numFields-1;i++) {
- boolean reversed = random().nextBoolean();
- SortField sortField;
- switch(random().nextInt(5)) {
+ private static SortField randomIndexSortField() {
+ boolean reversed = random().nextBoolean();
+ SortField sortField;
+ switch(random().nextInt(10)) {
case 0:
sortField = new SortField("int", SortField.Type.INT, reversed);
if (random().nextBoolean()) {
@@ -1176,32 +1853,73 @@ public class TestIndexSorting extends LuceneTestCase {
}
break;
case 1:
+ sortField = new SortedNumericSortField("multi_valued_int", SortField.Type.INT, reversed);
+ if (random().nextBoolean()) {
+ sortField.setMissingValue(random().nextInt());
+ }
+ break;
+ case 2:
sortField = new SortField("long", SortField.Type.LONG, reversed);
if (random().nextBoolean()) {
sortField.setMissingValue(random().nextLong());
}
break;
- case 2:
+ case 3:
+ sortField = new SortedNumericSortField("multi_valued_long", SortField.Type.LONG, reversed);
+ if (random().nextBoolean()) {
+ sortField.setMissingValue(random().nextLong());
+ }
+ break;
+ case 4:
sortField = new SortField("float", SortField.Type.FLOAT, reversed);
if (random().nextBoolean()) {
sortField.setMissingValue(random().nextFloat());
}
break;
- case 3:
+ case 5:
+ sortField = new SortedNumericSortField("multi_valued_float", SortField.Type.FLOAT, reversed);
+ if (random().nextBoolean()) {
+ sortField.setMissingValue(random().nextFloat());
+ }
+ break;
+ case 6:
sortField = new SortField("double", SortField.Type.DOUBLE, reversed);
if (random().nextBoolean()) {
sortField.setMissingValue(random().nextDouble());
}
break;
- case 4:
+ case 7:
+ sortField = new SortedNumericSortField("multi_valued_double", SortField.Type.DOUBLE, reversed);
+ if (random().nextBoolean()) {
+ sortField.setMissingValue(random().nextDouble());
+ }
+ break;
+ case 8:
sortField = new SortField("bytes", SortField.Type.STRING, reversed);
if (random().nextBoolean()) {
sortField.setMissingValue(SortField.STRING_LAST);
}
break;
+ case 9:
+ sortField = new SortedSetSortField("multi_valued_bytes", reversed);
+ if (random().nextBoolean()) {
+ sortField.setMissingValue(SortField.STRING_LAST);
+ }
+ break;
default:
- throw new AssertionError();
- }
+ sortField = null;
+ fail();
+ }
+ return sortField;
+ }
+
+
+ private static Sort randomSort() {
+ // at least 2
+ int numFields = TestUtil.nextInt(random(), 2, 4);
+ SortField[] sortFields = new SortField[numFields];
+ for(int i=0;i<numFields-1;i++) {
+ SortField sortField = randomIndexSortField();
sortFields[i] = sortField;
}
@@ -1261,6 +1979,27 @@ public class TestIndexSorting extends LuceneTestCase {
doc.add(new DoubleDocValuesField("double", docValues.doubleValue));
doc.add(new FloatDocValuesField("float", docValues.floatValue));
doc.add(new SortedDocValuesField("bytes", new BytesRef(docValues.bytesValue)));
+
+ for (int value : docValues.intValues) {
+ doc.add(new SortedNumericDocValuesField("multi_valued_int", value));
+ }
+
+ for (long value : docValues.longValues) {
+ doc.add(new SortedNumericDocValuesField("multi_valued_long", value));
+ }
+
+ for (float value : docValues.floatValues) {
+ doc.add(new SortedNumericDocValuesField("multi_valued_float", NumericUtils.floatToSortableInt(value)));
+ }
+
+ for (double value : docValues.doubleValues) {
+ doc.add(new SortedNumericDocValuesField("multi_valued_double", NumericUtils.doubleToSortableLong(value)));
+ }
+
+ for (byte[] value : docValues.bytesValues) {
+ doc.add(new SortedSetDocValuesField("multi_valued_bytes", new BytesRef(value)));
+ }
+
w1.addDocument(doc);
w2.addDocument(doc);
if (random().nextDouble() < deleteChance) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e357f957/lucene/test-framework/src/java/org/apache/lucene/index/BaseSegmentInfoFormatTestCase.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseSegmentInfoFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseSegmentInfoFormatTestCase.java
index 49d19ae..ae5416f 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseSegmentInfoFormatTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseSegmentInfoFormatTestCase.java
@@ -28,6 +28,8 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.SortedNumericSortField;
+import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.MockDirectoryWrapper;
@@ -167,6 +169,78 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT
return true;
}
+ private SortField randomIndexSortField() { // test helper used by testSort: returns one randomly configured sort field
+ boolean reversed = random().nextBoolean(); // sort direction is randomized as well
+ SortField sortField;
+ switch(random().nextInt(10)) { // ten variants: even cases build a plain SortField, odd cases its multi-valued counterpart
+ case 0:
+ sortField = new SortField(TestUtil.randomSimpleString(random()), SortField.Type.INT, reversed);
+ if (random().nextBoolean()) { // in every case the missing value is set only on some runs, otherwise left unset
+ sortField.setMissingValue(random().nextInt());
+ }
+ break;
+ case 1:
+ sortField = new SortedNumericSortField(TestUtil.randomSimpleString(random()), SortField.Type.INT, reversed); // NOTE(review): no selector argument, so only the constructor's default selector is exercised
+ if (random().nextBoolean()) {
+ sortField.setMissingValue(random().nextInt());
+ }
+ break;
+
+ case 2:
+ sortField = new SortField(TestUtil.randomSimpleString(random()), SortField.Type.LONG, reversed);
+ if (random().nextBoolean()) {
+ sortField.setMissingValue(random().nextLong());
+ }
+ break;
+ case 3:
+ sortField = new SortedNumericSortField(TestUtil.randomSimpleString(random()), SortField.Type.LONG, reversed);
+ if (random().nextBoolean()) {
+ sortField.setMissingValue(random().nextLong());
+ }
+ break;
+ case 4:
+ sortField = new SortField(TestUtil.randomSimpleString(random()), SortField.Type.FLOAT, reversed);
+ if (random().nextBoolean()) {
+ sortField.setMissingValue(random().nextFloat());
+ }
+ break;
+ case 5:
+ sortField = new SortedNumericSortField(TestUtil.randomSimpleString(random()), SortField.Type.FLOAT, reversed);
+ if (random().nextBoolean()) {
+ sortField.setMissingValue(random().nextFloat());
+ }
+ break;
+ case 6:
+ sortField = new SortField(TestUtil.randomSimpleString(random()), SortField.Type.DOUBLE, reversed);
+ if (random().nextBoolean()) {
+ sortField.setMissingValue(random().nextDouble());
+ }
+ break;
+ case 7:
+ sortField = new SortedNumericSortField(TestUtil.randomSimpleString(random()), SortField.Type.DOUBLE, reversed);
+ if (random().nextBoolean()) {
+ sortField.setMissingValue(random().nextDouble());
+ }
+ break;
+ case 8:
+ sortField = new SortField(TestUtil.randomSimpleString(random()), SortField.Type.STRING, reversed);
+ if (random().nextBoolean()) {
+ sortField.setMissingValue(SortField.STRING_LAST); // NOTE(review): the inline code this helper replaces in testSort also tried STRING_FIRST; only STRING_LAST is covered here
+ }
+ break;
+ case 9:
+ sortField = new SortedSetSortField(TestUtil.randomSimpleString(random()), reversed); // NOTE(review): two-argument constructor, so no explicit selector is exercised
+ if (random().nextBoolean()) {
+ sortField.setMissingValue(SortField.STRING_LAST);
+ }
+ break;
+ default: // presumably unreachable: nextInt(10) should only yield 0..9
+ sortField = null; // assignment is needed so sortField is definitely assigned before the return below
+ fail(); // expected to throw, so the null should never be returned — confirm fail() semantics
+ }
+ return sortField;
+ }
+
/** Test sort */
public void testSort() throws IOException {
assumeTrue("test requires a codec that can read/write index sort", supportsIndexSort());
@@ -180,22 +254,7 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT
final int numSortFields = TestUtil.nextInt(random(), 1, 3);
SortField[] sortFields = new SortField[numSortFields];
for (int j = 0; j < numSortFields; ++j) {
- sortFields[j] = new SortField(
- TestUtil.randomSimpleString(random()),
- random().nextBoolean() ? SortField.Type.LONG : SortField.Type.STRING,
- random().nextBoolean());
- if (random().nextBoolean()) {
- switch (sortFields[j].getType()) {
- case LONG:
- sortFields[j].setMissingValue(random().nextLong());
- break;
- case STRING:
- sortFields[j].setMissingValue(random().nextBoolean() ? SortField.STRING_FIRST : SortField.STRING_LAST);
- break;
- default:
- fail();
- }
- }
+ sortFields[j] = randomIndexSortField();
}
sort = new Sort(sortFields);
}
[3/3] lucene-solr:branch_6x: LUCENE-7537: Index time sorting now
supports multi-valued sorts using selectors (MIN, MAX, etc.)
Posted by mi...@apache.org.
LUCENE-7537: Index time sorting now supports multi-valued sorts using selectors (MIN, MAX, etc.)
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/e357f957
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/e357f957
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/e357f957
Branch: refs/heads/branch_6x
Commit: e357f957f3059add5582b9695f838794c386dcad
Parents: 6962381
Author: Mike McCandless <mi...@apache.org>
Authored: Tue Nov 15 16:22:51 2016 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Wed Nov 16 10:37:02 2016 -0500
----------------------------------------------------------------------
lucene/CHANGES.txt | 3 +
.../simpletext/SimpleTextSegmentInfoFormat.java | 148 ++-
.../lucene62/Lucene62SegmentInfoFormat.java | 118 ++-
.../apache/lucene/index/IndexWriterConfig.java | 3 +-
.../org/apache/lucene/index/MultiSorter.java | 63 +-
.../java/org/apache/lucene/index/Sorter.java | 40 +-
.../lucene/search/SortedNumericSortField.java | 5 +
.../apache/lucene/index/TestIndexSorting.java | 987 ++++++++++++++++---
.../index/BaseSegmentInfoFormatTestCase.java | 91 +-
9 files changed, 1247 insertions(+), 211 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e357f957/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 90dea97..3aa6042 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -50,6 +50,9 @@ Improvements
control how text is analyzed and converted into a query (Matt Weber
via Mike McCandless)
+* LUCENE-7537: Index time sorting now supports multi-valued sorts
+ using selectors (MIN, MAX, etc.) (Jim Ferenczi via Mike McCandless)
+
Other
* LUCENE-7546: Fixed references to benchmark wikipedia data and the Jenkins line-docs file
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e357f957/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java
----------------------------------------------------------------------
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java
index 146e92a..3d38d72 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java
@@ -33,9 +33,14 @@ import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.SortedNumericSelector;
+import org.apache.lucene.search.SortedNumericSortField;
+import org.apache.lucene.search.SortedSetSelector;
+import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
@@ -64,6 +69,7 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
final static BytesRef SI_SORT = new BytesRef(" sort ");
final static BytesRef SI_SORT_FIELD = new BytesRef(" field ");
final static BytesRef SI_SORT_TYPE = new BytesRef(" type ");
+ final static BytesRef SI_SELECTOR_TYPE = new BytesRef(" selector ");
final static BytesRef SI_SORT_REVERSE = new BytesRef(" reverse ");
final static BytesRef SI_SORT_MISSING = new BytesRef(" missing ");
@@ -158,6 +164,8 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
final String typeAsString = readString(SI_SORT_TYPE.length, scratch);
final SortField.Type type;
+ SortedSetSelector.Type selectorSet = null;
+ SortedNumericSelector.Type selectorNumeric = null;
switch (typeAsString) {
case "string":
type = SortField.Type.STRING;
@@ -174,6 +182,26 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
case "float":
type = SortField.Type.FLOAT;
break;
+ case "multi_valued_string":
+ type = SortField.Type.STRING;
+ selectorSet = readSetSelector(input, scratch);
+ break;
+ case "multi_valued_long":
+ type = SortField.Type.LONG;
+ selectorNumeric = readNumericSelector(input, scratch);
+ break;
+ case "multi_valued_int":
+ type = SortField.Type.INT;
+ selectorNumeric = readNumericSelector(input, scratch);
+ break;
+ case "multi_valued_double":
+ type = SortField.Type.DOUBLE;
+ selectorNumeric = readNumericSelector(input, scratch);
+ break;
+ case "multi_valued_float":
+ type = SortField.Type.FLOAT;
+ selectorNumeric = readNumericSelector(input, scratch);
+ break;
default:
throw new CorruptIndexException("unable to parse sort type string: " + typeAsString, input);
}
@@ -245,7 +273,13 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
default:
throw new AssertionError();
}
- sortField[i] = new SortField(field, type, reverse);
+ if (selectorSet != null) {
+ sortField[i] = new SortedSetSortField(field, reverse);
+ } else if (selectorNumeric != null) {
+ sortField[i] = new SortedNumericSortField(field, type, reverse);
+ } else {
+ sortField[i] = new SortField(field, type, reverse);
+ }
if (missingValue != null) {
sortField[i].setMissingValue(missingValue);
}
@@ -265,6 +299,38 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
private String readString(int offset, BytesRefBuilder scratch) {
return new String(scratch.bytes(), offset, scratch.length()-offset, StandardCharsets.UTF_8);
}
+
+ private SortedSetSelector.Type readSetSelector(IndexInput input, BytesRefBuilder scratch) throws IOException { // called from read() for the "multi_valued_string" sort type to parse its selector line
+ SimpleTextUtil.readLine(input, scratch); // presumably fills scratch with the next line of input — confirm against SimpleTextUtil
+ assert StringHelper.startsWith(scratch.get(), SI_SELECTOR_TYPE); // assertion only: the line is expected to start with the " selector " prefix
+ final String selectorAsString = readString(SI_SELECTOR_TYPE.length, scratch); // UTF-8 text following the prefix
+ switch (selectorAsString) { // names mirror the strings emitted for SortedSetSortField in write()
+ case "min":
+ return SortedSetSelector.Type.MIN;
+ case "middle_min":
+ return SortedSetSelector.Type.MIDDLE_MIN;
+ case "middle_max":
+ return SortedSetSelector.Type.MIDDLE_MAX;
+ case "max":
+ return SortedSetSelector.Type.MAX;
+ default: // any other name is reported as index corruption
+ throw new CorruptIndexException("unable to parse SortedSetSelector type: " + selectorAsString, input);
+ }
+ }
+
+ private SortedNumericSelector.Type readNumericSelector(IndexInput input, BytesRefBuilder scratch) throws IOException { // called from read() for the multi_valued_long/int/double/float sort types
+ SimpleTextUtil.readLine(input, scratch); // presumably fills scratch with the next line of input — confirm against SimpleTextUtil
+ assert StringHelper.startsWith(scratch.get(), SI_SELECTOR_TYPE); // assertion only: the line is expected to start with the " selector " prefix
+ final String selectorAsString = readString(SI_SELECTOR_TYPE.length, scratch); // UTF-8 text following the prefix
+ switch (selectorAsString) { // only "min" and "max" are written for SortedNumericSortField in write()
+ case "min":
+ return SortedNumericSelector.Type.MIN;
+ case "max":
+ return SortedNumericSelector.Type.MAX;
+ default: // any other name is reported as index corruption
+ throw new CorruptIndexException("unable to parse SortedNumericSelector type: " + selectorAsString, input);
+ }
+ }
@Override
public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException {
@@ -352,29 +418,93 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
SimpleTextUtil.writeNewline(output);
SimpleTextUtil.write(output, SI_SORT_TYPE);
- final String sortType;
- switch (sortField.getType()) {
+ final String sortTypeString;
+ final SortField.Type sortType;
+ final boolean multiValued;
+ if (sortField instanceof SortedSetSortField) {
+ sortType = SortField.Type.STRING;
+ multiValued = true;
+ } else if (sortField instanceof SortedNumericSortField) {
+ sortType = ((SortedNumericSortField) sortField).getNumericType();
+ multiValued = true;
+ } else {
+ sortType = sortField.getType();
+ multiValued = false;
+ }
+ switch (sortType) {
case STRING:
- sortType = "string";
+ if (multiValued) {
+ sortTypeString = "multi_valued_string";
+ } else {
+ sortTypeString = "string";
+ }
break;
case LONG:
- sortType = "long";
+ if (multiValued) {
+ sortTypeString = "multi_valued_long";
+ } else {
+ sortTypeString = "long";
+ }
break;
case INT:
- sortType = "int";
+ if (multiValued) {
+ sortTypeString = "multi_valued_int";
+ } else {
+ sortTypeString = "int";
+ }
break;
case DOUBLE:
- sortType = "double";
+ if (multiValued) {
+ sortTypeString = "multi_valued_double";
+ } else {
+ sortTypeString = "double";
+ }
break;
case FLOAT:
- sortType = "float";
+ if (multiValued) {
+ sortTypeString = "multi_valued_float";
+ } else {
+ sortTypeString = "float";
+ }
break;
default:
throw new IllegalStateException("Unexpected sort type: " + sortField.getType());
}
- SimpleTextUtil.write(output, sortType, scratch);
+ SimpleTextUtil.write(output, sortTypeString, scratch);
SimpleTextUtil.writeNewline(output);
+ if (sortField instanceof SortedSetSortField) {
+ SortedSetSelector.Type selector = ((SortedSetSortField) sortField).getSelector();
+ final String selectorString;
+ if (selector == SortedSetSelector.Type.MIN) {
+ selectorString = "min";
+ } else if (selector == SortedSetSelector.Type.MIDDLE_MIN) {
+ selectorString = "middle_min";
+ } else if (selector == SortedSetSelector.Type.MIDDLE_MAX) {
+ selectorString = "middle_max";
+ } else if (selector == SortedSetSelector.Type.MAX) {
+ selectorString = "max";
+ } else {
+ throw new IllegalStateException("Unexpected SortedSetSelector type selector: " + selector);
+ }
+ SimpleTextUtil.write(output, SI_SELECTOR_TYPE);
+ SimpleTextUtil.write(output, selectorString, scratch);
+ SimpleTextUtil.writeNewline(output);
+ } else if (sortField instanceof SortedNumericSortField) {
+ SortedNumericSelector.Type selector = ((SortedNumericSortField) sortField).getSelector();
+ final String selectorString;
+ if (selector == SortedNumericSelector.Type.MIN) {
+ selectorString = "min";
+ } else if (selector == SortedNumericSelector.Type.MAX) {
+ selectorString = "max";
+ } else {
+ throw new IllegalStateException("Unexpected SortedNumericSelector type selector: " + selector);
+ }
+ SimpleTextUtil.write(output, SI_SELECTOR_TYPE);
+ SimpleTextUtil.write(output, selectorString, scratch);
+ SimpleTextUtil.writeNewline(output);
+ }
+
SimpleTextUtil.write(output, SI_SORT_REVERSE);
SimpleTextUtil.write(output, Boolean.toString(sortField.getReverse()), scratch);
SimpleTextUtil.writeNewline(output);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e357f957/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java
index 1ee5258..da6e395 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java
@@ -29,6 +29,10 @@ import org.apache.lucene.index.SegmentInfo; // javadocs
import org.apache.lucene.index.SegmentInfos; // javadocs
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.SortedNumericSelector;
+import org.apache.lucene.search.SortedNumericSortField;
+import org.apache.lucene.search.SortedSetSelector;
+import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataOutput; // javadocs
import org.apache.lucene.store.Directory;
@@ -69,7 +73,7 @@ import org.apache.lucene.util.Version;
* addIndexes), etc.</li>
* <li>Files is a list of files referred to by this segment.</li>
* </ul>
- *
+ *
* @see SegmentInfos
* @lucene.experimental
*/
@@ -78,7 +82,7 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
/** Sole constructor. */
public Lucene62SegmentInfoFormat() {
}
-
+
@Override
public SegmentInfo read(Directory dir, String segment, byte[] segmentID, IOContext context) throws IOException {
final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene62SegmentInfoFormat.SI_EXTENSION);
@@ -91,13 +95,13 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
Lucene62SegmentInfoFormat.VERSION_CURRENT,
segmentID, "");
final Version version = Version.fromBits(input.readInt(), input.readInt(), input.readInt());
-
+
final int docCount = input.readInt();
if (docCount < 0) {
throw new CorruptIndexException("invalid docCount: " + docCount, input);
}
final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
-
+
final Map<String,String> diagnostics = input.readMapOfStrings();
final Set<String> files = input.readSetOfStrings();
final Map<String,String> attributes = input.readMapOfStrings();
@@ -110,6 +114,8 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
String fieldName = input.readString();
int sortTypeID = input.readVInt();
SortField.Type sortType;
+ SortedSetSelector.Type sortedSetSelector = null;
+ SortedNumericSelector.Type sortedNumericSelector = null;
switch(sortTypeID) {
case 0:
sortType = SortField.Type.STRING;
@@ -126,6 +132,43 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
case 4:
sortType = SortField.Type.FLOAT;
break;
+ case 5:
+ sortType = SortField.Type.STRING;
+ byte selector = input.readByte();
+ if (selector == 0) {
+ sortedSetSelector = SortedSetSelector.Type.MIN;
+ } else if (selector == 1) {
+ sortedSetSelector = SortedSetSelector.Type.MAX;
+ } else if (selector == 2) {
+ sortedSetSelector = SortedSetSelector.Type.MIDDLE_MIN;
+ } else if (selector == 3) {
+ sortedSetSelector = SortedSetSelector.Type.MIDDLE_MAX;
+ } else {
+ throw new CorruptIndexException("invalid index SortedSetSelector ID: " + selector, input);
+ }
+ break;
+ case 6:
+ byte type = input.readByte();
+ if (type == 0) {
+ sortType = SortField.Type.LONG;
+ } else if (type == 1) {
+ sortType = SortField.Type.INT;
+ } else if (type == 2) {
+ sortType = SortField.Type.DOUBLE;
+ } else if (type == 3) {
+ sortType = SortField.Type.FLOAT;
+ } else {
+ throw new CorruptIndexException("invalid index SortedNumericSortField type ID: " + type, input);
+ }
+ byte numericSelector = input.readByte();
+ if (numericSelector == 0) {
+ sortedNumericSelector = SortedNumericSelector.Type.MIN;
+ } else if (numericSelector == 1) {
+ sortedNumericSelector = SortedNumericSelector.Type.MAX;
+ } else {
+ throw new CorruptIndexException("invalid index SortedNumericSelector ID: " + numericSelector, input);
+ }
+ break;
default:
throw new CorruptIndexException("invalid index sort field type ID: " + sortTypeID, input);
}
@@ -139,7 +182,13 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
throw new CorruptIndexException("invalid index sort reverse: " + b, input);
}
- sortFields[i] = new SortField(fieldName, sortType, reverse);
+ if (sortedSetSelector != null) {
+ sortFields[i] = new SortedSetSortField(fieldName, reverse, sortedSetSelector);
+ } else if (sortedNumericSelector != null) {
+ sortFields[i] = new SortedNumericSortField(fieldName, sortType, reverse, sortedNumericSelector);
+ } else {
+ sortFields[i] = new SortField(fieldName, sortType, reverse);
+ }
Object missingValue;
b = input.readByte();
@@ -194,7 +243,7 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
} else {
indexSort = null;
}
-
+
si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, segmentID, attributes, indexSort);
si.setFiles(files);
} catch (Throwable exception) {
@@ -213,8 +262,8 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
try (IndexOutput output = dir.createOutput(fileName, ioContext)) {
// Only add the file once we've successfully created it, else IFD assert can trip:
si.addFile(fileName);
- CodecUtil.writeIndexHeader(output,
- Lucene62SegmentInfoFormat.CODEC_NAME,
+ CodecUtil.writeIndexHeader(output,
+ Lucene62SegmentInfoFormat.CODEC_NAME,
Lucene62SegmentInfoFormat.VERSION_CURRENT,
si.getId(),
"");
@@ -245,6 +294,7 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
output.writeVInt(numSortFields);
for (int i = 0; i < numSortFields; ++i) {
SortField sortField = indexSort.getSort()[i];
+ SortField.Type sortType = sortField.getType();
output.writeString(sortField.getField());
int sortTypeID;
switch (sortField.getType()) {
@@ -263,10 +313,55 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
case FLOAT:
sortTypeID = 4;
break;
+ case CUSTOM:
+ if (sortField instanceof SortedSetSortField) {
+ sortTypeID = 5;
+ sortType = SortField.Type.STRING;
+ } else if (sortField instanceof SortedNumericSortField) {
+ sortTypeID = 6;
+ sortType = ((SortedNumericSortField) sortField).getNumericType();
+ } else {
+ throw new IllegalStateException("Unexpected SortedNumericSortField " + sortField);
+ }
+ break;
default:
throw new IllegalStateException("Unexpected sort type: " + sortField.getType());
}
output.writeVInt(sortTypeID);
+ if (sortTypeID == 5) {
+ SortedSetSortField ssf = (SortedSetSortField) sortField;
+ if (ssf.getSelector() == SortedSetSelector.Type.MIN) {
+ output.writeByte((byte) 0);
+ } else if (ssf.getSelector() == SortedSetSelector.Type.MAX) {
+ output.writeByte((byte) 1);
+ } else if (ssf.getSelector() == SortedSetSelector.Type.MIDDLE_MIN) {
+ output.writeByte((byte) 2);
+ } else if (ssf.getSelector() == SortedSetSelector.Type.MIDDLE_MAX) {
+ output.writeByte((byte) 3);
+ } else {
+ throw new IllegalStateException("Unexpected SortedSetSelector type: " + ssf.getSelector());
+ }
+ } else if (sortTypeID == 6) {
+ SortedNumericSortField snsf = (SortedNumericSortField) sortField;
+ if (snsf.getNumericType() == SortField.Type.LONG) {
+ output.writeByte((byte) 0);
+ } else if (snsf.getNumericType() == SortField.Type.INT) {
+ output.writeByte((byte) 1);
+ } else if (snsf.getNumericType() == SortField.Type.DOUBLE) {
+ output.writeByte((byte) 2);
+ } else if (snsf.getNumericType() == SortField.Type.FLOAT) {
+ output.writeByte((byte) 3);
+ } else {
+ throw new IllegalStateException("Unexpected SortedNumericSelector type: " + snsf.getNumericType());
+ }
+ if (snsf.getSelector() == SortedNumericSelector.Type.MIN) {
+ output.writeByte((byte) 0);
+ } else if (snsf.getSelector() == SortedNumericSelector.Type.MAX) {
+ output.writeByte((byte) 1);
+ } else {
+ throw new IllegalStateException("Unexpected sorted numeric selector type: " + snsf.getSelector());
+ }
+ }
output.writeByte((byte) (sortField.getReverse() ? 0 : 1));
// write missing value
@@ -274,7 +369,7 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
if (missingValue == null) {
output.writeByte((byte) 0);
} else {
- switch(sortField.getType()) {
+ switch(sortType) {
case STRING:
if (missingValue == SortField.STRING_LAST) {
output.writeByte((byte) 1);
@@ -305,7 +400,7 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
}
}
}
-
+
CodecUtil.writeFooter(output);
}
}
@@ -314,5 +409,6 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
public final static String SI_EXTENSION = "si";
static final String CODEC_NAME = "Lucene62SegmentInfo";
static final int VERSION_START = 0;
- static final int VERSION_CURRENT = VERSION_START;
+ static final int VERSION_MULTI_VALUED_SORT = 1;
+ static final int VERSION_CURRENT = VERSION_MULTI_VALUED_SORT;
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e357f957/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java
index 368259a..4f642ee 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java
@@ -468,7 +468,8 @@ public final class IndexWriterConfig extends LiveIndexWriterConfig {
*/
public IndexWriterConfig setIndexSort(Sort sort) {
for(SortField sortField : sort.getSort()) {
- if (ALLOWED_INDEX_SORT_TYPES.contains(sortField.getType()) == false) {
+ final SortField.Type sortType = Sorter.getSortFieldType(sortField);
+ if (ALLOWED_INDEX_SORT_TYPES.contains(sortType) == false) {
throw new IllegalArgumentException("invalid SortField type: must be one of " + ALLOWED_INDEX_SORT_TYPES + " but got: " + sortField);
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e357f957/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java b/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
index 51cabab..7c2c3be 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
@@ -123,7 +123,11 @@ final class MultiSorter {
public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB);
}
+ /** Returns {@code CrossReaderComparator} for the provided readers to represent the requested {@link SortField} sort order. */
private static CrossReaderComparator getComparator(List<CodecReader> readers, SortField sortField) throws IOException {
+ final int reverseMul = sortField.getReverse() ? -1 : 1;
+ final SortField.Type sortType = Sorter.getSortFieldType(sortField);
+
switch(sortField.getType()) {
case STRING:
@@ -138,16 +142,9 @@ final class MultiSorter {
}
final int missingOrd;
if (sortField.getMissingValue() == SortField.STRING_LAST) {
- missingOrd = Integer.MAX_VALUE;
+ missingOrd = sortField.getReverse() ? Integer.MIN_VALUE : Integer.MAX_VALUE;
} else {
- missingOrd = Integer.MIN_VALUE;
- }
-
- final int reverseMul;
- if (sortField.getReverse()) {
- reverseMul = -1;
- } else {
- reverseMul = 1;
+ missingOrd = sortField.getReverse() ? Integer.MAX_VALUE : Integer.MIN_VALUE;
}
return new CrossReaderComparator() {
@@ -171,19 +168,11 @@ final class MultiSorter {
List<NumericDocValues> values = new ArrayList<>();
List<Bits> docsWithFields = new ArrayList<>();
for(CodecReader reader : readers) {
- values.add(DocValues.getNumeric(reader, sortField.getField()));
+ values.add(Sorter.getOrWrapNumeric(reader, sortField));
docsWithFields.add(DocValues.getDocsWithField(reader, sortField.getField()));
}
- final int reverseMul;
- if (sortField.getReverse()) {
- reverseMul = -1;
- } else {
- reverseMul = 1;
- }
-
- final long missingValue;
-
+ final Long missingValue;
if (sortField.getMissingValue() != null) {
missingValue = (Long) sortField.getMissingValue();
} else {
@@ -216,19 +205,11 @@ final class MultiSorter {
List<NumericDocValues> values = new ArrayList<>();
List<Bits> docsWithFields = new ArrayList<>();
for(CodecReader reader : readers) {
- values.add(DocValues.getNumeric(reader, sortField.getField()));
+ values.add(Sorter.getOrWrapNumeric(reader, sortField));
docsWithFields.add(DocValues.getDocsWithField(reader, sortField.getField()));
}
- final int reverseMul;
- if (sortField.getReverse()) {
- reverseMul = -1;
- } else {
- reverseMul = 1;
- }
-
- final int missingValue;
-
+ final Integer missingValue;
if (sortField.getMissingValue() != null) {
missingValue = (Integer) sortField.getMissingValue();
} else {
@@ -261,19 +242,11 @@ final class MultiSorter {
List<NumericDocValues> values = new ArrayList<>();
List<Bits> docsWithFields = new ArrayList<>();
for(CodecReader reader : readers) {
- values.add(DocValues.getNumeric(reader, sortField.getField()));
+ values.add(Sorter.getOrWrapNumeric(reader, sortField));
docsWithFields.add(DocValues.getDocsWithField(reader, sortField.getField()));
}
- final int reverseMul;
- if (sortField.getReverse()) {
- reverseMul = -1;
- } else {
- reverseMul = 1;
- }
-
- final double missingValue;
-
+ final Double missingValue;
if (sortField.getMissingValue() != null) {
missingValue = (Double) sortField.getMissingValue();
} else {
@@ -306,19 +279,11 @@ final class MultiSorter {
List<NumericDocValues> values = new ArrayList<>();
List<Bits> docsWithFields = new ArrayList<>();
for(CodecReader reader : readers) {
- values.add(DocValues.getNumeric(reader, sortField.getField()));
+ values.add(Sorter.getOrWrapNumeric(reader, sortField));
docsWithFields.add(DocValues.getDocsWithField(reader, sortField.getField()));
}
- final int reverseMul;
- if (sortField.getReverse()) {
- reverseMul = -1;
- } else {
- reverseMul = 1;
- }
-
- final float missingValue;
-
+ final Float missingValue;
if (sortField.getMissingValue() != null) {
missingValue = (Float) sortField.getMissingValue();
} else {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e357f957/lucene/core/src/java/org/apache/lucene/index/Sorter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/Sorter.java b/lucene/core/src/java/org/apache/lucene/index/Sorter.java
index cf75c18..9ec472a 100644
--- a/lucene/core/src/java/org/apache/lucene/index/Sorter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/Sorter.java
@@ -24,6 +24,10 @@ import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.SortedNumericSelector;
+import org.apache.lucene.search.SortedNumericSortField;
+import org.apache.lucene.search.SortedSetSelector;
+import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.util.TimSorter;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
@@ -198,6 +202,40 @@ final class Sorter {
};
}
+ /** Returns the native sort type for {@link SortedSetSortField} and {@link SortedNumericSortField},
+ * {@link SortField#getType()} otherwise */
+ static SortField.Type getSortFieldType(SortField sortField) {
+ if (sortField instanceof SortedSetSortField) {
+ return SortField.Type.STRING;
+ } else if (sortField instanceof SortedNumericSortField) {
+ return ((SortedNumericSortField) sortField).getNumericType();
+ } else {
+ return sortField.getType();
+ }
+ }
+
+ /** Wraps a {@link SortedNumericDocValues} as a single-valued view if the field is an instance of {@link SortedNumericSortField},
+ * returns {@link NumericDocValues} for the field otherwise. */
+ static NumericDocValues getOrWrapNumeric(LeafReader reader, SortField sortField) throws IOException {
+ if (sortField instanceof SortedNumericSortField) {
+ SortedNumericSortField sf = (SortedNumericSortField) sortField;
+ return SortedNumericSelector.wrap(DocValues.getSortedNumeric(reader, sf.getField()), sf.getSelector(), sf.getNumericType());
+ } else {
+ return DocValues.getNumeric(reader, sortField.getField());
+ }
+ }
+
+ /** Wraps a {@link SortedSetDocValues} as a single-valued view if the field is an instance of {@link SortedSetSortField},
+ * returns {@link SortedDocValues} for the field otherwise. */
+ static SortedDocValues getOrWrapSorted(LeafReader reader, SortField sortField) throws IOException {
+ if (sortField instanceof SortedSetSortField) {
+ SortedSetSortField sf = (SortedSetSortField) sortField;
+ return SortedSetSelector.wrap(DocValues.getSortedSet(reader, sf.getField()), sf.getSelector());
+ } else {
+ return DocValues.getSorted(reader, sortField.getField());
+ }
+ }
+
/**
* Returns a mapping from the old document ID to its new location in the
* sorted index. Implementations can use the auxiliary
@@ -258,7 +296,7 @@ final class Sorter {
public String toString() {
return getID();
}
-
+
static final Scorer FAKESCORER = new Scorer(null) {
float score;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e357f957/lucene/core/src/java/org/apache/lucene/search/SortedNumericSortField.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/SortedNumericSortField.java b/lucene/core/src/java/org/apache/lucene/search/SortedNumericSortField.java
index 188a408..6e45047 100644
--- a/lucene/core/src/java/org/apache/lucene/search/SortedNumericSortField.java
+++ b/lucene/core/src/java/org/apache/lucene/search/SortedNumericSortField.java
@@ -82,6 +82,11 @@ public class SortedNumericSortField extends SortField {
this.selector = selector;
this.type = type;
}
+
+ /** Returns the numeric type in use for this sort */
+ public SortField.Type getNumericType() {
+ return type;
+ }
/** Returns the selector in use for this sort */
public SortedNumericSelector.Type getSelector() {