You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2016/11/16 15:37:58 UTC
[3/3] lucene-solr:branch_6x: LUCENE-7537: Index time sorting now
supports multi-valued sorts using selectors (MIN, MAX, etc.)
LUCENE-7537: Index time sorting now supports multi-valued sorts using selectors (MIN, MAX, etc.)
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/e357f957
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/e357f957
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/e357f957
Branch: refs/heads/branch_6x
Commit: e357f957f3059add5582b9695f838794c386dcad
Parents: 6962381
Author: Mike McCandless <mi...@apache.org>
Authored: Tue Nov 15 16:22:51 2016 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Wed Nov 16 10:37:02 2016 -0500
----------------------------------------------------------------------
lucene/CHANGES.txt | 3 +
.../simpletext/SimpleTextSegmentInfoFormat.java | 148 ++-
.../lucene62/Lucene62SegmentInfoFormat.java | 118 ++-
.../apache/lucene/index/IndexWriterConfig.java | 3 +-
.../org/apache/lucene/index/MultiSorter.java | 63 +-
.../java/org/apache/lucene/index/Sorter.java | 40 +-
.../lucene/search/SortedNumericSortField.java | 5 +
.../apache/lucene/index/TestIndexSorting.java | 987 ++++++++++++++++---
.../index/BaseSegmentInfoFormatTestCase.java | 91 +-
9 files changed, 1247 insertions(+), 211 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e357f957/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 90dea97..3aa6042 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -50,6 +50,9 @@ Improvements
control how text is analyzed and converted into a query (Matt Weber
via Mike McCandless)
+* LUCENE-7537: Index time sorting now supports multi-valued sorts
+ using selectors (MIN, MAX, etc.) (Jim Ferenczi via Mike McCandless)
+
Other
* LUCENE-7546: Fixed references to benchmark wikipedia data and the Jenkins line-docs file
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e357f957/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java
----------------------------------------------------------------------
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java
index 146e92a..3d38d72 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java
@@ -33,9 +33,14 @@ import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.SortedNumericSelector;
+import org.apache.lucene.search.SortedNumericSortField;
+import org.apache.lucene.search.SortedSetSelector;
+import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
@@ -64,6 +69,7 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
final static BytesRef SI_SORT = new BytesRef(" sort ");
final static BytesRef SI_SORT_FIELD = new BytesRef(" field ");
final static BytesRef SI_SORT_TYPE = new BytesRef(" type ");
+ final static BytesRef SI_SELECTOR_TYPE = new BytesRef(" selector ");
final static BytesRef SI_SORT_REVERSE = new BytesRef(" reverse ");
final static BytesRef SI_SORT_MISSING = new BytesRef(" missing ");
@@ -158,6 +164,8 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
final String typeAsString = readString(SI_SORT_TYPE.length, scratch);
final SortField.Type type;
+ SortedSetSelector.Type selectorSet = null;
+ SortedNumericSelector.Type selectorNumeric = null;
switch (typeAsString) {
case "string":
type = SortField.Type.STRING;
@@ -174,6 +182,26 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
case "float":
type = SortField.Type.FLOAT;
break;
+ case "multi_valued_string":
+ type = SortField.Type.STRING;
+ selectorSet = readSetSelector(input, scratch);
+ break;
+ case "multi_valued_long":
+ type = SortField.Type.LONG;
+ selectorNumeric = readNumericSelector(input, scratch);
+ break;
+ case "multi_valued_int":
+ type = SortField.Type.INT;
+ selectorNumeric = readNumericSelector(input, scratch);
+ break;
+ case "multi_valued_double":
+ type = SortField.Type.DOUBLE;
+ selectorNumeric = readNumericSelector(input, scratch);
+ break;
+ case "multi_valued_float":
+ type = SortField.Type.FLOAT;
+ selectorNumeric = readNumericSelector(input, scratch);
+ break;
default:
throw new CorruptIndexException("unable to parse sort type string: " + typeAsString, input);
}
@@ -245,7 +273,13 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
default:
throw new AssertionError();
}
- sortField[i] = new SortField(field, type, reverse);
+        // Hand the parsed selector to the multi-valued sort field; using the selector only as a
+        // null flag would drop it and leave the constructor's default selector in place.
+        if (selectorSet != null) {
+          sortField[i] = new SortedSetSortField(field, reverse, selectorSet);
+        } else if (selectorNumeric != null) {
+          sortField[i] = new SortedNumericSortField(field, type, reverse, selectorNumeric);
+        } else {
+          sortField[i] = new SortField(field, type, reverse);
+        }
if (missingValue != null) {
sortField[i].setMissingValue(missingValue);
}
@@ -265,6 +299,38 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
private String readString(int offset, BytesRefBuilder scratch) {
return new String(scratch.bytes(), offset, scratch.length()-offset, StandardCharsets.UTF_8);
}
+
+  /**
+   * Reads the next line of {@code input} into {@code scratch}, which is expected to start with
+   * {@code SI_SELECTOR_TYPE}, and maps the remainder of the line to a {@link SortedSetSelector.Type}.
+   *
+   * @throws CorruptIndexException if the selector name is not one of the known values
+   */
+  private SortedSetSelector.Type readSetSelector(IndexInput input, BytesRefBuilder scratch) throws IOException {
+    SimpleTextUtil.readLine(input, scratch);
+    assert StringHelper.startsWith(scratch.get(), SI_SELECTOR_TYPE);
+    final String name = readString(SI_SELECTOR_TYPE.length, scratch);
+    if ("min".equals(name)) {
+      return SortedSetSelector.Type.MIN;
+    }
+    if ("middle_min".equals(name)) {
+      return SortedSetSelector.Type.MIDDLE_MIN;
+    }
+    if ("middle_max".equals(name)) {
+      return SortedSetSelector.Type.MIDDLE_MAX;
+    }
+    if ("max".equals(name)) {
+      return SortedSetSelector.Type.MAX;
+    }
+    throw new CorruptIndexException("unable to parse SortedSetSelector type: " + name, input);
+  }
+
+  /**
+   * Reads the next line of {@code input} into {@code scratch}, which is expected to start with
+   * {@code SI_SELECTOR_TYPE}, and maps the remainder of the line to a {@link SortedNumericSelector.Type}.
+   *
+   * @throws CorruptIndexException if the selector name is neither {@code min} nor {@code max}
+   */
+  private SortedNumericSelector.Type readNumericSelector(IndexInput input, BytesRefBuilder scratch) throws IOException {
+    SimpleTextUtil.readLine(input, scratch);
+    assert StringHelper.startsWith(scratch.get(), SI_SELECTOR_TYPE);
+    final String name = readString(SI_SELECTOR_TYPE.length, scratch);
+    if ("min".equals(name)) {
+      return SortedNumericSelector.Type.MIN;
+    }
+    if ("max".equals(name)) {
+      return SortedNumericSelector.Type.MAX;
+    }
+    throw new CorruptIndexException("unable to parse SortedNumericSelector type: " + name, input);
+  }
@Override
public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException {
@@ -352,29 +418,93 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
SimpleTextUtil.writeNewline(output);
SimpleTextUtil.write(output, SI_SORT_TYPE);
- final String sortType;
- switch (sortField.getType()) {
+ final String sortTypeString;
+ final SortField.Type sortType;
+ final boolean multiValued;
+ if (sortField instanceof SortedSetSortField) {
+ sortType = SortField.Type.STRING;
+ multiValued = true;
+ } else if (sortField instanceof SortedNumericSortField) {
+ sortType = ((SortedNumericSortField) sortField).getNumericType();
+ multiValued = true;
+ } else {
+ sortType = sortField.getType();
+ multiValued = false;
+ }
+ switch (sortType) {
case STRING:
- sortType = "string";
+ if (multiValued) {
+ sortTypeString = "multi_valued_string";
+ } else {
+ sortTypeString = "string";
+ }
break;
case LONG:
- sortType = "long";
+ if (multiValued) {
+ sortTypeString = "multi_valued_long";
+ } else {
+ sortTypeString = "long";
+ }
break;
case INT:
- sortType = "int";
+ if (multiValued) {
+ sortTypeString = "multi_valued_int";
+ } else {
+ sortTypeString = "int";
+ }
break;
case DOUBLE:
- sortType = "double";
+ if (multiValued) {
+ sortTypeString = "multi_valued_double";
+ } else {
+ sortTypeString = "double";
+ }
break;
case FLOAT:
- sortType = "float";
+ if (multiValued) {
+ sortTypeString = "multi_valued_float";
+ } else {
+ sortTypeString = "float";
+ }
break;
default:
throw new IllegalStateException("Unexpected sort type: " + sortField.getType());
}
- SimpleTextUtil.write(output, sortType, scratch);
+ SimpleTextUtil.write(output, sortTypeString, scratch);
SimpleTextUtil.writeNewline(output);
+ if (sortField instanceof SortedSetSortField) {
+ SortedSetSelector.Type selector = ((SortedSetSortField) sortField).getSelector();
+ final String selectorString;
+ if (selector == SortedSetSelector.Type.MIN) {
+ selectorString = "min";
+ } else if (selector == SortedSetSelector.Type.MIDDLE_MIN) {
+ selectorString = "middle_min";
+ } else if (selector == SortedSetSelector.Type.MIDDLE_MAX) {
+ selectorString = "middle_max";
+ } else if (selector == SortedSetSelector.Type.MAX) {
+ selectorString = "max";
+ } else {
+ throw new IllegalStateException("Unexpected SortedSetSelector type selector: " + selector);
+ }
+ SimpleTextUtil.write(output, SI_SELECTOR_TYPE);
+ SimpleTextUtil.write(output, selectorString, scratch);
+ SimpleTextUtil.writeNewline(output);
+ } else if (sortField instanceof SortedNumericSortField) {
+ SortedNumericSelector.Type selector = ((SortedNumericSortField) sortField).getSelector();
+ final String selectorString;
+ if (selector == SortedNumericSelector.Type.MIN) {
+ selectorString = "min";
+ } else if (selector == SortedNumericSelector.Type.MAX) {
+ selectorString = "max";
+ } else {
+ throw new IllegalStateException("Unexpected SortedNumericSelector type selector: " + selector);
+ }
+ SimpleTextUtil.write(output, SI_SELECTOR_TYPE);
+ SimpleTextUtil.write(output, selectorString, scratch);
+ SimpleTextUtil.writeNewline(output);
+ }
+
SimpleTextUtil.write(output, SI_SORT_REVERSE);
SimpleTextUtil.write(output, Boolean.toString(sortField.getReverse()), scratch);
SimpleTextUtil.writeNewline(output);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e357f957/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java
index 1ee5258..da6e395 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java
@@ -29,6 +29,10 @@ import org.apache.lucene.index.SegmentInfo; // javadocs
import org.apache.lucene.index.SegmentInfos; // javadocs
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.SortedNumericSelector;
+import org.apache.lucene.search.SortedNumericSortField;
+import org.apache.lucene.search.SortedSetSelector;
+import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataOutput; // javadocs
import org.apache.lucene.store.Directory;
@@ -69,7 +73,7 @@ import org.apache.lucene.util.Version;
* addIndexes), etc.</li>
* <li>Files is a list of files referred to by this segment.</li>
* </ul>
- *
+ *
* @see SegmentInfos
* @lucene.experimental
*/
@@ -78,7 +82,7 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
/** Sole constructor. */
public Lucene62SegmentInfoFormat() {
}
-
+
@Override
public SegmentInfo read(Directory dir, String segment, byte[] segmentID, IOContext context) throws IOException {
final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene62SegmentInfoFormat.SI_EXTENSION);
@@ -91,13 +95,13 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
Lucene62SegmentInfoFormat.VERSION_CURRENT,
segmentID, "");
final Version version = Version.fromBits(input.readInt(), input.readInt(), input.readInt());
-
+
final int docCount = input.readInt();
if (docCount < 0) {
throw new CorruptIndexException("invalid docCount: " + docCount, input);
}
final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
-
+
final Map<String,String> diagnostics = input.readMapOfStrings();
final Set<String> files = input.readSetOfStrings();
final Map<String,String> attributes = input.readMapOfStrings();
@@ -110,6 +114,8 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
String fieldName = input.readString();
int sortTypeID = input.readVInt();
SortField.Type sortType;
+ SortedSetSelector.Type sortedSetSelector = null;
+ SortedNumericSelector.Type sortedNumericSelector = null;
switch(sortTypeID) {
case 0:
sortType = SortField.Type.STRING;
@@ -126,6 +132,43 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
case 4:
sortType = SortField.Type.FLOAT;
break;
+ case 5:
+ sortType = SortField.Type.STRING;
+ byte selector = input.readByte();
+ if (selector == 0) {
+ sortedSetSelector = SortedSetSelector.Type.MIN;
+ } else if (selector == 1) {
+ sortedSetSelector = SortedSetSelector.Type.MAX;
+ } else if (selector == 2) {
+ sortedSetSelector = SortedSetSelector.Type.MIDDLE_MIN;
+ } else if (selector == 3) {
+ sortedSetSelector = SortedSetSelector.Type.MIDDLE_MAX;
+ } else {
+ throw new CorruptIndexException("invalid index SortedSetSelector ID: " + selector, input);
+ }
+ break;
+ case 6:
+ byte type = input.readByte();
+ if (type == 0) {
+ sortType = SortField.Type.LONG;
+ } else if (type == 1) {
+ sortType = SortField.Type.INT;
+ } else if (type == 2) {
+ sortType = SortField.Type.DOUBLE;
+ } else if (type == 3) {
+ sortType = SortField.Type.FLOAT;
+ } else {
+ throw new CorruptIndexException("invalid index SortedNumericSortField type ID: " + type, input);
+ }
+ byte numericSelector = input.readByte();
+ if (numericSelector == 0) {
+ sortedNumericSelector = SortedNumericSelector.Type.MIN;
+ } else if (numericSelector == 1) {
+ sortedNumericSelector = SortedNumericSelector.Type.MAX;
+ } else {
+ throw new CorruptIndexException("invalid index SortedNumericSelector ID: " + numericSelector, input);
+ }
+ break;
default:
throw new CorruptIndexException("invalid index sort field type ID: " + sortTypeID, input);
}
@@ -139,7 +182,13 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
throw new CorruptIndexException("invalid index sort reverse: " + b, input);
}
- sortFields[i] = new SortField(fieldName, sortType, reverse);
+ if (sortedSetSelector != null) {
+ sortFields[i] = new SortedSetSortField(fieldName, reverse, sortedSetSelector);
+ } else if (sortedNumericSelector != null) {
+ sortFields[i] = new SortedNumericSortField(fieldName, sortType, reverse, sortedNumericSelector);
+ } else {
+ sortFields[i] = new SortField(fieldName, sortType, reverse);
+ }
Object missingValue;
b = input.readByte();
@@ -194,7 +243,7 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
} else {
indexSort = null;
}
-
+
si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, segmentID, attributes, indexSort);
si.setFiles(files);
} catch (Throwable exception) {
@@ -213,8 +262,8 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
try (IndexOutput output = dir.createOutput(fileName, ioContext)) {
// Only add the file once we've successfully created it, else IFD assert can trip:
si.addFile(fileName);
- CodecUtil.writeIndexHeader(output,
- Lucene62SegmentInfoFormat.CODEC_NAME,
+ CodecUtil.writeIndexHeader(output,
+ Lucene62SegmentInfoFormat.CODEC_NAME,
Lucene62SegmentInfoFormat.VERSION_CURRENT,
si.getId(),
"");
@@ -245,6 +294,7 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
output.writeVInt(numSortFields);
for (int i = 0; i < numSortFields; ++i) {
SortField sortField = indexSort.getSort()[i];
+ SortField.Type sortType = sortField.getType();
output.writeString(sortField.getField());
int sortTypeID;
switch (sortField.getType()) {
@@ -263,10 +313,55 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
case FLOAT:
sortTypeID = 4;
break;
+          case CUSTOM:
+            // Only the two multi-valued sort fields are accepted here. sortType is remapped to the
+            // underlying type so the missing value below is encoded by that type rather than CUSTOM.
+            if (sortField instanceof SortedSetSortField) {
+              sortTypeID = 5;
+              sortType = SortField.Type.STRING;
+            } else if (sortField instanceof SortedNumericSortField) {
+              sortTypeID = 6;
+              sortType = ((SortedNumericSortField) sortField).getNumericType();
+            } else {
+              // Reached for any other CUSTOM field, so do not label it as a SortedNumericSortField.
+              throw new IllegalStateException("Unexpected custom sort field: " + sortField);
+            }
+            break;
default:
throw new IllegalStateException("Unexpected sort type: " + sortField.getType());
}
output.writeVInt(sortTypeID);
+        // Multi-valued sort fields append extra bytes right after the type ID; the reader
+        // consumes them in the same order and with the same numeric codes.
+        if (sortTypeID == 5) {
+          // SortedSetSortField: a single selector byte.
+          SortedSetSortField ssf = (SortedSetSortField) sortField;
+          if (ssf.getSelector() == SortedSetSelector.Type.MIN) {
+            output.writeByte((byte) 0);
+          } else if (ssf.getSelector() == SortedSetSelector.Type.MAX) {
+            output.writeByte((byte) 1);
+          } else if (ssf.getSelector() == SortedSetSelector.Type.MIDDLE_MIN) {
+            output.writeByte((byte) 2);
+          } else if (ssf.getSelector() == SortedSetSelector.Type.MIDDLE_MAX) {
+            output.writeByte((byte) 3);
+          } else {
+            throw new IllegalStateException("Unexpected SortedSetSelector type: " + ssf.getSelector());
+          }
+        } else if (sortTypeID == 6) {
+          // SortedNumericSortField: numeric type byte first, then the selector byte.
+          SortedNumericSortField snsf = (SortedNumericSortField) sortField;
+          if (snsf.getNumericType() == SortField.Type.LONG) {
+            output.writeByte((byte) 0);
+          } else if (snsf.getNumericType() == SortField.Type.INT) {
+            output.writeByte((byte) 1);
+          } else if (snsf.getNumericType() == SortField.Type.DOUBLE) {
+            output.writeByte((byte) 2);
+          } else if (snsf.getNumericType() == SortField.Type.FLOAT) {
+            output.writeByte((byte) 3);
+          } else {
+            // The offending value is the numeric type, not the selector.
+            throw new IllegalStateException("Unexpected SortedNumericSortField type: " + snsf.getNumericType());
+          }
+          if (snsf.getSelector() == SortedNumericSelector.Type.MIN) {
+            output.writeByte((byte) 0);
+          } else if (snsf.getSelector() == SortedNumericSelector.Type.MAX) {
+            output.writeByte((byte) 1);
+          } else {
+            throw new IllegalStateException("Unexpected sorted numeric selector type: " + snsf.getSelector());
+          }
+        }
output.writeByte((byte) (sortField.getReverse() ? 0 : 1));
// write missing value
@@ -274,7 +369,7 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
if (missingValue == null) {
output.writeByte((byte) 0);
} else {
- switch(sortField.getType()) {
+ switch(sortType) {
case STRING:
if (missingValue == SortField.STRING_LAST) {
output.writeByte((byte) 1);
@@ -305,7 +400,7 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
}
}
}
-
+
CodecUtil.writeFooter(output);
}
}
@@ -314,5 +409,6 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
public final static String SI_EXTENSION = "si";
static final String CODEC_NAME = "Lucene62SegmentInfo";
static final int VERSION_START = 0;
- static final int VERSION_CURRENT = VERSION_START;
+ static final int VERSION_MULTI_VALUED_SORT = 1;
+ static final int VERSION_CURRENT = VERSION_MULTI_VALUED_SORT;
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e357f957/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java
index 368259a..4f642ee 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java
@@ -468,7 +468,8 @@ public final class IndexWriterConfig extends LiveIndexWriterConfig {
*/
public IndexWriterConfig setIndexSort(Sort sort) {
for(SortField sortField : sort.getSort()) {
- if (ALLOWED_INDEX_SORT_TYPES.contains(sortField.getType()) == false) {
+ final SortField.Type sortType = Sorter.getSortFieldType(sortField);
+ if (ALLOWED_INDEX_SORT_TYPES.contains(sortType) == false) {
throw new IllegalArgumentException("invalid SortField type: must be one of " + ALLOWED_INDEX_SORT_TYPES + " but got: " + sortField);
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e357f957/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java b/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
index 51cabab..7c2c3be 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
@@ -123,7 +123,11 @@ final class MultiSorter {
public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB);
}
+  /** Returns a {@code CrossReaderComparator} that compares documents across the provided readers according to the requested {@link SortField}. */
private static CrossReaderComparator getComparator(List<CodecReader> readers, SortField sortField) throws IOException {
+    final int reverseMul = sortField.getReverse() ? -1 : 1;
+    // Resolve SortedSetSortField / SortedNumericSortField to their underlying type so they are
+    // dispatched to the typed cases below instead of falling through on their own getType().
+    final SortField.Type sortType = Sorter.getSortFieldType(sortField);
+
-    switch(sortField.getType()) {
+    switch(sortType) {
case STRING:
@@ -138,16 +142,9 @@ final class MultiSorter {
}
final int missingOrd;
if (sortField.getMissingValue() == SortField.STRING_LAST) {
- missingOrd = Integer.MAX_VALUE;
+ missingOrd = sortField.getReverse() ? Integer.MIN_VALUE : Integer.MAX_VALUE;
} else {
- missingOrd = Integer.MIN_VALUE;
- }
-
- final int reverseMul;
- if (sortField.getReverse()) {
- reverseMul = -1;
- } else {
- reverseMul = 1;
+ missingOrd = sortField.getReverse() ? Integer.MAX_VALUE : Integer.MIN_VALUE;
}
return new CrossReaderComparator() {
@@ -171,19 +168,11 @@ final class MultiSorter {
List<NumericDocValues> values = new ArrayList<>();
List<Bits> docsWithFields = new ArrayList<>();
for(CodecReader reader : readers) {
- values.add(DocValues.getNumeric(reader, sortField.getField()));
+ values.add(Sorter.getOrWrapNumeric(reader, sortField));
docsWithFields.add(DocValues.getDocsWithField(reader, sortField.getField()));
}
- final int reverseMul;
- if (sortField.getReverse()) {
- reverseMul = -1;
- } else {
- reverseMul = 1;
- }
-
- final long missingValue;
-
+ final Long missingValue;
if (sortField.getMissingValue() != null) {
missingValue = (Long) sortField.getMissingValue();
} else {
@@ -216,19 +205,11 @@ final class MultiSorter {
List<NumericDocValues> values = new ArrayList<>();
List<Bits> docsWithFields = new ArrayList<>();
for(CodecReader reader : readers) {
- values.add(DocValues.getNumeric(reader, sortField.getField()));
+ values.add(Sorter.getOrWrapNumeric(reader, sortField));
docsWithFields.add(DocValues.getDocsWithField(reader, sortField.getField()));
}
- final int reverseMul;
- if (sortField.getReverse()) {
- reverseMul = -1;
- } else {
- reverseMul = 1;
- }
-
- final int missingValue;
-
+ final Integer missingValue;
if (sortField.getMissingValue() != null) {
missingValue = (Integer) sortField.getMissingValue();
} else {
@@ -261,19 +242,11 @@ final class MultiSorter {
List<NumericDocValues> values = new ArrayList<>();
List<Bits> docsWithFields = new ArrayList<>();
for(CodecReader reader : readers) {
- values.add(DocValues.getNumeric(reader, sortField.getField()));
+ values.add(Sorter.getOrWrapNumeric(reader, sortField));
docsWithFields.add(DocValues.getDocsWithField(reader, sortField.getField()));
}
- final int reverseMul;
- if (sortField.getReverse()) {
- reverseMul = -1;
- } else {
- reverseMul = 1;
- }
-
- final double missingValue;
-
+ final Double missingValue;
if (sortField.getMissingValue() != null) {
missingValue = (Double) sortField.getMissingValue();
} else {
@@ -306,19 +279,11 @@ final class MultiSorter {
List<NumericDocValues> values = new ArrayList<>();
List<Bits> docsWithFields = new ArrayList<>();
for(CodecReader reader : readers) {
- values.add(DocValues.getNumeric(reader, sortField.getField()));
+ values.add(Sorter.getOrWrapNumeric(reader, sortField));
docsWithFields.add(DocValues.getDocsWithField(reader, sortField.getField()));
}
- final int reverseMul;
- if (sortField.getReverse()) {
- reverseMul = -1;
- } else {
- reverseMul = 1;
- }
-
- final float missingValue;
-
+ final Float missingValue;
if (sortField.getMissingValue() != null) {
missingValue = (Float) sortField.getMissingValue();
} else {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e357f957/lucene/core/src/java/org/apache/lucene/index/Sorter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/Sorter.java b/lucene/core/src/java/org/apache/lucene/index/Sorter.java
index cf75c18..9ec472a 100644
--- a/lucene/core/src/java/org/apache/lucene/index/Sorter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/Sorter.java
@@ -24,6 +24,10 @@ import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.SortedNumericSelector;
+import org.apache.lucene.search.SortedNumericSortField;
+import org.apache.lucene.search.SortedSetSelector;
+import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.util.TimSorter;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
@@ -198,6 +202,40 @@ final class Sorter {
};
}
+  /**
+   * Resolves the sort type to use for {@code sortField}: {@link SortField.Type#STRING} for a
+   * {@link SortedSetSortField}, the numeric type for a {@link SortedNumericSortField}, and
+   * {@link SortField#getType()} for anything else.
+   */
+  static SortField.Type getSortFieldType(SortField sortField) {
+    if (sortField instanceof SortedSetSortField) {
+      return SortField.Type.STRING;
+    }
+    if (sortField instanceof SortedNumericSortField) {
+      final SortedNumericSortField numeric = (SortedNumericSortField) sortField;
+      return numeric.getNumericType();
+    }
+    return sortField.getType();
+  }
+
+  /**
+   * Returns the {@link NumericDocValues} to sort on for {@code sortField}. For a
+   * {@link SortedNumericSortField} the field's {@link SortedNumericDocValues} are wrapped with the
+   * field's selector and numeric type; otherwise the field's plain numeric doc values are returned.
+   */
+  static NumericDocValues getOrWrapNumeric(LeafReader reader, SortField sortField) throws IOException {
+    if ((sortField instanceof SortedNumericSortField) == false) {
+      return DocValues.getNumeric(reader, sortField.getField());
+    }
+    final SortedNumericSortField multiValued = (SortedNumericSortField) sortField;
+    return SortedNumericSelector.wrap(
+        DocValues.getSortedNumeric(reader, multiValued.getField()),
+        multiValued.getSelector(),
+        multiValued.getNumericType());
+  }
+
+  /**
+   * Returns the {@link SortedDocValues} to sort on for {@code sortField}. For a
+   * {@link SortedSetSortField} the field's {@link SortedSetDocValues} are wrapped with the field's
+   * selector; otherwise the field's plain sorted doc values are returned.
+   */
+  static SortedDocValues getOrWrapSorted(LeafReader reader, SortField sortField) throws IOException {
+    if ((sortField instanceof SortedSetSortField) == false) {
+      return DocValues.getSorted(reader, sortField.getField());
+    }
+    final SortedSetSortField multiValued = (SortedSetSortField) sortField;
+    return SortedSetSelector.wrap(
+        DocValues.getSortedSet(reader, multiValued.getField()),
+        multiValued.getSelector());
+  }
+
/**
* Returns a mapping from the old document ID to its new location in the
* sorted index. Implementations can use the auxiliary
@@ -258,7 +296,7 @@ final class Sorter {
public String toString() {
return getID();
}
-
+
static final Scorer FAKESCORER = new Scorer(null) {
float score;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e357f957/lucene/core/src/java/org/apache/lucene/search/SortedNumericSortField.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/SortedNumericSortField.java b/lucene/core/src/java/org/apache/lucene/search/SortedNumericSortField.java
index 188a408..6e45047 100644
--- a/lucene/core/src/java/org/apache/lucene/search/SortedNumericSortField.java
+++ b/lucene/core/src/java/org/apache/lucene/search/SortedNumericSortField.java
@@ -82,6 +82,11 @@ public class SortedNumericSortField extends SortField {
this.selector = selector;
this.type = type;
}
+
+  /**
+   * Returns the numeric {@link SortField.Type} this sort field was constructed with.
+   */
+  public SortField.Type getNumericType() {
+    return this.type;
+  }
/** Returns the selector in use for this sort */
public SortedNumericSelector.Type getSelector() {