You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2016/06/11 15:49:29 UTC
[13/21] lucene-solr:branch_6x: LUCENE-6766: add float, double
LUCENE-6766: add float, double
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/a30c2632
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/a30c2632
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/a30c2632
Branch: refs/heads/branch_6x
Commit: a30c2632c8c7800287d3c9b319b703d34d55f7d8
Parents: b62cad3
Author: Mike McCandless <mi...@apache.org>
Authored: Sat May 7 18:36:13 2016 -0400
Committer: Mike McCandless <mi...@apache.org>
Committed: Sat Jun 11 11:48:40 2016 -0400
----------------------------------------------------------------------
.../simpletext/SimpleTextSegmentInfoFormat.java | 36 ++++-
.../lucene62/Lucene62SegmentInfoFormat.java | 32 +++++
.../org/apache/lucene/index/CheckIndex.java | 13 +-
.../org/apache/lucene/index/MultiSorter.java | 109 +++++++++++++--
.../apache/lucene/index/SortingLeafReader.java | 1 -
.../apache/lucene/index/TestIndexSorting.java | 139 +++++++++++++++++++
6 files changed, 312 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a30c2632/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java
----------------------------------------------------------------------
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java
index bf9d3de..8ab45be 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java
@@ -168,6 +168,12 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
case "int":
type = SortField.Type.INT;
break;
+ case "double":
+ type = SortField.Type.DOUBLE;
+ break;
+ case "float":
+ type = SortField.Type.FLOAT;
+ break;
default:
throw new CorruptIndexException("unable to parse sort type string: " + typeAsString, input);
}
@@ -216,6 +222,26 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
break;
}
break;
+ case DOUBLE:
+ switch (missingLastAsString) {
+ case "null":
+ missingValue = null;
+ break;
+ default:
+ missingValue = Double.parseDouble(missingLastAsString);
+ break;
+ }
+ break;
+ case FLOAT:
+ switch (missingLastAsString) {
+ case "null":
+ missingValue = null;
+ break;
+ default:
+ missingValue = Float.parseFloat(missingLastAsString);
+ break;
+ }
+ break;
// nocommit need the rest
default:
throw new AssertionError();
@@ -338,6 +364,12 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
case INT:
sortType = "int";
break;
+ case DOUBLE:
+ sortType = "double";
+ break;
+ case FLOAT:
+ sortType = "float";
+ break;
// nocommit the rest:
default:
throw new IllegalStateException("Unexpected sort type: " + sortField.getType());
@@ -358,10 +390,8 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
missing = "first";
} else if (missingValue == SortField.STRING_LAST) {
missing = "last";
- } else if (missingValue instanceof Long) {
- missing = Long.toString((Long) missingValue);
} else {
- throw new IllegalStateException("Unexpected missing sort value: " + missingValue);
+ missing = missingValue.toString();
}
SimpleTextUtil.write(output, missing, scratch);
SimpleTextUtil.writeNewline(output);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a30c2632/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java
index 53d2734..bb52eeb 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java
@@ -121,6 +121,12 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
case 2:
sortType = SortField.Type.INT;
break;
+ case 3:
+ sortType = SortField.Type.DOUBLE;
+ break;
+ case 4:
+ sortType = SortField.Type.FLOAT;
+ break;
default:
throw new CorruptIndexException("invalid index sort field type ID: " + sortTypeID, input);
}
@@ -163,6 +169,18 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
}
missingValue = input.readInt();
break;
+ case DOUBLE:
+ if (b != 1) {
+ throw new CorruptIndexException("invalid missing value flag: " + b, input);
+ }
+ missingValue = Double.longBitsToDouble(input.readLong());
+ break;
+ case FLOAT:
+ if (b != 1) {
+ throw new CorruptIndexException("invalid missing value flag: " + b, input);
+ }
+ missingValue = Float.intBitsToFloat(input.readInt());
+ break;
default:
throw new AssertionError("unhandled sortType=" + sortType);
}
@@ -240,6 +258,12 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
case INT:
sortTypeID = 2;
break;
+ case DOUBLE:
+ sortTypeID = 3;
+ break;
+ case FLOAT:
+ sortTypeID = 4;
+ break;
// nocommit the rest:
default:
throw new IllegalStateException("Unexpected sort type: " + sortField.getType());
@@ -270,6 +294,14 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
output.writeByte((byte) 1);
output.writeInt(((Integer) missingValue).intValue());
break;
+ case DOUBLE:
+ output.writeByte((byte) 1);
+ output.writeLong(Double.doubleToLongBits(((Double) missingValue).doubleValue()));
+ break;
+ case FLOAT:
+ output.writeByte((byte) 1);
+ output.writeInt(Float.floatToIntBits(((Float) missingValue).floatValue())); // 4 bytes: must mirror the FLOAT read path, which consumes the value with input.readInt()
+ break;
// nocommit the rest:
default:
throw new IllegalStateException("Unexpected sort type: " + sortField.getType());
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a30c2632/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
index fb2dc80..1031d22 100644
--- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
+++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
@@ -648,6 +648,7 @@ public final class CheckIndex implements Closeable {
int toLoseDocCount = info.info.maxDoc();
SegmentReader reader = null;
+ Sort previousIndexSort = null; // NOTE(review): the neighbouring per-segment locals (toLoseDocCount, reader) suggest this sits inside the segment loop; if so it is reset for every segment and the "index sort changed" check below can never fire -- confirm and hoist above the loop
try {
msg(infoStream, " version=" + (version == null ? "3.0" : version));
@@ -661,6 +662,13 @@ public final class CheckIndex implements Closeable {
Sort indexSort = info.info.getIndexSort();
if (indexSort != null) {
msg(infoStream, " sort=" + indexSort);
+ if (previousIndexSort != null) {
+ if (previousIndexSort.equals(indexSort) == false) {
+ throw new RuntimeException("index sort changed from " + previousIndexSort + " to " + indexSort);
+ }
+ } else {
+ previousIndexSort = indexSort;
+ }
}
segInfoStat.numFiles = info.files().size();
segInfoStat.sizeMB = info.sizeInBytes()/(1024.*1024.);
@@ -835,8 +843,6 @@ public final class CheckIndex implements Closeable {
for (int i = 0; i < fields.length; i++) {
reverseMul[i] = fields[i].getReverse() ? -1 : 1;
comparators[i] = fields[i].getComparator(1, i).getLeafComparator(readerContext);
- // nocommit we prevent SCORE?
- //comparators[i].setScorer(FAKESCORER);
}
int maxDoc = reader.maxDoc();
@@ -2585,9 +2591,6 @@ public final class CheckIndex implements Closeable {
}
}
- // nocommit must check index is sorted, if it claims to be
- // nocommit must check that all segments have the same sort, if any segment is sorted
-
/**
* Parse command line args into fields
* @param args The command line arguments
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a30c2632/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java b/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
index ca1ebe5..4c78aa1 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
@@ -164,7 +164,7 @@ final class MultiSorter {
};
}
- case INT:
+ case LONG:
{
List<NumericDocValues> values = new ArrayList<>();
List<Bits> docsWithFields = new ArrayList<>();
@@ -191,25 +191,25 @@ final class MultiSorter {
return new CrossReaderComparator() {
@Override
public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) {
- int valueA;
+ long valueA;
if (docsWithFields.get(readerIndexA).get(docIDA)) {
- valueA = (int) values.get(readerIndexA).get(docIDA);
+ valueA = values.get(readerIndexA).get(docIDA); // keep all 64 bits: narrowing to int mis-sorts LONG values outside the int range
} else {
valueA = missingValue;
}
- int valueB;
+ long valueB;
if (docsWithFields.get(readerIndexB).get(docIDB)) {
- valueB = (int) values.get(readerIndexB).get(docIDB);
+ valueB = values.get(readerIndexB).get(docIDB); // keep all 64 bits, same as valueA
} else {
valueB = missingValue;
}
- return reverseMul * Integer.compare(valueA, valueB);
+ return reverseMul * Long.compare(valueA, valueB);
}
};
}
- case LONG:
- // nocommit refactor/share at least numerics here:
+
+ case INT:
{
List<NumericDocValues> values = new ArrayList<>();
List<Bits> docsWithFields = new ArrayList<>();
@@ -236,23 +236,114 @@ final class MultiSorter {
return new CrossReaderComparator() {
@Override
public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) {
- long valueA;
+ int valueA;
if (docsWithFields.get(readerIndexA).get(docIDA)) {
valueA = (int) values.get(readerIndexA).get(docIDA);
} else {
valueA = missingValue;
}
- long valueB;
+ int valueB;
if (docsWithFields.get(readerIndexB).get(docIDB)) {
valueB = (int) values.get(readerIndexB).get(docIDB);
} else {
valueB = missingValue;
}
- return reverseMul * Long.compare(valueA, valueB);
+ return reverseMul * Integer.compare(valueA, valueB);
+ }
+ };
+ }
+
+ case DOUBLE:
+ {
+ List<NumericDocValues> values = new ArrayList<>();
+ List<Bits> docsWithFields = new ArrayList<>();
+ for(CodecReader reader : readers) {
+ values.add(DocValues.getNumeric(reader, sortField.getField()));
+ docsWithFields.add(DocValues.getDocsWithField(reader, sortField.getField()));
+ }
+
+ final int reverseMul;
+ if (sortField.getReverse()) {
+ reverseMul = -1;
+ } else {
+ reverseMul = 1;
+ }
+
+ final double missingValue;
+
+ if (sortField.getMissingValue() != null) {
+ missingValue = (Double) sortField.getMissingValue();
+ } else {
+ missingValue = 0.0;
+ }
+
+ return new CrossReaderComparator() {
+ @Override
+ public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) {
+ double valueA;
+ if (docsWithFields.get(readerIndexA).get(docIDA)) {
+ valueA = Double.longBitsToDouble(values.get(readerIndexA).get(docIDA));
+ } else {
+ valueA = missingValue;
+ }
+
+ double valueB;
+ if (docsWithFields.get(readerIndexB).get(docIDB)) {
+ valueB = Double.longBitsToDouble(values.get(readerIndexB).get(docIDB));
+ } else {
+ valueB = missingValue;
+ }
+ return reverseMul * Double.compare(valueA, valueB);
}
};
}
+
+ case FLOAT:
+ {
+ List<NumericDocValues> values = new ArrayList<>();
+ List<Bits> docsWithFields = new ArrayList<>();
+ for(CodecReader reader : readers) {
+ values.add(DocValues.getNumeric(reader, sortField.getField()));
+ docsWithFields.add(DocValues.getDocsWithField(reader, sortField.getField()));
+ }
+
+ final int reverseMul;
+ if (sortField.getReverse()) {
+ reverseMul = -1;
+ } else {
+ reverseMul = 1;
+ }
+
+ final float missingValue;
+
+ if (sortField.getMissingValue() != null) {
+ missingValue = (Float) sortField.getMissingValue();
+ } else {
+ missingValue = 0.0f;
+ }
+
+ return new CrossReaderComparator() {
+ @Override
+ public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) {
+ float valueA;
+ if (docsWithFields.get(readerIndexA).get(docIDA)) {
+ valueA = Float.intBitsToFloat((int) values.get(readerIndexA).get(docIDA));
+ } else {
+ valueA = missingValue;
+ }
+
+ float valueB;
+ if (docsWithFields.get(readerIndexB).get(docIDB)) {
+ valueB = Float.intBitsToFloat((int) values.get(readerIndexB).get(docIDB));
+ } else {
+ valueB = missingValue;
+ }
+ return reverseMul * Float.compare(valueA, valueB);
+ }
+ };
+ }
+
// nocommit do the rest:
default:
throw new IllegalArgumentException("unhandled SortField.getType()=" + sortField.getType());
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a30c2632/lucene/core/src/java/org/apache/lucene/index/SortingLeafReader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortingLeafReader.java b/lucene/core/src/java/org/apache/lucene/index/SortingLeafReader.java
index b6558f7..70d5d20 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortingLeafReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortingLeafReader.java
@@ -840,7 +840,6 @@ class SortingLeafReader extends FilterLeafReader {
if (inPointValues == null) {
return null;
} else {
- // nocommit make sure this is tested
return new SortingPointValues(inPointValues, docMap);
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a30c2632/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
index 1da6c82..4e26063 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
@@ -36,9 +36,11 @@ import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.BinaryPoint;
import org.apache.lucene.document.Document;
+import org.apache.lucene.document.DoubleDocValuesField;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
@@ -72,6 +74,7 @@ import org.junit.BeforeClass;
// nocommit test tie break
// nocommit test multiple sorts
// nocommit test update dvs
+// nocommit test missing value
// nocommit test EarlyTerminatingCollector
@@ -113,6 +116,142 @@ public class TestIndexSorting extends LuceneTestCase {
dir.close();
}
+ public void testBasicLong() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG));
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 18));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("foo", -1));
+ w.addDocument(doc);
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 7));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ assertEquals(-1, values.get(0));
+ assertEquals(7, values.get(1));
+ assertEquals(18, values.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testBasicInt() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ Sort indexSort = new Sort(new SortField("foo", SortField.Type.INT));
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 18));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("foo", -1));
+ w.addDocument(doc);
+ w.commit();
+
+ doc = new Document();
+ doc.add(new NumericDocValuesField("foo", 7));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ assertEquals(-1, values.get(0));
+ assertEquals(7, values.get(1));
+ assertEquals(18, values.get(2));
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testBasicDouble() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ Sort indexSort = new Sort(new SortField("foo", SortField.Type.DOUBLE));
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new DoubleDocValuesField("foo", 18.0));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ doc = new Document();
+ doc.add(new DoubleDocValuesField("foo", -1.0));
+ w.addDocument(doc);
+ w.commit();
+
+ doc = new Document();
+ doc.add(new DoubleDocValuesField("foo", 7.0));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ assertEquals(-1.0, Double.longBitsToDouble(values.get(0)), 0.0);
+ assertEquals(7.0, Double.longBitsToDouble(values.get(1)), 0.0);
+ assertEquals(18.0, Double.longBitsToDouble(values.get(2)), 0.0);
+ r.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testBasicFloat() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+ Sort indexSort = new Sort(new SortField("foo", SortField.Type.FLOAT));
+ iwc.setIndexSort(indexSort);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document doc = new Document();
+ doc.add(new FloatDocValuesField("foo", 18.0f));
+ w.addDocument(doc);
+ // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+ w.commit();
+
+ doc = new Document();
+ doc.add(new FloatDocValuesField("foo", -1.0f));
+ w.addDocument(doc);
+ w.commit();
+
+ doc = new Document();
+ doc.add(new FloatDocValuesField("foo", 7.0f));
+ w.addDocument(doc);
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w);
+ LeafReader leaf = getOnlyLeafReader(r);
+ assertEquals(3, leaf.maxDoc());
+ NumericDocValues values = leaf.getNumericDocValues("foo");
+ assertEquals(-1.0f, Float.intBitsToFloat((int) values.get(0)), 0.0f);
+ assertEquals(7.0f, Float.intBitsToFloat((int) values.get(1)), 0.0f);
+ assertEquals(18.0f, Float.intBitsToFloat((int) values.get(2)), 0.0f);
+ r.close();
+ w.close();
+ dir.close();
+ }
+
public void testSortOnMerge(boolean withDeletes) throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));