You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2016/06/11 15:49:29 UTC

[13/21] lucene-solr:branch_6x: LUCENE-6766: add float, double

LUCENE-6766: add float, double


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/a30c2632
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/a30c2632
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/a30c2632

Branch: refs/heads/branch_6x
Commit: a30c2632c8c7800287d3c9b319b703d34d55f7d8
Parents: b62cad3
Author: Mike McCandless <mi...@apache.org>
Authored: Sat May 7 18:36:13 2016 -0400
Committer: Mike McCandless <mi...@apache.org>
Committed: Sat Jun 11 11:48:40 2016 -0400

----------------------------------------------------------------------
 .../simpletext/SimpleTextSegmentInfoFormat.java |  36 ++++-
 .../lucene62/Lucene62SegmentInfoFormat.java     |  32 +++++
 .../org/apache/lucene/index/CheckIndex.java     |  13 +-
 .../org/apache/lucene/index/MultiSorter.java    | 109 +++++++++++++--
 .../apache/lucene/index/SortingLeafReader.java  |   1 -
 .../apache/lucene/index/TestIndexSorting.java   | 139 +++++++++++++++++++
 6 files changed, 312 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a30c2632/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java
----------------------------------------------------------------------
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java
index bf9d3de..8ab45be 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java
@@ -168,6 +168,12 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
           case "int":
             type = SortField.Type.INT;
             break;
+          case "double":
+            type = SortField.Type.DOUBLE;
+            break;
+          case "float":
+            type = SortField.Type.FLOAT;
+            break;
           default:
             throw new CorruptIndexException("unable to parse sort type string: " + typeAsString, input);
         }
@@ -216,6 +222,26 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
                 break;
             }
             break;
+          case DOUBLE:
+            switch (missingLastAsString) {
+              case "null":
+                missingValue = null;
+                break;
+              default:
+                missingValue = Double.parseDouble(missingLastAsString);
+                break;
+            }
+            break;
+          case FLOAT:
+            switch (missingLastAsString) {
+              case "null":
+                missingValue = null;
+                break;
+              default:
+                missingValue = Float.parseFloat(missingLastAsString);
+                break;
+            }
+            break;
           // nocommit need the rest
           default:
             throw new AssertionError();
@@ -338,6 +364,12 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
           case INT:
             sortType = "int";
             break;
+          case DOUBLE:
+            sortType = "double";
+            break;
+          case FLOAT:
+            sortType = "float";
+            break;
           // nocommit the rest:
           default:
             throw new IllegalStateException("Unexpected sort type: " + sortField.getType());
@@ -358,10 +390,8 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
           missing = "first";
         } else if (missingValue == SortField.STRING_LAST) {
           missing = "last";
-        } else if (missingValue instanceof Long) {
-          missing = Long.toString((Long) missingValue);
         } else {
-          throw new IllegalStateException("Unexpected missing sort value: " + missingValue);
+          missing = missingValue.toString();
         }
         SimpleTextUtil.write(output, missing, scratch);
         SimpleTextUtil.writeNewline(output);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a30c2632/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java
index 53d2734..bb52eeb 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java
@@ -121,6 +121,12 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
             case 2:
               sortType = SortField.Type.INT;
               break;
+            case 3:
+              sortType = SortField.Type.DOUBLE;
+              break;
+            case 4:
+              sortType = SortField.Type.FLOAT;
+              break;
             default:
               throw new CorruptIndexException("invalid index sort field type ID: " + sortTypeID, input);
             }
@@ -163,6 +169,18 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
                 }
                 missingValue = input.readInt();
                 break;
+              case DOUBLE:
+                if (b != 1) {
+                  throw new CorruptIndexException("invalid missing value flag: " + b, input);
+                }
+                missingValue = Double.longBitsToDouble(input.readLong());
+                break;
+              case FLOAT:
+                if (b != 1) {
+                  throw new CorruptIndexException("invalid missing value flag: " + b, input);
+                }
+                missingValue = Float.intBitsToFloat(input.readInt());
+                break;
               default:
                 throw new AssertionError("unhandled sortType=" + sortType);
               }
@@ -240,6 +258,12 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
           case INT:
             sortTypeID = 2;
             break;
+          case DOUBLE:
+            sortTypeID = 3;
+            break;
+          case FLOAT:
+            sortTypeID = 4;
+            break;
           // nocommit the rest:
           default:
             throw new IllegalStateException("Unexpected sort type: " + sortField.getType());
@@ -270,6 +294,14 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
             output.writeByte((byte) 1);
             output.writeInt(((Integer) missingValue).intValue());
             break;
+          case DOUBLE:
+            output.writeByte((byte) 1);
+            output.writeLong(Double.doubleToLongBits(((Double) missingValue).doubleValue()));
+            break;
+          case FLOAT:
+            output.writeByte((byte) 1);
+            output.writeLong(Float.floatToIntBits(((Float) missingValue).floatValue()));
+            break;
           // nocommit the rest:
           default:
             throw new IllegalStateException("Unexpected sort type: " + sortField.getType());

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a30c2632/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
index fb2dc80..1031d22 100644
--- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
+++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
@@ -648,6 +648,7 @@ public final class CheckIndex implements Closeable {
       int toLoseDocCount = info.info.maxDoc();
 
       SegmentReader reader = null;
+      Sort previousIndexSort = null;
 
       try {
         msg(infoStream, "    version=" + (version == null ? "3.0" : version));
@@ -661,6 +662,13 @@ public final class CheckIndex implements Closeable {
         Sort indexSort = info.info.getIndexSort();
         if (indexSort != null) {
           msg(infoStream, "    sort=" + indexSort);
+          if (previousIndexSort != null) {
+            if (previousIndexSort.equals(indexSort) == false) {
+              throw new RuntimeException("index sort changed from " + previousIndexSort + " to " + indexSort);
+            }
+          } else {
+            previousIndexSort = indexSort;
+          }
         }
         segInfoStat.numFiles = info.files().size();
         segInfoStat.sizeMB = info.sizeInBytes()/(1024.*1024.);
@@ -835,8 +843,6 @@ public final class CheckIndex implements Closeable {
       for (int i = 0; i < fields.length; i++) {
         reverseMul[i] = fields[i].getReverse() ? -1 : 1;
         comparators[i] = fields[i].getComparator(1, i).getLeafComparator(readerContext);
-        // nocommit we prevent SCORE?
-        //comparators[i].setScorer(FAKESCORER);
       }
 
       int maxDoc = reader.maxDoc();
@@ -2585,9 +2591,6 @@ public final class CheckIndex implements Closeable {
     }
   }
 
-  // nocommit must check index is sorted, if it claims to be
-  // nocommit must check that all segments have the same sort, if any segment is sorted
-
   /**
    * Parse command line args into fields
    * @param args The command line arguments

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a30c2632/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java b/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
index ca1ebe5..4c78aa1 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
@@ -164,7 +164,7 @@ final class MultiSorter {
         };
       }
 
-    case INT:
+    case LONG:
       {
         List<NumericDocValues> values = new ArrayList<>();
         List<Bits> docsWithFields = new ArrayList<>();
@@ -191,25 +191,25 @@ final class MultiSorter {
         return new CrossReaderComparator() {
           @Override
           public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) {
-            int valueA;
+            long valueA;
             if (docsWithFields.get(readerIndexA).get(docIDA)) {
               valueA = (int) values.get(readerIndexA).get(docIDA);
             } else {
               valueA = missingValue;
             }
 
-            int valueB;
+            long valueB;
             if (docsWithFields.get(readerIndexB).get(docIDB)) {
               valueB = (int) values.get(readerIndexB).get(docIDB);
             } else {
               valueB = missingValue;
             }
-            return reverseMul * Integer.compare(valueA, valueB);
+            return reverseMul * Long.compare(valueA, valueB);
           }
         };
       }
-    case LONG:
-      // nocommit refactor/share at least numerics here:
+
+    case INT:
       {
         List<NumericDocValues> values = new ArrayList<>();
         List<Bits> docsWithFields = new ArrayList<>();
@@ -236,23 +236,114 @@ final class MultiSorter {
         return new CrossReaderComparator() {
           @Override
           public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) {
-            long valueA;
+            int valueA;
             if (docsWithFields.get(readerIndexA).get(docIDA)) {
               valueA = (int) values.get(readerIndexA).get(docIDA);
             } else {
               valueA = missingValue;
             }
 
-            long valueB;
+            int valueB;
             if (docsWithFields.get(readerIndexB).get(docIDB)) {
               valueB = (int) values.get(readerIndexB).get(docIDB);
             } else {
               valueB = missingValue;
             }
-            return reverseMul * Long.compare(valueA, valueB);
+            return reverseMul * Integer.compare(valueA, valueB);
+          }
+        };
+      }
+
+    case DOUBLE:
+      {
+        List<NumericDocValues> values = new ArrayList<>();
+        List<Bits> docsWithFields = new ArrayList<>();
+        for(CodecReader reader : readers) {
+          values.add(DocValues.getNumeric(reader, sortField.getField()));
+          docsWithFields.add(DocValues.getDocsWithField(reader, sortField.getField()));
+        }
+
+        final int reverseMul;
+        if (sortField.getReverse()) {
+          reverseMul = -1;
+        } else {
+          reverseMul = 1;
+        }
+
+        final double missingValue;
+
+        if (sortField.getMissingValue() != null) {
+          missingValue = (Double) sortField.getMissingValue();
+        } else {
+          missingValue = 0.0;
+        }
+
+        return new CrossReaderComparator() {
+          @Override
+          public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) {
+            double valueA;
+            if (docsWithFields.get(readerIndexA).get(docIDA)) {
+              valueA = Double.longBitsToDouble(values.get(readerIndexA).get(docIDA));
+            } else {
+              valueA = missingValue;
+            }
+
+            double valueB;
+            if (docsWithFields.get(readerIndexB).get(docIDB)) {
+              valueB = Double.longBitsToDouble(values.get(readerIndexB).get(docIDB));
+            } else {
+              valueB = missingValue;
+            }
+            return reverseMul * Double.compare(valueA, valueB);
           }
         };
       }
+
+    case FLOAT:
+      {
+        List<NumericDocValues> values = new ArrayList<>();
+        List<Bits> docsWithFields = new ArrayList<>();
+        for(CodecReader reader : readers) {
+          values.add(DocValues.getNumeric(reader, sortField.getField()));
+          docsWithFields.add(DocValues.getDocsWithField(reader, sortField.getField()));
+        }
+
+        final int reverseMul;
+        if (sortField.getReverse()) {
+          reverseMul = -1;
+        } else {
+          reverseMul = 1;
+        }
+
+        final float missingValue;
+
+        if (sortField.getMissingValue() != null) {
+          missingValue = (Float) sortField.getMissingValue();
+        } else {
+          missingValue = 0.0f;
+        }
+
+        return new CrossReaderComparator() {
+          @Override
+          public int compare(int readerIndexA, int docIDA, int readerIndexB, int docIDB) {
+            float valueA;
+            if (docsWithFields.get(readerIndexA).get(docIDA)) {
+              valueA = Float.intBitsToFloat((int) values.get(readerIndexA).get(docIDA));
+            } else {
+              valueA = missingValue;
+            }
+
+            float valueB;
+            if (docsWithFields.get(readerIndexB).get(docIDB)) {
+              valueB = Float.intBitsToFloat((int) values.get(readerIndexB).get(docIDB));
+            } else {
+              valueB = missingValue;
+            }
+            return reverseMul * Float.compare(valueA, valueB);
+          }
+        };
+      }
+
     // nocommit do the rest:
     default:
       throw new IllegalArgumentException("unhandled SortField.getType()=" + sortField.getType());

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a30c2632/lucene/core/src/java/org/apache/lucene/index/SortingLeafReader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortingLeafReader.java b/lucene/core/src/java/org/apache/lucene/index/SortingLeafReader.java
index b6558f7..70d5d20 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortingLeafReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortingLeafReader.java
@@ -840,7 +840,6 @@ class SortingLeafReader extends FilterLeafReader {
     if (inPointValues == null) {
       return null;
     } else {
-      // nocommit make sure this is tested
       return new SortingPointValues(inPointValues, docMap);
     }
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a30c2632/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
index 1da6c82..4e26063 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
@@ -36,9 +36,11 @@ import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.BinaryPoint;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.DoubleDocValuesField;
 import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.FloatDocValuesField;
 import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.SortedDocValuesField;
 import org.apache.lucene.document.SortedNumericDocValuesField;
@@ -72,6 +74,7 @@ import org.junit.BeforeClass;
 // nocommit test tie break
 // nocommit test multiple sorts
 // nocommit test update dvs
+// nocommit test missing value
 
 // nocommit test EarlyTerminatingCollector
 
@@ -113,6 +116,142 @@ public class TestIndexSorting extends LuceneTestCase {
     dir.close();
   }
 
+  public void testBasicLong() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+    Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG));
+    iwc.setIndexSort(indexSort);
+    IndexWriter w = new IndexWriter(dir, iwc);
+    Document doc = new Document();
+    doc.add(new NumericDocValuesField("foo", 18));
+    w.addDocument(doc);
+    // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+    w.commit();
+
+    doc = new Document();
+    doc.add(new NumericDocValuesField("foo", -1));
+    w.addDocument(doc);
+    w.commit();
+
+    doc = new Document();
+    doc.add(new NumericDocValuesField("foo", 7));
+    w.addDocument(doc);
+    w.forceMerge(1);
+
+    DirectoryReader r = DirectoryReader.open(w);
+    LeafReader leaf = getOnlyLeafReader(r);
+    assertEquals(3, leaf.maxDoc());
+    NumericDocValues values = leaf.getNumericDocValues("foo");
+    assertEquals(-1, values.get(0));
+    assertEquals(7, values.get(1));
+    assertEquals(18, values.get(2));
+    r.close();
+    w.close();
+    dir.close();
+  }
+
+  public void testBasicInt() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+    Sort indexSort = new Sort(new SortField("foo", SortField.Type.INT));
+    iwc.setIndexSort(indexSort);
+    IndexWriter w = new IndexWriter(dir, iwc);
+    Document doc = new Document();
+    doc.add(new NumericDocValuesField("foo", 18));
+    w.addDocument(doc);
+    // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+    w.commit();
+
+    doc = new Document();
+    doc.add(new NumericDocValuesField("foo", -1));
+    w.addDocument(doc);
+    w.commit();
+
+    doc = new Document();
+    doc.add(new NumericDocValuesField("foo", 7));
+    w.addDocument(doc);
+    w.forceMerge(1);
+
+    DirectoryReader r = DirectoryReader.open(w);
+    LeafReader leaf = getOnlyLeafReader(r);
+    assertEquals(3, leaf.maxDoc());
+    NumericDocValues values = leaf.getNumericDocValues("foo");
+    assertEquals(-1, values.get(0));
+    assertEquals(7, values.get(1));
+    assertEquals(18, values.get(2));
+    r.close();
+    w.close();
+    dir.close();
+  }
+
+  public void testBasicDouble() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+    Sort indexSort = new Sort(new SortField("foo", SortField.Type.DOUBLE));
+    iwc.setIndexSort(indexSort);
+    IndexWriter w = new IndexWriter(dir, iwc);
+    Document doc = new Document();
+    doc.add(new DoubleDocValuesField("foo", 18.0));
+    w.addDocument(doc);
+    // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+    w.commit();
+
+    doc = new Document();
+    doc.add(new DoubleDocValuesField("foo", -1.0));
+    w.addDocument(doc);
+    w.commit();
+
+    doc = new Document();
+    doc.add(new DoubleDocValuesField("foo", 7.0));
+    w.addDocument(doc);
+    w.forceMerge(1);
+
+    DirectoryReader r = DirectoryReader.open(w);
+    LeafReader leaf = getOnlyLeafReader(r);
+    assertEquals(3, leaf.maxDoc());
+    NumericDocValues values = leaf.getNumericDocValues("foo");
+    assertEquals(-1.0, Double.longBitsToDouble(values.get(0)), 0.0);
+    assertEquals(7.0, Double.longBitsToDouble(values.get(1)), 0.0);
+    assertEquals(18.0, Double.longBitsToDouble(values.get(2)), 0.0);
+    r.close();
+    w.close();
+    dir.close();
+  }
+
+  public void testBasicFloat() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+    Sort indexSort = new Sort(new SortField("foo", SortField.Type.FLOAT));
+    iwc.setIndexSort(indexSort);
+    IndexWriter w = new IndexWriter(dir, iwc);
+    Document doc = new Document();
+    doc.add(new FloatDocValuesField("foo", 18.0f));
+    w.addDocument(doc);
+    // so we get more than one segment, so that forceMerge actually does merge, since we only get a sorted segment by merging:
+    w.commit();
+
+    doc = new Document();
+    doc.add(new FloatDocValuesField("foo", -1.0f));
+    w.addDocument(doc);
+    w.commit();
+
+    doc = new Document();
+    doc.add(new FloatDocValuesField("foo", 7.0f));
+    w.addDocument(doc);
+    w.forceMerge(1);
+
+    DirectoryReader r = DirectoryReader.open(w);
+    LeafReader leaf = getOnlyLeafReader(r);
+    assertEquals(3, leaf.maxDoc());
+    NumericDocValues values = leaf.getNumericDocValues("foo");
+    assertEquals(-1.0f, Float.intBitsToFloat((int) values.get(0)), 0.0f);
+    assertEquals(7.0f, Float.intBitsToFloat((int) values.get(1)), 0.0f);
+    assertEquals(18.0f, Float.intBitsToFloat((int) values.get(2)), 0.0f);
+    r.close();
+    w.close();
+    dir.close();
+  }
+
   public void testSortOnMerge(boolean withDeletes) throws IOException {
     Directory dir = newDirectory();
     IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));