You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by da...@apache.org on 2018/12/11 09:27:33 UTC

[04/47] lucene-solr:jira/http2: Add the ability to set the number of hits to track accurately

Add the ability to set the number of hits to track accurately

In Lucene 8 searches can skip non-competitive hits if the total hit count is not requested.
It is also possible to track the number of hits up to a certain threshold. This is a trade off to speed up searches while still being able to know a lower bound of the total hit count. This change adds the ability to set this threshold directly in the track_total_hits search option. A boolean value (true, false) indicates whether the total hit count should be tracked in the response. When set as an integer this option allows to compute a lower bound of the total hits while preserving the ability to skip non-competitive hits when enough matches have been collected.

Relates #33028


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/84d5015e
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/84d5015e
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/84d5015e

Branch: refs/heads/jira/http2
Commit: 84d5015eee00992e3c0c5185e6146ed48fe3dd25
Parents: 17fca05
Author: Jim Ferenczi <ji...@apache.org>
Authored: Fri Dec 7 19:11:59 2018 +0100
Committer: Jim Ferenczi <ji...@apache.org>
Committed: Fri Dec 7 19:11:59 2018 +0100

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |    9 +-
 .../index/TestBackwardsCompatibility.java       |   27 +
 .../lucene/index/sorted-invalid.7.5.0.zip       |  Bin 0 -> 468799 bytes
 .../org/apache/lucene/index/MultiSorter.java    |   30 +-
 .../apache/lucene/index/TestIndexSorting.java   | 1738 ++++++++++--------
 5 files changed, 1020 insertions(+), 784 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/84d5015e/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 14b4961..007d35b 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -314,7 +314,14 @@ Bug fixes
 
 * LUCENE-8595: Fix interleaved DV update and reset. Interleaved update and reset value
   to the same doc in the same updates package looses an update if the reset comes before
-  the update as well as loosing the reset if the update comes frist. (Simon Willnauer, Adrien Grant)
+  the update as well as loosing the reset if the update comes frist. (Simon Willnauer, Adrien Grand)
+
+* LUCENE-8592: Fix index sorting corruption due to numeric overflow. The merge of sorted segments
+  can produce an invalid sort if the sort field is an Integer/Long that uses reverse order and contains
+  values equal to Integer/Long#MIN_VALUE. These values are always sorted first during a merge
+  (instead of last because of the reverse order) due to this bug. Indices affected by the bug can be
+  detected by running the CheckIndex command on a distribution that contains the fix (7.6+).
+  (Jim Ferenczi, Adrien Grand, Mike McCandless, Simon Willnauer)
 
 New Features
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/84d5015e/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
----------------------------------------------------------------------
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
index 6f28797..44911db 100644
--- a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
@@ -1717,6 +1717,33 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
       dir.close();
     }
   }
+
+  /**
+   * Tests that {@link CheckIndex} can detect invalid sort on sorted indices created
+   * before https://issues.apache.org/jira/browse/LUCENE-8592.
+   */
+  public void testSortedIndexWithInvalidSort() throws Exception {
+    Path path = createTempDir("sorted");
+    String name = "sorted-invalid.7.5.0.zip";
+    InputStream resource = TestBackwardsCompatibility.class.getResourceAsStream(name);
+    assertNotNull("Sorted index index " + name + " not found", resource);
+    TestUtil.unzip(resource, path);
+
+    Directory dir = FSDirectory.open(path);
+
+    DirectoryReader reader = DirectoryReader.open(dir);
+    assertEquals(1, reader.leaves().size());
+    Sort sort = reader.leaves().get(0).reader().getMetaData().getSort();
+    assertNotNull(sort);
+    assertEquals("<long: \"dateDV\">! missingValue=-9223372036854775808", sort.toString());
+    reader.close();
+    CheckIndex.Status status = TestUtil.checkIndex(dir);
+    assertEquals(1, status.segmentInfos.size());
+    assertNotNull(status.segmentInfos.get(0).indexSortStatus.error);
+    assertEquals(status.segmentInfos.get(0).indexSortStatus.error.getMessage(),
+        "segment has indexSort=<long: \"dateDV\">! missingValue=-9223372036854775808 but docID=4 sorts after docID=5");
+    dir.close();
+  }
   
   static long getValue(BinaryDocValues bdv) throws IOException {
     BytesRef term = bdv.binaryValue();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/84d5015e/lucene/backward-codecs/src/test/org/apache/lucene/index/sorted-invalid.7.5.0.zip
----------------------------------------------------------------------
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/index/sorted-invalid.7.5.0.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/sorted-invalid.7.5.0.zip
new file mode 100644
index 0000000..30c68a3
Binary files /dev/null and b/lucene/backward-codecs/src/test/org/apache/lucene/index/sorted-invalid.7.5.0.zip differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/84d5015e/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java b/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
index b484228..d81ac75 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
@@ -42,17 +42,18 @@ final class MultiSorter {
 
     SortField fields[] = sort.getSort();
     final ComparableProvider[][] comparables = new ComparableProvider[fields.length][];
+    final int[] reverseMuls = new int[fields.length];
     for(int i=0;i<fields.length;i++) {
       comparables[i] = getComparableProviders(readers, fields[i]);
+      reverseMuls[i] = fields[i].getReverse() ? -1 : 1;
     }
-
     int leafCount = readers.size();
 
     PriorityQueue<LeafAndDocID> queue = new PriorityQueue<LeafAndDocID>(leafCount) {
         @Override
         public boolean lessThan(LeafAndDocID a, LeafAndDocID b) {
           for(int i=0;i<comparables.length;i++) {
-            int cmp = a.values[i].compareTo(b.values[i]);
+            int cmp = reverseMuls[i] * a.values[i].compareTo(b.values[i]);
             if (cmp != 0) {
               return cmp < 0;
             }
@@ -146,14 +147,13 @@ final class MultiSorter {
 
   /** Returns an object for this docID whose .compareTo represents the requested {@link SortField} sort order. */
   private interface ComparableProvider {
-    public Comparable getComparable(int docID) throws IOException;
+    Comparable getComparable(int docID) throws IOException;
   }
 
   /** Returns {@code ComparableProvider}s for the provided readers to represent the requested {@link SortField} sort order. */
   private static ComparableProvider[] getComparableProviders(List<CodecReader> readers, SortField sortField) throws IOException {
 
     ComparableProvider[] providers = new ComparableProvider[readers.size()];
-    final int reverseMul = sortField.getReverse() ? -1 : 1;
     final SortField.Type sortType = Sorter.getSortFieldType(sortField);
 
     switch(sortType) {
@@ -169,9 +169,9 @@ final class MultiSorter {
         OrdinalMap ordinalMap = OrdinalMap.build(null, values, PackedInts.DEFAULT);
         final int missingOrd;
         if (sortField.getMissingValue() == SortField.STRING_LAST) {
-          missingOrd = sortField.getReverse() ? Integer.MIN_VALUE : Integer.MAX_VALUE;
+          missingOrd = Integer.MAX_VALUE;
         } else {
-          missingOrd = sortField.getReverse() ? Integer.MAX_VALUE : Integer.MIN_VALUE;
+          missingOrd = Integer.MIN_VALUE;
         }
 
         for(int readerIndex=0;readerIndex<readers.size();readerIndex++) {
@@ -197,7 +197,7 @@ final class MultiSorter {
                 }
                 if (readerDocID == docID) {
                   // translate segment's ord to global ord space:
-                  return reverseMul * (int) globalOrds.get(readerValues.ordValue());
+                  return Math.toIntExact(globalOrds.get(readerValues.ordValue()));
                 } else {
                   return missingOrd;
                 }
@@ -238,9 +238,9 @@ final class MultiSorter {
                   readerDocID = values.advance(docID);
                 }
                 if (readerDocID == docID) {
-                  return reverseMul * values.longValue();
+                  return values.longValue();
                 } else {
-                  return reverseMul * missingValue;
+                  return missingValue;
                 }
               }
             };
@@ -279,9 +279,9 @@ final class MultiSorter {
                   readerDocID = values.advance(docID);
                 }
                 if (readerDocID == docID) {
-                  return reverseMul * (int) values.longValue();
+                  return (int) values.longValue();
                 } else {
-                  return reverseMul * missingValue;
+                  return missingValue;
                 }
               }
             };
@@ -320,9 +320,9 @@ final class MultiSorter {
                   readerDocID = values.advance(docID);
                 }
                 if (readerDocID == docID) {
-                  return reverseMul * Double.longBitsToDouble(values.longValue());
+                  return Double.longBitsToDouble(values.longValue());
                 } else {
-                  return reverseMul * missingValue;
+                  return missingValue;
                 }
               }
             };
@@ -361,9 +361,9 @@ final class MultiSorter {
                   readerDocID = values.advance(docID);
                 }
                 if (readerDocID == docID) {
-                  return reverseMul * Float.intBitsToFloat((int) values.longValue());
+                  return Float.intBitsToFloat((int) values.longValue());
                 } else {
-                  return reverseMul * missingValue;
+                  return missingValue;
                 }
               }
             };