You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ji...@apache.org on 2018/12/07 18:34:22 UTC
[2/3] lucene-solr:branch_7_6: LUCENE-8592: Fix index sorting
corruption due to numeric overflow
LUCENE-8592: Fix index sorting corruption due to numeric overflow
The merge sort of sorted segments can produce an invalid
sort if the sort field is an Integer/Long that uses reverse order and contains values equal to
Integer/Long#MIN_VALUE. These values are always sorted first during a merge
(instead of last because of the reverse order) due to this bug.
Indices affected by the bug can be detected by running the CheckIndex command on a
distribution that contains the fix (7.6+).
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/3098af2b
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/3098af2b
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/3098af2b
Branch: refs/heads/branch_7_6
Commit: 3098af2b76e447e72f11d5de509ae154e0cec644
Parents: 10cadbe
Author: Jim Ferenczi <ji...@apache.org>
Authored: Fri Dec 7 19:16:42 2018 +0100
Committer: Jim Ferenczi <ji...@apache.org>
Committed: Fri Dec 7 19:32:58 2018 +0100
----------------------------------------------------------------------
lucene/CHANGES.txt | 9 +-
.../index/TestBackwardsCompatibility.java | 27 +
.../lucene/index/sorted-invalid.7.5.0.zip | Bin 0 -> 468799 bytes
.../org/apache/lucene/index/MultiSorter.java | 30 +-
.../apache/lucene/index/TestIndexSorting.java | 1744 ++++++++++--------
5 files changed, 1023 insertions(+), 787 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3098af2b/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index e548ed1..847ea90 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -54,7 +54,14 @@ Bug fixes
* LUCENE-8595: Fix interleaved DV update and reset. Interleaved update and reset value
to the same doc in the same updates package looses an update if the reset comes before
- the update as well as loosing the reset if the update comes frist. (Simon Willnauer, Adrien Grant)
+ the update as well as loosing the reset if the update comes frist. (Simon Willnauer, Adrien Grand)
+
+* LUCENE-8592: Fix index sorting corruption due to numeric overflow. The merge of sorted segments
+ can produce an invalid sort if the sort field is an Integer/Long that uses reverse order and contains
+ values equal to Integer/Long#MIN_VALUE. These values are always sorted first during a merge
+ (instead of last because of the reverse order) due to this bug. Indices affected by the bug can be
+ detected by running the CheckIndex command on a distribution that contains the fix (7.6+).
+ (Jim Ferenczi, Adrien Grand, Mike McCandless, Simon Willnauer)
New Features
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3098af2b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
----------------------------------------------------------------------
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
index 9c44f48..9962901 100644
--- a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
@@ -1641,6 +1641,33 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
dir.close();
}
}
+
+ /**
+ * Tests that {@link CheckIndex} can detect invalid sort on sorted indices created
+ * before https://issues.apache.org/jira/browse/LUCENE-8592.
+ */
+ public void testSortedIndexWithInvalidSort() throws Exception {
+ Path path = createTempDir("sorted");
+ String name = "sorted-invalid.7.5.0.zip";
+ InputStream resource = TestBackwardsCompatibility.class.getResourceAsStream(name);
+ assertNotNull("Sorted index index " + name + " not found", resource);
+ TestUtil.unzip(resource, path);
+
+ Directory dir = FSDirectory.open(path);
+
+ DirectoryReader reader = DirectoryReader.open(dir);
+ assertEquals(1, reader.leaves().size());
+ Sort sort = reader.leaves().get(0).reader().getMetaData().getSort();
+ assertNotNull(sort);
+ assertEquals("<long: \"dateDV\">! missingValue=-9223372036854775808", sort.toString());
+ reader.close();
+ CheckIndex.Status status = TestUtil.checkIndex(dir);
+ assertEquals(1, status.segmentInfos.size());
+ assertNotNull(status.segmentInfos.get(0).indexSortStatus.error);
+ assertEquals(status.segmentInfos.get(0).indexSortStatus.error.getMessage(),
+ "segment has indexSort=<long: \"dateDV\">! missingValue=-9223372036854775808 but docID=4 sorts after docID=5");
+ dir.close();
+ }
static long getValue(BinaryDocValues bdv) throws IOException {
BytesRef term = bdv.binaryValue();
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3098af2b/lucene/backward-codecs/src/test/org/apache/lucene/index/sorted-invalid.7.5.0.zip
----------------------------------------------------------------------
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/index/sorted-invalid.7.5.0.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/sorted-invalid.7.5.0.zip
new file mode 100644
index 0000000..30c68a3
Binary files /dev/null and b/lucene/backward-codecs/src/test/org/apache/lucene/index/sorted-invalid.7.5.0.zip differ
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3098af2b/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java b/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
index b484228..d81ac75 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java
@@ -42,17 +42,18 @@ final class MultiSorter {
SortField fields[] = sort.getSort();
final ComparableProvider[][] comparables = new ComparableProvider[fields.length][];
+ final int[] reverseMuls = new int[fields.length];
for(int i=0;i<fields.length;i++) {
comparables[i] = getComparableProviders(readers, fields[i]);
+ reverseMuls[i] = fields[i].getReverse() ? -1 : 1;
}
-
int leafCount = readers.size();
PriorityQueue<LeafAndDocID> queue = new PriorityQueue<LeafAndDocID>(leafCount) {
@Override
public boolean lessThan(LeafAndDocID a, LeafAndDocID b) {
for(int i=0;i<comparables.length;i++) {
- int cmp = a.values[i].compareTo(b.values[i]);
+ int cmp = reverseMuls[i] * a.values[i].compareTo(b.values[i]);
if (cmp != 0) {
return cmp < 0;
}
@@ -146,14 +147,13 @@ final class MultiSorter {
/** Returns an object for this docID whose .compareTo represents the requested {@link SortField} sort order. */
private interface ComparableProvider {
- public Comparable getComparable(int docID) throws IOException;
+ Comparable getComparable(int docID) throws IOException;
}
/** Returns {@code ComparableProvider}s for the provided readers to represent the requested {@link SortField} sort order. */
private static ComparableProvider[] getComparableProviders(List<CodecReader> readers, SortField sortField) throws IOException {
ComparableProvider[] providers = new ComparableProvider[readers.size()];
- final int reverseMul = sortField.getReverse() ? -1 : 1;
final SortField.Type sortType = Sorter.getSortFieldType(sortField);
switch(sortType) {
@@ -169,9 +169,9 @@ final class MultiSorter {
OrdinalMap ordinalMap = OrdinalMap.build(null, values, PackedInts.DEFAULT);
final int missingOrd;
if (sortField.getMissingValue() == SortField.STRING_LAST) {
- missingOrd = sortField.getReverse() ? Integer.MIN_VALUE : Integer.MAX_VALUE;
+ missingOrd = Integer.MAX_VALUE;
} else {
- missingOrd = sortField.getReverse() ? Integer.MAX_VALUE : Integer.MIN_VALUE;
+ missingOrd = Integer.MIN_VALUE;
}
for(int readerIndex=0;readerIndex<readers.size();readerIndex++) {
@@ -197,7 +197,7 @@ final class MultiSorter {
}
if (readerDocID == docID) {
// translate segment's ord to global ord space:
- return reverseMul * (int) globalOrds.get(readerValues.ordValue());
+ return Math.toIntExact(globalOrds.get(readerValues.ordValue()));
} else {
return missingOrd;
}
@@ -238,9 +238,9 @@ final class MultiSorter {
readerDocID = values.advance(docID);
}
if (readerDocID == docID) {
- return reverseMul * values.longValue();
+ return values.longValue();
} else {
- return reverseMul * missingValue;
+ return missingValue;
}
}
};
@@ -279,9 +279,9 @@ final class MultiSorter {
readerDocID = values.advance(docID);
}
if (readerDocID == docID) {
- return reverseMul * (int) values.longValue();
+ return (int) values.longValue();
} else {
- return reverseMul * missingValue;
+ return missingValue;
}
}
};
@@ -320,9 +320,9 @@ final class MultiSorter {
readerDocID = values.advance(docID);
}
if (readerDocID == docID) {
- return reverseMul * Double.longBitsToDouble(values.longValue());
+ return Double.longBitsToDouble(values.longValue());
} else {
- return reverseMul * missingValue;
+ return missingValue;
}
}
};
@@ -361,9 +361,9 @@ final class MultiSorter {
readerDocID = values.advance(docID);
}
if (readerDocID == docID) {
- return reverseMul * Float.intBitsToFloat((int) values.longValue());
+ return Float.intBitsToFloat((int) values.longValue());
} else {
- return reverseMul * missingValue;
+ return missingValue;
}
}
};