You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dn...@apache.org on 2021/09/15 13:22:07 UTC
[lucene] branch main updated: LUCENE-10106: Sort optimization wrongly skip first docs (#300)
This is an automated email from the ASF dual-hosted git repository.
dnhatn pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new b7a286d LUCENE-10106: Sort optimization wrongly skip first docs (#300)
b7a286d is described below
commit b7a286dd69f33261637182cd274bc5bb35c773f6
Author: Nhat Nguyen <nh...@elastic.co>
AuthorDate: Wed Sep 15 09:21:59 2021 -0400
LUCENE-10106: Sort optimization wrongly skip first docs (#300)
The first documents of subsequent segments are mistakenly skipped when
sort optimization is enabled. We should initialize maxDocVisited in
NumericComparator to -1 instead of 0.
---
.../search/comparators/NumericComparator.java | 2 +-
.../apache/lucene/search/TestSortOptimization.java | 84 ++++++++++++++++++++++
2 files changed, 85 insertions(+), 1 deletion(-)
diff --git a/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java b/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java
index 0455cea..051d9cc 100644
--- a/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java
+++ b/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java
@@ -84,7 +84,7 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
private DocIdSetIterator competitiveIterator;
private long iteratorCost;
- private int maxDocVisited = 0;
+ private int maxDocVisited = -1;
private int updateCounter = 0;
public NumericLeafComparator(LeafReaderContext context) throws IOException {
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java b/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java
index ec6ec66..4581cf1 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java
@@ -20,6 +20,10 @@ import static org.apache.lucene.search.SortField.FIELD_DOC;
import static org.apache.lucene.search.SortField.FIELD_SCORE;
import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.LongStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatDocValuesField;
@@ -633,4 +637,84 @@ public class TestSortOptimization extends LuceneTestCase {
reader.close();
dir.close();
}
+
+ public void testMaxDocVisited() throws IOException { // regression test for LUCENE-10106
+   Directory dir = newDirectory();
+   IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
+   int numDocs = atLeast(10000);
+   long offset = 100 + random().nextInt(100); // regular values are i + offset, i.e. >= 100
+   long smallestValue = 50 + random().nextInt(50); // in [50, 99]: below every regular value
+   boolean flushed = false;
+   for (int i = 0; i < numDocs; ++i) {
+     Document doc = new Document();
+     doc.add(new NumericDocValuesField("my_field", i + offset));
+     doc.add(new LongPoint("my_field", i + offset));
+     writer.addDocument(doc);
+     if (i >= 5000 && flushed == false) {
+       flushed = true;
+       writer.flush();
+       // Index the smallest value to the first slot of the second segment
+       doc = new Document();
+       doc.add(new NumericDocValuesField("my_field", smallestValue));
+       doc.add(new LongPoint("my_field", smallestValue));
+       writer.addDocument(doc);
+     }
+   }
+   IndexReader reader = DirectoryReader.open(writer);
+   writer.close();
+   IndexSearcher searcher = new IndexSearcher(reader);
+   SortField sortField = new SortField("my_field", SortField.Type.LONG);
+   TopFieldDocs topDocs =
+       searcher.search(new MatchAllDocsQuery(), 1 + random().nextInt(100), new Sort(sortField));
+   FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[0]; // top hit must carry smallestValue
+   assertEquals(smallestValue, ((Long) fieldDoc.fields[0]).longValue()); // long compare, no narrowing
+   reader.close();
+   dir.close();
+ }
+
+ public void testRandomLong() throws IOException { // pages through all docs via searchAfter
+   Directory dir = newDirectory();
+   IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
+   List<Long> seqNos = LongStream.range(0, atLeast(10_000)).boxed().collect(Collectors.toList());
+   Collections.shuffle(seqNos, random()); // index the distinct values in random order
+   int pendingDocs = 0;
+   for (long seqNo : seqNos) {
+     Document doc = new Document();
+     doc.add(new NumericDocValuesField("seq_no", seqNo));
+     doc.add(new LongPoint("seq_no", seqNo));
+     writer.addDocument(doc);
+     pendingDocs++;
+     if (pendingDocs > 500 && random().nextInt(100) <= 5) { // random flush once > 500 docs pending
+       pendingDocs = 0;
+       writer.flush();
+     }
+   }
+   writer.flush();
+   seqNos.sort(Long::compare); // expected hit order: ascending values
+   IndexReader reader = DirectoryReader.open(writer);
+   writer.close();
+   IndexSearcher searcher = new IndexSearcher(reader);
+   SortField sortField = new SortField("seq_no", SortField.Type.LONG);
+   int visitedHits = 0;
+   ScoreDoc after = null; // null on the first page
+   while (visitedHits < seqNos.size()) {
+     int batch = 1 + random().nextInt(100);
+     Query query =
+         random().nextBoolean()
+             ? new MatchAllDocsQuery()
+             : LongPoint.newRangeQuery("seq_no", 0, Long.MAX_VALUE);
+     TopDocs topDocs = searcher.searchAfter(after, query, batch, new Sort(sortField));
+     int expectedHits = Math.min(seqNos.size() - visitedHits, batch);
+     assertEquals(expectedHits, topDocs.scoreDocs.length);
+     after = topDocs.scoreDocs[expectedHits - 1]; // next page resumes after this page's last hit
+     for (int i = 0; i < topDocs.scoreDocs.length; i++) {
+       FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
+       long expectedSeqNo = seqNos.get(visitedHits);
+       assertEquals(expectedSeqNo, ((Long) fieldDoc.fields[0]).longValue()); // long compare, no narrowing
+       visitedHits++;
+     }
+   }
+   reader.close();
+   dir.close();
+ }
}