You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2022/09/20 11:51:12 UTC
[lucene] branch branch_9_4 updated: Fix handling of ghost fields in string sorts. (#11792)

This is an automated email from the ASF dual-hosted git repository.

jpountz pushed a commit to branch branch_9_4
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/branch_9_4 by this push:
     new 4eb4c8e09c2 Fix handling of ghost fields in string sorts. (#11792)
4eb4c8e09c2 is described below

commit 4eb4c8e09c222d25cb077e1f3da08f3a5a225fd6
Author: Adrien Grand <jp...@gmail.com>
AuthorDate: Tue Sep 20 13:49:52 2022 +0200

    Fix handling of ghost fields in string sorts. (#11792)
    
    The dynamic pruning added for string sorts (#11669) introduced a bug with
    string sorts and ghost fields: a `NullPointerException` is thrown because the
    code assumes that `LeafReader#terms` returns a non-null value whenever field
    infos report the field as indexed.
    
    This commit fixes the issue and adds tests for ghost fields across all sort
    types.
    
    Hopefully we can simplify and remove the null check in the future when we
    improve handling of ghost fields (#11393).
---
 .../search/comparators/TermOrdValComparator.java   |   2 +-
 .../test/org/apache/lucene/search/TestSort.java    | 193 +++++++++++++++++++++
 2 files changed, 194 insertions(+), 1 deletion(-)

diff --git a/lucene/core/src/java/org/apache/lucene/search/comparators/TermOrdValComparator.java b/lucene/core/src/java/org/apache/lucene/search/comparators/TermOrdValComparator.java
index b0f2260148c..548bbb401b2 100644
--- a/lucene/core/src/java/org/apache/lucene/search/comparators/TermOrdValComparator.java
+++ b/lucene/core/src/java/org/apache/lucene/search/comparators/TermOrdValComparator.java
@@ -251,7 +251,7 @@ public class TermOrdValComparator extends FieldComparator<BytesRef> {
           enableSkipping = false;
         } else {
           Terms terms = context.reader().terms(field);
-          dense = terms.getDocCount() == context.reader().maxDoc();
+          dense = terms != null && terms.getDocCount() == context.reader().maxDoc();
           if (dense || topValue != null) {
             enableSkipping = true;
           } else if (reverse == sortMissingLast) {
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSort.java b/lucene/core/src/test/org/apache/lucene/search/TestSort.java
index 1ef4883b318..c7fb099db48 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestSort.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestSort.java
@@ -20,11 +20,21 @@ import java.io.IOException;
 import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.DoubleDocValuesField;
+import org.apache.lucene.document.DoublePoint;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.document.FloatDocValuesField;
+import org.apache.lucene.document.FloatPoint;
+import org.apache.lucene.document.IntPoint;
+import org.apache.lucene.document.LongPoint;
 import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.Term;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.tests.index.RandomIndexWriter;
 import org.apache.lucene.tests.util.LuceneTestCase;
@@ -847,4 +857,187 @@ public class TestSort extends LuceneTestCase {
       assertSame(sort, sort.rewrite(searcher));
     }
   }
+
+  // Ghost-field tests: make sure that sorting copes with segments that hold no values for a field
+  // even though their FieldInfo still reports that the field exists.
+
+  public void testStringGhost() throws IOException {
+    doTestStringGhost(true);
+    doTestStringGhost(false);
+  }
+
+  private void doTestStringGhost(boolean indexed) throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
+    Document doc = new Document();
+    doc.add(new SortedDocValuesField("value", newBytesRef("foo")));
+    if (indexed) {
+      doc.add(newStringField("value", "foo", Field.Store.YES));
+    }
+    doc.add(new StringField("id", "0", Store.NO));
+    writer.addDocument(doc);
+    writer.addDocument(new Document());
+    writer.flush();
+    writer.addDocument(new Document());
+    writer.flush();
+    writer.deleteDocuments(new Term("id", "0"));
+    writer.forceMerge(1);
+    IndexReader ir = DirectoryReader.open(writer);
+    writer.close();
+
+    IndexSearcher searcher = newSearcher(ir);
+    Sort sort = new Sort(new SortField("value", SortField.Type.STRING));
+
+    TopFieldDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
+    assertEquals(2, td.totalHits.value);
+    assertNull(((FieldDoc) td.scoreDocs[0]).fields[0]);
+    assertNull(((FieldDoc) td.scoreDocs[1]).fields[0]);
+
+    ir.close();
+    dir.close();
+  }
+
+  public void testIntGhost() throws IOException {
+    doTestIntGhost(true);
+    doTestIntGhost(false);
+  }
+
+  private void doTestIntGhost(boolean indexed) throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
+    Document doc = new Document();
+    doc.add(new NumericDocValuesField("value", 3));
+    if (indexed) {
+      doc.add(new IntPoint("value", 3));
+    }
+    doc.add(new StringField("id", "0", Store.NO));
+    writer.addDocument(doc);
+    writer.addDocument(new Document());
+    writer.flush();
+    writer.addDocument(new Document());
+    writer.flush();
+    writer.deleteDocuments(new Term("id", "0"));
+    writer.forceMerge(1);
+    IndexReader ir = DirectoryReader.open(writer);
+    writer.close();
+
+    IndexSearcher searcher = newSearcher(ir);
+    Sort sort = new Sort(new SortField("value", SortField.Type.INT));
+
+    TopFieldDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
+    assertEquals(2, td.totalHits.value);
+    assertEquals(0, ((FieldDoc) td.scoreDocs[0]).fields[0]);
+    assertEquals(0, ((FieldDoc) td.scoreDocs[1]).fields[0]);
+
+    ir.close();
+    dir.close();
+  }
+
+  public void testLongGhost() throws IOException {
+    doTestLongGhost(true);
+    doTestLongGhost(false);
+  }
+
+  private void doTestLongGhost(boolean indexed) throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
+    Document doc = new Document();
+    doc.add(new NumericDocValuesField("value", 3L));
+    if (indexed) {
+      doc.add(new LongPoint("value", 3L));
+    }
+    doc.add(new StringField("id", "0", Store.NO));
+    writer.addDocument(doc);
+    writer.addDocument(new Document());
+    writer.flush();
+    writer.addDocument(new Document());
+    writer.flush();
+    writer.deleteDocuments(new Term("id", "0"));
+    writer.forceMerge(1);
+    IndexReader ir = DirectoryReader.open(writer);
+    writer.close();
+
+    IndexSearcher searcher = newSearcher(ir);
+    Sort sort = new Sort(new SortField("value", SortField.Type.LONG));
+
+    TopFieldDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
+    assertEquals(2, td.totalHits.value);
+    assertEquals(0L, ((FieldDoc) td.scoreDocs[0]).fields[0]);
+    assertEquals(0L, ((FieldDoc) td.scoreDocs[1]).fields[0]);
+
+    ir.close();
+    dir.close();
+  }
+
+  public void testDoubleGhost() throws IOException {
+    doTestDoubleGhost(true);
+    doTestDoubleGhost(false);
+  }
+
+  private void doTestDoubleGhost(boolean indexed) throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
+    Document doc = new Document();
+    doc.add(new DoubleDocValuesField("value", 1.25));
+    if (indexed) {
+      doc.add(new DoublePoint("value", 1.25));
+    }
+    doc.add(new StringField("id", "0", Store.NO));
+    writer.addDocument(doc);
+    writer.addDocument(new Document());
+    writer.flush();
+    writer.addDocument(new Document());
+    writer.flush();
+    writer.deleteDocuments(new Term("id", "0"));
+    writer.forceMerge(1);
+    IndexReader ir = DirectoryReader.open(writer);
+    writer.close();
+
+    IndexSearcher searcher = newSearcher(ir);
+    Sort sort = new Sort(new SortField("value", SortField.Type.DOUBLE));
+
+    TopFieldDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
+    assertEquals(2, td.totalHits.value);
+    assertEquals(0.0, ((FieldDoc) td.scoreDocs[0]).fields[0]);
+    assertEquals(0.0, ((FieldDoc) td.scoreDocs[1]).fields[0]);
+
+    ir.close();
+    dir.close();
+  }
+
+  public void testFloatGhost() throws IOException {
+    doTestFloatGhost(true);
+    doTestFloatGhost(false);
+  }
+
+  private void doTestFloatGhost(boolean indexed) throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
+    Document doc = new Document();
+    doc.add(new FloatDocValuesField("value", 1.25f));
+    if (indexed) {
+      doc.add(new FloatPoint("value", 1.25f));
+    }
+    doc.add(new StringField("id", "0", Store.NO));
+    writer.addDocument(doc);
+    writer.addDocument(new Document());
+    writer.flush();
+    writer.addDocument(new Document());
+    writer.flush();
+    writer.deleteDocuments(new Term("id", "0"));
+    writer.forceMerge(1);
+    IndexReader ir = DirectoryReader.open(writer);
+    writer.close();
+
+    IndexSearcher searcher = newSearcher(ir);
+    Sort sort = new Sort(new SortField("value", SortField.Type.FLOAT));
+
+    TopFieldDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
+    assertEquals(2, td.totalHits.value);
+    assertEquals(0.0f, ((FieldDoc) td.scoreDocs[0]).fields[0]);
+    assertEquals(0.0f, ((FieldDoc) td.scoreDocs[1]).fields[0]);
+
+    ir.close();
+    dir.close();
+  }
 }