You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ro...@apache.org on 2021/11/30 10:18:02 UTC
[lucene] branch branch_9x updated: LUCENE-10263: Implement Weight.count() on NormsFieldExistsQuery (#477)
This is an automated email from the ASF dual-hosted git repository.
romseygeek pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new b697745 LUCENE-10263: Implement Weight.count() on NormsFieldExistsQuery (#477)
b697745 is described below
commit b697745407e1dd3ea09a296f06edf9ca84ba2e1c
Author: Alan Woodward <ro...@apache.org>
AuthorDate: Tue Nov 30 10:00:38 2021 +0000
LUCENE-10263: Implement Weight.count() on NormsFieldExistsQuery (#477)
If all documents in the segment have a value, then `Reader.getDocCount()` will
equal `maxDoc` and we can return `numDocs` as a shortcut.
---
lucene/CHANGES.txt | 3 +
.../lucene/search/NormsFieldExistsQuery.java | 14 +++++
.../lucene/search/TestNormsFieldExistsQuery.java | 71 ++++++++++++++++++++++
3 files changed, 88 insertions(+)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 8f240fa..682ea1d 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -25,6 +25,9 @@ New Features
points are indexed.
(Quentin Pradet, Adrien Grand)
+* LUCENE-10263: Added Weight#count to NormsFieldExistsQuery to speed up the query if all
+ documents have the field. (Alan Woodward)
+
Improvements
---------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java b/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java
index 5d1ba88..d23171c 100644
--- a/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java
@@ -80,6 +80,20 @@ public final class NormsFieldExistsQuery extends Query {
}
@Override
+ public int count(LeafReaderContext context) throws IOException {
+ final LeafReader reader = context.reader();
+ final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
+ if (fieldInfo == null || fieldInfo.hasNorms() == false) {
+ return 0; // field is absent from this segment or has no norms
+ }
+ // If every document in the segment has a value for the field then we can shortcut
+ if (reader.getDocCount(field) == reader.maxDoc()) {
+ return reader.numDocs();
+ }
+ return super.count(context); // no shortcut applies: defer to the superclass
+ }
+
+ @Override
public boolean isCacheable(LeafReaderContext ctx) {
return true;
}
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestNormsFieldExistsQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestNormsFieldExistsQuery.java
index 33c6c1e..73fb211 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestNormsFieldExistsQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestNormsFieldExistsQuery.java
@@ -18,15 +18,22 @@ package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
public class TestNormsFieldExistsQuery extends LuceneTestCase {
@@ -199,4 +206,68 @@ public class TestNormsFieldExistsQuery extends LuceneTestCase {
}
}
}
+
+ public void testQueryMatchesCount() throws IOException {
+ Directory dir = newDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+
+ int randomNumDocs = TestUtil.nextInt(random(), 10, 100);
+
+ FieldType noNormsFieldType = new FieldType();
+ noNormsFieldType.setOmitNorms(true);
+ noNormsFieldType.setIndexOptions(IndexOptions.DOCS);
+
+ Document doc = new Document();
+ doc.add(new TextField("text", "always here", Store.NO));
+ doc.add(new TextField("text_s", "", Store.NO)); // only this first doc gets an empty value
+ doc.add(new Field("text_n", "always here", noNormsFieldType));
+ w.addDocument(doc);
+
+ for (int i = 1; i < randomNumDocs; i++) {
+ doc.clear();
+ doc.add(new TextField("text", "some text", Store.NO));
+ doc.add(new TextField("text_s", "some text", Store.NO));
+ doc.add(new Field("text_n", "some here", noNormsFieldType));
+ w.addDocument(doc);
+ }
+ w.forceMerge(1); // the assert helpers only inspect leaves().get(0)
+
+ DirectoryReader reader = w.getReader();
+ final IndexSearcher searcher = new IndexSearcher(reader);
+
+ assertCountWithShortcut(searcher, "text", randomNumDocs);
+ assertCountWithShortcut(searcher, "doesNotExist", 0);
+ assertCountWithShortcut(searcher, "text_n", 0);
+
+ // docs that have a text field that analyzes to an empty token
+ // stream still have a recorded norm value but don't show up in
+ // Reader.getDocCount(field), so we can't use the shortcut for
+ // these fields (here: "text_s", whose first doc has an empty value)
+ assertCountWithoutShortcut(searcher, "text_s", randomNumDocs);
+
+ // We can still shortcut with deleted docs: the expected count is then 1, not randomNumDocs
+ w.w.getConfig().setMergePolicy(NoMergePolicy.INSTANCE);
+ w.deleteDocuments(new Term("text", "text")); // deletes all but the first doc
+ DirectoryReader reader2 = w.getReader();
+ final IndexSearcher searcher2 = new IndexSearcher(reader2);
+ assertCountWithShortcut(searcher2, "text", 1);
+
+ IOUtils.close(reader, reader2, w, dir);
+ }
+
+ private void assertCountWithoutShortcut(IndexSearcher searcher, String field, int expectedCount)
+ throws IOException {
+ final Query q = new NormsFieldExistsQuery(field);
+ final Weight weight = searcher.createWeight(q, ScoreMode.COMPLETE, 1);
+ assertEquals(-1, weight.count(searcher.reader.leaves().get(0))); // -1 expected: no shortcut
+ assertEquals(expectedCount, searcher.count(q)); // the searcher-level count must still be right
+ }
+
+ private void assertCountWithShortcut(IndexSearcher searcher, String field, int numMatchingDocs)
+ throws IOException {
+ final Query testQuery = new NormsFieldExistsQuery(field);
+ assertEquals(numMatchingDocs, searcher.count(testQuery)); // count through the searcher
+ final Weight weight = searcher.createWeight(testQuery, ScoreMode.COMPLETE, 1);
+ assertEquals(numMatchingDocs, weight.count(searcher.reader.leaves().get(0))); // first leaf only
+ }
}