You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2021/11/19 17:07:09 UTC
[lucene] branch main updated: LUCENE-10085: Implement Weight#count on DocValuesFieldExistsQuery (#445)
This is an automated email from the ASF dual-hosted git repository.
jpountz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new 1a869c1 LUCENE-10085: Implement Weight#count on DocValuesFieldExistsQuery (#445)
1a869c1 is described below
commit 1a869c185b69dbef10d3861c74beb11bf1ffd3de
Author: Quentin Pradet <qu...@apache.org>
AuthorDate: Fri Nov 19 21:06:58 2021 +0400
LUCENE-10085: Implement Weight#count on DocValuesFieldExistsQuery (#445)
Co-authored-by: Adrien Grand <jp...@gmail.com>
---
lucene/CHANGES.txt | 4 ++
.../lucene/search/DocValuesFieldExistsQuery.java | 18 +++++++
.../search/TestDocValuesFieldExistsQuery.java | 55 ++++++++++++++++++++++
3 files changed, 77 insertions(+)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 341d80b5..88a8d9a 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -45,6 +45,10 @@ New Features
* LUCENE-10220: Add an utility method to get IntervalSource from analyzed text (or token stream).
(Uwe Schindler, Dawid Weiss, Alan Woodward)
+* LUCENE-10085: Added Weight#count on DocValuesFieldExistsQuery to speed up the query if terms or
+ points are indexed.
+ (Quentin Pradet, Adrien Grand)
+
Improvements
---------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java b/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java
index 851c5bf..acdd899 100644
--- a/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java
@@ -19,7 +19,9 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PointValues;
+import org.apache.lucene.index.Terms;
@@ -75,6 +77,22 @@ public final class DocValuesFieldExistsQuery extends Query {
}
+ /**
+  * Counts the documents of this segment that have a value for {@code field} without iterating
+  * them when possible, and defers to {@code super.count(context)} otherwise.
+  *
+  * <p>Returns 0 when the segment has no doc values for the field. When the segment has no
+  * deletions and the field also indexes points or terms, the doc count recorded by those
+  * structures is returned. NOTE(review): that shortcut assumes the same documents carry doc
+  * values and points/terms for the field.
+  */
@Override
+ public int count(LeafReaderContext context) throws IOException {
+   final LeafReader reader = context.reader();
+   final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
+   if (fieldInfo == null || fieldInfo.getDocValuesType() == DocValuesType.NONE) {
+     return 0; // the field doesn't index doc values
+   }
+   if (!reader.hasDeletions()) {
+     // getPointValues/terms are documented to return null when the segment holds no such data
+     // for the field; fall through to the default count instead of dereferencing null.
+     if (fieldInfo.getPointDimensionCount() > 0) {
+       final PointValues pointValues = reader.getPointValues(field);
+       if (pointValues != null) {
+         return pointValues.getDocCount();
+       }
+     } else if (fieldInfo.getIndexOptions() != IndexOptions.NONE) {
+       final Terms terms = reader.terms(field);
+       if (terms != null) {
+         return terms.getDocCount();
+       }
+     }
+   }
+   return super.count(context);
+ }
+
+ // Cacheability for this segment is delegated to DocValues.isCacheable for the query's field.
+ @Override
public boolean isCacheable(LeafReaderContext ctx) {
return DocValues.isCacheable(ctx, field);
}
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestDocValuesFieldExistsQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestDocValuesFieldExistsQuery.java
index 3bdc4a3..489ee0f 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestDocValuesFieldExistsQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestDocValuesFieldExistsQuery.java
@@ -19,15 +19,22 @@ package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
public class TestDocValuesFieldExistsQuery extends LuceneTestCase {
@@ -206,6 +213,54 @@ public class TestDocValuesFieldExistsQuery extends LuceneTestCase {
dir.close();
}
+ /**
+  * Checks that counting a DocValuesFieldExistsQuery agrees with the number of indexed documents
+  * that carry the field, and that Weight#count returns -1 once the segment has deletions.
+  */
+ public void testQueryMatchesCount() throws IOException {
+   Directory dir = newDirectory();
+   RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+
+   int randomNumDocs = TestUtil.nextInt(random(), 10, 100);
+   int numMatchingDocs = 0;
+
+   for (int i = 0; i < randomNumDocs; i++) {
+     Document doc = new Document();
+     // Document 0 always carries the fields so the delete further down is guaranteed to hit
+     // exactly one document regardless of the random draws.
+     if (i == 0 || random().nextBoolean()) {
+       doc.add(new LongPoint("long", i));
+       doc.add(new NumericDocValuesField("long", i));
+       doc.add(new StringField("string", "value", Store.NO));
+       doc.add(new SortedDocValuesField("string", new BytesRef("value")));
+       numMatchingDocs++;
+     }
+     w.addDocument(doc);
+   }
+   w.forceMerge(1);
+
+   DirectoryReader reader = w.getReader();
+   final IndexSearcher searcher = new IndexSearcher(reader);
+
+   assertSameCount(reader, searcher, "long", numMatchingDocs);
+   assertSameCount(reader, searcher, "string", numMatchingDocs);
+   assertSameCount(reader, searcher, "doesNotExist", 0);
+
+   // Test that we can't count in O(1) when there are deleted documents
+   w.w.getConfig().setMergePolicy(NoMergePolicy.INSTANCE);
+   // Delete one known-present document: the segment then has deletions but keeps live
+   // documents (at least 9 of the >= 10 indexed), so leaf 0 is still there to inspect.
+   w.deleteDocuments(LongPoint.newExactQuery("long", 0L));
+   DirectoryReader reader2 = w.getReader();
+   final IndexSearcher searcher2 = new IndexSearcher(reader2);
+   final Query testQuery = new DocValuesFieldExistsQuery("long");
+   final Weight weight2 = searcher2.createWeight(testQuery, ScoreMode.COMPLETE, 1);
+   // assertEquals takes (expected, actual)
+   assertEquals(-1, weight2.count(reader2.leaves().get(0)));
+
+   IOUtils.close(reader, reader2, w, dir);
+ }
+
+ /**
+  * Asserts that a DocValuesFieldExistsQuery on {@code field} yields {@code numMatchingDocs} both
+  * through IndexSearcher#count and through Weight#count on the reader's first leaf.
+  */
+ private void assertSameCount(
+     IndexReader reader, IndexSearcher searcher, String field, int numMatchingDocs)
+     throws IOException {
+   final Query testQuery = new DocValuesFieldExistsQuery(field);
+   // assertEquals takes (expected, actual); keeping that order gives correct failure messages
+   assertEquals(numMatchingDocs, searcher.count(testQuery));
+   final Weight weight = searcher.createWeight(testQuery, ScoreMode.COMPLETE, 1);
+   assertEquals(numMatchingDocs, weight.count(reader.leaves().get(0)));
+ }
+
private void assertSameMatches(IndexSearcher searcher, Query q1, Query q2, boolean scores)
throws IOException {
final int maxDoc = searcher.getIndexReader().maxDoc();