You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2022/06/14 08:11:48 UTC
[lucene] branch branch_9x updated: LUCENE-10608: Implement Weight#count on pure conjunctions. (#950)
This is an automated email from the ASF dual-hosted git repository.
jpountz pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new 4da1a16835d LUCENE-10608: Implement Weight#count on pure conjunctions. (#950)
4da1a16835d is described below
commit 4da1a16835d36b322bbd359e5ddc21f71c4fe3aa
Author: Adrien Grand <jp...@gmail.com>
AuthorDate: Tue Jun 14 09:41:38 2022 +0200
LUCENE-10608: Implement Weight#count on pure conjunctions. (#950)
---
.../org/apache/lucene/search/BooleanWeight.java | 42 +++++++++++++
.../org/apache/lucene/search/TestBooleanQuery.java | 69 ++++++++++++++++++++++
.../apache/lucene/search/TestLRUQueryCache.java | 2 +-
3 files changed, 112 insertions(+), 1 deletion(-)
diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java
index 2fb9c4515b6..d8bbe09e34d 100644
--- a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java
+++ b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java
@@ -344,6 +344,48 @@ final class BooleanWeight extends Weight {
}
}
+ @Override
+ public int count(LeafReaderContext context) throws IOException {
+ // Counts pure conjunctions (only MUST/FILTER clauses) without running the query, in the case when
+ // one clause doesn't match any docs, or all clauses but one match all docs.
+ if (weightedClauses.isEmpty()) {
+ return 0;
+ }
+ for (WeightedBooleanClause weightedClause : weightedClauses) {
+ switch (weightedClause.clause.getOccur()) {
+ case FILTER:
+ case MUST:
+ break;
+ case MUST_NOT:
+ case SHOULD:
+ default:
+ return super.count(context);
+ }
+ }
+ // From here on every clause is MUST or FILTER, i.e. the query is a pure conjunction
+ final int numDocs = context.reader().numDocs();
+ int conjunctionCount = numDocs;
+ for (WeightedBooleanClause weightedClause : weightedClauses) {
+ int count = weightedClause.weight.count(context);
+ if (count == -1 || count == 0) {
+ // If the count of one clause is unknown, then the count of the conjunction is unknown too.
+ // If one clause doesn't match any docs then the conjunction doesn't match any docs either.
+ return count;
+ } else if (count == numDocs) {
+ // this clause matches all docs, so it doesn't restrict the conjunction and can be ignored
+ } else if (conjunctionCount == numDocs) {
+ // conjunctionCount is still numDocs, so all clauses seen so far match all docs and the count
+ // of the new clause is also the count of the conjunction
+ conjunctionCount = count;
+ } else {
+ // We have two clauses whose count is in [1, numDocs), we can't figure out the number of
+ // docs that match the conjunction without running the query.
+ return super.count(context);
+ }
+ }
+ return conjunctionCount;
+ }
+
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
ScorerSupplier scorerSupplier = scorerSupplier(context);
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java
index e9c7610467e..5de0f67a999 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java
@@ -29,10 +29,14 @@ import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.LongPoint;
+import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.Term;
@@ -770,6 +774,71 @@ public class TestBooleanQuery extends LuceneTestCase {
IOUtils.close(reader, w, dir);
}
+ public void testConjunctionMatchesCount() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
+ Document doc = new Document();
+ LongPoint longPoint = new LongPoint("long", 3L);
+ doc.add(longPoint);
+ StringField stringField = new StringField("string", "abc", Store.NO);
+ doc.add(stringField);
+ writer.addDocument(doc);
+ longPoint.setLongValue(10);
+ stringField.setStringValue("xyz");
+ writer.addDocument(doc);
+ IndexReader reader = DirectoryReader.open(writer);
+ writer.close();
+ IndexSearcher searcher = new IndexSearcher(reader);
+
+ Query query =
+ new BooleanQuery.Builder()
+ .add(new TermQuery(new Term("string", "abc")), Occur.MUST)
+ .add(LongPoint.newExactQuery("long", 3L), Occur.FILTER)
+ .build();
+ Weight weight = searcher.createWeight(query, ScoreMode.COMPLETE, 1f);
+ // Both clauses match exactly one of the two docs, BooleanWeight can't figure out the count
+ assertEquals(-1, weight.count(reader.leaves().get(0)));
+
+ query =
+ new BooleanQuery.Builder()
+ .add(new TermQuery(new Term("string", "missing")), Occur.MUST)
+ .add(LongPoint.newExactQuery("long", 3L), Occur.FILTER)
+ .build();
+ weight = searcher.createWeight(query, ScoreMode.COMPLETE, 1f);
+ // The term query has a count of 0, the conjunction has a count of 0 too
+ assertEquals(0, weight.count(reader.leaves().get(0)));
+
+ query =
+ new BooleanQuery.Builder()
+ .add(new TermQuery(new Term("string", "abc")), Occur.MUST)
+ .add(LongPoint.newExactQuery("long", 5L), Occur.FILTER)
+ .build();
+ weight = searcher.createWeight(query, ScoreMode.COMPLETE, 1f);
+ // The point query has a count of 0, the conjunction has a count of 0 too
+ assertEquals(0, weight.count(reader.leaves().get(0)));
+
+ query =
+ new BooleanQuery.Builder()
+ .add(new TermQuery(new Term("string", "abc")), Occur.MUST)
+ .add(LongPoint.newRangeQuery("long", 0L, 10L), Occur.FILTER)
+ .build();
+ // The range query matches all docs, the count of the conjunction is the count of the term query
+ weight = searcher.createWeight(query, ScoreMode.COMPLETE, 1f);
+ assertEquals(1, weight.count(reader.leaves().get(0)));
+
+ query =
+ new BooleanQuery.Builder()
+ .add(new MatchAllDocsQuery(), Occur.MUST)
+ .add(LongPoint.newRangeQuery("long", 1L, 5L), Occur.FILTER)
+ .build();
+ // MatchAllDocsQuery matches all docs, the count of the conjunction is the count of the range query
+ weight = searcher.createWeight(query, ScoreMode.COMPLETE, 1f);
+ assertEquals(1, weight.count(reader.leaves().get(0)));
+
+ reader.close();
+ dir.close();
+ }
+
public void testToString() {
BooleanQuery.Builder bq = new BooleanQuery.Builder();
bq.add(new TermQuery(new Term("field", "a")), Occur.SHOULD);
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java
index d1db1551b48..a30bb757e60 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java
@@ -1273,7 +1273,7 @@ public class TestLRUQueryCache extends LuceneTestCase {
query.add(bar, Occur.FILTER);
query.add(foo, Occur.FILTER);
}
- indexSearcher.count(query.build());
+ indexSearcher.search(query.build(), new TotalHitCountCollectorManager());
assertEquals(1, policy.frequency(query.build()));
assertEquals(1, policy.frequency(foo));
assertEquals(1, policy.frequency(bar));