You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2022/06/14 08:11:48 UTC

[lucene] branch branch_9x updated: LUCENE-10608: Implement Weight#count on pure conjunctions. (#950)

This is an automated email from the ASF dual-hosted git repository.

jpountz pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/branch_9x by this push:
     new 4da1a16835d LUCENE-10608: Implement Weight#count on pure conjunctions. (#950)
4da1a16835d is described below

commit 4da1a16835d36b322bbd359e5ddc21f71c4fe3aa
Author: Adrien Grand <jp...@gmail.com>
AuthorDate: Tue Jun 14 09:41:38 2022 +0200

    LUCENE-10608: Implement Weight#count on pure conjunctions. (#950)
---
 .../org/apache/lucene/search/BooleanWeight.java    | 42 +++++++++++++
 .../org/apache/lucene/search/TestBooleanQuery.java | 69 ++++++++++++++++++++++
 .../apache/lucene/search/TestLRUQueryCache.java    |  2 +-
 3 files changed, 112 insertions(+), 1 deletion(-)

diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java
index 2fb9c4515b6..d8bbe09e34d 100644
--- a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java
+++ b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java
@@ -344,6 +344,48 @@ final class BooleanWeight extends Weight {
     }
   }
 
+  @Override
+  public int count(LeafReaderContext context) throws IOException {
+    // Counting shortcut for pure conjunctions (only MUST/FILTER clauses): handles the cases where
+    // one clause doesn't match any docs, or all clauses but one match all docs.
+    if (weightedClauses.isEmpty()) {
+      return 0;
+    }
+    for (WeightedBooleanClause weightedClause : weightedClauses) {
+      switch (weightedClause.clause.getOccur()) {
+        case FILTER:
+        case MUST:
+          break;
+        case MUST_NOT:
+        case SHOULD:
+        default:
+          return super.count(context);
+      }
+    }
+    // All clauses are MUST or FILTER: from here on the query is known to be a pure conjunction
+    final int numDocs = context.reader().numDocs();
+    int conjunctionCount = numDocs;
+    for (WeightedBooleanClause weightedClause : weightedClauses) {
+      int count = weightedClause.weight.count(context);
+      if (count == -1 || count == 0) {
+        // If the count of one clause is unknown, then the count of the conjunction is unknown too.
+        // If one clause doesn't match any docs then the conjunction doesn't match any docs either.
+        return count;
+      } else if (count == numDocs) {
+        // This clause matches all docs, so it can be safely ignored
+      } else if (conjunctionCount == numDocs) {
+        // All clauses seen so far match all docs, so the count of this clause is also the count
+        // of the conjunction
+        conjunctionCount = count;
+      } else {
+        // We have two clauses whose count is in [1, numDocs), we can't figure out the number of
+        // docs that match the conjunction without running the query.
+        return super.count(context);
+      }
+    }
+    return conjunctionCount;
+  }
+
   @Override
   public Scorer scorer(LeafReaderContext context) throws IOException {
     ScorerSupplier scorerSupplier = scorerSupplier(context);
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java
index e9c7610467e..5de0f67a999 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java
@@ -29,10 +29,14 @@ import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.LongPoint;
+import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.MultiReader;
 import org.apache.lucene.index.Term;
@@ -770,6 +774,71 @@ public class TestBooleanQuery extends LuceneTestCase {
     IOUtils.close(reader, w, dir);
   }
 
+  public void testConjunctionMatchesCount() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
+    Document doc = new Document();
+    LongPoint longPoint = new LongPoint("long", 3L);
+    doc.add(longPoint);
+    StringField stringField = new StringField("string", "abc", Store.NO);
+    doc.add(stringField);
+    writer.addDocument(doc);
+    longPoint.setLongValue(10);
+    stringField.setStringValue("xyz");
+    writer.addDocument(doc);
+    IndexReader reader = DirectoryReader.open(writer);
+    writer.close();
+    IndexSearcher searcher = new IndexSearcher(reader);
+
+    Query query =
+        new BooleanQuery.Builder()
+            .add(new TermQuery(new Term("string", "abc")), Occur.MUST)
+            .add(LongPoint.newExactQuery("long", 3L), Occur.FILTER)
+            .build();
+    Weight weight = searcher.createWeight(query, ScoreMode.COMPLETE, 1f);
+    // Both clauses match a single doc, so BooleanWeight can't compute the conjunction count (-1)
+    assertEquals(-1, weight.count(reader.leaves().get(0)));
+
+    query =
+        new BooleanQuery.Builder()
+            .add(new TermQuery(new Term("string", "missing")), Occur.MUST)
+            .add(LongPoint.newExactQuery("long", 3L), Occur.FILTER)
+            .build();
+    weight = searcher.createWeight(query, ScoreMode.COMPLETE, 1f);
+    // The term query has a count of 0, so the conjunction has a count of 0 too
+    assertEquals(0, weight.count(reader.leaves().get(0)));
+
+    query =
+        new BooleanQuery.Builder()
+            .add(new TermQuery(new Term("string", "abc")), Occur.MUST)
+            .add(LongPoint.newExactQuery("long", 5L), Occur.FILTER)
+            .build();
+    weight = searcher.createWeight(query, ScoreMode.COMPLETE, 1f);
+    // The point query has a count of 0, so the conjunction has a count of 0 too
+    assertEquals(0, weight.count(reader.leaves().get(0)));
+
+    query =
+        new BooleanQuery.Builder()
+            .add(new TermQuery(new Term("string", "abc")), Occur.MUST)
+            .add(LongPoint.newRangeQuery("long", 0L, 10L), Occur.FILTER)
+            .build();
+    // The range query matches all docs, so the conjunction count is the term query's count
+    weight = searcher.createWeight(query, ScoreMode.COMPLETE, 1f);
+    assertEquals(1, weight.count(reader.leaves().get(0)));
+
+    query =
+        new BooleanQuery.Builder()
+            .add(new MatchAllDocsQuery(), Occur.MUST)
+            .add(LongPoint.newRangeQuery("long", 1L, 5L), Occur.FILTER)
+            .build();
+    // MatchAllDocsQuery matches all docs, so the conjunction count is the range query's count
+    weight = searcher.createWeight(query, ScoreMode.COMPLETE, 1f);
+    assertEquals(1, weight.count(reader.leaves().get(0)));
+
+    reader.close();
+    dir.close();
+  }
+
   public void testToString() {
     BooleanQuery.Builder bq = new BooleanQuery.Builder();
     bq.add(new TermQuery(new Term("field", "a")), Occur.SHOULD);
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java
index d1db1551b48..a30bb757e60 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java
@@ -1273,7 +1273,7 @@ public class TestLRUQueryCache extends LuceneTestCase {
           query.add(bar, Occur.FILTER);
           query.add(foo, Occur.FILTER);
         }
-        indexSearcher.count(query.build());
+        indexSearcher.search(query.build(), new TotalHitCountCollectorManager());
         assertEquals(1, policy.frequency(query.build()));
         assertEquals(1, policy.frequency(foo));
         assertEquals(1, policy.frequency(bar));