You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ds...@apache.org on 2020/04/07 20:28:05 UTC
[lucene-solr] branch branch_8x updated: SOLR-14376: optimize SolrIndexSearcher.getDocSet when matches everything * getProcessedFilter now returns null filter if it's all docs more reliably * getProcessedFilter now documented clearly as an internal method * getDocSet detects all-docs and exits early with getLiveDocs * small refactoring to getDocSetBits/makeDocSetBits Closes #1399

This is an automated email from the ASF dual-hosted git repository.

dsmiley pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/branch_8x by this push:
     new 6905405  SOLR-14376: optimize SolrIndexSearcher.getDocSet when matches everything * getProcessedFilter now returns null filter if it's all docs more reliably * getProcessedFilter now documented clearly as an internal method * getDocSet detects all-docs and exits early with getLiveDocs * small refactoring to getDocSetBits/makeDocSetBits Closes #1399
6905405 is described below

commit 6905405d3fc34d96d68e3ec8705085d8e920a1cd
Author: David Smiley <ds...@apache.org>
AuthorDate: Thu Apr 2 23:53:04 2020 -0400

    SOLR-14376: optimize SolrIndexSearcher.getDocSet when matches everything
    * getProcessedFilter now returns null filter if it's all docs more reliably
    * getProcessedFilter now documented clearly as an internal method
    * getDocSet detects all-docs and exits early with getLiveDocs
    * small refactoring to getDocSetBits/makeDocSetBits
    Closes #1399
    
    (cherry picked from commit 013898dec51c87c2cf9fb4d119c51e56354e23c6)
---
 solr/CHANGES.txt                                   |  2 +
 .../org/apache/solr/search/SolrIndexSearcher.java  | 96 ++++++++++++++--------
 2 files changed, 66 insertions(+), 32 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 644e085..b2d7316 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -38,6 +38,8 @@ Optimizations
 * SOLR-14340: Remove unnecessary configset verification checks when SolrCloud merely wants to know which configset a
   collection is using.  Improves CLUSTERSTATUS times for massive clusters.  (Mathieu Marie, David Smiley)
 
+* SOLR-14376: Optimize filter queries that match all docs. (David Smiley)
+
 Bug Fixes
 ---------------------
 * SOLR-13264: IndexSizeTrigger aboveOp / belowOp properties not in valid properties.
diff --git a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
index ecca345..058287e 100644
--- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
+++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@@ -757,6 +757,9 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
   private BitDocSet makeBitDocSet(DocSet answer) {
     // TODO: this should be implemented in DocSet, most likely with a getBits method that takes a maxDoc argument
     // or make DocSet instances remember maxDoc
+    if (answer instanceof BitDocSet) {
+      return (BitDocSet) answer;
+    }
     FixedBitSet bs = new FixedBitSet(maxDoc());
     DocIterator iter = answer.iterator();
     while (iter.hasNext()) {
@@ -768,11 +771,8 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
 
   public BitDocSet getDocSetBits(Query q) throws IOException {
     DocSet answer = getDocSet(q);
-    if (answer instanceof BitDocSet) {
-      return (BitDocSet) answer;
-    }
     BitDocSet answerBits = makeBitDocSet(answer);
-    if (filterCache != null) {
+    if (answerBits != answer && filterCache != null) {
       filterCache.put(q, answerBits);
     }
     return answerBits;
@@ -876,18 +876,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
   public void setLiveDocs(DocSet docs) {
     // a few places currently expect BitDocSet
     assert docs.size() == numDocs();
-    if (docs instanceof BitDocSet) {
-      this.liveDocs = (BitDocSet)docs;
-    } else {
-      this.liveDocs = makeBitDocSet(docs);
-    }
-  }
-
-  public static class ProcessedFilter {
-    public DocSet answer; // the answer, if non-null
-    public Filter filter;
-    public DelegatingCollector postFilter;
-    public boolean hasDeletedDocs;  // true if it's possible that filter may match deleted docs
+    this.liveDocs = makeBitDocSet(docs);
   }
 
   private static Comparator<Query> sortByCost = (q1, q2) -> ((ExtendedQuery) q1).getCost() - ((ExtendedQuery) q2).getCost();
@@ -920,6 +909,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
    * Returns the set of document ids matching all queries. This method is cache-aware and attempts to retrieve the
    * answer from the cache if possible. If the answer was not cached, it may have been inserted into the cache as a
    * result of this call. This method can handle negative queries.
+   * A null/empty list results in {@link #getLiveDocSet()}.
    * <p>
    * The DocSet returned should <b>not</b> be modified.
    */
@@ -934,7 +924,14 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
     }
 
     ProcessedFilter pf = getProcessedFilter(null, queries);
-    if (pf.answer != null) return pf.answer;
+
+    if (pf.postFilter == null) {
+      if (pf.answer != null) {
+        return pf.answer;
+      } else if (pf.filter == null) {
+        return getLiveDocSet(); // note: this is what happens when queries is an empty list
+      }
+    }
 
     DocSetCollector setCollector = new DocSetCollector(maxDoc());
     Collector collector = setCollector;
@@ -988,13 +985,36 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
     return DocSetUtil.getDocSet(setCollector, this);
   }
 
+  /**
+   * INTERNAL: The response object from {@link #getProcessedFilter(DocSet, List)}.
+   * Holds a filter and postFilter pair that together match a set of documents.
+   * Either of them may be null, in which case the semantics are to match everything.
+   * @see #getProcessedFilter(DocSet, List)
+   */
+  public static class ProcessedFilter {
+    public DocSet answer; // maybe null. Sometimes we have a docSet answer that represents the complete answer / result.
+    public Filter filter; // maybe null
+    public DelegatingCollector postFilter; // maybe null
+    public boolean hasDeletedDocs;  // true if it's possible that filter may match deleted docs
+  }
+
+  /**
+   * INTERNAL: Processes conjunction (AND) of both args into a {@link ProcessedFilter} result.
+   * Either arg may be null/empty, in which case it doesn't restrict the matching docs.
+   * Queries typically are resolved against the filter cache, and populate it.
+   */
   public ProcessedFilter getProcessedFilter(DocSet setFilter, List<Query> queries) throws IOException {
     ProcessedFilter pf = new ProcessedFilter();
     if (queries == null || queries.size() == 0) {
-      if (setFilter != null) pf.filter = setFilter.getTopFilter();
+      if (setFilter != null) {
+        pf.answer = setFilter;
+        pf.filter = setFilter.getTopFilter();
+      }
       return pf;
     }
 
+    // We combine all the filter queries that come from the filter cache & setFilter into "answer".
+    // This might become pf.answer, but not if there are any non-cached filters or post filters
     DocSet answer = null;
 
     boolean[] neg = new boolean[queries.size() + 1];
@@ -1008,7 +1028,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
     if (setFilter != null) {
       answer = sets[end++] = setFilter;
       smallestIndex = end;
-    }
+    } // we are done with setFilter at this point
 
     int smallestCount = Integer.MAX_VALUE;
     for (Query q : queries) {
@@ -1070,7 +1090,30 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
       if (!neg[i] && i != smallestIndex) answer = answer.intersection(sets[i]);
     }
 
-    if (notCached != null) {
+    // ignore "answer" if it simply matches all docs
+    if (answer != null && answer.size() == numDocs()) {
+      answer = null;
+    }
+
+    // answer is done.
+
+    // If there are neither notCached queries nor postFilters, we can return now.
+    if (notCached == null && postFilters == null) {
+      // "answer" is the only part of the filter, so set it.
+      if (answer != null) {
+        pf.answer = answer;
+        pf.filter = answer.getTopFilter();
+      }
+      return pf;
+    }
+    // pf.answer will remain null ...  (our local "answer" var is not the complete answer)
+
+    // Set pf.filter based on combining "answer" and "notCached"
+    if (notCached == null) {
+      if (answer != null) {
+        pf.filter = answer.getTopFilter();
+      }
+    } else {
       Collections.sort(notCached, sortByCost);
       List<Weight> weights = new ArrayList<>(notCached.size());
       for (Query q : notCached) {
@@ -1079,20 +1122,9 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
       }
       pf.filter = new FilterImpl(answer, weights);
       pf.hasDeletedDocs = (answer == null);  // if all clauses were uncached, the resulting filter may match deleted docs
-    } else {
-      if (postFilters == null) {
-        if (answer == null) {
-          answer = getLiveDocSet();
-        }
-        // "answer" is the only part of the filter, so set it.
-        pf.answer = answer;
-      }
-
-      if (answer != null) {
-        pf.filter = answer.getTopFilter();
-      }
     }
 
+    // Set pf.postFilter
     if (postFilters != null) {
       Collections.sort(postFilters, sortByCost);
       for (int i = postFilters.size() - 1; i >= 0; i--) {