You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ds...@apache.org on 2020/04/07 20:28:05 UTC
[lucene-solr] branch branch_8x updated: SOLR-14376: optimize
SolrIndexSearcher.getDocSet when matches everything * getProcessedFilter
now returns null filter if it's all docs more reliably * getProcessedFilter
now documented clearly as an internal method * getDocSet detects all-docs
and exits early with getLiveDocs * small refactoring to
getDocSetBits/makeDocSetBits Closes #1399
This is an automated email from the ASF dual-hosted git repository.
dsmiley pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/branch_8x by this push:
new 6905405 SOLR-14376: optimize SolrIndexSearcher.getDocSet when matches everything * getProcessedFilter now returns null filter if it's all docs more reliably * getProcessedFilter now documented clearly as an internal method * getDocSet detects all-docs and exits early with getLiveDocs * small refactoring to getDocSetBits/makeDocSetBits Closes #1399
6905405 is described below
commit 6905405d3fc34d96d68e3ec8705085d8e920a1cd
Author: David Smiley <ds...@apache.org>
AuthorDate: Thu Apr 2 23:53:04 2020 -0400
SOLR-14376: optimize SolrIndexSearcher.getDocSet when matches everything
* getProcessedFilter now returns null filter if it's all docs more reliably
* getProcessedFilter now documented clearly as an internal method
* getDocSet detects all-docs and exits early with getLiveDocs
* small refactoring to getDocSetBits/makeDocSetBits
Closes #1399
(cherry picked from commit 013898dec51c87c2cf9fb4d119c51e56354e23c6)
---
solr/CHANGES.txt | 2 +
.../org/apache/solr/search/SolrIndexSearcher.java | 96 ++++++++++++++--------
2 files changed, 66 insertions(+), 32 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 644e085..b2d7316 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -38,6 +38,8 @@ Optimizations
* SOLR-14340: Remove unnecessary configset verification checks when SolrCloud merely wants to know which configset a
collection is using. Improves CLUSTERSTATUS times for massive clusters. (Mathieu Marie, David Smiley)
+* SOLR-14376: Optimize filter queries that match all docs. (David Smiley)
+
Bug Fixes
---------------------
* SOLR-13264: IndexSizeTrigger aboveOp / belowOp properties not in valid properties.
diff --git a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
index ecca345..058287e 100644
--- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
+++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@@ -757,6 +757,9 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
private BitDocSet makeBitDocSet(DocSet answer) {
// TODO: this should be implemented in DocSet, most likely with a getBits method that takes a maxDoc argument
// or make DocSet instances remember maxDoc
+ if (answer instanceof BitDocSet) {
+ return (BitDocSet) answer;
+ }
FixedBitSet bs = new FixedBitSet(maxDoc());
DocIterator iter = answer.iterator();
while (iter.hasNext()) {
@@ -768,11 +771,8 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
public BitDocSet getDocSetBits(Query q) throws IOException {
DocSet answer = getDocSet(q);
- if (answer instanceof BitDocSet) {
- return (BitDocSet) answer;
- }
BitDocSet answerBits = makeBitDocSet(answer);
- if (filterCache != null) {
+ if (answerBits != answer && filterCache != null) {
filterCache.put(q, answerBits);
}
return answerBits;
@@ -876,18 +876,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
public void setLiveDocs(DocSet docs) {
// a few places currently expect BitDocSet
assert docs.size() == numDocs();
- if (docs instanceof BitDocSet) {
- this.liveDocs = (BitDocSet)docs;
- } else {
- this.liveDocs = makeBitDocSet(docs);
- }
- }
-
- public static class ProcessedFilter {
- public DocSet answer; // the answer, if non-null
- public Filter filter;
- public DelegatingCollector postFilter;
- public boolean hasDeletedDocs; // true if it's possible that filter may match deleted docs
+ this.liveDocs = makeBitDocSet(docs);
}
private static Comparator<Query> sortByCost = (q1, q2) -> ((ExtendedQuery) q1).getCost() - ((ExtendedQuery) q2).getCost();
@@ -920,6 +909,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
* Returns the set of document ids matching all queries. This method is cache-aware and attempts to retrieve the
* answer from the cache if possible. If the answer was not cached, it may have been inserted into the cache as a
* result of this call. This method can handle negative queries.
+ * A null/empty list results in {@link #getLiveDocSet()}.
* <p>
* The DocSet returned should <b>not</b> be modified.
*/
@@ -934,7 +924,14 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
}
ProcessedFilter pf = getProcessedFilter(null, queries);
- if (pf.answer != null) return pf.answer;
+
+ if (pf.postFilter == null) {
+ if (pf.answer != null) {
+ return pf.answer;
+ } else if (pf.filter == null) {
+ return getLiveDocSet(); // note: this is what happens when queries is an empty list
+ }
+ }
DocSetCollector setCollector = new DocSetCollector(maxDoc());
Collector collector = setCollector;
@@ -988,13 +985,36 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
return DocSetUtil.getDocSet(setCollector, this);
}
+ /**
+ * INTERNAL: The response object from {@link #getProcessedFilter(DocSet, List)}.
+ * Holds a filter and postFilter pair that together match a set of documents.
+ * Either of them may be null, in which case the semantics are to match everything.
+ * @see #getProcessedFilter(DocSet, List)
+ */
+ public static class ProcessedFilter {
+ public DocSet answer; // maybe null. Sometimes we have a docSet answer that represents the complete answer / result.
+ public Filter filter; // maybe null
+ public DelegatingCollector postFilter; // maybe null
+ public boolean hasDeletedDocs; // true if it's possible that filter may match deleted docs
+ }
+
+ /**
+ * INTERNAL: Processes conjunction (AND) of both args into a {@link ProcessedFilter} result.
+ * Either arg may be null/empty, in which case it doesn't restrict the matching docs.
+ * Queries typically are resolved against the filter cache, and populate it.
+ */
public ProcessedFilter getProcessedFilter(DocSet setFilter, List<Query> queries) throws IOException {
ProcessedFilter pf = new ProcessedFilter();
if (queries == null || queries.size() == 0) {
- if (setFilter != null) pf.filter = setFilter.getTopFilter();
+ if (setFilter != null) {
+ pf.answer = setFilter;
+ pf.filter = setFilter.getTopFilter();
+ }
return pf;
}
+ // We combine all the filter queries that come from the filter cache & setFilter into "answer".
+ // This might become pf.answer, but not if there are any non-cached filters
DocSet answer = null;
boolean[] neg = new boolean[queries.size() + 1];
@@ -1008,7 +1028,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
if (setFilter != null) {
answer = sets[end++] = setFilter;
smallestIndex = end;
- }
+ } // we are done with setFilter at this point
int smallestCount = Integer.MAX_VALUE;
for (Query q : queries) {
@@ -1070,7 +1090,30 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
if (!neg[i] && i != smallestIndex) answer = answer.intersection(sets[i]);
}
- if (notCached != null) {
+ // ignore "answer" if it simply matches all docs
+ if (answer != null && answer.size() == numDocs()) {
+ answer = null;
+ }
+
+ // answer is done.
+
+ // If no notCached nor postFilters, we can return now.
+ if (notCached == null && postFilters == null) {
+ // "answer" is the only part of the filter, so set it.
+ if (answer != null) {
+ pf.answer = answer;
+ pf.filter = answer.getTopFilter();
+ }
+ return pf;
+ }
+ // pf.answer will remain null ... (our local "answer" var is not the complete answer)
+
+ // Set pf.filter based on combining "answer" and "notCached"
+ if (notCached == null) {
+ if (answer != null) {
+ pf.filter = answer.getTopFilter();
+ }
+ } else {
Collections.sort(notCached, sortByCost);
List<Weight> weights = new ArrayList<>(notCached.size());
for (Query q : notCached) {
@@ -1079,20 +1122,9 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
}
pf.filter = new FilterImpl(answer, weights);
pf.hasDeletedDocs = (answer == null); // if all clauses were uncached, the resulting filter may match deleted docs
- } else {
- if (postFilters == null) {
- if (answer == null) {
- answer = getLiveDocSet();
- }
- // "answer" is the only part of the filter, so set it.
- pf.answer = answer;
- }
-
- if (answer != null) {
- pf.filter = answer.getTopFilter();
- }
}
+ // Set pf.postFilter
if (postFilters != null) {
Collections.sort(postFilters, sortByCost);
for (int i = postFilters.size() - 1; i >= 0; i--) {