Posted to commits@solr.apache.org by ma...@apache.org on 2022/04/04 19:36:14 UTC
[solr] branch main updated: SOLR-14765: Optimize DocList creation by skipping sort for sort-irrelevant cases (#592)
This is an automated email from the ASF dual-hosted git repository.
magibney pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/main by this push:
new 3621ef3be09 SOLR-14765: Optimize DocList creation by skipping sort for sort-irrelevant cases (#592)
3621ef3be09 is described below
commit 3621ef3be093ae70d25d9a04d68c17e1c24a4f19
Author: Michael Gibney <mi...@michaelgibney.net>
AuthorDate: Mon Apr 4 15:36:08 2022 -0400
SOLR-14765: Optimize DocList creation by skipping sort for sort-irrelevant cases (#592)
---
solr/CHANGES.txt | 3 +
.../java/org/apache/solr/search/DocSetUtil.java | 83 ++--
.../java/org/apache/solr/search/QueryUtils.java | 40 ++
.../org/apache/solr/search/SolrIndexSearcher.java | 231 +++++++++--
.../src/test/org/apache/solr/CursorPagingTest.java | 3 +-
.../test/org/apache/solr/search/TestDocSet.java | 20 +
.../apache/solr/search/TestMainQueryCaching.java | 447 +++++++++++++++++++++
.../configuration-guide/pages/caches-warming.adoc | 7 +-
8 files changed, 773 insertions(+), 61 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 8ae7f19180c..d3df960b043 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -45,6 +45,9 @@ Optimizations
---------------------
* SOLR-16120: Optimise hl.fl expansion. (Christine Poerschke, David Smiley, Mike Drob)
+* SOLR-14765: Optimize DocList creation for sort-irrelevant cases. This issue also reworks the building and caching of liveDocs
+ in SolrIndexSearcher, and refines/clarifies the effect of `useFilterForSortedQuery` (Michael Gibney, Mike Drob, David Smiley)
+
Bug Fixes
---------------------
* SOLR-15918: Skip repetitive parent znode creation on config set upload (Mike Drob)
diff --git a/solr/core/src/java/org/apache/solr/search/DocSetUtil.java b/solr/core/src/java/org/apache/solr/search/DocSetUtil.java
index 9a44d7d1c3d..146ded58d22 100644
--- a/solr/core/src/java/org/apache/solr/search/DocSetUtil.java
+++ b/solr/core/src/java/org/apache/solr/search/DocSetUtil.java
@@ -30,12 +30,12 @@ import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.LeafCollector;
+import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
-import org.apache.solr.common.SolrException;
/**
* @lucene.experimental
@@ -79,16 +79,10 @@ public class DocSetUtil {
* @lucene.experimental
*/
public static DocSet getDocSet(DocSetCollector collector, SolrIndexSearcher searcher) {
- if (collector.size() == searcher.numDocs()) {
- if (!searcher.isLiveDocsInstantiated()) {
- searcher.setLiveDocs(collector.getDocSet());
- }
- try {
- return searcher.getLiveDocSet();
- } catch (IOException e) {
- // should be impossible... liveDocs should exist, so no IO should be necessary
- throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
- }
+ final int size = collector.size();
+ if (size == searcher.numDocs()) {
+ // see comment under similar block in `getDocSet(DocSet, SolrIndexSearcher)`
+ return searcher.offerLiveDocs(collector::getDocSet, size);
}
return collector.getDocSet();
@@ -101,18 +95,15 @@ public class DocSetUtil {
* @lucene.experimental
*/
public static DocSet getDocSet(DocSet docs, SolrIndexSearcher searcher) {
- if (docs.size() == searcher.numDocs()) {
- if (!searcher.isLiveDocsInstantiated()) {
- searcher.setLiveDocs(docs);
- }
- try {
- // if this docset has the same cardinality as liveDocs, return liveDocs instead
- // so this set will be short lived garbage.
- return searcher.getLiveDocSet();
- } catch (IOException e) {
- // should be impossible... liveDocs should exist, so no IO should be necessary
- throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
- }
+ final int size = docs.size();
+ if (size == searcher.numDocs()) {
+ // if this docset has the same cardinality as liveDocs, return liveDocs instead
+ // so this set will be short lived garbage.
+ // This could be a very broad query, or some unusual way of running MatchAllDocsQuery?
+ // In any event, we already _have_ a viable `liveDocs` DocSet, so offer it to the
+ // SolrIndexSearcher, and use whatever canonical `liveDocs` instance the searcher returns
+ // (which may or may not be derived from the set that we offered)
+ return searcher.offerLiveDocs(() -> docs, size);
}
return docs;
@@ -135,6 +126,10 @@ public class DocSetUtil {
DocSet set = ((DocSetProducer) query).createDocSet(searcher);
// assert equals(set, createDocSetGeneric(searcher, query));
return set;
+ } else if (query instanceof MatchAllDocsQuery) {
+ DocSet set = searcher.getLiveDocSet();
+ // assert equals(set, createDocSetGeneric(searcher, query));
+ return set;
}
return createDocSetGeneric(searcher, query);
@@ -292,4 +287,46 @@ public class DocSetUtil {
leafCollector.collect(doc - segBase); // per-seg collectors
}
}
+
+ /**
+ * Utility method to copy a specified range of {@link Bits} to a specified offset in a destination
+ * {@link FixedBitSet}. This can be useful, e.g., for translating per-segment bits ranges to
+ * composite DocSet bits ranges.
+ *
+ * @param src source Bits
+ * @param srcOffset start offset (inclusive) in source Bits
+ * @param srcLimit end offset (exclusive) in source Bits
+ * @param dest destination FixedBitSet
+ * @param destOffset start offset of range in destination
+ */
+ static void copyTo(
+ Bits src, final int srcOffset, int srcLimit, FixedBitSet dest, int destOffset) {
+ /*
+ NOTE: `adjustedSegDocBase` +1 to compensate for the fact that `segOrd` always has to "read
+ ahead" by 1. Adding 1 to set `adjustedSegDocBase` once allows us to use `segOrd` as-is (with
+ no "pushback") for both `startIndex` and `endIndex` args to `dest.set(startIndex, endIndex)`
+ */
+ final int adjustedSegDocBase = destOffset - srcOffset + 1;
+ int segOrd = srcLimit;
+ do {
+ /*
+ NOTE: we check deleted range before live range in the outer loop in order to not have
+ to explicitly guard against `dest.set(maxDoc, maxDoc)` in the event that the global max doc
+ is a delete (this case would trigger a bounds-checking AssertionError in
+ `FixedBitSet.set(int, int)`).
+ */
+ do {
+ // consume deleted range
+ if (--segOrd < srcOffset) {
+ // we're currently in a "deleted" run, so just return; no need to do anything further
+ return;
+ }
+ } while (!src.get(segOrd));
+ final int limit = segOrd; // set highest ord (inclusive) of live range
+ while (segOrd-- > srcOffset && src.get(segOrd)) {
+ // consume live range
+ }
+ dest.set(adjustedSegDocBase + segOrd, adjustedSegDocBase + limit);
+ } while (segOrd > srcOffset);
+ }
}
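
For illustration only (not part of the commit): a minimal sketch of the intended semantics of the new DocSetUtil.copyTo helper. The method is package-private, so this assumes org.apache.solr.search package access; the values are made up for the example.

    import org.apache.lucene.util.FixedBitSet;

    // A 10-doc segment whose liveDocs have doc 3 deleted:
    FixedBitSet segLiveDocs = new FixedBitSet(10);
    segLiveDocs.set(0, 10);
    segLiveDocs.clear(3);
    // Translate the segment's live bits into a composite bitset at docBase=100:
    FixedBitSet composite = new FixedBitSet(200);
    DocSetUtil.copyTo(segLiveDocs.asReadOnlyBits(), 0, 10, composite, 100);
    // composite now has bits 100-109 set, except bit 103 (the deleted doc)
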
diff --git a/solr/core/src/java/org/apache/solr/search/QueryUtils.java b/solr/core/src/java/org/apache/solr/search/QueryUtils.java
index f4b65d70dbd..ff03b64fcce 100644
--- a/solr/core/src/java/org/apache/solr/search/QueryUtils.java
+++ b/solr/core/src/java/org/apache/solr/search/QueryUtils.java
@@ -24,6 +24,7 @@ import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.solr.common.SolrException;
@@ -42,6 +43,38 @@ public class QueryUtils {
return true;
}
+ /**
+ * Recursively unwraps the specified query to determine whether it is capable of producing a score
+ * that varies across different documents. Returns true if this query is not capable of producing
+ * a varying score (i.e., it is a constant score query).
+ */
+ public static boolean isConstantScoreQuery(Query q) {
+ while (true) {
+ if (q instanceof BoostQuery) {
+ q = ((BoostQuery) q).getQuery();
+ } else if (q instanceof WrappedQuery) {
+ q = ((WrappedQuery) q).getWrappedQuery();
+ } else if (q instanceof ConstantScoreQuery) {
+ return true;
+ } else if (q instanceof MatchAllDocsQuery) {
+ return true;
+ } else if (q instanceof MatchNoDocsQuery) {
+ return true;
+ } else if (q instanceof DocSetQuery) {
+ return true;
+ } else if (q instanceof BooleanQuery) {
+ // NOTE: this check can be very simple because:
+ // 1. there's no need to check `q == clause.getQuery()` because BooleanQuery is final, with
+ // a builder that prevents direct loops.
+ // 2. we don't bother recursing to second-guess a nominally "scoring" clause that actually
+ // wraps a constant-score query.
+ return ((BooleanQuery) q).clauses().stream().noneMatch(BooleanClause::isScoring);
+ } else {
+ return false;
+ }
+ }
+ }
+
/**
* Returns the original query if it was already a positive query, otherwise return the negative of
* the query (i.e., a positive query).
@@ -162,6 +195,13 @@ public class QueryUtils {
if (filterQuery == null) {
return new MatchAllDocsQuery(); // default if nothing -- match everything
} else {
+ /*
+ NOTE: we _must_ wrap filter in a ConstantScoreQuery (default score `1f`) in order to
+ guarantee score parity with the actual user-specified scoreQuery (i.e., MatchAllDocsQuery).
+ This should only matter if score is _explicitly_ requested to be returned, but we don't know
+ that here, and it's probably not worth jumping through the necessary hoops simply to avoid
+ wrapping in the case where `true==isConstantScoreQuery(filterQuery)`
+ */
return new ConstantScoreQuery(filterQuery);
}
} else {
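
For illustration only (not part of the commit): a minimal sketch of how the new QueryUtils.isConstantScoreQuery helper is expected to classify a few representative Lucene queries, based on the unwrapping logic shown above.

    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.BoostQuery;
    import org.apache.lucene.search.ConstantScoreQuery;
    import org.apache.lucene.search.MatchAllDocsQuery;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.TermQuery;

    Query scoring = new TermQuery(new Term("str", "d1"));   // per-document scores
    Query constant = new ConstantScoreQuery(scoring);       // uniform score
    Query boosted = new BoostQuery(constant, 2.0f);         // boost wrapper is unwrapped

    assert !QueryUtils.isConstantScoreQuery(scoring);
    assert QueryUtils.isConstantScoreQuery(constant);
    assert QueryUtils.isConstantScoreQuery(boosted);
    assert QueryUtils.isConstantScoreQuery(new MatchAllDocsQuery());
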
diff --git a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
index 4ed5feda3d9..10e1b65bcba 100644
--- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
+++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@@ -35,6 +35,8 @@ import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
+import java.util.concurrent.atomic.LongAdder;
+import java.util.function.Supplier;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.ExitableDirectoryReader;
@@ -50,6 +52,7 @@ import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.*;
import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.SortField.Type;
import org.apache.lucene.search.TotalHits.Relation;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
@@ -122,6 +125,11 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
private final SolrCache<Query, DocSet> filterCache;
private final SolrCache<QueryResultKey, DocList> queryResultCache;
private final SolrCache<String, UnInvertedField> fieldValueCache;
+ private final LongAdder fullSortCount = new LongAdder();
+ private final LongAdder skipSortCount = new LongAdder();
+ private final LongAdder liveDocsNaiveCacheHitCount = new LongAdder();
+ private final LongAdder liveDocsInsertsCount = new LongAdder();
+ private final LongAdder liveDocsHitCount = new LongAdder();
// map of generic caches - not synchronized since it's read-only after the constructor.
private final Map<String, SolrCache<?, ?>> cacheMap;
@@ -918,6 +926,11 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
assert !(query instanceof WrappedQuery) : "should have unwrapped";
assert filterCache != null : "must check for caching before calling this method";
+ if (query instanceof MatchAllDocsQuery) {
+ // bypass the filterCache for MatchAllDocsQuery
+ return getLiveDocSet();
+ }
+
if (SolrQueryTimeoutImpl.getInstance().isTimeoutEnabled()) {
// If there is a possibility of timeout for this query, then don't reserve a computation slot.
// Further, we can't naively wait for an in progress computation to finish, because if we time
@@ -935,8 +948,63 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
return filterCache.computeIfAbsent(query, q -> getDocSetNC(q, null));
}
- private static Query matchAllDocsQuery = new MatchAllDocsQuery();
- private volatile BitDocSet liveDocs;
+ private static final MatchAllDocsQuery MATCH_ALL_DOCS_QUERY = new MatchAllDocsQuery();
+
+ /** Used as a synchronization point to handle the lazy-init of {@link #liveDocs}. */
+ private final Object liveDocsCacheLock = new Object();
+
+ private BitDocSet liveDocs;
+
+ private static final BitDocSet EMPTY = new BitDocSet(new FixedBitSet(0), 0);
+
+ private BitDocSet computeLiveDocs() {
+ switch (leafContexts.size()) {
+ case 0:
+ assert numDocs() == 0;
+ return EMPTY;
+ case 1:
+ final Bits onlySegLiveDocs = leafContexts.get(0).reader().getLiveDocs();
+ final FixedBitSet fbs;
+ if (onlySegLiveDocs == null) {
+ // `LeafReader.getLiveDocs()` returns null if no deleted docs -- accordingly, set all bits
+ final int onlySegMaxDoc = maxDoc();
+ fbs = new FixedBitSet(onlySegMaxDoc);
+ fbs.set(0, onlySegMaxDoc);
+ } else {
+ fbs = FixedBitSet.copyOf(onlySegLiveDocs);
+ }
+ assert fbs.cardinality() == numDocs();
+ return new BitDocSet(fbs, numDocs());
+ default:
+ final FixedBitSet bs = new FixedBitSet(maxDoc());
+ for (LeafReaderContext ctx : leafContexts) {
+ final LeafReader r = ctx.reader();
+ final Bits segLiveDocs = r.getLiveDocs();
+ final int segDocBase = ctx.docBase;
+ if (segLiveDocs == null) {
+ // `LeafReader.getLiveDocs()` returns null if no deleted docs -- accordingly, set all
+ // bits in seg range
+ bs.set(segDocBase, segDocBase + r.maxDoc());
+ } else {
+ DocSetUtil.copyTo(segLiveDocs, 0, r.maxDoc(), bs, segDocBase);
+ }
+ }
+ assert bs.cardinality() == numDocs();
+ return new BitDocSet(bs, numDocs());
+ }
+ }
+
+ private BitDocSet populateLiveDocs(Supplier<BitDocSet> liveDocsSupplier) {
+ synchronized (liveDocsCacheLock) {
+ if (liveDocs != null) {
+ liveDocsHitCount.increment();
+ } else {
+ liveDocs = liveDocsSupplier.get();
+ liveDocsInsertsCount.increment();
+ }
+ }
+ return liveDocs;
+ }
/**
* Returns an efficient random-access {@link DocSet} of the live docs. It's cached. Never null.
@@ -944,13 +1012,12 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
* @lucene.internal the type of DocSet returned may change in the future
*/
public BitDocSet getLiveDocSet() throws IOException {
- // Going through the filter cache will provide thread safety here if we only had getLiveDocs,
- // but the addition of setLiveDocs means we needed to add volatile to "liveDocs".
BitDocSet docs = liveDocs;
- if (docs == null) {
- // note: maybe should instead calc manually by segment, using FixedBitSet.copyOf(segLiveDocs);
- // avoid filter cache?
- liveDocs = docs = getDocSetBits(matchAllDocsQuery);
+ if (docs != null) {
+ liveDocsNaiveCacheHitCount.increment();
+ } else {
+ docs = populateLiveDocs(this::computeLiveDocs);
+ // assert DocSetUtil.equals(docs, DocSetUtil.createDocSetGeneric(this, MATCH_ALL_DOCS_QUERY));
}
assert docs.size() == numDocs();
return docs;
@@ -968,19 +1035,22 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
}
/**
+ * If some process external to {@link SolrIndexSearcher} has produced a DocSet whose cardinality
+ * matches that of `liveDocs`, this method provides such caller the ability to offer its own
+ * DocSet to be cached in the searcher. The caller should then use the returned value (which may
+ * or may not be derived from the DocSet instance supplied), allowing more efficient memory use.
+ *
* @lucene.internal
*/
- public boolean isLiveDocsInstantiated() {
- return liveDocs != null;
- }
-
- /**
- * @lucene.internal
- */
- public void setLiveDocs(DocSet docs) {
+ public BitDocSet offerLiveDocs(Supplier<DocSet> docSetSupplier, int suppliedSize) {
+ assert suppliedSize == numDocs();
+ final BitDocSet ret = liveDocs;
+ if (ret != null) {
+ liveDocsNaiveCacheHitCount.increment();
+ return ret;
+ }
// a few places currently expect BitDocSet
- assert docs.size() == numDocs();
- this.liveDocs = makeBitDocSet(docs);
+ return populateLiveDocs(() -> makeBitDocSet(docSetSupplier.get()));
}
private static Comparator<ExtendedQuery> sortByCost =
@@ -1013,7 +1083,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
collector = pf.postFilter;
}
- Query query = pf.filter != null ? pf.filter : matchAllDocsQuery;
+ Query query = pf.filter != null ? pf.filter : MATCH_ALL_DOCS_QUERY;
search(query, collector);
@@ -1385,6 +1455,34 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
public static final int GET_DOCLIST = 0x02; // get the documents actually returned in a response
public static final int GET_SCORES = 0x01;
+ private static boolean sortIncludesOtherThanScore(final Sort sort) {
+ if (sort == null) {
+ return false;
+ }
+ final SortField[] sortFields = sort.getSort();
+ return sortFields.length > 1 || sortFields[0].getType() != Type.SCORE;
+ }
+
+ private boolean useFilterCacheForDynamicScoreQuery(boolean needSort, QueryCommand cmd) {
+ if (!useFilterForSortedQuery) {
+ // under no circumstance use filterCache
+ return false;
+ } else if (!needSort) {
+ // if don't need to sort at all, doesn't matter whether score would be needed
+ return true;
+ } else {
+ // we _do_ need to sort; only use filterCache if `score` is not a factor for sort
+ final Sort sort = cmd.getSort();
+ if (sort == null) {
+ // defaults to sort-by-score, so can't use filterCache
+ return false;
+ } else {
+ return Arrays.stream(sort.getSort())
+ .noneMatch((sf) -> sf.getType() == SortField.Type.SCORE);
+ }
+ }
+ }
+
/**
* getDocList version that uses+populates query and filter caches. In the event of a timeout, the
* cache is not populated.
@@ -1473,19 +1571,38 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
// - we don't want score returned.
// check if we should try and use the filter cache
- boolean useFilterCache = false;
- if ((flags & (GET_SCORES | NO_CHECK_FILTERCACHE)) == 0
- && useFilterForSortedQuery
- && cmd.getSort() != null
- && filterCache != null) {
- useFilterCache = true;
- SortField[] sfields = cmd.getSort().getSort();
- for (SortField sf : sfields) {
- if (sf.getType() == SortField.Type.SCORE) {
- useFilterCache = false;
- break;
- }
+ final boolean needSort;
+ final boolean useFilterCache;
+ if ((flags & (GET_SCORES | NO_CHECK_FILTERCACHE)) != 0 || filterCache == null) {
+ needSort = true; // this value should be irrelevant when `useFilterCache=false`
+ useFilterCache = false;
+ } else if (q instanceof MatchAllDocsQuery
+ || (useFilterForSortedQuery && QueryUtils.isConstantScoreQuery(q))) {
+ // special-case MatchAllDocsQuery: implicit default useFilterForSortedQuery=true;
+ // otherwise, default behavior should not risk filterCache thrashing, so require
+ // `useFilterForSortedQuery==true`
+
+ // We only need to sort if we're returning results AND sorting by something other than SCORE
+ // (sort by "score" alone is pointless for these constant score queries)
+ final Sort sort = cmd.getSort();
+ needSort = cmd.getLen() > 0 && sortIncludesOtherThanScore(sort);
+ if (!needSort) {
+ useFilterCache = true;
+ } else {
+ /*
+ NOTE: if `sort:score` is specified, it will have no effect, so we really _could_ in
+ principle always use filterCache; but this would be a user request misconfiguration,
+ and supporting it would require us to mess with user sort, or ignore the fact that sort
+ expects `score` to be present ... so just make the optimization contingent on the absence
+ of `score` in the requested sort.
+ */
+ useFilterCache =
+ Arrays.stream(sort.getSort()).noneMatch((sf) -> sf.getType() == SortField.Type.SCORE);
}
+ } else {
+ // for non-constant-score queries, must sort unless no docs requested
+ needSort = cmd.getLen() > 0;
+ useFilterCache = useFilterCacheForDynamicScoreQuery(needSort, cmd);
}
if (useFilterCache) {
@@ -1496,14 +1613,32 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
out.docSet = getDocSet(cmd.getQuery(), cmd.getFilter());
List<Query> filterList = cmd.getFilterList();
if (filterList != null && !filterList.isEmpty()) {
- out.docSet = out.docSet.intersection(getDocSet(cmd.getFilterList()));
+ out.docSet = DocSetUtil.getDocSet(out.docSet.intersection(getDocSet(filterList)), this);
}
}
// todo: there could be a sortDocSet that could take a list of
// the filters instead of anding them first...
// perhaps there should be a multi-docset-iterator
- sortDocSet(qr, cmd);
+ if (needSort) {
+ fullSortCount.increment();
+ sortDocSet(qr, cmd);
+ } else {
+ skipSortCount.increment();
+ // put unsorted list in place
+ out.docList = constantScoreDocList(cmd.getOffset(), cmd.getLen(), out.docSet);
+ if (0 == cmd.getSupersetMaxDoc()) {
+ // this is the only case where `cursorMark && !needSort`
+ qr.setNextCursorMark(cmd.getCursorMark());
+ } else {
+ // cursorMark should always add a `uniqueKey` sort field tie-breaker, which
+ // should prevent `needSort` from ever being false in conjunction with
+ // cursorMark, _except_ in the event of `rows=0` (accounted for in the clause
+ // above)
+ assert cmd.getCursorMark() == null;
+ }
+ }
} else {
+ fullSortCount.increment();
// do it the normal way...
if ((flags & GET_DOCSET) != 0) {
// this currently conflates returning the docset for the base query vs
@@ -2081,6 +2216,23 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
return qr.getDocListAndSet();
}
+ private DocList constantScoreDocList(int offset, int length, DocSet docs) {
+ final int size = docs.size();
+ if (length == 0 || size <= offset) {
+ return new DocSlice(0, 0, new int[0], null, size, 0f, TotalHits.Relation.EQUAL_TO);
+ }
+ final DocIterator iter = docs.iterator();
+ for (int i = offset; i > 0; i--) {
+ iter.nextDoc(); // discard
+ }
+ final int returnSize = Math.min(length, size - offset);
+ final int[] docIds = new int[returnSize];
+ for (int i = 0; i < returnSize; i++) {
+ docIds[i] = iter.nextDoc();
+ }
+ return new DocSlice(0, returnSize, docIds, null, size, 0f, TotalHits.Relation.EQUAL_TO);
+ }
+
protected void sortDocSet(QueryResult qr, QueryCommand cmd) throws IOException {
DocSet set = qr.getDocListAndSet().docSet;
int nDocs = cmd.getSupersetMaxDoc();
@@ -2353,6 +2505,19 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
parentContext.gauge(() -> warmupTime, true, "warmupTime", Category.SEARCHER.toString(), scope);
parentContext.gauge(
() -> registerTime, true, "registeredAt", Category.SEARCHER.toString(), scope);
+ parentContext.gauge(
+ fullSortCount::sum, true, "fullSortCount", Category.SEARCHER.toString(), scope);
+ parentContext.gauge(
+ skipSortCount::sum, true, "skipSortCount", Category.SEARCHER.toString(), scope);
+ final MetricsMap liveDocsCacheMetrics =
+ new MetricsMap(
+ (map) -> {
+ map.put("inserts", liveDocsInsertsCount.sum());
+ map.put("hits", liveDocsHitCount.sum());
+ map.put("naiveHits", liveDocsNaiveCacheHitCount.sum());
+ });
+ parentContext.gauge(
+ liveDocsCacheMetrics, true, "liveDocsCache", Category.SEARCHER.toString(), scope);
// reader stats
parentContext.gauge(
rgauge(parentContext.nullNumber(), () -> reader.numDocs()),
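
For illustration only (not part of the commit): a minimal sketch of the calling convention for the new offerLiveDocs API, which replaces isLiveDocsInstantiated/setLiveDocs. The helper method name here is hypothetical; the key point is that callers must use the returned canonical instance, which may or may not be derived from the offered DocSet.

    import org.apache.solr.search.BitDocSet;
    import org.apache.solr.search.DocSet;
    import org.apache.solr.search.SolrIndexSearcher;

    static DocSet canonicalize(DocSet candidate, SolrIndexSearcher searcher) {
      if (candidate.size() == searcher.numDocs()) {
        // Offer the full-cardinality set; the searcher either caches it or returns the
        // instance another thread already cached, so always use the return value.
        BitDocSet liveDocs = searcher.offerLiveDocs(() -> candidate, candidate.size());
        return liveDocs;
      }
      return candidate;
    }
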
diff --git a/solr/core/src/test/org/apache/solr/CursorPagingTest.java b/solr/core/src/test/org/apache/solr/CursorPagingTest.java
index 698f55c15fd..0fba2d9971a 100644
--- a/solr/core/src/test/org/apache/solr/CursorPagingTest.java
+++ b/solr/core/src/test/org/apache/solr/CursorPagingTest.java
@@ -759,8 +759,7 @@ public class CursorPagingTest extends SolrTestCaseJ4 {
final long postFcHits = (Long) filterCacheStats.getValue().get("hits");
assertEquals("query cache inserts changed", preQcIn, postQcIn);
- // NOTE: use of pure negative filters clauses "*:* to be tracked in filterCache
- assertEquals("filter cache did not grow correctly", 3, postFcIn - preFcIn);
+ assertEquals("filter cache did not grow correctly", 2, postFcIn - preFcIn);
assertTrue("filter cache did not have any new cache hits", 0 < postFcHits - preFcHits);
}
diff --git a/solr/core/src/test/org/apache/solr/search/TestDocSet.java b/solr/core/src/test/org/apache/solr/search/TestDocSet.java
index 943d9a314d8..d18e6a5da8f 100644
--- a/solr/core/src/test/org/apache/solr/search/TestDocSet.java
+++ b/solr/core/src/test/org/apache/solr/search/TestDocSet.java
@@ -618,6 +618,26 @@ public class TestDocSet extends SolrTestCase {
return scorer != null ? scorer.iterator() : null;
}
+ private static final int MAX_SRC_SIZE = 130; // push _just_ into 3 `long` "words"
+
+ public void testCopyBitsToRange() {
+ // Sanity-check round-tripping.
+ for (int i = 0; i < 1000; i++) {
+ final int sz = rand.nextInt(MAX_SRC_SIZE);
+ final FixedBitSet src = getRandomSet(sz, rand.nextInt(sz + 1));
+ final int destSize = sz * 2;
+ final int destOffset =
+ sz == 0
+ ? 0
+ : (rand.nextInt(sz) + 1); // +1 here b/c we want nextInt(sz) _inclusive_ of sz.
+ final FixedBitSet dest = new FixedBitSet(destSize);
+ final FixedBitSet roundTrip = new FixedBitSet(sz);
+ DocSetUtil.copyTo(src.asReadOnlyBits(), 0, sz, dest, destOffset);
+ DocSetUtil.copyTo(dest.asReadOnlyBits(), destOffset, destOffset + sz, roundTrip, 0);
+ assertEquals(src, roundTrip);
+ }
+ }
+
private Bits getExpectedBits(final DocSet docSet, final LeafReaderContext context) {
if (context.isTopLevel) {
return docSet.getBits();
diff --git a/solr/core/src/test/org/apache/solr/search/TestMainQueryCaching.java b/solr/core/src/test/org/apache/solr/search/TestMainQueryCaching.java
new file mode 100644
index 00000000000..54920b6ed74
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/search/TestMainQueryCaching.java
@@ -0,0 +1,447 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search;
+
+import static org.apache.solr.common.util.Utils.fromJSONString;
+
+import java.util.Map;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.util.ExecutorUtil;
+import org.apache.solr.common.util.SolrNamedThreadFactory;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.metrics.MetricsMap;
+import org.apache.solr.metrics.SolrMetricManager;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/** Verify caching interactions between main query and filterCache */
+public class TestMainQueryCaching extends SolrTestCaseJ4 {
+
+ private static final int MOST_DOCS = 100;
+ private static final int ALL_DOCS = MOST_DOCS + 1;
+ private static final String TEST_UFFSQ_PROPNAME = "solr.test.useFilterForSortedQuery";
+ static String RESTORE_UFFSQ_PROP;
+ static boolean USE_FILTER_FOR_SORTED_QUERY;
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ USE_FILTER_FOR_SORTED_QUERY = random().nextBoolean();
+ RESTORE_UFFSQ_PROP =
+ System.setProperty(TEST_UFFSQ_PROPNAME, Boolean.toString(USE_FILTER_FOR_SORTED_QUERY));
+ initCore("solrconfig-deeppaging.xml", "schema-sorts.xml");
+ createIndex();
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ if (RESTORE_UFFSQ_PROP == null) {
+ System.clearProperty(TEST_UFFSQ_PROPNAME);
+ } else {
+ System.setProperty(TEST_UFFSQ_PROPNAME, RESTORE_UFFSQ_PROP);
+ }
+ }
+
+ public static void createIndex() {
+ for (int i = 0; i < MOST_DOCS; i++) {
+ assertU(adoc("id", Integer.toString(i), "str", "d" + i));
+ if (random().nextInt(MOST_DOCS) == 0) {
+ assertU(commit()); // sometimes make multiple segments
+ }
+ }
+ // add an extra doc to distinguish scoring query from `*:*`
+ assertU(adoc("id", Integer.toString(MOST_DOCS), "str", "e" + MOST_DOCS));
+ assertU(commit());
+ }
+
+ @Before
+ public void beforeTest() throws Exception {
+ // testing caching, it's far simpler to just reload the core every time to prevent
+ // subsequent requests from affecting each other
+ h.reload();
+ }
+
+ private static long coreToInserts(SolrCore core) {
+ return (long)
+ ((MetricsMap)
+ ((SolrMetricManager.GaugeWrapper<?>)
+ core.getCoreMetricManager()
+ .getRegistry()
+ .getMetrics()
+ .get("CACHE.searcher.filterCache"))
+ .getGauge())
+ .getValue()
+ .get("inserts");
+ }
+
+ private static long coreToSortCount(SolrCore core, String skipOrFull) {
+ return (long)
+ ((SolrMetricManager.GaugeWrapper<?>)
+ core.getCoreMetricManager()
+ .getRegistry()
+ .getMetrics()
+ .get("SEARCHER.searcher." + skipOrFull + "SortCount"))
+ .getGauge()
+ .getValue();
+ }
+
+ private static long coreToMatchAllDocsInsertCount(SolrCore core) {
+ return (long) coreToLiveDocsCacheMetrics(core).get("inserts");
+ }
+
+ private static Map<String, Object> coreToLiveDocsCacheMetrics(SolrCore core) {
+ return ((MetricsMap)
+ ((SolrMetricManager.GaugeWrapper<?>)
+ core.getCoreMetricManager()
+ .getRegistry()
+ .getMetrics()
+ .get("SEARCHER.searcher.liveDocsCache"))
+ .getGauge())
+ .getValue();
+ }
+
+ private static final String SCORING_QUERY = "str:d*";
+
+ // wrapped as a ConstantScoreQuery
+ private static final String CONSTANT_SCORE_QUERY = "(" + SCORING_QUERY + ")^=1.0";
+
+ private static final String MATCH_ALL_DOCS_QUERY = "*:*";
+
+ private static final String[] ALL_QUERIES =
+ new String[] {SCORING_QUERY, CONSTANT_SCORE_QUERY, MATCH_ALL_DOCS_QUERY};
+
+ @Test
+ public void testScoringQuery() throws Exception {
+ // plain request should have no caching or sorting optimization
+ String response = JQ(req("q", SCORING_QUERY, "indent", "true"));
+ assertMetricCounts(response, false, 0, 1, 0);
+ }
+
+ @Test
+ public void testConstantScoreFlScore() throws Exception {
+ // explicitly requesting scores should unconditionally disable caching and sorting optimizations
+ String response =
+ JQ(
+ req(
+ "q",
+ CONSTANT_SCORE_QUERY,
+ "indent",
+ "true",
+ "rows",
+ "0",
+ "fl",
+ "id,score",
+ "sort",
+ (random().nextBoolean() ? "id asc" : "score desc")));
+ assertMetricCounts(response, false, 0, 1, 0);
+ }
+
+ @Test
+ public void testScoringQueryNonScoreSort() throws Exception {
+ // plain request with no score in sort should consult filterCache, but need full sorting
+ String response = JQ(req("q", SCORING_QUERY, "indent", "true", "sort", "id asc"));
+ assertMetricCounts(response, false, USE_FILTER_FOR_SORTED_QUERY ? 1 : 0, 1, 0);
+ }
+
+ @Test
+ public void testScoringQueryZeroRows() throws Exception {
+ // always hit cache, optimize sort because rows=0
+ String response =
+ JQ(
+ req(
+ "q",
+ SCORING_QUERY,
+ "indent",
+ "true",
+ "rows",
+ "0",
+ "sort",
+ (random().nextBoolean() ? "id asc" : "score desc")));
+ final int insertAndSkipCount = USE_FILTER_FOR_SORTED_QUERY ? 1 : 0;
+ assertMetricCounts(
+ response,
+ false,
+ insertAndSkipCount,
+ USE_FILTER_FOR_SORTED_QUERY ? 0 : 1,
+ insertAndSkipCount);
+ }
+
+ @Test
+ public void testConstantScoreSortByScore() throws Exception {
+ // hit cache and skip sort because constant score query
+ String response = JQ(req("q", CONSTANT_SCORE_QUERY, "indent", "true"));
+ final int insertAndSkipCount = USE_FILTER_FOR_SORTED_QUERY ? 1 : 0;
+ assertMetricCounts(
+ response,
+ false,
+ insertAndSkipCount,
+ USE_FILTER_FOR_SORTED_QUERY ? 0 : 1,
+ insertAndSkipCount);
+ }
+
+ @Test
+ public void testConstantScoreNonScoreSort() throws Exception {
+ // consult filterCache because constant score query, but no skip sort (because sort-by-id)
+ String response = JQ(req("q", CONSTANT_SCORE_QUERY, "indent", "true", "sort", "id asc"));
+ assertMetricCounts(response, false, USE_FILTER_FOR_SORTED_QUERY ? 1 : 0, 1, 0);
+ }
+
+ /**
+ * As {@link #testConstantScoreNonScoreSort} (though an analogous test could be written
+ * corresponding to {@link #testConstantScoreSortByScore()}, etc...); but with an additional
+ * constant-score clause that causes the associated DocSet, (if {@link
+ * #USE_FILTER_FOR_SORTED_QUERY}==true) to be cached as equivalent to MatchAllDocsQuery/liveDocs,
+ * _in addition to_ in the filterCache.
+ *
+ * <p>This is an edge case, but it's the behavior we want, and despite there being two entries,
+ * the actual DocSet will be the same (`==`) in both locations (liveDocs and filterCache)
+ */
+ @Test
+ public void testConstantScoreMatchesAllDocsNonScoreSort() throws Exception {
+ String response =
+ JQ(
+ req(
+ "q",
+ CONSTANT_SCORE_QUERY + " OR (str:e*)^=4.0",
+ "indent",
+ "true",
+ "sort",
+ "id asc"));
+ assertMetricCounts(
+ response, USE_FILTER_FOR_SORTED_QUERY, USE_FILTER_FOR_SORTED_QUERY ? 1 : 0, 1, 0, ALL_DOCS);
+ }
+
+ @Test
+ public void testMatchAllDocsPlain() throws Exception {
+ // plain request with "score" sort should skip sort even if `rows` requested
+ String response = JQ(req("q", MATCH_ALL_DOCS_QUERY, "indent", "true"));
+ assertMetricCounts(response, true, 0, 0, 1);
+ }
+
+ @Test
+ public void testMatchAllDocsFlScore() throws Exception {
+ // explicitly requesting scores should unconditionally disable all cache consultation and sort
+ // optimization
+ String response =
+ JQ(
+ req(
+ "q",
+ MATCH_ALL_DOCS_QUERY,
+ "indent",
+ "true",
+ "rows",
+ "0",
+ "fl",
+ "id,score",
+ "sort",
+ (random().nextBoolean() ? "id asc" : "score desc")));
+ /*
+ NOTE: pretend we're not MatchAllDocs, from the perspective of `assertMetricsCounts(...)`
+ We're specifically choosing `*:*` here because we want a query that would _definitely_ hit
+ our various optimizations, _but_ for the fact that we explicitly requested `score` to be
+ returned. This would be a bit of an anti-pattern in "real life", but it's useful (e.g., in
+ tests) to have this case just disable the optimizations.
+ */
+ assertMetricCounts(response, false, 0, 1, 0, ALL_DOCS);
+ }
+
+ @Test
+ public void testMatchAllDocsZeroRows() throws Exception {
+ // plain request should _always_ skip sort when `rows=0`
+ String response =
+ JQ(req("q", MATCH_ALL_DOCS_QUERY, "indent", "true", "rows", "0", "sort", "id asc"));
+ assertMetricCounts(response, true, 0, 0, 1);
+ }
+
+ @Test
+ public void testMatchAllDocsNonScoreSort() throws Exception {
+ // plain request _with_ rows and non-score sort should consult cache, but not skip sort
+ String response = JQ(req("q", MATCH_ALL_DOCS_QUERY, "indent", "true", "sort", "id asc"));
+ assertMetricCounts(response, true, 0, 1, 0);
+ }
+
+ @Test
+ public void testCursorMark() throws Exception {
+ String q = pickRandom(ALL_QUERIES);
+ boolean includeScoreInSort = random().nextBoolean();
+ String response =
+ JQ(
+ req(
+ "q",
+ q,
+ "indent",
+ "true",
+ "cursorMark",
+ "*",
+ "sort",
+ includeScoreInSort ? "score desc,id asc" : "id asc"));
+ final int expectNumFound = MATCH_ALL_DOCS_QUERY.equals(q) ? ALL_DOCS : MOST_DOCS;
+ final boolean consultMatchAllDocs;
+ final boolean insertFilterCache;
+ if (includeScoreInSort) {
+ consultMatchAllDocs = false;
+ insertFilterCache = false;
+ } else if (MATCH_ALL_DOCS_QUERY.equals(q)) {
+ consultMatchAllDocs = true;
+ insertFilterCache = false;
+ } else {
+ consultMatchAllDocs = false;
+ insertFilterCache = USE_FILTER_FOR_SORTED_QUERY;
+ }
+ assertMetricCounts(
+ response, consultMatchAllDocs, insertFilterCache ? 1 : 0, 1, 0, expectNumFound);
+ }
+
+ @Test
+ public void testCursorMarkZeroRows() throws Exception {
+ String q = pickRandom(ALL_QUERIES);
+ String response =
+ JQ(
+ req(
+ "q",
+ q,
+ "indent",
+ "true",
+ "cursorMark",
+ "*",
+ "rows",
+ "0",
+ "sort",
+ random().nextBoolean() ? "id asc" : "score desc,id asc"));
+ final boolean consultMatchAllDocs;
+ final boolean insertFilterCache;
+ final boolean skipSort;
+ if (MATCH_ALL_DOCS_QUERY.equals(q)) {
+ consultMatchAllDocs = true;
+ insertFilterCache = false;
+ skipSort = true;
+ } else {
+ consultMatchAllDocs = false;
+ insertFilterCache = USE_FILTER_FOR_SORTED_QUERY;
+ skipSort = USE_FILTER_FOR_SORTED_QUERY;
+ }
+ assertMetricCounts(
+ response,
+ consultMatchAllDocs,
+ insertFilterCache ? 1 : 0,
+ skipSort ? 0 : 1,
+ skipSort ? 1 : 0);
+ }
+
+ private static void assertMetricCounts(
+ String response,
+ boolean matchAllDocs,
+ int expectFilterCacheInsertCount,
+ int expectFullSortCount,
+ int expectSkipSortCount) {
+ assertMetricCounts(
+ response,
+ matchAllDocs,
+ expectFilterCacheInsertCount,
+ expectFullSortCount,
+ expectSkipSortCount,
+ matchAllDocs ? ALL_DOCS : MOST_DOCS);
+ }
+
+ private static void assertMetricCounts(
+ String response,
+ boolean matchAllDocs,
+ int expectFilterCacheInsertCount,
+ int expectFullSortCount,
+ int expectSkipSortCount,
+ int expectNumFound) {
+ Map<?, ?> res = (Map<?, ?>) fromJSONString(response);
+ Map<?, ?> body = (Map<?, ?>) (res.get("response"));
+ SolrCore core = h.getCore();
+ assertEquals(
+ "Bad matchAllDocs insert count",
+ (matchAllDocs ? 1 : 0),
+ coreToMatchAllDocsInsertCount(core));
+ assertEquals("Bad filterCache insert count", expectFilterCacheInsertCount, coreToInserts(core));
+ assertEquals("Bad full sort count", expectFullSortCount, coreToSortCount(core, "full"));
+ assertEquals("Bad skip sort count", expectSkipSortCount, coreToSortCount(core, "skip"));
+ assertEquals(
+ "Should have exactly " + expectNumFound, expectNumFound, (long) (body.get("numFound")));
+ }
+
+ @Test
+ public void testConcurrentMatchAllDocsInitialization() throws Exception {
+ final int nThreads = 20;
+ final ExecutorService executor =
+ ExecutorUtil.newMDCAwareFixedThreadPool(
+ nThreads, new SolrNamedThreadFactory(getTestName()));
+ final Future<?>[] followup = new Future<?>[nThreads];
+ for (int i = 0; i < nThreads; i++) {
+ final int myI = i;
+ followup[i] =
+ executor.submit(
+ () -> {
+ try {
+ /*
+ NOTE: we use cursorMark=* here because it prevents consulting the
+ queryResultCache, which can interfere with DocSet fetching (which
+ is what we care about in this test).
+ */
+ String response =
+ JQ(
+ req(
+ "q",
+ MATCH_ALL_DOCS_QUERY,
+ "request_id",
+ Integer.toString(myI),
+ "cursorMark",
+ "*",
+ "sort",
+ "id asc"));
+ Map<?, ?> res = (Map<?, ?>) fromJSONString(response);
+ Map<?, ?> body = (Map<?, ?>) (res.get("response"));
+ assertEquals(
+ "Should have exactly " + ALL_DOCS, ALL_DOCS, (long) (body.get("numFound")));
+ } catch (Exception ex) {
+ throw new RuntimeException(ex);
+ }
+ });
+ }
+ try {
+ for (Future<?> f : followup) {
+ f.get(); // to access exceptions/errors
+ }
+ } finally {
+ executor.shutdown();
+
+ // tasks should already have completed
+ assertTrue(executor.awaitTermination(5, TimeUnit.SECONDS));
+ }
+ final SolrCore core = h.getCore();
+ Map<String, Object> liveDocsCacheMetrics = coreToLiveDocsCacheMetrics(core);
+
+ // the one and only liveDocs computation
+ long inserts = (long) liveDocsCacheMetrics.get("inserts");
+
+ // hits during the initial phase
+ long hits = (long) liveDocsCacheMetrics.get("hits");
+
+ long naiveHits = (long) liveDocsCacheMetrics.get("naiveHits");
+
+ assertEquals(1, inserts);
+ assertEquals(nThreads - 1, hits + naiveHits);
+ }
+}
diff --git a/solr/solr-ref-guide/modules/configuration-guide/pages/caches-warming.adoc b/solr/solr-ref-guide/modules/configuration-guide/pages/caches-warming.adoc
index 23197c78879..67e447ef959 100644
--- a/solr/solr-ref-guide/modules/configuration-guide/pages/caches-warming.adoc
+++ b/solr/solr-ref-guide/modules/configuration-guide/pages/caches-warming.adoc
@@ -246,9 +246,10 @@ This can boost performance if the most common queries only need a small subset o
=== <useFilterForSortedQuery> Element
-This parameter configures Solr to use a filter to satisfy a search.
-If the requested sort does not include "score", the `filterCache` will be checked for a filter matching the query.
-For most situations, this is only useful if the same search is requested often with different sort options and none of them ever use "score".
+This setting only affects queries where the requested sort does not include "score" (or for which score is irrelevant -- e.g., no docs requested, query outputs a constant score).
+In such cases, configuring this element to `true` causes the `filterCache` to be consulted for a filter matching the main query.
+This can be useful if the same search is issued repeatedly with different sort options or different pagination via `offset` or `cursorMark` (again, provided that "score" is not included or not relevant to the requested sort).
+If enabling this option, ensure that the `filterCache` has sufficient capacity to support expected use patterns.
[source,xml]
----