You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ds...@apache.org on 2018/05/31 15:03:32 UTC
lucene-solr:branch_7x: * SOLR-12366: A slow "live docs"
implementation was being used instead of a bitset. Affects classic faceting
enum method, JSON Facets enum method, UnInvertedField faceting,
GraphTermsQParser, JoinQParser. Renamed SolrIndexSearcher.getLiveDocs to getLiveDocSet.
Repository: lucene-solr
Updated Branches:
refs/heads/branch_7x bc58932b9 -> d65f40f38
* SOLR-12366: A slow "live docs" implementation was being used instead of a bitset.
Affects classic faceting enum method, JSON Facets enum method, UnInvertedField faceting, GraphTermsQParser, JoinQParser.
Renamed SolrIndexSearcher.getLiveDocs to getLiveDocSet.
(cherry picked from commit 1e63b32)
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/d65f40f3
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/d65f40f3
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/d65f40f3
Branch: refs/heads/branch_7x
Commit: d65f40f3852be74bf0fc5c17d8252c669ea325d8
Parents: bc58932
Author: David Smiley <ds...@apache.org>
Authored: Thu May 31 10:55:12 2018 -0400
Committer: David Smiley <ds...@apache.org>
Committed: Thu May 31 11:02:55 2018 -0400
----------------------------------------------------------------------
solr/CHANGES.txt | 6 +--
.../handler/component/SpatialHeatmapFacets.java | 2 +-
.../org/apache/solr/query/SolrRangeQuery.java | 2 +-
.../org/apache/solr/request/SimpleFacets.java | 8 ++--
.../java/org/apache/solr/search/DocSetUtil.java | 4 +-
.../solr/search/GraphTermsQParserPlugin.java | 25 ++++------
.../apache/solr/search/JoinQParserPlugin.java | 4 +-
.../apache/solr/search/SolrIndexSearcher.java | 50 +++++++++++++-------
.../FacetFieldProcessorByEnumTermsStream.java | 2 +-
.../solr/search/facet/FacetProcessor.java | 2 +-
.../solr/search/facet/UnInvertedField.java | 2 +-
.../org/apache/solr/search/TestFiltering.java | 12 +++--
12 files changed, 65 insertions(+), 54 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d65f40f3/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index cc8349d..178fed4 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -292,10 +292,8 @@ Optimizations
* SOLR-12338: Replay buffering tlog in parallel. (Cao Manh Dat, David Smiley)
-* SOLR-12375: Optimize Lucene needsScore / ScoreMode use:
- A non-cached filter query could be told incorrectly that scores were needed.
- The /export (ExportQParserPlugin) would declare incorrectly that scores are needed.
- Expanded docs (expand component) could be told incorrectly that scores are needed. (David Smiley)
+* SOLR-12366: A slow "live docs" implementation was being used instead of a bitset. Affects classic faceting
+ enum method, JSON Facets enum method, UnInvertedField faceting, GraphTermsQParser, JoinQParser. (David Smiley)
Other Changes
----------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d65f40f3/solr/core/src/java/org/apache/solr/handler/component/SpatialHeatmapFacets.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/component/SpatialHeatmapFacets.java b/solr/core/src/java/org/apache/solr/handler/component/SpatialHeatmapFacets.java
index 656372a..0a39e5b 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/SpatialHeatmapFacets.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/SpatialHeatmapFacets.java
@@ -174,7 +174,7 @@ public class SpatialHeatmapFacets {
}
private static Bits getTopAcceptDocs(DocSet docSet, SolrIndexSearcher searcher) throws IOException {
- if (searcher.getLiveDocs() == docSet) {
+ if (searcher.getLiveDocSet() == docSet) {
return null; // means match everything (all live docs). This can speedup things a lot.
} else if (docSet.size() == 0) {
return new Bits.MatchNoBits(searcher.maxDoc()); // can speedup things a lot
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d65f40f3/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java b/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java
index 546e9d1..fd1e5dc 100644
--- a/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java
+++ b/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java
@@ -155,7 +155,7 @@ public final class SolrRangeQuery extends ExtendedQueryBase implements DocSetPro
private DocSet createDocSet(SolrIndexSearcher searcher, long cost) throws IOException {
int maxDoc = searcher.maxDoc();
- BitDocSet liveDocs = searcher.getLiveDocs();
+ BitDocSet liveDocs = searcher.getLiveDocSet();
FixedBitSet liveBits = liveDocs.size() == maxDoc ? null : liveDocs.getBits();
DocSetBuilder builder = new DocSetBuilder(maxDoc, cost);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d65f40f3/solr/core/src/java/org/apache/solr/request/SimpleFacets.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/request/SimpleFacets.java b/solr/core/src/java/org/apache/solr/request/SimpleFacets.java
index 0b5a082..b5d97d2 100644
--- a/solr/core/src/java/org/apache/solr/request/SimpleFacets.java
+++ b/solr/core/src/java/org/apache/solr/request/SimpleFacets.java
@@ -40,6 +40,7 @@ import java.util.function.Predicate;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.MultiPostingsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
@@ -946,9 +947,6 @@ public class SimpleFacets {
IndexSchema schema = searcher.getSchema();
FieldType ft = schema.getFieldType(field);
assert !ft.isPointField(): "Point Fields don't support enum method";
-
- LeafReader r = searcher.getSlowAtomicReader();
-
boolean sortByCount = sort.equals("count") || sort.equals("true");
final int maxsize = limit>=0 ? offset+limit : Integer.MAX_VALUE-1;
@@ -965,7 +963,7 @@ public class SimpleFacets {
prefixTermBytes = new BytesRef(indexedPrefix);
}
- Terms terms = r.terms(field);
+ Terms terms = MultiFields.getTerms(searcher.getIndexReader(), field);
TermsEnum termsEnum = null;
SolrIndexSearcher.DocsEnumState deState = null;
BytesRef term = null;
@@ -1011,7 +1009,7 @@ public class SimpleFacets {
if (deState == null) {
deState = new SolrIndexSearcher.DocsEnumState();
deState.fieldName = field;
- deState.liveDocs = r.getLiveDocs();
+ deState.liveDocs = searcher.getLiveDocsBits();
deState.termsEnum = termsEnum;
deState.postingsEnum = postingsEnum;
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d65f40f3/solr/core/src/java/org/apache/solr/search/DocSetUtil.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/DocSetUtil.java b/solr/core/src/java/org/apache/solr/search/DocSetUtil.java
index 0bc6dac..de3776b 100644
--- a/solr/core/src/java/org/apache/solr/search/DocSetUtil.java
+++ b/solr/core/src/java/org/apache/solr/search/DocSetUtil.java
@@ -83,7 +83,7 @@ public class DocSetUtil {
searcher.setLiveDocs( collector.getDocSet() );
}
try {
- return searcher.getLiveDocs();
+ return searcher.getLiveDocSet();
} catch (IOException e) {
// should be impossible... liveDocs should exist, so no IO should be necessary
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
@@ -106,7 +106,7 @@ public class DocSetUtil {
try {
// if this docset has the same cardinality as liveDocs, return liveDocs instead
// so this set will be short lived garbage.
- return searcher.getLiveDocs();
+ return searcher.getLiveDocSet();
} catch (IOException e) {
// should be impossible... liveDocs should exist, so no IO should be necessary
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d65f40f3/solr/core/src/java/org/apache/solr/search/GraphTermsQParserPlugin.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/GraphTermsQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/GraphTermsQParserPlugin.java
index 5309a35..835fdff 100644
--- a/solr/core/src/java/org/apache/solr/search/GraphTermsQParserPlugin.java
+++ b/solr/core/src/java/org/apache/solr/search/GraphTermsQParserPlugin.java
@@ -48,13 +48,13 @@ import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitDocIdSet;
-import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.BytesRefIterator;
@@ -547,22 +547,17 @@ abstract class PointSetQuery extends Query implements DocSetProducer {
}
private FixedBitSet getLiveDocs(IndexSearcher searcher) throws IOException {
+ if (!searcher.getIndexReader().hasDeletions()) {
+ return null;
+ }
if (searcher instanceof SolrIndexSearcher) {
- BitDocSet liveDocs = ((SolrIndexSearcher) searcher).getLiveDocs();
- FixedBitSet liveBits = liveDocs.size() == ((SolrIndexSearcher) searcher).maxDoc() ? null : liveDocs.getBits();
- return liveBits;
+ return ((SolrIndexSearcher) searcher).getLiveDocSet().getBits();
} else {
- if (searcher.getTopReaderContext().reader().maxDoc() == searcher.getTopReaderContext().reader().numDocs()) return null;
- FixedBitSet bs = new FixedBitSet(searcher.getTopReaderContext().reader().maxDoc());
- for (LeafReaderContext ctx : searcher.getTopReaderContext().leaves()) {
- Bits liveDocs = ctx.reader().getLiveDocs();
- int max = ctx.reader().maxDoc();
- int base = ctx.docBase;
- for (int i=0; i<max; i++) {
- if (liveDocs.get(i)) bs.set(i + base);
- }
- }
- return bs;
+ // TODO Does this ever happen? In Solr should always be SolrIndexSearcher?
+ //smallSetSize==0 thus will always produce a BitDocSet (FixedBitSet)
+ DocSetCollector docSetCollector = new DocSetCollector(0, searcher.getIndexReader().maxDoc());
+ searcher.search(new MatchAllDocsQuery(), docSetCollector);
+ return ((BitDocSet) docSetCollector.getDocSet()).getBits();
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d65f40f3/solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java
index cc6d6b1..2c8ffea 100644
--- a/solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java
+++ b/solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java
@@ -373,8 +373,8 @@ class JoinQuery extends Query {
}
}
- Bits fromLiveDocs = fromSearcher.getSlowAtomicReader().getLiveDocs();
- Bits toLiveDocs = fromSearcher == toSearcher ? fromLiveDocs : toSearcher.getSlowAtomicReader().getLiveDocs();
+ Bits fromLiveDocs = fromSearcher.getLiveDocsBits();
+ Bits toLiveDocs = fromSearcher == toSearcher ? fromLiveDocs : toSearcher.getLiveDocsBits();
fromDeState = new SolrIndexSearcher.DocsEnumState();
fromDeState.fieldName = fromField;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d65f40f3/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
index 45fcadf..53eff8d 100644
--- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
+++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@@ -683,17 +683,14 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
* @return the first document number containing the term
*/
public int getFirstMatch(Term t) throws IOException {
- Terms terms = leafReader.terms(t.field());
- if (terms == null) return -1;
- BytesRef termBytes = t.bytes();
- final TermsEnum termsEnum = terms.iterator();
- if (!termsEnum.seekExact(termBytes)) {
+ long pair = lookupId(t.field(), t.bytes());
+ if (pair == -1) {
return -1;
+ } else {
+ final int segIndex = (int) (pair >> 32);
+ final int segDocId = (int) pair;
+ return leafContexts.get(segIndex).docBase + segDocId;
}
- PostingsEnum docs = termsEnum.postings(null, PostingsEnum.NONE);
- docs = BitsFilteredPostingsEnum.wrap(docs, leafReader.getLiveDocs());
- int id = docs.nextDoc();
- return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
/**
@@ -703,8 +700,10 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
* @lucene.internal
*/
public long lookupId(BytesRef idBytes) throws IOException {
- String field = schema.getUniqueKeyField().getName();
+ return lookupId(schema.getUniqueKeyField().getName(), idBytes);
+ }
+ private long lookupId(String field, BytesRef idBytes) throws IOException {
for (int i = 0, c = leafContexts.size(); i < c; i++) {
final LeafReaderContext leaf = leafContexts.get(i);
final LeafReader reader = leaf.reader();
@@ -812,12 +811,12 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
DocSet absAnswer = filterCache.get(absQ);
if (absAnswer != null) {
if (positive) return absAnswer;
- else return getLiveDocs().andNot(absAnswer);
+ else return getLiveDocSet().andNot(absAnswer);
}
}
DocSet absAnswer = getDocSetNC(absQ, null);
- DocSet answer = positive ? absAnswer : getLiveDocs().andNot(absAnswer);
+ DocSet answer = positive ? absAnswer : getLiveDocSet().andNot(absAnswer);
if (filterCache != null) {
// cache negative queries as positive
@@ -842,18 +841,37 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
private static Query matchAllDocsQuery = new MatchAllDocsQuery();
private volatile BitDocSet liveDocs;
- /** @lucene.internal the type of DocSet returned may change in the future */
+ @Deprecated // TODO remove for 8.0
public BitDocSet getLiveDocs() throws IOException {
+ return getLiveDocSet();
+ }
+
+ /**
+ * Returns an efficient random-access {@link DocSet} of the live docs. It's cached. Never null.
+ * @lucene.internal the type of DocSet returned may change in the future
+ */
+ public BitDocSet getLiveDocSet() throws IOException {
// Going through the filter cache will provide thread safety here if we only had getLiveDocs,
// but the addition of setLiveDocs means we needed to add volatile to "liveDocs".
BitDocSet docs = liveDocs;
if (docs == null) {
+ //note: maybe should instead calc manually by segment, using FixedBitSet.copyOf(segLiveDocs); avoid filter cache?
liveDocs = docs = getDocSetBits(matchAllDocsQuery);
}
assert docs.size() == numDocs();
return docs;
}
+ /**
+ * Returns an efficient random-access {@link Bits} of the live docs. It's cached. Null means all docs are live.
+ * Use this like {@link LeafReader#getLiveDocs()}.
+ * @lucene.internal
+ */
+ //TODO rename to getLiveDocs in 8.0
+ public Bits getLiveDocsBits() throws IOException {
+ return getIndexReader().hasDeletions() ? getLiveDocSet().getBits() : null;
+ }
+
/** @lucene.internal */
public boolean isLiveDocsInstantiated() {
return liveDocs != null;
@@ -1045,7 +1063,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
// Are all of our normal cached filters negative?
if (end > 0 && answer == null) {
- answer = getLiveDocs();
+ answer = getLiveDocSet();
}
// do negative queries first to shrink set size
@@ -1069,7 +1087,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
} else {
if (postFilters == null) {
if (answer == null) {
- answer = getLiveDocs();
+ answer = getLiveDocSet();
}
// "answer" is the only part of the filter, so set it.
pf.answer = answer;
@@ -2108,7 +2126,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
// if both negative, we need to create a temp DocSet since we
// don't have a counting method that takes three.
- DocSet all = getLiveDocs();
+ DocSet all = getLiveDocSet();
// -a -b == *:*.andNot(a).andNotSize(b) == *.*.andNotSize(a.union(b))
// we use the last form since the intermediate DocSet should normally be smaller.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d65f40f3/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByEnumTermsStream.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByEnumTermsStream.java b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByEnumTermsStream.java
index f939bba..9257ec7 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByEnumTermsStream.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByEnumTermsStream.java
@@ -231,7 +231,7 @@ class FacetFieldProcessorByEnumTermsStream extends FacetFieldProcessor implement
if (deState == null) {
deState = new SolrIndexSearcher.DocsEnumState();
deState.fieldName = sf.getName();
- deState.liveDocs = fcontext.searcher.getSlowAtomicReader().getLiveDocs();
+ deState.liveDocs = fcontext.searcher.getLiveDocsBits();
deState.termsEnum = termsEnum;
deState.postingsEnum = postingsEnum;
deState.minSetSizeCached = minDfFilterCache;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d65f40f3/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java b/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java
index 6b66cfd..454b85f 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java
@@ -296,7 +296,7 @@ public abstract class FacetProcessor<FacetRequestT extends FacetRequest> {
// We need to remember to not redundantly re-apply these filters after.
DocSet acceptDocs = this.filter;
if (acceptDocs == null) {
- acceptDocs = fcontext.searcher.getLiveDocs();
+ acceptDocs = fcontext.searcher.getLiveDocSet();
} else {
appliedFilters = true;
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d65f40f3/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java b/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java
index 3349bb2..f751ba4 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java
@@ -147,7 +147,7 @@ public class UnInvertedField extends DocTermOrds {
if (deState == null) {
deState = new SolrIndexSearcher.DocsEnumState();
deState.fieldName = field;
- deState.liveDocs = searcher.getSlowAtomicReader().getLiveDocs();
+ deState.liveDocs = searcher.getLiveDocsBits();
deState.termsEnum = te; // TODO: check for MultiTermsEnum in SolrIndexSearcher could now fail?
deState.postingsEnum = postingsEnum;
deState.minSetSizeCached = maxTermDocFreq;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d65f40f3/solr/core/src/test/org/apache/solr/search/TestFiltering.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/TestFiltering.java b/solr/core/src/test/org/apache/solr/search/TestFiltering.java
index 16d9192..b2ad9cf 100644
--- a/solr/core/src/test/org/apache/solr/search/TestFiltering.java
+++ b/solr/core/src/test/org/apache/solr/search/TestFiltering.java
@@ -17,22 +17,24 @@
package org.apache.solr.search;
+import java.lang.invoke.MethodHandles;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.common.SolrException;
import org.apache.solr.request.SolrQueryRequest;
import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.lang.invoke.MethodHandles;
-import java.util.*;
-
public class TestFiltering extends SolrTestCaseJ4 {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@@ -73,7 +75,7 @@ public class TestFiltering extends SolrTestCaseJ4 {
// System.out.println("getting set for " + q);
DocSet set = searcher.getDocSet(q);
if (live == null) {
- live = searcher.getLiveDocs();
+ live = searcher.getLiveDocSet();
}
assertTrue( set == live);