You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sa...@apache.org on 2016/07/21 13:38:09 UTC
[47/51] [abbrv] lucene-solr:apiv2: LUCENE-7311: Cached term queries do not seek the terms dictionary anymore.
LUCENE-7311: Cached term queries do not seek the terms dictionary anymore.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/71541bcd
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/71541bcd
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/71541bcd
Branch: refs/heads/apiv2
Commit: 71541bcd6cfd1e279faa1f2402403ac74cc5362d
Parents: ee44da6
Author: Adrien Grand <jp...@gmail.com>
Authored: Wed Jul 20 17:30:30 2016 +0200
Committer: Adrien Grand <jp...@gmail.com>
Committed: Wed Jul 20 17:42:51 2016 +0200
----------------------------------------------------------------------
lucene/CHANGES.txt | 3 +
.../org/apache/lucene/search/TermQuery.java | 58 ++++---
.../org/apache/lucene/search/TestTermQuery.java | 154 +++++++++++++++++++
3 files changed, 196 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/71541bcd/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index ec395a3..432e1d2 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -137,6 +137,9 @@ Optimizations
* LUCENE-7371: Point values are now better compressed using run-length
encoding. (Adrien Grand)
+* LUCENE-7311: Cached term queries do not seek the terms dictionary anymore.
+ (Adrien Grand)
+
Other
* LUCENE-4787: Fixed some highlighting javadocs. (Michael Dodsworth via Adrien
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/71541bcd/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
index 590c3b3..73170b9 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
@@ -29,6 +29,7 @@ import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.TermState;
+import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
@@ -51,8 +52,10 @@ public class TermQuery extends Query {
public TermWeight(IndexSearcher searcher, boolean needsScores,
float boost, TermContext termStates) throws IOException {
super(TermQuery.this);
+ if (needsScores && termStates == null) {
+ throw new IllegalStateException("termStates are required when scores are needed");
+ }
this.needsScores = needsScores;
- assert termStates != null : "TermContext must not be null";
this.termStates = termStates;
this.similarity = searcher.getSimilarity(needsScores);
@@ -62,12 +65,10 @@ public class TermQuery extends Query {
collectionStats = searcher.collectionStatistics(term.field());
termStats = searcher.termStatistics(term, termStates);
} else {
- // do not bother computing actual stats, scores are not needed
+ // we do not need the actual stats, use fake stats with docFreq=maxDoc and ttf=-1
final int maxDoc = searcher.getIndexReader().maxDoc();
- final int docFreq = termStates.docFreq();
- final long totalTermFreq = termStates.totalTermFreq();
collectionStats = new CollectionStatistics(term.field(), maxDoc, -1, -1, -1);
- termStats = new TermStatistics(term.bytes(), docFreq, totalTermFreq);
+ termStats = new TermStatistics(term.bytes(), maxDoc, -1);
}
this.stats = similarity.computeWeight(boost, collectionStats, termStats);
@@ -85,7 +86,7 @@ public class TermQuery extends Query {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
- assert termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termStates.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);
+ assert termStates == null || termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termStates.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);;
final TermsEnum termsEnum = getTermsEnum(context);
if (termsEnum == null) {
return null;
@@ -100,17 +101,30 @@ public class TermQuery extends Query {
* the term does not exist in the given context
*/
private TermsEnum getTermsEnum(LeafReaderContext context) throws IOException {
- final TermState state = termStates.get(context.ord);
- if (state == null) { // term is not present in that reader
- assert termNotInReader(context.reader(), term) : "no termstate found but term exists in reader term=" + term;
- return null;
+ if (termStates != null) {
+ // TermQuery either used as a Query or the term states have been provided at construction time
+ assert termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termStates.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);
+ final TermState state = termStates.get(context.ord);
+ if (state == null) { // term is not present in that reader
+ assert termNotInReader(context.reader(), term) : "no termstate found but term exists in reader term=" + term;
+ return null;
+ }
+ final TermsEnum termsEnum = context.reader().terms(term.field()).iterator();
+ termsEnum.seekExact(term.bytes(), state);
+ return termsEnum;
+ } else {
+ // TermQuery used as a filter, so the term states have not been built up front
+ Terms terms = context.reader().terms(term.field());
+ if (terms == null) {
+ return null;
+ }
+ final TermsEnum termsEnum = terms.iterator();
+ if (termsEnum.seekExact(term.bytes())) {
+ return termsEnum;
+ } else {
+ return null;
+ }
}
- // System.out.println("LD=" + reader.getLiveDocs() + " set?=" +
- // (reader.getLiveDocs() != null ? reader.getLiveDocs().get(0) : "null"));
- final TermsEnum termsEnum = context.reader().terms(term.field())
- .iterator();
- termsEnum.seekExact(term.bytes(), state);
- return termsEnum;
}
private boolean termNotInReader(LeafReader reader, Term term) throws IOException {
@@ -168,9 +182,15 @@ public class TermQuery extends Query {
final TermContext termState;
if (perReaderTermState == null
|| perReaderTermState.topReaderContext != context) {
- // make TermQuery single-pass if we don't have a PRTS or if the context
- // differs!
- termState = TermContext.build(context, term);
+ if (needsScores) {
+ // make TermQuery single-pass if we don't have a PRTS or if the context
+ // differs!
+ termState = TermContext.build(context, term);
+ } else {
+ // do not compute the term state, this will help save seeks in the terms
+ // dict on segments that have a cache entry for this query
+ termState = null;
+ }
} else {
// PRTS was pre-build for this IS
termState = this.perReaderTermState;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/71541bcd/lucene/core/src/test/org/apache/lucene/search/TestTermQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTermQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestTermQuery.java
new file mode 100644
index 0000000..a994118
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/TestTermQuery.java
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search;
+
+import java.io.IOException;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.FilterDirectoryReader;
+import org.apache.lucene.index.FilterLeafReader;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.MultiReader;
+import org.apache.lucene.index.NoMergePolicy;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.index.TermState;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestTermQuery extends LuceneTestCase {
+
+ public void testEquals() throws IOException {
+ QueryUtils.checkEqual(
+ new TermQuery(new Term("foo", "bar")),
+ new TermQuery(new Term("foo", "bar")));
+ QueryUtils.checkUnequal(
+ new TermQuery(new Term("foo", "bar")),
+ new TermQuery(new Term("foo", "baz")));
+ QueryUtils.checkEqual(
+ new TermQuery(new Term("foo", "bar")),
+ new TermQuery(new Term("foo", "bar"), TermContext.build(new MultiReader().getContext(), new Term("foo", "bar"))));
+ }
+
+ public void testCreateWeightDoesNotSeekIfScoresAreNotNeeded() throws IOException {
+ Directory dir = newDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE));
+ // segment that contains the term
+ Document doc = new Document();
+ doc.add(new StringField("foo", "bar", Store.NO));
+ w.addDocument(doc);
+ w.getReader().close();
+ // segment that does not contain the term
+ doc = new Document();
+ doc.add(new StringField("foo", "baz", Store.NO));
+ w.addDocument(doc);
+ w.getReader().close();
+ // segment that does not contain the field
+ w.addDocument(new Document());
+
+ DirectoryReader reader = w.getReader();
+ FilterDirectoryReader noSeekReader = new NoSeekDirectoryReader(reader);
+ IndexSearcher noSeekSearcher = new IndexSearcher(noSeekReader);
+ Query query = new TermQuery(new Term("foo", "bar"));
+ AssertionError e = expectThrows(AssertionError.class,
+ () -> noSeekSearcher.createNormalizedWeight(query, true));
+ assertEquals("no seek", e.getMessage());
+
+ noSeekSearcher.createNormalizedWeight(query, false); // no exception
+ IndexSearcher searcher = new IndexSearcher(reader);
+ // use a collector rather than searcher.count() which would just read the
+ // doc freq instead of creating a scorer
+ TotalHitCountCollector collector = new TotalHitCountCollector();
+ searcher.search(query, collector);
+ assertEquals(1, collector.getTotalHits());
+ TermQuery queryWithContext = new TermQuery(new Term("foo", "bar"),
+ TermContext.build(reader.getContext(), new Term("foo", "bar")));
+ collector = new TotalHitCountCollector();
+ searcher.search(queryWithContext, collector);
+ assertEquals(1, collector.getTotalHits());
+
+ IOUtils.close(reader, w, dir);
+ }
+
+ private static class NoSeekDirectoryReader extends FilterDirectoryReader {
+
+ public NoSeekDirectoryReader(DirectoryReader in) throws IOException {
+ super(in, new SubReaderWrapper() {
+ @Override
+ public LeafReader wrap(LeafReader reader) {
+ return new NoSeekLeafReader(reader);
+ }
+ });
+ }
+
+ @Override
+ protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
+ return new NoSeekDirectoryReader(in);
+ }
+
+ }
+
+ private static class NoSeekLeafReader extends FilterLeafReader {
+
+ public NoSeekLeafReader(LeafReader in) {
+ super(in);
+ }
+
+ @Override
+ public Fields fields() throws IOException {
+ return new FilterFields(super.fields()) {
+ @Override
+ public Terms terms(String field) throws IOException {
+ return new FilterTerms(super.terms(field)) {
+ @Override
+ public TermsEnum iterator() throws IOException {
+ return new FilterTermsEnum(super.iterator()) {
+ @Override
+ public SeekStatus seekCeil(BytesRef text) throws IOException {
+ throw new AssertionError("no seek");
+ }
+ @Override
+ public void seekExact(BytesRef term, TermState state) throws IOException {
+ throw new AssertionError("no seek");
+ }
+ @Override
+ public boolean seekExact(BytesRef text) throws IOException {
+ throw new AssertionError("no seek");
+ }
+ @Override
+ public void seekExact(long ord) throws IOException {
+ throw new AssertionError("no seek");
+ }
+ };
+ }
+ };
+ }
+ };
+ }
+
+ };
+
+}