You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by da...@apache.org on 2018/01/03 10:51:57 UTC
[09/18] lucene-solr:jira/solr-11702: LUCENE-7993: Faster phrases if
total hit counts are not required.
LUCENE-7993: Faster phrases if total hit counts are not required.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/c95dc6d9
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/c95dc6d9
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/c95dc6d9
Branch: refs/heads/jira/solr-11702
Commit: c95dc6d95743f4a9a1ffe9baa04c3a9d1e3acdf9
Parents: b2f2481
Author: Adrien Grand <jp...@gmail.com>
Authored: Fri Dec 29 09:14:32 2017 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Fri Dec 29 10:06:00 2017 +0100
----------------------------------------------------------------------
lucene/CHANGES.txt | 3 ++
.../apache/lucene/search/ExactPhraseScorer.java | 23 ++++++++--
.../apache/lucene/search/MultiPhraseQuery.java | 14 +++---
.../org/apache/lucene/search/PhraseQuery.java | 14 +++---
.../apache/lucene/search/TestPhraseQuery.java | 47 ++++++++++++++++++++
5 files changed, 84 insertions(+), 17 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c95dc6d9/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 0da2cfe..ef7e005 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -61,6 +61,9 @@ Optimizations
* LUCENE-4100: Disjunctions now support faster collection of top hits when the
total hit count is not required. (Stefan Pohl, Adrien Grand, Robert Muir)
+* LUCENE-7993: Phrase queries are now faster if total hit counts are not
+ required. (Adrien Grand)
+
======================= Lucene 7.3.0 =======================
API Changes
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c95dc6d9/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
index 85a242e..f4a7ca7 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
@@ -43,15 +43,17 @@ final class ExactPhraseScorer extends Scorer {
private int freq;
private final Similarity.SimScorer docScorer;
- private final boolean needsScores;
+ private final boolean needsScores, needsTotalHitCount;
private float matchCost;
+ private float minCompetitiveScore;
ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
- Similarity.SimScorer docScorer, boolean needsScores,
+ Similarity.SimScorer docScorer, ScoreMode scoreMode,
float matchCost) throws IOException {
super(weight);
this.docScorer = docScorer;
- this.needsScores = needsScores;
+ this.needsScores = scoreMode.needsScores();
+ this.needsTotalHitCount = scoreMode != ScoreMode.TOP_SCORES;
List<DocIdSetIterator> iterators = new ArrayList<>();
List<PostingsAndPosition> postingsAndPositions = new ArrayList<>();
@@ -66,10 +68,25 @@ final class ExactPhraseScorer extends Scorer {
}
@Override
+ public void setMinCompetitiveScore(float minScore) {
+ minCompetitiveScore = minScore;
+ }
+
+ @Override
public TwoPhaseIterator twoPhaseIterator() {
return new TwoPhaseIterator(conjunction) {
@Override
public boolean matches() throws IOException {
+ if (needsTotalHitCount == false && minCompetitiveScore > 0) {
+ int minFreq = postings[0].postings.freq();
+ for (int i = 1; i < postings.length; ++i) {
+ minFreq = Math.min(postings[i].postings.freq(), minFreq);
+ }
+ if (docScorer.score(docID(), minFreq) < minCompetitiveScore) {
+ // The maximum score we could get is less than the min competitive score
+ return false;
+ }
+ }
return phraseFreq() > 0;
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c95dc6d9/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
index 44a5ad0..34361a7 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
@@ -185,13 +185,13 @@ public class MultiPhraseQuery extends Query {
private final Similarity similarity;
private final Similarity.SimWeight stats;
private final Map<Term,TermContext> termContexts = new HashMap<>();
- private final boolean needsScores;
+ private final ScoreMode scoreMode;
- public MultiPhraseWeight(IndexSearcher searcher, boolean needsScores, float boost)
+ public MultiPhraseWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
throws IOException {
super(MultiPhraseQuery.this);
- this.needsScores = needsScores;
- this.similarity = searcher.getSimilarity(needsScores);
+ this.scoreMode = scoreMode;
+ this.similarity = searcher.getSimilarity(scoreMode.needsScores());
final IndexReaderContext context = searcher.getTopReaderContext();
// compute idf
@@ -283,11 +283,11 @@ public class MultiPhraseQuery extends Query {
if (slop == 0) {
return new ExactPhraseScorer(this, postingsFreqs,
similarity.simScorer(stats, context),
- needsScores, totalMatchCost);
+ scoreMode, totalMatchCost);
} else {
return new SloppyPhraseScorer(this, postingsFreqs, slop,
similarity.simScorer(stats, context),
- needsScores, totalMatchCost);
+ scoreMode.needsScores(), totalMatchCost);
}
}
@@ -335,7 +335,7 @@ public class MultiPhraseQuery extends Query {
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
- return new MultiPhraseWeight(searcher, scoreMode.needsScores(), boost);
+ return new MultiPhraseWeight(searcher, scoreMode, boost);
}
/** Prints a user-readable version of this query. */
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c95dc6d9/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
index e0b60be..3d359b4 100644
--- a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
@@ -353,10 +353,10 @@ public class PhraseQuery extends Query {
private class PhraseWeight extends Weight {
private final Similarity similarity;
private final Similarity.SimWeight stats;
- private final boolean needsScores;
+ private final ScoreMode scoreMode;
private transient TermContext states[];
- public PhraseWeight(IndexSearcher searcher, boolean needsScores, float boost)
+ public PhraseWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
throws IOException {
super(PhraseQuery.this);
final int[] positions = PhraseQuery.this.getPositions();
@@ -365,8 +365,8 @@ public class PhraseQuery extends Query {
} else if (positions[0] != 0) {
throw new IllegalStateException("PhraseWeight requires that the first position is 0, call rewrite first");
}
- this.needsScores = needsScores;
- this.similarity = searcher.getSimilarity(needsScores);
+ this.scoreMode = scoreMode;
+ this.similarity = searcher.getSimilarity(scoreMode.needsScores());
final IndexReaderContext context = searcher.getTopReaderContext();
states = new TermContext[terms.length];
TermStatistics termStats[] = new TermStatistics[terms.length];
@@ -434,11 +434,11 @@ public class PhraseQuery extends Query {
if (slop == 0) { // optimize exact case
return new ExactPhraseScorer(this, postingsFreqs,
similarity.simScorer(stats, context),
- needsScores, totalMatchCost);
+ scoreMode, totalMatchCost);
} else {
return new SloppyPhraseScorer(this, postingsFreqs, slop,
similarity.simScorer(stats, context),
- needsScores, totalMatchCost);
+ scoreMode.needsScores(), totalMatchCost);
}
}
@@ -510,7 +510,7 @@ public class PhraseQuery extends Query {
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
- return new PhraseWeight(searcher, scoreMode.needsScores(), boost);
+ return new PhraseWeight(searcher, scoreMode, boost);
}
/** Prints a user-readable version of this query. */
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c95dc6d9/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java
index 2bbd0dd..eb31128 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java
@@ -19,6 +19,8 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
import java.util.List;
import java.util.Random;
@@ -33,7 +35,10 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
@@ -713,4 +718,46 @@ public class TestPhraseQuery extends LuceneTestCase {
builder.add(new Term("field", "three"), 4);
});
}
+
+ static String[] DOCS = new String[] {
+ "a b c d e f g h",
+ "b c b",
+ "c d d d e f g b",
+ "c b a b c",
+ "a a b b c c d d",
+ "a b c d a b c d a b c d"
+ };
+
+ public void testTopPhrases() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+ String[] docs = Arrays.copyOf(DOCS, DOCS.length);
+ Collections.shuffle(Arrays.asList(docs), random());
+ for (String value : DOCS) {
+ Document doc = new Document();
+ doc.add(new TextField("f", value, Store.NO));
+ w.addDocument(doc);
+ }
+ IndexReader r = DirectoryReader.open(w);
+ w.close();
+ IndexSearcher searcher = newSearcher(r);
+ for (Query query : Arrays.asList(
+ new PhraseQuery("f", "b", "c"), // common phrase
+ new PhraseQuery("f", "e", "f"), // always appear next to each other
+ new PhraseQuery("f", "d", "d") // repeated term
+ )) {
+ for (int topN = 1; topN <= 2; ++topN) {
+ TopScoreDocCollector collector1 = TopScoreDocCollector.create(topN, null, true);
+ searcher.search(query, collector1);
+ ScoreDoc[] hits1 = collector1.topDocs().scoreDocs;
+ TopScoreDocCollector collector2 = TopScoreDocCollector.create(topN, null, false);
+ searcher.search(query, collector2);
+ ScoreDoc[] hits2 = collector2.topDocs().scoreDocs;
+ assertTrue("" + query, hits1.length > 0);
+ CheckHits.checkEqual(query, hits1, hits2);
+ }
+ }
+ r.close();
+ dir.close();
+ }
}