You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2015/02/23 18:55:29 UTC
svn commit: r1661727 - in /lucene/dev/trunk/lucene: CHANGES.txt
core/src/java/org/apache/lucene/search/PhrasePositions.java
core/src/java/org/apache/lucene/search/PhraseQueue.java
core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
Author: jpountz
Date: Mon Feb 23 17:55:29 2015
New Revision: 1661727
URL: http://svn.apache.org/r1661727
Log:
LUCENE-6275: Use ConjunctionDISI in SloppyPhraseScorer.
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/PhraseQueue.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1661727&r1=1661726&r2=1661727&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Mon Feb 23 17:55:29 2015
@@ -104,6 +104,10 @@ Optimizations
* LUCENE-6263: MultiCollector automatically caches scores when several
collectors need them. (Adrien Grand)
+* LUCENE-6275: SloppyPhraseScorer now uses the same logic as ConjunctionScorer
+ in order to advance doc IDs, which takes advantage of the cost() API.
+ (Adrien Grand)
+
API Changes
* LUCENE-6204, LUCENE-6208: Simplify CompoundFormat: remove files()
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java?rev=1661727&r1=1661726&r2=1661727&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java Mon Feb 23 17:55:29 2015
@@ -24,7 +24,6 @@ import org.apache.lucene.index.*;
* Position of a term in a document that takes into account the term offset within the phrase.
*/
final class PhrasePositions {
- int doc; // current doc
int position; // position in doc
int count; // remaining pos in this doc
int offset; // position in phrase
@@ -42,22 +41,6 @@ final class PhrasePositions {
this.terms = terms;
}
- final boolean next() throws IOException { // increments to next doc
- doc = postings.nextDoc();
- if (doc == DocIdSetIterator.NO_MORE_DOCS) {
- return false;
- }
- return true;
- }
-
- final boolean skipTo(int target) throws IOException {
- doc = postings.advance(target);
- if (doc == DocIdSetIterator.NO_MORE_DOCS) {
- return false;
- }
- return true;
- }
-
final void firstPosition() throws IOException {
count = postings.freq(); // read first pos
nextPosition();
@@ -80,7 +63,7 @@ final class PhrasePositions {
/** for debug purposes */
@Override
public String toString() {
- String s = "d:"+doc+" o:"+offset+" p:"+position+" c:"+count;
+ String s = "o:"+offset+" p:"+position+" c:"+count;
if (rptGroup >=0 ) {
s += " rpt:"+rptGroup+",i"+rptInd;
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/PhraseQueue.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/PhraseQueue.java?rev=1661727&r1=1661726&r2=1661727&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/PhraseQueue.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/PhraseQueue.java Mon Feb 23 17:55:29 2015
@@ -26,20 +26,16 @@ final class PhraseQueue extends Priority
@Override
protected final boolean lessThan(PhrasePositions pp1, PhrasePositions pp2) {
- if (pp1.doc == pp2.doc)
- if (pp1.position == pp2.position)
- // same doc and pp.position, so decide by actual term positions.
- // rely on: pp.position == tp.position - offset.
- if (pp1.offset == pp2.offset) {
- return pp1.ord < pp2.ord;
- } else {
- return pp1.offset < pp2.offset;
- }
- else {
- return pp1.position < pp2.position;
+ if (pp1.position == pp2.position)
+ // same doc and pp.position, so decide by actual term positions.
+ // rely on: pp.position == tp.position - offset.
+ if (pp1.offset == pp2.offset) {
+ return pp1.ord < pp2.ord;
+ } else {
+ return pp1.offset < pp2.offset;
}
else {
- return pp1.doc < pp2.doc;
+ return pp1.position < pp2.position;
}
}
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java?rev=1661727&r1=1661726&r2=1661727&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java Mon Feb 23 17:55:29 2015
@@ -30,7 +30,9 @@ import org.apache.lucene.search.similari
import org.apache.lucene.util.FixedBitSet;
final class SloppyPhraseScorer extends Scorer {
- private PhrasePositions min, max;
+
+ private final ConjunctionDISI conjunction;
+ private final PhrasePositions[] phrasePositions;
private float sloppyFreq; //phrase frequency in current doc as computed by phraseFreq().
@@ -49,7 +51,6 @@ final class SloppyPhraseScorer extends S
private PhrasePositions[] rptStack; // temporary stack for switching colliding repeating pps
private int numMatches;
- private final long cost;
final boolean needsScores;
SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
@@ -60,25 +61,13 @@ final class SloppyPhraseScorer extends S
this.slop = slop;
this.numPostings = postings==null ? 0 : postings.length;
pq = new PhraseQueue(postings.length);
- // min(cost)
- cost = postings[0].postings.cost();
- // convert tps to a list of phrase positions.
- // note: phrase-position differs from term-position in that its position
- // reflects the phrase offset: pp.pos = tp.pos - offset.
- // this allows to easily identify a matching (exact) phrase
- // when all PhrasePositions have exactly the same position.
- if (postings.length > 0) {
- min = new PhrasePositions(postings[0].postings, postings[0].position, 0, postings[0].terms);
- max = min;
- max.doc = -1;
- for (int i = 1; i < postings.length; i++) {
- PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms);
- max.next = pp;
- max = pp;
- max.doc = -1;
- }
- max.next = min; // make it cyclic for easier manipulation
+ DocIdSetIterator[] iterators = new DocIdSetIterator[postings.length];
+ phrasePositions = new PhrasePositions[postings.length];
+ for (int i = 0; i < postings.length; ++i) {
+ iterators[i] = postings[i].postings;
+ phrasePositions[i] = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms);
}
+ conjunction = ConjunctionDISI.intersect(Arrays.asList(iterators));
}
/**
@@ -245,7 +234,7 @@ final class SloppyPhraseScorer extends S
//System.err.println("initSimple: doc: "+min.doc);
pq.clear();
// position pps and build queue from list
- for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
+ for (PhrasePositions pp : phrasePositions) {
pp.firstPosition();
if (pp.position > end) {
end = pp.position;
@@ -267,7 +256,7 @@ final class SloppyPhraseScorer extends S
/** move all PPs to their first position */
private void placeFirstPositions() throws IOException {
- for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
+ for (PhrasePositions pp : phrasePositions) {
pp.firstPosition();
}
}
@@ -275,7 +264,7 @@ final class SloppyPhraseScorer extends S
/** Fill the queue (all pps are already placed */
private void fillQueue() {
pq.clear();
- for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
+ for (PhrasePositions pp : phrasePositions) { // iterate cyclic list: done once handled max
if (pp.position > end) {
end = pp.position;
}
@@ -448,7 +437,7 @@ final class SloppyPhraseScorer extends S
private LinkedHashMap<Term,Integer> repeatingTerms() {
LinkedHashMap<Term,Integer> tord = new LinkedHashMap<>();
HashMap<Term,Integer> tcnt = new HashMap<>();
- for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
+ for (PhrasePositions pp : phrasePositions) {
for (Term t : pp.terms) {
Integer cnt0 = tcnt.get(t);
Integer cnt = cnt0==null ? new Integer(1) : new Integer(1+cnt0.intValue());
@@ -464,7 +453,7 @@ final class SloppyPhraseScorer extends S
/** find repeating pps, and for each, if has multi-terms, update this.hasMultiTermRpts */
private PhrasePositions[] repeatingPPs(HashMap<Term,Integer> rptTerms) {
ArrayList<PhrasePositions> rp = new ArrayList<>();
- for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
+ for (PhrasePositions pp : phrasePositions) {
for (Term t : pp.terms) {
if (rptTerms.containsKey(t)) {
rp.add(pp);
@@ -553,55 +542,47 @@ final class SloppyPhraseScorer extends S
// }
// }
- private boolean advanceMin(int target) throws IOException {
- if (!min.skipTo(target)) {
- max.doc = NO_MORE_DOCS; // for further calls to docID()
- return false;
- }
- min = min.next; // cyclic
- max = max.next; // cyclic
- return true;
- }
@Override
public int docID() {
- return max.doc;
+ return conjunction.docID();
}
@Override
public int nextDoc() throws IOException {
- return advance(max.doc + 1); // advance to the next doc after #docID()
+ int doc;
+ for (doc = conjunction.nextDoc(); doc != NO_MORE_DOCS; doc = conjunction.nextDoc()) {
+ sloppyFreq = phraseFreq(); // check for phrase
+ if (sloppyFreq != 0f) {
+ break;
+ }
+ }
+
+ return doc;
}
@Override
public float score() {
- return docScorer.score(max.doc, sloppyFreq);
+ return docScorer.score(docID(), sloppyFreq);
}
@Override
public int advance(int target) throws IOException {
assert target > docID();
- do {
- if (!advanceMin(target)) {
- return NO_MORE_DOCS;
- }
- while (min.doc < max.doc) {
- if (!advanceMin(max.doc)) {
- return NO_MORE_DOCS;
- }
- }
- // found a doc with all of the terms
+ int doc;
+ for (doc = conjunction.advance(target); doc != NO_MORE_DOCS; doc = conjunction.nextDoc()) {
sloppyFreq = phraseFreq(); // check for phrase
- target = min.doc + 1; // next target in case sloppyFreq is still 0
- } while (sloppyFreq == 0f);
+ if (sloppyFreq != 0f) {
+ break;
+ }
+ }
- // found a match
- return max.doc;
+ return doc;
}
@Override
public long cost() {
- return cost;
+ return conjunction.cost();
}
@Override
@@ -612,36 +593,7 @@ final class SloppyPhraseScorer extends S
return new TwoPhaseDocIdSetIterator() {
@Override
public DocIdSetIterator approximation() {
- return new DocIdSetIterator() {
- @Override
- public int docID() {
- return SloppyPhraseScorer.this.docID();
- }
-
- @Override
- public int nextDoc() throws IOException {
- return advance(max.doc + 1);
- }
-
- @Override
- public int advance(int target) throws IOException {
- assert target > docID();
- if (!advanceMin(target)) {
- return NO_MORE_DOCS;
- }
- while (min.doc < max.doc) {
- if (!advanceMin(max.doc)) {
- return NO_MORE_DOCS;
- }
- }
- return max.doc;
- }
-
- @Override
- public long cost() {
- return SloppyPhraseScorer.this.cost();
- }
- };
+ return conjunction;
}
@Override