You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/07/10 14:35:58 UTC
svn commit: r1609453 - in
/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene:
codecs/lucene41/Lucene41PostingsReader.java search/ExactPhraseScorer.java
search/MultiPhraseQuery.java search/PhraseQuery.java
Author: rmuir
Date: Thu Jul 10 12:35:58 2014
New Revision: 1609453
URL: http://svn.apache.org/r1609453
Log:
LUCENE-5809: Simplify ExactPhraseScorer
Modified:
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java?rev=1609453&r1=1609452&r2=1609453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java Thu Jul 10 12:35:58 2014
@@ -656,7 +656,11 @@ public final class Lucene41PostingsReade
doc = -1;
accum = 0;
docUpto = 0;
- nextSkipDoc = BLOCK_SIZE - 1;
+ if (docFreq > BLOCK_SIZE) {
+ nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block
+ } else {
+ nextSkipDoc = NO_MORE_DOCS; // not enough docs for skipping
+ }
docBufferUpto = BLOCK_SIZE;
skipped = false;
return this;
@@ -781,7 +785,7 @@ public final class Lucene41PostingsReade
// System.out.println(" FPR.advance target=" + target);
// }
- if (docFreq > BLOCK_SIZE && target > nextSkipDoc) {
+ if (target > nextSkipDoc) {
// if (DEBUG) {
// System.out.println(" try skipper");
// }
@@ -1117,7 +1121,11 @@ public final class Lucene41PostingsReade
doc = -1;
accum = 0;
docUpto = 0;
- nextSkipDoc = BLOCK_SIZE - 1;
+ if (docFreq > BLOCK_SIZE) {
+ nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block
+ } else {
+ nextSkipDoc = NO_MORE_DOCS; // not enough docs for skipping
+ }
docBufferUpto = BLOCK_SIZE;
skipped = false;
return this;
@@ -1301,7 +1309,7 @@ public final class Lucene41PostingsReade
// System.out.println(" FPR.advance target=" + target);
// }
- if (docFreq > BLOCK_SIZE && target > nextSkipDoc) {
+ if (target > nextSkipDoc) {
// if (DEBUG) {
// System.out.println(" try skipper");
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java?rev=1609453&r1=1609452&r2=1609453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java Thu Jul 10 12:35:58 2014
@@ -32,26 +32,24 @@ final class ExactPhraseScorer extends Sc
private final int[] counts = new int[CHUNK];
private final int[] gens = new int[CHUNK];
- boolean noDocs;
private final long cost;
private final static class ChunkState {
final DocsAndPositionsEnum posEnum;
final int offset;
- final boolean useAdvance;
int posUpto;
int posLimit;
int pos;
int lastPos;
- public ChunkState(DocsAndPositionsEnum posEnum, int offset, boolean useAdvance) {
+ public ChunkState(DocsAndPositionsEnum posEnum, int offset) {
this.posEnum = posEnum;
this.offset = offset;
- this.useAdvance = useAdvance;
}
}
private final ChunkState[] chunkStates;
+ private final DocsAndPositionsEnum lead;
private int docID = -1;
private int freq;
@@ -67,119 +65,53 @@ final class ExactPhraseScorer extends Sc
endMinus1 = postings.length-1;
+ lead = postings[0].postings;
// min(cost)
- cost = postings[0].postings.cost();
+ cost = lead.cost();
for(int i=0;i<postings.length;i++) {
-
- // Coarse optimization: advance(target) is fairly
- // costly, so, if the relative freq of the 2nd
- // rarest term is not that much (> 1/5th) rarer than
- // the first term, then we just use .nextDoc() when
- // ANDing. This buys ~15% gain for phrases where
- // freq of rarest 2 terms is close:
- final boolean useAdvance = postings[i].docFreq > 5*postings[0].docFreq;
- chunkStates[i] = new ChunkState(postings[i].postings, -postings[i].position, useAdvance);
- if (i > 0 && postings[i].postings.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
- noDocs = true;
- return;
- }
+ chunkStates[i] = new ChunkState(postings[i].postings, -postings[i].position);
}
}
-
- @Override
- public int nextDoc() throws IOException {
- while(true) {
-
- // first (rarest) term
- final int doc = chunkStates[0].posEnum.nextDoc();
- if (doc == DocIdSetIterator.NO_MORE_DOCS) {
- docID = doc;
- return doc;
- }
-
- // not-first terms
- int i = 1;
- while(i < chunkStates.length) {
- final ChunkState cs = chunkStates[i];
- int doc2 = cs.posEnum.docID();
- if (cs.useAdvance) {
- if (doc2 < doc) {
- doc2 = cs.posEnum.advance(doc);
- }
- } else {
- int iter = 0;
- while(doc2 < doc) {
- // safety net -- fallback to .advance if we've
- // done too many .nextDocs
- if (++iter == 50) {
- doc2 = cs.posEnum.advance(doc);
- break;
- } else {
- doc2 = cs.posEnum.nextDoc();
+
+ private int doNext(int doc) throws IOException {
+ for(;;) {
+ // TODO: don't dup this logic from conjunctionscorer :)
+ advanceHead: for(;;) {
+ for (int i = 1; i < chunkStates.length; i++) {
+ final DocsAndPositionsEnum de = chunkStates[i].posEnum;
+ if (de.docID() < doc) {
+ int d = de.advance(doc);
+
+ if (d > doc) {
+ // DocsEnum beyond the current doc - break and advance lead to the new highest doc.
+ doc = d;
+ break advanceHead;
}
}
}
- if (doc2 > doc) {
- break;
- }
- i++;
- }
-
- if (i == chunkStates.length) {
- // this doc has all the terms -- now test whether
- // phrase occurs
- docID = doc;
-
- freq = phraseFreq();
- if (freq != 0) {
- return docID;
+ // all DocsEnums are on the same doc
+ if (doc == NO_MORE_DOCS) {
+ return doc;
+ } else if (phraseFreq() > 0) {
+ return doc; // success: matches phrase
+ } else {
+ doc = lead.nextDoc(); // doesn't match phrase
}
}
+ // advance head for next iteration
+ doc = lead.advance(doc);
}
}
@Override
- public int advance(int target) throws IOException {
-
- // first term
- int doc = chunkStates[0].posEnum.advance(target);
- if (doc == DocIdSetIterator.NO_MORE_DOCS) {
- docID = DocIdSetIterator.NO_MORE_DOCS;
- return doc;
- }
-
- while(true) {
-
- // not-first terms
- int i = 1;
- while(i < chunkStates.length) {
- int doc2 = chunkStates[i].posEnum.docID();
- if (doc2 < doc) {
- doc2 = chunkStates[i].posEnum.advance(doc);
- }
- if (doc2 > doc) {
- break;
- }
- i++;
- }
-
- if (i == chunkStates.length) {
- // this doc has all the terms -- now test whether
- // phrase occurs
- docID = doc;
- freq = phraseFreq();
- if (freq != 0) {
- return docID;
- }
- }
+ public int nextDoc() throws IOException {
+ return docID = doNext(lead.nextDoc());
+ }
- doc = chunkStates[0].posEnum.nextDoc();
- if (doc == DocIdSetIterator.NO_MORE_DOCS) {
- docID = doc;
- return doc;
- }
- }
+ @Override
+ public int advance(int target) throws IOException {
+ return docID = doNext(lead.advance(target));
}
@Override
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java?rev=1609453&r1=1609452&r2=1609453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java Thu Jul 10 12:35:58 2014
@@ -249,12 +249,7 @@ public class MultiPhraseQuery extends Qu
}
if (slop == 0) {
- ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context));
- if (s.noDocs) {
- return null;
- } else {
- return s;
- }
+ return new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context));
} else {
return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context));
}
@@ -472,7 +467,7 @@ class UnionDocsAndPositionsEnum extends
}
}
- private int _doc;
+ private int _doc = -1;
private int _freq;
private DocsQueue _queue;
private IntQueue _posList;
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java?rev=1609453&r1=1609452&r2=1609453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java Thu Jul 10 12:35:58 2014
@@ -285,15 +285,9 @@ public class PhraseQuery extends Query {
}
if (slop == 0) { // optimize exact case
- ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context));
- if (s.noDocs) {
- return null;
- } else {
- return s;
- }
+ return new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context));
} else {
- return
- new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context));
+ return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context));
}
}