You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2015/02/20 16:41:26 UTC
svn commit: r1661144 -
/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
Author: jpountz
Date: Fri Feb 20 15:41:26 2015
New Revision: 1661144
URL: http://svn.apache.org/r1661144
Log:
LUCENE-6260: Simplify ExactPhraseScorer.
Modified:
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java?rev=1661144&r1=1661143&r2=1661144&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java Fri Feb 20 15:41:26 2015
@@ -19,7 +19,6 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.List;
import org.apache.lucene.index.PostingsEnum;
@@ -27,60 +26,40 @@ import org.apache.lucene.search.similari
import org.apache.lucene.util.BytesRef;
final class ExactPhraseScorer extends Scorer {
- private final int endMinus1;
- private final static int CHUNK = 4096;
+ private static class PostingsAndPosition {
+ private final PostingsEnum postings;
+ private final int offset;
+ private int freq, upTo, pos;
- private int gen;
- private final int[] counts = new int[CHUNK];
- private final int[] gens = new int[CHUNK];
-
- private final long cost;
-
- private final static class ChunkState {
- final PostingsEnum posEnum;
- final int offset;
- int posUpto;
- int posLimit;
- int pos;
- int lastPos;
-
- public ChunkState(PostingsEnum posEnum, int offset) {
- this.posEnum = posEnum;
+ public PostingsAndPosition(PostingsEnum postings, int offset) {
+ this.postings = postings;
this.offset = offset;
}
}
private final ConjunctionDISI conjunction;
-
- private final ChunkState[] chunkStates;
- private final PostingsEnum lead;
+ private final PostingsAndPosition[] postings;
private int freq;
private final Similarity.SimScorer docScorer;
private final boolean needsScores;
-
+
ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
Similarity.SimScorer docScorer, boolean needsScores) throws IOException {
super(weight);
this.docScorer = docScorer;
this.needsScores = needsScores;
- chunkStates = new ChunkState[postings.length];
-
- endMinus1 = postings.length-1;
-
- lead = postings[0].postings;
- // min(cost)
- cost = lead.cost();
-
List<DocIdSetIterator> iterators = new ArrayList<>();
- for(int i=0;i<postings.length;i++) {
- chunkStates[i] = new ChunkState(postings[i].postings, -postings[i].position);
- iterators.add(postings[i].postings);
+ List<PostingsAndPosition> postingsAndPositions = new ArrayList<>();
+ for(PhraseQuery.PostingsAndFreq posting : postings) {
+ iterators.add(posting.postings);
+ postingsAndPositions.add(new PostingsAndPosition(posting.postings, posting.position));
}
conjunction = ConjunctionDISI.intersect(iterators);
+ this.postings = postingsAndPositions.toArray(new PostingsAndPosition[postingsAndPositions.size()]);
}
@Override
@@ -157,129 +136,71 @@ final class ExactPhraseScorer extends Sc
return docScorer.score(docID(), freq);
}
- private int phraseFreq() throws IOException {
-
- freq = 0;
-
- // init chunks
- for(int i=0;i<chunkStates.length;i++) {
- final ChunkState cs = chunkStates[i];
- cs.posLimit = cs.posEnum.freq();
- cs.pos = cs.offset + cs.posEnum.nextPosition();
- cs.posUpto = 1;
- cs.lastPos = -1;
+ /** Advance the given pos enum to the first doc on or after {@code target}.
+ * Return {@code false} if the enum was exhausted before reaching
+ * {@code target} and {@code true} otherwise. */
+ private static boolean advancePosition(PostingsAndPosition posting, int target) throws IOException {
+ while (posting.pos < target) {
+ if (posting.upTo == posting.freq) {
+ return false;
+ } else {
+ posting.pos = posting.postings.nextPosition();
+ posting.upTo += 1;
+ }
}
+ return true;
+ }
- int chunkStart = 0;
- int chunkEnd = CHUNK;
-
- // process chunk by chunk
- boolean end = false;
-
- // TODO: we could fold in chunkStart into offset and
- // save one subtract per pos incr
-
- while(!end) {
-
- gen++;
-
- if (gen == 0) {
- // wraparound
- Arrays.fill(gens, 0);
- gen++;
- }
+ private int phraseFreq() throws IOException {
+ // reset state
+ final PostingsAndPosition[] postings = this.postings;
+ for (PostingsAndPosition posting : postings) {
+ posting.freq = posting.postings.freq();
+ posting.pos = posting.postings.nextPosition();
+ posting.upTo = 1;
+ }
- // first term
- {
- final ChunkState cs = chunkStates[0];
- while(cs.pos < chunkEnd) {
- if (cs.pos > cs.lastPos) {
- cs.lastPos = cs.pos;
- final int posIndex = cs.pos - chunkStart;
- counts[posIndex] = 1;
- assert gens[posIndex] != gen;
- gens[posIndex] = gen;
- }
+ int freq = 0;
+ final PostingsAndPosition lead = postings[0];
- if (cs.posUpto == cs.posLimit) {
- end = true;
- break;
- }
- cs.posUpto++;
- cs.pos = cs.offset + cs.posEnum.nextPosition();
+ advanceHead:
+ while (true) {
+ final int phrasePos = lead.pos - lead.offset;
+ for (int j = 1; j < postings.length; ++j) {
+ final PostingsAndPosition posting = postings[j];
+ final int expectedPos = phrasePos + posting.offset;
+
+ // advance up to the same position as the lead
+ if (advancePosition(posting, expectedPos) == false) {
+ break advanceHead;
}
- }
-
- // middle terms
- boolean any = true;
- for(int t=1;t<endMinus1;t++) {
- final ChunkState cs = chunkStates[t];
- any = false;
- while(cs.pos < chunkEnd) {
- if (cs.pos > cs.lastPos) {
- cs.lastPos = cs.pos;
- final int posIndex = cs.pos - chunkStart;
- if (posIndex >= 0 && gens[posIndex] == gen && counts[posIndex] == t) {
- // viable
- counts[posIndex]++;
- any = true;
- }
- }
- if (cs.posUpto == cs.posLimit) {
- end = true;
- break;
+ if (posting.pos != expectedPos) { // we advanced too far
+ if (advancePosition(lead, posting.pos - posting.offset + lead.offset)) {
+ continue advanceHead;
+ } else {
+ break advanceHead;
}
- cs.posUpto++;
- cs.pos = cs.offset + cs.posEnum.nextPosition();
- }
-
- if (!any) {
- break;
}
}
- if (!any) {
- // petered out for this chunk
- chunkStart += CHUNK;
- chunkEnd += CHUNK;
- continue;
+ freq += 1;
+ if (needsScores == false) {
+ break;
}
- // last term
-
- {
- final ChunkState cs = chunkStates[endMinus1];
- while(cs.pos < chunkEnd) {
- if (cs.pos > cs.lastPos) {
- cs.lastPos = cs.pos;
- final int posIndex = cs.pos - chunkStart;
- if (posIndex >= 0 && gens[posIndex] == gen && counts[posIndex] == endMinus1) {
- freq++;
- if (!needsScores) {
- return freq; // we determined there was a match.
- }
- }
- }
-
- if (cs.posUpto == cs.posLimit) {
- end = true;
- break;
- }
- cs.posUpto++;
- cs.pos = cs.offset + cs.posEnum.nextPosition();
- }
+ if (lead.upTo == lead.freq) {
+ break;
}
-
- chunkStart += CHUNK;
- chunkEnd += CHUNK;
+ lead.pos = lead.postings.nextPosition();
+ lead.upTo += 1;
}
- return freq;
+ return this.freq = freq;
}
@Override
public long cost() {
- return cost;
+ return conjunction.cost();
}
}