You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2015/02/20 16:41:26 UTC

svn commit: r1661144 - /lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java

Author: jpountz
Date: Fri Feb 20 15:41:26 2015
New Revision: 1661144

URL: http://svn.apache.org/r1661144
Log:
LUCENE-6260: Simplify ExactPhraseScorer.

Modified:
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java?rev=1661144&r1=1661143&r2=1661144&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java Fri Feb 20 15:41:26 2015
@@ -19,7 +19,6 @@ package org.apache.lucene.search;
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.List;
 
 import org.apache.lucene.index.PostingsEnum;
@@ -27,60 +26,40 @@ import org.apache.lucene.search.similari
 import org.apache.lucene.util.BytesRef;
 
 final class ExactPhraseScorer extends Scorer {
-  private final int endMinus1;
 
-  private final static int CHUNK = 4096;
+  private static class PostingsAndPosition {
+    private final PostingsEnum postings;
+    private final int offset;
+    private int freq, upTo, pos;
 
-  private int gen;
-  private final int[] counts = new int[CHUNK];
-  private final int[] gens = new int[CHUNK];
-
-  private final long cost;
-
-  private final static class ChunkState {
-    final PostingsEnum posEnum;
-    final int offset;
-    int posUpto;
-    int posLimit;
-    int pos;
-    int lastPos;
-
-    public ChunkState(PostingsEnum posEnum, int offset) {
-      this.posEnum = posEnum;
+    public PostingsAndPosition(PostingsEnum postings, int offset) {
+      this.postings = postings;
       this.offset = offset;
     }
   }
 
   private final ConjunctionDISI conjunction;
-
-  private final ChunkState[] chunkStates;
-  private final PostingsEnum lead;
+  private final PostingsAndPosition[] postings;
 
   private int freq;
 
   private final Similarity.SimScorer docScorer;
   private final boolean needsScores;
-  
+
   ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
                     Similarity.SimScorer docScorer, boolean needsScores) throws IOException {
     super(weight);
     this.docScorer = docScorer;
     this.needsScores = needsScores;
 
-    chunkStates = new ChunkState[postings.length];
-
-    endMinus1 = postings.length-1;
-    
-    lead = postings[0].postings;
-    // min(cost)
-    cost = lead.cost();
-
     List<DocIdSetIterator> iterators = new ArrayList<>();
-    for(int i=0;i<postings.length;i++) {
-      chunkStates[i] = new ChunkState(postings[i].postings, -postings[i].position);
-      iterators.add(postings[i].postings);
+    List<PostingsAndPosition> postingsAndPositions = new ArrayList<>();
+    for(PhraseQuery.PostingsAndFreq posting : postings) {
+      iterators.add(posting.postings);
+      postingsAndPositions.add(new PostingsAndPosition(posting.postings, posting.position));
     }
     conjunction = ConjunctionDISI.intersect(iterators);
+    this.postings = postingsAndPositions.toArray(new PostingsAndPosition[postingsAndPositions.size()]);
   }
 
   @Override
@@ -157,129 +136,71 @@ final class ExactPhraseScorer extends Sc
     return docScorer.score(docID(), freq);
   }
 
-  private int phraseFreq() throws IOException {
-
-    freq = 0;
-
-    // init chunks
-    for(int i=0;i<chunkStates.length;i++) {
-      final ChunkState cs = chunkStates[i];
-      cs.posLimit = cs.posEnum.freq();
-      cs.pos = cs.offset + cs.posEnum.nextPosition();
-      cs.posUpto = 1;
-      cs.lastPos = -1;
+  /** Advance the given pos enum to the first doc on or after {@code target}.
+   *  Return {@code false} if the enum was exhausted before reaching
+   *  {@code target} and {@code true} otherwise. */
+  private static boolean advancePosition(PostingsAndPosition posting, int target) throws IOException {
+    while (posting.pos < target) {
+      if (posting.upTo == posting.freq) {
+        return false;
+      } else {
+        posting.pos = posting.postings.nextPosition();
+        posting.upTo += 1;
+      }
     }
+    return true;
+  }
 
-    int chunkStart = 0;
-    int chunkEnd = CHUNK;
-
-    // process chunk by chunk
-    boolean end = false;
-
-    // TODO: we could fold in chunkStart into offset and
-    // save one subtract per pos incr
-
-    while(!end) {
-
-      gen++;
-
-      if (gen == 0) {
-        // wraparound
-        Arrays.fill(gens, 0);
-        gen++;
-      }
+  private int phraseFreq() throws IOException {
+    // reset state
+    final PostingsAndPosition[] postings = this.postings;
+    for (PostingsAndPosition posting : postings) {
+      posting.freq = posting.postings.freq();
+      posting.pos = posting.postings.nextPosition();
+      posting.upTo = 1;
+    }
 
-      // first term
-      {
-        final ChunkState cs = chunkStates[0];
-        while(cs.pos < chunkEnd) {
-          if (cs.pos > cs.lastPos) {
-            cs.lastPos = cs.pos;
-            final int posIndex = cs.pos - chunkStart;
-            counts[posIndex] = 1;
-            assert gens[posIndex] != gen;
-            gens[posIndex] = gen;
-          }
+    int freq = 0;
+    final PostingsAndPosition lead = postings[0];
 
-          if (cs.posUpto == cs.posLimit) {
-            end = true;
-            break;
-          }
-          cs.posUpto++;
-          cs.pos = cs.offset + cs.posEnum.nextPosition();
+    advanceHead:
+    while (true) {
+      final int phrasePos = lead.pos - lead.offset;
+      for (int j = 1; j < postings.length; ++j) {
+        final PostingsAndPosition posting = postings[j];
+        final int expectedPos = phrasePos + posting.offset;
+
+        // advance up to the same position as the lead
+        if (advancePosition(posting, expectedPos) == false) {
+          break advanceHead;
         }
-      }
-
-      // middle terms
-      boolean any = true;
-      for(int t=1;t<endMinus1;t++) {
-        final ChunkState cs = chunkStates[t];
-        any = false;
-        while(cs.pos < chunkEnd) {
-          if (cs.pos > cs.lastPos) {
-            cs.lastPos = cs.pos;
-            final int posIndex = cs.pos - chunkStart;
-            if (posIndex >= 0 && gens[posIndex] == gen && counts[posIndex] == t) {
-              // viable
-              counts[posIndex]++;
-              any = true;
-            }
-          }
 
-          if (cs.posUpto == cs.posLimit) {
-            end = true;
-            break;
+        if (posting.pos != expectedPos) { // we advanced too far
+          if (advancePosition(lead, posting.pos - posting.offset + lead.offset)) {
+            continue advanceHead;
+          } else {
+            break advanceHead;
           }
-          cs.posUpto++;
-          cs.pos = cs.offset + cs.posEnum.nextPosition();
-        }
-
-        if (!any) {
-          break;
         }
       }
 
-      if (!any) {
-        // petered out for this chunk
-        chunkStart += CHUNK;
-        chunkEnd += CHUNK;
-        continue;
+      freq += 1;
+      if (needsScores == false) {
+        break;
       }
 
-      // last term
-
-      {
-        final ChunkState cs = chunkStates[endMinus1];
-        while(cs.pos < chunkEnd) {
-          if (cs.pos > cs.lastPos) {
-            cs.lastPos = cs.pos;
-            final int posIndex = cs.pos - chunkStart;
-            if (posIndex >= 0 && gens[posIndex] == gen && counts[posIndex] == endMinus1) {
-              freq++;
-              if (!needsScores) {
-                return freq; // we determined there was a match.
-              }
-            }
-          }
-
-          if (cs.posUpto == cs.posLimit) {
-            end = true;
-            break;
-          }
-          cs.posUpto++;
-          cs.pos = cs.offset + cs.posEnum.nextPosition();
-        }
+      if (lead.upTo == lead.freq) {
+        break;
       }
-
-      chunkStart += CHUNK;
-      chunkEnd += CHUNK;
+      lead.pos = lead.postings.nextPosition();
+      lead.upTo += 1;
     }
 
-    return freq;
+    return this.freq = freq;
   }
 
   @Override
   public long cost() {
-    return cost;
+    return conjunction.cost();
   }
 }