You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/07/10 14:35:58 UTC
svn commit: r1609453 - in /lucene/dev/trunk/lucene/core/src/java/org/apache/lucene: codecs/lucene41/Lucene41PostingsReader.java search/ExactPhraseScorer.java search/MultiPhraseQuery.java search/PhraseQuery.java

Author: rmuir
Date: Thu Jul 10 12:35:58 2014
New Revision: 1609453

URL: http://svn.apache.org/r1609453
Log:
LUCENE-5809: Simplify ExactPhraseScorer

Modified:
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java?rev=1609453&r1=1609452&r2=1609453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsReader.java Thu Jul 10 12:35:58 2014
@@ -656,7 +656,11 @@ public final class Lucene41PostingsReade
       doc = -1;
       accum = 0;
       docUpto = 0;
-      nextSkipDoc = BLOCK_SIZE - 1;
+      if (docFreq > BLOCK_SIZE) {
+        nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block
+      } else {
+        nextSkipDoc = NO_MORE_DOCS; // not enough docs for skipping
+      }
       docBufferUpto = BLOCK_SIZE;
       skipped = false;
       return this;
@@ -781,7 +785,7 @@ public final class Lucene41PostingsReade
       //   System.out.println("  FPR.advance target=" + target);
       // }
 
-      if (docFreq > BLOCK_SIZE && target > nextSkipDoc) {
+      if (target > nextSkipDoc) {
         // if (DEBUG) {
         //   System.out.println("    try skipper");
         // }
@@ -1117,7 +1121,11 @@ public final class Lucene41PostingsReade
       doc = -1;
       accum = 0;
       docUpto = 0;
-      nextSkipDoc = BLOCK_SIZE - 1;
+      if (docFreq > BLOCK_SIZE) {
+        nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block
+      } else {
+        nextSkipDoc = NO_MORE_DOCS; // not enough docs for skipping
+      }
       docBufferUpto = BLOCK_SIZE;
       skipped = false;
       return this;
@@ -1301,7 +1309,7 @@ public final class Lucene41PostingsReade
       //   System.out.println("  FPR.advance target=" + target);
       // }
 
-      if (docFreq > BLOCK_SIZE && target > nextSkipDoc) {
+      if (target > nextSkipDoc) {
 
         // if (DEBUG) {
         //   System.out.println("    try skipper");

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java?rev=1609453&r1=1609452&r2=1609453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java Thu Jul 10 12:35:58 2014
@@ -32,26 +32,24 @@ final class ExactPhraseScorer extends Sc
   private final int[] counts = new int[CHUNK];
   private final int[] gens = new int[CHUNK];
 
-  boolean noDocs;
   private final long cost;
 
   private final static class ChunkState {
     final DocsAndPositionsEnum posEnum;
     final int offset;
-    final boolean useAdvance;
     int posUpto;
     int posLimit;
     int pos;
     int lastPos;
 
-    public ChunkState(DocsAndPositionsEnum posEnum, int offset, boolean useAdvance) {
+    public ChunkState(DocsAndPositionsEnum posEnum, int offset) {
       this.posEnum = posEnum;
       this.offset = offset;
-      this.useAdvance = useAdvance;
     }
   }
 
   private final ChunkState[] chunkStates;
+  private final DocsAndPositionsEnum lead;
 
   private int docID = -1;
   private int freq;
@@ -67,119 +65,53 @@ final class ExactPhraseScorer extends Sc
 
     endMinus1 = postings.length-1;
     
+    lead = postings[0].postings;
     // min(cost)
-    cost = postings[0].postings.cost();
+    cost = lead.cost();
 
     for(int i=0;i<postings.length;i++) {
-
-      // Coarse optimization: advance(target) is fairly
-      // costly, so, if the relative freq of the 2nd
-      // rarest term is not that much (> 1/5th) rarer than
-      // the first term, then we just use .nextDoc() when
-      // ANDing.  This buys ~15% gain for phrases where
-      // freq of rarest 2 terms is close:
-      final boolean useAdvance = postings[i].docFreq > 5*postings[0].docFreq;
-      chunkStates[i] = new ChunkState(postings[i].postings, -postings[i].position, useAdvance);
-      if (i > 0 && postings[i].postings.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
-        noDocs = true;
-        return;
-      }
+      chunkStates[i] = new ChunkState(postings[i].postings, -postings[i].position);
     }
   }
-
-  @Override
-  public int nextDoc() throws IOException {
-    while(true) {
-
-      // first (rarest) term
-      final int doc = chunkStates[0].posEnum.nextDoc();
-      if (doc == DocIdSetIterator.NO_MORE_DOCS) {
-        docID = doc;
-        return doc;
-      }
-
-      // not-first terms
-      int i = 1;
-      while(i < chunkStates.length) {
-        final ChunkState cs = chunkStates[i];
-        int doc2 = cs.posEnum.docID();
-        if (cs.useAdvance) {
-          if (doc2 < doc) {
-            doc2 = cs.posEnum.advance(doc);
-          }
-        } else {
-          int iter = 0;
-          while(doc2 < doc) {
-            // safety net -- fallback to .advance if we've
-            // done too many .nextDocs
-            if (++iter == 50) {
-              doc2 = cs.posEnum.advance(doc);
-              break;
-            } else {
-              doc2 = cs.posEnum.nextDoc();
+  
+  private int doNext(int doc) throws IOException {
+    for(;;) {
+      // TODO: don't dup this logic from conjunctionscorer :)
+      advanceHead: for(;;) {
+        for (int i = 1; i < chunkStates.length; i++) {
+          final DocsAndPositionsEnum de = chunkStates[i].posEnum;
+          if (de.docID() < doc) {
+            int d = de.advance(doc);
+
+            if (d > doc) {
+              // DocsEnum beyond the current doc - break and advance lead to the new highest doc.
+              doc = d;
+              break advanceHead;
             }
           }
         }
-        if (doc2 > doc) {
-          break;
-        }
-        i++;
-      }
-
-      if (i == chunkStates.length) {
-        // this doc has all the terms -- now test whether
-        // phrase occurs
-        docID = doc;
-
-        freq = phraseFreq();
-        if (freq != 0) {
-          return docID;
+        // all DocsEnums are on the same doc
+        if (doc == NO_MORE_DOCS) {
+          return doc;
+        } else if (phraseFreq() > 0) {
+          return doc;            // success: matches phrase
+        } else {
+          doc = lead.nextDoc();  // doesn't match phrase
         }
       }
+      // advance head for next iteration
+      doc = lead.advance(doc);
     }
   }
 
   @Override
-  public int advance(int target) throws IOException {
-
-    // first term
-    int doc = chunkStates[0].posEnum.advance(target);
-    if (doc == DocIdSetIterator.NO_MORE_DOCS) {
-      docID = DocIdSetIterator.NO_MORE_DOCS;
-      return doc;
-    }
-
-    while(true) {
-      
-      // not-first terms
-      int i = 1;
-      while(i < chunkStates.length) {
-        int doc2 = chunkStates[i].posEnum.docID();
-        if (doc2 < doc) {
-          doc2 = chunkStates[i].posEnum.advance(doc);
-        }
-        if (doc2 > doc) {
-          break;
-        }
-        i++;
-      }
-
-      if (i == chunkStates.length) {
-        // this doc has all the terms -- now test whether
-        // phrase occurs
-        docID = doc;
-        freq = phraseFreq();
-        if (freq != 0) {
-          return docID;
-        }
-      }
+  public int nextDoc() throws IOException {
+    return docID = doNext(lead.nextDoc());
+  }
 
-      doc = chunkStates[0].posEnum.nextDoc();
-      if (doc == DocIdSetIterator.NO_MORE_DOCS) {
-        docID = doc;
-        return doc;
-      }
-    }
+  @Override
+  public int advance(int target) throws IOException {
+    return docID = doNext(lead.advance(target));
   }
 
   @Override

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java?rev=1609453&r1=1609452&r2=1609453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java Thu Jul 10 12:35:58 2014
@@ -249,12 +249,7 @@ public class MultiPhraseQuery extends Qu
       }
 
       if (slop == 0) {
-        ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context));
-        if (s.noDocs) {
-          return null;
-        } else {
-          return s;
-        }
+        return new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context));
       } else {
         return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context));
       }
@@ -472,7 +467,7 @@ class UnionDocsAndPositionsEnum extends 
     }
   }
 
-  private int _doc;
+  private int _doc = -1;
   private int _freq;
   private DocsQueue _queue;
   private IntQueue _posList;

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java?rev=1609453&r1=1609452&r2=1609453&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java Thu Jul 10 12:35:58 2014
@@ -285,15 +285,9 @@ public class PhraseQuery extends Query {
       }
 
       if (slop == 0) {  // optimize exact case
-        ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context));
-        if (s.noDocs) {
-          return null;
-        } else {
-          return s;
-        }
+        return new ExactPhraseScorer(this, postingsFreqs, similarity.simScorer(stats, context));
       } else {
-        return
-          new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context));
+        return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.simScorer(stats, context));
       }
     }