You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2015/02/23 18:58:17 UTC
svn commit: r1661728 - in /lucene/dev/branches/branch_5x: ./ lucene/ lucene/core/ lucene/core/src/java/org/apache/lucene/search/

Author: jpountz
Date: Mon Feb 23 17:58:16 2015
New Revision: 1661728

URL: http://svn.apache.org/r1661728
Log:
LUCENE-6275: Use ConjunctionDISI in SloppyPhraseScorer.

Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/lucene/   (props changed)
    lucene/dev/branches/branch_5x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_5x/lucene/core/   (props changed)
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/PhraseQueue.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java

Modified: lucene/dev/branches/branch_5x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/CHANGES.txt?rev=1661728&r1=1661727&r2=1661728&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/CHANGES.txt Mon Feb 23 17:58:16 2015
@@ -72,6 +72,10 @@ Optimizations
 * LUCENE-6263: MultiCollector automatically caches scores when several
   collectors need them. (Adrien Grand)
 
+* LUCENE-6275: SloppyPhraseScorer now uses the same logic as ConjunctionScorer
+  in order to advance doc IDs, which takes advantage of the cost() API.
+  (Adrien Grand)
+
 API Changes
 
 * LUCENE-6204, LUCENE-6208: Simplify CompoundFormat: remove files()

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java?rev=1661728&r1=1661727&r2=1661728&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java Mon Feb 23 17:58:16 2015
@@ -24,7 +24,6 @@ import org.apache.lucene.index.*;
  * Position of a term in a document that takes into account the term offset within the phrase. 
  */
 final class PhrasePositions {
-  int doc;              // current doc
   int position;         // position in doc
   int count;            // remaining pos in this doc
   int offset;           // position in phrase
@@ -42,22 +41,6 @@ final class PhrasePositions {
     this.terms = terms;
   }
 
-  final boolean next() throws IOException {  // increments to next doc
-    doc = postings.nextDoc();
-    if (doc == DocIdSetIterator.NO_MORE_DOCS) {
-      return false;
-    }
-    return true;
-  }
-
-  final boolean skipTo(int target) throws IOException {
-    doc = postings.advance(target);
-    if (doc == DocIdSetIterator.NO_MORE_DOCS) {
-      return false;
-    }
-    return true;
-  }
-
   final void firstPosition() throws IOException {
     count = postings.freq();  // read first pos
     nextPosition();
@@ -80,7 +63,7 @@ final class PhrasePositions {
   /** for debug purposes */
   @Override
   public String toString() {
-    String s = "d:"+doc+" o:"+offset+" p:"+position+" c:"+count;
+    String s = "o:"+offset+" p:"+position+" c:"+count;
     if (rptGroup >=0 ) {
       s += " rpt:"+rptGroup+",i"+rptInd;
     }

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/PhraseQueue.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/PhraseQueue.java?rev=1661728&r1=1661727&r2=1661728&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/PhraseQueue.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/PhraseQueue.java Mon Feb 23 17:58:16 2015
@@ -26,20 +26,16 @@ final class PhraseQueue extends Priority
 
   @Override
   protected final boolean lessThan(PhrasePositions pp1, PhrasePositions pp2) {
-    if (pp1.doc == pp2.doc) 
-      if (pp1.position == pp2.position)
-        // same doc and pp.position, so decide by actual term positions. 
-        // rely on: pp.position == tp.position - offset. 
-        if (pp1.offset == pp2.offset) {
-          return pp1.ord < pp2.ord;
-        } else {
-          return pp1.offset < pp2.offset;
-        }
-      else {
-        return pp1.position < pp2.position;
+    if (pp1.position == pp2.position)
+      // same doc and pp.position, so decide by actual term positions. 
+      // rely on: pp.position == tp.position - offset. 
+      if (pp1.offset == pp2.offset) {
+        return pp1.ord < pp2.ord;
+      } else {
+        return pp1.offset < pp2.offset;
       }
     else {
-      return pp1.doc < pp2.doc;
+      return pp1.position < pp2.position;
     }
   }
 }

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java?rev=1661728&r1=1661727&r2=1661728&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java Mon Feb 23 17:58:16 2015
@@ -30,7 +30,9 @@ import org.apache.lucene.search.similari
 import org.apache.lucene.util.FixedBitSet;
 
 final class SloppyPhraseScorer extends Scorer {
-  private PhrasePositions min, max;
+
+  private final ConjunctionDISI conjunction;
+  private final PhrasePositions[] phrasePositions;
 
   private float sloppyFreq; //phrase frequency in current doc as computed by phraseFreq().
 
@@ -49,7 +51,6 @@ final class SloppyPhraseScorer extends S
   private PhrasePositions[] rptStack; // temporary stack for switching colliding repeating pps 
   
   private int numMatches;
-  private final long cost;
   final boolean needsScores;
   
   SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
@@ -60,25 +61,13 @@ final class SloppyPhraseScorer extends S
     this.slop = slop;
     this.numPostings = postings==null ? 0 : postings.length;
     pq = new PhraseQueue(postings.length);
-    // min(cost)
-    cost = postings[0].postings.cost();
-    // convert tps to a list of phrase positions.
-    // note: phrase-position differs from term-position in that its position
-    // reflects the phrase offset: pp.pos = tp.pos - offset.
-    // this allows to easily identify a matching (exact) phrase 
-    // when all PhrasePositions have exactly the same position.
-    if (postings.length > 0) {
-      min = new PhrasePositions(postings[0].postings, postings[0].position, 0, postings[0].terms);
-      max = min;
-      max.doc = -1;
-      for (int i = 1; i < postings.length; i++) {
-        PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms);
-        max.next = pp;
-        max = pp;
-        max.doc = -1;
-      }
-      max.next = min; // make it cyclic for easier manipulation
+    DocIdSetIterator[] iterators = new DocIdSetIterator[postings.length];
+    phrasePositions = new PhrasePositions[postings.length];
+    for (int i = 0; i < postings.length; ++i) {
+      iterators[i] = postings[i].postings;
+      phrasePositions[i] = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms);
     }
+    conjunction = ConjunctionDISI.intersect(Arrays.asList(iterators));
   }
 
   /**
@@ -245,7 +234,7 @@ final class SloppyPhraseScorer extends S
     //System.err.println("initSimple: doc: "+min.doc);
     pq.clear();
     // position pps and build queue from list
-    for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) {  // iterate cyclic list: done once handled max
+    for (PhrasePositions pp : phrasePositions) {
       pp.firstPosition();
       if (pp.position > end) {
         end = pp.position;
@@ -267,7 +256,7 @@ final class SloppyPhraseScorer extends S
 
   /** move all PPs to their first position */
   private void placeFirstPositions() throws IOException {
-    for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
+    for (PhrasePositions pp : phrasePositions) {
       pp.firstPosition();
     }
   }
@@ -275,7 +264,7 @@ final class SloppyPhraseScorer extends S
   /** Fill the queue (all pps are already placed */
   private void fillQueue() {
     pq.clear();
-    for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) {  // iterate cyclic list: done once handled max
+    for (PhrasePositions pp : phrasePositions) {  // iterate cyclic list: done once handled max
       if (pp.position > end) {
         end = pp.position;
       }
@@ -448,7 +437,7 @@ final class SloppyPhraseScorer extends S
   private LinkedHashMap<Term,Integer> repeatingTerms() {
     LinkedHashMap<Term,Integer> tord = new LinkedHashMap<>();
     HashMap<Term,Integer> tcnt = new HashMap<>();
-    for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
+    for (PhrasePositions pp : phrasePositions) {
       for (Term t : pp.terms) {
         Integer cnt0 = tcnt.get(t);
         Integer cnt = cnt0==null ? new Integer(1) : new Integer(1+cnt0.intValue());
@@ -464,7 +453,7 @@ final class SloppyPhraseScorer extends S
   /** find repeating pps, and for each, if has multi-terms, update this.hasMultiTermRpts */
   private PhrasePositions[] repeatingPPs(HashMap<Term,Integer> rptTerms) {
     ArrayList<PhrasePositions> rp = new ArrayList<>();
-    for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
+    for (PhrasePositions pp : phrasePositions) {
       for (Term t : pp.terms) {
         if (rptTerms.containsKey(t)) {
           rp.add(pp);
@@ -553,55 +542,47 @@ final class SloppyPhraseScorer extends S
 //    }
 //  }
   
-  private boolean advanceMin(int target) throws IOException {
-    if (!min.skipTo(target)) { 
-      max.doc = NO_MORE_DOCS; // for further calls to docID() 
-      return false;
-    }
-    min = min.next; // cyclic
-    max = max.next; // cyclic
-    return true;
-  }
   
   @Override
   public int docID() {
-    return max.doc; 
+    return conjunction.docID(); 
   }
 
   @Override
   public int nextDoc() throws IOException {
-    return advance(max.doc + 1); // advance to the next doc after #docID()
+    int doc;
+    for (doc = conjunction.nextDoc(); doc != NO_MORE_DOCS; doc = conjunction.nextDoc()) {
+      sloppyFreq = phraseFreq(); // check for phrase
+      if (sloppyFreq != 0f) {
+        break;
+      }
+    }
+
+    return doc;
   }
   
   @Override
   public float score() {
-    return docScorer.score(max.doc, sloppyFreq);
+    return docScorer.score(docID(), sloppyFreq);
   }
 
   @Override
   public int advance(int target) throws IOException {
     assert target > docID();
-    do {
-      if (!advanceMin(target)) {
-        return NO_MORE_DOCS;
-      }
-      while (min.doc < max.doc) {
-        if (!advanceMin(max.doc)) {
-          return NO_MORE_DOCS;
-        }
-      }
-      // found a doc with all of the terms
+    int doc;
+    for (doc = conjunction.advance(target); doc != NO_MORE_DOCS; doc = conjunction.nextDoc()) {
       sloppyFreq = phraseFreq(); // check for phrase
-      target = min.doc + 1; // next target in case sloppyFreq is still 0
-    } while (sloppyFreq == 0f);
+      if (sloppyFreq != 0f) {
+        break;
+      }
+    }
 
-    // found a match
-    return max.doc;
+    return doc;
   }
 
   @Override
   public long cost() {
-    return cost;
+    return conjunction.cost();
   }
 
   @Override
@@ -612,36 +593,7 @@ final class SloppyPhraseScorer extends S
     return new TwoPhaseDocIdSetIterator() {
       @Override
       public DocIdSetIterator approximation() {
-        return new DocIdSetIterator() {
-          @Override
-          public int docID() {
-            return SloppyPhraseScorer.this.docID();
-          }
-
-          @Override
-          public int nextDoc() throws IOException {
-            return advance(max.doc + 1);
-          }
-
-          @Override
-          public int advance(int target) throws IOException {
-            assert target > docID();
-            if (!advanceMin(target)) {
-              return NO_MORE_DOCS;
-            }
-            while (min.doc < max.doc) {
-              if (!advanceMin(max.doc)) {
-                return NO_MORE_DOCS;
-              }
-            }
-            return max.doc;
-          }
-
-          @Override
-          public long cost() {
-            return SloppyPhraseScorer.this.cost();
-          }
-        };
+        return conjunction;
       }
 
       @Override