You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ro...@apache.org on 2015/08/03 17:58:12 UTC

svn commit: r1693924 - in /lucene/dev/branches/branch_5x: ./ lucene/ lucene/core/ lucene/core/src/java/org/apache/lucene/search/payloads/ lucene/core/src/java/org/apache/lucene/search/spans/ lucene/core/src/test/org/apache/lucene/search/payloads/ lucene/core/src/test/org/apache/lucene/search/spans/

Author: romseygeek
Date: Mon Aug  3 15:58:11 2015
New Revision: 1693924

URL: http://svn.apache.org/r1693924
Log:
LUCENE-6706: Add PayloadScoreQuery

Added:
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadScoreQuery.java
      - copied unchanged from r1693921, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadScoreQuery.java
    lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadScoreQuery.java
      - copied unchanged from r1693921, lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadScoreQuery.java
Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/lucene/   (props changed)
    lucene/dev/branches/branch_5x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_5x/lucene/core/   (props changed)
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/payloads/package-info.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/ContainSpans.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java
    lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadExplanations.java
    lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java

Modified: lucene/dev/branches/branch_5x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/CHANGES.txt?rev=1693924&r1=1693923&r2=1693924&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/CHANGES.txt Mon Aug  3 15:58:11 2015
@@ -105,6 +105,9 @@ New Features
 * LUCENE-6695: Added a new BlendedTermQuery to blend statistics across several
   terms. (Simon Willnauer, Adrien Grand)
 
+* LUCENE-6706: Added a new PayloadScoreQuery that generalises the behaviour of
+  PayloadTermQuery and PayloadNearQuery to all Span queries. (Alan Woodward)
+
 * LUCENE-6697: Add experimental range tree doc values format and
   queries, based on a 1D version of the spatial BKD tree, for a faster
   and smaller alternative to postings-based numeric and binary term

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java?rev=1693924&r1=1693923&r2=1693924&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java Mon Aug  3 15:58:11 2015
@@ -39,7 +39,6 @@ import org.apache.lucene.search.spans.Sp
 import org.apache.lucene.search.spans.SpanScorer;
 import org.apache.lucene.search.spans.SpanWeight;
 import org.apache.lucene.search.spans.Spans;
-import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.ToStringUtils;
 
@@ -56,6 +55,8 @@ import org.apache.lucene.util.ToStringUt
  * Payload scores are aggregated using a pluggable {@link PayloadFunction}.
  *
  * @see org.apache.lucene.search.similarities.Similarity.SimScorer#computePayloadFactor(int, int, int, BytesRef)
+ *
+ * @deprecated use {@link PayloadScoreQuery} to wrap {@link SpanNearQuery}
  */
 public class PayloadNearQuery extends SpanNearQuery {
 
@@ -215,22 +216,17 @@ public class PayloadNearQuery extends Sp
       }
     }
 
-    //
     @Override
-    protected void setFreqCurrentDoc() throws IOException {
-      freq = 0.0f;
+    protected void doStartCurrentDoc() throws IOException {
       payloadScore = 0;
       payloadsSeen = 0;
-      int startPos = spans.nextStartPosition();
-      assert startPos != Spans.NO_MORE_POSITIONS : "initial startPos NO_MORE_POSITIONS, spans="+spans;
-      do {
-        int matchLength = spans.endPosition() - startPos;
-        freq += docScorer.computeSlopFactor(matchLength);
-        collector.reset();
-        spans.collect(collector);
-        processPayloads(collector.getPayloads(), startPos, spans.endPosition());
-        startPos = spans.nextStartPosition();
-      } while (startPos != Spans.NO_MORE_POSITIONS);
+    }
+
+    @Override
+    protected void doCurrentSpans() throws IOException {
+      collector.reset();
+      spans.collect(collector);
+      processPayloads(collector.getPayloads(), spans.startPosition(), spans.endPosition());
     }
 
     @Override

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java?rev=1693924&r1=1693923&r2=1693924&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java Mon Aug  3 15:58:11 2015
@@ -51,6 +51,8 @@ import org.apache.lucene.util.BytesRef;
  * <p>
  * Payload scores are aggregated using a pluggable {@link PayloadFunction}.
  * @see org.apache.lucene.search.similarities.Similarity.SimScorer#computePayloadFactor(int, int, int, BytesRef)
+ *
+ * @deprecated use {@link PayloadScoreQuery} to wrap {@link SpanTermQuery}
  **/
 public class PayloadTermQuery extends SpanTermQuery {
   protected PayloadFunction function;
@@ -116,27 +118,16 @@ public class PayloadTermQuery extends Sp
       }
 
       @Override
-      protected void setFreqCurrentDoc() throws IOException {
-        freq = 0.0f;
-        numMatches = 0;
+      protected void doStartCurrentDoc() throws IOException {
         payloadScore = 0;
         payloadsSeen = 0;
-        int startPos = spans.nextStartPosition();
-        assert startPos != Spans.NO_MORE_POSITIONS : "initial startPos NO_MORE_POSITIONS, spans="+spans;
-        do {
-          int matchLength = spans.endPosition() - startPos;
-          if (docScorer == null) {
-            freq = 1;
-            return;
-          }
-          freq += docScorer.computeSlopFactor(matchLength);
-          numMatches++;
-          payloadCollector.reset();
-          spans.collect(payloadCollector);
-          processPayload();
+      }
 
-          startPos = spans.nextStartPosition();
-        } while (startPos != Spans.NO_MORE_POSITIONS);
+      @Override
+      protected void doCurrentSpans() throws IOException {
+        payloadCollector.reset();
+        spans.collect(payloadCollector);
+        processPayload();
       }
 
       protected void processPayload() throws IOException {

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/payloads/package-info.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/payloads/package-info.java?rev=1693924&r1=1693923&r2=1693924&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/payloads/package-info.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/payloads/package-info.java Mon Aug  3 15:58:11 2015
@@ -20,6 +20,10 @@
  * <p>
  *   The following Query implementations are provided:
  *   <ol>
+ *    <li>
+ *      {@link org.apache.lucene.search.payloads.PayloadScoreQuery PayloadScoreQuery} --
+ *        Boost a document's score from a SpanQuery based on the values of the payloads located at the matching terms
+ *    </li>
  *    <li>{@link org.apache.lucene.search.payloads.PayloadTermQuery PayloadTermQuery} -- Boost a term's score based on the value of the payload located at that term.</li>
  *    <li>{@link org.apache.lucene.search.payloads.PayloadNearQuery PayloadNearQuery} -- A {@link org.apache.lucene.search.spans.SpanNearQuery SpanNearQuery} that factors in the value of the payloads located 
  *        at each of the positions where the spans occur.</li>

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/ContainSpans.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/ContainSpans.java?rev=1693924&r1=1693923&r2=1693924&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/ContainSpans.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/ContainSpans.java Mon Aug  3 15:58:11 2015
@@ -54,7 +54,8 @@ abstract class ContainSpans extends Conj
 
   @Override
   public void collect(SpanCollector collector) throws IOException {
-    sourceSpans.collect(collector);
+    bigSpans.collect(collector);
+    littleSpans.collect(collector);
   }
 
 }

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java?rev=1693924&r1=1693923&r2=1693924&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java Mon Aug  3 15:58:11 2015
@@ -76,10 +76,12 @@ public class SpanScorer extends Scorer {
    * <p>
    * This will be called at most once per document.
    */
-  protected void setFreqCurrentDoc() throws IOException {
+  protected final void setFreqCurrentDoc() throws IOException {
     freq = 0.0f;
     numMatches = 0;
 
+    doStartCurrentDoc();
+
     assert spans.startPosition() == -1 : "incorrect initial start position, spans="+spans;
     assert spans.endPosition() == -1 : "incorrect initial end position, spans="+spans;
     int prevStartPos = -1;
@@ -100,6 +102,7 @@ public class SpanScorer extends Scorer {
         return;
       }
       freq += docScorer.computeSlopFactor(spans.width());
+      doCurrentSpans();
       prevStartPos = startPos;
       prevEndPos = endPos;
       startPos = spans.nextStartPosition();
@@ -108,6 +111,16 @@ public class SpanScorer extends Scorer {
     assert spans.startPosition() == Spans.NO_MORE_POSITIONS : "incorrect final start position, spans="+spans;
     assert spans.endPosition() == Spans.NO_MORE_POSITIONS : "incorrect final end position, spans="+spans;
   }
+
+  /**
+   * Called before the current doc's frequency is calculated
+   */
+  protected void doStartCurrentDoc() throws IOException {}
+
+  /**
+   * Called each time the scorer's Spans is advanced during frequency calculation
+   */
+  protected void doCurrentSpans() throws IOException {}
   
   /**
    * Score the current doc. The default implementation scores the doc 

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java?rev=1693924&r1=1693923&r2=1693924&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java Mon Aug  3 15:58:11 2015
@@ -33,7 +33,6 @@ import org.apache.lucene.search.TermStat
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.search.similarities.Similarity.SimScorer;
-import org.apache.lucene.util.Bits;
 
 /**
  * Expert-only.  Public for use by other weight implementations
@@ -143,10 +142,14 @@ public abstract class SpanWeight extends
     }
 
     Spans spans = getSpans(context, Postings.POSITIONS);
-    Similarity.SimScorer simScorer = simWeight == null ? null : similarity.simScorer(simWeight, context);
+    Similarity.SimScorer simScorer = getSimScorer(context);
     return (spans == null) ? null : new SpanScorer(spans, this, simScorer);
   }
 
+  public Similarity.SimScorer getSimScorer(LeafReaderContext context) throws IOException {
+    return simWeight == null ? null : similarity.simScorer(simWeight, context);
+  }
+
   @Override
   public Explanation explain(LeafReaderContext context, int doc) throws IOException {
     SpanScorer scorer = (SpanScorer) scorer(context);

Modified: lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadExplanations.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadExplanations.java?rev=1693924&r1=1693923&r2=1693924&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadExplanations.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadExplanations.java Mon Aug  3 15:58:11 2015
@@ -18,17 +18,20 @@ package org.apache.lucene.search.payload
  */
 
 import org.apache.lucene.index.Term;
-import org.apache.lucene.search.similarities.DefaultSimilarity;
-import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.search.BaseExplanationTestCase;
+import org.apache.lucene.search.similarities.DefaultSimilarity;
+import org.apache.lucene.search.spans.SpanNearQuery;
+import org.apache.lucene.search.spans.SpanOrQuery;
 import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.util.BytesRef;
 
 /**
  * TestExplanations subclass focusing on payload queries
  */
 public class TestPayloadExplanations extends BaseExplanationTestCase {
-  private PayloadFunction functions[] = new PayloadFunction[] { 
+
+  private static PayloadFunction functions[] = new PayloadFunction[] {
       new AveragePayloadFunction(),
       new MinPayloadFunction(),
       new MaxPayloadFunction(),
@@ -89,4 +92,45 @@ public class TestPayloadExplanations ext
   }
 
   // TODO: test the payloadnear query too!
+
+  /*
+    protected static final String[] docFields = {
+    "w1 w2 w3 w4 w5",
+    "w1 w3 w2 w3 zz",
+    "w1 xx w2 yy w3",
+    "w1 w3 xx w2 yy w3 zz"
+  };
+   */
+
+  public void testAllFunctions(SpanQuery query, int[] expected) throws Exception {
+    for (PayloadFunction fn : functions) {
+      qtest(new PayloadScoreQuery(query, fn), expected);
+    }
+  }
+
+  public void testSimpleTerm() throws Exception {
+    SpanTermQuery q = new SpanTermQuery(new Term(FIELD, "w2"));
+    testAllFunctions(q, new int[]{ 0, 1, 2, 3});
+  }
+
+  public void testOrTerm() throws Exception {
+    SpanOrQuery q = new SpanOrQuery(
+        new SpanTermQuery(new Term(FIELD, "xx")), new SpanTermQuery(new Term(FIELD, "yy"))
+    );
+    testAllFunctions(q, new int[]{ 2, 3 });
+  }
+
+  public void testOrderedNearQuery() throws Exception {
+    SpanNearQuery q = new SpanNearQuery(new SpanQuery[]{
+            new SpanTermQuery(new Term(FIELD, "w3")), new SpanTermQuery(new Term(FIELD, "w2"))
+        }, 1, true);
+    testAllFunctions(q, new int[]{ 1, 3 });
+  }
+
+  public void testUnorderedNearQuery() throws Exception {
+    SpanNearQuery q = new SpanNearQuery(new SpanQuery[]{
+        new SpanTermQuery(new Term(FIELD, "w2")), new SpanTermQuery(new Term(FIELD, "w3"))
+    }, 1, false);
+    testAllFunctions(q, new int[]{ 0, 1, 2, 3 });
+  }
 }

Modified: lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java?rev=1693924&r1=1693923&r2=1693924&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java Mon Aug  3 15:58:11 2015
@@ -157,11 +157,6 @@ final class JustCompileSearchSpans {
     }
 
     @Override
-    protected void setFreqCurrentDoc() {
-      throw new UnsupportedOperationException(UNSUPPORTED_MSG);
-    }
-
-    @Override
     protected float scoreCurrentDoc() throws IOException {
       throw new UnsupportedOperationException(UNSUPPORTED_MSG);
     }