You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2010/08/30 23:09:35 UTC

svn commit: r990942 - in /lucene/dev/branches/branch_3x/lucene: ./ src/java/org/apache/lucene/search/payloads/ src/test/org/apache/lucene/search/payloads/

Author: gsingers
Date: Mon Aug 30 21:09:35 2010
New Revision: 990942

URL: http://svn.apache.org/viewvc?rev=990942&view=rev
Log:
LUCENE-2272: fix payload near scoring/explain problem

Modified:
    lucene/dev/branches/branch_3x/lucene/CHANGES.txt
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/payloads/AveragePayloadFunction.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/payloads/MaxPayloadFunction.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/payloads/MinPayloadFunction.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/payloads/PayloadFunction.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java

Modified: lucene/dev/branches/branch_3x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/CHANGES.txt?rev=990942&r1=990941&r2=990942&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/CHANGES.txt Mon Aug 30 21:09:35 2010
@@ -269,6 +269,8 @@ Bug fixes
 * LUCENE-2627: Fixed bug in MMapDirectory chunking when a file is an
   exact multiple of the chunk size.  (Robert Muir)
 
+* LUCENE-2272: Fix explain in PayloadNearQuery and also fix scoring issue (Peter Keegan via Grant Ingersoll)
+
 New features
 
 * LUCENE-2128: Parallelized fetching document frequencies during weight

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/payloads/AveragePayloadFunction.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/payloads/AveragePayloadFunction.java?rev=990942&r1=990941&r2=990942&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/payloads/AveragePayloadFunction.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/payloads/AveragePayloadFunction.java Mon Aug 30 21:09:35 2010
@@ -1,5 +1,7 @@
 package org.apache.lucene.search.payloads;
 
+import java.io.IOException;
+import org.apache.lucene.search.Explanation;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -35,6 +37,14 @@ public class AveragePayloadFunction exte
   public float docScore(int docId, String field, int numPayloadsSeen, float payloadScore) {
     return numPayloadsSeen > 0 ? (payloadScore / numPayloadsSeen) : 1;
   }
+  @Override
+  public Explanation explain(int doc, int numPayloadsSeen, float payloadScore) {
+      Explanation payloadBoost = new Explanation();
+      float avgPayloadScore = (numPayloadsSeen > 0 ? (payloadScore / numPayloadsSeen) : 1);
+      payloadBoost.setValue(avgPayloadScore);
+      payloadBoost.setDescription("AveragePayloadFunction(...)");
+      return payloadBoost;
+  } 
 
   @Override
   public int hashCode() {

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/payloads/MaxPayloadFunction.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/payloads/MaxPayloadFunction.java?rev=990942&r1=990941&r2=990942&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/payloads/MaxPayloadFunction.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/payloads/MaxPayloadFunction.java Mon Aug 30 21:09:35 2010
@@ -1,5 +1,6 @@
 package org.apache.lucene.search.payloads;
 
+import org.apache.lucene.search.Explanation;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -40,6 +41,14 @@ public class MaxPayloadFunction extends 
   }
   
   @Override
+  public Explanation explain(int doc, int numPayloadsSeen, float payloadScore) {
+	    Explanation expl = new Explanation();
+	    float maxPayloadScore = (numPayloadsSeen > 0 ? payloadScore : 1);
+	    expl.setValue(maxPayloadScore);
+	    expl.setDescription("MaxPayloadFunction(...)");
+	    return expl;
+	  } 
+  @Override
   public int hashCode() {
     final int prime = 31;
     int result = 1;

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/payloads/MinPayloadFunction.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/payloads/MinPayloadFunction.java?rev=990942&r1=990941&r2=990942&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/payloads/MinPayloadFunction.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/payloads/MinPayloadFunction.java Mon Aug 30 21:09:35 2010
@@ -1,5 +1,6 @@
 package org.apache.lucene.search.payloads;
 
+import org.apache.lucene.search.Explanation;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -24,12 +25,12 @@ package org.apache.lucene.search.payload
 public class MinPayloadFunction extends PayloadFunction {
 
   @Override
-  public float currentScore(int docId, String field, int start, int end, int numPayloadsSeen, float currentScore, float currentPayloadScore) {
+	public float currentScore(int docId, String field, int start, int end, int numPayloadsSeen, float currentScore, float currentPayloadScore) {
     if (numPayloadsSeen == 0) {
       return currentPayloadScore;
     } else {
-      return Math.min(currentPayloadScore, currentScore);
-    }
+		return Math.min(currentPayloadScore, currentScore);
+	}
   }
 
   @Override
@@ -38,6 +39,14 @@ public class MinPayloadFunction extends 
   }
   
   @Override
+  public Explanation explain(int doc, int numPayloadsSeen, float payloadScore) {
+	  Explanation expl = new Explanation();
+	  float minPayloadScore = (numPayloadsSeen > 0 ? payloadScore : 1);
+	  expl.setValue(minPayloadScore);
+	  expl.setDescription("MinPayloadFunction(...)");
+	  return expl;
+  }  
+  @Override
   public int hashCode() {
     final int prime = 31;
     int result = 1;

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/payloads/PayloadFunction.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/payloads/PayloadFunction.java?rev=990942&r1=990941&r2=990942&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/payloads/PayloadFunction.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/payloads/PayloadFunction.java Mon Aug 30 21:09:35 2010
@@ -17,6 +17,7 @@ package org.apache.lucene.search.payload
  */
 
 import java.io.Serializable;
+import org.apache.lucene.search.Explanation;
 
 /**
  * An abstract class that defines a way for Payload*Query instances to transform
@@ -55,6 +56,13 @@ public abstract class PayloadFunction im
    */
   public abstract float docScore(int docId, String field, int numPayloadsSeen, float payloadScore);
   
+  public Explanation explain(int docId, int numPayloadsSeen, float payloadScore){
+	  Explanation result = new Explanation();
+	  result.setDescription("Unimpl Payload Function Explain");
+	  result.setValue(1);
+	  return result;
+  };
+  
   @Override
   public abstract int hashCode();
   

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java?rev=990942&r1=990941&r2=990942&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java Mon Aug 30 21:09:35 2010
@@ -79,7 +79,7 @@ public class PayloadNearQuery extends Sp
       newClauses[i] = (SpanQuery) clauses.get(i).clone();
     }
     PayloadNearQuery boostingNearQuery = new PayloadNearQuery(newClauses, slop,
-        inOrder);
+        inOrder, function);
     boostingNearQuery.setBoost(getBoost());
     return boostingNearQuery;
   }
@@ -152,7 +152,6 @@ public class PayloadNearQuery extends Sp
 
   public class PayloadNearSpanScorer extends SpanScorer {
     Spans spans;
-
     protected float payloadScore;
     private int payloadsSeen;
     Similarity similarity = getSimilarity();
@@ -204,18 +203,24 @@ public class PayloadNearQuery extends Sp
     //
     @Override
     protected boolean setFreqCurrentDoc() throws IOException {
-      if (!more) {
-        return false;
-      }
-      Spans[] spansArr = new Spans[1];
-      spansArr[0] = spans;
-      payloadScore = 0;
-      payloadsSeen = 0;
-      getPayloads(spansArr);
-      return super.setFreqCurrentDoc();
+        if (!more) {
+            return false;
+          }
+          doc = spans.doc();
+          freq = 0.0f;
+          payloadScore = 0;
+          payloadsSeen = 0;
+          do {
+            int matchLength = spans.end() - spans.start();
+            freq += getSimilarity().sloppyFreq(matchLength);
+            Spans[] spansArr = new Spans[1];
+            spansArr[0] = spans;
+            getPayloads(spansArr);            
+            more = spans.next();
+          } while (more && (doc == spans.doc()));
+          return true;    	
     }
 
-    @Override
     public float score() throws IOException {
 
       return super.score()
@@ -225,16 +230,14 @@ public class PayloadNearQuery extends Sp
     @Override
     protected Explanation explain(int doc) throws IOException {
       Explanation result = new Explanation();
+      // Add detail about tf/idf...
       Explanation nonPayloadExpl = super.explain(doc);
       result.addDetail(nonPayloadExpl);
-      Explanation payloadBoost = new Explanation();
-      result.addDetail(payloadBoost);
-      float avgPayloadScore = (payloadsSeen > 0 ? (payloadScore / payloadsSeen)
-          : 1);
-      payloadBoost.setValue(avgPayloadScore);
-      payloadBoost.setDescription("scorePayload(...)");
-      result.setValue(nonPayloadExpl.getValue() * avgPayloadScore);
-      result.setDescription("bnq, product of:");
+      // Add detail about payload
+      Explanation payloadExpl = function.explain(doc, payloadsSeen, payloadScore);
+      result.addDetail(payloadExpl);
+      result.setValue(nonPayloadExpl.getValue() * payloadExpl.getValue());
+      result.setDescription("PayloadNearQuery, product of:");
       return result;
     }
   }

Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java?rev=990942&r1=990941&r2=990942&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java Mon Aug 30 21:09:35 2010
@@ -32,7 +32,9 @@ import org.apache.lucene.index.Payload;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.DefaultSimilarity;
+import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
 import org.apache.lucene.search.QueryUtils;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.Searcher;
@@ -68,12 +70,14 @@ public class TestPayloadNearQuery extend
   }
 
   private class PayloadFilter extends TokenFilter {
+    String fieldName;
     int numSeen = 0;
     protected PayloadAttribute payAtt;
 
     public PayloadFilter(TokenStream input, String fieldName) {
       super(input);
-      payAtt = addAttribute(PayloadAttribute.class);
+      this.fieldName = fieldName;
+      payAtt = (PayloadAttribute) addAttribute(PayloadAttribute.class);
     }
 
     @Override
@@ -92,13 +96,13 @@ public class TestPayloadNearQuery extend
     }
   }
   
-  private PayloadNearQuery newPhraseQuery (String fieldName, String phrase, boolean inOrder) {
+  private PayloadNearQuery newPhraseQuery (String fieldName, String phrase, boolean inOrder, PayloadFunction function ) {
     String[] words = phrase.split("[\\s]+");
     SpanQuery clauses[] = new SpanQuery[words.length];
     for (int i=0;i<clauses.length;i++) {
       clauses[i] = new SpanTermQuery(new Term(fieldName, words[i]));  
     } 
-    return new PayloadNearQuery(clauses, 0, inOrder);
+    return new PayloadNearQuery(clauses, 0, inOrder, function);
   }
 
   @Override
@@ -136,7 +140,7 @@ public class TestPayloadNearQuery extend
     PayloadNearQuery query;
     TopDocs hits;
 
-    query = newPhraseQuery("field", "twenty two", true);
+    query = newPhraseQuery("field", "twenty two", true, new AveragePayloadFunction());
     QueryUtils.check(query);
 		
     // all 10 hits should have score = 3 because adjacent terms have payloads of 2,4
@@ -149,7 +153,7 @@ public class TestPayloadNearQuery extend
       assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
     }
     for (int i=1;i<10;i++) {
-      query = newPhraseQuery("field", English.intToEnglish(i)+" hundred", true);
+      query = newPhraseQuery("field", English.intToEnglish(i)+" hundred", true, new AveragePayloadFunction());
       // all should have score = 3 because adjacent terms have payloads of 2,4
       // and all the similarity factors are set to 1
       hits = searcher.search(query, null, 100);
@@ -185,7 +189,74 @@ public class TestPayloadNearQuery extend
     }
     */
   }
-
+  
+  public void testAverageFunction() throws IOException {
+	  PayloadNearQuery query;
+	  TopDocs hits;
+
+	  query = newPhraseQuery("field", "twenty two", true, new AveragePayloadFunction());
+	  QueryUtils.check(query);
+	  // all 10 hits should have score = 3 because adjacent terms have payloads of 2,4
+	  // and all the similarity factors are set to 1
+	  hits = searcher.search(query, null, 100);
+	  assertTrue("hits is null and it shouldn't be", hits != null);
+	  assertTrue("should be 10 hits", hits.totalHits == 10);
+	  for (int j = 0; j < hits.scoreDocs.length; j++) {
+		  ScoreDoc doc = hits.scoreDocs[j];
+		  assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
+		  Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc);
+		  String exp = explain.toString();
+		  assertTrue(exp, exp.indexOf("AveragePayloadFunction") > -1);
+		  assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 3, explain.getValue() == 3f);
+	  }
+  }
+  public void testMaxFunction() throws IOException {
+	  PayloadNearQuery query;
+	  TopDocs hits;
+
+	  query = newPhraseQuery("field", "twenty two", true, new MaxPayloadFunction());
+	  QueryUtils.check(query);
+	  // all 10 hits should have score = 4 (max payload value)
+	  hits = searcher.search(query, null, 100);
+	  assertTrue("hits is null and it shouldn't be", hits != null);
+	  assertTrue("should be 10 hits", hits.totalHits == 10);
+	  for (int j = 0; j < hits.scoreDocs.length; j++) {
+		  ScoreDoc doc = hits.scoreDocs[j];
+		  assertTrue(doc.score + " does not equal: " + 4, doc.score == 4);
+		  Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc);
+		  String exp = explain.toString();
+		  assertTrue(exp, exp.indexOf("MaxPayloadFunction") > -1);
+		  assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 4, explain.getValue() == 4f);
+	  }
+  }  
+  public void testMinFunction() throws IOException {
+	  PayloadNearQuery query;
+	  TopDocs hits;
+
+	  query = newPhraseQuery("field", "twenty two", true, new MinPayloadFunction());
+	  QueryUtils.check(query);
+	  // all 10 hits should have score = 2 (min payload value)
+	  hits = searcher.search(query, null, 100);
+	  assertTrue("hits is null and it shouldn't be", hits != null);
+	  assertTrue("should be 10 hits", hits.totalHits == 10);
+	  for (int j = 0; j < hits.scoreDocs.length; j++) {
+		  ScoreDoc doc = hits.scoreDocs[j];
+		  assertTrue(doc.score + " does not equal: " + 2, doc.score == 2);
+		  Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc);
+		  String exp = explain.toString();
+		  assertTrue(exp, exp.indexOf("MinPayloadFunction") > -1);
+		  assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 2, explain.getValue() == 2f);
+	  }
+  }  
+  private SpanQuery[] getClauses() {
+	    SpanNearQuery q1, q2;
+	    q1 = spanNearQuery("field2", "twenty two");
+	    q2 = spanNearQuery("field2", "twenty three");
+	    SpanQuery[] clauses = new SpanQuery[2];
+	    clauses[0] = q1;
+	    clauses[1] = q2;
+	    return clauses;
+  }
   private SpanNearQuery spanNearQuery(String fieldName, String words) {
     String[] wordList = words.split("[\\s]+");
     SpanQuery clauses[] = new SpanQuery[wordList.length];
@@ -198,7 +269,7 @@ public class TestPayloadNearQuery extend
   public void testLongerSpan() throws IOException {
     PayloadNearQuery query;
     TopDocs hits;
-    query = newPhraseQuery("field", "nine hundred ninety nine", true);
+    query = newPhraseQuery("field", "nine hundred ninety nine", true, new AveragePayloadFunction());
     hits = searcher.search(query, null, 100);
     assertTrue("hits is null and it shouldn't be", hits != null);
     ScoreDoc doc = hits.scoreDocs[0];
@@ -215,10 +286,10 @@ public class TestPayloadNearQuery extend
 
     // combine ordered and unordered spans with some nesting to make sure all payloads are counted
 
-    SpanQuery q1 = newPhraseQuery("field", "nine hundred", true);
-    SpanQuery q2 = newPhraseQuery("field", "ninety nine", true);
-    SpanQuery q3 = newPhraseQuery("field", "nine ninety", false);
-    SpanQuery q4 = newPhraseQuery("field", "hundred nine", false);
+    SpanQuery q1 = newPhraseQuery("field", "nine hundred", true, new AveragePayloadFunction());
+    SpanQuery q2 = newPhraseQuery("field", "ninety nine", true, new AveragePayloadFunction());
+    SpanQuery q3 = newPhraseQuery("field", "nine ninety", false, new AveragePayloadFunction());
+    SpanQuery q4 = newPhraseQuery("field", "hundred nine", false, new AveragePayloadFunction());
     SpanQuery[]clauses = new SpanQuery[] {new PayloadNearQuery(new SpanQuery[] {q1,q2}, 0, true), new PayloadNearQuery(new SpanQuery[] {q3,q4}, 0, false)};
     query = new PayloadNearQuery(clauses, 0, false);
     hits = searcher.search(query, null, 100);
@@ -239,7 +310,6 @@ public class TestPayloadNearQuery extend
       //we know it is size 4 here, so ignore the offset/length
       return payload[0];
     }
-    
     //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
     //Make everything else 1 so we see the effect of the payload
     //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
@@ -261,7 +331,6 @@ public class TestPayloadNearQuery extend
     @Override public float tf(float freq) {
       return 1.0f;
     }
-    
     // idf used for phrase queries
     @Override public IDFExplanation idfExplain(Collection<Term> terms, Searcher searcher) throws IOException {
       return new IDFExplanation() {