You are viewing a plain text version of this content. The hyperlink to the canonical version was not preserved in this plain text copy.
Posted to commits@lucene.apache.org by gs...@apache.org on 2010/08/30 22:58:48 UTC
svn commit: r990939 - in /lucene/dev/trunk/lucene: ./
src/java/org/apache/lucene/search/payloads/
src/test/org/apache/lucene/search/payloads/
Author: gsingers
Date: Mon Aug 30 20:58:47 2010
New Revision: 990939
URL: http://svn.apache.org/viewvc?rev=990939&view=rev
Log:
LUCENE-2272: fix payload near scoring/explain problem
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/AveragePayloadFunction.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/MaxPayloadFunction.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/MinPayloadFunction.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/PayloadFunction.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=990939&r1=990938&r2=990939&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Mon Aug 30 20:58:47 2010
@@ -500,6 +500,8 @@ Bug fixes
* LUCENE-2627: Fixed bug in MMapDirectory chunking when a file is an
exact multiple of the chunk size. (Robert Muir)
+* LUCENE-2272: Fix explain in PayloadNearQuery and also fix scoring issue (Peter Keegan via Grant Ingersoll)
+
New features
* LUCENE-2128: Parallelized fetching document frequencies during weight
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/AveragePayloadFunction.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/AveragePayloadFunction.java?rev=990939&r1=990938&r2=990939&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/AveragePayloadFunction.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/AveragePayloadFunction.java Mon Aug 30 20:58:47 2010
@@ -1,5 +1,7 @@
package org.apache.lucene.search.payloads;
+import java.io.IOException;
+import org.apache.lucene.search.Explanation;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -35,6 +37,14 @@ public class AveragePayloadFunction exte
public float docScore(int docId, String field, int numPayloadsSeen, float payloadScore) {
return numPayloadsSeen > 0 ? (payloadScore / numPayloadsSeen) : 1;
}
+ @Override
+ public Explanation explain(int doc, int numPayloadsSeen, float payloadScore) {
+ Explanation payloadBoost = new Explanation();
+ float avgPayloadScore = (numPayloadsSeen > 0 ? (payloadScore / numPayloadsSeen) : 1);
+ payloadBoost.setValue(avgPayloadScore);
+ payloadBoost.setDescription("AveragePayloadFunction(...)");
+ return payloadBoost;
+ }
@Override
public int hashCode() {
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/MaxPayloadFunction.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/MaxPayloadFunction.java?rev=990939&r1=990938&r2=990939&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/MaxPayloadFunction.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/MaxPayloadFunction.java Mon Aug 30 20:58:47 2010
@@ -1,5 +1,6 @@
package org.apache.lucene.search.payloads;
+import org.apache.lucene.search.Explanation;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -40,6 +41,14 @@ public class MaxPayloadFunction extends
}
@Override
+ public Explanation explain(int doc, int numPayloadsSeen, float payloadScore) {
+ Explanation expl = new Explanation();
+ float maxPayloadScore = (numPayloadsSeen > 0 ? payloadScore : 1);
+ expl.setValue(maxPayloadScore);
+ expl.setDescription("MaxPayloadFunction(...)");
+ return expl;
+ }
+ @Override
public int hashCode() {
final int prime = 31;
int result = 1;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/MinPayloadFunction.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/MinPayloadFunction.java?rev=990939&r1=990938&r2=990939&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/MinPayloadFunction.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/MinPayloadFunction.java Mon Aug 30 20:58:47 2010
@@ -1,5 +1,6 @@
package org.apache.lucene.search.payloads;
+import org.apache.lucene.search.Explanation;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -24,12 +25,12 @@ package org.apache.lucene.search.payload
public class MinPayloadFunction extends PayloadFunction {
@Override
- public float currentScore(int docId, String field, int start, int end, int numPayloadsSeen, float currentScore, float currentPayloadScore) {
+ public float currentScore(int docId, String field, int start, int end, int numPayloadsSeen, float currentScore, float currentPayloadScore) {
if (numPayloadsSeen == 0) {
return currentPayloadScore;
} else {
- return Math.min(currentPayloadScore, currentScore);
- }
+ return Math.min(currentPayloadScore, currentScore);
+ }
}
@Override
@@ -38,6 +39,14 @@ public class MinPayloadFunction extends
}
@Override
+ public Explanation explain(int doc, int numPayloadsSeen, float payloadScore) {
+ Explanation expl = new Explanation();
+ float minPayloadScore = (numPayloadsSeen > 0 ? payloadScore : 1);
+ expl.setValue(minPayloadScore);
+ expl.setDescription("MinPayloadFunction(...)");
+ return expl;
+ }
+ @Override
public int hashCode() {
final int prime = 31;
int result = 1;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/PayloadFunction.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/PayloadFunction.java?rev=990939&r1=990938&r2=990939&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/PayloadFunction.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/PayloadFunction.java Mon Aug 30 20:58:47 2010
@@ -17,6 +17,7 @@ package org.apache.lucene.search.payload
*/
import java.io.Serializable;
+import org.apache.lucene.search.Explanation;
/**
* An abstract class that defines a way for Payload*Query instances to transform
@@ -55,6 +56,13 @@ public abstract class PayloadFunction im
*/
public abstract float docScore(int docId, String field, int numPayloadsSeen, float payloadScore);
+ public Explanation explain(int docId, int numPayloadsSeen, float payloadScore){
+ Explanation result = new Explanation();
+ result.setDescription("Unimpl Payload Function Explain");
+ result.setValue(1);
+ return result;
+ };
+
@Override
public abstract int hashCode();
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java?rev=990939&r1=990938&r2=990939&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java Mon Aug 30 20:58:47 2010
@@ -79,7 +79,7 @@ public class PayloadNearQuery extends Sp
newClauses[i] = (SpanQuery) clauses.get(i).clone();
}
PayloadNearQuery boostingNearQuery = new PayloadNearQuery(newClauses, slop,
- inOrder);
+ inOrder, function);
boostingNearQuery.setBoost(getBoost());
return boostingNearQuery;
}
@@ -152,7 +152,6 @@ public class PayloadNearQuery extends Sp
public class PayloadNearSpanScorer extends SpanScorer {
Spans spans;
-
protected float payloadScore;
private int payloadsSeen;
Similarity similarity = getSimilarity();
@@ -204,18 +203,24 @@ public class PayloadNearQuery extends Sp
//
@Override
protected boolean setFreqCurrentDoc() throws IOException {
- if (!more) {
- return false;
- }
- Spans[] spansArr = new Spans[1];
- spansArr[0] = spans;
- payloadScore = 0;
- payloadsSeen = 0;
- getPayloads(spansArr);
- return super.setFreqCurrentDoc();
+ if (!more) {
+ return false;
+ }
+ doc = spans.doc();
+ freq = 0.0f;
+ payloadScore = 0;
+ payloadsSeen = 0;
+ do {
+ int matchLength = spans.end() - spans.start();
+ freq += getSimilarity().sloppyFreq(matchLength);
+ Spans[] spansArr = new Spans[1];
+ spansArr[0] = spans;
+ getPayloads(spansArr);
+ more = spans.next();
+ } while (more && (doc == spans.doc()));
+ return true;
}
- @Override
public float score() throws IOException {
return super.score()
@@ -225,16 +230,14 @@ public class PayloadNearQuery extends Sp
@Override
protected Explanation explain(int doc) throws IOException {
Explanation result = new Explanation();
+ // Add detail about tf/idf...
Explanation nonPayloadExpl = super.explain(doc);
result.addDetail(nonPayloadExpl);
- Explanation payloadBoost = new Explanation();
- result.addDetail(payloadBoost);
- float avgPayloadScore = (payloadsSeen > 0 ? (payloadScore / payloadsSeen)
- : 1);
- payloadBoost.setValue(avgPayloadScore);
- payloadBoost.setDescription("scorePayload(...)");
- result.setValue(nonPayloadExpl.getValue() * avgPayloadScore);
- result.setDescription("bnq, product of:");
+ // Add detail about payload
+ Explanation payloadExpl = function.explain(doc, payloadsSeen, payloadScore);
+ result.addDetail(payloadExpl);
+ result.setValue(nonPayloadExpl.getValue() * payloadExpl.getValue());
+ result.setDescription("PayloadNearQuery, product of:");
return result;
}
}
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java?rev=990939&r1=990938&r2=990939&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java Mon Aug 30 20:58:47 2010
@@ -32,7 +32,9 @@ import org.apache.lucene.index.Payload;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DefaultSimilarity;
+import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryUtils;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Searcher;
@@ -68,12 +70,14 @@ public class TestPayloadNearQuery extend
}
private class PayloadFilter extends TokenFilter {
+ String fieldName;
int numSeen = 0;
protected PayloadAttribute payAtt;
public PayloadFilter(TokenStream input, String fieldName) {
super(input);
- payAtt = addAttribute(PayloadAttribute.class);
+ this.fieldName = fieldName;
+ payAtt = (PayloadAttribute) addAttribute(PayloadAttribute.class);
}
@Override
@@ -92,13 +96,13 @@ public class TestPayloadNearQuery extend
}
}
- private PayloadNearQuery newPhraseQuery (String fieldName, String phrase, boolean inOrder) {
+ private PayloadNearQuery newPhraseQuery (String fieldName, String phrase, boolean inOrder, PayloadFunction function ) {
String[] words = phrase.split("[\\s]+");
SpanQuery clauses[] = new SpanQuery[words.length];
for (int i=0;i<clauses.length;i++) {
clauses[i] = new SpanTermQuery(new Term(fieldName, words[i]));
}
- return new PayloadNearQuery(clauses, 0, inOrder);
+ return new PayloadNearQuery(clauses, 0, inOrder, function);
}
@Override
@@ -136,7 +140,7 @@ public class TestPayloadNearQuery extend
PayloadNearQuery query;
TopDocs hits;
- query = newPhraseQuery("field", "twenty two", true);
+ query = newPhraseQuery("field", "twenty two", true, new AveragePayloadFunction());
QueryUtils.check(query);
// all 10 hits should have score = 3 because adjacent terms have payloads of 2,4
@@ -149,7 +153,7 @@ public class TestPayloadNearQuery extend
assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
}
for (int i=1;i<10;i++) {
- query = newPhraseQuery("field", English.intToEnglish(i)+" hundred", true);
+ query = newPhraseQuery("field", English.intToEnglish(i)+" hundred", true, new AveragePayloadFunction());
// all should have score = 3 because adjacent terms have payloads of 2,4
// and all the similarity factors are set to 1
hits = searcher.search(query, null, 100);
@@ -185,7 +189,74 @@ public class TestPayloadNearQuery extend
}
*/
}
-
+
+ public void testAverageFunction() throws IOException {
+ PayloadNearQuery query;
+ TopDocs hits;
+
+ query = newPhraseQuery("field", "twenty two", true, new AveragePayloadFunction());
+ QueryUtils.check(query);
+ // all 10 hits should have score = 3 because adjacent terms have payloads of 2,4
+ // and all the similarity factors are set to 1
+ hits = searcher.search(query, null, 100);
+ assertTrue("hits is null and it shouldn't be", hits != null);
+ assertTrue("should be 10 hits", hits.totalHits == 10);
+ for (int j = 0; j < hits.scoreDocs.length; j++) {
+ ScoreDoc doc = hits.scoreDocs[j];
+ assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
+ Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc);
+ String exp = explain.toString();
+ assertTrue(exp, exp.indexOf("AveragePayloadFunction") > -1);
+ assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 3, explain.getValue() == 3f);
+ }
+ }
+ public void testMaxFunction() throws IOException {
+ PayloadNearQuery query;
+ TopDocs hits;
+
+ query = newPhraseQuery("field", "twenty two", true, new MaxPayloadFunction());
+ QueryUtils.check(query);
+ // all 10 hits should have score = 4 (max payload value)
+ hits = searcher.search(query, null, 100);
+ assertTrue("hits is null and it shouldn't be", hits != null);
+ assertTrue("should be 10 hits", hits.totalHits == 10);
+ for (int j = 0; j < hits.scoreDocs.length; j++) {
+ ScoreDoc doc = hits.scoreDocs[j];
+ assertTrue(doc.score + " does not equal: " + 4, doc.score == 4);
+ Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc);
+ String exp = explain.toString();
+ assertTrue(exp, exp.indexOf("MaxPayloadFunction") > -1);
+ assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 4, explain.getValue() == 4f);
+ }
+ }
+ public void testMinFunction() throws IOException {
+ PayloadNearQuery query;
+ TopDocs hits;
+
+ query = newPhraseQuery("field", "twenty two", true, new MinPayloadFunction());
+ QueryUtils.check(query);
+ // all 10 hits should have score = 2 (min payload value)
+ hits = searcher.search(query, null, 100);
+ assertTrue("hits is null and it shouldn't be", hits != null);
+ assertTrue("should be 10 hits", hits.totalHits == 10);
+ for (int j = 0; j < hits.scoreDocs.length; j++) {
+ ScoreDoc doc = hits.scoreDocs[j];
+ assertTrue(doc.score + " does not equal: " + 2, doc.score == 2);
+ Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc);
+ String exp = explain.toString();
+ assertTrue(exp, exp.indexOf("MinPayloadFunction") > -1);
+ assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 2, explain.getValue() == 2f);
+ }
+ }
+ private SpanQuery[] getClauses() {
+ SpanNearQuery q1, q2;
+ q1 = spanNearQuery("field2", "twenty two");
+ q2 = spanNearQuery("field2", "twenty three");
+ SpanQuery[] clauses = new SpanQuery[2];
+ clauses[0] = q1;
+ clauses[1] = q2;
+ return clauses;
+ }
private SpanNearQuery spanNearQuery(String fieldName, String words) {
String[] wordList = words.split("[\\s]+");
SpanQuery clauses[] = new SpanQuery[wordList.length];
@@ -198,7 +269,7 @@ public class TestPayloadNearQuery extend
public void testLongerSpan() throws IOException {
PayloadNearQuery query;
TopDocs hits;
- query = newPhraseQuery("field", "nine hundred ninety nine", true);
+ query = newPhraseQuery("field", "nine hundred ninety nine", true, new AveragePayloadFunction());
hits = searcher.search(query, null, 100);
assertTrue("hits is null and it shouldn't be", hits != null);
ScoreDoc doc = hits.scoreDocs[0];
@@ -215,10 +286,10 @@ public class TestPayloadNearQuery extend
// combine ordered and unordered spans with some nesting to make sure all payloads are counted
- SpanQuery q1 = newPhraseQuery("field", "nine hundred", true);
- SpanQuery q2 = newPhraseQuery("field", "ninety nine", true);
- SpanQuery q3 = newPhraseQuery("field", "nine ninety", false);
- SpanQuery q4 = newPhraseQuery("field", "hundred nine", false);
+ SpanQuery q1 = newPhraseQuery("field", "nine hundred", true, new AveragePayloadFunction());
+ SpanQuery q2 = newPhraseQuery("field", "ninety nine", true, new AveragePayloadFunction());
+ SpanQuery q3 = newPhraseQuery("field", "nine ninety", false, new AveragePayloadFunction());
+ SpanQuery q4 = newPhraseQuery("field", "hundred nine", false, new AveragePayloadFunction());
SpanQuery[]clauses = new SpanQuery[] {new PayloadNearQuery(new SpanQuery[] {q1,q2}, 0, true), new PayloadNearQuery(new SpanQuery[] {q3,q4}, 0, false)};
query = new PayloadNearQuery(clauses, 0, false);
hits = searcher.search(query, null, 100);
@@ -239,7 +310,6 @@ public class TestPayloadNearQuery extend
//we know it is size 4 here, so ignore the offset/length
return payload[0];
}
-
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
//Make everything else 1 so we see the effect of the payload
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
@@ -261,7 +331,6 @@ public class TestPayloadNearQuery extend
@Override public float tf(float freq) {
return 1.0f;
}
-
// idf used for phrase queries
@Override public IDFExplanation idfExplain(Collection<Term> terms, Searcher searcher) throws IOException {
return new IDFExplanation() {