You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by ma...@apache.org on 2009/02/19 15:28:36 UTC
svn commit: r745883 - in /lucene/java/branches/lucene_2_4/src:
java/org/apache/lucene/search/spans/NearSpansOrdered.java
test/org/apache/lucene/search/spans/TestPayloadSpans.java
Author: markrmiller
Date: Thu Feb 19 14:28:34 2009
New Revision: 745883
URL: http://svn.apache.org/viewvc?rev=745883&view=rev
Log:
LUCENE-1465: (2.4 backport) NearSpansOrdered returned payloads from the first possible match rather than the correct, shortest match; payloads could be returned even if the max slop was exceeded; and the wrong payload could be returned in certain situations.
Modified:
lucene/java/branches/lucene_2_4/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java
lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java
Modified: lucene/java/branches/lucene_2_4/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_4/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java?rev=745883&r1=745882&r2=745883&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_4/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java (original)
+++ lucene/java/branches/lucene_2_4/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java Thu Feb 19 14:28:34 2009
@@ -20,11 +20,15 @@
import org.apache.lucene.index.IndexReader;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
+import java.util.HashSet;
+import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Collection;
+import java.util.Set;
/** A Spans that is formed from the ordered subspans of a SpanNearQuery
* where the subspans do not overlap and have a maximum slop between them.
@@ -234,17 +238,22 @@
private boolean shrinkToAfterShortestMatch() throws IOException {
matchStart = subSpans[subSpans.length - 1].start();
matchEnd = subSpans[subSpans.length - 1].end();
+ Set possibleMatchPayloads = new HashSet();
if (subSpans[subSpans.length - 1].isPayloadAvailable()) {
- matchPayload.addAll(subSpans[subSpans.length - 1].getPayload());
+ possibleMatchPayloads.addAll(subSpans[subSpans.length - 1].getPayload());
}
+
+ Collection possiblePayload = null;
+
int matchSlop = 0;
int lastStart = matchStart;
int lastEnd = matchEnd;
for (int i = subSpans.length - 2; i >= 0; i--) {
PayloadSpans prevSpans = subSpans[i];
-
- if (subSpans[i].isPayloadAvailable()) {
- matchPayload.addAll(0, subSpans[i].getPayload());
+ if (prevSpans.isPayloadAvailable()) {
+ Collection payload = prevSpans.getPayload();
+ possiblePayload = new ArrayList(payload.size());
+ possiblePayload.addAll(payload);
}
int prevStart = prevSpans.start();
@@ -265,9 +274,19 @@
} else { // prevSpans still before (lastStart, lastEnd)
prevStart = ppStart;
prevEnd = ppEnd;
+ if (prevSpans.isPayloadAvailable()) {
+ Collection payload = prevSpans.getPayload();
+ possiblePayload = new ArrayList(payload.size());
+ possiblePayload.addAll(payload);
+ }
}
}
}
+
+ if (possiblePayload != null) {
+ possibleMatchPayloads.addAll(possiblePayload);
+ }
+
assert prevStart <= matchStart;
if (matchStart > prevEnd) { // Only non overlapping spans add to slop.
matchSlop += (matchStart - prevEnd);
@@ -280,7 +299,14 @@
lastStart = prevStart;
lastEnd = prevEnd;
}
- return matchSlop <= allowedSlop; // ordered and allowed slop
+
+ boolean match = matchSlop <= allowedSlop;
+
+ if(match && possibleMatchPayloads.size() > 0) {
+ matchPayload.addAll(possibleMatchPayloads);
+ }
+
+ return match; // ordered and allowed slop
}
public String toString() {
@@ -288,4 +314,3 @@
(firstTime?"START":(more?(doc()+":"+start()+"-"+end()):"END"));
}
}
-
Modified: lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java?rev=745883&r1=745882&r2=745883&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java (original)
+++ lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java Thu Feb 19 14:28:34 2009
@@ -18,6 +18,7 @@
import java.io.IOException;
import java.io.Reader;
+import java.io.StringReader;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
@@ -32,6 +33,7 @@
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Payload;
@@ -40,8 +42,10 @@
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.payloads.PayloadHelper;
import org.apache.lucene.search.payloads.PayloadSpanUtil;
+import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
public class TestPayloadSpans extends TestCase {
@@ -280,6 +284,115 @@
assertTrue(seen + " does not equal: " + expectedNumSpans, seen == expectedNumSpans);
}
+
+ public void testShrinkToAfterShortestMatch() throws CorruptIndexException, // Added with the LUCENE-1465 fix: checks which payloads an ordered near(a, k) match reports
+ LockObtainFailedException, IOException {
+ RAMDirectory directory = new RAMDirectory();
+ IndexWriter writer = new IndexWriter(directory, new TestPayloadAnalyzer(),
+ IndexWriter.MaxFieldLength.LIMITED);
+ Document doc = new Document();
+ doc.add(new Field("content", new StringReader("a b c d e f g h i j a k"))); // "a" occurs twice; only the second one sits directly before "k"
+ writer.addDocument(doc);
+ writer.close();
+ // The single-document index is written; search the same in-memory directory.
+ IndexSearcher is = new IndexSearcher(directory);
+ // Build a span-near query over the terms "a" and "k" (slop 1, third argument true).
+ SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
+ SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
+ SpanQuery[] sqs = { stq1, stq2 };
+ SpanNearQuery snq = new SpanNearQuery(sqs, 1, true); // presumably true == inOrder, i.e. the NearSpansOrdered path -- confirm against SpanNearQuery
+ PayloadSpans spans = snq.getPayloadSpans(is.getIndexReader());
+ // Collect every payload reported by every span into a set of strings.
+ TopDocs topDocs = is.search(snq, 1);
+ Set payloadSet = new HashSet();
+ for (int i = 0; i < topDocs.scoreDocs.length; i++) { // NOTE(review): spans is fully drained by the inner loop on the first pass
+ while (spans.next()) {
+ Collection payloads = spans.getPayload();
+ // Each payload element is cast to byte[] and decoded to a String (platform default charset).
+ for (Iterator it = payloads.iterator(); it.hasNext();) {
+ payloadSet.add(new String((byte[]) it.next()));
+ }
+ }
+ }
+ assertEquals(2, payloadSet.size()); // exactly two distinct payloads are expected
+ assertTrue(payloadSet.contains("a:Noise:10")); // presumably <term>:Noise:<position> from PayloadFilter -- format is defined outside this hunk
+ assertTrue(payloadSet.contains("k:Noise:11"));
+ }
+
+ public void testShrinkToAfterShortestMatch2() throws CorruptIndexException, // Added with the LUCENE-1465 fix: same check with slop 0 and several "a"/"k" occurrences
+ LockObtainFailedException, IOException {
+ RAMDirectory directory = new RAMDirectory();
+ IndexWriter writer = new IndexWriter(directory, new TestPayloadAnalyzer(),
+ IndexWriter.MaxFieldLength.LIMITED);
+ Document doc = new Document();
+ doc.add(new Field("content", new StringReader("a b a d k f a h i k a k"))); // the only "a" immediately followed by "k" is the last pair
+ writer.addDocument(doc);
+ writer.close();
+ // The single-document index is written; search the same in-memory directory.
+ IndexSearcher is = new IndexSearcher(directory);
+ // Build a span-near query over the terms "a" and "k" (slop 0, third argument true).
+ SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
+ SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
+ SpanQuery[] sqs = { stq1, stq2 };
+ SpanNearQuery snq = new SpanNearQuery(sqs, 0, true); // presumably true == inOrder -- confirm against SpanNearQuery
+ PayloadSpans spans = snq.getPayloadSpans(is.getIndexReader());
+ // Collect every payload reported by every span into a set of strings.
+ TopDocs topDocs = is.search(snq, 1);
+ Set payloadSet = new HashSet();
+ for (int i = 0; i < topDocs.scoreDocs.length; i++) { // NOTE(review): spans is fully drained by the inner loop on the first pass
+ while (spans.next()) {
+ Collection payloads = spans.getPayload();
+ int cnt = 0; // NOTE(review): cnt is never read or updated in this method
+ for (Iterator it = payloads.iterator(); it.hasNext();) {
+ payloadSet.add(new String((byte[]) it.next())); // decoded with the platform default charset
+ }
+ }
+ }
+ assertEquals(2, payloadSet.size()); // exactly two distinct payloads are expected
+ assertTrue(payloadSet.contains("a:Noise:10")); // presumably <term>:Noise:<position> from PayloadFilter -- format is defined outside this hunk
+ assertTrue(payloadSet.contains("k:Noise:11"));
+ }
+
+ public void testShrinkToAfterShortestMatch3() throws CorruptIndexException, // Added with the LUCENE-1465 fix: slop 0 with "k" occurring both before and after "a"
+ LockObtainFailedException, IOException {
+ RAMDirectory directory = new RAMDirectory();
+ IndexWriter writer = new IndexWriter(directory, new TestPayloadAnalyzer(),
+ IndexWriter.MaxFieldLength.LIMITED);
+ Document doc = new Document();
+ doc.add(new Field("content", new StringReader("j k a l f k k p a t a k l k t a"))); // the only "a" immediately followed by "k" is the "a k" near the end
+ writer.addDocument(doc);
+ writer.close();
+ // The single-document index is written; search the same in-memory directory.
+ IndexSearcher is = new IndexSearcher(directory);
+ // Build a span-near query over the terms "a" and "k" (slop 0, third argument true).
+ SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
+ SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
+ SpanQuery[] sqs = { stq1, stq2 };
+ SpanNearQuery snq = new SpanNearQuery(sqs, 0, true); // presumably true == inOrder -- confirm against SpanNearQuery
+ PayloadSpans spans = snq.getPayloadSpans(is.getIndexReader());
+ // Collect every payload reported by every span into a set of strings.
+ TopDocs topDocs = is.search(snq, 1);
+ Set payloadSet = new HashSet();
+ for (int i = 0; i < topDocs.scoreDocs.length; i++) { // NOTE(review): spans is fully drained by the inner loop on the first pass
+ while (spans.next()) {
+ Collection payloads = spans.getPayload();
+ // Each payload element is cast to byte[] and decoded to a String (platform default charset).
+ for (Iterator it = payloads.iterator(); it.hasNext();) {
+ payloadSet.add(new String((byte[]) it.next()));
+ }
+ }
+ }
+ assertEquals(2, payloadSet.size()); // exactly two distinct payloads are expected
+ if(DEBUG) { // DEBUG is declared outside this hunk; when set, the collected payloads are printed
+ Iterator pit = payloadSet.iterator();
+ while (pit.hasNext()) {
+ System.out.println("match:" + pit.next());
+ }
+ }
+ assertTrue(payloadSet.contains("a:Noise:10")); // presumably <term>:Noise:<position> from PayloadFilter -- format is defined outside this hunk
+ assertTrue(payloadSet.contains("k:Noise:11"));
+ }
+
private IndexSearcher getSearcher() throws Exception {
RAMDirectory directory = new RAMDirectory();
PayloadAnalyzer analyzer = new PayloadAnalyzer();
@@ -374,4 +487,13 @@
return result;
}
}
-}
\ No newline at end of file
+
+ public class TestPayloadAnalyzer extends Analyzer { // Analyzer used by the testShrinkToAfterShortestMatch* tests added in this commit
+ // NOTE(review): declared as a non-static inner class; the visible body does not reference the enclosing instance.
+ public TokenStream tokenStream(String fieldName, Reader reader) { // builds the token chain for one field
+ TokenStream result = new LowerCaseTokenizer(reader); // tokenize the reader's text first
+ result = new PayloadFilter(result, fieldName); // wrap with PayloadFilter (defined outside this hunk); presumably attaches the payloads the tests assert on
+ return result;
+ }
+ }
+}