You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by ma...@apache.org on 2009/02/19 15:28:36 UTC

svn commit: r745883 - in /lucene/java/branches/lucene_2_4/src: java/org/apache/lucene/search/spans/NearSpansOrdered.java test/org/apache/lucene/search/spans/TestPayloadSpans.java

Author: markrmiller
Date: Thu Feb 19 14:28:34 2009
New Revision: 745883

URL: http://svn.apache.org/viewvc?rev=745883&view=rev
Log:
LUCENE-1465: (2.4 backport) NearSpansOrdered returned payloads from the first possible match rather than from the correct, shortest match; payloads could be returned even if the max slop was exceeded; and the wrong payload could be returned in certain situations.

Modified:
    lucene/java/branches/lucene_2_4/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java
    lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java

Modified: lucene/java/branches/lucene_2_4/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_4/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java?rev=745883&r1=745882&r2=745883&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_4/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java (original)
+++ lucene/java/branches/lucene_2_4/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java Thu Feb 19 14:28:34 2009
@@ -20,11 +20,15 @@
 import org.apache.lucene.index.IndexReader;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Comparator;
+import java.util.HashSet;
+import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Collection;
+import java.util.Set;
 
 /** A Spans that is formed from the ordered subspans of a SpanNearQuery
  * where the subspans do not overlap and have a maximum slop between them.
@@ -234,17 +238,22 @@
   private boolean shrinkToAfterShortestMatch() throws IOException {
     matchStart = subSpans[subSpans.length - 1].start();
     matchEnd = subSpans[subSpans.length - 1].end();
+    Set possibleMatchPayloads = new HashSet();
     if (subSpans[subSpans.length - 1].isPayloadAvailable()) {
-      matchPayload.addAll(subSpans[subSpans.length - 1].getPayload());
+      possibleMatchPayloads.addAll(subSpans[subSpans.length - 1].getPayload());
     }
+
+    Collection possiblePayload = null;
+    
     int matchSlop = 0;
     int lastStart = matchStart;
     int lastEnd = matchEnd;
     for (int i = subSpans.length - 2; i >= 0; i--) {
       PayloadSpans prevSpans = subSpans[i];
-      
-      if (subSpans[i].isPayloadAvailable()) {
-        matchPayload.addAll(0, subSpans[i].getPayload());
+      if (prevSpans.isPayloadAvailable()) {
+        Collection payload = prevSpans.getPayload();
+        possiblePayload = new ArrayList(payload.size());
+        possiblePayload.addAll(payload);
       }
       
       int prevStart = prevSpans.start();
@@ -265,9 +274,19 @@
           } else { // prevSpans still before (lastStart, lastEnd)
             prevStart = ppStart;
             prevEnd = ppEnd;
+            if (prevSpans.isPayloadAvailable()) {
+              Collection payload = prevSpans.getPayload();
+              possiblePayload = new ArrayList(payload.size());
+              possiblePayload.addAll(payload);
+            }
           }
         }
       }
+
+      if (possiblePayload != null) {
+        possibleMatchPayloads.addAll(possiblePayload);
+      }
+      
       assert prevStart <= matchStart;
       if (matchStart > prevEnd) { // Only non overlapping spans add to slop.
         matchSlop += (matchStart - prevEnd);
@@ -280,7 +299,14 @@
       lastStart = prevStart;
       lastEnd = prevEnd;
     }
-    return matchSlop <= allowedSlop; // ordered and allowed slop
+    
+    boolean match = matchSlop <= allowedSlop;
+    
+    if(match && possibleMatchPayloads.size() > 0) {
+      matchPayload.addAll(possibleMatchPayloads);
+    }
+
+    return match; // ordered and allowed slop
   }
 
   public String toString() {
@@ -288,4 +314,3 @@
       (firstTime?"START":(more?(doc()+":"+start()+"-"+end()):"END"));
   }
 }
-

Modified: lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java?rev=745883&r1=745882&r2=745883&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java (original)
+++ lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java Thu Feb 19 14:28:34 2009
@@ -18,6 +18,7 @@
 
 import java.io.IOException;
 import java.io.Reader;
+import java.io.StringReader;
 import java.util.Collection;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -32,6 +33,7 @@
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Payload;
@@ -40,8 +42,10 @@
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Similarity;
 import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.payloads.PayloadHelper;
 import org.apache.lucene.search.payloads.PayloadSpanUtil;
+import org.apache.lucene.store.LockObtainFailedException;
 import org.apache.lucene.store.RAMDirectory;
 
 public class TestPayloadSpans extends TestCase {
@@ -280,6 +284,115 @@
     assertTrue(seen + " does not equal: " + expectedNumSpans, seen == expectedNumSpans);
   }
   
+
+  public void testShrinkToAfterShortestMatch() throws CorruptIndexException,
+      LockObtainFailedException, IOException {
+    RAMDirectory directory = new RAMDirectory();
+    IndexWriter writer = new IndexWriter(directory, new TestPayloadAnalyzer(),
+        IndexWriter.MaxFieldLength.LIMITED);
+    Document doc = new Document();
+    doc.add(new Field("content", new StringReader("a b c d e f g h i j a k")));
+    writer.addDocument(doc);
+    writer.close();
+
+    IndexSearcher is = new IndexSearcher(directory);
+
+    SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
+    SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
+    SpanQuery[] sqs = { stq1, stq2 };
+    SpanNearQuery snq = new SpanNearQuery(sqs, 1, true);
+    PayloadSpans spans = snq.getPayloadSpans(is.getIndexReader());
+
+    TopDocs topDocs = is.search(snq, 1);
+    Set payloadSet = new HashSet();
+    for (int i = 0; i < topDocs.scoreDocs.length; i++) {
+      while (spans.next()) {
+        Collection payloads = spans.getPayload();
+
+        for (Iterator it = payloads.iterator(); it.hasNext();) {
+          payloadSet.add(new String((byte[]) it.next()));
+        }
+      }
+    }
+    assertEquals(2, payloadSet.size());
+    assertTrue(payloadSet.contains("a:Noise:10"));
+    assertTrue(payloadSet.contains("k:Noise:11"));
+  }
+  
+  public void testShrinkToAfterShortestMatch2() throws CorruptIndexException,
+      LockObtainFailedException, IOException {
+    RAMDirectory directory = new RAMDirectory();
+    IndexWriter writer = new IndexWriter(directory, new TestPayloadAnalyzer(),
+        IndexWriter.MaxFieldLength.LIMITED);
+    Document doc = new Document();
+    doc.add(new Field("content", new StringReader("a b a d k f a h i k a k")));
+    writer.addDocument(doc);
+    writer.close();
+
+    IndexSearcher is = new IndexSearcher(directory);
+
+    SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
+    SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
+    SpanQuery[] sqs = { stq1, stq2 };
+    SpanNearQuery snq = new SpanNearQuery(sqs, 0, true);
+    PayloadSpans spans = snq.getPayloadSpans(is.getIndexReader());
+
+    TopDocs topDocs = is.search(snq, 1);
+    Set payloadSet = new HashSet();
+    for (int i = 0; i < topDocs.scoreDocs.length; i++) {
+      while (spans.next()) {
+        Collection payloads = spans.getPayload();
+        int cnt = 0;
+        for (Iterator it = payloads.iterator(); it.hasNext();) {
+          payloadSet.add(new String((byte[]) it.next()));
+        }
+      }
+    }
+    assertEquals(2, payloadSet.size());
+    assertTrue(payloadSet.contains("a:Noise:10"));
+    assertTrue(payloadSet.contains("k:Noise:11"));
+  }
+  
+  public void testShrinkToAfterShortestMatch3() throws CorruptIndexException,
+      LockObtainFailedException, IOException {
+    RAMDirectory directory = new RAMDirectory();
+    IndexWriter writer = new IndexWriter(directory, new TestPayloadAnalyzer(),
+        IndexWriter.MaxFieldLength.LIMITED);
+    Document doc = new Document();
+    doc.add(new Field("content", new StringReader("j k a l f k k p a t a k l k t a")));
+    writer.addDocument(doc);
+    writer.close();
+
+    IndexSearcher is = new IndexSearcher(directory);
+
+    SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
+    SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
+    SpanQuery[] sqs = { stq1, stq2 };
+    SpanNearQuery snq = new SpanNearQuery(sqs, 0, true);
+    PayloadSpans spans = snq.getPayloadSpans(is.getIndexReader());
+
+    TopDocs topDocs = is.search(snq, 1);
+    Set payloadSet = new HashSet();
+    for (int i = 0; i < topDocs.scoreDocs.length; i++) {
+      while (spans.next()) {
+        Collection payloads = spans.getPayload();
+
+        for (Iterator it = payloads.iterator(); it.hasNext();) {
+          payloadSet.add(new String((byte[]) it.next()));
+        }
+      }
+    }
+    assertEquals(2, payloadSet.size());
+    if(DEBUG) {
+      Iterator pit = payloadSet.iterator();
+      while (pit.hasNext()) {
+        System.out.println("match:" + pit.next());
+      }
+    }
+    assertTrue(payloadSet.contains("a:Noise:10"));
+    assertTrue(payloadSet.contains("k:Noise:11"));
+  }
+  
   private IndexSearcher getSearcher() throws Exception {
     RAMDirectory directory = new RAMDirectory();
     PayloadAnalyzer analyzer = new PayloadAnalyzer();
@@ -374,4 +487,13 @@
       return result;
     }
   }
-}
\ No newline at end of file
+  
+  public class TestPayloadAnalyzer extends Analyzer {
+
+    public TokenStream tokenStream(String fieldName, Reader reader) {
+      TokenStream result = new LowerCaseTokenizer(reader);
+      result = new PayloadFilter(result, fieldName);
+      return result;
+    }
+  }
+}