You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/02/26 22:54:33 UTC

svn commit: r1450464 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/highlighter/ lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/ lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/

Author: rmuir
Date: Tue Feb 26 21:54:33 2013
New Revision: 1450464

URL: http://svn.apache.org/r1450464
Log:
LUCENE-4804: PostingsHighlighter sometimes applies term to the wrong passage

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_4x/lucene/highlighter/   (props changed)
    lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
    lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java
    lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java

Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1450464&r1=1450463&r2=1450464&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Tue Feb 26 21:54:33 2013
@@ -258,6 +258,9 @@ Bug Fixes
 
 * LUCENE-4802: Don't compute norms for drill-down facet fields. (Mike McCandless)
 
+* LUCENE-4804: PostingsHighlighter sometimes applied terms to the wrong passage,
+  if they started exactly on a passage boundary.  (Robert Muir)
+
 Documentation
 
 * LUCENE-4718: Fixed documentation of oal.queryparser.classic.

Modified: lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java?rev=1450464&r1=1450463&r2=1450464&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java Tue Feb 26 21:54:33 2013
@@ -378,7 +378,7 @@ public final class PostingsHighlighter {
         throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
       }
       int end = dp.endOffset();
-      if (start > current.endOffset) {
+      if (start >= current.endOffset) {
         if (current.startOffset >= 0) {
           // finalize current
           current.score *= scorer.norm(current.startOffset);

Modified: lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java?rev=1450464&r1=1450463&r2=1450464&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java Tue Feb 26 21:54:33 2013
@@ -310,4 +310,34 @@ public class TestPostingsHighlighter ext
     ir.close();
     dir.close();
   }
+  
+  public void testCuriousGeorge() throws Exception {
+    String text = "It’s the formula for success for preschoolers—Curious George and fire trucks! " + 
+                  "Curious George and the Firefighters is a story based on H. A. and Margret Rey’s " +
+                  "popular primate and painted in the original watercolor and charcoal style. " + 
+                  "Firefighters are a famously brave lot, but can they withstand a visit from one curious monkey?";
+    Directory dir = newDirectory();
+    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer);
+    FieldType positionsType = new FieldType(TextField.TYPE_STORED);
+    positionsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+    Field body = new Field("body", text, positionsType);
+    Document document = new Document();
+    document.add(body);
+    iw.addDocument(document);
+    IndexReader ir = iw.getReader();
+    iw.close();
+    IndexSearcher searcher = newSearcher(ir);
+    PhraseQuery query = new PhraseQuery();
+    query.add(new Term("body", "curious"));
+    query.add(new Term("body", "george"));
+    TopDocs topDocs = searcher.search(query, 10);
+    assertEquals(1, topDocs.totalHits);
+    PostingsHighlighter highlighter = new PostingsHighlighter();
+    String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
+    assertEquals(1, snippets.length);
+    assertFalse(snippets[0].contains("<b>Curious</b>Curious"));
+    ir.close();
+    dir.close();
+  }
 }

Modified: lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java?rev=1450464&r1=1450463&r2=1450464&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java (original)
+++ lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java Tue Feb 26 21:54:33 2013
@@ -172,6 +172,8 @@ public class TestPostingsHighlighterRank
           assertEquals("body", term.field());
           int matchStart = p.getMatchStarts()[i];
           assertTrue(matchStart >= 0);
+          // must at least start within the passage
+          assertTrue(matchStart < p.getEndOffset());
           int matchEnd = p.getMatchEnds()[i];
           assertTrue(matchEnd >= 0);
           // always moving forward