You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/02/26 22:54:33 UTC
svn commit: r1450464 - in /lucene/dev/branches/branch_4x: ./ lucene/
lucene/highlighter/
lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/
lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/
Author: rmuir
Date: Tue Feb 26 21:54:33 2013
New Revision: 1450464
URL: http://svn.apache.org/r1450464
Log:
LUCENE-4804: PostingsHighlighter sometimes applies term to the wrong passage
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/lucene/ (props changed)
lucene/dev/branches/branch_4x/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_4x/lucene/highlighter/ (props changed)
lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java
lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java
Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1450464&r1=1450463&r2=1450464&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Tue Feb 26 21:54:33 2013
@@ -258,6 +258,9 @@ Bug Fixes
* LUCENE-4802: Don't compute norms for drill-down facet fields. (Mike McCandless)
+* LUCENE-4804: PostingsHighlighter sometimes applied terms to the wrong passage,
+ if they started exactly on a passage boundary. (Robert Muir)
+
Documentation
* LUCENE-4718: Fixed documentation of oal.queryparser.classic.
Modified: lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java?rev=1450464&r1=1450463&r2=1450464&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java Tue Feb 26 21:54:33 2013
@@ -378,7 +378,7 @@ public final class PostingsHighlighter {
throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
}
int end = dp.endOffset();
- if (start > current.endOffset) {
+ if (start >= current.endOffset) {
if (current.startOffset >= 0) {
// finalize current
current.score *= scorer.norm(current.startOffset);
Modified: lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java?rev=1450464&r1=1450463&r2=1450464&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java Tue Feb 26 21:54:33 2013
@@ -310,4 +310,34 @@ public class TestPostingsHighlighter ext
ir.close();
dir.close();
}
+
+ public void testCuriousGeorge() throws Exception {
+ String text = "It’s the formula for success for preschoolers—Curious George and fire trucks! " +
+ "Curious George and the Firefighters is a story based on H. A. and Margret Rey’s " +
+ "popular primate and painted in the original watercolor and charcoal style. " +
+ "Firefighters are a famously brave lot, but can they withstand a visit from one curious monkey?";
+ Directory dir = newDirectory();
+ Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer);
+ FieldType positionsType = new FieldType(TextField.TYPE_STORED);
+ positionsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+ Field body = new Field("body", text, positionsType);
+ Document document = new Document();
+ document.add(body);
+ iw.addDocument(document);
+ IndexReader ir = iw.getReader();
+ iw.close();
+ IndexSearcher searcher = newSearcher(ir);
+ PhraseQuery query = new PhraseQuery();
+ query.add(new Term("body", "curious"));
+ query.add(new Term("body", "george"));
+ TopDocs topDocs = searcher.search(query, 10);
+ assertEquals(1, topDocs.totalHits);
+ PostingsHighlighter highlighter = new PostingsHighlighter();
+ String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
+ assertEquals(1, snippets.length);
+ assertFalse(snippets[0].contains("<b>Curious</b>Curious"));
+ ir.close();
+ dir.close();
+ }
}
Modified: lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java?rev=1450464&r1=1450463&r2=1450464&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java (original)
+++ lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java Tue Feb 26 21:54:33 2013
@@ -172,6 +172,8 @@ public class TestPostingsHighlighterRank
assertEquals("body", term.field());
int matchStart = p.getMatchStarts()[i];
assertTrue(matchStart >= 0);
+ // must at least start within the passage
+ assertTrue(matchStart < p.getEndOffset());
int matchEnd = p.getMatchEnds()[i];
assertTrue(matchEnd >= 0);
// always moving forward