You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/08/12 21:39:54 UTC
svn commit: r1513231 - in /lucene/dev/branches/branch_4x: ./ lucene/
lucene/highlighter/
lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/
lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/
Author: rmuir
Date: Mon Aug 12 19:39:53 2013
New Revision: 1513231
URL: http://svn.apache.org/r1513231
Log:
LUCENE-5166: PostingsHighlighter fails with IndexOutOfBoundsException
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/lucene/ (props changed)
lucene/dev/branches/branch_4x/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_4x/lucene/highlighter/ (props changed)
lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java
lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java
Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1513231&r1=1513230&r2=1513231&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Mon Aug 12 19:39:53 2013
@@ -68,6 +68,10 @@ Bug Fixes
time we start the read loop (where we check the length) and when we actually do
the read. (gsingers, yonik, Robert Muir, Uwe Schindler)
+* LUCENE-5166: PostingsHighlighter would throw IOOBE if a term spanned the maxLength
+ boundary, made it into the top-N and went to the formatter.
+ (Manuel Amoabeng, Michael McCandless, Robert Muir)
+
API Changes
* LUCENE-5094: Add ramBytesUsed() to MultiDocValues.OrdinalMap.
Modified: lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java?rev=1513231&r1=1513230&r2=1513231&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java Mon Aug 12 19:39:53 2013
@@ -506,6 +506,13 @@ public class PostingsHighlighter {
throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
}
int end = dp.endOffset();
+ // LUCENE-5166: this hit would span the content limit... however more valid
+ // hits may exist (they are sorted by start). so we pretend like we never
+ // saw this term, it won't cause a passage to be added to passageQueue or anything.
+ assert EMPTY.startOffset() == Integer.MAX_VALUE;
+ if (start < contentLength && end > contentLength) {
+ continue;
+ }
if (start >= current.endOffset) {
if (current.startOffset >= 0) {
// finalize current
Modified: lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java?rev=1513231&r1=1513230&r2=1513231&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java Mon Aug 12 19:39:53 2013
@@ -86,6 +86,81 @@ public class TestPostingsHighlighter ext
dir.close();
}
+ public void testFormatWithMatchExceedingContentLength() throws Exception {
+
+ int maxLength = 17;
+ String bodyText = "123 5678 01234 TEST";
+
+ final Analyzer analyzer = new MockAnalyzer(random());
+
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
+ iwc.setMergePolicy(newLogMergePolicy());
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+
+ final FieldType fieldType = new FieldType(TextField.TYPE_STORED);
+ fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+ final Field body = new Field("body", bodyText, fieldType);
+
+ Document doc = new Document();
+ doc.add(body);
+
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+
+ Query query = new TermQuery(new Term("body", "test"));
+
+ TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
+ assertEquals(1, topDocs.totalHits);
+
+ PostingsHighlighter highlighter = new PostingsHighlighter(maxLength);
+ String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
+
+
+ assertEquals(1, snippets.length);
+ // LUCENE-5166: no snippet
+ assertEquals("123 5678 01234 TE", snippets[0]);
+
+ ir.close();
+ dir.close();
+ }
+
+ // simple test highlighting last word.
+ public void testHighlightLastWord() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+ iwc.setMergePolicy(newLogMergePolicy());
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+
+ FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
+ offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+ Field body = new Field("body", "", offsetsType);
+ Document doc = new Document();
+ doc.add(body);
+
+ body.setStringValue("This is a test");
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ PostingsHighlighter highlighter = new PostingsHighlighter();
+ Query query = new TermQuery(new Term("body", "test"));
+ TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
+ assertEquals(1, topDocs.totalHits);
+ String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
+ assertEquals(1, snippets.length);
+ assertEquals("This is a <b>test</b>", snippets[0]);
+
+ ir.close();
+ dir.close();
+ }
+
// simple test with one sentence documents.
public void testOneSentence() throws Exception {
Directory dir = newDirectory();
Modified: lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java?rev=1513231&r1=1513230&r2=1513231&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java (original)
+++ lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java Mon Aug 12 19:39:53 2013
@@ -173,6 +173,8 @@ public class TestPostingsHighlighterRank
assertTrue(p.getNumMatches() > 0);
assertTrue(p.getStartOffset() >= 0);
assertTrue(p.getStartOffset() <= content.length());
+ assertTrue(p.getEndOffset() >= p.getStartOffset());
+ assertTrue(p.getEndOffset() <= content.length());
// we use a very simple analyzer. so we can assert the matches are correct
int lastMatchStart = -1;
for (int i = 0; i < p.getNumMatches(); i++) {