You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/02/26 16:10:20 UTC
svn commit: r1450206 - in /lucene/dev/trunk/lucene: ./
highlighter/src/java/org/apache/lucene/search/postingshighlight/
highlighter/src/test/org/apache/lucene/search/postingshighlight/
Author: rmuir
Date: Tue Feb 26 15:10:20 2013
New Revision: 1450206
URL: http://svn.apache.org/r1450206
Log:
LUCENE-4798: PostingsHighlighter's formatter sometimes doesn't highlight matched terms
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/Passage.java
lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java
lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1450206&r1=1450205&r2=1450206&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Tue Feb 26 15:10:20 2013
@@ -264,6 +264,8 @@ Bug Fixes
large performance impacts for many non-random or non-uniform
term distributions. (John Wang, yonik)
+* LUCENE-4798: PostingsHighlighter's formatter sometimes didn't highlight
+ matched terms. (Robert Muir)
Documentation
Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/Passage.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/Passage.java?rev=1450206&r1=1450205&r2=1450206&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/Passage.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/Passage.java Tue Feb 26 15:10:20 2013
@@ -20,6 +20,7 @@ package org.apache.lucene.search.posting
import org.apache.lucene.index.Term;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.SorterTemplate;
/**
* Represents a passage (typically a sentence of the document).
@@ -53,6 +54,45 @@ public final class Passage {
numMatches++;
}
+ void sort() {
+ final int starts[] = matchStarts;
+ final int ends[] = matchEnds;
+ final Term terms[] = matchTerms;
+ new SorterTemplate() {
+ @Override
+ protected void swap(int i, int j) {
+ int temp = starts[i];
+ starts[i] = starts[j];
+ starts[j] = temp;
+
+ temp = ends[i];
+ ends[i] = ends[j];
+ ends[j] = temp;
+
+ Term tempTerm = terms[i];
+ terms[i] = terms[j];
+ terms[j] = tempTerm;
+ }
+
+ @Override
+ protected int compare(int i, int j) {
+ return Integer.compare(starts[i], starts[j]);
+ }
+
+ @Override
+ protected void setPivot(int i) {
+ pivot = starts[i];
+ }
+
+ @Override
+ protected int comparePivot(int j) {
+ return Integer.compare(pivot, starts[j]);
+ }
+
+ int pivot;
+ }.mergeSort(0, numMatches-1);
+ }
+
void reset() {
startOffset = endOffset = -1;
score = 0.0f;
Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java?rev=1450206&r1=1450205&r2=1450206&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java Tue Feb 26 15:10:20 2013
@@ -399,6 +399,9 @@ public final class PostingsHighlighter {
if (start >= contentLength) {
Passage passages[] = new Passage[passageQueue.size()];
passageQueue.toArray(passages);
+ for (Passage p : passages) {
+ p.sort();
+ }
// sort in ascending order
Arrays.sort(passages, new Comparator<Passage>() {
@Override
Modified: lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java?rev=1450206&r1=1450205&r2=1450206&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java Tue Feb 26 15:10:20 2013
@@ -19,6 +19,7 @@ package org.apache.lucene.search.posting
import java.util.Map;
+import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
@@ -34,6 +35,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
@@ -272,4 +274,40 @@ public class TestPostingsHighlighter ext
ir.close();
dir.close();
}
+
+ public void testBuddhism() throws Exception {
+ String text = "This eight-volume set brings together seminal papers in Buddhist studies from a vast " +
+ "range of academic disciplines published over the last forty years. With a new introduction " +
+ "by the editor, this collection is a unique and unrivalled research resource for both " +
+ "student and scholar. Coverage includes: - Buddhist origins; early history of Buddhism in " +
+ "South and Southeast Asia - early Buddhist Schools and Doctrinal History; Theravada Doctrine " +
+ "- the Origins and nature of Mahayana Buddhism; some Mahayana religious topics - Abhidharma " +
+ "and Madhyamaka - Yogacara, the Epistemological tradition, and Tathagatagarbha - Tantric " +
+ "Buddhism (Including China and Japan); Buddhism in Nepal and Tibet - Buddhism in South and " +
+ "Southeast Asia, and - Buddhism in China, East Asia, and Japan.";
+ Directory dir = newDirectory();
+ Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer);
+
+ FieldType positionsType = new FieldType(TextField.TYPE_STORED);
+ positionsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+ Field body = new Field("body", text, positionsType);
+ Document document = new Document();
+ document.add(body);
+ iw.addDocument(document);
+ IndexReader ir = iw.getReader();
+ iw.close();
+ IndexSearcher searcher = newSearcher(ir);
+ PhraseQuery query = new PhraseQuery();
+ query.add(new Term("body", "buddhist"));
+ query.add(new Term("body", "origins"));
+ TopDocs topDocs = searcher.search(query, 10);
+ assertEquals(1, topDocs.totalHits);
+ PostingsHighlighter highlighter = new PostingsHighlighter();
+ String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
+ assertEquals(1, snippets.length);
+ assertTrue(snippets[0].contains("<b>Buddhist</b> <b>origins</b>"));
+ ir.close();
+ dir.close();
+ }
}
Modified: lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java?rev=1450206&r1=1450205&r2=1450206&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java Tue Feb 26 15:10:20 2013
@@ -166,6 +166,7 @@ public class TestPostingsHighlighterRank
assertTrue(p.getStartOffset() >= 0);
assertTrue(p.getStartOffset() <= content.length());
// we use a very simple analyzer. so we can assert the matches are correct
+ int lastMatchStart = -1;
for (int i = 0; i < p.getNumMatches(); i++) {
Term term = p.getMatchTerms()[i];
assertEquals("body", term.field());
@@ -173,6 +174,9 @@ public class TestPostingsHighlighterRank
assertTrue(matchStart >= 0);
int matchEnd = p.getMatchEnds()[i];
assertTrue(matchEnd >= 0);
+ // always moving forward
+ assertTrue(matchStart >= lastMatchStart);
+ lastMatchStart = matchStart;
// single character terms
assertEquals(matchStart+1, matchEnd);
// and the offsets must be correct...