You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/01/03 03:52:28 UTC
svn commit: r1428147 - in /lucene/dev/trunk/lucene/sandbox/src:
java/org/apache/lucene/sandbox/postingshighlight/
test/org/apache/lucene/sandbox/postingshighlight/
Author: rmuir
Date: Thu Jan 3 02:52:27 2013
New Revision: 1428147
URL: http://svn.apache.org/viewvc?rev=1428147&view=rev
Log:
LUCENE-4652: highlight multiple fields with postings highlighter
Modified:
lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/Passage.java
lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PassageFormatter.java
lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PassageScorer.java
lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PostingsHighlighter.java
lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/sandbox/postingshighlight/TestPostingsHighlighter.java
lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/sandbox/postingshighlight/TestPostingsHighlighterRanking.java
Modified: lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/Passage.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/Passage.java?rev=1428147&r1=1428146&r2=1428147&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/Passage.java (original)
+++ lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/Passage.java Thu Jan 3 02:52:27 2013
@@ -61,6 +61,8 @@ public final class Passage {
/**
* Start offset of this passage.
+ * @return start index (inclusive) of the passage in the
+ * original content: always >= 0.
*/
public int getStartOffset() {
return startOffset;
@@ -68,6 +70,8 @@ public final class Passage {
/**
* End offset of this passage.
+ * @return end index (exclusive) of the passage in the
+ * original content: always >= {@link #getStartOffset()}
*/
public int getEndOffset() {
return endOffset;
@@ -91,6 +95,7 @@ public final class Passage {
/**
* Start offsets of the term matches, in increasing order.
+ * <p>
* Only {@link #getNumMatches} are valid. Note that these
* offsets are absolute (not relative to {@link #getStartOffset()}).
*/
@@ -99,19 +104,20 @@ public final class Passage {
}
/**
- * End offsets of the term matches, corresponding with
- * {@link #getMatchStarts}. Note that its possible that
- * an end offset could exceed beyond the bounds of the passage
- * ({@link #getEndOffset()}), if the Analyzer produced a term
- * which spans a passage boundary.
+ * End offsets of the term matches, corresponding with {@link #getMatchStarts}.
+ * <p>
+ * Only {@link #getNumMatches} are valid. Note that it's possible that an end offset
+ * could extend beyond the bounds of the passage ({@link #getEndOffset()}), if the
+ * Analyzer produced a term which spans a passage boundary.
*/
public int[] getMatchEnds() {
return matchEnds;
}
/**
- * Term of the matches, corresponding with
- * {@link #getMatchStarts()}.
+ * Term of the matches, corresponding with {@link #getMatchStarts()}.
+ * <p>
+ * Only {@link #getNumMatches()} are valid.
*/
public Term[] getMatchTerms() {
return matchTerms;
Modified: lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PassageFormatter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PassageFormatter.java?rev=1428147&r1=1428146&r2=1428147&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PassageFormatter.java (original)
+++ lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PassageFormatter.java Thu Jan 3 02:52:27 2013
@@ -18,7 +18,7 @@ package org.apache.lucene.sandbox.postin
*/
/**
- * Constructs a formatted passage.
+ * Creates a formatted snippet from the top passages.
* <p>
* The default implementation marks the query terms as bold, and places
* ellipses between unconnected passages.
@@ -26,6 +26,12 @@ package org.apache.lucene.sandbox.postin
*/
public class PassageFormatter {
/**
+ * Formats the top <code>passages</code> from <code>content</code>
+ * into a human-readable text snippet.
+ *
+ * @param passages top-N passages for the field. Note these are sorted in
+ * the order that they appear in the document for convenience.
+ * @param content content for the field.
* @return formatted highlight
*/
public String format(Passage passages[], String content) {
Modified: lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PassageScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PassageScorer.java?rev=1428147&r1=1428146&r2=1428147&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PassageScorer.java (original)
+++ lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PassageScorer.java Thu Jan 3 02:52:27 2013
@@ -18,7 +18,7 @@ package org.apache.lucene.sandbox.postin
*/
/**
- * Used for ranking passages.
+ * Ranks passages found by {@link PostingsHighlighter}.
* <p>
* Each passage is scored as a miniature document within the document.
* The final score is computed as {@link #norm} * ∑ ({@link #weight} * {@link #tf}).
Modified: lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PostingsHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PostingsHighlighter.java?rev=1428147&r1=1428146&r2=1428147&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PostingsHighlighter.java (original)
+++ lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PostingsHighlighter.java Thu Jan 3 02:52:27 2013
@@ -62,19 +62,15 @@ import org.apache.lucene.util.UnicodeUti
* Field body = new Field("body", "foobar", offsetsType);
*
* // retrieve highlights at query time
- * PostingsHighlighter highlighter = new PostingsHighlighter("body");
+ * PostingsHighlighter highlighter = new PostingsHighlighter();
* Query query = new TermQuery(new Term("body", "highlighting"));
* TopDocs topDocs = searcher.search(query, n);
- * String highlights[] = highlighter.highlight(query, searcher, topDocs);
+ * String highlights[] = highlighter.highlight("body", query, searcher, topDocs);
* </pre>
* @lucene.experimental
*/
public final class PostingsHighlighter {
- // TODO: support highlighting multiple fields at once? someone is bound
- // to try to use this in a slow way (invoking over and over for each field), which
- // would be horrible.
-
// TODO: maybe allow re-analysis for tiny fields? currently we require offsets,
// but if the analyzer is really fast and the field is tiny, this might really be
// unnecessary.
@@ -86,25 +82,37 @@ public final class PostingsHighlighter {
* closer to the beginning of the document better summarize its content */
public static final int DEFAULT_MAX_LENGTH = 10000;
- private final String field;
- private final Term floor;
- private final Term ceiling;
private final int maxLength;
private final BreakIterator breakIterator;
private final PassageScorer scorer;
private final PassageFormatter formatter;
- public PostingsHighlighter(String field) {
- this(field, DEFAULT_MAX_LENGTH);
+ /**
+ * Creates a new highlighter with default parameters.
+ */
+ public PostingsHighlighter() {
+ this(DEFAULT_MAX_LENGTH);
}
- public PostingsHighlighter(String field, int maxLength) {
- this(field, maxLength, BreakIterator.getSentenceInstance(Locale.ROOT), new PassageScorer(), new PassageFormatter());
+ /**
+ * Creates a new highlighter, specifying maximum content length.
+ * @param maxLength maximum content size to process.
+ * @throws IllegalArgumentException if <code>maxLength</code> is negative or <code>Integer.MAX_VALUE</code>
+ */
+ public PostingsHighlighter(int maxLength) {
+ this(maxLength, BreakIterator.getSentenceInstance(Locale.ROOT), new PassageScorer(), new PassageFormatter());
}
- public PostingsHighlighter(String field, int maxLength, BreakIterator breakIterator, PassageScorer scorer, PassageFormatter formatter) {
- this.field = field;
- if (maxLength == Integer.MAX_VALUE) {
+ /**
+ * Creates a new highlighter with custom parameters.
+ * @param maxLength maximum content size to process.
+ * @param breakIterator used for finding passage boundaries.
+ * @param scorer used for ranking passages.
+ * @param formatter used for formatting passages into highlighted snippets.
+ * @throws IllegalArgumentException if <code>maxLength</code> is negative or <code>Integer.MAX_VALUE</code>
+ */
+ public PostingsHighlighter(int maxLength, BreakIterator breakIterator, PassageScorer scorer, PassageFormatter formatter) {
+ if (maxLength < 0 || maxLength == Integer.MAX_VALUE) {
// two reasons: no overflow problems in BreakIterator.preceding(offset+1),
// our sentinel in the offsets queue uses this value to terminate.
throw new IllegalArgumentException("maxLength must be < Integer.MAX_VALUE");
@@ -113,26 +121,107 @@ public final class PostingsHighlighter {
this.breakIterator = breakIterator;
this.scorer = scorer;
this.formatter = formatter;
- floor = new Term(field, "");
- ceiling = new Term(field, UnicodeUtil.BIG_TERM);
}
/**
- * Calls {@link #highlight(Query, IndexSearcher, TopDocs, int) highlight(query, searcher, topDocs, 1)}
+ * Highlights the top passages from a single field.
+ *
+ * @param field field name to highlight.
+ * Must have a stored string value and also be indexed with offsets.
+ * @param query query to highlight.
+ * @param searcher searcher that was previously used to execute the query.
+ * @param topDocs TopDocs containing the summary result documents to highlight.
+ * @return Array of formatted snippets corresponding to the documents in <code>topDocs</code>.
+ * If no highlights were found for a document, its value is <code>null</code>.
+ * @throws IOException if an I/O error occurred during processing
+ * @throws IllegalArgumentException if <code>field</code> was indexed without
+ * {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}
+ */
+ public String[] highlight(String field, Query query, IndexSearcher searcher, TopDocs topDocs) throws IOException {
+ return highlight(field, query, searcher, topDocs, 1);
+ }
+
+ /**
+ * Highlights the top-N passages from a single field.
+ *
+ * @param field field name to highlight.
+ * Must have a stored string value and also be indexed with offsets.
+ * @param query query to highlight.
+ * @param searcher searcher that was previously used to execute the query.
+ * @param topDocs TopDocs containing the summary result documents to highlight.
+ * @param maxPassages The maximum number of top-N ranked passages used to
+ * form the highlighted snippets.
+ * @return Array of formatted snippets corresponding to the documents in <code>topDocs</code>.
+ * If no highlights were found for a document, its value is <code>null</code>.
+ * @throws IOException if an I/O error occurred during processing
+ * @throws IllegalArgumentException if <code>field</code> was indexed without
+ * {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}
+ */
+ public String[] highlight(String field, Query query, IndexSearcher searcher, TopDocs topDocs, int maxPassages) throws IOException {
+ Map<String,String[]> res = highlightFields(new String[] { field }, query, searcher, topDocs, maxPassages);
+ return res.get(field);
+ }
+
+ /**
+ * Highlights the top passages from multiple fields.
+ * <p>
+ * Conceptually, this behaves as a more efficient form of:
+ * <pre class="prettyprint">
+ * Map m = new HashMap();
+ * for (String field : fields) {
+ * m.put(field, highlight(field, query, searcher, topDocs));
+ * }
+ * return m;
+ * </pre>
+ *
+ * @param fields field names to highlight.
+ * Must have a stored string value and also be indexed with offsets.
+ * @param query query to highlight.
+ * @param searcher searcher that was previously used to execute the query.
+ * @param topDocs TopDocs containing the summary result documents to highlight.
+ * @return Map keyed on field name, containing the array of formatted snippets
+ * corresponding to the documents in <code>topDocs</code>.
+ * If no highlights were found for a document, its value is <code>null</code>.
+ * @throws IOException if an I/O error occurred during processing
+ * @throws IllegalArgumentException if any of <code>fields</code> was indexed without
+ * {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}
*/
- public String[] highlight(Query query, IndexSearcher searcher, TopDocs topDocs) throws IOException {
- return highlight(query, searcher, topDocs, 1);
+ public Map<String,String[]> highlightFields(String fields[], Query query, IndexSearcher searcher, TopDocs topDocs) throws IOException {
+ return highlightFields(fields, query, searcher, topDocs, 1);
}
- public String[] highlight(Query query, IndexSearcher searcher, TopDocs topDocs, int maxPassages) throws IOException {
+ /**
+ * Highlights the top-N passages from multiple fields.
+ * <p>
+ * Conceptually, this behaves as a more efficient form of:
+ * <pre class="prettyprint">
+ * Map m = new HashMap();
+ * for (String field : fields) {
+ * m.put(field, highlight(field, query, searcher, topDocs, maxPassages));
+ * }
+ * return m;
+ * </pre>
+ *
+ * @param fields field names to highlight.
+ * Must have a stored string value and also be indexed with offsets.
+ * @param query query to highlight.
+ * @param searcher searcher that was previously used to execute the query.
+ * @param topDocs TopDocs containing the summary result documents to highlight.
+ * @param maxPassages The maximum number of top-N ranked passages per-field used to
+ * form the highlighted snippets.
+ * @return Map keyed on field name, containing the array of formatted snippets
+ * corresponding to the documents in <code>topDocs</code>.
+ * If no highlights were found for a document, its value is <code>null</code>.
+ * @throws IOException if an I/O error occurred during processing
+ * @throws IllegalArgumentException if any of <code>fields</code> was indexed without
+ * {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}
+ */
+ public Map<String,String[]> highlightFields(String fields[], Query query, IndexSearcher searcher, TopDocs topDocs, int maxPassages) throws IOException {
final IndexReader reader = searcher.getIndexReader();
final ScoreDoc scoreDocs[] = topDocs.scoreDocs;
query = rewrite(query);
- SortedSet<Term> terms = new TreeSet<Term>();
- query.extractTerms(terms);
- terms = terms.subSet(floor, ceiling);
- Term termTexts[] = terms.toArray(new Term[terms.size()]);
- // TODO: should we have some reasonable defaults for term pruning? (e.g. stopwords)
+ SortedSet<Term> queryTerms = new TreeSet<Term>();
+ query.extractTerms(queryTerms);
int docids[] = new int[scoreDocs.length];
for (int i = 0; i < docids.length; i++) {
@@ -140,21 +229,44 @@ public final class PostingsHighlighter {
}
IndexReaderContext readerContext = reader.getContext();
List<AtomicReaderContext> leaves = readerContext.leaves();
-
+
+ BreakIterator bi = (BreakIterator)breakIterator.clone();
+
// sort for sequential io
Arrays.sort(docids);
+ Arrays.sort(fields);
- // pull stored data
- LimitedStoredFieldVisitor visitor = new LimitedStoredFieldVisitor(field, maxLength);
- String contents[] = new String[docids.length];
- for (int i = 0; i < contents.length; i++) {
+ // pull stored data:
+ LimitedStoredFieldVisitor visitor = new LimitedStoredFieldVisitor(fields, maxLength);
+ String contents[][] = new String[fields.length][docids.length];
+ for (int i = 0; i < docids.length; i++) {
reader.document(docids[i], visitor);
- contents[i] = visitor.getValue();
+ for (int j = 0; j < fields.length; j++) {
+ contents[j][i] = visitor.getValue(j).toString();
+ }
visitor.reset();
}
- BreakIterator bi = (BreakIterator)breakIterator.clone();
+ Map<String,String[]> highlights = new HashMap<String,String[]>();
+ for (int i = 0; i < fields.length; i++) {
+ String field = fields[i];
+ Term floor = new Term(field, "");
+ Term ceiling = new Term(field, UnicodeUtil.BIG_TERM);
+ SortedSet<Term> fieldTerms = queryTerms.subSet(floor, ceiling);
+ // TODO: should we have some reasonable defaults for term pruning? (e.g. stopwords)
+ Term terms[] = fieldTerms.toArray(new Term[fieldTerms.size()]);
+ Map<Integer,String> fieldHighlights = highlightField(field, contents[i], bi, terms, docids, leaves, maxPassages);
+
+ String[] result = new String[scoreDocs.length];
+ for (int j = 0; j < scoreDocs.length; j++) {
+ result[j] = fieldHighlights.get(scoreDocs[j].doc);
+ }
+ highlights.put(field, result);
+ }
+ return highlights;
+ }
+ private Map<Integer,String> highlightField(String field, String contents[], BreakIterator bi, Term terms[], int[] docids, List<AtomicReaderContext> leaves, int maxPassages) throws IOException {
Map<Integer,String> highlights = new HashMap<Integer,String>();
// reuse in the real sense... for docs in same segment we just advance our old enum
@@ -178,9 +290,9 @@ public final class PostingsHighlighter {
}
if (leaf != lastLeaf) {
termsEnum = t.iterator(null);
- postings = new DocsAndPositionsEnum[terms.size()];
+ postings = new DocsAndPositionsEnum[terms.length];
}
- Passage passages[] = highlightDoc(termTexts, content.length(), bi, doc - subContext.docBase, termsEnum, postings, maxPassages);
+ Passage passages[] = highlightDoc(field, terms, content.length(), bi, doc - subContext.docBase, termsEnum, postings, maxPassages);
if (passages.length > 0) {
// otherwise a null snippet
highlights.put(doc, formatter.format(passages, content));
@@ -188,17 +300,13 @@ public final class PostingsHighlighter {
lastLeaf = leaf;
}
- String[] result = new String[scoreDocs.length];
- for (int i = 0; i < scoreDocs.length; i++) {
- result[i] = highlights.get(scoreDocs[i].doc);
- }
- return result;
+ return highlights;
}
// algorithm: treat sentence snippets as miniature documents
// we can intersect these with the postings lists via BreakIterator.preceding(offset),s
// score each sentence as norm(sentenceStartOffset) * sum(weight * tf(freq))
- private Passage[] highlightDoc(Term terms[], int contentLength, BreakIterator bi, int doc,
+ private Passage[] highlightDoc(String field, Term terms[], int contentLength, BreakIterator bi, int doc,
TermsEnum termsEnum, DocsAndPositionsEnum[] postings, int n) throws IOException {
PriorityQueue<OffsetsEnum> pq = new PriorityQueue<OffsetsEnum>();
float weights[] = new float[terms.length];
@@ -381,17 +489,24 @@ public final class PostingsHighlighter {
}
private static class LimitedStoredFieldVisitor extends StoredFieldVisitor {
- private final String field;
+ private final String fields[];
private final int maxLength;
- private final StringBuilder builder = new StringBuilder();
+ private final StringBuilder builders[];
+ private int currentField = -1;
- public LimitedStoredFieldVisitor(String field, int maxLength) {
- this.field = field;
+ public LimitedStoredFieldVisitor(String fields[], int maxLength) {
+ this.fields = fields;
this.maxLength = maxLength;
+ builders = new StringBuilder[fields.length];
+ for (int i = 0; i < builders.length; i++) {
+ builders[i] = new StringBuilder();
+ }
}
@Override
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
+ assert currentField >= 0;
+ StringBuilder builder = builders[currentField];
if (builder.length() > 0) {
builder.append(' '); // for the offset gap, TODO: make this configurable
}
@@ -404,22 +519,24 @@ public final class PostingsHighlighter {
@Override
public Status needsField(FieldInfo fieldInfo) throws IOException {
- if (fieldInfo.name.equals(field)) {
- if (builder.length() > maxLength) {
- return Status.STOP;
- }
- return Status.YES;
- } else {
+ currentField = Arrays.binarySearch(fields, fieldInfo.name);
+ if (currentField < 0) {
return Status.NO;
+ } else if (builders[currentField].length() > maxLength) {
+ return fields.length == 1 ? Status.STOP : Status.NO;
}
+ return Status.YES;
}
- String getValue() {
- return builder.toString();
+ String getValue(int i) {
+ return builders[i].toString();
}
void reset() {
- builder.setLength(0);
+ currentField = -1;
+ for (int i = 0; i < fields.length; i++) {
+ builders[i].setLength(0);
+ }
}
}
}
Modified: lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/sandbox/postingshighlight/TestPostingsHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/sandbox/postingshighlight/TestPostingsHighlighter.java?rev=1428147&r1=1428146&r2=1428147&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/sandbox/postingshighlight/TestPostingsHighlighter.java (original)
+++ lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/sandbox/postingshighlight/TestPostingsHighlighter.java Thu Jan 3 02:52:27 2013
@@ -17,6 +17,8 @@ package org.apache.lucene.sandbox.postin
* limitations under the License.
*/
+import java.util.Map;
+
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
@@ -63,11 +65,11 @@ public class TestPostingsHighlighter ext
iw.close();
IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter("body");
+ PostingsHighlighter highlighter = new PostingsHighlighter();
Query query = new TermQuery(new Term("body", "highlighting"));
TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits);
- String snippets[] = highlighter.highlight(query, searcher, topDocs);
+ String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
assertEquals(2, snippets.length);
assertEquals("Just a test <b>highlighting</b> from postings. ", snippets[0]);
assertEquals("<b>Highlighting</b> the first term. ", snippets[1]);
@@ -99,11 +101,11 @@ public class TestPostingsHighlighter ext
iw.close();
IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter("body");
+ PostingsHighlighter highlighter = new PostingsHighlighter();
Query query = new TermQuery(new Term("body", "test"));
TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits);
- String snippets[] = highlighter.highlight(query, searcher, topDocs);
+ String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a <b>test</b>.", snippets[0]);
assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
@@ -112,6 +114,47 @@ public class TestPostingsHighlighter ext
dir.close();
}
+ public void testMultipleFields() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
+ iwc.setMergePolicy(newLogMergePolicy());
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+
+ FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
+ offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+ Field body = new Field("body", "", offsetsType);
+ Field title = new Field("title", "", offsetsType);
+ Document doc = new Document();
+ doc.add(body);
+ doc.add(title);
+
+ body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
+ title.setStringValue("I am hoping for the best.");
+ iw.addDocument(doc);
+ body.setStringValue("Highlighting the first term. Hope it works.");
+ title.setStringValue("But best may not be good enough.");
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ PostingsHighlighter highlighter = new PostingsHighlighter();
+ BooleanQuery query = new BooleanQuery();
+ query.add(new TermQuery(new Term("body", "highlighting")), BooleanClause.Occur.SHOULD);
+ query.add(new TermQuery(new Term("title", "best")), BooleanClause.Occur.SHOULD);
+ TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
+ assertEquals(2, topDocs.totalHits);
+ Map<String,String[]> snippets = highlighter.highlightFields(new String [] { "body", "title" }, query, searcher, topDocs);
+ assertEquals(2, snippets.size());
+ assertEquals("Just a test <b>highlighting</b> from postings. ", snippets.get("body")[0]);
+ assertEquals("<b>Highlighting</b> the first term. ", snippets.get("body")[1]);
+ assertEquals("I am hoping for the <b>best</b>.", snippets.get("title")[0]);
+ assertEquals("But <b>best</b> may not be good enough.", snippets.get("title")[1]);
+ ir.close();
+ dir.close();
+ }
+
public void testMultipleTerms() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
@@ -133,14 +176,14 @@ public class TestPostingsHighlighter ext
iw.close();
IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter("body");
+ PostingsHighlighter highlighter = new PostingsHighlighter();
BooleanQuery query = new BooleanQuery();
query.add(new TermQuery(new Term("body", "highlighting")), BooleanClause.Occur.SHOULD);
query.add(new TermQuery(new Term("body", "just")), BooleanClause.Occur.SHOULD);
query.add(new TermQuery(new Term("body", "first")), BooleanClause.Occur.SHOULD);
TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits);
- String snippets[] = highlighter.highlight(query, searcher, topDocs);
+ String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
assertEquals(2, snippets.length);
assertEquals("<b>Just</b> a test <b>highlighting</b> from postings. ", snippets[0]);
assertEquals("<b>Highlighting</b> the <b>first</b> term. ", snippets[1]);
@@ -170,11 +213,11 @@ public class TestPostingsHighlighter ext
iw.close();
IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter("body");
+ PostingsHighlighter highlighter = new PostingsHighlighter();
Query query = new TermQuery(new Term("body", "test"));
TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits);
- String snippets[] = highlighter.highlight(query, searcher, topDocs, 2);
+ String snippets[] = highlighter.highlight("body", query, searcher, topDocs, 2);
assertEquals(2, snippets.length);
assertEquals("This is a <b>test</b>. Just a <b>test</b> highlighting from postings. ", snippets[0]);
assertEquals("This <b>test</b> is another <b>test</b>. ... <b>Test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[1]);
@@ -204,12 +247,12 @@ public class TestPostingsHighlighter ext
iw.close();
IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter("body");
+ PostingsHighlighter highlighter = new PostingsHighlighter();
Query query = new TermQuery(new Term("body", "test"));
TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits);
try {
- highlighter.highlight(query, searcher, topDocs, 2);
+ highlighter.highlight("body", query, searcher, topDocs, 2);
fail("did not hit expected exception");
} catch (IllegalArgumentException iae) {
// expected
Modified: lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/sandbox/postingshighlight/TestPostingsHighlighterRanking.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/sandbox/postingshighlight/TestPostingsHighlighterRanking.java?rev=1428147&r1=1428146&r2=1428147&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/sandbox/postingshighlight/TestPostingsHighlighterRanking.java (original)
+++ lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/sandbox/postingshighlight/TestPostingsHighlighterRanking.java Thu Jan 3 02:52:27 2013
@@ -109,14 +109,12 @@ public class TestPostingsHighlighterRank
private void checkQuery(IndexSearcher is, Query query, int doc, int maxTopN) throws IOException {
for (int n = 1; n < maxTopN; n++) {
FakePassageFormatter f1 = new FakePassageFormatter();
- PostingsHighlighter p1 = new PostingsHighlighter("body",
- Integer.MAX_VALUE-1,
+ PostingsHighlighter p1 = new PostingsHighlighter(Integer.MAX_VALUE-1,
BreakIterator.getSentenceInstance(Locale.ROOT),
new PassageScorer(),
f1);
FakePassageFormatter f2 = new FakePassageFormatter();
- PostingsHighlighter p2 = new PostingsHighlighter("body",
- Integer.MAX_VALUE-1,
+ PostingsHighlighter p2 = new PostingsHighlighter(Integer.MAX_VALUE-1,
BreakIterator.getSentenceInstance(Locale.ROOT),
new PassageScorer(),
f2);
@@ -124,8 +122,8 @@ public class TestPostingsHighlighterRank
bq.add(query, BooleanClause.Occur.MUST);
bq.add(new TermQuery(new Term("id", Integer.toString(doc))), BooleanClause.Occur.MUST);
TopDocs td = is.search(bq, 1);
- p1.highlight(bq, is, td, n);
- p2.highlight(bq, is, td, n+1);
+ p1.highlight("body", bq, is, td, n);
+ p2.highlight("body", bq, is, td, n+1);
assertTrue(f2.seen.containsAll(f1.seen));
}
}