You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2013/09/12 17:34:56 UTC
svn commit: r1522619 - in /lucene/dev/trunk/lucene: ./
highlighter/src/java/org/apache/lucene/search/postingshighlight/
highlighter/src/test/org/apache/lucene/search/postingshighlight/
Author: mikemccand
Date: Thu Sep 12 15:34:55 2013
New Revision: 1522619
URL: http://svn.apache.org/r1522619
Log:
LUCENE-4906: PostingsHighlighter: add expert API to render highlights to Object
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PassageFormatter.java
lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1522619&r1=1522618&r2=1522619&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Thu Sep 12 15:34:55 2013
@@ -47,6 +47,11 @@ Optimizations
======================= Lucene 4.6.0 =======================
+New Features
+
+* LUCENE-4906: PostingsHighlighter can now render to custom Object,
+ for advanced use cases where String is too restrictive (Luca
+ Cavanna, Robert Muir, Mike McCandless)
======================= Lucene 4.5.0 =======================
Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PassageFormatter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PassageFormatter.java?rev=1522619&r1=1522618&r2=1522619&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PassageFormatter.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PassageFormatter.java Thu Sep 12 15:34:55 2013
@@ -31,8 +31,11 @@ public abstract class PassageFormatter {
* @param passages top-N passages for the field. Note these are sorted in
* the order that they appear in the document for convenience.
* @param content content for the field.
- * @return formatted highlight
+ * @return formatted highlight. Note that for the
+ * non-expert APIs in {@link PostingsHighlighter} that
+ * return String, the toString method on the Object
+ * returned by this method is used to compute the string.
*/
- public abstract String format(Passage passages[], String content);
+ public abstract Object format(Passage passages[], String content);
}
Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java?rev=1522619&r1=1522618&r2=1522619&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java Thu Sep 12 15:34:55 2013
@@ -267,7 +267,7 @@ public class PostingsHighlighter {
return highlightFields(fields, query, searcher, docids, maxPassages);
}
-
+
/**
* Highlights the top-N passages from multiple fields,
* for the provided int[] docids.
@@ -280,7 +280,7 @@ public class PostingsHighlighter {
* @param maxPassagesIn The maximum number of top-N ranked passages per-field used to
* form the highlighted snippets.
* @return Map keyed on field name, containing the array of formatted snippets
- * corresponding to the documents in <code>topDocs</code>.
+ * corresponding to the documents in <code>docidsIn</code>.
* If no highlights were found for a document, the
* first {@code maxPassages} from the field will
* be returned.
@@ -289,6 +289,45 @@ public class PostingsHighlighter {
* {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}
*/
public Map<String,String[]> highlightFields(String fieldsIn[], Query query, IndexSearcher searcher, int[] docidsIn, int maxPassagesIn[]) throws IOException {
+ Map<String,String[]> snippets = new HashMap<String,String[]>();
+ for(Map.Entry<String,Object[]> ent : highlightFieldsAsObjects(fieldsIn, query, searcher, docidsIn, maxPassagesIn).entrySet()) {
+ Object[] snippetObjects = ent.getValue();
+ String[] snippetStrings = new String[snippetObjects.length];
+ snippets.put(ent.getKey(), snippetStrings);
+ for(int i=0;i<snippetObjects.length;i++) {
+ Object snippet = snippetObjects[i];
+ if (snippet != null) {
+ snippetStrings[i] = snippet.toString();
+ }
+ }
+ }
+
+ return snippets;
+ }
+
+ /**
+ * Expert: highlights the top-N passages from multiple fields,
+ * for the provided int[] docids, to custom Object as
+ * returned by the {@link PassageFormatter}. Use
+ * this API to render to something other than String.
+ *
+ * @param fieldsIn field names to highlight.
+ * Must have a stored string value and also be indexed with offsets.
+ * @param query query to highlight.
+ * @param searcher searcher that was previously used to execute the query.
+ * @param docidsIn containing the document IDs to highlight.
+ * @param maxPassagesIn The maximum number of top-N ranked passages per-field used to
+ * form the highlighted snippets.
+ * @return Map keyed on field name, containing the array of formatted snippets
+ * corresponding to the documents in <code>docidsIn</code>.
+ * If no highlights were found for a document, the
+ * first {@code maxPassages} from the field will
+ * be returned.
+ * @throws IOException if an I/O error occurred during processing
+ * @throws IllegalArgumentException if <code>field</code> was indexed without
+ * {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}
+ */
+ public Map<String,Object[]> highlightFieldsAsObjects(String fieldsIn[], Query query, IndexSearcher searcher, int[] docidsIn, int maxPassagesIn[]) throws IOException {
if (fieldsIn.length < 1) {
throw new IllegalArgumentException("fieldsIn must not be empty");
}
@@ -335,7 +374,7 @@ public class PostingsHighlighter {
// pull stored data:
String[][] contents = loadFieldValues(searcher, fields, docids, maxLength);
- Map<String,String[]> highlights = new HashMap<String,String[]>();
+ Map<String,Object[]> highlights = new HashMap<String,Object[]>();
for (int i = 0; i < fields.length; i++) {
String field = fields[i];
int numPassages = maxPassages[i];
@@ -350,9 +389,9 @@ public class PostingsHighlighter {
for(Term term : fieldTerms) {
terms[termUpto++] = term.bytes();
}
- Map<Integer,String> fieldHighlights = highlightField(field, contents[i], getBreakIterator(field), terms, docids, leaves, numPassages);
+ Map<Integer,Object> fieldHighlights = highlightField(field, contents[i], getBreakIterator(field), terms, docids, leaves, numPassages);
- String[] result = new String[docids.length];
+ Object[] result = new Object[docids.length];
for (int j = 0; j < docidsIn.length; j++) {
result[j] = fieldHighlights.get(docidsIn[j]);
}
@@ -394,8 +433,8 @@ public class PostingsHighlighter {
return ' ';
}
- private Map<Integer,String> highlightField(String field, String contents[], BreakIterator bi, BytesRef terms[], int[] docids, List<AtomicReaderContext> leaves, int maxPassages) throws IOException {
- Map<Integer,String> highlights = new HashMap<Integer,String>();
+ private Map<Integer,Object> highlightField(String field, String contents[], BreakIterator bi, BytesRef terms[], int[] docids, List<AtomicReaderContext> leaves, int maxPassages) throws IOException {
+ Map<Integer,Object> highlights = new HashMap<Integer,Object>();
// reuse in the real sense... for docs in same segment we just advance our old enum
DocsAndPositionsEnum postings[] = null;
Modified: lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java?rev=1522619&r1=1522618&r2=1522619&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java Thu Sep 12 15:34:55 2013
@@ -21,6 +21,7 @@ import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.text.BreakIterator;
+import java.util.Arrays;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
@@ -47,8 +48,8 @@ import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+import org.apache.lucene.util.LuceneTestCase;
@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom"})
public class TestPostingsHighlighter extends LuceneTestCase {
@@ -1068,4 +1069,54 @@ public class TestPostingsHighlighter ext
ir.close();
dir.close();
}
+
+ // LUCENE-4906
+ public void testObjectFormatter() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+ iwc.setMergePolicy(newLogMergePolicy());
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+
+ FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
+ offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+ Field body = new Field("body", "", offsetsType);
+ Document doc = new Document();
+ doc.add(body);
+
+ body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
+ iw.addDocument(doc);
+
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ PostingsHighlighter highlighter = new PostingsHighlighter() {
+ @Override
+ protected PassageFormatter getFormatter(String field) {
+ return new PassageFormatter() {
+ PassageFormatter defaultFormatter = new DefaultPassageFormatter();
+
+ @Override
+ public String[] format(Passage passages[], String content) {
+ // Just turns the String snippet into a length 2
+ // array of String
+ return new String[] {"blah blah", defaultFormatter.format(passages, content).toString()};
+ }
+ };
+ }
+ };
+
+ Query query = new TermQuery(new Term("body", "highlighting"));
+ TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
+ assertEquals(1, topDocs.totalHits);
+ int[] docIDs = new int[1];
+ docIDs[0] = topDocs.scoreDocs[0].doc;
+ Map<String,Object[]> snippets = highlighter.highlightFieldsAsObjects(new String[]{"body"}, query, searcher, docIDs, new int[] {1});
+ Object[] bodySnippets = snippets.get("body");
+ assertEquals(1, bodySnippets.length);
+ assertTrue(Arrays.equals(new String[] {"blah blah", "Just a test <b>highlighting</b> from postings. "}, (String[]) bodySnippets[0]));
+
+ ir.close();
+ dir.close();
+ }
}