You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/04/01 06:39:49 UTC
svn commit: r1463083 - in /lucene/dev/trunk: lucene/
lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/
lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/
solr/core/src/java/org/apache/solr/highlight/
Author: rmuir
Date: Mon Apr 1 04:39:49 2013
New Revision: 1463083
URL: http://svn.apache.org/r1463083
Log:
LUCENE-4861: make BreakIterator per-field in PostingsHighlighter
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/WholeBreakIterator.java
lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java
lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1463083&r1=1463082&r2=1463083&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Mon Apr 1 04:39:49 2013
@@ -89,8 +89,8 @@ New Features
* LUCENE-4820: Add payloads to Analyzing/FuzzySuggester, to record an
arbitrary byte[] per suggestion (Mike McCandless)
-* LUCENE-4816: Passing null as the BreakIterator to PostingsHighlighter
- now highlights the entire content as a single Passage. (Robert
+* LUCENE-4816: Add WholeBreakIterator to PostingsHighlighter
+ for treating the entire content as a single Passage. (Robert
Muir, Mike McCandless)
* LUCENE-4827: Add additional ctor to PostingsHighlighter PassageScorer
@@ -148,6 +148,10 @@ New Features
you only have points (1/doc) then "Intersects" is equivalent and faster.
See the javadocs. (David Smiley)
+* LUCENE-4861: Make BreakIterator per-field in PostingsHighlighter. This means
+ you can override getBreakIterator(String field) to use different mechanisms
+ for e.g. title vs. body fields. (Mike McCandless, Robert Muir)
+
Optimizations
* LUCENE-4839: SorterTemplate.merge can now be overridden in order to replace
Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java?rev=1463083&r1=1463082&r2=1463083&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java Mon Apr 1 04:39:49 2013
@@ -96,7 +96,6 @@ public class PostingsHighlighter {
public static final int DEFAULT_MAX_LENGTH = 10000;
private final int maxLength;
- private final BreakIterator breakIterator;
/** Set the first time {@link #getFormatter} is called,
* and then reused. */
@@ -119,28 +118,20 @@ public class PostingsHighlighter {
* @throws IllegalArgumentException if <code>maxLength</code> is negative or <code>Integer.MAX_VALUE</code>
*/
public PostingsHighlighter(int maxLength) {
- this(maxLength, BreakIterator.getSentenceInstance(Locale.ROOT));
- }
-
- /**
- * Creates a new highlighter with custom parameters.
- * @param maxLength maximum content size to process.
- * @param breakIterator used for finding passage
- * boundaries; pass null to highlight the entire
- * content as a single Passage.
- * @throws IllegalArgumentException if <code>maxLength</code> is negative or <code>Integer.MAX_VALUE</code>
- */
- public PostingsHighlighter(int maxLength, BreakIterator breakIterator) {
if (maxLength < 0 || maxLength == Integer.MAX_VALUE) {
// two reasons: no overflow problems in BreakIterator.preceding(offset+1),
// our sentinel in the offsets queue uses this value to terminate.
throw new IllegalArgumentException("maxLength must be < Integer.MAX_VALUE");
}
- if (breakIterator == null) {
- breakIterator = new WholeBreakIterator();
- }
this.maxLength = maxLength;
- this.breakIterator = breakIterator;
+ }
+
+ /** Returns the {@link BreakIterator} to use for
+ * dividing text into passages. This returns
+ * {@link BreakIterator#getSentenceInstance(Locale)} by default;
+ * subclasses can override to customize. */
+ protected BreakIterator getBreakIterator(String field) {
+ return BreakIterator.getSentenceInstance(Locale.ROOT);
}
/** Returns the {@link PassageFormatter} to use for
@@ -303,8 +294,6 @@ public class PostingsHighlighter {
IndexReaderContext readerContext = reader.getContext();
List<AtomicReaderContext> leaves = readerContext.leaves();
- BreakIterator bi = (BreakIterator)breakIterator.clone();
-
// Make our own copy because we sort in-place:
int[] docids = new int[docidsIn.length];
System.arraycopy(docidsIn, 0, docids, 0, docidsIn.length);
@@ -330,7 +319,7 @@ public class PostingsHighlighter {
for(Term term : fieldTerms) {
terms[termUpto++] = term.bytes();
}
- Map<Integer,String> fieldHighlights = highlightField(field, contents[i], bi, terms, docids, leaves, maxPassages);
+ Map<Integer,String> fieldHighlights = highlightField(field, contents[i], getBreakIterator(field), terms, docids, leaves, maxPassages);
String[] result = new String[docids.length];
for (int j = 0; j < docidsIn.length; j++) {
Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/WholeBreakIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/WholeBreakIterator.java?rev=1463083&r1=1463082&r2=1463083&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/WholeBreakIterator.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/WholeBreakIterator.java Mon Apr 1 04:39:49 2013
@@ -21,7 +21,7 @@ import java.text.BreakIterator;
import java.text.CharacterIterator;
/** Just produces one single fragment for the entire text */
-final class WholeBreakIterator extends BreakIterator {
+public final class WholeBreakIterator extends BreakIterator {
private CharacterIterator text;
private int start;
private int end;
Modified: lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java?rev=1463083&r1=1463082&r2=1463083&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java Mon Apr 1 04:39:49 2013
@@ -457,7 +457,12 @@ public class TestPostingsHighlighter ext
iw.close();
IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter(10000, null);
+ PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
+ @Override
+ protected BreakIterator getBreakIterator(String field) {
+ return new WholeBreakIterator();
+ }
+ };
Query query = new TermQuery(new Term("body", "test"));
TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
assertEquals(1, topDocs.totalHits);
@@ -527,7 +532,7 @@ public class TestPostingsHighlighter ext
IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter(10000, null) {
+ PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
@Override
protected String[][] loadFieldValues(IndexSearcher searcher, String[] fields, int[] docids, int maxLength) throws IOException {
assert fields.length == 1;
@@ -536,6 +541,11 @@ public class TestPostingsHighlighter ext
contents[0][0] = text;
return contents;
}
+
+ @Override
+ protected BreakIterator getBreakIterator(String field) {
+ return new WholeBreakIterator();
+ }
};
Query query = new TermQuery(new Term("body", "test"));
@@ -636,7 +646,12 @@ public class TestPostingsHighlighter ext
iw.close();
IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter(10000, null);
+ PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
+ @Override
+ protected BreakIterator getBreakIterator(String field) {
+ return new WholeBreakIterator();
+ }
+ };
Query query = new TermQuery(new Term("body", "highlighting"));
int[] docIDs = new int[] {0};
String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, 2).get("body");
Modified: lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java?rev=1463083&r1=1463082&r2=1463083&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java Mon Apr 1 04:39:49 2013
@@ -113,8 +113,7 @@ public class TestPostingsHighlighterRank
private void checkQuery(IndexSearcher is, Query query, int doc, int maxTopN) throws IOException {
for (int n = 1; n < maxTopN; n++) {
final FakePassageFormatter f1 = new FakePassageFormatter();
- PostingsHighlighter p1 = new PostingsHighlighter(Integer.MAX_VALUE-1,
- BreakIterator.getSentenceInstance(Locale.ROOT)) {
+ PostingsHighlighter p1 = new PostingsHighlighter(Integer.MAX_VALUE-1) {
@Override
protected PassageFormatter getFormatter(String field) {
assertEquals("body", field);
@@ -123,8 +122,7 @@ public class TestPostingsHighlighterRank
};
final FakePassageFormatter f2 = new FakePassageFormatter();
- PostingsHighlighter p2 = new PostingsHighlighter(Integer.MAX_VALUE-1,
- BreakIterator.getSentenceInstance(Locale.ROOT)) {
+ PostingsHighlighter p2 = new PostingsHighlighter(Integer.MAX_VALUE-1) {
@Override
protected PassageFormatter getFormatter(String field) {
assertEquals("body", field);
@@ -269,8 +267,7 @@ public class TestPostingsHighlighterRank
iw.close();
IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter(10000,
- BreakIterator.getSentenceInstance(Locale.ROOT)) {
+ PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
@Override
protected PassageScorer getScorer(String field) {
return new PassageScorer(1.2f, 0, 87);
@@ -309,8 +306,7 @@ public class TestPostingsHighlighterRank
iw.close();
IndexSearcher searcher = newSearcher(ir);
- PostingsHighlighter highlighter = new PostingsHighlighter(10000,
- BreakIterator.getSentenceInstance(Locale.ROOT)) {
+ PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
@Override
protected PassageScorer getScorer(String field) {
return new PassageScorer(0, 0.75f, 87);
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java?rev=1463083&r1=1463082&r2=1463083&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java Mon Apr 1 04:39:49 2013
@@ -80,7 +80,6 @@ public class PostingsSolrHighlighter ext
@Override
public void init(PluginInfo info) {
Map<String,String> attributes = info.attributes;
- BreakIterator breakIterator = BreakIterator.getSentenceInstance(Locale.ROOT);
// scorer parameters: k1/b/pivot
String k1 = attributes.get("k1");
@@ -127,7 +126,7 @@ public class PostingsSolrHighlighter ext
if (attributes.containsKey("maxLength")) {
maxLength = Integer.parseInt(attributes.get("maxLength"));
}
- highlighter = new PostingsHighlighter(maxLength, breakIterator) {
+ highlighter = new PostingsHighlighter(maxLength) {
@Override
protected Passage[] getEmptyHighlight(String fieldName, BreakIterator bi, int maxPassages) {
if (summarizeEmptyBoolean) {