You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/04/01 06:39:49 UTC

svn commit: r1463083 - in /lucene/dev/trunk: lucene/ lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/ lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/ solr/core/src/java/org/apache/solr/highlight/

Author: rmuir
Date: Mon Apr  1 04:39:49 2013
New Revision: 1463083

URL: http://svn.apache.org/r1463083
Log:
LUCENE-4861: make BreakIterator per-field in PostingsHighlighter

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
    lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/WholeBreakIterator.java
    lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java
    lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1463083&r1=1463082&r2=1463083&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Mon Apr  1 04:39:49 2013
@@ -89,8 +89,8 @@ New Features
 * LUCENE-4820: Add payloads to Analyzing/FuzzySuggester, to record an
   arbitrary byte[] per suggestion (Mike McCandless)
 
-* LUCENE-4816: Passing null as the BreakIterator to PostingsHighlighter
-  now highlights the entire content as a single Passage.  (Robert
+* LUCENE-4816: Add WholeBreakIterator to PostingsHighlighter
+  for treating the entire content as a single Passage.  (Robert
   Muir, Mike McCandless)
 
 * LUCENE-4827: Add additional ctor to PostingsHighlighter PassageScorer
@@ -148,6 +148,10 @@ New Features
   you only have points (1/doc) then "Intersects" is equivalent and faster.
   See the javadocs.  (David Smiley)
 
+* LUCENE-4861: Make BreakIterator per-field in PostingsHighlighter. This means
+  you can override getBreakIterator(String field) to use different mechanisms
+  for e.g. title vs. body fields.  (Mike McCandless, Robert Muir)
+
 Optimizations
 
 * LUCENE-4839: SorterTemplate.merge can now be overridden in order to replace

Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java?rev=1463083&r1=1463082&r2=1463083&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java Mon Apr  1 04:39:49 2013
@@ -96,7 +96,6 @@ public class PostingsHighlighter {
   public static final int DEFAULT_MAX_LENGTH = 10000;
     
   private final int maxLength;
-  private final BreakIterator breakIterator;
 
   /** Set the first time {@link #getFormatter} is called,
    *  and then reused. */
@@ -119,28 +118,20 @@ public class PostingsHighlighter {
    * @throws IllegalArgumentException if <code>maxLength</code> is negative or <code>Integer.MAX_VALUE</code>
    */
   public PostingsHighlighter(int maxLength) {
-    this(maxLength, BreakIterator.getSentenceInstance(Locale.ROOT));
-  }
-  
-  /**
-   * Creates a new highlighter with custom parameters.
-   * @param maxLength maximum content size to process.
-   * @param breakIterator used for finding passage
-   *        boundaries; pass null to highlight the entire
-   *        content as a single Passage.
-   * @throws IllegalArgumentException if <code>maxLength</code> is negative or <code>Integer.MAX_VALUE</code>
-   */
-  public PostingsHighlighter(int maxLength, BreakIterator breakIterator) {
     if (maxLength < 0 || maxLength == Integer.MAX_VALUE) {
       // two reasons: no overflow problems in BreakIterator.preceding(offset+1),
       // our sentinel in the offsets queue uses this value to terminate.
       throw new IllegalArgumentException("maxLength must be < Integer.MAX_VALUE");
     }
-    if (breakIterator == null) {
-      breakIterator = new WholeBreakIterator();
-    }
     this.maxLength = maxLength;
-    this.breakIterator = breakIterator;
+  }
+  
+  /** Returns the {@link BreakIterator} to use for
+   *  dividing text into passages.  This returns 
+   *  {@link BreakIterator#getSentenceInstance(Locale)} by default;
+   *  subclasses can override to customize. */
+  protected BreakIterator getBreakIterator(String field) {
+    return BreakIterator.getSentenceInstance(Locale.ROOT);
   }
 
   /** Returns the {@link PassageFormatter} to use for
@@ -303,8 +294,6 @@ public class PostingsHighlighter {
     IndexReaderContext readerContext = reader.getContext();
     List<AtomicReaderContext> leaves = readerContext.leaves();
 
-    BreakIterator bi = (BreakIterator)breakIterator.clone();
-
     // Make our own copy because we sort in-place:
     int[] docids = new int[docidsIn.length];
     System.arraycopy(docidsIn, 0, docids, 0, docidsIn.length);
@@ -330,7 +319,7 @@ public class PostingsHighlighter {
       for(Term term : fieldTerms) {
         terms[termUpto++] = term.bytes();
       }
-      Map<Integer,String> fieldHighlights = highlightField(field, contents[i], bi, terms, docids, leaves, maxPassages);
+      Map<Integer,String> fieldHighlights = highlightField(field, contents[i], getBreakIterator(field), terms, docids, leaves, maxPassages);
         
       String[] result = new String[docids.length];
       for (int j = 0; j < docidsIn.length; j++) {

Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/WholeBreakIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/WholeBreakIterator.java?rev=1463083&r1=1463082&r2=1463083&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/WholeBreakIterator.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/WholeBreakIterator.java Mon Apr  1 04:39:49 2013
@@ -21,7 +21,7 @@ import java.text.BreakIterator;
 import java.text.CharacterIterator;
 
 /** Just produces one single fragment for the entire text */
-final class WholeBreakIterator extends BreakIterator {
+public final class WholeBreakIterator extends BreakIterator {
   private CharacterIterator text;
   private int start;
   private int end;

Modified: lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java?rev=1463083&r1=1463082&r2=1463083&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java Mon Apr  1 04:39:49 2013
@@ -457,7 +457,12 @@ public class TestPostingsHighlighter ext
     iw.close();
     
     IndexSearcher searcher = newSearcher(ir);
-    PostingsHighlighter highlighter = new PostingsHighlighter(10000, null);
+    PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
+      @Override
+      protected BreakIterator getBreakIterator(String field) {
+        return new WholeBreakIterator();
+      }
+    };
     Query query = new TermQuery(new Term("body", "test"));
     TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
     assertEquals(1, topDocs.totalHits);
@@ -527,7 +532,7 @@ public class TestPostingsHighlighter ext
     
     IndexSearcher searcher = newSearcher(ir);
 
-    PostingsHighlighter highlighter = new PostingsHighlighter(10000, null) {
+    PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
         @Override
         protected String[][] loadFieldValues(IndexSearcher searcher, String[] fields, int[] docids, int maxLength) throws IOException {
           assert fields.length == 1;
@@ -536,6 +541,11 @@ public class TestPostingsHighlighter ext
           contents[0][0] = text;
           return contents;
         }
+
+        @Override
+        protected BreakIterator getBreakIterator(String field) {
+          return new WholeBreakIterator();
+        }
       };
 
     Query query = new TermQuery(new Term("body", "test"));
@@ -636,7 +646,12 @@ public class TestPostingsHighlighter ext
     iw.close();
     
     IndexSearcher searcher = newSearcher(ir);
-    PostingsHighlighter highlighter = new PostingsHighlighter(10000, null);
+    PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
+      @Override
+      protected BreakIterator getBreakIterator(String field) {
+        return new WholeBreakIterator();
+      }
+    };
     Query query = new TermQuery(new Term("body", "highlighting"));
     int[] docIDs = new int[] {0};
     String snippets[] = highlighter.highlightFields(new String[] {"body"}, query, searcher, docIDs, 2).get("body");

Modified: lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java?rev=1463083&r1=1463082&r2=1463083&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java Mon Apr  1 04:39:49 2013
@@ -113,8 +113,7 @@ public class TestPostingsHighlighterRank
   private void checkQuery(IndexSearcher is, Query query, int doc, int maxTopN) throws IOException {
     for (int n = 1; n < maxTopN; n++) {
       final FakePassageFormatter f1 = new FakePassageFormatter();
-      PostingsHighlighter p1 = new PostingsHighlighter(Integer.MAX_VALUE-1, 
-                                                       BreakIterator.getSentenceInstance(Locale.ROOT)) {
+      PostingsHighlighter p1 = new PostingsHighlighter(Integer.MAX_VALUE-1) {
           @Override
           protected PassageFormatter getFormatter(String field) {
             assertEquals("body", field);
@@ -123,8 +122,7 @@ public class TestPostingsHighlighterRank
         };
 
       final FakePassageFormatter f2 = new FakePassageFormatter();
-      PostingsHighlighter p2 = new PostingsHighlighter(Integer.MAX_VALUE-1, 
-                                                       BreakIterator.getSentenceInstance(Locale.ROOT)) {
+      PostingsHighlighter p2 = new PostingsHighlighter(Integer.MAX_VALUE-1) {
           @Override
           protected PassageFormatter getFormatter(String field) {
             assertEquals("body", field);
@@ -269,8 +267,7 @@ public class TestPostingsHighlighterRank
     iw.close();
     
     IndexSearcher searcher = newSearcher(ir);
-    PostingsHighlighter highlighter = new PostingsHighlighter(10000, 
-                                                              BreakIterator.getSentenceInstance(Locale.ROOT)) {
+    PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
         @Override
         protected PassageScorer getScorer(String field) {
           return new PassageScorer(1.2f, 0, 87);
@@ -309,8 +306,7 @@ public class TestPostingsHighlighterRank
     iw.close();
     
     IndexSearcher searcher = newSearcher(ir);
-    PostingsHighlighter highlighter = new PostingsHighlighter(10000, 
-                                                              BreakIterator.getSentenceInstance(Locale.ROOT)) {
+    PostingsHighlighter highlighter = new PostingsHighlighter(10000) {
         @Override
         protected PassageScorer getScorer(String field) {
           return new PassageScorer(0, 0.75f, 87);

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java?rev=1463083&r1=1463082&r2=1463083&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java Mon Apr  1 04:39:49 2013
@@ -80,7 +80,6 @@ public class PostingsSolrHighlighter ext
   @Override
   public void init(PluginInfo info) {
     Map<String,String> attributes = info.attributes;
-    BreakIterator breakIterator = BreakIterator.getSentenceInstance(Locale.ROOT);
     
     // scorer parameters: k1/b/pivot
     String k1 = attributes.get("k1");
@@ -127,7 +126,7 @@ public class PostingsSolrHighlighter ext
     if (attributes.containsKey("maxLength")) {
       maxLength = Integer.parseInt(attributes.get("maxLength"));
     }
-    highlighter = new PostingsHighlighter(maxLength, breakIterator) {
+    highlighter = new PostingsHighlighter(maxLength) {
         @Override
         protected Passage[] getEmptyHighlight(String fieldName, BreakIterator bi, int maxPassages) {
           if (summarizeEmptyBoolean) {