You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/01/03 04:50:16 UTC
svn commit: r1428159 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/sandbox/ lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/

Author: rmuir
Date: Thu Jan  3 03:50:15 2013
New Revision: 1428159

URL: http://svn.apache.org/viewvc?rev=1428159&view=rev
Log:
LUCENE-4290: clean up some typos, add a description (from Mike's blog), null checks, and other sand

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/sandbox/   (props changed)
    lucene/dev/branches/branch_4x/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PassageScorer.java
    lucene/dev/branches/branch_4x/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PostingsHighlighter.java

Modified: lucene/dev/branches/branch_4x/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PassageScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PassageScorer.java?rev=1428159&r1=1428158&r2=1428159&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PassageScorer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PassageScorer.java Thu Jan  3 03:50:15 2013
@@ -27,7 +27,7 @@ package org.apache.lucene.sandbox.postin
  */
 public class PassageScorer {
   
-  // TODO: this formula completely made up. It might not provide relevant snippets!
+  // TODO: this formula is completely made up. It might not provide relevant snippets!
   
   /** BM25 k1 parameter, controls term frequency normalization */
   public static final float k1 = 1.2f;
@@ -36,7 +36,7 @@ public class PassageScorer {
   
   /**
    * A pivot used for length normalization.
-   * The default value is the typical average english sentence length.
+   * The default value is the typical average English sentence length.
    */
   public static final float pivot = 87f;
     

Modified: lucene/dev/branches/branch_4x/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PostingsHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PostingsHighlighter.java?rev=1428159&r1=1428158&r2=1428159&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PostingsHighlighter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PostingsHighlighter.java Thu Jan  3 03:50:15 2013
@@ -53,8 +53,19 @@ import org.apache.lucene.util.UnicodeUti
  * Simple highlighter that does not analyze fields nor use
  * term vectors. Instead it requires 
  * {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}.
- * 
- * This is thread-safe, and can be used across different readers.
+ * <p>
+ * PostingsHighlighter treats the single original document as the whole corpus, and then scores individual
+ * passages as if they were documents in this corpus. It uses a {@link BreakIterator} to find 
+ * passages in the text; by default it breaks using {@link BreakIterator#getSentenceInstance(Locale) 
+ * getSentenceInstance(Locale.ROOT)}. It then iterates in parallel (merge sorting by offset) through 
+ * the positions of all terms from the query, coalescing those hits that occur in a single passage 
+ * into a {@link Passage}, and then scores each Passage using a separate {@link PassageScorer}. 
+ * Passages are finally formatted into highlighted snippets with a {@link PassageFormatter}.
+ * <p>
+ * <b>WARNING</b>: The code is very new and may still have some exciting bugs! This is why 
+ * it's located under Lucene's sandbox module. 
+ * <p>
+ * Example usage:
  * <pre class="prettyprint">
  *   // configure field with offsets at index time
  *   FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
@@ -67,6 +78,8 @@ import org.apache.lucene.util.UnicodeUti
  *   TopDocs topDocs = searcher.search(query, n);
  *   String highlights[] = highlighter.highlight("body", query, searcher, topDocs);
  * </pre>
+ * <p>
+ * This is thread-safe, and can be used across different readers.
  * @lucene.experimental
  */
 public final class PostingsHighlighter {
@@ -117,6 +130,9 @@ public final class PostingsHighlighter {
       // our sentinel in the offsets queue uses this value to terminate.
       throw new IllegalArgumentException("maxLength must be < Integer.MAX_VALUE");
     }
+    if (breakIterator == null || scorer == null || formatter == null) {
+      throw new NullPointerException();
+    }
     this.maxLength = maxLength;
     this.breakIterator = breakIterator;
     this.scorer = scorer;
@@ -165,7 +181,7 @@ public final class PostingsHighlighter {
   /**
    * Highlights the top passages from multiple fields.
    * <p>
-   * Conceptually, this behaves as a more efficent form of:
+   * Conceptually, this behaves as a more efficient form of:
    * <pre class="prettyprint">
    * Map m = new HashMap();
    * for (String field : fields) {