You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/01/03 04:50:16 UTC
svn commit: r1428159 - in /lucene/dev/branches/branch_4x: ./ lucene/
lucene/sandbox/
lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/
Author: rmuir
Date: Thu Jan 3 03:50:15 2013
New Revision: 1428159
URL: http://svn.apache.org/viewvc?rev=1428159&view=rev
Log:
LUCENE-4290: clean up some typos, add a description (from Mike's blog), null checks, and other sand… [log message truncated in this archive]
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/lucene/ (props changed)
lucene/dev/branches/branch_4x/lucene/sandbox/ (props changed)
lucene/dev/branches/branch_4x/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PassageScorer.java
lucene/dev/branches/branch_4x/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PostingsHighlighter.java
Modified: lucene/dev/branches/branch_4x/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PassageScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PassageScorer.java?rev=1428159&r1=1428158&r2=1428159&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PassageScorer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PassageScorer.java Thu Jan 3 03:50:15 2013
@@ -27,7 +27,7 @@ package org.apache.lucene.sandbox.postin
*/
public class PassageScorer {
- // TODO: this formula completely made up. It might not provide relevant snippets!
+ // TODO: this formula is completely made up. It might not provide relevant snippets!
/** BM25 k1 parameter, controls term frequency normalization */
public static final float k1 = 1.2f;
@@ -36,7 +36,7 @@ public class PassageScorer {
/**
* A pivot used for length normalization.
- * The default value is the typical average english sentence length.
+ * The default value is the typical average English sentence length.
*/
public static final float pivot = 87f;
Modified: lucene/dev/branches/branch_4x/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PostingsHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PostingsHighlighter.java?rev=1428159&r1=1428158&r2=1428159&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PostingsHighlighter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/sandbox/src/java/org/apache/lucene/sandbox/postingshighlight/PostingsHighlighter.java Thu Jan 3 03:50:15 2013
@@ -53,8 +53,19 @@ import org.apache.lucene.util.UnicodeUti
* Simple highlighter that does not analyze fields nor use
* term vectors. Instead it requires
* {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}.
- *
- * This is thread-safe, and can be used across different readers.
+ * <p>
+ * PostingsHighlighter treats the single original document as the whole corpus, and then scores individual
+ * passages as if they were documents in this corpus. It uses a {@link BreakIterator} to find
+ * passages in the text; by default it breaks using {@link BreakIterator#getSentenceInstance(Locale)
+ * getSentenceInstance(Locale.ROOT)}. It then iterates in parallel (merge sorting by offset) through
+ * the positions of all terms from the query, coalescing those hits that occur in a single passage
+ * into a {@link Passage}, and then scores each Passage using a separate {@link PassageScorer}.
+ * Passages are finally formatted into highlighted snippets with a {@link PassageFormatter}.
+ * <p>
+ * <b>WARNING</b>: The code is very new and may still have some exciting bugs! This is why
+ * it's located under Lucene's sandbox module.
+ * <p>
+ * Example usage:
* <pre class="prettyprint">
* // configure field with offsets at index time
* FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
@@ -67,6 +78,8 @@ import org.apache.lucene.util.UnicodeUti
* TopDocs topDocs = searcher.search(query, n);
* String highlights[] = highlighter.highlight("body", query, searcher, topDocs);
* </pre>
+ * <p>
+ * This is thread-safe, and can be used across different readers.
* @lucene.experimental
*/
public final class PostingsHighlighter {
@@ -117,6 +130,9 @@ public final class PostingsHighlighter {
// our sentinel in the offsets queue uses this value to terminate.
throw new IllegalArgumentException("maxLength must be < Integer.MAX_VALUE");
}
+ if (breakIterator == null || scorer == null || formatter == null) {
+ throw new NullPointerException();
+ }
this.maxLength = maxLength;
this.breakIterator = breakIterator;
this.scorer = scorer;
@@ -165,7 +181,7 @@ public final class PostingsHighlighter {
/**
* Highlights the top passages from multiple fields.
* <p>
- * Conceptually, this behaves as a more efficent form of:
+ * Conceptually, this behaves as a more efficient form of:
* <pre class="prettyprint">
* Map m = new HashMap();
* for (String field : fields) {