You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/04/12 20:25:35 UTC

svn commit: r1467397 - in /lucene/dev/trunk: lucene/ lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/ lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/ solr/core/src/java/org/apache/solr/highlight/

Author: rmuir
Date: Fri Apr 12 18:25:34 2013
New Revision: 1467397

URL: http://svn.apache.org/r1467397
Log:
LUCENE-4896: make PassageFormatter abstract

Added:
    lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/DefaultPassageFormatter.java   (with props)
Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PassageFormatter.java
    lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
    lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java
    lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1467397&r1=1467396&r2=1467397&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Fri Apr 12 18:25:34 2013
@@ -189,6 +189,11 @@ New Features
 * LUCENE-4895: Added support for the "IsDisjointTo" spatial predicate for
   RecursivePrefixTreeStrategy.  (David Smiley)
 
+API Changes
+
+* LUCENE-4896: Made PassageFormatter abstract in PostingsHighlighter, made
+  members of DefaultPassageFormatter protected.  (Luca Cavanna via Robert Muir)
+
 Optimizations
 
 * LUCENE-4839: SorterTemplate.merge can now be overridden in order to replace

Added: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/DefaultPassageFormatter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/DefaultPassageFormatter.java?rev=1467397&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/DefaultPassageFormatter.java (added)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/DefaultPassageFormatter.java Fri Apr 12 18:25:34 2013
@@ -0,0 +1,138 @@
+package org.apache.lucene.search.postingshighlight;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Creates a formatted snippet from the top passages.
+ * <p>
+ * The default implementation marks the query terms as bold, and places
+ * ellipses between unconnected passages.
+ */
+public class DefaultPassageFormatter extends PassageFormatter {
+  /** text that will appear before highlighted terms */
+  protected final String preTag;
+  /** text that will appear after highlighted terms */
+  protected final String postTag;
+  /** text that will appear between two unconnected passages */
+  protected final String ellipsis;
+  /** true if we should escape for html */
+  protected final boolean escape;
+
+  /**
+   * Creates a new DefaultPassageFormatter with the default tags.
+   */
+  public DefaultPassageFormatter() {
+    this("<b>", "</b>", "... ", false);
+  }
+
+  /**
+   * Creates a new DefaultPassageFormatter with custom tags.
+   * @param preTag text which should appear before a highlighted term.
+   * @param postTag text which should appear after a highlighted term.
+   * @param ellipsis text which should be used to connect two unconnected passages.
+   * @param escape true if text should be html-escaped
+   */
+  public DefaultPassageFormatter(String preTag, String postTag, String ellipsis, boolean escape) {
+    if (preTag == null || postTag == null || ellipsis == null) {
+      throw new NullPointerException();
+    }
+    this.preTag = preTag;
+    this.postTag = postTag;
+    this.ellipsis = ellipsis;
+    this.escape = escape;
+  }
+
+  @Override
+  public String format(Passage passages[], String content) {
+    StringBuilder sb = new StringBuilder();
+    int pos = 0;
+    for (Passage passage : passages) {
+      // don't add ellipsis if its the first one, or if its connected.
+      if (passage.startOffset > pos && pos > 0) {
+        sb.append(ellipsis);
+      }
+      pos = passage.startOffset;
+      for (int i = 0; i < passage.numMatches; i++) {
+        int start = passage.matchStarts[i];
+        int end = passage.matchEnds[i];
+        // its possible to have overlapping terms
+        if (start > pos) {
+          append(sb, content, pos, start);
+        }
+        if (end > pos) {
+          sb.append(preTag);
+          append(sb, content, Math.max(pos, start), end);
+          sb.append(postTag);
+          pos = end;
+        }
+      }
+      // its possible a "term" from the analyzer could span a sentence boundary.
+      append(sb, content, pos, Math.max(pos, passage.endOffset));
+      pos = passage.endOffset;
+    }
+    return sb.toString();
+  }
+
+  /** 
+   * Appends original text to the response.
+   * @param dest resulting text, possibly transformed or encoded
+   * @param content original text content
+   * @param start index of the first character in content
+   * @param end index of the character following the last character in content
+   */
+  protected void append(StringBuilder dest, String content, int start, int end) {
+    if (escape) {
+      // note: these are the rules from owasp.org
+      for (int i = start; i < end; i++) {
+        char ch = content.charAt(i);
+        switch(ch) {
+          case '&':
+            dest.append("&amp;");
+            break;
+          case '<':
+            dest.append("&lt;");
+            break;
+          case '>':
+            dest.append("&gt;");
+            break;
+          case '"':
+            dest.append("&quot;");
+            break;
+          case '\'':
+            dest.append("&#x27;");
+            break;
+          case '/':
+            dest.append("&#x2F;");
+            break;
+          default:
+            if (ch >= 0x30 && ch <= 0x39 || ch >= 0x41 && ch <= 0x5A || ch >= 0x61 && ch <= 0x7A) {
+              dest.append(ch);
+            } else if (ch < 0xff) {
+              dest.append("&#");
+              dest.append((int)ch);
+              dest.append(";");
+            } else {
+              dest.append(ch);
+            }
+        }
+      }
+    } else {
+      dest.append(content, start, end);
+    }
+  }
+}

Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PassageFormatter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PassageFormatter.java?rev=1467397&r1=1467396&r2=1467397&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PassageFormatter.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PassageFormatter.java Fri Apr 12 18:25:34 2013
@@ -19,118 +19,20 @@ package org.apache.lucene.search.posting
 
 /**
  * Creates a formatted snippet from the top passages.
- * <p>
- * The default implementation marks the query terms as bold, and places
- * ellipses between unconnected passages.
+ *
  * @lucene.experimental
  */
-public class PassageFormatter {
-  private final String preTag;
-  private final String postTag;
-  private final String ellipsis;
-  private final boolean escape;
-  
-  /**
-   * Creates a new PassageFormatter with the default tags.
-   */
-  public PassageFormatter() {
-    this("<b>", "</b>", "... ", false);
-  }
-  
-  /**
-   * Creates a new PassageFormatter with custom tags.
-   * @param preTag text which should appear before a highlighted term.
-   * @param postTag text which should appear after a highlighted term.
-   * @param ellipsis text which should be used to connect two unconnected passages.
-   * @param escape true if text should be html-escaped
-   */
-  public PassageFormatter(String preTag, String postTag, String ellipsis, boolean escape) {
-    if (preTag == null || postTag == null || ellipsis == null) {
-      throw new NullPointerException();
-    }
-    this.preTag = preTag;
-    this.postTag = postTag;
-    this.ellipsis = ellipsis;
-    this.escape = escape;
-  }
-  
+public abstract class PassageFormatter {
+
   /**
    * Formats the top <code>passages</code> from <code>content</code>
    * into a human-readable text snippet.
-   * 
+   *
    * @param passages top-N passages for the field. Note these are sorted in
    *        the order that they appear in the document for convenience.
    * @param content content for the field.
    * @return formatted highlight
    */
-  public String format(Passage passages[], String content) {
-    StringBuilder sb = new StringBuilder();
-    int pos = 0;
-    for (Passage passage : passages) {
-      // don't add ellipsis if its the first one, or if its connected.
-      if (passage.startOffset > pos && pos > 0) {
-        sb.append(ellipsis);
-      }
-      pos = passage.startOffset;
-      for (int i = 0; i < passage.numMatches; i++) {
-        int start = passage.matchStarts[i];
-        int end = passage.matchEnds[i];
-        // its possible to have overlapping terms
-        if (start > pos) {
-          append(sb, content, pos, start);
-        }
-        if (end > pos) {
-          sb.append(preTag);
-          append(sb, content, Math.max(pos, start), end);
-          sb.append(postTag);
-          pos = end;
-        }
-      }
-      // its possible a "term" from the analyzer could span a sentence boundary.
-      append(sb, content, pos, Math.max(pos, passage.endOffset));
-      pos = passage.endOffset;
-    }
-    return sb.toString();
-  }
+  public abstract String format(Passage passages[], String content);
 
-  private void append(StringBuilder dest, String content, int start, int end) {
-    if (escape) {
-      // note: these are the rules from owasp.org
-      for (int i = start; i < end; i++) {
-        char ch = content.charAt(i);
-        switch(ch) {
-          case '&': 
-            dest.append("&amp;");
-            break;
-          case '<':
-            dest.append("&lt;");
-            break;
-          case '>':
-            dest.append("&gt;");
-            break;
-          case '"':
-            dest.append("&quot;");
-            break;
-          case '\'':
-            dest.append("&#x27;");
-            break;
-          case '/':
-            dest.append("&#x2F;");
-            break;
-          default:
-            if (ch >= 0x30 && ch <= 0x39 || ch >= 0x41 && ch <= 0x5A || ch >= 0x61 && ch <= 0x7A) {
-              dest.append(ch);
-            } else if (ch < 0xff) {
-              dest.append("&#");
-              dest.append((int)ch);
-              dest.append(";");
-            } else {
-              dest.append(ch);
-            }
-        }
-      }
-    } else {
-      dest.append(content, start, end);
-    }
-  }
 }

Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java?rev=1467397&r1=1467396&r2=1467397&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java Fri Apr 12 18:25:34 2013
@@ -33,8 +33,8 @@ import java.util.TreeSet;
 import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexReaderContext;
 import org.apache.lucene.index.MultiReader;
@@ -141,7 +141,7 @@ public class PostingsHighlighter {
    *  subclasses can override to customize. */
   protected PassageFormatter getFormatter(String field) {
     if (defaultFormatter == null) {
-      defaultFormatter = new PassageFormatter();
+      defaultFormatter = new DefaultPassageFormatter();
     }
     return defaultFormatter;
   }
@@ -346,7 +346,7 @@ public class PostingsHighlighter {
     // pull stored data:
     String[][] contents = loadFieldValues(searcher, fields, docids, maxLength);
     
-    Map<String,String[]> highlights = new HashMap<String,String[]>();;
+    Map<String,String[]> highlights = new HashMap<String,String[]>();
     for (int i = 0; i < fields.length; i++) {
       String field = fields[i];
       int numPassages = maxPassages[i];

Modified: lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java?rev=1467397&r1=1467396&r2=1467397&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java Fri Apr 12 18:25:34 2013
@@ -47,8 +47,8 @@ import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
 
 @SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom"})
 public class TestPostingsHighlighter extends LuceneTestCase {
@@ -872,7 +872,7 @@ public class TestPostingsHighlighter ext
     PostingsHighlighter highlighter = new PostingsHighlighter() {
       @Override
       protected PassageFormatter getFormatter(String field) {
-        return new PassageFormatter("<b>", "</b>", "... ", true);
+        return new DefaultPassageFormatter("<b>", "</b>", "... ", true);
       }
     };
     Query query = new TermQuery(new Term("body", "highlighting"));

Modified: lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java?rev=1467397&r1=1467396&r2=1467397&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java Fri Apr 12 18:25:34 2013
@@ -18,9 +18,7 @@ package org.apache.lucene.search.posting
  */
 
 import java.io.IOException;
-import java.text.BreakIterator;
 import java.util.HashSet;
-import java.util.Locale;
 import java.util.Random;
 
 import org.apache.lucene.analysis.MockAnalyzer;
@@ -30,10 +28,10 @@ import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
@@ -45,8 +43,8 @@ import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util._TestUtil;
 import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+import org.apache.lucene.util._TestUtil;
 
 @SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom"})
 public class TestPostingsHighlighterRanking extends LuceneTestCase {
@@ -75,7 +73,7 @@ public class TestPostingsHighlighterRank
     document.add(id);
     document.add(body);
     
-    for (int i = 0; i < numDocs; i++) {;
+    for (int i = 0; i < numDocs; i++) {
       StringBuilder bodyText = new StringBuilder();
       int numSentences = _TestUtil.nextInt(random(), 1, maxNumSentences);
       for (int j = 0; j < numSentences; j++) {

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java?rev=1467397&r1=1467396&r2=1467397&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java Fri Apr 12 18:25:34 2013
@@ -26,6 +26,7 @@ import java.util.Set;
 
 import org.apache.lucene.index.StoredDocument;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.postingshighlight.DefaultPassageFormatter;
 import org.apache.lucene.search.postingshighlight.Passage;
 import org.apache.lucene.search.postingshighlight.PassageFormatter;
 import org.apache.lucene.search.postingshighlight.PassageScorer;
@@ -146,7 +147,7 @@ public class PostingsSolrHighlighter ext
           String postTag = params.getFieldParam(fieldName, HighlightParams.TAG_POST, "</em>");
           String ellipsis = params.getFieldParam(fieldName, HighlightParams.TAG_ELLIPSIS, "... ");
           String encoder = params.getFieldParam(fieldName, HighlightParams.ENCODER, "simple");
-          return new PassageFormatter(preTag, postTag, ellipsis, "html".equals(encoder));
+          return new DefaultPassageFormatter(preTag, postTag, ellipsis, "html".equals(encoder));
         }
 
         @Override