You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/04/12 20:25:35 UTC
svn commit: r1467397 - in /lucene/dev/trunk: lucene/
lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/
lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/
solr/core/src/java/org/apache/solr/highlight/
Author: rmuir
Date: Fri Apr 12 18:25:34 2013
New Revision: 1467397
URL: http://svn.apache.org/r1467397
Log:
LUCENE-4896: make PassageFormatter abstract
Added:
lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/DefaultPassageFormatter.java (with props)
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PassageFormatter.java
lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java
lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1467397&r1=1467396&r2=1467397&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Fri Apr 12 18:25:34 2013
@@ -189,6 +189,11 @@ New Features
* LUCENE-4895: Added support for the "IsDisjointTo" spatial predicate for
RecursivePrefixTreeStrategy. (David Smiley)
+API Changes
+
+* LUCENE-4896: Made PassageFormatter abstract in PostingsHighlighter, made
+ members of DefaultPassageFormatter protected. (Luca Cavanna via Robert Muir)
+
Optimizations
* LUCENE-4839: SorterTemplate.merge can now be overridden in order to replace
Added: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/DefaultPassageFormatter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/DefaultPassageFormatter.java?rev=1467397&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/DefaultPassageFormatter.java (added)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/DefaultPassageFormatter.java Fri Apr 12 18:25:34 2013
@@ -0,0 +1,138 @@
+package org.apache.lucene.search.postingshighlight;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Creates a formatted snippet from the top passages.
+ * <p>
+ * The default implementation marks the query terms as bold, and places
+ * ellipses between unconnected passages.
+ */
+public class DefaultPassageFormatter extends PassageFormatter {
+ /** text that will appear before highlighted terms */
+ protected final String preTag;
+ /** text that will appear after highlighted terms */
+ protected final String postTag;
+ /** text that will appear between two unconnected passages */
+ protected final String ellipsis;
+ /** true if we should escape for html */
+ protected final boolean escape;
+
+ /**
+ * Creates a new DefaultPassageFormatter with the default tags.
+ */
+ public DefaultPassageFormatter() {
+ this("<b>", "</b>", "... ", false);
+ }
+
+ /**
+ * Creates a new DefaultPassageFormatter with custom tags.
+ * @param preTag text which should appear before a highlighted term.
+ * @param postTag text which should appear after a highlighted term.
+ * @param ellipsis text which should be used to connect two unconnected passages.
+ * @param escape true if text should be html-escaped
+ */
+ public DefaultPassageFormatter(String preTag, String postTag, String ellipsis, boolean escape) {
+ if (preTag == null || postTag == null || ellipsis == null) {
+ throw new NullPointerException();
+ }
+ this.preTag = preTag;
+ this.postTag = postTag;
+ this.ellipsis = ellipsis;
+ this.escape = escape;
+ }
+
+ @Override
+ public String format(Passage passages[], String content) {
+ StringBuilder sb = new StringBuilder();
+ int pos = 0;
+ for (Passage passage : passages) {
+ // don't add ellipsis if its the first one, or if its connected.
+ if (passage.startOffset > pos && pos > 0) {
+ sb.append(ellipsis);
+ }
+ pos = passage.startOffset;
+ for (int i = 0; i < passage.numMatches; i++) {
+ int start = passage.matchStarts[i];
+ int end = passage.matchEnds[i];
+ // its possible to have overlapping terms
+ if (start > pos) {
+ append(sb, content, pos, start);
+ }
+ if (end > pos) {
+ sb.append(preTag);
+ append(sb, content, Math.max(pos, start), end);
+ sb.append(postTag);
+ pos = end;
+ }
+ }
+ // its possible a "term" from the analyzer could span a sentence boundary.
+ append(sb, content, pos, Math.max(pos, passage.endOffset));
+ pos = passage.endOffset;
+ }
+ return sb.toString();
+ }
+
+ /**
+ * Appends original text to the response.
+ * @param dest resulting text, possibly transformed or encoded
+ * @param content original text content
+ * @param start index of the first character in content
+ * @param end index of the character following the last character in content
+ */
+ protected void append(StringBuilder dest, String content, int start, int end) {
+ if (escape) {
+ // note: these are the rules from owasp.org
+ for (int i = start; i < end; i++) {
+ char ch = content.charAt(i);
+ switch(ch) {
+ case '&':
+ dest.append("&amp;");
+ break;
+ case '<':
+ dest.append("&lt;");
+ break;
+ case '>':
+ dest.append("&gt;");
+ break;
+ case '"':
+ dest.append("&quot;");
+ break;
+ case '\'':
+ dest.append("&#x27;");
+ break;
+ case '/':
+ dest.append("&#x2F;");
+ break;
+ default:
+ if (ch >= 0x30 && ch <= 0x39 || ch >= 0x41 && ch <= 0x5A || ch >= 0x61 && ch <= 0x7A) {
+ dest.append(ch);
+ } else if (ch < 0xff) {
+ dest.append("&#");
+ dest.append((int)ch);
+ dest.append(";");
+ } else {
+ dest.append(ch);
+ }
+ }
+ }
+ } else {
+ dest.append(content, start, end);
+ }
+ }
+}
Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PassageFormatter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PassageFormatter.java?rev=1467397&r1=1467396&r2=1467397&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PassageFormatter.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PassageFormatter.java Fri Apr 12 18:25:34 2013
@@ -19,118 +19,20 @@ package org.apache.lucene.search.posting
/**
* Creates a formatted snippet from the top passages.
- * <p>
- * The default implementation marks the query terms as bold, and places
- * ellipses between unconnected passages.
+ *
* @lucene.experimental
*/
-public class PassageFormatter {
- private final String preTag;
- private final String postTag;
- private final String ellipsis;
- private final boolean escape;
-
- /**
- * Creates a new PassageFormatter with the default tags.
- */
- public PassageFormatter() {
- this("<b>", "</b>", "... ", false);
- }
-
- /**
- * Creates a new PassageFormatter with custom tags.
- * @param preTag text which should appear before a highlighted term.
- * @param postTag text which should appear after a highlighted term.
- * @param ellipsis text which should be used to connect two unconnected passages.
- * @param escape true if text should be html-escaped
- */
- public PassageFormatter(String preTag, String postTag, String ellipsis, boolean escape) {
- if (preTag == null || postTag == null || ellipsis == null) {
- throw new NullPointerException();
- }
- this.preTag = preTag;
- this.postTag = postTag;
- this.ellipsis = ellipsis;
- this.escape = escape;
- }
-
+public abstract class PassageFormatter {
+
/**
* Formats the top <code>passages</code> from <code>content</code>
* into a human-readable text snippet.
- *
+ *
* @param passages top-N passages for the field. Note these are sorted in
* the order that they appear in the document for convenience.
* @param content content for the field.
* @return formatted highlight
*/
- public String format(Passage passages[], String content) {
- StringBuilder sb = new StringBuilder();
- int pos = 0;
- for (Passage passage : passages) {
- // don't add ellipsis if its the first one, or if its connected.
- if (passage.startOffset > pos && pos > 0) {
- sb.append(ellipsis);
- }
- pos = passage.startOffset;
- for (int i = 0; i < passage.numMatches; i++) {
- int start = passage.matchStarts[i];
- int end = passage.matchEnds[i];
- // its possible to have overlapping terms
- if (start > pos) {
- append(sb, content, pos, start);
- }
- if (end > pos) {
- sb.append(preTag);
- append(sb, content, Math.max(pos, start), end);
- sb.append(postTag);
- pos = end;
- }
- }
- // its possible a "term" from the analyzer could span a sentence boundary.
- append(sb, content, pos, Math.max(pos, passage.endOffset));
- pos = passage.endOffset;
- }
- return sb.toString();
- }
+ public abstract String format(Passage passages[], String content);
- private void append(StringBuilder dest, String content, int start, int end) {
- if (escape) {
- // note: these are the rules from owasp.org
- for (int i = start; i < end; i++) {
- char ch = content.charAt(i);
- switch(ch) {
- case '&':
- dest.append("&amp;");
- break;
- case '<':
- dest.append("&lt;");
- break;
- case '>':
- dest.append("&gt;");
- break;
- case '"':
- dest.append("&quot;");
- break;
- case '\'':
- dest.append("&#x27;");
- break;
- case '/':
- dest.append("&#x2F;");
- break;
- default:
- if (ch >= 0x30 && ch <= 0x39 || ch >= 0x41 && ch <= 0x5A || ch >= 0x61 && ch <= 0x7A) {
- dest.append(ch);
- } else if (ch < 0xff) {
- dest.append("&#");
- dest.append((int)ch);
- dest.append(";");
- } else {
- dest.append(ch);
- }
- }
- }
- } else {
- dest.append(content, start, end);
- }
- }
}
Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java?rev=1467397&r1=1467396&r2=1467397&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java Fri Apr 12 18:25:34 2013
@@ -33,8 +33,8 @@ import java.util.TreeSet;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.MultiReader;
@@ -141,7 +141,7 @@ public class PostingsHighlighter {
* subclasses can override to customize. */
protected PassageFormatter getFormatter(String field) {
if (defaultFormatter == null) {
- defaultFormatter = new PassageFormatter();
+ defaultFormatter = new DefaultPassageFormatter();
}
return defaultFormatter;
}
@@ -346,7 +346,7 @@ public class PostingsHighlighter {
// pull stored data:
String[][] contents = loadFieldValues(searcher, fields, docids, maxLength);
- Map<String,String[]> highlights = new HashMap<String,String[]>();;
+ Map<String,String[]> highlights = new HashMap<String,String[]>();
for (int i = 0; i < fields.length; i++) {
String field = fields[i];
int numPassages = maxPassages[i];
Modified: lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java?rev=1467397&r1=1467396&r2=1467397&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java Fri Apr 12 18:25:34 2013
@@ -47,8 +47,8 @@ import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom"})
public class TestPostingsHighlighter extends LuceneTestCase {
@@ -872,7 +872,7 @@ public class TestPostingsHighlighter ext
PostingsHighlighter highlighter = new PostingsHighlighter() {
@Override
protected PassageFormatter getFormatter(String field) {
- return new PassageFormatter("<b>", "</b>", "... ", true);
+ return new DefaultPassageFormatter("<b>", "</b>", "... ", true);
}
};
Query query = new TermQuery(new Term("body", "highlighting"));
Modified: lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java?rev=1467397&r1=1467396&r2=1467397&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java Fri Apr 12 18:25:34 2013
@@ -18,9 +18,7 @@ package org.apache.lucene.search.posting
*/
import java.io.IOException;
-import java.text.BreakIterator;
import java.util.HashSet;
-import java.util.Locale;
import java.util.Random;
import org.apache.lucene.analysis.MockAnalyzer;
@@ -30,10 +28,10 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
@@ -45,8 +43,8 @@ import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util._TestUtil;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+import org.apache.lucene.util._TestUtil;
@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom"})
public class TestPostingsHighlighterRanking extends LuceneTestCase {
@@ -75,7 +73,7 @@ public class TestPostingsHighlighterRank
document.add(id);
document.add(body);
- for (int i = 0; i < numDocs; i++) {;
+ for (int i = 0; i < numDocs; i++) {
StringBuilder bodyText = new StringBuilder();
int numSentences = _TestUtil.nextInt(random(), 1, maxNumSentences);
for (int j = 0; j < numSentences; j++) {
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java?rev=1467397&r1=1467396&r2=1467397&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java Fri Apr 12 18:25:34 2013
@@ -26,6 +26,7 @@ import java.util.Set;
import org.apache.lucene.index.StoredDocument;
import org.apache.lucene.search.Query;
+import org.apache.lucene.search.postingshighlight.DefaultPassageFormatter;
import org.apache.lucene.search.postingshighlight.Passage;
import org.apache.lucene.search.postingshighlight.PassageFormatter;
import org.apache.lucene.search.postingshighlight.PassageScorer;
@@ -146,7 +147,7 @@ public class PostingsSolrHighlighter ext
String postTag = params.getFieldParam(fieldName, HighlightParams.TAG_POST, "</em>");
String ellipsis = params.getFieldParam(fieldName, HighlightParams.TAG_ELLIPSIS, "... ");
String encoder = params.getFieldParam(fieldName, HighlightParams.ENCODER, "simple");
- return new PassageFormatter(preTag, postTag, ellipsis, "html".equals(encoder));
+ return new DefaultPassageFormatter(preTag, postTag, ellipsis, "html".equals(encoder));
}
@Override