You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/04/06 16:27:45 UTC
svn commit: r1465251 - in /lucene/dev/branches/branch_4x: ./ solr/
solr/core/ solr/core/src/java/org/apache/solr/highlight/
solr/core/src/test-files/solr/collection1/conf/
solr/core/src/test/org/apache/solr/highlight/ solr/solrj/
solr/solrj/src/java/or...
Author: rmuir
Date: Sat Apr 6 14:27:44 2013
New Revision: 1465251
URL: http://svn.apache.org/r1465251
Log:
SOLR-4683: add BreakIterator config to PostingsSolrHighlighter
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/solr/ (props changed)
lucene/dev/branches/branch_4x/solr/core/ (props changed)
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java
lucene/dev/branches/branch_4x/solr/core/src/test-files/solr/collection1/conf/schema-postingshighlight.xml
lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/highlight/TestPostingsSolrHighlighter.java
lucene/dev/branches/branch_4x/solr/solrj/ (props changed)
lucene/dev/branches/branch_4x/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java
Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java?rev=1465251&r1=1465250&r2=1465251&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java Sat Apr 6 14:27:44 2013
@@ -20,6 +20,7 @@ package org.apache.solr.highlight;
import java.io.IOException;
import java.text.BreakIterator;
import java.util.Collections;
+import java.util.Locale;
import java.util.Map;
import java.util.Set;
@@ -29,6 +30,7 @@ import org.apache.lucene.search.postings
import org.apache.lucene.search.postingshighlight.PassageFormatter;
import org.apache.lucene.search.postingshighlight.PassageScorer;
import org.apache.lucene.search.postingshighlight.PostingsHighlighter;
+import org.apache.lucene.search.postingshighlight.WholeBreakIterator;
import org.apache.solr.common.params.HighlightParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
@@ -58,6 +60,10 @@ import org.apache.solr.util.plugin.Plugi
* <float name="hl.score.k1">1.2</float>
* <float name="hl.score.b">0.75</float>
* <float name="hl.score.pivot">87</float>
+ * <str name="hl.bs.language"></str>
+ * <str name="hl.bs.country"></str>
+ * <str name="hl.bs.variant"></str>
+ * <str name="hl.bs.type">SENTENCE</str>
* <int name="hl.maxAnalyzedChars">10000</int>
* </lst>
* </requestHandler>
@@ -74,7 +80,7 @@ import org.apache.solr.util.plugin.Plugi
* <li>fields to highlight must be configured with storeOffsetsWithPositions="true"
* <li>hl.q (string) can specify the query
* <li>hl.fl (string) specifies the field list.
- * <li>hl.snippets (int) specifies how many underlying sentence fragments form the resulting snippet.
+ * <li>hl.snippets (int) specifies how many underlying passages form the resulting snippet.
* <li>hl.tag.pre (string) specifies text which appears before a highlighted term.
* <li>hl.tag.post (string) specifies text which appears after a highlighted term.
* <li>hl.tag.ellipsis (string) specifies text which joins non-adjacent passages.
@@ -82,6 +88,10 @@ import org.apache.solr.util.plugin.Plugi
* <li>hl.score.k1 (float) specifies bm25 scoring parameter 'k1'
* <li>hl.score.b (float) specifies bm25 scoring parameter 'b'
* <li>hl.score.pivot (float) specifies bm25 scoring parameter 'avgdl'
+ * <li>hl.bs.type (string) specifies how to divide text into passages: [SENTENCE, LINE, WORD, CHARACTER, WHOLE]
+ * <li>hl.bs.language (string) specifies language code for BreakIterator. default is empty string (root locale)
+ * <li>hl.bs.country (string) specifies country code for BreakIterator. default is empty string (root locale)
+ * <li>hl.bs.variant (string) specifies variant code for BreakIterator. default is empty string (root locale)
* <li>hl.maxAnalyzedChars specifies how many characters at most will be processed in a document.
* NOTE: currently hl.maxAnalyzedChars cannot yet be specified per-field
* </ul>
@@ -143,6 +153,16 @@ public class PostingsSolrHighlighter ext
float pivot = params.getFieldFloat(fieldName, HighlightParams.SCORE_PIVOT, 87f);
return new PassageScorer(k1, b, pivot);
}
+
+ @Override
+ protected BreakIterator getBreakIterator(String field) {
+ // Resolve the locale from this field's hl.bs.language/country/variant params first,
+ // then look up hl.bs.type and build the matching iterator for that locale.
+ final Locale fieldLocale = parseLocale(
+ params.getFieldParam(field, HighlightParams.BS_LANGUAGE),
+ params.getFieldParam(field, HighlightParams.BS_COUNTRY),
+ params.getFieldParam(field, HighlightParams.BS_VARIANT));
+ return parseBreakIterator(params.getFieldParam(field, HighlightParams.BS_TYPE), fieldLocale);
+ }
};
Map<String,String[]> snippets = highlighter.highlightFields(fieldNames, query, searcher, docIDs, maxPassages);
@@ -212,4 +232,36 @@ public class PostingsSolrHighlighter ext
return new String[docIDs.length];
}
}
+
+ /**
+ * Build the BreakIterator named by {@code type} for the given locale.
+ * A null type is treated the same as SENTENCE. Recognized names are
+ * SENTENCE, LINE, WORD, CHARACTER and WHOLE (exact, case-sensitive match).
+ *
+ * @throws IllegalArgumentException if {@code type} is any other value
+ */
+ protected BreakIterator parseBreakIterator(String type, Locale locale) {
+ if (type == null || type.equals("SENTENCE")) {
+ return BreakIterator.getSentenceInstance(locale);
+ }
+ if (type.equals("LINE")) {
+ return BreakIterator.getLineInstance(locale);
+ }
+ if (type.equals("WORD")) {
+ return BreakIterator.getWordInstance(locale);
+ }
+ if (type.equals("CHARACTER")) {
+ return BreakIterator.getCharacterInstance(locale);
+ }
+ if (type.equals("WHOLE")) {
+ // the locale is not used for this type
+ return new WholeBreakIterator();
+ }
+ throw new IllegalArgumentException("Unknown " + HighlightParams.BS_TYPE + ": " + type);
+ }
+
+ /**
+ * Parse a locale from a language+country+variant spec.
+ * <p>
+ * When all three parts are null the root locale is returned. Otherwise a
+ * language is mandatory, and a variant may only be given together with a country.
+ *
+ * @param language language code, or null if unspecified
+ * @param country country code, or null if unspecified
+ * @param variant variant code, or null if unspecified
+ * @return {@link Locale#ROOT} if every part is null, else a locale built from the non-null parts
+ * @throws IllegalArgumentException if country or variant is given without a language,
+ * or a variant is given without a country
+ */
+ protected Locale parseLocale(String language, String country, String variant) {
+ if (language == null && country == null && variant == null) {
+ return Locale.ROOT;
+ } else if (language == null) {
+ // the Locale constructors throw NullPointerException on a null language;
+ // reject the incomplete spec with a descriptive message instead
+ throw new IllegalArgumentException("To specify country or variant, language is required");
+ } else if (country == null && variant != null) {
+ throw new IllegalArgumentException("To specify variant, country is required");
+ } else if (country != null && variant != null) {
+ return new Locale(language, country, variant);
+ } else if (country != null) {
+ return new Locale(language, country);
+ } else {
+ return new Locale(language);
+ }
+ }
}
Modified: lucene/dev/branches/branch_4x/solr/core/src/test-files/solr/collection1/conf/schema-postingshighlight.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test-files/solr/collection1/conf/schema-postingshighlight.xml?rev=1465251&r1=1465250&r2=1465251&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test-files/solr/collection1/conf/schema-postingshighlight.xml (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test-files/solr/collection1/conf/schema-postingshighlight.xml Sat Apr 6 14:27:44 2013
@@ -26,6 +26,7 @@
<fieldtype name="text" class="solr.TextField">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
@@ -33,6 +34,7 @@
<fieldtype name="text_offsets" class="solr.TextField" storeOffsetsWithPositions="true">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
</types>
Modified: lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/highlight/TestPostingsSolrHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/highlight/TestPostingsSolrHighlighter.java?rev=1465251&r1=1465250&r2=1465251&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/highlight/TestPostingsSolrHighlighter.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/highlight/TestPostingsSolrHighlighter.java Sat Apr 6 14:27:44 2013
@@ -42,7 +42,12 @@ public class TestPostingsSolrHighlighter
assertTrue(schema.getField("text").storeOffsetsWithPositions());
assertTrue(schema.getField("text3").storeOffsetsWithPositions());
assertFalse(schema.getField("text2").storeOffsetsWithPositions());
-
+ }
+
+ @Override
+ public void setUp() throws Exception {
+ super.setUp();
+ clearIndex();
assertU(adoc("text", "document one", "text2", "document one", "text3", "crappy document", "id", "101"));
assertU(adoc("text", "second document", "text2", "second document", "text3", "crappier document", "id", "102"));
assertU(commit());
@@ -126,4 +131,20 @@ public class TestPostingsSolrHighlighter
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second <em>document</em>'",
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text3']/str='crappier [document]'");
}
+
+ /** with hl.bs.type=WORD each snippet is expected to be just the highlighted term */
+ public void testBreakIterator() {
+ final String twoDocsHighlighted = "count(//lst[@name='highlighting']/*)=2";
+ final String doc101Snippet = "//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='<em>document</em>'";
+ final String doc102Snippet = "//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='<em>document</em>'";
+ assertQ("different breakiterator",
+ req("q", "text:document", "sort", "id asc", "hl", "true", "hl.bs.type", "WORD"),
+ twoDocsHighlighted,
+ doc101Snippet,
+ doc102Snippet);
+ }
+
+ /** with hl.bs.type=WHOLE the entire two-sentence field value is expected back as one snippet */
+ public void testBreakIterator2() {
+ final String twoSentences = "Document one has a first sentence. Document two has a second sentence.";
+ assertU(adoc("text", twoSentences, "id", "103"));
+ assertU(commit());
+ final String doc103Snippet = "//lst[@name='highlighting']/lst[@name='103']/arr[@name='text']/str='<em>Document</em> one has a first sentence. <em>Document</em> two has a second sentence.'";
+ assertQ("different breakiterator",
+ req("q", "text:document", "sort", "id asc", "hl", "true", "hl.bs.type", "WHOLE"),
+ doc103Snippet);
+ }
}
Modified: lucene/dev/branches/branch_4x/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java?rev=1465251&r1=1465250&r2=1465251&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java (original)
+++ lucene/dev/branches/branch_4x/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java Sat Apr 6 14:27:44 2013
@@ -41,6 +41,7 @@ public interface HighlightParams {
public static final String BS_TYPE = HIGHLIGHT+".bs.type";
public static final String BS_LANGUAGE = HIGHLIGHT+".bs.language";
public static final String BS_COUNTRY = HIGHLIGHT+".bs.country";
+ public static final String BS_VARIANT = HIGHLIGHT+".bs.variant";
public static final String FIELD_MATCH = HIGHLIGHT+".requireFieldMatch";
public static final String DEFAULT_SUMMARY = HIGHLIGHT + ".defaultSummary";
public static final String ALTERNATE_FIELD = HIGHLIGHT+".alternateField";