You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ds...@apache.org on 2017/01/08 04:33:21 UTC
lucene-solr:master: SOLR-9935: Add hl.fragsize support when using the
UnifiedHighlighter
Repository: lucene-solr
Updated Branches:
refs/heads/master ea4998952 -> 570880d3a
SOLR-9935: Add hl.fragsize support when using the UnifiedHighlighter
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/570880d3
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/570880d3
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/570880d3
Branch: refs/heads/master
Commit: 570880d3acb45c925e8dc77172e0725ab8ba07b8
Parents: ea49989
Author: David Smiley <ds...@apache.org>
Authored: Sat Jan 7 23:32:37 2017 -0500
Committer: David Smiley <ds...@apache.org>
Committed: Sat Jan 7 23:32:37 2017 -0500
----------------------------------------------------------------------
solr/CHANGES.txt | 5 +++-
.../solr/highlight/UnifiedSolrHighlighter.java | 12 +++++++++-
.../highlight/TestUnifiedSolrHighlighter.java | 24 ++++++++++++++++----
.../solr/common/params/HighlightParams.java | 2 +-
4 files changed, 35 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/570880d3/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 899dcd3..c18381e 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -109,7 +109,7 @@ Upgrade Notes
* SOLR-9708: You are encouraged to try out the UnifiedHighlighter by setting hl.method=unified and report feedback. It
might become the default in 7.0. It's more efficient/faster than the other highlighters, especially compared to the
- original Highlighter. That said, some options aren't supported yet, notably hl.fragsize.
+ original Highlighter. That said, some options aren't supported yet.
It will get more features in time, especially with your input. See HighlightParams.java
for a listing of highlight parameters annotated with which highlighters use them.
hl.useFastVectorHighlighter is now considered deprecated in lieu of hl.method=fastVector.
@@ -225,6 +225,9 @@ New Features
* SOLR-7466: Enable leading wildcard in complexphrase query parser, optimize it with ReversedWildcardFilterFactory
when it's provided (Mikhail Khludnev)
+* SOLR-9935: Add hl.fragsize support when using the UnifiedHighlighter to avoid snippets/Passages that are too small.
+ Defaults to 70. (David Smiley)
+
Optimizations
----------------------
* SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/570880d3/solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java b/solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java
index 910fa2b..5b59b85 100644
--- a/solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java
+++ b/solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java
@@ -30,6 +30,7 @@ import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.postingshighlight.WholeBreakIterator;
import org.apache.lucene.search.uhighlight.DefaultPassageFormatter;
+import org.apache.lucene.search.uhighlight.LengthGoalBreakIterator;
import org.apache.lucene.search.uhighlight.PassageFormatter;
import org.apache.lucene.search.uhighlight.PassageScorer;
import org.apache.lucene.search.uhighlight.UnifiedHighlighter;
@@ -299,7 +300,16 @@ public class UnifiedSolrHighlighter extends SolrHighlighter implements PluginInf
String variant = params.getFieldParam(field, HighlightParams.BS_VARIANT);
Locale locale = parseLocale(language, country, variant);
String type = params.getFieldParam(field, HighlightParams.BS_TYPE);
- return parseBreakIterator(type, locale);
+ BreakIterator baseBI = parseBreakIterator(type, locale);
+
+ // Use a default fragsize the same as the regex Fragmenter (original Highlighter) since we're
+ // both likely shooting for sentence-like patterns.
+ int fragsize = params.getFieldInt(field, HighlightParams.FRAGSIZE, LuceneRegexFragmenter.DEFAULT_FRAGMENT_SIZE);
+ if (fragsize <= 1 || baseBI instanceof WholeBreakIterator) { // no real minimum size
+ return baseBI;
+ }
+ return LengthGoalBreakIterator.createMinLength(baseBI, fragsize);
+ // TODO option for using createClosestToLength()
}
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/570880d3/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java b/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java
index e2511be..2eb4ba3 100644
--- a/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java
+++ b/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java
@@ -78,7 +78,8 @@ public class TestUnifiedSolrHighlighter extends SolrTestCaseJ4 {
"text2", "document one", "text3", "crappy document", "id", "101"));
assertU(commit());
assertQ("multiple snippets test",
- req("q", "text:document", "sort", "id asc", "hl", "true", "hl.snippets", "2", "hl.bs.type", "SENTENCE"),
+ req("q", "text:document", "sort", "id asc", "hl", "true", "hl.snippets", "2", "hl.bs.type", "SENTENCE",
+ "hl.fragsize", "0"),
"count(//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/*)=2",
"//lst[@name='highlighting']/lst[@name='101']/arr/str[1]='<em>Document</em> snippet one. '",
"//lst[@name='highlighting']/lst[@name='101']/arr/str[2]='<em>Document</em> snippet two.'");
@@ -202,21 +203,34 @@ public class TestUnifiedSolrHighlighter extends SolrTestCaseJ4 {
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text3']/str='crappier [document]'");
}
- public void testBreakIterator() {
+ public void testBreakIteratorWord() {
assertQ("different breakiterator",
- req("q", "text:document", "sort", "id asc", "hl", "true", "hl.bs.type", "WORD"),
+ req("q", "text:document", "sort", "id asc", "hl", "true", "hl.bs.type", "WORD", "hl.fragsize", "-1"),
"count(//lst[@name='highlighting']/*)=2",
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='<em>document</em>'",
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='<em>document</em>'");
}
- public void testBreakIterator2() {
+ public void testBreakIteratorWhole() {
assertU(adoc("text", "Document one has a first sentence. Document two has a second sentence.", "id", "103"));
assertU(commit());
assertQ("different breakiterator",
- req("q", "text:document", "sort", "id asc", "hl", "true", "hl.bs.type", "WHOLE"),
+ req("q", "text:document", "sort", "id asc", "hl", "true", "hl.bs.type", "WHOLE", "hl.fragsize", "-1"),
"//lst[@name='highlighting']/lst[@name='103']/arr[@name='text']/str='<em>Document</em> one has a first sentence. <em>Document</em> two has a second sentence.'");
}
+
+ public void testFragsize() {
+ // test default is 70... so make a sentence that is a little less (closer to 70 than end of text)
+ clearIndex();
+ assertU(adoc("id", "10", "text", "This is a sentence just under seventy chars in length blah blah. Next sentence is here."));
+ assertU(commit());
+ assertQ("default fragsize",
+ req("q", "text:seventy", "hl", "true"),
+ "//lst[@name='highlighting']/lst[@name='10']/arr[@name='text']/str='This is a sentence just under <em>seventy</em> chars in length blah blah. Next sentence is here.'");
+ assertQ("smaller fragsize",
+ req("q", "text:seventy", "hl", "true", "hl.fragsize", "60"), // a bit smaller
+ "//lst[@name='highlighting']/lst[@name='10']/arr[@name='text']/str='This is a sentence just under <em>seventy</em> chars in length blah blah. '");
+ }
public void testEncoder() {
assertU(adoc("text", "Document one has a first <i>sentence</i>.", "id", "103"));
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/570880d3/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java b/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java
index 917e9f5..997fc7e 100644
--- a/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java
+++ b/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java
@@ -49,7 +49,7 @@ public interface HighlightParams {
public static final String HIGHLIGHT_ALTERNATE = HIGHLIGHT+".highlightAlternate"; // OH, FVH
// sizing
- public static final String FRAGSIZE = HIGHLIGHT+".fragsize"; // OH, FVH
+ public static final String FRAGSIZE = HIGHLIGHT+".fragsize"; // OH, FVH, UH
public static final String FRAGMENTER = HIGHLIGHT+".fragmenter"; // OH
public static final String INCREMENT = HIGHLIGHT+".increment"; // OH
public static final String REGEX = "regex"; // OH