You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ds...@apache.org on 2015/04/13 16:08:07 UTC
svn commit: r1673200 - in /lucene/dev/trunk/solr: CHANGES.txt
core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
core/src/test/org/apache/solr/highlight/HighlighterTest.java
Author: dsmiley
Date: Mon Apr 13 14:08:07 2015
New Revision: 1673200
URL: http://svn.apache.org/r1673200
Log:
SOLR-6692: hl.maxAnalyzedChars should apply cumulatively on a multi-valued field
Modified:
lucene/dev/trunk/solr/CHANGES.txt
lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java
Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1673200&r1=1673199&r2=1673200&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Mon Apr 13 14:08:07 2015
@@ -128,7 +128,11 @@ Other Changes
* SOLR-7384: Fix spurious failures in FullSolrCloudDistribCmdsTest. (shalin)
-* SOLR-6692: The default highlighter is much more extensible. (David Smiley)
+* SOLR-6692: Default highlighter changes:
+ - hl.maxAnalyzedChars now applies cumulatively on a multi-valied field.
+ - fragment ranking on a multi-valued field should be more relevant.
+ - Much more extensible.
+ (David Smiley)
================== 5.1.0 ==================
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java?rev=1673200&r1=1673199&r2=1673200&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java Mon Apr 13 14:08:07 2015
@@ -71,44 +71,44 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
- *
+ *
* @since solr 1.3
*/
public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInfoInitialized
{
public static Logger log = LoggerFactory.getLogger(DefaultSolrHighlighter.class);
-
+
protected final SolrCore solrCore;
- /** Will be invoked via reflection */
+ //Will be invoked via reflection
public DefaultSolrHighlighter(SolrCore solrCore) {
this.solrCore = solrCore;
}
// Thread safe registry
protected final Map<String,SolrFormatter> formatters =
- new HashMap<>();
+ new HashMap<>();
// Thread safe registry
protected final Map<String,SolrEncoder> encoders =
- new HashMap<>();
+ new HashMap<>();
// Thread safe registry
protected final Map<String,SolrFragmenter> fragmenters =
- new HashMap<>() ;
+ new HashMap<>() ;
// Thread safe registry
protected final Map<String, SolrFragListBuilder> fragListBuilders =
- new HashMap<>() ;
+ new HashMap<>() ;
// Thread safe registry
protected final Map<String, SolrFragmentsBuilder> fragmentsBuilders =
- new HashMap<>() ;
+ new HashMap<>() ;
// Thread safe registry
protected final Map<String, SolrBoundaryScanner> boundaryScanners =
- new HashMap<>() ;
+ new HashMap<>() ;
@Override
public void init(PluginInfo info) {
@@ -143,7 +143,7 @@ public class DefaultSolrHighlighter exte
if( fragListBuilder == null ) fragListBuilder = new SimpleFragListBuilder();
fragListBuilders.put( "", fragListBuilder );
fragListBuilders.put( null, fragListBuilder );
-
+
// Load the FragmentsBuilders
SolrFragmentsBuilder fragsBuilder = solrCore.initPlugins(info.getChildren("fragmentsBuilder"),
fragmentsBuilders, SolrFragmentsBuilder.class, null);
@@ -175,12 +175,12 @@ public class DefaultSolrHighlighter exte
getFormatter(fieldName, params),
getEncoder(fieldName, params),
getSpanQueryScorer(query, fieldName, tokenStream, request));
-
+
highlighter.setTextFragmenter(getFragmenter(fieldName, params));
return highlighter;
}
-
+
/**
* Return a {@link org.apache.lucene.search.highlight.Highlighter} appropriate for this field.
* @param query The current Query
@@ -188,15 +188,15 @@ public class DefaultSolrHighlighter exte
* @param request The current SolrQueryRequest
*/
protected Highlighter getHighlighter(Query query, String fieldName, SolrQueryRequest request) {
- SolrParams params = request.getParams();
+ SolrParams params = request.getParams();
Highlighter highlighter = new Highlighter(
- getFormatter(fieldName, params),
- getEncoder(fieldName, params),
- getQueryScorer(query, fieldName, request));
- highlighter.setTextFragmenter(getFragmenter(fieldName, params));
- return highlighter;
+ getFormatter(fieldName, params),
+ getEncoder(fieldName, params),
+ getQueryScorer(query, fieldName, request));
+ highlighter.setTextFragmenter(getFragmenter(fieldName, params));
+ return highlighter;
}
-
+
/**
* Return a {@link org.apache.lucene.search.highlight.QueryScorer} suitable for this Query and field.
* @param query The current query
@@ -221,15 +221,14 @@ public class DefaultSolrHighlighter exte
* @param request The SolrQueryRequest
*/
protected Scorer getQueryScorer(Query query, String fieldName, SolrQueryRequest request) {
- boolean reqFieldMatch = request.getParams().getFieldBool(fieldName, HighlightParams.FIELD_MATCH, false);
- if (reqFieldMatch) {
- return new QueryTermScorer(query, request.getSearcher().getIndexReader(), fieldName);
- }
- else {
- return new QueryTermScorer(query);
- }
+ boolean reqFieldMatch = request.getParams().getFieldBool(fieldName, HighlightParams.FIELD_MATCH, false);
+ if (reqFieldMatch) {
+ return new QueryTermScorer(query, request.getSearcher().getIndexReader(), fieldName);
+ } else {
+ return new QueryTermScorer(query);
+ }
}
-
+
/**
* Return the max number of snippets for this field. If this has not
* been configured for this field, fall back to the configured default
@@ -238,7 +237,7 @@ public class DefaultSolrHighlighter exte
* @param params The params controlling Highlighting
*/
protected int getMaxSnippets(String fieldName, SolrParams params) {
- return params.getFieldInt(fieldName, HighlightParams.SNIPPETS,1);
+ return params.getFieldInt(fieldName, HighlightParams.SNIPPETS,1);
}
/**
@@ -249,17 +248,17 @@ public class DefaultSolrHighlighter exte
protected boolean isMergeContiguousFragments(String fieldName, SolrParams params){
return params.getFieldBool(fieldName, HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, false);
}
-
+
/**
* Return a {@link org.apache.lucene.search.highlight.Formatter} appropriate for this field. If a formatter
* has not been configured for this field, fall back to the configured
* default or the solr default ({@link org.apache.lucene.search.highlight.SimpleHTMLFormatter}).
- *
+ *
* @param fieldName The name of the field
* @param params The params controlling Highlighting
* @return An appropriate {@link org.apache.lucene.search.highlight.Formatter}.
*/
- protected Formatter getFormatter(String fieldName, SolrParams params )
+ protected Formatter getFormatter(String fieldName, SolrParams params )
{
String str = params.getFieldParam( fieldName, HighlightParams.FORMATTER );
SolrFormatter formatter = formatters.get(str);
@@ -273,7 +272,7 @@ public class DefaultSolrHighlighter exte
* Return an {@link org.apache.lucene.search.highlight.Encoder} appropriate for this field. If an encoder
* has not been configured for this field, fall back to the configured
* default or the solr default ({@link org.apache.lucene.search.highlight.DefaultEncoder}).
- *
+ *
* @param fieldName The name of the field
* @param params The params controlling Highlighting
* @return An appropriate {@link org.apache.lucene.search.highlight.Encoder}.
@@ -286,17 +285,17 @@ public class DefaultSolrHighlighter exte
}
return encoder.getEncoder(fieldName, params);
}
-
+
/**
* Return a {@link org.apache.lucene.search.highlight.Fragmenter} appropriate for this field. If a fragmenter
* has not been configured for this field, fall back to the configured
* default or the solr default ({@link GapFragmenter}).
- *
+ *
* @param fieldName The name of the field
* @param params The params controlling Highlighting
* @return An appropriate {@link org.apache.lucene.search.highlight.Fragmenter}.
*/
- protected Fragmenter getFragmenter(String fieldName, SolrParams params)
+ protected Fragmenter getFragmenter(String fieldName, SolrParams params)
{
String fmt = params.getFieldParam( fieldName, HighlightParams.FRAGMENTER );
SolrFragmenter frag = fragmenters.get( fmt );
@@ -305,7 +304,7 @@ public class DefaultSolrHighlighter exte
}
return frag.getFragmenter(fieldName, params);
}
-
+
protected FragListBuilder getFragListBuilder( String fieldName, SolrParams params ){
String flb = params.getFieldParam( fieldName, HighlightParams.FRAG_LIST_BUILDER );
SolrFragListBuilder solrFlb = fragListBuilders.get(flb);
@@ -314,7 +313,7 @@ public class DefaultSolrHighlighter exte
}
return solrFlb.getFragListBuilder(params);
}
-
+
protected FragmentsBuilder getFragmentsBuilder( String fieldName, SolrParams params ){
BoundaryScanner bs = getBoundaryScanner(fieldName, params);
return getSolrFragmentsBuilder( fieldName, params ).getFragmentsBuilder(params, bs);
@@ -322,7 +321,7 @@ public class DefaultSolrHighlighter exte
protected SolrFragmentsBuilder getSolrFragmentsBuilder( String fieldName, SolrParams params ){
String fb = params.getFieldParam( fieldName, HighlightParams.FRAGMENTS_BUILDER );
- SolrFragmentsBuilder solrFb = fragmentsBuilders.get(fb);
+ SolrFragmentsBuilder solrFb = fragmentsBuilders.get( fb );
if( solrFb == null ){
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown fragmentsBuilder: " + fb );
}
@@ -337,7 +336,7 @@ public class DefaultSolrHighlighter exte
}
return solrBs.getBoundaryScanner(fieldName, params);
}
-
+
/**
* Generates a list of Highlighted query fragments for each item in a list
* of documents, or returns null if highlighting is disabled.
@@ -353,23 +352,23 @@ public class DefaultSolrHighlighter exte
@Override
@SuppressWarnings("unchecked")
public NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException {
- SolrParams params = req.getParams();
+ SolrParams params = req.getParams();
if (!isHighlightingEnabled(params))
- return null;
-
+ return null;
+
SolrIndexSearcher searcher = req.getSearcher();
IndexSchema schema = searcher.getSchema();
NamedList fragments = new SimpleOrderedMap();
String[] fieldNames = getHighlightFields(query, req, defaultFields);
Set<String> fset = new HashSet<>();
-
+
{
// pre-fetch documents using the Searcher's doc cache
for(String f : fieldNames) { fset.add(f); }
// fetch unique key if one exists.
SchemaField keyField = schema.getUniqueKeyField();
if(null != keyField)
- fset.add(keyField.getName());
+ fset.add(keyField.getName());
}
// get FastVectorHighlighter instance out of the processing loop
@@ -399,7 +398,7 @@ public class DefaultSolrHighlighter exte
}
return fragments;
}
-
+
/*
* If fieldName is undefined, this method returns false, then
* doHighlightingByHighlighter() will do nothing for the field.
@@ -438,7 +437,7 @@ public class DefaultSolrHighlighter exte
@SuppressWarnings("unchecked")
protected void doHighlightingByHighlighter( Query query, SolrQueryRequest req, NamedList docSummaries,
- int docId, StoredDocument doc, String fieldName ) throws IOException {
+ int docId, StoredDocument doc, String fieldName ) throws IOException {
final SolrIndexSearcher searcher = req.getSearcher();
final IndexSchema schema = searcher.getSchema();
final SolrParams params = req.getParams();
@@ -538,6 +537,7 @@ public class DefaultSolrHighlighter exte
}
highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze);
+ maxCharsToAnalyze -= thisText.length();
// Highlight!
try {
@@ -594,6 +594,7 @@ public class DefaultSolrHighlighter exte
String value = thisField.stringValue();
result.add(value);
+ maxCharsToAnalyze -= value.length();//we exit early if we'll never get to analyze the value
maxValues--;
if (maxValues <= 0 || maxCharsToAnalyze <= 0) {
break;
@@ -653,7 +654,7 @@ public class DefaultSolrHighlighter exte
}
}
}
-
+
protected TokenStream createAnalyzerTStream(IndexSchema schema, String fieldName, String docText) throws IOException {
return new TokenOrderingFilter(schema.getIndexAnalyzer().tokenStream(fieldName, docText), 10);
}
@@ -669,7 +670,7 @@ final class TokenOrderingFilter extends
private final LinkedList<OrderedToken> queue = new LinkedList<>(); //TODO replace with Deque, Array impl
private boolean done=false;
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
-
+
protected TokenOrderingFilter(TokenStream input, int windowSize) {
super(input);
this.windowSize = windowSize;
Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java?rev=1673200&r1=1673199&r2=1673200&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java Mon Apr 13 14:08:07 2015
@@ -86,8 +86,8 @@ public class HighlighterTest extends Sol
SolrFragmenter regex = highlighter.fragmenters.get( "regex" );
SolrFragmenter frag = highlighter.fragmenters.get( null );
assertSame( gap, frag );
- assertTrue( gap instanceof GapFragmenter );
- assertTrue( regex instanceof RegexFragmenter );
+ assertTrue(gap instanceof GapFragmenter);
+ assertTrue(regex instanceof RegexFragmenter);
}
@Test
@@ -128,13 +128,13 @@ public class HighlighterTest extends Sol
sumLRF = h.getRequestFactory(
"standard", 0, 200, args);
assertQ("Merge Contiguous",
- sumLRF.makeRequest("t_text:long"),
- "//lst[@name='highlighting']/lst[@name='1']",
- "//lst[@name='1']/arr[@name='t_text']/str[.='this is some <em>long</em> text. It has']",
- "//lst[@name='1']/arr[@name='t_text']/str[.=' the word <em>long</em> in many places. In fact, it has']",
- "//lst[@name='1']/arr[@name='t_text']/str[.=' <em>long</em> on some different fragments. Let us']",
- "//lst[@name='1']/arr[@name='t_text']/str[.=' see what happens to <em>long</em> in this case.']"
- );
+ sumLRF.makeRequest("t_text:long"),
+ "//lst[@name='highlighting']/lst[@name='1']",
+ "//lst[@name='1']/arr[@name='t_text']/str[.='this is some <em>long</em> text. It has']",
+ "//lst[@name='1']/arr[@name='t_text']/str[.=' the word <em>long</em> in many places. In fact, it has']",
+ "//lst[@name='1']/arr[@name='t_text']/str[.=' <em>long</em> on some different fragments. Let us']",
+ "//lst[@name='1']/arr[@name='t_text']/str[.=' see what happens to <em>long</em> in this case.']"
+ );
}
@Test
@@ -269,10 +269,10 @@ public class HighlighterTest extends Sol
assertU(commit());
assertU(optimize());
assertQ("Basic summarization",
- sumLRF.makeRequest("long"),
- "//lst[@name='highlighting']/lst[@name='1']",
- "//lst[@name='1']/arr[@name='tv_text']/str"
- );
+ sumLRF.makeRequest("long"),
+ "//lst[@name='highlighting']/lst[@name='1']",
+ "//lst[@name='1']/arr[@name='tv_text']/str"
+ );
// try the same thing without a q param
assertQ("Should not explode...", // q.alt should return everything
@@ -292,9 +292,9 @@ public class HighlighterTest extends Sol
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
"standard", 0, 200, args);
- assertU(adoc("textgap", "first entry hasnt queryword",
- "textgap", "second entry has queryword long",
- "id", "1"));
+ assertU(adoc("textgap", "first entry hasnt queryword",
+ "textgap", "second entry has queryword long",
+ "id", "1"));
assertU(commit());
assertU(optimize());
assertQ("Basic summarization",
@@ -357,10 +357,10 @@ public class HighlighterTest extends Sol
assertU(commit());
assertU(optimize());
assertQ("Basic summarization",
- sumLRF.makeRequest("long"),
- "//lst[@name='highlighting']/lst[@name='1']",
- "//lst[@name='1']/arr[@name='t_text']/str"
- );
+ sumLRF.makeRequest("long"),
+ "//lst[@name='highlighting']/lst[@name='1']",
+ "//lst[@name='1']/arr[@name='t_text']/str"
+ );
}
@@ -432,15 +432,15 @@ public class HighlighterTest extends Sol
sumLRF = h.getRequestFactory(
"standard", 0, 200, args);
assertQ("Test RequireFieldMatch",
- sumLRF.makeRequest("t_text1:random OR t_text2:words"),
- "//lst[@name='highlighting']/lst[@name='1']",
- "//lst[@name='1']/arr[@name='t_text1']/str[.='<em>random</em> words for highlighting tests']",
- "//lst[@name='1']/arr[@name='t_text2']/str[.='more random <em>words</em> for second field']"
- );
+ sumLRF.makeRequest("t_text1:random OR t_text2:words"),
+ "//lst[@name='highlighting']/lst[@name='1']",
+ "//lst[@name='1']/arr[@name='t_text1']/str[.='<em>random</em> words for highlighting tests']",
+ "//lst[@name='1']/arr[@name='t_text2']/str[.='more random <em>words</em> for second field']"
+ );
// test case for un-optimized index
assertU(adoc("t_text1", "random words for highlighting tests", "id", "2",
- "t_text2", "more random words for second field"));
+ "t_text2", "more random words for second field"));
assertU(delI("1"));
assertU(commit());
sumLRF = h.getRequestFactory(
@@ -461,7 +461,7 @@ public class HighlighterTest extends Sol
args.put("hl", "true");
args.put("hl.fl", "t_text");
args.put("hl.simple.pre","<B>");
- args.put("hl.simple.post","</B>");
+ args.put("hl.simple.post", "</B>");
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
"standard", 0, 200, args);
@@ -469,14 +469,14 @@ public class HighlighterTest extends Sol
assertU(commit());
assertU(optimize());
assertQ("Basic summarization",
- sumLRF.makeRequest("t_text:long"),
- "//lst[@name='highlighting']/lst[@name='1']",
- "//lst[@name='1']/arr[@name='t_text']/str[.='a <B>long</B> days night']"
- );
+ sumLRF.makeRequest("t_text:long"),
+ "//lst[@name='highlighting']/lst[@name='1']",
+ "//lst[@name='1']/arr[@name='t_text']/str[.='a <B>long</B> days night']"
+ );
// test a per-field override
- args.put("f.t_text.hl.simple.pre","<I>");
- args.put("f.t_text.hl.simple.post","</I>");
+ args.put("f.t_text.hl.simple.pre", "<I>");
+ args.put("f.t_text.hl.simple.post", "</I>");
sumLRF = h.getRequestFactory(
"standard", 0, 200, args);
assertQ("Basic summarization",
@@ -515,35 +515,59 @@ public class HighlighterTest extends Sol
args.put("fl", "id score");
args.put("hl", "true");
args.put("hl.snippets", "10");
- args.put("hl.fl", "t_text");
+ final String field = random().nextBoolean() ? "t_text" : "tv_text";
+ args.put("hl.fl", field);
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
"standard", 0, 200, args);
- assertU(adoc("t_text", LONG_TEXT, "id", "1"));
+ assertU(adoc(field, LONG_TEXT, "id", "1"));
assertU(commit());
- assertU(optimize());
+
assertQ("token at start of text",
- sumLRF.makeRequest("t_text:disjoint"),
+ sumLRF.makeRequest(field + ":disjoint"),
"//lst[@name='highlighting']/lst[@name='1']",
"//lst[@name='1']/arr[count(str)=1]"
);
args.put("hl.maxAnalyzedChars", "20");
sumLRF = h.getRequestFactory("standard", 0, 200, args);
assertQ("token at end of text",
- sumLRF.makeRequest("t_text:disjoint"),
- "//lst[@name='highlighting']/lst[@name='1']",
- "//lst[@name='1'][not(*)]"
- );
+ sumLRF.makeRequest(field + ":disjoint"),
+ "//lst[@name='highlighting']/lst[@name='1']",
+ "//lst[@name='1'][not(*)]"
+ );
args.put("hl.maxAnalyzedChars", "-1");
sumLRF = h.getRequestFactory("standard", 0, 200, args);
assertQ("token at start of text",
- sumLRF.makeRequest("t_text:disjoint"),
+ sumLRF.makeRequest(field + ":disjoint"),
"//lst[@name='highlighting']/lst[@name='1']",
"//lst[@name='1']/arr[count(str)=1]"
);
+
}
-
+
+ // Test multi-valued together with hl.maxAnalyzedChars
+ @Test
+ public void testMultiValuedMaxAnalyzedChars() throws Exception {
+ String shortText = "some short blah blah blah blah";
+ final String field = random().nextBoolean() ? "tv_mv_text" : "textgap"; // term vecs or not
+ assertU(adoc(field, shortText,
+ field, LONG_TEXT,
+ "id", "1"));
+ assertU(commit());
+
+ assertQ(req("q", field + ":(short OR long)",
+ "indent", "on",
+ "hl", "true",
+ "hl.fl", field,
+ "hl.snippets", "2",
+ "hl.maxAnalyzedChars", "8"),
+ "//lst[@name='highlighting']/lst[@name='1']/arr[count(*)=1]",
+ "//lst[@name='1']/arr/str[1][.='some <em>short</em>']"
+ //"//lst[@name='1']/arr/str[2][.='a <em>long</em> days']"
+ );
+ }
+
@Test
public void testRegexFragmenter() {
HashMap<String,String> args = new HashMap<>();