You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by er...@apache.org on 2013/04/02 15:44:58 UTC
svn commit: r1463543 - in /lucene/dev/trunk/solr: ./
core/src/java/org/apache/solr/highlight/
core/src/test-files/solr/collection1/conf/
core/src/test/org/apache/solr/highlight/
solrj/src/java/org/apache/solr/common/params/
Author: erick
Date: Tue Apr 2 13:44:58 2013
New Revision: 1463543
URL: http://svn.apache.org/r1463543
Log:
SOLR-4656, additional parameters for limiting work when highlighting multivalued fields
Modified:
lucene/dev/trunk/solr/CHANGES.txt
lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/schema.xml
lucene/dev/trunk/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java
lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java
Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1463543&r1=1463542&r2=1463543&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Tue Apr 2 13:44:58 2013
@@ -103,13 +103,24 @@ New Features
"dynamicFields", respectively, to align with all other REST API outputs, which
use camelCase.
(Steve Rowe)
-
+
* SOLR-4658: In preparation for REST API requests that can modify the schema,
a "managed schema" is introduced.
Add '<schemaFactory class="ManagedSchemaFactory" mutable="true"/>' to solrconfig.xml
in order to use it, and to enable schema modifications via REST API requests.
(Steve Rowe, Robert Muir)
+* SOLR-4656: Added two new highlight parameters, hl.maxMultiValuedToMatch and
+ hl.maxMultiValuedToExamine. maxMultiValuedToMatch stops looking for snippets after
+ finding the specified number of matches, no matter how far into the multivalued field
+ you've gone. maxMultiValuedToExamine stops looking for matches after the specified
+ number of multiValued entries have been examined. If both are specified, the limit
+ hit first stops the loop. Also this patch cuts down on the copying of the document
+ entries during highlighting. These optimizations are probably unnoticeable unless
+ there are a large number of entries in the multiValued field. Note that this will
+ prevent the "best" match from being found if it appears later in the MV list than the
+ cutoff specified by either of these params. (Erick Erickson)
+
Bug Fixes
----------------------
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java?rev=1463543&r1=1463542&r2=1463543&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java Tue Apr 2 13:44:58 2013
@@ -429,21 +429,15 @@ public class DefaultSolrHighlighter exte
)) return;
// END: Hack
- SolrParams params = req.getParams();
- StorableField[] docFields = doc.getFields(fieldName);
- List<String> listFields = new ArrayList<String>();
- for (StorableField field : docFields) {
- listFields.add(field.stringValue());
- }
+ SolrParams params = req.getParams();
// preserve order of values in a multiValued list
boolean preserveMulti = params.getFieldBool(fieldName, HighlightParams.PRESERVE_MULTI, false);
- String[] docTexts = (String[]) listFields.toArray(new String[listFields.size()]);
-
- // according to Document javadoc, doc.getValues() never returns null. check empty instead of null
- if (docTexts.length == 0) return;
-
+ List<StorableField> allFields = doc.getFields();
+ if (allFields != null && allFields.size() == 0) return; // No explicit contract that getFields returns != null,
+ // although currently it can't return null.
+
TokenStream tstream = null;
int numFragments = getMaxSnippets(fieldName, params);
boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params);
@@ -456,15 +450,25 @@ public class DefaultSolrHighlighter exte
if (tvStream != null) {
tots = new TermOffsetsTokenStream(tvStream);
}
+ int mvToExamine = Integer.parseInt(req.getParams().get(HighlightParams.MAX_MULTIVALUED_TO_EXAMINE,
+ Integer.toString(Integer.MAX_VALUE)));
+ int mvToMatch = Integer.parseInt(req.getParams().get(HighlightParams.MAX_MULTIVALUED_TO_MATCH,
+ Integer.toString(Integer.MAX_VALUE)));
+
+ for (StorableField thisField : allFields) {
+ if (mvToExamine <= 0 || mvToMatch <= 0) break;
+
+ if (! thisField.name().equals(fieldName)) continue; // Is there a better way to do this?
- for (int j = 0; j < docTexts.length; j++) {
+ --mvToExamine;
+ String thisText = thisField.stringValue();
if( tots != null ) {
// if we're using TermOffsets optimization, then get the next
// field value's TokenStream (i.e. get field j's TokenStream) from tots:
- tstream = tots.getMultiValuedTokenStream( docTexts[j].length() );
+ tstream = tots.getMultiValuedTokenStream( thisText.length() );
} else {
// fall back to analyzer
- tstream = createAnalyzerTStream(schema, fieldName, docTexts[j]);
+ tstream = createAnalyzerTStream(schema, fieldName, thisText);
}
int maxCharsToAnalyze = params.getFieldInt(fieldName,
@@ -491,21 +495,23 @@ public class DefaultSolrHighlighter exte
}
if (maxCharsToAnalyze < 0) {
- highlighter.setMaxDocCharsToAnalyze(docTexts[j].length());
+ highlighter.setMaxDocCharsToAnalyze(thisText.length());
} else {
highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze);
}
try {
- TextFragment[] bestTextFragments = highlighter.getBestTextFragments(tstream, docTexts[j], mergeContiguousFragments, numFragments);
+ TextFragment[] bestTextFragments = highlighter.getBestTextFragments(tstream, thisText, mergeContiguousFragments, numFragments);
for (int k = 0; k < bestTextFragments.length; k++) {
if (preserveMulti) {
if (bestTextFragments[k] != null) {
frags.add(bestTextFragments[k]);
+ --mvToMatch;
}
} else {
if ((bestTextFragments[k] != null) && (bestTextFragments[k].getScore() > 0)) {
frags.add(bestTextFragments[k]);
+ --mvToMatch;
}
}
}
Modified: lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/schema.xml?rev=1463543&r1=1463542&r2=1463543&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/schema.xml (original)
+++ lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/schema.xml Tue Apr 2 13:44:58 2013
@@ -582,6 +582,8 @@
<field name="store" type="location" indexed="true" stored="true" omitNorms="false"/>
+ <field name="lower" type="lowertok" indexed="false" stored="true" multiValued="true" />
+
<!-- Dynamic field definitions. If a field name is not found, dynamicFields
will be used if the name matches any of the patterns.
RESTRICTION: the glob-like pattern in the name attribute must have
Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java?rev=1463543&r1=1463542&r2=1463543&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java Tue Apr 2 13:44:58 2013
@@ -22,6 +22,7 @@ import org.apache.lucene.analysis.TokenS
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.handler.component.HighlightComponent;
+import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.util.*;
import org.apache.solr.common.params.HighlightParams;
@@ -849,4 +850,175 @@ public class HighlighterTest extends Sol
"//lst[@name='highlighting']/lst[@name='1']" +
"/arr[@name='title']/str='Apache Software <em>Foundation</em>'");
}
+
+ @Test
+ public void testMaxMvParams() {
+ assertU(adoc("title", "Apache Software Foundation", "id", "1000",
+ "lower", "gap1 target",
+ "lower", "gap2 target",
+ "lower", "gap3 nothing",
+ "lower", "gap4 nothing",
+ "lower", "gap5 target",
+ "lower", "gap6 target",
+ "lower", "gap7 nothing",
+ "lower", "gap8 nothing",
+ "lower", "gap9 target",
+ "lower", "gap10 target" ));
+
+ assertU(commit());
+
+ // First ensure we can count all six
+ assertQ("Counting all MV pairs failed",
+ req(
+ "q", "id:1000",
+ HighlightParams.HIGHLIGHT, "true",
+ HighlightParams.FIELDS, "lower",
+ HighlightParams.Q, "target",
+ HighlightParams.SNIPPETS, "100"
+ ),
+ "//lst[@name='highlighting']/lst[@name='1000']/arr[@name='lower' and count(*)=6]"
+ );
+
+ // NOTE: These tests seem repeated, but we're testing for off-by-one errors
+ // Now we should see exactly 2 by limiting the number of values searched to 4
+ assertQ("Off by one by going too far",
+ req(
+ "q", "id:1000",
+ HighlightParams.HIGHLIGHT, "true",
+ HighlightParams.FIELDS, "lower",
+ HighlightParams.Q, "target",
+ HighlightParams.SNIPPETS, "100",
+ HighlightParams.MAX_MULTIVALUED_TO_EXAMINE, "4"
+ ),
+ "//lst[@name='highlighting']/lst[@name='1000']/arr[@name='lower' and count(*)=2]"
+ );
+
+
+ // Does 0 work?
+ assertQ("Off by one by going too far",
+ req(
+ "q", "id:1000",
+ HighlightParams.HIGHLIGHT, "true",
+ HighlightParams.FIELDS, "lower",
+ HighlightParams.Q, "target",
+ HighlightParams.SNIPPETS, "100",
+ HighlightParams.MAX_MULTIVALUED_TO_EXAMINE, "0"
+ ),
+ "//lst[@name='highlighting']/lst[@name='1000' and count(child::*) = 0]"
+ );
+
+
+ // Now we should see exactly 2 by limiting the number of values searched to 2
+ assertQ("Off by one by not going far enough",
+ req(
+ "q", "id:1000",
+ HighlightParams.HIGHLIGHT, "true",
+ HighlightParams.FIELDS, "lower",
+ HighlightParams.Q, "target",
+ HighlightParams.SNIPPETS, "100",
+ HighlightParams.MAX_MULTIVALUED_TO_EXAMINE, "2"
+ ),
+ "//lst[@name='highlighting']/lst[@name='1000']/arr[@name='lower' and count(*)=2]"
+ );
+
+
+ // Now we should see exactly 1 by limiting the number of values searched to 1
+ assertQ("Not counting exactly 1",
+ req(
+ "q", "id:1000",
+ HighlightParams.HIGHLIGHT, "true",
+ HighlightParams.FIELDS, "lower",
+ HighlightParams.Q, "target",
+ HighlightParams.SNIPPETS, "100",
+ HighlightParams.MAX_MULTIVALUED_TO_EXAMINE, "1"
+ ),
+ "//lst[@name='highlighting']/lst[@name='1000']/arr[@name='lower' and count(*)=1]"
+ );
+
+
+ // Now we should see exactly 4 by limiting the number of values found to 4
+ assertQ("Matching 4 should exactly match 4",
+ req(
+ "q", "id:1000",
+ HighlightParams.HIGHLIGHT, "true",
+ HighlightParams.FIELDS, "lower",
+ HighlightParams.Q, "target",
+ HighlightParams.SNIPPETS, "100",
+ HighlightParams.MAX_MULTIVALUED_TO_MATCH, "4"
+ ),
+ "//lst[@name='highlighting']/lst[@name='1000']/arr[@name='lower' and count(*)=4]"
+ );
+
+
+ // Now we should see all 6 by limiting the number of values found to 6
+ assertQ("Matching 6 should exactly search them all",
+ req(
+ "q", "id:1000",
+ HighlightParams.HIGHLIGHT, "true",
+ HighlightParams.FIELDS, "lower",
+ HighlightParams.Q, "target",
+ HighlightParams.SNIPPETS, "100",
+ HighlightParams.MAX_MULTIVALUED_TO_MATCH, "6"
+ ),
+ "//lst[@name='highlighting']/lst[@name='1000']/arr[@name='lower' and count(*)=6]"
+ );
+
+
+ // Now we should see exactly 1 by limiting the number of values found to 1
+ assertQ("Matching 6 should exactly match them all",
+ req(
+ "q", "id:1000",
+ HighlightParams.HIGHLIGHT, "true",
+ HighlightParams.FIELDS, "lower",
+ HighlightParams.Q, "target",
+ HighlightParams.SNIPPETS, "100",
+ HighlightParams.MAX_MULTIVALUED_TO_MATCH, "1"
+ ),
+ "//lst[@name='highlighting']/lst[@name='1000']/arr[@name='lower' and count(*)=1]"
+ );
+
+ // Now we should see exactly 0 by limiting the number of values found to 0
+ assertQ("Matching 6 should exactly match them all",
+ req(
+ "q", "id:1000",
+ HighlightParams.HIGHLIGHT, "true",
+ HighlightParams.FIELDS, "lower",
+ HighlightParams.Q, "target",
+ HighlightParams.SNIPPETS, "100",
+ HighlightParams.MAX_MULTIVALUED_TO_MATCH, "0"
+ ),
+ "//lst[@name='highlighting']/lst[@name='1000' and count(child::*) = 0]"
+ );
+
+
+
+ // Should bail at the first parameter matched.
+ assertQ("Matching 6 should exactly match them all",
+ req(
+ "q", "id:1000",
+ HighlightParams.HIGHLIGHT, "true",
+ HighlightParams.FIELDS, "lower",
+ HighlightParams.Q, "target",
+ HighlightParams.SNIPPETS, "100",
+ HighlightParams.MAX_MULTIVALUED_TO_MATCH, "2",
+ HighlightParams.MAX_MULTIVALUED_TO_EXAMINE, "10"
+ ),
+ "//lst[@name='highlighting']/lst[@name='1000']/arr[@name='lower' and count(*)=2]"
+ );
+
+ // Should bail at the first parameter matched.
+ assertQ("Matching 6 should exactly match them all",
+ req(
+ "q", "id:1000",
+ HighlightParams.HIGHLIGHT, "true",
+ HighlightParams.FIELDS, "lower",
+ HighlightParams.Q, "target",
+ HighlightParams.SNIPPETS, "100",
+ HighlightParams.MAX_MULTIVALUED_TO_MATCH, "10",
+ HighlightParams.MAX_MULTIVALUED_TO_EXAMINE, "2"
+ ),
+ "//lst[@name='highlighting']/lst[@name='1000']/arr[@name='lower' and count(*)=2]"
+ );
+
+ }
}
Modified: lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java?rev=1463543&r1=1463542&r2=1463543&view=diff
==============================================================================
--- lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java (original)
+++ lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java Tue Apr 2 13:44:58 2013
@@ -44,6 +44,8 @@ public interface HighlightParams {
public static final String FIELD_MATCH = HIGHLIGHT+".requireFieldMatch";
public static final String ALTERNATE_FIELD = HIGHLIGHT+".alternateField";
public static final String ALTERNATE_FIELD_LENGTH = HIGHLIGHT+".maxAlternateFieldLength";
+ public static final String MAX_MULTIVALUED_TO_EXAMINE = HIGHLIGHT + ".maxMultiValuedToExamine";
+ public static final String MAX_MULTIVALUED_TO_MATCH = HIGHLIGHT + ".maxMultiValuedToMatch";
public static final String USE_PHRASE_HIGHLIGHTER = HIGHLIGHT+".usePhraseHighlighter";
public static final String HIGHLIGHT_MULTI_TERM = HIGHLIGHT+".highlightMultiTerm";