You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by er...@apache.org on 2013/04/02 15:44:58 UTC

svn commit: r1463543 - in /lucene/dev/trunk/solr: ./ core/src/java/org/apache/solr/highlight/ core/src/test-files/solr/collection1/conf/ core/src/test/org/apache/solr/highlight/ solrj/src/java/org/apache/solr/common/params/

Author: erick
Date: Tue Apr  2 13:44:58 2013
New Revision: 1463543

URL: http://svn.apache.org/r1463543
Log:
SOLR-4656, additional parameters for limiting work when highlighting multivalued fields

Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
    lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/schema.xml
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java
    lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1463543&r1=1463542&r2=1463543&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Tue Apr  2 13:44:58 2013
@@ -103,13 +103,24 @@ New Features
   "dynamicFields", respectively, to align with all other REST API outputs, which
   use camelCase.
   (Steve Rowe)
-  
+    
 * SOLR-4658: In preparation for REST API requests that can modify the schema,
   a "managed schema" is introduced.  
   Add '<schemaFactory class="ManagedSchemaFactory" mutable="true"/>' to solrconfig.xml
   in order to use it, and to enable schema modifications via REST API requests.
   (Steve Rowe, Robert Muir)
 
+* SOLR-4656: Added two new highlight parameters, hl.maxMultiValuedToMatch and 
+  hl.maxMultiValuedToExamine. maxMultiValuedToMatch stops looking for snippets after 
+  finding the specified number of matches, no matter how far into the multivalued field
+  you've gone. maxMultiValuedToExamine stops looking for matches after the specified
+  number of multiValued entries have been examined. If both are specified, the limit
+  hit first stops the loop. This patch also cuts down on the copying of the document
+  entries during highlighting. These optimizations are probably unnoticeable unless
+  there are a large number of entries in the multiValued field. Note that this will
+  prevent the "best" match from being found if it appears later in the multiValued list
+  than the cutoff specified by either of these params. (Erick Erickson)
+
 Bug Fixes
 ----------------------
 

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java?rev=1463543&r1=1463542&r2=1463543&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java Tue Apr  2 13:44:58 2013
@@ -429,21 +429,15 @@ public class DefaultSolrHighlighter exte
     )) return;
     // END: Hack
     
-    SolrParams params = req.getParams(); 
-    StorableField[] docFields = doc.getFields(fieldName);
-    List<String> listFields = new ArrayList<String>();
-    for (StorableField field : docFields) {
-      listFields.add(field.stringValue());
-    }
+    SolrParams params = req.getParams();
 
     // preserve order of values in a multiValued list
     boolean preserveMulti = params.getFieldBool(fieldName, HighlightParams.PRESERVE_MULTI, false);
 
-    String[] docTexts = (String[]) listFields.toArray(new String[listFields.size()]);
-   
-    // according to Document javadoc, doc.getValues() never returns null. check empty instead of null
-    if (docTexts.length == 0) return;
-    
+    List<StorableField> allFields = doc.getFields();
+    if (allFields != null && allFields.size() == 0) return; // getFields() has no explicit non-null contract,
+                                                            // although it currently never returns null.
+
     TokenStream tstream = null;
     int numFragments = getMaxSnippets(fieldName, params);
     boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params);
@@ -456,15 +450,25 @@ public class DefaultSolrHighlighter exte
     if (tvStream != null) {
       tots = new TermOffsetsTokenStream(tvStream);
     }
+    int mvToExamine = Integer.parseInt(req.getParams().get(HighlightParams.MAX_MULTIVALUED_TO_EXAMINE,
+        Integer.toString(Integer.MAX_VALUE)));
+    int mvToMatch = Integer.parseInt(req.getParams().get(HighlightParams.MAX_MULTIVALUED_TO_MATCH,
+        Integer.toString(Integer.MAX_VALUE)));
+
+    for (StorableField thisField : allFields) {
+      if (mvToExamine <= 0 || mvToMatch <= 0) break;
+
+      if (! thisField.name().equals(fieldName)) continue; // Is there a better way to do this?
 
-    for (int j = 0; j < docTexts.length; j++) {
+      --mvToExamine;
+      String thisText = thisField.stringValue();
       if( tots != null ) {
         // if we're using TermOffsets optimization, then get the next
         // field value's TokenStream (i.e. get field j's TokenStream) from tots:
-        tstream = tots.getMultiValuedTokenStream( docTexts[j].length() );
+        tstream = tots.getMultiValuedTokenStream( thisText.length() );
       } else {
         // fall back to analyzer
-        tstream = createAnalyzerTStream(schema, fieldName, docTexts[j]);
+        tstream = createAnalyzerTStream(schema, fieldName, thisText);
       }
       
       int maxCharsToAnalyze = params.getFieldInt(fieldName,
@@ -491,21 +495,23 @@ public class DefaultSolrHighlighter exte
       }
       
       if (maxCharsToAnalyze < 0) {
-        highlighter.setMaxDocCharsToAnalyze(docTexts[j].length());
+        highlighter.setMaxDocCharsToAnalyze(thisText.length());
       } else {
         highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze);
       }
 
       try {
-        TextFragment[] bestTextFragments = highlighter.getBestTextFragments(tstream, docTexts[j], mergeContiguousFragments, numFragments);
+        TextFragment[] bestTextFragments = highlighter.getBestTextFragments(tstream, thisText, mergeContiguousFragments, numFragments);
         for (int k = 0; k < bestTextFragments.length; k++) {
           if (preserveMulti) {
             if (bestTextFragments[k] != null) {
               frags.add(bestTextFragments[k]);
+              --mvToMatch;
             }
           } else {
             if ((bestTextFragments[k] != null) && (bestTextFragments[k].getScore() > 0)) {
               frags.add(bestTextFragments[k]);
+              --mvToMatch;
             }
           }
         }

Modified: lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/schema.xml?rev=1463543&r1=1463542&r2=1463543&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/schema.xml (original)
+++ lucene/dev/trunk/solr/core/src/test-files/solr/collection1/conf/schema.xml Tue Apr  2 13:44:58 2013
@@ -582,6 +582,8 @@
    
    <field name="store" type="location" indexed="true" stored="true" omitNorms="false"/>
 
+   <field name="lower" type="lowertok" indexed="false" stored="true" multiValued="true" />
+
    <!-- Dynamic field definitions.  If a field name is not found, dynamicFields
         will be used if the name matches any of the patterns.
         RESTRICTION: the glob-like pattern in the name attribute must have

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java?rev=1463543&r1=1463542&r2=1463543&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java Tue Apr  2 13:44:58 2013
@@ -22,6 +22,7 @@ import org.apache.lucene.analysis.TokenS
 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.handler.component.HighlightComponent;
+import org.apache.solr.request.LocalSolrQueryRequest;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.util.*;
 import org.apache.solr.common.params.HighlightParams;
@@ -849,4 +850,175 @@ public class HighlighterTest extends Sol
         "//lst[@name='highlighting']/lst[@name='1']" +
         "/arr[@name='title']/str='Apache Software <em>Foundation</em>'");
   }
+
+  @Test
+  public void testMaxMvParams() {
+    assertU(adoc("title", "Apache Software Foundation", "id", "1000",
+        "lower", "gap1 target",
+        "lower", "gap2 target",
+        "lower", "gap3 nothing",
+        "lower", "gap4 nothing",
+        "lower", "gap5 target",
+        "lower", "gap6 target",
+        "lower", "gap7 nothing",
+        "lower", "gap8 nothing",
+        "lower", "gap9 target",
+        "lower", "gap10 target" ));
+
+    assertU(commit());
+
+    // First ensure we can count all six
+    assertQ("Counting all MV pairs failed",
+        req(
+            "q", "id:1000",
+            HighlightParams.HIGHLIGHT, "true",
+            HighlightParams.FIELDS, "lower",
+            HighlightParams.Q, "target",
+            HighlightParams.SNIPPETS, "100"
+        ),
+        "//lst[@name='highlighting']/lst[@name='1000']/arr[@name='lower' and count(*)=6]"
+    );
+
+    // NOTE: These tests seem repeated, but we're testing for off-by-one errors
+    // Now we should see exactly 2 by limiting the number of values searched to 4
+    assertQ("Off by one by going too far",
+        req(
+            "q", "id:1000",
+            HighlightParams.HIGHLIGHT, "true",
+            HighlightParams.FIELDS, "lower",
+            HighlightParams.Q, "target",
+            HighlightParams.SNIPPETS, "100",
+            HighlightParams.MAX_MULTIVALUED_TO_EXAMINE, "4"
+        ),
+        "//lst[@name='highlighting']/lst[@name='1000']/arr[@name='lower' and count(*)=2]"
+    );
+
+
+    // Does 0 work?
+    assertQ("Off by one by going too far",
+        req(
+            "q", "id:1000",
+            HighlightParams.HIGHLIGHT, "true",
+            HighlightParams.FIELDS, "lower",
+            HighlightParams.Q, "target",
+            HighlightParams.SNIPPETS, "100",
+            HighlightParams.MAX_MULTIVALUED_TO_EXAMINE, "0"
+        ),
+        "//lst[@name='highlighting']/lst[@name='1000' and count(child::*) = 0]"
+    );
+
+
+    // Now we should see exactly 2 by limiting the number of values searched to 2
+    assertQ("Off by one by not going far enough",
+        req(
+            "q", "id:1000",
+            HighlightParams.HIGHLIGHT, "true",
+            HighlightParams.FIELDS, "lower",
+            HighlightParams.Q, "target",
+            HighlightParams.SNIPPETS, "100",
+            HighlightParams.MAX_MULTIVALUED_TO_EXAMINE, "2"
+        ),
+        "//lst[@name='highlighting']/lst[@name='1000']/arr[@name='lower' and count(*)=2]"
+    );
+
+
+    // Now we should see exactly 1 by limiting the number of values searched to 1
+    assertQ("Not counting exactly 1",
+        req(
+            "q", "id:1000",
+            HighlightParams.HIGHLIGHT, "true",
+            HighlightParams.FIELDS, "lower",
+            HighlightParams.Q, "target",
+            HighlightParams.SNIPPETS, "100",
+            HighlightParams.MAX_MULTIVALUED_TO_EXAMINE, "1"
+        ),
+        "//lst[@name='highlighting']/lst[@name='1000']/arr[@name='lower' and count(*)=1]"
+    );
+
+
+    // Now we should see exactly 4 by limiting the number of values found to 4
+    assertQ("Matching 4 should exactly match 4",
+        req(
+            "q", "id:1000",
+            HighlightParams.HIGHLIGHT, "true",
+            HighlightParams.FIELDS, "lower",
+            HighlightParams.Q, "target",
+            HighlightParams.SNIPPETS, "100",
+            HighlightParams.MAX_MULTIVALUED_TO_MATCH, "4"
+        ),
+        "//lst[@name='highlighting']/lst[@name='1000']/arr[@name='lower' and count(*)=4]"
+    );
+
+
+    // Now we should see all 6 by limiting the number of values found to 6
+    assertQ("Matching 6 should exactly search them all",
+        req(
+            "q", "id:1000",
+            HighlightParams.HIGHLIGHT, "true",
+            HighlightParams.FIELDS, "lower",
+            HighlightParams.Q, "target",
+            HighlightParams.SNIPPETS, "100",
+            HighlightParams.MAX_MULTIVALUED_TO_MATCH, "6"
+        ),
+        "//lst[@name='highlighting']/lst[@name='1000']/arr[@name='lower' and count(*)=6]"
+    );
+
+
+    // Now we should see exactly 1 by limiting the number of values found to 1
+    assertQ("Matching 6 should exactly match them all",
+        req(
+            "q", "id:1000",
+            HighlightParams.HIGHLIGHT, "true",
+            HighlightParams.FIELDS, "lower",
+            HighlightParams.Q, "target",
+            HighlightParams.SNIPPETS, "100",
+            HighlightParams.MAX_MULTIVALUED_TO_MATCH, "1"
+        ),
+        "//lst[@name='highlighting']/lst[@name='1000']/arr[@name='lower' and count(*)=1]"
+    );
+
+    // Now we should see exactly 0 by limiting the number of values found to 0
+    assertQ("Matching 6 should exactly match them all",
+        req(
+            "q", "id:1000",
+            HighlightParams.HIGHLIGHT, "true",
+            HighlightParams.FIELDS, "lower",
+            HighlightParams.Q, "target",
+            HighlightParams.SNIPPETS, "100",
+            HighlightParams.MAX_MULTIVALUED_TO_MATCH, "0"
+        ),
+        "//lst[@name='highlighting']/lst[@name='1000' and count(child::*) = 0]"
+    );
+
+
+
+    // Should bail at whichever limit is reached first (here, maxMultiValuedToMatch=2).
+    assertQ("Matching 6 should exactly match them all",
+        req(
+            "q", "id:1000",
+            HighlightParams.HIGHLIGHT, "true",
+            HighlightParams.FIELDS, "lower",
+            HighlightParams.Q, "target",
+            HighlightParams.SNIPPETS, "100",
+            HighlightParams.MAX_MULTIVALUED_TO_MATCH, "2",
+            HighlightParams.MAX_MULTIVALUED_TO_EXAMINE, "10"
+        ),
+        "//lst[@name='highlighting']/lst[@name='1000']/arr[@name='lower' and count(*)=2]"
+    );
+
+    // Should bail at whichever limit is reached first (here, maxMultiValuedToExamine=2).
+    assertQ("Matching 6 should exactly match them all",
+        req(
+            "q", "id:1000",
+            HighlightParams.HIGHLIGHT, "true",
+            HighlightParams.FIELDS, "lower",
+            HighlightParams.Q, "target",
+            HighlightParams.SNIPPETS, "100",
+            HighlightParams.MAX_MULTIVALUED_TO_MATCH, "10",
+            HighlightParams.MAX_MULTIVALUED_TO_EXAMINE, "2"
+        ),
+        "//lst[@name='highlighting']/lst[@name='1000']/arr[@name='lower' and count(*)=2]"
+    );
+
+  }
 }

Modified: lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java?rev=1463543&r1=1463542&r2=1463543&view=diff
==============================================================================
--- lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java (original)
+++ lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java Tue Apr  2 13:44:58 2013
@@ -44,6 +44,8 @@ public interface HighlightParams {
   public static final String FIELD_MATCH = HIGHLIGHT+".requireFieldMatch";
   public static final String ALTERNATE_FIELD = HIGHLIGHT+".alternateField";
   public static final String ALTERNATE_FIELD_LENGTH = HIGHLIGHT+".maxAlternateFieldLength";
+  public static final String MAX_MULTIVALUED_TO_EXAMINE = HIGHLIGHT + ".maxMultiValuedToExamine";
+  public static final String MAX_MULTIVALUED_TO_MATCH = HIGHLIGHT + ".maxMultiValuedToMatch";
   
   public static final String USE_PHRASE_HIGHLIGHTER = HIGHLIGHT+".usePhraseHighlighter";
   public static final String HIGHLIGHT_MULTI_TERM = HIGHLIGHT+".highlightMultiTerm";