You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ds...@apache.org on 2015/04/13 16:08:07 UTC

svn commit: r1673200 - in /lucene/dev/trunk/solr: CHANGES.txt core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java core/src/test/org/apache/solr/highlight/HighlighterTest.java

Author: dsmiley
Date: Mon Apr 13 14:08:07 2015
New Revision: 1673200

URL: http://svn.apache.org/r1673200
Log:
SOLR-6692: hl.maxAnalyzedChars should apply cumulatively on a multi-valued field

Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1673200&r1=1673199&r2=1673200&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Mon Apr 13 14:08:07 2015
@@ -128,7 +128,11 @@ Other Changes
 
 * SOLR-7384: Fix spurious failures in FullSolrCloudDistribCmdsTest. (shalin)
 
-* SOLR-6692: The default highlighter is much more extensible. (David Smiley)
+* SOLR-6692: Default highlighter changes:
+  - hl.maxAnalyzedChars now applies cumulatively on a multi-valied field.
+  - fragment ranking on a multi-valued field should be more relevant.
+  - Much more extensible.
+  (David Smiley)
 
 ==================  5.1.0 ==================
 

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java?rev=1673200&r1=1673199&r2=1673200&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java Mon Apr 13 14:08:07 2015
@@ -71,44 +71,44 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 /**
- * 
+ *
  * @since solr 1.3
  */
 public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInfoInitialized
 {
 
   public static Logger log = LoggerFactory.getLogger(DefaultSolrHighlighter.class);
-  
+
   protected final SolrCore solrCore;
 
-  /** Will be invoked via reflection */
+  //Will be invoked via reflection
   public DefaultSolrHighlighter(SolrCore solrCore) {
     this.solrCore = solrCore;
   }
 
   // Thread safe registry
   protected final Map<String,SolrFormatter> formatters =
-    new HashMap<>();
+      new HashMap<>();
 
   // Thread safe registry
   protected final Map<String,SolrEncoder> encoders =
-    new HashMap<>();
+      new HashMap<>();
 
   // Thread safe registry
   protected final Map<String,SolrFragmenter> fragmenters =
-    new HashMap<>() ;
+      new HashMap<>() ;
 
   // Thread safe registry
   protected final Map<String, SolrFragListBuilder> fragListBuilders =
-    new HashMap<>() ;
+      new HashMap<>() ;
 
   // Thread safe registry
   protected final Map<String, SolrFragmentsBuilder> fragmentsBuilders =
-    new HashMap<>() ;
+      new HashMap<>() ;
 
   // Thread safe registry
   protected final Map<String, SolrBoundaryScanner> boundaryScanners =
-    new HashMap<>() ;
+      new HashMap<>() ;
 
   @Override
   public void init(PluginInfo info) {
@@ -143,7 +143,7 @@ public class DefaultSolrHighlighter exte
     if( fragListBuilder == null ) fragListBuilder = new SimpleFragListBuilder();
     fragListBuilders.put( "", fragListBuilder );
     fragListBuilders.put( null, fragListBuilder );
-    
+
     // Load the FragmentsBuilders
     SolrFragmentsBuilder fragsBuilder = solrCore.initPlugins(info.getChildren("fragmentsBuilder"),
         fragmentsBuilders, SolrFragmentsBuilder.class, null);
@@ -175,12 +175,12 @@ public class DefaultSolrHighlighter exte
         getFormatter(fieldName, params),
         getEncoder(fieldName, params),
         getSpanQueryScorer(query, fieldName, tokenStream, request));
-    
+
     highlighter.setTextFragmenter(getFragmenter(fieldName, params));
 
     return highlighter;
   }
-  
+
   /**
    * Return a {@link org.apache.lucene.search.highlight.Highlighter} appropriate for this field.
    * @param query The current Query
@@ -188,15 +188,15 @@ public class DefaultSolrHighlighter exte
    * @param request The current SolrQueryRequest
    */
   protected Highlighter getHighlighter(Query query, String fieldName, SolrQueryRequest request) {
-    SolrParams params = request.getParams(); 
+    SolrParams params = request.getParams();
     Highlighter highlighter = new Highlighter(
-           getFormatter(fieldName, params), 
-           getEncoder(fieldName, params),
-           getQueryScorer(query, fieldName, request));
-     highlighter.setTextFragmenter(getFragmenter(fieldName, params));
-       return highlighter;
+        getFormatter(fieldName, params),
+        getEncoder(fieldName, params),
+        getQueryScorer(query, fieldName, request));
+    highlighter.setTextFragmenter(getFragmenter(fieldName, params));
+    return highlighter;
   }
-  
+
   /**
    * Return a {@link org.apache.lucene.search.highlight.QueryScorer} suitable for this Query and field.
    * @param query The current query
@@ -221,15 +221,14 @@ public class DefaultSolrHighlighter exte
    * @param request The SolrQueryRequest
    */
   protected Scorer getQueryScorer(Query query, String fieldName, SolrQueryRequest request) {
-     boolean reqFieldMatch = request.getParams().getFieldBool(fieldName, HighlightParams.FIELD_MATCH, false);
-     if (reqFieldMatch) {
-        return new QueryTermScorer(query, request.getSearcher().getIndexReader(), fieldName);
-     }
-     else {
-        return new QueryTermScorer(query);
-     }
+    boolean reqFieldMatch = request.getParams().getFieldBool(fieldName, HighlightParams.FIELD_MATCH, false);
+    if (reqFieldMatch) {
+      return new QueryTermScorer(query, request.getSearcher().getIndexReader(), fieldName);
+    } else {
+      return new QueryTermScorer(query);
+    }
   }
-  
+
   /**
    * Return the max number of snippets for this field. If this has not
    * been configured for this field, fall back to the configured default
@@ -238,7 +237,7 @@ public class DefaultSolrHighlighter exte
    * @param params The params controlling Highlighting
    */
   protected int getMaxSnippets(String fieldName, SolrParams params) {
-     return params.getFieldInt(fieldName, HighlightParams.SNIPPETS,1);
+    return params.getFieldInt(fieldName, HighlightParams.SNIPPETS,1);
   }
 
   /**
@@ -249,17 +248,17 @@ public class DefaultSolrHighlighter exte
   protected boolean isMergeContiguousFragments(String fieldName, SolrParams params){
     return params.getFieldBool(fieldName, HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, false);
   }
-  
+
   /**
    * Return a {@link org.apache.lucene.search.highlight.Formatter} appropriate for this field. If a formatter
    * has not been configured for this field, fall back to the configured
    * default or the solr default ({@link org.apache.lucene.search.highlight.SimpleHTMLFormatter}).
-   * 
+   *
    * @param fieldName The name of the field
    * @param params The params controlling Highlighting
    * @return An appropriate {@link org.apache.lucene.search.highlight.Formatter}.
    */
-  protected Formatter getFormatter(String fieldName, SolrParams params ) 
+  protected Formatter getFormatter(String fieldName, SolrParams params )
   {
     String str = params.getFieldParam( fieldName, HighlightParams.FORMATTER );
     SolrFormatter formatter = formatters.get(str);
@@ -273,7 +272,7 @@ public class DefaultSolrHighlighter exte
    * Return an {@link org.apache.lucene.search.highlight.Encoder} appropriate for this field. If an encoder
    * has not been configured for this field, fall back to the configured
    * default or the solr default ({@link org.apache.lucene.search.highlight.DefaultEncoder}).
-   * 
+   *
    * @param fieldName The name of the field
    * @param params The params controlling Highlighting
    * @return An appropriate {@link org.apache.lucene.search.highlight.Encoder}.
@@ -286,17 +285,17 @@ public class DefaultSolrHighlighter exte
     }
     return encoder.getEncoder(fieldName, params);
   }
-  
+
   /**
    * Return a {@link org.apache.lucene.search.highlight.Fragmenter} appropriate for this field. If a fragmenter
    * has not been configured for this field, fall back to the configured
    * default or the solr default ({@link GapFragmenter}).
-   * 
+   *
    * @param fieldName The name of the field
    * @param params The params controlling Highlighting
    * @return An appropriate {@link org.apache.lucene.search.highlight.Fragmenter}.
    */
-  protected Fragmenter getFragmenter(String fieldName, SolrParams params) 
+  protected Fragmenter getFragmenter(String fieldName, SolrParams params)
   {
     String fmt = params.getFieldParam( fieldName, HighlightParams.FRAGMENTER );
     SolrFragmenter frag = fragmenters.get( fmt );
@@ -305,7 +304,7 @@ public class DefaultSolrHighlighter exte
     }
     return frag.getFragmenter(fieldName, params);
   }
-  
+
   protected FragListBuilder getFragListBuilder( String fieldName, SolrParams params ){
     String flb = params.getFieldParam( fieldName, HighlightParams.FRAG_LIST_BUILDER );
     SolrFragListBuilder solrFlb = fragListBuilders.get(flb);
@@ -314,7 +313,7 @@ public class DefaultSolrHighlighter exte
     }
     return solrFlb.getFragListBuilder(params);
   }
-  
+
   protected FragmentsBuilder getFragmentsBuilder( String fieldName, SolrParams params ){
     BoundaryScanner bs = getBoundaryScanner(fieldName, params);
     return getSolrFragmentsBuilder( fieldName, params ).getFragmentsBuilder(params, bs);
@@ -322,7 +321,7 @@ public class DefaultSolrHighlighter exte
 
   protected SolrFragmentsBuilder getSolrFragmentsBuilder( String fieldName, SolrParams params ){
     String fb = params.getFieldParam( fieldName, HighlightParams.FRAGMENTS_BUILDER );
-    SolrFragmentsBuilder solrFb = fragmentsBuilders.get(fb);
+    SolrFragmentsBuilder solrFb = fragmentsBuilders.get( fb );
     if( solrFb == null ){
       throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown fragmentsBuilder: " + fb );
     }
@@ -337,7 +336,7 @@ public class DefaultSolrHighlighter exte
     }
     return solrBs.getBoundaryScanner(fieldName, params);
   }
-  
+
   /**
    * Generates a list of Highlighted query fragments for each item in a list
    * of documents, or returns null if highlighting is disabled.
@@ -353,23 +352,23 @@ public class DefaultSolrHighlighter exte
   @Override
   @SuppressWarnings("unchecked")
   public NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException {
-    SolrParams params = req.getParams(); 
+    SolrParams params = req.getParams();
     if (!isHighlightingEnabled(params))
-        return null;
-     
+      return null;
+
     SolrIndexSearcher searcher = req.getSearcher();
     IndexSchema schema = searcher.getSchema();
     NamedList fragments = new SimpleOrderedMap();
     String[] fieldNames = getHighlightFields(query, req, defaultFields);
     Set<String> fset = new HashSet<>();
-     
+
     {
       // pre-fetch documents using the Searcher's doc cache
       for(String f : fieldNames) { fset.add(f); }
       // fetch unique key if one exists.
       SchemaField keyField = schema.getUniqueKeyField();
       if(null != keyField)
-        fset.add(keyField.getName());  
+        fset.add(keyField.getName());
     }
 
     // get FastVectorHighlighter instance out of the processing loop
@@ -399,7 +398,7 @@ public class DefaultSolrHighlighter exte
     }
     return fragments;
   }
-  
+
   /*
    * If fieldName is undefined, this method returns false, then
    * doHighlightingByHighlighter() will do nothing for the field.
@@ -438,7 +437,7 @@ public class DefaultSolrHighlighter exte
 
   @SuppressWarnings("unchecked")
   protected void doHighlightingByHighlighter( Query query, SolrQueryRequest req, NamedList docSummaries,
-      int docId, StoredDocument doc, String fieldName ) throws IOException {
+                                              int docId, StoredDocument doc, String fieldName ) throws IOException {
     final SolrIndexSearcher searcher = req.getSearcher();
     final IndexSchema schema = searcher.getSchema();
     final SolrParams params = req.getParams();
@@ -538,6 +537,7 @@ public class DefaultSolrHighlighter exte
       }
 
       highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze);
+      maxCharsToAnalyze -= thisText.length();
 
       // Highlight!
       try {
@@ -594,6 +594,7 @@ public class DefaultSolrHighlighter exte
       String value = thisField.stringValue();
       result.add(value);
 
+      maxCharsToAnalyze -= value.length();//we exit early if we'll never get to analyze the value
       maxValues--;
       if (maxValues <= 0 || maxCharsToAnalyze <= 0) {
         break;
@@ -653,7 +654,7 @@ public class DefaultSolrHighlighter exte
       }
     }
   }
-  
+
   protected TokenStream createAnalyzerTStream(IndexSchema schema, String fieldName, String docText) throws IOException {
     return new TokenOrderingFilter(schema.getIndexAnalyzer().tokenStream(fieldName, docText), 10);
   }
@@ -669,7 +670,7 @@ final class TokenOrderingFilter extends
   private final LinkedList<OrderedToken> queue = new LinkedList<>(); //TODO replace with Deque, Array impl
   private boolean done=false;
   private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
-  
+
   protected TokenOrderingFilter(TokenStream input, int windowSize) {
     super(input);
     this.windowSize = windowSize;

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java?rev=1673200&r1=1673199&r2=1673200&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java Mon Apr 13 14:08:07 2015
@@ -86,8 +86,8 @@ public class HighlighterTest extends Sol
     SolrFragmenter regex = highlighter.fragmenters.get( "regex" );
     SolrFragmenter frag = highlighter.fragmenters.get( null );
     assertSame( gap, frag );
-    assertTrue( gap instanceof GapFragmenter );
-    assertTrue( regex instanceof RegexFragmenter );
+    assertTrue(gap instanceof GapFragmenter);
+    assertTrue(regex instanceof RegexFragmenter);
   }
 
   @Test
@@ -128,13 +128,13 @@ public class HighlighterTest extends Sol
     sumLRF = h.getRequestFactory(
       "standard", 0, 200, args);
     assertQ("Merge Contiguous",
-            sumLRF.makeRequest("t_text:long"),
-            "//lst[@name='highlighting']/lst[@name='1']",
-            "//lst[@name='1']/arr[@name='t_text']/str[.='this is some <em>long</em> text.  It has']",
-            "//lst[@name='1']/arr[@name='t_text']/str[.=' the word <em>long</em> in many places.  In fact, it has']",
-            "//lst[@name='1']/arr[@name='t_text']/str[.=' <em>long</em> on some different fragments.  Let us']",
-            "//lst[@name='1']/arr[@name='t_text']/str[.=' see what happens to <em>long</em> in this case.']"
-            );
+        sumLRF.makeRequest("t_text:long"),
+        "//lst[@name='highlighting']/lst[@name='1']",
+        "//lst[@name='1']/arr[@name='t_text']/str[.='this is some <em>long</em> text.  It has']",
+        "//lst[@name='1']/arr[@name='t_text']/str[.=' the word <em>long</em> in many places.  In fact, it has']",
+        "//lst[@name='1']/arr[@name='t_text']/str[.=' <em>long</em> on some different fragments.  Let us']",
+        "//lst[@name='1']/arr[@name='t_text']/str[.=' see what happens to <em>long</em> in this case.']"
+    );
   }
 
   @Test
@@ -269,10 +269,10 @@ public class HighlighterTest extends Sol
     assertU(commit());
     assertU(optimize());
     assertQ("Basic summarization",
-            sumLRF.makeRequest("long"),
-            "//lst[@name='highlighting']/lst[@name='1']",
-            "//lst[@name='1']/arr[@name='tv_text']/str"
-            );
+        sumLRF.makeRequest("long"),
+        "//lst[@name='highlighting']/lst[@name='1']",
+        "//lst[@name='1']/arr[@name='tv_text']/str"
+    );
     
     // try the same thing without a q param
     assertQ("Should not explode...", // q.alt should return everything
@@ -292,9 +292,9 @@ public class HighlighterTest extends Sol
     TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
       "standard", 0, 200, args);
     
-    assertU(adoc("textgap", "first entry hasnt queryword", 
-                 "textgap", "second entry has queryword long",
-                 "id", "1"));
+    assertU(adoc("textgap", "first entry hasnt queryword",
+        "textgap", "second entry has queryword long",
+        "id", "1"));
     assertU(commit());
     assertU(optimize());
     assertQ("Basic summarization",
@@ -357,10 +357,10 @@ public class HighlighterTest extends Sol
     assertU(commit());
     assertU(optimize());
     assertQ("Basic summarization",
-            sumLRF.makeRequest("long"),
-            "//lst[@name='highlighting']/lst[@name='1']",
-            "//lst[@name='1']/arr[@name='t_text']/str"
-            );
+        sumLRF.makeRequest("long"),
+        "//lst[@name='highlighting']/lst[@name='1']",
+        "//lst[@name='1']/arr[@name='t_text']/str"
+    );
 
   }
 
@@ -432,15 +432,15 @@ public class HighlighterTest extends Sol
      sumLRF = h.getRequestFactory(
            "standard", 0, 200, args);
      assertQ("Test RequireFieldMatch",
-           sumLRF.makeRequest("t_text1:random OR t_text2:words"),
-           "//lst[@name='highlighting']/lst[@name='1']",
-           "//lst[@name='1']/arr[@name='t_text1']/str[.='<em>random</em> words for highlighting tests']",
-           "//lst[@name='1']/arr[@name='t_text2']/str[.='more random <em>words</em> for second field']"
-           );
+         sumLRF.makeRequest("t_text1:random OR t_text2:words"),
+         "//lst[@name='highlighting']/lst[@name='1']",
+         "//lst[@name='1']/arr[@name='t_text1']/str[.='<em>random</em> words for highlighting tests']",
+         "//lst[@name='1']/arr[@name='t_text2']/str[.='more random <em>words</em> for second field']"
+     );
 
      // test case for un-optimized index
      assertU(adoc("t_text1", "random words for highlighting tests", "id", "2",
-             "t_text2", "more random words for second field"));
+         "t_text2", "more random words for second field"));
      assertU(delI("1"));
      assertU(commit());
      sumLRF = h.getRequestFactory(
@@ -461,7 +461,7 @@ public class HighlighterTest extends Sol
     args.put("hl", "true");
     args.put("hl.fl", "t_text");
     args.put("hl.simple.pre","<B>");
-    args.put("hl.simple.post","</B>");
+    args.put("hl.simple.post", "</B>");
     TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
       "standard", 0, 200, args);
     
@@ -469,14 +469,14 @@ public class HighlighterTest extends Sol
     assertU(commit());
     assertU(optimize());
     assertQ("Basic summarization",
-            sumLRF.makeRequest("t_text:long"),
-            "//lst[@name='highlighting']/lst[@name='1']",
-            "//lst[@name='1']/arr[@name='t_text']/str[.='a <B>long</B> days night']"
-            );
+        sumLRF.makeRequest("t_text:long"),
+        "//lst[@name='highlighting']/lst[@name='1']",
+        "//lst[@name='1']/arr[@name='t_text']/str[.='a <B>long</B> days night']"
+    );
     
     // test a per-field override
-    args.put("f.t_text.hl.simple.pre","<I>");
-    args.put("f.t_text.hl.simple.post","</I>");
+    args.put("f.t_text.hl.simple.pre", "<I>");
+    args.put("f.t_text.hl.simple.post", "</I>");
     sumLRF = h.getRequestFactory(
           "standard", 0, 200, args);
     assertQ("Basic summarization",
@@ -515,35 +515,59 @@ public class HighlighterTest extends Sol
     args.put("fl", "id score");
     args.put("hl", "true");
     args.put("hl.snippets", "10");
-    args.put("hl.fl", "t_text");
+    final String field = random().nextBoolean() ? "t_text" : "tv_text";
+    args.put("hl.fl", field);
     TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
       "standard", 0, 200, args);
     
 
-    assertU(adoc("t_text", LONG_TEXT, "id", "1"));
+    assertU(adoc(field, LONG_TEXT, "id", "1"));
     assertU(commit());
-    assertU(optimize());
+
     assertQ("token at start of text",
-            sumLRF.makeRequest("t_text:disjoint"),
+            sumLRF.makeRequest(field + ":disjoint"),
             "//lst[@name='highlighting']/lst[@name='1']",
             "//lst[@name='1']/arr[count(str)=1]"
             );
     args.put("hl.maxAnalyzedChars", "20");
     sumLRF = h.getRequestFactory("standard", 0, 200, args);
     assertQ("token at end of text",
-            sumLRF.makeRequest("t_text:disjoint"),
-            "//lst[@name='highlighting']/lst[@name='1']",
-            "//lst[@name='1'][not(*)]"
-            );
+        sumLRF.makeRequest(field + ":disjoint"),
+        "//lst[@name='highlighting']/lst[@name='1']",
+        "//lst[@name='1'][not(*)]"
+    );
     args.put("hl.maxAnalyzedChars", "-1");
     sumLRF = h.getRequestFactory("standard", 0, 200, args);
     assertQ("token at start of text",
-        sumLRF.makeRequest("t_text:disjoint"),
+        sumLRF.makeRequest(field + ":disjoint"),
         "//lst[@name='highlighting']/lst[@name='1']",
         "//lst[@name='1']/arr[count(str)=1]"
     );
+
   }
-  
+
+  // Test multi-valued together with hl.maxAnalyzedChars
+  @Test
+  public void testMultiValuedMaxAnalyzedChars() throws Exception {
+    String shortText = "some short blah blah blah blah";
+    final String field = random().nextBoolean() ? "tv_mv_text" : "textgap"; // term vecs or not
+    assertU(adoc(field, shortText,
+        field, LONG_TEXT,
+        "id", "1"));
+    assertU(commit());
+
+    assertQ(req("q", field + ":(short OR long)",
+            "indent", "on",
+            "hl", "true",
+            "hl.fl", field,
+            "hl.snippets", "2",
+            "hl.maxAnalyzedChars", "8"),
+        "//lst[@name='highlighting']/lst[@name='1']/arr[count(*)=1]",
+        "//lst[@name='1']/arr/str[1][.='some <em>short</em>']"
+        //"//lst[@name='1']/arr/str[2][.='a <em>long</em> days']"
+    );
+  }
+
   @Test
   public void testRegexFragmenter() {
     HashMap<String,String> args = new HashMap<>();