You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by yo...@apache.org on 2006/07/13 20:47:49 UTC

svn commit: r421682 - in /incubator/solr/trunk/src: java/org/apache/solr/util/CommonParams.java java/org/apache/solr/util/DisMaxParams.java test/org/apache/solr/HighlighterTest.java

Author: yonik
Date: Thu Jul 13 11:47:49 2006
New Revision: 421682

URL: http://svn.apache.org/viewvc?rev=421682&view=rev
Log:
highlighting: SOLR-24

Added:
    incubator/solr/trunk/src/java/org/apache/solr/util/CommonParams.java   (with props)
    incubator/solr/trunk/src/java/org/apache/solr/util/DisMaxParams.java   (with props)
    incubator/solr/trunk/src/test/org/apache/solr/HighlighterTest.java   (with props)

Added: incubator/solr/trunk/src/java/org/apache/solr/util/CommonParams.java
URL: http://svn.apache.org/viewvc/incubator/solr/trunk/src/java/org/apache/solr/util/CommonParams.java?rev=421682&view=auto
==============================================================================
--- incubator/solr/trunk/src/java/org/apache/solr/util/CommonParams.java (added)
+++ incubator/solr/trunk/src/java/org/apache/solr/util/CommonParams.java Thu Jul 13 11:47:49 2006
@@ -0,0 +1,191 @@
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.util;
+
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.core.SolrInfoMBean;
+import org.apache.solr.core.SolrException;
+
+import org.apache.solr.util.StrUtils;
+import org.apache.solr.util.NamedList;
+
+import java.util.logging.Logger;
+import java.util.logging.Level;
+import java.util.logging.Handler;
+
+import java.util.Arrays;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Collection;
+import java.util.Set;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.HashMap;
+import java.util.regex.Pattern;
+import java.io.IOException;
+
+    
+
+  /**
+   * A collection of common params, both for Plugin initialization and
+   * for Requests.
+   */
+  public class CommonParams {
+
+    /** query and init param for field list */
+    public static String FL = "fl";
+    /** default query field */
+    public static String DF = "df";
+    /** whether to include debug data */
+    public static String DEBUG_QUERY = "debugQuery";
+    /** another query to explain against */
+    public static String EXPLAIN_OTHER = "explainOther";
+    /** whether to highlight */
+    public static String HIGHLIGHT = "highlight";
+    /** fields to highlight */
+    public static String HIGHLIGHT_FIELDS = "highlightFields";
+    /** maximum highlight fragments to return */
+    public static String MAX_SNIPPETS = "maxSnippets";
+    /** override default highlight Formatter class */
+    public static String HIGHLIGHT_FORMATTER_CLASS = "highlightFormatterClass";
+
+
+    /** the default field list to be used */
+    public String fl = null;
+    /** the default field to query */
+    public String df = null;
+    /** do not debug by default **/
+    public String debugQuery = null;
+    /** no default other explanation query **/
+    public String explainOther = null;
+    /** whether to highlight */
+    public static boolean highlight = false;
+    /** fields to highlight */
+    public static String highlightFields = null;
+    /** maximum highlight fragments to return */
+    public static int maxSnippets = 1;
+    /** override default highlight Formatter class */
+    public static String highlightFormatterClass = null;
+
+
+    public CommonParams() {
+      /* :NOOP: every field keeps the default it was declared with */
+    }
+
+    /** Builds an instance, then applies the init args via {@link #setValues}. */
+    public CommonParams(NamedList args) {
+      this();
+      setValues(args);
+    }
+
+    /**
+     * Sets the params using values from a NamedList, useful in the
+     * init method for your handler.
+     *
+     * <p>
+     * If any param is not of the expected type, a severe error is
+     * logged and the param is skipped. If any param is not in the
+     * NamedList, it is skipped and the old value is left alone.
+     * </p>
+     * <p>
+     * The highlighting values are stored in static fields, so they are
+     * shared by every instance of this class.
+     * </p>
+     * @param args init params, read under the constant names declared above
+     */
+    public void setValues(NamedList args) {
+
+      Object tmp;
+        
+      tmp = args.get(FL);
+      if (null != tmp) {
+        if (tmp instanceof String) {
+          fl = tmp.toString();
+        } else {
+          SolrCore.log.severe("init param is not a str: " + FL);
+        }
+      }
+
+      tmp = args.get(DF);
+      if (null != tmp) {
+        if (tmp instanceof String) {
+          df = tmp.toString();
+        } else {
+          SolrCore.log.severe("init param is not a str: " + DF);
+        }
+      }
+
+      tmp = args.get(DEBUG_QUERY);
+      if (null != tmp) {
+        if (tmp instanceof String) {
+          debugQuery = tmp.toString();
+        } else {
+          SolrCore.log.severe("init param is not a str: " + DEBUG_QUERY);
+        }
+      }
+
+      tmp = args.get(EXPLAIN_OTHER);
+      if (null != tmp) {
+        if (tmp instanceof String) {
+          explainOther = tmp.toString();
+        } else {
+          SolrCore.log.severe("init param is not a str: " + EXPLAIN_OTHER);
+        }
+      }
+
+      tmp = args.get(HIGHLIGHT);
+      if (null != tmp) {
+        if (tmp instanceof String) {
+          // Any non-empty string other than 'false' implies highlighting
+          String val = tmp.toString().trim();
+          highlight = !(val.equals("") || val.equals("false"));
+        } else {
+          SolrCore.log.severe("init param is not a str: " + HIGHLIGHT);
+        }
+      }
+
+      tmp = args.get(HIGHLIGHT_FIELDS);
+      if (null != tmp) {
+        if (tmp instanceof String) {
+          highlightFields = tmp.toString();
+        } else {
+          SolrCore.log.severe("init param is not a str: " + HIGHLIGHT_FIELDS);
+        }
+      }
+
+      tmp = args.get(MAX_SNIPPETS);
+      if (null != tmp) {
+        if (tmp instanceof Integer) {
+          maxSnippets = ((Integer)tmp).intValue();
+        } else {
+          SolrCore.log.severe("init param is not an int: " + MAX_SNIPPETS);
+        }
+      }
+
+      tmp = args.get(HIGHLIGHT_FORMATTER_CLASS);
+      if (null != tmp) {
+        if (tmp instanceof String) {
+          highlightFormatterClass = tmp.toString();
+        } else {
+          SolrCore.log.severe("init param is not a str: " + HIGHLIGHT_FORMATTER_CLASS);
+        }
+      }
+        
+    }
+
+  }
+

Propchange: incubator/solr/trunk/src/java/org/apache/solr/util/CommonParams.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/solr/trunk/src/java/org/apache/solr/util/CommonParams.java
------------------------------------------------------------------------------
    svn:executable = *

Added: incubator/solr/trunk/src/java/org/apache/solr/util/DisMaxParams.java
URL: http://svn.apache.org/viewvc/incubator/solr/trunk/src/java/org/apache/solr/util/DisMaxParams.java?rev=421682&view=auto
==============================================================================
--- incubator/solr/trunk/src/java/org/apache/solr/util/DisMaxParams.java (added)
+++ incubator/solr/trunk/src/java/org/apache/solr/util/DisMaxParams.java Thu Jul 13 11:47:49 2006
@@ -0,0 +1,182 @@
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.util;
+
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.core.SolrInfoMBean;
+import org.apache.solr.core.SolrException;
+
+import org.apache.solr.util.StrUtils;
+import org.apache.solr.util.NamedList;
+
+import java.util.logging.Logger;
+import java.util.logging.Level;
+import java.util.logging.Handler;
+
+import java.util.Arrays;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Collection;
+import java.util.Set;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.HashMap;
+import java.util.regex.Pattern;
+import java.io.IOException;
+
+    
+
+  /**
+   * A collection of params used in DisMaxRequestHandler,
+   * both for Plugin initialization and for Requests.
+   */
+  public  class DisMaxParams extends CommonParams {
+
+    /** query and init param for tiebreaker value */
+    public static String TIE = "tie";
+    /** query and init param for query fields */
+    public static String QF = "qf";
+    /** query and init param for phrase boost fields */
+    public static String PF = "pf";
+    /** query and init param for MinShouldMatch specification */
+    public static String MM = "mm";
+    /** query and init param for Phrase Slop value */
+    public static String PS = "ps";
+    /** query and init param for boosting query */
+    public static String BQ = "bq";
+    /** query and init param for boosting functions */
+    public static String BF = "bf";
+    /** query and init param for filtering query */
+    public static String FQ = "fq";
+    /** query and init param for field list */
+    public static String GEN = "gen";
+        
+    /** the default tie breaker to use in DisjunctionMaxQueries */
+    public float tiebreaker = 0.0f;
+    /** the default query fields to be used */
+    public String qf = null;
+    /** the default phrase boosting fields to be used */
+    public String pf = null;
+    /** the default min should match to be used */
+    public String mm = "100%";
+    /** the default phrase slop to be used */
+    public int pslop = 0;
+    /** the default boosting query to be used */
+    public String bq = null;
+    /** the default boosting functions to be used */
+    public String bf = null;
+    /** the default filtering query to be used */
+    public String fq = null;
+
+
+    /**
+     * Reads the DisMax init params out of a NamedList, after first
+     * letting the superclass read the common ones; intended for use
+     * from a handler's init method.
+     *
+     * <p>
+     * A param whose value has an unexpected type is reported with a
+     * severe log message and otherwise ignored.
+     * </p>
+     *
+     * <p>
+     * A param that is missing from the NamedList is ignored too, so
+     * the value it had before this call is kept.
+     * </p>
+     */
+    public void setValues(NamedList args) {
+
+      super.setValues(args);
+
+      // absent params keep their value; mistyped ones are logged and skipped
+
+      // tie: default tie breaker for DisjunctionMaxQueries
+      final Object tieArg = args.get(TIE);
+      if (tieArg instanceof Float) {
+        tiebreaker = ((Float) tieArg).floatValue();
+      } else if (null != tieArg) {
+        SolrCore.log.severe(
+          "init param is not a float: " + TIE);
+      }
+
+      // qf: default query fields
+      final Object qfArg = args.get(QF);
+      if (qfArg instanceof String) {
+        qf = (String) qfArg;
+      } else if (null != qfArg) {
+        SolrCore.log.severe(
+          "init param is not a str: " + QF);
+      }
+
+      // pf: default phrase boosting fields
+      final Object pfArg = args.get(PF);
+      if (pfArg instanceof String) {
+        pf = (String) pfArg;
+      } else if (null != pfArg) {
+        SolrCore.log.severe(
+          "init param is not a str: " + PF);
+      }
+
+
+      // mm: default min-should-match specification
+      final Object mmArg = args.get(MM);
+      if (mmArg instanceof String) {
+        mm = (String) mmArg;
+      } else if (null != mmArg) {
+        SolrCore.log.severe(
+          "init param is not a str: " + MM);
+      }
+
+      // ps: default phrase slop
+      final Object psArg = args.get(PS);
+      if (psArg instanceof Integer) {
+        pslop = ((Integer) psArg).intValue();
+      } else if (null != psArg) {
+        SolrCore.log.severe(
+          "init param is not an int: " + PS);
+      }
+
+      // bq: default boosting query
+      final Object bqArg = args.get(BQ);
+      if (bqArg instanceof String) {
+        bq = (String) bqArg;
+      } else if (null != bqArg) {
+        SolrCore.log.severe(
+          "init param is not a str: " + BQ);
+      }
+
+      // bf: default boosting functions
+      final Object bfArg = args.get(BF);
+      if (bfArg instanceof String) {
+        bf = (String) bfArg;
+      } else if (null != bfArg) {
+        SolrCore.log.severe(
+          "init param is not a str: " + BF);
+      }
+
+      // fq: default filtering query
+      final Object fqArg = args.get(FQ);
+      if (fqArg instanceof String) {
+        fq = (String) fqArg;
+      } else if (null != fqArg) {
+        SolrCore.log.severe(
+          "init param is not a str: " + FQ);
+      }
+
+    }
+
+  }

Propchange: incubator/solr/trunk/src/java/org/apache/solr/util/DisMaxParams.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/solr/trunk/src/java/org/apache/solr/util/DisMaxParams.java
------------------------------------------------------------------------------
    svn:executable = *

Added: incubator/solr/trunk/src/test/org/apache/solr/HighlighterTest.java
URL: http://svn.apache.org/viewvc/incubator/solr/trunk/src/test/org/apache/solr/HighlighterTest.java?rev=421682&view=auto
==============================================================================
--- incubator/solr/trunk/src/test/org/apache/solr/HighlighterTest.java (added)
+++ incubator/solr/trunk/src/test/org/apache/solr/HighlighterTest.java Thu Jul 13 11:47:49 2006
@@ -0,0 +1,221 @@
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr;
+
+import org.apache.solr.request.*;
+import org.apache.solr.util.*;
+import org.apache.solr.schema.*;
+
+import java.util.HashMap;
+
+/**
+ * Tests the highlighting options of the standard and dismax request
+ * handlers, using AbstractSolrTestCase
+ */
+public class HighlighterTest extends AbstractSolrTestCase {
+
+  public String getSchemaFile() { return "schema.xml"; } // schema file name for this test case
+  public String getSolrConfigFile() { return "solrconfig.xml"; } // config file name for this test case
+
+  public void setUp() throws Exception {
+    // if you override setUp or tearDown, you had better call
+    // the superclass's version
+    super.setUp();
+  }
+  public void tearDown() throws Exception {
+    // if you override setUp or tearDown, you had better call
+    // the superclass's version
+    super.tearDown();
+
+  }
+
+  public void testTermVecHighlight() {
+    // Highlight tv_text with maxSnippets=2: the long field value below
+    // do summarization using term vectors; two separate fragments are asserted
+    HashMap args = new HashMap();
+    args.put("highlight", "true");
+    args.put("highlightFields", "tv_text");
+    args.put("maxSnippets", "2");
+    TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
+      "standard",0,200,args);
+    
+    assertU(adoc("tv_text", "a long days night this should be a piece of text which is is is is is is is is is is is is is is is is is is is is is is is is isis is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is sufficiently lengthly to produce multiple fragments which are not concatenated at all--we want two disjoint long fragments.", 
+                 "id", "1"));
+    assertU(commit());
+    assertU(optimize());
+    assertQ("Basic summarization",
+            sumLRF.makeRequest("tv_text:long"),
+            "//lst[@name='highlighting']/lst[@name='1']",
+            "//lst[@name='1']/arr[@name='tv_text']/str[.='a <em>long</em> days night this should be a piece of text which']",
+            "//arr[@name='tv_text']/str[.=' <em>long</em> fragments']"
+            );
+  }
+
+  public void testDisMaxHighlight() {
+
+    // highlighting requested through the dismax handler, searching tv_text via qf
+    HashMap params = new HashMap();
+    params.put("highlight", "true");
+    params.put("highlightFields", "tv_text");
+    params.put("qf", "tv_text");
+    TestHarness.LocalRequestFactory dismaxFactory =
+      h.getRequestFactory("dismax", 0, 200, params);
+    
+    assertU(adoc("tv_text", "a long day's night", "id", "1"));
+    assertU(commit());
+    assertU(optimize());
+    assertQ("Basic summarization",
+            dismaxFactory.makeRequest("long"),
+            "//lst[@name='highlighting']/lst[@name='1']",
+            "//lst[@name='1']/arr[@name='tv_text']/str"
+            );
+
+  }
+
+
+  public void testMultiValueAnalysisHighlight() {
+
+    // two textgap values are indexed; only the second holds the query word
+    HashMap params = new HashMap();
+    params.put("highlight", "true");
+    params.put("highlightFields", "textgap");
+    params.put("df", "textgap");
+    TestHarness.LocalRequestFactory factory =
+      h.getRequestFactory("standard", 0, 200, params);
+    
+    assertU(adoc("textgap", "first entry hasnt queryword", 
+                 "textgap", "second entry has queryword long",
+                 "id", "1"));
+    assertU(commit());
+    assertU(optimize());
+    assertQ("Basic summarization",
+            factory.makeRequest("long"),
+            "//lst[@name='highlighting']/lst[@name='1']",
+            "//lst[@name='1']/arr[@name='textgap']/str"
+            );
+
+  }
+
+
+  public void testDefaultFieldHighlight() {
+
+    // highlightFields is empty; a highlight on the df field (t_text) is expected
+    HashMap params = new HashMap();
+    params.put("highlight", "true");
+    params.put("df", "t_text");
+    params.put("highlightFields", "");
+    TestHarness.LocalRequestFactory factory =
+      h.getRequestFactory("standard", 0, 200, params);
+    
+    assertU(adoc("t_text", "a long day's night", "id", "1"));
+    assertU(commit());
+    assertU(optimize());
+    assertQ("Basic summarization",
+            factory.makeRequest("long"),
+            "//lst[@name='highlighting']/lst[@name='1']",
+            "//lst[@name='1']/arr[@name='t_text']/str"
+            );
+
+  }
+
+
+
+  public void testHighlightDisabled() {
+
+    // highlight=false must suppress the highlighting section entirely
+    HashMap params = new HashMap();
+    params.put("highlight", "false");
+    params.put("highlightFields", "t_text");
+    TestHarness.LocalRequestFactory factory =
+      h.getRequestFactory("standard", 0, 200, params);
+    
+    assertU(adoc("t_text", "a long day's night", "id", "1"));
+    assertU(commit());
+    assertU(optimize());
+    assertQ("Basic summarization",
+            factory.makeRequest("t_text:long"), "not(//lst[@name='highlighting'])");
+
+  }
+
+
+  public void testTwoFieldHighlight() {
+
+    // two fields are listed in highlightFields; both are expected in the output
+    HashMap params = new HashMap();
+    params.put("highlight", "true");
+    params.put("highlightFields", "t_text tv_text");
+    TestHarness.LocalRequestFactory factory =
+      h.getRequestFactory("standard", 0, 200, params);
+    
+    assertU(adoc("t_text", "a long day's night", "id", "1",
+                 "tv_text", "a long night's day"));
+    assertU(commit());
+    assertU(optimize());
+    assertQ("Basic summarization",
+            factory.makeRequest("t_text:long"),
+            "//lst[@name='highlighting']/lst[@name='1']",
+            "//lst[@name='1']/arr[@name='t_text']/str",
+            "//lst[@name='1']/arr[@name='tv_text']/str"
+            );
+
+  }
+
+  public void testCustomFormatterHighlight() {
+
+    // with SimpleHTMLFormatter configured, the match is expected inside <B>..</B>
+    HashMap params = new HashMap();
+    params.put("highlight", "true");
+    params.put("highlightFields", "t_text");
+    params.put("highlightFormatterClass", 
+               "org.apache.lucene.search.highlight.SimpleHTMLFormatter");
+    TestHarness.LocalRequestFactory factory =
+      h.getRequestFactory("standard", 0, 200, params);
+    
+    assertU(adoc("t_text", "a long days night", "id", "1"));
+    assertU(commit());
+    assertU(optimize());
+    assertQ("Basic summarization",
+            factory.makeRequest("t_text:long"),
+            "//lst[@name='highlighting']/lst[@name='1']",
+            "//lst[@name='1']/arr[@name='t_text']/str[.='a <B>long</B> days night']"
+            );
+  }
+
+  public void testLongFragment() {
+
+    // do summarization using a custom formatter
+    HashMap args = new HashMap();
+    args.put("highlight", "true");
+    args.put("highlightFields", "tv_text");
+    args.put("highlightFormatterClass", 
+             "org.apache.lucene.search.highlight.SimpleHTMLFormatter");
+    TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
+      "standard", 0, 200, args);
+    
+
+    String text = 
+      "junit: [mkdir] Created dir: /home/klaas/worio/backend/trunk/build-src/solr-nightly/build/test-results [junit] Running org.apache.solr.BasicFunctionalityTest [junit] Tests run: 7, Failures: 0, Errors: 0, Time elapsed: 5.36 sec [junit] Running org.apache.solr.ConvertedLegacyTest [junit] Tests run: 1, Failures: 0, Errors: 0, Time elapsed: 8.268 sec [junit] Running org.apache.solr.DisMaxRequestHandlerTest [junit] Tests run: 1, Failures: 0, Errors: 0, Time elapsed: 1.56 sec [junit] Running org.apache.solr.HighlighterTest [junit] Tests run: 7, Failures: 0, Errors: 0, Time elapsed: 4.979 sec [junit] Running org.apache.solr.OutputWriterTest [junit] Tests run: 2, Failures: 0, Errors: 0, Time elapsed: 0.797 sec [junit] Running org.apache.solr.SampleTest [junit] Tests run: 2, Failures: 0, Errors: 0, Time elapsed: 1.021 sec [junit] Running org.apache.solr.analysis.TestBufferedTokenStream [junit] Tests run: 2, Failures: 0, Errors: 0, Time elapsed: 0.05 sec [junit] Running org.apac
 he.solr.analysis.TestRemoveDuplicatesTokenFilter [junit] Tests run: 3, Failures: 0, Errors: 0, Time elapsed: 0.054 sec [junit] Running org.apache.solr.analysis.TestSynonymFilter [junit] Tests run: 6, Failures: 0, Errors: 0, Time elapsed: 0.081 sec [junit] Running org.apache.solr.analysis.TestWordDelimiterFilter [junit] Tests run: 1, Failures: 0, Errors: 0, Time elapsed: 1.714 sec [junit] Running org.apache.solr.search.TestDocSet [junit] Tests run: 1, Failures: 0, Errors: 0, Time elapsed: 0.788 sec [junit] Running org.apache.solr.util.SolrPluginUtilsTest [junit] Tests run: 5, Failures: 0, Errors: 0, Time elapsed: 3.519 sec [junit] Running org.apache.solr.util.TestOpenBitSet [junit] Tests run: 2, Failures: 0, Errors: 0, Time elapsed: 0.533 sec";
+    assertU(adoc("tv_text", text, "id", "1"));
+    assertU(commit());
+    assertU(optimize());
+    assertQ("Basic summarization",
+            sumLRF.makeRequest("tv_text:dir"),
+            "//lst[@name='highlighting']/lst[@name='1']",
+            "//lst[@name='1']/arr[@name='tv_text']/str"
+            );
+  }
+}

Propchange: incubator/solr/trunk/src/test/org/apache/solr/HighlighterTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/solr/trunk/src/test/org/apache/solr/HighlighterTest.java
------------------------------------------------------------------------------
    svn:executable = *