You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by ry...@apache.org on 2007/07/03 08:14:08 UTC
svn commit: r552682 - in /lucene/solr/trunk: ./ example/solr/conf/
src/java/org/apache/solr/common/params/ src/java/org/apache/solr/highlight/
src/test/org/apache/solr/highlight/ src/test/test-files/solr/conf/
Author: ryan
Date: Mon Jul 2 23:14:07 2007
New Revision: 552682
URL: http://svn.apache.org/viewvc?view=rev&rev=552682
Log:
SOLR-225 -- adding pluggable highlighting formatters and fragmenters
Added:
lucene/solr/trunk/src/java/org/apache/solr/highlight/GapFragmenter.java (with props)
lucene/solr/trunk/src/java/org/apache/solr/highlight/HighlightingPluginBase.java (with props)
lucene/solr/trunk/src/java/org/apache/solr/highlight/HtmlFormatter.java (with props)
lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrFormatter.java (with props)
lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrFragmenter.java (with props)
lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrHighlighter.java (with props)
Modified:
lucene/solr/trunk/CHANGES.txt
lucene/solr/trunk/example/solr/conf/solrconfig.xml
lucene/solr/trunk/src/java/org/apache/solr/common/params/HighlightParams.java
lucene/solr/trunk/src/test/org/apache/solr/highlight/HighlighterTest.java
lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig.xml
Modified: lucene/solr/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/CHANGES.txt?view=diff&rev=552682&r1=552681&r2=552682
==============================================================================
--- lucene/solr/trunk/CHANGES.txt (original)
+++ lucene/solr/trunk/CHANGES.txt Mon Jul 2 23:14:07 2007
@@ -87,6 +87,10 @@
RequestHandlers, FieldTypes, and QueryResponseWriters to share the same
base code for loading and initializing plugins. (ryan)
+14. SOLR-225: Enable pluggable highlighting classes. Allow configurable
+ highlighting formatters and Fragmenters. (ryan)
+
+
Changes in runtime behavior
Optimizations
Modified: lucene/solr/trunk/example/solr/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/example/solr/conf/solrconfig.xml?view=diff&rev=552682&r1=552681&r2=552682
==============================================================================
--- lucene/solr/trunk/example/solr/conf/solrconfig.xml (original)
+++ lucene/solr/trunk/example/solr/conf/solrconfig.xml Mon Jul 2 23:14:07 2007
@@ -431,6 +431,31 @@
</lst>
</requestHandler>
+ <!-- This should most likely be commented out in the "default" case -->
+ <highlighting>
+ <!-- Configure the standard fragmenter -->
+ <fragmenter name="gap" class="org.apache.solr.highlight.GapFragmenter" default="true">
+ <lst name="defaults">
+ <int name="hl.fragsize">100</int>
+ </lst>
+ </fragmenter>
+
+ <fragmenter name="regex" class="org.apache.solr.highlight.RegexFragmenter">
+ <lst name="defaults">
+ <int name="hl.fragsize">70</int>
+ </lst>
+ </fragmenter>
+
+ <!-- Configure the standard formatter -->
+ <formatter name="html" class="org.apache.solr.highlight.HtmlFormatter" default="true">
+ <lst name="defaults">
+ <str name="hl.simple.pre"><![CDATA[<em>]]></str>
+ <str name="hl.simple.post"><![CDATA[</em>]]></str>
+ </lst>
+ </formatter>
+ </highlighting>
+
+
<!-- queryResponseWriter plugins... query responses will be written using the
writer specified by the 'wt' request parameter matching the name of a registered
writer.
Modified: lucene/solr/trunk/src/java/org/apache/solr/common/params/HighlightParams.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/common/params/HighlightParams.java?view=diff&rev=552682&r1=552681&r2=552682
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/common/params/HighlightParams.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/common/params/HighlightParams.java Mon Jul 2 23:14:07 2007
@@ -27,14 +27,17 @@
public static final String SIMPLE = "simple";
- public static final String HIGHLIGHT = "hl";
- public static final String PREFIX = "hl.";
- public static final String FIELDS = PREFIX+"fl";
- public static final String SNIPPETS = PREFIX+"snippets";
- public static final String FRAGSIZE = PREFIX+"fragsize";
- public static final String FORMATTER = PREFIX+"formatter";
- public static final String SIMPLE_PRE = PREFIX+SIMPLE+".pre";
+ public static final String HIGHLIGHT = "hl";
+ public static final String PREFIX = "hl.";
+ public static final String FIELDS = PREFIX+"fl";
+ public static final String SNIPPETS = PREFIX+"snippets";
+ public static final String FRAGSIZE = PREFIX+"fragsize";
+ public static final String INCREMENT = PREFIX+"increment";
+ public static final String SLOP = PREFIX+"slop";
+ public static final String MAX_CHARS = PREFIX+"maxAnalyzedChars";
+ public static final String FORMATTER = PREFIX+"formatter";
+ public static final String FRAGMENTER = PREFIX+"fragmenter";
+ public static final String SIMPLE_PRE = PREFIX+SIMPLE+".pre";
public static final String SIMPLE_POST = PREFIX+SIMPLE+".post";
public static final String FIELD_MATCH = PREFIX+"requireFieldMatch";
-
}
Added: lucene/solr/trunk/src/java/org/apache/solr/highlight/GapFragmenter.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/highlight/GapFragmenter.java?view=auto&rev=552682
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/highlight/GapFragmenter.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/highlight/GapFragmenter.java Mon Jul 2 23:14:07 2007
@@ -0,0 +1,106 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.highlight;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.search.highlight.Fragmenter;
+import org.apache.lucene.search.highlight.NullFragmenter;
+import org.apache.lucene.search.highlight.SimpleFragmenter;
+import org.apache.solr.common.params.DefaultSolrParams;
+import org.apache.solr.common.params.HighlightParams;
+import org.apache.solr.common.params.SolrParams;
+
+/**
+ * Fragmenter plugin that hands out {@link LuceneGapFragmenter} instances, or a
+ * {@link NullFragmenter} when the resolved fragment size is zero or negative.
+ */
+public class GapFragmenter extends HighlightingPluginBase implements SolrFragmenter
+{
+  /**
+   * Builds the fragmenter to use for one field and counts the request.
+   *
+   * @param fieldName the field being highlighted; used for the per-field
+   *        lookup of {@link HighlightParams#FRAGSIZE}
+   * @param params the caller-supplied parameters; when this plugin has
+   *        configured defaults they are combined with these through
+   *        {@link DefaultSolrParams}
+   * @return a NullFragmenter if the fragment size is &lt;= 0, otherwise a
+   *         LuceneGapFragmenter of that size (100 if nothing is configured)
+   */
+  public Fragmenter getFragmenter(String fieldName, SolrParams params )
+  {
+    numRequests++;
+
+    SolrParams effective = params;
+    if( defaults != null ) {
+      effective = new DefaultSolrParams( params, defaults );
+    }
+
+    int fragsize = effective.getFieldInt( fieldName, HighlightParams.FRAGSIZE, 100 );
+    if( fragsize <= 0 ) {
+      return new NullFragmenter();
+    }
+    return new LuceneGapFragmenter( fragsize );
+  }
+
+
+  ///////////////////////////////////////////////////////////////////////
+  //////////////////////// SolrInfoMBeans methods ///////////////////////
+  ///////////////////////////////////////////////////////////////////////
+
+  /** Short human-readable name of this plugin. */
+  @Override
+  public String getDescription() {
+    return "GapFragmenter";
+  }
+
+  /** svn keyword placeholder for the revision. */
+  @Override
+  public String getVersion() {
+    return "$Revision$";
+  }
+
+  /** svn keyword placeholder for the file id. */
+  @Override
+  public String getSourceId() {
+    return "$Id$";
+  }
+
+  /** svn keyword placeholder for the repository URL. */
+  @Override
+  public String getSource() {
+    return "$URL$";
+  }
+}
+
+
+/**
+ * A variant of SimpleFragmenter that additionally opens a new fragment when a
+ * token's position increment exceeds {@link #INCREMENT_THRESHOLD}
+ * (the original author notes this behaves much better in the presence of
+ * multi-valued fields).
+ */
+class LuceneGapFragmenter extends SimpleFragmenter {
+  /**
+   * A position increment larger than this value is treated as a fragment
+   * delimiter.
+   */
+  public static final int INCREMENT_THRESHOLD = 50;
+
+  /** End offset of the token that opened the current fragment; 0 after start(). */
+  protected int fragOffsetAccum = 0;
+
+  public LuceneGapFragmenter() {
+  }
+
+  public LuceneGapFragmenter(int fragsize) {
+    super(fragsize);
+  }
+
+  /**
+   * Resets the accumulated fragment offset before a new text is processed.
+   *
+   * @param originalText the text about to be fragmented (not inspected here)
+   */
+  public void start(String originalText) {
+    fragOffsetAccum = 0;
+  }
+
+  /**
+   * Decides whether the given token begins a new fragment.
+   *
+   * @param token the token being examined
+   * @return true when the token ends at or beyond the current fragment's size
+   *         limit, or when its position increment is above the threshold
+   */
+  public boolean isNewFragment(Token token) {
+    int tokenEnd = token.endOffset();
+    boolean withinFragment = tokenEnd < fragOffsetAccum + getFragmentSize();
+    if (withinFragment && token.getPositionIncrement() <= INCREMENT_THRESHOLD) {
+      return false;
+    }
+    // Equivalent to "fragOffsetAccum += tokenEnd - fragOffsetAccum".
+    fragOffsetAccum = tokenEnd;
+    return true;
+  }
+}
Propchange: lucene/solr/trunk/src/java/org/apache/solr/highlight/GapFragmenter.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/solr/trunk/src/java/org/apache/solr/highlight/GapFragmenter.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Added: lucene/solr/trunk/src/java/org/apache/solr/highlight/HighlightingPluginBase.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/highlight/HighlightingPluginBase.java?view=auto&rev=552682
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/highlight/HighlightingPluginBase.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/highlight/HighlightingPluginBase.java Mon Jul 2 23:14:07 2007
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.highlight;
+
+import java.net.URL;
+
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.core.SolrInfoMBean;
+
+/**
+ * Shared base class for highlighting plugins. It keeps a request counter and
+ * the optional "defaults" parameters read in {@link #init(NamedList)}, and
+ * implements the parts of SolrInfoMBean that do not vary between plugins.
+ *
+ * @author ryan
+ * @since solr 1.3
+ */
+public abstract class HighlightingPluginBase implements SolrInfoMBean
+{
+  /** Incremented by subclasses each time they serve a request. */
+  protected long numRequests;
+
+  /** Parameters from the "defaults" init entry, or null if none was supplied. */
+  protected SolrParams defaults;
+
+  /**
+   * Reads the "defaults" entry from the init args, if it is present and is a
+   * NamedList; any other value (or a null args list) leaves the defaults unset.
+   *
+   * @param args initialization arguments; may be null
+   */
+  public void init(NamedList args) {
+    if( args == null ) {
+      return;
+    }
+    Object candidate = args.get("defaults");
+    // instanceof is already false for null, so no separate null test is needed
+    if( candidate instanceof NamedList ) {
+      defaults = SolrParams.toSolrParams((NamedList)candidate);
+    }
+  }
+
+  //////////////////////// SolrInfoMBeans methods //////////////////////
+
+  /** Returns the fully qualified name of the concrete plugin class. */
+  public String getName() {
+    return this.getClass().getName();
+  }
+
+  public abstract String getDescription();
+  public abstract String getSourceId();
+  public abstract String getSource();
+  public abstract String getVersion();
+
+  /** All highlighting plugins report the HIGHLIGHTING category. */
+  public Category getCategory()
+  {
+    return Category.HIGHLIGHTING;
+  }
+
+  /** No documentation URLs by default; subclasses may override. */
+  public URL[] getDocs() {
+    return null; // this can be overridden, but not required
+  }
+
+  /** Returns a single "requests" statistic holding the request counter. */
+  public NamedList getStatistics() {
+    NamedList<Long> stats = new SimpleOrderedMap<Long>();
+    stats.add("requests", numRequests);
+    return stats;
+  }
+}
+
+
Propchange: lucene/solr/trunk/src/java/org/apache/solr/highlight/HighlightingPluginBase.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/solr/trunk/src/java/org/apache/solr/highlight/HighlightingPluginBase.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Added: lucene/solr/trunk/src/java/org/apache/solr/highlight/HtmlFormatter.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/highlight/HtmlFormatter.java?view=auto&rev=552682
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/highlight/HtmlFormatter.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/highlight/HtmlFormatter.java Mon Jul 2 23:14:07 2007
@@ -0,0 +1,65 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.highlight;
+
+import org.apache.lucene.search.highlight.Formatter;
+import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
+import org.apache.solr.common.params.DefaultSolrParams;
+import org.apache.solr.common.params.HighlightParams;
+import org.apache.solr.common.params.SolrParams;
+
+/**
+ * Formatter plugin that wraps highlighted terms using Lucene's
+ * SimpleHTMLFormatter.
+ */
+public class HtmlFormatter extends HighlightingPluginBase implements SolrFormatter
+{
+  /**
+   * Builds the formatter to use for one field and counts the request.
+   *
+   * @param fieldName the field being highlighted; used for the per-field
+   *        lookup of the pre/post tags
+   * @param params the caller-supplied parameters; when this plugin has
+   *        configured defaults they are combined with these through
+   *        {@link DefaultSolrParams}
+   * @return a SimpleHTMLFormatter using {@link HighlightParams#SIMPLE_PRE} and
+   *         {@link HighlightParams#SIMPLE_POST}, falling back to
+   *         <code>&lt;em&gt;</code> / <code>&lt;/em&gt;</code>
+   */
+  public Formatter getFormatter(String fieldName, SolrParams params )
+  {
+    numRequests++;
+    if( defaults != null ) {
+      params = new DefaultSolrParams( params, defaults );
+    }
+
+    return new SimpleHTMLFormatter(
+        params.getFieldParam(fieldName, HighlightParams.SIMPLE_PRE, "<em>" ),
+        params.getFieldParam(fieldName, HighlightParams.SIMPLE_POST, "</em>"));
+  }
+
+  ///////////////////////////////////////////////////////////////////////
+  //////////////////////// SolrInfoMBeans methods ///////////////////////
+  ///////////////////////////////////////////////////////////////////////
+
+  /** Short human-readable name of this plugin. */
+  @Override
+  public String getDescription() {
+    // Previously returned "GapFragmenter", a leftover from copying GapFragmenter.java.
+    return "HtmlFormatter";
+  }
+
+  /** svn keyword placeholder for the revision. */
+  @Override
+  public String getVersion() {
+    return "$Revision$";
+  }
+
+  /** svn keyword placeholder for the file id. */
+  @Override
+  public String getSourceId() {
+    return "$Id$";
+  }
+
+  /** svn keyword placeholder for the repository URL. */
+  @Override
+  public String getSource() {
+    return "$URL$";
+  }
+}
Propchange: lucene/solr/trunk/src/java/org/apache/solr/highlight/HtmlFormatter.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/solr/trunk/src/java/org/apache/solr/highlight/HtmlFormatter.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Added: lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrFormatter.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrFormatter.java?view=auto&rev=552682
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrFormatter.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrFormatter.java Mon Jul 2 23:14:07 2007
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.highlight;
+
+import org.apache.lucene.search.highlight.Formatter;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.SolrInfoMBean;
+import org.apache.solr.util.plugin.NamedListInitializedPlugin;
+
+public interface SolrFormatter extends SolrInfoMBean, NamedListInitializedPlugin {
+
+ /** <code>init</code> will be called just once, immediately after creation.
+ * <p>The args are user-level initialization parameters that
+ * may be specified when declaring a formatter in
+ * solrconfig.xml
+ */
+ public void init(NamedList args);
+
+ /**
+ * Return a formatter appropriate for this field.
+ *
+ * @param fieldName The name of the field
+ * @param params The parameters to consult when building the formatter
+ * @return An appropriate Formatter.
+ */
+ public Formatter getFormatter(String fieldName, SolrParams params );
+}
+
Propchange: lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrFormatter.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrFormatter.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Added: lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrFragmenter.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrFragmenter.java?view=auto&rev=552682
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrFragmenter.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrFragmenter.java Mon Jul 2 23:14:07 2007
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.highlight;
+
+import org.apache.lucene.search.highlight.Fragmenter;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.SolrInfoMBean;
+import org.apache.solr.util.plugin.NamedListInitializedPlugin;
+
+public interface SolrFragmenter extends SolrInfoMBean, NamedListInitializedPlugin {
+
+ /** <code>init</code> will be called just once, immediately after creation.
+ * <p>The args are user-level initialization parameters that
+ * may be specified when declaring a fragmenter in
+ * solrconfig.xml
+ */
+ public void init(NamedList args);
+
+ /**
+ * Return a fragmenter appropriate for this field.
+ *
+ * @param fieldName The name of the field
+ * @param params The parameters to consult when building the fragmenter
+ * @return An appropriate Fragmenter.
+ */
+ public Fragmenter getFragmenter(String fieldName, SolrParams params);
+}
+
Propchange: lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrFragmenter.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrFragmenter.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Added: lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrHighlighter.java?view=auto&rev=552682
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrHighlighter.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrHighlighter.java Mon Jul 2 23:14:07 2007
@@ -0,0 +1,435 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.highlight;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.ListIterator;
+import java.util.Map;
+import java.util.Set;
+import java.util.logging.Logger;
+
+import javax.xml.xpath.XPathConstants;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.highlight.Formatter;
+import org.apache.lucene.search.highlight.Fragmenter;
+import org.apache.lucene.search.highlight.Highlighter;
+import org.apache.lucene.search.highlight.QueryScorer;
+import org.apache.lucene.search.highlight.TextFragment;
+import org.apache.lucene.search.highlight.TokenSources;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.params.HighlightParams;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.core.Config;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.search.DocIterator;
+import org.apache.solr.search.DocList;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.util.SolrPluginUtils;
+import org.apache.solr.util.plugin.NamedListPluginLoader;
+import org.w3c.dom.NodeList;
+
+public class SolrHighlighter
+{
+ public static Logger log = Logger.getLogger(SolrHighlighter.class.getName());
+
+ // Thread safe registry
+ protected final Map<String,SolrFormatter> formatters =
+ Collections.synchronizedMap( new HashMap<String, SolrFormatter>() );
+
+ // Thread safe registry
+ protected final Map<String,SolrFragmenter> fragmenters =
+ Collections.synchronizedMap( new HashMap<String, SolrFragmenter>() );
+
+ public void initalize( Config config )
+ {
+ formatters.clear();
+ fragmenters.clear();
+
+ // Load the fragmenters
+ String xpath = "highlighting/fragmenter";
+ NamedListPluginLoader<SolrFragmenter> fragloader = new NamedListPluginLoader<SolrFragmenter>( xpath, fragmenters );
+ SolrFragmenter frag = fragloader.load( (NodeList)config.evaluate( xpath, XPathConstants.NODESET ) );
+ if( frag == null ) {
+ frag = new GapFragmenter();
+ }
+ fragmenters.put( "", frag );
+ fragmenters.put( null, frag );
+
+ // Load the formatters
+ xpath = "highlighting/formatter";
+ NamedListPluginLoader<SolrFormatter> fmtloader = new NamedListPluginLoader<SolrFormatter>( xpath, formatters );
+ SolrFormatter fmt = fmtloader.load( (NodeList)config.evaluate( xpath, XPathConstants.NODESET ) );
+ if( fmt == null ) {
+ fmt = new HtmlFormatter();
+ }
+ formatters.put( "", fmt );
+ formatters.put( null, fmt );
+ }
+
+
+ /**
+ * Check whether Highlighting is enabled for this request.
+ * @param request The current SolrQueryRequest
+ * @return <code>true</code> if highlighting enabled, <code>false</code> if not.
+ */
+ public boolean isHighlightingEnabled(SolrParams params) {
+ return params.getBool(HighlightParams.HIGHLIGHT, false);
+ }
+
+ /**
+ * Return a Highlighter appropriate for this field.
+ * @param query The current Query
+ * @param fieldName The name of the field
+ * @param request The current SolrQueryRequest
+ */
+ protected Highlighter getHighlighter(Query query, String fieldName, SolrQueryRequest request) {
+ SolrParams params = request.getParams();
+ Highlighter highlighter = new Highlighter(
+ getFormatter(fieldName, params),
+ getQueryScorer(query, fieldName, request));
+ highlighter.setTextFragmenter(getFragmenter(fieldName, params));
+ return highlighter;
+ }
+
+ /**
+ * Return a QueryScorer suitable for this Query and field.
+ * @param query The current query
+ * @param fieldName The name of the field
+ * @param request The SolrQueryRequest
+ */
+ protected QueryScorer getQueryScorer(Query query, String fieldName, SolrQueryRequest request) {
+ boolean reqFieldMatch = request.getParams().getFieldBool(fieldName, HighlightParams.FIELD_MATCH, false);
+ if (reqFieldMatch) {
+ return new QueryScorer(query, request.getSearcher().getReader(), fieldName);
+ }
+ else {
+ return new QueryScorer(query);
+ }
+ }
+
+ /**
+ * Return a String array of the fields to be highlighted.
+ * Falls back to the programatic defaults, or the default search field if the list of fields
+ * is not specified in either the handler configuration or the request.
+ * @param query The current Query
+ * @param request The current SolrQueryRequest
+ * @param defaultFields Programmatic default highlight fields, used if nothing is specified in the handler config or the request.
+ */
+ public String[] getHighlightFields(Query query, SolrQueryRequest request, String[] defaultFields) {
+ String fields[] = request.getParams().getParams(HighlightParams.FIELDS);
+
+ // if no fields specified in the request, or the handler, fall back to programmatic default, or default search field.
+ if(emptyArray(fields)) {
+ // use default search field if highlight fieldlist not specified.
+ if (emptyArray(defaultFields)) {
+ fields = new String[]{request.getSchema().getDefaultSearchFieldName()};
+ }
+ else {
+ fields = defaultFields;
+ }
+ }
+ else if (fields.length == 1) {
+ // if there's a single request/handler value, it may be a space/comma separated list
+ fields = SolrPluginUtils.split(fields[0]);
+ }
+
+ return fields;
+ }
+
+ protected boolean emptyArray(String[] arr) {
+ return (arr == null || arr.length == 0 || arr[0] == null || arr[0].trim().length() == 0);
+ }
+
+ /**
+ * Return the max number of snippets for this field. If this has not
+ * been configured for this field, fall back to the configured default
+ * or the solr default.
+ * @param fieldName The name of the field
+ * @param request The current SolrQueryRequest
+ */
+ protected int getMaxSnippets(String fieldName, SolrParams params) {
+ return params.getFieldInt(fieldName, HighlightParams.SNIPPETS,1);
+ }
+
+ /**
+ * Return a formatter appropriate for this field. If a formatter
+ * has not been configured for this field, fall back to the configured
+ * default or the solr default (SimpleHTMLFormatter).
+ *
+ * @param fieldName The name of the field
+ * @param request The current SolrQueryRequest
+ * @return An appropriate Formatter.
+ */
+ protected Formatter getFormatter(String fieldName, SolrParams params )
+ {
+ String str = params.getFieldParam( fieldName, HighlightParams.FORMATTER );
+ SolrFormatter formatter = formatters.get( str );
+ if( formatter == null ) {
+ throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown formatter: "+str );
+ }
+ return formatter.getFormatter( fieldName, params );
+ }
+
+ /**
+ * Return a fragmenter appropriate for this field. If a fragmenter
+ * has not been configured for this field, fall back to the configured
+ * default or the solr default (GapFragmenter).
+ *
+ * @param fieldName The name of the field
+ * @param request The current SolrQueryRequest
+ * @return An appropriate Fragmenter.
+ */
+ protected Fragmenter getFragmenter(String fieldName, SolrParams params)
+ {
+ String fmt = params.getFieldParam( fieldName, HighlightParams.FRAGMENTER );
+ SolrFragmenter frag = fragmenters.get( fmt );
+ if( frag == null ) {
+ throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown fragmenter: "+fmt );
+ }
+ return frag.getFragmenter( fieldName, params );
+ }
+
+ /**
+ * Generates a list of Highlighted query fragments for each item in a list
+ * of documents, or returns null if highlighting is disabled.
+ *
+ * @param docs query results
+ * @param query the query
+ * @param req the current request
+ * @param defaultFields default list of fields to summarize
+ *
+ * @return NamedList containing a NamedList for each document, which in
+ * turns contains sets (field, summary) pairs.
+ */
+ @SuppressWarnings("unchecked")
+ public NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException {
+   SolrParams params = req.getParams();
+   // nothing to do unless the request asks for highlighting; note that the
+   // result is null (not an empty list) in that case
+   if (!isHighlightingEnabled(params))
+     return null;
+
+   SolrIndexSearcher searcher = req.getSearcher();
+   IndexSchema schema = searcher.getSchema();
+   NamedList fragments = new SimpleOrderedMap();
+   String[] fieldNames = getHighlightFields(query, req, defaultFields);
+   Document[] readDocs = new Document[docs.size()];
+   {
+     // pre-fetch documents using the Searcher's doc cache
+     Set<String> fset = new HashSet<String>();
+     // Trim here as well: the loop below looks values up by the trimmed
+     // name, so the pre-fetch must request those same names. Otherwise a
+     // padded field name may not be among the fields that get loaded.
+     for(String f : fieldNames) { fset.add(f.trim()); }
+     // fetch unique key if one exists.
+     SchemaField keyField = schema.getUniqueKeyField();
+     if(null != keyField)
+       fset.add(keyField.getName());
+     searcher.readDocs(readDocs, docs, fset);
+   }
+
+   // Highlight each document
+   DocIterator iterator = docs.iterator();
+   for (int i = 0; i < docs.size(); i++) {
+     int docId = iterator.nextDoc();
+     Document doc = readDocs[i];
+     NamedList docSummaries = new SimpleOrderedMap();
+     for (String fieldName : fieldNames) {
+       fieldName = fieldName.trim();
+       String[] docTexts = doc.getValues(fieldName);
+       // field has no stored values in this document: nothing to highlight
+       if (docTexts == null) continue;
+
+       // get highlighter, and number of fragments for this field
+       Highlighter highlighter = getHighlighter(query, fieldName, req);
+       int numFragments = getMaxSnippets(fieldName, params);
+
+       TextFragment[] frag;
+       if (docTexts.length == 1) {
+         // single-valued field
+         TokenStream tstream;
+         try {
+           // attempt term vectors
+           tstream = TokenSources.getTokenStream(searcher.getReader(), docId, fieldName);
+         }
+         catch (IllegalArgumentException e) {
+           // fall back to analyzer
+           tstream = new TokenOrderingFilter(schema.getAnalyzer().tokenStream(fieldName, new StringReader(docTexts[0])), 10);
+         }
+         frag = highlighter.getBestTextFragments(tstream, docTexts[0], false, numFragments);
+       }
+       else {
+         // multi-valued field: analyze all values as one concatenated text
+         MultiValueTokenStream tstream;
+         tstream = new MultiValueTokenStream(fieldName, docTexts, schema.getAnalyzer(), true);
+         frag = highlighter.getBestTextFragments(tstream, tstream.asSingleValue(), false, numFragments);
+       }
+       // convert fragments back into text
+       // TODO: we can include score and position information in output as snippet attributes
+       if (frag.length > 0) {
+         ArrayList<String> fragTexts = new ArrayList<String>();
+         for (int j = 0; j < frag.length; j++) {
+           // keep only fragments that actually scored
+           if ((frag[j] != null) && (frag[j].getScore() > 0)) {
+             fragTexts.add(frag[j].toString());
+           }
+         }
+         String[] summaries = fragTexts.toArray(new String[0]);
+         if (summaries.length > 0)
+           docSummaries.add(fieldName, summaries);
+       }
+     }
+     // an entry is added for every document, keyed by its printable unique
+     // key (which may be null), even when no field produced a snippet
+     String printId = schema.printableUniqueKey(doc);
+     fragments.add(printId, docSummaries);
+   }
+   return fragments;
+ }
+}
+
+
+
+
+/**
+ * Helper class which creates a single TokenStream out of values from a
+ * multi-valued field. Each value is analyzed in turn, and the offsets of its
+ * tokens are shifted so that they index into the concatenation of all values
+ * (see {@link #asSingleValue()}).
+ */
+class MultiValueTokenStream extends TokenStream {
+  private final String fieldName;
+  private final String[] values;
+  private final Analyzer analyzer;
+  private int curIndex;                  // index of the value currently being analyzed
+  private int curOffset;                 // offset into concatenated string
+  private TokenStream currentStream;     // tokenStream currently being iterated
+  private final boolean orderTokenOffsets;
+
+  /** Constructs a TokenStream for consecutively-analyzed field values
+   *
+   * @param fieldName name of the field
+   * @param values array of field data
+   * @param analyzer analyzer instance
+   * @param orderTokenOffsets if true, each value's stream is wrapped in a
+   *        TokenOrderingFilter (window of 10) before its tokens are read
+   */
+  public MultiValueTokenStream(String fieldName, String[] values,
+                               Analyzer analyzer, boolean orderTokenOffsets) {
+    this.fieldName = fieldName;
+    this.values = values;
+    this.analyzer = analyzer;
+    curIndex = -1;
+    curOffset = 0;
+    currentStream = null;
+    this.orderTokenOffsets=orderTokenOffsets;
+  }
+
+  /** Returns the next token in the stream, or null at EOS. */
+  @Override
+  public Token next() throws IOException {
+    int extra = 0;
+    Token nextToken = null;
+    // Loop rather than recurse: a long run of values that produce no tokens
+    // would otherwise add one stack frame per value.
+    while (nextToken == null) {
+      if (currentStream == null) {
+        curIndex++;
+        if (curIndex >= values.length) {
+          return null;
+        }
+        currentStream = analyzer.tokenStream(fieldName,
+                                             new StringReader(values[curIndex]));
+        if (orderTokenOffsets) currentStream = new TokenOrderingFilter(currentStream,10);
+        // add extra space between multiple values; only the gap of the most
+        // recently opened value is applied to the token that gets returned
+        extra = (curIndex > 0) ? analyzer.getPositionIncrementGap(fieldName) : 0;
+      }
+      nextToken = currentStream.next();
+      if (nextToken == null) {
+        // current value exhausted: later tokens start after its text
+        curOffset += values[curIndex].length();
+        currentStream = null;
+      }
+    }
+    // create a modified token whose offsets index into the concatenated
+    // string of all values
+    Token offsetToken = new Token(nextToken.termText(),
+                                  nextToken.startOffset() + curOffset,
+                                  nextToken.endOffset() + curOffset);
+    // NOTE(review): the gap is scaled by 10 here; the reason is not evident
+    // from this class -- confirm before changing
+    offsetToken.setPositionIncrement(nextToken.getPositionIncrement() + extra*10);
+    return offsetToken;
+  }
+
+  /**
+   * Returns all values as a single String into which the Tokens index with
+   * their offsets. The values are concatenated without any separator.
+   */
+  public String asSingleValue() {
+    StringBuilder sb = new StringBuilder();
+    for(String str : values)
+      sb.append(str);
+    return sb.toString();
+  }
+
+}
+
+
+/**
+ * Re-emits the tokens of the wrapped stream ordered by ascending startOffset
+ * within a window of buffered tokens. endOffset is currently ignored.
+ * This is meant to work around fickleness in the highlighter only. It
+ * can mess up token positions and should not be used for indexing or querying.
+ */
+class TokenOrderingFilter extends TokenFilter {
+  private final int windowSize;
+  private final LinkedList<Token> queue = new LinkedList<Token>();
+  private boolean done=false;
+
+  /**
+   * @param input stream whose tokens are to be reordered
+   * @param windowSize maximum number of tokens buffered at any one time
+   */
+  protected TokenOrderingFilter(TokenStream input, int windowSize) {
+    super(input);
+    this.windowSize = windowSize;
+  }
+
+  /** Returns the buffered token with the smallest startOffset, or null at EOS. */
+  @Override
+  public Token next() throws IOException {
+    // top the buffer up to windowSize tokens, or until the input runs dry
+    while (!done && queue.size() < windowSize) {
+      Token incoming = input.next();
+      if (incoming == null) {
+        done = true;
+      } else {
+        insertByStartOffset(incoming);
+      }
+    }
+    // poll() yields null once the buffer is empty
+    return queue.poll();
+  }
+
+  /**
+   * Inserts the token into the sorted buffer, after every buffered token whose
+   * startOffset is less than or equal to its own.
+   */
+  private void insertByStartOffset(Token incoming) {
+    // walk backwards from the tail: the buffer is already sorted and most
+    // incoming tokens belong at or near the end
+    ListIterator<Token> cursor = queue.listIterator(queue.size());
+    while (cursor.hasPrevious()) {
+      Token buffered = cursor.previous();
+      if (buffered.startOffset() <= incoming.startOffset()) {
+        // previous() left the cursor in front of 'buffered'; step forward
+        // again so the insertion lands behind it
+        cursor.next();
+        break;
+      }
+    }
+    cursor.add(incoming);
+  }
+}
+
+
Propchange: lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrHighlighter.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/solr/trunk/src/java/org/apache/solr/highlight/SolrHighlighter.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Modified: lucene/solr/trunk/src/test/org/apache/solr/highlight/HighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/highlight/HighlighterTest.java?view=diff&rev=552682&r1=552681&r2=552682
==============================================================================
--- lucene/solr/trunk/src/test/org/apache/solr/highlight/HighlighterTest.java (original)
+++ lucene/solr/trunk/src/test/org/apache/solr/highlight/HighlighterTest.java Mon Jul 2 23:14:07 2007
@@ -17,10 +17,12 @@
package org.apache.solr.highlight;
-import java.util.HashMap;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.request.*;
+import org.apache.solr.util.*;
+import org.apache.solr.schema.*;
-import org.apache.solr.util.AbstractSolrTestCase;
-import org.apache.solr.util.TestHarness;
+import java.util.HashMap;
/**
* Tests some basic functionality of Solr while demonstrating good
@@ -43,6 +45,27 @@
// if you override setUp or tearDown, you better call
// the super classes version
super.tearDown();
+ }
+
+ /**
+  * Checks which formatter and fragmenter plugins the core's highlighter has
+  * registered, and which of them are the defaults.
+  */
+ public void testConfig()
+ {
+   SolrHighlighter highlighter = SolrCore.getSolrCore().getHighlighter();
+
+   // Make sure we loaded the one formatter, and that the null and the
+   // empty name both resolve to it
+   SolrFormatter fmt1 = highlighter.formatters.get( null );
+   SolrFormatter fmt2 = highlighter.formatters.get( "" );
+   assertSame( fmt1, fmt2 );
+   assertTrue( fmt1 instanceof HtmlFormatter );
+
+
+   // Make sure we loaded both fragmenters, and that "gap" is the default
+   SolrFragmenter gap = highlighter.fragmenters.get( "gap" );
+   SolrFragmenter regex = highlighter.fragmenters.get( "regex" );
+   SolrFragmenter frag = highlighter.fragmenters.get( null );
+   assertSame( gap, frag );
+   assertTrue( gap instanceof GapFragmenter );
+   assertTrue( regex instanceof RegexFragmenter );
}
public void testTermVecHighlight() {
Modified: lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig.xml?view=diff&rev=552682&r1=552681&r2=552682
==============================================================================
--- lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig.xml (original)
+++ lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig.xml Mon Jul 2 23:14:07 2007
@@ -265,6 +265,30 @@
<requestHandler name="/update" class="solr.XmlUpdateRequestHandler" />
<requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy" />
+ <highlighting>
+ <!-- Configure the standard fragmenter -->
+ <fragmenter name="gap" class="org.apache.solr.highlight.GapFragmenter" default="true">
+ <lst name="defaults">
+ <int name="hl.fragsize">100</int>
+ </lst>
+ </fragmenter>
+
+ <fragmenter name="regex" class="org.apache.solr.highlight.RegexFragmenter">
+ <lst name="defaults">
+ <int name="hl.fragsize">70</int>
+ </lst>
+ </fragmenter>
+
+ <!-- Configure the standard formatter -->
+ <formatter name="html" class="org.apache.solr.highlight.HtmlFormatter" default="true">
+ <lst name="defaults">
+ <str name="hl.simple.pre"><![CDATA[<em>]]></str>
+ <str name="hl.simple.post"><![CDATA[</em>]]></str>
+ </lst>
+ </formatter>
+ </highlighting>
+
+
<!-- enable streaming for testing... -->
<requestDispatcher handleSelect="true" >
<requestParsers enableRemoteStreaming="true" multipartUploadLimitInKB="2048" />