You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sa...@apache.org on 2016/12/06 23:13:06 UTC
[11/50] [abbrv] lucene-solr:apiv2: SOLR-9708: Added
UnifiedSolrHighlighter. Added hl.method=original|fastVector|postings|unified
SOLR-9708: Added UnifiedSolrHighlighter. Added hl.method=original|fastVector|postings|unified
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/4314c51c
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/4314c51c
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/4314c51c
Branch: refs/heads/apiv2
Commit: 4314c51c66de1eed0dbc4897684e79935ebfd55e
Parents: a4573fe
Author: David Smiley <ds...@apache.org>
Authored: Thu Nov 24 19:36:07 2016 -0500
Committer: David Smiley <ds...@apache.org>
Committed: Thu Nov 24 19:36:07 2016 -0500
----------------------------------------------------------------------
solr/CHANGES.txt | 13 +
.../handler/component/HighlightComponent.java | 108 ++++--
.../solr/highlight/DefaultSolrHighlighter.java | 15 +-
.../solr/highlight/PostingsSolrHighlighter.java | 9 +-
.../solr/highlight/UnifiedSolrHighlighter.java | 365 +++++++++++++++++++
.../conf/schema-unifiedhighlight.xml | 64 ++++
.../highlight/FastVectorHighlighterTest.java | 9 +-
.../apache/solr/highlight/HighlighterTest.java | 26 +-
.../highlight/TestPostingsSolrHighlighter.java | 2 +-
.../highlight/TestUnifiedSolrHighlighter.java | 229 ++++++++++++
.../solr/common/params/HighlightParams.java | 118 +++---
11 files changed, 868 insertions(+), 90 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4314c51c/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index fe67490..392d02d 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -98,6 +98,13 @@ Upgrade Notes
replaced by corresponding per-second rates viz. "avgRequestsPerSecond", "5minRateRequestsPerSecond"
and "15minRateRequestsPerSecond" for consistency with stats output in other parts of Solr.
+* SOLR-9708: You are encouraged to try out the UnifiedHighlighter by setting hl.method=unified and report feedback. It
+ might become the default in 7.0. It's more efficient/faster than the other highlighters, especially compared to the
+ original Highlighter. That said, some options aren't supported yet, notably hl.fragsize and
+ hl.requireFieldMatch=false. It will get more features in time, especially with your input. See HighlightParams.java
+ for a listing of highlight parameters annotated with which highlighters use them.
+ hl.useFastVectorHighlighter is now considered deprecated in favor of hl.method=fastVector.
+
New Features
----------------------
* SOLR-9293: Solrj client support for hierarchical clusters and other topics
@@ -137,6 +144,12 @@ New Features
* SOLR-9721: javabin Tuple parser for streaming and other end points (noble)
+* SOLR-9708: Added UnifiedSolrHighlighter, a highlighter adapter for Lucene's UnifiedHighlighter. The adapter is a
+ derivative of the PostingsSolrHighlighter, supporting mostly the same parameters with some differences.
+ Introduced "hl.method" parameter which can be set to original|fastVector|postings|unified to pick the highlighter at
+ runtime without the need to modify solrconfig from the default configuration. hl.useFastVectorHighlighter is now
+ considered deprecated in favor of hl.method=fastVector. (Timothy Rodriguez, David Smiley)
+
Optimizations
----------------------
* SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4314c51c/solr/core/src/java/org/apache/solr/handler/component/HighlightComponent.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/component/HighlightComponent.java b/solr/core/src/java/org/apache/solr/handler/component/HighlightComponent.java
index f63ae51..00ed4ef 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/HighlightComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/HighlightComponent.java
@@ -16,6 +16,14 @@
*/
package org.apache.solr.handler.component;
+import java.io.IOException;
+import java.net.URL;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.function.Function;
+import java.util.stream.Stream;
+
import com.google.common.base.Objects;
import org.apache.lucene.search.Query;
import org.apache.solr.common.SolrException;
@@ -29,6 +37,7 @@ import org.apache.solr.core.SolrCore;
import org.apache.solr.highlight.DefaultSolrHighlighter;
import org.apache.solr.highlight.PostingsSolrHighlighter;
import org.apache.solr.highlight.SolrHighlighter;
+import org.apache.solr.highlight.UnifiedSolrHighlighter;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QParserPlugin;
@@ -38,9 +47,7 @@ import org.apache.solr.util.SolrPluginUtils;
import org.apache.solr.util.plugin.PluginInfoInitialized;
import org.apache.solr.util.plugin.SolrCoreAware;
-import java.io.IOException;
-import java.net.URL;
-import java.util.List;
+import static java.util.stream.Collectors.toMap;
/**
* TODO!
@@ -50,13 +57,50 @@ import java.util.List;
*/
public class HighlightComponent extends SearchComponent implements PluginInfoInitialized, SolrCoreAware
{
+ public enum HighlightMethod {
+ UNIFIED("unified"),
+ FAST_VECTOR("fastVector"),
+ POSTINGS("postings"),
+ ORIGINAL("original");
+
+ private static final Map<String, HighlightMethod> METHODS = Collections.unmodifiableMap(Stream.of(values())
+ .collect(toMap(HighlightMethod::getMethodName, Function.identity())));
+
+ private final String methodName;
+
+ HighlightMethod(String method) {
+ this.methodName = method;
+ }
+
+ public String getMethodName() {
+ return methodName;
+ }
+
+ public static HighlightMethod parse(String method) {
+ return METHODS.get(method);
+ }
+ }
+
public static final String COMPONENT_NAME = "highlight";
+
private PluginInfo info = PluginInfo.EMPTY_INFO;
- private SolrHighlighter highlighter;
+ @Deprecated // DWS: in 7.0 lets restructure the abstractions/relationships
+ private SolrHighlighter solrConfigHighlighter;
+
+ /**
+ * @deprecated instead depend on {@link #process(ResponseBuilder)} to choose the highlighter based on
+ * {@link HighlightParams#METHOD}
+ */
+ @Deprecated
public static SolrHighlighter getHighlighter(SolrCore core) {
HighlightComponent hl = (HighlightComponent) core.getSearchComponents().get(HighlightComponent.COMPONENT_NAME);
- return hl==null ? null: hl.getHighlighter();
+ return hl==null ? null: hl.getHighlighter();
+ }
+
+ @Deprecated
+ public SolrHighlighter getHighlighter() {
+ return solrConfigHighlighter;
}
@Override
@@ -67,7 +111,7 @@ public class HighlightComponent extends SearchComponent implements PluginInfoIni
@Override
public void prepare(ResponseBuilder rb) throws IOException {
SolrParams params = rb.req.getParams();
- rb.doHighlights = highlighter.isHighlightingEnabled(params);
+ rb.doHighlights = solrConfigHighlighter.isHighlightingEnabled(params);
if(rb.doHighlights){
rb.setNeedDocList(true);
String hlq = params.get(HighlightParams.Q);
@@ -90,26 +134,28 @@ public class HighlightComponent extends SearchComponent implements PluginInfoIni
if(children.isEmpty()) {
PluginInfo pluginInfo = core.getSolrConfig().getPluginInfo(SolrHighlighter.class.getName()); //TODO deprecated configuration remove later
if (pluginInfo != null) {
- highlighter = core.createInitInstance(pluginInfo, SolrHighlighter.class, null, DefaultSolrHighlighter.class.getName());
+ solrConfigHighlighter = core.createInitInstance(pluginInfo, SolrHighlighter.class, null, DefaultSolrHighlighter.class.getName());
} else {
DefaultSolrHighlighter defHighlighter = new DefaultSolrHighlighter(core);
defHighlighter.init(PluginInfo.EMPTY_INFO);
- highlighter = defHighlighter;
+ solrConfigHighlighter = defHighlighter;
}
} else {
- highlighter = core.createInitInstance(children.get(0),SolrHighlighter.class,null, DefaultSolrHighlighter.class.getName());
+ solrConfigHighlighter = core.createInitInstance(children.get(0),SolrHighlighter.class,null, DefaultSolrHighlighter.class.getName());
}
}
@Override
public void process(ResponseBuilder rb) throws IOException {
+
if (rb.doHighlights) {
SolrQueryRequest req = rb.req;
SolrParams params = req.getParams();
- String[] defaultHighlightFields; //TODO: get from builder by default?
+ SolrHighlighter highlighter = getHighlighter(params);
+ String[] defaultHighlightFields; //TODO: get from builder by default?
if (rb.getQparser() != null) {
defaultHighlightFields = rb.getQparser().getDefaultHighlightFields();
} else {
@@ -130,14 +176,8 @@ public class HighlightComponent extends SearchComponent implements PluginInfoIni
rb.setHighlightQuery( highlightQuery );
}
}
-
- if(highlightQuery != null) {
- boolean rewrite = (highlighter instanceof PostingsSolrHighlighter == false) && !(Boolean.valueOf(params.get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true")) &&
- Boolean.valueOf(params.get(HighlightParams.HIGHLIGHT_MULTI_TERM, "true")));
- highlightQuery = rewrite ? highlightQuery.rewrite(req.getSearcher().getIndexReader()) : highlightQuery;
- }
- // No highlighting if there is no query -- consider q.alt="*:*
+ // No highlighting if there is no query -- consider q.alt=*:*
if( highlightQuery != null ) {
NamedList sumData = highlighter.doHighlighting(
rb.getResults().docList,
@@ -152,6 +192,36 @@ public class HighlightComponent extends SearchComponent implements PluginInfoIni
}
}
+ protected SolrHighlighter getHighlighter(SolrParams params) {
+ HighlightMethod method = HighlightMethod.parse(params.get(HighlightParams.METHOD));
+ if (method == null) {
+ return solrConfigHighlighter;
+ }
+
+ switch (method) {
+ case UNIFIED:
+ if (solrConfigHighlighter instanceof UnifiedSolrHighlighter) {
+ return solrConfigHighlighter;
+ }
+ return new UnifiedSolrHighlighter(); // TODO cache one?
+ case POSTINGS:
+ if (solrConfigHighlighter instanceof PostingsSolrHighlighter) {
+ return solrConfigHighlighter;
+ }
+ return new PostingsSolrHighlighter(); // TODO cache one?
+ case FAST_VECTOR: // fall-through
+ case ORIGINAL:
+ if (solrConfigHighlighter instanceof DefaultSolrHighlighter) {
+ return solrConfigHighlighter;
+ } else {
+ throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
+ "In order to use " + HighlightParams.METHOD + "=" + method.getMethodName() + " the configured" +
+ " highlighter in solrconfig must be " + DefaultSolrHighlighter.class);
+ }
+ default: throw new AssertionError();
+ }
+ }
+
@Override
public void modifyRequest(ResponseBuilder rb, SearchComponent who, ShardRequest sreq) {
if (!rb.doHighlights) return;
@@ -195,10 +265,6 @@ public class HighlightComponent extends SearchComponent implements PluginInfoIni
}
}
-
- public SolrHighlighter getHighlighter() {
- return highlighter;
- }
////////////////////////////////////////////
/// SolrInfoMBean
////////////////////////////////////////////
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4314c51c/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java b/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
index f020eb7..e035a75 100644
--- a/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
+++ b/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
@@ -66,6 +66,7 @@ import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.PluginInfo;
import org.apache.solr.core.SolrCore;
+import org.apache.solr.handler.component.HighlightComponent;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
@@ -373,6 +374,13 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
if (!isHighlightingEnabled(params)) // also returns early if no unique key field
return null;
+ boolean rewrite = query != null && !(Boolean.valueOf(params.get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true")) &&
+ Boolean.valueOf(params.get(HighlightParams.HIGHLIGHT_MULTI_TERM, "true")));
+
+ if (rewrite) {
+ query = query.rewrite(req.getSearcher().getIndexReader());
+ }
+
SolrIndexSearcher searcher = req.getSearcher();
IndexSchema schema = searcher.getSchema();
@@ -463,8 +471,11 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
* Determines if we should use the FastVectorHighlighter for this field.
*/
protected boolean useFastVectorHighlighter(SolrParams params, SchemaField schemaField) {
- boolean useFvhParam = params.getFieldBool(schemaField.getName(), HighlightParams.USE_FVH, false);
- if (!useFvhParam) return false;
+ boolean methodFvh =
+ HighlightComponent.HighlightMethod.FAST_VECTOR.getMethodName().equals(
+ params.getFieldParam(schemaField.getName(), HighlightParams.METHOD))
+ || params.getFieldBool(schemaField.getName(), HighlightParams.USE_FVH, false);
+ if (!methodFvh) return false;
boolean termPosOff = schemaField.storeTermPositions() && schemaField.storeTermOffsets();
if (!termPosOff) {
log.warn("Solr will use the standard Highlighter instead of FastVectorHighlighter because the {} field " +
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4314c51c/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java b/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java
index d005f4e..513b38a 100644
--- a/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java
+++ b/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java
@@ -50,8 +50,9 @@ import org.apache.solr.util.plugin.PluginInfoInitialized;
* <p>
* Example configuration:
* <pre class="prettyprint">
- * <requestHandler name="standard" class="solr.StandardRequestHandler">
+ * <requestHandler name="/select" class="solr.SearchHandler">
* <lst name="defaults">
+ * <str name="hl.method">postings</str>
* <int name="hl.snippets">1</int>
* <str name="hl.tag.pre">&lt;em&gt;</str>
* <str name="hl.tag.post">&lt;/em&gt;</str>
@@ -71,12 +72,6 @@ import org.apache.solr.util.plugin.PluginInfoInitialized;
* </lst>
* </requestHandler>
* </pre>
- * ...
- * <pre class="prettyprint">
- * <searchComponent class="solr.HighlightComponent" name="highlight">
- * <highlighting class="org.apache.solr.highlight.PostingsSolrHighlighter"/>
- * </searchComponent>
- * </pre>
* <p>
* Notes:
* <ul>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4314c51c/solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java b/solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java
new file mode 100644
index 0000000..6f81241
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java
@@ -0,0 +1,365 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.highlight;
+
+import java.io.IOException;
+import java.text.BreakIterator;
+import java.util.Collections;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.postingshighlight.WholeBreakIterator;
+import org.apache.lucene.search.uhighlight.DefaultPassageFormatter;
+import org.apache.lucene.search.uhighlight.PassageFormatter;
+import org.apache.lucene.search.uhighlight.PassageScorer;
+import org.apache.lucene.search.uhighlight.UnifiedHighlighter;
+import org.apache.solr.common.params.HighlightParams;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.core.PluginInfo;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.request.SolrRequestInfo;
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.search.DocIterator;
+import org.apache.solr.search.DocList;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.util.RTimerTree;
+import org.apache.solr.util.plugin.PluginInfoInitialized;
+
+/**
+ * Highlighter impl that uses {@link UnifiedHighlighter}
+ * <p>
+ * Example configuration with default values:
+ * <pre class="prettyprint">
+ * <requestHandler name="/select" class="solr.SearchHandler">
+ * <lst name="defaults">
+ * <str name="hl.method">unified</str>
+ * <int name="hl.snippets">1</int>
+ * <str name="hl.tag.pre">&lt;em&gt;</str>
+ * <str name="hl.tag.post">&lt;/em&gt;</str>
+ * <str name="hl.simple.pre">&lt;em&gt;</str>
+ * <str name="hl.simple.post">&lt;/em&gt;</str>
+ * <str name="hl.tag.ellipsis">... </str>
+ * <bool name="hl.defaultSummary">true</bool>
+ * <str name="hl.encoder">simple</str>
+ * <float name="hl.score.k1">1.2</float>
+ * <float name="hl.score.b">0.75</float>
+ * <float name="hl.score.pivot">87</float>
+ * <str name="hl.bs.language"></str>
+ * <str name="hl.bs.country"></str>
+ * <str name="hl.bs.variant"></str>
+ * <str name="hl.bs.type">SENTENCE</str>
+ * <int name="hl.maxAnalyzedChars">10000</int>
+ * <bool name="hl.highlightMultiTerm">true</bool>
+ * <bool name="hl.usePhraseHighlighter">true</bool>
+ * <int name="hl.cacheFieldValCharsThreshold">524288</int>
+ * <str name="hl.offsetSource"></str>
+ * </lst>
+ * </requestHandler>
+ * </pre>
+ * <p>
+ * Notes:
+ * <ul>
+ * <li>hl.q (string) can specify the query
+ * <li>hl.fl (string) specifies the field list.
+ * <li>hl.snippets (int) specifies how many snippets to return.
+ * <li>hl.tag.pre (string) specifies text which appears before a highlighted term.
+ * <li>hl.tag.post (string) specifies text which appears after a highlighted term.
+ * <li>hl.simple.pre (string) specifies text which appears before a highlighted term. (prefer hl.tag.pre)
+ * <li>hl.simple.post (string) specifies text which appears after a highlighted term. (prefer hl.tag.post)
+ * <li>hl.tag.ellipsis (string) specifies text which joins non-adjacent passages. The default is to retain each
+ * value in a list without joining them.
+ * <li>hl.defaultSummary (bool) specifies if a field should have a default summary of the leading text.
+ * <li>hl.encoder (string) can be 'html' (html escapes content) or 'simple' (no escaping).
+ * <li>hl.score.k1 (float) specifies bm25 scoring parameter 'k1'
+ * <li>hl.score.b (float) specifies bm25 scoring parameter 'b'
+ * <li>hl.score.pivot (float) specifies bm25 scoring parameter 'avgdl'
+ * <li>hl.bs.type (string) specifies how to divide text into passages: [SENTENCE, LINE, WORD, CHARACTER, WHOLE]
+ * <li>hl.bs.language (string) specifies language code for BreakIterator. default is empty string (root locale)
+ * <li>hl.bs.country (string) specifies country code for BreakIterator. default is empty string (root locale)
+ * <li>hl.bs.variant (string) specifies variant code for BreakIterator. default is empty string (root locale)
+ * <li>hl.maxAnalyzedChars (int) specifies how many characters at most will be processed in a document for any one field.
+ * <li>hl.highlightMultiTerm (bool) enables highlighting for range/wildcard/fuzzy/prefix queries at some cost. default is true
+ * <li>hl.usePhraseHighlighter (bool) enables phrase highlighting. default is true
+ * <li>hl.cacheFieldValCharsThreshold (int) controls how many characters from a field are cached. default is 524288 (1MB in 2 byte chars)
+ * <li>hl.offsetSource (string) specifies which offset source to use, prefers postings, but will use what's available if not specified
+ * </ul>
+ *
+ * @lucene.experimental
+ */
+public class UnifiedSolrHighlighter extends SolrHighlighter implements PluginInfoInitialized {
+
+ protected static final String SNIPPET_SEPARATOR = "\u0000";
+ private static final String[] ZERO_LEN_STR_ARRAY = new String[0];
+
+ @Override
+ public void init(PluginInfo info) {
+ }
+
+ @Override
+ public NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException {
+ final SolrParams params = req.getParams();
+
+ // if highlighting isn't enabled, then why call doHighlighting?
+ if (!isHighlightingEnabled(params))
+ return null;
+
+ int[] docIDs = toDocIDs(docs);
+
+ // fetch the unique keys
+ String[] keys = getUniqueKeys(req.getSearcher(), docIDs);
+
+ // query-time parameters
+ String[] fieldNames = getHighlightFields(query, req, defaultFields);
+
+ int maxPassages[] = new int[fieldNames.length];
+ for (int i = 0; i < fieldNames.length; i++) {
+ maxPassages[i] = params.getFieldInt(fieldNames[i], HighlightParams.SNIPPETS, 1);
+ }
+
+ UnifiedHighlighter highlighter = getHighlighter(req);
+ Map<String, String[]> snippets = highlighter.highlightFields(fieldNames, query, docIDs, maxPassages);
+ return encodeSnippets(keys, fieldNames, snippets);
+ }
+
+ /**
+ * Creates an instance of the Lucene {@link UnifiedHighlighter}. Provided for subclass extension so that
+ * a subclass can return a subclass of {@link SolrExtendedUnifiedHighlighter}.
+ */
+ protected UnifiedHighlighter getHighlighter(SolrQueryRequest req) {
+ return new SolrExtendedUnifiedHighlighter(req);
+ }
+
+ /**
+ * Encodes the resulting snippets into a namedlist
+ *
+ * @param keys the document unique keys
+ * @param fieldNames field names to highlight, in the order they are added to each document's summary
+ * @param snippets map from field name to snippet array for the docs
+ * @return encoded namedlist of summaries
+ */
+ protected NamedList<Object> encodeSnippets(String[] keys, String[] fieldNames, Map<String, String[]> snippets) {
+ NamedList<Object> list = new SimpleOrderedMap<>();
+ for (int i = 0; i < keys.length; i++) {
+ NamedList<Object> summary = new SimpleOrderedMap<>();
+ for (String field : fieldNames) {
+ String snippet = snippets.get(field)[i];
+ if (snippet == null) {
+ //TODO reuse logic of DefaultSolrHighlighter.alternateField
+ summary.add(field, ZERO_LEN_STR_ARRAY);
+ } else {
+ // we used a special snippet separator char and we can now split on it.
+ summary.add(field, snippet.split(SNIPPET_SEPARATOR));
+ }
+ }
+ list.add(keys[i], summary);
+ }
+ return list;
+ }
+
+ /**
+ * Converts solr's DocList to the int[] docIDs
+ */
+ protected int[] toDocIDs(DocList docs) {
+ int[] docIDs = new int[docs.size()];
+ DocIterator iterator = docs.iterator();
+ for (int i = 0; i < docIDs.length; i++) {
+ if (!iterator.hasNext()) {
+ throw new AssertionError();
+ }
+ docIDs[i] = iterator.nextDoc();
+ }
+ if (iterator.hasNext()) {
+ throw new AssertionError();
+ }
+ return docIDs;
+ }
+
+ /**
+ * Retrieves the unique keys for the topdocs to key the results
+ */
+ protected String[] getUniqueKeys(SolrIndexSearcher searcher, int[] docIDs) throws IOException {
+ IndexSchema schema = searcher.getSchema();
+ SchemaField keyField = schema.getUniqueKeyField();
+ if (keyField != null) {
+ Set<String> selector = Collections.singleton(keyField.getName());
+ String[] uniqueKeys = new String[docIDs.length];
+ for (int i = 0; i < docIDs.length; i++) {
+ int docid = docIDs[i];
+ Document doc = searcher.doc(docid, selector);
+ String id = schema.printableUniqueKey(doc);
+ uniqueKeys[i] = id;
+ }
+ return uniqueKeys;
+ } else {
+ return new String[docIDs.length];
+ }
+ }
+
+ /**
+ * From {@link #getHighlighter(org.apache.solr.request.SolrQueryRequest)}.
+ */
+ protected static class SolrExtendedUnifiedHighlighter extends UnifiedHighlighter {
+ protected final SolrParams params;
+ protected final IndexSchema schema;
+
+ protected final RTimerTree loadFieldValuesTimer;
+
+ public SolrExtendedUnifiedHighlighter(SolrQueryRequest req) {
+ super(req.getSearcher(), req.getSchema().getIndexAnalyzer());
+ this.params = req.getParams();
+ this.schema = req.getSchema();
+ this.setMaxLength(
+ params.getInt(HighlightParams.MAX_CHARS, UnifiedHighlighter.DEFAULT_MAX_LENGTH));
+ this.setCacheFieldValCharsThreshold(
+ params.getInt(HighlightParams.CACHE_FIELD_VAL_CHARS_THRESHOLD, DEFAULT_CACHE_CHARS_THRESHOLD));
+
+ // SolrRequestInfo is a thread-local singleton providing access to the ResponseBuilder to code that
+ // otherwise can't get it in a nicer way.
+ SolrQueryRequest request = SolrRequestInfo.getRequestInfo().getReq();
+ final RTimerTree timerTree;
+ if (request.getRequestTimer() != null) { //It may be null if not used in a search context.
+ timerTree = request.getRequestTimer();
+ } else {
+ timerTree = new RTimerTree(); // since null checks are annoying
+ }
+ loadFieldValuesTimer = timerTree.sub("loadFieldValues"); // we assume a new timer, state of STARTED
+ loadFieldValuesTimer.pause(); // state of PAUSED now with about zero time. Will fail if state isn't STARTED.
+ }
+
+ @Override
+ protected OffsetSource getOffsetSource(String field) {
+ String sourceStr = params.getFieldParam(field, HighlightParams.OFFSET_SOURCE);
+ if (sourceStr != null) {
+ return OffsetSource.valueOf(sourceStr.toUpperCase(Locale.ROOT));
+ } else {
+ return super.getOffsetSource(field);
+ }
+ }
+
+ @Override
+ public int getMaxNoHighlightPassages(String field) {
+ boolean defaultSummary = params.getFieldBool(field, HighlightParams.DEFAULT_SUMMARY, false);
+ if (defaultSummary) {
+ return -1;// signifies return first hl.snippets passages worth of the content
+ } else {
+ return 0;// will return null
+ }
+ }
+
+ @Override
+ protected PassageFormatter getFormatter(String fieldName) {
+ String preTag = params.getFieldParam(fieldName, HighlightParams.TAG_PRE,
+ params.getFieldParam(fieldName, HighlightParams.SIMPLE_PRE, "<em>")
+ );
+
+ String postTag = params.getFieldParam(fieldName, HighlightParams.TAG_POST,
+ params.getFieldParam(fieldName, HighlightParams.SIMPLE_POST, "</em>")
+ );
+ String ellipsis = params.getFieldParam(fieldName, HighlightParams.TAG_ELLIPSIS, SNIPPET_SEPARATOR);
+ String encoder = params.getFieldParam(fieldName, HighlightParams.ENCODER, "simple");
+ return new DefaultPassageFormatter(preTag, postTag, ellipsis, "html".equals(encoder));
+ }
+
+ @Override
+ protected PassageScorer getScorer(String fieldName) {
+ float k1 = params.getFieldFloat(fieldName, HighlightParams.SCORE_K1, 1.2f);
+ float b = params.getFieldFloat(fieldName, HighlightParams.SCORE_B, 0.75f);
+ float pivot = params.getFieldFloat(fieldName, HighlightParams.SCORE_PIVOT, 87f);
+ return new PassageScorer(k1, b, pivot);
+ }
+
+ @Override
+ protected BreakIterator getBreakIterator(String field) {
+ String language = params.getFieldParam(field, HighlightParams.BS_LANGUAGE);
+ String country = params.getFieldParam(field, HighlightParams.BS_COUNTRY);
+ String variant = params.getFieldParam(field, HighlightParams.BS_VARIANT);
+ Locale locale = parseLocale(language, country, variant);
+ String type = params.getFieldParam(field, HighlightParams.BS_TYPE);
+ return parseBreakIterator(type, locale);
+ }
+
+ /**
+ * parse a break iterator type for the specified locale
+ */
+ protected BreakIterator parseBreakIterator(String type, Locale locale) {
+ if (type == null || "SENTENCE".equals(type)) {
+ return BreakIterator.getSentenceInstance(locale);
+ } else if ("LINE".equals(type)) {
+ return BreakIterator.getLineInstance(locale);
+ } else if ("WORD".equals(type)) {
+ return BreakIterator.getWordInstance(locale);
+ } else if ("CHARACTER".equals(type)) {
+ return BreakIterator.getCharacterInstance(locale);
+ } else if ("WHOLE".equals(type)) {
+ return new WholeBreakIterator();
+ } else {
+ throw new IllegalArgumentException("Unknown " + HighlightParams.BS_TYPE + ": " + type);
+ }
+ }
+
+ /**
+ * parse a locale from a language+country+variant spec
+ */
+ protected Locale parseLocale(String language, String country, String variant) {
+ if (language == null && country == null && variant == null) {
+ return Locale.ROOT;
+ } else if (language == null) {
+ throw new IllegalArgumentException("language is required if country or variant is specified");
+ } else if (country == null && variant != null) {
+ throw new IllegalArgumentException("To specify variant, country is required");
+ } else if (country != null && variant != null) {
+ return new Locale(language, country, variant);
+ } else if (country != null) {
+ return new Locale(language, country);
+ } else {
+ return new Locale(language);
+ }
+ }
+
+ @Override
+ protected List<CharSequence[]> loadFieldValues(String[] fields, DocIdSetIterator docIter, int
+ cacheCharsThreshold) throws IOException {
+ // Time loading field values. It can be an expensive part of highlighting.
+ loadFieldValuesTimer.resume();
+ try {
+ return super.loadFieldValues(fields, docIter, cacheCharsThreshold);
+ } finally {
+ loadFieldValuesTimer.pause(); // note: doesn't need to be "stopped"; pause is fine.
+ }
+ }
+
+ @Override
+ protected boolean shouldHandleMultiTermQuery(String field) {
+ return params.getFieldBool(field, HighlightParams.HIGHLIGHT_MULTI_TERM, true);
+ }
+
+ @Override
+ protected boolean shouldHighlightPhrasesStrictly(String field) {
+ return params.getFieldBool(field, HighlightParams.USE_PHRASE_HIGHLIGHTER, true);
+ }
+
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4314c51c/solr/core/src/test-files/solr/collection1/conf/schema-unifiedhighlight.xml
----------------------------------------------------------------------
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-unifiedhighlight.xml b/solr/core/src/test-files/solr/collection1/conf/schema-unifiedhighlight.xml
new file mode 100644
index 0000000..7c3bd11
--- /dev/null
+++ b/solr/core/src/test-files/solr/collection1/conf/schema-unifiedhighlight.xml
@@ -0,0 +1,47 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- Test schema file for UnifiedHighlighter -->
+
+<schema name="unifiedhighlight" version="1.0">
+ <fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
+
+ <!-- basic text field: no offsets! -->
+ <fieldType name="text" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.MockTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <!-- text field with offsets -->
+ <fieldType name="text_offsets" class="solr.TextField" storeOffsetsWithPositions="true">
+ <analyzer>
+ <tokenizer class="solr.MockTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <field name="id" type="int" indexed="true" stored="true" multiValued="false" required="false"/>
+ <field name="text" type="text_offsets" indexed="true" stored="true"/>
+ <field name="text2" type="text" indexed="true" stored="true"/>
+ <field name="text3" type="text_offsets" indexed="true" stored="true"/>
+
+ <defaultSearchField>text</defaultSearchField>
+ <uniqueKey>id</uniqueKey>
+</schema>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4314c51c/solr/core/src/test/org/apache/solr/highlight/FastVectorHighlighterTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/highlight/FastVectorHighlighterTest.java b/solr/core/src/test/org/apache/solr/highlight/FastVectorHighlighterTest.java
index 1d61c08..aafe3f5 100644
--- a/solr/core/src/test/org/apache/solr/highlight/FastVectorHighlighterTest.java
+++ b/solr/core/src/test/org/apache/solr/highlight/FastVectorHighlighterTest.java
@@ -70,7 +70,12 @@ public class FastVectorHighlighterTest extends SolrTestCaseJ4 {
args.put("hl", "true");
args.put("hl.fl", "tv_text");
args.put("hl.snippets", "2");
- args.put("hl.useFastVectorHighlighter", "true");
+ args.put("hl.tag.pre", "<fvpre>"); //... and let post default to </em>. This is just a test.
+ if (random().nextBoolean()) {
+ args.put("hl.useFastVectorHighlighter", "true"); // old way
+ } else {
+ args.put("hl.method", "fastVector"); // the new way
+ }
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
"standard",0,200,args);
@@ -81,7 +86,7 @@ public class FastVectorHighlighterTest extends SolrTestCaseJ4 {
assertQ("Basic summarization",
sumLRF.makeRequest("tv_text:vector"),
"//lst[@name='highlighting']/lst[@name='1']",
- "//lst[@name='1']/arr[@name='tv_text']/str[.='basic fast <em>vector</em> highlighter test']"
+ "//lst[@name='1']/arr[@name='tv_text']/str[.='basic fast <fvpre>vector</em> highlighter test']"
);
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4314c51c/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java b/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java
index 2cc74ab..6506f98 100644
--- a/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java
+++ b/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java
@@ -43,10 +43,6 @@ import org.junit.After;
import org.junit.BeforeClass;
import org.junit.Test;
-/**
- * Tests some basic functionality of Solr while demonstrating good
- * Best Practices for using AbstractSolrTestCase
- */
public class HighlighterTest extends SolrTestCaseJ4 {
private static String LONG_TEXT = "a long days night this should be a piece of text which is is is is is is is is is is is is is is is is is is is " +
@@ -91,6 +87,25 @@ public class HighlighterTest extends SolrTestCaseJ4 {
}
@Test
+ public void testMethodPostings() {
+ // Verifies that hl.method=postings is dispatched: highlighting a field whose postings lack
+ // offsets is expected to fail with an IllegalArgumentException as the cause.
+ String field = "t_text";
+ assertU(adoc(field, LONG_TEXT,
+ "id", "1"));
+ assertU(commit());
+
+ try {
+ assertQ("Tried PostingsSolrHighlighter but failed due to offsets not in postings",
+ req("q", "long", "hl.method", "postings", "df", field, "hl", "true"));
+ // Reaching this line means the request unexpectedly succeeded.
+ fail("Did not encounter exception for no offsets");
+ } catch (Exception e) {
+ assertTrue("Cause should be illegal argument", e.getCause() instanceof IllegalArgumentException);
+ assertTrue("Should warn no offsets", e.getCause().getMessage().contains("indexed without offsets"));
+ }
+ // note: the default schema.xml has no offsets in postings to test the PostingsHighlighter. Leave that for another
+ // test class.
+ }
+
+ @Test
public void testMergeContiguous() throws Exception {
HashMap<String,String> args = new HashMap<>();
args.put(HighlightParams.HIGHLIGHT, "true");
@@ -99,6 +114,7 @@ public class HighlighterTest extends SolrTestCaseJ4 {
args.put(HighlightParams.SNIPPETS, String.valueOf(4));
args.put(HighlightParams.FRAGSIZE, String.valueOf(40));
args.put(HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, "true");
+ args.put(HighlightParams.METHOD, "original"); // test works; no complaints
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
"standard", 0, 200, args);
String input = "this is some long text. It has the word long in many places. In fact, it has long on some different fragments. " +
@@ -763,7 +779,7 @@ public class HighlighterTest extends SolrTestCaseJ4 {
);
// Prove fallback highlighting works also with FVH
- args.put("hl.useFastVectorHighlighter", "true");
+ args.put("hl.method", "fastVector");
args.put("hl.tag.pre", "<fvhpre>");
args.put("hl.tag.post", "</fvhpost>");
args.put("f.t_text.hl.maxAlternateFieldLength", "18");
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4314c51c/solr/core/src/test/org/apache/solr/highlight/TestPostingsSolrHighlighter.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/highlight/TestPostingsSolrHighlighter.java b/solr/core/src/test/org/apache/solr/highlight/TestPostingsSolrHighlighter.java
index 08748e4..074f9f4 100644
--- a/solr/core/src/test/org/apache/solr/highlight/TestPostingsSolrHighlighter.java
+++ b/solr/core/src/test/org/apache/solr/highlight/TestPostingsSolrHighlighter.java
@@ -52,7 +52,7 @@ public class TestPostingsSolrHighlighter extends SolrTestCaseJ4 {
public void testSimple() {
assertQ("simplest test",
- req("q", "text:document", "sort", "id asc", "hl", "true"),
+ req("q", "text:document", "sort", "id asc", "hl", "true", "hl.method", "postings"), // test hl.method is happy too
"count(//lst[@name='highlighting']/*)=2",
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='<em>document</em> one'",
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second <em>document</em>'");
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4314c51c/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java b/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java
new file mode 100644
index 0000000..95754a4
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java
@@ -0,0 +1,229 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.highlight;
+
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.schema.IndexSchema;
+import org.junit.BeforeClass;
+
+/**
+ * Tests for the UnifiedHighlighter Solr plugin.
+ * Every request built through {@link #req(String...)} carries {@code hl.method=unified}, and each
+ * test starts from the two documents (ids 101 and 102) indexed by {@link #setUp()}.
+ */
+public class TestUnifiedSolrHighlighter extends SolrTestCaseJ4 {
+
+ /** Starts the test core with the unified-highlight schema and sanity-checks its offset settings. */
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ initCore("solrconfig-basic.xml", "schema-unifiedhighlight.xml");
+
+ // test our config is sane, just to be sure:
+
+ // 'text' and 'text3' should have offsets, 'text2' should not
+ IndexSchema schema = h.getCore().getLatestSchema();
+ assertTrue(schema.getField("text").storeOffsetsWithPositions());
+ assertTrue(schema.getField("text3").storeOffsetsWithPositions());
+ assertFalse(schema.getField("text2").storeOffsetsWithPositions());
+ }
+
+ /** Clears the index and re-adds the two baseline documents (ids 101 and 102) before each test. */
+ @Override
+ public void setUp() throws Exception {
+ super.setUp();
+ clearIndex();
+ assertU(adoc("text", "document one", "text2", "document one", "text3", "crappy document", "id", "101"));
+ assertU(adoc("text", "second document", "text2", "second document", "text3", "crappier document", "id", "102"));
+ assertU(commit());
+ }
+
+ /** Builds a request from the given params with {@code hl.method=unified} appended. */
+ public static SolrQueryRequest req(String... params) {
+ return SolrTestCaseJ4.req(params, "hl.method", "unified");
+ }
+
+ /** Basic term highlighting on the default field for both baseline documents. */
+ public void testSimple() {
+ assertQ("simplest test",
+ req("q", "text:document", "sort", "id asc", "hl", "true"),
+ "count(//lst[@name='highlighting']/*)=2",
+ "//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='<em>document</em> one'",
+ "//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second <em>document</em>'");
+ }
+
+ /**
+ * Requests hl.offsetSource=postings for 'text2', which the schema check in beforeClass() asserts
+ * has no offsets; an IllegalArgumentException cause is expected.
+ */
+ public void testImpossibleOffsetSource() {
+ try {
+ // The XPath expectations below only matter if the request does not throw, in which case
+ // the fail() call that follows reports the problem.
+ assertQ("impossible offset source",
+ req("q", "text2:document", "hl.offsetSource", "postings", "hl.fl", "text2", "sort", "id asc", "hl", "true"),
+ "count(//lst[@name='highlighting']/*)=2",
+ "//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='<em>document</em> one'",
+ "//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second <em>document</em>'");
+ fail("Did not encounter exception for no offsets");
+ } catch (Exception e) {
+ assertTrue("Cause should be illegal argument", e.getCause() instanceof IllegalArgumentException);
+ assertTrue("Should warn no offsets", e.getCause().getMessage().contains("indexed without offsets"));
+ }
+ }
+
+ /** With hl.snippets=2 and sentence breaking, both matching sentences come back as separate snippets. */
+ public void testMultipleSnippetsReturned() {
+ clearIndex();
+ assertU(adoc("text", "Document snippet one. Intermediate sentence. Document snippet two.",
+ "text2", "document one", "text3", "crappy document", "id", "101"));
+ assertU(commit());
+ assertQ("multiple snippets test",
+ req("q", "text:document", "sort", "id asc", "hl", "true", "hl.snippets", "2", "hl.bs.type", "SENTENCE"),
+ "count(//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/*)=2",
+ "//lst[@name='highlighting']/lst[@name='101']/arr/str[1]='<em>Document</em> snippet one. '",
+ "//lst[@name='highlighting']/lst[@name='101']/arr/str[2]='<em>Document</em> snippet two.'");
+ }
+
+ /** Without hl.usePhraseHighlighter, only the terms inside the matching phrase are highlighted. */
+ public void testStrictPhrasesEnabledByDefault() {
+ clearIndex();
+ assertU(adoc("text", "Strict phrases should be enabled for phrases",
+ "text2", "document one", "text3", "crappy document", "id", "101"));
+ assertU(commit());
+ assertQ("strict phrase handling",
+ req("q", "text:\"strict phrases\"", "sort", "id asc", "hl", "true"),
+ "count(//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/*)=1",
+ "//lst[@name='highlighting']/lst[@name='101']/arr/str[1]='<em>Strict</em> <em>phrases</em> should be enabled for phrases'");
+ }
+
+ /** With hl.usePhraseHighlighter=false, the stand-alone trailing "phrases" is highlighted as well. */
+ public void testStrictPhrasesCanBeDisabled() {
+ clearIndex();
+ assertU(adoc("text", "Strict phrases should be disabled for phrases",
+ "text2", "document one", "text3", "crappy document", "id", "101"));
+ assertU(commit());
+ assertQ("strict phrase handling",
+ req("q", "text:\"strict phrases\"", "sort", "id asc", "hl", "true", "hl.usePhraseHighlighter", "false"),
+ "count(//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/*)=1",
+ "//lst[@name='highlighting']/lst[@name='101']/arr/str[1]='<em>Strict</em> <em>phrases</em> should be disabled for <em>phrases</em>'");
+ }
+
+ /** A prefix query highlights every term it matches when hl.highlightMultiTerm is not set. */
+ public void testMultiTermQueryEnabledByDefault() {
+ clearIndex();
+ assertU(adoc("text", "Aviary Avenue document",
+ "text2", "document one", "text3", "crappy document", "id", "101"));
+ assertU(commit());
+ assertQ("multi term query handling",
+ req("q", "text:av*", "sort", "id asc", "hl", "true"),
+ "count(//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/*)=1",
+ "//lst[@name='highlighting']/lst[@name='101']/arr/str[1]='<em>Aviary</em> <em>Avenue</em> document'");
+ }
+
+ /** With hl.highlightMultiTerm=false, the same prefix query yields no snippets. */
+ public void testMultiTermQueryCanBeDisabled() {
+ clearIndex();
+ assertU(adoc("text", "Aviary Avenue document",
+ "text2", "document one", "text3", "crappy document", "id", "101"));
+ assertU(commit());
+ assertQ("multi term query handling",
+ req("q", "text:av*", "sort", "id asc", "hl", "true", "hl.highlightMultiTerm", "false"),
+ "count(//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/*)=0");
+ }
+
+ /** Only the document on the requested page (start=1, rows=1) is highlighted. */
+ public void testPagination() {
+ assertQ("pagination test",
+ req("q", "text:document", "sort", "id asc", "hl", "true", "rows", "1", "start", "1"),
+ "count(//lst[@name='highlighting']/*)=1",
+ "//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second <em>document</em>'");
+ }
+
+ /** Document 102 matches only through *:*, so it gets a highlighting entry with no snippet. */
+ public void testEmptySnippet() {
+ assertQ("null snippet test",
+ req("q", "text:one OR *:*", "sort", "id asc", "hl", "true"),
+ "count(//lst[@name='highlighting']/*)=2",
+ "//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='document <em>one</em>'",
+ "count(//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/*)=0");
+ }
+
+ /** Same query as testEmptySnippet, but hl.defaultSummary=true returns unhighlighted text for 102. */
+ public void testDefaultSummary() {
+ assertQ("null snippet test",
+ req("q", "text:one OR *:*", "sort", "id asc", "hl", "true", "hl.defaultSummary", "true"),
+ "count(//lst[@name='highlighting']/*)=2",
+ "//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='document <em>one</em>'",
+ "//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second document'");
+ }
+
+ /** Highlights a field other than the default one via hl.fl. */
+ public void testDifferentField() {
+ assertQ("highlighting text3",
+ req("q", "text3:document", "sort", "id asc", "hl", "true", "hl.fl", "text3"),
+ "count(//lst[@name='highlighting']/*)=2",
+ "//lst[@name='highlighting']/lst[@name='101']/arr[@name='text3']/str='crappy <em>document</em>'",
+ "//lst[@name='highlighting']/lst[@name='102']/arr[@name='text3']/str='crappier <em>document</em>'");
+ }
+
+ /** Highlights two fields in one request using a comma-separated hl.fl. */
+ public void testTwoFields() {
+ assertQ("highlighting text and text3",
+ req("q", "text:document text3:document", "sort", "id asc", "hl", "true", "hl.fl", "text,text3"),
+ "count(//lst[@name='highlighting']/*)=2",
+ "//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='<em>document</em> one'",
+ "//lst[@name='highlighting']/lst[@name='101']/arr[@name='text3']/str='crappy <em>document</em>'",
+ "//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second <em>document</em>'",
+ "//lst[@name='highlighting']/lst[@name='102']/arr[@name='text3']/str='crappier <em>document</em>'");
+ }
+
+ /** Custom delimiters supplied through hl.tag.pre / hl.tag.post. */
+ public void testTags() {
+ assertQ("different pre/post tags",
+ req("q", "text:document", "sort", "id asc", "hl", "true", "hl.tag.pre", "[", "hl.tag.post", "]"),
+ "count(//lst[@name='highlighting']/*)=2",
+ "//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='[document] one'",
+ "//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second [document]'");
+ }
+
+ /** Custom delimiters supplied through hl.simple.pre / hl.simple.post instead of hl.tag.*. */
+ public void testUsingSimplePrePostTags() {
+ assertQ("different pre/post tags",
+ req("q", "text:document", "sort", "id asc", "hl", "true", "hl.simple.pre", "[", "hl.simple.post", "]"),
+ "count(//lst[@name='highlighting']/*)=2",
+ "//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='[document] one'",
+ "//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second [document]'");
+ }
+
+ /** Same as testUsingSimplePrePostTags, but using the per-field f.text.hl.simple.* form. */
+ public void testUsingSimplePrePostTagsPerField() {
+ assertQ("different pre/post tags",
+ req("q", "text:document", "sort", "id asc", "hl", "true", "f.text.hl.simple.pre", "[", "f.text.hl.simple.post", "]"),
+ "count(//lst[@name='highlighting']/*)=2",
+ "//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='[document] one'",
+ "//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second [document]'");
+ }
+
+ /** Per-field hl.tag.* overrides apply to text3 only; text keeps the em tags. */
+ public void testTagsPerField() {
+ assertQ("highlighting text and text3",
+ req("q", "text:document text3:document", "sort", "id asc", "hl", "true", "hl.fl", "text,text3", "f.text3.hl.tag.pre", "[", "f.text3.hl.tag.post", "]"),
+ "count(//lst[@name='highlighting']/*)=2",
+ "//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='<em>document</em> one'",
+ "//lst[@name='highlighting']/lst[@name='101']/arr[@name='text3']/str='crappy [document]'",
+ "//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second <em>document</em>'",
+ "//lst[@name='highlighting']/lst[@name='102']/arr[@name='text3']/str='crappier [document]'");
+ }
+
+ /** hl.bs.type=WORD reduces each snippet to the matched word alone. */
+ public void testBreakIterator() {
+ assertQ("different breakiterator",
+ req("q", "text:document", "sort", "id asc", "hl", "true", "hl.bs.type", "WORD"),
+ "count(//lst[@name='highlighting']/*)=2",
+ "//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='<em>document</em>'",
+ "//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='<em>document</em>'");
+ }
+
+ /** hl.bs.type=WHOLE returns the complete two-sentence value of the extra document 103 as one snippet. */
+ public void testBreakIterator2() {
+ assertU(adoc("text", "Document one has a first sentence. Document two has a second sentence.", "id", "103"));
+ assertU(commit());
+ assertQ("different breakiterator",
+ req("q", "text:document", "sort", "id asc", "hl", "true", "hl.bs.type", "WHOLE"),
+ "//lst[@name='highlighting']/lst[@name='103']/arr[@name='text']/str='<em>Document</em> one has a first sentence. <em>Document</em> two has a second sentence.'");
+ }
+
+ /** Highlights a value containing inline markup with hl.encoder=html. */
+ public void testEncoder() {
+ assertU(adoc("text", "Document one has a first <i>sentence</i>.", "id", "103"));
+ assertU(commit());
+ // NOTE(review): with hl.encoder=html the stored <i> markup is presumably entity-escaped in the
+ // response, so the raw tags in the expected value below may be an artifact of how this text
+ // was rendered -- confirm against the committed file.
+ assertQ("html escaped",
+ req("q", "text:document", "sort", "id asc", "hl", "true", "hl.encoder", "html"),
+ "//lst[@name='highlighting']/lst[@name='103']/arr[@name='text']/str='<em>Document</em> one has a first <i>sentence</i>.'");
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4314c51c/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java b/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java
index c0d40aa..fd752bf 100644
--- a/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java
+++ b/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java
@@ -21,62 +21,76 @@ package org.apache.solr.common.params;
* @since solr 1.3
*/
public interface HighlightParams {
+ // primary
public static final String HIGHLIGHT = "hl";
- public static final String Q = HIGHLIGHT+".q";
- public static final String QPARSER = HIGHLIGHT+".qparser";
+ public static final String METHOD = HIGHLIGHT+".method"; // original|fastVector|postings|unified
+ @Deprecated // see hl.method
+ public static final String USE_FVH = HIGHLIGHT + ".useFastVectorHighlighter";
public static final String FIELDS = HIGHLIGHT+".fl";
public static final String SNIPPETS = HIGHLIGHT+".snippets";
- public static final String FRAGSIZE = HIGHLIGHT+".fragsize";
- public static final String INCREMENT = HIGHLIGHT+".increment";
- public static final String MAX_CHARS = HIGHLIGHT+".maxAnalyzedChars";
- public static final String FORMATTER = HIGHLIGHT+".formatter";
- public static final String ENCODER = HIGHLIGHT+".encoder";
- public static final String FRAGMENTER = HIGHLIGHT+".fragmenter";
- public static final String PRESERVE_MULTI = HIGHLIGHT+".preserveMulti";
- public static final String FRAG_LIST_BUILDER = HIGHLIGHT+".fragListBuilder";
- public static final String FRAGMENTS_BUILDER = HIGHLIGHT+".fragmentsBuilder";
- public static final String BOUNDARY_SCANNER = HIGHLIGHT+".boundaryScanner";
- public static final String BS_MAX_SCAN = HIGHLIGHT+".bs.maxScan";
- public static final String BS_CHARS = HIGHLIGHT+".bs.chars";
- public static final String BS_TYPE = HIGHLIGHT+".bs.type";
- public static final String BS_LANGUAGE = HIGHLIGHT+".bs.language";
- public static final String BS_COUNTRY = HIGHLIGHT+".bs.country";
- public static final String BS_VARIANT = HIGHLIGHT+".bs.variant";
- public static final String FIELD_MATCH = HIGHLIGHT+".requireFieldMatch";
- public static final String DEFAULT_SUMMARY = HIGHLIGHT + ".defaultSummary";
- public static final String ALTERNATE_FIELD = HIGHLIGHT+".alternateField";
- public static final String ALTERNATE_FIELD_LENGTH = HIGHLIGHT+".maxAlternateFieldLength";
- public static final String HIGHLIGHT_ALTERNATE = HIGHLIGHT+".highlightAlternate";
- public static final String MAX_MULTIVALUED_TO_EXAMINE = HIGHLIGHT + ".maxMultiValuedToExamine";
- public static final String MAX_MULTIVALUED_TO_MATCH = HIGHLIGHT + ".maxMultiValuedToMatch";
-
- public static final String USE_PHRASE_HIGHLIGHTER = HIGHLIGHT+".usePhraseHighlighter";
- public static final String HIGHLIGHT_MULTI_TERM = HIGHLIGHT+".highlightMultiTerm";
- public static final String PAYLOADS = HIGHLIGHT+".payloads";
- public static final String MERGE_CONTIGUOUS_FRAGMENTS = HIGHLIGHT + ".mergeContiguous";
+ // KEY:
+ // OH = (original) Highlighter (AKA the standard Highlighter)
+ // FVH = FastVectorHighlighter
+ // PH = PostingsHighlighter
+ // UH = UnifiedHighlighter
- public static final String USE_FVH = HIGHLIGHT + ".useFastVectorHighlighter";
- public static final String TAG_PRE = HIGHLIGHT + ".tag.pre";
- public static final String TAG_POST = HIGHLIGHT + ".tag.post";
- public static final String TAG_ELLIPSIS = HIGHLIGHT + ".tag.ellipsis";
- public static final String PHRASE_LIMIT = HIGHLIGHT + ".phraseLimit";
- public static final String MULTI_VALUED_SEPARATOR = HIGHLIGHT + ".multiValuedSeparatorChar";
-
- // Formatter
- public static final String SIMPLE = "simple";
- public static final String SIMPLE_PRE = HIGHLIGHT+"."+SIMPLE+".pre";
- public static final String SIMPLE_POST = HIGHLIGHT+"."+SIMPLE+".post";
+ // query interpretation
+ public static final String Q = HIGHLIGHT+".q"; // all
+ public static final String QPARSER = HIGHLIGHT+".qparser"; // all
+ public static final String FIELD_MATCH = HIGHLIGHT+".requireFieldMatch"; // OH, FVH
+ public static final String USE_PHRASE_HIGHLIGHTER = HIGHLIGHT+".usePhraseHighlighter"; // OH, FVH, UH
+ public static final String HIGHLIGHT_MULTI_TERM = HIGHLIGHT+".highlightMultiTerm"; // all
- // Regex fragmenter
- public static final String REGEX = "regex";
- public static final String SLOP = HIGHLIGHT+"."+REGEX+".slop";
- public static final String PATTERN = HIGHLIGHT+"."+REGEX+".pattern";
- public static final String MAX_RE_CHARS = HIGHLIGHT+"."+REGEX+".maxAnalyzedChars";
-
- // Scoring parameters
- public static final String SCORE = "score";
- public static final String SCORE_K1 = HIGHLIGHT +"."+SCORE+".k1";
- public static final String SCORE_B = HIGHLIGHT +"."+SCORE+".b";
- public static final String SCORE_PIVOT = HIGHLIGHT +"."+SCORE+".pivot";
+ // if no snippets...
+ public static final String DEFAULT_SUMMARY = HIGHLIGHT + ".defaultSummary"; // UH, PH
+ public static final String ALTERNATE_FIELD = HIGHLIGHT+".alternateField"; // OH, FVH
+ public static final String ALTERNATE_FIELD_LENGTH = HIGHLIGHT+".maxAlternateFieldLength"; // OH, FVH
+ public static final String HIGHLIGHT_ALTERNATE = HIGHLIGHT+".highlightAlternate"; // OH, FVH
+
+ // sizing
+ public static final String FRAGSIZE = HIGHLIGHT+".fragsize"; // OH, FVH
+ public static final String FRAGMENTER = HIGHLIGHT+".fragmenter"; // OH
+ public static final String INCREMENT = HIGHLIGHT+".increment"; // OH
+ public static final String REGEX = "regex"; // OH
+ public static final String SLOP = HIGHLIGHT+"."+REGEX+".slop"; // OH
+ public static final String PATTERN = HIGHLIGHT+"."+REGEX+".pattern"; // OH
+ public static final String MAX_RE_CHARS= HIGHLIGHT+"."+REGEX+".maxAnalyzedChars"; // OH
+ public static final String BOUNDARY_SCANNER = HIGHLIGHT+".boundaryScanner"; // FVH
+ public static final String BS_MAX_SCAN = HIGHLIGHT+".bs.maxScan"; // FVH
+ public static final String BS_CHARS = HIGHLIGHT+".bs.chars"; // FVH
+ public static final String BS_TYPE = HIGHLIGHT+".bs.type"; // FVH, UH, PH
+ public static final String BS_LANGUAGE = HIGHLIGHT+".bs.language"; // FVH, UH, PH
+ public static final String BS_COUNTRY = HIGHLIGHT+".bs.country"; // FVH, UH, PH
+ public static final String BS_VARIANT = HIGHLIGHT+".bs.variant"; // FVH, UH, PH
+
+ // formatting
+ public static final String FORMATTER = HIGHLIGHT+".formatter"; // OH
+ public static final String ENCODER = HIGHLIGHT+".encoder"; // OH, (UH, PH limited)
+ public static final String MERGE_CONTIGUOUS_FRAGMENTS = HIGHLIGHT + ".mergeContiguous"; // OH
+ public static final String SIMPLE = "simple"; // OH
+ public static final String SIMPLE_PRE = HIGHLIGHT+"."+SIMPLE+".pre"; // OH, UH
+ public static final String SIMPLE_POST = HIGHLIGHT+"."+SIMPLE+".post"; // OH, UH
+ public static final String FRAGMENTS_BUILDER = HIGHLIGHT+".fragmentsBuilder"; // FVH
+ public static final String TAG_PRE = HIGHLIGHT + ".tag.pre"; // FVH, UH, PH
+ public static final String TAG_POST = HIGHLIGHT + ".tag.post"; // FVH, UH, PH
+ public static final String TAG_ELLIPSIS= HIGHLIGHT + ".tag.ellipsis"; // FVH, UH, PH
+ public static final String MULTI_VALUED_SEPARATOR = HIGHLIGHT + ".multiValuedSeparatorChar"; // FVH, PH
+
+ // ordering
+ public static final String PRESERVE_MULTI = HIGHLIGHT+".preserveMulti"; // OH
+ public static final String FRAG_LIST_BUILDER = HIGHLIGHT+".fragListBuilder"; // FVH
+ public static final String SCORE = "score"; // UH, PH
+ public static final String SCORE_K1 = HIGHLIGHT +"."+SCORE+".k1"; // UH, PH
+ public static final String SCORE_B = HIGHLIGHT +"."+SCORE+".b"; // UH, PH
+ public static final String SCORE_PIVOT = HIGHLIGHT +"."+SCORE+".pivot"; // UH, PH
+
+ // misc
+ public static final String MAX_CHARS = HIGHLIGHT+".maxAnalyzedChars"; // all
+ public static final String PAYLOADS = HIGHLIGHT+".payloads"; // OH
+ public static final String MAX_MULTIVALUED_TO_EXAMINE = HIGHLIGHT + ".maxMultiValuedToExamine"; // OH
+ public static final String MAX_MULTIVALUED_TO_MATCH = HIGHLIGHT + ".maxMultiValuedToMatch"; // OH
+ public static final String PHRASE_LIMIT = HIGHLIGHT + ".phraseLimit"; // FVH
+ public static final String OFFSET_SOURCE = HIGHLIGHT + ".offsetSource"; // UH
+ public static final String CACHE_FIELD_VAL_CHARS_THRESHOLD = HIGHLIGHT + ".cacheFieldValCharsThreshold"; // UH
}