You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ds...@apache.org on 2020/02/11 07:22:11 UTC
[lucene-solr] branch branch_8x updated: SOLR-14194: Highlighters
now supports docValues for the uniqueKey and the original highlighter can
highlight docValues.
This is an automated email from the ASF dual-hosted git repository.
dsmiley pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/branch_8x by this push:
new 8d95df3 SOLR-14194: Highlighters now supports docValues for the uniqueKey and the original highlighter can highlight docValues.
8d95df3 is described below
commit 8d95df364fe028b9803bfbc8c96e004c5bc442cf
Author: David Smiley <ds...@salesforce.com>
AuthorDate: Tue Feb 11 02:18:08 2020 -0500
SOLR-14194: Highlighters now supports docValues for the uniqueKey
and the original highlighter can highlight docValues.
(cherry picked from commit 9a4f7661e96416e3aac3e48f1108422ab4184473)
---
solr/CHANGES.txt | 3 +
.../handler/component/TermVectorComponent.java | 12 +---
.../solr/highlight/DefaultSolrHighlighter.java | 73 +++++++++++++---------
.../solr/highlight/UnifiedSolrHighlighter.java | 11 ++--
.../java/org/apache/solr/schema/IndexSchema.java | 13 ++++
.../collection1/conf/schema-unifiedhighlight.xml | 2 +-
.../highlight/HighlighterWithoutStoredIdTest.java | 36 +++++++++++
.../solr/highlight/TestUnifiedSolrHighlighter.java | 2 +
.../TestUnifiedSolrHighlighterWithoutStoredId.java | 37 +++++++++++
solr/solr-ref-guide/src/highlighting.adoc | 1 +
10 files changed, 143 insertions(+), 47 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 21b90e8..a0ff476 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -68,6 +68,9 @@ Improvements
* SOLR-14245: Validate Replica / ReplicaInfo on creation. (ab)
+* SOLR-14194: Highlighting now works when the uniqueKey field is not stored but has docValues. And the original
+ highlighter can now highlight text fields from docValues. (Andrzej Wislowski, David Smiley)
+
Optimizations
---------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java b/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java
index 8d7617e..5a18ee4 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java
@@ -27,7 +27,6 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
-import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.PostingsEnum;
@@ -272,16 +271,7 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
if (keyField != null) {
// guaranteed to be one and only one since this is uniqueKey!
SolrDocument solrDoc = docFetcher.solrDoc(docId, srf);
-
- String uKey = null;
- Object val = solrDoc.getFieldValue(uniqFieldName);
- if (val != null) {
- if (val instanceof StoredField) {
- uKey = ((StoredField) val).stringValue();
- } else {
- uKey = val.toString();
- }
- }
+ String uKey = schema.printableUniqueKey(solrDoc);
assert null != uKey;
docNL.add("uniqueKey", uKey);
termVectors.add(uKey, docNL);
diff --git a/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java b/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
index 088170d..8a60026 100644
--- a/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
+++ b/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
@@ -34,7 +34,6 @@ import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.document.Document;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.IndexReader;
@@ -63,6 +62,7 @@ import org.apache.lucene.search.vectorhighlight.FieldQuery;
import org.apache.lucene.search.vectorhighlight.FragListBuilder;
import org.apache.lucene.search.vectorhighlight.FragmentsBuilder;
import org.apache.lucene.util.AttributeSource.State;
+import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.HighlightParams;
import org.apache.solr.common.params.MapSolrParams;
@@ -73,11 +73,13 @@ import org.apache.solr.core.PluginInfo;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.HighlightComponent;
import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.search.SolrReturnFields;
import org.apache.solr.util.plugin.PluginInfoInitialized;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -445,10 +447,13 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
String[] fieldNames = getHighlightFields(query, req, defaultFields);
Set<String> preFetchFieldNames = getDocPrefetchFieldNames(fieldNames, req);
+ SolrReturnFields returnFields;
if (preFetchFieldNames != null) {
preFetchFieldNames.add(keyField.getName());
+ returnFields = new SolrReturnFields(preFetchFieldNames.toArray(new String[0]), req);
+ } else {
+ returnFields = new SolrReturnFields(new String[0], req);
}
-
FvhContainer fvhContainer = new FvhContainer(null, null); // Lazy container for fvh and fieldQuery
IndexReader reader = new TermVectorReusingLeafReader(req.getSearcher().getSlowAtomicReader()); // SOLR-5855
@@ -458,7 +463,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
DocIterator iterator = docs.iterator();
for (int i = 0; i < docs.size(); i++) {
int docId = iterator.nextDoc();
- Document doc = searcher.doc(docId, preFetchFieldNames);
+ SolrDocument doc = searcher.getDocFetcher().solrDoc(docId, returnFields);
@SuppressWarnings("rawtypes")
NamedList docHighlights = new SimpleOrderedMap();
@@ -482,9 +487,9 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
return fragments;
}
- protected Object doHighlightingOfField(Document doc, int docId, SchemaField schemaField,
- FvhContainer fvhContainer, Query query, IndexReader reader, SolrQueryRequest req,
- SolrParams params) throws IOException {
+ protected Object doHighlightingOfField(SolrDocument doc, int docId, SchemaField schemaField,
+ FvhContainer fvhContainer, Query query, IndexReader reader, SolrQueryRequest req,
+ SolrParams params) throws IOException {
Object fieldHighlights;
if (schemaField == null) {
fieldHighlights = null;
@@ -527,12 +532,20 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
return fieldHighlights;
}
- /** Returns the field names to be passed to {@link SolrIndexSearcher#doc(int, Set)}.
+ /**
+ * Returns the field names to be passed to {@link org.apache.solr.search.SolrDocumentFetcher#solrDoc(int, SolrReturnFields)}.
* Subclasses might over-ride to include fields in search-results and other stored field values needed so as to avoid
- * the possibility of extra trips to disk. The uniqueKey will be added after if the result isn't null. */
+ * the possibility of extra trips to disk. The uniqueKey will be added after if the result isn't null.
+ */
protected Set<String> getDocPrefetchFieldNames(String[] hlFieldNames, SolrQueryRequest req) {
Set<String> preFetchFieldNames = new HashSet<>(hlFieldNames.length + 1);//+1 for uniqueyKey added after
Collections.addAll(preFetchFieldNames, hlFieldNames);
+ for (String hlFieldName : hlFieldNames) {
+ String alternateField = req.getParams().getFieldParam(hlFieldName, HighlightParams.ALTERNATE_FIELD);
+ if (alternateField != null) {
+ preFetchFieldNames.add(alternateField);
+ }
+ }
return preFetchFieldNames;
}
@@ -555,7 +568,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
/** Highlights and returns the highlight object for this field -- a String[] by default. Null if none. */
@SuppressWarnings("unchecked")
- protected Object doHighlightingByFastVectorHighlighter(Document doc, int docId,
+ protected Object doHighlightingByFastVectorHighlighter(SolrDocument doc, int docId,
SchemaField schemaField, FvhContainer fvhContainer,
IndexReader reader, SolrQueryRequest req) throws IOException {
SolrParams params = req.getParams();
@@ -577,7 +590,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
/** Highlights and returns the highlight object for this field -- a String[] by default. Null if none. */
@SuppressWarnings("unchecked")
- protected Object doHighlightingByHighlighter(Document doc, int docId, SchemaField schemaField, Query query,
+ protected Object doHighlightingByHighlighter(SolrDocument doc, int docId, SchemaField schemaField, Query query,
IndexReader reader, SolrQueryRequest req) throws IOException {
final SolrParams params = req.getParams();
final String fieldName = schemaField.getName();
@@ -709,18 +722,25 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
/** Fetches field values to highlight. If the field value should come from an atypical place (or another aliased
* field name, then a subclass could override to implement that.
*/
- protected List<String> getFieldValues(Document doc, String fieldName, int maxValues, int maxCharsToAnalyze,
+ protected List<String> getFieldValues(SolrDocument doc, String fieldName, int maxValues, int maxCharsToAnalyze,
SolrQueryRequest req) {
// Collect the Fields we will examine (could be more than one if multi-valued)
+ Collection<Object> fieldValues = doc.getFieldValues(fieldName);
+ if (fieldValues == null) {
+ return Collections.emptyList();
+ }
+ FieldType fieldType = req.getSchema().getFieldType(fieldName);
List<String> result = new ArrayList<>();
- for (IndexableField thisField : doc.getFields()) {
- if (! thisField.name().equals(fieldName)) {
- continue;
+ for (Object value : fieldValues) {
+ String strValue;
+ if (value instanceof IndexableField) {
+ strValue = fieldType.toExternal((IndexableField)value);
+ } else {
+ strValue = value.toString(); // TODO FieldType needs an API for this, e.g. toExternalFromDv()
}
- String value = thisField.stringValue();
- result.add(value);
+ result.add(strValue);
- maxCharsToAnalyze -= value.length();//we exit early if we'll never get to analyze the value
+ maxCharsToAnalyze -= strValue.length();//we exit early if we'll never get to analyze the value
maxValues--;
if (maxValues <= 0 || maxCharsToAnalyze <= 0) {
break;
@@ -743,7 +763,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
/** Returns the alternate highlight object for this field -- a String[] by default. Null if none. */
@SuppressWarnings("unchecked")
- protected Object alternateField(Document doc, int docId, String fieldName, FvhContainer fvhContainer, Query query,
+ protected Object alternateField(SolrDocument doc, int docId, String fieldName, FvhContainer fvhContainer, Query query,
IndexReader reader, SolrQueryRequest req) throws IOException {
IndexSchema schema = req.getSearcher().getSchema();
SolrParams params = req.getParams();
@@ -775,20 +795,15 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
// Fallback to static non-highlighted
- IndexableField[] docFields = doc.getFields(alternateField);
- if (docFields.length == 0) {
+ List<String> listFields = getFieldValues(doc, alternateField, Integer.MAX_VALUE, Integer.MAX_VALUE, req);
+ if (listFields.isEmpty()) {
// The alternate field did not exist, treat the original field as fallback instead
- docFields = doc.getFields(fieldName);
- }
- List<String> listFields = new ArrayList<>();
- for (IndexableField field : docFields) {
- if (field.binaryValue() == null)
- listFields.add(field.stringValue());
+ listFields = getFieldValues(doc, fieldName, Integer.MAX_VALUE, Integer.MAX_VALUE, req);
+ if (listFields.isEmpty()) {
+ return null;
+ }
}
- if (listFields.isEmpty()) {
- return null;
- }
String[] altTexts = listFields.toArray(new String[listFields.size()]);
Encoder encoder = getEncoder(fieldName, params);
diff --git a/solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java b/solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java
index 9063214..b226663 100644
--- a/solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java
+++ b/solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java
@@ -18,7 +18,6 @@ package org.apache.solr.highlight;
import java.io.IOException;
import java.text.BreakIterator;
-import java.util.Collections;
import java.util.EnumSet;
import java.util.List;
import java.util.Locale;
@@ -26,7 +25,6 @@ import java.util.Map;
import java.util.Set;
import java.util.function.Predicate;
-import org.apache.lucene.document.Document;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Query;
@@ -37,6 +35,7 @@ import org.apache.lucene.search.uhighlight.PassageFormatter;
import org.apache.lucene.search.uhighlight.PassageScorer;
import org.apache.lucene.search.uhighlight.UnifiedHighlighter;
import org.apache.lucene.search.uhighlight.WholeBreakIterator;
+import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.HighlightParams;
import org.apache.solr.common.params.SolrParams;
@@ -49,6 +48,7 @@ import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.search.SolrReturnFields;
import org.apache.solr.util.RTimerTree;
import org.apache.solr.util.plugin.PluginInfoInitialized;
@@ -210,13 +210,12 @@ public class UnifiedSolrHighlighter extends SolrHighlighter implements PluginInf
IndexSchema schema = searcher.getSchema();
SchemaField keyField = schema.getUniqueKeyField();
if (keyField != null) {
- Set<String> selector = Collections.singleton(keyField.getName());
+ SolrReturnFields returnFields = new SolrReturnFields(keyField.getName(), null);
String[] uniqueKeys = new String[docIDs.length];
for (int i = 0; i < docIDs.length; i++) {
int docid = docIDs[i];
- Document doc = searcher.doc(docid, selector);
- String id = schema.printableUniqueKey(doc);
- uniqueKeys[i] = id;
+ SolrDocument solrDoc = searcher.getDocFetcher().solrDoc(docid, returnFields);
+ uniqueKeys[i] = schema.printableUniqueKey(solrDoc);
}
return uniqueKeys;
} else {
diff --git a/solr/core/src/java/org/apache/solr/schema/IndexSchema.java b/solr/core/src/java/org/apache/solr/schema/IndexSchema.java
index 5f213d3..6bd2e79 100644
--- a/solr/core/src/java/org/apache/solr/schema/IndexSchema.java
+++ b/solr/core/src/java/org/apache/solr/schema/IndexSchema.java
@@ -50,6 +50,7 @@ import org.apache.lucene.queries.payloads.PayloadDecoder;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.Version;
import org.apache.solr.common.MapSerializable;
+import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.CommonParams;
@@ -339,6 +340,18 @@ public class IndexSchema {
return f==null ? null : uniqueKeyFieldType.toExternal(f);
}
+ /** Like {@link #printableUniqueKey(org.apache.lucene.document.Document)} */
+ public String printableUniqueKey(SolrDocument solrDoc) {
+ Object val = solrDoc.getFieldValue(uniqueKeyFieldName);
+ if (val == null) {
+ return null;
+ } else if (val instanceof IndexableField) {
+ return uniqueKeyFieldType.toExternal((IndexableField) val);
+ } else {
+ return val.toString();
+ }
+ }
+
private SchemaField getIndexedField(String fname) {
SchemaField f = getFields().get(fname);
if (f==null) {
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-unifiedhighlight.xml b/solr/core/src/test-files/solr/collection1/conf/schema-unifiedhighlight.xml
index 0c9a083..90b7c52 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schema-unifiedhighlight.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema-unifiedhighlight.xml
@@ -36,7 +36,7 @@
</analyzer>
</fieldType>
- <field name="id" type="string" indexed="true" stored="true" multiValued="false" required="false"/>
+ <field name="id" type="string" indexed="true" stored="${solr.tests.id.stored:true}" multiValued="false" docValues="${solr.tests.id.docValues:false}" required="false"/>
<field name="text" type="text_offsets" indexed="true" stored="true"/>
<field name="text2" type="text" indexed="true" stored="true"/>
<field name="text3" type="text_offsets" indexed="true" stored="true" large="true"/>
diff --git a/solr/core/src/test/org/apache/solr/highlight/HighlighterWithoutStoredIdTest.java b/solr/core/src/test/org/apache/solr/highlight/HighlighterWithoutStoredIdTest.java
new file mode 100644
index 0000000..97df83b
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/highlight/HighlighterWithoutStoredIdTest.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.highlight;
+
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+
+public class HighlighterWithoutStoredIdTest extends HighlighterTest {
+
+ @BeforeClass
+ public static void beforeClassProps() {
+ System.setProperty("solr.tests.id.stored", "false");
+ System.setProperty("solr.tests.id.docValues", "true");
+ }
+
+ @AfterClass
+ public static void afterClassProps() {
+ System.clearProperty("solr.tests.id.stored");
+ System.clearProperty("solr.tests.id.docValues");
+ }
+
+}
diff --git a/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java b/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java
index f6418cb..9d890b1 100644
--- a/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java
+++ b/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java
@@ -45,6 +45,8 @@ public class TestUnifiedSolrHighlighter extends SolrTestCaseJ4 {
System.clearProperty("filterCache.enabled");
System.clearProperty("queryResultCache.enabled");
System.clearProperty("documentCache.enabled");
+ System.clearProperty("solr.tests.id.stored");
+ System.clearProperty("solr.tests.id.docValues");
}
@Override
diff --git a/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighterWithoutStoredId.java b/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighterWithoutStoredId.java
new file mode 100644
index 0000000..e7fb6cd
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighterWithoutStoredId.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.highlight;
+
+
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+
+/** Tests for the UnifiedHighlighter Solr plugin **/
+public class TestUnifiedSolrHighlighterWithoutStoredId extends TestUnifiedSolrHighlighter {
+
+ @BeforeClass
+ public static void beforeClassProps() {
+ System.setProperty("solr.tests.id.stored", "false");
+ System.setProperty("solr.tests.id.docValues", "true");
+ }
+
+ @AfterClass
+ public static void afterClassProps() {
+ System.clearProperty("solr.tests.id.stored");
+ System.clearProperty("solr.tests.id.docValues");
+ }
+}
diff --git a/solr/solr-ref-guide/src/highlighting.adoc b/solr/solr-ref-guide/src/highlighting.adoc
index ab6b5a6..cdbc873 100644
--- a/solr/solr-ref-guide/src/highlighting.adoc
+++ b/solr/solr-ref-guide/src/highlighting.adoc
@@ -173,6 +173,7 @@ The Original Highlighter, sometimes called the "Standard Highlighter" or "Defaul
Its query accuracy is good enough for most needs, although it's not quite as good/perfect as the Unified Highlighter.
+
The Original Highlighter will normally analyze stored text on the fly in order to highlight. It will use full term vectors if available.
+If the text isn't "stored" but is in doc values (`docValues="true"`), this highlighter can work with it.
+
Where this highlighter falls short is performance; it's often twice as slow as the Unified Highlighter. And despite being the most customizable, it doesn't have a BreakIterator based fragmenter (all the others do), which could pose a challenge for some languages.