You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by cp...@apache.org on 2023/09/15 11:32:46 UTC
[solr] branch main updated: SOLR-15440: FieldValueFeature now uses DocValues when docValues=true and stored=true are combined. (#1883)
This is an automated email from the ASF dual-hosted git repository.
cpoerschke pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/main by this push:
new a52c849e9d4 SOLR-15440: FieldValueFeature now uses DocValues when docValues=true and stored=true are combined. (#1883)
a52c849e9d4 is described below
commit a52c849e9d4a17452eea29ee224fd0ac2af669c9
Author: Christine Poerschke <cp...@apache.org>
AuthorDate: Fri Sep 15 12:32:41 2023 +0100
SOLR-15440: FieldValueFeature now uses DocValues when docValues=true and stored=true are combined. (#1883)
Co-authored-by: tomglk <>
---
solr/CHANGES.txt | 2 +
.../apache/solr/ltr/feature/FieldValueFeature.java | 20 ++--
.../solr/ltr/feature/LegacyFieldValueFeature.java | 58 +++++++++++
.../test-files/solr/collection1/conf/schema.xml | 2 +
.../solr/ltr/feature/TestFieldValueFeature.java | 116 +++++++++++++--------
.../ltr/feature/TestLegacyFieldValueFeature.java | 84 +++++++++++++++
.../pages/major-changes-in-solr-9.adoc | 3 +
7 files changed, 234 insertions(+), 51 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 67ddcdd211d..138ca224e11 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -132,6 +132,8 @@ Improvements
* SOLR-16950: SimpleTracer propagation for manual transaction ids
+* SOLR-15440: The Learning To Rank FieldValueFeature now uses DocValues when docValues=true and stored=true are combined. (Christine Poerschke, Tom Gilke)
+
Optimizations
---------------------
diff --git a/solr/modules/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java b/solr/modules/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
index 65980018067..bab34fa8d3e 100644
--- a/solr/modules/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
+++ b/solr/modules/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
@@ -56,18 +56,22 @@ import org.apache.solr.search.SolrIndexSearcher;
* <p>There are 4 different types of FeatureScorers that a FieldValueFeatureWeight may use. The
* chosen scorer depends on the field attributes.
*
- * <p>FieldValueFeatureScorer (FVFS): used for stored=true, no matter if docValues=true or
- * docValues=false
+ * <p>FieldValueFeatureScorer (FVFS): used for stored=true if docValues=false
*
- * <p>NumericDocValuesFVFS: used for stored=false and docValues=true, if docValueType == NUMERIC
+ * <p>NumericDocValuesFVFS: used for docValues=true, if docValueType == NUMERIC
*
- * <p>SortedDocValuesFVFS: used for stored=false and docValues=true, if docValueType == SORTED
+ * <p>SortedDocValuesFVFS: used for docValues=true, if docValueType == SORTED
*
- * <p>DefaultValueFVFS: used for stored=false and docValues=true, a fallback scorer that is used on
- * segments where no document has a value set in the field of this feature
+ * <p>DefaultValueFVFS: used for docValues=true, a fallback scorer that is used on segments where no
+ * document has a value set in the field of this feature
+ *
+ * <p>Use {@link LegacyFieldValueFeature} for the pre 9.4 behaviour of not using DocValues when
+ * docValues=true is combined with stored=true.
*/
public class FieldValueFeature extends Feature {
+ protected boolean useDocValuesForStored = true;
+
private String field;
private Set<String> fieldAsSet;
@@ -134,7 +138,9 @@ public class FieldValueFeature extends Feature {
*/
@Override
public FeatureScorer scorer(LeafReaderContext context) throws IOException {
- if (schemaField != null && !schemaField.stored() && schemaField.hasDocValues()) {
+ if (schemaField != null
+ && (!schemaField.stored() || useDocValuesForStored)
+ && schemaField.hasDocValues()) {
final FieldInfo fieldInfo = context.reader().getFieldInfos().fieldInfo(field);
final DocValuesType docValuesType =
diff --git a/solr/modules/ltr/src/java/org/apache/solr/ltr/feature/LegacyFieldValueFeature.java b/solr/modules/ltr/src/java/org/apache/solr/ltr/feature/LegacyFieldValueFeature.java
new file mode 100644
index 00000000000..868059f6cc3
--- /dev/null
+++ b/solr/modules/ltr/src/java/org/apache/solr/ltr/feature/LegacyFieldValueFeature.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.ltr.feature;
+
+import java.util.Map;
+
+/**
+ * This feature returns the value of a field in the current document. The field must have
+ * stored="true" or docValues="true" properties. Example configuration:
+ *
+ * <pre>
+ * {
+ * "name": "rawHits",
+ * "class": "org.apache.solr.ltr.feature.LegacyFieldValueFeature",
+ * "params": {
+ * "field": "hits"
+ * }
+ * }
+ * </pre>
+ *
+ * <p>There are 4 different types of FeatureScorers that a FieldValueFeatureWeight may use. The
+ * chosen scorer depends on the field attributes.
+ *
+ * <p>FieldValueFeatureScorer (FVFS): used for stored=true, no matter if docValues=true or
+ * docValues=false
+ *
+ * <p>NumericDocValuesFVFS: used for stored=false and docValues=true, if docValueType == NUMERIC
+ *
+ * <p>SortedDocValuesFVFS: used for stored=false and docValues=true, if docValueType == SORTED
+ *
+ * <p>DefaultValueFVFS: used for stored=false and docValues=true, a fallback scorer that is used on
+ * segments where no document has a value set in the field of this feature
+ *
+ * <p>Matches {@link FieldValueFeature} behaviour prior to 9.4 i.e. DocValues are not used when
+ * docValues=true is combined with stored=true.
+ */
+@Deprecated(since = "9.4")
+public class LegacyFieldValueFeature extends FieldValueFeature {
+
+ public LegacyFieldValueFeature(String name, Map<String, Object> params) {
+ super(name, params);
+ this.useDocValuesForStored = false;
+ }
+}
diff --git a/solr/modules/ltr/src/test-files/solr/collection1/conf/schema.xml b/solr/modules/ltr/src/test-files/solr/collection1/conf/schema.xml
index 04f5c8e22f8..8291b9bc7d6 100644
--- a/solr/modules/ltr/src/test-files/solr/collection1/conf/schema.xml
+++ b/solr/modules/ltr/src/test-files/solr/collection1/conf/schema.xml
@@ -35,6 +35,7 @@
<field name="normHits" type="float" indexed="true" stored="true" />
<field name="isTrendy" type="boolean" indexed="true" stored="true" />
+ <field name="storedDvIsTrendy" type="boolean" indexed="true" stored="true" docValues="true"/>
<field name="dvIsTrendy" type="boolean" indexed="true" stored="false" docValues="true"/>
<field name="dvIntField" type="int" indexed="false" docValues="true" stored="false" default="-1" multiValued="false"/>
@@ -67,6 +68,7 @@
<copyField source="popularity" dest="dvDoublePopularity"/>
<copyField source="isTrendy" dest="dvIsTrendy"/>
+ <copyField source="isTrendy" dest="storedDvIsTrendy"/>
<types>
<fieldType name="string" class="solr.StrField" sortMissingLast="true" />
diff --git a/solr/modules/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java b/solr/modules/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
index 5f102fb67a1..8bed4efe7ed 100644
--- a/solr/modules/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
+++ b/solr/modules/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
@@ -47,9 +47,18 @@ public class TestFieldValueFeature extends TestRerankBase {
"dvDoublePopularity",
"dvStringPopularity",
"isTrendy",
- "dvIsTrendy"
+ "dvIsTrendy",
+ "storedDvIsTrendy"
};
+ protected String getFieldValueFeatureClassName() {
+ return FieldValueFeature.class.getName();
+ }
+
+ protected String getObservingFieldValueFeatureClassName() {
+ return ObservingFieldValueFeature.class.getName();
+ }
+
@Before
public void before() throws Exception {
setuptest(false);
@@ -178,7 +187,7 @@ public class TestFieldValueFeature extends TestRerankBase {
assertU(commit());
for (String field : FIELDS) {
- loadFeature(field, FieldValueFeature.class.getName(), "{\"field\":\"" + field + "\"}");
+ loadFeature(field, getFieldValueFeatureClassName(), "{\"field\":\"" + field + "\"}");
}
loadModel(
"model",
@@ -186,7 +195,7 @@ public class TestFieldValueFeature extends TestRerankBase {
FIELDS,
"{\"weights\":{\"popularity\":1.0,\"dvIntPopularity\":1.0,\"dvLongPopularity\":1.0,"
+ "\"dvFloatPopularity\":1.0,\"dvDoublePopularity\":1.0,"
- + "\"dvStringPopularity\":1.0,\"isTrendy\":1.0,\"dvIsTrendy\":1.0}}");
+ + "\"dvStringPopularity\":1.0,\"isTrendy\":1.0,\"dvIsTrendy\":1.0,\"storedDvIsTrendy\":1.0}}");
}
@After
@@ -253,7 +262,7 @@ public class TestFieldValueFeature extends TestRerankBase {
"/query" + query.toQueryString(),
"/response/docs/[0]/=={'[fv]':'popularity=0.0,dvIntPopularity=0.0,dvLongPopularity=0.0,"
+ "dvFloatPopularity=0.0,dvDoublePopularity=0.0,"
- + "dvStringPopularity=0.0,isTrendy=0.0,dvIsTrendy=0.0'}");
+ + "dvStringPopularity=0.0,isTrendy=0.0,dvIsTrendy=0.0,storedDvIsTrendy=0.0'}");
}
@Test
@@ -263,7 +272,7 @@ public class TestFieldValueFeature extends TestRerankBase {
loadFeature(
field + "42",
- FieldValueFeature.class.getName(),
+ getFieldValueFeatureClassName(),
fstore,
"{\"field\":\"" + field + "\",\"defaultValue\":\"42.0\"}");
@@ -310,8 +319,7 @@ public class TestFieldValueFeature extends TestRerankBase {
assertU(adoc("id", "21"));
assertU(commit());
- loadFeature(
- field, FieldValueFeature.class.getName(), fstore, "{\"field\":\"" + field + "\"}");
+ loadFeature(field, getFieldValueFeatureClassName(), fstore, "{\"field\":\"" + field + "\"}");
loadModel(
field + "-model",
LinearModel.class.getName(),
@@ -339,7 +347,7 @@ public class TestFieldValueFeature extends TestRerankBase {
final String fstore = "testThatFieldValueFeatureScorerIsUsedAndDefaultIsReturned";
loadFeature(
"not-existing-field",
- ObservingFieldValueFeature.class.getName(),
+ getObservingFieldValueFeatureClassName(),
fstore,
"{\"field\":\"cowabunga\"}");
@@ -375,10 +383,7 @@ public class TestFieldValueFeature extends TestRerankBase {
final String fstore = "testThatDefaultFieldValueScorerIsUsedAndDefaultIsReturned" + field;
loadFeature(
- field,
- ObservingFieldValueFeature.class.getName(),
- fstore,
- "{\"field\":\"" + field + "\"}");
+ field, getObservingFieldValueFeatureClassName(), fstore, "{\"field\":\"" + field + "\"}");
loadModel(
field + "-model",
@@ -415,11 +420,16 @@ public class TestFieldValueFeature extends TestRerankBase {
implTestBooleanValue("dvIsTrendy");
}
+ @Test
+ public void testBooleanValue_stored_docValues() throws Exception {
+ implTestBooleanValue("storedDvIsTrendy");
+ }
+
private void implTestBooleanValue(String isTrendyFieldName) throws Exception {
final String fstore = "test_boolean_store";
loadFeature(
"trendy",
- FieldValueFeature.class.getName(),
+ getFieldValueFeatureClassName(),
fstore,
"{\"field\":\"" + isTrendyFieldName + "\"}");
@@ -473,7 +483,7 @@ public class TestFieldValueFeature extends TestRerankBase {
loadFeature(
"dvStringPopularities",
- FieldValueFeature.class.getName(),
+ getFieldValueFeatureClassName(),
fstore,
"{\"field\":\"dvStringPopularities\"}");
@@ -493,6 +503,10 @@ public class TestFieldValueFeature extends TestRerankBase {
"/error/msg/=='java.lang.IllegalArgumentException: Doc values type SORTED_SET of field dvStringPopularities is not supported'");
}
+ protected String storedDvIsTrendy_FieldValueFeatureScorer_className() {
+ return SortedDocValuesFieldValueFeatureScorer.class.getName();
+ }
+
@Test
public void testThatCorrectFieldValueFeatureIsUsedForDocValueTypes() throws Exception {
final String[][] fieldsWithDifferentTypes = {
@@ -502,6 +516,8 @@ public class TestFieldValueFeature extends TestRerankBase {
new String[] {
"dvStringPopularity", "T", SortedDocValuesFieldValueFeatureScorer.class.getName()
},
+ new String[] {"dvIsTrendy", "1", SortedDocValuesFieldValueFeatureScorer.class.getName()},
+ new String[] {"storedDvIsTrendy", "1", storedDvIsTrendy_FieldValueFeatureScorer_className()},
new String[] {"noDvFloatField", "1", FieldValueFeatureScorer.class.getName()},
new String[] {"noDvStrNumField", "T", FieldValueFeatureScorer.class.getName()}
};
@@ -510,37 +526,51 @@ public class TestFieldValueFeature extends TestRerankBase {
final String field = fieldAndScorerClass[0];
final String fieldValue = fieldAndScorerClass[1];
final String fstore = "testThatCorrectFieldValueFeatureIsUsedForDocValueTypes" + field;
+ final String modelName = field + "-model";
- assertU(adoc("id", "21", field, fieldValue));
- assertU(commit());
-
- loadFeature(
- field,
- ObservingFieldValueFeature.class.getName(),
- fstore,
- "{\"field\":\"" + field + "\"}");
- loadModel(
- field + "-model",
- LinearModel.class.getName(),
- new String[] {field},
- fstore,
- "{\"weights\":{\"" + field + "\":1.0}}");
+ loadFeatureAndModel(getObservingFieldValueFeatureClassName(), field, fstore, modelName);
- final SolrQuery query = new SolrQuery("id:21");
- query.add("rq", "{!ltr model=" + field + "-model reRankDocs=4}");
- query.add("fl", "[fv]");
+ final String usedScorerClass = addAndQueryId21(field, modelName, fieldValue);
- ObservingFieldValueFeature.usedScorerClass = null; // to clear away any previous test's use
- assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
- assertJQ(
- "/query" + query.toQueryString(),
- "/response/docs/[0]/=={'[fv]':'"
- + FeatureLoggerTestUtils.toFeatureVector(field, "1.0")
- + "'}");
- assertEquals(fieldAndScorerClass[2], ObservingFieldValueFeature.usedScorerClass);
+ assertEquals(fieldAndScorerClass[2], usedScorerClass);
}
}
+ protected void loadFeatureAndModel(
+ String featureClassName, String field, String fstore, String modelName) throws Exception {
+ loadFeature(field, featureClassName, fstore, "{\"field\":\"" + field + "\"}");
+
+ loadModel(
+ modelName,
+ LinearModel.class.getName(),
+ new String[] {field},
+ fstore,
+ "{\"weights\":{\"" + field + "\":1.0}}");
+ }
+
+ /**
+ * @return used scorer class
+ */
+ protected String addAndQueryId21(String field, String modelName, String fieldValue)
+ throws Exception {
+
+ assertU(adoc("id", "21", field, fieldValue));
+ assertU(commit());
+
+ final SolrQuery query = new SolrQuery("id:21");
+ query.add("rq", "{!ltr model=" + modelName + " reRankDocs=4}");
+ query.add("fl", "[fv]");
+
+ ObservingFieldValueFeature.usedScorerClass = null; // to clear away any previous test's use
+ assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
+ assertJQ(
+ "/query" + query.toQueryString(),
+ "/response/docs/[0]/=={'[fv]':'"
+ + FeatureLoggerTestUtils.toFeatureVector(field, "1.0")
+ + "'}");
+ return ObservingFieldValueFeature.usedScorerClass;
+ }
+
@Test
public void testParamsToMap() throws Exception {
final LinkedHashMap<String, Object> params = new LinkedHashMap<>();
@@ -595,8 +625,7 @@ public class TestFieldValueFeature extends TestRerankBase {
};
final String fstore = "testThatStringValuesAreCorrectlyParsed" + field;
- loadFeature(
- field, FieldValueFeature.class.getName(), fstore, "{\"field\":\"" + field + "\"}");
+ loadFeature(field, getFieldValueFeatureClassName(), fstore, "{\"field\":\"" + field + "\"}");
loadModel(
field + "-model",
LinearModel.class.getName(),
@@ -642,8 +671,7 @@ public class TestFieldValueFeature extends TestRerankBase {
};
final String fstore = "testThatDateValuesAreCorrectlyParsed" + field;
- loadFeature(
- field, FieldValueFeature.class.getName(), fstore, "{\"field\":\"" + field + "\"}");
+ loadFeature(field, getFieldValueFeatureClassName(), fstore, "{\"field\":\"" + field + "\"}");
loadModel(
field + "-model",
LinearModel.class.getName(),
@@ -668,7 +696,7 @@ public class TestFieldValueFeature extends TestRerankBase {
* This class is used to track which specific FieldValueFeature is used so that we can test,
* whether the fallback mechanism works correctly.
*/
- public static final class ObservingFieldValueFeature extends FieldValueFeature {
+ public static class ObservingFieldValueFeature extends FieldValueFeature {
static String usedScorerClass;
public ObservingFieldValueFeature(String name, Map<String, Object> params) {
diff --git a/solr/modules/ltr/src/test/org/apache/solr/ltr/feature/TestLegacyFieldValueFeature.java b/solr/modules/ltr/src/test/org/apache/solr/ltr/feature/TestLegacyFieldValueFeature.java
new file mode 100644
index 00000000000..16374234af8
--- /dev/null
+++ b/solr/modules/ltr/src/test/org/apache/solr/ltr/feature/TestLegacyFieldValueFeature.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.ltr.feature;
+
+import java.util.Map;
+import org.junit.Test;
+
+@Deprecated
+public class TestLegacyFieldValueFeature extends TestFieldValueFeature {
+
+ @Override
+ protected String getFieldValueFeatureClassName() {
+ return LegacyFieldValueFeature.class.getName();
+ }
+
+ @Override
+ protected String getObservingFieldValueFeatureClassName() {
+ return LegacyObservingFieldValueFeature.class.getName();
+ }
+
+ @Deprecated
+ public static final class LegacyObservingFieldValueFeature
+ extends TestFieldValueFeature.ObservingFieldValueFeature {
+
+ public LegacyObservingFieldValueFeature(String name, Map<String, Object> params) {
+ super(name, params);
+ this.useDocValuesForStored = false;
+ }
+ }
+
+ @Override
+ protected String storedDvIsTrendy_FieldValueFeatureScorer_className() {
+ return FieldValueFeature.FieldValueFeatureWeight.FieldValueFeatureScorer.class.getName();
+ }
+
+ @Test
+ public void test_LegacyFieldValueFeature_behavesDifferentlyThan_FieldValueFeature()
+ throws Exception {
+ // the field storedDvIsTrendy has stored=true and docValues=true
+ final String field = "storedDvIsTrendy";
+
+ // demonstrate that & how the FieldValueFeature & LegacyFieldValueFeature implementations differ
+
+ // the LegacyFieldValueFeature does not use docValues
+ String usedScorerClass = loadAndQuery(getObservingFieldValueFeatureClassName(), field);
+ assertEquals(
+ FieldValueFeature.FieldValueFeatureWeight.FieldValueFeatureScorer.class.getName(),
+ usedScorerClass);
+
+ // the FieldValueFeature does use docValues
+ usedScorerClass = loadAndQuery(super.getObservingFieldValueFeatureClassName(), field);
+ assertEquals(
+ FieldValueFeature.FieldValueFeatureWeight.SortedDocValuesFieldValueFeatureScorer.class
+ .getName(),
+ usedScorerClass);
+ }
+
+ private String loadAndQuery(String featureClassName, String field) throws Exception {
+ final String modelName = field + "-model-" + featureClassName;
+ final String featureStoreName =
+ "test_LegacyFieldValueFeature_behavesDifferentlyThan_FieldValueFeature_"
+ + field
+ + "_"
+ + featureClassName;
+
+ loadFeatureAndModel(featureClassName, field, featureStoreName, modelName);
+
+ return addAndQueryId21(field, modelName, "1");
+ }
+}
diff --git a/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc b/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc
index 0824107a91f..fd35533abe5 100644
--- a/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc
+++ b/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc
@@ -95,6 +95,9 @@ If users want separate client/server settings, they can manually override the `s
To disable the `rid` generation set the system property `solr.disableRequestId` to `true`.
To disable the always-on trace generation set the system property `solr.alwaysOnTraceId` to `false`.
+=== Learning To Rank
+* The FieldValueFeature class now always uses DocValues when docValues=true is set. A LegacyFieldValueFeature class provides the prior behaviour of not using DocValues when the docValues=true and stored=true field attributes are both set.
+
== Solr 9.3
=== Binary Releases
* Solr now comes with two xref:deployment-guide:installing-solr.adoc#available-solr-packages[binary releases] and xref:deployment-guide:solr-in-docker.adoc#available-images[docker images], **full** and **slim**.