You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by ab...@apache.org on 2023/05/15 14:26:20 UTC
[solr] branch main updated: SOLR-16759: Introducing logAll parameter in the feature logger (#1586)
This is an automated email from the ASF dual-hosted git repository.
abenedetti pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/main by this push:
new f9f5b59e042 SOLR-16759: Introducing logAll parameter in the feature logger (#1586)
f9f5b59e042 is described below
commit f9f5b59e0424ec3ef53ef314a0e165b5a4525590
Author: aruggero <57...@users.noreply.github.com>
AuthorDate: Mon May 15 16:26:12 2023 +0200
SOLR-16759: Introducing logAll parameter in the feature logger (#1586)
---
solr/CHANGES.txt | 2 +
.../java/org/apache/solr/ltr/CSVFeatureLogger.java | 11 +-
.../java/org/apache/solr/ltr/FeatureLogger.java | 13 +-
.../java/org/apache/solr/ltr/LTRScoringQuery.java | 26 +-
.../solr/ltr/SolrQueryRequestContextUtils.java | 18 +-
.../interleaving/LTRInterleavingScoringQuery.java | 7 +-
.../LTRFeatureLoggerTransformerFactory.java | 79 ++-
.../apache/solr/ltr/search/LTRQParserPlugin.java | 54 +-
.../org/apache/solr/ltr/TestLTRScoringQuery.java | 5 +-
.../solr/ltr/TestSelectiveWeightCreation.java | 13 +-
.../solr/ltr/feature/TestExternalFeatures.java | 2 +-
.../solr/ltr/feature/TestFeatureLogging.java | 469 ++++++++++++-
.../solr/ltr/feature/TestNoMatchSolrFeature.java | 2 +-
.../solr/ltr/feature/TestOriginalScoreFeature.java | 2 +-
.../transform/TestFeatureLoggerTransformer.java | 6 +-
.../transform/TestInterleavingTransformer.java | 6 +-
.../query-guide/pages/learning-to-rank.adoc | 723 +++++++++++++++------
17 files changed, 1118 insertions(+), 320 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index f5f70eb1174..3b68d3f2135 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -127,6 +127,8 @@ Improvements
* SOLR-16766: Remove -usage as option from bin/solr, and ensure both -help and -h work. (Bence Szabo via Eric Pugh)
+* SOLR-16759: Introducing logAll parameter in the feature logger (Anna Ruggero, Alessandro Benedetti)
+
Optimizations
---------------------
diff --git a/solr/modules/ltr/src/java/org/apache/solr/ltr/CSVFeatureLogger.java b/solr/modules/ltr/src/java/org/apache/solr/ltr/CSVFeatureLogger.java
index aea4d337b20..73e98b249ed 100644
--- a/solr/modules/ltr/src/java/org/apache/solr/ltr/CSVFeatureLogger.java
+++ b/solr/modules/ltr/src/java/org/apache/solr/ltr/CSVFeatureLogger.java
@@ -23,14 +23,15 @@ public class CSVFeatureLogger extends FeatureLogger {
private final char keyValueSep;
private final char featureSep;
- public CSVFeatureLogger(String fvCacheName, FeatureFormat f) {
- super(fvCacheName, f);
+ public CSVFeatureLogger(String fvCacheName, FeatureFormat f, Boolean logAll) {
+ super(fvCacheName, f, logAll);
this.keyValueSep = DEFAULT_KEY_VALUE_SEPARATOR;
this.featureSep = DEFAULT_FEATURE_SEPARATOR;
}
- public CSVFeatureLogger(String fvCacheName, FeatureFormat f, char keyValueSep, char featureSep) {
- super(fvCacheName, f);
+ public CSVFeatureLogger(
+ String fvCacheName, FeatureFormat f, Boolean logAll, char keyValueSep, char featureSep) {
+ super(fvCacheName, f, logAll);
this.keyValueSep = keyValueSep;
this.featureSep = featureSep;
}
@@ -43,7 +44,7 @@ public class CSVFeatureLogger extends FeatureLogger {
StringBuilder sb = new StringBuilder(featuresInfo.length * 3);
boolean isDense = featureFormat.equals(FeatureFormat.DENSE);
for (LTRScoringQuery.FeatureInfo featInfo : featuresInfo) {
- if (isDense || featInfo.isUsed()) {
+ if (featInfo != null && (isDense || featInfo.isUsed())) {
sb.append(featInfo.getName())
.append(keyValueSep)
.append(featInfo.getValue())
diff --git a/solr/modules/ltr/src/java/org/apache/solr/ltr/FeatureLogger.java b/solr/modules/ltr/src/java/org/apache/solr/ltr/FeatureLogger.java
index 1b43511f39c..9be531c1ef3 100644
--- a/solr/modules/ltr/src/java/org/apache/solr/ltr/FeatureLogger.java
+++ b/solr/modules/ltr/src/java/org/apache/solr/ltr/FeatureLogger.java
@@ -33,9 +33,12 @@ public abstract class FeatureLogger {
protected final FeatureFormat featureFormat;
- protected FeatureLogger(String fvCacheName, FeatureFormat f) {
+ protected Boolean logAll;
+
+ protected FeatureLogger(String fvCacheName, FeatureFormat f, Boolean logAll) {
this.fvCacheName = fvCacheName;
this.featureFormat = f;
+ this.logAll = logAll;
}
/**
@@ -80,4 +83,12 @@ public abstract class FeatureLogger {
int docid, LTRScoringQuery scoringQuery, SolrIndexSearcher searcher) {
return (String) searcher.cacheLookup(fvCacheName, fvCacheKey(scoringQuery, docid));
}
+
+ public Boolean isLoggingAll() {
+ return logAll;
+ }
+
+ public void setLogAll(Boolean logAll) {
+ this.logAll = logAll;
+ }
}
diff --git a/solr/modules/ltr/src/java/org/apache/solr/ltr/LTRScoringQuery.java b/solr/modules/ltr/src/java/org/apache/solr/ltr/LTRScoringQuery.java
index d7787826991..1cc17d5deed 100644
--- a/solr/modules/ltr/src/java/org/apache/solr/ltr/LTRScoringQuery.java
+++ b/solr/modules/ltr/src/java/org/apache/solr/ltr/LTRScoringQuery.java
@@ -59,7 +59,6 @@ public class LTRScoringQuery extends Query implements Accountable {
// contains a description of the model
private final LTRScoringModel ltrScoringModel;
- private final boolean extractAllFeatures;
private final LTRThreadModule ltrThreadMgr;
// limits the number of threads per query, so that multiple requests can be serviced
@@ -67,7 +66,7 @@ public class LTRScoringQuery extends Query implements Accountable {
private final Semaphore querySemaphore;
// feature logger to output the features.
- private FeatureLogger fl;
+ private FeatureLogger logger;
// Map of external parameters, such as query intent, that can be used by
// features
private final Map<String, String[]> efi;
@@ -77,21 +76,15 @@ public class LTRScoringQuery extends Query implements Accountable {
private SolrQueryRequest request;
public LTRScoringQuery(LTRScoringModel ltrScoringModel) {
- this(ltrScoringModel, Collections.<String, String[]>emptyMap(), false, null);
- }
-
- public LTRScoringQuery(LTRScoringModel ltrScoringModel, boolean extractAllFeatures) {
- this(ltrScoringModel, Collections.<String, String[]>emptyMap(), extractAllFeatures, null);
+ this(ltrScoringModel, Collections.<String, String[]>emptyMap(), null);
}
public LTRScoringQuery(
LTRScoringModel ltrScoringModel,
Map<String, String[]> externalFeatureInfo,
- boolean extractAllFeatures,
LTRThreadModule ltrThreadMgr) {
this.ltrScoringModel = ltrScoringModel;
this.efi = externalFeatureInfo;
- this.extractAllFeatures = extractAllFeatures;
this.ltrThreadMgr = ltrThreadMgr;
if (this.ltrThreadMgr != null) {
this.querySemaphore = this.ltrThreadMgr.createQuerySemaphore();
@@ -108,12 +101,12 @@ public class LTRScoringQuery extends Query implements Accountable {
return ltrScoringModel.getName();
}
- public void setFeatureLogger(FeatureLogger fl) {
- this.fl = fl;
+ public void setFeatureLogger(FeatureLogger logger) {
+ this.logger = logger;
}
public FeatureLogger getFeatureLogger() {
- return fl;
+ return logger;
}
public void setOriginalQuery(Query originalQuery) {
@@ -207,8 +200,8 @@ public class LTRScoringQuery extends Query implements Accountable {
final Collection<Feature> allFeatures = ltrScoringModel.getAllFeatures();
int modelFeatSize = modelFeatures.size();
- Collection<Feature> features = null;
- if (this.extractAllFeatures) {
+ Collection<Feature> features;
+ if (logger != null && logger.isLoggingAll()) {
features = allFeatures;
} else {
features = modelFeatures;
@@ -224,13 +217,12 @@ public class LTRScoringQuery extends Query implements Accountable {
createWeightsParallel(searcher, scoreMode.needsScores(), featureWeights, features);
}
int i = 0, j = 0;
- if (this.extractAllFeatures) {
+ if (logger != null && logger.isLoggingAll()) {
for (final Feature.FeatureWeight fw : featureWeights) {
extractedFeatureWeights[i++] = fw;
}
for (final Feature f : modelFeatures) {
- // we can lookup by featureid because all features will be
- // extracted when this.extractAllFeatures is set
+ // we can lookup by featureid because all features will be extracted
modelFeaturesWeights[j++] = extractedFeatureWeights[f.getIndex()];
}
} else {
diff --git a/solr/modules/ltr/src/java/org/apache/solr/ltr/SolrQueryRequestContextUtils.java b/solr/modules/ltr/src/java/org/apache/solr/ltr/SolrQueryRequestContextUtils.java
index 57e790bac68..9d1354aeada 100644
--- a/solr/modules/ltr/src/java/org/apache/solr/ltr/SolrQueryRequestContextUtils.java
+++ b/solr/modules/ltr/src/java/org/apache/solr/ltr/SolrQueryRequestContextUtils.java
@@ -29,8 +29,8 @@ public class SolrQueryRequestContextUtils {
/** key of the scoring queries in the request context */
private static final String SCORING_QUERIES = LTR_PREFIX + "scoring_queries";
- /** key of the isExtractingFeatures flag in the request context */
- private static final String IS_EXTRACTING_FEATURES = LTR_PREFIX + "isExtractingFeatures";
+ /** key of the isLoggingFeatures flag in the request context */
+ private static final String IS_LOGGING_FEATURES = LTR_PREFIX + "isLoggingFeatures";
/** key of the feature vector store name in the request context */
private static final String STORE = LTR_PREFIX + "store";
@@ -53,17 +53,13 @@ public class SolrQueryRequestContextUtils {
return (LTRScoringQuery[]) req.getContext().get(SCORING_QUERIES);
}
- /** isExtractingFeatures flag accessors */
- public static void setIsExtractingFeatures(SolrQueryRequest req) {
- req.getContext().put(IS_EXTRACTING_FEATURES, Boolean.TRUE);
+ /** isLoggingFeatures flag accessors */
+ public static void enableFeatureLogging(SolrQueryRequest req) {
+ req.getContext().put(IS_LOGGING_FEATURES, Boolean.TRUE);
}
- public static void clearIsExtractingFeatures(SolrQueryRequest req) {
- req.getContext().put(IS_EXTRACTING_FEATURES, Boolean.FALSE);
- }
-
- public static boolean isExtractingFeatures(SolrQueryRequest req) {
- return Boolean.TRUE.equals(req.getContext().get(IS_EXTRACTING_FEATURES));
+ public static boolean isLoggingFeatures(SolrQueryRequest req) {
+ return Boolean.TRUE.equals(req.getContext().get(IS_LOGGING_FEATURES));
}
/** feature vector store name accessors */
diff --git a/solr/modules/ltr/src/java/org/apache/solr/ltr/interleaving/LTRInterleavingScoringQuery.java b/solr/modules/ltr/src/java/org/apache/solr/ltr/interleaving/LTRInterleavingScoringQuery.java
index be1559299de..87fdcbb0005 100644
--- a/solr/modules/ltr/src/java/org/apache/solr/ltr/interleaving/LTRInterleavingScoringQuery.java
+++ b/solr/modules/ltr/src/java/org/apache/solr/ltr/interleaving/LTRInterleavingScoringQuery.java
@@ -31,16 +31,11 @@ public class LTRInterleavingScoringQuery extends LTRScoringQuery {
super(ltrScoringModel);
}
- public LTRInterleavingScoringQuery(LTRScoringModel ltrScoringModel, boolean extractAllFeatures) {
- super(ltrScoringModel, extractAllFeatures);
- }
-
public LTRInterleavingScoringQuery(
LTRScoringModel ltrScoringModel,
Map<String, String[]> externalFeatureInfo,
- boolean extractAllFeatures,
LTRThreadModule ltrThreadMgr) {
- super(ltrScoringModel, externalFeatureInfo, extractAllFeatures, ltrThreadMgr);
+ super(ltrScoringModel, externalFeatureInfo, ltrThreadMgr);
}
public Set<Integer> getPickedInterleavingDocIds() {
diff --git a/solr/modules/ltr/src/java/org/apache/solr/ltr/response/transform/LTRFeatureLoggerTransformerFactory.java b/solr/modules/ltr/src/java/org/apache/solr/ltr/response/transform/LTRFeatureLoggerTransformerFactory.java
index b3646097745..ebf8661bd70 100644
--- a/solr/modules/ltr/src/java/org/apache/solr/ltr/response/transform/LTRFeatureLoggerTransformerFactory.java
+++ b/solr/modules/ltr/src/java/org/apache/solr/ltr/response/transform/LTRFeatureLoggerTransformerFactory.java
@@ -71,7 +71,12 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory {
// used inside fl to specify the feature store to use for the feature extraction
private static final String FV_STORE = "store";
- private static String DEFAULT_LOGGING_MODEL_NAME = "logging-model";
+ // used inside fl to specify to log (all|model only) features
+ private static final String FV_LOG_ALL = "logAll";
+
+ private static final String DEFAULT_LOGGING_MODEL_NAME = "logging-model";
+
+ private static final boolean DEFAULT_NO_RERANKING_LOGGING_ALL = true;
private String fvCacheName;
private String loggingModelName = DEFAULT_LOGGING_MODEL_NAME;
@@ -124,16 +129,18 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory {
public DocTransformer create(String name, SolrParams localparams, SolrQueryRequest req) {
// Hint to enable feature vector cache since we are requesting features
- SolrQueryRequestContextUtils.setIsExtractingFeatures(req);
+ SolrQueryRequestContextUtils.enableFeatureLogging(req);
// Communicate which feature store we are requesting features for
final String fvStoreName = localparams.get(FV_STORE);
SolrQueryRequestContextUtils.setFvStoreName(
req, (fvStoreName == null ? defaultStore : fvStoreName));
+ Boolean logAll = localparams.getBool(FV_LOG_ALL);
+
// Create and supply the feature logger to be used
SolrQueryRequestContextUtils.setFeatureLogger(
- req, createFeatureLogger(localparams.get(FV_FORMAT)));
+ req, createFeatureLogger(localparams.get(FV_FORMAT), logAll));
return new FeatureTransformer(
name, localparams, req, (fvStoreName != null) /* hasExplicitFeatureStore */);
@@ -146,7 +153,7 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory {
*
* @return a feature logger for the format specified.
*/
- private FeatureLogger createFeatureLogger(String formatStr) {
+ private FeatureLogger createFeatureLogger(String formatStr, Boolean logAll) {
final FeatureLogger.FeatureFormat format;
if (formatStr != null) {
format = FeatureLogger.FeatureFormat.valueOf(formatStr.toUpperCase(Locale.ROOT));
@@ -156,7 +163,8 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory {
if (fvCacheName == null) {
throw new IllegalArgumentException("a fvCacheName must be configured");
}
- return new CSVFeatureLogger(fvCacheName, format, csvKeyValueDelimiter, csvFeatureSeparator);
+ return new CSVFeatureLogger(
+ fvCacheName, format, logAll, csvKeyValueDelimiter, csvFeatureSeparator);
}
class FeatureTransformer extends DocTransformer {
@@ -182,7 +190,7 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory {
private LTRScoringQuery[] rerankingQueries;
private LTRScoringQuery.ModelWeight[] modelWeights;
private FeatureLogger featureLogger;
- private boolean docsWereNotReranked;
+ private boolean docsWereReranked;
/**
* @param name Name of the field to be added in a document representing the feature vectors
@@ -224,17 +232,40 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory {
}
rerankingQueriesFromContext = SolrQueryRequestContextUtils.getScoringQueries(req);
- docsWereNotReranked =
- (rerankingQueriesFromContext == null || rerankingQueriesFromContext.length == 0);
+ docsWereReranked =
+ (rerankingQueriesFromContext != null && rerankingQueriesFromContext.length != 0);
String transformerFeatureStore = SolrQueryRequestContextUtils.getFvStoreName(req);
+ FeatureLogger featureLogger = SolrQueryRequestContextUtils.getFeatureLogger(req);
+
Map<String, String[]> transformerExternalFeatureInfo =
LTRQParserPlugin.extractEFIParams(localparams);
+ List<Feature> modelFeatures = null;
+
+ if (docsWereReranked) {
+ LTRScoringModel scoringModel = rerankingQueriesFromContext[0].getScoringModel();
+ modelFeatures = scoringModel.getFeatures();
+ } else {
+ if (featureLogger.isLoggingAll() == null) {
+ featureLogger.setLogAll(DEFAULT_NO_RERANKING_LOGGING_ALL);
+ }
+ if (!featureLogger.isLoggingAll()) {
+ throw new SolrException(
+ SolrException.ErrorCode.BAD_REQUEST,
+ "you can only log all features from the store '"
+ + transformerFeatureStore
+ + "' passed in input in the logger");
+ }
+ }
final LoggingModel loggingModel =
- createLoggingModel(transformerFeatureStore, docsWereNotReranked);
+ createLoggingModel(
+ transformerFeatureStore,
+ featureLogger.isLoggingAll(),
+ modelFeatures,
+ docsWereReranked);
setupRerankingQueriesForLogging(
transformerFeatureStore, transformerExternalFeatureInfo, loggingModel);
- setupRerankingWeightsForLogging(context);
+ setupRerankingWeightsForLogging(context, featureLogger);
}
/**
@@ -243,7 +274,10 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory {
* @param transformerFeatureStore the explicit transformer feature store
*/
private LoggingModel createLoggingModel(
- String transformerFeatureStore, Boolean docsWereNotReranked) {
+ String transformerFeatureStore,
+ boolean logAll,
+ List<Feature> modelFeatures,
+ boolean docsWereReranked) {
final ManagedFeatureStore featureStores =
ManagedFeatureStore.getManagedFeatureStore(req.getCore());
@@ -260,7 +294,7 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory {
// check for empty feature store only if there is no reranking query, otherwise the model
// store will be used later for feature extraction
- if (docsWereNotReranked) {
+ if (!docsWereReranked) {
if (store.getFeatures().isEmpty()) {
throw new SolrException(
SolrException.ErrorCode.BAD_REQUEST,
@@ -271,7 +305,10 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory {
// if transformerFeatureStore was null before this gets actual name
transformerFeatureStore = store.getName();
- return new LoggingModel(loggingModelName, transformerFeatureStore, store.getFeatures());
+ return new LoggingModel(
+ loggingModelName,
+ transformerFeatureStore,
+ (logAll ? store.getFeatures() : modelFeatures));
}
/**
@@ -301,13 +338,9 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory {
String transformerFeatureStore,
Map<String, String[]> transformerExternalFeatureInfo,
LoggingModel loggingModel) {
- if (docsWereNotReranked) { // no reranking query
+ if (!docsWereReranked) { // no reranking query
LTRScoringQuery loggingQuery =
- new LTRScoringQuery(
- loggingModel,
- transformerExternalFeatureInfo,
- true /* extractAllFeatures */,
- threadManager);
+ new LTRScoringQuery(loggingModel, transformerExternalFeatureInfo, threadManager);
rerankingQueries = new LTRScoringQuery[] {loggingQuery};
} else {
rerankingQueries = new LTRScoringQuery[rerankingQueriesFromContext.length];
@@ -335,14 +368,13 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory {
(!transformerExternalFeatureInfo.isEmpty()
? transformerExternalFeatureInfo
: rerankingQueries[i].getExternalFeatureInfo()),
- true /* extractAllFeatures */,
threadManager);
}
}
}
}
- private void setupRerankingWeightsForLogging(ResultContext context) {
+ private void setupRerankingWeightsForLogging(ResultContext context, FeatureLogger logger) {
modelWeights = new LTRScoringQuery.ModelWeight[rerankingQueries.length];
for (int i = 0; i < rerankingQueries.length; i++) {
if (rerankingQueries[i].getOriginalQuery() == null) {
@@ -352,8 +384,7 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory {
if (!(rerankingQueries[i] instanceof OriginalRankingLTRScoringQuery)
|| hasExplicitFeatureStore) {
if (rerankingQueries[i].getFeatureLogger() == null) {
- rerankingQueries[i].setFeatureLogger(
- SolrQueryRequestContextUtils.getFeatureLogger(req));
+ rerankingQueries[i].setFeatureLogger(logger);
}
featureLogger = rerankingQueries[i].getFeatureLogger();
try {
@@ -399,7 +430,7 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory {
LTRRescorer.extractFeaturesInfo(
rerankingModelWeight,
docid,
- (docsWereNotReranked ? score : null),
+ (!docsWereReranked ? score : null),
leafContexts));
}
doc.addField(name, featureVector);
diff --git a/solr/modules/ltr/src/java/org/apache/solr/ltr/search/LTRQParserPlugin.java b/solr/modules/ltr/src/java/org/apache/solr/ltr/search/LTRQParserPlugin.java
index 7006b110aab..e3ba6cffe1d 100644
--- a/solr/modules/ltr/src/java/org/apache/solr/ltr/search/LTRQParserPlugin.java
+++ b/solr/modules/ltr/src/java/org/apache/solr/ltr/search/LTRQParserPlugin.java
@@ -27,6 +27,7 @@ import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrResourceLoader;
+import org.apache.solr.ltr.FeatureLogger;
import org.apache.solr.ltr.LTRScoringQuery;
import org.apache.solr.ltr.LTRThreadModule;
import org.apache.solr.ltr.SolrQueryRequestContextUtils;
@@ -155,8 +156,8 @@ public class LTRQParserPlugin extends QParserPlugin
SolrException.ErrorCode.BAD_REQUEST, "Must provide one or two models in the request");
}
final boolean isInterleaving = (modelNames.length > 1);
- final boolean extractFeatures = SolrQueryRequestContextUtils.isExtractingFeatures(req);
- final String tranformerFeatureStoreName = SolrQueryRequestContextUtils.getFvStoreName(req);
+ final boolean isLoggingFeatures = SolrQueryRequestContextUtils.isLoggingFeatures(req);
+
final Map<String, String[]> externalFeatureInfo = extractEFIParams(localParams);
LTRScoringQuery rerankingQuery = null;
@@ -175,37 +176,44 @@ public class LTRQParserPlugin extends QParserPlugin
SolrException.ErrorCode.BAD_REQUEST,
"cannot find " + LTRQParserPlugin.MODEL + " " + modelNames[i]);
}
- final String modelFeatureStoreName = ltrScoringModel.getFeatureStoreName();
- // Check if features are requested and if the model feature store and feature-transform
- // feature store are the same
- final boolean featuresRequestedFromSameStore =
- (modelFeatureStoreName.equals(tranformerFeatureStoreName)
- || tranformerFeatureStoreName == null)
- ? extractFeatures
- : false;
if (isInterleaving) {
rerankingQuery =
rerankingQueries[i] =
new LTRInterleavingScoringQuery(
- ltrScoringModel,
- externalFeatureInfo,
- featuresRequestedFromSameStore,
- threadManager);
+ ltrScoringModel, externalFeatureInfo, threadManager);
} else {
rerankingQuery =
- new LTRScoringQuery(
- ltrScoringModel,
- externalFeatureInfo,
- featuresRequestedFromSameStore,
- threadManager);
+ new LTRScoringQuery(ltrScoringModel, externalFeatureInfo, threadManager);
rerankingQueries[i] = null;
}
- // Enable the feature vector caching if we are extracting features, and the features
- // we requested are the same ones we are reranking with
- if (featuresRequestedFromSameStore) {
- rerankingQuery.setFeatureLogger(SolrQueryRequestContextUtils.getFeatureLogger(req));
+ if (isLoggingFeatures) {
+ FeatureLogger featureLogger = SolrQueryRequestContextUtils.getFeatureLogger(req);
+ final String modelFeatureStore = ltrScoringModel.getFeatureStoreName();
+ final String loggerFeatureStore = SolrQueryRequestContextUtils.getFvStoreName(req);
+ final boolean isSameFeatureStore =
+ (modelFeatureStore.equals(loggerFeatureStore) || loggerFeatureStore == null);
+
+ if (isSameFeatureStore) {
+ if (featureLogger.isLoggingAll() == null) {
+ featureLogger.setLogAll(false); // default to log only model features
+ }
+ rerankingQuery.setFeatureLogger(featureLogger);
+ } else {
+ if (featureLogger.isLoggingAll() == null) {
+ featureLogger.setLogAll(true); // default to log all features from the store
+ }
+ if (!featureLogger.isLoggingAll()) {
+ throw new SolrException(
+ SolrException.ErrorCode.BAD_REQUEST,
+ "the feature store '"
+ + loggerFeatureStore
+ + "' in the logger is different from the model feature store '"
+ + modelFeatureStore
+ + "', you can only log all the features from the store");
+ }
+ }
}
} else {
rerankingQuery =
diff --git a/solr/modules/ltr/src/test/org/apache/solr/ltr/TestLTRScoringQuery.java b/solr/modules/ltr/src/test/org/apache/solr/ltr/TestLTRScoringQuery.java
index d0cdcaa9b27..8bcd684f1b0 100644
--- a/solr/modules/ltr/src/test/org/apache/solr/ltr/TestLTRScoringQuery.java
+++ b/solr/modules/ltr/src/test/org/apache/solr/ltr/TestLTRScoringQuery.java
@@ -130,15 +130,14 @@ public class TestLTRScoringQuery extends SolrTestCase {
final HashMap<String, String[]> externalFeatureInfo = new HashMap<>();
externalFeatureInfo.put("queryIntent", new String[] {"company"});
externalFeatureInfo.put("user_query", new String[] {"abc"});
- final LTRScoringQuery m1 = new LTRScoringQuery(algorithm1, externalFeatureInfo, false, null);
+ final LTRScoringQuery m1 = new LTRScoringQuery(algorithm1, externalFeatureInfo, null);
final HashMap<String, String[]> externalFeatureInfo2 = new HashMap<>();
externalFeatureInfo2.put("user_query", new String[] {"abc"});
externalFeatureInfo2.put("queryIntent", new String[] {"company"});
int totalPoolThreads = 10, numThreadsPerRequest = 10;
LTRThreadModule threadManager = new LTRThreadModule(totalPoolThreads, numThreadsPerRequest);
- final LTRScoringQuery m2 =
- new LTRScoringQuery(algorithm1, externalFeatureInfo2, false, threadManager);
+ final LTRScoringQuery m2 = new LTRScoringQuery(algorithm1, externalFeatureInfo2, threadManager);
// Models with same algorithm and efis, just in different order should be the same
assertEquals(m1, m2);
diff --git a/solr/modules/ltr/src/test/org/apache/solr/ltr/TestSelectiveWeightCreation.java b/solr/modules/ltr/src/test/org/apache/solr/ltr/TestSelectiveWeightCreation.java
index cf1a1dbaf21..be3bf3d481b 100644
--- a/solr/modules/ltr/src/test/org/apache/solr/ltr/TestSelectiveWeightCreation.java
+++ b/solr/modules/ltr/src/test/org/apache/solr/ltr/TestSelectiveWeightCreation.java
@@ -163,7 +163,7 @@ public class TestSelectiveWeightCreation extends TestRerankBase {
hits,
searcher,
hits.scoreDocs[0].doc,
- new LTRScoringQuery(ltrScoringModel1, false)); // features not requested in response
+ new LTRScoringQuery(ltrScoringModel1)); // features not requested in response
LTRScoringQuery.FeatureInfo[] featuresInfo = modelWeight.getFeaturesInfo();
assertEquals(features.size(), modelWeight.getModelFeatureValuesNormalized().length);
@@ -184,12 +184,11 @@ public class TestSelectiveWeightCreation extends TestRerankBase {
"test",
allFeatures,
TestLinearModel.makeFeatureWeights(features));
- modelWeight =
- performQuery(
- hits,
- searcher,
- hits.scoreDocs[0].doc,
- new LTRScoringQuery(ltrScoringModel2, true)); // features requested in response
+ LTRScoringQuery ltrQuery2 = new LTRScoringQuery(ltrScoringModel2);
+ // features requested in response
+ ltrQuery2.setFeatureLogger(
+ new CSVFeatureLogger("test", FeatureLogger.FeatureFormat.DENSE, true));
+ modelWeight = performQuery(hits, searcher, hits.scoreDocs[0].doc, ltrQuery2);
featuresInfo = modelWeight.getFeaturesInfo();
assertEquals(features.size(), modelWeight.getModelFeatureValuesNormalized().length);
diff --git a/solr/modules/ltr/src/test/org/apache/solr/ltr/feature/TestExternalFeatures.java b/solr/modules/ltr/src/test/org/apache/solr/ltr/feature/TestExternalFeatures.java
index df7c5e48b78..2bf4a706c58 100644
--- a/solr/modules/ltr/src/test/org/apache/solr/ltr/feature/TestExternalFeatures.java
+++ b/solr/modules/ltr/src/test/org/apache/solr/ltr/feature/TestExternalFeatures.java
@@ -87,7 +87,7 @@ public class TestExternalFeatures extends TestRerankBase {
public void testFeaturesUseStopwordQueryReturnEmptyFeatureVector() throws Exception {
final SolrQuery query = new SolrQuery();
query.setQuery("*:*");
- query.add("fl", "*,score,fv:[fv]");
+ query.add("fl", "*,score,fv:[fv logAll=true]");
query.add("rows", "1");
// Stopword only query passed in
query.add(
diff --git a/solr/modules/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureLogging.java b/solr/modules/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureLogging.java
index 0c920d717ca..6c859d987cf 100644
--- a/solr/modules/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureLogging.java
+++ b/solr/modules/ltr/src/test/org/apache/solr/ltr/feature/TestFeatureLogging.java
@@ -76,7 +76,7 @@ public class TestFeatureLogging extends TestRerankBase {
final SolrQuery query = new SolrQuery();
query.setQuery("title:bloomberg");
- query.add("fl", "id,popularity,[fv]");
+ query.add("fl", "id,popularity,[fv logAll=true]");
query.add("rows", "3");
query.add("debugQuery", "on");
query.add("rq", "{!ltr reRankDocs=3 model=sum1}");
@@ -87,7 +87,7 @@ public class TestFeatureLogging extends TestRerankBase {
"/response/docs/[0]/=={'id':'7', 'popularity':2, '[fv]':'" + docs0fv_default_csv + "'}");
query.remove("fl");
- query.add("fl", "[fv]");
+ query.add("fl", "[fv logAll=true]");
query.add("rows", "3");
query.add("rq", "{!ltr reRankDocs=3 model=sum1}");
@@ -241,7 +241,7 @@ public class TestFeatureLogging extends TestRerankBase {
query.add("debugQuery", "on");
query.remove("fl");
- query.add("fl", "fv:[fv]");
+ query.add("fl", "fv:[fv logAll=true]");
query.add("rows", "3");
query.add("group", "true");
query.add("group.field", "title");
@@ -295,7 +295,7 @@ public class TestFeatureLogging extends TestRerankBase {
// csv - no feature format specified i.e. use default
query.remove("fl");
- query.add("fl", "*,score,fv:[fv store=test4]");
+ query.add("fl", "*,score,fv:[fv store=test4 logAll=true]");
assertJQ(
"/query" + query.toQueryString(), "/response/docs/[0]/fv/=='" + docs0fv_default_csv + "'");
assertJQ(
@@ -303,7 +303,7 @@ public class TestFeatureLogging extends TestRerankBase {
// csv - sparse feature format check
query.remove("fl");
- query.add("fl", "*,score,fv:[fv store=test4 format=sparse]");
+ query.add("fl", "*,score,fv:[fv store=test4 format=sparse logAll=true]");
assertJQ(
"/query" + query.toQueryString(), "/response/docs/[0]/fv/=='" + docs0fv_sparse_csv + "'");
assertJQ(
@@ -311,10 +311,467 @@ public class TestFeatureLogging extends TestRerankBase {
// csv - dense feature format check
query.remove("fl");
- query.add("fl", "*,score,fv:[fv store=test4 format=dense]");
+ query.add("fl", "*,score,fv:[fv store=test4 format=dense logAll=true]");
assertJQ(
"/query" + query.toQueryString(), "/response/docs/[0]/fv/=='" + docs0fv_dense_csv + "'");
assertJQ(
"/query" + query.toQueryString(), "/response/docs/[1]/fv/=='" + docs1fv_dense_csv + "'");
}
+
+ @Test
+ public void testNoReranking_defaultStoreDefaultLogAll_shouldPrintAllFeatures() throws Exception {
+ loadFeature(
+ "defaultStoreFeature1",
+ ValueFeature.class.getName(),
+ FeatureStore.DEFAULT_FEATURE_STORE_NAME,
+ "{\"value\":1.0}");
+ loadFeature(
+ "defaultStoreFeature2",
+ ValueFeature.class.getName(),
+ FeatureStore.DEFAULT_FEATURE_STORE_NAME,
+ "{\"value\":4.0}");
+ loadFeature("storeAFeature1", ValueFeature.class.getName(), "storeA", "{\"value\":2.0}");
+ loadFeature("storeAFeature2", ValueFeature.class.getName(), "storeA", "{\"value\":6.0}");
+
+ // No store specified, use default store for logging
+ // No logAll specified, use default: logAll=true
+ final SolrQuery query = new SolrQuery();
+ query.setQuery("id:7");
+ query.add("rows", "1");
+ query.add("fl", "fv:[fv]");
+ assertJQ(
+ "/query" + query.toQueryString(),
+ "/response/docs/[0]/=={'fv':'"
+ + FeatureLoggerTestUtils.toFeatureVector(
+ "defaultStoreFeature1", "1.0", "defaultStoreFeature2", "4.0")
+ + "'}");
+ }
+
+ @Test
+ public void testNoReranking_defaultStoreLogAllTrue_shouldPrintAllFeatures() throws Exception {
+ loadFeature(
+ "defaultStoreFeature1",
+ ValueFeature.class.getName(),
+ FeatureStore.DEFAULT_FEATURE_STORE_NAME,
+ "{\"value\":1.0}");
+ loadFeature(
+ "defaultStoreFeature2",
+ ValueFeature.class.getName(),
+ FeatureStore.DEFAULT_FEATURE_STORE_NAME,
+ "{\"value\":4.0}");
+ loadFeature("storeAFeature1", ValueFeature.class.getName(), "storeA", "{\"value\":2.0}");
+ loadFeature("storeAFeature2", ValueFeature.class.getName(), "storeA", "{\"value\":6.0}");
+
+ // No store specified, use default store for logging
+ // logAll=true, return all the features in the default store
+ final SolrQuery query = new SolrQuery();
+ query.setQuery("id:7");
+ query.add("rows", "1");
+ query.add("fl", "fv:[fv logAll=true]");
+ assertJQ(
+ "/query" + query.toQueryString(),
+ "/response/docs/[0]/=={'fv':'"
+ + FeatureLoggerTestUtils.toFeatureVector(
+ "defaultStoreFeature1", "1.0", "defaultStoreFeature2", "4.0")
+ + "'}");
+ }
+
+ @Test
+ public void testNoReranking_defaultStoreLogAllFalse_shouldRaiseException() throws Exception {
+ loadFeature(
+ "defaultStoreFeature1",
+ ValueFeature.class.getName(),
+ FeatureStore.DEFAULT_FEATURE_STORE_NAME,
+ "{\"value\":1.0}");
+ loadFeature(
+ "defaultStoreFeature2",
+ ValueFeature.class.getName(),
+ FeatureStore.DEFAULT_FEATURE_STORE_NAME,
+ "{\"value\":4.0}");
+ loadFeature("storeAFeature1", ValueFeature.class.getName(), "storeA", "{\"value\":2.0}");
+ loadFeature("storeAFeature2", ValueFeature.class.getName(), "storeA", "{\"value\":6.0}");
+
+ // No store specified, use default store for logging
+ // logAll=false, exception since no model used
+ final SolrQuery query = new SolrQuery();
+ query.setQuery("id:7");
+ query.add("rows", "1");
+ query.add("fl", "fv:[fv logAll=false]");
+ assertJQ(
+ "/query" + query.toQueryString(),
+ "/error/msg=='you can only log all features from the store \\'null\\' passed in input in the logger'");
+ }
+
+ @Test
+ public void testNoReranking_definedStoreDefaultLogAll_shouldPrintAllFeatures() throws Exception {
+ loadFeature(
+ "defaultStoreFeature1",
+ ValueFeature.class.getName(),
+ FeatureStore.DEFAULT_FEATURE_STORE_NAME,
+ "{\"value\":1.0}");
+ loadFeature(
+ "defaultStoreFeature2",
+ ValueFeature.class.getName(),
+ FeatureStore.DEFAULT_FEATURE_STORE_NAME,
+ "{\"value\":4.0}");
+ loadFeature("storeAFeature1", ValueFeature.class.getName(), "storeA", "{\"value\":2.0}");
+ loadFeature("storeAFeature2", ValueFeature.class.getName(), "storeA", "{\"value\":6.0}");
+
+ // Store specified, use that store for logging
+ // No logAll specified, use default: logAll=true
+ final SolrQuery query = new SolrQuery();
+ query.setQuery("id:7");
+ query.add("rows", "1");
+ query.add("fl", "fv:[fv store=storeA]");
+ assertJQ(
+ "/query" + query.toQueryString(),
+ "/response/docs/[0]/=={'fv':'"
+ + FeatureLoggerTestUtils.toFeatureVector(
+ "storeAFeature1", "2.0", "storeAFeature2", "6.0")
+ + "'}");
+ }
+
+ @Test
+ public void testNoReranking_definedStoreLogAllTrue_shouldPrintAllFeature() throws Exception {
+ loadFeature(
+ "defaultStoreFeature1",
+ ValueFeature.class.getName(),
+ FeatureStore.DEFAULT_FEATURE_STORE_NAME,
+ "{\"value\":1.0}");
+ loadFeature(
+ "defaultStoreFeature2",
+ ValueFeature.class.getName(),
+ FeatureStore.DEFAULT_FEATURE_STORE_NAME,
+ "{\"value\":4.0}");
+ loadFeature("storeAFeature1", ValueFeature.class.getName(), "storeA", "{\"value\":2.0}");
+ loadFeature("storeAFeature2", ValueFeature.class.getName(), "storeA", "{\"value\":6.0}");
+
+ // Store specified, use that store for logging
+ // logAll=true, return all the features in the defined store
+ final SolrQuery query = new SolrQuery();
+ query.setQuery("id:7");
+ query.add("rows", "1");
+ query.add("fl", "fv:[fv store=storeA logAll=true]");
+ assertJQ(
+ "/query" + query.toQueryString(),
+ "/response/docs/[0]/=={'fv':'"
+ + FeatureLoggerTestUtils.toFeatureVector(
+ "storeAFeature1", "2.0", "storeAFeature2", "6.0")
+ + "'}");
+ }
+
+ @Test
+ public void testNoReranking_definedStoreLogAllFalse_shouldRaiseException() throws Exception {
+ loadFeature(
+ "defaultStoreFeature1",
+ ValueFeature.class.getName(),
+ FeatureStore.DEFAULT_FEATURE_STORE_NAME,
+ "{\"value\":1.0}");
+ loadFeature(
+ "defaultStoreFeature2",
+ ValueFeature.class.getName(),
+ FeatureStore.DEFAULT_FEATURE_STORE_NAME,
+ "{\"value\":4.0}");
+ loadFeature("storeAFeature1", ValueFeature.class.getName(), "storeA", "{\"value\":2.0}");
+ loadFeature("storeAFeature2", ValueFeature.class.getName(), "storeA", "{\"value\":6.0}");
+
+ // Store specified, use that store for logging
+ // logAll=false, exception since no model used
+ final SolrQuery query = new SolrQuery();
+ query.setQuery("id:7");
+ query.add("rows", "1");
+ query.add("fl", "fv:[fv store=storeA logAll=false]");
+ assertJQ(
+ "/query" + query.toQueryString(),
+ "/error/msg=='you can only log all features from the store \\'storeA\\' passed in input in the logger'");
+ }
+
+ @Test
+ public void testReranking_defaultStoreDefaultLogAll_shouldPrintModelFeatures() throws Exception {
+ loadFeature(
+ "defaultStoreFeature1",
+ ValueFeature.class.getName(),
+ FeatureStore.DEFAULT_FEATURE_STORE_NAME,
+ "{\"value\":1.0}");
+ loadFeature(
+ "defaultStoreFeature2",
+ ValueFeature.class.getName(),
+ FeatureStore.DEFAULT_FEATURE_STORE_NAME,
+ "{\"value\":4.0}");
+ loadFeature("storeAFeature1", ValueFeature.class.getName(), "storeA", "{\"value\":2.0}");
+ loadFeature("storeAFeature2", ValueFeature.class.getName(), "storeA", "{\"value\":6.0}");
+ loadModel(
+ "modelA",
+ LinearModel.class.getName(),
+ new String[] {"storeAFeature2"},
+ "storeA",
+ "{\"weights\":{\"storeAFeature2\":6.0}}");
+
+ // No store specified, use model store for logging
+ // No logAll specified, use default: logAll=false
+ final SolrQuery query = new SolrQuery();
+ query.setQuery("id:7");
+ query.add("rows", "1");
+ query.add("fl", "fv:[fv]");
+ query.add("rq", "{!ltr reRankDocs=3 model=modelA}");
+ assertJQ(
+ "/query" + query.toQueryString(),
+ "/response/docs/[0]/=={'fv':'"
+ + FeatureLoggerTestUtils.toFeatureVector("storeAFeature2", "6.0")
+ + "'}");
+ }
+
+ @Test
+ public void testReranking_defaultStoreLogAllTrue_shouldPrintAllFeatures() throws Exception {
+ loadFeature(
+ "defaultStoreFeature1",
+ ValueFeature.class.getName(),
+ FeatureStore.DEFAULT_FEATURE_STORE_NAME,
+ "{\"value\":1.0}");
+ loadFeature(
+ "defaultStoreFeature2",
+ ValueFeature.class.getName(),
+ FeatureStore.DEFAULT_FEATURE_STORE_NAME,
+ "{\"value\":4.0}");
+ loadFeature("storeAFeature1", ValueFeature.class.getName(), "storeA", "{\"value\":2.0}");
+ loadFeature("storeAFeature2", ValueFeature.class.getName(), "storeA", "{\"value\":6.0}");
+ loadModel(
+ "modelA",
+ LinearModel.class.getName(),
+ new String[] {"storeAFeature2"},
+ "storeA",
+ "{\"weights\":{\"storeAFeature2\":6.0}}");
+
+ // No store specified, use model store for logging
+ // logAll=true, return all the features in the model store
+ final SolrQuery query = new SolrQuery();
+ query.setQuery("id:7");
+ query.add("rows", "1");
+ query.add("fl", "fv:[fv logAll=true]");
+ query.add("rq", "{!ltr reRankDocs=3 model=modelA}");
+ assertJQ(
+ "/query" + query.toQueryString(),
+ "/response/docs/[0]/=={'fv':'"
+ + FeatureLoggerTestUtils.toFeatureVector(
+ "storeAFeature1", "2.0", "storeAFeature2", "6.0")
+ + "'}");
+ }
+
+ @Test
+ public void testReranking_defaultStoreLogAllFalse_shouldPrintModelFeatures() throws Exception {
+ loadFeature(
+ "defaultStoreFeature1",
+ ValueFeature.class.getName(),
+ FeatureStore.DEFAULT_FEATURE_STORE_NAME,
+ "{\"value\":1.0}");
+ loadFeature(
+ "defaultStoreFeature2",
+ ValueFeature.class.getName(),
+ FeatureStore.DEFAULT_FEATURE_STORE_NAME,
+ "{\"value\":4.0}");
+ loadFeature("storeAFeature1", ValueFeature.class.getName(), "storeA", "{\"value\":2.0}");
+ loadFeature("storeAFeature2", ValueFeature.class.getName(), "storeA", "{\"value\":6.0}");
+ loadModel(
+ "modelA",
+ LinearModel.class.getName(),
+ new String[] {"storeAFeature2"},
+ "storeA",
+ "{\"weights\":{\"storeAFeature2\":6.0}}");
+
+ // No store specified, use model store for logging
+ // logAll=false, only model features returned
+ final SolrQuery query = new SolrQuery();
+ query.setQuery("id:7");
+ query.add("rows", "1");
+ query.add("fl", "fv:[fv logAll=false]");
+ query.add("rq", "{!ltr reRankDocs=3 model=modelA}");
+ assertJQ(
+ "/query" + query.toQueryString(),
+ "/response/docs/[0]/=={'fv':'"
+ + FeatureLoggerTestUtils.toFeatureVector("storeAFeature2", "6.0")
+ + "'}");
+ }
+
+ @Test
+ public void testReranking_differentStoreDefaultLogAll_shouldPrintAllFeatures() throws Exception {
+ loadFeature(
+ "defaultStoreFeature1",
+ ValueFeature.class.getName(),
+ FeatureStore.DEFAULT_FEATURE_STORE_NAME,
+ "{\"value\":1.0}");
+ loadFeature(
+ "defaultStoreFeature2",
+ ValueFeature.class.getName(),
+ FeatureStore.DEFAULT_FEATURE_STORE_NAME,
+ "{\"value\":4.0}");
+ loadFeature("storeAFeature1", ValueFeature.class.getName(), "storeA", "{\"value\":2.0}");
+ loadFeature("storeAFeature2", ValueFeature.class.getName(), "storeA", "{\"value\":6.0}");
+ loadFeature("storeBFeature1", ValueFeature.class.getName(), "storeB", "{\"value\":3.0}");
+ loadFeature("storeBFeature2", ValueFeature.class.getName(), "storeB", "{\"value\":7.0}");
+ loadModel(
+ "modelA",
+ LinearModel.class.getName(),
+ new String[] {"storeAFeature2"},
+ "storeA",
+ "{\"weights\":{\"storeAFeature2\":6.0}}");
+
+ // Store specified, use that store for logging
+ // No logAll specified, use default: logAll=true
+ final SolrQuery query = new SolrQuery();
+ query.setQuery("id:7");
+ query.add("rows", "1");
+ query.add("fl", "fv:[fv store=storeB]");
+ query.add("rq", "{!ltr reRankDocs=3 model=modelA}");
+ assertJQ(
+ "/query" + query.toQueryString(),
+ "/response/docs/[0]/=={'fv':'"
+ + FeatureLoggerTestUtils.toFeatureVector(
+ "storeBFeature1", "3.0", "storeBFeature2", "7.0")
+ + "'}");
+ }
+
+ @Test
+ public void testReranking_differentStoreLogAllTrue_shouldPrintAllFeatures() throws Exception {
+ loadFeature(
+ "defaultStoreFeature1",
+ ValueFeature.class.getName(),
+ FeatureStore.DEFAULT_FEATURE_STORE_NAME,
+ "{\"value\":1.0}");
+ loadFeature(
+ "defaultStoreFeature2",
+ ValueFeature.class.getName(),
+ FeatureStore.DEFAULT_FEATURE_STORE_NAME,
+ "{\"value\":4.0}");
+ loadFeature("storeAFeature1", ValueFeature.class.getName(), "storeA", "{\"value\":2.0}");
+ loadFeature("storeAFeature2", ValueFeature.class.getName(), "storeA", "{\"value\":6.0}");
+ loadFeature("storeBFeature1", ValueFeature.class.getName(), "storeB", "{\"value\":3.0}");
+ loadFeature("storeBFeature2", ValueFeature.class.getName(), "storeB", "{\"value\":7.0}");
+ loadModel(
+ "modelA",
+ LinearModel.class.getName(),
+ new String[] {"storeAFeature2"},
+ "storeA",
+ "{\"weights\":{\"storeAFeature2\":6.0}}");
+
+ // Store specified, use that store for logging
+ // logAll=true, return all the features in the defined store
+ final SolrQuery query = new SolrQuery();
+ query.setQuery("id:7");
+ query.add("rows", "1");
+ query.add("fl", "fv:[fv store=storeB logAll=true]");
+ query.add("rq", "{!ltr reRankDocs=3 model=modelA}");
+ assertJQ(
+ "/query" + query.toQueryString(),
+ "/response/docs/[0]/=={'fv':'"
+ + FeatureLoggerTestUtils.toFeatureVector(
+ "storeBFeature1", "3.0", "storeBFeature2", "7.0")
+ + "'}");
+ }
+
+ @Test
+ public void testReranking_differentStoreLogAllFalse_shouldRaiseException() throws Exception {
+ loadFeature(
+ "defaultStoreFeature1",
+ ValueFeature.class.getName(),
+ FeatureStore.DEFAULT_FEATURE_STORE_NAME,
+ "{\"value\":1.0}");
+ loadFeature(
+ "defaultStoreFeature2",
+ ValueFeature.class.getName(),
+ FeatureStore.DEFAULT_FEATURE_STORE_NAME,
+ "{\"value\":4.0}");
+ loadFeature("storeAFeature1", ValueFeature.class.getName(), "storeA", "{\"value\":2.0}");
+ loadFeature("storeAFeature2", ValueFeature.class.getName(), "storeA", "{\"value\":6.0}");
+ loadFeature("storeBFeature1", ValueFeature.class.getName(), "storeB", "{\"value\":3.0}");
+ loadFeature("storeBFeature2", ValueFeature.class.getName(), "storeB", "{\"value\":7.0}");
+ loadModel(
+ "modelA",
+ LinearModel.class.getName(),
+ new String[] {"storeAFeature2"},
+ "storeA",
+ "{\"weights\":{\"storeAFeature2\":6.0}}");
+
+ // Store specified, use that store for logging
+ // logAll=false, exception since the defined store is different from the model store
+ final SolrQuery query = new SolrQuery();
+ query.setQuery("id:7");
+ query.add("rows", "1");
+ query.add("fl", "fv:[fv store=storeB logAll=false]");
+ query.add("rq", "{!ltr reRankDocs=3 model=modelA}");
+ assertJQ(
+ "/query" + query.toQueryString(),
+ "/error/msg=='the feature store \\'storeB\\' in the logger is different from the model feature store \\'storeA\\', you can only log all the features from the store'");
+ }
+
+ @Test
+ public void testReranking_modelStoreDefaultLogAll_shouldPrintModelFeatures() throws Exception {
+ loadFeature(
+ "defaultStoreFeature1",
+ ValueFeature.class.getName(),
+ FeatureStore.DEFAULT_FEATURE_STORE_NAME,
+ "{\"value\":1.0}");
+ loadFeature(
+ "defaultStoreFeature2",
+ ValueFeature.class.getName(),
+ FeatureStore.DEFAULT_FEATURE_STORE_NAME,
+ "{\"value\":4.0}");
+ loadFeature("storeAFeature1", ValueFeature.class.getName(), "storeA", "{\"value\":2.0}");
+ loadFeature("storeAFeature2", ValueFeature.class.getName(), "storeA", "{\"value\":6.0}");
+ loadModel(
+ "modelA",
+ LinearModel.class.getName(),
+ new String[] {"storeAFeature2"},
+ "storeA",
+ "{\"weights\":{\"storeAFeature2\":6.0}}");
+
+ // Store specified, use that store for logging
+ // No logAll specified, use default: logAll=false since we pass the same store as the model
+ // store
+ final SolrQuery query = new SolrQuery();
+ query.setQuery("id:7");
+ query.add("rows", "1");
+ query.add("fl", "fv:[fv store=storeA]");
+ query.add("rq", "{!ltr reRankDocs=3 model=modelA}");
+ assertJQ(
+ "/query" + query.toQueryString(),
+ "/response/docs/[0]/=={'fv':'"
+ + FeatureLoggerTestUtils.toFeatureVector("storeAFeature2", "6.0")
+ + "'}");
+ }
+
+ @Test
+ public void testReranking_modelStoreLogAllFalse_shouldPrintModelFeatures() throws Exception {
+ loadFeature(
+ "defaultStoreFeature1",
+ ValueFeature.class.getName(),
+ FeatureStore.DEFAULT_FEATURE_STORE_NAME,
+ "{\"value\":1.0}");
+ loadFeature(
+ "defaultStoreFeature2",
+ ValueFeature.class.getName(),
+ FeatureStore.DEFAULT_FEATURE_STORE_NAME,
+ "{\"value\":4.0}");
+ loadFeature("storeAFeature1", ValueFeature.class.getName(), "storeA", "{\"value\":2.0}");
+ loadFeature("storeAFeature2", ValueFeature.class.getName(), "storeA", "{\"value\":6.0}");
+ loadModel(
+ "modelA",
+ LinearModel.class.getName(),
+ new String[] {"storeAFeature2"},
+ "storeA",
+ "{\"weights\":{\"storeAFeature2\":6.0}}");
+
+ // Store specified, use that store for logging
+ // logAll=false, only model features returned since the defined store is the same as the model
+ // store
+ final SolrQuery query = new SolrQuery();
+ query.setQuery("id:7");
+ query.add("rows", "1");
+ query.add("fl", "fv:[fv store=storeA logAll=false]");
+ query.add("rq", "{!ltr reRankDocs=3 model=modelA}");
+ assertJQ(
+ "/query" + query.toQueryString(),
+ "/response/docs/[0]/=={'fv':'"
+ + FeatureLoggerTestUtils.toFeatureVector("storeAFeature2", "6.0")
+ + "'}");
+ }
}
diff --git a/solr/modules/ltr/src/test/org/apache/solr/ltr/feature/TestNoMatchSolrFeature.java b/solr/modules/ltr/src/test/org/apache/solr/ltr/feature/TestNoMatchSolrFeature.java
index 03c1121cc85..f73add37788 100644
--- a/solr/modules/ltr/src/test/org/apache/solr/ltr/feature/TestNoMatchSolrFeature.java
+++ b/solr/modules/ltr/src/test/org/apache/solr/ltr/feature/TestNoMatchSolrFeature.java
@@ -203,7 +203,7 @@ public class TestNoMatchSolrFeature extends TestRerankBase {
// extraction
final SolrQuery query = new SolrQuery();
query.setQuery("*:*");
- query.add("fl", "*, score,fv:[fv]");
+ query.add("fl", "*, score,fv:[fv logAll=true]");
query.add("rows", "4");
query.add("rq", "{!ltr model=nomatchmodel2 reRankDocs=4}");
diff --git a/solr/modules/ltr/src/test/org/apache/solr/ltr/feature/TestOriginalScoreFeature.java b/solr/modules/ltr/src/test/org/apache/solr/ltr/feature/TestOriginalScoreFeature.java
index 2a020664ac4..5de7b1f3ad6 100644
--- a/solr/modules/ltr/src/test/org/apache/solr/ltr/feature/TestOriginalScoreFeature.java
+++ b/solr/modules/ltr/src/test/org/apache/solr/ltr/feature/TestOriginalScoreFeature.java
@@ -149,7 +149,7 @@ public class TestOriginalScoreFeature extends TestRerankBase {
}
query.remove("fl");
- query.add("fl", "*, score, fv:[fv]");
+ query.add("fl", "*, score, fv:[fv logAll=true]");
query.add("rq", "{!ltr model=" + modelName + " reRankDocs=4}");
assertJQ("/query" + query.toQueryString(), "/response/numFound/==4");
diff --git a/solr/modules/ltr/src/test/org/apache/solr/ltr/response/transform/TestFeatureLoggerTransformer.java b/solr/modules/ltr/src/test/org/apache/solr/ltr/response/transform/TestFeatureLoggerTransformer.java
index 8f2d611e2d3..e68b433c2e0 100644
--- a/solr/modules/ltr/src/test/org/apache/solr/ltr/response/transform/TestFeatureLoggerTransformer.java
+++ b/solr/modules/ltr/src/test/org/apache/solr/ltr/response/transform/TestFeatureLoggerTransformer.java
@@ -234,7 +234,7 @@ public class TestFeatureLoggerTransformer extends TestRerankBase {
final SolrQuery query = new SolrQuery();
query.setQuery("*:*");
- query.add("fl", "*, score,features:[fv format=sparse]");
+ query.add("fl", "*, score,features:[fv format=sparse logAll=true]");
query.add("rows", "10");
query.add("debugQuery", "true");
query.add("fq", "{!terms f=title}w1"); // 1,3,4,7,8
@@ -279,7 +279,7 @@ public class TestFeatureLoggerTransformer extends TestRerankBase {
final SolrQuery query = new SolrQuery();
query.setQuery("*:*");
- query.add("fl", "*, score,features:[fv format=dense]");
+ query.add("fl", "*, score,features:[fv format=dense logAll=true]");
query.add("rows", "10");
query.add("debugQuery", "true");
query.add("fq", "{!terms f=title}w1"); // 1,3,4,7,8
@@ -529,7 +529,7 @@ public class TestFeatureLoggerTransformer extends TestRerankBase {
final SolrQuery query = new SolrQuery();
query.setQuery("*:*");
- query.add("fl", "*, score,features:[fv format=sparse]");
+ query.add("fl", "*, score,features:[fv format=sparse logAll=true]");
query.add("rows", "10");
query.add("debugQuery", "true");
query.add("fq", "{!terms f=title}w1"); // 1,3,4,7,8
diff --git a/solr/modules/ltr/src/test/org/apache/solr/ltr/response/transform/TestInterleavingTransformer.java b/solr/modules/ltr/src/test/org/apache/solr/ltr/response/transform/TestInterleavingTransformer.java
index 8a50e5dde07..c07d29f0eab 100644
--- a/solr/modules/ltr/src/test/org/apache/solr/ltr/response/transform/TestInterleavingTransformer.java
+++ b/solr/modules/ltr/src/test/org/apache/solr/ltr/response/transform/TestInterleavingTransformer.java
@@ -215,7 +215,8 @@ public class TestInterleavingTransformer extends TestRerankBase {
final SolrQuery query = new SolrQuery();
query.setQuery("*:*");
- query.add("fl", "*, score,interleavingPick:[interleaving],features:[fv format=sparse]");
+ query.add(
+ "fl", "*, score,interleavingPick:[interleaving],features:[fv format=sparse logAll=true]");
query.add("rows", "10");
query.add("debugQuery", "true");
query.add("fq", "{!terms f=title}w1"); // 1,3,4,7,8
@@ -268,7 +269,8 @@ public class TestInterleavingTransformer extends TestRerankBase {
final SolrQuery query = new SolrQuery();
query.setQuery("*:*");
- query.add("fl", "*, score,interleavingPick:[interleaving],features:[fv format=sparse]");
+ query.add(
+ "fl", "*, score,interleavingPick:[interleaving],features:[fv format=sparse logAll=true]");
query.add("rows", "10");
query.add("debugQuery", "true");
query.add("fq", "{!terms f=title}w1"); // 1,3,4,7,8
diff --git a/solr/solr-ref-guide/modules/query-guide/pages/learning-to-rank.adoc b/solr/solr-ref-guide/modules/query-guide/pages/learning-to-rank.adoc
index 4546f0efbb7..1dc37fbdae9 100644
--- a/solr/solr-ref-guide/modules/query-guide/pages/learning-to-rank.adoc
+++ b/solr/solr-ref-guide/modules/query-guide/pages/learning-to-rank.adoc
@@ -18,7 +18,7 @@
With the *Learning To Rank* (or *LTR* for short) module you can configure and run machine learned ranking models in Solr.
-The module also supports feature extraction inside Solr.
+The module also supports feature logging inside Solr.
The only thing you need to do outside Solr is train your own ranking model.
== Learning to Rank Concepts
@@ -89,7 +89,7 @@ The process of https://en.wikipedia.org/wiki/Feature_engineering[feature enginee
|(custom) |(custom class extending {solr-javadocs}/modules/ltr/org/apache/solr/ltr/norm/Normalizer.html[Normalizer]) |
|===
-==== Feature Extraction
+==== Feature Logging
The ltr module includes a xref:document-transformers.adoc[`[features\]` transformer] to support the calculation and return of feature values for https://en.wikipedia.org/wiki/Feature_extraction[feature extraction] purposes including and especially when you do not yet have an actual reranking model.
@@ -115,6 +115,173 @@ In the form of JSON files your trained model or models (e.g., different models f
This is provided via the `ltr` xref:configuration-guide:solr-modules.adoc[Solr Module] that needs to be enabled before use.
+== Installation of LTR
+
+The ltr module requires the `modules/ltr/lib/solr-ltr-*.jar` JARs.
+
+== LTR Configuration
+
+Learning-To-Rank is a module and therefore its plugins must be configured in `solrconfig.xml`.
+
+=== Minimum Requirements
+
+* Include the required module JARs.
+Note that by default paths are relative to the Solr core, so they may need adjustments to your configuration, or an explicit specification of the `$solr.install.dir`.
++
+[source,xml]
+----
+<lib dir="${solr.install.dir:../../../..}/modules/ltr/lib/" regex=".*\.jar" />
+----
+
+* Declaration of the `ltr` query parser.
++
+[source,xml]
+----
+<queryParser name="ltr" class="org.apache.solr.ltr.search.LTRQParserPlugin"/>
+----
+
+* Configuration of the feature values cache.
++
+[source,xml]
+----
+<cache name="QUERY_DOC_FV"
+ class="solr.search.CaffeineCache"
+ size="4096"
+ initialSize="2048"
+ autowarmCount="4096"
+ regenerator="solr.search.NoOpRegenerator" />
+----
+
+* Declaration of the `[features]` transformer.
++
+[source,xml]
+----
+<transformer name="features" class="org.apache.solr.ltr.response.transform.LTRFeatureLoggerTransformerFactory">
+ <str name="fvCacheName">QUERY_DOC_FV</str>
+</transformer>
+----
+
+* Declaration of the `[interleaving]` transformer.
++
+[source,xml]
+----
+<transformer name="interleaving" class="org.apache.solr.ltr.response.transform.LTRInterleavingTransformerFactory"/>
+----
+
+== LTR Lifecycle
+
+=== Feature Stores
+
+It is recommended that you organise all your features into stores which are akin to namespaces:
+
+* Features within a store must be named uniquely.
+* Across stores identical or similar features can share the same name.
+* If no store name is specified then the default `\_DEFAULT_` feature store will be used.
+
+To discover the names of all your feature stores:
+
+[source,text]
+http://localhost:8983/solr/techproducts/schema/feature-store
+
+To inspect the content of the `commonFeatureStore` feature store:
+
+[source,text]
+http://localhost:8983/solr/techproducts/schema/feature-store/commonFeatureStore
+
+=== Models
+
+* A model uses features from exactly one feature store.
+* If no store is specified then the default `\_DEFAULT_` feature store will be used.
+* A model need not use all the features defined in a feature store.
+* Multiple models can use the same feature store.
+
+To log the features of the `currentFeatureStore` feature store:
+
+[source,text]
+http://localhost:8983/solr/techproducts/query?q=test&fl=id,score,[features store=currentFeatureStore]
+
+To log the features of the `nextFeatureStore` feature store whilst reranking with `currentModel` (which is based on `currentFeatureStore`):
+
+[source,text]
+http://localhost:8983/solr/techproducts/query?q=test&rq={!ltr model=currentModel reRankDocs=100}&fl=id,score,[features store=nextFeatureStore]
+
+To view all models:
+
+[source,text]
+http://localhost:8983/solr/techproducts/schema/model-store
+
+To delete the `currentModel` model:
+
+[source,bash]
+----
+curl -XDELETE 'http://localhost:8983/solr/techproducts/schema/model-store/currentModel'
+----
+
+IMPORTANT: A feature store may be deleted only when there are no models using it.
+
+To delete the `currentFeatureStore` feature store:
+
+[source,bash]
+----
+curl -XDELETE 'http://localhost:8983/solr/techproducts/schema/feature-store/currentFeatureStore'
+----
+
+==== Using Large Models
+
+With SolrCloud, large models may fail to upload due to the limitation of ZooKeeper's buffer.
+In this case, `DefaultWrapperModel` may help you to separate the model definition from the uploaded file.
+
+Assume that you want to use a large model placed at `/path/to/models/myModel.json` through `DefaultWrapperModel`:
+
+[source,json]
+----
+{
+ "store" : "largeModelsFeatureStore",
+ "name" : "myModel",
+ "class" : "...",
+ "features" : [
+ "..."
+ ],
+ "params" : {
+ "...": "..."
+ }
+}
+----
+
+First, add the directory to Solr's resource paths with a xref:configuration-guide:libs.adoc#lib-directives-in-solrconfig[`<lib/>` directive]:
+
+[source,xml]
+----
+ <lib dir="/path/to" regex="models" />
+----
+
+Then, configure `DefaultWrapperModel` to wrap `myModel.json`:
+
+[source,json]
+----
+{
+ "store" : "largeModelsFeatureStore",
+ "name" : "myWrapperModel",
+ "class" : "org.apache.solr.ltr.model.DefaultWrapperModel",
+ "params" : {
+ "resource" : "myModel.json"
+ }
+}
+----
+
+`myModel.json` will be loaded during initialization and can then be used by specifying `model=myWrapperModel`.
+
+NOTE: No `"features"` are configured in `myWrapperModel` because the features of the wrapped model (`myModel`) will be used; also note that the `"store"` configured for the wrapper model must match that of the wrapped model i.e., in this example the feature store called `largeModelsFeatureStore` is used.
+
+CAUTION: `<lib dir="/path/to/models" regex=".*\.json" />` doesn't work as expected in this case, because `SolrResourceLoader` treats the given resources as JARs if `<lib />` points to files.
+
+As an alternative to the above-described `DefaultWrapperModel`, it is possible to xref:deployment-guide:zookeeper-ensemble.adoc#increasing-the-file-size-limit[increase ZooKeeper's file size limit].
+
+=== Applying Changes
+
+The feature store and the model store are both xref:configuration-guide:managed-resources.adoc[].
+Changes made to managed resources are not applied to the active Solr components until the Solr collection (or Solr core in single server mode) is reloaded.
+
== Quick Start with LTR
The `"techproducts"` example included with Solr is pre-configured to load the plugins required for learning-to-rank from the `ltr` xref:configuration-guide:solr-modules.adoc[Solr Module], but they are disabled by default.
@@ -166,9 +333,9 @@ http://localhost:8983/solr/techproducts/schema/feature-store/_DEFAULT_
]
----
-=== Extracting Features
+=== Logging Features
-To extract features as part of a query, add `[features]` to the `fl` parameter, for example:
+To log features as part of a query, add `[features]` to the `fl` parameter, for example:
[source,text]
http://localhost:8983/solr/techproducts/query?q=test&fl=id,score,[features]
@@ -196,6 +363,69 @@ The output will include feature values as a comma-separated list, resembling the
}}
----
+==== Feature Logging Parameters
+
+The feature logger transformer accepts the parameters described below.
+Examples of how to use them can be found in the <<ltr-examples>> section below.
+
+`store`::
++
+[%autowidth,frame=none]
+|===
+|No Re-Ranking |Optional |Default: `\_DEFAULT_`
+|Re-Ranking |Optional |Default: model feature store
+|===
++
+This parameter specifies the feature store to use for logging features.
++
+In a reranking query, the default feature store used is the model feature store (e.g. `[features]`).
+
+`logAll`::
++
+[%autowidth,frame=none]
+|===
+|No Re-Ranking |Default: `true`
+|===
++
+
++
+[%autowidth,frame=none]
+|===
+|Re-Ranking |Logger and Model have the same feature store |Default: `false`
+|Re-Ranking |Logger and Model have different feature stores |Default: `true`
+|===
++
+This parameter specifies the features to log.
++
+If set to `true`, all the features from the feature store are printed.
++
+If set to `false`, only the features used by the model are printed.
+
+CAUTION: When no re-ranking query is passed, only `logAll` = 'true' is supported. Passing `false` will cause a Solr exception.
+
+CAUTION: In a logging scenario where a re-ranking query is passed, if the logger `store` is different from the model `store`, only `logAll` = 'true' is supported. Passing `false` will cause a Solr exception.
+
+`format`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: `dense`
+|===
++
+This parameter specifies the format to use for logging features. The supported values are: `dense` and `sparse`.
++
+You can change the default behavior to be sparse by putting `<str name="defaultFormat">sparse</str>` in the {solr-javadocs}/modules/ltr/org/apache/solr/ltr/response/transform/LTRFeatureLoggerTransformerFactory.html[feature logger transformer] declaration in `solrconfig.xml` as follows:
++
+[source,xml]
+----
+<transformer name="features" class="org.apache.solr.ltr.response.transform.LTRFeatureLoggerTransformerFactory">
+ <str name="fvCacheName">QUERY_DOC_FV</str>
+ <str name="defaultFormat">sparse</str>
+ <str name="csvKeyValueDelimiter">:</str>
+ <str name="csvFeatureSeparator"> </str>
+</transformer>
+----
+
=== Uploading a Model
To upload the model in a `/path/myModel.json` file, please run:
@@ -350,7 +580,7 @@ To rerank the results of a query, interleaving two models using a specific algor
[source,text]
http://localhost:8983/solr/techproducts/query?q=test&rq={!ltr model=myModelA model=myModelB reRankDocs=100 interleavingAlgorithm=TeamDraft}&fl=id,score
-Currently the only (and default) algorithm supported is 'TeamDraft'.
+Currently, the only (and default) algorithm supported is 'TeamDraft'.
=== External Feature Information
@@ -401,11 +631,9 @@ http://localhost:8983/solr/techproducts/schema/feature-store/myEfiFeatureStore
]
----
-As an aside, you may have noticed that the `myEfiFeatures.json` example uses `"store":"myEfiFeatureStore"` attributes: read more about feature `store` in the <<LTR Lifecycle>> section of this page.
-
-==== Extracting Features
+==== Logging Features
-To extract `myEfiFeatureStore` features as part of a query, add `efi.*` parameters to the `[features]` part of the `fl` parameter, for example:
+To log `myEfiFeatureStore` features as part of a query, add `efi.*` parameters to the `[features]` part of the `fl` parameter, for example:
[source,text]
http://localhost:8983/solr/techproducts/query?q=test&fl=id,cat,manu,score,[features store=myEfiFeatureStore efi.text=test efi.preferredManufacturer=Apache efi.fromMobile=1]
@@ -463,75 +691,20 @@ http://localhost:8983/solr/techproducts/query?q=test&rq={!ltr model=myEfiModel e
Notice the absence of `efi.*` parameters in the `[features]` part of the `fl` parameter.
-==== Extracting Features While Reranking
+==== Logging Features While Reranking
-To extract features for `myEfiFeatureStore` features while still reranking with `myModel`:
+To log `myEfiFeatureStore` features while still reranking with `myModel`:
[source,text]
http://localhost:8983/solr/techproducts/query?q=test&rq={!ltr model=myModel}&fl=id,cat,manu,score,[features store=myEfiFeatureStore efi.text=test efi.preferredManufacturer=Apache efi.fromMobile=1]
Notice the absence of `efi.\*` parameters in the `rq` parameter (because `myModel` does not use `efi` features) and the presence of `efi.*` parameters in the `[features]` part of the `fl` parameter (because `myEfiFeatureStore` contains `efi` features).
-Read more about model evolution in the <<LTR Lifecycle>> section of this page.
-
=== Training Example
Example training data and a demo `train_and_upload_demo_model.py` script can be found in the `solr/modules/ltr/example` folder in the https://gitbox.apache.org/repos/asf?p=solr.git;a=tree;f=solr/modules/ltr/example[Apache Solr Git repository] (mirrored on https://github.com/apache/solr/tree/releases/solr/{solr-full-version}/solr/modules/ltr/example[github.com]).
This example folder is not shipped in the Solr binary release.
-== Installation of LTR
-
-The ltr module requires the `modules/ltr/lib/solr-ltr-*.jar` JARs.
-
-== LTR Configuration
-
-Learning-To-Rank is a module and therefore its plugins must be configured in `solrconfig.xml`.
-
-=== Minimum Requirements
-
-* Include the required module JARs.
-Note that by default paths are relative to the Solr core so they may need adjustments to your configuration, or an explicit specification of the `$solr.install.dir`.
-+
-[source,xml]
-----
-<lib dir="${solr.install.dir:../../../..}/modules/ltr/lib/" regex=".*\.jar" />
-----
-
-* Declaration of the `ltr` query parser.
-+
-[source,xml]
-----
-<queryParser name="ltr" class="org.apache.solr.ltr.search.LTRQParserPlugin"/>
-----
-
-* Configuration of the feature values cache.
-+
-[source,xml]
-----
-<cache name="QUERY_DOC_FV"
- class="solr.search.CaffeineCache"
- size="4096"
- initialSize="2048"
- autowarmCount="4096"
- regenerator="solr.search.NoOpRegenerator" />
-----
-
-* Declaration of the `[features]` transformer.
-+
-[source,xml]
-----
-<transformer name="features" class="org.apache.solr.ltr.response.transform.LTRFeatureLoggerTransformerFactory">
- <str name="fvCacheName">QUERY_DOC_FV</str>
-</transformer>
-----
-
-* Declaration of the `[interleaving]` transformer.
-+
-[source,xml]
-----
-<transformer name="interleaving" class="org.apache.solr.ltr.response.transform.LTRInterleavingTransformerFactory"/>
-----
-
=== Advanced Options
==== LTRThreadModule
@@ -539,30 +712,14 @@ Note that by default paths are relative to the Solr core so they may need adjust
A thread module can be configured for the query parser and/or the transformer to parallelize the creation of feature weights.
For details, please refer to the {solr-javadocs}/modules/ltr/org/apache/solr/ltr/LTRThreadModule.html[LTRThreadModule] javadocs.
-==== Feature Vector Customization
+==== Models handling features' null values
+This feature is available only for {solr-javadocs}/modules/ltr/org/apache/solr/ltr/model/MultipleAdditiveTreesModel.html[MultipleAdditiveTreesModel].
-The features transformer returns dense CSV values such as `featureA=0.1,featureB=0.2,featureC=0.3,featureD=0.0`.
+In some scenarios a null value for a feature has a different meaning than a zero value. Some models are trained to distinguish the two (e.g. https://xgboost.readthedocs.io/en/stable/faq.html#how-to-deal-with-missing-values). In Solr, an additional `missing` branch parameter has been introduced to support this distinction.
-For sparse CSV output such as `featureA:0.1 featureB:0.2 featureC:0.3` you can customize the {solr-javadocs}/modules/ltr/org/apache/solr/ltr/response/transform/LTRFeatureLoggerTransformerFactory.html[feature logger transformer] declaration in `solrconfig.xml` as follows:
+This defines the branch to follow when the corresponding feature value is null. With the default configuration a null and a zero value have the same meaning.
-[source,xml]
-----
-<transformer name="features" class="org.apache.solr.ltr.response.transform.LTRFeatureLoggerTransformerFactory">
- <str name="fvCacheName">QUERY_DOC_FV</str>
- <str name="defaultFormat">sparse</str>
- <str name="csvKeyValueDelimiter">:</str>
- <str name="csvFeatureSeparator"> </str>
-</transformer>
-----
-
-==== Models handling features' null values
-This feature is available only for {solr-javadocs}/modules/ltr/org/apache/solr/ltr/model/MultipleAdditiveTreesModel.html[MultipleAdditiveTreesModel].
-
-In some scenarios a null value for a feature has a different meaning than a zero value. There are models that are trained to distinguish the two (e.g. https://xgboost.readthedocs.io/en/stable/faq.html#how-to-deal-with-missing-values), in Solr an additional `"missing"` branch parameter has been introduced to support this feature.
-
-This defines the branch to follow when the corresponding feature value is null. With the default configuration a null and a zero value have the same meaning.
-
-To handle null values, the `"myFeatures.json"` file needs to be modified. A `"defaultValue"` parameter with a `"NaN"` value needs to be added to each feature that can assume a null value.
+To handle null values, the `myFeatures.json` file needs to be modified. A `defaultValue` parameter with a `NaN` value needs to be added to each feature that can assume a null value.
.Example: /path/myFeatures.json
[source,json]
@@ -588,9 +745,9 @@ To handle null values, the `"myFeatures.json"` file needs to be modified. A `"de
Also, the model configuration needs two additional parameters:
-* `"isNullSameAsZero"` needs to be defined in the model `"params"` and set to `"false"`;
+* `isNullSameAsZero` needs to be defined in the model `params` and set to `false`;
-* the `"missing"` parameter needs to be added to each branch where the corresponding feature supports null values. This can assume one value between `"left"` and `"right"`.
+* the `missing` parameter needs to be added to each branch where the corresponding feature supports null values. Its value must be either `left` or `right`.
.Example: /path/myModel.json
[source,json]
@@ -632,15 +789,14 @@ Also, the model configuration needs two additional parameter:
----
-When isNullSameAsZero is `"false"` for your model, the feature vector changes.
+When `isNullSameAsZero` is `false` for your model, the feature vector changes.
* dense format: all feature values are shown, including default values (which can be zero or null).
* sparse format: only non-default values are shown.
e.g.
-given the features defined in <<models-handling-features-null-values>>.
-If their values are `matchedTitle=0` and `productReviewScore=0`, the sparse format will return `productReviewScore:0` (0 is the default value of `matchedTitle=0` and therefore it is not returned, 0 is not the default value of `productReviewScore=0` and therefore it is returned).
+given the features defined before; if their values are `matchedTitle=0` and `productReviewScore=0`, the sparse format will return `productReviewScore:0` (0 is the default value of `matchedTitle=0` and therefore it is not returned, 0 is not the default value of `productReviewScore=0` and therefore it is returned).
==== Implementation and Contributions
@@ -658,123 +814,9 @@ Related links:
* https://cwiki.apache.org/confluence/display/solr/HowToContribute
* https://cwiki.apache.org/confluence/display/LUCENE/HowToContribute
-== LTR Lifecycle
-
-=== Feature Stores
-
-It is recommended that you organise all your features into stores which are akin to namespaces:
-
-* Features within a store must be named uniquely.
-* Across stores identical or similar features can share the same name.
-* If no store name is specified then the default `\_DEFAULT_` feature store will be used.
-
-To discover the names of all your feature stores:
-
-[source,text]
-http://localhost:8983/solr/techproducts/schema/feature-store
-
-To inspect the content of the `commonFeatureStore` feature store:
-
-[source,text]
-http://localhost:8983/solr/techproducts/schema/feature-store/commonFeatureStore
-
-=== Models
-
-* A model uses features from exactly one feature store.
-* If no store is specified then the default `\_DEFAULT_` feature store will be used.
-* A model need not use all the features defined in a feature store.
-* Multiple models can use the same feature store.
-
-To extract features for `currentFeatureStore` 's features:
-
-[source,text]
-http://localhost:8983/solr/techproducts/query?q=test&fl=id,score,[features store=currentFeatureStore]
-
-To extract features for `nextFeatureStore` features whilst reranking with `currentModel` based on `currentFeatureStore`:
-
-[source,text]
-http://localhost:8983/solr/techproducts/query?q=test&rq={!ltr model=currentModel reRankDocs=100}&fl=id,score,[features store=nextFeatureStore]
-
-To view all models:
-
-[source,text]
-http://localhost:8983/solr/techproducts/schema/model-store
-
-To delete the `currentModel` model:
-
-[source,bash]
-----
-curl -XDELETE 'http://localhost:8983/solr/techproducts/schema/model-store/currentModel'
-----
-
-IMPORTANT: A feature store may be deleted only when there are no models using it.
-
-To delete the `currentFeatureStore` feature store:
-
-[source,bash]
-----
-curl -XDELETE 'http://localhost:8983/solr/techproducts/schema/feature-store/currentFeatureStore'
-----
-
-==== Using Large Models
-
-With SolrCloud, large models may fail to upload due to the limitation of ZooKeeper's buffer.
-In this case, `DefaultWrapperModel` may help you to separate the model definition from uploaded file.
-
-Assuming that you consider to use a large model placed at `/path/to/models/myModel.json` through `DefaultWrapperModel`.
-
-[source,json]
-----
-{
- "store" : "largeModelsFeatureStore",
- "name" : "myModel",
- "class" : ...,
- "features" : [
- ...
- ],
- "params" : {
- ...
- }
-}
-----
-
-First, add the directory to Solr's resource paths with a xref:configuration-guide:libs.adoc#lib-directives-in-solrconfig[`<lib/>` directive]:
+== LTR Examples
-[source,xml]
-----
- <lib dir="/path/to" regex="models" />
-----
-
-Then, configure `DefaultWrapperModel` to wrap `myModel.json`:
-
-[source,json]
-----
-{
- "store" : "largeModelsFeatureStore",
- "name" : "myWrapperModel",
- "class" : "org.apache.solr.ltr.model.DefaultWrapperModel",
- "params" : {
- "resource" : "myModel.json"
- }
-}
-----
-
-`myModel.json` will be loaded during the initialization and be able to use by specifying `model=myWrapperModel`.
-
-NOTE: No `"features"` are configured in `myWrapperModel` because the features of the wrapped model (`myModel`) will be used; also note that the `"store"` configured for the wrapper model must match that of the wrapped model i.e., in this example the feature store called `largeModelsFeatureStore` is used.
-
-CAUTION: `<lib dir="/path/to/models" regex=".*\.json" />` doesn't work as expected in this case, because `SolrResourceLoader` considers given resources as JAR if `<lib />` indicates files.
-
-As an alternative to the above-described `DefaultWrapperModel`, it is possible to xref:deployment-guide:zookeeper-ensemble.adoc#increasing-the-file-size-limit[increase ZooKeeper's file size limit].
-
-=== Applying Changes
-
-The feature store and the model store are both xref:configuration-guide:managed-resources.adoc[].
-Changes made to managed resources are not applied to the active Solr components until the Solr collection (or Solr core in single server mode) is reloaded.
-
-=== LTR Examples
-
-==== One Feature Store, Multiple Ranking Models
+=== One Feature Store, Multiple Ranking Models
* `leftModel` and `rightModel` both use features from `commonFeatureStore` and the only difference between the two models is the weights attached to each feature.
* Conventions used:
@@ -857,7 +899,7 @@ Changes made to managed resources are not applied to the active Solr components
}
----
-==== Model Evolution
+=== Model Evolution
* `linearModel201701` uses features from `featureStore201701`
* `treesModel201702` uses features from `featureStore201702`
@@ -980,6 +1022,269 @@ Changes made to managed resources are not applied to the active Solr components
}
----
+=== Features Logging
+
+==== logAll parameter
+
+Suppose you have a complete feature store like:
+
+.Example: /path/completeFeaturesStore.json
+[source,json]
+----
+[
+ {
+ "store" : "completeFeaturesStore",
+ "name" : "documentRecency",
+ "class" : "org.apache.solr.ltr.feature.SolrFeature",
+ "params" : {
+ "q" : "{!func}recip( ms(NOW,last_modified), 3.16e-11, 1, 1)"
+ }
+ },
+ {
+ "store" : "completeFeaturesStore",
+ "name" : "isBook",
+ "class" : "org.apache.solr.ltr.feature.SolrFeature",
+ "params" : {
+ "fq": ["{!terms f=cat}book"]
+ }
+ },
+ {
+ "store" : "completeFeaturesStore",
+ "name" : "originalScore",
+ "class" : "org.apache.solr.ltr.feature.OriginalScoreFeature",
+ "params" : {}
+ }
+]
+----
+
+And suppose you have a simple linear model that uses just two of the `completeFeaturesStore` 's features:
+
+.Example: /path/linearModel.json
+[source,json]
+----
+{
+ "store" : "completeFeaturesStore",
+ "name" : "linearModel",
+ "class" : "org.apache.solr.ltr.model.LinearModel",
+ "features" : [
+ { "name" : "isBook" },
+ { "name" : "originalScore" }
+ ],
+ "params" : {
+ "weights" : {
+ "isBook" : 1.0,
+ "originalScore" : 0.5
+ }
+ }
+}
+----
+
+Making a logging + re-ranking query without defining the `store` and `logAll` parameters will print only the model features (default: `store=model store` and `logAll=false`).
+
+The query:
+[source,text]
+http://localhost:8983/solr/techproducts/query?q=test&rq={!ltr model=linearModel reRankDocs=100}&fl=id,score,[features]
+
+The output:
+
+[source,json]
+----
+{
+ "responseHeader":{
+ "status":0,
+ "QTime":0,
+ "params":{
+ "q":"test",
+ "fl":"id,score,[features]",
+ "rq":"{!ltr model=linearModel reRankDocs=100}"}},
+ "response":{"numFound":2,"start":0,"maxScore":1.0005897,"docs":[
+ {
+ "id":"GB18030TEST",
+ "score":1.0005897,
+ "[features]":"isBook=0.0,originalScore=1.959392"},
+ {
+ "id":"UTF8TEST",
+ "score":0.79656565,
+ "[features]":"isBook=0.0,originalScore=1.5513437"}]
+ }}
+----
+
+Making a logging + re-ranking query without defining the `store` parameter and setting `logAll = true` will print all the features from the model store.
+
+The query:
+[source,text]
+http://localhost:8983/solr/techproducts/query?q=test&rq={!ltr model=linearModel reRankDocs=100}&fl=id,score,[features logAll=true]
+
+The output:
+
+[source,json]
+----
+{
+ "responseHeader":{
+ "status":0,
+ "QTime":0,
+ "params":{
+ "q":"test",
+ "fl":"id,score,[features logAll=true]",
+ "rq":"{!ltr model=linearModel reRankDocs=100}"}},
+ "response":{"numFound":2,"start":0,"maxScore":1.0005897,"docs":[
+ {
+ "id":"GB18030TEST",
+ "score":1.0005897,
+ "[features]":"documentRecency=0.020893792,isBook=0.0,originalScore=1.959392"},
+ {
+ "id":"UTF8TEST",
+ "score":0.79656565,
+ "[features]":"documentRecency=0.020893792,isBook=0.0,originalScore=1.5513437"}]
+ }}
+----
+
+Suppose you have a different feature store like:
+
+.Example: /path/differentFeaturesStore.json
+[source,json]
+----
+[
+ {
+ "store": "differentFeaturesStore",
+ "name": "valueFeature1",
+ "class": "org.apache.solr.ltr.feature.FieldValueFeature",
+ "params": {
+ "field": "field1"
+ }
+ },
+ {
+ "store": "differentFeaturesStore",
+ "name": "valueFeature2",
+ "class": "org.apache.solr.ltr.feature.FieldValueFeature",
+ "params": {
+ "field": "field2"
+ }
+ }
+]
+----
+
+Making a logging + re-ranking query that defines a `store` parameter different from the model store, without defining the `logAll` parameter, will print all the features from the selected feature store (in this case the default is `logAll=true`).
+
+The query:
+[source,text]
+http://localhost:8983/solr/techproducts/query?q=test&rq={!ltr model=linearModel reRankDocs=100}&fl=id,score,[features store=differentFeaturesStore]
+
+The output:
+
+[source,json]
+----
+{
+ "responseHeader":{
+ "status":0,
+ "QTime":0,
+ "params":{
+ "q":"test",
+ "fl":"id,score,[features store=differentFeaturesStore]",
+ "rq":"{!ltr model=linearModel reRankDocs=100}"}},
+ "response":{"numFound":2,"start":0,"maxScore":1.0005897,"docs":[
+ {
+ "id":"GB18030TEST",
+ "score":1.0005897,
+ "[features]":"valueFeature1=0.1,valueFeature2=2.0"},
+ {
+ "id":"UTF8TEST",
+ "score":0.79656565,
+ "[features]":"valueFeature1=1.3,valueFeature2=4.0"}]
+ }}
+----
+
+==== format parameter
+
+Suppose you have a feature store like:
+
+.Example: /path/myFeaturesStore.json
+[source,json]
+----
+[
+ {
+ "store": "myFeaturesStore",
+ "name": "featureA",
+ "class": "org.apache.solr.ltr.feature.FieldValueFeature",
+ "params": {
+ "field": "field1"
+ }
+ },
+ {
+ "store": "myFeaturesStore",
+ "name": "featureB",
+ "class": "org.apache.solr.ltr.feature.FieldValueFeature",
+ "params": {
+ "field": "field2"
+ }
+ },
+ {
+ "store": "myFeaturesStore",
+ "name": "featureC",
+ "class": "org.apache.solr.ltr.feature.FieldValueFeature",
+ "params": {
+ "field": "field3"
+ }
+ }
+]
+----
+
+To return dense CSV values such as: `featureA=0.1,featureB=0.2,featureC=0.0`, pass the `format=dense` parameter to the feature logger transformer:
+
+[source,text]
+http://localhost:8983/solr/techproducts/query?q=test&fl=id,score,[features store=myFeaturesStore format=dense]
+
+The output:
+
+[source,json]
+----
+{
+ "responseHeader":{
+ "status":0,
+ "QTime":0,
+ "params":{
+ "q":"test",
+ "fl":"id,score,[features store=myFeaturesStore format=dense]"}},
+ "response":{"numFound":2,"start":0,"maxScore":1.0005897,"docs":[
+ {
+ "id":"GB18030TEST",
+ "score":1.0005897,
+ "[features]":"featureA=0.1,featureB=0.2,featureC=0.0"},
+ {
+ "id":"UTF8TEST",
+ "score":0.79656565,
+ "[features]":"featureA=1.3,featureB=0.0,featureC=2.1"}]
+ }}
+----
+
+To return sparse CSV values such as: `featureA=0.1,featureB=0.2`, pass the `format=sparse` parameter to the feature logger transformer:
+
+[source,text]
+http://localhost:8983/solr/techproducts/query?q=test&fl=id,score,[features store=myFeaturesStore format=sparse]
+
+The output:
+
+[source,json]
+----
+{
+ "responseHeader":{
+ "status":0,
+ "QTime":0,
+ "params":{
+ "q":"test",
+ "fl":"id,score,[features store=myFeaturesStore format=sparse]"}},
+ "response":{"numFound":2,"start":0,"maxScore":1.0005897,"docs":[
+ {
+ "id":"GB18030TEST",
+ "score":1.0005897,
+ "[features]":"featureA=0.1,featureB=0.2"},
+ {
+ "id":"UTF8TEST",
+ "score":0.79656565,
+ "[features]":"featureA=1.3,featureC=2.1"}]
+ }}
+----
+
== Additional LTR Resources
* "Learning to Rank in Solr" presentation at Lucene/Solr Revolution 2015 in Austin: