You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by ab...@apache.org on 2023/01/04 21:18:32 UTC
[solr] branch branch_9_1 updated: SOLR-16567: KnnQueryParser support for both pre-filters and post-filter (#1245)
This is an automated email from the ASF dual-hosted git repository.
abenedetti pushed a commit to branch branch_9_1
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/branch_9_1 by this push:
new d5811d8184b SOLR-16567: KnnQueryParser support for both pre-filters and post-filter (#1245)
d5811d8184b is described below
commit d5811d8184bb5e4c304789710f6aa2cbf06380f8
Author: Alessandro Benedetti <a....@sease.io>
AuthorDate: Wed Jan 4 22:08:47 2023 +0100
SOLR-16567: KnnQueryParser support for both pre-filters and post-filter (#1245)
* SOLR-16567: KnnQueryParser support for both pre-filters and post-filters(cost>0)
---
solr/CHANGES.txt | 2 +
.../apache/solr/handler/MoreLikeThisHandler.java | 2 +-
.../solr/handler/component/QueryComponent.java | 2 +-
.../handler/component/RealTimeGetComponent.java | 4 +-
.../java/org/apache/solr/search/QueryUtils.java | 8 +--
.../org/apache/solr/search/neural/KnnQParser.java | 33 +++++--------
.../apache/solr/search/neural/KnnQParserTest.java | 57 ++++++++++++++++++++++
.../org/apache/solr/handler/AnalyticsHandler.java | 2 +-
.../query-guide/pages/dense-vector-search.adoc | 7 +++
9 files changed, 85 insertions(+), 32 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 8bfbccc7c84..6e7c8606d79 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -22,6 +22,8 @@ Other Changes
* SOLR-16598: Upgrade Protobuf to 3.21.12 (David Smiley)
+* SOLR-16567: Fixed problem with filtering and KNN search, especially when using post-filters (Alessandro Benedetti)
+
================== 9.1.0 ==================
New Features
diff --git a/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java b/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java
index 84c30320425..08277ffb8a2 100644
--- a/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java
@@ -119,7 +119,7 @@ public class MoreLikeThisHandler extends RequestHandlerBase {
sortSpec = parser.getSortSpec(true);
}
- filters = QueryUtils.parseFilterQueries(req, false);
+ filters = QueryUtils.parseFilterQueries(req);
} catch (SyntaxError e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
}
diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
index d04a1c3c279..eba85c48a4c 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
@@ -204,7 +204,7 @@ public class QueryComponent extends SearchComponent {
List<Query> filters = rb.getFilters();
// if filters already exists, make a copy instead of modifying the original
filters = filters == null ? new ArrayList<>(fqs.length) : new ArrayList<>(filters);
- filters.addAll(QueryUtils.parseFilterQueries(req, false));
+ filters.addAll(QueryUtils.parseFilterQueries(req));
// only set the filters if they are not empty otherwise
// fq=&someotherParam= will trigger all docs filter for every request
diff --git a/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java b/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
index 070e0166b20..4384bfe670b 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
@@ -19,6 +19,7 @@ package org.apache.solr.handler.component;
import static org.apache.solr.common.params.CommonParams.DISTRIB;
import static org.apache.solr.common.params.CommonParams.ID;
import static org.apache.solr.common.params.CommonParams.VERSION_FIELD;
+import static org.apache.solr.search.QueryUtils.makeQueryable;
import com.google.common.collect.Lists;
import java.io.IOException;
@@ -205,7 +206,7 @@ public class RealTimeGetComponent extends SearchComponent {
List<Query> filters = rb.getFilters();
// if filters already exists, make a copy instead of modifying the original
filters = filters == null ? new ArrayList<>(fqs.length) : new ArrayList<>(filters);
- filters.addAll(QueryUtils.parseFilterQueries(req, true));
+ filters.addAll(QueryUtils.parseFilterQueries(req));
if (!filters.isEmpty()) {
rb.setFilters(filters);
}
@@ -326,6 +327,7 @@ public class RealTimeGetComponent extends SearchComponent {
if (rb.getFilters() != null) {
for (Query raw : rb.getFilters()) {
+ raw = makeQueryable(raw);
Query q = raw.rewrite(searcherInfo.getSearcher().getIndexReader());
Scorer scorer =
searcherInfo
diff --git a/solr/core/src/java/org/apache/solr/search/QueryUtils.java b/solr/core/src/java/org/apache/solr/search/QueryUtils.java
index 2709d9d6b7a..9234b34fac2 100644
--- a/solr/core/src/java/org/apache/solr/search/QueryUtils.java
+++ b/solr/core/src/java/org/apache/solr/search/QueryUtils.java
@@ -226,14 +226,11 @@ public class QueryUtils {
* Parse the filter queries in Solr request
*
* @param req Solr request
- * @param fixNegativeQueries if true, negative queries are rewritten by adding a MatchAllDocs
- * query clause
* @return and array of Query. If the request does not contain filter queries, returns an empty
* list.
* @throws SyntaxError if an error occurs during parsing
*/
- public static List<Query> parseFilterQueries(SolrQueryRequest req, boolean fixNegativeQueries)
- throws SyntaxError {
+ public static List<Query> parseFilterQueries(SolrQueryRequest req) throws SyntaxError {
String[] filterQueriesStr = req.getParams().getParams(CommonParams.FQ);
@@ -244,9 +241,6 @@ public class QueryUtils {
QParser fqp = QParser.getParser(fq, req);
fqp.setIsFilter(true);
Query query = fqp.getQuery();
- if (fixNegativeQueries) {
- query = makeQueryable(query);
- }
filters.add(query);
}
}
diff --git a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java b/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java
index 7255562d8fe..5eb6fc1d0f4 100644
--- a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java
+++ b/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java
@@ -16,10 +16,9 @@
*/
package org.apache.solr.search.neural;
+import java.io.IOException;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
@@ -31,6 +30,7 @@ import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.search.QueryUtils;
+import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SyntaxError;
public class KnnQParser extends QParser {
@@ -53,7 +53,7 @@ public class KnnQParser extends QParser {
}
@Override
- public Query parse() {
+ public Query parse() throws SyntaxError {
String denseVectorField = localParams.get(QueryParsing.F);
String vectorToSearch = localParams.get(QueryParsing.V);
int topK = localParams.getInt(TOP_K, DEFAULT_TOP_K);
@@ -83,31 +83,21 @@ public class KnnQParser extends QParser {
schemaField.getName(), parsedVectorToSearch, topK, getFilterQuery());
}
- private Query getFilterQuery() throws SolrException {
- if (!isFilter()) {
+ private Query getFilterQuery() throws SolrException, SyntaxError {
+ boolean isSubQuery = recurseCount != 0;
+ if (!isFilter() && !isSubQuery) {
String[] filterQueries = req.getParams().getParams(CommonParams.FQ);
if (filterQueries != null && filterQueries.length != 0) {
- List<Query> filters;
-
try {
- filters = QueryUtils.parseFilterQueries(req, true);
- } catch (SyntaxError e) {
+ List<Query> filters = QueryUtils.parseFilterQueries(req);
+ SolrIndexSearcher.ProcessedFilter processedFilter =
+ req.getSearcher().getProcessedFilter(filters);
+ return processedFilter.filter;
+ } catch (IOException e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
}
-
- if (filters.size() == 1) {
- return filters.get(0);
- }
-
- BooleanQuery.Builder builder = new BooleanQuery.Builder();
- for (Query query : filters) {
- builder.add(query, BooleanClause.Occur.FILTER);
- }
-
- return builder.build();
}
}
-
return null;
}
@@ -118,6 +108,7 @@ public class KnnQParser extends QParser {
* @return a float array
*/
private static float[] parseVector(String value, int dimension) {
+
if (!value.startsWith("[") || !value.endsWith("]")) {
throw new SolrException(
SolrException.ErrorCode.BAD_REQUEST,
diff --git a/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java b/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java
index ef27333832d..c22d9dd2cbe 100644
--- a/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java
+++ b/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java
@@ -288,6 +288,7 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
"//result/doc[10]/str[@name='id'][.='8']");
}
+ @Test
public void knnQueryUsedInFilter_shouldFilterResultsBeforeTheQueryExecution() {
String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
assertQ(
@@ -303,6 +304,23 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
"//result/doc[2]/str[@name='id'][.='4']");
}
+ @Test
+ public void knnQueryUsedInFilters_shouldFilterResultsBeforeTheQueryExecution() {
+ String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
+ assertQ(
+ req(
+ CommonParams.Q,
+ "id:(3 4 9 2)",
+ "fq",
+ "{!knn f=vector topK=4}" + vectorToSearch,
+ "fq",
+ "id:(4 20)",
+ "fl",
+ "id"),
+ "//result[@numFound='1']",
+ "//result/doc[1]/str[@name='id'][.='4']");
+ }
+
@Test
public void knnQueryWithFilterQuery_shouldPerformKnnSearchInPreFilteredResults() {
String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
@@ -335,6 +353,45 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
"//result/doc[4]/str[@name='id'][.='9']");
}
+ @Test
+ public void knnQueryWithCostlyFq_shouldPerformKnnSearchWithPostFilter() {
+ String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
+
+ assertQ(
+ req(
+ CommonParams.Q,
+ "{!knn f=vector topK=10}" + vectorToSearch,
+ "fq",
+ "{!frange cache=false l=0.99}$q",
+ "fl",
+ "*,score"),
+ "//result[@numFound='5']",
+ "//result/doc[1]/str[@name='id'][.='1']",
+ "//result/doc[2]/str[@name='id'][.='4']",
+ "//result/doc[3]/str[@name='id'][.='2']",
+ "//result/doc[4]/str[@name='id'][.='10']",
+ "//result/doc[5]/str[@name='id'][.='3']");
+ }
+
+ @Test
+ public void knnQueryWithFilterQueries_shouldPerformKnnSearchWithPreFiltersAndPostFilters() {
+ String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
+
+ assertQ(
+ req(
+ CommonParams.Q,
+ "{!knn f=vector topK=4}" + vectorToSearch,
+ "fq",
+ "id:(3 4 9 2)",
+ "fq",
+ "{!frange cache=false l=0.99}$q",
+ "fl",
+ "id"),
+ "//result[@numFound='2']",
+ "//result/doc[1]/str[@name='id'][.='4']",
+ "//result/doc[2]/str[@name='id'][.='2']");
+ }
+
@Test
public void knnQueryWithNegativeFilterQuery_shouldPerformKnnSearchInPreFilteredResults() {
String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
diff --git a/solr/modules/analytics/src/java/org/apache/solr/handler/AnalyticsHandler.java b/solr/modules/analytics/src/java/org/apache/solr/handler/AnalyticsHandler.java
index 6da7ca79ddf..0b080f14a5f 100644
--- a/solr/modules/analytics/src/java/org/apache/solr/handler/AnalyticsHandler.java
+++ b/solr/modules/analytics/src/java/org/apache/solr/handler/AnalyticsHandler.java
@@ -130,7 +130,7 @@ public class AnalyticsHandler extends RequestHandlerBase
queries.add(query);
// Filter Params
- queries.addAll(QueryUtils.parseFilterQueries(req, false));
+ queries.addAll(QueryUtils.parseFilterQueries(req));
return req.getSearcher().getDocSet(queries);
}
diff --git a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc
index 1484ab84ed7..b9e6106ffec 100644
--- a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc
+++ b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc
@@ -287,6 +287,13 @@ In
&q={!knn f=vector topK=10}[1.0, 2.0, 3.0, 4.0]&fq=id:(1 2 3)
The results are prefiltered by the fq=id:(1 2 3) and then only the documents from this subset are considered as candidates for the topK knn retrieval.
+
+If you want to run some of the filter queries as post-filters you can follow the standard approach for post-filtering in Apache Solr, using the cache and cost local parameters.
+
+e.g.
+
+[source,text]
+&q={!knn f=vector topK=10}[1.0, 2.0, 3.0, 4.0]&fq={!frange cache=false l=0.99}$q
====