You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by ab...@apache.org on 2023/01/04 21:18:32 UTC

[solr] branch branch_9_1 updated: SOLR-16567: KnnQueryParser support for both pre-filters and post-filter (#1245)

This is an automated email from the ASF dual-hosted git repository.

abenedetti pushed a commit to branch branch_9_1
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/branch_9_1 by this push:
     new d5811d8184b SOLR-16567: KnnQueryParser support for both pre-filters and post-filter (#1245)
d5811d8184b is described below

commit d5811d8184bb5e4c304789710f6aa2cbf06380f8
Author: Alessandro Benedetti <a....@sease.io>
AuthorDate: Wed Jan 4 22:08:47 2023 +0100

    SOLR-16567: KnnQueryParser support for both pre-filters and post-filter (#1245)
    
    * SOLR-16567: KnnQueryParser support for both pre-filters and post-filters(cost>0)
---
 solr/CHANGES.txt                                   |  2 +
 .../apache/solr/handler/MoreLikeThisHandler.java   |  2 +-
 .../solr/handler/component/QueryComponent.java     |  2 +-
 .../handler/component/RealTimeGetComponent.java    |  4 +-
 .../java/org/apache/solr/search/QueryUtils.java    |  8 +--
 .../org/apache/solr/search/neural/KnnQParser.java  | 33 +++++--------
 .../apache/solr/search/neural/KnnQParserTest.java  | 57 ++++++++++++++++++++++
 .../org/apache/solr/handler/AnalyticsHandler.java  |  2 +-
 .../query-guide/pages/dense-vector-search.adoc     |  7 +++
 9 files changed, 85 insertions(+), 32 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 8bfbccc7c84..6e7c8606d79 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -22,6 +22,8 @@ Other Changes
 
 * SOLR-16598: Upgrade Protobuf to 3.21.12 (David Smiley)
 
+* SOLR-16567: Fixed problem with filtering and KNN search, especially when using post-filters (Alessandro Benedetti)
+
 ==================  9.1.0 ==================
 
 New Features
diff --git a/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java b/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java
index 84c30320425..08277ffb8a2 100644
--- a/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java
@@ -119,7 +119,7 @@ public class MoreLikeThisHandler extends RequestHandlerBase {
           sortSpec = parser.getSortSpec(true);
         }
 
-        filters = QueryUtils.parseFilterQueries(req, false);
+        filters = QueryUtils.parseFilterQueries(req);
       } catch (SyntaxError e) {
         throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
       }
diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
index d04a1c3c279..eba85c48a4c 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
@@ -204,7 +204,7 @@ public class QueryComponent extends SearchComponent {
         List<Query> filters = rb.getFilters();
         // if filters already exists, make a copy instead of modifying the original
         filters = filters == null ? new ArrayList<>(fqs.length) : new ArrayList<>(filters);
-        filters.addAll(QueryUtils.parseFilterQueries(req, false));
+        filters.addAll(QueryUtils.parseFilterQueries(req));
 
         // only set the filters if they are not empty otherwise
         // fq=&someotherParam= will trigger all docs filter for every request
diff --git a/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java b/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
index 070e0166b20..4384bfe670b 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
@@ -19,6 +19,7 @@ package org.apache.solr.handler.component;
 import static org.apache.solr.common.params.CommonParams.DISTRIB;
 import static org.apache.solr.common.params.CommonParams.ID;
 import static org.apache.solr.common.params.CommonParams.VERSION_FIELD;
+import static org.apache.solr.search.QueryUtils.makeQueryable;
 
 import com.google.common.collect.Lists;
 import java.io.IOException;
@@ -205,7 +206,7 @@ public class RealTimeGetComponent extends SearchComponent {
         List<Query> filters = rb.getFilters();
         // if filters already exists, make a copy instead of modifying the original
         filters = filters == null ? new ArrayList<>(fqs.length) : new ArrayList<>(filters);
-        filters.addAll(QueryUtils.parseFilterQueries(req, true));
+        filters.addAll(QueryUtils.parseFilterQueries(req));
         if (!filters.isEmpty()) {
           rb.setFilters(filters);
         }
@@ -326,6 +327,7 @@ public class RealTimeGetComponent extends SearchComponent {
 
           if (rb.getFilters() != null) {
             for (Query raw : rb.getFilters()) {
+              raw = makeQueryable(raw);
               Query q = raw.rewrite(searcherInfo.getSearcher().getIndexReader());
               Scorer scorer =
                   searcherInfo
diff --git a/solr/core/src/java/org/apache/solr/search/QueryUtils.java b/solr/core/src/java/org/apache/solr/search/QueryUtils.java
index 2709d9d6b7a..9234b34fac2 100644
--- a/solr/core/src/java/org/apache/solr/search/QueryUtils.java
+++ b/solr/core/src/java/org/apache/solr/search/QueryUtils.java
@@ -226,14 +226,11 @@ public class QueryUtils {
    * Parse the filter queries in Solr request
    *
    * @param req Solr request
-   * @param fixNegativeQueries if true, negative queries are rewritten by adding a MatchAllDocs
-   *     query clause
    * @return and array of Query. If the request does not contain filter queries, returns an empty
    *     list.
    * @throws SyntaxError if an error occurs during parsing
    */
-  public static List<Query> parseFilterQueries(SolrQueryRequest req, boolean fixNegativeQueries)
-      throws SyntaxError {
+  public static List<Query> parseFilterQueries(SolrQueryRequest req) throws SyntaxError {
 
     String[] filterQueriesStr = req.getParams().getParams(CommonParams.FQ);
 
@@ -244,9 +241,6 @@ public class QueryUtils {
           QParser fqp = QParser.getParser(fq, req);
           fqp.setIsFilter(true);
           Query query = fqp.getQuery();
-          if (fixNegativeQueries) {
-            query = makeQueryable(query);
-          }
           filters.add(query);
         }
       }
diff --git a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java b/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java
index 7255562d8fe..5eb6fc1d0f4 100644
--- a/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java
+++ b/solr/core/src/java/org/apache/solr/search/neural/KnnQParser.java
@@ -16,10 +16,9 @@
  */
 package org.apache.solr.search.neural;
 
+import java.io.IOException;
 import java.util.List;
 import org.apache.commons.lang3.StringUtils;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.Query;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.CommonParams;
@@ -31,6 +30,7 @@ import org.apache.solr.schema.SchemaField;
 import org.apache.solr.search.QParser;
 import org.apache.solr.search.QueryParsing;
 import org.apache.solr.search.QueryUtils;
+import org.apache.solr.search.SolrIndexSearcher;
 import org.apache.solr.search.SyntaxError;
 
 public class KnnQParser extends QParser {
@@ -53,7 +53,7 @@ public class KnnQParser extends QParser {
   }
 
   @Override
-  public Query parse() {
+  public Query parse() throws SyntaxError {
     String denseVectorField = localParams.get(QueryParsing.F);
     String vectorToSearch = localParams.get(QueryParsing.V);
     int topK = localParams.getInt(TOP_K, DEFAULT_TOP_K);
@@ -83,31 +83,21 @@ public class KnnQParser extends QParser {
         schemaField.getName(), parsedVectorToSearch, topK, getFilterQuery());
   }
 
-  private Query getFilterQuery() throws SolrException {
-    if (!isFilter()) {
+  private Query getFilterQuery() throws SolrException, SyntaxError {
+    boolean isSubQuery = recurseCount != 0;
+    if (!isFilter() && !isSubQuery) {
       String[] filterQueries = req.getParams().getParams(CommonParams.FQ);
       if (filterQueries != null && filterQueries.length != 0) {
-        List<Query> filters;
-
         try {
-          filters = QueryUtils.parseFilterQueries(req, true);
-        } catch (SyntaxError e) {
+          List<Query> filters = QueryUtils.parseFilterQueries(req);
+          SolrIndexSearcher.ProcessedFilter processedFilter =
+              req.getSearcher().getProcessedFilter(filters);
+          return processedFilter.filter;
+        } catch (IOException e) {
           throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
         }
-
-        if (filters.size() == 1) {
-          return filters.get(0);
-        }
-
-        BooleanQuery.Builder builder = new BooleanQuery.Builder();
-        for (Query query : filters) {
-          builder.add(query, BooleanClause.Occur.FILTER);
-        }
-
-        return builder.build();
       }
     }
-
     return null;
   }
 
@@ -118,6 +108,7 @@ public class KnnQParser extends QParser {
    * @return a float array
    */
   private static float[] parseVector(String value, int dimension) {
+
     if (!value.startsWith("[") || !value.endsWith("]")) {
       throw new SolrException(
           SolrException.ErrorCode.BAD_REQUEST,
diff --git a/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java b/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java
index ef27333832d..c22d9dd2cbe 100644
--- a/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java
+++ b/solr/core/src/test/org/apache/solr/search/neural/KnnQParserTest.java
@@ -288,6 +288,7 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
         "//result/doc[10]/str[@name='id'][.='8']");
   }
 
+  @Test
   public void knnQueryUsedInFilter_shouldFilterResultsBeforeTheQueryExecution() {
     String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
     assertQ(
@@ -303,6 +304,23 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
         "//result/doc[2]/str[@name='id'][.='4']");
   }
 
+  @Test
+  public void knnQueryUsedInFilters_shouldFilterResultsBeforeTheQueryExecution() {
+    String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
+    assertQ(
+        req(
+            CommonParams.Q,
+            "id:(3 4 9 2)",
+            "fq",
+            "{!knn f=vector topK=4}" + vectorToSearch,
+            "fq",
+            "id:(4 20)",
+            "fl",
+            "id"),
+        "//result[@numFound='1']",
+        "//result/doc[1]/str[@name='id'][.='4']");
+  }
+
   @Test
   public void knnQueryWithFilterQuery_shouldPerformKnnSearchInPreFilteredResults() {
     String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
@@ -335,6 +353,45 @@ public class KnnQParserTest extends SolrTestCaseJ4 {
         "//result/doc[4]/str[@name='id'][.='9']");
   }
 
+  @Test
+  public void knnQueryWithCostlyFq_shouldPerformKnnSearchWithPostFilter() {
+    String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
+
+    assertQ(
+        req(
+            CommonParams.Q,
+            "{!knn f=vector topK=10}" + vectorToSearch,
+            "fq",
+            "{!frange cache=false l=0.99}$q",
+            "fl",
+            "*,score"),
+        "//result[@numFound='5']",
+        "//result/doc[1]/str[@name='id'][.='1']",
+        "//result/doc[2]/str[@name='id'][.='4']",
+        "//result/doc[3]/str[@name='id'][.='2']",
+        "//result/doc[4]/str[@name='id'][.='10']",
+        "//result/doc[5]/str[@name='id'][.='3']");
+  }
+
+  @Test
+  public void knnQueryWithFilterQueries_shouldPerformKnnSearchWithPreFiltersAndPostFilters() {
+    String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
+
+    assertQ(
+        req(
+            CommonParams.Q,
+            "{!knn f=vector topK=4}" + vectorToSearch,
+            "fq",
+            "id:(3 4 9 2)",
+            "fq",
+            "{!frange cache=false l=0.99}$q",
+            "fl",
+            "id"),
+        "//result[@numFound='2']",
+        "//result/doc[1]/str[@name='id'][.='4']",
+        "//result/doc[2]/str[@name='id'][.='2']");
+  }
+
   @Test
   public void knnQueryWithNegativeFilterQuery_shouldPerformKnnSearchInPreFilteredResults() {
     String vectorToSearch = "[1.0, 2.0, 3.0, 4.0]";
diff --git a/solr/modules/analytics/src/java/org/apache/solr/handler/AnalyticsHandler.java b/solr/modules/analytics/src/java/org/apache/solr/handler/AnalyticsHandler.java
index 6da7ca79ddf..0b080f14a5f 100644
--- a/solr/modules/analytics/src/java/org/apache/solr/handler/AnalyticsHandler.java
+++ b/solr/modules/analytics/src/java/org/apache/solr/handler/AnalyticsHandler.java
@@ -130,7 +130,7 @@ public class AnalyticsHandler extends RequestHandlerBase
     queries.add(query);
 
     // Filter Params
-    queries.addAll(QueryUtils.parseFilterQueries(req, false));
+    queries.addAll(QueryUtils.parseFilterQueries(req));
     return req.getSearcher().getDocSet(queries);
   }
 
diff --git a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc
index 1484ab84ed7..b9e6106ffec 100644
--- a/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc
+++ b/solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc
@@ -287,6 +287,13 @@ In
 &q={!knn f=vector topK=10}[1.0, 2.0, 3.0, 4.0]&fq=id:(1 2 3)
 
 The results are prefiltered by the fq=id:(1 2 3) and then only the documents from this subset are considered as candidates for the topK knn retrieval.
+
+If you want to run some of the filter queries as post-filters you can follow the standard approach for post-filtering in Apache Solr, using the cache and cost local parameters.
+
+e.g.
+
+[source,text]
+&q={!knn f=vector topK=10}[1.0, 2.0, 3.0, 4.0]&fq={!frange cache=false l=0.99}$q
 ====