You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gu...@apache.org on 2021/02/21 16:49:51 UTC

[lucene-solr] branch master updated: SOLR-14787 - Adding support to use inequalities to the payload check query parser. (#1954)

This is an automated email from the ASF dual-hosted git repository.

gus pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new b298d7f  SOLR-14787 - Adding support to use inequalities to the payload check query parser. (#1954)
b298d7f is described below

commit b298d7fb160a49f552dc3987b83aa53601c7b29a
Author: Kevin Watters <kw...@kmwllc.com>
AuthorDate: Sun Feb 21 11:49:36 2021 -0500

    SOLR-14787 - Adding support to use inequalities to the payload check query parser. (#1954)
---
 .../queries/payloads/TestPayloadCheckQuery.java    |  0
 .../solr/search/PayloadCheckQParserPlugin.java     | 17 +++++++-
 .../solr/search/TestPayloadCheckQParserPlugin.java | 29 ++++++++++++--
 solr/solr-ref-guide/src/other-parsers.adoc         | 45 ++++++++++++++--------
 4 files changed, 68 insertions(+), 23 deletions(-)

diff --git a/lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadCheckQuery.java b/lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadCheckQuery.java
old mode 100755
new mode 100644
diff --git a/solr/core/src/java/org/apache/solr/search/PayloadCheckQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/PayloadCheckQParserPlugin.java
index e5702bf..a4a3db5 100644
--- a/solr/core/src/java/org/apache/solr/search/PayloadCheckQParserPlugin.java
+++ b/solr/core/src/java/org/apache/solr/search/PayloadCheckQParserPlugin.java
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Locale;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.payloads.FloatEncoder;
@@ -28,6 +29,8 @@ import org.apache.lucene.analysis.payloads.IdentityEncoder;
 import org.apache.lucene.analysis.payloads.IntegerEncoder;
 import org.apache.lucene.analysis.payloads.PayloadEncoder;
 import org.apache.lucene.queries.payloads.SpanPayloadCheckQuery;
+import org.apache.lucene.queries.payloads.SpanPayloadCheckQuery.MatchOperation;
+import org.apache.lucene.queries.payloads.SpanPayloadCheckQuery.PayloadType;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.util.BytesRef;
@@ -53,6 +56,13 @@ public class PayloadCheckQParserPlugin extends QParserPlugin {
         String field = localParams.get(QueryParsing.F);
         String value = localParams.get(QueryParsing.V);
         String p = localParams.get("payloads");
+        // payloads and op parameter are probably mutually exclusive. we could consider making a different query
+        // not a span payload check query, but something that just operates on payloads without the span?
+        String strOp = localParams.get("op");
+        MatchOperation op = MatchOperation.EQ;
+        if (strOp != null) {
+          op = MatchOperation.valueOf(strOp.toUpperCase(Locale.ROOT));
+        }
 
         if (field == null) {
           throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "'f' not specified");
@@ -81,12 +91,16 @@ public class PayloadCheckQParserPlugin extends QParserPlugin {
 
         PayloadEncoder encoder = null;
         String e = PayloadUtils.getPayloadEncoder(ft);
+        PayloadType payloadType = null;
         if ("float".equals(e)) {    // TODO: centralize this string->PayloadEncoder logic (see DelimitedPayloadTokenFilterFactory)
           encoder = new FloatEncoder();
+          payloadType = PayloadType.FLOAT;
         } else if ("integer".equals(e)) {
           encoder = new IntegerEncoder();
+          payloadType = PayloadType.INT;
         } else if ("identity".equals(e)) {
           encoder = new IdentityEncoder();
+          payloadType = PayloadType.STRING;
         }
 
         if (encoder == null) {
@@ -99,8 +113,7 @@ public class PayloadCheckQParserPlugin extends QParserPlugin {
           if (rawPayload.length() > 0)
             payloads.add(encoder.encode(rawPayload.toCharArray()));
         }
-
-        return new SpanPayloadCheckQuery(query, payloads);
+        return new SpanPayloadCheckQuery(query, payloads, payloadType, op);
       }
     };
 
diff --git a/solr/core/src/test/org/apache/solr/search/TestPayloadCheckQParserPlugin.java b/solr/core/src/test/org/apache/solr/search/TestPayloadCheckQParserPlugin.java
index 14bd833..f619320 100644
--- a/solr/core/src/test/org/apache/solr/search/TestPayloadCheckQParserPlugin.java
+++ b/solr/core/src/test/org/apache/solr/search/TestPayloadCheckQParserPlugin.java
@@ -46,20 +46,41 @@ public class TestPayloadCheckQParserPlugin extends SolrTestCaseJ4 {
         "{!payload_check f=vals_dpi payloads='1 2'}A B",
         // "{!payload_check f=vals_dpi payloads='1 2.0'}A B",  // ideally this should pass, but IntegerEncoder can't handle "2.0"
         "{!payload_check f=vals_dpi payloads='1 2 3'}A B C",
-
         "{!payload_check f=vals_dpf payloads='1 2'}one two",
+        "{!payload_check f=vals_dpf payloads='1 2' op='eq'}one two",
         "{!payload_check f=vals_dpf payloads='1 2.0'}one two", // shows that FloatEncoder can handle "1"
-
-        "{!payload_check f=vals_dps payloads='NOUN VERB'}cat jumped"
+        "{!payload_check f=vals_dps payloads='NOUN VERB'}cat jumped",
+        "{!payload_check f=vals_dpf payloads='0.75' op='gt'}one",
+        "{!payload_check f=vals_dpf payloads='0.75 1.5' op='gt'}one two",
+        "{!payload_check f=vals_dpf payloads='1.25' op='lt'}one", // inequality on float lt
+        "{!payload_check f=vals_dpf payloads='1.0' op='lte'}one", // inequality on float lte
+        "{!payload_check f=vals_dpf payloads='0.75' op='gt'}one", // inequality on float gt
+        "{!payload_check f=vals_dpf payloads='1.0' op='gte'}one", // inequality on float gte
+        "{!payload_check f=vals_dpi payloads='2' op='lt'}A", // inequality on int lt
+        "{!payload_check f=vals_dpi payloads='1' op='lte'}A", // inequality on int lte
+        "{!payload_check f=vals_dpi payloads='0' op='gt'}A", // inequality on int gt
+        "{!payload_check f=vals_dpi payloads='1' op='gte'}A" // inequality on int gte
     };
 
     String[] should_not_matches = new String[] {
+        "{!payload_check f=vals_dpf payloads='0.75' op='gt'}one two", // too few payloads
+        "{!payload_check f=vals_dpf payloads='0.75 1.5 2.0' op='gt'}one two", // too many payloads
         "{!payload_check f=vals_dpi v=A payloads=2}",
         "{!payload_check f=vals_dpi payloads='1 2'}B C",
         "{!payload_check f=vals_dpi payloads='1 2 3'}A B",
         "{!payload_check f=vals_dpi payloads='1 2'}A B C",
         "{!payload_check f=vals_dpf payloads='1 2.0'}two three",
-        "{!payload_check f=vals_dps payloads='VERB NOUN'}cat jumped"
+        "{!payload_check f=vals_dps payloads='VERB NOUN'}cat jumped",
+        "{!payload_check f=vals_dpf payloads='1.25' op='gt'}one",
+        "{!payload_check f=vals_dpf payloads='0.75 3' op='gt'}one two",
+        "{!payload_check f=vals_dpf payloads='1.0' op='lt'}one", // inequality on float lt
+        "{!payload_check f=vals_dpf payloads='0.75' op='lte'}one", // inequality on float lte
+        "{!payload_check f=vals_dpf payloads='1.0' op='gt'}one", // inequality on float gt
+        "{!payload_check f=vals_dpf payloads='1.25' op='gte'}one", // inequality on float gte
+        "{!payload_check f=vals_dpi payloads='1' op='lt'}A", // inequality on int lt
+        "{!payload_check f=vals_dpi payloads='0' op='lte'}A", // inequality on int lte
+        "{!payload_check f=vals_dpi payloads='1' op='gt'}A", // inequality on int gt
+        "{!payload_check f=vals_dpi payloads='2' op='gte'}A" // inequality on int gte
     };
 
     for(String should_match : should_matches) {
diff --git a/solr/solr-ref-guide/src/other-parsers.adoc b/solr/solr-ref-guide/src/other-parsers.adoc
index e714f12..5c96dbf 100644
--- a/solr/solr-ref-guide/src/other-parsers.adoc
+++ b/solr/solr-ref-guide/src/other-parsers.adoc
@@ -977,16 +977,11 @@ For more information about the possibilities of nested queries, see Yonik Seeley
 
 == Payload Query Parsers
 
-These query parsers utilize payloads encoded on terms during indexing.
-
-The main query, for both of these parsers, is parsed straightforwardly from the field type's query analysis into a `SpanQuery`. The generated `SpanQuery` will be either a `SpanTermQuery` or an ordered, zero slop `SpanNearQuery`, depending on how many tokens are emitted. Payloads can be encoded on terms using either the `DelimitedPayloadTokenFilter` or the `NumericPayloadTokenFilter`. The payload using parsers are:
-
-* `PayloadScoreQParser`
-* `PayloadCheckQParser`
+These query parsers utilize payloads encoded on terms during indexing. Payloads can be encoded on terms using either the `DelimitedPayloadTokenFilter` or the `NumericPayloadTokenFilter`.
 
 === Payload Score Parser
 
-`PayloadScoreQParser` incorporates each matching term's numeric (integer or float) payloads into the scores.
+`PayloadScoreQParser` incorporates each matching term's numeric (integer or float) payloads into the scores. The main query is parsed from the field type's query analysis into a `SpanQuery` based on the value of the `operator` parameter below.
 
 This parser accepts the following parameters:
 
@@ -997,7 +992,9 @@ The field to use. This parameter is required.
 The payload function. The options are: `min`, `max`, `average`, or `sum`. This parameter is required.
 
 `operator`::
-A search operator. The options are `or` and `phrase`, which is the default. This defines if the search query should be an OR query or a phrase query.
+A search operator. The options are:
+  * `or` will generate either a `SpanTermQuery` or a `SpanOrQuery`, depending on the number of tokens emitted.
+  * `phrase` (the default) will generate either a `SpanTermQuery` or an ordered, zero slop `SpanNearQuery`, depending on how many tokens are emitted.
 
 `includeSpanScore`::
 If `true`, multiplies the computed payload factor by the score of the original query. If `false`, the default, the computed payload factor is the score.
@@ -1012,7 +1009,9 @@ If `true`, multiples the computed payload factor by the score of the original qu
 
 === Payload Check Parser
 
-`PayloadCheckQParser` only matches when the matching terms also have the specified payloads.
+`PayloadCheckQParser` only matches when the matching terms also have the specified relationship to the payloads. The default relationship is equals; however, inequality matching can also be performed. The main query is parsed straightforwardly from the field type's query analysis into a `SpanQuery`. The generated `SpanQuery` will be either a `SpanTermQuery` or an ordered, zero slop `SpanNearQuery`, depending on how many tokens are emitted. The net effect is t [...]
+
+NOTE: If applying the field analysis to the query alters the number of tokens, the final number of tokens must match the number of payloads supplied in the `payloads` parameter. If there is a mismatch between the number of query tokens and the number of payload values supplied with this query, the query will not match.
 
 This parser accepts the following parameters:
 
@@ -1020,18 +1019,30 @@ This parser accepts the following parameters:
 The field to use (required).
 
 `payloads`::
-A space-separated list of payloads that must match the query terms (required)
-+
-Each specified payload will be encoded using the encoder determined from the field type and encoded accordingly for matching.
-+
-`DelimitedPayloadTokenFilter` 'identity' encoded payloads also work here, as well as float and integer encoded ones.
+A space-separated list of payloads to be compared with payloads in the matching tokens from the document (required). Each specified payload will be encoded using the encoder determined from the field type prior to matching. Integer, float and identity (string) encodings are supported with the same meanings as for `DelimitedPayloadTokenFilter`.
+
+`op`::
+The comparison operation to apply to the payload check. All operations require that consecutive tokens derived from the analysis of the query match consecutive tokens in the document, and additionally the payloads on the document tokens must be:
+ * `eq` -  equal to the specified payloads (default)
+ * `gt` -  greater than the specified payloads
+ * `lt` -  less than the specified payloads
+ * `gte` -  greater than or equal to the specified payloads
+ * `lte` -  less than or equal to the specified payloads
 
-*Example*
+*Examples*
 
+Find all documents with the phrase "searching stuff" where searching has a payload of "VERB" and "stuff" has a payload of "NOUN"
 [source,text]
-----
 {!payload_check f=words_dps payloads="VERB NOUN"}searching stuff
-----
+
+Find all documents with "foo" where "foo" has a payload value greater than or equal to 0.75
+[source,text]
+{!payload_check f=words_dpf payloads="0.75" op="gte"}foo
+
+Find all documents with the phrase "foo bar" where term "foo" has a payload greater than 9 and "bar" has a payload greater than 5
+[source,text]
+{!payload_check f=words_dpi payloads="9 5" op="gt"}foo bar
+
 
 == Prefix Query Parser