You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ho...@apache.org on 2020/09/01 00:03:10 UTC
[lucene-solr] branch jira/SOLR-14383 updated: flesh out search
examples showing non-trivial of/which usage w/CAUTIONs about need for
double escaping slashes in nest paths
This is an automated email from the ASF dual-hosted git repository.
hossman pushed a commit to branch jira/SOLR-14383
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/jira/SOLR-14383 by this push:
new 1ba402b flesh out search examples showing non-trivial of/which usage w/CAUTIONs about need for double escaping slashes in nest paths
1ba402b is described below
commit 1ba402b3081ffdabf69067baa3b67efd0c812002
Author: Chris Hostetter <ho...@apache.org>
AuthorDate: Mon Aug 31 17:02:52 2020 -0700
flesh out search examples showing non-trivial of/which usage w/CAUTIONs about need for double escaping slashes in nest paths
---
.../src/indexing-nested-documents.adoc | 14 +++
.../src/searching-nested-documents.adoc | 120 ++++++++++++++++-----
2 files changed, 106 insertions(+), 28 deletions(-)
diff --git a/solr/solr-ref-guide/src/indexing-nested-documents.adoc b/solr/solr-ref-guide/src/indexing-nested-documents.adoc
index 4c8c9ce..4a22518 100644
--- a/solr/solr-ref-guide/src/indexing-nested-documents.adoc
+++ b/solr/solr-ref-guide/src/indexing-nested-documents.adoc
@@ -63,6 +63,7 @@ Even though the child documents in these examples are provided syntactically as
"price_i": 42,
"manuals": [ { "id": "P11!D41",
"name_s": "Red Swingline Brochure",
+ "pages_i":1,
"content_t": "..."
} ]
},
@@ -72,10 +73,12 @@ Even though the child documents in these examples are provided syntactically as
} ],
"manuals": [ { "id": "P11!D51",
"name_s": "Quick Reference Guide",
+ "pages_i":1,
"content_t": "How to use your stapler ..."
},
{ "id": "P11!D61",
"name_s": "Warranty Details",
+ "pages_i":42,
"content_t": "... lifetime guarantee ..."
} ]
},
@@ -87,6 +90,7 @@ Even though the child documents in these examples are provided syntactically as
"price_i": 89,
"manuals": [ { "id": "P22!D42",
"name_s": "Red Mont Blanc Brochure",
+ "pages_i":1,
"content_t": "..."
} ]
},
@@ -96,6 +100,7 @@ Even though the child documents in these examples are provided syntactically as
} ],
"manuals": [ { "id": "P22!D52",
"name_s": "How To Use A Pen",
+ "pages_i":42,
"content_t": "Start by removing the cap ..."
} ]
} ]
@@ -211,6 +216,7 @@ Although not recommended, it is also possible to index child documents "anonymou
{ "id": "P11!D41",
"type_s": "DOC",
"name_s": "Red Swingline Brochure",
+ "pages_i":1,
"content_t": "..."
} ]
},
@@ -222,11 +228,13 @@ Although not recommended, it is also possible to index child documents "anonymou
{ "id": "P11!D51",
"type_s": "DOC",
"name_s": "Quick Reference Guide",
+ "pages_i":1,
"content_t": "How to use your stapler ..."
},
{ "id": "P11!D61",
"type_s": "DOC",
"name_s": "Warranty Details",
+ "pages_i":42,
"content_t": "... lifetime guarantee ..."
}
]
@@ -284,6 +292,7 @@ $ curl --globoff 'http://localhost:8983/solr/gettingstarted/select?omitHeader=tr
"id":"P11!D41",
"type_s":"DOC",
"name_s":"Red Swingline Brochure",
+ "pages_i":1,
"content_t":"...",
"_version_":1673055562829398016},
{
@@ -302,12 +311,14 @@ $ curl --globoff 'http://localhost:8983/solr/gettingstarted/select?omitHeader=tr
"id":"P11!D51",
"type_s":"DOC",
"name_s":"Quick Reference Guide",
+ "pages_i":1,
"content_t":"How to use your stapler ...",
"_version_":1673055562829398016},
{
"id":"P11!D61",
"type_s":"DOC",
"name_s":"Warranty Details",
+ "pages_i":42,
"content_t":"... lifetime guarantee ...",
"_version_":1673055562829398016}]}]
}}
@@ -334,6 +345,7 @@ $ curl --globoff 'http://localhost:8983/solr/gettingstarted/select?omitHeader=tr
<str name="id">P11!D41</str>
<str name="type_s">DOC</str>
<str name="name_s">Red Swingline Brochure</str>
+ <int name="pages_i">1</int>
<str name="content_t">...</str>
<long name="_version_">1673055562829398016</long></doc>
<doc>
@@ -352,12 +364,14 @@ $ curl --globoff 'http://localhost:8983/solr/gettingstarted/select?omitHeader=tr
<str name="id">P11!D51</str>
<str name="type_s">DOC</str>
<str name="name_s">Quick Reference Guide</str>
+ <int name="pages_i">1</int>
<str name="content_t">How to use your stapler ...</str>
<long name="_version_">1673055562829398016</long></doc>
<doc>
<str name="id">P11!D61</str>
<str name="type_s">DOC</str>
<str name="name_s">Warranty Details</str>
+ <int name="pages_i">42</int>
<str name="content_t">... lifetime guarantee ...</str>
<long name="_version_">1673055562829398016</long></doc></doc>
</result>
diff --git a/solr/solr-ref-guide/src/searching-nested-documents.adoc b/solr/solr-ref-guide/src/searching-nested-documents.adoc
index 09e16f8..aea51cc 100644
--- a/solr/solr-ref-guide/src/searching-nested-documents.adoc
+++ b/solr/solr-ref-guide/src/searching-nested-documents.adoc
@@ -76,6 +76,7 @@ $ curl 'http://localhost:8983/solr/gettingstarted/select?omitHeader=true&q=descr
{
"id":"P11!D41",
"name_s":"Red Swingline Brochure",
+ "pages_i":1,
"content_t":"...",
"_version_":1672933224035123200}]},
@@ -88,12 +89,14 @@ $ curl 'http://localhost:8983/solr/gettingstarted/select?omitHeader=true&q=descr
{
"id":"P11!D51",
"name_s":"Quick Reference Guide",
+ "pages_i":1,
"content_t":"How to use your stapler ...",
"_version_":1672933224035123200},
{
"id":"P11!D61",
"name_s":"Warranty Details",
+ "pages_i":42,
"content_t":"... lifetime guarantee ...",
"_version_":1672933224035123200}]}]
}}
@@ -108,12 +111,13 @@ Let's consider again the `description_t:staplers` query used above -- if we wrap
[source,bash]
----
-$ curl 'http://localhost:8983/solr/gettingstarted/select' -d omitHeader=true -d 'q={!child+of="*:* -_nest_path_:*"}description_t:staplers'
+$ curl 'http://localhost:8983/solr/gettingstarted/select' -d 'omitHeader=true' -d 'q={!child of="*:* -_nest_path_:*"}description_t:staplers'
{
"response":{"numFound":5,"start":0,"maxScore":0.30136836,"numFoundExact":true,"docs":[
{
"id":"P11!D41",
"name_s":"Red Swingline Brochure",
+ "pages_i":1,
"content_t":"...",
"_version_":1672933224035123200},
{
@@ -129,11 +133,13 @@ $ curl 'http://localhost:8983/solr/gettingstarted/select' -d omitHeader=true -d
{
"id":"P11!D51",
"name_s":"Quick Reference Guide",
+ "pages_i":1,
"content_t":"How to use your stapler ...",
"_version_":1672933224035123200},
{
"id":"P11!D61",
"name_s":"Warranty Details",
+ "pages_i":42,
"content_t":"... lifetime guarantee ...",
"_version_":1672933224035123200}]
}}
@@ -141,64 +147,121 @@ $ curl 'http://localhost:8983/solr/gettingstarted/select' -d omitHeader=true -d
In this example we've used `\*:* -\_nest_path_:*` as our <<other-parsers#block-mask,`of` parameter>> to indicate we want to consider all documents which don't have a nest path -- ie: all "root" level documents -- as the set of possible parents.
-nocommit: example with more interesting `of` param - ie: only manuals that are attached to SKUs
+By changing the `of` param to match ancestors at specific `\_nest_path_` levels, we can narrow down the list of children we return. In the query below, we search for all descendants of `skus` (using an `of` param that identifies all documents that do _not_ have a `\_nest_path_` with the prefix `/skus/*`) with a `price_i` less than `50`:
+
+[source,bash]
+----
+$ curl 'http://localhost:8983/solr/gettingstarted/select' -d 'omitHeader=true' --data-urlencode 'q={!child of="*:* -_nest_path_:\\/skus\\/*"}(+price_i:[* TO 50] +_nest_path_:\/skus)'
+{
+ "response":{"numFound":1,"start":0,"maxScore":1.0,"numFoundExact":true,"docs":[
+ {
+ "id":"P11!D41",
+ "name_s":"Red Swingline Brochure",
+ "pages_i":1,
+ "content_t":"...",
+ "_version_":1675662666752851968}]
+ }}
+----
+
+[#double-escaping-nest-path-slashes]
+[CAUTION]
+.Double Escaping `\_nest_path_` slashes in `of`
+====
+Note that in the above example, the `/` characters in the `\_nest_path_` were "double escaped" in the `of` parameter:
+
+* One level of `\` escaping is necessary to prevent the `/` from being interpreted as a {lucene-javadocs}/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#Regexp_Searches[Regex Query]
+* An additional level of "escaping the escape character" is necessary because the `of` local parameter is a quoted string; so we need a second `\` to ensure the first `\` is preserved and passed as is to the query parser.
+
+(You can see that only a single level of `\` escaping is needed in the body of the query string -- to prevent the Regex syntax -- because it's not a quoted string local param)
+
+You may find it more convenient to use <<local-parameters-in-queries#parameter-dereferencing,parameter references>> in conjunction with <<other-parsers#other-parsers,other parsers>> that do not treat `/` as a special character to express the same query in a more verbose form:
+
+[source,bash]
+----
+curl 'http://localhost:8983/solr/gettingstarted/select' -d 'omitHeader=true' --data-urlencode 'q={!child of=$block_mask}(+price_i:[* TO 50] +{!field f="_nest_path_" v="/skus"})' --data-urlencode 'block_mask=(*:* -{!prefix f="_nest_path_" v="/skus/"})'
+----
+
+====
-nocommit: show both the "inline" nest path (explain escaping) and the param deref using "prefix" parser
=== Parent Query Parser
The inverse of the `{!child}` query parser is the `{!parent}` query parser, which lets you search for the _ancestor_ documents of some child documents matching a wrapped query. For a detailed explanation of this parser, see the section <<other-parsers.adoc#block-join-parent-query-parser,Block Join Parent Query Parser>>.
-nocommit: change this example to a query that matches "manuals"...
-
-Let's first consider this example of searching for all "sku" type documents that have a color of "RED"...
+Let's first consider this example of searching for all "manual" type documents that have exactly `1` page:
[source,bash]
----
-$ curl 'http://localhost:8983/solr/gettingstarted/select?omitHeader=true&q=color_s:RED'
+$ curl 'http://localhost:8983/solr/gettingstarted/select?omitHeader=true&q=pages_i:1'
{
- "response":{"numFound":2,"start":0,"maxScore":0.2449984,"numFoundExact":true,"docs":[
+ "response":{"numFound":3,"start":0,"maxScore":1.0,"numFoundExact":true,"docs":[
{
- "id":"P11!S21",
- "color_s":"RED",
- "price_i":42,
- "_version_":1672933224035123200},
+ "id":"P11!D41",
+ "name_s":"Red Swingline Brochure",
+ "pages_i":1,
+ "content_t":"...",
+ "_version_":1676585794196733952},
{
- "id":"P22!S22",
- "color_s":"RED",
- "price_i":89,
- "_version_":1672933224436727808}]
+ "id":"P11!D51",
+ "name_s":"Quick Reference Guide",
+ "pages_i":1,
+ "content_t":"How to use your stapler ...",
+ "_version_":1676585794196733952},
+ {
+ "id":"P22!D42",
+ "name_s":"Red Mont Blanc Brochure",
+ "pages_i":1,
+ "content_t":"...",
+ "_version_":1676585794347728896}]
}}
----
-nocommit: change this query to match all "ancestors" of the above query (products & skus)
-
-We can wrap that query in a `{!parent}` query to return the details of all products that have "RED" skus...
-
-nocommit: switch curl command to use `-d` for readability...
+We can wrap that query in a `{!parent}` query to return the details of all products that are ancestors of these manuals:
[source,bash]
----
-$ curl --globoff 'http://localhost:8983/solr/gettingstarted/select?omitHeader=true&q={!parent+which="*:*+-_nest_path_:*"}color_s:RED'
+$ curl 'http://localhost:8983/solr/gettingstarted/select' -d 'omitHeader=true' --data-urlencode 'q={!parent which="*:* -_nest_path_:*"}(+_nest_path_:\/skus\/manuals +pages_i:1)'
{
"response":{"numFound":2,"start":0,"maxScore":1.4E-45,"numFoundExact":true,"docs":[
{
"id":"P11!prod",
"name_s":"Swingline Stapler",
"description_t":"The Cadillac of office staplers ...",
- "_version_":1672933224035123200},
+ "_version_":1676585794196733952},
{
"id":"P22!prod",
"name_s":"Mont Blanc Fountain Pen",
"description_t":"A Premium Writing Instrument ...",
- "_version_":1672933224436727808}]
+ "_version_":1676585794347728896}]
}}
----
In this example we've used `\*:* -\_nest_path_:*` as our <<other-parsers#block-mask,`which` parameter>> to indicate we want to consider all documents which don't have a nest path -- ie: all "root" level documents -- as the set of possible parents.
-nocommit: now give a more interesting example, using which to only match the "sku" parents
+By changing the `which` param to match ancestors at specific `\_nest_path_` levels, we can change the type of ancestors we return. In the query below, we search for `skus` (using a `which` param that identifies all documents that do _not_ have a `\_nest_path_` with the prefix `/skus/*`) that are the ancestors of `manuals` with exactly `1` page:
+[source,bash]
+----
+$ curl 'http://localhost:8983/solr/gettingstarted/select' -d 'omitHeader=true' --data-urlencode 'q={!parent which="*:* -_nest_path_:\\/skus\\/*"}(+_nest_path_:\/skus\/manuals +pages_i:1)'
+{
+ "response":{"numFound":2,"start":0,"maxScore":1.4E-45,"numFoundExact":true,"docs":[
+ {
+ "id":"P11!S21",
+ "color_s":"RED",
+ "price_i":42,
+ "_version_":1676585794196733952},
+ {
+ "id":"P22!S22",
+ "color_s":"RED",
+ "price_i":89,
+ "_version_":1676585794347728896}]
+ }}
+----
+
+[CAUTION]
+====
+Note that in the above example, the `/` characters in the `\_nest_path_` were "double escaped" in the `which` parameter, for the <<#double-escaping-nest-path-slashes,same reasons discussed above>> regarding the `{!child}` parser's `of` parameter.
+====
=== Combining Block Join Query Parsers with Child Doc Transformer
@@ -215,19 +278,20 @@ Here for example is a query where:
[source,bash]
----
-$ curl --globoff 'http://localhost:8983/solr/gettingstarted/select?omitHeader=true&fq=color_s:RED&q={!child+of="*:*+-_nest_path_:*"+filters=$parent_fq}&parent_fq={!parent+which="*:*+-_nest_path_:*"}_nest_path_:"/manuals"+AND+content_t:"lifetime+guarantee"&fl=*,[child]'
+$ curl 'http://localhost:8983/solr/gettingstarted/select' -d 'omitHeader=true' -d 'fq=color_s:RED' --data-urlencode 'q={!child of="*:* -_nest_path_:*" filters=$parent_fq}' --data-urlencode 'parent_fq={!parent which="*:* -_nest_path_:*"}(+_nest_path_:"/manuals" +content_t:"lifetime guarantee")' -d 'fl=*,[child]'
{
"response":{"numFound":1,"start":0,"maxScore":1.4E-45,"numFoundExact":true,"docs":[
{
"id":"P11!S21",
"color_s":"RED",
"price_i":42,
- "_version_":1672933224035123200,
+ "_version_":1676585794196733952,
"manuals":[
{
"id":"P11!D41",
"name_s":"Red Swingline Brochure",
+ "pages_i":1,
"content_t":"...",
- "_version_":1672933224035123200}]}]
+ "_version_":1676585794196733952}]}]
}}
----