You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ho...@apache.org on 2020/08/13 00:59:44 UTC
[lucene-solr] branch jira/SOLR-14383 updated: SOLR-14383: minor
typo/inconsistency fix in sample doc ids
This is an automated email from the ASF dual-hosted git repository.
hossman pushed a commit to branch jira/SOLR-14383
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/jira/SOLR-14383 by this push:
new 3095dd2 SOLR-14383: minor typo/inconsistency fix in sample doc ids
3095dd2 is described below
commit 3095dd23958aa3dd9190ca698e1258f994ad374c
Author: Chris Hostetter <ho...@apache.org>
AuthorDate: Wed Aug 12 17:40:49 2020 -0700
SOLR-14383: minor typo/inconsistency fix in sample doc ids
Also add a unit test of hueristic for 'safe' of/which params based on _nest_path_ before trying to figure out how to document it clearly
(This is basically the manual 'long form' way of writing child/parent queries using the (hypothetical) syntactic sugar described in SOLR-14687)
---
.../solr/update/TestNestedUpdateProcessor.java | 309 +++++++++++++++++++++
.../src/indexing-nested-documents.adoc | 2 +-
solr/solr-ref-guide/src/other-parsers.adoc | 4 +-
3 files changed, 313 insertions(+), 2 deletions(-)
diff --git a/solr/core/src/test/org/apache/solr/update/TestNestedUpdateProcessor.java b/solr/core/src/test/org/apache/solr/update/TestNestedUpdateProcessor.java
index 49177a9..83305a8 100644
--- a/solr/core/src/test/org/apache/solr/update/TestNestedUpdateProcessor.java
+++ b/solr/core/src/test/org/apache/solr/update/TestNestedUpdateProcessor.java
@@ -17,7 +17,15 @@
package org.apache.solr.update;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.util.TestUtil;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrException;
@@ -25,6 +33,7 @@ import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.update.processor.NestedUpdateProcessorFactory;
import org.apache.solr.update.processor.UpdateRequestProcessor;
+import org.apache.solr.common.params.SolrParams;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Rule;
@@ -209,4 +218,304 @@ public class TestNestedUpdateProcessor extends SolrTestCaseJ4 {
updateJ(cmd, null);
assertU(commit());
}
+
+ /**
+ * Randomized test to look for flaws in the documented approach for building "safe" values of the
+ * <code>of</code> / <code>which</code> params in the <code>child</code> / <code>parent</code> QParsers
+ * when a specific <code>_nest_path_</code> is desired
+ *
+ * @see <a href="https://issues.apache.org/jira/browse/SOLR-14687">SOLR-14687</a>
+ */
+ public void testRandomNestPathQueryFiltering() throws Exception {
+
+ // First: build a bunch of complex randomly nested documents, with random "nest paths"
+ // re-use the same "path segments" at various levels of nested, so as to confuse things even more
+ final RandomNestedDocModel docs = new RandomNestedDocModel();
+ for (int i = 0; i < 50; i++) {
+ final SolrInputDocument rootDoc = docs.buildRandomDoc();
+ assertU(adoc(rootDoc));
+ }
+ assertU(commit());
+
+ // now do some systematic parent/child queries.
+ // we're checking both for "parser errors" (ie: children matching "parent filter")
+ // as well as that the test_path_s of all matching docs meets our expectations
+
+ // *:* w/ parent parser...
+ // starts at "root" parent_path and recurses until we get no (expected) results
+ assertTrue(// we expected at least one query for every "real" path,
+ // but there will be more because we'll try lots of sub-paths that have no docs
+ docs.numDocsDescendentFromPath.keySet().size()
+ < docs.recursiveCheckParentQueryOfAllChildren(Collections.<String>emptyList()));
+ // sanity check: path that is garunteed not to exist...
+ assertEquals(1, docs.recursiveCheckParentQueryOfAllChildren(Arrays.asList("xxx", "yyy")));
+
+ // *:* w/ child parser...
+ // starts at "root" parent_path and recurses until we get no (expected) results
+ assertTrue(// we expected at least one query for every "real" path,
+ // but there will be more because we'll try lots of sub-paths that have no docs
+ docs.numDocsWithPathWithKids.keySet().size()
+ < docs.recursiveCheckChildQueryOfAllParents(Collections.<String>emptyList()));
+ // sanity check: path that is garunteed not to exist...
+ assertEquals(1, docs.recursiveCheckChildQueryOfAllParents(Arrays.asList("xxx", "yyy")));
+
+ // quering against individual child ids w/ both parent & child parser...
+ docs.checkParentAndChildQueriesOfEachDocument();
+ }
+
+ private static class RandomNestedDocModel {
+ public static final List<String> PATH_ELEMENTS = Arrays.asList("aa", "bb", "cc", "dd");
+
+ private final Map<String,SolrInputDocument> allDocs = new HashMap<>();
+
+ public final Map<String,Integer> numDocsDescendentFromPath = new HashMap<>();
+ public final Map<String,Integer> numDocsWithPathWithKids = new HashMap<>();
+
+ private int idCounter = 0;
+
+ public synchronized SolrInputDocument buildRandomDoc() {
+ return buildRandomDoc(null, Collections.<String>emptyList(), 15);
+ }
+ private static String joinPath(List<String> test_path) {
+ return "/" + String.join("/", test_path);
+ }
+ private synchronized SolrInputDocument buildRandomDoc(SolrInputDocument parent,
+ List<String> test_path,
+ int maxDepthAndBreadth) {
+ final String path_string = joinPath(test_path);
+ final String id = "" + (++idCounter);
+ maxDepthAndBreadth--;
+ final SolrInputDocument doc = sdoc
+ ("id", id,
+ // may change, but we want it 0 even if we never add any
+ "num_direct_kids_s", "0",
+ // conceptually matches _nest_path_ but should be easier to make assertions about (no inline position #s)
+ "test_path_s", path_string);
+ if (null != parent) {
+ // order matters: if we add the Collection first, SolrInputDocument will try to reuse it
+ doc.addField("ancestor_ids_ss", parent.getFieldValue("id"));
+ if (parent.containsKey("ancestor_ids_ss")) { // sigh: getFieldValues returns null, not empty collection
+ doc.addField("ancestor_ids_ss", parent.getFieldValues("ancestor_ids_ss"));
+ }
+ }
+
+ for (int i = 0; i < test_path.size(); i++) {
+ // NOTE: '<' not '<=" .. we only includes paths we are descendents of, not our full path...
+ numDocsDescendentFromPath.merge(joinPath(test_path.subList(0, i)), 1, Math::addExact);
+ }
+
+ if (0 < maxDepthAndBreadth) {
+ final int numDirectKids = TestUtil.nextInt(random(), 0, Math.min(4, maxDepthAndBreadth));
+ doc.setField("num_direct_kids_s", "" + numDirectKids);
+ if (0 < numDirectKids) {
+ numDocsWithPathWithKids.merge(path_string, 1, Math::addExact);
+ }
+ maxDepthAndBreadth -= numDirectKids;
+ for (int i = 0; i < numDirectKids; i++) {
+ final String kidType = PATH_ELEMENTS.get(random().nextInt(PATH_ELEMENTS.size()));
+ final List<String> kid_path = new ArrayList<>(test_path);
+ kid_path.add(kidType);
+ final SolrInputDocument kid = buildRandomDoc(doc, kid_path, maxDepthAndBreadth);
+ doc.addField(kidType, kid);
+ // order matters: if we add the Collection first, SolrInputDocument will try to reuse it
+ doc.addField("descendent_ids_ss", kid.getFieldValue("id"));
+ if (kid.containsKey("descendent_ids_ss")) { // sigh: getFieldValues returns null, not empty collection
+ doc.addField("descendent_ids_ss", kid.getFieldValues("descendent_ids_ss"));
+ }
+ }
+ }
+ allDocs.put(id, doc);
+ return doc;
+ }
+
+
+ /**
+ * Loops over the 'model' of every document we've indexed, asserting that
+ * parent/child queries wrapping an '<code>id:foo</code> using various paths
+ * match the expected ancestors/descendents
+ */
+ public void checkParentAndChildQueriesOfEachDocument() {
+ assertFalse("You didn't build any docs", allDocs.isEmpty());
+
+ for (String doc_id : allDocs.keySet()) {
+ final String doc_path = allDocs.get(doc_id).getFieldValue("test_path_s").toString();
+
+ if ( ! doc_path.equals("/") ) {
+
+ // doc_id -> descdentId must have at least one ancestor (since it's not a root level document)
+ final String descendentId = doc_id;
+ assert allDocs.get(descendentId).containsKey("ancestor_ids_ss");
+ final List<Object> allAncestorIds = new ArrayList<>(allDocs.get(descendentId).getFieldValues("ancestor_ids_ss"));
+
+ // pick a random ancestor to use in our testing...
+ final String ancestorId = allAncestorIds.get(random().nextInt(allAncestorIds.size())).toString();
+ final String ancestor_path = allDocs.get(ancestorId).getFieldValue("test_path_s").toString();
+
+ final Collection<Object> allOfAncestorsDescendentIds
+ = allDocs.get(ancestorId).getFieldValues("descendent_ids_ss");
+
+ assertTrue("Sanity check " + ancestorId + " ancestor of " + descendentId,
+ allOfAncestorsDescendentIds.contains(descendentId));
+
+ // now we should be able to assert that a 'parent' query wrapped around a query for the descendentId
+ // using the ancestor_path should match exactly one doc: our ancestorId...
+ assertQ(req(parentQueryMaker(ancestor_path, "id:" + descendentId),
+ "_trace_path_tested", ancestor_path,
+ "fl", "id",
+ "indent", "true")
+ , "//result/@numFound=1"
+ , "//doc/str[@name='id'][.='"+ancestorId+"']"
+ );
+
+ // meanwhile, a 'child' query wrapped arround a query for the ancestorId, using the ancestor_path,
+ // should match all of it's descendents (for simplicity we'll check just the numFound and the
+ // 'descendentId' we started with)
+ assertQ(req(childQueryMaker(ancestor_path, "id:" + ancestorId),
+ "_trace_path_tested", ancestor_path,
+ "rows", "9999",
+ "fl", "id",
+ "indent", "true")
+ , "//result/@numFound="+allOfAncestorsDescendentIds.size()
+ , "//doc/str[@name='id'][.='"+descendentId+"']"
+ );
+
+ }
+
+ // regardless of wether doc_id has an ancestor or not, a 'parent' query with a path that isn't a
+ // prefix of the path of the (child) doc_id in the wrapped query should match 0 docs w/o failing
+ assertQ(req(parentQueryMaker("/xxx/yyy", "id:" + doc_id),
+ "_trace_path_tested", "/xxx/yyy",
+ "indent", "true")
+ , "//result/@numFound=0");
+
+ // likewise: a 'child' query wrapped around a query for our doc_id (regardless of wether if has
+ // any kids), using a path that doesn't start with the same prefix as doc_id, should match 0
+ // docs w/o failing
+ assertQ(req(childQueryMaker("/xxx/yyy", "id:" + doc_id),
+ "_trace_path_tested", "/xxx/yyy",
+ "indent", "true")
+ , "//result/@numFound=0");
+
+ // lastly: wrapping a child query around a query for our doc_id, using a path that "extends"
+ // the doc_id's path should always get 0 results if that path doesn't match any actual kids
+ // (regardless of wether doc_id has any children/descendents)
+ assertQ(req(childQueryMaker(doc_path + "/xxx/yyy", "id:" + doc_id),
+ "_trace_path_tested", doc_path + "/xxx/yyy",
+ "indent", "true")
+ , "//result/@numFound=0");
+ }
+ }
+
+
+ /**
+ * recursively check path permutations using <code>*:*</code> inner query, asserting that the
+ * only docs matched have the expected path, and at least one kid (since this is the "parents" parser)
+ *
+ * (using <code>*:*</code> as our inner query keeps the validation simple and also helps stress out
+ * risk of matching incorrect docs if the 'which' param is wrong)
+ *
+ * @return total number of queries checked (assuming no assertion failures)
+ */
+ public int recursiveCheckParentQueryOfAllChildren(List<String> parent_path) {
+ final String p = joinPath(parent_path);
+ final int expectedParents = numDocsWithPathWithKids.getOrDefault(p, 0);
+ assertQ(req(parentQueryMaker(p, "*:*"),
+ "rows", "9999",
+ "_trace_path_tested", p,
+ "fl", "test_path_s,num_direct_kids_s",
+ "indent", "true")
+ , "//result/@numFound="+expectedParents
+ , "count(//doc)="+expectedParents
+ , "count(//doc/str[@name='test_path_s'][.='"+p+"'])="+expectedParents
+ , "0=count(//doc/str[@name='num_direct_kids_s'][.='0'])"
+ );
+ int numChecked = 1;
+
+ // no point in recursing on the current path if we already have no results found...
+ if (0 < expectedParents) {
+ for (String next : PATH_ELEMENTS) {
+ final List<String> next_path = new ArrayList<>(parent_path);
+ next_path.add(next);
+ numChecked += recursiveCheckParentQueryOfAllChildren(next_path);
+ }
+ }
+ return numChecked;
+ }
+
+ /**
+ * This implements the "safe query based on parent path" rules we're sanity checking.
+ *
+ * @param parent_path the nest path of the parents to consider
+ * @param inner_child_query the specific children whose ancestors we are looking for, must be simple string <code>*:*</code> or <code>id:foo</code>
+ */
+ private SolrParams parentQueryMaker(String parent_path, String inner_child_query) {
+ assertValidPathSytax(parent_path);
+
+ if (parent_path.equals("/")) {
+ return params("q", "{!parent which=$parent_filt v=$child_q}",
+ "parent_filt", "(*:* -_nest_path_:*)",
+ "child_q", "(+" + inner_child_query + " +_nest_path_:*)");
+ } // else...
+ return params("q", "{!parent which=$parent_filt v=$child_q})",
+ "parent_filt", "(*:* -{!prefix f='_nest_path_' v='"+parent_path+"/'})",
+ "child_q", "(+" + inner_child_query + " +{!prefix f='_nest_path_' v='"+parent_path+"/'})");
+ }
+
+ /**
+ * recursively check path permutations using <code>*:*</code> inner query, asserting that the
+ * only docs matched have paths that include the specified path as a (strict) prefix
+ *
+ * (using <code>*:*</code> as our inner query keeps the validation simple and also helps stress out
+ * risk of matching incorrect docs if the 'of' param is wrong)
+ *
+ * @return total number of queries checked (assuming no assertion failures)
+ */
+ public int recursiveCheckChildQueryOfAllParents(List<String> parent_path) {
+ final String p = joinPath(parent_path);
+ final int expectedMatches = numDocsDescendentFromPath.getOrDefault(p, 0);
+ assertQ(req(childQueryMaker(p, "*:*"),
+ "rows", "9999",
+ "_trace_path_tested", p,
+ "fl", "test_path_s",
+ "indent", "true")
+ , "//result/@numFound="+expectedMatches
+ , "count(//doc)="+expectedMatches
+ , "count(//doc/str[@name='test_path_s'][starts-with(., '"+p+"')])="+expectedMatches
+ );
+ int numChecked = 1;
+
+ // no point in recursing on the current path if we already have no results found...
+ if (0 < expectedMatches) {
+ for (String next : PATH_ELEMENTS) {
+ final List<String> next_path = new ArrayList<>(parent_path);
+ next_path.add(next);
+ numChecked += recursiveCheckChildQueryOfAllParents(next_path);
+ }
+ }
+ return numChecked;
+ }
+
+ /**
+ * This implements the "safe query based on parent path" rules we're sanity checking.
+ *
+ * @param parent_path the nest path of the parents to consider
+ * @param inner_parent_query the specific parents whose descendents we are looking for, must be simple string <code>*:*</code> or <code>id:foo</code>
+ */
+ private SolrParams childQueryMaker(String parent_path, String inner_parent_query) {
+ assertValidPathSytax(parent_path);
+ if (parent_path.equals("/")) {
+ return params("q", "{!child of=$parent_filt v=$parent_q})",
+ "parent_filt", "(*:* -_nest_path_:*)",
+ "parent_q", "(+" + inner_parent_query + " -_nest_path_:*)");
+ } // else...
+ return params("q", "{!child of=$parent_filt v=$parent_q})",
+ "parent_filt", "(*:* -{!prefix f='_nest_path_' v='"+parent_path+"/'})",
+ "parent_q", "(+" + inner_parent_query + " +{!field f='_nest_path_' v='"+parent_path+"'})");
+ }
+
+ private void assertValidPathSytax(String path) {
+ assert path.startsWith("/");
+ assert (1 == path.length()) ^ ! path.endsWith("/");
+ }
+ }
}
diff --git a/solr/solr-ref-guide/src/indexing-nested-documents.adoc b/solr/solr-ref-guide/src/indexing-nested-documents.adoc
index 85c6943..4c8c9ce 100644
--- a/solr/solr-ref-guide/src/indexing-nested-documents.adoc
+++ b/solr/solr-ref-guide/src/indexing-nested-documents.adoc
@@ -85,7 +85,7 @@ Even though the child documents in these examples are provided syntactically as
"skus": [ { "id": "P22!S22",
"color_s": "RED",
"price_i": 89,
- "manuals": [ { "id": "P21!D41",
+ "manuals": [ { "id": "P22!D42",
"name_s": "Red Mont Blanc Brochure",
"content_t": "..."
} ]
diff --git a/solr/solr-ref-guide/src/other-parsers.adoc b/solr/solr-ref-guide/src/other-parsers.adoc
index d0bc731..788cdb3 100644
--- a/solr/solr-ref-guide/src/other-parsers.adoc
+++ b/solr/solr-ref-guide/src/other-parsers.adoc
@@ -32,7 +32,9 @@ Many of these parsers are expressed the same way as <<local-parameters-in-querie
// nocommit: so people can understand them (and so other sections of the ref-guide can link here for
// nocommit: actual explanaiton)
-
+// nocommit: find a way to write up a susincet & clear description of the process/hueristic demonstrated in
+// nocommit: TestNestedUpdateProcessor.testRandomNestPathQueryFiltering
+// nocommit: AKA: the "manual" form of the syntactic sugar proposed in SOLR-14687
There are two query parsers that support block joins. These parsers allow indexing and searching for relational content that has been <<indexing-nested-documents.adoc#indexing-nested-documents, indexed as Nested Documents>>.