You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-issues@jackrabbit.apache.org by "Thomas Mueller (Jira)" <ji...@apache.org> on 2021/08/03 13:52:00 UTC

[jira] [Commented] (OAK-9522) Index cost estimation: prefer union query with path restriction

    [ https://issues.apache.org/jira/browse/OAK-9522?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17392314#comment-17392314 ] 

Thomas Mueller commented on OAK-9522:
-------------------------------------

Initial patch (I will create a PR in git now):
{noformat}
 svn diff
Index: oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexPathRestrictionTest.java
===================================================================
--- oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexPathRestrictionTest.java	(revision 1891980)
+++ oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexPathRestrictionTest.java	(working copy)
@@ -32,6 +32,7 @@
 import org.apache.jackrabbit.oak.spi.commit.EditorHook;
 import org.apache.jackrabbit.oak.spi.query.Cursor;
 import org.apache.jackrabbit.oak.spi.query.Filter;
+import org.apache.jackrabbit.oak.spi.query.Filter.PathRestriction;
 import org.apache.jackrabbit.oak.spi.query.QueryIndex;
 import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
 import org.apache.jackrabbit.oak.spi.state.NodeState;
@@ -102,7 +103,54 @@
         f.restrictProperty("d/*/foo", Operator.EQUAL, PropertyValues.newString("bar"));
         validateResult(f, of("/", "/test", "/test/c"));
     }
+    
+    @Test
+    public void entryCountWithNoPathRestriction() throws Exception {
+        IndexDefinitionBuilder idxBuilder =
+                new IndexDefinitionBuilder(rootBuilder.child(IndexConstants.INDEX_DEFINITIONS_NAME).
+                        child("fooIndex"))
+                        .noAsync().evaluatePathRestrictions();
+        idxBuilder.indexRule("nt:base").property("foo").propertyIndex();
+        idxBuilder.build();
+        commit();
 
+        NodeBuilder testRootBuilder = rootBuilder.child("test");
+        for(int i=0; i<100; i++) {
+            testRootBuilder.child("n" + i).setProperty("foo", "bar");
+        }
+        commit();
+
+        FilterImpl f;
+
+        // //*[foo = 'bar']
+        f = createFilter(root, NT_BASE);
+        f.restrictProperty("foo", Operator.EQUAL, PropertyValues.newString("bar"));
+        // equality check: we assume 5 different values
+        assertEquals(20, getEstimatedCount(f));
+
+        // /jcr:root/test/*[foo = 'bar']
+        f = createFilter(root, NT_BASE);
+        f.restrictProperty("foo", Operator.EQUAL, PropertyValues.newString("bar"));
+        f.restrictPath("/test", PathRestriction.DIRECT_CHILDREN);
+        // direct children + equality check: we assume 5 different values, and 50%
+        assertEquals(10, getEstimatedCount(f));
+
+        // /jcr:root/test//*[foo = 'bar']
+        f = createFilter(root, NT_BASE);
+        f.restrictProperty("foo", Operator.EQUAL, PropertyValues.newString("bar"));
+        f.restrictPath("/test", PathRestriction.ALL_CHILDREN);
+        // descendants + equality check: we assume 5 different values, and 90%
+        assertEquals(18, getEstimatedCount(f));
+
+        // /jcr:root/test/x[foo = 'bar']
+        f = createFilter(root, NT_BASE);
+        f.restrictProperty("foo", Operator.EQUAL, PropertyValues.newString("bar"));
+        f.restrictPath("/test/x", PathRestriction.EXACT);
+        // exact path + equality check: we assume 5 different values, and 90%
+        assertEquals(1, getEstimatedCount(f));
+
+    }
+
     @Test
     public void pathTranformationWithAllChildrenPathRestriction() throws Exception {
         IndexDefinitionBuilder idxBuilder =
@@ -263,6 +311,13 @@
 
         assertEquals(f.toString(), expected, paths);
     }
+    
+    private long getEstimatedCount(Filter f) {
+        List<QueryIndex.IndexPlan> plans = index.getPlans(f, null, root);
+        assertEquals("Only one plan must show up", 1, plans.size());
+        QueryIndex.IndexPlan plan = plans.get(0);
+        return plan.getEstimatedEntryCount();
+    }
 
     private void commit() throws Exception {
         root = HOOK.processCommit(rootBuilder.getBaseState(), rootBuilder.getNodeState(), EMPTY);
Index: oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndexPlanner.java
===================================================================
--- oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndexPlanner.java	(revision 1891980)
+++ oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndexPlanner.java	(working copy)
@@ -47,6 +47,7 @@
 import org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition;
 import org.apache.jackrabbit.oak.spi.filter.PathFilter;
 import org.apache.jackrabbit.oak.spi.query.Filter;
+import org.apache.jackrabbit.oak.spi.query.Filter.PathRestriction;
 import org.apache.jackrabbit.oak.spi.query.Filter.PropertyRestriction;
 import org.apache.jackrabbit.oak.spi.query.QueryConstants;
 import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextContains;
@@ -858,6 +859,35 @@
                 minNumDocs = docCntForField;
             }
         }
+        // Reduce the estimation if the index supports path restrictions,
+        // and we have one that we can use.
+        // We don't need to be exact here, because we don't know the
+        // exact number of nodes in the subtree - this is taken care of
+        // in the query engine.
+        // But we need to adjust the cost a bit, so that
+        // the cost is lower if there is a usable path condition
+        // (see below).
+        if (definition.evaluatePathRestrictions()) {
+            PathRestriction pathRestriction = filter.getPathRestriction();
+            if (pathRestriction == PathRestriction.EXACT || pathRestriction == PathRestriction.PARENT) {
+                // We have an exact path restriction and we have an index that indexes the path:
+                // then the result size is at most 1.
+                minNumDocs = 1;
+            } else if (pathRestriction == PathRestriction.DIRECT_CHILDREN) {
+                // We restrict to direct children: assume at most 50% are there.
+                minNumDocs /= 2;
+            } else if (pathRestriction != PathRestriction.NO_RESTRICTION) {
+                // Other restriction: assume at most 90%.
+                // This is important if we have
+                // "descendantnode(/content/abc) or issamenode(/content/abc)":
+                // We could either convert it to a union, or not use the path restriction.
+                // In this case, we want to convert it to a union!
+                // A path restriction will reduce the number of entries.
+                // So the cost of having a usable path condition is
+                // lower than the cost of not having a path condition at all.
+                minNumDocs = (int) (minNumDocs * 0.9);
+            }
+        }
         return minNumDocs;
     }
{noformat}

> Index cost estimation: prefer union query with path restriction
> ---------------------------------------------------------------
>
>                 Key: OAK-9522
>                 URL: https://issues.apache.org/jira/browse/OAK-9522
>             Project: Jackrabbit Oak
>          Issue Type: Improvement
>            Reporter: Thomas Mueller
>            Assignee: Thomas Mueller
>            Priority: Major
>              Labels: index, lucene, query
>
> If there is a query of this form with an index that supports path restrictions (evaluatePathRestrictions = true):
> {noformat}
> select * from [nt:base] where [indexedProperty] = x 
> and (issamenode('/content') or isdescendantnode('/content'))
> {noformat}
> then the path restriction isn't used in the index.
> This is unfortunate, because the index returns a lot more results, and the query engine will have to check if the path matches the condition.
> How this works: the query engine creates two possible queries:
> * plan a: one with just the [indexedProperty] = x condition, and no path restriction
> * plan b: a union query, where one side uses issamenode and the other side uses isdescendantnode.
> The index will return the same cost no matter if a path condition is there or not. And that's why the query engine will pick plan a: because the cost of the union query is higher.
> The index should return a lower cost if there is a path restriction. (Right now the query engine will reduce the cost if there is a path restriction, but that's not sufficient for this case).



--
This message was sent by Atlassian Jira
(v8.3.4#803005)