You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-issues@jackrabbit.apache.org by "Thomas Mueller (Jira)" <ji...@apache.org> on 2021/08/03 13:52:00 UTC
[jira] [Commented] (OAK-9522) Index cost estimation: prefer union
query with path restriction
[ https://issues.apache.org/jira/browse/OAK-9522?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17392314#comment-17392314 ]
Thomas Mueller commented on OAK-9522:
-------------------------------------
Initial patch (I will create a PR in git now):
{noformat}
svn diff
Index: oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexPathRestrictionTest.java
===================================================================
--- oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexPathRestrictionTest.java (revision 1891980)
+++ oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexPathRestrictionTest.java (working copy)
@@ -32,6 +32,7 @@
import org.apache.jackrabbit.oak.spi.commit.EditorHook;
import org.apache.jackrabbit.oak.spi.query.Cursor;
import org.apache.jackrabbit.oak.spi.query.Filter;
+import org.apache.jackrabbit.oak.spi.query.Filter.PathRestriction;
import org.apache.jackrabbit.oak.spi.query.QueryIndex;
import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
import org.apache.jackrabbit.oak.spi.state.NodeState;
@@ -102,7 +103,54 @@
f.restrictProperty("d/*/foo", Operator.EQUAL, PropertyValues.newString("bar"));
validateResult(f, of("/", "/test", "/test/c"));
}
+
+ @Test
+ public void entryCountWithNoPathRestriction() throws Exception {
+ IndexDefinitionBuilder idxBuilder =
+ new IndexDefinitionBuilder(rootBuilder.child(IndexConstants.INDEX_DEFINITIONS_NAME).
+ child("fooIndex"))
+ .noAsync().evaluatePathRestrictions();
+ idxBuilder.indexRule("nt:base").property("foo").propertyIndex();
+ idxBuilder.build();
+ commit();
+ NodeBuilder testRootBuilder = rootBuilder.child("test");
+ for(int i=0; i<100; i++) {
+ testRootBuilder.child("n" + i).setProperty("foo", "bar");
+ }
+ commit();
+
+ FilterImpl f;
+
+ // //*[foo = 'bar']
+ f = createFilter(root, NT_BASE);
+ f.restrictProperty("foo", Operator.EQUAL, PropertyValues.newString("bar"));
+ // equality check: we assume 5 different values
+ assertEquals(20, getEstimatedCount(f));
+
+ // /jcr:root/test/*[foo = 'bar']
+ f = createFilter(root, NT_BASE);
+ f.restrictProperty("foo", Operator.EQUAL, PropertyValues.newString("bar"));
+ f.restrictPath("/test", PathRestriction.DIRECT_CHILDREN);
+ // direct children + equality check: we assume 5 different values, and 50%
+ assertEquals(10, getEstimatedCount(f));
+
+ // /jcr:root/test//*[foo = 'bar']
+ f = createFilter(root, NT_BASE);
+ f.restrictProperty("foo", Operator.EQUAL, PropertyValues.newString("bar"));
+ f.restrictPath("/test", PathRestriction.ALL_CHILDREN);
+ // descendants + equality check: we assume 5 different values, and 90%
+ assertEquals(18, getEstimatedCount(f));
+
+ // /jcr:root/test/x[foo = 'bar']
+ f = createFilter(root, NT_BASE);
+ f.restrictProperty("foo", Operator.EQUAL, PropertyValues.newString("bar"));
+ f.restrictPath("/test/x", PathRestriction.EXACT);
+ // exact path + equality check: we assume 5 different values, and 90%
+ assertEquals(1, getEstimatedCount(f));
+
+ }
+
@Test
public void pathTranformationWithAllChildrenPathRestriction() throws Exception {
IndexDefinitionBuilder idxBuilder =
@@ -263,6 +311,13 @@
assertEquals(f.toString(), expected, paths);
}
+
+ private long getEstimatedCount(Filter f) {
+ List<QueryIndex.IndexPlan> plans = index.getPlans(f, null, root);
+ assertEquals("Only one plan must show up", 1, plans.size());
+ QueryIndex.IndexPlan plan = plans.get(0);
+ return plan.getEstimatedEntryCount();
+ }
private void commit() throws Exception {
root = HOOK.processCommit(rootBuilder.getBaseState(), rootBuilder.getNodeState(), EMPTY);
Index: oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndexPlanner.java
===================================================================
--- oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndexPlanner.java (revision 1891980)
+++ oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/query/FulltextIndexPlanner.java (working copy)
@@ -47,6 +47,7 @@
import org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition;
import org.apache.jackrabbit.oak.spi.filter.PathFilter;
import org.apache.jackrabbit.oak.spi.query.Filter;
+import org.apache.jackrabbit.oak.spi.query.Filter.PathRestriction;
import org.apache.jackrabbit.oak.spi.query.Filter.PropertyRestriction;
import org.apache.jackrabbit.oak.spi.query.QueryConstants;
import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextContains;
@@ -858,6 +859,35 @@
minNumDocs = docCntForField;
}
}
+ // Reduce the estimation if the index supports path restrictions,
+ // and we have one that we can use.
+ // We don't need to be exact here, because we don't know the
+ // exact number of nodes in the subtree - this is taken care of
+ // in the query engine.
+ // But we need to adjust the cost a bit, so that
+ // the cost is lower if there is a usable path condition
+ // (see below).
+ if (definition.evaluatePathRestrictions()) {
+ PathRestriction pathRestriction = filter.getPathRestriction();
+ if (pathRestriction == PathRestriction.EXACT || pathRestriction == PathRestriction.PARENT) {
+ // We have an exact path restriction and we have an index that indexes the path:
+ // then the result size is at most 1.
+ minNumDocs = 1;
+ } else if (pathRestriction == PathRestriction.DIRECT_CHILDREN) {
+ // We restrict to direct children: assume at most 50% are there.
+ minNumDocs /= 2;
+ } else if (pathRestriction != PathRestriction.NO_RESTRICTION) {
+ // Other restriction: assume at most 90%.
+ // This is important if we have
+ // "descendantnode(/content/abc) or issamenode(/content/abc)":
+ // We could either convert it to a union, or not use the path restriction.
+ // In this case, we want to convert it to a union!
+ // A path restriction will reduce the number of entries.
+ // So the cost of having a usable path condition is
+ // lower than the cost of not having a path condition at all.
+ minNumDocs = (int) (minNumDocs * 0.9);
+ }
+ }
return minNumDocs;
}
{noformat}
> Index cost estimation: prefer union query with path restriction
> ---------------------------------------------------------------
>
> Key: OAK-9522
> URL: https://issues.apache.org/jira/browse/OAK-9522
> Project: Jackrabbit Oak
> Issue Type: Improvement
> Reporter: Thomas Mueller
> Assignee: Thomas Mueller
> Priority: Major
> Labels: index, lucene, query
>
> If there is a query of this form with an index that supports path restrictions (evaluatePathRestrictions = true):
> {noformat}
> select * from [nt:base] where [indexedProperty] = x
> and (issamenode('/content') or isdescendantnode('/content'))
> {noformat}
> then the path restriction isn't used in the index.
> This is unfortunate, because the index returns a lot more results, and the query engine will have to check if the path matches the condition.
> How this works: the query engine creates two possible queries:
> * plan a: one with just the [indexedProperty] = x condition, and no path restriction
> * plan b: a union query, where one side uses issamenode and the other side uses isdescendantnode.
> The index will return the same cost no matter if a path condition is there or not. And that's why the query engine will pick plan a: because the cost of the union query is higher.
> The index should return a lower cost if there is a path restriction. (Right now the query engine will reduce the cost if there is a path restriction, but that's not sufficient for this case).
--
This message was sent by Atlassian Jira
(v8.3.4#803005)