You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by th...@apache.org on 2014/12/08 14:12:14 UTC
svn commit: r1643807 - in /jackrabbit/oak/trunk/oak-core/src:
main/java/org/apache/jackrabbit/oak/plugins/index/counter/jmx/
main/java/org/apache/jackrabbit/oak/plugins/index/nodetype/
main/java/org/apache/jackrabbit/oak/plugins/index/property/strategy...
Author: thomasm
Date: Mon Dec 8 13:12:13 2014
New Revision: 1643807
URL: http://svn.apache.org/r1643807
Log:
OAK-1907 Better cost estimates for traversal, property, and ordered indexes
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/counter/jmx/NodeCounter.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/nodetype/NodeTypeIndex.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/property/strategy/ContentMirrorStoreStrategy.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/property/strategy/UniqueEntryStoreStrategy.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/index/FilterImpl.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/index/TraversingIndex.java
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/property/PropertyIndexTest.java
jackrabbit/oak/trunk/oak-core/src/test/resources/org/apache/jackrabbit/oak/query/sql2_index.txt
Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/counter/jmx/NodeCounter.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/counter/jmx/NodeCounter.java?rev=1643807&r1=1643806&r2=1643807&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/counter/jmx/NodeCounter.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/counter/jmx/NodeCounter.java Mon Dec 8 13:12:13 2014
@@ -28,6 +28,7 @@ import org.apache.jackrabbit.oak.plugins
import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.apache.jackrabbit.oak.spi.state.NodeStore;
+import org.apache.jackrabbit.oak.util.ApproximateCounter;
/**
* A mechanism to retrieve node counter data.
@@ -58,21 +59,41 @@ public class NodeCounter implements Node
@Override
public long getEstimatedNodeCount(String path) {
+ return getEstimatedNodeCount(store.getRoot(), path, false);
+ }
+
+ /**
+ * Get the estimated number of nodes for a given path.
+ *
+ * @param root the root
+ * @param path the path
+ * @param max whether to get the maximum expected number of nodes (the
+ * stored value plus the resolution)
+ * @return -1 if unknown, 0 if the node does not exist (or, if max is false,
+ * if there are probably not many descendant nodes), or the
+ * (maximum) estimated number of descendant nodes
+ */
+ public static long getEstimatedNodeCount(NodeState root, String path, boolean max) {
// check if there is a property in the node itself
// (for property index nodes)
- NodeState s = child(store.getRoot(),
+ NodeState s = child(root,
PathUtils.elements(path));
- if (s == null) {
+ if (s == null || !s.exists()) {
// node not found
- return -1;
+ return 0;
}
PropertyState p = s.getProperty(NodeCounterEditor.COUNT_PROPERTY_NAME);
if (p != null) {
- return p.getValue(Type.LONG);
+ long x = p.getValue(Type.LONG);
+ if (max) {
+ // in the node itself, we just add the resolution
+ x += ApproximateCounter.COUNT_RESOLUTION;
+ }
+ return x;
}
// check in the counter index (if it exists)
- s = child(store.getRoot(),
+ s = child(root,
IndexConstants.INDEX_DEFINITIONS_NAME,
"counter",
NodeCounterEditor.DATA_NODE_NAME);
@@ -81,16 +102,31 @@ public class NodeCounter implements Node
return -1;
}
s = child(s, PathUtils.elements(path));
- if (s == null) {
+ if (s == null || !s.exists()) {
// we have an index, but no data
- return 0;
+ long x = 0;
+ if (max) {
+ // in the index, the resolution is lower
+ x += ApproximateCounter.COUNT_RESOLUTION * 10;
+ }
+
+ return x;
}
p = s.getProperty(NodeCounterEditor.COUNT_PROPERTY_NAME);
if (p == null) {
// we have an index, but no data
- return 0;
+ long x = 0;
+ if (max) {
+ // in the index, the resolution is lower
+ x += ApproximateCounter.COUNT_RESOLUTION * 10;
+ }
+ return x;
}
- return p.getValue(Type.LONG);
+ long x = p.getValue(Type.LONG);
+ if (max) {
+ x += ApproximateCounter.COUNT_RESOLUTION;
+ }
+ return x;
}
@Override
Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/nodetype/NodeTypeIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/nodetype/NodeTypeIndex.java?rev=1643807&r1=1643806&r2=1643807&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/nodetype/NodeTypeIndex.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/nodetype/NodeTypeIndex.java Mon Dec 8 13:12:13 2014
@@ -71,7 +71,7 @@ class NodeTypeIndex implements QueryInde
@Override
public String getPlan(Filter filter, NodeState root) {
- return filter.toString();
+ return "nodeType " + filter.toString();
}
@Override
Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/property/strategy/ContentMirrorStoreStrategy.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/property/strategy/ContentMirrorStoreStrategy.java?rev=1643807&r1=1643806&r2=1643807&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/property/strategy/ContentMirrorStoreStrategy.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/property/strategy/ContentMirrorStoreStrategy.java Mon Dec 8 13:12:13 2014
@@ -181,11 +181,20 @@ public class ContentMirrorStoreStrategy
public long count(Filter filter, NodeState indexMeta, final String indexStorageNodeName,
Set<String> values, int max) {
NodeState index = indexMeta.getChildNode(indexStorageNodeName);
- int count = 0;
+ long count = 0;
if (values == null) {
PropertyState ec = indexMeta.getProperty(ENTRY_COUNT_PROPERTY_NAME);
if (ec != null) {
- return ec.getValue(Type.LONG);
+ count = ec.getValue(Type.LONG);
+ if (count >= 0) {
+ return count;
+ }
+ }
+ if (count == 0) {
+ PropertyState ap = index.getProperty(ApproximateCounter.COUNT_PROPERTY_NAME);
+ if (ap != null) {
+ return ap.getValue(Type.LONG);
+ }
}
CountingNodeVisitor v = new CountingNodeVisitor(max);
v.visit(index);
@@ -201,21 +210,39 @@ public class ContentMirrorStoreStrategy
}
PropertyState ec = indexMeta.getProperty(ENTRY_COUNT_PROPERTY_NAME);
if (ec != null) {
- long entryCount = ec.getValue(Type.LONG);
- // assume 10000 entries per key, so that this index is used
- // instead of traversal, but not instead of a regular property index
- long keyCount = entryCount / 10000;
- ec = indexMeta.getProperty(KEY_COUNT_PROPERTY_NAME);
- if (ec != null) {
- keyCount = ec.getValue(Type.LONG);
- }
- // cast to double to avoid overflow
- // (entryCount could be Long.MAX_VALUE)
- // the cost is not multiplied by the size,
- // otherwise the traversing index might be used
- keyCount = Math.max(1, keyCount);
- return (long) ((double) entryCount / keyCount) + size;
+ count = ec.getValue(Type.LONG);
+ if (count >= 0) {
+ // assume 10000 entries per key, so that this index is used
+ // instead of traversal, but not instead of a regular property index
+ long keyCount = count / 10000;
+ ec = indexMeta.getProperty(KEY_COUNT_PROPERTY_NAME);
+ if (ec != null) {
+ keyCount = ec.getValue(Type.LONG);
+ }
+ // cast to double to avoid overflow
+ // (entryCount could be Long.MAX_VALUE)
+ // the cost is not multiplied by the size,
+ // otherwise the traversing index might be used
+ keyCount = Math.max(1, keyCount);
+ return (long) ((double) count / keyCount) + size;
+ }
}
+ long approxMax = 0;
+ if (count == 0) {
+ PropertyState ap = index.getProperty(ApproximateCounter.COUNT_PROPERTY_NAME);
+ if (ap != null) {
+ for (String p : values) {
+ NodeState s = index.getChildNode(p);
+ if (s.exists()) {
+ ap = s.getProperty(ApproximateCounter.COUNT_PROPERTY_NAME);
+ if (ap != null) {
+ approxMax += ap.getValue(Type.LONG);
+ }
+ }
+ }
+ }
+ }
+ count = 0;
max = Math.max(10, max / size);
int i = 0;
String filterRootPath = null;
@@ -223,6 +250,11 @@ public class ContentMirrorStoreStrategy
filter.getPathRestriction().equals(Filter.PathRestriction.ALL_CHILDREN)) {
filterRootPath = filter.getPath();
}
+ if (filterRootPath == null && approxMax > 0) {
+ // we do have an approximation, and
+ // there is no path filter
+ return approxMax;
+ }
for (String p : values) {
if (count > max && i > 3) {
// the total count is extrapolated from the the number
@@ -248,6 +280,12 @@ public class ContentMirrorStoreStrategy
}
i++;
}
+ if (approxMax > 0 && approxMax > count) {
+ // we do have an approximation, and
+ // it is higher than what we counted
+ // (we don't count that far)
+ count = approxMax;
+ }
}
return count;
}
Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/property/strategy/UniqueEntryStoreStrategy.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/property/strategy/UniqueEntryStoreStrategy.java?rev=1643807&r1=1643806&r2=1643807&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/property/strategy/UniqueEntryStoreStrategy.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/property/strategy/UniqueEntryStoreStrategy.java Mon Dec 8 13:12:13 2014
@@ -151,7 +151,16 @@ public class UniqueEntryStoreStrategy im
if (values == null) {
PropertyState ec = indexMeta.getProperty(ENTRY_COUNT_PROPERTY_NAME);
if (ec != null) {
- return ec.getValue(Type.LONG);
+ count = ec.getValue(Type.LONG);
+ if (count >= 0) {
+ return count;
+ }
+ }
+ if (count == 0) {
+ PropertyState ap = index.getProperty(ApproximateCounter.COUNT_PROPERTY_NAME);
+ if (ap != null) {
+ return ap.getValue(Type.LONG);
+ }
}
count = 1 + index.getChildNodeCount(max);
// "is not null" queries typically read more data
Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/index/FilterImpl.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/index/FilterImpl.java?rev=1643807&r1=1643806&r2=1643807&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/index/FilterImpl.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/index/FilterImpl.java Mon Dec 8 13:12:13 2014
@@ -258,7 +258,7 @@ public class FilterImpl implements Filte
case EXACT:
return path.matches(this.path);
case PARENT:
- return PathUtils.isAncestor(path, this.path);
+ return PathUtils.getParentPath(this.path).equals(path);
case DIRECT_CHILDREN:
return PathUtils.getParentPath(path).equals(this.path);
case ALL_CHILDREN:
Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/index/TraversingIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/index/TraversingIndex.java?rev=1643807&r1=1643806&r2=1643807&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/index/TraversingIndex.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/index/TraversingIndex.java Mon Dec 8 13:12:13 2014
@@ -19,6 +19,8 @@
package org.apache.jackrabbit.oak.query.index;
import org.apache.jackrabbit.oak.commons.PathUtils;
+import org.apache.jackrabbit.oak.plugins.index.counter.jmx.NodeCounter;
+import org.apache.jackrabbit.oak.query.ast.JoinConditionImpl;
import org.apache.jackrabbit.oak.spi.query.Cursor;
import org.apache.jackrabbit.oak.spi.query.Cursors;
import org.apache.jackrabbit.oak.spi.query.Filter;
@@ -49,21 +51,47 @@ public class TraversingIndex implements
if (filter.isAlwaysFalse()) {
return 0;
}
-
- // worst case 100 million nodes
- double nodeCount = 100000000;
- // worst case 100 thousand children
- double nodeCountChildren = 100000;
-
// if the path is from a join, then the depth is not correct
// (the path might be the root node), but that's OK
String path = filter.getPath();
PathRestriction restriction = filter.getPathRestriction();
+ // the simple cases
switch (restriction) {
+ case EXACT:
+ return 1;
+ case PARENT:
+ if (PathUtils.denotesRoot(path)) {
+ return 0;
+ }
+ return 1;
case NO_RESTRICTION:
+ case ALL_CHILDREN:
+ case DIRECT_CHILDREN:
break;
- case EXACT:
- nodeCount = 1;
+ default:
+ throw new IllegalArgumentException("Unknown restriction: " + restriction);
+ }
+
+ if (!path.startsWith(JoinConditionImpl.SPECIAL_PATH_PREFIX)) {
+ String testPath = path;
+ if (restriction == PathRestriction.NO_RESTRICTION) {
+ testPath = "/";
+ }
+ long count = NodeCounter.getEstimatedNodeCount(rootState, testPath, true);
+ if (count >= 0) {
+ if (restriction == PathRestriction.DIRECT_CHILDREN) {
+ count = count / 2;
+ }
+ return count;
+ }
+ }
+
+ // worst case 100 million descendant nodes
+ double nodeCount = 100000000;
+ // worst case 100 thousand children
+ double nodeCountChildren = 100000;
+ switch (restriction) {
+ case NO_RESTRICTION:
break;
case ALL_CHILDREN:
if (!PathUtils.denotesRoot(path)) {
@@ -78,13 +106,6 @@ public class TraversingIndex implements
}
}
break;
- case PARENT:
- if (PathUtils.denotesRoot(path)) {
- nodeCount = 1;
- } else {
- nodeCount = PathUtils.getDepth(path);
- }
- break;
case DIRECT_CHILDREN:
// estimate 100'000 children for now,
// to ensure an index is used if there is one
Modified: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/property/PropertyIndexTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/property/PropertyIndexTest.java?rev=1643807&r1=1643806&r2=1643807&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/property/PropertyIndexTest.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/property/PropertyIndexTest.java Mon Dec 8 13:12:13 2014
@@ -68,8 +68,10 @@ public class PropertyIndexTest {
// Add index definition
NodeBuilder builder = root.builder();
- createIndexDefinition(builder.child(INDEX_DEFINITIONS_NAME), "foo",
+ NodeBuilder index = createIndexDefinition(builder.child(INDEX_DEFINITIONS_NAME), "foo",
true, false, ImmutableSet.of("foo"), null);
+ // disable the estimation
+ index.setProperty("entryCount", -1);
NodeState before = builder.getNodeState();
// Add some content and process it through the property index hook
@@ -117,8 +119,10 @@ public class PropertyIndexTest {
// Add index definition
NodeBuilder builder = root.builder();
- createIndexDefinition(builder.child(INDEX_DEFINITIONS_NAME), "foo",
+ NodeBuilder index = createIndexDefinition(builder.child(INDEX_DEFINITIONS_NAME), "foo",
true, false, ImmutableSet.of("foo"), null);
+ // disable the estimation
+ index.setProperty("entryCount", -1);
NodeState before = builder.getNodeState();
NodeBuilder path1 = builder.child("path1");
@@ -191,6 +195,10 @@ public class PropertyIndexTest {
NodeBuilder c = data.child("c_" + i);
c.setProperty("foo", "azerty");
}
+ // add more nodes (to make traversal more expensive)
+ for (int i = 0; i < 10000; i++) {
+ data.child("cx_" + i);
+ }
NodeState after = builder.getNodeState();
NodeState indexed = HOOK.processCommit(before, after, CommitInfo.EMPTY);
@@ -202,7 +210,7 @@ public class PropertyIndexTest {
double traversal = new TraversingIndex().getCost(f, indexed);
assertTrue("Estimated cost for " + nodes
- + " nodes should not be higher than traversal (" + cost + ")",
+ + " nodes should not be higher than traversal (" + cost + " < " + traversal + ")",
cost < traversal);
}
@@ -212,8 +220,9 @@ public class PropertyIndexTest {
// Add index definition
NodeBuilder builder = root.builder();
- createIndexDefinition(builder.child(INDEX_DEFINITIONS_NAME), "foo",
+ NodeBuilder index = createIndexDefinition(builder.child(INDEX_DEFINITIONS_NAME), "foo",
true, false, ImmutableSet.of("foo"), null);
+ index.setProperty("entryCount", -1);
NodeState before = builder.getNodeState();
// Add some content and process it through the property index hook
Modified: jackrabbit/oak/trunk/oak-core/src/test/resources/org/apache/jackrabbit/oak/query/sql2_index.txt
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/resources/org/apache/jackrabbit/oak/query/sql2_index.txt?rev=1643807&r1=1643806&r2=1643807&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/test/resources/org/apache/jackrabbit/oak/query/sql2_index.txt (original)
+++ jackrabbit/oak/trunk/oak-core/src/test/resources/org/apache/jackrabbit/oak/query/sql2_index.txt Mon Dec 8 13:12:13 2014
@@ -37,7 +37,7 @@ explain select e.[jcr:path]
inner join [nt:base] as d on ischildnode(d, c)
inner join [nt:base] as e on ischildnode(e, d)
where name(a) = 'a'
- and isdescendantnode(a, '/b')
+ and isdescendantnode(a, '/')
and name(b) = 'c'
and name(c) = 'd'
and name(d) = 'e'
@@ -53,9 +53,9 @@ explain select e.[jcr:path]
inner join [nt:base] as [b] /* traverse "* && //parent/of/join"
where name([b]) = 'c' */
on ischildnode([c], [b])
- inner join [nt:base] as [a] /* traverse "/b//* && //parent/of/join"
+ inner join [nt:base] as [a] /* traverse "//* && //parent/of/join"
where (name([a]) = 'a')
- and (isdescendantnode([a], [/b])) */
+ and (isdescendantnode([a], [/])) */
on ischildnode([b], [a])
explain select e.[jcr:path]
@@ -65,7 +65,7 @@ explain select e.[jcr:path]
inner join [nt:base] as d on ischildnode(d, c)
inner join [nt:base] as e on ischildnode(e, d)
where name(a) = 'a'
- and isdescendantnode(a, '/b')
+ and isdescendantnode(a, '/')
and name(b) = 'c'
and name(c) = 'd'
and name(d) = 'e'
@@ -81,9 +81,9 @@ explain select e.[jcr:path]
inner join [nt:base] as [b] /* traverse "* && //parent/of/join"
where name([b]) = 'c' */
on ischildnode([c], [b])
- inner join [nt:base] as [a] /* traverse "/b//* && //parent/of/join"
+ inner join [nt:base] as [a] /* traverse "//* && //parent/of/join"
where (name([a]) = 'a')
- and (isdescendantnode([a], [/b])) */
+ and (isdescendantnode([a], [/])) */
on ischildnode([b], [a])
explain select excerpt(.)