You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by th...@apache.org on 2014/12/08 14:12:14 UTC

svn commit: r1643807 - in /jackrabbit/oak/trunk/oak-core/src: main/java/org/apache/jackrabbit/oak/plugins/index/counter/jmx/ main/java/org/apache/jackrabbit/oak/plugins/index/nodetype/ main/java/org/apache/jackrabbit/oak/plugins/index/property/strategy...

Author: thomasm
Date: Mon Dec  8 13:12:13 2014
New Revision: 1643807

URL: http://svn.apache.org/r1643807
Log:
OAK-1907 Better cost estimates for traversal, property, and ordered indexes

Modified:
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/counter/jmx/NodeCounter.java
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/nodetype/NodeTypeIndex.java
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/property/strategy/ContentMirrorStoreStrategy.java
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/property/strategy/UniqueEntryStoreStrategy.java
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/index/FilterImpl.java
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/index/TraversingIndex.java
    jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/property/PropertyIndexTest.java
    jackrabbit/oak/trunk/oak-core/src/test/resources/org/apache/jackrabbit/oak/query/sql2_index.txt

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/counter/jmx/NodeCounter.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/counter/jmx/NodeCounter.java?rev=1643807&r1=1643806&r2=1643807&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/counter/jmx/NodeCounter.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/counter/jmx/NodeCounter.java Mon Dec  8 13:12:13 2014
@@ -28,6 +28,7 @@ import org.apache.jackrabbit.oak.plugins
 import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry;
 import org.apache.jackrabbit.oak.spi.state.NodeState;
 import org.apache.jackrabbit.oak.spi.state.NodeStore;
+import org.apache.jackrabbit.oak.util.ApproximateCounter;
 
 /**
  * A mechanism to retrieve node counter data.
@@ -58,21 +59,41 @@ public class NodeCounter implements Node
     
     @Override
     public long getEstimatedNodeCount(String path) {
+        return getEstimatedNodeCount(store.getRoot(), path, false);
+    }
+
+    /**
+     * Get the estimated number of nodes for a given path.
+     * 
+     * @param root the root
+     * @param path the path
+     * @param max whether to get the maximum expected number of nodes (the
+     *            stored value plus the resolution)
+     * @return -1 if unknown, 0 if the node does not exist (or, if max is false,
+     *         if there are probably not many descendant nodes), or the
+     *         (maximum) estimated number of descendant nodes
+     */
+    public static long getEstimatedNodeCount(NodeState root, String path, boolean max) {
         // check if there is a property in the node itself
         // (for property index nodes)
-        NodeState s = child(store.getRoot(),
+        NodeState s = child(root,
                 PathUtils.elements(path));
-        if (s == null) {
+        if (s == null || !s.exists()) {
             // node not found
-            return -1;
+            return 0;
         }
         PropertyState p = s.getProperty(NodeCounterEditor.COUNT_PROPERTY_NAME);
         if (p != null) {
-            return p.getValue(Type.LONG);
+            long x = p.getValue(Type.LONG);
+            if (max) {
+                // in the node itself, we just add the resolution
+                x += ApproximateCounter.COUNT_RESOLUTION;
+            }
+            return x;
         }
         
         // check in the counter index (if it exists)
-        s = child(store.getRoot(),
+        s = child(root,
                 IndexConstants.INDEX_DEFINITIONS_NAME,
                 "counter",
                 NodeCounterEditor.DATA_NODE_NAME);
@@ -81,16 +102,31 @@ public class NodeCounter implements Node
             return -1;
         }
         s = child(s, PathUtils.elements(path));
-        if (s == null) {
+        if (s == null || !s.exists()) {
             // we have an index, but no data
-            return 0;
+            long x = 0;
+            if (max) {
+                // in the index, the resolution is lower
+                x += ApproximateCounter.COUNT_RESOLUTION * 10;
+            }
+            
+            return x;
         }
         p = s.getProperty(NodeCounterEditor.COUNT_PROPERTY_NAME);
         if (p == null) {
             // we have an index, but no data
-            return 0;
+            long x = 0;
+            if (max) {
+                // in the index, the resolution is lower
+                x += ApproximateCounter.COUNT_RESOLUTION * 10;
+            }
+            return x;
         }
-        return p.getValue(Type.LONG);
+        long x = p.getValue(Type.LONG);
+        if (max) {
+            x += ApproximateCounter.COUNT_RESOLUTION;
+        }       
+        return x;
     }
     
     @Override

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/nodetype/NodeTypeIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/nodetype/NodeTypeIndex.java?rev=1643807&r1=1643806&r2=1643807&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/nodetype/NodeTypeIndex.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/nodetype/NodeTypeIndex.java Mon Dec  8 13:12:13 2014
@@ -71,7 +71,7 @@ class NodeTypeIndex implements QueryInde
     
     @Override
     public String getPlan(Filter filter, NodeState root) {
-        return filter.toString();
+        return "nodeType " + filter.toString();
     }
 
     @Override

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/property/strategy/ContentMirrorStoreStrategy.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/property/strategy/ContentMirrorStoreStrategy.java?rev=1643807&r1=1643806&r2=1643807&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/property/strategy/ContentMirrorStoreStrategy.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/property/strategy/ContentMirrorStoreStrategy.java Mon Dec  8 13:12:13 2014
@@ -181,11 +181,20 @@ public class ContentMirrorStoreStrategy
     public long count(Filter filter, NodeState indexMeta, final String indexStorageNodeName,
             Set<String> values, int max) {
         NodeState index = indexMeta.getChildNode(indexStorageNodeName);
-        int count = 0;
+        long count = 0;
         if (values == null) {
             PropertyState ec = indexMeta.getProperty(ENTRY_COUNT_PROPERTY_NAME);
             if (ec != null) {
-                return ec.getValue(Type.LONG);
+                count = ec.getValue(Type.LONG);
+                if (count >= 0) {
+                    return count;
+                }
+            }
+            if (count == 0) {
+                PropertyState ap = index.getProperty(ApproximateCounter.COUNT_PROPERTY_NAME);
+                if (ap != null) {
+                    return ap.getValue(Type.LONG);
+                }
             }
             CountingNodeVisitor v = new CountingNodeVisitor(max);
             v.visit(index);
@@ -201,21 +210,39 @@ public class ContentMirrorStoreStrategy
             }
             PropertyState ec = indexMeta.getProperty(ENTRY_COUNT_PROPERTY_NAME);       
             if (ec != null) {
-                long entryCount = ec.getValue(Type.LONG);
-                // assume 10000 entries per key, so that this index is used
-                // instead of traversal, but not instead of a regular property index
-                long keyCount = entryCount / 10000;
-                ec = indexMeta.getProperty(KEY_COUNT_PROPERTY_NAME);
-                if (ec != null) {
-                    keyCount = ec.getValue(Type.LONG);
-                }
-                // cast to double to avoid overflow 
-                // (entryCount could be Long.MAX_VALUE)
-                // the cost is not multiplied by the size, 
-                // otherwise the traversing index might be used
-                keyCount = Math.max(1, keyCount);
-                return (long) ((double) entryCount / keyCount) + size;
+                count = ec.getValue(Type.LONG);
+                if (count >= 0) {
+                    // assume 10000 entries per key, so that this index is used
+                    // instead of traversal, but not instead of a regular property index
+                    long keyCount = count / 10000;
+                    ec = indexMeta.getProperty(KEY_COUNT_PROPERTY_NAME);
+                    if (ec != null) {
+                        keyCount = ec.getValue(Type.LONG);
+                    }
+                    // cast to double to avoid overflow 
+                    // (entryCount could be Long.MAX_VALUE)
+                    // the cost is not multiplied by the size, 
+                    // otherwise the traversing index might be used              
+                    keyCount = Math.max(1, keyCount);
+                    return (long) ((double) count / keyCount) + size;
+                }
             }
+            long approxMax = 0;
+            if (count == 0) {
+                PropertyState ap = index.getProperty(ApproximateCounter.COUNT_PROPERTY_NAME);
+                if (ap != null) {
+                    for (String p : values) {
+                        NodeState s = index.getChildNode(p);
+                        if (s.exists()) {
+                            ap = s.getProperty(ApproximateCounter.COUNT_PROPERTY_NAME);
+                            if (ap != null) {
+                                approxMax += ap.getValue(Type.LONG);
+                            }
+                        }
+                    }
+                }
+            }
+            count = 0;
             max = Math.max(10, max / size);
             int i = 0;
             String filterRootPath = null;
@@ -223,6 +250,11 @@ public class ContentMirrorStoreStrategy
                     filter.getPathRestriction().equals(Filter.PathRestriction.ALL_CHILDREN)) {
                 filterRootPath = filter.getPath();
             }
+            if (filterRootPath == null && approxMax > 0) {
+                // we do have an approximation, and
+                // there is no path filter
+                return approxMax;
+            }
             for (String p : values) {
                 if (count > max && i > 3) {
                     // the total count is extrapolated from the the number 
@@ -248,6 +280,12 @@ public class ContentMirrorStoreStrategy
                 }
                 i++;
             }
+            if (approxMax > 0 && approxMax > count) {
+                // we do have an approximation, and
+                // it is higher than what we counted
+                // (we don't count that far)
+                count = approxMax;
+            }
         }
         return count;
     }

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/property/strategy/UniqueEntryStoreStrategy.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/property/strategy/UniqueEntryStoreStrategy.java?rev=1643807&r1=1643806&r2=1643807&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/property/strategy/UniqueEntryStoreStrategy.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/property/strategy/UniqueEntryStoreStrategy.java Mon Dec  8 13:12:13 2014
@@ -151,7 +151,16 @@ public class UniqueEntryStoreStrategy im
         if (values == null) {
             PropertyState ec = indexMeta.getProperty(ENTRY_COUNT_PROPERTY_NAME);
             if (ec != null) {
-                return ec.getValue(Type.LONG);
+                count = ec.getValue(Type.LONG);
+                if (count >= 0) {
+                    return count;
+                }
+            }
+            if (count == 0) {
+                PropertyState ap = index.getProperty(ApproximateCounter.COUNT_PROPERTY_NAME);
+                if (ap != null) {
+                    return ap.getValue(Type.LONG);
+                }
             }
             count = 1 + index.getChildNodeCount(max);
             // "is not null" queries typically read more data

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/index/FilterImpl.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/index/FilterImpl.java?rev=1643807&r1=1643806&r2=1643807&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/index/FilterImpl.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/index/FilterImpl.java Mon Dec  8 13:12:13 2014
@@ -258,7 +258,7 @@ public class FilterImpl implements Filte
         case EXACT:
             return path.matches(this.path);
         case PARENT:
-            return PathUtils.isAncestor(path, this.path);
+            return PathUtils.getParentPath(this.path).equals(path);
         case DIRECT_CHILDREN:
             return PathUtils.getParentPath(path).equals(this.path);
         case ALL_CHILDREN:

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/index/TraversingIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/index/TraversingIndex.java?rev=1643807&r1=1643806&r2=1643807&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/index/TraversingIndex.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/index/TraversingIndex.java Mon Dec  8 13:12:13 2014
@@ -19,6 +19,8 @@
 package org.apache.jackrabbit.oak.query.index;
 
 import org.apache.jackrabbit.oak.commons.PathUtils;
+import org.apache.jackrabbit.oak.plugins.index.counter.jmx.NodeCounter;
+import org.apache.jackrabbit.oak.query.ast.JoinConditionImpl;
 import org.apache.jackrabbit.oak.spi.query.Cursor;
 import org.apache.jackrabbit.oak.spi.query.Cursors;
 import org.apache.jackrabbit.oak.spi.query.Filter;
@@ -49,21 +51,47 @@ public class TraversingIndex implements
         if (filter.isAlwaysFalse()) {
             return 0;
         }
-        
-        // worst case 100 million nodes
-        double nodeCount = 100000000;
-        // worst case 100 thousand children
-        double nodeCountChildren = 100000;
-        
         // if the path is from a join, then the depth is not correct
         // (the path might be the root node), but that's OK
         String path = filter.getPath();
         PathRestriction restriction = filter.getPathRestriction();
+        // the simple cases
         switch (restriction) {
+        case EXACT:
+            return 1;
+        case PARENT:
+            if (PathUtils.denotesRoot(path)) {
+                return 0;
+            }
+            return 1;
         case NO_RESTRICTION:
+        case ALL_CHILDREN:
+        case DIRECT_CHILDREN:
             break;
-        case EXACT:
-            nodeCount = 1;
+        default:
+            throw new IllegalArgumentException("Unknown restriction: " + restriction);
+        }
+        
+        if (!path.startsWith(JoinConditionImpl.SPECIAL_PATH_PREFIX)) {
+            String testPath = path;
+            if (restriction == PathRestriction.NO_RESTRICTION) {
+                testPath = "/";
+            }
+            long count = NodeCounter.getEstimatedNodeCount(rootState, testPath, true);
+            if (count >= 0) {
+                if (restriction == PathRestriction.DIRECT_CHILDREN) {
+                    count = count / 2;
+                }
+                return count;
+            }
+        }
+        
+        // worst case 100 million descendant nodes
+        double nodeCount = 100000000;
+        // worst case 100 thousand children
+        double nodeCountChildren = 100000;
+        switch (restriction) {
+        case NO_RESTRICTION:
             break;
         case ALL_CHILDREN:
             if (!PathUtils.denotesRoot(path)) {
@@ -78,13 +106,6 @@ public class TraversingIndex implements
                 }
             }
             break;
-        case PARENT:
-            if (PathUtils.denotesRoot(path)) {
-                nodeCount = 1;
-            } else {
-                nodeCount = PathUtils.getDepth(path);
-            }
-            break;
         case DIRECT_CHILDREN:
             // estimate 100'000 children for now, 
             // to ensure an index is used if there is one

Modified: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/property/PropertyIndexTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/property/PropertyIndexTest.java?rev=1643807&r1=1643806&r2=1643807&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/property/PropertyIndexTest.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/index/property/PropertyIndexTest.java Mon Dec  8 13:12:13 2014
@@ -68,8 +68,10 @@ public class PropertyIndexTest {
 
         // Add index definition
         NodeBuilder builder = root.builder();
-        createIndexDefinition(builder.child(INDEX_DEFINITIONS_NAME), "foo",
+        NodeBuilder index = createIndexDefinition(builder.child(INDEX_DEFINITIONS_NAME), "foo",
                 true, false, ImmutableSet.of("foo"), null);
+        // disable the estimation
+        index.setProperty("entryCount", -1);        
         NodeState before = builder.getNodeState();
 
         // Add some content and process it through the property index hook
@@ -117,8 +119,10 @@ public class PropertyIndexTest {
 
         // Add index definition
         NodeBuilder builder = root.builder();
-        createIndexDefinition(builder.child(INDEX_DEFINITIONS_NAME), "foo",
+        NodeBuilder index = createIndexDefinition(builder.child(INDEX_DEFINITIONS_NAME), "foo",
                 true, false, ImmutableSet.of("foo"), null);
+        // disable the estimation
+        index.setProperty("entryCount", -1);
         NodeState before = builder.getNodeState();
 
         NodeBuilder path1 = builder.child("path1");
@@ -191,6 +195,10 @@ public class PropertyIndexTest {
             NodeBuilder c = data.child("c_" + i);
             c.setProperty("foo", "azerty");
         }
+        // add more nodes (to make traversal more expensive)
+        for (int i = 0; i < 10000; i++) {
+            data.child("cx_" + i);
+        }
         NodeState after = builder.getNodeState();
         NodeState indexed = HOOK.processCommit(before, after, CommitInfo.EMPTY);
 
@@ -202,7 +210,7 @@ public class PropertyIndexTest {
         double traversal = new TraversingIndex().getCost(f, indexed);
 
         assertTrue("Estimated cost for " + nodes
-                + " nodes should not be higher than traversal (" + cost + ")",
+                + " nodes should not be higher than traversal (" + cost + " < " + traversal + ")",
                 cost < traversal);
     }
 
@@ -212,8 +220,9 @@ public class PropertyIndexTest {
 
         // Add index definition
         NodeBuilder builder = root.builder();
-        createIndexDefinition(builder.child(INDEX_DEFINITIONS_NAME), "foo",
+        NodeBuilder index = createIndexDefinition(builder.child(INDEX_DEFINITIONS_NAME), "foo",
                 true, false, ImmutableSet.of("foo"), null);
+        index.setProperty("entryCount", -1);
         NodeState before = builder.getNodeState();
 
         // Add some content and process it through the property index hook

Modified: jackrabbit/oak/trunk/oak-core/src/test/resources/org/apache/jackrabbit/oak/query/sql2_index.txt
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/resources/org/apache/jackrabbit/oak/query/sql2_index.txt?rev=1643807&r1=1643806&r2=1643807&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/test/resources/org/apache/jackrabbit/oak/query/sql2_index.txt (original)
+++ jackrabbit/oak/trunk/oak-core/src/test/resources/org/apache/jackrabbit/oak/query/sql2_index.txt Mon Dec  8 13:12:13 2014
@@ -37,7 +37,7 @@ explain select e.[jcr:path]
   inner join [nt:base] as d on ischildnode(d, c)
   inner join [nt:base] as e on ischildnode(e, d)
   where name(a) = 'a'
-  and isdescendantnode(a, '/b')
+  and isdescendantnode(a, '/')
   and name(b) = 'c'
   and name(c) = 'd'
   and name(d) = 'e'
@@ -53,9 +53,9 @@ explain select e.[jcr:path]
   inner join [nt:base] as [b] /* traverse "* && //parent/of/join"
   where name([b]) = 'c' */
   on ischildnode([c], [b])
-  inner join [nt:base] as [a] /* traverse "/b//* && //parent/of/join"
+  inner join [nt:base] as [a] /* traverse "//* && //parent/of/join"
   where (name([a]) = 'a')
-  and (isdescendantnode([a], [/b])) */
+  and (isdescendantnode([a], [/])) */
   on ischildnode([b], [a])
 
 explain select e.[jcr:path]
@@ -65,7 +65,7 @@ explain select e.[jcr:path]
   inner join [nt:base] as d on ischildnode(d, c)
   inner join [nt:base] as e on ischildnode(e, d)
   where name(a) = 'a'
-  and isdescendantnode(a, '/b')
+  and isdescendantnode(a, '/')
   and name(b) = 'c'
   and name(c) = 'd'
   and name(d) = 'e'
@@ -81,9 +81,9 @@ explain select e.[jcr:path]
   inner join [nt:base] as [b] /* traverse "* && //parent/of/join"
   where name([b]) = 'c' */
   on ischildnode([c], [b])
-  inner join [nt:base] as [a] /* traverse "/b//* && //parent/of/join"
+  inner join [nt:base] as [a] /* traverse "//* && //parent/of/join"
   where (name([a]) = 'a')
-  and (isdescendantnode([a], [/b])) */
+  and (isdescendantnode([a], [/])) */
   on ischildnode([b], [a])
 
 explain select excerpt(.)