You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by am...@apache.org on 2014/10/28 04:23:45 UTC

svn commit: r1634774 - in /jackrabbit/oak/trunk/oak-lucene/src: main/java/org/apache/jackrabbit/oak/plugins/index/lucene/ test/java/org/apache/jackrabbit/oak/plugins/index/lucene/

Author: amitj
Date: Tue Oct 28 03:23:45 2014
New Revision: 1634774

URL: http://svn.apache.org/r1634774
Log:
OAK-2200: Tune cost calculation for lucene property index
Added a configurable param entryCount which defaults to 1000 and indicates the max count to be returned by the lucene index.

Modified:
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexPlanner.java
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java
    jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java?rev=1634774&r1=1634773&r2=1634774&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java Tue Oct 28 03:23:45 2014
@@ -40,6 +40,7 @@ import org.apache.lucene.codecs.Codec;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.ENTRY_COUNT_PROPERTY_NAME;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.BLOB_SIZE;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.EXCLUDE_PROPERTY_NAMES;
 import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.EXPERIMENTAL_STORAGE;
@@ -57,6 +58,11 @@ public class IndexDefinition {
      */
     static final int DEFAULT_BLOB_SIZE = OakDirectory.DEFAULT_BLOB_SIZE - 300;
 
+    /**
+     * Default entry count to keep estimated entry count low.
+     */
+    static final long DEFAULT_ENTRY_COUNT = 1000;
+
     private final int propertyTypes;
 
     private final Set<String> excludes;
@@ -79,6 +85,12 @@ public class IndexDefinition {
 
     private final Codec codec;
 
+    /**
+     * Defines the maximum estimated entry count configured.
+     * Defaults to {@link #DEFAULT_ENTRY_COUNT}.
+     */
+    private final long entryCount;
+
     public IndexDefinition(NodeBuilder defn) {
         this.definition = defn;
         PropertyState pst = defn.getProperty(INCLUDE_PROPERTY_TYPES);
@@ -118,6 +130,12 @@ public class IndexDefinition {
         this.funcName = functionName != null ? "native*" + functionName : null;
 
         this.codec = createCodec();
+
+        if (defn.hasProperty(ENTRY_COUNT_PROPERTY_NAME)) {
+            this.entryCount = defn.getProperty(ENTRY_COUNT_PROPERTY_NAME).getValue(Type.LONG);
+        } else {
+            this.entryCount = DEFAULT_ENTRY_COUNT;
+        }
     }
 
     boolean includeProperty(String name) {
@@ -195,6 +213,10 @@ public class IndexDefinition {
         return codec;
     }
 
+    public long getEntryCount() {
+        return entryCount;
+    }
+
     //~------------------------------------------< Internal >
 
     private Codec createCodec() {

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexPlanner.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexPlanner.java?rev=1634774&r1=1634773&r2=1634774&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexPlanner.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexPlanner.java Tue Oct 28 03:23:45 2014
@@ -112,7 +112,7 @@ public class IndexPlanner {
                 .setSortOrder(createSortOrder())
                 .setDelayed(true) //Lucene is always async
                 .setAttribute(LuceneIndex.ATTR_INDEX_PATH, indexPath)
-                .setEstimatedEntryCount(getReader().numDocs());
+                .setEstimatedEntryCount(Math.min(defn.getEntryCount(), getReader().numDocs()));
     }
 
     private String getPathPrefix() {

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java?rev=1634774&r1=1634773&r2=1634774&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java Tue Oct 28 03:23:45 2014
@@ -888,7 +888,10 @@ public class LucenePropertyIndex impleme
                 bq.add(new TermQuery(new Term(JCR_MIXINTYPES, type)), SHOULD);
             }
         }
-        qs.add(bq);
+
+        if (bq.clauses().size() != 0) {
+            qs.add(bq);
+        }
     }
 
     static Query getFullTextQuery(FullTextExpression ft, final Analyzer analyzer, final IndexReader reader) {

Modified: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java?rev=1634774&r1=1634773&r2=1634774&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java Tue Oct 28 03:23:45 2014
@@ -36,7 +36,10 @@ import org.apache.jackrabbit.oak.api.Com
 import org.apache.jackrabbit.oak.api.ContentRepository;
 import org.apache.jackrabbit.oak.api.Tree;
 import org.apache.jackrabbit.oak.api.Type;
+import org.apache.jackrabbit.oak.plugins.index.IndexConstants;
 import org.apache.jackrabbit.oak.plugins.index.lucene.util.LuceneInitializerHelper;
+import org.apache.jackrabbit.oak.plugins.index.nodetype.NodeTypeIndexProvider;
+import org.apache.jackrabbit.oak.plugins.index.property.PropertyIndexEditorProvider;
 import org.apache.jackrabbit.oak.plugins.memory.PropertyStates;
 import org.apache.jackrabbit.oak.plugins.nodetype.write.InitialContent;
 import org.apache.jackrabbit.oak.query.AbstractQueryTest;
@@ -85,6 +88,8 @@ public class LucenePropertyIndexTest ext
                 .with((QueryIndexProvider) provider)
                 .with((Observer) provider)
                 .with(new LuceneIndexEditorProvider())
+                .with(new PropertyIndexEditorProvider())
+                .with(new NodeTypeIndexProvider())
                 .createContentRepository();
     }
 
@@ -111,6 +116,35 @@ public class LucenePropertyIndexTest ext
     }
 
     @Test
+    public void indexSelectionVsNodeType() throws Exception {
+        Tree luceneIndex = createIndex("test1", of("propa"));
+        luceneIndex.setProperty(IndexConstants.ENTRY_COUNT_PROPERTY_NAME, 5L, Type.LONG);
+
+        // Decrease cost of node type index
+        Tree nodeTypeIndex = root.getTree("/").getChild("oak:index").getChild("nodetype");
+        nodeTypeIndex.setProperty(IndexConstants.ENTRY_COUNT_PROPERTY_NAME, 50L, Type.LONG);
+        nodeTypeIndex.setProperty(IndexConstants.KEY_COUNT_PROPERTY_NAME, 10L, Type.LONG);
+
+        Tree test = root.getTree("/").addChild("test");
+        test.setProperty("jcr:primaryType", "nt:unstructured", Type.NAME);
+
+        List<String> paths = Lists.newArrayList();
+        for (int idx = 0; idx < 15; idx++) {
+            Tree a = test.addChild("n"+idx);
+            a.setProperty("jcr:primaryType", "nt:unstructured", Type.NAME);
+            a.setProperty("propa", "foo");
+            paths.add("/test/n" + idx);
+        }
+        root.commit();
+
+        String propaQuery = "select [jcr:path] from [nt:unstructured] where [propa] = 'foo'";
+        String explain = explain(propaQuery);
+        assertThat(explain(propaQuery), containsString("lucene:test1"));
+
+        assertQuery(propaQuery, paths);
+    }
+
+    @Test
     public void rangeQueriesWithLong() throws Exception {
         Tree idx = createIndex("test1", of("propa", "propb"));
         Tree propIdx = idx.addChild(PROP_NODE).addChild("propa");



Re: svn commit: r1634774 - in /jackrabbit/oak/trunk/oak-lucene/src: main/java/org/apache/jackrabbit/oak/plugins/index/lucene/ test/java/org/apache/jackrabbit/oak/plugins/index/lucene/

Posted by Thomas Mueller <mu...@adobe.com>.
Hi,

I think using "entryCount" is ok. In any case, it's only needed until we
have proper cost estimation in the property index and ordered index.

Regards,
Thomas



On 28/10/14 09:32, "Davide Giannella" <da...@apache.org> wrote:

>On 28/10/2014 03:23, amitj@apache.org wrote:
>> Author: amitj
>> Date: Tue Oct 28 03:23:45 2014
>> New Revision: 1634774
>>
>> URL: http://svn.apache.org/r1634774
>> Log:
>> OAK-2200: Tune cost calculation for lucene property index
>> Added a configurable param entryCount which defaults to 1000 and
>>indicates the max count to be returned by the lucene index.
>>
>Doesn't `entryCount` for other indexes mean the value actually
>returned? If so, it would be clearer to keep the same behaviour and, in
>case the "max count returned" is needed, introduce a new variable;
>something like maxEntryCount.
>
>D.
>
>


Re: svn commit: r1634774 - in /jackrabbit/oak/trunk/oak-lucene/src: main/java/org/apache/jackrabbit/oak/plugins/index/lucene/ test/java/org/apache/jackrabbit/oak/plugins/index/lucene/

Posted by Davide Giannella <da...@apache.org>.
On 28/10/2014 03:23, amitj@apache.org wrote:
> Author: amitj
> Date: Tue Oct 28 03:23:45 2014
> New Revision: 1634774
>
> URL: http://svn.apache.org/r1634774
> Log:
> OAK-2200: Tune cost calculation for lucene property index
> Added a configurable param entryCount which defaults to 1000 and indicates the max count to be returned by the lucene index.
>
Doesn't `entryCount` for other indexes mean the value actually
returned? If so, it would be clearer to keep the same behaviour and, in
case the "max count returned" is needed, introduce a new variable;
something like maxEntryCount.

D.