You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by am...@apache.org on 2014/10/28 04:23:45 UTC
svn commit: r1634774 - in /jackrabbit/oak/trunk/oak-lucene/src:
main/java/org/apache/jackrabbit/oak/plugins/index/lucene/
test/java/org/apache/jackrabbit/oak/plugins/index/lucene/
Author: amitj
Date: Tue Oct 28 03:23:45 2014
New Revision: 1634774
URL: http://svn.apache.org/r1634774
Log:
OAK-2200: Tune cost calculation for lucene property index
Added a configurable param entryCount which defaults to 1000 and indicates the max count to be returned by the lucene index.
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexPlanner.java
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java
Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java?rev=1634774&r1=1634773&r2=1634774&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java Tue Oct 28 03:23:45 2014
@@ -40,6 +40,7 @@ import org.apache.lucene.codecs.Codec;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.ENTRY_COUNT_PROPERTY_NAME;
import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.BLOB_SIZE;
import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.EXCLUDE_PROPERTY_NAMES;
import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.EXPERIMENTAL_STORAGE;
@@ -57,6 +58,11 @@ public class IndexDefinition {
*/
static final int DEFAULT_BLOB_SIZE = OakDirectory.DEFAULT_BLOB_SIZE - 300;
+ /**
+ * Default entry count to keep estimated entry count low.
+ */
+ static final long DEFAULT_ENTRY_COUNT = 1000;
+
private final int propertyTypes;
private final Set<String> excludes;
@@ -79,6 +85,12 @@ public class IndexDefinition {
private final Codec codec;
+ /**
+ * Defines the maximum estimated entry count configured.
+ * Defaults to {@link #DEFAULT_ENTRY_COUNT}
+ */
+ private final long entryCount;
+
public IndexDefinition(NodeBuilder defn) {
this.definition = defn;
PropertyState pst = defn.getProperty(INCLUDE_PROPERTY_TYPES);
@@ -118,6 +130,12 @@ public class IndexDefinition {
this.funcName = functionName != null ? "native*" + functionName : null;
this.codec = createCodec();
+
+ if (defn.hasProperty(ENTRY_COUNT_PROPERTY_NAME)) {
+ this.entryCount = defn.getProperty(ENTRY_COUNT_PROPERTY_NAME).getValue(Type.LONG);
+ } else {
+ this.entryCount = DEFAULT_ENTRY_COUNT;
+ }
}
boolean includeProperty(String name) {
@@ -195,6 +213,10 @@ public class IndexDefinition {
return codec;
}
+ public long getEntryCount() {
+ return entryCount;
+ }
+
//~------------------------------------------< Internal >
private Codec createCodec() {
Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexPlanner.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexPlanner.java?rev=1634774&r1=1634773&r2=1634774&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexPlanner.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexPlanner.java Tue Oct 28 03:23:45 2014
@@ -112,7 +112,7 @@ public class IndexPlanner {
.setSortOrder(createSortOrder())
.setDelayed(true) //Lucene is always async
.setAttribute(LuceneIndex.ATTR_INDEX_PATH, indexPath)
- .setEstimatedEntryCount(getReader().numDocs());
+ .setEstimatedEntryCount(Math.min(defn.getEntryCount(), getReader().numDocs()));
}
private String getPathPrefix() {
Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java?rev=1634774&r1=1634773&r2=1634774&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java Tue Oct 28 03:23:45 2014
@@ -888,7 +888,10 @@ public class LucenePropertyIndex impleme
bq.add(new TermQuery(new Term(JCR_MIXINTYPES, type)), SHOULD);
}
}
- qs.add(bq);
+
+ if (bq.clauses().size() != 0) {
+ qs.add(bq);
+ }
}
static Query getFullTextQuery(FullTextExpression ft, final Analyzer analyzer, final IndexReader reader) {
Modified: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java?rev=1634774&r1=1634773&r2=1634774&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java Tue Oct 28 03:23:45 2014
@@ -36,7 +36,10 @@ import org.apache.jackrabbit.oak.api.Com
import org.apache.jackrabbit.oak.api.ContentRepository;
import org.apache.jackrabbit.oak.api.Tree;
import org.apache.jackrabbit.oak.api.Type;
+import org.apache.jackrabbit.oak.plugins.index.IndexConstants;
import org.apache.jackrabbit.oak.plugins.index.lucene.util.LuceneInitializerHelper;
+import org.apache.jackrabbit.oak.plugins.index.nodetype.NodeTypeIndexProvider;
+import org.apache.jackrabbit.oak.plugins.index.property.PropertyIndexEditorProvider;
import org.apache.jackrabbit.oak.plugins.memory.PropertyStates;
import org.apache.jackrabbit.oak.plugins.nodetype.write.InitialContent;
import org.apache.jackrabbit.oak.query.AbstractQueryTest;
@@ -85,6 +88,8 @@ public class LucenePropertyIndexTest ext
.with((QueryIndexProvider) provider)
.with((Observer) provider)
.with(new LuceneIndexEditorProvider())
+ .with(new PropertyIndexEditorProvider())
+ .with(new NodeTypeIndexProvider())
.createContentRepository();
}
@@ -111,6 +116,35 @@ public class LucenePropertyIndexTest ext
}
@Test
+ public void indexSelectionVsNodeType() throws Exception {
+ Tree luceneIndex = createIndex("test1", of("propa"));
+ luceneIndex.setProperty(IndexConstants.ENTRY_COUNT_PROPERTY_NAME, 5L, Type.LONG);
+
+ // Decrease cost of node type index
+ Tree nodeTypeIndex = root.getTree("/").getChild("oak:index").getChild("nodetype");
+ nodeTypeIndex.setProperty(IndexConstants.ENTRY_COUNT_PROPERTY_NAME, 50L, Type.LONG);
+ nodeTypeIndex.setProperty(IndexConstants.KEY_COUNT_PROPERTY_NAME, 10L, Type.LONG);
+
+ Tree test = root.getTree("/").addChild("test");
+ test.setProperty("jcr:primaryType", "nt:unstructured", Type.NAME);
+
+ List<String> paths = Lists.newArrayList();
+ for (int idx = 0; idx < 15; idx++) {
+ Tree a = test.addChild("n"+idx);
+ a.setProperty("jcr:primaryType", "nt:unstructured", Type.NAME);
+ a.setProperty("propa", "foo");
+ paths.add("/test/n" + idx);
+ }
+ root.commit();
+
+ String propaQuery = "select [jcr:path] from [nt:unstructured] where [propa] = 'foo'";
+ String explain = explain(propaQuery);
+ assertThat(explain(propaQuery), containsString("lucene:test1"));
+
+ assertQuery(propaQuery, paths);
+ }
+
+ @Test
public void rangeQueriesWithLong() throws Exception {
Tree idx = createIndex("test1", of("propa", "propb"));
Tree propIdx = idx.addChild(PROP_NODE).addChild("propa");
Re: svn commit: r1634774 - in /jackrabbit/oak/trunk/oak-lucene/src:
main/java/org/apache/jackrabbit/oak/plugins/index/lucene/
test/java/org/apache/jackrabbit/oak/plugins/index/lucene/
Posted by Thomas Mueller <mu...@adobe.com>.
Hi,
I think using "entryCount" is ok. It's anyway only needed until we have
proper cost estimation in the property index and ordered index.
Regards,
Thomas
On 28/10/14 09:32, "Davide Giannella" <da...@apache.org> wrote:
>On 28/10/2014 03:23, amitj@apache.org wrote:
>> Author: amitj
>> Date: Tue Oct 28 03:23:45 2014
>> New Revision: 1634774
>>
>> URL: http://svn.apache.org/r1634774
>> Log:
>> OAK-2200: Tune cost calculation for lucene property index
>> Added a configurable param entryCount which defaults to 1000 and
>>indicates the max count to be returned by the lucene index.
>>
>Doesn't `entryCount` for other indexes mean the value actually
>returned? If so, it would be clearer to keep the same behaviour and, in
>case the "max count returned" is needed, introduce a new variable;
>something like maxEntryCount.
>
>D.
>
>
Re: svn commit: r1634774 - in /jackrabbit/oak/trunk/oak-lucene/src:
main/java/org/apache/jackrabbit/oak/plugins/index/lucene/ test/java/org/apache/jackrabbit/oak/plugins/index/lucene/
Posted by Davide Giannella <da...@apache.org>.
On 28/10/2014 03:23, amitj@apache.org wrote:
> Author: amitj
> Date: Tue Oct 28 03:23:45 2014
> New Revision: 1634774
>
> URL: http://svn.apache.org/r1634774
> Log:
> OAK-2200: Tune cost calculation for lucene property index
> Added a configurable param entryCount which defaults to 1000 and indicates the max count to be returned by the lucene index.
>
Doesn't `entryCount` for other indexes mean the value actually
returned? If so, it would be clearer to keep the same behaviour and, in
case the "max count returned" is needed, introduce a new variable;
something like maxEntryCount.
D.