You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by th...@apache.org on 2013/05/08 13:19:49 UTC
svn commit: r1480226 -
/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/spi/query/QueryIndex.java
Author: thomasm
Date: Wed May 8 11:19:49 2013
New Revision: 1480226
URL: http://svn.apache.org/r1480226
Log:
OAK-622 Improve QueryIndex interface (WIP)
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/spi/query/QueryIndex.java
Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/spi/query/QueryIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/spi/query/QueryIndex.java?rev=1480226&r1=1480225&r2=1480226&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/spi/query/QueryIndex.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/spi/query/QueryIndex.java Wed May 8 11:19:49 2013
@@ -42,10 +42,21 @@ import org.apache.jackrabbit.oak.spi.sta
public interface QueryIndex {
/**
- * Estimate the cost to query with the given filter. The returned
+ * Estimate the worst-case cost to query with the given filter. The returned
* cost is a value between 1 (very fast; lookup of a unique node) and the
- * estimated number of nodes to traverse.
- *
+ * estimated number of entries to traverse, assuming the cursor is fully
+ * read and that each node could, in theory, require one network roundtrip
+ * or disk read operation (this method may return a lower number if the
+ * data is known to be fully in memory).
+ * <p>
+ * The returned value is supposed to be an estimate and doesn't have to be
+ * very accurate. Please note this method is called on each index whenever a
+ * query is run, so the method should be reasonably fast (not read any data
+ * itself, or at least not read too much data).
+ * <p>
+ * If an index implementation cannot query the data, it must return
+ * {@code Double.MAX_VALUE}.
+ *
* @param filter the filter
* @param rootState root state of the current repository snapshot
* @return the estimated cost in number of read nodes
@@ -53,8 +64,21 @@ public interface QueryIndex {
double getCost(Filter filter, NodeState rootState);
/**
- * Start a query.
- *
+ * Query the index. The returned cursor is supposed to return as few nodes
+ * as possible, but may return more nodes than necessary.
+ * <p>
+ * An implementation should only filter the result if it can do so easily
+ * and efficiently; the query engine will verify the data again (in memory)
+ * and check for access rights.
+ * <p>
+ * The method is only called if this index is used for the given query and
+ * selector, which is only the case if the given index implementation
+ * returned the lowest cost for the given filter. If the implementation
+ * returned {@code Double.MAX_VALUE} in the getCost method for the given
+ * filter, then this method is not called. If it is called anyway, it is
+ * supposed to throw an exception (as that would indicate an internal error
+ * of the query engine).
+ *
* @param filter the filter
* @param rootState root state of the current repository snapshot
* @return a cursor to iterate over the result
@@ -62,8 +86,10 @@ public interface QueryIndex {
Cursor query(Filter filter, NodeState rootState);
/**
- * Get the query plan for the given filter.
- *
+ * Get the query plan for the given filter. This method is called when
+ * running an {@code EXPLAIN SELECT} query, or for logging purposes. The
+ * result should be human-readable.
+ *
* @param filter the filter
* @param rootState root state of the current repository snapshot
* @return the query plan
@@ -114,14 +140,14 @@ public interface QueryIndex {
// /**
// * The cost to execute the query once. The returned value should
// * approximately match the number of disk read operations plus the
-// * number of network roundtrips.
+// * number of network roundtrips (worst case).
// */
// double costPerExecution;
//
// /**
// * The cost to read one entry from the cursor. The returned value should
// * approximately match the number of disk read operations plus the
-// * number of network roundtrips.
+// * number of network roundtrips (worst case).
// */
// double costPerEntry;
//
@@ -135,11 +161,6 @@ public interface QueryIndex {
// * The filter to use.
// */
// Filter filter;
-//
-// /**
-// * Whether transient (unsaved) changes are included.
-// */
-// boolean includeTransient;
//
// /**
// * Whether the index is not always up-to-date.