You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by ch...@apache.org on 2016/04/01 08:36:25 UTC
svn commit: r1737310 - in
/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene:
LuceneIndexMBean.java LuceneIndexMBeanImpl.java LucenePropertyIndex.java
Author: chetanm
Date: Fri Apr 1 06:36:25 2016
New Revision: 1737310
URL: http://svn.apache.org/viewvc?rev=1737310&view=rev
Log:
OAK-4164 - Expose path stats for Lucene index
Path stats are computed by visiting the index content: immediate child node queries are performed recursively, and the results are walked in a breadth-first traversal.
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexMBean.java
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexMBeanImpl.java
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java
Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexMBean.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexMBean.java?rev=1737310&r1=1737309&r2=1737310&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexMBean.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexMBean.java Fri Apr 1 06:36:25 2016
@@ -23,9 +23,29 @@ import java.io.IOException;
import javax.management.openmbean.TabularData;
+import org.apache.jackrabbit.oak.commons.jmx.Description;
+import org.apache.jackrabbit.oak.commons.jmx.Name;
+
public interface LuceneIndexMBean {
String TYPE = "LuceneIndex";
TabularData getIndexStats() throws IOException;
+ /**
+  * Determines the set of paths present in a Lucene index, examining at most
+  * {@code maxLevel} levels and at most {@code maxPathCount} unique paths.
+  * The result is intended as input for choosing {@code includedPaths}; the
+  * index definition must have {@code evaluatePathRestrictions} enabled.
+  *
+  * @param indexPath path of the index for which stats are to be determined
+  * @param maxLevel maximum depth to examine
+  * @param maxPathCount maximum number of unique paths to examine
+  * @return the paths found (exact shape is defined by the implementation)
+  * @throws IOException if the index cannot be read
+  */
+ @Description("Determines the set of index paths up to the given maxLevel. This can be used to determine the " +
+     "value for [includedPaths]. For this to work you should have [evaluatePathRestrictions] set to true " +
+     "in your index definition")
+ String[] getIndexedPaths(
+     @Name("indexPath")
+     @Description("Index path for which stats are to be determined")
+     String indexPath,
+     @Name("maxLevel")
+     @Description("Maximum depth to examine. E.g. 5. Stats calculation would " +
+         "break out after this limit")
+     int maxLevel,
+     @Name("maxPathCount")
+     @Description("Maximum number of unique paths to examine. E.g. 100. Stats " +
+         "calculation would break out after this limit")
+     int maxPathCount
+ ) throws IOException;
+
}
Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexMBeanImpl.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexMBeanImpl.java?rev=1737310&r1=1737309&r2=1737310&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexMBeanImpl.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexMBeanImpl.java Fri Apr 1 06:36:25 2016
@@ -21,8 +21,12 @@ package org.apache.jackrabbit.oak.plugin
import java.io.File;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
import java.util.Set;
+import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import javax.management.NotCompliantMBeanException;
import javax.management.openmbean.CompositeDataSupport;
@@ -34,9 +38,25 @@ import javax.management.openmbean.Tabula
import javax.management.openmbean.TabularDataSupport;
import javax.management.openmbean.TabularType;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Sets;
+import com.google.common.collect.TreeTraverser;
+import org.apache.jackrabbit.oak.api.PropertyState;
+import org.apache.jackrabbit.oak.api.Tree;
+import org.apache.jackrabbit.oak.api.Type;
+import org.apache.jackrabbit.oak.commons.PathUtils;
import org.apache.jackrabbit.oak.commons.jmx.AnnotatedStandardMBean;
+import org.apache.jackrabbit.oak.plugins.index.lucene.LucenePropertyIndex.PathStoredFieldVisitor;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.NumericRangeQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
@@ -45,6 +65,7 @@ import org.slf4j.LoggerFactory;
import static com.google.common.base.Preconditions.checkNotNull;
import static org.apache.jackrabbit.oak.commons.IOUtils.humanReadableByteCount;
+import static org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newAncestorTerm;
public class LuceneIndexMBeanImpl extends AnnotatedStandardMBean implements LuceneIndexMBean {
private final Logger log = LoggerFactory.getLogger(getClass());
@@ -84,6 +105,34 @@ public class LuceneIndexMBeanImpl extend
return tds;
}
+ @Override
+ public String[] getIndexedPaths(String indexPath, int maxLevel, int maxPathCount) throws IOException {
+     // A missing index path means "start from the root"
+     final String effectivePath = indexPath != null ? indexPath : "/";
+
+     IndexNode indexNode = null;
+     try {
+         indexNode = indexTracker.acquireIndexNode(effectivePath);
+         if (indexNode == null) {
+             // No index could be acquired for this path: nothing to report
+             return new String[0];
+         }
+
+         if (indexNode.getDefinition().evaluatePathRestrictions()) {
+             return determineIndexedPaths(indexNode.getSearcher(), maxLevel, maxPathCount);
+         }
+
+         // Path stats rely on evaluatePathRestrictions being enabled; report that as the sole entry
+         return createMsg(String.format("Index at [%s] does not have [%s] enabled. So paths statistics cannot " +
+             "be determined for this index", effectivePath, LuceneIndexConstants.EVALUATE_PATH_RESTRICTION));
+     } finally {
+         // Always hand the acquired node back, whichever branch returned
+         if (indexNode != null) {
+             indexNode.release();
+         }
+     }
+ }
+
public void dumpIndexContent(String sourcePath, String destPath) throws IOException {
IndexNode indexNode = null;
try {
@@ -108,6 +157,146 @@ public class LuceneIndexMBeanImpl extend
}
}
+ /**
+  * Collects the paths present in the index, walking it breadth-first from the
+  * shallowest depth that has any entry.
+  *
+  * <p>All documents of one depth are visited before any document of the next
+  * depth, across <em>all</em> start-level documents. When the
+  * {@code maxPathCount} limit is hit at some depth, every path of the
+  * previous depth has therefore already been collected, and those paths are
+  * what gets returned.
+  *
+  * @param searcher searcher over the index to examine
+  * @param maxLevel depth below which children are no longer expanded
+  * @param maxPathCount maximum number of unique paths to collect
+  * @return all collected paths if the limit was not reached; otherwise the
+  *         paths one level above the depth at which the limit was reached
+  *         (possibly empty); or a single message if no start depth was found
+  * @throws IOException if the start-level search fails
+  */
+ private String[] determineIndexedPaths(IndexSearcher searcher, final int maxLevel, int maxPathCount)
+     throws IOException {
+     int startDepth = getStartDepth(searcher, maxLevel);
+     if (startDepth < 0){
+         return createMsg("startDepth cannot be determined after search for upto maxLevel ["+maxLevel+"]");
+     }
+
+     SearchContext sc = new SearchContext(searcher, maxLevel, maxPathCount);
+     Set<String> paths = Sets.newHashSet();
+     int maxPathLimitBreachedAtLevel = -1;
+
+     // Walk level by level over the whole forest rooted at the start depth.
+     // Traversing each start document's subtree separately would leave later
+     // subtrees unvisited when the limit is hit, so the "safe depth" result
+     // below would silently miss paths.
+     List<LuceneDoc> currentLevel = getDocsAtLevel(startDepth, sc);
+     levels:
+     while (!currentLevel.isEmpty()) {
+         List<LuceneDoc> nextLevel = new ArrayList<LuceneDoc>();
+         for (LuceneDoc node : currentLevel) {
+             if (paths.size() >= maxPathCount) {
+                 maxPathLimitBreachedAtLevel = node.depth;
+                 break levels;
+             }
+             paths.add(node.path);
+             //Break at maxLevel
+             if (node.depth < maxLevel) {
+                 Iterables.addAll(nextLevel, node.getChildren());
+             }
+         }
+         currentLevel = nextLevel;
+     }
+
+     if (maxPathLimitBreachedAtLevel < 0) {
+         return Iterables.toArray(paths, String.class);
+     }
+
+     //If max limit for path is reached then we can safely
+     //say about includedPaths upto depth = level at which limit reached - 1
+     //As for that level we know *all* the path roots
+     Set<String> result = Sets.newHashSet();
+     int safeDepth = maxPathLimitBreachedAtLevel - 1;
+     if (safeDepth > 0) {
+         for (String path : paths) {
+             if (PathUtils.getDepth(path) == safeDepth) {
+                 result.add(path);
+             }
+         }
+     }
+     return Iterables.toArray(result, String.class);
+ }
+
+ /**
+  * Finds the shallowest depth at which the index has at least one entry.
+  * An index might have data only at paths like /a/b/c, so the traversal has
+  * to begin at the first depth that yields any hit rather than at the root.
+  *
+  * @param searcher searcher over the index to examine
+  * @param maxLevel exclusive upper bound for the depths that are probed
+  * @return the first depth in {@code [0, maxLevel)} with a hit, or -1 if none
+  * @throws IOException if a search fails
+  */
+ private int getStartDepth(IndexSearcher searcher, int maxLevel) throws IOException {
+     for (int level = 0; level < maxLevel; level++) {
+         // One hit is enough to confirm that this depth is populated
+         TopDocs hits = searcher.search(newDepthQuery(level), 1);
+         if (hits.totalHits == 0) {
+             continue;
+         }
+         return level;
+     }
+     return -1;
+ }
+
+ /**
+  * Fetches every document stored at exactly the given depth.
+  *
+  * @param startDepth path depth the documents must have
+  * @param sc search context supplying the searcher
+  * @return one {@code LuceneDoc} per hit
+  * @throws IOException if the search or reading a hit fails
+  */
+ private static List<LuceneDoc> getDocsAtLevel(int startDepth, SearchContext sc) throws IOException {
+     Query depthQuery = newDepthQuery(startDepth);
+     // No cap on the number of hits: Integer.MAX_VALUE is passed as the limit
+     TopDocs hits = sc.searcher.search(depthQuery, Integer.MAX_VALUE);
+     return getLuceneDocs(hits, sc);
+ }
+
+ /**
+  * Bundles the searcher and the depth limit shared by one path-stats run.
+  */
+ private static class SearchContext {
+     final int maxLevel;
+     final IndexSearcher searcher;
+
+     // NOTE: maxPathCount is accepted but not stored; only searcher and maxLevel are kept
+     private SearchContext(IndexSearcher searcher, int maxLevel, int maxPathCount) {
+         this.maxLevel = maxLevel;
+         this.searcher = searcher;
+     }
+ }
+
+ /**
+  * A path found in the index, together with its depth and the context needed
+  * to look up its immediate children.
+  */
+ private static class LuceneDoc {
+     final String path;
+     final SearchContext sc;
+     final int depth;
+
+     public LuceneDoc(String path, SearchContext sc) {
+         this.sc = sc;
+         this.path = path;
+         this.depth = PathUtils.getDepth(path);
+     }
+
+     /**
+      * Queries the index for the immediate child nodes of this path.
+      *
+      * @return the documents matching both the ancestor term for this path
+      *         and a depth of exactly this path's depth plus one
+      * @throws RuntimeException wrapping any {@code IOException} from the search
+      */
+     public Iterable<LuceneDoc> getChildren() {
+         Query belowThisPath = new TermQuery(newAncestorTerm(path));
+         Query oneLevelDeeper = newDepthQuery(path);
+
+         // Both conditions are mandatory
+         BooleanQuery childQuery = new BooleanQuery();
+         childQuery.add(belowThisPath, BooleanClause.Occur.MUST);
+         childQuery.add(oneLevelDeeper, BooleanClause.Occur.MUST);
+
+         try {
+             TopDocs hits = sc.searcher.search(childQuery, Integer.MAX_VALUE);
+             return getLuceneDocs(hits, sc);
+         } catch (IOException e) {
+             throw new RuntimeException(e);
+         }
+     }
+ }
+
+ /**
+  * Converts search hits into {@code LuceneDoc} instances by reading the
+  * path of every hit from the index reader.
+  *
+  * @param docs search result whose {@code scoreDocs} are converted
+  * @param sc search context supplying the reader; also passed to each result
+  * @return one {@code LuceneDoc} per hit, in hit order
+  * @throws IOException if a document cannot be read
+  */
+ private static List<LuceneDoc> getLuceneDocs(TopDocs docs, SearchContext sc) throws IOException {
+     ScoreDoc[] hits = docs.scoreDocs;
+     List<LuceneDoc> converted = new ArrayList<LuceneDoc>(hits.length);
+     IndexReader reader = sc.searcher.getIndexReader();
+     for (int i = 0; i < hits.length; i++) {
+         String path = getPath(reader, hits[i]);
+         converted.add(new LuceneDoc(path, sc));
+     }
+     return converted;
+ }
+
+ private static String getPath(IndexReader reader, ScoreDoc doc) throws IOException {
+ PathStoredFieldVisitor visitor = new PathStoredFieldVisitor();
+ reader.document(doc.doc, visitor);
+ return visitor.getPath();
+ }
+
+ /**
+  * Builds a query for documents exactly one level below the given path.
+  *
+  * @param path parent path
+  * @return depth query for {@code depth(path) + 1}
+  */
+ private static Query newDepthQuery(String path) {
+     return newDepthQuery(PathUtils.getDepth(path) + 1);
+ }
+
+ private static Query newDepthQuery(int depth) {
+ return NumericRangeQuery.newIntRange(FieldNames.PATH_DEPTH, depth, depth, true, true);
+ }
+
+ /**
+  * Wraps a message into a single-element array, the shape in which
+  * diagnostics are returned in place of a path list.
+  *
+  * @param msg message to wrap
+  * @return a new array containing only {@code msg}
+  */
+ private static String[] createMsg(String msg) {
+     String[] single = new String[1];
+     single[0] = msg;
+     return single;
+ }
+
private static class IndexStats {
static final String[] FIELD_NAMES = new String[]{
"path",
Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java?rev=1737310&r1=1737309&r2=1737310&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java Fri Apr 1 06:36:25 2016
@@ -1588,7 +1588,7 @@ public class LucenePropertyIndex impleme
}
}
- private static class PathStoredFieldVisitor extends StoredFieldVisitor {
+ static class PathStoredFieldVisitor extends StoredFieldVisitor {
private String path;
private boolean pathVisited;