You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by th...@apache.org on 2017/06/26 15:59:36 UTC
svn commit: r1799938 - in
/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene:
LuceneIndexMBean.java LuceneIndexMBeanImpl.java
Author: thomasm
Date: Mon Jun 26 15:59:36 2017
New Revision: 1799938
URL: http://svn.apache.org/viewvc?rev=1799938&view=rev
Log:
OAK-6381 Improved index analysis tools
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexMBean.java
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexMBeanImpl.java
Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexMBean.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexMBean.java?rev=1799938&r1=1799937&r2=1799938&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexMBean.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexMBean.java Mon Jun 26 15:59:36 2017
@@ -61,6 +61,20 @@ public interface LuceneIndexMBean {
@Description("The index path (empty for all indexes)")
String indexPath
) throws IOException;
+
+ /**
+  * Lists the terms stored in one index, or in all indexes, together with
+  * the number of documents each term occurs in.
+  * <p>
+  * For every inspected field the result holds one summary line
+  * ({@code "<sum of document counts> (total for field <name>)"}), followed
+  * by one {@code "<document count> <term>"} line per listed term, most
+  * frequent terms first. Terms that occur in a single document only are
+  * part of the summary, but are not listed.
+  *
+  * @param indexPath the path of the index to inspect; {@code null} or
+  *        empty to inspect all indexes
+  * @param field the name of the field to inspect; {@code null} or empty
+  *        to inspect all fields
+  * @param max the maximum number of terms listed per field and index;
+  *        zero or negative for no limit
+  * @return the report lines
+  * @throws IOException if an index cannot be read
+  */
+ @Description("Retrieves the terms, and number of documents for each term, for an index. " +
+         "Only terms that occur in more than one document are listed. " +
+         "This allows to closely investigate what is stored in the index.")
+ String[] getFieldTermsInfo(
+         @Name("indexPath")
+         @Description("The index path (empty for all indexes)")
+         String indexPath,
+         @Name("field")
+         @Description("The field name (empty for all fields)")
+         String field,
+         @Name("max")
+         @Description("The maximum number of terms to return per field and index " +
+                 "(e.g. 100; zero or negative for no limit)")
+         int max
+ ) throws IOException;
@Description("Returns the stored index definition for index at given path in string form")
String getStoredIndexDefinition(@Name("indexPath") String indexPath);
Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexMBeanImpl.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexMBeanImpl.java?rev=1799938&r1=1799937&r2=1799938&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexMBeanImpl.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexMBeanImpl.java Mon Jun 26 15:59:36 2017
@@ -24,6 +24,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
+import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;
@@ -61,6 +62,8 @@ import org.apache.lucene.index.Directory
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
@@ -72,6 +75,7 @@ import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
+import org.apache.lucene.util.BytesRef;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -83,6 +87,7 @@ import static org.apache.jackrabbit.oak.
import static org.apache.jackrabbit.oak.plugins.index.lucene.directory.DirectoryUtils.dirSize;
public class LuceneIndexMBeanImpl extends AnnotatedStandardMBean implements LuceneIndexMBean {
+
private final Logger log = LoggerFactory.getLogger(getClass());
private final IndexTracker indexTracker;
private final NodeStore nodeStore;
@@ -225,6 +230,32 @@ public class LuceneIndexMBeanImpl extend
}
return list.toArray(new String[0]);
}
+
+ /**
+  * Collects the term report of the given index, or of every index known to
+  * the index tracker if no path is given.
+  *
+  * @param indexPath the index path; {@code null} or empty for all indexes
+  * @param field the field name, passed on unchanged to the term listing
+  * @param max the entry limit, passed on unchanged to the term listing
+  * @return the concatenated report lines of all inspected indexes
+  * @throws IOException if reading the terms of an index fails
+  */
+ @Override
+ public String[] getFieldTermsInfo(String indexPath, String field, int max) throws IOException {
+     // a sorted set, so that the indexes are reported in path order
+     Set<String> paths = new TreeSet<String>();
+     boolean allIndexes = indexPath == null || indexPath.isEmpty();
+     if (allIndexes) {
+         paths.addAll(indexTracker.getIndexNodePaths());
+     } else {
+         paths.add(indexPath);
+     }
+     List<String> report = new ArrayList<String>();
+     for (String p : paths) {
+         IndexNode node = indexTracker.acquireIndexNode(p);
+         if (node == null) {
+             // no index node could be acquired for this path: nothing to report
+             continue;
+         }
+         try {
+             report.addAll(getFieldTerms(p, field, max, node.getSearcher()));
+         } finally {
+             // hand the node back in any case, also if reading the terms failed
+             node.release();
+         }
+     }
+     return report.toArray(new String[0]);
+ }
@Override
public String getStoredIndexDefinition(@Name("indexPath") String indexPath) {
@@ -343,7 +374,69 @@ public class LuceneIndexMBeanImpl extend
}
return list;
}
-
+
+ /**
+  * Reports how often the terms of a field occur in an index.
+  * <p>
+  * For each reported field, the first line is a summary with the sum of the
+  * document counts of all terms of that field. It is followed by one
+  * {@code "<count> <term>"} line per listed term, ordered by descending
+  * count (ties by descending term). Terms that occur in a single document
+  * are counted in the summary, but are not listed.
+  *
+  * @param path the index path (not used for the report itself)
+  * @param field the field name; {@code null} or empty to report every field
+  *        of the index, one after the other
+  * @param max the maximum number of terms listed per field; zero or
+  *        negative for no limit
+  * @param searcher provides the index reader to inspect
+  * @return the report lines; empty if the index has no terms for the field
+  * @throws IOException if reading the index fails
+  */
+ private static ArrayList<String> getFieldTerms(String path,
+         String field, int max, IndexSearcher searcher) throws IOException {
+     IndexReader reader = searcher.getIndexReader();
+     if (field != null && !field.isEmpty()) {
+         return getTermsOfSingleField(reader, field, max);
+     }
+     ArrayList<String> all = new ArrayList<String>();
+     Fields fields = MultiFields.getFields(reader);
+     if (fields != null) {
+         for (String f : fields) {
+             // call the helper directly: going through this method again would
+             // never terminate for a field whose name is the empty string
+             all.addAll(getTermsOfSingleField(reader, f, max));
+         }
+     }
+     return all;
+ }
+
+ /**
+  * Builds the report (summary line plus the most frequent terms) for one
+  * named field.
+  */
+ private static ArrayList<String> getTermsOfSingleField(IndexReader reader,
+         String field, int max) throws IOException {
+     ArrayList<String> result = new ArrayList<String>();
+     Terms terms = MultiFields.getTerms(reader, field);
+     if (terms == null) {
+         return result;
+     }
+     // not named "Entry", so it does not shadow the imported java.util.Map.Entry
+     class TermCount implements Comparable<TermCount> {
+         String term;
+         int count;
+
+         @Override
+         public int compareTo(TermCount o) {
+             // reverse order: highest count first, ties by descending term
+             int c = Integer.compare(o.count, count);
+             return c != 0 ? c : o.term.compareTo(term);
+         }
+     }
+     ArrayList<TermCount> list = new ArrayList<TermCount>();
+     long totalCount = 0;
+     TermsEnum iterator = terms.iterator(null);
+     BytesRef byteRef;
+     while ((byteRef = iterator.next()) != null) {
+         TermCount e = new TermCount();
+         e.term = byteRef.utf8ToString();
+         e.count = iterator.docFreq();
+         totalCount += e.count;
+         if (e.count > 1) {
+             list.add(e);
+         }
+         // Keep memory bounded by pruning once the list is twice the limit.
+         // Compare as long: "2 * max" overflows int for a very large limit,
+         // which would make this condition true (and sort) for every term.
+         if (max > 0 && list.size() > 2L * max) {
+             sortAndTruncateList(list, max);
+         }
+     }
+     sortAndTruncateList(list, max);
+     result.add(totalCount + " (total for field " + field + ")");
+     for (TermCount e : list) {
+         result.add(e.count + " " + e.term);
+     }
+     return result;
+ }
+
+ /**
+  * Sorts the list in place (natural order) and then drops everything after
+  * the first {@code max} elements.
+  *
+  * @param list the list to sort and shorten; it is modified
+  * @param max the number of leading elements to keep; zero or negative
+  *        keeps all elements
+  */
+ static <T extends Comparable<T>> void sortAndTruncateList(ArrayList<T> list, int max) {
+     Collections.sort(list);
+     if (max <= 0) {
+         // no limit: sorting is all that is asked for
+         return;
+     }
+     // remove from the tail, so that no element needs to be shifted
+     for (int last = list.size() - 1; last >= max; last--) {
+         list.remove(last);
+     }
+ }
+
private static String[] determineIndexedPaths(IndexSearcher searcher, final int maxLevel, int maxPathCount)
throws IOException {
Set<String> paths = Sets.newHashSet();