You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by al...@apache.org on 2011/10/13 12:23:20 UTC
svn commit: r1182761 -
/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java
Author: alexparvulescu
Date: Thu Oct 13 10:23:20 2011
New Revision: 1182761
URL: http://svn.apache.org/viewvc?rev=1182761&view=rev
Log:
JCR-3107 Speed up hierarchy cache initialization
- based on a patch by Martin Böttcher
Modified:
jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java
Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java?rev=1182761&r1=1182760&r2=1182761&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java Thu Oct 13 10:23:20 2011
@@ -16,28 +16,31 @@
*/
package org.apache.jackrabbit.core.query.lucene;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.text.NumberFormat;
+import java.util.Arrays;
+import java.util.BitSet;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.commons.collections.map.LRUMap;
+import org.apache.jackrabbit.core.id.NodeId;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FilterIndexReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
-import org.apache.lucene.index.CorruptIndexException;
-import org.apache.jackrabbit.core.id.NodeId;
-import org.apache.commons.collections.map.LRUMap;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.BitSet;
-import java.util.Map;
-import java.util.HashMap;
-import java.util.Collections;
-import java.text.NumberFormat;
-import java.util.concurrent.ConcurrentHashMap;
-
/**
* Implements an <code>IndexReader</code> that maintains caches to resolve
* {@link #getParent(int, BitSet)} calls efficiently.
@@ -85,7 +88,7 @@ class CachingIndexReader extends FilterI
* Initializes the {@link #inSegmentParents} and {@link #foreignParentDocIds}
* caches.
*/
- private CacheInitializer cacheInitializer;
+ private final CacheInitializer cacheInitializer;
/**
* Tick when this index reader was created.
@@ -127,16 +130,7 @@ class CachingIndexReader extends FilterI
this.cache = cache;
this.inSegmentParents = new int[delegatee.maxDoc()];
Arrays.fill(this.inSegmentParents, -1);
- this.shareableNodes = new BitSet();
- TermDocs tDocs = delegatee.termDocs(
- new Term(FieldNames.SHAREABLE_NODE, ""));
- try {
- while (tDocs.next()) {
- shareableNodes.set(tDocs.doc());
- }
- } finally {
- tDocs.close();
- }
+ this.shareableNodes = initShareableNodes(delegatee);
this.cacheInitializer = new CacheInitializer(delegatee);
if (initCache) {
cacheInitializer.run();
@@ -147,6 +141,20 @@ class CachingIndexReader extends FilterI
this.termDocsCache = new TermDocsCache(delegatee, FieldNames.PROPERTIES);
}
+    /**
+     * Builds the set of document numbers that are indexed with the
+     * {@link FieldNames#SHAREABLE_NODE} term (empty value).
+     *
+     * @param delegatee
+     *            the index reader to scan.
+     * @return a bit set with one bit set per matching document number.
+     * @throws IOException
+     *             if reading the term docs from the index fails.
+     */
+    private BitSet initShareableNodes(IndexReader delegatee) throws IOException {
+        BitSet result = new BitSet();
+        Term shareableTerm = new Term(FieldNames.SHAREABLE_NODE, "");
+        TermDocs termDocs = delegatee.termDocs(shareableTerm);
+        try {
+            // walk every posting of the term and flag its document number
+            for (boolean more = termDocs.next(); more; more = termDocs.next()) {
+                result.set(termDocs.doc());
+            }
+        } finally {
+            // always release the enumeration, even when iteration fails
+            termDocs.close();
+        }
+        return result;
+    }
+
/**
* Returns the <code>DocId</code> of the parent of <code>n</code> or
* {@link DocId#NULL} if <code>n</code> does not have a parent
@@ -354,6 +362,11 @@ class CachingIndexReader extends FilterI
private class CacheInitializer implements Runnable {
/**
+ * The {@link #inSegmentParents} is persisted using this filename.
+ */
+ private static final String FILE_CACHE_NAME_ARRAY = "cache.inSegmentParents";
+
+ /**
* From where to read.
*/
private final IndexReader reader;
@@ -370,8 +383,8 @@ class CachingIndexReader extends FilterI
/**
* Creates a new initializer with the given <code>reader</code>.
- *
- * @param reader an index reader.
+ * @param reader
+ * an index reader.
*/
public CacheInitializer(IndexReader reader) {
this.reader = reader;
@@ -389,7 +402,12 @@ class CachingIndexReader extends FilterI
// immediately return when stop is requested
return;
}
- initializeParents(reader);
+ boolean initCacheFromFile = loadCacheFromFile();
+ if (!initCacheFromFile) {
+ // file-based cache is not available, load from the
+ // repository
+ initializeParents(reader);
+ }
} catch (Exception e) {
// only log warn message during regular operation
if (!stopRequested) {
@@ -462,7 +480,8 @@ class CachingIndexReader extends FilterI
});
if (docs.isEmpty()) {
- // no more nodes to initialize
+ // no more nodes to initialize, persist cache to file
+ saveCacheToFile();
break;
}
@@ -590,6 +609,64 @@ class CachingIndexReader extends FilterI
tDocs.close();
}
}
+
+        /**
+         * Persists the cache info {@link #inSegmentParents} to a file:
+         * {@link #FILE_CACHE_NAME_ARRAY}, for faster init times on startup.
+         * Every entry of the array is written as one int, in array order.
+         * <p>
+         * Saving is best effort: a failure while creating or writing the file
+         * is logged and not propagated.
+         *
+         * see https://issues.apache.org/jira/browse/JCR-3107
+         *
+         * @throws IOException
+         *             if closing the successfully created output fails.
+         */
+        public void saveCacheToFile() throws IOException {
+            IndexOutput io = null;
+            try {
+                io = reader.directory().createOutput(FILE_CACHE_NAME_ARRAY);
+                for (int parent : inSegmentParents) {
+                    io.writeInt(parent);
+                }
+            } catch (Exception e) {
+                log.error(
+                        "Error saving " + FILE_CACHE_NAME_ARRAY + ": "
+                                + e.getMessage(), e);
+            } finally {
+                // io is still null when directory()/createOutput() failed;
+                // closing unconditionally would raise a NullPointerException
+                // from this finally block right after the failure was logged
+                if (io != null) {
+                    io.close();
+                }
+            }
+        }
+
+        /**
+         * Loads the cache info {@link #inSegmentParents} from the file
+         * {@link #FILE_CACHE_NAME_ARRAY}.
+         * <p>
+         * The file is read into a temporary array first and only copied into
+         * {@link #inSegmentParents} once every entry has been read, so a
+         * truncated or unreadable file never leaves partial data in the
+         * cache.
+         *
+         * see https://issues.apache.org/jira/browse/JCR-3107
+         *
+         * @return true if the cache has been initialized or false if the cache
+         *         file does not exist yet, or an error happened
+         * @throws IOException
+         *             if closing the cache file fails.
+         */
+        private boolean loadCacheFromFile() throws IOException {
+            boolean corrupt = false;
+            IndexInput ii = null;
+            try {
+                ii = reader.directory().openInput(FILE_CACHE_NAME_ARRAY);
+                int[] loaded = new int[inSegmentParents.length];
+                for (int i = 0; i < loaded.length; i++) {
+                    loaded[i] = ii.readInt();
+                }
+                // publish only after the complete file was read successfully
+                System.arraycopy(loaded, 0, inSegmentParents, 0, loaded.length);
+                return true;
+            } catch (FileNotFoundException ignore) {
+                // expected in the case where the file-based cache has not been
+                // initialized yet
+            } catch (IOException e) {
+                log.warn(
+                        "Saved state of CachingIndexReader is corrupt, will try to remove offending file "
+                                + FILE_CACHE_NAME_ARRAY, e);
+                // removal is deferred until the input has been closed below
+                corrupt = true;
+            } finally {
+                if (ii != null) {
+                    ii.close();
+                }
+            }
+            if (corrupt) {
+                // In the case where there is a read error, the cache file is
+                // removed so it can be recreated after the cache loads the
+                // data from the repository directly
+                try {
+                    reader.directory().deleteFile(FILE_CACHE_NAME_ARRAY);
+                } catch (IOException e) {
+                    // a failed removal must not prevent the caller from
+                    // falling back to loading from the repository
+                    log.warn("Unable to remove corrupt cache file "
+                            + FILE_CACHE_NAME_ARRAY, e);
+                }
+            }
+            return false;
+        }
}
/**