You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by re...@apache.org on 2012/01/19 15:29:59 UTC

svn commit: r1233399 - in /jackrabbit/branches/2.2: ./ jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java

Author: reschke
Date: Thu Jan 19 14:29:59 2012
New Revision: 1233399

URL: http://svn.apache.org/viewvc?rev=1233399&view=rev
Log:
JCR-3107: Speed up hierarchy cache initialization

(backported to 2.2)

Modified:
    jackrabbit/branches/2.2/   (props changed)
    jackrabbit/branches/2.2/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java

Propchange: jackrabbit/branches/2.2/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Thu Jan 19 14:29:59 2012
@@ -3,4 +3,4 @@
 /jackrabbit/sandbox/JCR-1456:774917-886178
 /jackrabbit/sandbox/JCR-2170:812417-816332
 /jackrabbit/sandbox/tripod-JCR-2209:795441-795863
-/jackrabbit/trunk:1038201,1038203,1038205,1038657,1039064,1039347,1039408,1039422-1039423,1039888,1039946,1040033,1040090,1040459,1040601,1040606,1040661,1040958,1041379,1041439,1041761,1042643,1042647,1042978-1042982,1043084-1043086,1043088,1043343,1043357-1043358,1043430,1043554,1043616,1043618,1043637,1043656,1043893,1043897,1044239,1044312,1044451,1044613,1049473,1049491,1049514,1049518,1049520,1049859,1049870,1049874,1049878,1049880,1049883,1049889,1049891,1049894-1049895,1049899-1049901,1049909-1049911,1049915-1049916,1049919,1049923,1049925,1049931,1049936,1049939,1050212,1050298,1050346,1050551,1055068,1055070-1055071,1055116-1055117,1055127,1055134,1055164,1055498,1060431,1060434,1060753,1063756,1064213,1064670,1065599,1065622,1066059,1066071,1066794,1069831,1071562,1071573,1071680,1072087,1074140,1077927,1077970,1079314,1079317,1080186,1080540,1082599,1082611,1082620,1087304,1088991,1089032,1089053,1089436,1092106,1092117,1092683,1097363,1097513-1097514,1098963-109
 8964,1099033,1099172,1100242,1100286,1101046,1102262,1102268-1102270,1102299,1102601,1104027,1126987,1128175,1129206,1130192,1130228,1132993,1136353,1136360,1138511,1141141,1141717,1143396,1143738,1144332,1144338,1144695,1152258,1155431,1157175,1165609,1173196,1174822,1174887,1175988,1176423,1176465,1176515,1176546,1177249,1177340,1178251,1178892,1179124,1179548,1180922,1181712,1182281,1182667,1182929,1183409,1185691,1186285,1186802,1187344,1188541,1188590,1198827
+/jackrabbit/trunk:1038201,1038203,1038205,1038657,1039064,1039347,1039408,1039422-1039423,1039888,1039946,1040033,1040090,1040459,1040601,1040606,1040661,1040958,1041379,1041439,1041761,1042643,1042647,1042978-1042982,1043084-1043086,1043088,1043343,1043357-1043358,1043430,1043554,1043616,1043618,1043637,1043656,1043893,1043897,1044239,1044312,1044451,1044613,1049473,1049491,1049514,1049518,1049520,1049859,1049870,1049874,1049878,1049880,1049883,1049889,1049891,1049894-1049895,1049899-1049901,1049909-1049911,1049915-1049916,1049919,1049923,1049925,1049931,1049936,1049939,1050212,1050298,1050346,1050551,1055068,1055070-1055071,1055116-1055117,1055127,1055134,1055164,1055498,1060431,1060434,1060753,1063756,1064213,1064670,1065599,1065622,1066059,1066071,1066794,1069831,1071562,1071573,1071680,1072087,1074140,1077927,1077970,1079314,1079317,1080186,1080540,1082599,1082611,1082620,1087304,1088991,1089032,1089053,1089436,1092106,1092117,1092683,1097363,1097513-1097514,1098963-109
 8964,1099033,1099172,1100242,1100286,1101046,1102262,1102268-1102270,1102299,1102601,1104027,1126987,1128175,1129206,1130192,1130228,1132993,1136353,1136360,1138511,1141141,1141717,1143396,1143738,1144332,1144338,1144695,1152258,1155431,1157175,1165609,1173196,1174822,1174887,1175988,1176423,1176465,1176515,1176546,1177249,1177340,1178251,1178892,1179124,1179548,1180922,1181712,1182281,1182667,1182761,1182824,1182929,1183409,1185691,1186285,1186802,1187344,1188541,1188590,1198827

Modified: jackrabbit/branches/2.2/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java
URL: http://svn.apache.org/viewvc/jackrabbit/branches/2.2/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java?rev=1233399&r1=1233398&r2=1233399&view=diff
==============================================================================
--- jackrabbit/branches/2.2/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java (original)
+++ jackrabbit/branches/2.2/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/CachingIndexReader.java Thu Jan 19 14:29:59 2012
@@ -16,29 +16,32 @@
  */
 package org.apache.jackrabbit.core.query.lucene;
 
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.text.NumberFormat;
+import java.util.Arrays;
+import java.util.BitSet;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.commons.collections.map.LRUMap;
+import org.apache.jackrabbit.core.id.NodeId;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.FilterIndexReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermDocs;
 import org.apache.lucene.index.TermEnum;
-import org.apache.lucene.index.CorruptIndexException;
-import org.apache.jackrabbit.core.id.NodeId;
-import org.apache.commons.collections.map.LRUMap;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.BitSet;
-import java.util.Map;
-import java.util.HashMap;
-import java.util.Collections;
-import java.text.NumberFormat;
-import java.util.concurrent.ConcurrentHashMap;
-
 /**
  * Implements an <code>IndexReader</code> that maintains caches to resolve
  * {@link #getParent(int, BitSet)} calls efficiently.
@@ -86,7 +89,7 @@ class CachingIndexReader extends FilterI
      * Initializes the {@link #inSegmentParents} and {@link #foreignParentDocIds}
      * caches.
      */
-    private CacheInitializer cacheInitializer;
+    private final CacheInitializer cacheInitializer;
 
     /**
      * Tick when this index reader was created.
@@ -128,16 +131,7 @@ class CachingIndexReader extends FilterI
         this.cache = cache;
         this.inSegmentParents = new int[delegatee.maxDoc()];
         Arrays.fill(this.inSegmentParents, -1);
-        this.shareableNodes = new BitSet();
-        TermDocs tDocs = delegatee.termDocs(
-                new Term(FieldNames.SHAREABLE_NODE, ""));
-        try {
-            while (tDocs.next()) {
-                shareableNodes.set(tDocs.doc());
-            }
-        } finally {
-            tDocs.close();
-        }
+        this.shareableNodes = initShareableNodes(delegatee);
         this.cacheInitializer = new CacheInitializer(delegatee);
         if (initCache) {
             cacheInitializer.run();
@@ -148,6 +142,20 @@ class CachingIndexReader extends FilterI
         this.termDocsCache = new TermDocsCache(delegatee, FieldNames.PROPERTIES);
     }
 
+    private BitSet initShareableNodes(IndexReader delegatee) throws IOException {
+        BitSet shareableNodes = new BitSet();
+        TermDocs tDocs = delegatee.termDocs(new Term(FieldNames.SHAREABLE_NODE,
+                ""));
+        try {
+            while (tDocs.next()) {
+                shareableNodes.set(tDocs.doc());
+            }
+        } finally {
+            tDocs.close();
+        }
+        return shareableNodes;
+    }
+
     /**
      * Returns the <code>DocId</code> of the parent of <code>n</code> or
      * {@link DocId#NULL} if <code>n</code> does not have a parent
@@ -355,6 +363,11 @@ class CachingIndexReader extends FilterI
     private class CacheInitializer implements Runnable {
 
         /**
+         * The {@link #inSegmentParents} is persisted using this filename.
+         */
+        private static final String FILE_CACHE_NAME_ARRAY = "cache.inSegmentParents";
+
+        /**
          * From where to read.
          */
         private final IndexReader reader;
@@ -371,8 +384,8 @@ class CachingIndexReader extends FilterI
 
         /**
          * Creates a new initializer with the given <code>reader</code>.
-         *
-         * @param reader an index reader.
+         * @param reader
+         *            an index reader.
          */
         public CacheInitializer(IndexReader reader) {
             this.reader = reader;
@@ -390,7 +403,13 @@ class CachingIndexReader extends FilterI
                     // immediately return when stop is requested
                     return;
                 }
-                initializeParents(reader);
+                boolean initCacheFromFile = loadCacheFromFile();
+                if (!initCacheFromFile) {
+                    // file-based cache is not available, load from the
+                    // repository
+                    log.debug("persisted cache is not available, will load directly from the repository.");
+                    initializeParents(reader);
+                }
             } catch (Exception e) {
                 // only log warn message during regular operation
                 if (!stopRequested) {
@@ -463,7 +482,8 @@ class CachingIndexReader extends FilterI
                 });
 
                 if (docs.isEmpty()) {
-                    // no more nodes to initialize
+                    // no more nodes to initialize, persist cache to file
+                    saveCacheToFile();
                     break;
                 }
 
@@ -591,6 +611,69 @@ class CachingIndexReader extends FilterI
                 tDocs.close();
             }
         }
+
+        /**
+         * Persists the cache info {@link #inSegmentParents} to a file:
+         * {@link #FILE_CACHE_NAME_ARRAY}, for faster init times on startup.
+         * 
+         * see https://issues.apache.org/jira/browse/JCR-3107
+         */
+        public void saveCacheToFile() throws IOException {
+            IndexOutput io = null;
+            try {
+                io = reader.directory().createOutput(FILE_CACHE_NAME_ARRAY);
+                for (int parent : inSegmentParents) {
+                    io.writeInt(parent);
+                }
+            } catch (Exception e) {
+                log.error(
+                        "Error saving " + FILE_CACHE_NAME_ARRAY + ": "
+                                + e.getMessage(), e);
+            } finally {
+                io.close();
+            }
+        }
+
+        /**
+         * Loads the cache info {@link #inSegmentParents} from the file
+         * {@link #FILE_CACHE_NAME_ARRAY}. On a read error the file is deleted
+         * and all entries are reset to -1, so no partially read data remains.
+         * see https://issues.apache.org/jira/browse/JCR-3107
+         * 
+         * @return true if the cache has been initialized or false if the cache
+         *         file does not exist yet, or an error happened
+         */
+        private boolean loadCacheFromFile() throws IOException {
+            boolean corrupt = false;
+            IndexInput ii = null;
+            try {
+                long time = System.currentTimeMillis();
+                ii = reader.directory().openInput(FILE_CACHE_NAME_ARRAY);
+                for (int i = 0; i < inSegmentParents.length; i++) {
+                    inSegmentParents[i] = ii.readInt();
+                }
+                log.debug("persisted cache initialized {} DocIds in {} ms",
+                        new Object[] { inSegmentParents.length, System.currentTimeMillis() - time });
+                return true;
+            } catch (FileNotFoundException ignored) {
+                // expected when the file-based cache has not been written yet
+            } catch (IOException e) {
+                log.warn("Saved state of CachingIndexReader is corrupt, will try to remove offending file "
+                        + FILE_CACHE_NAME_ARRAY, e);
+                corrupt = true;
+            } finally {
+                if (ii != null) {
+                    ii.close();
+                }
+            }
+            if (corrupt) {
+                // discard partial reads (-1 is the reader constructor's initial fill)
+                Arrays.fill(inSegmentParents, -1);
+                // delete only after close() so the file can be recreated later
+                reader.directory().deleteFile(FILE_CACHE_NAME_ARRAY);
+            }
+            return false;
+        }
     }
 
     /**