You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by ch...@apache.org on 2016/07/04 09:30:02 UTC

svn commit: r1751236 - in /jackrabbit/oak/trunk/oak-lucene/src: main/java/org/apache/jackrabbit/oak/plugins/index/lucene/ main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/ test/java/org/apache/jackrabbit/oak/plugins/index/lucene/ test...

Author: chetanm
Date: Mon Jul  4 09:30:01 2016
New Revision: 1751236

URL: http://svn.apache.org/viewvc?rev=1751236&view=rev
Log:
OAK-3629 - Index corruption seen with CopyOnRead when index definition is recreated

Implement garbage collection of old index directories. By default DeleteOnClose logic for directory would only remove the specific directory folder but the container parent folder would not be removed (as there may be other directory like spell check in use). So implemented logic ensures that on close it checks for older version (based on previous reindex) and removes them if they are empty

Also upon start it would ensure that only latest version of index exist

Modified:
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexCopier.java
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexMeta.java
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexRootDirectory.java
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/LocalIndexDir.java
    jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexCopierTest.java
    jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexRootDirectoryTest.java

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexCopier.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexCopier.java?rev=1751236&r1=1751235&r2=1751236&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexCopier.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexCopier.java Mon Jul  4 09:30:01 2016
@@ -84,7 +84,6 @@ import static org.apache.jackrabbit.oak.
 public class IndexCopier implements CopyOnReadStatsMBean, Closeable {
     private static final Set<String> REMOTE_ONLY = ImmutableSet.of("segments.gen");
     private static final int MAX_FAILURE_ENTRIES = 10000;
-    private static final AtomicInteger UNIQUE_COUNTER = new AtomicInteger();
     private static final String WORK_DIR_NAME = "indexWriterDir";
 
     private final Logger log = LoggerFactory.getLogger(getClass());
@@ -113,7 +112,6 @@ public class IndexCopier implements Copy
     private final AtomicLong uploadTime = new AtomicLong();
 
 
-    private final Map<String, String> indexPathMapping = newConcurrentMap();
     private final Map<String, Set<String>> sharedWorkingSetMap = newHashMap();
     private final Map<String, String> indexPathVersionMapping = newConcurrentMap();
     private final ConcurrentMap<String, LocalIndexFile> failedToDeleteFiles = newConcurrentMap();
@@ -1089,7 +1087,10 @@ public class IndexCopier implements Copy
                 //Clean out the local dir irrespective of any error occurring upon
                 //close in wrapped directory
                 try{
+                    long totalDeletedSize = FileUtils.sizeOf(oldIndexDir);
                     FileUtils.deleteDirectory(oldIndexDir);
+                    totalDeletedSize  += indexRootDirectory.gcEmptyDirs(oldIndexDir);
+                    garbageCollectedSize.addAndGet(totalDeletedSize);
                     log.debug("Removed old index content from {} ", oldIndexDir);
                 } catch (IOException e){
                     log.warn("Not able to remove old version of copied index at {}", oldIndexDir, e);

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexMeta.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexMeta.java?rev=1751236&r1=1751235&r2=1751236&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexMeta.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexMeta.java Mon Jul  4 09:30:01 2016
@@ -69,6 +69,11 @@ final class IndexMeta implements Compara
         return Long.compare(creationTime, o.creationTime);
     }
 
+    @Override
+    public String toString() {
+        return String.format("%s, %tc", indexPath, creationTime);
+    }
+
     private static Properties loadFromFile(File file) throws IOException {
         InputStream is = null;
         try {

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexRootDirectory.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexRootDirectory.java?rev=1751236&r1=1751235&r2=1751236&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexRootDirectory.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexRootDirectory.java Mon Jul  4 09:30:01 2016
@@ -28,6 +28,8 @@ import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 
+import javax.annotation.CheckForNull;
+
 import com.google.common.base.Charsets;
 import com.google.common.base.Joiner;
 import com.google.common.collect.ArrayListMultimap;
@@ -37,20 +39,44 @@ import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.hash.Hashing;
 import org.apache.commons.io.FileUtils;
+import org.apache.jackrabbit.oak.commons.IOUtils;
 import org.apache.jackrabbit.oak.commons.PathUtils;
 import org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition;
 import org.apache.jackrabbit.oak.stats.Clock;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
+import static com.google.common.base.Preconditions.checkNotNull;
 import static com.google.common.base.Preconditions.checkState;
 
+/**
+ * Represents the root directory on file system used for storing index copy locally.
+ * For each Oak index in repository it creates a container directory which is a function of
+ * index path and a unique id which stored in index node in Oak. Under that container
+ * directory various sub directories can be created for storing different types of indexes
+ */
 public class IndexRootDirectory {
     static final int MAX_NAME_LENGTH = 127;
     public static final String INDEX_METADATA_FILE_NAME = "index-details.txt";
 
+    private static final FileFilter LOCAL_DIR_FILTER = new FileFilter() {
+        @Override
+        public boolean accept(File file) {
+            if (!file.isDirectory()) {
+                return false;
+            }
+            File metaFile = new File(file, INDEX_METADATA_FILE_NAME);
+            return metaFile.exists();
+        }
+    };
+
+    private final Logger log = LoggerFactory.getLogger(getClass());
+
     private final File indexRootDir;
 
-    public IndexRootDirectory(File indexRootDir) {
+    public IndexRootDirectory(File indexRootDir) throws IOException {
         this.indexRootDir = indexRootDir;
+        gcIndexDirs();
     }
 
     public long getSize(){
@@ -62,8 +88,7 @@ public class IndexRootDirectory {
 
         if (uid == null) {
             //Old format
-            String subDir = getPathHash(indexPath);
-            File baseFolder = new File(indexRootDir, subDir);
+            File baseFolder = getOldFormatDir(indexPath);
             String version = String.valueOf(definition.getReindexCount());
             File indexDir = new File(baseFolder, version);
             if (!indexDir.exists()){
@@ -114,6 +139,43 @@ public class IndexRootDirectory {
     }
 
     /**
+     * Performs garbage collection of older version of index directories based on
+     * index directory derived from the passed sub directory.
+     *
+     * @param subDir one of the sub directories like 'default' etc. Such that
+     *               correct local index directory (container dir) can be checked for deletion
+     */
+    public long gcEmptyDirs(File subDir) throws IOException {
+        File parent = checkNotNull(subDir).getParentFile().getCanonicalFile();
+        LocalIndexDir indexDir = findMatchingIndexDir(parent);
+        long totalDeletedSize = 0;
+        if (indexDir != null) {
+            List<LocalIndexDir> idxDirs = getLocalIndexes(indexDir.getJcrPath());
+            //Flag to determine in given ordered list of LocalIndexDir
+            //we found the dir which matched the parent of passed dir. So its safe
+            //to delete those dirs and its successors in the list (as they are older)
+            boolean matchingDirFound = false;
+            for (LocalIndexDir d : idxDirs){
+                if (d.dir.equals(parent)){
+                    matchingDirFound = true;
+                }
+                if (matchingDirFound && d.isEmpty()){
+                    long dirSize = FileUtils.sizeOf(d.dir);
+                    if (FileUtils.deleteQuietly(d.dir)){
+                        totalDeletedSize += dirSize;
+                    } else {
+                        log.warn("Not able to deleted unused local index directory [{}]. " +
+                                "Deletion would be retried later again.",  d);
+                    }
+                    totalDeletedSize += deleteOldFormatDir(d.getJcrPath());
+                }
+            }
+        }
+        return totalDeletedSize;
+    }
+
+
+    /**
      * <ul>
      *     <li>abc -> abc</li>
      *     <li>xy:abc -> xyabc</li>
@@ -152,16 +214,7 @@ public class IndexRootDirectory {
      * The value is a sorted list with most recent version of index at the start
      */
     private Map<String, List<LocalIndexDir>> getIndexesPerPath() throws IOException {
-        File[] dirs = indexRootDir.listFiles(new FileFilter() {
-            @Override
-            public boolean accept(File file) {
-                if (!file.isDirectory()){
-                    return false;
-                }
-                File metaFile = new File(file, INDEX_METADATA_FILE_NAME);
-                return metaFile.exists();
-            }
-        });
+        File[] dirs = indexRootDir.listFiles(LOCAL_DIR_FILTER);
 
         ListMultimap<String, LocalIndexDir> pathToDirMap = ArrayListMultimap.create();
         for (File indexDir : dirs){
@@ -178,6 +231,70 @@ public class IndexRootDirectory {
         return result;
     }
 
+    /**
+     * Garbage collect old index directories. Should only be invoked at startup
+     * as it assumes that none of the directories are getting used
+     */
+    private void gcIndexDirs() throws IOException {
+        Map<String, List<LocalIndexDir>> mapping = getIndexesPerPath();
+        long totalDeletedSize = 0;
+        for (Map.Entry<String, List<LocalIndexDir>> e : mapping.entrySet()) {
+            List<LocalIndexDir> dirs = e.getValue();
+            //In startup mode we can be sure that no directory is in use
+            //so be more aggressive in what we delete i.e. even not empty dir
+            for (int i = 1; i < dirs.size(); i++) {
+                LocalIndexDir dir = dirs.get(i);
+                long dirSize = FileUtils.sizeOf(dir.dir);
+                if (FileUtils.deleteQuietly(dir.dir)){
+                    totalDeletedSize += dirSize;
+                } else {
+                    log.warn("Not able to deleted unused local index directory [{}]. " +
+                            "Deletion would be retried later again.",  dir);
+                }
+            }
+            totalDeletedSize += deleteOldFormatDir(dirs.get(0).getJcrPath());
+        }
+
+        if (totalDeletedSize > 0){
+            log.info("Reclaimed [{}] space by removing unused/old index directories",
+                    IOUtils.humanReadableByteCount(totalDeletedSize));
+        }
+    }
+
+    @CheckForNull
+    private LocalIndexDir findMatchingIndexDir(File dir) throws IOException {
+        //Resolve to canonical file so that equals can work reliable
+        dir = dir.getCanonicalFile();
+
+        Map<String, List<LocalIndexDir>> mapping = getIndexesPerPath();
+        for (Map.Entry<String, List<LocalIndexDir>> e : mapping.entrySet()){
+            for (LocalIndexDir idxDir : e.getValue()){
+                if (idxDir.dir.equals(dir)){
+                    return idxDir;
+                }
+            }
+        }
+        return null;
+    }
+
+    private long deleteOldFormatDir(String jcrPath) {
+        File oldDir = getOldFormatDir(jcrPath);
+        if (oldDir.exists()){
+            long size = FileUtils.sizeOf(oldDir);
+            if (!FileUtils.deleteQuietly(oldDir)){
+                log.warn("Not able to deleted unused local index directory [{}]", oldDir.getAbsolutePath());
+            } else {
+                return size;
+            }
+        }
+        return 0;
+    }
+
+    private File getOldFormatDir(String indexPath) {
+        String subDir = getPathHash(indexPath);
+        return new File(indexRootDir, subDir);
+    }
+
     private static long getTime() {
         try {
             return Clock.SIMPLE.getTimeIncreasing();

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/LocalIndexDir.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/LocalIndexDir.java?rev=1751236&r1=1751235&r2=1751236&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/LocalIndexDir.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/LocalIndexDir.java Mon Jul  4 09:30:01 2016
@@ -32,7 +32,7 @@ public final class LocalIndexDir impleme
     final IndexMeta indexMeta;
 
     public LocalIndexDir(File dir) throws IOException {
-        this.dir = dir;
+        this.dir = dir.getCanonicalFile();
         File indexDetails = new File(dir, IndexRootDirectory.INDEX_METADATA_FILE_NAME);
         checkState(isIndexDir(dir), "No file [%s] found in dir [%s]",
                 INDEX_METADATA_FILE_NAME, dir.getAbsolutePath());
@@ -70,6 +70,11 @@ public final class LocalIndexDir impleme
         return indexMeta.compareTo(o.indexMeta);
     }
 
+    @Override
+    public String toString() {
+        return String.format("%s (%s)", dir.getAbsolutePath(), indexMeta);
+    }
+
     static boolean isIndexDir(File file){
         File indexDetails = new File(file, IndexRootDirectory.INDEX_METADATA_FILE_NAME);
         return indexDetails.exists();

Modified: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexCopierTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexCopierTest.java?rev=1751236&r1=1751235&r2=1751236&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexCopierTest.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexCopierTest.java Mon Jul  4 09:30:01 2016
@@ -245,6 +245,10 @@ public class IndexCopierTest {
         //Assert that new index file do exist and not get removed
         File indexDir2 = c1.getIndexDir(defn, indexPath);
         assertTrue(new File(indexDir2, "t1").exists());
+
+        //Check if parent directory is also removed i.e.
+        //index count should be 1 now
+        assertEquals(1, c1.getIndexRootDirectory().getLocalIndexes(indexPath).size());
     }
 
     @Test

Modified: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexRootDirectoryTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexRootDirectoryTest.java?rev=1751236&r1=1751235&r2=1751236&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexRootDirectoryTest.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/IndexRootDirectoryTest.java Mon Jul  4 09:30:01 2016
@@ -20,9 +20,11 @@
 package org.apache.jackrabbit.oak.plugins.index.lucene.directory;
 
 import java.io.File;
+import java.io.IOException;
 import java.util.List;
 
 import com.google.common.base.Strings;
+import org.apache.commons.io.FileUtils;
 import org.apache.jackrabbit.oak.plugins.index.IndexConstants;
 import org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition;
 import org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexEditorContext;
@@ -47,7 +49,7 @@ public class IndexRootDirectoryTest {
     private NodeBuilder builder = EMPTY_NODE.builder();
 
     @Before
-    public void setUp(){
+    public void setUp() throws IOException {
         dir = new IndexRootDirectory(temporaryFolder.getRoot());
     }
 
@@ -131,11 +133,77 @@ public class IndexRootDirectoryTest {
         assertEquals(longName, IndexRootDirectory.getIndexFolderBaseName(longName2));
     }
 
+    @Test
+    public void gcIndexDirs() throws Exception{
+        //Create an old format directory
+        File fa0 = dir.getIndexDir(getDefn(), "/a");
+
+        configureUniqueId();
+        File fa1 = dir.getIndexDir(getDefn(), "/a");
+        configureUniqueId();
+        File fa2 = dir.getIndexDir(getDefn(), "/a");
+
+        List<LocalIndexDir> dirs = dir.getLocalIndexes("/a");
+        assertEquals(2, dirs.size());
+
+        dir.gcEmptyDirs(fa2);
+        //No index dir should be removed. Even empty ones
+        assertEquals(2, dirs.size());
+
+        configureUniqueId();
+        File fa3 = dir.getIndexDir(getDefn(), "/a");
+
+        assertEquals(3, dir.getLocalIndexes("/a").size());
+
+        //Dir size should still be 3 as non empty dir cannot be be collected
+        dir.gcEmptyDirs(fa3);
+        assertEquals(3, dir.getLocalIndexes("/a").size());
+
+        //Now get rid of 'default' for the first localDir dir i.e. fa1
+        FileUtils.deleteQuietly(fa1);
+        dir.gcEmptyDirs(fa1);
+
+        assertEquals(2, dir.getLocalIndexes("/a").size());
+        assertFalse(fa0.exists());
+
+        FileUtils.deleteDirectory(fa2);
+        FileUtils.deleteDirectory(fa3);
+
+        //Note that we deleted both fa2 and fa3 but GC was done based on fa2 (fa2 < fa3)
+        //So only dirs which are of same time or older than fa2 would be removed. So in this
+        //case fa3 would be left
+        dir.gcEmptyDirs(fa2);
+        assertEquals(1, dir.getLocalIndexes("/a").size());
+    }
+
+    @Test
+    public void gcIndexDirsOnStart() throws Exception{
+        File fa0 = dir.getIndexDir(getDefn(), "/a");
+
+        configureUniqueId();
+        File fa1 = dir.getIndexDir(getDefn(), "/a");
+        configureUniqueId();
+        File fa2 = dir.getIndexDir(getDefn(), "/a");
+        assertEquals(2, dir.getLocalIndexes("/a").size());
+
+        //Now reinitialize
+        dir = new IndexRootDirectory(temporaryFolder.getRoot());
+
+        assertFalse(fa0.exists());
+        assertFalse(fa1.exists());
+        assertTrue(fa2.exists());
+        assertEquals(1, dir.getLocalIndexes("/a").size());
+    }
+
     private NodeBuilder resetBuilder() {
         builder = EMPTY_NODE.builder();
         return builder;
     }
 
+    private void configureUniqueId(){
+        LuceneIndexEditorContext.configureUniqueId(resetBuilder());
+    }
+
     private IndexDefinition getDefn(){
         return new IndexDefinition(root, builder.getNodeState());
     }