You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by am...@apache.org on 2017/10/18 09:42:24 UTC

svn commit: r1812485 - in /jackrabbit/oak/trunk: oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java oak-it/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTrackerGCTest.java

Author: amitj
Date: Wed Oct 18 09:42:24 2017
New Revision: 1812485

URL: http://svn.apache.org/viewvc?rev=1812485&view=rev
Log:
OAK-6827: Consistency check fails with active deletions

- consistencyCheck now filters out missing candidates tracked with active deletions
- GC, when complete, ejects actively deleted tracked ids which are not reported even by the mark phase, which essentially means that those dangling/unused references have eventually been cleared out by version gc

Modified:
    jackrabbit/oak/trunk/oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
    jackrabbit/oak/trunk/oak-it/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTrackerGCTest.java

Modified: jackrabbit/oak/trunk/oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java?rev=1812485&r1=1812484&r2=1812485&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java (original)
+++ jackrabbit/oak/trunk/oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java Wed Oct 18 09:42:24 2017
@@ -41,10 +41,8 @@ import javax.annotation.Nullable;
 import com.google.common.base.Charsets;
 import com.google.common.base.Function;
 import com.google.common.base.Joiner;
-import com.google.common.base.Splitter;
 import com.google.common.base.StandardSystemProperty;
 import com.google.common.base.Stopwatch;
-import com.google.common.base.Strings;
 import com.google.common.collect.Iterators;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
@@ -58,6 +56,7 @@ import org.apache.jackrabbit.core.data.D
 import org.apache.jackrabbit.oak.api.jmx.CheckpointMBean;
 import org.apache.jackrabbit.oak.commons.FileIOUtils;
 import org.apache.jackrabbit.oak.commons.FileIOUtils.FileLineDifferenceIterator;
+import org.apache.jackrabbit.oak.plugins.blob.datastore.BlobIdTracker;
 import org.apache.jackrabbit.oak.plugins.blob.datastore.BlobTracker;
 import org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore;
 import org.apache.jackrabbit.oak.plugins.blob.datastore.SharedDataStoreUtils;
@@ -69,7 +68,9 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import static com.google.common.collect.Lists.newArrayList;
+import static java.io.File.createTempFile;
 import static org.apache.commons.io.FileUtils.copyFile;
+import static org.apache.commons.io.FileUtils.moveFile;
 import static org.apache.jackrabbit.oak.commons.FileIOUtils.copy;
 import static org.apache.jackrabbit.oak.commons.FileIOUtils.merge;
 import static org.apache.jackrabbit.oak.commons.FileIOUtils.sort;
@@ -437,7 +438,7 @@ public class MarkSweepGarbageCollector i
             closeQuietly(removesWriter);
         }
 
-        BlobCollectionType.get(blobStore).handleRemoves(blobStore, fs.getGarbage());
+        BlobCollectionType.get(blobStore).handleRemoves(blobStore, fs.getGarbage(), fs.getMarkedRefs());
 
         if(count != deleted) {
             LOG.warn("Deleted only [{}] blobs entries from the [{}] candidates identified. This may happen if blob " 
@@ -605,10 +606,12 @@ public class MarkSweepGarbageCollector i
                 fs.getAvailableRefs(),
                 fs.getMarkedRefs(),
                 transformer);
-            candidates = FileIOUtils.writeStrings(iter, fs.getGcCandidates(), true);
+            // If tracking then also filter ids being tracked which are active deletions for lucene
+            candidates = BlobCollectionType.get(blobStore).filter(blobStore, iter, fs);
+
             LOG.trace("Ending difference phase of the consistency check");
-            
             LOG.info("Consistency check found [{}] missing blobs", candidates);
+
             if (candidates > 0) {
                 LOG.warn("Consistency check failure in the the blob store : {}, check missing candidates in file {}",
                             blobStore, fs.getGcCandidates().getAbsolutePath());
@@ -620,6 +623,7 @@ public class MarkSweepGarbageCollector i
         }
         return candidates;
     }
+
     /**
      * BlobIdRetriever class to retrieve all blob ids.
      */
@@ -794,9 +798,11 @@ public class MarkSweepGarbageCollector i
             }
 
             @Override
-            void handleRemoves(GarbageCollectableBlobStore blobStore,
-                    File removedIds) throws IOException {
-                ((BlobTrackingStore) blobStore).getTracker().remove(removedIds);
+            void handleRemoves(GarbageCollectableBlobStore blobStore, File removedIds, File markedRefs) throws IOException {
+                BlobTrackingStore store = (BlobTrackingStore) blobStore;
+                BlobIdTracker tracker = (BlobIdTracker) store.getTracker();
+                tracker.remove(removedIds);
+                tracker.getDeleteTracker().reconcile(markedRefs);
             }
 
             @Override
@@ -810,6 +816,34 @@ public class MarkSweepGarbageCollector i
                     LOG.warn("Unable to track blob ids locally");
                 }
             }
+
+            @Override
+            public int filter(GarbageCollectableBlobStore blobStore, FileLineDifferenceIterator iter,
+                GarbageCollectorFileState fs) throws IOException {
+                // Write the original candidates
+                FileIOUtils.writeStrings(iter, fs.getGcCandidates(), true);
+
+                // Filter the ids actively deleted
+                BlobTrackingStore store = (BlobTrackingStore) blobStore;
+                BlobIdTracker tracker = (BlobIdTracker) store.getTracker();
+
+                // Move the candidates identified to a temp file
+                File candTemp = createTempFile("candTemp", null);
+                copyFile(fs.getGcCandidates(), candTemp);
+
+                Iterator<String> filter = tracker.getDeleteTracker().filter(candTemp);
+                try {
+                    return FileIOUtils.writeStrings(filter, fs.getGcCandidates(), true);
+                } finally {
+                    if (filter != null && filter instanceof FileLineDifferenceIterator) {
+                        ((FileLineDifferenceIterator) filter).close();
+                    }
+
+                    if (candTemp != null) {
+                        candTemp.delete();
+                    }
+                }
+            }
         },
         DEFAULT;
 
@@ -872,10 +906,10 @@ public class MarkSweepGarbageCollector i
          *
          * @param blobStore
          * @param removedIds
+         * @param markedRefs
          * @throws IOException
          */
-        void handleRemoves(GarbageCollectableBlobStore blobStore,
-            File removedIds) throws IOException {
+        void handleRemoves(GarbageCollectableBlobStore blobStore, File removedIds, File markedRefs) throws IOException {
             FileUtils.forceDelete(removedIds);
         }
 
@@ -897,5 +931,10 @@ public class MarkSweepGarbageCollector i
             }
             return DEFAULT;
         }
+
+        public int filter(GarbageCollectableBlobStore blobStore, FileLineDifferenceIterator iter,
+            GarbageCollectorFileState fs) throws IOException {
+            return FileIOUtils.writeStrings(iter, fs.getGcCandidates(), true);
+        }
     }
 }

Modified: jackrabbit/oak/trunk/oak-it/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTrackerGCTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-it/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTrackerGCTest.java?rev=1812485&r1=1812484&r2=1812485&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-it/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTrackerGCTest.java (original)
+++ jackrabbit/oak/trunk/oak-it/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTrackerGCTest.java Wed Oct 18 09:42:24 2017
@@ -19,8 +19,10 @@ package org.apache.jackrabbit.oak.plugin
 import java.io.ByteArrayInputStream;
 import java.io.Closeable;
 import java.io.File;
+import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
@@ -57,14 +59,19 @@ import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.TemporaryFolder;
 
+import static com.google.common.collect.Lists.newArrayList;
 import static com.google.common.collect.Sets.newHashSet;
 import static com.google.common.collect.Sets.union;
+import static java.lang.String.valueOf;
 import static java.util.concurrent.Executors.newSingleThreadExecutor;
 import static java.util.concurrent.Executors.newSingleThreadScheduledExecutor;
 import static java.util.concurrent.TimeUnit.MILLISECONDS;
 import static java.util.concurrent.TimeUnit.MINUTES;
 import static org.apache.commons.io.FileUtils.forceDelete;
 import static org.apache.commons.io.IOUtils.closeQuietly;
+import static org.apache.jackrabbit.oak.commons.FileIOUtils.readStringsAsSet;
+import static org.apache.jackrabbit.oak.commons.FileIOUtils.writeStrings;
+import static org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreUtils.createFDS;
 import static org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreUtils.getBlobStore;
 import static org.apache.jackrabbit.oak.plugins.blob.datastore.SharedDataStoreUtils
     .SharedStoreRecordType.REPOSITORY;
@@ -122,6 +129,7 @@ public class DataStoreTrackerGCTest {
         Cluster cluster = new Cluster("cluster1");
         BlobStore s = cluster.blobStore;
         BlobIdTracker tracker = (BlobIdTracker) ((BlobTrackingStore) s).getTracker();
+
         DataStoreState state = init(cluster.nodeStore, 0);
         ScheduledFuture<?> scheduledFuture = newSingleThreadScheduledExecutor()
             .schedule(tracker.new SnapshotJob(), 0, MILLISECONDS);
@@ -138,6 +146,134 @@ public class DataStoreTrackerGCTest {
     }
 
     @Test
+    public void gcReconcileActiveDeletion() throws Exception {
+        Cluster cluster = new Cluster("cluster1");
+        BlobStore s = cluster.blobStore;
+        BlobIdTracker tracker = (BlobIdTracker) ((BlobTrackingStore) s).getTracker();
+        DataStoreState state = init(cluster.nodeStore, 0);
+
+        // Simulate creation and active deletion after init without version gc to enable references to hang around
+        List<String> addlAdded = doActiveDelete(cluster.nodeStore,
+            (DataStoreBlobStore) cluster.blobStore, tracker, folder,0, 2);
+        List<String> addlPresent = Lists.newArrayList(addlAdded.get(2), addlAdded.get(3));
+        List<String> activeDeleted = Lists.newArrayList(addlAdded.get(0), addlAdded.get(1));
+        state.blobsPresent.addAll(addlPresent);
+        state.blobsAdded.addAll(addlPresent);
+
+        cluster.gc.collectGarbage(false);
+
+        Set<String> existingAfterGC = iterate(s);
+        // Check the state of the blob store after gc
+        assertEquals(state.blobsPresent, existingAfterGC);
+        // Tracked blobs should reflect deletions after gc
+        assertEquals(state.blobsPresent, retrieveTracked(tracker));
+        // Check that the delete tracker is refreshed
+        assertEquals(Sets.newHashSet(activeDeleted), retrieveActiveDeleteTracked(tracker, folder));
+    }
+
+    @Test
+    public void gcReconcileActiveDeletionMarkCleared() throws Exception {
+        Cluster cluster = new Cluster("cluster1");
+        BlobStore s = cluster.blobStore;
+        BlobIdTracker tracker = (BlobIdTracker) ((BlobTrackingStore) s).getTracker();
+        // Simulate active deletion before the init to ensure that the references also cleared
+        List<String> addlAdded = doActiveDelete(cluster.nodeStore,
+            (DataStoreBlobStore) cluster.blobStore, tracker, folder,0, 2);
+        DataStoreState state = init(cluster.nodeStore, 0);
+
+        // Force a snapshot of the tracker to refresh
+        File f = folder.newFile();
+        tracker.remove(f, BlobTracker.Options.ACTIVE_DELETION);
+
+        List<String> addlPresent = Lists.newArrayList(addlAdded.get(2), addlAdded.get(3));
+        List<String> activeDeleted = Lists.newArrayList(addlAdded.get(0), addlAdded.get(1));
+        state.blobsPresent.addAll(addlPresent);
+        state.blobsAdded.addAll(addlPresent);
+
+        cluster.gc.collectGarbage(false);
+        Set<String> existingAfterGC = iterate(s);
+        // Check the state of the blob store after gc
+        assertEquals(state.blobsPresent, existingAfterGC);
+        // Tracked blobs should reflect deletions after gc
+        assertEquals(state.blobsPresent, retrieveTracked(tracker));
+        // Check that the delete tracker is refreshed
+        assertEquals(Sets.newHashSet(), retrieveActiveDeleteTracked(tracker, folder));
+    }
+
+    @Test
+    public void consistencyCheckOnlyActiveDeletion() throws Exception {
+        Cluster cluster = new Cluster("cluster1");
+        BlobStore s = cluster.blobStore;
+        BlobIdTracker tracker = (BlobIdTracker) ((BlobTrackingStore) s).getTracker();
+        DataStoreState state = init(cluster.nodeStore, 0);
+
+        List<String> addlAdded = doActiveDelete(cluster.nodeStore,
+            (DataStoreBlobStore) cluster.blobStore, tracker, folder,0, 2);
+        List<String> addlPresent = Lists.newArrayList(addlAdded.get(2), addlAdded.get(3));
+        List<String> activeDeleted = Lists.newArrayList(addlAdded.get(0), addlAdded.get(1));
+        state.blobsPresent.addAll(addlPresent);
+        state.blobsAdded.addAll(addlPresent);
+
+        // Since datastore in consistent state and only active deletions the missing list should be empty
+        assertEquals(0, cluster.gc.checkConsistency());
+    }
+
+    @Test
+    public void consistencyCheckDeletedWithActiveDeletion() throws Exception {
+        Cluster cluster = new Cluster("cluster1");
+        BlobStore s = cluster.blobStore;
+        BlobIdTracker tracker = (BlobIdTracker) ((BlobTrackingStore) s).getTracker();
+        DataStoreState state = init(cluster.nodeStore, 0);
+
+        // Directly delete from blobstore
+        ArrayList<String> blobs = Lists.newArrayList(state.blobsPresent);
+        String removedId = blobs.remove(0);
+        ((DataStoreBlobStore) s).deleteChunks(Lists.newArrayList(removedId), 0);
+        state.blobsPresent = Sets.newHashSet(blobs);
+        File f = folder.newFile();
+        writeStrings(Lists.newArrayList(removedId).iterator(), f, false);
+        tracker.remove(f);
+
+        List<String> addlAdded = doActiveDelete(cluster.nodeStore,
+            (DataStoreBlobStore) cluster.blobStore, tracker, folder,0, 2);
+        List<String> addlPresent = Lists.newArrayList(addlAdded.get(2), addlAdded.get(3));
+        state.blobsPresent.addAll(addlPresent);
+        state.blobsAdded.addAll(addlPresent);
+
+        // Only the missing blob should be reported and not the active deletions
+        assertEquals(1, cluster.gc.checkConsistency());
+    }
+
+    private List<String> doActiveDelete(NodeStore nodeStore, DataStoreBlobStore blobStore, BlobIdTracker tracker,
+        TemporaryFolder folder, int delIdx, int num) throws Exception {
+        List<String> set = Lists.newArrayList();
+        NodeBuilder a = nodeStore.getRoot().builder();
+        int number = 4;
+        for (int i = 0; i < number; i++) {
+            Blob b = nodeStore.createBlob(randomStream(i, 90));
+            a.child("cactive" + i).setProperty("x", b);
+            set.add(b.getContentIdentity());
+        }
+        nodeStore.merge(a, INSTANCE, EMPTY);
+
+        List<String> deleted = Lists.newArrayList();
+
+        //a = nodeStore.getRoot().builder();
+        for(int idx = delIdx; idx < delIdx + num; idx++) {
+            blobStore.deleteChunks(Lists.newArrayList(set.get(idx)), 0);
+            deleted.add(set.get(idx));
+            a.child("cactive" + idx).remove();
+        }
+        nodeStore.merge(a, INSTANCE, EMPTY);
+
+        File f = folder.newFile();
+        writeStrings(deleted.iterator(), f, false);
+
+        tracker.remove(f, BlobTracker.Options.ACTIVE_DELETION);
+        return set;
+    }
+
+    @Test
     public void gcColdStart() throws Exception {
         Cluster cluster = new Cluster("cluster1");
         BlobStore s = cluster.blobStore;
@@ -162,6 +298,21 @@ public class DataStoreTrackerGCTest {
         assertEquals(state.blobsPresent, retrieveTracked(tracker));
     }
 
+    private static Set<String> retrieveActiveDeleteTracked(BlobIdTracker tracker, TemporaryFolder folder) throws IOException {
+        File f = folder.newFile();
+        Set<String> retrieved = readStringsAsSet(
+            new FileInputStream(tracker.getDeleteTracker().retrieve(f.getAbsolutePath())), false);
+        return retrieved;
+    }
+
+    private static List<String> range(int min, int max) {
+        List<String> list = newArrayList();
+        for (int i = min; i <= max; i++) {
+            list.add(valueOf(i));
+        }
+        return list;
+    }
+
     private HashSet<String> addNodeSpecialChars(DocumentNodeStore ds) throws Exception {
         List<String> specialCharSets =
             Lists.newArrayList("q\\%22afdg\\%22", "a\nbcd", "a\n\rabcd", "012\\efg" );
@@ -258,7 +409,7 @@ public class DataStoreTrackerGCTest {
         NodeBuilder a = nodeStore.getRoot().builder();
         int number = 4;
         for (int i = 0; i < number; i++) {
-            Blob b = nodeStore.createBlob(randomStream(i, 90));
+            Blob b = nodeStore.createBlob(randomStream(i, 40));
             a.child("cinline" + i).setProperty("x", b);
         }
         nodeStore.merge(a, EmptyHook.INSTANCE, CommitInfo.EMPTY);
@@ -416,7 +567,6 @@ public class DataStoreTrackerGCTest {
         }
         s.merge(a, INSTANCE, EMPTY);
 
-
         a = s.getRoot().builder();
         for (int id : processed) {
             a.child("c" + id).remove();
@@ -444,7 +594,7 @@ public class DataStoreTrackerGCTest {
         }
 
         public Cluster(String clusterName, int clusterId, MemoryDocumentStore store) throws Exception {
-            blobStore = getBlobStore(blobStoreRoot);
+            blobStore = new DataStoreBlobStore(createFDS(blobStoreRoot, 50));
             nodeStore = builderProvider.newBuilder()
                 .setClusterId(clusterId)
                 .clock(clock)