You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by am...@apache.org on 2017/10/18 09:42:24 UTC
svn commit: r1812485 - in /jackrabbit/oak/trunk:
oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
oak-it/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTrackerGCTest.java
Author: amitj
Date: Wed Oct 18 09:42:24 2017
New Revision: 1812485
URL: http://svn.apache.org/viewvc?rev=1812485&view=rev
Log:
OAK-6827: Consistency check fails with active deletions
- consistencyCheck now filters out missing candidates tracked with active deletions
- GC when complete ejects out active deleted tracked ids which are not reported even by mark, which essentially means that those dangling/unused references have eventually been cleared out by version gc
Modified:
jackrabbit/oak/trunk/oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
jackrabbit/oak/trunk/oak-it/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTrackerGCTest.java
Modified: jackrabbit/oak/trunk/oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java?rev=1812485&r1=1812484&r2=1812485&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java (original)
+++ jackrabbit/oak/trunk/oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java Wed Oct 18 09:42:24 2017
@@ -41,10 +41,8 @@ import javax.annotation.Nullable;
import com.google.common.base.Charsets;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
-import com.google.common.base.Splitter;
import com.google.common.base.StandardSystemProperty;
import com.google.common.base.Stopwatch;
-import com.google.common.base.Strings;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
@@ -58,6 +56,7 @@ import org.apache.jackrabbit.core.data.D
import org.apache.jackrabbit.oak.api.jmx.CheckpointMBean;
import org.apache.jackrabbit.oak.commons.FileIOUtils;
import org.apache.jackrabbit.oak.commons.FileIOUtils.FileLineDifferenceIterator;
+import org.apache.jackrabbit.oak.plugins.blob.datastore.BlobIdTracker;
import org.apache.jackrabbit.oak.plugins.blob.datastore.BlobTracker;
import org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore;
import org.apache.jackrabbit.oak.plugins.blob.datastore.SharedDataStoreUtils;
@@ -69,7 +68,9 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static com.google.common.collect.Lists.newArrayList;
+import static java.io.File.createTempFile;
import static org.apache.commons.io.FileUtils.copyFile;
+import static org.apache.commons.io.FileUtils.moveFile;
import static org.apache.jackrabbit.oak.commons.FileIOUtils.copy;
import static org.apache.jackrabbit.oak.commons.FileIOUtils.merge;
import static org.apache.jackrabbit.oak.commons.FileIOUtils.sort;
@@ -437,7 +438,7 @@ public class MarkSweepGarbageCollector i
closeQuietly(removesWriter);
}
- BlobCollectionType.get(blobStore).handleRemoves(blobStore, fs.getGarbage());
+ BlobCollectionType.get(blobStore).handleRemoves(blobStore, fs.getGarbage(), fs.getMarkedRefs());
if(count != deleted) {
LOG.warn("Deleted only [{}] blobs entries from the [{}] candidates identified. This may happen if blob "
@@ -605,10 +606,12 @@ public class MarkSweepGarbageCollector i
fs.getAvailableRefs(),
fs.getMarkedRefs(),
transformer);
- candidates = FileIOUtils.writeStrings(iter, fs.getGcCandidates(), true);
+ // If tracking then also filter ids being tracked which are active deletions for lucene
+ candidates = BlobCollectionType.get(blobStore).filter(blobStore, iter, fs);
+
LOG.trace("Ending difference phase of the consistency check");
-
LOG.info("Consistency check found [{}] missing blobs", candidates);
+
if (candidates > 0) {
LOG.warn("Consistency check failure in the the blob store : {}, check missing candidates in file {}",
blobStore, fs.getGcCandidates().getAbsolutePath());
@@ -620,6 +623,7 @@ public class MarkSweepGarbageCollector i
}
return candidates;
}
+
/**
* BlobIdRetriever class to retrieve all blob ids.
*/
@@ -794,9 +798,11 @@ public class MarkSweepGarbageCollector i
}
@Override
- void handleRemoves(GarbageCollectableBlobStore blobStore,
- File removedIds) throws IOException {
- ((BlobTrackingStore) blobStore).getTracker().remove(removedIds);
+ void handleRemoves(GarbageCollectableBlobStore blobStore, File removedIds, File markedRefs) throws IOException {
+ BlobTrackingStore store = (BlobTrackingStore) blobStore;
+ BlobIdTracker tracker = (BlobIdTracker) store.getTracker();
+ tracker.remove(removedIds);
+ tracker.getDeleteTracker().reconcile(markedRefs);
}
@Override
@@ -810,6 +816,34 @@ public class MarkSweepGarbageCollector i
LOG.warn("Unable to track blob ids locally");
}
}
+
+ @Override
+ public int filter(GarbageCollectableBlobStore blobStore, FileLineDifferenceIterator iter,
+ GarbageCollectorFileState fs) throws IOException {
+ // Write the original candidates
+ FileIOUtils.writeStrings(iter, fs.getGcCandidates(), true);
+
+ // Filter the ids actively deleted
+ BlobTrackingStore store = (BlobTrackingStore) blobStore;
+ BlobIdTracker tracker = (BlobIdTracker) store.getTracker();
+
+ // Move the candidates identified to a temp file
+ File candTemp = createTempFile("candTemp", null);
+ copyFile(fs.getGcCandidates(), candTemp);
+
+ Iterator<String> filter = tracker.getDeleteTracker().filter(candTemp);
+ try {
+ return FileIOUtils.writeStrings(filter, fs.getGcCandidates(), true);
+ } finally {
+ if (filter != null && filter instanceof FileLineDifferenceIterator) {
+ ((FileLineDifferenceIterator) filter).close();
+ }
+
+ if (candTemp != null) {
+ candTemp.delete();
+ }
+ }
+ }
},
DEFAULT;
@@ -872,10 +906,10 @@ public class MarkSweepGarbageCollector i
*
* @param blobStore
* @param removedIds
+ * @param markedRefs
* @throws IOException
*/
- void handleRemoves(GarbageCollectableBlobStore blobStore,
- File removedIds) throws IOException {
+ void handleRemoves(GarbageCollectableBlobStore blobStore, File removedIds, File markedRefs) throws IOException {
FileUtils.forceDelete(removedIds);
}
@@ -897,5 +931,10 @@ public class MarkSweepGarbageCollector i
}
return DEFAULT;
}
+
+ public int filter(GarbageCollectableBlobStore blobStore, FileLineDifferenceIterator iter,
+ GarbageCollectorFileState fs) throws IOException {
+ return FileIOUtils.writeStrings(iter, fs.getGcCandidates(), true);
+ }
}
}
Modified: jackrabbit/oak/trunk/oak-it/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTrackerGCTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-it/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTrackerGCTest.java?rev=1812485&r1=1812484&r2=1812485&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-it/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTrackerGCTest.java (original)
+++ jackrabbit/oak/trunk/oak-it/src/test/java/org/apache/jackrabbit/oak/plugins/blob/datastore/DataStoreTrackerGCTest.java Wed Oct 18 09:42:24 2017
@@ -19,8 +19,10 @@ package org.apache.jackrabbit.oak.plugin
import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.File;
+import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
@@ -57,14 +59,19 @@ import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
+import static com.google.common.collect.Lists.newArrayList;
import static com.google.common.collect.Sets.newHashSet;
import static com.google.common.collect.Sets.union;
+import static java.lang.String.valueOf;
import static java.util.concurrent.Executors.newSingleThreadExecutor;
import static java.util.concurrent.Executors.newSingleThreadScheduledExecutor;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static java.util.concurrent.TimeUnit.MINUTES;
import static org.apache.commons.io.FileUtils.forceDelete;
import static org.apache.commons.io.IOUtils.closeQuietly;
+import static org.apache.jackrabbit.oak.commons.FileIOUtils.readStringsAsSet;
+import static org.apache.jackrabbit.oak.commons.FileIOUtils.writeStrings;
+import static org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreUtils.createFDS;
import static org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreUtils.getBlobStore;
import static org.apache.jackrabbit.oak.plugins.blob.datastore.SharedDataStoreUtils
.SharedStoreRecordType.REPOSITORY;
@@ -122,6 +129,7 @@ public class DataStoreTrackerGCTest {
Cluster cluster = new Cluster("cluster1");
BlobStore s = cluster.blobStore;
BlobIdTracker tracker = (BlobIdTracker) ((BlobTrackingStore) s).getTracker();
+
DataStoreState state = init(cluster.nodeStore, 0);
ScheduledFuture<?> scheduledFuture = newSingleThreadScheduledExecutor()
.schedule(tracker.new SnapshotJob(), 0, MILLISECONDS);
@@ -138,6 +146,134 @@ public class DataStoreTrackerGCTest {
}
@Test
+ public void gcReconcileActiveDeletion() throws Exception {
+ Cluster cluster = new Cluster("cluster1");
+ BlobStore s = cluster.blobStore;
+ BlobIdTracker tracker = (BlobIdTracker) ((BlobTrackingStore) s).getTracker();
+ DataStoreState state = init(cluster.nodeStore, 0);
+
+ // Simulate creation and active deletion after init without version gc to enable references to hang around
+ List<String> addlAdded = doActiveDelete(cluster.nodeStore,
+ (DataStoreBlobStore) cluster.blobStore, tracker, folder,0, 2);
+ List<String> addlPresent = Lists.newArrayList(addlAdded.get(2), addlAdded.get(3));
+ List<String> activeDeleted = Lists.newArrayList(addlAdded.get(0), addlAdded.get(1));
+ state.blobsPresent.addAll(addlPresent);
+ state.blobsAdded.addAll(addlPresent);
+
+ cluster.gc.collectGarbage(false);
+
+ Set<String> existingAfterGC = iterate(s);
+ // Check the state of the blob store after gc
+ assertEquals(state.blobsPresent, existingAfterGC);
+ // Tracked blobs should reflect deletions after gc
+ assertEquals(state.blobsPresent, retrieveTracked(tracker));
+ // Check that the delete tracker is refreshed
+ assertEquals(Sets.newHashSet(activeDeleted), retrieveActiveDeleteTracked(tracker, folder));
+ }
+
+ @Test
+ public void gcReconcileActiveDeletionMarkCleared() throws Exception {
+ Cluster cluster = new Cluster("cluster1");
+ BlobStore s = cluster.blobStore;
+ BlobIdTracker tracker = (BlobIdTracker) ((BlobTrackingStore) s).getTracker();
+ // Simulate active deletion before the init to ensure that the references also cleared
+ List<String> addlAdded = doActiveDelete(cluster.nodeStore,
+ (DataStoreBlobStore) cluster.blobStore, tracker, folder,0, 2);
+ DataStoreState state = init(cluster.nodeStore, 0);
+
+ // Force a snapshot of the tracker to refresh
+ File f = folder.newFile();
+ tracker.remove(f, BlobTracker.Options.ACTIVE_DELETION);
+
+ List<String> addlPresent = Lists.newArrayList(addlAdded.get(2), addlAdded.get(3));
+ List<String> activeDeleted = Lists.newArrayList(addlAdded.get(0), addlAdded.get(1));
+ state.blobsPresent.addAll(addlPresent);
+ state.blobsAdded.addAll(addlPresent);
+
+ cluster.gc.collectGarbage(false);
+ Set<String> existingAfterGC = iterate(s);
+ // Check the state of the blob store after gc
+ assertEquals(state.blobsPresent, existingAfterGC);
+ // Tracked blobs should reflect deletions after gc
+ assertEquals(state.blobsPresent, retrieveTracked(tracker));
+ // Check that the delete tracker is refreshed
+ assertEquals(Sets.newHashSet(), retrieveActiveDeleteTracked(tracker, folder));
+ }
+
+ @Test
+ public void consistencyCheckOnlyActiveDeletion() throws Exception {
+ Cluster cluster = new Cluster("cluster1");
+ BlobStore s = cluster.blobStore;
+ BlobIdTracker tracker = (BlobIdTracker) ((BlobTrackingStore) s).getTracker();
+ DataStoreState state = init(cluster.nodeStore, 0);
+
+ List<String> addlAdded = doActiveDelete(cluster.nodeStore,
+ (DataStoreBlobStore) cluster.blobStore, tracker, folder,0, 2);
+ List<String> addlPresent = Lists.newArrayList(addlAdded.get(2), addlAdded.get(3));
+ List<String> activeDeleted = Lists.newArrayList(addlAdded.get(0), addlAdded.get(1));
+ state.blobsPresent.addAll(addlPresent);
+ state.blobsAdded.addAll(addlPresent);
+
+ // Since datastore in consistent state and only active deletions the missing list should be empty
+ assertEquals(0, cluster.gc.checkConsistency());
+ }
+
+ @Test
+ public void consistencyCheckDeletedWithActiveDeletion() throws Exception {
+ Cluster cluster = new Cluster("cluster1");
+ BlobStore s = cluster.blobStore;
+ BlobIdTracker tracker = (BlobIdTracker) ((BlobTrackingStore) s).getTracker();
+ DataStoreState state = init(cluster.nodeStore, 0);
+
+ // Directly delete from blobstore
+ ArrayList<String> blobs = Lists.newArrayList(state.blobsPresent);
+ String removedId = blobs.remove(0);
+ ((DataStoreBlobStore) s).deleteChunks(Lists.newArrayList(removedId), 0);
+ state.blobsPresent = Sets.newHashSet(blobs);
+ File f = folder.newFile();
+ writeStrings(Lists.newArrayList(removedId).iterator(), f, false);
+ tracker.remove(f);
+
+ List<String> addlAdded = doActiveDelete(cluster.nodeStore,
+ (DataStoreBlobStore) cluster.blobStore, tracker, folder,0, 2);
+ List<String> addlPresent = Lists.newArrayList(addlAdded.get(2), addlAdded.get(3));
+ state.blobsPresent.addAll(addlPresent);
+ state.blobsAdded.addAll(addlPresent);
+
+ // Only the missing blob should be reported and not the active deletions
+ assertEquals(1, cluster.gc.checkConsistency());
+ }
+
+ private List<String> doActiveDelete(NodeStore nodeStore, DataStoreBlobStore blobStore, BlobIdTracker tracker,
+ TemporaryFolder folder, int delIdx, int num) throws Exception {
+ List<String> set = Lists.newArrayList();
+ NodeBuilder a = nodeStore.getRoot().builder();
+ int number = 4;
+ for (int i = 0; i < number; i++) {
+ Blob b = nodeStore.createBlob(randomStream(i, 90));
+ a.child("cactive" + i).setProperty("x", b);
+ set.add(b.getContentIdentity());
+ }
+ nodeStore.merge(a, INSTANCE, EMPTY);
+
+ List<String> deleted = Lists.newArrayList();
+
+ //a = nodeStore.getRoot().builder();
+ for(int idx = delIdx; idx < delIdx + num; idx++) {
+ blobStore.deleteChunks(Lists.newArrayList(set.get(idx)), 0);
+ deleted.add(set.get(idx));
+ a.child("cactive" + idx).remove();
+ }
+ nodeStore.merge(a, INSTANCE, EMPTY);
+
+ File f = folder.newFile();
+ writeStrings(deleted.iterator(), f, false);
+
+ tracker.remove(f, BlobTracker.Options.ACTIVE_DELETION);
+ return set;
+ }
+
+ @Test
public void gcColdStart() throws Exception {
Cluster cluster = new Cluster("cluster1");
BlobStore s = cluster.blobStore;
@@ -162,6 +298,21 @@ public class DataStoreTrackerGCTest {
assertEquals(state.blobsPresent, retrieveTracked(tracker));
}
+ private static Set<String> retrieveActiveDeleteTracked(BlobIdTracker tracker, TemporaryFolder folder) throws IOException {
+ File f = folder.newFile();
+ Set<String> retrieved = readStringsAsSet(
+ new FileInputStream(tracker.getDeleteTracker().retrieve(f.getAbsolutePath())), false);
+ return retrieved;
+ }
+
+ private static List<String> range(int min, int max) {
+ List<String> list = newArrayList();
+ for (int i = min; i <= max; i++) {
+ list.add(valueOf(i));
+ }
+ return list;
+ }
+
private HashSet<String> addNodeSpecialChars(DocumentNodeStore ds) throws Exception {
List<String> specialCharSets =
Lists.newArrayList("q\\%22afdg\\%22", "a\nbcd", "a\n\rabcd", "012\\efg" );
@@ -258,7 +409,7 @@ public class DataStoreTrackerGCTest {
NodeBuilder a = nodeStore.getRoot().builder();
int number = 4;
for (int i = 0; i < number; i++) {
- Blob b = nodeStore.createBlob(randomStream(i, 90));
+ Blob b = nodeStore.createBlob(randomStream(i, 40));
a.child("cinline" + i).setProperty("x", b);
}
nodeStore.merge(a, EmptyHook.INSTANCE, CommitInfo.EMPTY);
@@ -416,7 +567,6 @@ public class DataStoreTrackerGCTest {
}
s.merge(a, INSTANCE, EMPTY);
-
a = s.getRoot().builder();
for (int id : processed) {
a.child("c" + id).remove();
@@ -444,7 +594,7 @@ public class DataStoreTrackerGCTest {
}
public Cluster(String clusterName, int clusterId, MemoryDocumentStore store) throws Exception {
- blobStore = getBlobStore(blobStoreRoot);
+ blobStore = new DataStoreBlobStore(createFDS(blobStoreRoot, 50));
nodeStore = builderProvider.newBuilder()
.setClusterId(clusterId)
.clock(clock)