You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by am...@apache.org on 2017/10/09 06:01:33 UTC

svn commit: r1811532 - in /jackrabbit/oak/trunk: oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/ oak-blob-plugins/src/test/java/org/apache/jackrabbit/oak/plugins/blob/ oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segme...

Author: amitj
Date: Mon Oct  9 06:01:33 2017
New Revision: 1811532

URL: http://svn.apache.org/viewvc?rev=1811532&view=rev
Log:
OAK-5546: [BlobGC] Adapt time to delete blobs based on lucene indexing activity

- Using the minimum of oldest checkpoint timestamp and the mark phase start time as the reference time to calculate max age of blobs to be considered for deletion
- Added test
- Updated NodeStoreService to pass whiteboard instance to the GC object

Added:
    jackrabbit/oak/trunk/oak-blob-plugins/src/test/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCCheckpointRefTest.java   (with props)
Modified:
    jackrabbit/oak/trunk/oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
    jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentNodeStoreService.java
    jackrabbit/oak/trunk/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStore.java
    jackrabbit/oak/trunk/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java

Modified: jackrabbit/oak/trunk/oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java?rev=1811532&r1=1811531&r2=1811532&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java (original)
+++ jackrabbit/oak/trunk/oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java Mon Oct  9 06:01:33 2017
@@ -53,12 +53,15 @@ import org.apache.commons.io.FileUtils;
 import org.apache.commons.io.LineIterator;
 import org.apache.jackrabbit.core.data.DataRecord;
 import org.apache.jackrabbit.core.data.DataStoreException;
+import org.apache.jackrabbit.oak.api.jmx.CheckpointMBean;
 import org.apache.jackrabbit.oak.commons.FileIOUtils;
 import org.apache.jackrabbit.oak.commons.FileIOUtils.FileLineDifferenceIterator;
 import org.apache.jackrabbit.oak.plugins.blob.datastore.BlobTracker;
 import org.apache.jackrabbit.oak.plugins.blob.datastore.SharedDataStoreUtils;
 import org.apache.jackrabbit.oak.plugins.blob.datastore.SharedDataStoreUtils.SharedStoreRecordType;
 import org.apache.jackrabbit.oak.spi.blob.GarbageCollectableBlobStore;
+import org.apache.jackrabbit.oak.spi.whiteboard.Whiteboard;
+import org.apache.jackrabbit.oak.spi.whiteboard.WhiteboardUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -114,6 +117,10 @@ public class MarkSweepGarbageCollector i
 
     private final String root;
 
+    private final Whiteboard whiteboard;
+
+    private CheckpointMBean checkpointMbean;
+
     /**
      * Creates an instance of MarkSweepGarbageCollector
      *
@@ -135,7 +142,8 @@ public class MarkSweepGarbageCollector i
             String root,
             int batchCount,
             long maxLastModifiedInterval,
-            @Nullable String repositoryId)
+            @Nullable String repositoryId,
+            @Nullable Whiteboard whiteboard)
             throws IOException {
         this.executor = executor;
         this.blobStore = blobStore;
@@ -144,28 +152,37 @@ public class MarkSweepGarbageCollector i
         this.maxLastModifiedInterval = maxLastModifiedInterval;
         this.repoId = repositoryId;
         this.root = root;
+        this.whiteboard = whiteboard;
+        if (whiteboard != null) {
+            this.checkpointMbean = WhiteboardUtils.getService(whiteboard, CheckpointMBean.class);
+        }
     }
 
-    /**
-     * Instantiates a new blob garbage collector.
-     */
     public MarkSweepGarbageCollector(
             BlobReferenceRetriever marker,
             GarbageCollectableBlobStore blobStore,
             Executor executor,
+            String root,
+            int batchCount,
+            long maxLastModifiedInterval,
             @Nullable String repositoryId)
             throws IOException {
-        this(marker, blobStore, executor, TEMP_DIR, DEFAULT_BATCH_COUNT, TimeUnit.HOURS
-                .toMillis(24), repositoryId);
+        this(marker, blobStore, executor, root, batchCount, maxLastModifiedInterval, repositoryId, null);
     }
 
+    /**
+     * Instantiates a new blob garbage collector.
+     */
     public MarkSweepGarbageCollector(
             BlobReferenceRetriever marker,
             GarbageCollectableBlobStore blobStore,
             Executor executor,
             long maxLastModifiedInterval,
-            @Nullable String repositoryId) throws IOException {
-        this(marker, blobStore, executor, TEMP_DIR, DEFAULT_BATCH_COUNT, maxLastModifiedInterval, repositoryId);
+            @Nullable String repositoryId,
+            @Nullable Whiteboard whiteboard)
+            throws IOException {
+        this(marker, blobStore, executor, TEMP_DIR, DEFAULT_BATCH_COUNT, TimeUnit.HOURS
+                .toMillis(24), repositoryId, whiteboard);
     }
 
     @Override
@@ -266,7 +283,7 @@ public class MarkSweepGarbageCollector i
                 long deleteCount = sweep(fs, markStart, forceBlobRetrieve);
                 threw = false;
 
-                long maxTime = getLastMaxModifiedTime(markStart) > 0 ? getLastMaxModifiedTime(markStart) : markStart;
+                long maxTime = getMaxModifiedTime(markStart) > 0 ? getMaxModifiedTime(markStart) : markStart;
                 sw.stop();
 
                 LOG.info("Blob garbage collection completed in {} ({} ms). Number of blobs deleted [{}] with max modification time of [{}]",
@@ -379,10 +396,10 @@ public class MarkSweepGarbageCollector i
         long count = 0;
         long deleted = 0;
         
-        long lastMaxModifiedTime = getLastMaxModifiedTime(earliestRefAvailTime); 
+        long maxModifiedTime = getMaxModifiedTime(earliestRefAvailTime);
         LOG.debug("Starting sweep phase of the garbage collector");
         LOG.debug("Sweeping blobs with modified time > than the configured max deleted time ({}). ",
-                timestampToString(lastMaxModifiedTime));
+                timestampToString(maxModifiedTime));
 
         BufferedWriter removesWriter = null;
         LineIterator iterator = null;
@@ -397,7 +414,7 @@ public class MarkSweepGarbageCollector i
                 List<String> ids = partitions.next();
                 count += ids.size();
                 deleted += BlobCollectionType.get(blobStore)
-                    .sweepInternal(blobStore, ids, removesQueue, lastMaxModifiedTime);
+                    .sweepInternal(blobStore, ids, removesQueue, maxModifiedTime);
                 saveBatchToFile(newArrayList(removesQueue), removesWriter);
                 removesQueue.clear();
             }
@@ -412,7 +429,7 @@ public class MarkSweepGarbageCollector i
             LOG.warn("Deleted only [{}] blobs entries from the [{}] candidates identified. This may happen if blob " 
                          + "modified time is > "
                          + "than the max deleted time ({})", deleted, count,
-                        timestampToString(lastMaxModifiedTime));
+                        timestampToString(maxModifiedTime));
         }
 
         // Remove all the merged marked references
@@ -425,10 +442,36 @@ public class MarkSweepGarbageCollector i
         return batchCount;
     }
 
-    private long getLastMaxModifiedTime(long maxModified) {
-        return maxLastModifiedInterval > 0 ?
-            ((maxModified <= 0 ? System.currentTimeMillis() : maxModified) - maxLastModifiedInterval) :
-            0;
+    /**
+     * Computes the max modified time of blobs eligible for deletion. 3 possibilities
+     *  - If maxLastModifiedInterval <= 0 then return 0 which is interpreted as current by delete call
+     *      (For testing purposes only)
+     *  - If oldest checkpoint creation date > 0 then reference time is the earliest of that and the parameter
+     *      maxModificationReferenceTime
+     *  - Else the parameter maxModificationReferenceTime is used as the reference time
+     *
+     * @param maxModificationReferenceTime typically the mark phase start time (could be 0 for tests; values <= 0 fall back to the current time)
+     * @return max modified time of blobs to be considered for deletion, i.e. the reference time minus maxLastModifiedInterval
+     */
+    private long getMaxModifiedTime(long maxModificationReferenceTime) {
+        if (maxLastModifiedInterval <= 0) {
+            return 0;
+        }
+
+        long oldestCheckopoint = -1;
+        if (checkpointMbean != null) {
+            oldestCheckopoint = checkpointMbean.getOldestCheckpointCreationDate().getTime();
+            LOG.debug("Oldest checkpoint data retrieved {} ", oldestCheckopoint);
+        }
+        LOG.debug("maxModificationReferenceTime {} ", maxModificationReferenceTime);
+
+        maxModificationReferenceTime = maxModificationReferenceTime <= 0 ?
+                                            System.currentTimeMillis() : maxModificationReferenceTime;
+        long calculatedReferenceTime = (oldestCheckopoint <= 0 ? maxModificationReferenceTime :
+                Math.min(maxModificationReferenceTime, oldestCheckopoint));
+        LOG.debug("Calculated reference time {} ", calculatedReferenceTime);
+
+        return (calculatedReferenceTime - maxLastModifiedInterval);
     }
 
     /**

Added: jackrabbit/oak/trunk/oak-blob-plugins/src/test/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCCheckpointRefTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-blob-plugins/src/test/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCCheckpointRefTest.java?rev=1811532&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-blob-plugins/src/test/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCCheckpointRefTest.java (added)
+++ jackrabbit/oak/trunk/oak-blob-plugins/src/test/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCCheckpointRefTest.java Mon Oct  9 06:01:33 2017
@@ -0,0 +1,168 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.blob;
+
+import java.util.Date;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
+
+import javax.management.openmbean.TabularData;
+
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+import org.apache.jackrabbit.oak.api.jmx.CheckpointMBean;
+import org.apache.jackrabbit.oak.spi.state.NodeStore;
+import org.apache.jackrabbit.oak.spi.whiteboard.DefaultWhiteboard;
+import org.apache.jackrabbit.oak.spi.whiteboard.Registration;
+import org.apache.jackrabbit.oak.spi.whiteboard.WhiteboardUtils;
+import org.apache.jackrabbit.oak.stats.Clock;
+import org.junit.Before;
+import org.junit.Test;
+
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Adds BlobGC tests related to retrieving oldest checkpoint reference
+ */
+public class BlobGCCheckpointRefTest extends BlobGCTest {
+    @Override
+    @Before
+    public void before() {
+        super.before();
+        checkpointMBean = new MemoryStoreCheckpointMBean(nodeStore, clock);
+        WhiteboardUtils.registerMBean(wb, CheckpointMBean.class, checkpointMBean,
+            CheckpointMBean.TYPE, "Test checkpoint mbean");
+    }
+
+    @Test
+    public void gcCheckpointHeld() throws Exception {
+        log.info("Staring gcCheckpointHeld()");
+
+        BlobStoreState state = setUp(10, 5, 100);
+        long afterSetupTime = clock.getTime();
+        log.info("afterSetupTime {}", afterSetupTime);
+
+        checkpointMBean.createCheckpoint(100);
+        Set<String> afterCheckpointBlobs = createBlobs(2, 100);
+        Set<String> present = Sets.union(state.blobsPresent, afterCheckpointBlobs);
+        long maxGcAge = checkpointMBean.getOldestCheckpointCreationTimestamp() - afterSetupTime;
+
+        log.info("{} blobs added : {}", state.blobsAdded.size(), state.blobsAdded);
+        log.info("{} blobs remaining : {}", present.size(), present);
+
+        Set<String> existingAfterGC = gcInternal(maxGcAge);
+        assertTrue(Sets.symmetricDifference(present, existingAfterGC).isEmpty());
+    }
+
+    @Test
+    public void gcCheckpointHeldNoAddition() throws Exception {
+        log.info("Staring gcCheckpointHeldNoAddition()");
+
+        BlobStoreState state = setUp(10, 5, 100);
+        long afterSetupTime = clock.getTime();
+        log.info("afterSetupTime {}", afterSetupTime);
+
+        checkpointMBean.createCheckpoint(100);
+        long maxGcAge = checkpointMBean.getOldestCheckpointCreationTimestamp() - afterSetupTime;
+
+        log.info("{} blobs added : {}", state.blobsAdded.size(), state.blobsAdded);
+        log.info("{} blobs remaining : {}", state.blobsPresent.size(), state.blobsPresent);
+
+        Set<String> existingAfterGC = gcInternal(maxGcAge);
+        assertTrue(Sets.symmetricDifference(state.blobsPresent, existingAfterGC).isEmpty());
+    }
+
+    @Test
+    public void gcCheckpointHeldMaxAgeChange() throws Exception {
+        log.info("Staring gcCheckpointHeldMaxAgeChange()");
+        startReferenceTime = clock.getTime();
+
+        BlobStoreState state = setUp(10, 5, 100);
+        long afterSetupTime = clock.getTime();
+        log.info("{} afterSetupTime time", afterSetupTime);
+
+        checkpointMBean.createCheckpoint(100);
+        Set<String> afterCheckpointBlobs = createBlobs(2, 100);
+        state.blobsPresent.addAll(afterCheckpointBlobs);
+
+        log.info("{} blobs added : {}", state.blobsAdded.size(), state.blobsAdded);
+        log.info("{} blobs remaining : {}", state.blobsPresent.size(), state.blobsPresent);
+
+        long maxGcAge = checkpointMBean.getOldestCheckpointCreationTimestamp() - afterSetupTime;
+        log.info("Max age configured {}", maxGcAge);
+        Set<String> existingAfterGC = gcInternal(maxGcAge);
+        assertTrue(Sets.symmetricDifference(state.blobsPresent, existingAfterGC).isEmpty());
+    }
+
+    /**
+     * CheckpointMBean implementation for MemoryNodeStore
+     */
+    static class MemoryStoreCheckpointMBean implements CheckpointMBean {
+        private static final String CREATION_DATE = "creationDate";
+        private final Clock clock;
+        private final NodeStore nodeStore;
+
+        public MemoryStoreCheckpointMBean(NodeStore nodeStore, Clock clock) {
+            this.nodeStore = nodeStore;
+            this.clock = clock;
+        }
+
+        @Override public TabularData listCheckpoints() {
+            throw new UnsupportedOperationException("Operation not supported");
+        }
+
+        @Override public long getOldestCheckpointCreationTimestamp() {
+            Iterable<String> checkpoints = nodeStore.checkpoints();
+            long minCreationDate = Long.MAX_VALUE;
+            for (String checkpoint : checkpoints) {
+                Map<String, String> chkInfo = nodeStore.checkpointInfo(checkpoint);
+
+                if (chkInfo.containsKey(CREATION_DATE) &&
+                    Long.valueOf(chkInfo.get(CREATION_DATE)) < minCreationDate) {
+                    minCreationDate = Long.valueOf(chkInfo.get(CREATION_DATE));
+                }
+            }
+
+            if (minCreationDate == Long.MAX_VALUE) {
+                minCreationDate = 0;
+            }
+
+            return minCreationDate;
+        }
+
+        @Override public Date getOldestCheckpointCreationDate() {
+            return new Date(getOldestCheckpointCreationTimestamp());
+        }
+
+        @Override public String createCheckpoint(long lifetime) {
+            Map<String, String> props = Maps.newHashMap();
+            props.put(CREATION_DATE, String.valueOf(clock.getTime()));
+            String checkpoint = nodeStore.checkpoint(lifetime, props);
+
+            return checkpoint;
+        }
+
+        @Override public boolean releaseCheckpoint(String id) {
+            return nodeStore.release(id);
+        }
+    }
+}

Propchange: jackrabbit/oak/trunk/oak-blob-plugins/src/test/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCCheckpointRefTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentNodeStoreService.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentNodeStoreService.java?rev=1811532&r1=1811531&r2=1811532&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentNodeStoreService.java (original)
+++ jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentNodeStoreService.java Mon Oct  9 06:01:33 2017
@@ -646,7 +646,8 @@ public class SegmentNodeStoreService {
                     (GarbageCollectableBlobStore) blobStore,
                     executor,
                     TimeUnit.SECONDS.toMillis(configuration.getBlobGcMaxAge()),
-                    getOrCreateId(segmentNodeStore)
+                    getOrCreateId(segmentNodeStore),
+                    whiteboard
             );
             closeables.add(registrations.registerMBean(
                     BlobGCMBean.class,

Modified: jackrabbit/oak/trunk/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStore.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStore.java?rev=1811532&r1=1811531&r2=1811532&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStore.java (original)
+++ jackrabbit/oak/trunk/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStore.java Mon Oct  9 06:01:33 2017
@@ -123,6 +123,7 @@ import org.apache.jackrabbit.oak.spi.sta
 import org.apache.jackrabbit.oak.spi.state.NodeState;
 import org.apache.jackrabbit.oak.spi.state.NodeStateDiff;
 import org.apache.jackrabbit.oak.spi.state.NodeStore;
+import org.apache.jackrabbit.oak.spi.whiteboard.Whiteboard;
 import org.apache.jackrabbit.oak.stats.Clock;
 import org.apache.jackrabbit.oak.commons.benchmark.PerfLogger;
 import org.slf4j.Logger;
@@ -2954,11 +2955,12 @@ public final class DocumentNodeStore
      *
      * @param blobGcMaxAgeInSecs
      * @param repositoryId
+     * @param whiteboard
      * @return garbage collector of the BlobStore supports GC otherwise null
      */
     @CheckForNull
-    public MarkSweepGarbageCollector createBlobGarbageCollector(long blobGcMaxAgeInSecs,
-            String repositoryId) {
+    public MarkSweepGarbageCollector createBlobGarbageCollector(long blobGcMaxAgeInSecs, String repositoryId,
+        Whiteboard whiteboard) {
         MarkSweepGarbageCollector blobGC = null;
         if(blobStore instanceof GarbageCollectableBlobStore){
             try {
@@ -2967,7 +2969,8 @@ public final class DocumentNodeStore
                             (GarbageCollectableBlobStore) blobStore,
                         executor,
                         SECONDS.toMillis(blobGcMaxAgeInSecs),
-                        repositoryId);
+                        repositoryId,
+                        whiteboard);
             } catch (IOException e) {
                 throw new RuntimeException("Error occurred while initializing " +
                         "the MarkSweepGarbageCollector",e);
@@ -3276,4 +3279,4 @@ public final class DocumentNodeStore
     int getUpdateLimit() {
         return updateLimit;
     }
-}
\ No newline at end of file
+}

Modified: jackrabbit/oak/trunk/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java?rev=1811532&r1=1811531&r2=1811532&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java (original)
+++ jackrabbit/oak/trunk/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java Mon Oct  9 06:01:33 2017
@@ -949,7 +949,8 @@ public class DocumentNodeStoreService {
 
         if (store.getBlobStore() instanceof GarbageCollectableBlobStore) {
             BlobGarbageCollector gc = store.createBlobGarbageCollector(blobGcMaxAgeInSecs, 
-                                                        ClusterRepositoryInfo.getOrCreateId(nodeStore));
+                                                        ClusterRepositoryInfo.getOrCreateId(nodeStore),
+                                                        whiteboard);
             addRegistration(registerMBean(whiteboard, BlobGCMBean.class, new BlobGC(gc, executor),
                     BlobGCMBean.TYPE, "Document node store blob garbage collection"));
         }