You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by ca...@apache.org on 2017/06/06 06:45:37 UTC

svn commit: r1797740 - in /jackrabbit/oak/trunk/oak-lucene/src: main/java/org/apache/jackrabbit/oak/plugins/index/lucene/ main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/ test/java/org/apache/jackrabbit/oak/plugins/index/lucene/

Author: catholicon
Date: Tue Jun  6 06:45:36 2017
New Revision: 1797740

URL: http://svn.apache.org/viewvc?rev=1797740&view=rev
Log:
OAK-2808: Active deletion of 'deleted' Lucene index files from DataStore without relying on full scale Blob GC

Setup scheduling of purge. Points to note:
* By default, the feature is disabled (scheduler interval = -1)
* Only blobs that were marked deleted before Math.min(oldest_checkpoint_timestamp, curr_time - minAge) are purged, where minAge is Long.getLong("oak.active.deletion.minAge") in seconds (default: 24 hours)

Modified:
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/ActiveDeletedBlobCollectorFactory.java
    jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderServiceTest.java

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java?rev=1797740&r1=1797739&r2=1797740&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java Tue Jun  6 06:45:36 2017
@@ -34,6 +34,7 @@ import java.util.concurrent.atomic.Atomi
 
 import javax.annotation.Nonnull;
 import javax.management.NotCompliantMBeanException;
+import javax.management.openmbean.CompositeData;
 
 import com.google.common.base.Strings;
 import com.google.common.collect.Lists;
@@ -47,6 +48,7 @@ import org.apache.felix.scr.annotations.
 import org.apache.felix.scr.annotations.ReferencePolicy;
 import org.apache.felix.scr.annotations.ReferencePolicyOption;
 import org.apache.jackrabbit.oak.api.jmx.CacheStatsMBean;
+import org.apache.jackrabbit.oak.api.jmx.CheckpointMBean;
 import org.apache.jackrabbit.oak.cache.CacheStats;
 import org.apache.jackrabbit.oak.commons.PropertiesUtil;
 import org.apache.jackrabbit.oak.osgi.OsgiWhiteboard;
@@ -75,6 +77,7 @@ import org.apache.jackrabbit.oak.spi.que
 import org.apache.jackrabbit.oak.spi.state.NodeStore;
 import org.apache.jackrabbit.oak.spi.whiteboard.Registration;
 import org.apache.jackrabbit.oak.spi.whiteboard.Whiteboard;
+import org.apache.jackrabbit.oak.stats.Clock;
 import org.apache.jackrabbit.oak.stats.StatisticsProvider;
 import org.apache.lucene.analysis.util.CharFilterFactory;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
@@ -90,6 +93,7 @@ import static com.google.common.base.Pre
 import static java.util.Collections.emptyMap;
 import static org.apache.commons.io.FileUtils.ONE_MB;
 import static org.apache.jackrabbit.oak.spi.whiteboard.WhiteboardUtils.registerMBean;
+import static org.apache.jackrabbit.oak.spi.whiteboard.WhiteboardUtils.scheduleWithFixedDelay;
 
 @SuppressWarnings("UnusedDeclaration")
 @Component(metatype = true, label = "Apache Jackrabbit Oak LuceneIndexProvider")
@@ -233,15 +237,22 @@ public class LuceneIndexProviderService
     )
     private static final String PROP_DISABLE_STORED_INDEX_DEFINITION = "disableStoredIndexDefinition";
 
-    private static final boolean PROP_DELETED_BLOB_COLLECTION_ENABLED = false;
+    private static final int PROP_DELETED_BLOB_COLLECTION_DEFAULT_INTERVAL = -1; // default: purge scheduling disabled
     @Property(
-            boolValue = PROP_DELETED_BLOB_COLLECTION_ENABLED,
-            label = "Actively remove deleted index blobs from blob store",
-            description = "Index blobs are explicitly unique and don't require mark-sweek type collection." +
-                    "Turning this on would setup early deletion of blobs from blob collection that are deleted" +
-                    " during indexing."
-    )
-    private static final String PROP_ENABLE_DELETED_BLOB_COLLECTION_DEFINITION = "enableDeletedBlobsCollection";
+            intValue = PROP_DELETED_BLOB_COLLECTION_DEFAULT_INTERVAL,
+            label = "Time interval (in seconds) for actively removing deleted index blobs from blob store",
+            description = "Index blobs are explicitly unique and don't require mark-sweep type collection. " +
+                    "This is number of seconds for scheduling clean-up. -1 would disable the functionality. " +
+                    "Cleanup implies purging index blobs marked as deleted earlier during some indexing cycle."
+    )
+    private static final String PROP_NAME_DELETED_BLOB_COLLECTION_DEFAULT_INTERVAL = "deletedBlobsCollectionInterval";
+    /**
+     * Minimum time (in seconds) a blob must have been deleted before it is actively purged; read from system property "oak.active.deletion.minAge", default 24 hours.
+     */
+    final long MIN_BLOB_AGE_TO_ACTIVELY_DELETE = Long.getLong("oak.active.deletion.minAge",
+            TimeUnit.HOURS.toSeconds(24));
+
+    private final Clock clock = Clock.SIMPLE; // time source used to compute the purge cutoff
 
     private Whiteboard whiteboard;
 
@@ -282,6 +293,9 @@ public class LuceneIndexProviderService
     )
     private GarbageCollectableBlobStore blobStore;
 
+    @Reference
+    private CheckpointMBean checkpointMBean;
+
     private IndexCopier indexCopier;
 
     private ActiveDeletedBlobCollectorFactory.ActiveDeletedBlobCollector activeDeletedBlobCollector;
@@ -323,15 +337,7 @@ public class LuceneIndexProviderService
         initializeExtractedTextCache(bundleContext, config);
         IndexTracker tracker = createTracker(bundleContext, config);
         indexProvider = new LuceneIndexProvider(tracker, scorerFactory, augmentorFactory);
-        if (PROP_DELETED_BLOB_COLLECTION_ENABLED && blobStore != null) {
-            File blobCollectorWorkingDir = new File(indexDir, "deleted-blobs");
-            activeDeletedBlobCollector = ActiveDeletedBlobCollectorFactory.newInstance(blobCollectorWorkingDir, executorService);
-            log.info("Active blob collector initialized at working dir: {}", blobCollectorWorkingDir);
-        } else {
-            activeDeletedBlobCollector = ActiveDeletedBlobCollectorFactory.NOOP;
-            log.info("Active blob collector set to NOOP. Enable? {}; blobStore: {}",
-                    PROP_DELETED_BLOB_COLLECTION_ENABLED, blobStore);
-        }
+        initializeActiveBlobCollector(whiteboard, config);
         initializeLogging(config);
         initialize();
 
@@ -703,6 +709,53 @@ public class LuceneIndexProviderService
         regs.add(bundleContext.registerService(IndexInfoProvider.class.getName(), infoProvider, null));
     }
 
+    /**
+     * Sets up active deletion of index blobs: with a non-negative interval (seconds) and a blob store, creates
+     * the collector with working dir indexDir/deleted-blobs and schedules its purge; otherwise installs the NOOP.
+     */
+    private void initializeActiveBlobCollector(Whiteboard whiteboard, Map<String, ?> config) {
+        int activeDeletionInterval = PropertiesUtil.toInteger(
+                config.get(PROP_NAME_DELETED_BLOB_COLLECTION_DEFAULT_INTERVAL),
+                PROP_DELETED_BLOB_COLLECTION_DEFAULT_INTERVAL);
+        if (activeDeletionInterval < 0 || blobStore == null) {
+            activeDeletedBlobCollector = ActiveDeletedBlobCollectorFactory.NOOP;
+            log.info("Active blob collector set to NOOP. deletionInterval: {} seconds; blobStore: {}",
+                    activeDeletionInterval, blobStore);
+            return;
+        }
+        File blobCollectorWorkingDir = new File(indexDir, "deleted-blobs");
+        activeDeletedBlobCollector = ActiveDeletedBlobCollectorFactory.newInstance(blobCollectorWorkingDir, executorService);
+        // The cutoff timestamp is evaluated inside the task, i.e. afresh on every scheduled run
+        oakRegs.add(scheduleWithFixedDelay(whiteboard,
+                () -> activeDeletedBlobCollector.purgeBlobsDeleted(getSafeTimestampForDeletedBlobs(checkpointMBean), blobStore),
+                activeDeletionInterval));
+        log.info("Active blob collector initialized at working dir: {}; deletion interval {} seconds; minAge: {}",
+                blobCollectorWorkingDir, activeDeletionInterval, MIN_BLOB_AGE_TO_ACTIVELY_DELETE);
+    }
+
+    /**
+     * Returns the purge cutoff in milliseconds (same scale as clock.getTime()): current time minus the minimum
+     * blob age, lowered to the oldest checkpoint's timestamp when the checkpoint MBean reports an older one.
+     */
+    private long getSafeTimestampForDeletedBlobs(CheckpointMBean checkpointMBean) {
+        long timestamp = clock.getTime() - TimeUnit.SECONDS.toMillis(MIN_BLOB_AGE_TO_ACTIVELY_DELETE);
+        CompositeData data = checkpointMBean.getOldestCheckpointCreationTime();
+        // Guard against a null result or a missing item: CompositeData#get throws for unknown item names
+        Object timestampObj = (data != null && data.containsKey("timestamp")) ? data.get("timestamp") : null;
+        try {
+            // String.valueOf(null) is "null", which is not a number and lands in the catch branch below
+            long minCheckpointTimestamp = Long.parseLong(String.valueOf(timestampObj));
+            if (minCheckpointTimestamp < timestamp) {
+                log.info("Oldest checkpoint time data ({}) is older than buffer period for deleted blobs." +
+                        " Using that instead", data);
+                timestamp = minCheckpointTimestamp;
+            }
+        } catch (NumberFormatException nfe) {
+            log.warn("Couldn't find timestamp in checkpoint mbean output: {}", data);
+        }
+        return timestamp;
+    }
+
     protected void bindNodeAggregator(NodeAggregator aggregator) {
         this.nodeAggregator = aggregator;
         initialize();

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/ActiveDeletedBlobCollectorFactory.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/ActiveDeletedBlobCollectorFactory.java?rev=1797740&r1=1797739&r2=1797740&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/ActiveDeletedBlobCollectorFactory.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/ActiveDeletedBlobCollectorFactory.java Tue Jun  6 06:45:36 2017
@@ -115,7 +115,6 @@ public class ActiveDeletedBlobCollectorF
      * due deleted blob
      */
     static class ActiveDeletedBlobCollectorImpl implements ActiveDeletedBlobCollector {
-
         private static PerfLogger PERF_LOG = new PerfLogger(
                 LoggerFactory.getLogger(ActiveDeletedBlobCollectorImpl.class.getName() + ".perf"));
         private static Logger LOG = LoggerFactory.getLogger(ActiveDeletedBlobCollectorImpl.class.getName());

Modified: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderServiceTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderServiceTest.java?rev=1797740&r1=1797739&r2=1797740&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderServiceTest.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderServiceTest.java Tue Jun  6 06:45:36 2017
@@ -35,6 +35,7 @@ import org.apache.commons.io.FileUtils;
 import org.apache.commons.lang3.reflect.FieldUtils;
 import org.apache.jackrabbit.oak.api.Blob;
 import org.apache.jackrabbit.oak.api.jmx.CacheStatsMBean;
+import org.apache.jackrabbit.oak.api.jmx.CheckpointMBean;
 import org.apache.jackrabbit.oak.plugins.blob.datastore.CachingFileDataStore;
 import org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore;
 import org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreUtils;
@@ -88,6 +89,7 @@ public class LuceneIndexProviderServiceT
         context.registerService(NodeStore.class, new MemoryNodeStore());
         context.registerService(IndexPathService.class, mock(IndexPathService.class));
         context.registerService(AsyncIndexInfoService.class, mock(AsyncIndexInfoService.class));
+        context.registerService(CheckpointMBean.class, mock(CheckpointMBean.class));
         MockOsgi.injectServices(service, context.bundleContext());
     }