You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by ca...@apache.org on 2017/06/06 06:45:37 UTC
svn commit: r1797740 - in /jackrabbit/oak/trunk/oak-lucene/src:
main/java/org/apache/jackrabbit/oak/plugins/index/lucene/
main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/
test/java/org/apache/jackrabbit/oak/plugins/index/lucene/
Author: catholicon
Date: Tue Jun 6 06:45:36 2017
New Revision: 1797740
URL: http://svn.apache.org/viewvc?rev=1797740&view=rev
Log:
OAK-2808: Active deletion of 'deleted' Lucene index files from DataStore without relying on full scale Blob GC
Setup scheduling of purge. Points to note:
* By default, the feature is disabled (scheduler interval = -1)
* Only blobs that were marked as deleted before Math.min(oldest_checkpoint_timestamp, curr_time - minAge) are purged, where minAge is Long.getLong("oak.active.deletion.minAge") in seconds (default: 24 hours)
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/ActiveDeletedBlobCollectorFactory.java
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderServiceTest.java
Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java?rev=1797740&r1=1797739&r2=1797740&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java Tue Jun 6 06:45:36 2017
@@ -34,6 +34,7 @@ import java.util.concurrent.atomic.Atomi
import javax.annotation.Nonnull;
import javax.management.NotCompliantMBeanException;
+import javax.management.openmbean.CompositeData;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
@@ -47,6 +48,7 @@ import org.apache.felix.scr.annotations.
import org.apache.felix.scr.annotations.ReferencePolicy;
import org.apache.felix.scr.annotations.ReferencePolicyOption;
import org.apache.jackrabbit.oak.api.jmx.CacheStatsMBean;
+import org.apache.jackrabbit.oak.api.jmx.CheckpointMBean;
import org.apache.jackrabbit.oak.cache.CacheStats;
import org.apache.jackrabbit.oak.commons.PropertiesUtil;
import org.apache.jackrabbit.oak.osgi.OsgiWhiteboard;
@@ -75,6 +77,7 @@ import org.apache.jackrabbit.oak.spi.que
import org.apache.jackrabbit.oak.spi.state.NodeStore;
import org.apache.jackrabbit.oak.spi.whiteboard.Registration;
import org.apache.jackrabbit.oak.spi.whiteboard.Whiteboard;
+import org.apache.jackrabbit.oak.stats.Clock;
import org.apache.jackrabbit.oak.stats.StatisticsProvider;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;
@@ -90,6 +93,7 @@ import static com.google.common.base.Pre
import static java.util.Collections.emptyMap;
import static org.apache.commons.io.FileUtils.ONE_MB;
import static org.apache.jackrabbit.oak.spi.whiteboard.WhiteboardUtils.registerMBean;
+import static org.apache.jackrabbit.oak.spi.whiteboard.WhiteboardUtils.scheduleWithFixedDelay;
@SuppressWarnings("UnusedDeclaration")
@Component(metatype = true, label = "Apache Jackrabbit Oak LuceneIndexProvider")
@@ -233,15 +237,22 @@ public class LuceneIndexProviderService
)
private static final String PROP_DISABLE_STORED_INDEX_DEFINITION = "disableStoredIndexDefinition";
- private static final boolean PROP_DELETED_BLOB_COLLECTION_ENABLED = false;
+ private static final int PROP_DELETED_BLOB_COLLECTION_DEFAULT_INTERVAL = -1;
@Property(
- boolValue = PROP_DELETED_BLOB_COLLECTION_ENABLED,
- label = "Actively remove deleted index blobs from blob store",
- description = "Index blobs are explicitly unique and don't require mark-sweek type collection." +
- "Turning this on would setup early deletion of blobs from blob collection that are deleted" +
- " during indexing."
- )
- private static final String PROP_ENABLE_DELETED_BLOB_COLLECTION_DEFINITION = "enableDeletedBlobsCollection";
+ // Purge interval in seconds; the default of -1 disables active deletion.
+ // Each description fragment ends with a space so the concatenated sentences stay separated.
+ intValue = PROP_DELETED_BLOB_COLLECTION_DEFAULT_INTERVAL,
+ label = "Time interval (in seconds) for actively removing deleted index blobs from blob store",
+ description = "Index blobs are explicitly unique and don't require mark-sweep type collection. " +
+ "This is number of seconds for scheduling clean-up. -1 would disable the functionality. " +
+ "Cleanup implies purging index blobs marked as deleted earlier during some indexing cycle."
+ )
+ private static final String PROP_NAME_DELETED_BLOB_COLLECTION_DEFAULT_INTERVAL = "deletedBlobsCollectionInterval";
+ /**
+ * Minimum time (in seconds) that must have passed since a blob was deleted before it
+ * may be actively purged. Read from the system property {@code oak.active.deletion.minAge};
+ * defaults to 24 hours.
+ */
+ final long MIN_BLOB_AGE_TO_ACTIVELY_DELETE = Long.getLong("oak.active.deletion.minAge",
+ TimeUnit.HOURS.toSeconds(24));
+
+ private final Clock clock = Clock.SIMPLE;
private Whiteboard whiteboard;
@@ -282,6 +293,9 @@ public class LuceneIndexProviderService
)
private GarbageCollectableBlobStore blobStore;
+ @Reference
+ private CheckpointMBean checkpointMBean;
+
private IndexCopier indexCopier;
private ActiveDeletedBlobCollectorFactory.ActiveDeletedBlobCollector activeDeletedBlobCollector;
@@ -323,15 +337,7 @@ public class LuceneIndexProviderService
initializeExtractedTextCache(bundleContext, config);
IndexTracker tracker = createTracker(bundleContext, config);
indexProvider = new LuceneIndexProvider(tracker, scorerFactory, augmentorFactory);
- if (PROP_DELETED_BLOB_COLLECTION_ENABLED && blobStore != null) {
- File blobCollectorWorkingDir = new File(indexDir, "deleted-blobs");
- activeDeletedBlobCollector = ActiveDeletedBlobCollectorFactory.newInstance(blobCollectorWorkingDir, executorService);
- log.info("Active blob collector initialized at working dir: {}", blobCollectorWorkingDir);
- } else {
- activeDeletedBlobCollector = ActiveDeletedBlobCollectorFactory.NOOP;
- log.info("Active blob collector set to NOOP. Enable? {}; blobStore: {}",
- PROP_DELETED_BLOB_COLLECTION_ENABLED, blobStore);
- }
+ initializeActiveBlobCollector(whiteboard, config);
initializeLogging(config);
initialize();
@@ -703,6 +709,53 @@ public class LuceneIndexProviderService
regs.add(bundleContext.registerService(IndexInfoProvider.class.getName(), infoProvider, null));
}
+ /**
+  * Sets up the collector used to actively purge deleted index blobs.
+  *
+  * <p>When the configured interval is greater than -1 and a blob store is bound, a real
+  * collector is created with {@code <indexDir>/deleted-blobs} as its working directory and a
+  * purge task is scheduled with a fixed delay of that many seconds. Otherwise the NOOP
+  * collector is used and nothing is scheduled.
+  *
+  * @param whiteboard whiteboard on which the periodic purge task is registered
+  * @param config     component configuration; the interval is read from
+  *                   {@code deletedBlobsCollectionInterval}
+  */
+ private void initializeActiveBlobCollector(Whiteboard whiteboard, Map<String, ?> config) {
+     int activeDeletionInterval = PropertiesUtil.toInteger(
+             config.get(PROP_NAME_DELETED_BLOB_COLLECTION_DEFAULT_INTERVAL),
+             PROP_DELETED_BLOB_COLLECTION_DEFAULT_INTERVAL);
+     if (activeDeletionInterval > -1 && blobStore != null) {
+         File blobCollectorWorkingDir = new File(indexDir, "deleted-blobs");
+         activeDeletedBlobCollector = ActiveDeletedBlobCollectorFactory.newInstance(blobCollectorWorkingDir, executorService);
+         // The safe timestamp is recomputed on every run so that it follows the clock
+         // and the current oldest checkpoint.
+         oakRegs.add(
+                 scheduleWithFixedDelay(whiteboard, () ->
+                                 activeDeletedBlobCollector.purgeBlobsDeleted(
+                                         getSafeTimestampForDeletedBlobs(checkpointMBean),
+                                         blobStore),
+                         activeDeletionInterval));
+
+         log.info("Active blob collector initialized at working dir: {}; deletion interval {} seconds; " +
+                         "minAge: {} seconds",
+                 blobCollectorWorkingDir, activeDeletionInterval, MIN_BLOB_AGE_TO_ACTIVELY_DELETE);
+     } else {
+         activeDeletedBlobCollector = ActiveDeletedBlobCollectorFactory.NOOP;
+         log.info("Active blob collector set to NOOP. deletionInterval: {} seconds; blobStore: {}",
+                 activeDeletionInterval, blobStore);
+     }
+ }
+
+ /**
+  * Computes the cut-off timestamp (in milliseconds) passed to the purge of deleted blobs.
+  *
+  * <p>The cut-off is the current time minus {@code MIN_BLOB_AGE_TO_ACTIVELY_DELETE}
+  * (seconds), lowered to the oldest checkpoint's {@code "timestamp"} when that is earlier.
+  * If the checkpoint data is missing, lacks a {@code "timestamp"} item, or the value is not
+  * a number, a warning is logged and the age-based cut-off is returned.
+  *
+  * @param checkpointMBean source of the oldest checkpoint creation time
+  * @return the cut-off timestamp in milliseconds
+  */
+ private long getSafeTimestampForDeletedBlobs(CheckpointMBean checkpointMBean) {
+     long timestamp = clock.getTime() - TimeUnit.SECONDS.toMillis(MIN_BLOB_AGE_TO_ACTIVELY_DELETE);
+
+     CompositeData data = checkpointMBean.getOldestCheckpointCreationTime();
+     // CompositeData.get throws InvalidKeyException for an unknown item name, and the MBean
+     // may hand back null, so check both before reading the value.
+     Object timestampObj = null;
+     if (data != null && data.containsKey("timestamp")) {
+         timestampObj = data.get("timestamp");
+     }
+     if (timestampObj == null) {
+         log.warn("Couldn't find timestamp in checkpoint mbean output: {}", data);
+         return timestamp;
+     }
+
+     try {
+         long minCheckpointTimestamp = Long.parseLong(timestampObj.toString());
+         if (minCheckpointTimestamp < timestamp) {
+             log.info("Oldest checkpoint time data ({}) is older than buffer period for deleted blobs." +
+                     " Using that instead", data);
+             timestamp = minCheckpointTimestamp;
+         }
+     } catch (NumberFormatException nfe) {
+         log.warn("Couldn't find timestamp in checkpoint mbean output: {}", data);
+     }
+
+     return timestamp;
+ }
+
protected void bindNodeAggregator(NodeAggregator aggregator) {
this.nodeAggregator = aggregator;
initialize();
Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/ActiveDeletedBlobCollectorFactory.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/ActiveDeletedBlobCollectorFactory.java?rev=1797740&r1=1797739&r2=1797740&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/ActiveDeletedBlobCollectorFactory.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/ActiveDeletedBlobCollectorFactory.java Tue Jun 6 06:45:36 2017
@@ -115,7 +115,6 @@ public class ActiveDeletedBlobCollectorF
* due deleted blob
*/
static class ActiveDeletedBlobCollectorImpl implements ActiveDeletedBlobCollector {
-
private static PerfLogger PERF_LOG = new PerfLogger(
LoggerFactory.getLogger(ActiveDeletedBlobCollectorImpl.class.getName() + ".perf"));
private static Logger LOG = LoggerFactory.getLogger(ActiveDeletedBlobCollectorImpl.class.getName());
Modified: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderServiceTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderServiceTest.java?rev=1797740&r1=1797739&r2=1797740&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderServiceTest.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderServiceTest.java Tue Jun 6 06:45:36 2017
@@ -35,6 +35,7 @@ import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.reflect.FieldUtils;
import org.apache.jackrabbit.oak.api.Blob;
import org.apache.jackrabbit.oak.api.jmx.CacheStatsMBean;
+import org.apache.jackrabbit.oak.api.jmx.CheckpointMBean;
import org.apache.jackrabbit.oak.plugins.blob.datastore.CachingFileDataStore;
import org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore;
import org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreUtils;
@@ -88,6 +89,7 @@ public class LuceneIndexProviderServiceT
context.registerService(NodeStore.class, new MemoryNodeStore());
context.registerService(IndexPathService.class, mock(IndexPathService.class));
context.registerService(AsyncIndexInfoService.class, mock(AsyncIndexInfoService.class));
+ context.registerService(CheckpointMBean.class, mock(CheckpointMBean.class));
MockOsgi.injectServices(service, context.bundleContext());
}