You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by ca...@apache.org on 2017/06/06 06:44:55 UTC

svn commit: r1797738 - in /jackrabbit/oak/trunk: oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/ oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/ oak-run/src/main/java/org/apache/jackrabbit/oak...

Author: catholicon
Date: Tue Jun  6 06:44:55 2017
New Revision: 1797738

URL: http://svn.apache.org/viewvc?rev=1797738&view=rev
Log:
OAK-2808: Active deletion of 'deleted' Lucene index files from DataStore without relying on full scale Blob GC

Review comments:
* Use blob.getContentIdentity()
* Add javadocs
* refactor setting up external directory factory

Minor cleanup

Modified:
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorProvider.java
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakDirectory.java
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/ActiveDeletedBlobCollectorFactory.java
    jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/LuceneIndexHelper.java

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorProvider.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorProvider.java?rev=1797738&r1=1797737&r2=1797738&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorProvider.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorProvider.java Tue Jun  6 06:44:55 2017
@@ -68,7 +68,6 @@ public class LuceneIndexEditorProvider i
     private final ActiveDeletedBlobCollector activeDeletedBlobCollector;
     private GarbageCollectableBlobStore blobStore;
     private IndexingQueue indexingQueue;
-    private DirectoryFactory externallyProvidedDirectoryFactory;
 
     /**
      * Number of indexed Lucene document that can be held in memory
@@ -131,7 +130,7 @@ public class LuceneIndexEditorProvider i
             IndexingContext indexingContext = ((ContextAwareCallback)callback).getIndexingContext();
             BlobDeletionCallback blobDeletionCallback = activeDeletedBlobCollector.getBlobDeletionCallback();
             indexingContext.registerIndexCommitCallback(blobDeletionCallback);
-            indexWriterFactory = new DefaultIndexWriterFactory(mountInfoProvider, getDirectoryFactory(blobDeletionCallback));
+            indexWriterFactory = new DefaultIndexWriterFactory(mountInfoProvider, newDirectoryFactory(blobDeletionCallback));
             LuceneIndexWriterFactory writerFactory = indexWriterFactory;
             IndexDefinition indexDefinition = null;
             boolean asyncIndexing = true;
@@ -203,16 +202,8 @@ public class LuceneIndexEditorProvider i
         this.inMemoryDocsLimit = inMemoryDocsLimit;
     }
 
-    public void setDirectoryFactory(DirectoryFactory directoryFactory) {
-        this.externallyProvidedDirectoryFactory = directoryFactory;
-    }
-
-    private DirectoryFactory getDirectoryFactory(BlobDeletionCallback blobDeletionCallback) {
-        if (externallyProvidedDirectoryFactory == null) {
-            return new DefaultDirectoryFactory(indexCopier, blobStore, blobDeletionCallback);
-        } else {
-            return externallyProvidedDirectoryFactory;
-        }
+    protected DirectoryFactory newDirectoryFactory(BlobDeletionCallback blobDeletionCallback) {
+        return new DefaultDirectoryFactory(indexCopier, blobStore, blobDeletionCallback);
     }
 
     private LuceneDocumentHolder getDocumentHolder(CommitContext commitContext){

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakDirectory.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakDirectory.java?rev=1797738&r1=1797737&r2=1797738&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakDirectory.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakDirectory.java Tue Jun  6 06:44:55 2017
@@ -163,7 +163,13 @@ public class OakDirectory extends Direct
         if (property != null) {
             if (property.getType() == BINARIES || property.getType() == BINARY) {
                 for (Blob b : property.getValue(BINARIES)) {
-                    blobDeletionCallback.deleted(b.toString(),
+                    //Mark the blob as deleted. Also, pass along the index path, the type of
+                    //directory (:suggest, :data, etc.) and the name of the file being deleted
+                    String blobId = b.getContentIdentity();
+                    if (blobId == null) {
+                        blobId = b.toString();
+                    }
+                    blobDeletionCallback.deleted(blobId,
                             Lists.newArrayList(definition.getIndexPath(), dataNodeName, name));
                 }
             }

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/ActiveDeletedBlobCollectorFactory.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/ActiveDeletedBlobCollectorFactory.java?rev=1797738&r1=1797737&r2=1797738&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/ActiveDeletedBlobCollectorFactory.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/ActiveDeletedBlobCollectorFactory.java Tue Jun  6 06:44:55 2017
@@ -52,10 +52,12 @@ import static com.google.common.base.Pre
 
 public class ActiveDeletedBlobCollectorFactory {
     public interface ActiveDeletedBlobCollector {
+        /**
+         * @return an instance of {@link BlobDeletionCallback} that can be used to track deleted blobs
+         */
         BlobDeletionCallback getBlobDeletionCallback();
         void purgeBlobsDeleted(long before, GarbageCollectableBlobStore blobStore);
     }
-//                        LOG.info("Added {} to delete.", info);
 
     public static ActiveDeletedBlobCollector NOOP = new ActiveDeletedBlobCollector() {
         @Override
@@ -70,6 +72,14 @@ public class ActiveDeletedBlobCollectorF
     };
 
     public interface BlobDeletionCallback extends IndexCommitCallback {
+        /**
+         * Tracks deleted blobs. From the point of view of this interface, blobId is an opaque
+         * string that needs to be tracked.
+         * @param blobId identifier representing the deleted blob. As far as this interface is
+         *               concerned, it need not refer to an actual blob.
+         * @param ids Information that can be useful for debugging - this is not used for purging
+         *            blobs.
+         */
         void deleted(String blobId, Iterable<String> ids);
         BlobDeletionCallback NOOP = new BlobDeletionCallback() {
             @Override
@@ -143,6 +153,11 @@ public class ActiveDeletedBlobCollectorF
             this.deletedBlobsFileWriter = new DeletedBlobsFileWriter();
         }
 
+        /**
+         * Purges blobs from the blob-store which were tracked earlier as deleted.
+         * @param before only purge blobs which were deleted before this timestamp
+         * @param blobStore the blob store from which the tracked blobs are deleted
+         */
         public void purgeBlobsDeleted(long before, @Nonnull GarbageCollectableBlobStore blobStore) {
             long numBlobsDeleted = 0;
             long numChunksDeleted = 0;
@@ -181,13 +196,15 @@ public class ActiveDeletedBlobCollectorF
                                         break;
                                     }
 
-                                    long deleted = blobStore.countDeleteChunks(
-                                            Lists.newArrayList(blobStore.resolveChunks(deletedBlobId)), 0);
-                                    if (deleted < 1) {
-                                        LOG.warn("Blob {} in file {} not deleted", deletedBlobId, deletedBlobListFile);
-                                    } else {
-                                        numBlobsDeleted++;
-                                        numChunksDeleted += deleted;
+                                    List<String> chunkIds = Lists.newArrayList(blobStore.resolveChunks(deletedBlobId));
+                                    if (chunkIds.size() > 0) {
+                                        long deleted = blobStore.countDeleteChunks(chunkIds, 0);
+                                        if (deleted < 1) {
+                                            LOG.warn("Blob {} in file {} not deleted", deletedBlobId, deletedBlobListFile);
+                                        } else {
+                                            numBlobsDeleted++;
+                                            numChunksDeleted += deleted;
+                                        }
                                     }
                                 } catch (NumberFormatException nfe) {
                                     LOG.warn("Couldn't parse blobTimestamp(" + parsedDeletedBlobIdLine[1] +
@@ -333,6 +350,10 @@ public class ActiveDeletedBlobCollectorF
             }
         }
 
+        /**
+         * This implementation tracks deleted blobs and then passes them on to
+         * {@link ActiveDeletedBlobCollectorImpl} on a successful commit.
+         */
         private class DeletedBlobCollector implements BlobDeletionCallback {
             List<BlobIdInfoStruct> deletedBlobs = new ArrayList<>();
 

Modified: jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/LuceneIndexHelper.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/LuceneIndexHelper.java?rev=1797738&r1=1797737&r2=1797738&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/LuceneIndexHelper.java (original)
+++ jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/LuceneIndexHelper.java Tue Jun  6 06:44:55 2017
@@ -28,6 +28,7 @@ import org.apache.commons.io.FileUtils;
 import org.apache.jackrabbit.oak.plugins.index.lucene.ExtractedTextCache;
 import org.apache.jackrabbit.oak.plugins.index.lucene.IndexCopier;
 import org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexEditorProvider;
+import org.apache.jackrabbit.oak.plugins.index.lucene.directory.ActiveDeletedBlobCollectorFactory.BlobDeletionCallback;
 import org.apache.jackrabbit.oak.plugins.index.lucene.directory.DirectoryFactory;
 import org.apache.jackrabbit.oak.spi.blob.GarbageCollectableBlobStore;
 
@@ -44,21 +45,32 @@ class LuceneIndexHelper implements Close
     }
 
     public LuceneIndexEditorProvider createEditorProvider() throws IOException {
-        LuceneIndexEditorProvider editor =  new LuceneIndexEditorProvider(
-                getIndexCopier(),
-                textCache,
-                null,
-                indexHelper.getMountInfoProvider()
-        );
+        LuceneIndexEditorProvider editor;
+        if (directoryFactory != null) {
+            editor = new LuceneIndexEditorProvider(
+                    getIndexCopier(),
+                    textCache,
+                    null,
+                    indexHelper.getMountInfoProvider()
+            ) {
+                @Override
+                protected DirectoryFactory newDirectoryFactory(BlobDeletionCallback blobDeletionCallback) {
+                    return directoryFactory;
+                }
+            };
+        } else {
+            editor = new LuceneIndexEditorProvider(
+                    getIndexCopier(),
+                    textCache,
+                    null,
+                    indexHelper.getMountInfoProvider()
+            );
+        }
 
         if (indexHelper.getBlobStore() instanceof GarbageCollectableBlobStore) {
             editor.setBlobStore((GarbageCollectableBlobStore) indexHelper.getBlobStore());
         }
 
-        if (directoryFactory != null) {
-            editor.setDirectoryFactory(directoryFactory);
-        }
-
         return editor;
     }