You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by ad...@apache.org on 2023/09/22 13:18:37 UTC

[jackrabbit-oak] branch issues/OAK-9949 created (now 5136d82b9a)

This is an automated email from the ASF dual-hosted git repository.

adulceanu pushed a change to branch issues/OAK-9949
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git


      at 5136d82b9a OAK-9949 Offline Tail Compaction

This branch includes the following new commits:

     new 5136d82b9a OAK-9949 Offline Tail Compaction

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[jackrabbit-oak] 01/01: OAK-9949 Offline Tail Compaction

Posted by ad...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

adulceanu pushed a commit to branch issues/OAK-9949
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 5136d82b9a0ac9701d144fda47d6ad4cd5b68a27
Author: Lucas Weitzendorf <lw...@adobe.com>
AuthorDate: Wed Sep 28 14:59:13 2022 +0200

    OAK-9949 Offline Tail Compaction
---
 .../site/markdown/nodestore/segment/overview.md    |  6 ++-
 .../apache/jackrabbit/oak/run/CompactCommand.java  | 23 +++++++----
 .../oak/segment/aws/tool/AwsCompact.java           | 28 ++++++++++++-
 .../oak/segment/azure/tool/AzureCompact.java       | 28 ++++++++++++-
 .../oak/segment/CheckpointCompactor.java           | 46 +++++++++++++---------
 .../jackrabbit/oak/segment/tool/Compact.java       | 28 ++++++++++++-
 6 files changed, 129 insertions(+), 30 deletions(-)

diff --git a/oak-doc/src/site/markdown/nodestore/segment/overview.md b/oak-doc/src/site/markdown/nodestore/segment/overview.md
index 53f10542ab..499bcb3cf2 100644
--- a/oak-doc/src/site/markdown/nodestore/segment/overview.md
+++ b/oak-doc/src/site/markdown/nodestore/segment/overview.md
@@ -823,14 +823,16 @@ This option is optional and is disabled by default.
 ### <a name="compact"/> Compact
 
 ```
-java -jar oak-run.jar compact [--force] [--mmap] [--compactor] [--threads] SOURCE [--target-path DESTINATION] [--persistent-cache-path PERSISTENT_CACHE_PATH] [--persistent-cache-size-gb <PERSISTENT_CACHE_SIZE_GB>]
+java -jar oak-run.jar compact [--force] [--mmap] [--tail] [--compactor] [--threads] SOURCE [--target-path DESTINATION] [--persistent-cache-path PERSISTENT_CACHE_PATH] [--persistent-cache-size-gb <PERSISTENT_CACHE_SIZE_GB>]
 ```
 
 The `compact` command performs offline compaction of the local/remote Segment Store at `SOURCE`. 
 `SOURCE` must be a valid path/uri to an existing Segment Store. Currently, Azure Segment Store and AWS Segment Store the supported remote Segment Stores. 
 Please refer to the [Remote Segment Stores](#remote-segment-stores) section for details on how to correctly specify connection URIs.
 
-If the optional `--force [Boolean]` argument is set to `true` the tool ignores a non-matching Segment Store version. *CAUTION*: this will upgrade the Segment Store to the 
+With the optional `--tail` flag, only tail compaction is performed instead of a full compaction of the repository.
+
+If the optional `--force` flag is set, the tool ignores a non-matching Segment Store version. *CAUTION*: this will upgrade the Segment Store to the 
 latest version, which is incompatible with older versions. *There is no way to downgrade 
 an accidentally upgraded Segment Store*.  
 
diff --git a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/CompactCommand.java b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/CompactCommand.java
index 47257982d3..f955278931 100644
--- a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/CompactCommand.java
+++ b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/CompactCommand.java
@@ -25,6 +25,7 @@ import joptsimple.OptionSet;
 import joptsimple.OptionSpec;
 import org.apache.jackrabbit.oak.run.commons.Command;
 import org.apache.jackrabbit.oak.segment.azure.tool.AzureCompact;
+import org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions;
 import org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.CompactorType;
 import org.apache.jackrabbit.oak.segment.aws.tool.AwsCompact;
 import org.apache.jackrabbit.oak.segment.tool.Compact;
@@ -48,12 +49,11 @@ class CompactCommand implements Command {
                     "is always enforced and this option is ignored.")
                 .withOptionalArg()
                 .ofType(Boolean.class);
-        OptionSpec<Boolean> forceArg = parser.accepts("force",
+        OptionSpec<Void> forceArg = parser.accepts("force",
                 "Force compaction and ignore a non matching segment store version. " +
                         "CAUTION: this will upgrade the segment store to the latest version, " +
-                        "which is incompatible with older versions of Oak.")
-                .withOptionalArg()
-                .ofType(Boolean.class);
+                        "which is incompatible with older versions of Oak.");
+        OptionSpec<Void> tailArg = parser.accepts("tail", "Use tail compaction instead of a full repository rewrite.");
         OptionSpec<String> compactor = parser.accepts("compactor",
                 "Allow the user to control compactor type to be used. Valid choices are \"classic\", \"diff\", \"parallel\". " +
                         "While \"classic\" is slower, it might be more stable, due to lack of optimisations employed " +
@@ -110,10 +110,13 @@ class CompactCommand implements Command {
                     .withTargetPath(targetPath.value(options))
                     .withPersistentCachePath(persistentCachePath.value(options))
                     .withPersistentCacheSizeGb(persistentCacheSizeGb.value(options))
-                    .withForce(isTrue(forceArg.value(options)))
+                    .withForce(options.has(forceArg))
                     .withGCLogInterval(Long.getLong("compaction-progress-log", 150000))
                     .withConcurrency(nThreads.value(options));
 
+            if (options.has(tailArg)) {
+                azureBuilder.withGCType(SegmentGCOptions.GCType.TAIL);
+            }
             if (options.has(compactor)) {
                 azureBuilder.withCompactorType(CompactorType.fromDescription(compactor.value(options)));
             }
@@ -122,11 +125,14 @@ class CompactCommand implements Command {
         } else if (path.startsWith("aws:")) {
             AwsCompact.Builder awsBuilder = AwsCompact.builder()
                     .withPath(path)
-                    .withForce(isTrue(forceArg.value(options)))
+                    .withForce(options.has(forceArg))
                     .withSegmentCacheSize(Integer.getInteger("cache", 256))
                     .withGCLogInterval(Long.getLong("compaction-progress-log", 150000))
                     .withConcurrency(nThreads.value(options));
 
+            if (options.has(tailArg)) {
+                awsBuilder.withGCType(SegmentGCOptions.GCType.TAIL);
+            }
             if (options.has(compactor)) {
                 awsBuilder.withCompactorType(CompactorType.fromDescription(compactor.value(options)));
             }
@@ -135,13 +141,16 @@ class CompactCommand implements Command {
         } else {
             Compact.Builder tarBuilder = Compact.builder()
                     .withPath(new File(path))
-                    .withForce(isTrue(forceArg.value(options)))
+                    .withForce(options.has(forceArg))
                     .withMmap(mmapArg.value(options))
                     .withOs(StandardSystemProperty.OS_NAME.value())
                     .withSegmentCacheSize(Integer.getInteger("cache", 256))
                     .withGCLogInterval(Long.getLong("compaction-progress-log", 150000))
                     .withConcurrency(nThreads.value(options));
 
+            if (options.has(tailArg)) {
+                tarBuilder.withGCType(SegmentGCOptions.GCType.TAIL);
+            }
             if (options.has(compactor)) {
                 tarBuilder.withCompactorType(CompactorType.fromDescription(compactor.value(options)));
             }
diff --git a/oak-segment-aws/src/main/java/org/apache/jackrabbit/oak/segment/aws/tool/AwsCompact.java b/oak-segment-aws/src/main/java/org/apache/jackrabbit/oak/segment/aws/tool/AwsCompact.java
index 1c77b9bab6..6405dcebd1 100644
--- a/oak-segment-aws/src/main/java/org/apache/jackrabbit/oak/segment/aws/tool/AwsCompact.java
+++ b/oak-segment-aws/src/main/java/org/apache/jackrabbit/oak/segment/aws/tool/AwsCompact.java
@@ -34,6 +34,7 @@ import org.apache.jackrabbit.guava.common.io.Files;
 
 import org.apache.jackrabbit.oak.segment.SegmentCache;
 import org.apache.jackrabbit.oak.segment.aws.tool.AwsToolUtils.SegmentStoreType;
+import org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.GCType;
 import org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.CompactorType;
 import org.apache.jackrabbit.oak.segment.file.FileStore;
 import org.apache.jackrabbit.oak.segment.file.JournalReader;
@@ -73,6 +74,8 @@ public class AwsCompact {
 
         private int segmentCacheSize = DEFAULT_SEGMENT_CACHE_MB;
 
+        private GCType gcType = GCType.FULL;
+
         private CompactorType compactorType = CompactorType.PARALLEL_COMPACTOR;
 
         private int concurrency = 1;
@@ -133,6 +136,16 @@ public class AwsCompact {
             return this;
         }
 
+        /**
+         * The garbage collection type used. If not specified it defaults to full compaction
+         * @param gcType the GC type
+         * @return this builder
+         */
+        public Builder withGCType(GCType gcType) {
+            this.gcType = gcType;
+            return this;
+        }
+
         /**
          * The compactor type to be used by compaction. If not specified it defaults to
          * "parallel" compactor
@@ -173,6 +186,8 @@ public class AwsCompact {
 
     private final long gcLogInterval;
 
+    private final GCType gcType;
+
     private final CompactorType compactorType;
 
     private final int concurrency;
@@ -182,6 +197,7 @@ public class AwsCompact {
         this.segmentCacheSize = builder.segmentCacheSize;
         this.strictVersionCheck = !builder.force;
         this.gcLogInterval = builder.gcLogInterval;
+        this.gcType = builder.gcType;
         this.compactorType = builder.compactorType;
         this.concurrency = builder.concurrency;
     }
@@ -206,7 +222,17 @@ public class AwsCompact {
 
         try (FileStore store = newFileStore(persistence, Files.createTempDir(), strictVersionCheck, segmentCacheSize,
                 gcLogInterval, compactorType, concurrency)) {
-            if (!store.compactFull()) {
+            boolean success = false;
+            switch (gcType) {
+                case FULL:
+                    success = store.compactFull();
+                    break;
+                case TAIL:
+                    success = store.compactTail();
+                    break;
+            }
+
+            if (!success) {
                 System.out.printf("Compaction cancelled after %s.\n", printableStopwatch(watch));
                 return 1;
             }
diff --git a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/tool/AzureCompact.java b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/tool/AzureCompact.java
index a8e103c3c8..d4942f63f1 100644
--- a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/tool/AzureCompact.java
+++ b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/tool/AzureCompact.java
@@ -36,6 +36,7 @@ import com.microsoft.azure.storage.blob.ListBlobItem;
 
 import org.apache.jackrabbit.oak.segment.SegmentCache;
 import org.apache.jackrabbit.oak.segment.azure.tool.ToolUtils.SegmentStoreType;
+import org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.GCType;
 import org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.CompactorType;
 import org.apache.jackrabbit.oak.segment.file.FileStore;
 import org.apache.jackrabbit.oak.segment.spi.persistence.SegmentArchiveManager;
@@ -79,6 +80,8 @@ public class AzureCompact {
 
         private int segmentCacheSize = 2048;
 
+        private GCType gcType = GCType.FULL;
+
         private CompactorType compactorType = CompactorType.PARALLEL_COMPACTOR;
 
         private int concurrency = 1;
@@ -159,6 +162,16 @@ public class AzureCompact {
             return this;
         }
 
+        /**
+         * The garbage collection type used. If not specified it defaults to full compaction
+         * @param gcType the GC type
+         * @return this builder
+         */
+        public Builder withGCType(GCType gcType) {
+            this.gcType = gcType;
+            return this;
+        }
+
         /**
          * The compactor type to be used by compaction. If not specified it defaults to
          * "parallel" compactor
@@ -225,6 +238,8 @@ public class AzureCompact {
 
     private final long gcLogInterval;
 
+    private final GCType gcType;
+
     private final CompactorType compactorType;
 
     private final int concurrency;
@@ -239,6 +254,7 @@ public class AzureCompact {
         this.segmentCacheSize = builder.segmentCacheSize;
         this.strictVersionCheck = !builder.force;
         this.gcLogInterval = builder.gcLogInterval;
+        this.gcType = builder.gcType;
         this.compactorType = builder.compactorType;
         this.concurrency = builder.concurrency;
         this.persistentCachePath = builder.persistentCachePath;
@@ -270,7 +286,17 @@ public class AzureCompact {
 
         try (FileStore store = newFileStore(splitPersistence, Files.createTempDir(), strictVersionCheck, segmentCacheSize,
                 gcLogInterval, compactorType, concurrency)) {
-            if (!store.compactFull()) {
+            boolean success = false;
+            switch (gcType) {
+                case FULL:
+                    success = store.compactFull();
+                    break;
+                case TAIL:
+                    success = store.compactTail();
+                    break;
+            }
+
+            if (!success) {
                 System.out.printf("Compaction cancelled after %s.\n", printableStopwatch(watch));
                 return 1;
             }
diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/CheckpointCompactor.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/CheckpointCompactor.java
index d5609a5a9d..e9d63cec05 100644
--- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/CheckpointCompactor.java
+++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/CheckpointCompactor.java
@@ -36,11 +36,13 @@ import java.util.Map;
 import java.util.Map.Entry;
 
 import org.apache.jackrabbit.oak.commons.Buffer;
+import org.apache.jackrabbit.oak.plugins.memory.MemoryChildNodeEntry;
 import org.apache.jackrabbit.oak.segment.file.GCNodeWriteMonitor;
 import org.apache.jackrabbit.oak.segment.file.cancel.Canceller;
 import org.apache.jackrabbit.oak.spi.blob.BlobStore;
 import org.apache.jackrabbit.oak.spi.gc.GCMonitor;
 import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry;
+import org.apache.jackrabbit.oak.spi.state.DefaultNodeStateDiff;
 import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
 import org.apache.jackrabbit.oak.spi.state.NodeState;
 import org.jetbrains.annotations.NotNull;
@@ -117,7 +119,7 @@ public class CheckpointCompactor implements Compactor {
     ) throws IOException {
         // Collect a chronologically ordered list of roots for the uncompacted
         // state. This list consists of all checkpoints followed by the root.
-        LinkedHashMap<String, NodeState> uncompactedRoots = collectRoots(uncompacted);
+        LinkedHashMap<String, NodeState> uncompactedRoots = collectRoots(base, uncompacted);
 
         // Compact the list of uncompacted roots to a list of compacted roots.
         LinkedHashMap<String, NodeState> compactedRoots = compact(
@@ -183,27 +185,35 @@ public class CheckpointCompactor implements Compactor {
      * the root.
      */
     @NotNull
-    private LinkedHashMap<String, NodeState> collectRoots(@Nullable NodeState superRoot) {
+    private LinkedHashMap<String, NodeState> collectRoots(@NotNull NodeState superRootBefore, @NotNull NodeState superRootAfter) {
         LinkedHashMap<String, NodeState> roots = newLinkedHashMap();
-        if (superRoot != null) {
-            List<ChildNodeEntry> checkpoints = newArrayList(
-                    superRoot.getChildNode("checkpoints").getChildNodeEntries());
 
-            checkpoints.sort((cne1, cne2) -> {
-                long c1 = cne1.getNodeState().getLong("created");
-                long c2 = cne2.getNodeState().getLong("created");
-                return Long.compare(c1, c2);
-            });
+        List<ChildNodeEntry> checkpoints = newArrayList();
+        superRootAfter.getChildNode("checkpoints").compareAgainstBaseState(
+                superRootBefore.getChildNode("checkpoints"), new DefaultNodeStateDiff() {
+                    @Override
+                    public boolean childNodeAdded(String name, NodeState after) {
+                        checkpoints.add(new MemoryChildNodeEntry(name, after));
+                        return true;
+                    }
+                }
+        );
 
-            for (ChildNodeEntry checkpoint : checkpoints) {
-                String name = checkpoint.getName();
-                NodeState node = checkpoint.getNodeState();
-                gcListener.info("found checkpoint {} created at {}.",
-                    name, new Date(node.getLong("created")));
-                roots.put("checkpoints/" + name + "/root", node.getChildNode("root"));
-            }
-            roots.put("root", superRoot.getChildNode("root"));
+        checkpoints.sort((cne1, cne2) -> {
+            long c1 = cne1.getNodeState().getLong("created");
+            long c2 = cne2.getNodeState().getLong("created");
+            return Long.compare(c1, c2);
+        });
+
+        for (ChildNodeEntry checkpoint : checkpoints) {
+            String name = checkpoint.getName();
+            NodeState node = checkpoint.getNodeState();
+            gcListener.info("found checkpoint {} created at {}.",
+                name, new Date(node.getLong("created")));
+            roots.put("checkpoints/" + name + "/root", node.getChildNode("root"));
         }
+        roots.put("root", superRootAfter.getChildNode("root"));
+
         return roots;
     }
 
diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/tool/Compact.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/tool/Compact.java
index a72360ddb0..62fb9af92f 100644
--- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/tool/Compact.java
+++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/tool/Compact.java
@@ -37,6 +37,7 @@ import java.util.concurrent.TimeUnit;
 
 import org.apache.jackrabbit.guava.common.base.Stopwatch;
 import org.apache.jackrabbit.oak.segment.SegmentCache;
+import org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.GCType;
 import org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.CompactorType;
 import org.apache.jackrabbit.oak.segment.spi.persistence.JournalFile;
 import org.apache.jackrabbit.oak.segment.spi.persistence.JournalFileWriter;
@@ -78,6 +79,8 @@ public class Compact {
 
         private int segmentCacheSize = DEFAULT_SEGMENT_CACHE_MB;
 
+        private GCType gcType = GCType.FULL;
+
         private CompactorType compactorType = CompactorType.PARALLEL_COMPACTOR;
 
         private int concurrency = 1;
@@ -164,6 +167,16 @@ public class Compact {
             return this;
         }
 
+        /**
+         * The garbage collection type used. If not specified it defaults to full compaction
+         * @param gcType the GC type
+         * @return this builder
+         */
+        public Builder withGCType(GCType gcType) {
+            this.gcType = gcType;
+            return this;
+        }
+
         /**
          * The compactor type to be used by compaction. If not specified it defaults to
          * "parallel" compactor
@@ -277,6 +290,8 @@ public class Compact {
 
     private final long gcLogInterval;
 
+    private final GCType gcType;
+
     private final CompactorType compactorType;
 
     private final int concurrency;
@@ -288,6 +303,7 @@ public class Compact {
         this.segmentCacheSize = builder.segmentCacheSize;
         this.strictVersionCheck = !builder.force;
         this.gcLogInterval = builder.gcLogInterval;
+        this.gcType = builder.gcType;
         this.compactorType = builder.compactorType;
         this.concurrency = builder.concurrency;
     }
@@ -303,7 +319,17 @@ public class Compact {
         Stopwatch watch = Stopwatch.createStarted();
 
         try (FileStore store = newFileStore()) {
-            if (!store.compactFull()) {
+            boolean success = false;
+            switch (gcType) {
+                case FULL:
+                    success = store.compactFull();
+                    break;
+                case TAIL:
+                    success = store.compactTail();
+                    break;
+            }
+
+            if (!success) {
                 System.out.printf("Compaction cancelled after %s.\n", printableStopwatch(watch));
                 return 1;
             }