You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by st...@apache.org on 2023/11/06 14:56:05 UTC

(jackrabbit-oak) 01/01: OAK-10526 : set split doc maxRev to 'now' at split time, to avoid it being GCed too early

This is an automated email from the ASF dual-hosted git repository.

stefanegli pushed a commit to branch OAK-10526
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 5be575223d2f60470e795ffb10a78520f76195cb
Author: Stefan Egli <st...@apache.org>
AuthorDate: Mon Nov 6 15:55:45 2023 +0100

    OAK-10526 : set split doc maxRev to 'now' at split time, to avoid it being GCed too early
---
 .../jackrabbit/oak/plugins/document/SplitOperations.java | 16 ++++++++++++++--
 .../oak/plugins/document/VersionGarbageCollectorIT.java  | 15 ++++++++++-----
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/SplitOperations.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/SplitOperations.java
index 5e0b8f6fec..2644ee71bb 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/SplitOperations.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/SplitOperations.java
@@ -324,7 +324,13 @@ class SplitOperations {
                 for (Range r : entry.getValue()) {
                     setPrevious(intermediate, r);
                 }
-                setIntermediateDocProps(intermediate, h);
+                // OAK-10526 : setting 'maxRev=now()' here guarantees earliest GC of this
+                // split doc will be 'maxAgeMillis' (24h) from now (hence covers all open
+                // JCR sessions) or until any checkpoint created before 'now()' is
+                // released. While this leaves garbage split doc slightly longer than
+                // absolutely necessary, it is a rather simple and robust mechanism.
+                setIntermediateDocProps(intermediate,
+                        Revision.newRevision(context.getClusterId()));
                 splitOps.add(intermediate);
             }
         }
@@ -380,7 +386,13 @@ class SplitOperations {
             // check size of old document
             NodeDocument oldDoc = new NodeDocument(STORE);
             UpdateUtils.applyChanges(oldDoc, old);
-            setSplitDocProps(doc, oldDoc, old, high);
+            // OAK-10526 : setting 'maxRev=now()' here guarantees earliest GC of this
+            // split doc will be 'maxAgeMillis' (24h) from now (hence covers all open
+            // JCR sessions) or until any checkpoint created before 'now()' is
+            // released. While this leaves garbage split doc slightly longer than
+            // absolutely necessary, it is a rather simple and robust mechanism.
+            setSplitDocProps(doc, oldDoc, old,
+                    Revision.newRevision(context.getClusterId()));
             splitOps.add(old);
 
             if (numValues < numRevsThreshold) {
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
index 1ecbd73cb7..c40ae68456 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
@@ -82,7 +82,6 @@ import org.apache.jackrabbit.oak.stats.Clock;
 import org.jetbrains.annotations.NotNull;
 import org.junit.After;
 import org.junit.Before;
-import org.junit.Ignore;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
@@ -295,7 +294,6 @@ public class VersionGarbageCollectorIT {
      * split doc.
      */
     @Test
-    @Ignore(value = "requires fix for OAK-10526")
     public void gcSplitDocsWithReferencedRevisions() throws Exception {
         final String exp;
 
@@ -327,7 +325,14 @@ public class VersionGarbageCollectorIT {
         exp = lastValue;
         store.runBackgroundOperations();
 
-        // step 5 : create a checkpoint at t(+1w)
+        // step 4b : another change to further lastRev for clusterId 1
+        // required to ensure 5sec rounding of mongo variant is also covered
+        clock.waitUntil(clock.getTime() + TimeUnit.SECONDS.toMillis(6));
+        b1 = store.getRoot().builder();
+        b1.child("unrelated").setProperty("unrelated", "unrelated");
+        store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        // step 5 : create a checkpoint at t(+1w+6sec)
         String checkpoint = store.checkpoint(TimeUnit.DAYS.toMillis(42));
         assertEquals(exp, store.getRoot().getChildNode("t").getString("foo"));
         assertEquals(exp, store.retrieve(checkpoint).getChildNode("t").getString("foo"));
@@ -348,8 +353,8 @@ public class VersionGarbageCollectorIT {
         // as we'd be in the same rounded second) -> t(+2w:30s)
         clock.waitUntil(clock.getTime() + TimeUnit.SECONDS.toMillis(30));
 
-        // step 9 : trigger another GC - this now splits away the referenced revision
-        assertEquals(1, gc.gc(24, HOURS).splitDocGCCount);
+        // step 9 : trigger another GC - previously split away the referenced revision
+        assertEquals(0, gc.gc(24, HOURS).splitDocGCCount);
         // flush the caches as otherwise it might deliver stale data
         store.getNodeCache().invalidateAll();
         assertEquals("barZ", store.getRoot().getChildNode("t").getString("foo"));