You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by da...@apache.org on 2023/09/05 10:33:11 UTC

[jackrabbit-oak] branch DetailedGC/OAK-10199 updated (af4c4a7a96 -> 577aeac17f)

This is an automated email from the ASF dual-hosted git repository.

daim pushed a change to branch DetailedGC/OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git


 discard af4c4a7a96 OAK-10199 : removed DetailedGC code changes for RDBDocumentStore
 discard 4508a6894c OAK-10199 : made detailedGC non-configurable on RDBDocumentStore
 discard 81d1da8597 OAK-10199 : incorporated review comments to remove <p/> XHTML tags
 discard 53f22b76c6 OAK-10199 : renamed method name to avoid confusion with other variable of same name
 discard f6eac061f1 OAK-10199 : added unit cases for bundled properties getting garbaged collected as well
 discard 39eb349cb3 OAK-10199 : added bundled properties while calculating the properties retain set as well
 discard 25d01cf1ef OAK-10199 : added review comment to make minor adjustment for naming conventions and fixes
 discard 578b2b0d6f OAK-10199 : refactored RDBVersionGCSupport code
 discard d80d5eae99 OAK-10199 : fixed query to avoid skipping documents with greater _modified timestamp
 discard cbe532b93b OAK-10199 : added logic to skip non garbage documents
 discard b3ae227a7b OAK-10199 : added unit cases to handle concurrent prop update and escaped properties update
 discard 4984eea7d0 OAK-10199 : handled escaped properties while deleting them
 discard c5e853f43b OAK-10199 : added test case to continue with GC even if there is crash
 discard 5c14ac6d8a OAK-10199 : fixed logic to include previously garbage collected documents if updated recently
 discard 7b91bb17c3 OAK-10199 : fixed the VG INIT test
 discard 685f8984c7 OAK-10199 : fixed the VGC IT test cases failures for RDB
 discard 0a83461acb OAK-10199 : fixed the test case with different revision for DetailedGC
 discard 93b38e8bec OAK-10199 : added check to include oldestId when running detailedGc very first time
 discard 8ca58d284c OAK-10199 : added test cases to fetch 5000+ modified docs in loop and verify them
 discard 8c400504e4 OAK-10199 : minor refactoring
 discard fabe3c166d OAK-10199 : updated logic to fetch nodes by sorting them on the basis of _modified & _id
 discard 1cc1a4912c OAK-10199 : fixed code smells as suggested by Sonar
 discard 06f710e299 OAK-10199 : ignore documents which doesn't have _modified field in mongo while fetching modifiedDocs
 discard 9afce915d4 OAK-10199 : used bulk findAndModify api to perform garbage cleanup
 discard a96dfc8557 OAK-10199 : override getModifiedDocs() for RDB and added unit cases for deletedProps
 discard 051205c776 OAK-10199 : provided support for feature toggle & osgi config for detailed gc
 discard a59ae2fb0e OAK-10199 : disable the detailGc in tearDown to avoid side-effects
 discard bb675c548c OAK-10199 : initial sketch of detail gc skeleton
     add ad1a4298f2 OAK-10426: oak-segment-azure: enable baseline check (#1097)
     add 757553face OAK-10425: Ability to remove mixin type without read permission on jcr:mixinTypes property
     add 09aaa8164f Merge pull request #1100 from mreutegg/OAK-10425
     new 2eb2a6cebe OAK-10199 : initial sketch of detail gc skeleton
     new 0c2f2c6bf7 OAK-10199 : disable the detailGc in tearDown to avoid side-effects
     new 25e5b1363e OAK-10199 : provided support for feature toggle & osgi config for detailed gc
     new 49ebde2e8f OAK-10199 : override getModifiedDocs() for RDB and added unit cases for deletedProps
     new d2ed664f94 OAK-10199 : used bulk findAndModify api to perform garbage cleanup
     new 6ed1b0c1ae OAK-10199 : ignore documents which doesn't have _modified field in mongo while fetching modifiedDocs
     new 9930d10e7d OAK-10199 : fixed code smells as suggested by Sonar
     new 5463da0adf OAK-10199 : updated logic to fetch nodes by sorting them on the basis of _modified & _id
     new 3ae56d3c7a OAK-10199 : minor refactoring
     new c9c492cbde OAK-10199 : added test cases to fetch 5000+ modified docs in loop and verify them
     new f6085a9f46 OAK-10199 : added check to include oldestId when running detailedGc very first time
     new 12d0837e08 OAK-10199 : fixed the test case with different revision for DetailedGC
     new c7a29e7358 OAK-10199 : fixed the VGC IT test cases failures for RDB
     new 644698d057 OAK-10199 : fixed the VG INIT test
     new 69b52bd0fa OAK-10199 : fixed logic to include previously garbage collected documents if updated recently
     new 5131702d32 OAK-10199 : added test case to continue with GC even if there is crash
     new 11794a11be OAK-10199 : handled escaped properties while deleting them
     new 4ee104c9b1 OAK-10199 : added unit cases to handle concurrent prop update and escaped properties update
     new 8a8c3482e2 OAK-10199 : added logic to skip non garbage documents
     new a397f06cc2 OAK-10199 : fixed query to avoid skipping documents with greater _modified timestamp
     new e293fd6da7 OAK-10199 : refactored RDBVersionGCSupport code
     new 9fc6d411ac OAK-10199 : added review comment to make minor adjustment for naming conventions and fixes
     new c9abe5e5cf OAK-10199 : added bundled properties while calculating the properties retain set as well
     new 6a3ddfce0c OAK-10199 : added unit cases for bundled properties getting garbaged collected as well
     new f5846e5193 OAK-10199 : renamed method name to avoid confusion with other variable of same name
     new 1c70351c0f OAK-10199 : incorporated review comments to remove <p/> XHTML tags
     new c705535b22 OAK-10199 : made detailedGC non-configurable on RDBDocumentStore
     new 577aeac17f OAK-10199 : removed DetailedGC code changes for RDBDocumentStore

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (af4c4a7a96)
            \
             N -- N -- N   refs/heads/DetailedGC/OAK-10199 (577aeac17f)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 28 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 oak-jcr/pom.xml                                    |  3 +++
 .../security/authorization/ReadPropertyTest.java   | 25 ++++++++++++++++++++++
 oak-segment-azure/pom.xml                          | 16 --------------
 3 files changed, 28 insertions(+), 16 deletions(-)


[jackrabbit-oak] 26/28: OAK-10199 : incorporated review comments to remove <p/> XHTML tags

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch DetailedGC/OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 1c70351c0f7377322e676a4ff35dd4bf809a0195
Author: Rishabh Kumar <di...@adobe.com>
AuthorDate: Mon Aug 7 18:16:00 2023 +0530

    OAK-10199 : incorporated review comments to remove <p/> XHTML tags
---
 .../jackrabbit/oak/plugins/document/DocumentNodeStoreHelper.java      | 2 +-
 .../java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java | 4 ++--
 .../jackrabbit/oak/plugins/document/VersionGCRecommendations.java     | 4 ++--
 .../org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java  | 2 +-
 .../jackrabbit/oak/plugins/document/VersionGarbageCollector.java      | 4 ++--
 .../apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStore.java  | 2 +-
 .../jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java      | 2 +-
 7 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreHelper.java b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreHelper.java
index 3b39d4c3a1..f77aa2c676 100644
--- a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreHelper.java
+++ b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreHelper.java
@@ -72,7 +72,7 @@ public class DocumentNodeStoreHelper {
     }
 
     public static VersionGarbageCollector createVersionGC(
-            DocumentNodeStore nodeStore, VersionGCSupport gcSupport, final boolean detailedGCEnabled) {
+            DocumentNodeStore nodeStore, VersionGCSupport gcSupport, boolean detailedGCEnabled) {
         return new VersionGarbageCollector(nodeStore, gcSupport, detailedGCEnabled);
     }
 
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
index 38673aa7e7..ce1fe241c7 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
@@ -1673,8 +1673,8 @@ public final class NodeDocument extends Document {
     /**
      * Returns name of all the properties on this document
      * <p>
-     *  Note: property names returned are escaped
-     * <p/>
+     * Note: property names returned are escaped
+     *
      * @return Set of all property names (escaped)
      * @see Utils#unescapePropertyName(String)
      * @see Utils#escapePropertyName(String)
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
index 05ebaa44a9..6e2b9eaf1d 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
@@ -92,7 +92,7 @@ public class VersionGCRecommendations {
      * @param detailedGCEnabled whether detailedGC is enabled or not
      */
     public VersionGCRecommendations(long maxRevisionAgeMs, Checkpoints checkpoints, Clock clock, VersionGCSupport vgc,
-                                    VersionGCOptions options, GCMonitor gcMonitor, final boolean detailedGCEnabled) {
+                                    VersionGCOptions options, GCMonitor gcMonitor, boolean detailedGCEnabled) {
         boolean ignoreDueToCheckPoint = false;
         boolean ignoreDetailedGCDueToCheckPoint = false;
         long deletedOnceCount = 0;
@@ -306,7 +306,7 @@ public class VersionGCRecommendations {
                 ignoreGC = true;
             } else {
                 gcScope = gcScope.notLaterThan(checkpoint.getTimestamp() - 1);
-                log.info("checkpoint at [{}] found, detailedGCScope now {}", timestampToString(checkpoint.getTimestamp()), gcScope);
+                log.debug("checkpoint at [{}] found, detailedGCScope now {}", timestampToString(checkpoint.getTimestamp()), gcScope);
             }
         }
         return new GCResult(ignoreGC, gcScope);
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
index ab7789f920..df505adf8a 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
@@ -83,7 +83,7 @@ public class VersionGCSupport {
      * since the epoch and the implementation will convert them to seconds at
      * the granularity of the {@link NodeDocument#MODIFIED_IN_SECS} field and
      * then perform the comparison.
-     * <p/>
+     *
      *
      * @param fromModified the lower bound modified timestamp (inclusive)
      * @param toModified   the upper bound modified timestamp (exclusive)
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
index c65ea1dc01..8f2c977274 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
@@ -619,9 +619,9 @@ public class VersionGarbageCollector {
          * et al: essentially garbage that in earlier versions of Oak were ignored. This
          * includes: deleted properties, revision information within documents, branch
          * commit related garbage.
-         * <p/>
+         * <p>
          * TODO: limit this to run only on a singleton instance, eg the cluster leader
-         * <p/>
+         * <p>
          * The "detailed garbage" collector can be instructed to do a full repository scan
          * - or incrementally based on where it last left off. When doing a full
          * repository scan (but not limited to that), it executes in (small) batches
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStore.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStore.java
index 3a9ef2d95d..25a3aca5e7 100755
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStore.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStore.java
@@ -1854,7 +1854,7 @@ public class RDBDocumentStore implements DocumentStore {
         }
 
         if (sortBy != null && !sortBy.isEmpty()) {
-            for (String key: sortBy) {
+            for (String key : sortBy) {
                 if (!allowedProps.contains(key)) {
                     final String message = "indexed property " + key + " not supported. supported properties are " + allowedProps;
                     LOG.error(message);
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
index 5082422109..27c582311b 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
@@ -114,7 +114,7 @@ public class RDBVersionGCSupport extends VersionGCSupport {
      * since the epoch and the implementation will convert them to seconds at
      * the granularity of the {@link NodeDocument#MODIFIED_IN_SECS} field and
      * then perform the comparison.
-     * <p/>
+     *
      *
      * @param fromModified the lower bound modified timestamp (inclusive)
      * @param toModified   the upper bound modified timestamp (exclusive)


[jackrabbit-oak] 27/28: OAK-10199 : made detailedGC non-configurable on RDBDocumentStore

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch DetailedGC/OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit c705535b22be32795d6be58ba6fd80ba33e1c863
Author: Rishabh Kumar <di...@adobe.com>
AuthorDate: Mon Aug 7 19:26:27 2023 +0530

    OAK-10199 : made detailedGC non-configurable on RDBDocumentStore
---
 .../document/rdb/RDBDocumentNodeStoreBuilder.java  | 25 ++++++++++++++++++++++
 .../rdb/RDBDocumentNodeStoreBuilderTest.java       | 21 ++++++++++++++++++
 .../oak/plugins/document/util/UtilsTest.java       | 12 +++++++++++
 3 files changed, 58 insertions(+)

diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentNodeStoreBuilder.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentNodeStoreBuilder.java
index f997e4bf9a..92aa40af86 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentNodeStoreBuilder.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentNodeStoreBuilder.java
@@ -26,6 +26,8 @@ import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStoreBuilder;
 import org.apache.jackrabbit.oak.plugins.document.DocumentStore;
 import org.apache.jackrabbit.oak.plugins.document.MissingLastRevSeeker;
 import org.apache.jackrabbit.oak.plugins.document.VersionGCSupport;
+import org.apache.jackrabbit.oak.spi.toggle.Feature;
+import org.jetbrains.annotations.Nullable;
 
 /**
  * A builder for a {@link DocumentNodeStore} backed by a relational database.
@@ -113,4 +115,27 @@ public class RDBDocumentNodeStoreBuilder
             return super.createMissingLastRevSeeker();
         }
     }
+
+    @Override
+    public boolean isDetailedGCEnabled() {
+        // detailedGC is non supported for RDB
+        return false;
+    }
+
+    @Override
+    public RDBDocumentNodeStoreBuilder setDetailedGCEnabled(boolean b) {
+        // detailedGC is non supported for RDB
+        return thisBuilder();
+    }
+
+    @Override
+    public RDBDocumentNodeStoreBuilder setDocStoreDetailedGCFeature(@Nullable Feature docStoreDetailedGC) {
+        return thisBuilder();
+    }
+
+    @Override
+    @Nullable
+    public Feature getDocStoreDetailedGCFeature() {
+        return null;
+    }
 }
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentNodeStoreBuilderTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentNodeStoreBuilderTest.java
index e3a3939d19..d821b1f785 100755
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentNodeStoreBuilderTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentNodeStoreBuilderTest.java
@@ -16,13 +16,18 @@
  */
 package org.apache.jackrabbit.oak.plugins.document.rdb;
 
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
 import static org.junit.Assert.fail;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
 
 import java.util.UUID;
 
 import javax.sql.DataSource;
 
 import org.apache.jackrabbit.oak.plugins.document.DocumentStoreException;
+import org.apache.jackrabbit.oak.spi.toggle.Feature;
 import org.junit.Test;
 
 public class RDBDocumentNodeStoreBuilderTest {
@@ -54,4 +59,20 @@ public class RDBDocumentNodeStoreBuilderTest {
         } catch (DocumentStoreException expected) {
         }
     }
+
+    @Test
+    public void detailedGCDisabled() {
+        RDBDocumentNodeStoreBuilder builder = new RDBDocumentNodeStoreBuilder();
+        builder.setDetailedGCEnabled(true);
+        assertFalse(builder.isDetailedGCEnabled());
+    }
+
+    @Test
+    public void detailedGCFeatureToggleDisabled() {
+        RDBDocumentNodeStoreBuilder builder = new RDBDocumentNodeStoreBuilder();
+        Feature docStoreDetailedGCFeature = mock(Feature.class);
+        when(docStoreDetailedGCFeature.isEnabled()).thenReturn(true);
+        builder.setDocStoreDetailedGCFeature(docStoreDetailedGCFeature);
+        assertNull(builder.getDocStoreDetailedGCFeature());
+    }
 }
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/util/UtilsTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/util/UtilsTest.java
index 6041a41724..cddaf79a61 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/util/UtilsTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/util/UtilsTest.java
@@ -56,6 +56,7 @@ import org.mockito.Mockito;
 import org.slf4j.event.Level;
 
 import static org.apache.jackrabbit.oak.plugins.document.DocumentNodeStoreBuilder.newDocumentNodeStoreBuilder;
+import static org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentNodeStoreBuilder.newRDBDocumentNodeStoreBuilder;
 import static org.apache.jackrabbit.oak.plugins.document.util.Utils.isDetailedGCEnabled;
 import static org.apache.jackrabbit.oak.plugins.document.util.Utils.isThrottlingEnabled;
 import static org.hamcrest.CoreMatchers.containsString;
@@ -223,6 +224,17 @@ public class UtilsTest {
         assertTrue("Detailed GC is enabled via Feature Toggle", detailedGCEnabled);
     }
 
+    @Test
+    public void detailedGCDisabledForRDB() {
+        DocumentNodeStoreBuilder<?> builder = newRDBDocumentNodeStoreBuilder();
+        builder.setDetailedGCEnabled(true);
+        Feature docStoreDetailedGCFeature = mock(Feature.class);
+        when(docStoreDetailedGCFeature.isEnabled()).thenReturn(true);
+        builder.setDocStoreDetailedGCFeature(docStoreDetailedGCFeature);
+        boolean detailedGCEnabled = isDetailedGCEnabled(builder);
+        assertFalse("Detailed GC is disabled for RDB Document Store", detailedGCEnabled);
+    }
+
     @Test
     public void getDepthFromId() throws Exception{
         assertEquals(1, Utils.getDepthFromId("1:/x"));


[jackrabbit-oak] 22/28: OAK-10199 : added review comment to make minor adjustment for naming conventions and fixes

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch DetailedGC/OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 9fc6d411ac90aca0983a37baad20b45eabcd10c1
Author: Rishabh Kumar <di...@adobe.com>
AuthorDate: Fri Jul 28 17:14:15 2023 +0530

    OAK-10199 : added review comment to make minor adjustment for naming conventions and fixes
---
 .../oak/plugins/document/NodeDocument.java         |  7 +-
 .../plugins/document/VersionGCRecommendations.java | 17 ++---
 .../oak/plugins/document/VersionGCSupport.java     | 11 ++-
 .../plugins/document/VersionGarbageCollector.java  | 80 ++++++++++++----------
 .../document/mongo/MongoVersionGCSupport.java      |  7 +-
 .../plugins/document/rdb/RDBVersionGCSupport.java  |  4 +-
 .../document/VersionGarbageCollectorIT.java        | 10 +--
 7 files changed, 73 insertions(+), 63 deletions(-)

diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
index 66a1bc2eae..38673aa7e7 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
@@ -1672,7 +1672,12 @@ public final class NodeDocument extends Document {
 
     /**
      * Returns name of all the properties on this document
-     * @return Set of all property names
+     * <p>
+     *  Note: property names returned are escaped
+     * <p/>
+     * @return Set of all property names (escaped)
+     * @see Utils#unescapePropertyName(String)
+     * @see Utils#escapePropertyName(String)
      */
     @NotNull
     Set<String> getPropertyNames() {
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
index 2092844299..05ebaa44a9 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
@@ -54,7 +54,7 @@ public class VersionGCRecommendations {
     private final GCMonitor gcmon;
 
     final boolean ignoreDueToCheckPoint;
-    final boolean ignoreDetailGCDueToCheckPoint;
+    final boolean ignoreDetailedGCDueToCheckPoint;
     final TimeInterval scope;
     final TimeInterval scopeDetailedGC;
     final long maxCollect;
@@ -94,7 +94,7 @@ public class VersionGCRecommendations {
     public VersionGCRecommendations(long maxRevisionAgeMs, Checkpoints checkpoints, Clock clock, VersionGCSupport vgc,
                                     VersionGCOptions options, GCMonitor gcMonitor, final boolean detailedGCEnabled) {
         boolean ignoreDueToCheckPoint = false;
-        boolean ignoreDetailGCDueToCheckPoint = false;
+        boolean ignoreDetailedGCDueToCheckPoint = false;
         long deletedOnceCount = 0;
         long suggestedIntervalMs;
         long oldestPossible;
@@ -176,13 +176,13 @@ public class VersionGCRecommendations {
         //Check for any registered checkpoint which prevent the GC from running
         Revision checkpoint = checkpoints.getOldestRevisionToKeep();
 
-        final GCResult gcResult = getResult(options, ignoreDueToCheckPoint, scope, checkpoint);
+        final GCResult gcResult = getResult(options, checkpoint, scope);
         scope = gcResult.gcScope;
         ignoreDueToCheckPoint = gcResult.ignoreGC;
 
-        final GCResult detailGCResult = getResult(options, ignoreDetailGCDueToCheckPoint, scopeDetailedGC, checkpoint);
+        final GCResult detailGCResult = getResult(options, checkpoint, scopeDetailedGC);
         scopeDetailedGC = detailGCResult.gcScope;
-        ignoreDetailGCDueToCheckPoint = detailGCResult.ignoreGC;
+        ignoreDetailedGCDueToCheckPoint = detailGCResult.ignoreGC;
 
         if (scope.getDurationMs() <= options.precisionMs) {
             // If we have narrowed the collect time interval down as much as we can, no
@@ -194,7 +194,7 @@ public class VersionGCRecommendations {
         this.precisionMs = options.precisionMs;
         this.ignoreDueToCheckPoint = ignoreDueToCheckPoint;
         this.scope = scope;
-        this.ignoreDetailGCDueToCheckPoint = ignoreDetailGCDueToCheckPoint;
+        this.ignoreDetailedGCDueToCheckPoint = ignoreDetailedGCDueToCheckPoint;
         this.scopeDetailedGC = scopeDetailedGC;
         this.detailedGCId = oldestModifiedDocId;
         this.scopeIsComplete = scope.toMs >= keep.fromMs;
@@ -251,7 +251,7 @@ public class VersionGCRecommendations {
         }
 
         // save data for detailed GC
-        if (detailedGCEnabled && !stats.canceled && !stats.ignoredDetailGCDueToCheckPoint) {
+        if (detailedGCEnabled && !stats.canceled && !stats.ignoredDetailedGCDueToCheckPoint) {
             // success, we would not expect to encounter revisions older than this in the future
             setLongSetting(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP, stats.oldestModifiedDocTimeStamp);
             setStringSetting(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP, stats.oldestModifiedDocId);
@@ -297,7 +297,8 @@ public class VersionGCRecommendations {
     }
 
     @NotNull
-    private static GCResult getResult(VersionGCOptions options, boolean ignoreGC, TimeInterval gcScope, Revision checkpoint) {
+    private static GCResult getResult(final VersionGCOptions options, final Revision checkpoint, TimeInterval gcScope) {
+        boolean ignoreGC = false;
         if (checkpoint != null && gcScope.endsAfter(checkpoint.getTimestamp())) {
             TimeInterval minimalScope = gcScope.startAndDuration(options.precisionMs);
             if (minimalScope.endsAfter(checkpoint.getTimestamp())) {
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
index 1e19eb6af7..ab7789f920 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
@@ -209,20 +209,19 @@ public class VersionGCSupport {
      * @return the oldest modified document.
      */
     public Optional<NodeDocument> getOldestModifiedDoc(final Clock clock) {
-        long ts = 0;
         long now = clock.getTime();
         Iterable<NodeDocument> docs = null;
-
-        LOG.info("find oldest modified document");
         try {
-            docs = getModifiedDocs(ts, now, 1, MIN_ID_VALUE);
+            docs = getModifiedDocs(0, now, 1, MIN_ID_VALUE);
             if (docs.iterator().hasNext()) {
-                return ofNullable(docs.iterator().next());
+                final NodeDocument oldestModifiedDoc = docs.iterator().next();
+                LOG.info("Oldest modified document is {}", oldestModifiedDoc);
+                return ofNullable(oldestModifiedDoc);
             }
         } finally {
             Utils.closeIfCloseable(docs);
         }
-        LOG.info("find oldest modified document to be {}", Utils.timestampToString(ts));
+        LOG.info("No Modified Doc has been found, retuning empty");
         return empty();
     }
 
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
index 766c8e4cc5..f2a334b75b 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
@@ -66,6 +66,7 @@ import static java.util.concurrent.TimeUnit.SECONDS;
 import static java.util.stream.Collectors.joining;
 import static java.util.stream.Collectors.toSet;
 import static org.apache.jackrabbit.guava.common.base.StandardSystemProperty.LINE_SEPARATOR;
+import static org.apache.jackrabbit.guava.common.base.Stopwatch.createUnstarted;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.all;
 import static org.apache.jackrabbit.guava.common.collect.Iterators.partition;
 import static org.apache.jackrabbit.guava.common.util.concurrent.Atomics.newReference;
@@ -85,6 +86,7 @@ public class VersionGarbageCollector {
     private static final int DELETE_BATCH_SIZE = 450;
     private static final int UPDATE_BATCH_SIZE = 450;
     private static final int PROGRESS_BATCH_SIZE = 10000;
+    private static final int DETAILED_GC_BATCH_SIZE = 1000;
     private static final String STATUS_IDLE = "IDLE";
     private static final String STATUS_INITIALIZING = "INITIALIZING";
     private static final Logger log = LoggerFactory.getLogger(VersionGarbageCollector.class);
@@ -270,7 +272,7 @@ public class VersionGarbageCollector {
 
     public static class VersionGCStats {
         boolean ignoredGCDueToCheckPoint;
-        boolean ignoredDetailGCDueToCheckPoint;
+        boolean ignoredDetailedGCDueToCheckPoint;
         boolean canceled;
         boolean success = true;
         boolean limitExceeded;
@@ -348,7 +350,7 @@ public class VersionGarbageCollector {
 
             return "VersionGCStats{" +
                     "ignoredGCDueToCheckPoint=" + ignoredGCDueToCheckPoint +
-                    "ignoredDetailGCDueToCheckPoint=" + ignoredDetailGCDueToCheckPoint +
+                    "ignoredDetailedGCDueToCheckPoint=" + ignoredDetailedGCDueToCheckPoint +
                     ", canceled=" + canceled +
                     ", deletedDocGCCount=" + deletedDocGCCount + " (of which leaf: " + deletedLeafDocGCCount + ")" +
                     ", updateResurrectedGCCount=" + updateResurrectedGCCount +
@@ -366,7 +368,7 @@ public class VersionGarbageCollector {
         void addRun(VersionGCStats run) {
             ++iterationCount;
             this.ignoredGCDueToCheckPoint = run.ignoredGCDueToCheckPoint;
-            this.ignoredDetailGCDueToCheckPoint = run.ignoredDetailGCDueToCheckPoint;
+            this.ignoredDetailedGCDueToCheckPoint = run.ignoredDetailedGCDueToCheckPoint;
             this.canceled = run.canceled;
             this.success = run.success;
             this.limitExceeded = run.limitExceeded;
@@ -422,9 +424,9 @@ public class VersionGarbageCollector {
         SORTING,
         SPLITS_CLEANUP,
         DETAILED_GC,
-        COLLECT_PROPS,
-        COLLECT_OLD_REVS,
-        COLLECT_UNMERGED_BC,
+        DETAILED_GC_COLLECT_PROPS,
+        DETAILED_GC_COLLECT_OLD_REVS,
+        DETAILED_GC_COLLECT_UNMERGED_BC,
         DETAILED_GC_CLEANUP,
         UPDATING
     }
@@ -455,9 +457,9 @@ public class VersionGarbageCollector {
             this.watches.put(GCPhase.SPLITS_CLEANUP, stats.collectAndDeleteSplitDocs);
             this.watches.put(GCPhase.UPDATING, stats.updateResurrectedDocuments);
             this.watches.put(GCPhase.DETAILED_GC, stats.detailedGCDocs);
-            this.watches.put(GCPhase.COLLECT_PROPS, stats.collectDeletedProps);
-            this.watches.put(GCPhase.COLLECT_OLD_REVS, stats.collectDeletedOldRevs);
-            this.watches.put(GCPhase.COLLECT_UNMERGED_BC, stats.collectUnmergedBC);
+            this.watches.put(GCPhase.DETAILED_GC_COLLECT_PROPS, stats.collectDeletedProps);
+            this.watches.put(GCPhase.DETAILED_GC_COLLECT_OLD_REVS, stats.collectDeletedOldRevs);
+            this.watches.put(GCPhase.DETAILED_GC_COLLECT_UNMERGED_BC, stats.collectUnmergedBC);
             this.watches.put(GCPhase.DETAILED_GC_CLEANUP, stats.deleteDetailedGCDocs);
             this.canceled = canceled;
         }
@@ -584,8 +586,8 @@ public class VersionGarbageCollector {
 
                 // now run detailed GC if enabled
                 if (detailedGCEnabled) {
-                    if (rec.ignoreDetailGCDueToCheckPoint) {
-                        phases.stats.ignoredDetailGCDueToCheckPoint = true;
+                    if (rec.ignoreDetailedGCDueToCheckPoint) {
+                        phases.stats.ignoredDetailedGCDueToCheckPoint = true;
                         monitor.skipped("Checkpoint prevented detailed revision garbage collection");
                     } else {
                         final RevisionVector headRevision = nodeStore.getHeadRevision();
@@ -594,9 +596,7 @@ public class VersionGarbageCollector {
                     }
                 }
 
-                if (detailedGCEnabled && rec.ignoreDueToCheckPoint && rec.ignoreDetailGCDueToCheckPoint) {
-                    cancel.set(true);
-                } else if (!detailedGCEnabled && rec.ignoreDueToCheckPoint) {
+                if ((detailedGCEnabled && rec.ignoreDetailedGCDueToCheckPoint) || rec.ignoreDueToCheckPoint) {
                     cancel.set(true);
                 }
 
@@ -615,14 +615,14 @@ public class VersionGarbageCollector {
         }
 
         /**
-         * "Detail garbage" refers to additional garbage identified as part of OAK-10199
+         * "Detailed garbage" refers to additional garbage identified as part of OAK-10199
          * et al: essentially garbage that in earlier versions of Oak were ignored. This
          * includes: deleted properties, revision information within documents, branch
          * commit related garbage.
          * <p/>
          * TODO: limit this to run only on a singleton instance, eg the cluster leader
          * <p/>
-         * The "detail garbage" collector can be instructed to do a full repository scan
+         * The "detailed garbage" collector can be instructed to do a full repository scan
          * - or incrementally based on where it last left off. When doing a full
          * repository scan (but not limited to that), it executes in (small) batches
          * followed by voluntary paused (aka throttling) to avoid excessive load on the
@@ -635,22 +635,25 @@ public class VersionGarbageCollector {
          */
         private void collectDetailedGarbage(final GCPhases phases, final RevisionVector headRevision, final VersionGCRecommendations rec)
                 throws IOException {
+
+            final long oldestModifiedMs = rec.scopeDetailedGC.fromMs;
+            final long toModifiedMs = rec.scopeDetailedGC.toMs;
+            final String oldestModifiedDocId = rec.detailedGCId;
+
             int docsTraversed = 0;
             boolean foundDoc = true;
-            final long oldestModifiedMs = rec.scopeDetailedGC.fromMs;
-            final long toModified = rec.scopeDetailedGC.toMs;
             long oldModifiedMs = oldestModifiedMs;
-            final String oldestModifiedDocId = rec.detailedGCId;
+
             try (DetailedGC gc = new DetailedGC(headRevision, monitor, cancel)) {
                 long fromModified = oldestModifiedMs;
                 String fromId = ofNullable(oldestModifiedDocId).orElse(MIN_ID_VALUE);
                 NodeDocument lastDoc;
                 if (phases.start(GCPhase.DETAILED_GC)) {
-                    while (foundDoc && fromModified < toModified && docsTraversed < PROGRESS_BATCH_SIZE) {
+                    while (foundDoc && fromModified < toModifiedMs && docsTraversed < PROGRESS_BATCH_SIZE) {
                         // set foundDoc to false to allow exiting the while loop
                         foundDoc = false;
                         lastDoc = null;
-                        Iterable<NodeDocument> itr = versionStore.getModifiedDocs(fromModified, toModified, 1000, fromId);
+                        Iterable<NodeDocument> itr = versionStore.getModifiedDocs(fromModified, toModifiedMs, DETAILED_GC_BATCH_SIZE, fromId);
                         try {
                             for (NodeDocument doc : itr) {
                                 foundDoc = true;
@@ -662,7 +665,7 @@ public class VersionGarbageCollector {
                                 }
                                 docsTraversed++;
                                 if (docsTraversed % 100 == 0) {
-                                    monitor.info("Iterated through {} documents so far. {} had detail garbage",
+                                    monitor.info("Iterated through {} documents so far. {} had detailed garbage",
                                             docsTraversed, gc.getGarbageDocsCount());
                                 }
 
@@ -675,12 +678,12 @@ public class VersionGarbageCollector {
 
                                 final Long modified = lastDoc.getModified();
                                 if (modified == null) {
-                                    monitor.warn("collectDetailGarbage : document has no _modified property : {}",
+                                    monitor.warn("collectDetailedGarbage : document has no _modified property : {}",
                                             doc.getId());
-                                } else if (SECONDS.toMillis(modified) < oldestModifiedMs) {
+                                } else if (SECONDS.toMillis(modified) < fromModified) {
                                     monitor.warn(
-                                            "collectDetailGarbage : document has older _modified than query boundary : {} (from: {}, to: {})",
-                                            modified, fromModified, toModified);
+                                            "collectDetailedGarbage : document has older _modified than query boundary : {} (from: {}, to: {})",
+                                            modified, fromModified, toModifiedMs);
                                 }
                             }
                             // now remove the garbage in one go, if any
@@ -721,7 +724,7 @@ public class VersionGarbageCollector {
                 if (docsTraversed < PROGRESS_BATCH_SIZE) {
                     // we have traversed all the docs within given time range and nothing is left
                     // lets set oldModifiedDocTimeStamp to upper limit of this cycle
-                    phases.stats.oldestModifiedDocTimeStamp = toModified;
+                    phases.stats.oldestModifiedDocTimeStamp = toModifiedMs;
                     phases.stats.oldestModifiedDocId = MIN_ID_VALUE;
                 }
             }
@@ -826,6 +829,7 @@ public class VersionGarbageCollector {
 
         private final Map<String, Integer> deletedPropsCountMap;
         private int garbageDocsCount;
+        private int totalGarbageDocsCount;
 
         public DetailedGC(@NotNull RevisionVector headRevision, @NotNull GCMonitor monitor, @NotNull AtomicBoolean cancel) {
             this.headRevision = requireNonNull(headRevision);
@@ -833,7 +837,7 @@ public class VersionGarbageCollector {
             this.cancel = cancel;
             this.updateOpList = new ArrayList<>();
             this.deletedPropsCountMap = new HashMap<>();
-            this.timer = Stopwatch.createUnstarted();
+            this.timer = createUnstarted();
         }
 
         public void collectGarbage(final NodeDocument doc, final GCPhases phases) {
@@ -849,6 +853,7 @@ public class VersionGarbageCollector {
             // only add if there are changes for this doc
             if (op.hasChanges()) {
                 garbageDocsCount++;
+                totalGarbageDocsCount++;
                 monitor.info("Collected [{}] garbage for doc [{}]", op.getChanges().size(), doc.getId());
                 updateOpList.add(op);
             }
@@ -859,9 +864,9 @@ public class VersionGarbageCollector {
         }
 
         private void collectUnmergedBranchCommitDocument(final NodeDocument doc, final GCPhases phases, final UpdateOp updateOp) {
-            if (phases.start(GCPhase.COLLECT_UNMERGED_BC)){
+            if (phases.start(GCPhase.DETAILED_GC_COLLECT_UNMERGED_BC)){
                 // TODO add umerged BC collection logic
-                phases.stop(GCPhase.COLLECT_UNMERGED_BC);
+                phases.stop(GCPhase.DETAILED_GC_COLLECT_UNMERGED_BC);
             }
 
         }
@@ -869,7 +874,7 @@ public class VersionGarbageCollector {
         private void collectDeletedProperties(final NodeDocument doc, final GCPhases phases, final UpdateOp updateOp) {
 
             // get Map of all properties along with their values
-            if (phases.start(GCPhase.COLLECT_PROPS)) {
+            if (phases.start(GCPhase.DETAILED_GC_COLLECT_PROPS)) {
                 final Set<String> properties = doc.getPropertyNames();
 
                 // find all the properties which can be removed from document.
@@ -893,26 +898,26 @@ public class VersionGarbageCollector {
                 if (log.isDebugEnabled()) {
                     log.debug("Collected {} deleted properties for document {}", deletedPropsGCCount, doc.getId());
                 }
-                phases.stop(GCPhase.COLLECT_PROPS);
+                phases.stop(GCPhase.DETAILED_GC_COLLECT_PROPS);
             }
         }
 
         private void collectOldRevisions(NodeDocument doc, GCPhases phases, UpdateOp updateOp) {
 
-            if (phases.start(GCPhase.COLLECT_OLD_REVS)){
+            if (phases.start(GCPhase.DETAILED_GC_COLLECT_OLD_REVS)){
                 // TODO add old rev collection logic
-                phases.stop(GCPhase.COLLECT_OLD_REVS);
+                phases.stop(GCPhase.DETAILED_GC_COLLECT_OLD_REVS);
             }
 
         }
 
         int getGarbageDocsCount() {
-            return garbageDocsCount;
+            return totalGarbageDocsCount;
         }
 
         @Override
-        public void close() throws IOException {
-
+        public void close() {
+            totalGarbageDocsCount = 0;
         }
 
         public void removeGarbage(final VersionGCStats stats) {
@@ -948,6 +953,7 @@ public class VersionGarbageCollector {
                 log.info("Updated [{}] documents, deleted [{}] properties", updatedDocs, deletedProps);
                 // now reset delete metadata
                 updateOpList.clear();
+                deletedPropsCountMap.clear();
                 garbageDocsCount = 0;
             } finally {
                 delayOnModifications(timer.stop().elapsed(MILLISECONDS), cancel);
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
index 6637afa4aa..1f6d4bf5f5 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
@@ -141,8 +141,8 @@ public class MongoVersionGCSupport extends VersionGCSupport {
      * then perform the comparison.
      * <p/>
      *
-     * @param fromModified the lower bound modified timestamp (inclusive)
-     * @param toModified   the upper bound modified timestamp (exclusive)
+     * @param fromModified the lower bound modified timestamp in millis (inclusive)
+     * @param toModified   the upper bound modified timestamp in millis (exclusive)
      * @param limit        the limit of documents to return
      * @param fromId       the lower bound {@link NodeDocument#ID}
      * @return matching documents.
@@ -246,8 +246,6 @@ public class MongoVersionGCSupport extends VersionGCSupport {
      */
     @Override
     public Optional<NodeDocument> getOldestModifiedDoc(final Clock clock) {
-        LOG.info("getOldestModifiedDoc() <- start");
-
         final Bson sort = and(eq(MODIFIED_IN_SECS, 1), eq(ID, 1));
 
         // we need to add query condition to ignore `previous` documents which doesn't have this field
@@ -260,6 +258,7 @@ public class MongoVersionGCSupport extends VersionGCSupport {
         } catch (Exception ex) {
             LOG.error("getOldestModifiedDoc() <- error while fetching data from Mongo", ex);
         }
+        LOG.info("No Modified Doc has been found, retuning empty");
         return empty();
     }
 
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
index 5e66bd974d..5082422109 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
@@ -330,16 +330,16 @@ public class RDBVersionGCSupport extends VersionGCSupport {
     @Override
     public Optional<NodeDocument> getOldestModifiedDoc(Clock clock) {
 
-        LOG.info("getOldestModifiedDoc() <- start");
         Iterable<NodeDocument> modifiedDocs = null;
         try {
             modifiedDocs = getModifiedDocs(0L, clock.getTime(), 1, MIN_ID_VALUE);
             return modifiedDocs.iterator().hasNext() ? ofNullable(modifiedDocs.iterator().next()) : empty();
         } catch (DocumentStoreException ex) {
-            LOG.error("getOldestModifiedDoc()", ex);
+            LOG.error("getOldestModifiedDoc() <- Error ", ex);
         } finally {
             closeIfCloseable(modifiedDocs);
         }
+        LOG.info("No Modified Doc has been found, retuning empty");
         return empty();
     }
 
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
index df785878b3..4e5360e2c3 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
@@ -172,14 +172,14 @@ public class VersionGarbageCollectorIT {
         clock.waitUntil(cp.getTimestamp() + expiryTime - maxAge);
         VersionGCStats stats = gc.gc(maxAge, TimeUnit.MILLISECONDS);
         assertTrue(stats.ignoredGCDueToCheckPoint);
-        assertFalse(stats.ignoredDetailGCDueToCheckPoint);
+        assertFalse(stats.ignoredDetailedGCDueToCheckPoint);
         assertTrue(stats.canceled);
 
         //Fast forward time to future such that checkpoint get expired
         clock.waitUntil(clock.getTime() + expiryTime + 1);
         stats = gc.gc(maxAge, TimeUnit.MILLISECONDS);
         assertFalse("GC should be performed", stats.ignoredGCDueToCheckPoint);
-        assertFalse("Detailed GC shouldn't be performed", stats.ignoredDetailGCDueToCheckPoint);
+        assertFalse("Detailed GC shouldn't be performed", stats.ignoredDetailedGCDueToCheckPoint);
         assertFalse(stats.canceled);
     }
 
@@ -258,13 +258,13 @@ public class VersionGarbageCollectorIT {
         //Fast forward time to future but before expiry of checkpoint
         clock.waitUntil(cp.getTimestamp() + expiryTime - maxAge);
         VersionGCStats stats = gc.gc(maxAge, TimeUnit.MILLISECONDS);
-        assertTrue(stats.ignoredDetailGCDueToCheckPoint);
+        assertTrue(stats.ignoredDetailedGCDueToCheckPoint);
         assertTrue(stats.canceled);
 
         //Fast forward time to future such that checkpoint get expired
         clock.waitUntil(clock.getTime() + expiryTime + 1);
         stats = gc.gc(maxAge, TimeUnit.MILLISECONDS);
-        assertFalse("Detailed GC should be performed", stats.ignoredDetailGCDueToCheckPoint);
+        assertFalse("Detailed GC should be performed", stats.ignoredDetailedGCDueToCheckPoint);
         assertFalse(stats.canceled);
     }
 
@@ -315,7 +315,7 @@ public class VersionGarbageCollectorIT {
         assertEquals(1, stats.deletedPropsGCCount);
         assertEquals(1, stats.updatedDetailedGCDocsCount);
         assertTrue(stats.ignoredGCDueToCheckPoint);
-        assertFalse(stats.ignoredDetailGCDueToCheckPoint);
+        assertFalse(stats.ignoredDetailedGCDueToCheckPoint);
         assertFalse(stats.canceled);
     }
 


[jackrabbit-oak] 11/28: OAK-10199 : added check to include oldestId when running detailedGc very first time

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch DetailedGC/OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit f6085a9f4677ae99d8a5001d3c6b3d1794e7cda8
Author: Rishabh Kumar <di...@adobe.com>
AuthorDate: Mon Jun 19 23:04:33 2023 +0530

    OAK-10199 : added check to include oldestId when running detailedGc very first time
---
 .../oak/plugins/document/VersionGCSupport.java     | 20 +++++---
 .../plugins/document/VersionGarbageCollector.java  |  5 +-
 .../document/mongo/MongoVersionGCSupport.java      | 18 ++++---
 .../plugins/document/rdb/RDBVersionGCSupport.java  | 22 +++++----
 .../oak/plugins/document/VersionGCSupportTest.java | 39 ++++++++++++---
 .../document/VersionGarbageCollectorIT.java        | 57 ++++++++++++++++++++++
 6 files changed, 128 insertions(+), 33 deletions(-)

diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
index db54553061..96fa2bbaea 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
@@ -74,21 +74,25 @@ public class VersionGCSupport {
 
     /**
      * Returns documents that have a {@link NodeDocument#MODIFIED_IN_SECS} value
-     * within the given range .The two passed modified timestamps are in milliseconds
+     * within the given range and are greater than given {@link NodeDocument#ID}.
+     * <p>
+     * The two passed modified timestamps are in milliseconds
      * since the epoch and the implementation will convert them to seconds at
      * the granularity of the {@link NodeDocument#MODIFIED_IN_SECS} field and
      * then perform the comparison.
+     * <p/>
      *
-     * @param fromModified the lower bound modified timestamp (inclusive)
-     * @param toModified the upper bound modified timestamp (exclusive)
-     * @param limit the limit of documents to return
-     * @param fromId the lower bound {@link NodeDocument#ID} (exclusive)
+     * @param fromModified  the lower bound modified timestamp (inclusive)
+     * @param toModified    the upper bound modified timestamp (exclusive)
+     * @param limit         the limit of documents to return
+     * @param fromId        the lower bound {@link NodeDocument#ID}
+     * @param includeFromId boolean indicating whether {@code fromId} is inclusive or not
      * @return matching documents.
      */
     public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit,
-                                                  @NotNull final String fromId) {
+                                                  @NotNull final String fromId, boolean includeFromId) {
         return StreamSupport
-                .stream(getSelectedDocuments(store, MODIFIED_IN_SECS, 1, fromId).spliterator(), false)
+                .stream(getSelectedDocuments(store, MODIFIED_IN_SECS, 1, includeFromId ? "\0"+fromId : fromId).spliterator(), false)
                 .filter(input -> modifiedGreaterThanEquals(input, fromModified) && modifiedLessThan(input, toModified))
                 .sorted((o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2))
                 .limit(limit)
@@ -193,7 +197,7 @@ public class VersionGCSupport {
 
         LOG.info("find oldest modified document");
         try {
-            docs = getModifiedDocs(ts, now, 1, MIN_ID_VALUE);
+            docs = getModifiedDocs(ts, now, 1, MIN_ID_VALUE, false);
             if (docs.iterator().hasNext()) {
                 return docs.iterator().next();
             }
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
index f54299e3fd..0c7da0d4fa 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
@@ -615,6 +615,7 @@ public class VersionGarbageCollector {
                 throws IOException {
             int docsTraversed = 0;
             boolean foundDoc = true;
+            boolean includeFromId = true;
             long oldestModifiedDocTimeStamp = rec.scopeDetailedGC.fromMs;
             String oldestModifiedDocId = rec.detailedGCId;
             try (DetailedGC gc = new DetailedGC(headRevision, monitor, cancel)) {
@@ -624,7 +625,9 @@ public class VersionGarbageCollector {
                     while (foundDoc && oldestModifiedDocTimeStamp < toModified && docsTraversed <= PROGRESS_BATCH_SIZE) {
                         // set foundDoc to false to allow exiting the while loop
                         foundDoc = false;
-                        Iterable<NodeDocument> itr = versionStore.getModifiedDocs(oldestModifiedDocTimeStamp, toModified, 1000, oldestModifiedDocId);
+                        Iterable<NodeDocument> itr = versionStore.getModifiedDocs(oldestModifiedDocTimeStamp, toModified, 1000, oldestModifiedDocId, includeFromId);
+                        // set includeFromId to false for subsequent queries
+                        includeFromId = false;
                         try {
                             for (NodeDocument doc : itr) {
                                 foundDoc = true;
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
index 9896857e36..ca9a8a955b 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
@@ -131,23 +131,27 @@ public class MongoVersionGCSupport extends VersionGCSupport {
 
     /**
      * Returns documents that have a {@link NodeDocument#MODIFIED_IN_SECS} value
-     * within the given range .The two passed modified timestamps are in milliseconds
+     * within the given range and are greater than given {@link NodeDocument#ID}.
+     * <p>
+     * The two passed modified timestamps are in milliseconds
      * since the epoch and the implementation will convert them to seconds at
      * the granularity of the {@link NodeDocument#MODIFIED_IN_SECS} field and
      * then perform the comparison.
+     * <p/>
      *
-     * @param fromModified the lower bound modified timestamp (inclusive)
-     * @param toModified the upper bound modified timestamp (exclusive)
-     * @param limit the limit of documents to return
-     * @param fromId the lower bound {@link NodeDocument#ID} (exclusive)
+     * @param fromModified  the lower bound modified timestamp (inclusive)
+     * @param toModified    the upper bound modified timestamp (exclusive)
+     * @param limit         the limit of documents to return
+     * @param fromId        the lower bound {@link NodeDocument#ID}
+     * @param includeFromId boolean indicating whether {@code fromId} is inclusive or not
      * @return matching documents.
      */
     @Override
     public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit,
-                                                  @NotNull final String fromId) {
+                                                  @NotNull final String fromId, boolean includeFromId) {
         // _modified >= fromModified && _modified < toModified && _id > fromId
         final Bson query = and(gte(MODIFIED_IN_SECS, getModifiedInSecs(fromModified)),
-                lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified)), gt(ID, fromId));
+                lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified)), includeFromId ? gte(ID, fromId) :gt(ID, fromId));
         // first sort by _modified and then by _id
         final Bson sort = and(eq(MODIFIED_IN_SECS, 1), eq(ID, 1));
 
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
index 7006c18683..efce4b8006 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
@@ -96,27 +96,31 @@ public class RDBVersionGCSupport extends VersionGCSupport {
 
     /**
      * Returns documents that have a {@link NodeDocument#MODIFIED_IN_SECS} value
-     * within the given range .The two passed modified timestamps are in milliseconds
+     * within the given range and are greater than given {@link NodeDocument#ID}.
+     * <p>
+     * The two passed modified timestamps are in milliseconds
      * since the epoch and the implementation will convert them to seconds at
      * the granularity of the {@link NodeDocument#MODIFIED_IN_SECS} field and
      * then perform the comparison.
+     * <p/>
      *
-     * @param fromModified the lower bound modified timestamp (inclusive)
-     * @param toModified the upper bound modified timestamp (exclusive)
-     * @param limit the limit of documents to return
-     * @param fromId the lower bound {@link NodeDocument#ID} (exclusive)
+     * @param fromModified  the lower bound modified timestamp (inclusive)
+     * @param toModified    the upper bound modified timestamp (exclusive)
+     * @param limit         the limit of documents to return
+     * @param fromId        the lower bound {@link NodeDocument#ID}
+     * @param includeFromId boolean indicating whether {@code fromId} is inclusive or not
      * @return matching documents.
      */
     @Override
     public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit,
-                                                  @NotNull final String fromId) {
+                                                  @NotNull final String fromId, boolean includeFromId) {
         List<QueryCondition> conditions = of(new QueryCondition(MODIFIED_IN_SECS, "<", getModifiedInSecs(toModified)),
                 new QueryCondition(MODIFIED_IN_SECS, ">=", getModifiedInSecs(fromModified)),
-                new QueryCondition(ID, ">", of(fromId)));
+                new QueryCondition(ID, includeFromId ? ">=" : ">", of(fromId)));
         if (MODE == 1) {
             return getIterator(EMPTY_KEY_PATTERN, conditions);
         } else {
-            return store.queryAsIterable(NODES, fromId, null, EMPTY_KEY_PATTERN, conditions, limit, of(MODIFIED_IN_SECS, ID));
+            return store.queryAsIterable(NODES, null, null, EMPTY_KEY_PATTERN, conditions, limit, of(MODIFIED_IN_SECS, ID));
         }
     }
 
@@ -287,7 +291,7 @@ public class RDBVersionGCSupport extends VersionGCSupport {
         LOG.info("getOldestModifiedDoc() <- start");
         Iterable<NodeDocument> modifiedDocs = null;
         try {
-            modifiedDocs = getModifiedDocs(0L, clock.getTime(), 1, MIN_ID_VALUE);
+            modifiedDocs = getModifiedDocs(0L, clock.getTime(), 1, MIN_ID_VALUE, false);
             doc = modifiedDocs.iterator().hasNext() ? modifiedDocs.iterator().next() : NULL;
         } catch (DocumentStoreException ex) {
             LOG.error("getOldestModifiedDoc()", ex);
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java
index 0061771383..cff9511a66 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java
@@ -33,6 +33,7 @@ import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 
+import static java.lang.Long.MAX_VALUE;
 import static java.util.Comparator.comparing;
 import static java.util.List.of;
 import static java.util.Optional.ofNullable;
@@ -206,8 +207,8 @@ public class VersionGCSupportTest {
     public void findModifiedDocsWhenModifiedIsDifferent() {
         long secs = 42;
         long offset = SECONDS.toMillis(secs);
-        List<UpdateOp> updateOps = new ArrayList<>(5_001);
-        for (int i = 0; i < 5_001; i++) {
+        List<UpdateOp> updateOps = new ArrayList<>(5_000);
+        for (int i = 0; i < 5_000; i++) {
             Revision r = new Revision(offset + (i * 5), 0, 1);
             String id = getIdFromPath("/x" + i);
             ids.add(id);
@@ -223,9 +224,10 @@ public class VersionGCSupportTest {
         long oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
         assertEquals(40L, oldestModifiedDocTs);
         assertEquals("1:/x0", oldestModifiedDocId);
+        boolean includeFromId = true;
 
         for(int i = 0; i < 5; i++) {
-            Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), Long.MAX_VALUE, 1000, oldestModifiedDocId);
+            Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId, includeFromId);
             assertTrue(isInOrder(modifiedDocs, (o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2)));
             long count = stream(modifiedDocs.spliterator(), false).count();
             assertEquals(1000, count);
@@ -234,6 +236,7 @@ public class VersionGCSupportTest {
             }
             oldestModifiedDocId = oldestModifiedDoc.getId();
             oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
+            includeFromId = false;
         }
     }
 
@@ -249,7 +252,7 @@ public class VersionGCSupportTest {
             setModified(op, r);
             updateOps.add(op);
         }
-        // create 5_000 nodes
+        // create 5_001 nodes
         store.create(NODES, updateOps);
 
         NodeDocument oldestModifiedDoc = gcSupport.getOldestModifiedDoc(SIMPLE);
@@ -257,9 +260,10 @@ public class VersionGCSupportTest {
         long oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
         assertEquals(40L, oldestModifiedDocTs);
         assertEquals("1:/x0", oldestModifiedDocId);
+        boolean includeFromId = true;
 
         for(int i = 0; i < 5; i++) {
-            Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), Long.MAX_VALUE, 1000, oldestModifiedDocId);
+            Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId, includeFromId);
             assertTrue(isInOrder(modifiedDocs, (o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2)));
             long count = stream(modifiedDocs.spliterator(), false).count();
             assertEquals(1000, count);
@@ -268,7 +272,21 @@ public class VersionGCSupportTest {
             }
             oldestModifiedDocId = oldestModifiedDoc.getId();
             oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
+            includeFromId = false;
         }
+
+        // fetch last remaining document now
+        Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId, false);
+        assertEquals(1, stream(modifiedDocs.spliterator(), false).count());
+        assertTrue(isInOrder(modifiedDocs, (o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2)));
+        oldestModifiedDoc = modifiedDocs.iterator().next();
+        oldestModifiedDocId = oldestModifiedDoc.getId();
+        oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
+
+        // all documents have been fetched, now we won't get any document
+        modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId, false);
+        assertEquals(0, stream(modifiedDocs.spliterator(), false).count());
+
     }
 
     @Test
@@ -291,10 +309,10 @@ public class VersionGCSupportTest {
         }
         // create 5_000 nodes
         store.create(NODES, updateOps);
-
+        boolean includeFromId = true;
 
         for(int i = 0; i < 5; i++) {
-            Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), Long.MAX_VALUE, 1000, oldestModifiedDocId);
+            Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId, includeFromId);
             assertTrue(isInOrder(modifiedDocs, (o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2)));
             long count = stream(modifiedDocs.spliterator(), false).count();
             assertEquals(1000, count);
@@ -303,7 +321,12 @@ public class VersionGCSupportTest {
             }
             oldestModifiedDocId = oldestModifiedDoc.getId();
             oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
+            includeFromId = false;
         }
+
+        // all documents have been fetched, now we won't get any document
+        Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId, false);
+        assertEquals(0, stream(modifiedDocs.spliterator(), false).count());
     }
 
     private void assertPossiblyDeleted(long fromSeconds, long toSeconds, long num) {
@@ -312,7 +335,7 @@ public class VersionGCSupportTest {
     }
 
     private void assertModified(long fromSeconds, long toSeconds, long num) {
-        Iterable<NodeDocument> docs = gcSupport.getModifiedDocs(SECONDS.toMillis(fromSeconds), SECONDS.toMillis(toSeconds), 10, MIN_ID_VALUE);
+        Iterable<NodeDocument> docs = gcSupport.getModifiedDocs(SECONDS.toMillis(fromSeconds), SECONDS.toMillis(toSeconds), 10, MIN_ID_VALUE, false);
         assertEquals(num, stream(docs.spliterator(), false).count());
         assertTrue(isInOrder(docs, (o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2)));
     }
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
index caa156a6d4..f6e8554252 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
@@ -34,6 +34,7 @@ import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicReference;
 
+import static java.util.concurrent.TimeUnit.SECONDS;
 import static org.apache.commons.lang3.reflect.FieldUtils.writeField;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.filter;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.size;
@@ -83,6 +84,7 @@ import org.apache.jackrabbit.oak.stats.Clock;
 import org.jetbrains.annotations.NotNull;
 import org.junit.After;
 import org.junit.Before;
+import org.junit.Ignore;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
@@ -337,6 +339,61 @@ public class VersionGarbageCollectorIT {
         assertEquals(50_000, stats.deletedPropsGCCount);
 
     }
+
+    // Test when we have more than 1000 deleted properties with different revisions
+    @Test
+    @Ignore
+    public void testGCDeletedProps_2() throws Exception {
+        //1. Create nodes with properties
+        NodeBuilder b1 = null;
+        for (int k = 0; k < 50; k ++) {
+            b1 = store.getRoot().builder();
+            // Add property to node & save
+            for (int i = 0; i < 100; i++) {
+                for (int j = 0; j < 10; j++) {
+                    b1.child(k + "z" + i).setProperty("prop" + j, "foo", STRING);
+                }
+            }
+            store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+            // increase the clock to create new revision for next batch
+            clock.waitUntil(Revision.getCurrentTimestamp() + SECONDS.toMillis(k * 5));
+        }
+
+        // enable the detailed gc flag
+        writeField(gc, "detailedGCEnabled", true, true);
+        long maxAge = 1; //hours
+        long delta = TimeUnit.MINUTES.toMillis(10);
+        //1. Go past GC age and check no GC done as nothing deleted
+        clock.waitUntil(Revision.getCurrentTimestamp() + maxAge);
+        VersionGCStats stats = gc.gc(maxAge, HOURS);
+        assertEquals(0, stats.deletedPropsGCCount);
+
+        //Remove property
+        NodeBuilder b2 = store.getRoot().builder();
+        for (int k = 0; k < 50; k ++) {
+            for (int i = 0; i < 100; i++) {
+                for (int j = 0; j < 10; j++) {
+                    b2.getChildNode(k + "z" + i).removeProperty("prop" + j);
+                }
+            }
+        }
+        store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        store.runBackgroundOperations();
+
+        //2. Check that a deleted property is not collected before maxAge
+        //Clock cannot move back (it moved forward in #1) so double the maxAge
+        clock.waitUntil(clock.getTime() + delta);
+        stats = gc.gc(maxAge*2, HOURS);
+        assertEquals(0, stats.deletedPropsGCCount);
+
+        //3. Check that deleted property does get collected post maxAge
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
+
+        stats = gc.gc(maxAge*2, HOURS);
+        assertEquals(50_000, stats.deletedPropsGCCount);
+
+    }
     
     private void gcSplitDocsInternal(String subNodeName) throws Exception {
         long maxAge = 1; //hrs


[jackrabbit-oak] 28/28: OAK-10199 : removed DetailedGC code changes for RDBDocumentStore

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch DetailedGC/OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 577aeac17fbee63f86c9ae15c72079ea6c513ded
Author: Rishabh Kumar <di...@adobe.com>
AuthorDate: Wed Aug 9 00:08:12 2023 +0530

    OAK-10199 : removed DetailedGC code changes for RDBDocumentStore
---
 .../plugins/document/VersionGarbageCollector.java  |   4 +-
 .../oak/plugins/document/rdb/RDBDocumentStore.java |  16 +---
 .../plugins/document/rdb/RDBDocumentStoreJDBC.java |  16 ++--
 .../plugins/document/rdb/RDBVersionGCSupport.java  | 104 ---------------------
 .../oak/plugins/document/VersionGCInitTest.java    |  13 +--
 5 files changed, 18 insertions(+), 135 deletions(-)

diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
index 8f2c977274..a32ded6c5b 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
@@ -350,7 +350,7 @@ public class VersionGarbageCollector {
 
             return "VersionGCStats{" +
                     "ignoredGCDueToCheckPoint=" + ignoredGCDueToCheckPoint +
-                    "ignoredDetailedGCDueToCheckPoint=" + ignoredDetailedGCDueToCheckPoint +
+                    ", ignoredDetailedGCDueToCheckPoint=" + ignoredDetailedGCDueToCheckPoint +
                     ", canceled=" + canceled +
                     ", deletedDocGCCount=" + deletedDocGCCount + " (of which leaf: " + deletedLeafDocGCCount + ")" +
                     ", updateResurrectedGCCount=" + updateResurrectedGCCount +
@@ -950,7 +950,7 @@ public class VersionGarbageCollector {
                 updatedDocs = (int) oldDocs.stream().filter(Objects::nonNull).count();
                 stats.updatedDetailedGCDocsCount += updatedDocs;
                 stats.deletedPropsGCCount += deletedProps;
-                log.info("Updated [{}] documents, deleted [{}] properties", updatedDocs, deletedProps);
+                log.debug("Updated [{}] documents, deleted [{}] properties", updatedDocs, deletedProps);
                 // now reset delete metadata
                 updateOpList.clear();
                 deletedPropsCountMap.clear();
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStore.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStore.java
index 25a3aca5e7..82c09e213d 100755
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStore.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStore.java
@@ -971,8 +971,8 @@ public class RDBDocumentStore implements DocumentStore {
     public static String VERSIONPROP = "__version";
 
     // set of supported indexed properties
-    private static final Set<String> INDEXEDPROPERTIES = new HashSet<>(Arrays.asList(MODIFIED,
-            NodeDocument.HAS_BINARY_FLAG, NodeDocument.DELETED_ONCE, NodeDocument.SD_TYPE, NodeDocument.SD_MAX_REV_TIME_IN_SECS, VERSIONPROP, ID));
+    private static final Set<String> INDEXEDPROPERTIES = new HashSet<String>(Arrays.asList(new String[] { MODIFIED,
+            NodeDocument.HAS_BINARY_FLAG, NodeDocument.DELETED_ONCE, NodeDocument.SD_TYPE, NodeDocument.SD_MAX_REV_TIME_IN_SECS, VERSIONPROP }));
 
     // set of required table columns
     private static final Set<String> REQUIREDCOLUMNS = Collections.unmodifiableSet(new HashSet<String>(Arrays.asList(
@@ -1840,7 +1840,7 @@ public class RDBDocumentStore implements DocumentStore {
     }
 
     protected <T extends Document> Iterable<T> queryAsIterable(final Collection<T> collection, String fromKey, String toKey,
-            final List<String> excludeKeyPatterns, final List<QueryCondition> conditions, final int limit, final List<String> sortBy) {
+            final List<String> excludeKeyPatterns, final List<QueryCondition> conditions, final int limit, final String sortBy) {
 
         final RDBTableMetaData tmd = getTable(collection);
         Set<String> allowedProps = Sets.intersection(INDEXEDPROPERTIES, tmd.getColumnProperties());
@@ -1853,16 +1853,6 @@ public class RDBDocumentStore implements DocumentStore {
             }
         }
 
-        if (sortBy != null && !sortBy.isEmpty()) {
-            for (String key : sortBy) {
-                if (!allowedProps.contains(key)) {
-                    final String message = "indexed property " + key + " not supported. supported properties are " + allowedProps;
-                    LOG.error(message);
-                    throw new UnsupportedIndexedPropertyException(message);
-                }
-            }
-        }
-
         final String from = collection == Collection.NODES && NodeDocument.MIN_ID_VALUE.equals(fromKey) ? null : fromKey;
         final String to = collection == Collection.NODES && NodeDocument.MAX_ID_VALUE.equals(toKey) ? null : toKey;
 
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java
index 87d0f4b4dd..5caa65d875 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java
@@ -16,11 +16,8 @@
  */
 package org.apache.jackrabbit.oak.plugins.document.rdb;
 
-import static java.util.List.of;
-import static java.util.stream.Collectors.joining;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.transform;
 import static org.apache.jackrabbit.guava.common.collect.Sets.newHashSet;
-import static org.apache.jackrabbit.oak.plugins.document.Document.ID;
 import static org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStore.CHAR2OCTETRATIO;
 import static org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStore.asBytes;
 import static org.apache.jackrabbit.oak.plugins.document.rdb.RDBJDBCTools.asDocumentStoreException;
@@ -462,7 +459,7 @@ public class RDBDocumentStoreJDBC {
                             + excludeKeyPatterns + ", conditions=" + conditions + ", limit=" + limit)
                     : null);
             stmt = prepareQuery(connection, tmd, fields, minId,
-                    maxId, excludeKeyPatterns, conditions, limit, of(ID));
+                    maxId, excludeKeyPatterns, conditions, limit, "ID");
             rs = stmt.executeQuery();
             while (rs.next() && result.size() < limit) {
                 int field = 1;
@@ -557,7 +554,7 @@ public class RDBDocumentStoreJDBC {
 
     @NotNull
     public Iterator<RDBRow> queryAsIterator(RDBConnectionHandler ch, RDBTableMetaData tmd, String minId, String maxId,
-            List<String> excludeKeyPatterns, List<QueryCondition> conditions, int limit, List<String> sortBy) throws SQLException {
+            List<String> excludeKeyPatterns, List<QueryCondition> conditions, int limit, String sortBy) throws SQLException {
         return new ResultSetIterator(ch, tmd, minId, maxId, excludeKeyPatterns, conditions, limit, sortBy);
     }
 
@@ -576,7 +573,7 @@ public class RDBDocumentStoreJDBC {
         private long pstart;
 
         public ResultSetIterator(RDBConnectionHandler ch, RDBTableMetaData tmd, String minId, String maxId,
-                List<String> excludeKeyPatterns, List<QueryCondition> conditions, int limit, List<String> sortBy) throws SQLException {
+                List<String> excludeKeyPatterns, List<QueryCondition> conditions, int limit, String sortBy) throws SQLException {
             long start = System.currentTimeMillis();
             try {
                 this.ch = ch;
@@ -698,7 +695,7 @@ public class RDBDocumentStoreJDBC {
 
     @NotNull
     private PreparedStatement prepareQuery(Connection connection, RDBTableMetaData tmd, String columns, String minId, String maxId,
-            List<String> excludeKeyPatterns, List<QueryCondition> conditions, int limit, List<String> sortBy) throws SQLException {
+            List<String> excludeKeyPatterns, List<QueryCondition> conditions, int limit, String sortBy) throws SQLException {
 
         StringBuilder selectClause = new StringBuilder();
 
@@ -717,8 +714,8 @@ public class RDBDocumentStoreJDBC {
             query.append(" where ").append(whereClause);
         }
 
-        if (sortBy != null && !sortBy.isEmpty()) {
-            query.append(" order by ").append(sortBy.stream().map(INDEXED_PROP_MAPPING::get).collect(joining(", ")));
+        if (sortBy != null) {
+            query.append(" order by ID");
         }
 
         if (limit != Integer.MAX_VALUE) {
@@ -969,7 +966,6 @@ public class RDBDocumentStoreJDBC {
         tmp.put(NodeDocument.SD_TYPE, "SDTYPE");
         tmp.put(NodeDocument.SD_MAX_REV_TIME_IN_SECS, "SDMAXREVTIME");
         tmp.put(RDBDocumentStore.VERSIONPROP, "VERSION");
-        tmp.put(ID, "ID");
         INDEXED_PROP_MAPPING = Collections.unmodifiableMap(tmp);
     }
 
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
index 27c582311b..a463499793 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
@@ -16,23 +16,7 @@
  */
 package org.apache.jackrabbit.oak.plugins.document.rdb;
 
-import static java.util.Comparator.comparing;
-import static java.util.List.of;
-import static java.util.Optional.empty;
-import static java.util.Optional.ofNullable;
-import static java.util.stream.Collectors.toList;
-import static java.util.stream.Stream.concat;
-import static java.util.stream.StreamSupport.stream;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.filter;
-import static org.apache.jackrabbit.guava.common.collect.Iterables.size;
-import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
-import static org.apache.jackrabbit.oak.plugins.document.Document.ID;
-import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE;
-import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS;
-import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.getModifiedInSecs;
-import static org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStore.EMPTY_KEY_PATTERN;
-import static org.apache.jackrabbit.oak.plugins.document.util.CloseableIterable.wrap;
-import static org.apache.jackrabbit.oak.plugins.document.util.Utils.closeIfCloseable;
 
 import java.io.Closeable;
 import java.io.IOException;
@@ -41,15 +25,11 @@ import java.util.Arrays;
 import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
-import java.util.Optional;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
-import java.util.function.Supplier;
-import java.util.stream.Stream;
 
 import org.apache.jackrabbit.oak.commons.properties.SystemPropertySupplier;
 import org.apache.jackrabbit.oak.plugins.document.Collection;
-import org.apache.jackrabbit.oak.plugins.document.Document;
 import org.apache.jackrabbit.oak.plugins.document.DocumentStoreException;
 import org.apache.jackrabbit.oak.plugins.document.NodeDocument;
 import org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType;
@@ -60,7 +40,6 @@ import org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStore.Unsupport
 import org.apache.jackrabbit.oak.plugins.document.util.CloseableIterable;
 import org.apache.jackrabbit.oak.plugins.document.util.Utils;
 import org.apache.jackrabbit.oak.stats.Clock;
-import org.jetbrains.annotations.NotNull;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -106,67 +85,6 @@ public class RDBVersionGCSupport extends VersionGCSupport {
         }
     }
 
-    /**
-     * Returns documents that have a {@link NodeDocument#MODIFIED_IN_SECS} value
-     * within the given range and are greater than given @{@link NodeDocument#ID}.
-     * <p>
-     * The two passed modified timestamps are in milliseconds
-     * since the epoch and the implementation will convert them to seconds at
-     * the granularity of the {@link NodeDocument#MODIFIED_IN_SECS} field and
-     * then perform the comparison.
-     *
-     *
-     * @param fromModified the lower bound modified timestamp (inclusive)
-     * @param toModified   the upper bound modified timestamp (exclusive)
-     * @param limit        the limit of documents to return
-     * @param fromId       the lower bound {@link NodeDocument#ID}
-     * @return matching documents.
-     */
-    @Override
-    public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit,
-                                                  @NotNull final String fromId) {
-        // (_modified = fromModified && _id > fromId || _modified > fromModified && _modified < toModified)
-        // TODO : introduce support for OR where clause in RDBDocumentStore
-        final List<QueryCondition> c1 = of(new QueryCondition(MODIFIED_IN_SECS, "=", getModifiedInSecs(fromModified)),
-                new QueryCondition(ID, ">", of(fromId)));
-
-        final List<QueryCondition> c2 = of(new QueryCondition(MODIFIED_IN_SECS, "<", getModifiedInSecs(toModified)),
-                new QueryCondition(MODIFIED_IN_SECS, ">", getModifiedInSecs(fromModified)));
-
-        if (MODE == 1) {
-            return getNodeDocuments(() -> getIterator(EMPTY_KEY_PATTERN, c1), () -> getIterator(EMPTY_KEY_PATTERN, c2), limit);
-        } else {
-            return getNodeDocuments(() -> store.queryAsIterable(NODES, null, null, EMPTY_KEY_PATTERN, c1, limit, of(MODIFIED_IN_SECS, ID)),
-                    () -> store.queryAsIterable(NODES, null, null, EMPTY_KEY_PATTERN, c2, limit, of(MODIFIED_IN_SECS, ID)),
-                    limit);
-        }
-    }
-
-    /**
-     * To fetch {@link NodeDocument} from database
-     *
-     * @param supplier1 document supplier on basis of 1st Condition
-     * @param supplier2 document supplier on basis of 2nd Condition
-     * @param limit no. of documents to fetch from db
-     * @return sorted documents supplied by supplier1 & supplier2
-     */
-    private Iterable<NodeDocument> getNodeDocuments(final Supplier<Iterable<NodeDocument>> supplier1, final Supplier<Iterable<NodeDocument>> supplier2, final int limit) {
-
-        final Iterable<NodeDocument> itr1 = supplier1.get();
-        if (size(itr1) >= limit) {
-            return itr1;
-        }
-
-        final Iterable<NodeDocument> itr2 = supplier2.get();
-
-        final Stream<NodeDocument> s1 = stream(itr1.spliterator(), false);
-        final Stream<NodeDocument> s2 = stream(itr2.spliterator(), false);
-        return wrap(concat(s1, s2).sorted((o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2)).limit(limit).collect(toList()), () -> {
-            closeIfCloseable(itr1);
-            closeIfCloseable(itr2);
-        });
-    }
-
     @Override
     protected Iterable<NodeDocument> identifyGarbage(final Set<SplitDocType> gcTypes, final RevisionVector sweepRevs,
             final long oldestRevTimeStamp) {
@@ -321,28 +239,6 @@ public class RDBVersionGCSupport extends VersionGCSupport {
         }
     }
 
-    /**
-     * Retrieve the time of the oldest modified document.
-     *
-     * @param clock System Clock
-     * @return the timestamp of the oldest modified document.
-     */
-    @Override
-    public Optional<NodeDocument> getOldestModifiedDoc(Clock clock) {
-
-        Iterable<NodeDocument> modifiedDocs = null;
-        try {
-            modifiedDocs = getModifiedDocs(0L, clock.getTime(), 1, MIN_ID_VALUE);
-            return modifiedDocs.iterator().hasNext() ? ofNullable(modifiedDocs.iterator().next()) : empty();
-        } catch (DocumentStoreException ex) {
-            LOG.error("getOldestModifiedDoc() <- Error ", ex);
-        } finally {
-            closeIfCloseable(modifiedDocs);
-        }
-        LOG.info("No Modified Doc has been found, retuning empty");
-        return empty();
-    }
-
     @Override
     public long getDeletedOnceCount() {
         return store.queryCount(Collection.NODES, null, null, RDBDocumentStore.EMPTY_KEY_PATTERN,
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java
index 0c6b2fccdf..eec3e33b6b 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java
@@ -31,6 +31,7 @@ import static org.apache.jackrabbit.oak.plugins.document.DetailGCHelper.enableDe
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE;
 import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP;
 import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP;
+import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_ID;
 import static org.apache.jackrabbit.oak.plugins.document.util.Utils.getIdFromPath;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
@@ -51,12 +52,12 @@ public class VersionGCInitTest {
     @Test
     public void lazyInitialize() throws Exception {
         DocumentStore store = ns.getDocumentStore();
-        Document vgc = store.find(SETTINGS, "versionGC");
+        Document vgc = store.find(SETTINGS, SETTINGS_COLLECTION_ID);
         assertNull(vgc);
 
         ns.getVersionGarbageCollector().gc(1, DAYS);
 
-        vgc = store.find(SETTINGS, "versionGC");
+        vgc = store.find(SETTINGS, SETTINGS_COLLECTION_ID);
         assertNotNull(vgc);
         assertEquals(0L, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP));
         assertNull(vgc.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP));
@@ -65,7 +66,7 @@ public class VersionGCInitTest {
     @Test
     public void lazyInitializeWithDetailedGC() throws Exception {
         DocumentStore store = ns.getDocumentStore();
-        Document vgc = store.find(SETTINGS, "versionGC");
+        Document vgc = store.find(SETTINGS, SETTINGS_COLLECTION_ID);
         assertNull(vgc);
 
         enableDetailGC(ns.getVersionGarbageCollector());
@@ -77,7 +78,7 @@ public class VersionGCInitTest {
         store.createOrUpdate(NODES, op);
         VersionGCStats stats = ns.getVersionGarbageCollector().gc(1, DAYS);
 
-        vgc = store.find(SETTINGS, "versionGC");
+        vgc = store.find(SETTINGS, SETTINGS_COLLECTION_ID);
         assertNotNull(vgc);
         assertEquals(stats.oldestModifiedDocTimeStamp, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP));
         assertEquals(stats.oldestModifiedDocId, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP));
@@ -87,13 +88,13 @@ public class VersionGCInitTest {
     @Test
     public void lazyInitializeWithDetailedGCWithNoData() throws Exception {
         DocumentStore store = ns.getDocumentStore();
-        Document vgc = store.find(SETTINGS, "versionGC");
+        Document vgc = store.find(SETTINGS, SETTINGS_COLLECTION_ID);
         assertNull(vgc);
 
         enableDetailGC(ns.getVersionGarbageCollector());
         VersionGCStats stats = ns.getVersionGarbageCollector().gc(1, DAYS);
 
-        vgc = store.find(SETTINGS, "versionGC");
+        vgc = store.find(SETTINGS, SETTINGS_COLLECTION_ID);
         assertNotNull(vgc);
         assertEquals(stats.oldestModifiedDocTimeStamp, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP));
         assertEquals(stats.oldestModifiedDocId, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP));


[jackrabbit-oak] 06/28: OAK-10199 : ignore documents which don't have _modified field in mongo while fetching modifiedDocs

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch DetailedGC/OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 6ed1b0c1aedd93bf84503f45b6e079574e1e9c52
Author: Rishabh Kumar <di...@adobe.com>
AuthorDate: Tue May 30 20:02:13 2023 +0530

    OAK-10199 : ignore documents which don't have _modified field in mongo while fetching modifiedDocs
---
 .../oak/plugins/document/mongo/MongoVersionGCSupport.java     | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
index 4d01e5d3da..324ade704c 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
@@ -19,6 +19,8 @@
 
 package org.apache.jackrabbit.oak.plugins.document.mongo;
 
+import static com.mongodb.client.model.Filters.eq;
+import static com.mongodb.client.model.Filters.exists;
 import static java.util.Optional.ofNullable;
 import static java.util.concurrent.TimeUnit.SECONDS;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.concat;
@@ -142,7 +144,7 @@ public class MongoVersionGCSupport extends VersionGCSupport {
         // _modified >= fromModified && _modified < toModified
         final Bson query = and(gte(MODIFIED_IN_SECS, getModifiedInSecs(fromModified)),
                 lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified)));
-        final Bson sort = Filters.eq(MODIFIED_IN_SECS, 1);
+        final Bson sort = eq(MODIFIED_IN_SECS, 1);
         final FindIterable<BasicDBObject> cursor = getNodeCollection()
                 .find(query)
                 .sort(sort)
@@ -233,10 +235,13 @@ public class MongoVersionGCSupport extends VersionGCSupport {
     public long getOldestModifiedTimestamp(final Clock clock) {
         LOG.info("getOldestModifiedTimestamp() <- start");
 
-        final Bson sort = Filters.eq(MODIFIED_IN_SECS, 1);
+        final Bson sort = eq(MODIFIED_IN_SECS, 1);
         final List<Long> result = new ArrayList<>(1);
 
-        getNodeCollection().find().sort(sort).limit(1).forEach(
+        // we need to add a query condition to ignore `previous` documents which don't have this field
+        final Bson query = exists(MODIFIED_IN_SECS);
+
+        getNodeCollection().find(query).sort(sort).limit(1).forEach(
                 (Consumer<BasicDBObject>) document ->
                         ofNullable(store.convertFromDBObject(NODES, document))
                                 .ifPresent(doc -> {


[jackrabbit-oak] 04/28: OAK-10199 : override getModifiedDocs() for RDB and added unit cases for deletedProps

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch DetailedGC/OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 49ebde2e8f8ea6ab004c953a8264c0fbd9aba158
Author: Rishabh Kumar <di...@adobe.com>
AuthorDate: Wed Apr 26 21:16:44 2023 +0530

    OAK-10199 : override getModifiedDocs() for RDB and added unit cases for deletedProps
---
 .../oak/plugins/document/NodeDocument.java         |  12 +-
 .../plugins/document/VersionGCRecommendations.java |  36 +-
 .../plugins/document/VersionGarbageCollector.java  | 383 ++++++++++++---------
 .../document/mongo/MongoVersionGCSupport.java      |  51 ++-
 .../plugins/document/rdb/RDBDocumentStoreJDBC.java |   1 +
 .../plugins/document/rdb/RDBVersionGCSupport.java  |  56 +++
 .../oak/plugins/document/DetailGCHelper.java       |  22 +-
 .../oak/plugins/document/NodeDocumentTest.java     |  32 ++
 .../oak/plugins/document/VersionGCInitTest.java    |  17 +
 .../oak/plugins/document/VersionGCStatsTest.java   |  15 +
 .../oak/plugins/document/VersionGCTest.java        |  13 +-
 .../document/VersionGarbageCollectorIT.java        |  64 ++++
 12 files changed, 482 insertions(+), 220 deletions(-)

diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
index 71abba0a2e..66a1bc2eae 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
@@ -32,7 +32,6 @@ import java.util.SortedSet;
 import java.util.TreeMap;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicLong;
-import java.util.stream.Collectors;
 
 import org.apache.jackrabbit.guava.common.base.Function;
 import org.apache.jackrabbit.guava.common.base.Predicate;
@@ -59,6 +58,7 @@ import org.apache.jackrabbit.guava.common.collect.Iterables;
 import org.apache.jackrabbit.guava.common.collect.Maps;
 import org.apache.jackrabbit.guava.common.collect.Sets;
 
+import static java.util.stream.Collectors.toSet;
 import static org.apache.jackrabbit.guava.common.base.Objects.equal;
 import static org.apache.jackrabbit.guava.common.base.Preconditions.checkArgument;
 import static org.apache.jackrabbit.guava.common.base.Preconditions.checkNotNull;
@@ -67,7 +67,6 @@ import static org.apache.jackrabbit.guava.common.collect.Iterables.filter;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.mergeSorted;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.transform;
 import static java.util.Objects.requireNonNull;
-import static java.util.stream.Collectors.toMap;
 import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
 import static org.apache.jackrabbit.oak.plugins.document.StableRevisionComparator.REVERSE;
 import static org.apache.jackrabbit.oak.plugins.document.UpdateOp.Key;
@@ -1672,17 +1671,16 @@ public final class NodeDocument extends Document {
     }
 
     /**
-     * Returns all the properties on this document
-     * @return Map of all properties along with their values
+     * Returns the names of all the properties on this document
+     * @return Set of all property names
      */
     @NotNull
-    Map<String, SortedMap<Revision, String>> getProperties() {
+    Set<String> getPropertyNames() {
         return data
                 .keySet()
                 .stream()
                 .filter(Utils::isPropertyName)
-                .map(o -> Map.entry(o, getLocalMap(o)))
-                .collect(toMap(Entry::getKey, Entry::getValue));
+                .collect(toSet());
     }
 
     /**
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
index d8b091261d..f04b56fc52 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
@@ -18,6 +18,7 @@
  */
 package org.apache.jackrabbit.oak.plugins.document;
 
+import java.util.HashMap;
 import java.util.Map;
 import java.util.concurrent.TimeUnit;
 
@@ -30,9 +31,7 @@ import org.apache.jackrabbit.oak.stats.Clock;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import org.apache.jackrabbit.guava.common.collect.Maps;
-
-import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP;
+import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP;
 import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP;
 
 /**
@@ -51,7 +50,7 @@ public class VersionGCRecommendations {
     final long maxCollect;
     final long deleteCandidateCount;
     final long lastOldestTimestamp;
-    final long fullDetailGCTimestamp;
+    final long detailedGCTimestamp;
     final long originalCollectLimit;
 
     private final long precisionMs;
@@ -96,7 +95,7 @@ public class VersionGCRecommendations {
         TimeInterval keep = new TimeInterval(clock.getTime() - maxRevisionAgeMs, Long.MAX_VALUE);
 
         Map<String, Long> settings = getLongSettings();
-        lastOldestTimestamp = settings.get(SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP);
+        lastOldestTimestamp = settings.get(VersionGarbageCollector.SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP);
         if (lastOldestTimestamp == 0) {
             log.debug("No lastOldestTimestamp found, querying for the oldest deletedOnce candidate");
             oldestPossible = vgc.getOldestDeletedOnceTimestamp(clock, options.precisionMs) - 1;
@@ -108,17 +107,17 @@ public class VersionGCRecommendations {
         TimeInterval scope = new TimeInterval(oldestPossible, Long.MAX_VALUE);
         scope = scope.notLaterThan(keep.fromMs);
 
-        fullDetailGCTimestamp = settings.get(SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP);
-        if (fullDetailGCTimestamp == 0) {
+        detailedGCTimestamp = settings.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP);
+        if (detailedGCTimestamp == 0) {
             if (log.isDebugEnabled()) {
-                log.debug("No fullDetailGCTimestamp found, querying for the oldest deletedOnce candidate");
+                log.debug("No detailedGCTimestamp found, querying for the oldest deletedOnce candidate");
             }
             oldestPossibleFullGC = vgc.getOldestModifiedTimestamp(clock) - 1;
             if (log.isDebugEnabled()) {
-                log.debug("fullDetailGCTimestamp found: {}", Utils.timestampToString(oldestPossibleFullGC));
+                log.debug("detailedGCTimestamp found: {}", Utils.timestampToString(oldestPossibleFullGC));
             }
         } else {
-            oldestPossibleFullGC = fullDetailGCTimestamp - 1;
+            oldestPossibleFullGC = detailedGCTimestamp - 1;
         }
 
         TimeInterval scopeFullGC = new TimeInterval(oldestPossibleFullGC, Long.MAX_VALUE);
@@ -206,10 +205,8 @@ public class VersionGCRecommendations {
             stats.needRepeat = true;
         } else if (!stats.canceled && !stats.ignoredGCDueToCheckPoint) {
             // success, we would not expect to encounter revisions older than this in the future
-//            setLongSetting(SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP, scope.toMs);
-//            setLongSetting(SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP, stats.oldestModifiedGced);
             setLongSetting(ImmutableMap.of(SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP, scope.toMs,
-                    SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP, stats.oldestModifiedGced));
+                    SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP, stats.oldestModifiedGced));
 
             int count = stats.deletedDocGCCount - stats.deletedLeafDocGCCount;
             double usedFraction;
@@ -240,11 +237,11 @@ public class VersionGCRecommendations {
 
     private Map<String, Long> getLongSettings() {
         Document versionGCDoc = vgc.getDocumentStore().find(Collection.SETTINGS, VersionGarbageCollector.SETTINGS_COLLECTION_ID, 0);
-        Map<String, Long> settings = Maps.newHashMap();
+        Map<String, Long> settings = new HashMap<>();
         // default values
-        settings.put(SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP, 0L);
+        settings.put(VersionGarbageCollector.SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP, 0L);
         settings.put(VersionGarbageCollector.SETTINGS_COLLECTION_REC_INTERVAL_PROP, 0L);
-        settings.put(SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP, 0L);
+        settings.put(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP, 0L);
         if (versionGCDoc != null) {
             for (String k : versionGCDoc.keySet()) {
                 Object value = versionGCDoc.get(k);
@@ -256,14 +253,11 @@ public class VersionGCRecommendations {
         return settings;
     }
 
-    void setLongSetting(String propName, long val) {
+    private void setLongSetting(String propName, long val) {
         setLongSetting(Map.of(propName, val));
-//        UpdateOp updateOp = new UpdateOp(VersionGarbageCollector.SETTINGS_COLLECTION_ID, true);
-//        updateOp.set(propName, val);
-//        vgc.getDocumentStore().createOrUpdate(Collection.SETTINGS, updateOp);
     }
 
-    void setLongSetting(final Map<String, Long> propValMap) {
+    private void setLongSetting(final Map<String, Long> propValMap) {
         UpdateOp updateOp = new UpdateOp(VersionGarbageCollector.SETTINGS_COLLECTION_ID, true);
         propValMap.forEach(updateOp::set);
         vgc.getDocumentStore().createOrUpdate(Collection.SETTINGS, updateOp);
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
index 608ba02398..27ee36204a 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
@@ -21,16 +21,18 @@ package org.apache.jackrabbit.oak.plugins.document;
 
 import java.io.Closeable;
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.EnumSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 import java.util.Set;
-import java.util.SortedMap;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicReference;
+import java.util.stream.Collectors;
 
 import org.apache.jackrabbit.guava.common.base.Function;
 import org.apache.jackrabbit.guava.common.base.Joiner;
@@ -55,7 +57,11 @@ import org.jetbrains.annotations.Nullable;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static java.lang.Math.round;
+import static java.util.Collections.emptySet;
 import static java.util.Objects.requireNonNull;
+import static java.util.Optional.ofNullable;
+import static java.util.concurrent.TimeUnit.MILLISECONDS;
 import static org.apache.jackrabbit.guava.common.base.StandardSystemProperty.LINE_SEPARATOR;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.all;
 import static org.apache.jackrabbit.guava.common.collect.Iterators.partition;
@@ -67,14 +73,10 @@ import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_I
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.COMMIT_ROOT_ONLY;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_LEAF;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_NO_BRANCH;
-import static org.apache.jackrabbit.oak.plugins.document.StableRevisionComparator.REVERSE;
 import static org.slf4j.helpers.MessageFormatter.arrayFormat;
 
 public class VersionGarbageCollector {
 
-    /** TODO temporary global flag to enable 'detail gc' during prototyping. Should eventually become eg a system property */
-    public static boolean DETAIL_GC_ENABLED = false;
-
     //Kept less than MongoDocumentStore.IN_CLAUSE_BATCH_SIZE to avoid re-partitioning
     private static final int DELETE_BATCH_SIZE = 450;
     private static final int UPDATE_BATCH_SIZE = 450;
@@ -105,15 +107,9 @@ public class VersionGarbageCollector {
     static final String SETTINGS_COLLECTION_REC_INTERVAL_PROP = "recommendedIntervalMs";
 
     /**
-     * Property name to timestamp when last full-detail-GC run happened, or -1 if not applicable/in-use.
-     * <p>
-     * <ul>
-     * <li>-1 : full repo scan is disabled</li>
-     * <li>0 : full repo scan is enabled and bound to start from zero == oldest _modified </li>
-     * <li>gt 0 : full repo scan is enabled, was already done up until this value</li>
-     * </ul>
+     * Property name for the timestamp up to which the last detailed-GC run happened
      */
-    static final String SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP = "fullDetailGCTimeStamp";
+    static final String SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP = "detailedGCTimeStamp";
 
     private final DocumentNodeStore nodeStore;
     private final DocumentStore ds;
@@ -264,7 +260,6 @@ public class VersionGarbageCollector {
     }
 
     public static class VersionGCStats {
-        public long oldestModifiedGced;
         boolean ignoredGCDueToCheckPoint;
         boolean canceled;
         boolean success = true;
@@ -276,18 +271,26 @@ public class VersionGarbageCollector {
         int splitDocGCCount;
         int intermediateSplitDocGCCount;
         int updateResurrectedGCCount;
+        long oldestModifiedGced;
+        int updatedDetailedGCDocsCount;
+        int deletedPropsGCCount;
         final TimeDurationFormatter df = TimeDurationFormatter.forLogging();
         final Stopwatch active = Stopwatch.createUnstarted();
         final Stopwatch collectDeletedDocs = Stopwatch.createUnstarted();
         final Stopwatch checkDeletedDocs = Stopwatch.createUnstarted();
-        final Stopwatch detailedGcDocs = Stopwatch.createUnstarted();
-        final Stopwatch deleteDeletedDocs = Stopwatch.createUnstarted();
-        final Stopwatch collectAndDeleteSplitDocs = Stopwatch.createUnstarted();
+        final Stopwatch detailedGCDocs = Stopwatch.createUnstarted();
+        final Stopwatch deleteDetailedGCDocs = Stopwatch.createUnstarted();
         final Stopwatch deleteSplitDocs = Stopwatch.createUnstarted();
         final Stopwatch sortDocIds = Stopwatch.createUnstarted();
         final Stopwatch updateResurrectedDocuments = Stopwatch.createUnstarted();
+        final Stopwatch deleteDeletedDocs = Stopwatch.createUnstarted();
+        final Stopwatch collectAndDeleteSplitDocs = Stopwatch.createUnstarted();
+        final Stopwatch collectDeletedProps = Stopwatch.createUnstarted();
+        final Stopwatch collectDeletedOldRevs = Stopwatch.createUnstarted();
+        final Stopwatch collectUnmergedBC = Stopwatch.createUnstarted();
         long activeElapsed, collectDeletedDocsElapsed, checkDeletedDocsElapsed, deleteDeletedDocsElapsed, collectAndDeleteSplitDocsElapsed,
-                deleteSplitDocsElapsed, sortDocIdsElapsed, updateResurrectedDocumentsElapsed, detailedGcDocsElapsed;
+                deleteSplitDocsElapsed, sortDocIdsElapsed, updateResurrectedDocumentsElapsed, detailedGCDocsElapsed, collectDeletedPropsElapsed,
+                deleteDetailedGCDocsElapsed, collectDeletedOldRevsElapsed, collectUnmergedBCElapsed;
 
         @Override
         public String toString() {
@@ -306,6 +309,11 @@ public class VersionGarbageCollector {
                         df.format(updateResurrectedDocumentsElapsed, MICROSECONDS),
                         df.format(deleteDeletedDocsElapsed, MICROSECONDS),
                         df.format(collectAndDeleteSplitDocsElapsed, MICROSECONDS),
+                        df.format(detailedGCDocsElapsed, MICROSECONDS),
+                        df.format(deleteDetailedGCDocsElapsed, MICROSECONDS),
+                        df.format(collectDeletedPropsElapsed, MICROSECONDS),
+                        df.format(collectDeletedOldRevsElapsed, MICROSECONDS),
+                        df.format(collectUnmergedBCElapsed, MICROSECONDS),
                         timeDeletingSplitDocs);
             } else {
                 String timeDeletingSplitDocs = "";
@@ -319,17 +327,24 @@ public class VersionGarbageCollector {
                         df.format(updateResurrectedDocuments.elapsed(MICROSECONDS), MICROSECONDS),
                         df.format(deleteDeletedDocs.elapsed(MICROSECONDS), MICROSECONDS),
                         df.format(collectAndDeleteSplitDocs.elapsed(MICROSECONDS), MICROSECONDS),
+                        df.format(detailedGCDocs.elapsed(MICROSECONDS), MICROSECONDS),
+                        df.format(deleteDetailedGCDocs.elapsed(MICROSECONDS), MICROSECONDS),
+                        df.format(collectDeletedProps.elapsed(MICROSECONDS), MICROSECONDS),
+                        df.format(collectDeletedOldRevs.elapsed(MICROSECONDS), MICROSECONDS),
+                        df.format(collectUnmergedBC.elapsed(MICROSECONDS), MICROSECONDS),
                         timeDeletingSplitDocs);
             }
 
             return "VersionGCStats{" +
                     "ignoredGCDueToCheckPoint=" + ignoredGCDueToCheckPoint +
-                    ", oldestModifiedGced=" + oldestModifiedGced +
                     ", canceled=" + canceled +
                     ", deletedDocGCCount=" + deletedDocGCCount + " (of which leaf: " + deletedLeafDocGCCount + ")" +
                     ", updateResurrectedGCCount=" + updateResurrectedGCCount +
                     ", splitDocGCCount=" + splitDocGCCount +
                     ", intermediateSplitDocGCCount=" + intermediateSplitDocGCCount +
+                    ", oldestModifiedGced=" + oldestModifiedGced +
+                    ", updatedDetailedGCDocsCount=" + updatedDetailedGCDocsCount +
+                    ", deletedPropsGCCount=" + deletedPropsGCCount +
                     ", iterationCount=" + iterationCount +
                     ", timeActive=" + df.format(activeElapsed, MICROSECONDS) +
                     ", " + timings + "}";
@@ -338,7 +353,6 @@ public class VersionGarbageCollector {
         void addRun(VersionGCStats run) {
             ++iterationCount;
             this.ignoredGCDueToCheckPoint = run.ignoredGCDueToCheckPoint;
-            this.oldestModifiedGced = run.oldestModifiedGced;
             this.canceled = run.canceled;
             this.success = run.success;
             this.limitExceeded = run.limitExceeded;
@@ -348,6 +362,9 @@ public class VersionGarbageCollector {
             this.splitDocGCCount += run.splitDocGCCount;
             this.intermediateSplitDocGCCount += run.intermediateSplitDocGCCount;
             this.updateResurrectedGCCount += run.updateResurrectedGCCount;
+            this.oldestModifiedGced = run.oldestModifiedGced;
+            this.updatedDetailedGCDocsCount += run.updatedDetailedGCDocsCount;
+            this.deletedPropsGCCount += run.deletedPropsGCCount;
             if (run.iterationCount > 0) {
                 // run is cumulative with times in elapsed fields
                 this.activeElapsed += run.activeElapsed;
@@ -358,7 +375,11 @@ public class VersionGarbageCollector {
                 this.deleteSplitDocsElapsed += run.deleteSplitDocsElapsed;
                 this.sortDocIdsElapsed += run.sortDocIdsElapsed;
                 this.updateResurrectedDocumentsElapsed += run.updateResurrectedDocumentsElapsed;
-                this.detailedGcDocsElapsed += run.detailedGcDocsElapsed;
+                this.detailedGCDocsElapsed += run.detailedGCDocsElapsed;
+                this.deleteDetailedGCDocsElapsed += run.deleteDetailedGCDocsElapsed;
+                this.collectDeletedPropsElapsed += run.collectDeletedPropsElapsed;
+                this.collectDeletedOldRevsElapsed += run.collectDeletedOldRevsElapsed;
+                this.collectUnmergedBCElapsed += run.collectUnmergedBCElapsed;
             } else {
                 // single run -> read from stop watches
                 this.activeElapsed += run.active.elapsed(MICROSECONDS);
@@ -369,7 +390,11 @@ public class VersionGarbageCollector {
                 this.deleteSplitDocsElapsed += run.deleteSplitDocs.elapsed(MICROSECONDS);
                 this.sortDocIdsElapsed += run.sortDocIds.elapsed(MICROSECONDS);
                 this.updateResurrectedDocumentsElapsed += run.updateResurrectedDocuments.elapsed(MICROSECONDS);
-                this.detailedGcDocsElapsed += run.detailedGcDocs.elapsed(MICROSECONDS);
+                this.detailedGCDocsElapsed += run.detailedGCDocs.elapsed(MICROSECONDS);
+                this.deleteDetailedGCDocsElapsed += run.deleteDetailedGCDocs.elapsed(MICROSECONDS);
+                this.collectDeletedPropsElapsed += run.collectDeletedProps.elapsed(MICROSECONDS);
+                this.collectDeletedOldRevsElapsed += run.collectDeletedOldRevs.elapsed(MICROSECONDS);
+                this.collectUnmergedBCElapsed += run.collectUnmergedBC.elapsed(MICROSECONDS);
             }
         }
     }
@@ -378,10 +403,14 @@ public class VersionGarbageCollector {
         NONE,
         COLLECTING,
         CHECKING,
-        DETAILED_GC,
         DELETING,
         SORTING,
         SPLITS_CLEANUP,
+        DETAILED_GC,
+        COLLECT_PROPS,
+        COLLECT_OLD_REVS,
+        COLLECT_UNMERGED_BC,
+        DETAILED_GC_CLEANUP,
         UPDATING
     }
 
@@ -406,11 +435,15 @@ public class VersionGarbageCollector {
             this.watches.put(GCPhase.NONE, Stopwatch.createStarted());
             this.watches.put(GCPhase.COLLECTING, stats.collectDeletedDocs);
             this.watches.put(GCPhase.CHECKING, stats.checkDeletedDocs);
-            this.watches.put(GCPhase.DETAILED_GC, stats.detailedGcDocs);
             this.watches.put(GCPhase.DELETING, stats.deleteDeletedDocs);
             this.watches.put(GCPhase.SORTING, stats.sortDocIds);
             this.watches.put(GCPhase.SPLITS_CLEANUP, stats.collectAndDeleteSplitDocs);
             this.watches.put(GCPhase.UPDATING, stats.updateResurrectedDocuments);
+            this.watches.put(GCPhase.DETAILED_GC, stats.detailedGCDocs);
+            this.watches.put(GCPhase.COLLECT_PROPS, stats.collectDeletedProps);
+            this.watches.put(GCPhase.COLLECT_OLD_REVS, stats.collectDeletedOldRevs);
+            this.watches.put(GCPhase.COLLECT_UNMERGED_BC, stats.collectUnmergedBC);
+            this.watches.put(GCPhase.DETAILED_GC_CLEANUP, stats.deleteDetailedGCDocs);
             this.canceled = canceled;
         }
 
@@ -534,7 +567,7 @@ public class VersionGarbageCollector {
                     collectDeletedDocuments(phases, headRevision, rec);
                     collectSplitDocuments(phases, sweepRevisions, rec);
                     if (detailedGCEnabled) {
-                        // run only if enabled
+                        // run only if detailed GC enabled
                         collectDetailedGarbage(phases, headRevision, rec);
                     }
                 }
@@ -568,102 +601,74 @@ public class VersionGarbageCollector {
          * it is okay that it takes a considerable amount of time.
          *
          * @param phases {@link GCPhases}
-         * @param headRevision the current head revision of
-         * @throws IOException
-         * @throws LimitExceededException
+         * @param headRevision the current head revision of the node store
          */
         private void collectDetailedGarbage(final GCPhases phases, final RevisionVector headRevision, final VersionGCRecommendations rec)
                 throws IOException, LimitExceededException {
             int docsTraversed = 0;
-            long oldestModifiedGced = rec.scopeFullGC.fromMs;
+            boolean foundDoc = true;
+            long oldestModifiedGCed = rec.scopeFullGC.fromMs;
             try (DetailedGC gc = new DetailedGC(headRevision, monitor, cancel)) {
                 final long fromModified = rec.scopeFullGC.fromMs;
                 final long toModified = rec.scopeFullGC.toMs;
-//                if (rec.fullDetailGCTimestamp == -1) {
-//                    // then full detail-gc is disabled or over - use regular scope then
-//                    fromModified = rec.scope.fromMs;
-//                    toModified = rec.scope.toMs;
-//                } else {
-//                    // then full detail-gc is enabled - use it then
-//                    fromModified = rec.fullDetailGCTimestamp; // TODO: once we're passed rec.scope.fromMs we should
-//                    // disable fullgc
-//                    toModified = rec.scope.toMs; // the 'to' here is the max. it will process only eg 1 batch
-//                }
-                // TODO : remove me
-                boolean foundAnything = false; // I think this flag is redundant
-                if (phases.start(GCPhase.COLLECTING)) {
-                    Iterable<NodeDocument> itr = versionStore.getModifiedDocs(oldestModifiedGced, toModified, 2000);
-                    final Stopwatch timer = Stopwatch.createUnstarted();
-                    timer.reset().start();
-                    try {
-                        for (NodeDocument doc : itr) {
-                            // continue with GC?
-                            if (cancel.get()) {
-                                break;
-                            }
-                            if (phases.start(GCPhase.DETAILED_GC)) {
-                                gc.detailedGC(doc, phases);
-                                phases.stop(GCPhase.DETAILED_GC);
-                            }
+                if (phases.start(GCPhase.DETAILED_GC)) {
+                    while (foundDoc && oldestModifiedGCed < toModified && docsTraversed <= PROGRESS_BATCH_SIZE) {
+                        // set foundDoc to false to allow exiting the while loop
+                        foundDoc = false;
+                        Iterable<NodeDocument> itr = versionStore.getModifiedDocs(oldestModifiedGCed, toModified, 1000);
+                        try {
+                            for (NodeDocument doc : itr) {
+                                foundDoc = true;
+                                // continue with GC?
+                                if (cancel.get()) {
+                                    break;
+                                }
+                                docsTraversed++;
+                                if (docsTraversed % PROGRESS_BATCH_SIZE == 0) {
+                                    monitor.info("Iterated through {} documents so far. {} had detail garbage",
+                                            docsTraversed, gc.getGarbageDocsCount());
+                                }
 
-                            // TODO : remove this code, I don't think its possible to fetch these documents
-                            //  who doesn't have _modified field
-                            final Long modified = doc.getModified();
-                            if (modified == null) {
-                                monitor.warn("collectDetailGarbage : document has no _modified property : {}",
-                                        doc.getId());
-                            } else if (modified < oldestModifiedGced) {
-                                monitor.warn(
-                                        "collectDetailGarbage : document has older _modified than query boundary : {} (from: {}, to: {})",
-                                        modified, fromModified, toModified);
-                            } else {
-                                oldestModifiedGced = modified;
-                            }
-                            foundAnything = true;
-                            docsTraversed++;
-                            if (docsTraversed % PROGRESS_BATCH_SIZE == 0) {
-                                monitor.info("Iterated through {} documents so far. {} had detail garbage",
-                                        docsTraversed, gc.getNumDocuments());
-                            }
-                            // this would never hit, since we are only fetching the oldest 2000 element in batches of 1000
-                            // TODO: remove this if above mentioned logic is fine
-                            if (rec.maxCollect > 0 && gc.getNumDocuments() > rec.maxCollect) {
-                                // TODO: how would we recover from this?
-                                // If we don't want above solution, then one of the another solution is to use lower time duration
-                                // as done in document deletion process or use lower limit value or
-                                // we should perform all the update ops in 1 go
-                                throw new LimitExceededException();
+                                // collect the data to delete in next step
+                                if (phases.start(GCPhase.COLLECTING)) {
+                                    gc.collectGarbage(doc, phases);
+                                    phases.stop(GCPhase.COLLECTING);
+                                }
+
+                                // TODO : remove this code, I don't think it's possible to fetch documents
+                                //  that don't have the _modified field
+                                final Long modified = doc.getModified();
+                                if (modified == null) {
+                                    monitor.warn("collectDetailGarbage : document has no _modified property : {}",
+                                            doc.getId());
+                                } else if (modified < oldestModifiedGCed) {
+                                    monitor.warn(
+                                            "collectDetailGarbage : document has older _modified than query boundary : {} (from: {}, to: {})",
+                                            modified, fromModified, toModified);
+                                } else {
+                                    oldestModifiedGCed = modified;
+                                }
+
+                                if (gc.hasGarbage()) {
+                                    if (phases.start(GCPhase.DETAILED_GC_CLEANUP)) {
+                                        gc.removeGarbage(phases.stats);
+                                        phases.stop(GCPhase.DETAILED_GC_CLEANUP);
+                                    }
+                                }
+
+                                oldestModifiedGCed = modified == null ? fromModified : modified;
                             }
-                            oldestModifiedGced = modified == null ? fromModified : modified;
+                        } finally {
+                            Utils.closeIfCloseable(itr);
+                            phases.stats.oldestModifiedGced = oldestModifiedGCed;
                         }
-                    } finally {
-                        Utils.closeIfCloseable(itr);
-                        // why do we need to stop this here, we are already stopping the original gc run.
-                        // can this be removed
-                        delayOnModifications(timer.stop().elapsed(TimeUnit.MILLISECONDS));
-                        phases.stats.oldestModifiedGced = oldestModifiedGced;
                     }
-                    phases.stop(GCPhase.COLLECTING);
-//                    if (!cancel.get() && foundAnything) {
-//                        // TODO: move to evaluate()
-//                        rec.setLongSetting(SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP, oldestModifiedGced + 1);
-//                    }
+                    phases.stop(GCPhase.DETAILED_GC);
                 }
             }
         }
 
-        private void delayOnModifications(long durationMs) {
-            long delayMs = Math.round(durationMs * options.delayFactor);
-            if (!cancel.get() && delayMs > 0) {
-                try {
-                    Clock clock = nodeStore.getClock();
-                    clock.waitUntil(clock.getTime() + delayMs);
-                }
-                catch (InterruptedException ex) {
-                    /* ignore */
-                }
-            }
-        }
+
 
         private void collectSplitDocuments(GCPhases phases,
                                            RevisionVector sweepRevisions,
@@ -752,77 +757,146 @@ public class VersionGarbageCollector {
         }
     }
 
-    private static class DetailedGC implements Closeable {
+    private class DetailedGC implements Closeable {
 
         private final RevisionVector headRevision;
         private final GCMonitor monitor;
         private final AtomicBoolean cancel;
-        private int count;
+        private final Stopwatch timer;
+        private final List<UpdateOp> updateOpList;
+        private int garbageDocsCount;
 
         public DetailedGC(@NotNull RevisionVector headRevision, @NotNull GCMonitor monitor, @NotNull AtomicBoolean cancel) {
             this.headRevision = requireNonNull(headRevision);
             this.monitor = monitor;
             this.cancel = cancel;
+            this.updateOpList = new ArrayList<>();
+            this.timer = Stopwatch.createUnstarted();
         }
 
-        public void detailedGC(NodeDocument doc, GCPhases phases) {
-//            deleteSample(doc, phases);
-            UpdateOp updateOp = new UpdateOp(requireNonNull(doc.getId()), false);
-            deleteDeletedProperties(doc, phases, updateOp);
-            deleteUnmergedBranchCommitDocument(doc, phases, updateOp);
-            deleteOldRevisions(doc, phases, updateOp);
+        public void collectGarbage(final NodeDocument doc, final GCPhases phases) {
+            // Runs the three collectors against one shared UpdateOp; the op is only queued here, not applied.
+            monitor.info("Collecting Detailed Garbage for doc [{}]", doc.getId());
+
+            final UpdateOp op = new UpdateOp(requireNonNull(doc.getId()), false);
+            collectDeletedProperties(doc, phases, op);
+            collectUnmergedBranchCommitDocument(doc, phases, op);
+            collectOldRevisions(doc, phases, op);
+            // only add if there are changes for this doc
+            if (op.hasChanges()) {
+                garbageDocsCount++;
+                monitor.info("Collected [{}] garbage for doc [{}]", op.getChanges().size(), doc.getId());
+                updateOpList.add(op);
+            }
         }
 
-        /** TODO remove, this is just a skeleton sample */
-//        private void deleteSample(NodeDocument doc, GCPhases phases) {
-//            if (doc.getId().contains("should_delete")) {
-//                if (phases.start(GCPhase.DELETING)) {
-//                    monitor.info("deleteSample: should do the deletion now, but this is demo only. I'm still learning");
-//                    System.out.println("do the actual deletion");
-//                    count++;
-//                    phases.stop(GCPhase.DELETING);
-//                }
-//            }
-//        }
+        private boolean hasGarbage() {
+            return garbageDocsCount > 0;
+        }
 
-        private void deleteUnmergedBranchCommitDocument(NodeDocument doc, GCPhases phases, UpdateOp updateOp) {
-            // TODO Auto-generated method stub
+        private void collectUnmergedBranchCommitDocument(final NodeDocument doc, final GCPhases phases, final UpdateOp updateOp) {
+            if (phases.start(GCPhase.COLLECT_UNMERGED_BC)){
+                // TODO add unmerged BC collection logic
+                phases.stop(GCPhase.COLLECT_UNMERGED_BC);
+            }
 
         }
 
-        private void deleteDeletedProperties(final NodeDocument doc, final GCPhases phases, final UpdateOp updateOp) {
+        private void collectDeletedProperties(final NodeDocument doc, final GCPhases phases, final UpdateOp updateOp) {
 
             // get Map of all properties along with their values
-            final Map<String, SortedMap<Revision, String>> properties = doc.getProperties();
-
-            // find all the properties which can be removed from document
-            // All the properties whose value is null in their respective
-            // latest revision are eligible to be garbage collected.
-            properties.forEach((propName, revisionStringSortedMap) -> {
-                if (revisionStringSortedMap.keySet()
-                        .stream()
-                        .sorted(REVERSE)
-                        .limit(1)
-                        .anyMatch(revision -> revisionStringSortedMap.get(revision) == null)) {
-                    // set this property for removal
-                    updateOp.remove(propName);
+            if (phases.start(GCPhase.COLLECT_PROPS)) {
+                final Set<String> properties = doc.getPropertyNames();
+
+                // find all the properties which can be removed from document.
+                // All the properties whose value is null in head revision are
+                // eligible to be garbage collected.
+
+                final Set<String> retainPropSet = ofNullable(doc.getNodeAtRevision(nodeStore, headRevision, null))
+                        .map(DocumentNodeState::getPropertyNames)
+                        .orElse(emptySet());
+                final int deletedPropsGCCount = properties.stream()
+                        .filter(p -> !retainPropSet.contains(p))
+                        .mapToInt(x -> {
+                            updateOp.remove(x);
+                            return 1;})
+                        .sum();
+
+
+                phases.stats.deletedPropsGCCount += deletedPropsGCCount;
+                if (log.isDebugEnabled()) {
+                    log.debug("Collected {} deleted properties for document {}", deletedPropsGCCount, doc.getId());
                 }
-            });
+                phases.stop(GCPhase.COLLECT_PROPS);
+            }
         }
 
-        private void deleteOldRevisions(NodeDocument doc, GCPhases phases, UpdateOp updateOp) {
-            // TODO Auto-generated method stub
+        private void collectOldRevisions(NodeDocument doc, GCPhases phases, UpdateOp updateOp) {
+
+            if (phases.start(GCPhase.COLLECT_OLD_REVS)){
+                // TODO add old rev collection logic
+                phases.stop(GCPhase.COLLECT_OLD_REVS);
+            }
 
         }
 
-        long getNumDocuments() {
-            return count;
+        int getGarbageDocsCount() {
+            return garbageDocsCount;
         }
 
         @Override
         public void close() throws IOException {
 
         }
+
+        public void removeGarbage(final VersionGCStats stats) {
+            // Applies every queued UpdateOp to NODES and adds the number of non-null results to stats.
+            if (updateOpList.isEmpty()) {
+                if (log.isDebugEnabled()) {
+                    log.debug("Skipping removal of detailed garbage, cause no garbage detected");
+                }
+                return;
+            }
+
+            int updatedDocs;
+
+            monitor.info("Proceeding to update [{}] documents", updateOpList.size());
+            // Guard on the level that is actually logged, so the id list is only joined when it will be emitted.
+            if (log.isTraceEnabled()) {
+                String collect = updateOpList.stream().map(UpdateOp::getId).collect(Collectors.joining(","));
+                log.trace("Performing batch update of documents with following id's. \n{}", collect);
+            }
+            // Checked right before writing: on cancel nothing is updated and the queued ops are left untouched.
+            if (cancel.get()) {
+                log.info("Aborting the removal of detailed garbage since RGC had been cancelled");
+                return;
+            }
+            // Time the writes so delayOnModifications() can pause in proportion to how long they took.
+            timer.reset().start();
+            try {
+                // TODO create an api to bulk update findAndUpdate Ops
+                updatedDocs = (int) updateOpList.stream().map(op -> ds.findAndUpdate(NODES, op)).filter(Objects::nonNull).count();
+                stats.updatedDetailedGCDocsCount += updatedDocs;
+                log.info("Updated [{}] documents", updatedDocs);
+                // now reset delete metadata
+                updateOpList.clear();
+                garbageDocsCount = 0;
+            } finally {
+                delayOnModifications(timer.stop().elapsed(MILLISECONDS), cancel);
+            }
+        }
+    }
+    private void delayOnModifications(final long durationMs, final AtomicBoolean cancel) { // pause after a modification
+        long delayMs = round(durationMs * options.delayFactor); // wait time scales with how long the modification took
+        if (!cancel.get() && delayMs > 0) { // no wait once cancelled or when the scaled delay rounds to zero
+            try {
+                Clock clock = nodeStore.getClock();
+                clock.waitUntil(clock.getTime() + delayMs);
+            }
+            catch (InterruptedException ex) {
+                /* ignore: the wait just ends early. NOTE(review): the interrupt status is not restored here */
+            }
+        }
     }
 
     /**
@@ -959,19 +1033,6 @@ public class VersionGarbageCollector {
 
         //------------------------------< internal >----------------------------
 
-        private void delayOnModifications(long durationMs) {
-            long delayMs = Math.round(durationMs * options.delayFactor);
-            if (!cancel.get() && delayMs > 0) {
-                try {
-                    Clock clock = nodeStore.getClock();
-                    clock.waitUntil(clock.getTime() + delayMs);
-                }
-                catch (InterruptedException ex) {
-                    /* ignore */
-                }
-            }
-        }
-
         private Iterator<String> previousDocIdsFor(NodeDocument doc) {
             Map<Revision, Range> prevRanges = doc.getPreviousRanges(true);
             if (prevRanges.isEmpty()) {
@@ -1127,7 +1188,7 @@ public class VersionGarbageCollector {
                         monitor.info(msg);
                     }
                 } finally {
-                    delayOnModifications(timer.stop().elapsed(TimeUnit.MILLISECONDS));
+                    delayOnModifications(timer.stop().elapsed(TimeUnit.MILLISECONDS), cancel);
                 }
             }
             return deletedCount;
@@ -1160,7 +1221,7 @@ public class VersionGarbageCollector {
                 }
             }
             finally {
-                delayOnModifications(timer.stop().elapsed(TimeUnit.MILLISECONDS));
+                delayOnModifications(timer.stop().elapsed(TimeUnit.MILLISECONDS), cancel);
             }
             return updateCount;
         }
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
index e34d8f36b0..4d01e5d3da 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
@@ -19,6 +19,8 @@
 
 package org.apache.jackrabbit.oak.plugins.document.mongo;
 
+import static java.util.Optional.ofNullable;
+import static java.util.concurrent.TimeUnit.SECONDS;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.concat;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.filter;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.transform;
@@ -42,6 +44,7 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
+import java.util.function.Consumer;
 import java.util.regex.Pattern;
 
 import org.apache.jackrabbit.oak.plugins.document.Document;
@@ -111,10 +114,10 @@ public class MongoVersionGCSupport extends VersionGCSupport {
     @Override
     public CloseableIterable<NodeDocument> getPossiblyDeletedDocs(final long fromModified, final long toModified) {
         //_deletedOnce == true && _modified >= fromModified && _modified < toModified
-        Bson query = and(
+        Bson query = Filters.and(
                 Filters.eq(DELETED_ONCE, true),
-                gte(MODIFIED_IN_SECS, getModifiedInSecs(fromModified)),
-                lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified))
+                Filters.gte(MODIFIED_IN_SECS, getModifiedInSecs(fromModified)),
+                Filters.lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified))
         );
         FindIterable<BasicDBObject> cursor = getNodeCollection()
                 .find(query).batchSize(batchSize);
@@ -139,9 +142,10 @@ public class MongoVersionGCSupport extends VersionGCSupport {
         // _modified >= fromModified && _modified < toModified
         final Bson query = and(gte(MODIFIED_IN_SECS, getModifiedInSecs(fromModified)),
                 lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified)));
+        final Bson sort = Filters.eq(MODIFIED_IN_SECS, 1);
         final FindIterable<BasicDBObject> cursor = getNodeCollection()
                 .find(query)
-                .sort(new org.bson.Document(MODIFIED_IN_SECS, 1))
+                .sort(sort)
                 .limit(limit);
         return CloseableIterable.wrap(transform(cursor, input -> store.convertFromDBObject(NODES, input)));
     }
@@ -219,6 +223,35 @@ public class MongoVersionGCSupport extends VersionGCSupport {
         return result.get(0);
     }
 
+    /**
+     * Retrieve the time of the oldest modified document, in milliseconds.
+     * Falls back to {@code clock.getTime()} when the query yields no document.
+     * @param clock clock supplying the fallback time when no document is found
+     * @return the timestamp of the oldest modified document, or the current clock time.
+     */
+    @Override
+    public long getOldestModifiedTimestamp(final Clock clock) {
+        LOG.info("getOldestModifiedTimestamp() <- start");
+        // NOTE(review): Filters.eq is used as the sort spec for MODIFIED_IN_SECS; confirm it is meant as ascending.
+        final Bson sort = Filters.eq(MODIFIED_IN_SECS, 1);
+        final List<Long> result = new ArrayList<>(1);
+        // limit(1): at most one document reaches the consumer; a null modified value is reported as 0 ms.
+        getNodeCollection().find().sort(sort).limit(1).forEach(
+                (Consumer<BasicDBObject>) document ->
+                        ofNullable(store.convertFromDBObject(NODES, document))
+                                .ifPresent(doc -> {
+                    long modifiedMs = SECONDS.toMillis(ofNullable(doc.getModified()).orElse(0L));
+                    LOG.info("getOldestModifiedTimestamp() -> {}", Utils.timestampToString(modifiedMs));
+                    result.add(modifiedMs);
+                }));
+
+        if (result.isEmpty()) {
+            LOG.info("getOldestModifiedTimestamp() -> none found, return current time");
+            result.add(clock.getTime());
+        }
+        return result.get(0);
+    }
+
     private List<Bson> createQueries(Set<SplitDocType> gcTypes,
                                  RevisionVector sweepRevs,
                                  long oldestRevTimeStamp) {
@@ -233,9 +266,9 @@ public class MongoVersionGCSupport extends VersionGCSupport {
         }
         // OAK-8351: this (last) query only contains SD_TYPE and SD_MAX_REV_TIME_IN_SECS
         // so mongodb should really use that _sdType_1__sdMaxRevTime_1 index
-        result.add(and(
+        result.add(Filters.and(
                 Filters.or(orClauses),
-                lt(SD_MAX_REV_TIME_IN_SECS, getModifiedInSecs(oldestRevTimeStamp))
+                Filters.lt(SD_MAX_REV_TIME_IN_SECS, getModifiedInSecs(oldestRevTimeStamp))
                 ));
 
         return result;
@@ -266,16 +299,16 @@ public class MongoVersionGCSupport extends VersionGCSupport {
             Bson idPathClause = Filters.or(
                     Filters.regex(ID, Pattern.compile(".*" + idSuffix)),
                     // previous documents with long paths do not have a '-' in the id
-                    and(
+                    Filters.and(
                             Filters.regex(ID, Pattern.compile("[^-]*")),
                             Filters.regex(PATH, Pattern.compile(".*" + idSuffix))
                     )
             );
 
             long minMaxRevTimeInSecs = Math.min(maxRevTimeInSecs, getModifiedInSecs(r.getTimestamp()));
-            result.add(and(
+            result.add(Filters.and(
                     Filters.eq(SD_TYPE, DEFAULT_NO_BRANCH.typeCode()),
-                    lt(SD_MAX_REV_TIME_IN_SECS, minMaxRevTimeInSecs),
+                    Filters.lt(SD_MAX_REV_TIME_IN_SECS, minMaxRevTimeInSecs),
                     idPathClause
                     ));
         }
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java
index 5caa65d875..26fc1311fa 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java
@@ -715,6 +715,7 @@ public class RDBDocumentStoreJDBC {
         }
 
         if (sortBy != null) {
+            // FIXME : order should be determined via sortBy field
             query.append(" order by ID");
         }
 
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
index a463499793..f26268bcd3 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
@@ -16,7 +16,13 @@
  */
 package org.apache.jackrabbit.oak.plugins.document.rdb;
 
+import static java.util.Collections.emptyList;
+import static java.util.List.of;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.filter;
+import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.getModifiedInSecs;
+import static org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStore.EMPTY_KEY_PATTERN;
 
 import java.io.Closeable;
 import java.io.IOException;
@@ -85,6 +91,29 @@ public class RDBVersionGCSupport extends VersionGCSupport {
         }
     }
 
+    /**
+     * Returns documents that have a {@link NodeDocument#MODIFIED_IN_SECS} value
+     * within the given range. The two passed modified timestamps are in milliseconds
+     * since the epoch and the implementation will convert them to seconds at
+     * the granularity of the {@link NodeDocument#MODIFIED_IN_SECS} field and
+     * then perform the comparison.
+     *
+     * @param fromModified the lower bound modified timestamp (inclusive)
+     * @param toModified   the upper bound modified timestamp (exclusive)
+     * @param limit        the limit of documents to return
+     * @return matching documents.
+     */
+    @Override
+    public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit) {
+        List<QueryCondition> conditions = of(new QueryCondition(MODIFIED_IN_SECS, "<", getModifiedInSecs(toModified)),
+                new QueryCondition(MODIFIED_IN_SECS, ">=", getModifiedInSecs(fromModified)));
+        if (MODE == 1) {
+            return getIterator(EMPTY_KEY_PATTERN, conditions); // NOTE(review): 'limit' is not passed in this branch - confirm
+        } else {
+            return store.queryAsIterable(NODES, null, null, EMPTY_KEY_PATTERN, conditions, limit, MODIFIED_IN_SECS);
+        }
+    }
+
     @Override
     protected Iterable<NodeDocument> identifyGarbage(final Set<SplitDocType> gcTypes, final RevisionVector sweepRevs,
             final long oldestRevTimeStamp) {
@@ -239,6 +268,33 @@ public class RDBVersionGCSupport extends VersionGCSupport {
         }
     }
 
+    /**
+     * Retrieve the time of the oldest modified document, in milliseconds.
+     * Returns {@code clock.getTime()} if the lookup fails or yields a non-positive value.
+     * @param clock clock supplying the fallback time
+     * @return the timestamp of the oldest modified document, or the current clock time.
+     */
+    @Override
+    public long getOldestModifiedTimestamp(Clock clock) {
+        long modifiedMs = Long.MIN_VALUE;
+        // Long.MIN_VALUE acts as "not found": it is kept when getMinValue throws and routes to the fallback below.
+        LOG.info("getOldestModifiedTimestamp() <- start");
+        try {
+            long modifiedSec = store.getMinValue(NODES, MODIFIED_IN_SECS, null, null, EMPTY_KEY_PATTERN, emptyList());
+            modifiedMs = TimeUnit.SECONDS.toMillis(modifiedSec);
+        } catch (DocumentStoreException ex) {
+            LOG.error("getOldestModifiedTimestamp()", ex);
+        }
+        // Any non-positive value (including the sentinel) is treated as "no document found".
+        if (modifiedMs > 0) {
+            LOG.info("getOldestModifiedTimestamp() -> {}", Utils.timestampToString(modifiedMs));
+            return modifiedMs;
+        } else {
+            LOG.info("getOldestModifiedTimestamp() -> none found, return current time");
+            return clock.getTime();
+        }
+    }
+
     @Override
     public long getDeletedOnceCount() {
         return store.queryCount(Collection.NODES, null, null, RDBDocumentStore.EMPTY_KEY_PATTERN,
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DetailGCHelper.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DetailGCHelper.java
index 8a585c7dc0..d52c5e33ae 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DetailGCHelper.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DetailGCHelper.java
@@ -18,25 +18,19 @@
  */
 package org.apache.jackrabbit.oak.plugins.document;
 
-public class DetailGCHelper {
+import static org.apache.commons.lang3.reflect.FieldUtils.writeField;
 
-    public static void setLongSetting(String propName, long val, DocumentNodeStore ns) {
-        UpdateOp updateOp = new UpdateOp(VersionGarbageCollector.SETTINGS_COLLECTION_ID, true);
-        updateOp.set(propName, val);
-        ns.getDocumentStore().createOrUpdate(Collection.SETTINGS, updateOp);
-    }
+public class DetailGCHelper {
 
-    public static void enableDetailGC(DocumentNodeStore ns) {
-        VersionGarbageCollector.DETAIL_GC_ENABLED = true;
-        if (ns != null) {
-            setLongSetting(VersionGarbageCollector.SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP, 0, ns);
+    public static void enableDetailGC(final VersionGarbageCollector vgc) throws IllegalAccessException {
+        if (vgc != null) {
+            writeField(vgc, "detailedGCEnabled", true, true);
         }
     }
 
-    public static void disableDetailGC(DocumentNodeStore ns) {
-        VersionGarbageCollector.DETAIL_GC_ENABLED = false;
-        if (ns != null) {
-            setLongSetting(VersionGarbageCollector.SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP, -1, ns);
+    public static void disableDetailGC(final VersionGarbageCollector vgc) throws IllegalAccessException {
+        if (vgc != null) {
+            writeField(vgc, "detailedGCEnabled", false, true);
         }
     }
 }
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/NodeDocumentTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/NodeDocumentTest.java
index b99897dfb0..2adcc9f86a 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/NodeDocumentTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/NodeDocumentTest.java
@@ -449,6 +449,38 @@ public class NodeDocumentTest {
         assertEquals(140, uncommittedRevisions);
     }
 
+    @Test
+    public void getPropertyNames() throws CommitFailedException {
+        DocumentStore store = new MemoryDocumentStore();
+        DocumentNodeStore ns = new DocumentMK.Builder().setDocumentStore(store).setAsyncDelay(0).getNodeStore();
+
+        // add ten distinct properties p0..p9 to /x, one merge per property
+        for (int i = 0; i < 10; i++) {
+            NodeBuilder nb = ns.getRoot().builder();
+            nb.child("x").setProperty("p"+i, i);
+            merge(ns, nb);
+        }
+        // JUnit assertion instead of the 'assert' keyword, which is skipped when the JVM runs without -ea
+        final NodeDocument nodeDocument = store.find(NODES, "1:/x");
+        org.junit.Assert.assertNotNull(nodeDocument);
+        assertEquals(10, nodeDocument.getPropertyNames().size());
+    }
+
+    @Test
+    public void getNoPropertyNames() throws CommitFailedException {
+        DocumentStore store = new MemoryDocumentStore();
+        DocumentNodeStore ns = new DocumentMK.Builder().setDocumentStore(store).setAsyncDelay(0).getNodeStore();
+
+        // create /x without setting any property on it
+        NodeBuilder nb = ns.getRoot().builder();
+        nb.child("x");
+        merge(ns, nb);
+        // JUnit assertion instead of the 'assert' keyword, which is skipped when the JVM runs without -ea
+        final NodeDocument nodeDocument = store.find(NODES, "1:/x");
+        org.junit.Assert.assertNotNull(nodeDocument);
+        assertEquals(0, nodeDocument.getPropertyNames().size());
+    }
+
     @Test
     public void getNewestRevisionTooExpensive() throws Exception {
         final int NUM_CHANGES = 200;
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java
index 055188be46..ed39a372b2 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java
@@ -22,6 +22,9 @@ import org.junit.Before;
 import org.junit.Rule;
 import org.junit.Test;
 
+import static org.apache.jackrabbit.oak.plugins.document.DetailGCHelper.enableDetailGC;
+import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP;
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
 
@@ -49,6 +52,20 @@ public class VersionGCInitTest {
 
         vgc = store.find(Collection.SETTINGS, "versionGC");
         assertNotNull(vgc);
+        assertEquals(0L, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP));
     }
 
+    @Test
+    public void lazyInitializeWithDetailedGC() throws Exception {
+        DocumentStore store = ns.getDocumentStore();
+        Document vgc = store.find(Collection.SETTINGS, "versionGC");
+        assertNull(vgc);
+
+        enableDetailGC(ns.getVersionGarbageCollector());
+        ns.getVersionGarbageCollector().gc(1, TimeUnit.DAYS);
+
+        vgc = store.find(Collection.SETTINGS, "versionGC");
+        assertNotNull(vgc);
+        assertEquals(-1L, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP));
+    }
 }
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCStatsTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCStatsTest.java
index 13448e5403..1515ea9312 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCStatsTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCStatsTest.java
@@ -67,6 +67,11 @@ public class VersionGCStatsTest {
         assertEquals(stats.collectAndDeleteSplitDocs.elapsed(MICROSECONDS), cumulative.collectAndDeleteSplitDocsElapsed);
         assertEquals(stats.sortDocIds.elapsed(MICROSECONDS), cumulative.sortDocIdsElapsed);
         assertEquals(stats.updateResurrectedDocuments.elapsed(MICROSECONDS), cumulative.updateResurrectedDocumentsElapsed);
+        assertEquals(stats.detailedGCDocs.elapsed(MICROSECONDS), cumulative.detailedGCDocsElapsed);
+        assertEquals(stats.deleteDetailedGCDocs.elapsed(MICROSECONDS), cumulative.deleteDetailedGCDocsElapsed);
+        assertEquals(stats.collectDeletedProps.elapsed(MICROSECONDS), cumulative.collectDeletedPropsElapsed);
+        assertEquals(stats.collectDeletedOldRevs.elapsed(MICROSECONDS), cumulative.collectDeletedOldRevsElapsed);
+        assertEquals(stats.collectUnmergedBC.elapsed(MICROSECONDS), cumulative.collectUnmergedBCElapsed);
     }
 
     @Test
@@ -83,6 +88,11 @@ public class VersionGCStatsTest {
         assertEquals(stats.collectAndDeleteSplitDocs.elapsed(MICROSECONDS) * 2, cumulative.collectAndDeleteSplitDocsElapsed);
         assertEquals(stats.sortDocIds.elapsed(MICROSECONDS) * 2, cumulative.sortDocIdsElapsed);
         assertEquals(stats.updateResurrectedDocuments.elapsed(MICROSECONDS) * 2, cumulative.updateResurrectedDocumentsElapsed);
+        assertEquals(stats.detailedGCDocs.elapsed(MICROSECONDS) * 2, cumulative.detailedGCDocsElapsed);
+        assertEquals(stats.deleteDetailedGCDocs.elapsed(MICROSECONDS) * 2, cumulative.deleteDetailedGCDocsElapsed);
+        assertEquals(stats.collectDeletedProps.elapsed(MICROSECONDS) * 2, cumulative.collectDeletedPropsElapsed);
+        assertEquals(stats.collectDeletedOldRevs.elapsed(MICROSECONDS) * 2, cumulative.collectDeletedOldRevsElapsed);
+        assertEquals(stats.collectUnmergedBC.elapsed(MICROSECONDS) * 2, cumulative.collectUnmergedBCElapsed);
     }
 
     private void forEachStopwatch(VersionGCStats stats, Callable c) {
@@ -93,6 +103,11 @@ public class VersionGCStatsTest {
         c.call(stats.collectAndDeleteSplitDocs);
         c.call(stats.sortDocIds);
         c.call(stats.updateResurrectedDocuments);
+        c.call(stats.detailedGCDocs);
+        c.call(stats.deleteDetailedGCDocs);
+        c.call(stats.collectDeletedProps);
+        c.call(stats.collectDeletedOldRevs);
+        c.call(stats.collectUnmergedBC);
     }
     
     private interface Callable {
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java
index 48f3f362ce..f29716ca5d 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java
@@ -44,7 +44,6 @@ import org.jetbrains.annotations.NotNull;
 import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.Before;
-import org.junit.Ignore;
 import org.junit.Rule;
 import org.junit.Test;
 
@@ -96,7 +95,7 @@ public class VersionGCTest {
 
     @After
     public void tearDown() throws Exception {
-        DetailGCHelper.disableDetailGC(ns);
+        DetailGCHelper.disableDetailGC(gc);
         execService.shutdown();
         execService.awaitTermination(1, MINUTES);
     }
@@ -343,21 +342,19 @@ public class VersionGCTest {
 
     // OAK-10199
     @Test
-    @Ignore
     public void testDetailGcDocumentRead_disabled() throws Exception {
-        DetailGCHelper.disableDetailGC(ns);
+        DetailGCHelper.disableDetailGC(gc);
         VersionGCStats stats = gc.gc(30, TimeUnit.MINUTES);
         assertNotNull(stats);
-        assertEquals(0, stats.detailedGcDocsElapsed);
+        assertEquals(0, stats.detailedGCDocsElapsed);
     }
 
     @Test
-    @Ignore
     public void testDetailGcDocumentRead_enabled() throws Exception {
-        DetailGCHelper.enableDetailGC(ns);
+        DetailGCHelper.enableDetailGC(gc);
         VersionGCStats stats = gc.gc(30, TimeUnit.MINUTES);
         assertNotNull(stats);
-        assertNotEquals(0, stats.detailedGcDocsElapsed);
+        assertNotEquals(0, stats.detailedGCDocsElapsed);
     }
 
     // OAK-10199
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
index d33cc8c7de..4470a07f96 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
@@ -34,10 +34,12 @@ import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicReference;
 
+import static org.apache.commons.lang3.reflect.FieldUtils.writeField;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.filter;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.size;
 import static java.util.concurrent.TimeUnit.HOURS;
 import static java.util.concurrent.TimeUnit.MINUTES;
+import static org.apache.jackrabbit.oak.api.Type.STRING;
 import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.NUM_REVS_THRESHOLD;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.PREV_SPLIT_FACTOR;
@@ -225,6 +227,68 @@ public class VersionGarbageCollectorIT {
     public void gcLongPathSplitDocs() throws Exception {
         gcSplitDocsInternal(Strings.repeat("sub", 120));
     }
+
+    @Test
+    public void testGCDeletedProps() throws Exception{
+        //1. Create nodes
+        NodeBuilder b1 = store.getRoot().builder();
+
+        // Add property to node & save
+        b1.child("x").setProperty("test", "t", STRING);
+        b1.child("z").setProperty("prop", "foo", STRING);
+        store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        // update the property
+        b1 = store.getRoot().builder();
+        b1.getChildNode("z").setProperty("prop", "bar", STRING);
+        store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        // update property again
+        b1 = store.getRoot().builder();
+        b1.getChildNode("z").setProperty("prop", "baz", STRING);
+        store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        // enable the detailed gc flag
+        writeField(gc, "detailedGCEnabled", true, true);
+        long maxAge = 1; //hours
+        long delta = TimeUnit.MINUTES.toMillis(10);
+        //1. Go past GC age and check no GC done as nothing deleted
+        clock.waitUntil(Revision.getCurrentTimestamp() + maxAge);
+        VersionGCStats stats = gc.gc(maxAge, HOURS);
+        assertEquals(0, stats.deletedPropsGCCount);
+
+        //Remove property
+        NodeBuilder b2 = store.getRoot().builder();
+        b2.getChildNode("z").removeProperty("prop");
+        store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        store.runBackgroundOperations();
+
+        //2. Check that a deleted property is not collected before maxAge
+        //Clock cannot move back (it moved forward in #1) so double the maxAge
+        clock.waitUntil(clock.getTime() + delta);
+        stats = gc.gc(maxAge*2, HOURS);
+        assertEquals(0, stats.deletedPropsGCCount);
+
+        //3. Check that deleted property does get collected post maxAge
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
+
+        stats = gc.gc(maxAge*2, HOURS);
+        assertEquals(1, stats.deletedPropsGCCount);
+
+        //4. Check that a revived property (deleted and created again) does not get gc
+        NodeBuilder b3 = store.getRoot().builder();
+        b3.child("x").removeProperty("test");
+        store.merge(b3, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        NodeBuilder b4 = store.getRoot().builder();
+        b4.child("x").setProperty("test", "t", STRING);
+        store.merge(b4, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
+        stats = gc.gc(maxAge*2, HOURS);
+        assertEquals(0, stats.deletedPropsGCCount);
+    }
     
     private void gcSplitDocsInternal(String subNodeName) throws Exception {
         long maxAge = 1; //hrs


[jackrabbit-oak] 09/28: OAK-10199 : minor refactoring

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch DetailedGC/OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 3ae56d3c7a1725ee4e84e042eca4be3b95751c3d
Author: Rishabh Kumar <di...@adobe.com>
AuthorDate: Mon Jun 19 13:49:48 2023 +0530

    OAK-10199 : minor refactoring
---
 .../org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
index abdfdf4a64..e58ec05903 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
@@ -55,7 +55,7 @@ public class VersionGCSupport {
 
     /**
      * Returns documents that have a {@link NodeDocument#MODIFIED_IN_SECS} value
-     * within the given range and the {@link NodeDocument#  DELETED} set to
+     * within the given range and the {@link NodeDocument#DELETED} set to
      * {@code true}. The two passed modified timestamps are in milliseconds
      * since the epoch and the implementation will convert them to seconds at
      * the granularity of the {@link NodeDocument#MODIFIED_IN_SECS} field and


[jackrabbit-oak] 07/28: OAK-10199 : fixed code smells as suggested by Sonar

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch DetailedGC/OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 9930d10e7d145205dfade89b8e0995bf9b76e13e
Author: Rishabh Kumar <di...@adobe.com>
AuthorDate: Wed May 31 14:33:10 2023 +0530

    OAK-10199 : fixed code smells as suggested by Sonar
---
 .../oak/plugins/document/VersionGCRecommendations.java    | 13 +++++++------
 .../oak/plugins/document/VersionGarbageCollector.java     | 15 +++++++--------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
index f04b56fc52..4584d925c0 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
@@ -22,7 +22,6 @@ import java.util.HashMap;
 import java.util.Map;
 import java.util.concurrent.TimeUnit;
 
-import com.google.common.collect.ImmutableMap;
 import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.VersionGCStats;
 import org.apache.jackrabbit.oak.plugins.document.util.TimeInterval;
 import org.apache.jackrabbit.oak.plugins.document.util.Utils;
@@ -31,6 +30,8 @@ import org.apache.jackrabbit.oak.stats.Clock;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static java.lang.Long.MAX_VALUE;
+import static java.util.Map.of;
 import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP;
 import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP;
 
@@ -120,8 +121,8 @@ public class VersionGCRecommendations {
             oldestPossibleFullGC = detailedGCTimestamp - 1;
         }
 
-        TimeInterval scopeFullGC = new TimeInterval(oldestPossibleFullGC, Long.MAX_VALUE);
-        scopeFullGC = scopeFullGC.notLaterThan(keep.fromMs);
+        TimeInterval fullGCTimeInternal = new TimeInterval(oldestPossibleFullGC, MAX_VALUE);
+        fullGCTimeInternal = fullGCTimeInternal.notLaterThan(keep.fromMs);
 
         suggestedIntervalMs = settings.get(VersionGarbageCollector.SETTINGS_COLLECTION_REC_INTERVAL_PROP);
         if (suggestedIntervalMs > 0) {
@@ -181,7 +182,7 @@ public class VersionGCRecommendations {
         this.precisionMs = options.precisionMs;
         this.ignoreDueToCheckPoint = ignoreDueToCheckPoint;
         this.scope = scope;
-        this.scopeFullGC = scopeFullGC;
+        this.scopeFullGC = fullGCTimeInternal;
         this.scopeIsComplete = scope.toMs >= keep.fromMs;
         this.maxCollect = collectLimit;
         this.suggestedIntervalMs = suggestedIntervalMs;
@@ -205,7 +206,7 @@ public class VersionGCRecommendations {
             stats.needRepeat = true;
         } else if (!stats.canceled && !stats.ignoredGCDueToCheckPoint) {
             // success, we would not expect to encounter revisions older than this in the future
-            setLongSetting(ImmutableMap.of(SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP, scope.toMs,
+            setLongSetting(of(SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP, scope.toMs,
                     SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP, stats.oldestModifiedGced));
 
             int count = stats.deletedDocGCCount - stats.deletedLeafDocGCCount;
@@ -254,7 +255,7 @@ public class VersionGCRecommendations {
     }
 
     private void setLongSetting(String propName, long val) {
-        setLongSetting(Map.of(propName, val));
+        setLongSetting(of(propName, val));
     }
 
     private void setLongSetting(final Map<String, Long> propValMap) {
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
index 3f9cb23f9a..b562831e24 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
@@ -62,6 +62,7 @@ import static java.util.Collections.emptySet;
 import static java.util.Objects.requireNonNull;
 import static java.util.Optional.ofNullable;
 import static java.util.concurrent.TimeUnit.MILLISECONDS;
+import static java.util.stream.Collectors.joining;
 import static org.apache.jackrabbit.guava.common.base.StandardSystemProperty.LINE_SEPARATOR;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.all;
 import static org.apache.jackrabbit.guava.common.collect.Iterators.partition;
@@ -604,7 +605,7 @@ public class VersionGarbageCollector {
          * @param headRevision the current head revision of node store
          */
         private void collectDetailedGarbage(final GCPhases phases, final RevisionVector headRevision, final VersionGCRecommendations rec)
-                throws IOException, LimitExceededException {
+                throws IOException {
             int docsTraversed = 0;
             boolean foundDoc = true;
             long oldestModifiedGCed = rec.scopeFullGC.fromMs;
@@ -647,11 +648,9 @@ public class VersionGarbageCollector {
                                     oldestModifiedGCed = modified;
                                 }
 
-                                if (gc.hasGarbage()) {
-                                    if (phases.start(GCPhase.DETAILED_GC_CLEANUP)) {
-                                        gc.removeGarbage(phases.stats);
-                                        phases.stop(GCPhase.DETAILED_GC_CLEANUP);
-                                    }
+                                if (gc.hasGarbage() && phases.start(GCPhase.DETAILED_GC_CLEANUP)) {
+                                    gc.removeGarbage(phases.stats);
+                                    phases.stop(GCPhase.DETAILED_GC_CLEANUP);
                                 }
 
                                 oldestModifiedGCed = modified == null ? fromModified : modified;
@@ -861,8 +860,8 @@ public class VersionGarbageCollector {
             monitor.info("Proceeding to update [{}] documents", updateOpList.size());
 
             if (log.isDebugEnabled()) {
-                String collect = updateOpList.stream().map(UpdateOp::getId).collect(Collectors.joining(","));
-                log.trace("Performing batch update of documents with following id's. \n" + collect);
+                String collect = updateOpList.stream().map(UpdateOp::getId).collect(joining(","));
+                log.debug("Performing batch update of documents with following id's [{}]", collect);
             }
 
             if (cancel.get()) {


[jackrabbit-oak] 16/28: OAK-10199 : added test case to continue with GC even if there is crash

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch DetailedGC/OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 5131702d320aa0490eacd4ddcc727d9d81cce0b1
Author: Rishabh Kumar <di...@adobe.com>
AuthorDate: Thu Jun 22 14:19:31 2023 +0530

    OAK-10199 : added test case to continue with GC even if there is crash
---
 .../document/VersionGarbageCollectorIT.java        | 27 +++++++++++++---------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
index e0de0c0617..031176ca33 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
@@ -84,7 +84,6 @@ import org.apache.jackrabbit.oak.stats.Clock;
 import org.jetbrains.annotations.NotNull;
 import org.junit.After;
 import org.junit.Before;
-import org.junit.Ignore;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
@@ -92,7 +91,7 @@ import org.junit.runners.Parameterized;
 @RunWith(Parameterized.class)
 public class VersionGarbageCollectorIT {
 
-    private DocumentStoreFixture fixture;
+    private final DocumentStoreFixture fixture;
 
     private Clock clock;
 
@@ -145,6 +144,7 @@ public class VersionGarbageCollectorIT {
         if (store != null) {
             store.dispose();
         }
+        ClusterNodeInfo.resetClockToDefault();
         Revision.resetClockToDefault();
         execService.shutdown();
         execService.awaitTermination(1, MINUTES);
@@ -456,15 +456,19 @@ public class VersionGarbageCollectorIT {
 
     // Test when properties are not collected in one GC cycle
     @Test
-    @Ignore
     public void testGCDeletedProps_4() throws Exception {
-        documentMKBuilder = new DocumentMK.Builder().clock(clock)
-                .setLeaseCheckMode(LeaseCheckMode.DISABLED)
-                .setDocumentStore(new FailingDocumentStore(fixture.createDocumentStore(), 42)).setAsyncDelay(0);
-        store = documentMKBuilder.getNodeStore();
+        final FailingDocumentStore fds = new FailingDocumentStore(fixture.createDocumentStore(), 42) {
+            @Override
+            public void dispose() {}
+        };
+        store = new DocumentMK.Builder().clock(clock).setLeaseCheckMode(LeaseCheckMode.DISABLED)
+                .setDocumentStore(fds).setAsyncDelay(0).getNodeStore();
+
         assertTrue(store.getDocumentStore() instanceof FailingDocumentStore);
+
         MongoTestUtils.setReadPreference(store, ReadPreference.primary());
         gc = store.getVersionGarbageCollector();
+
         //1. Create nodes with properties
         NodeBuilder b1 = store.getRoot().builder();
         // Add property to node & save
@@ -489,25 +493,26 @@ public class VersionGarbageCollectorIT {
         //3. Check that deleted property does get collected again
         // increment the clock again by more than 2 hours + delta
         clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
-        gc.setOptions(gc.getOptions().withMaxIterations(1));
 
-        ((FailingDocumentStore) store.getDocumentStore()).fail().after(0).eternally();
+        fds.fail().after(0).eternally();
         try {
             store.dispose();
             fail("dispose() must fail with an exception");
         } catch (DocumentStoreException e) {
             // expected
         }
-        ((FailingDocumentStore) store.getDocumentStore()).fail().never();
+        fds.fail().never();
 
         // create new store
         store = new DocumentMK.Builder().clock(clock).setLeaseCheckMode(LeaseCheckMode.DISABLED)
-                .setDocumentStore(new FailingDocumentStore(fixture.createDocumentStore(1), 42)).setAsyncDelay(0)
+                .setDocumentStore(fds).setAsyncDelay(0)
                 .getNodeStore();
         assertTrue(store.getDocumentStore() instanceof FailingDocumentStore);
         MongoTestUtils.setReadPreference(store, ReadPreference.primary());
         gc = store.getVersionGarbageCollector();
         store.runBackgroundOperations();
+        // enable the detailed gc flag
+        writeField(gc, "detailedGCEnabled", true, true);
 
         //4. Check that deleted property does get collected again
         // increment the clock again by more than 2 hours + delta


[jackrabbit-oak] 20/28: OAK-10199 : fixed query to avoid skipping documents with greater _modified timestamp

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch DetailedGC/OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit a397f06cc2ce5a33a523514b5ebf648194e8521f
Author: Rishabh Kumar <di...@adobe.com>
AuthorDate: Wed Jul 12 21:05:02 2023 +0530

    OAK-10199 : fixed query to avoid skipping documents with greater _modified timestamp
---
 .../plugins/document/VersionGCRecommendations.java | 85 +++++++++++++------
 .../oak/plugins/document/VersionGCSupport.java     | 39 ++++++---
 .../plugins/document/VersionGarbageCollector.java  | 65 +++++++++------
 .../document/mongo/MongoVersionGCSupport.java      | 38 ++++-----
 .../plugins/document/rdb/RDBVersionGCSupport.java  | 57 ++++++++++---
 .../oak/plugins/document/VersionGCInitTest.java    | 13 +--
 .../oak/plugins/document/VersionGCSupportTest.java |  8 +-
 .../oak/plugins/document/VersionGCTest.java        |  8 +-
 .../document/VersionGarbageCollectorIT.java        | 96 ++++++++++++++++++++--
 9 files changed, 298 insertions(+), 111 deletions(-)

diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
index 0fd0766f5b..2092844299 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
@@ -21,19 +21,21 @@ package org.apache.jackrabbit.oak.plugins.document;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicLong;
 
 import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.VersionGCStats;
 import org.apache.jackrabbit.oak.plugins.document.util.TimeInterval;
 import org.apache.jackrabbit.oak.spi.gc.GCMonitor;
 import org.apache.jackrabbit.oak.stats.Clock;
+import org.jetbrains.annotations.NotNull;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import static java.lang.Long.MAX_VALUE;
 import static java.util.Map.of;
+import static java.util.Optional.ofNullable;
 import static java.util.concurrent.TimeUnit.SECONDS;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE;
-import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.NULL;
 import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP;
 import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP;
 import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_ID;
@@ -52,6 +54,7 @@ public class VersionGCRecommendations {
     private final GCMonitor gcmon;
 
     final boolean ignoreDueToCheckPoint;
+    final boolean ignoreDetailGCDueToCheckPoint;
     final TimeInterval scope;
     final TimeInterval scopeDetailedGC;
     final long maxCollect;
@@ -63,6 +66,7 @@ public class VersionGCRecommendations {
     private final long precisionMs;
     final long suggestedIntervalMs;
     private final boolean scopeIsComplete;
+    private final boolean detailedGCEnabled;
 
     /**
      * With the given maximum age of revisions to keep (earliest time in the past to collect),
@@ -85,20 +89,23 @@ public class VersionGCRecommendations {
      * @param vgc VersionGC support class
      * @param options options for running the gc
      * @param gcMonitor monitor class for messages
+     * @param detailedGCEnabled whether detailedGC is enabled or not
      */
     public VersionGCRecommendations(long maxRevisionAgeMs, Checkpoints checkpoints, Clock clock, VersionGCSupport vgc,
-            VersionGCOptions options, GCMonitor gcMonitor) {
+                                    VersionGCOptions options, GCMonitor gcMonitor, final boolean detailedGCEnabled) {
         boolean ignoreDueToCheckPoint = false;
+        boolean ignoreDetailGCDueToCheckPoint = false;
         long deletedOnceCount = 0;
         long suggestedIntervalMs;
         long oldestPossible;
-        long oldestModifiedDocTimeStamp;
+        final AtomicLong oldestModifiedDocTimeStamp = new AtomicLong();
         String oldestModifiedDocId;
         long collectLimit = options.collectLimit;
 
         this.vgc = vgc;
         this.gcmon = gcMonitor;
         this.originalCollectLimit = options.collectLimit;
+        this.detailedGCEnabled = detailedGCEnabled;
 
         TimeInterval keep = new TimeInterval(clock.getTime() - maxRevisionAgeMs, Long.MAX_VALUE);
 
@@ -120,20 +127,17 @@ public class VersionGCRecommendations {
         if (detailedGCTimestamp == 0) {
             // it will only happen for the very first time, we run this detailedGC
             log.info("No detailedGCTimestamp found, querying for the oldest modified candidate");
-            final NodeDocument doc = vgc.getOldestModifiedDoc(clock);
-            if (doc == NULL) {
-                oldestModifiedDocTimeStamp = 0L;
-            } else {
-                oldestModifiedDocTimeStamp = doc.getModified() == null ? 0L : SECONDS.toMillis(doc.getModified()) - 1L;
-            }
+            vgc.getOldestModifiedDoc(clock).ifPresentOrElse(
+                    d -> oldestModifiedDocTimeStamp.set(SECONDS.toMillis(ofNullable(d.getModified()).orElse(0L))),
+                    () -> oldestModifiedDocTimeStamp.set(0L));
             oldestModifiedDocId = MIN_ID_VALUE;
-            log.info("detailedGCTimestamp found: {}", timestampToString(oldestModifiedDocTimeStamp));
+            log.info("detailedGCTimestamp found: {}", timestampToString(oldestModifiedDocTimeStamp.get()));
         } else {
-            oldestModifiedDocTimeStamp = detailedGCTimestamp - 1L;
+            oldestModifiedDocTimeStamp.set(detailedGCTimestamp);
         }
 
-        TimeInterval detailedGCTimeInternal = new TimeInterval(oldestModifiedDocTimeStamp, MAX_VALUE);
-        detailedGCTimeInternal = detailedGCTimeInternal.notLaterThan(keep.fromMs);
+        TimeInterval scopeDetailedGC = new TimeInterval(oldestModifiedDocTimeStamp.get(), MAX_VALUE);
+        scopeDetailedGC = scopeDetailedGC.notLaterThan(keep.fromMs);
 
         suggestedIntervalMs = (long) settings.get(SETTINGS_COLLECTION_REC_INTERVAL_PROP);
         if (suggestedIntervalMs > 0) {
@@ -171,18 +175,14 @@ public class VersionGCRecommendations {
 
         //Check for any registered checkpoint which prevent the GC from running
         Revision checkpoint = checkpoints.getOldestRevisionToKeep();
-        if (checkpoint != null && scope.endsAfter(checkpoint.getTimestamp())) {
-            TimeInterval minimalScope = scope.startAndDuration(options.precisionMs);
-            if (minimalScope.endsAfter(checkpoint.getTimestamp())) {
-                log.warn("Ignoring RGC run because a valid checkpoint [{}] exists inside minimal scope {}.",
-                        checkpoint.toReadableString(), minimalScope);
-                ignoreDueToCheckPoint = true;
-            } else {
-                scope = scope.notLaterThan(checkpoint.getTimestamp() - 1);
-                detailedGCTimeInternal = detailedGCTimeInternal.notLaterThan(checkpoint.getTimestamp() - 1);
-                log.info("checkpoint at [{}] found, scope now {}, detailedGcScope now {}", timestampToString(checkpoint.getTimestamp()), scope, detailedGCTimeInternal);
-            }
-        }
+
+        final GCResult gcResult = getResult(options, ignoreDueToCheckPoint, scope, checkpoint);
+        scope = gcResult.gcScope;
+        ignoreDueToCheckPoint = gcResult.ignoreGC;
+
+        final GCResult detailGCResult = getResult(options, ignoreDetailGCDueToCheckPoint, scopeDetailedGC, checkpoint);
+        scopeDetailedGC = detailGCResult.gcScope;
+        ignoreDetailGCDueToCheckPoint = detailGCResult.ignoreGC;
 
         if (scope.getDurationMs() <= options.precisionMs) {
             // If we have narrowed the collect time interval down as much as we can, no
@@ -194,7 +194,8 @@ public class VersionGCRecommendations {
         this.precisionMs = options.precisionMs;
         this.ignoreDueToCheckPoint = ignoreDueToCheckPoint;
         this.scope = scope;
-        this.scopeDetailedGC = detailedGCTimeInternal;
+        this.ignoreDetailGCDueToCheckPoint = ignoreDetailGCDueToCheckPoint;
+        this.scopeDetailedGC = scopeDetailedGC;
         this.detailedGCId = oldestModifiedDocId;
         this.scopeIsComplete = scope.toMs >= keep.fromMs;
         this.maxCollect = collectLimit;
@@ -248,6 +249,13 @@ public class VersionGCRecommendations {
             }
             stats.needRepeat = !scopeIsComplete;
         }
+
+        // save data for detailed GC
+        if (detailedGCEnabled && !stats.canceled && !stats.ignoredDetailGCDueToCheckPoint) {
+            // success, we would not expect to encounter revisions older than this in the future
+            setLongSetting(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP, stats.oldestModifiedDocTimeStamp);
+            setStringSetting(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP, stats.oldestModifiedDocId);
+        }
     }
 
     private Map<String, Object> getVGCSettings() {
@@ -287,4 +295,29 @@ public class VersionGCRecommendations {
         propValMap.forEach(updateOp::set);
         vgc.getDocumentStore().createOrUpdate(Collection.SETTINGS, updateOp);
     }
+
+    @NotNull
+    private static GCResult getResult(VersionGCOptions options, boolean ignoreGC, TimeInterval gcScope, Revision checkpoint) {
+        if (checkpoint != null && gcScope.endsAfter(checkpoint.getTimestamp())) {
+            TimeInterval minimalScope = gcScope.startAndDuration(options.precisionMs);
+            if (minimalScope.endsAfter(checkpoint.getTimestamp())) {
+                log.warn("Ignoring GC run because a valid checkpoint [{}] exists inside minimal scope {}.", checkpoint.toReadableString(), minimalScope);
+                ignoreGC = true;
+            } else {
+                gcScope = gcScope.notLaterThan(checkpoint.getTimestamp() - 1);
+                log.info("checkpoint at [{}] found, detailedGCScope now {}", timestampToString(checkpoint.getTimestamp()), gcScope);
+            }
+        }
+        return new GCResult(ignoreGC, gcScope);
+    }
+
+    private static class GCResult {
+        public final boolean ignoreGC;
+        public final TimeInterval gcScope;
+
+        public GCResult(boolean ignoreGC, TimeInterval gcScope) {
+            this.ignoreGC = ignoreGC;
+            this.gcScope = gcScope;
+        }
+    }
 }
\ No newline at end of file
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
index 6086eef772..1e19eb6af7 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
@@ -20,17 +20,21 @@
 package org.apache.jackrabbit.oak.plugins.document;
 
 import static java.util.Comparator.comparing;
+import static java.util.Optional.empty;
+import static java.util.Optional.ofNullable;
+import static java.util.stream.Stream.concat;
+import static java.util.stream.StreamSupport.stream;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.filter;
 import static java.util.stream.Collectors.toList;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS;
-import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.NULL;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.getModifiedInSecs;
 import static org.apache.jackrabbit.oak.plugins.document.util.Utils.getAllDocuments;
 import static org.apache.jackrabbit.oak.plugins.document.util.Utils.getSelectedDocuments;
 
+import java.util.Optional;
 import java.util.Set;
-import java.util.stream.StreamSupport;
+import java.util.stream.Stream;
 
 import org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType;
 import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.VersionGCStats;
@@ -66,8 +70,7 @@ public class VersionGCSupport {
      * @return matching documents.
      */
     public Iterable<NodeDocument> getPossiblyDeletedDocs(final long fromModified, final long toModified) {
-        return StreamSupport
-                .stream(getSelectedDocuments(store, NodeDocument.DELETED_ONCE, 1).spliterator(), false)
+        return stream(getSelectedDocuments(store, NodeDocument.DELETED_ONCE, 1).spliterator(), false)
                 .filter(input -> input.wasDeletedOnce() && modifiedGreaterThanEquals(input, fromModified) && modifiedLessThan(input, toModified))
                 .collect(toList());
     }
@@ -90,9 +93,14 @@ public class VersionGCSupport {
      */
     public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit,
                                                   @NotNull final String fromId) {
-        return StreamSupport
-                .stream(getSelectedDocuments(store, MODIFIED_IN_SECS, 1, fromId).spliterator(), false)
-                .filter(input -> modifiedGreaterThanEquals(input, fromModified) && modifiedLessThan(input, toModified))
+        // (_modified = fromModified && _id > fromId || _modified > fromModified && _modified < toModified)
+        final Stream<NodeDocument> s1 = stream(getSelectedDocuments(store, MODIFIED_IN_SECS, 1, fromId).spliterator(), false)
+                .filter(input -> modifiedEqualsTo(input, fromModified));
+
+        final Stream<NodeDocument> s2 = stream(getSelectedDocuments(store, MODIFIED_IN_SECS, 1).spliterator(), false)
+                .filter(input -> modifiedGreaterThan(input, fromModified) && modifiedLessThan(input, toModified));
+
+        return concat(s1, s2)
                 .sorted((o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2))
                 .limit(limit)
                 .collect(toList());
@@ -102,6 +110,17 @@ public class VersionGCSupport {
         Long modified = doc.getModified();
         return modified != null && modified.compareTo(getModifiedInSecs(time)) >= 0;
     }
+
+    private boolean modifiedGreaterThan(final NodeDocument doc, final long time) {
+        Long modified = doc.getModified();
+        return modified != null && modified.compareTo(getModifiedInSecs(time)) > 0;
+    }
+
+    private boolean modifiedEqualsTo(final NodeDocument doc, final long time) {
+        Long modified = doc.getModified();
+        return modified != null && modified.compareTo(getModifiedInSecs(time)) == 0;
+    }
+
     private boolean modifiedLessThan(final NodeDocument doc, final long time) {
         Long modified = doc.getModified();
         return modified != null && modified.compareTo(getModifiedInSecs(time)) < 0;
@@ -189,7 +208,7 @@ public class VersionGCSupport {
      *
      * @return the oldest modified document.
      */
-    public NodeDocument getOldestModifiedDoc(final Clock clock) {
+    public Optional<NodeDocument> getOldestModifiedDoc(final Clock clock) {
         long ts = 0;
         long now = clock.getTime();
         Iterable<NodeDocument> docs = null;
@@ -198,13 +217,13 @@ public class VersionGCSupport {
         try {
             docs = getModifiedDocs(ts, now, 1, MIN_ID_VALUE);
             if (docs.iterator().hasNext()) {
-                return docs.iterator().next();
+                return ofNullable(docs.iterator().next());
             }
         } finally {
             Utils.closeIfCloseable(docs);
         }
         LOG.info("find oldest modified document to be {}", Utils.timestampToString(ts));
-        return NULL;
+        return empty();
     }
 
     public long getDeletedOnceCount() throws UnsupportedOperationException {
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
index 1e5b1129b9..766c8e4cc5 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
@@ -77,8 +77,6 @@ import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_I
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.COMMIT_ROOT_ONLY;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_LEAF;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_NO_BRANCH;
-import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.setDeleted;
-import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.setModified;
 import static org.slf4j.helpers.MessageFormatter.arrayFormat;
 
 public class VersionGarbageCollector {
@@ -233,7 +231,7 @@ public class VersionGarbageCollector {
         long maxRevisionAgeInMillis = unit.toMillis(maxRevisionAge);
         long now = nodeStore.getClock().getTime();
         VersionGCRecommendations rec = new VersionGCRecommendations(maxRevisionAgeInMillis, nodeStore.getCheckpoints(),
-                nodeStore.getClock(), versionStore, options, gcMonitor);
+                nodeStore.getClock(), versionStore, options, gcMonitor, detailedGCEnabled);
         int estimatedIterations = -1;
         if (rec.suggestedIntervalMs > 0) {
             estimatedIterations = (int)Math.ceil(
@@ -272,6 +270,7 @@ public class VersionGarbageCollector {
 
     public static class VersionGCStats {
         boolean ignoredGCDueToCheckPoint;
+        boolean ignoredDetailGCDueToCheckPoint;
         boolean canceled;
         boolean success = true;
         boolean limitExceeded;
@@ -349,6 +348,7 @@ public class VersionGarbageCollector {
 
             return "VersionGCStats{" +
                     "ignoredGCDueToCheckPoint=" + ignoredGCDueToCheckPoint +
+                    ", ignoredDetailGCDueToCheckPoint=" + ignoredDetailGCDueToCheckPoint +
                     ", canceled=" + canceled +
                     ", deletedDocGCCount=" + deletedDocGCCount + " (of which leaf: " + deletedLeafDocGCCount + ")" +
                     ", updateResurrectedGCCount=" + updateResurrectedGCCount +
@@ -366,6 +366,7 @@ public class VersionGarbageCollector {
         void addRun(VersionGCStats run) {
             ++iterationCount;
             this.ignoredGCDueToCheckPoint = run.ignoredGCDueToCheckPoint;
+            this.ignoredDetailGCDueToCheckPoint = run.ignoredDetailGCDueToCheckPoint;
             this.canceled = run.canceled;
             this.success = run.success;
             this.limitExceeded = run.limitExceeded;
@@ -566,13 +567,12 @@ public class VersionGarbageCollector {
             VersionGCStats stats = new VersionGCStats();
             stats.active.start();
             VersionGCRecommendations rec = new VersionGCRecommendations(maxRevisionAgeInMillis, nodeStore.getCheckpoints(),
-                    nodeStore.getClock(), versionStore, options, gcMonitor);
+                    nodeStore.getClock(), versionStore, options, gcMonitor, detailedGCEnabled);
             GCPhases phases = new GCPhases(cancel, stats, gcMonitor);
             try {
                 if (rec.ignoreDueToCheckPoint) {
                     phases.stats.ignoredGCDueToCheckPoint = true;
                     monitor.skipped("Checkpoint prevented revision garbage collection");
-                    cancel.set(true);
                 } else {
                     final RevisionVector headRevision = nodeStore.getHeadRevision();
                     final RevisionVector sweepRevisions = nodeStore.getSweepRevisions();
@@ -580,11 +580,26 @@ public class VersionGarbageCollector {
 
                     collectDeletedDocuments(phases, headRevision, rec);
                     collectSplitDocuments(phases, sweepRevisions, rec);
-                    if (detailedGCEnabled) {
-                        // run only if detailed GC enabled
+                }
+
+                // now run detailed GC if enabled
+                if (detailedGCEnabled) {
+                    if (rec.ignoreDetailGCDueToCheckPoint) {
+                        phases.stats.ignoredDetailGCDueToCheckPoint = true;
+                        monitor.skipped("Checkpoint prevented detailed revision garbage collection");
+                    } else {
+                        final RevisionVector headRevision = nodeStore.getHeadRevision();
+                        monitor.info("Looking at revisions in {} for detailed GC", rec.scopeDetailedGC);
                         collectDetailedGarbage(phases, headRevision, rec);
                     }
                 }
+
+                if (detailedGCEnabled && rec.ignoreDueToCheckPoint && rec.ignoreDetailGCDueToCheckPoint) {
+                    cancel.set(true);
+                } else if (!detailedGCEnabled && rec.ignoreDueToCheckPoint) {
+                    cancel.set(true);
+                }
+
             } catch (LimitExceededException ex) {
                 stats.limitExceeded = true;
             } finally {
@@ -623,20 +638,19 @@ public class VersionGarbageCollector {
             int docsTraversed = 0;
             boolean foundDoc = true;
             final long oldestModifiedMs = rec.scopeDetailedGC.fromMs;
+            final long toModified = rec.scopeDetailedGC.toMs;
             long oldModifiedMs = oldestModifiedMs;
             final String oldestModifiedDocId = rec.detailedGCId;
             try (DetailedGC gc = new DetailedGC(headRevision, monitor, cancel)) {
                 long fromModified = oldestModifiedMs;
-                String fromId = oldestModifiedDocId;
-                NodeDocument lastDoc = null;
-                final long toModified = rec.scopeDetailedGC.toMs;
+                String fromId = ofNullable(oldestModifiedDocId).orElse(MIN_ID_VALUE);
+                NodeDocument lastDoc;
                 if (phases.start(GCPhase.DETAILED_GC)) {
-                    while (foundDoc && fromModified < toModified && docsTraversed <= PROGRESS_BATCH_SIZE) {
+                    while (foundDoc && fromModified < toModified && docsTraversed < PROGRESS_BATCH_SIZE) {
                         // set foundDoc to false to allow exiting the while loop
                         foundDoc = false;
                         lastDoc = null;
                         Iterable<NodeDocument> itr = versionStore.getModifiedDocs(fromModified, toModified, 1000, fromId);
-                        final Revision revision = nodeStore.newRevision();
                         try {
                             for (NodeDocument doc : itr) {
                                 foundDoc = true;
@@ -647,7 +661,7 @@ public class VersionGarbageCollector {
                                     break;
                                 }
                                 docsTraversed++;
-                                if (docsTraversed % PROGRESS_BATCH_SIZE == 0) {
+                                if (docsTraversed % 100 == 0) {
                                     monitor.info("Iterated through {} documents so far. {} had detail garbage",
                                             docsTraversed, gc.getGarbageDocsCount());
                                 }
@@ -655,7 +669,7 @@ public class VersionGarbageCollector {
                                 lastDoc = doc;
                                 // collect the data to delete in next step
                                 if (phases.start(GCPhase.COLLECTING)) {
-                                    gc.collectGarbage(doc, phases, revision);
+                                    gc.collectGarbage(doc, phases);
                                     phases.stop(GCPhase.COLLECTING);
                                 }
 
@@ -681,7 +695,7 @@ public class VersionGarbageCollector {
                         } finally {
                             Utils.closeIfCloseable(itr);
                             phases.stats.oldestModifiedDocTimeStamp = fromModified;
-                            if (fromModified > (oldModifiedMs + 1)) {
+                            if (fromModified > oldModifiedMs) {
                                 // we have moved ahead, now we can reset oldestModifiedId to min value
                                 fromId = MIN_ID_VALUE;
                                 phases.stats.oldestModifiedDocId = MIN_ID_VALUE;
@@ -690,10 +704,10 @@ public class VersionGarbageCollector {
                                 // save the last _id traversed to avoid re-fetching of ids
                                 phases.stats.oldestModifiedDocId = fromId;
                             }
-                            oldModifiedMs = fromModified - 1;
+                            oldModifiedMs = fromModified;
                         }
-
-                        // if we are already at last document of current timeStamp,
+                        // if we didn't find any document, i.e. either we are already at the last document
+                        // of the current timestamp or there is no document for this timestamp,
                         // we need to reset fromId & increment fromModified and check again
                         if (!foundDoc && !Objects.equals(fromId, MIN_ID_VALUE)) {
                             fromId = MIN_ID_VALUE;
@@ -703,6 +717,13 @@ public class VersionGarbageCollector {
                     }
                     phases.stop(GCPhase.DETAILED_GC);
                 }
+            } finally {
+                if (docsTraversed < PROGRESS_BATCH_SIZE) {
+                    // we have traversed all the docs within the given time range and nothing is left,
+                    // so let's set oldestModifiedDocTimeStamp to the upper limit of this cycle
+                    phases.stats.oldestModifiedDocTimeStamp = toModified;
+                    phases.stats.oldestModifiedDocId = MIN_ID_VALUE;
+                }
             }
         }
 
@@ -815,14 +836,14 @@ public class VersionGarbageCollector {
             this.timer = Stopwatch.createUnstarted();
         }
 
-        public void collectGarbage(final NodeDocument doc, final GCPhases phases, final Revision revision) {
+        public void collectGarbage(final NodeDocument doc, final GCPhases phases) {
 
             monitor.info("Collecting Detailed Garbage for doc [{}]", doc.getId());
 
             final UpdateOp op = new UpdateOp(requireNonNull(doc.getId()), false);
             op.equals(MODIFIED_IN_SECS, doc.getModified());
 
-            collectDeletedProperties(doc, phases, op, revision);
+            collectDeletedProperties(doc, phases, op);
             collectUnmergedBranchCommitDocument(doc, phases, op);
             collectOldRevisions(doc, phases, op);
             // only add if there are changes for this doc
@@ -845,7 +866,7 @@ public class VersionGarbageCollector {
 
         }
 
-        private void collectDeletedProperties(final NodeDocument doc, final GCPhases phases, final UpdateOp updateOp, final Revision revision) {
+        private void collectDeletedProperties(final NodeDocument doc, final GCPhases phases, final UpdateOp updateOp) {
 
             // get Map of all properties along with their values
             if (phases.start(GCPhase.COLLECT_PROPS)) {
@@ -864,8 +885,6 @@ public class VersionGarbageCollector {
                         .filter(p -> !retainPropSet.contains(p))
                         .mapToInt(x -> {
                             updateOp.remove(x);
-                            setModified(updateOp,revision);
-                            setDeleted(updateOp, revision, false);
                             return 1;})
                         .sum();
 
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
index cf821fcf48..6637afa4aa 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
@@ -22,34 +22,36 @@ package org.apache.jackrabbit.oak.plugins.document.mongo;
 import static com.mongodb.client.model.Filters.eq;
 import static com.mongodb.client.model.Filters.exists;
 import static com.mongodb.client.model.Filters.gt;
+import static com.mongodb.client.model.Filters.or;
+import static java.util.Optional.empty;
 import static java.util.Optional.ofNullable;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.concat;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.filter;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.transform;
 import static com.mongodb.client.model.Filters.and;
-import static com.mongodb.client.model.Filters.gte;
 import static com.mongodb.client.model.Filters.lt;
 import static java.util.Collections.emptyList;
 import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
 import static org.apache.jackrabbit.oak.plugins.document.Document.ID;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.DELETED_ONCE;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS;
-import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.NULL;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.PATH;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SD_MAX_REV_TIME_IN_SECS;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SD_TYPE;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.getModifiedInSecs;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_NO_BRANCH;
 import static org.apache.jackrabbit.oak.plugins.document.mongo.MongoUtils.hasIndex;
+import static org.apache.jackrabbit.oak.plugins.document.util.CloseableIterable.wrap;
 
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Optional;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
-import java.util.function.Consumer;
 import java.util.regex.Pattern;
 
+import com.mongodb.client.MongoCursor;
 import org.apache.jackrabbit.oak.plugins.document.Document;
 import org.apache.jackrabbit.oak.plugins.document.NodeDocument;
 import org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType;
@@ -148,9 +150,11 @@ public class MongoVersionGCSupport extends VersionGCSupport {
     @Override
     public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit,
                                                   @NotNull final String fromId) {
-        // _modified >= fromModified && _modified < toModified && _id > fromId
-        final Bson query = and(gte(MODIFIED_IN_SECS, getModifiedInSecs(fromModified)),
-                lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified)), gt(ID, fromId));
+        // (_modified = fromModified && _id > fromId || _modified > fromModified && _modified < toModified)
+        final Bson query = or(
+                and(eq(MODIFIED_IN_SECS, getModifiedInSecs(fromModified)), gt(ID, fromId)),
+                and(gt(MODIFIED_IN_SECS, getModifiedInSecs(fromModified)), lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified))));
+
         // first sort by _modified and then by _id
         final Bson sort = and(eq(MODIFIED_IN_SECS, 1), eq(ID, 1));
 
@@ -158,7 +162,7 @@ public class MongoVersionGCSupport extends VersionGCSupport {
                 .find(query)
                 .sort(sort)
                 .limit(limit);
-        return CloseableIterable.wrap(transform(cursor, input -> store.convertFromDBObject(NODES, input)));
+        return wrap(transform(cursor, input -> store.convertFromDBObject(NODES, input)));
     }
 
     @Override
@@ -241,28 +245,22 @@ public class MongoVersionGCSupport extends VersionGCSupport {
      * @return the timestamp of the oldest modified document.
      */
     @Override
-    public NodeDocument getOldestModifiedDoc(final Clock clock) {
+    public Optional<NodeDocument> getOldestModifiedDoc(final Clock clock) {
         LOG.info("getOldestModifiedDoc() <- start");
 
         final Bson sort = and(eq(MODIFIED_IN_SECS, 1), eq(ID, 1));
-        final List<NodeDocument> result = new ArrayList<>(1);
 
         // we need to add query condition to ignore `previous` documents which doesn't have this field
         final Bson query = exists(MODIFIED_IN_SECS);
 
-        getNodeCollection().find(query).sort(sort).limit(1).forEach(
-                (Consumer<BasicDBObject>) document ->
-                        ofNullable(store.convertFromDBObject(NODES, document))
-                                .ifPresent(doc -> {
-                    LOG.info("getOldestModifiedDoc() -> {}", doc);
-                    result.add(doc);
-                }));
+        FindIterable<BasicDBObject> limit = getNodeCollection().find(query).sort(sort).limit(1);
 
-        if (result.isEmpty()) {
-            LOG.info("getOldestModifiedDoc() -> none found, return NULL document");
-            result.add(NULL);
+        try(MongoCursor<BasicDBObject> cur = limit.iterator()) {
+            return cur.hasNext() ? ofNullable(store.convertFromDBObject(NODES, cur.next())) : empty();
+        } catch (Exception ex) {
+            LOG.error("getOldestModifiedDoc() <- error while fetching data from Mongo", ex);
         }
-        return result.get(0);
+        return empty();
     }
 
     private List<Bson> createQueries(Set<SplitDocType> gcTypes,
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
index 9d96c35811..1b35a30f07 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
@@ -16,15 +16,23 @@
  */
 package org.apache.jackrabbit.oak.plugins.document.rdb;
 
+import static java.util.Comparator.comparing;
 import static java.util.List.of;
+import static java.util.Optional.empty;
+import static java.util.Optional.ofNullable;
+import static java.util.stream.Collectors.toList;
+import static java.util.stream.Stream.concat;
+import static java.util.stream.StreamSupport.stream;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.filter;
+import static org.apache.jackrabbit.guava.common.collect.Iterables.size;
 import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
 import static org.apache.jackrabbit.oak.plugins.document.Document.ID;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS;
-import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.NULL;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.getModifiedInSecs;
 import static org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStore.EMPTY_KEY_PATTERN;
+import static org.apache.jackrabbit.oak.plugins.document.util.CloseableIterable.wrap;
+import static org.apache.jackrabbit.oak.plugins.document.util.Utils.closeIfCloseable;
 
 import java.io.Closeable;
 import java.io.IOException;
@@ -33,11 +41,14 @@ import java.util.Arrays;
 import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Optional;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
+import java.util.stream.Stream;
 
 import org.apache.jackrabbit.oak.commons.properties.SystemPropertySupplier;
 import org.apache.jackrabbit.oak.plugins.document.Collection;
+import org.apache.jackrabbit.oak.plugins.document.Document;
 import org.apache.jackrabbit.oak.plugins.document.DocumentStoreException;
 import org.apache.jackrabbit.oak.plugins.document.NodeDocument;
 import org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType;
@@ -113,13 +124,40 @@ public class RDBVersionGCSupport extends VersionGCSupport {
     @Override
     public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit,
                                                   @NotNull final String fromId) {
-        List<QueryCondition> conditions = of(new QueryCondition(MODIFIED_IN_SECS, "<", getModifiedInSecs(toModified)),
-                new QueryCondition(MODIFIED_IN_SECS, ">=", getModifiedInSecs(fromModified)),
+        // (_modified = fromModified && _id > fromId || _modified > fromModified && _modified < toModified)
+        // TODO : introduce support for OR where clause in RDBDocumentStore
+        final List<QueryCondition> c1 = of(new QueryCondition(MODIFIED_IN_SECS, "=", getModifiedInSecs(fromModified)),
                 new QueryCondition(ID, ">", of(fromId)));
+
+        final List<QueryCondition> c2 = of(new QueryCondition(MODIFIED_IN_SECS, "<", getModifiedInSecs(toModified)),
+                new QueryCondition(MODIFIED_IN_SECS, ">", getModifiedInSecs(fromModified)));
+
         if (MODE == 1) {
-            return getIterator(EMPTY_KEY_PATTERN, conditions);
+            final Iterable<NodeDocument> itr1 = getIterator(EMPTY_KEY_PATTERN, c1);
+            if (size(itr1) >= limit) {
+                return itr1;
+            }
+            final Iterable<NodeDocument> itr2 = getIterator(EMPTY_KEY_PATTERN, c2);
+
+            final Stream<NodeDocument> s1 = stream(itr1.spliterator(), false);
+            final Stream<NodeDocument> s2 = stream(itr2.spliterator(), false);
+            return wrap(concat(s1, s2).sorted((o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2)).limit(limit).collect(toList()), () -> {
+                closeIfCloseable(itr1);
+                closeIfCloseable(itr2);
+            });
         } else {
-            return store.queryAsIterable(NODES, null, null, EMPTY_KEY_PATTERN, conditions, limit, of(MODIFIED_IN_SECS, ID));
+            final Iterable<NodeDocument> itr1 = store.queryAsIterable(NODES, null, null, EMPTY_KEY_PATTERN, c1, limit, of(MODIFIED_IN_SECS, ID));
+            if (size(itr1) >= limit) {
+                return itr1;
+            }
+            final Iterable<NodeDocument> itr2 = store.queryAsIterable(NODES, null, null, EMPTY_KEY_PATTERN, c2, limit, of(MODIFIED_IN_SECS, ID));
+
+            final Stream<NodeDocument> s1 = stream(itr1.spliterator(), false);
+            final Stream<NodeDocument> s2 = stream(itr2.spliterator(), false);
+            return wrap(concat(s1, s2).sorted((o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2)).limit(limit).collect(toList()), () -> {
+                closeIfCloseable(itr1);
+                closeIfCloseable(itr2);
+            });
         }
     }
 
@@ -284,20 +322,19 @@ public class RDBVersionGCSupport extends VersionGCSupport {
      * @return the timestamp of the oldest modified document.
      */
     @Override
-    public NodeDocument getOldestModifiedDoc(Clock clock) {
-        NodeDocument doc = NULL;
+    public Optional<NodeDocument> getOldestModifiedDoc(Clock clock) {
 
         LOG.info("getOldestModifiedDoc() <- start");
         Iterable<NodeDocument> modifiedDocs = null;
         try {
             modifiedDocs = getModifiedDocs(0L, clock.getTime(), 1, MIN_ID_VALUE);
-            doc = modifiedDocs.iterator().hasNext() ? modifiedDocs.iterator().next() : NULL;
+            return modifiedDocs.iterator().hasNext() ? ofNullable(modifiedDocs.iterator().next()) : empty();
         } catch (DocumentStoreException ex) {
             LOG.error("getOldestModifiedDoc()", ex);
         } finally {
-            Utils.closeIfCloseable(modifiedDocs);
+            closeIfCloseable(modifiedDocs);
         }
-        return doc;
+        return empty();
     }
 
     @Override
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java
index 738c1109ad..0c6b2fccdf 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java
@@ -18,6 +18,7 @@
  */
 package org.apache.jackrabbit.oak.plugins.document;
 
+import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.VersionGCStats;
 import org.junit.Before;
 import org.junit.Rule;
 import org.junit.Test;
@@ -74,12 +75,13 @@ public class VersionGCInitTest {
         UpdateOp op = new UpdateOp(id, true);
         NodeDocument.setModified(op, r);
         store.createOrUpdate(NODES, op);
-        ns.getVersionGarbageCollector().gc(1, DAYS);
+        VersionGCStats stats = ns.getVersionGarbageCollector().gc(1, DAYS);
 
         vgc = store.find(SETTINGS, "versionGC");
         assertNotNull(vgc);
-        assertEquals(40_000L, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP));
-        assertEquals("1:/node", vgc.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP));
+        assertEquals(stats.oldestModifiedDocTimeStamp, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP));
+        assertEquals(stats.oldestModifiedDocId, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP));
+        assertEquals(MIN_ID_VALUE, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP));
     }
 
     @Test
@@ -89,11 +91,12 @@ public class VersionGCInitTest {
         assertNull(vgc);
 
         enableDetailGC(ns.getVersionGarbageCollector());
-        ns.getVersionGarbageCollector().gc(1, DAYS);
+        VersionGCStats stats = ns.getVersionGarbageCollector().gc(1, DAYS);
 
         vgc = store.find(SETTINGS, "versionGC");
         assertNotNull(vgc);
-        assertEquals(0L, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP));
+        assertEquals(stats.oldestModifiedDocTimeStamp, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP));
+        assertEquals(stats.oldestModifiedDocId, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP));
         assertEquals(MIN_ID_VALUE, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP));
     }
 }
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java
index 6d02fd38f7..565600a143 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java
@@ -196,7 +196,7 @@ public class VersionGCSupportTest {
         setModified(op, r);
         store.create(NODES, of(op));
 
-        NodeDocument oldestModifiedDoc = gcSupport.getOldestModifiedDoc(SIMPLE);
+        NodeDocument oldestModifiedDoc = gcSupport.getOldestModifiedDoc(SIMPLE).orElse(NULL);
         String oldestModifiedDocId = oldestModifiedDoc.getId();
         long reportedSecs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
         assertTrue("diff (s) should be < 5: " + Math.abs(secs - reportedSecs), Math.abs(secs - reportedSecs) < 5);
@@ -219,7 +219,7 @@ public class VersionGCSupportTest {
         // create 5_000 nodes
         store.create(NODES, updateOps);
 
-        NodeDocument oldestModifiedDoc = gcSupport.getOldestModifiedDoc(SIMPLE);
+        NodeDocument oldestModifiedDoc = gcSupport.getOldestModifiedDoc(SIMPLE).orElse(NULL);
         String oldestModifiedDocId = oldestModifiedDoc.getId();
         long oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
         assertEquals(40L, oldestModifiedDocTs);
@@ -254,7 +254,7 @@ public class VersionGCSupportTest {
         // create 5_001 nodes
         store.create(NODES, updateOps);
 
-        NodeDocument oldestModifiedDoc = gcSupport.getOldestModifiedDoc(SIMPLE);
+        NodeDocument oldestModifiedDoc = gcSupport.getOldestModifiedDoc(SIMPLE).orElse(NULL);
         String oldestModifiedDocId = oldestModifiedDoc.getId();
         long oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
         assertEquals(40L, oldestModifiedDocTs);
@@ -290,7 +290,7 @@ public class VersionGCSupportTest {
     @Test
     public void findModifiedDocsWhenOldestDocIsAbsent() {
 
-        NodeDocument oldestModifiedDoc = gcSupport.getOldestModifiedDoc(SIMPLE);
+        NodeDocument oldestModifiedDoc = gcSupport.getOldestModifiedDoc(SIMPLE).orElse(NULL);
         String oldestModifiedDocId = MIN_ID_VALUE;
         long oldestModifiedDocTs = 0L;
         assertEquals(NULL, oldestModifiedDoc);
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java
index cf3148a86d..39ca136c8d 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java
@@ -324,7 +324,7 @@ public class VersionGCTest {
         VersionGCSupport localgcsupport = fakeVersionGCSupport(ns.getDocumentStore(), oneYearAgo, twelveTimesTheLimit);
 
         VersionGCRecommendations rec = new VersionGCRecommendations(secondsPerDay, ns.getCheckpoints(), ns.getClock(), localgcsupport,
-                options, new TestGCMonitor());
+                options, new TestGCMonitor(), false);
 
         // should select a duration of roughly one month
         long duration= rec.scope.getDurationMs();
@@ -338,7 +338,7 @@ public class VersionGCTest {
         assertTrue(stats.needRepeat);
 
         rec = new VersionGCRecommendations(secondsPerDay, ns.getCheckpoints(), ns.getClock(), localgcsupport, options,
-                new TestGCMonitor());
+                new TestGCMonitor(), false);
 
         // new duration should be half
         long nduration = rec.scope.getDurationMs();
@@ -367,7 +367,7 @@ public class VersionGCTest {
         // loop until the recommended interval is at 60s (precisionMS)
         do {
             rec = new VersionGCRecommendations(secondsPerDay, ns.getCheckpoints(), ns.getClock(), localgcsupport, options,
-                    testmonitor);
+                    testmonitor, false);
             stats = new VersionGCStats();
             stats.limitExceeded = true;
             rec.evaluate(stats);
@@ -384,7 +384,7 @@ public class VersionGCTest {
             deletedCount -= deleted;
             localgcsupport = fakeVersionGCSupport(ns.getDocumentStore(), oldestDeleted, deletedCount);
             rec = new VersionGCRecommendations(secondsPerDay, ns.getCheckpoints(), ns.getClock(), localgcsupport, options,
-                    testmonitor);
+                    testmonitor, false);
             stats = new VersionGCStats();
             stats.limitExceeded = false;
             stats.deletedDocGCCount = deleted;
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
index 6d7382bac5..df785878b3 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
@@ -35,6 +35,7 @@ import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicReference;
 
 import static java.util.Objects.requireNonNull;
+import static java.util.concurrent.TimeUnit.MILLISECONDS;
 import static java.util.stream.Collectors.toList;
 import static java.util.stream.StreamSupport.stream;
 import static org.apache.commons.lang3.reflect.FieldUtils.writeField;
@@ -171,11 +172,15 @@ public class VersionGarbageCollectorIT {
         clock.waitUntil(cp.getTimestamp() + expiryTime - maxAge);
         VersionGCStats stats = gc.gc(maxAge, TimeUnit.MILLISECONDS);
         assertTrue(stats.ignoredGCDueToCheckPoint);
+        assertFalse(stats.ignoredDetailGCDueToCheckPoint);
+        assertTrue(stats.canceled);
 
         //Fast forward time to future such that checkpoint get expired
         clock.waitUntil(clock.getTime() + expiryTime + 1);
         stats = gc.gc(maxAge, TimeUnit.MILLISECONDS);
         assertFalse("GC should be performed", stats.ignoredGCDueToCheckPoint);
+        assertFalse("Detailed GC shouldn't be performed", stats.ignoredDetailGCDueToCheckPoint);
+        assertFalse(stats.canceled);
     }
 
     @Test
@@ -242,6 +247,78 @@ public class VersionGarbageCollectorIT {
     }
 
     // OAK-10199
+    @Test
+    public void detailedGCIgnoredForCheckpoint() throws Exception {
+        long expiryTime = 100, maxAge = 20;
+        // enable the detailed gc flag
+        writeField(gc, "detailedGCEnabled", true, true);
+
+        Revision cp = Revision.fromString(store.checkpoint(expiryTime));
+
+        //Fast forward time to future but before expiry of checkpoint
+        clock.waitUntil(cp.getTimestamp() + expiryTime - maxAge);
+        VersionGCStats stats = gc.gc(maxAge, TimeUnit.MILLISECONDS);
+        assertTrue(stats.ignoredDetailGCDueToCheckPoint);
+        assertTrue(stats.canceled);
+
+        //Fast forward time to future such that checkpoint get expired
+        clock.waitUntil(clock.getTime() + expiryTime + 1);
+        stats = gc.gc(maxAge, TimeUnit.MILLISECONDS);
+        assertFalse("Detailed GC should be performed", stats.ignoredDetailGCDueToCheckPoint);
+        assertFalse(stats.canceled);
+    }
+
+    @Test
+    public void testDetailedGCNotIgnoredForRGCCheckpoint() throws Exception {
+
+        // enable the detailed gc flag
+        writeField(gc, "detailedGCEnabled", true, true);
+
+        //1. Create nodes with properties
+        NodeBuilder b1 = store.getRoot().builder();
+
+        // Add property to node & save
+        b1.child("x").setProperty("test", "t", STRING);
+        b1.child("z").setProperty("test", "t", STRING);
+        store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        //Remove property
+        NodeBuilder b2 = store.getRoot().builder();
+        b2.getChildNode("x").removeProperty("test");
+        store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+        store.runBackgroundOperations();
+
+        //2. move clock forward by 2 hours
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(2));
+
+        //3. Create a checkpoint now with expiry of 1 hour
+        long expiryTime = 1, delta = MINUTES.toMillis(10);
+        NodeBuilder b3 = store.getRoot().builder();
+        b3.getChildNode("z").removeProperty("test");
+        store.merge(b3, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+        store.runBackgroundOperations();
+
+        Revision.fromString(store.checkpoint(HOURS.toMillis(expiryTime)));
+
+        //4. move clock forward by 10 mins
+        clock.waitUntil(clock.getTime() + delta);
+
+        // 5. Remove a node
+        NodeBuilder b4 = store.getRoot().builder();
+        b4.getChildNode("z").remove();
+        store.merge(b4, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+        store.runBackgroundOperations();
+
+        // 6. Now run gc after the checkpoint and verify that the removed property gets collected
+        clock.waitUntil(clock.getTime() + delta*2);
+        VersionGCStats stats = gc.gc(delta, MILLISECONDS);
+        assertEquals(1, stats.deletedPropsGCCount);
+        assertEquals(1, stats.updatedDetailedGCDocsCount);
+        assertTrue(stats.ignoredGCDueToCheckPoint);
+        assertFalse(stats.ignoredDetailGCDueToCheckPoint);
+        assertFalse(stats.canceled);
+    }
+
     @Test
     public void testGCDeletedProps() throws Exception {
         //1. Create nodes with properties
@@ -265,7 +342,7 @@ public class VersionGarbageCollectorIT {
         // enable the detailed gc flag
         writeField(gc, "detailedGCEnabled", true, true);
         long maxAge = 1; //hours
-        long delta = TimeUnit.MINUTES.toMillis(10);
+        long delta = MINUTES.toMillis(10);
         //1. Go past GC age and check no GC done as nothing deleted
         clock.waitUntil(Revision.getCurrentTimestamp() + maxAge);
         VersionGCStats stats = gc.gc(maxAge, HOURS);
@@ -365,7 +442,7 @@ public class VersionGarbageCollectorIT {
         // enable the detailed gc flag
         writeField(gc, "detailedGCEnabled", true, true);
         long maxAge = 1; //hours
-        long delta = TimeUnit.MINUTES.toMillis(20);
+        long delta = MINUTES.toMillis(20);
 
         //Remove property
         NodeBuilder b2 = store.getRoot().builder();
@@ -412,7 +489,7 @@ public class VersionGarbageCollectorIT {
         // enable the detailed gc flag
         writeField(gc, "detailedGCEnabled", true, true);
         long maxAge = 1; //hours
-        long delta = TimeUnit.MINUTES.toMillis(20);
+        long delta = MINUTES.toMillis(20);
 
 
         store.runBackgroundOperations();
@@ -426,6 +503,7 @@ public class VersionGarbageCollectorIT {
             long oldestModifiedDocTimeStamp = stats.oldestModifiedDocTimeStamp;
 
             Document document = store.getDocumentStore().find(SETTINGS, SETTINGS_COLLECTION_ID);
+            assert document != null;
             assertEquals(document.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP), oldestModifiedDocTimeStamp);
             assertEquals(document.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP), oldestModifiedDocId);
         }
@@ -444,7 +522,7 @@ public class VersionGarbageCollectorIT {
         // enable the detailed gc flag
         writeField(gc, "detailedGCEnabled", true, true);
         long maxAge = 1; //hours
-        long delta = TimeUnit.MINUTES.toMillis(10);
+        long delta = MINUTES.toMillis(10);
         //1. Go past GC age and check no GC done as nothing deleted
         clock.waitUntil(Revision.getCurrentTimestamp() + maxAge);
         VersionGCStats stats = gc.gc(maxAge, HOURS);
@@ -525,7 +603,7 @@ public class VersionGarbageCollectorIT {
         // enable the detailed gc flag
         writeField(gc, "detailedGCEnabled", true, true);
         long maxAge = 1; //hours
-        long delta = TimeUnit.MINUTES.toMillis(10);
+        long delta = MINUTES.toMillis(10);
 
         //3. Check that deleted property does get collected again
         // increment the clock again by more than 2 hours + delta
@@ -574,7 +652,7 @@ public class VersionGarbageCollectorIT {
         // enable the detailed gc flag
         writeField(gc, "detailedGCEnabled", true, true);
         long maxAge = 1; //hours
-        long delta = TimeUnit.MINUTES.toMillis(10);
+        long delta = MINUTES.toMillis(10);
         //1. Go past GC age and check no GC done as nothing deleted
         clock.waitUntil(Revision.getCurrentTimestamp() + maxAge);
         VersionGCStats stats = gc.gc(maxAge, HOURS);
@@ -630,7 +708,7 @@ public class VersionGarbageCollectorIT {
         // enable the detailed gc flag
         writeField(gc, "detailedGCEnabled", true, true);
         long maxAge = 1; //hours
-        long delta = TimeUnit.MINUTES.toMillis(10);
+        long delta = MINUTES.toMillis(10);
         //1. Go past GC age and check no GC done as nothing deleted
         clock.waitUntil(Revision.getCurrentTimestamp() + maxAge);
         VersionGCStats stats = gc.gc(maxAge, HOURS);
@@ -651,7 +729,7 @@ public class VersionGarbageCollectorIT {
         stats = gc.gc(maxAge*2, HOURS);
         assertEquals(0, stats.deletedPropsGCCount);
         assertEquals(0, stats.updatedDetailedGCDocsCount);
-        assertNull(stats.oldestModifiedDocId); // as GC hadn't run
+        assertEquals(MIN_ID_VALUE, stats.oldestModifiedDocId); // as GC hadn't run
 
         //3. Check that deleted property does get collected post maxAge
         clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
@@ -699,7 +777,7 @@ public class VersionGarbageCollectorIT {
         // enable the detailed gc flag
         writeField(gc, "detailedGCEnabled", true, true);
         long maxAge = 1; //hours
-        long delta = TimeUnit.MINUTES.toMillis(10);
+        long delta = MINUTES.toMillis(10);
         //1. Go past GC age and check no GC done as nothing deleted
         clock.waitUntil(Revision.getCurrentTimestamp() + maxAge);
         VersionGCStats stats = gc.gc(maxAge, HOURS);


[jackrabbit-oak] 21/28: OAK-10199 : refactored RDBVersionGCSupport code

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch DetailedGC/OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit e293fd6da7bfc6d8598d37afd57bbdaddc8668e6
Author: Rishabh Kumar <di...@adobe.com>
AuthorDate: Wed Jul 19 22:41:23 2023 +0530

    OAK-10199 : refactored RDBVersionGCSupport code
---
 .../plugins/document/rdb/RDBVersionGCSupport.java  | 54 ++++++++++++----------
 1 file changed, 30 insertions(+), 24 deletions(-)

diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
index 1b35a30f07..5e66bd974d 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
@@ -44,6 +44,7 @@ import java.util.List;
 import java.util.Optional;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
+import java.util.function.Supplier;
 import java.util.stream.Stream;
 
 import org.apache.jackrabbit.oak.commons.properties.SystemPropertySupplier;
@@ -133,32 +134,37 @@ public class RDBVersionGCSupport extends VersionGCSupport {
                 new QueryCondition(MODIFIED_IN_SECS, ">", getModifiedInSecs(fromModified)));
 
         if (MODE == 1) {
-            final Iterable<NodeDocument> itr1 = getIterator(EMPTY_KEY_PATTERN, c1);
-            if (size(itr1) >= limit) {
-                return itr1;
-            }
-            final Iterable<NodeDocument> itr2 = getIterator(EMPTY_KEY_PATTERN, c2);
-
-            final Stream<NodeDocument> s1 = stream(itr1.spliterator(), false);
-            final Stream<NodeDocument> s2 = stream(itr2.spliterator(), false);
-            return wrap(concat(s1, s2).sorted((o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2)).limit(limit).collect(toList()), () -> {
-                closeIfCloseable(itr1);
-                closeIfCloseable(itr2);
-            });
+            return getNodeDocuments(() -> getIterator(EMPTY_KEY_PATTERN, c1), () -> getIterator(EMPTY_KEY_PATTERN, c2), limit);
         } else {
-            final Iterable<NodeDocument> itr1 = store.queryAsIterable(NODES, null, null, EMPTY_KEY_PATTERN, c1, limit, of(MODIFIED_IN_SECS, ID));
-            if (size(itr1) >= limit) {
-                return itr1;
-            }
-            final Iterable<NodeDocument> itr2 = store.queryAsIterable(NODES, null, null, EMPTY_KEY_PATTERN, c2, limit, of(MODIFIED_IN_SECS, ID));
-
-            final Stream<NodeDocument> s1 = stream(itr1.spliterator(), false);
-            final Stream<NodeDocument> s2 = stream(itr2.spliterator(), false);
-            return wrap(concat(s1, s2).sorted((o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2)).limit(limit).collect(toList()), () -> {
-                closeIfCloseable(itr1);
-                closeIfCloseable(itr2);
-            });
+            return getNodeDocuments(() -> store.queryAsIterable(NODES, null, null, EMPTY_KEY_PATTERN, c1, limit, of(MODIFIED_IN_SECS, ID)),
+                    () -> store.queryAsIterable(NODES, null, null, EMPTY_KEY_PATTERN, c2, limit, of(MODIFIED_IN_SECS, ID)),
+                    limit);
+        }
+    }
+
+    /**
+     * Fetches {@link NodeDocument}s from the database.
+     *
+     * @param supplier1 document supplier on basis of 1st Condition
+     * @param supplier2 document supplier on basis of 2nd Condition
+     * @param limit no. of documents to fetch from db
+     * @return sorted documents supplied by supplier1 and supplier2
+     */
+    private Iterable<NodeDocument> getNodeDocuments(final Supplier<Iterable<NodeDocument>> supplier1, final Supplier<Iterable<NodeDocument>> supplier2, final int limit) {
+
+        final Iterable<NodeDocument> itr1 = supplier1.get();
+        if (size(itr1) >= limit) {
+            return itr1;
         }
+
+        final Iterable<NodeDocument> itr2 = supplier2.get();
+
+        final Stream<NodeDocument> s1 = stream(itr1.spliterator(), false);
+        final Stream<NodeDocument> s2 = stream(itr2.spliterator(), false);
+        return wrap(concat(s1, s2).sorted((o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2)).limit(limit).collect(toList()), () -> {
+            closeIfCloseable(itr1);
+            closeIfCloseable(itr2);
+        });
     }
 
     @Override


[jackrabbit-oak] 19/28: OAK-10199 : added logic to skip non garbage documents

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch DetailedGC/OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 8a8c3482e2591a66fa6a7a29a6c51f86a7dc0401
Author: Rishabh Kumar <di...@adobe.com>
AuthorDate: Wed Jun 28 02:06:50 2023 +0530

    OAK-10199 : added logic to skip non garbage documents
---
 .../plugins/document/VersionGCRecommendations.java |  4 +-
 .../plugins/document/VersionGarbageCollector.java  | 31 ++++++++-----
 .../document/VersionGarbageCollectorIT.java        | 52 ++++++++++++++++++++--
 3 files changed, 71 insertions(+), 16 deletions(-)

diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
index c80399f005..0fd0766f5b 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
@@ -118,7 +118,7 @@ public class VersionGCRecommendations {
         detailedGCTimestamp = (long) settings.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP);
         oldestModifiedDocId = (String) settings.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP);
         if (detailedGCTimestamp == 0) {
-            // it will only happens for the very first time, we run this detailedGC
+            // it will only happen for the very first time, we run this detailedGC
             log.info("No detailedGCTimestamp found, querying for the oldest modified candidate");
             final NodeDocument doc = vgc.getOldestModifiedDoc(clock);
             if (doc == NULL) {
@@ -129,7 +129,7 @@ public class VersionGCRecommendations {
             oldestModifiedDocId = MIN_ID_VALUE;
             log.info("detailedGCTimestamp found: {}", timestampToString(oldestModifiedDocTimeStamp));
         } else {
-            oldestModifiedDocTimeStamp = detailedGCTimestamp - 1;
+            oldestModifiedDocTimeStamp = detailedGCTimestamp - 1L;
         }
 
         TimeInterval detailedGCTimeInternal = new TimeInterval(oldestModifiedDocTimeStamp, MAX_VALUE);
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
index eb25184f62..1e5b1129b9 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
@@ -77,6 +77,8 @@ import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_I
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.COMMIT_ROOT_ONLY;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_LEAF;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_NO_BRANCH;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.setDeleted;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.setModified;
 import static org.slf4j.helpers.MessageFormatter.arrayFormat;
 
 public class VersionGarbageCollector {
@@ -614,15 +616,17 @@ public class VersionGarbageCollector {
          *
          * @param phases {@link GCPhases}
          * @param headRevision the current head revision of node store
+         * @param rec {@link VersionGCRecommendations} to recommend GC operation
          */
         private void collectDetailedGarbage(final GCPhases phases, final RevisionVector headRevision, final VersionGCRecommendations rec)
                 throws IOException {
             int docsTraversed = 0;
             boolean foundDoc = true;
-            final long oldestModifiedDocTimeStamp = rec.scopeDetailedGC.fromMs;
+            final long oldestModifiedMs = rec.scopeDetailedGC.fromMs;
+            long oldModifiedMs = oldestModifiedMs;
             final String oldestModifiedDocId = rec.detailedGCId;
             try (DetailedGC gc = new DetailedGC(headRevision, monitor, cancel)) {
-                long fromModified = oldestModifiedDocTimeStamp;
+                long fromModified = oldestModifiedMs;
                 String fromId = oldestModifiedDocId;
                 NodeDocument lastDoc = null;
                 final long toModified = rec.scopeDetailedGC.toMs;
@@ -630,7 +634,9 @@ public class VersionGarbageCollector {
                     while (foundDoc && fromModified < toModified && docsTraversed <= PROGRESS_BATCH_SIZE) {
                         // set foundDoc to false to allow exiting the while loop
                         foundDoc = false;
+                        lastDoc = null;
                         Iterable<NodeDocument> itr = versionStore.getModifiedDocs(fromModified, toModified, 1000, fromId);
+                        final Revision revision = nodeStore.newRevision();
                         try {
                             for (NodeDocument doc : itr) {
                                 foundDoc = true;
@@ -649,7 +655,7 @@ public class VersionGarbageCollector {
                                 lastDoc = doc;
                                 // collect the data to delete in next step
                                 if (phases.start(GCPhase.COLLECTING)) {
-                                    gc.collectGarbage(doc, phases);
+                                    gc.collectGarbage(doc, phases, revision);
                                     phases.stop(GCPhase.COLLECTING);
                                 }
 
@@ -657,7 +663,7 @@ public class VersionGarbageCollector {
                                 if (modified == null) {
                                     monitor.warn("collectDetailGarbage : document has no _modified property : {}",
                                             doc.getId());
-                                } else if (SECONDS.toMillis(modified) < oldestModifiedDocTimeStamp) {
+                                } else if (SECONDS.toMillis(modified) < oldestModifiedMs) {
                                     monitor.warn(
                                             "collectDetailGarbage : document has older _modified than query boundary : {} (from: {}, to: {})",
                                             modified, fromModified, toModified);
@@ -669,26 +675,29 @@ public class VersionGarbageCollector {
                                 phases.stop(GCPhase.DETAILED_GC_CLEANUP);
                             }
                             if (lastDoc != null) {
-                                fromModified = lastDoc.getModified() == null ? oldestModifiedDocTimeStamp : SECONDS.toMillis(lastDoc.getModified());
+                                fromModified = lastDoc.getModified() == null ? oldModifiedMs : SECONDS.toMillis(lastDoc.getModified());
                                 fromId = lastDoc.getId();
                             }
                         } finally {
                             Utils.closeIfCloseable(itr);
                             phases.stats.oldestModifiedDocTimeStamp = fromModified;
-                            if (fromModified > (oldestModifiedDocTimeStamp + 1)) {
+                            if (fromModified > (oldModifiedMs + 1)) {
                                 // we have moved ahead, now we can reset oldestModifiedId to min value
+                                fromId = MIN_ID_VALUE;
                                 phases.stats.oldestModifiedDocId = MIN_ID_VALUE;
                             } else {
                                 // there are still documents pending at oldest Modified timestamp,
                                 // save the last _id traversed to avoid re-fetching of ids
                                 phases.stats.oldestModifiedDocId = fromId;
                             }
+                            oldModifiedMs = fromModified - 1;
                         }
 
                         // if we are already at last document of current timeStamp,
-                        // we need to reset fromId and check again
+                        // we need to reset fromId & increment fromModified and check again
                         if (!foundDoc && !Objects.equals(fromId, MIN_ID_VALUE)) {
                             fromId = MIN_ID_VALUE;
+                            fromModified = fromModified + SECONDS.toMillis(5);
                             foundDoc = true; // to run while loop again
                         }
                     }
@@ -806,14 +815,14 @@ public class VersionGarbageCollector {
             this.timer = Stopwatch.createUnstarted();
         }
 
-        public void collectGarbage(final NodeDocument doc, final GCPhases phases) {
+        public void collectGarbage(final NodeDocument doc, final GCPhases phases, final Revision revision) {
 
             monitor.info("Collecting Detailed Garbage for doc [{}]", doc.getId());
 
             final UpdateOp op = new UpdateOp(requireNonNull(doc.getId()), false);
             op.equals(MODIFIED_IN_SECS, doc.getModified());
 
-            collectDeletedProperties(doc, phases, op);
+            collectDeletedProperties(doc, phases, op, revision);
             collectUnmergedBranchCommitDocument(doc, phases, op);
             collectOldRevisions(doc, phases, op);
             // only add if there are changes for this doc
@@ -836,7 +845,7 @@ public class VersionGarbageCollector {
 
         }
 
-        private void collectDeletedProperties(final NodeDocument doc, final GCPhases phases, final UpdateOp updateOp) {
+        private void collectDeletedProperties(final NodeDocument doc, final GCPhases phases, final UpdateOp updateOp, final Revision revision) {
 
             // get Map of all properties along with their values
             if (phases.start(GCPhase.COLLECT_PROPS)) {
@@ -855,6 +864,8 @@ public class VersionGarbageCollector {
                         .filter(p -> !retainPropSet.contains(p))
                         .mapToInt(x -> {
                             updateOp.remove(x);
+                            setModified(updateOp,revision);
+                            setDeleted(updateOp, revision, false);
                             return 1;})
                         .sum();
 
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
index 6ba8dd6f81..6d7382bac5 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
@@ -44,6 +44,7 @@ import static java.util.concurrent.TimeUnit.HOURS;
 import static java.util.concurrent.TimeUnit.MINUTES;
 import static org.apache.jackrabbit.oak.api.Type.STRING;
 import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
+import static org.apache.jackrabbit.oak.plugins.document.Collection.SETTINGS;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.NUM_REVS_THRESHOLD;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.PREV_SPLIT_FACTOR;
@@ -51,6 +52,9 @@ import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocTy
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.setModified;
 import static org.apache.jackrabbit.oak.plugins.document.Revision.newRevision;
 import static org.apache.jackrabbit.oak.plugins.document.TestUtils.NO_BINARY;
+import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP;
+import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP;
+import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_ID;
 import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.VersionGCStats;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
@@ -288,7 +292,7 @@ public class VersionGarbageCollectorIT {
         stats = gc.gc(maxAge*2, HOURS);
         assertEquals(1, stats.deletedPropsGCCount);
         assertEquals(1, stats.updatedDetailedGCDocsCount);
-        assertEquals("1:/z", stats.oldestModifiedDocId);
+        assertEquals(MIN_ID_VALUE, stats.oldestModifiedDocId);
 
         //4. Check that a revived property (deleted and created again) does not get gc
         NodeBuilder b3 = store.getRoot().builder();
@@ -341,7 +345,7 @@ public class VersionGarbageCollectorIT {
         VersionGCStats stats = gc.gc(maxAge*2, HOURS);
         assertEquals(50_000, stats.deletedPropsGCCount);
         assertEquals(5_000, stats.updatedDetailedGCDocsCount);
-        assertNotEquals(MIN_ID_VALUE, stats.oldestModifiedDocId);
+        assertEquals(MIN_ID_VALUE, stats.oldestModifiedDocId);
     }
 
     @Test
@@ -387,6 +391,46 @@ public class VersionGarbageCollectorIT {
         assertEquals(MIN_ID_VALUE, stats.oldestModifiedDocId);
     }
 
+    @Test
+    public void testGC_WithNoDeletedProps_And_MoreThan_10_000_DocWithDifferentRevision() throws Exception {
+        //1. Create nodes with properties
+        NodeBuilder b1 = store.getRoot().builder();
+        for (int k = 0; k < 50; k ++) {
+            b1 = store.getRoot().builder();
+            // Add property to node & save
+            for (int i = 0; i < 500; i++) {
+                for (int j = 0; j < 10; j++) {
+                    b1.child(k + "z" + i).setProperty("prop" + j, "foo", STRING);
+                }
+            }
+            store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+            // increase the clock to create new revision for next batch
+            clock.waitUntil(Revision.getCurrentTimestamp() + (k * 5));
+        }
+        store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        // enable the detailed gc flag
+        writeField(gc, "detailedGCEnabled", true, true);
+        long maxAge = 1; //hours
+        long delta = TimeUnit.MINUTES.toMillis(20);
+
+
+        store.runBackgroundOperations();
+        //3. Check that deleted property does get collected post maxAge
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
+
+        for (int i = 0; i < 3 ; i++) {
+
+            VersionGCStats stats = gc.gc(maxAge, HOURS);
+            String oldestModifiedDocId = stats.oldestModifiedDocId;
+            long oldestModifiedDocTimeStamp = stats.oldestModifiedDocTimeStamp;
+
+            Document document = store.getDocumentStore().find(SETTINGS, SETTINGS_COLLECTION_ID);
+            assertEquals(document.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP), oldestModifiedDocTimeStamp);
+            assertEquals(document.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP), oldestModifiedDocId);
+        }
+    }
+
     @Test
     public void testGCDeletedPropsAlreadyGCed() throws Exception {
         //1. Create nodes with properties
@@ -420,7 +464,7 @@ public class VersionGarbageCollectorIT {
         stats = gc.gc(maxAge*2, HOURS);
         assertEquals(10, stats.deletedPropsGCCount);
         assertEquals(10, stats.updatedDetailedGCDocsCount);
-        assertNotEquals(MIN_ID_VALUE, stats.oldestModifiedDocId);
+        assertEquals(MIN_ID_VALUE, stats.oldestModifiedDocId);
 
         //3. now reCreate those properties again
         NodeBuilder b3 = store.getRoot().builder();
@@ -635,7 +679,7 @@ public class VersionGarbageCollectorIT {
         stats = gc.gc(maxAge*2, HOURS);
         assertEquals(0, stats.updatedDetailedGCDocsCount);
         assertEquals(0, stats.deletedPropsGCCount);
-        assertNotEquals(MIN_ID_VALUE, stats.oldestModifiedDocId);
+        assertEquals(MIN_ID_VALUE, stats.oldestModifiedDocId);
     }
 
     @Test


[jackrabbit-oak] 18/28: OAK-10199 : added unit cases to handle concurrent prop update and escaped properties update

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch DetailedGC/OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 4ee104c9b13f1c16ac1a88982c86f498d910b47f
Author: Rishabh Kumar <di...@adobe.com>
AuthorDate: Tue Jun 27 20:53:53 2023 +0530

    OAK-10199 : added unit cases to handle concurrent prop update and escaped properties update
---
 .../plugins/document/VersionGCRecommendations.java |   4 +-
 .../plugins/document/VersionGarbageCollector.java  |  31 ++-
 .../oak/plugins/document/VersionGCInitTest.java    |   4 +-
 .../document/VersionGarbageCollectorIT.java        | 217 +++++++++++++++++----
 4 files changed, 202 insertions(+), 54 deletions(-)

diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
index ac0bcc03e3..c80399f005 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
@@ -26,12 +26,12 @@ import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.Versio
 import org.apache.jackrabbit.oak.plugins.document.util.TimeInterval;
 import org.apache.jackrabbit.oak.spi.gc.GCMonitor;
 import org.apache.jackrabbit.oak.stats.Clock;
-import org.jetbrains.annotations.NotNull;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import static java.lang.Long.MAX_VALUE;
 import static java.util.Map.of;
+import static java.util.concurrent.TimeUnit.SECONDS;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.NULL;
 import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP;
@@ -124,7 +124,7 @@ public class VersionGCRecommendations {
             if (doc == NULL) {
                 oldestModifiedDocTimeStamp = 0L;
             } else {
-                oldestModifiedDocTimeStamp = doc.getModified() == null ? 0L : doc.getModified() - 1;
+                oldestModifiedDocTimeStamp = doc.getModified() == null ? 0L : SECONDS.toMillis(doc.getModified()) - 1L;
             }
             oldestModifiedDocId = MIN_ID_VALUE;
             log.info("detailedGCTimestamp found: {}", timestampToString(oldestModifiedDocTimeStamp));
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
index 307315d5a8..eb25184f62 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
@@ -24,6 +24,7 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.EnumSet;
+import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -61,6 +62,7 @@ import static java.util.Collections.emptySet;
 import static java.util.Objects.requireNonNull;
 import static java.util.Optional.ofNullable;
 import static java.util.concurrent.TimeUnit.MILLISECONDS;
+import static java.util.concurrent.TimeUnit.SECONDS;
 import static java.util.stream.Collectors.joining;
 import static java.util.stream.Collectors.toSet;
 import static org.apache.jackrabbit.guava.common.base.StandardSystemProperty.LINE_SEPARATOR;
@@ -629,7 +631,6 @@ public class VersionGarbageCollector {
                         // set foundDoc to false to allow exiting the while loop
                         foundDoc = false;
                         Iterable<NodeDocument> itr = versionStore.getModifiedDocs(fromModified, toModified, 1000, fromId);
-                        // set includeFromId to false for subsequent queries
                         try {
                             for (NodeDocument doc : itr) {
                                 foundDoc = true;
@@ -656,7 +657,7 @@ public class VersionGarbageCollector {
                                 if (modified == null) {
                                     monitor.warn("collectDetailGarbage : document has no _modified property : {}",
                                             doc.getId());
-                                } else if (modified < oldestModifiedDocTimeStamp) {
+                                } else if (SECONDS.toMillis(modified) < oldestModifiedDocTimeStamp) {
                                     monitor.warn(
                                             "collectDetailGarbage : document has older _modified than query boundary : {} (from: {}, to: {})",
                                             modified, fromModified, toModified);
@@ -668,13 +669,13 @@ public class VersionGarbageCollector {
                                 phases.stop(GCPhase.DETAILED_GC_CLEANUP);
                             }
                             if (lastDoc != null) {
-                                fromModified = ofNullable(lastDoc.getModified()).orElse(oldestModifiedDocTimeStamp);
+                                fromModified = lastDoc.getModified() == null ? oldestModifiedDocTimeStamp : SECONDS.toMillis(lastDoc.getModified());
                                 fromId = lastDoc.getId();
                             }
                         } finally {
                             Utils.closeIfCloseable(itr);
                             phases.stats.oldestModifiedDocTimeStamp = fromModified;
-                            if (fromModified > oldestModifiedDocTimeStamp) {
+                            if (fromModified > (oldestModifiedDocTimeStamp + 1)) {
                                 // we have moved ahead, now we can reset oldestModifiedId to min value
                                 phases.stats.oldestModifiedDocId = MIN_ID_VALUE;
                             } else {
@@ -683,6 +684,13 @@ public class VersionGarbageCollector {
                                 phases.stats.oldestModifiedDocId = fromId;
                             }
                         }
+
+                        // if we are already at last document of current timeStamp,
+                        // we need to reset fromId and check again
+                        if (!foundDoc && !Objects.equals(fromId, MIN_ID_VALUE)) {
+                            fromId = MIN_ID_VALUE;
+                            foundDoc = true; // to run while loop again
+                        }
                     }
                     phases.stop(GCPhase.DETAILED_GC);
                 }
@@ -785,6 +793,8 @@ public class VersionGarbageCollector {
         private final AtomicBoolean cancel;
         private final Stopwatch timer;
         private final List<UpdateOp> updateOpList;
+
+        private final Map<String, Integer> deletedPropsCountMap;
         private int garbageDocsCount;
 
         public DetailedGC(@NotNull RevisionVector headRevision, @NotNull GCMonitor monitor, @NotNull AtomicBoolean cancel) {
@@ -792,6 +802,7 @@ public class VersionGarbageCollector {
             this.monitor = monitor;
             this.cancel = cancel;
             this.updateOpList = new ArrayList<>();
+            this.deletedPropsCountMap = new HashMap<>();
             this.timer = Stopwatch.createUnstarted();
         }
 
@@ -800,6 +811,8 @@ public class VersionGarbageCollector {
             monitor.info("Collecting Detailed Garbage for doc [{}]", doc.getId());
 
             final UpdateOp op = new UpdateOp(requireNonNull(doc.getId()), false);
+            op.equals(MODIFIED_IN_SECS, doc.getModified());
+
             collectDeletedProperties(doc, phases, op);
             collectUnmergedBranchCommitDocument(doc, phases, op);
             collectOldRevisions(doc, phases, op);
@@ -837,6 +850,7 @@ public class VersionGarbageCollector {
                         .map(DocumentNodeState::getPropertyNames)
                         .map(p -> p.stream().map(Utils::escapePropertyName).collect(toSet()))
                         .orElse(emptySet());
+
                 final int deletedPropsGCCount = properties.stream()
                         .filter(p -> !retainPropSet.contains(p))
                         .mapToInt(x -> {
@@ -844,8 +858,8 @@ public class VersionGarbageCollector {
                             return 1;})
                         .sum();
 
+                deletedPropsCountMap.put(doc.getId(), deletedPropsGCCount);
 
-                phases.stats.deletedPropsGCCount += deletedPropsGCCount;
                 if (log.isDebugEnabled()) {
                     log.debug("Collected {} deleted properties for document {}", deletedPropsGCCount, doc.getId());
                 }
@@ -896,9 +910,12 @@ public class VersionGarbageCollector {
 
             timer.reset().start();
             try {
-                updatedDocs = (int) ds.findAndUpdate(NODES, updateOpList).stream().filter(Objects::nonNull).count();
+                List<NodeDocument> oldDocs = ds.findAndUpdate(NODES, updateOpList);
+                int deletedProps = oldDocs.stream().filter(Objects::nonNull).mapToInt(d -> deletedPropsCountMap.getOrDefault(d.getId(), 0)).sum();
+                updatedDocs = (int) oldDocs.stream().filter(Objects::nonNull).count();
                 stats.updatedDetailedGCDocsCount += updatedDocs;
-                log.info("Updated [{}] documents", updatedDocs);
+                stats.deletedPropsGCCount += deletedProps;
+                log.info("Updated [{}] documents, deleted [{}] properties", updatedDocs, deletedProps);
                 // now reset delete metadata
                 updateOpList.clear();
                 garbageDocsCount = 0;
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java
index 4db64c942f..738c1109ad 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java
@@ -78,8 +78,8 @@ public class VersionGCInitTest {
 
         vgc = store.find(SETTINGS, "versionGC");
         assertNotNull(vgc);
-        assertEquals(40L, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP));
-        assertEquals(MIN_ID_VALUE, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP));
+        assertEquals(40_000L, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP));
+        assertEquals("1:/node", vgc.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP));
     }
 
     @Test
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
index ca00648244..6ba8dd6f81 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
@@ -34,6 +34,9 @@ import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicReference;
 
+import static java.util.Objects.requireNonNull;
+import static java.util.stream.Collectors.toList;
+import static java.util.stream.StreamSupport.stream;
 import static org.apache.commons.lang3.reflect.FieldUtils.writeField;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.filter;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.size;
@@ -41,9 +44,12 @@ import static java.util.concurrent.TimeUnit.HOURS;
 import static java.util.concurrent.TimeUnit.MINUTES;
 import static org.apache.jackrabbit.oak.api.Type.STRING;
 import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.NUM_REVS_THRESHOLD;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.PREV_SPLIT_FACTOR;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.setModified;
+import static org.apache.jackrabbit.oak.plugins.document.Revision.newRevision;
 import static org.apache.jackrabbit.oak.plugins.document.TestUtils.NO_BINARY;
 import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.VersionGCStats;
 import static org.junit.Assert.assertEquals;
@@ -260,6 +266,7 @@ public class VersionGarbageCollectorIT {
         clock.waitUntil(Revision.getCurrentTimestamp() + maxAge);
         VersionGCStats stats = gc.gc(maxAge, HOURS);
         assertEquals(0, stats.deletedPropsGCCount);
+        assertEquals(0, stats.updatedDetailedGCDocsCount);
 
         //Remove property
         NodeBuilder b2 = store.getRoot().builder();
@@ -273,12 +280,15 @@ public class VersionGarbageCollectorIT {
         clock.waitUntil(clock.getTime() + delta);
         stats = gc.gc(maxAge*2, HOURS);
         assertEquals(0, stats.deletedPropsGCCount);
+        assertEquals(0, stats.updatedDetailedGCDocsCount);
 
         //3. Check that deleted property does get collected post maxAge
         clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
 
         stats = gc.gc(maxAge*2, HOURS);
         assertEquals(1, stats.deletedPropsGCCount);
+        assertEquals(1, stats.updatedDetailedGCDocsCount);
+        assertEquals("1:/z", stats.oldestModifiedDocId);
 
         //4. Check that a revived property (deleted and created again) does not get gc
         NodeBuilder b3 = store.getRoot().builder();
@@ -292,11 +302,12 @@ public class VersionGarbageCollectorIT {
         clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
         stats = gc.gc(maxAge*2, HOURS);
         assertEquals(0, stats.deletedPropsGCCount);
+        assertEquals(0, stats.updatedDetailedGCDocsCount);
+        assertEquals(MIN_ID_VALUE, stats.oldestModifiedDocId);
     }
 
-    // Test when we have more than 1000 deleted properties
     @Test
-    public void testGCDeletedProps_1() throws Exception {
+    public void testGCDeletedProps_MoreThan_1000_WithSameRevision() throws Exception {
         //1. Create nodes with properties
         NodeBuilder b1 = store.getRoot().builder();
 
@@ -312,10 +323,6 @@ public class VersionGarbageCollectorIT {
         writeField(gc, "detailedGCEnabled", true, true);
         long maxAge = 1; //hours
         long delta = TimeUnit.MINUTES.toMillis(10);
-        //1. Go past GC age and check no GC done as nothing deleted
-        clock.waitUntil(Revision.getCurrentTimestamp() + maxAge);
-        VersionGCStats stats = gc.gc(maxAge, HOURS);
-        assertEquals(0, stats.deletedPropsGCCount);
 
         //Remove property
         NodeBuilder b2 = store.getRoot().builder();
@@ -328,77 +335,60 @@ public class VersionGarbageCollectorIT {
 
         store.runBackgroundOperations();
 
-        //2. Check that a deleted property is not collected before maxAge
-        //Clock cannot move back (it moved forward in #1) so double the maxAge
-        clock.waitUntil(clock.getTime() + delta);
-        stats = gc.gc(maxAge*2, HOURS);
-        assertEquals(0, stats.deletedPropsGCCount);
-
         //3. Check that deleted property does get collected post maxAge
         clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
 
-        stats = gc.gc(maxAge*2, HOURS);
+        VersionGCStats stats = gc.gc(maxAge*2, HOURS);
         assertEquals(50_000, stats.deletedPropsGCCount);
-
+        assertEquals(5_000, stats.updatedDetailedGCDocsCount);
+        assertNotEquals(MIN_ID_VALUE, stats.oldestModifiedDocId);
     }
 
-    // Test when we have more than 1000 deleted properties with different revisions
     @Test
-    public void testGCDeletedProps_2() throws Exception {
+    public void testGCDeletedProps_MoreThan_1000_WithDifferentRevision() throws Exception {
         //1. Create nodes with properties
-        NodeBuilder b1 = null;
+        NodeBuilder b1 = store.getRoot().builder();
         for (int k = 0; k < 50; k ++) {
-            b1 = store.getRoot().builder();
             // Add property to node & save
             for (int i = 0; i < 100; i++) {
                 for (int j = 0; j < 10; j++) {
                     b1.child(k + "z" + i).setProperty("prop" + j, "foo", STRING);
                 }
             }
-            store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
-            // increase the clock to create new revision for next batch
-            clock.waitUntil(Revision.getCurrentTimestamp() + (k * 5));
         }
+        store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
 
         // enable the detailed gc flag
         writeField(gc, "detailedGCEnabled", true, true);
         long maxAge = 1; //hours
-        long delta = TimeUnit.MINUTES.toMillis(10);
-        //1. Go past GC age and check no GC done as nothing deleted
-        clock.waitUntil(Revision.getCurrentTimestamp() + maxAge);
-        VersionGCStats stats = gc.gc(maxAge, HOURS);
-        assertEquals(0, stats.deletedPropsGCCount);
+        long delta = TimeUnit.MINUTES.toMillis(20);
 
         //Remove property
         NodeBuilder b2 = store.getRoot().builder();
         for (int k = 0; k < 50; k ++) {
+            b2 = store.getRoot().builder();
             for (int i = 0; i < 100; i++) {
                 for (int j = 0; j < 10; j++) {
                     b2.getChildNode(k + "z" + i).removeProperty("prop" + j);
                 }
             }
+            store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+            // increase the clock to create new revision for next batch
+            clock.waitUntil(Revision.getCurrentTimestamp() + (k * 5));
         }
-        store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY);
 
         store.runBackgroundOperations();
-
-        //2. Check that a deleted property is not collected before maxAge
-        //Clock cannot move back (it moved forward in #1) so double the maxAge
-        clock.waitUntil(clock.getTime() + delta);
-        stats = gc.gc(maxAge*2, HOURS);
-        assertEquals(0, stats.deletedPropsGCCount);
-
         //3. Check that deleted property does get collected post maxAge
         clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
 
-        stats = gc.gc(maxAge*2, HOURS);
+        VersionGCStats stats = gc.gc(maxAge, HOURS);
         assertEquals(50_000, stats.deletedPropsGCCount);
-
+        assertEquals(5_000, stats.updatedDetailedGCDocsCount);
+        assertEquals(MIN_ID_VALUE, stats.oldestModifiedDocId);
     }
 
-    // Test where we modify the already GCed nodes
     @Test
-    public void testGCDeletedProps_3() throws Exception {
+    public void testGCDeletedPropsAlreadyGCed() throws Exception {
         //1. Create nodes with properties
         NodeBuilder b1 = store.getRoot().builder();
         // Add property to node & save
@@ -429,6 +419,8 @@ public class VersionGarbageCollectorIT {
 
         stats = gc.gc(maxAge*2, HOURS);
         assertEquals(10, stats.deletedPropsGCCount);
+        assertEquals(10, stats.updatedDetailedGCDocsCount);
+        assertNotEquals(MIN_ID_VALUE, stats.oldestModifiedDocId);
 
         //3. now reCreate those properties again
         NodeBuilder b3 = store.getRoot().builder();
@@ -452,11 +444,12 @@ public class VersionGarbageCollectorIT {
         clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
         stats = gc.gc(maxAge*2, HOURS);
         assertEquals(10, stats.deletedPropsGCCount);
+        assertEquals(10, stats.updatedDetailedGCDocsCount);
+        assertEquals(MIN_ID_VALUE, stats.oldestModifiedDocId);
     }
 
-    // Test properties are collected after system crash had happened
     @Test
-    public void testGCDeletedProps_4() throws Exception {
+    public void testGCDeletedPropsAfterSystemCrash() throws Exception {
         final FailingDocumentStore fds = new FailingDocumentStore(fixture.createDocumentStore(), 42) {
             @Override
             public void dispose() {}
@@ -519,12 +512,12 @@ public class VersionGarbageCollectorIT {
         clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
         VersionGCStats stats = gc.gc(maxAge*2, HOURS);
         assertEquals(10, stats.deletedPropsGCCount);
-
+        assertEquals(10, stats.updatedDetailedGCDocsCount);
+        assertEquals(MIN_ID_VALUE, stats.oldestModifiedDocId);
     }
 
-    // Test when escaped properties are collected
     @Test
-    public void testGCDeletedProps_5() throws Exception {
+    public void testGCDeletedEscapeProps() throws Exception {
         //1. Create nodes with properties
         NodeBuilder b1 = store.getRoot().builder();
 
@@ -542,6 +535,7 @@ public class VersionGarbageCollectorIT {
         clock.waitUntil(Revision.getCurrentTimestamp() + maxAge);
         VersionGCStats stats = gc.gc(maxAge, HOURS);
         assertEquals(0, stats.deletedPropsGCCount);
+        assertEquals(0, stats.updatedDetailedGCDocsCount);
 
         //Remove property
         NodeBuilder b2 = store.getRoot().builder();
@@ -557,6 +551,7 @@ public class VersionGarbageCollectorIT {
         clock.waitUntil(clock.getTime() + delta);
         stats = gc.gc(maxAge*2, HOURS);
         assertEquals(0, stats.deletedPropsGCCount);
+        assertEquals(0, stats.updatedDetailedGCDocsCount);
 
         //3. Check that deleted property does get collected post maxAge
         clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
@@ -574,7 +569,143 @@ public class VersionGarbageCollectorIT {
         clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
         stats = gc.gc(maxAge*2, HOURS);
         assertEquals(0, stats.deletedPropsGCCount);
+        assertEquals(0, stats.updatedDetailedGCDocsCount);
+    }
+
+    @Test
+    public void testGCDeletedPropsWhenModifiedConcurrently() throws Exception {
+        //1. Create nodes with properties
+        NodeBuilder b1 = store.getRoot().builder();
+
+        // Add property to node & save
+        for (int i = 0; i < 10; i++) {
+            b1.child("x"+i).setProperty("test"+i, "t", STRING);
+        }
+        store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        // enable the detailed gc flag
+        writeField(gc, "detailedGCEnabled", true, true);
+        long maxAge = 1; //hours
+        long delta = TimeUnit.MINUTES.toMillis(10);
+        //1. Go past GC age and check no GC done as nothing deleted
+        clock.waitUntil(Revision.getCurrentTimestamp() + maxAge);
+        VersionGCStats stats = gc.gc(maxAge, HOURS);
+        assertEquals(0, stats.deletedPropsGCCount);
+        assertEquals(0, stats.updatedDetailedGCDocsCount);
+
+        //Remove property
+        NodeBuilder b2 = store.getRoot().builder();
+        for (int i = 0; i < 10; i++) {
+            b2.getChildNode("x"+i).removeProperty("test"+i);
+        }
+        store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+        store.runBackgroundOperations();
+
+        //2. Check that a deleted property is not collected before maxAge
+        //Clock cannot move back (it moved forward in #1) so double the maxAge
+        clock.waitUntil(clock.getTime() + delta);
+        stats = gc.gc(maxAge*2, HOURS);
+        assertEquals(0, stats.deletedPropsGCCount);
+        assertEquals(0, stats.updatedDetailedGCDocsCount);
+        assertNull(stats.oldestModifiedDocId); // as GC hadn't run
+
+        //3. Check that deleted property does get collected post maxAge
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
+
+        VersionGCSupport gcSupport = new VersionGCSupport(store.getDocumentStore()) {
+
+            @Override
+            public Iterable<NodeDocument> getModifiedDocs(long fromModified, long toModified, int limit, @NotNull String fromId) {
+                Iterable<NodeDocument> modifiedDocs = super.getModifiedDocs(fromModified, toModified, limit, fromId);
+                List<NodeDocument> result = stream(modifiedDocs.spliterator(), false).collect(toList());
+                final Revision updateRev = newRevision(1);
+                store.getDocumentStore().findAndUpdate(NODES, stream(modifiedDocs.spliterator(), false)
+                        .map(doc -> {
+                            UpdateOp op = new UpdateOp(requireNonNull(doc.getId()), false);
+                            setModified(op, updateRev);
+                            return op;
+                        }).
+                        collect(toList())
+                );
+                return result;
+            }
+        };
+
+        VersionGarbageCollector gc = new VersionGarbageCollector(store, gcSupport, true);
+        stats = gc.gc(maxAge*2, HOURS);
+        assertEquals(0, stats.updatedDetailedGCDocsCount);
+        assertEquals(0, stats.deletedPropsGCCount);
+        assertNotEquals(MIN_ID_VALUE, stats.oldestModifiedDocId);
+    }
+
+    @Test
+    public void cancelDetailedGCAfterFirstBatch() throws Exception {
+        //1. Create nodes with properties
+        NodeBuilder b1 = store.getRoot().builder();
+
+        // Add property to node & save
+        for (int i = 0; i < 5_000; i++) {
+            for (int j = 0; j < 10; j++) {
+                b1.child("z"+i).setProperty("prop"+j, "foo", STRING);
+            }
+        }
+        store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+        store.runBackgroundOperations();
+
+        // enable the detailed gc flag
+        writeField(gc, "detailedGCEnabled", true, true);
+        long maxAge = 1; //hours
+        long delta = TimeUnit.MINUTES.toMillis(10);
+        //1. Go past GC age and check no GC done as nothing deleted
+        clock.waitUntil(Revision.getCurrentTimestamp() + maxAge);
+        VersionGCStats stats = gc.gc(maxAge, HOURS);
+        assertEquals(0, stats.deletedPropsGCCount);
+        assertEquals(0, stats.updatedDetailedGCDocsCount);
 
+        //Remove property
+        NodeBuilder b2 = store.getRoot().builder();
+        for (int i = 0; i < 5_000; i++) {
+            for (int j = 0; j < 10; j++) {
+                b2.getChildNode("z"+i).removeProperty("prop"+j);
+            }
+        }
+        store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+        store.runBackgroundOperations();
+
+        final AtomicReference<VersionGarbageCollector> gcRef = Atomics.newReference();
+        final VersionGCSupport gcSupport = new VersionGCSupport(store.getDocumentStore()) {
+
+            @Override
+            public Iterable<NodeDocument> getModifiedDocs(long fromModified, long toModified, int limit, @NotNull String fromId) {
+                return () -> new AbstractIterator<>() {
+                    private final Iterator<NodeDocument> it = candidates(fromModified, toModified, limit, fromId);
+
+                    @Override
+                    protected NodeDocument computeNext() {
+                        if (it.hasNext()) {
+                            return it.next();
+                        }
+                        // cancel when we reach the end
+                        gcRef.get().cancel();
+                        return endOfData();
+                    }
+                };
+            }
+
+            private Iterator<NodeDocument> candidates(long fromModified, long toModified, int limit, @NotNull String fromId) {
+                return super.getModifiedDocs(fromModified, toModified, limit, fromId).iterator();
+            }
+        };
+
+        gcRef.set(new VersionGarbageCollector(store, gcSupport, true));
+
+        //3. Check that deleted property does get collected post maxAge
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
+        stats = gcRef.get().gc(maxAge*2, HOURS);
+        assertTrue(stats.canceled);
+        assertEquals(0, stats.updatedDetailedGCDocsCount);
+        assertEquals(0, stats.deletedPropsGCCount);
+        assertEquals(MIN_ID_VALUE, stats.oldestModifiedDocId);
     }
 
     // OAK-10199 END


[jackrabbit-oak] 24/28: OAK-10199 : added unit cases for bundled properties getting garbage collected as well

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch DetailedGC/OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 6a3ddfce0cc4fc4eebc7ab2b9c278f885c391a1a
Author: Rishabh Kumar <di...@adobe.com>
AuthorDate: Thu Aug 3 15:32:24 2023 +0530

    OAK-10199 : added unit cases for bundled properties getting garbage collected as well
---
 .../document/VersionGarbageCollectorIT.java        | 133 +++++++++++++++++++--
 1 file changed, 125 insertions(+), 8 deletions(-)

diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
index 4e5360e2c3..56f8b3f77b 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
@@ -34,6 +34,7 @@ import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicReference;
 
+import static java.util.List.of;
 import static java.util.Objects.requireNonNull;
 import static java.util.concurrent.TimeUnit.MILLISECONDS;
 import static java.util.stream.Collectors.toList;
@@ -43,7 +44,9 @@ import static org.apache.jackrabbit.guava.common.collect.Iterables.filter;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.size;
 import static java.util.concurrent.TimeUnit.HOURS;
 import static java.util.concurrent.TimeUnit.MINUTES;
+import static org.apache.jackrabbit.oak.api.Type.NAME;
 import static org.apache.jackrabbit.oak.api.Type.STRING;
+import static org.apache.jackrabbit.oak.api.Type.STRINGS;
 import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
 import static org.apache.jackrabbit.oak.plugins.document.Collection.SETTINGS;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE;
@@ -51,12 +54,14 @@ import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.NUM_REVS_T
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.PREV_SPLIT_FACTOR;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.setModified;
+import static org.apache.jackrabbit.oak.plugins.document.Revision.getCurrentTimestamp;
 import static org.apache.jackrabbit.oak.plugins.document.Revision.newRevision;
 import static org.apache.jackrabbit.oak.plugins.document.TestUtils.NO_BINARY;
 import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP;
 import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP;
 import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_ID;
 import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.VersionGCStats;
+import static org.apache.jackrabbit.oak.plugins.document.bundlor.DocumentBundlor.META_PROP_PATTERN;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotEquals;
@@ -79,10 +84,12 @@ import org.apache.jackrabbit.guava.common.io.Closer;
 import org.apache.jackrabbit.guava.common.util.concurrent.Atomics;
 import com.mongodb.ReadPreference;
 
+import org.apache.jackrabbit.oak.InitialContent;
 import org.apache.jackrabbit.oak.api.CommitFailedException;
 import org.apache.jackrabbit.oak.api.PropertyState;
 import org.apache.jackrabbit.oak.api.Type;
 import org.apache.jackrabbit.oak.plugins.document.DocumentStoreFixture.RDBFixture;
+import org.apache.jackrabbit.oak.plugins.document.bundlor.BundlingConfigInitializer;
 import org.apache.jackrabbit.oak.plugins.document.mongo.MongoTestUtils;
 import org.apache.jackrabbit.oak.plugins.document.rdb.RDBOptions;
 import org.apache.jackrabbit.oak.plugins.document.util.Utils;
@@ -316,7 +323,7 @@ public class VersionGarbageCollectorIT {
         assertEquals(1, stats.updatedDetailedGCDocsCount);
         assertTrue(stats.ignoredGCDueToCheckPoint);
         assertFalse(stats.ignoredDetailedGCDueToCheckPoint);
-        assertFalse(stats.canceled);
+        assertTrue(stats.canceled);
     }
 
     @Test
@@ -344,7 +351,7 @@ public class VersionGarbageCollectorIT {
         long maxAge = 1; //hours
         long delta = MINUTES.toMillis(10);
         //1. Go past GC age and check no GC done as nothing deleted
-        clock.waitUntil(Revision.getCurrentTimestamp() + maxAge);
+        clock.waitUntil(getCurrentTimestamp() + maxAge);
         VersionGCStats stats = gc.gc(maxAge, HOURS);
         assertEquals(0, stats.deletedPropsGCCount);
         assertEquals(0, stats.updatedDetailedGCDocsCount);
@@ -455,7 +462,7 @@ public class VersionGarbageCollectorIT {
             }
             store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY);
             // increase the clock to create new revision for next batch
-            clock.waitUntil(Revision.getCurrentTimestamp() + (k * 5));
+            clock.waitUntil(getCurrentTimestamp() + (k * 5));
         }
 
         store.runBackgroundOperations();
@@ -482,7 +489,7 @@ public class VersionGarbageCollectorIT {
             }
             store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
             // increase the clock to create new revision for next batch
-            clock.waitUntil(Revision.getCurrentTimestamp() + (k * 5));
+            clock.waitUntil(getCurrentTimestamp() + (k * 5));
         }
         store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
 
@@ -524,7 +531,7 @@ public class VersionGarbageCollectorIT {
         long maxAge = 1; //hours
         long delta = MINUTES.toMillis(10);
         //1. Go past GC age and check no GC done as nothing deleted
-        clock.waitUntil(Revision.getCurrentTimestamp() + maxAge);
+        clock.waitUntil(getCurrentTimestamp() + maxAge);
         VersionGCStats stats = gc.gc(maxAge, HOURS);
         assertEquals(0, stats.deletedPropsGCCount);
 
@@ -654,7 +661,7 @@ public class VersionGarbageCollectorIT {
         long maxAge = 1; //hours
         long delta = MINUTES.toMillis(10);
         //1. Go past GC age and check no GC done as nothing deleted
-        clock.waitUntil(Revision.getCurrentTimestamp() + maxAge);
+        clock.waitUntil(getCurrentTimestamp() + maxAge);
         VersionGCStats stats = gc.gc(maxAge, HOURS);
         assertEquals(0, stats.deletedPropsGCCount);
         assertEquals(0, stats.updatedDetailedGCDocsCount);
@@ -694,6 +701,116 @@ public class VersionGarbageCollectorIT {
         assertEquals(0, stats.updatedDetailedGCDocsCount);
     }
 
+    @Test
+    public void testGCDeletedNonBundledProps() throws Exception {
+        // Deletes the 10 props set directly on /x (its jcr:content child is left untouched) and checks that detailed GC counts all 10
+        //0. Initialize bundling configs
+        final NodeBuilder builder = store.getRoot().builder();
+        new InitialContent().initialize(builder);
+        BundlingConfigInitializer.INSTANCE.initialize(builder);
+        merge(store, builder);
+        store.runBackgroundOperations();
+
+        //1. Create nodes with properties
+        NodeBuilder b1 = store.getRoot().builder();
+        b1.child("x").setProperty("jcr:primaryType", "nt:file", NAME);
+
+        // Add 10 props (prop0..prop9) to /x and to its jcr:content child & save
+        for (int i = 0; i < 10; i++) {
+            b1.child("x").child("jcr:content").setProperty("prop"+i, "t", STRING);
+            b1.child("x").setProperty(META_PROP_PATTERN, of("jcr:content"), STRINGS);
+            b1.child("x").setProperty("prop"+i, "bar", STRING);
+        }
+        store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        // enable the detailed gc flag
+        writeField(gc, "detailedGCEnabled", true, true);
+        long maxAge = 1; //hours
+        long delta = MINUTES.toMillis(10);
+        //1. Go past GC age and check no GC done as nothing deleted - NOTE(review): maxAge (hours) is added unconverted to the timestamp, confirm intended
+        clock.waitUntil(getCurrentTimestamp() + maxAge);
+        VersionGCStats stats = gc.gc(maxAge, HOURS);
+        assertEquals(0, stats.deletedPropsGCCount);
+        assertEquals(0, stats.updatedDetailedGCDocsCount);
+
+        //Remove the 10 props that were set directly on /x
+        NodeBuilder b2 = store.getRoot().builder();
+        for (int i = 0; i < 10; i++) {
+            b2.getChildNode("x").removeProperty("prop"+i);
+        }
+        store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        store.runBackgroundOperations();
+
+        //2. Check that a deleted property is not collected before maxAge
+        //Clock cannot move back (it moved forward in #1) so double the maxAge
+        clock.waitUntil(clock.getTime() + delta);
+        stats = gc.gc(maxAge*2, HOURS);
+        assertEquals(0, stats.deletedPropsGCCount);
+        assertEquals(0, stats.updatedDetailedGCDocsCount);
+
+        //3. Check that the deleted properties do get collected post maxAge
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
+
+        stats = gc.gc(maxAge*2, HOURS);
+        assertEquals(10, stats.deletedPropsGCCount);
+    }
+
+    @Test
+    public void testGCDeletedBundledProps() throws Exception {
+        // Deletes the 10 props set on /x/jcr:content (the props on /x itself are left untouched) and checks that detailed GC counts all 10
+        //0. Initialize bundling configs
+        final NodeBuilder builder = store.getRoot().builder();
+        new InitialContent().initialize(builder);
+        BundlingConfigInitializer.INSTANCE.initialize(builder);
+        merge(store, builder);
+        store.runBackgroundOperations();
+
+        //1. Create nodes with properties
+        NodeBuilder b1 = store.getRoot().builder();
+        b1.child("x").setProperty("jcr:primaryType", "nt:file", NAME);
+
+        // Add 10 props (prop0..prop9) to /x and to its jcr:content child & save
+        for (int i = 0; i < 10; i++) {
+            b1.child("x").child("jcr:content").setProperty("prop"+i, "t", STRING);
+            b1.child("x").setProperty(META_PROP_PATTERN, of("jcr:content"), STRINGS);
+            b1.child("x").setProperty("prop"+i, "bar", STRING);
+        }
+        store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        // enable the detailed gc flag
+        writeField(gc, "detailedGCEnabled", true, true);
+        long maxAge = 1; //hours
+        long delta = MINUTES.toMillis(10);
+        //1. Go past GC age and check no GC done as nothing deleted - NOTE(review): maxAge (hours) is added unconverted to the timestamp, confirm intended
+        clock.waitUntil(getCurrentTimestamp() + maxAge);
+        VersionGCStats stats = gc.gc(maxAge, HOURS);
+        assertEquals(0, stats.deletedPropsGCCount);
+        assertEquals(0, stats.updatedDetailedGCDocsCount);
+
+        //Remove the 10 props that were set on the jcr:content child of /x
+        NodeBuilder b2 = store.getRoot().builder();
+        for (int i = 0; i < 10; i++) {
+            b2.getChildNode("x").getChildNode("jcr:content").removeProperty("prop"+i);
+        }
+        store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        store.runBackgroundOperations();
+
+        //2. Check that a deleted property is not collected before maxAge
+        //Clock cannot move back (it moved forward in #1) so double the maxAge
+        clock.waitUntil(clock.getTime() + delta);
+        stats = gc.gc(maxAge*2, HOURS);
+        assertEquals(0, stats.deletedPropsGCCount);
+        assertEquals(0, stats.updatedDetailedGCDocsCount);
+
+        //3. Check that the deleted properties do get collected post maxAge
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
+
+        stats = gc.gc(maxAge*2, HOURS);
+        assertEquals(10, stats.deletedPropsGCCount);
+    }
+
     @Test
     public void testGCDeletedPropsWhenModifiedConcurrently() throws Exception {
         //1. Create nodes with properties
@@ -710,7 +827,7 @@ public class VersionGarbageCollectorIT {
         long maxAge = 1; //hours
         long delta = MINUTES.toMillis(10);
         //1. Go past GC age and check no GC done as nothing deleted
-        clock.waitUntil(Revision.getCurrentTimestamp() + maxAge);
+        clock.waitUntil(getCurrentTimestamp() + maxAge);
         VersionGCStats stats = gc.gc(maxAge, HOURS);
         assertEquals(0, stats.deletedPropsGCCount);
         assertEquals(0, stats.updatedDetailedGCDocsCount);
@@ -779,7 +896,7 @@ public class VersionGarbageCollectorIT {
         long maxAge = 1; //hours
         long delta = MINUTES.toMillis(10);
         //1. Go past GC age and check no GC done as nothing deleted
-        clock.waitUntil(Revision.getCurrentTimestamp() + maxAge);
+        clock.waitUntil(getCurrentTimestamp() + maxAge);
         VersionGCStats stats = gc.gc(maxAge, HOURS);
         assertEquals(0, stats.deletedPropsGCCount);
         assertEquals(0, stats.updatedDetailedGCDocsCount);


[jackrabbit-oak] 10/28: OAK-10199 : added test cases to fetch 5000+ modified docs in loop and verify them

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch DetailedGC/OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit c9c492cbdea3d32bc4f119d0e97b1bebdc7cf6ca
Author: Rishabh Kumar <di...@adobe.com>
AuthorDate: Mon Jun 19 18:23:34 2023 +0530

    OAK-10199 : added test cases to fetch 5000+ modified docs in loop and verify them
---
 .../oak/plugins/document/VersionGCSupport.java     |   2 +-
 .../document/mongo/MongoVersionGCSupport.java      |   2 +-
 .../plugins/document/rdb/RDBVersionGCSupport.java  |   3 +-
 .../oak/plugins/document/VersionGCSupportTest.java | 115 ++++++++++++++++++++-
 4 files changed, 114 insertions(+), 8 deletions(-)

diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
index e58ec05903..db54553061 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
@@ -86,7 +86,7 @@ public class VersionGCSupport {
      * @return matching documents.
      */
     public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit,
-                                                  final String fromId) {
+                                                  @NotNull final String fromId) {
         return StreamSupport
                 .stream(getSelectedDocuments(store, MODIFIED_IN_SECS, 1, fromId).spliterator(), false)
                 .filter(input -> modifiedGreaterThanEquals(input, fromModified) && modifiedLessThan(input, toModified))
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
index 690fd5a0d6..9896857e36 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
@@ -144,7 +144,7 @@ public class MongoVersionGCSupport extends VersionGCSupport {
      */
     @Override
     public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit,
-                                                  final String fromId) {
+                                                  @NotNull final String fromId) {
         // _modified >= fromModified && _modified < toModified && _id > fromId
         final Bson query = and(gte(MODIFIED_IN_SECS, getModifiedInSecs(fromModified)),
                 lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified)), gt(ID, fromId));
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
index 0d2f678911..7006c18683 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
@@ -48,6 +48,7 @@ import org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStore.Unsupport
 import org.apache.jackrabbit.oak.plugins.document.util.CloseableIterable;
 import org.apache.jackrabbit.oak.plugins.document.util.Utils;
 import org.apache.jackrabbit.oak.stats.Clock;
+import org.jetbrains.annotations.NotNull;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -108,7 +109,7 @@ public class RDBVersionGCSupport extends VersionGCSupport {
      */
     @Override
     public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit,
-                                                  final String fromId) {
+                                                  @NotNull final String fromId) {
         List<QueryCondition> conditions = of(new QueryCondition(MODIFIED_IN_SECS, "<", getModifiedInSecs(toModified)),
                 new QueryCondition(MODIFIED_IN_SECS, ">=", getModifiedInSecs(fromModified)),
                 new QueryCondition(ID, ">", of(fromId)));
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java
index 4eb20986c2..0061771383 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java
@@ -44,6 +44,8 @@ import static org.apache.jackrabbit.oak.plugins.document.DocumentStoreFixture.ME
 import static org.apache.jackrabbit.oak.plugins.document.DocumentStoreFixture.MONGO;
 import static org.apache.jackrabbit.oak.plugins.document.DocumentStoreFixture.RDB_H2;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.NULL;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.setModified;
 import static org.apache.jackrabbit.oak.plugins.document.util.Utils.getIdFromPath;
 import static org.apache.jackrabbit.oak.stats.Clock.SIMPLE;
 import static org.junit.Assert.assertEquals;
@@ -106,7 +108,7 @@ public class VersionGCSupportTest {
             String id = getIdFromPath("/doc-" + i);
             ids.add(id);
             UpdateOp op = new UpdateOp(id, true);
-            NodeDocument.setModified(op, r);
+            setModified(op, r);
             NodeDocument.setDeleted(op, r, true);
             store.create(NODES, of(op));
         }
@@ -140,7 +142,7 @@ public class VersionGCSupportTest {
             String id = getIdFromPath("/doc-modified" + i);
             ids.add(id);
             UpdateOp op = new UpdateOp(id, true);
-            NodeDocument.setModified(op, r);
+            setModified(op, r);
             store.create(NODES, of(op));
         }
 
@@ -174,7 +176,7 @@ public class VersionGCSupportTest {
         String id = getIdFromPath("/doc-del");
         ids.add(id);
         UpdateOp op = new UpdateOp(id, true);
-        NodeDocument.setModified(op, r);
+        setModified(op, r);
         NodeDocument.setDeleted(op, r, true);
         store.create(NODES, of(op));
 
@@ -190,7 +192,7 @@ public class VersionGCSupportTest {
         String id = getIdFromPath("/doc-modified");
         ids.add(id);
         UpdateOp op = new UpdateOp(id, true);
-        NodeDocument.setModified(op, r);
+        setModified(op, r);
         store.create(NODES, of(op));
 
         NodeDocument oldestModifiedDoc = gcSupport.getOldestModifiedDoc(SIMPLE);
@@ -200,6 +202,110 @@ public class VersionGCSupportTest {
         assertEquals(id, oldestModifiedDocId);
     }
 
+    @Test
+    public void findModifiedDocsWhenModifiedIsDifferent() {
+        long secs = 42;
+        long offset = SECONDS.toMillis(secs);
+        List<UpdateOp> updateOps = new ArrayList<>(5_001);
+        for (int i = 0; i < 5_001; i++) {
+            Revision r = new Revision(offset + (i * 5), 0, 1);
+            String id = getIdFromPath("/x" + i);
+            ids.add(id);
+            UpdateOp op = new UpdateOp(id, true);
+            setModified(op, r);
+            updateOps.add(op);
+        }
+        // create 5_001 documents, each with a revision timestamp 5 ms later than the previous one
+        store.create(NODES, updateOps);
+
+        NodeDocument oldestModifiedDoc = gcSupport.getOldestModifiedDoc(SIMPLE);
+        String oldestModifiedDocId = oldestModifiedDoc.getId();
+        long oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
+        assertEquals(40L, oldestModifiedDocTs);
+        assertEquals("1:/x0", oldestModifiedDocId);
+
+        for(int i = 0; i < 5; i++) {
+            Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), Long.MAX_VALUE, 1000, oldestModifiedDocId);
+            assertTrue(isInOrder(modifiedDocs, (o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2)));
+            long count = stream(modifiedDocs.spliterator(), false).count();
+            assertEquals(1000, count);
+            for (NodeDocument modifiedDoc : modifiedDocs) {
+                oldestModifiedDoc = modifiedDoc;
+            }
+            oldestModifiedDocId = oldestModifiedDoc.getId();
+            oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
+        }
+    }
+
+    @Test
+    public void findModifiedDocsWhenOldestDocIsPresent() {
+        long offset = SECONDS.toMillis(42);
+        List<UpdateOp> updateOps = new ArrayList<>(5_001);
+        for (int i = 0; i < 5_001; i++) {
+            Revision r = new Revision(offset, 0, 1);
+            String id = getIdFromPath("/x" + i);
+            ids.add(id);
+            UpdateOp op = new UpdateOp(id, true);
+            setModified(op, r);
+            updateOps.add(op);
+        }
+        // create 5_001 documents, all sharing the same revision timestamp
+        store.create(NODES, updateOps);
+
+        NodeDocument oldestModifiedDoc = gcSupport.getOldestModifiedDoc(SIMPLE);
+        String oldestModifiedDocId = oldestModifiedDoc.getId();
+        long oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
+        assertEquals(40L, oldestModifiedDocTs);
+        assertEquals("1:/x0", oldestModifiedDocId);
+
+        for(int i = 0; i < 5; i++) {
+            Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), Long.MAX_VALUE, 1000, oldestModifiedDocId);
+            assertTrue(isInOrder(modifiedDocs, (o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2)));
+            long count = stream(modifiedDocs.spliterator(), false).count();
+            assertEquals(1000, count);
+            for (NodeDocument modifiedDoc : modifiedDocs) {
+                oldestModifiedDoc = modifiedDoc;
+            }
+            oldestModifiedDocId = oldestModifiedDoc.getId();
+            oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
+        }
+    }
+
+    @Test
+    public void findModifiedDocsWhenOldestDocIsAbsent() {
+        // No oldest modified doc exists before the docs are created, so paging starts from MIN_ID_VALUE and timestamp 0
+        NodeDocument oldestModifiedDoc = gcSupport.getOldestModifiedDoc(SIMPLE);
+        String oldestModifiedDocId = MIN_ID_VALUE;
+        long oldestModifiedDocTs = 0L;
+        assertEquals(NULL, oldestModifiedDoc);
+
+        long offset = SECONDS.toMillis(42);
+        List<UpdateOp> updateOps = new ArrayList<>(5_000);
+        for (int i = 0; i < 5_000; i++) {
+            Revision r = new Revision(offset, 0, 1);
+            String id = getIdFromPath("/x" + i);
+            ids.add(id);
+            UpdateOp op = new UpdateOp(id, true);
+            setModified(op, r);
+            updateOps.add(op);
+        }
+        // create 5_000 documents, all sharing the same revision timestamp
+        store.create(NODES, updateOps);
+
+
+        for(int i = 0; i < 5; i++) {
+            Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), Long.MAX_VALUE, 1000, oldestModifiedDocId);
+            assertTrue(isInOrder(modifiedDocs, (o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2)));
+            long count = stream(modifiedDocs.spliterator(), false).count();
+            assertEquals(1000, count);
+            for (NodeDocument modifiedDoc : modifiedDocs) {
+                oldestModifiedDoc = modifiedDoc;
+            }
+            oldestModifiedDocId = oldestModifiedDoc.getId();
+            oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
+        }
+    }
+
     private void assertPossiblyDeleted(long fromSeconds, long toSeconds, long num) {
         Iterable<NodeDocument> docs = gcSupport.getPossiblyDeletedDocs(SECONDS.toMillis(fromSeconds), SECONDS.toMillis(toSeconds));
         assertEquals(num, stream(docs.spliterator(), false).count());
@@ -207,7 +313,6 @@ public class VersionGCSupportTest {
 
     private void assertModified(long fromSeconds, long toSeconds, long num) {
         Iterable<NodeDocument> docs = gcSupport.getModifiedDocs(SECONDS.toMillis(fromSeconds), SECONDS.toMillis(toSeconds), 10, MIN_ID_VALUE);
-        docs.forEach(d -> System.out.println(d.getModified() + " " + d.getId()));
         assertEquals(num, stream(docs.spliterator(), false).count());
         assertTrue(isInOrder(docs, (o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2)));
     }


[jackrabbit-oak] 23/28: OAK-10199 : added bundled properties while calculating the properties retain set as well

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch DetailedGC/OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit c9abe5e5cf10ef30e38c399d5ecc790236eb41d2
Author: Rishabh Kumar <di...@adobe.com>
AuthorDate: Wed Aug 2 15:55:20 2023 +0530

    OAK-10199 : added bundled properties while calculating the properties retain set as well
---
 .../jackrabbit/oak/plugins/document/VersionGarbageCollector.java       | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
index f2a334b75b..8b84d5c4b1 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
@@ -882,7 +882,8 @@ public class VersionGarbageCollector {
                 // eligible to be garbage collected.
 
                 final Set<String> retainPropSet = ofNullable(doc.getNodeAtRevision(nodeStore, headRevision, null))
-                        .map(DocumentNodeState::getPropertyNames)
+                        .map(DocumentNodeState::getAllBundledProperties)
+                        .map(Map::keySet)
                         .map(p -> p.stream().map(Utils::escapePropertyName).collect(toSet()))
                         .orElse(emptySet());
 


[jackrabbit-oak] 25/28: OAK-10199 : renamed method name to avoid confusion with other variable of same name

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch DetailedGC/OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit f5846e51937e8dfa025a1e4b25b61ac6c6e110f6
Author: Rishabh Kumar <di...@adobe.com>
AuthorDate: Fri Aug 4 14:18:02 2023 +0530

    OAK-10199 : renamed method name to avoid confusion with other variable of same name
---
 .../plugins/document/VersionGarbageCollector.java  | 31 +++++++++++-----------
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
index 8b84d5c4b1..c65ea1dc01 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
@@ -633,8 +633,7 @@ public class VersionGarbageCollector {
          * @param headRevision the current head revision of node store
          * @param rec {@link VersionGCRecommendations} to recommend GC operation
          */
-        private void collectDetailedGarbage(final GCPhases phases, final RevisionVector headRevision, final VersionGCRecommendations rec)
-                throws IOException {
+        private void collectDetailedGarbage(final GCPhases phases, final RevisionVector headRevision, final VersionGCRecommendations rec) {
 
             final long oldestModifiedMs = rec.scopeDetailedGC.fromMs;
             final long toModifiedMs = rec.scopeDetailedGC.toMs;
@@ -645,15 +644,15 @@ public class VersionGarbageCollector {
             long oldModifiedMs = oldestModifiedMs;
 
             try (DetailedGC gc = new DetailedGC(headRevision, monitor, cancel)) {
-                long fromModified = oldestModifiedMs;
+                long fromModifiedMs = oldestModifiedMs;
                 String fromId = ofNullable(oldestModifiedDocId).orElse(MIN_ID_VALUE);
                 NodeDocument lastDoc;
                 if (phases.start(GCPhase.DETAILED_GC)) {
-                    while (foundDoc && fromModified < toModifiedMs && docsTraversed < PROGRESS_BATCH_SIZE) {
+                    while (foundDoc && fromModifiedMs < toModifiedMs && docsTraversed < PROGRESS_BATCH_SIZE) {
                         // set foundDoc to false to allow exiting the while loop
                         foundDoc = false;
                         lastDoc = null;
-                        Iterable<NodeDocument> itr = versionStore.getModifiedDocs(fromModified, toModifiedMs, DETAILED_GC_BATCH_SIZE, fromId);
+                        Iterable<NodeDocument> itr = versionStore.getModifiedDocs(fromModifiedMs, toModifiedMs, DETAILED_GC_BATCH_SIZE, fromId);
                         try {
                             for (NodeDocument doc : itr) {
                                 foundDoc = true;
@@ -666,7 +665,7 @@ public class VersionGarbageCollector {
                                 docsTraversed++;
                                 if (docsTraversed % 100 == 0) {
                                     monitor.info("Iterated through {} documents so far. {} had detailed garbage",
-                                            docsTraversed, gc.getGarbageDocsCount());
+                                            docsTraversed, gc.getGarbageCount());
                                 }
 
                                 lastDoc = doc;
@@ -680,10 +679,10 @@ public class VersionGarbageCollector {
                                 if (modified == null) {
                                     monitor.warn("collectDetailedGarbage : document has no _modified property : {}",
                                             doc.getId());
-                                } else if (SECONDS.toMillis(modified) < fromModified) {
+                                } else if (SECONDS.toMillis(modified) < fromModifiedMs) {
                                     monitor.warn(
                                             "collectDetailedGarbage : document has older _modified than query boundary : {} (from: {}, to: {})",
-                                            modified, fromModified, toModifiedMs);
+                                            modified, fromModifiedMs, toModifiedMs);
                                 }
                             }
                             // now remove the garbage in one go, if any
@@ -692,13 +691,13 @@ public class VersionGarbageCollector {
                                 phases.stop(GCPhase.DETAILED_GC_CLEANUP);
                             }
                             if (lastDoc != null) {
-                                fromModified = lastDoc.getModified() == null ? oldModifiedMs : SECONDS.toMillis(lastDoc.getModified());
+                                fromModifiedMs = lastDoc.getModified() == null ? oldModifiedMs : SECONDS.toMillis(lastDoc.getModified());
                                 fromId = lastDoc.getId();
                             }
                         } finally {
                             Utils.closeIfCloseable(itr);
-                            phases.stats.oldestModifiedDocTimeStamp = fromModified;
-                            if (fromModified > oldModifiedMs) {
+                            phases.stats.oldestModifiedDocTimeStamp = fromModifiedMs;
+                            if (fromModifiedMs > oldModifiedMs) {
                                 // we have moved ahead, now we can reset oldestModifiedId to min value
                                 fromId = MIN_ID_VALUE;
                                 phases.stats.oldestModifiedDocId = MIN_ID_VALUE;
@@ -707,14 +706,14 @@ public class VersionGarbageCollector {
                                 // save the last _id traversed to avoid re-fetching of ids
                                 phases.stats.oldestModifiedDocId = fromId;
                             }
-                            oldModifiedMs = fromModified;
+                            oldModifiedMs = fromModifiedMs;
                         }
                         // if we didn't find any document i.e. either we are already at last document
                         // of current timeStamp or there is no document for this timeStamp
-                        // we need to reset fromId & increment fromModified and check again
+                        // we need to reset fromId & increment fromModifiedMs and check again
                         if (!foundDoc && !Objects.equals(fromId, MIN_ID_VALUE)) {
                             fromId = MIN_ID_VALUE;
-                            fromModified = fromModified + SECONDS.toMillis(5);
+                            fromModifiedMs = fromModifiedMs + SECONDS.toMillis(5);
                             foundDoc = true; // to run while loop again
                         }
                     }
@@ -865,7 +864,7 @@ public class VersionGarbageCollector {
 
         private void collectUnmergedBranchCommitDocument(final NodeDocument doc, final GCPhases phases, final UpdateOp updateOp) {
             if (phases.start(GCPhase.DETAILED_GC_COLLECT_UNMERGED_BC)){
-                // TODO add umerged BC collection logic
+                // TODO add unmerged BC collection logic
                 phases.stop(GCPhase.DETAILED_GC_COLLECT_UNMERGED_BC);
             }
 
@@ -912,7 +911,7 @@ public class VersionGarbageCollector {
 
         }
 
-        int getGarbageDocsCount() {
+        int getGarbageCount() {
             return totalGarbageDocsCount;
         }
 


[jackrabbit-oak] 02/28: OAK-10199 : disable the detailGc in tearDown to avoid side-effects

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch DetailedGC/OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 0c2f2c6bf773c4d982b58f3c5978d7e5fcdbb782
Author: Stefan Egli <st...@apache.org>
AuthorDate: Thu Apr 20 18:38:45 2023 +0200

    OAK-10199 : disable the detailGc in tearDown to avoid side-effects
---
 .../java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java   | 1 +
 1 file changed, 1 insertion(+)

diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java
index 1bd81ce89c..445e7c4275 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java
@@ -95,6 +95,7 @@ public class VersionGCTest {
 
     @After
     public void tearDown() throws Exception {
+        DetailGCHelper.disableDetailGC(ns);
         execService.shutdown();
         execService.awaitTermination(1, MINUTES);
     }


[jackrabbit-oak] 12/28: OAK-10199 : fixed the test case with different revision for DetailedGC

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch DetailedGC/OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 12d0837e08ec91efadfeb0903f831dff879111f5
Author: Rishabh Kumar <di...@adobe.com>
AuthorDate: Mon Jun 19 23:11:30 2023 +0530

    OAK-10199 : fixed the test case with different revision for DetailedGC
---
 .../jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java     | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
index f6e8554252..80dd47dee6 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
@@ -342,7 +342,6 @@ public class VersionGarbageCollectorIT {
 
     // Test when we have more than 1000 deleted properties with different revisions
     @Test
-    @Ignore
     public void testGCDeletedProps_2() throws Exception {
         //1. Create nodes with properties
         NodeBuilder b1 = null;
@@ -356,7 +355,7 @@ public class VersionGarbageCollectorIT {
             }
             store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
             // increase the clock to create new revision for next batch
-            clock.waitUntil(Revision.getCurrentTimestamp() + SECONDS.toMillis(k * 5));
+            clock.waitUntil(Revision.getCurrentTimestamp() + (k * 5));
         }
 
         // enable the detailed gc flag


[jackrabbit-oak] 03/28: OAK-10199 : provided support for feature toggle & osgi config for detailed gc

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch DetailedGC/OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 25e5b1363e3cf95dc6aeca92c8c9506854fe82fa
Author: Rishabh Kumar <di...@adobe.com>
AuthorDate: Mon Apr 24 13:07:44 2023 +0530

    OAK-10199 : provided support for feature toggle & osgi config for detailed gc
---
 .../plugins/document/DocumentNodeStoreHelper.java  |   4 +-
 .../jackrabbit/oak/run/RevisionsCommand.java       |   3 +-
 .../oak/plugins/document/Configuration.java        |   1 +
 .../oak/plugins/document/DocumentNodeStore.java    |   3 +-
 .../plugins/document/DocumentNodeStoreBuilder.java |   1 +
 .../plugins/document/DocumentNodeStoreService.java |   8 +
 .../oak/plugins/document/NodeDocument.java         |  16 ++
 .../plugins/document/VersionGCRecommendations.java |  43 ++++-
 .../oak/plugins/document/VersionGCSupport.java     |  98 ++++++-----
 .../plugins/document/VersionGarbageCollector.java  | 183 ++++++++++++---------
 .../document/mongo/MongoVersionGCSupport.java      |  42 ++++-
 .../oak/plugins/document/util/Utils.java           |  11 ++
 .../DocumentNodeStoreServiceConfigurationTest.java |   1 +
 .../oak/plugins/document/VersionGCQueryTest.java   |   4 +-
 .../oak/plugins/document/VersionGCTest.java        |  11 +-
 .../document/VersionGarbageCollectorIT.java        |   8 +-
 .../mongo/MongoDocumentNodeStoreBuilderTest.java   |  12 ++
 .../oak/plugins/document/util/UtilsTest.java       |  41 ++++-
 18 files changed, 345 insertions(+), 145 deletions(-)

diff --git a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreHelper.java b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreHelper.java
index cf839c88c5..3b39d4c3a1 100644
--- a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreHelper.java
+++ b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreHelper.java
@@ -72,8 +72,8 @@ public class DocumentNodeStoreHelper {
     }
 
     public static VersionGarbageCollector createVersionGC(
-            DocumentNodeStore nodeStore, VersionGCSupport gcSupport) {
-        return new VersionGarbageCollector(nodeStore, gcSupport);
+            DocumentNodeStore nodeStore, VersionGCSupport gcSupport, final boolean detailedGCEnabled) {
+        return new VersionGarbageCollector(nodeStore, gcSupport, detailedGCEnabled);
     }
 
     private static Iterable<BlobReferences> scan(DocumentNodeStore store,
diff --git a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/RevisionsCommand.java b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/RevisionsCommand.java
index e418148952..b995910c57 100644
--- a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/RevisionsCommand.java
+++ b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/RevisionsCommand.java
@@ -60,6 +60,7 @@ import static java.util.concurrent.TimeUnit.SECONDS;
 import static org.apache.jackrabbit.oak.plugins.document.DocumentNodeStoreHelper.createVersionGC;
 import static org.apache.jackrabbit.oak.plugins.document.FormatVersion.versionOf;
 import static org.apache.jackrabbit.oak.plugins.document.util.Utils.getRootDocument;
+import static org.apache.jackrabbit.oak.plugins.document.util.Utils.isDetailedGCEnabled;
 import static org.apache.jackrabbit.oak.plugins.document.util.Utils.timestampToString;
 import static org.apache.jackrabbit.oak.run.Utils.asCloseable;
 import static org.apache.jackrabbit.oak.run.Utils.createDocumentMKBuilder;
@@ -226,7 +227,7 @@ public class RevisionsCommand implements Command {
         useMemoryBlobStore(builder);
         // create a version GC that operates on a read-only DocumentNodeStore
         // and a GC support with a writable DocumentStore
-        VersionGarbageCollector gc = createVersionGC(builder.build(), gcSupport);
+        VersionGarbageCollector gc = createVersionGC(builder.build(), gcSupport, isDetailedGCEnabled(builder));
 
         VersionGCOptions gcOptions = gc.getOptions();
         gcOptions = gcOptions.withDelayFactor(options.getDelay());
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/Configuration.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/Configuration.java
index d9a00cb28d..ae7aa143d2 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/Configuration.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/Configuration.java
@@ -32,6 +32,7 @@ import static org.apache.jackrabbit.oak.plugins.document.DocumentNodeStoreBuilde
 import static org.apache.jackrabbit.oak.plugins.document.DocumentNodeStoreBuilder.DEFAULT_NODE_CACHE_PERCENTAGE;
 import static org.apache.jackrabbit.oak.plugins.document.DocumentNodeStoreBuilder.DEFAULT_PREV_DOC_CACHE_PERCENTAGE;
 import static org.apache.jackrabbit.oak.plugins.document.DocumentNodeStoreBuilder.DEFAULT_UPDATE_LIMIT;
+import static org.apache.jackrabbit.oak.plugins.document.DocumentNodeStoreService.DEFAULT_DETAILED_GC_ENABLED;
 import static org.apache.jackrabbit.oak.plugins.document.DocumentNodeStoreService.DEFAULT_THROTTLING_ENABLED;
 
 @ObjectClassDefinition(
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStore.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStore.java
index 9c4cee90d0..a22f711f1c 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStore.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStore.java
@@ -38,6 +38,7 @@ import static org.apache.jackrabbit.oak.plugins.document.Path.ROOT;
 import static org.apache.jackrabbit.oak.plugins.document.util.Utils.alignWithExternalRevisions;
 import static org.apache.jackrabbit.oak.plugins.document.util.Utils.getIdFromPath;
 import static org.apache.jackrabbit.oak.plugins.document.util.Utils.getModuleVersion;
+import static org.apache.jackrabbit.oak.plugins.document.util.Utils.isDetailedGCEnabled;
 import static org.apache.jackrabbit.oak.plugins.document.util.Utils.isThrottlingEnabled;
 import static org.apache.jackrabbit.oak.plugins.document.util.Utils.pathToId;
 import static org.apache.jackrabbit.oak.spi.observation.ChangeSet.COMMIT_CONTEXT_OBSERVATION_CHANGESET;
@@ -641,7 +642,7 @@ public final class DocumentNodeStore
         this.branches = new UnmergedBranches();
         this.asyncDelay = builder.getAsyncDelay();
         this.versionGarbageCollector = new VersionGarbageCollector(
-                this, builder.createVersionGCSupport());
+                this, builder.createVersionGCSupport(), isDetailedGCEnabled(builder));
         this.versionGarbageCollector.setStatisticsProvider(builder.getStatisticsProvider());
         this.versionGarbageCollector.setGCMonitor(builder.getGCMonitor());
         this.journalGarbageCollector = new JournalGarbageCollector(
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreBuilder.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreBuilder.java
index 6d93278b5d..aa3ab1ea81 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreBuilder.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreBuilder.java
@@ -125,6 +125,7 @@ public class DocumentNodeStoreBuilder<T extends DocumentNodeStoreBuilder<T>> {
     private boolean isReadOnlyMode = false;
     private Feature prefetchFeature;
     private Feature docStoreThrottlingFeature;
+    private Feature docStoreDetailedGCFeature;
     private Weigher<CacheValue, CacheValue> weigher = new EmpiricalWeigher();
     private long memoryCacheSize = DEFAULT_MEMORY_CACHE_SIZE;
     private int nodeCachePercentage = DEFAULT_NODE_CACHE_PERCENTAGE;
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
index 42bf88c120..ad01cda3ca 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
@@ -138,6 +138,7 @@ public class DocumentNodeStoreService {
     static final String DEFAULT_DB = "oak";
     static final boolean DEFAULT_SO_KEEP_ALIVE = true;
     static final boolean DEFAULT_THROTTLING_ENABLED = false;
+    static final boolean DEFAULT_DETAILED_GC_ENABLED = false;
     static final int DEFAULT_MONGO_LEASE_SO_TIMEOUT_MILLIS = 30000;
     static final String DEFAULT_PERSISTENT_CACHE = "cache";
     static final String DEFAULT_JOURNAL_CACHE = "diff-cache";
@@ -181,6 +182,11 @@ public class DocumentNodeStoreService {
      */
     private static final String FT_NAME_DOC_STORE_THROTTLING = "FT_THROTTLING_OAK-9909";
 
+    /**
+     * Feature toggle name to enable detailed GC for Mongo Document Store
+     */
+    private static final String FT_NAME_DEATILED_GC = "FT_DETAILED_GC_OAK-10199";
+
     // property name constants - values can come from framework properties or OSGi config
     public static final String CUSTOM_BLOB_STORE = "customBlobStore";
     public static final String PROP_REV_RECOVERY_INTERVAL = "lastRevRecoveryJobIntervalInSecs";
@@ -216,6 +222,7 @@ public class DocumentNodeStoreService {
     private JournalPropertyHandlerFactory journalPropertyHandlerFactory = new JournalPropertyHandlerFactory();
     private Feature prefetchFeature;
     private Feature docStoreThrottlingFeature;
+    private Feature docStoreDetailedGCFeature;
     private ComponentContext context;
     private Whiteboard whiteboard;
     private long deactivationTimestamp = 0;
@@ -250,6 +257,7 @@ public class DocumentNodeStoreService {
         documentStoreType = DocumentStoreType.fromString(this.config.documentStoreType());
         prefetchFeature = Feature.newFeature(FT_NAME_PREFETCH, whiteboard);
         docStoreThrottlingFeature = Feature.newFeature(FT_NAME_DOC_STORE_THROTTLING, whiteboard);
+        docStoreDetailedGCFeature = Feature.newFeature(FT_NAME_DEATILED_GC, whiteboard);
 
         registerNodeStoreIfPossible();
     }
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
index c79c6adfc4..71abba0a2e 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
@@ -32,6 +32,7 @@ import java.util.SortedSet;
 import java.util.TreeMap;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicLong;
+import java.util.stream.Collectors;
 
 import org.apache.jackrabbit.guava.common.base.Function;
 import org.apache.jackrabbit.guava.common.base.Predicate;
@@ -66,6 +67,7 @@ import static org.apache.jackrabbit.guava.common.collect.Iterables.filter;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.mergeSorted;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.transform;
 import static java.util.Objects.requireNonNull;
+import static java.util.stream.Collectors.toMap;
 import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
 import static org.apache.jackrabbit.oak.plugins.document.StableRevisionComparator.REVERSE;
 import static org.apache.jackrabbit.oak.plugins.document.UpdateOp.Key;
@@ -1669,6 +1671,20 @@ public final class NodeDocument extends Document {
         return map;
     }
 
+    /**
+     * Returns all the properties on this document
+     * @return Map of all properties along with their values
+     */
+    @NotNull
+    Map<String, SortedMap<Revision, String>> getProperties() {
+        return data
+                .keySet()
+                .stream()
+                .filter(Utils::isPropertyName)
+                .map(o -> Map.entry(o, getLocalMap(o)))
+                .collect(toMap(Entry::getKey, Entry::getValue));
+    }
+
     /**
      * @return the {@link #REVISIONS} stored on this document.
      */
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
index ac47cc69d8..d8b091261d 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
@@ -21,6 +21,7 @@ package org.apache.jackrabbit.oak.plugins.document;
 import java.util.Map;
 import java.util.concurrent.TimeUnit;
 
+import com.google.common.collect.ImmutableMap;
 import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.VersionGCStats;
 import org.apache.jackrabbit.oak.plugins.document.util.TimeInterval;
 import org.apache.jackrabbit.oak.plugins.document.util.Utils;
@@ -31,6 +32,9 @@ import org.slf4j.LoggerFactory;
 
 import org.apache.jackrabbit.guava.common.collect.Maps;
 
+import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP;
+import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP;
+
 /**
  * Gives a recommendation about parameters for the next revision garbage collection run.
  */
@@ -43,6 +47,7 @@ public class VersionGCRecommendations {
 
     final boolean ignoreDueToCheckPoint;
     final TimeInterval scope;
+    final TimeInterval scopeFullGC;
     final long maxCollect;
     final long deleteCandidateCount;
     final long lastOldestTimestamp;
@@ -81,6 +86,7 @@ public class VersionGCRecommendations {
         long deletedOnceCount = 0;
         long suggestedIntervalMs;
         long oldestPossible;
+        long oldestPossibleFullGC;
         long collectLimit = options.collectLimit;
 
         this.vgc = vgc;
@@ -90,7 +96,7 @@ public class VersionGCRecommendations {
         TimeInterval keep = new TimeInterval(clock.getTime() - maxRevisionAgeMs, Long.MAX_VALUE);
 
         Map<String, Long> settings = getLongSettings();
-        lastOldestTimestamp = settings.get(VersionGarbageCollector.SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP);
+        lastOldestTimestamp = settings.get(SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP);
         if (lastOldestTimestamp == 0) {
             log.debug("No lastOldestTimestamp found, querying for the oldest deletedOnce candidate");
             oldestPossible = vgc.getOldestDeletedOnceTimestamp(clock, options.precisionMs) - 1;
@@ -102,7 +108,21 @@ public class VersionGCRecommendations {
         TimeInterval scope = new TimeInterval(oldestPossible, Long.MAX_VALUE);
         scope = scope.notLaterThan(keep.fromMs);
 
-        fullDetailGCTimestamp = settings.get(VersionGarbageCollector.SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP);
+        fullDetailGCTimestamp = settings.get(SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP);
+        if (fullDetailGCTimestamp == 0) {
+            if (log.isDebugEnabled()) {
+                log.debug("No fullDetailGCTimestamp found, querying for the oldest deletedOnce candidate");
+            }
+            oldestPossibleFullGC = vgc.getOldestModifiedTimestamp(clock) - 1;
+            if (log.isDebugEnabled()) {
+                log.debug("fullDetailGCTimestamp found: {}", Utils.timestampToString(oldestPossibleFullGC));
+            }
+        } else {
+            oldestPossibleFullGC = fullDetailGCTimestamp - 1;
+        }
+
+        TimeInterval scopeFullGC = new TimeInterval(oldestPossibleFullGC, Long.MAX_VALUE);
+        scopeFullGC = scopeFullGC.notLaterThan(keep.fromMs);
 
         suggestedIntervalMs = settings.get(VersionGarbageCollector.SETTINGS_COLLECTION_REC_INTERVAL_PROP);
         if (suggestedIntervalMs > 0) {
@@ -162,6 +182,7 @@ public class VersionGCRecommendations {
         this.precisionMs = options.precisionMs;
         this.ignoreDueToCheckPoint = ignoreDueToCheckPoint;
         this.scope = scope;
+        this.scopeFullGC = scopeFullGC;
         this.scopeIsComplete = scope.toMs >= keep.fromMs;
         this.maxCollect = collectLimit;
         this.suggestedIntervalMs = suggestedIntervalMs;
@@ -185,7 +206,10 @@ public class VersionGCRecommendations {
             stats.needRepeat = true;
         } else if (!stats.canceled && !stats.ignoredGCDueToCheckPoint) {
             // success, we would not expect to encounter revisions older than this in the future
-            setLongSetting(VersionGarbageCollector.SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP, scope.toMs);
+//            setLongSetting(SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP, scope.toMs);
+//            setLongSetting(SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP, stats.oldestModifiedGced);
+            setLongSetting(ImmutableMap.of(SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP, scope.toMs,
+                    SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP, stats.oldestModifiedGced));
 
             int count = stats.deletedDocGCCount - stats.deletedLeafDocGCCount;
             double usedFraction;
@@ -218,9 +242,9 @@ public class VersionGCRecommendations {
         Document versionGCDoc = vgc.getDocumentStore().find(Collection.SETTINGS, VersionGarbageCollector.SETTINGS_COLLECTION_ID, 0);
         Map<String, Long> settings = Maps.newHashMap();
         // default values
-        settings.put(VersionGarbageCollector.SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP, 0L);
+        settings.put(SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP, 0L);
         settings.put(VersionGarbageCollector.SETTINGS_COLLECTION_REC_INTERVAL_PROP, 0L);
-        settings.put(VersionGarbageCollector.SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP, -1L);
+        settings.put(SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP, 0L);
         if (versionGCDoc != null) {
             for (String k : versionGCDoc.keySet()) {
                 Object value = versionGCDoc.get(k);
@@ -233,8 +257,15 @@ public class VersionGCRecommendations {
     }
 
     void setLongSetting(String propName, long val) {
+        setLongSetting(Map.of(propName, val));
+//        UpdateOp updateOp = new UpdateOp(VersionGarbageCollector.SETTINGS_COLLECTION_ID, true);
+//        updateOp.set(propName, val);
+//        vgc.getDocumentStore().createOrUpdate(Collection.SETTINGS, updateOp);
+    }
+
+    void setLongSetting(final Map<String, Long> propValMap) {
         UpdateOp updateOp = new UpdateOp(VersionGarbageCollector.SETTINGS_COLLECTION_ID, true);
-        updateOp.set(propName, val);
+        propValMap.forEach(updateOp::set);
         vgc.getDocumentStore().createOrUpdate(Collection.SETTINGS, updateOp);
     }
 }
\ No newline at end of file
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
index 0e5c26c83d..f23340acbc 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
@@ -20,11 +20,14 @@
 package org.apache.jackrabbit.oak.plugins.document;
 
 import static org.apache.jackrabbit.guava.common.collect.Iterables.filter;
+import static java.util.stream.Collectors.toList;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.getModifiedInSecs;
 import static org.apache.jackrabbit.oak.plugins.document.util.Utils.getAllDocuments;
 import static org.apache.jackrabbit.oak.plugins.document.util.Utils.getSelectedDocuments;
 
 import java.util.Set;
+import java.util.stream.StreamSupport;
 
 import org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType;
 import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.VersionGCStats;
@@ -59,53 +62,40 @@ public class VersionGCSupport {
      * @param toModified the upper bound modified timestamp (exclusive)
      * @return matching documents.
      */
-    public Iterable<NodeDocument> getPossiblyDeletedDocs(final long fromModified,
-                                                         final long toModified) {
-        return filter(getSelectedDocuments(store, NodeDocument.DELETED_ONCE, 1), new Predicate<NodeDocument>() {
-            @Override
-            public boolean apply(NodeDocument input) {
-                return input.wasDeletedOnce()
-                        && modifiedGreaterThanEquals(input, fromModified)
-                        && modifiedLessThan(input, toModified);
-            }
-
-            private boolean modifiedGreaterThanEquals(NodeDocument doc,
-                                                      long time) {
-                Long modified = doc.getModified();
-                return modified != null && modified.compareTo(getModifiedInSecs(time)) >= 0;
-            }
-
-            private boolean modifiedLessThan(NodeDocument doc,
-                                             long time) {
-                Long modified = doc.getModified();
-                return modified != null && modified.compareTo(getModifiedInSecs(time)) < 0;
-            }
-        });
+    public Iterable<NodeDocument> getPossiblyDeletedDocs(final long fromModified, final long toModified) {
+        return StreamSupport
+                .stream(getSelectedDocuments(store, NodeDocument.DELETED_ONCE, 1).spliterator(), false)
+                .filter(input -> input.wasDeletedOnce() && modifiedGreaterThanEquals(input, fromModified) && modifiedLessThan(input, toModified))
+                .collect(toList());
     }
 
     /**
-     * TODO: document me!
+     * Returns documents that have a {@link NodeDocument#MODIFIED_IN_SECS} value
+     * within the given range. The two passed modified timestamps are in milliseconds
+     * since the epoch and the implementation will convert them to seconds at
+     * the granularity of the {@link NodeDocument#MODIFIED_IN_SECS} field and
+     * then perform the comparison.
+     *
+     * @param fromModified the lower bound modified timestamp (inclusive)
+     * @param toModified the upper bound modified timestamp (exclusive)
+     * @param limit the limit of documents to return
+     * @return matching documents.
      */
-    public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified) {
-        return filter(getSelectedDocuments(store, NodeDocument.MODIFIED_IN_SECS, fromModified), new Predicate<NodeDocument>() {
-            @Override
-            public boolean apply(NodeDocument input) {
-                return modifiedGreaterThanEquals(input, fromModified)
-                        && modifiedLessThan(input, toModified);
-            }
-
-            private boolean modifiedGreaterThanEquals(NodeDocument doc,
-                                                      long time) {
-                Long modified = doc.getModified();
-                return modified != null && modified.compareTo(getModifiedInSecs(time)) >= 0;
-            }
+    public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit) {
+        return StreamSupport
+                .stream(getSelectedDocuments(store, MODIFIED_IN_SECS, fromModified).spliterator(), false)
+                .filter(input -> modifiedGreaterThanEquals(input, fromModified) && modifiedLessThan(input, toModified))
+                .limit(limit)
+                .collect(toList());
+    }
 
-            private boolean modifiedLessThan(NodeDocument doc,
-                                             long time) {
-                Long modified = doc.getModified();
-                return modified != null && modified.compareTo(getModifiedInSecs(time)) < 0;
-            }
-        });
+    private boolean modifiedGreaterThanEquals(final NodeDocument doc, final long time) {
+        Long modified = doc.getModified();
+        return modified != null && modified.compareTo(getModifiedInSecs(time)) >= 0;
+    }
+    private boolean modifiedLessThan(final NodeDocument doc, final long time) {
+        Long modified = doc.getModified();
+        return modified != null && modified.compareTo(getModifiedInSecs(time)) < 0;
     }
 
     /**
@@ -185,6 +175,30 @@ public class VersionGCSupport {
         return ts;
     }
 
+    /**
+     * Retrieve the time of the oldest modified document.
+     *
+     * @return the timestamp of the oldest modified document.
+     */
+    public long getOldestModifiedTimestamp(final Clock clock) {
+        long ts = 0;
+        long now = clock.getTime();
+        Iterable<NodeDocument> docs = null;
+
+        LOG.info("find oldest modified document");
+        try {
+            docs = getModifiedDocs(ts, now, 1);
+            if (docs.iterator().hasNext()) {
+                Long modified = docs.iterator().next().getModified();
+                return modified != null ? modified : 0L;
+            }
+        } finally {
+            Utils.closeIfCloseable(docs);
+        }
+        LOG.info("find oldest modified document to be {}", Utils.timestampToString(ts));
+        return ts;
+    }
+
     public long getDeletedOnceCount() throws UnsupportedOperationException {
         throw new UnsupportedOperationException("getDeletedOnceCount()");
     }
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
index e7442a7d15..608ba02398 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
@@ -27,6 +27,7 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.SortedMap;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicReference;
@@ -54,7 +55,7 @@ import org.jetbrains.annotations.Nullable;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import static org.apache.jackrabbit.guava.common.base.Preconditions.checkNotNull;
+import static java.util.Objects.requireNonNull;
 import static org.apache.jackrabbit.guava.common.base.StandardSystemProperty.LINE_SEPARATOR;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.all;
 import static org.apache.jackrabbit.guava.common.collect.Iterators.partition;
@@ -66,6 +67,7 @@ import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_I
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.COMMIT_ROOT_ONLY;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_LEAF;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_NO_BRANCH;
+import static org.apache.jackrabbit.oak.plugins.document.StableRevisionComparator.REVERSE;
 import static org.slf4j.helpers.MessageFormatter.arrayFormat;
 
 public class VersionGarbageCollector {
@@ -115,6 +117,7 @@ public class VersionGarbageCollector {
 
     private final DocumentNodeStore nodeStore;
     private final DocumentStore ds;
+    private final boolean detailedGCEnabled;
     private final VersionGCSupport versionStore;
     private final AtomicReference<GCJob> collector = newReference();
     private VersionGCOptions options;
@@ -122,10 +125,12 @@ public class VersionGarbageCollector {
     private RevisionGCStats gcStats = new RevisionGCStats(StatisticsProvider.NOOP);
 
     VersionGarbageCollector(DocumentNodeStore nodeStore,
-                            VersionGCSupport gcSupport) {
+                            VersionGCSupport gcSupport,
+                            final boolean detailedGCEnabled) {
         this.nodeStore = nodeStore;
         this.versionStore = gcSupport;
         this.ds = gcSupport.getDocumentStore();
+        this.detailedGCEnabled = detailedGCEnabled;
         this.options = new VersionGCOptions();
     }
 
@@ -201,7 +206,7 @@ public class VersionGarbageCollector {
     }
 
     public void setGCMonitor(@NotNull GCMonitor gcMonitor) {
-        this.gcMonitor = checkNotNull(gcMonitor);
+        this.gcMonitor = requireNonNull(gcMonitor);
     }
 
     public VersionGCOptions getOptions() {
@@ -259,6 +264,7 @@ public class VersionGarbageCollector {
     }
 
     public static class VersionGCStats {
+        public long oldestModifiedGced;
         boolean ignoredGCDueToCheckPoint;
         boolean canceled;
         boolean success = true;
@@ -274,14 +280,14 @@ public class VersionGarbageCollector {
         final Stopwatch active = Stopwatch.createUnstarted();
         final Stopwatch collectDeletedDocs = Stopwatch.createUnstarted();
         final Stopwatch checkDeletedDocs = Stopwatch.createUnstarted();
-        final Stopwatch detailGcDocs = Stopwatch.createUnstarted();
+        final Stopwatch detailedGcDocs = Stopwatch.createUnstarted();
         final Stopwatch deleteDeletedDocs = Stopwatch.createUnstarted();
         final Stopwatch collectAndDeleteSplitDocs = Stopwatch.createUnstarted();
         final Stopwatch deleteSplitDocs = Stopwatch.createUnstarted();
         final Stopwatch sortDocIds = Stopwatch.createUnstarted();
         final Stopwatch updateResurrectedDocuments = Stopwatch.createUnstarted();
         long activeElapsed, collectDeletedDocsElapsed, checkDeletedDocsElapsed, deleteDeletedDocsElapsed, collectAndDeleteSplitDocsElapsed,
-                deleteSplitDocsElapsed, sortDocIdsElapsed, updateResurrectedDocumentsElapsed, detailGcDocsElapsed;
+                deleteSplitDocsElapsed, sortDocIdsElapsed, updateResurrectedDocumentsElapsed, detailedGcDocsElapsed;
 
         @Override
         public String toString() {
@@ -318,6 +324,7 @@ public class VersionGarbageCollector {
 
             return "VersionGCStats{" +
                     "ignoredGCDueToCheckPoint=" + ignoredGCDueToCheckPoint +
+                    ", oldestModifiedGced=" + oldestModifiedGced +
                     ", canceled=" + canceled +
                     ", deletedDocGCCount=" + deletedDocGCCount + " (of which leaf: " + deletedLeafDocGCCount + ")" +
                     ", updateResurrectedGCCount=" + updateResurrectedGCCount +
@@ -331,6 +338,7 @@ public class VersionGarbageCollector {
         void addRun(VersionGCStats run) {
             ++iterationCount;
             this.ignoredGCDueToCheckPoint = run.ignoredGCDueToCheckPoint;
+            this.oldestModifiedGced = run.oldestModifiedGced;
             this.canceled = run.canceled;
             this.success = run.success;
             this.limitExceeded = run.limitExceeded;
@@ -350,7 +358,7 @@ public class VersionGarbageCollector {
                 this.deleteSplitDocsElapsed += run.deleteSplitDocsElapsed;
                 this.sortDocIdsElapsed += run.sortDocIdsElapsed;
                 this.updateResurrectedDocumentsElapsed += run.updateResurrectedDocumentsElapsed;
-                this.detailGcDocsElapsed += run.detailGcDocsElapsed;
+                this.detailedGcDocsElapsed += run.detailedGcDocsElapsed;
             } else {
                 // single run -> read from stop watches
                 this.activeElapsed += run.active.elapsed(MICROSECONDS);
@@ -361,7 +369,7 @@ public class VersionGarbageCollector {
                 this.deleteSplitDocsElapsed += run.deleteSplitDocs.elapsed(MICROSECONDS);
                 this.sortDocIdsElapsed += run.sortDocIds.elapsed(MICROSECONDS);
                 this.updateResurrectedDocumentsElapsed += run.updateResurrectedDocuments.elapsed(MICROSECONDS);
-                this.detailGcDocsElapsed += run.detailGcDocs.elapsed(MICROSECONDS);
+                this.detailedGcDocsElapsed += run.detailedGcDocs.elapsed(MICROSECONDS);
             }
         }
     }
@@ -370,7 +378,7 @@ public class VersionGarbageCollector {
         NONE,
         COLLECTING,
         CHECKING,
-        DETAILGC,
+        DETAILED_GC,
         DELETING,
         SORTING,
         SPLITS_CLEANUP,
@@ -398,7 +406,7 @@ public class VersionGarbageCollector {
             this.watches.put(GCPhase.NONE, Stopwatch.createStarted());
             this.watches.put(GCPhase.COLLECTING, stats.collectDeletedDocs);
             this.watches.put(GCPhase.CHECKING, stats.checkDeletedDocs);
-            this.watches.put(GCPhase.DETAILGC, stats.detailGcDocs);
+            this.watches.put(GCPhase.DETAILED_GC, stats.detailedGcDocs);
             this.watches.put(GCPhase.DELETING, stats.deleteDeletedDocs);
             this.watches.put(GCPhase.SORTING, stats.sortDocIds);
             this.watches.put(GCPhase.SPLITS_CLEANUP, stats.collectAndDeleteSplitDocs);
@@ -525,7 +533,10 @@ public class VersionGarbageCollector {
 
                     collectDeletedDocuments(phases, headRevision, rec);
                     collectSplitDocuments(phases, sweepRevisions, rec);
-                    collectDetailGarbage(phases, headRevision, rec);
+                    if (detailedGCEnabled) {
+                        // run only if enabled
+                        collectDetailedGarbage(phases, headRevision, rec);
+                    }
                 }
             } catch (LimitExceededException ex) {
                 stats.limitExceeded = true;
@@ -555,36 +566,33 @@ public class VersionGarbageCollector {
          * followed by voluntary paused (aka throttling) to avoid excessive load on the
          * system. The full repository scan does not have to finish particularly fast,
          * it is okay that it takes a considerable amount of time.
-         * 
-         * @param headRevision
+         *
+         * @param phases {@link GCPhases}
+         * @param headRevision the current head revision
          * @throws IOException
          * @throws LimitExceededException
          */
-        private void collectDetailGarbage(GCPhases phases, RevisionVector headRevision, VersionGCRecommendations rec)
+        private void collectDetailedGarbage(final GCPhases phases, final RevisionVector headRevision, final VersionGCRecommendations rec)
                 throws IOException, LimitExceededException {
-            if (!DETAIL_GC_ENABLED) {
-                // TODO: this toggling should be done nicer asap
-                return;
-            }
             int docsTraversed = 0;
-            DetailGC gc = new DetailGC(headRevision, monitor);
-            try {
-                final long fromModified;
-                final long toModified;
-                if (rec.fullDetailGCTimestamp == -1) {
-                    // then full detail-gc is disabled or over - use regular scope then
-                    fromModified = rec.scope.fromMs;
-                    toModified = rec.scope.toMs;
-                } else {
-                    // then full detail-gc is enabled - use it then
-                    fromModified = rec.fullDetailGCTimestamp; // TODO: once we're passed rec.scope.fromMs we should
-                                                              // disable fullgc
-                    toModified = rec.scope.toMs; // the 'to' here is the max. it will process only eg 1 batch
-                }
-                long oldestGced = fromModified;
-                boolean foundAnything = false;
+            long oldestModifiedGced = rec.scopeFullGC.fromMs;
+            try (DetailedGC gc = new DetailedGC(headRevision, monitor, cancel)) {
+                final long fromModified = rec.scopeFullGC.fromMs;
+                final long toModified = rec.scopeFullGC.toMs;
+//                if (rec.fullDetailGCTimestamp == -1) {
+//                    // then full detail-gc is disabled or over - use regular scope then
+//                    fromModified = rec.scope.fromMs;
+//                    toModified = rec.scope.toMs;
+//                } else {
+//                    // then full detail-gc is enabled - use it then
+//                    fromModified = rec.fullDetailGCTimestamp; // TODO: once we're passed rec.scope.fromMs we should
+//                    // disable fullgc
+//                    toModified = rec.scope.toMs; // the 'to' here is the max. it will process only eg 1 batch
+//                }
+                // TODO : remove me
+                boolean foundAnything = false; // I think this flag is redundant
                 if (phases.start(GCPhase.COLLECTING)) {
-                    Iterable<NodeDocument> itr = versionStore.getModifiedDocs(fromModified, toModified);
+                    Iterable<NodeDocument> itr = versionStore.getModifiedDocs(oldestModifiedGced, toModified, 2000);
                     final Stopwatch timer = Stopwatch.createUnstarted();
                     timer.reset().start();
                     try {
@@ -593,44 +601,54 @@ public class VersionGarbageCollector {
                             if (cancel.get()) {
                                 break;
                             }
-                            foundAnything = true;
-                            if (phases.start(GCPhase.DETAILGC)) {
-                                gc.detailGC(doc, phases);
-                                phases.stop(GCPhase.DETAILGC);
+                            if (phases.start(GCPhase.DETAILED_GC)) {
+                                gc.detailedGC(doc, phases);
+                                phases.stop(GCPhase.DETAILED_GC);
                             }
+
+                            // TODO : remove this code, I don't think it's possible to fetch documents
+                            //  that don't have a _modified field
                             final Long modified = doc.getModified();
                             if (modified == null) {
                                 monitor.warn("collectDetailGarbage : document has no _modified property : {}",
                                         doc.getId());
-                            } else if (modified < oldestGced) {
+                            } else if (modified < oldestModifiedGced) {
                                 monitor.warn(
                                         "collectDetailGarbage : document has older _modified than query boundary : {} (from: {}, to: {})",
                                         modified, fromModified, toModified);
                             } else {
-                                oldestGced = modified;
+                                oldestModifiedGced = modified;
                             }
+                            foundAnything = true;
                             docsTraversed++;
                             if (docsTraversed % PROGRESS_BATCH_SIZE == 0) {
                                 monitor.info("Iterated through {} documents so far. {} had detail garbage",
                                         docsTraversed, gc.getNumDocuments());
                             }
+                            // this would never be hit, since we are only fetching the oldest 2000 elements in batches of 1000
+                            // TODO: remove this if the above-mentioned logic is fine
                             if (rec.maxCollect > 0 && gc.getNumDocuments() > rec.maxCollect) {
                                 // TODO: how would we recover from this?
+                                // If we don't want the above solution, then another option is to use a lower time duration,
+                                // as done in the document deletion process, or to use a lower limit value, or
+                                // we should perform all the update ops in one go
                                 throw new LimitExceededException();
                             }
+                            oldestModifiedGced = modified == null ? fromModified : modified;
                         }
                     } finally {
                         Utils.closeIfCloseable(itr);
+                        // why do we need to stop this here, we are already stopping the original gc run.
+                        // can this be removed
                         delayOnModifications(timer.stop().elapsed(TimeUnit.MILLISECONDS));
+                        phases.stats.oldestModifiedGced = oldestModifiedGced;
                     }
                     phases.stop(GCPhase.COLLECTING);
-                    if (!cancel.get() && foundAnything) {
-                        // TODO: move to evaluate()
-                        rec.setLongSetting(SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP, oldestGced + 1);
-                    }
+//                    if (!cancel.get() && foundAnything) {
+//                        // TODO: move to evaluate()
+//                        rec.setLongSetting(SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP, oldestModifiedGced + 1);
+//                    }
                 }
-            } finally {
-                gc.close();
             }
         }
 
@@ -665,8 +683,7 @@ public class VersionGarbageCollector {
                                              VersionGCRecommendations rec)
                 throws IOException, LimitExceededException {
             int docsTraversed = 0;
-            DeletedDocsGC gc = new DeletedDocsGC(headRevision, cancel, options, monitor);
-            try {
+            try (DeletedDocsGC gc = new DeletedDocsGC(headRevision, cancel, options, monitor)) {
                 if (phases.start(GCPhase.COLLECTING)) {
                     Iterable<NodeDocument> itr = versionStore.getPossiblyDeletedDocs(rec.scope.fromMs, rec.scope.toMs);
                     try {
@@ -731,53 +748,69 @@ public class VersionGarbageCollector {
                     gc.updateResurrectedDocuments(phases.stats);
                     phases.stop(GCPhase.UPDATING);
                 }
-            } finally {
-                gc.close();
             }
         }
     }
 
-    private class DetailGC implements Closeable {
+    private static class DetailedGC implements Closeable {
 
         private final RevisionVector headRevision;
         private final GCMonitor monitor;
+        private final AtomicBoolean cancel;
         private int count;
 
-        public DetailGC(@NotNull RevisionVector headRevision, @NotNull GCMonitor monitor) {
-            this.headRevision = checkNotNull(headRevision);
+        public DetailedGC(@NotNull RevisionVector headRevision, @NotNull GCMonitor monitor, @NotNull AtomicBoolean cancel) {
+            this.headRevision = requireNonNull(headRevision);
             this.monitor = monitor;
+            this.cancel = cancel;
         }
 
-        public void detailGC(NodeDocument doc, GCPhases phases) {
-            deleteSample(doc, phases);
-            deleteUnmergedBranchCommitDocument(doc, phases);
-            deleteDeletedProperties(doc, phases);
-            deleteOldRevisions(doc, phases);
+        public void detailedGC(NodeDocument doc, GCPhases phases) {
+//            deleteSample(doc, phases);
+            UpdateOp updateOp = new UpdateOp(requireNonNull(doc.getId()), false);
+            deleteDeletedProperties(doc, phases, updateOp);
+            deleteUnmergedBranchCommitDocument(doc, phases, updateOp);
+            deleteOldRevisions(doc, phases, updateOp);
         }
 
         /** TODO remove, this is just a skeleton sample */
-        private void deleteSample(NodeDocument doc, GCPhases phases) {
-            if (doc.getId().contains("should_delete")) {
-                if (phases.start(GCPhase.DELETING)) {
-                    monitor.info("deleteSample: should do the deletion now, but this is demo only. I'm still learning");
-                    System.out.println("do the actual deletion");
-                    count++;
-                    phases.stop(GCPhase.DELETING);
-                }
-            }
-        }
-
-        private void deleteUnmergedBranchCommitDocument(NodeDocument doc, GCPhases phases) {
+//        private void deleteSample(NodeDocument doc, GCPhases phases) {
+//            if (doc.getId().contains("should_delete")) {
+//                if (phases.start(GCPhase.DELETING)) {
+//                    monitor.info("deleteSample: should do the deletion now, but this is demo only. I'm still learning");
+//                    System.out.println("do the actual deletion");
+//                    count++;
+//                    phases.stop(GCPhase.DELETING);
+//                }
+//            }
+//        }
+
+        private void deleteUnmergedBranchCommitDocument(NodeDocument doc, GCPhases phases, UpdateOp updateOp) {
             // TODO Auto-generated method stub
 
         }
 
-        private void deleteDeletedProperties(NodeDocument doc, GCPhases phases) {
-            // TODO Auto-generated method stub
+        private void deleteDeletedProperties(final NodeDocument doc, final GCPhases phases, final UpdateOp updateOp) {
+
+            // get Map of all properties along with their values
+            final Map<String, SortedMap<Revision, String>> properties = doc.getProperties();
 
+            // find all the properties which can be removed from document
+            // All the properties whose value is null in their respective
+            // latest revision are eligible to be garbage collected.
+            properties.forEach((propName, revisionStringSortedMap) -> {
+                if (revisionStringSortedMap.keySet()
+                        .stream()
+                        .sorted(REVERSE)
+                        .limit(1)
+                        .anyMatch(revision -> revisionStringSortedMap.get(revision) == null)) {
+                    // set this property for removal
+                    updateOp.remove(propName);
+                }
+            });
         }
 
-        private void deleteOldRevisions(NodeDocument doc, GCPhases phases) {
+        private void deleteOldRevisions(NodeDocument doc, GCPhases phases, UpdateOp updateOp) {
             // TODO Auto-generated method stub
 
         }
@@ -813,8 +846,8 @@ public class VersionGarbageCollector {
                              @NotNull AtomicBoolean cancel,
                              @NotNull VersionGCOptions options,
                              @NotNull GCMonitor monitor) {
-            this.headRevision = checkNotNull(headRevision);
-            this.cancel = checkNotNull(cancel);
+            this.headRevision = requireNonNull(headRevision);
+            this.cancel = requireNonNull(cancel);
             this.timer = Stopwatch.createUnstarted();
             this.options = options;
             this.monitor = monitor;
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
index 93f22f5202..e34d8f36b0 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
@@ -22,6 +22,9 @@ package org.apache.jackrabbit.oak.plugins.document.mongo;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.concat;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.filter;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.transform;
+import static com.mongodb.client.model.Filters.and;
+import static com.mongodb.client.model.Filters.gte;
+import static com.mongodb.client.model.Filters.lt;
 import static java.util.Collections.emptyList;
 import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
 import static org.apache.jackrabbit.oak.plugins.document.Document.ID;
@@ -108,10 +111,10 @@ public class MongoVersionGCSupport extends VersionGCSupport {
     @Override
     public CloseableIterable<NodeDocument> getPossiblyDeletedDocs(final long fromModified, final long toModified) {
         //_deletedOnce == true && _modified >= fromModified && _modified < toModified
-        Bson query = Filters.and(
+        Bson query = and(
                 Filters.eq(DELETED_ONCE, true),
-                Filters.gte(MODIFIED_IN_SECS, getModifiedInSecs(fromModified)),
-                Filters.lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified))
+                gte(MODIFIED_IN_SECS, getModifiedInSecs(fromModified)),
+                lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified))
         );
         FindIterable<BasicDBObject> cursor = getNodeCollection()
                 .find(query).batchSize(batchSize);
@@ -120,6 +123,29 @@ public class MongoVersionGCSupport extends VersionGCSupport {
                 input -> store.convertFromDBObject(NODES, input)));
     }
 
+    /**
+     * Returns at most {@code limit} documents whose {@link NodeDocument#MODIFIED_IN_SECS}
+     * value lies within the given range, in sorted order. The two passed modified timestamps
+     * are in milliseconds since the epoch; the implementation converts them to seconds, the
+     * granularity of the {@link NodeDocument#MODIFIED_IN_SECS} field, before comparing.
+     *
+     * @param fromModified the lower bound modified timestamp (inclusive)
+     * @param toModified   the upper bound modified timestamp (exclusive)
+     * @param limit        the maximum number of documents to return
+     * @return matching documents in sorted order of {@link NodeDocument#MODIFIED_IN_SECS}
+     */
+    @Override
+    public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit) {
+        // _modified >= fromModified && _modified < toModified
+        final Bson query = and(gte(MODIFIED_IN_SECS, getModifiedInSecs(fromModified)),
+                lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified)));
+        final FindIterable<BasicDBObject> cursor = getNodeCollection()
+                .find(query)
+                .sort(new org.bson.Document(MODIFIED_IN_SECS, 1))
+                .limit(limit);
+        return CloseableIterable.wrap(transform(cursor, input -> store.convertFromDBObject(NODES, input)));
+    }
+
     @Override
     public long getDeletedOnceCount() {
         Bson query = Filters.eq(DELETED_ONCE, Boolean.TRUE);
@@ -207,9 +233,9 @@ public class MongoVersionGCSupport extends VersionGCSupport {
         }
         // OAK-8351: this (last) query only contains SD_TYPE and SD_MAX_REV_TIME_IN_SECS
         // so mongodb should really use that _sdType_1__sdMaxRevTime_1 index
-        result.add(Filters.and(
+        result.add(and(
                 Filters.or(orClauses),
-                Filters.lt(SD_MAX_REV_TIME_IN_SECS, getModifiedInSecs(oldestRevTimeStamp))
+                lt(SD_MAX_REV_TIME_IN_SECS, getModifiedInSecs(oldestRevTimeStamp))
                 ));
 
         return result;
@@ -240,16 +266,16 @@ public class MongoVersionGCSupport extends VersionGCSupport {
             Bson idPathClause = Filters.or(
                     Filters.regex(ID, Pattern.compile(".*" + idSuffix)),
                     // previous documents with long paths do not have a '-' in the id
-                    Filters.and(
+                    and(
                             Filters.regex(ID, Pattern.compile("[^-]*")),
                             Filters.regex(PATH, Pattern.compile(".*" + idSuffix))
                     )
             );
 
             long minMaxRevTimeInSecs = Math.min(maxRevTimeInSecs, getModifiedInSecs(r.getTimestamp()));
-            result.add(Filters.and(
+            result.add(and(
                     Filters.eq(SD_TYPE, DEFAULT_NO_BRANCH.typeCode()),
-                    Filters.lt(SD_MAX_REV_TIME_IN_SECS, minMaxRevTimeInSecs),
+                    lt(SD_MAX_REV_TIME_IN_SECS, minMaxRevTimeInSecs),
                     idPathClause
                     ));
         }
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/Utils.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/Utils.java
index 793e3a9433..c9428429bc 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/Utils.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/Utils.java
@@ -894,6 +894,17 @@ public class Utils {
         return builder.isThrottlingEnabled() || (docStoreThrottlingFeature != null && docStoreThrottlingFeature.isEnabled());
     }
 
+    /**
+     * Checks whether detailed GC is enabled for the document store, either via the builder or via the feature toggle.
+     *
+     * @param builder the {@link DocumentNodeStoreBuilder} instance to inspect
+     * @return {@code true} if detailed GC is enabled, {@code false} otherwise
+     */
+    public static boolean isDetailedGCEnabled(final DocumentNodeStoreBuilder<?> builder) {
+        final Feature docStoreDetailedGCFeature = builder.getDocStoreDetailedGCFeature();
+        return builder.isDetailedGCEnabled() || (docStoreDetailedGCFeature != null && docStoreDetailedGCFeature.isEnabled());
+    }
+
     /**
      * Returns true if all the revisions in the {@code a} greater or equals
      * to their counterparts in {@code b}. If {@code b} contains revisions
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreServiceConfigurationTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreServiceConfigurationTest.java
index c46b6f699e..88d9d27553 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreServiceConfigurationTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreServiceConfigurationTest.java
@@ -33,6 +33,7 @@ import org.osgi.framework.BundleContext;
 import org.osgi.service.cm.ConfigurationAdmin;
 import org.osgi.service.component.ComponentContext;
 
+import static org.apache.jackrabbit.oak.plugins.document.DocumentNodeStoreService.DEFAULT_DETAILED_GC_ENABLED;
 import static org.apache.jackrabbit.oak.plugins.document.DocumentNodeStoreService.DEFAULT_THROTTLING_ENABLED;
 import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertEquals;
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCQueryTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCQueryTest.java
index 73eda05759..27333b57cf 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCQueryTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCQueryTest.java
@@ -107,7 +107,7 @@ public class VersionGCQueryTest {
         clock.waitUntil(clock.getTime() + TimeUnit.HOURS.toMillis(1));
 
         VersionGarbageCollector gc = new VersionGarbageCollector(
-                ns, new VersionGCSupport(store));
+                ns, new VersionGCSupport(store), false);
         prevDocIds.clear();
         VersionGCStats stats = gc.gc(30, TimeUnit.MINUTES);
         assertEquals(11, stats.deletedDocGCCount);
@@ -140,7 +140,7 @@ public class VersionGCQueryTest {
         clock.waitUntil(clock.getTime() + TimeUnit.HOURS.toMillis(1));
 
         VersionGarbageCollector gc = new VersionGarbageCollector(
-                ns, new VersionGCSupport(store));
+                ns, new VersionGCSupport(store), false);
         prevDocIds.clear();
         VersionGCStats stats = gc.gc(30, TimeUnit.MINUTES);
         assertEquals(1, stats.deletedDocGCCount);
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java
index 445e7c4275..48f3f362ce 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java
@@ -44,6 +44,7 @@ import org.jetbrains.annotations.NotNull;
 import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.Before;
+import org.junit.Ignore;
 import org.junit.Rule;
 import org.junit.Test;
 
@@ -324,7 +325,7 @@ public class VersionGCTest {
                 deletedOnceCountCalls.incrementAndGet();
                 return Iterables.size(Utils.getSelectedDocuments(store, NodeDocument.DELETED_ONCE, 1));
             }
-        });
+        }, false);
 
         // run first RGC
         gc.gc(1, TimeUnit.HOURS);
@@ -342,21 +343,25 @@ public class VersionGCTest {
 
     // OAK-10199
     @Test
+    @Ignore
     public void testDetailGcDocumentRead_disabled() throws Exception {
         DetailGCHelper.disableDetailGC(ns);
         VersionGCStats stats = gc.gc(30, TimeUnit.MINUTES);
         assertNotNull(stats);
-        assertEquals(0, stats.detailGcDocsElapsed);
+        assertEquals(0, stats.detailedGcDocsElapsed);
     }
 
     @Test
+    @Ignore
     public void testDetailGcDocumentRead_enabled() throws Exception {
         DetailGCHelper.enableDetailGC(ns);
         VersionGCStats stats = gc.gc(30, TimeUnit.MINUTES);
         assertNotNull(stats);
-        assertNotEquals(0, stats.detailGcDocsElapsed);
+        assertNotEquals(0, stats.detailedGcDocsElapsed);
     }
 
+    // OAK-10199
+
     private Future<VersionGCStats> gc() {
         // run gc in a separate thread
         return execService.submit(new Callable<VersionGCStats>() {
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
index e58741733a..d33cc8c7de 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
@@ -497,7 +497,7 @@ public class VersionGarbageCollectorIT {
                         });
             }
         };
-        final VersionGarbageCollector gc = new VersionGarbageCollector(store, gcSupport);
+        final VersionGarbageCollector gc = new VersionGarbageCollector(store, gcSupport, false);
         // start GC -> will try to remove /foo and /bar
         Future<VersionGCStats> f = execService.submit(new Callable<VersionGCStats>() {
             @Override
@@ -658,7 +658,7 @@ public class VersionGarbageCollectorIT {
                 return super.getPossiblyDeletedDocs(fromModified, toModified);
             }
         };
-        gcRef.set(new VersionGarbageCollector(store, gcSupport));
+        gcRef.set(new VersionGarbageCollector(store, gcSupport, false));
         VersionGCStats stats = gcRef.get().gc(30, TimeUnit.MINUTES);
         assertTrue(stats.canceled);
         assertEquals(0, stats.deletedDocGCCount);
@@ -710,7 +710,7 @@ public class VersionGarbageCollectorIT {
                 return super.getPossiblyDeletedDocs(prevLastModifiedTime, lastModifiedTime).iterator();
             }
         };
-        gcRef.set(new VersionGarbageCollector(store, gcSupport));
+        gcRef.set(new VersionGarbageCollector(store, gcSupport, false));
         VersionGCStats stats = gcRef.get().gc(30, TimeUnit.MINUTES);
         assertTrue(stats.canceled);
         assertEquals(0, stats.deletedDocGCCount);
@@ -739,7 +739,7 @@ public class VersionGarbageCollectorIT {
                         });
             }
         };
-        final VersionGarbageCollector gc = new VersionGarbageCollector(store, nonReportingGcSupport);
+        final VersionGarbageCollector gc = new VersionGarbageCollector(store, nonReportingGcSupport, false);
         final long maxAgeHours = 1;
         final long clockDelta = HOURS.toMillis(maxAgeHours) + MINUTES.toMillis(5);
 
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentNodeStoreBuilderTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentNodeStoreBuilderTest.java
index a78aef904e..a08abe05d3 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentNodeStoreBuilderTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentNodeStoreBuilderTest.java
@@ -48,6 +48,18 @@ public class MongoDocumentNodeStoreBuilderTest {
         assertNull(builder.getDocStoreThrottlingFeature());
     }
 
+    @Test
+    public void detailedGCDisabled() {
+        MongoDocumentNodeStoreBuilder builder = new MongoDocumentNodeStoreBuilder();
+        assertFalse(builder.isDetailedGCEnabled());
+    }
+
+    @Test
+    public void detailedGCFeatureToggleDisabled() {
+        MongoDocumentNodeStoreBuilder builder = new MongoDocumentNodeStoreBuilder();
+        assertNull(builder.getDocStoreDetailedGCFeature());
+    }
+
     @Test
     public void collectionCompressionDisabled() {
         MongoDocumentNodeStoreBuilder builder = new MongoDocumentNodeStoreBuilder();
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/util/UtilsTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/util/UtilsTest.java
index 2b01f90b34..6041a41724 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/util/UtilsTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/util/UtilsTest.java
@@ -50,13 +50,13 @@ import org.apache.jackrabbit.oak.spi.commit.EmptyHook;
 import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
 import org.apache.jackrabbit.oak.spi.toggle.Feature;
 import org.apache.jackrabbit.oak.stats.Clock;
-import org.junit.Assert;
 import org.junit.Ignore;
 import org.junit.Test;
 import org.mockito.Mockito;
 import org.slf4j.event.Level;
 
 import static org.apache.jackrabbit.oak.plugins.document.DocumentNodeStoreBuilder.newDocumentNodeStoreBuilder;
+import static org.apache.jackrabbit.oak.plugins.document.util.Utils.isDetailedGCEnabled;
 import static org.apache.jackrabbit.oak.plugins.document.util.Utils.isThrottlingEnabled;
 import static org.hamcrest.CoreMatchers.containsString;
 import static org.hamcrest.CoreMatchers.is;
@@ -184,6 +184,45 @@ public class UtilsTest {
         assertTrue("Throttling is enabled via Feature Toggle", throttlingEnabled);
     }
 
+    @Test
+    public void detailedGCEnabledDefaultValue() {
+        boolean detailedGCEnabled = isDetailedGCEnabled(newDocumentNodeStoreBuilder());
+        assertFalse("Detailed GC is disabled by default", detailedGCEnabled);
+    }
+
+    @Test
+    public void detailedGCExplicitlyDisabled() {
+        DocumentNodeStoreBuilder<?> builder = newDocumentNodeStoreBuilder();
+        builder.setDetailedGCEnabled(false);
+        Feature docStoreDetailedGCFeature = mock(Feature.class);
+        when(docStoreDetailedGCFeature.isEnabled()).thenReturn(false);
+        builder.setDocStoreDetailedGCFeature(docStoreDetailedGCFeature);
+        boolean detailedGCEnabled = isDetailedGCEnabled(builder);
+        assertFalse("Detailed GC is disabled explicitly", detailedGCEnabled);
+    }
+
+    @Test
+    public void detailedGCEnabledViaConfiguration() {
+        DocumentNodeStoreBuilder<?> builder = newDocumentNodeStoreBuilder();
+        builder.setDetailedGCEnabled(true);
+        Feature docStoreDetailedGCFeature = mock(Feature.class);
+        when(docStoreDetailedGCFeature.isEnabled()).thenReturn(false);
+        builder.setDocStoreDetailedGCFeature(docStoreDetailedGCFeature);
+        boolean detailedGCEnabled = isDetailedGCEnabled(builder);
+        assertTrue("Detailed GC is enabled via configuration", detailedGCEnabled);
+    }
+
+    @Test
+    public void detailedGCEnabledViaFeatureToggle() {
+        DocumentNodeStoreBuilder<?> builder = newDocumentNodeStoreBuilder();
+        builder.setDetailedGCEnabled(false);
+        Feature docStoreDetailedGCFeature = mock(Feature.class);
+        when(docStoreDetailedGCFeature.isEnabled()).thenReturn(true);
+        builder.setDocStoreDetailedGCFeature(docStoreDetailedGCFeature);
+        boolean detailedGCEnabled = isDetailedGCEnabled(builder);
+        assertTrue("Detailed GC is enabled via Feature Toggle", detailedGCEnabled);
+    }
+
     @Test
     public void getDepthFromId() throws Exception{
         assertEquals(1, Utils.getDepthFromId("1:/x"));


[jackrabbit-oak] 01/28: OAK-10199 : initial sketch of detail gc skeleton

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch DetailedGC/OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 2eb2a6cebe88af5ff13833cd8781d8d5141239fe
Author: Stefan Egli <st...@apache.org>
AuthorDate: Thu Apr 20 18:11:08 2023 +0200

    OAK-10199 : initial sketch of detail gc skeleton
---
 .../plugins/document/VersionGCRecommendations.java |   6 +-
 .../oak/plugins/document/VersionGCSupport.java     |  25 +++
 .../plugins/document/VersionGarbageCollector.java  | 183 ++++++++++++++++++++-
 .../oak/plugins/document/DetailGCHelper.java       |  42 +++++
 .../oak/plugins/document/VersionGCTest.java        |  18 ++
 5 files changed, 272 insertions(+), 2 deletions(-)

diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
index 363c65789b..ac47cc69d8 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
@@ -46,6 +46,7 @@ public class VersionGCRecommendations {
     final long maxCollect;
     final long deleteCandidateCount;
     final long lastOldestTimestamp;
+    final long fullDetailGCTimestamp;
     final long originalCollectLimit;
 
     private final long precisionMs;
@@ -101,6 +102,8 @@ public class VersionGCRecommendations {
         TimeInterval scope = new TimeInterval(oldestPossible, Long.MAX_VALUE);
         scope = scope.notLaterThan(keep.fromMs);
 
+        fullDetailGCTimestamp = settings.get(VersionGarbageCollector.SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP);
+
         suggestedIntervalMs = settings.get(VersionGarbageCollector.SETTINGS_COLLECTION_REC_INTERVAL_PROP);
         if (suggestedIntervalMs > 0) {
             suggestedIntervalMs = Math.max(suggestedIntervalMs, options.precisionMs);
@@ -217,6 +220,7 @@ public class VersionGCRecommendations {
         // default values
         settings.put(VersionGarbageCollector.SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP, 0L);
         settings.put(VersionGarbageCollector.SETTINGS_COLLECTION_REC_INTERVAL_PROP, 0L);
+        settings.put(VersionGarbageCollector.SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP, -1L);
         if (versionGCDoc != null) {
             for (String k : versionGCDoc.keySet()) {
                 Object value = versionGCDoc.get(k);
@@ -228,7 +232,7 @@ public class VersionGCRecommendations {
         return settings;
     }
 
-    private void setLongSetting(String propName, long val) {
+    void setLongSetting(String propName, long val) {
         UpdateOp updateOp = new UpdateOp(VersionGarbageCollector.SETTINGS_COLLECTION_ID, true);
         updateOp.set(propName, val);
         vgc.getDocumentStore().createOrUpdate(Collection.SETTINGS, updateOp);
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
index 13171b7fd5..0e5c26c83d 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
@@ -83,6 +83,31 @@ public class VersionGCSupport {
         });
     }
 
+    /**
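+     * Returns the selected documents whose getModified() is &gt;= getModifiedInSecs(fromModified) and &lt; getModifiedInSecs(toModified); documents without a modified value are skipped. TODO: document me further!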
+     */
+    public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified) {
+        return filter(getSelectedDocuments(store, NodeDocument.MODIFIED_IN_SECS, fromModified), new Predicate<NodeDocument>() {
+            @Override
+            public boolean apply(NodeDocument input) {
+                return modifiedGreaterThanEquals(input, fromModified)
+                        && modifiedLessThan(input, toModified);
+            }
+
+            private boolean modifiedGreaterThanEquals(NodeDocument doc,
+                                                      long time) {
+                Long modified = doc.getModified();
+                return modified != null && modified.compareTo(getModifiedInSecs(time)) >= 0;
+            }
+
+            private boolean modifiedLessThan(NodeDocument doc,
+                                             long time) {
+                Long modified = doc.getModified();
+                return modified != null && modified.compareTo(getModifiedInSecs(time)) < 0;
+            }
+        });
+    }
+
     /**
      * Returns the underlying document store.
      *
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
index ed0b333d44..e7442a7d15 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
@@ -70,6 +70,9 @@ import static org.slf4j.helpers.MessageFormatter.arrayFormat;
 
 public class VersionGarbageCollector {
 
+    /** TODO temporary global flag to enable 'detail gc' during prototyping. Should eventually become eg a system property */
+    public static boolean DETAIL_GC_ENABLED = false;
+
     //Kept less than MongoDocumentStore.IN_CLAUSE_BATCH_SIZE to avoid re-partitioning
     private static final int DELETE_BATCH_SIZE = 450;
     private static final int UPDATE_BATCH_SIZE = 450;
@@ -99,6 +102,17 @@ public class VersionGarbageCollector {
      */
     static final String SETTINGS_COLLECTION_REC_INTERVAL_PROP = "recommendedIntervalMs";
 
+    /**
+     * Property name of the timestamp when the last full-detail-GC run happened, or -1 if not applicable/in-use.
+     * <p>
+     * <ul>
+     * <li>-1 : full repo scan is disabled</li>
+     * <li>0 : full repo scan is enabled and bound to start from zero == oldest _modified </li>
+     * <li>gt 0 : full repo scan is enabled, was already done up until this value</li>
+     * </ul>
+     */
+    static final String SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP = "fullDetailGCTimeStamp";
+
     private final DocumentNodeStore nodeStore;
     private final DocumentStore ds;
     private final VersionGCSupport versionStore;
@@ -260,13 +274,14 @@ public class VersionGarbageCollector {
         final Stopwatch active = Stopwatch.createUnstarted();
         final Stopwatch collectDeletedDocs = Stopwatch.createUnstarted();
         final Stopwatch checkDeletedDocs = Stopwatch.createUnstarted();
+        final Stopwatch detailGcDocs = Stopwatch.createUnstarted();
         final Stopwatch deleteDeletedDocs = Stopwatch.createUnstarted();
         final Stopwatch collectAndDeleteSplitDocs = Stopwatch.createUnstarted();
         final Stopwatch deleteSplitDocs = Stopwatch.createUnstarted();
         final Stopwatch sortDocIds = Stopwatch.createUnstarted();
         final Stopwatch updateResurrectedDocuments = Stopwatch.createUnstarted();
         long activeElapsed, collectDeletedDocsElapsed, checkDeletedDocsElapsed, deleteDeletedDocsElapsed, collectAndDeleteSplitDocsElapsed,
-                deleteSplitDocsElapsed, sortDocIdsElapsed, updateResurrectedDocumentsElapsed;
+                deleteSplitDocsElapsed, sortDocIdsElapsed, updateResurrectedDocumentsElapsed, detailGcDocsElapsed;
 
         @Override
         public String toString() {
@@ -335,6 +350,7 @@ public class VersionGarbageCollector {
                 this.deleteSplitDocsElapsed += run.deleteSplitDocsElapsed;
                 this.sortDocIdsElapsed += run.sortDocIdsElapsed;
                 this.updateResurrectedDocumentsElapsed += run.updateResurrectedDocumentsElapsed;
+                this.detailGcDocsElapsed += run.detailGcDocsElapsed;
             } else {
                 // single run -> read from stop watches
                 this.activeElapsed += run.active.elapsed(MICROSECONDS);
@@ -345,6 +361,7 @@ public class VersionGarbageCollector {
                 this.deleteSplitDocsElapsed += run.deleteSplitDocs.elapsed(MICROSECONDS);
                 this.sortDocIdsElapsed += run.sortDocIds.elapsed(MICROSECONDS);
                 this.updateResurrectedDocumentsElapsed += run.updateResurrectedDocuments.elapsed(MICROSECONDS);
+                this.detailGcDocsElapsed += run.detailGcDocs.elapsed(MICROSECONDS);
             }
         }
     }
@@ -353,6 +370,7 @@ public class VersionGarbageCollector {
         NONE,
         COLLECTING,
         CHECKING,
+        DETAILGC,
         DELETING,
         SORTING,
         SPLITS_CLEANUP,
@@ -380,6 +398,7 @@ public class VersionGarbageCollector {
             this.watches.put(GCPhase.NONE, Stopwatch.createStarted());
             this.watches.put(GCPhase.COLLECTING, stats.collectDeletedDocs);
             this.watches.put(GCPhase.CHECKING, stats.checkDeletedDocs);
+            this.watches.put(GCPhase.DETAILGC, stats.detailGcDocs);
             this.watches.put(GCPhase.DELETING, stats.deleteDeletedDocs);
             this.watches.put(GCPhase.SORTING, stats.sortDocIds);
             this.watches.put(GCPhase.SPLITS_CLEANUP, stats.collectAndDeleteSplitDocs);
@@ -506,6 +525,7 @@ public class VersionGarbageCollector {
 
                     collectDeletedDocuments(phases, headRevision, rec);
                     collectSplitDocuments(phases, sweepRevisions, rec);
+                    collectDetailGarbage(phases, headRevision, rec);
                 }
             } catch (LimitExceededException ex) {
                 stats.limitExceeded = true;
@@ -521,6 +541,112 @@ public class VersionGarbageCollector {
             return stats;
         }
 
+        /**
+         * "Detail garbage" refers to additional garbage identified as part of OAK-10199
+         * et al: essentially garbage that in earlier versions of Oak was ignored. This
+         * includes: deleted properties, revision information within documents, branch
+         * commit related garbage.
+         * <p>
+         * TODO: limit this to run only on a singleton instance, eg the cluster leader
+         * <p>
+         * The "detail garbage" collector can be instructed to do a full repository scan
+         * - or incrementally based on where it last left off. When doing a full
+         * repository scan (but not limited to that), it executes in (small) batches
+         * followed by voluntary pauses (aka throttling) to avoid excessive load on the
+         * system. The full repository scan does not have to finish particularly fast,
+         * it is okay that it takes a considerable amount of time.
+         * 
+         * @param headRevision
+         * @throws IOException
+         * @throws LimitExceededException
+         */
+        private void collectDetailGarbage(GCPhases phases, RevisionVector headRevision, VersionGCRecommendations rec)
+                throws IOException, LimitExceededException {
+            if (!DETAIL_GC_ENABLED) {
+                // TODO: this toggling should be done nicer asap
+                return;
+            }
+            int docsTraversed = 0;
+            DetailGC gc = new DetailGC(headRevision, monitor);
+            try {
+                final long fromModified;
+                final long toModified;
+                if (rec.fullDetailGCTimestamp == -1) {
+                    // then full detail-gc is disabled or over - use regular scope then
+                    fromModified = rec.scope.fromMs;
+                    toModified = rec.scope.toMs;
+                } else {
+                    // then full detail-gc is enabled - use it then
+                    fromModified = rec.fullDetailGCTimestamp; // TODO: once we're passed rec.scope.fromMs we should
+                                                              // disable fullgc
+                    toModified = rec.scope.toMs; // the 'to' here is the max. it will process only eg 1 batch
+                }
+                long oldestGced = fromModified;
+                boolean foundAnything = false;
+                if (phases.start(GCPhase.COLLECTING)) {
+                    Iterable<NodeDocument> itr = versionStore.getModifiedDocs(fromModified, toModified);
+                    final Stopwatch timer = Stopwatch.createUnstarted();
+                    timer.reset().start();
+                    try {
+                        for (NodeDocument doc : itr) {
+                            // continue with GC?
+                            if (cancel.get()) {
+                                break;
+                            }
+                            foundAnything = true;
+                            if (phases.start(GCPhase.DETAILGC)) {
+                                gc.detailGC(doc, phases);
+                                phases.stop(GCPhase.DETAILGC);
+                            }
+                            final Long modified = doc.getModified();
+                            if (modified == null) {
+                                monitor.warn("collectDetailGarbage : document has no _modified property : {}",
+                                        doc.getId());
+                            } else if (modified < oldestGced) {
+                                monitor.warn(
+                                        "collectDetailGarbage : document has older _modified than query boundary : {} (from: {}, to: {})",
+                                        modified, fromModified, toModified);
+                            } else {
+                                oldestGced = modified;
+                            }
+                            docsTraversed++;
+                            if (docsTraversed % PROGRESS_BATCH_SIZE == 0) {
+                                monitor.info("Iterated through {} documents so far. {} had detail garbage",
+                                        docsTraversed, gc.getNumDocuments());
+                            }
+                            if (rec.maxCollect > 0 && gc.getNumDocuments() > rec.maxCollect) {
+                                // TODO: how would we recover from this?
+                                throw new LimitExceededException();
+                            }
+                        }
+                    } finally {
+                        Utils.closeIfCloseable(itr);
+                        delayOnModifications(timer.stop().elapsed(TimeUnit.MILLISECONDS));
+                    }
+                    phases.stop(GCPhase.COLLECTING);
+                    if (!cancel.get() && foundAnything) {
+                        // TODO: move to evaluate()
+                        rec.setLongSetting(SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP, oldestGced + 1);
+                    }
+                }
+            } finally {
+                gc.close();
+            }
+        }
+
+        private void delayOnModifications(long durationMs) {
+            long delayMs = Math.round(durationMs * options.delayFactor);
+            if (!cancel.get() && delayMs > 0) {
+                try {
+                    Clock clock = nodeStore.getClock();
+                    clock.waitUntil(clock.getTime() + delayMs);
+                }
+                catch (InterruptedException ex) {
+                    /* ignore */
+                }
+            }
+        }
+
         private void collectSplitDocuments(GCPhases phases,
                                            RevisionVector sweepRevisions,
                                            VersionGCRecommendations rec) {
@@ -611,6 +737,61 @@ public class VersionGarbageCollector {
         }
     }
 
+    private class DetailGC implements Closeable {
+
+        private final RevisionVector headRevision;
+        private final GCMonitor monitor;
+        private int count;
+
+        public DetailGC(@NotNull RevisionVector headRevision, @NotNull GCMonitor monitor) {
+            this.headRevision = checkNotNull(headRevision);
+            this.monitor = monitor;
+        }
+
+        public void detailGC(NodeDocument doc, GCPhases phases) {
+            deleteSample(doc, phases);
+            deleteUnmergedBranchCommitDocument(doc, phases);
+            deleteDeletedProperties(doc, phases);
+            deleteOldRevisions(doc, phases);
+        }
+
+        /** TODO remove, this is just a skeleton sample */
+        private void deleteSample(NodeDocument doc, GCPhases phases) {
+            if (doc.getId().contains("should_delete")) {
+                if (phases.start(GCPhase.DELETING)) {
+                    monitor.info("deleteSample: should do the deletion now, but this is demo only. I'm still learning");
+                    System.out.println("do the actual deletion");
+                    count++;
+                    phases.stop(GCPhase.DELETING);
+                }
+            }
+        }
+
+        private void deleteUnmergedBranchCommitDocument(NodeDocument doc, GCPhases phases) {
+            // TODO Auto-generated method stub
+
+        }
+
+        private void deleteDeletedProperties(NodeDocument doc, GCPhases phases) {
+            // TODO Auto-generated method stub
+
+        }
+
+        private void deleteOldRevisions(NodeDocument doc, GCPhases phases) {
+            // TODO Auto-generated method stub
+
+        }
+
+        long getNumDocuments() {
+            return count;
+        }
+
+        @Override
+        public void close() throws IOException {
+
+        }
+    }
+
     /**
      * A helper class to remove document for deleted nodes.
      */
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DetailGCHelper.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DetailGCHelper.java
new file mode 100644
index 0000000000..8a585c7dc0
--- /dev/null
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DetailGCHelper.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.document;
+
+public class DetailGCHelper {
+
+    public static void setLongSetting(String propName, long val, DocumentNodeStore ns) {
+        UpdateOp updateOp = new UpdateOp(VersionGarbageCollector.SETTINGS_COLLECTION_ID, true);
+        updateOp.set(propName, val);
+        ns.getDocumentStore().createOrUpdate(Collection.SETTINGS, updateOp);
+    }
+
+    public static void enableDetailGC(DocumentNodeStore ns) {
+        VersionGarbageCollector.DETAIL_GC_ENABLED = true;
+        if (ns != null) {
+            setLongSetting(VersionGarbageCollector.SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP, 0, ns);
+        }
+    }
+
+    public static void disableDetailGC(DocumentNodeStore ns) {
+        VersionGarbageCollector.DETAIL_GC_ENABLED = false;
+        if (ns != null) {
+            setLongSetting(VersionGarbageCollector.SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP, -1, ns);
+        }
+    }
+}
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java
index a3e7a5e1e3..1bd81ce89c 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java
@@ -51,6 +51,7 @@ import static java.util.concurrent.TimeUnit.HOURS;
 import static java.util.concurrent.TimeUnit.MINUTES;
 import static java.util.concurrent.TimeUnit.SECONDS;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
@@ -338,6 +339,23 @@ public class VersionGCTest {
         }
     }
 
+    // OAK-10199
+    @Test
+    public void testDetailGcDocumentRead_disabled() throws Exception {
+        DetailGCHelper.disableDetailGC(ns);
+        VersionGCStats stats = gc.gc(30, TimeUnit.MINUTES);
+        assertNotNull(stats);
+        assertEquals(0, stats.detailGcDocsElapsed);
+    }
+
+    @Test
+    public void testDetailGcDocumentRead_enabled() throws Exception {
+        DetailGCHelper.enableDetailGC(ns);
+        VersionGCStats stats = gc.gc(30, TimeUnit.MINUTES);
+        assertNotNull(stats);
+        assertNotEquals(0, stats.detailGcDocsElapsed);
+    }
+
     private Future<VersionGCStats> gc() {
         // run gc in a separate thread
         return execService.submit(new Callable<VersionGCStats>() {


[jackrabbit-oak] 17/28: OAK-10199 : handled escaped properties while deleting them

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch DetailedGC/OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 11794a11be86b9894e21d751f4d94c1f6da0b080
Author: Rishabh Kumar <di...@adobe.com>
AuthorDate: Mon Jun 26 19:55:41 2023 +0530

    OAK-10199 : handled escaped properties while deleting them
---
 .../plugins/document/VersionGarbageCollector.java  |  2 +
 .../document/VersionGarbageCollectorIT.java        | 57 +++++++++++++++++++++-
 2 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
index 9d918ee9a4..307315d5a8 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
@@ -62,6 +62,7 @@ import static java.util.Objects.requireNonNull;
 import static java.util.Optional.ofNullable;
 import static java.util.concurrent.TimeUnit.MILLISECONDS;
 import static java.util.stream.Collectors.joining;
+import static java.util.stream.Collectors.toSet;
 import static org.apache.jackrabbit.guava.common.base.StandardSystemProperty.LINE_SEPARATOR;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.all;
 import static org.apache.jackrabbit.guava.common.collect.Iterators.partition;
@@ -834,6 +835,7 @@ public class VersionGarbageCollector {
 
                 final Set<String> retainPropSet = ofNullable(doc.getNodeAtRevision(nodeStore, headRevision, null))
                         .map(DocumentNodeState::getPropertyNames)
+                        .map(p -> p.stream().map(Utils::escapePropertyName).collect(toSet()))
                         .orElse(emptySet());
                 final int deletedPropsGCCount = properties.stream()
                         .filter(p -> !retainPropSet.contains(p))
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
index 031176ca33..ca00648244 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
@@ -454,7 +454,7 @@ public class VersionGarbageCollectorIT {
         assertEquals(10, stats.deletedPropsGCCount);
     }
 
-    // Test when properties are not collected in one GC cycle
+    // Test that properties are collected after a system crash has happened
     @Test
     public void testGCDeletedProps_4() throws Exception {
         final FailingDocumentStore fds = new FailingDocumentStore(fixture.createDocumentStore(), 42) {
@@ -522,6 +522,61 @@ public class VersionGarbageCollectorIT {
 
     }
 
+    // Test when escaped properties are collected
+    @Test
+    public void testGCDeletedProps_5() throws Exception {
+        //1. Create nodes with properties
+        NodeBuilder b1 = store.getRoot().builder();
+
+        // Add property to node & save
+        for (int i = 0; i < 10; i++) {
+            b1.child("x").setProperty("test."+i, "t", STRING);
+        }
+        store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        // enable the detailed gc flag
+        writeField(gc, "detailedGCEnabled", true, true);
+        long maxAge = 1; //hours
+        long delta = TimeUnit.MINUTES.toMillis(10);
+        //1. Go past GC age and check no GC done as nothing deleted
+        clock.waitUntil(Revision.getCurrentTimestamp() + maxAge);
+        VersionGCStats stats = gc.gc(maxAge, HOURS);
+        assertEquals(0, stats.deletedPropsGCCount);
+
+        //Remove property
+        NodeBuilder b2 = store.getRoot().builder();
+        for (int i = 0; i < 10; i++) {
+            b2.getChildNode("x").removeProperty("test."+i);
+        }
+        store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        store.runBackgroundOperations();
+
+        //2. Check that a deleted property is not collected before maxAge
+        //Clock cannot move back (it moved forward in #1) so double the maxAge
+        clock.waitUntil(clock.getTime() + delta);
+        stats = gc.gc(maxAge*2, HOURS);
+        assertEquals(0, stats.deletedPropsGCCount);
+
+        //3. Check that deleted property does get collected post maxAge
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
+
+        stats = gc.gc(maxAge*2, HOURS);
+        assertEquals(10, stats.deletedPropsGCCount);
+
+        //4. Check that a revived property (deleted and created again) does not get gc
+        NodeBuilder b4 = store.getRoot().builder();
+        for (int i = 0; i < 10; i++) {
+            b4.child("x").setProperty("test."+i, "t", STRING);
+        }
+        store.merge(b4, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
+        stats = gc.gc(maxAge*2, HOURS);
+        assertEquals(0, stats.deletedPropsGCCount);
+
+    }
+
     // OAK-10199 END
     
     private void gcSplitDocsInternal(String subNodeName) throws Exception {


[jackrabbit-oak] 15/28: OAK-10199 : fixed logic to include previously garbage collected documents if updated recently

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch DetailedGC/OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 69b52bd0fabc548528eccdb3e04a2f5e06f2a265
Author: Rishabh Kumar <di...@adobe.com>
AuthorDate: Thu Jun 22 01:07:36 2023 +0530

    OAK-10199 : fixed logic to include previously garbage collected documents if updated recently
---
 .../plugins/document/VersionGCRecommendations.java |  11 +-
 .../oak/plugins/document/VersionGCSupport.java     |  15 ++-
 .../plugins/document/VersionGarbageCollector.java  |  50 ++++----
 .../document/mongo/MongoVersionGCSupport.java      |  13 +--
 .../plugins/document/rdb/RDBVersionGCSupport.java  |  15 ++-
 .../oak/plugins/document/VersionGCInitTest.java    |   2 +-
 .../oak/plugins/document/VersionGCSupportTest.java |  22 ++--
 .../oak/plugins/document/VersionGCTest.java        |  88 +++++++++++++-
 .../document/VersionGarbageCollectorIT.java        | 127 ++++++++++++++++++++-
 9 files changed, 278 insertions(+), 65 deletions(-)

diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
index 056c2fe438..ac0bcc03e3 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
@@ -20,13 +20,13 @@ package org.apache.jackrabbit.oak.plugins.document;
 
 import java.util.HashMap;
 import java.util.Map;
-import java.util.Objects;
 import java.util.concurrent.TimeUnit;
 
 import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.VersionGCStats;
 import org.apache.jackrabbit.oak.plugins.document.util.TimeInterval;
 import org.apache.jackrabbit.oak.spi.gc.GCMonitor;
 import org.apache.jackrabbit.oak.stats.Clock;
+import org.jetbrains.annotations.NotNull;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -117,16 +117,16 @@ public class VersionGCRecommendations {
 
         detailedGCTimestamp = (long) settings.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP);
         oldestModifiedDocId = (String) settings.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP);
-        if (detailedGCTimestamp == 0 || Objects.equals(oldestModifiedDocId, MIN_ID_VALUE)) {
+        if (detailedGCTimestamp == 0) {
+            // this only happens the very first time we run detailedGC
             log.info("No detailedGCTimestamp found, querying for the oldest modified candidate");
             final NodeDocument doc = vgc.getOldestModifiedDoc(clock);
             if (doc == NULL) {
                 oldestModifiedDocTimeStamp = 0L;
-                oldestModifiedDocId = MIN_ID_VALUE;
             } else {
-                oldestModifiedDocId = doc.getId();
                 oldestModifiedDocTimeStamp = doc.getModified() == null ? 0L : doc.getModified() - 1;
             }
+            oldestModifiedDocId = MIN_ID_VALUE;
             log.info("detailedGCTimestamp found: {}", timestampToString(oldestModifiedDocTimeStamp));
         } else {
             oldestModifiedDocTimeStamp = detailedGCTimestamp - 1;
@@ -179,7 +179,8 @@ public class VersionGCRecommendations {
                 ignoreDueToCheckPoint = true;
             } else {
                 scope = scope.notLaterThan(checkpoint.getTimestamp() - 1);
-                log.debug("checkpoint at [{}] found, scope now {}", timestampToString(checkpoint.getTimestamp()), scope);
+                detailedGCTimeInternal = detailedGCTimeInternal.notLaterThan(checkpoint.getTimestamp() - 1);
+                log.info("checkpoint at [{}] found, scope now {}, detailedGcScope now {}", timestampToString(checkpoint.getTimestamp()), scope, detailedGCTimeInternal);
             }
         }
 
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
index 96fa2bbaea..6086eef772 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
@@ -82,17 +82,16 @@ public class VersionGCSupport {
      * then perform the comparison.
      * <p/>
      *
-     * @param fromModified  the lower bound modified timestamp (inclusive)
-     * @param toModified    the upper bound modified timestamp (exclusive)
-     * @param limit         the limit of documents to return
-     * @param fromId        the lower bound {@link NodeDocument#ID}
-     * @param includeFromId boolean indicating whether {@code fromId} is inclusive or not
+     * @param fromModified the lower bound modified timestamp (inclusive)
+     * @param toModified   the upper bound modified timestamp (exclusive)
+     * @param limit        the limit of documents to return
+     * @param fromId       the lower bound {@link NodeDocument#ID}
      * @return matching documents.
      */
     public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit,
-                                                  @NotNull final String fromId, boolean includeFromId) {
+                                                  @NotNull final String fromId) {
         return StreamSupport
-                .stream(getSelectedDocuments(store, MODIFIED_IN_SECS, 1, includeFromId ? "\0"+fromId : fromId).spliterator(), false)
+                .stream(getSelectedDocuments(store, MODIFIED_IN_SECS, 1, fromId).spliterator(), false)
                 .filter(input -> modifiedGreaterThanEquals(input, fromModified) && modifiedLessThan(input, toModified))
                 .sorted((o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2))
                 .limit(limit)
@@ -197,7 +196,7 @@ public class VersionGCSupport {
 
         LOG.info("find oldest modified document");
         try {
-            docs = getModifiedDocs(ts, now, 1, MIN_ID_VALUE, false);
+            docs = getModifiedDocs(ts, now, 1, MIN_ID_VALUE);
             if (docs.iterator().hasNext()) {
                 return docs.iterator().next();
             }
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
index 0c7da0d4fa..9d918ee9a4 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
@@ -69,6 +69,7 @@ import static org.apache.jackrabbit.guava.common.util.concurrent.Atomics.newRefe
 import static java.util.concurrent.TimeUnit.MICROSECONDS;
 import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
 import static org.apache.jackrabbit.oak.plugins.document.Collection.SETTINGS;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.COMMIT_ROOT_ONLY;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_LEAF;
@@ -615,24 +616,26 @@ public class VersionGarbageCollector {
                 throws IOException {
             int docsTraversed = 0;
             boolean foundDoc = true;
-            boolean includeFromId = true;
-            long oldestModifiedDocTimeStamp = rec.scopeDetailedGC.fromMs;
-            String oldestModifiedDocId = rec.detailedGCId;
+            final long oldestModifiedDocTimeStamp = rec.scopeDetailedGC.fromMs;
+            final String oldestModifiedDocId = rec.detailedGCId;
             try (DetailedGC gc = new DetailedGC(headRevision, monitor, cancel)) {
-                final long fromModified = rec.scopeDetailedGC.fromMs;
+                long fromModified = oldestModifiedDocTimeStamp;
+                String fromId = oldestModifiedDocId;
+                NodeDocument lastDoc = null;
                 final long toModified = rec.scopeDetailedGC.toMs;
                 if (phases.start(GCPhase.DETAILED_GC)) {
-                    while (foundDoc && oldestModifiedDocTimeStamp < toModified && docsTraversed <= PROGRESS_BATCH_SIZE) {
+                    while (foundDoc && fromModified < toModified && docsTraversed <= PROGRESS_BATCH_SIZE) {
                         // set foundDoc to false to allow exiting the while loop
                         foundDoc = false;
-                        Iterable<NodeDocument> itr = versionStore.getModifiedDocs(oldestModifiedDocTimeStamp, toModified, 1000, oldestModifiedDocId, includeFromId);
+                        Iterable<NodeDocument> itr = versionStore.getModifiedDocs(fromModified, toModified, 1000, fromId);
                         // set includeFromId to false for subsequent queries
-                        includeFromId = false;
                         try {
                             for (NodeDocument doc : itr) {
                                 foundDoc = true;
                                 // continue with GC?
                                 if (cancel.get()) {
+                                    foundDoc = false; // to exit while loop as well
+                                    log.info("Received GC cancel call. Terminating the GC Operation.");
                                     break;
                                 }
                                 docsTraversed++;
@@ -641,13 +644,14 @@ public class VersionGarbageCollector {
                                             docsTraversed, gc.getGarbageDocsCount());
                                 }
 
+                                lastDoc = doc;
                                 // collect the data to delete in next step
                                 if (phases.start(GCPhase.COLLECTING)) {
                                     gc.collectGarbage(doc, phases);
                                     phases.stop(GCPhase.COLLECTING);
                                 }
 
-                                final Long modified = doc.getModified();
+                                final Long modified = lastDoc.getModified();
                                 if (modified == null) {
                                     monitor.warn("collectDetailGarbage : document has no _modified property : {}",
                                             doc.getId());
@@ -655,22 +659,28 @@ public class VersionGarbageCollector {
                                     monitor.warn(
                                             "collectDetailGarbage : document has older _modified than query boundary : {} (from: {}, to: {})",
                                             modified, fromModified, toModified);
-                                } else {
-                                    oldestModifiedDocTimeStamp = modified;
                                 }
-
-                                if (gc.hasGarbage() && phases.start(GCPhase.DETAILED_GC_CLEANUP)) {
-                                    gc.removeGarbage(phases.stats);
-                                    phases.stop(GCPhase.DETAILED_GC_CLEANUP);
-                                }
-
-                                oldestModifiedDocTimeStamp = modified == null ? fromModified : modified;
-                                oldestModifiedDocId = doc.getId();
+                            }
+                            // now remove the garbage in one go, if any
+                            if (gc.hasGarbage() && phases.start(GCPhase.DETAILED_GC_CLEANUP)) {
+                                gc.removeGarbage(phases.stats);
+                                phases.stop(GCPhase.DETAILED_GC_CLEANUP);
+                            }
+                            if (lastDoc != null) {
+                                fromModified = ofNullable(lastDoc.getModified()).orElse(oldestModifiedDocTimeStamp);
+                                fromId = lastDoc.getId();
                             }
                         } finally {
                             Utils.closeIfCloseable(itr);
-                            phases.stats.oldestModifiedDocTimeStamp = oldestModifiedDocTimeStamp;
-                            phases.stats.oldestModifiedDocId = oldestModifiedDocId;
+                            phases.stats.oldestModifiedDocTimeStamp = fromModified;
+                            if (fromModified > oldestModifiedDocTimeStamp) {
+                                // we have moved ahead, now we can reset oldestModifiedId to min value
+                                phases.stats.oldestModifiedDocId = MIN_ID_VALUE;
+                            } else {
+                                // there are still documents pending at oldest Modified timestamp,
+                                // save the last _id traversed to avoid re-fetching of ids
+                                phases.stats.oldestModifiedDocId = fromId;
+                            }
                         }
                     }
                     phases.stop(GCPhase.DETAILED_GC);
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
index ca9a8a955b..cf821fcf48 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
@@ -139,19 +139,18 @@ public class MongoVersionGCSupport extends VersionGCSupport {
      * then perform the comparison.
      * <p/>
      *
-     * @param fromModified  the lower bound modified timestamp (inclusive)
-     * @param toModified    the upper bound modified timestamp (exclusive)
-     * @param limit         the limit of documents to return
-     * @param fromId        the lower bound {@link NodeDocument#ID}
-     * @param includeFromId boolean indicating whether {@code fromId} is inclusive or not
+     * @param fromModified the lower bound modified timestamp (inclusive)
+     * @param toModified   the upper bound modified timestamp (exclusive)
+     * @param limit        the limit of documents to return
+     * @param fromId       the lower bound {@link NodeDocument#ID}
      * @return matching documents.
      */
     @Override
     public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit,
-                                                  @NotNull final String fromId, boolean includeFromId) {
+                                                  @NotNull final String fromId) {
         // _modified >= fromModified && _modified < toModified && _id > fromId
         final Bson query = and(gte(MODIFIED_IN_SECS, getModifiedInSecs(fromModified)),
-                lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified)), includeFromId ? gte(ID, fromId) :gt(ID, fromId));
+                lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified)), gt(ID, fromId));
         // first sort by _modified and then by _id
         final Bson sort = and(eq(MODIFIED_IN_SECS, 1), eq(ID, 1));
 
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
index efce4b8006..9d96c35811 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
@@ -104,19 +104,18 @@ public class RDBVersionGCSupport extends VersionGCSupport {
      * then perform the comparison.
      * <p/>
      *
-     * @param fromModified  the lower bound modified timestamp (inclusive)
-     * @param toModified    the upper bound modified timestamp (exclusive)
-     * @param limit         the limit of documents to return
-     * @param fromId        the lower bound {@link NodeDocument#ID}
-     * @param includeFromId boolean indicating whether {@code fromId} is inclusive or not
+     * @param fromModified the lower bound modified timestamp (inclusive)
+     * @param toModified   the upper bound modified timestamp (exclusive)
+     * @param limit        the limit of documents to return
+     * @param fromId       the lower bound {@link NodeDocument#ID}
      * @return matching documents.
      */
     @Override
     public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit,
-                                                  @NotNull final String fromId, boolean includeFromId) {
+                                                  @NotNull final String fromId) {
         List<QueryCondition> conditions = of(new QueryCondition(MODIFIED_IN_SECS, "<", getModifiedInSecs(toModified)),
                 new QueryCondition(MODIFIED_IN_SECS, ">=", getModifiedInSecs(fromModified)),
-                new QueryCondition(ID, includeFromId ? ">=" : ">", of(fromId)));
+                new QueryCondition(ID, ">", of(fromId)));
         if (MODE == 1) {
             return getIterator(EMPTY_KEY_PATTERN, conditions);
         } else {
@@ -291,7 +290,7 @@ public class RDBVersionGCSupport extends VersionGCSupport {
         LOG.info("getOldestModifiedDoc() <- start");
         Iterable<NodeDocument> modifiedDocs = null;
         try {
-            modifiedDocs = getModifiedDocs(0L, clock.getTime(), 1, MIN_ID_VALUE, false);
+            modifiedDocs = getModifiedDocs(0L, clock.getTime(), 1, MIN_ID_VALUE);
             doc = modifiedDocs.iterator().hasNext() ? modifiedDocs.iterator().next() : NULL;
         } catch (DocumentStoreException ex) {
             LOG.error("getOldestModifiedDoc()", ex);
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java
index 0bf7b8601a..4db64c942f 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java
@@ -79,7 +79,7 @@ public class VersionGCInitTest {
         vgc = store.find(SETTINGS, "versionGC");
         assertNotNull(vgc);
         assertEquals(40L, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP));
-        assertEquals(id, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP));
+        assertEquals(MIN_ID_VALUE, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP));
     }
 
     @Test
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java
index cff9511a66..6d02fd38f7 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java
@@ -224,10 +224,10 @@ public class VersionGCSupportTest {
         long oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
         assertEquals(40L, oldestModifiedDocTs);
         assertEquals("1:/x0", oldestModifiedDocId);
-        boolean includeFromId = true;
+        oldestModifiedDocId = MIN_ID_VALUE;
 
         for(int i = 0; i < 5; i++) {
-            Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId, includeFromId);
+            Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId);
             assertTrue(isInOrder(modifiedDocs, (o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2)));
             long count = stream(modifiedDocs.spliterator(), false).count();
             assertEquals(1000, count);
@@ -236,7 +236,6 @@ public class VersionGCSupportTest {
             }
             oldestModifiedDocId = oldestModifiedDoc.getId();
             oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
-            includeFromId = false;
         }
     }
 
@@ -260,10 +259,10 @@ public class VersionGCSupportTest {
         long oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
         assertEquals(40L, oldestModifiedDocTs);
         assertEquals("1:/x0", oldestModifiedDocId);
-        boolean includeFromId = true;
+        oldestModifiedDocId = MIN_ID_VALUE;
 
         for(int i = 0; i < 5; i++) {
-            Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId, includeFromId);
+            Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId);
             assertTrue(isInOrder(modifiedDocs, (o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2)));
             long count = stream(modifiedDocs.spliterator(), false).count();
             assertEquals(1000, count);
@@ -272,11 +271,10 @@ public class VersionGCSupportTest {
             }
             oldestModifiedDocId = oldestModifiedDoc.getId();
             oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
-            includeFromId = false;
         }
 
         // fetch last remaining document now
-        Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId, false);
+        Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId);
         assertEquals(1, stream(modifiedDocs.spliterator(), false).count());
         assertTrue(isInOrder(modifiedDocs, (o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2)));
         oldestModifiedDoc = modifiedDocs.iterator().next();
@@ -284,7 +282,7 @@ public class VersionGCSupportTest {
         oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
 
         // all documents had been fetched, now we won't get any document
-        modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId, false);
+        modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId);
         assertEquals(0, stream(modifiedDocs.spliterator(), false).count());
 
     }
@@ -309,10 +307,9 @@ public class VersionGCSupportTest {
         }
         // create 5_000 nodes
         store.create(NODES, updateOps);
-        boolean includeFromId = true;
 
         for(int i = 0; i < 5; i++) {
-            Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId, includeFromId);
+            Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId);
             assertTrue(isInOrder(modifiedDocs, (o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2)));
             long count = stream(modifiedDocs.spliterator(), false).count();
             assertEquals(1000, count);
@@ -321,11 +318,10 @@ public class VersionGCSupportTest {
             }
             oldestModifiedDocId = oldestModifiedDoc.getId();
             oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
-            includeFromId = false;
         }
 
         // all documents had been fetched, now we won't get any document
-        Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId, false);
+        Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId);
         assertEquals(0, stream(modifiedDocs.spliterator(), false).count());
     }
 
@@ -335,7 +331,7 @@ public class VersionGCSupportTest {
     }
 
     private void assertModified(long fromSeconds, long toSeconds, long num) {
-        Iterable<NodeDocument> docs = gcSupport.getModifiedDocs(SECONDS.toMillis(fromSeconds), SECONDS.toMillis(toSeconds), 10, MIN_ID_VALUE, false);
+        Iterable<NodeDocument> docs = gcSupport.getModifiedDocs(SECONDS.toMillis(fromSeconds), SECONDS.toMillis(toSeconds), 10, MIN_ID_VALUE);
         assertEquals(num, stream(docs.spliterator(), false).count());
         assertTrue(isInOrder(docs, (o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2)));
     }
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java
index f29716ca5d..cf3148a86d 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java
@@ -50,6 +50,11 @@ import org.junit.Test;
 import static java.util.concurrent.TimeUnit.HOURS;
 import static java.util.concurrent.TimeUnit.MINUTES;
 import static java.util.concurrent.TimeUnit.SECONDS;
+import static org.apache.jackrabbit.oak.plugins.document.Collection.SETTINGS;
+import static org.apache.jackrabbit.oak.plugins.document.DetailGCHelper.enableDetailGC;
+import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP;
+import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP;
+import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_ID;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertNotNull;
@@ -187,6 +192,85 @@ public class VersionGCTest {
         }
     }
 
+    // OAK-10199
+    @Test
+    public void cancelMustNotUpdateLastOldestModifiedTimeStamp() throws Exception {
+        // get previous entry from SETTINGS
+        String versionGCId = SETTINGS_COLLECTION_ID;
+        String detailedGCTimestamp = SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP;
+        enableDetailGC(gc);
+        gc.gc(30, SECONDS);
+        Document statusBefore = store.find(SETTINGS, versionGCId);
+        // block gc call
+        store.semaphore.acquireUninterruptibly();
+        Future<VersionGCStats> stats = gc();
+        boolean gcBlocked = false;
+        for (int i = 0; i < 10; i ++) {
+            if (store.semaphore.hasQueuedThreads()) {
+                gcBlocked = true;
+                break;
+            }
+            Thread.sleep(100);
+        }
+        assertTrue(gcBlocked);
+        // now cancel the GC
+        gc.cancel();
+        store.semaphore.release();
+        assertTrue(stats.get().canceled);
+
+        // ensure a canceled GC doesn't update that versionGC SETTINGS entry
+        Document statusAfter = store.find(SETTINGS, SETTINGS_COLLECTION_ID);
+        if (statusBefore == null) {
+            assertNull(statusAfter);
+        } else {
+            assertNotNull(statusAfter);
+            assertEquals(
+                    "canceled GC shouldn't change the " + detailedGCTimestamp + " property on " + versionGCId
+                            + " settings entry",
+                    statusBefore.get(detailedGCTimestamp), statusAfter.get(detailedGCTimestamp));
+        }
+    }
+
+    @Test
+    public void cancelMustNotUpdateLastOldestModifiedDocId() throws Exception {
+        // get previous entry from SETTINGS
+        String versionGCId = SETTINGS_COLLECTION_ID;
+        String oldestModifiedDocId = SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP;
+        enableDetailGC(gc);
+        gc.gc(30, SECONDS);
+        Document statusBefore = store.find(SETTINGS, versionGCId);
+        // block gc call
+        store.semaphore.acquireUninterruptibly();
+        Future<VersionGCStats> stats = gc();
+        boolean gcBlocked = false;
+        for (int i = 0; i < 10; i ++) {
+            if (store.semaphore.hasQueuedThreads()) {
+                gcBlocked = true;
+                break;
+            }
+            Thread.sleep(100);
+        }
+        assertTrue(gcBlocked);
+        // now cancel the GC
+        gc.cancel();
+        store.semaphore.release();
+        assertTrue(stats.get().canceled);
+
+        // ensure a canceled GC doesn't update that versionGC SETTINGS entry
+        Document statusAfter = store.find(SETTINGS, SETTINGS_COLLECTION_ID);
+        if (statusBefore == null) {
+            assertNull(statusAfter);
+        } else {
+            assertNotNull(statusAfter);
+            assertEquals(
+                    "canceled GC shouldn't change the " + oldestModifiedDocId + " property on " + versionGCId
+                            + " settings entry",
+                    statusBefore.get(oldestModifiedDocId), statusAfter.get(oldestModifiedDocId));
+        }
+    }
+
+    // END - OAK-10199
+
     @Test
     public void getInfo() throws Exception {
         gc.gc(1, TimeUnit.HOURS);
@@ -351,7 +435,7 @@ public class VersionGCTest {
 
     @Test
     public void testDetailGcDocumentRead_enabled() throws Exception {
-        DetailGCHelper.enableDetailGC(gc);
+        enableDetailGC(gc);
         VersionGCStats stats = gc.gc(30, TimeUnit.MINUTES);
         assertNotNull(stats);
         assertNotEquals(0, stats.detailedGCDocsElapsed);
@@ -417,7 +501,7 @@ public class VersionGCTest {
         @Override
         public <T extends Document> T find(Collection<T> collection,
                                            String key) {
-            if (collection == Collection.SETTINGS
+            if (collection == SETTINGS
                     && key.equals("versionGC")) {
                 findVersionGC.incrementAndGet();
             }
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
index 80dd47dee6..e0de0c0617 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
@@ -34,7 +34,6 @@ import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicReference;
 
-import static java.util.concurrent.TimeUnit.SECONDS;
 import static org.apache.commons.lang3.reflect.FieldUtils.writeField;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.filter;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.size;
@@ -53,6 +52,7 @@ import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 import static org.junit.Assume.assumeTrue;
 
 import org.apache.jackrabbit.guava.common.base.Function;
@@ -120,6 +120,7 @@ public class VersionGarbageCollectorIT {
         execService = Executors.newCachedThreadPool();
         clock = new Clock.Virtual();
         clock.waitUntil(System.currentTimeMillis());
+        ClusterNodeInfo.setClock(clock);
         Revision.setClock(clock);
         if (fixture instanceof RDBFixture) {
             ((RDBFixture) fixture).setRDBOptions(
@@ -230,6 +231,7 @@ public class VersionGarbageCollectorIT {
         gcSplitDocsInternal(Strings.repeat("sub", 120));
     }
 
+    // OAK-10199
     @Test
     public void testGCDeletedProps() throws Exception {
         //1. Create nodes with properties
@@ -393,6 +395,129 @@ public class VersionGarbageCollectorIT {
         assertEquals(50_000, stats.deletedPropsGCCount);
 
     }
+
+    // Test where we modify the already GCed nodes
+    @Test
+    public void testGCDeletedProps_3() throws Exception {
+        //1. Create nodes with properties
+        NodeBuilder b1 = store.getRoot().builder();
+        // Add property to node & save
+        for (int i = 0; i < 10; i++) {
+            b1.child("z" + i).setProperty("prop" + i, "foo", STRING);
+        }
+        store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        // enable the detailed gc flag
+        writeField(gc, "detailedGCEnabled", true, true);
+        long maxAge = 1; //hours
+        long delta = TimeUnit.MINUTES.toMillis(10);
+        //1. Go past GC age and check no GC done as nothing deleted
+        clock.waitUntil(Revision.getCurrentTimestamp() + maxAge);
+        VersionGCStats stats = gc.gc(maxAge, HOURS);
+        assertEquals(0, stats.deletedPropsGCCount);
+
+        //Remove property
+        NodeBuilder b2 = store.getRoot().builder();
+        for (int i = 0; i < 10; i++) {
+            b2.getChildNode("z" + i).removeProperty("prop" + i);
+        }
+        store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+        store.runBackgroundOperations();
+
+        //2. Check that deleted property does get collected post maxAge
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
+
+        stats = gc.gc(maxAge*2, HOURS);
+        assertEquals(10, stats.deletedPropsGCCount);
+
+        //3. now reCreate those properties again
+        NodeBuilder b3 = store.getRoot().builder();
+        // Add property to node & save
+        for (int i = 0; i < 10; i++) {
+            b3.child("z" + i).setProperty("prop" + i, "bar", STRING);
+        }
+        store.merge(b3, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        //Remove properties again
+        NodeBuilder b4 = store.getRoot().builder();
+        for (int i = 0; i < 10; i++) {
+            b4.getChildNode("z" + i).removeProperty("prop" + i);
+        }
+        store.merge(b4, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+        store.runBackgroundOperations();
+
+
+        //4. Check that deleted property does get collected again
+        // increment the clock again by more than 2 hours + delta
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
+        stats = gc.gc(maxAge*2, HOURS);
+        assertEquals(10, stats.deletedPropsGCCount);
+    }
+
+    // Test when properties are not collected in one GC cycle
+    @Test
+    @Ignore
+    public void testGCDeletedProps_4() throws Exception {
+        documentMKBuilder = new DocumentMK.Builder().clock(clock)
+                .setLeaseCheckMode(LeaseCheckMode.DISABLED)
+                .setDocumentStore(new FailingDocumentStore(fixture.createDocumentStore(), 42)).setAsyncDelay(0);
+        store = documentMKBuilder.getNodeStore();
+        assertTrue(store.getDocumentStore() instanceof FailingDocumentStore);
+        MongoTestUtils.setReadPreference(store, ReadPreference.primary());
+        gc = store.getVersionGarbageCollector();
+        //1. Create nodes with properties
+        NodeBuilder b1 = store.getRoot().builder();
+        // Add property to node & save
+        for (int i = 0; i < 10; i++) {
+            b1.child("z" + i).setProperty("prop" + i, "foo", STRING);
+        }
+        store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        //2. Remove property
+        NodeBuilder b2 = store.getRoot().builder();
+        for (int i = 0; i < 10; i++) {
+            b2.getChildNode("z" + i).removeProperty("prop" + i);
+        }
+        store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+        store.runBackgroundOperations();
+
+        // enable the detailed gc flag
+        writeField(gc, "detailedGCEnabled", true, true);
+        long maxAge = 1; //hours
+        long delta = TimeUnit.MINUTES.toMillis(10);
+
+        //3. Go past the GC age; nothing has been collected yet at this point
+        // increment the clock by more than 2 hours + delta
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
+        gc.setOptions(gc.getOptions().withMaxIterations(1));
+
+        ((FailingDocumentStore) store.getDocumentStore()).fail().after(0).eternally();
+        try {
+            store.dispose();
+            fail("dispose() must fail with an exception");
+        } catch (DocumentStoreException e) {
+            // expected
+        }
+        ((FailingDocumentStore) store.getDocumentStore()).fail().never();
+
+        // create new store
+        store = new DocumentMK.Builder().clock(clock).setLeaseCheckMode(LeaseCheckMode.DISABLED)
+                .setDocumentStore(new FailingDocumentStore(fixture.createDocumentStore(1), 42)).setAsyncDelay(0)
+                .getNodeStore();
+        assertTrue(store.getDocumentStore() instanceof FailingDocumentStore);
+        MongoTestUtils.setReadPreference(store, ReadPreference.primary());
+        gc = store.getVersionGarbageCollector();
+        store.runBackgroundOperations();
+
+        //4. Check that deleted property does get collected again
+        // increment the clock again by more than 2 hours + delta
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
+        VersionGCStats stats = gc.gc(maxAge*2, HOURS);
+        assertEquals(10, stats.deletedPropsGCCount);
+
+    }
+
+    // OAK-10199 END
     
     private void gcSplitDocsInternal(String subNodeName) throws Exception {
         long maxAge = 1; //hrs


[jackrabbit-oak] 14/28: OAK-10199 : fixed the VG INIT test

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch DetailedGC/OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 644698d057397284b5e5c65af91caf3a2abdfee3
Author: Rishabh Kumar <di...@adobe.com>
AuthorDate: Tue Jun 20 01:52:14 2023 +0530

    OAK-10199 : fixed the VG INIT test
---
 .../org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java
index 6aceeac830..0bf7b8601a 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java
@@ -78,7 +78,7 @@ public class VersionGCInitTest {
 
         vgc = store.find(SETTINGS, "versionGC");
         assertNotNull(vgc);
-        assertEquals(39L, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP));
+        assertEquals(40L, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP));
         assertEquals(id, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP));
     }
 


[jackrabbit-oak] 13/28: OAK-10199 : fixed the VGC IT test cases failures for RDB

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch DetailedGC/OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit c7a29e7358c060545b38f30489db2827d913b273
Author: Rishabh Kumar <di...@adobe.com>
AuthorDate: Tue Jun 20 01:28:41 2023 +0530

    OAK-10199 : fixed the VGC IT test cases failures for RDB
---
 .../jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java    | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java
index 59dfb968b0..87d0f4b4dd 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java
@@ -20,6 +20,7 @@ import static java.util.List.of;
 import static java.util.stream.Collectors.joining;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.transform;
 import static org.apache.jackrabbit.guava.common.collect.Sets.newHashSet;
+import static org.apache.jackrabbit.oak.plugins.document.Document.ID;
 import static org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStore.CHAR2OCTETRATIO;
 import static org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStore.asBytes;
 import static org.apache.jackrabbit.oak.plugins.document.rdb.RDBJDBCTools.asDocumentStoreException;
@@ -461,7 +462,7 @@ public class RDBDocumentStoreJDBC {
                             + excludeKeyPatterns + ", conditions=" + conditions + ", limit=" + limit)
                     : null);
             stmt = prepareQuery(connection, tmd, fields, minId,
-                    maxId, excludeKeyPatterns, conditions, limit, of("ID"));
+                    maxId, excludeKeyPatterns, conditions, limit, of(ID));
             rs = stmt.executeQuery();
             while (rs.next() && result.size() < limit) {
                 int field = 1;
@@ -968,7 +969,7 @@ public class RDBDocumentStoreJDBC {
         tmp.put(NodeDocument.SD_TYPE, "SDTYPE");
         tmp.put(NodeDocument.SD_MAX_REV_TIME_IN_SECS, "SDMAXREVTIME");
         tmp.put(RDBDocumentStore.VERSIONPROP, "VERSION");
-        tmp.put(NodeDocument.ID, "ID");
+        tmp.put(ID, "ID");
         INDEXED_PROP_MAPPING = Collections.unmodifiableMap(tmp);
     }
 


[jackrabbit-oak] 08/28: OAK-10199 : updated logic to fetch nodes by sorting them on the basis of _modified & _id

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch DetailedGC/OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit 5463da0adf13d3f731d290d1bb078ea788a18c33
Author: Rishabh Kumar <di...@adobe.com>
AuthorDate: Mon Jun 19 13:45:20 2023 +0530

    OAK-10199 : updated logic to fetch nodes by sorting them on the basis of _modified & _id
---
 .../plugins/document/VersionGCRecommendations.java | 83 +++++++++++-------
 .../oak/plugins/document/VersionGCSupport.java     | 25 +++---
 .../plugins/document/VersionGarbageCollector.java  | 36 +++++---
 .../document/mongo/MongoVersionGCSupport.java      | 45 +++++-----
 .../oak/plugins/document/rdb/RDBDocumentStore.java | 16 +++-
 .../plugins/document/rdb/RDBDocumentStoreJDBC.java | 16 ++--
 .../plugins/document/rdb/RDBVersionGCSupport.java  | 41 ++++-----
 .../oak/plugins/document/util/Utils.java           | 20 +++--
 .../oak/plugins/document/VersionGCInitTest.java    | 46 ++++++++--
 .../oak/plugins/document/VersionGCSupportTest.java | 98 ++++++++++++++++++----
 .../document/VersionGarbageCollectorIT.java        | 52 +++++++++++-
 11 files changed, 342 insertions(+), 136 deletions(-)

diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
index 4584d925c0..056c2fe438 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
@@ -20,11 +20,11 @@ package org.apache.jackrabbit.oak.plugins.document;
 
 import java.util.HashMap;
 import java.util.Map;
+import java.util.Objects;
 import java.util.concurrent.TimeUnit;
 
 import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.VersionGCStats;
 import org.apache.jackrabbit.oak.plugins.document.util.TimeInterval;
-import org.apache.jackrabbit.oak.plugins.document.util.Utils;
 import org.apache.jackrabbit.oak.spi.gc.GCMonitor;
 import org.apache.jackrabbit.oak.stats.Clock;
 import org.slf4j.Logger;
@@ -32,8 +32,14 @@ import org.slf4j.LoggerFactory;
 
 import static java.lang.Long.MAX_VALUE;
 import static java.util.Map.of;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.NULL;
+import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP;
 import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP;
+import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_ID;
 import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP;
+import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_REC_INTERVAL_PROP;
+import static org.apache.jackrabbit.oak.plugins.document.util.Utils.timestampToString;
 
 /**
  * Gives a recommendation about parameters for the next revision garbage collection run.
@@ -47,13 +53,13 @@ public class VersionGCRecommendations {
 
     final boolean ignoreDueToCheckPoint;
     final TimeInterval scope;
-    final TimeInterval scopeFullGC;
+    final TimeInterval scopeDetailedGC;
     final long maxCollect;
     final long deleteCandidateCount;
     final long lastOldestTimestamp;
     final long detailedGCTimestamp;
+    final String detailedGCId;
     final long originalCollectLimit;
-
     private final long precisionMs;
     final long suggestedIntervalMs;
     private final boolean scopeIsComplete;
@@ -86,7 +92,8 @@ public class VersionGCRecommendations {
         long deletedOnceCount = 0;
         long suggestedIntervalMs;
         long oldestPossible;
-        long oldestPossibleFullGC;
+        long oldestModifiedDocTimeStamp;
+        String oldestModifiedDocId;
         long collectLimit = options.collectLimit;
 
         this.vgc = vgc;
@@ -95,12 +102,12 @@ public class VersionGCRecommendations {
 
         TimeInterval keep = new TimeInterval(clock.getTime() - maxRevisionAgeMs, Long.MAX_VALUE);
 
-        Map<String, Long> settings = getLongSettings();
-        lastOldestTimestamp = settings.get(VersionGarbageCollector.SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP);
+        Map<String, Object> settings = getVGCSettings();
+        lastOldestTimestamp = (long) settings.get(SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP);
         if (lastOldestTimestamp == 0) {
-            log.debug("No lastOldestTimestamp found, querying for the oldest deletedOnce candidate");
+            log.info("No lastOldestTimestamp found, querying for the oldest deletedOnce candidate");
             oldestPossible = vgc.getOldestDeletedOnceTimestamp(clock, options.precisionMs) - 1;
-            log.debug("lastOldestTimestamp found: {}", Utils.timestampToString(oldestPossible));
+            log.info("lastOldestTimestamp found: {}", timestampToString(oldestPossible));
         } else {
             oldestPossible = lastOldestTimestamp - 1;
         }
@@ -108,23 +115,27 @@ public class VersionGCRecommendations {
         TimeInterval scope = new TimeInterval(oldestPossible, Long.MAX_VALUE);
         scope = scope.notLaterThan(keep.fromMs);
 
-        detailedGCTimestamp = settings.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP);
-        if (detailedGCTimestamp == 0) {
-            if (log.isDebugEnabled()) {
-                log.debug("No detailedGCTimestamp found, querying for the oldest deletedOnce candidate");
-            }
-            oldestPossibleFullGC = vgc.getOldestModifiedTimestamp(clock) - 1;
-            if (log.isDebugEnabled()) {
-                log.debug("detailedGCTimestamp found: {}", Utils.timestampToString(oldestPossibleFullGC));
+        detailedGCTimestamp = (long) settings.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP);
+        oldestModifiedDocId = (String) settings.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP);
+        if (detailedGCTimestamp == 0 || Objects.equals(oldestModifiedDocId, MIN_ID_VALUE)) {
+            log.info("No detailedGCTimestamp found, querying for the oldest modified candidate");
+            final NodeDocument doc = vgc.getOldestModifiedDoc(clock);
+            if (doc == NULL) {
+                oldestModifiedDocTimeStamp = 0L;
+                oldestModifiedDocId = MIN_ID_VALUE;
+            } else {
+                oldestModifiedDocId = doc.getId();
+                oldestModifiedDocTimeStamp = doc.getModified() == null ? 0L : doc.getModified() - 1;
             }
+            log.info("detailedGCTimestamp found: {}", timestampToString(oldestModifiedDocTimeStamp));
         } else {
-            oldestPossibleFullGC = detailedGCTimestamp - 1;
+            oldestModifiedDocTimeStamp = detailedGCTimestamp - 1;
         }
 
-        TimeInterval fullGCTimeInternal = new TimeInterval(oldestPossibleFullGC, MAX_VALUE);
-        fullGCTimeInternal = fullGCTimeInternal.notLaterThan(keep.fromMs);
+        TimeInterval detailedGCTimeInternal = new TimeInterval(oldestModifiedDocTimeStamp, MAX_VALUE);
+        detailedGCTimeInternal = detailedGCTimeInternal.notLaterThan(keep.fromMs);
 
-        suggestedIntervalMs = settings.get(VersionGarbageCollector.SETTINGS_COLLECTION_REC_INTERVAL_PROP);
+        suggestedIntervalMs = (long) settings.get(SETTINGS_COLLECTION_REC_INTERVAL_PROP);
         if (suggestedIntervalMs > 0) {
             suggestedIntervalMs = Math.max(suggestedIntervalMs, options.precisionMs);
             if (suggestedIntervalMs < scope.getDurationMs()) {
@@ -168,7 +179,7 @@ public class VersionGCRecommendations {
                 ignoreDueToCheckPoint = true;
             } else {
                 scope = scope.notLaterThan(checkpoint.getTimestamp() - 1);
-                log.debug("checkpoint at [{}] found, scope now {}", Utils.timestampToString(checkpoint.getTimestamp()), scope);
+                log.debug("checkpoint at [{}] found, scope now {}", timestampToString(checkpoint.getTimestamp()), scope);
             }
         }
 
@@ -182,7 +193,8 @@ public class VersionGCRecommendations {
         this.precisionMs = options.precisionMs;
         this.ignoreDueToCheckPoint = ignoreDueToCheckPoint;
         this.scope = scope;
-        this.scopeFullGC = fullGCTimeInternal;
+        this.scopeDetailedGC = detailedGCTimeInternal;
+        this.detailedGCId = oldestModifiedDocId;
         this.scopeIsComplete = scope.toMs >= keep.fromMs;
         this.maxCollect = collectLimit;
         this.suggestedIntervalMs = suggestedIntervalMs;
@@ -207,7 +219,8 @@ public class VersionGCRecommendations {
         } else if (!stats.canceled && !stats.ignoredGCDueToCheckPoint) {
             // success, we would not expect to encounter revisions older than this in the future
             setLongSetting(of(SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP, scope.toMs,
-                    SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP, stats.oldestModifiedGced));
+                    SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP, stats.oldestModifiedDocTimeStamp));
+            setStringSetting(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP, stats.oldestModifiedDocId);
 
             int count = stats.deletedDocGCCount - stats.deletedLeafDocGCCount;
             double usedFraction;
@@ -224,7 +237,7 @@ public class VersionGCRecommendations {
                     long nextDuration = (long) Math.ceil(suggestedIntervalMs * 1.5);
                     log.debug("successful run using {}% of limit, raising recommended interval to {} seconds",
                             Math.round(usedFraction * 1000) / 10.0, TimeUnit.MILLISECONDS.toSeconds(nextDuration));
-                    setLongSetting(VersionGarbageCollector.SETTINGS_COLLECTION_REC_INTERVAL_PROP, nextDuration);
+                    setLongSetting(SETTINGS_COLLECTION_REC_INTERVAL_PROP, nextDuration);
                 } else {
                     log.debug("not increasing limit: collected {} documents ({}% >= {}% limit)", count, usedFraction,
                             allowedFraction);
@@ -236,19 +249,23 @@ public class VersionGCRecommendations {
         }
     }
 
-    private Map<String, Long> getLongSettings() {
-        Document versionGCDoc = vgc.getDocumentStore().find(Collection.SETTINGS, VersionGarbageCollector.SETTINGS_COLLECTION_ID, 0);
-        Map<String, Long> settings = new HashMap<>();
+    private Map<String, Object> getVGCSettings() {
+        Document versionGCDoc = vgc.getDocumentStore().find(Collection.SETTINGS, SETTINGS_COLLECTION_ID, 0);
+        Map<String, Object> settings = new HashMap<>();
         // default values
-        settings.put(VersionGarbageCollector.SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP, 0L);
-        settings.put(VersionGarbageCollector.SETTINGS_COLLECTION_REC_INTERVAL_PROP, 0L);
+        settings.put(SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP, 0L);
+        settings.put(SETTINGS_COLLECTION_REC_INTERVAL_PROP, 0L);
         settings.put(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP, 0L);
+        settings.put(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP, MIN_ID_VALUE);
         if (versionGCDoc != null) {
             for (String k : versionGCDoc.keySet()) {
                 Object value = versionGCDoc.get(k);
                 if (value instanceof Number) {
                     settings.put(k, ((Number) value).longValue());
                 }
+                if (value instanceof String) {
+                    settings.put(k, value);
+                }
             }
         }
         return settings;
@@ -258,8 +275,14 @@ public class VersionGCRecommendations {
         setLongSetting(of(propName, val));
     }
 
+    private void setStringSetting(String propName, String val) {
+        UpdateOp updateOp = new UpdateOp(SETTINGS_COLLECTION_ID, true);
+        updateOp.set(propName, val);
+        vgc.getDocumentStore().createOrUpdate(Collection.SETTINGS, updateOp);
+    }
+
     private void setLongSetting(final Map<String, Long> propValMap) {
-        UpdateOp updateOp = new UpdateOp(VersionGarbageCollector.SETTINGS_COLLECTION_ID, true);
+        UpdateOp updateOp = new UpdateOp(SETTINGS_COLLECTION_ID, true);
         propValMap.forEach(updateOp::set);
         vgc.getDocumentStore().createOrUpdate(Collection.SETTINGS, updateOp);
     }
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
index f23340acbc..abdfdf4a64 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
@@ -19,9 +19,12 @@
 
 package org.apache.jackrabbit.oak.plugins.document;
 
+import static java.util.Comparator.comparing;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.filter;
 import static java.util.stream.Collectors.toList;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.NULL;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.getModifiedInSecs;
 import static org.apache.jackrabbit.oak.plugins.document.util.Utils.getAllDocuments;
 import static org.apache.jackrabbit.oak.plugins.document.util.Utils.getSelectedDocuments;
@@ -52,7 +55,7 @@ public class VersionGCSupport {
 
     /**
      * Returns documents that have a {@link NodeDocument#MODIFIED_IN_SECS} value
-     * within the given range and the {@link NodeDocument#DELETED} set to
+     * within the given range and the {@link NodeDocument#DELETED} set to
      * {@code true}. The two passed modified timestamps are in milliseconds
      * since the epoch and the implementation will convert them to seconds at
      * the granularity of the {@link NodeDocument#MODIFIED_IN_SECS} field and
@@ -79,12 +82,15 @@ public class VersionGCSupport {
      * @param fromModified the lower bound modified timestamp (inclusive)
      * @param toModified the upper bound modified timestamp (exclusive)
      * @param limit the limit of documents to return
+     * @param fromId the lower bound {@link NodeDocument#ID} (exclusive)
      * @return matching documents.
      */
-    public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit) {
+    public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit,
+                                                  final String fromId) {
         return StreamSupport
-                .stream(getSelectedDocuments(store, MODIFIED_IN_SECS, fromModified).spliterator(), false)
+                .stream(getSelectedDocuments(store, MODIFIED_IN_SECS, 1, fromId).spliterator(), false)
                 .filter(input -> modifiedGreaterThanEquals(input, fromModified) && modifiedLessThan(input, toModified))
+                .sorted((o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2))
                 .limit(limit)
                 .collect(toList());
     }
@@ -176,27 +182,26 @@ public class VersionGCSupport {
     }
 
     /**
-     * Retrieve the time of the oldest modified document.
+     * Retrieve the oldest modified document.
      *
-     * @return the timestamp of the oldest modified document.
+     * @return the oldest modified document.
      */
-    public long getOldestModifiedTimestamp(final Clock clock) {
+    public NodeDocument getOldestModifiedDoc(final Clock clock) {
         long ts = 0;
         long now = clock.getTime();
         Iterable<NodeDocument> docs = null;
 
         LOG.info("find oldest modified document");
         try {
-            docs = getModifiedDocs(ts, now, 1);
+            docs = getModifiedDocs(ts, now, 1, MIN_ID_VALUE);
             if (docs.iterator().hasNext()) {
-                Long modified = docs.iterator().next().getModified();
-                return modified != null ? modified : 0L;
+                return docs.iterator().next();
             }
         } finally {
             Utils.closeIfCloseable(docs);
         }
         LOG.info("find oldest modified document to be {}", Utils.timestampToString(ts));
-        return ts;
+        return NULL;
     }
 
     public long getDeletedOnceCount() throws UnsupportedOperationException {
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
index b562831e24..f54299e3fd 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
@@ -32,7 +32,6 @@ import java.util.Set;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicReference;
-import java.util.stream.Collectors;
 
 import org.apache.jackrabbit.guava.common.base.Function;
 import org.apache.jackrabbit.guava.common.base.Joiner;
@@ -112,6 +111,11 @@ public class VersionGarbageCollector {
      */
     static final String SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP = "detailedGCTimeStamp";
 
+    /**
+     * Property name for the _id of the last document processed by the previous detailed-GC run
+     */
+    static final String SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP = "detailedGCId";
+
     private final DocumentNodeStore nodeStore;
     private final DocumentStore ds;
     private final boolean detailedGCEnabled;
@@ -272,7 +276,8 @@ public class VersionGarbageCollector {
         int splitDocGCCount;
         int intermediateSplitDocGCCount;
         int updateResurrectedGCCount;
-        long oldestModifiedGced;
+        long oldestModifiedDocTimeStamp;
+        String oldestModifiedDocId;
         int updatedDetailedGCDocsCount;
         int deletedPropsGCCount;
         final TimeDurationFormatter df = TimeDurationFormatter.forLogging();
@@ -343,7 +348,8 @@ public class VersionGarbageCollector {
                     ", updateResurrectedGCCount=" + updateResurrectedGCCount +
                     ", splitDocGCCount=" + splitDocGCCount +
                     ", intermediateSplitDocGCCount=" + intermediateSplitDocGCCount +
-                    ", oldestModifiedGced=" + oldestModifiedGced +
+                    ", oldestModifiedDocId=" + oldestModifiedDocId +
+                    ", oldestModifiedDocTimeStamp=" + oldestModifiedDocTimeStamp +
                     ", updatedDetailedGCDocsCount=" + updatedDetailedGCDocsCount +
                     ", deletedPropsGCCount=" + deletedPropsGCCount +
                     ", iterationCount=" + iterationCount +
@@ -363,7 +369,8 @@ public class VersionGarbageCollector {
             this.splitDocGCCount += run.splitDocGCCount;
             this.intermediateSplitDocGCCount += run.intermediateSplitDocGCCount;
             this.updateResurrectedGCCount += run.updateResurrectedGCCount;
-            this.oldestModifiedGced = run.oldestModifiedGced;
+            this.oldestModifiedDocTimeStamp = run.oldestModifiedDocTimeStamp;
+            this.oldestModifiedDocId = run.oldestModifiedDocId;
             this.updatedDetailedGCDocsCount += run.updatedDetailedGCDocsCount;
             this.deletedPropsGCCount += run.deletedPropsGCCount;
             if (run.iterationCount > 0) {
@@ -608,15 +615,16 @@ public class VersionGarbageCollector {
                 throws IOException {
             int docsTraversed = 0;
             boolean foundDoc = true;
-            long oldestModifiedGCed = rec.scopeFullGC.fromMs;
+            long oldestModifiedDocTimeStamp = rec.scopeDetailedGC.fromMs;
+            String oldestModifiedDocId = rec.detailedGCId;
             try (DetailedGC gc = new DetailedGC(headRevision, monitor, cancel)) {
-                final long fromModified = rec.scopeFullGC.fromMs;
-                final long toModified = rec.scopeFullGC.toMs;
+                final long fromModified = rec.scopeDetailedGC.fromMs;
+                final long toModified = rec.scopeDetailedGC.toMs;
                 if (phases.start(GCPhase.DETAILED_GC)) {
-                    while (foundDoc && oldestModifiedGCed < toModified && docsTraversed <= PROGRESS_BATCH_SIZE) {
+                    while (foundDoc && oldestModifiedDocTimeStamp < toModified && docsTraversed <= PROGRESS_BATCH_SIZE) {
                         // set foundDoc to false to allow exiting the while loop
                         foundDoc = false;
-                        Iterable<NodeDocument> itr = versionStore.getModifiedDocs(oldestModifiedGCed, toModified, 1000);
+                        Iterable<NodeDocument> itr = versionStore.getModifiedDocs(oldestModifiedDocTimeStamp, toModified, 1000, oldestModifiedDocId);
                         try {
                             for (NodeDocument doc : itr) {
                                 foundDoc = true;
@@ -640,12 +648,12 @@ public class VersionGarbageCollector {
                                 if (modified == null) {
                                     monitor.warn("collectDetailGarbage : document has no _modified property : {}",
                                             doc.getId());
-                                } else if (modified < oldestModifiedGCed) {
+                                } else if (modified < oldestModifiedDocTimeStamp) {
                                     monitor.warn(
                                             "collectDetailGarbage : document has older _modified than query boundary : {} (from: {}, to: {})",
                                             modified, fromModified, toModified);
                                 } else {
-                                    oldestModifiedGCed = modified;
+                                    oldestModifiedDocTimeStamp = modified;
                                 }
 
                                 if (gc.hasGarbage() && phases.start(GCPhase.DETAILED_GC_CLEANUP)) {
@@ -653,11 +661,13 @@ public class VersionGarbageCollector {
                                     phases.stop(GCPhase.DETAILED_GC_CLEANUP);
                                 }
 
-                                oldestModifiedGCed = modified == null ? fromModified : modified;
+                                oldestModifiedDocTimeStamp = modified == null ? fromModified : modified;
+                                oldestModifiedDocId = doc.getId();
                             }
                         } finally {
                             Utils.closeIfCloseable(itr);
-                            phases.stats.oldestModifiedGced = oldestModifiedGCed;
+                            phases.stats.oldestModifiedDocTimeStamp = oldestModifiedDocTimeStamp;
+                            phases.stats.oldestModifiedDocId = oldestModifiedDocId;
                         }
                     }
                     phases.stop(GCPhase.DETAILED_GC);
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
index 324ade704c..690fd5a0d6 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
@@ -21,8 +21,8 @@ package org.apache.jackrabbit.oak.plugins.document.mongo;
 
 import static com.mongodb.client.model.Filters.eq;
 import static com.mongodb.client.model.Filters.exists;
+import static com.mongodb.client.model.Filters.gt;
 import static java.util.Optional.ofNullable;
-import static java.util.concurrent.TimeUnit.SECONDS;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.concat;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.filter;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.transform;
@@ -34,6 +34,7 @@ import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
 import static org.apache.jackrabbit.oak.plugins.document.Document.ID;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.DELETED_ONCE;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.NULL;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.PATH;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SD_MAX_REV_TIME_IN_SECS;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SD_TYPE;
@@ -130,21 +131,26 @@ public class MongoVersionGCSupport extends VersionGCSupport {
 
     /**
      * Returns documents that have a {@link NodeDocument#MODIFIED_IN_SECS} value
-     * within the given range in sorted order. The two passed modified timestamps
-     * are in milliseconds since the epoch and the implementation will convert them
-     * to seconds at the granularity of the {@link NodeDocument#MODIFIED_IN_SECS}
-     * field and then perform the comparison.
+     * within the given range. The two passed modified timestamps are in milliseconds
+     * since the epoch and the implementation will convert them to seconds at
+     * the granularity of the {@link NodeDocument#MODIFIED_IN_SECS} field and
+     * then perform the comparison.
      *
      * @param fromModified the lower bound modified timestamp (inclusive)
-     * @param toModified   the upper bound modified timestamp (exclusive)
-     * @return matching documents in sorted order of {@link NodeDocument#MODIFIED_IN_SECS}
+     * @param toModified the upper bound modified timestamp (exclusive)
+     * @param limit the limit of documents to return
+     * @param fromId the lower bound {@link NodeDocument#ID} (exclusive)
+     * @return matching documents.
      */
     @Override
-    public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit) {
-        // _modified >= fromModified && _modified < toModified
+    public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit,
+                                                  final String fromId) {
+        // _modified >= fromModified && _modified < toModified && _id > fromId
         final Bson query = and(gte(MODIFIED_IN_SECS, getModifiedInSecs(fromModified)),
-                lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified)));
-        final Bson sort = eq(MODIFIED_IN_SECS, 1);
+                lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified)), gt(ID, fromId));
+        // first sort by _modified and then by _id
+        final Bson sort = and(eq(MODIFIED_IN_SECS, 1), eq(ID, 1));
+
         final FindIterable<BasicDBObject> cursor = getNodeCollection()
                 .find(query)
                 .sort(sort)
@@ -232,11 +238,11 @@ public class MongoVersionGCSupport extends VersionGCSupport {
      * @return the timestamp of the oldest modified document.
      */
     @Override
-    public long getOldestModifiedTimestamp(final Clock clock) {
-        LOG.info("getOldestModifiedTimestamp() <- start");
+    public NodeDocument getOldestModifiedDoc(final Clock clock) {
+        LOG.info("getOldestModifiedDoc() <- start");
 
-        final Bson sort = eq(MODIFIED_IN_SECS, 1);
-        final List<Long> result = new ArrayList<>(1);
+        final Bson sort = and(eq(MODIFIED_IN_SECS, 1), eq(ID, 1));
+        final List<NodeDocument> result = new ArrayList<>(1);
 
         // we need to add query condition to ignore `previous` documents which doesn't have this field
         final Bson query = exists(MODIFIED_IN_SECS);
@@ -245,14 +251,13 @@ public class MongoVersionGCSupport extends VersionGCSupport {
                 (Consumer<BasicDBObject>) document ->
                         ofNullable(store.convertFromDBObject(NODES, document))
                                 .ifPresent(doc -> {
-                    long modifiedMs = SECONDS.toMillis(ofNullable(doc.getModified()).orElse(0L));
-                    LOG.info("getOldestDeletedOnceTimestamp() -> {}", Utils.timestampToString(modifiedMs));
-                    result.add(modifiedMs);
+                    LOG.info("getOldestModifiedDoc() -> {}", doc);
+                    result.add(doc);
                 }));
 
         if (result.isEmpty()) {
-            LOG.info("getOldestModifiedTimestamp() -> none found, return current time");
-            result.add(clock.getTime());
+            LOG.info("getOldestModifiedDoc() -> none found, return NULL document");
+            result.add(NULL);
         }
         return result.get(0);
     }
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStore.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStore.java
index 82c09e213d..3a9ef2d95d 100755
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStore.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStore.java
@@ -971,8 +971,8 @@ public class RDBDocumentStore implements DocumentStore {
     public static String VERSIONPROP = "__version";
 
     // set of supported indexed properties
-    private static final Set<String> INDEXEDPROPERTIES = new HashSet<String>(Arrays.asList(new String[] { MODIFIED,
-            NodeDocument.HAS_BINARY_FLAG, NodeDocument.DELETED_ONCE, NodeDocument.SD_TYPE, NodeDocument.SD_MAX_REV_TIME_IN_SECS, VERSIONPROP }));
+    private static final Set<String> INDEXEDPROPERTIES = new HashSet<>(Arrays.asList(MODIFIED,
+            NodeDocument.HAS_BINARY_FLAG, NodeDocument.DELETED_ONCE, NodeDocument.SD_TYPE, NodeDocument.SD_MAX_REV_TIME_IN_SECS, VERSIONPROP, ID));
 
     // set of required table columns
     private static final Set<String> REQUIREDCOLUMNS = Collections.unmodifiableSet(new HashSet<String>(Arrays.asList(
@@ -1840,7 +1840,7 @@ public class RDBDocumentStore implements DocumentStore {
     }
 
     protected <T extends Document> Iterable<T> queryAsIterable(final Collection<T> collection, String fromKey, String toKey,
-            final List<String> excludeKeyPatterns, final List<QueryCondition> conditions, final int limit, final String sortBy) {
+            final List<String> excludeKeyPatterns, final List<QueryCondition> conditions, final int limit, final List<String> sortBy) {
 
         final RDBTableMetaData tmd = getTable(collection);
         Set<String> allowedProps = Sets.intersection(INDEXEDPROPERTIES, tmd.getColumnProperties());
@@ -1853,6 +1853,16 @@ public class RDBDocumentStore implements DocumentStore {
             }
         }
 
+        if (sortBy != null && !sortBy.isEmpty()) {
+            for (String key: sortBy) {
+                if (!allowedProps.contains(key)) {
+                    final String message = "indexed property " + key + " not supported. supported properties are " + allowedProps;
+                    LOG.error(message);
+                    throw new UnsupportedIndexedPropertyException(message);
+                }
+            }
+        }
+
         final String from = collection == Collection.NODES && NodeDocument.MIN_ID_VALUE.equals(fromKey) ? null : fromKey;
         final String to = collection == Collection.NODES && NodeDocument.MAX_ID_VALUE.equals(toKey) ? null : toKey;
 
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java
index 26fc1311fa..59dfb968b0 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java
@@ -16,6 +16,8 @@
  */
 package org.apache.jackrabbit.oak.plugins.document.rdb;
 
+import static java.util.List.of;
+import static java.util.stream.Collectors.joining;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.transform;
 import static org.apache.jackrabbit.guava.common.collect.Sets.newHashSet;
 import static org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStore.CHAR2OCTETRATIO;
@@ -459,7 +461,7 @@ public class RDBDocumentStoreJDBC {
                             + excludeKeyPatterns + ", conditions=" + conditions + ", limit=" + limit)
                     : null);
             stmt = prepareQuery(connection, tmd, fields, minId,
-                    maxId, excludeKeyPatterns, conditions, limit, "ID");
+                    maxId, excludeKeyPatterns, conditions, limit, of("ID"));
             rs = stmt.executeQuery();
             while (rs.next() && result.size() < limit) {
                 int field = 1;
@@ -554,7 +556,7 @@ public class RDBDocumentStoreJDBC {
 
     @NotNull
     public Iterator<RDBRow> queryAsIterator(RDBConnectionHandler ch, RDBTableMetaData tmd, String minId, String maxId,
-            List<String> excludeKeyPatterns, List<QueryCondition> conditions, int limit, String sortBy) throws SQLException {
+            List<String> excludeKeyPatterns, List<QueryCondition> conditions, int limit, List<String> sortBy) throws SQLException {
         return new ResultSetIterator(ch, tmd, minId, maxId, excludeKeyPatterns, conditions, limit, sortBy);
     }
 
@@ -573,7 +575,7 @@ public class RDBDocumentStoreJDBC {
         private long pstart;
 
         public ResultSetIterator(RDBConnectionHandler ch, RDBTableMetaData tmd, String minId, String maxId,
-                List<String> excludeKeyPatterns, List<QueryCondition> conditions, int limit, String sortBy) throws SQLException {
+                List<String> excludeKeyPatterns, List<QueryCondition> conditions, int limit, List<String> sortBy) throws SQLException {
             long start = System.currentTimeMillis();
             try {
                 this.ch = ch;
@@ -695,7 +697,7 @@ public class RDBDocumentStoreJDBC {
 
     @NotNull
     private PreparedStatement prepareQuery(Connection connection, RDBTableMetaData tmd, String columns, String minId, String maxId,
-            List<String> excludeKeyPatterns, List<QueryCondition> conditions, int limit, String sortBy) throws SQLException {
+            List<String> excludeKeyPatterns, List<QueryCondition> conditions, int limit, List<String> sortBy) throws SQLException {
 
         StringBuilder selectClause = new StringBuilder();
 
@@ -714,9 +716,8 @@ public class RDBDocumentStoreJDBC {
             query.append(" where ").append(whereClause);
         }
 
-        if (sortBy != null) {
-            // FIXME : order should be determined via sortBy field
-            query.append(" order by ID");
+        if (sortBy != null && !sortBy.isEmpty()) {
+            query.append(" order by ").append(sortBy.stream().map(INDEXED_PROP_MAPPING::get).collect(joining(", ")));
         }
 
         if (limit != Integer.MAX_VALUE) {
@@ -967,6 +968,7 @@ public class RDBDocumentStoreJDBC {
         tmp.put(NodeDocument.SD_TYPE, "SDTYPE");
         tmp.put(NodeDocument.SD_MAX_REV_TIME_IN_SECS, "SDMAXREVTIME");
         tmp.put(RDBDocumentStore.VERSIONPROP, "VERSION");
+        tmp.put(NodeDocument.ID, "ID");
         INDEXED_PROP_MAPPING = Collections.unmodifiableMap(tmp);
     }
 
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
index f26268bcd3..0d2f678911 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
@@ -16,11 +16,13 @@
  */
 package org.apache.jackrabbit.oak.plugins.document.rdb;
 
-import static java.util.Collections.emptyList;
 import static java.util.List.of;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.filter;
 import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
+import static org.apache.jackrabbit.oak.plugins.document.Document.ID;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.NULL;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.getModifiedInSecs;
 import static org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStore.EMPTY_KEY_PATTERN;
 
@@ -99,18 +101,21 @@ public class RDBVersionGCSupport extends VersionGCSupport {
      * then perform the comparison.
      *
      * @param fromModified the lower bound modified timestamp (inclusive)
-     * @param toModified   the upper bound modified timestamp (exclusive)
-     * @param limit        the limit of documents to return
+     * @param toModified the upper bound modified timestamp (exclusive)
+     * @param limit the limit of documents to return
+     * @param fromId the lower bound {@link NodeDocument#ID} (exclusive)
      * @return matching documents.
      */
     @Override
-    public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit) {
+    public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit,
+                                                  final String fromId) {
         List<QueryCondition> conditions = of(new QueryCondition(MODIFIED_IN_SECS, "<", getModifiedInSecs(toModified)),
-                new QueryCondition(MODIFIED_IN_SECS, ">=", getModifiedInSecs(fromModified)));
+                new QueryCondition(MODIFIED_IN_SECS, ">=", getModifiedInSecs(fromModified)),
+                new QueryCondition(ID, ">", of(fromId)));
         if (MODE == 1) {
             return getIterator(EMPTY_KEY_PATTERN, conditions);
         } else {
-            return store.queryAsIterable(NODES, null, null, EMPTY_KEY_PATTERN, conditions, limit, MODIFIED_IN_SECS);
+            return store.queryAsIterable(NODES, fromId, null, EMPTY_KEY_PATTERN, conditions, limit, of(MODIFIED_IN_SECS, ID));
         }
     }
 
@@ -275,24 +280,20 @@ public class RDBVersionGCSupport extends VersionGCSupport {
      * @return the timestamp of the oldest modified document.
      */
     @Override
-    public long getOldestModifiedTimestamp(Clock clock) {
-        long modifiedMs = Long.MIN_VALUE;
+    public NodeDocument getOldestModifiedDoc(Clock clock) {
+        NodeDocument doc = NULL;
 
-        LOG.info("getOldestModifiedTimestamp() <- start");
+        LOG.info("getOldestModifiedDoc() <- start");
+        Iterable<NodeDocument> modifiedDocs = null;
         try {
-            long modifiedSec = store.getMinValue(NODES, MODIFIED_IN_SECS, null, null, EMPTY_KEY_PATTERN, emptyList());
-            modifiedMs = TimeUnit.SECONDS.toMillis(modifiedSec);
+            modifiedDocs = getModifiedDocs(0L, clock.getTime(), 1, MIN_ID_VALUE);
+            doc = modifiedDocs.iterator().hasNext() ? modifiedDocs.iterator().next() : NULL;
         } catch (DocumentStoreException ex) {
-            LOG.error("getOldestModifiedTimestamp()", ex);
-        }
-
-        if (modifiedMs > 0) {
-            LOG.info("getOldestModifiedTimestamp() -> {}", Utils.timestampToString(modifiedMs));
-            return modifiedMs;
-        } else {
-            LOG.info("getOldestModifiedTimestamp() -> none found, return current time");
-            return clock.getTime();
+            LOG.error("getOldestModifiedDoc()", ex);
+        } finally {
+            Utils.closeIfCloseable(modifiedDocs);
         }
+        return doc;
     }
 
     @Override
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/Utils.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/Utils.java
index c9428429bc..85bb1225fb 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/Utils.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/Utils.java
@@ -62,6 +62,7 @@ import org.slf4j.LoggerFactory;
 
 import static org.apache.jackrabbit.guava.common.base.Preconditions.checkNotNull;
 import static org.apache.jackrabbit.guava.common.collect.Iterables.transform;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.isDeletedEntry;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.isCommitRootEntry;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.isRevisionsEntry;
@@ -668,7 +669,7 @@ public class Utils {
      * @return an {@link Iterable} over all documents in the store.
      */
     public static Iterable<NodeDocument> getAllDocuments(final DocumentStore store) {
-        return internalGetSelectedDocuments(store, null, 0, DEFAULT_BATCH_SIZE);
+        return internalGetSelectedDocuments(store, null, 0, MIN_ID_VALUE, DEFAULT_BATCH_SIZE);
     }
 
     /**
@@ -710,7 +711,7 @@ public class Utils {
      */
     public static Iterable<NodeDocument> getSelectedDocuments(
             DocumentStore store, String indexedProperty, long startValue, int batchSize) {
-        return internalGetSelectedDocuments(store, indexedProperty, startValue, batchSize);
+        return internalGetSelectedDocuments(store, indexedProperty, startValue, MIN_ID_VALUE, batchSize);
     }
 
     /**
@@ -719,12 +720,21 @@ public class Utils {
      */
     public static Iterable<NodeDocument> getSelectedDocuments(
             DocumentStore store, String indexedProperty, long startValue) {
-        return internalGetSelectedDocuments(store, indexedProperty, startValue, DEFAULT_BATCH_SIZE);
+        return internalGetSelectedDocuments(store, indexedProperty, startValue, MIN_ID_VALUE, DEFAULT_BATCH_SIZE);
+    }
+
+    /**
+     * Like {@link #getSelectedDocuments(DocumentStore, String, long, int)} with
+     * a default {@code batchSize}, using {@code fromId} as the initial start id.
+     */
+    public static Iterable<NodeDocument> getSelectedDocuments(
+            DocumentStore store, String indexedProperty, long startValue, String fromId) {
+        return internalGetSelectedDocuments(store, indexedProperty, startValue, fromId, DEFAULT_BATCH_SIZE);
     }
 
     private static Iterable<NodeDocument> internalGetSelectedDocuments(
             final DocumentStore store, final String indexedProperty,
-            final long startValue, final int batchSize) {
+            final long startValue, String fromId, final int batchSize) {
         if (batchSize < 2) {
             throw new IllegalArgumentException("batchSize must be > 1");
         }
@@ -733,7 +743,7 @@ public class Utils {
             public Iterator<NodeDocument> iterator() {
                 return new AbstractIterator<NodeDocument>() {
 
-                    private String startId = NodeDocument.MIN_ID_VALUE;
+                    private String startId = fromId;
 
                     private Iterator<NodeDocument> batch = nextBatch();
 
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java
index ed39a372b2..6aceeac830 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java
@@ -22,14 +22,19 @@ import org.junit.Before;
 import org.junit.Rule;
 import org.junit.Test;
 
+import static java.util.concurrent.TimeUnit.DAYS;
+import static java.util.concurrent.TimeUnit.SECONDS;
+import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
+import static org.apache.jackrabbit.oak.plugins.document.Collection.SETTINGS;
 import static org.apache.jackrabbit.oak.plugins.document.DetailGCHelper.enableDetailGC;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE;
+import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP;
 import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP;
+import static org.apache.jackrabbit.oak.plugins.document.util.Utils.getIdFromPath;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
 
-import java.util.concurrent.TimeUnit;
-
 public class VersionGCInitTest {
 
     @Rule
@@ -45,27 +50,50 @@ public class VersionGCInitTest {
     @Test
     public void lazyInitialize() throws Exception {
         DocumentStore store = ns.getDocumentStore();
-        Document vgc = store.find(Collection.SETTINGS, "versionGC");
+        Document vgc = store.find(SETTINGS, "versionGC");
         assertNull(vgc);
 
-        ns.getVersionGarbageCollector().gc(1, TimeUnit.DAYS);
+        ns.getVersionGarbageCollector().gc(1, DAYS);
 
-        vgc = store.find(Collection.SETTINGS, "versionGC");
+        vgc = store.find(SETTINGS, "versionGC");
         assertNotNull(vgc);
         assertEquals(0L, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP));
+        assertNull(vgc.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP));
     }
 
     @Test
     public void lazyInitializeWithDetailedGC() throws Exception {
         DocumentStore store = ns.getDocumentStore();
-        Document vgc = store.find(Collection.SETTINGS, "versionGC");
+        Document vgc = store.find(SETTINGS, "versionGC");
         assertNull(vgc);
 
         enableDetailGC(ns.getVersionGarbageCollector());
-        ns.getVersionGarbageCollector().gc(1, TimeUnit.DAYS);
+        long offset = SECONDS.toMillis(42);
+        String id = getIdFromPath("/node");
+        Revision r = new Revision(offset, 0, 1);
+        UpdateOp op = new UpdateOp(id, true);
+        NodeDocument.setModified(op, r);
+        store.createOrUpdate(NODES, op);
+        ns.getVersionGarbageCollector().gc(1, DAYS);
 
-        vgc = store.find(Collection.SETTINGS, "versionGC");
+        vgc = store.find(SETTINGS, "versionGC");
         assertNotNull(vgc);
-        assertEquals(-1L, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP));
+        assertEquals(39L, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP));
+        assertEquals(id, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP));
+    }
+
+    @Test
+    public void lazyInitializeWithDetailedGCWithNoData() throws Exception {
+        DocumentStore store = ns.getDocumentStore();
+        Document vgc = store.find(SETTINGS, "versionGC");
+        assertNull(vgc);
+
+        enableDetailGC(ns.getVersionGarbageCollector());
+        ns.getVersionGarbageCollector().gc(1, DAYS);
+
+        vgc = store.find(SETTINGS, "versionGC");
+        assertNotNull(vgc);
+        assertEquals(0L, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP));
+        assertEquals(MIN_ID_VALUE, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_DOCUMENT_ID_PROP));
     }
 }
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java
index 831d2c89ec..4eb20986c2 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java
@@ -18,10 +18,9 @@
  */
 package org.apache.jackrabbit.oak.plugins.document;
 
+import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.jackrabbit.guava.common.collect.Iterables;
-import org.apache.jackrabbit.guava.common.collect.Lists;
 import com.mongodb.ReadPreference;
 
 import org.apache.jackrabbit.oak.plugins.document.mongo.MongoDocumentStore;
@@ -29,31 +28,38 @@ import org.apache.jackrabbit.oak.plugins.document.mongo.MongoTestUtils;
 import org.apache.jackrabbit.oak.plugins.document.mongo.MongoVersionGCSupport;
 import org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStore;
 import org.apache.jackrabbit.oak.plugins.document.rdb.RDBVersionGCSupport;
-import org.apache.jackrabbit.oak.plugins.document.util.Utils;
-import org.apache.jackrabbit.oak.stats.Clock;
 import org.junit.After;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 
+import static java.util.Comparator.comparing;
+import static java.util.List.of;
+import static java.util.Optional.ofNullable;
 import static java.util.concurrent.TimeUnit.SECONDS;
+import static java.util.stream.StreamSupport.stream;
+import static org.apache.jackrabbit.guava.common.collect.Comparators.isInOrder;
+import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
 import static org.apache.jackrabbit.oak.plugins.document.DocumentStoreFixture.MEMORY;
 import static org.apache.jackrabbit.oak.plugins.document.DocumentStoreFixture.MONGO;
 import static org.apache.jackrabbit.oak.plugins.document.DocumentStoreFixture.RDB_H2;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MIN_ID_VALUE;
+import static org.apache.jackrabbit.oak.plugins.document.util.Utils.getIdFromPath;
+import static org.apache.jackrabbit.oak.stats.Clock.SIMPLE;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
 @RunWith(Parameterized.class)
 public class VersionGCSupportTest {
 
-    private DocumentStoreFixture fixture;
-    private DocumentStore store;
-    private VersionGCSupport gcSupport;
-    private List<String> ids = Lists.newArrayList();
+    private final DocumentStoreFixture fixture;
+    private final DocumentStore store;
+    private final VersionGCSupport gcSupport;
+    private final List<String> ids = new ArrayList<>();
 
     @Parameterized.Parameters(name="{0}")
     public static java.util.Collection<DocumentStoreFixture> fixtures() {
-        List<DocumentStoreFixture> fixtures = Lists.newArrayList();
+        List<DocumentStoreFixture> fixtures = new ArrayList<>(3);
         if (RDB_H2.isAvailable()) {
             fixtures.add(RDB_H2);
         }
@@ -88,7 +94,7 @@ public class VersionGCSupportTest {
 
     @After
     public void after() throws Exception {
-        store.remove(Collection.NODES, ids);
+        store.remove(NODES, ids);
         fixture.dispose();
     }
 
@@ -97,12 +103,12 @@ public class VersionGCSupportTest {
         long offset = SECONDS.toMillis(42);
         for (int i = 0; i < 5; i++) {
             Revision r = new Revision(offset + SECONDS.toMillis(i), 0, 1);
-            String id = Utils.getIdFromPath("/doc-" + i);
+            String id = getIdFromPath("/doc-" + i);
             ids.add(id);
             UpdateOp op = new UpdateOp(id, true);
             NodeDocument.setModified(op, r);
             NodeDocument.setDeleted(op, r, true);
-            store.create(Collection.NODES, Lists.newArrayList(op));
+            store.create(NODES, of(op));
         }
 
         assertPossiblyDeleted(0, 41, 0);
@@ -126,25 +132,83 @@ public class VersionGCSupportTest {
         assertPossiblyDeleted(51, 60, 0);
     }
 
+    @Test
+    public void getPossiblyModifiedDocs() {
+        long offset = SECONDS.toMillis(42);
+        for (int i = 0; i < 5; i++) {
+            Revision r = new Revision(offset + SECONDS.toMillis(i), 0, 1);
+            String id = getIdFromPath("/doc-modified" + i);
+            ids.add(id);
+            UpdateOp op = new UpdateOp(id, true);
+            NodeDocument.setModified(op, r);
+            store.create(NODES, of(op));
+        }
+
+        assertModified(0, 41, 0);
+        assertModified(0, 42, 0);
+        assertModified(0, 44, 0);
+        assertModified(0, 45, 3);
+        assertModified(0, 46, 3);
+        assertModified(0, 49, 3);
+        assertModified(0, 50, 5);
+        assertModified(0, 51, 5);
+        assertModified(39, 60, 5);
+        assertModified(40, 60, 5);
+        assertModified(41, 60, 5);
+        assertModified(42, 60, 5);
+        assertModified(44, 60, 5);
+        assertModified(45, 60, 2);
+        assertModified(47, 60, 2);
+        assertModified(48, 60, 2);
+        assertModified(49, 60, 2);
+        assertModified(50, 60, 0);
+        assertModified(51, 60, 0);
+    }
+
     @Test
     public void findOldest() {
         // see OAK-8476
         long secs = 123456;
         long offset = SECONDS.toMillis(secs);
         Revision r = new Revision(offset, 0, 1);
-        String id = Utils.getIdFromPath("/doc-del");
+        String id = getIdFromPath("/doc-del");
         ids.add(id);
         UpdateOp op = new UpdateOp(id, true);
         NodeDocument.setModified(op, r);
         NodeDocument.setDeleted(op, r, true);
-        store.create(Collection.NODES, Lists.newArrayList(op));
+        store.create(NODES, of(op));
 
-        long reportedsecs = gcSupport.getOldestDeletedOnceTimestamp(Clock.SIMPLE, 1) / SECONDS.toMillis(1);
-        assertTrue("diff (s) should be < 5: " + Math.abs(secs - reportedsecs), Math.abs(secs - reportedsecs) < 5);
+        long reportedSecs = gcSupport.getOldestDeletedOnceTimestamp(SIMPLE, 1) / SECONDS.toMillis(1);
+        assertTrue("diff (s) should be < 5: " + Math.abs(secs - reportedSecs), Math.abs(secs - reportedSecs) < 5);
+    }
+
+    @Test
+    public void findOldestModified() {
+        long secs = 1234567;
+        long offset = SECONDS.toMillis(secs);
+        Revision r = new Revision(offset, 0, 1);
+        String id = getIdFromPath("/doc-modified");
+        ids.add(id);
+        UpdateOp op = new UpdateOp(id, true);
+        NodeDocument.setModified(op, r);
+        store.create(NODES, of(op));
+
+        NodeDocument oldestModifiedDoc = gcSupport.getOldestModifiedDoc(SIMPLE);
+        String oldestModifiedDocId = oldestModifiedDoc.getId();
+        long reportedSecs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
+        assertTrue("diff (s) should be < 5: " + Math.abs(secs - reportedSecs), Math.abs(secs - reportedSecs) < 5);
+        assertEquals(id, oldestModifiedDocId);
     }
 
     private void assertPossiblyDeleted(long fromSeconds, long toSeconds, long num) {
         Iterable<NodeDocument> docs = gcSupport.getPossiblyDeletedDocs(SECONDS.toMillis(fromSeconds), SECONDS.toMillis(toSeconds));
-        assertEquals(num, Iterables.size(docs));
+        assertEquals(num, stream(docs.spliterator(), false).count());
+    }
+
+    private void assertModified(long fromSeconds, long toSeconds, long num) {
+        Iterable<NodeDocument> docs = gcSupport.getModifiedDocs(SECONDS.toMillis(fromSeconds), SECONDS.toMillis(toSeconds), 10, MIN_ID_VALUE);
+        docs.forEach(d -> System.out.println(d.getModified() + " " + d.getId()));
+        assertEquals(num, stream(docs.spliterator(), false).count());
+        assertTrue(isInOrder(docs, (o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2)));
     }
 }
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
index 4470a07f96..caa156a6d4 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
@@ -229,8 +229,8 @@ public class VersionGarbageCollectorIT {
     }
 
     @Test
-    public void testGCDeletedProps() throws Exception{
-        //1. Create nodes
+    public void testGCDeletedProps() throws Exception {
+        //1. Create nodes with properties
         NodeBuilder b1 = store.getRoot().builder();
 
         // Add property to node & save
@@ -289,6 +289,54 @@ public class VersionGarbageCollectorIT {
         stats = gc.gc(maxAge*2, HOURS);
         assertEquals(0, stats.deletedPropsGCCount);
     }
+
+    // Test detailed GC with more than 1000 deleted properties (5,000 child nodes x 10 properties = 50,000)
+    @Test
+    public void testGCDeletedProps_1() throws Exception {
+        //1. Create nodes with properties
+        NodeBuilder b1 = store.getRoot().builder();
+
+        // Add 10 properties to each of the 5,000 child nodes & save
+        for (int i = 0; i < 5_000; i++) {
+            for (int j = 0; j < 10; j++) {
+                b1.child("z"+i).setProperty("prop"+j, "foo", STRING);
+            }
+        }
+        store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        // enable the detailed gc flag by setting the 'detailedGCEnabled' field on gc via writeField
+        writeField(gc, "detailedGCEnabled", true, true);
+        long maxAge = 1; //hours
+        long delta = TimeUnit.MINUTES.toMillis(10);
+        //2. Go past GC age and check no GC done as nothing deleted
+        clock.waitUntil(Revision.getCurrentTimestamp() + maxAge); // NOTE(review): maxAge is in hours but is added without unit conversion (cf. HOURS.toMillis below) - confirm intended
+        VersionGCStats stats = gc.gc(maxAge, HOURS);
+        assertEquals(0, stats.deletedPropsGCCount);
+
+        //Remove all 50,000 properties again
+        NodeBuilder b2 = store.getRoot().builder();
+        for (int i = 0; i < 5_000; i++) {
+            for (int j = 0; j < 10; j++) {
+                b2.getChildNode("z"+i).removeProperty("prop"+j);
+            }
+        }
+        store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        store.runBackgroundOperations();
+
+        //3. Check that deleted properties are not collected before maxAge
+        //Clock cannot move back (it moved forward in #2) so double the maxAge
+        clock.waitUntil(clock.getTime() + delta);
+        stats = gc.gc(maxAge*2, HOURS);
+        assertEquals(0, stats.deletedPropsGCCount);
+
+        //4. Check that all 50,000 deleted properties do get collected post maxAge
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
+
+        stats = gc.gc(maxAge*2, HOURS);
+        assertEquals(50_000, stats.deletedPropsGCCount);
+
+    }
     
     private void gcSplitDocsInternal(String subNodeName) throws Exception {
         long maxAge = 1; //hrs


[jackrabbit-oak] 05/28: OAK-10199 : used bulk findAndModify api to perform garbage cleanup

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

daim pushed a commit to branch DetailedGC/OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit d2ed664f940de5e4de3863104577723847258421
Author: Rishabh Kumar <di...@adobe.com>
AuthorDate: Tue May 30 19:38:02 2023 +0530

    OAK-10199 : used bulk findAndModify api to perform garbage cleanup
---
 .../oak/plugins/document/Configuration.java         |  8 ++++++++
 .../plugins/document/DocumentNodeStoreBuilder.java  | 21 +++++++++++++++++++++
 .../plugins/document/DocumentNodeStoreService.java  |  2 ++
 .../plugins/document/VersionGarbageCollector.java   |  5 +----
 .../DocumentNodeStoreServiceConfigurationTest.java  |  9 +++++++++
 5 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/Configuration.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/Configuration.java
index ae7aa143d2..52bf2ecc30 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/Configuration.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/Configuration.java
@@ -290,4 +290,12 @@ import static org.apache.jackrabbit.oak.plugins.document.DocumentNodeStoreServic
                     "a change that is not yet visible. Default: " + DEFAULT_SUSPEND_TIMEOUT +
                     " (milliseconds).")
     long suspendTimeoutMillis() default DEFAULT_SUSPEND_TIMEOUT;
+
+    @AttributeDefinition(
+            name = "Document Node Store Detailed GC",
+            description = "Boolean value indicating whether Detailed GC should be enabled for " +
+                    "document node store or not. The Default value is " + DEFAULT_DETAILED_GC_ENABLED +
+                    ". Note that this value can be overridden via framework " +
+                    "property 'oak.documentstore.detailedGCEnabled'")
+    boolean detailedGCEnabled() default DEFAULT_DETAILED_GC_ENABLED;
 }
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreBuilder.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreBuilder.java
index aa3ab1ea81..d894aa27e4 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreBuilder.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreBuilder.java
@@ -164,6 +164,7 @@ public class DocumentNodeStoreBuilder<T extends DocumentNodeStoreBuilder<T>> {
     private Predicate<Path> nodeCachePredicate = Predicates.alwaysTrue();
     private boolean clusterInvisible;
     private boolean throttlingEnabled;
+    private boolean detailedGCEnabled;
     private long suspendTimeoutMillis = DEFAULT_SUSPEND_TIMEOUT;
 
     /**
@@ -287,6 +288,16 @@ public class DocumentNodeStoreBuilder<T extends DocumentNodeStoreBuilder<T>> {
         return this.throttlingEnabled;
     }
 
+    public T setDetailedGCEnabled(boolean b) {
+        this.detailedGCEnabled = b;
+        return thisBuilder();
+    }
+
+    public boolean isDetailedGCEnabled() {
+        return this.detailedGCEnabled;
+    }
+
+
     public T setReadOnlyMode() {
         this.isReadOnlyMode = true;
         return thisBuilder();
@@ -316,6 +327,16 @@ public class DocumentNodeStoreBuilder<T extends DocumentNodeStoreBuilder<T>> {
         return docStoreThrottlingFeature;
     }
 
+    public T setDocStoreDetailedGCFeature(@Nullable Feature docStoreDetailedGC) {
+        this.docStoreDetailedGCFeature = docStoreDetailedGC;
+        return thisBuilder();
+    }
+
+    @Nullable
+    public Feature getDocStoreDetailedGCFeature() {
+        return docStoreDetailedGCFeature;
+    }
+
     public T setLeaseFailureHandler(LeaseFailureHandler leaseFailureHandler) {
         this.leaseFailureHandler = leaseFailureHandler;
         return thisBuilder();
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
index ad01cda3ca..16229db0e1 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
@@ -473,7 +473,9 @@ public class DocumentNodeStoreService {
                 setLeaseCheckMode(ClusterNodeInfo.DEFAULT_LEASE_CHECK_DISABLED ? LeaseCheckMode.DISABLED : LeaseCheckMode.valueOf(config.leaseCheckMode())).
                 setPrefetchFeature(prefetchFeature).
                 setDocStoreThrottlingFeature(docStoreThrottlingFeature).
+                setDocStoreDetailedGCFeature(docStoreDetailedGCFeature).
                 setThrottlingEnabled(config.throttlingEnabled()).
+                setDetailedGCEnabled(config.detailedGCEnabled()).
                 setSuspendTimeoutMillis(config.suspendTimeoutMillis()).
                 setLeaseFailureHandler(new LeaseFailureHandler() {
 
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
index 27ee36204a..3f9cb23f9a 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
@@ -635,8 +635,6 @@ public class VersionGarbageCollector {
                                     phases.stop(GCPhase.COLLECTING);
                                 }
 
-                                // TODO : remove this code, I don't think its possible to fetch these documents
-                                //  who doesn't have _modified field
                                 final Long modified = doc.getModified();
                                 if (modified == null) {
                                     monitor.warn("collectDetailGarbage : document has no _modified property : {}",
@@ -874,8 +872,7 @@ public class VersionGarbageCollector {
 
             timer.reset().start();
             try {
-                // TODO create an api to bulk update findAndUpdate Ops
-                updatedDocs = (int) updateOpList.stream().map(op -> ds.findAndUpdate(NODES, op)).filter(Objects::nonNull).count();
+                updatedDocs = (int) ds.findAndUpdate(NODES, updateOpList).stream().filter(Objects::nonNull).count();
                 stats.updatedDetailedGCDocsCount += updatedDocs;
                 log.info("Updated [{}] documents", updatedDocs);
                 // now reset delete metadata
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreServiceConfigurationTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreServiceConfigurationTest.java
index 88d9d27553..b3d5d69fb3 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreServiceConfigurationTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreServiceConfigurationTest.java
@@ -85,6 +85,7 @@ public class DocumentNodeStoreServiceConfigurationTest {
         assertEquals(Arrays.asList("/"), Arrays.asList(config.persistentCacheIncludes()));
         assertEquals("STRICT", config.leaseCheckMode());
         assertEquals(DEFAULT_THROTTLING_ENABLED, config.throttlingEnabled());
+        assertEquals(DEFAULT_DETAILED_GC_ENABLED, config.detailedGCEnabled());
         assertEquals(CommitQueue.DEFAULT_SUSPEND_TIMEOUT, config.suspendTimeoutMillis());
     }
 
@@ -104,6 +105,14 @@ public class DocumentNodeStoreServiceConfigurationTest {
         assertEquals(throttleDocStore, config.throttlingEnabled());
     }
 
+    @Test
+    public void detailedGCEnabled() throws Exception {
+        boolean detailedGCDocStore = true;
+        addConfigurationEntry(preset, "detailedGCEnabled", detailedGCDocStore);
+        Configuration config = createConfiguration();
+        assertEquals(detailedGCDocStore, config.detailedGCEnabled());
+    }
+
     @Test
     public void presetSocketKeepAlive() throws Exception {
         boolean keepAlive = !DocumentNodeStoreService.DEFAULT_SO_KEEP_ALIVE;