You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by da...@apache.org on 2023/06/19 17:34:45 UTC
[jackrabbit-oak] branch OAK-10199 updated: OAK-10199 : added check to include oldestId when running detailedGc very first time
This is an automated email from the ASF dual-hosted git repository.
daim pushed a commit to branch OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
The following commit(s) were added to refs/heads/OAK-10199 by this push:
new dc7092527e OAK-10199 : added check to include oldestId when running detailedGc very first time
dc7092527e is described below
commit dc7092527edbdd565dca86da055fc45fd06ab2f2
Author: Rishabh Kumar <di...@adobe.com>
AuthorDate: Mon Jun 19 23:04:33 2023 +0530
OAK-10199 : added check to include oldestId when running detailedGc very first time
---
.../oak/plugins/document/VersionGCSupport.java | 20 +++++---
.../plugins/document/VersionGarbageCollector.java | 5 +-
.../document/mongo/MongoVersionGCSupport.java | 18 ++++---
.../plugins/document/rdb/RDBVersionGCSupport.java | 22 +++++----
.../oak/plugins/document/VersionGCSupportTest.java | 39 ++++++++++++---
.../document/VersionGarbageCollectorIT.java | 57 ++++++++++++++++++++++
6 files changed, 128 insertions(+), 33 deletions(-)
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
index db54553061..96fa2bbaea 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupport.java
@@ -74,21 +74,25 @@ public class VersionGCSupport {
/**
* Returns documents that have a {@link NodeDocument#MODIFIED_IN_SECS} value
- * within the given range .The two passed modified timestamps are in milliseconds
+ * within the given range and are greater than given @{@link NodeDocument#ID}.
+ * <p>
+ * The two passed modified timestamps are in milliseconds
* since the epoch and the implementation will convert them to seconds at
* the granularity of the {@link NodeDocument#MODIFIED_IN_SECS} field and
* then perform the comparison.
+ * <p/>
*
- * @param fromModified the lower bound modified timestamp (inclusive)
- * @param toModified the upper bound modified timestamp (exclusive)
- * @param limit the limit of documents to return
- * @param fromId the lower bound {@link NodeDocument#ID} (exclusive)
+ * @param fromModified the lower bound modified timestamp (inclusive)
+ * @param toModified the upper bound modified timestamp (exclusive)
+ * @param limit the limit of documents to return
+ * @param fromId the lower bound {@link NodeDocument#ID}
+ * @param includeFromId boolean indicating whether {@code fromId} is inclusive or not
* @return matching documents.
*/
public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit,
- @NotNull final String fromId) {
+ @NotNull final String fromId, boolean includeFromId) {
return StreamSupport
- .stream(getSelectedDocuments(store, MODIFIED_IN_SECS, 1, fromId).spliterator(), false)
+ .stream(getSelectedDocuments(store, MODIFIED_IN_SECS, 1, includeFromId ? "\0"+fromId : fromId).spliterator(), false)
.filter(input -> modifiedGreaterThanEquals(input, fromModified) && modifiedLessThan(input, toModified))
.sorted((o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2))
.limit(limit)
@@ -193,7 +197,7 @@ public class VersionGCSupport {
LOG.info("find oldest modified document");
try {
- docs = getModifiedDocs(ts, now, 1, MIN_ID_VALUE);
+ docs = getModifiedDocs(ts, now, 1, MIN_ID_VALUE, false);
if (docs.iterator().hasNext()) {
return docs.iterator().next();
}
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
index f54299e3fd..0c7da0d4fa 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
@@ -615,6 +615,7 @@ public class VersionGarbageCollector {
throws IOException {
int docsTraversed = 0;
boolean foundDoc = true;
+ boolean includeFromId = true;
long oldestModifiedDocTimeStamp = rec.scopeDetailedGC.fromMs;
String oldestModifiedDocId = rec.detailedGCId;
try (DetailedGC gc = new DetailedGC(headRevision, monitor, cancel)) {
@@ -624,7 +625,9 @@ public class VersionGarbageCollector {
while (foundDoc && oldestModifiedDocTimeStamp < toModified && docsTraversed <= PROGRESS_BATCH_SIZE) {
// set foundDoc to false to allow exiting the while loop
foundDoc = false;
- Iterable<NodeDocument> itr = versionStore.getModifiedDocs(oldestModifiedDocTimeStamp, toModified, 1000, oldestModifiedDocId);
+ Iterable<NodeDocument> itr = versionStore.getModifiedDocs(oldestModifiedDocTimeStamp, toModified, 1000, oldestModifiedDocId, includeFromId);
+ // set includeFromId to false for subsequent queries
+ includeFromId = false;
try {
for (NodeDocument doc : itr) {
foundDoc = true;
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
index 9896857e36..ca9a8a955b 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
@@ -131,23 +131,27 @@ public class MongoVersionGCSupport extends VersionGCSupport {
/**
* Returns documents that have a {@link NodeDocument#MODIFIED_IN_SECS} value
- * within the given range .The two passed modified timestamps are in milliseconds
+ * within the given range and are greater than given @{@link NodeDocument#ID}.
+ * <p>
+ * The two passed modified timestamps are in milliseconds
* since the epoch and the implementation will convert them to seconds at
* the granularity of the {@link NodeDocument#MODIFIED_IN_SECS} field and
* then perform the comparison.
+ * <p/>
*
- * @param fromModified the lower bound modified timestamp (inclusive)
- * @param toModified the upper bound modified timestamp (exclusive)
- * @param limit the limit of documents to return
- * @param fromId the lower bound {@link NodeDocument#ID} (exclusive)
+ * @param fromModified the lower bound modified timestamp (inclusive)
+ * @param toModified the upper bound modified timestamp (exclusive)
+ * @param limit the limit of documents to return
+ * @param fromId the lower bound {@link NodeDocument#ID}
+ * @param includeFromId boolean indicating whether {@code fromId} is inclusive or not
* @return matching documents.
*/
@Override
public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit,
- @NotNull final String fromId) {
+ @NotNull final String fromId, boolean includeFromId) {
// _modified >= fromModified && _modified < toModified && _id > fromId
final Bson query = and(gte(MODIFIED_IN_SECS, getModifiedInSecs(fromModified)),
- lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified)), gt(ID, fromId));
+ lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified)), includeFromId ? gte(ID, fromId) :gt(ID, fromId));
// first sort by _modified and then by _id
final Bson sort = and(eq(MODIFIED_IN_SECS, 1), eq(ID, 1));
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
index 7006c18683..efce4b8006 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
@@ -96,27 +96,31 @@ public class RDBVersionGCSupport extends VersionGCSupport {
/**
* Returns documents that have a {@link NodeDocument#MODIFIED_IN_SECS} value
- * within the given range .The two passed modified timestamps are in milliseconds
+ * within the given range and are greater than given @{@link NodeDocument#ID}.
+ * <p>
+ * The two passed modified timestamps are in milliseconds
* since the epoch and the implementation will convert them to seconds at
* the granularity of the {@link NodeDocument#MODIFIED_IN_SECS} field and
* then perform the comparison.
+ * <p/>
*
- * @param fromModified the lower bound modified timestamp (inclusive)
- * @param toModified the upper bound modified timestamp (exclusive)
- * @param limit the limit of documents to return
- * @param fromId the lower bound {@link NodeDocument#ID} (exclusive)
+ * @param fromModified the lower bound modified timestamp (inclusive)
+ * @param toModified the upper bound modified timestamp (exclusive)
+ * @param limit the limit of documents to return
+ * @param fromId the lower bound {@link NodeDocument#ID}
+ * @param includeFromId boolean indicating whether {@code fromId} is inclusive or not
* @return matching documents.
*/
@Override
public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit,
- @NotNull final String fromId) {
+ @NotNull final String fromId, boolean includeFromId) {
List<QueryCondition> conditions = of(new QueryCondition(MODIFIED_IN_SECS, "<", getModifiedInSecs(toModified)),
new QueryCondition(MODIFIED_IN_SECS, ">=", getModifiedInSecs(fromModified)),
- new QueryCondition(ID, ">", of(fromId)));
+ new QueryCondition(ID, includeFromId ? ">=" : ">", of(fromId)));
if (MODE == 1) {
return getIterator(EMPTY_KEY_PATTERN, conditions);
} else {
- return store.queryAsIterable(NODES, fromId, null, EMPTY_KEY_PATTERN, conditions, limit, of(MODIFIED_IN_SECS, ID));
+ return store.queryAsIterable(NODES, null, null, EMPTY_KEY_PATTERN, conditions, limit, of(MODIFIED_IN_SECS, ID));
}
}
@@ -287,7 +291,7 @@ public class RDBVersionGCSupport extends VersionGCSupport {
LOG.info("getOldestModifiedDoc() <- start");
Iterable<NodeDocument> modifiedDocs = null;
try {
- modifiedDocs = getModifiedDocs(0L, clock.getTime(), 1, MIN_ID_VALUE);
+ modifiedDocs = getModifiedDocs(0L, clock.getTime(), 1, MIN_ID_VALUE, false);
doc = modifiedDocs.iterator().hasNext() ? modifiedDocs.iterator().next() : NULL;
} catch (DocumentStoreException ex) {
LOG.error("getOldestModifiedDoc()", ex);
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java
index 0061771383..cff9511a66 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCSupportTest.java
@@ -33,6 +33,7 @@ import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
+import static java.lang.Long.MAX_VALUE;
import static java.util.Comparator.comparing;
import static java.util.List.of;
import static java.util.Optional.ofNullable;
@@ -206,8 +207,8 @@ public class VersionGCSupportTest {
public void findModifiedDocsWhenModifiedIsDifferent() {
long secs = 42;
long offset = SECONDS.toMillis(secs);
- List<UpdateOp> updateOps = new ArrayList<>(5_001);
- for (int i = 0; i < 5_001; i++) {
+ List<UpdateOp> updateOps = new ArrayList<>(5_000);
+ for (int i = 0; i < 5_000; i++) {
Revision r = new Revision(offset + (i * 5), 0, 1);
String id = getIdFromPath("/x" + i);
ids.add(id);
@@ -223,9 +224,10 @@ public class VersionGCSupportTest {
long oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
assertEquals(40L, oldestModifiedDocTs);
assertEquals("1:/x0", oldestModifiedDocId);
+ boolean includeFromId = true;
for(int i = 0; i < 5; i++) {
- Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), Long.MAX_VALUE, 1000, oldestModifiedDocId);
+ Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId, includeFromId);
assertTrue(isInOrder(modifiedDocs, (o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2)));
long count = stream(modifiedDocs.spliterator(), false).count();
assertEquals(1000, count);
@@ -234,6 +236,7 @@ public class VersionGCSupportTest {
}
oldestModifiedDocId = oldestModifiedDoc.getId();
oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
+ includeFromId = false;
}
}
@@ -249,7 +252,7 @@ public class VersionGCSupportTest {
setModified(op, r);
updateOps.add(op);
}
- // create 5_000 nodes
+ // create 5_001 nodes
store.create(NODES, updateOps);
NodeDocument oldestModifiedDoc = gcSupport.getOldestModifiedDoc(SIMPLE);
@@ -257,9 +260,10 @@ public class VersionGCSupportTest {
long oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
assertEquals(40L, oldestModifiedDocTs);
assertEquals("1:/x0", oldestModifiedDocId);
+ boolean includeFromId = true;
for(int i = 0; i < 5; i++) {
- Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), Long.MAX_VALUE, 1000, oldestModifiedDocId);
+ Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId, includeFromId);
assertTrue(isInOrder(modifiedDocs, (o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2)));
long count = stream(modifiedDocs.spliterator(), false).count();
assertEquals(1000, count);
@@ -268,7 +272,21 @@ public class VersionGCSupportTest {
}
oldestModifiedDocId = oldestModifiedDoc.getId();
oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
+ includeFromId = false;
}
+
+ // fetch last remaining document now
+ Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId, false);
+ assertEquals(1, stream(modifiedDocs.spliterator(), false).count());
+ assertTrue(isInOrder(modifiedDocs, (o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2)));
+ oldestModifiedDoc = modifiedDocs.iterator().next();
+ oldestModifiedDocId = oldestModifiedDoc.getId();
+ oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
+
+ // all documents had been fetched, now we won't get any document
+ modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId, false);
+ assertEquals(0, stream(modifiedDocs.spliterator(), false).count());
+
}
@Test
@@ -291,10 +309,10 @@ public class VersionGCSupportTest {
}
// create 5_000 nodes
store.create(NODES, updateOps);
-
+ boolean includeFromId = true;
for(int i = 0; i < 5; i++) {
- Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), Long.MAX_VALUE, 1000, oldestModifiedDocId);
+ Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId, includeFromId);
assertTrue(isInOrder(modifiedDocs, (o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2)));
long count = stream(modifiedDocs.spliterator(), false).count();
assertEquals(1000, count);
@@ -303,7 +321,12 @@ public class VersionGCSupportTest {
}
oldestModifiedDocId = oldestModifiedDoc.getId();
oldestModifiedDocTs = ofNullable(oldestModifiedDoc.getModified()).orElse(0L);
+ includeFromId = false;
}
+
+ // all documents had been fetched, now we won't get any document
+ Iterable<NodeDocument> modifiedDocs = gcSupport.getModifiedDocs(SECONDS.toMillis(oldestModifiedDocTs), MAX_VALUE, 1000, oldestModifiedDocId, false);
+ assertEquals(0, stream(modifiedDocs.spliterator(), false).count());
}
private void assertPossiblyDeleted(long fromSeconds, long toSeconds, long num) {
@@ -312,7 +335,7 @@ public class VersionGCSupportTest {
}
private void assertModified(long fromSeconds, long toSeconds, long num) {
- Iterable<NodeDocument> docs = gcSupport.getModifiedDocs(SECONDS.toMillis(fromSeconds), SECONDS.toMillis(toSeconds), 10, MIN_ID_VALUE);
+ Iterable<NodeDocument> docs = gcSupport.getModifiedDocs(SECONDS.toMillis(fromSeconds), SECONDS.toMillis(toSeconds), 10, MIN_ID_VALUE, false);
assertEquals(num, stream(docs.spliterator(), false).count());
assertTrue(isInOrder(docs, (o1, o2) -> comparing(NodeDocument::getModified).thenComparing(Document::getId).compare(o1, o2)));
}
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
index caa156a6d4..f6e8554252 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
@@ -34,6 +34,7 @@ import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
+import static java.util.concurrent.TimeUnit.SECONDS;
import static org.apache.commons.lang3.reflect.FieldUtils.writeField;
import static org.apache.jackrabbit.guava.common.collect.Iterables.filter;
import static org.apache.jackrabbit.guava.common.collect.Iterables.size;
@@ -83,6 +84,7 @@ import org.apache.jackrabbit.oak.stats.Clock;
import org.jetbrains.annotations.NotNull;
import org.junit.After;
import org.junit.Before;
+import org.junit.Ignore;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
@@ -337,6 +339,61 @@ public class VersionGarbageCollectorIT {
assertEquals(50_000, stats.deletedPropsGCCount);
}
+
+ // Test when we have more than 1000 deleted properties with different revisions
+ @Test
+ @Ignore
+ public void testGCDeletedProps_2() throws Exception {
+ //1. Create nodes with properties
+ NodeBuilder b1 = null;
+ for (int k = 0; k < 50; k ++) {
+ b1 = store.getRoot().builder();
+ // Add property to node & save
+ for (int i = 0; i < 100; i++) {
+ for (int j = 0; j < 10; j++) {
+ b1.child(k + "z" + i).setProperty("prop" + j, "foo", STRING);
+ }
+ }
+ store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+ // increase the clock to create new revision for next batch
+ clock.waitUntil(Revision.getCurrentTimestamp() + SECONDS.toMillis(k * 5));
+ }
+
+ // enable the detailed gc flag
+ writeField(gc, "detailedGCEnabled", true, true);
+ long maxAge = 1; //hours
+ long delta = TimeUnit.MINUTES.toMillis(10);
+ //1. Go past GC age and check no GC done as nothing deleted
+ clock.waitUntil(Revision.getCurrentTimestamp() + maxAge);
+ VersionGCStats stats = gc.gc(maxAge, HOURS);
+ assertEquals(0, stats.deletedPropsGCCount);
+
+ //Remove property
+ NodeBuilder b2 = store.getRoot().builder();
+ for (int k = 0; k < 50; k ++) {
+ for (int i = 0; i < 100; i++) {
+ for (int j = 0; j < 10; j++) {
+ b2.getChildNode(k + "z" + i).removeProperty("prop" + j);
+ }
+ }
+ }
+ store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+ store.runBackgroundOperations();
+
+ //2. Check that a deleted property is not collected before maxAge
+ //Clock cannot move back (it moved forward in #1) so double the maxAge
+ clock.waitUntil(clock.getTime() + delta);
+ stats = gc.gc(maxAge*2, HOURS);
+ assertEquals(0, stats.deletedPropsGCCount);
+
+ //3. Check that deleted property does get collected post maxAge
+ clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
+
+ stats = gc.gc(maxAge*2, HOURS);
+ assertEquals(50_000, stats.deletedPropsGCCount);
+
+ }
private void gcSplitDocsInternal(String subNodeName) throws Exception {
long maxAge = 1; //hrs