You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by da...@apache.org on 2023/04/26 16:14:59 UTC
[jackrabbit-oak] 01/01: OAK-10199 : override getModifiedDocs() for RDB and added unit cases for deletedProps
This is an automated email from the ASF dual-hosted git repository.
daim pushed a commit to branch OAK-10199
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
commit 52c90cc3beeeb86b6cf8a7edae89ab2c7cac1777
Author: Rishabh Kumar <di...@adobe.com>
AuthorDate: Wed Apr 26 21:16:44 2023 +0530
OAK-10199 : override getModifiedDocs() for RDB and added unit cases for deletedProps
---
.../oak/plugins/document/NodeDocument.java | 12 +-
.../plugins/document/VersionGCRecommendations.java | 20 +-
.../plugins/document/VersionGarbageCollector.java | 381 ++++++++++++---------
.../document/mongo/MongoVersionGCSupport.java | 16 +-
.../plugins/document/rdb/RDBDocumentStoreJDBC.java | 1 +
.../plugins/document/rdb/RDBVersionGCSupport.java | 27 ++
.../oak/plugins/document/DetailGCHelper.java | 22 +-
.../oak/plugins/document/NodeDocumentTest.java | 32 ++
.../oak/plugins/document/VersionGCInitTest.java | 17 +
.../oak/plugins/document/VersionGCStatsTest.java | 15 +
.../oak/plugins/document/VersionGCTest.java | 13 +-
.../document/VersionGarbageCollectorIT.java | 64 ++++
12 files changed, 412 insertions(+), 208 deletions(-)
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
index 71abba0a2e..66a1bc2eae 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/NodeDocument.java
@@ -32,7 +32,6 @@ import java.util.SortedSet;
import java.util.TreeMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
-import java.util.stream.Collectors;
import org.apache.jackrabbit.guava.common.base.Function;
import org.apache.jackrabbit.guava.common.base.Predicate;
@@ -59,6 +58,7 @@ import org.apache.jackrabbit.guava.common.collect.Iterables;
import org.apache.jackrabbit.guava.common.collect.Maps;
import org.apache.jackrabbit.guava.common.collect.Sets;
+import static java.util.stream.Collectors.toSet;
import static org.apache.jackrabbit.guava.common.base.Objects.equal;
import static org.apache.jackrabbit.guava.common.base.Preconditions.checkArgument;
import static org.apache.jackrabbit.guava.common.base.Preconditions.checkNotNull;
@@ -67,7 +67,6 @@ import static org.apache.jackrabbit.guava.common.collect.Iterables.filter;
import static org.apache.jackrabbit.guava.common.collect.Iterables.mergeSorted;
import static org.apache.jackrabbit.guava.common.collect.Iterables.transform;
import static java.util.Objects.requireNonNull;
-import static java.util.stream.Collectors.toMap;
import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
import static org.apache.jackrabbit.oak.plugins.document.StableRevisionComparator.REVERSE;
import static org.apache.jackrabbit.oak.plugins.document.UpdateOp.Key;
@@ -1672,17 +1671,16 @@ public final class NodeDocument extends Document {
}
/**
- * Returns all the properties on this document
- * @return Map of all properties along with their values
+ * Returns name of all the properties on this document
+ * @return Set of all property names
*/
@NotNull
- Map<String, SortedMap<Revision, String>> getProperties() {
+ Set<String> getPropertyNames() {
return data
.keySet()
.stream()
.filter(Utils::isPropertyName)
- .map(o -> Map.entry(o, getLocalMap(o)))
- .collect(toMap(Entry::getKey, Entry::getValue));
+ .collect(toSet());
}
/**
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
index d8b091261d..d830a34653 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGCRecommendations.java
@@ -32,7 +32,7 @@ import org.slf4j.LoggerFactory;
import org.apache.jackrabbit.guava.common.collect.Maps;
-import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP;
+import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP;
import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP;
/**
@@ -51,7 +51,7 @@ public class VersionGCRecommendations {
final long maxCollect;
final long deleteCandidateCount;
final long lastOldestTimestamp;
- final long fullDetailGCTimestamp;
+ final long detailedGCTimestamp;
final long originalCollectLimit;
private final long precisionMs;
@@ -108,17 +108,17 @@ public class VersionGCRecommendations {
TimeInterval scope = new TimeInterval(oldestPossible, Long.MAX_VALUE);
scope = scope.notLaterThan(keep.fromMs);
- fullDetailGCTimestamp = settings.get(SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP);
- if (fullDetailGCTimestamp == 0) {
+ detailedGCTimestamp = settings.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP);
+ if (detailedGCTimestamp == 0) {
if (log.isDebugEnabled()) {
- log.debug("No fullDetailGCTimestamp found, querying for the oldest deletedOnce candidate");
+ log.debug("No detailedGCTimestamp found, querying for the oldest deletedOnce candidate");
}
oldestPossibleFullGC = vgc.getOldestModifiedTimestamp(clock) - 1;
if (log.isDebugEnabled()) {
- log.debug("fullDetailGCTimestamp found: {}", Utils.timestampToString(oldestPossibleFullGC));
+ log.debug("detailedGCTimestamp found: {}", Utils.timestampToString(oldestPossibleFullGC));
}
} else {
- oldestPossibleFullGC = fullDetailGCTimestamp - 1;
+ oldestPossibleFullGC = detailedGCTimestamp - 1;
}
TimeInterval scopeFullGC = new TimeInterval(oldestPossibleFullGC, Long.MAX_VALUE);
@@ -206,10 +206,8 @@ public class VersionGCRecommendations {
stats.needRepeat = true;
} else if (!stats.canceled && !stats.ignoredGCDueToCheckPoint) {
// success, we would not expect to encounter revisions older than this in the future
-// setLongSetting(SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP, scope.toMs);
-// setLongSetting(SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP, stats.oldestModifiedGced);
setLongSetting(ImmutableMap.of(SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP, scope.toMs,
- SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP, stats.oldestModifiedGced));
+ SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP, stats.oldestModifiedGced));
int count = stats.deletedDocGCCount - stats.deletedLeafDocGCCount;
double usedFraction;
@@ -244,7 +242,7 @@ public class VersionGCRecommendations {
// default values
settings.put(SETTINGS_COLLECTION_OLDEST_TIMESTAMP_PROP, 0L);
settings.put(VersionGarbageCollector.SETTINGS_COLLECTION_REC_INTERVAL_PROP, 0L);
- settings.put(SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP, 0L);
+ settings.put(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP, 0L);
if (versionGCDoc != null) {
for (String k : versionGCDoc.keySet()) {
Object value = versionGCDoc.get(k);
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
index 608ba02398..defd4c1bb2 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
@@ -21,16 +21,18 @@ package org.apache.jackrabbit.oak.plugins.document;
import java.io.Closeable;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Collections;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import java.util.Objects;
import java.util.Set;
-import java.util.SortedMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
+import java.util.stream.Collectors;
import org.apache.jackrabbit.guava.common.base.Function;
import org.apache.jackrabbit.guava.common.base.Joiner;
@@ -55,7 +57,11 @@ import org.jetbrains.annotations.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import static java.lang.Math.round;
+import static java.util.Collections.emptySet;
import static java.util.Objects.requireNonNull;
+import static java.util.Optional.ofNullable;
+import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static org.apache.jackrabbit.guava.common.base.StandardSystemProperty.LINE_SEPARATOR;
import static org.apache.jackrabbit.guava.common.collect.Iterables.all;
import static org.apache.jackrabbit.guava.common.collect.Iterators.partition;
@@ -67,14 +73,10 @@ import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_I
import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.COMMIT_ROOT_ONLY;
import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_LEAF;
import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_NO_BRANCH;
-import static org.apache.jackrabbit.oak.plugins.document.StableRevisionComparator.REVERSE;
import static org.slf4j.helpers.MessageFormatter.arrayFormat;
public class VersionGarbageCollector {
- /** TODO temporary global flag to enable 'detail gc' during prototyping. Should eventually become eg a system property */
- public static boolean DETAIL_GC_ENABLED = false;
-
//Kept less than MongoDocumentStore.IN_CLAUSE_BATCH_SIZE to avoid re-partitioning
private static final int DELETE_BATCH_SIZE = 450;
private static final int UPDATE_BATCH_SIZE = 450;
@@ -105,15 +107,9 @@ public class VersionGarbageCollector {
static final String SETTINGS_COLLECTION_REC_INTERVAL_PROP = "recommendedIntervalMs";
/**
- * Property name to timestamp when last full-detail-GC run happened, or -1 if not applicable/in-use.
- * <p>
- * <ul>
- * <li>-1 : full repo scan is disabled</li>
- * <li>0 : full repo scan is enabled and bound to start from zero == oldest _modified </li>
- * <li>gt 0 : full repo scan is enabled, was already done up until this value</li>
- * </ul>
+ * Property name to timestamp till when last detailed-GC run happened
*/
- static final String SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP = "fullDetailGCTimeStamp";
+ static final String SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP = "detailedGCTimeStamp";
private final DocumentNodeStore nodeStore;
private final DocumentStore ds;
@@ -170,7 +166,7 @@ public class VersionGarbageCollector {
break;
}
averageDurationMs = ((averageDurationMs * (overall.iterationCount - 1))
- + stats.active.elapsed(TimeUnit.MILLISECONDS)) / overall.iterationCount;
+ + stats.active.elapsed(MILLISECONDS)) / overall.iterationCount;
}
success = true;
return overall;
@@ -274,20 +270,27 @@ public class VersionGarbageCollector {
int deletedDocGCCount;
int deletedLeafDocGCCount;
int splitDocGCCount;
+ int updatedDetailedGCDocsCount;
+ int deletedPropsGCCount;
int intermediateSplitDocGCCount;
int updateResurrectedGCCount;
final TimeDurationFormatter df = TimeDurationFormatter.forLogging();
final Stopwatch active = Stopwatch.createUnstarted();
final Stopwatch collectDeletedDocs = Stopwatch.createUnstarted();
+ final Stopwatch collectDeletedProps = Stopwatch.createUnstarted();
+ final Stopwatch collectDeletedOldRevs = Stopwatch.createUnstarted();
+ final Stopwatch collectUnmergedBC = Stopwatch.createUnstarted();
final Stopwatch checkDeletedDocs = Stopwatch.createUnstarted();
- final Stopwatch detailedGcDocs = Stopwatch.createUnstarted();
+ final Stopwatch detailedGCDocs = Stopwatch.createUnstarted();
+ final Stopwatch deleteDetailedGCDocs = Stopwatch.createUnstarted();
final Stopwatch deleteDeletedDocs = Stopwatch.createUnstarted();
final Stopwatch collectAndDeleteSplitDocs = Stopwatch.createUnstarted();
final Stopwatch deleteSplitDocs = Stopwatch.createUnstarted();
final Stopwatch sortDocIds = Stopwatch.createUnstarted();
final Stopwatch updateResurrectedDocuments = Stopwatch.createUnstarted();
long activeElapsed, collectDeletedDocsElapsed, checkDeletedDocsElapsed, deleteDeletedDocsElapsed, collectAndDeleteSplitDocsElapsed,
- deleteSplitDocsElapsed, sortDocIdsElapsed, updateResurrectedDocumentsElapsed, detailedGcDocsElapsed;
+ deleteSplitDocsElapsed, sortDocIdsElapsed, updateResurrectedDocumentsElapsed, detailedGCDocsElapsed, collectDeletedPropsElapsed,
+ deleteDetailedGCDocsElapsed, collectDeletedOldRevsElapsed, collectUnmergedBCElapsed;
@Override
public String toString() {
@@ -306,6 +309,11 @@ public class VersionGarbageCollector {
df.format(updateResurrectedDocumentsElapsed, MICROSECONDS),
df.format(deleteDeletedDocsElapsed, MICROSECONDS),
df.format(collectAndDeleteSplitDocsElapsed, MICROSECONDS),
+ df.format(detailedGCDocsElapsed, MICROSECONDS),
+ df.format(deleteDetailedGCDocsElapsed, MICROSECONDS),
+ df.format(collectDeletedPropsElapsed, MICROSECONDS),
+ df.format(collectDeletedOldRevsElapsed, MICROSECONDS),
+ df.format(collectUnmergedBCElapsed, MICROSECONDS),
timeDeletingSplitDocs);
} else {
String timeDeletingSplitDocs = "";
@@ -319,17 +327,24 @@ public class VersionGarbageCollector {
df.format(updateResurrectedDocuments.elapsed(MICROSECONDS), MICROSECONDS),
df.format(deleteDeletedDocs.elapsed(MICROSECONDS), MICROSECONDS),
df.format(collectAndDeleteSplitDocs.elapsed(MICROSECONDS), MICROSECONDS),
+ df.format(detailedGCDocs.elapsed(MICROSECONDS), MICROSECONDS),
+ df.format(deleteDetailedGCDocs.elapsed(MICROSECONDS), MICROSECONDS),
+ df.format(collectDeletedProps.elapsed(MICROSECONDS), MICROSECONDS),
+ df.format(collectDeletedOldRevs.elapsed(MICROSECONDS), MICROSECONDS),
+ df.format(collectUnmergedBC.elapsed(MICROSECONDS), MICROSECONDS),
timeDeletingSplitDocs);
}
return "VersionGCStats{" +
"ignoredGCDueToCheckPoint=" + ignoredGCDueToCheckPoint +
- ", oldestModifiedGced=" + oldestModifiedGced +
", canceled=" + canceled +
", deletedDocGCCount=" + deletedDocGCCount + " (of which leaf: " + deletedLeafDocGCCount + ")" +
", updateResurrectedGCCount=" + updateResurrectedGCCount +
", splitDocGCCount=" + splitDocGCCount +
", intermediateSplitDocGCCount=" + intermediateSplitDocGCCount +
+ ", oldestModifiedGced=" + oldestModifiedGced +
+ ", updatedDetailedGCDocsCount=" + updatedDetailedGCDocsCount +
+ ", deletedPropsGCCount=" + deletedPropsGCCount +
", iterationCount=" + iterationCount +
", timeActive=" + df.format(activeElapsed, MICROSECONDS) +
", " + timings + "}";
@@ -338,7 +353,6 @@ public class VersionGarbageCollector {
void addRun(VersionGCStats run) {
++iterationCount;
this.ignoredGCDueToCheckPoint = run.ignoredGCDueToCheckPoint;
- this.oldestModifiedGced = run.oldestModifiedGced;
this.canceled = run.canceled;
this.success = run.success;
this.limitExceeded = run.limitExceeded;
@@ -348,17 +362,24 @@ public class VersionGarbageCollector {
this.splitDocGCCount += run.splitDocGCCount;
this.intermediateSplitDocGCCount += run.intermediateSplitDocGCCount;
this.updateResurrectedGCCount += run.updateResurrectedGCCount;
+ this.oldestModifiedGced = run.oldestModifiedGced;
+ this.updatedDetailedGCDocsCount += run.updatedDetailedGCDocsCount;
+ this.deletedPropsGCCount += run.deletedPropsGCCount;
if (run.iterationCount > 0) {
// run is cumulative with times in elapsed fields
this.activeElapsed += run.activeElapsed;
- this.collectDeletedDocsElapsed += run.collectDeletedDocsElapsed;
+ this.collectUnmergedBCElapsed += run.collectUnmergedBCElapsed;
this.checkDeletedDocsElapsed += run.checkDeletedDocsElapsed;
this.deleteDeletedDocsElapsed += run.deleteDeletedDocsElapsed;
this.collectAndDeleteSplitDocsElapsed += run.collectAndDeleteSplitDocsElapsed;
this.deleteSplitDocsElapsed += run.deleteSplitDocsElapsed;
this.sortDocIdsElapsed += run.sortDocIdsElapsed;
this.updateResurrectedDocumentsElapsed += run.updateResurrectedDocumentsElapsed;
- this.detailedGcDocsElapsed += run.detailedGcDocsElapsed;
+ this.detailedGCDocsElapsed += run.detailedGCDocsElapsed;
+ this.deleteDetailedGCDocsElapsed += run.deleteDetailedGCDocsElapsed;
+ this.collectDeletedDocsElapsed += run.collectDeletedDocsElapsed;
+ this.collectDeletedPropsElapsed += run.collectDeletedPropsElapsed;
+ this.collectDeletedOldRevsElapsed += run.collectDeletedOldRevsElapsed;
} else {
// single run -> read from stop watches
this.activeElapsed += run.active.elapsed(MICROSECONDS);
@@ -369,7 +390,11 @@ public class VersionGarbageCollector {
this.deleteSplitDocsElapsed += run.deleteSplitDocs.elapsed(MICROSECONDS);
this.sortDocIdsElapsed += run.sortDocIds.elapsed(MICROSECONDS);
this.updateResurrectedDocumentsElapsed += run.updateResurrectedDocuments.elapsed(MICROSECONDS);
- this.detailedGcDocsElapsed += run.detailedGcDocs.elapsed(MICROSECONDS);
+ this.detailedGCDocsElapsed += run.detailedGCDocs.elapsed(MICROSECONDS);
+ this.deleteDetailedGCDocsElapsed += run.deleteDetailedGCDocs.elapsed(MICROSECONDS);
+ this.collectDeletedPropsElapsed += run.collectDeletedProps.elapsed(MICROSECONDS);
+ this.collectDeletedOldRevsElapsed += run.collectDeletedOldRevs.elapsed(MICROSECONDS);
+ this.collectUnmergedBCElapsed += run.collectUnmergedBC.elapsed(MICROSECONDS);
}
}
}
@@ -378,10 +403,14 @@ public class VersionGarbageCollector {
NONE,
COLLECTING,
CHECKING,
- DETAILED_GC,
DELETING,
SORTING,
SPLITS_CLEANUP,
+ DETAILED_GC,
+ COLLECT_PROPS,
+ COLLECT_OLD_REVS,
+ COLLECT_UNMERGED_BC,
+ DETAILED_GC_CLEANUP,
UPDATING
}
@@ -406,11 +435,15 @@ public class VersionGarbageCollector {
this.watches.put(GCPhase.NONE, Stopwatch.createStarted());
this.watches.put(GCPhase.COLLECTING, stats.collectDeletedDocs);
this.watches.put(GCPhase.CHECKING, stats.checkDeletedDocs);
- this.watches.put(GCPhase.DETAILED_GC, stats.detailedGcDocs);
this.watches.put(GCPhase.DELETING, stats.deleteDeletedDocs);
this.watches.put(GCPhase.SORTING, stats.sortDocIds);
this.watches.put(GCPhase.SPLITS_CLEANUP, stats.collectAndDeleteSplitDocs);
this.watches.put(GCPhase.UPDATING, stats.updateResurrectedDocuments);
+ this.watches.put(GCPhase.DETAILED_GC, stats.detailedGCDocs);
+ this.watches.put(GCPhase.COLLECT_PROPS, stats.collectDeletedProps);
+ this.watches.put(GCPhase.COLLECT_OLD_REVS, stats.collectDeletedOldRevs);
+ this.watches.put(GCPhase.COLLECT_UNMERGED_BC, stats.collectUnmergedBC);
+ this.watches.put(GCPhase.DETAILED_GC_CLEANUP, stats.deleteDetailedGCDocs);
this.canceled = canceled;
}
@@ -534,7 +567,7 @@ public class VersionGarbageCollector {
collectDeletedDocuments(phases, headRevision, rec);
collectSplitDocuments(phases, sweepRevisions, rec);
if (detailedGCEnabled) {
- // run only if enabled
+ // run only if detailed GC enabled
collectDetailedGarbage(phases, headRevision, rec);
}
}
@@ -568,102 +601,74 @@ public class VersionGarbageCollector {
* it is okay that it takes a considerable amount of time.
*
* @param phases {@link GCPhases}
- * @param headRevision the current head revision of
- * @throws IOException
- * @throws LimitExceededException
+ * @param headRevision the current head revision of node store
*/
private void collectDetailedGarbage(final GCPhases phases, final RevisionVector headRevision, final VersionGCRecommendations rec)
throws IOException, LimitExceededException {
int docsTraversed = 0;
- long oldestModifiedGced = rec.scopeFullGC.fromMs;
+ boolean foundDoc = true;
+ long oldestModifiedGCed = rec.scopeFullGC.fromMs;
try (DetailedGC gc = new DetailedGC(headRevision, monitor, cancel)) {
final long fromModified = rec.scopeFullGC.fromMs;
final long toModified = rec.scopeFullGC.toMs;
-// if (rec.fullDetailGCTimestamp == -1) {
-// // then full detail-gc is disabled or over - use regular scope then
-// fromModified = rec.scope.fromMs;
-// toModified = rec.scope.toMs;
-// } else {
-// // then full detail-gc is enabled - use it then
-// fromModified = rec.fullDetailGCTimestamp; // TODO: once we're passed rec.scope.fromMs we should
-// // disable fullgc
-// toModified = rec.scope.toMs; // the 'to' here is the max. it will process only eg 1 batch
-// }
- // TODO : remove me
- boolean foundAnything = false; // I think this flag is redundant
- if (phases.start(GCPhase.COLLECTING)) {
- Iterable<NodeDocument> itr = versionStore.getModifiedDocs(oldestModifiedGced, toModified, 2000);
- final Stopwatch timer = Stopwatch.createUnstarted();
- timer.reset().start();
- try {
- for (NodeDocument doc : itr) {
- // continue with GC?
- if (cancel.get()) {
- break;
- }
- if (phases.start(GCPhase.DETAILED_GC)) {
- gc.detailedGC(doc, phases);
- phases.stop(GCPhase.DETAILED_GC);
- }
+ if (phases.start(GCPhase.DETAILED_GC)) {
+ while (foundDoc && oldestModifiedGCed < toModified && docsTraversed <= PROGRESS_BATCH_SIZE) {
+ // set foundDoc to false to allow exiting the while loop
+ foundDoc = false;
+ Iterable<NodeDocument> itr = versionStore.getModifiedDocs(oldestModifiedGCed, toModified, 1000);
+ try {
+ for (NodeDocument doc : itr) {
+ foundDoc = true;
+ // continue with GC?
+ if (cancel.get()) {
+ break;
+ }
+ docsTraversed++;
+ if (docsTraversed % PROGRESS_BATCH_SIZE == 0) {
+ monitor.info("Iterated through {} documents so far. {} had detail garbage",
+ docsTraversed, gc.getGarbageDocsCount());
+ }
- // TODO : remove this code, I don't think its possible to fetch these documents
- // who doesn't have _modified field
- final Long modified = doc.getModified();
- if (modified == null) {
- monitor.warn("collectDetailGarbage : document has no _modified property : {}",
- doc.getId());
- } else if (modified < oldestModifiedGced) {
- monitor.warn(
- "collectDetailGarbage : document has older _modified than query boundary : {} (from: {}, to: {})",
- modified, fromModified, toModified);
- } else {
- oldestModifiedGced = modified;
- }
- foundAnything = true;
- docsTraversed++;
- if (docsTraversed % PROGRESS_BATCH_SIZE == 0) {
- monitor.info("Iterated through {} documents so far. {} had detail garbage",
- docsTraversed, gc.getNumDocuments());
- }
- // this would never hit, since we are only fetching the oldest 2000 element in batches of 1000
- // TODO: remove this if above mentioned logic is fine
- if (rec.maxCollect > 0 && gc.getNumDocuments() > rec.maxCollect) {
- // TODO: how would we recover from this?
- // If we don't want above solution, then one of the another solution is to use lower time duration
- // as done in document deletion process or use lower limit value or
- // we should perform all the update ops in 1 go
- throw new LimitExceededException();
+ // collect the data to delete in next step
+ if (phases.start(GCPhase.COLLECTING)) {
+ gc.collectGarbage(doc, phases);
+ phases.stop(GCPhase.COLLECTING);
+ }
+
+ // TODO : remove this code, I don't think its possible to fetch these documents
+ // who doesn't have _modified field
+ final Long modified = doc.getModified();
+ if (modified == null) {
+ monitor.warn("collectDetailGarbage : document has no _modified property : {}",
+ doc.getId());
+ } else if (modified < oldestModifiedGCed) {
+ monitor.warn(
+ "collectDetailGarbage : document has older _modified than query boundary : {} (from: {}, to: {})",
+ modified, fromModified, toModified);
+ } else {
+ oldestModifiedGCed = modified;
+ }
+
+ if (gc.hasGarbage()) {
+ if (phases.start(GCPhase.DETAILED_GC_CLEANUP)) {
+ gc.removeGarbage(phases.stats);
+ phases.stop(GCPhase.DETAILED_GC_CLEANUP);
+ }
+ }
+
+ oldestModifiedGCed = modified == null ? fromModified : modified;
}
- oldestModifiedGced = modified == null ? fromModified : modified;
+ } finally {
+ Utils.closeIfCloseable(itr);
+ phases.stats.oldestModifiedGced = oldestModifiedGCed;
}
- } finally {
- Utils.closeIfCloseable(itr);
- // why do we need to stop this here, we are already stopping the original gc run.
- // can this be removed
- delayOnModifications(timer.stop().elapsed(TimeUnit.MILLISECONDS));
- phases.stats.oldestModifiedGced = oldestModifiedGced;
}
- phases.stop(GCPhase.COLLECTING);
-// if (!cancel.get() && foundAnything) {
-// // TODO: move to evaluate()
-// rec.setLongSetting(SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP, oldestModifiedGced + 1);
-// }
+ phases.stop(GCPhase.DETAILED_GC);
}
}
}
- private void delayOnModifications(long durationMs) {
- long delayMs = Math.round(durationMs * options.delayFactor);
- if (!cancel.get() && delayMs > 0) {
- try {
- Clock clock = nodeStore.getClock();
- clock.waitUntil(clock.getTime() + delayMs);
- }
- catch (InterruptedException ex) {
- /* ignore */
- }
- }
- }
+
private void collectSplitDocuments(GCPhases phases,
RevisionVector sweepRevisions,
@@ -752,77 +757,146 @@ public class VersionGarbageCollector {
}
}
- private static class DetailedGC implements Closeable {
+ private class DetailedGC implements Closeable {
private final RevisionVector headRevision;
private final GCMonitor monitor;
private final AtomicBoolean cancel;
- private int count;
+ private final Stopwatch timer;
+ private final List<UpdateOp> updateOpList;
+ private int garbageDocsCount;
public DetailedGC(@NotNull RevisionVector headRevision, @NotNull GCMonitor monitor, @NotNull AtomicBoolean cancel) {
this.headRevision = requireNonNull(headRevision);
this.monitor = monitor;
this.cancel = cancel;
+ this.updateOpList = new ArrayList<>();
+ this.timer = Stopwatch.createUnstarted();
}
- public void detailedGC(NodeDocument doc, GCPhases phases) {
-// deleteSample(doc, phases);
- UpdateOp updateOp = new UpdateOp(requireNonNull(doc.getId()), false);
- deleteDeletedProperties(doc, phases, updateOp);
- deleteUnmergedBranchCommitDocument(doc, phases, updateOp);
- deleteOldRevisions(doc, phases, updateOp);
+ public void collectGarbage(final NodeDocument doc, final GCPhases phases) {
+
+ monitor.info("Collecting Detailed Garbage for doc [{}]", doc.getId());
+
+ final UpdateOp op = new UpdateOp(requireNonNull(doc.getId()), false);
+ collectDeletedProperties(doc, phases, op);
+ collectUnmergedBranchCommitDocument(doc, phases, op);
+ collectOldRevisions(doc, phases, op);
+ // only add if there are changes for this doc
+ if (op.hasChanges()) {
+ garbageDocsCount++;
+ monitor.info("Collected [{}] garbage for doc [{}]", op.getChanges().size(), doc.getId());
+ updateOpList.add(op);
+ }
}
- /** TODO remove, this is just a skeleton sample */
-// private void deleteSample(NodeDocument doc, GCPhases phases) {
-// if (doc.getId().contains("should_delete")) {
-// if (phases.start(GCPhase.DELETING)) {
-// monitor.info("deleteSample: should do the deletion now, but this is demo only. I'm still learning");
-// System.out.println("do the actual deletion");
-// count++;
-// phases.stop(GCPhase.DELETING);
-// }
-// }
-// }
+ private boolean hasGarbage() {
+ return garbageDocsCount > 0;
+ }
- private void deleteUnmergedBranchCommitDocument(NodeDocument doc, GCPhases phases, UpdateOp updateOp) {
- // TODO Auto-generated method stub
+ private void collectUnmergedBranchCommitDocument(final NodeDocument doc, final GCPhases phases, final UpdateOp updateOp) {
+ if (phases.start(GCPhase.COLLECT_UNMERGED_BC)){
+ // TODO add umerged BC collection logic
+ phases.stop(GCPhase.COLLECT_UNMERGED_BC);
+ }
}
- private void deleteDeletedProperties(final NodeDocument doc, final GCPhases phases, final UpdateOp updateOp) {
+ private void collectDeletedProperties(final NodeDocument doc, final GCPhases phases, final UpdateOp updateOp) {
// get Map of all properties along with their values
- final Map<String, SortedMap<Revision, String>> properties = doc.getProperties();
-
- // find all the properties which can be removed from document
- // All the properties whose value is null in their respective
- // latest revision are eligible to be garbage collected.
- properties.forEach((propName, revisionStringSortedMap) -> {
- if (revisionStringSortedMap.keySet()
- .stream()
- .sorted(REVERSE)
- .limit(1)
- .anyMatch(revision -> revisionStringSortedMap.get(revision) == null)) {
- // set this property for removal
- updateOp.remove(propName);
+ if (phases.start(GCPhase.COLLECT_PROPS)) {
+ final Set<String> properties = doc.getPropertyNames();
+
+ // find all the properties which can be removed from document.
+ // All the properties whose value is null in head revision are
+ // eligible to be garbage collected.
+
+ final Set<String> retainPropSet = ofNullable(doc.getNodeAtRevision(nodeStore, headRevision, null))
+ .map(DocumentNodeState::getPropertyNames)
+ .orElse(emptySet());
+ final int deletedPropsGCCount = properties.stream()
+ .filter(p -> !retainPropSet.contains(p))
+ .mapToInt(x -> {
+ updateOp.remove(x);
+ return 1;})
+ .sum();
+
+
+ phases.stats.deletedPropsGCCount += deletedPropsGCCount;
+ if (log.isDebugEnabled()) {
+ log.debug("Collected {} deleted properties for document {}", deletedPropsGCCount, doc.getId());
}
- });
+ phases.stop(GCPhase.COLLECT_PROPS);
+ }
}
- private void deleteOldRevisions(NodeDocument doc, GCPhases phases, UpdateOp updateOp) {
- // TODO Auto-generated method stub
+ private void collectOldRevisions(NodeDocument doc, GCPhases phases, UpdateOp updateOp) {
+
+ if (phases.start(GCPhase.COLLECT_OLD_REVS)){
+ // TODO add old rev collection logic
+ phases.stop(GCPhase.COLLECT_OLD_REVS);
+ }
}
- long getNumDocuments() {
- return count;
+ int getGarbageDocsCount() {
+ return garbageDocsCount;
}
@Override
public void close() throws IOException {
}
+
+ public void removeGarbage(final VersionGCStats stats) {
+
+ if (updateOpList.isEmpty()) {
+ if (log.isDebugEnabled()) {
+ log.debug("Skipping removal of detailed garbage, cause no garbage detected");
+ }
+ return;
+ }
+
+ int updatedDocs;
+
+ monitor.info("Proceeding to update [{}] documents", updateOpList.size());
+
+ if (log.isDebugEnabled()) {
+ String collect = updateOpList.stream().map(UpdateOp::getId).collect(Collectors.joining(","));
+ log.trace("Performing batch update of documents with following id's. \n" + collect);
+ }
+
+ if (cancel.get()) {
+ log.info("Aborting the removal of detailed garbage since RGC had been cancelled");
+ return;
+ }
+
+ timer.reset().start();
+ try {
+ // TODO create an api to bulk update findAndUpdate Ops
+ updatedDocs = (int) updateOpList.stream().map(op -> ds.findAndUpdate(NODES, op)).filter(Objects::nonNull).count();
+ stats.updatedDetailedGCDocsCount += updatedDocs;
+ log.info("Updated [{}] documents", updatedDocs);
+ // now reset delete metadata
+ updateOpList.clear();
+ garbageDocsCount = 0;
+ } finally {
+ delayOnModifications(timer.stop().elapsed(MILLISECONDS), cancel);
+ }
+ }
+ }
+ private void delayOnModifications(final long durationMs, final AtomicBoolean cancel) {
+ long delayMs = round(durationMs * options.delayFactor);
+ if (!cancel.get() && delayMs > 0) {
+ try {
+ Clock clock = nodeStore.getClock();
+ clock.waitUntil(clock.getTime() + delayMs);
+ }
+ catch (InterruptedException ex) {
+ /* ignore */
+ }
+ }
}
/**
@@ -959,19 +1033,6 @@ public class VersionGarbageCollector {
//------------------------------< internal >----------------------------
- private void delayOnModifications(long durationMs) {
- long delayMs = Math.round(durationMs * options.delayFactor);
- if (!cancel.get() && delayMs > 0) {
- try {
- Clock clock = nodeStore.getClock();
- clock.waitUntil(clock.getTime() + delayMs);
- }
- catch (InterruptedException ex) {
- /* ignore */
- }
- }
- }
-
private Iterator<String> previousDocIdsFor(NodeDocument doc) {
Map<Revision, Range> prevRanges = doc.getPreviousRanges(true);
if (prevRanges.isEmpty()) {
@@ -1127,7 +1188,7 @@ public class VersionGarbageCollector {
monitor.info(msg);
}
} finally {
- delayOnModifications(timer.stop().elapsed(TimeUnit.MILLISECONDS));
+ delayOnModifications(timer.stop().elapsed(MILLISECONDS), cancel);
}
}
return deletedCount;
@@ -1160,7 +1221,7 @@ public class VersionGarbageCollector {
}
}
finally {
- delayOnModifications(timer.stop().elapsed(TimeUnit.MILLISECONDS));
+ delayOnModifications(timer.stop().elapsed(MILLISECONDS), cancel);
}
return updateCount;
}
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
index e34d8f36b0..ef192af390 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java
@@ -111,10 +111,10 @@ public class MongoVersionGCSupport extends VersionGCSupport {
@Override
public CloseableIterable<NodeDocument> getPossiblyDeletedDocs(final long fromModified, final long toModified) {
//_deletedOnce == true && _modified >= fromModified && _modified < toModified
- Bson query = and(
+ Bson query = Filters.and(
Filters.eq(DELETED_ONCE, true),
- gte(MODIFIED_IN_SECS, getModifiedInSecs(fromModified)),
- lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified))
+ Filters.gte(MODIFIED_IN_SECS, getModifiedInSecs(fromModified)),
+ Filters.lt(MODIFIED_IN_SECS, getModifiedInSecs(toModified))
);
FindIterable<BasicDBObject> cursor = getNodeCollection()
.find(query).batchSize(batchSize);
@@ -233,9 +233,9 @@ public class MongoVersionGCSupport extends VersionGCSupport {
}
// OAK-8351: this (last) query only contains SD_TYPE and SD_MAX_REV_TIME_IN_SECS
// so mongodb should really use that _sdType_1__sdMaxRevTime_1 index
- result.add(and(
+ result.add(Filters.and(
Filters.or(orClauses),
- lt(SD_MAX_REV_TIME_IN_SECS, getModifiedInSecs(oldestRevTimeStamp))
+ Filters.lt(SD_MAX_REV_TIME_IN_SECS, getModifiedInSecs(oldestRevTimeStamp))
));
return result;
@@ -266,16 +266,16 @@ public class MongoVersionGCSupport extends VersionGCSupport {
Bson idPathClause = Filters.or(
Filters.regex(ID, Pattern.compile(".*" + idSuffix)),
// previous documents with long paths do not have a '-' in the id
- and(
+ Filters.and(
Filters.regex(ID, Pattern.compile("[^-]*")),
Filters.regex(PATH, Pattern.compile(".*" + idSuffix))
)
);
long minMaxRevTimeInSecs = Math.min(maxRevTimeInSecs, getModifiedInSecs(r.getTimestamp()));
- result.add(and(
+ result.add(Filters.and(
Filters.eq(SD_TYPE, DEFAULT_NO_BRANCH.typeCode()),
- lt(SD_MAX_REV_TIME_IN_SECS, minMaxRevTimeInSecs),
+ Filters.lt(SD_MAX_REV_TIME_IN_SECS, minMaxRevTimeInSecs),
idPathClause
));
}
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java
index 5caa65d875..26fc1311fa 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBDocumentStoreJDBC.java
@@ -715,6 +715,7 @@ public class RDBDocumentStoreJDBC {
}
if (sortBy != null) {
+ // FIXME : order should be determined via sortBy field
query.append(" order by ID");
}
diff --git a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
index a463499793..1895b9c9e4 100644
--- a/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
+++ b/oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/rdb/RDBVersionGCSupport.java
@@ -17,6 +17,9 @@
package org.apache.jackrabbit.oak.plugins.document.rdb;
import static org.apache.jackrabbit.guava.common.collect.Iterables.filter;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS;
+import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.getModifiedInSecs;
+import static org.apache.jackrabbit.oak.plugins.document.rdb.RDBDocumentStore.EMPTY_KEY_PATTERN;
import java.io.Closeable;
import java.io.IOException;
@@ -85,6 +88,30 @@ public class RDBVersionGCSupport extends VersionGCSupport {
}
}
+ /**
+ * Returns documents that have a {@link NodeDocument#MODIFIED_IN_SECS} value
+ * within the given range .The two passed modified timestamps are in milliseconds
+ * since the epoch and the implementation will convert them to seconds at
+ * the granularity of the {@link NodeDocument#MODIFIED_IN_SECS} field and
+ * then perform the comparison.
+ *
+ * @param fromModified the lower bound modified timestamp (inclusive)
+ * @param toModified the upper bound modified timestamp (exclusive)
+ * @param limit the limit of documents to return
+ * @return matching documents.
+ */
+ @Override
+ public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit) {
+ List<QueryCondition> conditions = new ArrayList<>();
+ conditions.add(new QueryCondition(MODIFIED_IN_SECS, "<", getModifiedInSecs(toModified)));
+ conditions.add(new QueryCondition(MODIFIED_IN_SECS, ">=", getModifiedInSecs(fromModified)));
+ if (MODE == 1) {
+ return getIterator(EMPTY_KEY_PATTERN, conditions);
+ } else {
+ return store.queryAsIterable(Collection.NODES, null, null, EMPTY_KEY_PATTERN, conditions, limit, MODIFIED_IN_SECS);
+ }
+ }
+
@Override
protected Iterable<NodeDocument> identifyGarbage(final Set<SplitDocType> gcTypes, final RevisionVector sweepRevs,
final long oldestRevTimeStamp) {
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DetailGCHelper.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DetailGCHelper.java
index 8a585c7dc0..d52c5e33ae 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DetailGCHelper.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DetailGCHelper.java
@@ -18,25 +18,19 @@
*/
package org.apache.jackrabbit.oak.plugins.document;
-public class DetailGCHelper {
+import static org.apache.commons.lang3.reflect.FieldUtils.writeField;
- public static void setLongSetting(String propName, long val, DocumentNodeStore ns) {
- UpdateOp updateOp = new UpdateOp(VersionGarbageCollector.SETTINGS_COLLECTION_ID, true);
- updateOp.set(propName, val);
- ns.getDocumentStore().createOrUpdate(Collection.SETTINGS, updateOp);
- }
+public class DetailGCHelper {
- public static void enableDetailGC(DocumentNodeStore ns) {
- VersionGarbageCollector.DETAIL_GC_ENABLED = true;
- if (ns != null) {
- setLongSetting(VersionGarbageCollector.SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP, 0, ns);
+ public static void enableDetailGC(final VersionGarbageCollector vgc) throws IllegalAccessException {
+ if (vgc != null) {
+ writeField(vgc, "detailedGCEnabled", true, true);
}
}
- public static void disableDetailGC(DocumentNodeStore ns) {
- VersionGarbageCollector.DETAIL_GC_ENABLED = false;
- if (ns != null) {
- setLongSetting(VersionGarbageCollector.SETTINGS_COLLECTION_FULL_DETAILGC_TIMESTAMP_PROP, -1, ns);
+ public static void disableDetailGC(final VersionGarbageCollector vgc) throws IllegalAccessException {
+ if (vgc != null) {
+ writeField(vgc, "detailedGCEnabled", false, true);
}
}
}
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/NodeDocumentTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/NodeDocumentTest.java
index b99897dfb0..2adcc9f86a 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/NodeDocumentTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/NodeDocumentTest.java
@@ -449,6 +449,38 @@ public class NodeDocumentTest {
assertEquals(140, uncommittedRevisions);
}
+ @Test
+ public void getPropertyNames() throws CommitFailedException {
+ DocumentStore store = new MemoryDocumentStore();
+ DocumentNodeStore ns = new DocumentMK.Builder().setDocumentStore(store).setAsyncDelay(0).getNodeStore();
+
+ // add properties
+ for (int i = 0; i < 10; i++) {
+ NodeBuilder nb = ns.getRoot().builder();
+ nb.child("x").setProperty("p"+i, i);
+ merge(ns, nb);
+ }
+
+ final NodeDocument nodeDocument = store.find(NODES, "1:/x");
+ assert nodeDocument != null;
+ assertEquals(10, nodeDocument.getPropertyNames().size());
+ }
+
+ @Test
+ public void getNoPropertyNames() throws CommitFailedException {
+ DocumentStore store = new MemoryDocumentStore();
+ DocumentNodeStore ns = new DocumentMK.Builder().setDocumentStore(store).setAsyncDelay(0).getNodeStore();
+
+ // add no property
+ NodeBuilder nb = ns.getRoot().builder();
+ nb.child("x");
+ merge(ns, nb);
+
+ final NodeDocument nodeDocument = store.find(NODES, "1:/x");
+ assert nodeDocument != null;
+ assertEquals(0, nodeDocument.getPropertyNames().size());
+ }
+
@Test
public void getNewestRevisionTooExpensive() throws Exception {
final int NUM_CHANGES = 200;
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java
index 055188be46..ed39a372b2 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCInitTest.java
@@ -22,6 +22,9 @@ import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
+import static org.apache.jackrabbit.oak.plugins.document.DetailGCHelper.enableDetailGC;
+import static org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP;
+import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
@@ -49,6 +52,20 @@ public class VersionGCInitTest {
vgc = store.find(Collection.SETTINGS, "versionGC");
assertNotNull(vgc);
+ assertEquals(0L, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP));
}
+ @Test
+ public void lazyInitializeWithDetailedGC() throws Exception {
+ DocumentStore store = ns.getDocumentStore();
+ Document vgc = store.find(Collection.SETTINGS, "versionGC");
+ assertNull(vgc);
+
+ enableDetailGC(ns.getVersionGarbageCollector());
+ ns.getVersionGarbageCollector().gc(1, TimeUnit.DAYS);
+
+ vgc = store.find(Collection.SETTINGS, "versionGC");
+ assertNotNull(vgc);
+ assertEquals(-1L, vgc.get(SETTINGS_COLLECTION_DETAILED_GC_TIMESTAMP_PROP));
+ }
}
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCStatsTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCStatsTest.java
index 13448e5403..1515ea9312 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCStatsTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCStatsTest.java
@@ -67,6 +67,11 @@ public class VersionGCStatsTest {
assertEquals(stats.collectAndDeleteSplitDocs.elapsed(MICROSECONDS), cumulative.collectAndDeleteSplitDocsElapsed);
assertEquals(stats.sortDocIds.elapsed(MICROSECONDS), cumulative.sortDocIdsElapsed);
assertEquals(stats.updateResurrectedDocuments.elapsed(MICROSECONDS), cumulative.updateResurrectedDocumentsElapsed);
+ assertEquals(stats.detailedGCDocs.elapsed(MICROSECONDS), cumulative.detailedGCDocsElapsed);
+ assertEquals(stats.deleteDetailedGCDocs.elapsed(MICROSECONDS), cumulative.deleteDetailedGCDocsElapsed);
+ assertEquals(stats.collectDeletedProps.elapsed(MICROSECONDS), cumulative.collectDeletedPropsElapsed);
+ assertEquals(stats.collectDeletedOldRevs.elapsed(MICROSECONDS), cumulative.collectDeletedOldRevsElapsed);
+ assertEquals(stats.collectUnmergedBC.elapsed(MICROSECONDS), cumulative.collectUnmergedBCElapsed);
}
@Test
@@ -83,6 +88,11 @@ public class VersionGCStatsTest {
assertEquals(stats.collectAndDeleteSplitDocs.elapsed(MICROSECONDS) * 2, cumulative.collectAndDeleteSplitDocsElapsed);
assertEquals(stats.sortDocIds.elapsed(MICROSECONDS) * 2, cumulative.sortDocIdsElapsed);
assertEquals(stats.updateResurrectedDocuments.elapsed(MICROSECONDS) * 2, cumulative.updateResurrectedDocumentsElapsed);
+ assertEquals(stats.detailedGCDocs.elapsed(MICROSECONDS) * 2, cumulative.detailedGCDocsElapsed);
+ assertEquals(stats.deleteDetailedGCDocs.elapsed(MICROSECONDS) * 2, cumulative.deleteDetailedGCDocsElapsed);
+ assertEquals(stats.collectDeletedProps.elapsed(MICROSECONDS) * 2, cumulative.collectDeletedPropsElapsed);
+ assertEquals(stats.collectDeletedOldRevs.elapsed(MICROSECONDS) * 2, cumulative.collectDeletedOldRevsElapsed);
+ assertEquals(stats.collectUnmergedBC.elapsed(MICROSECONDS) * 2, cumulative.collectUnmergedBCElapsed);
}
private void forEachStopwatch(VersionGCStats stats, Callable c) {
@@ -93,6 +103,11 @@ public class VersionGCStatsTest {
c.call(stats.collectAndDeleteSplitDocs);
c.call(stats.sortDocIds);
c.call(stats.updateResurrectedDocuments);
+ c.call(stats.detailedGCDocs);
+ c.call(stats.deleteDetailedGCDocs);
+ c.call(stats.collectDeletedProps);
+ c.call(stats.collectDeletedOldRevs);
+ c.call(stats.collectUnmergedBC);
}
private interface Callable {
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java
index 48f3f362ce..f29716ca5d 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCTest.java
@@ -44,7 +44,6 @@ import org.jetbrains.annotations.NotNull;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
-import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;
@@ -96,7 +95,7 @@ public class VersionGCTest {
@After
public void tearDown() throws Exception {
- DetailGCHelper.disableDetailGC(ns);
+ DetailGCHelper.disableDetailGC(gc);
execService.shutdown();
execService.awaitTermination(1, MINUTES);
}
@@ -343,21 +342,19 @@ public class VersionGCTest {
// OAK-10199
@Test
- @Ignore
public void testDetailGcDocumentRead_disabled() throws Exception {
- DetailGCHelper.disableDetailGC(ns);
+ DetailGCHelper.disableDetailGC(gc);
VersionGCStats stats = gc.gc(30, TimeUnit.MINUTES);
assertNotNull(stats);
- assertEquals(0, stats.detailedGcDocsElapsed);
+ assertEquals(0, stats.detailedGCDocsElapsed);
}
@Test
- @Ignore
public void testDetailGcDocumentRead_enabled() throws Exception {
- DetailGCHelper.enableDetailGC(ns);
+ DetailGCHelper.enableDetailGC(gc);
VersionGCStats stats = gc.gc(30, TimeUnit.MINUTES);
assertNotNull(stats);
- assertNotEquals(0, stats.detailedGcDocsElapsed);
+ assertNotEquals(0, stats.detailedGCDocsElapsed);
}
// OAK-10199
diff --git a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
index d33cc8c7de..4470a07f96 100644
--- a/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
+++ b/oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollectorIT.java
@@ -34,10 +34,12 @@ import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
+import static org.apache.commons.lang3.reflect.FieldUtils.writeField;
import static org.apache.jackrabbit.guava.common.collect.Iterables.filter;
import static org.apache.jackrabbit.guava.common.collect.Iterables.size;
import static java.util.concurrent.TimeUnit.HOURS;
import static java.util.concurrent.TimeUnit.MINUTES;
+import static org.apache.jackrabbit.oak.api.Type.STRING;
import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.NUM_REVS_THRESHOLD;
import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.PREV_SPLIT_FACTOR;
@@ -225,6 +227,68 @@ public class VersionGarbageCollectorIT {
public void gcLongPathSplitDocs() throws Exception {
gcSplitDocsInternal(Strings.repeat("sub", 120));
}
+
+ @Test
+ public void testGCDeletedProps() throws Exception{
+ //1. Create nodes
+ NodeBuilder b1 = store.getRoot().builder();
+
+ // Add property to node & save
+ b1.child("x").setProperty("test", "t", STRING);
+ b1.child("z").setProperty("prop", "foo", STRING);
+ store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+ // update the property
+ b1 = store.getRoot().builder();
+ b1.getChildNode("z").setProperty("prop", "bar", STRING);
+ store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+ // update property again
+ b1 = store.getRoot().builder();
+ b1.getChildNode("z").setProperty("prop", "baz", STRING);
+ store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+ // enable the detailed gc flag
+ writeField(gc, "detailedGCEnabled", true, true);
+ long maxAge = 1; //hours
+ long delta = TimeUnit.MINUTES.toMillis(10);
+ //1. Go past GC age and check no GC done as nothing deleted
+ clock.waitUntil(Revision.getCurrentTimestamp() + maxAge);
+ VersionGCStats stats = gc.gc(maxAge, HOURS);
+ assertEquals(0, stats.deletedPropsGCCount);
+
+ //Remove property
+ NodeBuilder b2 = store.getRoot().builder();
+ b2.getChildNode("z").removeProperty("prop");
+ store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+ store.runBackgroundOperations();
+
+ //2. Check that a deleted property is not collected before maxAge
+ //Clock cannot move back (it moved forward in #1) so double the maxAge
+ clock.waitUntil(clock.getTime() + delta);
+ stats = gc.gc(maxAge*2, HOURS);
+ assertEquals(0, stats.deletedPropsGCCount);
+
+ //3. Check that deleted property does get collected post maxAge
+ clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
+
+ stats = gc.gc(maxAge*2, HOURS);
+ assertEquals(1, stats.deletedPropsGCCount);
+
+ //4. Check that a revived property (deleted and created again) does not get gc
+ NodeBuilder b3 = store.getRoot().builder();
+ b3.child("x").removeProperty("test");
+ store.merge(b3, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+ NodeBuilder b4 = store.getRoot().builder();
+ b4.child("x").setProperty("test", "t", STRING);
+ store.merge(b4, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+ clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
+ stats = gc.gc(maxAge*2, HOURS);
+ assertEquals(0, stats.deletedPropsGCCount);
+ }
private void gcSplitDocsInternal(String subNodeName) throws Exception {
long maxAge = 1; //hrs