You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by am...@apache.org on 2023/05/23 05:23:37 UTC
[jackrabbit-oak] branch trunk updated: OAK-10253: Option to only collect references when calling checkConsis… (#947)
This is an automated email from the ASF dual-hosted git repository.
amitj pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
The following commit(s) were added to refs/heads/trunk by this push:
new dff1d17362 OAK-10253: Option to only collect references when calling checkConsis… (#947)
dff1d17362 is described below
commit dff1d173625d9ffe03a39efe6004b42214699a95
Author: Amit Jain <am...@apache.org>
AuthorDate: Tue May 23 10:53:32 2023 +0530
OAK-10253: Option to only collect references when calling checkConsis… (#947)
- Add markOnly option for the check-consistency command
- document option for oak-run
---
.../oak/plugins/blob/BlobGarbageCollector.java | 9 ++
.../plugins/blob/MarkSweepGarbageCollector.java | 101 +++++++++++----------
.../jackrabbit/oak/plugins/blob/BlobGCTest.java | 16 ++++
oak-run/README.md | 6 +-
.../jackrabbit/oak/run/DataStoreCommand.java | 2 +-
.../jackrabbit/oak/run/DataStoreOptions.java | 10 +-
.../jackrabbit/oak/run/DataStoreCommandTest.java | 55 +++++++----
7 files changed, 128 insertions(+), 71 deletions(-)
diff --git a/oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGarbageCollector.java b/oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGarbageCollector.java
index 1e7598f15f..f5164f6e7a 100644
--- a/oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGarbageCollector.java
+++ b/oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGarbageCollector.java
@@ -59,6 +59,15 @@ public interface BlobGarbageCollector {
*/
long checkConsistency() throws Exception;
+ /**
+ * Collects the blob references and consolidates references from other repositories if available in the DataStore.
+ * Adds relevant metrics.
+ *
+ * @return
+ * @throws Exception
+ */
+ long checkConsistency(boolean markOnly) throws Exception;
+
/**
* Returns operation statistics
*
diff --git a/oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java b/oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
index ccd6eebb05..078f058801 100644
--- a/oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
+++ b/oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
@@ -685,15 +685,9 @@ public class MarkSweepGarbageCollector implements BlobGarbageCollector {
closeQuietly(writer);
}
}
-
- /**
- * Checks for the DataStore consistency and reports the number of missing blobs still referenced.
- *
- * @return the missing blobs
- * @throws Exception
- */
+
@Override
- public long checkConsistency() throws Exception {
+ public long checkConsistency(boolean markOnly) throws Exception {
consistencyStatsCollector.start();
Stopwatch sw = Stopwatch.createStarted();
@@ -702,47 +696,36 @@ public class MarkSweepGarbageCollector implements BlobGarbageCollector {
long candidates = 0;
try {
- LOG.info("Starting blob consistency check");
-
- // Find all blobs available in the blob store
- ListenableFutureTask<Integer> blobIdRetriever = ListenableFutureTask.create(new BlobIdRetriever(fs,
- true));
- executor.execute(blobIdRetriever);
+ LOG.info("Starting blob consistency check with markOnly = {}", markOnly);
// Mark all used blob references
+ // Create a time marker in the data store if applicable
+ String uniqueSuffix = UUID.randomUUID().toString();
+ GarbageCollectionType.get(blobStore).addMarkedStartMarker(blobStore, repoId, uniqueSuffix);
iterateNodeTree(fs, true);
// Move the marked references file to the data store meta area if applicable
- String uniqueSuffix = UUID.randomUUID().toString();
GarbageCollectionType.get(blobStore).addMarked(blobStore, fs, repoId, uniqueSuffix);
consistencyStatsCollector.updateMarkDuration(sw.elapsed(TimeUnit.MILLISECONDS), TimeUnit.MILLISECONDS);
- try {
- blobIdRetriever.get();
- } catch (ExecutionException e) {
- LOG.warn("Error occurred while fetching all the blobIds from the BlobStore");
- threw = false;
- throw e;
- }
-
if (SharedDataStoreUtils.isShared(blobStore)) {
// Retrieve all other marked present in the datastore
List<DataRecord> refFiles =
- ((SharedDataStore) blobStore).getAllMetadataRecords(SharedStoreRecordType.REFERENCES.getType());
+ ((SharedDataStore) blobStore).getAllMetadataRecords(SharedStoreRecordType.REFERENCES.getType());
// Get all the repositories registered
List<DataRecord> repoFiles =
- ((SharedDataStore) blobStore).getAllMetadataRecords(SharedStoreRecordType.REPOSITORY.getType());
+ ((SharedDataStore) blobStore).getAllMetadataRecords(SharedStoreRecordType.REPOSITORY.getType());
LOG.info("Repositories registered {}", repoFiles);
// Retrieve repos for which reference files have not been created
Set<String> unAvailRepos =
- SharedDataStoreUtils.refsNotAvailableFromRepos(repoFiles, refFiles);
+ SharedDataStoreUtils.refsNotAvailableFromRepos(repoFiles, refFiles);
LOG.info("Repositories with unavailable references {}", unAvailRepos);
-
+
if (!unAvailRepos.isEmpty()) {
throw new NotAllRepositoryMarkedException("Not all repositories have marked references available");
}
-
+
if (refFiles.size() > 0) {
File temp = new File(root, repoId + UUID.randomUUID().toString());
copyFile(fs.getMarkedRefs(), temp);
@@ -760,30 +743,45 @@ public class MarkSweepGarbageCollector implements BlobGarbageCollector {
// Get size
getBlobReferencesSize(fs, consistencyStats);
+
+ if (!markOnly) {
+ // Find all blobs available in the blob store
+ ListenableFutureTask<Integer> blobIdRetriever = ListenableFutureTask.create(new BlobIdRetriever(fs,
+ true));
+ executor.execute(blobIdRetriever);
- LOG.trace("Starting difference phase of the consistency check");
- FileLineDifferenceIterator iter = new FileLineDifferenceIterator(
- fs.getAvailableRefs(),
- fs.getMarkedRefs(),
- transformer);
- // If tracking then also filter ids being tracked which are active deletions for lucene
- candidates = BlobCollectionType.get(blobStore).filter(blobStore, iter, fs);
+ try {
+ blobIdRetriever.get();
+ } catch (ExecutionException e) {
+ LOG.warn("Error occurred while fetching all the blobIds from the BlobStore");
+ threw = false;
+ throw e;
+ }
+
+ LOG.trace("Starting difference phase of the consistency check");
+ FileLineDifferenceIterator iter = new FileLineDifferenceIterator(
+ fs.getAvailableRefs(),
+ fs.getMarkedRefs(),
+ transformer);
+ // If tracking then also filter ids being tracked which are active deletions for lucene
+ candidates = BlobCollectionType.get(blobStore).filter(blobStore, iter, fs);
- GarbageCollectionType.get(blobStore).removeAllMarkedReferences(blobStore);
+ GarbageCollectionType.get(blobStore).removeAllMarkedReferences(blobStore);
- LOG.trace("Ending difference phase of the consistency check");
- LOG.info("Consistency check found [{}] missing blobs", candidates);
+ LOG.trace("Ending difference phase of the consistency check");
+ LOG.info("Consistency check found [{}] missing blobs", candidates);
- if (candidates > 0) {
- try (LineIterator lineIterator = new LineIterator(new FileReader(fs.getGcCandidates()))) {
- while(lineIterator.hasNext()) {
- LOG.warn("Missing Blob [{}]", lineIterator.nextLine());
+ if (candidates > 0) {
+ try (LineIterator lineIterator = new LineIterator(new FileReader(fs.getGcCandidates()))) {
+ while (lineIterator.hasNext()) {
+ LOG.warn("Missing Blob [{}]", lineIterator.nextLine());
+ }
}
- }
- LOG.warn("Consistency check failure in the the blob store : {}, check missing candidates in file {}",
+ LOG.warn("Consistency check failure in the the blob store : {}, check missing candidates in file {}",
blobStore, fs.getGcCandidates().getAbsolutePath());
- consistencyStatsCollector.finishFailure();
- consistencyStatsCollector.updateNumDeleted(candidates);
+ consistencyStatsCollector.finishFailure();
+ consistencyStatsCollector.updateNumDeleted(candidates);
+ }
}
} finally {
if (!traceOutput && (!LOG.isTraceEnabled() && candidates == 0)) {
@@ -795,6 +793,17 @@ public class MarkSweepGarbageCollector implements BlobGarbageCollector {
return candidates;
}
+ /**
+ * Checks for the DataStore consistency and reports the number of missing blobs still referenced.
+ *
+ * @return the missing blobs
+ * @throws Exception
+ */
+ @Override
+ public long checkConsistency() throws Exception {
+ return checkConsistency(false);
+ }
+
public void setTraceOutput(boolean trace) {
traceOutput = trace;
}
diff --git a/oak-blob-plugins/src/test/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCTest.java b/oak-blob-plugins/src/test/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCTest.java
index 7ad972ecfe..f7b193d935 100644
--- a/oak-blob-plugins/src/test/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCTest.java
+++ b/oak-blob-plugins/src/test/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCTest.java
@@ -416,6 +416,22 @@ public class BlobGCTest {
assertStatsBean(collector.getConsistencyOperationStats(), 1, 0, 0);
}
+ @Test
+ public void checkConsistencyMarkOnly() throws Exception {
+ log.info("Starting checkConsistencyMarkOnly()");
+
+ long afterSetupTime = clock.getTime();
+ log.info("after setup time {}", afterSetupTime);
+
+ MarkSweepGarbageCollector collector = cluster.getCollector(0);
+ long missing = collector.checkConsistency(true);
+
+ assertEquals(0, missing);
+ assertStats(cluster.statsProvider, 1, 0, 0, 0, cluster.blobStoreState.blobsPresent.size(), cluster.blobSize,
+ CONSISTENCY_NAME);
+ assertStatsBean(collector.getConsistencyOperationStats(), 1, 0, 0);
+ }
+
@Test
public void checkConsistencyFailure() throws Exception {
log.info("Starting checkConsistencyFailure()");
diff --git a/oak-run/README.md b/oak-run/README.md
index e3a997e6c2..049961d3e7 100644
--- a/oak-run/README.md
+++ b/oak-run/README.md
@@ -629,7 +629,7 @@ Maintenance commands for the DataStore:
* Data store consistency check
- $ java -jar oak-run-*.jar datastore [--check-consistency|--collect-garbage [true]] \
+ $ java -jar oak-run-*.jar datastore [--check-consistency [true]|--collect-garbage [true]] \
[--s3ds <s3ds_config>|--fds <fds_config>|--azureds <s3ds_config>|fake-ds-path <ds_path>] \
[--out-dir <output_path>] \
[--work-dir <temporary_path>] \
@@ -643,7 +643,7 @@ Maintenance commands for the DataStore:
The following operations are available:
--collect-garbage - Execute garbage collection on the data store. If only mark phase to be run specify a true parameter.
- --check-consistency - List all the missing blobs by doing a consistency check.
+ --check-consistency - List all the missing blobs by doing a consistency check. If only mark phase to be run specify a true parameter, in which case only the references will be retrieved.
--dump-ref - List all the blob references in the node store
--dump-id - List all the ids in the data store
--get-metadata - Retrieves a machine readable format GC datastore metadata
@@ -695,7 +695,7 @@ The command to be executed when using this option is:
$ java -classpath oak-run-*.jar:simpleclient_common-0.6.0.jar:simpleclient-0.6.0.jar:simpleclient_dropwizard-0.6.0.jar:simpleclient_pushgateway-0.6.0.jar \
org.apache.jackrabbit.oak.run.Main \
- datastore [--check-consistency|--collect-garbage [true]] \
+ datastore [--check-consistency [true]|--collect-garbage [true]] \
[--s3ds <s3ds_config>|--fds <fds_config>|--azureds <s3ds_config>|fake-ds-path <ds_path>] \
[--out-dir <output_path>] \
[--work-dir <temporary_path>] \
diff --git a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCommand.java b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCommand.java
index f47ebb9091..84620ba898 100644
--- a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCommand.java
+++ b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCommand.java
@@ -269,7 +269,7 @@ public class DataStoreCommand implements Command {
} else {
MarkSweepGarbageCollector collector = getCollector(fixture, dataStoreOpts, opts, closer);
if (dataStoreOpts.checkConsistency()) {
- long missing = collector.checkConsistency();
+ long missing = collector.checkConsistency(dataStoreOpts.consistencyCheckMarkOnly());
log.warn("Found {} missing blobs", missing);
if (dataStoreOpts.isVerbose()) {
diff --git a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreOptions.java b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreOptions.java
index 299221705d..9f79fa124a 100644
--- a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreOptions.java
+++ b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreOptions.java
@@ -40,7 +40,7 @@ public class DataStoreOptions implements OptionsBean {
private final OptionSpec<File> workDirOpt;
private final OptionSpec<File> outputDirOpt;
private final OptionSpec<Boolean> collectGarbage;
- private final OptionSpec<Void> consistencyCheck;
+ private final OptionSpec<Boolean> consistencyCheck;
private final OptionSpec<Void> refOp;
private final OptionSpec<Void> idOp;
private final OptionSpec<Boolean> checkConsistencyAfterGC;
@@ -74,7 +74,9 @@ public class DataStoreOptions implements OptionsBean {
.withOptionalArg().ofType(Boolean.class).defaultsTo(Boolean.FALSE);
consistencyCheck =
- parser.accepts("check-consistency", "Performs a consistency check on the repository/datastore defined");
+ parser.accepts("check-consistency", "Performs a consistency check on the repository/datastore defined. An optional boolean specifying "
+ + "'markOnly' required if only collecting references")
+ .withOptionalArg().ofType(Boolean.class).defaultsTo(Boolean.FALSE);
refOp = parser.accepts("dump-ref", "Gets a dump of Blob References");
@@ -193,6 +195,10 @@ public class DataStoreOptions implements OptionsBean {
return collectGarbage.value(options);
}
+ public boolean consistencyCheckMarkOnly() {
+ return consistencyCheck.value(options);
+ }
+
public long getBlobGcMaxAgeInSecs() {
return blobGcMaxAgeInSecs.value(options);
}
diff --git a/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCommandTest.java b/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCommandTest.java
index e240d25189..63def135d0 100644
--- a/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCommandTest.java
+++ b/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCommandTest.java
@@ -348,9 +348,9 @@ public class DataStoreCommandTest {
File dump = temporaryFolder.newFolder();
List<String> argsList = Lists
- .newArrayList("--check-consistency", storeFixture.getConnectionString(),
+ .newArrayList(storeFixture.getConnectionString(),
"--out-dir", dump.getAbsolutePath(), "--reset-log-config", "false", "--work-dir",
- temporaryFolder.newFolder().getAbsolutePath());
+ temporaryFolder.newFolder().getAbsolutePath(), "--check-consistency");
if (!Strings.isNullOrEmpty(additionalParams)) {
argsList.add(additionalParams);
}
@@ -364,7 +364,7 @@ public class DataStoreCommandTest {
Data data = prepareData(storeFixture, blobFixture, 10, 5, 1);
storeFixture.close();
- testConsistency(dump, data, false);
+ testConsistency(dump, data, false, false);
}
@Test
@@ -373,7 +373,7 @@ public class DataStoreCommandTest {
Data data = prepareData(storeFixture, blobFixture, 10, 5, 1);
storeFixture.close();
- testConsistency(dump, data, true);
+ testConsistency(dump, data, true, false);
}
@Test
@@ -553,7 +553,16 @@ public class DataStoreCommandTest {
Data data = prepareData(storeFixture, blobFixture, 10, 5, 0);
storeFixture.close();
- testConsistency(dump, data, false);
+ testConsistency(dump, data, false, false);
+ }
+
+ @Test
+ public void testConsistencyMarkOnly() throws Exception {
+ File dump = temporaryFolder.newFolder();
+ Data data = prepareData(storeFixture, blobFixture, 10, 5, 0);
+ storeFixture.close();
+
+ testConsistency(dump, data, false, false, true);
}
@Test
@@ -714,13 +723,13 @@ public class DataStoreCommandTest {
}
- private void testConsistency(File dump, Data data, boolean verbose) throws Exception {
- testConsistency(dump, data, verbose, false);
+ private void testConsistency(File dump, Data data, boolean verbose, boolean verboseRootPath) throws Exception {
+ testConsistency(dump, data, verbose, verboseRootPath, false);
}
- private void testConsistency(File dump, Data data, boolean verbose, boolean verboseRootPath) throws Exception {
+ private void testConsistency(File dump, Data data, boolean verbose, boolean verboseRootPath, boolean markOnly) throws Exception {
List<String> argsList = Lists
- .newArrayList("--check-consistency", "--" + getOption(blobFixture.getType()), blobFixture.getConfigPath(),
+ .newArrayList("--check-consistency", String.valueOf(markOnly), "--" + getOption(blobFixture.getType()), blobFixture.getConfigPath(),
storeFixture.getConnectionString(), "--out-dir", dump.getAbsolutePath(), "--work-dir",
temporaryFolder.newFolder().getAbsolutePath());
if (!Strings.isNullOrEmpty(additionalParams)) {
@@ -732,8 +741,12 @@ public class DataStoreCommandTest {
}
DataStoreCommand cmd = new DataStoreCommand();
cmd.execute(argsList.toArray(new String[0]));
-
- assertFileEquals(dump, "avail-", Sets.difference(data.added, data.missingDataStore));
+
+ if (!markOnly) {
+ assertFileEquals(dump, "avail-", Sets.difference(data.added, data.missingDataStore));
+ } else {
+ assertFileNull(dump, "avail-");
+ }
// Verbose would have paths as well as ids changed but normally only DocumentNS would have paths suffixed
assertFileEquals(dump, "marked-", verbose ?
@@ -742,14 +755,18 @@ public class DataStoreCommandTest {
(storeFixture instanceof StoreFixture.MongoStoreFixture) ?
encodedIdsAndPath(Sets.difference(data.added, data.deleted), blobFixture.getType(), data.idToPath, false) :
Sets.difference(data.added, data.deleted));
-
- // Verbose would have paths as well as ids changed but normally only DocumentNS would have paths suffixed
- assertFileEquals(dump, "gccand-", verbose ?
- encodedIdsAndPath(verboseRootPath ? Sets.intersection(data.addedSubset, data.missingDataStore) :
- data.missingDataStore, blobFixture.getType(), data.idToPath, true) :
- (storeFixture instanceof StoreFixture.MongoStoreFixture) ?
- encodedIdsAndPath(data.missingDataStore, blobFixture.getType(), data.idToPath, false) :
- data.missingDataStore);
+
+ if (!markOnly) {
+ // Verbose would have paths as well as ids changed but normally only DocumentNS would have paths suffixed
+ assertFileEquals(dump, "gccand-", verbose ?
+ encodedIdsAndPath(verboseRootPath ? Sets.intersection(data.addedSubset, data.missingDataStore) :
+ data.missingDataStore, blobFixture.getType(), data.idToPath, true) :
+ (storeFixture instanceof StoreFixture.MongoStoreFixture) ?
+ encodedIdsAndPath(data.missingDataStore, blobFixture.getType(), data.idToPath, false) :
+ data.missingDataStore);
+ } else {
+ assertFileNull(dump, "gccand-");
+ }
}
private void testDumpRef(File dump, Data data, boolean verbose, boolean verboseRootPath) throws Exception {