You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by am...@apache.org on 2023/05/23 05:23:37 UTC

[jackrabbit-oak] branch trunk updated: OAK-10253: Option to only collect references when calling checkConsis… (#947)

This is an automated email from the ASF dual-hosted git repository.

amitj pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git


The following commit(s) were added to refs/heads/trunk by this push:
     new dff1d17362 OAK-10253: Option to only collect references when calling checkConsis… (#947)
dff1d17362 is described below

commit dff1d173625d9ffe03a39efe6004b42214699a95
Author: Amit Jain <am...@apache.org>
AuthorDate: Tue May 23 10:53:32 2023 +0530

    OAK-10253: Option to only collect references when calling checkConsis… (#947)
    
    - Add markOnly option for the check-consistency command
    - document option for oak-run
---
 .../oak/plugins/blob/BlobGarbageCollector.java     |   9 ++
 .../plugins/blob/MarkSweepGarbageCollector.java    | 101 +++++++++++----------
 .../jackrabbit/oak/plugins/blob/BlobGCTest.java    |  16 ++++
 oak-run/README.md                                  |   6 +-
 .../jackrabbit/oak/run/DataStoreCommand.java       |   2 +-
 .../jackrabbit/oak/run/DataStoreOptions.java       |  10 +-
 .../jackrabbit/oak/run/DataStoreCommandTest.java   |  55 +++++++----
 7 files changed, 128 insertions(+), 71 deletions(-)

diff --git a/oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGarbageCollector.java b/oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGarbageCollector.java
index 1e7598f15f..f5164f6e7a 100644
--- a/oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGarbageCollector.java
+++ b/oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGarbageCollector.java
@@ -59,6 +59,15 @@ public interface BlobGarbageCollector {
      */
     long checkConsistency() throws Exception;
 
+    /**
+     * Collects the blob references and consolidates references from other repositories if available in the DataStore.
+     * Adds relevant metrics.
+     * 
+     * @return
+     * @throws Exception
+     */
+    long checkConsistency(boolean markOnly) throws Exception;
+
     /**
      * Returns operation statistics
      *
diff --git a/oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java b/oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
index ccd6eebb05..078f058801 100644
--- a/oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
+++ b/oak-blob-plugins/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
@@ -685,15 +685,9 @@ public class MarkSweepGarbageCollector implements BlobGarbageCollector {
             closeQuietly(writer);
         }
     }
-
-    /**
-     * Checks for the DataStore consistency and reports the number of missing blobs still referenced.
-     *
-     * @return the missing blobs
-     * @throws Exception
-     */
+    
     @Override
-    public long checkConsistency() throws Exception {
+    public long checkConsistency(boolean markOnly) throws Exception {
         consistencyStatsCollector.start();
         Stopwatch sw = Stopwatch.createStarted();
 
@@ -702,47 +696,36 @@ public class MarkSweepGarbageCollector implements BlobGarbageCollector {
         long candidates = 0;
 
         try {
-            LOG.info("Starting blob consistency check");
-
-            // Find all blobs available in the blob store
-            ListenableFutureTask<Integer> blobIdRetriever = ListenableFutureTask.create(new BlobIdRetriever(fs,
-                true));
-            executor.execute(blobIdRetriever);
+            LOG.info("Starting blob consistency check with markOnly = {}", markOnly);
 
             // Mark all used blob references
+            // Create a time marker in the data store if applicable
+            String uniqueSuffix = UUID.randomUUID().toString();
+            GarbageCollectionType.get(blobStore).addMarkedStartMarker(blobStore, repoId, uniqueSuffix);
             iterateNodeTree(fs, true);
             // Move the marked references file to the data store meta area if applicable
-            String uniqueSuffix = UUID.randomUUID().toString();
             GarbageCollectionType.get(blobStore).addMarked(blobStore, fs, repoId, uniqueSuffix);
             consistencyStatsCollector.updateMarkDuration(sw.elapsed(TimeUnit.MILLISECONDS), TimeUnit.MILLISECONDS);
 
-            try {
-                blobIdRetriever.get();
-            } catch (ExecutionException e) {
-                LOG.warn("Error occurred while fetching all the blobIds from the BlobStore");
-                threw = false;
-                throw e;
-            }
-
             if (SharedDataStoreUtils.isShared(blobStore)) {
                 // Retrieve all other marked present in the datastore
                 List<DataRecord> refFiles =
-                    ((SharedDataStore) blobStore).getAllMetadataRecords(SharedStoreRecordType.REFERENCES.getType());
+                        ((SharedDataStore) blobStore).getAllMetadataRecords(SharedStoreRecordType.REFERENCES.getType());
 
                 // Get all the repositories registered
                 List<DataRecord> repoFiles =
-                    ((SharedDataStore) blobStore).getAllMetadataRecords(SharedStoreRecordType.REPOSITORY.getType());
+                        ((SharedDataStore) blobStore).getAllMetadataRecords(SharedStoreRecordType.REPOSITORY.getType());
                 LOG.info("Repositories registered {}", repoFiles);
 
                 // Retrieve repos for which reference files have not been created
                 Set<String> unAvailRepos =
-                    SharedDataStoreUtils.refsNotAvailableFromRepos(repoFiles, refFiles);
+                        SharedDataStoreUtils.refsNotAvailableFromRepos(repoFiles, refFiles);
                 LOG.info("Repositories with unavailable references {}", unAvailRepos);
-                
+
                 if (!unAvailRepos.isEmpty()) {
                     throw new NotAllRepositoryMarkedException("Not all repositories have marked references available");
                 }
-                
+
                 if (refFiles.size() > 0) {
                     File temp = new File(root, repoId + UUID.randomUUID().toString());
                     copyFile(fs.getMarkedRefs(), temp);
@@ -760,30 +743,45 @@ public class MarkSweepGarbageCollector implements BlobGarbageCollector {
 
             // Get size
             getBlobReferencesSize(fs, consistencyStats);
+            
+            if (!markOnly) {
+                // Find all blobs available in the blob store
+                ListenableFutureTask<Integer> blobIdRetriever = ListenableFutureTask.create(new BlobIdRetriever(fs,
+                        true));
+                executor.execute(blobIdRetriever);
 
-            LOG.trace("Starting difference phase of the consistency check");
-            FileLineDifferenceIterator iter = new FileLineDifferenceIterator(
-                fs.getAvailableRefs(),
-                fs.getMarkedRefs(),
-                transformer);
-            // If tracking then also filter ids being tracked which are active deletions for lucene
-            candidates = BlobCollectionType.get(blobStore).filter(blobStore, iter, fs);
+                try {
+                    blobIdRetriever.get();
+                } catch (ExecutionException e) {
+                    LOG.warn("Error occurred while fetching all the blobIds from the BlobStore");
+                    threw = false;
+                    throw e;
+                }
+
+                LOG.trace("Starting difference phase of the consistency check");
+                FileLineDifferenceIterator iter = new FileLineDifferenceIterator(
+                        fs.getAvailableRefs(),
+                        fs.getMarkedRefs(),
+                        transformer);
+                // If tracking then also filter ids being tracked which are active deletions for lucene
+                candidates = BlobCollectionType.get(blobStore).filter(blobStore, iter, fs);
 
-            GarbageCollectionType.get(blobStore).removeAllMarkedReferences(blobStore);
+                GarbageCollectionType.get(blobStore).removeAllMarkedReferences(blobStore);
 
-            LOG.trace("Ending difference phase of the consistency check");
-            LOG.info("Consistency check found [{}] missing blobs", candidates);
+                LOG.trace("Ending difference phase of the consistency check");
+                LOG.info("Consistency check found [{}] missing blobs", candidates);
 
-            if (candidates > 0) {
-                try (LineIterator lineIterator = new LineIterator(new FileReader(fs.getGcCandidates()))) {
-                    while(lineIterator.hasNext()) {
-                        LOG.warn("Missing Blob [{}]", lineIterator.nextLine());
+                if (candidates > 0) {
+                    try (LineIterator lineIterator = new LineIterator(new FileReader(fs.getGcCandidates()))) {
+                        while (lineIterator.hasNext()) {
+                            LOG.warn("Missing Blob [{}]", lineIterator.nextLine());
+                        }
                     }
-                }
-                LOG.warn("Consistency check failure in the the blob store : {}, check missing candidates in file {}",
+                    LOG.warn("Consistency check failure in the the blob store : {}, check missing candidates in file {}",
                             blobStore, fs.getGcCandidates().getAbsolutePath());
-                consistencyStatsCollector.finishFailure();
-                consistencyStatsCollector.updateNumDeleted(candidates);
+                    consistencyStatsCollector.finishFailure();
+                    consistencyStatsCollector.updateNumDeleted(candidates);
+                }
             }
         } finally {
             if (!traceOutput && (!LOG.isTraceEnabled() && candidates == 0)) {
@@ -795,6 +793,17 @@ public class MarkSweepGarbageCollector implements BlobGarbageCollector {
         return candidates;
     }
 
+    /**
+     * Checks for the DataStore consistency and reports the number of missing blobs still referenced.
+     *
+     * @return the missing blobs
+     * @throws Exception
+     */
+    @Override
+    public long checkConsistency() throws Exception {
+        return checkConsistency(false);
+    }
+
     public void setTraceOutput(boolean trace) {
         traceOutput = trace;
     }
diff --git a/oak-blob-plugins/src/test/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCTest.java b/oak-blob-plugins/src/test/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCTest.java
index 7ad972ecfe..f7b193d935 100644
--- a/oak-blob-plugins/src/test/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCTest.java
+++ b/oak-blob-plugins/src/test/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCTest.java
@@ -416,6 +416,22 @@ public class BlobGCTest {
         assertStatsBean(collector.getConsistencyOperationStats(), 1, 0, 0);
     }
 
+    @Test
+    public void checkConsistencyMarkOnly() throws Exception {
+        log.info("Starting checkConsistencyMarkOnly()");
+
+        long afterSetupTime = clock.getTime();
+        log.info("after setup time {}", afterSetupTime);
+
+        MarkSweepGarbageCollector collector = cluster.getCollector(0);
+        long missing = collector.checkConsistency(true);
+
+        assertEquals(0, missing);
+        assertStats(cluster.statsProvider, 1, 0, 0, 0, cluster.blobStoreState.blobsPresent.size(), cluster.blobSize,
+                CONSISTENCY_NAME);
+        assertStatsBean(collector.getConsistencyOperationStats(), 1, 0, 0);
+    }
+
     @Test
     public void checkConsistencyFailure() throws Exception {
         log.info("Starting checkConsistencyFailure()");
diff --git a/oak-run/README.md b/oak-run/README.md
index e3a997e6c2..049961d3e7 100644
--- a/oak-run/README.md
+++ b/oak-run/README.md
@@ -629,7 +629,7 @@ Maintenance commands for the DataStore:
 * Data store consistency check
 
 
-    $ java -jar oak-run-*.jar datastore [--check-consistency|--collect-garbage [true]] \
+    $ java -jar oak-run-*.jar datastore [--check-consistency [true]|--collect-garbage [true]] \
             [--s3ds <s3ds_config>|--fds <fds_config>|--azureds <s3ds_config>|fake-ds-path <ds_path>] \
             [--out-dir <output_path>] \
             [--work-dir <temporary_path>] \
@@ -643,7 +643,7 @@ Maintenance commands for the DataStore:
 The following operations are available:
     
     --collect-garbage          - Execute garbage collection on the data store. If only mark phase to be run specify a true parameter.
-    --check-consistency        - List all the missing blobs by doing a consistency check.
+    --check-consistency        - List all the missing blobs by doing a consistency check. If only mark phase to be run specify a true parameter, in which case only the references will be retrieved.
     --dump-ref                 - List all the blob references in the node store
     --dump-id                  - List all the ids in the data store
     --get-metadata             - Retrieves a machine readable format GC datastore metadata
@@ -695,7 +695,7 @@ The command to be executed when using this option is:
 
     $ java -classpath oak-run-*.jar:simpleclient_common-0.6.0.jar:simpleclient-0.6.0.jar:simpleclient_dropwizard-0.6.0.jar:simpleclient_pushgateway-0.6.0.jar \
         org.apache.jackrabbit.oak.run.Main \
-        datastore [--check-consistency|--collect-garbage [true]] \
+        datastore [--check-consistency [true]|--collect-garbage [true]] \
         [--s3ds <s3ds_config>|--fds <fds_config>|--azureds <s3ds_config>|fake-ds-path <ds_path>] \
         [--out-dir <output_path>] \
         [--work-dir <temporary_path>] \
diff --git a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCommand.java b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCommand.java
index f47ebb9091..84620ba898 100644
--- a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCommand.java
+++ b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCommand.java
@@ -269,7 +269,7 @@ public class DataStoreCommand implements Command {
             } else {
                 MarkSweepGarbageCollector collector = getCollector(fixture, dataStoreOpts, opts, closer);
                 if (dataStoreOpts.checkConsistency()) {
-                    long missing = collector.checkConsistency();
+                    long missing = collector.checkConsistency(dataStoreOpts.consistencyCheckMarkOnly());
                     log.warn("Found {} missing blobs", missing);
 
                     if (dataStoreOpts.isVerbose()) {
diff --git a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreOptions.java b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreOptions.java
index 299221705d..9f79fa124a 100644
--- a/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreOptions.java
+++ b/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreOptions.java
@@ -40,7 +40,7 @@ public class DataStoreOptions implements OptionsBean {
     private final OptionSpec<File> workDirOpt;
     private final OptionSpec<File> outputDirOpt;
     private final OptionSpec<Boolean> collectGarbage;
-    private final OptionSpec<Void> consistencyCheck;
+    private final OptionSpec<Boolean> consistencyCheck;
     private final OptionSpec<Void> refOp;
     private final OptionSpec<Void> idOp;
     private final OptionSpec<Boolean> checkConsistencyAfterGC;
@@ -74,7 +74,9 @@ public class DataStoreOptions implements OptionsBean {
             .withOptionalArg().ofType(Boolean.class).defaultsTo(Boolean.FALSE);
 
         consistencyCheck =
-            parser.accepts("check-consistency", "Performs a consistency check on the repository/datastore defined");
+            parser.accepts("check-consistency", "Performs a consistency check on the repository/datastore defined. An optional boolean specifying "
+                + "'markOnly' required if only collecting references")
+            .withOptionalArg().ofType(Boolean.class).defaultsTo(Boolean.FALSE);
 
         refOp = parser.accepts("dump-ref", "Gets a dump of Blob References");
 
@@ -193,6 +195,10 @@ public class DataStoreOptions implements OptionsBean {
         return collectGarbage.value(options);
     }
 
+    public boolean consistencyCheckMarkOnly() {
+        return consistencyCheck.value(options);
+    }
+    
     public long getBlobGcMaxAgeInSecs() {
         return blobGcMaxAgeInSecs.value(options);
     }
diff --git a/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCommandTest.java b/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCommandTest.java
index e240d25189..63def135d0 100644
--- a/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCommandTest.java
+++ b/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCommandTest.java
@@ -348,9 +348,9 @@ public class DataStoreCommandTest {
 
         File dump = temporaryFolder.newFolder();
         List<String> argsList = Lists
-            .newArrayList("--check-consistency", storeFixture.getConnectionString(),
+            .newArrayList(storeFixture.getConnectionString(),
                 "--out-dir", dump.getAbsolutePath(), "--reset-log-config", "false", "--work-dir",
-                temporaryFolder.newFolder().getAbsolutePath());
+                temporaryFolder.newFolder().getAbsolutePath(), "--check-consistency");
         if (!Strings.isNullOrEmpty(additionalParams)) {
             argsList.add(additionalParams);
         }
@@ -364,7 +364,7 @@ public class DataStoreCommandTest {
         Data data = prepareData(storeFixture, blobFixture, 10, 5, 1);
         storeFixture.close();
 
-        testConsistency(dump, data, false);
+        testConsistency(dump, data, false, false);
     }
 
     @Test
@@ -373,7 +373,7 @@ public class DataStoreCommandTest {
         Data data = prepareData(storeFixture, blobFixture, 10, 5, 1);
         storeFixture.close();
 
-        testConsistency(dump, data, true);
+        testConsistency(dump, data, true, false);
     }
 
     @Test
@@ -553,7 +553,16 @@ public class DataStoreCommandTest {
         Data data = prepareData(storeFixture, blobFixture, 10, 5, 0);
         storeFixture.close();
 
-        testConsistency(dump, data, false);
+        testConsistency(dump, data, false, false);
+    }
+
+    @Test
+    public void testConsistencyMarkOnly() throws Exception {
+        File dump = temporaryFolder.newFolder();
+        Data data = prepareData(storeFixture, blobFixture, 10, 5, 0);
+        storeFixture.close();
+
+        testConsistency(dump, data, false, false, true);
     }
 
     @Test
@@ -714,13 +723,13 @@ public class DataStoreCommandTest {
     }
 
 
-    private void testConsistency(File dump, Data data, boolean verbose) throws Exception {
-        testConsistency(dump, data, verbose, false);
+    private void testConsistency(File dump, Data data, boolean verbose, boolean verboseRootPath) throws Exception {
+        testConsistency(dump, data, verbose, verboseRootPath, false);
     }
 
-    private void testConsistency(File dump, Data data, boolean verbose, boolean verboseRootPath) throws Exception {
+    private void testConsistency(File dump, Data data, boolean verbose, boolean verboseRootPath, boolean markOnly) throws Exception {
         List<String> argsList = Lists
-            .newArrayList("--check-consistency", "--" + getOption(blobFixture.getType()), blobFixture.getConfigPath(),
+            .newArrayList("--check-consistency", String.valueOf(markOnly), "--" + getOption(blobFixture.getType()), blobFixture.getConfigPath(),
                 storeFixture.getConnectionString(), "--out-dir", dump.getAbsolutePath(), "--work-dir",
                 temporaryFolder.newFolder().getAbsolutePath());
         if (!Strings.isNullOrEmpty(additionalParams)) {
@@ -732,8 +741,12 @@ public class DataStoreCommandTest {
         }
         DataStoreCommand cmd = new DataStoreCommand();
         cmd.execute(argsList.toArray(new String[0]));
-
-        assertFileEquals(dump, "avail-", Sets.difference(data.added, data.missingDataStore));
+        
+        if (!markOnly) {
+            assertFileEquals(dump, "avail-", Sets.difference(data.added, data.missingDataStore));
+        } else {
+            assertFileNull(dump, "avail-");
+        }
 
         // Verbose would have paths as well as ids changed but normally only DocumentNS would have paths suffixed
         assertFileEquals(dump, "marked-", verbose ?
@@ -742,14 +755,18 @@ public class DataStoreCommandTest {
                 (storeFixture instanceof StoreFixture.MongoStoreFixture) ?
                         encodedIdsAndPath(Sets.difference(data.added, data.deleted), blobFixture.getType(), data.idToPath, false) :
                         Sets.difference(data.added, data.deleted));
-
-        // Verbose would have paths as well as ids changed but normally only DocumentNS would have paths suffixed
-        assertFileEquals(dump, "gccand-", verbose ?
-            encodedIdsAndPath(verboseRootPath ? Sets.intersection(data.addedSubset, data.missingDataStore) :
-                    data.missingDataStore, blobFixture.getType(), data.idToPath, true) :
-            (storeFixture instanceof StoreFixture.MongoStoreFixture) ?
-                encodedIdsAndPath(data.missingDataStore, blobFixture.getType(), data.idToPath, false) :
-                data.missingDataStore);
+        
+        if (!markOnly) {
+            // Verbose would have paths as well as ids changed but normally only DocumentNS would have paths suffixed
+            assertFileEquals(dump, "gccand-", verbose ?
+                    encodedIdsAndPath(verboseRootPath ? Sets.intersection(data.addedSubset, data.missingDataStore) :
+                            data.missingDataStore, blobFixture.getType(), data.idToPath, true) :
+                    (storeFixture instanceof StoreFixture.MongoStoreFixture) ?
+                            encodedIdsAndPath(data.missingDataStore, blobFixture.getType(), data.idToPath, false) :
+                            data.missingDataStore);
+        } else {
+            assertFileNull(dump, "gccand-");
+        }
     }
 
     private void testDumpRef(File dump, Data data, boolean verbose, boolean verboseRootPath) throws Exception {