You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by am...@apache.org on 2018/07/27 10:53:53 UTC

svn commit: r1836804 - in /jackrabbit/oak/trunk: oak-blob-plugins/src/test/java/org/apache/jackrabbit/oak/plugins/blob/ oak-run/ oak-run/src/main/java/org/apache/jackrabbit/oak/run/ oak-run/src/main/resources/ oak-run/src/test/java/org/apache/jackrabbi...

Author: amitj
Date: Fri Jul 27 10:53:53 2018
New Revision: 1836804

URL: http://svn.apache.org/viewvc?rev=1836804&view=rev
Log:
OAK-7649: oak-run command to execute datastore garbage collection

- Enable 'datastore' command with operations --check-consistency & --collect-garbage

Added:
    jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCommand.java   (with props)
    jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreOptions.java   (with props)
    jackrabbit/oak/trunk/oak-run/src/main/resources/logback-datastore.xml   (with props)
    jackrabbit/oak/trunk/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCommandTest.java   (with props)
Modified:
    jackrabbit/oak/trunk/oak-blob-plugins/src/test/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCTest.java
    jackrabbit/oak/trunk/oak-run/pom.xml
    jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/AvailableModes.java

Modified: jackrabbit/oak/trunk/oak-blob-plugins/src/test/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-blob-plugins/src/test/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCTest.java?rev=1836804&r1=1836803&r2=1836804&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-blob-plugins/src/test/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCTest.java (original)
+++ jackrabbit/oak/trunk/oak-blob-plugins/src/test/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCTest.java Fri Jul 27 10:53:53 2018
@@ -377,7 +377,7 @@ public class BlobGCTest {
     /**
      * MemoryNodeStore extension which creates blobs in the in-memory blob store
      */
-    static class MemoryBlobStoreNodeStore extends MemoryNodeStore {
+    public static class MemoryBlobStoreNodeStore extends MemoryNodeStore {
         private final BlobStore blobStore;
         Set<String> referencedBlobs;
 

Modified: jackrabbit/oak/trunk/oak-run/pom.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/pom.xml?rev=1836804&r1=1836803&r2=1836804&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/pom.xml (original)
+++ jackrabbit/oak/trunk/oak-run/pom.xml Fri Jul 27 10:53:53 2018
@@ -381,6 +381,13 @@
     </dependency>
     <dependency>
       <groupId>org.apache.jackrabbit</groupId>
+      <artifactId>oak-commons</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.jackrabbit</groupId>
       <artifactId>oak-blob-plugins</artifactId>
       <version>${project.version}</version>
       <type>test-jar</type>

Modified: jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/AvailableModes.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/AvailableModes.java?rev=1836804&r1=1836803&r2=1836804&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/AvailableModes.java (original)
+++ jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/AvailableModes.java Fri Jul 27 10:53:53 2018
@@ -58,5 +58,6 @@ public final class AvailableModes {
             .put(NodeStateExportCommand.NAME, new NodeStateExportCommand())
             .put(IOTraceCommand.NAME, new IOTraceCommand())
             .put("server", new ServerCommand())
+            .put(DataStoreCommand.NAME, new DataStoreCommand())
             .build());
 }

Added: jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCommand.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCommand.java?rev=1836804&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCommand.java (added)
+++ jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCommand.java Fri Jul 27 10:53:53 2018
@@ -0,0 +1,349 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.run;
+
+import java.io.File;
+import java.io.IOException;
+import java.lang.management.ManagementFactory;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+import com.google.common.base.Function;
+import com.google.common.base.Joiner;
+import com.google.common.base.Splitter;
+import com.google.common.io.Closer;
+import joptsimple.OptionParser;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.filefilter.FileFilterUtils;
+import org.apache.jackrabbit.oak.api.Blob;
+import org.apache.jackrabbit.oak.api.PropertyState;
+import org.apache.jackrabbit.oak.api.Type;
+import org.apache.jackrabbit.oak.commons.FileIOUtils;
+import org.apache.jackrabbit.oak.commons.FileIOUtils.BurnOnCloseFileIterator;
+import org.apache.jackrabbit.oak.commons.PathUtils;
+import org.apache.jackrabbit.oak.commons.concurrent.ExecutorCloser;
+import org.apache.jackrabbit.oak.commons.sort.EscapeUtils;
+import org.apache.jackrabbit.oak.plugins.blob.BlobReferenceRetriever;
+import org.apache.jackrabbit.oak.plugins.blob.MarkSweepGarbageCollector;
+import org.apache.jackrabbit.oak.plugins.blob.ReferenceCollector;
+import org.apache.jackrabbit.oak.plugins.document.DocumentBlobReferenceRetriever;
+import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore;
+import org.apache.jackrabbit.oak.run.cli.BlobStoreOptions;
+import org.apache.jackrabbit.oak.run.cli.CommonOptions;
+import org.apache.jackrabbit.oak.run.cli.NodeStoreFixture;
+import org.apache.jackrabbit.oak.run.cli.NodeStoreFixtureProvider;
+import org.apache.jackrabbit.oak.run.cli.Options;
+import org.apache.jackrabbit.oak.run.commons.Command;
+import org.apache.jackrabbit.oak.run.commons.LoggingInitializer;
+import org.apache.jackrabbit.oak.segment.SegmentBlobReferenceRetriever;
+import org.apache.jackrabbit.oak.segment.file.FileStore;
+import org.apache.jackrabbit.oak.spi.blob.GarbageCollectableBlobStore;
+import org.apache.jackrabbit.oak.spi.cluster.ClusterRepositoryInfo;
+import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry;
+import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.apache.jackrabbit.oak.spi.state.NodeStore;
+import org.apache.jackrabbit.oak.stats.StatisticsProvider;
+import org.jetbrains.annotations.Nullable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static com.google.common.base.Charsets.UTF_8;
+import static com.google.common.base.Preconditions.checkNotNull;
+import static com.google.common.base.StandardSystemProperty.FILE_SEPARATOR;
+import static java.util.concurrent.TimeUnit.SECONDS;
+import static org.apache.jackrabbit.oak.run.cli.BlobStoreOptions.Type.AZURE;
+import static org.apache.jackrabbit.oak.run.cli.BlobStoreOptions.Type.FAKE;
+import static org.apache.jackrabbit.oak.run.cli.BlobStoreOptions.Type.FDS;
+import static org.apache.jackrabbit.oak.run.cli.BlobStoreOptions.Type.S3;
+import static org.apache.jackrabbit.oak.spi.whiteboard.WhiteboardUtils.getService;
+
+/**
+ * Command to check data store consistency and also optionally retrieve ids
+ * and references.
+ */
+public class DataStoreCommand implements Command {
+    private static final Logger log = LoggerFactory.getLogger(DataStoreCommand.class);
+
+    public static final String NAME = "datastore";
+    private static final String summary = "Provides DataStore management operations";
+
+    private Options opts;
+    private DataStoreOptions dataStoreOpts;
+
+    /**
+     * Parses the command line, prepares the work/output directories and
+     * command-specific logging, creates the NodeStore fixture and runs the
+     * selected datastore action. Exits the JVM with status 1 on any failure.
+     */
+    @Override
+    public void execute(String... args) throws Exception {
+        OptionParser parser = new OptionParser();
+
+        opts = new Options();
+        opts.setCommandName(NAME);
+        opts.setSummary(summary);
+        opts.setConnectionString(CommonOptions.DEFAULT_CONNECTION_STRING);
+        opts.registerOptionsFactory(DataStoreOptions.FACTORY);
+        opts.parseAndConfigure(parser, args);
+
+        dataStoreOpts = opts.getOptionBean(DataStoreOptions.class);
+        logCliArgs(args);
+
+        //Clean up before setting up NodeStore as the temp
+        //directory might be used by NodeStore for cache stuff like persistentCache
+        setupDirectories(dataStoreOpts);
+        setupLogging(dataStoreOpts);
+
+
+        boolean success = false;
+        try (Closer closer = Closer.create()) {
+            NodeStoreFixture fixture = NodeStoreFixtureProvider.create(opts);
+            closer.register(fixture);
+
+            // Bail out (without error status) when the invocation is incomplete
+            if (!checkParameters(dataStoreOpts, opts, fixture, parser)) {
+                return;
+            }
+            execute(fixture, dataStoreOpts, opts, closer);
+            success = true;
+        } catch (Throwable e) {
+            log.error("Error occurred while performing datastore operation", e);
+            e.printStackTrace(System.err);
+        } finally {
+            // Flush/stop the command-specific log appenders before exiting
+            shutdownLogging();
+        }
+
+        if (!success) {
+            System.exit(1);
+        }
+    }
+
+    /**
+     * Validates that an action was selected and that the fixture provides the
+     * stores required for it. Prints usage and returns false when a
+     * precondition is missing; true when the command can proceed.
+     */
+    private static boolean checkParameters(DataStoreOptions dataStoreOpts, Options opts, NodeStoreFixture fixture,
+        OptionParser parser) throws IOException {
+
+        String problem = null;
+        if (!dataStoreOpts.anyActionSelected()) {
+            problem = "No actions specified";
+        } else if (fixture.getStore() == null) {
+            problem = "No NodeStore specified";
+        } else if (!opts.getCommonOpts().isDocument() && fixture.getBlobStore() == null) {
+            problem = "No BlobStore specified";
+        }
+
+        if (problem == null) {
+            return true;
+        }
+        log.info(problem);
+        parser.printHelpOn(System.out);
+        return false;
+    }
+
+    /**
+     * Runs the selected operation: a consistency check (optionally followed by
+     * verbose id/path logging of the candidate file) or garbage collection
+     * (full sweep, or mark phase only when requested).
+     */
+    private void execute(NodeStoreFixture fixture,  DataStoreOptions dataStoreOpts, Options opts, Closer closer)
+        throws Exception {
+        MarkSweepGarbageCollector collector = getCollector(fixture, dataStoreOpts, opts, closer);
+        if (dataStoreOpts.checkConsistency()) {
+            long missing = collector.checkConsistency();
+            log.warn("Found {} missing blobs", missing);
+
+            if (dataStoreOpts.isVerbose()) {
+                // Rewrite the GC candidate file with store-specific ids and paths
+                new VerboseIdLogger(opts).log();
+            }
+        } else if (dataStoreOpts.collectGarbage()) {
+            collector.collectGarbage(dataStoreOpts.markOnly());
+        }
+    }
+
+    /**
+     * Clears stale files from a previous run: empties the output directory if
+     * it exists and resets the work directory (created on demand by
+     * {@link DataStoreOptions#getWorkDir()}).
+     */
+    private static void setupDirectories(DataStoreOptions opts) throws IOException {
+        File out = opts.getOutDir();
+        if (out.exists()) {
+            FileUtils.cleanDirectory(out);
+        }
+        FileUtils.cleanDirectory(opts.getWorkDir());
+    }
+
+    /**
+     * Builds a {@link MarkSweepGarbageCollector} wired to the fixture's blob
+     * store. The reference retriever depends on the store type: document
+     * stores use {@link DocumentBlobReferenceRetriever}; segment stores use
+     * {@link SegmentBlobReferenceRetriever}, unless verbose output was
+     * requested, in which case a full node-store traversal is used so that
+     * node paths are available for each reference.
+     */
+    private static MarkSweepGarbageCollector getCollector(NodeStoreFixture fixture, DataStoreOptions dataStoreOpts,
+        Options opts, Closer closer) throws IOException {
+
+        BlobReferenceRetriever retriever;
+        if (opts.getCommonOpts().isDocument()) {
+            retriever = new DocumentBlobReferenceRetriever((DocumentNodeStore) fixture.getStore());
+        } else {
+            if (dataStoreOpts.isVerbose()) {
+                // Traversal-based retriever reports paths, needed for --verbose
+                retriever = new NodeTraverserReferenceRetriever(fixture.getStore());
+            } else {
+                FileStore fileStore = getService(fixture.getWhiteboard(), FileStore.class);
+                retriever = new SegmentBlobReferenceRetriever(fileStore);
+            }
+        }
+
+        // Single worker for the GC; shut down via the command's Closer
+        ExecutorService service = Executors.newSingleThreadExecutor();
+        closer.register(new ExecutorCloser(service));
+
+        // A registered repository id is mandatory; fail fast (NPE) when absent
+        String repositoryId = ClusterRepositoryInfo.getId(fixture.getStore());
+        checkNotNull(repositoryId);
+
+        MarkSweepGarbageCollector collector =
+            new MarkSweepGarbageCollector(retriever, (GarbageCollectableBlobStore) fixture.getBlobStore(), service,
+                dataStoreOpts.getOutDir().getAbsolutePath(), dataStoreOpts.getBatchCount(),
+                SECONDS.toMillis(dataStoreOpts.getBlobGcMaxAgeInSecs()), repositoryId, fixture.getWhiteboard(),
+                getService(fixture.getWhiteboard(), StatisticsProvider.class));
+        collector.setTraceOutput(true);
+
+        return collector;
+    }
+
+    /**
+     * Initializes command-specific logging rooted at the work directory,
+     * optionally resetting the existing logging configuration.
+     */
+    protected static void setupLogging(DataStoreOptions dataStoreOpts) throws IOException {
+        new LoggingInitializer(dataStoreOpts.getWorkDir(), NAME, dataStoreOpts.isResetLoggingConfig()).init();
+    }
+
+    /** Tears down the logging set up in {@link #setupLogging(DataStoreOptions)}. */
+    private static void shutdownLogging() {
+        LoggingInitializer.shutdownLogging();
+    }
+
+    /** Logs the command-line arguments and any JVM options for diagnostics. */
+    private static void logCliArgs(String[] args) {
+        String joined = Joiner.on(' ').join(args);
+        log.info("Command line arguments used for datastore command [{}]", joined);
+
+        List<String> vmArgs = ManagementFactory.getRuntimeMXBean().getInputArguments();
+        if (vmArgs.isEmpty()) {
+            return;
+        }
+        log.info("System properties and vm options passed {}", vmArgs);
+    }
+
+    /**
+     * Standalone entry point; delegates to {@link #execute(String...)} with the
+     * user-supplied arguments. (Previously contained hardcoded developer-local
+     * paths left over from debugging, which made it unusable elsewhere.)
+     */
+    public static void main(String[] args) throws Exception {
+        new DataStoreCommand().execute(args);
+    }
+
+    /**
+     * {@link BlobReferenceRetriever} implementation which iterates over the
+     * whole node store to find referenced blobs. Useful when the paths of
+     * those blobs are needed and the underlying {@link NodeStore}
+     * implementation does not provide them natively.
+     */
+    static class NodeTraverserReferenceRetriever implements BlobReferenceRetriever {
+        private final NodeStore nodeStore;
+
+        public NodeTraverserReferenceRetriever(NodeStore nodeStore) {
+            this.nodeStore = nodeStore;
+        }
+
+        /**
+         * Reports the content identity of every BINARY/BINARIES property of
+         * {@code state} to the collector, keyed by the node path. Blobs with
+         * no content identity are skipped.
+         * NOTE(review): the property name is not appended to the reported
+         * path (only the node path is used) — confirm this is intended.
+         */
+        private void binaryProperties(NodeState state, String path, ReferenceCollector collector) {
+            for (PropertyState p : state.getProperties()) {
+                if (p.getType() == Type.BINARY) {
+                    String blobId = p.getValue(Type.BINARY).getContentIdentity();
+                    if (blobId != null) {
+                        collector.addReference(blobId, path);
+                    }
+                } else if (p.getType() == Type.BINARIES && p.count() > 0) {
+                    Iterator<Blob> iterator = p.getValue(Type.BINARIES).iterator();
+                    while (iterator.hasNext()) {
+                        String blobId = iterator.next().getContentIdentity();
+                        if (blobId != null) {
+                            collector.addReference(blobId, path);
+                        }
+                    }
+                }
+            }
+        }
+
+        /** Depth-first traversal reporting the binary properties of every node. */
+        private void traverseChildren(NodeState state, String path, ReferenceCollector collector) {
+            binaryProperties(state, path, collector);
+            for (ChildNodeEntry c : state.getChildNodeEntries()) {
+                traverseChildren(c.getNodeState(), PathUtils.concat(path, c.getName()), collector);
+            }
+        }
+
+        @Override public void collectReferences(ReferenceCollector collector) throws IOException {
+            log.info("Starting dump of blob references by traversing");
+            traverseChildren(nodeStore.getRoot(), "/", collector);
+        }
+    }
+
+    /**
+     * Rewrites the GC candidate file ("gccand-*" under the "gcworkdir-*"
+     * sub-directory of the output dir) so that every line carries the blob id
+     * in the configured blob store's physical layout plus the unescaped path.
+     */
+    static class VerboseIdLogger {
+        static final String DELIM = ",";
+        static final String DASH = "-";
+        static final String HASH = "#";
+        // Orders candidate lines by the id column (text before the first delimiter).
+        // NOTE(review): not referenced within this class — confirm external use.
+        static final Comparator<String> idComparator = new Comparator<String>() {
+            @Override
+            public int compare(String s1, String s2) {
+                return s1.split(DELIM)[0].compareTo(s2.split(DELIM)[0]);
+            }
+        };
+        private final static Joiner delimJoiner = Joiner.on(DELIM).skipNulls();
+        private final static Splitter delimSplitter = Splitter.on(DELIM).trimResults().omitEmptyStrings();
+
+        private final BlobStoreOptions optionBean;
+        private final BlobStoreOptions.Type blobStoreType;
+        private final File outDir;
+        // Candidate file produced by the preceding consistency check/GC run
+        private final File outFile;
+
+        public VerboseIdLogger(Options options) {
+            this.optionBean = options.getOptionBean(BlobStoreOptions.class);
+            this.blobStoreType = optionBean.getBlobStoreType();
+            outDir = options.getOptionBean(DataStoreOptions.class).getOutDir();
+
+            outFile = filterFiles(outDir, "gccand-");
+            if (outFile == null) {
+                throw new IllegalArgumentException("No candidate file found");
+            }
+        }
+
+        /**
+         * Returns the first file starting with {@code prefix} inside the first
+         * "gcworkdir-*" sub-directory of {@code outDir}, or null when absent.
+         */
+        @Nullable
+        static File filterFiles(File outDir, String prefix) {
+            List<File> subDirs = FileFilterUtils.filterList(FileFilterUtils
+                    .and(FileFilterUtils.prefixFileFilter("gcworkdir-"), FileFilterUtils.directoryFileFilter()),
+                outDir.listFiles());
+
+            if (subDirs != null && !subDirs.isEmpty()) {
+                File workDir = subDirs.get(0);
+                List<File> outFiles = FileFilterUtils.filterList(FileFilterUtils.prefixFileFilter(prefix), workDir.listFiles());
+
+                if (outFiles != null && !outFiles.isEmpty()) {
+                    return outFiles.get(0);
+                }
+            }
+
+            return null;
+        }
+
+        /**
+         * Converts one candidate line ("id,escaped-path") to the store's
+         * physical id form: FDS/FAKE ids gain a three-level directory prefix
+         * (first 3 byte pairs), S3/Azure ids a dash after the 4th character;
+         * the id's "#length" suffix is dropped and the path is unescaped.
+         */
+        static String encodeId(String line, BlobStoreOptions.Type dsType) {
+            List<String> list = delimSplitter.splitToList(line);
+
+            String id = list.get(0);
+            List<String> idLengthSepList = Splitter.on(HASH).trimResults().omitEmptyStrings().splitToList(id);
+            String blobId = idLengthSepList.get(0);
+
+            if (dsType == FAKE || dsType == FDS) {
+                blobId = (blobId.substring(0, 2) + FILE_SEPARATOR.value() + blobId.substring(2, 4) + FILE_SEPARATOR.value() + blobId
+                    .substring(4, 6) + FILE_SEPARATOR.value() + blobId);
+            } else if (dsType == S3 || dsType == AZURE) {
+                blobId = (blobId.substring(0, 4) + DASH + blobId.substring(4));
+            }
+            return delimJoiner.join(blobId, EscapeUtils.unescapeLineBreaks(list.get(1)));
+        }
+
+        /**
+         * Moves the candidate file aside and streams it line by line through
+         * {@link #encodeId}, writing the transformed lines back under the
+         * original name; the temp copy is removed when the iterator closes
+         * (BurnOnCloseFileIterator).
+         */
+        public void log() throws IOException {
+            File tempFile = new File(outDir, outFile.getName() + "-temp");
+            FileUtils.moveFile(outFile, tempFile);
+            try (BurnOnCloseFileIterator iterator =
+                    new BurnOnCloseFileIterator(FileUtils.lineIterator(tempFile, UTF_8.toString()), tempFile,
+                        (Function<String, String>) input -> encodeId(input, blobStoreType))) {
+                FileIOUtils.writeStrings(iterator, outFile, true, log, "Transformed to verbose ids - ");
+            }
+        }
+    }
+}
+

Propchange: jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCommand.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreOptions.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreOptions.java?rev=1836804&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreOptions.java (added)
+++ jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreOptions.java Fri Jul 27 10:53:53 2018
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.run;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import com.google.common.collect.ImmutableSet;
+import joptsimple.OptionParser;
+import joptsimple.OptionSet;
+import joptsimple.OptionSpec;
+import org.apache.commons.io.FileUtils;
+import org.apache.jackrabbit.oak.run.cli.OptionsBean;
+import org.apache.jackrabbit.oak.run.cli.OptionsBeanFactory;
+
+/**
+ * Options for the {@code datastore} oak-run command. Declares the action
+ * options (--collect-garbage, --check-consistency) and their tuning,
+ * output-directory and logging parameters.
+ */
+public class DataStoreOptions implements OptionsBean {
+
+    public static final OptionsBeanFactory FACTORY = DataStoreOptions::new;
+
+    private final OptionSpec<File> workDirOpt;
+    private final OptionSpec<File> outputDirOpt;
+    private final OptionSpec<Boolean> collectGarbage;
+    private final OptionSpec<Void> consistencyCheck;
+    private final OptionSpec<Integer> batchCount;
+    private OptionSet options;
+    // Options that constitute an action; at least one must be selected
+    private final Set<OptionSpec> actionOpts;
+    private final Set<String> operationNames;
+    private final OptionSpec<Long> blobGcMaxAgeInSecs;
+    private final OptionSpec<Void> verbose;
+    private final OptionSpec<Boolean> resetLoggingConfig;
+
+    public DataStoreOptions(OptionParser parser) {
+        collectGarbage = parser.accepts("collect-garbage",
+            "Performs DataStore Garbage Collection on the repository/datastore defined. An optional boolean specifying "
+                + "'markOnly' required if only mark phase of garbage collection is to be executed")
+            .withOptionalArg().ofType(Boolean.class).defaultsTo(Boolean.FALSE);
+
+        consistencyCheck =
+            parser.accepts("check-consistency", "Performs a consistency check on the repository/datastore defined");
+
+        // Was previously registered with an empty help description
+        blobGcMaxAgeInSecs = parser.accepts("max-age",
+            "Max age in seconds; only blobs older than this are candidates for garbage collection")
+            .withRequiredArg().ofType(Long.class).defaultsTo(86400L);
+        batchCount = parser.accepts("batch", "Batch count")
+            .withRequiredArg().ofType(Integer.class).defaultsTo(2048);
+
+        workDirOpt = parser.accepts("work-dir", "Directory used for storing temporary files")
+            .withRequiredArg().ofType(File.class).defaultsTo(new File("temp"));
+        outputDirOpt = parser.accepts("out-dir", "Directory for storing output files")
+            .withRequiredArg().ofType(File.class).defaultsTo(new File("datastore-out"));
+
+        verbose =
+            parser.accepts("verbose", "Option to get all the paths and implementation specific blob ids");
+
+        resetLoggingConfig =
+            parser.accepts("reset-log-config", "Reset logging config for testing purposes only").withOptionalArg()
+                .ofType(Boolean.class).defaultsTo(Boolean.TRUE);
+
+        //Set of options which define action
+        actionOpts = ImmutableSet.of(collectGarbage, consistencyCheck);
+        operationNames = collectionOperationNames(actionOpts);
+    }
+
+    @Override
+    public void configure(OptionSet options) {
+        this.options = options;
+    }
+
+    @Override
+    public String title() {
+        return "";
+    }
+
+    @Override
+    public String description() {
+        return "The datastore command supports the following operations.";
+    }
+
+    @Override
+    public int order() {
+        return 50;
+    }
+
+    @Override
+    public Set<String> operationNames() {
+        return operationNames;
+    }
+
+    /** Returns true when at least one of the action options was given. */
+    public boolean anyActionSelected() {
+        for (OptionSpec spec : actionOpts) {
+            if (options.has(spec)){
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /** Returns the temporary work directory, creating it if needed. */
+    public File getWorkDir() throws IOException {
+        File workDir = workDirOpt.value(options);
+        FileUtils.forceMkdir(workDir);
+        return workDir;
+    }
+
+    /** Returns the output directory (not created here). */
+    public File getOutDir() {
+        return outputDirOpt.value(options);
+    }
+
+    public boolean collectGarbage() {
+        return options.has(collectGarbage);
+    }
+
+    public boolean checkConsistency(){
+        return options.has(consistencyCheck);
+    }
+
+    /** True when only the mark phase of GC should run ({@code --collect-garbage true}). */
+    public boolean markOnly() {
+        return collectGarbage.value(options);
+    }
+
+    public long getBlobGcMaxAgeInSecs() {
+        return blobGcMaxAgeInSecs.value(options);
+    }
+
+    public int getBatchCount() {
+        return batchCount.value(options);
+    }
+
+    public boolean isVerbose() {
+        return options.has(verbose);
+    }
+
+    public boolean isResetLoggingConfig() {
+        return resetLoggingConfig.value(options);
+    }
+
+    /** Collects all option name aliases of the given action specs. */
+    private static Set<String> collectionOperationNames(Set<OptionSpec> actionOpts) {
+        Set<String> result = new HashSet<>();
+        for (OptionSpec spec : actionOpts){
+            result.addAll(spec.options());
+        }
+        return result;
+    }
+}

Propchange: jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreOptions.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: jackrabbit/oak/trunk/oak-run/src/main/resources/logback-datastore.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/resources/logback-datastore.xml?rev=1836804&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-run/src/main/resources/logback-datastore.xml (added)
+++ jackrabbit/oak/trunk/oak-run/src/main/resources/logback-datastore.xml Fri Jul 27 10:53:53 2018
@@ -0,0 +1,51 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one
+  ~ or more contributor license agreements.  See the NOTICE file
+  ~ distributed with this work for additional information
+  ~ regarding copyright ownership.  The ASF licenses this file
+  ~ to you under the Apache License, Version 2.0 (the
+  ~ "License"); you may not use this file except in compliance
+  ~ with the License.  You may obtain a copy of the License at
+  ~
+  ~   http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing,
+  ~ software distributed under the License is distributed on an
+  ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  ~ KIND, either express or implied.  See the License for the
+  ~ specific language governing permissions and limitations
+  ~ under the License.
+  -->
+<configuration scan="true" scanPeriod="1 second">
+
+  <!-- File appender under the per-run work directory; ${oak.workDir} is
+       presumably set by the command's logging setup — TODO confirm -->
+  <appender name="datastore" class="ch.qos.logback.core.FileAppender">
+    <file>${oak.workDir}/datastore.log</file>
+    <encoder>
+      <pattern>%d %-5level [%thread] %logger{30} %marker- %msg %n</pattern>
+    </encoder>
+  </appender>
+
+  <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
+    <target>System.out</target>
+    <encoder>
+      <pattern>%d{HH:mm:ss} - %msg%n</pattern>
+    </encoder>
+  </appender>
+
+  <!-- For datastore operations: progress is mirrored to the console -->
+  <logger name="org.apache.jackrabbit.oak.blob" level="INFO">
+    <appender-ref ref="STDOUT" />
+  </logger>
+  <logger name="org.apache.jackrabbit.oak.plugins.blob" level="INFO">
+    <appender-ref ref="STDOUT" />
+  </logger>
+  <logger name="org.apache.jackrabbit.core.data" level="INFO">
+    <appender-ref ref="STDOUT" />
+  </logger>
+
+  <!-- Everything else at INFO and above goes only to the log file -->
+  <root level="INFO">
+    <appender-ref ref="datastore" />
+  </root>
+
+</configuration>

Propchange: jackrabbit/oak/trunk/oak-run/src/main/resources/logback-datastore.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Added: jackrabbit/oak/trunk/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCommandTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCommandTest.java?rev=1836804&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCommandTest.java (added)
+++ jackrabbit/oak/trunk/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCommandTest.java Fri Jul 27 10:53:53 2018
@@ -0,0 +1,905 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.jackrabbit.oak.run;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Random;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+
+import ch.qos.logback.classic.Level;
+import com.google.common.base.Function;
+import com.google.common.base.Joiner;
+import com.google.common.base.Splitter;
+import com.google.common.base.Strings;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Iterators;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+import org.apache.commons.io.FileUtils;
+import org.apache.felix.cm.file.ConfigurationHandler;
+import org.apache.jackrabbit.core.data.DataStore;
+import org.apache.jackrabbit.core.data.DataStoreException;
+import org.apache.jackrabbit.oak.api.Blob;
+import org.apache.jackrabbit.oak.api.CommitFailedException;
+import org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage.AzureConstants;
+import org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage.AzureDataStoreUtils;
+import org.apache.jackrabbit.oak.blob.cloud.s3.S3Constants;
+import org.apache.jackrabbit.oak.blob.cloud.s3.S3DataStoreUtils;
+import org.apache.jackrabbit.oak.commons.FileIOUtils;
+import org.apache.jackrabbit.oak.commons.junit.LogCustomizer;
+import org.apache.jackrabbit.oak.plugins.blob.BlobGCTest.MemoryBlobStoreNodeStore;
+import org.apache.jackrabbit.oak.plugins.blob.MarkSweepGarbageCollector;
+import org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore;
+import org.apache.jackrabbit.oak.plugins.blob.datastore.OakFileDataStore;
+import org.apache.jackrabbit.oak.plugins.document.DocumentMK;
+import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore;
+import org.apache.jackrabbit.oak.plugins.document.MongoUtils;
+import org.apache.jackrabbit.oak.plugins.document.Revision;
+import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector;
+import org.apache.jackrabbit.oak.plugins.document.util.MongoConnection;
+import org.apache.jackrabbit.oak.run.cli.BlobStoreOptions.Type;
+import org.apache.jackrabbit.oak.segment.SegmentNodeStore;
+import org.apache.jackrabbit.oak.segment.SegmentNodeStoreBuilders;
+import org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions;
+import org.apache.jackrabbit.oak.segment.file.FileStore;
+import org.apache.jackrabbit.oak.segment.file.FileStoreBuilder;
+import org.apache.jackrabbit.oak.spi.blob.GarbageCollectableBlobStore;
+import org.apache.jackrabbit.oak.spi.cluster.ClusterRepositoryInfo;
+import org.apache.jackrabbit.oak.spi.commit.CommitInfo;
+import org.apache.jackrabbit.oak.spi.commit.EmptyHook;
+import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
+import org.apache.jackrabbit.oak.spi.state.NodeStore;
+import org.apache.jackrabbit.oak.stats.Clock;
+import org.jetbrains.annotations.Nullable;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Assume;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static com.google.common.base.StandardSystemProperty.FILE_SEPARATOR;
+import static org.apache.jackrabbit.oak.commons.FileIOUtils.sort;
+import static org.apache.jackrabbit.oak.commons.FileIOUtils.writeStrings;
+import static org.apache.jackrabbit.oak.plugins.blob.datastore.SharedDataStoreUtils.SharedStoreRecordType.REFERENCES;
+import static org.apache.jackrabbit.oak.plugins.blob.datastore.SharedDataStoreUtils.SharedStoreRecordType.REPOSITORY;
+import static org.apache.jackrabbit.oak.run.DataStoreCommand.VerboseIdLogger.DASH;
+import static org.apache.jackrabbit.oak.run.DataStoreCommand.VerboseIdLogger.HASH;
+import static org.apache.jackrabbit.oak.run.DataStoreCommand.VerboseIdLogger.filterFiles;
+import static org.apache.jackrabbit.oak.segment.compaction.SegmentGCOptions.defaultGCOptions;
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Tests for {@link DataStoreCommand}
+ */
+@RunWith(Parameterized.class)
+public class DataStoreCommandTest {
+    private static Logger log = LoggerFactory.getLogger(DataStoreCommandTest.class);
+
+    @Rule
+    public final TemporaryFolder temporaryFolder = new TemporaryFolder(new File("target"));
+
+    private DataStoreFixture blobFixture;
+
+    private StoreFixture storeFixture;
+
+    private String additionalParams;
+
+    private DataStoreBlobStore setupDataStore;
+
+    private NodeStore store;
+
+    /**
+     * Invoked by the {@code Parameterized} runner with each (node store, data store)
+     * pair produced by {@link #fixtures()}.
+     */
+    public DataStoreCommandTest(StoreFixture storeFixture, DataStoreFixture blobFixture) {
+        this.storeFixture = storeFixture;
+        this.blobFixture = blobFixture;
+    }
+
+    /** Fixture pairs to run against; delegates to FixtureHelper (defined elsewhere in this commit). */
+    @Parameterized.Parameters(name="{index}: ({0} : {1})")
+    public static List<Object[]> fixtures() {
+        return FixtureHelper.get();
+    }
+
+    /**
+     * Creates the blob store and node store for the current fixture pair, picks the
+     * command-line flag that opens the store read-write (segment vs. document flavor),
+     * and registers this repository's id in the shared data store so GC can correlate
+     * marked references per repository.
+     */
+    @Before
+    public void setup() throws Exception {
+        setupDataStore = blobFixture.init(temporaryFolder);
+        store = storeFixture.init(setupDataStore, temporaryFolder.newFolder());
+        if (store instanceof SegmentNodeStore) {
+            additionalParams = "--read-write";
+        } else {
+            additionalParams = "--ds-read-write";
+        }
+        // Register an (empty) repository marker record keyed by the cluster/repository id.
+        String repoId = ClusterRepositoryInfo.getOrCreateId(store);
+        setupDataStore.addMetadataRecord(new ByteArrayInputStream(new byte[0]),
+            REPOSITORY.getNameFromId(repoId));
+    }
+
+    /**
+     * Seeds the repository with test content and returns bookkeeping about it:
+     * <ul>
+     *   <li>creates {@code numBlobs} binaries under /c0..，each chunk id recorded in
+     *       {@code data.added} and mapped to its node path in {@code data.idToPath};</li>
+     *   <li>deletes up to {@code numMaxDeletions} randomly chosen nodes so their chunks
+     *       become GC candidates ({@code data.deleted});</li>
+     *   <li>physically removes {@code missingDataStore} chunks from the data store to
+     *       simulate missing binaries ({@code data.missingDataStore});</li>
+     *   <li>runs the store-specific post step (revision GC / compaction) so the deleted
+     *       nodes are actually collectable.</li>
+     * </ul>
+     */
+    private static Data prepareData(StoreFixture storeFixture, DataStoreFixture blobFixture, int numBlobs,
+        int numMaxDeletions, int missingDataStore) throws Exception {
+
+        DataStoreBlobStore blobStore = blobFixture.getDataStore();
+        NodeStore store = storeFixture.getNodeStore();
+        storeFixture.preDataPrepare();
+
+        Data data = new Data();
+
+        // Pick up to numMaxDeletions distinct node indices at random; duplicates from
+        // rand are skipped, so fewer than numMaxDeletions may actually be deleted.
+        List<Integer> toBeDeleted = Lists.newArrayList();
+        Random rand = new Random();
+        for (int i = 0; i < numMaxDeletions; i++) {
+            int n = rand.nextInt(numBlobs);
+            if (!toBeDeleted.contains(n)) {
+                toBeDeleted.add(n);
+            }
+        }
+
+        NodeBuilder a = store.getRoot().builder();
+        for (int i = 0; i < numBlobs; i++) {
+            Blob b = store.createBlob(randomStream(i, 18342));
+            // A blob may be split into multiple chunks; track every chunk id.
+            Iterator<String> idIter = blobStore.resolveChunks(b.getContentIdentity());
+            while (idIter.hasNext()) {
+                String chunk = idIter.next();
+                data.added.add(chunk);
+                data.idToPath.put(chunk, "/c" + i);
+                if (toBeDeleted.contains(i)) {
+                    data.deleted.add(chunk);
+                }
+            }
+            a.child("c" + i).setProperty("x", b);
+        }
+
+        store.merge(a, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+        log.info("Created Data : {}", data);
+
+        for (int id : toBeDeleted) {
+            delete("c" + id, store);
+        }
+        log.info("Deleted nodes : {}", toBeDeleted.size());
+
+        // Choose the first `missingDataStore` still-referenced chunks (i.e. not in the
+        // deleted set) to remove from the data store below.
+        // NOTE(review): data.added is a HashSet, so which chunks are selected is
+        // iteration-order dependent — confirm the tests do not rely on specific ids.
+        int missing = 0;
+        Iterator<String> iterator = data.added.iterator();
+        while (iterator.hasNext()) {
+            if (missing < missingDataStore) {
+                String id = iterator.next();
+                if (!data.deleted.contains(id)) {
+                    data.missingDataStore.add(id);
+                    missing++;
+                }
+            } else {
+                break;
+            }
+        }
+
+        // Physically delete the selected chunks; maxLastModifiedTime=0 means no age
+        // filter, and each call must report exactly one chunk removed.
+        for (String id : data.missingDataStore) {
+            long count = blobStore.countDeleteChunks(ImmutableList.of(id), 0);
+            assertEquals(1, count);
+        }
+
+        // Sleep a little to make eligible for cleanup
+        TimeUnit.MILLISECONDS.sleep(10);
+
+        storeFixture.postDataPrepare();
+
+        return data;
+    }
+
+    /** Removes the top-level node {@code nodeId} and commits the change. */
+    protected static void delete(String nodeId, NodeStore nodeStore) throws CommitFailedException {
+        NodeBuilder builder = nodeStore.getRoot().builder();
+        builder.child(nodeId).remove();
+
+        nodeStore.merge(builder, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+    }
+
+    /** Cleans up the node store first, then the data store (buckets/containers/folders). */
+    @After
+    public void tearDown() {
+        storeFixture.after();
+        blobFixture.after();
+    }
+
+    /**
+     * Running the command with neither --check-consistency nor --collect-garbage must
+     * fail with "No actions specified".
+     */
+    @Test
+    public void testMissingOpParams() throws Exception {
+        storeFixture.close();
+        File dump = temporaryFolder.newFolder();
+        List<String> argsList = Lists
+            .newArrayList("--" + getOption(blobFixture.getType()), blobFixture.getConfigPath(), "--out-dir",
+                dump.getAbsolutePath(), storeFixture.getConnectionString(), "--reset-log-config", "false", "--work-dir",
+                temporaryFolder.newFolder().getAbsolutePath());
+        if (!Strings.isNullOrEmpty(additionalParams)) {
+            argsList.add(additionalParams);
+        }
+
+        log.info("Running testMissingOpParams: {}", argsList);
+        testIncorrectParams(argsList, Lists.newArrayList("No actions specified"), DataStoreCommand.class);
+    }
+
+    /**
+     * Only for Segment/Tar: omitting any BlobStore option must fail with
+     * "No BlobStore specified".
+     * NOTE(review): storeFixture.close() is executed before the Assume, so non-segment
+     * fixtures are closed and then skipped — confirm that is intentional.
+     * @throws Exception
+     */
+    @Test
+    public void testTarNoDS() throws Exception {
+        storeFixture.close();
+        Assume.assumeTrue(storeFixture instanceof StoreFixture.SegmentStoreFixture);
+
+        File dump = temporaryFolder.newFolder();
+        List<String> argsList = Lists
+            .newArrayList("--check-consistency", storeFixture.getConnectionString(),
+                "--out-dir", dump.getAbsolutePath(), "--reset-log-config", "false", "--work-dir",
+                temporaryFolder.newFolder().getAbsolutePath());
+        if (!Strings.isNullOrEmpty(additionalParams)) {
+            argsList.add(additionalParams);
+        }
+
+        testIncorrectParams(argsList, Lists.newArrayList("No BlobStore specified"), DataStoreCommand.class);
+    }
+
+    /** Consistency check with 1 chunk missing from the data store. */
+    @Test
+    public void testConsistencyMissing() throws Exception {
+        File dump = temporaryFolder.newFolder();
+        Data data = prepareData(storeFixture, blobFixture, 10, 5, 1);
+        storeFixture.close();
+
+        testConsistency(dump, data, false);
+    }
+
+    /** Same as {@link #testConsistencyMissing()} but with --verbose output (ids + paths). */
+    @Test
+    public void testConsistencyVerbose() throws Exception {
+        File dump = temporaryFolder.newFolder();
+        Data data = prepareData(storeFixture, blobFixture, 10, 5, 1);
+        storeFixture.close();
+
+        testConsistency(dump, data, true);
+    }
+
+    /** Consistency check with no missing chunks — gccand output should be empty. */
+    @Test
+    public void testConsistencyNoMissing() throws Exception {
+        File dump = temporaryFolder.newFolder();
+        Data data = prepareData(storeFixture, blobFixture, 10, 5, 0);
+        storeFixture.close();
+
+        testConsistency(dump, data, false);
+    }
+
+
+    /** Full mark+sweep GC with max-age 0: deleted chunks must be purged. */
+    @Test
+    public void gc() throws Exception {
+        File dump = temporaryFolder.newFolder();
+        Data data = prepareData(storeFixture, blobFixture, 10, 5, 1);
+        storeFixture.close();
+
+        testGc(dump, data, 0, false);
+    }
+
+    /** GC when nothing was deleted: no chunks should be removed. */
+    @Test
+    public void gcNoDeletion() throws Exception {
+        File dump = temporaryFolder.newFolder();
+        Data data = prepareData(storeFixture, blobFixture, 10, 0, 1);
+        storeFixture.close();
+
+        testGc(dump, data, 0, false);
+    }
+
+    /** GC with a large max-age: nothing is old enough to sweep, so nothing is removed. */
+    @Test
+    public void gcNoneOld() throws Exception {
+        File dump = temporaryFolder.newFolder();
+        Data data = prepareData(storeFixture, blobFixture, 10, 5, 1);
+        storeFixture.close();
+
+        testGc(dump, data, 100, false);
+    }
+
+    /** Mark-only GC: references are collected but no sweep (no avail-/gccand- output). */
+    @Test
+    public void gcOnlyMark() throws Exception {
+        File dump = temporaryFolder.newFolder();
+        Data data = prepareData(storeFixture, blobFixture, 10, 5, 1);
+        storeFixture.close();
+
+        testGc(dump, data, 100, true);
+    }
+
+    /**
+     * Shared-datastore GC where a second "remote" repository (an in-memory node store
+     * sharing the same data store) has already marked its references: GC must honor the
+     * remote repository's references, so sweep proceeds and remote blobs survive.
+     */
+    @Test
+    public void gcMarkOnRemote() throws Exception {
+        MemoryBlobStoreNodeStore memNodeStore = new MemoryBlobStoreNodeStore(setupDataStore);
+        String rep2Id = ClusterRepositoryInfo.getOrCreateId(memNodeStore);
+        setupDataStore.addMetadataRecord(new ByteArrayInputStream(new byte[0]),
+            REPOSITORY.getNameFromId(rep2Id));
+        // dummyData also uploads the remote repository's references file, which is what
+        // allows the sweep phase to run.
+        Map<String, String> idMapping =
+            dummyData(memNodeStore, rep2Id, store, setupDataStore, temporaryFolder.newFile());
+
+        File dump = temporaryFolder.newFolder();
+        Data data = prepareData(storeFixture, blobFixture, 10, 5, 1);
+        data.added.addAll(idMapping.keySet());
+        data.idToPath.putAll(idMapping);
+
+        storeFixture.close();
+
+        testGc(dump, data, 0, false);
+    }
+
+    /**
+     * Shared-datastore GC where a second repository is registered but has NOT uploaded
+     * its references: the collector must refuse to sweep and log
+     * "Not all repositories have marked references available".
+     */
+    @Test
+    public void gcNoMarkOnRemote() throws Exception {
+        MemoryBlobStoreNodeStore memNodeStore = new MemoryBlobStoreNodeStore(setupDataStore);
+        String rep2Id = ClusterRepositoryInfo.getOrCreateId(memNodeStore);
+        setupDataStore.addMetadataRecord(new ByteArrayInputStream(new byte[0]),
+            REPOSITORY.getNameFromId(rep2Id));
+
+        File dump = temporaryFolder.newFolder();
+        Data data = prepareData(storeFixture, blobFixture, 10, 5, 1);
+        storeFixture.close();
+
+        List<String> argsList = Lists
+            .newArrayList("--collect-garbage", "--max-age", String.valueOf(0), "--" + getOption(blobFixture.getType()),
+                blobFixture.getConfigPath(), storeFixture.getConnectionString(), "--out-dir", dump.getAbsolutePath(),
+                "--reset-log-config", "false", "--work-dir", temporaryFolder.newFolder().getAbsolutePath());
+        if (!Strings.isNullOrEmpty(additionalParams)) {
+            argsList.add(additionalParams);
+        }
+
+        testIncorrectParams(argsList, Lists.newArrayList("Not all repositories have marked references available : "),
+            MarkSweepGarbageCollector.class);
+    }
+
+    /**
+     * Only for Segment/Tar: consistency check against a --fake-ds-path (an empty
+     * directory posing as the data store). No blobs are available, so "avail-" must be
+     * empty while "marked-" still lists all referenced chunks.
+     * @throws Exception
+     */
+    @Test
+    public void testConsistencyFakeDS() throws Exception {
+        Assume.assumeTrue(storeFixture instanceof StoreFixture.SegmentStoreFixture);
+        File dump = temporaryFolder.newFolder();
+        File dsPath = temporaryFolder.newFolder();
+
+        Data data = prepareData(storeFixture, blobFixture, 10, 5, 0);
+        storeFixture.close();
+
+        List<String> argsList = Lists
+            .newArrayList("--check-consistency", "--fake-ds-path", dsPath.getAbsolutePath(),
+                storeFixture.getConnectionString(), "--out-dir", dump.getAbsolutePath(), "--work-dir",
+                temporaryFolder.newFolder().getAbsolutePath());
+        if (!Strings.isNullOrEmpty(additionalParams)) {
+            argsList.add(additionalParams);
+        }
+        DataStoreCommand cmd = new DataStoreCommand();
+
+        cmd.execute(argsList.toArray(new String[0]));
+        assertFileEquals(dump, "avail-", Sets.newHashSet());
+        assertFileEquals(dump, "marked-", Sets.difference(data.added, data.deleted));
+    }
+
+    /**
+     * Runs --check-consistency and validates the three output files in {@code dump}:
+     * "avail-" (chunks present in the DS), "marked-" (referenced chunks) and
+     * "gccand-" (referenced-but-missing chunks). The id/path encoding of the latter two
+     * depends on verbosity and on the node-store type (see inline comments).
+     */
+    private void testConsistency(File dump, Data data, boolean verbose) throws Exception {
+        List<String> argsList = Lists
+            .newArrayList("--check-consistency", "--" + getOption(blobFixture.getType()), blobFixture.getConfigPath(),
+                storeFixture.getConnectionString(), "--out-dir", dump.getAbsolutePath(), "--work-dir",
+                temporaryFolder.newFolder().getAbsolutePath());
+        if (!Strings.isNullOrEmpty(additionalParams)) {
+            argsList.add(additionalParams);
+        }
+
+        if (verbose) {
+            argsList.add("--verbose");
+        }
+        DataStoreCommand cmd = new DataStoreCommand();
+        cmd.execute(argsList.toArray(new String[0]));
+
+        assertFileEquals(dump, "avail-", Sets.difference(data.added, data.missingDataStore));
+
+        // Only verbose or Document would have paths suffixed
+        assertFileEquals(dump, "marked-", (verbose || storeFixture instanceof StoreFixture.MongoStoreFixture) ?
+            encodedIdsAndPath(Sets.difference(data.added, data.deleted), blobFixture.getType(), data.idToPath, false) :
+            Sets.difference(data.added, data.deleted));
+
+        // Verbose would have paths as well as ids changed but normally only DocumentNS would have paths suffixed
+        assertFileEquals(dump, "gccand-", verbose ?
+            encodedIdsAndPath(data.missingDataStore, blobFixture.getType(), data.idToPath, true) :
+            (storeFixture instanceof StoreFixture.MongoStoreFixture) ?
+                encodedIdsAndPath(data.missingDataStore, blobFixture.getType(), data.idToPath, false) :
+                data.missingDataStore);
+    }
+
+
+    /**
+     * Runs --collect-garbage and validates both the dump files and the surviving
+     * chunk set in the data store.
+     *
+     * @param maxAge   GC max-age in seconds; with a positive value nothing in this
+     *                 freshly created repository is old enough to be swept
+     * @param markOnly when true only the mark phase runs, so no avail-/gccand- files
+     *                 are produced and nothing is deleted
+     */
+    private void testGc(File dump, Data data, long maxAge, boolean markOnly) throws Exception {
+        List<String> argsList = Lists
+            .newArrayList("--collect-garbage", String.valueOf(markOnly), "--max-age", String.valueOf(maxAge),
+                "--" + getOption(blobFixture.getType()), blobFixture.getConfigPath(),
+                storeFixture.getConnectionString(), "--out-dir", dump.getAbsolutePath(), "--work-dir",
+                temporaryFolder.newFolder().getAbsolutePath());
+        if (!Strings.isNullOrEmpty(additionalParams)) {
+            argsList.add(additionalParams);
+        }
+
+        DataStoreCommand cmd = new DataStoreCommand();
+        cmd.execute(argsList.toArray(new String[0]));
+
+        if (!markOnly) {
+            assertFileEquals(dump, "avail-", Sets.difference(data.added, data.missingDataStore));
+        } else {
+            assertFileNull(dump, "avail-");
+        }
+
+        assertFileEquals(dump, "marked-", Sets.difference(data.added, data.deleted));
+        if (!markOnly) {
+            assertFileEquals(dump, "gccand-", data.deleted);
+        } else {
+            assertFileNull(dump, "gccand-");
+        }
+
+        // The data store itself: deleted chunks are gone only when sweep ran (maxAge <= 0).
+        Sets.SetView<String> blobsBeforeGc = Sets.difference(data.added, data.missingDataStore);
+        if (maxAge <= 0) {
+            assertEquals(Sets.difference(blobsBeforeGc, data.deleted), blobs(setupDataStore));
+        } else {
+            assertEquals(blobsBeforeGc, blobs(setupDataStore));
+        }
+    }
+
+    /**
+     * Executes the command with (intentionally) bad arguments and asserts that the
+     * given logger emitted an INFO message containing {@code assertMsg.get(0)}.
+     * Execution exceptions are expected and only logged.
+     * NOTE(review): if no matching log line is captured, getLogs().get(0) throws
+     * IndexOutOfBoundsException rather than a clean assertion failure — consider
+     * asserting on the list size instead.
+     */
+    public static void testIncorrectParams(List<String> argList, ArrayList<String> assertMsg, Class logger) {
+        LogCustomizer customLogs = LogCustomizer
+            .forLogger(logger.getName())
+            .enable(Level.INFO)
+            .filter(Level.INFO)
+            .contains(assertMsg.get(0))
+            .create();
+        customLogs.starting();
+
+        DataStoreCommand cmd = new DataStoreCommand();
+        try {
+            cmd.execute(argList.toArray(new String[0]));
+        } catch (Exception e) {
+            // Failure is the expected outcome here; only the log output is asserted.
+            log.error("", e);
+        }
+
+        Assert.assertNotNull(customLogs.getLogs().get(0));
+        customLogs.finished();
+    }
+
+    /**
+     * Simulates a second repository sharing the data store: creates two blobs in the
+     * in-memory node store, writes their sorted chunk ids to {@code f} and uploads it
+     * as the REFERENCES metadata record for {@code rep2Id}, so GC sees the remote
+     * repository's marked references.
+     *
+     * @return mapping of chunk id to the node path ("/d0", "/d1") referencing it
+     */
+    private static Map<String, String> dummyData(MemoryBlobStoreNodeStore memNodeStore, String rep2Id, NodeStore store,
+        DataStoreBlobStore setupDataStore, File f)
+        throws IOException, CommitFailedException, DataStoreException {
+        List<String> list = Lists.newArrayList();
+        Map<String, String> idMapping = Maps.newHashMap();
+        NodeBuilder a = memNodeStore.getRoot().builder();
+        for (int i = 0; i < 2; i++) {
+            // Seed offset +100 avoids colliding with the blobs made by prepareData.
+            Blob b = store.createBlob(randomStream(i+100, 18342));
+            Iterator<String> idIter = setupDataStore.resolveChunks(b.getContentIdentity());
+            while (idIter.hasNext()) {
+                String id = idIter.next();
+                list.add(id);
+                idMapping.put(id, "/d" + i);
+            }
+            a.child("d" + i).setProperty("x", b);
+        }
+        memNodeStore.merge(a, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        writeStrings(list.iterator(), f, false);
+        sort(f);
+        setupDataStore.addMetadataRecord(f, REFERENCES.getNameFromId(rep2Id));
+        return idMapping;
+    }
+
+    /**
+     * Asserts that exactly the ids in {@code blobsAdded} appear (one per line) in the
+     * dump file whose name starts with {@code prefix}.
+     * NOTE(review): the FileInputStream is handed to readStringsAsSet with close=false —
+     * verify the helper closes the stream, otherwise this leaks a file handle per call.
+     */
+    private static void assertFileEquals(File dump, String prefix, Set<String> blobsAdded)
+        throws IOException {
+        File file = filterFiles(dump, prefix);
+        Assert.assertNotNull(file);
+        Assert.assertTrue(file.exists());
+        assertEquals(blobsAdded,
+            FileIOUtils.readStringsAsSet(new FileInputStream(file), false));
+    }
+
+    /** Asserts that no dump file with the given prefix was produced. */
+    private static void assertFileNull(File dump, String prefix) {
+        File file = filterFiles(dump, prefix);
+        Assert.assertNull(file);
+    }
+
+    /** Returns all chunk ids currently present in the blob store (no age filter). */
+    private static Set<String> blobs(GarbageCollectableBlobStore blobStore) throws Exception {
+        Iterator<String> cur = blobStore.getAllChunkIds(0);
+
+        Set<String> existing = Sets.newHashSet();
+        while (cur.hasNext()) {
+            existing.add(cur.next());
+        }
+        return existing;
+    }
+
+    /** Deterministic pseudo-random byte stream: same seed always yields the same content. */
+    static InputStream randomStream(int seed, int size) {
+        Random r = new Random(seed);
+        byte[] data = new byte[size];
+        r.nextBytes(data);
+        return new ByteArrayInputStream(data);
+    }
+
+    /**
+     * Serializes the properties to {@code cfgFile} in OSGi config-admin format (the
+     * format the oak-run --s3ds/--azureds options expect) and returns its path.
+     * NOTE(review): fos is not explicitly closed here — confirm ConfigurationHandler.write
+     * closes the stream, otherwise wrap in try-with-resources.
+     */
+    private static String createTempConfig(File cfgFile, Properties props) throws IOException {
+        FileOutputStream fos = FileUtils.openOutputStream(cfgFile);
+        ConfigurationHandler.write(fos, props);
+        return cfgFile.getAbsolutePath();
+    }
+
+    /**
+     * Builds the expected verbose output lines: "&lt;id&gt;,&lt;node path&gt;" for each id,
+     * optionally encoding the id into its on-disk/backend form via {@link #encodeId}.
+     */
+    private static Set<String> encodedIdsAndPath(Set<String> ids, Type dsOption, Map<String, String> idToNodes,
+        boolean encodeId) {
+
+        return Sets.newHashSet(Iterators.transform(ids.iterator(), new Function<String, String>() {
+            @Nullable @Override public String apply(@Nullable String input) {
+                return Joiner.on(",").join(encodeId ? encodeId(input, dsOption) : input, idToNodes.get(input));
+            }
+        }));
+    }
+
+    /**
+     * Translates a logical blob id (hash '#' length) into the backend's storage form:
+     * FileDataStore uses a 3-level directory layout (first 3 byte-pairs of the hash),
+     * S3/Azure insert a dash after the first 4 characters; other types are unchanged.
+     */
+    static String encodeId(String id, Type dsType) {
+        // idLengthSepList.get(0) is the content hash; the trailing "#<length>" is dropped.
+        List<String> idLengthSepList = Splitter.on(HASH).trimResults().omitEmptyStrings().splitToList(id);
+        String blobId = idLengthSepList.get(0);
+
+        if (dsType == Type.FDS) {
+            return (blobId.substring(0, 2) + FILE_SEPARATOR.value() + blobId.substring(2, 4) + FILE_SEPARATOR.value() + blobId
+                .substring(4, 6) + FILE_SEPARATOR.value() + blobId);
+        } else if (dsType == Type.S3 || dsType == Type.AZURE) {
+            return (blobId.substring(0, 4) + DASH + blobId.substring(4));
+        }
+        return id;
+    }
+
+    /** Maps a data-store fixture type to the corresponding oak-run CLI option name. */
+    private static String getOption(Type dsOption) {
+        if (dsOption == Type.FDS) {
+            return "fds";
+        } else if (dsOption == Type.S3) {
+            return "s3ds";
+        } else if (dsOption == Type.AZURE) {
+            return "azureds";
+        } else {
+            return "fake-ds-path";
+        }
+    }
+
+    /**
+     * Bookkeeping for the content created by prepareData:
+     * all chunk ids added, the id-to-node-path mapping, the chunk ids whose referencing
+     * nodes were deleted (GC candidates), and the chunk ids physically removed from the
+     * data store (consistency-check candidates).
+     */
+    static class Data {
+        // every chunk id created
+        private Set<String> added;
+        // chunk id -> node path referencing it
+        private Map<String, String> idToPath;
+        // chunk ids whose referencing node was deleted
+        private Set<String> deleted;
+        // chunk ids deleted from the data store itself
+        private Set<String> missingDataStore;
+
+        public Data() {
+            added = Sets.newHashSet();
+            idToPath = Maps.newHashMap();
+            deleted = Sets.newHashSet();
+            missingDataStore = Sets.newHashSet();
+        }
+    }
+
+    /**
+     * Node-store fixture abstraction: lifecycle (init/close/after) plus hooks executed
+     * around data preparation so each store type can make deleted content collectable
+     * (revision GC for DocumentNS, compaction+cleanup for SegmentNS).
+     */
+    interface StoreFixture {
+        // Creates the node store backed by the given blob store.
+        NodeStore init(DataStoreBlobStore blobStore, File storeFile) throws Exception;
+
+        NodeStore getNodeStore() throws Exception;
+
+        // Connection string / path passed as the oak-run store argument.
+        String getConnectionString();
+
+        boolean isAvailable();
+
+        // Hook run before test content is created.
+        void preDataPrepare() throws Exception;
+
+        // Hook run after deletions, to make deleted content actually collectable.
+        void postDataPrepare() throws Exception;
+
+        void close();
+
+        void after();
+
+        StoreFixture MONGO = new MongoStoreFixture();
+        StoreFixture SEGMENT = new SegmentStoreFixture();
+
+
+        class MongoStoreFixture implements StoreFixture {
+            private final Clock.Virtual clock;
+            MongoConnection c;
+            DocumentMK.Builder builder;
+            private DocumentNodeStore nodeStore;
+
+            public MongoStoreFixture() {
+                c = MongoUtils.getConnection();
+                if (c != null) {
+                    MongoUtils.dropCollections(c.getDBName());
+                }
+                clock = new Clock.Virtual();
+            }
+
+            // NOTE(review): c.getMongoClient() below is called outside the null guard —
+            // this NPEs when Mongo is unavailable; presumably isAvailable() is checked
+            // by the fixture selection first — confirm.
+            @Override public NodeStore init(DataStoreBlobStore blobStore, File storeFile) {
+                c = MongoUtils.getConnection();
+                if (c != null) {
+                    MongoUtils.dropCollections(c.getDBName());
+                }
+                // Virtual clock lets the test advance time instantly for revision GC.
+                clock.waitUntil(Revision.getCurrentTimestamp());
+                builder = new DocumentMK.Builder().clock(clock).setMongoDB(c.getMongoClient(), c.getDBName());
+                nodeStore = builder.setBlobStore(blobStore).getNodeStore();
+
+                return nodeStore;
+            }
+
+            @Override public NodeStore getNodeStore() {
+                return nodeStore;
+            }
+
+            @Override public String getConnectionString() {
+                return MongoUtils.URL;
+            }
+
+            // Advances the virtual clock past the GC age and runs revision GC so the
+            // deleted nodes' previous documents are purged.
+            // NOTE(review): the comment says "hours" but TimeUnit.MINUTES is used —
+            // confirm which is intended.
+            @Override public void postDataPrepare() throws Exception {
+                long maxAge = 20; // hours
+                // 1. Go past GC age and check no GC done as nothing deleted
+                clock.waitUntil(clock.getTime() + TimeUnit.MINUTES.toMillis(maxAge));
+                VersionGarbageCollector vGC = nodeStore.getVersionGarbageCollector();
+                VersionGarbageCollector.VersionGCStats stats = vGC.gc(0, TimeUnit.MILLISECONDS);
+            }
+
+            @Override public void close() {
+                nodeStore.dispose();
+            }
+
+            @Override public boolean isAvailable() {
+                return c != null;
+            }
+
+            @Override public void preDataPrepare() {
+            }
+
+            @Override public void after() {
+                MongoUtils.dropCollections(c.getDBName());
+                nodeStore.dispose();
+            }
+        }
+
+        class SegmentStoreFixture implements StoreFixture {
+            private FileStore fileStore;
+            private SegmentNodeStore store;
+            private SegmentGCOptions gcOptions = defaultGCOptions();
+            private String storePath;
+
+            @Override public NodeStore init(DataStoreBlobStore blobStore, File storeFile)
+                throws Exception {
+                storePath = storeFile.getAbsolutePath();
+                FileStoreBuilder fileStoreBuilder =
+                    FileStoreBuilder.fileStoreBuilder(storeFile);
+
+                fileStore = fileStoreBuilder.withBlobStore(blobStore).withMaxFileSize(256).withSegmentCacheSize(64).build();
+                store = SegmentNodeStoreBuilders.builder(fileStore).build();
+                return store;
+            }
+
+            @Override public NodeStore getNodeStore() {
+                return store;
+            }
+
+            @Override public String getConnectionString() {
+                return storePath;
+            }
+
+            // Compact once per retained generation, then clean up, so segments holding
+            // the deleted references are actually reclaimed.
+            @Override public void postDataPrepare() throws Exception {
+                for (int k = 0; k < gcOptions.getRetainedGenerations(); k++) {
+                    fileStore.compactFull();
+                }
+                fileStore.cleanup();
+            }
+
+            @Override public void close() {
+                fileStore.close();
+            }
+
+            @Override public void after() {
+            }
+
+            @Override public boolean isAvailable() {
+                return true;
+            }
+
+            // NOTE(review): the comment says "size < 16KB" but randomStream(j, 16384)
+            // creates exactly-16KB blobs — confirm these are actually inlined.
+            @Override public void preDataPrepare() throws Exception {
+                NodeBuilder a = store.getRoot().builder();
+
+                /* Create garbage by creating in-lined blobs (size < 16KB) */
+                int number = 500;
+                NodeBuilder content = a.child("content");
+                for (int i = 0; i < number; i++) {
+                    NodeBuilder c = content.child("x" + i);
+                    for (int j = 0; j < 5; j++) {
+                        c.setProperty("p" + j, store.createBlob(randomStream(j, 16384)));
+                    }
+                }
+                store.merge(a, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+            }
+        }
+    }
+
+    interface DataStoreFixture {
+        // Whether the backend is configured/reachable (e.g. S3/Azure credentials present).
+        boolean isAvailable();
+
+        // Creates the blob store and writes the matching oak-run config file.
+        DataStoreBlobStore init(TemporaryFolder folder) throws Exception;
+
+        DataStoreBlobStore getDataStore();
+
+        // Path of the OSGi-format config file passed to the CLI option.
+        String getConfigPath();
+
+        Type getType();
+
+        // Backend cleanup (delete bucket/container).
+        void after();
+
+        DataStoreFixture S3 = new S3DataStoreFixture();
+        DataStoreFixture AZURE = new AzureDataStoreFixture();
+        DataStoreFixture FDS = new FileDataStoreFixture();
+
+        /**
+         * S3-backed data store; only runs when S3 credentials are configured. Uses a
+         * timestamp-suffixed bucket for isolation and cacheSize=0 to disable the local
+         * cache so chunk accounting is exact.
+         */
+        class S3DataStoreFixture implements DataStoreFixture {
+            DataStoreBlobStore blobStore;
+            String cfgFilePath;
+            String container;
+
+            @Override public boolean isAvailable() {
+                return S3DataStoreUtils.isS3Configured();
+            }
+
+            @Override public DataStoreBlobStore init(TemporaryFolder folder) throws Exception {
+                Properties props = S3DataStoreUtils.getS3Config();
+                props.setProperty("cacheSize", "0");
+                // Unique bucket per run to avoid cross-test interference.
+                container = props.getProperty(S3Constants.S3_BUCKET);
+                container = container + System.currentTimeMillis();
+                props.setProperty(S3Constants.S3_BUCKET, container);
+                DataStore ds = S3DataStoreUtils.getS3DataStore(S3DataStoreUtils.getFixtures().get(0), props,
+                    folder.newFolder().getAbsolutePath());
+                blobStore = new DataStoreBlobStore(ds);
+                cfgFilePath = createTempConfig(
+                    folder.newFile(getType().name() + String.valueOf(System.currentTimeMillis()) + ".config"), props);
+                return blobStore;
+            }
+
+            @Override public DataStoreBlobStore getDataStore() {
+                return blobStore;
+            }
+
+            @Override public String getConfigPath() {
+                return cfgFilePath;
+            }
+
+            @Override public Type getType() {
+                return Type.S3;
+            }
+
+            @Override public void after() {
+                try {
+                    S3DataStoreUtils.deleteBucket(container, new Date());
+                } catch (Exception e) {
+                    log.error("Error in cleaning the container {}", container, e);
+                }
+            }
+        }
+
+        /**
+         * Azure-blob-backed data store; only runs when Azure credentials are configured.
+         * Mirrors the S3 fixture: timestamp-suffixed container, local cache disabled.
+         */
+        class AzureDataStoreFixture implements DataStoreFixture {
+            DataStoreBlobStore blobStore;
+            String cfgFilePath;
+            String container;
+
+            @Override public boolean isAvailable() {
+                return AzureDataStoreUtils.isAzureConfigured();
+            }
+
+            @Override public DataStoreBlobStore init(TemporaryFolder folder) throws Exception {
+                Properties props = AzureDataStoreUtils.getAzureConfig();
+                props.setProperty("cacheSize", "0");
+                // Unique container per run to avoid cross-test interference.
+                container = props.getProperty(AzureConstants.AZURE_BLOB_CONTAINER_NAME);
+                container = container + System.currentTimeMillis();
+                props.setProperty(AzureConstants.AZURE_BLOB_CONTAINER_NAME, container);
+                DataStore ds = AzureDataStoreUtils.getAzureDataStore(props, folder.newFolder().getAbsolutePath());
+                blobStore = new DataStoreBlobStore(ds);
+                cfgFilePath = createTempConfig(
+                    folder.newFile(getType().name() + String.valueOf(System.currentTimeMillis()) + ".config"), props);
+                return blobStore;
+            }
+
+            @Override public DataStoreBlobStore getDataStore() {
+                return blobStore;
+            }
+
+            @Override public String getConfigPath() {
+                return cfgFilePath;
+            }
+
+            @Override public Type getType() {
+                return Type.AZURE;
+            }
+
+            @Override public void after() {
+                try {
+                    AzureDataStoreUtils.deleteContainer(container);
+                } catch (Exception e) {
+                    log.error("Error in cleaning the container {}", container, e);
+                }
+            }
+        }
+
+        class FileDataStoreFixture implements DataStoreFixture {
+            DataStoreBlobStore blobStore;
+            String cfgFilePath;
+            // For this fixture "container" is the local directory backing the FileDataStore.
+            String container;
+
+            // A local FileDataStore needs no external service, so it is always available.
+            @Override public boolean isAvailable() {
+                return true;
+            }
+
+            // Creates an OakFileDataStore rooted in a fresh temp directory and writes
+            // the matching configuration (path + minRecordLength) to a temp config file.
+            @Override public DataStoreBlobStore init(TemporaryFolder folder) throws Exception {
+                OakFileDataStore delegate = new OakFileDataStore();
+                container = folder.newFolder().getAbsolutePath();
+                delegate.setPath(container);
+                delegate.init(null);
+                blobStore = new DataStoreBlobStore(delegate);
+
+                File cfgFile = folder.newFile();
+                Properties props = new Properties();
+                props.put("path", container);
+                // Long.valueOf instead of the deprecated Long(long) boxing constructor.
+                props.put("minRecordLength", Long.valueOf(4096L));
+                cfgFilePath = createTempConfig(cfgFile, props);
+
+                return blobStore;
+            }
+
+            // Blob store created by init(); null until init() has run.
+            @Override public DataStoreBlobStore getDataStore() {
+                return blobStore;
+            }
+
+            // Path of the temp config file written by init().
+            @Override public String getConfigPath() {
+                return cfgFilePath;
+            }
+
+            @Override public Type getType() {
+                return Type.FDS;
+            }
+
+            // Nothing to clean up: the TemporaryFolder rule removes the backing directory.
+            @Override public void after() {
+            }
+        }
+    }
+
+    // Builds the parameterized-test matrix: every available node-store fixture
+    // crossed with every available data-store fixture.
+    static class FixtureHelper {
+        static List<StoreFixture> getStoreFixtures() {
+            return ImmutableList.of(StoreFixture.MONGO, StoreFixture.SEGMENT);
+        }
+
+        static List<DataStoreFixture> getDataStoreFixtures() {
+            return ImmutableList.of(DataStoreFixture.S3, DataStoreFixture.AZURE, DataStoreFixture.FDS);
+        }
+
+        // Returns {StoreFixture, DataStoreFixture} pairs, skipping any fixture
+        // whose backing service is not configured in this environment.
+        static List<Object[]> get() {
+            List<Object[]> combos = Lists.newArrayList();
+            for (StoreFixture store : getStoreFixtures()) {
+                if (!store.isAvailable()) {
+                    continue;
+                }
+                for (DataStoreFixture ds : getDataStoreFixtures()) {
+                    if (ds.isAvailable()) {
+                        combos.add(new Object[] {store, ds});
+                    }
+                }
+            }
+            return combos;
+        }
+    }
+}

Propchange: jackrabbit/oak/trunk/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCommandTest.java
------------------------------------------------------------------------------
    svn:eol-style = native