Posted to oak-commits@jackrabbit.apache.org by am...@apache.org on 2017/12/04 08:31:08 UTC

svn commit: r1817067 - in /jackrabbit/oak/trunk/oak-run: README.md src/main/java/org/apache/jackrabbit/oak/run/DataStoreCheckCommand.java src/test/java/org/apache/jackrabbit/oak/run/DataStoreCheckTest.java

Author: amitj
Date: Mon Dec  4 08:31:08 2017
New Revision: 1817067

URL: http://svn.apache.org/viewvc?rev=1817067&view=rev
Log:
OAK-6551: Add an option to datastorecheck command to include path details in the missing blobs report

- Path information for SegmentTar is now available with the --verbose option
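
For illustration, a consistency check that also reports node paths might be
invoked like this (a sketch only; the data store config and all paths below
are placeholders):

    java -jar oak-run.jar datastorecheck --consistency --verbose \
        --fds /path/to/datastore.cfg \
        --store /path/to/segmentstore \
        --repoHome /path/to/repository/home \
        --dump /path/to/dump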

Modified:
    jackrabbit/oak/trunk/oak-run/README.md
    jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCheckCommand.java
    jackrabbit/oak/trunk/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCheckTest.java

Modified: jackrabbit/oak/trunk/oak-run/README.md
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/README.md?rev=1817067&r1=1817066&r2=1817067&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/README.md (original)
+++ jackrabbit/oak/trunk/oak-run/README.md Mon Dec  4 08:31:08 2017
@@ -477,7 +477,9 @@ The following options are available:
     --nods           - To check for misconfigured external references when no data store should be there (Use instead of --s3ds or --fds)
    --repoHome       - Path of the local repository home folder. Mandatory for --consistency & --track options 
     --track          - Whether to force override the tracked ids (Valid only for --id & --consistency options)
-    --verbose        - Outputs backend friendly blobids. Adds the sub-directories created in FDS and the changes done for S3/Azure when stored in the respective container.
+    --verbose        - Outputs backend-friendly blob ids and also adds the node path (for SegmentNodeStore) from which each blob is referenced.
+                       This option is typically slower, since it requires a traversal of the whole repository.
+                       Adds the sub-directories created in FDS and the changes done for S3/Azure when stored in the respective container.
 Note:
 
 The command to be executed for S3DataStore
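
With --verbose, a reference entry pairs the backend-friendly blob id with the
path of the referencing property, joined by the command's delimiter (a comma
in the tests below). A hypothetical line:

    <backend-friendly-blob-id>,/content/asset/jcr:content/binary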

Modified: jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCheckCommand.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCheckCommand.java?rev=1817067&r1=1817066&r2=1817067&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCheckCommand.java (original)
+++ jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCheckCommand.java Mon Dec  4 08:31:08 2017
@@ -16,18 +16,6 @@
  */
 package org.apache.jackrabbit.oak.run;
 
-import static com.google.common.base.StandardSystemProperty.JAVA_IO_TMPDIR;
-import static com.google.common.base.StandardSystemProperty.FILE_SEPARATOR;
-import static com.google.common.base.Stopwatch.createStarted;
-import static com.google.common.io.Closeables.close;
-import static java.io.File.createTempFile;
-import static java.util.Arrays.asList;
-import static org.apache.commons.io.FileUtils.forceDelete;
-import static org.apache.commons.io.FileUtils.listFiles;
-import static org.apache.jackrabbit.oak.commons.FileIOUtils.sort;
-import static org.apache.jackrabbit.oak.commons.FileIOUtils.writeAsLine;
-import static org.apache.jackrabbit.oak.commons.FileIOUtils.writeStrings;
-
 import java.io.BufferedWriter;
 import java.io.Closeable;
 import java.io.File;
@@ -43,37 +31,60 @@ import java.util.concurrent.atomic.Atomi
 
 import javax.annotation.Nullable;
 
-import com.google.common.base.Splitter;
-import org.apache.commons.io.FileUtils;
-import org.apache.commons.io.FilenameUtils;
-import org.apache.commons.io.LineIterator;
-import org.apache.commons.io.filefilter.FileFilterUtils;
-import org.apache.jackrabbit.oak.commons.FileIOUtils;
-import org.apache.jackrabbit.oak.commons.FileIOUtils.FileLineDifferenceIterator;
-import org.apache.jackrabbit.oak.run.commons.Command;
-import org.apache.jackrabbit.oak.plugins.blob.BlobReferenceRetriever;
-import org.apache.jackrabbit.oak.plugins.blob.ReferenceCollector;
-import org.apache.jackrabbit.oak.plugins.document.DocumentBlobReferenceRetriever;
-import org.apache.jackrabbit.oak.plugins.document.DocumentMK;
-import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore;
-import org.apache.jackrabbit.oak.spi.blob.GarbageCollectableBlobStore;
-
 import com.google.common.base.Charsets;
 import com.google.common.base.Function;
 import com.google.common.base.Joiner;
+import com.google.common.base.Splitter;
 import com.google.common.base.Stopwatch;
 import com.google.common.collect.Maps;
+import com.google.common.io.Closeables;
 import com.google.common.io.Closer;
 import com.google.common.io.Files;
 import com.mongodb.MongoClient;
 import com.mongodb.MongoClientURI;
 import com.mongodb.MongoURI;
-
 import joptsimple.ArgumentAcceptingOptionSpec;
 import joptsimple.OptionParser;
 import joptsimple.OptionSet;
 import joptsimple.OptionSpec;
 import joptsimple.OptionSpecBuilder;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.FilenameUtils;
+import org.apache.commons.io.LineIterator;
+import org.apache.commons.io.filefilter.FileFilterUtils;
+import org.apache.jackrabbit.oak.api.Blob;
+import org.apache.jackrabbit.oak.api.PropertyState;
+import org.apache.jackrabbit.oak.api.Type;
+import org.apache.jackrabbit.oak.commons.FileIOUtils;
+import org.apache.jackrabbit.oak.commons.FileIOUtils.FileLineDifferenceIterator;
+import org.apache.jackrabbit.oak.commons.PathUtils;
+import org.apache.jackrabbit.oak.plugins.blob.BlobReferenceRetriever;
+import org.apache.jackrabbit.oak.plugins.blob.ReferenceCollector;
+import org.apache.jackrabbit.oak.plugins.document.DocumentBlobReferenceRetriever;
+import org.apache.jackrabbit.oak.plugins.document.DocumentMK;
+import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore;
+import org.apache.jackrabbit.oak.plugins.segment.SegmentNodeStore;
+import org.apache.jackrabbit.oak.run.commons.Command;
+import org.apache.jackrabbit.oak.segment.SegmentBlobReferenceRetriever;
+import org.apache.jackrabbit.oak.segment.SegmentNodeStoreBuilders;
+import org.apache.jackrabbit.oak.segment.file.FileStore;
+import org.apache.jackrabbit.oak.spi.blob.GarbageCollectableBlobStore;
+import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry;
+import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.apache.jackrabbit.oak.spi.state.NodeStore;
+
+import static com.google.common.base.StandardSystemProperty.FILE_SEPARATOR;
+import static com.google.common.base.StandardSystemProperty.JAVA_IO_TMPDIR;
+import static com.google.common.base.Stopwatch.createStarted;
+import static com.google.common.io.Closeables.close;
+import static java.io.File.createTempFile;
+import static java.util.Arrays.asList;
+import static org.apache.commons.io.FileUtils.forceDelete;
+import static org.apache.commons.io.FileUtils.listFiles;
+import static org.apache.jackrabbit.oak.commons.FileIOUtils.sort;
+import static org.apache.jackrabbit.oak.commons.FileIOUtils.writeAsLine;
+import static org.apache.jackrabbit.oak.commons.FileIOUtils.writeStrings;
+import static org.apache.jackrabbit.oak.segment.file.FileStoreBuilder.fileStoreBuilder;
 
 /**
  * Command to check data store consistency and also optionally retrieve ids
@@ -87,6 +98,13 @@ public class DataStoreCheckCommand imple
     private static final String DASH = "-";
     private static final String HASH = "#";
 
+    private static final Comparator<String> idComparator = new Comparator<String>() {
+        @Override
+        public int compare(String s1, String s2) {
+            return s1.split(DELIM)[0].compareTo(s2.split(DELIM)[0]);
+        }
+    };
+
     @Override
     public void execute(String... args) throws Exception {
         OptionParser parser = new OptionParser();
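
The extracted idComparator orders dump lines by the portion before DELIM, so
entries of the form id<DELIM>path sort by blob id regardless of path. A
minimal self-contained sketch of that behavior, assuming a comma delimiter as
in the tests:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Comparator;
    import java.util.List;

    public class IdSortSketch {
        public static void main(String[] args) {
            // "id,path" lines: compare only the id before the comma.
            List<String> lines = new ArrayList<>(
                Arrays.asList("b2#18,/c1/x", "a1#18,/c0/x"));
            lines.sort(Comparator.comparing(line -> line.split(",")[0]));
            System.out.println(lines); // [a1#18,/c0/x, b2#18,/c1/x]
        }
    }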
@@ -154,18 +172,24 @@ public class DataStoreCheckCommand imple
 
             GarbageCollectableBlobStore blobStore  = null;
             BlobReferenceRetriever marker = null;
+            NodeStore nodeStore = null;
             if (options.has(store)) {
                 String source = options.valueOf(store);
                 if (source.startsWith(MongoURI.MONGODB_PREFIX)) {
                     MongoClientURI uri = new MongoClientURI(source);
                     MongoClient client = new MongoClient(uri);
-                    DocumentNodeStore nodeStore =
+                    DocumentNodeStore docNodeStore =
                         new DocumentMK.Builder().setMongoDB(client.getDB(uri.getDatabase())).getNodeStore();
-                    closer.register(Utils.asCloseable(nodeStore));
-                    blobStore = (GarbageCollectableBlobStore) nodeStore.getBlobStore();
-                    marker = new DocumentBlobReferenceRetriever(nodeStore);
+                    closer.register(Utils.asCloseable(docNodeStore));
+                    blobStore = (GarbageCollectableBlobStore) docNodeStore.getBlobStore();
+                    marker = new DocumentBlobReferenceRetriever(docNodeStore);
+                    nodeStore = docNodeStore;
                 } else {
-                    marker = SegmentTarUtils.newBlobReferenceRetriever(source, closer);
+                    FileStore fileStore = fileStoreBuilder(new File(source)).withStrictVersionCheck(true).build();
+                    marker = new SegmentBlobReferenceRetriever(fileStore);
+                    closer.register(fileStore);
+                    nodeStore =
+                        SegmentNodeStoreBuilders.builder(fileStore).build();
                 }
             }
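
The segment-tar branch now opens the store directly instead of going through
the removed SegmentTarUtils helper. A standalone sketch of the same setup
(the store path is a placeholder; InvalidFileStoreVersionException is the
checked exception build() declares):

    import java.io.File;
    import java.io.IOException;

    import org.apache.jackrabbit.oak.segment.SegmentNodeStoreBuilders;
    import org.apache.jackrabbit.oak.segment.file.FileStore;
    import org.apache.jackrabbit.oak.segment.file.InvalidFileStoreVersionException;
    import org.apache.jackrabbit.oak.spi.state.NodeStore;

    import static org.apache.jackrabbit.oak.segment.file.FileStoreBuilder.fileStoreBuilder;

    public class SegmentStoreSketch {
        public static void main(String[] args)
                throws IOException, InvalidFileStoreVersionException {
            // Fail fast if the on-disk segment format is not the expected one.
            FileStore fileStore = fileStoreBuilder(new File("/path/to/segmentstore"))
                .withStrictVersionCheck(true)
                .build();
            try {
                NodeStore nodeStore = SegmentNodeStoreBuilders.builder(fileStore).build();
                System.out.println(nodeStore.getRoot().exists());
            } finally {
                fileStore.close();
            }
        }
    }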
 
@@ -215,8 +239,17 @@ public class DataStoreCheckCommand imple
             }
 
             if (options.has(refOp) || options.has(consistencyOp)) {
-                retrieveBlobReferences(blobStore, marker,
-                    register.createFile(refOp, dumpPath), dsType, options.has(verbose));
+                if (options.has(verbose) &&
+                    (nodeStore instanceof SegmentNodeStore ||
+                        nodeStore instanceof org.apache.jackrabbit.oak.segment.SegmentNodeStore)) {
+                    NodeTraverser traverser = new NodeTraverser(nodeStore, dsType);
+                    closer.register(traverser);
+                    traverser.traverse();
+                    FileUtils.copyFile(traverser.references, register.createFile(refOp, dumpPath));
+                } else {
+                    retrieveBlobReferences(blobStore, marker,
+                        register.createFile(refOp, dumpPath), dsType, options.has(verbose));
+                }
             }
 
             if (options.has(consistencyOp)) {
@@ -407,12 +440,8 @@ public class DataStoreCheckCommand imple
                 }
             );
             writer.flush();
-            sort(marked, new Comparator<String>() {
-                @Override
-                public int compare(String s1, String s2) {
-                    return s1.split(DELIM)[0].compareTo(s2.split(DELIM)[0]);
-                }
-            });
+            sort(marked, idComparator);
+
             System.out.println(count.get() + " blob references found");
             System.out.println("Finished in " + watch.elapsed(TimeUnit.SECONDS) + " seconds");
             threw = false;
@@ -434,4 +463,80 @@ public class DataStoreCheckCommand imple
         System.out.println(count + " blob ids found");
         System.out.println("Finished in " + watch.elapsed(TimeUnit.SECONDS) + " seconds");
     }
+
+    static class NodeTraverser implements Closeable {
+        private final String dsType;
+        private final File references;
+        private final NodeStore nodeStore;
+        private final Joiner delimJoiner = Joiner.on(DELIM).skipNulls();
+
+        public NodeTraverser(NodeStore nodeStore, String dsType) throws IOException {
+            this.references = File.createTempFile("traverseref", null);
+            this.nodeStore = nodeStore;
+            this.dsType = dsType;
+        }
+
+        private void binaryProperties(NodeState state, String path, BufferedWriter writer, AtomicInteger count) {
+            for (PropertyState p : state.getProperties()) {
+                String propPath = PathUtils.concat(path, p.getName());
+                try {
+                    if (p.getType() == Type.BINARY) {
+                        count.incrementAndGet();
+                        writeAsLine(writer,
+                            getLine(p.getValue(Type.BINARY).getContentIdentity(), propPath), true);
+                    } else if (p.getType() == Type.BINARIES && p.count() > 0) {
+                        Iterator<Blob> iterator = p.getValue(Type.BINARIES).iterator();
+                        while (iterator.hasNext()) {
+                            count.incrementAndGet();
+
+                            String id = iterator.next().getContentIdentity();
+                            writeAsLine(writer,
+                                getLine(id, propPath), true);
+                        }
+                    }
+                } catch (Exception e) {
+                    System.err.println("Error in retrieving blob id for path " + propPath);
+                }
+            }
+        }
+
+        private String getLine(String id, String path) {
+            return delimJoiner.join(encodeId(id, dsType), path);
+        }
+
+        private void traverseChildren(NodeState state, String path, BufferedWriter writer, AtomicInteger count) {
+            binaryProperties(state, path, writer, count);
+            for (ChildNodeEntry c : state.getChildNodeEntries()) {
+                traverseChildren(c.getNodeState(), PathUtils.concat(path, c.getName()), writer, count);
+            }
+        }
+
+        public void traverse() throws IOException {
+            BufferedWriter writer = null;
+            final AtomicInteger count = new AtomicInteger();
+            boolean threw = true;
+            System.out.println("Starting dump of blob references by traversing");
+            Stopwatch watch = createStarted();
+
+            try {
+                writer = Files.newWriter(references, Charsets.UTF_8);
+                traverseChildren(nodeStore.getRoot(), "/", writer, count);
+
+                writer.flush();
+                sort(references, idComparator);
+
+                System.out.println(count.get() + " blob references found");
+                System.out.println("Finished in " + watch.elapsed(TimeUnit.SECONDS) + " seconds");
+                threw = false;
+            } finally {
+                Closeables.close(writer, threw);
+            }
+        }
+
+
+        @Override
+        public void close() throws IOException {
+            FileUtils.forceDelete(references);
+        }
+    }
 }
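
NodeTraverser recursively visits every node, writes one id<DELIM>path line per
BINARY/BINARIES property value, sorts the dump with the same idComparator, and
deletes the temp file on close. Inside the command (the class and its
references field are visible only within DataStoreCheckCommand), the verbose
segment branch boils down to this sketch, where refDumpFile stands in for
register.createFile(refOp, dumpPath):

    NodeTraverser traverser = new NodeTraverser(nodeStore, dsType);
    try {
        traverser.traverse(); // writes and sorts "id,path" lines to a temp file
        FileUtils.copyFile(traverser.references, refDumpFile);
    } finally {
        traverser.close();    // deletes the temp file
    }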

Modified: jackrabbit/oak/trunk/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCheckTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCheckTest.java?rev=1817067&r1=1817066&r2=1817067&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCheckTest.java (original)
+++ jackrabbit/oak/trunk/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCheckTest.java Mon Dec  4 08:31:08 2017
@@ -34,6 +34,7 @@ import java.util.ArrayList;
 import java.util.Date;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Map;
 import java.util.Properties;
 import java.util.Random;
 import java.util.Set;
@@ -41,10 +42,12 @@ import java.util.Set;
 import javax.annotation.Nullable;
 
 import com.google.common.base.Function;
+import com.google.common.base.Joiner;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Iterators;
 import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
 import joptsimple.internal.Strings;
 import org.apache.commons.io.FileUtils;
@@ -82,12 +85,17 @@ public class DataStoreCheckTest {
     private static final Logger log = LoggerFactory.getLogger(DataStoreCheckTest.class);
 
     @Rule
-    public final TemporaryFolder temporaryFolder = new TemporaryFolder(new File("target"));
+    public final TemporaryFolder temporaryFolder = new TemporaryFolder(new File("target")) {
+        @Override
+        public void delete() {}
+    };
 
     private String storePath;
 
     private Set<String> blobsAdded;
 
+    private Map<String, String> blobsAddedWithNodes;
+
     private String cfgFilePath;
 
     private String dsPath;
@@ -148,12 +156,15 @@ public class DataStoreCheckTest {
         NodeBuilder a = store.getRoot().builder();
         int numBlobs = 10;
         blobsAdded = Sets.newHashSet();
+        blobsAddedWithNodes = Maps.newHashMap();
+
         for (int i = 0; i < numBlobs; i++) {
             SegmentBlob b = (SegmentBlob) store.createBlob(randomStream(i, 18342));
             Iterator<String> idIter = setupDataStore.resolveChunks(b.getBlobId());
             while (idIter.hasNext()) {
                 String chunk = idIter.next();
                 blobsAdded.add(chunk);
+                blobsAddedWithNodes.put(chunk, "/c"+i+"/x");
             }
             a.child("c" + i).setProperty("x", b);
         }
@@ -215,15 +226,20 @@ public class DataStoreCheckTest {
         Random rand = new Random();
         String deletedBlobId = Iterables.get(blobsAdded, rand.nextInt(blobsAdded.size()));
         blobsAdded.remove(deletedBlobId);
-        long count = setupDataStore.countDeleteChunks(ImmutableList.of(deletedBlobId), 0);
+
+        long count = setupDataStore
+            .countDeleteChunks(ImmutableList.of(deletedBlobId),
+                0);
         assertEquals(1, count);
         setupDataStore.close();
 
         testAllParamsVerbose(dump, repoHome);
 
         assertFileEquals(dump, "[id]", encodedIds(blobsAdded, dsOption));
-        assertFileEquals(dump, "[ref]", encodedIds(Sets.union(blobsAdded, Sets.newHashSet(deletedBlobId)), dsOption));
-        assertFileEquals(dump, "[consistency]", encodedIds(Sets.newHashSet(deletedBlobId), dsOption));
+        assertFileEquals(dump, "[ref]",
+            encodedIdsAndPath(Sets.union(blobsAdded, Sets.newHashSet(deletedBlobId)), dsOption, blobsAddedWithNodes));
+        assertFileEquals(dump, "[consistency]",
+            encodedIdsAndPath(Sets.newHashSet(deletedBlobId), dsOption, blobsAddedWithNodes));
     }
 
     @Test
@@ -285,8 +301,10 @@ public class DataStoreCheckTest {
 
         assertFileEquals(dump, "[id]", encodedIds(blobsAdded, dsOption));
         assertFileEquals(dump, "[ref]",
-            encodedIds(Sets.union(blobsAdded, Sets.newHashSet(deletedBlobId, activeDeletedBlobId)), dsOption));
-        assertFileEquals(dump, "[consistency]", encodedIds(Sets.newHashSet(deletedBlobId), dsOption));
+            encodedIdsAndPath(Sets.union(blobsAdded, Sets.newHashSet(deletedBlobId, activeDeletedBlobId)), dsOption,
+                blobsAddedWithNodes));
+        assertFileEquals(dump, "[consistency]",
+            encodedIdsAndPath(Sets.newHashSet(deletedBlobId), dsOption, blobsAddedWithNodes));
     }
 
     @Test
@@ -441,4 +459,14 @@ public class DataStoreCheckTest {
             }
         }));
     }
+
+    private static Set<String> encodedIdsAndPath(Set<String> ids, String dsOption, Map<String, String> blobsAddedWithNodes) {
+        return Sets.newHashSet(Iterators.transform(ids.iterator(), new Function<String, String>() {
+            @Nullable @Override public String apply(@Nullable String input) {
+                return Joiner.on(",").join(
+                    DataStoreCheckCommand.encodeId(input, "--"+dsOption),
+                    blobsAddedWithNodes.get(input));
+            }
+        }));
+    }
 }
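
This helper reconstructs the lines the NodeTraverser writes, joining the
backend-encoded id and the node path recorded at setup with a comma, so the
[ref] and [consistency] dumps can be compared verbatim. For the blob stored
at /c0/x the expected entry is (id hypothetical):

    <encoded-blob-id>,/c0/x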