Posted to oak-commits@jackrabbit.apache.org by am...@apache.org on 2017/12/04 08:31:08 UTC
svn commit: r1817067 - in /jackrabbit/oak/trunk/oak-run: README.md
src/main/java/org/apache/jackrabbit/oak/run/DataStoreCheckCommand.java
src/test/java/org/apache/jackrabbit/oak/run/DataStoreCheckTest.java
Author: amitj
Date: Mon Dec 4 08:31:08 2017
New Revision: 1817067
URL: http://svn.apache.org/viewvc?rev=1817067&view=rev
Log:
OAK-6551: Add an option to datastorecheck command to include path details in the missing blobs report
- Path information for SegmentTar is available with the --verbose option
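
For orientation before the diffs, here is a minimal, self-contained sketch of the traversal idea behind the new verbose path: walk the node tree and, for every binary property, emit a line pairing the blob id with the referring node path. It mirrors the NodeTraverser added to DataStoreCheckCommand.java below, but it is illustrative only; the comma delimiter and the skipping of null content identities are assumptions of this sketch, not guarantees of the commit.

    import java.io.BufferedWriter;
    import java.io.IOException;

    import org.apache.jackrabbit.oak.api.Blob;
    import org.apache.jackrabbit.oak.api.PropertyState;
    import org.apache.jackrabbit.oak.api.Type;
    import org.apache.jackrabbit.oak.commons.PathUtils;
    import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry;
    import org.apache.jackrabbit.oak.spi.state.NodeState;

    // Illustrative only: depth-first walk that writes one "<blobId>,<node path>"
    // line per binary property found under the given root.
    final class ReferenceDumpSketch {

        static void dump(NodeState state, String path, BufferedWriter out) throws IOException {
            for (PropertyState p : state.getProperties()) {
                String propPath = PathUtils.concat(path, p.getName());
                if (p.getType() == Type.BINARY) {
                    write(out, p.getValue(Type.BINARY), propPath);
                } else if (p.getType() == Type.BINARIES) {
                    for (Blob b : p.getValue(Type.BINARIES)) {
                        write(out, b, propPath);
                    }
                }
            }
            for (ChildNodeEntry c : state.getChildNodeEntries()) {
                dump(c.getNodeState(), PathUtils.concat(path, c.getName()), out);
            }
        }

        private static void write(BufferedWriter out, Blob blob, String propPath) throws IOException {
            // getContentIdentity() can be null for inlined binaries; skip those here.
            String id = blob.getContentIdentity();
            if (id != null) {
                out.write(id + "," + propPath);
                out.newLine();
            }
        }
    }

Usage would be dump(nodeStore.getRoot(), "/", writer) on an open SegmentNodeStore; the committed NodeTraverser additionally encodes each id per data store type and sorts the resulting file.
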
Modified:
jackrabbit/oak/trunk/oak-run/README.md
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCheckCommand.java
jackrabbit/oak/trunk/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCheckTest.java
Modified: jackrabbit/oak/trunk/oak-run/README.md
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/README.md?rev=1817067&r1=1817066&r2=1817067&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/README.md (original)
+++ jackrabbit/oak/trunk/oak-run/README.md Mon Dec 4 08:31:08 2017
@@ -477,7 +477,9 @@ The following options are available:
--nods - To check for misconfigured external references when no data store should be there (Use instead of --s3ds or --fds)
--repoHome - Path of the local repository home folder. Mandatory for --consistency & --track options
--track - Whether to force override the tracked ids (Valid only for --id & --consistency options)
- --verbose - Outputs backend friendly blobids. Adds the sub-directories created in FDS and the changes done for S3/Azure when stored in the respective container.
+ --verbose - Outputs backend-friendly blobids and also adds the node path (for SegmentNodeStore) from which each blob is referred.
+ This option is typically slower since it requires traversing the whole repository.
+ Adds the sub-directories created in FDS and the changes done for S3/Azure when stored in the respective container.
Note:
The command to be executed for S3DataStore
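
Since the README hunk above describes the new verbose output, a hedged example of consuming such a dump may help; the comma delimiter matches what the updated test below joins with (Joiner.on(",")), but the exact line format should be treated as an assumption of this example.

    import java.io.IOException;
    import java.nio.charset.StandardCharsets;
    import java.nio.file.Files;
    import java.nio.file.Paths;

    // Illustrative: read a verbose reference dump and print "id -> path".
    // Assumes each line is "<encoded blob id>,<node path>"; lines without a
    // comma (non-verbose dumps) are printed as bare ids.
    public class VerboseRefDumpReader {
        public static void main(String[] args) throws IOException {
            for (String line : Files.readAllLines(Paths.get(args[0]), StandardCharsets.UTF_8)) {
                int comma = line.indexOf(',');
                if (comma < 0) {
                    System.out.println(line);
                } else {
                    System.out.println(line.substring(0, comma) + " -> " + line.substring(comma + 1));
                }
            }
        }
    }
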
Modified: jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCheckCommand.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCheckCommand.java?rev=1817067&r1=1817066&r2=1817067&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCheckCommand.java (original)
+++ jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCheckCommand.java Mon Dec 4 08:31:08 2017
@@ -16,18 +16,6 @@
*/
package org.apache.jackrabbit.oak.run;
-import static com.google.common.base.StandardSystemProperty.JAVA_IO_TMPDIR;
-import static com.google.common.base.StandardSystemProperty.FILE_SEPARATOR;
-import static com.google.common.base.Stopwatch.createStarted;
-import static com.google.common.io.Closeables.close;
-import static java.io.File.createTempFile;
-import static java.util.Arrays.asList;
-import static org.apache.commons.io.FileUtils.forceDelete;
-import static org.apache.commons.io.FileUtils.listFiles;
-import static org.apache.jackrabbit.oak.commons.FileIOUtils.sort;
-import static org.apache.jackrabbit.oak.commons.FileIOUtils.writeAsLine;
-import static org.apache.jackrabbit.oak.commons.FileIOUtils.writeStrings;
-
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
@@ -43,37 +31,60 @@ import java.util.concurrent.atomic.Atomi
import javax.annotation.Nullable;
-import com.google.common.base.Splitter;
-import org.apache.commons.io.FileUtils;
-import org.apache.commons.io.FilenameUtils;
-import org.apache.commons.io.LineIterator;
-import org.apache.commons.io.filefilter.FileFilterUtils;
-import org.apache.jackrabbit.oak.commons.FileIOUtils;
-import org.apache.jackrabbit.oak.commons.FileIOUtils.FileLineDifferenceIterator;
-import org.apache.jackrabbit.oak.run.commons.Command;
-import org.apache.jackrabbit.oak.plugins.blob.BlobReferenceRetriever;
-import org.apache.jackrabbit.oak.plugins.blob.ReferenceCollector;
-import org.apache.jackrabbit.oak.plugins.document.DocumentBlobReferenceRetriever;
-import org.apache.jackrabbit.oak.plugins.document.DocumentMK;
-import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore;
-import org.apache.jackrabbit.oak.spi.blob.GarbageCollectableBlobStore;
-
import com.google.common.base.Charsets;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
+import com.google.common.base.Splitter;
import com.google.common.base.Stopwatch;
import com.google.common.collect.Maps;
+import com.google.common.io.Closeables;
import com.google.common.io.Closer;
import com.google.common.io.Files;
import com.mongodb.MongoClient;
import com.mongodb.MongoClientURI;
import com.mongodb.MongoURI;
-
import joptsimple.ArgumentAcceptingOptionSpec;
import joptsimple.OptionParser;
import joptsimple.OptionSet;
import joptsimple.OptionSpec;
import joptsimple.OptionSpecBuilder;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.FilenameUtils;
+import org.apache.commons.io.LineIterator;
+import org.apache.commons.io.filefilter.FileFilterUtils;
+import org.apache.jackrabbit.oak.api.Blob;
+import org.apache.jackrabbit.oak.api.PropertyState;
+import org.apache.jackrabbit.oak.api.Type;
+import org.apache.jackrabbit.oak.commons.FileIOUtils;
+import org.apache.jackrabbit.oak.commons.FileIOUtils.FileLineDifferenceIterator;
+import org.apache.jackrabbit.oak.commons.PathUtils;
+import org.apache.jackrabbit.oak.plugins.blob.BlobReferenceRetriever;
+import org.apache.jackrabbit.oak.plugins.blob.ReferenceCollector;
+import org.apache.jackrabbit.oak.plugins.document.DocumentBlobReferenceRetriever;
+import org.apache.jackrabbit.oak.plugins.document.DocumentMK;
+import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore;
+import org.apache.jackrabbit.oak.plugins.segment.SegmentNodeStore;
+import org.apache.jackrabbit.oak.run.commons.Command;
+import org.apache.jackrabbit.oak.segment.SegmentBlobReferenceRetriever;
+import org.apache.jackrabbit.oak.segment.SegmentNodeStoreBuilders;
+import org.apache.jackrabbit.oak.segment.file.FileStore;
+import org.apache.jackrabbit.oak.spi.blob.GarbageCollectableBlobStore;
+import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry;
+import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.apache.jackrabbit.oak.spi.state.NodeStore;
+
+import static com.google.common.base.StandardSystemProperty.FILE_SEPARATOR;
+import static com.google.common.base.StandardSystemProperty.JAVA_IO_TMPDIR;
+import static com.google.common.base.Stopwatch.createStarted;
+import static com.google.common.io.Closeables.close;
+import static java.io.File.createTempFile;
+import static java.util.Arrays.asList;
+import static org.apache.commons.io.FileUtils.forceDelete;
+import static org.apache.commons.io.FileUtils.listFiles;
+import static org.apache.jackrabbit.oak.commons.FileIOUtils.sort;
+import static org.apache.jackrabbit.oak.commons.FileIOUtils.writeAsLine;
+import static org.apache.jackrabbit.oak.commons.FileIOUtils.writeStrings;
+import static org.apache.jackrabbit.oak.segment.file.FileStoreBuilder.fileStoreBuilder;
/**
* Command to check data store consistency and also optionally retrieve ids
@@ -87,6 +98,13 @@ public class DataStoreCheckCommand imple
private static final String DASH = "-";
private static final String HASH = "#";
+ private static final Comparator<String> idComparator = new Comparator<String>() {
+ @Override
+ public int compare(String s1, String s2) {
+ return s1.split(DELIM)[0].compareTo(s2.split(DELIM)[0]);
+ }
+ };
+
@Override
public void execute(String... args) throws Exception {
OptionParser parser = new OptionParser();
@@ -154,18 +172,24 @@ public class DataStoreCheckCommand imple
GarbageCollectableBlobStore blobStore = null;
BlobReferenceRetriever marker = null;
+ NodeStore nodeStore = null;
if (options.has(store)) {
String source = options.valueOf(store);
if (source.startsWith(MongoURI.MONGODB_PREFIX)) {
MongoClientURI uri = new MongoClientURI(source);
MongoClient client = new MongoClient(uri);
- DocumentNodeStore nodeStore =
+ DocumentNodeStore docNodeStore =
new DocumentMK.Builder().setMongoDB(client.getDB(uri.getDatabase())).getNodeStore();
- closer.register(Utils.asCloseable(nodeStore));
- blobStore = (GarbageCollectableBlobStore) nodeStore.getBlobStore();
- marker = new DocumentBlobReferenceRetriever(nodeStore);
+ closer.register(Utils.asCloseable(docNodeStore));
+ blobStore = (GarbageCollectableBlobStore) docNodeStore.getBlobStore();
+ marker = new DocumentBlobReferenceRetriever(docNodeStore);
+ nodeStore = docNodeStore;
} else {
- marker = SegmentTarUtils.newBlobReferenceRetriever(source, closer);
+ FileStore fileStore = fileStoreBuilder(new File(source)).withStrictVersionCheck(true).build();
+ marker = new SegmentBlobReferenceRetriever(fileStore);
+ closer.register(fileStore);
+ nodeStore =
+ SegmentNodeStoreBuilders.builder(fileStore).build();
}
}
@@ -215,8 +239,17 @@ public class DataStoreCheckCommand imple
}
if (options.has(refOp) || options.has(consistencyOp)) {
- retrieveBlobReferences(blobStore, marker,
- register.createFile(refOp, dumpPath), dsType, options.has(verbose));
+ if (options.has(verbose) &&
+ (nodeStore instanceof SegmentNodeStore ||
+ nodeStore instanceof org.apache.jackrabbit.oak.segment.SegmentNodeStore)) {
+ NodeTraverser traverser = new NodeTraverser(nodeStore, dsType);
+ closer.register(traverser);
+ traverser.traverse();
+ FileUtils.copyFile(traverser.references, register.createFile(refOp, dumpPath));
+ } else {
+ retrieveBlobReferences(blobStore, marker,
+ register.createFile(refOp, dumpPath), dsType, options.has(verbose));
+ }
}
if (options.has(consistencyOp)) {
@@ -407,12 +440,8 @@ public class DataStoreCheckCommand imple
}
);
writer.flush();
- sort(marked, new Comparator<String>() {
- @Override
- public int compare(String s1, String s2) {
- return s1.split(DELIM)[0].compareTo(s2.split(DELIM)[0]);
- }
- });
+ sort(marked, idComparator);
+
System.out.println(count.get() + " blob references found");
System.out.println("Finished in " + watch.elapsed(TimeUnit.SECONDS) + " seconds");
threw = false;
@@ -434,4 +463,80 @@ public class DataStoreCheckCommand imple
System.out.println(count + " blob ids found");
System.out.println("Finished in " + watch.elapsed(TimeUnit.SECONDS) + " seconds");
}
+
+ static class NodeTraverser implements Closeable {
+ private final String dsType;
+ private final File references;
+ private final NodeStore nodeStore;
+ private final Joiner delimJoiner = Joiner.on(DELIM).skipNulls();
+
+ public NodeTraverser(NodeStore nodeStore, String dsType) throws IOException {
+ this.references = File.createTempFile("traverseref", null);
+ this.nodeStore = nodeStore;
+ this.dsType = dsType;
+ }
+
+ private void binaryProperties(NodeState state, String path, BufferedWriter writer, AtomicInteger count) {
+ for (PropertyState p : state.getProperties()) {
+ String propPath = PathUtils.concat(path, p.getName());
+ try {
+ if (p.getType() == Type.BINARY) {
+ count.incrementAndGet();
+ writeAsLine(writer,
+ getLine(p.getValue(Type.BINARY).getContentIdentity(), propPath), true);
+ } else if (p.getType() == Type.BINARIES && p.count() > 0) {
+ Iterator<Blob> iterator = p.getValue(Type.BINARIES).iterator();
+ while (iterator.hasNext()) {
+ count.incrementAndGet();
+
+ String id = iterator.next().getContentIdentity();
+ writeAsLine(writer,
+ getLine(id, propPath), true);
+ }
+ }
+ } catch (Exception e) {
+ System.err.println("Error in retrieving blob id for path " + propPath);
+ }
+ }
+ }
+
+ private String getLine(String id, String path) {
+ return delimJoiner.join(encodeId(id, dsType), path);
+ }
+
+ private void traverseChildren(NodeState state, String path, BufferedWriter writer, AtomicInteger count) {
+ binaryProperties(state, path, writer, count);
+ for (ChildNodeEntry c : state.getChildNodeEntries()) {
+ traverseChildren(c.getNodeState(), PathUtils.concat(path, c.getName()), writer, count);
+ }
+ }
+
+ public void traverse() throws IOException {
+ BufferedWriter writer = null;
+ final AtomicInteger count = new AtomicInteger();
+ boolean threw = true;
+ System.out.println("Starting dump of blob references by traversing");
+ Stopwatch watch = createStarted();
+
+ try {
+ writer = Files.newWriter(references, Charsets.UTF_8);
+ traverseChildren(nodeStore.getRoot(), "/", writer, count);
+
+ writer.flush();
+ sort(references, idComparator);
+
+ System.out.println(count.get() + " blob references found");
+ System.out.println("Finished in " + watch.elapsed(TimeUnit.SECONDS) + " seconds");
+ threw = false;
+ } finally {
+ Closeables.close(writer, threw);
+ }
+ }
+
+
+ @Override
+ public void close() throws IOException {
+ FileUtils.forceDelete(references);
+ }
+ }
}
Modified: jackrabbit/oak/trunk/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCheckTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCheckTest.java?rev=1817067&r1=1817066&r2=1817067&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCheckTest.java (original)
+++ jackrabbit/oak/trunk/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCheckTest.java Mon Dec 4 08:31:08 2017
@@ -34,6 +34,7 @@ import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
+import java.util.Map;
import java.util.Properties;
import java.util.Random;
import java.util.Set;
@@ -41,10 +42,12 @@ import java.util.Set;
import javax.annotation.Nullable;
import com.google.common.base.Function;
+import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import joptsimple.internal.Strings;
import org.apache.commons.io.FileUtils;
@@ -82,12 +85,17 @@ public class DataStoreCheckTest {
private static final Logger log = LoggerFactory.getLogger(DataStoreCheckTest.class);
@Rule
- public final TemporaryFolder temporaryFolder = new TemporaryFolder(new File("target"));
+ public final TemporaryFolder temporaryFolder = new TemporaryFolder(new File("target")) {
+ @Override
+ public void delete() {}
+ };
private String storePath;
private Set<String> blobsAdded;
+ private Map<String, String> blobsAddedWithNodes;
+
private String cfgFilePath;
private String dsPath;
@@ -148,12 +156,15 @@ public class DataStoreCheckTest {
NodeBuilder a = store.getRoot().builder();
int numBlobs = 10;
blobsAdded = Sets.newHashSet();
+ blobsAddedWithNodes = Maps.newHashMap();
+
for (int i = 0; i < numBlobs; i++) {
SegmentBlob b = (SegmentBlob) store.createBlob(randomStream(i, 18342));
Iterator<String> idIter = setupDataStore.resolveChunks(b.getBlobId());
while (idIter.hasNext()) {
String chunk = idIter.next();
blobsAdded.add(chunk);
+ blobsAddedWithNodes.put(chunk, "/c"+i+"/x");
}
a.child("c" + i).setProperty("x", b);
}
@@ -215,15 +226,20 @@ public class DataStoreCheckTest {
Random rand = new Random();
String deletedBlobId = Iterables.get(blobsAdded, rand.nextInt(blobsAdded.size()));
blobsAdded.remove(deletedBlobId);
- long count = setupDataStore.countDeleteChunks(ImmutableList.of(deletedBlobId), 0);
+
+ long count = setupDataStore
+ .countDeleteChunks(ImmutableList.of(deletedBlobId),
+ 0);
assertEquals(1, count);
setupDataStore.close();
testAllParamsVerbose(dump, repoHome);
assertFileEquals(dump, "[id]", encodedIds(blobsAdded, dsOption));
- assertFileEquals(dump, "[ref]", encodedIds(Sets.union(blobsAdded, Sets.newHashSet(deletedBlobId)), dsOption));
- assertFileEquals(dump, "[consistency]", encodedIds(Sets.newHashSet(deletedBlobId), dsOption));
+ assertFileEquals(dump, "[ref]",
+ encodedIdsAndPath(Sets.union(blobsAdded, Sets.newHashSet(deletedBlobId)), dsOption, blobsAddedWithNodes));
+ assertFileEquals(dump, "[consistency]",
+ encodedIdsAndPath(Sets.newHashSet(deletedBlobId), dsOption, blobsAddedWithNodes));
}
@Test
@@ -285,8 +301,10 @@ public class DataStoreCheckTest {
assertFileEquals(dump, "[id]", encodedIds(blobsAdded, dsOption));
assertFileEquals(dump, "[ref]",
- encodedIds(Sets.union(blobsAdded, Sets.newHashSet(deletedBlobId, activeDeletedBlobId)), dsOption));
- assertFileEquals(dump, "[consistency]", encodedIds(Sets.newHashSet(deletedBlobId), dsOption));
+ encodedIdsAndPath(Sets.union(blobsAdded, Sets.newHashSet(deletedBlobId, activeDeletedBlobId)), dsOption,
+ blobsAddedWithNodes));
+ assertFileEquals(dump, "[consistency]",
+ encodedIdsAndPath(Sets.newHashSet(deletedBlobId), dsOption, blobsAddedWithNodes));
}
@Test
@@ -441,4 +459,14 @@ public class DataStoreCheckTest {
}
}));
}
+
+ private static Set<String> encodedIdsAndPath(Set<String> ids, String dsOption, Map<String, String> blobsAddedWithNodes) {
+ return Sets.newHashSet(Iterators.transform(ids.iterator(), new Function<String, String>() {
+ @Nullable @Override public String apply(@Nullable String input) {
+ return Joiner.on(",").join(
+ DataStoreCheckCommand.encodeId(input, "--"+dsOption),
+ blobsAddedWithNodes.get(input));
+ }
+ }));
+ }
}