You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by mr...@apache.org on 2021/05/20 16:23:40 UTC
svn commit: r1890051 - in /jackrabbit/oak/trunk/oak-run: ./
src/main/java/org/apache/jackrabbit/oak/run/
src/test/java/org/apache/jackrabbit/oak/run/
Author: mreutegg
Date: Thu May 20 16:23:40 2021
New Revision: 1890051
URL: http://svn.apache.org/viewvc?rev=1890051&view=rev
Log:
OAK-9435: Speed up oak-run datastore --dump-ref
Modified:
jackrabbit/oak/trunk/oak-run/README.md
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCommand.java
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreOptions.java
jackrabbit/oak/trunk/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCommandTest.java
Modified: jackrabbit/oak/trunk/oak-run/README.md
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/README.md?rev=1890051&r1=1890050&r2=1890051&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/README.md (original)
+++ jackrabbit/oak/trunk/oak-run/README.md Thu May 20 16:23:40 2021
@@ -597,7 +597,8 @@ Maintenance commands for the DataStore:
[--work-dir <temporary_path>] \
[--max-age <seconds>] \
[--verbose] \
- [--verboseRootPath]
+ [--verboseRootPath] \
+ [--useDirListing] \
[<store_path>|<mongo_uri>]
[--metrics] [--export-metrics]
@@ -632,6 +633,7 @@ The following options are available:
For example, to look for blob references under specific paths such as /b1/b2/foo, /c1/c2/foo under the rootPath /a
use --verboseRootPath /a --verbosePathInclusionRegex /*/*/foo
This option is only available when --verboseRootPath is used.
+ --useDirListing - Use dirListing property for efficient reading of Lucene index files.
<store_path|mongo_uri> - Path to the tar segment store or the segment azure uri as specified in
http://jackrabbit.apache.org/oak/docs/nodestore/segment/overview.html#remote-segment-stores
or if Mongo NodeStore then the mongo uri.
Modified: jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCommand.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCommand.java?rev=1890051&r1=1890050&r2=1890051&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCommand.java (original)
+++ jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCommand.java Thu May 20 16:23:40 2021
@@ -35,6 +35,7 @@ import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.function.Function;
import java.util.stream.Collectors;
+import java.util.stream.StreamSupport;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
@@ -63,6 +64,7 @@ import org.apache.jackrabbit.oak.plugins
import org.apache.jackrabbit.oak.plugins.blob.datastore.SharedDataStoreUtils;
import org.apache.jackrabbit.oak.plugins.document.DocumentBlobReferenceRetriever;
import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore;
+import org.apache.jackrabbit.oak.plugins.index.lucene.directory.OakDirectory;
import org.apache.jackrabbit.oak.run.cli.BlobStoreOptions;
import org.apache.jackrabbit.oak.run.cli.CommonOptions;
import org.apache.jackrabbit.oak.run.cli.NodeStoreFixture;
@@ -74,10 +76,12 @@ import org.apache.jackrabbit.oak.segment
import org.apache.jackrabbit.oak.segment.file.ReadOnlyFileStore;
import org.apache.jackrabbit.oak.spi.blob.GarbageCollectableBlobStore;
import org.apache.jackrabbit.oak.spi.cluster.ClusterRepositoryInfo;
+import org.apache.jackrabbit.oak.spi.state.AbstractChildNodeEntry;
import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.apache.jackrabbit.oak.spi.state.NodeStore;
import org.apache.jackrabbit.oak.stats.StatisticsProvider;
+import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -370,7 +374,8 @@ public class DataStoreCommand implements
List<String> roothPathInclusionRegex = dataStoreOpts.getVerboseInclusionRegex();
retriever = new NodeTraverserReferenceRetriever(fixture.getStore(),
rootPathList.toArray(new String[rootPathList.size()]),
- roothPathInclusionRegex.toArray(new String[roothPathInclusionRegex.size()]));
+ roothPathInclusionRegex.toArray(new String[roothPathInclusionRegex.size()]),
+ dataStoreOpts.isUseDirListing());
} else {
ReadOnlyFileStore fileStore = getService(fixture.getWhiteboard(), ReadOnlyFileStore.class);
retriever = new SegmentBlobReferenceRetriever(fileStore);
@@ -428,15 +433,20 @@ public class DataStoreCommand implements
private final NodeStore nodeStore;
private final String[] paths;
private final String[] inclusionRegex;
+ private boolean useDirListing;
public NodeTraverserReferenceRetriever(NodeStore nodeStore) {
- this(nodeStore, null, null);
+ this(nodeStore, null, null, false);
}
- public NodeTraverserReferenceRetriever(NodeStore nodeStore, String[] paths, String[] inclusionRegex) {
+ public NodeTraverserReferenceRetriever(NodeStore nodeStore,
+ String[] paths,
+ String[] inclusionRegex,
+ boolean useDirListing) {
this.nodeStore = nodeStore;
this.paths = paths;
this.inclusionRegex = inclusionRegex;
+ this.useDirListing = useDirListing;
}
private void binaryProperties(NodeState state, String path, ReferenceCollector collector) {
@@ -462,11 +472,36 @@ public class DataStoreCommand implements
private void traverseChildren(NodeState state, String path, ReferenceCollector collector) {
binaryProperties(state, path, collector);
- for (ChildNodeEntry c : state.getChildNodeEntries()) {
+ for (ChildNodeEntry c : getChildNodeEntries(state)) {
traverseChildren(c.getNodeState(), PathUtils.concat(path, c.getName()), collector);
}
}
+ private Iterable<? extends ChildNodeEntry> getChildNodeEntries(NodeState state) {
+ if (useDirListing) {
+ PropertyState dirListing = state.getProperty(OakDirectory.PROP_DIR_LISTING);
+ if (dirListing != null && dirListing.isArray()) {
+ return StreamSupport.stream(dirListing.getValue(Type.STRINGS).spliterator(), false)
+ .map(name -> new AbstractChildNodeEntry() {
+ @Override
+ public @NotNull String getName() {
+ return name;
+ }
+
+ @Override
+ public @NotNull NodeState getNodeState() {
+ return state.getChildNode(name);
+ }
+ })
+ .filter(cne -> cne.getNodeState().exists())
+ .collect(Collectors.toList());
+ }
+ }
+
+ // fallback to full traversal
+ return state.getChildNodeEntries();
+ }
+
@Override public void collectReferences(ReferenceCollector collector) throws IOException {
log.info("Starting dump of blob references by traversing");
if (paths == null || paths.length == 0) {
Modified: jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreOptions.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreOptions.java?rev=1890051&r1=1890050&r2=1890051&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreOptions.java (original)
+++ jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreOptions.java Thu May 20 16:23:40 2021
@@ -53,6 +53,7 @@ public class DataStoreOptions implements
private final OptionSpec<Void> verbose;
private final OptionSpec<String> verboseRootPath;
private final OptionSpec<String> verbosePathInclusionRegex;
+ private final OptionSpec<Void> useDirListing;
private final OptionSpec<Boolean> resetLoggingConfig;
private OptionSpec<String> exportMetrics;
private static final String DELIM = ",";
@@ -107,6 +108,8 @@ public class DataStoreOptions implements
"nodes that will be scanned under the path provided with the option --verboseRootPath").availableIf(verboseRootPath).
withRequiredArg().withValuesSeparatedBy(DELIM).ofType(String.class);
+ useDirListing = parser.accepts("useDirListing", "Use dirListing property for efficient reading of Lucene index files");
+
resetLoggingConfig =
parser.accepts("reset-log-config", "Reset logging config for testing purposes only").withOptionalArg()
.ofType(Boolean.class).defaultsTo(Boolean.TRUE);
@@ -238,6 +241,10 @@ public class DataStoreOptions implements
return options.valuesOf(verbosePathInclusionRegex);
}
+ public boolean isUseDirListing() {
+ return options.has(useDirListing);
+ }
+
public boolean sweepIfRefsPastRetention() {
return options.has(sweepIfRefsPastRetention) && sweepIfRefsPastRetention.value(options) ;
}
Modified: jackrabbit/oak/trunk/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCommandTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCommandTest.java?rev=1890051&r1=1890050&r2=1890051&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCommandTest.java (original)
+++ jackrabbit/oak/trunk/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCommandTest.java Thu May 20 16:23:40 2021
@@ -34,6 +34,8 @@ import java.util.Properties;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+import java.util.stream.StreamSupport;
import ch.qos.logback.classic.Level;
import com.google.common.base.Function;
@@ -68,6 +70,7 @@ import org.apache.jackrabbit.oak.plugins
import org.apache.jackrabbit.oak.plugins.document.Revision;
import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector;
import org.apache.jackrabbit.oak.plugins.document.util.MongoConnection;
+import org.apache.jackrabbit.oak.plugins.index.lucene.directory.OakDirectory;
import org.apache.jackrabbit.oak.run.cli.BlobStoreOptions.Type;
import org.apache.jackrabbit.oak.segment.SegmentNodeStore;
import org.apache.jackrabbit.oak.segment.SegmentNodeStoreBuilders;
@@ -98,6 +101,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static com.google.common.base.StandardSystemProperty.FILE_SEPARATOR;
+import static org.apache.jackrabbit.oak.api.Type.STRINGS;
import static org.apache.jackrabbit.oak.commons.FileIOUtils.sort;
import static org.apache.jackrabbit.oak.commons.FileIOUtils.writeStrings;
import static org.apache.jackrabbit.oak.plugins.blob.datastore.SharedDataStoreUtils.SharedStoreRecordType.REFERENCES;
@@ -165,6 +169,11 @@ public class DataStoreCommandTest {
private static Data prepareData(StoreFixture storeFixture, DataStoreFixture blobFixture, int numBlobs,
int numMaxDeletions, int missingDataStore, boolean createMultiLevelNodes) throws Exception {
+ return prepareData(storeFixture, blobFixture, numBlobs, numMaxDeletions, missingDataStore, createMultiLevelNodes, false);
+ }
+
+ private static Data prepareData(StoreFixture storeFixture, DataStoreFixture blobFixture, int numBlobs,
+ int numMaxDeletions, int missingDataStore, boolean createMultiLevelNodes, boolean useDirListing) throws Exception {
DataStoreBlobStore blobStore = blobFixture.getDataStore();
NodeStore store = storeFixture.getNodeStore();
@@ -212,10 +221,13 @@ public class DataStoreCommandTest {
data.deleted.add(chunk);
}
}
+ NodeBuilder parent = a;
if (createMultiLevelNodes) {
- map.get(pathRoot).child("c" + i).setProperty("x", b);
- } else {
- a.child("c" + i).setProperty("x", b);
+ parent = map.get(pathRoot);
+ }
+ parent.child("c" + i).setProperty("x", b);
+ if (useDirListing) {
+ setDirListing(parent);
}
}
@@ -258,6 +270,12 @@ public class DataStoreCommandTest {
return data;
}
+ private static void setDirListing(NodeBuilder parent) {
+ List<String> names = StreamSupport.stream(parent.getChildNodeNames().spliterator(), false)
+ .collect(Collectors.toList());
+ parent.setProperty(OakDirectory.PROP_DIR_LISTING, names, STRINGS);
+ }
+
protected static void delete(String nodeId, NodeStore nodeStore) throws CommitFailedException {
NodeBuilder builder = nodeStore.getRoot().builder();
builder.child(nodeId).remove();
@@ -495,6 +513,23 @@ public class DataStoreCommandTest {
}
@Test
+ public void testDumpRefWithUseDirListing() throws Exception {
+ File dump = temporaryFolder.newFolder();
+ Data data = prepareData(storeFixture, blobFixture, 10, 4, 1, true, true);
+ storeFixture.close();
+
+ additionalParams += " --useDirListing --verboseRootPath /foo --verbosePathInclusionRegex /*/test";
+
+ for (String id : data.idToPath.keySet()) {
+ if (data.idToPath.get(id).contains("/foo")) {
+ data.addedSubset.add(id);
+ }
+ }
+
+ testDumpRef(dump, data, true, true);
+ }
+
+ @Test
public void testDumpId() throws Exception {
File dump = temporaryFolder.newFolder();
Data data = prepareData(storeFixture, blobFixture, 10, 4, 1);