You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by mr...@apache.org on 2021/05/20 16:23:40 UTC

svn commit: r1890051 - in /jackrabbit/oak/trunk/oak-run: ./ src/main/java/org/apache/jackrabbit/oak/run/ src/test/java/org/apache/jackrabbit/oak/run/

Author: mreutegg
Date: Thu May 20 16:23:40 2021
New Revision: 1890051

URL: http://svn.apache.org/viewvc?rev=1890051&view=rev
Log:
OAK-9435: Speed up oak-run datastore --dump-ref

Modified:
    jackrabbit/oak/trunk/oak-run/README.md
    jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCommand.java
    jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreOptions.java
    jackrabbit/oak/trunk/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCommandTest.java

Modified: jackrabbit/oak/trunk/oak-run/README.md
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/README.md?rev=1890051&r1=1890050&r2=1890051&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/README.md (original)
+++ jackrabbit/oak/trunk/oak-run/README.md Thu May 20 16:23:40 2021
@@ -597,7 +597,8 @@ Maintenance commands for the DataStore:
             [--work-dir <temporary_path>] \
             [--max-age <seconds>] \
             [--verbose] \
-            [--verboseRootPath]
+            [--verboseRootPath] \
+            [--useDirListing] \
             [<store_path>|<mongo_uri>]
             [--metrics] [--export-metrics]
 
@@ -632,6 +633,7 @@ The following options are available:
                                  For example, to look for blob references under specific paths such as /b1/b2/foo, /c1/c2/foo under the rootPath /a
                                  use --verboseRootPath /a --verbosePathInclusionRegex /*/*/foo
                                  This option is only available when --verboseRootPath is used.
+    --useDirListing         - Use dirListing property for efficient reading of Lucene index files.
     <store_path|mongo_uri>     - Path to the tar segment store or the segment azure uri as specified in 
                                  http://jackrabbit.apache.org/oak/docs/nodestore/segment/overview.html#remote-segment-stores
                                  or if Mongo NodeStore then the mongo uri.

Modified: jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCommand.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCommand.java?rev=1890051&r1=1890050&r2=1890051&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCommand.java (original)
+++ jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreCommand.java Thu May 20 16:23:40 2021
@@ -35,6 +35,7 @@ import java.util.concurrent.Executors;
 import java.util.concurrent.TimeUnit;
 import java.util.function.Function;
 import java.util.stream.Collectors;
+import java.util.stream.StreamSupport;
 
 import com.google.common.base.Joiner;
 import com.google.common.base.Splitter;
@@ -63,6 +64,7 @@ import org.apache.jackrabbit.oak.plugins
 import org.apache.jackrabbit.oak.plugins.blob.datastore.SharedDataStoreUtils;
 import org.apache.jackrabbit.oak.plugins.document.DocumentBlobReferenceRetriever;
 import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore;
+import org.apache.jackrabbit.oak.plugins.index.lucene.directory.OakDirectory;
 import org.apache.jackrabbit.oak.run.cli.BlobStoreOptions;
 import org.apache.jackrabbit.oak.run.cli.CommonOptions;
 import org.apache.jackrabbit.oak.run.cli.NodeStoreFixture;
@@ -74,10 +76,12 @@ import org.apache.jackrabbit.oak.segment
 import org.apache.jackrabbit.oak.segment.file.ReadOnlyFileStore;
 import org.apache.jackrabbit.oak.spi.blob.GarbageCollectableBlobStore;
 import org.apache.jackrabbit.oak.spi.cluster.ClusterRepositoryInfo;
+import org.apache.jackrabbit.oak.spi.state.AbstractChildNodeEntry;
 import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry;
 import org.apache.jackrabbit.oak.spi.state.NodeState;
 import org.apache.jackrabbit.oak.spi.state.NodeStore;
 import org.apache.jackrabbit.oak.stats.StatisticsProvider;
+import org.jetbrains.annotations.NotNull;
 import org.jetbrains.annotations.Nullable;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -370,7 +374,8 @@ public class DataStoreCommand implements
                 List<String> roothPathInclusionRegex = dataStoreOpts.getVerboseInclusionRegex();
                 retriever = new NodeTraverserReferenceRetriever(fixture.getStore(),
                     rootPathList.toArray(new String[rootPathList.size()]),
-                    roothPathInclusionRegex.toArray(new String[roothPathInclusionRegex.size()]));
+                    roothPathInclusionRegex.toArray(new String[roothPathInclusionRegex.size()]),
+                    dataStoreOpts.isUseDirListing());
             } else {
                 ReadOnlyFileStore fileStore = getService(fixture.getWhiteboard(), ReadOnlyFileStore.class);
                 retriever = new SegmentBlobReferenceRetriever(fileStore);
@@ -428,15 +433,20 @@ public class DataStoreCommand implements
         private final NodeStore nodeStore;
         private final String[] paths;
         private final String[] inclusionRegex;
+        private boolean useDirListing;
 
         public NodeTraverserReferenceRetriever(NodeStore nodeStore) {
-            this(nodeStore, null, null);
+            this(nodeStore, null, null, false);
         }
 
-        public NodeTraverserReferenceRetriever(NodeStore nodeStore, String[] paths, String[] inclusionRegex) {
+        public NodeTraverserReferenceRetriever(NodeStore nodeStore,
+                                               String[] paths,
+                                               String[] inclusionRegex,
+                                               boolean useDirListing) {
             this.nodeStore = nodeStore;
             this.paths = paths;
             this.inclusionRegex = inclusionRegex;
+            this.useDirListing = useDirListing;
         }
 
         private void binaryProperties(NodeState state, String path, ReferenceCollector collector) {
@@ -462,11 +472,36 @@ public class DataStoreCommand implements
 
         private void traverseChildren(NodeState state, String path, ReferenceCollector collector) {
             binaryProperties(state, path, collector);
-            for (ChildNodeEntry c : state.getChildNodeEntries()) {
+            for (ChildNodeEntry c : getChildNodeEntries(state)) {
                 traverseChildren(c.getNodeState(), PathUtils.concat(path, c.getName()), collector);
             }
         }
 
+        private Iterable<? extends ChildNodeEntry> getChildNodeEntries(NodeState state) {
+            if (useDirListing) {
+                PropertyState dirListing = state.getProperty(OakDirectory.PROP_DIR_LISTING);
+                if (dirListing != null && dirListing.isArray()) {
+                    return StreamSupport.stream(dirListing.getValue(Type.STRINGS).spliterator(), false)
+                            .map(name -> new AbstractChildNodeEntry() {
+                                @Override
+                                public @NotNull String getName() {
+                                    return name;
+                                }
+
+                                @Override
+                                public @NotNull NodeState getNodeState() {
+                                    return state.getChildNode(name);
+                                }
+                            })
+                            .filter(cne -> cne.getNodeState().exists())
+                            .collect(Collectors.toList());
+                }
+            }
+
+            // fallback to full traversal
+            return state.getChildNodeEntries();
+        }
+
         @Override public void collectReferences(ReferenceCollector collector) throws IOException {
             log.info("Starting dump of blob references by traversing");
             if (paths == null || paths.length == 0) {

Modified: jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreOptions.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreOptions.java?rev=1890051&r1=1890050&r2=1890051&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreOptions.java (original)
+++ jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DataStoreOptions.java Thu May 20 16:23:40 2021
@@ -53,6 +53,7 @@ public class DataStoreOptions implements
     private final OptionSpec<Void> verbose;
     private final OptionSpec<String> verboseRootPath;
     private final OptionSpec<String> verbosePathInclusionRegex;
+    private final OptionSpec<Void> useDirListing;
     private final OptionSpec<Boolean> resetLoggingConfig;
     private OptionSpec<String> exportMetrics;
     private static final String DELIM = ",";
@@ -107,6 +108,8 @@ public class DataStoreOptions implements
                 "nodes that will be scanned under the path provided with the option --verboseRootPath").availableIf(verboseRootPath).
                 withRequiredArg().withValuesSeparatedBy(DELIM).ofType(String.class);
 
+        useDirListing = parser.accepts("useDirListing", "Use dirListing property for efficient reading of Lucene index files");
+
         resetLoggingConfig =
             parser.accepts("reset-log-config", "Reset logging config for testing purposes only").withOptionalArg()
                 .ofType(Boolean.class).defaultsTo(Boolean.TRUE);
@@ -238,6 +241,10 @@ public class DataStoreOptions implements
         return options.valuesOf(verbosePathInclusionRegex);
     }
 
+    public boolean isUseDirListing() {
+        return options.has(useDirListing);
+    }
+
     public boolean sweepIfRefsPastRetention() {
         return options.has(sweepIfRefsPastRetention) && sweepIfRefsPastRetention.value(options) ;
     }

Modified: jackrabbit/oak/trunk/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCommandTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCommandTest.java?rev=1890051&r1=1890050&r2=1890051&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCommandTest.java (original)
+++ jackrabbit/oak/trunk/oak-run/src/test/java/org/apache/jackrabbit/oak/run/DataStoreCommandTest.java Thu May 20 16:23:40 2021
@@ -34,6 +34,8 @@ import java.util.Properties;
 import java.util.Random;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+import java.util.stream.StreamSupport;
 
 import ch.qos.logback.classic.Level;
 import com.google.common.base.Function;
@@ -68,6 +70,7 @@ import org.apache.jackrabbit.oak.plugins
 import org.apache.jackrabbit.oak.plugins.document.Revision;
 import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector;
 import org.apache.jackrabbit.oak.plugins.document.util.MongoConnection;
+import org.apache.jackrabbit.oak.plugins.index.lucene.directory.OakDirectory;
 import org.apache.jackrabbit.oak.run.cli.BlobStoreOptions.Type;
 import org.apache.jackrabbit.oak.segment.SegmentNodeStore;
 import org.apache.jackrabbit.oak.segment.SegmentNodeStoreBuilders;
@@ -98,6 +101,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import static com.google.common.base.StandardSystemProperty.FILE_SEPARATOR;
+import static org.apache.jackrabbit.oak.api.Type.STRINGS;
 import static org.apache.jackrabbit.oak.commons.FileIOUtils.sort;
 import static org.apache.jackrabbit.oak.commons.FileIOUtils.writeStrings;
 import static org.apache.jackrabbit.oak.plugins.blob.datastore.SharedDataStoreUtils.SharedStoreRecordType.REFERENCES;
@@ -165,6 +169,11 @@ public class DataStoreCommandTest {
 
     private static Data prepareData(StoreFixture storeFixture, DataStoreFixture blobFixture, int numBlobs,
         int numMaxDeletions, int missingDataStore, boolean createMultiLevelNodes) throws Exception {
+        return prepareData(storeFixture, blobFixture, numBlobs, numMaxDeletions, missingDataStore, createMultiLevelNodes, false);
+    }
+
+    private static Data prepareData(StoreFixture storeFixture, DataStoreFixture blobFixture, int numBlobs,
+        int numMaxDeletions, int missingDataStore, boolean createMultiLevelNodes, boolean useDirListing) throws Exception {
 
         DataStoreBlobStore blobStore = blobFixture.getDataStore();
         NodeStore store = storeFixture.getNodeStore();
@@ -212,10 +221,13 @@ public class DataStoreCommandTest {
                     data.deleted.add(chunk);
                 }
             }
+            NodeBuilder parent = a;
             if (createMultiLevelNodes) {
-                map.get(pathRoot).child("c" + i).setProperty("x", b);
-            } else {
-                a.child("c" + i).setProperty("x", b);
+                parent = map.get(pathRoot);
+            }
+            parent.child("c" + i).setProperty("x", b);
+            if (useDirListing) {
+                setDirListing(parent);
             }
         }
 
@@ -258,6 +270,12 @@ public class DataStoreCommandTest {
         return data;
     }
 
+    private static void setDirListing(NodeBuilder parent) {
+        List<String> names = StreamSupport.stream(parent.getChildNodeNames().spliterator(), false)
+                .collect(Collectors.toList());
+        parent.setProperty(OakDirectory.PROP_DIR_LISTING, names, STRINGS);
+    }
+
     protected static void delete(String nodeId, NodeStore nodeStore) throws CommitFailedException {
         NodeBuilder builder = nodeStore.getRoot().builder();
         builder.child(nodeId).remove();
@@ -495,6 +513,23 @@ public class DataStoreCommandTest {
     }
 
     @Test
+    public void testDumpRefWithUseDirListing() throws Exception {
+        File dump = temporaryFolder.newFolder();
+        Data data = prepareData(storeFixture, blobFixture, 10, 4, 1, true, true);
+        storeFixture.close();
+
+        additionalParams += " --useDirListing --verboseRootPath /foo --verbosePathInclusionRegex /*/test";
+
+        for (String id : data.idToPath.keySet()) {
+            if (data.idToPath.get(id).contains("/foo")) {
+                data.addedSubset.add(id);
+            }
+        }
+
+        testDumpRef(dump, data, true, true);
+    }
+
+    @Test
     public void testDumpId() throws Exception {
         File dump = temporaryFolder.newFolder();
         Data data = prepareData(storeFixture, blobFixture, 10, 4, 1);