You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by mb...@apache.org on 2021/03/06 16:58:00 UTC

[asterixdb] 13/17: [ASTERIXDB-2841][*DB][STO] Encode multiple-dataverse parts as subdirs on disk

This is an automated email from the ASF dual-hosted git repository.

mblow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit 4b3583211aa27922555a78993b79f04afa4d8bdb
Author: Michael Blow <mb...@apache.org>
AuthorDate: Tue Mar 2 16:28:47 2021 -0500

    [ASTERIXDB-2841][*DB][STO] Encode multiple-dataverse parts as subdirs on disk
    
    - Multipart dataverse names are expressed on disk as a directory tree
    - The first part is expressed normally; subsequent parts have a caret (^)
      prepended
    
    Change-Id: Idcfc45eb7f39153349a13d2baecb784244bdf177
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/10324
    Reviewed-by: Michael Blow <mb...@apache.org>
    Tested-by: Michael Blow <mb...@apache.org>
---
 .../common/storage/DatasetCopyIdentifier.java      |  4 +-
 .../asterix/common/storage/ResourceReference.java  | 61 +++++++++++++++++-----
 .../asterix/common/utils/StoragePathUtil.java      | 18 +++++--
 .../apache/asterix/external/util/FeedUtils.java    |  4 +-
 .../metadata/utils/SplitsAndConstraintsUtil.java   |  9 ++--
 5 files changed, 72 insertions(+), 24 deletions(-)

diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/DatasetCopyIdentifier.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/DatasetCopyIdentifier.java
index bf72c19..e520271 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/DatasetCopyIdentifier.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/DatasetCopyIdentifier.java
@@ -71,8 +71,8 @@ public class DatasetCopyIdentifier implements Serializable {
     }
 
     public boolean isMatch(ResourceReference resourceReference) {
-        return resourceReference.getDataverse().equals(dataverse.getCanonicalForm())
-                && resourceReference.getDataset().equals(dataset) && resourceReference.getRebalance().equals(rebalance);
+        return resourceReference.getDataverse().equals(dataverse) && resourceReference.getDataset().equals(dataset)
+                && resourceReference.getRebalance().equals(rebalance);
     }
 
     @Override
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/ResourceReference.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/ResourceReference.java
index 7791926..0e78152 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/ResourceReference.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/ResourceReference.java
@@ -21,24 +21,34 @@ package org.apache.asterix.common.storage;
 import java.io.File;
 import java.nio.file.Path;
 import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
 
+import org.apache.asterix.common.metadata.DataverseName;
 import org.apache.asterix.common.utils.StorageConstants;
+import org.apache.asterix.common.utils.StoragePathUtil;
+import org.apache.commons.lang3.ArrayUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hyracks.storage.am.lsm.common.impls.IndexComponentFileReference;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
 
 public class ResourceReference {
 
+    private static final Logger LOGGER = LogManager.getLogger();
     protected final String root;
     protected final String partition;
-    protected final String dataverse; // == DataverseName.getCanonicalForm()
+    protected final DataverseName dataverse;
     protected final String dataset;
     protected final String rebalance;
     protected final String index;
     protected final String name;
-    private volatile Path relativePath;
+    private final Path relativePath;
 
     protected ResourceReference(String path) {
         // format: root/partition/dataverse/dataset/rebalanceCount/index/fileName
+        // format: root/partition/dataverse_p1[/^dataverse_p2[/^dataverse_p3...]]/dataset/rebalanceCount/index/fileName
         final String[] tokens = StringUtils.split(path, File.separatorChar);
         if (tokens.length < 6) {
             throw new IllegalStateException("Unrecognized path structure: " + path);
@@ -48,9 +58,40 @@ public class ResourceReference {
         index = tokens[--offset];
         rebalance = tokens[--offset];
         dataset = tokens[--offset];
-        dataverse = tokens[--offset]; //TODO(MULTI_PART_DATAVERSE_NAME):REVISIT
-        partition = tokens[--offset];
-        root = tokens[--offset];
+        List<String> dvParts = new ArrayList<>();
+        String dvPart = tokens[--offset];
+        while (dvPart.charAt(0) == StoragePathUtil.DATAVERSE_CONTINUATION_MARKER) {
+            dvParts.add(dvPart.substring(1));
+            dvPart = tokens[--offset];
+        }
+        String probablyPartition = tokens[--offset];
+        if (dvParts.isEmpty()) {
+            // root/partition/dataverse/dataset/rebalanceCount/index/fileName
+            dataverse = DataverseName.createSinglePartName(dvPart);
+            partition = probablyPartition;
+            root = tokens[--offset];
+        } else if (probablyPartition.startsWith(StorageConstants.PARTITION_DIR_PREFIX)) {
+            // root/partition/dataverse_p1/^dataverse_p2/.../^dataverse_pn/dataset/rebalanceCount/index/fileName
+            dvParts.add(dvPart);
+            Collections.reverse(dvParts);
+            dataverse = DataverseName.create(dvParts);
+            partition = probablyPartition;
+            root = tokens[--offset];
+        } else if (dvPart.startsWith(StorageConstants.PARTITION_DIR_PREFIX)) {
+            // root/partition/dataverse/dataset/rebalanceCount/index/fileName (where dataverse starts with ^)
+            if (dvParts.size() != 1) {
+                throw new IllegalArgumentException("unable to parse path: '" + path + "'!");
+            }
+            dataverse =
+                    DataverseName.createSinglePartName(StoragePathUtil.DATAVERSE_CONTINUATION_MARKER + dvParts.get(0));
+            LOGGER.info("legacy dataverse starting with ^ found: '{}'; this is not supported for new dataverses",
+                    dataverse);
+            partition = dvPart;
+            root = probablyPartition;
+        } else {
+            throw new IllegalArgumentException("unable to parse path: '" + path + "'!");
+        }
+        relativePath = Paths.get(root, ArrayUtils.subarray(tokens, offset + 1, tokens.length - 1));
     }
 
     public static ResourceReference ofIndex(String indexPath) {
@@ -65,7 +106,7 @@ public class ResourceReference {
         return partition;
     }
 
-    public String getDataverse() { //TODO(MULTI_PART_DATAVERSE_NAME):REVISIT
+    public DataverseName getDataverse() {
         return dataverse;
     }
 
@@ -86,19 +127,15 @@ public class ResourceReference {
     }
 
     public Path getRelativePath() {
-        if (relativePath == null) {
-            relativePath = Paths.get(root, partition, dataverse, dataset, rebalance, index);
-        }
         return relativePath;
     }
 
     public ResourceReference getDatasetReference() {
-        return ResourceReference
-                .ofIndex(Paths.get(root, partition, dataverse, dataset, rebalance, dataset).toFile().getPath());
+        return ResourceReference.ofIndex(relativePath.getParent().resolve(dataset).toFile().getPath());
     }
 
     public Path getFileRelativePath() {
-        return Paths.get(root, partition, dataverse, dataset, rebalance, index, name);
+        return relativePath.resolve(name);
     }
 
     public int getPartitionNum() {
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/StoragePathUtil.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/StoragePathUtil.java
index 587b8b3..32a226e 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/StoragePathUtil.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/StoragePathUtil.java
@@ -21,6 +21,7 @@ package org.apache.asterix.common.utils;
 import java.io.File;
 import java.nio.file.Path;
 import java.nio.file.Paths;
+import java.util.Iterator;
 
 import org.apache.asterix.common.cluster.ClusterPartition;
 import org.apache.asterix.common.metadata.DataverseName;
@@ -40,6 +41,7 @@ import org.apache.logging.log4j.Logger;
 public class StoragePathUtil {
 
     private static final Logger LOGGER = LogManager.getLogger();
+    public static final char DATAVERSE_CONTINUATION_MARKER = '^';
 
     private StoragePathUtil() {
     }
@@ -66,11 +68,21 @@ public class StoragePathUtil {
 
     public static String prepareDataverseIndexName(DataverseName dataverseName, String datasetName, String idxName,
             long rebalanceCount) {
-        return prepareDataverseIndexName(dataverseName, prepareFullIndexName(datasetName, idxName, rebalanceCount));
+        return prepareDataverseComponentName(dataverseName, prepareFullIndexName(datasetName, idxName, rebalanceCount));
     }
 
-    public static String prepareDataverseIndexName(DataverseName dataverseName, String fullIndexName) {
-        return dataverseName.getCanonicalForm() + File.separator + fullIndexName; //TODO(MULTI_PART_DATAVERSE_NAME):REVISIT
+    public static String prepareDataverseName(DataverseName dataverseName) {
+        Iterator<String> dvParts = dataverseName.getParts().iterator();
+        StringBuilder builder = new StringBuilder();
+        builder.append(dvParts.next());
+        while (dvParts.hasNext()) {
+            builder.append(File.separatorChar).append(DATAVERSE_CONTINUATION_MARKER).append(dvParts.next());
+        }
+        return builder.toString();
+    }
+
+    public static String prepareDataverseComponentName(DataverseName dataverseName, String component) {
+        return prepareDataverseName(dataverseName) + File.separatorChar + component;
     }
 
     private static String prepareFullIndexName(String datasetName, String idxName, long rebalanceCount) {
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/FeedUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/FeedUtils.java
index 2110dee..7f3d911 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/FeedUtils.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/FeedUtils.java
@@ -86,10 +86,10 @@ public class FeedUtils {
 
     public static FileSplit splitsForAdapter(DataverseName dataverseName, String feedName, String nodeName,
             ClusterPartition partition) {
-        String relPathFile = dataverseName.getCanonicalForm() + File.separator + feedName; //TODO(MULTI_PART_DATAVERSE_NAME):REVISIT
+        String relPathFile = StoragePathUtil.prepareDataverseComponentName(dataverseName, feedName);
         String storagePartitionPath = StoragePathUtil.prepareStoragePartitionPath(partition.getPartitionId());
         // Note: feed adapter instances in a single node share the feed logger
-        // format: 'storage dir name'/partition_#/dataverse/feed/node
+        // format: 'storage dir name'/partition_#/dataverse_part1[/^dataverse_part2[...]]/feed/node
         File f = new File(storagePartitionPath + File.separator + relPathFile + File.separator + nodeName);
         return StoragePathUtil.getFileSplitForClusterPartition(partition, f.getPath());
     }
diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SplitsAndConstraintsUtil.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SplitsAndConstraintsUtil.java
index b93674c..c85f661 100644
--- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SplitsAndConstraintsUtil.java
+++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SplitsAndConstraintsUtil.java
@@ -48,11 +48,10 @@ public class SplitsAndConstraintsUtil {
             DataverseName dataverseName) {
         List<FileSplit> splits = new ArrayList<>();
         // get all partitions
-        ClusterPartition[] clusterPartition = clusterStateManager.getClusterPartitons();
-        for (int j = 0; j < clusterPartition.length; j++) {
-            File f = new File(StoragePathUtil.prepareStoragePartitionPath(clusterPartition[j].getPartitionId()),
-                    dataverseName.getCanonicalForm()); //TODO(MULTI_PART_DATAVERSE_NAME):REVISIT
-            splits.add(StoragePathUtil.getFileSplitForClusterPartition(clusterPartition[j], f.getPath()));
+        for (ClusterPartition clusterPartition : clusterStateManager.getClusterPartitons()) {
+            File f = new File(StoragePathUtil.prepareStoragePartitionPath(clusterPartition.getPartitionId()),
+                    StoragePathUtil.prepareDataverseName(dataverseName));
+            splits.add(StoragePathUtil.getFileSplitForClusterPartition(clusterPartition, f.getPath()));
         }
         return splits.toArray(new FileSplit[] {});
     }