You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by st...@apache.org on 2017/07/07 14:02:51 UTC

hadoop git commit: HADOOP-14457. create() does not notify metadataStore of parent directories or ensure they're not existing files. Contributed by Sean Mackrory. [Forced Update!]

Repository: hadoop
Updated Branches:
  refs/heads/HADOOP-13345 930feb100 -> 30e1146c5 (forced update)


HADOOP-14457. create() does not notify metadataStore of parent directories or
ensure they're not existing files.
Contributed by Sean Mackrory.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/30e1146c
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/30e1146c
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/30e1146c

Branch: refs/heads/HADOOP-13345
Commit: 30e1146c515d154544707d0e4d4a1c743e6d518f
Parents: 309b8c0
Author: Steve Loughran <st...@apache.org>
Authored: Fri Jul 7 14:55:50 2017 +0100
Committer: Steve Loughran <st...@apache.org>
Committed: Fri Jul 7 15:02:22 2017 +0100

----------------------------------------------------------------------
 .../org/apache/hadoop/fs/s3a/S3AFileSystem.java |  3 +-
 .../fs/s3a/s3guard/DynamoDBMetadataStore.java   | 60 +++++++++++--------
 .../fs/s3a/s3guard/LocalMetadataStore.java      |  7 +++
 .../hadoop/fs/s3a/s3guard/MetadataStore.java    | 10 ++++
 .../fs/s3a/s3guard/NullMetadataStore.java       |  5 ++
 .../apache/hadoop/fs/s3a/s3guard/S3Guard.java   | 25 +++++++-
 .../hadoop/fs/s3a/ITestS3GuardCreate.java       | 61 ++++++++++++++++++++
 7 files changed, 145 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/30e1146c/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index 878f2f7..3279f1c 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -1773,7 +1773,7 @@ public class S3AFileSystem extends FileSystem {
       String key = pathToKey(f);
       createFakeDirectory(key);
     }
-    S3Guard.makeDirsOrdered(metadataStore, metadataStoreDirs, username);
+    S3Guard.makeDirsOrdered(metadataStore, metadataStoreDirs, username, true);
     // this is complicated because getParent(a/b/c/) returns a/b/c, but
     // we want a/b. See HADOOP-14428 for more details.
     deleteUnnecessaryFakeDirectories(new Path(f.toString()).getParent());
@@ -2292,6 +2292,7 @@ public class S3AFileSystem extends FileSystem {
     // See note about failure semantics in s3guard.md doc.
     try {
       if (hasMetadataStore()) {
+        S3Guard.addAncestors(metadataStore, p, username);
         S3AFileStatus status = createUploadFileStatus(p,
             S3AUtils.objectRepresentsDirectory(key, length), length,
             getDefaultBlockSize(p), username);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/30e1146c/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
index 784b815..97c9135 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
@@ -25,7 +25,7 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Date;
-import java.util.HashSet;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.TimeUnit;
@@ -436,6 +436,30 @@ public class DynamoDBMetadataStore implements MetadataStore {
     }
   }
 
+  Collection<PathMetadata> completeAncestry(
+      Collection<PathMetadata> pathsToCreate) {
+    // Key on path to allow fast lookup
+    Map<Path, PathMetadata> ancestry = new HashMap<>();
+
+    for (PathMetadata meta : pathsToCreate) {
+      Preconditions.checkArgument(meta != null);
+      Path path = meta.getFileStatus().getPath();
+      if (path.isRoot()) {
+        break;
+      }
+      ancestry.put(path, meta);
+      Path parent = path.getParent();
+      while (!parent.isRoot() && !ancestry.containsKey(parent)) {
+        LOG.debug("auto-create ancestor path {} for child path {}",
+            parent, path);
+        final FileStatus status = makeDirStatus(parent, username);
+        ancestry.put(parent, new PathMetadata(status, Tristate.FALSE, false));
+        parent = parent.getParent();
+      }
+    }
+     return ancestry.values();
+   }
+
   @Override
   public void move(Collection<Path> pathsToDelete,
       Collection<PathMetadata> pathsToCreate) throws IOException {
@@ -457,27 +481,7 @@ public class DynamoDBMetadataStore implements MetadataStore {
     // ancestor paths that are not explicitly added to paths to create
     Collection<PathMetadata> newItems = new ArrayList<>();
     if (pathsToCreate != null) {
-      newItems.addAll(pathsToCreate);
-      // help set for fast look up; we should avoid putting duplicate paths
-      final Collection<Path> fullPathsToCreate = new HashSet<>();
-      for (PathMetadata meta : pathsToCreate) {
-        fullPathsToCreate.add(meta.getFileStatus().getPath());
-      }
-
-      for (PathMetadata meta : pathsToCreate) {
-        Preconditions.checkArgument(meta != null);
-        Path parent = meta.getFileStatus().getPath().getParent();
-        while (parent != null
-            && !parent.isRoot()
-            && !fullPathsToCreate.contains(parent)) {
-          LOG.debug("move: auto-create ancestor path {} for child path {}",
-              parent, meta.getFileStatus().getPath());
-          final FileStatus status = makeDirStatus(parent, username);
-          newItems.add(new PathMetadata(status, Tristate.FALSE, false));
-          fullPathsToCreate.add(parent);
-          parent = parent.getParent();
-        }
-      }
+      newItems.addAll(completeAncestry(pathsToCreate));
     }
     if (pathsToDelete != null) {
       for (Path meta : pathsToDelete) {
@@ -575,7 +579,17 @@ public class DynamoDBMetadataStore implements MetadataStore {
     // For performance purpose, we generate the full paths to put and use batch
     // write item request to save the items.
     LOG.debug("Saving to table {} in region {}: {}", tableName, region, meta);
-    processBatchWriteRequest(null, pathMetadataToItem(fullPathsToPut(meta)));
+
+    Collection<PathMetadata> wrapper = new ArrayList<>(1);
+    wrapper.add(meta);
+    put(wrapper);
+  }
+
+  @Override
+  public void put(Collection<PathMetadata> metas) throws IOException {
+    LOG.debug("Saving batch to table {} in region {}", tableName, region);
+
+    processBatchWriteRequest(null, pathMetadataToItem(completeAncestry(metas)));
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/hadoop/blob/30e1146c/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java
index b3c7de5..bd1f8df 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java
@@ -273,6 +273,13 @@ public class LocalMetadataStore implements MetadataStore {
     dirHash.put(standardize(meta.getPath()), meta);
   }
 
+  public synchronized void put(Collection<PathMetadata> metas) throws
+      IOException {
+    for (PathMetadata meta : metas) {
+      put(meta);
+    }
+  }
+
   @Override
   public void close() throws IOException {
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/30e1146c/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java
index 9502a8a..76d5cdd 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java
@@ -165,6 +165,16 @@ public interface MetadataStore extends Closeable {
   void put(PathMetadata meta) throws IOException;
 
   /**
+   * Saves metadata for any number of paths.
+   *
+   * Semantics are otherwise the same as single-path puts.
+   *
+   * @param metas the metadata to save
+   * @throws IOException if there is an error
+   */
+  void put(Collection<PathMetadata> metas) throws IOException;
+
+  /**
    * Save directory listing metadata. Callers may save a partial directory
    * listing for a given path, or may store a complete and authoritative copy
    * of the directory listing.  {@code MetadataStore} implementations may

http://git-wip-us.apache.org/repos/asf/hadoop/blob/30e1146c/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java
index 65019eb..c6a4507 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java
@@ -89,6 +89,11 @@ public class NullMetadataStore implements MetadataStore {
   }
 
   @Override
+  public void put(Collection<PathMetadata> meta) throws IOException {
+    return;
+  }
+
+  @Override
   public void put(DirListingMetadata meta) throws IOException {
     return;
   }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/30e1146c/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java
index 19f0e50..51c370c 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java
@@ -30,6 +30,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.s3a.S3AFileStatus;
 import org.apache.hadoop.fs.s3a.S3AInstrumentation;
 import org.apache.hadoop.fs.s3a.S3AUtils;
+import org.apache.hadoop.fs.s3a.Tristate;
 import org.apache.hadoop.util.ReflectionUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -251,9 +252,10 @@ public final class S3Guard {
    *              an empty, dir, and the other dirs only contain their child
    *              dir.
    * @param owner Hadoop user name.
+   * @param authoritative Whether to mark new directories as authoritative.
    */
   public static void makeDirsOrdered(MetadataStore ms, List<Path> dirs,
-      String owner) {
+      String owner, boolean authoritative) {
     if (dirs == null) {
       return;
     }
@@ -286,7 +288,7 @@ public final class S3Guard {
         children.add(new PathMetadata(prevStatus));
       }
       DirListingMetadata dirMeta =
-          new DirListingMetadata(f, children, true);
+          new DirListingMetadata(f, children, authoritative);
       try {
         ms.put(dirMeta);
         ms.put(new PathMetadata(status));
@@ -396,6 +398,25 @@ public final class S3Guard {
     }
   }
 
+  public static void addAncestors(MetadataStore metadataStore,
+      Path qualifiedPath, String username) throws IOException {
+    Collection<PathMetadata> newDirs = new ArrayList<>();
+    Path parent = qualifiedPath.getParent();
+    while (!parent.isRoot()) {
+      PathMetadata directory = metadataStore.get(parent);
+      if (directory == null || directory.isDeleted()) {
+        FileStatus status = new FileStatus(0, true, 1, 0, 0, 0, null, username,
+            null, parent);
+        PathMetadata meta = new PathMetadata(status, Tristate.UNKNOWN, false);
+        newDirs.add(meta);
+      } else {
+        break;
+      }
+      parent = parent.getParent();
+    }
+    metadataStore.put(newDirs);
+  }
+
   private static void addMoveStatus(Collection<Path> srcPaths,
       Collection<PathMetadata> dstMetas, Path srcPath, FileStatus dstStatus)
   {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/30e1146c/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardCreate.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardCreate.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardCreate.java
new file mode 100644
index 0000000..dcc2538
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardCreate.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.s3guard.DirListingMetadata;
+import org.apache.hadoop.fs.s3a.s3guard.MetadataStore;
+import org.junit.Assume;
+import org.junit.Test;
+
+import static org.apache.hadoop.fs.contract.ContractTestUtils.touch;
+
+/**
+ * Home for testing the creation of new files and directories with S3Guard
+ * enabled.
+ */
+public class ITestS3GuardCreate extends AbstractS3ATestBase {
+
+  /**
+   * Test that ancestor creation during S3AFileSystem#create() is properly
+   * accounted for in the MetadataStore.  This should be handled by the
+   * FileSystem, and be a FS contract test, but S3A does not handle ancestors on
+   * create(), so we need to take care in the S3Guard code to do the right
+   * thing.  This may change: See HADOOP-13221 for more detail.
+   */
+  @Test
+  public void testCreatePopulatesFileAncestors() throws Exception {
+    final S3AFileSystem fs = getFileSystem();
+    Assume.assumeTrue(fs.hasMetadataStore());
+    final MetadataStore ms = fs.getMetadataStore();
+    final Path parent = path("testCreatePopulatesFileAncestors");
+
+    try {
+      fs.mkdirs(parent);
+      final Path nestedFile = new Path(parent, "dir1/dir2/file4");
+      touch(fs, nestedFile);
+
+      DirListingMetadata list = ms.listChildren(parent);
+      assertFalse("MetadataStore falsely reports authoritative empty list",
+          list.isEmpty() == Tristate.TRUE);
+    } finally {
+      fs.delete(parent, true);
+    }
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org