Posted to common-commits@hadoop.apache.org by ma...@apache.org on 2017/05/25 12:13:15 UTC

[3/6] hadoop git commit: HADOOP-13760. S3Guard: add delete tracking.

http://git-wip-us.apache.org/repos/asf/hadoop/blob/2f3305db/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestListing.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestListing.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestListing.java
new file mode 100644
index 0000000..43eb2c0
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestListing.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
+
+/**
+ * Tests for {@link Listing}; keeps the small mock helper classes under control.
+ */
+public class TestListing extends AbstractS3AMockTest {
+
+  private static class MockRemoteIterator<FileStatus> implements
+      RemoteIterator<FileStatus> {
+    private Iterator<FileStatus> iterator;
+
+    MockRemoteIterator(Collection<FileStatus> source) {
+      iterator = source.iterator();
+    }
+
+    public boolean hasNext() {
+      return iterator.hasNext();
+    }
+
+    public FileStatus next() {
+      return iterator.next();
+    }
+  }
+
+  private FileStatus blankFileStatus(Path path) {
+    return new FileStatus(0, true, 0, 0, 0, path);
+  }
+
+  @Test
+  public void testTombstoneReconcilingIterator() throws Exception {
+    Path parent = new Path("/parent");
+    Path liveChild = new Path(parent, "/liveChild");
+    Path deletedChild = new Path(parent, "/deletedChild");
+    Path[] allFiles = {parent, liveChild, deletedChild};
+    Path[] liveFiles = {parent, liveChild};
+
+    Listing listing = new Listing(fs);
+    Collection<FileStatus> statuses = new ArrayList<>();
+    statuses.add(blankFileStatus(parent));
+    statuses.add(blankFileStatus(liveChild));
+    statuses.add(blankFileStatus(deletedChild));
+
+    Set<Path> tombstones = new HashSet<>();
+    tombstones.add(deletedChild);
+
+    RemoteIterator<FileStatus> sourceIterator = new MockRemoteIterator(
+        statuses);
+    RemoteIterator<LocatedFileStatus> locatedIterator =
+        listing.createLocatedFileStatusIterator(sourceIterator);
+    RemoteIterator<LocatedFileStatus> reconcilingIterator =
+        listing.createTombstoneReconcilingIterator(locatedIterator, tombstones);
+
+    Set<Path> expectedPaths = new HashSet<>();
+    expectedPaths.add(parent);
+    expectedPaths.add(liveChild);
+
+    Set<Path> actualPaths = new HashSet<>();
+    while (reconcilingIterator.hasNext()) {
+      actualPaths.add(reconcilingIterator.next().getPath());
+    }
+    Assert.assertEquals(expectedPaths, actualPaths);
+  }
+}

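The new TestListing above exercises Listing.createTombstoneReconcilingIterator, which wraps a RemoteIterator and drops every entry whose path appears in a set of tombstones, so only /parent and /parent/liveChild survive. A minimal standalone sketch of that filtering idea (the class, generics and method names below are illustrative only, not the Listing implementation):

    import java.util.Iterator;
    import java.util.NoSuchElementException;
    import java.util.Set;
    import java.util.function.Function;

    /** Sketch: skip elements whose key is present in a tombstone set. */
    final class TombstoneFilteringIterator<T, K> implements Iterator<T> {
      private final Iterator<T> source;
      private final Set<K> tombstones;
      private final Function<T, K> keyOf;
      private T next;

      TombstoneFilteringIterator(Iterator<T> source, Set<K> tombstones,
          Function<T, K> keyOf) {
        this.source = source;
        this.tombstones = tombstones;
        this.keyOf = keyOf;
        advance();
      }

      /** Move to the next element that is not tombstoned; null when exhausted. */
      private void advance() {
        next = null;
        while (source.hasNext()) {
          T candidate = source.next();
          if (!tombstones.contains(keyOf.apply(candidate))) {
            next = candidate;
            break;
          }
        }
      }

      @Override
      public boolean hasNext() {
        return next != null;
      }

      @Override
      public T next() {
        if (next == null) {
          throw new NoSuchElementException();
        }
        T result = next;
        advance();
        return result;
      }
    }

With the test's statuses keyed by their paths and the tombstone set containing /parent/deletedChild, such a filter yields exactly the expectedPaths the test asserts.
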
http://git-wip-us.apache.org/repos/asf/hadoop/blob/2f3305db/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java
index 99acf6e..dfa8a9e 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.fs.s3a.s3guard;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.fs.s3a.S3ATestUtils;
 import org.apache.hadoop.fs.s3a.Tristate;
@@ -134,12 +135,52 @@ public abstract class MetadataStoreTestBase extends Assert {
   }
 
   /**
+   * Helper function for verifying DescendantsIterator and
+   * MetadataStoreListFilesIterator behavior.
+   * @param createNodes List of paths to create
+   * @param checkNodes List of paths that the iterator should return
+   * @throws Exception if a metadata store operation fails
+   */
+  private void doTestDescendantsIterator(
+      Class implementation, String[] createNodes,
+      String[] checkNodes) throws Exception {
+    // we set up the example file system tree in metadata store
+    for (String pathStr : createNodes) {
+      final FileStatus status = pathStr.contains("file")
+          ? basicFileStatus(strToPath(pathStr), 100, false)
+          : basicFileStatus(strToPath(pathStr), 0, true);
+      ms.put(new PathMetadata(status));
+    }
+
+    final PathMetadata rootMeta = new PathMetadata(makeDirStatus("/"));
+    RemoteIterator<FileStatus> iterator;
+    if (implementation == DescendantsIterator.class) {
+      iterator = new DescendantsIterator(ms, rootMeta);
+    } else if (implementation == MetadataStoreListFilesIterator.class) {
+      iterator = new MetadataStoreListFilesIterator(ms, rootMeta, false);
+    } else {
+      throw new UnsupportedOperationException("Unrecognized class");
+    }
+
+    final Set<String> actual = new HashSet<>();
+    while (iterator.hasNext()) {
+      final Path p = iterator.next().getPath();
+      actual.add(Path.getPathWithoutSchemeAndAuthority(p).toString());
+    }
+    LOG.info("We got {} by iterating DescendantsIterator", actual);
+
+    if (!allowMissing()) {
+      assertEquals(Sets.newHashSet(checkNodes), actual);
+    }
+  }
+
+  /**
    * Test that we can get the whole sub-tree by iterating DescendantsIterator.
    *
    * The tree is similar to or same as the example in code comment.
    */
   @Test
-  public void testDescendantsIterator() throws IOException {
+  public void testDescendantsIterator() throws Exception {
     final String[] tree = new String[] {
         "/dir1",
         "/dir1/dir2",
@@ -152,26 +193,38 @@ public abstract class MetadataStoreTestBase extends Assert {
         "/dir1/dir3/dir5/file4",
         "/dir1/dir3/dir6"
     };
-    // we set up the example file system tree in metadata store
-    for (String pathStr : tree) {
-      final FileStatus status = pathStr.contains("file")
-          ? basicFileStatus(strToPath(pathStr), 100, false)
-          : basicFileStatus(strToPath(pathStr), 0, true);
-      ms.put(new PathMetadata(status));
-    }
-
-    final Set<String> actual = new HashSet<>();
-    final PathMetadata rootMeta = new PathMetadata(makeDirStatus("/"));
-    for (DescendantsIterator desc = new DescendantsIterator(ms, rootMeta);
-         desc.hasNext();) {
-      final Path p = desc.next().getPath();
-      actual.add(Path.getPathWithoutSchemeAndAuthority(p).toString());
-    }
-    LOG.info("We got {} by iterating DescendantsIterator", actual);
+    doTestDescendantsIterator(DescendantsIterator.class,
+        tree, tree);
+  }
 
-    if (!allowMissing()) {
-      assertEquals(Sets.newHashSet(tree), actual);
-    }
+  /**
+   * Test that we can get the correct subset of the tree with
+   * MetadataStoreListFilesIterator.
+   *
+   * The tree is similar to or same as the example in code comment.
+   */
+  @Test
+  public void testMetadataStoreListFilesIterator() throws Exception {
+    final String[] wholeTree = new String[] {
+        "/dir1",
+        "/dir1/dir2",
+        "/dir1/dir3",
+        "/dir1/dir2/file1",
+        "/dir1/dir2/file2",
+        "/dir1/dir3/dir4",
+        "/dir1/dir3/dir5",
+        "/dir1/dir3/dir4/file3",
+        "/dir1/dir3/dir5/file4",
+        "/dir1/dir3/dir6"
+    };
+    final String[] leafNodes = new String[] {
+        "/dir1/dir2/file1",
+        "/dir1/dir2/file2",
+        "/dir1/dir3/dir4/file3",
+        "/dir1/dir3/dir5/file4"
+    };
+    doTestDescendantsIterator(MetadataStoreListFilesIterator.class, wholeTree,
+        leafNodes);
   }
 
   @Test
@@ -258,7 +311,7 @@ public abstract class MetadataStoreTestBase extends Assert {
     /* Ensure delete happened. */
     assertDirectorySize("/ADirectory1/db1", 1);
     PathMetadata meta = ms.get(strToPath("/ADirectory1/db1/file2"));
-    assertNull("File deleted", meta);
+    assertTrue("File deleted", meta == null || meta.isDeleted());
   }
 
   @Test
@@ -284,10 +337,10 @@ public abstract class MetadataStoreTestBase extends Assert {
     ms.deleteSubtree(strToPath(p + "/ADirectory1/db1/"));
 
     assertEmptyDirectory(p + "/ADirectory1");
-    assertNotCached(p + "/ADirectory1/db1");
-    assertNotCached(p + "/ADirectory1/file1");
-    assertNotCached(p + "/ADirectory1/file2");
-    assertNotCached(p + "/ADirectory1/db1/dc1/dd1/deepFile");
+    assertDeleted(p + "/ADirectory1/db1");
+    assertDeleted(p + "/ADirectory1/file1");
+    assertDeleted(p + "/ADirectory1/file2");
+    assertDeleted(p + "/ADirectory1/db1/dc1/dd1/deepFile");
     assertEmptyDirectory(p + "/ADirectory2");
   }
 
@@ -302,11 +355,11 @@ public abstract class MetadataStoreTestBase extends Assert {
     setUpDeleteTest();
 
     ms.deleteSubtree(strToPath("/"));
-    assertNotCached("/ADirectory1");
-    assertNotCached("/ADirectory2");
-    assertNotCached("/ADirectory2/db1");
-    assertNotCached("/ADirectory2/db1/file1");
-    assertNotCached("/ADirectory2/db1/file2");
+    assertDeleted("/ADirectory1");
+    assertDeleted("/ADirectory2");
+    assertDeleted("/ADirectory2/db1");
+    assertDeleted("/ADirectory2/db1/file1");
+    assertDeleted("/ADirectory2/db1/file2");
   }
 
   @Test
@@ -350,6 +403,12 @@ public abstract class MetadataStoreTestBase extends Assert {
       verifyFileStatus(meta.getFileStatus(), 100);
     }
 
+    if (!(ms instanceof NullMetadataStore)) {
+      ms.delete(strToPath(filePath));
+      meta = ms.get(strToPath(filePath));
+      assertTrue("Tombstone not left for deleted file", meta.isDeleted());
+    }
+
     meta = ms.get(strToPath(dirPath));
     if (!allowMissing() || meta != null) {
       assertNotNull("Get found file (dir)", meta);
@@ -441,6 +500,7 @@ public abstract class MetadataStoreTestBase extends Assert {
 
     dirMeta = ms.listChildren(strToPath("/a1"));
     if (!allowMissing() || dirMeta != null) {
+      dirMeta = dirMeta.withoutTombstones();
       assertListingsEqual(dirMeta.getListing(), "/a1/b1", "/a1/b2");
     }
 
@@ -486,6 +546,7 @@ public abstract class MetadataStoreTestBase extends Assert {
     Collection<PathMetadata> entries;
     DirListingMetadata dirMeta = ms.listChildren(strToPath("/"));
     if (!allowMissing() || dirMeta != null) {
+      dirMeta = dirMeta.withoutTombstones();
       assertNotNull("Listing root", dirMeta);
       entries = dirMeta.getListing();
       assertListingsEqual(entries, "/a1", "/a2", "/a3");
@@ -513,13 +574,12 @@ public abstract class MetadataStoreTestBase extends Assert {
     dirMeta = ms.listChildren(strToPath("/a1"));
     if (!allowMissing() || dirMeta != null) {
       assertNotNull("Listing /a1", dirMeta);
-      entries = dirMeta.getListing();
+      entries = dirMeta.withoutTombstones().getListing();
       assertListingsEqual(entries, "/a1/b2");
     }
 
     PathMetadata meta = ms.get(strToPath("/a1/b1/file1"));
-    // TODO allow return of PathMetadata with isDeleted == true
-    assertNull("Src path deleted", meta);
+    assertTrue("Src path deleted", meta == null || meta.isDeleted());
 
     // Assert dest looks right
     meta = ms.get(strToPath("/b1/file1"));
@@ -596,7 +656,7 @@ public abstract class MetadataStoreTestBase extends Assert {
     ms.prune(cutoff);
     ls = ms.listChildren(strToPath("/pruneFiles"));
     if (allowMissing()) {
-      assertNotCached("/pruneFiles/old");
+      assertDeleted("/pruneFiles/old");
     } else {
       assertListingsEqual(ls.getListing(), "/pruneFiles/new");
     }
@@ -625,7 +685,7 @@ public abstract class MetadataStoreTestBase extends Assert {
 
     ms.prune(cutoff);
 
-    assertNotCached("/pruneDirs/dir/file");
+    assertDeleted("/pruneDirs/dir/file");
   }
 
   /*
@@ -646,6 +706,7 @@ public abstract class MetadataStoreTestBase extends Assert {
         "file3"));
     DirListingMetadata dirMeta = ms.listChildren(strToPath(parent));
     if (!allowMissing() || dirMeta != null) {
+      dirMeta = dirMeta.withoutTombstones();
       assertNotNull("list after putListStatus", dirMeta);
       Collection<PathMetadata> entries = dirMeta.getListing();
       assertNotNull("listStatus has entries", entries);
@@ -700,6 +761,7 @@ public abstract class MetadataStoreTestBase extends Assert {
       assertNotNull("Directory " + pathStr + " in cache", dirMeta);
     }
     if (!allowMissing() || dirMeta != null) {
+      dirMeta = dirMeta.withoutTombstones();
       assertEquals("Number of entries in dir " + pathStr, size,
           nonDeleted(dirMeta.getListing()).size());
     }
@@ -708,21 +770,27 @@ public abstract class MetadataStoreTestBase extends Assert {
   /** @return only file statuses which are *not* marked deleted. */
   private Collection<PathMetadata> nonDeleted(
       Collection<PathMetadata> statuses) {
-    /* TODO: filter out paths marked for deletion. */
-    return statuses;
+    Collection<PathMetadata> currentStatuses = new ArrayList<>();
+    for (PathMetadata status : statuses) {
+      if (!status.isDeleted()) {
+        currentStatuses.add(status);
+      }
+    }
+    return currentStatuses;
   }
 
-  private void assertNotCached(String pathStr) throws IOException {
-    // TODO this should return an entry with deleted flag set
+  private void assertDeleted(String pathStr) throws IOException {
     Path path = strToPath(pathStr);
     PathMetadata meta = ms.get(path);
-    assertNull(pathStr + " should not be cached.", meta);
+    boolean cached = meta != null && !meta.isDeleted();
+    assertFalse(pathStr + " should not be cached.", cached);
   }
 
   protected void assertCached(String pathStr) throws IOException {
     Path path = strToPath(pathStr);
     PathMetadata meta = ms.get(path);
-    assertNotNull(pathStr + " should be cached.", meta);
+    boolean cached = meta != null && !meta.isDeleted();
+    assertTrue(pathStr + " should be cached.", cached);
   }
 
   /**

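The assertion changes above (assertNotCached becoming assertDeleted, and assertNull becoming a null-or-isDeleted check) capture the core of delete tracking: a delete no longer has to remove the entry from the MetadataStore, it may instead leave a marker whose isDeleted() flag is set. A rough, self-contained sketch of that behaviour (names are illustrative, not the PathMetadata/MetadataStore API):

    import java.util.HashMap;
    import java.util.Map;

    /** Sketch of a store where delete leaves a tombstone instead of removing. */
    final class TombstoningStoreSketch {
      /** Value is the "deleted" flag for the path held in the key. */
      private final Map<String, Boolean> entries = new HashMap<>();

      void put(String path) {
        entries.put(path, Boolean.FALSE);
      }

      /** Delete by tombstoning: keep the key, mark it deleted. */
      void delete(String path) {
        entries.put(path, Boolean.TRUE);
      }

      /** The condition assertDeleted now checks: absent or tombstoned. */
      boolean isDeletedOrAbsent(String path) {
        Boolean deleted = entries.get(path);
        return deleted == null || deleted;
      }

      /** The condition assertCached now checks: present and not tombstoned. */
      boolean isLive(String path) {
        Boolean deleted = entries.get(path);
        return deleted != null && !deleted;
      }
    }

Under this model a later put of the same path simply replaces the tombstone, and listings filter tombstoned entries out, which is what DirListingMetadata.withoutTombstones() does in the hunks above.
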
http://git-wip-us.apache.org/repos/asf/hadoop/blob/2f3305db/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestDynamoDBMetadataStore.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestDynamoDBMetadataStore.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestDynamoDBMetadataStore.java
index 3584b54..27416bb 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestDynamoDBMetadataStore.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestDynamoDBMetadataStore.java
@@ -294,7 +294,7 @@ public class TestDynamoDBMetadataStore extends MetadataStoreTestBase {
     if (oldMetas != null) {
       // put all metadata of old paths and verify
       ms.put(new DirListingMetadata(oldDir, oldMetas, false));
-      assertEquals(0, ms.listChildren(newDir).numEntries());
+      assertEquals(0, ms.listChildren(newDir).withoutTombstones().numEntries());
       assertTrue(CollectionUtils.isEqualCollection(oldMetas,
           ms.listChildren(oldDir).getListing()));
 
@@ -306,7 +306,7 @@ public class TestDynamoDBMetadataStore extends MetadataStoreTestBase {
 
     // move the old paths to new paths and verify
     ms.move(pathsToDelete, newMetas);
-    assertEquals(0, ms.listChildren(oldDir).numEntries());
+    assertEquals(0, ms.listChildren(oldDir).withoutTombstones().numEntries());
     if (newMetas != null) {
       assertTrue(CollectionUtils.isEqualCollection(newMetas,
           ms.listChildren(newDir).getListing()));

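After a move, the listing of the source directory can still contain tombstoned entries, so a plain numEntries() would no longer be zero; the test therefore strips tombstones before counting. A hypothetical assertion helper capturing that pattern (assertNoLiveEntries is made up here, while DirListingMetadata.withoutTombstones() and numEntries() are the calls used in the hunk above):

    import static org.junit.Assert.assertEquals;

    import org.apache.hadoop.fs.s3a.s3guard.DirListingMetadata;

    /** Hypothetical helper: a directory is "empty" once tombstones are ignored. */
    final class ListingAssertions {
      static void assertNoLiveEntries(DirListingMetadata listing) {
        assertEquals("expected no live entries",
            0, listing.withoutTombstones().numEntries());
      }
    }
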
http://git-wip-us.apache.org/repos/asf/hadoop/blob/2f3305db/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestLocalMetadataStore.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestLocalMetadataStore.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestLocalMetadataStore.java
index 4cffc6f..89d0498 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestLocalMetadataStore.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestLocalMetadataStore.java
@@ -75,7 +75,7 @@ public class TestLocalMetadataStore extends MetadataStoreTestBase {
 
   @Test
   public void testClearByAncestor() {
-    Map<Path, String> map = new HashMap<>();
+    Map<Path, PathMetadata> map = new HashMap<>();
 
     // 1. Test paths without scheme/host
     assertClearResult(map, "", "/", 0);
@@ -90,21 +90,37 @@ public class TestLocalMetadataStore extends MetadataStoreTestBase {
     assertClearResult(map, p, "/invalid", 5);
   }
 
-  private static void populateMap(Map<Path, String> map, String prefix) {
-    String dummyVal = "dummy";
-    map.put(new Path(prefix + "/dirA/dirB/"), dummyVal);
-    map.put(new Path(prefix + "/dirA/dirB/dirC"), dummyVal);
-    map.put(new Path(prefix + "/dirA/dirB/dirC/file1"), dummyVal);
-    map.put(new Path(prefix + "/dirA/dirB/dirC/file2"), dummyVal);
-    map.put(new Path(prefix + "/dirA/file1"), dummyVal);
+  private static void populateMap(Map<Path, PathMetadata> map,
+      String prefix) {
+    populateEntry(map, new Path(prefix + "/dirA/dirB/"));
+    populateEntry(map, new Path(prefix + "/dirA/dirB/dirC"));
+    populateEntry(map, new Path(prefix + "/dirA/dirB/dirC/file1"));
+    populateEntry(map, new Path(prefix + "/dirA/dirB/dirC/file2"));
+    populateEntry(map, new Path(prefix + "/dirA/file1"));
   }
 
-  private static void assertClearResult(Map <Path, String> map,
+  private static void populateEntry(Map<Path, PathMetadata> map,
+      Path path) {
+    map.put(path, new PathMetadata(new FileStatus(0, true, 0, 0, 0, path)));
+  }
+
+  private static int sizeOfMap(Map<Path, PathMetadata> map) {
+    int count = 0;
+    for (PathMetadata meta : map.values()) {
+      if (!meta.isDeleted()) {
+        count++;
+      }
+    }
+    return count;
+  }
+
+  private static void assertClearResult(Map <Path, PathMetadata> map,
       String prefixStr, String pathStr, int leftoverSize) {
     populateMap(map, prefixStr);
-    LocalMetadataStore.clearHashByAncestor(new Path(prefixStr + pathStr), map);
+    LocalMetadataStore.deleteHashByAncestor(new Path(prefixStr + pathStr), map,
+        true);
     assertEquals(String.format("Map should have %d entries", leftoverSize),
-        leftoverSize, map.size());
+        leftoverSize, sizeOfMap(map));
     map.clear();
   }
 

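The LocalMetadataStore test now calls deleteHashByAncestor, which marks every entry at or below a path as deleted rather than removing it, and sizeOfMap then counts only the live entries. A rough, self-contained sketch of ancestor-based tombstoning under that assumption (purely illustrative, not the LocalMetadataStore implementation):

    import java.util.Map;

    /** Sketch: tombstone every entry at or below an ancestor path. */
    final class AncestorTombstoneSketch {
      /** True if path equals the ancestor or sits underneath it. */
      static boolean isUnder(String ancestor, String path) {
        String prefix = ancestor.endsWith("/") ? ancestor : ancestor + "/";
        return path.equals(ancestor) || path.startsWith(prefix);
      }

      /** Values are "deleted" flags; flip them instead of removing the keys. */
      static void tombstoneByAncestor(String ancestor,
          Map<String, Boolean> deletedByPath) {
        for (Map.Entry<String, Boolean> e : deletedByPath.entrySet()) {
          if (isUnder(ancestor, e.getKey())) {
            e.setValue(Boolean.TRUE);
          }
        }
      }

      /** What sizeOfMap above measures: entries that are not tombstoned. */
      static int liveCount(Map<String, Boolean> deletedByPath) {
        int count = 0;
        for (Boolean deleted : deletedByPath.values()) {
          if (!deleted) {
            count++;
          }
        }
        return count;
      }
    }
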
http://git-wip-us.apache.org/repos/asf/hadoop/blob/2f3305db/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractITestS3AMetadataStoreScale.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractITestS3AMetadataStoreScale.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractITestS3AMetadataStoreScale.java
index 9b8e3c1..876cc80 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractITestS3AMetadataStoreScale.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractITestS3AMetadataStoreScale.java
@@ -114,7 +114,7 @@ public abstract class AbstractITestS3AMetadataStoreScale extends
         describe("Running move workload");
         NanoTimer moveTimer = new NanoTimer();
         LOG.info("Running {} moves of {} paths each", operations,
-            origPaths.size());
+            origMetas.size());
         for (int i = 0; i < operations; i++) {
           Collection<Path> toDelete;
           Collection<PathMetadata> toCreate;

