You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by fa...@apache.org on 2017/02/21 01:49:15 UTC

hadoop git commit: HADOOP-14041 CLI command to prune old metadata (Contributed by Sean Mackrory)

Repository: hadoop
Updated Branches:
  refs/heads/HADOOP-13345 7a1bce5e2 -> 2ff5812d4


HADOOP-14041 CLI command to prune old metadata (Contributed by Sean Mackrory)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/2ff5812d
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/2ff5812d
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/2ff5812d

Branch: refs/heads/HADOOP-13345
Commit: 2ff5812d4b353fcf82b85a586e15276fe86bd20c
Parents: 7a1bce5
Author: Aaron Fabbri <fa...@apache.org>
Authored: Mon Feb 20 10:25:09 2017 -0800
Committer: Aaron Fabbri <fa...@apache.org>
Committed: Mon Feb 20 16:10:09 2017 -0800

----------------------------------------------------------------------
 .../src/main/resources/core-default.xml         |  23 +++-
 .../org/apache/hadoop/fs/s3a/Constants.java     |  18 ++++
 .../fs/s3a/s3guard/DynamoDBMetadataStore.java   |  41 ++++++++
 .../fs/s3a/s3guard/LocalMetadataStore.java      |  42 ++++++++
 .../hadoop/fs/s3a/s3guard/MetadataStore.java    |  14 +++
 .../fs/s3a/s3guard/NullMetadataStore.java       |   5 +
 .../hadoop/fs/s3a/s3guard/S3GuardTool.java      |  90 +++++++++++++++-
 .../site/markdown/tools/hadoop-aws/s3guard.md   |   8 ++
 .../fs/s3a/s3guard/MetadataStoreTestBase.java   | 105 +++++++++++++++++--
 .../fs/s3a/s3guard/S3GuardToolTestBase.java     |  44 +++++++-
 .../hadoop/fs/s3a/s3guard/TestS3GuardTool.java  |   4 +-
 11 files changed, 380 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/2ff5812d/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
index 84c8950..d4e6635 100644
--- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
+++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
@@ -1260,6 +1260,16 @@
     </description>
 </property>
 
+<property>
+    <name>fs.s3a.s3guard.cli.prune.age</name>
+    <value>86400000</value>
+    <description>
+        Default age (in milliseconds) after which to prune metadata from the
+        metadatastore when the prune command is run.  Can be overridden on the
+        command-line.
+    </description>
+</property>
+
 
 <property>
   <name>fs.s3a.impl</name>
@@ -1331,7 +1341,18 @@
     </description>
 </property>
 
-  <property>
+<property>
+  <name>fs.s3a.s3guard.ddb.background.sleep</name>
+  <value>25</value>
+  <description>
+    Length (in milliseconds) of pause between each batch of deletes when
+    pruning metadata.  Prevents prune operations (which can typically be low
+    priority background operations) from overly interfering with other I/O
+    operations.
+  </description>
+</property>
+
+<property>
   <name>fs.AbstractFileSystem.s3a.impl</name>
   <value>org.apache.hadoop.fs.s3a.S3A</value>
   <description>The implementation class of the S3A AbstractFileSystem.</description>

http://git-wip-us.apache.org/repos/asf/hadoop/blob/2ff5812d/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
index 2cc810c..3636186 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
@@ -313,6 +313,14 @@ public final class Constants {
   /* Constants. */
   public static final String S3_METADATA_STORE_IMPL =
       "fs.s3a.metadatastore.impl";
+
+  /** Minimum period of time (in milliseconds) to keep metadata (may only be
+   * applied when a prune command is manually run).
+   */
+  @InterfaceStability.Unstable
+  public static final String S3GUARD_CLI_PRUNE_AGE =
+      "fs.s3a.s3guard.cli.prune.age";
+
   /**
    * The endpoint of the DynamoDB service.
    *
@@ -366,6 +374,16 @@ public final class Constants {
   public static final int S3GUARD_DDB_MAX_RETRIES_DEFAULT = 9;
 
   /**
+   * Period of time (in milliseconds) to sleep between batches of writes.
+   * Currently only applies to prune operations, as they are naturally a
+   * lower priority than other operations.
+   */
+  @InterfaceStability.Unstable
+  public static final String S3GUARD_DDB_BACKGROUND_SLEEP_MSEC_KEY =
+      "fs.s3a.s3guard.ddb.background.sleep";
+  public static final int S3GUARD_DDB_BACKGROUND_SLEEP_MSEC_DEFAULT = 25;
+
+  /**
    * V1 committer.
    */
   @InterfaceStability.Unstable

http://git-wip-us.apache.org/repos/asf/hadoop/blob/2ff5812d/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
index 46357d7..c7d7ac3 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
@@ -38,10 +38,12 @@ import com.amazonaws.services.dynamodbv2.document.ItemCollection;
 import com.amazonaws.services.dynamodbv2.document.PrimaryKey;
 import com.amazonaws.services.dynamodbv2.document.PutItemOutcome;
 import com.amazonaws.services.dynamodbv2.document.QueryOutcome;
+import com.amazonaws.services.dynamodbv2.document.ScanOutcome;
 import com.amazonaws.services.dynamodbv2.document.Table;
 import com.amazonaws.services.dynamodbv2.document.TableWriteItems;
 import com.amazonaws.services.dynamodbv2.document.spec.GetItemSpec;
 import com.amazonaws.services.dynamodbv2.document.spec.QuerySpec;
+import com.amazonaws.services.dynamodbv2.document.utils.ValueMap;
 import com.amazonaws.services.dynamodbv2.model.CreateTableRequest;
 import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughput;
 import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughputDescription;
@@ -598,6 +600,45 @@ public class DynamoDBMetadataStore implements MetadataStore {
     }
   }
 
+  private ItemCollection<ScanOutcome> expiredFiles(long modTime) {
+    String filterExpression = "mod_time < :mod_time";
+    String projectionExpression = "parent,child";
+    ValueMap map = new ValueMap().withLong(":mod_time", modTime);
+    return table.scan(filterExpression, projectionExpression, null, map);
+  }
+
+  @Override
+  public void prune(long modTime) throws IOException {
+    int itemCount = 0;
+    try {
+      Collection<Path> deletionBatch =
+          new ArrayList(S3GUARD_DDB_BATCH_WRITE_REQUEST_LIMIT);
+      int delay = conf.getInt(S3GUARD_DDB_BACKGROUND_SLEEP_MSEC_KEY,
+          S3GUARD_DDB_BACKGROUND_SLEEP_MSEC_DEFAULT);
+      for (Item item : expiredFiles(modTime)) {
+        PathMetadata md = PathMetadataDynamoDBTranslation
+            .itemToPathMetadata(item, username);
+        Path path = md.getFileStatus().getPath();
+        deletionBatch.add(path);
+        itemCount++;
+        if (deletionBatch.size() == S3GUARD_DDB_BATCH_WRITE_REQUEST_LIMIT) {
+          Thread.sleep(delay);
+          processBatchWriteRequest(pathToKey(deletionBatch), new Item[0]);
+          deletionBatch.clear();
+        }
+      }
+      if (deletionBatch.size() > 0) {
+        Thread.sleep(delay);
+        processBatchWriteRequest(pathToKey(deletionBatch), new Item[0]);
+      }
+    } catch (InterruptedException e) {
+      Thread.currentThread().interrupted();
+      throw new InterruptedIOException("Pruning was interrupted");
+    }
+    LOG.info("Finished pruning {} items in batches of {}", itemCount,
+        S3GUARD_DDB_BATCH_WRITE_REQUEST_LIMIT);
+  }
+
   @Override
   public String toString() {
     return getClass().getSimpleName() + '{'

http://git-wip-us.apache.org/repos/asf/hadoop/blob/2ff5812d/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java
index d40c45c..97d4fe2 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java
@@ -33,6 +33,7 @@ import java.io.IOException;
 import java.net.URI;
 import java.util.Collection;
 import java.util.Iterator;
+import java.util.LinkedList;
 import java.util.Map;
 
 /**
@@ -270,6 +271,47 @@ public class LocalMetadataStore implements MetadataStore {
     }
   }
 
+  @Override
+  public synchronized void prune(long modTime) throws IOException {
+    Iterator<Map.Entry<Path, PathMetadata>> files =
+        fileHash.entrySet().iterator();
+    while (files.hasNext()) {
+      Map.Entry<Path, PathMetadata> entry = files.next();
+      if (expired(entry.getValue().getFileStatus(), modTime)) {
+        files.remove();
+      }
+    }
+    Iterator<Map.Entry<Path, DirListingMetadata>> dirs =
+        dirHash.entrySet().iterator();
+    Collection<Path> ancestors = new LinkedList<>();
+    while (dirs.hasNext()) {
+      Map.Entry<Path, DirListingMetadata> entry = dirs.next();
+      Path path = entry.getKey();
+      DirListingMetadata metadata = entry.getValue();
+      Collection<PathMetadata> oldChildren = metadata.getListing();
+      Collection<PathMetadata> newChildren = new LinkedList<>();
+
+      for (PathMetadata child : oldChildren) {
+        FileStatus status = child.getFileStatus();
+        if (!expired(status, modTime)) {
+          newChildren.add(child);
+        }
+      }
+      if (newChildren.size() == 0) {
+        dirs.remove();
+        ancestors.add(entry.getKey());
+      } else {
+        dirHash.put(path, new DirListingMetadata(path, newChildren, false));
+      }
+    }
+  }
+
+  private boolean expired(FileStatus status, long expiry) {
+    // Note: S3 doesn't track modification time on directories, so for
+    // consistency with the DynamoDB implementation we ignore that here
+    return status.getModificationTime() < expiry && !status.isDirectory();
+  }
+
   @VisibleForTesting
   static <T> void clearHashByAncestor(Path ancestor, Map<Path, T> hash) {
     for (Iterator<Map.Entry<Path, T>> it = hash.entrySet().iterator();

http://git-wip-us.apache.org/repos/asf/hadoop/blob/2ff5812d/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java
index 59b39a5..dc3433c 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java
@@ -165,4 +165,18 @@ public interface MetadataStore extends Closeable {
    * @throws IOException if there is an error
    */
   void destroy() throws IOException;
+
+  /**
+   * Clear any metadata older than a specified time from the repository.
+   * Implementations MUST clear file metadata, and MAY clear directory metadata
+   * (s3a itself does not track modification time for directories).
+   * Implementations may also choose to throw UnsupportedOperationException
+   * istead. Note that modification times should be in UTC, as returned by
+   * System.currentTimeMillis at the time of modification.
+   *
+   * @param modTime Oldest modification time to allow
+   * @throws IOException if there is an error
+   * @throws UnsupportedOperationException if not implemented
+   */
+  void prune(long modTime) throws IOException, UnsupportedOperationException;
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/2ff5812d/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java
index aed9590..082e0f9 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java
@@ -87,6 +87,11 @@ public class NullMetadataStore implements MetadataStore {
   }
 
   @Override
+  public void prune(long modTime) {
+    return;
+  }
+
+  @Override
   public String toString() {
     return "NullMetadataStore";
   }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/2ff5812d/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
index e5cc416..d187524 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.Constants;
 import org.apache.hadoop.fs.s3a.S3AFileStatus;
 import org.apache.hadoop.fs.s3a.S3AFileSystem;
 import org.apache.hadoop.fs.shell.CommandFormat;
@@ -45,6 +46,7 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.concurrent.TimeUnit;
 
 import static org.apache.hadoop.fs.s3a.Constants.*;
 
@@ -601,10 +603,93 @@ public abstract class S3GuardTool extends Configured implements Tool {
     }
   }
 
+  /**
+   * Prune metadata that has not been modified recently.
+   */
+  static class Prune extends S3GuardTool {
+    private static final String NAME = "prune";
+    private static final String USAGE = NAME +
+        "([-D days] [-H hours] [-M minutes] [-S seconds]))" +
+        " ( -m METASTORE | s3a://bucket/path/ )";
+
+    Prune(Configuration conf) {
+      super(conf);
+
+      commandFormat.addOptionWithValue("D");
+      commandFormat.addOptionWithValue("H");
+      commandFormat.addOptionWithValue("M");
+      commandFormat.addOptionWithValue("S");
+    }
+
+    @VisibleForTesting
+    void setMetadataStore(MetadataStore ms) {
+      Preconditions.checkNotNull(ms);
+      this.ms = ms;
+    }
+
+    @Override
+    String getName() {
+      return NAME;
+    }
+
+    private long getDeltaComponent(TimeUnit unit, String arg) {
+      String raw = commandFormat.getOptValue(arg);
+      if (raw == null || raw.isEmpty()) {
+        return 0;
+      }
+      Long parsed = Long.parseLong(raw);
+      return unit.toMillis(parsed);
+    }
+
+    @VisibleForTesting
+    public int run(String[] args, PrintStream out) throws
+        InterruptedException, IOException {
+      List<String> paths = parseArgs(args);
+      if (!parseDynamoDBEndPoint(paths)) {
+        System.out.println(USAGE);
+        return INVALID_ARGUMENT;
+      }
+      initMetadataStore(false);
+
+      Configuration conf = getConf();
+      long confDelta = conf.getLong(Constants.S3GUARD_CLI_PRUNE_AGE, 0);
+
+      long cliDelta = 0;
+      cliDelta += getDeltaComponent(TimeUnit.DAYS, "D");
+      cliDelta += getDeltaComponent(TimeUnit.HOURS, "H");
+      cliDelta += getDeltaComponent(TimeUnit.MINUTES, "M");
+      cliDelta += getDeltaComponent(TimeUnit.SECONDS, "S");
+
+      if (confDelta <= 0 && cliDelta <= 0) {
+        System.err.println(
+            "You must specify a positive age for metadata to prune.");
+      }
+
+      // A delta provided on the CLI overrides if one is configured
+      long delta = confDelta;
+      if (cliDelta > 0) {
+        delta = cliDelta;
+      }
+
+      long now = System.currentTimeMillis();
+      long divide = now - delta;
+
+      ms.prune(divide);
+
+      out.flush();
+      return SUCCESS;
+    }
+
+    @Override
+    public int run(String[] args) throws InterruptedException, IOException {
+      return run(args, System.out);
+    }
+  }
+
   private static void printHelp() {
     System.out.println("Usage: hadoop " + NAME + " [" +
         InitMetadata.NAME + "|" + DestroyMetadata.NAME +
-        "|" + Import.NAME + "|" + Diff.NAME +
+        "|" + Import.NAME + "|" + Diff.NAME + "|" + Prune.NAME +
         "] [OPTIONS] [ARGUMENTS]");
 
     System.out.println("\tperform metadata store " +
@@ -639,6 +724,9 @@ public abstract class S3GuardTool extends Configured implements Tool {
     case Diff.NAME:
       cmd = new Diff(conf);
       break;
+    case Prune.NAME:
+      cmd = new Prune(conf);
+      break;
     default:
       printHelp();
       return INVALID_ARGUMENT;

http://git-wip-us.apache.org/repos/asf/hadoop/blob/2ff5812d/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md
index 368a809..4455334 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md
@@ -280,6 +280,14 @@ hadoop s3a destroy [-m URI] ( -e ENDPOINT | s3a://BUCKET )
 
 Deletes a metadata store.
 
+### Prune
+
+```
+hadoop s3a prune [-D DAYS] [-H HOURS] [-M MINUTES] [-S SECONDS] [-m URI] ( -e ENDPOINT | s3a://BUCKET )
+```
+
+Trims metadata for files that are older than the time given. Must supply at least length of time.
+
 ## Debugging and Error Handling
 
 If you run into network connectivity issues, or have a machine failure in the

http://git-wip-us.apache.org/repos/asf/hadoop/blob/2ff5812d/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java
index bbf3b6d..bb0c24f 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.io.IOUtils;
 import com.google.common.collect.Sets;
 import org.junit.After;
 import org.junit.Assert;
+import org.junit.Assume;
 import org.junit.Before;
 import org.junit.Test;
 import org.slf4j.Logger;
@@ -77,6 +78,15 @@ public abstract class MetadataStoreTestBase extends Assert {
     return false;
   }
 
+  /**
+   * Pruning is an optional feature for metadata store implementations.
+   * Tests will only check that functionality if it is expected to work.
+   * @return true if the test should expect pruning to work.
+   */
+  public boolean supportsPruning() {
+    return true;
+  }
+
   /** The MetadataStore contract used to test against. */
   private AbstractMSContract contract;
 
@@ -484,6 +494,68 @@ public abstract class MetadataStoreTestBase extends Assert {
     ms.delete(new Path(p1));
   }
 
+  @Test
+  public void testPruneFiles() throws Exception {
+    Assume.assumeTrue(supportsPruning());
+    createNewDirs("/pruneFiles");
+
+    long oldTime = getTime();
+    ms.put(new PathMetadata(makeFileStatus("/pruneFiles/old", 1, oldTime,
+        oldTime)));
+    DirListingMetadata ls2 = ms.listChildren(strToPath("/pruneFiles"));
+    if (!allowMissing()) {
+      assertListingsEqual(ls2.getListing(), "/pruneFiles/old");
+    }
+
+    // It's possible for the Local implementation to get from /pruneFiles/old's
+    // modification time to here in under 1ms, causing it to not get pruned
+    Thread.sleep(1);
+    long cutoff = System.currentTimeMillis();
+    long newTime = getTime();
+    ms.put(new PathMetadata(makeFileStatus("/pruneFiles/new", 1, newTime,
+        newTime)));
+
+    DirListingMetadata ls;
+    ls = ms.listChildren(strToPath("/pruneFiles"));
+    if (!allowMissing()) {
+      assertListingsEqual(ls.getListing(), "/pruneFiles/new",
+          "/pruneFiles/old");
+    }
+    ms.prune(cutoff);
+    ls = ms.listChildren(strToPath("/pruneFiles"));
+    if (allowMissing()) {
+      assertNotCached("/pruneFiles/old");
+    } else {
+      assertListingsEqual(ls.getListing(), "/pruneFiles/new");
+    }
+  }
+
+  @Test
+  public void testPruneDirs() throws Exception {
+    Assume.assumeTrue(supportsPruning());
+
+    // We only test that files, not dirs, are removed during prune.
+    // We specifically allow directories to remain, as it is more robust
+    // for DynamoDBMetadataStore's prune() implementation: If a
+    // file was created in a directory while it was being pruned, it would
+    // violate the invariant that all ancestors of a file exist in the table.
+
+    createNewDirs("/pruneDirs/dir");
+
+    long oldTime = getTime();
+    ms.put(new PathMetadata(makeFileStatus("/pruneDirs/dir/file",
+        1, oldTime, oldTime)));
+
+    // It's possible for the Local implementation to get from the old
+    // modification time to here in under 1ms, causing it to not get pruned
+    Thread.sleep(1);
+    long cutoff = getTime();
+
+    ms.prune(cutoff);
+
+    assertNotCached("/pruneDirs/dir/file");
+  }
+
   /*
    * Helper functions.
    */
@@ -600,23 +672,34 @@ public abstract class MetadataStoreTestBase extends Assert {
     }
   }
 
-  FileStatus basicFileStatus(Path path, int size, boolean isDir)
-      throws IOException {
-    return new FileStatus(size, isDir, REPLICATION, BLOCK_SIZE, modTime,
-        accessTime, PERMISSION, OWNER, GROUP, path);
+  FileStatus basicFileStatus(Path path, int size, boolean isDir) throws
+      IOException {
+    return basicFileStatus(path, size, isDir, modTime, accessTime);
   }
 
-  private FileStatus makeFileStatus(String pathStr, int size)
-      throws IOException {
-    return basicFileStatus(strToPath(pathStr), size, false);
+  FileStatus basicFileStatus(Path path, int size, boolean isDir,
+      long newModTime, long newAccessTime) throws IOException {
+    return new FileStatus(size, isDir, REPLICATION, BLOCK_SIZE, newModTime,
+        newAccessTime, PERMISSION, OWNER, GROUP, path);
+  }
+
+  private FileStatus makeFileStatus(String pathStr, int size) throws
+      IOException {
+    return makeFileStatus(pathStr, size, modTime, accessTime);
+  }
+
+  private FileStatus makeFileStatus(String pathStr, int size, long newModTime,
+      long newAccessTime) throws IOException {
+    return basicFileStatus(strToPath(pathStr), size, false,
+        newModTime, newAccessTime);
   }
 
   void verifyFileStatus(FileStatus status, long size) {
-    S3ATestUtils.verifyFileStatus(status, size, BLOCK_SIZE, getModTime());
+    S3ATestUtils.verifyFileStatus(status, size, BLOCK_SIZE, modTime);
   }
 
   private FileStatus makeDirStatus(String pathStr) throws IOException {
-    return basicFileStatus(strToPath(pathStr), 0, true);
+    return basicFileStatus(strToPath(pathStr), 0, true, modTime, accessTime);
   }
 
   /**
@@ -635,4 +718,8 @@ public abstract class MetadataStoreTestBase extends Assert {
     return accessTime;
   }
 
+  protected static long getTime() {
+    return System.currentTimeMillis();
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/2ff5812d/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardToolTestBase.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardToolTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardToolTestBase.java
index 32fcef1..666ec8e 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardToolTestBase.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardToolTestBase.java
@@ -21,15 +21,20 @@ package org.apache.hadoop.fs.s3a.s3guard;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.contract.ContractTestUtils;
+import org.apache.hadoop.fs.s3a.Constants;
 import org.apache.hadoop.fs.s3a.S3AFileStatus;
 import org.apache.hadoop.fs.s3a.S3AFileSystem;
 import org.apache.hadoop.fs.s3a.S3ATestUtils;
 import org.junit.After;
 import org.junit.Before;
+import org.junit.Test;
+import static org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.SUCCESS;
 
 import java.io.IOException;
+import java.util.concurrent.TimeUnit;
 
 import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertEquals;
 
 /**
  * Common functionality for S3GuardTool test cases.
@@ -112,9 +117,46 @@ public abstract class S3GuardToolTestBase {
     }
 
     if (onMetadataStore) {
-      S3AFileStatus status = new S3AFileStatus(100L, 10000L,
+      S3AFileStatus status = new S3AFileStatus(100L, System.currentTimeMillis(),
           fs.qualify(path), 512L, "hdfs");
       putFile(ms, status);
     }
   }
+
+  private void testPruneCommand(Configuration cmdConf, String[] args)
+      throws Exception {
+    Path parent = new Path(getTestPath("/prune-cli"));
+    try {
+      fs.mkdirs(parent);
+
+      S3GuardTool.Prune cmd = new S3GuardTool.Prune(cmdConf);
+      cmd.setMetadataStore(ms);
+
+      createFile(new Path(parent, "stale"), true, true);
+      Thread.sleep(TimeUnit.SECONDS.toMillis(2));
+      createFile(new Path(parent, "fresh"), true, true);
+
+      assertEquals(2, ms.listChildren(parent).getListing().size());
+      assertEquals("Prune command did not exit successfully - see output",
+          SUCCESS, cmd.run(args));
+      assertEquals(1, ms.listChildren(parent).getListing().size());
+    } finally {
+      fs.delete(parent, true);
+      ms.prune(Long.MAX_VALUE);
+    }
+  }
+
+  @Test
+  public void testPruneCommandCLI() throws Exception {
+    String testPath = getTestPath("testPruneCommandCLI");
+    testPruneCommand(fs.getConf(), new String[]{"prune", "-S", "1", testPath});
+  }
+
+  @Test
+  public void testPruneCommandConf() throws Exception {
+    conf.setLong(Constants.S3GUARD_CLI_PRUNE_AGE,
+        TimeUnit.SECONDS.toMillis(1));
+    String testPath = getTestPath("testPruneCommandConf");
+    testPruneCommand(conf, new String[]{"prune", testPath});
+  }
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/2ff5812d/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestS3GuardTool.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestS3GuardTool.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestS3GuardTool.java
index 77f0919..6a39d7b 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestS3GuardTool.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestS3GuardTool.java
@@ -78,8 +78,8 @@ public class TestS3GuardTool extends S3GuardToolTestBase {
   public void testDiffCommand() throws IOException {
     S3AFileSystem fs = getFs();
     MetadataStore ms = getMetadataStore();
-    Set<Path> filesOnS3 = new HashSet<>();  // files on S3.
-    Set<Path> filesOnMS = new HashSet<>();  // files on metadata store.
+    Set<Path> filesOnS3 = new HashSet<>(); // files on S3.
+    Set<Path> filesOnMS = new HashSet<>(); // files on metadata store.
 
     String testPath = getTestPath("/test-diff");
     mkdirs(new Path(testPath), true, true);


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org