Posted to common-commits@hadoop.apache.org by as...@apache.org on 2016/02/11 08:59:46 UTC

[12/50] hadoop git commit: HADOOP-12292. Make use of DeleteObjects optional. (Thomas Demoor via stevel)

HADOOP-12292. Make use of DeleteObjects optional.  (Thomas Demoor via stevel)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/29ae2580
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/29ae2580
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/29ae2580

Branch: refs/heads/yarn-2877
Commit: 29ae25801380b94442253c4202dee782dc4713f5
Parents: fe124da
Author: Steve Loughran <st...@apache.org>
Authored: Sat Feb 6 15:05:16 2016 +0000
Committer: Steve Loughran <st...@apache.org>
Committed: Sat Feb 6 15:05:16 2016 +0000

----------------------------------------------------------------------
 hadoop-common-project/hadoop-common/CHANGES.txt |  3 ++
 .../src/main/resources/core-default.xml         |  9 ++++
 hadoop-project/pom.xml                          |  3 +-
 .../org/apache/hadoop/fs/s3a/Constants.java     |  3 ++
 .../org/apache/hadoop/fs/s3a/S3AFileSystem.java | 57 +++++++++++++-------
 .../src/site/markdown/tools/hadoop-aws/index.md |  9 ++++
 .../hadoop/fs/s3a/scale/S3AScaleTestBase.java   | 11 +++-
 .../fs/s3a/scale/TestS3ADeleteManyFiles.java    |  2 +-
 8 files changed, 75 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/29ae2580/hadoop-common-project/hadoop-common/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index 73eadef..c8effc5 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -265,6 +265,9 @@ Trunk (Unreleased)
     HADOOP-11828. Implement the Hitchhiker erasure coding algorithm.
     (Jack Liuquan via zhz)
 
+    HADOOP-12292. Make use of DeleteObjects optional.
+    (Thomas Demoor via stevel)
+
   BUG FIXES
 
     HADOOP-12617. SPNEGO authentication request to non-default realm gets

http://git-wip-us.apache.org/repos/asf/hadoop/blob/29ae2580/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
index ceae3ed..34e1236 100644
--- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
+++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
@@ -888,6 +888,15 @@ for ldap providers in the same way as above does.
 </property>
 
 <property>
+  <name>fs.s3a.multiobjectdelete.enable</name>
+  <value>true</value>
+  <description>When enabled, multiple single-object delete requests are replaced by
+    a single 'delete multiple objects'-request, reducing the number of requests.
+    Beware: legacy S3-compatible object stores might not support this request.
+  </description>
+</property>
+
+<property>
   <name>fs.s3a.acl.default</name>
   <description>Set a canned ACL for newly created and copied objects. Value may be private, 
      public-read, public-read-write, authenticated-read, log-delivery-write, 

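The new option defaults to true; a deployment working against a legacy
S3-compatible store can switch it off per client. A minimal sketch, assuming
a standard Hadoop Configuration object (the key string matches the constant
added to Constants.java below):

    import org.apache.hadoop.conf.Configuration;

    // Sketch: disable multi-object delete for stores that lack the
    // DeleteObjects call; S3A then falls back to one DELETE per key.
    Configuration conf = new Configuration();
    conf.setBoolean("fs.s3a.multiobjectdelete.enable", false);
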
http://git-wip-us.apache.org/repos/asf/hadoop/blob/29ae2580/hadoop-project/pom.xml
----------------------------------------------------------------------
diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml
index 4e9b502..6078573 100644
--- a/hadoop-project/pom.xml
+++ b/hadoop-project/pom.xml
@@ -115,6 +115,7 @@
     <exec-maven-plugin.version>1.3.1</exec-maven-plugin.version>
     <make-maven-plugin.version>1.0-beta-1</make-maven-plugin.version>
     <native-maven-plugin.version>1.0-alpha-8</native-maven-plugin.version>
+    <surefire.fork.timeout>900</surefire.fork.timeout>
   </properties>
 
   <dependencyManagement>
@@ -1129,7 +1130,7 @@
         <artifactId>maven-surefire-plugin</artifactId>
         <configuration>
           <reuseForks>false</reuseForks>
-          <forkedProcessTimeoutInSeconds>900</forkedProcessTimeoutInSeconds>
+          <forkedProcessTimeoutInSeconds>${surefire.fork.timeout}</forkedProcessTimeoutInSeconds>
           <argLine>${maven-surefire-plugin.argLine}</argLine>
           <environmentVariables>
             <HADOOP_COMMON_HOME>${hadoop.common.build.dir}</HADOOP_COMMON_HOME>

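Since the fork timeout is now a Maven property rather than a literal, a
single run can override it from the command line without editing the POM;
for example (the value here is illustrative):

    mvn test -Dsurefire.fork.timeout=3600
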
http://git-wip-us.apache.org/repos/asf/hadoop/blob/29ae2580/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
index fc99cb9..f10f3db 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
@@ -79,6 +79,9 @@ public class Constants {
   public static final String MIN_MULTIPART_THRESHOLD = "fs.s3a.multipart.threshold";
   public static final long DEFAULT_MIN_MULTIPART_THRESHOLD = Integer.MAX_VALUE;
 
+  //enable multiobject-delete calls?
+  public static final String ENABLE_MULTI_DELETE = "fs.s3a.multiobjectdelete.enable";
+
   // comma separated list of directories
   public static final String BUFFER_DIR = "fs.s3a.buffer.dir";
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/29ae2580/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index 71771bd..5ea6bec 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -38,6 +38,7 @@ import com.amazonaws.auth.AWSCredentialsProviderChain;
 import com.amazonaws.auth.InstanceProfileCredentialsProvider;
 import com.amazonaws.services.s3.AmazonS3Client;
 import com.amazonaws.services.s3.model.CannedAccessControlList;
+import com.amazonaws.services.s3.model.DeleteObjectRequest;
 import com.amazonaws.services.s3.model.DeleteObjectsRequest;
 import com.amazonaws.services.s3.model.ListObjectsRequest;
 import com.amazonaws.services.s3.model.ObjectListing;
@@ -82,6 +83,7 @@ public class S3AFileSystem extends FileSystem {
   private String bucket;
   private int maxKeys;
   private long partSize;
+  private boolean enableMultiObjectsDelete;
   private TransferManager transfers;
   private ExecutorService threadPoolExecutor;
   private long multiPartThreshold;
@@ -200,6 +202,7 @@ public class S3AFileSystem extends FileSystem {
     partSize = conf.getLong(MULTIPART_SIZE, DEFAULT_MULTIPART_SIZE);
     multiPartThreshold = conf.getLong(MIN_MULTIPART_THRESHOLD,
       DEFAULT_MIN_MULTIPART_THRESHOLD);
+    enableMultiObjectsDelete = conf.getBoolean(ENABLE_MULTI_DELETE, true);
 
     if (partSize < 5 * 1024 * 1024) {
       LOG.error(MULTIPART_SIZE + " must be at least 5 MB");
@@ -522,11 +525,7 @@ public class S3AFileSystem extends FileSystem {
           copyFile(summary.getKey(), newDstKey);
 
           if (keysToDelete.size() == MAX_ENTRIES_TO_DELETE) {
-            DeleteObjectsRequest deleteRequest =
-                new DeleteObjectsRequest(bucket).withKeys(keysToDelete);
-            s3.deleteObjects(deleteRequest);
-            statistics.incrementWriteOps(1);
-            keysToDelete.clear();
+            removeKeys(keysToDelete, true);
           }
         }
 
@@ -534,11 +533,8 @@ public class S3AFileSystem extends FileSystem {
           objects = s3.listNextBatchOfObjects(objects);
           statistics.incrementReadOps(1);
         } else {
-          if (keysToDelete.size() > 0) {
-            DeleteObjectsRequest deleteRequest =
-                new DeleteObjectsRequest(bucket).withKeys(keysToDelete);
-            s3.deleteObjects(deleteRequest);
-            statistics.incrementWriteOps(1);
+          if (!keysToDelete.isEmpty()) {
+            removeKeys(keysToDelete, false);
           }
           break;
         }
@@ -552,6 +548,36 @@ public class S3AFileSystem extends FileSystem {
     return true;
   }
 
+  /**
+   * A helper method to delete a list of keys on a s3-backend.
+   *
+   * @param keysToDelete collection of keys to delete on the s3-backend
+   * @param clearKeys clears the keysToDelete-list after processing the list
+   *            when set to true
+   */
+  private void removeKeys(List<DeleteObjectsRequest.KeyVersion> keysToDelete,
+          boolean clearKeys) {
+    if (enableMultiObjectsDelete) {
+      DeleteObjectsRequest deleteRequest
+          = new DeleteObjectsRequest(bucket).withKeys(keysToDelete);
+      s3.deleteObjects(deleteRequest);
+      statistics.incrementWriteOps(1);
+    } else {
+      int writeops = 0;
+
+      for (DeleteObjectsRequest.KeyVersion keyVersion : keysToDelete) {
+        s3.deleteObject(
+            new DeleteObjectRequest(bucket, keyVersion.getKey()));
+        writeops++;
+      }
+
+      statistics.incrementWriteOps(writeops);
+    }
+    if (clearKeys) {
+      keysToDelete.clear();
+    }
+  }
+
   /** Delete a file.
    *
    * @param f the path to delete.
@@ -626,11 +652,7 @@ public class S3AFileSystem extends FileSystem {
             }
 
             if (keys.size() == MAX_ENTRIES_TO_DELETE) {
-              DeleteObjectsRequest deleteRequest =
-                  new DeleteObjectsRequest(bucket).withKeys(keys);
-              s3.deleteObjects(deleteRequest);
-              statistics.incrementWriteOps(1);
-              keys.clear();
+              removeKeys(keys, true);
             }
           }
 
@@ -639,10 +661,7 @@ public class S3AFileSystem extends FileSystem {
             statistics.incrementReadOps(1);
           } else {
             if (!keys.isEmpty()) {
-              DeleteObjectsRequest deleteRequest =
-                  new DeleteObjectsRequest(bucket).withKeys(keys);
-              s3.deleteObjects(deleteRequest);
-              statistics.incrementWriteOps(1);
+              removeKeys(keys, false);
             }
             break;
           }

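The refactored call sites now share the same batch-then-flush shape; a
condensed sketch of that pattern, reconstructed from the hunks above (names
as in S3AFileSystem):

    List<DeleteObjectsRequest.KeyVersion> keysToDelete = new ArrayList<>();
    for (S3ObjectSummary summary : objects.getObjectSummaries()) {
      keysToDelete.add(new DeleteObjectsRequest.KeyVersion(summary.getKey()));
      if (keysToDelete.size() == MAX_ENTRIES_TO_DELETE) {
        removeKeys(keysToDelete, true);   // full batch: delete, then clear
      }
    }
    if (!keysToDelete.isEmpty()) {
      removeKeys(keysToDelete, false);    // final partial batch
    }

Whether removeKeys() issues one DeleteObjectsRequest or one DeleteObjectRequest
per key is decided solely by fs.s3a.multiobjectdelete.enable; callers are
identical either way, and the write-ops statistic counts the requests actually
sent in both modes.
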
http://git-wip-us.apache.org/repos/asf/hadoop/blob/29ae2580/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
index 9fb0f36..73775c8 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
@@ -267,6 +267,15 @@ If you do any of these: change your credentials immediately!
     </property>
 
     <property>
+      <name>fs.s3a.multiobjectdelete.enable</name>
+      <value>false</value>
+      <description>When enabled, multiple single-object delete requests are replaced by
+        a single 'delete multiple objects'-request, reducing the number of requests.
+        Beware: legacy S3-compatible object stores might not support this request.
+      </description>
+    </property>
+
+    <property>
       <name>fs.s3a.acl.default</name>
       <description>Set a canned ACL for newly created and copied objects. Value may be private,
          public-read, public-read-write, authenticated-read, log-delivery-write,

http://git-wip-us.apache.org/repos/asf/hadoop/blob/29ae2580/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java
index e0cbc92..e44a90e 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java
@@ -36,13 +36,21 @@ import static org.junit.Assume.assumeTrue;
 
 /**
  * Base class for scale tests; here is where the common scale configuration
- * keys are defined
+ * keys are defined.
  */
 public class S3AScaleTestBase {
 
   public static final String SCALE_TEST = "scale.test.";
+
+  /**
+   * The number of operations to perform: {@value}
+   */
   public static final String KEY_OPERATION_COUNT =
       SCALE_TEST + "operation.count";
+
+  /**
+   * The default number of operations to perform: {@value}
+   */
   public static final long DEFAULT_OPERATION_COUNT = 2005;
 
   protected S3AFileSystem fs;
@@ -71,6 +79,7 @@ public class S3AScaleTestBase {
   @Before
   public void setUp() throws Exception {
     conf = createConfiguration();
+    LOG.info("Scale test operation count = {}", getOperationCount());
     fs = S3ATestUtils.createTestFileSystem(conf);
   }
 

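getOperationCount() presumably resolves KEY_OPERATION_COUNT against the test
configuration, falling back to DEFAULT_OPERATION_COUNT. A hypothetical
override in a subclass's createConfiguration() (setLong is standard
Configuration API):

    conf.setLong(KEY_OPERATION_COUNT, 10000);  // default is 2005
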
http://git-wip-us.apache.org/repos/asf/hadoop/blob/29ae2580/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java
index c913a67..d521ba8 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java
@@ -61,7 +61,7 @@ public class TestS3ADeleteManyFiles extends S3AScaleTestBase {
     // use Executor to speed up file creation
     ExecutorService exec = Executors.newFixedThreadPool(16);
     final ExecutorCompletionService<Boolean> completionService =
-        new ExecutorCompletionService<Boolean>(exec);
+        new ExecutorCompletionService<>(exec);
     try {
       final byte[] data = ContractTestUtils.dataset(testBufferSize, 'a', 'z');
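
The test fans file creation out over a fixed pool and collects results through
an ExecutorCompletionService. A self-contained sketch of that pattern (the
workload size and per-task work are hypothetical stand-ins for the test's
real per-file writes):

    import java.util.concurrent.ExecutorCompletionService;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;

    ExecutorService exec = Executors.newFixedThreadPool(16);
    ExecutorCompletionService<Boolean> completionService =
        new ExecutorCompletionService<>(exec);
    try {
      int fileCount = 100;                      // hypothetical workload size
      for (int i = 0; i < fileCount; i++) {
        // the real test writes one S3A file per task
        completionService.submit(() -> Boolean.TRUE);
      }
      for (int i = 0; i < fileCount; i++) {
        completionService.take().get();         // rethrows any task failure
      }
    } finally {
      exec.shutdown();
    }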