Posted to common-commits@hadoop.apache.org by sz...@apache.org on 2014/02/26 21:55:51 UTC

svn commit: r1572251 - in /hadoop/common/branches/HDFS-5535/hadoop-common-project/hadoop-common: ./ src/main/java/ src/main/java/org/apache/hadoop/fs/s3native/ src/main/java/org/apache/hadoop/util/ src/main/resources/ src/test/java/org/apache/hadoop/fs...

Author: szetszwo
Date: Wed Feb 26 20:55:50 2014
New Revision: 1572251

URL: http://svn.apache.org/r1572251
Log:
Merge r1569890 through r1572250 from trunk.

Added:
    hadoop/common/branches/HDFS-5535/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3native/TestJets3tNativeFileSystemStore.java
      - copied unchanged from r1572250, hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3native/TestJets3tNativeFileSystemStore.java
    hadoop/common/branches/HDFS-5535/hadoop-common-project/hadoop-common/src/test/resources/jets3t.properties
      - copied unchanged from r1572250, hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/resources/jets3t.properties
Modified:
    hadoop/common/branches/HDFS-5535/hadoop-common-project/hadoop-common/CHANGES.txt   (contents, props changed)
    hadoop/common/branches/HDFS-5535/hadoop-common-project/hadoop-common/src/main/java/   (props changed)
    hadoop/common/branches/HDFS-5535/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/Jets3tNativeFileSystemStore.java
    hadoop/common/branches/HDFS-5535/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/VersionInfo.java
    hadoop/common/branches/HDFS-5535/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml

Modified: hadoop/common/branches/HDFS-5535/hadoop-common-project/hadoop-common/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-5535/hadoop-common-project/hadoop-common/CHANGES.txt?rev=1572251&r1=1572250&r2=1572251&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-5535/hadoop-common-project/hadoop-common/CHANGES.txt (original)
+++ hadoop/common/branches/HDFS-5535/hadoop-common-project/hadoop-common/CHANGES.txt Wed Feb 26 20:55:50 2014
@@ -8,9 +8,6 @@ Trunk (Unreleased)
     FSDataOutputStream.sync() and Syncable.sync().  (szetszwo)
 
   NEW FEATURES
-
-    HADOOP-10184. Hadoop Common changes required to support HDFS ACLs. (See
-    breakdown of tasks below for features and contributors)
     
   IMPROVEMENTS
 
@@ -300,41 +297,6 @@ Trunk (Unreleased)
 
     HADOOP-10044 Improve the javadoc of rpc code (sanjay Radia)
 
-  BREAKDOWN OF HADOOP-10184 SUBTASKS AND RELATED JIRAS
-
-    HADOOP-10185. FileSystem API for ACLs. (cnauroth)
-
-    HADOOP-10186. Remove AclReadFlag and AclWriteFlag in FileSystem API.
-    (Haohui Mai via cnauroth)
-
-    HADOOP-10187. FsShell CLI: add getfacl and setfacl with minimal support for
-    getting and setting ACLs. (Vinay via cnauroth)
-
-    HADOOP-10192. FileSystem#getAclStatus has incorrect JavaDocs. (cnauroth)
-
-    HADOOP-10220. Add ACL indicator bit to FsPermission. (cnauroth)
-
-    HADOOP-10241. Clean up output of FsShell getfacl. (Chris Nauroth via wheat9)
-
-    HADOOP-10213. Fix bugs parsing ACL spec in FsShell setfacl.
-    (Vinay via cnauroth)
-
-    HADOOP-10277. setfacl -x fails to parse ACL spec if trying to remove the
-    mask entry. (Vinay via cnauroth)
-
-    HADOOP-10270. getfacl does not display effective permissions of masked
-    entries. (cnauroth)
-
-    HADOOP-10344. Fix TestAclCommands after merging HADOOP-10338 patch.
-    (cnauroth)
-
-    HADOOP-10352. Recursive setfacl erroneously attempts to apply default ACL to
-    files. (cnauroth)
-
-    HADOOP-10354. TestWebHDFS fails after merge of HDFS-4685 to trunk. (cnauroth)
-
-    HADOOP-10361. Correct alignment in CLI output for ACLs. (cnauroth)
-
   OPTIMIZATIONS
 
     HADOOP-7761. Improve the performance of raw comparisons. (todd)
@@ -362,6 +324,9 @@ Release 2.4.0 - UNRELEASED
 
   NEW FEATURES
 
+    HADOOP-10184. Hadoop Common changes required to support HDFS ACLs. (See
+    breakdown of tasks below for features and contributors)
+
   IMPROVEMENTS
 
     HADOOP-10139. Update and improve the Single Cluster Setup document.
@@ -378,6 +343,9 @@ Release 2.4.0 - UNRELEASED
     HADOOP-10348. Deprecate hadoop.ssl.configuration in branch-2, and remove
     it in trunk. (Haohui Mai via jing9)
 
+    HADOOP-9454. Support multipart uploads for s3native. (Jordan Mendelson and
+    Akira AJISAKA via atm)
+
   OPTIMIZATIONS
 
   BUG FIXES
@@ -413,6 +381,44 @@ Release 2.4.0 - UNRELEASED
     HADOOP-10070. RPC client doesn't use per-connection conf to determine
     server's expected Kerberos principal name. (atm)
 
+    HADOOP-10368. InputStream is not closed in VersionInfo ctor.
+    (Tsuyoshi OZAWA via szetszwo)
+
+  BREAKDOWN OF HADOOP-10184 SUBTASKS AND RELATED JIRAS
+
+    HADOOP-10185. FileSystem API for ACLs. (cnauroth)
+
+    HADOOP-10186. Remove AclReadFlag and AclWriteFlag in FileSystem API.
+    (Haohui Mai via cnauroth)
+
+    HADOOP-10187. FsShell CLI: add getfacl and setfacl with minimal support for
+    getting and setting ACLs. (Vinay via cnauroth)
+
+    HADOOP-10192. FileSystem#getAclStatus has incorrect JavaDocs. (cnauroth)
+
+    HADOOP-10220. Add ACL indicator bit to FsPermission. (cnauroth)
+
+    HADOOP-10241. Clean up output of FsShell getfacl. (Chris Nauroth via wheat9)
+
+    HADOOP-10213. Fix bugs parsing ACL spec in FsShell setfacl.
+    (Vinay via cnauroth)
+
+    HADOOP-10277. setfacl -x fails to parse ACL spec if trying to remove the
+    mask entry. (Vinay via cnauroth)
+
+    HADOOP-10270. getfacl does not display effective permissions of masked
+    entries. (cnauroth)
+
+    HADOOP-10344. Fix TestAclCommands after merging HADOOP-10338 patch.
+    (cnauroth)
+
+    HADOOP-10352. Recursive setfacl erroneously attempts to apply default ACL to
+    files. (cnauroth)
+
+    HADOOP-10354. TestWebHDFS fails after merge of HDFS-4685 to trunk. (cnauroth)
+
+    HADOOP-10361. Correct alignment in CLI output for ACLs. (cnauroth)
+
 Release 2.3.1 - UNRELEASED
 
   INCOMPATIBLE CHANGES

Propchange: hadoop/common/branches/HDFS-5535/hadoop-common-project/hadoop-common/CHANGES.txt
------------------------------------------------------------------------------
  Merged /hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt:r1571814-1572250

Propchange: hadoop/common/branches/HDFS-5535/hadoop-common-project/hadoop-common/src/main/java/
------------------------------------------------------------------------------
  Merged /hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java:r1571814-1572250

Modified: hadoop/common/branches/HDFS-5535/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/Jets3tNativeFileSystemStore.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-5535/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/Jets3tNativeFileSystemStore.java?rev=1572251&r1=1572250&r2=1572251&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-5535/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/Jets3tNativeFileSystemStore.java (original)
+++ hadoop/common/branches/HDFS-5535/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/Jets3tNativeFileSystemStore.java Wed Feb 26 20:55:50 2014
@@ -28,6 +28,9 @@ import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.net.URI;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -41,10 +44,13 @@ import org.jets3t.service.S3ServiceExcep
 import org.jets3t.service.ServiceException;
 import org.jets3t.service.StorageObjectsChunk;
 import org.jets3t.service.impl.rest.httpclient.RestS3Service;
+import org.jets3t.service.model.MultipartPart;
+import org.jets3t.service.model.MultipartUpload;
 import org.jets3t.service.model.S3Bucket;
 import org.jets3t.service.model.S3Object;
 import org.jets3t.service.model.StorageObject;
 import org.jets3t.service.security.AWSCredentials;
+import org.jets3t.service.utils.MultipartUtils;
 
 @InterfaceAudience.Private
 @InterfaceStability.Unstable
@@ -52,6 +58,12 @@ class Jets3tNativeFileSystemStore implem
   
   private S3Service s3Service;
   private S3Bucket bucket;
+
+  private long multipartBlockSize;
+  private boolean multipartEnabled;
+  private long multipartCopyBlockSize;
+  static final long MAX_PART_SIZE = (long)5 * 1024 * 1024 * 1024;
+  
   public static final Log LOG =
       LogFactory.getLog(Jets3tNativeFileSystemStore.class);
 
@@ -67,13 +79,27 @@ class Jets3tNativeFileSystemStore implem
     } catch (S3ServiceException e) {
       handleS3ServiceException(e);
     }
+    multipartEnabled =
+        conf.getBoolean("fs.s3n.multipart.uploads.enabled", false);
+    multipartBlockSize = Math.min(
+        conf.getLong("fs.s3n.multipart.uploads.block.size", 64 * 1024 * 1024),
+        MAX_PART_SIZE);
+    multipartCopyBlockSize = Math.min(
+        conf.getLong("fs.s3n.multipart.copy.block.size", MAX_PART_SIZE),
+        MAX_PART_SIZE);
+
     bucket = new S3Bucket(uri.getHost());
   }
   
   @Override
   public void storeFile(String key, File file, byte[] md5Hash)
     throws IOException {
-    
+
+    if (multipartEnabled && file.length() >= multipartBlockSize) {
+      storeLargeFile(key, file, md5Hash);
+      return;
+    }
+
     BufferedInputStream in = null;
     try {
       in = new BufferedInputStream(new FileInputStream(file));
@@ -98,6 +124,31 @@ class Jets3tNativeFileSystemStore implem
     }
   }
 
+  public void storeLargeFile(String key, File file, byte[] md5Hash)
+      throws IOException {
+    S3Object object = new S3Object(key);
+    object.setDataInputFile(file);
+    object.setContentType("binary/octet-stream");
+    object.setContentLength(file.length());
+    if (md5Hash != null) {
+      object.setMd5Hash(md5Hash);
+    }
+
+    List<StorageObject> objectsToUploadAsMultipart =
+        new ArrayList<StorageObject>();
+    objectsToUploadAsMultipart.add(object);
+    MultipartUtils mpUtils = new MultipartUtils(multipartBlockSize);
+
+    try {
+      mpUtils.uploadObjects(bucket.getName(), s3Service,
+                            objectsToUploadAsMultipart, null);
+    } catch (ServiceException e) {
+      handleServiceException(e);
+    } catch (Exception e) {
+      throw new S3Exception(e);
+    }
+  }
+  
   @Override
   public void storeEmptyFile(String key) throws IOException {
     try {
@@ -152,11 +203,8 @@ class Jets3tNativeFileSystemStore implem
       }
       S3Object object = s3Service.getObject(bucket.getName(), key);
       return object.getDataInputStream();
-    } catch (S3ServiceException e) {
-      handleS3ServiceException(key, e);
-      return null; //never returned - keep compiler happy
     } catch (ServiceException e) {
-      handleServiceException(e);
+      handleServiceException(key, e);
       return null; //return null if key not found
     }
   }
@@ -180,11 +228,8 @@ class Jets3tNativeFileSystemStore implem
       S3Object object = s3Service.getObject(bucket, key, null, null, null,
                                             null, byteRangeStart, null);
       return object.getDataInputStream();
-    } catch (S3ServiceException e) {
-      handleS3ServiceException(key, e);
-      return null; //never returned - keep compiler happy
     } catch (ServiceException e) {
-      handleServiceException(e);
+      handleServiceException(key, e);
       return null; //return null if key not found
     }
   }
@@ -244,8 +289,16 @@ class Jets3tNativeFileSystemStore implem
         LOG.debug("Deleting key:" + key + "from bucket" + bucket.getName());
       }
       s3Service.deleteObject(bucket, key);
-    } catch (S3ServiceException e) {
-      handleS3ServiceException(key, e);
+    } catch (ServiceException e) {
+      handleServiceException(key, e);
+    }
+  }
+
+  public void rename(String srcKey, String dstKey) throws IOException {
+    try {
+      s3Service.renameObject(bucket.getName(), srcKey, new S3Object(dstKey));
+    } catch (ServiceException e) {
+      handleServiceException(e);
     }
   }
   
@@ -255,10 +308,52 @@ class Jets3tNativeFileSystemStore implem
       if(LOG.isDebugEnabled()) {
         LOG.debug("Copying srcKey: " + srcKey + "to dstKey: " + dstKey + "in bucket: " + bucket.getName());
       }
+      if (multipartEnabled) {
+        S3Object object = s3Service.getObjectDetails(bucket, srcKey, null,
+                                                     null, null, null);
+        if (multipartCopyBlockSize > 0 &&
+            object.getContentLength() > multipartCopyBlockSize) {
+          copyLargeFile(object, dstKey);
+          return;
+        }
+      }
       s3Service.copyObject(bucket.getName(), srcKey, bucket.getName(),
           new S3Object(dstKey), false);
-    } catch (S3ServiceException e) {
-      handleS3ServiceException(srcKey, e);
+    } catch (ServiceException e) {
+      handleServiceException(srcKey, e);
+    }
+  }
+
+  public void copyLargeFile(S3Object srcObject, String dstKey) throws IOException {
+    try {
+      long partCount = srcObject.getContentLength() / multipartCopyBlockSize +
+          (srcObject.getContentLength() % multipartCopyBlockSize > 0 ? 1 : 0);
+
+      MultipartUpload multipartUpload = s3Service.multipartStartUpload
+          (bucket.getName(), dstKey, srcObject.getMetadataMap());
+
+      List<MultipartPart> listedParts = new ArrayList<MultipartPart>();
+      for (int i = 0; i < partCount; i++) {
+        long byteRangeStart = i * multipartCopyBlockSize;
+        long byteLength;
+        if (i < partCount - 1) {
+          byteLength = multipartCopyBlockSize;
+        } else {
+          byteLength = srcObject.getContentLength() % multipartCopyBlockSize;
+          if (byteLength == 0) {
+            byteLength = multipartCopyBlockSize;
+          }
+        }
+
+        MultipartPart copiedPart = s3Service.multipartUploadPartCopy
+            (multipartUpload, i + 1, bucket.getName(), srcObject.getKey(),
+             null, null, null, null, byteRangeStart,
+             byteRangeStart + byteLength - 1, null);
+        listedParts.add(copiedPart);
+      }
+      
+      Collections.reverse(listedParts);
+      s3Service.multipartCompleteUpload(multipartUpload, listedParts);
     } catch (ServiceException e) {
       handleServiceException(e);
     }
@@ -291,11 +386,11 @@ class Jets3tNativeFileSystemStore implem
     System.out.println(sb);
   }
 
-  private void handleS3ServiceException(String key, S3ServiceException e) throws IOException {
-    if ("NoSuchKey".equals(e.getS3ErrorCode())) {
+  private void handleServiceException(String key, ServiceException e) throws IOException {
+    if ("NoSuchKey".equals(e.getErrorCode())) {
       throw new FileNotFoundException("Key '" + key + "' does not exist in S3");
     } else {
-      handleS3ServiceException(e);
+      handleServiceException(e);
     }
   }
 

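[Editor's usage note, not part of the commit: a minimal sketch of how a client could enable the multipart upload path added above. It assumes the standard org.apache.hadoop.conf.Configuration API; the property names and default values are taken from the core-default.xml change later in this diff, and the class/method names here are illustrative only.]

  import org.apache.hadoop.conf.Configuration;

  public class S3nMultipartConfigSketch {
    public static Configuration configure() {
      Configuration conf = new Configuration();
      // Turn on multipart uploads; files larger than the block size are split.
      conf.setBoolean("fs.s3n.multipart.uploads.enabled", true);
      // 64 MB upload part size (the core-default.xml default).
      conf.setLong("fs.s3n.multipart.uploads.block.size", 64L * 1024 * 1024);
      // Copies larger than 5 GB use multipart copy (matches MAX_PART_SIZE above).
      conf.setLong("fs.s3n.multipart.copy.block.size", 5L * 1024 * 1024 * 1024);
      return conf;
    }
  }
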
Modified: hadoop/common/branches/HDFS-5535/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/VersionInfo.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-5535/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/VersionInfo.java?rev=1572251&r1=1572250&r2=1572251&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-5535/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/VersionInfo.java (original)
+++ hadoop/common/branches/HDFS-5535/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/VersionInfo.java Wed Feb 26 20:55:50 2014
@@ -31,6 +31,7 @@ import org.apache.hadoop.classification.
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.Properties;
+import org.apache.hadoop.io.IOUtils;
 
 /**
  * This class returns build information about Hadoop components.
@@ -45,16 +46,19 @@ public class VersionInfo {
   protected VersionInfo(String component) {
     info = new Properties();
     String versionInfoFile = component + "-version-info.properties";
+    InputStream is = null;
     try {
-      InputStream is = Thread.currentThread().getContextClassLoader()
+      is = Thread.currentThread().getContextClassLoader()
         .getResourceAsStream(versionInfoFile);
       if (is == null) {
         throw new IOException("Resource not found");
       }
       info.load(is);
     } catch (IOException ex) {
-      LogFactory.getLog(getClass()).warn("Could not read '" + 
-        versionInfoFile + "', " + ex.toString(), ex);
+      LogFactory.getLog(getClass()).warn("Could not read '" +
+          versionInfoFile + "', " + ex.toString(), ex);
+    } finally {
+      IOUtils.closeStream(is);
     }
   }
 

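[Editor's side note, not part of the commit: the HADOOP-10368 fix above follows the common Hadoop idiom of declaring the stream outside the try block and closing it in a finally clause via IOUtils.closeStream, which is null-safe and swallows errors from close(). A self-contained sketch of the same pattern, with illustrative class and method names:]

  import java.io.IOException;
  import java.io.InputStream;
  import java.util.Properties;
  import org.apache.hadoop.io.IOUtils;

  class ResourceLoadingSketch {
    static Properties load(String resource) {
      Properties props = new Properties();
      InputStream is = null;
      try {
        is = Thread.currentThread().getContextClassLoader()
            .getResourceAsStream(resource);
        if (is == null) {
          throw new IOException("Resource not found: " + resource);
        }
        props.load(is);
      } catch (IOException ex) {
        // Log and continue with empty properties, as VersionInfo does.
      } finally {
        // Runs even if load() threw; closeStream is a no-op on null.
        IOUtils.closeStream(is);
      }
      return props;
    }
  }
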
Modified: hadoop/common/branches/HDFS-5535/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-5535/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml?rev=1572251&r1=1572250&r2=1572251&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-5535/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml (original)
+++ hadoop/common/branches/HDFS-5535/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml Wed Feb 26 20:55:50 2014
@@ -533,6 +533,31 @@
 </property>
 
 <property>
+  <name>fs.s3n.multipart.uploads.enabled</name>
+  <value>false</value>
+  <description>Setting this property to true enables multiple uploads to
+  native S3 filesystem. When uploading a file, it is split into blocks
+  if the size is larger than fs.s3n.multipart.uploads.block.size.
+  </description>
+</property>
+
+<property>
+  <name>fs.s3n.multipart.uploads.block.size</name>
+  <value>67108864</value>
+  <description>The block size for multipart uploads to native S3 filesystem.
+  Default size is 64MB.
+  </description>
+</property>
+
+<property>
+  <name>fs.s3n.multipart.copy.block.size</name>
+  <value>5368709120</value>
+  <description>The block size for multipart copy in native S3 filesystem.
+  Default size is 5GB.
+  </description>
+</property>
+
+<property>
   <name>io.seqfile.compress.blocksize</name>
   <value>1000000</value>
   <description>The minimum block size for compression in block compressed