You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-issues@hadoop.apache.org by GitBox <gi...@apache.org> on 2022/05/04 11:01:29 UTC

[GitHub] [hadoop] steveloughran commented on a diff in pull request #3877: HADOOP-12020. Add configurable storage class option

steveloughran commented on code in PR #3877:
URL: https://github.com/apache/hadoop/pull/3877#discussion_r864694450


##########
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AStorageClass.java:
##########
@@ -0,0 +1,163 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import java.nio.file.AccessDeniedException;
+import java.util.Map;
+
+import com.amazonaws.services.s3.model.StorageClass;
+import org.assertj.core.api.Assertions;
+import org.junit.Test;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.contract.ContractTestUtils;
+import org.apache.hadoop.fs.contract.s3a.S3AContract;
+import org.apache.hadoop.test.GenericTestUtils;
+
+import static org.apache.hadoop.fs.s3a.Constants.STORAGE_CLASS;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.disableFilesystemCaching;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;
+import static org.apache.hadoop.fs.s3a.impl.HeaderProcessing.XA_STORAGE_CLASS;
+import static org.apache.hadoop.fs.s3a.impl.HeaderProcessing.decodeBytes;
+
+/**
+ * Tests of storage class.
+ */
+public class ITestS3AStorageClass extends AbstractS3ATestBase {
+
+  @Override
+  protected Configuration createConfiguration() {
+    Configuration conf = super.createConfiguration();
+    disableFilesystemCaching(conf);
+    removeBaseAndBucketOverrides(conf, STORAGE_CLASS);
+
+    return conf;
+  }
+
+  /*
+   * This test ensures the default storage class configuration (no config or null)
+   * works well with create and copy operations
+   */
+  @Test
+  public void testCreateAndCopyObjectWithStorageClassDefault() throws Throwable {
+    Configuration conf = this.createConfiguration();
+    S3AContract contract = (S3AContract) createContract(conf);
+    contract.init();
+
+    FileSystem fs = contract.getTestFileSystem();
+    Path dir = methodPath();
+    fs.mkdirs(dir);
+    assertObjectHasNoStorageClass(dir);
+    Path path = new Path(dir, "file1");
+    ContractTestUtils.touch(fs, path);
+    assertObjectHasNoStorageClass(path);
+    Path path2 = new Path(dir, "file1");
+    fs.rename(path, path2);
+    assertObjectHasNoStorageClass(path2);
+  }
+
+  /*
+   * Verify object can be created and copied correctly
+   * with specified storage class
+   */
+  @Test
+  public void testCreateAndCopyObjectWithStorageClassReducedRedundancy() throws Throwable {
+    Configuration conf = this.createConfiguration();
+    conf.set(STORAGE_CLASS, StorageClass.ReducedRedundancy.toString());

Review Comment:
   i think it would be good to have these names in the Constants file as static strings



##########
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AStorageClass.java:
##########
@@ -0,0 +1,163 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import java.nio.file.AccessDeniedException;
+import java.util.Map;
+
+import com.amazonaws.services.s3.model.StorageClass;
+import org.assertj.core.api.Assertions;
+import org.junit.Test;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.contract.ContractTestUtils;
+import org.apache.hadoop.fs.contract.s3a.S3AContract;
+import org.apache.hadoop.test.GenericTestUtils;
+
+import static org.apache.hadoop.fs.s3a.Constants.STORAGE_CLASS;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.disableFilesystemCaching;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;
+import static org.apache.hadoop.fs.s3a.impl.HeaderProcessing.XA_STORAGE_CLASS;
+import static org.apache.hadoop.fs.s3a.impl.HeaderProcessing.decodeBytes;
+
+/**
+ * Tests of storage class.
+ */
+public class ITestS3AStorageClass extends AbstractS3ATestBase {

Review Comment:
   1. add a test for a completely invalid storage class
   2. add a test for storage class ""



##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java:
##########
@@ -936,13 +936,16 @@ protected RequestFactory createRequestFactory() {
     // Any encoding type
     String contentEncoding = getConf().getTrimmed(CONTENT_ENCODING, null);
 
+    String storageClass = getConf().getTrimmed(STORAGE_CLASS, null);
+
     return RequestFactoryImpl.builder()
         .withBucket(requireNonNull(bucket))
         .withCannedACL(getCannedACL())
         .withEncryptionSecrets(requireNonNull(encryptionSecrets))
         .withMultipartPartCountLimit(partCountLimit)
         .withRequestPreparer(getAuditManager()::requestCreated)
         .withContentEncoding(contentEncoding)
+        .withStorageClass(storageClass)

Review Comment:
   harmless if the class is null, right?



##########
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesStorageClass.java:
##########
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.scale;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.Constants;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+
+import static org.apache.hadoop.fs.s3a.Constants.STORAGE_CLASS;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.disableFilesystemCaching;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;
+
+public class ITestS3AHugeFilesStorageClass extends AbstractSTestS3AHugeFiles {

Review Comment:
   this adds another slow test. could you just take one of the existing subclasses and set it there?
   
   then add another test_XYX test case to validate the storage class



##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java:
##########
@@ -365,6 +380,7 @@ public PutObjectRequest newPutObjectRequest(String key,
         srcfile);
     setOptionalPutRequestParameters(putObjectRequest);
     putObjectRequest.setCannedAcl(cannedACL);
+    putObjectRequest.setStorageClass(storageClass);

Review Comment:
   only set if non null/empty



##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java:
##########
@@ -439,6 +455,7 @@ public InitiateMultipartUploadRequest newMultipartUploadRequest(
             destKey,
             newObjectMetadata(-1));
     initiateMPURequest.setCannedACL(getCannedACL());
+    initiateMPURequest.withStorageClass(getStorageClass());

Review Comment:
   only set if non null/empty



##########
hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md:
##########
@@ -1071,6 +1071,17 @@ options are covered in [Testing](./testing.md).
   </description>
 </property>
 
+<property>
+  <name>fs.s3a.storage.class</name>
+  <value></value>
+  <description>
+      Storage class: STANDARD, REDUCED_REDUNDANCY, INTELLIGENT_TIERING, etc.

Review Comment:
   prefer lower class stings here, for consistency with the rest. even if toLower is called, docs should all use the lower case example



##########
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AStorageClass.java:
##########
@@ -0,0 +1,163 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import java.nio.file.AccessDeniedException;
+import java.util.Map;
+
+import com.amazonaws.services.s3.model.StorageClass;
+import org.assertj.core.api.Assertions;
+import org.junit.Test;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.contract.ContractTestUtils;
+import org.apache.hadoop.fs.contract.s3a.S3AContract;
+import org.apache.hadoop.test.GenericTestUtils;
+
+import static org.apache.hadoop.fs.s3a.Constants.STORAGE_CLASS;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.disableFilesystemCaching;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;
+import static org.apache.hadoop.fs.s3a.impl.HeaderProcessing.XA_STORAGE_CLASS;
+import static org.apache.hadoop.fs.s3a.impl.HeaderProcessing.decodeBytes;
+
+/**
+ * Tests of storage class.
+ */
+public class ITestS3AStorageClass extends AbstractS3ATestBase {
+
+  @Override
+  protected Configuration createConfiguration() {
+    Configuration conf = super.createConfiguration();
+    disableFilesystemCaching(conf);
+    removeBaseAndBucketOverrides(conf, STORAGE_CLASS);
+
+    return conf;
+  }
+
+  /*
+   * This test ensures the default storage class configuration (no config or null)
+   * works well with create and copy operations
+   */
+  @Test
+  public void testCreateAndCopyObjectWithStorageClassDefault() throws Throwable {
+    Configuration conf = this.createConfiguration();
+    S3AContract contract = (S3AContract) createContract(conf);
+    contract.init();
+
+    FileSystem fs = contract.getTestFileSystem();
+    Path dir = methodPath();
+    fs.mkdirs(dir);
+    assertObjectHasNoStorageClass(dir);
+    Path path = new Path(dir, "file1");
+    ContractTestUtils.touch(fs, path);
+    assertObjectHasNoStorageClass(path);
+    Path path2 = new Path(dir, "file1");
+    fs.rename(path, path2);
+    assertObjectHasNoStorageClass(path2);
+  }
+
+  /*
+   * Verify object can be created and copied correctly
+   * with specified storage class
+   */
+  @Test
+  public void testCreateAndCopyObjectWithStorageClassReducedRedundancy() throws Throwable {
+    Configuration conf = this.createConfiguration();
+    conf.set(STORAGE_CLASS, StorageClass.ReducedRedundancy.toString());
+    S3AContract contract = (S3AContract) createContract(conf);
+    contract.init();
+
+    FileSystem fs = contract.getTestFileSystem();
+    Path dir = methodPath();
+    fs.mkdirs(dir);
+    // even with storage class specified
+    // directories do not have storage class
+    assertObjectHasNoStorageClass(dir);
+    Path path = new Path(dir, "file1");
+    ContractTestUtils.touch(fs, path);
+    assertObjectHasStorageClass(path, StorageClass.ReducedRedundancy.toString());
+    Path path2 = new Path(dir, "file1");
+    fs.rename(path, path2);
+    assertObjectHasStorageClass(path2, StorageClass.ReducedRedundancy.toString());
+  }
+
+  /*
+   * Archive storage classes have different behavior
+   * from general storage classes
+   */
+  @Test(expected = AccessDeniedException.class)
+  public void testCreateAndCopyObjectWithStorageClassGlacier() throws Throwable {
+    Configuration conf = this.createConfiguration();
+    conf.set(STORAGE_CLASS, StorageClass.Glacier.toString());
+    S3AContract contract = (S3AContract) createContract(conf);
+    contract.init();
+
+    FileSystem fs = contract.getTestFileSystem();
+    Path dir = methodPath();
+    fs.mkdirs(dir);
+    // even with storage class specified
+    // directories do not have storage class
+    assertObjectHasNoStorageClass(dir);
+    Path path = new Path(dir, "file1");
+    ContractTestUtils.touch(fs, path);
+    assertObjectHasStorageClass(path, StorageClass.Glacier.toString());
+    Path path2 = new Path(dir, "file2");
+    try {
+      // this is the current behavior
+      // object with archive storage class can't be read directly
+      // when trying to read it, AccessDeniedException will be thrown
+      // with message InvalidObjectState
+      fs.rename(path, path2);

Review Comment:
   use
   
   ```java
   intercept(AccessDeniedException.class, "InvalidObjectState", () -> fs.rename(path, path2));
   ```
   
   its better, especially as if that lambda expression returns a string, we use it in our assertion failure report



##########
hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md:
##########
@@ -1071,6 +1071,17 @@ options are covered in [Testing](./testing.md).
   </description>
 </property>
 
+<property>
+  <name>fs.s3a.storage.class</name>
+  <value></value>
+  <description>
+      Storage class: STANDARD, REDUCED_REDUNDANCY, INTELLIGENT_TIERING, etc.
+      Specify the storage class for S3A put object requests.

Review Comment:
   nit: use PUT in capitals here though



##########
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesStorageClass.java:
##########
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.scale;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.Constants;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+
+import static org.apache.hadoop.fs.s3a.Constants.STORAGE_CLASS;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.disableFilesystemCaching;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;
+
+public class ITestS3AHugeFilesStorageClass extends AbstractSTestS3AHugeFiles {
+
+  private static final String TEST_STORAGE_CLASS = "REDUCED_REDUNDANCY";

Review Comment:
   use the Constant you will be adding...



##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java:
##########
@@ -936,13 +936,16 @@ protected RequestFactory createRequestFactory() {
     // Any encoding type
     String contentEncoding = getConf().getTrimmed(CONTENT_ENCODING, null);
 
+    String storageClass = getConf().getTrimmed(STORAGE_CLASS, null);

Review Comment:
   use "" as the empty value, and handle lower case values by converting .toUpper(Locale.EN_US)l before passing to the builder



##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java:
##########
@@ -106,6 +106,12 @@ public interface RequestFactory {
    */
   String getContentEncoding();
 
+  /**
+   * Get the object storage class (e.g. STANDARD, REDUCED_REDUNDANCY) or return null if none.

Review Comment:
   support empty string



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscribe@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: common-issues-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-issues-help@hadoop.apache.org