You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by "ASF GitHub Bot (JIRA)" <ji...@apache.org> on 2018/10/10 09:06:00 UTC

[jira] [Work logged] (BEAM-5062) Add ability to configure S3ClientOptions

     [ https://issues.apache.org/jira/browse/BEAM-5062?focusedWorklogId=153082&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-153082 ]

ASF GitHub Bot logged work on BEAM-5062:
----------------------------------------

                Author: ASF GitHub Bot
            Created on: 10/Oct/18 09:05
            Start Date: 10/Oct/18 09:05
    Worklog Time Spent: 10m 
      Work Description: iemejia closed pull request #6122: [BEAM-5062] Add ability to provide custom S3ClientOptions
URL: https://github.com/apache/beam/pull/6122
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/S3ClientBuilderFactory.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/S3ClientBuilderFactory.java
new file mode 100644
index 00000000000..ce6eaa57cd8
--- /dev/null
+++ b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/S3ClientBuilderFactory.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.aws.options;
+
+import com.amazonaws.services.s3.AmazonS3ClientBuilder;
+
+/** Construct AmazonS3ClientBuilder from S3 pipeline options. */
+public interface S3ClientBuilderFactory {
+  AmazonS3ClientBuilder createBuilder(S3Options s3Options);
+}
diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/S3Options.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/S3Options.java
index 4b549f5be0f..03bbbbcf7c6 100644
--- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/S3Options.java
+++ b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/S3Options.java
@@ -20,6 +20,7 @@
 import com.amazonaws.services.s3.model.SSEAwsKeyManagementParams;
 import com.amazonaws.services.s3.model.SSECustomerKey;
 import javax.annotation.Nullable;
+import org.apache.beam.sdk.io.aws.s3.DefaultS3ClientBuilderFactory;
 import org.apache.beam.sdk.options.Default;
 import org.apache.beam.sdk.options.DefaultValueFactory;
 import org.apache.beam.sdk.options.Description;
@@ -72,6 +73,14 @@
 
   void setSSEAwsKeyManagementParams(SSEAwsKeyManagementParams value);
 
+  @Description(
+      "Factory class that should be created and used to create a builder of AmazonS3 client."
+          + "Override the default value if you need a S3 client with custom properties, like path style access, etc.")
+  @Default.Class(DefaultS3ClientBuilderFactory.class)
+  Class<? extends S3ClientBuilderFactory> getS3ClientFactoryClass();
+
+  void setS3ClientFactoryClass(Class<? extends S3ClientBuilderFactory> s3ClientFactoryClass);
+
   /**
    * Provide the default s3 upload buffer size in bytes: 64MB if more than 512MB in RAM are
    * available and 5MB otherwise.
diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/DefaultS3ClientBuilderFactory.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/DefaultS3ClientBuilderFactory.java
new file mode 100644
index 00000000000..dd31137dffc
--- /dev/null
+++ b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/DefaultS3ClientBuilderFactory.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.aws.s3;
+
+import com.amazonaws.client.builder.AwsClientBuilder;
+import com.amazonaws.services.s3.AmazonS3ClientBuilder;
+import com.google.common.base.Strings;
+import org.apache.beam.sdk.io.aws.options.S3ClientBuilderFactory;
+import org.apache.beam.sdk.io.aws.options.S3Options;
+
+/**
+ * Construct AmazonS3ClientBuilder with default values of S3 client properties like path style
+ * access, accelerated mode, etc.
+ */
+public class DefaultS3ClientBuilderFactory implements S3ClientBuilderFactory {
+
+  @Override
+  public AmazonS3ClientBuilder createBuilder(S3Options s3Options) {
+    AmazonS3ClientBuilder builder =
+        AmazonS3ClientBuilder.standard().withCredentials(s3Options.getAwsCredentialsProvider());
+
+    if (s3Options.getClientConfiguration() != null) {
+      builder = builder.withClientConfiguration(s3Options.getClientConfiguration());
+    }
+
+    if (Strings.isNullOrEmpty(s3Options.getAwsServiceEndpoint())) {
+      builder = builder.withRegion(s3Options.getAwsRegion());
+    } else {
+      builder =
+          builder.withEndpointConfiguration(
+              new AwsClientBuilder.EndpointConfiguration(
+                  s3Options.getAwsServiceEndpoint(), s3Options.getAwsRegion()));
+    }
+    return builder;
+  }
+}
diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3FileSystem.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3FileSystem.java
index 3332d37c80e..7d55357ce38 100644
--- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3FileSystem.java
+++ b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3FileSystem.java
@@ -22,9 +22,7 @@
 import static com.google.common.base.Preconditions.checkState;
 
 import com.amazonaws.AmazonClientException;
-import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration;
 import com.amazonaws.services.s3.AmazonS3;
-import com.amazonaws.services.s3.AmazonS3ClientBuilder;
 import com.amazonaws.services.s3.model.AmazonS3Exception;
 import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest;
 import com.amazonaws.services.s3.model.CompleteMultipartUploadResult;
@@ -72,9 +70,11 @@
 import java.util.stream.Collectors;
 import javax.annotation.Nullable;
 import org.apache.beam.sdk.io.FileSystem;
+import org.apache.beam.sdk.io.aws.options.S3ClientBuilderFactory;
 import org.apache.beam.sdk.io.aws.options.S3Options;
 import org.apache.beam.sdk.io.fs.CreateOptions;
 import org.apache.beam.sdk.io.fs.MatchResult;
+import org.apache.beam.sdk.util.InstanceBuilder;
 import org.apache.beam.sdk.util.MoreFutures;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -105,7 +105,12 @@
           "The AWS S3 Beam extension was included in this build, but the awsRegion flag "
               + "was not specified. If you don't plan to use S3, then ignore this message.");
     }
-    this.amazonS3 = buildAmazonS3Client(options);
+    this.amazonS3 =
+        InstanceBuilder.ofType(S3ClientBuilderFactory.class)
+            .fromClass(options.getS3ClientFactoryClass())
+            .build()
+            .createBuilder(options)
+            .build();
 
     checkNotNull(options.getS3StorageClass(), "storageClass");
     checkArgument(options.getS3ThreadPoolSize() > 0, "threadPoolSize");
@@ -115,24 +120,6 @@
                 options.getS3ThreadPoolSize(), new ThreadFactoryBuilder().setDaemon(true).build()));
   }
 
-  private static AmazonS3 buildAmazonS3Client(S3Options options) {
-    AmazonS3ClientBuilder builder =
-        AmazonS3ClientBuilder.standard().withCredentials(options.getAwsCredentialsProvider());
-
-    if (options.getClientConfiguration() != null) {
-      builder = builder.withClientConfiguration(options.getClientConfiguration());
-    }
-
-    if (Strings.isNullOrEmpty(options.getAwsServiceEndpoint())) {
-      builder = builder.withRegion(options.getAwsRegion());
-    } else {
-      builder =
-          builder.withEndpointConfiguration(
-              new EndpointConfiguration(options.getAwsServiceEndpoint(), options.getAwsRegion()));
-    }
-    return builder.build();
-  }
-
   @Override
   protected String getScheme() {
     return S3ResourceId.SCHEME;
diff --git a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/S3FileSystemTest.java b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/S3FileSystemTest.java
index 0abf2170a36..0da08b87028 100644
--- a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/S3FileSystemTest.java
+++ b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/S3FileSystemTest.java
@@ -21,6 +21,7 @@
 import static org.apache.beam.sdk.io.aws.s3.S3TestUtils.buildMockedS3FileSystem;
 import static org.apache.beam.sdk.io.aws.s3.S3TestUtils.getSSECustomerKeyMd5;
 import static org.apache.beam.sdk.io.aws.s3.S3TestUtils.s3Options;
+import static org.apache.beam.sdk.io.aws.s3.S3TestUtils.s3OptionsWithCustomEndpointAndPathStyleAccessEnabled;
 import static org.apache.beam.sdk.io.aws.s3.S3TestUtils.s3OptionsWithSSECustomerKey;
 import static org.apache.beam.sdk.io.fs.CreateOptions.StandardCreateOptions.builder;
 import static org.hamcrest.Matchers.contains;
@@ -61,6 +62,8 @@
 import io.findify.s3mock.S3Mock;
 import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.net.URISyntaxException;
+import java.net.URL;
 import java.nio.ByteBuffer;
 import java.nio.channels.ReadableByteChannel;
 import java.nio.channels.WritableByteChannel;
@@ -124,6 +127,14 @@ public void testGetScheme() {
     assertEquals("s3", s3FileSystem.getScheme());
   }
 
+  @Test
+  public void testGetPathStyleAccessEnabled() throws URISyntaxException {
+    S3FileSystem s3FileSystem =
+        new S3FileSystem(s3OptionsWithCustomEndpointAndPathStyleAccessEnabled());
+    URL s3Url = s3FileSystem.getAmazonS3Client().getUrl("bucket", "file");
+    assertEquals("https://s3.custom.dns/bucket/file", s3Url.toURI().toString());
+  }
+
   @Test
   public void testCopy() throws IOException {
     testCopy(s3Options());
diff --git a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/S3TestUtils.java b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/S3TestUtils.java
index fdd6733411b..1554e5a18e2 100644
--- a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/S3TestUtils.java
+++ b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/S3TestUtils.java
@@ -18,6 +18,7 @@
 package org.apache.beam.sdk.io.aws.s3;
 
 import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.AmazonS3ClientBuilder;
 import com.amazonaws.services.s3.model.ObjectMetadata;
 import com.amazonaws.services.s3.model.SSEAwsKeyManagementParams;
 import com.amazonaws.services.s3.model.SSECustomerKey;
@@ -37,6 +38,15 @@ static S3Options s3Options() {
     return options;
   }
 
+  static S3Options s3OptionsWithCustomEndpointAndPathStyleAccessEnabled() {
+    S3Options options = PipelineOptionsFactory.as(S3Options.class);
+    options.setAwsServiceEndpoint("https://s3.custom.dns");
+    options.setAwsRegion("no-matter");
+    options.setS3UploadBufferSizeBytes(5_242_880);
+    options.setS3ClientFactoryClass(PathStyleAcccessS3ClientBuilderFactory.class);
+    return options;
+  }
+
   static S3Options s3OptionsWithSSEAlgorithm() {
     S3Options options = s3Options();
     options.setSSEAlgorithm(ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION);
@@ -83,4 +93,12 @@ static String getSSECustomerKeyMd5(S3Options options) {
     }
     return null;
   }
+
+  private static class PathStyleAcccessS3ClientBuilderFactory
+      extends DefaultS3ClientBuilderFactory {
+    @Override
+    public AmazonS3ClientBuilder createBuilder(S3Options s3Options) {
+      return super.createBuilder(s3Options).withPathStyleAccessEnabled(true);
+    }
+  }
 }


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 153082)
    Time Spent: 3h 20m  (was: 3h 10m)

> Add ability to configure S3ClientOptions
> ----------------------------------------
>
>                 Key: BEAM-5062
>                 URL: https://issues.apache.org/jira/browse/BEAM-5062
>             Project: Beam
>          Issue Type: Improvement
>          Components: io-java-aws
>            Reporter: Kirill Kozlov
>            Assignee: Kirill Kozlov
>            Priority: Minor
>             Fix For: 2.8.0
>
>          Time Spent: 3h 20m
>  Remaining Estimate: 0h
>
> It would be very useful to have an ability to configure [S3ClientOptions|https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/services/s3/S3ClientOptions.html] for Apache Beam jobs.
> For example, there are some implementations of S3, that does not supportĀ virtual-hosted-style URLs for buckets, only path-style. Currently it's impossible to enable path style access for amazon s3 client, which is used by an apache-beam job.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)