You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by st...@apache.org on 2020/01/24 13:42:00 UTC
[hadoop] branch trunk updated: HADOOP-16792: Make S3 client request
timeout configurable.
This is an automated email from the ASF dual-hosted git repository.
stevel pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push:
new 8390547 HADOOP-16792: Make S3 client request timeout configurable.
8390547 is described below
commit 839054754b84b9731d2122096882582df6ff4b36
Author: Mustafa Iman <mu...@cloudera.com>
AuthorDate: Fri Jan 24 13:37:07 2020 +0000
HADOOP-16792: Make S3 client request timeout configurable.
Contributed by Mustafa Iman.
This adds a new configuration option fs.s3a.connection.request.timeout
to declare the timeout on HTTP requests to the AWS service;
0 means no timeout.
Measured in seconds; the usual time suffixes are all supported
Important: this is the maximum duration of any AWS service call,
including upload and copy operations. If non-zero, it must be larger
than the time to upload multi-megabyte blocks to S3 from the client,
and to rename many-GB files. Use with care.
Change-Id: I407745341068b702bf8f401fb96450a9f987c51c
---
.../src/main/resources/core-default.xml | 17 +++++++++
.../java/org/apache/hadoop/fs/s3a/Constants.java | 5 +++
.../java/org/apache/hadoop/fs/s3a/S3AUtils.java | 10 ++++++
.../src/site/markdown/tools/hadoop-aws/index.md | 17 +++++++++
.../tools/hadoop-aws/troubleshooting_s3a.md | 40 ++++++++++++++++++++++
.../hadoop/fs/s3a/ITestS3AConfiguration.java | 13 +++++++
6 files changed, 102 insertions(+)
diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
index 0b8de17..07f3e8a 100644
--- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
+++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
@@ -1941,6 +1941,23 @@
</property>
<property>
+ <name>fs.s3a.connection.request.timeout</name>
+ <value>0</value>
+ <description>
+ Time out on HTTP requests to the AWS service; 0 means no timeout.
+ Measured in seconds; the usual time suffixes are all supported
+
+ Important: this is the maximum duration of any AWS service call,
+ including upload and copy operations. If non-zero, it must be larger
+ than the time to upload multi-megabyte blocks to S3 from the client,
+ and to rename many-GB files. Use with care.
+
+ Values that are larger than Integer.MAX_VALUE milliseconds are
+ converted to Integer.MAX_VALUE milliseconds.
+ </description>
+</property>
+
+<property>
<name>fs.s3a.etag.checksum.enabled</name>
<value>false</value>
<description>
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
index 3e4de05..e107d49 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
@@ -187,6 +187,11 @@ public final class Constants {
public static final String SOCKET_TIMEOUT = "fs.s3a.connection.timeout";
public static final int DEFAULT_SOCKET_TIMEOUT = 200000;
+ // milliseconds until a request is timed-out
+ public static final String REQUEST_TIMEOUT =
+ "fs.s3a.connection.request.timeout";
+ public static final int DEFAULT_REQUEST_TIMEOUT = 0;
+
// socket send buffer to be used in Amazon client
public static final String SOCKET_SEND_BUFFER = "fs.s3a.socket.send.buffer";
public static final int DEFAULT_SOCKET_SEND_BUFFER = 8 * 1024;
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
index 0250881..e2a488e 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
@@ -82,6 +82,7 @@ import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
import static org.apache.commons.lang3.StringUtils.isEmpty;
import static org.apache.hadoop.fs.s3a.Constants.*;
@@ -1284,6 +1285,15 @@ public final class S3AUtils {
DEFAULT_SOCKET_SEND_BUFFER, 2048);
int sockRecvBuffer = intOption(conf, SOCKET_RECV_BUFFER,
DEFAULT_SOCKET_RECV_BUFFER, 2048);
+ long requestTimeoutMillis = conf.getTimeDuration(REQUEST_TIMEOUT,
+ DEFAULT_REQUEST_TIMEOUT, TimeUnit.SECONDS, TimeUnit.MILLISECONDS);
+
+ if (requestTimeoutMillis > Integer.MAX_VALUE) {
+ LOG.debug("Request timeout is too high ({} ms). Setting to {} ms instead",
+ requestTimeoutMillis, Integer.MAX_VALUE);
+ requestTimeoutMillis = Integer.MAX_VALUE;
+ }
+ awsConf.setRequestTimeout((int) requestTimeoutMillis);
awsConf.setSocketBufferSizeHints(sockSendBuffer, sockRecvBuffer);
String signerOverride = conf.getTrimmed(SIGNING_ALGORITHM, "");
if (!signerOverride.isEmpty()) {
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
index 4458516..9697e7a 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
@@ -983,6 +983,23 @@ options are covered in [Testing](./testing.md).
<description>Select which version of the S3 SDK's List Objects API to use.
Currently support 2 (default) and 1 (older API).</description>
</property>
+
+<property>
+ <name>fs.s3a.connection.request.timeout</name>
+ <value>0</value>
+ <description>
+ Time out on HTTP requests to the AWS service; 0 means no timeout.
+ Measured in seconds; the usual time suffixes are all supported
+
+ Important: this is the maximum duration of any AWS service call,
+ including upload and copy operations. If non-zero, it must be larger
+ than the time to upload multi-megabyte blocks to S3 from the client,
+ and to rename many-GB files. Use with care.
+
+ Values that are larger than Integer.MAX_VALUE milliseconds are
+ converted to Integer.MAX_VALUE milliseconds.
+ </description>
+</property>
```
## <a name="retry_and_recovery"></a>Retry and Recovery
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md
index 5b7421c..5408c44 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md
@@ -1384,3 +1384,43 @@ For this reason, the number of retry events are limited.
</description>
</property>
```
+
+### <a name="aws-timeouts"></a> Tuning AWS request timeouts
+
+It is possible to configure a global timeout for AWS service calls using following property:
+
+```xml
+<property>
+ <name>fs.s3a.connection.request.timeout</name>
+ <value>0</value>
+ <description>
+ Time out on HTTP requests to the AWS service; 0 means no timeout.
+ Measured in seconds; the usual time suffixes are all supported
+
+ Important: this is the maximum duration of any AWS service call,
+ including upload and copy operations. If non-zero, it must be larger
+ than the time to upload multi-megabyte blocks to S3 from the client,
+ and to rename many-GB files. Use with care.
+
+ Values that are larger than Integer.MAX_VALUE milliseconds are
+ converted to Integer.MAX_VALUE milliseconds.
+ </description>
+</property>
+```
+
+If this value is configured too low, users may encounter `SdkClientException`s due to many requests
+timing out.
+
+```
+com.amazonaws.SdkClientException: Unable to execute HTTP request:
+ Request did not complete before the request timeout configuration.:
+ Unable to execute HTTP request: Request did not complete before the request timeout configuration.
+ at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:205)
+ at org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:112)
+ at org.apache.hadoop.fs.s3a.Invoker.lambda$retry$4(Invoker.java:315)
+ at org.apache.hadoop.fs.s3a.Invoker.retryUntranslated(Invoker.java:407)
+ at org.apache.hadoop.fs.s3a.Invoker.retry(Invoker.java:311)
+```
+
+When this happens, try to set `fs.s3a.connection.request.timeout` to a larger value or disable it
+completely by setting it to `0`.
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java
index 32f3235..57cbbcb 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java
@@ -391,6 +391,19 @@ public class ITestS3AConfiguration {
}
@Test
+ public void testRequestTimeout() throws Exception {
+ conf = new Configuration();
+ conf.set(REQUEST_TIMEOUT, "120");
+ fs = S3ATestUtils.createTestFileSystem(conf);
+ AmazonS3 s3 = fs.getAmazonS3ClientForTesting("Request timeout (ms)");
+ ClientConfiguration awsConf = getField(s3, ClientConfiguration.class,
+ "clientConfiguration");
+ assertEquals("Configured " + REQUEST_TIMEOUT +
+ " is different than what AWS sdk configuration uses internally",
+ 120000, awsConf.getRequestTimeout());
+ }
+
+ @Test
public void testCloseIdempotent() throws Throwable {
conf = new Configuration();
fs = S3ATestUtils.createTestFileSystem(conf);
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org