You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by st...@apache.org on 2020/04/17 13:21:07 UTC

[hadoop] branch branch-3.3 updated: HADOOP-13873. log DNS addresses on s3a initialization.

This is an automated email from the ASF dual-hosted git repository.

stevel pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/branch-3.3 by this push:
     new 96d7ceb  HADOOP-13873. log DNS addresses on s3a initialization.
96d7ceb is described below

commit 96d7ceb39a14dd1baa7d72f7620186addbb756b1
Author: Mukund Thakur <mt...@cloudera.com>
AuthorDate: Fri Apr 17 14:15:38 2020 +0100

    HADOOP-13873. log DNS addresses on s3a initialization.
    
    Contributed by Mukund Thakur.
    
    If you set the log org.apache.hadoop.fs.s3a.impl.NetworkBinding
    to DEBUG, then when the S3A bucket probe is made, the DNS address
    of the S3 endpoint is calculated and printed.
    
    This is useful to see if a large set of processes are all using
    the same IP address from the pool of load balancers to which AWS
    directs clients when an AWS S3 endpoint is resolved.
    
    This can have implications for performance: if all clients
    access the same load balancer, performance may be suboptimal.
    
    Note: if bucket probes are disabled, fs.s3a.bucket.probe = 0,
    the DNS logging does not take place.
    
    Change-Id: I21b3ac429dc0b543f03e357fdeb94c2d2a328dd8
---
 .../java/org/apache/hadoop/fs/s3a/Constants.java   |  6 +++++
 .../org/apache/hadoop/fs/s3a/S3AFileSystem.java    |  5 ++++
 .../apache/hadoop/fs/s3a/impl/NetworkBinding.java  | 31 ++++++++++++++++++++++
 3 files changed, 42 insertions(+)

diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
index 0ca4aa0..430a6bc 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
@@ -163,6 +163,12 @@ public final class Constants {
   //use a custom endpoint?
   public static final String ENDPOINT = "fs.s3a.endpoint";
 
+  /**
+   * Default value of s3 endpoint. If not set explicitly using
+   * {@code AmazonS3#setEndpoint()}, this is used.
+   */
+  public static final String DEFAULT_ENDPOINT = "s3.amazonaws.com";
+
   //Enable path style access? Overrides default virtual hosting
   public static final String PATH_STYLE_ACCESS = "fs.s3a.path.style.access";
 
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index 9630a9e..6d2b3a8 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -176,6 +176,7 @@ import static org.apache.hadoop.fs.s3a.impl.CallableSupplier.waitForCompletionIg
 import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.isUnknownBucket;
 import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404;
 import static org.apache.hadoop.fs.s3a.impl.NetworkBinding.fixBucketRegion;
+import static org.apache.hadoop.fs.s3a.impl.NetworkBinding.logDnsLookup;
 import static org.apache.hadoop.io.IOUtils.cleanupWithLogger;
 
 /**
@@ -469,6 +470,8 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
    * S3AFileSystem initialization. When set to 1 or 2, bucket existence check
    * will be performed which is potentially slow.
    * If 3 or higher: warn and use the v2 check.
+   * The DNS address of the S3 endpoint is also logged when the probe value
+   * is greater than 0; otherwise it is skipped for increased performance.
    * @throws UnknownStoreException the bucket is absent
    * @throws IOException any other problem talking to S3
    */
@@ -483,9 +486,11 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
       LOG.debug("skipping check for bucket existence");
       break;
     case 1:
+      logDnsLookup(getConf());
       verifyBucketExists();
       break;
     case 2:
+      logDnsLookup(getConf());
       verifyBucketExistsV2();
       break;
     default:
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/NetworkBinding.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/NetworkBinding.java
index 7ff4451..8b34376 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/NetworkBinding.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/NetworkBinding.java
@@ -21,6 +21,8 @@ package org.apache.hadoop.fs.s3a.impl;
 import java.io.IOException;
 import java.lang.reflect.Constructor;
 import java.lang.reflect.InvocationTargetException;
+import java.net.URI;
+import java.net.URISyntaxException;
 
 import javax.net.ssl.HostnameVerifier;
 import javax.net.ssl.SSLSocketFactory;
@@ -30,9 +32,12 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory;
 
+import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_ENDPOINT;
 import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_SSL_CHANNEL_MODE;
+import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT;
 import static org.apache.hadoop.fs.s3a.Constants.SSL_CHANNEL_MODE;
 
 /**
@@ -121,4 +126,30 @@ public class NetworkBinding {
         ? "us-east-1"
         : region;
   }
+
+  /**
+   * At debug level, log the S3 endpoint, its hostname and resolved address;
+   * a no-op if debug is off or the endpoint is empty. When unset, the
+   * endpoint defaults to {@code Constants#DEFAULT_ENDPOINT}.
+   * @param conf input configuration.
+   */
+  public static void logDnsLookup(Configuration conf) {
+    String endPoint = conf.getTrimmed(ENDPOINT, DEFAULT_ENDPOINT);
+    String hostName = endPoint;
+    if (!endPoint.isEmpty() && LOG.isDebugEnabled()) {
+      // Has a scheme: use the URI's host. NOTE(review): getHost() may be null.
+      if (endPoint.contains("://")) {
+        try {
+          URI uri = new URI(endPoint);
+          hostName = uri.getHost();
+        } catch (URISyntaxException e) {
+          LOG.debug("Got URISyntaxException, ignoring");
+        }
+      }
+      LOG.debug("Bucket endpoint : {}, Hostname : {}, DNSAddress : {}",
+              endPoint,
+              hostName,
+              NetUtils.normalizeHostName(hostName));
+    }
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org