You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by wy...@apache.org on 2021/09/10 05:05:03 UTC

[asterixdb] branch master updated: [NO ISSUE][EXT] Disable Hadoop FileSystem Cache

This is an automated email from the ASF dual-hosted git repository.

wyk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git


The following commit(s) were added to refs/heads/master by this push:
     new 22422b0  [NO ISSUE][EXT] Disable Hadoop FileSystem Cache
22422b0 is described below

commit 22422b09f982a862d606096244997e79c911c927
Author: Wail Alkowaileet <wa...@gmail.com>
AuthorDate: Thu Sep 9 17:27:00 2021 -0700

    [NO ISSUE][EXT] Disable Hadoop FileSystem Cache
    
    - user model changes: no
    - storage format changes: no
    - interface changes: no
    
    Details:
    Hadoop can cache FileSystem instances along with any credentials
    they carry (e.g., S3 tokens). We disable the FileSystem cache so that
    credentials are never cached, avoiding any risk caching them could cause.
    
    Change-Id: Icc36ddf013eadff0fe1cca7c2e52fcd5f2bbbb5b
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/13145
    Integration-Tests: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Tested-by: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Reviewed-by: Wael Alkowaileet <wa...@gmail.com>
    Reviewed-by: Hussain Towaileb <hu...@gmail.com>
---
 .../apache/asterix/external/util/ExternalDataConstants.java  |  5 ++---
 .../org/apache/asterix/external/util/ExternalDataUtils.java  |  4 ++++
 .../java/org/apache/asterix/external/util/HDFSUtils.java     | 12 ++++++++++++
 3 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
index 243b87c..f7d9de2 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
@@ -63,6 +63,8 @@ public class ExternalDataConstants {
     public static final String KEY_HADOOP_ASTERIX_WARNINGS_ENABLED = "org.apache.asterix.warnings.enabled";
     //Base64 encoded warnings issued from Hadoop
     public static final String KEY_HADOOP_ASTERIX_WARNINGS_LIST = "org.apache.asterix.warnings.list";
+    //Disable caching FileSystem for Hadoop
+    public static final String KEY_HADOOP_DISABLE_FS_CACHE_TEMPLATE = "fs.%s.impl.disable.cache";
     //Base64 encoded function call information
     public static final String KEY_HADOOP_ASTERIX_FUNCTION_CALL_INFORMATION = "org.apache.asterix.function.info";
     public static final String KEY_SOURCE_DATATYPE = "type-name";
@@ -84,7 +86,6 @@ public class ExternalDataConstants {
     public static final String KEY_ESCAPE = "escape";
     public static final String KEY_PARSER = "parser";
     public static final String KEY_DATASET_RECORD = "dataset-record";
-    public static final String KEY_HIVE_SERDE = "hive-serde";
     public static final String KEY_RSS_URL = "url";
     public static final String KEY_INTERVAL = "interval";
     public static final String KEY_IS_FEED = "is-feed";
@@ -178,7 +179,6 @@ public class ExternalDataConstants {
     /**
      * supported builtin record formats
      */
-    public static final String FORMAT_HIVE = "hive";
     public static final String FORMAT_BINARY = "binary";
     public static final String FORMAT_ADM = "adm";
     public static final String FORMAT_JSON_LOWER_CASE = "json";
@@ -198,7 +198,6 @@ public class ExternalDataConstants {
 
     static {
         Set<String> formats = new HashSet<>(14);
-        formats.add(FORMAT_HIVE);
         formats.add(FORMAT_BINARY);
         formats.add(FORMAT_ADM);
         formats.add(FORMAT_JSON_LOWER_CASE);
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
index dd44436..dcd8f9a 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
@@ -31,6 +31,7 @@ import static org.apache.asterix.external.util.ExternalDataConstants.AwsS3.HADOO
 import static org.apache.asterix.external.util.ExternalDataConstants.AwsS3.HADOOP_CREDENTIAL_PROVIDER_KEY;
 import static org.apache.asterix.external.util.ExternalDataConstants.AwsS3.HADOOP_PATH_STYLE_ACCESS;
 import static org.apache.asterix.external.util.ExternalDataConstants.AwsS3.HADOOP_S3_CONNECTION_POOL_SIZE;
+import static org.apache.asterix.external.util.ExternalDataConstants.AwsS3.HADOOP_S3_PROTOCOL;
 import static org.apache.asterix.external.util.ExternalDataConstants.AwsS3.HADOOP_SECRET_ACCESS_KEY;
 import static org.apache.asterix.external.util.ExternalDataConstants.AwsS3.HADOOP_SESSION_TOKEN;
 import static org.apache.asterix.external.util.ExternalDataConstants.AwsS3.HADOOP_TEMP_ACCESS;
@@ -927,6 +928,9 @@ public class ExternalDataUtils {
             String sessionToken = configuration.get(ExternalDataConstants.AwsS3.SESSION_TOKEN_FIELD_NAME);
             String serviceEndpoint = configuration.get(ExternalDataConstants.AwsS3.SERVICE_END_POINT_FIELD_NAME);
 
+            //Disable caching S3 FileSystem
+            HDFSUtils.disableHadoopFileSystemCache(conf, HADOOP_S3_PROTOCOL);
+
             /*
              * Authentication Methods:
              * 1- Anonymous: no accessKeyId and no secretAccessKey
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
index e774b40..9f65cd7 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
@@ -336,4 +336,16 @@ public class HDFSUtils {
             conf.unset(ExternalDataConstants.KEY_HADOOP_ASTERIX_WARNINGS_LIST);
         }
     }
+
+    /**
+     * Hadoop may cache a FileSystem instance when reading the same file. This method disables that cache per scheme
+     *
+     * @param conf     Hadoop configuration
+     * @param protocol fs scheme (or protocol). e.g., s3a
+     */
+    public static void disableHadoopFileSystemCache(Configuration conf, String protocol) {
+        //Disable fs cache
+        conf.set(String.format(ExternalDataConstants.KEY_HADOOP_DISABLE_FS_CACHE_TEMPLATE, protocol),
+                ExternalDataConstants.TRUE);
+    }
 }