You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by wz...@apache.org on 2024/01/03 07:21:40 UTC

(impala) 01/02: IMPALA-12380: Securing dbcp.password for JDBC external data source

This is an automated email from the ASF dual-hosted git repository.

wzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 9a132bc436e956641e1eb58073b446f71369fb3c
Author: Gaurav Singh <ga...@gmail>
AuthorDate: Tue Dec 5 13:42:27 2023 -0800

    IMPALA-12380: Securing dbcp.password for JDBC
    external data source
    
    In the current implementation of external JDBC data source,
    the user has to provide both the username and password in
    plain text which is not a good practice.
    
    This patch extends the functionality of existing implementation
    to either provide:
    a) username and password
    b) username or key and keystore
    
    If the user provides the password, then that password is used.
    However, if no password is provided and the user provides only the
    key/keystore, then it fetches the password from the secure jceks
    keystore.
    
    Testing:
    - Added unit test TestExtDataSourcesWithKeyStore
    
    Change-Id: Iec83a9b6e00456f0a1bbee747bd752b2cf9bf238
    Reviewed-on: http://gerrit.cloudera.org:8080/20809
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../extdatasource/jdbc/conf/JdbcStorageConfig.java |   7 +-
 .../jdbc/conf/JdbcStorageConfigManager.java        |  57 ++++++++-
 .../jdbc/dao/GenericJdbcDatabaseAccessor.java      |   6 +
 testdata/bin/copy-ext-data-sources.sh              |  17 +++
 .../QueryTest/jdbc-data-source-with-keystore.test  | 127 +++++++++++++++++++++
 tests/common/impala_test_suite.py                  |   4 +-
 tests/query_test/test_ext_data_sources.py          |   5 +
 tests/util/filesystem_utils.py                     |  11 ++
 8 files changed, 229 insertions(+), 5 deletions(-)

diff --git a/java/ext-data-source/jdbc/src/main/java/org/apache/impala/extdatasource/jdbc/conf/JdbcStorageConfig.java b/java/ext-data-source/jdbc/src/main/java/org/apache/impala/extdatasource/jdbc/conf/JdbcStorageConfig.java
index e48fb07ef..36d62e8f2 100644
--- a/java/ext-data-source/jdbc/src/main/java/org/apache/impala/extdatasource/jdbc/conf/JdbcStorageConfig.java
+++ b/java/ext-data-source/jdbc/src/main/java/org/apache/impala/extdatasource/jdbc/conf/JdbcStorageConfig.java
@@ -37,6 +37,12 @@ public enum JdbcStorageConfig {
   DBCP_USERNAME("dbcp.username", false),
   // Password of the user.
   DBCP_PASSWORD("dbcp.password", false),
+  // Key of the keystore.
+  DBCP_PASSWORD_KEY("dbcp.password.key", false),
+  // Keystore URI in the format like jceks://hdfs/test-warehouse/data-sources/test.jceks.
+  // In Impala unit-test environment, URI scheme of filesystem is set as following:
+  // "hdfs" for HDFS, "s3a" for S3, and "ofs" for Ozone.
+  DBCP_PASSWORD_KEYSTORE("dbcp.password.keystore", false),
   // Number of rows to fetch in a batch.
   JDBC_FETCH_SIZE("jdbc.fetch.size", false),
   // SQL query which specify how to get data from external database.
@@ -50,7 +56,6 @@ public enum JdbcStorageConfig {
   private final String propertyName;
   private boolean required = false;
 
-
   JdbcStorageConfig(String propertyName, boolean required) {
     this.propertyName = propertyName;
     this.required = required;
diff --git a/java/ext-data-source/jdbc/src/main/java/org/apache/impala/extdatasource/jdbc/conf/JdbcStorageConfigManager.java b/java/ext-data-source/jdbc/src/main/java/org/apache/impala/extdatasource/jdbc/conf/JdbcStorageConfigManager.java
index e5e2e972f..280fbccd5 100644
--- a/java/ext-data-source/jdbc/src/main/java/org/apache/impala/extdatasource/jdbc/conf/JdbcStorageConfigManager.java
+++ b/java/ext-data-source/jdbc/src/main/java/org/apache/impala/extdatasource/jdbc/conf/JdbcStorageConfigManager.java
@@ -17,11 +17,13 @@
 
 package org.apache.impala.extdatasource.jdbc.conf;
 
-
 import java.util.Map;
 import java.util.Map.Entry;
 
+import java.io.IOException;
+
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.security.alias.CredentialProviderFactory;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -30,8 +32,8 @@ import org.slf4j.LoggerFactory;
  */
 public class JdbcStorageConfigManager {
 
-  private static final Logger LOG = LoggerFactory
-      .getLogger(JdbcStorageConfigManager.class);
+  private static final Logger LOGGER = LoggerFactory.getLogger(
+      JdbcStorageConfigManager.class);
 
   public static Configuration convertMapToConfiguration(Map<String, String> props) {
     checkRequiredPropertiesAreDefined(props);
@@ -44,6 +46,55 @@ public class JdbcStorageConfigManager {
     return conf;
   }
 
+  public static String getPasswordFromProperties(Configuration conf) {
+    String username = conf.get(JdbcStorageConfig.DBCP_USERNAME.getPropertyName());
+    String passwd = conf.get(JdbcStorageConfig.DBCP_PASSWORD.getPropertyName());
+    String keystore = conf.get(JdbcStorageConfig.DBCP_PASSWORD_KEYSTORE.
+        getPropertyName());
+    if (countNonNull(passwd, keystore) > 1) {
+      LOGGER.warn("Only one of " + passwd + ", " + keystore + " can be set");
+    }
+    if (passwd == null && keystore != null) {
+      String key = conf.get(JdbcStorageConfig.DBCP_PASSWORD_KEY.getPropertyName());
+      if (key == null) {
+        key = username;
+      }
+      LOGGER.info("hadoop keystore: " + keystore + " hadoop key: " + key);
+      try {
+        passwd = getPasswdFromKeystore(keystore, key);
+      } catch (IOException e) {
+        LOGGER.error("Failed to get password from keystore " + key + ", error: " + e);
+      }
+    }
+    return passwd;
+  }
+
+  private static int countNonNull(String ... values) {
+    int count = 0;
+    for (String str : values) {
+      if (str != null) {
+        count++;
+      }
+    }
+    return count;
+  }
+
+  public static String getPasswdFromKeystore(String keystore, String key)
+      throws IOException {
+    String passwd = null;
+    if (keystore != null && key != null) {
+      Configuration conf = new Configuration();
+      conf.set(CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH, keystore);
+      char[] pwdCharArray = conf.getPassword(key);
+      if (pwdCharArray != null) {
+        passwd = new String(pwdCharArray);
+      } else {
+        LOGGER.error("empty or null password for " + key);
+      }
+    }
+    return passwd;
+  }
+
   private static void checkRequiredPropertiesAreDefined(Map<String, String> props) {
 
     try {
diff --git a/java/ext-data-source/jdbc/src/main/java/org/apache/impala/extdatasource/jdbc/dao/GenericJdbcDatabaseAccessor.java b/java/ext-data-source/jdbc/src/main/java/org/apache/impala/extdatasource/jdbc/dao/GenericJdbcDatabaseAccessor.java
index 4ab214d2c..b60c3d15a 100644
--- a/java/ext-data-source/jdbc/src/main/java/org/apache/impala/extdatasource/jdbc/dao/GenericJdbcDatabaseAccessor.java
+++ b/java/ext-data-source/jdbc/src/main/java/org/apache/impala/extdatasource/jdbc/dao/GenericJdbcDatabaseAccessor.java
@@ -283,6 +283,12 @@ public class GenericJdbcDatabaseAccessor implements DatabaseAccessor {
       }
     }
 
+    String passwd = JdbcStorageConfigManager.getPasswordFromProperties(conf);
+    if (passwd != null) {
+      dbProperties.put(JdbcStorageConfig.DBCP_PASSWORD.getPropertyName().replaceFirst(
+          DBCP_CONFIG_PREFIX + "\\.", ""), passwd);
+    }
+
     // essential properties
     String jdbcUrl = conf.get(JdbcStorageConfig.JDBC_URL.getPropertyName());
     String jdbcAuth = conf.get(JdbcStorageConfig.JDBC_AUTH.getPropertyName());
diff --git a/testdata/bin/copy-ext-data-sources.sh b/testdata/bin/copy-ext-data-sources.sh
index 7b651914f..c3896aa24 100755
--- a/testdata/bin/copy-ext-data-sources.sh
+++ b/testdata/bin/copy-ext-data-sources.sh
@@ -54,3 +54,20 @@ hadoop fs -put -f \
 
 echo "Copied" ${IMPALA_HOME}/fe/target/dependency/postgresql-*.jar \
   "into HDFS" ${JDBC_DRIVERS_HDFS_PATH}
+
+# Extract URI scheme from DEFAULT_FS environment variable.
+FILESYSTEM_URI_SCHEME="hdfs"
+if [ ! -z "$DEFAULT_FS" ] && [[ $DEFAULT_FS =~ ":" ]]; then
+  FILESYSTEM_URI_SCHEME=`echo $DEFAULT_FS | cut -d \: -f 1`
+fi
+# Delete hivuser if it already exists in the jceks file
+if [ $(hadoop credential list -provider jceks://${FILESYSTEM_URI_SCHEME}/test-warehouse/\
+data-sources/test.jceks | grep -c 'hiveuser') -eq 1 ]; then
+  hadoop credential delete hiveuser -f -provider jceks://${FILESYSTEM_URI_SCHEME}/\
+test-warehouse/data-sources/test.jceks > /dev/null 2>&1
+fi
+
+# Store password in a Java keystore file on HDFS
+hadoop credential create hiveuser -provider \
+  jceks://${FILESYSTEM_URI_SCHEME}/test-warehouse/data-sources/test.jceks -v password\
+  > /dev/null 2>&1
diff --git a/testdata/workloads/functional-query/queries/QueryTest/jdbc-data-source-with-keystore.test b/testdata/workloads/functional-query/queries/QueryTest/jdbc-data-source-with-keystore.test
new file mode 100644
index 000000000..2e961d9a3
--- /dev/null
+++ b/testdata/workloads/functional-query/queries/QueryTest/jdbc-data-source-with-keystore.test
@@ -0,0 +1,127 @@
+====
+---- QUERY
+# Create DataSource
+DROP DATA SOURCE IF EXISTS TestJdbcDataSourceWithKeystore;
+CREATE DATA SOURCE TestJdbcDataSourceWithKeystore
+LOCATION '$FILESYSTEM_PREFIX/test-warehouse/data-sources/jdbc-data-source.jar'
+CLASS 'org.apache.impala.extdatasource.jdbc.JdbcDataSource'
+API_VERSION 'V1';
+---- RESULTS
+'Data source has been created.'
+====
+---- QUERY
+# Show created DataSource
+SHOW DATA SOURCES LIKE 'testjdbcdatasourcewithkeystore';
+---- LABELS
+NAME,LOCATION,CLASS NAME,API VERSION
+---- RESULTS
+'testjdbcdatasourcewithkeystore',regex:'.*/test-warehouse/data-sources/jdbc-data-source.jar','org.apache.impala.extdatasource.jdbc.JdbcDataSource','V1'
+---- TYPES
+STRING,STRING,STRING,STRING
+====
+---- QUERY
+# Create external JDBC DataSource table with username, key and keystore
+DROP TABLE IF EXISTS alltypes_jdbc_datasource_keystore;
+CREATE TABLE alltypes_jdbc_datasource_keystore (
+ id INT,
+ bool_col BOOLEAN,
+ tinyint_col TINYINT,
+ smallint_col SMALLINT,
+ int_col INT,
+ bigint_col BIGINT,
+ float_col FLOAT,
+ double_col DOUBLE,
+ date_string_col STRING,
+ string_col STRING,
+ timestamp_col TIMESTAMP)
+PRODUCED BY DATA SOURCE TestJdbcDataSourceWithKeystore(
+'{"database.type":"POSTGRES",
+"jdbc.url":"jdbc:postgresql://$INTERNAL_LISTEN_HOST:5432/functional",
+"jdbc.driver":"org.postgresql.Driver",
+"driver.url":"$FILESYSTEM_PREFIX/test-warehouse/data-sources/jdbc-drivers/postgresql-jdbc.jar",
+"dbcp.username":"hiveuser",
+"dbcp.password.keystore":"jceks://$FILESYSTEM_URI_SCHEME/test-warehouse/data-sources/test.jceks",
+"dbcp.password.key":"hiveuser",
+"table":"alltypes"}');
+---- RESULTS
+'Table has been created.'
+====
+---- QUERY
+# Test the jdbc DataSource
+# count(*) with a predicate evaluated by Impala
+select count(*) from alltypes_jdbc_datasource_keystore
+where float_col = 0 and string_col is not NULL
+---- RESULTS
+730
+---- TYPES
+BIGINT
+====
+---- QUERY
+# count(*) with no predicates has no materialized slots
+select count(*) from alltypes_jdbc_datasource_keystore
+---- RESULTS
+7300
+---- TYPES
+BIGINT
+====
+---- QUERY
+# Drop table
+DROP TABLE alltypes_jdbc_datasource_keystore;
+---- RESULTS
+'Table has been dropped.'
+====
+---- QUERY
+# Create external JDBC DataSource table with username and keystore
+DROP TABLE IF EXISTS alltypes_jdbc_datasource_keystore;
+CREATE TABLE alltypes_jdbc_datasource_keystore (
+ id INT,
+ bool_col BOOLEAN,
+ tinyint_col TINYINT,
+ smallint_col SMALLINT,
+ int_col INT,
+ bigint_col BIGINT,
+ float_col FLOAT,
+ double_col DOUBLE,
+ date_string_col STRING,
+ string_col STRING,
+ timestamp_col TIMESTAMP)
+PRODUCED BY DATA SOURCE TestJdbcDataSourceWithKeystore(
+'{"database.type":"POSTGRES",
+"jdbc.url":"jdbc:postgresql://$INTERNAL_LISTEN_HOST:5432/functional",
+"jdbc.driver":"org.postgresql.Driver",
+"driver.url":"$FILESYSTEM_PREFIX/test-warehouse/data-sources/jdbc-drivers/postgresql-jdbc.jar",
+"dbcp.username":"hiveuser",
+"dbcp.password.keystore":"jceks://$FILESYSTEM_URI_SCHEME/test-warehouse/data-sources/test.jceks",
+"table":"alltypes"}');
+---- RESULTS
+'Table has been created.'
+====
+---- QUERY
+# Test the jdbc DataSource
+# count(*) with a predicate evaluated by Impala
+select count(*) from alltypes_jdbc_datasource_keystore
+where float_col = 0 and string_col is not NULL
+---- RESULTS
+730
+---- TYPES
+BIGINT
+====
+---- QUERY
+# count(*) with no predicates has no materialized slots
+select count(*) from alltypes_jdbc_datasource_keystore
+---- RESULTS
+7300
+---- TYPES
+BIGINT
+====
+---- QUERY
+# Drop table
+DROP TABLE alltypes_jdbc_datasource_keystore;
+---- RESULTS
+'Table has been dropped.'
+---- QUERY
+# Drop DataSource
+DROP DATA SOURCE TestJdbcDataSourceWithKeystore;
+---- RESULTS
+'Data source has been dropped.'
+====
diff --git a/tests/common/impala_test_suite.py b/tests/common/impala_test_suite.py
index bbf430206..89e0cc4e7 100644
--- a/tests/common/impala_test_suite.py
+++ b/tests/common/impala_test_suite.py
@@ -77,7 +77,8 @@ from tests.util.filesystem_utils import (
     S3GUARD_ENABLED,
     ADLS_STORE_NAME,
     FILESYSTEM_PREFIX,
-    FILESYSTEM_NAME)
+    FILESYSTEM_NAME,
+    FILESYSTEM_URI_SCHEME)
 
 from tests.util.hdfs_util import (
   HdfsConfig,
@@ -495,6 +496,7 @@ class ImpalaTestSuite(BaseTestSuite):
     repl = dict(('$' + k, globs[k]) for k in [
         "FILESYSTEM_PREFIX",
         "FILESYSTEM_NAME",
+        "FILESYSTEM_URI_SCHEME",
         "GROUP_NAME",
         "NAMENODE",
         "IMPALA_HOME",
diff --git a/tests/query_test/test_ext_data_sources.py b/tests/query_test/test_ext_data_sources.py
index 094f0cd67..773ec2698 100644
--- a/tests/query_test/test_ext_data_sources.py
+++ b/tests/query_test/test_ext_data_sources.py
@@ -80,3 +80,8 @@ class TestExtDataSources(ImpalaTestSuite):
 
   def test_jdbc_data_source(self, vector, unique_database):
     self.run_test_case('QueryTest/jdbc-data-source', vector, use_db=unique_database)
+
+  def test_jdbc_data_source_with_keystore(self, vector, unique_database):
+    # Impala query tests for external data sources with keystore.
+    self.run_test_case('QueryTest/jdbc-data-source-with-keystore', vector,
+        use_db=unique_database)
diff --git a/tests/util/filesystem_utils.py b/tests/util/filesystem_utils.py
index a3114e18b..42d0e69e3 100644
--- a/tests/util/filesystem_utils.py
+++ b/tests/util/filesystem_utils.py
@@ -85,6 +85,17 @@ def get_secondary_fs_path(path):
   return prepend_with_fs(SECONDARY_FILESYSTEM, path)
 
 
+def get_fs_uri_scheme():
+  # Set default URI scheme as "hdfs".
+  uri_scheme = "hdfs"
+  # Extract URI scheme from DEFAULT_FS environment variable.
+  default_fs = os.getenv("DEFAULT_FS")
+  if default_fs and default_fs.find(':') != -1:
+    uri_scheme = default_fs.split(':')[0]
+  return uri_scheme
+
+
 WAREHOUSE = get_fs_path('/test-warehouse')
 FILESYSTEM_NAME = get_fs_name(FILESYSTEM)
 WAREHOUSE_PREFIX = os.getenv("WAREHOUSE_LOCATION_PREFIX")
+FILESYSTEM_URI_SCHEME = get_fs_uri_scheme()