You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by cs...@apache.org on 2020/02/14 10:49:03 UTC

[impala] branch master updated: IMPALA-9304: Support starting Hive with Ranger in minicluster

This is an automated email from the ASF dual-hosted git repository.

csringhofer pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new cad1561  IMPALA-9304: Support starting Hive with Ranger in minicluster
cad1561 is described below

commit cad156181b29b7897fb2366bd621f2349c090e20
Author: stiga-huang <hu...@gmail.com>
AuthorDate: Mon Feb 10 09:42:37 2020 +0800

    IMPALA-9304: Support starting Hive with Ranger in minicluster
    
    Add a new flag -with_ranger in testdata/bin/run-hive-server.sh to start
    Hive with Ranger integration. The related configuration files are
    generated in bin/create-test-configuration.sh using a new variant
    ranger_auth in hive-site.xml.py. Only Hive3 is supported.
    
    Current limitation:
    Can't use a different username in Beeline via the -n option. "select
    current_user()" keeps returning my username, while "select
    logged_in_user()" can return the username given by the -n option, but it's
    not used in authorization.
    
    Tests:
     - Ran bin/create-test-configuration.sh and verified the generated
       hive-site_ranger_auth.xml contains Ranger configurations.
     - Ran testdata/bin/run-hive-server.sh -with_ranger. Verified column
       masking and row filtering policies took effect in Beeline.
     - Added test in test_ranger.py for this mode.
    
    Change-Id: I01e3a195b00a98388244a922a1a79e65146cec42
    Reviewed-on: http://gerrit.cloudera.org:8080/15189
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 bin/create-test-configuration.sh                   | 12 +++++++++
 fe/src/test/resources/hive-site.xml.py             |  7 +++++
 testdata/bin/run-hive-server.sh                    | 25 ++++++++++++++++++
 .../queries/QueryTest/hive_ranger_integration.test | 15 +++++++++++
 tests/authorization/test_ranger.py                 | 30 ++++++++++++++++++++--
 tests/common/impala_connection.py                  |  7 ++---
 tests/common/skip.py                               |  2 ++
 7 files changed, 93 insertions(+), 5 deletions(-)

diff --git a/bin/create-test-configuration.sh b/bin/create-test-configuration.sh
index 32d56c3..6b012f7 100755
--- a/bin/create-test-configuration.sh
+++ b/bin/create-test-configuration.sh
@@ -146,6 +146,18 @@ mkdir -p hive-site-without-hms
 rm -f hive-site-without-hms/hive-site.xml
 ln -s "${CONFIG_DIR}/hive-site_without_hms.xml" hive-site-without-hms/hive-site.xml
 
+export HIVE_VARIANT=ranger_auth
+HIVE_RANGER_CONF_DIR=hive-site-ranger-auth
+$IMPALA_HOME/bin/generate_xml_config.py hive-site.xml.py hive-site_ranger_auth.xml
+rm -rf $HIVE_RANGER_CONF_DIR
+mkdir -p $HIVE_RANGER_CONF_DIR
+ln -s "${CONFIG_DIR}/hive-site_ranger_auth.xml" $HIVE_RANGER_CONF_DIR/hive-site.xml
+# Link some necessary config files for Hive.
+for f in ranger-hive-security.xml ranger-hive-audit.xml log4j.properties \
+    hive-log4j2.properties; do
+  ln -s "${CONFIG_DIR}/$f" "$HIVE_RANGER_CONF_DIR/$f"
+done
+
 generate_config hive-log4j2.properties.template hive-log4j2.properties
 
 if [ $CREATE_METASTORE -eq 1 ]; then
diff --git a/fe/src/test/resources/hive-site.xml.py b/fe/src/test/resources/hive-site.xml.py
index eb68401..6566d93 100644
--- a/fe/src/test/resources/hive-site.xml.py
+++ b/fe/src/test/resources/hive-site.xml.py
@@ -63,6 +63,13 @@ if variant == 'changed_external_dir':
   CONFIG.update({
     'hive.metastore.warehouse.external.dir': '${WAREHOUSE_LOCATION_PREFIX}/test-warehouse-external',
   })
+elif variant == 'ranger_auth':
+  CONFIG.update({
+    'hive.security.authorization.manager':
+        'org.apache.ranger.authorization.hive.authorizer.RangerHiveAuthorizerFactory',
+    'hive.metastore.pre.event.listeners':
+        'org.apache.hadoop.hive.ql.security.authorization.plugin.metastore.HiveMetaStoreAuthorizer',
+  })
 
 # HBase-related configs.
 # Impala processes need to connect to zookeeper on INTERNAL_LISTEN_HOST for HBase.
diff --git a/testdata/bin/run-hive-server.sh b/testdata/bin/run-hive-server.sh
index 46a1edd..47b47c2 100755
--- a/testdata/bin/run-hive-server.sh
+++ b/testdata/bin/run-hive-server.sh
@@ -28,6 +28,7 @@ LOGDIR=${IMPALA_CLUSTER_LOGS_DIR}/hive
 HIVES2_TRANSPORT="plain_sasl"
 METASTORE_TRANSPORT="buffered"
 ONLY_METASTORE=0
+ENABLE_RANGER_AUTH=0
 
 CLUSTER_BIN=${IMPALA_HOME}/testdata/bin
 
@@ -48,9 +49,18 @@ do
     -only_metastore)
       ONLY_METASTORE=1
       ;;
+    -with_ranger)
+      if [[ "$USE_CDP_HIVE" = "false" ]]; then
+        echo "Ranger authorization is not supported in Hive 2."
+        exit 1
+      fi
+      ENABLE_RANGER_AUTH=1
+      echo "Starting Hive with Ranger authorization."
+      ;;
     -help|-h|*)
       echo "run-hive-server.sh : Starts the hive server and the metastore."
       echo "[-only_metastore] : Only starts the hive metastore."
+      echo "[-with_ranger] : Starts with Ranger authorization (only for Hive 3)."
       exit 1;
       ;;
     esac
@@ -79,6 +89,21 @@ if [[ "$USE_CDP_HIVE" = "true" && -n "$SENTRY_HOME" ]]; then
   done
 fi
 
+# Add Ranger dependencies if we are starting with Ranger authorization enabled.
+if [[ $ENABLE_RANGER_AUTH -eq 1 ]]; then
+  export HIVE_CONF_DIR="$HADOOP_CONF_DIR/hive-site-ranger-auth/"
+  for f in "$RANGER_HOME"/ews/webapp/WEB-INF/classes/ranger-plugins/hive/ranger-*.jar \
+      "$RANGER_HOME"/ews/webapp/WEB-INF/lib/*.jar \
+      "$RANGER_HOME"/ews/lib/ranger-*.jar; do
+    FILE_NAME=$(basename $f)
+    # Exclude unnecessary jars.
+    if [[ ! $FILE_NAME == hive* && ! $FILE_NAME == hadoop* && ! $FILE_NAME == hbase* \
+        && ! $FILE_NAME == zookeeper* ]]; then
+      export HADOOP_CLASSPATH=${HADOOP_CLASSPATH}:${f}
+    fi
+  done
+fi
+
 # For Hive 3, we use Tez for execution. We have to add it to the classpath.
 # NOTE: it would seem like this would only be necessary on the HS2 classpath,
 # but compactions are initiated from the HMS in Hive 3. This may change at
diff --git a/testdata/workloads/functional-query/queries/QueryTest/hive_ranger_integration.test b/testdata/workloads/functional-query/queries/QueryTest/hive_ranger_integration.test
new file mode 100644
index 0000000..5cfb5de
--- /dev/null
+++ b/testdata/workloads/functional-query/queries/QueryTest/hive_ranger_integration.test
@@ -0,0 +1,15 @@
+====
+---- HIVE_QUERY
+select id from functional.alltypestiny
+---- RESULTS
+0
+100
+200
+300
+400
+500
+600
+700
+---- TYPES
+INT
+====
diff --git a/tests/authorization/test_ranger.py b/tests/authorization/test_ranger.py
index 2de7283..f6f6ba6 100644
--- a/tests/authorization/test_ranger.py
+++ b/tests/authorization/test_ranger.py
@@ -17,13 +17,17 @@
 #
 # Client tests for SQL statement authorization
 
+import os
 import grp
 import json
 import pytest
 import requests
+from subprocess import check_call
 
 from getpass import getuser
 from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
+from tests.common.skip import (SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfIsilon,
+                               SkipIfLocal, SkipIfHive2)
 from tests.util.hdfs_util import NAMENODE
 from tests.util.calculation_util import get_random_id
 
@@ -866,8 +870,7 @@ class TestRanger(CustomClusterTestSuite):
       self._run_query_as_user("drop database {0} cascade".format(test_db), ADMIN, True)
 
   @CustomClusterTestSuite.with_args(
-    impalad_args="{0} {1}".format(IMPALAD_ARGS, "--enable_column_masking"),
-    catalogd_args=CATALOGD_ARGS)
+    impalad_args=IMPALAD_ARGS, catalogd_args=CATALOGD_ARGS)
   def test_column_masking(self, vector, unique_name):
     user = getuser()
     unique_database = unique_name + '_db'
@@ -932,3 +935,26 @@ class TestRanger(CustomClusterTestSuite):
       admin_client.execute("drop database %s cascade" % unique_database)
       for i in range(policy_cnt):
         TestRanger._remove_column_masking_policy(unique_name + str(i))
+
+  @SkipIfABFS.hive
+  @SkipIfADLS.hive
+  @SkipIfIsilon.hive
+  @SkipIfLocal.hive
+  @SkipIfS3.hive
+  @SkipIfHive2.ranger_auth
+  @CustomClusterTestSuite.with_args()
+  def test_hive_with_ranger_setup(self, vector):
+    """Test for setup of Hive-Ranger integration. Make sure future upgrades on
+    Hive/Ranger won't break the tool."""
+    script = os.path.join(os.environ['IMPALA_HOME'], 'testdata/bin/run-hive-server.sh')
+    try:
+      # Add the policy before restarting Hive. So it can take effect immediately after
+      # HiveServer2 starts.
+      TestRanger._add_column_masking_policy(
+          "col_mask_for_hive", getuser(), "functional", "alltypestiny", "id", "CUSTOM",
+          "{col} * 100")
+      check_call([script, '-with_ranger'])
+      self.run_test_case("QueryTest/hive_ranger_integration", vector)
+    finally:
+      check_call([script])
+      TestRanger._remove_column_masking_policy("col_mask_for_hive")
diff --git a/tests/common/impala_connection.py b/tests/common/impala_connection.py
index 4f16fd9..1e24abc 100644
--- a/tests/common/impala_connection.py
+++ b/tests/common/impala_connection.py
@@ -285,7 +285,7 @@ class ImpylaHS2Connection(ImpalaConnection):
 
   def clear_configuration(self):
     self.__query_options.clear()
-    if hasattr(tests.common, "current_node"):
+    if hasattr(tests.common, "current_node") and not self._is_hive:
       self.set_configuration_option("client_identifier", tests.common.current_node)
 
   def connect(self):
@@ -496,8 +496,9 @@ def create_connection(host_port, use_kerberos=False, protocol='beeswax',
     c = ImpylaHS2Connection(host_port=host_port, use_kerberos=use_kerberos,
         is_hive=is_hive, use_http_transport=True, http_path='cliservice')
 
-  # A hook in conftest sets tests.common.current_node.
-  if hasattr(tests.common, "current_node"):
+  # A hook in conftest sets tests.common.current_node. Skip for Hive connections since
+  # Hive cannot modify client_identifier at runtime.
+  if hasattr(tests.common, "current_node") and not is_hive:
     c.set_configuration_option("client_identifier", tests.common.current_node)
   return c
 
diff --git a/tests/common/skip.py b/tests/common/skip.py
index 4d6aecf..628ceb9 100644
--- a/tests/common/skip.py
+++ b/tests/common/skip.py
@@ -232,6 +232,8 @@ class SkipIfHive2:
              " See IMPALA-9092 for details.")
   orc = pytest.mark.skipif(HIVE_MAJOR_VERSION <= 2,
       reason="CREATE TABLE LIKE ORC is only supported with Hive version >= 3")
+  ranger_auth = pytest.mark.skipif(HIVE_MAJOR_VERSION <= 2,
+      reason="Hive 2 doesn't support Ranger authorization.")
 
 class SkipIfCatalogV2:
   """Expose decorators as methods so that is_catalog_v2_cluster() can be evaluated lazily