You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by cs...@apache.org on 2020/02/14 10:49:03 UTC
[impala] branch master updated: IMPALA-9304: Support starting Hive
with Ranger in minicluster
This is an automated email from the ASF dual-hosted git repository.
csringhofer pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new cad1561 IMPALA-9304: Support starting Hive with Ranger in minicluster
cad1561 is described below
commit cad156181b29b7897fb2366bd621f2349c090e20
Author: stiga-huang <hu...@gmail.com>
AuthorDate: Mon Feb 10 09:42:37 2020 +0800
IMPALA-9304: Support starting Hive with Ranger in minicluster
Add a new flag -with_ranger in testdata/bin/run-hive-server.sh to start
Hive with Ranger integration. The relevant configuration files are
generated in bin/create-test-configuration.sh using a new variant
ranger_auth in hive-site.xml.py. Only Hive3 is supported.
Current limitation:
Can't use a different username in Beeline via the -n option. "select
current_user()" keeps returning my username, while "select
logged_in_user()" returns the username given by the -n option, but that
name is not used in authorization.
Tests:
- Ran bin/create-test-configuration.sh and verified the generated
hive-site_ranger_auth.xml contains Ranger configurations.
- Ran testdata/bin/run-hive-server.sh -with_ranger. Verified column
masking and row filtering policies took effect in Beeline.
- Added a test in test_ranger.py for this mode.
Change-Id: I01e3a195b00a98388244a922a1a79e65146cec42
Reviewed-on: http://gerrit.cloudera.org:8080/15189
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
bin/create-test-configuration.sh | 12 +++++++++
fe/src/test/resources/hive-site.xml.py | 7 +++++
testdata/bin/run-hive-server.sh | 25 ++++++++++++++++++
.../queries/QueryTest/hive_ranger_integration.test | 15 +++++++++++
tests/authorization/test_ranger.py | 30 ++++++++++++++++++++--
tests/common/impala_connection.py | 7 ++---
tests/common/skip.py | 2 ++
7 files changed, 93 insertions(+), 5 deletions(-)
diff --git a/bin/create-test-configuration.sh b/bin/create-test-configuration.sh
index 32d56c3..6b012f7 100755
--- a/bin/create-test-configuration.sh
+++ b/bin/create-test-configuration.sh
@@ -146,6 +146,18 @@ mkdir -p hive-site-without-hms
rm -f hive-site-without-hms/hive-site.xml
ln -s "${CONFIG_DIR}/hive-site_without_hms.xml" hive-site-without-hms/hive-site.xml
+export HIVE_VARIANT=ranger_auth
+HIVE_RANGER_CONF_DIR=hive-site-ranger-auth
+$IMPALA_HOME/bin/generate_xml_config.py hive-site.xml.py hive-site_ranger_auth.xml
+rm -rf $HIVE_RANGER_CONF_DIR
+mkdir -p $HIVE_RANGER_CONF_DIR
+ln -s "${CONFIG_DIR}/hive-site_ranger_auth.xml" $HIVE_RANGER_CONF_DIR/hive-site.xml
+# Link some necessary config files for Hive.
+for f in ranger-hive-security.xml ranger-hive-audit.xml log4j.properties \
+ hive-log4j2.properties; do
+ ln -s "${CONFIG_DIR}/$f" "$HIVE_RANGER_CONF_DIR/$f"
+done
+
generate_config hive-log4j2.properties.template hive-log4j2.properties
if [ $CREATE_METASTORE -eq 1 ]; then
diff --git a/fe/src/test/resources/hive-site.xml.py b/fe/src/test/resources/hive-site.xml.py
index eb68401..6566d93 100644
--- a/fe/src/test/resources/hive-site.xml.py
+++ b/fe/src/test/resources/hive-site.xml.py
@@ -63,6 +63,13 @@ if variant == 'changed_external_dir':
CONFIG.update({
'hive.metastore.warehouse.external.dir': '${WAREHOUSE_LOCATION_PREFIX}/test-warehouse-external',
})
+elif variant == 'ranger_auth':
+ CONFIG.update({
+ 'hive.security.authorization.manager':
+ 'org.apache.ranger.authorization.hive.authorizer.RangerHiveAuthorizerFactory',
+ 'hive.metastore.pre.event.listeners':
+ 'org.apache.hadoop.hive.ql.security.authorization.plugin.metastore.HiveMetaStoreAuthorizer',
+ })
# HBase-related configs.
# Impala processes need to connect to zookeeper on INTERNAL_LISTEN_HOST for HBase.
diff --git a/testdata/bin/run-hive-server.sh b/testdata/bin/run-hive-server.sh
index 46a1edd..47b47c2 100755
--- a/testdata/bin/run-hive-server.sh
+++ b/testdata/bin/run-hive-server.sh
@@ -28,6 +28,7 @@ LOGDIR=${IMPALA_CLUSTER_LOGS_DIR}/hive
HIVES2_TRANSPORT="plain_sasl"
METASTORE_TRANSPORT="buffered"
ONLY_METASTORE=0
+ENABLE_RANGER_AUTH=0
CLUSTER_BIN=${IMPALA_HOME}/testdata/bin
@@ -48,9 +49,18 @@ do
-only_metastore)
ONLY_METASTORE=1
;;
+ -with_ranger)
+ if [[ "$USE_CDP_HIVE" = "false" ]]; then
+ echo "Ranger authorization is not supported in Hive 2."
+ exit 1
+ fi
+ ENABLE_RANGER_AUTH=1
+ echo "Starting Hive with Ranger authorization."
+ ;;
-help|-h|*)
echo "run-hive-server.sh : Starts the hive server and the metastore."
echo "[-only_metastore] : Only starts the hive metastore."
+ echo "[-with_ranger] : Starts with Ranger authorization (only for Hive 3)."
exit 1;
;;
esac
@@ -79,6 +89,21 @@ if [[ "$USE_CDP_HIVE" = "true" && -n "$SENTRY_HOME" ]]; then
done
fi
+# Add Ranger dependencies if we are starting with Ranger authorization enabled.
+if [[ $ENABLE_RANGER_AUTH -eq 1 ]]; then
+ export HIVE_CONF_DIR="$HADOOP_CONF_DIR/hive-site-ranger-auth/"
+ for f in "$RANGER_HOME"/ews/webapp/WEB-INF/classes/ranger-plugins/hive/ranger-*.jar \
+ "$RANGER_HOME"/ews/webapp/WEB-INF/lib/*.jar \
+ "$RANGER_HOME"/ews/lib/ranger-*.jar; do
+ FILE_NAME=$(basename $f)
+ # Exclude unnecessary jars.
+ if [[ ! $FILE_NAME == hive* && ! $FILE_NAME == hadoop* && ! $FILE_NAME == hbase* \
+ && ! $FILE_NAME == zookeeper* ]]; then
+ export HADOOP_CLASSPATH=${HADOOP_CLASSPATH}:${f}
+ fi
+ done
+fi
+
# For Hive 3, we use Tez for execution. We have to add it to the classpath.
# NOTE: it would seem like this would only be necessary on the HS2 classpath,
# but compactions are initiated from the HMS in Hive 3. This may change at
diff --git a/testdata/workloads/functional-query/queries/QueryTest/hive_ranger_integration.test b/testdata/workloads/functional-query/queries/QueryTest/hive_ranger_integration.test
new file mode 100644
index 0000000..5cfb5de
--- /dev/null
+++ b/testdata/workloads/functional-query/queries/QueryTest/hive_ranger_integration.test
@@ -0,0 +1,15 @@
+====
+---- HIVE_QUERY
+select id from functional.alltypestiny
+---- RESULTS
+0
+100
+200
+300
+400
+500
+600
+700
+---- TYPES
+INT
+====
diff --git a/tests/authorization/test_ranger.py b/tests/authorization/test_ranger.py
index 2de7283..f6f6ba6 100644
--- a/tests/authorization/test_ranger.py
+++ b/tests/authorization/test_ranger.py
@@ -17,13 +17,17 @@
#
# Client tests for SQL statement authorization
+import os
import grp
import json
import pytest
import requests
+from subprocess import check_call
from getpass import getuser
from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
+from tests.common.skip import (SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfIsilon,
+ SkipIfLocal, SkipIfHive2)
from tests.util.hdfs_util import NAMENODE
from tests.util.calculation_util import get_random_id
@@ -866,8 +870,7 @@ class TestRanger(CustomClusterTestSuite):
self._run_query_as_user("drop database {0} cascade".format(test_db), ADMIN, True)
@CustomClusterTestSuite.with_args(
- impalad_args="{0} {1}".format(IMPALAD_ARGS, "--enable_column_masking"),
- catalogd_args=CATALOGD_ARGS)
+ impalad_args=IMPALAD_ARGS, catalogd_args=CATALOGD_ARGS)
def test_column_masking(self, vector, unique_name):
user = getuser()
unique_database = unique_name + '_db'
@@ -932,3 +935,26 @@ class TestRanger(CustomClusterTestSuite):
admin_client.execute("drop database %s cascade" % unique_database)
for i in range(policy_cnt):
TestRanger._remove_column_masking_policy(unique_name + str(i))
+
+ @SkipIfABFS.hive
+ @SkipIfADLS.hive
+ @SkipIfIsilon.hive
+ @SkipIfLocal.hive
+ @SkipIfS3.hive
+ @SkipIfHive2.ranger_auth
+ @CustomClusterTestSuite.with_args()
+ def test_hive_with_ranger_setup(self, vector):
+ """Test for setup of Hive-Ranger integration. Make sure future upgrades on
+ Hive/Ranger won't break the tool."""
+ script = os.path.join(os.environ['IMPALA_HOME'], 'testdata/bin/run-hive-server.sh')
+ try:
+ # Add the policy before restarting Hive. So it can take effect immediately after
+ # HiveServer2 starts.
+ TestRanger._add_column_masking_policy(
+ "col_mask_for_hive", getuser(), "functional", "alltypestiny", "id", "CUSTOM",
+ "{col} * 100")
+ check_call([script, '-with_ranger'])
+ self.run_test_case("QueryTest/hive_ranger_integration", vector)
+ finally:
+ check_call([script])
+ TestRanger._remove_column_masking_policy("col_mask_for_hive")
diff --git a/tests/common/impala_connection.py b/tests/common/impala_connection.py
index 4f16fd9..1e24abc 100644
--- a/tests/common/impala_connection.py
+++ b/tests/common/impala_connection.py
@@ -285,7 +285,7 @@ class ImpylaHS2Connection(ImpalaConnection):
def clear_configuration(self):
self.__query_options.clear()
- if hasattr(tests.common, "current_node"):
+ if hasattr(tests.common, "current_node") and not self._is_hive:
self.set_configuration_option("client_identifier", tests.common.current_node)
def connect(self):
@@ -496,8 +496,9 @@ def create_connection(host_port, use_kerberos=False, protocol='beeswax',
c = ImpylaHS2Connection(host_port=host_port, use_kerberos=use_kerberos,
is_hive=is_hive, use_http_transport=True, http_path='cliservice')
- # A hook in conftest sets tests.common.current_node.
- if hasattr(tests.common, "current_node"):
+ # A hook in conftest sets tests.common.current_node. Skip for Hive connections since
+ # Hive cannot modify client_identifier at runtime.
+ if hasattr(tests.common, "current_node") and not is_hive:
c.set_configuration_option("client_identifier", tests.common.current_node)
return c
diff --git a/tests/common/skip.py b/tests/common/skip.py
index 4d6aecf..628ceb9 100644
--- a/tests/common/skip.py
+++ b/tests/common/skip.py
@@ -232,6 +232,8 @@ class SkipIfHive2:
" See IMPALA-9092 for details.")
orc = pytest.mark.skipif(HIVE_MAJOR_VERSION <= 2,
reason="CREATE TABLE LIKE ORC is only supported with Hive version >= 3")
+ ranger_auth = pytest.mark.skipif(HIVE_MAJOR_VERSION <= 2,
+ reason="Hive 2 doesn't support Ranger authorization.")
class SkipIfCatalogV2:
"""Expose decorators as methods so that is_catalog_v2_cluster() can be evaluated lazily