You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2022/12/28 01:49:05 UTC

[impala] branch master updated: IMPALA-10399, IMPALA-11060, IMPALA-11788: Reset Ranger policy repository in an E2E test

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new db1cac2a4 IMPALA-10399, IMPALA-11060, IMPALA-11788: Reset Ranger policy repository in an E2E test
db1cac2a4 is described below

commit db1cac2a495fa393f407d72775c072551a43dab4
Author: Fang-Yu Rao <fa...@cloudera.com>
AuthorDate: Wed Dec 14 19:58:52 2022 -0800

    IMPALA-10399, IMPALA-11060, IMPALA-11788: Reset Ranger policy repository in an E2E test
    
    test_show_grant_hive_privilege() uses Ranger's REST API to get all the
    existing policies from the Ranger server after creating a policy that
    grants the LOCK and SELECT privileges on all the tables and columns in
    the unique database in order to verify the granted privileges indeed
    exist in Ranger's policy repository.
    
    The way we download all the policies from the Ranger server in
    test_show_grant_hive_privilege(), however, did not
    always work. Specifically, when there were already a lot of existing
    policies in Ranger, the policy that granted the LOCK and SELECT
    privileges would not be included in the result returned by a single
    GET request. We found that adding 300 Ranger policies before adding
    the policy granting those 2 privileges suffices to reproduce the issue.
    
    Moreover, we found that even when we set the argument 'stream' of
    requests.get() to True and used iter_content() to read the response in
    chunks, we still could not retrieve the policy added in
    test_show_grant_hive_privilege().
    
    As a workaround, instead of changing how we download all the policies
    from the Ranger server, this patch resets Ranger's policy repository for
    Impala before we create the policy granting those 2 privileges so that
    this test will be more resilient to the number of existing policies in
    the repository.
    
    Change-Id: Iff56ec03ceeb2912039241ea302f4bb8948d61f8
    Reviewed-on: http://gerrit.cloudera.org:8080/19373
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Quanlong Huang <hu...@gmail.com>
---
 testdata/bin/create-load-data.sh          | 67 +----------------------
 testdata/bin/setup-ranger.sh              | 89 +++++++++++++++++++++++++++++++
 tests/authorization/test_ranger.py        |  8 +--
 tests/common/custom_cluster_test_suite.py | 24 ++++++++-
 4 files changed, 115 insertions(+), 73 deletions(-)

diff --git a/testdata/bin/create-load-data.sh b/testdata/bin/create-load-data.sh
index 9585fa3d3..e13f1302c 100755
--- a/testdata/bin/create-load-data.sh
+++ b/testdata/bin/create-load-data.sh
@@ -327,70 +327,6 @@ function load-aux-workloads {
   fi
 }
 
-function setup-ranger {
-  echo "SETTING UP RANGER"
-
-  RANGER_SETUP_DIR="${IMPALA_HOME}/testdata/cluster/ranger/setup"
-
-  perl -wpl -e 's/\$\{([^}]+)\}/defined $ENV{$1} ? $ENV{$1} : $&/eg' \
-    "${RANGER_SETUP_DIR}/impala_group_owner.json.template" > \
-    "${RANGER_SETUP_DIR}/impala_group_owner.json"
-
-  GROUP_ID_OWNER=$(wget -qO - --auth-no-challenge --user=admin --password=admin \
-    --post-file="${RANGER_SETUP_DIR}/impala_group_owner.json" \
-    --header="accept:application/json" \
-    --header="Content-Type:application/json" \
-    http://localhost:6080/service/xusers/secure/groups |
-    python -c "import sys, json; print json.load(sys.stdin)['id']")
-  export GROUP_ID_OWNER
-
-  GROUP_ID_NON_OWNER=$(wget -qO - --auth-no-challenge --user=admin \
-    --password=admin --post-file="${RANGER_SETUP_DIR}/impala_group_non_owner.json" \
-    --header="accept:application/json" \
-    --header="Content-Type:application/json" \
-    http://localhost:6080/service/xusers/secure/groups |
-    python -c "import sys, json; print json.load(sys.stdin)['id']")
-  export GROUP_ID_NON_OWNER
-
-  perl -wpl -e 's/\$\{([^}]+)\}/defined $ENV{$1} ? $ENV{$1} : $&/eg' \
-    "${RANGER_SETUP_DIR}/impala_user_owner.json.template" > \
-    "${RANGER_SETUP_DIR}/impala_user_owner.json"
-
-  perl -wpl -e 's/\$\{([^}]+)\}/defined $ENV{$1} ? $ENV{$1} : $&/eg' \
-    "${RANGER_SETUP_DIR}/impala_user_non_owner.json.template" > \
-    "${RANGER_SETUP_DIR}/impala_user_non_owner.json"
-
-  if grep "\${[A-Z_]*}" "${RANGER_SETUP_DIR}/impala_user_owner.json"; then
-    echo "Found undefined variables in ${RANGER_SETUP_DIR}/impala_user_owner.json."
-    exit 1
-  fi
-
-  if grep "\${[A-Z_]*}" "${RANGER_SETUP_DIR}/impala_user_non_owner.json"; then
-    echo "Found undefined variables in ${RANGER_SETUP_DIR}/impala_user_non_owner.json."
-    exit 1
-  fi
-
-  wget -O /dev/null --auth-no-challenge --user=admin --password=admin \
-    --post-file="${RANGER_SETUP_DIR}/impala_user_owner.json" \
-    --header="Content-Type:application/json" \
-    http://localhost:6080/service/xusers/secure/users
-
-  wget -O /dev/null --auth-no-challenge --user=admin --password=admin \
-    --post-file="${RANGER_SETUP_DIR}/impala_user_non_owner.json" \
-    --header="Content-Type:application/json" \
-    http://localhost:6080/service/xusers/secure/users
-
-  wget -O /dev/null --auth-no-challenge --user=admin --password=admin \
-    --post-file="${RANGER_SETUP_DIR}/impala_service.json" \
-    --header="Content-Type:application/json" \
-    http://localhost:6080/service/public/v2/api/service
-
-  curl -f -u admin:admin -H "Accept: application/json" \
-    -H "Content-Type: application/json" \
-    -X PUT http://localhost:6080/service/public/v2/api/policy/5 \
-    -d @"${RANGER_SETUP_DIR}/policy_5_revised.json"
-}
-
 function copy-and-load-dependent-tables {
   # COPY
   # TODO: The multi-format table will move these files. So we need to copy them to a
@@ -703,7 +639,8 @@ if [[ -z "$REMOTE_LOAD" ]]; then
 fi
 
 if [[ $SKIP_RANGER -eq 0 ]]; then
-  run-step "Setting up Ranger" setup-ranger.log setup-ranger
+  run-step "Setting up Ranger" setup-ranger.log \
+      ${IMPALA_HOME}/testdata/bin/setup-ranger.sh
 fi
 
 # Restart the minicluster. This is strictly to provide a sanity check that
diff --git a/testdata/bin/setup-ranger.sh b/testdata/bin/setup-ranger.sh
new file mode 100755
index 000000000..e57f79c76
--- /dev/null
+++ b/testdata/bin/setup-ranger.sh
@@ -0,0 +1,89 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -euo pipefail
+. $IMPALA_HOME/bin/report_build_error.sh
+setup_report_build_error
+set -x
+
+function setup-ranger {
+  echo "SETTING UP RANGER"
+
+  RANGER_SETUP_DIR="${IMPALA_HOME}/testdata/cluster/ranger/setup"
+
+  perl -wpl -e 's/\$\{([^}]+)\}/defined $ENV{$1} ? $ENV{$1} : $&/eg' \
+    "${RANGER_SETUP_DIR}/impala_group_owner.json.template" > \
+    "${RANGER_SETUP_DIR}/impala_group_owner.json"
+
+  GROUP_ID_OWNER=$(wget -qO - --auth-no-challenge --user=admin --password=admin \
+    --post-file="${RANGER_SETUP_DIR}/impala_group_owner.json" \
+    --header="accept:application/json" \
+    --header="Content-Type:application/json" \
+    http://localhost:6080/service/xusers/secure/groups |
+    python -c "import sys, json; print json.load(sys.stdin)['id']")
+  export GROUP_ID_OWNER
+
+  GROUP_ID_NON_OWNER=$(wget -qO - --auth-no-challenge --user=admin \
+    --password=admin --post-file="${RANGER_SETUP_DIR}/impala_group_non_owner.json" \
+    --header="accept:application/json" \
+    --header="Content-Type:application/json" \
+    http://localhost:6080/service/xusers/secure/groups |
+    python -c "import sys, json; print json.load(sys.stdin)['id']")
+  export GROUP_ID_NON_OWNER
+
+  perl -wpl -e 's/\$\{([^}]+)\}/defined $ENV{$1} ? $ENV{$1} : $&/eg' \
+    "${RANGER_SETUP_DIR}/impala_user_owner.json.template" > \
+    "${RANGER_SETUP_DIR}/impala_user_owner.json"
+
+  perl -wpl -e 's/\$\{([^}]+)\}/defined $ENV{$1} ? $ENV{$1} : $&/eg' \
+    "${RANGER_SETUP_DIR}/impala_user_non_owner.json.template" > \
+    "${RANGER_SETUP_DIR}/impala_user_non_owner.json"
+
+  if grep "\${[A-Z_]*}" "${RANGER_SETUP_DIR}/impala_user_owner.json"; then
+    echo "Found undefined variables in ${RANGER_SETUP_DIR}/impala_user_owner.json."
+    exit 1
+  fi
+
+  if grep "\${[A-Z_]*}" "${RANGER_SETUP_DIR}/impala_user_non_owner.json"; then
+    echo "Found undefined variables in ${RANGER_SETUP_DIR}/impala_user_non_owner.json."
+    exit 1
+  fi
+
+  wget -O /dev/null --auth-no-challenge --user=admin --password=admin \
+    --post-file="${RANGER_SETUP_DIR}/impala_user_owner.json" \
+    --header="Content-Type:application/json" \
+    http://localhost:6080/service/xusers/secure/users
+
+  wget -O /dev/null --auth-no-challenge --user=admin --password=admin \
+    --post-file="${RANGER_SETUP_DIR}/impala_user_non_owner.json" \
+    --header="Content-Type:application/json" \
+    http://localhost:6080/service/xusers/secure/users
+
+  wget -O /dev/null --auth-no-challenge --user=admin --password=admin \
+    --post-file="${RANGER_SETUP_DIR}/impala_service.json" \
+    --header="Content-Type:application/json" \
+    http://localhost:6080/service/public/v2/api/service
+
+  curl -f -u admin:admin -H "Accept: application/json" \
+    -H "Content-Type: application/json" \
+    -X PUT http://localhost:6080/service/public/v2/api/policy/5 \
+    -d @"${RANGER_SETUP_DIR}/policy_5_revised.json"
+}
+
+setup-ranger
diff --git a/tests/authorization/test_ranger.py b/tests/authorization/test_ranger.py
index 9eda63d6e..2463b57bd 100644
--- a/tests/authorization/test_ranger.py
+++ b/tests/authorization/test_ranger.py
@@ -726,15 +726,9 @@ class TestRanger(CustomClusterTestSuite):
       admin_client.execute("drop database if exists {0} cascade".format(unique_db),
                            user=ADMIN)
 
-  # TODO(IMPALA-10399, IMPALA-10401): We found that if this test is run after
-  # test_grant_revoke_with_role() in the exhaustive tests, the test could fail due to an
-  # empty list returned from the first call to _get_ranger_privileges_db() although a
-  # list consisting of "lock" and "select" is expected. We suspect there might be
-  # something wrong with the underlying Ranger API but it requires more thorough
-  # investigation.
   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args(
-    impalad_args=IMPALAD_ARGS, catalogd_args=CATALOGD_ARGS)
+    impalad_args=IMPALAD_ARGS, catalogd_args=CATALOGD_ARGS, reset_ranger=True)
   def test_show_grant_hive_privilege(self, unique_name):
     user = getuser()
     admin_client = self.create_impala_client()
diff --git a/tests/common/custom_cluster_test_suite.py b/tests/common/custom_cluster_test_suite.py
index 2bb43e23f..6e8205532 100644
--- a/tests/common/custom_cluster_test_suite.py
+++ b/tests/common/custom_cluster_test_suite.py
@@ -53,6 +53,8 @@ NUM_EXCLUSIVE_COORDINATORS = 'num_exclusive_coordinators'
 STATESTORED_TIMEOUT_S = 'statestored_timeout_s'
 IMPALAD_TIMEOUT_S = 'impalad_timeout_s'
 EXPECT_CORES = 'expect_cores'
+# Additional arg to determine whether we should reset the Ranger policy repository.
+RESET_RANGER = 'reset_ranger'
 
 # Run with fast topic updates by default to reduce time to first query running.
 DEFAULT_STATESTORE_ARGS = '--statestore_update_frequency_ms=50 \
@@ -106,7 +108,7 @@ class CustomClusterTestSuite(ImpalaTestSuite):
       start_args=None, default_query_options=None,
       impala_log_dir=None, hive_conf_dir=None, cluster_size=None,
       num_exclusive_coordinators=None, kudu_args=None, statestored_timeout_s=None,
-      impalad_timeout_s=None, expect_cores=None):
+      impalad_timeout_s=None, expect_cores=None, reset_ranger=False):
     """Records arguments to be passed to a cluster by adding them to the decorated
     method's func_dict"""
     def decorate(func):
@@ -135,6 +137,8 @@ class CustomClusterTestSuite(ImpalaTestSuite):
         func.func_dict[IMPALAD_TIMEOUT_S] = impalad_timeout_s
       if expect_cores is not None:
         func.func_dict[EXPECT_CORES] = expect_cores
+      if reset_ranger is not False:
+        func.func_dict[RESET_RANGER] = True
       return func
     return decorate
 
@@ -157,6 +161,9 @@ class CustomClusterTestSuite(ImpalaTestSuite):
     if KUDU_ARGS in method.func_dict:
       self._restart_kudu_service(method.func_dict[KUDU_ARGS])
 
+    if RESET_RANGER in method.func_dict:
+      self._reset_ranger_policy_repository()
+
     cluster_size = DEFAULT_CLUSTER_SIZE
     if CLUSTER_SIZE in method.func_dict:
       cluster_size = method.func_dict[CLUSTER_SIZE]
@@ -253,6 +260,21 @@ class CustomClusterTestSuite(ImpalaTestSuite):
                                         "testdata/bin/kill-hive-server.sh")],
                           close_fds=True)
 
+  @classmethod
+  def _reset_ranger_policy_repository(cls):
+    script_kill_ranger = os.path.join(os.environ['IMPALA_HOME'],
+                                      'testdata/bin/kill-ranger-server.sh')
+    script_run_ranger = os.path.join(os.environ['IMPALA_HOME'],
+                                     'testdata/bin/run-ranger-server.sh')
+    script_create_test_config = os.path.join(os.environ['IMPALA_HOME'],
+                                             'bin/create-test-configuration.sh')
+    script_setup_ranger = os.path.join(os.environ['IMPALA_HOME'],
+                                       'testdata/bin/setup-ranger.sh')
+    check_call([script_kill_ranger])
+    check_call([script_create_test_config, '-create_ranger_policy_db'])
+    check_call([script_run_ranger])
+    check_call([script_setup_ranger])
+
   @classmethod
   def _start_impala_cluster(cls,
                             options,