You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/10/15 17:25:17 UTC

[impala] branch master updated: IMPALA-9002: Add query option to only check SELECT privilege in SHOW TABLES

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 63a1d21  IMPALA-9002: Add query option to only check SELECT privilege in SHOW TABLES
63a1d21 is described below

commit 63a1d210d3476fa6f673c640bb26cd96c835c641
Author: stiga-huang <hu...@gmail.com>
AuthorDate: Wed Oct 9 11:46:21 2019 +0800

    IMPALA-9002: Add query option to only check SELECT privilege in SHOW TABLES
    
    If authorization is enabled, SHOW TABLES statements or GET_TABLES
    requests in HS2 protocol (used in HUE and JDBC drivers) will only return
    tables that the user has ANY privileges on them. If the user don't have
    any privileges on a table, we need 8 privilege checks (ALL, INSERT,
    SELECT, ALTER, CREATE, DROP, OWNER, REFRESH) to get this conclusion.
    It takes time in Sentry to check these one by one if there are thousands
    of tables. Unfortunately, there are no batch API for these checks. This
    introduces a performance regression after we supported fine-grained
    privileges, since before that we just check 3 privileges (ALL, INSERT,
    SELECT).
    
    In practice, SELECT privilege is the minimal privilege set. It's wired
    to grant INSERT or other privileges to a resource without SELECT
    privilege. We can simplify the process to only check on SELECT privilege
    if users make sure that SELECT privilege is the minimal privilege set in
    their environment. This patch adds a flag(SIMPLIFY_CHECK_ON_SHOW_TABLES)
    to bypass checking other privileges in SHOW TABLE statements.
    
    Testing in a database with 40k tables and granting the user SELECT
    privilege on only 6 tables. When using Sentry, the SHOW TABLES statement
    takes 5s. With the SIMPLIFY_CHECK_ON_SHOW_TABLES enabled, time reduces
    to 1.2s. No performance gain is observed when using Ranger since Ranger
    is fast enough.
    
    Tests:
     - Add custom cluster test for the flag in test_authorization.py for
     both Sentry and Ranger.
     - Run CORE tests
    
    Change-Id: I17e2b7bf9e36c54627276a6812b459912156cc3c
    Reviewed-on: http://gerrit.cloudera.org:8080/14400
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/common/global-flags.cc                      |  7 ++
 be/src/util/backend-gflag-util.cc                  |  2 +
 common/thrift/BackendGflags.thrift                 |  2 +
 .../org/apache/impala/authorization/Privilege.java | 15 ++--
 .../org/apache/impala/service/BackendConfig.java   |  4 ++
 .../java/org/apache/impala/service/Frontend.java   |  6 +-
 tests/authorization/test_authorization.py          | 80 +++++++++++++++++++++-
 7 files changed, 108 insertions(+), 8 deletions(-)

diff --git a/be/src/common/global-flags.cc b/be/src/common/global-flags.cc
index 49eb97a..ccd8921 100644
--- a/be/src/common/global-flags.cc
+++ b/be/src/common/global-flags.cc
@@ -275,6 +275,13 @@ DEFINE_bool_hidden(recursively_list_partitions, true,
 DEFINE_bool(unlock_zorder_sort, false,
     "(Experimental) If true, enables using ZORDER option for SORT BY.");
 
+DEFINE_bool(simplify_check_on_show_tables, false,
+    "If true, only check SELECT privilege on SHOW TABLES or GET_TABLES when enabling "
+    "authorization. If false, all privileges will be checked for visibility of a table. "
+    "A table will show up if the user has any privileges on it. This flag is used to "
+    "improve SHOW TABLES performance when using Sentry and have thousands of candidate "
+    "tables to be checked. No performance gain is found in using Ranger");
+
 // ++========================++
 // || Startup flag graveyard ||
 // ++========================++
diff --git a/be/src/util/backend-gflag-util.cc b/be/src/util/backend-gflag-util.cc
index 5e7019f..aa24377 100644
--- a/be/src/util/backend-gflag-util.cc
+++ b/be/src/util/backend-gflag-util.cc
@@ -80,6 +80,7 @@ DECLARE_bool(use_dedicated_coordinator_estimates);
 DECLARE_string(blacklisted_dbs);
 DECLARE_bool(unlock_zorder_sort);
 DECLARE_string(blacklisted_tables);
+DECLARE_bool(simplify_check_on_show_tables);
 
 namespace impala {
 
@@ -163,6 +164,7 @@ Status GetThriftBackendGflags(JNIEnv* jni_env, jbyteArray* cfg_bytes) {
   cfg.__set_blacklisted_dbs(FLAGS_blacklisted_dbs);
   cfg.__set_unlock_zorder_sort(FLAGS_unlock_zorder_sort);
   cfg.__set_blacklisted_tables(FLAGS_blacklisted_tables);
+  cfg.__set_simplify_check_on_show_tables(FLAGS_simplify_check_on_show_tables);
   RETURN_IF_ERROR(SerializeThriftMsg(jni_env, &cfg, cfg_bytes));
   return Status::OK();
 }
diff --git a/common/thrift/BackendGflags.thrift b/common/thrift/BackendGflags.thrift
index 0038559..068cedb 100644
--- a/common/thrift/BackendGflags.thrift
+++ b/common/thrift/BackendGflags.thrift
@@ -141,4 +141,6 @@ struct TBackendGflags {
   58: required string blacklisted_tables
 
   59: required bool unlock_zorder_sort
+
+  60: required bool simplify_check_on_show_tables
 }
diff --git a/fe/src/main/java/org/apache/impala/authorization/Privilege.java b/fe/src/main/java/org/apache/impala/authorization/Privilege.java
index 14dc912..23c3d78 100644
--- a/fe/src/main/java/org/apache/impala/authorization/Privilege.java
+++ b/fe/src/main/java/org/apache/impala/authorization/Privilege.java
@@ -22,17 +22,20 @@ import com.google.common.base.Preconditions;
 import java.util.EnumSet;
 
 /**
- * List of Impala privileges.
+ * List of Impala privileges. Declare them in the order from least allowing to most
+ * allowing privilege so EnumSet used in {@link Privilege#getImpliedPrivileges()} can
+ * iterate them in this order. This helps in more efficiently checking for VIEW_METADATA
+ * and ANY privilege if the user does have access to the resource.
  */
 public enum Privilege {
-  ALL,
-  OWNER,
+  SELECT,
+  INSERT,
+  REFRESH,
   ALTER,
   DROP,
   CREATE,
-  INSERT,
-  SELECT,
-  REFRESH,
+  ALL,
+  OWNER,
   // Privileges required to view metadata on a server object.
   VIEW_METADATA(true),
   // Special privilege that is used to determine if the user has any valid privileges
diff --git a/fe/src/main/java/org/apache/impala/service/BackendConfig.java b/fe/src/main/java/org/apache/impala/service/BackendConfig.java
index bfe364a..1f77d16 100644
--- a/fe/src/main/java/org/apache/impala/service/BackendConfig.java
+++ b/fe/src/main/java/org/apache/impala/service/BackendConfig.java
@@ -189,6 +189,10 @@ public class BackendConfig {
     backendCfg_.setUnlock_zorder_sort(zOrdering);
   }
 
+  public boolean simplifyCheckOnShowTables() {
+    return backendCfg_.simplify_check_on_show_tables;
+  }
+
   // Inits the auth_to_local configuration in the static KerberosName class.
   private static void initAuthToLocal() {
     // If auth_to_local is enabled, we read the configuration hadoop.security.auth_to_local
diff --git a/fe/src/main/java/org/apache/impala/service/Frontend.java b/fe/src/main/java/org/apache/impala/service/Frontend.java
index 29104f5..8bf468d 100644
--- a/fe/src/main/java/org/apache/impala/service/Frontend.java
+++ b/fe/src/main/java/org/apache/impala/service/Frontend.java
@@ -773,6 +773,10 @@ public class Frontend {
     FeCatalog catalog = getCatalog();
     List<String> tblNames = catalog.getTableNames(dbName, matcher);
     if (authzFactory_.getAuthorizationConfig().isEnabled()) {
+      Privilege requiredPrivilege = Privilege.ANY;
+      if (BackendConfig.INSTANCE.simplifyCheckOnShowTables()) {
+        requiredPrivilege = Privilege.SELECT;
+      }
       Iterator<String> iter = tblNames.iterator();
       while (iter.hasNext()) {
         String tblName = iter.next();
@@ -791,7 +795,7 @@ public class Frontend {
         }
         PrivilegeRequest privilegeRequest = new PrivilegeRequestBuilder(
             authzFactory_.getAuthorizableFactory())
-            .any().onAnyColumn(dbName, tblName, tableOwner).build();
+            .allOf(requiredPrivilege).onAnyColumn(dbName, tblName, tableOwner).build();
         if (!authzChecker_.get().hasAccess(user, privilegeRequest)) {
           iter.remove();
         }
diff --git a/tests/authorization/test_authorization.py b/tests/authorization/test_authorization.py
index 0236699..d58dea2 100644
--- a/tests/authorization/test_authorization.py
+++ b/tests/authorization/test_authorization.py
@@ -37,8 +37,11 @@ from tests.common.file_utils import assert_file_in_dir_contains,\
     assert_no_files_in_dir_contain
 
 SENTRY_CONFIG_DIR = os.getenv('IMPALA_HOME') + '/fe/src/test/resources/'
+SENTRY_BASE_LOG_DIR = os.getenv('IMPALA_CLUSTER_LOGS_DIR') + "/sentry"
 SENTRY_CONFIG_FILE = SENTRY_CONFIG_DIR + 'sentry-site.xml'
-
+SENTRY_CONFIG_FILE_OO = SENTRY_CONFIG_DIR + 'sentry-site_oo.xml'
+PRIVILEGES = ['all', 'alter', 'drop', 'insert', 'refresh', 'select']
+ADMIN = "admin"
 
 class TestAuthorization(CustomClusterTestSuite):
   def setup(self):
@@ -482,3 +485,78 @@ class TestAuthorization(CustomClusterTestSuite):
       assert any("refresh" in x for x in result.data)
       self.execute_query_expect_success(client,
                                         "select * from functional.alltypes limit 1")
+
+  @pytest.mark.execute_serially
+  @CustomClusterTestSuite.with_args(
+    impalad_args="--server_name=server1 --sentry_config=%s "
+                 "--authorized_proxy_user_config=%s=* "
+                 "--simplify_check_on_show_tables=true" %
+                 (SENTRY_CONFIG_FILE, getuser()),
+    catalogd_args="--sentry_config={0}".format(SENTRY_CONFIG_FILE),
+    sentry_config=SENTRY_CONFIG_FILE_OO,  # Enable Sentry Object Ownership
+    sentry_log_dir="{0}/test_fast_show_tables_with_sentry".format(SENTRY_BASE_LOG_DIR))
+  def test_fast_show_tables_with_sentry(self, unique_role, unique_name):
+    unique_db = unique_name + "_db"
+    # TODO: can we create and use a temp username instead of using root?
+    another_user = 'root'
+    another_user_grp = 'root'
+    self.role_cleanup(unique_role)
+    try:
+      self.client.execute("create role %s" % unique_role)
+      self.client.execute("grant create on server to role %s" % unique_role)
+      self.client.execute("grant drop on server to role %s" % unique_role)
+      self.client.execute("grant role %s to group %s" %
+                          (unique_role, grp.getgrnam(getuser()).gr_name))
+
+      self.client.execute("drop database if exists %s cascade" % unique_db)
+      self.client.execute("create database %s" % unique_db)
+      for priv in PRIVILEGES:
+        self.client.execute("create table %s.tbl_%s (i int)" % (unique_db, priv))
+        self.client.execute("grant {0} on table {1}.tbl_{2} to role {3}"
+                            .format(priv, unique_db, priv, unique_role))
+      self.client.execute("grant role %s to group %s" %
+                          (unique_role, another_user_grp))
+
+      # Owner (current user) can still see all the tables
+      result = self.client.execute("show tables in %s" % unique_db)
+      assert result.data == ["tbl_%s" % p for p in PRIVILEGES]
+
+      # Check SHOW TABLES using another username
+      # Create another client so we can user another username
+      root_impalad_client = self.create_impala_client()
+      result = self.execute_query_expect_success(
+        root_impalad_client, "show tables in %s" % unique_db, user=another_user)
+      # Only show tables with privileges implying SELECT privilege
+      assert result.data == ['tbl_all', 'tbl_select']
+    finally:
+      self.role_cleanup(unique_role)
+      self.client.execute("drop database if exists %s cascade" % unique_db)
+
+  @pytest.mark.execute_serially
+  @CustomClusterTestSuite.with_args(
+    impalad_args="--server-name=server1 --ranger_service_type=hive "
+                 "--ranger_app_id=impala --authorization_provider=ranger "
+                 "--simplify_check_on_show_tables=true",
+    catalogd_args="--server-name=server1 --ranger_service_type=hive "
+                  "--ranger_app_id=impala --authorization_provider=ranger")
+  def test_fast_show_tables_with_ranger(self, unique_role, unique_name):
+    unique_db = unique_name + "_db"
+    admin_client = self.create_impala_client()
+    try:
+      admin_client.execute("drop database if exists %s cascade" % unique_db, user=ADMIN)
+      admin_client.execute("create database %s" % unique_db, user=ADMIN)
+      for priv in PRIVILEGES:
+        admin_client.execute("create table %s.tbl_%s (i int)" % (unique_db, priv))
+        admin_client.execute("grant {0} on table {1}.tbl_{2} to user {3}"
+                            .format(priv, unique_db, priv, getuser()))
+
+      # Admin can still see all the tables
+      result = admin_client.execute("show tables in %s" % unique_db)
+      assert result.data == ["tbl_%s" % p for p in PRIVILEGES]
+
+      # Check SHOW TABLES using another username
+      result = self.client.execute("show tables in %s" % unique_db)
+      # Only show tables with privileges implying SELECT privilege
+      assert result.data == ['tbl_all', 'tbl_select']
+    finally:
+      admin_client.execute("drop database if exists %s cascade" % unique_db)