You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ha...@apache.org on 2019/06/11 05:27:35 UTC

[kudu] branch master updated (ec1a1f3 -> 13fdac8)

This is an automated email from the ASF dual-hosted git repository.

hahao pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git.


    from ec1a1f3  thirdparty: fix build_curl with unusual krb5-config location
     new 69645c1  sentry: avoid authorizing every table in ListTables
     new 13fdac8  hms: clarify message to pass in --hive_metastore_uris

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/kudu/master/sentry_authz_provider-test.cc | 83 +++++++++++++++++++++++++++
 src/kudu/master/sentry_authz_provider.cc      | 42 +++++++++++++-
 src/kudu/tools/kudu-tool-test.cc              |  6 +-
 src/kudu/tools/tool_action_hms.cc             |  5 +-
 4 files changed, 131 insertions(+), 5 deletions(-)


[kudu] 01/02: sentry: avoid authorizing every table in ListTables

Posted by ha...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

hahao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 69645c198766499e257eacd1cdf44f7856d093b8
Author: Andrew Wong <aw...@apache.org>
AuthorDate: Sat Jun 8 11:07:54 2019 -0700

    sentry: avoid authorizing every table in ListTables
    
    Currently ListTables will call into Sentry for every table in Kudu's
    catalog, checking whether the user has metadata privileges on the table,
    and adding it to the ListTablesResponsePB if so. This is expensive,
    particularly when there are thousands of tables in Kudu.
    
    This patch updates the authorization behavior to check whether the user
    has METADATA privileges on the table's database for each table. If no
    such privilege exists for the database, each table within the database
    is checked.
    
    A benchmark is added to gauge the performance in various scenarios:
    
    Iterating through many databases:
     $ ./bin/sentry_authz_provider-test --gtest_filter=*ListTablesBench* --num_databases=300 --num_tables_per_db=1 --has-db-privileges=true
    sentry_authz_provider-test.cc:418] Time spent Listing tables: real 25.243s user 0.015s     sys 0.001s
    
    Iterating through many databases and many tables. This is the worst case
    since listing a given table will require two lookups in Sentry -- one
    for the database and one for the table:
     $ ./bin/sentry_authz_provider-test --gtest_filter=*ListTablesBench* --num_databases=300 --num_tables_per_db=1 --has-db-privileges=false
    sentry_authz_provider-test.cc:418] Time spent Listing tables: real 47.543s  user 0.040s     sys 0.013s
    
    This above worst case is actually less performant than without this
    patch. A follow-up patch will make this more performant by caching
    privileges from requests at the database scope.
    
    Iterating through one database and no tables since the user has
    database-level privileges:
     $ ./bin/sentry_authz_provider-test --gtest_filter=*ListTablesBench* --num_databases=1 --num_tables_per_db=600 --has-db-privileges=true
    sentry_authz_provider-test.cc:418] Time spent Listing tables: real 0.271s  user 0.000s     sys 0.000s
    
    Iterating through one database and many tables:
     $ ./bin/sentry_authz_provider-test --gtest_filter=*ListTablesBench* --num_databases=1 --num_tables_per_db=600 --has-db-privileges=false
    sentry_authz_provider-test.cc:418] Time spent Listing tables: real 47.441s       user 0.031s     sys 0.013s
    
    Change-Id: I7c495c635fbd2a661709b6d7f04973b7864b5527
    Reviewed-on: http://gerrit.cloudera.org:8080/13549
    Reviewed-by: Adar Dembo <ad...@cloudera.com>
    Reviewed-by: Hao Hao <ha...@cloudera.com>
    Tested-by: Andrew Wong <aw...@cloudera.com>
---
 src/kudu/master/sentry_authz_provider-test.cc | 83 +++++++++++++++++++++++++++
 src/kudu/master/sentry_authz_provider.cc      | 42 +++++++++++++-
 2 files changed, 123 insertions(+), 2 deletions(-)

diff --git a/src/kudu/master/sentry_authz_provider-test.cc b/src/kudu/master/sentry_authz_provider-test.cc
index 21a8588..3a281df 100644
--- a/src/kudu/master/sentry_authz_provider-test.cc
+++ b/src/kudu/master/sentry_authz_provider-test.cc
@@ -71,6 +71,18 @@ DEFINE_int32(num_table_privileges, 100,
     "Number of table privileges to use in testing");
 TAG_FLAG(num_table_privileges, hidden);
 
+DEFINE_int32(num_databases, 10,
+    "Number of databases to use in testing");
+TAG_FLAG(num_databases, hidden);
+
+DEFINE_int32(num_tables_per_db, 10,
+    "Number of tables to use per database to use in testing");
+TAG_FLAG(num_tables_per_db, hidden);
+
+DEFINE_bool(has_db_privileges, true,
+    "Whether the user should have db-level privileges in testing");
+TAG_FLAG(has_db_privileges, hidden);
+
 DECLARE_int32(sentry_service_recv_timeout_seconds);
 DECLARE_int32(sentry_service_send_timeout_seconds);
 DECLARE_uint32(sentry_privileges_cache_capacity_mb);
@@ -193,6 +205,7 @@ TEST(SentryPrivilegesFetcherStaticTest, TestPrivilegesWellFormed) {
 class SentryAuthzProviderTest : public SentryTestBase {
  public:
   static const char* const kTestUser;
+  static const char* const kTrustedUser;
   static const char* const kUserGroup;
   static const char* const kRoleName;
 
@@ -205,6 +218,7 @@ class SentryAuthzProviderTest : public SentryTestBase {
     // Configure the SentryAuthzProvider flags.
     FLAGS_sentry_privileges_cache_capacity_mb = CachingEnabled() ? 1 : 0;
     FLAGS_sentry_service_rpc_addresses = sentry_->address().ToString();
+    FLAGS_trusted_user_acl = kTrustedUser;
     sentry_authz_provider_.reset(new SentryAuthzProvider(metric_entity_));
     ASSERT_OK(sentry_authz_provider_->Start());
   }
@@ -292,6 +306,7 @@ class SentryAuthzProviderTest : public SentryTestBase {
 };
 
 const char* const SentryAuthzProviderTest::kTestUser = "test-user";
+const char* const SentryAuthzProviderTest::kTrustedUser = "trusted-user";
 const char* const SentryAuthzProviderTest::kUserGroup = "user";
 const char* const SentryAuthzProviderTest::kRoleName = "developer";
 
@@ -372,6 +387,74 @@ TEST_F(SentryAuthzProviderTest, BroadAuthzScopeBenchmark) {
   ASSERT_TRUE(s.IsNotAuthorized());
 }
 
+// Benchmark to test the time it takes to evaluate privileges when listing
+// tables.
+TEST_F(SentryAuthzProviderTest, ListTablesBenchmark) {
+  ASSERT_OK(CreateRoleAndAddToGroups());
+  unordered_set<string> tables;
+  for (int d = 0; d < FLAGS_num_databases; d++) {
+    const string db_name = Substitute("$0_$1", kDb, d);
+    // Regardless of whether the user has database-level privileges on the
+    // database, make sure there's at least one privilege for a database to
+    // keep the benchmark consistent when toggling this flag.
+    const string dummy_name = Substitute("$0_$1", "foo", d);
+    ASSERT_OK(AlterRoleGrantPrivilege(
+        GetDatabasePrivilege(FLAGS_has_db_privileges ? db_name : dummy_name, "METADATA")));
+    for (int t = 0; t < FLAGS_num_tables_per_db; t++) {
+      const string table_name = Substitute("$0_$1", kTable, t);
+      const string table_ident = Substitute("$0.$1", db_name, table_name);
+
+      KLOG_EVERY_N_SECS(INFO, 3) << "Granted privilege on table: " << table_ident;
+      ASSERT_OK(AlterRoleGrantPrivilege(GetTablePrivilege(db_name, table_name, "METADATA")));
+      EmplaceOrDie(&tables, table_ident);
+    }
+  }
+  bool checked_table_names = false;
+  LOG_TIMING(INFO, "Listing tables") {
+    ASSERT_OK(sentry_authz_provider_->AuthorizeListTables(
+        kTestUser, &tables, &checked_table_names));
+  }
+  ASSERT_TRUE(checked_table_names);
+  ASSERT_EQ(FLAGS_num_databases * FLAGS_num_tables_per_db, tables.size());
+}
+
+TEST_F(SentryAuthzProviderTest, TestListTables) {
+  ASSERT_OK(CreateRoleAndAddToGroups());
+  const int kNumDbs = 2;
+  const int kNumTablesPerDb = 5;
+  const int kNumNonHiveTables = 3;
+  unordered_set<string> tables;
+  for (int d = 0; d < kNumDbs; d++) {
+    const string db_name = Substitute("$0_$1", kDb, d);
+    for (int t = 0; t < kNumTablesPerDb; t++) {
+      const string table_name = Substitute("$0_$1", kTable, t);
+      // To test the absence of privileges, only grant privileges on one table
+      // per database.
+      if (t == 0) {
+        ASSERT_OK(AlterRoleGrantPrivilege(GetTablePrivilege(db_name, table_name, "METADATA")));
+      }
+      EmplaceOrDie(&tables, Substitute("$0.$1", db_name, table_name));
+    }
+  }
+  // Add some tables that don't conform to Hive's naming convention.
+  for (int i = 0; i < kNumNonHiveTables; i++) {
+    EmplaceOrDie(&tables, Substitute("badname_$0!", i));
+  }
+  bool checked_table_names = false;
+  // List tables as a trusted user. All tables, including non-Hive-conformant
+  // ones, should be visible.
+  ASSERT_OK(sentry_authz_provider_->AuthorizeListTables(
+      kTrustedUser, &tables, &checked_table_names));
+  ASSERT_FALSE(checked_table_names);
+  ASSERT_EQ(kNumDbs * kNumTablesPerDb + kNumNonHiveTables, tables.size());
+
+  // Now try as a regular user. Only the tables with Hive-conformant names that
+  // the user has privileges on should be visible.
+  ASSERT_OK(sentry_authz_provider_->AuthorizeListTables(kTestUser, &tables, &checked_table_names));
+  ASSERT_TRUE(checked_table_names);
+  ASSERT_EQ(kNumDbs, tables.size());
+}
+
 class SentryAuthzProviderFilterPrivilegesTest : public SentryAuthzProviderTest {
  public:
   SentryAuthzProviderFilterPrivilegesTest()
diff --git a/src/kudu/master/sentry_authz_provider.cc b/src/kudu/master/sentry_authz_provider.cc
index feba6a2..80b8074 100644
--- a/src/kudu/master/sentry_authz_provider.cc
+++ b/src/kudu/master/sentry_authz_provider.cc
@@ -17,19 +17,23 @@
 
 #include "kudu/master/sentry_authz_provider.h"
 
+#include <unordered_map>
 #include <unordered_set>
 #include <utility>
+#include <vector>
 
 #include <gflags/gflags_declare.h>
 #include <glog/logging.h>
 
 #include "kudu/common/common.pb.h"
+#include "kudu/common/table_util.h"
 #include "kudu/gutil/map-util.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/master/sentry_privileges_fetcher.h"
 #include "kudu/security/token.pb.h"
 #include "kudu/sentry/sentry_action.h"
 #include "kudu/sentry/sentry_authorizable_scope.h"
+#include "kudu/util/slice.h"
 #include "kudu/util/trace.h"
 
 DECLARE_string(sentry_service_rpc_addresses);
@@ -39,7 +43,9 @@ using kudu::security::TablePrivilegePB;
 using kudu::sentry::SentryAction;
 using kudu::sentry::SentryAuthorizableScope;
 using std::string;
+using std::unordered_map;
 using std::unordered_set;
+using std::vector;
 using strings::Substitute;
 
 namespace kudu {
@@ -194,11 +200,43 @@ Status SentryAuthzProvider::AuthorizeGetTableMetadata(const string& table_name,
 Status SentryAuthzProvider::AuthorizeListTables(const string& user,
                                                 unordered_set<string>* table_names,
                                                 bool* checked_table_names) {
+  if (IsTrustedUser(user)) {
+    *checked_table_names = false;
+    return Status::OK();
+  }
   unordered_set<string> authorized_tables;
+  unordered_map<string, vector<string>> tables_by_db;
   for (auto table_name : *table_names) {
-    Status s = AuthorizeGetTableMetadata(table_name, user);
+    Slice db_slice;
+    Slice unused_table_slice;
+    Status s = ParseHiveTableIdentifier(table_name, &db_slice, &unused_table_slice);
+    if (!s.ok()) {
+      continue;
+    }
+    LookupOrInsert(&tables_by_db, db_slice.ToString(), {}).emplace_back(std::move(table_name));
+  }
+  for (auto db_and_tables : tables_by_db) {
+    auto tables_in_db = db_and_tables.second;
+    DCHECK(!tables_in_db.empty());
+    // Authorize database-level privileges first in case the user has
+    // database-level privileges. This would allow us to avoid authorizing each
+    // indiviudual table.
+    // Note: the exact table isn't particularly important, as long as we pass
+    // in a table within the database we're interested in.
+    const string& first_table_name_in_db = tables_in_db[0];
+    Status s = Authorize(SentryAuthorizableScope::Scope::DATABASE, SentryAction::METADATA,
+                         first_table_name_in_db, user);
     if (s.ok()) {
-      EmplaceOrDie(&authorized_tables, std::move(table_name));
+      for (auto table_name : tables_in_db) {
+        EmplaceOrDie(&authorized_tables, std::move(table_name));
+      }
+    } else {
+      for (auto table_name : tables_in_db) {
+        s = AuthorizeGetTableMetadata(table_name, user);
+        if (s.ok()) {
+          EmplaceOrDie(&authorized_tables, std::move(table_name));
+        }
+      }
     }
   }
   *table_names = authorized_tables;


[kudu] 02/02: hms: clarify message to pass in --hive_metastore_uris

Posted by ha...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

hahao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 13fdac8e55e635d049d2a0c3a0232d229e630da8
Author: Andrew Wong <aw...@apache.org>
AuthorDate: Mon Jun 10 14:56:05 2019 -0700

    hms: clarify message to pass in --hive_metastore_uris
    
    The error when running the `kudu hms` with no --hive_metastore_uris flag
    did not indicate clearly what the user should do (i.e. pass in the
    flag). This adds that clarity.
    
    Change-Id: I0ec8bcedc8e038cb20f9dd96c929f15429051327
    Reviewed-on: http://gerrit.cloudera.org:8080/13578
    Tested-by: Kudu Jenkins
    Reviewed-by: Alexey Serbin <as...@cloudera.com>
---
 src/kudu/tools/kudu-tool-test.cc  | 6 ++++--
 src/kudu/tools/tool_action_hms.cc | 5 ++++-
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index a1beeeb..8761522 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -4020,8 +4020,10 @@ TEST_F(ToolTest, TestHmsList) {
   string err;
   RunActionStderrString(Substitute("hms list $0", master_addr), &err);
   ASSERT_STR_CONTAINS(err,
-      "Configuration error: the Kudu leader master is not configured with "
-      "the Hive Metastore integration");
+      "Configuration error: Could not fetch the Hive Metastore locations from "
+      "the Kudu master since it is not configured with the Hive Metastore "
+      "integration. Run the tool with --hive_metastore_uris and pass in the "
+      "location(s) of the Hive Metastore.");
 
   // Enable the HMS integration.
   cluster_->ShutdownNodes(cluster::ClusterNodes::MASTERS_ONLY);
diff --git a/src/kudu/tools/tool_action_hms.cc b/src/kudu/tools/tool_action_hms.cc
index 1da8e74..201d031 100644
--- a/src/kudu/tools/tool_action_hms.cc
+++ b/src/kudu/tools/tool_action_hms.cc
@@ -131,7 +131,10 @@ Status Init(const RunnerContext& context,
     string hive_metastore_uris = (*kudu_client)->GetHiveMetastoreUris();
     if (hive_metastore_uris.empty()) {
       return Status::ConfigurationError(
-          "the Kudu leader master is not configured with the Hive Metastore integration");
+          "Could not fetch the Hive Metastore locations from the Kudu master "
+          "since it is not configured with the Hive Metastore integration. "
+          "Run the tool with --hive_metastore_uris and pass in the location(s) "
+          "of the Hive Metastore.");
     }
     bool hive_metastore_sasl_enabled = (*kudu_client)->GetHiveMetastoreSaslEnabled();