You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ha...@apache.org on 2019/06/11 05:27:35 UTC
[kudu] branch master updated (ec1a1f3 -> 13fdac8)
This is an automated email from the ASF dual-hosted git repository.
hahao pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git.
from ec1a1f3 thirdparty: fix build_curl with unusual krb5-config location
new 69645c1 sentry: avoid authorizing every table in ListTables
new 13fdac8 hms: clarify message to pass in --hive_metastore_uris
The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
src/kudu/master/sentry_authz_provider-test.cc | 83 +++++++++++++++++++++++++++
src/kudu/master/sentry_authz_provider.cc | 42 +++++++++++++-
src/kudu/tools/kudu-tool-test.cc | 6 +-
src/kudu/tools/tool_action_hms.cc | 5 +-
4 files changed, 131 insertions(+), 5 deletions(-)
[kudu] 01/02: sentry: avoid authorizing every table in ListTables
Posted by ha...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
hahao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 69645c198766499e257eacd1cdf44f7856d093b8
Author: Andrew Wong <aw...@apache.org>
AuthorDate: Sat Jun 8 11:07:54 2019 -0700
sentry: avoid authorizing every table in ListTables
Currently ListTables will call into Sentry for every table in Kudu's
catalog, checking whether the user has metadata privileges on the table,
and adding it to the ListTablesResponsePB if so. This is expensive,
particularly when there are thousands of tables in Kudu.
This patch updates the authorization behavior to check whether the user
has METADATA privileges on the table's database for each table. If no
such privilege exists for the database, each table within the database
is checked.
A benchmark is added to gauge the performance in various scenarios:
Iterating through many databases:
$ ./bin/sentry_authz_provider-test --gtest_filter=*ListTablesBench* --num_databases=300 --num_tables_per_db=1 --has-db-privileges=true
sentry_authz_provider-test.cc:418] Time spent Listing tables: real 25.243s user 0.015s sys 0.001s
Iterating through many databases and many tables. This is the worst case
since listing a given table will require two lookups in Sentry -- one
for the database and one for the table:
$ ./bin/sentry_authz_provider-test --gtest_filter=*ListTablesBench* --num_databases=300 --num_tables_per_db=1 --has-db-privileges=false
sentry_authz_provider-test.cc:418] Time spent Listing tables: real 47.543s user 0.040s sys 0.013s
This above worst case is actually less performant than without this
patch. A follow-up patch will make this more performant by caching
privileges from requests at the database scope.
Iterating through one database and no tables since the user has
database-level privileges:
$ ./bin/sentry_authz_provider-test --gtest_filter=*ListTablesBench* --num_databases=1 --num_tables_per_db=600 --has-db-privileges=true
sentry_authz_provider-test.cc:418] Time spent Listing tables: real 0.271s user 0.000s sys 0.000s
Iterating through one database and many tables:
$ ./bin/sentry_authz_provider-test --gtest_filter=*ListTablesBench* --num_databases=1 --num_tables_per_db=600 --has-db-privileges=false
sentry_authz_provider-test.cc:418] Time spent Listing tables: real 47.441s user 0.031s sys 0.013s
Change-Id: I7c495c635fbd2a661709b6d7f04973b7864b5527
Reviewed-on: http://gerrit.cloudera.org:8080/13549
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Reviewed-by: Hao Hao <ha...@cloudera.com>
Tested-by: Andrew Wong <aw...@cloudera.com>
---
src/kudu/master/sentry_authz_provider-test.cc | 83 +++++++++++++++++++++++++++
src/kudu/master/sentry_authz_provider.cc | 42 +++++++++++++-
2 files changed, 123 insertions(+), 2 deletions(-)
diff --git a/src/kudu/master/sentry_authz_provider-test.cc b/src/kudu/master/sentry_authz_provider-test.cc
index 21a8588..3a281df 100644
--- a/src/kudu/master/sentry_authz_provider-test.cc
+++ b/src/kudu/master/sentry_authz_provider-test.cc
@@ -71,6 +71,18 @@ DEFINE_int32(num_table_privileges, 100,
"Number of table privileges to use in testing");
TAG_FLAG(num_table_privileges, hidden);
+DEFINE_int32(num_databases, 10,
+ "Number of databases to use in testing");
+TAG_FLAG(num_databases, hidden);
+
+DEFINE_int32(num_tables_per_db, 10,
+ "Number of tables to use per database to use in testing");
+TAG_FLAG(num_tables_per_db, hidden);
+
+DEFINE_bool(has_db_privileges, true,
+ "Whether the user should have db-level privileges in testing");
+TAG_FLAG(has_db_privileges, hidden);
+
DECLARE_int32(sentry_service_recv_timeout_seconds);
DECLARE_int32(sentry_service_send_timeout_seconds);
DECLARE_uint32(sentry_privileges_cache_capacity_mb);
@@ -193,6 +205,7 @@ TEST(SentryPrivilegesFetcherStaticTest, TestPrivilegesWellFormed) {
class SentryAuthzProviderTest : public SentryTestBase {
public:
static const char* const kTestUser;
+ static const char* const kTrustedUser;
static const char* const kUserGroup;
static const char* const kRoleName;
@@ -205,6 +218,7 @@ class SentryAuthzProviderTest : public SentryTestBase {
// Configure the SentryAuthzProvider flags.
FLAGS_sentry_privileges_cache_capacity_mb = CachingEnabled() ? 1 : 0;
FLAGS_sentry_service_rpc_addresses = sentry_->address().ToString();
+ FLAGS_trusted_user_acl = kTrustedUser;
sentry_authz_provider_.reset(new SentryAuthzProvider(metric_entity_));
ASSERT_OK(sentry_authz_provider_->Start());
}
@@ -292,6 +306,7 @@ class SentryAuthzProviderTest : public SentryTestBase {
};
const char* const SentryAuthzProviderTest::kTestUser = "test-user";
+const char* const SentryAuthzProviderTest::kTrustedUser = "trusted-user";
const char* const SentryAuthzProviderTest::kUserGroup = "user";
const char* const SentryAuthzProviderTest::kRoleName = "developer";
@@ -372,6 +387,74 @@ TEST_F(SentryAuthzProviderTest, BroadAuthzScopeBenchmark) {
ASSERT_TRUE(s.IsNotAuthorized());
}
+// Benchmark to test the time it takes to evaluate privileges when listing
+// tables.
+TEST_F(SentryAuthzProviderTest, ListTablesBenchmark) {
+ ASSERT_OK(CreateRoleAndAddToGroups());
+ unordered_set<string> tables;
+ for (int d = 0; d < FLAGS_num_databases; d++) {
+ const string db_name = Substitute("$0_$1", kDb, d);
+ // Regardless of whether the user has database-level privileges on the
+ // database, make sure there's at least one privilege for a database to
+ // keep the benchmark consistent when toggling this flag.
+ const string dummy_name = Substitute("$0_$1", "foo", d);
+ ASSERT_OK(AlterRoleGrantPrivilege(
+ GetDatabasePrivilege(FLAGS_has_db_privileges ? db_name : dummy_name, "METADATA")));
+ for (int t = 0; t < FLAGS_num_tables_per_db; t++) {
+ const string table_name = Substitute("$0_$1", kTable, t);
+ const string table_ident = Substitute("$0.$1", db_name, table_name);
+
+ KLOG_EVERY_N_SECS(INFO, 3) << "Granted privilege on table: " << table_ident;
+ ASSERT_OK(AlterRoleGrantPrivilege(GetTablePrivilege(db_name, table_name, "METADATA")));
+ EmplaceOrDie(&tables, table_ident);
+ }
+ }
+ bool checked_table_names = false;
+ LOG_TIMING(INFO, "Listing tables") {
+ ASSERT_OK(sentry_authz_provider_->AuthorizeListTables(
+ kTestUser, &tables, &checked_table_names));
+ }
+ ASSERT_TRUE(checked_table_names);
+ ASSERT_EQ(FLAGS_num_databases * FLAGS_num_tables_per_db, tables.size());
+}
+
+TEST_F(SentryAuthzProviderTest, TestListTables) {
+ ASSERT_OK(CreateRoleAndAddToGroups());
+ const int kNumDbs = 2;
+ const int kNumTablesPerDb = 5;
+ const int kNumNonHiveTables = 3;
+ unordered_set<string> tables;
+ for (int d = 0; d < kNumDbs; d++) {
+ const string db_name = Substitute("$0_$1", kDb, d);
+ for (int t = 0; t < kNumTablesPerDb; t++) {
+ const string table_name = Substitute("$0_$1", kTable, t);
+ // To test the absence of privileges, only grant privileges on one table
+ // per database.
+ if (t == 0) {
+ ASSERT_OK(AlterRoleGrantPrivilege(GetTablePrivilege(db_name, table_name, "METADATA")));
+ }
+ EmplaceOrDie(&tables, Substitute("$0.$1", db_name, table_name));
+ }
+ }
+ // Add some tables that don't conform to Hive's naming convention.
+ for (int i = 0; i < kNumNonHiveTables; i++) {
+ EmplaceOrDie(&tables, Substitute("badname_$0!", i));
+ }
+ bool checked_table_names = false;
+ // List tables as a trusted user. All tables, including non-Hive-conformant
+ // ones, should be visible.
+ ASSERT_OK(sentry_authz_provider_->AuthorizeListTables(
+ kTrustedUser, &tables, &checked_table_names));
+ ASSERT_FALSE(checked_table_names);
+ ASSERT_EQ(kNumDbs * kNumTablesPerDb + kNumNonHiveTables, tables.size());
+
+ // Now try as a regular user. Only the tables with Hive-conformant names that
+ // the user has privileges on should be visible.
+ ASSERT_OK(sentry_authz_provider_->AuthorizeListTables(kTestUser, &tables, &checked_table_names));
+ ASSERT_TRUE(checked_table_names);
+ ASSERT_EQ(kNumDbs, tables.size());
+}
+
class SentryAuthzProviderFilterPrivilegesTest : public SentryAuthzProviderTest {
public:
SentryAuthzProviderFilterPrivilegesTest()
diff --git a/src/kudu/master/sentry_authz_provider.cc b/src/kudu/master/sentry_authz_provider.cc
index feba6a2..80b8074 100644
--- a/src/kudu/master/sentry_authz_provider.cc
+++ b/src/kudu/master/sentry_authz_provider.cc
@@ -17,19 +17,23 @@
#include "kudu/master/sentry_authz_provider.h"
+#include <unordered_map>
#include <unordered_set>
#include <utility>
+#include <vector>
#include <gflags/gflags_declare.h>
#include <glog/logging.h>
#include "kudu/common/common.pb.h"
+#include "kudu/common/table_util.h"
#include "kudu/gutil/map-util.h"
#include "kudu/gutil/strings/substitute.h"
#include "kudu/master/sentry_privileges_fetcher.h"
#include "kudu/security/token.pb.h"
#include "kudu/sentry/sentry_action.h"
#include "kudu/sentry/sentry_authorizable_scope.h"
+#include "kudu/util/slice.h"
#include "kudu/util/trace.h"
DECLARE_string(sentry_service_rpc_addresses);
@@ -39,7 +43,9 @@ using kudu::security::TablePrivilegePB;
using kudu::sentry::SentryAction;
using kudu::sentry::SentryAuthorizableScope;
using std::string;
+using std::unordered_map;
using std::unordered_set;
+using std::vector;
using strings::Substitute;
namespace kudu {
@@ -194,11 +200,43 @@ Status SentryAuthzProvider::AuthorizeGetTableMetadata(const string& table_name,
Status SentryAuthzProvider::AuthorizeListTables(const string& user,
unordered_set<string>* table_names,
bool* checked_table_names) {
+ if (IsTrustedUser(user)) {
+ *checked_table_names = false;
+ return Status::OK();
+ }
unordered_set<string> authorized_tables;
+ unordered_map<string, vector<string>> tables_by_db;
for (auto table_name : *table_names) {
- Status s = AuthorizeGetTableMetadata(table_name, user);
+ Slice db_slice;
+ Slice unused_table_slice;
+ Status s = ParseHiveTableIdentifier(table_name, &db_slice, &unused_table_slice);
+ if (!s.ok()) {
+ continue;
+ }
+ LookupOrInsert(&tables_by_db, db_slice.ToString(), {}).emplace_back(std::move(table_name));
+ }
+ for (auto db_and_tables : tables_by_db) {
+ auto tables_in_db = db_and_tables.second;
+ DCHECK(!tables_in_db.empty());
+ // Authorize database-level privileges first in case the user has
+ // database-level privileges. This would allow us to avoid authorizing each
+ // indiviudual table.
+ // Note: the exact table isn't particularly important, as long as we pass
+ // in a table within the database we're interested in.
+ const string& first_table_name_in_db = tables_in_db[0];
+ Status s = Authorize(SentryAuthorizableScope::Scope::DATABASE, SentryAction::METADATA,
+ first_table_name_in_db, user);
if (s.ok()) {
- EmplaceOrDie(&authorized_tables, std::move(table_name));
+ for (auto table_name : tables_in_db) {
+ EmplaceOrDie(&authorized_tables, std::move(table_name));
+ }
+ } else {
+ for (auto table_name : tables_in_db) {
+ s = AuthorizeGetTableMetadata(table_name, user);
+ if (s.ok()) {
+ EmplaceOrDie(&authorized_tables, std::move(table_name));
+ }
+ }
}
}
*table_names = authorized_tables;
[kudu] 02/02: hms: clarify message to pass in --hive_metastore_uris
Posted by ha...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
hahao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 13fdac8e55e635d049d2a0c3a0232d229e630da8
Author: Andrew Wong <aw...@apache.org>
AuthorDate: Mon Jun 10 14:56:05 2019 -0700
hms: clarify message to pass in --hive_metastore_uris
The error when running the `kudu hms` with no --hive_metastore_uris flag
did not indicate clearly what the user should do (i.e. pass in the
flag). This adds that clarity.
Change-Id: I0ec8bcedc8e038cb20f9dd96c929f15429051327
Reviewed-on: http://gerrit.cloudera.org:8080/13578
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin <as...@cloudera.com>
---
src/kudu/tools/kudu-tool-test.cc | 6 ++++--
src/kudu/tools/tool_action_hms.cc | 5 ++++-
2 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index a1beeeb..8761522 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -4020,8 +4020,10 @@ TEST_F(ToolTest, TestHmsList) {
string err;
RunActionStderrString(Substitute("hms list $0", master_addr), &err);
ASSERT_STR_CONTAINS(err,
- "Configuration error: the Kudu leader master is not configured with "
- "the Hive Metastore integration");
+ "Configuration error: Could not fetch the Hive Metastore locations from "
+ "the Kudu master since it is not configured with the Hive Metastore "
+ "integration. Run the tool with --hive_metastore_uris and pass in the "
+ "location(s) of the Hive Metastore.");
// Enable the HMS integration.
cluster_->ShutdownNodes(cluster::ClusterNodes::MASTERS_ONLY);
diff --git a/src/kudu/tools/tool_action_hms.cc b/src/kudu/tools/tool_action_hms.cc
index 1da8e74..201d031 100644
--- a/src/kudu/tools/tool_action_hms.cc
+++ b/src/kudu/tools/tool_action_hms.cc
@@ -131,7 +131,10 @@ Status Init(const RunnerContext& context,
string hive_metastore_uris = (*kudu_client)->GetHiveMetastoreUris();
if (hive_metastore_uris.empty()) {
return Status::ConfigurationError(
- "the Kudu leader master is not configured with the Hive Metastore integration");
+ "Could not fetch the Hive Metastore locations from the Kudu master "
+ "since it is not configured with the Hive Metastore integration. "
+ "Run the tool with --hive_metastore_uris and pass in the location(s) "
+ "of the Hive Metastore.");
}
bool hive_metastore_sasl_enabled = (*kudu_client)->GetHiveMetastoreSaslEnabled();