You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by gr...@apache.org on 2019/06/10 20:58:51 UTC

[kudu] 02/03: sentry: allow caching of COLUMN/TABLE privileges when checking higher scopes

This is an automated email from the ASF dual-hosted git repository.

granthenke pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit d3820b3c4458abe768ced6ee683470dc2cc39b50
Author: Andrew Wong <aw...@apache.org>
AuthorDate: Wed Jun 5 16:39:09 2019 -0700

    sentry: allow caching of COLUMN/TABLE privileges when checking higher scopes
    
    Currently, Kudu will not cache COLUMN/TABLE when checking for
    DATABASE/SERVER Sentry privileges. This is because, when written, Kudu
    would send requests that would yield significantly more privileges than
    required. Commit 0040a731e4741bde6d8c9d81e796cb000cd24817 changed this
    behavior, and as such, this patch gates the optimization behind an
    enum in the Sentry provider's Authorize() signature.
    
    The effect of this is that when checking for DATABASE/SERVER privileges,
    two entries will be added to the privilege cache instead of one: one for
    the DATABASE/SERVER privileges, and one for the TABLE/COLUMN privileges.
    
    This is optional because in some cases, it isn't necessarily desirable
    to cache table-level privileges. E.g. when creating a table, we
    shouldn't cache table-level privileges because Sentry may create OWNER
    privileges for the new table that caching may miss out on when first
    authorizing the create request.
    
    This improves the worst case performance of ListTables when there are
    many databases on which the user has no privileges, and a single table
    per database. In this scenario, without this patch, there would be
    2 * (# tables) calls to Sentry, and with this patch, there would be
    1 * (# tables) calls.
    
    With this patch:
    ./bin/sentry_authz_provider-test --gtest_filter=*ListTablesBench* --num_databases=300 --num_tables_per_db=1 --has-db-privileges=false
    sentry_authz_provider-test.cc:418] Time spent Listing tables: real 24.735s  user 0.033s     sys 0.009s
    
    Without this patch:
    ./bin/sentry_authz_provider-test --gtest_filter=*ListTablesBench* --num_databases=300 --num_tables_per_db=1 --has-db-privileges=false
    sentry_authz_provider-test.cc:418] Time spent Listing tables: real 47.543s  user 0.040s     sys 0.013s
    
    Change-Id: Icec75ae9e5626c887af37568a6f64a8361d888b7
    Reviewed-on: http://gerrit.cloudera.org:8080/13552
    Reviewed-by: Adar Dembo <ad...@cloudera.com>
    Reviewed-by: Alexey Serbin <as...@cloudera.com>
    Tested-by: Kudu Jenkins
---
 src/kudu/master/sentry_authz_provider-test.cc | 12 ++--
 src/kudu/master/sentry_authz_provider.cc      | 33 ++++++---
 src/kudu/master/sentry_authz_provider.h       | 14 +++-
 src/kudu/master/sentry_privileges_fetcher.cc  | 96 +++++++++++++++------------
 src/kudu/master/sentry_privileges_fetcher.h   | 14 +++-
 5 files changed, 106 insertions(+), 63 deletions(-)

diff --git a/src/kudu/master/sentry_authz_provider-test.cc b/src/kudu/master/sentry_authz_provider-test.cc
index 1b37927..21a8588 100644
--- a/src/kudu/master/sentry_authz_provider-test.cc
+++ b/src/kudu/master/sentry_authz_provider-test.cc
@@ -592,7 +592,8 @@ TEST_P(SentryAuthzProviderFilterPrivilegesScopeTest, TestFilterInvalidResponses)
                                        SentryAuthorizableScope::TABLE }) {
     SentryPrivilegesBranch privileges_info;
     ASSERT_OK(sentry_authz_provider_->fetcher_.GetSentryPrivileges(
-        requested_scope, table_ident, kTestUser, &privileges_info));
+        requested_scope, table_ident, kTestUser,
+        SentryCaching::ALL, &privileges_info));
     // Kudu should ignore all of the invalid privileges.
     ASSERT_TRUE(privileges_info.privileges().empty());
   }
@@ -614,7 +615,8 @@ TEST_P(SentryAuthzProviderFilterPrivilegesScopeTest, TestFilterValidResponses) {
                                        SentryAuthorizableScope::TABLE }) {
     SentryPrivilegesBranch privileges_info;
     ASSERT_OK(sentry_authz_provider_->fetcher_.GetSentryPrivileges(
-        requested_scope, table_ident, kTestUser, &privileges_info));
+        requested_scope, table_ident, kTestUser,
+        SentryCaching::ALL, &privileges_info));
     ASSERT_EQ(1, privileges_info.privileges().size());
     const auto& authorizable_privileges = *privileges_info.privileges().cbegin();
     ASSERT_EQ(GetParam(), authorizable_privileges.scope)
@@ -858,11 +860,11 @@ TEST_F(SentryAuthzProviderTest, CacheBehaviorScopeHierarchyAdjacentBranches) {
   ASSERT_EQ(0, GetTasksFailedNonFatal());
 }
 
-// Ensure requests for authorizables of the DATABASE scope hit cache once
+// Ensure requests to authorize CreateTables and AlterTables hit cache once
 // the information was fetched from Sentry for an authorizable of the TABLE
 // scope in the same hierarchy branch. A bit of context: Sentry sends all
 // available information for the branch up the authz scope hierarchy.
-TEST_F(SentryAuthzProviderTest, CacheBehaviorForDatabaseScope) {
+TEST_F(SentryAuthzProviderTest, CacheBehaviorForCreateAndAlter) {
   ASSERT_OK(CreateRoleAndAddToGroups());
   ASSERT_OK(AlterRoleGrantPrivilege(GetDatabasePrivilege("db0", "ALTER")));
   ASSERT_OK(AlterRoleGrantPrivilege(GetDatabasePrivilege("db1", "CREATE")));
@@ -1001,7 +1003,7 @@ TEST_F(SentryAuthzProviderTest, CacheBehaviorHybridLookups) {
 
 // Verify that information on TABLE-scope privileges are fetched from Sentry,
 // but not cached when SentryPrivilegeFetcher receives a ListPrivilegesByUser
-// response for a DATABASE-scope authorizable.
+// response for a DATABASE-scope authorizable for CreateTables or AlterTables.
 TEST_F(SentryAuthzProviderTest, CacheBehaviorNotCachingTableInfo) {
   ASSERT_OK(CreateRoleAndAddToGroups());
   ASSERT_OK(AlterRoleGrantPrivilege(GetDatabasePrivilege("db", "CREATE")));
diff --git a/src/kudu/master/sentry_authz_provider.cc b/src/kudu/master/sentry_authz_provider.cc
index bdc2fdf..feba6a2 100644
--- a/src/kudu/master/sentry_authz_provider.cc
+++ b/src/kudu/master/sentry_authz_provider.cc
@@ -52,7 +52,7 @@ namespace {
 // with GRANT ALL option, if any required ('requires_all_with_grant').
 bool IsActionAllowed(SentryAction::Action required_action,
                      SentryAuthorizableScope::Scope required_scope,
-                     bool requires_all_with_grant,
+                     SentryGrantRequired requires_all_with_grant,
                      const SentryPrivilegesBranch& privileges_branch) {
   // In general, a privilege implies another when:
   // 1. the authorizable from the former implies the authorizable from the latter
@@ -81,7 +81,7 @@ bool IsActionAllowed(SentryAction::Action required_action,
   for (const auto& privilege : privileges) {
     // A grant option cannot imply the other if the latter is set but the
     // former is not.
-    if (requires_all_with_grant && !privilege.all_with_grant) {
+    if (requires_all_with_grant == REQUIRED && !privilege.all_with_grant) {
       continue;
     }
     // Both privilege scope and action need to imply the other.
@@ -132,16 +132,20 @@ Status SentryAuthzProvider::AuthorizeCreateTable(const string& table_name,
   //
   // Otherwise, table creation requires 'CREATE ON DATABASE' privilege.
   SentryAction::Action action;
-  bool grant_option;
+  SentryGrantRequired grant_option;
   if (user == owner) {
     action = SentryAction::Action::CREATE;
-    grant_option = false;
+    grant_option = NOT_REQUIRED;
   } else {
     action = SentryAction::Action::ALL;
-    grant_option = true;
+    grant_option = REQUIRED;
   }
+  // Note: in our request to Sentry, we shouldn't cache table- or column-level
+  // privileges for the table, since Sentry may automatically grant privileges
+  // upon creation of new tables that caching might miss.
   return Authorize(SentryAuthorizableScope::Scope::DATABASE, action,
-                   table_name, user, grant_option);
+                   table_name, user, grant_option,
+                   SentryCaching::SERVER_AND_DB_ONLY);
 }
 
 Status SentryAuthzProvider::AuthorizeDropTable(const string& table_name,
@@ -170,9 +174,13 @@ Status SentryAuthzProvider::AuthorizeAlterTable(const string& old_table,
   RETURN_NOT_OK(Authorize(SentryAuthorizableScope::Scope::TABLE,
                           SentryAction::Action::ALL,
                           old_table, user));
+  // Note: in our request to Sentry, we shouldn't cache table- or column-level
+  // privileges for the table, since Sentry may automatically alter privileges
+  // upon altering tables that caching might miss.
   return Authorize(SentryAuthorizableScope::Scope::DATABASE,
-                   SentryAction::Action::CREATE,
-                   new_table, user);
+                   SentryAction::Action::CREATE, new_table, user,
+                   SentryGrantRequired::NOT_REQUIRED,
+                   SentryCaching::SERVER_AND_DB_ONLY);
 }
 
 Status SentryAuthzProvider::AuthorizeGetTableMetadata(const string& table_name,
@@ -220,7 +228,8 @@ Status SentryAuthzProvider::FillTablePrivilegePB(const string& table_name,
   // be different upon subsequent calls to this function.
   SentryPrivilegesBranch privileges_branch;
   RETURN_NOT_OK(fetcher_.GetSentryPrivileges(
-      SentryAuthorizableScope::TABLE, table_name, user, &privileges_branch));
+      SentryAuthorizableScope::TABLE, table_name, user,
+      SentryCaching::ALL, &privileges_branch));
   unordered_set<string> scannable_col_names;
   static const SentryAuthorizableScope kTableScope(SentryAuthorizableScope::TABLE);
   for (const auto& privilege : privileges_branch.privileges()) {
@@ -273,13 +282,15 @@ Status SentryAuthzProvider::Authorize(SentryAuthorizableScope::Scope scope,
                                       SentryAction::Action action,
                                       const string& table_ident,
                                       const string& user,
-                                      bool require_grant_option) {
+                                      SentryGrantRequired require_grant_option,
+                                      SentryCaching caching) {
   if (AuthzProvider::IsTrustedUser(user)) {
     return Status::OK();
   }
 
   SentryPrivilegesBranch privileges;
-  RETURN_NOT_OK(fetcher_.GetSentryPrivileges(scope, table_ident, user, &privileges));
+  RETURN_NOT_OK(fetcher_.GetSentryPrivileges(scope, table_ident, user,
+      caching, &privileges));
   if (IsActionAllowed(action, scope, require_grant_option, privileges)) {
     return Status::OK();
   }
diff --git a/src/kudu/master/sentry_authz_provider.h b/src/kudu/master/sentry_authz_provider.h
index c23e74f..7f1496f 100644
--- a/src/kudu/master/sentry_authz_provider.h
+++ b/src/kudu/master/sentry_authz_provider.h
@@ -41,6 +41,13 @@ class TablePrivilegePB;
 
 namespace master {
 
+// Enum indicating whether a grant option is required to perform a specific
+// action.
+enum SentryGrantRequired {
+  NOT_REQUIRED,
+  REQUIRED,
+};
+
 // An implementation of AuthzProvider that connects to the Sentry service
 // for authorization metadata and allows or denies the actions performed by
 // users based on the metadata.
@@ -109,11 +116,16 @@ class SentryAuthzProvider : public AuthzProvider {
   // If the operation is not authorized, returns Status::NotAuthorized().
   // Note that the authorization process is case insensitive for the
   // authorizables.
+  //
+  // If 'caching' is SERVER_AND_DB_ONLY and the underlying
+  // SentryPrivilegesFetcher is configured to cache privileges, it will not
+  // cache privileges equal to or below the 'TABLE' scope.
   Status Authorize(sentry::SentryAuthorizableScope::Scope scope,
                    sentry::SentryAction::Action action,
                    const std::string& table_ident,
                    const std::string& user,
-                   bool require_grant_option = false);
+                   SentryGrantRequired require_grant_option = NOT_REQUIRED,
+                   SentryCaching caching = ALL);
 
   // An instance of utility class that provides interface to search for
   // required privileges through the information received from Sentry.
diff --git a/src/kudu/master/sentry_privileges_fetcher.cc b/src/kudu/master/sentry_privileges_fetcher.cc
index ff04a1f..f293610 100644
--- a/src/kudu/master/sentry_privileges_fetcher.cc
+++ b/src/kudu/master/sentry_privileges_fetcher.cc
@@ -562,7 +562,7 @@ void SentryPrivilegesFetcher::Stop() {
 Status SentryPrivilegesFetcher::ResetCache() {
   const auto cache_capacity_bytes =
       FLAGS_sentry_privileges_cache_capacity_mb * 1024 * 1024;
-  shared_ptr<AuthzInfoCache> new_cache;
+  shared_ptr<PrivilegeCache> new_cache;
   if (cache_capacity_bytes != 0) {
     const auto cache_entry_ttl = MonoDelta::FromSeconds(
         FLAGS_authz_token_validity_seconds *
@@ -574,7 +574,7 @@ Status SentryPrivilegesFetcher::ResetCache() {
           FLAGS_sentry_privileges_cache_scrubbing_period_sec));
     }
 
-    new_cache = make_shared<AuthzInfoCache>(
+    new_cache = make_shared<PrivilegeCache>(
         cache_capacity_bytes, cache_entry_ttl, cache_scrubbing_period,
         FLAGS_sentry_privileges_cache_max_scrubbed_entries_per_pass,
         "sentry-privileges-ttl-cache");
@@ -596,6 +596,7 @@ Status SentryPrivilegesFetcher::GetSentryPrivileges(
     SentryAuthorizableScope::Scope requested_scope,
     const string& table_ident,
     const string& user,
+    SentryCaching caching,
     SentryPrivilegesBranch* privileges) {
   Slice db_slice;
   Slice table_slice;
@@ -605,6 +606,8 @@ Status SentryPrivilegesFetcher::GetSentryPrivileges(
   const string table = table_slice.ToString();
   const string db = db_slice.ToString();
 
+  // 1. Put together the requested authorizable.
+
   TSentryAuthorizable authorizable;
   RETURN_NOT_OK(GetAuthorizable(db, table, requested_scope, &authorizable));
 
@@ -623,37 +626,39 @@ Status SentryPrivilegesFetcher::GetSentryPrivileges(
   // even in tests.
   DCHECK_NE(SentryAuthorizableScope::COLUMN, requested_scope);
 
-  const AuthzInfoKey aggregate_key(user, authorizable);
+  const AuthzInfoKey requested_info(user, authorizable);
   // Do not query Sentry for authz scopes narrower than 'TABLE'.
-  const auto& key = aggregate_key.GetKey(SentryAuthorizableScope::TABLE);
-  const auto& key_seq = aggregate_key.key_sequence();
+  const auto& requested_key = requested_info.GetKey(SentryAuthorizableScope::TABLE);
+  const auto& requested_key_seq = requested_info.key_sequence();
+
+  // 2. Check the cache to see if it contains the requested privileges.
 
   // Copy the shared pointer to the cache. That's necessary because:
   //   * the cache_ member may be reset by concurrent ResetCache()
   //   * TTLCache is based on Cache that doesn't allow for outstanding handles
   //     if the cache itself destructed (in this case, goes out of scope).
-  shared_ptr<AuthzInfoCache> cache;
+  shared_ptr<PrivilegeCache> cache;
   {
     shared_lock<rw_spinlock> l(cache_lock_);
     cache = cache_;
   }
-  vector<typename AuthzInfoCache::EntryHandle> handles;
+  vector<typename PrivilegeCache::EntryHandle> handles;
   handles.reserve(AuthzInfoKey::kKeySequenceMaxSize);
 
   if (PREDICT_TRUE(cache)) {
-    for (const auto& e : key_seq) {
+    for (const auto& e : requested_key_seq) {
       auto handle = cache->Get(e);
-      VLOG(3) << Substitute("'$0': '$1' key lookup", key, e);
+      VLOG(3) << Substitute("'$0': '$1' key lookup", requested_key, e);
       if (!handle) {
         continue;
       }
-      VLOG(2) << Substitute("'$0': '$1' key found", key, e);
+      VLOG(2) << Substitute("'$0': '$1' key found", requested_key, e);
       handles.emplace_back(std::move(handle));
     }
   }
   // If the cache contains all the necessary information, repackage the
   // cached information and return as the result.
-  if (handles.size() == key_seq.size()) {
+  if (handles.size() == requested_key_seq.size()) {
     SentryPrivilegesBranch result;
     for (const auto& e : handles) {
       DCHECK(e);
@@ -663,6 +668,15 @@ Status SentryPrivilegesFetcher::GetSentryPrivileges(
     return Status::OK();
   }
 
+  // 3. The required privileges do not exist in the cache. Fetch them from
+  // Sentry.
+
+  // Narrow the scope of the authorizable to limit the number of privileges
+  // sent back from Sentry to be relevant to the provided table.
+  NarrowAuthzScopeForFetch(db, table, &authorizable);
+  const AuthzInfoKey full_authz_info(user, authorizable);
+  const string& full_key = full_authz_info.GetKey(SentryAuthorizableScope::TABLE);
+
   Synchronizer sync;
   bool is_first_request = false;
   // The result (i.e. the retrieved informaton on privileges) might be used
@@ -672,7 +686,7 @@ Status SentryPrivilegesFetcher::GetSentryPrivileges(
   {
     std::lock_guard<simple_spinlock> l(pending_requests_lock_);
     auto& pending_request = LookupOrEmplace(&pending_requests_,
-                                            key, SentryRequestsInfo());
+                                            full_key, SentryRequestsInfo());
     // Is the queue of pending requests for the same key empty?
     // If yes, that's the first request being sent out.
     is_first_request = pending_request.callbacks.empty();
@@ -690,11 +704,12 @@ Status SentryPrivilegesFetcher::GetSentryPrivileges(
     return Status::OK();
   }
 
-  NarrowAuthzScopeForFetch(db, table, &authorizable);
   TRACE("Fetching privileges from Sentry");
   const auto s = FetchPrivilegesFromSentry(FLAGS_kudu_service_name,
                                            user, authorizable,
                                            fetched_privileges.get());
+
+  // 4. Cache the privileges from Sentry.
   if (s.ok() && PREDICT_TRUE(cache)) {
     // Put the result into the cache. Negative results (i.e. errors) are not
     // cached. Split the information on privileges into at most two cache
@@ -709,43 +724,36 @@ Status SentryPrivilegesFetcher::GetSentryPrivileges(
     SentryPrivilegesBranch priv_table_column;
     fetched_privileges->Split(&priv_srv_db, &priv_table_column);
     if (requested_scope != SentryAuthorizableScope::SERVER) {
-      unique_ptr<SentryPrivilegesBranch> result_ptr(
-          new SentryPrivilegesBranch(std::move(priv_srv_db)));
-      const auto& key = aggregate_key.GetKey(
-          SentryAuthorizableScope::DATABASE);
-      const auto result_footprint = result_ptr->memory_footprint() +
-          key.capacity();
-      cache->Put(key, std::move(result_ptr), result_footprint);
-      VLOG(2) << Substitute(
-          "added entry of size $0 bytes for key '$1' (server-database scope)",
-          result_footprint, key);
-    }
-    // Don't add table-level records from the response into the cache
-    // if the request to Sentry was of database or higher scope. Due to the
-    // tree-like structure of Sentry's responses, those responses might contain
-    // information on non-Kudu tables which are not relevant in the context of
-    // Kudu's AuthzProvider. Upon detecting a cache miss for a table-scope key,
-    // the fetcher requests information on corresponding table-scope privileges
-    // from Sentry explicitly.
-    if ((requested_scope != SentryAuthorizableScope::SERVER) &&
-        (requested_scope != SentryAuthorizableScope::DATABASE)) {
-      unique_ptr<SentryPrivilegesBranch> result_ptr(
-          new SentryPrivilegesBranch(std::move(priv_table_column)));
-      const auto& key = aggregate_key.GetKey(
-          SentryAuthorizableScope::TABLE);
-      const auto result_footprint = result_ptr->memory_footprint() +
-          key.capacity();
-      cache->Put(key, std::move(result_ptr), result_footprint);
-      VLOG(2) << Substitute(
-          "added entry of size $0 bytes for key '$1' (table-column scope)",
-          result_footprint, key);
+      {
+        unique_ptr<SentryPrivilegesBranch> result_ptr(
+            new SentryPrivilegesBranch(std::move(priv_srv_db)));
+        const auto& db_key = full_authz_info.GetKey(SentryAuthorizableScope::DATABASE);
+        const auto result_footprint =
+            result_ptr->memory_footprint() + db_key.capacity();
+        cache->Put(db_key, std::move(result_ptr), result_footprint);
+        VLOG(2) << Substitute(
+            "added entry of size $0 bytes for key '$1' (server-database scope)",
+            result_footprint, db_key);
+      }
+      if (caching == ALL) {
+        unique_ptr<SentryPrivilegesBranch> result_ptr(
+            new SentryPrivilegesBranch(std::move(priv_table_column)));
+        const auto& table_key = full_authz_info.GetKey(SentryAuthorizableScope::TABLE);
+        const auto result_footprint =
+            result_ptr->memory_footprint() + table_key.capacity();
+        cache->Put(table_key, std::move(result_ptr), result_footprint);
+        VLOG(2) << Substitute(
+            "added entry of size $0 bytes for key '$1' (table-column scope)",
+            result_footprint, table_key);
+      }
     }
   }
 
+  // 5. Run any pending callbacks and return.
   SentryRequestsInfo info;
   {
     std::lock_guard<simple_spinlock> l(pending_requests_lock_);
-    info = EraseKeyReturnValuePtr(&pending_requests_, key);
+    info = EraseKeyReturnValuePtr(&pending_requests_, full_key);
   }
   CHECK_LE(1, info.callbacks.size());
   for (auto& cb : info.callbacks) {
diff --git a/src/kudu/master/sentry_privileges_fetcher.h b/src/kudu/master/sentry_privileges_fetcher.h
index a1b704c..a43732c 100644
--- a/src/kudu/master/sentry_privileges_fetcher.h
+++ b/src/kudu/master/sentry_privileges_fetcher.h
@@ -50,6 +50,11 @@ class TSentryPrivilege;
 namespace kudu {
 namespace master {
 
+enum SentryCaching {
+  ALL,
+  SERVER_AND_DB_ONLY,
+};
+
 // Utility struct to facilitate evaluating the privileges of a given
 // authorizable. This is preferred to using Sentry's Thrift responses directly,
 // since useful information has already been parsed to generate this struct
@@ -167,10 +172,15 @@ class SentryPrivilegesFetcher {
   // by the given table and scope. The result privileges might be served
   // from the cache, if caching is enabled and corresponding entry exists
   // in the cache.
+  //
+  // If 'caching' is SERVER_AND_DB_ONLY and the SentryPrivilegesFetcher is
+  // configured to cache privileges, it will not cache privileges equal to or
+  // below the 'TABLE' scope.
   Status GetSentryPrivileges(
       sentry::SentryAuthorizableScope::Scope requested_scope,
       const std::string& table_ident,
       const std::string& user,
+      SentryCaching caching,
       SentryPrivilegesBranch* privileges);
 
  private:
@@ -222,8 +232,8 @@ class SentryPrivilegesFetcher {
   // The TTL cache to store information on privileges received from Sentry.
   // The instance is wrapped into std::shared_ptr to handle operations with
   // cache items along with concurrent requests to reset the instance.
-  typedef TTLCache<std::string, SentryPrivilegesBranch> AuthzInfoCache;
-  std::shared_ptr<AuthzInfoCache> cache_;
+  typedef TTLCache<std::string, SentryPrivilegesBranch> PrivilegeCache;
+  std::shared_ptr<PrivilegeCache> cache_;
 
   // Synchronization primitive to guard access to the cache in the presence
   // of operations with cache items and concurrent requests to reset the cache.