You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2019/04/26 23:45:51 UTC

[impala] 02/03: IMPALA-8444: Fix performance regression when building privilege name

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 349142635250f92412003a1e62829fc4b441dc75
Author: Fredy Wijaya <fw...@cloudera.com>
AuthorDate: Tue Apr 23 15:35:01 2019 -0700

    IMPALA-8444: Fix performance regression when building privilege name
    
    This patch fixes the performance regression when building privilege name
    by rewriting PrincipalPrivilege.buildPrivilegeName() with a simple
    string concatentation instead of using a list that gets converted into a
    string.
    
    Below is the result of running a benchmark using JMH comparing the old
    and new implementations:
    
    Result "org.apache.impala.BuildPrivilegeNameBenchmark.fast":
      0.344 ±(99.9%) 0.004 us/op [Average]
      (min, avg, max) = (0.336, 0.344, 0.355), stdev = 0.005
      CI (99.9%): [0.339, 0.348] (assumes normal distribution)
    
    Result "org.apache.impala.BuildPrivilegeNameBenchmark.slow":
      0.831 ±(99.9%) 0.011 us/op [Average]
      (min, avg, max) = (0.807, 0.831, 0.856), stdev = 0.015
      CI (99.9%): [0.820, 0.842] (assumes normal distribution)
    
    Benchmark                         Mode  Cnt  Score   Error Units
    BuildPrivilegeNameBenchmark.fast  avgt   25  0.344 ± 0.004 us/op
    BuildPrivilegeNameBenchmark.slow  avgt   25  0.831 ± 0.011 us/op
    
    This patch also updates SentryAuthorizationPolicy.listPrivileges() to
    reuse the privilege names that have already been built instead of
    building them again. While fixing this, I found a bug where Principal
    stores the PrincipalPrivilege in a case insensitive way. This is true
    for all privilege scopes, except URI. This patch fixes the issue by
    making privilege name to be case sensitive instead.
    
    This patch removes incorrect synchronization in
    SentryAuthorizationPolicy.listPrivileges() that can cause the operation
    to run in serial in a highly concurrent workload.
    
    Testing:
    - Ran all FE tests
    - Ran all E2E authorization tests
    - Added E2E test for privilege name case sensitivity bug
    
    Change-Id: I942d9b55f07c8972f69e532567d9b7d80fceb6e5
    Reviewed-on: http://gerrit.cloudera.org:8080/13095
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../impala/authorization/AuthorizationPolicy.java  |  22 ---
 .../sentry/SentryAuthorizationPolicy.java          |  27 +---
 .../impala/authorization/sentry/SentryProxy.java   |   2 +-
 .../java/org/apache/impala/catalog/Catalog.java    |   5 +-
 .../apache/impala/catalog/CatalogObjectCache.java  |   2 +-
 .../java/org/apache/impala/catalog/Principal.java  |  10 +-
 .../apache/impala/catalog/PrincipalPrivilege.java  | 152 +++++++++++----------
 tests/authorization/test_grant_revoke.py           |  47 ++++++-
 8 files changed, 141 insertions(+), 126 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/authorization/AuthorizationPolicy.java b/fe/src/main/java/org/apache/impala/authorization/AuthorizationPolicy.java
index adbc004..33b7c51 100644
--- a/fe/src/main/java/org/apache/impala/authorization/AuthorizationPolicy.java
+++ b/fe/src/main/java/org/apache/impala/authorization/AuthorizationPolicy.java
@@ -162,28 +162,6 @@ public class AuthorizationPolicy {
   }
 
   /**
-   * Removes a privilege from the policy mapping to the role specified by the principal ID
-   * in the privilege. Throws a CatalogException if no role with a corresponding ID exists
-   * in the catalog. Returns null if no matching privilege is found in this principal.
-   */
-  public synchronized PrincipalPrivilege removePrivilege(PrincipalPrivilege privilege)
-      throws CatalogException {
-    Principal principal = getPrincipal(privilege.getPrincipalId(),
-        privilege.getPrincipalType());
-    if (principal == null) {
-      throw new CatalogException(String.format("Error removing privilege: %s. %s ID " +
-          "'%d' does not exist.", privilege.getName(),
-          Principal.toString(privilege.getPrincipalType()), privilege.getPrincipalId()));
-    }
-    if (LOG.isTraceEnabled()) {
-      LOG.trace("Removing privilege: " + privilege.getName() + " from " +
-          Principal.toString(privilege.getPrincipalType()).toLowerCase() + ": " +
-          principal.getName() + " with ID: " + principal.getId());
-    }
-    return principal.removePrivilege(privilege.getName());
-  }
-
-  /**
    * Returns all roles in the policy. Returns an empty list if no roles exist.
    */
   public synchronized List<Role> getAllRoles() {
diff --git a/fe/src/main/java/org/apache/impala/authorization/sentry/SentryAuthorizationPolicy.java b/fe/src/main/java/org/apache/impala/authorization/sentry/SentryAuthorizationPolicy.java
index 9c31421..1adfec8 100644
--- a/fe/src/main/java/org/apache/impala/authorization/sentry/SentryAuthorizationPolicy.java
+++ b/fe/src/main/java/org/apache/impala/authorization/sentry/SentryAuthorizationPolicy.java
@@ -20,7 +20,6 @@ package org.apache.impala.authorization.sentry;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Sets;
 import org.apache.impala.authorization.AuthorizationPolicy;
-import org.apache.impala.catalog.PrincipalPrivilege;
 import org.apache.impala.catalog.Role;
 import org.apache.impala.catalog.User;
 import org.apache.sentry.core.common.ActiveRoleSet;
@@ -53,7 +52,7 @@ public class SentryAuthorizationPolicy implements PrivilegeCache {
    * Returns a set of privilege strings in Sentry format.
    */
   @Override
-  public synchronized Set<String> listPrivileges(Set<String> groups,
+  public Set<String> listPrivileges(Set<String> groups,
       ActiveRoleSet roleSet) {
     Set<String> privileges = Sets.newHashSet();
     if (roleSet != ActiveRoleSet.ALL) {
@@ -64,16 +63,7 @@ public class SentryAuthorizationPolicy implements PrivilegeCache {
     for (String groupName: groups) {
       List<Role> grantedRoles = authzPolicy_.getGrantedRoles(groupName);
       for (Role role: grantedRoles) {
-        for (PrincipalPrivilege privilege: role.getPrivileges()) {
-          String authorizable = privilege.getName();
-          if (authorizable == null) {
-            if (LOG.isTraceEnabled()) {
-              LOG.trace("Ignoring invalid privilege: " + privilege.getName());
-            }
-            continue;
-          }
-          privileges.add(authorizable);
-        }
+        privileges.addAll(role.getPrivilegeNames());
       }
     }
     return privileges;
@@ -83,22 +73,13 @@ public class SentryAuthorizationPolicy implements PrivilegeCache {
    * Returns a set of privilege strings in Sentry format.
    */
   @Override
-  public synchronized Set<String> listPrivileges(Set<String> groups, Set<String> users,
+  public Set<String> listPrivileges(Set<String> groups, Set<String> users,
       ActiveRoleSet roleSet) {
     Set<String> privileges = listPrivileges(groups, roleSet);
     for (String userName: users) {
       User user = authzPolicy_.getUser(userName);
       if (user != null) {
-        for (PrincipalPrivilege privilege: user.getPrivileges()) {
-          String authorizable = privilege.getName();
-          if (authorizable == null) {
-            if (LOG.isTraceEnabled()) {
-              LOG.trace("Ignoring invalid privilege: " + privilege.getName());
-            }
-            continue;
-          }
-          privileges.add(authorizable);
-        }
+        privileges.addAll(user.getPrivilegeNames());
       }
     }
     return privileges;
diff --git a/fe/src/main/java/org/apache/impala/authorization/sentry/SentryProxy.java b/fe/src/main/java/org/apache/impala/authorization/sentry/SentryProxy.java
index d2f7c9a..fa6265d 100644
--- a/fe/src/main/java/org/apache/impala/authorization/sentry/SentryProxy.java
+++ b/fe/src/main/java/org/apache/impala/authorization/sentry/SentryProxy.java
@@ -337,7 +337,7 @@ public class SentryProxy {
       TPrivilege thriftPriv =
           SentryPolicyService.sentryPrivilegeToTPrivilege(sentryPriv, principal);
       String privilegeName = PrincipalPrivilege.buildPrivilegeName(thriftPriv);
-      privilegesToRemove.remove(privilegeName.toLowerCase());
+      privilegesToRemove.remove(privilegeName);
       PrincipalPrivilege existingPrincipalPriv = principal.getPrivilege(privilegeName);
       // We already know about this privilege (privileges cannot be modified).
       if (existingPrincipalPriv != null &&
diff --git a/fe/src/main/java/org/apache/impala/catalog/Catalog.java b/fe/src/main/java/org/apache/impala/catalog/Catalog.java
index a7615af..3b22865 100644
--- a/fe/src/main/java/org/apache/impala/catalog/Catalog.java
+++ b/fe/src/main/java/org/apache/impala/catalog/Catalog.java
@@ -579,9 +579,8 @@ public abstract class Catalog implements AutoCloseable {
         // The combination of privilege name + principal ID + principal type is
         // guaranteed to be unique.
         return "PRIVILEGE:" +
-            PrincipalPrivilege.buildPrivilegeName(catalogObject.getPrivilege())
-                .toLowerCase() + "." +
-            Integer.toString(catalogObject.getPrivilege().getPrincipal_id()) + "." +
+            PrincipalPrivilege.buildPrivilegeName(catalogObject.getPrivilege()) + "." +
+            catalogObject.getPrivilege().getPrincipal_id() + "." +
             catalogObject.getPrivilege().getPrincipal_type();
       case HDFS_CACHE_POOL:
         return "HDFS_CACHE_POOL:" +
diff --git a/fe/src/main/java/org/apache/impala/catalog/CatalogObjectCache.java b/fe/src/main/java/org/apache/impala/catalog/CatalogObjectCache.java
index d613cf9..d9bfc14 100644
--- a/fe/src/main/java/org/apache/impala/catalog/CatalogObjectCache.java
+++ b/fe/src/main/java/org/apache/impala/catalog/CatalogObjectCache.java
@@ -155,4 +155,4 @@ public class CatalogObjectCache<T extends CatalogObject> implements Iterable<T>
   public Iterator<T> iterator() {
     return metadataCache_.values().iterator();
   }
-}
+}
\ No newline at end of file
diff --git a/fe/src/main/java/org/apache/impala/catalog/Principal.java b/fe/src/main/java/org/apache/impala/catalog/Principal.java
index 650eac4..1012fb2 100644
--- a/fe/src/main/java/org/apache/impala/catalog/Principal.java
+++ b/fe/src/main/java/org/apache/impala/catalog/Principal.java
@@ -17,6 +17,8 @@
 
 package org.apache.impala.catalog;
 
+import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicInteger;
@@ -37,9 +39,9 @@ public abstract class Principal extends CatalogObjectImpl {
   private final TPrincipal principal_;
   // The last principal ID assigned, starts at 0.
   private static AtomicInteger principalId_ = new AtomicInteger(0);
-
+  // URIs are case sensitive, so we need to store privilege names in a case sensitive way.
   private final CatalogObjectCache<PrincipalPrivilege> principalPrivileges_ =
-      new CatalogObjectCache<>();
+      new CatalogObjectCache<>(false);
 
   protected Principal(String principalName, TPrincipalType type,
       Set<String> grantGroups) {
@@ -68,7 +70,7 @@ public abstract class Principal extends CatalogObjectImpl {
    * principal, an empty list is returned.
    */
   public List<PrincipalPrivilege> getPrivileges() {
-    return Lists.newArrayList(principalPrivileges_.getValues());
+    return new ArrayList<>(principalPrivileges_.getValues());
   }
 
   /**
@@ -76,7 +78,7 @@ public abstract class Principal extends CatalogObjectImpl {
    * granted to the principal.
    */
   public Set<String> getPrivilegeNames() {
-    return Sets.newHashSet(principalPrivileges_.keySet());
+    return new HashSet<>(principalPrivileges_.keySet());
   }
 
   /**
diff --git a/fe/src/main/java/org/apache/impala/catalog/PrincipalPrivilege.java b/fe/src/main/java/org/apache/impala/catalog/PrincipalPrivilege.java
index 2b97dc4..1b2da26 100644
--- a/fe/src/main/java/org/apache/impala/catalog/PrincipalPrivilege.java
+++ b/fe/src/main/java/org/apache/impala/catalog/PrincipalPrivilege.java
@@ -17,8 +17,6 @@
 
 package org.apache.impala.catalog;
 
-import java.util.List;
-
 import org.apache.impala.authorization.AuthorizationPolicy;
 import org.apache.impala.thrift.TCatalogObject;
 import org.apache.impala.thrift.TCatalogObjectType;
@@ -28,9 +26,7 @@ import org.apache.impala.thrift.TPrivilegeLevel;
 import org.apache.impala.thrift.TPrivilegeScope;
 import org.apache.log4j.Logger;
 
-import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
 
 /**
  * Represents a privilege that has been granted to a principal in an authorization policy.
@@ -38,11 +34,8 @@ import com.google.common.collect.Lists;
  */
 public class PrincipalPrivilege extends CatalogObjectImpl {
   private static final Logger LOG = Logger.getLogger(AuthorizationPolicy.class);
-  // These Joiners are used to build principal names. For simplicity, the principal name
-  // we use can also be sent to the Sentry library to perform authorization checks
-  // so we build them in the same format.
-  private static final Joiner AUTHORIZABLE_JOINER = Joiner.on("->");
-  private static final Joiner KV_JOINER = Joiner.on("=");
+  private static final String AUTHORIZABLE_SEPARATOR = "->";
+  private static final String KV_SEPARATOR = "=";
   private final TPrivilege privilege_;
 
   private PrincipalPrivilege(TPrivilege privilege) {
@@ -61,70 +54,89 @@ public class PrincipalPrivilege extends CatalogObjectImpl {
    * [ServerName=value]->[DbName=value]->[TableName=value]->[ColumnName=value]->[Action Granted=value]->[Grant Option=value]
    */
   public static String buildPrivilegeName(TPrivilege privilege) {
-    List<String> authorizable = Lists.newArrayListWithExpectedSize(4);
-    try {
-      Preconditions.checkNotNull(privilege);
-      TPrivilegeScope scope = privilege.getScope();
-      Preconditions.checkNotNull(scope);
-      switch (scope) {
-        case SERVER: {
-          authorizable.add(KV_JOINER.join("server", privilege.getServer_name().
-              toLowerCase()));
-          break;
-        }
-        case URI: {
-          authorizable.add(KV_JOINER.join("server", privilege.getServer_name().
-              toLowerCase()));
-          // (IMPALA-2695) URIs are case sensitive
-          authorizable.add(KV_JOINER.join("uri", privilege.getUri()));
-          break;
-        }
-        case DATABASE: {
-          authorizable.add(KV_JOINER.join("server", privilege.getServer_name().
-              toLowerCase()));
-          authorizable.add(KV_JOINER.join("db", privilege.getDb_name().
-              toLowerCase()));
-          break;
-        }
-        case TABLE: {
-          authorizable.add(KV_JOINER.join("server", privilege.getServer_name().
-              toLowerCase()));
-          authorizable.add(KV_JOINER.join("db", privilege.getDb_name().
-              toLowerCase()));
-          authorizable.add(KV_JOINER.join("table", privilege.getTable_name().
-              toLowerCase()));
-          break;
-        }
-        case COLUMN: {
-          authorizable.add(KV_JOINER.join("server", privilege.getServer_name().
-              toLowerCase()));
-          authorizable.add(KV_JOINER.join("db", privilege.getDb_name().
-              toLowerCase()));
-          authorizable.add(KV_JOINER.join("table", privilege.getTable_name().
-              toLowerCase()));
-          authorizable.add(KV_JOINER.join("column", privilege.getColumn_name().
-              toLowerCase()));
-          break;
-        }
-        default: {
-          throw new UnsupportedOperationException(
-              "Unknown privilege scope: " + scope.toString());
-        }
+    StringBuilder privilegeName = new StringBuilder();
+    Preconditions.checkNotNull(privilege);
+    TPrivilegeScope scope = privilege.getScope();
+    Preconditions.checkNotNull(scope);
+    switch (scope) {
+      case SERVER: {
+        privilegeName.append("server")
+            .append(KV_SEPARATOR)
+            .append(privilege.getServer_name().toLowerCase());
+        break;
       }
-
-      // The ALL privilege is always implied and does not need to be included as part
-      // of the name.
-      if (privilege.getPrivilege_level() != TPrivilegeLevel.ALL) {
-        authorizable.add(KV_JOINER.join("action",
-            privilege.getPrivilege_level().toString()));
+      case URI: {
+        privilegeName.append("server")
+            .append(KV_SEPARATOR)
+            .append(privilege.getServer_name().toLowerCase());
+        privilegeName.append(AUTHORIZABLE_SEPARATOR);
+        // (IMPALA-2695) URIs are case sensitive
+        privilegeName.append("uri")
+            .append(KV_SEPARATOR)
+            .append(privilege.getUri());
+        break;
+      }
+      case DATABASE: {
+        privilegeName.append("server")
+            .append(KV_SEPARATOR)
+            .append(privilege.getServer_name().toLowerCase());
+        privilegeName.append(AUTHORIZABLE_SEPARATOR);
+        privilegeName.append("db")
+            .append(KV_SEPARATOR)
+            .append(privilege.getDb_name().toLowerCase());
+        break;
+      }
+      case TABLE: {
+        privilegeName.append("server")
+            .append(KV_SEPARATOR)
+            .append(privilege.getServer_name().toLowerCase());
+        privilegeName.append(AUTHORIZABLE_SEPARATOR);
+        privilegeName.append("db")
+            .append(KV_SEPARATOR)
+            .append(privilege.getDb_name().toLowerCase());
+        privilegeName.append(AUTHORIZABLE_SEPARATOR);
+        privilegeName.append("table")
+            .append(KV_SEPARATOR)
+            .append(privilege.getTable_name().toLowerCase());
+        break;
       }
-      authorizable.add(KV_JOINER.join("grantoption", privilege.isHas_grant_opt()));
-      return AUTHORIZABLE_JOINER.join(authorizable);
-    } catch (Exception e) {
-      // Should never make it here unless the privilege is malformed.
-      LOG.error("ERROR: ", e);
-      return null;
+      case COLUMN: {
+        privilegeName.append("server")
+            .append(KV_SEPARATOR)
+            .append(privilege.getServer_name().toLowerCase());
+        privilegeName.append(AUTHORIZABLE_SEPARATOR);
+        privilegeName.append("db")
+            .append(KV_SEPARATOR)
+            .append(privilege.getDb_name().toLowerCase());
+        privilegeName.append(AUTHORIZABLE_SEPARATOR);
+        privilegeName.append("table")
+            .append(KV_SEPARATOR)
+            .append(privilege.getTable_name().toLowerCase());
+        privilegeName.append(AUTHORIZABLE_SEPARATOR);
+        privilegeName.append("column")
+            .append(KV_SEPARATOR)
+            .append(privilege.getColumn_name().toLowerCase());
+        break;
+      }
+      default: {
+        throw new UnsupportedOperationException(
+            "Unknown privilege scope: " + scope.toString());
+      }
+    }
+
+    // The ALL privilege is always implied and does not need to be included as part
+    // of the name.
+    if (privilege.getPrivilege_level() != TPrivilegeLevel.ALL) {
+      privilegeName.append(AUTHORIZABLE_SEPARATOR);
+      privilegeName.append("action")
+          .append(KV_SEPARATOR)
+          .append(privilege.getPrivilege_level().toString().toLowerCase());
     }
+    privilegeName.append(AUTHORIZABLE_SEPARATOR);
+    privilegeName.append("grantoption")
+        .append(KV_SEPARATOR)
+        .append(privilege.isHas_grant_opt());
+    return privilegeName.toString();
   }
 
   /**
diff --git a/tests/authorization/test_grant_revoke.py b/tests/authorization/test_grant_revoke.py
index 0dcd423..542f749 100644
--- a/tests/authorization/test_grant_revoke.py
+++ b/tests/authorization/test_grant_revoke.py
@@ -347,10 +347,10 @@ class TestGrantRevoke(SentryCacheTestSuite):
                     "org.apache.impala.service.CustomClusterResourceAuthorizationProvider"
                     .format(SENTRY_CONFIG_FILE_OO),
       sentry_config=SENTRY_CONFIG_FILE_OO)
-  def test_same_name_for_role_and_user_invalidate_metadata(self, testid_checksum):
+  def test_same_name_for_role_and_user_invalidate_metadata(self, unique_name):
     """IMPALA-7729: Tests running invalidate metadata with for the same name for both
     user and role should not cause Impala query to hang."""
-    db_prefix = testid_checksum
+    db_prefix = unique_name
     role_name = "foobar"
     # Use two different clients so that the sessions will use two different user names.
     foobar_impalad_client = self.create_impala_client()
@@ -380,3 +380,46 @@ class TestGrantRevoke(SentryCacheTestSuite):
       self.client.execute("drop database {0}_db1".format(db_prefix))
       self.client.execute("drop database {0}_db2".format(db_prefix))
       self.client.execute("drop role {0}".format(role_name))
+
+  @pytest.mark.execute_serially
+  @SentryCacheTestSuite.with_args(
+      impalad_args="--server_name=server1",
+      catalogd_args="--sentry_config={0}".format(SENTRY_CONFIG_FILE),
+      sentry_config=SENTRY_CONFIG_FILE)
+  def test_uri_privilege_case_sensitive(self, unique_role):
+    """Tests that revoking on a granted URI with a different case should not be
+       allowed."""
+    try:
+      self.client.execute("create role {0}".format(unique_role))
+      self.client.execute("grant role {0} to group `{1}`"
+                          .format(unique_role, grp.getgrnam(getuser()).gr_name))
+      self.client.execute("grant refresh on server to {0}".format(unique_role))
+      self.client.execute("grant all on uri '/test-warehouse/FOO' to {0}"
+                          .format(unique_role))
+      self.client.execute("grant all on uri '/test-warehouse/foo' to {0}"
+                          .format(unique_role))
+      result = self.client.execute("show grant role {0}".format(unique_role))
+
+      # Both URIs should exist.
+      assert any("/test-warehouse/FOO" in x for x in result.data)
+      assert any("/test-warehouse/foo" in x for x in result.data)
+
+      self.client.execute("refresh authorization")
+      # After refresh authorization, we should expect both URIs to still exist.
+      assert any("/test-warehouse/FOO" in x for x in result.data)
+      assert any("/test-warehouse/foo" in x for x in result.data)
+
+      self.client.execute("revoke all on uri '/test-warehouse/foo' from {0}"
+                          .format(unique_role))
+
+      result = self.client.execute("show grant role {0}".format(unique_role))
+      # Only one URI should exist.
+      assert any("/test-warehouse/FOO" in x for x in result.data)
+      assert not any("/test-warehouse/foo" in x for x in result.data)
+
+      self.client.execute("refresh authorization")
+      # After refresh authorization, the one URI should still exist.
+      assert any("/test-warehouse/FOO" in x for x in result.data)
+      assert not any("/test-warehouse/foo" in x for x in result.data)
+    finally:
+      self.client.execute("drop role {0}".format(unique_role))