You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/06/21 04:35:25 UTC

[impala] 01/04: IMPALA-8443: Record time spent in authorization in the runtime profile

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 65175a23b2ed73d91734607d474ba80e15323c9b
Author: Tamas Mate <tm...@cloudera.com>
AuthorDate: Mon Jun 17 08:30:27 2019 -0700

    IMPALA-8443: Record time spent in authorization in the runtime profile
    
    The analysis and authorization is handled together as authorization
    depends on the results of the analysis. The timeline EventSequence is
    moved into the AuthorizationContext and the markEvent is called during
    the postAuthorize method.
    
    In some cases the EventSequence is not available when the
    AuthorizationContext is created therefore it is wrapped in Optional.
    
    Change-Id: I5bb85e57fcc75d41f3eb2911e6d375e0da6f82ae
    Reviewed-on: http://gerrit.cloudera.org:8080/13353
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../org/apache/impala/analysis/AnalysisContext.java   |  4 +++-
 .../impala/authorization/AuthorizationChecker.java    |  6 +++++-
 .../impala/authorization/AuthorizationContext.java    | 15 ++++++++++++---
 .../authorization/BaseAuthorizationChecker.java       | 19 +++++++++++++------
 .../authorization/NoopAuthorizationFactory.java       |  6 ++++--
 .../ranger/RangerAuthorizationChecker.java            | 10 +++++++---
 .../ranger/RangerAuthorizationContext.java            |  6 +++++-
 .../sentry/SentryAuthorizationChecker.java            |  6 ++++--
 .../main/java/org/apache/impala/service/Frontend.java |  3 +--
 .../java/org/apache/impala/util/EventSequence.java    |  7 +++++--
 .../org/apache/impala/common/FrontendTestBase.java    |  7 +++++--
 tests/observability/test_log_fragments.py             |  3 ++-
 tests/query_test/test_observability.py                | 19 ++++++++++++++++++-
 13 files changed, 84 insertions(+), 27 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/AnalysisContext.java b/fe/src/main/java/org/apache/impala/analysis/AnalysisContext.java
index 1c1e059..2353bfc 100644
--- a/fe/src/main/java/org/apache/impala/analysis/AnalysisContext.java
+++ b/fe/src/main/java/org/apache/impala/analysis/AnalysisContext.java
@@ -21,6 +21,7 @@ import static org.apache.impala.analysis.ToSqlOptions.REWRITTEN;
 
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Optional;
 import java.util.Set;
 
 import org.apache.impala.analysis.StmtMetadataLoader.StmtTableCache;
@@ -415,6 +416,7 @@ public class AnalysisContext {
     } catch (AnalysisException e) {
       analysisException = e;
     }
+    timeline_.markEvent("Analysis finished");
 
     // Authorize statement and record exception. Authorization relies on information
     // collected during analysis.
@@ -425,7 +427,7 @@ public class AnalysisContext {
       authzCtx = authzChecker.createAuthorizationContext(true,
           clientRequest.isSetRedacted_stmt() ?
               clientRequest.getRedacted_stmt() : clientRequest.getStmt(),
-          queryCtx_.getSession());
+          queryCtx_.getSession(), Optional.of(timeline_));
       authzChecker.authorize(authzCtx, analysisResult_, catalog_);
     } catch (AuthorizationException e) {
       authException = e;
diff --git a/fe/src/main/java/org/apache/impala/authorization/AuthorizationChecker.java b/fe/src/main/java/org/apache/impala/authorization/AuthorizationChecker.java
index e582d2a..8038913 100644
--- a/fe/src/main/java/org/apache/impala/authorization/AuthorizationChecker.java
+++ b/fe/src/main/java/org/apache/impala/authorization/AuthorizationChecker.java
@@ -21,7 +21,9 @@ import org.apache.impala.analysis.AnalysisContext.AnalysisResult;
 import org.apache.impala.catalog.FeCatalog;
 import org.apache.impala.common.InternalException;
 import org.apache.impala.thrift.TSessionState;
+import org.apache.impala.util.EventSequence;
 
+import java.util.Optional;
 import java.util.Set;
 
 /**
@@ -42,9 +44,11 @@ public interface AuthorizationChecker {
    * @param doAudits a flag whether or not to do the audits
    * @param sqlStmt the SQL statement to be logged for auditing
    * @param sessionState the client session state
+   * @param timeline optional timeline to mark events in the query profile
    */
   AuthorizationContext createAuthorizationContext(boolean doAudits, String sqlStmt,
-      TSessionState sessionState) throws InternalException;
+      TSessionState sessionState, Optional<EventSequence> timeline)
+      throws InternalException;
 
   /**
    * Authorize an analyzed statement.
diff --git a/fe/src/main/java/org/apache/impala/authorization/AuthorizationContext.java b/fe/src/main/java/org/apache/impala/authorization/AuthorizationContext.java
index 3ed59ce..1930dc2 100644
--- a/fe/src/main/java/org/apache/impala/authorization/AuthorizationContext.java
+++ b/fe/src/main/java/org/apache/impala/authorization/AuthorizationContext.java
@@ -17,14 +17,23 @@
 
 package org.apache.impala.authorization;
 
+import org.apache.impala.util.EventSequence;
+
+import java.util.Optional;
+
 /**
  * An authorization context class that is created per authorization check.
  */
 public class AuthorizationContext {
-  private final long startTime_ = System.currentTimeMillis();
+  private final Optional<EventSequence> timeline_;
+
+  public AuthorizationContext(Optional<EventSequence> timeline) {
+    this.timeline_ = timeline;
+  }
 
   /**
-   * Gets the start time when the authorization check started.
+   * Gets the timeline which can be used to mark events in the query profile.
    */
-  public long getStartTime() { return startTime_; }
+  public Optional<EventSequence> getTimeline() { return timeline_; }
+
 }
diff --git a/fe/src/main/java/org/apache/impala/authorization/BaseAuthorizationChecker.java b/fe/src/main/java/org/apache/impala/authorization/BaseAuthorizationChecker.java
index 255a184..2104ed7 100644
--- a/fe/src/main/java/org/apache/impala/authorization/BaseAuthorizationChecker.java
+++ b/fe/src/main/java/org/apache/impala/authorization/BaseAuthorizationChecker.java
@@ -26,6 +26,7 @@ import org.apache.impala.catalog.FeCatalog;
 import org.apache.impala.catalog.FeDb;
 import org.apache.impala.common.InternalException;
 import org.apache.impala.common.Pair;
+import org.apache.impala.util.EventSequence;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -35,6 +36,7 @@ import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.Optional;
 
 /**
  * A base class for the {@link AuthorizationChecker}.
@@ -60,11 +62,12 @@ public abstract class BaseAuthorizationChecker implements AuthorizationChecker {
    */
   @Override
   public boolean hasAccess(User user, PrivilegeRequest request) throws InternalException {
-    // We don't want to do an audit log here. This method is used by "show databases",
-    // "show tables", "describe" to filter out unauthorized database, table, or column
-    // names.
+    // We don't want to do an audit log or profile events logged here. This method is used
+    // by "show databases", "show tables", "describe" to filter out unauthorized database,
+    // table, or column names.
     return hasAccess(createAuthorizationContext(false /*no audit log*/,
-        null /*no SQL statement*/, null /*no session state*/), user, request);
+        null /*no SQL statement*/, null /*no session state*/,
+        Optional.empty()), user, request);
   }
 
   private boolean hasAccess(AuthorizationContext authzCtx, User user,
@@ -86,8 +89,12 @@ public abstract class BaseAuthorizationChecker implements AuthorizationChecker {
    */
   @Override
   public void postAuthorize(AuthorizationContext authzCtx) {
-    long durationMs = System.currentTimeMillis() - authzCtx.getStartTime();
-    LOG.debug("Authorization check took {} ms", durationMs);
+    if (authzCtx.getTimeline().isPresent()) {
+      EventSequence timeline = authzCtx.getTimeline().get();
+      long durationMs = timeline.markEvent(String.format("Authorization finished (%s)",
+          config_.getProviderName())) / 1000;
+      LOG.debug("Authorization check took {} ms", durationMs);
+    }
   }
 
   /**
diff --git a/fe/src/main/java/org/apache/impala/authorization/NoopAuthorizationFactory.java b/fe/src/main/java/org/apache/impala/authorization/NoopAuthorizationFactory.java
index 2a371ea..33f2414 100644
--- a/fe/src/main/java/org/apache/impala/authorization/NoopAuthorizationFactory.java
+++ b/fe/src/main/java/org/apache/impala/authorization/NoopAuthorizationFactory.java
@@ -36,9 +36,11 @@ import org.apache.impala.thrift.TShowGrantPrincipalParams;
 import org.apache.impala.thrift.TShowRolesParams;
 import org.apache.impala.thrift.TShowRolesResult;
 import org.apache.impala.util.ClassUtil;
+import org.apache.impala.util.EventSequence;
 
 import java.util.Collections;
 import java.util.List;
+import java.util.Optional;
 import java.util.Set;
 import java.util.function.Supplier;
 
@@ -207,8 +209,8 @@ public class NoopAuthorizationFactory implements AuthorizationFactory {
 
       @Override
       public AuthorizationContext createAuthorizationContext(boolean doAudits,
-          String sqlStmt, TSessionState sessionState) {
-        return new AuthorizationContext();
+          String sqlStmt, TSessionState sessionState, Optional<EventSequence> timeline) {
+        return new AuthorizationContext(timeline);
       }
     };
   }
diff --git a/fe/src/main/java/org/apache/impala/authorization/ranger/RangerAuthorizationChecker.java b/fe/src/main/java/org/apache/impala/authorization/ranger/RangerAuthorizationChecker.java
index 13e8261..7f76cad 100644
--- a/fe/src/main/java/org/apache/impala/authorization/ranger/RangerAuthorizationChecker.java
+++ b/fe/src/main/java/org/apache/impala/authorization/ranger/RangerAuthorizationChecker.java
@@ -35,6 +35,7 @@ import org.apache.impala.authorization.User;
 import org.apache.impala.catalog.FeCatalog;
 import org.apache.impala.common.InternalException;
 import org.apache.impala.thrift.TSessionState;
+import org.apache.impala.util.EventSequence;
 import org.apache.ranger.audit.model.AuthzAuditEvent;
 import org.apache.ranger.plugin.policyengine.RangerAccessRequest;
 import org.apache.ranger.plugin.policyengine.RangerAccessRequestImpl;
@@ -48,6 +49,7 @@ import java.util.ArrayList;
 import java.util.EnumSet;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Optional;
 import java.util.Set;
 import java.util.stream.Collectors;
 
@@ -208,8 +210,10 @@ public class RangerAuthorizationChecker extends BaseAuthorizationChecker {
 
   @Override
   public AuthorizationContext createAuthorizationContext(boolean doAudits,
-      String sqlStmt, TSessionState sessionState) throws InternalException {
-    RangerAuthorizationContext authzCtx = new RangerAuthorizationContext(sessionState);
+      String sqlStmt, TSessionState sessionState, Optional<EventSequence> timeline)
+      throws InternalException {
+    RangerAuthorizationContext authzCtx =
+        new RangerAuthorizationContext(sessionState, timeline);
     if (doAudits) {
       // Any statement that goes through {@link authorize} will need to have audit logs.
       if (sqlStmt != null) {
@@ -235,7 +239,7 @@ public class RangerAuthorizationChecker extends BaseAuthorizationChecker {
     // case 4: table (select) ERROR, columns (select) ERROR --> only add the first column
     //                                                          event
     RangerAuthorizationContext tmpCtx = new RangerAuthorizationContext(
-        originalCtx.getSessionState());
+        originalCtx.getSessionState(), originalCtx.getTimeline());
     tmpCtx.setAuditHandler(new RangerBufferAuditHandler(
         originalAuditHandler.getSqlStmt(), originalAuditHandler.getClusterName(),
         originalAuditHandler.getClientIp()));
diff --git a/fe/src/main/java/org/apache/impala/authorization/ranger/RangerAuthorizationContext.java b/fe/src/main/java/org/apache/impala/authorization/ranger/RangerAuthorizationContext.java
index f4ad66a..afdf395 100644
--- a/fe/src/main/java/org/apache/impala/authorization/ranger/RangerAuthorizationContext.java
+++ b/fe/src/main/java/org/apache/impala/authorization/ranger/RangerAuthorizationContext.java
@@ -20,8 +20,10 @@ package org.apache.impala.authorization.ranger;
 import com.google.common.base.Preconditions;
 import org.apache.impala.authorization.AuthorizationContext;
 import org.apache.impala.thrift.TSessionState;
+import org.apache.impala.util.EventSequence;
 
 import javax.annotation.Nullable;
+import java.util.Optional;
 
 /**
  * Ranger specific {@link AuthorizationContext}.
@@ -31,7 +33,9 @@ public class RangerAuthorizationContext extends AuthorizationContext {
   // Audit handler can be null meaning we don't want to do an audit log.
   private @Nullable RangerBufferAuditHandler auditHandler_;
 
-  public RangerAuthorizationContext(TSessionState sessionState) {
+  public RangerAuthorizationContext(TSessionState sessionState,
+      Optional<EventSequence> timeline) {
+    super(timeline);
     sessionState_ = sessionState;
   }
 
diff --git a/fe/src/main/java/org/apache/impala/authorization/sentry/SentryAuthorizationChecker.java b/fe/src/main/java/org/apache/impala/authorization/sentry/SentryAuthorizationChecker.java
index 46a393f..cd0f5ff 100644
--- a/fe/src/main/java/org/apache/impala/authorization/sentry/SentryAuthorizationChecker.java
+++ b/fe/src/main/java/org/apache/impala/authorization/sentry/SentryAuthorizationChecker.java
@@ -30,6 +30,7 @@ import org.apache.impala.authorization.User;
 import org.apache.impala.authorization.AuthorizationPolicy;
 import org.apache.impala.common.InternalException;
 import org.apache.impala.thrift.TSessionState;
+import org.apache.impala.util.EventSequence;
 import org.apache.sentry.core.common.ActiveRoleSet;
 import org.apache.sentry.core.common.Subject;
 import org.apache.sentry.core.model.db.DBModelAuthorizable;
@@ -38,6 +39,7 @@ import org.apache.sentry.provider.common.ResourceAuthorizationProvider;
 import java.util.Collections;
 import java.util.EnumSet;
 import java.util.List;
+import java.util.Optional;
 import java.util.Set;
 
 /**
@@ -89,8 +91,8 @@ public class SentryAuthorizationChecker extends BaseAuthorizationChecker {
 
   @Override
   public AuthorizationContext createAuthorizationContext(boolean doAudits,
-      String sqlStmt, TSessionState sessionState) {
-    return new AuthorizationContext();
+      String sqlStmt, TSessionState sessionState, Optional<EventSequence> timeline) {
+    return new AuthorizationContext(timeline);
   }
 
   /*
diff --git a/fe/src/main/java/org/apache/impala/service/Frontend.java b/fe/src/main/java/org/apache/impala/service/Frontend.java
index d0979c0..fbc5d36 100644
--- a/fe/src/main/java/org/apache/impala/service/Frontend.java
+++ b/fe/src/main/java/org/apache/impala/service/Frontend.java
@@ -1248,8 +1248,7 @@ public class Frontend {
     AnalysisContext analysisCtx = new AnalysisContext(queryCtx, authzFactory_, timeline);
     AnalysisResult analysisResult =
         analysisCtx.analyzeAndAuthorize(stmt, stmtTableCache, authzChecker_.get());
-    LOG.info("Analysis finished.");
-    timeline.markEvent("Analysis finished");
+    LOG.info("Analysis and authorization finished.");
     Preconditions.checkNotNull(analysisResult.getStmt());
 
     TExecRequest result = createBaseExecRequest(queryCtx, analysisResult);
diff --git a/fe/src/main/java/org/apache/impala/util/EventSequence.java b/fe/src/main/java/org/apache/impala/util/EventSequence.java
index 92552d1..d137208 100644
--- a/fe/src/main/java/org/apache/impala/util/EventSequence.java
+++ b/fe/src/main/java/org/apache/impala/util/EventSequence.java
@@ -41,11 +41,14 @@ public class EventSequence {
 
   /**
    * Saves an event at the current time with the given label.
+   * It returns the duration in nano seconds between the last and the current event.
    */
-  public void markEvent(String label) {
+  public long markEvent(String label) {
     // Timestamps should be in ns resolution
-    timestamps_.add(System.nanoTime() - startTime_);
+    long durationNs = System.nanoTime() - startTime_;
+    timestamps_.add(durationNs);
     labels_.add(label);
+    return durationNs;
   }
 
   // For testing
diff --git a/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java b/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java
index 9771fe4..71285eb 100644
--- a/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java
+++ b/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java
@@ -22,6 +22,7 @@ import static org.junit.Assert.fail;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
+import java.util.Optional;
 import java.util.Set;
 import java.util.function.Supplier;
 
@@ -58,6 +59,7 @@ import org.apache.impala.testutil.ImpaladTestCatalog;
 import org.apache.impala.thrift.TAccessEvent;
 import org.apache.impala.thrift.TQueryOptions;
 import org.apache.impala.thrift.TSessionState;
+import org.apache.impala.util.EventSequence;
 import org.junit.Assert;
 
 import com.google.common.base.Preconditions;
@@ -377,8 +379,9 @@ public class FrontendTestBase extends AbstractFrontendTest {
 
           @Override
           public AuthorizationContext createAuthorizationContext(boolean doAudits,
-              String sqlStmt, TSessionState sessionState) {
-            return new AuthorizationContext();
+              String sqlStmt, TSessionState sessionState,
+              Optional<EventSequence> timeline) {
+            return new AuthorizationContext(timeline);
           }
         };
       }
diff --git a/tests/observability/test_log_fragments.py b/tests/observability/test_log_fragments.py
index b3d6afc..81fbf53 100644
--- a/tests/observability/test_log_fragments.py
+++ b/tests/observability/test_log_fragments.py
@@ -42,7 +42,8 @@ class TestLogFragments(ImpalaTestSuite):
     self.execute_query("select 1")
     # Logging may be buffered, so sleep to wait out the buffering.
     time.sleep(6)
-    self.assert_impalad_log_contains('INFO', query_id + "] Analysis finished.")
+    self.assert_impalad_log_contains('INFO', query_id +
+      "] Analysis and authorization finished.")
     assert query_id.endswith("000")
     # Looks for a fragment instance that doesn't end with "0" to make sure instances
     # are getting propagated too.
diff --git a/tests/query_test/test_observability.py b/tests/query_test/test_observability.py
index 005a661..fef2fef 100644
--- a/tests/query_test/test_observability.py
+++ b/tests/query_test/test_observability.py
@@ -279,7 +279,24 @@ class TestObservability(ImpalaTestSuite):
     assert results.runtime_profile.count("AGGREGATION_NODE") == 2
     assert results.runtime_profile.count("PLAN_ROOT_SINK") == 2
 
-  def test_query_profile_contains_query_events(self):
+  def test_query_profile_contains_query_compilation_events(self):
+    """Test that the expected events show up in a query profile.
+       If the table metadata is not cached this test will fail, as the metadata load
+       creates lines dynamically."""
+    event_regexes = [r'Query Compilation:',
+        r'Metadata of all .* tables cached:',
+        r'Analysis finished:',
+        r'Authorization finished (.*):',
+        r'Value transfer graph computed:',
+        r'Single node plan created:',
+        r'Runtime filters computed:',
+        r'Distributed plan created:',
+        r'Planning finished:']
+    query = "select * from functional.alltypes"
+    runtime_profile = self.execute_query(query).runtime_profile
+    self.__verify_profile_event_sequence(event_regexes, runtime_profile)
+
+  def test_query_profile_contains_query_timeline_events(self):
     """Test that the expected events show up in a query profile."""
     event_regexes = [r'Query Timeline:',
         r'Query submitted:',