You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/03/27 18:47:15 UTC

[1/7] impala git commit: IMPALA-6743: bump from 2.11 to 2.12

Repository: impala
Updated Branches:
  refs/heads/2.x 57f95865c -> 3d52f8c99


IMPALA-6743: bump from 2.11 to 2.12

Next release is 2.12 so update the 2.x branch to
refer to 2.12 (2.11 has already been released).

Change-Id: Iaeaf230bf6f0cbf299edd4cf5ede4cb808523f1c
Reviewed-on: http://gerrit.cloudera.org:8080/9809
Reviewed-by: Lars Volker <lv...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/ae20eb44
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/ae20eb44
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/ae20eb44

Branch: refs/heads/2.x
Commit: ae20eb4470ae6e41371f70894fd666355fb56b14
Parents: 57f9586
Author: Vuk Ercegovac <ve...@cloudera.com>
Authored: Mon Mar 26 15:33:45 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Tue Mar 27 02:50:09 2018 +0000

----------------------------------------------------------------------
 bin/save-version.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/ae20eb44/bin/save-version.sh
----------------------------------------------------------------------
diff --git a/bin/save-version.sh b/bin/save-version.sh
index 1b1507a..9ead5be 100755
--- a/bin/save-version.sh
+++ b/bin/save-version.sh
@@ -21,7 +21,7 @@
 # Note: for internal (aka pre-release) versions, the version should have
 # "-INTERNAL" appended. Parts of the code will look for this to distinguish
 # between released and internal versions.
-VERSION=2.11.0-SNAPSHOT
+VERSION=2.12.0-SNAPSHOT
 GIT_HASH=$(git rev-parse HEAD 2> /dev/null)
 if [ -z $GIT_HASH ]
 then


[4/7] impala git commit: IMPALA-6614: ClientRequestState should use HS2 TOperationState

Posted by ta...@apache.org.
IMPALA-6614: ClientRequestState should use HS2 TOperationState

Currently it uses beeswax's QueryState enum, but the TOperationState
is a superset. In order to remove dependencies on beeswax, and also
set things up for a future change to use the TOperationState explicit
CANCELED_STATE (see IMPALA-1262), migrate CLR to use TOperationState.

The intent of this change is to make no client visible change.

Change-Id: I36287eaf8f1dac23c306b470f95f379dfdc6bb5b
Reviewed-on: http://gerrit.cloudera.org:8080/9501
Reviewed-by: Dan Hecht <dh...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/46c95b52
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/46c95b52
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/46c95b52

Branch: refs/heads/2.x
Commit: 46c95b52075454c99e19523e132a8a026822b9ff
Parents: f05cf90
Author: Dan Hecht <dh...@cloudera.com>
Authored: Mon Mar 5 16:08:28 2018 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Tue Mar 27 03:35:00 2018 +0000

----------------------------------------------------------------------
 be/src/service/client-request-state.cc  | 55 ++++++++++++++++++----------
 be/src/service/client-request-state.h   | 54 ++++++++++++++++-----------
 be/src/service/impala-beeswax-server.cc | 11 +++---
 be/src/service/impala-hs2-server.cc     | 19 ++--------
 be/src/service/impala-http-handler.cc   |  3 +-
 be/src/service/impala-server.cc         |  9 +++--
 6 files changed, 84 insertions(+), 67 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/46c95b52/be/src/service/client-request-state.cc
----------------------------------------------------------------------
diff --git a/be/src/service/client-request-state.cc b/be/src/service/client-request-state.cc
index 12bf3b7..29c493e 100644
--- a/be/src/service/client-request-state.cc
+++ b/be/src/service/client-request-state.cc
@@ -101,7 +101,7 @@ ClientRequestState::ClientRequestState(
       TimePrecision::Nanosecond));
   summary_profile_->AddInfoString("End Time", "");
   summary_profile_->AddInfoString("Query Type", "N/A");
-  summary_profile_->AddInfoString("Query State", PrintQueryState(query_state_));
+  summary_profile_->AddInfoString("Query State", PrintQueryState(BeeswaxQueryState()));
   summary_profile_->AddInfoString("Query Status", "OK");
   summary_profile_->AddInfoString("Impala Version", GetVersionString(/* compact */ true));
   summary_profile_->AddInfoString("User", effective_user());
@@ -214,7 +214,7 @@ Status ClientRequestState::Exec(TExecRequest* exec_request) {
     }
     default:
       stringstream errmsg;
-      errmsg << "Unknown  exec request stmt type: " << exec_request_.stmt_type;
+      errmsg << "Unknown exec request stmt type: " << exec_request_.stmt_type;
       return Status(errmsg.str());
   }
 }
@@ -643,8 +643,11 @@ void ClientRequestState::Wait() {
     discard_result(UpdateQueryStatus(status));
   }
   if (status.ok()) {
-    UpdateNonErrorQueryState(beeswax::QueryState::FINISHED);
+    UpdateNonErrorOperationState(TOperationState::FINISHED_STATE);
   }
+  // UpdateQueryStatus() or UpdateNonErrorOperationState() have updated operation_state_.
+  DCHECK(operation_state_ == TOperationState::FINISHED_STATE ||
+      operation_state_ == TOperationState::ERROR_STATE);
 }
 
 Status ClientRequestState::WaitInternal() {
@@ -719,17 +722,18 @@ Status ClientRequestState::RestartFetch() {
   return Status::OK();
 }
 
-void ClientRequestState::UpdateNonErrorQueryState(
-    beeswax::QueryState::type query_state) {
+void ClientRequestState::UpdateNonErrorOperationState(
+    TOperationState::type operation_state) {
   lock_guard<mutex> l(lock_);
-  DCHECK(query_state != beeswax::QueryState::EXCEPTION);
-  if (query_state_ < query_state) UpdateQueryState(query_state);
+  DCHECK(operation_state == TOperationState::RUNNING_STATE
+      || operation_state == TOperationState::FINISHED_STATE);
+  if (operation_state_ < operation_state) UpdateOperationState(operation_state);
 }
 
 Status ClientRequestState::UpdateQueryStatus(const Status& status) {
   // Preserve the first non-ok status
   if (!status.ok() && query_status_.ok()) {
-    UpdateQueryState(beeswax::QueryState::EXCEPTION);
+    UpdateOperationState(TOperationState::ERROR_STATE);
     query_status_ = status;
     summary_profile_->AddInfoStringRedacted("Query Status", query_status_.GetDetail());
   }
@@ -739,12 +743,13 @@ Status ClientRequestState::UpdateQueryStatus(const Status& status) {
 
 Status ClientRequestState::FetchRowsInternal(const int32_t max_rows,
     QueryResultSet* fetched_rows) {
-  DCHECK(query_state_ != beeswax::QueryState::EXCEPTION);
+  // Wait() guarantees that we've transitioned at least to FINISHED_STATE (and any
+  // state beyond that should have a non-OK query_status_ set).
+  DCHECK(operation_state_ == TOperationState::FINISHED_STATE);
 
   if (eos_) return Status::OK();
 
   if (request_result_set_ != NULL) {
-    UpdateQueryState(beeswax::QueryState::FINISHED);
     int num_rows = 0;
     const vector<TResultRow>& all_rows = (*(request_result_set_.get()));
     // max_rows <= 0 means no limit
@@ -772,9 +777,6 @@ Status ClientRequestState::FetchRowsInternal(const int32_t max_rows,
     if (num_rows_fetched_from_cache >= max_rows) return Status::OK();
   }
 
-  // results will be ready after this call
-  UpdateQueryState(beeswax::QueryState::FINISHED);
-
   // Maximum number of rows to be fetched from the coord.
   int32_t max_coord_rows = max_rows;
   if (max_rows > 0) {
@@ -875,13 +877,13 @@ Status ClientRequestState::Cancel(bool check_inflight, const Status* cause) {
   Coordinator* coord;
   {
     lock_guard<mutex> lock(lock_);
-    // If the query is completed or cancelled, no need to update state.
-    bool already_done = eos_ || query_state_ == beeswax::QueryState::EXCEPTION;
+    // If the query has reached a terminal state, no need to update the state.
+    bool already_done = eos_ || operation_state_ == TOperationState::ERROR_STATE;
     if (!already_done && cause != NULL) {
       DCHECK(!cause->ok());
       discard_result(UpdateQueryStatus(*cause));
       query_events_->MarkEvent("Cancelled");
-      DCHECK_EQ(query_state_, beeswax::QueryState::EXCEPTION);
+      DCHECK_EQ(operation_state_, TOperationState::ERROR_STATE);
     }
     // Get a copy of the coordinator pointer while holding 'lock_'.
     coord = coord_.get();
@@ -1100,10 +1102,23 @@ void ClientRequestState::ClearResultCache() {
   result_cache_.reset(NULL);
 }
 
-void ClientRequestState::UpdateQueryState(
-    beeswax::QueryState::type query_state) {
-  query_state_ = query_state;
-  summary_profile_->AddInfoString("Query State", PrintQueryState(query_state_));
+void ClientRequestState::UpdateOperationState(
+    TOperationState::type operation_state) {
+  operation_state_ = operation_state;
+  summary_profile_->AddInfoString("Query State", PrintQueryState(BeeswaxQueryState()));
+}
+
+beeswax::QueryState::type ClientRequestState::BeeswaxQueryState() const {
+  switch (operation_state_) {
+    case TOperationState::INITIALIZED_STATE: return beeswax::QueryState::CREATED;
+    case TOperationState::RUNNING_STATE: return beeswax::QueryState::RUNNING;
+    case TOperationState::FINISHED_STATE: return beeswax::QueryState::FINISHED;
+    case TOperationState::ERROR_STATE: return beeswax::QueryState::EXCEPTION;
+    default: {
+      DCHECK(false) << "Add explicit translation for all used TOperationState values";
+      return beeswax::QueryState::EXCEPTION;
+    }
+  }
 }
 
 }

http://git-wip-us.apache.org/repos/asf/impala/blob/46c95b52/be/src/service/client-request-state.h
----------------------------------------------------------------------
diff --git a/be/src/service/client-request-state.h b/be/src/service/client-request-state.h
index 0341ec5..0c05bc4 100644
--- a/be/src/service/client-request-state.h
+++ b/be/src/service/client-request-state.h
@@ -28,7 +28,7 @@
 #include "util/condition-variable.h"
 #include "util/runtime-profile.h"
 #include "gen-cpp/Frontend_types.h"
-#include "gen-cpp/Frontend_types.h"
+#include "gen-cpp/ImpalaHiveServer2Service.h"
 
 #include <boost/thread.hpp>
 #include <boost/unordered_set.hpp>
@@ -74,8 +74,8 @@ class ClientRequestState {
   Status Exec(const TMetadataOpRequest& exec_request) WARN_UNUSED_RESULT;
 
   /// Call this to ensure that rows are ready when calling FetchRows(). Updates the
-  /// query_status_, and advances query_state_ to FINISHED or EXCEPTION. Must be preceded
-  /// by call to Exec(). Waits for all child queries to complete. Takes lock_.
+  /// query_status_, and advances operation_state_ to FINISHED or EXCEPTION. Must be
+  /// preceded by call to Exec(). Waits for all child queries to complete. Takes lock_.
   void Wait();
 
   /// Calls Wait() asynchronously in a thread and returns immediately.
@@ -93,7 +93,7 @@ class ClientRequestState {
   /// Caller needs to hold fetch_rows_lock_ and lock_.
   /// Caller should verify that EOS has not be reached before calling.
   /// Must be preceeded by call to Wait() (or WaitAsync()/BlockOnWait()).
-  /// Also updates query_state_/status_ in case of error.
+  /// Also updates operation_state_/query_status_ in case of error.
   Status FetchRows(const int32_t max_rows, QueryResultSet* fetched_rows)
       WARN_UNUSED_RESULT;
 
@@ -106,11 +106,12 @@ class ClientRequestState {
   /// The caller must hold fetch_rows_lock_ and lock_.
   Status RestartFetch() WARN_UNUSED_RESULT;
 
-  /// Update query state if the requested state isn't already obsolete. This is only for
-  /// non-error states - if the query encounters an error the query status needs to be set
-  /// with information about the error so UpdateQueryStatus must be used instead.
-  /// Takes lock_.
-  void UpdateNonErrorQueryState(beeswax::QueryState::type query_state);
+  /// Update operation state if the requested state isn't already obsolete. This is
+  /// only for non-error states - if the query encounters an error the query status
+  /// needs to be set with information about the error so UpdateQueryStatus() must be
+  /// used instead. Takes lock_.
+  void UpdateNonErrorOperationState(
+      apache::hive::service::cli::thrift::TOperationState::type operation_state);
 
   /// Update the query status and the "Query Status" summary profile string.
   /// If current status is already != ok, no update is made (we preserve the first error)
@@ -124,7 +125,7 @@ class ClientRequestState {
 
   /// Cancels the child queries and the coordinator with the given cause.
   /// If cause is NULL, assume this was deliberately cancelled by the user.
-  /// Otherwise, sets state to EXCEPTION.
+  /// Otherwise, sets state to ERROR_STATE (TODO: use CANCELED_STATE).
   /// Does nothing if the query has reached EOS or already cancelled.
   ///
   /// Only returns an error if 'check_inflight' is true and the query is not yet
@@ -137,7 +138,7 @@ class ClientRequestState {
   void Done();
 
   /// Sets the API-specific (Beeswax, HS2) result cache and its size bound.
-  /// The given cache is owned by this query exec state, even if an error is returned.
+  /// The given cache is owned by this client request state, even if an error is returned.
   /// Returns a non-ok status if max_size exceeds the per-impalad allowed maximum.
   Status SetResultCache(QueryResultSet* cache, int64_t max_size) WARN_UNUSED_RESULT;
 
@@ -179,7 +180,12 @@ class ClientRequestState {
   }
   boost::mutex* lock() { return &lock_; }
   boost::mutex* fetch_rows_lock() { return &fetch_rows_lock_; }
-  beeswax::QueryState::type query_state() const { return query_state_; }
+  apache::hive::service::cli::thrift::TOperationState::type operation_state() const {
+    return operation_state_;
+  }
+  // Translate operation_state() to a beeswax::QueryState. TODO: remove calls to this
+  // and replace with uses of operation_state() directly.
+  beeswax::QueryState::type BeeswaxQueryState() const;
   const Status& query_status() const { return query_status_; }
   void set_result_metadata(const TResultSetMetadata& md) { result_metadata_ = md; }
   void set_user_profile_access(bool user_has_profile_access) {
@@ -335,15 +341,20 @@ class ClientRequestState {
 
   bool is_cancelled_ = false; // if true, Cancel() was called.
   bool eos_ = false;  // if true, there are no more rows to return
-  /// We enforce the invariant that query_status_ is not OK iff query_state_ is EXCEPTION,
-  /// given that lock_ is held. query_state_ should only be updated using
-  /// UpdateQueryState(), to ensure that the query profile is also updated.
-  beeswax::QueryState::type query_state_ = beeswax::QueryState::CREATED;
+
+  /// We enforce the invariant that query_status_ is not OK iff operation_state_ is
+  /// ERROR_STATE, given that lock_ is held. operation_state_ should only be updated
+  /// using UpdateOperationState(), to ensure that the query profile is also updated.
+  apache::hive::service::cli::thrift::TOperationState::type operation_state_ =
+      apache::hive::service::cli::thrift::TOperationState::INITIALIZED_STATE;
+
   Status query_status_;
   TExecRequest exec_request_;
+
   /// If true, effective_user() has access to the runtime profile and execution
   /// summary.
   bool user_has_profile_access_ = true;
+
   TResultSetMetadata result_metadata_; // metadata for select query
   RowBatch* current_batch_ = nullptr; // the current row batch; only applicable if coord is set
   int current_batch_row_ = 0 ; // number of rows fetched within the current batch
@@ -395,10 +406,10 @@ class ClientRequestState {
   /// Executes a LOAD DATA
   Status ExecLoadDataRequest() WARN_UNUSED_RESULT;
 
-  /// Core logic of Wait(). Does not update query_state_/status_.
+  /// Core logic of Wait(). Does not update operation_state_/query_status_.
   Status WaitInternal() WARN_UNUSED_RESULT;
 
-  /// Core logic of FetchRows(). Does not update query_state_/status_.
+  /// Core logic of FetchRows(). Does not update operation_state_/query_status_.
   /// Caller needs to hold fetch_rows_lock_ and lock_.
   Status FetchRowsInternal(const int32_t max_rows, QueryResultSet* fetched_rows)
       WARN_UNUSED_RESULT;
@@ -436,10 +447,11 @@ class ClientRequestState {
   /// This function is a no-op if the cache has already been cleared.
   void ClearResultCache();
 
-  /// Update the query state and the "Query State" summary profile string.
+  /// Update the operation state and the "Query State" summary profile string.
   /// Does not take lock_, but requires it: caller must ensure lock_
-  /// is taken before calling UpdateQueryState.
-  void UpdateQueryState(beeswax::QueryState::type query_state);
+  /// is taken before calling UpdateOperationState.
+  void UpdateOperationState(
+      apache::hive::service::cli::thrift::TOperationState::type operation_state);
 
   /// Gets the query options, their values and levels and populates the result set
   /// with them. It covers the subset of options for 'SET' and all of them for

http://git-wip-us.apache.org/repos/asf/impala/blob/46c95b52/be/src/service/impala-beeswax-server.cc
----------------------------------------------------------------------
diff --git a/be/src/service/impala-beeswax-server.cc b/be/src/service/impala-beeswax-server.cc
index 2ac80b2..b827ff3 100644
--- a/be/src/service/impala-beeswax-server.cc
+++ b/be/src/service/impala-beeswax-server.cc
@@ -65,7 +65,7 @@ void ImpalaServer::query(QueryHandle& query_handle, const Query& query) {
   RAISE_IF_ERROR(Execute(&query_ctx, session, &request_state),
       SQLSTATE_SYNTAX_ERROR_OR_ACCESS_VIOLATION);
 
-  request_state->UpdateNonErrorQueryState(beeswax::QueryState::RUNNING);
+  request_state->UpdateNonErrorOperationState(TOperationState::RUNNING_STATE);
   // start thread to wait for results to become available, which will allow
   // us to advance query state to FINISHED or EXCEPTION
   Status status = request_state->WaitAsync();
@@ -110,7 +110,7 @@ void ImpalaServer::executeAndWait(QueryHandle& query_handle, const Query& query,
   RAISE_IF_ERROR(Execute(&query_ctx, session, &request_state),
       SQLSTATE_SYNTAX_ERROR_OR_ACCESS_VIOLATION);
 
-  request_state->UpdateNonErrorQueryState(beeswax::QueryState::RUNNING);
+  request_state->UpdateNonErrorOperationState(TOperationState::RUNNING_STATE);
   // Once the query is running do a final check for session closure and add it to the
   // set of in-flight queries.
   Status status = SetQueryInflight(session, request_state);
@@ -255,9 +255,10 @@ beeswax::QueryState::type ImpalaServer::get_state(const QueryHandle& handle) {
   // Take the lock to ensure that if the client sees a query_state == EXCEPTION, it is
   // guaranteed to see the error query_status.
   lock_guard<mutex> l(*request_state->lock());
-  DCHECK_EQ(request_state->query_state() == beeswax::QueryState::EXCEPTION,
+  beeswax::QueryState::type query_state = request_state->BeeswaxQueryState();
+  DCHECK_EQ(query_state == beeswax::QueryState::EXCEPTION,
       !request_state->query_status().ok());
-  return request_state->query_state();
+  return query_state;
 }
 
 void ImpalaServer::echo(string& echo_string, const string& input_string) {
@@ -293,7 +294,7 @@ void ImpalaServer::get_log(string& log, const LogContextId& context) {
     // Take the lock to ensure that if the client sees a query_state == EXCEPTION, it is
     // guaranteed to see the error query_status.
     lock_guard<mutex> l(*request_state->lock());
-    DCHECK_EQ(request_state->query_state() == beeswax::QueryState::EXCEPTION,
+    DCHECK_EQ(request_state->BeeswaxQueryState() == beeswax::QueryState::EXCEPTION,
         !request_state->query_status().ok());
     // If the query status is !ok, include the status error message at the top of the log.
     if (!request_state->query_status().ok()) {

http://git-wip-us.apache.org/repos/asf/impala/blob/46c95b52/be/src/service/impala-hs2-server.cc
----------------------------------------------------------------------
diff --git a/be/src/service/impala-hs2-server.cc b/be/src/service/impala-hs2-server.cc
index e495dec..4381f81 100644
--- a/be/src/service/impala-hs2-server.cc
+++ b/be/src/service/impala-hs2-server.cc
@@ -86,18 +86,6 @@ namespace impala {
 
 const string IMPALA_RESULT_CACHING_OPT = "impala.resultset.cache.size";
 
-// Helper function to translate between Beeswax and HiveServer2 type
-static TOperationState::type QueryStateToTOperationState(
-    const beeswax::QueryState::type& query_state) {
-  switch (query_state) {
-    case beeswax::QueryState::CREATED: return TOperationState::INITIALIZED_STATE;
-    case beeswax::QueryState::RUNNING: return TOperationState::RUNNING_STATE;
-    case beeswax::QueryState::FINISHED: return TOperationState::FINISHED_STATE;
-    case beeswax::QueryState::EXCEPTION: return TOperationState::ERROR_STATE;
-    default: return TOperationState::UKNOWN_STATE;
-  }
-}
-
 void ImpalaServer::ExecuteMetadataOp(const THandleIdentifier& session_handle,
     TMetadataOpRequest* request, TOperationHandle* handle, thrift::TStatus* status) {
   TUniqueId session_id;
@@ -161,7 +149,7 @@ void ImpalaServer::ExecuteMetadataOp(const THandleIdentifier& session_handle,
     return;
   }
 
-  request_state->UpdateNonErrorQueryState(beeswax::QueryState::FINISHED);
+  request_state->UpdateNonErrorOperationState(TOperationState::FINISHED_STATE);
 
   Status inflight_status = SetQueryInflight(session, request_state);
   if (!inflight_status.ok()) {
@@ -466,7 +454,7 @@ void ImpalaServer::ExecuteStatement(TExecuteStatementResp& return_val,
         cache_num_rows);
     if (!status.ok()) goto return_error;
   }
-  request_state->UpdateNonErrorQueryState(beeswax::QueryState::RUNNING);
+  request_state->UpdateNonErrorOperationState(TOperationState::RUNNING_STATE);
   // Start thread to wait for results to become available.
   status = request_state->WaitAsync();
   if (!status.ok()) goto return_error;
@@ -651,8 +639,7 @@ void ImpalaServer::GetOperationStatus(TGetOperationStatusResp& return_val,
 
   {
     lock_guard<mutex> l(*request_state->lock());
-    TOperationState::type operation_state = QueryStateToTOperationState(
-        request_state->query_state());
+    TOperationState::type operation_state = request_state->operation_state();
     return_val.__set_operationState(operation_state);
     if (operation_state == TOperationState::ERROR_STATE) {
       DCHECK(!request_state->query_status().ok());

http://git-wip-us.apache.org/repos/asf/impala/blob/46c95b52/be/src/service/impala-http-handler.cc
----------------------------------------------------------------------
diff --git a/be/src/service/impala-http-handler.cc b/be/src/service/impala-http-handler.cc
index 0156023..43b03d1 100644
--- a/be/src/service/impala-http-handler.cc
+++ b/be/src/service/impala-http-handler.cc
@@ -42,6 +42,7 @@
 
 #include "common/names.h"
 
+using namespace apache::hive::service::cli::thrift;
 using namespace apache::thrift;
 using namespace beeswax;
 using namespace impala;
@@ -757,7 +758,7 @@ void ImpalaHttpHandler::QuerySummaryHandler(bool include_json_plan, bool include
       // state lock to be acquired, since it could potentially be an expensive
       // call, if the table Catalog metadata loading is in progress. Instead
       // update the caller that the plan information is unavailable.
-      if (request_state->query_state() == beeswax::QueryState::CREATED) {
+      if (request_state->operation_state() == TOperationState::INITIALIZED_STATE) {
         document->AddMember(
             "plan_metadata_unavailable", "true", document->GetAllocator());
         return;

http://git-wip-us.apache.org/repos/asf/impala/blob/46c95b52/be/src/service/impala-server.cc
----------------------------------------------------------------------
diff --git a/be/src/service/impala-server.cc b/be/src/service/impala-server.cc
index dd43dd1..ea88d73 100644
--- a/be/src/service/impala-server.cc
+++ b/be/src/service/impala-server.cc
@@ -98,10 +98,11 @@ using boost::get_system_time;
 using boost::system_time;
 using boost::uuids::random_generator;
 using boost::uuids::uuid;
+using namespace apache::hive::service::cli::thrift;
 using namespace apache::thrift;
 using namespace apache::thrift::transport;
-using namespace boost::posix_time;
 using namespace beeswax;
+using namespace boost::posix_time;
 using namespace rapidjson;
 using namespace strings;
 
@@ -615,8 +616,8 @@ Status ImpalaServer::GetRuntimeProfileStr(const TUniqueId& query_id,
   {
     shared_ptr<ClientRequestState> request_state = GetClientRequestState(query_id);
     if (request_state.get() != nullptr) {
-      // For queries in CREATED state, the profile information isn't populated yet.
-      if (request_state->query_state() == beeswax::QueryState::CREATED) {
+      // For queries in INITIALIZED_STATE, the profile information isn't populated yet.
+      if (request_state->operation_state() == TOperationState::INITIALIZED_STATE) {
         return Status::Expected("Query plan is not ready.");
       }
       lock_guard<mutex> l(*request_state->lock());
@@ -1689,7 +1690,7 @@ ImpalaServer::QueryStateRecord::QueryStateRecord(const ClientRequestState& reque
     total_fragments = coord->progress().total();
     has_coord = true;
   }
-  query_state = request_state.query_state();
+  query_state = request_state.BeeswaxQueryState();
   num_rows_fetched = request_state.num_rows_fetched();
   query_status = request_state.query_status();
 


[6/7] impala git commit: Use "mvn -B" in builds to avoid dowloading progress bars in logs.

Posted by ta...@apache.org.
Use "mvn -B" in builds to avoid dowloading progress bars in logs.

Maven's batch (or non-interactive) mode prevents progress bar output
when Maven is downloading artifacts, which isn't generally useful.
Now that we keep Maven logs in logs/mvn/mvn.log, this makes
them slightly more tidy.

Change-Id: I5aa117272c2a86b63b0f9062099a4145324eb6fc
Reviewed-on: http://gerrit.cloudera.org:8080/9792
Reviewed-by: Michael Brown <mi...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/00f131c8
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/00f131c8
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/00f131c8

Branch: refs/heads/2.x
Commit: 00f131c80e71c9d4cb76491d06105a298c4f7555
Parents: dadcf55
Author: Philip Zeyliger <ph...@cloudera.com>
Authored: Fri Mar 23 15:59:04 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Tue Mar 27 07:17:39 2018 +0000

----------------------------------------------------------------------
 common/yarn-extras/CMakeLists.txt | 2 +-
 ext-data-source/CMakeLists.txt    | 2 +-
 fe/CMakeLists.txt                 | 2 +-
 impala-parent/CMakeLists.txt      | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/00f131c8/common/yarn-extras/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/common/yarn-extras/CMakeLists.txt b/common/yarn-extras/CMakeLists.txt
index a7930de..2b5f005 100644
--- a/common/yarn-extras/CMakeLists.txt
+++ b/common/yarn-extras/CMakeLists.txt
@@ -16,5 +16,5 @@
 # under the License.
 
 add_custom_target(yarn-extras ALL DEPENDS impala-parent
-  COMMAND $ENV{IMPALA_HOME}/bin/mvn-quiet.sh install -DskipTests
+  COMMAND $ENV{IMPALA_HOME}/bin/mvn-quiet.sh -B install -DskipTests
 )

http://git-wip-us.apache.org/repos/asf/impala/blob/00f131c8/ext-data-source/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/ext-data-source/CMakeLists.txt b/ext-data-source/CMakeLists.txt
index 2b58f4e..2cd2b20 100644
--- a/ext-data-source/CMakeLists.txt
+++ b/ext-data-source/CMakeLists.txt
@@ -16,5 +16,5 @@
 # under the License.
 
 add_custom_target(ext-data-source ALL DEPENDS gen-deps impala-parent
-  COMMAND $ENV{IMPALA_HOME}/bin/mvn-quiet.sh install -DskipTests
+  COMMAND $ENV{IMPALA_HOME}/bin/mvn-quiet.sh -B install -DskipTests
 )

http://git-wip-us.apache.org/repos/asf/impala/blob/00f131c8/fe/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/fe/CMakeLists.txt b/fe/CMakeLists.txt
index 3f15554..789f509 100644
--- a/fe/CMakeLists.txt
+++ b/fe/CMakeLists.txt
@@ -17,5 +17,5 @@
 
 add_custom_target(fe ALL DEPENDS
   thrift-deps fb-deps yarn-extras function-registry ext-data-source impala-parent
-  COMMAND $ENV{IMPALA_HOME}/bin/mvn-quiet.sh install -DskipTests
+  COMMAND $ENV{IMPALA_HOME}/bin/mvn-quiet.sh -B install -DskipTests
 )

http://git-wip-us.apache.org/repos/asf/impala/blob/00f131c8/impala-parent/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/impala-parent/CMakeLists.txt b/impala-parent/CMakeLists.txt
index 39507f5..fdd6e98 100644
--- a/impala-parent/CMakeLists.txt
+++ b/impala-parent/CMakeLists.txt
@@ -16,5 +16,5 @@
 # under the License.
 
 add_custom_target(impala-parent ALL
-  COMMAND $ENV{IMPALA_HOME}/bin/mvn-quiet.sh install -DskipTests
+  COMMAND $ENV{IMPALA_HOME}/bin/mvn-quiet.sh -B install -DskipTests
 )


[2/7] impala git commit: IMPALA-6731: Use private index in bootstrap_virtualenv

Posted by ta...@apache.org.
IMPALA-6731: Use private index in bootstrap_virtualenv

This change switches to using a private pypi index url when using a
private pypi mirror. This allows to run the tests without relying on the
public Python pypi mirrors.

Some packages can not detect their dependencies correctly when they get
installed together with the dependencies in the same call to pip. This
change adds a second stage of package installation to separate these
packages from their dependencies.

It also adds a few missing packages and updates some packages to newer
versions.

Testing: Ran this on a box where I blocked DNS resolution to Python's
upstream pypi.

Change-Id: I85f75f1f1a305f3043e0910ab88a880eeb30f00b
Reviewed-on: http://gerrit.cloudera.org:8080/9798
Reviewed-by: Philip Zeyliger <ph...@cloudera.com>
Tested-by: Lars Volker <lv...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/5ce2bc44
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/5ce2bc44
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/5ce2bc44

Branch: refs/heads/2.x
Commit: 5ce2bc4475ed45f216ae622d6dde18fff21c1d92
Parents: 46c95b5
Author: Lars Volker <lv...@cloudera.com>
Authored: Fri Mar 23 21:50:34 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Tue Mar 27 03:35:00 2018 +0000

----------------------------------------------------------------------
 infra/python/bootstrap_virtualenv.py        | 33 ++++++++++++++++++++----
 infra/python/deps/compiled-requirements.txt |  5 ++--
 infra/python/deps/pip_download.py           |  5 ++--
 infra/python/deps/requirements.txt          | 11 ++++----
 infra/python/deps/stage2-requirements.txt   | 32 +++++++++++++++++++++++
 5 files changed, 71 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/5ce2bc44/infra/python/bootstrap_virtualenv.py
----------------------------------------------------------------------
diff --git a/infra/python/bootstrap_virtualenv.py b/infra/python/bootstrap_virtualenv.py
index e26aaf3..fe7695d 100644
--- a/infra/python/bootstrap_virtualenv.py
+++ b/infra/python/bootstrap_virtualenv.py
@@ -21,6 +21,8 @@
 # A multi-step bootstrapping process is required to build and install all of the
 # dependencies:
 # 1. install basic non-C/C++ packages into the virtualenv
+# 1b. install packages that depend on step 1 but cannot be installed together with their
+#     dependencies
 # 2. use the virtualenv Python to bootstrap the toolchain
 # 3. use toolchain gcc to build C/C++ packages
 # 4. build the kudu-python package with toolchain gcc and Cython
@@ -49,9 +51,13 @@ LOG = logging.getLogger(os.path.splitext(os.path.basename(__file__))[0])
 DEPS_DIR = os.path.join(os.path.dirname(__file__), "deps")
 ENV_DIR = os.path.join(os.path.dirname(__file__), "env")
 
-# Generated using "pip install --download <DIR> -r requirements.txt"
+# Requirements file with packages we need for our build and tests.
 REQS_PATH = os.path.join(DEPS_DIR, "requirements.txt")
 
+# Second stage of requirements which cannot be installed together with their dependencies
+# in requirements.txt.
+REQS2_PATH = os.path.join(DEPS_DIR, "stage2-requirements.txt")
+
 # Requirements for the next bootstrapping step that builds compiled requirements
 # with toolchain gcc.
 COMPILED_REQS_PATH = os.path.join(DEPS_DIR, "compiled-requirements.txt")
@@ -140,9 +146,23 @@ def exec_pip_install(args, cc="no-cc-available", env=None):
   #
   # --no-cache-dir is used to prevent caching of compiled artifacts, which may be built
   # with different compilers or settings.
-  exec_cmd([os.path.join(ENV_DIR, "bin", "python"), os.path.join(ENV_DIR, "bin", "pip"),
-    "install", "--no-binary", "--no-index", "--no-cache-dir", "--find-links",
-    "file://%s" % urllib.pathname2url(os.path.abspath(DEPS_DIR))] + args, env=env)
+  cmd = [os.path.join(ENV_DIR, "bin", "python"), os.path.join(ENV_DIR, "bin", "pip"),
+    "install", "-v", "--no-binary", "--no-cache-dir"]
+
+  # When using a custom mirror, we also must use the index of that mirror.
+  if "PYPI_MIRROR" in os.environ:
+    cmd.extend(["--index-url", "%s/simple" % os.environ["PYPI_MIRROR"]])
+  else:
+    # Prevent fetching additional packages from the index. If we forget to add a package
+    # to one of the requirements.txt files, this should trigger an error. However, we will
+    # still access the index for version/dependency resolution, hence we need to change it
+    # when using a private mirror.
+    cmd.append("--no-index")
+
+  cmd.extend(["--find-links",
+      "file://%s" % urllib.pathname2url(os.path.abspath(DEPS_DIR))])
+  cmd.extend(args)
+  exec_cmd(cmd, env=env)
 
 
 def find_file(*paths):
@@ -181,6 +201,9 @@ def install_deps():
   LOG.info("Installing packages into the virtualenv")
   exec_pip_install(["-r", REQS_PATH])
   mark_reqs_installed(REQS_PATH)
+  LOG.info("Installing stage 2 packages into the virtualenv")
+  exec_pip_install(["-r", REQS2_PATH])
+  mark_reqs_installed(REQS2_PATH)
 
 def have_toolchain():
   '''Return true if the Impala toolchain is available'''
@@ -335,7 +358,7 @@ def reqs_are_installed(reqs_path):
     installed_reqs_file.close()
 
 def setup_virtualenv_if_not_exists():
-  if not reqs_are_installed(REQS_PATH):
+  if not (reqs_are_installed(REQS_PATH) and reqs_are_installed(REQS2_PATH)):
     delete_virtualenv_if_exist()
     create_virtualenv()
     install_deps()

http://git-wip-us.apache.org/repos/asf/impala/blob/5ce2bc44/infra/python/deps/compiled-requirements.txt
----------------------------------------------------------------------
diff --git a/infra/python/deps/compiled-requirements.txt b/infra/python/deps/compiled-requirements.txt
index b3f9f4d..289a78e 100644
--- a/infra/python/deps/compiled-requirements.txt
+++ b/infra/python/deps/compiled-requirements.txt
@@ -26,12 +26,13 @@ Fabric == 1.10.2
 impyla == 0.14.0
   bitarray == 0.8.1
   sasl == 0.1.3
-  six == 1.9.0
+  six == 1.11.0
   # Thrift usually comes from the thirdparty dir but in case the virtualenv is needed
   # before thirdparty is built thrift will be installed anyways.
   thrift == 0.9.0
-  thrift_sasl == 0.1.0
+  thrift-sasl == 0.1.0
 psutil == 0.7.1
 # Required for Kudu:
   Cython == 0.23.4
   numpy == 1.10.4
+  pytz == 2018.3

http://git-wip-us.apache.org/repos/asf/impala/blob/5ce2bc44/infra/python/deps/pip_download.py
----------------------------------------------------------------------
diff --git a/infra/python/deps/pip_download.py b/infra/python/deps/pip_download.py
index 2e84426..6fbb683 100755
--- a/infra/python/deps/pip_download.py
+++ b/infra/python/deps/pip_download.py
@@ -35,8 +35,9 @@ NUM_DOWNLOAD_ATTEMPTS = 8
 PYPI_MIRROR = os.environ.get('PYPI_MIRROR', 'https://pypi.python.org')
 
 # The requirement files that list all of the required packages and versions.
-REQUIREMENTS_FILES = ['requirements.txt', 'compiled-requirements.txt',
-                      'kudu-requirements.txt', 'adls-requirements.txt']
+REQUIREMENTS_FILES = ['requirements.txt', 'stage2-requirements.txt',
+                      'compiled-requirements.txt', 'kudu-requirements.txt',
+                      'adls-requirements.txt']
 
 
 def check_digest(filename, algorithm, expected_digest):

http://git-wip-us.apache.org/repos/asf/impala/blob/5ce2bc44/infra/python/deps/requirements.txt
----------------------------------------------------------------------
diff --git a/infra/python/deps/requirements.txt b/infra/python/deps/requirements.txt
index abf5d7d..bea16f4 100644
--- a/infra/python/deps/requirements.txt
+++ b/infra/python/deps/requirements.txt
@@ -27,6 +27,7 @@ boto3 == 1.2.3
   simplejson == 3.3.0 # For python version 2.6
   botocore == 1.3.30
   python_dateutil == 2.5.2
+    six == 1.11.0
   docutils == 0.12
   jmespath == 0.9.0
   futures == 3.0.5
@@ -47,13 +48,11 @@ pexpect == 3.3
 pg8000 == 1.10.2
 prettytable == 0.7.2
 pyparsing == 2.0.3
-pytest == 2.9.2
-  py == 1.4.32
-pytest-random == 0.02
-pytest-xdist == 1.15.0
 python-magic == 0.4.11
-pywebhdfs == 0.3.2
-  pbr == 1.8.1
+# pbr is required for pywebhdfs but must be installed in a separate call to pip before
+# attempting to install pywebhdfs (https://github.com/pywebhdfs/pywebhdfs/issues/52).
+# pywebhdfs itself will be installed in stage 2.
+  pbr == 3.1.1
 requests == 2.7.0
 # Newer versions of setuptools don't support Python 2.6
 setuptools == 36.8.0

http://git-wip-us.apache.org/repos/asf/impala/blob/5ce2bc44/infra/python/deps/stage2-requirements.txt
----------------------------------------------------------------------
diff --git a/infra/python/deps/stage2-requirements.txt b/infra/python/deps/stage2-requirements.txt
new file mode 100644
index 0000000..eda2cd3
--- /dev/null
+++ b/infra/python/deps/stage2-requirements.txt
@@ -0,0 +1,32 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This file contains packages that have dependencies in requirements.txt and that have to
+# be installed in a separate invocation of pip.
+
+# Remember, all modules below need to support python 2.6.
+
+# Requires setuptools-scm
+pytest == 2.9.2
+  py == 1.4.32
+  pytest-forked == 0.2
+  pytest-random == 0.02
+  pytest-runner == 4.2
+  pytest-xdist == 1.17.1
+
+# Requires pbr
+pywebhdfs == 0.3.2


[5/7] impala git commit: KUDU-2374: Add RpcContext::GetTimeReceived()

Posted by ta...@apache.org.
KUDU-2374: Add RpcContext::GetTimeReceived()

This change adds RpcContext::GetTimeReceived() which returns
the time at which the inbound call associated with the RpcContext
was received. It's helpful to make this accessible to the RPC
handlers for its own book-keeping purpose (e.g. reporting the
average dispatch latency as part of query profile in Impala).

Change-Id: I6b39c7f2ea856eccfdab8c1bb1433829e979ae13
Reviewed-on: http://gerrit.cloudera.org:8080/9796
Tested-by: Kudu Jenkins
Reviewed-by: Todd Lipcon <to...@apache.org>
Reviewed-on: http://gerrit.cloudera.org:8080/9807
Reviewed-by: Joe McDonnell <jo...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/dadcf557
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/dadcf557
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/dadcf557

Branch: refs/heads/2.x
Commit: dadcf557fdd14f40a5000a1862a2463fd5af7a7f
Parents: 5ce2bc4
Author: Michael Ho <kw...@cloudera.com>
Authored: Sat Mar 24 17:49:58 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Tue Mar 27 03:35:01 2018 +0000

----------------------------------------------------------------------
 be/src/kudu/rpc/rpc-test-base.h | 3 +++
 be/src/kudu/rpc/rpc_context.cc  | 4 ++++
 be/src/kudu/rpc/rpc_context.h   | 3 +++
 3 files changed, 10 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/dadcf557/be/src/kudu/rpc/rpc-test-base.h
----------------------------------------------------------------------
diff --git a/be/src/kudu/rpc/rpc-test-base.h b/be/src/kudu/rpc/rpc-test-base.h
index 332a7a1..5a5652e 100644
--- a/be/src/kudu/rpc/rpc-test-base.h
+++ b/be/src/kudu/rpc/rpc-test-base.h
@@ -41,6 +41,7 @@
 #include "kudu/util/env.h"
 #include "kudu/util/faststring.h"
 #include "kudu/util/mem_tracker.h"
+#include "kudu/util/monotime.h"
 #include "kudu/util/net/sockaddr.h"
 #include "kudu/util/path_util.h"
 #include "kudu/util/pb_util.h"
@@ -210,6 +211,8 @@ class GenericCalculatorService : public ServiceIf {
 
     LOG(INFO) << "got call: " << SecureShortDebugString(req);
     SleepFor(MonoDelta::FromMicroseconds(req.sleep_micros()));
+    MonoDelta duration(MonoTime::Now().GetDeltaSince(incoming->GetTimeReceived()));
+    CHECK_GE(duration.ToMicroseconds(), req.sleep_micros());
     SleepResponsePB resp;
     incoming->RespondSuccess(resp);
   }

http://git-wip-us.apache.org/repos/asf/impala/blob/dadcf557/be/src/kudu/rpc/rpc_context.cc
----------------------------------------------------------------------
diff --git a/be/src/kudu/rpc/rpc_context.cc b/be/src/kudu/rpc/rpc_context.cc
index 274966b..3f5d556 100644
--- a/be/src/kudu/rpc/rpc_context.cc
+++ b/be/src/kudu/rpc/rpc_context.cc
@@ -183,6 +183,10 @@ MonoTime RpcContext::GetClientDeadline() const {
   return call_->GetClientDeadline();
 }
 
+MonoTime RpcContext::GetTimeReceived() const {
+  return call_->GetTimeReceived();
+}
+
 Trace* RpcContext::trace() {
   return call_->trace();
 }

http://git-wip-us.apache.org/repos/asf/impala/blob/dadcf557/be/src/kudu/rpc/rpc_context.h
----------------------------------------------------------------------
diff --git a/be/src/kudu/rpc/rpc_context.h b/be/src/kudu/rpc/rpc_context.h
index 938576c..353bedd 100644
--- a/be/src/kudu/rpc/rpc_context.h
+++ b/be/src/kudu/rpc/rpc_context.h
@@ -192,6 +192,9 @@ class RpcContext {
   // If the client did not specify a deadline, returns MonoTime::Max().
   MonoTime GetClientDeadline() const;
 
+  // Return the time when the inbound call was received.
+  MonoTime GetTimeReceived() const;
+
   // Whether the results of this RPC are tracked with a ResultTracker.
   // If this returns true, both result_tracker() and request_id() should return non-null results.
   bool AreResultsTracked() const { return result_tracker_.get() != nullptr; }


[7/7] impala git commit: IMPALA-6728: Always use Kudu based kinit if FLAGS_use_krpc=true

Posted by ta...@apache.org.
IMPALA-6728: Always use Kudu based kinit if FLAGS_use_krpc=true

We rely on the KPRC logic to do the Kerberos authentication
when KRPC is enabled. Therefore, when FLAGS_ues_krpc=true,
we must always call kudu::security::InitKerberosForServer()
to initialize the Kerberos related logic. This change makes
Impala ignore FLAGS_use_kudu_kinit=false when FLAGS_use_krpc=true.

Change-Id: Ia7086e5c9b460233e9e957f886141b3e6bba414b
Reviewed-on: http://gerrit.cloudera.org:8080/9797
Reviewed-by: Michael Ho <kw...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/3d52f8c9
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/3d52f8c9
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/3d52f8c9

Branch: refs/heads/2.x
Commit: 3d52f8c99955f01721cf5abdf3277490008dae60
Parents: 00f131c
Author: Michael Ho <kw...@cloudera.com>
Authored: Sat Mar 24 17:25:39 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Tue Mar 27 11:01:40 2018 +0000

----------------------------------------------------------------------
 be/src/rpc/auth-provider.h            |  6 +--
 be/src/rpc/authentication.cc          | 13 ++++--
 be/src/rpc/rpc-mgr-kerberized-test.cc | 65 +++++++++++++++++++---------
 be/src/rpc/rpc-mgr-test-base.h        |  6 ---
 be/src/rpc/thrift-server-test.cc      | 69 +++++++++++++++++++-----------
 be/src/testutil/mini-kdc-wrapper.cc   | 59 +++++++++++--------------
 be/src/testutil/mini-kdc-wrapper.h    | 44 ++++++++++---------
 7 files changed, 149 insertions(+), 113 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/3d52f8c9/be/src/rpc/auth-provider.h
----------------------------------------------------------------------
diff --git a/be/src/rpc/auth-provider.h b/be/src/rpc/auth-provider.h
index ee3bc5f..3e5517f 100644
--- a/be/src/rpc/auth-provider.h
+++ b/be/src/rpc/auth-provider.h
@@ -143,9 +143,9 @@ class SaslAuthProvider : public AuthProvider {
   /// function as a client.
   bool needs_kinit_;
 
-  /// Runs "RunKinit" below if needs_kinit_ is true and FLAGS_use_kudu_kinit is false.
-  /// Once started, this thread lives as long as the process does and periodically forks
-  /// impalad and execs the 'kinit' process.
+  /// Runs "RunKinit" below if needs_kinit_ is true and FLAGS_use_kudu_kinit is false
+  /// and FLAGS_use_krpc is false. Once started, this thread lives as long as the process
+  /// does and periodically forks impalad and execs the 'kinit' process.
   std::unique_ptr<Thread> kinit_thread_;
 
   /// Periodically (roughly once every FLAGS_kerberos_reinit_interval minutes) calls kinit

http://git-wip-us.apache.org/repos/asf/impala/blob/3d52f8c9/be/src/rpc/authentication.cc
----------------------------------------------------------------------
diff --git a/be/src/rpc/authentication.cc b/be/src/rpc/authentication.cc
index fc41723..4c3df50 100644
--- a/be/src/rpc/authentication.cc
+++ b/be/src/rpc/authentication.cc
@@ -67,6 +67,7 @@ using namespace apache::thrift;
 using namespace boost::filesystem;   // for is_regular()
 using namespace strings;
 
+DECLARE_bool(use_krpc);
 DECLARE_string(keytab_file);
 DECLARE_string(principal);
 DECLARE_string(be_principal);
@@ -107,11 +108,12 @@ DEFINE_string(internal_principals_whitelist, "hdfs", "(Advanced) Comma-separated
     "'hdfs' which is the system user that in certain deployments must access "
     "catalog server APIs.");
 
-// TODO: Remove this flag and the old kerberos code in a compatibility-breaking release.
+// TODO: Remove this flag and the old kerberos code once we remove 'use_krpc' flag.
 // (IMPALA-5893)
 DEFINE_bool(use_kudu_kinit, true, "If true, Impala will programatically perform kinit "
     "by calling into the libkrb5 library using the provided APIs. If false, it will fork "
-    "off a kinit process.");
+    "off a kinit process. If use_krpc=true, this flag is treated as true regardless of "
+    "what it's set to.");
 
 namespace impala {
 
@@ -840,7 +842,12 @@ Status SaslAuthProvider::Start() {
   if (needs_kinit_) {
     DCHECK(is_internal_);
     DCHECK(!principal_.empty());
-    if (FLAGS_use_kudu_kinit) {
+    if (FLAGS_use_kudu_kinit || FLAGS_use_krpc) {
+      // With KRPC enabled, we always rely on the Kudu library to carry out the Kerberos
+      // authentication during connection negotiation.
+      if (!FLAGS_use_kudu_kinit) {
+        LOG(INFO) << "Ignoring --use_kudu_kinit=false as KRPC and Kerberos are enabled";
+      }
       // Starts a thread that periodically does a 'kinit'. The thread lives as long as the
       // process does.
       KUDU_RETURN_IF_ERROR(kudu::security::InitKerberosForServer(principal_, keytab_file_,

http://git-wip-us.apache.org/repos/asf/impala/blob/3d52f8c9/be/src/rpc/rpc-mgr-kerberized-test.cc
----------------------------------------------------------------------
diff --git a/be/src/rpc/rpc-mgr-kerberized-test.cc b/be/src/rpc/rpc-mgr-kerberized-test.cc
index 6244c2d..141f359 100644
--- a/be/src/rpc/rpc-mgr-kerberized-test.cc
+++ b/be/src/rpc/rpc-mgr-kerberized-test.cc
@@ -18,45 +18,46 @@
 #include "rpc/rpc-mgr-test-base.h"
 #include "service/fe-support.h"
 
+DECLARE_bool(use_kudu_kinit);
+DECLARE_bool(use_krpc);
+
+DECLARE_string(be_principal);
+DECLARE_string(hostname);
+DECLARE_string(principal);
 DECLARE_string(ssl_client_ca_certificate);
 DECLARE_string(ssl_server_certificate);
 DECLARE_string(ssl_private_key);
 
-namespace impala {
+// The principal name and the realm used for creating the mini-KDC.
+// To be initialized at main().
+static string kdc_principal;
+static string kdc_realm;
 
-static int kdc_port = GetServerPort();
+namespace impala {
 
 class RpcMgrKerberizedTest :
     public RpcMgrTestBase<testing::TestWithParam<KerberosSwitch> > {
-  virtual void SetUp() override {
-    IpAddr ip;
-    ASSERT_OK(HostnameToIpAddr(FLAGS_hostname, &ip));
-    string spn = Substitute("impala-test/$0", ip);
 
-    kdc_wrapper_.reset(new MiniKdcWrapper(
-        std::move(spn), "KRBTEST.COM", "24h", "7d", kdc_port));
-    DCHECK(kdc_wrapper_.get() != nullptr);
-
-    ASSERT_OK(kdc_wrapper_->SetupAndStartMiniKDC(GetParam()));
+  virtual void SetUp() override {
+    KerberosSwitch k = GetParam();
+    FLAGS_use_krpc = true;
+    FLAGS_use_kudu_kinit = k == USE_KRPC_KUDU_KERBEROS;
+    FLAGS_principal = "dummy-service/host@realm";
+    FLAGS_be_principal = strings::Substitute("$0@$1", kdc_principal, kdc_realm);
     ASSERT_OK(InitAuth(CURRENT_EXECUTABLE_PATH));
-
     RpcMgrTestBase::SetUp();
   }
 
   virtual void TearDown() override {
-    ASSERT_OK(kdc_wrapper_->TearDownMiniKDC(GetParam()));
-    RpcMgrTestBase::TearDown();
+    FLAGS_principal.clear();
+    FLAGS_be_principal.clear();
   }
-
- private:
-  boost::scoped_ptr<MiniKdcWrapper> kdc_wrapper_;
 };
 
-// TODO: IMPALA-6477: This test breaks on CentOS 6.4. Re-enable after a fix.
 INSTANTIATE_TEST_CASE_P(KerberosOnAndOff,
                         RpcMgrKerberizedTest,
-                        ::testing::Values(USE_KUDU_KERBEROS,
-                                          USE_IMPALA_KERBEROS));
+                        ::testing::Values(USE_KRPC_IMPALA_KERBEROS,
+                                          USE_KRPC_KUDU_KERBEROS));
 
 TEST_P(RpcMgrKerberizedTest, MultipleServicesTls) {
   // TODO: We're starting a seperate RpcMgr here instead of configuring
@@ -81,12 +82,34 @@ TEST_P(RpcMgrKerberizedTest, MultipleServicesTls) {
 
 } // namespace impala
 
+using impala::Status;
+
 int main(int argc, char** argv) {
   ::testing::InitGoogleTest(&argc, argv);
   impala::InitCommonRuntime(argc, argv, true, impala::TestInfo::BE_TEST);
   impala::InitFeSupport();
 
+  // Set up and start KDC.
+  impala::IpAddr ip;
+  impala::Status status = impala::HostnameToIpAddr(FLAGS_hostname, &ip);
+  DCHECK(status.ok());
+  kdc_principal = Substitute("impala-test/$0", ip);
+  kdc_realm = "KRBTEST.COM";
+
+  int port = impala::FindUnusedEphemeralPort(nullptr);
+  std::unique_ptr<impala::MiniKdcWrapper> kdc;
+  status = impala::MiniKdcWrapper::SetupAndStartMiniKDC(
+      kdc_principal, kdc_realm, "24h", "7d", port, &kdc);
+  DCHECK(status.ok());
+
   // Fill in the path of the current binary for use by the tests.
   CURRENT_EXECUTABLE_PATH = argv[0];
-  return RUN_ALL_TESTS();
+  int retval = RUN_ALL_TESTS();
+
+  // Shutdown KDC.
+  status = kdc->TearDownMiniKDC();
+  DCHECK(status.ok());
+
+  return retval;
+
 }

http://git-wip-us.apache.org/repos/asf/impala/blob/3d52f8c9/be/src/rpc/rpc-mgr-test-base.h
----------------------------------------------------------------------
diff --git a/be/src/rpc/rpc-mgr-test-base.h b/be/src/rpc/rpc-mgr-test-base.h
index ce063f8..f994fd8 100644
--- a/be/src/rpc/rpc-mgr-test-base.h
+++ b/be/src/rpc/rpc-mgr-test-base.h
@@ -67,12 +67,6 @@ namespace impala {
 
 static int32_t SERVICE_PORT = FindUnusedEphemeralPort(nullptr);
 
-int GetServerPort() {
-  int port = FindUnusedEphemeralPort(nullptr);
-  EXPECT_FALSE(port == -1);
-  return port;
-}
-
 const static string IMPALA_HOME(getenv("IMPALA_HOME"));
 const string& SERVER_CERT =
     Substitute("$0/be/src/testutil/server-cert.pem", IMPALA_HOME);

http://git-wip-us.apache.org/repos/asf/impala/blob/3d52f8c9/be/src/rpc/thrift-server-test.cc
----------------------------------------------------------------------
diff --git a/be/src/rpc/thrift-server-test.cc b/be/src/rpc/thrift-server-test.cc
index 8bd7275..f0a0bc5 100644
--- a/be/src/rpc/thrift-server-test.cc
+++ b/be/src/rpc/thrift-server-test.cc
@@ -35,6 +35,11 @@ using namespace strings;
 using namespace apache::thrift;
 using apache::thrift::transport::SSLProtocol;
 
+DECLARE_bool(use_kudu_kinit);
+DECLARE_bool(use_krpc);
+
+DECLARE_string(principal);
+DECLARE_string(be_principal);
 DECLARE_string(ssl_client_ca_certificate);
 DECLARE_string(ssl_cipher_list);
 DECLARE_string(ssl_minimum_version);
@@ -44,22 +49,26 @@ DECLARE_int32(state_store_port);
 DECLARE_int32(be_port);
 DECLARE_int32(beeswax_port);
 
-string IMPALA_HOME(getenv("IMPALA_HOME"));
-const string& SERVER_CERT =
+static string IMPALA_HOME(getenv("IMPALA_HOME"));
+static const string& SERVER_CERT =
     Substitute("$0/be/src/testutil/server-cert.pem", IMPALA_HOME);
-const string& PRIVATE_KEY =
+static const string& PRIVATE_KEY =
     Substitute("$0/be/src/testutil/server-key.pem", IMPALA_HOME);
-const string& BAD_SERVER_CERT =
+static const string& BAD_SERVER_CERT =
     Substitute("$0/be/src/testutil/bad-cert.pem", IMPALA_HOME);
-const string& BAD_PRIVATE_KEY =
+static const string& BAD_PRIVATE_KEY =
     Substitute("$0/be/src/testutil/bad-key.pem", IMPALA_HOME);
-const string& PASSWORD_PROTECTED_PRIVATE_KEY =
+static const string& PASSWORD_PROTECTED_PRIVATE_KEY =
     Substitute("$0/be/src/testutil/server-key-password.pem", IMPALA_HOME);
 
+// The principal name and the realm used for creating the mini-KDC.
+static const string kdc_principal = "impala/localhost";
+static const string kdc_realm = "KRBTEST.COM";
+
 // Only use TLSv1.0 compatible ciphers, as tests might run on machines with only TLSv1.0
 // support.
-const string TLS1_0_COMPATIBLE_CIPHER = "RC4-SHA";
-const string TLS1_0_COMPATIBLE_CIPHER_2 = "RC4-MD5";
+static const string TLS1_0_COMPATIBLE_CIPHER = "RC4-SHA";
+static const string TLS1_0_COMPATIBLE_CIPHER_2 = "RC4-MD5";
 
 /// Dummy server class (chosen because it has the smallest interface to implement) that
 /// tests can use to start Thrift servers.
@@ -81,8 +90,6 @@ int GetServerPort() {
   return port;
 }
 
-static int kdc_port = GetServerPort();
-
 template <class T> class ThriftTestBase : public T {
  protected:
   virtual void SetUp() {}
@@ -95,31 +102,33 @@ static string CURRENT_EXECUTABLE_PATH;
 
 class ThriftKerberizedParamsTest :
     public ThriftTestBase<testing::TestWithParam<KerberosSwitch> > {
-  virtual void SetUp() {
-    kdc_wrapper_.reset(new MiniKdcWrapper(
-        "impala/localhost", "KRBTEST.COM", "24h", "7d", kdc_port));
-    DCHECK(kdc_wrapper_.get() != nullptr);
 
-    ASSERT_OK(kdc_wrapper_->SetupAndStartMiniKDC(GetParam()));
+  virtual void SetUp() override {
+    KerberosSwitch k = GetParam();
+    FLAGS_use_krpc = false;
+    if (k == KERBEROS_OFF) {
+      FLAGS_principal.clear();
+      FLAGS_be_principal.clear();
+    } else {
+      FLAGS_use_kudu_kinit = k == USE_THRIFT_KUDU_KERBEROS;
+      FLAGS_principal = "dummy-service/host@realm";
+      FLAGS_be_principal = strings::Substitute("$0@$1", kdc_principal, kdc_realm);
+    }
     ASSERT_OK(InitAuth(CURRENT_EXECUTABLE_PATH));
-
     ThriftTestBase::SetUp();
   }
 
-  virtual void TearDown() {
-    ASSERT_OK(kdc_wrapper_->TearDownMiniKDC(GetParam()));
-    ThriftTestBase::TearDown();
+  virtual void TearDown() override {
+    FLAGS_principal.clear();
+    FLAGS_be_principal.clear();
   }
-
- private:
-  boost::scoped_ptr<MiniKdcWrapper> kdc_wrapper_;
 };
 
 INSTANTIATE_TEST_CASE_P(KerberosOnAndOff,
                         ThriftKerberizedParamsTest,
                         ::testing::Values(KERBEROS_OFF,
-                                          USE_KUDU_KERBEROS,
-                                          USE_IMPALA_KERBEROS));
+                                          USE_THRIFT_KUDU_KERBEROS,
+                                          USE_THRIFT_IMPALA_KERBEROS));
 
 TEST(ThriftTestBase, Connectivity) {
   int port = GetServerPort();
@@ -557,7 +566,17 @@ int main(int argc, char** argv) {
   ::testing::InitGoogleTest(&argc, argv);
   impala::InitCommonRuntime(argc, argv, false, impala::TestInfo::BE_TEST);
 
+  int port = impala::FindUnusedEphemeralPort(nullptr);
+  std::unique_ptr<impala::MiniKdcWrapper> kdc;
+  Status status = impala::MiniKdcWrapper::SetupAndStartMiniKDC(
+      kdc_principal, kdc_realm, "24h", "7d", port, &kdc);
+  DCHECK(status.ok());
+
   // Fill in the path of the current binary for use by the tests.
   CURRENT_EXECUTABLE_PATH = argv[0];
-  return RUN_ALL_TESTS();
+  int retval = RUN_ALL_TESTS();
+
+  status = kdc->TearDownMiniKDC();
+  DCHECK(status.ok());
+  return retval;
 }

http://git-wip-us.apache.org/repos/asf/impala/blob/3d52f8c9/be/src/testutil/mini-kdc-wrapper.cc
----------------------------------------------------------------------
diff --git a/be/src/testutil/mini-kdc-wrapper.cc b/be/src/testutil/mini-kdc-wrapper.cc
index eb9d9f1..526e5b1 100644
--- a/be/src/testutil/mini-kdc-wrapper.cc
+++ b/be/src/testutil/mini-kdc-wrapper.cc
@@ -30,11 +30,7 @@ using namespace impala;
 namespace filesystem = boost::filesystem;
 using filesystem::path;
 
-DECLARE_bool(use_kudu_kinit);
-
 DECLARE_string(keytab_file);
-DECLARE_string(principal);
-DECLARE_string(be_principal);
 DECLARE_string(krb5_conf);
 
 Status MiniKdcWrapper::StartKdc(string keytab_dir) {
@@ -64,46 +60,41 @@ Status MiniKdcWrapper::CreateServiceKeytab(const string& spn, string* kt_path) {
   return Status::OK();
 }
 
-Status MiniKdcWrapper::SetupAndStartMiniKDC(KerberosSwitch k) {
-  if (k != KERBEROS_OFF) {
-    // Enable the workaround for MIT krb5 1.10 bugs from krb5_realm_override.cc.
-    setenv("KUDU_ENABLE_KRB5_REALM_FIX", "true", 0);
+Status MiniKdcWrapper::SetupAndStartMiniKDC(string spn, string realm,
+    string ticket_lifetime, string renew_lifetime, int kdc_port,
+    unique_ptr<MiniKdcWrapper>* kdc_ptr) {
+  std::unique_ptr<MiniKdcWrapper> kdc(new MiniKdcWrapper(
+      spn, realm, ticket_lifetime, renew_lifetime, kdc_port));
+  DCHECK(kdc.get() != nullptr);
 
-    FLAGS_use_kudu_kinit = k == USE_KUDU_KERBEROS;
+  // Enable the workaround for MIT krb5 1.10 bugs from krb5_realm_override.cc.
+  setenv("KUDU_ENABLE_KRB5_REALM_FIX", "true", 0);
 
-    // Check if the unique directory already exists, and create it if it doesn't.
-    RETURN_IF_ERROR(FileSystemUtil::RemoveAndCreateDirectory(unique_test_dir_.string()));
-    string keytab_dir = unique_test_dir_.string() + "/krb5kdc";
+  // Check if the unique directory already exists, and create it if it doesn't.
+  RETURN_IF_ERROR(FileSystemUtil::RemoveAndCreateDirectory(kdc->unique_test_dir_.string()));
+  string keytab_dir = kdc->unique_test_dir_.string() + "/krb5kdc";
 
-    RETURN_IF_ERROR(StartKdc(keytab_dir));
+  RETURN_IF_ERROR(kdc->StartKdc(keytab_dir));
 
-    string kt_path;
-    RETURN_IF_ERROR(CreateServiceKeytab(spn_, &kt_path));
+  string kt_path;
+  RETURN_IF_ERROR(kdc->CreateServiceKeytab(kdc->spn_, &kt_path));
 
-    // Set the appropriate flags based on how we've set up the kerberos environment.
-    FLAGS_krb5_conf = strings::Substitute("$0/$1", keytab_dir, "krb5.conf");
-    FLAGS_keytab_file = kt_path;
+  // Set the appropriate flags based on how we've set up the kerberos environment.
+  FLAGS_krb5_conf = strings::Substitute("$0/$1", keytab_dir, "krb5.conf");
+  FLAGS_keytab_file = kt_path;
 
-    // We explicitly set 'principal' and 'be_principal' even though 'principal' won't be
-    // used to test IMPALA-6256.
-    FLAGS_principal = "dummy-service/host@realm";
-    FLAGS_be_principal = strings::Substitute("$0@$1", spn_, realm_);
-  }
+  *kdc_ptr = std::move(kdc);
   return Status::OK();
 }
 
-Status MiniKdcWrapper::TearDownMiniKDC(KerberosSwitch k) {
-  if (k != KERBEROS_OFF) {
-    RETURN_IF_ERROR(StopKdc());
+Status MiniKdcWrapper::TearDownMiniKDC() {
+  RETURN_IF_ERROR(StopKdc());
 
-    // Clear the flags so we don't step on other tests that may run in the same process.
-    FLAGS_keytab_file.clear();
-    FLAGS_principal.clear();
-    FLAGS_be_principal.clear();
-    FLAGS_krb5_conf.clear();
+  // Clear the flags so we don't step on other tests that may run in the same process.
+  FLAGS_keytab_file.clear();
+  FLAGS_krb5_conf.clear();
 
-    // Remove test directory.
-    RETURN_IF_ERROR(FileSystemUtil::RemovePaths({unique_test_dir_.string()}));
-  }
+  // Remove test directory.
+  RETURN_IF_ERROR(FileSystemUtil::RemovePaths({unique_test_dir_.string()}));
   return Status::OK();
 }

http://git-wip-us.apache.org/repos/asf/impala/blob/3d52f8c9/be/src/testutil/mini-kdc-wrapper.h
----------------------------------------------------------------------
diff --git a/be/src/testutil/mini-kdc-wrapper.h b/be/src/testutil/mini-kdc-wrapper.h
index 1d5e0b8..17c174a 100644
--- a/be/src/testutil/mini-kdc-wrapper.h
+++ b/be/src/testutil/mini-kdc-wrapper.h
@@ -29,34 +29,26 @@ namespace impala {
 
 enum KerberosSwitch {
   KERBEROS_OFF,
-  USE_KUDU_KERBEROS,    // FLAGS_use_kudu_kinit = true
-  USE_IMPALA_KERBEROS   // FLAGS_use_kudu_kinit = false
+  USE_KRPC_KUDU_KERBEROS,    // FLAGS_use_kudu_kinit = true,  FLAGS_use_krpc = true
+  USE_KRPC_IMPALA_KERBEROS,  // FLAGS_use_kudu_kinit = false, FLAGS_use_krpc = true
+  USE_THRIFT_KUDU_KERBEROS,  // FLAGS_use_kudu_kinit = true,  FLAGS_use_krpc = false
+  USE_THRIFT_IMPALA_KERBEROS // FLAGS_use_kudu_kinit = false, FLAGS_use_krpc = false
 };
 
 /// This class allows tests to easily start and stop a KDC and configure Impala's auth
-/// layer.
-/// If the mode is USE_KUDU_KERBEROS or USE_IMPALA_KERBEROS, the MiniKdc which is a
-/// wrapper around the 'krb5kdc' process, is configured and started.
-/// If the mode is KERBEROS_OFF, Impala's auth layer is configured to use plain SASL and
-/// the KDC is not started.
+/// layer. A MiniKdc which is a wrapper around the 'krb5kdc' process, is configured and
+/// started.
 class MiniKdcWrapper {
  public:
-  MiniKdcWrapper(std::string spn, std::string realm, std::string ticket_lifetime,
-    std::string renew_lifetime,int kdc_port) :
-      spn_(spn),
-      realm_(realm),
-      ticket_lifetime_(ticket_lifetime),
-      renew_lifetime_(renew_lifetime),
-      kdc_port_(kdc_port) {
-  }
-
-  /// If 'k' is 'USE_KUDU_KERBEROS' or 'USE_IMPALA_KERBEROS', this function creates the
-  /// 'unique_test_dir_' path, starts the KDC and sets the appropriate flags that Impala
-  /// requires to run with Kerberos.
-  Status SetupAndStartMiniKDC(KerberosSwitch k);
+  /// This function creates the 'unique_test_dir_' path, starts the KDC and sets the
+  /// appropriate flags that Impala requires to run with Kerberos. The newly created
+  /// KDC is stored in 'kdc_ptr'. Return error status on failure.
+  static Status SetupAndStartMiniKDC(std::string spn, std::string realm,
+      std::string ticket_lifetime, std::string renew_lifetime, int kdc_port,
+      std::unique_ptr<MiniKdcWrapper>* kdc_ptr);
 
   /// Undoes everything done by SetupAndStartMiniKDC().
-  Status TearDownMiniKDC(KerberosSwitch k);
+  Status TearDownMiniKDC();
 
  private:
   boost::scoped_ptr<kudu::MiniKdc> kdc_;
@@ -79,6 +71,16 @@ class MiniKdcWrapper {
   /// Create a unique directory for this test to store its files in.
   boost::filesystem::path unique_test_dir_ = boost::filesystem::unique_path();
 
+  /// Called by SetupAndStartMiniKDC() only.
+  MiniKdcWrapper(std::string spn, std::string realm, std::string ticket_lifetime,
+    std::string renew_lifetime, int kdc_port) :
+      spn_(spn),
+      realm_(realm),
+      ticket_lifetime_(ticket_lifetime),
+      renew_lifetime_(renew_lifetime),
+      kdc_port_(kdc_port) {
+  }
+
   /// Starts the KDC and configures it to use 'keytab_dir' as the location to store the
   /// keytab. The 'keytab_dir' will not be cleaned up by this class.
   Status StartKdc(string keytab_dir);


[3/7] impala git commit: IMPALA-6715, IMPALA-6736: fix stress TPC workload selection

Posted by ta...@apache.org.
IMPALA-6715,IMPALA-6736: fix stress TPC workload selection

IMPALA-6715:
This commit
  IMPALA-6551: Change Kudu TPCDS and TPCH columns to DECIMAL
added additional decimal_v2 queries to the stress test that amount to
running the same query twice. This makes the binary search run
incredibly slow.

- Fix the query selection. Add additional queries that weren't matching
  before, like the tpcds-q[0-9]+a.test series.

- Add a test that will at least ensure if
  testdata/workloads/tpc*/queries is modified, the stress test will
  still find the same number of queries for the given workload. There's
  no obvious place to put this test: it's not testing the product at
  all, so:

- Add a new directory tests/infra for such tests and add it to
  tests/run-tests.py.

- Move the test from IMPALA-6441 into tests/infra.

Testing:
- Core private build passed. I manually looked to make sure the moved
  and new tests ran.

- Short stress test run. I checked the runtime info and saw the new
  TPCDS queries in the JSON.

- While testing on hardware clusters down stream, I noticed...

IMPALA-6736:
  TPC-DS Q67A is 10x more expensive to run without spilling than any
  other query. I fixed the --filter-query-mem-ratio option to work. This
  will still run Q67A during the binary search phase, but if a cluster
  is too small, the query will be skipped.

Change-Id: I3e26b64d38aa8d63a176daf95c4ac5dee89508da
Reviewed-on: http://gerrit.cloudera.org:8080/9758
Reviewed-by: David Knupp <dk...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/f05cf903
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/f05cf903
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/f05cf903

Branch: refs/heads/2.x
Commit: f05cf9031429a2a14a6dd964e11f067b1f40fd04
Parents: ae20eb4
Author: Michael Brown <mi...@cloudera.com>
Authored: Wed Mar 21 13:08:50 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Tue Mar 27 03:35:00 2018 +0000

----------------------------------------------------------------------
 tests/infra/test_stress_infra.py  | 60 ++++++++++++++++++++++++++++++++++
 tests/metadata/test_explain.py    | 22 -------------
 tests/run-tests.py                |  2 +-
 tests/stress/concurrent_select.py | 29 ++++++++++++++--
 4 files changed, 87 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/f05cf903/tests/infra/test_stress_infra.py
----------------------------------------------------------------------
diff --git a/tests/infra/test_stress_infra.py b/tests/infra/test_stress_infra.py
new file mode 100644
index 0000000..58f7625
--- /dev/null
+++ b/tests/infra/test_stress_infra.py
@@ -0,0 +1,60 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This module attempts to enforce infrastructural assumptions that bind test tools to
+# product or other constraints. We want to stop these assumptions from breaking at
+# pre-commit time, not later.
+
+import pytest
+
+from decimal import Decimal
+
+from tests.common.impala_test_suite import ImpalaTestSuite
+from tests.stress.concurrent_select import (
+    EXPECTED_TPCDS_QUERIES_COUNT,
+    EXPECTED_TPCH_NESTED_QUERIES_COUNT,
+    EXPECTED_TPCH_QUERIES_COUNT,
+    load_tpc_queries,
+    match_memory_estimate)
+
+
+class TestStressInfra(ImpalaTestSuite):
+
+  def test_stress_binary_search_start_point(self):
+    """
+    Test that the stress test can use EXPLAIN to find the start point for its binary
+    search.
+    """
+    result = self.client.execute("explain select 1")
+    mem_limit, units = match_memory_estimate(result.data)
+    assert isinstance(units, str) and units.upper() in ('T', 'G', 'M', 'K', ''), (
+        'unexpected units {u} from explain memory estimation\n{output}:'.format(
+            u=units, output='\n'.join(result.data)))
+    assert Decimal(mem_limit) >= 0, (
+        'unexpected value from explain\n:' + '\n'.join(result.data))
+
+  @pytest.mark.parametrize(
+      'count_map',
+      [('tpcds', EXPECTED_TPCDS_QUERIES_COUNT),
+       ('tpch_nested', EXPECTED_TPCH_NESTED_QUERIES_COUNT),
+       ('tpch', EXPECTED_TPCH_QUERIES_COUNT)])
+  def test_stress_finds_workloads(self, count_map):
+    """
+    Test that the stress test will properly load TPC workloads.
+    """
+    workload, count = count_map
+    assert count == len(load_tpc_queries(workload))

http://git-wip-us.apache.org/repos/asf/impala/blob/f05cf903/tests/metadata/test_explain.py
----------------------------------------------------------------------
diff --git a/tests/metadata/test_explain.py b/tests/metadata/test_explain.py
index 22fc177..3ad411a 100644
--- a/tests/metadata/test_explain.py
+++ b/tests/metadata/test_explain.py
@@ -19,11 +19,8 @@
 #
 import re
 
-from decimal import Decimal
-
 from tests.common.impala_test_suite import ImpalaTestSuite
 from tests.common.skip import SkipIfLocal, SkipIfNotHdfsMinicluster
-from tests.stress.concurrent_select import match_memory_estimate
 from tests.util.filesystem_utils import WAREHOUSE
 
 # Tests the different explain levels [0-3] on a few queries.
@@ -178,22 +175,3 @@ class TestExplainEmptyPartition(ImpalaTestSuite):
     assert "missing relevant table and/or column statistics" in explain_result
     # Also test IMPALA-1530 - adding the number of partitions missing stats
     assert "partitions: 1/2 " in explain_result
-
-
-class TestInfraIntegration(ImpalaTestSuite):
-  """
-  This is a test suite to ensure separate test tooling in Python is compatible with the
-  product.
-  """
-  def test_stress_binary_search_start_point(self):
-    """
-    Test that the stress test can use EXPLAIN to find the start point for its binary
-    search.
-    """
-    result = self.client.execute("explain select 1")
-    mem_limit, units = match_memory_estimate(result.data)
-    assert isinstance(units, str) and units.upper() in ('T', 'G', 'M', 'K', ''), (
-        'unexpected units {u} from explain memory estimation\n{output}:'.format(
-            u=units, output='\n'.join(result.data)))
-    assert Decimal(mem_limit) >= 0, (
-        'unexpected value from explain\n:' + '\n'.join(result.data))

http://git-wip-us.apache.org/repos/asf/impala/blob/f05cf903/tests/run-tests.py
----------------------------------------------------------------------
diff --git a/tests/run-tests.py b/tests/run-tests.py
index 9902fc9..3a8bd2e 100755
--- a/tests/run-tests.py
+++ b/tests/run-tests.py
@@ -34,7 +34,7 @@ from _pytest.config import FILE_OR_DIR
 # We whitelist valid test directories. If a new test directory is added, update this.
 VALID_TEST_DIRS = ['failure', 'query_test', 'stress', 'unittests', 'aux_query_tests',
                    'shell', 'hs2', 'catalog_service', 'metadata', 'data_errors',
-                   'statestore']
+                   'statestore', 'infra']
 
 TEST_DIR = os.path.join(os.environ['IMPALA_HOME'], 'tests')
 RESULT_DIR = os.path.join(os.environ['IMPALA_EE_TEST_LOGS_DIR'], 'results')

http://git-wip-us.apache.org/repos/asf/impala/blob/f05cf903/tests/stress/concurrent_select.py
----------------------------------------------------------------------
diff --git a/tests/stress/concurrent_select.py b/tests/stress/concurrent_select.py
index 5146d35..fa8541c 100755
--- a/tests/stress/concurrent_select.py
+++ b/tests/stress/concurrent_select.py
@@ -84,6 +84,14 @@ from tests.util.thrift_util import op_handle_to_query_id
 
 LOG = logging.getLogger(os.path.splitext(os.path.basename(__file__))[0])
 
+# IMPALA-6715: Every so often the stress test or the TPC workload directories get
+# changed, and the stress test loses the ability to run the full set of queries. Set
+# these constants and assert that when a workload is used, all the queries we expect to
+# use are there.
+EXPECTED_TPCDS_QUERIES_COUNT = 71
+EXPECTED_TPCH_NESTED_QUERIES_COUNT = 22
+EXPECTED_TPCH_QUERIES_COUNT = 22
+
 # Used to short circuit a binary search of the min mem limit. Values will be considered
 # equal if they are within this ratio or absolute amount of each other.
 MEM_LIMIT_EQ_THRESHOLD_PC = 0.975
@@ -1065,7 +1073,10 @@ def load_tpc_queries(workload):
   queries = list()
   query_dir = os.path.join(
       os.path.dirname(__file__), "..", "..", "testdata", "workloads", workload, "queries")
-  file_name_pattern = re.compile(r"-(q\d+).test$")
+  # IMPALA-6715 and others from the past: This pattern enforces the queries we actually
+  # find. Both workload directories contain other queries that are not part of the TPC
+  # spec.
+  file_name_pattern = re.compile(r"^{0}-(q.*).test$".format(workload))
   for query_file in os.listdir(query_dir):
     match = file_name_pattern.search(query_file)
     if not match:
@@ -1956,6 +1967,7 @@ def main():
   # the TPC queries are expected to always complete successfully.
   if args.tpcds_db:
     tpcds_queries = load_tpc_queries("tpcds")
+    assert len(tpcds_queries) == EXPECTED_TPCDS_QUERIES_COUNT
     for query in tpcds_queries:
       query.db_name = args.tpcds_db
     queries.extend(tpcds_queries)
@@ -1964,6 +1976,7 @@ def main():
         queries.extend(generate_compute_stats_queries(cursor))
   if args.tpch_db:
     tpch_queries = load_tpc_queries("tpch")
+    assert len(tpch_queries) == EXPECTED_TPCH_QUERIES_COUNT
     for query in tpch_queries:
       query.db_name = args.tpch_db
     queries.extend(tpch_queries)
@@ -1972,6 +1985,7 @@ def main():
         queries.extend(generate_compute_stats_queries(cursor))
   if args.tpch_nested_db:
     tpch_nested_queries = load_tpc_queries("tpch_nested")
+    assert len(tpch_nested_queries) == EXPECTED_TPCH_NESTED_QUERIES_COUNT
     for query in tpch_nested_queries:
       query.db_name = args.tpch_nested_db
     queries.extend(tpch_nested_queries)
@@ -1980,6 +1994,7 @@ def main():
         queries.extend(generate_compute_stats_queries(cursor))
   if args.tpch_kudu_db:
     tpch_kudu_queries = load_tpc_queries("tpch")
+    assert len(tpch_kudu_queries) == EXPECTED_TPCH_QUERIES_COUNT
     for query in tpch_kudu_queries:
       query.db_name = args.tpch_kudu_db
     queries.extend(tpch_kudu_queries)
@@ -1992,6 +2007,7 @@ def main():
         queries.extend(generate_DML_queries(cursor, args.dml_mod_values))
   if args.tpcds_kudu_db:
     tpcds_kudu_queries = load_tpc_queries("tpcds")
+    assert len(tpcds_kudu_queries) == EXPECTED_TPCDS_QUERIES_COUNT
     for query in tpcds_kudu_queries:
       query.db_name = args.tpcds_kudu_db
     queries.extend(tpcds_kudu_queries)
@@ -2049,10 +2065,17 @@ def main():
 
     # Remove any queries that would use "too many" resources. This way a larger number
     # of queries will run concurrently.
+    if query.required_mem_mb_without_spilling is not None and \
+        query.required_mem_mb_without_spilling / float(impala.min_impalad_mem_mb) \
+            > args.filter_query_mem_ratio:
+      LOG.debug(
+          "Filtering non-spilling query that exceeds "
+          "--filter-query-mem-ratio: " + query.sql)
+      query.required_mem_mb_without_spilling = None
     if query.required_mem_mb_with_spilling is None \
-        or query.required_mem_mb_with_spilling / impala.min_impalad_mem_mb \
+        or query.required_mem_mb_with_spilling / float(impala.min_impalad_mem_mb) \
             > args.filter_query_mem_ratio:
-      LOG.debug("Filtered query due to mem ratio option: " + query.sql)
+      LOG.debug("Filtering query that exceeds --filter-query-mem-ratio: " + query.sql)
       del queries[idx]
 
   # Remove queries that have a nested loop join in the plan.