Posted to commits@impala.apache.org by ph...@apache.org on 2018/04/06 16:17:24 UTC

[1/7] impala git commit: IMPALA-6801: Cleanup request_pool

Repository: impala
Updated Branches:
  refs/heads/master 4d6b07f0e -> 2896b8d12


IMPALA-6801: Cleanup request_pool

Eliminate the copy of the request_pool field in the QuerySchedule.
Instead, set it directly in the TQueryCtx early on. The coordinator
then no longer needs to copy the TQueryCtx.
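
In miniature, the new flow looks like the following sketch (simplified
stand-ins for the Thrift-generated TQueryCtx and for QuerySchedule, not
the actual Impala classes): the pool is resolved once, recorded in the
shared query context, and every later consumer reads it from there
instead of holding its own copy.

// Sketch only; hypothetical simplified types, not the Impala classes.
#include <cassert>
#include <string>

struct TQueryCtx {
  // Thrift generates an __isset record and a __set_*() helper for each
  // optional field, as used by AddPoolConfiguration() in this patch.
  std::string request_pool;
  struct { bool request_pool = false; } __isset;
  void __set_request_pool(const std::string& pool) {
    request_pool = pool;
    __isset.request_pool = true;
  }
};

struct QuerySchedule {
  const TQueryCtx& query_ctx;
  // Valid once the pool has been set in the context, mirroring the new
  // QuerySchedule::request_pool() accessor.
  const std::string& request_pool() const { return query_ctx.request_pool; }
};

int main() {
  TQueryCtx ctx;
  ctx.__set_request_pool("root.default");  // set once, early on
  QuerySchedule schedule{ctx};
  assert(schedule.request_pool() == "root.default");  // read, not copied
  return 0;
}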

Change-Id: I3bee843ef7d72ba14d487fdb56e55fa3660aafd3
Reviewed-on: http://gerrit.cloudera.org:8080/9909
Reviewed-by: Dan Hecht <dh...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/58ee5eca
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/58ee5eca
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/58ee5eca

Branch: refs/heads/master
Commit: 58ee5ecadf2b389fe47dc87085a7d63b4c364e26
Parents: 4d6b07f
Author: Dan Hecht <dh...@cloudera.com>
Authored: Tue Apr 3 10:42:39 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu Apr 5 03:47:54 2018 +0000

----------------------------------------------------------------------
 be/src/runtime/coordinator.cc              | 12 +++++-------
 be/src/runtime/coordinator.h               |  3 ---
 be/src/scheduling/query-schedule.h         |  8 +++-----
 be/src/scheduling/scheduler.cc             |  9 ++++++---
 be/src/service/client-request-state.h      |  7 +++----
 be/src/service/impala-beeswax-server.cc    |  2 +-
 be/src/service/impala-hs2-server.cc        |  2 +-
 be/src/service/impala-http-handler.cc      |  2 +-
 be/src/service/impala-server.cc            |  9 +++++++--
 be/src/service/impala-server.h             | 16 ++++++++--------
 common/thrift/ImpalaInternalService.thrift |  4 ++--
 11 files changed, 37 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/58ee5eca/be/src/runtime/coordinator.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/coordinator.cc b/be/src/runtime/coordinator.cc
index e6b3bca..5a3de5d 100644
--- a/be/src/runtime/coordinator.cc
+++ b/be/src/runtime/coordinator.cc
@@ -90,11 +90,9 @@ Status Coordinator::Exec() {
   const TQueryExecRequest& request = schedule_.request();
   DCHECK(request.plan_exec_info.size() > 0);
 
-  VLOG_QUERY << "Exec() query_id=" << schedule_.query_id()
+  VLOG_QUERY << "Exec() query_id=" << query_id()
              << " stmt=" << request.query_ctx.client_request.stmt;
   stmt_type_ = request.stmt_type;
-  query_ctx_ = request.query_ctx;
-  query_ctx_.__set_request_pool(schedule_.request_pool());
 
   query_profile_ =
       RuntimeProfile::Create(obj_pool(), "Execution Profile " + PrintId(query_id()));
@@ -117,7 +115,7 @@ Status Coordinator::Exec() {
   // TODO: revisit this, it may not be true anymore
   lock_guard<mutex> l(lock_);
 
-  query_state_ = ExecEnv::GetInstance()->query_exec_mgr()->CreateQueryState(query_ctx_);
+  query_state_ = ExecEnv::GetInstance()->query_exec_mgr()->CreateQueryState(query_ctx());
   query_state_->AcquireExecResourceRefcount(); // Decremented in ReleaseExecResources().
   filter_mem_tracker_ = query_state_->obj_pool()->Add(new MemTracker(
       -1, "Runtime Filter (Coordinator)", query_state_->query_mem_tracker(), false));
@@ -355,7 +353,7 @@ void Coordinator::StartBackendExec() {
   for (BackendState* backend_state: backend_states_) {
     ExecEnv::GetInstance()->exec_rpc_thread_pool()->Offer(
         [backend_state, this, &debug_options]() {
-          backend_state->Exec(query_ctx_, debug_options, filter_routing_table_,
+          backend_state->Exec(query_ctx(), debug_options, filter_routing_table_,
             exec_complete_barrier_.get());
         });
   }
@@ -783,7 +781,7 @@ void Coordinator::ReleaseAdmissionControlResources() {
 void Coordinator::ReleaseAdmissionControlResourcesLocked() {
   if (released_admission_control_resources_) return;
   LOG(INFO) << "Release admission control resources for query_id="
-            << PrintId(query_ctx_.query_id);
+            << PrintId(query_ctx().query_id);
   AdmissionController* admission_controller =
       ExecEnv::GetInstance()->admission_controller();
   if (admission_controller != nullptr) admission_controller->ReleaseQuery(schedule_);
@@ -941,7 +939,7 @@ void Coordinator::FilterState::Disable(MemTracker* tracker) {
 }
 
 const TUniqueId& Coordinator::query_id() const {
-  return query_ctx_.query_id;
+  return query_ctx().query_id;
 }
 
 void Coordinator::GetTExecSummary(TExecSummary* exec_summary) {

http://git-wip-us.apache.org/repos/asf/impala/blob/58ee5eca/be/src/runtime/coordinator.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/coordinator.h b/be/src/runtime/coordinator.h
index 6665c08..8e556ec 100644
--- a/be/src/runtime/coordinator.h
+++ b/be/src/runtime/coordinator.h
@@ -187,9 +187,6 @@ class Coordinator { // NOLINT: The member variables could be re-ordered to save
   /// owned by the ClientRequestState that owns this coordinator
   const QuerySchedule& schedule_;
 
-  /// copied from TQueryExecRequest; constant across all fragments
-  TQueryCtx query_ctx_;
-
   /// copied from TQueryExecRequest, governs when to call ReportQuerySummary
   TStmtType::type stmt_type_;
 

http://git-wip-us.apache.org/repos/asf/impala/blob/58ee5eca/be/src/scheduling/query-schedule.h
----------------------------------------------------------------------
diff --git a/be/src/scheduling/query-schedule.h b/be/src/scheduling/query-schedule.h
index 28ccd6f..be219a8 100644
--- a/be/src/scheduling/query-schedule.h
+++ b/be/src/scheduling/query-schedule.h
@@ -143,8 +143,9 @@ class QuerySchedule {
   const TUniqueId& query_id() const { return query_id_; }
   const TQueryExecRequest& request() const { return request_; }
   const TQueryOptions& query_options() const { return query_options_; }
-  const std::string& request_pool() const { return request_pool_; }
-  void set_request_pool(const std::string& pool_name) { request_pool_ = pool_name; }
+
+  // Valid after Schedule() succeeds.
+  const std::string& request_pool() const { return request().query_ctx.request_pool; }
 
   /// Gets the estimated memory (bytes) per-node. Returns the user specified estimate
   /// (MEM_LIMIT query parameter) if provided or the estimate from planning if available,
@@ -250,9 +251,6 @@ class QuerySchedule {
   /// Used to generate consecutive fragment instance ids.
   TUniqueId next_instance_id_;
 
-  /// Request pool to which the request was submitted for admission.
-  std::string request_pool_;
-
   /// Indicates if the query has been admitted for execution.
   bool is_admitted_;
 

http://git-wip-us.apache.org/repos/asf/impala/blob/58ee5eca/be/src/scheduling/scheduler.cc
----------------------------------------------------------------------
diff --git a/be/src/scheduling/scheduler.cc b/be/src/scheduling/scheduler.cc
index 5a9d4bf..b091415 100644
--- a/be/src/scheduling/scheduler.cc
+++ b/be/src/scheduling/scheduler.cc
@@ -693,10 +693,13 @@ Status Scheduler::Schedule(QuerySchedule* schedule) {
 
   // TODO: Move to admission control, it doesn't need to be in the Scheduler.
   string resolved_pool;
+  // Re-resolve the pool name to propagate any resolution errors now that this request
+  // is known to require a valid pool.
   RETURN_IF_ERROR(request_pool_service_->ResolveRequestPool(
-      schedule->request().query_ctx, &resolved_pool));
-  schedule->set_request_pool(resolved_pool);
-  schedule->summary_profile()->AddInfoString("Request Pool", resolved_pool);
+          schedule->request().query_ctx, &resolved_pool));
+  // Resolved pool name should have been set in the TQueryCtx and shouldn't have changed.
+  DCHECK_EQ(resolved_pool, schedule->request_pool());
+  schedule->summary_profile()->AddInfoString("Request Pool", schedule->request_pool());
   return Status::OK();
 }
 

http://git-wip-us.apache.org/repos/asf/impala/blob/58ee5eca/be/src/service/client-request-state.h
----------------------------------------------------------------------
diff --git a/be/src/service/client-request-state.h b/be/src/service/client-request-state.h
index 657f3de..05cd762 100644
--- a/be/src/service/client-request-state.h
+++ b/be/src/service/client-request-state.h
@@ -159,12 +159,11 @@ class ClientRequestState {
   Coordinator* coord() const { return coord_.get(); }
   QuerySchedule* schedule() { return schedule_.get(); }
 
-  /// Resource pool associated with this query, or an empty string if the schedule has not
-  /// been created and had the pool set yet, or this StmtType doesn't go through admission
-  /// control.
+  /// Admission control resource pool associated with this query.
   std::string request_pool() const {
-    return schedule_ == nullptr ? "" : schedule_->request_pool();
+    return query_ctx_.__isset.request_pool ? query_ctx_.request_pool : "";
   }
+
   int num_rows_fetched() const { return num_rows_fetched_; }
   void set_fetched_rows() { fetched_rows_ = true; }
   bool fetched_rows() const { return fetched_rows_; }

http://git-wip-us.apache.org/repos/asf/impala/blob/58ee5eca/be/src/service/impala-beeswax-server.cc
----------------------------------------------------------------------
diff --git a/be/src/service/impala-beeswax-server.cc b/be/src/service/impala-beeswax-server.cc
index c441285..4875adb 100644
--- a/be/src/service/impala-beeswax-server.cc
+++ b/be/src/service/impala-beeswax-server.cc
@@ -458,7 +458,7 @@ Status ImpalaServer::QueryToTQueryContext(const Query& query,
 
   // Only query options not set in the session or confOverlay can be overridden by the
   // pool options.
-  AddPoolQueryOptions(query_ctx, ~set_query_options_mask);
+  AddPoolConfiguration(query_ctx, ~set_query_options_mask);
   VLOG_QUERY << "TClientRequest.queryOptions: "
              << ThriftDebugString(query_ctx->client_request.query_options);
   return Status::OK();

http://git-wip-us.apache.org/repos/asf/impala/blob/58ee5eca/be/src/service/impala-hs2-server.cc
----------------------------------------------------------------------
diff --git a/be/src/service/impala-hs2-server.cc b/be/src/service/impala-hs2-server.cc
index 80ace87..765fccf 100644
--- a/be/src/service/impala-hs2-server.cc
+++ b/be/src/service/impala-hs2-server.cc
@@ -256,7 +256,7 @@ Status ImpalaServer::TExecuteStatementReqToTQueryContext(
   }
   // Only query options not set in the session or confOverlay can be overridden by the
   // pool options.
-  AddPoolQueryOptions(query_ctx, ~set_query_options_mask);
+  AddPoolConfiguration(query_ctx, ~set_query_options_mask);
   VLOG_QUERY << "TClientRequest.queryOptions: "
              << ThriftDebugString(query_ctx->client_request.query_options);
   return Status::OK();

http://git-wip-us.apache.org/repos/asf/impala/blob/58ee5eca/be/src/service/impala-http-handler.cc
----------------------------------------------------------------------
diff --git a/be/src/service/impala-http-handler.cc b/be/src/service/impala-http-handler.cc
index 3841bfe..9b8d597 100644
--- a/be/src/service/impala-http-handler.cc
+++ b/be/src/service/impala-http-handler.cc
@@ -361,7 +361,7 @@ void ImpalaHttpHandler::QueryStateToJson(const ImpalaServer::QueryStateRecord& r
   Value val_waiting_time(waiting_time_str.c_str(), document->GetAllocator());
   value->AddMember("waiting_time", val_waiting_time, document->GetAllocator());
 
-  Value resource_pool(record.request_pool.c_str(), document->GetAllocator());
+  Value resource_pool(record.resource_pool.c_str(), document->GetAllocator());
   value->AddMember("resource_pool", resource_pool, document->GetAllocator());
 }
 

http://git-wip-us.apache.org/repos/asf/impala/blob/58ee5eca/be/src/service/impala-server.cc
----------------------------------------------------------------------
diff --git a/be/src/service/impala-server.cc b/be/src/service/impala-server.cc
index 5eeb52a..f6cd6e5 100644
--- a/be/src/service/impala-server.cc
+++ b/be/src/service/impala-server.cc
@@ -782,7 +782,7 @@ void ImpalaServer::ArchiveQuery(const ClientRequestState& query) {
 
 ImpalaServer::~ImpalaServer() {}
 
-void ImpalaServer::AddPoolQueryOptions(TQueryCtx* ctx,
+void ImpalaServer::AddPoolConfiguration(TQueryCtx* ctx,
     const QueryOptionsMask& override_options_mask) {
   // Errors are not returned and are only logged (at level 2) because some incoming
   // requests are not expected to be mapped to a pool and will not have query options,
@@ -796,6 +796,7 @@ void ImpalaServer::AddPoolQueryOptions(TQueryCtx* ctx,
              << " ResolveRequestPool status: " << status.GetDetail();
     return;
   }
+  ctx->__set_request_pool(resolved_pool);
 
   TPoolConfig config;
   status = exec_env_->request_pool_service()->GetPoolConfig(resolved_pool, &config);
@@ -1717,7 +1718,11 @@ ImpalaServer::QueryStateRecord::QueryStateRecord(const ClientRequestState& reque
   }
   all_rows_returned = request_state.eos();
   last_active_time_ms = request_state.last_active_ms();
-  request_pool = request_state.request_pool();
+  // For statement types other than QUERY/DML, show an empty string for resource pool
+  // to indicate that they are not subjected to admission control.
+  if (stmt_type == TStmtType::QUERY || stmt_type == TStmtType::DML) {
+    resource_pool = request_state.request_pool();
+  }
   user_has_profile_access = request_state.user_has_profile_access();
 }
 

http://git-wip-us.apache.org/repos/asf/impala/blob/58ee5eca/be/src/service/impala-server.h
----------------------------------------------------------------------
diff --git a/be/src/service/impala-server.h b/be/src/service/impala-server.h
index 2af89fd..fb3f261 100644
--- a/be/src/service/impala-server.h
+++ b/be/src/service/impala-server.h
@@ -706,9 +706,9 @@ class ImpalaServer : public ImpalaServiceIf,
     // The most recent time this query was actively being processed, in Unix milliseconds.
     int64_t last_active_time_ms;
 
-    /// Request pool to which the request was submitted for admission, or an empty string
-    /// if this request doesn't have a pool.
-    std::string request_pool;
+    /// Resource pool to which the request was submitted for admission, or an empty
+    /// string if this request doesn't go through admission control.
+    std::string resource_pool;
 
     /// Initialise from an exec_state. If copy_profile is true, print the query
     /// profile to a string and copy that into this.profile (which is expensive),
@@ -789,11 +789,11 @@ class ImpalaServer : public ImpalaServiceIf,
   void CancelFromThreadPool(uint32_t thread_id,
       const CancellationWork& cancellation_work);
 
-  /// Helper method to add any pool query options to the query_ctx. Must be called before
-  /// ExecuteInternal() at which point the TQueryCtx is const and cannot be mutated.
-  /// override_options_mask indicates which query options can be overridden by the pool
-  /// default query options.
-  void AddPoolQueryOptions(TQueryCtx* query_ctx,
+  /// Helper method to add the pool name and query options to the query_ctx. Must be
+  /// called before ExecuteInternal() at which point the TQueryCtx is const and cannot
+  /// be mutated. override_options_mask indicates which query options can be overridden
+  /// by the pool default query options.
+  void AddPoolConfiguration(TQueryCtx* query_ctx,
       const QueryOptionsMask& override_options_mask);
 
   /// Register timeout value upon opening a new session. This will wake up

http://git-wip-us.apache.org/repos/asf/impala/blob/58ee5eca/common/thrift/ImpalaInternalService.thrift
----------------------------------------------------------------------
diff --git a/common/thrift/ImpalaInternalService.thrift b/common/thrift/ImpalaInternalService.thrift
index dc37fc2..e96ed87 100644
--- a/common/thrift/ImpalaInternalService.thrift
+++ b/common/thrift/ImpalaInternalService.thrift
@@ -389,8 +389,8 @@ struct TQueryCtx {
   // List of tables with scan ranges that map to blocks with missing disk IDs.
   15: optional list<CatalogObjects.TTableName> tables_missing_diskids
 
-  // The pool to which this request has been submitted. Used to update pool statistics
-  // for admission control.
+  // The resolved admission control pool to which this request will be submitted. May be
+  // unset for statements that aren't subjected to admission control (e.g. USE, SET).
   16: optional string request_pool
 
   // String containing a timestamp (in UTC) set as the query submission time. It


[3/7] impala git commit: IMPALA-6389: Make '\0' delimited text files work

Posted by ph...@apache.org.
IMPALA-6389: Make '\0' delimited text files work

Initially I didn't want to fully implement this, as the metadata
for these tables can't even be fully stored in Postgres; however,
after digging into some older documentation, it appears that the
ASCII NUL character actually has been used as a field separator
in various vendors' CSV implementations.

Therefore, this patch attempts to make things as non-broken as
possible and allows \0 as a field or tuple delimiter.  Collection
column delimiters are not allowed to be \0, as they genuinely may
not exist and we don't want to force special escaping on an
arbitrary character.  Note that the field delimiter must be distinct
from the tuple delimiter when they both exist; if it is not, the
effect will be that there is no field delimiter (this is actually
possible with single-column tables).
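
To make the delimiter rules concrete, here is a small standalone
sketch (plain C++, independent of Impala's parser classes) that splits
a buffer on a NUL tuple delimiter and a distinct field delimiter; the
buffer has to be carried with an explicit length, since '\0' can no
longer act as a terminator.

// Illustration only, not Impala code: split on tuple/field delimiters,
// where either delimiter may be '\0'.  They must be distinct when both
// are used, per the rule above.
#include <iostream>
#include <string>
#include <vector>

std::vector<std::vector<std::string>> Split(const std::string& data,
    char tuple_delim, char field_delim) {
  std::vector<std::vector<std::string>> tuples;
  std::vector<std::string> fields;
  std::string field;
  for (char c : data) {
    if (c == tuple_delim) {
      // Only complete (delimited) tuples count, as in the unit test.
      fields.push_back(field);
      tuples.push_back(fields);
      fields.clear();
      field.clear();
    } else if (c == field_delim) {
      fields.push_back(field);
      field.clear();
    } else {
      field.push_back(c);
    }
  }
  return tuples;  // trailing bytes with no tuple delimiter are ignored
}

int main() {
  const std::string data("a,b\0c,d\0", 8);  // embedded NULs kept via length
  std::cout << Split(data, '\0', ',').size() << " tuples\n";  // "2 tuples"
  return 0;
}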

Testing: Created a zero-delimited table as described in the JIRA,
using a MySQL-backed Hive metastore; ran select * from tab_separated
on the table and updated the unit test.  Additionally, built with
ASAN and ran the unit test.

Change-Id: I2190c57681f29f34ee1eb393e30dfdda5839098c
Reviewed-on: http://gerrit.cloudera.org:8080/9857
Tested-by: Impala Public Jenkins
Reviewed-by: Zach Amsden <za...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/380e17aa
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/380e17aa
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/380e17aa

Branch: refs/heads/master
Commit: 380e17aa3cf678d4502245f12d8a77f58f4b8996
Parents: 8e5f923
Author: Zach Amsden <za...@cloudera.com>
Authored: Wed Mar 7 01:44:43 2018 +0000
Committer: Zach Amsden <za...@cloudera.com>
Committed: Thu Apr 5 18:42:03 2018 +0000

----------------------------------------------------------------------
 be/src/exec/delimited-text-parser-test.cc  | 61 +++++++++++++++-----
 be/src/exec/delimited-text-parser.cc       | 74 +++++++++++++++++--------
 be/src/exec/delimited-text-parser.h        | 43 +++++++++-----
 be/src/exec/delimited-text-parser.inline.h | 70 ++++++++++++-----------
 be/src/exec/hdfs-sequence-scanner.cc       |  2 +-
 be/src/exec/hdfs-sequence-scanner.h        |  3 +-
 be/src/exec/hdfs-text-scanner.cc           |  2 +-
 be/src/exec/hdfs-text-scanner.h            |  3 +-
 8 files changed, 172 insertions(+), 86 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/380e17aa/be/src/exec/delimited-text-parser-test.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/delimited-text-parser-test.cc b/be/src/exec/delimited-text-parser-test.cc
index 3156b36..76d9444 100644
--- a/be/src/exec/delimited-text-parser-test.cc
+++ b/be/src/exec/delimited-text-parser-test.cc
@@ -24,7 +24,7 @@
 
 namespace impala {
 
-void Validate(DelimitedTextParser* parser, const string& data,
+void Validate(TupleDelimitedTextParser* parser, const string& data,
     int expected_offset, char tuple_delim, int expected_num_tuples,
     int expected_num_fields) {
   parser->ParserReset();
@@ -72,8 +72,8 @@ TEST(DelimitedTextParser, Basic) {
   bool is_materialized_col[NUM_COLS];
   for (int i = 0; i < NUM_COLS; ++i) is_materialized_col[i] = true;
 
-  DelimitedTextParser no_escape_parser(NUM_COLS, 0, is_materialized_col,
-                                       TUPLE_DELIM, FIELD_DELIM, COLLECTION_DELIM);
+  TupleDelimitedTextParser no_escape_parser(NUM_COLS, 0, is_materialized_col,
+                                            TUPLE_DELIM, FIELD_DELIM, COLLECTION_DELIM);
   // Note that only complete tuples "count"
   Validate(&no_escape_parser, "no_delims", -1, TUPLE_DELIM, 0, 0);
   Validate(&no_escape_parser, "abc||abc", 4, TUPLE_DELIM, 1, 1);
@@ -81,9 +81,9 @@ TEST(DelimitedTextParser, Basic) {
   Validate(&no_escape_parser, "a|bcd", 2, TUPLE_DELIM, 0, 0);
 
   // Test with escape char
-  DelimitedTextParser escape_parser(NUM_COLS, 0, is_materialized_col,
-                                    TUPLE_DELIM, FIELD_DELIM, COLLECTION_DELIM,
-                                    ESCAPE_CHAR);
+  TupleDelimitedTextParser escape_parser(NUM_COLS, 0, is_materialized_col,
+                                         TUPLE_DELIM, FIELD_DELIM, COLLECTION_DELIM,
+                                         ESCAPE_CHAR);
   Validate(&escape_parser, "a@|a|bcd", 5, TUPLE_DELIM, 0, 0);
   Validate(&escape_parser, "a@@|a|bcd", 4, TUPLE_DELIM, 1, 1);
   Validate(&escape_parser, "a@@@|a|bcd", 7, TUPLE_DELIM, 0, 0);
@@ -127,8 +127,8 @@ TEST(DelimitedTextParser, Fields) {
   bool is_materialized_col[NUM_COLS];
   for (int i = 0; i < NUM_COLS; ++i) is_materialized_col[i] = true;
 
-  DelimitedTextParser no_escape_parser(NUM_COLS, 0, is_materialized_col,
-                                       TUPLE_DELIM, FIELD_DELIM, COLLECTION_DELIM);
+  TupleDelimitedTextParser no_escape_parser(NUM_COLS, 0, is_materialized_col,
+                                            TUPLE_DELIM, FIELD_DELIM, COLLECTION_DELIM);
 
   Validate(&no_escape_parser, "a,b|c,d|e,f", 4, TUPLE_DELIM, 1, 3);
   Validate(&no_escape_parser, "b|c,d|e,f", 2, TUPLE_DELIM, 1, 3);
@@ -137,9 +137,9 @@ TEST(DelimitedTextParser, Fields) {
   const string str10("a,\0|c,d|e", 9);
   Validate(&no_escape_parser, str10, 4, TUPLE_DELIM, 1, 2);
 
-  DelimitedTextParser escape_parser(NUM_COLS, 0, is_materialized_col,
-                                    TUPLE_DELIM, FIELD_DELIM, COLLECTION_DELIM,
-                                    ESCAPE_CHAR);
+  TupleDelimitedTextParser escape_parser(NUM_COLS, 0, is_materialized_col,
+                                         TUPLE_DELIM, FIELD_DELIM, COLLECTION_DELIM,
+                                         ESCAPE_CHAR);
 
   Validate(&escape_parser, "a,b|c,d|e,f", 4, TUPLE_DELIM, 1, 3);
   Validate(&escape_parser, "a,@|c|e,f", 6, TUPLE_DELIM, 0, 1);
@@ -148,14 +148,21 @@ TEST(DelimitedTextParser, Fields) {
 
 TEST(DelimitedTextParser, SpecialDelimiters) {
   const char TUPLE_DELIM = '\n'; // implies '\r' and "\r\n" are also delimiters
+  const char NUL_DELIM = '\0';
   const int NUM_COLS = 1;
+  const int MAX_COLS = 2;
 
-  bool is_materialized_col[NUM_COLS];
-  for (int i = 0; i < NUM_COLS; ++i) is_materialized_col[i] = true;
+  bool is_materialized_col[MAX_COLS];
+  for (int i = 0; i < MAX_COLS; ++i) is_materialized_col[i] = true;
 
-  DelimitedTextParser tuple_delim_parser(NUM_COLS, 0, is_materialized_col,
+  TupleDelimitedTextParser tuple_delim_parser(NUM_COLS, 0, is_materialized_col,
       TUPLE_DELIM);
 
+  TupleDelimitedTextParser nul_tuple_parser(NUM_COLS, 0, is_materialized_col, NUL_DELIM);
+
+  TupleDelimitedTextParser nul_field_parser(MAX_COLS, 0, is_materialized_col,
+                                            TUPLE_DELIM, NUL_DELIM);
+
   // Non-SSE case
   Validate(&tuple_delim_parser, "A\r\nB", 3, TUPLE_DELIM, 0, 0);
   Validate(&tuple_delim_parser, "A\rB", 2, TUPLE_DELIM, 0, 0);
@@ -165,6 +172,16 @@ TEST(DelimitedTextParser, SpecialDelimiters) {
   Validate(&tuple_delim_parser, "A\rB\nC\r\nD", 2, TUPLE_DELIM, 2, 2);
   Validate(&tuple_delim_parser, "\r\r\n\n", 1, TUPLE_DELIM, 2, 2);
 
+  // NUL tuple delimiter; no field delimiter
+  const string nul1("\0\0\0", 3);
+  const string nul2("AAA\0BBB\0", 8);
+  const string nul3("\n\0\r\0\r\n\0", 7);
+  const string nul4("\n\0\r\0\r\n", 6);
+  Validate(&nul_tuple_parser, nul1, 1, NUL_DELIM, 2, 2);
+  Validate(&nul_tuple_parser, nul2, 4, NUL_DELIM, 1, 1);
+  Validate(&nul_tuple_parser, nul3, 2, NUL_DELIM, 2, 2);
+  Validate(&nul_tuple_parser, nul4, 2, NUL_DELIM, 1, 1);
+
   // SSE case
   string data = "\rAAAAAAAAAAAAAAA";
   DCHECK_EQ(data.size(), SSEUtil::CHARS_PER_128_BIT_REGISTER);
@@ -178,6 +195,22 @@ TEST(DelimitedTextParser, SpecialDelimiters) {
   data = "\r\nAAA\n\r\r\nAAAAAAA";
   DCHECK_EQ(data.size(), SSEUtil::CHARS_PER_128_BIT_REGISTER);
   Validate(&tuple_delim_parser, data, 2, TUPLE_DELIM, 3, 3);
+
+  // NUL SSE case
+  const string nulsse1("AAAAA\0AAAAAAAAAAA\0AAAAAAAAAAAA\0\0", 32);
+  const string nulsse2("AAAAA\0AAAAAAAAAAA\0AAAAAAAAAAAA\0A", 32);
+  const string nulsse3("AAA\0BBBbbbbbbbbbbbbbbbbbbb\0cccc,ddd\0", 36);
+  const string nulsse4("AAA\0BBBbbbbbbbbbbbbbbbbbbb\0cccc,dddd", 36);
+  Validate(&nul_tuple_parser, nulsse1, 6, NUL_DELIM, 3, 3);
+  Validate(&nul_tuple_parser, nulsse2, 6, NUL_DELIM, 2, 2);
+  Validate(&nul_tuple_parser, nulsse3, 4, NUL_DELIM, 2, 2);
+  Validate(&nul_tuple_parser, nulsse4, 4, NUL_DELIM, 1, 1);
+
+  // NUL Field delimiters
+  const string field1("\na\0b\0c\n", 7);
+  const string field2("aaaa\na\0b\0c\naaaaa\0b\na\0b\0c\n", 25);
+  Validate(&nul_field_parser, field1, 1, TUPLE_DELIM, 1, 2);
+  Validate(&nul_field_parser, field2, 5, TUPLE_DELIM, 3, 6);
 }
 
 // TODO: expand test for other delimited text parser functions/cases.

http://git-wip-us.apache.org/repos/asf/impala/blob/380e17aa/be/src/exec/delimited-text-parser.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/delimited-text-parser.cc b/be/src/exec/delimited-text-parser.cc
index 18fcde1..7db65fd 100644
--- a/be/src/exec/delimited-text-parser.cc
+++ b/be/src/exec/delimited-text-parser.cc
@@ -24,7 +24,8 @@
 
 using namespace impala;
 
-DelimitedTextParser::DelimitedTextParser(
+template<bool DELIMITED_TUPLES>
+DelimitedTextParser<DELIMITED_TUPLES>::DelimitedTextParser(
     int num_cols, int num_partition_keys, const bool* is_materialized_col,
     char tuple_delim, char field_delim, char collection_item_delim, char escape_char)
     : is_materialized_col_(is_materialized_col),
@@ -72,7 +73,7 @@ DelimitedTextParser::DelimitedTextParser(
     memset(low_mask_, 0, sizeof(low_mask_));
   }
 
-  if (tuple_delim != '\0') {
+  if (DELIMITED_TUPLES) {
     search_chars[num_delims_++] = tuple_delim_;
     ++num_tuple_delims_;
     // Hive will treats \r (^M) as an alternate tuple delimiter, but \r\n is a
@@ -82,29 +83,43 @@ DelimitedTextParser::DelimitedTextParser(
       ++num_tuple_delims_;
     }
     xmm_tuple_search_ = _mm_loadu_si128(reinterpret_cast<__m128i*>(search_chars));
-  }
-
-  if (field_delim != '\0' || collection_item_delim != '\0') {
+    if (field_delim_ != tuple_delim_) search_chars[num_delims_++] = field_delim_;
+  } else {
     search_chars[num_delims_++] = field_delim_;
-    search_chars[num_delims_++] = collection_item_delim_;
   }
 
+  if (collection_item_delim != '\0') search_chars[num_delims_++] = collection_item_delim_;
+
   DCHECK_GT(num_delims_, 0);
   xmm_delim_search_ = _mm_loadu_si128(reinterpret_cast<__m128i*>(search_chars));
 
   ParserReset();
 }
 
-void DelimitedTextParser::ParserReset() {
+template
+DelimitedTextParser<true>::DelimitedTextParser(
+    int num_cols, int num_partition_keys, const bool* is_materialized_col,
+    char tuple_delim, char field_delim, char collection_item_delim, char escape_char);
+
+template
+DelimitedTextParser<false>::DelimitedTextParser(
+    int num_cols, int num_partition_keys, const bool* is_materialized_col,
+    char tuple_delim, char field_delim, char collection_item_delim, char escape_char);
+
+template<bool DELIMITED_TUPLES>
+void DelimitedTextParser<DELIMITED_TUPLES>::ParserReset() {
   current_column_has_escape_ = false;
   last_char_is_escape_ = false;
   last_row_delim_offset_ = -1;
   column_idx_ = num_partition_keys_;
 }
 
+template void DelimitedTextParser<true>::ParserReset();
+
 // Parsing raw csv data into FieldLocation descriptors.
-Status DelimitedTextParser::ParseFieldLocations(int max_tuples, int64_t remaining_len,
-    char** byte_buffer_ptr, char** row_end_locations,
+template<bool DELIMITED_TUPLES>
+Status DelimitedTextParser<DELIMITED_TUPLES>::ParseFieldLocations(int max_tuples,
+    int64_t remaining_len, char** byte_buffer_ptr, char** row_end_locations,
     FieldLocation* field_locations,
     int* num_tuples, int* num_fields, char** next_column_start) {
   // Start of this batch.
@@ -133,10 +148,10 @@ Status DelimitedTextParser::ParseFieldLocations(int max_tuples, int64_t remainin
   while (remaining_len > 0) {
     bool new_tuple = false;
     bool new_col = false;
-    unfinished_tuple_ = true;
+    if (DELIMITED_TUPLES) unfinished_tuple_ = true;
 
     if (!last_char_is_escape_) {
-      if (tuple_delim_ != '\0' && (**byte_buffer_ptr == tuple_delim_ ||
+      if (DELIMITED_TUPLES && (**byte_buffer_ptr == tuple_delim_ ||
            (tuple_delim_ == '\n' && **byte_buffer_ptr == '\r'))) {
         new_tuple = true;
         new_col = true;
@@ -166,6 +181,7 @@ Status DelimitedTextParser::ParseFieldLocations(int max_tuples, int64_t remainin
         row_end_locations[*num_tuples] = *byte_buffer_ptr;
         ++(*num_tuples);
       }
+      DCHECK(DELIMITED_TUPLES);
       unfinished_tuple_ = false;
       last_row_delim_offset_ = **byte_buffer_ptr == '\r' ? remaining_len - 1 : -1;
       if (*num_tuples == max_tuples) {
@@ -185,7 +201,7 @@ Status DelimitedTextParser::ParseFieldLocations(int max_tuples, int64_t remainin
 
   // For formats that store the length of the row, the row is not delimited:
   // e.g. Sequence files.
-  if (tuple_delim_ == '\0') {
+  if (!DELIMITED_TUPLES) {
     DCHECK_EQ(remaining_len, 0);
     RETURN_IF_ERROR(AddColumn<true>(*byte_buffer_ptr - *next_column_start,
         next_column_start, num_fields, field_locations));
@@ -193,18 +209,30 @@ Status DelimitedTextParser::ParseFieldLocations(int max_tuples, int64_t remainin
     DCHECK(status.ok());
     column_idx_ = num_partition_keys_;
     ++(*num_tuples);
-    unfinished_tuple_ = false;
   }
   return Status::OK();
 }
 
-// Find the first instance of the tuple delimiter. This will find the start of the first
-// full tuple in buffer by looking for the end of the previous tuple.
-int64_t DelimitedTextParser::FindFirstInstance(const char* buffer, int64_t len) {
+template
+Status DelimitedTextParser<true>::ParseFieldLocations(int max_tuples,
+    int64_t remaining_len, char** byte_buffer_ptr, char** row_end_locations,
+    FieldLocation* field_locations,
+    int* num_tuples, int* num_fields, char** next_column_start);
+
+template
+Status DelimitedTextParser<false>::ParseFieldLocations(int max_tuples,
+    int64_t remaining_len, char** byte_buffer_ptr, char** row_end_locations,
+    FieldLocation* field_locations,
+    int* num_tuples, int* num_fields, char** next_column_start);
+
+template<bool DELIMITED_TUPLES>
+int64_t DelimitedTextParser<DELIMITED_TUPLES>::FindFirstInstance(const char* buffer,
+    int64_t len) {
   int64_t tuple_start = 0;
   const char* buffer_start = buffer;
   bool found = false;
 
+  DCHECK(DELIMITED_TUPLES);
   // If the last char in the previous buffer was \r then either return the start of
   // this buffer or skip a \n at the beginning of the buffer.
   if (last_row_delim_offset_ != -1) {
@@ -226,13 +254,10 @@ restart:
       int tuple_mask = _mm_extract_epi16(xmm_tuple_mask, 0);
       if (tuple_mask != 0) {
         found = true;
-        for (int i = 0; i < SSEUtil::CHARS_PER_128_BIT_REGISTER; ++i) {
-          if ((tuple_mask & SSEUtil::SSE_BITMASK[i]) != 0) {
-            tuple_start += i + 1;
-            buffer += i + 1;
-            break;
-          }
-        }
+        // Find first set bit (1-based)
+        int i = ffs(tuple_mask);
+        tuple_start += i;
+        buffer += i;
         break;
       }
       tuple_start += SSEUtil::CHARS_PER_128_BIT_REGISTER;
@@ -295,3 +320,6 @@ restart:
   }
   return tuple_start;
 }
+
+template
+int64_t DelimitedTextParser<true>::FindFirstInstance(const char* buffer, int64_t len);
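
The ffs()-based rewrite of the bit scan above is the standard
find-first-set idiom: given the 16-bit SSE match mask, ffs() (POSIX,
<strings.h>) returns the 1-based index of the lowest set bit, or 0 when
no bit is set, replacing the per-bit loop over SSE_BITMASK.  A minimal
standalone check of that behavior:

// Sketch verifying the ffs() semantics relied on above (POSIX ffs()).
#include <cassert>
#include <strings.h>

int main() {
  assert(ffs(0x0) == 0);   // empty mask: no delimiter in this register
  assert(ffs(0x1) == 1);   // match at byte 0 -> advance the buffer by 1
  assert(ffs(0x28) == 4);  // 0b101000: lowest set bit is bit 3 -> advance 4
  return 0;
}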

http://git-wip-us.apache.org/repos/asf/impala/blob/380e17aa/be/src/exec/delimited-text-parser.h
----------------------------------------------------------------------
diff --git a/be/src/exec/delimited-text-parser.h b/be/src/exec/delimited-text-parser.h
index b966081..9b89127 100644
--- a/be/src/exec/delimited-text-parser.h
+++ b/be/src/exec/delimited-text-parser.h
@@ -25,22 +25,27 @@
 
 namespace impala {
 
+template <bool DELIMITED_TUPLES>
 class DelimitedTextParser {
  public:
 
   /// The Delimited Text Parser parses text rows that are delimited by specific
   /// characters:
-  ///   tuple_delim: delimits tuples
+  ///   tuple_delim: delimits tuples.  Only used if DELIMITED_TUPLES is true.
   ///   field_delim: delimits fields
   ///   collection_item_delim: delimits collection items
   ///   escape_char: escape delimiters, make them part of the data.
-  //
+  ///
+  /// If the template parameter DELIMITED_TUPLES is false there is no support
+  /// for tuple delimiters and we do not need to search for them.  Any value
+  /// may be passed for tuple_delim, as it is ignored.
+  ///
   /// 'num_cols' is the total number of columns including partition keys.
-  //
+  ///
   /// 'is_materialized_col' should be initialized to an array of length 'num_cols', with
   /// is_materialized_col[i] = <true if column i should be materialized, false otherwise>
   /// Owned by caller.
-  //
+  ///
   /// The main method is ParseData which fills in a vector of pointers and lengths to the
   /// fields.  It also can handle an escape character which masks a tuple or field
   /// delimiter that occurs in the data.
@@ -91,14 +96,14 @@ class DelimitedTextParser {
   /// This function is used to parse sequence file records which do not need to
   /// parse for tuple delimiters. Returns an error status if any column exceeds the
   /// size limit. See AddColumn() for details.
-  template <bool process_escapes>
+  /// This function is disabled for non-sequence file parsing.
+  template <bool PROCESS_ESCAPES>
   Status ParseSingleTuple(int64_t len, char* buffer, FieldLocation* field_locations,
       int* num_fields);
 
   /// FindFirstInstance returns the position after the first non-escaped tuple
   /// delimiter from the starting offset.
   /// Used to find the start of a tuple if jumping into the middle of a text file.
-  /// Also used to find the sync marker for Sequenced and RC files.
   /// If no tuple delimiter is found within the buffer, return -1;
   int64_t FindFirstInstance(const char* buffer, int64_t len);
 
@@ -119,13 +124,16 @@ class DelimitedTextParser {
   /// by the number fields added.
   /// 'field_locations' will be updated with the start and length of the fields.
   /// Returns an error status if 'len' exceeds the size limit specified in AddColumn().
-  template <bool process_escapes>
+  template <bool PROCESS_ESCAPES>
   Status FillColumns(int64_t len, char** last_column, int* num_fields,
       impala::FieldLocation* field_locations);
 
   /// Return true if we have not seen a tuple delimiter for the current tuple being
   /// parsed (i.e., the last byte read was not a tuple delimiter).
-  bool HasUnfinishedTuple() { return unfinished_tuple_; }
+  bool HasUnfinishedTuple() {
+    DCHECK(DELIMITED_TUPLES);
+    return unfinished_tuple_;
+  }
 
  private:
   /// Initialize the parser state.
@@ -133,7 +141,7 @@ class DelimitedTextParser {
 
   /// Helper routine to add a column to the field_locations vector.
   /// Template parameter:
-  ///   process_escapes -- if true the the column may have escape characters
+  ///   PROCESS_ESCAPES -- if true the the column may have escape characters
   ///                      and the negative of the len will be stored.
   ///   len: length of the current column. The length of a column must fit in a 32-bit
   ///        signed integer (i.e. <= 2147483647 bytes). If a column is larger than that,
@@ -144,23 +152,29 @@ class DelimitedTextParser {
   /// Output:
   ///   field_locations: updated with start and length of current field.
   /// Return an error status if 'len' exceeds the size limit specified above.
-  template <bool process_escapes>
+  template <bool PROCESS_ESCAPES>
   Status AddColumn(int64_t len, char** next_column_start, int* num_fields,
       FieldLocation* field_locations);
 
   /// Helper routine to parse delimited text using SSE instructions.
   /// Identical arguments as ParseFieldLocations.
-  /// If the template argument, 'process_escapes' is true, this function will handle
+  /// If the template argument, 'PROCESS_ESCAPES' is true, this function will handle
   /// escapes, otherwise, it will assume the text is unescaped.  By using templates,
   /// we can special case the un-escaped path for better performance.  The unescaped
   /// path is optimized away by the compiler. Returns an error status if the length
   /// of any column exceeds the size limit. See AddColumn() for details.
-  template <bool process_escapes>
+  template <bool PROCESS_ESCAPES>
   Status ParseSse(int max_tuples, int64_t* remaining_len,
       char** byte_buffer_ptr, char** row_end_locations_,
       FieldLocation* field_locations,
       int* num_tuples, int* num_fields, char** next_column_start);
 
+  bool IsFieldOrCollectionItemDelimiter(char c) {
+    return (!DELIMITED_TUPLES && c == field_delim_) ||
+      (DELIMITED_TUPLES && field_delim_ != tuple_delim_ && c == field_delim_) ||
+      (collection_item_delim_ != '\0' && c == collection_item_delim_);
+  }
+
   /// SSE(xmm) register containing the tuple search character(s).
   __m128i xmm_tuple_search_;
 
@@ -214,7 +228,7 @@ class DelimitedTextParser {
   /// Character delimiting collection items (to become slots).
   char collection_item_delim_;
 
-  /// Character delimiting tuples.
+  /// Character delimiting tuples.  Only used if DELIMITED_TUPLES is true.
   char tuple_delim_;
 
   /// Whether or not the current column has an escape character in it
@@ -228,5 +242,8 @@ class DelimitedTextParser {
   bool unfinished_tuple_;
 };
 
+using TupleDelimitedTextParser = DelimitedTextParser<true>;
+using SequenceDelimitedTextParser = DelimitedTextParser<false>;
+
 }// namespace impala
 #endif// IMPALA_EXEC_DELIMITED_TEXT_PARSER_H
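
The new DELIMITED_TUPLES parameter applies the same compile-time-branch
pattern the parser already uses for PROCESS_ESCAPES: a constant bool
template argument lets the compiler drop the tuple-delimiter checks
entirely from the DelimitedTextParser<false> instantiation.  In
miniature (hypothetical names, not the Impala classes):

// Sketch of the compile-time-flag pattern; names are illustrative only.
#include <cstddef>

template <bool DELIMITED_TUPLES>
int CountDelims(const char* buf, size_t len, char tuple_delim,
    char field_delim) {
  int n = 0;
  for (size_t i = 0; i < len; ++i) {
    // This branch is dead code, and removed, when DELIMITED_TUPLES is
    // false, so the <false> instantiation never tests tuple_delim.
    if (DELIMITED_TUPLES && buf[i] == tuple_delim) ++n;
    if (buf[i] == field_delim) ++n;
  }
  return n;
}

int main() {
  const char data[] = "a,b\nc,d\n";
  // Analogous to TupleDelimitedTextParser = DelimitedTextParser<true>.
  return CountDelims<true>(data, sizeof(data) - 1, '\n', ',') == 4 ? 0 : 1;
}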

http://git-wip-us.apache.org/repos/asf/impala/blob/380e17aa/be/src/exec/delimited-text-parser.inline.h
----------------------------------------------------------------------
diff --git a/be/src/exec/delimited-text-parser.inline.h b/be/src/exec/delimited-text-parser.inline.h
index 02fa132..9fe737e 100644
--- a/be/src/exec/delimited-text-parser.inline.h
+++ b/be/src/exec/delimited-text-parser.inline.h
@@ -52,9 +52,10 @@ inline void ProcessEscapeMask(uint16_t escape_mask, bool* last_char_is_escape,
   *delim_mask &= ~escape_mask;
 }
 
-template <bool process_escapes>
-inline Status DelimitedTextParser::AddColumn(int64_t len, char** next_column_start,
-    int* num_fields, FieldLocation* field_locations) {
+template <bool DELIMITED_TUPLES>
+template <bool PROCESS_ESCAPES>
+inline Status DelimitedTextParser<DELIMITED_TUPLES>::AddColumn(int64_t len,
+    char** next_column_start, int* num_fields, FieldLocation* field_locations) {
   if (UNLIKELY(!BitUtil::IsNonNegative32Bit(len))) {
     return Status(TErrorCode::TEXT_PARSER_TRUNCATED_COLUMN, len);
   }
@@ -62,26 +63,27 @@ inline Status DelimitedTextParser::AddColumn(int64_t len, char** next_column_sta
     // Found a column that needs to be parsed, write the start/len to 'field_locations'
     field_locations[*num_fields].start = *next_column_start;
     int64_t field_len = len;
-    if (process_escapes && current_column_has_escape_) {
+    if (PROCESS_ESCAPES && current_column_has_escape_) {
       field_len = -len;
     }
     field_locations[*num_fields].len = static_cast<int32_t>(field_len);
     ++(*num_fields);
   }
-  if (process_escapes) current_column_has_escape_ = false;
+  if (PROCESS_ESCAPES) current_column_has_escape_ = false;
   *next_column_start += len + 1;
   ++column_idx_;
   return Status::OK();
 }
 
-template <bool process_escapes>
-inline Status DelimitedTextParser::FillColumns(int64_t len, char** last_column,
-    int* num_fields, FieldLocation* field_locations) {
+template <bool DELIMITED_TUPLES>
+template <bool PROCESS_ESCAPES>
+inline Status DelimitedTextParser<DELIMITED_TUPLES>::FillColumns(int64_t len,
+    char** last_column, int* num_fields, FieldLocation* field_locations) {
   // Fill in any columns missing from the end of the tuple.
   char* dummy = NULL;
   if (last_column == NULL) last_column = &dummy;
   while (column_idx_ < num_cols_) {
-    RETURN_IF_ERROR(AddColumn<process_escapes>(len, last_column,
+    RETURN_IF_ERROR(AddColumn<PROCESS_ESCAPES>(len, last_column,
         num_fields, field_locations));
     // The rest of the columns will be null.
     last_column = &dummy;
@@ -103,8 +105,9 @@ inline Status DelimitedTextParser::FillColumns(int64_t len, char** last_column,
 ///  Needle   = 'abcd000000000000' (we're searching for any a's, b's, c's or d's)
 ///  Haystack = 'asdfghjklhjbdwwc' (the raw string)
 ///  Result   = '1010000000011001'
-template <bool process_escapes>
-inline Status DelimitedTextParser::ParseSse(int max_tuples,
+template <bool DELIMITED_TUPLES>
+template <bool PROCESS_ESCAPES>
+inline Status DelimitedTextParser<DELIMITED_TUPLES>::ParseSse(int max_tuples,
     int64_t* remaining_len, char** byte_buffer_ptr,
     char** row_end_locations, FieldLocation* field_locations,
     int* num_tuples, int* num_fields, char** next_column_start) {
@@ -146,7 +149,7 @@ inline Status DelimitedTextParser::ParseSse(int max_tuples,
 
     uint16_t escape_mask = 0;
     // If the table does not use escape characters, skip processing for it.
-    if (process_escapes) {
+    if (PROCESS_ESCAPES) {
       DCHECK(escape_char_ != '\0');
       xmm_escape_mask = SSE4_cmpestrm<SSEUtil::STRCHR_MODE>(xmm_escape_search_, 1,
           xmm_buffer, SSEUtil::CHARS_PER_128_BIT_REGISTER);
@@ -156,8 +159,10 @@ inline Status DelimitedTextParser::ParseSse(int max_tuples,
 
     char* last_char = *byte_buffer_ptr + 15;
     bool last_char_is_unescaped_delim = delim_mask >> 15;
-    unfinished_tuple_ = !(last_char_is_unescaped_delim &&
-        (*last_char == tuple_delim_ || (tuple_delim_ == '\n' && *last_char == '\r')));
+    if (DELIMITED_TUPLES) {
+      unfinished_tuple_ = !(last_char_is_unescaped_delim &&
+          (*last_char == tuple_delim_ || (tuple_delim_ == '\n' && *last_char == '\r')));
+    }
 
     int last_col_idx = 0;
     // Process all non-zero bits in the delim_mask from lsb->msb.  If a bit
@@ -170,7 +175,7 @@ inline Status DelimitedTextParser::ParseSse(int max_tuples,
       // clear current bit
       delim_mask &= ~(SSEUtil::SSE_BITMASK[n]);
 
-      if (process_escapes) {
+      if (PROCESS_ESCAPES) {
         // Determine if there was an escape character between [last_col_idx, n]
         bool escaped = (escape_mask & low_mask_[last_col_idx] & high_mask_[n]) != 0;
         current_column_has_escape_ |= escaped;
@@ -179,13 +184,14 @@ inline Status DelimitedTextParser::ParseSse(int max_tuples,
 
       char* delim_ptr = *byte_buffer_ptr + n;
 
-      if (*delim_ptr == field_delim_ || *delim_ptr == collection_item_delim_) {
-        RETURN_IF_ERROR(AddColumn<process_escapes>(delim_ptr - *next_column_start,
+      if (IsFieldOrCollectionItemDelimiter(*delim_ptr)) {
+        RETURN_IF_ERROR(AddColumn<PROCESS_ESCAPES>(delim_ptr - *next_column_start,
             next_column_start, num_fields, field_locations));
         continue;
       }
 
-      if (*delim_ptr == tuple_delim_ || (tuple_delim_ == '\n' && *delim_ptr == '\r')) {
+      if (DELIMITED_TUPLES &&
+          (*delim_ptr == tuple_delim_ || (tuple_delim_ == '\n' && *delim_ptr == '\r'))) {
         if (UNLIKELY(
                 last_row_delim_offset_ == *remaining_len - n && *delim_ptr == '\n')) {
           // If the row ended in \r\n then move the next start past the \n
@@ -193,7 +199,7 @@ inline Status DelimitedTextParser::ParseSse(int max_tuples,
           last_row_delim_offset_ = -1;
           continue;
         }
-        RETURN_IF_ERROR(AddColumn<process_escapes>(delim_ptr - *next_column_start,
+        RETURN_IF_ERROR(AddColumn<PROCESS_ESCAPES>(delim_ptr - *next_column_start,
             next_column_start, num_fields, field_locations));
         Status status = FillColumns<false>(0, NULL, num_fields, field_locations);
         DCHECK(status.ok());
@@ -204,7 +210,7 @@ inline Status DelimitedTextParser::ParseSse(int max_tuples,
         last_row_delim_offset_ = *delim_ptr == '\r' ? *remaining_len - n - 1 : -1;
         if (UNLIKELY(*num_tuples == max_tuples)) {
           (*byte_buffer_ptr) += (n + 1);
-          if (process_escapes) last_char_is_escape_ = false;
+          if (PROCESS_ESCAPES) last_char_is_escape_ = false;
           *remaining_len -= (n + 1);
           // If the last character we processed was \r then set the offset to 0
           // so that we will use it at the beginning of the next batch.
@@ -214,7 +220,7 @@ inline Status DelimitedTextParser::ParseSse(int max_tuples,
       }
     }
 
-    if (process_escapes) {
+    if (PROCESS_ESCAPES) {
       // Determine if there was an escape character between (last_col_idx, 15)
       bool unprocessed_escape = escape_mask & low_mask_[last_col_idx] & high_mask_[15];
       current_column_has_escape_ |= unprocessed_escape;
@@ -227,9 +233,10 @@ inline Status DelimitedTextParser::ParseSse(int max_tuples,
 }
 
 /// Simplified version of ParseSSE which does not handle tuple delimiters.
-template <bool process_escapes>
-inline Status DelimitedTextParser::ParseSingleTuple(int64_t remaining_len, char* buffer,
-    FieldLocation* field_locations, int* num_fields) {
+template<>
+template <bool PROCESS_ESCAPES>
+inline Status DelimitedTextParser<false>::ParseSingleTuple(int64_t remaining_len,
+    char* buffer, FieldLocation* field_locations, int* num_fields) {
   char* next_column_start = buffer;
   __m128i xmm_buffer, xmm_delim_mask, xmm_escape_mask;
 
@@ -246,7 +253,7 @@ inline Status DelimitedTextParser::ParseSingleTuple(int64_t remaining_len, char*
 
       uint16_t escape_mask = 0;
       // If the table does not use escape characters, skip processing for it.
-      if (process_escapes) {
+      if (PROCESS_ESCAPES) {
         DCHECK(escape_char_ != '\0');
         xmm_escape_mask = SSE4_cmpestrm<SSEUtil::STRCHR_MODE>(xmm_escape_search_, 1,
             xmm_buffer, SSEUtil::CHARS_PER_128_BIT_REGISTER);
@@ -263,7 +270,7 @@ inline Status DelimitedTextParser::ParseSingleTuple(int64_t remaining_len, char*
         DCHECK_GE(n, 0);
         DCHECK_LT(n, 16);
 
-        if (process_escapes) {
+        if (PROCESS_ESCAPES) {
           // Determine if there was an escape character between [last_col_idx, n]
           bool escaped = (escape_mask & low_mask_[last_col_idx] & high_mask_[n]) != 0;
           current_column_has_escape_ |= escaped;
@@ -273,11 +280,11 @@ inline Status DelimitedTextParser::ParseSingleTuple(int64_t remaining_len, char*
         // clear current bit
         delim_mask &= ~(SSEUtil::SSE_BITMASK[n]);
 
-        RETURN_IF_ERROR(AddColumn<process_escapes>(buffer + n - next_column_start,
+        RETURN_IF_ERROR(AddColumn<PROCESS_ESCAPES>(buffer + n - next_column_start,
             &next_column_start, num_fields, field_locations));
       }
 
-      if (process_escapes) {
+      if (PROCESS_ESCAPES) {
         // Determine if there was an escape character between (last_col_idx, 15)
         bool unprocessed_escape = escape_mask & low_mask_[last_col_idx] & high_mask_[15];
         current_column_has_escape_ |= unprocessed_escape;
@@ -296,9 +303,8 @@ inline Status DelimitedTextParser::ParseSingleTuple(int64_t remaining_len, char*
       last_char_is_escape_ = false;
     }
 
-    if (!last_char_is_escape_ &&
-          (*buffer == field_delim_ || *buffer == collection_item_delim_)) {
-      RETURN_IF_ERROR(AddColumn<process_escapes>(buffer - next_column_start,
+    if (!last_char_is_escape_ && IsFieldOrCollectionItemDelimiter(*buffer)) {
+      RETURN_IF_ERROR(AddColumn<PROCESS_ESCAPES>(buffer - next_column_start,
           &next_column_start, num_fields, field_locations));
     }
 
@@ -308,7 +314,7 @@ inline Status DelimitedTextParser::ParseSingleTuple(int64_t remaining_len, char*
 
   // Last column does not have a delimiter after it.  Add that column and also
   // pad with empty cols if the input is ragged.
-  return FillColumns<process_escapes>(buffer - next_column_start,
+  return FillColumns<PROCESS_ESCAPES>(buffer - next_column_start,
       &next_column_start, num_fields, field_locations);
 }
 

http://git-wip-us.apache.org/repos/asf/impala/blob/380e17aa/be/src/exec/hdfs-sequence-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-sequence-scanner.cc b/be/src/exec/hdfs-sequence-scanner.cc
index 346a18a..8a9151e 100644
--- a/be/src/exec/hdfs-sequence-scanner.cc
+++ b/be/src/exec/hdfs-sequence-scanner.cc
@@ -73,7 +73,7 @@ Status HdfsSequenceScanner::InitNewRange() {
   text_converter_.reset(new TextConverter(hdfs_partition->escape_char(),
       scan_node_->hdfs_table()->null_column_value()));
 
-  delimited_text_parser_.reset(new DelimitedTextParser(
+  delimited_text_parser_.reset(new SequenceDelimitedTextParser(
       scan_node_->hdfs_table()->num_cols(), scan_node_->num_partition_keys(),
       scan_node_->is_materialized_col(), '\0', hdfs_partition->field_delim(),
       hdfs_partition->collection_delim(), hdfs_partition->escape_char()));

http://git-wip-us.apache.org/repos/asf/impala/blob/380e17aa/be/src/exec/hdfs-sequence-scanner.h
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-sequence-scanner.h b/be/src/exec/hdfs-sequence-scanner.h
index 4845edb..463ffc7 100644
--- a/be/src/exec/hdfs-sequence-scanner.h
+++ b/be/src/exec/hdfs-sequence-scanner.h
@@ -153,6 +153,7 @@
 
 namespace impala {
 
+template <bool>
 class DelimitedTextParser;
 
 class HdfsSequenceScanner : public BaseSequenceScanner {
@@ -222,7 +223,7 @@ class HdfsSequenceScanner : public BaseSequenceScanner {
   Status GetRecord(uint8_t** record_ptr, int64_t* record_len) WARN_UNUSED_RESULT;
 
   /// Helper class for picking fields and rows from delimited text.
-  boost::scoped_ptr<DelimitedTextParser> delimited_text_parser_;
+  boost::scoped_ptr<DelimitedTextParser<false>> delimited_text_parser_;
   std::vector<FieldLocation> field_locations_;
 
   /// Data that is fixed across headers.  This struct is shared between scan ranges.

http://git-wip-us.apache.org/repos/asf/impala/blob/380e17aa/be/src/exec/hdfs-text-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-text-scanner.cc b/be/src/exec/hdfs-text-scanner.cc
index 253bcc8..b78115d 100644
--- a/be/src/exec/hdfs-text-scanner.cc
+++ b/be/src/exec/hdfs-text-scanner.cc
@@ -203,7 +203,7 @@ Status HdfsTextScanner::InitNewRange() {
     collection_delim = '\0';
   }
 
-  delimited_text_parser_.reset(new DelimitedTextParser(
+  delimited_text_parser_.reset(new TupleDelimitedTextParser(
       scan_node_->hdfs_table()->num_cols(), scan_node_->num_partition_keys(),
       scan_node_->is_materialized_col(), hdfs_partition->line_delim(),
       field_delim, collection_delim, hdfs_partition->escape_char()));

http://git-wip-us.apache.org/repos/asf/impala/blob/380e17aa/be/src/exec/hdfs-text-scanner.h
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-text-scanner.h b/be/src/exec/hdfs-text-scanner.h
index 610c612..25886ba 100644
--- a/be/src/exec/hdfs-text-scanner.h
+++ b/be/src/exec/hdfs-text-scanner.h
@@ -25,6 +25,7 @@
 
 namespace impala {
 
+template<bool>
 class DelimitedTextParser;
 class ScannerContext;
 struct HdfsFileDesc;
@@ -237,7 +238,7 @@ class HdfsTextScanner : public HdfsScanner {
   int slot_idx_;
 
   /// Helper class for picking fields and rows from delimited text.
-  boost::scoped_ptr<DelimitedTextParser> delimited_text_parser_;
+  boost::scoped_ptr<DelimitedTextParser<true>> delimited_text_parser_;
 
   /// Return field locations from the Delimited Text Parser.
   std::vector<FieldLocation> field_locations_;


[5/7] impala git commit: [DOCS] Removed old files no longer in use

Posted by ph...@apache.org.
[DOCS] Removed old files no longer in use

Change-Id: Ia0aaa756bf0fc0092186ebc293543408b403aa5e
Reviewed-on: http://gerrit.cloudera.org:8080/9938
Reviewed-by: John Russell <jr...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/b131ee79
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/b131ee79
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/b131ee79

Branch: refs/heads/master
Commit: b131ee7983a787912c79e3432dd9a18f7c2fa661
Parents: ab7afa7
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Thu Apr 5 14:48:49 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Thu Apr 5 23:49:56 2018 +0000

----------------------------------------------------------------------
 docs/Cloudera-Impala-Release-Notes.ditamap | 28 -----------------
 docs/impala.ditamap                        |  2 +-
 docs/impala_release_notes.ditamap          | 28 +++++++++++++++++
 docs/topics/impala_alter_function.xml      | 39 -----------------------
 docs/topics/impala_window_functions.xml    | 41 -------------------------
 5 files changed, 29 insertions(+), 109 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/b131ee79/docs/Cloudera-Impala-Release-Notes.ditamap
----------------------------------------------------------------------
diff --git a/docs/Cloudera-Impala-Release-Notes.ditamap b/docs/Cloudera-Impala-Release-Notes.ditamap
deleted file mode 100644
index 4f86e42..0000000
--- a/docs/Cloudera-Impala-Release-Notes.ditamap
+++ /dev/null
@@ -1,28 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements.  See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership.  The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License.  You may obtain a copy of the License at
-
-  http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing,
-software distributed under the License is distributed on an
-"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-KIND, either express or implied.  See the License for the
-specific language governing permissions and limitations
-under the License.
--->
-<!DOCTYPE map PUBLIC "-//OASIS//DTD DITA Map//EN" "map.dtd">
-<map audience="standalone">
-  <title>Cloudera Impala Release Notes</title>
-  <topicref href="topics/impala_relnotes.xml" audience="HTML standalone"/>
-  <topicref href="topics/impala_new_features.xml"/>
-  <topicref href="topics/impala_incompatible_changes.xml"/>
-  <topicref href="topics/impala_known_issues.xml"/>
-  <topicref href="topics/impala_fixed_issues.xml"/>
-</map>

http://git-wip-us.apache.org/repos/asf/impala/blob/b131ee79/docs/impala.ditamap
----------------------------------------------------------------------
diff --git a/docs/impala.ditamap b/docs/impala.ditamap
index 0f010a2..89d9553 100644
--- a/docs/impala.ditamap
+++ b/docs/impala.ditamap
@@ -305,7 +305,7 @@ under the License.
 <!-- End of former contents of Installing-and-Using-Impala_xi42979.ditamap. -->
   <topicref audience="standalone" href="topics/impala_faq.xml"/>
   <topicref audience="standalone" href="topics/impala_release_notes.xml">
-    <mapref href="Cloudera-Impala-Release-Notes.ditamap" format="ditamap"
+    <mapref href="impala_release_notes.ditamap" format="ditamap"
       audience="standalone"/>
   </topicref>
 

http://git-wip-us.apache.org/repos/asf/impala/blob/b131ee79/docs/impala_release_notes.ditamap
----------------------------------------------------------------------
diff --git a/docs/impala_release_notes.ditamap b/docs/impala_release_notes.ditamap
new file mode 100644
index 0000000..58ad5ab
--- /dev/null
+++ b/docs/impala_release_notes.ditamap
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE map PUBLIC "-//OASIS//DTD DITA Map//EN" "map.dtd">
+<map audience="standalone">
+  <title>Apache Impala Release Notes</title>
+  <topicref href="topics/impala_relnotes.xml" audience="HTML standalone"/>
+  <topicref href="topics/impala_new_features.xml"/>
+  <topicref href="topics/impala_incompatible_changes.xml"/>
+  <topicref href="topics/impala_known_issues.xml"/>
+  <topicref href="topics/impala_fixed_issues.xml"/>
+</map>

http://git-wip-us.apache.org/repos/asf/impala/blob/b131ee79/docs/topics/impala_alter_function.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_alter_function.xml b/docs/topics/impala_alter_function.xml
deleted file mode 100644
index 243222f..0000000
--- a/docs/topics/impala_alter_function.xml
+++ /dev/null
@@ -1,39 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements.  See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership.  The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License.  You may obtain a copy of the License at
-
-  http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing,
-software distributed under the License is distributed on an
-"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-KIND, either express or implied.  See the License for the
-specific language governing permissions and limitations
-under the License.
--->
-<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
-<concept audience="hidden" rev="1.x" id="alter_function">
-
-  <title>ALTER FUNCTION Statement</title>
-  <titlealts audience="PDF"><navtitle>ALTER FUNCTION</navtitle></titlealts>
-  <prolog>
-    <metadata>
-      <data name="Category" value="Impala"/>
-      <data name="Category" value="SQL"/>
-      <data name="Category" value="DDL"/>
-      <data name="Category" value="Developers"/>
-      <data name="Category" value="Data Analysts"/>
-    </metadata>
-  </prolog>
-
-  <conbody>
-
-    <p/>
-  </conbody>
-</concept>

http://git-wip-us.apache.org/repos/asf/impala/blob/b131ee79/docs/topics/impala_window_functions.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_window_functions.xml b/docs/topics/impala_window_functions.xml
deleted file mode 100644
index 9716403..0000000
--- a/docs/topics/impala_window_functions.xml
+++ /dev/null
@@ -1,41 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements.  See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership.  The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License.  You may obtain a copy of the License at
-
-  http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing,
-software distributed under the License is distributed on an
-"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-KIND, either express or implied.  See the License for the
-specific language governing permissions and limitations
-under the License.
--->
-<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
-<concept rev="1.3.0" id="window_functions">
-
-  <title>Window Functions</title>
-  <prolog>
-    <metadata>
-      <data name="Category" value="Impala"/>
-    </metadata>
-  </prolog>
-
-  <conbody>
-
-    <p>
-      <indexterm audience="hidden">window functions</indexterm>
-      Window functions are a special category of built-in functions. They produce one output value for each input
-      row, like scalar functions such as <codeph>length()</codeph> or <codeph>substr()</codeph>. Yet like aggregate
-      functions, they also examine the contents of multiple input rows to compute each output value.
-    </p>
-
-    <p outputclass="toc inpage"/>
-  </conbody>
-</concept>


[6/7] impala git commit: KUDU-2401: External TLS certificate with Intermediate CA in server cert file fails

Posted by ph...@apache.org.
KUDU-2401: External TLS certificate with Intermediate CA in server cert file fails

Take 2 certificate files: cert.pem and truststore.pem

cert.pem has 2 certificates in it:
A cert for that node (with CN="hostname", and signed by CN=CertToolkitIntCA)
And the intermediate CA cert (with CN=CertToolkitIntCA, and signed by CN=CertToolkitRootCA)

truststore.pem has 1 certificate in it:
A cert which is the root CA (with CN=CertToolkitRootCA, self-signed)

This previously would not work with KRPC because in
TlsContext::VerifyCertChainUnlocked(), we would call X509_verify_cert()
with only the top certificate in the server certificate chain.

With this change, we pass the chain to X509_STORE_CTX_init() as well to make
sure that the entire chain gets checked against the CA.
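
For illustration, the verification semantics can be sketched with the
openssl CLI (file names from the example above; this is not the code path
KRPC uses):

  # Verify the server cert in cert.pem against the root CA, supplying the
  # intermediate CA from cert.pem as untrusted chain material -- analogous
  # to passing the chain to X509_STORE_CTX_init().
  openssl verify -CAfile truststore.pem -untrusted cert.pem cert.pem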

A test that uses the specific certificate format mentioned above is added
to rpc-test.

TODO: Add a test case that has multiple intermediate CAs. Right now we're testing
with only one intermediate CA.

Change-Id: If4af35e97ec6f91c1d9ed902128bd7f4e260f0f4
Reviewed-on: http://gerrit.cloudera.org:8080/9940
Reviewed-by: Lars Volker <lv...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/c6582b3a
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/c6582b3a
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/c6582b3a

Branch: refs/heads/master
Commit: c6582b3a1545fda186ab01b087f5339433c8695a
Parents: b131ee7
Author: Sailesh Mukil <sa...@cloudera.com>
Authored: Thu Apr 5 11:30:13 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Fri Apr 6 05:03:19 2018 +0000

----------------------------------------------------------------------
 be/src/kudu/rpc/rpc-test.cc             |  35 ++++-
 be/src/kudu/security/test/test_certs.cc | 219 +++++++++++++++++++++++++++
 be/src/kudu/security/test/test_certs.h  |   9 +-
 be/src/kudu/security/tls_context.cc     |   4 +-
 4 files changed, 263 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/c6582b3a/be/src/kudu/rpc/rpc-test.cc
----------------------------------------------------------------------
diff --git a/be/src/kudu/rpc/rpc-test.cc b/be/src/kudu/rpc/rpc-test.cc
index a1ab9cc..51b698a 100644
--- a/be/src/kudu/rpc/rpc-test.cc
+++ b/be/src/kudu/rpc/rpc-test.cc
@@ -165,7 +165,8 @@ TEST_P(TestRpc, TestCall) {
   }
 }
 
-TEST_P(TestRpc, TestCallWithChainCerts) {
+// Test for KUDU-2091 and KUDU-2220.
+TEST_P(TestRpc, TestCallWithChainCertAndChainCA) {
   bool enable_ssl = GetParam();
   // We're only interested in running this test with TLS enabled.
   if (!enable_ssl) return;
@@ -195,6 +196,38 @@ TEST_P(TestRpc, TestCallWithChainCerts) {
   ASSERT_OK(DoTestSyncCall(p, GenericCalculatorService::kAddMethodName));
 }
 
+// Test for KUDU-2041.
+TEST_P(TestRpc, TestCallWithChainCertAndRootCA) {
+  bool enable_ssl = GetParam();
+  // We're only interested in running this test with TLS enabled.
+  if (!enable_ssl) return;
+
+  string rpc_certificate_file;
+  string rpc_private_key_file;
+  string rpc_ca_certificate_file;
+  ASSERT_OK(security::CreateTestSSLCertWithChainSignedByRoot(GetTestDataDirectory(),
+                                                             &rpc_certificate_file,
+                                                             &rpc_private_key_file,
+                                                             &rpc_ca_certificate_file));
+  // Set up server.
+  Sockaddr server_addr;
+  ASSERT_OK(StartTestServer(&server_addr, enable_ssl));
+
+  // Set up client.
+  SCOPED_TRACE(strings::Substitute("Connecting to $0", server_addr.ToString()));
+  shared_ptr<Messenger> client_messenger;
+  ASSERT_OK(CreateMessenger("Client", &client_messenger, 1, enable_ssl,
+      rpc_certificate_file, rpc_private_key_file, rpc_ca_certificate_file));
+
+  Proxy p(client_messenger, server_addr, server_addr.host(),
+          GenericCalculatorService::static_service_name());
+  ASSERT_STR_CONTAINS(p.ToString(), strings::Substitute("kudu.rpc.GenericCalculatorService@"
+                                                            "{remote=$0, user_credentials=",
+                                                        server_addr.ToString()));
+
+  ASSERT_OK(DoTestSyncCall(p, GenericCalculatorService::kAddMethodName));
+}
+
 // Test making successful RPC calls while using a TLS certificate with a password protected
 // private key.
 TEST_P(TestRpc, TestCallWithPasswordProtectedKey) {

http://git-wip-us.apache.org/repos/asf/impala/blob/c6582b3a/be/src/kudu/security/test/test_certs.cc
----------------------------------------------------------------------
diff --git a/be/src/kudu/security/test/test_certs.cc b/be/src/kudu/security/test/test_certs.cc
index bc82140..88cf5cf 100644
--- a/be/src/kudu/security/test/test_certs.cc
+++ b/be/src/kudu/security/test/test_certs.cc
@@ -503,6 +503,12 @@ TOQYXv+dMtOkYg==
 // OpenSSL_command_line_Root_and_Intermediate_CA_including_OCSP_CRL%20and_revocation.html
 //
 // The parts relating to the OCSP and CRL were omitted.
+//
+// | serverCert TRUSTS intermediateCA TRUSTS rootCA |
+//
+// The 'cert_file' here contains the serverCert and intermediateCA.
+// The 'ca_cert_file' contains the rootCA and the same intermediateCA.
+// This was added to test KUDU-2091 and KUDU-2220.
 Status CreateTestSSLCertSignedByChain(const string& dir,
                                       string* cert_file,
                                       string* key_file,
@@ -746,5 +752,218 @@ Wd40Cr+wAdHKN6t/oransoxu0EZ3HcSOI1umFg==
   return Status::OK();
 }
 
+//
+// These certificates were generated by following the steps outlined in this tutorial
+// for creating the Root CA, Intermediate CA and end-user cert:
+// https://raymii.org/s/tutorials/ \
+// OpenSSL_command_line_Root_and_Intermediate_CA_including_OCSP_CRL%20and_revocation.html
+//
+// The parts relating to the OCSP and CRL were omitted.
+//
+// | serverCert TRUSTS intermediateCA TRUSTS rootCA |
+//
+// The 'cert_file' here contains the serverCert and intermediateCA.
+// The 'ca_cert_file' contains only the rootCA.
+// This was added to test KUDU-2041.
+Status CreateTestSSLCertWithChainSignedByRoot(const string& dir,
+                                              string* cert_file,
+                                              string* key_file,
+                                              string* ca_cert_file) {
+  const char* kCert = R"(
+-----BEGIN CERTIFICATE-----
+MIIFizCCA3OgAwIBAgICEAAwDQYJKoZIhvcNAQEFBQAwUTEXMBUGA1UEAwwOSW50
+ZXJtZWRpYXRlQ0ExCzAJBgNVBAgMAkNBMQswCQYDVQQGEwJVUzENMAsGA1UECgwE
+QWNtZTENMAsGA1UECwwES3VkdTAeFw0xNzA4MTEyMTM4MDZaFw00NDEyMjYyMTM4
+MDZaMEwxEjAQBgNVBAMMCWxvY2FsaG9zdDELMAkGA1UECAwCQ0ExCzAJBgNVBAYT
+AlVTMQ0wCwYDVQQKDARBY21lMQ0wCwYDVQQLDARLdWR1MIICIjANBgkqhkiG9w0B
+AQEFAAOCAg8AMIICCgKCAgEAqevNYH73n4kARZtMsHRucdKmqVd/xxztMlK5VOor
+ERUBhKVVOw3kpDrN9z80ldIkpOrtrfE7Ame/nA9v4k6P3minPEm1qCA/kvaAodtT
+4HjAkrPc+fto6VO6+aUV6l+ckAV/79lOuc7AutNlvvPtBQQcgOKvlNUSRKwM7ndy
+dO4ZAa+uP9Wtsd0gl8b5F3P8vwevD3a0+iDvwSd3pi2s/BeVgRwvOxJzud8ipZ/A
+ZmZN8Df9nHw5lsqLdNnqHXjTVCNXLnYXQC4gKU56fzyZL595liuefyQxiGY+dCCn
+CpqlSsHboJVC/F3OaQi3xVRTB5l2Nwb149EIadwCF0OulZCuYljJ5y9H2bECXEjP
+e5aOdz9d8W3/T7p9vBKWctToeCpqKXUd+8RPudh0D0sUHuwQ4u4S1K6X+eK+gGhT
+HOnPwt+P8ytG0M463z5Gh9feW9ZDIYoiFckheFBAHxsgDWhjYpFmYireLLXMbyaM
+s5v/AxPNRAsx3vAAd0M0vGOpdgEJ9V1MsKmxkPO/tDC3zmnv6uJhtJfrOAKxwiGC
+fDe4IoSC6H5fTxeAgw6BG5onS1UPLADL8NA/M1y8qiSCZS/5S0cHoJp5AxDfZSSR
+O49ispjqcONRwckcRJ5Pbl0IA+wGyg2DuI9LaqS5kKWp5AE8VCLPz7yepDkRnnjO
+3m8CAwEAAaNyMHAwDAYDVR0TAQH/BAIwADAdBgNVHQ4EFgQUZBZLZZaUfyIK/8B7
+GIIWDqeEvDgwHwYDVR0jBBgwFoAU8KctfaqAq0887CHqDsIC0Rkg7oQwCwYDVR0P
+BAQDAgWgMBMGA1UdJQQMMAoGCCsGAQUFBwMBMA0GCSqGSIb3DQEBBQUAA4ICAQA3
+XJXk9CbzdZUQugPI43LY88g+WjbTJfc/KtPSkHN3GjBBh8C0He7A2tp6Xj/LELmx
+crq62FzcFBnq8/iSdFITaYWRo0V/mXlpv2cpPebtwqbARCXUHGvF4/dGk/kw7uK/
+ohZJbeNySuQmQ5SQyfTdVA30Z0OSZ4jp24jC8uME7L8XOcFDgCRw01QNOISpi/5J
+BqeuFihmu/odYMHiEJdCXqe+4qIFfTh0mbgQ57l/geZm0K8uCEiOdTzSMoO8YdO2
+tm6EGNnc4yrVywjIHIvSy6YtNzd4ZM1a1CkEfPvGwe/wI1DI/zl3aJ721kcMPken
+rgEA4xXTPh6gZNMELIGZfu/mOTCFObe8rrh4QSaW4L+xa/VrLEnQRxuXAYGnmDWF
+e79aA+uXdS4+3OysNgEf4qDBt/ZquS/31DBdfJ59VfXWxp2yxMcGhcfiOdnx2Jy5
+KO8wdpXJA/7uwTJzsjLrIgfZnserOiBwE4luaHhDmKDGNVQvhkMq5tdtMdzuwn3/
+n6P1UwbFPiRGIzEAo0SSC1PRT8phv+5y0B1+gcj/peFymZVE+gRcrv9irVQqUpAY
+Lo9xrClAJ2xx4Ouz1GprKPoHdVyqtgcLXN4Oyi8Tehu96Zf6GytSEfTXsbQp+GgR
+TGRhKnDySjPhLp/uObfVwioyuAyA5mVCwjsZ/cvUUA==
+-----END CERTIFICATE-----
+-----BEGIN CERTIFICATE-----
+MIIHmDCCA4CgAwIBAgICEAAwDQYJKoZIhvcNAQEFBQAwVjELMAkGA1UEBhMCVVMx
+CzAJBgNVBAgMAkNBMQswCQYDVQQHDAJTRjENMAsGA1UECgwEQWNtZTENMAsGA1UE
+CwwES3VkdTEPMA0GA1UEAwwGUk9PVENBMB4XDTE3MDgxMTIxMzUzNVoXDTQ0MTIy
+NzIxMzUzNVowUTEXMBUGA1UEAwwOSW50ZXJtZWRpYXRlQ0ExCzAJBgNVBAgMAkNB
+MQswCQYDVQQGEwJVUzENMAsGA1UECgwEQWNtZTENMAsGA1UECwwES3VkdTCCAiIw
+DQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAM1X35LT/eBWBt0Uqqh3DSUyY3K8
+HLIlX3ZXg2Nx6y8yqhw5UGVFZl0uYBDo2DSlTl4sey+AxLIbpQI9ArRA+xqmFynV
+jheB9otudnA8hVwi/e9o+m+VSjG+HPRjSS5hwdPgpJG8DCPSmGyUUFtf3v0NxkUq
+Is+fB5qhQ36aQkI+MwQsSlHR+YrrKKVnE3f911wr9OScQP5KHjrZLQex8OmpWD9G
+v4P9jfVSUwmNEXXjmXDhNG/1R4ofX6HogZR6lBmRNGbcjjWRZQmPrOe9YcdkMLD0
+CdaUyKikqqW6Ilxs7scfuCGqwBWqh66tY18MBMHnt0bL26atTPduKYqulJ1pijio
+DUrzqtAzm7PirqPZ4aOJ9PNjdQs9zH3Zad3pcjfjpdKj4a/asX0st631J5jE6MLB
+LcbAerb/Csr/+tD0TOxwWlA+p/6wPb8ECflQLkvDDEY5BrRGdqYDpEOdm1F9DWQh
+y0RB8rWJMkxC/tTqYHfeaphzCxndLRsZQKVcPiqWCT7b431umIjPaDhsykNlcU3N
+f0V7V/fLY6wwuACngS0BLQuMrXy5FyhmWnUBeWwHfAeTxCkHlF+cVT6wHmeOuGbC
+c1piq7O7puKdC3UjO7Nn+WoOb2B6Qm/dajHpj5myxYJa5tGQGeUnWPwjjMQR557k
+HzugGAzkuG1ASQrhAgMBAAGjdTBzMA8GA1UdEwEB/wQFMAMBAf8wHQYDVR0OBBYE
+FPCnLX2qgKtPPOwh6g7CAtEZIO6EMB8GA1UdIwQYMBaAFE/9XKaDey5kC8f3bCeU
+HW46abboMAsGA1UdDwQEAwIBpjATBgNVHSUEDDAKBggrBgEFBQcDATANBgkqhkiG
+9w0BAQUFAAOCBAEAIaD2yzjTFdn61A4Qi+ek3fBJaDNQZytd0rHb49v3T+mdj/MI
+yShI1qezDFkg2FP1LfNgjuQl/T+g0BloXatAhZ/dj20Y8oN6bmilV+r2YLJbvbTn
+3hI+MxNf3Ue3FmIrwKK3QdkWcDBURpyYaDO71oxPl9QNfdhWCGHB/oWKU2y4Qt/O
+aPy+CmBAZEclX+hsdUBDJG5vuujpv4myCFwpLgFKNQX3XqCPLc4SRjfyla2YmeZv
+j7KKYh8XOWBbBF0BnWD94WzUDIBmFlUfS32aJTvd7tVaWXwH8rGwDfLN8i05UD9G
+zc3uuFH+UdzWVymk/4svKIPlB2nw9vPV8hvRRah0yFN3EQqAF0vQtwVJF/VwtZdg
+ahH0DykYTf7cKtFXE40xB7YgwDLXd3UiXfo3USW28uKqsrO52xYuUTBn+xkilds1
+tNKwtpXFWP2PUk92ficxoqi1cJnHxIIt5HKskFPgfIpzkpR8IM/vsom1a5fn4TT1
+aJbO5FsZTXQMxFLYWiSOMhTZMp3iNduxMYPosngjjKPEIkTQHKkedpF+CAGIMOKE
+BVa0vHyF34laKMMDT8d9yxwBJLqjlBohNsLLZa/Y90ThaMw+QYn/GZATB+7ng+ip
+VdGAQrghsGSxP+47HZ6WgBrlRdUWN1d1tlN2NBMHLucpbra5THGzl5MlaSVBYZb6
+yXI+2lwcTnnEkKv2zoA4ZHWdtLn/b1y4NKNg205TA+sOZcl6B1BgMe/rFuXdZe9Q
+/b6Tjz65qL4y1ByBVBJNhQQairw6cypHzwzC3w6ub1ZXtFqnTlU8fFcHGeOyydYS
+NfoepF0w2v0ounqD+6rN1CH/ERVb4FCEN19HQ3z+rAj19z2h6m/l5QEKI7bz8ghD
+8yxyqJz+L9XpfOo1yZfHQJckilY6BBIGWyeetJBmvkwv2WPt+3pX1u7h5LkvNRj2
+3fItf486zqtzUi+i/E//rS4gD/rRr4a85U8GSfp3LSAbtmfC0LNYUYA9Dcc0LSpl
+9alNuEpBHSHXlCVh4bcOb0L9n5XNdMcUYBo14hQdP0K1G7TounuAXFKYIQeyNyoi
+OAZ+eb7Y2xNnkY/ps/kyhsZgOJyiDZhdcruK3FIUGYlg5aVjQTB8H0c3/5SZnSky
+6779yMKztFXj9ctYU0YyJXWdF0xP/vi1gjQx/hJnDfXFfIOmeJdQSC08BGyK/PeC
+8zAS380bgzOza/eBL6IK0RqytbWgdoLrUQQfa1+f7AQxDDdoOkUenM0HSWjKfCuG
+m1/N7KUDHtnjVIHWqRefTPg1/tQjVY8/zgxN8MyAy+D95y4rawjsJf1dL6c0+zGv
+Wd40Cr+wAdHKN6t/oransoxu0EZ3HcSOI1umFg==
+-----END CERTIFICATE-----
+)";
+  const char* kKey = R"(
+-----BEGIN RSA PRIVATE KEY-----
+MIIJKAIBAAKCAgEAqevNYH73n4kARZtMsHRucdKmqVd/xxztMlK5VOorERUBhKVV
+Ow3kpDrN9z80ldIkpOrtrfE7Ame/nA9v4k6P3minPEm1qCA/kvaAodtT4HjAkrPc
++fto6VO6+aUV6l+ckAV/79lOuc7AutNlvvPtBQQcgOKvlNUSRKwM7ndydO4ZAa+u
+P9Wtsd0gl8b5F3P8vwevD3a0+iDvwSd3pi2s/BeVgRwvOxJzud8ipZ/AZmZN8Df9
+nHw5lsqLdNnqHXjTVCNXLnYXQC4gKU56fzyZL595liuefyQxiGY+dCCnCpqlSsHb
+oJVC/F3OaQi3xVRTB5l2Nwb149EIadwCF0OulZCuYljJ5y9H2bECXEjPe5aOdz9d
+8W3/T7p9vBKWctToeCpqKXUd+8RPudh0D0sUHuwQ4u4S1K6X+eK+gGhTHOnPwt+P
+8ytG0M463z5Gh9feW9ZDIYoiFckheFBAHxsgDWhjYpFmYireLLXMbyaMs5v/AxPN
+RAsx3vAAd0M0vGOpdgEJ9V1MsKmxkPO/tDC3zmnv6uJhtJfrOAKxwiGCfDe4IoSC
+6H5fTxeAgw6BG5onS1UPLADL8NA/M1y8qiSCZS/5S0cHoJp5AxDfZSSRO49ispjq
+cONRwckcRJ5Pbl0IA+wGyg2DuI9LaqS5kKWp5AE8VCLPz7yepDkRnnjO3m8CAwEA
+AQKCAgAE3oL2Hu1Nnwlo9ThPXibEEDtCYwWAWS3a4U/6RPOS+70dZfd5R76jjiPU
+z/TbzjfKmgjRkTYVrY9qE28rVwD8aJdSPPJ9rN7lgTbSbIyMxCkQiyLr7u5ksUeM
+W9Sy8KZ14hJ2dw2weWJAeEpUHH1QRXvjnZtWcnyhhySfuMCI5UHGMJiXr7HYhPOo
+JcWBjItTlg7ILKim+kakjFL7aheo6awZFQutb6vtSZ2ejWNgC9Jz7cbQsyabUZaJ
+dK0mxw2XPaQD6tJjvm6hgGQ2PTBOkw1S5lEWZ50bwYJMpZrjzOarq751bZGL1cxS
+ajOJ7g6rCxS+Iu7s5lKNZgaRUBkymATYccoigfZDR//fHKAmdgjgPstqy1NJL+uX
+bIuNE0bR+mBM2JQzNjPIcE67PG+0aQdO4dj0TnTzkTP1JSsa6Tz4ckOUgt7IBK8j
+ilCQpHgOB900hXC6xVRnAaU/uuSYEtfi2eFBKHT02OqH51yNZ2jsgOQJCvxNrrdI
+OmA0AaZ2tVXTTXe6qebaNjjp2cziiO5ma+5mdI1vmLQAA9v0micO+eKp/pi5e0r6
+j+9uyR2Oo4qnHg04TIfDyglW3uKz1eI0RPfBN/dx3WLpydxKeywXPH6EzfBFk8pz
+ST2sy+1ZN4vn0bDSTjSLsLBW+xBKYINqKkBD2Kg6B7aeNINjEQKCAQEA4BQByyZV
+9va91T+rQiNPifuI4PKgmLTl0wxM1Jg/H4YCyLtuEkmuvwfeAjaUaUuk2MDs3xfQ
+4pu8vtAnDapq5vJ/lMg0m3+NIwoks+xee9F//M4du9j67USvX5Qw/6Cnx1zAvrWR
+TyZncKUocQowcXM9VU0xcv4qVCMaH5F5Q/1VvG7uAtGCnB8TOHzV7GUaj9xxSqDc
+f3+p9BzsecpPZsdpVi01dQbCKi9neZwFv1Vr3MvieNDOGqSGn8X5EjSHY5PzCaXL
+S+/HoFWOSzWcuNdzKJRjVkC8U+eHoEabaRnD47dfJntN3qOQ6Mwf6P2jMN9GqlQu
+DQlvpMxBwQT1/QKCAQEAwiC4hr6EZKaLmeZBLggsS+oifHReXQy6tf2n7kCAwAgL
++65K8UW+YUtQyHL6UFfD27vvW5yp6LZxMRUD3ghguHPMQcejgoQTfGmth1bCb9tx
+zqfxuWUoNauqZiP4/kgxoh815Kt+kC8BRCXNIWjH38zxY+ncv0b4oKP7lYna/7Xk
+URLmMFr92QVAydRxY9kQTHQTCd+ZQrFT97xEoosgzkKhmynSfWNx7ymYmCrHzscA
+TutpD26u4CA4Wh4ZdVPEF10lGR531SAFEqXCkaUvIfwPphPmOtum2LZdEYho9C76
+71kLzzoJOJUNo2L9ORd5I4tOrMT0tmN+MpS1cPXb2wKCAQAu3aBeZ9807vhXQKDG
+DXKWTmibe8OBDNzAnmL3V/xj0HiGmUT1SDnnNHMHjXjO6QZKW1dvdaC3tJDua8Sv
+RARl1zQ93v25xBy1xmpUw0wjo3acXlOztTcOJv5zBCCXZneQ5+JcQMdqgYLC+ZgS
+xGnLYKnkTGfaQDSEMm9FSPzO7o5fAeh/6Gfj1VAE0X9AmQjMK/P6Atj7Ra07JE2F
+T3355h0u6/exST+U6SNAORSupuQPYwkz8aAZzG1nv1VPrHLgrdH4I5f4gucCrsI7
+ErR7qHwqcZaxNIrvFY61Q+8/NSdWWkTpXIK13Qny1raZ2WqnTxuNhlu3WFDka+AY
+ybvVAoIBACOaxL1R7A5ZzXjolkPPE/DIfJK+9R+z2frPDyHPis2trCT5Dp254PUP
+Tz20eNyLfEys53WyAifAbnpGFHOArdymwGvAJekmODy1VTJhY0AIy5LPkrIiL4HI
+fnRFXMGmlBPcDZJnMctYE69gD4N1KFOPzyY4Gliqt6ce7GG86wHDZqDICpgL2EsZ
+f4yE/lcF1Mtw7pz8+asVwwTI7v2w7s9lwSYoQYbl2lu3EVm3XvY54YCYBKjj8AcD
+YdKFer3eIzT1zHwS7n+UY9nLtSfpV/+vr18Sf0OETdGpgOBaWIWQqE2F03iqeE58
+aAfze+YgvAMc5c0iQo/BJ8A3LiANt8kCggEBAK8cFEBm+Z1g5s1IaWxqTIylR5XF
+5RFty9HyUXtkAd2d1qVzBaBG3SXg4cRsW0EhcUV2XP3iTFIPXEEABRRu5U6DEal6
+wQclrhfP4hiRQHp2Ny6jDj70NCSeUmyEu2lmwEJJYsDSOCnVmtt+qlgmk4yI1Dua
+nXhLcPLqopuhEZs2V/l2Q6E5i4vrs71Y7of+vsAvvt42Vx5wsGdPQihc5E1MI7GB
+hxmQys1MwG3Jyd7Zk88MVNveASeEIc7UAmr/TGL+RIv4bxNi/1HrgekBf1jnFUU3
+4fsdqKy0W+rOgOlGN8TX7NYCz3B41UEiyf+/gZ/TcLKAyGnoAO727Ngayqo=
+-----END RSA PRIVATE KEY-----
+)";
+  const char* kRootCaCert = R"(
+-----BEGIN CERTIFICATE-----
+MIIJfzCCBWegAwIBAgIJAOquFl/JjmRLMA0GCSqGSIb3DQEBCwUAMFYxCzAJBgNV
+BAYTAlVTMQswCQYDVQQIDAJDQTELMAkGA1UEBwwCU0YxDTALBgNVBAoMBEFjbWUx
+DTALBgNVBAsMBEt1ZHUxDzANBgNVBAMMBlJPT1RDQTAeFw0xNzA4MTEyMTMyMTla
+Fw00NDEyMjcyMTMyMTlaMFYxCzAJBgNVBAYTAlVTMQswCQYDVQQIDAJDQTELMAkG
+A1UEBwwCU0YxDTALBgNVBAoMBEFjbWUxDTALBgNVBAsMBEt1ZHUxDzANBgNVBAMM
+BlJPT1RDQTCCBCIwDQYJKoZIhvcNAQEBBQADggQPADCCBAoCggQBAOHxuOsGDBJt
+quCb+lrJ3uXvvBv6f1w1aP+WqDEqveQlOi68sV/DVUR3s+en/MHA5jYAVo2D+eR7
+v/zjrAzCeqCpipbDcxA2e00+kggGHc1BoLtXXTPPCcTQt/0jjX26GXlaJRrY5MAy
+ZJ35vkJ5wCTw7DttfyRzR/RplI6DfO3t2kkSFpSsjGFJZQRZn/L2OM8Ii/tEhede
+UP/Rv8KIKM2+P9RS0VIutiI+/mOpH0QZHHEgnHy7x/CcNCd+wDG516YoJXp9c/1n
+aRLK+jA0bNCf0ZktMpuifoFzpNJ3pvDkjgTLhbiMkS8VKc66Z/Mv0EVOrdiMla/X
+0OSWqEZctxIcVIGDbMqngy62dghMBmxpVkfNmu6RqyS3HmPFrhRXJIIogdBo8mdJ
+xFCCvOgA6suaZnQtQC0mlRi5XGnTocpeHYUZ1c1hO2ZdVrFTh3atJsD80kVYxYuK
+YMq3QaK2zZUK6TUIFue1UqLf2dpIFzskLY6bEVob7Rdl8AHdFBJ8cGOyYKpG+rwO
+n3XQudt8YwDUCvw+8MGRXQiwUnzT/3gSuLNjlQOdcqN78wT5mdp6QZwareptyRwT
+yk/wWnfZlcFO33aPnUhvzzI5TzTB6EqG+3oNYkuXXy/glvOFluyQcPfsYXVOnXOj
+xF0hjKcpx10KQSvXjT9SRYr8NcOC7Yjy3f+WF+nwV+EzevqC2iTr1u8ymqUvpgFJ
+snvO8G/tycfxrwjI/4IghBgwqhcqD4wp/NleXy3A7GE3kFusL10i1bjwxBlz5qER
+uKaxU164HXPl4gv+Qt3eGqJE8KHDwTp8x+619S0+Gd8fY6Yj6/v9WyDef0SKGscm
+t3iqYNA39yNHAj++cjcCrJwBfINVvnTsVFKsCwUpjVuNOGRfZv0uHLAv6LaePQk5
+FKHwlLlPRx7ZcwHpkzTvp/ixYPb/cNJOw8fVW5CoWXYEzDUJY0oU8BWlQNHQ/e4q
+V7Yxa/vourUUvOACDzyQ6hCO95dQdDMCDQqC2VVL45+TUJ3eU1gDHge4T2js/qL8
+iJ+auZapiZjUQzLFse4XkgDrkMrD4fkOQOw4x9AhJF/SrnI8UPNjNOmAyOlqGTdd
+IyLesKXgnOGASSmc7JRk+YBpu9PQXIgHHQZIao1zPGP5k/ylp8XPYitC9MKzRRam
+67aJmutJxEtw7VJhoyz5m5LhLysPLY+R01+QqZK9/7qwWaX6PvMmm42zq9YKncOM
+cC/4eAPnwbj6yhpFoaUD5qzloci3+tvYgb+wr3f1F9SPrpe5xJz3NTXdQj8FsGjl
+ShL+bybUQ7wzZQObUhWtXSayagQg1MAxUCn7Aglmo/A/1+teacyuCEIbrhmODYM3
+Okji9gmGv+cCAwEAAaNQME4wHQYDVR0OBBYEFE/9XKaDey5kC8f3bCeUHW46abbo
+MB8GA1UdIwQYMBaAFE/9XKaDey5kC8f3bCeUHW46abboMAwGA1UdEwQFMAMBAf8w
+DQYJKoZIhvcNAQELBQADggQBAMXuMpJzYV3QMH780os0dZyJ+zi4FETVjAWFAPME
+qzN27W0L9+HZcGpz5i5FLdmc0F3u1cyCrJ4nCCWMrEIVmrLInFRaH1u9HUO78jPw
+Uw/MZLF+sf7uE8IAdVzLQG0A3QjAVoOX3IEOxEaQYYlVQHonyq2pBt8mkGqiPI3R
+E9cTnP/R1Ncd1wZkFL/n5qSNGTr/eg1O/jFB5xH96xm18Z7HgJDku2JCKQK6kqTM
+B7LjAFwWzTg8cnewVFRzIvJe83w9lHs1SW3R9fz7fIEBbZQ3z+n1cSj5jDjaT1+U
+YWTj+gAklZT4M/vImXF0XqbZytUOqe16HfBInc0G/kakUIcs6le2hmfhccJdG25I
+e5TH6ZdMumt7//hVPBPN5fhYKc2uHpzbtmxUjuKG8Na8/w+y2O+sW5CfpNtrYLyB
+WATHGtBB3ckBAICLlhoQiY/ku9r6BfczI86MbSy5vG5CD2sYGhVEl3PQXAnvBKax
+svZS3z9f16SZm61FWwz+r0XCe7LBiKe9YpODyE8lFDymZyW0vKYzDLzCy/mXhU/j
+locrf5r4YK0cOxNQC/jK7MLDFxPQbYg2SuAPW4DF2QzgKn2RuatdOB12S05afawj
+bhrbikIfEtD3erUMMJwaV9dxhHL835rfexxbRzBCdbjWg7Qiw4r8+PJB/mSSvsVO
+RAI02p8IqW5O+iXkU4V2Mapzdpo6b8O6TplHRXbRxWuen87g87KHhgZaC6TmWgvT
+It3ARZx3tkBoJwf41ELmWcakqiT9aQslc5weafw3SZp6+w0QU0qqFwCFLJWHETa5
+/PVHDEkBoXDMnqMlu7E9PUks4Op9T2f7bNy94GZXRbSp2VKjV68sds739DhVIZ+M
+MIaEutz3UndEuGGlcVuqXlda+H5xp57RnMZSKbT240kGdci51WahhfkX7dLY6c/b
+jaNWyGSfM+wFlky97t7ANbPP85SDgrrSyb6rTIt1zU2c5+vvjNVvDhlS6n7ls/Pi
+lMWWs5Ka66E8oZFwYygfIiEv6FcNWrSZ/vCMuS02WJovsZd4YrYtNbpkx6shaA5t
+BOIpuelPbQNPlOaJ+YnRuuppomPnXx5X3RlHld7xsExHDNsi57H0PBDq/W5O1S4t
+rHm3SItJQ4ndFHBGjZ7kSOyHtCLWZ8cB75sPduVC2PnRL/kt3tmfFFVsUurLGz4n
+wgCg1OuflNcc9wIF8lZMjm0TZkQMGYBIfBA7x8/Vs2XSFuaT9vbWoC07LXftl13g
+HsMg1UUSqnMBUQStG42lbVFF1yIfPZThEPxD2RJTCw8FTLBmNrJyBsZ0BGagwe1C
+KH5H1VGmllMdZDHOamHHKA8mEDI4eAKY3HoOS4rfioT8Tks=
+-----END CERTIFICATE-----
+)";
+
+  *cert_file = JoinPathSegments(dir, "test.cert");
+  *key_file = JoinPathSegments(dir, "test.key");
+  *ca_cert_file = JoinPathSegments(dir, "testchainca.cert");
+
+  RETURN_NOT_OK(WriteStringToFile(Env::Default(), kCert, *cert_file));
+  RETURN_NOT_OK(WriteStringToFile(Env::Default(), kKey, *key_file));
+  RETURN_NOT_OK(WriteStringToFile(Env::Default(), kRootCaCert, *ca_cert_file));
+  return Status::OK();
+}
+
 } // namespace security
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/impala/blob/c6582b3a/be/src/kudu/security/test/test_certs.h
----------------------------------------------------------------------
diff --git a/be/src/kudu/security/test/test_certs.h b/be/src/kudu/security/test/test_certs.h
index 89d1654..7767cb2 100644
--- a/be/src/kudu/security/test/test_certs.h
+++ b/be/src/kudu/security/test/test_certs.h
@@ -69,11 +69,18 @@ Status CreateTestSSLCertWithEncryptedKey(const std::string& dir,
                                          std::string* key_password);
 
 // Same as the CreateTestSSLCertWithPlainKey() except that the 'cert_file' is
-// signed by a CA chain.
+// signed by a CA chain ('ca_cert_file' is a chain of certificates).
 Status CreateTestSSLCertSignedByChain(const std::string& dir,
                                       std::string* cert_file,
                                       std::string* key_file,
                                       std::string* ca_cert_file);
 
+// Same as the CreateTestSSLCertWithPlainKey() except that the 'cert_file' is
+// a chain signed by a root CA ('ca_cert_file' is only the root CA).
+Status CreateTestSSLCertWithChainSignedByRoot(const std::string& dir,
+                                              std::string* cert_file,
+                                              std::string* key_file,
+                                              std::string* ca_cert_file);
+
 } // namespace security
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/impala/blob/c6582b3a/be/src/kudu/security/tls_context.cc
----------------------------------------------------------------------
diff --git a/be/src/kudu/security/tls_context.cc b/be/src/kudu/security/tls_context.cc
index f94e3d2..dac9a31 100644
--- a/be/src/kudu/security/tls_context.cc
+++ b/be/src/kudu/security/tls_context.cc
@@ -163,8 +163,8 @@ Status TlsContext::VerifyCertChainUnlocked(const Cert& cert) {
   X509_STORE* store = SSL_CTX_get_cert_store(ctx_.get());
   auto store_ctx = ssl_make_unique<X509_STORE_CTX>(X509_STORE_CTX_new());
 
-  OPENSSL_RET_NOT_OK(X509_STORE_CTX_init(store_ctx.get(), store, cert.GetTopOfChainX509(), nullptr),
-                     "could not init X509_STORE_CTX");
+  OPENSSL_RET_NOT_OK(X509_STORE_CTX_init(store_ctx.get(), store, cert.GetTopOfChainX509(),
+                     cert.GetRawData()), "could not init X509_STORE_CTX");
   int rc = X509_verify_cert(store_ctx.get());
   if (rc != 1) {
     int err = X509_STORE_CTX_get_error(store_ctx.get());


[4/7] impala git commit: IMPALA-6807: [DOCS] Update the known issue for HDFS-12528

Posted by ph...@apache.org.
IMPALA-6807: [DOCS] Update the known issue for HDFS-12528

Added a new recommendation for the new setting available in the HDFS
versions containing the fix, 2.10 and higher.
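
For illustration, the relationship between the settings looks roughly like
the sketch below (all values are hypothetical, not recommendations):

  # Pre-fix workaround: keep the Impala timeout (in seconds) below the HDFS
  # cache expiry (in milliseconds), e.g. with this hdfs-site.xml setting:
  #   dfs.client.read.shortcircuit.streams.cache.expiry.ms = 300000  # 5 minutes
  impalad --unused_file_handle_timeout_sec=270 ...
  # On HDFS 2.10 and higher, shorten the disable interval instead:
  #   dfs.domain.socket.disable.interval.seconds = 1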

Change-Id: If51cb111a9ddc67be4a1cf42502a8a021486b7e4
Reviewed-on: http://gerrit.cloudera.org:8080/9929
Reviewed-by: Joe McDonnell <jo...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/ab7afa7b
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/ab7afa7b
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/ab7afa7b

Branch: refs/heads/master
Commit: ab7afa7b4e85d0a2c3801950f529ac6d71f9dd03
Parents: 380e17a
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Wed Apr 4 16:22:42 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Thu Apr 5 23:36:00 2018 +0000

----------------------------------------------------------------------
 docs/topics/impala_known_issues.xml | 61 +++++++++++++++++++++++---------
 1 file changed, 45 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/ab7afa7b/docs/topics/impala_known_issues.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_known_issues.xml b/docs/topics/impala_known_issues.xml
index a8a8451..a09188e 100644
--- a/docs/topics/impala_known_issues.xml
+++ b/docs/topics/impala_known_issues.xml
@@ -409,25 +409,54 @@ https://issues.apache.org/jira/browse/IMPALA-2144 - Don't have
       <title>Interaction of File Handle Cache with HDFS Appends and Short-Circuit Reads</title>
       <conbody>
         <p>
-          If a data file used by Impala is being continuously appended or overwritten in place by an
-          HDFS mechanism, such as <cmdname>hdfs dfs -appendToFile</cmdname>, interaction with the
-          file handle caching feature in <keyword keyref="impala210_full"/> and higher could cause
-          short-circuit reads to sometimes be disabled on some DataNodes. When a mismatch is detected
-          between the cached file handle and a data block that was rewritten because of an append,
-          short-circuit reads are turned off on the affected host for a 10-minute period.
+          If a data file used by Impala is being continuously appended or
+          overwritten in place by an HDFS mechanism, such as <cmdname>hdfs dfs
+            -appendToFile</cmdname>, interaction with the file handle caching
+          feature in <keyword keyref="impala210_full"/> and higher could cause
+          short-circuit reads to sometimes be disabled on some DataNodes. When a
+          mismatch is detected between the cached file handle and a data block
+          that was rewritten because of an append, short-circuit reads are
+          turned off on the affected host for a 10-minute period.
         </p>
         <p>
-          The possibility of encountering such an issue is the reason why the file handle caching
-          feature is currently turned off by default. See <xref keyref="scalability_file_handle_cache"/>
-          for information about this feature and how to enable it.
+          The possibility of encountering such an issue is the reason why the
+          file handle caching feature is currently turned off by default. See
+            <xref keyref="scalability_file_handle_cache"/> for information about
+          this feature and how to enable it.
         </p>
-        <p><b>Bug:</b> <xref href="https://issues.apache.org/jira/browse/HDFS-12528" scope="external" format="html">HDFS-12528</xref></p>
-        <p><b>Severity:</b> High</p>
-        <!-- <p><b>Resolution:</b> </p> -->
-        <p><b>Workaround:</b> Verify whether your ETL process is susceptible to this issue before enabling the file handle caching feature.
-          You can set the <cmdname>impalad</cmdname> configuration option <codeph>unused_file_handle_timeout_sec</codeph> to a time period
-          that is shorter than the HDFS setting <codeph>dfs.client.read.shortcircuit.streams.cache.expiry.ms</codeph>. (Keep in mind that
-          the HDFS setting is in milliseconds while the Impala setting is in seconds.)
+        <p>
+          <b>Bug:</b>
+          <xref href="https://issues.apache.org/jira/browse/HDFS-12528"
+            scope="external" format="html">HDFS-12528</xref>
+        </p>
+
+        <p>
+          <b>Severity:</b> High
+        </p>
+
+        <p><b>Workaround:</b> Verify whether your ETL process is susceptible to
+          this issue before enabling the file handle caching feature. You can
+          set the <cmdname>impalad</cmdname> configuration option
+            <codeph>unused_file_handle_timeout_sec</codeph> to a time period
+          that is shorter than the HDFS setting
+            <codeph>dfs.client.read.shortcircuit.streams.cache.expiry.ms</codeph>.
+          (Keep in mind that the HDFS setting is in milliseconds while the
+          Impala setting is in seconds.)
+        </p>
+
+        <p>
+          <b>Resolution:</b> Fixed in HDFS 2.10 and higher. Use the new HDFS
+          parameter <codeph>dfs.domain.socket.disable.interval.seconds</codeph>
+          to specify the amount of time that short circuit reads are disabled on
+          encountering an error. The default value is 10 minutes
+            (<codeph>600</codeph> seconds). It is recommended that you set
+            <codeph>dfs.domain.socket.disable.interval.seconds</codeph> to a
+          small value, such as <codeph>1</codeph> second, when using the file
+          handle cache. Setting
+            <codeph>dfs.domain.socket.disable.interval.seconds</codeph> to
+            <codeph>0</codeph> is not recommended as a non-zero interval
+          protects the system if there is a persistent problem with short
+          circuit reads.
         </p>
       </conbody>
     </concept>


[2/7] impala git commit: Loosen hive-exec.jar glob pattern in copy-udfs-udas.sh.

Posted by ph...@apache.org.
Loosen hive-exec.jar glob pattern in copy-udfs-udas.sh.

This commit slightly loosens the coupling between IMPALA_HIVE_VERSION
and "hive.version" in the Maven sense.

Cherry-picks: not for 2.x

Change-Id: Ifbe6f5208b4ad0ffc9cbfe4e93d712ce698beb23
Reviewed-on: http://gerrit.cloudera.org:8080/9925
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/8e5f9231
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/8e5f9231
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/8e5f9231

Branch: refs/heads/master
Commit: 8e5f923158b73204329f17e01bf66ad17059b0e1
Parents: 58ee5ec
Author: Philip Zeyliger <ph...@cloudera.com>
Authored: Wed Apr 4 09:06:45 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu Apr 5 06:54:53 2018 +0000

----------------------------------------------------------------------
 testdata/bin/copy-udfs-udas.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/8e5f9231/testdata/bin/copy-udfs-udas.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/copy-udfs-udas.sh b/testdata/bin/copy-udfs-udas.sh
index adf64a0..066e1ac 100755
--- a/testdata/bin/copy-udfs-udas.sh
+++ b/testdata/bin/copy-udfs-udas.sh
@@ -86,7 +86,7 @@ hadoop fs -put -f "${IMPALA_HOME}/be/build/latest/testutil/libTestUdfs.so"\
 hadoop fs -mkdir -p "${FILESYSTEM_PREFIX}/test-warehouse/udf_test"
 hadoop fs -put -f "${IMPALA_HOME}/be/build/latest/testutil/libTestUdfs.so"\
     "${FILESYSTEM_PREFIX}/test-warehouse/udf_test/libTestUdfs.so"
-hadoop fs -put -f "${HIVE_HOME}/lib/hive-exec-${IMPALA_HIVE_VERSION}.jar"\
+hadoop fs -put -f "${HIVE_HOME}/lib/hive-exec-"*.jar\
   "${FILESYSTEM_PREFIX}/test-warehouse/hive-exec.jar"
 hadoop fs -put -f "${IMPALA_HOME}/tests/test-hive-udfs/target/test-hive-udfs-1.0.jar"\
     "${FILESYSTEM_PREFIX}/test-warehouse/impala-hive-udfs.jar"


[7/7] impala git commit: IMPALA-6070: Expose using Docker to run tests faster.

Posted by ph...@apache.org.
IMPALA-6070: Expose using Docker to run tests faster.

Allows running the tests that make up the "core" suite in about 2 hours.
By comparison, https://jenkins.impala.io/job/ubuntu-16.04-from-scratch/buildTimeTrend
tends to run in about 3.5 hours.

This commit:
* Adds "echo" statements in a few places, to facilitate timing.
* Adds --skip-parallel/--skip-serial flags to run-tests.py,
  and exposes them in run-all-tests.sh (see the usage sketch after this list).
* Marks TestRuntimeFilters as a serial test. This test runs
  queries that need > 1GB of memory, and, combined with
  other tests running in parallel, can kill the parallel test
  suite.
* Adds "test-with-docker.py", which runs a full build, data load,
  and executes tests inside of Docker containers, generating
  a timeline at the end. In short, one container is used
  to do the build and data load, and then this container is
  re-used to run various tests in parallel. All logs are
  left on the host system.
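
A usage sketch, mirroring how entrypoint.sh (added below) drives the new
flags:

  # Run only the serial EE tests; stress tests are bucketed with parallel.
  EE_TEST=true RUN_TESTS_ARGS="--skip-parallel --skip-stress" bin/run-all-tests.sh
  # Run only the parallel EE tests.
  EE_TEST=true RUN_TESTS_ARGS="--skip-serial" bin/run-all-tests.sh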

Besides the obvious win of getting test results more quickly, this
commit serves as an example of how to get various bits of Impala
development working inside of Docker containers. For example, Kudu
relies on atomic rename of directories, which isn't available in most
Docker filesystems, and entrypoint.sh works around it.

In addition, the timeline generated by the build suggests where further
optimizations can be made. Most obviously, data load eats up a precious
~30-50 minutes on a largely idle machine.

This work is significantly CPU- and memory-hungry. It was developed on a
32-core, 120GB RAM Google Compute Engine machine. I've worked out
parallelism configurations such that it runs nicely on 60GB of RAM
(c4.8xlarge) and over 100GB (e.g., m4.10xlarge, which has 160GB). There is
some simple logic to guess at the knobs, and the knobs are exposed. By and
large, EC2 and GCE price machines linearly, so, if CPU usage can be kept
up, it's not wasteful to run on bigger machines.
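
A minimal invocation might look like the sketch below; --name and
--impalad-mem-limit-bytes are the only flags referenced elsewhere in this
change, and the values are examples:

  # Sketch only: run the suites under the name "run1" with 4GiB per impalad.
  ./docker/test-with-docker.py --name=run1 \
      --impalad-mem-limit-bytes=$((4 * 1024 * 1024 * 1024))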

Change-Id: I82052ef31979564968effef13a3c6af0d5c62767
Reviewed-on: http://gerrit.cloudera.org:8080/9085
Reviewed-by: Philip Zeyliger <ph...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/2896b8d1
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/2896b8d1
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/2896b8d1

Branch: refs/heads/master
Commit: 2896b8d1275cd507fca770b67bc268bfebb747a2
Parents: c6582b3
Author: Philip Zeyliger <ph...@cloudera.com>
Authored: Sat Oct 21 21:27:00 2017 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Fri Apr 6 06:40:07 2018 +0000

----------------------------------------------------------------------
 bin/bootstrap_system.sh                  |  11 +
 bin/rat_exclude_files.txt                |   1 +
 bin/run-all-tests.sh                     |   5 +-
 buildall.sh                              |   6 +-
 docker/README.md                         |   5 +
 docker/annotate.py                       |  34 ++
 docker/entrypoint.sh                     | 329 +++++++++++++++
 docker/monitor.py                        | 329 +++++++++++++++
 docker/test-with-docker.py               | 579 ++++++++++++++++++++++++++
 docker/timeline.html.template            | 142 +++++++
 testdata/bin/run-all.sh                  |   2 +
 tests/query_test/test_runtime_filters.py |   3 +
 tests/run-tests.py                       |  34 +-
 13 files changed, 1467 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/2896b8d1/bin/bootstrap_system.sh
----------------------------------------------------------------------
diff --git a/bin/bootstrap_system.sh b/bin/bootstrap_system.sh
index 3a9c42b..3f88dd3 100755
--- a/bin/bootstrap_system.sh
+++ b/bin/bootstrap_system.sh
@@ -89,10 +89,14 @@ function apt-get {
   return 1
 }
 
+echo ">>> Installing packages"
+
 apt-get update
 apt-get --yes install apt-utils
 apt-get --yes install git
 
+echo ">>> Checking out Impala"
+
 # If there is no Impala git repo, get one now
 if ! [[ -d ~/Impala ]]
 then
@@ -103,6 +107,7 @@ SET_IMPALA_HOME="export IMPALA_HOME=$(pwd)"
 echo "$SET_IMPALA_HOME" >> ~/.bashrc
 eval "$SET_IMPALA_HOME"
 
+echo ">>> Installing build tools"
 apt-get --yes install ccache g++ gcc libffi-dev liblzo2-dev libkrb5-dev \
         krb5-admin-server krb5-kdc krb5-user libsasl2-dev libsasl2-modules \
         libsasl2-modules-gssapi-mit libssl-dev make maven ninja-build ntp \
@@ -122,6 +127,8 @@ SET_JAVA_HOME="export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64"
 echo "$SET_JAVA_HOME" >> "${IMPALA_HOME}/bin/impala-config-local.sh"
 eval "$SET_JAVA_HOME"
 
+echo ">>> Configuring system"
+
 sudo service ntp stop
 sudo ntpdate us.pool.ntp.org
 # If on EC2, use Amazon's ntp servers
@@ -191,10 +198,14 @@ sudo chown $(whoami) /var/lib/hadoop-hdfs/
 echo "* - nofile 1048576" | sudo tee -a /etc/security/limits.conf
 
 # LZO is not needed to compile or run Impala, but it is needed for the data load
+echo ">>> Checking out Impala-lzo"
 if ! [[ -d ~/Impala-lzo ]]
 then
   git clone https://github.com/cloudera/impala-lzo.git ~/Impala-lzo
 fi
+
+echo ">>> Checking out and building hadoop-lzo"
+
 if ! [[ -d ~/hadoop-lzo ]]
 then
   git clone https://github.com/cloudera/hadoop-lzo.git ~/hadoop-lzo

http://git-wip-us.apache.org/repos/asf/impala/blob/2896b8d1/bin/rat_exclude_files.txt
----------------------------------------------------------------------
diff --git a/bin/rat_exclude_files.txt b/bin/rat_exclude_files.txt
index 1819938..8a40ef2 100644
--- a/bin/rat_exclude_files.txt
+++ b/bin/rat_exclude_files.txt
@@ -79,6 +79,7 @@ tests/comparison/ORACLE.txt
 bin/distcc/README.md
 tests/comparison/POSTGRES.txt
 docs/README.md
+docker/README.md
 be/src/thirdparty/pcg-cpp-0.98/README.md
 
 # http://www.apache.org/legal/src-headers.html: "Test data for which the addition of a

http://git-wip-us.apache.org/repos/asf/impala/blob/2896b8d1/bin/run-all-tests.sh
----------------------------------------------------------------------
diff --git a/bin/run-all-tests.sh b/bin/run-all-tests.sh
index 4ca0f54..4743a4f 100755
--- a/bin/run-all-tests.sh
+++ b/bin/run-all-tests.sh
@@ -55,6 +55,8 @@ fi
 # Extra arguments passed to start-impala-cluster for tests. These do not apply to custom
 # cluster tests.
 : ${TEST_START_CLUSTER_ARGS:=}
+# Extra args to pass to run-tests.py
+: ${RUN_TESTS_ARGS:=}
 if [[ "${TARGET_FILESYSTEM}" == "local" ]]; then
   # TODO: Remove abort_on_config_error flag from here and create-load-data.sh once
   # checkConfiguration() accepts the local filesystem (see IMPALA-1850).
@@ -188,7 +190,8 @@ do
   if [[ "$EE_TEST" == true ]]; then
     # Run end-to-end tests.
     # KERBEROS TODO - this will need to deal with ${KERB_ARGS}
-    if ! "${IMPALA_HOME}/tests/run-tests.py" ${COMMON_PYTEST_ARGS} ${EE_TEST_FILES}; then
+    if ! "${IMPALA_HOME}/tests/run-tests.py" ${COMMON_PYTEST_ARGS} \
+        ${RUN_TESTS_ARGS} ${EE_TEST_FILES}; then
       #${KERB_ARGS};
       TEST_RET_CODE=1
     fi

http://git-wip-us.apache.org/repos/asf/impala/blob/2896b8d1/buildall.sh
----------------------------------------------------------------------
diff --git a/buildall.sh b/buildall.sh
index 9d8b15e..56cdb9a 100755
--- a/buildall.sh
+++ b/buildall.sh
@@ -334,7 +334,7 @@ bootstrap_dependencies() {
     echo "SKIP_TOOLCHAIN_BOOTSTRAP is true, skipping download of Python dependencies."
     echo "SKIP_TOOLCHAIN_BOOTSTRAP is true, skipping toolchain bootstrap."
   else
-    echo "Downloading Python dependencies"
+    echo ">>> Downloading Python dependencies"
     # Download all the Python dependencies we need before doing anything
     # of substance. Does not re-download anything that is already present.
     if ! "$IMPALA_HOME/infra/python/deps/download_requirements"; then
@@ -344,7 +344,7 @@ bootstrap_dependencies() {
       echo "Finished downloading Python dependencies"
     fi
 
-    echo "Downloading and extracting toolchain dependencies."
+    echo ">>> Downloading and extracting toolchain dependencies."
     "$IMPALA_HOME/bin/bootstrap_toolchain.py"
     echo "Toolchain bootstrap complete."
   fi
@@ -357,6 +357,7 @@ build_fe() {
 
 # Build all components.
 build_all_components() {
+  echo ">>> Building all components"
   # Build the Impala frontend, backend and external data source API.
   MAKE_IMPALA_ARGS+=" -fe -cscope -tarballs"
   if [[ -e "$IMPALA_LZO" ]]
@@ -421,6 +422,7 @@ start_test_cluster_dependencies() {
 # This does all data loading, except for the metastore snapshot which must be loaded
 # earlier before the cluster is running.
 load_test_data() {
+  echo ">>> Loading test data"
   "$IMPALA_HOME/bin/create_testdata.sh"
   # We have 4 cases:
   # - test-warehouse and metastore snapshots exists.

http://git-wip-us.apache.org/repos/asf/impala/blob/2896b8d1/docker/README.md
----------------------------------------------------------------------
diff --git a/docker/README.md b/docker/README.md
new file mode 100644
index 0000000..f4e7709
--- /dev/null
+++ b/docker/README.md
@@ -0,0 +1,5 @@
+# Docker-related scripts for Impala
+
+`test-with-docker.py` runs the Impala build and tests inside of Docker
+containers, parallelizing the test execution across test suites. See that file
+for more details.

http://git-wip-us.apache.org/repos/asf/impala/blob/2896b8d1/docker/annotate.py
----------------------------------------------------------------------
diff --git a/docker/annotate.py b/docker/annotate.py
new file mode 100755
index 0000000..f83854f
--- /dev/null
+++ b/docker/annotate.py
@@ -0,0 +1,34 @@
+#!/usr/bin/python -u
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Prepends input with a timestamp. Unlike the similar perl version,
+# this produces microsecond timestamps. Python is unavailable in
+# the base Ubuntu image.
+#
+# Note that "python -u" disables buffering.
+
+import sys
+import datetime
+
+while True:
+  line = sys.stdin.readline()
+  if line == "":
+    sys.exit(0)
+  sys.stdout.write(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f "))
+  sys.stdout.write(line)
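
A minimal usage sketch (the command being piped is hypothetical):

  # Timestamp every line of a long-running step; some_step.sh is made up.
  ./some_step.sh 2>&1 | docker/annotate.py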

http://git-wip-us.apache.org/repos/asf/impala/blob/2896b8d1/docker/entrypoint.sh
----------------------------------------------------------------------
diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh
new file mode 100755
index 0000000..d371d25
--- /dev/null
+++ b/docker/entrypoint.sh
@@ -0,0 +1,329 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Entrypoint code for test-with-docker.py containers. test-with-docker.py
+# will create Docker containers with this script as the entrypoint,
+# with a variety of arguments. See test-with-docker.py for a more
+# general overview.
+#
+# This assumes that the following are already mounted inside
+# the container:
+#   /etc/localtime                      -> /mnt/localtime
+#     Helps timestamps be in the time zone of the host
+#   $IMPALA_HOME [a git repo of Impala] -> /repo
+#     Used to check out Impala afresh
+#   $IMPALA_HOME/logs/docker/<n1>/<n2> -> /logs
+#     Used to save logs out to host.
+#     <n1> represents the --name passed into
+#     test-with-docker for the test run. <n2>
+#     indicates which specific container is being run.
+#   ~/.ccache [configurable]            -> /ccache
+#     Used to speed up builds.
+#
+# Usage:
+#   entrypoint.sh build <uid>
+#   entrypoint.sh test_suite <suite>
+#      where <suite> is one of: BE_TEST JDBC_TEST CLUSTER_TEST
+#                               EE_TEST_SERIAL EE_TEST_PARALLEL
+
+# Bootstraps the container by creating a user and adding basic tools like Python and git.
+# Takes a uid as an argument for the user to be created.
+function build() {
+  # Handy for testing.
+  if [[ $TEST_TEST_WITH_DOCKER ]]; then
+    # We sleep busily so that CPU metrics will show usage, to
+    # better exercise the timeline code.
+    echo sleeping busily for 4 seconds
+    bash -c 'while [[ $SECONDS -lt 4 ]]; do :; done'
+    return
+  fi
+
+  # Configure timezone, so any timestamps that appear are coherent with the host.
+  configure_timezone
+
+  # Assert we're superuser.
+  [ "$(id -u)" = 0 ]
+  if id $1 2> /dev/null; then
+    echo "User with id $1 already exists. Please run this as a user id missing from " \
+      "the base Ubuntu container."
+    echo
+    echo "Container users:"
+    paste <(cut -d : -f3 /etc/passwd) <(cut -d : -f1 /etc/passwd) | sort -n
+    exit 1
+  fi
+  apt-get update
+  apt-get install -y sudo git lsb-release python
+
+  adduser --disabled-password --gecos "" --uid $1 impdev
+  echo "impdev ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
+
+  su impdev -c "$0 build_impdev"
+}
+
+# Sets up Impala environment
+function impala_environment() {
+  pushd /home/impdev/Impala
+  export IMPALA_HOME=/home/impdev/Impala
+  export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
+  source bin/impala-config.sh
+  popd
+}
+
+# Starts SSH and PostgreSQL; configures container as necessary;
+# prepares Kudu for starting.
+function boot_container() {
+  pushd /home/impdev/Impala
+
+  # Required for metastore
+  sudo service postgresql start
+
+  # Required for starting HBase
+  sudo service ssh start
+
+  # Make log directories. This is typically done in buildall.sh.
+  mkdir -p logs/be_tests logs/fe_tests/coverage logs/ee_tests logs/custom_cluster_tests
+
+  # Update /etc/hosts to remove the entry for the unique docker hostname,
+  # and instead point it to 127.0.0.1. Otherwise, HttpFS returns Location:
+  # redirects to said hostname, but the relevant datanode isn't listening
+  # on the wildcard address.
+  sed -e /$(hostname)/d /etc/hosts -e /127.0.0.1/s,localhost,"localhost $(hostname)," \
+    > /tmp/hosts
+  # "sed -i" in place doesn't work on Docker, because /etc/hosts is a bind mount.
+  sudo cp /tmp/hosts /etc/hosts
+
+  echo Hostname: $(hostname)
+  echo Hosts file:
+  cat /etc/hosts
+
+  # Make a copy of Kudu's WALs to avoid an issue with Docker filesystems (aufs and
+  # overlayfs) that don't support os.rename(2) on directories, which Kudu
+  # requires. We make a fresh copy of the data, in which case rename(2) works
+  # presumably because there's only one layer involved. See
+  # https://issues.apache.org/jira/browse/KUDU-1419.
+  cd /home/impdev/Impala/testdata
+  for x in cluster/cdh*/node-*/var/lib/kudu/*/wal; do
+    mv $x $x-orig
+    cp -r $x-orig $x
+    rm -r $x-orig
+  done
+
+  # Wait for postgresql to really start; if it doesn't, Hive Metastore will fail to start.
+  for i in {1..120}; do
+    echo connecting to postgresql attempt $i
+    if sudo -u postgres psql -c "select 1"; then
+      break
+    else
+      sleep 2
+    fi
+  done
+  sudo -u postgres psql -c "select 1"
+
+  popd
+}
+
+# Runs bootstrap_system.sh and then builds Impala.
+function build_impdev() {
+  # Assert we're impdev now.
+  [ "$(id -un)" = impdev ]
+
+  # Link in ccache from host.
+  ln -s /ccache /home/impdev/.ccache
+
+  # Instead of doing a full "git clone" of /repo, which is the host's checkout,
+  # we only fetch one branch, without tags. This keeps the checkout
+  # considerably lighter.
+  mkdir /home/impdev/Impala
+  pushd /home/impdev/Impala
+  git init
+  git fetch /repo --no-tags HEAD
+  git checkout -b test-with-docker FETCH_HEAD
+
+  # Link in logs. Logs are on the host since that's the most important thing to
+  # look at after the tests are run.
+  ln -sf /logs logs
+
+  bin/bootstrap_system.sh
+  impala_environment
+
+  # Builds Impala and loads test data.
+  # Note that IMPALA-6494 prevents us from using shared library builds,
+  # which are smaller and thereby speed things up.
+  ./buildall.sh -noclean -format -testdata -skiptests
+
+  # Shut down things cleanly.
+  testdata/bin/kill-all.sh
+
+  # Shutting down PostgreSQL nicely speeds up its start time for new containers.
+  sudo service postgresql stop
+
+  # Clean up things we don't need to reduce image size
+  find be -name '*.o' -execdir rm '{}' + # ~1.6GB
+
+  popd
+}
+
+# Runs a suite passed in as the first argument. Tightly
+# coupled with Impala's run-all-tests and the suite names.
+# from test-with-docker.py.
+#
+# Before running tests, starts up the minicluster.
+function test_suite() {
+  cd /home/impdev/Impala
+
+  # These test suites are for testing.
+  if [[ $1 == NOOP ]]; then
+    return 0
+  fi
+  if [[ $1 == NOOP_FAIL ]]; then
+    return 1
+  fi
+  if [[ $1 == NOOP_SLEEP_FOREVER ]]; then
+    # Handy to test timeouts.
+    while true; do sleep 60; done
+  fi
+
+  # Assert that we're running as impdev
+  [ "$(id -un)" = impdev ]
+
+  # Assert that /home/impdev/Impala/logs is a symlink to /logs.
+  [ "$(readlink /home/impdev/Impala/logs)" = /logs ]
+
+  boot_container
+  impala_environment
+
+  # By default, the JVM will use 1/4 of your OS memory for its heap size. For a
+  # long-running test, this will delay GC inside the impalads, leading to
+  # unnecessarily large process RSS footprints. We cap the heap size at
+  # a more reasonable size.  Note that "test_insert_large_string" fails
+  # at 2g and 3g, so the suite that includes it (EE_TEST_PARALLEL) gets
+  # additional memory.
+  #
+  # Similarly, bin/start-impala-cluster typically configures the memlimit
+  # to be 80% of the machine memory, divided by the number of daemons.
+  # If multiple containers are to be run simultaneously, this is scaled
+  # down in test-with-docker.py (and further configurable with --impalad-mem-limit-bytes)
+  # and passed in via $IMPALAD_MEM_LIMIT_BYTES to the container. There is a
+  # relationship between the number of parallel tests that can be run by py.test and this
+  # limit.
+  JVM_HEAP_GB=2
+  if [[ $1 = EE_TEST_PARALLEL ]]; then
+    JVM_HEAP_GB=4
+  fi
+  export TEST_START_CLUSTER_ARGS="--jvm_args=-Xmx${JVM_HEAP_GB}g \
+    --impalad_args=--mem_limit=$IMPALAD_MEM_LIMIT_BYTES"
+
+  # BE tests don't require the minicluster, so we can run them directly.
+  if [[ $1 = BE_TEST ]]; then
+    # IMPALA-6494: thrift-server-test fails on Ubuntu 16.04 for the moment; skip it.
+    export SKIP_BE_TEST_PATTERN='thrift-server-test*'
+    if ! bin/run-backend-tests.sh; then
+      echo "Tests $1 failed!"
+      return 1
+    else
+      echo "Tests $1 succeeded!"
+      return 0
+    fi
+  fi
+
+  # Start the minicluster
+  testdata/bin/run-all.sh
+
+  export MAX_PYTEST_FAILURES=0
+  # Choose which suite to run; this is how run-all.sh chooses between them.
+  export FE_TEST=false
+  export BE_TEST=false
+  export EE_TEST=false
+  export JDBC_TEST=false
+  export CLUSTER_TEST=false
+
+  eval "export ${1}=true"
+
+  if [[ ${1} = "EE_TEST_SERIAL" ]]; then
+    # We bucket the stress tests with the parallel tests.
+    export RUN_TESTS_ARGS="--skip-parallel --skip-stress"
+    export EE_TEST=true
+  elif [[ ${1} = "EE_TEST_PARALLEL" ]]; then
+    export RUN_TESTS_ARGS="--skip-serial"
+    export EE_TEST=true
+  fi
+
+  ret=0
+
+  # Run tests.
+  if ! time -p bin/run-all-tests.sh; then
+    ret=1
+    echo "Tests $1 failed!"
+  else
+    echo "Tests $1 succeeded!"
+  fi
+  # Oddly, I've observed bash fail to exit (and wind down the container),
+  # leading to test-with-docker.py hitting a timeout. Killing the minicluster
+  # daemons fixes this.
+  testdata/bin/kill-all.sh || true
+  return $ret
+}
+
+# Ubuntu's tzdata package is very finicky, and if you
+# mount /etc/localtime from the host to the container directly,
+# it fails to install. However, if you make it a symlink
+# and configure /etc/timezone to something that's not an
+# empty string, you'll get the right behavior.
+#
+# The post installation script is findable by looking for "tzdata.postinst"
+#
+# Use this command to reproduce the Ubuntu issue:
+#   docker run -v /etc/localtime:/mnt/localtime -ti ubuntu:16.04 bash -c '
+#     date
+#     ln -sf /mnt/localtime /etc/localtime
+#     date +%Z > /etc/timezone
+#     date
+#     apt-get update > /dev/null
+#     apt-get install tzdata
+#     date'
+function configure_timezone() {
+  if ! diff -q /etc/localtime /mnt/localtime 2> /dev/null; then
+    ln -sf /mnt/localtime /etc/localtime
+    date +%Z > /etc/timezone
+  fi
+}
+
+function main() {
+  set -e
+
+  # Run given command
+  CMD="$1"
+  shift
+
+  echo ">>> ${CMD} $@ (begin)"
+  set -x
+  if "${CMD}" "$@"; then
+    ret=0
+  else
+    ret=$?
+  fi
+  set +x
+  echo ">>> ${CMD} $@ ($ret) (end)"
+  exit $ret
+}
+
+# Run main() unless we're being sourced.
+if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
+  main "$@"
+fi
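
For reference, a hand-rolled launch of this entrypoint might look like the
sketch below; test-with-docker.py sets these mounts up for real, and the
image name is hypothetical:

  # Sketch only; the mounts follow the header comment in entrypoint.sh.
  docker run \
    -v /etc/localtime:/mnt/localtime \
    -v "$IMPALA_HOME":/repo \
    -v "$IMPALA_HOME/logs/docker/run1/ee-serial":/logs \
    -v "$HOME/.ccache":/ccache \
    impala-dev-image /entrypoint.sh test_suite EE_TEST_SERIAL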

http://git-wip-us.apache.org/repos/asf/impala/blob/2896b8d1/docker/monitor.py
----------------------------------------------------------------------
diff --git a/docker/monitor.py b/docker/monitor.py
new file mode 100644
index 0000000..64cde0c
--- /dev/null
+++ b/docker/monitor.py
@@ -0,0 +1,329 @@
+#!/usr/bin/python
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Monitors Docker containers for CPU and memory usage, and
+# prepares an HTML timeline based on said monitoring.
+#
+# Usage example:
+#   mon = monitor.ContainerMonitor("monitoring.txt")
+#   mon.start()
+#   # container1 is an object with attributes id, name, and logfile.
+#   mon.add(container1)
+#   mon.add(container2)
+#   mon.stop()
+#   timeline = monitor.Timeline("monitoring.txt",
+#       [container1, container2],
+#       re.compile(">>> "))
+#   timeline.create("output.html")
+
+import datetime
+import json
+import logging
+import os
+import shutil
+import subprocess
+import threading
+import time
+
+
+# Unit for reporting user/system CPU seconds in cpuacct.stat.
+# See https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt and time(7).
+USER_HZ = os.sysconf(os.sysconf_names['SC_CLK_TCK'])
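+# For example, with USER_HZ == 100 (a common value), a cpuacct.stat reading
+# of "user 250" corresponds to 2.5 seconds of user CPU time.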
+
+
+def total_memory():
+  """Returns total RAM on system, in GB."""
+  return _memory()[0]
+
+
+def used_memory():
+  """Returns total used RAM on system, in GB."""
+  return _memory()[1]
+
+
+def _memory():
+  """Returns (total, used) memory on system, in GB.
+
+  Used is computed as total - available.
+
+  Calls "free" and parses output. Sample output for reference:
+
+                total        used        free      shared     buffers       cache   available
+  Mem:    126747197440 26363965440 56618553344    31678464  2091614208 41673064448 99384889344
+  Swap:             0           0           0
+  """
+
+  free_lines = subprocess.check_output(["free", "-b", "-w"]).split('\n')
+  free_grid = [x.split() for x in free_lines]
+  # Identify columns for "total" and "available"
+  total_idx = free_grid[0].index("total")
+  available_idx = free_grid[0].index("available")
+  total = int(free_grid[1][1 + total_idx])
+  available = int(free_grid[1][1 + available_idx])
+  used = total - available
+  total_gb = total / (1024.0 * 1024.0 * 1024.0)
+  used_gb = used / (1024.0 * 1024.0 * 1024.0)
+  return (total_gb, used_gb)
+
+
+def datetime_to_seconds_since_epoch(dt):
+  """Converts a Python datetime to seconds since the epoch."""
+  return time.mktime(dt.timetuple())
+
+
+def split_timestamp(line):
+  """Parses timestamp at beginning of a line.
+
+  Returns a tuple of seconds since the epoch and the rest
+  of the line. Raises ValueError if the timestamp cannot be parsed.
+  """
+  LENGTH = 26
+  FORMAT = "%Y-%m-%d %H:%M:%S.%f"
+  t = line[:LENGTH]
+  return (datetime_to_seconds_since_epoch(datetime.datetime.strptime(t, FORMAT)),
+          line[LENGTH + 1:])
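+
+# Illustrative example (the exact float depends on the local timezone):
+#   split_timestamp("2018-02-02 09:01:37.143591 hello")
+#   returns (<seconds since the epoch, as a float>, "hello")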
+
+
+class ContainerMonitor(object):
+  """Monitors Docker containers.
+
+  Monitoring data is written to a file. An example is:
+
+  2018-02-02 09:01:37.143591 d8f640989524be3939a70557a7bf7c015ba62ea5a105a64c94472d4ebca93c50 cpu user 2 system 5
+  2018-02-02 09:01:37.143591 d8f640989524be3939a70557a7bf7c015ba62ea5a105a64c94472d4ebca93c50 memory cache 11481088 rss 4009984 rss_huge 0 mapped_file 8605696 dirty 24576 writeback 0 pgpgin 4406 pgpgout 624 pgfault 3739 pgmajfault 99 inactive_anon 0 active_anon 3891200 inactive_file 7614464 active_file 3747840 unevictable 0 hierarchical_memory_limit 9223372036854771712 total_cache 11481088 total_rss 4009984 total_rss_huge 0 total_mapped_file 8605696 total_dirty 24576 total_writeback 0 total_pgpgin 4406 total_pgpgout 624 total_pgfault 3739 total_pgmajfault 99 total_inactive_anon 0 total_active_anon 3891200 total_inactive_file 7614464 total_active_file 3747840 total_unevictable 0
+
+  That is, the format is:
+
+  <timestamp> <container> cpu user <usercpu> system <systemcpu>
+  <timestamp> <container> memory <contents of memory.stat without newlines>
+
+  <usercpu> and <systemcpu> are in the units of USER_HZ.
+  See https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt for documentation
+  on memory.stat; it's in the "memory" cgroup, often mounted at
+  /sys/fs/cgroup/memory/<cgroup>/memory.stat.
+
+  This format is parsed back by the Timeline class below and should
+  not be considered an API.
+  """
+
+  def __init__(self, output_path, frequency_seconds=1):
+    """frequency_seconds is how often metrics are gathered"""
+    self.containers = []
+    self.output_path = output_path
+    self.keep_monitoring = None
+    self.monitor_thread = None
+    self.frequency_seconds = frequency_seconds
+
+  def start(self):
+    self.keep_monitoring = True
+    self.monitor_thread = threading.Thread(target=self._monitor)
+    self.monitor_thread.setDaemon(True)
+    self.monitor_thread.start()
+
+  def stop(self):
+    self.keep_monitoring = False
+    self.monitor_thread.join()
+
+  def add(self, container):
+    """Adds monitoring for container, which is an object with property 'id'."""
+    self.containers.append(container)
+
+  @staticmethod
+  def _metrics_from_stat_file(root, container, stat):
+    """Returns metrics stat file contents.
+
+    root: a cgroups root (a path as a string)
+    container: an object with string attribute id
+    stat: a string filename
+
+    Returns contents of <root>/docker/<container.id>/<stat>
+    with newlines replaced with spaces.
+    Returns None on errors.
+    """
+    dirname = os.path.join(root, "docker", container.id)
+    if not os.path.isdir(dirname):
+      # Container may no longer exist.
+      return None
+    try:
+      statcontents = file(os.path.join(dirname, stat)).read()
+      return statcontents.replace("\n", " ").strip()
+    except IOError, e:
+      # Ignore errors; cgroup can disappear on us.
+      logging.warning("Ignoring exception reading cgroup. " +
+                      "This can happen if container just exited. " + str(e))
+      return None
+
+  def _monitor(self):
+    """Monitors CPU usage of containers.
+
+    Otput is stored in self.output_path.
+    Also, keeps track of minimum and maximum memory usage (for the machine).
+    """
+    # Ubuntu systems typically mount cpuacct cgroup in /sys/fs/cgroup/cpu,cpuacct,
+    # but this can vary by OS distribution.
+    all_cgroups = subprocess.check_output(
+        "findmnt -n -o TARGET -t cgroup --source cgroup".split()
+    ).split("\n")
+    cpuacct_root = [c for c in all_cgroups if "cpuacct" in c][0]
+    memory_root = [c for c in all_cgroups if "memory" in c][0]
+    logging.info("Using cgroups: cpuacct %s, memory %s", cpuacct_root, memory_root)
+    self.min_memory_usage_gb = None
+    self.max_memory_usage_gb = None
+
+    with file(self.output_path, "w") as output:
+      while self.keep_monitoring:
+        # Use a single timestamp for a given round of monitoring.
+        now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
+        for c in self.containers:
+          cpu = self._metrics_from_stat_file(cpuacct_root, c, "cpuacct.stat")
+          memory = self._metrics_from_stat_file(memory_root, c, "memory.stat")
+          if cpu:
+            output.write("%s %s cpu %s\n" % (now, c.id, cpu))
+          if memory:
+            output.write("%s %s memory %s\n" % (now, c.id, memory))
+        output.flush()
+
+        # Machine-wide memory usage
+        m = used_memory()
+        if self.min_memory_usage_gb is None:
+          self.min_memory_usage_gb, self.max_memory_usage_gb = m, m
+        else:
+          self.min_memory_usage_gb = min(self.min_memory_usage_gb, m)
+          self.max_memory_usage_gb = max(self.max_memory_usage_gb, m)
+        time.sleep(self.frequency_seconds)
+
+
+class Timeline(object):
+  """Given metric and log data for containers, creates a timeline report.
+
+  This is a standalone HTML file with a timeline for the log files and CPU charts for
+  the containers. The HTML uses https://developers.google.com/chart/ for rendering
+  the charts, which happens in the browser.
+  """
+
+  def __init__(self, monitor_file, containers, interesting_re):
+    self.monitor_file = monitor_file
+    self.containers = containers
+    self.interesting_re = interesting_re
+
+  def logfile_timeline(self, container):
+    """Returns a list of (name, timestamp, line) tuples for interesting lines in
+    the container's logfile. container is expected to have name and logfile attributes.
+    """
+    interesting_lines = [
+        line.strip()
+        for line in file(container.logfile)
+        if self.interesting_re.search(line)]
+    return [(container.name,) + split_timestamp(line) for line in interesting_lines]
+
+  @staticmethod
+  def parse_metrics(f):
+    """Parses timestamped metric lines.
+
+    Given metrics lines like:
+
+    2017-10-25 10:08:30.961510 87d5562a5fe0ea075ebb2efb0300d10d23bfa474645bb464d222976ed872df2a cpu user 33 system 15
+
+    Returns an iterable of (ts, container, user_cpu, system_cpu), where
+    user_cpu and system_cpu are the fraction of one CPU used since the
+    previous sample.
+    """
+    prev_by_container = {}
+    for line in f:
+      try:
+        # Parse inside the try block so that a malformed timestamp is
+        # skipped rather than aborting the whole parse.
+        ts, rest = split_timestamp(line.rstrip())
+        container, metric_type, rest2 = rest.split(" ", 2)
+        if metric_type != "cpu":
+          continue
+        _, user_cpu_s, _, system_cpu_s = rest2.split(" ", 3)
+      except Exception:
+        logging.warning("Skipping metric line: %s", line)
+        continue
+
+      prev_ts, prev_user, prev_system = prev_by_container.get(
+          container, (None, None, None))
+      user_cpu = int(user_cpu_s)
+      system_cpu = int(system_cpu_s)
+      if prev_ts is not None:
+        # Timestamps are seconds since the epoch and are floats.
+        dt = ts - prev_ts
+        assert type(dt) == float
+        if dt != 0:
+          yield ts, container, (user_cpu - prev_user)/dt/USER_HZ,\
+              (system_cpu - prev_system)/dt/USER_HZ
+      prev_by_container[container] = ts, user_cpu, system_cpu
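+
+  # Illustrative example for parse_metrics (assumed numbers): two samples 10
+  # seconds apart whose user counter goes from 100 to 600 ticks, with
+  # USER_HZ == 100, yield user_cpu == (600 - 100) / 10 / 100 == 0.5, i.e.,
+  # half of one CPU.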
+
+  def create(self, output):
+    # Read logfiles
+    timelines = []
+    for c in self.containers:
+      if not os.path.exists(c.logfile):
+        logging.warning("Missing log file: %s", c.logfile)
+        continue
+      timelines.append(self.logfile_timeline(c))
+
+    # Convert timelines to JSON
+    timeline_json = []
+    for timeline in timelines:
+      for current_line, next_line in zip(timeline, timeline[1:]):
+        name, ts_current, msg = current_line
+        _, ts_next, _ = next_line
+        timeline_json.append(
+            [name, msg, ts_current, ts_next]
+        )
+    if not timeline_json:
+      logging.warning("No timeline data; skipping timeline")
+      return
+
+    min_ts = min(x[2] for x in timeline_json)
+
+    for row in timeline_json:
+      row[2] = row[2] - min_ts
+      row[3] = row[3] - min_ts
+
+    # metrics_by_container: container -> [ ts, user, system ]
+    metrics_by_container = dict()
+    max_metric_ts = 0
+    container_by_id = dict()
+    for c in self.containers:
+      container_by_id[c.id] = c
+
+    for ts, container_id, user, system in self.parse_metrics(file(self.monitor_file)):
+      container = container_by_id.get(container_id)
+      if not container:
+        continue
+
+      if ts > max_metric_ts:
+        max_metric_ts = ts
+      if ts < min_ts:
+        # We ignore metrics that show up before the timeline's
+        # first messages. This largely avoids a bug in the
+        # Google Charts visualization code wherein one of the series seems
+        # to wrap around.
+        continue
+      metrics_by_container.setdefault(
+          container.name, []).append((ts - min_ts, user, system))
+
+    with file(output, "w") as o:
+      template_path = os.path.join(os.path.dirname(__file__), "timeline.html.template")
+      shutil.copyfileobj(file(template_path), o)
+      o.write("\n<script>\nvar data = \n")
+      json.dump(dict(timeline=timeline_json, metrics=metrics_by_container,
+                     max_ts=(max_metric_ts - min_ts)), o, indent=2)
+      o.write("</script>")

http://git-wip-us.apache.org/repos/asf/impala/blob/2896b8d1/docker/test-with-docker.py
----------------------------------------------------------------------
diff --git a/docker/test-with-docker.py b/docker/test-with-docker.py
new file mode 100755
index 0000000..59640b4
--- /dev/null
+++ b/docker/test-with-docker.py
@@ -0,0 +1,579 @@
+#!/usr/bin/python
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+CLI_HELP = """\
+Runs tests inside of docker containers, parallelizing different types of
+tests. This script first creates a docker container, checks out this repo
+into it, bootstraps the container with Impala dependencies, and builds Impala
+and its test data.  Then, it saves the resulting container, and launches new
+containers to run tests in parallel.  A visual HTML timeline is generated
+as part of the build, in logs/docker/*/timeline.html.
+"""
+
+# To execute, run:
+#   docker/test-with-docker.py
+# After waiting for that to finish, inspect results in logs/docker.
+#
+# Visually, the timeline looks as follows, produced on a 32-core, 100GB RAM
+# machine:
+# .......                      1h05m Checkout, setup machine, build (8m with ccache),
+#                                    generate testdata (52m); missing ccache
+#                                    adds about 7 minutes (very sensitive to number
+#                                    of available cores)
+#        ...                     11m Commit the Docker container
+#           .                    10m FE tests
+#           .                    10m JDBC tests
+#           ....                 45m serial EE tests
+#           ......             1h02m cluster tests
+#           ...                  31m BE (C++) tests
+#           ....                 36m parallel EE tests
+# Total time: 2h25m.
+#
+# CPU usage is sustained high for the parallel EE tests and for
+# the C++ compile (when it's not ccache'd), but is otherwise low.
+# Because every parallel track consumes memory (a cluster),
+# increasing parallelism and memory must be balanced.
+#
+# Memory usage is thorny. The minicluster's memory footprint can
+# be tweaked somewhat by adjusting how much memory is given to the JVM
+# and what --mem_limit is set to. Furthermore, parallel
+# cluster tests use more memory as parallelism increases.
+#
+# The code that runs inside of the containers is in entrypoint.sh,
+# whereas the code that invokes docker is here.
+#
+# We avoid using Dockerfile and "docker build": they make it hard or impossible
+# to cross-mount host directories into containers or use --privileged, and using
+# them would require generating them dynamically. They're more trouble than
+# they're worth for this use case.
+#
+# In practice, the containers are about 100GB (with 45GB
+# being test data and ~40GB being the tests).
+#
+# Requirements:
+#  * Docker
+#    This has been tested on Ubuntu 16.04 with Docker
+#    from the Ubuntu repos, i.e., Docker 1.13.1.
+#  * About 150 GB of disk space available to Docker.
+#  * 75GB of RAM.
+#
+# This script tries to clean up images and containers created by this process, though
+# this can be disabled for debugging.
+#
+# To clean up containers and images manually, you can use:
+#   for x in $(docker ps -aq --filter label=pwd=$IMPALA_HOME); do
+#       docker stop $x; docker rm $x; done
+#   for x in $(docker images -q --filter label=pwd=$IMPALA_HOME); do docker rmi $x; done
+#
+# Core dumps:
+# On an Ubuntu host, core dumps and Docker don't mix by default, because apport is not
+# running inside of the container. See https://github.com/moby/moby/issues/11740
+# To enable core dumps, run the following command on the host:
+#   $ echo 'core.%e.%p' | sudo tee /proc/sys/kernel/core_pattern
+#
+# TODOs:
+#  - Support for executing other flavors, like exhaustive, or file systems,
+#    like S3.
+#
+# Suggested speed improvement TODOs:
+#   - Speed up testdata generation
+#   - Skip generating test data for variants not being run
+#   - Make container image smaller; perhaps make BE test binaries
+#     smaller
+#   - Split up cluster tests into two groups
+#   - Analyze .xml junit files to find slow tests; eradicate
+#     or move to different suite.
+#   - Avoid building BE tests, and build them during execution,
+#     saving on container space as well as baseline build
+#     time.
+
+# We do not use Impala's python environment here, nor do we depend on
+# non-standard python libraries, so that no extra build steps are needed
+# before triggering this script.
+import argparse
+import datetime
+import logging
+import multiprocessing
+import multiprocessing.pool
+import os
+import re
+import subprocess
+import sys
+import tempfile
+import time
+
+if __name__ == '__main__' and __package__ is None:
+  sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+  import monitor
+
+base = os.path.dirname(os.path.abspath(__file__))
+
+
+def main():
+
+  logging.basicConfig(level=logging.INFO,
+                      format='%(asctime)s %(threadName)s: %(message)s')
+
+  default_parallel_test_concurrency, default_suite_concurrency, default_memlimit_gb = \
+      _compute_defaults()
+  parser = argparse.ArgumentParser(
+      description=CLI_HELP, formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+  group = parser.add_mutually_exclusive_group()
+  group.add_argument('--cleanup-containers', dest="cleanup_containers",
+                     action='store_true', default=True,
+                     help='Removes containers when finished.')
+  group.add_argument('--no-cleanup-containers',
+                     dest="cleanup_containers", action='store_false')
+  group = parser.add_mutually_exclusive_group()
+  parser.add_argument(
+      '--parallel-test-concurrency', type=int,
+      default=default_parallel_test_concurrency,
+      help='For the ee-test-parallel suite, how many tests to run concurrently.')
+  parser.add_argument(
+      '--suite-concurrency', type=int, default=default_suite_concurrency,
+      help='Number of concurrent suites to run in parallel.')
+  parser.add_argument(
+      '--impalad-mem-limit-bytes', type=int, default=default_memlimit_gb,
+      help='Memlimit to pass to impalad for miniclusters.')
+  group.add_argument(
+      '--cleanup-image', dest="cleanup_image",
+      action='store_true', default=True,
+      help="Whether to remove image when done.")
+  group.add_argument('--no-cleanup-image', dest="cleanup_image", action='store_false')
+  parser.add_argument(
+      '--build-image', metavar='IMAGE',
+      help='Skip building, and run tests on pre-existing image.')
+  parser.add_argument(
+      '--suite', metavar='VARIANT', action='append',
+      help="Run specific test suites; can be specified multiple times. \
+          If not specified, all tests are run. Choices: " + ",".join(ALL_SUITES))
+  parser.add_argument(
+      '--name', metavar='NAME',
+      help="Use a specific name for the test run. The name is used " +
+      "as a prefix for the container and image names, and " +
+      "as part of the log directory naming. Defaults to include a timestamp.",
+      default=datetime.datetime.now().strftime("i-%Y%m%d-%H%M%S"))
+  parser.add_argument('--timeout', metavar='MINUTES',
+                      help="Timeout for test suites, in minutes.",
+                      type=int,
+                      default=60*2)
+  parser.add_argument('--ccache-dir', metavar='DIR',
+                      help="CCache directory to use",
+                      default=os.path.expanduser("~/.ccache"))
+  parser.add_argument('--test', action="store_true")
+  args = parser.parse_args()
+
+  if not args.suite:
+    args.suite = ALL_SUITES
+  t = TestWithDocker(
+      build_image=args.build_image, suites=args.suite,
+      name=args.name, timeout=args.timeout, cleanup_containers=args.cleanup_containers,
+      cleanup_image=args.cleanup_image, ccache_dir=args.ccache_dir, test_mode=args.test,
+      parallel_test_concurrency=args.parallel_test_concurrency,
+      suite_concurrency=args.suite_concurrency,
+      impalad_mem_limit_bytes=args.impalad_mem_limit_bytes)
+
+  logging.getLogger('').addHandler(
+      logging.FileHandler(os.path.join(_make_dir_if_not_exist(t.log_dir), "log.txt")))
+
+  logging.info("Arguments: %s", args)
+
+  ret = t.run()
+  t.create_timeline()
+
+  if not ret:
+    sys.exit(1)
+
+
+def _compute_defaults():
+  """Compute default config options based on memory.
+
+  The goal is to work reasonably on machines with
+  about 60GB of memory, like Amazon's c4.8xlarge (36 CPUs, 60GB)
+  or c5.9xlarge (36 CPUs, 72GB) or m4.4xlarge (16 CPUs, 64 GB).
+
+  Based on experimentation, we choose defaults for different
+  machine sizes by total memory, with an eye towards
+  having reasonable runtimes as well.
+
+  Experiments on memory usage:
+
+  suite             Xmx   parallelism  memlimit  usage
+  ee-test-parallel  4GB        8         5GB     33GB
+  ee-test-parallel  4GB       16         7GB     37GB
+  ee-test-serial    4GB        -         5GB     18GB
+  cluster-test      4GB        -          -      13GB
+  be-test           4GB        -        10GB     19GB
+  fe-test           4GB        -        10GB      9GB
+  """
+  total_memory_gb = monitor.total_memory()
+  cpus = multiprocessing.cpu_count()
+  logging.info("CPUs: %s Memory (GB): %s", cpus, total_memory_gb)
+
+  parallel_test_concurrency = min(cpus, 8)
+  memlimit_gb = 7
+
+  if total_memory_gb >= 95:
+    suite_concurrency = 4
+    parallel_test_concurrency = min(cpus, 12)
+  elif total_memory_gb >= 65:
+    suite_concurrency = 3
+  elif total_memory_gb >= 35:
+    suite_concurrency = 2
+  else:
+    logging.warning("This tool should be run on a machine with more memory.")
+    suite_concurrency = 1
+
+  return parallel_test_concurrency, suite_concurrency, memlimit_gb * 1024 * 1024 * 1024
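+
+# Illustrative example of _compute_defaults(): on a 36-CPU, 72GB machine,
+# it returns parallel_test_concurrency=8, suite_concurrency=3, and a
+# 7GiB impalad memlimit.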
+
+
+# The names of all the test tracks supported.  NOOP isn't included here, but is
+# handy for testing.  These are organized slowest-to-fastest, so that, when
+# parallelism of suites is limited, the total time is not impacted.
+ALL_SUITES = [
+    "EE_TEST_SERIAL",
+    "EE_TEST_PARALLEL",
+    "CLUSTER_TEST",
+    "BE_TEST",
+    "FE_TEST",
+    "JDBC_TEST",
+]
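+
+# For example, to run only some suites (illustrative invocation):
+#   docker/test-with-docker.py --suite BE_TEST --suite FE_TEST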
+
+
+def _call(args, check=True):
+  """Wrapper for calling a subprocess.
+
+  args is the first argument of subprocess.Popen, typically
+  an array, e.g., ["echo", "hi"].
+
+  If check is set, raise an exception on failure.
+  """
+  logging.info("Calling: %s", args)
+  if check:
+    subprocess.check_call(args, stdin=None)
+  else:
+    return subprocess.call(args, stdin=None)
+
+
+def _check_output(*args, **kwargs):
+  """Wrapper for subprocess.check_output, with logging."""
+  logging.info("Running: %s, %s; cmdline: %s.", args, kwargs, " ".join(*args))
+  return subprocess.check_output(*args, **kwargs)
+
+
+def _make_dir_if_not_exist(*parts):
+  d = os.path.join(*parts)
+  if not os.path.exists(d):
+    os.makedirs(d)
+  return d
+
+
+class Container(object):
+  """Encapsulates a container, with some metadata."""
+
+  def __init__(self, id_, name, logfile, exitcode=None, running=None):
+    self.id = id_
+    self.name = name
+    self.logfile = logfile
+    self.exitcode = exitcode
+    self.running = running
+    self.start = None
+    self.end = None
+
+  def runtime_seconds(self):
+    if self.start and self.end:
+      return self.end - self.start
+
+  def __str__(self):
+    return "Container<" + \
+        ",".join(["%s=%s" % (k, v) for k, v in self.__dict__.items()]) \
+        + ">"
+
+
+class TestWithDocker(object):
+  """Tests Impala using Docker containers for parallelism."""
+
+  def __init__(self, build_image, suites, name, timeout, cleanup_containers,
+               cleanup_image, ccache_dir, test_mode,
+               suite_concurrency, parallel_test_concurrency,
+               impalad_mem_limit_bytes):
+    self.build_image = build_image
+    self.suites = [TestSuiteRunner(self, suite) for suite in suites]
+    self.name = name
+    self.containers = []
+    self.timeout_minutes = timeout
+    self.git_root = _check_output(["git", "rev-parse", "--show-toplevel"]).strip()
+    self.cleanup_containers = cleanup_containers
+    self.cleanup_image = cleanup_image
+    self.image = None
+    if build_image and cleanup_image:
+      # Refuse to clean up external image.
+      raise Exception("cleanup_image and build_image cannot be both specified")
+    self.ccache_dir = ccache_dir
+    self.log_dir = os.path.join(self.git_root, "logs", "docker", self.name)
+    self.monitoring_output_file = os.path.join(self.log_dir, "metrics.txt")
+    self.monitor = monitor.ContainerMonitor(self.monitoring_output_file)
+    self.test_mode = test_mode
+    self.suite_concurrency = suite_concurrency
+    self.parallel_test_concurrency = parallel_test_concurrency
+    self.impalad_mem_limit_bytes = impalad_mem_limit_bytes
+
+  def _create_container(self, image, name, logdir, logname, entrypoint, extras=None):
+    """Returns a new container.
+
+    logdir - subdirectory to create under self.log_dir,
+      which will get mounted to /logs
+    logname - name of file in logdir that will be created
+    extras - extra arguments to pass to docker
+    entrypoint - entrypoint arguments, as a list.
+    """
+    if extras is None:
+      extras = []
+    if self.test_mode:
+      extras = ["-e", "TEST_TEST_WITH_DOCKER=true"] + extras
+
+    container_id = _check_output([
+        "docker", "create",
+        # Required for some of the ntp handling in bootstrap and Kudu;
+        # requirement may be lifted in newer Docker versions.
+        "--privileged",
+        "--name", name,
+        "--hostname", name,
+        # Label with the git root directory for easier cleanup
+        "--label=pwd=" + self.git_root,
+        # Consistent locales
+        "-e", "LC_ALL=C",
+        "-e", "IMPALAD_MEM_LIMIT_BYTES=" +
+        str(self.impalad_mem_limit_bytes),
+        # Mount the git directory so that clones can be local
+        "-v", self.git_root + ":/repo:ro",
+        "-v", self.ccache_dir + ":/ccache",
+        # Share timezone between host and container
+        "-v", "/etc/localtime:/mnt/localtime",
+        "-v", _make_dir_if_not_exist(self.log_dir,
+                                     logdir) + ":/logs",
+        "-v", base + ":/mnt/base:ro"]
+        + extras
+        + [image]
+        + entrypoint).strip()
+    return Container(name=name, id_=container_id,
+                     logfile=os.path.join(self.log_dir, logdir, logname))
+
+  def _run_container(self, container):
+    """Runs container, and returns True if the container had a successful exit value.
+
+    This blocks while the container is running. The container output is
+    run through annotate.py to add timestamps and saved into the container's log file.
+    """
+    container.running = True
+
+    with file(container.logfile, "w") as log_output:
+      container.start = time.time()
+      # Sets up a "docker start ... | annotate.py > logfile" pipeline using
+      # subprocess.
+      annotate = subprocess.Popen(
+          [os.path.join(self.git_root, "docker", "annotate.py")],
+          stdin=subprocess.PIPE,
+          stdout=log_output,
+          stderr=log_output)
+
+      logging.info("Starting container %s; logging to %s", container.name,
+                   container.logfile)
+      docker = subprocess.Popen(["docker", "start", "--attach", container.id],
+                                stdin=None, stdout=annotate.stdin, stderr=annotate.stdin)
+
+      ret = docker.wait()
+      annotate.stdin.close()
+      annotate.wait()
+
+      logging.info("Container %s returned %s", container, ret)
+      container.exitcode = ret
+      container.running = False
+      container.end = time.time()
+      return ret == 0
+
+  @staticmethod
+  def _stop_container(container):
+    """Stops container. Ignores errors (e.g., if it's already exited)."""
+    _call(["docker", "stop", container.id], check=False)
+    if container.running:
+      container.end = time.time()
+      container.running = False
+
+  @staticmethod
+  def _rm_container(container):
+    """Removes container."""
+    _call(["docker", "rm", container.id], check=False)
+
+  def _create_build_image(self):
+    """Creates the "build image", with Impala compiled and data loaded."""
+    container = self._create_container(
+        image="ubuntu:16.04", name=self.name,
+        logdir="build",
+        logname="log-build.txt",
+        # entrypoint.sh will create a user with our uid; this
+        # allows the shared file systems to work seamlessly
+        entrypoint=["/mnt/base/entrypoint.sh", "build", str(os.getuid())])
+    self.containers.append(container)
+    self.monitor.add(container)
+    try:
+      logging.info("Docker container for build: %s", container)
+      _check_output(["docker", "start", container.id])
+      if not self._run_container(container):
+        raise Exception("Build container failed.")
+      logging.info("Committing docker container.")
+      self.image = _check_output(
+          ["docker", "commit",
+           "-c", "LABEL pwd=" + self.git_root,
+           container.id, "impala:built-" + self.name]).strip()
+      logging.info("Committed docker image: %s", self.image)
+    finally:
+      if self.cleanup_containers:
+        self._stop_container(container)
+        self._rm_container(container)
+
+  def _run_tests(self):
+    start_time = time.time()
+    timeout_seconds = self.timeout_minutes * 60
+    deadline = start_time + timeout_seconds
+    pool = multiprocessing.pool.ThreadPool(processes=self.suite_concurrency)
+    outstanding_suites = []
+    for suite in self.suites:
+      suite.task = pool.apply_async(suite.run)
+      outstanding_suites.append(suite)
+
+    ret = True
+    while time.time() < deadline and len(outstanding_suites) > 0:
+      for suite in list(outstanding_suites):
+        task = suite.task
+        if task.ready():
+          this_task_ret = task.get()
+          outstanding_suites.remove(suite)
+          if this_task_ret:
+            logging.info("Suite %s succeeded.", suite.name)
+          else:
+            logging.info("Suite %s failed.", suite.name)
+            ret = False
+      time.sleep(10)
+    if len(outstanding_suites) > 0:
+      for container in self.containers:
+        self._stop_container(container)
+      for suite in outstanding_suites:
+        suite.task.get()
+      raise Exception("Tasks not finished within timeout (%s minutes): %s" %
+                      (self.timeout_minutes, ",".join([
+                          suite.name for suite in outstanding_suites])))
+    return ret
+
+  def run(self):
+    # Create logs directories and ccache dir.
+    _make_dir_if_not_exist(self.ccache_dir)
+    _make_dir_if_not_exist(self.log_dir)
+
+    self.monitor.start()
+    try:
+      if not self.build_image:
+        self._create_build_image()
+      else:
+        self.image = self.build_image
+      ret = self._run_tests()
+      logging.info("Containers:")
+      for c in self.containers:
+        def to_success_string(exitcode):
+          if exitcode == 0:
+            return "SUCCESS"
+          return "FAILURE"
+        logging.info("%s %s %s %s", to_success_string(c.exitcode), c.name, c.logfile,
+                     c.runtime_seconds())
+      return ret
+    finally:
+      self.monitor.stop()
+      if self.cleanup_image and self.image:
+        _call(["docker", "rmi", self.image], check=False)
+      logging.info("Memory usage: %s GB min, %s GB max",
+                   self.monitor.min_memory_usage_gb,
+                   self.monitor.max_memory_usage_gb)
+
+  # Strings (really, regular expressions) pulled out into the visual timeline.
+  _INTERESTING_STRINGS = [
+      ">>> ",
+  ]
+  _INTERESTING_RE = re.compile("|".join("(%s)" % (s,) for s in _INTERESTING_STRINGS))
+
+  def create_timeline(self):
+    """Creates timeline into log directory."""
+    timeline = monitor.Timeline(
+        monitor_file=self.monitoring_output_file,
+        containers=self.containers,
+        interesting_re=self._INTERESTING_RE)
+    timeline.create(os.path.join(self.log_dir, "timeline.html"))
+
+
+class TestSuiteRunner(object):
+  """Runs a single test suite."""
+
+  def __init__(self, test_with_docker, suite):
+    self.test_with_docker = test_with_docker
+    self.suite = suite
+    self.task = None
+    self.name = self.suite.lower()
+
+  def run(self):
+    """Runs given test. Returns true on success, based on exit code."""
+    test_with_docker = self.test_with_docker
+    suite = self.suite
+    self.start = time.time()
+
+    # io-file-mgr-test expects a real-ish file system at /tmp;
+    # we mount a temporary directory into the container to appease it.
+    tmpdir = tempfile.mkdtemp(prefix=test_with_docker.name + "-" + self.name)
+    # Container names are sometimes used as hostnames, and DNS names shouldn't
+    # have underscores.
+    container_name = test_with_docker.name + "-" + self.name.replace("_", "-")
+
+    container = test_with_docker._create_container(
+        image=test_with_docker.image,
+        name=container_name,
+        extras=[
+            "-v", tmpdir + ":/tmp",
+            "-u", str(os.getuid()),
+            "-e", "NUM_CONCURRENT_TESTS=" +
+            str(test_with_docker.parallel_test_concurrency),
+        ],
+        logdir=self.name,
+        logname="log-test-" + self.suite + ".txt",
+        entrypoint=["/mnt/base/entrypoint.sh", "test_suite", suite])
+
+    test_with_docker.containers.append(container)
+    test_with_docker.monitor.add(container)
+    try:
+      return test_with_docker._run_container(container)
+    except Exception:
+      logging.exception("Suite %s raised an exception.", suite)
+      return False
+    finally:
+      logging.info("Cleaning up containers for %s" % (suite,))
+      test_with_docker._stop_container(container)
+      if test_with_docker.cleanup_containers:
+        test_with_docker._rm_container(container)
+
+
+if __name__ == "__main__":
+  main()

http://git-wip-us.apache.org/repos/asf/impala/blob/2896b8d1/docker/timeline.html.template
----------------------------------------------------------------------
diff --git a/docker/timeline.html.template b/docker/timeline.html.template
new file mode 100644
index 0000000..c8de821
--- /dev/null
+++ b/docker/timeline.html.template
@@ -0,0 +1,142 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<!--
+
+Template/header for a timeline visualization of a multi-container build.
+The timelines represent interesting log lines, with one row per container.
+The charts represent CPU usage within those containers.
+
+To use this, concatenate this with a '<script>' block defining
+a global variable named data.
+
+The expected format of data is exemplified by the following,
+and is tightly coupled with the implementation generating
+it in monitor.py. The intention of this unfriendly format
+is to do as much of the munging as possible in Python.
+
+To make the visualization relative to the start time (i.e., to say that all
+builds start at 00:00), the timestamps are all seconds since the build began.
+To make the visualization work with them, the timestamps are then converted
+into local time, which displays reasonably. This is a workaround for the fact
+that the timeline visualization library does not accept any data types
+that represent durations, even though we want timestamp-style formatting.
+
+var data = {
+  // max timestamp seen, in seconds since the epoch
+  "max_ts": 8153.0,
+  // map of container name to an array of metrics
+  "metrics": {
+    "i-20180312-140548-ee-test-serial": [
+      // a single metric point is an array of timestamp, user CPU, system CPU
+      // CPU is the percent of 1 CPU used since the previous timestamp.
+      [
+        4572.0,
+        0.11,
+        0.07
+      ]
+    ]
+  },
+  // Array of timelines
+  "timeline": [
+    // a timeline entry contains a name (for the entire row of the timeline),
+    // the message (for a segment of the timeline), and start and end timestamps
+    // for the segment.
+    [
+      "i-20180312-140548",
+      "+ echo '>>> build' '4266 (begin)'",
+      0.0,
+      0.0
+    ]
+  ]
+}
+-->
+
+<script type="text/javascript" src="https://www.gstatic.com/charts/loader.js"></script>
+
+<script type="text/javascript">
+google.charts.load("current", {packages:["timeline", "corechart"]});
+google.charts.setOnLoadCallback(drawChart);
+
+function ts_to_hms(secs) {
+  var s = secs % 60;
+  var m = Math.floor(secs / 60) % 60;
+  var h = Math.floor(secs / (60 * 60));
+  return [h, m, s];
+}
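+
+// For example, ts_to_hms(3725) returns [1, 2, 5], i.e., 1h02m05s.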
+
+/* Returns a Date object corresponding to secs seconds since the epoch, in
+ * localtime. Date(x) and Date(0, 0, 0, 0, 0, 0, 0, x) differ in that the
+ * former returns UTC whereas the latter returns the browser local time.
+ * For consistent handling within this visualization, we use localtime.
+ *
+ * Beware that local time can be discontinuous around time changes.
+ */
+function ts_to_date(secs) {
+  // secs may be a float, so we use millis as a common denominator unit
+  var millis = 1000 * secs;
+  return new Date(1970 /* yr; beginning of unix epoch */, 0 /* mo */, 0 /* d */,
+      0 /* hr */, 0 /* min */, 0 /* sec */, millis);
+}
+
+function drawChart() {
+  var container = document.getElementById('container');
+  var timelineContainer = document.createElement("div");
+  container.appendChild(timelineContainer);
+  var chart = new google.visualization.Timeline(timelineContainer);
+  var dataTable = new google.visualization.DataTable();
+  dataTable.addColumn({ type: 'string', id: 'Position' });
+  dataTable.addColumn({ type: 'string', id: 'Name' });
+  // timeofday isn't supported here
+  dataTable.addColumn({ type: 'datetime', id: 'Start' });
+  dataTable.addColumn({ type: 'datetime', id: 'End' });
+  // Timeline
+  for (var i = 0; i < data.timeline.length; ++i) {
+    var row = data.timeline[i];
+    dataTable.addRow([ row[0], row[1], ts_to_date(row[2]), ts_to_date(row[3]) ]);
+  }
+  chart.draw(dataTable, { height: "400px" } );
+
+  for (const k of Object.keys(data.metrics)) {
+    var lineChart = document.createElement("div");
+    container.appendChild(lineChart);
+
+    var dataTable = new google.visualization.DataTable();
+    dataTable.addColumn({ type: 'timeofday', id: 'Time' });
+    dataTable.addColumn({ type: 'number', id: 'User' });
+    dataTable.addColumn({ type: 'number', id: 'System' });
+
+    for (const row of data.metrics[k]) {
+      dataTable.addRow([ ts_to_hms(row[0]), row[1], row[2] ]);
+    }
+    var options = {
+      title: 'CPU',
+      legend: { position: 'bottom' },
+      hAxis: {
+        minValue: [0, 0, 0],
+        maxValue: ts_to_hms(data.max_ts)
+      }
+    };
+
+    var chart = new google.visualization.LineChart(lineChart);
+    chart.draw(dataTable, options);
+  }
+}
+</script>
+<div id="container" style="height: 200px;"></div>

http://git-wip-us.apache.org/repos/asf/impala/blob/2896b8d1/testdata/bin/run-all.sh
----------------------------------------------------------------------
diff --git a/testdata/bin/run-all.sh b/testdata/bin/run-all.sh
index fb85811..f722b89 100755
--- a/testdata/bin/run-all.sh
+++ b/testdata/bin/run-all.sh
@@ -35,6 +35,8 @@ fi
 
 # Kill and clean data for a clean start.
 echo "Killing running services..."
+# Create log dir, in case there's nothing to kill.
+mkdir -p ${IMPALA_CLUSTER_LOGS_DIR}
 $IMPALA_HOME/testdata/bin/kill-all.sh &>${IMPALA_CLUSTER_LOGS_DIR}/kill-all.log
 
 echo "Starting cluster services..."

http://git-wip-us.apache.org/repos/asf/impala/blob/2896b8d1/tests/query_test/test_runtime_filters.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_runtime_filters.py b/tests/query_test/test_runtime_filters.py
index c3763ff..14f5884 100644
--- a/tests/query_test/test_runtime_filters.py
+++ b/tests/query_test/test_runtime_filters.py
@@ -26,6 +26,9 @@ from tests.common.skip import SkipIfLocal
 
 WAIT_TIME_MS = specific_build_type_timeout(60000, slow_build_timeout=100000)
 
+# Some of the queries in runtime_filters consume a lot of memory, leading to
+# significant memory reservations in parallel tests.
+@pytest.mark.execute_serially
 @SkipIfLocal.multiple_impalad
 class TestRuntimeFilters(ImpalaTestSuite):
   @classmethod

http://git-wip-us.apache.org/repos/asf/impala/blob/2896b8d1/tests/run-tests.py
----------------------------------------------------------------------
diff --git a/tests/run-tests.py b/tests/run-tests.py
index 9552d0d..95e0d11 100755
--- a/tests/run-tests.py
+++ b/tests/run-tests.py
@@ -18,8 +18,10 @@
 # under the License.
 #
 # Runs the Impala query tests, first executing the tests that cannot be run in parallel
-# and then executing the remaining tests in parallel. All additional command line options
-# are passed to py.test.
+# (the serial tests), then executing the stress tests, and then
+# executing the remaining tests in parallel. To run only some of
+# these, use --skip-serial, --skip-stress, or --skip-parallel.
+# All additional command line options are passed to py.test.
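+# For example (illustrative): "run-tests.py --skip-serial --skip-stress"
+# runs only the parallel tests.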
 from tests.common.impala_cluster import ImpalaCluster
 from tests.common.impala_service import ImpaladService
 import itertools
@@ -219,6 +221,15 @@ def print_metrics(substring):
 
 if __name__ == "__main__":
   exit_on_error = '-x' in sys.argv or '--exitfirst' in sys.argv
+  skip_serial = '--skip-serial' in sys.argv
+  if skip_serial:
+    sys.argv.remove("--skip-serial")
+  skip_stress = '--skip-stress' in sys.argv
+  if skip_stress:
+    sys.argv.remove("--skip-stress")
+  skip_parallel = '--skip-parallel' in sys.argv
+  if skip_parallel:
+    sys.argv.remove("--skip-parallel")
   test_executor = TestExecutor(exit_on_error=exit_on_error)
 
   # If the user is just asking for --help, just print the help test and then exit.
@@ -241,18 +252,21 @@ if __name__ == "__main__":
   else:
     print_metrics('connections')
     # First run query tests that need to be executed serially
-    base_args = ['-m', 'execute_serially']
-    test_executor.run_tests(base_args + build_test_args('serial'))
-    print_metrics('connections')
+    if not skip_serial:
+      base_args = ['-m', 'execute_serially']
+      test_executor.run_tests(base_args + build_test_args('serial'))
+      print_metrics('connections')
 
    # Run the stress tests
-    base_args = ['-m', 'stress', '-n', NUM_STRESS_CLIENTS]
-    test_executor.run_tests(base_args + build_test_args('stress'))
-    print_metrics('connections')
+    if not skip_stress:
+      base_args = ['-m', 'stress', '-n', NUM_STRESS_CLIENTS]
+      test_executor.run_tests(base_args + build_test_args('stress'))
+      print_metrics('connections')
 
     # Run the remaining query tests in parallel
-    base_args = ['-m', 'not execute_serially and not stress', '-n', NUM_CONCURRENT_TESTS]
-    test_executor.run_tests(base_args + build_test_args('parallel'))
+    if not skip_parallel:
+      base_args = ['-m', 'not execute_serially and not stress', '-n', NUM_CONCURRENT_TESTS]
+      test_executor.run_tests(base_args + build_test_args('parallel'))
 
     # The total number of tests executed at this point is expected to be >0
     # If it is < 0 then the script needs to exit with a non-zero