Posted to commits@impala.apache.org by he...@apache.org on 2016/10/18 06:00:49 UTC

[1/7] incubator-impala git commit: IMPALA-2789: More compact mem layout with null bits at the end.

Repository: incubator-impala
Updated Branches:
  refs/heads/master 9f61397fc -> df680cfe3


IMPALA-2789: More compact mem layout with null bits at the end.

There are two motivations for this change:
1. Reduce memory consumption.
2. Pave the way for full memory layout compatibility between
   Impala and Kudu to eventually enable zero-copy scans. This
   patch is only a first step towards that goal.

New Memory Layout
Slots are placed in descending order by size with trailing bytes to
store null flags. Null flags are omitted for non-nullable slots. There
is no padding between tuples when stored back-to-back in a row batch.

Example: select bool_col, int_col, string_col, smallint_col
         from functional.alltypes
Slots:   string_col|int_col|smallint_col|bool_col|null_byte
Offsets: 0          16      20           22       23

The main change is to move the null indicators to the end of tuples.
The new memory layout is fully packed, with no padding between slots
or tuples.
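
To illustrate the new layout, here is a minimal C++ sketch (TupleLayout
and the standalone helper functions are illustrative stand-ins, not the
actual Impala classes) of how the trailing null bytes are located and
manipulated:

  #include <cstdint>
  #include <cstring>

  // Hypothetical stand-in for the relevant parts of TupleDescriptor.
  struct TupleLayout {
    int byte_size;       // e.g. 24 for the example query above
    int num_null_bytes;  // e.g. 1 (four nullable slots fit into one byte)
    int null_bytes_offset() const { return byte_size - num_null_bytes; }
  };

  // Zero all null-indicator bytes of the tuple stored at 'tuple_mem',
  // the same idea as Tuple::ClearNullBits() introduced by this patch.
  inline void ClearNullBits(uint8_t* tuple_mem, const TupleLayout& layout) {
    std::memset(tuple_mem + layout.null_bytes_offset(), 0, layout.num_null_bytes);
  }

  // Mark one slot NULL given its indicator byte offset and bit mask,
  // mirroring Tuple::SetNull(); non-nullable slots have bit_mask == 0,
  // so the OR is a no-op.
  inline void SetNull(uint8_t* tuple_mem, int byte_offset, uint8_t bit_mask) {
    tuple_mem[byte_offset] |= bit_mask;
  }

For the example query above, null_bytes_offset() is 23, matching the
null_byte offset listed.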

Performance:
Our standard cluster perf tests showed no significant difference in
query response times or consumed cycles, and a slight reduction in
peak memory consumption.

Testing:
An exhaustive test run passed. Also ran a few selected tests, such as
TPC-H/DS, locally with ASAN.

These follow-on changes are planned:
1. Planner needs to mark slots non-nullable if they correspond
   to a non-nullable Kudu column.
2. Update Kudu scan node to copy tuples with memcpy.
3. Kudu client needs to support transferring ownership of the
   tuple memory (maybe do direct and indirect buffers separately).
4. Update Kudu scan node to use memory transfer instead of copying.

Change-Id: Ib6510c75d841bddafa6638f1bd2ac6731a7053f6
Reviewed-on: http://gerrit.cloudera.org:8080/4673
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/b0e87c68
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/b0e87c68
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/b0e87c68

Branch: refs/heads/master
Commit: b0e87c685d96e4e55a0fda3b23c10fc069f7551a
Parents: 9f61397
Author: Alex Behm <al...@cloudera.com>
Authored: Tue Dec 22 13:56:32 2015 -0800
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Sun Oct 16 23:36:10 2016 +0000

----------------------------------------------------------------------
 .../benchmarks/row-batch-serialize-benchmark.cc |  13 ++-
 be/src/codegen/llvm-codegen.cc                  |  11 ++
 be/src/codegen/llvm-codegen.h                   |   5 +
 be/src/exec/hdfs-scanner.cc                     |  73 +++++++------
 be/src/exec/hdfs-scanner.h                      |  13 +--
 be/src/exec/kudu-scanner.cc                     |   7 +-
 be/src/exec/kudu-scanner.h                      |   3 -
 be/src/exec/row-batch-list-test.cc              |  20 +++-
 be/src/exec/text-converter.cc                   |   2 +-
 be/src/runtime/buffered-tuple-stream-test.cc    |  35 +++----
 be/src/runtime/collection-value-builder-test.cc |  15 ++-
 be/src/runtime/descriptors.cc                   |  56 +++++-----
 be/src/runtime/descriptors.h                    |   4 +-
 be/src/runtime/row-batch-serialize-test.cc      |  32 ++++--
 be/src/runtime/row-batch-test.cc                |  16 ++-
 be/src/runtime/tuple.cc                         |   6 +-
 be/src/runtime/tuple.h                          |   7 +-
 be/src/service/frontend.cc                      |   7 ++
 be/src/service/frontend.h                       |   8 ++
 be/src/testutil/desc-tbl-builder.cc             |  96 ++++-------------
 be/src/testutil/desc-tbl-builder.h              |  20 ++--
 common/thrift/Frontend.thrift                   |   7 ++
 .../apache/impala/analysis/DescriptorTable.java |  72 +++++++++++--
 .../apache/impala/analysis/TupleDescriptor.java |  43 ++++----
 .../org/apache/impala/service/JniFrontend.java  |  41 ++++++--
 .../apache/impala/analysis/AnalyzerTest.java    | 102 ++++++++-----------
 26 files changed, 402 insertions(+), 312 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/benchmarks/row-batch-serialize-benchmark.cc
----------------------------------------------------------------------
diff --git a/be/src/benchmarks/row-batch-serialize-benchmark.cc b/be/src/benchmarks/row-batch-serialize-benchmark.cc
index 3f04906..5a8a104 100644
--- a/be/src/benchmarks/row-batch-serialize-benchmark.cc
+++ b/be/src/benchmarks/row-batch-serialize-benchmark.cc
@@ -17,11 +17,15 @@
 
 #include <iostream>
 #include <sstream>
+#include <boost/scoped_ptr.hpp>
 
+#include "common/init.h"
 #include "runtime/mem-tracker.h"
 #include "runtime/raw-value.h"
 #include "runtime/row-batch.h"
 #include "runtime/tuple-row.h"
+#include "service/fe-support.h"
+#include "service/frontend.h"
 #include "testutil/desc-tbl-builder.h"
 #include "util/benchmark.h"
 #include "util/compress.h"
@@ -88,6 +92,10 @@ const int NUM_ROWS = 1024;
 const int MAX_STRING_LEN = 10;
 
 namespace impala {
+
+// For computing tuple mem layouts.
+static scoped_ptr<Frontend> fe;
+
 // Friend class with access to RowBatch internals
 class RowBatchSerializeBaseline {
  public:
@@ -318,7 +326,7 @@ class RowBatchSerializeBenchmark {
     MemTracker tracker;
     MemPool mem_pool(&tracker);
     ObjectPool obj_pool;
-    DescriptorTblBuilder builder(&obj_pool);
+    DescriptorTblBuilder builder(fe.get(), &obj_pool);
     builder.DeclareTuple() << TYPE_INT << TYPE_STRING;
     DescriptorTbl* desc_tbl = builder.Build();
 
@@ -398,6 +406,9 @@ class RowBatchSerializeBenchmark {
 }
 
 int main(int argc, char** argv) {
+  impala::InitCommonRuntime(argc, argv, true);
+  InitFeSupport();
+  fe.reset(new Frontend());
   RowBatchSerializeBenchmark::Run();
   return 0;
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/codegen/llvm-codegen.cc
----------------------------------------------------------------------
diff --git a/be/src/codegen/llvm-codegen.cc b/be/src/codegen/llvm-codegen.cc
index cc8b46c..d43ad6e 100644
--- a/be/src/codegen/llvm-codegen.cc
+++ b/be/src/codegen/llvm-codegen.cc
@@ -59,6 +59,7 @@
 #include "codegen/instruction-counter.h"
 #include "codegen/mcjit-mem-mgr.h"
 #include "impala-ir/impala-ir-names.h"
+#include "runtime/descriptors.h"
 #include "runtime/hdfs-fs-cache.h"
 #include "runtime/lib-cache.h"
 #include "runtime/mem-pool.h"
@@ -1210,6 +1211,16 @@ void LlvmCodeGen::CodegenMemset(LlvmBuilder* builder, Value* dst, int value, int
   builder->CreateMemSet(dst, value_const, size, /* no alignment */ 0);
 }
 
+void LlvmCodeGen::CodegenClearNullBits(LlvmBuilder* builder, Value* tuple_ptr,
+    const TupleDescriptor& tuple_desc) {
+  Value* int8_ptr = builder->CreateBitCast(tuple_ptr, ptr_type(), "int8_ptr");
+  Value* null_bytes_offset =
+      ConstantInt::get(int_type(), tuple_desc.null_bytes_offset());
+  Value* null_bytes_ptr =
+      builder->CreateInBoundsGEP(int8_ptr, null_bytes_offset, "null_bytes_ptr");
+  CodegenMemset(builder, null_bytes_ptr, 0, tuple_desc.num_null_bytes());
+}
+
 Value* LlvmCodeGen::CodegenAllocate(LlvmBuilder* builder, MemPool* pool, Value* size,
     const char* name) {
   DCHECK(pool != NULL);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/codegen/llvm-codegen.h
----------------------------------------------------------------------
diff --git a/be/src/codegen/llvm-codegen.h b/be/src/codegen/llvm-codegen.h
index 2ef936f..0b81701 100644
--- a/be/src/codegen/llvm-codegen.h
+++ b/be/src/codegen/llvm-codegen.h
@@ -74,6 +74,7 @@ namespace impala {
 class CodegenAnyVal;
 class CodegenSymbolEmitter;
 class SubExprElimination;
+class TupleDescriptor;
 
 /// LLVM code generator.  This is the top level object to generate jitted code.
 //
@@ -419,6 +420,10 @@ class LlvmCodeGen {
   /// be a pointer. No-op if size is zero.
   void CodegenMemset(LlvmBuilder* builder, llvm::Value* dst, int value, int size);
 
+  /// Codegen to set all null bytes of the given tuple to 0.
+  void CodegenClearNullBits(LlvmBuilder* builder, llvm::Value* tuple_ptr,
+      const TupleDescriptor& tuple_desc);
+
   /// Codegen to call pool->Allocate(size).
   llvm::Value* CodegenAllocate(LlvmBuilder* builder, MemPool* pool, llvm::Value* size,
       const char* name = "");

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/exec/hdfs-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-scanner.cc b/be/src/exec/hdfs-scanner.cc
index 28eb606..81542ec 100644
--- a/be/src/exec/hdfs-scanner.cc
+++ b/be/src/exec/hdfs-scanner.cc
@@ -67,7 +67,6 @@ HdfsScanner::HdfsScanner(HdfsScanNodeBase* scan_node, RuntimeState* state)
       template_tuple_pool_(new MemPool(scan_node->mem_tracker())),
       template_tuple_(NULL),
       tuple_byte_size_(scan_node->tuple_desc()->byte_size()),
-      num_null_bytes_(scan_node->tuple_desc()->num_null_bytes()),
       tuple_(NULL),
       batch_(NULL),
       tuple_mem_(NULL),
@@ -88,7 +87,6 @@ HdfsScanner::HdfsScanner()
       template_tuple_pool_(NULL),
       template_tuple_(NULL),
       tuple_byte_size_(-1),
-      num_null_bytes_(-1),
       tuple_(NULL),
       batch_(NULL),
       tuple_mem_(NULL),
@@ -302,50 +300,54 @@ bool HdfsScanner::WriteCompleteTuple(MemPool* pool, FieldLocation* fields,
   return EvalConjuncts(tuple_row);
 }
 
-// Codegen for WriteTuple(above).  The signature matches WriteTuple (except for the
-// this* first argument).  For writing out and evaluating a single string slot:
+// Codegen for WriteTuple(above) for writing out single nullable string slot and
+// evaluating a <slot> = <constantexpr> conjunct. The signature matches WriteTuple()
+// except for the first this* argument.
 // define i1 @WriteCompleteTuple(%"class.impala::HdfsScanner"* %this,
 //                               %"class.impala::MemPool"* %pool,
 //                               %"struct.impala::FieldLocation"* %fields,
 //                               %"class.impala::Tuple"* %tuple,
 //                               %"class.impala::TupleRow"* %tuple_row,
 //                               %"class.impala::Tuple"* %template,
-//                               i8* %error_fields, i8* %error_in_row) #20 {
+//                               i8* %error_fields, i8* %error_in_row) {
 // entry:
 //   %tuple_ptr = bitcast %"class.impala::Tuple"* %tuple
-//                to { i8, %"struct.impala::StringValue" }*
+//                to <{ %"struct.impala::StringValue", i8 }>*
 //   %tuple_ptr1 = bitcast %"class.impala::Tuple"* %template
-//                 to { i8, %"struct.impala::StringValue" }*
-//   %null_byte = getelementptr inbounds
-//                { i8, %"struct.impala::StringValue" }* %tuple_ptr, i32 0, i32 0
-//   store i8 0, i8* %null_byte
+//                 to <{ %"struct.impala::StringValue", i8 }>*
+//   %int8_ptr = bitcast <{ %"struct.impala::StringValue", i8 }>* %tuple_ptr to i8*
+//   %null_bytes_ptr = getelementptr i8, i8* %int8_ptr, i32 16
+//   call void @llvm.memset.p0i8.i64(i8* %null_bytes_ptr, i8 0, i64 1, i32 0, i1 false)
 //   %0 = bitcast %"class.impala::TupleRow"* %tuple_row
-//        to { i8, %"struct.impala::StringValue" }**
-//   %1 = getelementptr { i8, %"struct.impala::StringValue" }** %0, i32 0
-//   store { i8, %"struct.impala::StringValue" }* %tuple_ptr,
-//         { i8, %"struct.impala::StringValue" }** %1
+//        to <{ %"struct.impala::StringValue", i8 }>**
+//   %1 = getelementptr <{ %"struct.impala::StringValue", i8 }>*,
+//                      <{ %"struct.impala::StringValue", i8 }>** %0, i32 0
+//   store <{ %"struct.impala::StringValue", i8 }>* %tuple_ptr,
+//         <{ %"struct.impala::StringValue", i8 }>** %1
 //   br label %parse
 //
 // parse:                                            ; preds = %entry
-//   %data_ptr = getelementptr %"struct.impala::FieldLocation"* %fields, i32 0, i32 0
-//   %len_ptr = getelementptr %"struct.impala::FieldLocation"* %fields, i32 0, i32 1
-//   %slot_error_ptr = getelementptr i8* %error_fields, i32 0
-//   %data = load i8** %data_ptr
-//   %len = load i32* %len_ptr
-//   %2 = call i1 @WriteSlot({ i8, %"struct.impala::StringValue" }* %tuple_ptr,
-//                           i8* %data, i32 %len)
-//   %slot_parse_error = xor i1 %2, true
-//   %error_in_row2 = or i1 false, %slot_parse_error
-//   %3 = zext i1 %slot_parse_error to i8
-//   store i8 %3, i8* %slot_error_ptr
-//   %4 = call %"class.impala::ExprContext"* @GetConjunctCtx(
-//       %"class.impala::HdfsScanner"* %this, i32 0)
-//   %conjunct_eval = call i16 @Eq_StringVal_StringValWrapper1(
-//       %"class.impala::ExprContext"* %4, %"class.impala::TupleRow"* %tuple_row)
-//   %5 = ashr i16 %conjunct_eval, 8
-//   %6 = trunc i16 %5 to i8
-//   %val = trunc i8 %6 to i1
-//   br i1 %val, label %parse3, label %eval_fail
+//  %data_ptr = getelementptr %"struct.impala::FieldLocation",
+//                            %"struct.impala::FieldLocation"* %fields, i32 0, i32 0
+//  %len_ptr = getelementptr %"struct.impala::FieldLocation",
+//                           %"struct.impala::FieldLocation"* %fields, i32 0, i32 1
+//  %slot_error_ptr = getelementptr i8, i8* %error_fields, i32 0
+//  %data = load i8*, i8** %data_ptr
+//  %len = load i32, i32* %len_ptr
+//  %2 = call i1 @WriteSlot(<{ %"struct.impala::StringValue", i8 }>* %tuple_ptr,
+//                          i8* %data, i32 %len)
+//  %slot_parse_error = xor i1 %2, true
+//  %error_in_row2 = or i1 false, %slot_parse_error
+//  %3 = zext i1 %slot_parse_error to i8
+//  store i8 %3, i8* %slot_error_ptr
+//  %4 = call %"class.impala::ExprContext"* @GetConjunctCtx(
+//    %"class.impala::HdfsScanner"* %this, i32 0)
+//  %conjunct_eval = call i16 @"impala::Operators::Eq_StringVal_StringValWrapper"(
+//    %"class.impala::ExprContext"* %4, %"class.impala::TupleRow"* %tuple_row)
+//  %5 = ashr i16 %conjunct_eval, 8
+//  %6 = trunc i16 %5 to i8
+//  %val = trunc i8 %6 to i1
+//  br i1 %val, label %parse3, label %eval_fail
 //
 // parse3:                                           ; preds = %parse
 //   %7 = zext i1 %error_in_row2 to i8
@@ -451,10 +453,7 @@ Status HdfsScanner::CodegenWriteCompleteTuple(HdfsScanNodeBase* node,
   // Initialize tuple
   if (node->num_materialized_partition_keys() == 0) {
     // No partition key slots, just zero the NULL bytes.
-    for (int i = 0; i < tuple_desc->num_null_bytes(); ++i) {
-      Value* null_byte = builder.CreateStructGEP(NULL, tuple_arg, i, "null_byte");
-      builder.CreateStore(codegen->GetIntConstant(TYPE_TINYINT, 0), null_byte);
-    }
+    codegen->CodegenClearNullBits(&builder, tuple_arg, *tuple_desc);
   } else {
     // Copy template tuple.
     // TODO: only copy what's necessary from the template tuple.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/exec/hdfs-scanner.h
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-scanner.h b/be/src/exec/hdfs-scanner.h
index d9fa424..4a4d366 100644
--- a/be/src/exec/hdfs-scanner.h
+++ b/be/src/exec/hdfs-scanner.h
@@ -32,6 +32,7 @@
 #include "exec/scanner-context.h"
 #include "runtime/disk-io-mgr.h"
 #include "runtime/row-batch.h"
+#include "runtime/tuple.h"
 
 namespace impala {
 
@@ -44,7 +45,6 @@ class MemPool;
 class SlotDescriptor;
 class Status;
 class TextConverter;
-class Tuple;
 class TupleDescriptor;
 class TPlanNode;
 class TScanRange;
@@ -234,9 +234,6 @@ class HdfsScanner {
   /// Fixed size of each top-level tuple, in bytes
   const int32_t tuple_byte_size_;
 
-  /// Number of null bytes in the top-level tuple.
-  const int32_t num_null_bytes_;
-
   /// Current tuple pointer into tuple_mem_.
   Tuple* tuple_;
 
@@ -443,18 +440,14 @@ class HdfsScanner {
     if (template_tuple != NULL) {
       memcpy(tuple, template_tuple, desc->byte_size());
     } else {
-      memset(tuple, 0, sizeof(uint8_t) * desc->num_null_bytes());
+      tuple->ClearNullBits(*desc);
     }
   }
 
   // TODO: replace this function with above once we can inline constants from
   // scan_node_->tuple_desc() via codegen
   void InitTuple(Tuple* template_tuple, Tuple* tuple) {
-    if (template_tuple != NULL) {
-      memcpy(tuple, template_tuple, tuple_byte_size_);
-    } else {
-      memset(tuple, 0, sizeof(uint8_t) * num_null_bytes_);
-    }
+    InitTuple(scan_node_->tuple_desc(), template_tuple, tuple);
   }
 
   inline Tuple* next_tuple(int tuple_byte_size, Tuple* t) const {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/exec/kudu-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/kudu-scanner.cc b/be/src/exec/kudu-scanner.cc
index 4fcb40a..a6affbf 100644
--- a/be/src/exec/kudu-scanner.cc
+++ b/be/src/exec/kudu-scanner.cc
@@ -59,7 +59,6 @@ KuduScanner::KuduScanner(KuduScanNode* scan_node, RuntimeState* state)
     state_(state),
     cur_kudu_batch_num_read_(0),
     last_alive_time_micros_(0),
-    tuple_num_null_bytes_(scan_node_->tuple_desc()->num_null_bytes()),
     num_string_slots_(0) {
 }
 
@@ -185,14 +184,14 @@ Status KuduScanner::DecodeRowsIntoRowBatch(RowBatch* row_batch,
   // that happens inside the loop.
   int idx = row_batch->AddRow();
   TupleRow* row = row_batch->GetRow(idx);
-  (*tuple_mem)->Init(scan_node_->tuple_desc()->num_null_bytes());
+  (*tuple_mem)->ClearNullBits(*scan_node_->tuple_desc());
   row->SetTuple(tuple_idx(), *tuple_mem);
 
   int num_rows = cur_kudu_batch_.NumRows();
   // Now iterate through the Kudu rows.
   for (int krow_idx = cur_kudu_batch_num_read_; krow_idx < num_rows; ++krow_idx) {
     // Clear any NULL indicators set by a previous iteration.
-    (*tuple_mem)->Init(tuple_num_null_bytes_);
+    (*tuple_mem)->ClearNullBits(*scan_node_->tuple_desc());
 
     // Transform a Kudu row into an Impala row.
     KuduScanBatch::RowPtr krow = cur_kudu_batch_.Row(krow_idx);
@@ -216,7 +215,7 @@ Status KuduScanner::DecodeRowsIntoRowBatch(RowBatch* row_batch,
 
       // Move to the next tuple in the tuple buffer.
       *tuple_mem = next_tuple(*tuple_mem);
-      (*tuple_mem)->Init(tuple_num_null_bytes_);
+      (*tuple_mem)->ClearNullBits(*scan_node_->tuple_desc());
       // Make 'row' point to the new row.
       row = row_batch->GetRow(idx);
       row->SetTuple(tuple_idx(), *tuple_mem);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/exec/kudu-scanner.h
----------------------------------------------------------------------
diff --git a/be/src/exec/kudu-scanner.h b/be/src/exec/kudu-scanner.h
index d868b05..0ed5221 100644
--- a/be/src/exec/kudu-scanner.h
+++ b/be/src/exec/kudu-scanner.h
@@ -121,9 +121,6 @@ class KuduScanner {
   /// The scanner's cloned copy of the conjuncts to apply.
   vector<ExprContext*> conjunct_ctxs_;
 
-  /// Number of bytes needed to represent the null bits in the tuple.
-  int tuple_num_null_bytes_;
-
   /// List of string slots that need relocation for their auxiliary memory.
   std::vector<SlotDescriptor*> string_slots_;
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/exec/row-batch-list-test.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/row-batch-list-test.cc b/be/src/exec/row-batch-list-test.cc
index 38ce177..ad3317b 100644
--- a/be/src/exec/row-batch-list-test.cc
+++ b/be/src/exec/row-batch-list-test.cc
@@ -19,21 +19,30 @@
 #include <cstdio>
 #include <iostream>
 #include <vector>
+#include <boost/scoped_ptr.hpp>
 
+#include "common/init.h"
 #include "exec/row-batch-list.h"
 #include "runtime/descriptors.h"
 #include "runtime/mem-pool.h"
 #include "runtime/mem-tracker.h"
 #include "runtime/string-value.h"
 #include "runtime/tuple-row.h"
+#include "service/fe-support.h"
+#include "service/frontend.h"
 #include "util/runtime-profile-counters.h"
 #include "testutil/desc-tbl-builder.h"
 #include "testutil/gtest-util.h"
 
 #include "common/names.h"
 
+using namespace impala;
+
 namespace impala {
 
+// For computing tuple mem layouts.
+scoped_ptr<Frontend> fe;
+
 class RowBatchListTest : public testing::Test {
  public:
   RowBatchListTest() {}
@@ -44,7 +53,7 @@ class RowBatchListTest : public testing::Test {
   RowDescriptor* desc_;
 
   virtual void SetUp() {
-    DescriptorTblBuilder builder(&pool_);
+    DescriptorTblBuilder builder(fe.get(), &pool_);
     builder.DeclareTuple() << TYPE_INT;
     DescriptorTbl* desc_tbl = builder.Build();
     vector<bool> nullable_tuples(1, false);
@@ -139,5 +148,10 @@ TEST_F(RowBatchListTest, MultipleRowBatchesTest) {
 
 }
 
-IMPALA_TEST_MAIN();
-
+int main(int argc, char** argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  InitCommonRuntime(argc, argv, true, impala::TestInfo::BE_TEST);
+  InitFeSupport();
+  fe.reset(new Frontend());
+  return RUN_ALL_TESTS();
+}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/exec/text-converter.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/text-converter.cc b/be/src/exec/text-converter.cc
index 2271b4e..d38d662 100644
--- a/be/src/exec/text-converter.cc
+++ b/be/src/exec/text-converter.cc
@@ -126,7 +126,7 @@ Function* TextConverter::CodegenWriteSlot(LlvmCodeGen* codegen,
 
   StructType* tuple_type = tuple_desc->GetLlvmStruct(codegen);
   if (tuple_type == NULL) return NULL;
-  PointerType* tuple_ptr_type = PointerType::get(tuple_type, 0);
+  PointerType* tuple_ptr_type = tuple_type->getPointerTo();
 
   Function* set_null_fn = slot_desc->GetUpdateNullFn(codegen, true);
   if (set_null_fn == NULL) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/runtime/buffered-tuple-stream-test.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/buffered-tuple-stream-test.cc b/be/src/runtime/buffered-tuple-stream-test.cc
index 20af23e..76b1bff 100644
--- a/be/src/runtime/buffered-tuple-stream-test.cc
+++ b/be/src/runtime/buffered-tuple-stream-test.cc
@@ -79,12 +79,12 @@ class SimpleTupleStreamTest : public testing::Test {
     vector<bool> nullable_tuples(1, false);
     vector<TTupleId> tuple_ids(1, static_cast<TTupleId>(0));
 
-    DescriptorTblBuilder int_builder(&pool_);
+    DescriptorTblBuilder int_builder(test_env_->exec_env()->frontend(), &pool_);
     int_builder.DeclareTuple() << TYPE_INT;
     int_desc_ = pool_.Add(new RowDescriptor(
         *int_builder.Build(), tuple_ids, nullable_tuples));
 
-    DescriptorTblBuilder string_builder(&pool_);
+    DescriptorTblBuilder string_builder(test_env_->exec_env()->frontend(), &pool_);
     string_builder.DeclareTuple() << TYPE_STRING;
     string_desc_ = pool_.Add(new RowDescriptor(
         *string_builder.Build(), tuple_ids, nullable_tuples));
@@ -145,23 +145,21 @@ class SimpleTupleStreamTest : public testing::Test {
       for (int tuple_idx = 0; tuple_idx < num_tuples; ++tuple_idx) {
         TupleDescriptor* tuple_desc = row_desc.tuple_descriptors()[tuple_idx];
         Tuple* tuple = Tuple::Create(tuple_desc->byte_size(), batch->tuple_data_pool());
-        // Skip over the null indicators at the beginning of the tuple.
-        uint8_t* ptr = reinterpret_cast<uint8_t*>(tuple) + tuple_desc->num_null_bytes();
         bool is_null = gen_null && !GenBoolValue(idx);
         for (int slot_idx = 0; slot_idx < tuple_desc->slots().size(); ++slot_idx, ++idx) {
           SlotDescriptor* slot_desc = tuple_desc->slots()[slot_idx];
+          void* slot = tuple->GetSlot(slot_desc->tuple_offset());
           switch (slot_desc->type().type) {
             case TYPE_INT:
-              *reinterpret_cast<int*>(ptr) = GenIntValue(idx);
+              *reinterpret_cast<int*>(slot) = GenIntValue(idx);
               break;
             case TYPE_STRING:
-              *reinterpret_cast<StringValue*>(ptr) = STRINGS[idx % NUM_STRINGS];
+              *reinterpret_cast<StringValue*>(slot) = STRINGS[idx % NUM_STRINGS];
               break;
             default:
               // The memory has been zero'ed out already by Tuple::Create().
               break;
           }
-          ptr += slot_desc->slot_size();
         }
         if (is_null) {
           row->SetTuple(tuple_idx, NULL);
@@ -212,14 +210,13 @@ class SimpleTupleStreamTest : public testing::Test {
       TupleDescriptor* tuple_desc = row_desc->tuple_descriptors()[tuple_idx];
       Tuple* tuple = row->GetTuple(tuple_idx);
       const int num_slots = tuple_desc->slots().size();
-      uint8_t* ptr = reinterpret_cast<uint8_t*>(tuple) + tuple_desc->num_null_bytes();
       for (int slot_idx = 0; slot_idx < num_slots; ++slot_idx) {
         SlotDescriptor* slot_desc = tuple_desc->slots()[slot_idx];
         if (tuple == NULL) {
           AppendValue(NULL, results);
         } else {
-          AppendValue(ptr, results);
-          ptr += slot_desc->slot_size();
+          void* slot = tuple->GetSlot(slot_desc->tuple_offset());
+          AppendValue(reinterpret_cast<uint8_t*>(slot), results);
         }
       }
     }
@@ -270,7 +267,7 @@ class SimpleTupleStreamTest : public testing::Test {
         for (int slot_idx = 0; slot_idx < num_slots; ++slot_idx, ++idx) {
           T expected_val;
           GetExpectedValue(idx, is_null, &expected_val);
-          ASSERT_TRUE(results[idx] == expected_val)
+          ASSERT_EQ(results[idx], expected_val)
               << "results[" << idx << "] " << results[idx] << " != "
               << expected_val << " row_idx=" << row_idx
               << " tuple_idx=" << tuple_idx << " slot_idx=" << slot_idx
@@ -400,12 +397,12 @@ class SimpleNullStreamTest : public SimpleTupleStreamTest {
     vector<bool> nullable_tuples(1, true);
     vector<TTupleId> tuple_ids(1, static_cast<TTupleId>(0));
 
-    DescriptorTblBuilder int_builder(&pool_);
+    DescriptorTblBuilder int_builder(test_env_->exec_env()->frontend(), &pool_);
     int_builder.DeclareTuple() << TYPE_INT;
     int_desc_ = pool_.Add(new RowDescriptor(
         *int_builder.Build(), tuple_ids, nullable_tuples));
 
-    DescriptorTblBuilder string_builder(&pool_);
+    DescriptorTblBuilder string_builder(test_env_->exec_env()->frontend(), &pool_);
     string_builder.DeclareTuple() << TYPE_STRING;
     string_desc_ = pool_.Add(new RowDescriptor(
         *string_builder.Build(), tuple_ids, nullable_tuples));
@@ -426,14 +423,14 @@ class MultiTupleStreamTest : public SimpleTupleStreamTest {
     tuple_ids.push_back(static_cast<TTupleId>(1));
     tuple_ids.push_back(static_cast<TTupleId>(2));
 
-    DescriptorTblBuilder int_builder(&pool_);
+    DescriptorTblBuilder int_builder(test_env_->exec_env()->frontend(), &pool_);
     int_builder.DeclareTuple() << TYPE_INT;
     int_builder.DeclareTuple() << TYPE_INT;
     int_builder.DeclareTuple() << TYPE_INT;
     int_desc_ = pool_.Add(new RowDescriptor(
         *int_builder.Build(), tuple_ids, nullable_tuples));
 
-    DescriptorTblBuilder string_builder(&pool_);
+    DescriptorTblBuilder string_builder(test_env_->exec_env()->frontend(), &pool_);
     string_builder.DeclareTuple() << TYPE_STRING;
     string_builder.DeclareTuple() << TYPE_STRING;
     string_builder.DeclareTuple() << TYPE_STRING;
@@ -456,14 +453,14 @@ class MultiNullableTupleStreamTest : public SimpleTupleStreamTest {
     tuple_ids.push_back(static_cast<TTupleId>(1));
     tuple_ids.push_back(static_cast<TTupleId>(2));
 
-    DescriptorTblBuilder int_builder(&pool_);
+    DescriptorTblBuilder int_builder(test_env_->exec_env()->frontend(), &pool_);
     int_builder.DeclareTuple() << TYPE_INT;
     int_builder.DeclareTuple() << TYPE_INT;
     int_builder.DeclareTuple() << TYPE_INT;
     int_desc_ = pool_.Add(new RowDescriptor(
         *int_builder.Build(), tuple_ids, nullable_tuples));
 
-    DescriptorTblBuilder string_builder(&pool_);
+    DescriptorTblBuilder string_builder(test_env_->exec_env()->frontend(), &pool_);
     string_builder.DeclareTuple() << TYPE_STRING;
     string_builder.DeclareTuple() << TYPE_STRING;
     string_builder.DeclareTuple() << TYPE_STRING;
@@ -495,7 +492,7 @@ class ArrayTupleStreamTest : public SimpleTupleStreamTest {
     nested_array_type.type = TYPE_ARRAY;
     nested_array_type.children.push_back(int_array_type);
 
-    DescriptorTblBuilder builder(&pool_);
+    DescriptorTblBuilder builder(test_env_->exec_env()->frontend(), &pool_);
     builder.DeclareTuple() << string_array_type << nested_array_type;
     builder.DeclareTuple() << int_array_type;
     array_desc_ = pool_.Add(new RowDescriptor(
@@ -735,7 +732,7 @@ TEST_F(SimpleTupleStreamTest, BigRow) {
   vector<bool> nullable_tuples;
   vector<bool> non_nullable_tuples;
 
-  DescriptorTblBuilder big_row_builder(&pool_);
+  DescriptorTblBuilder big_row_builder(test_env_->exec_env()->frontend(), &pool_);
   // Each tuple contains 8 slots of TYPE_INT and a single byte for null indicator.
   const int num_tuples = IO_BLOCK_SIZE / (8 * sizeof(int) + 1);
   for (int tuple_idx = 0; tuple_idx < num_tuples; ++tuple_idx) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/runtime/collection-value-builder-test.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/collection-value-builder-test.cc b/be/src/runtime/collection-value-builder-test.cc
index b8f4b65..613132a 100644
--- a/be/src/runtime/collection-value-builder-test.cc
+++ b/be/src/runtime/collection-value-builder-test.cc
@@ -17,6 +17,8 @@
 
 #include "runtime/collection-value-builder.h"
 #include "runtime/mem-tracker.h"
+#include "service/fe-support.h"
+#include "service/frontend.h"
 #include "testutil/desc-tbl-builder.h"
 #include "testutil/gtest-util.h"
 
@@ -24,9 +26,12 @@
 
 using namespace impala;
 
+// For computing tuple mem layouts.
+static scoped_ptr<Frontend> fe;
+
 TEST(CollectionValueBuilderTest, MaxBufferSize) {
   ObjectPool obj_pool;
-  DescriptorTblBuilder builder(&obj_pool);
+  DescriptorTblBuilder builder(fe.get(), &obj_pool);
   builder.DeclareTuple() << TYPE_TINYINT << TYPE_TINYINT << TYPE_TINYINT;
   DescriptorTbl* desc_tbl = builder.Build();
   vector<TupleDescriptor*> descs;
@@ -66,4 +71,10 @@ TEST(CollectionValueBuilderTest, MaxBufferSize) {
   pool.FreeAll();
 }
 
-IMPALA_TEST_MAIN();
+int main(int argc, char** argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  InitCommonRuntime(argc, argv, true, impala::TestInfo::BE_TEST);
+  InitFeSupport();
+  fe.reset(new Frontend());
+  return RUN_ALL_TESTS();
+}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/runtime/descriptors.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/descriptors.cc b/be/src/runtime/descriptors.cc
index 47ca791..d106ed4 100644
--- a/be/src/runtime/descriptors.cc
+++ b/be/src/runtime/descriptors.cc
@@ -286,6 +286,7 @@ TupleDescriptor::TupleDescriptor(const TTupleDescriptor& tdesc)
     table_desc_(NULL),
     byte_size_(tdesc.byteSize),
     num_null_bytes_(tdesc.numNullBytes),
+    null_bytes_offset_(tdesc.byteSize - tdesc.numNullBytes),
     slots_(),
     has_varlen_slots_(false),
     tuple_path_(tdesc.tuplePath),
@@ -591,14 +592,18 @@ Function* SlotDescriptor::GetUpdateNullFn(LlvmCodeGen* codegen, bool set_null) c
   prototype.AddArgument(LlvmCodeGen::NamedVariable("tuple", tuple_ptr_type));
 
   LlvmCodeGen::LlvmBuilder builder(codegen->context());
-  Value* tuple_ptr;
-  Function* fn = prototype.GeneratePrototype(&builder, &tuple_ptr);
-
-  Value* null_byte_ptr = builder.CreateStructGEP(NULL,
-          tuple_ptr, null_indicator_offset_.byte_offset, "null_byte_ptr");
+  Value* tuple_arg;
+  Function* fn = prototype.GeneratePrototype(&builder, &tuple_arg);
+
+  Value* tuple_int8_ptr =
+      builder.CreateBitCast(tuple_arg, codegen->ptr_type(), "tuple_int8_ptr");
+  Value* null_byte_offset =
+      ConstantInt::get(codegen->int_type(), null_indicator_offset_.byte_offset);
+  Value* null_byte_ptr =
+      builder.CreateInBoundsGEP(tuple_int8_ptr, null_byte_offset, "null_byte_ptr");
   Value* null_byte = builder.CreateLoad(null_byte_ptr, "null_byte");
-  Value* result = NULL;
 
+  Value* result = NULL;
   if (set_null) {
     Value* null_set = codegen->GetIntConstant(
         TYPE_TINYINT, null_indicator_offset_.bit_mask);
@@ -627,45 +632,38 @@ StructType* TupleDescriptor::GetLlvmStruct(LlvmCodeGen* codegen) const {
 
   // Sort slots in the order they will appear in LLVM struct.
   vector<SlotDescriptor*> sorted_slots(slots_.size());
-  for (SlotDescriptor* slot: slots_) {
-    sorted_slots[slot->slot_idx_] = slot;
-  }
-
-  // For each null byte, add a byte to the struct
-  vector<Type*> struct_fields;
-  for (int i = 0; i < num_null_bytes_; ++i) {
-    struct_fields.push_back(codegen->GetType(TYPE_TINYINT));
-  }
-  int curr_struct_offset = num_null_bytes_;
+  for (SlotDescriptor* slot: slots_) sorted_slots[slot->slot_idx_] = slot;
 
   // Add the slot types to the struct description.
+  vector<Type*> struct_fields;
+  int curr_struct_offset = 0;
   for (SlotDescriptor* slot: sorted_slots) {
     // IMPALA-3207: Codegen for CHAR is not yet implemented: bail out of codegen here.
     if (slot->type().type == TYPE_CHAR) return NULL;
-    DCHECK_LE(curr_struct_offset, slot->tuple_offset());
-    if (curr_struct_offset < slot->tuple_offset()) {
-      // Need to add padding to ensure slots are aligned correctly. Clang likes to
-      // sometimes pad structs in its own way. When it does this, it sets the 'packed'
-      // flag, which means that at the LLVM level the struct type has no alignment
-      // requirements, even if it does at the C language level.
-      struct_fields.push_back(ArrayType::get(codegen->GetType(TYPE_TINYINT),
-          slot->tuple_offset() - curr_struct_offset));
-    }
+    DCHECK_EQ(curr_struct_offset, slot->tuple_offset());
     slot->llvm_field_idx_ = struct_fields.size();
     struct_fields.push_back(codegen->GetType(slot->type()));
     curr_struct_offset = slot->tuple_offset() + slot->slot_size();
   }
+  // For each null byte, add a byte to the struct
+  for (int i = 0; i < num_null_bytes_; ++i) {
+    struct_fields.push_back(codegen->GetType(TYPE_TINYINT));
+    ++curr_struct_offset;
+  }
+
   DCHECK_LE(curr_struct_offset, byte_size_);
   if (curr_struct_offset < byte_size_) {
     struct_fields.push_back(ArrayType::get(codegen->GetType(TYPE_TINYINT),
         byte_size_ - curr_struct_offset));
   }
 
-  // Construct the struct type.
-  // We don't mark the struct as packed but it shouldn't matter either way: LLVM should
-  // not insert any additional padding since the contents are already aligned.
+  // Construct the struct type. Use the packed layout although not strictly necessary
+  // because the fields are already aligned, so LLVM should not add any padding. The
+  // fields are already aligned because we order the slots by descending size and only
+  // have powers-of-two slot sizes. Note that STRING and TIMESTAMP slots both occupy
+  // 16 bytes although their useful payload is only 12 bytes.
   StructType* tuple_struct = StructType::get(codegen->context(),
-      ArrayRef<Type*>(struct_fields));
+      ArrayRef<Type*>(struct_fields), true);
   const DataLayout& data_layout = codegen->execution_engine()->getDataLayout();
   const StructLayout* layout = data_layout.getStructLayout(tuple_struct);
   for (SlotDescriptor* slot: slots()) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/runtime/descriptors.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/descriptors.h b/be/src/runtime/descriptors.h
index 874ad69..02193df 100644
--- a/be/src/runtime/descriptors.h
+++ b/be/src/runtime/descriptors.h
@@ -160,7 +160,7 @@ class SlotDescriptor {
 
   /// The idx of the slot in the llvm codegen'd tuple struct
   /// This is set by TupleDescriptor during codegen and takes into account
-  /// leading null bytes and any padding bytes.
+  /// any padding bytes.
   int llvm_field_idx_;
 
   /// Cached codegen'd functions
@@ -363,6 +363,7 @@ class TupleDescriptor {
  public:
   int byte_size() const { return byte_size_; }
   int num_null_bytes() const { return num_null_bytes_; }
+  int null_bytes_offset() const { return null_bytes_offset_; }
   const std::vector<SlotDescriptor*>& slots() const { return slots_; }
   const std::vector<SlotDescriptor*>& string_slots() const { return string_slots_; }
   const std::vector<SlotDescriptor*>& collection_slots() const {
@@ -401,6 +402,7 @@ class TupleDescriptor {
   TableDescriptor* table_desc_;
   const int byte_size_;
   const int num_null_bytes_;
+  const int null_bytes_offset_;
 
   /// Contains all slots.
   std::vector<SlotDescriptor*> slots_;

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/runtime/row-batch-serialize-test.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/row-batch-serialize-test.cc b/be/src/runtime/row-batch-serialize-test.cc
index 0f5e519..f4c4f0b 100644
--- a/be/src/runtime/row-batch-serialize-test.cc
+++ b/be/src/runtime/row-batch-serialize-test.cc
@@ -24,11 +24,15 @@
 #include "runtime/raw-value.inline.h"
 #include "runtime/row-batch.h"
 #include "runtime/tuple-row.h"
+#include "service/fe-support.h"
+#include "service/frontend.h"
 #include "util/stopwatch.h"
 #include "testutil/desc-tbl-builder.h"
 
 #include "common/names.h"
 
+using namespace impala;
+
 namespace impala {
 
 const int NUM_ROWS = 20;
@@ -41,13 +45,18 @@ class RowBatchSerializeTest : public testing::Test {
   ObjectPool pool_;
   scoped_ptr<MemTracker> tracker_;
 
+  // For computing tuple mem layouts.
+  scoped_ptr<Frontend> fe_;
+
   virtual void SetUp() {
+    fe_.reset(new Frontend());
     tracker_.reset(new MemTracker());
   }
 
   virtual void TearDown() {
     pool_.Clear();
     tracker_.reset();
+    fe_.reset();
   }
 
   // Serializes and deserializes 'batch', then checks that the deserialized batch is valid
@@ -291,7 +300,7 @@ class RowBatchSerializeTest : public testing::Test {
 
 TEST_F(RowBatchSerializeTest, Basic) {
   // tuple: (int)
-  DescriptorTblBuilder builder(&pool_);
+  DescriptorTblBuilder builder(fe_.get(), &pool_);
   builder.DeclareTuple() << TYPE_INT;
   DescriptorTbl* desc_tbl = builder.Build();
 
@@ -306,7 +315,7 @@ TEST_F(RowBatchSerializeTest, Basic) {
 
 TEST_F(RowBatchSerializeTest, String) {
   // tuple: (int, string)
-  DescriptorTblBuilder builder(&pool_);
+  DescriptorTblBuilder builder(fe_.get(), &pool_);
   builder.DeclareTuple() << TYPE_INT << TYPE_STRING;
   DescriptorTbl* desc_tbl = builder.Build();
 
@@ -325,7 +334,7 @@ TEST_F(RowBatchSerializeTest, BasicArray) {
   array_type.type = TYPE_ARRAY;
   array_type.children.push_back(TYPE_INT);
 
-  DescriptorTblBuilder builder(&pool_);
+  DescriptorTblBuilder builder(fe_.get(), &pool_);
   builder.DeclareTuple() << TYPE_INT << TYPE_STRING << array_type;
   DescriptorTbl* desc_tbl = builder.Build();
 
@@ -353,7 +362,7 @@ TEST_F(RowBatchSerializeTest, StringArray) {
   array_type.type = TYPE_ARRAY;
   array_type.children.push_back(struct_type);
 
-  DescriptorTblBuilder builder(&pool_);
+  DescriptorTblBuilder builder(fe_.get(), &pool_);
   builder.DeclareTuple() << TYPE_INT << TYPE_STRING << array_type;
   DescriptorTbl* desc_tbl = builder.Build();
 
@@ -394,7 +403,7 @@ TEST_F(RowBatchSerializeTest, NestedArrays) {
   array_type.type = TYPE_ARRAY;
   array_type.children.push_back(struct_type);
 
-  DescriptorTblBuilder builder(&pool_);
+  DescriptorTblBuilder builder(fe_.get(), &pool_);
   builder.DeclareTuple() << array_type;
   DescriptorTbl* desc_tbl = builder.Build();
 
@@ -418,7 +427,7 @@ TEST_F(RowBatchSerializeTest, DupCorrectnessFull) {
 
 void RowBatchSerializeTest::TestDupCorrectness(bool full_dedup) {
   // tuples: (int), (string)
-  DescriptorTblBuilder builder(&pool_);
+  DescriptorTblBuilder builder(fe_.get(), &pool_);
   builder.DeclareTuple() << TYPE_INT;
   builder.DeclareTuple() << TYPE_STRING;
   DescriptorTbl* desc_tbl = builder.Build();
@@ -459,7 +468,7 @@ TEST_F(RowBatchSerializeTest, DupRemovalFull) {
 // Test that tuple deduplication results in the expected reduction in serialized size.
 void RowBatchSerializeTest::TestDupRemoval(bool full_dedup) {
   // tuples: (int, string)
-  DescriptorTblBuilder builder(&pool_);
+  DescriptorTblBuilder builder(fe_.get(), &pool_);
   builder.DeclareTuple() << TYPE_INT << TYPE_STRING;
   DescriptorTbl* desc_tbl = builder.Build();
 
@@ -498,7 +507,7 @@ TEST_F(RowBatchSerializeTest, ConsecutiveNullsFull) {
 // Test that deduplication handles NULL tuples correctly.
 void RowBatchSerializeTest::TestConsecutiveNulls(bool full_dedup) {
   // tuples: (int)
-  DescriptorTblBuilder builder(&pool_);
+  DescriptorTblBuilder builder(fe_.get(), &pool_);
   builder.DeclareTuple() << TYPE_INT;
   DescriptorTbl* desc_tbl = builder.Build();
   vector<bool> nullable_tuples(1, true);
@@ -526,7 +535,7 @@ TEST_F(RowBatchSerializeTest, ZeroLengthTuplesDedup) {
 
 void RowBatchSerializeTest::TestZeroLengthTuple(bool full_dedup) {
   // tuples: (int), (string), ()
-  DescriptorTblBuilder builder(&pool_);
+  DescriptorTblBuilder builder(fe_.get(), &pool_);
   builder.DeclareTuple() << TYPE_INT;
   builder.DeclareTuple() << TYPE_STRING;
   builder.DeclareTuple();
@@ -553,7 +562,7 @@ TEST_F(RowBatchSerializeTest, DedupPathologicalFull) {
   ColumnType array_type;
   array_type.type = TYPE_ARRAY;
   array_type.children.push_back(TYPE_STRING);
-  DescriptorTblBuilder builder(&pool_);
+  DescriptorTblBuilder builder(fe_.get(), &pool_);
   builder.DeclareTuple() << TYPE_INT;
   builder.DeclareTuple() << TYPE_INT;
   builder.DeclareTuple() << array_type;
@@ -647,7 +656,8 @@ TEST_F(RowBatchSerializeTest, DedupPathologicalFull) {
 
 int main(int argc, char** argv) {
   ::testing::InitGoogleTest(&argc, argv);
-  impala::InitCommonRuntime(argc, argv, false, impala::TestInfo::BE_TEST);
+  InitCommonRuntime(argc, argv, true, impala::TestInfo::BE_TEST);
+  InitFeSupport();
   uint32_t seed = time(NULL);
   cout << "seed = " << seed << endl;
   srand(seed);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/runtime/row-batch-test.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/row-batch-test.cc b/be/src/runtime/row-batch-test.cc
index 2a8304a..041c3c2 100644
--- a/be/src/runtime/row-batch-test.cc
+++ b/be/src/runtime/row-batch-test.cc
@@ -15,23 +15,30 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <boost/scoped_ptr.hpp>
+
 #include "testutil/death-test-util.h"
 #include "testutil/gtest-util.h"
 #include "runtime/mem-tracker.h"
 #include "runtime/row-batch.h"
+#include "service/fe-support.h"
+#include "service/frontend.h"
 #include "testutil/desc-tbl-builder.h"
 
 #include <gtest/gtest.h>
 
 #include "common/names.h"
 
-namespace impala {
+using namespace impala;
+
+// For computing tuple mem layouts.
+static scoped_ptr<Frontend> fe;
 
 TEST(RowBatchTest, AcquireStateWithMarkAtCapacity) {
   // Test that AcquireState() can be correctly called with MarkAtCapacity() on the
   // source batch.
   ObjectPool pool;
-  DescriptorTblBuilder builder(&pool);
+  DescriptorTblBuilder builder(fe.get(), &pool);
   builder.DeclareTuple() << TYPE_INT;
   DescriptorTbl* desc_tbl = builder.Build();
 
@@ -61,9 +68,10 @@ TEST(RowBatchTest, AcquireStateWithMarkAtCapacity) {
   }
 }
 
-}
-
 int main(int argc, char** argv) {
   ::testing::InitGoogleTest(&argc, argv);
+  InitCommonRuntime(argc, argv, true, impala::TestInfo::BE_TEST);
+  InitFeSupport();
+  fe.reset(new Frontend());
   return RUN_ALL_TESTS();
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/runtime/tuple.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/tuple.cc b/be/src/runtime/tuple.cc
index df0066f..656621d 100644
--- a/be/src/runtime/tuple.cc
+++ b/be/src/runtime/tuple.cc
@@ -207,7 +207,7 @@ void Tuple::MaterializeExprs(
     TupleRow* row, const TupleDescriptor& desc, ExprContext* const* materialize_expr_ctxs,
     MemPool* pool, StringValue** non_null_string_values, int* total_string_lengths,
     int* num_non_null_string_values) {
-  memset(this, 0, desc.num_null_bytes());
+  ClearNullBits(desc);
   // Evaluate the materialize_expr_ctxs and place the results in the tuple.
   for (int i = 0; i < desc.slots().size(); ++i) {
     SlotDescriptor* slot_desc = desc.slots()[i];
@@ -368,8 +368,8 @@ Status Tuple::CodegenMaterializeExprs(RuntimeState* state, bool collect_string_v
   PointerType* tuple_type = codegen->GetPtrType(tuple_struct_type);
   Value* tuple = builder.CreateBitCast(opaque_tuple_arg, tuple_type, "tuple");
 
-  // Memset tuple's null bytes
-  codegen->CodegenMemset(&builder, tuple, 0, desc.num_null_bytes());
+  // Clear tuple's null bytes
+  codegen->CodegenClearNullBits(&builder, tuple, desc);
 
   // Evaluate the materialize_expr_ctxs and place the results in the tuple.
   for (int i = 0; i < desc.slots().size(); ++i) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/runtime/tuple.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/tuple.h b/be/src/runtime/tuple.h
index 3a3e399..b95492c 100644
--- a/be/src/runtime/tuple.h
+++ b/be/src/runtime/tuple.h
@@ -71,6 +71,11 @@ class Tuple {
 
   void Init(int size) { memset(this, 0, size); }
 
+  void ClearNullBits(const TupleDescriptor& tuple_desc) {
+    memset(reinterpret_cast<uint8_t*>(this) + tuple_desc.null_bytes_offset(),
+        0, tuple_desc.num_null_bytes());
+  }
+
   /// The total size of all data represented in this tuple (tuple data and referenced
   /// string and collection data).
   int64_t TotalByteSize(const TupleDescriptor& desc) const;
@@ -167,7 +172,7 @@ class Tuple {
       MemPool* pool, llvm::Function** fn);
 
   /// Turn null indicator bit on. For non-nullable slots, the mask will be 0 and
-  /// this is a no-op (but we don't have to branch to check is slots are nulalble).
+  /// this is a no-op (but we don't have to branch to check is slots are nullable).
   void SetNull(const NullIndicatorOffset& offset) {
     char* null_indicator_byte = reinterpret_cast<char*>(this) + offset.byte_offset;
     *null_indicator_byte |= offset.bit_mask;

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/service/frontend.cc
----------------------------------------------------------------------
diff --git a/be/src/service/frontend.cc b/be/src/service/frontend.cc
index 107354b..855924f 100644
--- a/be/src/service/frontend.cc
+++ b/be/src/service/frontend.cc
@@ -58,6 +58,7 @@ DEFINE_string(authorized_proxy_user_config, "",
     "users. For example: hue=user1,user2;admin=*");
 DEFINE_string(authorized_proxy_user_config_delimiter, ",",
     "Specifies the delimiter used in authorized_proxy_user_config. ");
+
 Frontend::Frontend() {
   JniMethodDescriptor methods[] = {
     {"<init>", "(ZLjava/lang/String;Ljava/lang/String;Ljava/lang/String;"
@@ -85,6 +86,7 @@ Frontend::Frontend() {
     {"loadTableData", "([B)[B", &load_table_data_id_},
     {"getTableFiles", "([B)[B", &get_table_files_id_},
     {"showCreateFunction", "([B)Ljava/lang/String;", &show_create_function_id_},
+    {"buildTestDescriptorTable", "([B)[B", &build_test_descriptor_table_id_},
 };
 
   JNIEnv* jni_env = getJNIEnv();
@@ -264,3 +266,8 @@ Status Frontend::SetCatalogInitialized() {
 Status Frontend::GetTableFiles(const TShowFilesParams& params, TResultSet* result) {
   return JniUtil::CallJniMethod(fe_, get_table_files_id_, params, result);
 }
+
+Status Frontend::BuildTestDescriptorTable(const TBuildTestDescriptorTableParams& params,
+    TDescriptorTable* result) {
+  return JniUtil::CallJniMethod(fe_, build_test_descriptor_table_id_, params, result);
+}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/service/frontend.h
----------------------------------------------------------------------
diff --git a/be/src/service/frontend.h b/be/src/service/frontend.h
index 29dec68..c5c4895 100644
--- a/be/src/service/frontend.h
+++ b/be/src/service/frontend.h
@@ -168,6 +168,10 @@ class Frontend {
   /// Call FE to get files info for a table or partition.
   Status GetTableFiles(const TShowFilesParams& params, TResultSet* result);
 
+  /// Creates a thrift descriptor table for testing.
+  Status BuildTestDescriptorTable(const TBuildTestDescriptorTableParams& params,
+      TDescriptorTable* result);
+
  private:
   /// Descriptor of Java Frontend class itself, used to create a new instance.
   jclass fe_class_;
@@ -196,6 +200,10 @@ class Frontend {
   jmethodID set_catalog_initialized_id_; // JniFrontend.setCatalogInitialized
   jmethodID get_table_files_id_; // JniFrontend.getTableFiles
   jmethodID show_create_function_id_; // JniFrontend.showCreateFunction
+
+  // Only used for testing.
+  jmethodID build_test_descriptor_table_id_; // JniFrontend.buildTestDescriptorTable()
+
   jmethodID fe_ctor_;
 };
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/testutil/desc-tbl-builder.cc
----------------------------------------------------------------------
diff --git a/be/src/testutil/desc-tbl-builder.cc b/be/src/testutil/desc-tbl-builder.cc
index 7793f06..77be724 100644
--- a/be/src/testutil/desc-tbl-builder.cc
+++ b/be/src/testutil/desc-tbl-builder.cc
@@ -19,13 +19,17 @@
 #include "util/bit-util.h"
 
 #include "common/object-pool.h"
+#include "service/frontend.h"
 #include "runtime/descriptors.h"
 
 #include "common/names.h"
 
 namespace impala {
 
-DescriptorTblBuilder::DescriptorTblBuilder(ObjectPool* obj_pool) : obj_pool_(obj_pool) {
+DescriptorTblBuilder::DescriptorTblBuilder(Frontend* fe, ObjectPool* obj_pool)
+  : fe_(fe), obj_pool_(obj_pool) {
+  DCHECK(fe != NULL);
+  DCHECK(obj_pool_ != NULL);
 }
 
 TupleDescBuilder& DescriptorTblBuilder::DeclareTuple() {
@@ -34,34 +38,6 @@ TupleDescBuilder& DescriptorTblBuilder::DeclareTuple() {
   return *tuple_builder;
 }
 
-// item_id of -1 indicates no itemTupleId
-static TSlotDescriptor MakeSlotDescriptor(int id, int parent_id, const ColumnType& type,
-    int slot_idx, int byte_offset, int item_id) {
-  int null_byte = slot_idx / 8;
-  int null_bit = slot_idx % 8;
-  TSlotDescriptor slot_desc;
-  slot_desc.__set_id(id);
-  slot_desc.__set_parent(parent_id);
-  slot_desc.__set_slotType(type.ToThrift());
-  slot_desc.__set_materializedPath(vector<int>(1, slot_idx));
-  slot_desc.__set_byteOffset(byte_offset);
-  slot_desc.__set_nullIndicatorByte(null_byte);
-  slot_desc.__set_nullIndicatorBit(null_bit);
-  slot_desc.__set_slotIdx(slot_idx);
-  if (item_id != -1) slot_desc.__set_itemTupleId(item_id);
-  return slot_desc;
-}
-
-static TTupleDescriptor MakeTupleDescriptor(int id, int byte_size, int num_null_bytes,
-    int table_id = -1) {
-  TTupleDescriptor tuple_desc;
-  tuple_desc.__set_id(id);
-  tuple_desc.__set_byteSize(byte_size);
-  tuple_desc.__set_numNullBytes(num_null_bytes);
-  if (table_id != -1) tuple_desc.__set_tableId(table_id);
-  return tuple_desc;
-}
-
 void DescriptorTblBuilder::SetTableDescriptor(const TTableDescriptor& table_desc) {
   DCHECK(thrift_desc_tbl_.tableDescriptors.empty())
       << "Only one TableDescriptor can be set.";
@@ -69,61 +45,25 @@ void DescriptorTblBuilder::SetTableDescriptor(const TTableDescriptor& table_desc
 }
 
 DescriptorTbl* DescriptorTblBuilder::Build() {
-  DescriptorTbl* desc_tbl;
-  int tuple_id = 0;
-  int slot_id = tuples_descs_.size(); // First ids reserved for TupleDescriptors
+  DCHECK(!tuples_descs_.empty());
 
+  TBuildTestDescriptorTableParams params;
   for (int i = 0; i < tuples_descs_.size(); ++i) {
-    BuildTuple(tuples_descs_[i]->slot_types(), &thrift_desc_tbl_, &tuple_id, &slot_id);
-  }
-
-  Status status = DescriptorTbl::Create(obj_pool_, thrift_desc_tbl_, &desc_tbl);
-  DCHECK(status.ok());
-  return desc_tbl;
-}
-
-TTupleDescriptor DescriptorTblBuilder::BuildTuple(
-    const vector<ColumnType>& slot_types, TDescriptorTable* thrift_desc_tbl,
-    int* next_tuple_id, int* slot_id) {
-  // We never materialize struct slots (there's no in-memory representation of structs,
-  // instead the materialized fields appear directly in the tuple), but array types can
-  // still have a struct item type. In this case, the array item tuple contains the
-  // "inlined" struct fields.
-  if (slot_types.size() == 1 && slot_types[0].type == TYPE_STRUCT) {
-    return BuildTuple(slot_types[0].children, thrift_desc_tbl, next_tuple_id, slot_id);
-  }
-
-  int num_null_bytes = BitUtil::Ceil(slot_types.size(), 8);
-  int byte_offset = num_null_bytes;
-  int tuple_id = *next_tuple_id;
-  ++(*next_tuple_id);
-
-  for (int i = 0; i < slot_types.size(); ++i) {
-    DCHECK_NE(slot_types[i].type, TYPE_STRUCT);
-    int item_id = -1;
-    if (slot_types[i].IsCollectionType()) {
-      TTupleDescriptor item_desc =
-          BuildTuple(slot_types[i].children, thrift_desc_tbl, next_tuple_id, slot_id);
-      item_id = item_desc.id;
+    params.slot_types.push_back(vector<TColumnType>());
+    vector<TColumnType>& tslot_types = params.slot_types.back();
+    const vector<ColumnType>& slot_types = tuples_descs_[i]->slot_types();
+    for (const ColumnType& slot_type : slot_types) {
+      tslot_types.push_back(slot_type.ToThrift());
     }
-
-    thrift_desc_tbl->slotDescriptors.push_back(
-        MakeSlotDescriptor(*slot_id, tuple_id, slot_types[i], i, byte_offset, item_id));
-    byte_offset += slot_types[i].GetSlotSize();
-    ++(*slot_id);
   }
 
-  TTupleDescriptor result;
+  Status buildDescTblStatus = fe_->BuildTestDescriptorTable(params, &thrift_desc_tbl_);
+  DCHECK(buildDescTblStatus.ok()) << buildDescTblStatus.GetDetail();
 
-  // If someone set a table descriptor pass that id along to the tuple descriptor.
-  if (thrift_desc_tbl_.tableDescriptors.empty()) {
-    result = MakeTupleDescriptor(tuple_id, byte_offset, num_null_bytes);
-  } else {
-    result = MakeTupleDescriptor(tuple_id, byte_offset, num_null_bytes,
-                                 thrift_desc_tbl_.tableDescriptors[0].id);
-  }
-  thrift_desc_tbl->tupleDescriptors.push_back(result);
-  return result;
+  DescriptorTbl* desc_tbl;
+  Status status = DescriptorTbl::Create(obj_pool_, thrift_desc_tbl_, &desc_tbl);
+  DCHECK(status.ok()) << status.GetDetail();
+  return desc_tbl;
 }
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/be/src/testutil/desc-tbl-builder.h
----------------------------------------------------------------------
diff --git a/be/src/testutil/desc-tbl-builder.h b/be/src/testutil/desc-tbl-builder.h
index 8dc8a55..9ad92b1 100644
--- a/be/src/testutil/desc-tbl-builder.h
+++ b/be/src/testutil/desc-tbl-builder.h
@@ -28,18 +28,17 @@ class TupleDescBuilder;
 
 /// Aids in the construction of a DescriptorTbl by declaring tuples and slots
 /// associated with those tuples.
-/// TupleIds are monotonically increasing from 0 for each DeclareTuple, and
-/// SlotIds increase similarly, but are always greater than all TupleIds.
-/// Unlike FE, slots are not reordered based on size, and padding is not addded.
-//
+/// The descriptor table is constructed by calling into the FE via JNI, such that
+/// the tuple mem layouts mimic real queries. All id assignments happen in the FE.
+///
 /// Example usage:
 /// DescriptorTblBuilder builder;
-/// builder.DeclareTuple() << TYPE_TINYINT << TYPE_TIMESTAMP; // gets TupleId 0
-/// builder.DeclareTuple() << TYPE_FLOAT; // gets TupleId 1
+/// builder.DeclareTuple() << TYPE_TINYINT << TYPE_TIMESTAMP;
+/// builder.DeclareTuple() << TYPE_FLOAT;
 /// DescriptorTbl desc_tbl = builder.Build();
 class DescriptorTblBuilder {
  public:
-  DescriptorTblBuilder(ObjectPool* object_pool);
+  DescriptorTblBuilder(Frontend* fe, ObjectPool* object_pool);
 
   TupleDescBuilder& DeclareTuple();
 
@@ -50,15 +49,12 @@ class DescriptorTblBuilder {
   DescriptorTbl* Build();
 
  private:
-  /// Owned by caller.
+  /// Both owned by caller.
+  Frontend* fe_;
   ObjectPool* obj_pool_;
 
   std::vector<TupleDescBuilder*> tuples_descs_;
   TDescriptorTable thrift_desc_tbl_;
-
-  TTupleDescriptor BuildTuple(
-      const std::vector<ColumnType>& slot_types, TDescriptorTable* thrift_desc_tbl,
-      int* tuple_id, int* slot_id);
 };
 
 class TupleDescBuilder {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/common/thrift/Frontend.thrift
----------------------------------------------------------------------
diff --git a/common/thrift/Frontend.thrift b/common/thrift/Frontend.thrift
index 91322b2..fbbf7be 100644
--- a/common/thrift/Frontend.thrift
+++ b/common/thrift/Frontend.thrift
@@ -749,3 +749,10 @@ struct TGetAllHadoopConfigsResponse {
 struct TStartupOptions {
   1: optional bool compute_lineage
 }
+
+// For creating a test descriptor table. The tuples and their memory layout are computed
+// in the FE.
+struct TBuildTestDescriptorTableParams {
+  // Every entry describes the slot types of one tuple.
+  1: required list<list<Types.TColumnType>> slot_types
+}
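
For illustration, a caller on the BE side could fill the new struct like
this (a minimal sketch; the two-tuple shape and types are hypothetical,
but the calls mirror the desc-tbl-builder.cc change above):

  // One tuple with (INT, STRING) slots and one with a single BIGINT slot.
  TBuildTestDescriptorTableParams params;
  params.slot_types.push_back(vector<TColumnType>());
  params.slot_types.back().push_back(ColumnType(TYPE_INT).ToThrift());
  params.slot_types.back().push_back(ColumnType(TYPE_STRING).ToThrift());
  params.slot_types.push_back(vector<TColumnType>());
  params.slot_types.back().push_back(ColumnType(TYPE_BIGINT).ToThrift());
  // The FE computes the mem layouts and returns a serialized TDescriptorTable.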

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/fe/src/main/java/org/apache/impala/analysis/DescriptorTable.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/DescriptorTable.java b/fe/src/main/java/org/apache/impala/analysis/DescriptorTable.java
index 0f59fdb..22764ea 100644
--- a/fe/src/main/java/org/apache/impala/analysis/DescriptorTable.java
+++ b/fe/src/main/java/org/apache/impala/analysis/DescriptorTable.java
@@ -17,17 +17,22 @@
 
 package org.apache.impala.analysis;
 
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 
-import org.apache.commons.lang.StringUtils;
-
+import org.apache.impala.catalog.ArrayType;
+import org.apache.impala.catalog.StructField;
+import org.apache.impala.catalog.StructType;
 import org.apache.impala.catalog.Table;
+import org.apache.impala.catalog.Type;
 import org.apache.impala.catalog.View;
 import org.apache.impala.common.IdGenerator;
+import org.apache.impala.thrift.TColumnType;
 import org.apache.impala.thrift.TDescriptorTable;
+
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
@@ -142,13 +147,13 @@ public class DescriptorTable {
     return result;
   }
 
-  // Computes physical layout parameters of all descriptors.
-  // Call this only after the last descriptor was added.
-  // Test-only.
+  /**
+   * Computes physical layout parameters of all descriptors.
+   * Call this only after the last descriptor was added.
+   * Test-only.
+   */
   public void computeMemLayout() {
-    for (TupleDescriptor d: tupleDescs_.values()) {
-      d.computeMemLayout();
-    }
+    for (TupleDescriptor d: tupleDescs_.values()) d.computeMemLayout();
   }
 
   public TDescriptorTable toThrift() {
@@ -195,4 +200,55 @@ public class DescriptorTable {
     }
     return out.toString();
   }
+
+  /**
+   * Creates a thrift descriptor table for testing. Each entry in 'slotTypes' is a list
+   * of slot types for one tuple.
+   */
+  public static TDescriptorTable buildTestDescriptorTable(
+      List<List<TColumnType>> slotTypes) {
+    DescriptorTable descTbl = new DescriptorTable();
+    for (List<TColumnType> ttupleSlots: slotTypes) {
+      ArrayList<StructField> fields = Lists.newArrayListWithCapacity(ttupleSlots.size());
+      for (TColumnType ttype: ttupleSlots) {
+        fields.add(new StructField("testField", Type.fromThrift(ttype)));
+      }
+      StructType tupleType = new StructType(fields);
+      createTupleDesc(tupleType, descTbl);
+    }
+    descTbl.computeMemLayout();
+    return descTbl.toThrift();
+  }
+
+  /**
+   * Recursive helper for buildTestDescriptorTable(). Returns a TupleDescriptor
+   * corresponding to the given struct. The struct may contain scalar and array fields.
+   */
+  private static TupleDescriptor createTupleDesc(StructType tupleType,
+      DescriptorTable descTbl) {
+    TupleDescriptor tupleDesc = descTbl.createTupleDescriptor("testDescTbl");
+    for (StructField field: tupleType.getFields()) {
+      Type type = field.getType();
+      SlotDescriptor slotDesc = descTbl.addSlotDescriptor(tupleDesc);
+      slotDesc.setIsMaterialized(true);
+      slotDesc.setType(type);
+      if (!type.isCollectionType()) continue;
+
+      // Set item tuple descriptor for the collection.
+      Preconditions.checkState(type.isArrayType());
+      ArrayType arrayType = (ArrayType) type;
+      Type itemType = arrayType.getItemType();
+      StructType itemStruct = null;
+      if (itemType.isStructType()) {
+        itemStruct = (StructType) itemType;
+      } else {
+        ArrayList<StructField> itemFields = Lists.newArrayListWithCapacity(1);
+        itemFields.add(new StructField("item", itemType));
+        itemStruct = new StructType(itemFields);
+      }
+      TupleDescriptor itemTuple = createTupleDesc(itemStruct, descTbl);
+      slotDesc.setItemTupleDesc(itemTuple);
+    }
+    return tupleDesc;
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java b/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java
index cbfdaca..e5462fd 100644
--- a/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java
+++ b/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java
@@ -24,13 +24,13 @@ import java.util.List;
 import java.util.Map;
 
 import org.apache.commons.lang.StringUtils;
-
 import org.apache.impala.catalog.ColumnStats;
 import org.apache.impala.catalog.HdfsTable;
 import org.apache.impala.catalog.StructType;
 import org.apache.impala.catalog.Table;
 import org.apache.impala.catalog.View;
 import org.apache.impala.thrift.TTupleDescriptor;
+
 import com.google.common.base.Joiner;
 import com.google.common.base.Objects;
 import com.google.common.base.Preconditions;
@@ -56,6 +56,15 @@ import com.google.common.collect.Lists;
  * A tuple descriptor may be materialized or non-materialized. A non-materialized tuple
  * descriptor acts as a placeholder for 'virtual' table references such as inline views,
  * and must not be materialized at runtime.
+ *
+ * Memory Layout
+ * Slots are placed in descending order by size with trailing bytes to store null flags.
+ * Null flags are omitted for non-nullable slots. There is no padding between tuples when
+ * stored back-to-back in a row batch.
+ *
+ * Example: select bool_col, int_col, string_col, smallint_col from functional.alltypes
+ * Slots:   string_col|int_col|smallint_col|bool_col|null_byte
+ * Offsets: 0          16      20           22       23
  */
 public class TupleDescriptor {
   private final TupleId id_;
@@ -211,12 +220,13 @@ public class TupleDescriptor {
     if (hasMemLayout_) return;
     hasMemLayout_ = true;
 
-    // sort slots by size
+    // maps from slot size to slot descriptors with that size
     Map<Integer, List<SlotDescriptor>> slotsBySize =
         new HashMap<Integer, List<SlotDescriptor>>();
 
-    // populate slotsBySize; also compute avgSerializedSize
+    // populate slotsBySize
     int numNullableSlots = 0;
+    int totalSlotSize = 0;
     for (SlotDescriptor d: slots_) {
       if (!d.isMaterialized()) continue;
       ColumnStats stats = d.getStats();
@@ -229,6 +239,7 @@ public class TupleDescriptor {
       if (!slotsBySize.containsKey(d.getType().getSlotSize())) {
         slotsBySize.put(d.getType().getSlotSize(), new ArrayList<SlotDescriptor>());
       }
+      totalSlotSize += d.getType().getSlotSize();
       slotsBySize.get(d.getType().getSlotSize()).add(d);
       if (d.getIsNullable()) ++numNullableSlots;
     }
@@ -236,30 +247,25 @@ public class TupleDescriptor {
     Preconditions.checkState(!slotsBySize.containsKey(0));
     Preconditions.checkState(!slotsBySize.containsKey(-1));
 
-    // assign offsets to slots in order of ascending size
+    // assign offsets to slots in order of descending size
     numNullBytes_ = (numNullableSlots + 7) / 8;
-    int offset = numNullBytes_;
-    int nullIndicatorByte = 0;
+    int slotOffset = 0;
+    int nullIndicatorByte = totalSlotSize;
     int nullIndicatorBit = 0;
-    // slotIdx is the index into the resulting tuple struct.  The first (smallest) field
+    // slotIdx is the index into the resulting tuple struct.  The first (largest) field
     // is 0, next is 1, etc.
     int slotIdx = 0;
+    // sort slots in descending order of size
     List<Integer> sortedSizes = new ArrayList<Integer>(slotsBySize.keySet());
-    Collections.sort(sortedSizes);
+    Collections.sort(sortedSizes, Collections.reverseOrder());
     for (int slotSize: sortedSizes) {
       if (slotsBySize.get(slotSize).isEmpty()) continue;
-      if (slotSize > 1) {
-        // insert padding
-        int alignTo = Math.min(slotSize, 8);
-        offset = (offset + alignTo - 1) / alignTo * alignTo;
-      }
-
       for (SlotDescriptor d: slotsBySize.get(slotSize)) {
         Preconditions.checkState(d.isMaterialized());
         d.setByteSize(slotSize);
-        d.setByteOffset(offset);
+        d.setByteOffset(slotOffset);
         d.setSlotIdx(slotIdx++);
-        offset += slotSize;
+        slotOffset += slotSize;
 
         // assign null indicator
         if (d.getIsNullable()) {
@@ -268,14 +274,15 @@ public class TupleDescriptor {
           nullIndicatorBit = (nullIndicatorBit + 1) % 8;
           if (nullIndicatorBit == 0) ++nullIndicatorByte;
         } else {
-          // Non-nullable slots will have 0 for the byte offset and -1 for the bit mask
+          // non-nullable slots will have 0 for the byte offset and -1 for the bit mask
           d.setNullIndicatorBit(-1);
           d.setNullIndicatorByte(0);
         }
       }
     }
+    Preconditions.checkState(slotOffset == totalSlotSize);
 
-    this.byteSize_ = offset;
+    byteSize_ = totalSlotSize + numNullBytes_;
   }
 
   /**
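
(For reference, the rule implemented above can be stated compactly: if a
materialized nullable slot is the i-th nullable slot in the descending-size
assignment order, its null flag lives at byte totalSlotSize + i/8, bit i%8,
and the tuple byte size is totalSlotSize + (numNullableSlots + 7)/8. For the
example in the class comment above: totalSlotSize = 16 + 4 + 2 + 1 = 23, so
the single null byte sits at offset 23.)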

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/fe/src/main/java/org/apache/impala/service/JniFrontend.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/service/JniFrontend.java b/fe/src/main/java/org/apache/impala/service/JniFrontend.java
index 0b4ce8f..07d6ec6 100644
--- a/fe/src/main/java/org/apache/impala/service/JniFrontend.java
+++ b/fe/src/main/java/org/apache/impala/service/JniFrontend.java
@@ -30,17 +30,10 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
-import org.apache.log4j.Appender;
-import org.apache.hadoop.fs.s3a.S3AFileSystem;
-import org.apache.log4j.FileAppender;
-import org.apache.thrift.TException;
-import org.apache.thrift.TSerializer;
-import org.apache.thrift.protocol.TBinaryProtocol;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
+import org.apache.impala.analysis.DescriptorTable;
 import org.apache.impala.analysis.ToSqlUtils;
 import org.apache.impala.authorization.AuthorizationConfig;
 import org.apache.impala.authorization.ImpalaInternalAdminUser;
@@ -53,12 +46,13 @@ import org.apache.impala.common.FileSystemUtil;
 import org.apache.impala.common.ImpalaException;
 import org.apache.impala.common.InternalException;
 import org.apache.impala.common.JniUtil;
-import org.apache.impala.service.BackendConfig;
+import org.apache.impala.thrift.TBuildTestDescriptorTableParams;
 import org.apache.impala.thrift.TCatalogObject;
 import org.apache.impala.thrift.TDatabase;
 import org.apache.impala.thrift.TDescribeDbParams;
 import org.apache.impala.thrift.TDescribeResult;
 import org.apache.impala.thrift.TDescribeTableParams;
+import org.apache.impala.thrift.TDescriptorTable;
 import org.apache.impala.thrift.TExecRequest;
 import org.apache.impala.thrift.TFunctionCategory;
 import org.apache.impala.thrift.TGetAllHadoopConfigsResponse;
@@ -90,6 +84,14 @@ import org.apache.impala.thrift.TUpdateMembershipRequest;
 import org.apache.impala.util.GlogAppender;
 import org.apache.impala.util.PatternMatcher;
 import org.apache.impala.util.TSessionStateUtil;
+import org.apache.log4j.Appender;
+import org.apache.log4j.FileAppender;
+import org.apache.thrift.TException;
+import org.apache.thrift.TSerializer;
+import org.apache.thrift.protocol.TBinaryProtocol;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
@@ -477,6 +479,25 @@ public class JniFrontend {
   }
 
   /**
+   * Creates a thrift descriptor table for testing.
+   */
+  public byte[] buildTestDescriptorTable(byte[] buildTestDescTblParams)
+      throws ImpalaException {
+    TBuildTestDescriptorTableParams params = new TBuildTestDescriptorTableParams();
+    JniUtil.deserializeThrift(protocolFactory_, params, buildTestDescTblParams);
+    Preconditions.checkNotNull(params.slot_types);
+    TDescriptorTable result =
+        DescriptorTable.buildTestDescriptorTable(params.slot_types);
+    TSerializer serializer = new TSerializer(protocolFactory_);
+    try {
+      byte[] ret = serializer.serialize(result);
+      return ret;
+    } catch (TException e) {
+      throw new InternalException(e.getMessage());
+    }
+  }
+
+  /**
    * Gets all roles
    */
   public byte[] getRoles(byte[] showRolesParams) throws ImpalaException {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b0e87c68/fe/src/test/java/org/apache/impala/analysis/AnalyzerTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzerTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzerTest.java
index d763deb..993f489 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzerTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzerTest.java
@@ -22,18 +22,17 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
-import org.junit.Assert;
-import org.junit.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
 import org.apache.impala.catalog.Function;
-import org.apache.impala.catalog.PrimitiveType;
 import org.apache.impala.catalog.ScalarType;
 import org.apache.impala.catalog.Type;
 import org.apache.impala.common.AnalysisException;
 import org.apache.impala.common.FrontendTestBase;
 import org.apache.impala.thrift.TExpr;
+import org.junit.Assert;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import com.google.common.base.Preconditions;
 
 public class AnalyzerTest extends FrontendTestBase {
@@ -173,27 +172,24 @@ public class AnalyzerTest extends FrontendTestBase {
     SelectStmt stmt = (SelectStmt) AnalyzesOk("select * from functional.AllTypes");
     Analyzer analyzer = stmt.getAnalyzer();
     DescriptorTable descTbl = analyzer.getDescTbl();
-    TupleDescriptor tupleD = descTbl.getTupleDesc(new TupleId(0));
-    for (SlotDescriptor slotD: tupleD.getSlots()) {
-      slotD.setIsMaterialized(true);
-    }
+    TupleDescriptor tupleDesc = descTbl.getTupleDesc(new TupleId(0));
+    tupleDesc.materializeSlots();
     descTbl.computeMemLayout();
-    Assert.assertEquals(97.0f, tupleD.getAvgSerializedSize(), 0.0);
-    checkLayoutParams("functional.alltypes.bool_col", 1, 2, 0, 0, analyzer);
-    checkLayoutParams("functional.alltypes.tinyint_col", 1, 3, 0, 1, analyzer);
-    checkLayoutParams("functional.alltypes.smallint_col", 2, 4, 0, 2, analyzer);
-    checkLayoutParams("functional.alltypes.id", 4, 8, 0, 3, analyzer);
-    checkLayoutParams("functional.alltypes.int_col", 4, 12, 0, 4, analyzer);
-    checkLayoutParams("functional.alltypes.float_col", 4, 16, 0, 5, analyzer);
-    checkLayoutParams("functional.alltypes.year", 4, 20, 0, 6, analyzer);
-    checkLayoutParams("functional.alltypes.month", 4, 24, 0, 7, analyzer);
-    checkLayoutParams("functional.alltypes.bigint_col", 8, 32, 1, 0, analyzer);
-    checkLayoutParams("functional.alltypes.double_col", 8, 40, 1, 1, analyzer);
-    int strSlotSize = PrimitiveType.STRING.getSlotSize();
-    checkLayoutParams("functional.alltypes.date_string_col",
-        strSlotSize, 48, 1, 2, analyzer);
-    checkLayoutParams("functional.alltypes.string_col",
-        strSlotSize, 48 + strSlotSize, 1, 3, analyzer);
+
+    Assert.assertEquals(97.0f, tupleDesc.getAvgSerializedSize(), 0.0);
+    checkLayoutParams("functional.alltypes.date_string_col", 16, 0, 88, 0, analyzer);
+    checkLayoutParams("functional.alltypes.string_col", 16, 16, 88, 1, analyzer);
+    checkLayoutParams("functional.alltypes.timestamp_col", 16, 32, 88, 2, analyzer);
+    checkLayoutParams("functional.alltypes.bigint_col", 8, 48, 88, 3, analyzer);
+    checkLayoutParams("functional.alltypes.double_col", 8, 56, 88, 4, analyzer);
+    checkLayoutParams("functional.alltypes.id", 4, 64, 88, 5, analyzer);
+    checkLayoutParams("functional.alltypes.int_col", 4, 68, 88, 6, analyzer);
+    checkLayoutParams("functional.alltypes.float_col", 4, 72, 88, 7, analyzer);
+    checkLayoutParams("functional.alltypes.year", 4, 76, 89, 0, analyzer);
+    checkLayoutParams("functional.alltypes.month", 4, 80, 89, 1, analyzer);
+    checkLayoutParams("functional.alltypes.smallint_col", 2, 84, 89, 2, analyzer);
+    checkLayoutParams("functional.alltypes.bool_col", 1, 86, 89, 3, analyzer);
+    checkLayoutParams("functional.alltypes.tinyint_col", 1, 87, 89, 4, analyzer);
   }
 
   private void testNonNullable() throws AnalysisException {
@@ -205,9 +201,7 @@ public class AnalyzerTest extends FrontendTestBase {
         "select count(int_col), count(*) from functional.AllTypes");
     DescriptorTable descTbl = stmt.getAnalyzer().getDescTbl();
     TupleDescriptor aggDesc = descTbl.getTupleDesc(new TupleId(1));
-    for (SlotDescriptor slotD: aggDesc.getSlots()) {
-      slotD.setIsMaterialized(true);
-    }
+    aggDesc.materializeSlots();
     descTbl.computeMemLayout();
     Assert.assertEquals(16.0f, aggDesc.getAvgSerializedSize(), 0.0);
     Assert.assertEquals(16, aggDesc.getByteSize());
@@ -218,22 +212,19 @@ public class AnalyzerTest extends FrontendTestBase {
   private void testMixedNullable() throws AnalysisException {
     // one slot is nullable, one is not. The layout should look like:
     // (byte range : data)
-    // 0 : 1 nullable-byte (only 1 bit used)
-    // 1 - 7: padded bytes
-    // 8 - 15: sum(int_col)
-    // 16 - 23: count(*)
+    // 0 - 7: sum(int_col)
+    // 8 - 15: count(*)
+    // 16 : nullable-byte (only 1 bit used)
     SelectStmt stmt = (SelectStmt) AnalyzesOk(
         "select sum(int_col), count(*) from functional.AllTypes");
     DescriptorTable descTbl = stmt.getAnalyzer().getDescTbl();
     TupleDescriptor aggDesc = descTbl.getTupleDesc(new TupleId(1));
-    for (SlotDescriptor slotD: aggDesc.getSlots()) {
-      slotD.setIsMaterialized(true);
-    }
+    aggDesc.materializeSlots();
     descTbl.computeMemLayout();
     Assert.assertEquals(16.0f, aggDesc.getAvgSerializedSize(), 0.0);
-    Assert.assertEquals(24, aggDesc.getByteSize());
-    checkLayoutParams(aggDesc.getSlots().get(0), 8, 8, 0, 0);
-    checkLayoutParams(aggDesc.getSlots().get(1), 8, 16, 0, -1);
+    Assert.assertEquals(17, aggDesc.getByteSize());
+    checkLayoutParams(aggDesc.getSlots().get(0), 8, 0, 16, 0);
+    checkLayoutParams(aggDesc.getSlots().get(1), 8, 8, 0, -1);
   }
 
   /**
@@ -243,34 +234,31 @@ public class AnalyzerTest extends FrontendTestBase {
     SelectStmt stmt = (SelectStmt) AnalyzesOk("select * from functional.alltypes");
     Analyzer analyzer = stmt.getAnalyzer();
     DescriptorTable descTbl = analyzer.getDescTbl();
-    TupleDescriptor tupleD = descTbl.getTupleDesc(new TupleId(0));
-    ArrayList<SlotDescriptor> slots = tupleD.getSlots();
-    for (SlotDescriptor slotD: slots) {
-      slotD.setIsMaterialized(true);
-    }
+    TupleDescriptor tupleDesc = descTbl.getTupleDesc(new TupleId(0));
+    tupleDesc.materializeSlots();
     // Mark slots 0 (id), 7 (double_col), 9 (string_col) as non-materialized.
+    ArrayList<SlotDescriptor> slots = tupleDesc.getSlots();
     slots.get(0).setIsMaterialized(false);
     slots.get(7).setIsMaterialized(false);
     slots.get(9).setIsMaterialized(false);
-
     descTbl.computeMemLayout();
-    Assert.assertEquals(68.0f, tupleD.getAvgSerializedSize(), 0.0);
+
+    Assert.assertEquals(68.0f, tupleDesc.getAvgSerializedSize(), 0.0);
     // Check non-materialized slots.
     checkLayoutParams("functional.alltypes.id", 0, -1, 0, 0, analyzer);
     checkLayoutParams("functional.alltypes.double_col", 0, -1, 0, 0, analyzer);
     checkLayoutParams("functional.alltypes.string_col", 0, -1, 0, 0, analyzer);
     // Check materialized slots.
-    checkLayoutParams("functional.alltypes.bool_col", 1, 2, 0, 0, analyzer);
-    checkLayoutParams("functional.alltypes.tinyint_col", 1, 3, 0, 1, analyzer);
-    checkLayoutParams("functional.alltypes.smallint_col", 2, 4, 0, 2, analyzer);
-    checkLayoutParams("functional.alltypes.int_col", 4, 8, 0, 3, analyzer);
-    checkLayoutParams("functional.alltypes.float_col", 4, 12, 0, 4, analyzer);
-    checkLayoutParams("functional.alltypes.year", 4, 16, 0, 5, analyzer);
-    checkLayoutParams("functional.alltypes.month", 4, 20, 0, 6, analyzer);
-    checkLayoutParams("functional.alltypes.bigint_col", 8, 24, 0, 7, analyzer);
-    int strSlotSize = PrimitiveType.STRING.getSlotSize();
-    checkLayoutParams("functional.alltypes.date_string_col",
-        strSlotSize, 32, 1, 0, analyzer);
+    checkLayoutParams("functional.alltypes.date_string_col", 16, 0, 60, 0, analyzer);
+    checkLayoutParams("functional.alltypes.timestamp_col", 16, 16, 60, 1, analyzer);
+    checkLayoutParams("functional.alltypes.bigint_col", 8, 32, 60, 2, analyzer);
+    checkLayoutParams("functional.alltypes.int_col", 4, 40, 60, 3, analyzer);
+    checkLayoutParams("functional.alltypes.float_col", 4, 44, 60, 4, analyzer);
+    checkLayoutParams("functional.alltypes.year", 4, 48, 60, 5, analyzer);
+    checkLayoutParams("functional.alltypes.month", 4, 52, 60, 6, analyzer);
+    checkLayoutParams("functional.alltypes.smallint_col", 2, 56, 60, 7, analyzer);
+    checkLayoutParams("functional.alltypes.bool_col", 1, 58, 61, 0, analyzer);
+    checkLayoutParams("functional.alltypes.tinyint_col", 1, 59, 61, 1, analyzer);
   }
 
   private void checkLayoutParams(SlotDescriptor d, int byteSize, int byteOffset,
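
(A quick cross-check of the new expected values in testTableMemLayout(): the
materialized slot sizes sum to 3*16 + 2*8 + 5*4 + 2 + 1 + 1 = 88 bytes, so
the first null-indicator byte is at offset 88, the 13 null bits spill over
into bytes 88 and 89, and the tuple byte size comes out to 90.)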


[7/7] incubator-impala git commit: IMPALA-4277: allow overriding of Hive/Hadoop versions/locations

Posted by he...@apache.org.
IMPALA-4277: allow overriding of Hive/Hadoop versions/locations

This helps with IMPALA-4277 by making it easier to build against
Hadoop/Hive distributions where the directory layout doesn't exactly
match our current CDH dependencies, or where we want to temporarily
override a version without making a source change.
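
A minimal usage sketch (the version string and paths below are made up
for illustration; any variable left unset falls back to the CDH default):

  # Override before sourcing the config script.
  export IMPALA_HIVE_VERSION=1.1.0-custom
  export HADOOP_INCLUDE_DIR=/opt/hadoop/include
  export HADOOP_LIB_DIR=/opt/hadoop/lib
  export HIVE_SRC_DIR=/opt/hive/src
  . bin/impala-config.sh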

Change-Id: I7da10e38f9c4309f2d193dc25f14a6ea308c9639
Reviewed-on: http://gerrit.cloudera.org:8080/4720
Reviewed-by: Sailesh Mukil <sa...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/df680cfe
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/df680cfe
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/df680cfe

Branch: refs/heads/master
Commit: df680cfe3a99fa295d25d39f2eab4a9cd98509be
Parents: d0a2d1d
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Thu Oct 13 15:00:08 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Tue Oct 18 05:54:09 2016 +0000

----------------------------------------------------------------------
 bin/impala-config.sh         | 33 ++++++++++++++++++++++-----------
 buildall.sh                  |  2 +-
 cmake_modules/FindHDFS.cmake | 11 +++--------
 common/thrift/CMakeLists.txt |  2 +-
 4 files changed, 27 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/df680cfe/bin/impala-config.sh
----------------------------------------------------------------------
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 90e8fc0..2a25248 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -295,13 +295,13 @@ if [[ $OSTYPE == "darwin"* ]]; then
   IMPALA_THRIFT_JAVA_VERSION=0.9.2
 fi
 
-export IMPALA_HADOOP_VERSION=2.6.0-cdh5.10.0-SNAPSHOT
-export IMPALA_HBASE_VERSION=1.2.0-cdh5.10.0-SNAPSHOT
-export IMPALA_HIVE_VERSION=1.1.0-cdh5.10.0-SNAPSHOT
-export IMPALA_SENTRY_VERSION=1.5.1-cdh5.10.0-SNAPSHOT
-export IMPALA_LLAMA_VERSION=1.0.0-cdh5.10.0-SNAPSHOT
-export IMPALA_PARQUET_VERSION=1.5.0-cdh5.10.0-SNAPSHOT
-export IMPALA_LLAMA_MINIKDC_VERSION=1.0.0
+export IMPALA_HADOOP_VERSION=${IMPALA_HADOOP_VERSION:-2.6.0-cdh5.10.0-SNAPSHOT}
+export IMPALA_HBASE_VERSION=${IMPALA_HBASE_VERSION:-1.2.0-cdh5.10.0-SNAPSHOT}
+export IMPALA_HIVE_VERSION=${IMPALA_HIVE_VERSION:-1.1.0-cdh5.10.0-SNAPSHOT}
+export IMPALA_SENTRY_VERSION=${IMPALA_SENTRY_VERSION:-1.5.1-cdh5.10.0-SNAPSHOT}
+export IMPALA_LLAMA_VERSION=${IMPALA_LLAMA_VERSION:-1.0.0-cdh5.10.0-SNAPSHOT}
+export IMPALA_PARQUET_VERSION=${IMPALA_PARQUET_VERSION:-1.5.0-cdh5.10.0-SNAPSHOT}
+export IMPALA_LLAMA_MINIKDC_VERSION=${IMPALA_LLAMA_MINIKDC_VERSION:-1.0.0}
 
 export IMPALA_FE_DIR="$IMPALA_HOME/fe"
 export IMPALA_BE_DIR="$IMPALA_HOME/be"
@@ -319,12 +319,17 @@ else
   export CDH_COMPONENTS_HOME="$IMPALA_HOME/thirdparty"
 fi
 
-# Hadoop dependencies are snapshots in the Impala tree
+# Typically we build against a snapshot build of Hadoop that includes everything we need
+# for building Impala and running a minicluster.
 export HADOOP_HOME="$CDH_COMPONENTS_HOME/hadoop-${IMPALA_HADOOP_VERSION}/"
 export HADOOP_CONF_DIR="$IMPALA_FE_DIR/src/test/resources"
+# The include and lib paths are needed to pick up hdfs.h and libhdfs.*
+# Allow overriding in case we want to point to a package/install with a different layout.
+export HADOOP_INCLUDE_DIR=${HADOOP_INCLUDE_DIR:-"${HADOOP_HOME}/include"}
+export HADOOP_LIB_DIR=${HADOOP_LIB_DIR:-"${HADOOP_HOME}/lib"}
 
 : ${HADOOP_CLASSPATH=}
-# Please note that the * is inside quotes, thus it won't get exanded by bash but
+# Please note that the * is inside quotes, thus it won't get expanded by bash but
 # by java, see "Understanding class path wildcards" at http://goo.gl/f0cfft
 export HADOOP_CLASSPATH="$HADOOP_CLASSPATH:${HADOOP_HOME}/share/hadoop/tools/lib/*"
 # YARN is configured to use LZO so the LZO jar needs to be in the hadoop classpath.
@@ -341,6 +346,9 @@ export SENTRY_CONF_DIR="$IMPALA_HOME/fe/src/test/resources"
 
 export HIVE_HOME="$CDH_COMPONENTS_HOME/hive-${IMPALA_HIVE_VERSION}/"
 export PATH="$HIVE_HOME/bin:$PATH"
+# Allow overriding of Hive source location in case we want to build Impala without
+# a complete Hive build.
+export HIVE_SRC_DIR=${HIVE_SRC_DIR:-"${HIVE_HOME}/src"}
 export HIVE_CONF_DIR="$IMPALA_FE_DIR/src/test/resources"
 
 # Hive looks for jar files in a single directory from HIVE_AUX_JARS_PATH plus
@@ -392,7 +400,7 @@ export USER="${USER-`id -un`}"
 #LIBHDFS_OPTS="-Xcheck:jni -Xcheck:nabounds"
 # - Points to the location of libbackend.so.
 LIBHDFS_OPTS="${LIBHDFS_OPTS:-}"
-LIBHDFS_OPTS="${LIBHDFS_OPTS} -Djava.library.path=${HADOOP_HOME}/lib/native/"
+LIBHDFS_OPTS="${LIBHDFS_OPTS} -Djava.library.path=${HADOOP_LIB_DIR}/native/"
 # READER BEWARE: This always points to the debug build.
 # TODO: Consider having cmake scripts change this value depending on
 # the build type.
@@ -412,7 +420,7 @@ LIB_JVM=` find "${JAVA_HOME}/"   -name libjvm.so  | head -1`
 LD_LIBRARY_PATH="${LD_LIBRARY_PATH-}"
 LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:`dirname ${LIB_JAVA}`:`dirname ${LIB_JSIG}`"
 LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:`dirname ${LIB_JVM}`"
-LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${HADOOP_HOME}/lib/native"
+LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${HADOOP_LIB_DIR}/native"
 LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${IMPALA_HOME}/be/build/debug/service"
 LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${IMPALA_SNAPPY_PATH}"
 LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${IMPALA_LZO}/build"
@@ -443,9 +451,12 @@ export IMPALA_CONFIG_SOURCED=1
 echo "IMPALA_HOME             = $IMPALA_HOME"
 echo "HADOOP_HOME             = $HADOOP_HOME"
 echo "HADOOP_CONF_DIR         = $HADOOP_CONF_DIR"
+echo "HADOOP_INCLUDE_DIR      = $HADOOP_INCLUDE_DIR"
+echo "HADOOP_LIB_DIR          = $HADOOP_LIB_DIR"
 echo "MINI_DFS_BASE_DATA_DIR  = $MINI_DFS_BASE_DATA_DIR"
 echo "HIVE_HOME               = $HIVE_HOME"
 echo "HIVE_CONF_DIR           = $HIVE_CONF_DIR"
+echo "HIVE_SRC_DIR            = $HIVE_SRC_DIR"
 echo "HBASE_HOME              = $HBASE_HOME"
 echo "HBASE_CONF_DIR          = $HBASE_CONF_DIR"
 echo "MINIKDC_HOME            = $MINIKDC_HOME"

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/df680cfe/buildall.sh
----------------------------------------------------------------------
diff --git a/buildall.sh b/buildall.sh
index a7858a3..d7159e7 100755
--- a/buildall.sh
+++ b/buildall.sh
@@ -365,7 +365,7 @@ reconfigure_test_cluster() {
 
   # Copy Hadoop-lzo dependencies if available (required to generate Lzo data).
   if stat "$HADOOP_LZO"/build/native/Linux-*-*/lib/libgplcompression.* > /dev/null ; then
-    cp "$HADOOP_LZO"/build/native/Linux-*-*/lib/libgplcompression.* "$HADOOP_HOME/lib/native"
+    cp "$HADOOP_LZO"/build/native/Linux-*-*/lib/libgplcompression.* "$HADOOP_LIB_DIR/native"
   else
     echo "No hadoop-lzo found"
   fi

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/df680cfe/cmake_modules/FindHDFS.cmake
----------------------------------------------------------------------
diff --git a/cmake_modules/FindHDFS.cmake b/cmake_modules/FindHDFS.cmake
index 6affc56..f0916e1 100644
--- a/cmake_modules/FindHDFS.cmake
+++ b/cmake_modules/FindHDFS.cmake
@@ -28,9 +28,9 @@
 exec_program(hadoop ARGS version OUTPUT_VARIABLE Hadoop_VERSION
              RETURN_VALUE Hadoop_RETURN)
 
-# currently only looking in HADOOP_HOME
+# Only look in HADOOP_INCLUDE_DIR
 find_path(HDFS_INCLUDE_DIR hdfs.h PATHS
-  $ENV{HADOOP_HOME}/include/
+  $ENV{HADOOP_INCLUDE_DIR}
   # make sure we don't accidentally pick up a different version
   NO_DEFAULT_PATH
 )
@@ -44,12 +44,7 @@ else ()
 endif()
 
 message(STATUS "Architecture: ${arch_hint}")
-
-if ("${arch_hint}" STREQUAL "x64")
-  set(HDFS_LIB_PATHS $ENV{HADOOP_HOME}/lib/native)
-else ()
-  set(HDFS_LIB_PATHS $ENV{HADOOP_HOME}/lib/native)
-endif ()
+set(HDFS_LIB_PATHS $ENV{HADOOP_LIB_DIR}/native)
 
 message(STATUS "HDFS_LIB_PATHS: ${HDFS_LIB_PATHS}")
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/df680cfe/common/thrift/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/common/thrift/CMakeLists.txt b/common/thrift/CMakeLists.txt
index 3104ee2..08fabd8 100644
--- a/common/thrift/CMakeLists.txt
+++ b/common/thrift/CMakeLists.txt
@@ -119,7 +119,7 @@ function(THRIFT_GEN_DS VAR)
 endfunction(THRIFT_GEN_DS)
 
 message("Using Thrift compiler: ${THRIFT_COMPILER}")
-set(THRIFT_INCLUDE_DIR_OPTION -I ${THRIFT_CONTRIB_DIR} -I $ENV{HIVE_HOME}/src/metastore/if)
+set(THRIFT_INCLUDE_DIR_OPTION -I ${THRIFT_CONTRIB_DIR} -I $ENV{HIVE_SRC_DIR}/metastore/if)
 set(BE_OUTPUT_DIR ${CMAKE_SOURCE_DIR}/be/generated-sources)
 set(FE_OUTPUT_DIR ${CMAKE_SOURCE_DIR}/fe/generated-sources)
 # TODO: avoid duplicating generated java classes


[6/7] incubator-impala git commit: Add search / sort to HTML tables for metrics and threads

Posted by he...@apache.org.
Add search / sort to HTML tables for metrics and threads

Change-Id: If069ce6a9eae00bacaa30605d23bea72f29e5c4f
Reviewed-on: http://gerrit.cloudera.org:8080/4743
Tested-by: Internal Jenkins
Reviewed-by: Henry Robinson <he...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/d0a2d1d4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/d0a2d1d4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/d0a2d1d4

Branch: refs/heads/master
Commit: d0a2d1d43da5ab8d4c1c06db3524458cb23a76bb
Parents: e3a0891
Author: Henry Robinson <he...@cloudera.com>
Authored: Mon Oct 17 17:05:05 2016 -0700
Committer: Henry Robinson <he...@cloudera.com>
Committed: Tue Oct 18 05:08:21 2016 +0000

----------------------------------------------------------------------
 www/metric_group.tmpl | 62 ++++++++++++++++++++++++++++------------------
 www/thread-group.tmpl | 44 +++++++++++++++++++++-----------
 2 files changed, 67 insertions(+), 39 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d0a2d1d4/www/metric_group.tmpl
----------------------------------------------------------------------
diff --git a/www/metric_group.tmpl b/www/metric_group.tmpl
index 82f5408..0cbfefe 100644
--- a/www/metric_group.tmpl
+++ b/www/metric_group.tmpl
@@ -19,33 +19,47 @@ under the License.
 {{!Renders a metric group and all its children, one table each}}
 <a id="{{name}}"><h3>{{name}}</h3></a>
 
-<table class='table table-bordered table-hover'>
-  <tr>
-    <th>Name</th>
-    <th>Value</th>
-    <th>Description</th>
-  </tr>
+<table id="{{name}}-tbl" class='table table-bordered table-hover'>
+  <thead>
+    <tr>
+      <th>Name</th>
+      <th>Value</th>
+      <th>Description</th>
+    </tr>
+  </thead>
+  <tbody>
     {{#metrics}}
-  <tr>
-    <td><tt>{{name}}</tt></td>
-    {{! Is this a stats metric? }}
-    {{?mean}}
-    <td>
-      Last (of {{count}}): <strong>{{last}}</strong>.
-      Min: {{min}}, max: {{max}}, avg: {{mean}}</td>
-    {{/mean}}
-    {{^mean}}
-    <td>
-      {{human_readable}}
-    </td>
-    {{/mean}}
-    <td>
-      {{description}}
-    </td>
-  </tr>
-  {{/metrics}}
+    <tr>
+      <td><tt>{{name}}</tt></td>
+      {{! Is this a stats metric? }}
+      {{?mean}}
+      <td>
+        Last (of {{count}}): <strong>{{last}}</strong>.
+        Min: {{min}}, max: {{max}}, avg: {{mean}}</td>
+      {{/mean}}
+      {{^mean}}
+      <td>
+        {{human_readable}}
+      </td>
+      {{/mean}}
+      <td>
+        {{description}}
+      </td>
+    </tr>
+    {{/metrics}}
+  </tbody>
 </table>
 
+<script>
+    $(document).ready(function() {
+        $('#{{name}}-tbl').DataTable({
+            "order": [[ 1, "desc" ]],
+            "pageLength": 100
+        });
+    });
+</script>
+
+
 {{! Recurse into all child groups }}
 {{#child_groups}}
 {{>www/metric_group.tmpl}}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d0a2d1d4/www/thread-group.tmpl
----------------------------------------------------------------------
diff --git a/www/thread-group.tmpl b/www/thread-group.tmpl
index 90f6cd4..19730c6 100644
--- a/www/thread-group.tmpl
+++ b/www/thread-group.tmpl
@@ -20,21 +20,35 @@ under the License.
 
 <h2>Thread Group: {{thread-group.category}}</h2>
 
-<table class='table table-hover table-border'>
-  <tr>
-    <th>Thread name</th>
-    <th>Cumulative User CPU(s)</th>
-    <th>Cumulative Kernel CPU(s)</th>
-    <th>Cumulative IO-wait(s)</th>
-  </tr>
-  {{#threads}}
-  <tr>
-    <td>{{name}}</td>
-    <td>{{user_ns}}</td>
-    <td>{{kernel_ns}}</td>
-    <td>{{iowait_ns}}</td>
-  </tr>
-  {{/threads}}
+<table id="{{thread-group.category}}-tbl" class='table table-hover table-bordered'
+       style='table-layout:fixed; word-wrap: break-word'>
+  <thead>
+    <tr>
+      <th>Thread name</th>
+      <th>Cumulative User CPU(s)</th>
+      <th>Cumulative Kernel CPU(s)</th>
+      <th>Cumulative IO-wait(s)</th>
+    </tr>
+  </thead>
+  <tbody>
+    {{#threads}}
+    <tr>
+      <td>{{name}}</td>
+      <td>{{user_ns}}</td>
+      <td>{{kernel_ns}}</td>
+      <td>{{iowait_ns}}</td>
+    </tr>
+    {{/threads}}
+  </tbody>
 </table>
 
+<script>
+    $(document).ready(function() {
+        $('#{{thread-group.category}}-tbl').DataTable({
+            "order": [[ 1, "desc" ]],
+            "pageLength": 100
+        });
+    });
+</script>
+
 {{> www/common-footer.tmpl}}


[5/7] incubator-impala git commit: Buffer pool: Add basic counters to buffer pool client

Posted by he...@apache.org.
Buffer pool: Add basic counters to buffer pool client

Change-Id: I9a5a57b7cfccf67ee498e68964f1e077075ee325
Reviewed-on: http://gerrit.cloudera.org:8080/4714
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/e3a08914
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/e3a08914
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/e3a08914

Branch: refs/heads/master
Commit: e3a08914451a63fe65e8f66afc743739f4570ba4
Parents: 07da767
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Fri Oct 7 09:23:59 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Tue Oct 18 04:48:43 2016 +0000

----------------------------------------------------------------------
 be/src/bufferpool/buffer-pool-counters.h      | 47 ++++++++++++++++++++++
 be/src/bufferpool/buffer-pool-test.cc         | 30 +++++++-------
 be/src/bufferpool/buffer-pool.cc              | 33 ++++++++++++---
 be/src/bufferpool/buffer-pool.h               | 15 +++++--
 be/src/bufferpool/reservation-tracker-test.cc |  9 +----
 5 files changed, 102 insertions(+), 32 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e3a08914/be/src/bufferpool/buffer-pool-counters.h
----------------------------------------------------------------------
diff --git a/be/src/bufferpool/buffer-pool-counters.h b/be/src/bufferpool/buffer-pool-counters.h
new file mode 100644
index 0000000..6f3801e
--- /dev/null
+++ b/be/src/bufferpool/buffer-pool-counters.h
@@ -0,0 +1,47 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef IMPALA_BUFFER_POOL_COUNTERS_H
+#define IMPALA_BUFFER_POOL_COUNTERS_H
+
+#include "util/runtime-profile.h"
+
+namespace impala {
+
+/// A set of counters for each buffer pool client.
+struct BufferPoolClientCounters {
+ public:
+  /// Amount of time spent trying to get a buffer.
+  RuntimeProfile::Counter* get_buffer_time;
+
+  /// Amount of time spent waiting for reads from disk to complete.
+  RuntimeProfile::Counter* read_wait_time;
+
+  /// Amount of time spent waiting for writes to disk to complete.
+  RuntimeProfile::Counter* write_wait_time;
+
+  /// The peak total size of unpinned buffers.
+  RuntimeProfile::HighWaterMarkCounter* peak_unpinned_bytes;
+
+  /// The total bytes of data unpinned. Every time a page's pin count goes from 1 to 0,
+  /// this counter is incremented by the page size.
+  RuntimeProfile::Counter* total_unpinned_bytes;
+};
+
+}
+
+#endif

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e3a08914/be/src/bufferpool/buffer-pool-test.cc
----------------------------------------------------------------------
diff --git a/be/src/bufferpool/buffer-pool-test.cc b/be/src/bufferpool/buffer-pool-test.cc
index 16cf12c..793bcb9 100644
--- a/be/src/bufferpool/buffer-pool-test.cc
+++ b/be/src/bufferpool/buffer-pool-test.cc
@@ -15,7 +15,6 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <gtest/gtest.h>
 #include <boost/bind.hpp>
 #include <boost/scoped_ptr.hpp>
 #include <boost/thread/thread.hpp>
@@ -29,7 +28,7 @@
 #include "common/init.h"
 #include "common/object-pool.h"
 #include "testutil/death-test-util.h"
-#include "testutil/test-macros.h"
+#include "testutil/gtest-util.h"
 
 #include "common/names.h"
 
@@ -125,7 +124,8 @@ void BufferPoolTest::RegisterQueriesAndClients(BufferPool* pool, int query_id_hi
       EXPECT_TRUE(
           client_reservations[i][j].IncreaseReservationToFit(initial_client_reservation));
       string name = Substitute("Client $0 for query $1", j, query_id);
-      EXPECT_OK(pool->RegisterClient(name, &client_reservations[i][j], &clients[i][j]));
+      EXPECT_OK(pool->RegisterClient(
+          name, &client_reservations[i][j], NewProfile(), &clients[i][j]));
     }
 
     for (int j = 0; j < clients_per_query; ++j) {
@@ -209,7 +209,7 @@ TEST_F(BufferPoolTest, PageCreation) {
   client_tracker->InitChildTracker(NewProfile(), &global_reservations_, NULL, total_mem);
   ASSERT_TRUE(client_tracker->IncreaseReservation(total_mem));
   BufferPool::Client client;
-  ASSERT_OK(pool.RegisterClient("test client", client_tracker, &client));
+  ASSERT_OK(pool.RegisterClient("test client", client_tracker, NewProfile(), &client));
 
   vector<BufferPool::PageHandle> handles(num_pages);
 
@@ -256,7 +256,7 @@ TEST_F(BufferPoolTest, BufferAllocation) {
   client_tracker->InitChildTracker(NewProfile(), &global_reservations_, NULL, total_mem);
   ASSERT_TRUE(client_tracker->IncreaseReservationToFit(total_mem));
   BufferPool::Client client;
-  ASSERT_OK(pool.RegisterClient("test client", client_tracker, &client));
+  ASSERT_OK(pool.RegisterClient("test client", client_tracker, NewProfile(), &client));
 
   vector<BufferPool::BufferHandle> handles(num_buffers);
 
@@ -302,7 +302,8 @@ TEST_F(BufferPoolTest, BufferTransfer) {
     client_trackers[i].InitChildTracker(
         NewProfile(), &global_reservations_, NULL, TEST_BUFFER_LEN);
     ASSERT_TRUE(client_trackers[i].IncreaseReservationToFit(TEST_BUFFER_LEN));
-    ASSERT_OK(pool.RegisterClient("test client", &client_trackers[i], &clients[i]));
+    ASSERT_OK(pool.RegisterClient(
+        "test client", &client_trackers[i], NewProfile(), &clients[i]));
   }
 
   // Transfer the page around between the clients repeatedly in a circle.
@@ -344,7 +345,7 @@ TEST_F(BufferPoolTest, Pin) {
       NewProfile(), &global_reservations_, NULL, child_reservation);
   ASSERT_TRUE(client_tracker->IncreaseReservationToFit(child_reservation));
   BufferPool::Client client;
-  ASSERT_OK(pool.RegisterClient("test client", client_tracker, &client));
+  ASSERT_OK(pool.RegisterClient("test client", client_tracker, NewProfile(), &client));
 
   BufferPool::PageHandle handle1, handle2;
 
@@ -395,7 +396,7 @@ TEST_F(BufferPoolTest, PinWithoutReservation) {
   client_tracker->InitChildTracker(
       NewProfile(), &global_reservations_, NULL, TEST_BUFFER_LEN);
   BufferPool::Client client;
-  ASSERT_OK(pool.RegisterClient("test client", client_tracker, &client));
+  ASSERT_OK(pool.RegisterClient("test client", client_tracker, NewProfile(), &client));
 
   BufferPool::PageHandle handle;
   IMPALA_ASSERT_DEBUG_DEATH(pool.CreatePage(&client, TEST_BUFFER_LEN, &handle), "");
@@ -423,7 +424,7 @@ TEST_F(BufferPoolTest, ExtractBuffer) {
       NewProfile(), &global_reservations_, NULL, child_reservation);
   ASSERT_TRUE(client_tracker->IncreaseReservationToFit(child_reservation));
   BufferPool::Client client;
-  ASSERT_OK(pool.RegisterClient("test client", client_tracker, &client));
+  ASSERT_OK(pool.RegisterClient("test client", client_tracker, NewProfile(), &client));
 
   BufferPool::PageHandle page;
   BufferPool::BufferHandle buffer;
@@ -499,7 +500,7 @@ void BufferPoolTest::CreatePageLoop(
   ReservationTracker client_tracker;
   client_tracker.InitChildTracker(NewProfile(), parent_tracker, NULL, TEST_BUFFER_LEN);
   BufferPool::Client client;
-  ASSERT_OK(pool->RegisterClient("test client", &client_tracker, &client));
+  ASSERT_OK(pool->RegisterClient("test client", &client_tracker, NewProfile(), &client));
   for (int i = 0; i < num_ops; ++i) {
     BufferPool::PageHandle handle;
     ASSERT_TRUE(client_tracker.IncreaseReservation(TEST_BUFFER_LEN));
@@ -525,7 +526,8 @@ TEST_F(BufferPoolTest, CapacityExhausted) {
   BufferPool::PageHandle handle1, handle2, handle3;
 
   BufferPool::Client client;
-  ASSERT_OK(pool.RegisterClient("test client", &global_reservations_, &client));
+  ASSERT_OK(
+      pool.RegisterClient("test client", &global_reservations_, NewProfile(), &client));
   ASSERT_TRUE(global_reservations_.IncreaseReservation(TEST_BUFFER_LEN));
   ASSERT_OK(pool.CreatePage(&client, TEST_BUFFER_LEN, &handle1));
 
@@ -549,8 +551,4 @@ TEST_F(BufferPoolTest, CapacityExhausted) {
 }
 }
 
-int main(int argc, char** argv) {
-  ::testing::InitGoogleTest(&argc, argv);
-  impala::InitCommonRuntime(argc, argv, true, impala::TestInfo::BE_TEST);
-  return RUN_ALL_TESTS();
-}
+IMPALA_TEST_MAIN();

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e3a08914/be/src/bufferpool/buffer-pool.cc
----------------------------------------------------------------------
diff --git a/be/src/bufferpool/buffer-pool.cc b/be/src/bufferpool/buffer-pool.cc
index eaa4262..3035694 100644
--- a/be/src/bufferpool/buffer-pool.cc
+++ b/be/src/bufferpool/buffer-pool.cc
@@ -25,6 +25,7 @@
 #include "common/names.h"
 #include "gutil/strings/substitute.h"
 #include "util/bit-util.h"
+#include "util/runtime-profile-counters.h"
 #include "util/uid-util.h"
 
 namespace impala {
@@ -176,14 +177,26 @@ BufferPool::~BufferPool() {
 }
 
 Status BufferPool::RegisterClient(
-    const string& name, ReservationTracker* reservation, Client* client) {
+    const string& name, ReservationTracker* reservation, RuntimeProfile* profile,
+    Client* client) {
   DCHECK(!client->is_registered());
   DCHECK(reservation != NULL);
+  client->InitCounters(profile);
   client->reservation_ = reservation;
   client->name_ = name;
   return Status::OK();
 }
 
+void BufferPool::Client::InitCounters(RuntimeProfile* profile) {
+  counters_.get_buffer_time = ADD_TIMER(profile, "BufferPoolGetBufferTime");
+  counters_.read_wait_time = ADD_TIMER(profile, "BufferPoolReadWaitTime");
+  counters_.write_wait_time = ADD_TIMER(profile, "BufferPoolWriteWaitTime");
+  counters_.peak_unpinned_bytes =
+      profile->AddHighWaterMarkCounter("BufferPoolPeakUnpinnedBytes", TUnit::BYTES);
+  counters_.total_unpinned_bytes =
+      ADD_COUNTER(profile, "BufferPoolTotalUnpinnedBytes", TUnit::BYTES);
+}
+
 void BufferPool::DeregisterClient(Client* client) {
   if (!client->is_registered()) return;
   client->reservation_->Close();
@@ -256,13 +269,16 @@ Status BufferPool::Pin(Client* client, PageHandle* handle) {
   Page* page = handle->page_;
   {
     lock_guard<SpinLock> pl(page->lock); // Lock page while we work on its state.
-    if (!page->buffer.is_open()) {
-      // No changes have been made to state yet, so we can cleanly return on error.
-      RETURN_IF_ERROR(AllocateBufferInternal(client, page->len, &page->buffer));
+    if (page->pin_count == 0)  {
+      if (!page->buffer.is_open()) {
+        // No changes have been made to state yet, so we can cleanly return on error.
+        RETURN_IF_ERROR(AllocateBufferInternal(client, page->len, &page->buffer));
+
+        // TODO: will need to initiate/wait for read if the page is not in-memory.
+      }
+      COUNTER_ADD(client->counters_.peak_unpinned_bytes, -handle->len());
     }
     page->IncrementPinCount(handle);
-
-    // TODO: will need to initiate/wait for read if the page is not in-memory.
   }
 
   client->reservation_->AllocateFrom(page->len);
@@ -286,12 +302,16 @@ void BufferPool::UnpinLocked(Client* client, PageHandle* handle) {
   page->DecrementPinCount(handle);
   client->reservation_->ReleaseTo(page->len);
 
+  COUNTER_ADD(client->counters_.total_unpinned_bytes, handle->len());
+  COUNTER_ADD(client->counters_.peak_unpinned_bytes, handle->len());
+
   // TODO: can evict now. Only need to preserve contents if 'page->dirty' is true.
 }
 
 void BufferPool::ExtractBuffer(
     Client* client, PageHandle* page_handle, BufferHandle* buffer_handle) {
   DCHECK(page_handle->is_pinned());
+
   DCHECK_EQ(page_handle->client_, client);
 
   Page* page = page_handle->page_;
@@ -316,6 +336,7 @@ Status BufferPool::AllocateBufferInternal(
   DCHECK(!buffer->is_open());
   DCHECK_GE(len, min_buffer_len_);
   DCHECK_EQ(len, BitUtil::RoundUpToPowerOfTwo(len));
+  SCOPED_TIMER(client->counters_.get_buffer_time);
 
   // If there is headroom in 'buffer_bytes_remaining_', we can just allocate a new buffer.
   if (TryDecreaseBufferBytesRemaining(len)) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e3a08914/be/src/bufferpool/buffer-pool.h
----------------------------------------------------------------------
diff --git a/be/src/bufferpool/buffer-pool.h b/be/src/bufferpool/buffer-pool.h
index 44b5574..6a9641d 100644
--- a/be/src/bufferpool/buffer-pool.h
+++ b/be/src/bufferpool/buffer-pool.h
@@ -24,6 +24,7 @@
 #include <string>
 
 #include "bufferpool/buffer-allocator.h"
+#include "bufferpool/buffer-pool-counters.h"
 #include "common/atomic.h"
 #include "common/status.h"
 #include "gutil/macros.h"
@@ -167,10 +168,11 @@ class BufferPool {
 
   /// Register a client. Returns an error status and does not register the client if the
   /// arguments are invalid. 'name' is an arbitrary name used to identify the client in
-  /// any errors messages or logging. 'client' is the client to register. 'client' should
-  /// not already be registered.
+  /// any error messages or logging. Counters for this client are added to the (non-NULL)
+  /// 'profile'. 'client' is the client to register. 'client' should not already be
+  /// registered.
   Status RegisterClient(const std::string& name, ReservationTracker* reservation,
-      Client* client);
+      RuntimeProfile* profile, Client* client);
 
   /// Deregister 'client' if it is registered. Idempotent.
   void DeregisterClient(Client* client);
@@ -305,12 +307,19 @@ class BufferPool::Client {
   friend class BufferPool;
   DISALLOW_COPY_AND_ASSIGN(Client);
 
+  /// Initialize 'counters_' and add the counters to 'profile'.
+  void InitCounters(RuntimeProfile* profile);
+
   /// A name identifying the client.
   std::string name_;
 
   /// The reservation tracker for the client. NULL means the client isn't registered.
   /// All pages pinned by the client count as usage against 'reservation_'.
   ReservationTracker* reservation_;
+
+  /// The RuntimeProfile counters for this client. All non-NULL if is_registered()
+  /// is true.
+  BufferPoolClientCounters counters_;
 };
 
 /// A handle to a buffer allocated from the buffer pool. Each BufferHandle should only
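
As exercised by the updated tests, client registration now looks roughly
like this (a sketch; 'profile' stands in for whatever RuntimeProfile the
caller already owns, and the client name is arbitrary):

  BufferPool::Client client;
  RETURN_IF_ERROR(pool->RegisterClient("my client", &reservation_tracker,
      profile, &client));
  // Counters such as BufferPoolGetBufferTime and BufferPoolPeakUnpinnedBytes
  // are now registered under 'profile'.
  // ... use buffers/pages ...
  pool->DeregisterClient(&client);  // idempotent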

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e3a08914/be/src/bufferpool/reservation-tracker-test.cc
----------------------------------------------------------------------
diff --git a/be/src/bufferpool/reservation-tracker-test.cc b/be/src/bufferpool/reservation-tracker-test.cc
index 93bf7b8..66ce287 100644
--- a/be/src/bufferpool/reservation-tracker-test.cc
+++ b/be/src/bufferpool/reservation-tracker-test.cc
@@ -15,7 +15,6 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <gtest/gtest.h>
 #include <limits>
 #include <string>
 #include <vector>
@@ -24,7 +23,7 @@
 #include "common/init.h"
 #include "common/object-pool.h"
 #include "runtime/mem-tracker.h"
-#include "testutil/test-macros.h"
+#include "testutil/gtest-util.h"
 
 #include "common/names.h"
 
@@ -376,8 +375,4 @@ TEST_F(ReservationTrackerTest, MemTrackerIntegrationMultiLevel) {
 }
 }
 
-int main(int argc, char** argv) {
-  ::testing::InitGoogleTest(&argc, argv);
-  impala::InitCommonRuntime(argc, argv, true, impala::TestInfo::BE_TEST);
-  return RUN_ALL_TESTS();
-}
+IMPALA_TEST_MAIN();


[2/7] incubator-impala git commit: IMPALA-4270: Gracefully fail unsupported queries with mt_dop > 0.

Posted by he...@apache.org.
IMPALA-4270: Gracefully fail unsupported queries with mt_dop > 0.

MT_DOP > 0 is only supported for plans without distributed joins
or table sinks. Adds validation to fail unsupported queries
gracefully during planning.

For scans in queries that are executable with MT_DOP > 0, we either
use the optimized MT scan node BE implementation (currently Parquet
only) or the conventional scan node with num_scanner_threads=1.

TODO: Still need to add end-to-end tests.

Change-Id: I91a60ea7b6e3ae4ee44be856615ddd3cd0af476d
Reviewed-on: http://gerrit.cloudera.org:8080/4677
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/04802535
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/04802535
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/04802535

Branch: refs/heads/master
Commit: 04802535661979c50e5d06ef04e62eee677b901e
Parents: b0e87c6
Author: Alex Behm <al...@cloudera.com>
Authored: Mon Oct 10 11:03:43 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Mon Oct 17 09:22:57 2016 +0000

----------------------------------------------------------------------
 be/src/exec/exec-node.cc                        |   5 +-
 common/thrift/PlanNodes.thrift                  |   5 +
 .../org/apache/impala/analysis/Analyzer.java    |  10 +-
 .../org/apache/impala/planner/HdfsScanNode.java |  28 +-
 .../java/org/apache/impala/planner/Planner.java |  13 +-
 .../apache/impala/planner/PlannerContext.java   |  10 +-
 .../impala/planner/SingleNodePlanner.java       |  24 +-
 .../org/apache/impala/planner/PlannerTest.java  |  21 +-
 .../apache/impala/planner/PlannerTestBase.java  |  18 +-
 .../queries/PlannerTest/mt-dop-validation.test  | 350 +++++++++++++++++++
 10 files changed, 450 insertions(+), 34 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/04802535/be/src/exec/exec-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/exec-node.cc b/be/src/exec/exec-node.cc
index 837fc09..df491dd 100644
--- a/be/src/exec/exec-node.cc
+++ b/be/src/exec/exec-node.cc
@@ -264,9 +264,12 @@ Status ExecNode::CreateNode(ObjectPool* pool, const TPlanNode& tnode,
   switch (tnode.node_type) {
     case TPlanNodeType::HDFS_SCAN_NODE:
       *node = pool->Add(new HdfsScanNode(pool, tnode, descs));
-      if (state->query_options().mt_dop > 0) {
+      if (tnode.hdfs_scan_node.use_mt_scan_node) {
+        DCHECK_GT(state->query_options().mt_dop, 0);
         *node = pool->Add(new HdfsScanNodeMt(pool, tnode, descs));
       } else {
+        DCHECK(state->query_options().mt_dop == 0
+            || state->query_options().num_scanner_threads == 1);
         *node = pool->Add(new HdfsScanNode(pool, tnode, descs));
       }
       // If true, this node requests codegen over interpretation for conjuncts

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/04802535/common/thrift/PlanNodes.thrift
----------------------------------------------------------------------
diff --git a/common/thrift/PlanNodes.thrift b/common/thrift/PlanNodes.thrift
index 4cf1357..49fcfbb 100644
--- a/common/thrift/PlanNodes.thrift
+++ b/common/thrift/PlanNodes.thrift
@@ -202,6 +202,11 @@ struct THdfsScanNode {
   // Number of header lines to skip at the beginning of each file of this table. Only set
   // for hdfs text files.
   6: optional i32 skip_header_line_count
+
+  // Indicates whether the MT scan node implementation should be used.
+  // If this is true then the MT_DOP query option must be > 0.
+  // TODO: Remove this option when the MT scan node supports all file formats.
+  7: optional bool use_mt_scan_node
 }
 
 struct TDataSourceScanNode {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/04802535/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
index 3edddf2..f9909b1 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
@@ -30,9 +30,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
 import org.apache.impala.analysis.Path.PathType;
 import org.apache.impala.authorization.AuthorizationConfig;
 import org.apache.impala.authorization.Privilege;
@@ -66,10 +63,14 @@ import org.apache.impala.thrift.TCatalogObjectType;
 import org.apache.impala.thrift.TLineageGraph;
 import org.apache.impala.thrift.TNetworkAddress;
 import org.apache.impala.thrift.TQueryCtx;
+import org.apache.impala.thrift.TQueryOptions;
 import org.apache.impala.util.DisjointSet;
 import org.apache.impala.util.EventSequence;
 import org.apache.impala.util.ListMap;
 import org.apache.impala.util.TSessionStateUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
 import com.google.common.base.Predicates;
@@ -2246,6 +2247,9 @@ public class Analyzer {
   public String getDefaultDb() { return globalState_.queryCtx.session.database; }
   public User getUser() { return user_; }
   public TQueryCtx getQueryCtx() { return globalState_.queryCtx; }
+  public TQueryOptions getQueryOptions() {
+    return globalState_.queryCtx.getRequest().getQuery_options();
+  }
   public AuthorizationConfig getAuthzConfig() { return globalState_.authzConfig; }
   public ListMap<TNetworkAddress> getHostIndex() { return globalState_.hostIndex; }
   public ColumnLineageGraph getColumnLineageGraph() { return globalState_.lineageGraph; }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/04802535/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
index 4052867..3d52aa4 100644
--- a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
@@ -21,9 +21,7 @@ import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+import java.util.Set;
 
 import org.apache.impala.analysis.Analyzer;
 import org.apache.impala.analysis.Expr;
@@ -55,6 +53,9 @@ import org.apache.impala.thrift.TScanRange;
 import org.apache.impala.thrift.TScanRangeLocation;
 import org.apache.impala.thrift.TScanRangeLocations;
 import org.apache.impala.util.MembershipSnapshot;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import com.google.common.base.Joiner;
 import com.google.common.base.Objects;
 import com.google.common.base.Objects.ToStringHelper;
@@ -107,6 +108,9 @@ public class HdfsScanNode extends ScanNode {
   // True if this scan node should use codegen for evaluating conjuncts.
   private boolean codegenConjuncts_;
 
+  // True if this scan node should use the MT implementation in the backend.
+  private boolean useMtScanNode_;
+
   // Conjuncts that can be evaluated while materializing the items (tuples) of
   // collection-typed slots. Maps from tuple descriptor to the conjuncts bound by that
   // tuple. Uses a linked hash map for consistent display in explain.
@@ -168,7 +172,16 @@ public class HdfsScanNode extends ScanNode {
     computeMemLayout(analyzer);
 
     // compute scan range locations
-    computeScanRangeLocations(analyzer);
+    Set<HdfsFileFormat> fileFormats = computeScanRangeLocations(analyzer);
+
+    // Determine backend scan node implementation to use. The optimized MT implementation
+    // is currently only supported for Parquet.
+    if (analyzer.getQueryOptions().mt_dop > 0 &&
+        fileFormats.size() == 1 && fileFormats.contains(HdfsFileFormat.PARQUET)) {
+      useMtScanNode_ = true;
+    } else {
+      useMtScanNode_ = false;
+    }
 
     // do this at the end so it can take all conjuncts and scan ranges into account
     computeStats(analyzer);
@@ -298,12 +311,15 @@ public class HdfsScanNode extends ScanNode {
   /**
    * Computes scan ranges (hdfs splits) plus their storage locations, including volume
    * ids, based on the given maximum number of bytes each scan range should scan.
+   * Returns the set of file formats being scanned.
    */
-  private void computeScanRangeLocations(Analyzer analyzer) {
+  private Set<HdfsFileFormat> computeScanRangeLocations(Analyzer analyzer) {
     long maxScanRangeLength = analyzer.getQueryCtx().getRequest().getQuery_options()
         .getMax_scan_range_length();
     scanRanges_ = Lists.newArrayList();
+    Set<HdfsFileFormat> fileFormats = Sets.newHashSet();
     for (HdfsPartition partition: partitions_) {
+      fileFormats.add(partition.getFileFormat());
       Preconditions.checkState(partition.getId() >= 0);
       for (HdfsPartition.FileDescriptor fileDesc: partition.getFileDescriptors()) {
         for (THdfsFileBlock thriftBlock: fileDesc.getFileBlocks()) {
@@ -353,6 +369,7 @@ public class HdfsScanNode extends ScanNode {
         }
       }
     }
+    return fileFormats;
   }
 
   /**
@@ -542,6 +559,7 @@ public class HdfsScanNode extends ScanNode {
     if (skipHeaderLineCount_ > 0) {
       msg.hdfs_scan_node.setSkip_header_line_count(skipHeaderLineCount_);
     }
+    msg.hdfs_scan_node.setUse_mt_scan_node(useMtScanNode_);
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/04802535/fe/src/main/java/org/apache/impala/planner/Planner.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/Planner.java b/fe/src/main/java/org/apache/impala/planner/Planner.java
index ed4c677..8abb901 100644
--- a/fe/src/main/java/org/apache/impala/planner/Planner.java
+++ b/fe/src/main/java/org/apache/impala/planner/Planner.java
@@ -21,9 +21,6 @@ import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
 import org.apache.impala.analysis.AnalysisContext;
 import org.apache.impala.analysis.Analyzer;
 import org.apache.impala.analysis.ColumnLineageGraph;
@@ -43,6 +40,9 @@ import org.apache.impala.thrift.TQueryExecRequest;
 import org.apache.impala.thrift.TRuntimeFilterMode;
 import org.apache.impala.thrift.TTableName;
 import org.apache.impala.util.MaxRowsProcessedVisitor;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
@@ -117,12 +117,13 @@ public class Planner {
           "Runtime filters computed");
     }
 
+    singleNodePlanner.validatePlan(singleNodePlan);
+
     if (ctx_.isSingleNodeExec()) {
       // create one fragment containing the entire single-node plan tree
       fragments = Lists.newArrayList(new PlanFragment(
           ctx_.getNextFragmentId(), singleNodePlan, DataPartition.UNPARTITIONED));
     } else {
-      singleNodePlanner.validatePlan(singleNodePlan);
       // create distributed plan
       fragments = distributedPlanner.createPlanFragments(singleNodePlan);
     }
@@ -200,10 +201,14 @@ public class Planner {
    * TODO: roll into createPlan()
    */
   public List<PlanFragment> createParallelPlans() throws ImpalaException {
+    Preconditions.checkState(ctx_.getQueryOptions().mt_dop > 0);
     ArrayList<PlanFragment> distrPlan = createPlan();
     Preconditions.checkNotNull(distrPlan);
     ParallelPlanner planner = new ParallelPlanner(ctx_);
     List<PlanFragment> parallelPlans = planner.createPlans(distrPlan.get(0));
+    // Only use one scanner thread per scan-node instance since intra-node
+    // parallelism is achieved via multiple fragment instances.
+    ctx_.getQueryOptions().setNum_scanner_threads(1);
     ctx_.getRootAnalyzer().getTimeline().markEvent("Parallel plans created");
     return parallelPlans;
   }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/04802535/fe/src/main/java/org/apache/impala/planner/PlannerContext.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/PlannerContext.java b/fe/src/main/java/org/apache/impala/planner/PlannerContext.java
index 3275a7a..721acf9 100644
--- a/fe/src/main/java/org/apache/impala/planner/PlannerContext.java
+++ b/fe/src/main/java/org/apache/impala/planner/PlannerContext.java
@@ -25,6 +25,7 @@ import org.apache.impala.analysis.QueryStmt;
 import org.apache.impala.common.IdGenerator;
 import org.apache.impala.thrift.TQueryCtx;
 import org.apache.impala.thrift.TQueryOptions;
+
 import com.google.common.collect.Lists;
 
 /**
@@ -79,9 +80,7 @@ public class PlannerContext {
 
   public QueryStmt getQueryStmt() { return queryStmt_; }
   public TQueryCtx getQueryCtx() { return queryCtx_; }
-  public TQueryOptions getQueryOptions() {
-    return queryCtx_.getRequest().getQuery_options();
-  }
+  public TQueryOptions getQueryOptions() { return getRootAnalyzer().getQueryOptions(); }
   public AnalysisContext.AnalysisResult getAnalysisResult() { return analysisResult_; }
   public Analyzer getRootAnalyzer() { return analysisResult_.getAnalyzer(); }
   public boolean isSingleNodeExec() { return getQueryOptions().num_nodes == 1; }
@@ -91,7 +90,10 @@ public class PlannerContext {
     return analysisResult_.isInsertStmt() || analysisResult_.isCreateTableAsSelectStmt();
   }
   public boolean isQuery() { return analysisResult_.isQueryStmt(); }
-
+  public boolean hasTableSink() {
+    return isInsertOrCtas() || analysisResult_.isUpdateStmt()
+        || analysisResult_.isDeleteStmt();
+  }
   public boolean hasSubplan() { return !subplans_.isEmpty(); }
   public SubplanNode getSubplan() { return subplans_.getFirst(); }
   public boolean pushSubplan(SubplanNode n) { return subplans_.offerFirst(n); }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/04802535/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
index b686fe6..434e36d 100644
--- a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
+++ b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
@@ -27,9 +27,6 @@ import java.util.ListIterator;
 import java.util.Map;
 import java.util.Set;
 
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
 import org.apache.impala.analysis.AggregateInfo;
 import org.apache.impala.analysis.AnalyticInfo;
 import org.apache.impala.analysis.Analyzer;
@@ -67,6 +64,10 @@ import org.apache.impala.common.ImpalaException;
 import org.apache.impala.common.InternalException;
 import org.apache.impala.common.NotImplementedException;
 import org.apache.impala.common.Pair;
+import org.apache.impala.common.RuntimeEnv;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import com.google.common.base.Preconditions;
 import com.google.common.base.Predicate;
 import com.google.common.collect.Iterables;
@@ -148,11 +149,22 @@ public class SingleNodePlanner {
   }
 
   /**
-   * Validates a single-node plan by checking that it does not contain right or
-   * full outer joins with no equi-join conjuncts that are not inside the right child
-   * of a SubplanNode. Throws a NotImplementedException if plan validation fails.
+   * Checks that the given single-node plan is executable:
+   * - It may not contain right or full outer joins with no equi-join conjuncts that
+   *   are not inside the right child of a SubplanNode.
+   * - MT_DOP > 0 is not supported for plans with base table joins or table sinks.
+   * Throws a NotImplementedException if plan validation fails.
    */
   public void validatePlan(PlanNode planNode) throws NotImplementedException {
+    if (ctx_.getQueryOptions().mt_dop > 0 && !RuntimeEnv.INSTANCE.isTestEnv()
+        && (planNode instanceof JoinNode || ctx_.hasTableSink())) {
+      throw new NotImplementedException(
+          "MT_DOP not supported for plans with base table joins or table sinks.");
+    }
+
+    // As long as MT_DOP == 0 any join can run in a single-node plan.
+    if (ctx_.isSingleNodeExec() && ctx_.getQueryOptions().mt_dop == 0) return;
+
     if (planNode instanceof NestedLoopJoinNode) {
       JoinNode joinNode = (JoinNode) planNode;
       JoinOperator joinOp = joinNode.getJoinOp();

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/04802535/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
index 88a8631..6250969 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
@@ -17,14 +17,13 @@
 
 package org.apache.impala.planner;
 
-import org.junit.Assume;
-import org.junit.Test;
-
 import org.apache.impala.catalog.Db;
 import org.apache.impala.common.RuntimeEnv;
 import org.apache.impala.thrift.TExplainLevel;
 import org.apache.impala.thrift.TQueryOptions;
 import org.apache.impala.thrift.TRuntimeFilterMode;
+import org.junit.Assume;
+import org.junit.Test;
 
 // All planner tests, except for S3 specific tests should go here.
 public class PlannerTest extends PlannerTestBase {
@@ -279,4 +278,20 @@ public class PlannerTest extends PlannerTestBase {
     Assume.assumeTrue(RuntimeEnv.INSTANCE.isKuduSupported());
     runPlannerTestFile("tpch-kudu");
   }
+
+  @Test
+  public void testMtDopValidation() {
+    // Tests that queries supported with mt_dop > 0 produce a parallel plan, or
+    // throw a NotImplementedException otherwise (e.g. plan has a distributed join).
+    TQueryOptions options = defaultQueryOptions();
+    options.setMt_dop(3);
+    try {
+      // Temporarily unset the test env such that unsupported queries with mt_dop > 0
+      // throw an exception. Those are otherwise allowed for testing parallel plans.
+      RuntimeEnv.INSTANCE.setTestEnv(false);
+      runPlannerTestFile("mt-dop-validation", options);
+    } finally {
+      RuntimeEnv.INSTANCE.setTestEnv(true);
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/04802535/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java b/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java
index 284d7e5..9c12b89 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java
@@ -33,13 +33,6 @@ import java.util.regex.Pattern;
 
 import org.apache.commons.lang.exception.ExceptionUtils;
 import org.apache.hadoop.fs.Path;
-import org.apache.kudu.client.KuduClient;
-import org.apache.kudu.client.KuduScanToken;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
 import org.apache.impala.analysis.ColumnLineageGraph;
 import org.apache.impala.catalog.CatalogException;
 import org.apache.impala.common.FrontendTestBase;
@@ -72,6 +65,13 @@ import org.apache.impala.thrift.TTableDescriptor;
 import org.apache.impala.thrift.TTupleDescriptor;
 import org.apache.impala.thrift.TUpdateMembershipRequest;
 import org.apache.impala.util.MembershipSnapshot;
+import org.apache.kudu.client.KuduClient;
+import org.apache.kudu.client.KuduScanToken;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
@@ -503,6 +503,7 @@ public class PlannerTestBase extends FrontendTestBase {
     // Query exec request may not be set for DDL, e.g., CTAS.
     String locationsStr = null;
     if (execRequest != null && execRequest.isSetQuery_exec_request()) {
+      if (execRequest.query_exec_request.fragments == null) return;
       buildMaps(execRequest.query_exec_request);
       // If we optimize the partition key scans, we may get all the partition key values
       // from the metadata and don't reference any table. Skip the check in this case.
@@ -563,7 +564,8 @@ public class PlannerTestBase extends FrontendTestBase {
       String query, TExecRequest execRequest, StringBuilder errorLog) {
     if (execRequest == null) return;
     if (!execRequest.isSetQuery_exec_request()
-        || execRequest.query_exec_request == null) {
+        || execRequest.query_exec_request == null
+        || execRequest.query_exec_request.fragments == null) {
       return;
     }
     for (TPlanFragment planFragment : execRequest.query_exec_request.fragments) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/04802535/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test b/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test
new file mode 100644
index 0000000..fe25599
--- /dev/null
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test
@@ -0,0 +1,350 @@
+# Distributed nested-loop join not allowed.
+select count(*) from
+functional_parquet.alltypestiny a,
+functional_parquet.alltypestiny b
+---- PLAN
+not implemented: MT_DOP not supported for plans with base table joins or table sinks.
+---- PARALLELPLANS
+not implemented: MT_DOP not supported for plans with base table joins or table sinks.
+====
+# Distributed hash-join not allowed.
+select count(*) from
+functional_parquet.alltypestiny a,
+functional_parquet.alltypestiny b
+where a.id = b.id
+---- PLAN
+not implemented: MT_DOP not supported for plans with base table joins or table sinks.
+---- PARALLELPLANS
+not implemented: MT_DOP not supported for plans with base table joins or table sinks.
+====
+# Insert not allowed.
+insert into functional_parquet.alltypes partition(year,month)
+select * from functional_parquet.alltypessmall
+---- PLAN
+not implemented: MT_DOP not supported for plans with base table joins or table sinks.
+---- PARALLELPLANS
+not implemented: MT_DOP not supported for plans with base table joins or table sinks.
+====
+# CTAS not allowed.
+create table ctas_mt_dop_test as select * from functional_parquet.alltypes
+---- PLAN
+not implemented: MT_DOP not supported for plans with base table joins or table sinks.
+---- PARALLELPLANS
+not implemented: MT_DOP not supported for plans with base table joins or table sinks.
+====
+# Single-table scan/filter/agg/topn should work.
+select count(int_col) cnt from functional_parquet.alltypes
+where id < 10
+group by bigint_col
+order by cnt, bigint_col
+limit 10
+---- PLAN
+PLAN-ROOT SINK
+|
+02:TOP-N [LIMIT=10]
+|  order by: count(int_col) ASC, bigint_col ASC
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=2 row-size=16B cardinality=10
+|
+01:AGGREGATE [FINALIZE]
+|  output: count(int_col)
+|  group by: bigint_col
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=1 row-size=16B cardinality=unavailable
+|
+00:SCAN HDFS [functional_parquet.alltypes]
+   partitions=24/24 files=24 size=156.57KB
+   predicates: id < 10
+   table stats: unavailable
+   column stats: unavailable
+   hosts=3 per-host-mem=unavailable
+   tuple-ids=0 row-size=16B cardinality=unavailable
+---- PARALLELPLANS
+PLAN-ROOT SINK
+|
+05:MERGING-EXCHANGE [UNPARTITIONED]
+|  order by: count(int_col) ASC, bigint_col ASC
+|  limit: 10
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=2 row-size=16B cardinality=10
+|
+02:TOP-N [LIMIT=10]
+|  order by: count(int_col) ASC, bigint_col ASC
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=2 row-size=16B cardinality=10
+|
+04:AGGREGATE [FINALIZE]
+|  output: count:merge(int_col)
+|  group by: bigint_col
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=1 row-size=16B cardinality=unavailable
+|
+03:EXCHANGE [HASH(bigint_col)]
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=1 row-size=16B cardinality=unavailable
+|
+01:AGGREGATE [STREAMING]
+|  output: count(int_col)
+|  group by: bigint_col
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=1 row-size=16B cardinality=unavailable
+|
+00:SCAN HDFS [functional_parquet.alltypes, RANDOM]
+   partitions=24/24 files=24 size=156.57KB
+   predicates: id < 10
+   table stats: unavailable
+   column stats: unavailable
+   hosts=3 per-host-mem=unavailable
+   tuple-ids=0 row-size=16B cardinality=unavailable
+====
+# Single-table scan/filter/analytic should work.
+select row_number() over(partition by int_col order by id)
+from functional_parquet.alltypes
+where id < 10
+---- PLAN
+PLAN-ROOT SINK
+|
+02:ANALYTIC
+|  functions: row_number()
+|  partition by: int_col
+|  order by: id ASC
+|  window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=3,2 row-size=16B cardinality=unavailable
+|
+01:SORT
+|  order by: int_col ASC NULLS FIRST, id ASC
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=3 row-size=8B cardinality=unavailable
+|
+00:SCAN HDFS [functional_parquet.alltypes]
+   partitions=24/24 files=24 size=156.57KB
+   predicates: id < 10
+   table stats: unavailable
+   column stats: unavailable
+   hosts=3 per-host-mem=unavailable
+   tuple-ids=0 row-size=8B cardinality=unavailable
+---- PARALLELPLANS
+PLAN-ROOT SINK
+|
+04:EXCHANGE [UNPARTITIONED]
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=3,2 row-size=16B cardinality=unavailable
+|
+02:ANALYTIC
+|  functions: row_number()
+|  partition by: int_col
+|  order by: id ASC
+|  window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=3,2 row-size=16B cardinality=unavailable
+|
+01:SORT
+|  order by: int_col ASC NULLS FIRST, id ASC
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=3 row-size=8B cardinality=unavailable
+|
+03:EXCHANGE [HASH(int_col)]
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=0 row-size=8B cardinality=unavailable
+|
+00:SCAN HDFS [functional_parquet.alltypes, RANDOM]
+   partitions=24/24 files=24 size=156.57KB
+   predicates: id < 10
+   table stats: unavailable
+   column stats: unavailable
+   hosts=3 per-host-mem=unavailable
+   tuple-ids=0 row-size=8B cardinality=unavailable
+====
+# Nested-loop join in a subplan should work.
+select *
+from tpch_nested_parquet.customer c, c.c_orders o, o.o_lineitems
+where c_custkey < 10 and o_orderkey < 5 and l_linenumber < 3
+---- PLAN
+PLAN-ROOT SINK
+|
+01:SUBPLAN
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=2,1,0 row-size=562B cardinality=1500000
+|
+|--08:NESTED LOOP JOIN [CROSS JOIN]
+|  |  hosts=3 per-host-mem=unavailable
+|  |  tuple-ids=2,1,0 row-size=562B cardinality=100
+|  |
+|  |--02:SINGULAR ROW SRC
+|  |     parent-subplan=01
+|  |     hosts=3 per-host-mem=unavailable
+|  |     tuple-ids=0 row-size=254B cardinality=1
+|  |
+|  04:SUBPLAN
+|  |  hosts=3 per-host-mem=unavailable
+|  |  tuple-ids=2,1 row-size=308B cardinality=100
+|  |
+|  |--07:NESTED LOOP JOIN [CROSS JOIN]
+|  |  |  hosts=3 per-host-mem=unavailable
+|  |  |  tuple-ids=2,1 row-size=308B cardinality=10
+|  |  |
+|  |  |--05:SINGULAR ROW SRC
+|  |  |     parent-subplan=04
+|  |  |     hosts=3 per-host-mem=unavailable
+|  |  |     tuple-ids=1 row-size=124B cardinality=1
+|  |  |
+|  |  06:UNNEST [o.o_lineitems]
+|  |     parent-subplan=04
+|  |     hosts=3 per-host-mem=unavailable
+|  |     tuple-ids=2 row-size=0B cardinality=10
+|  |
+|  03:UNNEST [c.c_orders o]
+|     parent-subplan=01
+|     hosts=3 per-host-mem=unavailable
+|     tuple-ids=1 row-size=0B cardinality=10
+|
+00:SCAN HDFS [tpch_nested_parquet.customer c]
+   partitions=1/1 files=4 size=292.36MB
+   predicates: c_custkey < 10, !empty(c.c_orders)
+   predicates on o: !empty(o.o_lineitems), o_orderkey < 5
+   predicates on o_lineitems: l_linenumber < 3
+   table stats: 150000 rows total
+   columns missing stats: c_orders
+   hosts=3 per-host-mem=unavailable
+   tuple-ids=0 row-size=254B cardinality=15000
+---- PARALLELPLANS
+PLAN-ROOT SINK
+|
+09:EXCHANGE [UNPARTITIONED]
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=2,1,0 row-size=562B cardinality=1500000
+|
+01:SUBPLAN
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=2,1,0 row-size=562B cardinality=1500000
+|
+|--08:NESTED LOOP JOIN [CROSS JOIN]
+|  |  hosts=3 per-host-mem=unavailable
+|  |  tuple-ids=2,1,0 row-size=562B cardinality=100
+|  |
+|  |--02:SINGULAR ROW SRC
+|  |     parent-subplan=01
+|  |     hosts=3 per-host-mem=unavailable
+|  |     tuple-ids=0 row-size=254B cardinality=1
+|  |
+|  04:SUBPLAN
+|  |  hosts=3 per-host-mem=unavailable
+|  |  tuple-ids=2,1 row-size=308B cardinality=100
+|  |
+|  |--07:NESTED LOOP JOIN [CROSS JOIN]
+|  |  |  hosts=3 per-host-mem=unavailable
+|  |  |  tuple-ids=2,1 row-size=308B cardinality=10
+|  |  |
+|  |  |--05:SINGULAR ROW SRC
+|  |  |     parent-subplan=04
+|  |  |     hosts=3 per-host-mem=unavailable
+|  |  |     tuple-ids=1 row-size=124B cardinality=1
+|  |  |
+|  |  06:UNNEST [o.o_lineitems]
+|  |     parent-subplan=04
+|  |     hosts=3 per-host-mem=unavailable
+|  |     tuple-ids=2 row-size=0B cardinality=10
+|  |
+|  03:UNNEST [c.c_orders o]
+|     parent-subplan=01
+|     hosts=3 per-host-mem=unavailable
+|     tuple-ids=1 row-size=0B cardinality=10
+|
+00:SCAN HDFS [tpch_nested_parquet.customer c, RANDOM]
+   partitions=1/1 files=4 size=292.36MB
+   predicates: c_custkey < 10, !empty(c.c_orders)
+   predicates on o: !empty(o.o_lineitems), o_orderkey < 5
+   predicates on o_lineitems: l_linenumber < 3
+   table stats: 150000 rows total
+   columns missing stats: c_orders
+   hosts=3 per-host-mem=unavailable
+   tuple-ids=0 row-size=254B cardinality=15000
+====
+# Hash-join in a subplan should work.
+select c.*
+from tpch_nested_parquet.customer c, c.c_orders o1, c.c_orders o2
+where o1.o_orderkey = o2.o_orderkey + 2 and o1.o_orderkey < 5
+---- PLAN
+PLAN-ROOT SINK
+|
+01:SUBPLAN
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=1,0,2 row-size=286B cardinality=1500000
+|
+|--06:HASH JOIN [INNER JOIN]
+|  |  hash predicates: o1.o_orderkey = o2.o_orderkey + 2
+|  |  hosts=3 per-host-mem=unavailable
+|  |  tuple-ids=1,0,2 row-size=286B cardinality=10
+|  |
+|  |--04:UNNEST [c.c_orders o2]
+|  |     parent-subplan=01
+|  |     hosts=3 per-host-mem=unavailable
+|  |     tuple-ids=2 row-size=0B cardinality=10
+|  |
+|  05:NESTED LOOP JOIN [CROSS JOIN]
+|  |  hosts=3 per-host-mem=unavailable
+|  |  tuple-ids=1,0 row-size=278B cardinality=10
+|  |
+|  |--02:SINGULAR ROW SRC
+|  |     parent-subplan=01
+|  |     hosts=3 per-host-mem=unavailable
+|  |     tuple-ids=0 row-size=270B cardinality=1
+|  |
+|  03:UNNEST [c.c_orders o1]
+|     parent-subplan=01
+|     hosts=3 per-host-mem=unavailable
+|     tuple-ids=1 row-size=0B cardinality=10
+|
+00:SCAN HDFS [tpch_nested_parquet.customer c]
+   partitions=1/1 files=4 size=292.36MB
+   predicates: !empty(c.c_orders), !empty(c.c_orders)
+   predicates on o1: o1.o_orderkey < 5
+   table stats: 150000 rows total
+   columns missing stats: c_orders, c_orders
+   hosts=3 per-host-mem=unavailable
+   tuple-ids=0 row-size=270B cardinality=150000
+---- PARALLELPLANS
+PLAN-ROOT SINK
+|
+07:EXCHANGE [UNPARTITIONED]
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=1,0,2 row-size=286B cardinality=1500000
+|
+01:SUBPLAN
+|  hosts=3 per-host-mem=unavailable
+|  tuple-ids=1,0,2 row-size=286B cardinality=1500000
+|
+|--06:HASH JOIN [INNER JOIN]
+|  |  hash predicates: o1.o_orderkey = o2.o_orderkey + 2
+|  |  hosts=3 per-host-mem=unavailable
+|  |  tuple-ids=1,0,2 row-size=286B cardinality=10
+|  |
+|  |--04:UNNEST [c.c_orders o2]
+|  |     parent-subplan=01
+|  |     hosts=3 per-host-mem=unavailable
+|  |     tuple-ids=2 row-size=0B cardinality=10
+|  |
+|  05:NESTED LOOP JOIN [CROSS JOIN]
+|  |  hosts=3 per-host-mem=unavailable
+|  |  tuple-ids=1,0 row-size=278B cardinality=10
+|  |
+|  |--02:SINGULAR ROW SRC
+|  |     parent-subplan=01
+|  |     hosts=3 per-host-mem=unavailable
+|  |     tuple-ids=0 row-size=270B cardinality=1
+|  |
+|  03:UNNEST [c.c_orders o1]
+|     parent-subplan=01
+|     hosts=3 per-host-mem=unavailable
+|     tuple-ids=1 row-size=0B cardinality=10
+|
+00:SCAN HDFS [tpch_nested_parquet.customer c, RANDOM]
+   partitions=1/1 files=4 size=292.36MB
+   predicates: !empty(c.c_orders), !empty(c.c_orders)
+   predicates on o1: o1.o_orderkey < 5
+   table stats: 150000 rows total
+   columns missing stats: c_orders, c_orders
+   hosts=3 per-host-mem=unavailable
+   tuple-ids=0 row-size=270B cardinality=150000
+====


[4/7] incubator-impala git commit: IMPALA-4123: Fast bit unpacking

Posted by he...@apache.org.
IMPALA-4123: Fast bit unpacking

Adds utility functions for fast unpacking of batches of bit-packed
values. These support reading batches of any number of values provided
that the start of the batch is aligned to a byte boundary. Callers that
want to read smaller batches that don't align to byte boundaries will
need to implement their own buffering.
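
For context, a minimal usage sketch of the two entry points follows. The
call shapes are copied from the benchmark added further down in this
patch (the benchmark exercises bit widths 0..32); the bit width, buffer
sizes, and function name here are made up for illustration, and the
snippet only builds inside the Impala tree:

  #include <cstdint>
  #include <vector>
  #include "util/bit-packing.inline.h"

  using namespace impala;

  void UnpackSketch(const uint8_t* packed, int64_t packed_len) {
    constexpr int BIT_WIDTH = 3;     // example width
    std::vector<uint32_t> out(64);   // batch starts on a byte boundary
    // Unpack a variable-length batch in one call.
    BitPacking::UnpackValues(BIT_WIDTH, packed, packed_len, out.size(), out.data());
    // Or unpack a fixed batch of 32 values; the benchmark below treats the
    // return value as the position of the next unread input byte.
    const uint8_t* pos =
        BitPacking::Unpack32Values(BIT_WIDTH, packed, packed_len, out.data());
    (void)pos;
  }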

The unpacking code uses only portable C++ and no SIMD intrinsics, but is
fairly efficient: unpacking a full batch of 32 values compiles down to
32-bit loads, shifts by constants, masks by constants, bitwise ORs where
a value straddles two 32-bit words, and stores. Further speedups should
be possible using SIMD intrinsics.
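
To illustrate that scalar technique, here is a self-contained sketch
(not the code added by this patch): each output value is assembled from
one 32-bit word of the packed input, or from two words ORed together
when it straddles a word boundary. In the fully unrolled 32-value
version the shift and mask amounts become compile-time constants, which
is where the efficiency comes from. The sketch assumes LSB-first
packing, a bit width in 1..31, and an input buffer readable in whole
32-bit words (i.e. padded at the end):

  #include <cstdint>
  #include <cstring>

  void UnpackScalarSketch(int bit_width, const uint8_t* in, int num_values,
      uint32_t* out) {
    const uint32_t mask = (1u << bit_width) - 1;
    int64_t bit_offset = 0;
    for (int i = 0; i < num_values; ++i) {
      const int64_t word_idx = bit_offset / 32;
      const int shift = bit_offset % 32;
      uint32_t word, next;
      memcpy(&word, in + word_idx * 4, 4);  // 32-bit load
      uint32_t val = word >> shift;         // shift to the value's start
      if (shift + bit_width > 32) {
        // Value straddles two words: OR in the remaining bits from the next word.
        memcpy(&next, in + (word_idx + 1) * 4, 4);
        val |= next << (32 - shift);
      }
      out[i] = val & mask;                  // mask off the value's width
      bit_offset += bit_width;
    }
  }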

Testing:
Added unit tests for unpacking, exhaustively covering different
bit widths with additional test dimensions (memory alignment, various
input sizes, etc.).

Tested under ASAN to ensure the bit unpacking doesn't read past the end
of buffers.

Perf:
Added a microbenchmark that shows an average 8-9x speedup over the
existing BitReader code.

Change-Id: I12db69409483d208cd4c0f41c27a78aeb6cd3622
Reviewed-on: http://gerrit.cloudera.org:8080/4494
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/07da7679
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/07da7679
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/07da7679

Branch: refs/heads/master
Commit: 07da7679d1755ada836706f752d8078260a76244
Parents: ef762b7
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Wed Sep 14 10:44:08 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Tue Oct 18 02:53:16 2016 +0000

----------------------------------------------------------------------
 be/src/benchmarks/CMakeLists.txt           |  31 ++-
 be/src/benchmarks/bit-packing-benchmark.cc | 347 ++++++++++++++++++++++++
 be/src/benchmarks/bswap-benchmark.cc       |  23 +-
 be/src/exprs/expr-test.cc                  |   5 +-
 be/src/testutil/mem-util.h                 |  57 ++++
 be/src/util/CMakeLists.txt                 |   1 +
 be/src/util/bit-packing-test.cc            | 159 +++++++++++
 be/src/util/bit-packing.h                  |  92 +++++++
 be/src/util/bit-packing.inline.h           | 202 ++++++++++++++
 be/src/util/bit-stream-utils.h             |  16 +-
 be/src/util/bit-stream-utils.inline.h      |   2 +-
 be/src/util/bit-util.h                     |  68 +++--
 be/src/util/openssl-util-test.cc           |  10 +-
 13 files changed, 929 insertions(+), 84 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07da7679/be/src/benchmarks/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/be/src/benchmarks/CMakeLists.txt b/be/src/benchmarks/CMakeLists.txt
index 0a28dce..ba8bcfc 100644
--- a/be/src/benchmarks/CMakeLists.txt
+++ b/be/src/benchmarks/CMakeLists.txt
@@ -27,28 +27,29 @@ FUNCTION(ADD_BE_BENCHMARK BENCHMARK_NAME)
   TARGET_LINK_LIBRARIES(${BENCHMARK_NAME} ${IMPALA_LINK_LIBS})
 ENDFUNCTION()
 
-ADD_BE_BENCHMARK(parse-timestamp-benchmark)
-ADD_BE_BENCHMARK(string-search-benchmark)
 ADD_BE_BENCHMARK(atod-benchmark)
 ADD_BE_BENCHMARK(atof-benchmark)
 ADD_BE_BENCHMARK(atoi-benchmark)
-ADD_BE_BENCHMARK(lock-benchmark)
-ADD_BE_BENCHMARK(thread-create-benchmark)
-ADD_BE_BENCHMARK(tuple-layout-benchmark)
-ADD_BE_BENCHMARK(string-benchmark)
-ADD_BE_BENCHMARK(rle-benchmark)
-ADD_BE_BENCHMARK(string-compare-benchmark)
-ADD_BE_BENCHMARK(multiint-benchmark)
-ADD_BE_BENCHMARK(status-benchmark)
-ADD_BE_BENCHMARK(row-batch-serialize-benchmark)
-ADD_BE_BENCHMARK(overflow-benchmark)
-ADD_BE_BENCHMARK(bloom-filter-benchmark)
-ADD_BE_BENCHMARK(int-hash-benchmark)
 ADD_BE_BENCHMARK(bitmap-benchmark)
+ADD_BE_BENCHMARK(bit-packing-benchmark)
+ADD_BE_BENCHMARK(bloom-filter-benchmark)
+ADD_BE_BENCHMARK(bswap-benchmark)
 ADD_BE_BENCHMARK(expr-benchmark)
 ADD_BE_BENCHMARK(hash-benchmark)
 ADD_BE_BENCHMARK(in-predicate-benchmark)
+ADD_BE_BENCHMARK(int-hash-benchmark)
+ADD_BE_BENCHMARK(lock-benchmark)
+ADD_BE_BENCHMARK(multiint-benchmark)
 ADD_BE_BENCHMARK(network-perf-benchmark)
-ADD_BE_BENCHMARK(bswap-benchmark)
+ADD_BE_BENCHMARK(overflow-benchmark)
+ADD_BE_BENCHMARK(parse-timestamp-benchmark)
+ADD_BE_BENCHMARK(rle-benchmark)
+ADD_BE_BENCHMARK(row-batch-serialize-benchmark)
+ADD_BE_BENCHMARK(status-benchmark)
+ADD_BE_BENCHMARK(string-benchmark)
+ADD_BE_BENCHMARK(string-compare-benchmark)
+ADD_BE_BENCHMARK(string-search-benchmark)
+ADD_BE_BENCHMARK(thread-create-benchmark)
+ADD_BE_BENCHMARK(tuple-layout-benchmark)
 
 target_link_libraries(hash-benchmark Experiments)

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07da7679/be/src/benchmarks/bit-packing-benchmark.cc
----------------------------------------------------------------------
diff --git a/be/src/benchmarks/bit-packing-benchmark.cc b/be/src/benchmarks/bit-packing-benchmark.cc
new file mode 100644
index 0000000..6e80d83
--- /dev/null
+++ b/be/src/benchmarks/bit-packing-benchmark.cc
@@ -0,0 +1,347 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Test bit packing performance when unpacking data for all supported bit-widths.
+// This compares:
+// * BitReader - the original bit reader that unpacks a value at a time.
+// * Unpack32Scalar - a batched implementation using scalar operations to unpack batches
+//    of 32 values.
+// * UnpackScalar - an implementation that can unpack a variable number of values, using
+//   Unpack32Scalar internally.
+//
+//
+// Machine Info: Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz
+// Unpack32Values bit_width 0:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.57e+04 1.59e+04  1.6e+04         1X         1X         1X
+//                      Unpack32Scalar           1.34e+05 1.35e+05 1.36e+05      8.51X      8.49X      8.51X
+//                        UnpackScalar           2.08e+05  2.1e+05 2.12e+05      13.3X      13.2X      13.2X
+//
+// Unpack32Values bit_width 1:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.19e+04  1.2e+04  1.2e+04         1X         1X         1X
+//                      Unpack32Scalar           8.89e+04 8.94e+04 9.04e+04      7.48X      7.46X      7.51X
+//                        UnpackScalar           9.72e+04  9.8e+04 9.86e+04      8.18X      8.18X      8.19X
+//
+// Unpack32Values bit_width 2:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.18e+04 1.19e+04  1.2e+04         1X         1X         1X
+//                      Unpack32Scalar           8.84e+04 8.91e+04 8.99e+04      7.49X      7.48X       7.5X
+//                        UnpackScalar           9.68e+04 9.76e+04 9.84e+04       8.2X      8.19X      8.21X
+//
+// Unpack32Values bit_width 3:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.16e+04 1.17e+04 1.18e+04         1X         1X         1X
+//                      Unpack32Scalar           8.67e+04 8.72e+04 8.79e+04      7.45X      7.42X      7.43X
+//                        UnpackScalar            9.6e+04 9.66e+04 9.74e+04      8.25X      8.22X      8.24X
+//
+// Unpack32Values bit_width 4:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.08e+04 1.09e+04  1.1e+04         1X         1X         1X
+//                      Unpack32Scalar           9.13e+04 9.19e+04 9.25e+04      8.44X      8.43X      8.42X
+//                        UnpackScalar           9.65e+04 9.69e+04 9.78e+04      8.91X      8.89X       8.9X
+//
+// Unpack32Values bit_width 5:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.14e+04 1.15e+04 1.16e+04         1X         1X         1X
+//                      Unpack32Scalar           8.35e+04 8.42e+04 8.49e+04       7.3X      7.31X      7.31X
+//                        UnpackScalar           9.41e+04 9.48e+04 9.56e+04      8.22X      8.22X      8.24X
+//
+// Unpack32Values bit_width 6:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.14e+04 1.15e+04 1.16e+04         1X         1X         1X
+//                      Unpack32Scalar           8.46e+04 8.53e+04  8.6e+04       7.4X      7.41X      7.41X
+//                        UnpackScalar           9.35e+04 9.41e+04 9.51e+04      8.18X      8.16X       8.2X
+//
+// Unpack32Values bit_width 7:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.09e+04  1.1e+04 1.11e+04         1X         1X         1X
+//                      Unpack32Scalar           8.11e+04 8.16e+04 8.25e+04      7.44X      7.44X      7.45X
+//                        UnpackScalar           9.16e+04 9.21e+04  9.3e+04       8.4X       8.4X      8.39X
+//
+// Unpack32Values bit_width 8:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.14e+04 1.15e+04 1.16e+04         1X         1X         1X
+//                      Unpack32Scalar           9.02e+04 9.07e+04 9.14e+04       7.9X       7.9X      7.91X
+//                        UnpackScalar           9.48e+04 9.55e+04 9.63e+04      8.31X      8.33X      8.33X
+//
+// Unpack32Values bit_width 9:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.11e+04 1.12e+04 1.13e+04         1X         1X         1X
+//                      Unpack32Scalar           7.94e+04 7.97e+04 8.06e+04      7.14X      7.12X      7.14X
+//                        UnpackScalar           8.78e+04 8.83e+04  8.9e+04      7.89X      7.88X      7.89X
+//
+// Unpack32Values bit_width 10:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader            1.1e+04 1.11e+04 1.12e+04         1X         1X         1X
+//                      Unpack32Scalar           8.07e+04 8.14e+04 8.21e+04      7.31X      7.32X      7.34X
+//                        UnpackScalar           8.95e+04 9.02e+04 9.09e+04      8.11X      8.12X      8.12X
+//
+// Unpack32Values bit_width 11:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.09e+04  1.1e+04 1.11e+04         1X         1X         1X
+//                      Unpack32Scalar           7.63e+04 7.69e+04 7.75e+04      6.99X      6.99X      6.99X
+//                        UnpackScalar           8.55e+04 8.61e+04 8.69e+04      7.83X      7.83X      7.84X
+//
+// Unpack32Values bit_width 12:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.09e+04  1.1e+04  1.1e+04         1X         1X         1X
+//                      Unpack32Scalar           8.23e+04 8.29e+04 8.35e+04      7.55X      7.56X      7.57X
+//                        UnpackScalar           9.06e+04 9.12e+04 9.19e+04      8.31X      8.31X      8.33X
+//
+// Unpack32Values bit_width 13:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.07e+04 1.08e+04 1.09e+04         1X         1X         1X
+//                      Unpack32Scalar           7.42e+04 7.47e+04 7.55e+04      6.92X       6.9X      6.92X
+//                        UnpackScalar           8.16e+04 8.23e+04 8.29e+04       7.6X       7.6X      7.61X
+//
+// Unpack32Values bit_width 14:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.07e+04 1.08e+04 1.09e+04         1X         1X         1X
+//                      Unpack32Scalar           7.58e+04 7.62e+04 7.68e+04      7.08X      7.08X      7.08X
+//                        UnpackScalar           8.33e+04 8.38e+04 8.46e+04      7.78X      7.78X      7.79X
+//
+// Unpack32Values bit_width 15:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.06e+04 1.06e+04 1.07e+04         1X         1X         1X
+//                      Unpack32Scalar           7.16e+04 7.22e+04 7.29e+04      6.78X      6.79X      6.79X
+//                        UnpackScalar           7.96e+04 8.05e+04 8.09e+04      7.54X      7.57X      7.54X
+//
+// Unpack32Values bit_width 16:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.08e+04 1.08e+04 1.09e+04         1X         1X         1X
+//                      Unpack32Scalar           8.71e+04 8.76e+04 8.83e+04      8.09X      8.09X      8.08X
+//                        UnpackScalar           9.22e+04  9.3e+04 9.37e+04      8.56X      8.58X      8.57X
+//
+// Unpack32Values bit_width 17:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.04e+04 1.04e+04 1.05e+04         1X         1X         1X
+//                      Unpack32Scalar           6.98e+04 7.04e+04 7.09e+04      6.73X      6.74X      6.74X
+//                        UnpackScalar           7.73e+04 7.78e+04 7.85e+04      7.45X      7.45X      7.47X
+//
+// Unpack32Values bit_width 18:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.03e+04 1.04e+04 1.05e+04         1X         1X         1X
+//                      Unpack32Scalar            7.1e+04 7.17e+04 7.22e+04      6.86X      6.88X      6.87X
+//                        UnpackScalar           7.77e+04 7.82e+04 7.89e+04      7.51X       7.5X      7.51X
+//
+// Unpack32Values bit_width 19:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.02e+04 1.03e+04 1.04e+04         1X         1X         1X
+//                      Unpack32Scalar           6.74e+04  6.8e+04 6.85e+04      6.59X       6.6X      6.61X
+//                        UnpackScalar           7.43e+04 7.49e+04 7.54e+04      7.26X      7.27X      7.28X
+//
+// Unpack32Values bit_width 20:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.02e+04 1.03e+04 1.03e+04         1X         1X         1X
+//                      Unpack32Scalar           7.28e+04 7.34e+04  7.4e+04      7.15X      7.15X      7.15X
+//                        UnpackScalar           7.94e+04 8.02e+04 8.07e+04       7.8X      7.81X       7.8X
+//
+// Unpack32Values bit_width 21:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           1.01e+04 1.01e+04 1.02e+04         1X         1X         1X
+//                      Unpack32Scalar           6.56e+04 6.62e+04 6.67e+04      6.53X      6.54X      6.54X
+//                        UnpackScalar            7.1e+04 7.15e+04 7.19e+04      7.06X      7.06X      7.06X
+//
+// Unpack32Values bit_width 22:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader              1e+04 1.01e+04 1.02e+04         1X         1X         1X
+//                      Unpack32Scalar           6.68e+04 6.73e+04 6.79e+04      6.68X      6.68X      6.68X
+//                        UnpackScalar           7.35e+04 7.41e+04 7.46e+04      7.34X      7.35X      7.35X
+//
+// Unpack32Values bit_width 23:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           9.87e+03 9.95e+03    1e+04         1X         1X         1X
+//                      Unpack32Scalar           6.44e+04 6.48e+04 6.53e+04      6.52X      6.52X      6.51X
+//                        UnpackScalar           6.93e+04 6.97e+04 7.04e+04      7.03X      7.01X      7.02X
+//
+// Unpack32Values bit_width 24:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           9.93e+03    1e+04 1.01e+04         1X         1X         1X
+//                      Unpack32Scalar           7.44e+04 7.49e+04 7.55e+04      7.49X      7.49X      7.49X
+//                        UnpackScalar           8.12e+04 8.17e+04 8.27e+04      8.18X      8.17X       8.2X
+//
+// Unpack32Values bit_width 25:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           9.71e+03 9.79e+03 9.86e+03         1X         1X         1X
+//                      Unpack32Scalar           6.12e+04 6.16e+04 6.22e+04      6.31X      6.29X      6.31X
+//                        UnpackScalar           6.44e+04 6.48e+04 6.53e+04      6.64X      6.62X      6.62X
+//
+// Unpack32Values bit_width 26:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           9.67e+03 9.74e+03 9.81e+03         1X         1X         1X
+//                      Unpack32Scalar           6.21e+04 6.26e+04 6.31e+04      6.42X      6.42X      6.43X
+//                        UnpackScalar           6.53e+04 6.59e+04 6.64e+04      6.75X      6.77X      6.76X
+//
+// Unpack32Values bit_width 27:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           9.56e+03 9.62e+03  9.7e+03         1X         1X         1X
+//                      Unpack32Scalar           5.99e+04 6.03e+04 6.09e+04      6.27X      6.27X      6.28X
+//                        UnpackScalar           6.32e+04 6.35e+04 6.42e+04      6.61X       6.6X      6.62X
+//
+// Unpack32Values bit_width 28:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           9.53e+03 9.61e+03 9.66e+03         1X         1X         1X
+//                      Unpack32Scalar           6.37e+04 6.42e+04 6.47e+04      6.69X      6.68X       6.7X
+//                        UnpackScalar           6.68e+04 6.73e+04 6.77e+04      7.01X         7X      7.01X
+//
+// Unpack32Values bit_width 29:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           9.41e+03 9.46e+03 9.55e+03         1X         1X         1X
+//                      Unpack32Scalar           5.79e+04 5.82e+04 5.87e+04      6.15X      6.15X      6.14X
+//                        UnpackScalar           6.08e+04 6.11e+04 6.16e+04      6.46X      6.46X      6.46X
+//
+// Unpack32Values bit_width 30:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           9.37e+03 9.45e+03 9.52e+03         1X         1X         1X
+//                      Unpack32Scalar           5.87e+04 5.92e+04 5.96e+04      6.26X      6.27X      6.26X
+//                        UnpackScalar           6.16e+04  6.2e+04 6.26e+04      6.58X      6.56X      6.57X
+//
+// Unpack32Values bit_width 31:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           9.26e+03 9.33e+03 9.41e+03         1X         1X         1X
+//                      Unpack32Scalar           5.59e+04 5.63e+04 5.67e+04      6.03X      6.03X      6.03X
+//                        UnpackScalar           5.85e+04 5.89e+04 5.94e+04      6.31X      6.31X      6.31X
+//
+// Unpack32Values bit_width 32:Function  iters/ms   10%ile   50%ile   90%ile     10%ile     50%ile     90%ile
+//                                                                          (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+//                           BitReader           9.89e+03 9.96e+03    1e+04         1X         1X         1X
+//                      Unpack32Scalar           9.83e+04 9.96e+04 1.01e+05      9.95X        10X        10X
+//                        UnpackScalar           8.24e+04 8.36e+04 8.44e+04      8.34X       8.4X      8.41X
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include "gutil/strings/substitute.h"
+#include "util/benchmark.h"
+#include "util/bit-packing.inline.h"
+#include "util/bit-stream-utils.inline.h"
+#include "util/cpu-info.h"
+
+#include "common/names.h"
+
+using namespace impala;
+
+constexpr int NUM_OUT_VALUES = 1024 * 1024;
+static_assert(NUM_OUT_VALUES % 32 == 0, "NUM_OUT_VALUES must be divisible by 32");
+
+uint32_t out_buffer[NUM_OUT_VALUES];
+
+struct BenchmarkParams {
+  int bit_width;
+  const uint8_t* data;
+  int64_t data_len;
+};
+
+/// Benchmark calling BitReader::GetValue() in a loop to unpack 32 * 'batch_size' values.
+void BitReaderBenchmark(int batch_size, void* data) {
+  const BenchmarkParams* p = reinterpret_cast<BenchmarkParams*>(data);
+  BitReader reader(p->data, p->data_len);
+  for (int i = 0; i < batch_size; ++i) {
+    for (int j = 0; j < 32; ++j) {
+      const int64_t offset = (i * 32 + j) % NUM_OUT_VALUES;
+      if (UNLIKELY(!reader.GetValue<uint32_t>(p->bit_width, &out_buffer[offset]))) {
+        reader.Reset(p->data, p->data_len);
+        const bool success = reader.GetValue<uint32_t>(p->bit_width, &out_buffer[offset]);
+        DCHECK(success);
+      }
+    }
+  }
+}
+
+/// Benchmark calling Unpack32Values() in a loop to unpack 32 * 'batch_size' values.
+void Unpack32Benchmark(int batch_size, void* data) {
+  const BenchmarkParams* p = reinterpret_cast<BenchmarkParams*>(data);
+  const uint8_t* pos = reinterpret_cast<const uint8_t*>(p->data);
+  const uint8_t* const data_end = pos + p->data_len;
+  for (int i = 0; i < batch_size; ++i) {
+    if (UNLIKELY(pos >= data_end)) pos = reinterpret_cast<const uint8_t*>(p->data);
+    const int64_t offset = (i * 32) % NUM_OUT_VALUES;
+    pos = BitPacking::Unpack32Values(
+        p->bit_width, pos, data_end - pos, out_buffer + offset);
+  }
+}
+
+/// Benchmark calling UnpackValues() to unpack 32 * 'batch_size' values.
+void UnpackBenchmark(int batch_size, void* data) {
+  const BenchmarkParams* p = reinterpret_cast<BenchmarkParams*>(data);
+  const int64_t total_values_to_unpack = 32L * batch_size;
+  for (int64_t unpacked = 0; unpacked < total_values_to_unpack;
+       unpacked += NUM_OUT_VALUES) {
+    const int64_t unpack_batch =
+        min<int64_t>(NUM_OUT_VALUES, total_values_to_unpack - unpacked);
+    BitPacking::UnpackValues(
+        p->bit_width, p->data, p->data_len, unpack_batch, out_buffer);
+  }
+}
+
+int main(int argc, char **argv) {
+  CpuInfo::Init();
+  cout << endl << Benchmark::GetMachineInfo() << endl;
+
+  for (int bit_width = 0; bit_width <= 32; ++bit_width) {
+    Benchmark suite(Substitute("Unpack32Values bit_width $0", bit_width));
+    const int64_t data_len = NUM_OUT_VALUES * bit_width / 8;
+    vector<uint8_t> data(data_len);
+    std::iota(data.begin(), data.end(), 0);
+    BenchmarkParams params{bit_width, data.data(), data_len};
+    suite.AddBenchmark(Substitute("BitReader", bit_width), BitReaderBenchmark, &params);
+    suite.AddBenchmark(
+        Substitute("Unpack32Scalar", bit_width), Unpack32Benchmark, &params);
+    suite.AddBenchmark(Substitute("UnpackScalar", bit_width), UnpackBenchmark, &params);
+    cout << suite.Measure() << endl;
+  }
+  return 0;
+}
+

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07da7679/be/src/benchmarks/bswap-benchmark.cc
----------------------------------------------------------------------
diff --git a/be/src/benchmarks/bswap-benchmark.cc b/be/src/benchmarks/bswap-benchmark.cc
index 6add717..f62d4fc 100644
--- a/be/src/benchmarks/bswap-benchmark.cc
+++ b/be/src/benchmarks/bswap-benchmark.cc
@@ -25,6 +25,7 @@
 #include "gutil/strings/substitute.h"
 #include "exec/parquet-common.h"
 #include "runtime/decimal-value.h"
+#include "testutil/mem-util.h"
 #include "util/benchmark.h"
 #include "util/cpu-info.h"
 #include "util/bit-util.h"
@@ -116,18 +117,6 @@ void TestSIMDSwap(int batch_size, void* d) {
   BitUtil::ByteSwap(data->outbuffer, data->inbuffer, data->num_values);
 }
 
-// Allocate 64-byte (an x86-64 cache line) aligned memory so it does not straddle cache
-// line boundaries. This is sufficient to meet alignment requirements for all SIMD
-// instructions, at least up to AVX-512.
-// Exit process if allocation fails.
-void* AllocateAligned(size_t size) {
-  void* ptr;
-  if (posix_memalign(&ptr, 64, size) != 0) {
-    LOG(FATAL) << "Failed to allocate " << size;
-  }
-  return ptr;
-}
-
 // Benchmark routine for FastScalar/"Pure" SSSE3/"Pure" AVX2/SIMD approaches
 void PerfBenchmark() {
   // Measure perf both when memory is perfectly aligned for SIMD and also misaligned.
@@ -135,18 +124,16 @@ void PerfBenchmark() {
   const vector<int> misalignments({0, 1, 4, max_misalignment});
   const int data_len = 1 << 20;
 
-  const unique_ptr<uint8_t, decltype(free)*> inbuffer(
-      reinterpret_cast<uint8_t*>(AllocateAligned(data_len + max_misalignment)), free);
-  const unique_ptr<uint8_t, decltype(free)*> outbuffer(
-      reinterpret_cast<uint8_t*>(AllocateAligned(data_len + max_misalignment)), free);
+  AlignedAllocation inbuffer(data_len + max_misalignment);
+  AlignedAllocation outbuffer(data_len + max_misalignment);
 
   for (const int misalign : misalignments) {
     Benchmark suite(Substitute("ByteSwap benchmark misalignment=$0", misalign));
     TestData data;
 
     data.num_values = data_len;
-    data.inbuffer = inbuffer.get() + misalign;
-    data.outbuffer = outbuffer.get() + misalign;
+    data.inbuffer = inbuffer.data() + misalign;
+    data.outbuffer = outbuffer.data() + misalign;
     InitData(data.inbuffer, data_len);
 
     const int baseline = suite.AddBenchmark("FastScalar", TestFastScalarSwap, &data, -1);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07da7679/be/src/exprs/expr-test.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index 88565e1..0a6b720 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -15,16 +15,15 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <math.h>
+#include <time.h>
 #include <limits>
 #include <map>
-#include <math.h>
 #include <string>
-#include <time.h>
 
 #include <boost/date_time/c_local_time_adjustor.hpp>
 #include <boost/date_time/posix_time/posix_time.hpp>
 #include <boost/lexical_cast.hpp>
-#include <boost/random/mersenne_twister.hpp>
 #include <boost/regex.hpp>
 #include <boost/unordered_map.hpp>
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07da7679/be/src/testutil/mem-util.h
----------------------------------------------------------------------
diff --git a/be/src/testutil/mem-util.h b/be/src/testutil/mem-util.h
new file mode 100644
index 0000000..78b7b48
--- /dev/null
+++ b/be/src/testutil/mem-util.h
@@ -0,0 +1,58 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef IMPALA_TESTUTIL_MEM_UTIL_H_
+#define IMPALA_TESTUTIL_MEM_UTIL_H_
+
+#include <cstdint>
+#include <cstdlib>
+
+#include "gutil/macros.h"
+
+namespace impala {
+
+/// Allocate 64-byte (an x86-64 cache line) aligned memory so it does not straddle cache
+/// line boundaries. This is sufficient to meet alignment requirements for all SIMD
+/// instructions, at least up to AVX-512.
+/// Exits process if allocation fails so should be used for tests and benchmarks only.
+inline uint8_t* AllocateAligned(size_t size) {
+  void* ptr;
+  if (posix_memalign(&ptr, 64, size) != 0) {
+    LOG(FATAL) << "Failed to allocate " << size;
+  }
+  return reinterpret_cast<uint8_t*>(ptr);
+}
+
+/// Scoped allocation with 64-byte alignment.
+/// Exits process if allocation fails so should be used for tests and benchmarks only.
+class AlignedAllocation {
+ public:
+  AlignedAllocation(size_t bytes) : data_(AllocateAligned(bytes)) {}
+  ~AlignedAllocation() { free(data_); }
+
+  uint8_t* data() { return data_; }
+ private:
+  DISALLOW_COPY_AND_ASSIGN(AlignedAllocation);
+
+  uint8_t* data_;
+};
+
+}
+
+#endif
+

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07da7679/be/src/util/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt
index 0dfd12e..ecc222a 100644
--- a/be/src/util/CMakeLists.txt
+++ b/be/src/util/CMakeLists.txt
@@ -105,6 +105,7 @@ target_link_libraries(loggingsupport ${IMPALA_LINK_LIBS_DYNAMIC_TARGETS})
 
 ADD_BE_TEST(benchmark-test)
 ADD_BE_TEST(bitmap-test)
+ADD_BE_TEST(bit-packing-test)
 ADD_BE_TEST(bit-util-test)
 ADD_BE_TEST(blocking-queue-test)
 ADD_BE_TEST(bloom-filter-test)

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07da7679/be/src/util/bit-packing-test.cc
----------------------------------------------------------------------
diff --git a/be/src/util/bit-packing-test.cc b/be/src/util/bit-packing-test.cc
new file mode 100644
index 0000000..bedf178
--- /dev/null
+++ b/be/src/util/bit-packing-test.cc
@@ -0,0 +1,159 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdio>
+#include <cstdlib>
+#include <random>
+
+#include "testutil/gtest-util.h"
+#include "testutil/mem-util.h"
+#include "util/bit-packing.inline.h"
+#include "util/bit-stream-utils.inline.h"
+
+#include "common/names.h"
+
+using std::uniform_int_distribution;
+using std::mt19937;
+
+namespace impala {
+
+namespace {
+uint32_t ComputeMask(int bit_width) {
+  return bit_width == 32 ? ~0U : (1U << bit_width) - 1;
+}
+}
+
+/// Test unpacking a subarray of values to/from smaller buffers that are sized to exactly
+/// fit the input and output. 'in' is the original unpacked input, 'packed' is the
+/// bit-packed data. The test copies 'num_in_values' packed values to a smaller temporary
+/// buffer, then unpacks them to another temporary buffer. Both buffers are sized to the
+/// minimum number of bytes required to fit the packed/unpacked data.
+///
+/// This is to test that we do not overrun either the input or output buffer for smaller
+/// batch sizes.
+void UnpackSubset(const uint32_t* in, const uint8_t* packed, int num_in_values,
+    int bit_width, bool aligned);
+
+/// Test a packing/unpacking round-trip of the 'num_in_values' values in 'in',
+/// packed with 'bit_width'. If 'aligned' is true, buffers for packed and unpacked data
+/// are allocated at a 64-byte aligned address. Otherwise the buffers are misaligned
+/// by 1 byte from a 64-byte aligned address.
+void PackUnpack(const uint32_t* in, int num_in_values, int bit_width, bool aligned) {
+  LOG(INFO) << "num_in_values = " << num_in_values << " bit_width = " << bit_width
+            << " aligned = " << aligned;
+
+  // Mask out higher bits so that the values to pack are in range.
+  const uint32_t mask = ComputeMask(bit_width);
+  const int misalignment = aligned ? 0 : 1;
+
+  const int bytes_required = BitUtil::RoundUpNumBytes(bit_width * num_in_values);
+  AlignedAllocation storage(bytes_required + misalignment);
+  uint8_t* packed = storage.data() + misalignment;
+
+  BitWriter writer(packed, bytes_required);
+  if (bit_width > 0) {
+    for (int i = 0; i < num_in_values; ++i) {
+      ASSERT_TRUE(writer.PutValue(in[i] & mask, bit_width));
+    }
+  }
+  writer.Flush();
+  LOG(INFO) << "Wrote " << writer.bytes_written() << " bytes.";
+
+  // Test unpacking all the values. Trying to unpack extra values should have the same
+  // result because the input buffer size 'num_in_values' limits the number of values to
+  // return.
+  for (const int num_to_unpack : {num_in_values, num_in_values + 1, num_in_values + 77}) {
+    LOG(INFO) << "Unpacking " << num_to_unpack;
+    // Size buffer exactly so that ASAN can detect reads/writes that overrun the buffer.
+    AlignedAllocation out_storage(num_to_unpack * sizeof(uint32_t) + misalignment);
+    uint32_t* out = reinterpret_cast<uint32_t*>(out_storage.data() + misalignment);
+    const auto result = BitPacking::UnpackValues(
+        bit_width, packed, writer.bytes_written(), num_to_unpack, out);
+    ASSERT_EQ(packed + writer.bytes_written(), result.first)
+        << "Unpacked different # of bytes from the # written";
+    if (bit_width == 0) {
+      // If no bits, we can get back as many as we ask for.
+      ASSERT_EQ(num_to_unpack, result.second) << "Unpacked wrong # of values";
+    } else if (bit_width < CHAR_BIT) {
+      // We may get back some garbage values that we didn't actually pack if we
+      // didn't use all of the trailing byte.
+      const int max_packed_values = writer.bytes_written() * CHAR_BIT / bit_width;
+      ASSERT_EQ(min(num_to_unpack, max_packed_values), result.second)
+          << "Unpacked wrong # of values";
+    } else {
+      ASSERT_EQ(num_in_values, result.second) << "Unpacked wrong # of values";
+    }
+
+    for (int i = 0; i < num_in_values; ++i) {
+      EXPECT_EQ(in[i] & mask, out[i]) << "Didn't get back input value " << i;
+    }
+  }
+  UnpackSubset(in, packed, num_in_values, bit_width, aligned);
+}
+
+void UnpackSubset(const uint32_t* in, const uint8_t* packed, int num_in_values,
+    int bit_width, bool aligned) {
+  const int misalignment = aligned ? 0 : 1;
+  for (int num_to_unpack : {1, 10, 77, num_in_values - 7}) {
+    if (num_to_unpack < 0 || num_to_unpack > num_in_values) continue;
+
+    // Size buffers exactly so that ASAN can detect buffer overruns.
+    const int64_t bytes_to_read = BitUtil::RoundUpNumBytes(num_to_unpack * bit_width);
+    AlignedAllocation packed_copy_storage(bytes_to_read + misalignment);
+    uint8_t* packed_copy = packed_copy_storage.data() + misalignment;
+    memcpy(packed_copy, packed, bytes_to_read);
+    AlignedAllocation out_storage(num_to_unpack * sizeof(uint32_t) + misalignment);
+    uint32_t* out = reinterpret_cast<uint32_t*>(out_storage.data() + misalignment);
+    const auto result = BitPacking::UnpackValues(
+        bit_width, packed_copy, bytes_to_read, num_to_unpack, out);
+    ASSERT_EQ(packed_copy + bytes_to_read, result.first) << "Read wrong # of bytes";
+    ASSERT_EQ(num_to_unpack, result.second) << "Unpacked wrong # of values";
+
+    for (int i = 0; i < num_to_unpack; ++i) {
+      ASSERT_EQ(in[i] & ComputeMask(bit_width), out[i]) << "Didn't get back input value "
+                                                         << i;
+    }
+  }
+}
+
+TEST(BitPackingTest, RandomUnpack) {
+  constexpr int NUM_IN_VALUES = 64 * 1024;
+  uint32_t in[NUM_IN_VALUES];
+  mt19937 rng;
+  uniform_int_distribution<uint32_t> dist;
+  std::generate(std::begin(in), std::end(in), [&rng, &dist] { return dist(rng); });
+
+  // Test various odd input lengths to exercise boundary cases for full and partial
+  // batches of 32.
+  vector<int> lengths{NUM_IN_VALUES, NUM_IN_VALUES - 1, NUM_IN_VALUES - 16,
+      NUM_IN_VALUES - 19, NUM_IN_VALUES - 31};
+  for (int i = 0; i < 32; ++i) {
+    lengths.push_back(i);
+  }
+
+  for (int bit_width = 0; bit_width <= 32; ++bit_width) {
+    for (const int length : lengths) {
+      // Test that unpacking to/from aligned and unaligned memory works.
+      for (const bool aligned : {true, false}) {
+        PackUnpack(in, length, bit_width, aligned);
+      }
+    }
+  }
+}
+}
+
+IMPALA_TEST_MAIN();

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07da7679/be/src/util/bit-packing.h
----------------------------------------------------------------------
diff --git a/be/src/util/bit-packing.h b/be/src/util/bit-packing.h
new file mode 100644
index 0000000..62e5e88
--- /dev/null
+++ b/be/src/util/bit-packing.h
@@ -0,0 +1,92 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef IMPALA_UTIL_BIT_PACKING_H
+#define IMPALA_UTIL_BIT_PACKING_H
+
+#include <cstdint>
+
+#include <utility>
+
+namespace impala {
+
+/// Utilities for manipulating bit-packed values. Bit-packing is a technique for
+/// compressing integer values that do not use the full range of the integer type.
+/// E.g. an array of uint32_t values with range [0, 31] only uses the lower 5 bits
+/// of every uint32_t value, or an array of 0/1 booleans only uses the lowest bit
+/// of each integer.
+///
+/// Bit-packing always has a "bit width" parameter that determines the range of
+/// representable unsigned values: [0, 2^bit_width - 1]. The packed representation
+/// is logically the concatenation of the lower bits of the input values (in
+/// little-endian order). E.g. the values 1, 2, 3, 4 packed with bit width 4 results
+/// in the two output bytes: [ 0 0 1 0 | 0 0 0 1 ] [ 0 1 0 0 | 0 0 1 1 ]
+///                               2         1           4         3
+///
+/// Packed values can be split across words, e.g. packing 1, 17 with bit_width 5 results
+/// in the two output bytes: [ 0 0 1 | 0 0 0 0 1 ] [ x x x x x x | 1 0 ]
+///            lower bits of 17--^         1         next value     ^--upper bits of 17
+///
+/// Bit widths from 0 to 32 are supported (0 bit width means that every value is 0).
+/// The batched unpacking functions operate on batches of 32 values. This batch size
+/// is convenient because for every supported bit width, the end of a 32 value batch
+/// falls on a byte boundary. It is also large enough to amortise loop overheads.
+class BitPacking {
+ public:
+  /// Unpack bit-packed values with 'bit_width' from 'in' to 'out'. Keeps unpacking until
+  /// either all 'in_bytes' are read or 'num_values' values are unpacked. 'out' must have
+  /// enough space for 'num_values'. 0 <= 'bit_width' <= 32 and 'bit_width' <= # of bits
+  /// in OutType. 'in' must point to 'in_bytes' of addressable memory.
+  ///
+  /// Returns a pointer to the byte after the last byte of 'in' that was read and also the
+  /// number of values that were read. If the caller wants to continue reading packed
+  /// values after the last one returned, it must ensure that the next value to unpack
+  /// starts at a byte boundary. This is true if 'num_values' is a multiple of 32, or
+  /// more generally if (bit_width * num_values) % 8 == 0.
+  template <typename OutType>
+  static std::pair<const uint8_t*, int64_t> UnpackValues(int bit_width,
+      const uint8_t* __restrict__ in, int64_t in_bytes, int64_t num_values,
+      OutType* __restrict__ out);
+
+  /// Unpack exactly 32 values of 'bit_width' from 'in' to 'out'. 'in' must point to
+  /// 'in_bytes' of addressable memory, and 'in_bytes' must be at least
+  /// (32 * bit_width / 8). 'out' must have space for 32 OutType values.
+  /// 0 <= 'bit_width' <= 32 and 'bit_width' <= # of bits in OutType.
+  template <typename OutType>
+  static const uint8_t* Unpack32Values(int bit_width, const uint8_t* __restrict__ in,
+      int64_t in_bytes, OutType* __restrict__ out);
+
+ private:
+  /// Implementation of Unpack32Values() that uses 32-bit integer loads to
+  /// unpack values with the given BIT_WIDTH from 'in' to 'out'.
+  template <typename OutType, int BIT_WIDTH>
+  static const uint8_t* Unpack32Values(
+      const uint8_t* __restrict__ in, int64_t in_bytes, OutType* __restrict__ out);
+
+  /// Function that unpacks 'num_values' values with the given BIT_WIDTH from 'in' to
+  /// 'out'. 'num_values' must be at most 31. The version with 'bit_width' as an argument
+  /// dispatches based on 'bit_width' to the appropriate templated implementation.
+  template <typename OutType, int BIT_WIDTH>
+  static const uint8_t* UnpackUpTo32Values(const uint8_t* __restrict__ in,
+      int64_t in_bytes, int num_values, OutType* __restrict__ out);
+  template <typename OutType>
+  static const uint8_t* UnpackUpTo32Values(int bit_width, const uint8_t* __restrict__ in,
+      int64_t in_bytes, int num_values, OutType* __restrict__ out);
+};
+}
+
+#endif

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07da7679/be/src/util/bit-packing.inline.h
----------------------------------------------------------------------
diff --git a/be/src/util/bit-packing.inline.h b/be/src/util/bit-packing.inline.h
new file mode 100644
index 0000000..37d51ab
--- /dev/null
+++ b/be/src/util/bit-packing.inline.h
@@ -0,0 +1,204 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef IMPALA_UTIL_BIT_PACKING_INLINE_H
+#define IMPALA_UTIL_BIT_PACKING_INLINE_H
+
+#include "util/bit-packing.h"
+
+#include <algorithm>
+#include <climits>
+#include <cstring>
+#include <type_traits>
+
+#include <boost/preprocessor/repetition/repeat_from_to.hpp>
+
+#include "common/compiler-util.h"
+#include "common/logging.h"
+#include "util/bit-util.h"
+
+namespace impala {
+
+template <typename OutType>
+std::pair<const uint8_t*, int64_t> BitPacking::UnpackValues(int bit_width,
+    const uint8_t* __restrict__ in, int64_t in_bytes, int64_t num_values,
+    OutType* __restrict__ out) {
+  constexpr int BATCH_SIZE = 32;
+  const int64_t max_input_values =
+      bit_width ? (in_bytes * CHAR_BIT) / bit_width : num_values;
+  const int64_t values_to_read = std::min(num_values, max_input_values);
+  const int64_t batches_to_read = values_to_read / BATCH_SIZE;
+  const int64_t remainder_values = values_to_read % BATCH_SIZE;
+  const uint8_t* in_pos = in;
+  OutType* out_pos = out;
+  // First unpack as many full batches as possible.
+  for (int64_t i = 0; i < batches_to_read; ++i) {
+    in_pos = Unpack32Values<OutType>(bit_width, in_pos, in_bytes, out_pos);
+    out_pos += BATCH_SIZE;
+    in_bytes -= (BATCH_SIZE * bit_width) / CHAR_BIT;
+  }
+  // Then unpack the final partial batch.
+  if (remainder_values > 0) {
+    in_pos = UnpackUpTo32Values<OutType>(bit_width,
+        in_pos, in_bytes, remainder_values, out_pos);
+  }
+  return std::make_pair(in_pos, values_to_read);
+}
+
+// Loop body of unrolled loop that unpacks the value. BIT_WIDTH is the bit width of
+// the packed values. 'in_buf' is the start of the input buffer and 'out_vals' is the
+// start of the output values array. This function unpacks the VALUE_IDX'th packed value
+// from 'in_buf'.
+//
+// This implements essentially the same algorithm as the (Apache-licensed) code in
+// bpacking.c at https://github.com/lemire/FrameOfReference/, but is much more compact
+// because it uses templates rather than source-level unrolling of all combinations.
+//
+// After the template parameters is expanded and constants are propagated, all branches
+// and offset/shift calculations should be optimized out, leaving only shifts by constants
+// and bitmasks by constants. Calls to this must be stamped out manually or with
+// BOOST_PP_REPEAT_FROM_TO: experimentation revealed that the GCC 4.9.2 optimiser was
+// not able to fully propagate constants and remove branches when this was called from
+// inside a for loop with constant bounds with VALUE_IDX changed to a function argument.
+template <int BIT_WIDTH, int VALUE_IDX>
+inline uint32_t ALWAYS_INLINE UnpackValue(const uint8_t* __restrict__ in_buf) {
+  constexpr uint32_t LOAD_BIT_WIDTH = sizeof(uint32_t) * CHAR_BIT;
+  static_assert(BIT_WIDTH <= LOAD_BIT_WIDTH, "BIT_WIDTH > LOAD_BIT_WIDTH");
+  static_assert(VALUE_IDX >= 0 && VALUE_IDX < 32, "0 <= VALUE_IDX < 32");
+  // The index of the first bit of the value, relative to the start of 'in_buf'.
+  constexpr uint32_t FIRST_BIT = VALUE_IDX * BIT_WIDTH;
+  constexpr uint32_t IN_WORD_IDX = FIRST_BIT / LOAD_BIT_WIDTH;
+  constexpr uint32_t FIRST_BIT_OFFSET = FIRST_BIT % LOAD_BIT_WIDTH;
+  // Index of bit after last bit of this value, relative to start of IN_WORD_IDX.
+  constexpr uint32_t END_BIT_OFFSET = FIRST_BIT_OFFSET + BIT_WIDTH;
+
+  const uint32_t* in_words = reinterpret_cast<const uint32_t*>(in_buf);
+  // The lower bits of the value come from the first word.
+  const uint32_t lower_bits =
+      BIT_WIDTH > 0 ? in_words[IN_WORD_IDX] >> FIRST_BIT_OFFSET : 0U;
+  if (END_BIT_OFFSET < LOAD_BIT_WIDTH) {
+    // All bits of the value are in the first word, but we need to mask out upper bits
+    // that belong to the next value.
+    return lower_bits % (1UL << BIT_WIDTH);
+  } else if (END_BIT_OFFSET == LOAD_BIT_WIDTH) {
+    // This value was exactly the uppermost bits of the first word - no masking required.
+    return lower_bits;
+  } else {
+    DCHECK_GT(END_BIT_OFFSET, LOAD_BIT_WIDTH);
+    DCHECK_LT(VALUE_IDX, 31)
+        << "Should not go down this branch for last value with no trailing bits.";
+    // Value is split between words, so grab trailing bits from the next word.
+    // Force into [0, LOAD_BIT_WIDTH) to avoid spurious shift >= width of type warning.
+    constexpr uint32_t NUM_TRAILING_BITS =
+        END_BIT_OFFSET < LOAD_BIT_WIDTH ? 0 : END_BIT_OFFSET - LOAD_BIT_WIDTH;
+    const uint32_t trailing_bits = in_words[IN_WORD_IDX + 1] % (1UL << NUM_TRAILING_BITS);
+    // Force into [0, LOAD_BIT_WIDTH) to avoid spurious shift >= width of type warning.
+    constexpr uint32_t TRAILING_BITS_SHIFT =
+        BIT_WIDTH == 32 ? 0 : (BIT_WIDTH - NUM_TRAILING_BITS);
+    return lower_bits | (trailing_bits << TRAILING_BITS_SHIFT);
+  }
+}
+
+template <typename OutType, int BIT_WIDTH>
+const uint8_t* BitPacking::Unpack32Values(
+    const uint8_t* __restrict__ in, int64_t in_bytes, OutType* __restrict__ out) {
+  static_assert(BIT_WIDTH >= 0, "BIT_WIDTH too low");
+  static_assert(BIT_WIDTH <= 32, "BIT_WIDTH > 32");
+  static_assert(
+      BIT_WIDTH <= sizeof(OutType) * CHAR_BIT, "BIT_WIDTH too high for output type");
+  constexpr int BYTES_TO_READ = BitUtil::RoundUpNumBytes(32 * BIT_WIDTH);
+  DCHECK_GE(in_bytes, BYTES_TO_READ);
+
+// Call UnpackValue for 0 <= i < 32.
+#pragma push_macro("UNPACK_VALUES_CALL")
+#define UNPACK_VALUE_CALL(ignore1, i, ignore2) \
+  out[i] = static_cast<OutType>(UnpackValue<BIT_WIDTH, i>(in));
+  BOOST_PP_REPEAT_FROM_TO(0, 32, UNPACK_VALUE_CALL, ignore);
+#pragma pop_macro("UNPACK_VALUES_CALL")
+  return in + BYTES_TO_READ;
+}
+
+template <typename OutType>
+const uint8_t* BitPacking::Unpack32Values(int bit_width, const uint8_t* __restrict__ in,
+    int64_t in_bytes, OutType* __restrict__ out) {
+  switch (bit_width) {
+    // Expand cases from 0 to 32.
+#pragma push_macro("UNPACK_VALUES_CASE")
+#define UNPACK_VALUES_CASE(ignore1, i, ignore2) \
+    case i: return Unpack32Values<OutType, i>(in, in_bytes, out);
+    BOOST_PP_REPEAT_FROM_TO(0, 33, UNPACK_VALUES_CASE, ignore);
+#pragma pop_macro("UNPACK_VALUES_CASE")
+    default: DCHECK(false); return in;
+  }
+}
+
+template <typename OutType>
+const uint8_t* BitPacking::UnpackUpTo32Values(int bit_width, const uint8_t* __restrict__ in,
+    int64_t in_bytes, int num_values, OutType* __restrict__ out) {
+  switch (bit_width) {
+    // Expand cases from 0 to 32.
+#pragma push_macro("UNPACK_VALUES_CASE")
+#define UNPACK_VALUES_CASE(ignore1, i, ignore2) \
+    case i: return UnpackUpTo32Values<OutType, i>(in, in_bytes, num_values, out);
+    BOOST_PP_REPEAT_FROM_TO(0, 33, UNPACK_VALUES_CASE, ignore);
+#pragma pop_macro("UNPACK_VALUES_CASE")
+    default: DCHECK(false); return in;
+  }
+}
+
+template <typename OutType, int BIT_WIDTH>
+const uint8_t* BitPacking::UnpackUpTo32Values(const uint8_t* __restrict__ in,
+    int64_t in_bytes, int num_values, OutType* __restrict__ out) {
+  static_assert(BIT_WIDTH >= 0, "BIT_WIDTH too low");
+  static_assert(BIT_WIDTH <= 32, "BIT_WIDTH > 32");
+  static_assert(
+      BIT_WIDTH <= sizeof(OutType) * CHAR_BIT, "BIT_WIDTH too high for output type");
+  constexpr int MAX_BATCH_SIZE = 31;
+  const int BYTES_TO_READ = BitUtil::RoundUpNumBytes(num_values * BIT_WIDTH);
+  DCHECK_GE(in_bytes, BYTES_TO_READ);
+  DCHECK_LE(num_values, MAX_BATCH_SIZE);
+
+  // Make sure the buffer is at least 1 byte.
+  constexpr int TMP_BUFFER_SIZE = BIT_WIDTH ?
+    (BIT_WIDTH * (MAX_BATCH_SIZE + 1)) / CHAR_BIT : 1;
+  uint8_t tmp_buffer[TMP_BUFFER_SIZE];
+
+  const uint8_t* in_buffer = in;
+  // Copy into padded temporary buffer to avoid reading past the end of 'in' if the
+  // last 32-bit load would go past the end of the buffer.
+  if (BitUtil::RoundUp(BYTES_TO_READ, sizeof(uint32_t)) > in_bytes) {
+    memcpy(tmp_buffer, in, BYTES_TO_READ);
+    in_buffer = tmp_buffer;
+  }
+
+  // Use switch with fall-through cases to minimise branching.
+  switch (num_values) {
+// Expand cases from 31 down to 1.
+#pragma push_macro("UNPACK_VALUES_CASE")
+#define UNPACK_VALUES_CASE(ignore1, i, ignore2) \
+  case 31 - i: out[30 - i] = \
+      static_cast<OutType>(UnpackValue<BIT_WIDTH, 30 - i>(in_buffer));
+    BOOST_PP_REPEAT_FROM_TO(0, 31, UNPACK_VALUES_CASE, ignore);
+#pragma pop_macro("UNPACK_VALUES_CASE")
+    case 0: break;
+    default: DCHECK(false);
+  }
+  return in + BYTES_TO_READ;
+}
+}
+
+#endif

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07da7679/be/src/util/bit-stream-utils.h
----------------------------------------------------------------------
diff --git a/be/src/util/bit-stream-utils.h b/be/src/util/bit-stream-utils.h
index ce159cb..5acdeee 100644
--- a/be/src/util/bit-stream-utils.h
+++ b/be/src/util/bit-stream-utils.h
@@ -98,13 +98,19 @@ class BitWriter {
 class BitReader {
  public:
   /// 'buffer' is the buffer to read from.  The buffer's length is 'buffer_len'.
-  BitReader(uint8_t* buffer, int buffer_len) {
-    Reset(buffer, buffer_len);
-  }
+  /// Does not take ownership of the buffer.
+  BitReader(const uint8_t* buffer, int buffer_len) { Reset(buffer, buffer_len); }
 
   BitReader() : buffer_(NULL), max_bytes_(0) {}
 
-  void Reset(uint8_t* buffer, int buffer_len) {
+  // The implicit copy constructor is left defined. If a BitReader is copied, the
+  // two copies do not share any state. Invoking functions on either copy continues
+  // reading from the current read position without modifying the state of the other
+  // copy.
+
+  /// Resets the read to start reading from the start of 'buffer'. The buffer's
+  /// length is 'buffer_len'. Does not take ownership of the buffer.
+  void Reset(const uint8_t* buffer, int buffer_len) {
     buffer_ = buffer;
     max_bytes_ = buffer_len;
     byte_offset_ = 0;
@@ -141,7 +147,7 @@ class BitReader {
   static const int MAX_BITWIDTH = 32;
 
  private:
-  uint8_t* buffer_;
+  const uint8_t* buffer_;
   int max_bytes_;
 
   /// Bytes are memcpy'd from buffer_ and values are read from this variable. This is
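
(Illustrative sketch, not part of the patch: the BitReader copy semantics documented
in the comment added above, shown as a tiny standalone program. Each copy keeps its
own read position, so reading from one copy does not advance the other.)

#include <cstdint>
#include "util/bit-stream-utils.inline.h"

using namespace impala;

int main() {
  const uint8_t buffer[] = {0xAB, 0xCD};
  BitReader reader(buffer, sizeof(buffer));
  BitReader copy = reader;             // snapshot of the current read position
  uint32_t v1 = 0, v2 = 0;
  reader.GetValue<uint32_t>(4, &v1);   // v1 == 0xB (lowest 4 bits of 0xAB)
  copy.GetValue<uint32_t>(4, &v2);     // v2 == 0xB as well; 'reader' is unaffected
  return v1 == v2 ? 0 : 1;
}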

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07da7679/be/src/util/bit-stream-utils.inline.h
----------------------------------------------------------------------
diff --git a/be/src/util/bit-stream-utils.inline.h b/be/src/util/bit-stream-utils.inline.h
index fd77974..41648e3 100644
--- a/be/src/util/bit-stream-utils.inline.h
+++ b/be/src/util/bit-stream-utils.inline.h
@@ -86,7 +86,7 @@ inline bool BitWriter::PutVlqInt(int32_t v) {
 
 template<typename T>
 inline bool BitReader::GetValue(int num_bits, T* v) {
-  DCHECK(buffer_ != NULL);
+  DCHECK(num_bits == 0 || buffer_ != NULL);
   // TODO: revisit this limit if necessary
   DCHECK_LE(num_bits, MAX_BITWIDTH);
   DCHECK_LE(num_bits, sizeof(T) * 8);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07da7679/be/src/util/bit-util.h
----------------------------------------------------------------------
diff --git a/be/src/util/bit-util.h b/be/src/util/bit-util.h
index f947a17..33dd02b 100644
--- a/be/src/util/bit-util.h
+++ b/be/src/util/bit-util.h
@@ -41,17 +41,17 @@ using boost::make_unsigned;
 class BitUtil {
  public:
   /// Returns the ceil of value/divisor
-  static inline int64_t Ceil(int64_t value, int64_t divisor) {
+  constexpr static inline int64_t Ceil(int64_t value, int64_t divisor) {
     return value / divisor + (value % divisor != 0);
   }
 
   /// Returns 'value' rounded up to the nearest multiple of 'factor'
-  static inline int64_t RoundUp(int64_t value, int64_t factor) {
+  constexpr static inline int64_t RoundUp(int64_t value, int64_t factor) {
     return (value + (factor - 1)) / factor * factor;
   }
 
   /// Returns 'value' rounded down to the nearest multiple of 'factor'
-  static inline int64_t RoundDown(int64_t value, int64_t factor) {
+  constexpr static inline int64_t RoundDown(int64_t value, int64_t factor) {
     return (value / factor) * factor;
   }
 
@@ -85,34 +85,28 @@ class BitUtil {
   /// Specialized round up and down functions for frequently used factors,
   /// like 8 (bits->bytes), 32 (bits->i32), and 64 (bits->i64).
   /// Returns the rounded up number of bytes that fit the number of bits.
-  static inline uint32_t RoundUpNumBytes(uint32_t bits) {
+  constexpr static inline uint32_t RoundUpNumBytes(uint32_t bits) {
     return (bits + 7) >> 3;
   }
 
   /// Returns the rounded down number of bytes that fit the number of bits.
-  static inline uint32_t RoundDownNumBytes(uint32_t bits) {
-    return bits >> 3;
-  }
+  constexpr static inline uint32_t RoundDownNumBytes(uint32_t bits) { return bits >> 3; }
 
   /// Returns the rounded up to 32 multiple. Used for conversions of bits to i32.
-  static inline uint32_t RoundUpNumi32(uint32_t bits) {
+  constexpr static inline uint32_t RoundUpNumi32(uint32_t bits) {
     return (bits + 31) >> 5;
   }
 
   /// Returns the rounded up 32 multiple.
-  static inline uint32_t RoundDownNumi32(uint32_t bits) {
-    return bits >> 5;
-  }
+  constexpr static inline uint32_t RoundDownNumi32(uint32_t bits) { return bits >> 5; }
 
   /// Returns the rounded up to 64 multiple. Used for conversions of bits to i64.
-  static inline uint32_t RoundUpNumi64(uint32_t bits) {
+  constexpr static inline uint32_t RoundUpNumi64(uint32_t bits) {
     return (bits + 63) >> 6;
   }
 
   /// Returns the rounded down to 64 multiple.
-  static inline uint32_t RoundDownNumi64(uint32_t bits) {
-    return bits >> 6;
-  }
+  constexpr static inline uint32_t RoundDownNumi64(uint32_t bits) { return bits >> 6; }
 
   /// Non hw accelerated pop count.
   /// TODO: we don't use this in any perf sensitive code paths currently.  There
@@ -172,51 +166,51 @@ class BitUtil {
   /// swap for len > 16.
   static void ByteSwap(void* dest, const void* source, int len);
 
-  /// Converts to big endian format (if not already in big endian) from the
-  /// machine's native endian format.
+/// Converts to big endian format (if not already in big endian) from the
+/// machine's native endian format.
 #if __BYTE_ORDER == __LITTLE_ENDIAN
-  static inline int64_t  ToBigEndian(int64_t value)  { return ByteSwap(value); }
+  static inline int64_t ToBigEndian(int64_t value) { return ByteSwap(value); }
   static inline uint64_t ToBigEndian(uint64_t value) { return ByteSwap(value); }
-  static inline int32_t  ToBigEndian(int32_t value)  { return ByteSwap(value); }
+  static inline int32_t ToBigEndian(int32_t value) { return ByteSwap(value); }
   static inline uint32_t ToBigEndian(uint32_t value) { return ByteSwap(value); }
-  static inline int16_t  ToBigEndian(int16_t value)  { return ByteSwap(value); }
+  static inline int16_t ToBigEndian(int16_t value) { return ByteSwap(value); }
   static inline uint16_t ToBigEndian(uint16_t value) { return ByteSwap(value); }
 #else
-  static inline int64_t  ToBigEndian(int64_t val)  { return val; }
+  static inline int64_t ToBigEndian(int64_t val) { return val; }
   static inline uint64_t ToBigEndian(uint64_t val) { return val; }
-  static inline int32_t  ToBigEndian(int32_t val)  { return val; }
+  static inline int32_t ToBigEndian(int32_t val) { return val; }
   static inline uint32_t ToBigEndian(uint32_t val) { return val; }
-  static inline int16_t  ToBigEndian(int16_t val)  { return val; }
+  static inline int16_t ToBigEndian(int16_t val) { return val; }
   static inline uint16_t ToBigEndian(uint16_t val) { return val; }
 #endif
 
-  /// Converts from big endian format to the machine's native endian format.
+/// Converts from big endian format to the machine's native endian format.
 #if __BYTE_ORDER == __LITTLE_ENDIAN
-  static inline int64_t  FromBigEndian(int64_t value)  { return ByteSwap(value); }
+  static inline int64_t FromBigEndian(int64_t value) { return ByteSwap(value); }
   static inline uint64_t FromBigEndian(uint64_t value) { return ByteSwap(value); }
-  static inline int32_t  FromBigEndian(int32_t value)  { return ByteSwap(value); }
+  static inline int32_t FromBigEndian(int32_t value) { return ByteSwap(value); }
   static inline uint32_t FromBigEndian(uint32_t value) { return ByteSwap(value); }
-  static inline int16_t  FromBigEndian(int16_t value)  { return ByteSwap(value); }
+  static inline int16_t FromBigEndian(int16_t value) { return ByteSwap(value); }
   static inline uint16_t FromBigEndian(uint16_t value) { return ByteSwap(value); }
 #else
-  static inline int64_t  FromBigEndian(int64_t val)  { return val; }
+  static inline int64_t FromBigEndian(int64_t val) { return val; }
   static inline uint64_t FromBigEndian(uint64_t val) { return val; }
-  static inline int32_t  FromBigEndian(int32_t val)  { return val; }
+  static inline int32_t FromBigEndian(int32_t val) { return val; }
   static inline uint32_t FromBigEndian(uint32_t val) { return val; }
-  static inline int16_t  FromBigEndian(int16_t val)  { return val; }
+  static inline int16_t FromBigEndian(int16_t val) { return val; }
   static inline uint16_t FromBigEndian(uint16_t val) { return val; }
 #endif
 
   /// Returns true if 'value' is a non-negative 32-bit integer.
-  static inline bool IsNonNegative32Bit(int64_t value) {
+  constexpr static inline bool IsNonNegative32Bit(int64_t value) {
     return static_cast<uint64_t>(value) <= std::numeric_limits<int32_t>::max();
   }
 
   /// Logical right shift for signed integer types
   /// This is needed because the C >> operator does arithmetic right shift
   /// Negative shift amounts lead to undefined behavior
-  template<typename T>
-  static T ShiftRightLogical(T v, int shift) {
+  template <typename T>
+  constexpr static T ShiftRightLogical(T v, int shift) {
     // Conversion to unsigned ensures most significant bits always filled with 0's
     return static_cast<typename make_unsigned<T>::type>(v) >> shift;
   }
@@ -230,15 +224,15 @@ class BitUtil {
 
   /// Set a specific bit to 1
   /// Behavior when bitpos is negative is undefined
-  template<typename T>
-  static T SetBit(T v, int bitpos) {
+  template <typename T>
+  constexpr static T SetBit(T v, int bitpos) {
     return v | (static_cast<T>(0x1) << bitpos);
   }
 
   /// Set a specific bit to 0
   /// Behavior when bitpos is negative is undefined
-  template<typename T>
-  static T UnsetBit(T v, int bitpos) {
+  template <typename T>
+  constexpr static T UnsetBit(T v, int bitpos) {
     return v & ~(static_cast<T>(0x1) << bitpos);
   }
 };
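
(Illustrative note, not part of the patch: marking these helpers constexpr is what
lets the new bit-packing code fold byte counts into compile-time constants, e.g. the
constexpr BYTES_TO_READ in bit-packing.inline.h. A minimal check:)

#include "util/bit-util.h"

// 32 values at 13 bits occupy 32 * 13 = 416 bits = 52 bytes; with a constexpr
// RoundUpNumBytes() this can be verified in a constant expression.
static_assert(impala::BitUtil::RoundUpNumBytes(32 * 13) == 52,
    "RoundUpNumBytes should be usable at compile time");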

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07da7679/be/src/util/openssl-util-test.cc
----------------------------------------------------------------------
diff --git a/be/src/util/openssl-util-test.cc b/be/src/util/openssl-util-test.cc
index b0238bf..ef1b28e 100644
--- a/be/src/util/openssl-util-test.cc
+++ b/be/src/util/openssl-util-test.cc
@@ -15,17 +15,17 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <random>
+
 #include <gtest/gtest.h>
 #include <openssl/rand.h>
-#include <boost/random/mersenne_twister.hpp>
-#include <boost/random/uniform_int.hpp>
 
 #include "common/init.h"
 #include "testutil/gtest-util.h"
 #include "util/openssl-util.h"
 
-using boost::uniform_int;
-using boost::mt19937_64;
+using std::uniform_int_distribution;
+using std::mt19937_64;
 
 namespace impala {
 
@@ -40,7 +40,7 @@ class OpenSSLUtilTest : public ::testing::Test {
     DCHECK_EQ(len % 8, 0);
     for (int64_t i = 0; i < len; i += sizeof(uint64_t)) {
       *(reinterpret_cast<uint64_t*>(&data[i])) =
-          uniform_int<uint64_t>(0, numeric_limits<uint64_t>::max())(rng_);
+          uniform_int_distribution<uint64_t>(0, numeric_limits<uint64_t>::max())(rng_);
     }
   }
 


[3/7] incubator-impala git commit: IMPALA-4299: add buildall.sh option to start test cluster

Posted by he...@apache.org.
IMPALA-4299: add buildall.sh option to start test cluster

A previous commit "IMPALA-4259: build Impala without any test
cluster setup" altered some undocumented side-effects of
buildall.sh.

Previously the following commands reconfigured and restarted the test
cluster. It worked because buildall.sh unconditionally regenerated
the test cluster configs.

  ./buildall.sh -notests && ./testdata/bin/run-all.sh
  ./buildall.sh -noclean -notests && ./testdata/bin/run-all.sh

Instead of restoring the old behaviour and continuing to encourage
mixing use of low and high level scripts like testdata/bin/run-all.sh
as part of the "standard" workflow, this commit adds another
high-level option to buildall.sh, -start_minicluster, that
accomplishes the high-level task of restarting a minicluster with
fresh configs. The above commands can be replaced with:

  ./buildall.sh -notests -start_minicluster
  ./buildall.sh -notests -noclean -start_minicluster

Change-Id: I0ab3461f8ff3de49b3f28a0dc22fa0a6d5569da5
Reviewed-on: http://gerrit.cloudera.org:8080/4734
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/ef762b73
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/ef762b73
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/ef762b73

Branch: refs/heads/master
Commit: ef762b73a1ddb8842bb5bac5bfed733b9a71fc1b
Parents: 0480253
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Mon Oct 17 10:04:39 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Mon Oct 17 22:19:06 2016 +0000

----------------------------------------------------------------------
 buildall.sh | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ef762b73/buildall.sh
----------------------------------------------------------------------
diff --git a/buildall.sh b/buildall.sh
index ebd3eb3..a7858a3 100755
--- a/buildall.sh
+++ b/buildall.sh
@@ -46,6 +46,7 @@ TESTS_ACTION=1
 FORMAT_CLUSTER=0
 FORMAT_METASTORE=0
 FORMAT_SENTRY_POLICY_DB=0
+NEED_MINICLUSTER=0
 START_IMPALA_CLUSTER=0
 IMPALA_KERBERIZE=0
 SNAPSHOT_FILE=
@@ -133,6 +134,9 @@ do
       METASTORE_SNAPSHOT_FILE="$(readlink -f "$METASTORE_SNAPSHOT_FILE")"
       shift;
       ;;
+    -start_minicluster)
+      NEED_MINICLUSTER=1
+      ;;
     -start_impala_cluster)
       START_IMPALA_CLUSTER=1
       ;;
@@ -169,6 +173,10 @@ do
       echo "[-asan] : Address sanitizer build [Default: False]"
       echo "[-skiptests] : Skips execution of all tests"
       echo "[-notests] : Skips building and execution of all tests"
+      echo "[-start_minicluster] : Start test cluster including Impala and all"\
+            " its dependencies. If already running, all services are restarted."\
+            " Regenerates test cluster config files. [Default: True if running "\
+            " tests or loading data, False otherwise]"
       echo "[-start_impala_cluster] : Start Impala minicluster after build"\
            " [Default: False]"
       echo "[-testpairwise] : Run tests in 'pairwise' mode (increases"\
@@ -191,8 +199,12 @@ Examples of common tasks:
   # Build and skip tests
   ./buildall.sh -skiptests
 
-  # Incrementally rebuild and skip tests. Keeps existing Hadoop services running.
-  ./buildall.sh -skiptests -noclean
+  # Build, then restart the minicluster and Impala with fresh configs.
+  ./buildall.sh -notests -start_minicluster -start_impala_cluster
+
+  # Incrementally rebuild and skip tests. Keeps existing minicluster services running
+  # and restart Impala.
+  ./buildall.sh -skiptests -noclean -start_impala_cluster
 
   # Build, load a snapshot file, run tests
   ./buildall.sh -snapshot_file <file>
@@ -256,7 +268,6 @@ if [[ -z "$METASTORE_SNAPSHOT_FILE" && "${TARGET_FILESYSTEM}" != "hdfs" &&
   exit 1
 fi
 
-NEED_MINICLUSTER=0
 if [[ $TESTS_ACTION -eq 1 || $TESTDATA_ACTION -eq 1 || $FORMAT_CLUSTER -eq 1 ||
       $FORMAT_METASTORE -eq 1 || $FORMAT_SENTRY_POLICY_DB -eq 1 || -n "$SNAPSHOT_FILE" ||
       -n "$METASTORE_SNAPSHOT_FILE" ]]; then