You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by kw...@apache.org on 2016/09/27 21:38:48 UTC
[2/2] incubator-impala git commit: IMPALA-4008: Don't bake ExprContext pointers into IR code

IMPALA-4008: Don't bake ExprContext pointers into IR code

To allow genearated code to be shared across multiple fragment
instances, this change removes the ExprContext pointers baked
into various IR functions (e.g. AGG/PAGG/hash-table).

Change-Id: I42039eed803a39fa716b9ed647510b6440974ae5
Reviewed-on: http://gerrit.cloudera.org:8080/4390
Reviewed-by: Michael Ho <kw...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/6cc296ec
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/6cc296ec
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/6cc296ec

Branch: refs/heads/master
Commit: 6cc296ec85a4260446333f89b1f5df0d7bd1ec95
Parents: 4849e58
Author: Michael Ho <kw...@cloudera.com>
Authored: Fri Sep 9 00:56:46 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Tue Sep 27 20:20:17 2016 +0000

----------------------------------------------------------------------
 be/src/codegen/gen_ir_descriptions.py          |   6 +
 be/src/codegen/llvm-codegen.cc                 |   4 +
 be/src/codegen/llvm-codegen.h                  |   3 +
 be/src/exec/aggregation-node-ir.cc             |   8 +
 be/src/exec/aggregation-node.cc                | 180 +++++++++++-------
 be/src/exec/aggregation-node.h                 |  20 +-
 be/src/exec/hash-table-ir.cc                   |   4 +
 be/src/exec/hash-table.cc                      |  50 +++--
 be/src/exec/hash-table.h                       |  26 ++-
 be/src/exec/partitioned-aggregation-node-ir.cc |   8 +-
 be/src/exec/partitioned-aggregation-node.cc    | 192 ++++++++++++--------
 be/src/exec/partitioned-aggregation-node.h     |  19 +-
 be/src/exprs/agg-fn-evaluator.h                |   4 +-
 13 files changed, 349 insertions(+), 175 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6cc296ec/be/src/codegen/gen_ir_descriptions.py
----------------------------------------------------------------------
diff --git a/be/src/codegen/gen_ir_descriptions.py b/be/src/codegen/gen_ir_descriptions.py
index 9ba9d78..a12d73d 100755
--- a/be/src/codegen/gen_ir_descriptions.py
+++ b/be/src/codegen/gen_ir_descriptions.py
@@ -44,6 +44,8 @@ options, args = parser.parse_args()
 ir_functions = [
   ["AGG_NODE_PROCESS_ROW_BATCH_WITH_GROUPING", "ProcessRowBatchWithGrouping"],
   ["AGG_NODE_PROCESS_ROW_BATCH_NO_GROUPING", "ProcessRowBatchNoGrouping"],
+  ["AGG_NODE_GET_EXPR_CTX", "GetAggExprCtx"],
+  ["AGG_NODE_GET_FN_CTX", "GetAggFnCtx"],
   ["PART_AGG_NODE_PROCESS_BATCH_UNAGGREGATED",
       "PartitionedAggregationNode12ProcessBatchILb0"],
   ["PART_AGG_NODE_PROCESS_BATCH_AGGREGATED",
@@ -52,6 +54,8 @@ ir_functions = [
       "PartitionedAggregationNode22ProcessBatchNoGrouping"],
   ["PART_AGG_NODE_PROCESS_BATCH_STREAMING",
       "PartitionedAggregationNode21ProcessBatchStreaming"],
+  ["PART_AGG_NODE_GET_EXPR_CTX",
+      "PartitionedAggregationNode17GetAggExprContext"],
   ["AVG_UPDATE_BIGINT", "9AvgUpdateIN10impala_udf9BigIntVal"],
   ["AVG_UPDATE_DOUBLE", "9AvgUpdateIN10impala_udf9DoubleVal"],
   ["AVG_UPDATE_TIMESTAMP", "TimestampAvgUpdate"],
@@ -89,6 +93,8 @@ ir_functions = [
   ["PHJ_PROCESS_PROBE_BATCH_FULL_OUTER_JOIN", "ProcessProbeBatchILi8"],
   ["PHJ_INSERT_BATCH", "9Partition11InsertBatch"],
   ["HASH_TABLE_GET_HASH_SEED", "GetHashSeed"],
+  ["HASH_TABLE_GET_BUILD_EXPR_CTX", "HashTableCtx15GetBuildExprCtx"],
+  ["HASH_TABLE_GET_PROBE_EXPR_CTX", "HashTableCtx15GetProbeExprCtx"],
   ["HLL_UPDATE_BOOLEAN", "HllUpdateIN10impala_udf10BooleanVal"],
   ["HLL_UPDATE_TINYINT", "HllUpdateIN10impala_udf10TinyIntVal"],
   ["HLL_UPDATE_SMALLINT", "HllUpdateIN10impala_udf11SmallIntVal"],

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6cc296ec/be/src/codegen/llvm-codegen.cc
----------------------------------------------------------------------
diff --git a/be/src/codegen/llvm-codegen.cc b/be/src/codegen/llvm-codegen.cc
index b2a3b18..b107c51 100644
--- a/be/src/codegen/llvm-codegen.cc
+++ b/be/src/codegen/llvm-codegen.cc
@@ -581,6 +581,10 @@ PointerType* LlvmCodeGen::GetPtrType(Type* type) {
   return PointerType::get(type, 0);
 }
 
+PointerType* LlvmCodeGen::GetPtrPtrType(Type* type) {
+  return PointerType::get(PointerType::get(type, 0), 0);
+}
+
 // Llvm doesn't let you create a PointerValue from a c-side ptr.  Instead
 // cast it to an int and then to 'type'.
 Value* LlvmCodeGen::CastPtrToLlvmPtr(Type* type, const void* ptr) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6cc296ec/be/src/codegen/llvm-codegen.h
----------------------------------------------------------------------
diff --git a/be/src/codegen/llvm-codegen.h b/be/src/codegen/llvm-codegen.h
index fa9f1b1..2ef936f 100644
--- a/be/src/codegen/llvm-codegen.h
+++ b/be/src/codegen/llvm-codegen.h
@@ -218,6 +218,9 @@ class LlvmCodeGen {
   /// Return a pointer type to 'type'
   llvm::PointerType* GetPtrType(llvm::Type* type);
 
+  /// Return a pointer to pointer type to 'type'.
+  llvm::PointerType* GetPtrPtrType(llvm::Type* type);
+
   /// Returns llvm type for the column type
   llvm::Type* GetType(const ColumnType& type);
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6cc296ec/be/src/exec/aggregation-node-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/aggregation-node-ir.cc b/be/src/exec/aggregation-node-ir.cc
index 185196a..8050b58 100644
--- a/be/src/exec/aggregation-node-ir.cc
+++ b/be/src/exec/aggregation-node-ir.cc
@@ -28,6 +28,14 @@ using namespace impala;
 // Functions in this file are cross compiled to IR with clang.  These functions
 // are modified at runtime with a query specific codegen'd UpdateAggTuple
 
+FunctionContext* AggregationNode::GetAggFnCtx(int i) const {
+  return agg_fn_ctxs_[i];
+}
+
+ExprContext* AggregationNode::GetAggExprCtx(int i) const {
+  return agg_expr_ctxs_[i];
+}
+
 void AggregationNode::ProcessRowBatchNoGrouping(RowBatch* batch) {
   for (int i = 0; i < batch->num_rows(); ++i) {
     UpdateTuple(singleton_intermediate_tuple_, batch->GetRow(i));

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6cc296ec/be/src/exec/aggregation-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/aggregation-node.cc b/be/src/exec/aggregation-node.cc
index 2b9550a..909d42b 100644
--- a/be/src/exec/aggregation-node.cc
+++ b/be/src/exec/aggregation-node.cc
@@ -81,6 +81,18 @@ Status AggregationNode::Init(const TPlanNode& tnode, RuntimeState* state) {
     RETURN_IF_ERROR(AggFnEvaluator::Create(
         pool_, tnode.agg_node.aggregate_functions[i], &evaluator));
     aggregate_evaluators_.push_back(evaluator);
+    ExprContext* agg_expr_ctx;
+    if (evaluator->input_expr_ctxs().size() == 1) {
+      agg_expr_ctx = evaluator->input_expr_ctxs()[0];
+    } else {
+      // CodegenUpdateSlot() can only support aggregate operator with only one ExprContext
+      // so it doesn't support operator such as group_concat. There are also aggregate
+      // operators with no ExprContext (e.g. count(*)). In cases above, 'agg_expr_ctxs_'
+      // will contain NULL for that entry.
+      DCHECK(evaluator->agg_op() == AggFnEvaluator::OTHER || evaluator->is_count_star());
+      agg_expr_ctx = NULL;
+    }
+    agg_expr_ctxs_.push_back(agg_expr_ctx);
   }
   return Status::OK();
 }
@@ -302,6 +314,7 @@ void AggregationNode::Close(RuntimeState* state) {
   if (tuple_pool_.get() != NULL) tuple_pool_->FreeAll();
   if (hash_tbl_.get() != NULL) hash_tbl_->Close();
 
+  agg_expr_ctxs_.clear();
   DCHECK(agg_fn_ctxs_.empty() || aggregate_evaluators_.size() == agg_fn_ctxs_.size());
   for (int i = 0; i < aggregate_evaluators_.size(); ++i) {
     aggregate_evaluators_[i]->Close(state);
@@ -432,23 +445,26 @@ IRFunction::Type GetHllUpdateFunction2(const ColumnType& type) {
 }
 
 // IR Generation for updating a single aggregation slot. Signature is:
-// void UpdateSlot(FunctionContext* fn_ctx, AggTuple* agg_tuple, char** row)
+// void UpdateSlot(FunctionContext* fn_ctx, ExprContext* expr_ctx,
+//     AggTuple* agg_tuple, char** row)
 //
 // The IR for sum(double_col) is:
 // define void @UpdateSlot(%"class.impala_udf::FunctionContext"* %fn_ctx,
-//                         { i8, double }* %agg_tuple,
-//                         %"class.impala::TupleRow"* %row) #20 {
+//                         %"class.impala::ExprContext"* %expr_ctx,
+//                         { i8, [7 x i8], double }* %agg_tuple,
+//                         %"class.impala::TupleRow"* %row) #34 {
 // entry:
-//   %src = call { i8, double } @GetSlotRef(%"class.impala::ExprContext"* inttoptr
-//     (i64 128241264 to %"class.impala::ExprContext"*), %"class.impala::TupleRow"* %row)
+//   %src = call { i8, double } @GetSlotRef(%"class.impala::ExprContext"* %expr_ctx,
+//                                          %"class.impala::TupleRow"* %row)
 //   %0 = extractvalue { i8, double } %src, 0
 //   %is_null = trunc i8 %0 to i1
 //   br i1 %is_null, label %ret, label %src_not_null
 //
 // src_not_null:                                     ; preds = %entry
-//   %dst_slot_ptr = getelementptr inbounds { i8, double }* %agg_tuple, i32 0, i32 1
-//   call void @SetNotNull({ i8, double }* %agg_tuple)
-//   %dst_val = load double* %dst_slot_ptr
+//   %dst_slot_ptr = getelementptr inbounds { i8, [7 x i8], double },
+//       { i8, [7 x i8], double }* %agg_tuple, i32 0, i32 2
+//   call void @SetNotNull({ i8, [7 x i8], double }* %agg_tuple)
+//   %dst_val = load double, double* %dst_slot_ptr
 //   %val = extractvalue { i8, double } %src, 1
 //   %1 = fadd double %dst_val, %val
 //   store double %1, double* %dst_slot_ptr
@@ -460,25 +476,27 @@ IRFunction::Type GetHllUpdateFunction2(const ColumnType& type) {
 //
 // The IR for ndv(double_col) is:
 // define void @UpdateSlot(%"class.impala_udf::FunctionContext"* %fn_ctx,
-//                         { i8, %"struct.impala::StringValue" }* %agg_tuple,
-//                         %"class.impala::TupleRow"* %row) #20 {
+//                         %"class.impala::ExprContext"* %expr_ctx,
+//                         { i8, [7 x i8], %"struct.impala::StringValue" }* %agg_tuple,
+//                         %"class.impala::TupleRow"* %row) #34 {
 // entry:
 //   %dst_lowered_ptr = alloca { i64, i8* }
-//   %src_lowered_ptr = alloca { i8, double }
-//   %src = call { i8, double } @GetSlotRef(%"class.impala::ExprContext"* inttoptr
-//     (i64 120530832 to %"class.impala::ExprContext"*), %"class.impala::TupleRow"* %row)
-//   %0 = extractvalue { i8, double } %src, 0
-//   %is_null = trunc i8 %0 to i1
+//   %src_lowered_ptr = alloca { i64, i8* }
+//   %src = call { i64, i8* } @GetSlotRef(%"class.impala::ExprContext"* %expr_ctx,
+//                                        %"class.impala::TupleRow"* %row)
+//   %0 = extractvalue { i64, i8* } %src, 0
+//   %is_null = trunc i64 %0 to i1
 //   br i1 %is_null, label %ret, label %src_not_null
 //
 // src_not_null:                                     ; preds = %entry
-//   %dst_slot_ptr = getelementptr inbounds
-//     { i8, %"struct.impala::StringValue" }* %agg_tuple, i32 0, i32 1
-//   call void @SetNotNull({ i8, %"struct.impala::StringValue" }* %agg_tuple)
-//   %dst_val = load %"struct.impala::StringValue"* %dst_slot_ptr
-//   store { i8, double } %src, { i8, double }* %src_lowered_ptr
-//   %src_unlowered_ptr = bitcast { i8, double }* %src_lowered_ptr
-//                        to %"struct.impala_udf::DoubleVal"*
+//   %dst_slot_ptr = getelementptr inbounds { i8, [7 x i8], %"struct.impala::StringValue" },
+//       { i8, [7 x i8], %"struct.impala::StringValue" }* %agg_tuple, i32 0, i32 2
+//   call void @SetNotNull({ i8, [7 x i8], %"struct.impala::StringValue" }* %agg_tuple)
+//   %dst_val =
+//       load %"struct.impala::StringValue", %"struct.impala::StringValue"* %dst_slot_ptr
+//   store { i64, i8* } %src, { i64, i8* }* %src_lowered_ptr
+//   %src_unlowered_ptr =
+//       bitcast { i64, i8* }* %src_lowered_ptr to %"struct.impala_udf::StringVal"*
 //   %ptr = extractvalue %"struct.impala::StringValue" %dst_val, 0
 //   %dst_stringval = insertvalue { i64, i8* } zeroinitializer, i8* %ptr, 1
 //   %len = extractvalue %"struct.impala::StringValue" %dst_val, 1
@@ -489,18 +507,18 @@ IRFunction::Type GetHllUpdateFunction2(const ColumnType& type) {
 //   %5 = or i64 %4, %3
 //   %dst_stringval1 = insertvalue { i64, i8* } %dst_stringval, i64 %5, 0
 //   store { i64, i8* } %dst_stringval1, { i64, i8* }* %dst_lowered_ptr
-//   %dst_unlowered_ptr = bitcast { i64, i8* }* %dst_lowered_ptr
-//                        to %"struct.impala_udf::StringVal"*
-//   call void @HllUpdate(%"class.impala_udf::FunctionContext"* %fn_ctx,
-//                        %"struct.impala_udf::DoubleVal"* %src_unlowered_ptr,
-//                        %"struct.impala_udf::StringVal"* %dst_unlowered_ptr)
-//   %anyval_result = load { i64, i8* }* %dst_lowered_ptr
-//   %6 = extractvalue { i64, i8* } %anyval_result, 1
-//   %7 = insertvalue %"struct.impala::StringValue" zeroinitializer, i8* %6, 0
-//   %8 = extractvalue { i64, i8* } %anyval_result, 0
-//   %9 = ashr i64 %8, 32
-//   %10 = trunc i64 %9 to i32
-//   %11 = insertvalue %"struct.impala::StringValue" %7, i32 %10, 1
+//   %dst_unlowered_ptr =
+//       bitcast { i64, i8* }* %dst_lowered_ptr to %"struct.impala_udf::StringVal"*
+//   call void @HllMerge(%"class.impala_udf::FunctionContext"* %fn_ctx,
+//                       %"struct.impala_udf::StringVal"* %src_unlowered_ptr,
+//                       %"struct.impala_udf::StringVal"* %dst_unlowered_ptr)
+//   %anyval_result = load { i64, i8* }, { i64, i8* }* %dst_lowered_ptr
+//   %6 = extractvalue { i64, i8* } %anyval_result, 0
+//   %7 = ashr i64 %6, 32
+//   %8 = trunc i64 %7 to i32
+//   %9 = insertvalue %"struct.impala::StringValue" zeroinitializer, i32 %8, 1
+//   %10 = extractvalue { i64, i8* } %anyval_result, 1
+//   %11 = insertvalue %"struct.impala::StringValue" %9, i8* %10, 0
 //   store %"struct.impala::StringValue" %11, %"struct.impala::StringValue"* %dst_slot_ptr
 //   br label %ret
 //
@@ -512,6 +530,8 @@ llvm::Function* AggregationNode::CodegenUpdateSlot(
   LlvmCodeGen* codegen;
   if (!state->GetCodegen(&codegen).ok()) return NULL;
 
+  // TODO: Fix this DCHECK and Init() once CodegenUpdateSlot() can handle AggFnEvaluator
+  // with multiple input expressions (e.g. group_concat).
   DCHECK_EQ(evaluator->input_expr_ctxs().size(), 1);
   ExprContext* input_expr_ctx = evaluator->input_expr_ctxs()[0];
   Expr* input_expr = input_expr_ctx->root();
@@ -525,33 +545,34 @@ llvm::Function* AggregationNode::CodegenUpdateSlot(
   }
   DCHECK(agg_expr_fn != NULL);
 
-  PointerType* fn_ctx_type =
+  PointerType* fn_ctx_ptr_type =
       codegen->GetPtrType(FunctionContextImpl::LLVM_FUNCTIONCONTEXT_NAME);
+  PointerType* expr_ctx_ptr_type = codegen->GetPtrType(ExprContext::LLVM_CLASS_NAME);
   StructType* tuple_struct = intermediate_tuple_desc_->GetLlvmStruct(codegen);
-  PointerType* tuple_ptr_type = PointerType::get(tuple_struct, 0);
+  PointerType* tuple_ptr_type = codegen->GetPtrType(tuple_struct);
   PointerType* tuple_row_ptr_type = codegen->GetPtrType(TupleRow::LLVM_CLASS_NAME);
 
   // Create UpdateSlot prototype
   LlvmCodeGen::FnPrototype prototype(codegen, "UpdateSlot", codegen->void_type());
-  prototype.AddArgument(LlvmCodeGen::NamedVariable("fn_ctx", fn_ctx_type));
+  prototype.AddArgument(LlvmCodeGen::NamedVariable("fn_ctx", fn_ctx_ptr_type));
+  prototype.AddArgument(LlvmCodeGen::NamedVariable("expr_ctx", expr_ctx_ptr_type));
   prototype.AddArgument(LlvmCodeGen::NamedVariable("agg_tuple", tuple_ptr_type));
   prototype.AddArgument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type));
 
   LlvmCodeGen::LlvmBuilder builder(codegen->context());
-  Value* args[3];
+  Value* args[4];
   Function* fn = prototype.GeneratePrototype(&builder, &args[0]);
   Value* fn_ctx_arg = args[0];
-  Value* agg_tuple_arg = args[1];
-  Value* row_arg = args[2];
+  Value* expr_ctx_arg = args[1];
+  Value* agg_tuple_arg = args[2];
+  Value* row_arg = args[3];
 
   BasicBlock* src_not_null_block =
       BasicBlock::Create(codegen->context(), "src_not_null", fn);
   BasicBlock* ret_block = BasicBlock::Create(codegen->context(), "ret", fn);
 
   // Call expr function to get src slot value
-  Value* ctx_arg = codegen->CastPtrToLlvmPtr(
-      codegen->GetPtrType(ExprContext::LLVM_CLASS_NAME), input_expr_ctx);
-  Value* agg_expr_fn_args[] = { ctx_arg, row_arg };
+  Value* agg_expr_fn_args[] = { expr_ctx_arg, row_arg };
   CodegenAnyVal src = CodegenAnyVal::CreateCallWrapped(
       codegen, &builder, input_expr->type(), agg_expr_fn, agg_expr_fn_args, "src");
 
@@ -660,23 +681,38 @@ llvm::Function* AggregationNode::CodegenUpdateSlot(
 // For the query:
 // select count(*), count(int_col), sum(double_col) the IR looks like:
 //
+// ; Function Attrs: alwaysinline
 // define void @UpdateTuple(%"class.impala::AggregationNode"* %this_ptr,
 //                          %"class.impala::Tuple"* %agg_tuple,
-//                          %"class.impala::TupleRow"* %tuple_row) #20 {
+//                          %"class.impala::TupleRow"* %tuple_row) #34 {
 // entry:
-//   %tuple = bitcast %"class.impala::Tuple"* %agg_tuple to { i8, i64, i64, double }*
-//   %src_slot = getelementptr inbounds { i8, i64, i64, double }* %tuple, i32 0, i32 1
-//   %count_star_val = load i64* %src_slot
+//   %tuple =
+//       bitcast %"class.impala::Tuple"* %agg_tuple to { i8, [7 x i8], i64, i64, double }*
+//   %src_slot = getelementptr inbounds { i8, [7 x i8], i64, i64, double },
+//       { i8, [7 x i8], i64, i64, double }* %tuple, i32 0, i32 2
+//   %count_star_val = load i64, i64* %src_slot
 //   %count_star_inc = add i64 %count_star_val, 1
 //   store i64 %count_star_inc, i64* %src_slot
-//   call void @UpdateSlot(%"class.impala_udf::FunctionContext"* inttoptr
-//                           (i64 44521296 to %"class.impala_udf::FunctionContext"*),
-//                         { i8, i64, i64, double }* %tuple,
+//   %0 = call %"class.impala_udf::FunctionContext"*
+//       @_ZNK6impala15AggregationNode11GetAggFnCtxEi(
+//           %"class.impala::AggregationNode"* %this_ptr, i32 1)
+//   %1 = call %"class.impala::ExprContext"*
+//       @_ZNK6impala15AggregationNode13GetAggExprCtxEi(
+//           %"class.impala::AggregationNode"* %this_ptr, i32 1)
+//   call void @UpdateSlot(%"class.impala_udf::FunctionContext"* %0,
+//                         %"class.impala::ExprContext"* %1,
+//                         { i8, [7 x i8], i64, i64, double }* %tuple,
 //                         %"class.impala::TupleRow"* %tuple_row)
-//   call void @UpdateSlot5(%"class.impala_udf::FunctionContext"* inttoptr
-//                            (i64 44521328 to %"class.impala_udf::FunctionContext"*),
-//                          { i8, i64, i64, double }* %tuple,
-//                          %"class.impala::TupleRow"* %tuple_row)
+//   %2 = call %"class.impala_udf::FunctionContext"*
+//       @_ZNK6impala15AggregationNode11GetAggFnCtxEi(
+//           %"class.impala::AggregationNode"* %this_ptr, i32 2)
+//   %3 = call %"class.impala::ExprContext"*
+//       @_ZNK6impala15AggregationNode13GetAggExprCtxEi(
+//           %"class.impala::AggregationNode"* %this_ptr, i32 2)
+//   call void @UpdateSlot.3(%"class.impala_udf::FunctionContext"* %2,
+//                           %"class.impala::ExprContext"* %3,
+//                           { i8, [7 x i8], i64, i64, double }* %tuple,
+//                           %"class.impala::TupleRow"* %tuple_row)
 //   ret void
 // }
 Function* AggregationNode::CodegenUpdateTuple(RuntimeState* state) {
@@ -721,12 +757,13 @@ Function* AggregationNode::CodegenUpdateTuple(RuntimeState* state) {
   DCHECK(agg_tuple_type != NULL);
   DCHECK(tuple_row_type != NULL);
 
-  PointerType* agg_node_ptr_type = PointerType::get(agg_node_type, 0);
-  PointerType* agg_tuple_ptr_type = PointerType::get(agg_tuple_type, 0);
-  PointerType* tuple_row_ptr_type = PointerType::get(tuple_row_type, 0);
+  PointerType* agg_node_ptr_type = codegen->GetPtrType(agg_node_type);
+  PointerType* agg_tuple_ptr_type = codegen->GetPtrType(agg_tuple_type);
+  PointerType* tuple_row_ptr_type = codegen->GetPtrType(tuple_row_type);
 
   // Signature for UpdateTuple is
-  // void UpdateTuple(AggregationNode* this, Tuple* tuple, TupleRow* row)
+  // void UpdateTuple(AggregationNode* this, FunctionContext** fn_ctx,
+  //     ExprContext** expr_ctx, Tuple* tuple, TupleRow* row)
   // This signature needs to match the non-codegen'd signature exactly.
   StructType* tuple_struct = intermediate_tuple_desc_->GetLlvmStruct(codegen);
   PointerType* tuple_ptr = PointerType::get(tuple_struct, 0);
@@ -741,7 +778,15 @@ Function* AggregationNode::CodegenUpdateTuple(RuntimeState* state) {
 
   // Cast the parameter types to the internal llvm runtime types.
   // TODO: get rid of this by using right type in function signature
-  args[1] = builder.CreateBitCast(args[1], tuple_ptr, "tuple");
+  Value* this_arg = args[0];
+  Value* agg_tuple_arg = builder.CreateBitCast(args[1], tuple_ptr, "tuple");
+  Value* row_arg = args[2];
+
+  Function* get_fn_ctx_fn = codegen->GetFunction(IRFunction::AGG_NODE_GET_FN_CTX, false);
+  DCHECK(get_fn_ctx_fn != NULL);
+  Function* get_expr_ctx_fn =
+      codegen->GetFunction(IRFunction::AGG_NODE_GET_EXPR_CTX, false);
+  DCHECK(get_expr_ctx_fn != NULL);
 
   // Loop over each expr and generate the IR for that slot.  If the expr is not
   // count(*), generate a helper IR function to update the slot and call that.
@@ -754,18 +799,23 @@ Function* AggregationNode::CodegenUpdateTuple(RuntimeState* state) {
       // increment the slot by the number of rows in the batch.
       int field_idx = slot_desc->llvm_field_idx();
       Value* const_one = codegen->GetIntConstant(TYPE_BIGINT, 1);
-      Value* slot_ptr = builder.CreateStructGEP(NULL, args[1], field_idx, "src_slot");
+      Value* slot_ptr = builder.CreateStructGEP(NULL, agg_tuple_arg, field_idx,
+          "src_slot");
       Value* slot_loaded = builder.CreateLoad(slot_ptr, "count_star_val");
       Value* count_inc = builder.CreateAdd(slot_loaded, const_one, "count_star_inc");
       builder.CreateStore(count_inc, slot_ptr);
     } else {
       Function* update_slot_fn = CodegenUpdateSlot(state, evaluator, slot_desc);
       if (update_slot_fn == NULL) return NULL;
-      Value* fn_ctx_arg = codegen->CastPtrToLlvmPtr(
-          codegen->GetPtrType(FunctionContextImpl::LLVM_FUNCTIONCONTEXT_NAME),
-          agg_fn_ctxs_[i]);
-      builder.CreateCall(update_slot_fn,
-          ArrayRef<Value*>({fn_ctx_arg, args[1], args[2]}));
+      // Call GetAggFnCtx() to get the function context.
+      Value* get_fn_ctx_args[] = { this_arg, codegen->GetIntConstant(TYPE_INT, i) };
+      Value* fn_ctx = builder.CreateCall(get_fn_ctx_fn, get_fn_ctx_args);
+      // Call GetAggExprCtx() to get the expression context.
+      DCHECK(agg_expr_ctxs_[i] != NULL);
+      Value* get_expr_ctx_args[] = { this_arg, codegen->GetIntConstant(TYPE_INT, i) };
+      Value* expr_ctx = builder.CreateCall(get_expr_ctx_fn, get_expr_ctx_args);
+      Value* update_slot_args[] = { fn_ctx, expr_ctx, agg_tuple_arg, row_arg };
+      builder.CreateCall(update_slot_fn, update_slot_args);
     }
   }
   builder.CreateRetVoid();

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6cc296ec/be/src/exec/aggregation-node.h
----------------------------------------------------------------------
diff --git a/be/src/exec/aggregation-node.h b/be/src/exec/aggregation-node.h
index eaaf97c..5d87d82 100644
--- a/be/src/exec/aggregation-node.h
+++ b/be/src/exec/aggregation-node.h
@@ -69,12 +69,20 @@ class AggregationNode : public ExecNode {
   boost::scoped_ptr<OldHashTable> hash_tbl_;
   OldHashTable::Iterator output_iterator_;
 
+  /// The list of all aggregate operations for this exec node.
   std::vector<AggFnEvaluator*> aggregate_evaluators_;
 
-  /// FunctionContext for each agg fn and backing pool.
+  /// FunctionContexts and backing MemPools of 'aggregate_evaluators_'.
+  /// FunctionContexts objects are stored in ObjectPool of RuntimeState.
   std::vector<impala_udf::FunctionContext*> agg_fn_ctxs_;
   boost::scoped_ptr<MemPool> agg_fn_pool_;
 
+  /// Cache of the ExprContexts of 'aggregate_evaluators_'. Used in the codegen'ed
+  /// version of UpdateTuple() to avoid loading aggregate_evaluators_[i] at runtime.
+  /// An entry is NULL if the aggregate evaluator is not codegen'ed or there is no
+  /// Expr in the aggregate evaluator (e.g. count(*)).
+  std::vector<ExprContext*> agg_expr_ctxs_;
+
   /// Exprs used to evaluate input rows
   std::vector<ExprContext*> probe_expr_ctxs_;
   /// Exprs used to insert constructed aggregation tuple into the hash table.
@@ -124,7 +132,7 @@ class AggregationNode : public ExecNode {
   Tuple* ConstructIntermediateTuple();
 
   /// Updates the aggregation intermediate tuple 'tuple' with aggregation values
-  /// computed over 'row'.
+  /// computed over 'row'. This function is replaced by codegen.
   void UpdateTuple(Tuple* tuple, TupleRow* row);
 
   /// Called on the intermediate tuple of each group after all input rows have been
@@ -135,6 +143,14 @@ class AggregationNode : public ExecNode {
   /// Returns the tuple holding the final aggregate values.
   Tuple* FinalizeTuple(Tuple* tuple, MemPool* pool);
 
+  /// Accessor for the function context of an AggFnEvaluator. Used only in codegen'ed
+  /// version of the UpdateSlot().
+  FunctionContext* IR_ALWAYS_INLINE GetAggFnCtx(int i) const;
+
+  /// Accessor for the expression context of an AggFnEvaluator. Used only in codegen'ed
+  /// version of the UpdateSlot().
+  ExprContext* IR_ALWAYS_INLINE GetAggExprCtx(int i) const;
+
   /// Do the aggregation for all tuple rows in the batch
   void ProcessRowBatchNoGrouping(RowBatch* batch);
   void ProcessRowBatchWithGrouping(RowBatch* batch);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6cc296ec/be/src/exec/hash-table-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hash-table-ir.cc b/be/src/exec/hash-table-ir.cc
index ce9c317..a702736 100644
--- a/be/src/exec/hash-table-ir.cc
+++ b/be/src/exec/hash-table-ir.cc
@@ -23,4 +23,8 @@ using namespace impala;
 
 uint32_t HashTableCtx::GetHashSeed() const { return seeds_[level_]; }
 
+ExprContext* HashTableCtx::GetBuildExprCtx(int i) const { return build_expr_ctxs_[i]; }
+
+ExprContext* HashTableCtx::GetProbeExprCtx(int i) const { return probe_expr_ctxs_[i]; }
+
 #endif

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6cc296ec/be/src/exec/hash-table.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hash-table.cc b/be/src/exec/hash-table.cc
index dfa700e..0d780b9 100644
--- a/be/src/exec/hash-table.cc
+++ b/be/src/exec/hash-table.cc
@@ -183,8 +183,8 @@ bool HashTableCtx::EvalRow(const TupleRow* row, const vector<ExprContext*>& ctxs
   return has_null;
 }
 
-uint32_t HashTableCtx::HashVariableLenRow(
-    const uint8_t* expr_values, const uint8_t* expr_values_null) const {
+uint32_t HashTableCtx::HashVariableLenRow(const uint8_t* expr_values,
+    const uint8_t* expr_values_null) const {
   uint32_t hash = seeds_[level_];
   int var_result_offset = expr_values_cache_.var_result_offset();
   // Hash the non-var length portions (if there are any)
@@ -699,30 +699,36 @@ Status HashTableCtx::CodegenEvalRow(RuntimeState* state, bool build, Function**
   RETURN_IF_ERROR(state->GetCodegen(&codegen));
 
   // Get types to generate function prototype
-  Type* tuple_row_type = codegen->GetType(TupleRow::LLVM_CLASS_NAME);
-  DCHECK(tuple_row_type != NULL);
-  PointerType* tuple_row_ptr_type = PointerType::get(tuple_row_type, 0);
-
   Type* this_type = codegen->GetType(HashTableCtx::LLVM_CLASS_NAME);
   DCHECK(this_type != NULL);
-  PointerType* this_ptr_type = PointerType::get(this_type, 0);
+  PointerType* this_ptr_type = codegen->GetPtrType(this_type);
+  Type* tuple_row_type = codegen->GetType(TupleRow::LLVM_CLASS_NAME);
+  DCHECK(tuple_row_type != NULL);
+  PointerType* tuple_row_ptr_type = codegen->GetPtrType(tuple_row_type);
   LlvmCodeGen::FnPrototype prototype(codegen, build ? "EvalBuildRow" : "EvalProbeRow",
       codegen->GetType(TYPE_BOOLEAN));
   prototype.AddArgument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type));
   prototype.AddArgument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type));
   prototype.AddArgument(LlvmCodeGen::NamedVariable("expr_values", codegen->ptr_type()));
-  prototype.AddArgument(
-      LlvmCodeGen::NamedVariable("expr_values_null", codegen->ptr_type()));
+  prototype.AddArgument(LlvmCodeGen::NamedVariable("expr_values_null",
+      codegen->ptr_type()));
 
   LLVMContext& context = codegen->context();
   LlvmCodeGen::LlvmBuilder builder(context);
   Value* args[4];
   *fn = prototype.GeneratePrototype(&builder, args);
+  Value* this_ptr = args[0];
   Value* row = args[1];
   Value* expr_values = args[2];
   Value* expr_values_null = args[3];
   Value* has_null = codegen->false_value();
 
+  IRFunction::Type get_expr_ctx_fn_name = build ?
+      IRFunction::HASH_TABLE_GET_BUILD_EXPR_CTX :
+      IRFunction::HASH_TABLE_GET_PROBE_EXPR_CTX;
+  Function* get_expr_ctx_fn = codegen->GetFunction(get_expr_ctx_fn_name, false);
+  DCHECK(get_expr_ctx_fn != NULL);
+
   for (int i = 0; i < ctxs.size(); ++i) {
     // TODO: refactor this to somewhere else?  This is not hash table specific except for
     // the null handling bit and would be used for anyone that needs to materialize a
@@ -748,8 +754,8 @@ Status HashTableCtx::CodegenEvalRow(RuntimeState* state, bool build, Function**
           status.GetDetail()));
     }
 
-    Value* ctx_arg = codegen->CastPtrToLlvmPtr(
-        codegen->GetPtrType(ExprContext::LLVM_CLASS_NAME), ctxs[i]);
+    Value* get_expr_ctx_args[] = { this_ptr, codegen->GetIntConstant(TYPE_INT, i) };
+    Value* ctx_arg = builder.CreateCall(get_expr_ctx_fn, get_expr_ctx_args, "expr_ctx");
     Value* expr_fn_args[] = { ctx_arg, row };
     CodegenAnyVal result = CodegenAnyVal::CreateCallWrapped(
         codegen, &builder, ctxs[i]->root()->type(), expr_fn, expr_fn_args, "result");
@@ -845,7 +851,7 @@ Status HashTableCtx::CodegenHashRow(RuntimeState* state, bool use_murmur, Functi
   // Get types to generate function prototype
   Type* this_type = codegen->GetType(HashTableCtx::LLVM_CLASS_NAME);
   DCHECK(this_type != NULL);
-  PointerType* this_ptr_type = PointerType::get(this_type, 0);
+  PointerType* this_ptr_type = codegen->GetPtrType(this_type);
 
   LlvmCodeGen::FnPrototype prototype(
       codegen, (use_murmur ? "MurmurHashRow" : "HashRow"), codegen->GetType(TYPE_INT));
@@ -1050,13 +1056,13 @@ Status HashTableCtx::CodegenEquals(RuntimeState* state, bool force_null_equality
   LlvmCodeGen* codegen;
   RETURN_IF_ERROR(state->GetCodegen(&codegen));
   // Get types to generate function prototype
+  Type* this_type = codegen->GetType(HashTableCtx::LLVM_CLASS_NAME);
+  DCHECK(this_type != NULL);
+  PointerType* this_ptr_type = codegen->GetPtrType(this_type);
   Type* tuple_row_type = codegen->GetType(TupleRow::LLVM_CLASS_NAME);
   DCHECK(tuple_row_type != NULL);
-  PointerType* tuple_row_ptr_type = PointerType::get(tuple_row_type, 0);
+  PointerType* tuple_row_ptr_type = codegen->GetPtrType(tuple_row_type);
 
-  Type* this_type = codegen->GetType(HashTableCtx::LLVM_CLASS_NAME);
-  DCHECK(this_type != NULL);
-  PointerType* this_ptr_type = PointerType::get(this_type, 0);
   LlvmCodeGen::FnPrototype prototype(codegen, "Equals", codegen->GetType(TYPE_BOOLEAN));
   prototype.AddArgument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type));
   prototype.AddArgument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type));
@@ -1068,10 +1074,15 @@ Status HashTableCtx::CodegenEquals(RuntimeState* state, bool force_null_equality
   LlvmCodeGen::LlvmBuilder builder(context);
   Value* args[4];
   *fn = prototype.GeneratePrototype(&builder, args);
+  Value* this_ptr = args[0];
   Value* row = args[1];
   Value* expr_values = args[2];
   Value* expr_values_null = args[3];
 
+  Function* get_expr_ctx_fn =
+      codegen->GetFunction(IRFunction::HASH_TABLE_GET_BUILD_EXPR_CTX, false);
+  DCHECK(get_expr_ctx_fn != NULL);
+
   BasicBlock* false_block = BasicBlock::Create(context, "false_block", *fn);
   for (int i = 0; i < build_expr_ctxs_.size(); ++i) {
     BasicBlock* null_block = BasicBlock::Create(context, "null", *fn);
@@ -1088,8 +1099,11 @@ Status HashTableCtx::CodegenEquals(RuntimeState* state, bool force_null_equality
           status.GetDetail()));
     }
 
-    Value* ctx_arg = codegen->CastPtrToLlvmPtr(
-        codegen->GetPtrType(ExprContext::LLVM_CLASS_NAME), build_expr_ctxs_[i]);
+    // Load ExprContext* from 'build_expr_ctxs_'.
+    Value* get_expr_ctx_args[] = { this_ptr, codegen->GetIntConstant(TYPE_INT, i) };
+    Value* ctx_arg = builder.CreateCall(get_expr_ctx_fn, get_expr_ctx_args, "expr_ctx");
+
+    // Evaluate the expression.
     Value* expr_fn_args[] = { ctx_arg, row };
     CodegenAnyVal result = CodegenAnyVal::CreateCallWrapped(codegen, &builder,
         build_expr_ctxs_[i]->root()->type(), expr_fn, expr_fn_args, "result");

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6cc296ec/be/src/exec/hash-table.h
----------------------------------------------------------------------
diff --git a/be/src/exec/hash-table.h b/be/src/exec/hash-table.h
index 58078ad..fead1f7 100644
--- a/be/src/exec/hash-table.h
+++ b/be/src/exec/hash-table.h
@@ -300,7 +300,9 @@ class HashTableCtx {
     uint32_t ALWAYS_INLINE CurExprValuesHash() const { return *cur_expr_values_hash_; }
 
     /// Sets the hash values for the current row.
-    void ALWAYS_INLINE SetCurExprValuesHash(uint32_t hash) { *cur_expr_values_hash_ = hash; }
+    void ALWAYS_INLINE SetCurExprValuesHash(uint32_t hash) {
+      *cur_expr_values_hash_ = hash;
+    }
 
     /// Returns a pointer to the expression value at 'expr_idx' in 'expr_values'.
     uint8_t* ExprValuePtr(uint8_t* expr_values, int expr_idx) const;
@@ -410,19 +412,19 @@ class HashTableCtx {
   uint32_t Hash(const void* input, int len, uint32_t hash) const;
 
   /// Evaluate 'row' over build exprs, storing values into 'expr_values' and nullness into
-  /// 'expr_values_null'. This will be replaced by codegen. We do not want this
-  /// function inlined when cross compiled because we need to be able to differentiate
-  /// between EvalBuildRow and EvalProbeRow by name and the build/probe exprs are baked
-  /// into the codegen'd function.
-  bool IR_NO_INLINE EvalBuildRow(
-      const TupleRow* row, uint8_t* expr_values, uint8_t* expr_values_null) {
+  /// 'expr_values_null'. This will be replaced by codegen. We do not want this function
+  /// inlined when cross compiled because we need to be able to differentiate between
+  /// EvalBuildRow and EvalProbeRow by name and the build/probe exprs are baked into the
+  /// codegen'd function.
+  bool IR_NO_INLINE EvalBuildRow(const TupleRow* row, uint8_t* expr_values,
+      uint8_t* expr_values_null) {
     return EvalRow(row, build_expr_ctxs_, expr_values, expr_values_null);
   }
 
   /// Evaluate 'row' over probe exprs, storing the values into 'expr_values' and nullness
   /// into 'expr_values_null'. This will be replaced by codegen.
-  bool IR_NO_INLINE EvalProbeRow(
-      const TupleRow* row, uint8_t* expr_values, uint8_t* expr_values_null) {
+  bool IR_NO_INLINE EvalProbeRow(const TupleRow* row, uint8_t* expr_values,
+      uint8_t* expr_values_null) {
     return EvalRow(row, probe_expr_ctxs_, expr_values, expr_values_null);
   }
 
@@ -454,12 +456,16 @@ class HashTableCtx {
   }
 
   /// Cross-compiled function to access member variables used in CodegenHashRow().
-  uint32_t GetHashSeed() const;
+  uint32_t IR_ALWAYS_INLINE GetHashSeed() const;
 
   /// Functions to be replaced by codegen to specialize the hash table.
   bool IR_NO_INLINE stores_nulls() const { return stores_nulls_; }
   bool IR_NO_INLINE finds_some_nulls() const { return finds_some_nulls_; }
 
+  /// Cross-compiled function to access the build/probe expression context.
+  ExprContext* IR_ALWAYS_INLINE GetBuildExprCtx(int i) const;
+  ExprContext* IR_ALWAYS_INLINE GetProbeExprCtx(int i) const;
+
   const std::vector<ExprContext*>& build_expr_ctxs_;
   const std::vector<ExprContext*>& probe_expr_ctxs_;
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6cc296ec/be/src/exec/partitioned-aggregation-node-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-aggregation-node-ir.cc b/be/src/exec/partitioned-aggregation-node-ir.cc
index 194f6c4..ed95844 100644
--- a/be/src/exec/partitioned-aggregation-node-ir.cc
+++ b/be/src/exec/partitioned-aggregation-node-ir.cc
@@ -26,6 +26,10 @@
 
 using namespace impala;
 
+ExprContext* PartitionedAggregationNode::GetAggExprContext(int i) const {
+  return agg_expr_ctxs_[i];
+}
+
 Status PartitionedAggregationNode::ProcessBatchNoGrouping(RowBatch* batch) {
   Tuple* output_tuple = singleton_output_tuple_;
   FOREACH_ROW(batch, 0, batch_iter) {
@@ -202,7 +206,7 @@ Status PartitionedAggregationNode::ProcessBatchStreaming(bool needs_serialize,
           DCHECK(!process_batch_status_.ok());
           return process_batch_status_;
         }
-        UpdateTuple(&agg_fn_ctxs_[0], intermediate_tuple, in_row, false);
+        UpdateTuple(&agg_fn_ctxs_[0], intermediate_tuple, in_row);
         out_batch_iterator.Get()->SetTuple(0, intermediate_tuple);
         out_batch_iterator.Next();
         out_batch->CommitLastRow();
@@ -250,7 +254,7 @@ bool PartitionedAggregationNode::TryAddToHashTable(
     }
   }
 
-  UpdateTuple(&partition->agg_fn_ctxs[0], intermediate_tuple, in_row, false);
+  UpdateTuple(&partition->agg_fn_ctxs[0], intermediate_tuple, in_row);
   return true;
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6cc296ec/be/src/exec/partitioned-aggregation-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-aggregation-node.cc b/be/src/exec/partitioned-aggregation-node.cc
index eb5addc..ba2d9f7 100644
--- a/be/src/exec/partitioned-aggregation-node.cc
+++ b/be/src/exec/partitioned-aggregation-node.cc
@@ -153,6 +153,18 @@ Status PartitionedAggregationNode::Init(const TPlanNode& tnode, RuntimeState* st
     RETURN_IF_ERROR(AggFnEvaluator::Create(
         pool_, tnode.agg_node.aggregate_functions[i], &evaluator));
     aggregate_evaluators_.push_back(evaluator);
+    ExprContext* agg_expr_ctx;
+    if (evaluator->input_expr_ctxs().size() == 1) {
+      agg_expr_ctx = evaluator->input_expr_ctxs()[0];
+    } else {
+      // CodegenUpdateSlot() can only support aggregate operator with only one ExprContext
+      // so it doesn't support operator such as group_concat. There are also aggregate
+      // operators with no ExprContext (e.g. count(*)). In cases above, 'agg_expr_ctxs_'
+      // will contain NULL for that entry.
+      DCHECK(evaluator->agg_op() == AggFnEvaluator::OTHER || evaluator->is_count_star());
+      agg_expr_ctx = NULL;
+    }
+    agg_expr_ctxs_.push_back(agg_expr_ctx);
   }
   return Status::OK();
 }
@@ -696,6 +708,7 @@ void PartitionedAggregationNode::Close(RuntimeState* state) {
   for (int i = 0; i < aggregate_evaluators_.size(); ++i) {
     aggregate_evaluators_[i]->Close(state);
   }
+  agg_expr_ctxs_.clear();
   for (int i = 0; i < agg_fn_ctxs_.size(); ++i) {
     agg_fn_ctxs_[i]->impl()->Close();
   }
@@ -1407,23 +1420,28 @@ Status PartitionedAggregationNode::QueryMaintenance(RuntimeState* state) {
 }
 
 // IR Generation for updating a single aggregation slot. Signature is:
-// void UpdateSlot(FunctionContext* fn_ctx, AggTuple* agg_tuple, char** row)
+// void UpdateSlot(FunctionContext* agg_fn_ctx, ExprContext* agg_expr_ctx,
+//     AggTuple* agg_tuple, char** row)
 //
 // The IR for sum(double_col) is:
-// define void @UpdateSlot(%"class.impala_udf::FunctionContext"* %fn_ctx,
-//                         { i8, double }* %agg_tuple,
-//                         %"class.impala::TupleRow"* %row) #20 {
+//
+// define void @UpdateSlot(%"class.impala_udf::FunctionContext"* %agg_fn_ctx,
+//                         %"class.impala::ExprContext"* %agg_expr_ctx,
+//                         { i8, [7 x i8], double }* %agg_tuple,
+//                         %"class.impala::TupleRow"* %row) #34 {
+//
 // entry:
-//   %src = call { i8, double } @GetSlotRef(%"class.impala::ExprContext"* inttoptr
-//     (i64 128241264 to %"class.impala::ExprContext"*), %"class.impala::TupleRow"* %row)
+//   %src = call { i8, double } @GetSlotRef(%"class.impala::ExprContext"* %agg_expr_ctx,
+//       %"class.impala::TupleRow"* %row)
 //   %0 = extractvalue { i8, double } %src, 0
 //   %is_null = trunc i8 %0 to i1
 //   br i1 %is_null, label %ret, label %src_not_null
 //
 // src_not_null:                                     ; preds = %entry
-//   %dst_slot_ptr = getelementptr inbounds { i8, double }* %agg_tuple, i32 0, i32 1
-//   call void @SetNotNull({ i8, double }* %agg_tuple)
-//   %dst_val = load double* %dst_slot_ptr
+//   %dst_slot_ptr = getelementptr inbounds { i8, [7 x i8], double },
+//       { i8, [7 x i8], double }* %agg_tuple, i32 0, i32 2
+//   call void @SetNotNull({ i8, [7 x i8], double }* %agg_tuple)
+//   %dst_val = load double, double* %dst_slot_ptr
 //   %val = extractvalue { i8, double } %src, 1
 //   %1 = fadd double %dst_val, %val
 //   store double %1, double* %dst_slot_ptr
@@ -1434,48 +1452,51 @@ Status PartitionedAggregationNode::QueryMaintenance(RuntimeState* state) {
 // }
 //
 // The IR for ndv(double_col) is:
-// define void @UpdateSlot(%"class.impala_udf::FunctionContext"* %fn_ctx,
-//                         { i8, %"struct.impala::StringValue" }* %agg_tuple,
-//                         %"class.impala::TupleRow"* %row) #20 {
+//
+// define void @UpdateSlot(%"class.impala_udf::FunctionContext"* %agg_fn_ctx,
+//                         %"class.impala::ExprContext"* %agg_expr_ctx,
+//                         { i8, [7 x i8], %"struct.impala::StringValue" }* %agg_tuple,
+//                         %"class.impala::TupleRow"* %row) #34 {
 // entry:
 //   %dst_lowered_ptr = alloca { i64, i8* }
 //   %src_lowered_ptr = alloca { i8, double }
-//   %src = call { i8, double } @GetSlotRef(%"class.impala::ExprContext"* inttoptr
-//     (i64 120530832 to %"class.impala::ExprContext"*), %"class.impala::TupleRow"* %row)
+//   %src = call { i8, double } @GetSlotRef(%"class.impala::ExprContext"* %agg_expr_ctx,
+//       %"class.impala::TupleRow"* %row)
 //   %0 = extractvalue { i8, double } %src, 0
 //   %is_null = trunc i8 %0 to i1
 //   br i1 %is_null, label %ret, label %src_not_null
 //
 // src_not_null:                                     ; preds = %entry
-//   %dst_slot_ptr = getelementptr inbounds
-//     { i8, %"struct.impala::StringValue" }* %agg_tuple, i32 0, i32 1
-//   call void @SetNotNull({ i8, %"struct.impala::StringValue" }* %agg_tuple)
-//   %dst_val = load %"struct.impala::StringValue"* %dst_slot_ptr
+//   %dst_slot_ptr = getelementptr inbounds { i8, [7 x i8], %"struct.impala::StringValue" },
+//       { i8, [7 x i8], %"struct.impala::StringValue" }* %agg_tuple, i32 0, i32 2
+//   call void @SetNotNull({ i8, [7 x i8], %"struct.impala::StringValue" }* %agg_tuple)
+//   %dst_val =
+//       load %"struct.impala::StringValue", %"struct.impala::StringValue"* %dst_slot_ptr
 //   store { i8, double } %src, { i8, double }* %src_lowered_ptr
-//   %src_unlowered_ptr = bitcast { i8, double }* %src_lowered_ptr
-//                        to %"struct.impala_udf::DoubleVal"*
+//   %src_unlowered_ptr =
+//       bitcast { i8, double }* %src_lowered_ptr to %"struct.impala_udf::DoubleVal"*
 //   %ptr = extractvalue %"struct.impala::StringValue" %dst_val, 0
-//   %dst_stringval = insertvalue { i64, i8* } zeroinitializer, i8* %ptr, 1
+//   %dst = insertvalue { i64, i8* } zeroinitializer, i8* %ptr, 1
 //   %len = extractvalue %"struct.impala::StringValue" %dst_val, 1
-//   %1 = extractvalue { i64, i8* } %dst_stringval, 0
+//   %1 = extractvalue { i64, i8* } %dst, 0
 //   %2 = zext i32 %len to i64
 //   %3 = shl i64 %2, 32
 //   %4 = and i64 %1, 4294967295
 //   %5 = or i64 %4, %3
-//   %dst_stringval1 = insertvalue { i64, i8* } %dst_stringval, i64 %5, 0
-//   store { i64, i8* } %dst_stringval1, { i64, i8* }* %dst_lowered_ptr
-//   %dst_unlowered_ptr = bitcast { i64, i8* }* %dst_lowered_ptr
-//                        to %"struct.impala_udf::StringVal"*
-//   call void @HllUpdate(%"class.impala_udf::FunctionContext"* %fn_ctx,
+//   %dst1 = insertvalue { i64, i8* } %dst, i64 %5, 0
+//   store { i64, i8* } %dst1, { i64, i8* }* %dst_lowered_ptr
+//   %dst_unlowered_ptr =
+//       bitcast { i64, i8* }* %dst_lowered_ptr to %"struct.impala_udf::StringVal"*
+//   call void @HllUpdate(%"class.impala_udf::FunctionContext"* %agg_fn_ctx,
 //                        %"struct.impala_udf::DoubleVal"* %src_unlowered_ptr,
 //                        %"struct.impala_udf::StringVal"* %dst_unlowered_ptr)
-//   %anyval_result = load { i64, i8* }* %dst_lowered_ptr
-//   %6 = extractvalue { i64, i8* } %anyval_result, 1
-//   %7 = insertvalue %"struct.impala::StringValue" zeroinitializer, i8* %6, 0
-//   %8 = extractvalue { i64, i8* } %anyval_result, 0
-//   %9 = ashr i64 %8, 32
-//   %10 = trunc i64 %9 to i32
-//   %11 = insertvalue %"struct.impala::StringValue" %7, i32 %10, 1
+//   %anyval_result = load { i64, i8* }, { i64, i8* }* %dst_lowered_ptr
+//   %6 = extractvalue { i64, i8* } %anyval_result, 0
+//   %7 = ashr i64 %6, 32
+//   %8 = trunc i64 %7 to i32
+//   %9 = insertvalue %"struct.impala::StringValue" zeroinitializer, i32 %8, 1
+//   %10 = extractvalue { i64, i8* } %anyval_result, 1
+//   %11 = insertvalue %"struct.impala::StringValue" %9, i8* %10, 0
 //   store %"struct.impala::StringValue" %11, %"struct.impala::StringValue"* %dst_slot_ptr
 //   br label %ret
 //
@@ -1487,53 +1508,56 @@ Status PartitionedAggregationNode::CodegenUpdateSlot(
   LlvmCodeGen* codegen;
   RETURN_IF_ERROR(state_->GetCodegen(&codegen));
 
+  // TODO: Fix this DCHECK and Init() once CodegenUpdateSlot() can handle AggFnEvaluator
+  // with multiple input expressions (e.g. group_concat).
   DCHECK_EQ(evaluator->input_expr_ctxs().size(), 1);
-  ExprContext* input_expr_ctx = evaluator->input_expr_ctxs()[0];
-  Expr* input_expr = input_expr_ctx->root();
+  ExprContext* agg_expr_ctx = evaluator->input_expr_ctxs()[0];
+  Expr* agg_expr = agg_expr_ctx->root();
 
   // TODO: implement timestamp
-  if (input_expr->type().type == TYPE_TIMESTAMP &&
+  if (agg_expr->type().type == TYPE_TIMESTAMP &&
       evaluator->agg_op() != AggFnEvaluator::AVG) {
     return Status("PartitionedAggregationNode::CodegenUpdateSlot(): timestamp input type "
         "NYI");
   }
 
   Function* agg_expr_fn;
-  RETURN_IF_ERROR(input_expr->GetCodegendComputeFn(state_, &agg_expr_fn));
+  RETURN_IF_ERROR(agg_expr->GetCodegendComputeFn(state_, &agg_expr_fn));
 
   PointerType* fn_ctx_type =
       codegen->GetPtrType(FunctionContextImpl::LLVM_FUNCTIONCONTEXT_NAME);
+  PointerType* expr_ctx_type = codegen->GetPtrType(ExprContext::LLVM_CLASS_NAME);
   StructType* tuple_struct = intermediate_tuple_desc_->GetLlvmStruct(codegen);
   if (tuple_struct == NULL) {
     return Status("PartitionedAggregationNode::CodegenUpdateSlot(): failed to generate "
         "intermediate tuple desc");
   }
-  PointerType* tuple_ptr_type = PointerType::get(tuple_struct, 0);
+  PointerType* tuple_ptr_type = codegen->GetPtrType(tuple_struct);
   PointerType* tuple_row_ptr_type = codegen->GetPtrType(TupleRow::LLVM_CLASS_NAME);
 
   // Create UpdateSlot prototype
   LlvmCodeGen::FnPrototype prototype(codegen, "UpdateSlot", codegen->void_type());
-  prototype.AddArgument(LlvmCodeGen::NamedVariable("fn_ctx", fn_ctx_type));
+  prototype.AddArgument(LlvmCodeGen::NamedVariable("agg_fn_ctx", fn_ctx_type));
+  prototype.AddArgument(LlvmCodeGen::NamedVariable("agg_expr_ctx", expr_ctx_type));
   prototype.AddArgument(LlvmCodeGen::NamedVariable("agg_tuple", tuple_ptr_type));
   prototype.AddArgument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type));
 
   LlvmCodeGen::LlvmBuilder builder(codegen->context());
-  Value* args[3];
+  Value* args[4];
   *fn = prototype.GeneratePrototype(&builder, &args[0]);
-  Value* fn_ctx_arg = args[0];
-  Value* agg_tuple_arg = args[1];
-  Value* row_arg = args[2];
+  Value* agg_fn_ctx_arg = args[0];
+  Value* agg_expr_ctx_arg = args[1];
+  Value* agg_tuple_arg = args[2];
+  Value* row_arg = args[3];
 
   BasicBlock* src_not_null_block =
       BasicBlock::Create(codegen->context(), "src_not_null", *fn);
   BasicBlock* ret_block = BasicBlock::Create(codegen->context(), "ret", *fn);
 
   // Call expr function to get src slot value
-  Value* expr_ctx = codegen->CastPtrToLlvmPtr(
-      codegen->GetPtrType(ExprContext::LLVM_CLASS_NAME), input_expr_ctx);
-  Value* agg_expr_fn_args[] = { expr_ctx, row_arg };
+  Value* agg_expr_fn_args[] = { agg_expr_ctx_arg, row_arg };
   CodegenAnyVal src = CodegenAnyVal::CreateCallWrapped(
-      codegen, &builder, input_expr->type(), agg_expr_fn, agg_expr_fn_args, "src");
+      codegen, &builder, agg_expr->type(), agg_expr_fn, agg_expr_fn_args, "src");
 
   Value* src_is_null = src.GetIsNull();
   builder.CreateCondBr(src_is_null, ret_block, src_not_null_block);
@@ -1597,7 +1621,7 @@ Status PartitionedAggregationNode::CodegenUpdateSlot(
       // Clone and replace constants.
       ir_fn = codegen->CloneFunction(ir_fn);
       vector<FunctionContext::TypeDesc> arg_types;
-      arg_types.push_back(AnyValUtil::ColumnTypeToTypeDesc(input_expr->type()));
+      arg_types.push_back(AnyValUtil::ColumnTypeToTypeDesc(agg_expr->type()));
       Expr::InlineConstants(AnyValUtil::ColumnTypeToTypeDesc(dst_type), arg_types,
           codegen, ir_fn);
 
@@ -1606,7 +1630,7 @@ Status PartitionedAggregationNode::CodegenUpdateSlot(
           *fn, LlvmCodeGen::NamedVariable("src_lowered_ptr", src.value()->getType()));
       builder.CreateStore(src.value(), src_lowered_ptr);
       Type* unlowered_ptr_type =
-          CodegenAnyVal::GetUnloweredPtrType(codegen, input_expr->type());
+          CodegenAnyVal::GetUnloweredPtrType(codegen, agg_expr->type());
       Value* src_unlowered_ptr =
           builder.CreateBitCast(src_lowered_ptr, unlowered_ptr_type, "src_unlowered_ptr");
 
@@ -1624,7 +1648,7 @@ Status PartitionedAggregationNode::CodegenUpdateSlot(
 
       // Call 'ir_fn'
       builder.CreateCall(ir_fn,
-          ArrayRef<Value*>({fn_ctx_arg, src_unlowered_ptr, dst_unlowered_ptr}));
+          ArrayRef<Value*>({agg_fn_ctx_arg, src_unlowered_ptr, dst_unlowered_ptr}));
 
       // Convert StringVal intermediate 'dst_arg' back to StringValue
       Value* anyval_result = builder.CreateLoad(dst_lowered_ptr, "anyval_result");
@@ -1656,28 +1680,41 @@ Status PartitionedAggregationNode::CodegenUpdateSlot(
 // For the query:
 // select count(*), count(int_col), sum(double_col) the IR looks like:
 //
-
 // ; Function Attrs: alwaysinline
 // define void @UpdateTuple(%"class.impala::PartitionedAggregationNode"* %this_ptr,
 //                          %"class.impala_udf::FunctionContext"** %agg_fn_ctxs,
 //                          %"class.impala::Tuple"* %tuple,
 //                          %"class.impala::TupleRow"* %row,
-//                          i1 %is_merge) #20 {
+//                          i1 %is_merge) #34 {
 // entry:
-//   %tuple1 = bitcast %"class.impala::Tuple"* %tuple to { i8, i64, i64, double }*
-//   %src_slot = getelementptr inbounds { i8, i64, i64, double }* %tuple1, i32 0, i32 1
-//   %count_star_val = load i64* %src_slot
+//   %tuple1 =
+//       bitcast %"class.impala::Tuple"* %tuple to { i8, [7 x i8], i64, i64, double }*
+//   %src_slot = getelementptr inbounds { i8, [7 x i8], i64, i64, double },
+//       { i8, [7 x i8], i64, i64, double }* %tuple1, i32 0, i32 2
+//   %count_star_val = load i64, i64* %src_slot
 //   %count_star_inc = add i64 %count_star_val, 1
 //   store i64 %count_star_inc, i64* %src_slot
-//   %0 = getelementptr %"class.impala_udf::FunctionContext"** %agg_fn_ctxs, i32 1
-//   %fn_ctx = load %"class.impala_udf::FunctionContext"** %0
-//   call void @UpdateSlot(%"class.impala_udf::FunctionContext"* %fn_ctx,
-//                         { i8, i64, i64, double }* %tuple1,
+//   %0 = getelementptr %"class.impala_udf::FunctionContext"*,
+//       %"class.impala_udf::FunctionContext"** %agg_fn_ctxs, i32 1
+//   %agg_fn_ctx = load %"class.impala_udf::FunctionContext"*,
+//       %"class.impala_udf::FunctionContext"** %0
+//   %1 = call %"class.impala::ExprContext"*
+//       @_ZNK6impala26PartitionedAggregationNode17GetAggExprContextEi(
+//           %"class.impala::PartitionedAggregationNode"* %this_ptr, i32 1)
+//   call void @UpdateSlot(%"class.impala_udf::FunctionContext"* %agg_fn_ctx,
+//                         %"class.impala::ExprContext"* %1,
+//                         { i8, [7 x i8], i64, i64, double }* %tuple1,
 //                         %"class.impala::TupleRow"* %row)
-//   %1 = getelementptr %"class.impala_udf::FunctionContext"** %agg_fn_ctxs, i32 2
-//   %fn_ctx2 = load %"class.impala_udf::FunctionContext"** %1
-//   call void @UpdateSlot5(%"class.impala_udf::FunctionContext"* %fn_ctx2,
-//                          { i8, i64, i64, double }* %tuple1,
+//   %2 = getelementptr %"class.impala_udf::FunctionContext"*,
+//       %"class.impala_udf::FunctionContext"** %agg_fn_ctxs, i32 2
+//   %agg_fn_ctx2 = load %"class.impala_udf::FunctionContext"*,
+//       %"class.impala_udf::FunctionContext"** %2
+//   %3 = call %"class.impala::ExprContext"*
+//       @_ZNK6impala26PartitionedAggregationNode17GetAggExprContextEi(
+//           %"class.impala::PartitionedAggregationNode"* %this_ptr, i32 2)
+//   call void @UpdateSlot.3(%"class.impala_udf::FunctionContext"* %agg_fn_ctx2,
+//                          %"class.impala::ExprContext"* %3,
+//                          { i8, [7 x i8], i64, i64, double }* %tuple1,
 //                          %"class.impala::TupleRow"* %row)
 //   ret void
 // }
@@ -1726,13 +1763,13 @@ Status PartitionedAggregationNode::CodegenUpdateTuple(Function** fn) {
   Type* tuple_type = codegen->GetType(Tuple::LLVM_CLASS_NAME);
   Type* tuple_row_type = codegen->GetType(TupleRow::LLVM_CLASS_NAME);
 
-  PointerType* agg_node_ptr_type = agg_node_type->getPointerTo();
-  PointerType* fn_ctx_ptr_ptr_type = fn_ctx_type->getPointerTo()->getPointerTo();
-  PointerType* tuple_ptr_type = tuple_type->getPointerTo();
-  PointerType* tuple_row_ptr_type = tuple_row_type->getPointerTo();
+  PointerType* agg_node_ptr_type = codegen->GetPtrType(agg_node_type);
+  PointerType* fn_ctx_ptr_ptr_type = codegen->GetPtrPtrType(fn_ctx_type);
+  PointerType* tuple_ptr_type = codegen->GetPtrType(tuple_type);
+  PointerType* tuple_row_ptr_type = codegen->GetPtrType(tuple_row_type);
 
   StructType* tuple_struct = intermediate_tuple_desc_->GetLlvmStruct(codegen);
-  PointerType* tuple_ptr = PointerType::get(tuple_struct, 0);
+  PointerType* tuple_ptr = codegen->GetPtrType(tuple_struct);
   LlvmCodeGen::FnPrototype prototype(codegen, "UpdateTuple", codegen->void_type());
   prototype.AddArgument(LlvmCodeGen::NamedVariable("this_ptr", agg_node_ptr_type));
   prototype.AddArgument(LlvmCodeGen::NamedVariable("agg_fn_ctxs", fn_ctx_ptr_ptr_type));
@@ -1743,7 +1780,7 @@ Status PartitionedAggregationNode::CodegenUpdateTuple(Function** fn) {
   LlvmCodeGen::LlvmBuilder builder(codegen->context());
   Value* args[5];
   *fn = prototype.GeneratePrototype(&builder, &args[0]);
-
+  Value* this_arg = args[0];
   Value* agg_fn_ctxs_arg = args[1];
   Value* tuple_arg = args[2];
   Value* row_arg = args[3];
@@ -1752,6 +1789,10 @@ Status PartitionedAggregationNode::CodegenUpdateTuple(Function** fn) {
   // TODO: get rid of this by using right type in function signature
   tuple_arg = builder.CreateBitCast(tuple_arg, tuple_ptr, "tuple");
 
+  Function* get_expr_ctx_fn =
+      codegen->GetFunction(IRFunction::PART_AGG_NODE_GET_EXPR_CTX, false);
+  DCHECK(get_expr_ctx_fn != NULL);
+
   // Loop over each expr and generate the IR for that slot.  If the expr is not
   // count(*), generate a helper IR function to update the slot and call that.
   j = grouping_expr_ctxs_.size();
@@ -1770,9 +1811,14 @@ Status PartitionedAggregationNode::CodegenUpdateTuple(Function** fn) {
     } else {
       Function* update_slot_fn;
       RETURN_IF_ERROR(CodegenUpdateSlot(evaluator, slot_desc, &update_slot_fn));
-      Value* fn_ctx_ptr = builder.CreateConstGEP1_32(agg_fn_ctxs_arg, i);
-      Value* fn_ctx = builder.CreateLoad(fn_ctx_ptr, "fn_ctx");
-      builder.CreateCall(update_slot_fn, ArrayRef<Value*>({fn_ctx, tuple_arg, row_arg}));
+      Value* agg_fn_ctx_ptr = builder.CreateConstGEP1_32(agg_fn_ctxs_arg, i);
+      Value* agg_fn_ctx = builder.CreateLoad(agg_fn_ctx_ptr, "agg_fn_ctx");
+      // Call GetExprCtx() to get the expression context.
+      DCHECK(agg_expr_ctxs_[i] != NULL);
+      Value* get_expr_ctx_args[] = { this_arg, codegen->GetIntConstant(TYPE_INT, i) };
+      Value* agg_expr_ctx = builder.CreateCall(get_expr_ctx_fn, get_expr_ctx_args);
+      Value* update_slot_args[] = { agg_fn_ctx, agg_expr_ctx, tuple_arg, row_arg };
+      builder.CreateCall(update_slot_fn, update_slot_args);
     }
   }
   builder.CreateRetVoid();

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6cc296ec/be/src/exec/partitioned-aggregation-node.h
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-aggregation-node.h b/be/src/exec/partitioned-aggregation-node.h
index 952dcd7..c766ab2 100644
--- a/be/src/exec/partitioned-aggregation-node.h
+++ b/be/src/exec/partitioned-aggregation-node.h
@@ -193,11 +193,18 @@ class PartitionedAggregationNode : public ExecNode {
   /// are doing a streaming preaggregation.
   bool is_streaming_preagg_;
 
-  /// Contains any evaluators that require the serialize step.
+  /// True if any of the evaluators require the serialize step.
   bool needs_serialize_;
 
+  /// The list of all aggregate operations for this exec node.
   std::vector<AggFnEvaluator*> aggregate_evaluators_;
 
+  /// Cache of the ExprContexts of 'aggregate_evaluators_'. Used in the codegen'ed
+  /// version of UpdateTuple() to avoid loading aggregate_evaluators_[i] at runtime.
+  /// An entry is NULL if the aggregate evaluator is not codegen'ed or there is no Expr
+  /// in the aggregate evaluator (e.g. count(*)).
+  std::vector<ExprContext*> agg_expr_ctxs_;
+
   /// FunctionContext for each aggregate function and backing MemPool. String data
   /// returned by the aggregate functions is allocated via these contexts.
   /// These contexts are only passed to the evaluators in the non-partitioned
@@ -468,9 +475,9 @@ class PartitionedAggregationNode : public ExecNode {
   /// belonging to the same partition independent of whether the agg fn evaluators have
   /// is_merge() == true.
   /// This function is replaced by codegen (which is why we don't use a vector argument
-  /// for agg_fn_ctxs). Any var-len data is allocated from the FunctionContexts.
+  /// for agg_fn_ctxs).. Any var-len data is allocated from the FunctionContexts.
   void UpdateTuple(impala_udf::FunctionContext** agg_fn_ctxs, Tuple* tuple, TupleRow* row,
-                   bool is_merge = false);
+      bool is_merge = false);
 
   /// Called on the intermediate tuple of each group after all input rows have been
   /// consumed and aggregated. Computes the final aggregate values to be returned in
@@ -482,7 +489,7 @@ class PartitionedAggregationNode : public ExecNode {
   /// TODO: Coordinate the allocation of new tuples with the release of memory
   /// so as not to make memory consumption blow up.
   Tuple* GetOutputTuple(const std::vector<impala_udf::FunctionContext*>& agg_fn_ctxs,
-                        Tuple* tuple, MemPool* pool);
+      Tuple* tuple, MemPool* pool);
 
   /// Do the aggregation for all tuple rows in the batch when there is no grouping.
   /// This function is replaced by codegen.
@@ -517,6 +524,10 @@ class PartitionedAggregationNode : public ExecNode {
   template<bool AGGREGATED_ROWS>
   Status IR_ALWAYS_INLINE ProcessRow(TupleRow* row, HashTableCtx* ht_ctx);
 
+  /// Accessor for the expression context of an AggFnEvaluator. Used only in codegen'ed
+  /// version of UpdateTuple().
+  ExprContext* IR_ALWAYS_INLINE GetAggExprContext(int i) const;
+
   /// Create a new intermediate tuple in partition, initialized with row. ht_ctx is
   /// the context for the partition's hash table and hash is the precomputed hash of
   /// the row. The row can be an unaggregated or aggregated row depending on

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6cc296ec/be/src/exprs/agg-fn-evaluator.h
----------------------------------------------------------------------
diff --git a/be/src/exprs/agg-fn-evaluator.h b/be/src/exprs/agg-fn-evaluator.h
index fb39789..cde969c 100644
--- a/be/src/exprs/agg-fn-evaluator.h
+++ b/be/src/exprs/agg-fn-evaluator.h
@@ -189,6 +189,8 @@ class AggFnEvaluator {
   /// intermediate_slot_desc_ if this agg fn has the same intermediate and output type.
   const SlotDescriptor* output_slot_desc_;
 
+  /// Expression contexts for this AggFnEvaluator. Empty if there is no
+  /// expression (e.g. count(*)).
   std::vector<ExprContext*> input_expr_ctxs_;
 
   /// The enum for some of the builtins that still require special cased logic.
@@ -270,7 +272,7 @@ inline void AggFnEvaluator::Init(const std::vector<AggFnEvaluator*>& evaluators,
   }
 }
 inline void AggFnEvaluator::Add(const std::vector<AggFnEvaluator*>& evaluators,
-      const std::vector<FunctionContext*>& fn_ctxs, const TupleRow* src, Tuple* dst) {
+    const std::vector<FunctionContext*>& fn_ctxs, const TupleRow* src, Tuple* dst) {
   DCHECK_EQ(evaluators.size(), fn_ctxs.size());
   for (int i = 0; i < evaluators.size(); ++i) {
     evaluators[i]->Add(fn_ctxs[i], src, dst);