You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@impala.apache.org by kw...@apache.org on 2016/10/19 17:31:53 UTC

[1/2] incubator-impala git commit: IMPALA-4080, IMPALA-3638: Introduce ExecNode::Codegen()

Repository: incubator-impala
Updated Branches:
  refs/heads/master ee2a06d82 -> b15d992ab


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exec/topn-node.h
----------------------------------------------------------------------
diff --git a/be/src/exec/topn-node.h b/be/src/exec/topn-node.h
index 6c2bcae..5bd7ded 100644
--- a/be/src/exec/topn-node.h
+++ b/be/src/exec/topn-node.h
@@ -45,6 +45,7 @@ class TopNNode : public ExecNode {
 
   virtual Status Init(const TPlanNode& tnode, RuntimeState* state);
   virtual Status Prepare(RuntimeState* state);
+  virtual void Codegen(RuntimeState* state);
   virtual Status Open(RuntimeState* state);
   virtual Status GetNext(RuntimeState* state, RowBatch* row_batch, bool* eos);
   virtual Status Reset(RuntimeState* state);
@@ -57,9 +58,6 @@ class TopNNode : public ExecNode {
 
   friend class TupleLessThan;
 
-  /// Creates a codegen'd version of InsertBatch() that is used in Open().
-  Status Codegen(RuntimeState* state);
-
   /// Inserts all the rows in 'batch' into the queue.
   void InsertBatch(RowBatch* batch);
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exprs/case-expr.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/case-expr.cc b/be/src/exprs/case-expr.cc
index 5771f09..2262c22 100644
--- a/be/src/exprs/case-expr.cc
+++ b/be/src/exprs/case-expr.cc
@@ -182,7 +182,7 @@ string CaseExpr::DebugString() const {
 //                                   %"class.impala::TupleRow"* %row)
 //   ret i16 %else_val
 // }
-Status CaseExpr::GetCodegendComputeFn(RuntimeState* state, Function** fn) {
+Status CaseExpr::GetCodegendComputeFn(LlvmCodeGen* codegen, Function** fn) {
   if (ir_compute_fn_ != NULL) {
     *fn = ir_compute_fn_;
     return Status::OK();
@@ -191,11 +191,9 @@ Status CaseExpr::GetCodegendComputeFn(RuntimeState* state, Function** fn) {
   const int num_children = GetNumChildren();
   Function* child_fns[num_children];
   for (int i = 0; i < num_children; ++i) {
-    RETURN_IF_ERROR(children()[i]->GetCodegendComputeFn(state, &child_fns[i]));
+    RETURN_IF_ERROR(children()[i]->GetCodegendComputeFn(codegen, &child_fns[i]));
   }
 
-  LlvmCodeGen* codegen;
-  RETURN_IF_ERROR(state->GetCodegen(&codegen));
   LLVMContext& context = codegen->context();
   LlvmCodeGen::LlvmBuilder builder(context);
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exprs/case-expr.h
----------------------------------------------------------------------
diff --git a/be/src/exprs/case-expr.h b/be/src/exprs/case-expr.h
index fbf6f85..87bab76 100644
--- a/be/src/exprs/case-expr.h
+++ b/be/src/exprs/case-expr.h
@@ -30,7 +30,7 @@ class TExprNode;
 
 class CaseExpr: public Expr {
  public:
-  virtual Status GetCodegendComputeFn(RuntimeState* state, llvm::Function** fn);
+  virtual Status GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn);
 
   virtual BooleanVal GetBooleanVal(ExprContext* ctx, const TupleRow* row);
   virtual TinyIntVal GetTinyIntVal(ExprContext* ctx, const TupleRow* row);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exprs/compound-predicates.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/compound-predicates.cc b/be/src/exprs/compound-predicates.cc
index 94eb269..531a70c 100644
--- a/be/src/exprs/compound-predicates.cc
+++ b/be/src/exprs/compound-predicates.cc
@@ -65,8 +65,8 @@ string OrPredicate::DebugString() const {
   return out.str();
 }
 
-// IR codegen for compound and/or predicates.  Compound predicate has non trivial 
-// null handling as well as many branches so this is pretty complicated.  The IR 
+// IR codegen for compound and/or predicates.  Compound predicate has non trivial
+// null handling as well as many branches so this is pretty complicated.  The IR
 // for x && y is:
 //
 // define i16 @CompoundPredicate(%"class.impala::ExprContext"* %context,
@@ -86,30 +86,30 @@ string OrPredicate::DebugString() const {
 //   %val2 = trunc i8 %3 to i1
 //   %tmp_and = and i1 %val, %val2
 //   br i1 %is_null, label %lhs_null, label %lhs_not_null
-// 
+//
 // lhs_null:                                         ; preds = %entry
 //   br i1 %is_null1, label %null_block, label %lhs_null_rhs_not_null
-// 
+//
 // lhs_not_null:                                     ; preds = %entry
 //   br i1 %is_null1, label %lhs_not_null_rhs_null, label %not_null_block
-// 
+//
 // lhs_null_rhs_not_null:                            ; preds = %lhs_null
 //   br i1 %val2, label %null_block, label %not_null_block
-// 
+//
 // lhs_not_null_rhs_null:                            ; preds = %lhs_not_null
 //   br i1 %val, label %null_block, label %not_null_block
-// 
+//
 // null_block:                                       ; preds = %lhs_null_rhs_not_null,
 //                                                     %lhs_not_null_rhs_null, %lhs_null
 //   br label %ret
-// 
+//
 // not_null_block:                                   ; preds = %lhs_null_rhs_not_null,
 //                                                   %lhs_not_null_rhs_null, %lhs_not_null
 //   %4 = phi i1 [ false, %lhs_null_rhs_not_null ],
 //               [ false, %lhs_not_null_rhs_null ],
 //               [ %tmp_and, %lhs_not_null ]
 //   br label %ret
-// 
+//
 // ret:                                              ; preds = %not_null_block, %null_block
 //   %ret3 = phi i1 [ false, %null_block ], [ %4, %not_null_block ]
 //   %5 = zext i1 %ret3 to i16
@@ -118,21 +118,18 @@ string OrPredicate::DebugString() const {
 //   ret i16 %7
 // }
 Status CompoundPredicate::CodegenComputeFn(
-    bool and_fn, RuntimeState* state, Function** fn) {
+    bool and_fn, LlvmCodeGen* codegen, Function** fn) {
   if (ir_compute_fn_ != NULL) {
     *fn = ir_compute_fn_;
     return Status::OK();
   }
 
   DCHECK_EQ(GetNumChildren(), 2);
-
   Function* lhs_function;
-  RETURN_IF_ERROR(children()[0]->GetCodegendComputeFn(state, &lhs_function));
+  RETURN_IF_ERROR(children()[0]->GetCodegendComputeFn(codegen, &lhs_function));
   Function* rhs_function;
-  RETURN_IF_ERROR(children()[1]->GetCodegendComputeFn(state, &rhs_function));
-  
-  LlvmCodeGen* codegen;
-  RETURN_IF_ERROR(state->GetCodegen(&codegen));
+  RETURN_IF_ERROR(children()[1]->GetCodegendComputeFn(codegen, &rhs_function));
+
   LLVMContext& context = codegen->context();
   LlvmCodeGen::LlvmBuilder builder(context);
   Value* args[2];
@@ -143,11 +140,11 @@ Status CompoundPredicate::CodegenComputeFn(
 
   // Control blocks for aggregating results
   BasicBlock* lhs_null_block = BasicBlock::Create(context, "lhs_null", function);
-  BasicBlock* lhs_not_null_block = 
+  BasicBlock* lhs_not_null_block =
       BasicBlock::Create(context, "lhs_not_null", function);
-  BasicBlock* lhs_null_rhs_not_null_block = 
+  BasicBlock* lhs_null_rhs_not_null_block =
       BasicBlock::Create(context, "lhs_null_rhs_not_null", function);
-  BasicBlock* lhs_not_null_rhs_null_block = 
+  BasicBlock* lhs_not_null_rhs_null_block =
       BasicBlock::Create(context, "lhs_not_null_rhs_null", function);
   BasicBlock* null_block = BasicBlock::Create(context, "null_block", function);
   BasicBlock* not_null_block = BasicBlock::Create(context, "not_null_block", function);
@@ -159,7 +156,7 @@ Status CompoundPredicate::CodegenComputeFn(
   // Call rhs
   CodegenAnyVal rhs_result = CodegenAnyVal::CreateCallWrapped(
       codegen, &builder, TYPE_BOOLEAN, rhs_function, args, "rhs_call");
-  
+
   Value* lhs_is_null = lhs_result.GetIsNull();
   Value* rhs_is_null = rhs_result.GetIsNull();
   Value* lhs_value = lhs_result.GetVal();

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exprs/compound-predicates.h
----------------------------------------------------------------------
diff --git a/be/src/exprs/compound-predicates.h b/be/src/exprs/compound-predicates.h
index 3ba4ad0..0e403ba 100644
--- a/be/src/exprs/compound-predicates.h
+++ b/be/src/exprs/compound-predicates.h
@@ -34,7 +34,7 @@ class CompoundPredicate: public Predicate {
  protected:
   CompoundPredicate(const TExprNode& node) : Predicate(node) { }
 
-  Status CodegenComputeFn(bool and_fn, RuntimeState* state, llvm::Function** fn);
+  Status CodegenComputeFn(bool and_fn, LlvmCodeGen* codegen, llvm::Function** fn);
 };
 
 /// Expr for evaluating and (&&) operators
@@ -42,8 +42,8 @@ class AndPredicate: public CompoundPredicate {
  public:
   virtual impala_udf::BooleanVal GetBooleanVal(ExprContext* context, const TupleRow*);
 
-  virtual Status GetCodegendComputeFn(RuntimeState* state, llvm::Function** fn) {
-    return CompoundPredicate::CodegenComputeFn(true, state, fn);
+  virtual Status GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn) {
+    return CompoundPredicate::CodegenComputeFn(true, codegen, fn);
   }
 
  protected:
@@ -61,8 +61,8 @@ class OrPredicate: public CompoundPredicate {
  public:
   virtual impala_udf::BooleanVal GetBooleanVal(ExprContext* context, const TupleRow*);
 
-  virtual Status GetCodegendComputeFn(RuntimeState* state, llvm::Function** fn) {
-    return CompoundPredicate::CodegenComputeFn(false, state, fn);
+  virtual Status GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn) {
+    return CompoundPredicate::CodegenComputeFn(false, codegen, fn);
   }
 
  protected:

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exprs/conditional-functions.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/conditional-functions.cc b/be/src/exprs/conditional-functions.cc
index eed9b8e..6f8f362 100644
--- a/be/src/exprs/conditional-functions.cc
+++ b/be/src/exprs/conditional-functions.cc
@@ -25,8 +25,8 @@ using namespace impala;
 using namespace impala_udf;
 
 #define CONDITIONAL_CODEGEN_FN(expr_class) \
-  Status expr_class::GetCodegendComputeFn(RuntimeState* state, llvm::Function** fn) { \
-    return GetCodegendComputeFnWrapper(state, fn); \
+  Status expr_class::GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn) { \
+    return GetCodegendComputeFnWrapper(codegen, fn); \
   }
 
 CONDITIONAL_CODEGEN_FN(IsNullExpr);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exprs/conditional-functions.h
----------------------------------------------------------------------
diff --git a/be/src/exprs/conditional-functions.h b/be/src/exprs/conditional-functions.h
index 8783c6e..12c9b1e 100644
--- a/be/src/exprs/conditional-functions.h
+++ b/be/src/exprs/conditional-functions.h
@@ -73,7 +73,7 @@ class IsNullExpr : public Expr {
   virtual TimestampVal GetTimestampVal(ExprContext* context, const TupleRow* row);
   virtual DecimalVal GetDecimalVal(ExprContext* context, const TupleRow* row);
 
-  virtual Status GetCodegendComputeFn(RuntimeState* state, llvm::Function** fn);
+  virtual Status GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn);
   virtual std::string DebugString() const { return Expr::DebugString("IsNullExpr"); }
 
  protected:
@@ -94,7 +94,7 @@ class NullIfExpr : public Expr {
   virtual TimestampVal GetTimestampVal(ExprContext* context, const TupleRow* row);
   virtual DecimalVal GetDecimalVal(ExprContext* context, const TupleRow* row);
 
-  virtual Status GetCodegendComputeFn(RuntimeState* state, llvm::Function** fn);
+  virtual Status GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn);
   virtual std::string DebugString() const { return Expr::DebugString("NullIfExpr"); }
 
  protected:
@@ -115,7 +115,7 @@ class IfExpr : public Expr {
   virtual TimestampVal GetTimestampVal(ExprContext* context, const TupleRow* row);
   virtual DecimalVal GetDecimalVal(ExprContext* context, const TupleRow* row);
 
-  virtual Status GetCodegendComputeFn(RuntimeState* state, llvm::Function** fn);
+  virtual Status GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn);
   virtual std::string DebugString() const { return Expr::DebugString("IfExpr"); }
 
  protected:
@@ -141,7 +141,7 @@ class CoalesceExpr : public Expr {
  protected:
   friend class Expr;
   CoalesceExpr(const TExprNode& node) : Expr(node) { }
-  virtual Status GetCodegendComputeFn(RuntimeState* state, llvm::Function** fn);
+  virtual Status GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn);
 };
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exprs/expr.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/expr.cc b/be/src/exprs/expr.cc
index 0e3610d..8d21830 100644
--- a/be/src/exprs/expr.cc
+++ b/be/src/exprs/expr.cc
@@ -267,6 +267,16 @@ Status Expr::CreateExpr(ObjectPool* pool, const TExprNode& texpr_node, Expr** ex
   }
 }
 
+bool Expr::NeedCodegen(const TExpr& texpr) {
+  for (const TExprNode& texpr_node : texpr.nodes) {
+    if (texpr_node.node_type == TExprNodeType::FUNCTION_CALL && texpr_node.__isset.fn &&
+        texpr_node.fn.binary_type == TFunctionBinaryType::IR) {
+      return true;
+    }
+  }
+  return false;
+}
+
 struct MemLayoutData {
   int expr_idx;
   int byte_size;
@@ -655,13 +665,11 @@ int Expr::InlineConstants(const FunctionContext::TypeDesc& return_type,
   return replaced;
 }
 
-Status Expr::GetCodegendComputeFnWrapper(RuntimeState* state, Function** fn) {
+Status Expr::GetCodegendComputeFnWrapper(LlvmCodeGen* codegen, Function** fn) {
   if (ir_compute_fn_ != NULL) {
     *fn = ir_compute_fn_;
     return Status::OK();
   }
-  LlvmCodeGen* codegen;
-  RETURN_IF_ERROR(state->GetCodegen(&codegen));
   Function* static_getval_fn = GetStaticGetValWrapper(type(), codegen);
 
   // Call it passing this as the additional first argument.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exprs/expr.h
----------------------------------------------------------------------
diff --git a/be/src/exprs/expr.h b/be/src/exprs/expr.h
index 0cdafeb..99d463d 100644
--- a/be/src/exprs/expr.h
+++ b/be/src/exprs/expr.h
@@ -173,6 +173,10 @@ class Expr {
   /// Returns the number of slots added to the vector
   virtual int GetSlotIds(std::vector<SlotId>* slot_ids) const;
 
+  /// Returns true iff the expression 'texpr' contains a UDF available only as LLVM IR,
+  /// in which case the expression cannot be interpreted and codegen must be used.
+  static bool NeedCodegen(const TExpr& texpr);
+
   /// Create expression tree from the list of nodes contained in texpr within 'pool'.
   /// Returns the root of expression tree in 'expr' and the corresponding ExprContext in
   /// 'ctx'.
@@ -234,7 +238,7 @@ class Expr {
   //
   /// The function should evaluate this expr over 'row' and return the result as the
   /// appropriate type of AnyVal.
-  virtual Status GetCodegendComputeFn(RuntimeState* state, llvm::Function** fn) = 0;
+  virtual Status GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn) = 0;
 
   /// If this expr is constant, evaluates the expr with no input row argument and returns
   /// the output. Returns NULL if the argument is not constant. The returned AnyVal* is
@@ -389,7 +393,7 @@ class Expr {
   /// functions that use the IRBuilder. It doesn't provide any performance benefit over
   /// the interpreted path.
   /// TODO: this should be replaced with fancier xcompiling infrastructure
-  Status GetCodegendComputeFnWrapper(RuntimeState* state, llvm::Function** fn);
+  Status GetCodegendComputeFnWrapper(LlvmCodeGen* codegen, llvm::Function** fn);
 
   /// Returns the IR version of the static Get*Val() wrapper function corresponding to
   /// 'type'. This is used for calling interpreted Get*Val() functions from codegen'd

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exprs/hive-udf-call.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/hive-udf-call.cc b/be/src/exprs/hive-udf-call.cc
index c7a3f32..453f876 100644
--- a/be/src/exprs/hive-udf-call.cc
+++ b/be/src/exprs/hive-udf-call.cc
@@ -277,8 +277,8 @@ void HiveUdfCall::Close(RuntimeState* state, ExprContext* ctx,
   Expr::Close(state, ctx, scope);
 }
 
-Status HiveUdfCall::GetCodegendComputeFn(RuntimeState* state, llvm::Function** fn) {
-  return GetCodegendComputeFnWrapper(state, fn);
+Status HiveUdfCall::GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn) {
+  return GetCodegendComputeFnWrapper(codegen, fn);
 }
 
 string HiveUdfCall::DebugString() const {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exprs/hive-udf-call.h
----------------------------------------------------------------------
diff --git a/be/src/exprs/hive-udf-call.h b/be/src/exprs/hive-udf-call.h
index 8a70540..74302eb 100644
--- a/be/src/exprs/hive-udf-call.h
+++ b/be/src/exprs/hive-udf-call.h
@@ -82,7 +82,7 @@ class HiveUdfCall : public Expr {
   virtual TimestampVal GetTimestampVal(ExprContext* ctx, const TupleRow*);
   virtual DecimalVal GetDecimalVal(ExprContext* ctx, const TupleRow*);
 
-  virtual Status GetCodegendComputeFn(RuntimeState* state, llvm::Function** fn);
+  virtual Status GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn);
 
  protected:
   friend class Expr;

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exprs/is-not-empty-predicate.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/is-not-empty-predicate.cc b/be/src/exprs/is-not-empty-predicate.cc
index 88aac42..521ebf4 100644
--- a/be/src/exprs/is-not-empty-predicate.cc
+++ b/be/src/exprs/is-not-empty-predicate.cc
@@ -42,9 +42,9 @@ Status IsNotEmptyPredicate::Prepare(RuntimeState* state,
   return Status::OK();
 }
 
-Status IsNotEmptyPredicate::GetCodegendComputeFn(RuntimeState* state,
+Status IsNotEmptyPredicate::GetCodegendComputeFn(LlvmCodeGen* codegen,
     llvm::Function** fn) {
-  return GetCodegendComputeFnWrapper(state, fn);
+  return GetCodegendComputeFnWrapper(codegen, fn);
 }
 
 string IsNotEmptyPredicate::DebugString() const {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exprs/is-not-empty-predicate.h
----------------------------------------------------------------------
diff --git a/be/src/exprs/is-not-empty-predicate.h b/be/src/exprs/is-not-empty-predicate.h
index d8e15be..2454a6d 100644
--- a/be/src/exprs/is-not-empty-predicate.h
+++ b/be/src/exprs/is-not-empty-predicate.h
@@ -31,7 +31,7 @@ class IsNotEmptyPredicate: public Predicate {
  public:
   virtual Status Prepare(RuntimeState* state, const RowDescriptor& row_desc,
                          ExprContext* ctx);
-  virtual Status GetCodegendComputeFn(RuntimeState* state, llvm::Function** fn);
+  virtual Status GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn);
   virtual BooleanVal GetBooleanVal(ExprContext* context, const TupleRow* row);
   virtual std::string DebugString() const;
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exprs/literal.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/literal.cc b/be/src/exprs/literal.cc
index 1d816c3..caf6c14 100644
--- a/be/src/exprs/literal.cc
+++ b/be/src/exprs/literal.cc
@@ -375,15 +375,13 @@ string Literal::DebugString() const {
 // entry:
 //   ret { i8, i64 } { i8 0, i64 10 }
 // }
-Status Literal::GetCodegendComputeFn(RuntimeState* state, llvm::Function** fn) {
+Status Literal::GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn) {
   if (ir_compute_fn_ != NULL) {
     *fn = ir_compute_fn_;
     return Status::OK();
   }
 
   DCHECK_EQ(GetNumChildren(), 0);
-  LlvmCodeGen* codegen;
-  RETURN_IF_ERROR(state->GetCodegen(&codegen));
   Value* args[2];
   *fn = CreateIrFunctionPrototype(codegen, "Literal", &args);
   BasicBlock* entry_block = BasicBlock::Create(codegen->context(), "entry", *fn);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exprs/literal.h
----------------------------------------------------------------------
diff --git a/be/src/exprs/literal.h b/be/src/exprs/literal.h
index 24bc913..65219e0 100644
--- a/be/src/exprs/literal.h
+++ b/be/src/exprs/literal.h
@@ -45,7 +45,7 @@ class Literal: public Expr {
   /// Literal.
   static Literal* CreateLiteral(const ColumnType& type, const std::string& str);
 
-  virtual Status GetCodegendComputeFn(RuntimeState* state, llvm::Function** fn);
+  virtual Status GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn);
 
   virtual impala_udf::BooleanVal GetBooleanVal(ExprContext*, const TupleRow*);
   virtual impala_udf::TinyIntVal GetTinyIntVal(ExprContext*, const TupleRow*);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exprs/null-literal.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/null-literal.cc b/be/src/exprs/null-literal.cc
index 54d6247..a8f4241 100644
--- a/be/src/exprs/null-literal.cc
+++ b/be/src/exprs/null-literal.cc
@@ -91,15 +91,13 @@ CollectionVal NullLiteral::GetCollectionVal(ExprContext* context, const TupleRow
 // entry:
 //   ret { i8, i64 } { i8 1, i64 0 }
 // }
-Status NullLiteral::GetCodegendComputeFn(RuntimeState* state, llvm::Function** fn) {
+Status NullLiteral::GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn) {
   if (ir_compute_fn_ != NULL) {
     *fn = ir_compute_fn_;
     return Status::OK();
   }
 
   DCHECK_EQ(GetNumChildren(), 0);
-  LlvmCodeGen* codegen;
-  RETURN_IF_ERROR(state->GetCodegen(&codegen));
   Value* args[2];
   *fn = CreateIrFunctionPrototype(codegen, "NullLiteral", &args);
   BasicBlock* entry_block = BasicBlock::Create(codegen->context(), "entry", *fn);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exprs/null-literal.h
----------------------------------------------------------------------
diff --git a/be/src/exprs/null-literal.h b/be/src/exprs/null-literal.h
index 9297cf6..0a0f9a1 100644
--- a/be/src/exprs/null-literal.h
+++ b/be/src/exprs/null-literal.h
@@ -28,7 +28,7 @@ class TExprNode;
 class NullLiteral: public Expr {
  public:
   NullLiteral(PrimitiveType type) : Expr(type) { }
-  virtual Status GetCodegendComputeFn(RuntimeState* state, llvm::Function** fn);
+  virtual Status GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn);
 
   virtual impala_udf::BooleanVal GetBooleanVal(ExprContext*, const TupleRow*);
   virtual impala_udf::TinyIntVal GetTinyIntVal(ExprContext*, const TupleRow*);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exprs/scalar-fn-call.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/scalar-fn-call.cc b/be/src/exprs/scalar-fn-call.cc
index b2fce53..17cbd64 100644
--- a/be/src/exprs/scalar-fn-call.cc
+++ b/be/src/exprs/scalar-fn-call.cc
@@ -22,6 +22,11 @@
 #include <llvm/IR/Attributes.h>
 #include <llvm/ExecutionEngine/ExecutionEngine.h>
 
+#include <boost/preprocessor/punctuation/comma_if.hpp>
+#include <boost/preprocessor/repetition/repeat.hpp>
+#include <boost/preprocessor/repetition/enum_params.hpp>
+#include <boost/preprocessor/repetition/repeat_from_to.hpp>
+
 #include "codegen/codegen-anyval.h"
 #include "codegen/llvm-codegen.h"
 #include "exprs/anyval-util.h"
@@ -41,6 +46,9 @@ using namespace impala;
 using namespace impala_udf;
 using namespace strings;
 
+// Maximum number of arguments the interpretation path supports.
+#define MAX_INTERP_ARGS 20
+
 ScalarFnCall::ScalarFnCall(const TExprNode& node)
   : Expr(node),
     vararg_start_idx_(node.__isset.vararg_start_idx ?
@@ -89,33 +97,30 @@ Status ScalarFnCall::Prepare(RuntimeState* state, const RowDescriptor& desc,
       varargs_buffer_size);
 
   // Use the interpreted path and call the builtin without codegen if:
-  // 1. there are char arguments (as they aren't supported yet)
-  // OR
-  // if all of the following conditions are satisfied:
-  // 2. the codegen object hasn't been created yet.
-  // 3. the planner doesn't insist on using codegen.
-  // 4. we're calling a builtin or native UDF with <= 8 non-variadic arguments.
-  //    The templates for UDFs used in the interpretation path support up to 8
-  //    arguments only.
-  //
-  // This saves us the overhead of creating the codegen object when it's not necessary
-  // (i.e., in plan fragments with no codegen-enabled operators).
+  // 1. codegen is disabled or
+  // 2. there are char arguments (as they aren't supported yet)
   //
   // TODO: codegen for char arguments
-  // TODO: remove condition 2 above and put a flag in the RuntimeState to indicate
-  // if codegen should be enabled for the entire fragment.
-  bool skip_codegen = false;
-  if (has_char_arg_or_result) {
-    skip_codegen = true;
-  } else if (!state->codegen_created() && !state->ShouldCodegenExpr()) {
-    skip_codegen = fn_.binary_type != TFunctionBinaryType::IR && NumFixedArgs() <= 8;
-  }
-  if (skip_codegen) {
-    // Builtins with char arguments must still have <= 8 arguments.
-    // TODO: delete when we have codegen for char arguments
-    if (has_char_arg_or_result) {
-      DCHECK(NumFixedArgs() <= 8 && fn_.binary_type == TFunctionBinaryType::BUILTIN);
+  bool codegen_enabled = state->codegen_enabled();
+  if (!codegen_enabled || has_char_arg_or_result) {
+    if (fn_.binary_type == TFunctionBinaryType::IR) {
+      // CHAR or VARCHAR are not supported as input arguments or return values for UDFs.
+      DCHECK(!has_char_arg_or_result && !codegen_enabled);
+      return Status(Substitute("Cannot interpret LLVM IR UDF '$0': Codegen is needed. "
+          "Please set DISABLE_CODEGEN to false.", fn_.name.function_name));
     }
+
+    // The templates for builtin or native UDFs used in the interpretation path
+    // support at most MAX_INTERP_ARGS (20) fixed arguments.
+    if (NumFixedArgs() > MAX_INTERP_ARGS) {
+      DCHECK_EQ(fn_.binary_type, TFunctionBinaryType::NATIVE);
+      // CHAR or VARCHAR are not supported as input arguments or return values for UDFs.
+      DCHECK(!has_char_arg_or_result && !codegen_enabled);
+      return Status(Substitute("Cannot interpret native UDF '$0': number of arguments is "
+          "more than $1. Codegen is needed. Please set DISABLE_CODEGEN to false.",
+          fn_.name.function_name, MAX_INTERP_ARGS));
+    }
+
     Status status = LibCache::instance()->GetSoFunctionPtr(
         fn_.hdfs_location, fn_.scalar_fn.symbol, &scalar_fn_, &cache_entry_);
     if (!status.ok()) {
@@ -127,13 +132,12 @@ Status ScalarFnCall::Prepare(RuntimeState* state, const RowDescriptor& desc,
         DCHECK_EQ(fn_.binary_type, TFunctionBinaryType::NATIVE);
         return Status(Substitute("Problem loading UDF '$0':\n$1",
             fn_.name.function_name, status.GetDetail()));
-        return status;
       }
     }
   } else {
     // If we got here, either codegen is enabled or we need codegen to run this function.
-    LlvmCodeGen* codegen;
-    RETURN_IF_ERROR(state->GetCodegen(&codegen));
+    LlvmCodeGen* codegen = state->codegen();
+    DCHECK(codegen != NULL);
 
     if (fn_.binary_type == TFunctionBinaryType::IR) {
       string local_path;
@@ -146,7 +150,7 @@ Status ScalarFnCall::Prepare(RuntimeState* state, const RowDescriptor& desc,
     }
 
     llvm::Function* ir_udf_wrapper;
-    RETURN_IF_ERROR(GetCodegendComputeFn(state, &ir_udf_wrapper));
+    RETURN_IF_ERROR(GetCodegendComputeFn(codegen, &ir_udf_wrapper));
     // TODO: don't do this for child exprs
     codegen->AddFunctionToJit(ir_udf_wrapper, &scalar_fn_wrapper_);
   }
@@ -260,7 +264,7 @@ bool ScalarFnCall::IsConstant() const {
 //        i32 4,
 //        i64* inttoptr (i64 89111072 to i64*))
 //   ret { i8, double } %result
-Status ScalarFnCall::GetCodegendComputeFn(RuntimeState* state, llvm::Function** fn) {
+Status ScalarFnCall::GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn) {
   if (ir_compute_fn_ != NULL) {
     *fn = ir_compute_fn_;
     return Status::OK();
@@ -275,11 +279,8 @@ Status ScalarFnCall::GetCodegendComputeFn(RuntimeState* state, llvm::Function**
     }
   }
 
-  LlvmCodeGen* codegen;
-  RETURN_IF_ERROR(state->GetCodegen(&codegen));
-
   llvm::Function* udf;
-  RETURN_IF_ERROR(GetUdf(state, &udf));
+  RETURN_IF_ERROR(GetUdf(codegen, &udf));
 
   // Create wrapper that computes args and calls UDF
   stringstream fn_name;
@@ -326,10 +327,9 @@ Status ScalarFnCall::GetCodegendComputeFn(RuntimeState* state, llvm::Function**
   for (int i = 0; i < GetNumChildren(); ++i) {
     llvm::Function* child_fn = NULL;
     vector<llvm::Value*> child_fn_args;
-    if (state->codegen_enabled()) {
-      // Set 'child_fn' to the codegen'd function, sets child_fn = NULL if codegen fails
-      children_[i]->GetCodegendComputeFn(state, &child_fn);
-    }
+    // Set 'child_fn' to the codegen'd function, sets child_fn = NULL if codegen fails
+    children_[i]->GetCodegendComputeFn(codegen, &child_fn);
+
     if (child_fn == NULL) {
       // Set 'child_fn' to the interpreted function
       child_fn = GetStaticGetValWrapper(children_[i]->type(), codegen);
@@ -401,21 +401,16 @@ Status ScalarFnCall::GetCodegendComputeFn(RuntimeState* state, llvm::Function**
   return Status::OK();
 }
 
-Status ScalarFnCall::GetUdf(RuntimeState* state, llvm::Function** udf) {
-  LlvmCodeGen* codegen;
-  RETURN_IF_ERROR(state->GetCodegen(&codegen));
-
-  // from_utc_timestamp and to_utc_timestamp have inline ASM that cannot be JIT'd.
+Status ScalarFnCall::GetUdf(LlvmCodeGen* codegen, llvm::Function** udf) {
+  // from_utc_timestamp() and to_utc_timestamp() have inline ASM that cannot be JIT'd.
   // TimestampFunctions::AddSub() contains a try/catch which doesn't work in JIT'd
-  // code.  Always use the statically compiled versions of these functions so the
-  // xcompiled versions are not included in the final module to be JIT'd.
-  // TODO: fix this
+  // code. Always use the interpreted version of these functions.
+  // TODO: fix these built-in functions so we don't need 'broken_builtin' below.
   bool broken_builtin = fn_.name.function_name == "from_utc_timestamp" ||
                         fn_.name.function_name == "to_utc_timestamp" ||
                         fn_.scalar_fn.symbol.find("AddSub") != string::npos;
   if (fn_.binary_type == TFunctionBinaryType::NATIVE ||
-      (fn_.binary_type == TFunctionBinaryType::BUILTIN &&
-       (!state->codegen_enabled() || broken_builtin))) {
+      (fn_.binary_type == TFunctionBinaryType::BUILTIN && broken_builtin)) {
     // In this path, we are code that has been statically compiled to assembly.
     // This can either be a UDF implemented in a .so or a builtin using the UDF
     // interface with the code in impalad.
@@ -478,7 +473,6 @@ Status ScalarFnCall::GetUdf(RuntimeState* state, llvm::Function** udf) {
   } else if (fn_.binary_type == TFunctionBinaryType::BUILTIN) {
     // In this path, we're running a builtin with the UDF interface. The IR is
     // in the llvm module.
-    DCHECK(state->codegen_enabled());
     *udf = codegen->GetFunction(fn_.scalar_fn.symbol);
     if (*udf == NULL) {
       // Builtins symbols should exist unless there is a version mismatch.
@@ -522,8 +516,8 @@ Status ScalarFnCall::GetFunction(RuntimeState* state, const string& symbol, void
                                                   &cache_entry_);
   } else {
     DCHECK_EQ(fn_.binary_type, TFunctionBinaryType::IR);
-    LlvmCodeGen* codegen;
-    RETURN_IF_ERROR(state->GetCodegen(&codegen));
+    LlvmCodeGen* codegen = state->codegen();
+    DCHECK(codegen != NULL);
     llvm::Function* ir_fn = codegen->GetFunction(symbol);
     if (ir_fn == NULL) {
       stringstream ss;
@@ -563,122 +557,56 @@ RETURN_TYPE ScalarFnCall::InterpretEval(ExprContext* context, const TupleRow* ro
 
   if (vararg_start_idx_ == -1) {
     switch (children_.size()) {
-      case 0:
-        typedef RETURN_TYPE (*ScalarFn0)(FunctionContext*);
-        return reinterpret_cast<ScalarFn0>(scalar_fn_)(fn_ctx);
-      case 1:
-        typedef RETURN_TYPE (*ScalarFn1)(FunctionContext*, const AnyVal& a1);
-        return reinterpret_cast<ScalarFn1>(scalar_fn_)(fn_ctx,
-            *(*input_vals)[0]);
-      case 2:
-        typedef RETURN_TYPE (*ScalarFn2)(FunctionContext*, const AnyVal& a1,
-            const AnyVal& a2);
-        return reinterpret_cast<ScalarFn2>(scalar_fn_)(fn_ctx,
-            *(*input_vals)[0], *(*input_vals)[1]);
-      case 3:
-        typedef RETURN_TYPE (*ScalarFn3)(FunctionContext*, const AnyVal& a1,
-            const AnyVal& a2, const AnyVal& a3);
-        return reinterpret_cast<ScalarFn3>(scalar_fn_)(fn_ctx,
-            *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2]);
-      case 4:
-        typedef RETURN_TYPE (*ScalarFn4)(FunctionContext*, const AnyVal& a1,
-            const AnyVal& a2, const AnyVal& a3, const AnyVal& a4);
-        return reinterpret_cast<ScalarFn4>(scalar_fn_)(fn_ctx,
-            *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], *(*input_vals)[3]);
-      case 5:
-        typedef RETURN_TYPE (*ScalarFn5)(FunctionContext*, const AnyVal& a1,
-            const AnyVal& a2, const AnyVal& a3, const AnyVal& a4, const AnyVal& a5);
-        return reinterpret_cast<ScalarFn5>(scalar_fn_)(fn_ctx,
-            *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], *(*input_vals)[3],
-            *(*input_vals)[4]);
-      case 6:
-        typedef RETURN_TYPE (*ScalarFn6)(FunctionContext*, const AnyVal& a1,
-            const AnyVal& a2, const AnyVal& a3, const AnyVal& a4, const AnyVal& a5,
-            const AnyVal& a6);
-        return reinterpret_cast<ScalarFn6>(scalar_fn_)(fn_ctx,
-            *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], *(*input_vals)[3],
-            *(*input_vals)[4], *(*input_vals)[5]);
-      case 7:
-        typedef RETURN_TYPE (*ScalarFn7)(FunctionContext*, const AnyVal& a1,
-            const AnyVal& a2, const AnyVal& a3, const AnyVal& a4, const AnyVal& a5,
-            const AnyVal& a6, const AnyVal& a7);
-        return reinterpret_cast<ScalarFn7>(scalar_fn_)(fn_ctx,
-            *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], *(*input_vals)[3],
-            *(*input_vals)[4], *(*input_vals)[5], *(*input_vals)[6]);
-      case 8:
-        typedef RETURN_TYPE (*ScalarFn8)(FunctionContext*, const AnyVal& a1,
-            const AnyVal& a2, const AnyVal& a3, const AnyVal& a4, const AnyVal& a5,
-            const AnyVal& a6, const AnyVal& a7, const AnyVal& a8);
-        return reinterpret_cast<ScalarFn8>(scalar_fn_)(fn_ctx,
-            *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], *(*input_vals)[3],
-            *(*input_vals)[4], *(*input_vals)[5], *(*input_vals)[6], *(*input_vals)[7]);
+
+#define ARG_DECL_ONE(z, n, data) BOOST_PP_COMMA_IF(n) const AnyVal&
+#define ARG_DECL_LIST(n) \
+    FunctionContext* BOOST_PP_COMMA_IF(n) BOOST_PP_REPEAT(n, ARG_DECL_ONE, unused)
+#define ARG_ONE(z, n, data) BOOST_PP_COMMA_IF(n) *(*input_vals)[n]
+#define ARG_LIST(n) fn_ctx BOOST_PP_COMMA_IF(n) BOOST_PP_REPEAT(n, ARG_ONE, unused)
+
+   // Expands to code snippet like the following for X from 0 to 20:
+   // case X:
+   //     typedef RETURN_TYPE (*ScalarFnX)(FunctionContext*, const AnyVal& a1, ...,
+   //         const AnyVal& aX);
+   //     return reinterpret_cast<ScalarFnX>(scalar_fn_)(fn_ctx, *(*input_vals)[0], ...,
+   //         *(*input_vals)[X-1]);
+#define SCALAR_FN_TYPE(n) BOOST_PP_CAT(ScalarFn, n)
+#define INTERP_SCALAR_FN(z, n, unused)                                       \
+      case n:                                                                \
+        typedef RETURN_TYPE (*SCALAR_FN_TYPE(n))(ARG_DECL_LIST(n));          \
+        return reinterpret_cast<SCALAR_FN_TYPE(n)>(scalar_fn_)(ARG_LIST(n));
+
+      // Support up to MAX_INTERP_ARGS arguments in the interpretation path
+      BOOST_PP_REPEAT_FROM_TO(0, BOOST_PP_ADD(MAX_INTERP_ARGS, 1),
+          INTERP_SCALAR_FN, unused)
+
       default:
-        DCHECK(false) << "Interpreted path not implemented. We should have "
-                      << "codegen'd the wrapper";
+        DCHECK(false) << "Interpreted path not implemented.";
     }
-   } else {
+  } else {
     int num_varargs = children_.size() - NumFixedArgs();
     const AnyVal* varargs = reinterpret_cast<AnyVal*>(fn_ctx->impl()->varargs_buffer());
     switch (NumFixedArgs()) {
-      case 0:
-        typedef RETURN_TYPE (*VarargFn0)(FunctionContext*, int num_varargs,
-            const AnyVal* varargs);
-        return reinterpret_cast<VarargFn0>(scalar_fn_)(fn_ctx, num_varargs, varargs);
-      case 1:
-        typedef RETURN_TYPE (*VarargFn1)(FunctionContext*, const AnyVal& a1,
-            int num_varargs, const AnyVal* varargs);
-        return reinterpret_cast<VarargFn1>(scalar_fn_)(fn_ctx, *(*input_vals)[0],
-            num_varargs, varargs);
-      case 2:
-        typedef RETURN_TYPE (*VarargFn2)(FunctionContext*, const AnyVal& a1,
-            const AnyVal& a2, int num_varargs, const AnyVal* varargs);
-        return reinterpret_cast<VarargFn2>(scalar_fn_)(fn_ctx, *(*input_vals)[0],
-            *(*input_vals)[1], num_varargs, varargs);
-      case 3:
-        typedef RETURN_TYPE (*VarargFn3)(FunctionContext*, const AnyVal& a1,
-            const AnyVal& a2, const AnyVal& a3, int num_varargs, const AnyVal* varargs);
-        return reinterpret_cast<VarargFn3>(scalar_fn_)(fn_ctx, *(*input_vals)[0],
-            *(*input_vals)[1], *(*input_vals)[2], num_varargs, varargs);
-      case 4:
-        typedef RETURN_TYPE (*VarargFn4)(FunctionContext*, const AnyVal& a1,
-            const AnyVal& a2, const AnyVal& a3, const AnyVal& a4, int num_varargs,
-            const AnyVal* varargs);
-        return reinterpret_cast<VarargFn4>(scalar_fn_)(fn_ctx,
-            *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], *(*input_vals)[3],
-            num_varargs, varargs);
-      case 5:
-        typedef RETURN_TYPE (*VarargFn5)(FunctionContext*, const AnyVal& a1,
-            const AnyVal& a2, const AnyVal& a3, const AnyVal& a4, const AnyVal& a5,
-            int num_varargs, const AnyVal* varargs);
-        return reinterpret_cast<VarargFn5>(scalar_fn_)(fn_ctx,
-            *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], *(*input_vals)[3],
-            *(*input_vals)[4], num_varargs, varargs);
-      case 6:
-        typedef RETURN_TYPE (*VarargFn6)(FunctionContext*, const AnyVal& a1,
-            const AnyVal& a2, const AnyVal& a3, const AnyVal& a4, const AnyVal& a5,
-            const AnyVal& a6, int num_varargs, const AnyVal* varargs);
-        return reinterpret_cast<VarargFn6>(scalar_fn_)(fn_ctx,
-            *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], *(*input_vals)[3],
-            *(*input_vals)[4], *(*input_vals)[5], num_varargs, varargs);
-      case 7:
-        typedef RETURN_TYPE (*VarargFn7)(FunctionContext*, const AnyVal& a1,
-            const AnyVal& a2, const AnyVal& a3, const AnyVal& a4, const AnyVal& a5,
-            const AnyVal& a6, const AnyVal& a7, int num_varargs, const AnyVal* varargs);
-        return reinterpret_cast<VarargFn7>(scalar_fn_)(fn_ctx,
-            *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], *(*input_vals)[3],
-            *(*input_vals)[4], *(*input_vals)[5], *(*input_vals)[6], num_varargs, varargs);
-      case 8:
-        typedef RETURN_TYPE (*VarargFn8)(FunctionContext*, const AnyVal& a1,
-            const AnyVal& a2, const AnyVal& a3, const AnyVal& a4, const AnyVal& a5,
-            const AnyVal& a6, const AnyVal& a7, const AnyVal& a8, int num_varargs,
-            const AnyVal* varargs);
-        return reinterpret_cast<VarargFn8>(scalar_fn_)(fn_ctx,
-            *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], *(*input_vals)[3],
-            *(*input_vals)[4], *(*input_vals)[5], *(*input_vals)[6], *(*input_vals)[7],
+
+   // Expands to code snippet like the following for X from 0 to 20:
+   // case X:
+   //     typedef RETURN_TYPE (*VarargFnX)(FunctionContext*, const AnyVal& a1, ...,
+   //         const AnyVal& aX, int num_varargs, const AnyVal* varargs);
+   //     return reinterpret_cast<VarargFnX>(scalar_fn_)(fn_ctx, *(*input_vals)[0], ...,
+   //         *(*input_vals)[X-1], num_varargs, varargs);
+#define SCALAR_VARARG_FN_TYPE(n) BOOST_PP_CAT(VarargFn, n)
+#define INTERP_SCALAR_VARARG_FN(z, n, text)                                        \
+      case n:                                                                      \
+        typedef RETURN_TYPE (*SCALAR_VARARG_FN_TYPE(n))(ARG_DECL_LIST(n), int,     \
+            const AnyVal*);                                                        \
+        return reinterpret_cast<SCALAR_VARARG_FN_TYPE(n)>(scalar_fn_)(ARG_LIST(n), \
             num_varargs, varargs);
+
+      BOOST_PP_REPEAT_FROM_TO(0, BOOST_PP_ADD(MAX_INTERP_ARGS, 1),
+         INTERP_SCALAR_VARARG_FN, unused)
+
       default:
-        DCHECK(false) << "Interpreted path not implemented. We should have "
-                      << "codegen'd the wrapper";
+        DCHECK(false) << "Interpreted path not implemented.";
     }
   }
   return RETURN_TYPE::null();

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exprs/scalar-fn-call.h
----------------------------------------------------------------------
diff --git a/be/src/exprs/scalar-fn-call.h b/be/src/exprs/scalar-fn-call.h
index 8cc8a19..e48b9df 100644
--- a/be/src/exprs/scalar-fn-call.h
+++ b/be/src/exprs/scalar-fn-call.h
@@ -60,7 +60,7 @@ class ScalarFnCall: public Expr {
                          ExprContext* context);
   virtual Status Open(RuntimeState* state, ExprContext* context,
       FunctionContext::FunctionStateScope scope = FunctionContext::FRAGMENT_LOCAL);
-  virtual Status GetCodegendComputeFn(RuntimeState* state, llvm::Function** fn);
+  virtual Status GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn);
   virtual void Close(RuntimeState* state, ExprContext* context,
       FunctionContext::FunctionStateScope scope = FunctionContext::FRAGMENT_LOCAL);
 
@@ -107,7 +107,7 @@ class ScalarFnCall: public Expr {
   }
 
   /// Loads the native or IR function from HDFS and puts the result in *udf.
-  Status GetUdf(RuntimeState* state, llvm::Function** udf);
+  Status GetUdf(LlvmCodeGen* codegen, llvm::Function** udf);
 
   /// Loads the native or IR function 'symbol' from HDFS and puts the result in *fn.
   /// If the function is loaded from an IR module, it cannot be called until the module

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exprs/slot-ref.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/slot-ref.cc b/be/src/exprs/slot-ref.cc
index 69f555b..0a766fa 100644
--- a/be/src/exprs/slot-ref.cc
+++ b/be/src/exprs/slot-ref.cc
@@ -155,7 +155,7 @@ string SlotRef::DebugString() const {
 //
 // TODO: We could generate a typed struct (and not a char*) for Tuple for llvm.  We know
 // the types from the TupleDesc.  It will likely make this code simpler to reason about.
-Status SlotRef::GetCodegendComputeFn(RuntimeState* state, llvm::Function** fn) {
+Status SlotRef::GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn) {
   if (type_.type == TYPE_CHAR) {
     *fn = NULL;
     return Status("Codegen for Char not supported.");
@@ -166,9 +166,6 @@ Status SlotRef::GetCodegendComputeFn(RuntimeState* state, llvm::Function** fn) {
   }
 
   DCHECK_EQ(GetNumChildren(), 0);
-  LlvmCodeGen* codegen;
-  RETURN_IF_ERROR(state->GetCodegen(&codegen));
-
   // SlotRefs are based on the slot_id and tuple_idx.  Combine them to make a
   // query-wide unique id. We also need to combine whether the tuple is nullable. For
   // example, in an outer join the scan node could have the same tuple id and slot id

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exprs/slot-ref.h
----------------------------------------------------------------------
diff --git a/be/src/exprs/slot-ref.h b/be/src/exprs/slot-ref.h
index d3d8368..6ca3c89 100644
--- a/be/src/exprs/slot-ref.h
+++ b/be/src/exprs/slot-ref.h
@@ -43,7 +43,7 @@ class SlotRef : public Expr {
   virtual int GetSlotIds(std::vector<SlotId>* slot_ids) const;
   const SlotId& slot_id() const { return slot_id_; }
 
-  virtual Status GetCodegendComputeFn(RuntimeState* state, llvm::Function** fn);
+  virtual Status GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn);
 
   virtual impala_udf::BooleanVal GetBooleanVal(ExprContext* context, const TupleRow*);
   virtual impala_udf::TinyIntVal GetTinyIntVal(ExprContext* context, const TupleRow*);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exprs/tuple-is-null-predicate.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/tuple-is-null-predicate.cc b/be/src/exprs/tuple-is-null-predicate.cc
index dc54273..dd20e8a 100644
--- a/be/src/exprs/tuple-is-null-predicate.cc
+++ b/be/src/exprs/tuple-is-null-predicate.cc
@@ -59,9 +59,9 @@ Status TupleIsNullPredicate::Prepare(RuntimeState* state, const RowDescriptor& r
   return Status::OK();
 }
 
-Status TupleIsNullPredicate::GetCodegendComputeFn(RuntimeState* state,
-                                                  llvm::Function** fn) {
-  return GetCodegendComputeFnWrapper(state, fn);
+Status TupleIsNullPredicate::GetCodegendComputeFn(LlvmCodeGen* codegen,
+    llvm::Function** fn) {
+  return GetCodegendComputeFnWrapper(codegen, fn);
 }
 
 string TupleIsNullPredicate::DebugString() const {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exprs/tuple-is-null-predicate.h
----------------------------------------------------------------------
diff --git a/be/src/exprs/tuple-is-null-predicate.h b/be/src/exprs/tuple-is-null-predicate.h
index 8c48c3a..e20a195 100644
--- a/be/src/exprs/tuple-is-null-predicate.h
+++ b/be/src/exprs/tuple-is-null-predicate.h
@@ -38,7 +38,7 @@ class TupleIsNullPredicate: public Predicate {
 
   virtual Status Prepare(RuntimeState* state, const RowDescriptor& row_desc,
                          ExprContext* ctx);
-  virtual Status GetCodegendComputeFn(RuntimeState* state, llvm::Function** fn);
+  virtual Status GetCodegendComputeFn(LlvmCodeGen* codegen, llvm::Function** fn);
   virtual std::string DebugString() const;
 
   virtual bool IsConstant() const { return false; }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/runtime/plan-fragment-executor.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/plan-fragment-executor.cc b/be/src/runtime/plan-fragment-executor.cc
index 5269fe5..a791e61 100644
--- a/be/src/runtime/plan-fragment-executor.cc
+++ b/be/src/runtime/plan-fragment-executor.cc
@@ -201,10 +201,14 @@ Status PlanFragmentExecutor::PrepareInternal(const TExecPlanFragmentParams& requ
     scan_node->SetScanRanges(scan_ranges);
   }
 
+  RuntimeState* state = runtime_state_.get();
   RuntimeProfile::Counter* prepare_timer = ADD_TIMER(profile(), "ExecTreePrepareTime");
   {
     SCOPED_TIMER(prepare_timer);
-    RETURN_IF_ERROR(exec_tree_->Prepare(runtime_state_.get()));
+    // Until IMPALA-4233 is fixed, we still need to create the codegen object before
+    // Prepare() as ScalarFnCall::Prepare() may codegen.
+    if (state->codegen_enabled()) RETURN_IF_ERROR(state->CreateCodegen());
+    RETURN_IF_ERROR(exec_tree_->Prepare(state));
   }
 
   PrintVolumeIds(fragment_instance_ctx.per_node_scan_ranges);
@@ -232,6 +236,8 @@ Status PlanFragmentExecutor::PrepareInternal(const TExecPlanFragmentParams& requ
     ReleaseThreadToken();
   }
 
+  if (state->codegen_enabled()) exec_tree_->Codegen(state);
+
   // set up profile counters
   profile()->AddChild(exec_tree_->runtime_profile());
   rows_produced_counter_ =
@@ -246,12 +252,10 @@ Status PlanFragmentExecutor::PrepareInternal(const TExecPlanFragmentParams& requ
 }
 
 void PlanFragmentExecutor::OptimizeLlvmModule() {
-  if (!runtime_state_->codegen_created()) return;
-  LlvmCodeGen* codegen;
-  Status status = runtime_state_->GetCodegen(&codegen, /* initalize */ false);
-  DCHECK(status.ok());
+  if (!runtime_state_->codegen_enabled()) return;
+  LlvmCodeGen* codegen = runtime_state_->codegen();
   DCHECK(codegen != NULL);
-  status = codegen->FinalizeModule();
+  Status status = codegen->FinalizeModule();
   if (!status.ok()) {
     stringstream ss;
     ss << "Error with codegen for this query: " << status.GetDetail();

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/runtime/runtime-state.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/runtime-state.cc b/be/src/runtime/runtime-state.cc
index a05b3ef..00ccea1 100644
--- a/be/src/runtime/runtime-state.cc
+++ b/be/src/runtime/runtime-state.cc
@@ -73,7 +73,6 @@ RuntimeState::RuntimeState(
     fragment_params_(fragment_params),
     now_(new TimestampValue(
         query_ctx().now_string.c_str(), query_ctx().now_string.size())),
-    codegen_expr_(false),
     profile_(obj_pool_.get(), "Fragment " + PrintId(fragment_ctx().fragment_instance_id)),
     is_cancelled_(false),
     root_node_id_(-1) {
@@ -86,7 +85,6 @@ RuntimeState::RuntimeState(const TQueryCtx& query_ctx)
     now_(new TimestampValue(query_ctx.now_string.c_str(),
         query_ctx.now_string.size())),
     exec_env_(ExecEnv::GetInstance()),
-    codegen_expr_(false),
     profile_(obj_pool_.get(), "<unnamed>"),
     is_cancelled_(false),
     root_node_id_(-1) {
@@ -286,12 +284,6 @@ Status RuntimeState::CheckQueryState() {
   return GetQueryStatus();
 }
 
-Status RuntimeState::GetCodegen(LlvmCodeGen** codegen, bool initialize) {
-  if (codegen_.get() == NULL && initialize) RETURN_IF_ERROR(CreateCodegen());
-  *codegen = codegen_.get();
-  return Status::OK();
-}
-
 void RuntimeState::AcquireReaderContext(DiskIoRequestContext* reader_context) {
   boost::lock_guard<SpinLock> l(reader_contexts_lock_);
   reader_contexts_.push_back(reader_context);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/runtime/runtime-state.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/runtime-state.h b/be/src/runtime/runtime-state.h
index 3496d9c..0d612db 100644
--- a/be/src/runtime/runtime-state.h
+++ b/be/src/runtime/runtime-state.h
@@ -158,9 +158,8 @@ class RuntimeState {
   /// Returns true if codegen is enabled for this query.
   bool codegen_enabled() const { return !query_options().disable_codegen; }
 
-  /// Returns true if the codegen object has been created. Note that this may return false
-  /// even when codegen is enabled if nothing has been codegen'd.
-  bool codegen_created() const { return codegen_.get() != NULL; }
+  /// Returns the LlvmCodeGen object for this fragment instance.
+  LlvmCodeGen* codegen() { return codegen_.get(); }
 
   /// Takes ownership of a scan node's reader context and plan fragment executor will call
   /// UnregisterReaderContexts() to unregister it when the fragment is closed. The IO
@@ -170,18 +169,6 @@ class RuntimeState {
   /// Unregisters all reader contexts acquired through AcquireReaderContext().
   void UnregisterReaderContexts();
 
-  /// Returns codegen_ in 'codegen'. If 'initialize' is true, codegen_ will be created if
-  /// it has not been initialized by a previous call already. If 'initialize' is false,
-  /// 'codegen' will be set to NULL if codegen_ has not been initialized.
-  Status GetCodegen(LlvmCodeGen** codegen, bool initialize = true);
-
-  /// Returns true if codegen should be used for expr evaluation in this plan fragment.
-  bool ShouldCodegenExpr() { return codegen_expr_; }
-
-  /// Records that this fragment should use codegen for expr evaluation whenever
-  /// applicable if codegen is not disabled.
-  void SetCodegenExpr() { codegen_expr_ = codegen_enabled(); }
-
   BufferedBlockMgr* block_mgr() {
     DCHECK(block_mgr_.get() != NULL);
     return block_mgr_.get();
@@ -267,6 +254,9 @@ class RuntimeState {
   /// execution doesn't continue if the query terminates abnormally.
   Status CheckQueryState();
 
+  /// Create a codegen object accessible via codegen() if it doesn't exist already.
+  Status CreateCodegen();
+
  private:
   /// Allow TestEnv to set block_mgr manually for testing.
   friend class TestEnv;
@@ -274,10 +264,6 @@ class RuntimeState {
   /// Set per-fragment state.
   Status Init(ExecEnv* exec_env);
 
-  /// Create a codegen object in codegen_. No-op if it has already been called. This is
-  /// created on first use.
-  Status CreateCodegen();
-
   /// Use a custom block manager for the query for testing purposes.
   void set_block_mgr(const std::shared_ptr<BufferedBlockMgr>& block_mgr) {
     block_mgr_ = block_mgr;
@@ -306,9 +292,6 @@ class RuntimeState {
   ExecEnv* exec_env_;
   boost::scoped_ptr<LlvmCodeGen> codegen_;
 
-  /// True if this fragment should force codegen for expr evaluation.
-  bool codegen_expr_;
-
   /// Thread resource management object for this fragment's execution.  The runtime
   /// state is responsible for returning this pool to the thread mgr.
   ThreadResourceMgr::ResourcePool* resource_pool_;

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/runtime/sorted-run-merger.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/sorted-run-merger.h b/be/src/runtime/sorted-run-merger.h
index dade968..e81ca88 100644
--- a/be/src/runtime/sorted-run-merger.h
+++ b/be/src/runtime/sorted-run-merger.h
@@ -92,7 +92,7 @@ class SortedRunMerger {
   std::vector<SortedRunWrapper*> min_heap_;
 
   /// Row comparator. Returns true if lhs < rhs.
-  TupleRowComparator comparator_;
+  const TupleRowComparator& comparator_;
 
   /// Descriptor for the rows provided by the input runs. Owned by the exec-node through
   /// which this merger was created.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/runtime/sorter.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/sorter.cc b/be/src/runtime/sorter.cc
index 6757be0..513aba2 100644
--- a/be/src/runtime/sorter.cc
+++ b/be/src/runtime/sorter.cc
@@ -420,7 +420,7 @@ class Sorter::TupleSorter {
   const int tuple_size_;
 
   /// Tuple comparator with method Less() that returns true if lhs < rhs.
-  const TupleRowComparator comparator_;
+  const TupleRowComparator& comparator_;
 
   /// Number of times comparator_.Less() can be invoked again before
   /// comparator_.FreeLocalAllocations() needs to be called.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/runtime/sorter.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/sorter.h b/be/src/runtime/sorter.h
index ed23b75..54e2d22 100644
--- a/be/src/runtime/sorter.h
+++ b/be/src/runtime/sorter.h
@@ -160,7 +160,7 @@ class Sorter {
   RuntimeState* const state_;
 
   /// In memory sorter and less-than comparator.
-  TupleRowComparator compare_less_than_;
+  const TupleRowComparator& compare_less_than_;
   boost::scoped_ptr<TupleSorter> in_mem_tuple_sorter_;
 
   /// Block manager object used to allocate, pin and release runs. Not owned by Sorter.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/runtime/tuple.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/tuple.cc b/be/src/runtime/tuple.cc
index 656621d..7fa8b2b 100644
--- a/be/src/runtime/tuple.cc
+++ b/be/src/runtime/tuple.cc
@@ -306,19 +306,17 @@ void Tuple::MaterializeExprs(
 //   ; ----- end CodegenAnyVal::WriteToSlot() -------------------------------------------
 //   ret void
 // }
-Status Tuple::CodegenMaterializeExprs(RuntimeState* state, bool collect_string_vals,
+Status Tuple::CodegenMaterializeExprs(LlvmCodeGen* codegen, bool collect_string_vals,
     const TupleDescriptor& desc, const vector<ExprContext*>& materialize_expr_ctxs,
     MemPool* pool, Function** fn) {
   DCHECK(!collect_string_vals) << "CodegenMaterializeExprs: collect_string_vals NYI";
-  LlvmCodeGen* codegen;
-  RETURN_IF_ERROR(state->GetCodegen(&codegen));
   SCOPED_TIMER(codegen->codegen_timer());
   LLVMContext& context = codegen->context();
 
   // Codegen each compute function from materialize_expr_ctxs
   Function* materialize_expr_fns[materialize_expr_ctxs.size()];
   for (int i = 0; i < materialize_expr_ctxs.size(); ++i) {
-    Status status = materialize_expr_ctxs[i]->root()->GetCodegendComputeFn(state,
+    Status status = materialize_expr_ctxs[i]->root()->GetCodegendComputeFn(codegen,
         &materialize_expr_fns[i]);
     if (!status.ok()) {
       stringstream ss;

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/runtime/tuple.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/tuple.h b/be/src/runtime/tuple.h
index b95492c..82efc24 100644
--- a/be/src/runtime/tuple.h
+++ b/be/src/runtime/tuple.h
@@ -167,7 +167,7 @@ class Tuple {
   /// separate functions for the non-NULL and NULL cases, i.e., the 'pool' argument of the
   /// generated function is ignored. There are two different MaterializeExprs symbols to
   /// differentiate these cases when we replace the function calls during codegen.
-  static Status CodegenMaterializeExprs(RuntimeState* state, bool collect_string_vals,
+  static Status CodegenMaterializeExprs(LlvmCodeGen* codegen, bool collect_string_vals,
       const TupleDescriptor& desc, const vector<ExprContext*>& materialize_expr_ctxs,
       MemPool* pool, llvm::Function** fn);
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/service/fe-support.cc
----------------------------------------------------------------------
diff --git a/be/src/service/fe-support.cc b/be/src/service/fe-support.cc
index 996770c..31aa488 100644
--- a/be/src/service/fe-support.cc
+++ b/be/src/service/fe-support.cc
@@ -84,31 +84,45 @@ Java_org_apache_impala_service_FeSupport_NativeEvalConstExprs(
 
   DeserializeThriftMsg(env, thrift_expr_batch, &expr_batch);
   DeserializeThriftMsg(env, thrift_query_ctx_bytes, &query_ctx);
-  query_ctx.request.query_options.disable_codegen = true;
+  vector<TExpr>& texprs = expr_batch.exprs;
+
+  // Codegen is almost always disabled in this path. The only exception is when the
+  // expression contains IR UDF which cannot be interpreted. Enable codegen in this
+  // case if codegen is not disabled in the query option. Otherwise, we will let it
+  // fail in ScalarFnCall::Prepare().
+  bool need_codegen = false;
+  for (const TExpr& texpr : texprs) {
+    if (Expr::NeedCodegen(texpr)) {
+      need_codegen = true;
+      break;
+    }
+  }
+  query_ctx.request.query_options.disable_codegen |= !need_codegen;
   RuntimeState state(query_ctx);
+  if (!query_ctx.request.query_options.disable_codegen) {
+    THROW_IF_ERROR_RET(
+        state.CreateCodegen(), env, JniUtil::internal_exc_class(), result_bytes);
+  }
 
   THROW_IF_ERROR_RET(jni_frame.push(env), env, JniUtil::internal_exc_class(),
-                     result_bytes);
+      result_bytes);
   // Exprs can allocate memory so we need to set up the mem trackers before
   // preparing/running the exprs.
   state.InitMemTrackers(TUniqueId(), NULL, -1);
 
-  vector<TExpr>& texprs = expr_batch.exprs;
   // Prepare the exprs
   vector<ExprContext*> expr_ctxs;
-  for (vector<TExpr>::iterator it = texprs.begin(); it != texprs.end(); it++) {
+  for (const TExpr& texpr : texprs) {
     ExprContext* ctx;
-    THROW_IF_ERROR_RET(Expr::CreateExprTree(&obj_pool, *it, &ctx), env,
-                       JniUtil::internal_exc_class(), result_bytes);
+    THROW_IF_ERROR_RET(Expr::CreateExprTree(&obj_pool, texpr, &ctx), env,
+        JniUtil::internal_exc_class(), result_bytes);
     THROW_IF_ERROR_RET(ctx->Prepare(&state, RowDescriptor(), state.query_mem_tracker()),
-                       env, JniUtil::internal_exc_class(), result_bytes);
+        env, JniUtil::internal_exc_class(), result_bytes);
     expr_ctxs.push_back(ctx);
   }
 
-  if (state.codegen_created()) {
-    // Finalize the module so any UDF functions are jit'd
-    LlvmCodeGen* codegen = NULL;
-    state.GetCodegen(&codegen, /* initialize */ false);
+  if (!query_ctx.request.query_options.disable_codegen) {
+    LlvmCodeGen* codegen = state.codegen();
     DCHECK(codegen != NULL);
     codegen->EnableOptimizations(false);
     codegen->FinalizeModule();

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/service/query-options.cc
----------------------------------------------------------------------
diff --git a/be/src/service/query-options.cc b/be/src/service/query-options.cc
index 0823981..eb1d2f9 100644
--- a/be/src/service/query-options.cc
+++ b/be/src/service/query-options.cc
@@ -304,6 +304,7 @@ Status impala::SetQueryOption(const string& key, const string& value,
         query_options->__set_schedule_random_replica(
             iequals(value, "true") || iequals(value, "1"));
         break;
+      // TODO: remove this query option (IMPALA-4319).
       case TImpalaQueryOptions::SCAN_NODE_CODEGEN_THRESHOLD:
         query_options->__set_scan_node_codegen_threshold(atol(value.c_str()));
         break;

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/testutil/test-udfs.cc
----------------------------------------------------------------------
diff --git a/be/src/testutil/test-udfs.cc b/be/src/testutil/test-udfs.cc
index fa86a4a..efdfa0a 100644
--- a/be/src/testutil/test-udfs.cc
+++ b/be/src/testutil/test-udfs.cc
@@ -333,3 +333,32 @@ IntVal EightArgs(FunctionContext* context, const IntVal& v1, const IntVal& v2,
     const IntVal& v7, const IntVal& v8) {
   return IntVal(v1.val + v2.val + v3.val + v4.val + v5.val + v6.val + v7.val + v8.val);
 }
+
+IntVal NineArgs(FunctionContext* context, const IntVal& v1, const IntVal& v2,
+    const IntVal& v3, const IntVal& v4, const IntVal& v5, const IntVal& v6,
+    const IntVal& v7, const IntVal& v8, const IntVal& v9) {
+  return IntVal(v1.val + v2.val + v3.val + v4.val + v5.val + v6.val + v7.val + v8.val +
+      v9.val);
+}
+
+IntVal TwentyArgs(FunctionContext* context, const IntVal& v1, const IntVal& v2,
+    const IntVal& v3, const IntVal& v4, const IntVal& v5, const IntVal& v6,
+    const IntVal& v7, const IntVal& v8, const IntVal& v9, const IntVal& v10,
+    const IntVal& v11, const IntVal& v12, const IntVal& v13, const IntVal& v14,
+    const IntVal& v15, const IntVal& v16, const IntVal& v17, const IntVal& v18,
+    const IntVal& v19, const IntVal& v20) {
+  return IntVal(v1.val + v2.val + v3.val + v4.val + v5.val + v6.val + v7.val + v8.val +
+      v9.val + v10.val + v11.val + v12.val + v13.val + v14.val + v15.val + v16.val +
+      v17.val + v18.val + v19.val + v20.val);
+}
+
+IntVal TwentyOneArgs(FunctionContext* context, const IntVal& v1, const IntVal& v2,
+    const IntVal& v3, const IntVal& v4, const IntVal& v5, const IntVal& v6,
+    const IntVal& v7, const IntVal& v8, const IntVal& v9, const IntVal& v10,
+    const IntVal& v11, const IntVal& v12, const IntVal& v13, const IntVal& v14,
+    const IntVal& v15, const IntVal& v16, const IntVal& v17, const IntVal& v18,
+    const IntVal& v19, const IntVal& v20, const IntVal& v21) {
+  return IntVal(v1.val + v2.val + v3.val + v4.val + v5.val + v6.val + v7.val + v8.val +
+      v9.val + v10.val + v11.val + v12.val + v13.val + v14.val + v15.val + v16.val +
+      v17.val + v18.val + v19.val + v20.val + v21.val);
+}

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/util/tuple-row-compare.cc
----------------------------------------------------------------------
diff --git a/be/src/util/tuple-row-compare.cc b/be/src/util/tuple-row-compare.cc
index 6f68b9e..69892ae 100644
--- a/be/src/util/tuple-row-compare.cc
+++ b/be/src/util/tuple-row-compare.cc
@@ -51,10 +51,9 @@ int TupleRowComparator::CompareInterpreted(
 
 Status TupleRowComparator::Codegen(RuntimeState* state) {
   Function* fn;
-  RETURN_IF_ERROR(CodegenCompare(state, &fn));
-  LlvmCodeGen* codegen;
-  bool got_codegen = state->GetCodegen(&codegen).ok();
-  DCHECK(got_codegen);
+  LlvmCodeGen* codegen = state->codegen();
+  DCHECK(codegen != NULL);
+  RETURN_IF_ERROR(CodegenCompare(codegen, &fn));
   codegend_compare_fn_ = state->obj_pool()->Add(new CompareFn);
   codegen->AddFunctionToJit(fn, reinterpret_cast<void**>(codegend_compare_fn_));
   return Status::OK();
@@ -175,9 +174,7 @@ Status TupleRowComparator::Codegen(RuntimeState* state) {
 // next_key2:                                        ; preds = %rhs_non_null12, %next_key
 //   ret i32 0
 // }
-Status TupleRowComparator::CodegenCompare(RuntimeState* state, Function** fn) {
-  LlvmCodeGen* codegen;
-  RETURN_IF_ERROR(state->GetCodegen(&codegen));
+Status TupleRowComparator::CodegenCompare(LlvmCodeGen* codegen, Function** fn) {
   SCOPED_TIMER(codegen->codegen_timer());
   LLVMContext& context = codegen->context();
 
@@ -188,7 +185,7 @@ Status TupleRowComparator::CodegenCompare(RuntimeState* state, Function** fn) {
   Function* key_fns[key_expr_ctxs_lhs_.size()];
   for (int i = 0; i < key_expr_ctxs_lhs_.size(); ++i) {
     Status status =
-        key_expr_ctxs_lhs_[i]->root()->GetCodegendComputeFn(state, &key_fns[i]);
+        key_expr_ctxs_lhs_[i]->root()->GetCodegendComputeFn(codegen, &key_fns[i]);
     if (!status.ok()) {
       return Status(Substitute("Could not codegen TupleRowComparator::Compare(): $0",
           status.GetDetail()));

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/util/tuple-row-compare.h
----------------------------------------------------------------------
diff --git a/be/src/util/tuple-row-compare.h b/be/src/util/tuple-row-compare.h
index a1d8d72..e2beed1 100644
--- a/be/src/util/tuple-row-compare.h
+++ b/be/src/util/tuple-row-compare.h
@@ -127,7 +127,7 @@ class TupleRowComparator {
   /// Codegen Compare(). Returns a non-OK status if codegen is unsuccessful.
   /// TODO: inline this at codegen'd callsites instead of indirectly calling via function
   /// pointer.
-  Status CodegenCompare(RuntimeState* state, llvm::Function** fn);
+  Status CodegenCompare(LlvmCodeGen* codegen, llvm::Function** fn);
 
   /// References to ExprContexts managed by SortExecExprs. The lhs ExprContexts must
   /// be created and prepared before the TupleRowCompator is constructed, but the rhs

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/common/thrift/PlanNodes.thrift
----------------------------------------------------------------------
diff --git a/common/thrift/PlanNodes.thrift b/common/thrift/PlanNodes.thrift
index 49fcfbb..6f863b2 100644
--- a/common/thrift/PlanNodes.thrift
+++ b/common/thrift/PlanNodes.thrift
@@ -196,17 +196,14 @@ struct THdfsScanNode {
   // Option to control tie breaks during scan scheduling.
   4: optional bool random_replica
 
-  // Option to control whether codegen should be used for conjuncts evaluation.
-  5: optional bool codegen_conjuncts
-
   // Number of header lines to skip at the beginning of each file of this table. Only set
   // for hdfs text files.
-  6: optional i32 skip_header_line_count
+  5: optional i32 skip_header_line_count
 
   // Indicates whether the MT scan node implementation should be used.
   // If this is true then the MT_DOP query option must be > 0.
   // TODO: Remove this option when the MT scan node supports all file formats.
-  7: optional bool use_mt_scan_node
+  6: optional bool use_mt_scan_node
 }
 
 struct TDataSourceScanNode {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
index 3d52aa4..95d86e3 100644
--- a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
@@ -105,9 +105,6 @@ public class HdfsScanNode extends ScanNode {
   // Total number of bytes from partitions_
   private long totalBytes_ = 0;
 
-  // True if this scan node should use codegen for evaluting conjuncts.
-  private boolean codegenConjuncts_;
-
   // True if this scan node should use the MT implementation in the backend.
   private boolean useMtScanNode_;
 
@@ -188,9 +185,6 @@ public class HdfsScanNode extends ScanNode {
 
     // TODO: do we need this?
     assignedConjuncts_ = analyzer.getAssignedConjuncts();
-
-    // Decide whether codegen should be used for evaluating conjuncts.
-    checkForCodegen(analyzer);
   }
 
   /**
@@ -505,39 +499,6 @@ public class HdfsScanNode extends ScanNode {
         " clusterNodes=" + cluster.numNodes());
   }
 
-  /**
-   * Approximate the cost of evaluating all conjuncts bound by this node by
-   * aggregating total number of nodes in expression trees of all conjuncts.
-   */
-  private int computeConjunctsCost() {
-    int cost = 0;
-    for (Expr expr: getConjuncts()) {
-      cost += expr.numNodes();
-    }
-    for (List<Expr> exprs: collectionConjuncts_.values()) {
-      for (Expr expr: exprs) {
-        cost += expr.numNodes();
-      }
-    }
-    return cost;
-  }
-
-  /**
-   * Scan node is not a codegen-enabled operator. Decide whether to use codegen for
-   * conjuncts evaluation by estimating the cost of interpretation.
-   */
-  private void checkForCodegen(Analyzer analyzer) {
-    long conjunctsCost = computeConjunctsCost();
-    long inputCardinality = getInputCardinality();
-    long threshold =
-        analyzer.getQueryCtx().getRequest().query_options.scan_node_codegen_threshold;
-    if (inputCardinality == -1) {
-      codegenConjuncts_ = conjunctsCost > 0;
-    } else {
-      codegenConjuncts_ = inputCardinality * conjunctsCost > threshold;
-    }
-  }
-
   @Override
   protected void toThrift(TPlanNode msg) {
     msg.hdfs_scan_node = new THdfsScanNode(desc_.getId().asInt());
@@ -546,7 +507,6 @@ public class HdfsScanNode extends ScanNode {
     }
     msg.hdfs_scan_node.setRandom_replica(randomReplica_);
     msg.node_type = TPlanNodeType.HDFS_SCAN_NODE;
-    msg.hdfs_scan_node.setCodegen_conjuncts(codegenConjuncts_);
     if (!collectionConjuncts_.isEmpty()) {
       Map<Integer, List<TExpr>> tcollectionConjuncts = Maps.newLinkedHashMap();
       for (Map.Entry<TupleDescriptor, List<Expr>> entry:

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/testdata/workloads/functional-query/queries/QueryTest/udf-errors.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/udf-errors.test b/testdata/workloads/functional-query/queries/QueryTest/udf-errors.test
index f3ff3f9..948d584 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/udf-errors.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/udf-errors.test
@@ -24,12 +24,60 @@ symbol='FnDoesNotExist';
 Could not load binary: $FILESYSTEM_PREFIX/test-warehouse/not-a-real-file.so
 ====
 ---- QUERY
+# This test is run with codegen disabled. Interpretation only handles up to 20 arguments.
+create function if not exists udf_test_errors.twenty_args(int, int, int, int, int, int,
+    int, int, int, int, int, int, int, int, int, int, int, int, int, int) returns int
+location '$FILESYSTEM_PREFIX/test-warehouse/libTestUdfs.so'
+symbol='TwentyArgs';
+---- RESULTS
+====
+---- QUERY
+# Verifies that interpretation can support up to 20 arguments
+select udf_test_errors.twenty_args(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20);
+---- TYPES
+INT
+---- RESULTS
+210
+====
+---- QUERY
+# This test is run with codegen disabled. Interpretation only handles up to 20 arguments.
+create function if not exists udf_test_errors.twenty_one_args(int, int, int, int, int, int,
+    int, int, int, int, int, int, int, int, int, int, int, int, int, int, int) returns int
+location '$FILESYSTEM_PREFIX/test-warehouse/libTestUdfs.so'
+symbol='TwentyOneArgs';
+---- RESULTS
+====
+---- QUERY
+# Verifies that interpretation fails with more than 20 arguments.
+select udf_test_errors.twenty_one_args(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21);
+---- CATCH
+Cannot interpret native UDF 'twenty_one_args': number of arguments is more than 20. Codegen is needed. Please set DISABLE_CODEGEN to false.
+====
+---- QUERY
+# This test is run with codegen disabled. IR UDF will fail.
+create function if not exists udf_test_errors.nine_args_ir(int, int, int, int, int, int,
+    int, int, int) returns int
+location '$FILESYSTEM_PREFIX/test-warehouse/test-udfs.ll'
+symbol='NineArgs';
+---- RESULTS
+====
+---- QUERY
+select udf_test_errors.nine_args_ir(1,2,3,4,5,6,7,8,9);
+---- CATCH
+Cannot interpret LLVM IR UDF 'nine_args_ir': Codegen is needed. Please set DISABLE_CODEGEN to false.
+====
+---- QUERY
 drop database udf_test_errors;
 ---- CATCH
 Cannot drop non-empty database: udf_test_errors
 ====
 ---- QUERY
 drop function udf_test_errors.hive_pi();
+drop function udf_test_errors.twenty_args(int, int, int, int, int, int, int, int,
+    int, int, int, int, int, int, int, int, int, int, int, int);
+drop function udf_test_errors.twenty_one_args(int, int, int, int, int, int, int, int,
+    int, int, int, int, int, int, int, int, int, int, int, int, int);
+drop function udf_test_errors.nine_args_ir(int, int, int, int, int, int, int, int, int);
 drop database udf_test_errors;
 ---- RESULTS
 ====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/testdata/workloads/functional-query/queries/QueryTest/udf.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/udf.test b/testdata/workloads/functional-query/queries/QueryTest/udf.test
index 645c321..5cbbecb 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/udf.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/udf.test
@@ -529,3 +529,17 @@ INT
 ---- RESULTS
 36
 ====
+---- QUERY
+select twenty_args(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20);
+---- TYPES
+INT
+---- RESULTS
+210
+====
+---- QUERY
+select twenty_one_args(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21);
+---- TYPES
+INT
+---- RESULTS
+231
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/tests/query_test/test_udfs.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_udfs.py b/tests/query_test/test_udfs.py
index 02bdf4f..2658351 100644
--- a/tests/query_test/test_udfs.py
+++ b/tests/query_test/test_udfs.py
@@ -73,6 +73,11 @@ class TestUdfs(ImpalaTestSuite):
       self.run_test_case('QueryTest/udf-init-close', vector, use_db=database)
 
   def test_udf_errors(self, vector):
+    # Disable codegen to force interpretation path to be taken.
+    # Aim to exercise two failure cases:
+    # 1. too many arguments
+    # 2. IR UDF
+    vector.get_value('exec_option')['disable_codegen'] = 1
     self.run_test_case('QueryTest/udf-errors', vector)
 
   def test_udf_invalid_symbol(self, vector):
@@ -371,6 +376,11 @@ drop function if exists {database}.five_args(int, int, int, int, int);
 drop function if exists {database}.six_args(int, int, int, int, int, int);
 drop function if exists {database}.seven_args(int, int, int, int, int, int, int);
 drop function if exists {database}.eight_args(int, int, int, int, int, int, int, int);
+drop function if exists {database}.nine_args(int, int, int, int, int, int, int, int, int);
+drop function if exists {database}.twenty_args(int, int, int, int, int, int, int, int, int,
+    int, int, int, int, int, int, int, int, int, int, int);
+drop function if exists {database}.twenty_one_args(int, int, int, int, int, int, int, int,
+    int, int, int, int, int, int, int, int, int, int, int, int, int);
 
 create database if not exists {database};
 
@@ -493,4 +503,12 @@ location '{location}' symbol='SevenArgs';
 
 create function {database}.eight_args(int, int, int, int, int, int, int, int) returns int
 location '{location}' symbol='EightArgs';
+
+create function {database}.twenty_args(int, int, int, int, int, int, int, int, int, int,
+    int, int, int, int, int, int, int, int, int, int) returns int
+location '{location}' symbol='TwentyArgs';
+
+create function {database}.twenty_one_args(int, int, int, int, int, int, int, int, int, int,
+    int, int, int, int, int, int, int, int, int, int, int) returns int
+location '{location}' symbol='TwentyOneArgs';
 """

[2/2] incubator-impala git commit: IMPALA-4080, IMPALA-3638: Introduce ExecNode::Codegen()

Posted by kw...@apache.org.

IMPALA-4080, IMPALA-3638: Introduce ExecNode::Codegen()

This patch is mostly a mechanical move of codegen-related logic
from each exec node's Prepare() to its Codegen() function.
After this change, code generation will no longer happen in
Prepare(). Instead, it will happen after Prepare() completes in
PlanFragmentExecutor. This is an intermediate step towards the
final goal of sharing compiled code among fragment instances in
multi-threaded execution.

As part of the cleanup, this change also removes the logic for
lazy codegen object creation. In other words, if codegen is enabled,
the codegen object will always be created. This simplifies some
of the logic in ScalarFnCall::Prepare() and various Codegen()
functions by reducing the error checking needed. This change also
removes the logic added for tackling IMPALA-1755, as it is no
longer needed after the cleanup.

The cleanup also rectifies a poorly documented situation.
Previously, even if a user explicitly set DISABLE_CODEGEN to true,
we might still codegen a UDF if it was written in LLVM IR or if it
had more than 8 arguments. This patch enforces the query option
by failing the query in both cases. To run the query, the user
must enable codegen. This change also extends the number of
arguments supported in the interpretation path of ScalarFn to 20.

Change-Id: I207566bc9f4c6a159271ecdbc4bbdba3d78c6651
Reviewed-on: http://gerrit.cloudera.org:8080/4651
Reviewed-by: Michael Ho <kw...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/b15d992a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/b15d992a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/b15d992a

Branch: refs/heads/master
Commit: b15d992abe09bc841f6e2112d47099eb15f8454f
Parents: ee2a06d
Author: Michael Ho <kw...@cloudera.com>
Authored: Wed Oct 5 20:05:24 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Wed Oct 19 08:18:37 2016 +0000

----------------------------------------------------------------------
 be/src/exec/aggregation-node.cc                 |  61 +++--
 be/src/exec/aggregation-node.h                  |  11 +-
 be/src/exec/exchange-node.cc                    |   5 +-
 be/src/exec/exchange-node.h                     |   4 +
 be/src/exec/exec-node.cc                        |  21 +-
 be/src/exec/exec-node.h                         |  13 +-
 be/src/exec/hash-join-node.cc                   |  51 ++--
 be/src/exec/hash-join-node.h                    |   6 +-
 be/src/exec/hash-table.cc                       |  22 +-
 be/src/exec/hash-table.h                        |   8 +-
 be/src/exec/hdfs-avro-scanner.cc                |  16 +-
 be/src/exec/hdfs-avro-scanner.h                 |   2 +-
 be/src/exec/hdfs-parquet-scanner.cc             |  10 +-
 be/src/exec/hdfs-scan-node-base.cc              |  28 +--
 be/src/exec/hdfs-scan-node-base.h               |   1 +
 be/src/exec/hdfs-scanner.cc                     |   2 +-
 be/src/exec/hdfs-sequence-scanner.cc            |   8 +-
 be/src/exec/hdfs-text-scanner.cc                |   8 +-
 be/src/exec/old-hash-table.cc                   |  30 +--
 be/src/exec/old-hash-table.h                    |   6 +-
 be/src/exec/partitioned-aggregation-node.cc     |  76 +++---
 be/src/exec/partitioned-aggregation-node.h      |  13 +-
 be/src/exec/partitioned-hash-join-builder.cc    |  83 +++----
 be/src/exec/partitioned-hash-join-builder.h     |  19 +-
 be/src/exec/partitioned-hash-join-node.cc       |  56 +++--
 be/src/exec/partitioned-hash-join-node.h        |   4 +-
 be/src/exec/sort-node.cc                        |  23 +-
 be/src/exec/sort-node.h                         |   4 +
 be/src/exec/topn-node.cc                        |  94 +++----
 be/src/exec/topn-node.h                         |   4 +-
 be/src/exprs/case-expr.cc                       |   6 +-
 be/src/exprs/case-expr.h                        |   2 +-
 be/src/exprs/compound-predicates.cc             |  37 ++-
 be/src/exprs/compound-predicates.h              |  10 +-
 be/src/exprs/conditional-functions.cc           |   4 +-
 be/src/exprs/conditional-functions.h            |   8 +-
 be/src/exprs/expr.cc                            |  14 +-
 be/src/exprs/expr.h                             |   8 +-
 be/src/exprs/hive-udf-call.cc                   |   4 +-
 be/src/exprs/hive-udf-call.h                    |   2 +-
 be/src/exprs/is-not-empty-predicate.cc          |   4 +-
 be/src/exprs/is-not-empty-predicate.h           |   2 +-
 be/src/exprs/literal.cc                         |   4 +-
 be/src/exprs/literal.h                          |   2 +-
 be/src/exprs/null-literal.cc                    |   4 +-
 be/src/exprs/null-literal.h                     |   2 +-
 be/src/exprs/scalar-fn-call.cc                  | 246 +++++++------------
 be/src/exprs/scalar-fn-call.h                   |   4 +-
 be/src/exprs/slot-ref.cc                        |   5 +-
 be/src/exprs/slot-ref.h                         |   2 +-
 be/src/exprs/tuple-is-null-predicate.cc         |   6 +-
 be/src/exprs/tuple-is-null-predicate.h          |   2 +-
 be/src/runtime/plan-fragment-executor.cc        |  16 +-
 be/src/runtime/runtime-state.cc                 |   8 -
 be/src/runtime/runtime-state.h                  |  27 +-
 be/src/runtime/sorted-run-merger.h              |   2 +-
 be/src/runtime/sorter.cc                        |   2 +-
 be/src/runtime/sorter.h                         |   2 +-
 be/src/runtime/tuple.cc                         |   6 +-
 be/src/runtime/tuple.h                          |   2 +-
 be/src/service/fe-support.cc                    |  36 ++-
 be/src/service/query-options.cc                 |   1 +
 be/src/testutil/test-udfs.cc                    |  29 +++
 be/src/util/tuple-row-compare.cc                |  13 +-
 be/src/util/tuple-row-compare.h                 |   2 +-
 common/thrift/PlanNodes.thrift                  |   7 +-
 .../org/apache/impala/planner/HdfsScanNode.java |  40 ---
 .../queries/QueryTest/udf-errors.test           |  48 ++++
 .../functional-query/queries/QueryTest/udf.test |  14 ++
 tests/query_test/test_udfs.py                   |  18 ++
 70 files changed, 647 insertions(+), 693 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exec/aggregation-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/aggregation-node.cc b/be/src/exec/aggregation-node.cc
index 909d42b..67928ca 100644
--- a/be/src/exec/aggregation-node.cc
+++ b/be/src/exec/aggregation-node.cc
@@ -165,25 +165,29 @@ Status AggregationNode::Prepare(RuntimeState* state) {
     hash_tbl_->Insert(singleton_intermediate_tuple_);
     output_iterator_ = hash_tbl_->Begin();
   }
+  if (!state->codegen_enabled()) {
+    runtime_profile()->AddCodegenMsg(false, "disabled by query option DISABLE_CODEGEN");
+  }
+  return Status::OK();
+}
 
+void AggregationNode::Codegen(RuntimeState* state) {
+  DCHECK(state->codegen_enabled());
   bool codegen_enabled = false;
-  if (state->codegen_enabled()) {
-    LlvmCodeGen* codegen;
-    RETURN_IF_ERROR(state->GetCodegen(&codegen));
-    Function* update_tuple_fn = CodegenUpdateTuple(state);
-    if (update_tuple_fn != NULL) {
-      codegen_process_row_batch_fn_ =
-          CodegenProcessRowBatch(state, update_tuple_fn);
-      if (codegen_process_row_batch_fn_ != NULL) {
-        // Update to using codegen'd process row batch.
-        codegen->AddFunctionToJit(codegen_process_row_batch_fn_,
-            reinterpret_cast<void**>(&process_row_batch_fn_));
-        codegen_enabled = true;
-      }
+  LlvmCodeGen* codegen = state->codegen();
+  DCHECK(codegen != NULL);
+  Function* update_tuple_fn = CodegenUpdateTuple(codegen);
+  if (update_tuple_fn != NULL) {
+    codegen_process_row_batch_fn_ = CodegenProcessRowBatch(codegen, update_tuple_fn);
+    if (codegen_process_row_batch_fn_ != NULL) {
+      // Update to using codegen'd process row batch.
+      codegen->AddFunctionToJit(codegen_process_row_batch_fn_,
+          reinterpret_cast<void**>(&process_row_batch_fn_));
+      codegen_enabled = true;
     }
   }
   runtime_profile()->AddCodegenMsg(codegen_enabled);
-  return Status::OK();
+  ExecNode::Codegen(state);
 }
 
 Status AggregationNode::Open(RuntimeState* state) {
@@ -525,11 +529,8 @@ IRFunction::Type GetHllUpdateFunction2(const ColumnType& type) {
 // ret:                                              ; preds = %src_not_null, %entry
 //   ret void
 // }
-llvm::Function* AggregationNode::CodegenUpdateSlot(
-    RuntimeState* state, AggFnEvaluator* evaluator, SlotDescriptor* slot_desc) {
-  LlvmCodeGen* codegen;
-  if (!state->GetCodegen(&codegen).ok()) return NULL;
-
+llvm::Function* AggregationNode::CodegenUpdateSlot(LlvmCodeGen* codegen,
+    AggFnEvaluator* evaluator, SlotDescriptor* slot_desc) {
   // TODO: Fix this DCHECK and Init() once CodegenUpdateSlot() can handle AggFnEvaluator
   // with multiple input expressions (e.g. group_concat).
   DCHECK_EQ(evaluator->input_expr_ctxs().size(), 1);
@@ -538,7 +539,7 @@ llvm::Function* AggregationNode::CodegenUpdateSlot(
   // TODO: implement timestamp
   if (input_expr->type().type == TYPE_TIMESTAMP) return NULL;
   Function* agg_expr_fn;
-  Status status = input_expr->GetCodegendComputeFn(state, &agg_expr_fn);
+  Status status = input_expr->GetCodegendComputeFn(codegen, &agg_expr_fn);
   if (!status.ok()) {
     VLOG_QUERY << "Could not codegen UpdateSlot(): " << status.GetDetail();
     return NULL;
@@ -715,9 +716,7 @@ llvm::Function* AggregationNode::CodegenUpdateSlot(
 //                           %"class.impala::TupleRow"* %tuple_row)
 //   ret void
 // }
-Function* AggregationNode::CodegenUpdateTuple(RuntimeState* state) {
-  LlvmCodeGen* codegen;
-  if (!state->GetCodegen(&codegen).ok()) return NULL;
+Function* AggregationNode::CodegenUpdateTuple(LlvmCodeGen* codegen) {
   SCOPED_TIMER(codegen->codegen_timer());
 
   int j = probe_expr_ctxs_.size();
@@ -805,7 +804,7 @@ Function* AggregationNode::CodegenUpdateTuple(RuntimeState* state) {
       Value* count_inc = builder.CreateAdd(slot_loaded, const_one, "count_star_inc");
       builder.CreateStore(count_inc, slot_ptr);
     } else {
-      Function* update_slot_fn = CodegenUpdateSlot(state, evaluator, slot_desc);
+      Function* update_slot_fn = CodegenUpdateSlot(codegen, evaluator, slot_desc);
       if (update_slot_fn == NULL) return NULL;
       // Call GetAggFnCtx() to get the function context.
       Value* get_fn_ctx_args[] = { this_arg, codegen->GetIntConstant(TYPE_INT, i) };
@@ -824,10 +823,8 @@ Function* AggregationNode::CodegenUpdateTuple(RuntimeState* state) {
   return codegen->FinalizeFunction(fn);
 }
 
-Function* AggregationNode::CodegenProcessRowBatch(
-    RuntimeState* state, Function* update_tuple_fn) {
-  LlvmCodeGen* codegen;
-  if (!state->GetCodegen(&codegen).ok()) return NULL;
+Function* AggregationNode::CodegenProcessRowBatch(LlvmCodeGen* codegen,
+    Function* update_tuple_fn) {
   SCOPED_TIMER(codegen->codegen_timer());
   DCHECK(update_tuple_fn != NULL);
 
@@ -847,19 +844,19 @@ Function* AggregationNode::CodegenProcessRowBatch(
     // Aggregation w/o grouping does not use a hash table.
 
     // Codegen for hash
-    Function* hash_fn = hash_tbl_->CodegenHashCurrentRow(state);
+    Function* hash_fn = hash_tbl_->CodegenHashCurrentRow(codegen);
     if (hash_fn == NULL) return NULL;
 
     // Codegen HashTable::Equals
-    Function* equals_fn = hash_tbl_->CodegenEquals(state);
+    Function* equals_fn = hash_tbl_->CodegenEquals(codegen);
     if (equals_fn == NULL) return NULL;
 
     // Codegen for evaluating build rows
-    Function* eval_build_row_fn = hash_tbl_->CodegenEvalTupleRow(state, true);
+    Function* eval_build_row_fn = hash_tbl_->CodegenEvalTupleRow(codegen, true);
     if (eval_build_row_fn == NULL) return NULL;
 
     // Codegen for evaluating probe rows
-    Function* eval_probe_row_fn = hash_tbl_->CodegenEvalTupleRow(state, false);
+    Function* eval_probe_row_fn = hash_tbl_->CodegenEvalTupleRow(codegen, false);
     if (eval_probe_row_fn == NULL) return NULL;
 
     // Replace call sites

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exec/aggregation-node.h
----------------------------------------------------------------------
diff --git a/be/src/exec/aggregation-node.h b/be/src/exec/aggregation-node.h
index 5d87d82..6f4867b 100644
--- a/be/src/exec/aggregation-node.h
+++ b/be/src/exec/aggregation-node.h
@@ -55,6 +55,7 @@ class AggregationNode : public ExecNode {
 
   virtual Status Init(const TPlanNode& tnode, RuntimeState* state);
   virtual Status Prepare(RuntimeState* state);
+  virtual void Codegen(RuntimeState* state);
   virtual Status Open(RuntimeState* state);
   virtual Status GetNext(RuntimeState* state, RowBatch* row_batch, bool* eos);
   virtual Status Reset(RuntimeState* state);
@@ -159,16 +160,16 @@ class AggregationNode : public ExecNode {
   /// IR and loaded into the codegen object.  UpdateAggTuple has also been
   /// codegen'd to IR.  This function will modify the loop subsituting the
   /// UpdateAggTuple function call with the (inlined) codegen'd 'update_tuple_fn'.
-  llvm::Function* CodegenProcessRowBatch(
-      RuntimeState* state, llvm::Function* update_tuple_fn);
+  llvm::Function* CodegenProcessRowBatch(LlvmCodeGen* codegen,
+      llvm::Function* update_tuple_fn);
 
   /// Codegen for updating aggregate_exprs at slot_idx. Returns NULL if unsuccessful.
   /// slot_idx is the idx into aggregate_exprs_ (does not include grouping exprs).
-  llvm::Function* CodegenUpdateSlot(
-      RuntimeState* state, AggFnEvaluator* evaluator, SlotDescriptor* slot_desc);
+  llvm::Function* CodegenUpdateSlot(LlvmCodeGen* codegen,
+      AggFnEvaluator* evaluator, SlotDescriptor* slot_desc);
 
   /// Codegen UpdateTuple(). Returns NULL if codegen is unsuccessful.
-  llvm::Function* CodegenUpdateTuple(RuntimeState* state);
+  llvm::Function* CodegenUpdateTuple(LlvmCodeGen* codegen);
 };
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exec/exchange-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/exchange-node.cc b/be/src/exec/exchange-node.cc
index 833949b..fac2ff5 100644
--- a/be/src/exec/exchange-node.cc
+++ b/be/src/exec/exchange-node.cc
@@ -83,6 +83,8 @@ Status ExchangeNode::Prepare(RuntimeState* state) {
     RETURN_IF_ERROR(sort_exec_exprs_.Prepare(
         state, row_descriptor_, row_descriptor_, expr_mem_tracker()));
     AddExprCtxsToFree(sort_exec_exprs_);
+    less_than_.reset(
+        new TupleRowComparator(sort_exec_exprs_, is_asc_order_, nulls_first_));
   }
   return Status::OK();
 }
@@ -92,10 +94,9 @@ Status ExchangeNode::Open(RuntimeState* state) {
   RETURN_IF_ERROR(ExecNode::Open(state));
   if (is_merging_) {
     RETURN_IF_ERROR(sort_exec_exprs_.Open(state));
-    TupleRowComparator less_than(sort_exec_exprs_, is_asc_order_, nulls_first_);
     // CreateMerger() will populate its merging heap with batches from the stream_recvr_,
     // so it is not necessary to call FillInputRowBatch().
-    RETURN_IF_ERROR(stream_recvr_->CreateMerger(less_than));
+    RETURN_IF_ERROR(stream_recvr_->CreateMerger(*less_than_.get()));
   } else {
     RETURN_IF_ERROR(FillInputRowBatch(state));
   }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exec/exchange-node.h
----------------------------------------------------------------------
diff --git a/be/src/exec/exchange-node.h b/be/src/exec/exchange-node.h
index 16e9137..f6face4 100644
--- a/be/src/exec/exchange-node.h
+++ b/be/src/exec/exchange-node.h
@@ -27,6 +27,7 @@ namespace impala {
 
 class RowBatch;
 class DataStreamRecvr;
+class TupleRowComparator;
 
 /// Receiver node for data streams. The data stream receiver is created in Prepare()
 /// and closed in Close().
@@ -94,6 +95,9 @@ class ExchangeNode : public ExecNode {
   /// underlying stream_recvr_, and input_batch_ is not used/valid.
   bool is_merging_;
 
+  /// The TupleRowComparator based on 'sort_exec_exprs_' for merging exchange.
+  boost::scoped_ptr<TupleRowComparator> less_than_;
+
   /// Sort expressions and parameters passed to the merging receiver..
   SortExecExprs sort_exec_exprs_;
   std::vector<bool> is_asc_order_;

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exec/exec-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/exec-node.cc b/be/src/exec/exec-node.cc
index df491dd..510a7d9 100644
--- a/be/src/exec/exec-node.cc
+++ b/be/src/exec/exec-node.cc
@@ -158,13 +158,20 @@ Status ExecNode::Prepare(RuntimeState* state) {
 
   RETURN_IF_ERROR(Expr::Prepare(conjunct_ctxs_, state, row_desc(), expr_mem_tracker()));
   AddExprCtxsToFree(conjunct_ctxs_);
-
   for (int i = 0; i < children_.size(); ++i) {
     RETURN_IF_ERROR(children_[i]->Prepare(state));
   }
   return Status::OK();
 }
 
+void ExecNode::Codegen(RuntimeState* state) {
+  DCHECK(state->codegen_enabled());
+  DCHECK(state->codegen() != NULL);
+  for (int i = 0; i < children_.size(); ++i) {
+    children_[i]->Codegen(state);
+  }
+}
+
 Status ExecNode::Open(RuntimeState* state) {
   RETURN_IF_ERROR(ExecDebugAction(TExecNodePhase::OPEN, state));
   return Expr::Open(conjunct_ctxs_, state);
@@ -272,12 +279,6 @@ Status ExecNode::CreateNode(ObjectPool* pool, const TPlanNode& tnode,
             || state->query_options().num_scanner_threads == 1);
         *node = pool->Add(new HdfsScanNode(pool, tnode, descs));
       }
-      // If true, this node requests codegen over interpretation for conjuncts
-      // evaluation whenever possible. Turn codegen on for expr evaluation for
-      // the entire fragment.
-      if (tnode.hdfs_scan_node.codegen_conjuncts) state->SetCodegenExpr();
-      (*node)->runtime_profile()->AddCodegenMsg(
-          state->ShouldCodegenExpr(), "", "Expr Evaluation");
       break;
     case TPlanNodeType::HBASE_SCAN_NODE:
       *node = pool->Add(new HBaseScanNode(pool, tnode, descs));
@@ -492,15 +493,13 @@ void ExecNode::AddExprCtxsToFree(const SortExecExprs& sort_exec_exprs) {
 // false:                                            ; preds = %continue, %entry
 //   ret i1 false
 // }
-Status ExecNode::CodegenEvalConjuncts(RuntimeState* state,
+Status ExecNode::CodegenEvalConjuncts(LlvmCodeGen* codegen,
     const vector<ExprContext*>& conjunct_ctxs, Function** fn, const char* name) {
   Function* conjunct_fns[conjunct_ctxs.size()];
   for (int i = 0; i < conjunct_ctxs.size(); ++i) {
     RETURN_IF_ERROR(
-        conjunct_ctxs[i]->root()->GetCodegendComputeFn(state, &conjunct_fns[i]));
+        conjunct_ctxs[i]->root()->GetCodegendComputeFn(codegen, &conjunct_fns[i]));
   }
-  LlvmCodeGen* codegen;
-  RETURN_IF_ERROR(state->GetCodegen(&codegen));
 
   // Construct function signature to match
   // bool EvalConjuncts(Expr** exprs, int num_exprs, TupleRow* row)

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exec/exec-node.h
----------------------------------------------------------------------
diff --git a/be/src/exec/exec-node.h b/be/src/exec/exec-node.h
index 9283a8b..4f3f3fc 100644
--- a/be/src/exec/exec-node.h
+++ b/be/src/exec/exec-node.h
@@ -63,12 +63,17 @@ class ExecNode {
   /// Sets up internal structures, etc., without doing any actual work.
   /// Must be called prior to Open(). Will only be called once in this
   /// node's lifetime.
-  /// All code generation (adding functions to the LlvmCodeGen object) must happen
-  /// in Prepare().  Retrieving the jit compiled function pointer must happen in
-  /// Open().
   /// If overridden in subclass, must first call superclass's Prepare().
   virtual Status Prepare(RuntimeState* state);
 
+  /// Recursively calls Codegen() on all children.
+  /// Expected to be overridden in subclass to generate LLVM IR functions and register
+  /// them with the LlvmCodeGen object. The function pointers of the compiled IR functions
+  /// will be set up in PlanFragmentExecutor::Open(). If overridden in subclass, must also
+  /// call superclass's Codegen() before or after the code generation for this exec node.
+  /// Will only be called once in the node's lifetime.
+  virtual void Codegen(RuntimeState* state);
+
   /// Performs any preparatory work prior to calling GetNext().
   /// Caller must not be holding any io buffers. This will cause deadlock.
   /// If overridden in subclass, must first call superclass's Open().
@@ -146,7 +151,7 @@ class ExecNode {
   /// Codegen EvalConjuncts(). Returns a non-OK status if the function couldn't be
   /// codegen'd. The codegen'd version uses inlined, codegen'd GetBooleanVal() functions.
   static Status CodegenEvalConjuncts(
-      RuntimeState* state, const std::vector<ExprContext*>& conjunct_ctxs,
+      LlvmCodeGen* codegen, const std::vector<ExprContext*>& conjunct_ctxs,
       llvm::Function** fn, const char* name = "EvalConjuncts");
 
   /// Returns a string representation in DFS order of the plan rooted at this.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exec/hash-join-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hash-join-node.cc b/be/src/exec/hash-join-node.cc
index 9106159..f2ac928 100644
--- a/be/src/exec/hash-join-node.cc
+++ b/be/src/exec/hash-join-node.cc
@@ -143,19 +143,24 @@ Status HashJoinNode::Prepare(RuntimeState* state) {
           child(1)->row_desc().tuple_descriptors().size(), stores_nulls,
           is_not_distinct_from_, state->fragment_hash_seed(), mem_tracker(), filters_));
   build_pool_.reset(new MemPool(mem_tracker()));
+  if (!state->codegen_enabled()) {
+    runtime_profile()->AddCodegenMsg(false, "disabled by query option DISABLE_CODEGEN");
+  }
+  return Status::OK();
+}
 
+void HashJoinNode::Codegen(RuntimeState* state) {
+  DCHECK(state->codegen_enabled());
+  LlvmCodeGen* codegen = state->codegen();
+  DCHECK(codegen != NULL);
   bool build_codegen_enabled = false;
   bool probe_codegen_enabled = false;
-  if (state->codegen_enabled()) {
-    LlvmCodeGen* codegen;
-    RETURN_IF_ERROR(state->GetCodegen(&codegen));
-
-    // Codegen for hashing rows
-    Function* hash_fn = hash_tbl_->CodegenHashCurrentRow(state);
-    if (hash_fn == NULL) return Status::OK();
 
+  // Codegen for hashing rows
+  Function* hash_fn = hash_tbl_->CodegenHashCurrentRow(codegen);
+  if (hash_fn != NULL) {
     // Codegen for build path
-    codegen_process_build_batch_fn_ = CodegenProcessBuildBatch(state, hash_fn);
+    codegen_process_build_batch_fn_ = CodegenProcessBuildBatch(codegen, hash_fn);
     if (codegen_process_build_batch_fn_ != NULL) {
       codegen->AddFunctionToJit(codegen_process_build_batch_fn_,
           reinterpret_cast<void**>(&process_build_batch_fn_));
@@ -164,7 +169,8 @@ Status HashJoinNode::Prepare(RuntimeState* state) {
 
     // Codegen for probe path (only for left joins)
     if (!match_all_build_) {
-      Function* codegen_process_probe_batch_fn = CodegenProcessProbeBatch(state, hash_fn);
+      Function* codegen_process_probe_batch_fn =
+          CodegenProcessProbeBatch(codegen, hash_fn);
       if (codegen_process_probe_batch_fn != NULL) {
         codegen->AddFunctionToJit(codegen_process_probe_batch_fn,
             reinterpret_cast<void**>(&process_probe_batch_fn_));
@@ -174,7 +180,7 @@ Status HashJoinNode::Prepare(RuntimeState* state) {
   }
   runtime_profile()->AddCodegenMsg(build_codegen_enabled, "", "Build Side");
   runtime_profile()->AddCodegenMsg(probe_codegen_enabled, "", "Probe Side");
-  return Status::OK();
+  ExecNode::Codegen(state);
 }
 
 Status HashJoinNode::Reset(RuntimeState* state) {
@@ -591,18 +597,15 @@ Function* HashJoinNode::CodegenCreateOutputRow(LlvmCodeGen* codegen) {
   return codegen->FinalizeFunction(fn);
 }
 
-Function* HashJoinNode::CodegenProcessBuildBatch(RuntimeState* state,
+Function* HashJoinNode::CodegenProcessBuildBatch(LlvmCodeGen* codegen,
     Function* hash_fn) {
-  LlvmCodeGen* codegen;
-  if (!state->GetCodegen(&codegen).ok()) return NULL;
-
   // Get cross compiled function
   Function* process_build_batch_fn =
       codegen->GetFunction(IRFunction::HASH_JOIN_PROCESS_BUILD_BATCH, true);
   DCHECK(process_build_batch_fn != NULL);
 
   // Codegen for evaluating build rows
-  Function* eval_row_fn = hash_tbl_->CodegenEvalTupleRow(state, true);
+  Function* eval_row_fn = hash_tbl_->CodegenEvalTupleRow(codegen, true);
   if (eval_row_fn == NULL) return NULL;
 
   int replaced = codegen->ReplaceCallSites(process_build_batch_fn, eval_row_fn,
@@ -615,36 +618,34 @@ Function* HashJoinNode::CodegenProcessBuildBatch(RuntimeState* state,
   return codegen->FinalizeFunction(process_build_batch_fn);
 }
 
-Function* HashJoinNode::CodegenProcessProbeBatch(RuntimeState* state, Function* hash_fn) {
-  LlvmCodeGen* codegen;
-  if (!state->GetCodegen(&codegen).ok()) return NULL;
-
+Function* HashJoinNode::CodegenProcessProbeBatch(LlvmCodeGen* codegen,
+    Function* hash_fn) {
   // Get cross compiled function
   Function* process_probe_batch_fn =
       codegen->GetFunction(IRFunction::HASH_JOIN_PROCESS_PROBE_BATCH, true);
   DCHECK(process_probe_batch_fn != NULL);
 
-  // Codegen HashTable::Equals
-  Function* equals_fn = hash_tbl_->CodegenEquals(state);
+  // Codegen HashTable::Equals()
+  Function* equals_fn = hash_tbl_->CodegenEquals(codegen);
   if (equals_fn == NULL) return NULL;
 
   // Codegen for evaluating build rows
-  Function* eval_row_fn = hash_tbl_->CodegenEvalTupleRow(state, false);
+  Function* eval_row_fn = hash_tbl_->CodegenEvalTupleRow(codegen, false);
   if (eval_row_fn == NULL) return NULL;
 
-  // Codegen CreateOutputRow
+  // Codegen CreateOutputRow()
   Function* create_output_row_fn = CodegenCreateOutputRow(codegen);
   if (create_output_row_fn == NULL) return NULL;
 
   // Codegen evaluating other join conjuncts
   Function* eval_other_conjuncts_fn;
-  Status status = ExecNode::CodegenEvalConjuncts(state, other_join_conjunct_ctxs_,
+  Status status = ExecNode::CodegenEvalConjuncts(codegen, other_join_conjunct_ctxs_,
       &eval_other_conjuncts_fn, "EvalOtherConjuncts");
   if (!status.ok()) return NULL;
 
   // Codegen evaluating conjuncts
   Function* eval_conjuncts_fn;
-  status = ExecNode::CodegenEvalConjuncts(state, conjunct_ctxs_, &eval_conjuncts_fn);
+  status = ExecNode::CodegenEvalConjuncts(codegen, conjunct_ctxs_, &eval_conjuncts_fn);
   if (!status.ok()) return NULL;
 
   // Replace all call sites with codegen version

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exec/hash-join-node.h
----------------------------------------------------------------------
diff --git a/be/src/exec/hash-join-node.h b/be/src/exec/hash-join-node.h
index 2e5ca6e..e65dc16 100644
--- a/be/src/exec/hash-join-node.h
+++ b/be/src/exec/hash-join-node.h
@@ -54,6 +54,7 @@ class HashJoinNode : public BlockingJoinNode {
 
   virtual Status Init(const TPlanNode& tnode, RuntimeState* state);
   virtual Status Prepare(RuntimeState* state);
+  virtual void Codegen(RuntimeState* state);
   virtual Status Open(RuntimeState* state);
   virtual Status GetNext(RuntimeState* state, RowBatch* row_batch, bool* eos);
   virtual Status Reset(RuntimeState* state);
@@ -63,7 +64,6 @@ class HashJoinNode : public BlockingJoinNode {
 
  protected:
   virtual void AddToDebugString(int indentation_level, std::stringstream* out) const;
-
   virtual Status ProcessBuildInput(RuntimeState* state);
 
  private:
@@ -144,13 +144,13 @@ class HashJoinNode : public BlockingJoinNode {
   /// hash_fn is the codegen'd function for computing hashes over tuple rows in the
   /// hash table.
   /// Returns NULL if codegen was not possible.
-  llvm::Function* CodegenProcessBuildBatch(RuntimeState* state, llvm::Function* hash_fn);
+  llvm::Function* CodegenProcessBuildBatch(LlvmCodeGen* codegen, llvm::Function* hash_fn);
 
   /// Codegen processing probe batches.  Identical signature to ProcessProbeBatch.
   /// hash_fn is the codegen'd function for computing hashes over tuple rows in the
   /// hash table.
   /// Returns NULL if codegen was not possible.
-  llvm::Function* CodegenProcessProbeBatch(RuntimeState* state, llvm::Function* hash_fn);
+  llvm::Function* CodegenProcessProbeBatch(LlvmCodeGen* codegen, llvm::Function* hash_fn);
 };
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exec/hash-table.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hash-table.cc b/be/src/exec/hash-table.cc
index 6626b33..c39e9e9 100644
--- a/be/src/exec/hash-table.cc
+++ b/be/src/exec/hash-table.cc
@@ -684,7 +684,7 @@ static void CodegenAssignNullValue(LlvmCodeGen* codegen,
 // Both the null and not null branch into the continue block.  The continue block
 // becomes the start of the next block for codegen (either the next expr or just the
 // end of the function).
-Status HashTableCtx::CodegenEvalRow(RuntimeState* state, bool build, Function** fn) {
+Status HashTableCtx::CodegenEvalRow(LlvmCodeGen* codegen, bool build, Function** fn) {
   // TODO: CodegenAssignNullValue() can't handle TYPE_TIMESTAMP or TYPE_DECIMAL yet
   const vector<ExprContext*>& ctxs = build ? build_expr_ctxs_ : probe_expr_ctxs_;
   for (int i = 0; i < ctxs.size(); ++i) {
@@ -695,9 +695,6 @@ Status HashTableCtx::CodegenEvalRow(RuntimeState* state, bool build, Function**
     }
   }
 
-  LlvmCodeGen* codegen;
-  RETURN_IF_ERROR(state->GetCodegen(&codegen));
-
   // Get types to generate function prototype
   Type* this_type = codegen->GetType(HashTableCtx::LLVM_CLASS_NAME);
   DCHECK(this_type != NULL);
@@ -746,7 +743,7 @@ Status HashTableCtx::CodegenEvalRow(RuntimeState* state, bool build, Function**
 
     // Call expr
     Function* expr_fn;
-    Status status = ctxs[i]->root()->GetCodegendComputeFn(state, &expr_fn);
+    Status status = ctxs[i]->root()->GetCodegendComputeFn(codegen, &expr_fn);
     if (!status.ok()) {
       (*fn)->eraseFromParent(); // deletes function
       *fn = NULL;
@@ -837,7 +834,7 @@ Status HashTableCtx::CodegenEvalRow(RuntimeState* state, bool build, Function**
 //   %hash_phi = phi i32 [ %string_hash, %not_null ], [ %str_null, %null ]
 //   ret i32 %hash_phi
 // }
-Status HashTableCtx::CodegenHashRow(RuntimeState* state, bool use_murmur, Function** fn) {
+Status HashTableCtx::CodegenHashRow(LlvmCodeGen* codegen, bool use_murmur, Function** fn) {
   for (int i = 0; i < build_expr_ctxs_.size(); ++i) {
     // Disable codegen for CHAR
     if (build_expr_ctxs_[i]->root()->type().type == TYPE_CHAR) {
@@ -845,9 +842,6 @@ Status HashTableCtx::CodegenHashRow(RuntimeState* state, bool use_murmur, Functi
     }
   }
 
-  LlvmCodeGen* codegen;
-  RETURN_IF_ERROR(state->GetCodegen(&codegen));
-
   // Get types to generate function prototype
   Type* this_type = codegen->GetType(HashTableCtx::LLVM_CLASS_NAME);
   DCHECK(this_type != NULL);
@@ -1044,7 +1038,7 @@ Status HashTableCtx::CodegenHashRow(RuntimeState* state, bool use_murmur, Functi
 //        %"struct.impala_udf::StringVal"* %8, %"struct.impala::StringValue"* %row_val8)
 //   br i1 %cmp_raw10, label %continue3, label %false_block
 // }
-Status HashTableCtx::CodegenEquals(RuntimeState* state, bool force_null_equality,
+Status HashTableCtx::CodegenEquals(LlvmCodeGen* codegen, bool force_null_equality,
     Function** fn) {
   for (int i = 0; i < build_expr_ctxs_.size(); ++i) {
     // Disable codegen for CHAR
@@ -1053,8 +1047,6 @@ Status HashTableCtx::CodegenEquals(RuntimeState* state, bool force_null_equality
     }
   }
 
-  LlvmCodeGen* codegen;
-  RETURN_IF_ERROR(state->GetCodegen(&codegen));
   // Get types to generate function prototype
   Type* this_type = codegen->GetType(HashTableCtx::LLVM_CLASS_NAME);
   DCHECK(this_type != NULL);
@@ -1091,7 +1083,7 @@ Status HashTableCtx::CodegenEquals(RuntimeState* state, bool force_null_equality
 
     // call GetValue on build_exprs[i]
     Function* expr_fn;
-    Status status = build_expr_ctxs_[i]->root()->GetCodegendComputeFn(state, &expr_fn);
+    Status status = build_expr_ctxs_[i]->root()->GetCodegendComputeFn(codegen, &expr_fn);
     if (!status.ok()) {
       (*fn)->eraseFromParent(); // deletes function
       *fn = NULL;
@@ -1164,11 +1156,9 @@ Status HashTableCtx::CodegenEquals(RuntimeState* state, bool force_null_equality
   return Status::OK();
 }
 
-Status HashTableCtx::ReplaceHashTableConstants(RuntimeState* state,
+Status HashTableCtx::ReplaceHashTableConstants(LlvmCodeGen* codegen,
     bool stores_duplicates, int num_build_tuples, Function* fn,
     HashTableReplacedConstants* replacement_counts) {
-  LlvmCodeGen* codegen;
-  RETURN_IF_ERROR(state->GetCodegen(&codegen));
 
   replacement_counts->stores_nulls = codegen->ReplaceCallSitesWithBoolConst(
       fn, stores_nulls(), "stores_nulls");

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exec/hash-table.h
----------------------------------------------------------------------
diff --git a/be/src/exec/hash-table.h b/be/src/exec/hash-table.h
index 4edd130..404b294 100644
--- a/be/src/exec/hash-table.h
+++ b/be/src/exec/hash-table.h
@@ -177,20 +177,20 @@ class HashTableCtx {
   /// Codegen for evaluating a tuple row. Codegen'd function matches the signature
   /// for EvalBuildRow and EvalTupleRow.
   /// If build_row is true, the codegen uses the build_exprs, otherwise the probe_exprs.
-  Status CodegenEvalRow(RuntimeState* state, bool build_row, llvm::Function** fn);
+  Status CodegenEvalRow(LlvmCodeGen* codegen, bool build_row, llvm::Function** fn);
 
   /// Codegen for evaluating a TupleRow and comparing equality. Function signature
   /// matches HashTable::Equals(). 'force_null_equality' is true if the generated
   /// equality function should treat all NULLs as equal. See the template parameter
   /// to HashTable::Equals().
-  Status CodegenEquals(RuntimeState* state, bool force_null_equality,
+  Status CodegenEquals(LlvmCodeGen* codegen, bool force_null_equality,
       llvm::Function** fn);
 
   /// Codegen for hashing expr values. Function prototype matches HashRow identically.
   /// Unlike HashRow(), the returned function only uses a single hash function, rather
   /// than switching based on level_. If 'use_murmur' is true, murmur hash is used,
   /// otherwise CRC is used if the hardware supports it (see hash-util.h).
-  Status CodegenHashRow(RuntimeState* state, bool use_murmur, llvm::Function** fn);
+  Status CodegenHashRow(LlvmCodeGen* codegen, bool use_murmur, llvm::Function** fn);
 
   /// Struct that returns the number of constants replaced by ReplaceConstants().
   struct HashTableReplacedConstants {
@@ -204,7 +204,7 @@ class HashTableCtx {
   /// Replace hash table parameters with constants in 'fn'. Updates 'replacement_counts'
   /// with the number of replacements made. 'num_build_tuples' and 'stores_duplicates'
   /// correspond to HashTable parameters with the same name.
-  Status ReplaceHashTableConstants(RuntimeState* state, bool stores_duplicates,
+  Status ReplaceHashTableConstants(LlvmCodeGen* codegen, bool stores_duplicates,
       int num_build_tuples, llvm::Function* fn,
       HashTableReplacedConstants* replacement_counts);
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exec/hdfs-avro-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-avro-scanner.cc b/be/src/exec/hdfs-avro-scanner.cc
index f511b12..88d6d3a 100644
--- a/be/src/exec/hdfs-avro-scanner.cc
+++ b/be/src/exec/hdfs-avro-scanner.cc
@@ -80,15 +80,13 @@ Status HdfsAvroScanner::Open(ScannerContext* context) {
 Status HdfsAvroScanner::Codegen(HdfsScanNodeBase* node,
     const vector<ExprContext*>& conjunct_ctxs, Function** decode_avro_data_fn) {
   *decode_avro_data_fn = NULL;
-  if (!node->runtime_state()->codegen_enabled()) {
-    return Status("Disabled by query option.");
-  }
-  LlvmCodeGen* codegen;
-  RETURN_IF_ERROR(node->runtime_state()->GetCodegen(&codegen));
+  DCHECK(node->runtime_state()->codegen_enabled());
+  LlvmCodeGen* codegen = node->runtime_state()->codegen();
+  DCHECK(codegen != NULL);
   Function* materialize_tuple_fn;
   RETURN_IF_ERROR(CodegenMaterializeTuple(node, codegen, &materialize_tuple_fn));
   DCHECK(materialize_tuple_fn != NULL);
-  RETURN_IF_ERROR(CodegenDecodeAvroData(node->runtime_state(), materialize_tuple_fn,
+  RETURN_IF_ERROR(CodegenDecodeAvroData(codegen, materialize_tuple_fn,
       conjunct_ctxs, decode_avro_data_fn));
   DCHECK(*decode_avro_data_fn != NULL);
   return Status::OK();
@@ -1016,11 +1014,9 @@ Status HdfsAvroScanner::CodegenReadScalar(const AvroSchemaElement& element,
   return Status::OK();
 }
 
-Status HdfsAvroScanner::CodegenDecodeAvroData(RuntimeState* state,
+Status HdfsAvroScanner::CodegenDecodeAvroData(LlvmCodeGen* codegen,
     Function* materialize_tuple_fn, const vector<ExprContext*>& conjunct_ctxs,
     Function** decode_avro_data_fn) {
-  LlvmCodeGen* codegen;
-  RETURN_IF_ERROR(state->GetCodegen(&codegen));
   SCOPED_TIMER(codegen->codegen_timer());
   DCHECK(materialize_tuple_fn != NULL);
 
@@ -1030,7 +1026,7 @@ Status HdfsAvroScanner::CodegenDecodeAvroData(RuntimeState* state,
   DCHECK_EQ(replaced, 1);
 
   Function* eval_conjuncts_fn;
-  RETURN_IF_ERROR(ExecNode::CodegenEvalConjuncts(state, conjunct_ctxs,
+  RETURN_IF_ERROR(ExecNode::CodegenEvalConjuncts(codegen, conjunct_ctxs,
       &eval_conjuncts_fn));
 
   replaced = codegen->ReplaceCallSites(fn, eval_conjuncts_fn, "EvalConjuncts");

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exec/hdfs-avro-scanner.h
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-avro-scanner.h b/be/src/exec/hdfs-avro-scanner.h
index 274cf4d..595a733 100644
--- a/be/src/exec/hdfs-avro-scanner.h
+++ b/be/src/exec/hdfs-avro-scanner.h
@@ -197,7 +197,7 @@ class HdfsAvroScanner : public BaseSequenceScanner {
   /// Produces a version of DecodeAvroData that uses codegen'd instead of interpreted
   /// functions. Stores the resulting function in 'decode_avro_data_fn' if codegen was
   /// successful or returns an error.
-  static Status CodegenDecodeAvroData(RuntimeState* state,
+  static Status CodegenDecodeAvroData(LlvmCodeGen* codegen,
       llvm::Function* materialize_tuple_fn,
       const std::vector<ExprContext*>& conjunct_ctxs,
       llvm::Function** decode_avro_data_fn);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exec/hdfs-parquet-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-parquet-scanner.cc b/be/src/exec/hdfs-parquet-scanner.cc
index 86dbbf8..e91a7ec 100644
--- a/be/src/exec/hdfs-parquet-scanner.cc
+++ b/be/src/exec/hdfs-parquet-scanner.cc
@@ -620,13 +620,9 @@ int HdfsParquetScanner::TransferScratchTuples(RowBatch* dst_batch) {
 
 Status HdfsParquetScanner::Codegen(HdfsScanNodeBase* node,
     const vector<ExprContext*>& conjunct_ctxs, Function** process_scratch_batch_fn) {
+  DCHECK(node->runtime_state()->codegen_enabled());
   *process_scratch_batch_fn = NULL;
-  if (!node->runtime_state()->codegen_enabled()) {
-    return Status("Disabled by query option.");
-  }
-
-  LlvmCodeGen* codegen;
-  RETURN_IF_ERROR(node->runtime_state()->GetCodegen(&codegen));
+  LlvmCodeGen* codegen = node->runtime_state()->codegen();
   DCHECK(codegen != NULL);
   SCOPED_TIMER(codegen->codegen_timer());
 
@@ -634,7 +630,7 @@ Status HdfsParquetScanner::Codegen(HdfsScanNodeBase* node,
   DCHECK(fn != NULL);
 
   Function* eval_conjuncts_fn;
-  RETURN_IF_ERROR(ExecNode::CodegenEvalConjuncts(node->runtime_state(), conjunct_ctxs,
+  RETURN_IF_ERROR(ExecNode::CodegenEvalConjuncts(codegen, conjunct_ctxs,
       &eval_conjuncts_fn));
   DCHECK(eval_conjuncts_fn != NULL);
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exec/hdfs-scan-node-base.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-scan-node-base.cc b/be/src/exec/hdfs-scan-node-base.cc
index 4acf3f5..cf6708c 100644
--- a/be/src/exec/hdfs-scan-node-base.cc
+++ b/be/src/exec/hdfs-scan-node-base.cc
@@ -146,7 +146,6 @@ Status HdfsScanNodeBase::Init(const TPlanNode& tnode, RuntimeState* state) {
   // Add row batch conjuncts
   DCHECK(conjuncts_map_[tuple_id_].empty());
   conjuncts_map_[tuple_id_] = conjunct_ctxs_;
-
   return Status::OK();
 }
 
@@ -293,10 +292,15 @@ Status HdfsScanNodeBase::Prepare(RuntimeState* state) {
   UpdateHdfsSplitStats(*scan_range_params_, &per_volume_stats);
   PrintHdfsSplitStats(per_volume_stats, &str);
   runtime_profile()->AddInfoString(HDFS_SPLIT_STATS_DESC, str.str());
+  if (!state->codegen_enabled()) {
+    runtime_profile()->AddCodegenMsg(false, "disabled by query option DISABLE_CODEGEN");
+  }
+  return Status::OK();
+}
 
+void HdfsScanNodeBase::Codegen(RuntimeState* state) {
   // Create codegen'd functions
-  for (int format = THdfsFileFormat::TEXT;
-       format <= THdfsFileFormat::PARQUET; ++format) {
+  for (int format = THdfsFileFormat::TEXT; format <= THdfsFileFormat::PARQUET; ++format) {
     vector<HdfsFileDesc*>& file_descs =
         per_type_files_[static_cast<THdfsFileFormat::type>(format)];
 
@@ -332,20 +336,16 @@ Status HdfsScanNodeBase::Prepare(RuntimeState* state) {
         status = Status("Not implemented for this format.");
     }
     DCHECK(fn != NULL || !status.ok());
-
     const char* format_name = _THdfsFileFormat_VALUES_TO_NAMES.find(format)->second;
-    if (!status.ok()) {
-      runtime_profile()->AddCodegenMsg(false, status, format_name);
-    } else {
-      runtime_profile()->AddCodegenMsg(true, status, format_name);
-      LlvmCodeGen* codegen;
-      RETURN_IF_ERROR(runtime_state_->GetCodegen(&codegen));
-      codegen->AddFunctionToJit(
-          fn, &codegend_fn_map_[static_cast<THdfsFileFormat::type>(format)]);
+    if (status.ok()) {
+      LlvmCodeGen* codegen = state->codegen();
+      DCHECK(codegen != NULL);
+      codegen->AddFunctionToJit(fn,
+          &codegend_fn_map_[static_cast<THdfsFileFormat::type>(format)]);
     }
+    runtime_profile()->AddCodegenMsg(status.ok(), status, format_name);
   }
-
-  return Status::OK();
+  ExecNode::Codegen(state);
 }
 
 Status HdfsScanNodeBase::Open(RuntimeState* state) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exec/hdfs-scan-node-base.h
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-scan-node-base.h b/be/src/exec/hdfs-scan-node-base.h
index 7ea4b9d..3531c9e 100644
--- a/be/src/exec/hdfs-scan-node-base.h
+++ b/be/src/exec/hdfs-scan-node-base.h
@@ -119,6 +119,7 @@ class HdfsScanNodeBase : public ScanNode {
 
   virtual Status Init(const TPlanNode& tnode, RuntimeState* state);
   virtual Status Prepare(RuntimeState* state);
+  virtual void Codegen(RuntimeState* state);
   virtual Status Open(RuntimeState* state);
   virtual Status Reset(RuntimeState* state);
   virtual void Close(RuntimeState* state);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exec/hdfs-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-scanner.cc b/be/src/exec/hdfs-scanner.cc
index 81542ec..0b6e8c5 100644
--- a/be/src/exec/hdfs-scanner.cc
+++ b/be/src/exec/hdfs-scanner.cc
@@ -530,7 +530,7 @@ Status HdfsScanner::CodegenWriteCompleteTuple(HdfsScanNodeBase* node,
       parse_block = BasicBlock::Create(context, "parse", fn, eval_fail_block);
       Function* conjunct_fn;
       Status status =
-          conjunct_ctxs[conjunct_idx]->root()->GetCodegendComputeFn(state, &conjunct_fn);
+          conjunct_ctxs[conjunct_idx]->root()->GetCodegendComputeFn(codegen, &conjunct_fn);
       if (!status.ok()) {
         stringstream ss;
         ss << "Failed to codegen conjunct: " << status.GetDetail();

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exec/hdfs-sequence-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-sequence-scanner.cc b/be/src/exec/hdfs-sequence-scanner.cc
index 03a07ce..fd552be 100644
--- a/be/src/exec/hdfs-sequence-scanner.cc
+++ b/be/src/exec/hdfs-sequence-scanner.cc
@@ -55,11 +55,9 @@ HdfsSequenceScanner::~HdfsSequenceScanner() {
 Status HdfsSequenceScanner::Codegen(HdfsScanNodeBase* node,
     const vector<ExprContext*>& conjunct_ctxs, Function** write_aligned_tuples_fn) {
   *write_aligned_tuples_fn = NULL;
-  if (!node->runtime_state()->codegen_enabled()) {
-    return Status("Disabled by query option.");
-  }
-  LlvmCodeGen* codegen;
-  RETURN_IF_ERROR(node->runtime_state()->GetCodegen(&codegen));
+  DCHECK(node->runtime_state()->codegen_enabled());
+  LlvmCodeGen* codegen = node->runtime_state()->codegen();
+  DCHECK(codegen != NULL);
   Function* write_complete_tuple_fn;
   RETURN_IF_ERROR(CodegenWriteCompleteTuple(node, codegen, conjunct_ctxs,
       &write_complete_tuple_fn));

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exec/hdfs-text-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-text-scanner.cc b/be/src/exec/hdfs-text-scanner.cc
index f3f0a7c..cc63408 100644
--- a/be/src/exec/hdfs-text-scanner.cc
+++ b/be/src/exec/hdfs-text-scanner.cc
@@ -696,11 +696,9 @@ Status HdfsTextScanner::CheckForSplitDelimiter(bool* split_delimiter) {
 Status HdfsTextScanner::Codegen(HdfsScanNodeBase* node,
     const vector<ExprContext*>& conjunct_ctxs, Function** write_aligned_tuples_fn) {
   *write_aligned_tuples_fn = NULL;
-  if (!node->runtime_state()->codegen_enabled()) {
-    return Status("Disabled by query option.");
-  }
-  LlvmCodeGen* codegen;
-  RETURN_IF_ERROR(node->runtime_state()->GetCodegen(&codegen));
+  DCHECK(node->runtime_state()->codegen_enabled());
+  LlvmCodeGen* codegen = node->runtime_state()->codegen();
+  DCHECK(codegen != NULL);
   Function* write_complete_tuple_fn;
   RETURN_IF_ERROR(CodegenWriteCompleteTuple(node, codegen, conjunct_ctxs,
       &write_complete_tuple_fn));

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exec/old-hash-table.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/old-hash-table.cc b/be/src/exec/old-hash-table.cc
index 0e38924..db89782 100644
--- a/be/src/exec/old-hash-table.cc
+++ b/be/src/exec/old-hash-table.cc
@@ -255,7 +255,7 @@ static void CodegenAssignNullValue(LlvmCodeGen* codegen,
 // Both the null and not null branch into the continue block.  The continue block
 // becomes the start of the next block for codegen (either the next expr or just the
 // end of the function).
-Function* OldHashTable::CodegenEvalTupleRow(RuntimeState* state, bool build) {
+Function* OldHashTable::CodegenEvalTupleRow(LlvmCodeGen* codegen, bool build) {
   // TODO: CodegenAssignNullValue() can't handle TYPE_TIMESTAMP or TYPE_DECIMAL yet
   const vector<ExprContext*>& ctxs = build ? build_expr_ctxs_ : probe_expr_ctxs_;
   for (int i = 0; i < ctxs.size(); ++i) {
@@ -263,9 +263,6 @@ Function* OldHashTable::CodegenEvalTupleRow(RuntimeState* state, bool build) {
     if (type == TYPE_TIMESTAMP || type == TYPE_DECIMAL || type == TYPE_CHAR) return NULL;
   }
 
-  LlvmCodeGen* codegen;
-  if (!state->GetCodegen(&codegen).ok()) return NULL;
-
   // Get types to generate function prototype
   Type* tuple_row_type = codegen->GetType(TupleRow::LLVM_CLASS_NAME);
   DCHECK(tuple_row_type != NULL);
@@ -307,12 +304,10 @@ Function* OldHashTable::CodegenEvalTupleRow(RuntimeState* state, bool build) {
 
       // Call expr
       Function* expr_fn;
-      Status status = ctxs[i]->root()->GetCodegendComputeFn(state, &expr_fn);
+      Status status = ctxs[i]->root()->GetCodegendComputeFn(codegen, &expr_fn);
       if (!status.ok()) {
-        stringstream ss;
-        ss << "Problem with codegen: " << status.GetDetail();
-        state->LogError(ErrorMsg(TErrorCode::GENERAL, ss.str()));
         fn->eraseFromParent(); // deletes function
+        VLOG_QUERY << "Failed to codegen EvalTupleRow(): " << status.GetDetail();
         return NULL;
       }
 
@@ -352,11 +347,9 @@ Function* OldHashTable::CodegenEvalTupleRow(RuntimeState* state, bool build) {
     }
   }
   builder.CreateRet(has_null);
-
   return codegen->FinalizeFunction(fn);
 }
 
-
 uint32_t OldHashTable::HashVariableLenRow() {
   uint32_t hash = initial_seed_;
   // Hash the non-var length portions (if there are any)
@@ -410,15 +403,12 @@ uint32_t OldHashTable::HashVariableLenRow() {
 //   ret i32 %7
 // }
 // TODO: can this be cross-compiled?
-Function* OldHashTable::CodegenHashCurrentRow(RuntimeState* state) {
+Function* OldHashTable::CodegenHashCurrentRow(LlvmCodeGen* codegen) {
   for (int i = 0; i < build_expr_ctxs_.size(); ++i) {
     // Disable codegen for CHAR
     if (build_expr_ctxs_[i]->root()->type().type == TYPE_CHAR) return NULL;
   }
 
-  LlvmCodeGen* codegen;
-  if (!state->GetCodegen(&codegen).ok()) return NULL;
-
   // Get types to generate function prototype
   Type* this_type = codegen->GetType(OldHashTable::LLVM_CLASS_NAME);
   DCHECK(this_type != NULL);
@@ -592,14 +582,12 @@ bool OldHashTable::Equals(TupleRow* build_row) {
 // continue3:                                        ; preds = %not_null2, %null1
 //   ret i1 true
 // }
-Function* OldHashTable::CodegenEquals(RuntimeState* state) {
+Function* OldHashTable::CodegenEquals(LlvmCodeGen* codegen) {
   for (int i = 0; i < build_expr_ctxs_.size(); ++i) {
     // Disable codegen for CHAR
     if (build_expr_ctxs_[i]->root()->type().type == TYPE_CHAR) return NULL;
   }
 
-  LlvmCodeGen* codegen;
-  if (!state->GetCodegen(&codegen).ok()) return NULL;
   // Get types to generate function prototype
   Type* tuple_row_type = codegen->GetType(TupleRow::LLVM_CLASS_NAME);
   DCHECK(tuple_row_type != NULL);
@@ -629,12 +617,11 @@ Function* OldHashTable::CodegenEquals(RuntimeState* state) {
 
       // call GetValue on build_exprs[i]
       Function* expr_fn;
-      Status status = build_expr_ctxs_[i]->root()->GetCodegendComputeFn(state, &expr_fn);
+      Status status =
+          build_expr_ctxs_[i]->root()->GetCodegendComputeFn(codegen, &expr_fn);
       if (!status.ok()) {
-        stringstream ss;
-        ss << "Problem with codegen: " << status.GetDetail();
-        state->LogError(ErrorMsg(TErrorCode::GENERAL, ss.str()));
         fn->eraseFromParent(); // deletes function
+        VLOG_QUERY << "Failed to codegen Equals(): " << status.GetDetail();
         return NULL;
       }
 
@@ -690,7 +677,6 @@ Function* OldHashTable::CodegenEquals(RuntimeState* state) {
   } else {
     builder.CreateRet(codegen->true_value());
   }
-
   return codegen->FinalizeFunction(fn);
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exec/old-hash-table.h
----------------------------------------------------------------------
diff --git a/be/src/exec/old-hash-table.h b/be/src/exec/old-hash-table.h
index 5ef24ab..3a9b5b2 100644
--- a/be/src/exec/old-hash-table.h
+++ b/be/src/exec/old-hash-table.h
@@ -233,15 +233,15 @@ class OldHashTable {
   /// Codegen for evaluating a tuple row.  Codegen'd function matches the signature
   /// for EvalBuildRow and EvalTupleRow.
   /// if build_row is true, the codegen uses the build_exprs, otherwise the probe_exprs
-  llvm::Function* CodegenEvalTupleRow(RuntimeState* state, bool build_row);
+  llvm::Function* CodegenEvalTupleRow(LlvmCodeGen* codegen, bool build_row);
 
   /// Codegen for hashing the expr values in 'expr_values_buffer_'.  Function
   /// prototype matches HashCurrentRow identically.
-  llvm::Function* CodegenHashCurrentRow(RuntimeState* state);
+  llvm::Function* CodegenHashCurrentRow(LlvmCodeGen* codegen);
 
   /// Codegen for evaluating a TupleRow and comparing equality against
   /// 'expr_values_buffer_'.  Function signature matches OldHashTable::Equals()
-  llvm::Function* CodegenEquals(RuntimeState* state);
+  llvm::Function* CodegenEquals(LlvmCodeGen* codegen);
 
   static const char* LLVM_CLASS_NAME;
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exec/partitioned-aggregation-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-aggregation-node.cc b/be/src/exec/partitioned-aggregation-node.cc
index 629e407..6862bb3 100644
--- a/be/src/exec/partitioned-aggregation-node.cc
+++ b/be/src/exec/partitioned-aggregation-node.cc
@@ -172,14 +172,6 @@ Status PartitionedAggregationNode::Init(const TPlanNode& tnode, RuntimeState* st
 Status PartitionedAggregationNode::Prepare(RuntimeState* state) {
   SCOPED_TIMER(runtime_profile_->total_time_counter());
 
-  // Create the codegen object before preparing conjunct_ctxs_ and children_, so that any
-  // ScalarFnCalls will use codegen.
-  // TODO: this is brittle and hard to reason about, revisit
-  if (state->codegen_enabled()) {
-    LlvmCodeGen* codegen;
-    RETURN_IF_ERROR(state->GetCodegen(&codegen));
-  }
-
   RETURN_IF_ERROR(ExecNode::Prepare(state));
   state_ = state;
 
@@ -292,18 +284,24 @@ Status PartitionedAggregationNode::Prepare(RuntimeState* state) {
     }
     DCHECK(serialize_stream_->has_write_block());
   }
-
-  bool codegen_enabled = false;
-  Status codegen_status;
-  if (state->codegen_enabled()) {
-    codegen_status =
-        is_streaming_preagg_ ? CodegenProcessBatchStreaming() : CodegenProcessBatch();
-    codegen_enabled = codegen_status.ok();
+  if (!state->codegen_enabled()) {
+    runtime_profile()->AddCodegenMsg(false, "disabled by query option DISABLE_CODEGEN");
   }
-  runtime_profile()->AddCodegenMsg(codegen_enabled, codegen_status);
   return Status::OK();
 }
 
+void PartitionedAggregationNode::Codegen(RuntimeState* state) {
+  DCHECK(state->codegen_enabled());
+  LlvmCodeGen* codegen = state->codegen();
+  DCHECK(codegen != NULL);
+  TPrefetchMode::type prefetch_mode = state_->query_options().prefetch_mode;
+  Status codegen_status =
+     is_streaming_preagg_ ? CodegenProcessBatchStreaming(codegen, prefetch_mode) :
+          CodegenProcessBatch(codegen, prefetch_mode);
+  runtime_profile()->AddCodegenMsg(codegen_status.ok(), codegen_status);
+  ExecNode::Codegen(state);
+}
+
 Status PartitionedAggregationNode::Open(RuntimeState* state) {
   SCOPED_TIMER(runtime_profile_->total_time_counter());
   RETURN_IF_ERROR(ExecNode::Open(state));
@@ -1531,11 +1529,8 @@ Status PartitionedAggregationNode::QueryMaintenance(RuntimeState* state) {
 // ret:                                              ; preds = %src_not_null, %entry
 //   ret void
 // }
-Status PartitionedAggregationNode::CodegenUpdateSlot(
+Status PartitionedAggregationNode::CodegenUpdateSlot(LlvmCodeGen* codegen,
     AggFnEvaluator* evaluator, SlotDescriptor* slot_desc, Function** fn) {
-  LlvmCodeGen* codegen;
-  RETURN_IF_ERROR(state_->GetCodegen(&codegen));
-
   // TODO: Fix this DCHECK and Init() once CodegenUpdateSlot() can handle AggFnEvaluator
   // with multiple input expressions (e.g. group_concat).
   DCHECK_EQ(evaluator->input_expr_ctxs().size(), 1);
@@ -1550,7 +1545,7 @@ Status PartitionedAggregationNode::CodegenUpdateSlot(
   }
 
   Function* agg_expr_fn;
-  RETURN_IF_ERROR(agg_expr->GetCodegendComputeFn(state_, &agg_expr_fn));
+  RETURN_IF_ERROR(agg_expr->GetCodegendComputeFn(codegen, &agg_expr_fn));
 
   PointerType* fn_ctx_type =
       codegen->GetPtrType(FunctionContextImpl::LLVM_FUNCTIONCONTEXT_NAME);
@@ -1746,9 +1741,8 @@ Status PartitionedAggregationNode::CodegenUpdateSlot(
 //                          %"class.impala::TupleRow"* %row)
 //   ret void
 // }
-Status PartitionedAggregationNode::CodegenUpdateTuple(Function** fn) {
-  LlvmCodeGen* codegen;
-  RETURN_IF_ERROR(state_->GetCodegen(&codegen));
+Status PartitionedAggregationNode::CodegenUpdateTuple(LlvmCodeGen* codegen,
+    Function** fn) {
   SCOPED_TIMER(codegen->codegen_timer());
 
   int j = grouping_expr_ctxs_.size();
@@ -1838,7 +1832,7 @@ Status PartitionedAggregationNode::CodegenUpdateTuple(Function** fn) {
       builder.CreateStore(count_inc, slot_ptr);
     } else {
       Function* update_slot_fn;
-      RETURN_IF_ERROR(CodegenUpdateSlot(evaluator, slot_desc, &update_slot_fn));
+      RETURN_IF_ERROR(CodegenUpdateSlot(codegen, evaluator, slot_desc, &update_slot_fn));
       Value* agg_fn_ctx_ptr = builder.CreateConstGEP1_32(agg_fn_ctxs_arg, i);
       Value* agg_fn_ctx = builder.CreateLoad(agg_fn_ctx_ptr, "agg_fn_ctx");
       // Call GetExprCtx() to get the expression context.
@@ -1860,13 +1854,12 @@ Status PartitionedAggregationNode::CodegenUpdateTuple(Function** fn) {
   return Status::OK();
 }
 
-Status PartitionedAggregationNode::CodegenProcessBatch() {
-  LlvmCodeGen* codegen;
-  RETURN_IF_ERROR(state_->GetCodegen(&codegen));
+Status PartitionedAggregationNode::CodegenProcessBatch(LlvmCodeGen* codegen,
+    TPrefetchMode::type prefetch_mode) {
   SCOPED_TIMER(codegen->codegen_timer());
 
   Function* update_tuple_fn;
-  RETURN_IF_ERROR(CodegenUpdateTuple(&update_tuple_fn));
+  RETURN_IF_ERROR(CodegenUpdateTuple(codegen, &update_tuple_fn));
 
   // Get the cross compiled update row batch function
   IRFunction::Type ir_fn = (!grouping_expr_ctxs_.empty() ?
@@ -1880,7 +1873,6 @@ Status PartitionedAggregationNode::CodegenProcessBatch() {
     // Codegen for grouping using hash table
 
     // Replace prefetch_mode with constant so branches can be optimised out.
-    TPrefetchMode::type prefetch_mode = state_->query_options().prefetch_mode;
     Value* prefetch_mode_arg = codegen->GetArgument(process_batch_fn, 3);
     prefetch_mode_arg->replaceAllUsesWith(
         ConstantInt::get(Type::getInt32Ty(codegen->context()), prefetch_mode));
@@ -1888,15 +1880,15 @@ Status PartitionedAggregationNode::CodegenProcessBatch() {
     // The codegen'd ProcessBatch function is only used in Open() with level_ = 0,
     // so don't use murmur hash
     Function* hash_fn;
-    RETURN_IF_ERROR(ht_ctx_->CodegenHashRow(state_, /* use murmur */ false, &hash_fn));
+    RETURN_IF_ERROR(ht_ctx_->CodegenHashRow(codegen, /* use murmur */ false, &hash_fn));
 
     // Codegen HashTable::Equals<true>
     Function* build_equals_fn;
-    RETURN_IF_ERROR(ht_ctx_->CodegenEquals(state_, true, &build_equals_fn));
+    RETURN_IF_ERROR(ht_ctx_->CodegenEquals(codegen, true, &build_equals_fn));
 
     // Codegen for evaluating input rows
     Function* eval_grouping_expr_fn;
-    RETURN_IF_ERROR(ht_ctx_->CodegenEvalRow(state_, false, &eval_grouping_expr_fn));
+    RETURN_IF_ERROR(ht_ctx_->CodegenEvalRow(codegen, false, &eval_grouping_expr_fn));
 
     // Replace call sites
     replaced = codegen->ReplaceCallSites(process_batch_fn, eval_grouping_expr_fn,
@@ -1911,7 +1903,7 @@ Status PartitionedAggregationNode::CodegenProcessBatch() {
 
     HashTableCtx::HashTableReplacedConstants replaced_constants;
     const bool stores_duplicates = false;
-    RETURN_IF_ERROR(ht_ctx_->ReplaceHashTableConstants(state_, stores_duplicates, 1,
+    RETURN_IF_ERROR(ht_ctx_->ReplaceHashTableConstants(codegen, stores_duplicates, 1,
         process_batch_fn, &replaced_constants));
     DCHECK_GE(replaced_constants.stores_nulls, 1);
     DCHECK_GE(replaced_constants.finds_some_nulls, 1);
@@ -1935,10 +1927,9 @@ Status PartitionedAggregationNode::CodegenProcessBatch() {
   return Status::OK();
 }
 
-Status PartitionedAggregationNode::CodegenProcessBatchStreaming() {
+Status PartitionedAggregationNode::CodegenProcessBatchStreaming(
+    LlvmCodeGen* codegen, TPrefetchMode::type prefetch_mode) {
   DCHECK(is_streaming_preagg_);
-  LlvmCodeGen* codegen;
-  RETURN_IF_ERROR(state_->GetCodegen(&codegen));
   SCOPED_TIMER(codegen->codegen_timer());
 
   IRFunction::Type ir_fn = IRFunction::PART_AGG_NODE_PROCESS_BATCH_STREAMING;
@@ -1951,25 +1942,24 @@ Status PartitionedAggregationNode::CodegenProcessBatchStreaming() {
       ConstantInt::get(Type::getInt1Ty(codegen->context()), needs_serialize_));
 
   // Replace prefetch_mode with constant so branches can be optimised out.
-  TPrefetchMode::type prefetch_mode = state_->query_options().prefetch_mode;
   Value* prefetch_mode_arg = codegen->GetArgument(process_batch_streaming_fn, 3);
   prefetch_mode_arg->replaceAllUsesWith(
       ConstantInt::get(Type::getInt32Ty(codegen->context()), prefetch_mode));
 
   Function* update_tuple_fn;
-  RETURN_IF_ERROR(CodegenUpdateTuple(&update_tuple_fn));
+  RETURN_IF_ERROR(CodegenUpdateTuple(codegen, &update_tuple_fn));
 
   // We only use the top-level hash function for streaming aggregations.
   Function* hash_fn;
-  RETURN_IF_ERROR(ht_ctx_->CodegenHashRow(state_, false, &hash_fn));
+  RETURN_IF_ERROR(ht_ctx_->CodegenHashRow(codegen, false, &hash_fn));
 
   // Codegen HashTable::Equals
   Function* equals_fn;
-  RETURN_IF_ERROR(ht_ctx_->CodegenEquals(state_, true, &equals_fn));
+  RETURN_IF_ERROR(ht_ctx_->CodegenEquals(codegen, true, &equals_fn));
 
   // Codegen for evaluating input rows
   Function* eval_grouping_expr_fn;
-  RETURN_IF_ERROR(ht_ctx_->CodegenEvalRow(state_, false, &eval_grouping_expr_fn));
+  RETURN_IF_ERROR(ht_ctx_->CodegenEvalRow(codegen, false, &eval_grouping_expr_fn));
 
   // Replace call sites
   int replaced = codegen->ReplaceCallSites(process_batch_streaming_fn, update_tuple_fn,
@@ -1988,7 +1978,7 @@ Status PartitionedAggregationNode::CodegenProcessBatchStreaming() {
 
   HashTableCtx::HashTableReplacedConstants replaced_constants;
   const bool stores_duplicates = false;
-  RETURN_IF_ERROR(ht_ctx_->ReplaceHashTableConstants(state_, stores_duplicates, 1,
+  RETURN_IF_ERROR(ht_ctx_->ReplaceHashTableConstants(codegen, stores_duplicates, 1,
       process_batch_streaming_fn, &replaced_constants));
   DCHECK_GE(replaced_constants.stores_nulls, 1);
   DCHECK_GE(replaced_constants.finds_some_nulls, 1);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exec/partitioned-aggregation-node.h
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-aggregation-node.h b/be/src/exec/partitioned-aggregation-node.h
index 0c0f3e8..9e14e66 100644
--- a/be/src/exec/partitioned-aggregation-node.h
+++ b/be/src/exec/partitioned-aggregation-node.h
@@ -126,6 +126,7 @@ class PartitionedAggregationNode : public ExecNode {
 
   virtual Status Init(const TPlanNode& tnode, RuntimeState* state);
   virtual Status Prepare(RuntimeState* state);
+  virtual void Codegen(RuntimeState* state);
   virtual Status Open(RuntimeState* state);
   virtual Status GetNext(RuntimeState* state, RowBatch* row_batch, bool* eos);
   virtual Status Reset(RuntimeState* state);
@@ -136,7 +137,6 @@ class PartitionedAggregationNode : public ExecNode {
  protected:
   /// Frees local allocations from aggregate_evaluators_ and agg_fn_ctxs
   virtual Status QueryMaintenance(RuntimeState* state);
-
   virtual void DebugString(int indentation_level, std::stringstream* out) const;
 
  private:
@@ -637,11 +637,11 @@ class PartitionedAggregationNode : public ExecNode {
 
   /// Codegen UpdateSlot(). Returns non-OK status if codegen is unsuccessful.
   /// Assumes is_merge = false;
-  Status CodegenUpdateSlot(AggFnEvaluator* evaluator, SlotDescriptor* slot_desc,
-      llvm::Function** fn);
+  Status CodegenUpdateSlot(LlvmCodeGen* codegen, AggFnEvaluator* evaluator,
+      SlotDescriptor* slot_desc, llvm::Function** fn);
 
   /// Codegen UpdateTuple(). Returns non-OK status if codegen is unsuccessful.
-  Status CodegenUpdateTuple(llvm::Function** fn);
+  Status CodegenUpdateTuple(LlvmCodeGen* codegen, llvm::Function** fn);
 
   /// Codegen the non-streaming process row batch loop. The loop has already been
   /// compiled to IR and loaded into the codegen object. UpdateAggTuple has also been
@@ -650,11 +650,12 @@ class PartitionedAggregationNode : public ExecNode {
   /// 'process_batch_no_grouping_fn_' will be updated with the codegened function
   /// depending on whether this is a grouping or non-grouping aggregation.
   /// Assumes AGGREGATED_ROWS = false.
-  Status CodegenProcessBatch();
+  Status CodegenProcessBatch(LlvmCodeGen* codegen, TPrefetchMode::type prefetch_mode);
 
   /// Codegen the materialization loop for streaming preaggregations.
   /// 'process_batch_streaming_fn_' will be updated with the codegened function.
-  Status CodegenProcessBatchStreaming();
+  Status CodegenProcessBatchStreaming(
+      LlvmCodeGen* codegen, TPrefetchMode::type prefetch_mode);
 
   /// We need two buffers per partition, one for the aggregated stream and one
   /// for the unaggregated stream. We need an additional buffer to read the stream

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exec/partitioned-hash-join-builder.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-hash-join-builder.cc b/be/src/exec/partitioned-hash-join-builder.cc
index bf5b42a..0f15876 100644
--- a/be/src/exec/partitioned-hash-join-builder.cc
+++ b/be/src/exec/partitioned-hash-join-builder.cc
@@ -138,8 +138,9 @@ Status PhjBuilder::Prepare(RuntimeState* state, MemTracker* mem_tracker) {
   partition_build_rows_timer_ = ADD_TIMER(profile(), "BuildRowsPartitionTime");
   build_hash_table_timer_ = ADD_TIMER(profile(), "HashTablesBuildTime");
   repartition_timer_ = ADD_TIMER(profile(), "RepartitionTime");
-
-  Codegen(state);
+  if (!state->codegen_enabled()) {
+    profile()->AddCodegenMsg(false, "disabled by query option DISABLE_CODEGEN");
+  }
   return Status::OK();
 }
 
@@ -704,39 +705,34 @@ not_built:
   return Status::OK();
 }
 
-void PhjBuilder::Codegen(RuntimeState* state) {
-  bool build_codegen_enabled = false;
-  bool insert_codegen_enabled = false;
-  Status build_codegen_status, insert_codegen_status;
-  if (state->codegen_enabled()) {
-    Status codegen_status;
-    // Codegen for hashing rows with the builder's hash table context.
-    Function* hash_fn;
-    codegen_status = ht_ctx_->CodegenHashRow(runtime_state_, false, &hash_fn);
-    Function* murmur_hash_fn;
-    codegen_status.MergeStatus(
-        ht_ctx_->CodegenHashRow(runtime_state_, true, &murmur_hash_fn));
-
-    // Codegen for evaluating build rows
-    Function* eval_build_row_fn;
-    codegen_status.MergeStatus(
-        ht_ctx_->CodegenEvalRow(runtime_state_, true, &eval_build_row_fn));
-
-    if (codegen_status.ok()) {
-      build_codegen_status =
-          CodegenProcessBuildBatch(hash_fn, murmur_hash_fn, eval_build_row_fn);
-      insert_codegen_status =
-          CodegenInsertBatch(hash_fn, murmur_hash_fn, eval_build_row_fn);
-    } else {
-      build_codegen_status = codegen_status;
-      insert_codegen_status = codegen_status;
-    }
-    build_codegen_enabled = build_codegen_status.ok();
-    insert_codegen_enabled = insert_codegen_status.ok();
+void PhjBuilder::Codegen(LlvmCodeGen* codegen) {
+  Status build_codegen_status;
+  Status insert_codegen_status;
+  Status codegen_status;
+
+  // Codegen for hashing rows with the builder's hash table context.
+  Function* hash_fn;
+  codegen_status = ht_ctx_->CodegenHashRow(codegen, false, &hash_fn);
+  Function* murmur_hash_fn;
+  codegen_status.MergeStatus(ht_ctx_->CodegenHashRow(codegen, true, &murmur_hash_fn));
+
+  // Codegen for evaluating build rows
+  Function* eval_build_row_fn;
+  codegen_status.MergeStatus(ht_ctx_->CodegenEvalRow(codegen, true, &eval_build_row_fn));
+
+  if (codegen_status.ok()) {
+    TPrefetchMode::type prefetch_mode = runtime_state_->query_options().prefetch_mode;
+    build_codegen_status =
+        CodegenProcessBuildBatch(codegen, hash_fn, murmur_hash_fn, eval_build_row_fn);
+    insert_codegen_status = CodegenInsertBatch(codegen, hash_fn, murmur_hash_fn,
+        eval_build_row_fn, prefetch_mode);
+  } else {
+    build_codegen_status = codegen_status;
+    insert_codegen_status = codegen_status;
   }
-  profile()->AddCodegenMsg(build_codegen_enabled, build_codegen_status, "Build Side");
-  profile()->AddCodegenMsg(
-      insert_codegen_enabled, insert_codegen_status, "Hash Table Construction");
+  profile()->AddCodegenMsg(build_codegen_status.ok(), build_codegen_status, "Build Side");
+  profile()->AddCodegenMsg(insert_codegen_status.ok(), insert_codegen_status,
+      "Hash Table Construction");
 }
 
 string PhjBuilder::DebugString() const {
@@ -763,11 +759,8 @@ string PhjBuilder::DebugString() const {
   return ss.str();
 }
 
-Status PhjBuilder::CodegenProcessBuildBatch(
+Status PhjBuilder::CodegenProcessBuildBatch(LlvmCodeGen* codegen,
     Function* hash_fn, Function* murmur_hash_fn, Function* eval_row_fn) {
-  LlvmCodeGen* codegen;
-  RETURN_IF_ERROR(runtime_state_->GetCodegen(&codegen));
-
   Function* process_build_batch_fn =
       codegen->GetFunction(IRFunction::PHJ_PROCESS_BUILD_BATCH, true);
   DCHECK(process_build_batch_fn != NULL);
@@ -781,7 +774,7 @@ Status PhjBuilder::CodegenProcessBuildBatch(
   HashTableCtx::HashTableReplacedConstants replaced_constants;
   const bool stores_duplicates = true;
   const int num_build_tuples = row_desc_.tuple_descriptors().size();
-  RETURN_IF_ERROR(ht_ctx_->ReplaceHashTableConstants(runtime_state_, stores_duplicates,
+  RETURN_IF_ERROR(ht_ctx_->ReplaceHashTableConstants(codegen, stores_duplicates,
       num_build_tuples, process_build_batch_fn, &replaced_constants));
   DCHECK_GE(replaced_constants.stores_nulls, 1);
   DCHECK_EQ(replaced_constants.finds_some_nulls, 0);
@@ -838,18 +831,14 @@ Status PhjBuilder::CodegenProcessBuildBatch(
   return Status::OK();
 }
 
-Status PhjBuilder::CodegenInsertBatch(
-    Function* hash_fn, Function* murmur_hash_fn, Function* eval_row_fn) {
-  LlvmCodeGen* codegen;
-  RETURN_IF_ERROR(runtime_state_->GetCodegen(&codegen));
-
+Status PhjBuilder::CodegenInsertBatch(LlvmCodeGen* codegen, Function* hash_fn,
+    Function* murmur_hash_fn, Function* eval_row_fn, TPrefetchMode::type prefetch_mode) {
   Function* insert_batch_fn = codegen->GetFunction(IRFunction::PHJ_INSERT_BATCH, true);
   Function* build_equals_fn;
-  RETURN_IF_ERROR(ht_ctx_->CodegenEquals(runtime_state_, true, &build_equals_fn));
+  RETURN_IF_ERROR(ht_ctx_->CodegenEquals(codegen, true, &build_equals_fn));
 
   // Replace the parameter 'prefetch_mode' with constant.
   Value* prefetch_mode_arg = codegen->GetArgument(insert_batch_fn, 1);
-  TPrefetchMode::type prefetch_mode = runtime_state_->query_options().prefetch_mode;
   DCHECK_GE(prefetch_mode, TPrefetchMode::NONE);
   DCHECK_LE(prefetch_mode, TPrefetchMode::HT_BUCKET);
   prefetch_mode_arg->replaceAllUsesWith(
@@ -867,7 +856,7 @@ Status PhjBuilder::CodegenInsertBatch(
   HashTableCtx::HashTableReplacedConstants replaced_constants;
   const bool stores_duplicates = true;
   const int num_build_tuples = row_desc_.tuple_descriptors().size();
-  RETURN_IF_ERROR(ht_ctx_->ReplaceHashTableConstants(runtime_state_, stores_duplicates,
+  RETURN_IF_ERROR(ht_ctx_->ReplaceHashTableConstants(codegen, stores_duplicates,
       num_build_tuples, insert_batch_fn, &replaced_constants));
   DCHECK_GE(replaced_constants.stores_nulls, 1);
   DCHECK_EQ(replaced_constants.finds_some_nulls, 0);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exec/partitioned-hash-join-builder.h
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-hash-join-builder.h b/be/src/exec/partitioned-hash-join-builder.h
index 7f81e5a..650452c 100644
--- a/be/src/exec/partitioned-hash-join-builder.h
+++ b/be/src/exec/partitioned-hash-join-builder.h
@@ -84,6 +84,11 @@ class PhjBuilder : public DataSink {
   virtual Status FlushFinal(RuntimeState* state) override;
   virtual void Close(RuntimeState* state) override;
 
+  /// Does all codegen for the builder (if codegen is enabled).
+  /// Updates the builder's runtime profile with info about whether any errors
+  /// occurred during codegen.
+  void Codegen(LlvmCodeGen* codegen);
+
   /////////////////////////////////////////
   // The following functions are used only by PartitionedHashJoinNode.
   /////////////////////////////////////////
@@ -312,20 +317,16 @@ class PhjBuilder : public DataSink {
   /// unacceptably high false-positive rate.
   void PublishRuntimeFilters(int64_t num_build_rows);
 
-  /// Does all codegen for the builder (if codegen is enabled).
-  /// Updates the the builder's runtime profile with info about whether the codegen was
-  /// enabled and whether any errors occured during codegen.
-  void Codegen(RuntimeState* state);
-
   /// Codegen processing build batches. Identical signature to ProcessBuildBatch().
   /// Returns non-OK status if codegen was not possible.
-  Status CodegenProcessBuildBatch(llvm::Function* hash_fn, llvm::Function* murmur_hash_fn,
-      llvm::Function* eval_row_fn);
+  Status CodegenProcessBuildBatch(LlvmCodeGen* codegen, llvm::Function* hash_fn,
+      llvm::Function* murmur_hash_fn, llvm::Function* eval_row_fn);
 
   /// Codegen inserting batches into a partition's hash table. Identical signature to
   /// Partition::InsertBatch(). Returns non-OK if codegen was not possible.
-  Status CodegenInsertBatch(llvm::Function* hash_fn, llvm::Function* murmur_hash_fn,
-      llvm::Function* eval_row_fn);
+  Status CodegenInsertBatch(LlvmCodeGen* codegen, llvm::Function* hash_fn,
+      llvm::Function* murmur_hash_fn, llvm::Function* eval_row_fn,
+      TPrefetchMode::type prefetch_mode);
 
   RuntimeState* const runtime_state_;
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exec/partitioned-hash-join-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-hash-join-node.cc b/be/src/exec/partitioned-hash-join-node.cc
index 46b91ba..024fdc3 100644
--- a/be/src/exec/partitioned-hash-join-node.cc
+++ b/be/src/exec/partitioned-hash-join-node.cc
@@ -98,14 +98,6 @@ Status PartitionedHashJoinNode::Init(const TPlanNode& tnode, RuntimeState* state
 Status PartitionedHashJoinNode::Prepare(RuntimeState* state) {
   SCOPED_TIMER(runtime_profile_->total_time_counter());
 
-  // Create the codegen object before preparing conjunct_ctxs_ and children_, so that any
-  // ScalarFnCalls will use codegen.
-  // TODO: this is brittle and hard to reason about, revisit
-  if (state->codegen_enabled()) {
-    LlvmCodeGen* codegen;
-    RETURN_IF_ERROR(state->GetCodegen(&codegen));
-  }
-
   RETURN_IF_ERROR(BlockingJoinNode::Prepare(state));
   runtime_state_ = state;
 
@@ -143,18 +135,30 @@ Status PartitionedHashJoinNode::Prepare(RuntimeState* state) {
 
   num_probe_rows_partitioned_ =
       ADD_COUNTER(runtime_profile(), "ProbeRowsPartitioned", TUnit::UNIT);
-
-  bool probe_codegen_enabled = false;
-  Status probe_codegen_status;
-  if (state->codegen_enabled()) {
-    probe_codegen_status = CodegenProcessProbeBatch(state);
-    probe_codegen_enabled = probe_codegen_status.ok();
+  if (!state->codegen_enabled()) {
+    runtime_profile()->AddCodegenMsg(false, "disabled by query option DISABLE_CODEGEN");
   }
-  runtime_profile()->AddCodegenMsg(
-      probe_codegen_enabled, probe_codegen_status, "Probe Side");
   return Status::OK();
 }
 
+void PartitionedHashJoinNode::Codegen(RuntimeState* state) {
+  DCHECK(state->codegen_enabled());
+  LlvmCodeGen* codegen = state->codegen();
+  DCHECK(codegen != NULL);
+
+  // Codegen the build side.
+  builder_->Codegen(codegen);
+
+  // Codegen the probe side.
+  TPrefetchMode::type prefetch_mode = state->query_options().prefetch_mode;
+  Status probe_codegen_status = CodegenProcessProbeBatch(codegen, prefetch_mode);
+  runtime_profile()->AddCodegenMsg(probe_codegen_status.ok(), probe_codegen_status,
+      "Probe Side");
+
+  // Codegen the child nodes.
+  ExecNode::Codegen(state);
+}
+
 Status PartitionedHashJoinNode::Open(RuntimeState* state) {
   SCOPED_TIMER(runtime_profile_->total_time_counter());
   RETURN_IF_ERROR(BlockingJoinNode::Open(state));
@@ -1190,14 +1194,13 @@ Status PartitionedHashJoinNode::CodegenCreateOutputRow(LlvmCodeGen* codegen,
   return Status::OK();
 }
 
-Status PartitionedHashJoinNode::CodegenProcessProbeBatch(RuntimeState* state) {
-  LlvmCodeGen* codegen;
-  RETURN_IF_ERROR(state->GetCodegen(&codegen));
+Status PartitionedHashJoinNode::CodegenProcessProbeBatch(
+    LlvmCodeGen* codegen, TPrefetchMode::type prefetch_mode) {
   // Codegen for hashing rows
   Function* hash_fn;
   Function* murmur_hash_fn;
-  RETURN_IF_ERROR(ht_ctx_->CodegenHashRow(state, false, &hash_fn));
-  RETURN_IF_ERROR(ht_ctx_->CodegenHashRow(state, true, &murmur_hash_fn));
+  RETURN_IF_ERROR(ht_ctx_->CodegenHashRow(codegen, false, &hash_fn));
+  RETURN_IF_ERROR(ht_ctx_->CodegenHashRow(codegen, true, &murmur_hash_fn));
 
   // Get cross compiled function
   IRFunction::Type ir_fn = IRFunction::FN_END;
@@ -1243,7 +1246,6 @@ Status PartitionedHashJoinNode::CodegenProcessProbeBatch(RuntimeState* state) {
 
   // Replace the parameter 'prefetch_mode' with constant.
   Value* prefetch_mode_arg = codegen->GetArgument(process_probe_batch_fn, 1);
-  TPrefetchMode::type prefetch_mode = state->query_options().prefetch_mode;
   DCHECK_GE(prefetch_mode, TPrefetchMode::NONE);
   DCHECK_LE(prefetch_mode, TPrefetchMode::HT_BUCKET);
   prefetch_mode_arg->replaceAllUsesWith(
@@ -1251,11 +1253,11 @@ Status PartitionedHashJoinNode::CodegenProcessProbeBatch(RuntimeState* state) {
 
   // Codegen HashTable::Equals
   Function* probe_equals_fn;
-  RETURN_IF_ERROR(ht_ctx_->CodegenEquals(state, false, &probe_equals_fn));
+  RETURN_IF_ERROR(ht_ctx_->CodegenEquals(codegen, false, &probe_equals_fn));
 
   // Codegen for evaluating probe rows
   Function* eval_row_fn;
-  RETURN_IF_ERROR(ht_ctx_->CodegenEvalRow(state, false, &eval_row_fn));
+  RETURN_IF_ERROR(ht_ctx_->CodegenEvalRow(codegen, false, &eval_row_fn));
 
   // Codegen CreateOutputRow
   Function* create_output_row_fn;
@@ -1263,12 +1265,12 @@ Status PartitionedHashJoinNode::CodegenProcessProbeBatch(RuntimeState* state) {
 
   // Codegen evaluating other join conjuncts
   Function* eval_other_conjuncts_fn;
-  RETURN_IF_ERROR(ExecNode::CodegenEvalConjuncts(state, other_join_conjunct_ctxs_,
+  RETURN_IF_ERROR(ExecNode::CodegenEvalConjuncts(codegen, other_join_conjunct_ctxs_,
       &eval_other_conjuncts_fn, "EvalOtherConjuncts"));
 
   // Codegen evaluating conjuncts
   Function* eval_conjuncts_fn;
-  RETURN_IF_ERROR(ExecNode::CodegenEvalConjuncts(state, conjunct_ctxs_,
+  RETURN_IF_ERROR(ExecNode::CodegenEvalConjuncts(codegen, conjunct_ctxs_,
       &eval_conjuncts_fn));
 
   // Replace all call sites with codegen version
@@ -1317,7 +1319,7 @@ Status PartitionedHashJoinNode::CodegenProcessProbeBatch(RuntimeState* state) {
   HashTableCtx::HashTableReplacedConstants replaced_constants;
   const bool stores_duplicates = true;
   const int num_build_tuples = child(1)->row_desc().tuple_descriptors().size();
-  RETURN_IF_ERROR(ht_ctx_->ReplaceHashTableConstants(state, stores_duplicates,
+  RETURN_IF_ERROR(ht_ctx_->ReplaceHashTableConstants(codegen, stores_duplicates,
       num_build_tuples, process_probe_batch_fn, &replaced_constants));
   DCHECK_GE(replaced_constants.stores_nulls, 1);
   DCHECK_GE(replaced_constants.finds_some_nulls, 1);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exec/partitioned-hash-join-node.h
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-hash-join-node.h b/be/src/exec/partitioned-hash-join-node.h
index 5b9264c..504dc7b 100644
--- a/be/src/exec/partitioned-hash-join-node.h
+++ b/be/src/exec/partitioned-hash-join-node.h
@@ -110,6 +110,7 @@ class PartitionedHashJoinNode : public BlockingJoinNode {
 
   virtual Status Init(const TPlanNode& tnode, RuntimeState* state);
   virtual Status Prepare(RuntimeState* state);
+  virtual void Codegen(RuntimeState* state);
   virtual Status Open(RuntimeState* state);
   virtual Status GetNext(RuntimeState* state, RowBatch* row_batch, bool* eos);
   virtual Status Reset(RuntimeState* state);
@@ -355,7 +356,8 @@ class PartitionedHashJoinNode : public BlockingJoinNode {
 
   /// Codegen processing probe batches.  Identical signature to ProcessProbeBatch.
   /// Returns non-OK if codegen was not possible.
-  Status CodegenProcessProbeBatch(RuntimeState* state);
+  Status CodegenProcessProbeBatch(
+      LlvmCodeGen* codegen, TPrefetchMode::type prefetch_mode);
 
   /// Returns the current state of the partition as a string.
   std::string PrintState() const;

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exec/sort-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/sort-node.cc b/be/src/exec/sort-node.cc
index 9271721..140e662 100644
--- a/be/src/exec/sort-node.cc
+++ b/be/src/exec/sort-node.cc
@@ -50,22 +50,23 @@ Status SortNode::Prepare(RuntimeState* state) {
   RETURN_IF_ERROR(sort_exec_exprs_.Prepare(
       state, child(0)->row_desc(), row_descriptor_, expr_mem_tracker()));
   AddExprCtxsToFree(sort_exec_exprs_);
-  TupleRowComparator less_than(sort_exec_exprs_, is_asc_order_, nulls_first_);
-
-  bool codegen_enabled = false;
-  Status codegen_status;
-  if (state->codegen_enabled()) {
-    codegen_status = less_than.Codegen(state);
-    codegen_enabled = codegen_status.ok();
-  }
-  runtime_profile()->AddCodegenMsg(codegen_enabled, codegen_status);
-
-  sorter_.reset(new Sorter(less_than, sort_exec_exprs_.sort_tuple_slot_expr_ctxs(),
+  less_than_.reset(new TupleRowComparator(sort_exec_exprs_, is_asc_order_, nulls_first_));
+  sorter_.reset(new Sorter(*less_than_.get(), sort_exec_exprs_.sort_tuple_slot_expr_ctxs(),
       &row_descriptor_, mem_tracker(), runtime_profile(), state));
   RETURN_IF_ERROR(sorter_->Init());
+  if (!state->codegen_enabled()) {
+    runtime_profile()->AddCodegenMsg(false, "disabled by query option DISABLE_CODEGEN");
+  }
   return Status::OK();
 }
 
+void SortNode::Codegen(RuntimeState* state) {
+  DCHECK(state->codegen_enabled());
+  Status codegen_status = less_than_->Codegen(state);
+  runtime_profile()->AddCodegenMsg(codegen_status.ok(), codegen_status);
+  ExecNode::Codegen(state);
+}
+
 Status SortNode::Open(RuntimeState* state) {
   SCOPED_TIMER(runtime_profile_->total_time_counter());
   RETURN_IF_ERROR(ExecNode::Open(state));

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exec/sort-node.h
----------------------------------------------------------------------
diff --git a/be/src/exec/sort-node.h b/be/src/exec/sort-node.h
index a90c99e..75513ba 100644
--- a/be/src/exec/sort-node.h
+++ b/be/src/exec/sort-node.h
@@ -41,6 +41,7 @@ class SortNode : public ExecNode {
 
   virtual Status Init(const TPlanNode& tnode, RuntimeState* state);
   virtual Status Prepare(RuntimeState* state);
+  virtual void Codegen(RuntimeState* state);
   virtual Status Open(RuntimeState* state);
   virtual Status GetNext(RuntimeState* state, RowBatch* row_batch, bool* eos);
   virtual Status Reset(RuntimeState* state);
@@ -56,6 +57,9 @@ class SortNode : public ExecNode {
   /// Number of rows to skip.
   int64_t offset_;
 
+  /// The tuple row comparator derived from 'sort_exec_exprs_'.
+  boost::scoped_ptr<TupleRowComparator> less_than_;
+
   /// Expressions and parameters used for tuple materialization and tuple comparison.
   SortExecExprs sort_exec_exprs_;
   std::vector<bool> is_asc_order_;

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b15d992a/be/src/exec/topn-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/topn-node.cc b/be/src/exec/topn-node.cc
index 6dd1e34..e72249f 100644
--- a/be/src/exec/topn-node.cc
+++ b/be/src/exec/topn-node.cc
@@ -63,39 +63,6 @@ Status TopNNode::Init(const TPlanNode& tnode, RuntimeState* state) {
   return Status::OK();
 }
 
-Status TopNNode::Codegen(RuntimeState* state) {
-  DCHECK(materialized_tuple_desc_ != NULL);
-  LlvmCodeGen* codegen;
-  RETURN_IF_ERROR(state->GetCodegen(&codegen));
-  Function* insert_batch_fn =
-      codegen->GetFunction(IRFunction::TOPN_NODE_INSERT_BATCH, true);
-
-  // Generate two MaterializeExprs() functions, one using tuple_pool_ and one with no
-  // pool.
-  Function* materialize_exprs_tuple_pool_fn;
-  RETURN_IF_ERROR(Tuple::CodegenMaterializeExprs(state, false, *materialized_tuple_desc_,
-      sort_exec_exprs_.sort_tuple_slot_expr_ctxs(), tuple_pool_.get(),
-      &materialize_exprs_tuple_pool_fn));
-
-  Function* materialize_exprs_no_pool_fn;
-  RETURN_IF_ERROR(Tuple::CodegenMaterializeExprs(state, false, *materialized_tuple_desc_,
-      sort_exec_exprs_.sort_tuple_slot_expr_ctxs(), NULL, &materialize_exprs_no_pool_fn));
-
-  int replaced = codegen->ReplaceCallSites(insert_batch_fn,
-      materialize_exprs_tuple_pool_fn, Tuple::MATERIALIZE_EXPRS_SYMBOL);
-  DCHECK_EQ(replaced, 1) << LlvmCodeGen::Print(insert_batch_fn);
-
-  replaced = codegen->ReplaceCallSites(insert_batch_fn, materialize_exprs_no_pool_fn,
-      Tuple::MATERIALIZE_EXPRS_NULL_POOL_SYMBOL);
-  DCHECK_EQ(replaced, 1) << LlvmCodeGen::Print(insert_batch_fn);
-
-  insert_batch_fn = codegen->FinalizeFunction(insert_batch_fn);
-  DCHECK(insert_batch_fn != NULL);
-  codegen->AddFunctionToJit(insert_batch_fn,
-      reinterpret_cast<void**>(&codegend_insert_batch_fn_));
-  return Status::OK();
-}
-
 Status TopNNode::Prepare(RuntimeState* state) {
   SCOPED_TIMER(runtime_profile_->total_time_counter());
   RETURN_IF_ERROR(ExecNode::Prepare(state));
@@ -106,23 +73,66 @@ Status TopNNode::Prepare(RuntimeState* state) {
   AddExprCtxsToFree(sort_exec_exprs_);
   tuple_row_less_than_.reset(
       new TupleRowComparator(sort_exec_exprs_, is_asc_order_, nulls_first_));
-  bool codegen_enabled = false;
-  Status codegen_status;
-  if (state->codegen_enabled()) {
-    // TODO: inline tuple_row_less_than_->Compare()
-    codegen_status = tuple_row_less_than_->Codegen(state);
-    codegen_status.MergeStatus(Codegen(state));
-    codegen_enabled = codegen_status.ok();
-  }
-  runtime_profile()->AddCodegenMsg(codegen_enabled, codegen_status);
   priority_queue_.reset(
       new priority_queue<Tuple*, vector<Tuple*>, ComparatorWrapper<TupleRowComparator>>(
           *tuple_row_less_than_));
   materialized_tuple_desc_ = row_descriptor_.tuple_descriptors()[0];
   insert_batch_timer_ = ADD_TIMER(runtime_profile(), "InsertBatchTime");
+  // Only the codegen-disabled case is recorded here; when codegen is enabled,
+  // Codegen() records the outcome in the runtime profile itself.
+  if (!state->codegen_enabled()) {
+    runtime_profile()->AddCodegenMsg(false, "disabled by query option DISABLE_CODEGEN");
+  }
   return Status::OK();
 }
 
+void TopNNode::Codegen(RuntimeState* state) {
+  // Codegen must be enabled and the state's LlvmCodeGen must be available.
+  DCHECK(state->codegen_enabled());
+  LlvmCodeGen* codegen = state->codegen();
+  DCHECK(codegen != NULL);
+  Function* insert_batch_fn = NULL;
+  Function* materialize_with_pool_fn = NULL;
+  Function* materialize_no_pool_fn = NULL;
+
+  // Each step below runs only if all earlier steps succeeded; the first failing
+  // status is the one handed to the runtime profile at the end.
+  // TODO: inline tuple_row_less_than_->Compare()
+  Status status = tuple_row_less_than_->Codegen(state);
+  if (status.ok()) {
+    insert_batch_fn = codegen->GetFunction(IRFunction::TOPN_NODE_INSERT_BATCH, true);
+    DCHECK(insert_batch_fn != NULL);
+    DCHECK(materialized_tuple_desc_ != NULL);
+    // First MaterializeExprs() variant: the one using tuple_pool_.
+    status = Tuple::CodegenMaterializeExprs(codegen, false, *materialized_tuple_desc_,
+        sort_exec_exprs_.sort_tuple_slot_expr_ctxs(), tuple_pool_.get(),
+        &materialize_with_pool_fn);
+  }
+  if (status.ok()) {
+    // Second MaterializeExprs() variant: the one with no pool.
+    status = Tuple::CodegenMaterializeExprs(codegen, false, *materialized_tuple_desc_,
+        sort_exec_exprs_.sort_tuple_slot_expr_ctxs(), NULL, &materialize_no_pool_fn);
+  }
+  if (status.ok()) {
+    // Replace the call sites matching each MaterializeExprs symbol in the insert-batch
+    // function with the generated variants; one call site is expected per symbol.
+    int num_replaced = codegen->ReplaceCallSites(insert_batch_fn,
+        materialize_with_pool_fn, Tuple::MATERIALIZE_EXPRS_SYMBOL);
+    DCHECK_EQ(num_replaced, 1) << LlvmCodeGen::Print(insert_batch_fn);
+    num_replaced = codegen->ReplaceCallSites(insert_batch_fn, materialize_no_pool_fn,
+        Tuple::MATERIALIZE_EXPRS_NULL_POOL_SYMBOL);
+    DCHECK_EQ(num_replaced, 1) << LlvmCodeGen::Print(insert_batch_fn);
+
+    insert_batch_fn = codegen->FinalizeFunction(insert_batch_fn);
+    DCHECK(insert_batch_fn != NULL);
+    codegen->AddFunctionToJit(insert_batch_fn,
+        reinterpret_cast<void**>(&codegend_insert_batch_fn_));
+  }
+  // Record success or the first failure, then run ExecNode's own Codegen().
+  runtime_profile()->AddCodegenMsg(status.ok(), status);
+  ExecNode::Codegen(state);
+}
+
 Status TopNNode::Open(RuntimeState* state) {
   SCOPED_TIMER(runtime_profile_->total_time_counter());
   RETURN_IF_ERROR(ExecNode::Open(state));