You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/04/19 18:12:07 UTC

[02/15] impala git commit: IMPALA-6120: Add thread timers for reporting codegen time

IMPALA-6120: Add thread timers for reporting codegen time

Add thread times for accurate reporting of codegen time.
Also cleaned up a few places where time elapsed was being counted twice.

Sample Profile:

Query: SELECT count(*) FROM tpch_parquet.lineitem
WHERE l_partkey in (1,6,11,16,21,26,31,36,41);

CodeGen:(Total: 37.948ms, non-child: 37.948ms, % non-child: 100.00%)
   - CodegenInvoluntaryContextSwitches: 0 (0)
   - CodegenTotalWallClockTime: 37.942ms
     - CodegenSysTime: 0.000ns
     - CodegenUserTime: 36.938ms
   - CodegenVoluntaryContextSwitches: 0 (0)
   - CompileTime: 2.065ms
   - IrGenerationTime: 392.351us
   - LoadTime: 0.000ns
   - ModuleBitcodeSize: 2.26 MB (2373148)
   - NumFunctions: 22 (22)
   - NumInstructions: 381 (381)
   - OptimizationTime: 21.416ms
   - PeakMemoryUsage: 190.50 KB (195072)
   - PrepareTime: 13.496ms

Sample Profile with an added 2 sec sleep time to "OptimizationTime":

CodeGen:(Total: 2s037ms, non-child: 2s037ms, % non-child: 100.00%)
   - CodegenInvoluntaryContextSwitches: 0 (0)
   - CodegenTotalWallClockTime: 2s037ms
     - CodegenSysTime: 0.000ns
     - CodegenUserTime: 37.672ms
   - CodegenVoluntaryContextSwitches: 1 (1)
   - CompileTime: 2.032ms
   - IrGenerationTime: 386.948us
   - LoadTime: 0.000ns
   - ModuleBitcodeSize: 2.26 MB (2373148)
   - NumFunctions: 22 (22)
   - NumInstructions: 381 (381)
   - OptimizationTime: 2s023ms
   - PeakMemoryUsage: 190.50 KB (195072)
   - PrepareTime: 11.598ms

Change-Id: I24d5a46b8870bc959b89045432d2e86af72b30e5
Reviewed-on: http://gerrit.cloudera.org:8080/9960
Reviewed-by: Bikramjeet Vig <bi...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/d9d3ce7c
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/d9d3ce7c
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/d9d3ce7c

Branch: refs/heads/2.x
Commit: d9d3ce7c2ec124290d503880f3963fe7c4a78c58
Parents: 857d2b0
Author: Bikramjeet Vig <bi...@cloudera.com>
Authored: Mon Apr 9 14:03:52 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Apr 18 21:17:47 2018 +0000

----------------------------------------------------------------------
 be/src/codegen/llvm-codegen.cc              | 21 +++++++++------------
 be/src/codegen/llvm-codegen.h               | 18 ++++++++++--------
 be/src/exec/hdfs-avro-scanner.cc            |  2 --
 be/src/exec/hdfs-parquet-scanner.cc         |  1 -
 be/src/exec/hdfs-scanner.cc                 |  2 --
 be/src/exec/partitioned-aggregation-node.cc |  5 -----
 be/src/exec/select-node.cc                  |  1 -
 be/src/exec/text-converter.cc               |  1 -
 be/src/runtime/fragment-instance-state.cc   | 16 +++++++++++-----
 be/src/runtime/tuple.cc                     |  1 -
 be/src/util/tuple-row-compare.cc            |  1 -
 11 files changed, 30 insertions(+), 39 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/d9d3ce7c/be/src/codegen/llvm-codegen.cc
----------------------------------------------------------------------
diff --git a/be/src/codegen/llvm-codegen.cc b/be/src/codegen/llvm-codegen.cc
index c8fd8eb..5d5ed15 100644
--- a/be/src/codegen/llvm-codegen.cc
+++ b/be/src/codegen/llvm-codegen.cc
@@ -201,11 +201,12 @@ LlvmCodeGen::LlvmCodeGen(RuntimeState* state, ObjectPool* pool,
   load_module_timer_ = ADD_TIMER(profile_, "LoadTime");
   prepare_module_timer_ = ADD_TIMER(profile_, "PrepareTime");
   module_bitcode_size_ = ADD_COUNTER(profile_, "ModuleBitcodeSize", TUnit::BYTES);
-  codegen_timer_ = ADD_TIMER(profile_, "CodegenTime");
+  ir_generation_timer_ = ADD_TIMER(profile_, "IrGenerationTime");
   optimization_timer_ = ADD_TIMER(profile_, "OptimizationTime");
   compile_timer_ = ADD_TIMER(profile_, "CompileTime");
   num_functions_ = ADD_COUNTER(profile_, "NumFunctions", TUnit::UNIT);
   num_instructions_ = ADD_COUNTER(profile_, "NumInstructions", TUnit::UNIT);
+  llvm_thread_counters_ = ADD_THREAD_COUNTERS(profile_, "Codegen");
 }
 
 Status LlvmCodeGen::CreateFromFile(RuntimeState* state, ObjectPool* pool,
@@ -213,6 +214,7 @@ Status LlvmCodeGen::CreateFromFile(RuntimeState* state, ObjectPool* pool,
     scoped_ptr<LlvmCodeGen>* codegen) {
   codegen->reset(new LlvmCodeGen(state, pool, parent_mem_tracker, id));
   SCOPED_TIMER((*codegen)->profile_->total_time_counter());
+  SCOPED_THREAD_COUNTER_MEASUREMENT((*codegen)->llvm_thread_counters());
 
   unique_ptr<llvm::Module> loaded_module;
   Status status = (*codegen)->LoadModuleFromFile(file, &loaded_module);
@@ -229,6 +231,8 @@ Status LlvmCodeGen::CreateFromMemory(RuntimeState* state, ObjectPool* pool,
     MemTracker* parent_mem_tracker, const string& id, scoped_ptr<LlvmCodeGen>* codegen) {
   codegen->reset(new LlvmCodeGen(state, pool, parent_mem_tracker, id));
   SCOPED_TIMER((*codegen)->profile_->total_time_counter());
+  SCOPED_TIMER((*codegen)->prepare_module_timer_);
+  SCOPED_THREAD_COUNTER_MEASUREMENT((*codegen)->llvm_thread_counters());
 
   // Select the appropriate IR version. We cannot use LLVM IR with SSE4.2 instructions on
   // a machine without SSE4.2 support.
@@ -282,7 +286,6 @@ Status LlvmCodeGen::LoadModuleFromFile(
 Status LlvmCodeGen::LoadModuleFromMemory(unique_ptr<llvm::MemoryBuffer> module_ir_buf,
     string module_name, unique_ptr<llvm::Module>* module) {
   DCHECK(!module_name.empty());
-  SCOPED_TIMER(prepare_module_timer_);
   COUNTER_ADD(module_bitcode_size_, module_ir_buf->getMemBufferRef().getBufferSize());
   llvm::Expected<unique_ptr<llvm::Module>> tmp_module =
       getOwningLazyBitcodeModule(move(module_ir_buf), context());
@@ -305,7 +308,6 @@ Status LlvmCodeGen::LoadModuleFromMemory(unique_ptr<llvm::MemoryBuffer> module_i
 
 // TODO: Create separate counters/timers (file size, load time) for each module linked
 Status LlvmCodeGen::LinkModuleFromLocalFs(const string& file) {
-  SCOPED_TIMER(profile_->total_time_counter());
   unique_ptr<llvm::Module> new_module;
   RETURN_IF_ERROR(LoadModuleFromFile(file, &new_module));
 
@@ -366,6 +368,7 @@ Status LlvmCodeGen::CreateImpalaCodegen(RuntimeState* state,
   // Parse module for cross compiled functions and types
   SCOPED_TIMER(codegen->profile_->total_time_counter());
   SCOPED_TIMER(codegen->prepare_module_timer_);
+  SCOPED_THREAD_COUNTER_MEASUREMENT(codegen->llvm_thread_counters_);
 
   // Get type for StringValue
   codegen->string_value_type_ = codegen->GetStructType<StringValue>();
@@ -621,7 +624,7 @@ void LlvmCodeGen::CreateIfElseBlocks(llvm::Function* fn, const string& if_name,
   *else_block = llvm::BasicBlock::Create(context(), else_name, fn, insert_before);
 }
 
-Status LlvmCodeGen::MaterializeFunctionHelper(llvm::Function* fn) {
+Status LlvmCodeGen::MaterializeFunction(llvm::Function* fn) {
   DCHECK(!is_compiled_);
   if (fn->isIntrinsic() || !fn->isMaterializable()) return Status::OK();
 
@@ -642,18 +645,12 @@ Status LlvmCodeGen::MaterializeFunctionHelper(llvm::Function* fn) {
     for (const string& callee : *callees) {
       llvm::Function* callee_fn = module_->getFunction(callee);
       DCHECK(callee_fn != nullptr);
-      RETURN_IF_ERROR(MaterializeFunctionHelper(callee_fn));
+      RETURN_IF_ERROR(MaterializeFunction(callee_fn));
     }
   }
   return Status::OK();
 }
 
-Status LlvmCodeGen::MaterializeFunction(llvm::Function* fn) {
-  SCOPED_TIMER(profile_->total_time_counter());
-  SCOPED_TIMER(prepare_module_timer_);
-  return MaterializeFunctionHelper(fn);
-}
-
 llvm::Function* LlvmCodeGen::GetFunction(const string& symbol, bool clone) {
   llvm::Function* fn = module_->getFunction(symbol.c_str());
   if (fn == NULL) {
@@ -1038,7 +1035,6 @@ Status LlvmCodeGen::MaterializeModule() {
 
 // It's okay to call this function even if the module has been materialized.
 Status LlvmCodeGen::FinalizeLazyMaterialization() {
-  SCOPED_TIMER(prepare_module_timer_);
   for (llvm::Function& fn : module_->functions()) {
     if (fn.isMaterializable()) {
       DCHECK(!module_->isMaterialized());
@@ -1078,6 +1074,7 @@ Status LlvmCodeGen::FinalizeModule() {
 
   if (is_corrupt_) return Status("Module is corrupt.");
   SCOPED_TIMER(profile_->total_time_counter());
+  SCOPED_THREAD_COUNTER_MEASUREMENT(llvm_thread_counters_);
 
   // Clean up handcrafted functions that have not been finalized. Clean up is done by
   // deleting the function from the module. Any reference to deleted functions in the

http://git-wip-us.apache.org/repos/asf/impala/blob/d9d3ce7c/be/src/codegen/llvm-codegen.h
----------------------------------------------------------------------
diff --git a/be/src/codegen/llvm-codegen.h b/be/src/codegen/llvm-codegen.h
index 783269b..53569ca 100644
--- a/be/src/codegen/llvm-codegen.h
+++ b/be/src/codegen/llvm-codegen.h
@@ -169,7 +169,8 @@ class LlvmCodeGen {
   void Close();
 
   RuntimeProfile* runtime_profile() { return profile_; }
-  RuntimeProfile::Counter* codegen_timer() { return codegen_timer_; }
+  RuntimeProfile::Counter* ir_generation_timer() { return ir_generation_timer_; }
+  RuntimeProfile::ThreadCounters* llvm_thread_counters() { return llvm_thread_counters_; }
 
   /// Turns on/off optimization passes
   void EnableOptimizations(bool enable);
@@ -688,10 +689,6 @@ class LlvmCodeGen {
   /// This function parses the bitcode of 'fn' to populate basic blocks, instructions
   /// and other data structures attached to the function object. Return error status
   /// for any error.
-  Status MaterializeFunctionHelper(llvm::Function* fn);
-
-  /// Entry point for materializing function 'fn'. Invokes MaterializeFunctionHelper()
-  /// to do the actual work. Return error status for any error.
   Status MaterializeFunction(llvm::Function* fn);
 
   /// Materialize the module owned by this codegen object. This will materialize all
@@ -754,11 +751,12 @@ class LlvmCodeGen {
   /// Time spent reading the .ir file from the file system.
   RuntimeProfile::Counter* load_module_timer_;
 
-  /// Time spent constructing the in-memory module from the ir.
+  /// Time spent creating the initial module with the cross-compiled Impala IR.
   RuntimeProfile::Counter* prepare_module_timer_;
 
-  /// Time spent doing codegen (adding IR to the module)
-  RuntimeProfile::Counter* codegen_timer_;
+  /// Time spent by ExecNodes while adding IR to the module. Update by
+  /// FragmentInstanceState during its 'CODEGEN_START' state.
+  RuntimeProfile::Counter* ir_generation_timer_;
 
   /// Time spent optimizing the module.
   RuntimeProfile::Counter* optimization_timer_;
@@ -774,6 +772,10 @@ class LlvmCodeGen {
   RuntimeProfile::Counter* num_functions_;
   RuntimeProfile::Counter* num_instructions_;
 
+  /// Aggregated llvm thread counters. Also includes the phase represented by
+  /// 'ir_generation_timer_' and hence is also updated by FragmentInstanceState.
+  RuntimeProfile::ThreadCounters* llvm_thread_counters_;
+
   /// whether or not optimizations are enabled
   bool optimizations_enabled_;
 

http://git-wip-us.apache.org/repos/asf/impala/blob/d9d3ce7c/be/src/exec/hdfs-avro-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-avro-scanner.cc b/be/src/exec/hdfs-avro-scanner.cc
index fe1bed4..e74b589 100644
--- a/be/src/exec/hdfs-avro-scanner.cc
+++ b/be/src/exec/hdfs-avro-scanner.cc
@@ -1062,8 +1062,6 @@ Status HdfsAvroScanner::CodegenReadScalar(const AvroSchemaElement& element,
 Status HdfsAvroScanner::CodegenDecodeAvroData(const HdfsScanNodeBase* node,
     LlvmCodeGen* codegen, const vector<ScalarExpr*>& conjuncts,
     llvm::Function** decode_avro_data_fn) {
-  SCOPED_TIMER(codegen->codegen_timer());
-
   llvm::Function* materialize_tuple_fn;
   RETURN_IF_ERROR(CodegenMaterializeTuple(node, codegen, &materialize_tuple_fn));
   DCHECK(materialize_tuple_fn != nullptr);

http://git-wip-us.apache.org/repos/asf/impala/blob/d9d3ce7c/be/src/exec/hdfs-parquet-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-parquet-scanner.cc b/be/src/exec/hdfs-parquet-scanner.cc
index ae22149..73dd29b 100644
--- a/be/src/exec/hdfs-parquet-scanner.cc
+++ b/be/src/exec/hdfs-parquet-scanner.cc
@@ -1019,7 +1019,6 @@ Status HdfsParquetScanner::Codegen(HdfsScanNodeBase* node,
   *process_scratch_batch_fn = nullptr;
   LlvmCodeGen* codegen = node->runtime_state()->codegen();
   DCHECK(codegen != nullptr);
-  SCOPED_TIMER(codegen->codegen_timer());
 
   llvm::Function* fn = codegen->GetFunction(IRFunction::PROCESS_SCRATCH_BATCH, true);
   DCHECK(fn != nullptr);

http://git-wip-us.apache.org/repos/asf/impala/blob/d9d3ce7c/be/src/exec/hdfs-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-scanner.cc b/be/src/exec/hdfs-scanner.cc
index a4aee4d..d191d3f 100644
--- a/be/src/exec/hdfs-scanner.cc
+++ b/be/src/exec/hdfs-scanner.cc
@@ -323,7 +323,6 @@ Status HdfsScanner::CodegenWriteCompleteTuple(const HdfsScanNodeBase* node,
     LlvmCodeGen* codegen, const vector<ScalarExpr*>& conjuncts,
     llvm::Function** write_complete_tuple_fn) {
   *write_complete_tuple_fn = NULL;
-  SCOPED_TIMER(codegen->codegen_timer());
   RuntimeState* state = node->runtime_state();
 
   // Cast away const-ness.  The codegen only sets the cached typed llvm struct.
@@ -531,7 +530,6 @@ Status HdfsScanner::CodegenWriteAlignedTuples(const HdfsScanNodeBase* node,
     LlvmCodeGen* codegen, llvm::Function* write_complete_tuple_fn,
     llvm::Function** write_aligned_tuples_fn) {
   *write_aligned_tuples_fn = NULL;
-  SCOPED_TIMER(codegen->codegen_timer());
   DCHECK(write_complete_tuple_fn != NULL);
 
   llvm::Function* write_tuples_fn =

http://git-wip-us.apache.org/repos/asf/impala/blob/d9d3ce7c/be/src/exec/partitioned-aggregation-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-aggregation-node.cc b/be/src/exec/partitioned-aggregation-node.cc
index c6c6189..d7b8c0a 100644
--- a/be/src/exec/partitioned-aggregation-node.cc
+++ b/be/src/exec/partitioned-aggregation-node.cc
@@ -1721,8 +1721,6 @@ Status PartitionedAggregationNode::CodegenCallUda(LlvmCodeGen* codegen,
 //
 Status PartitionedAggregationNode::CodegenUpdateTuple(
     LlvmCodeGen* codegen, llvm::Function** fn) {
-  SCOPED_TIMER(codegen->codegen_timer());
-
   for (const SlotDescriptor* slot_desc : intermediate_tuple_desc_->slots()) {
     if (slot_desc->type().type == TYPE_CHAR) {
       return Status::Expected("PartitionedAggregationNode::CodegenUpdateTuple(): cannot "
@@ -1811,8 +1809,6 @@ Status PartitionedAggregationNode::CodegenUpdateTuple(
 
 Status PartitionedAggregationNode::CodegenProcessBatch(LlvmCodeGen* codegen,
     TPrefetchMode::type prefetch_mode) {
-  SCOPED_TIMER(codegen->codegen_timer());
-
   llvm::Function* update_tuple_fn;
   RETURN_IF_ERROR(CodegenUpdateTuple(codegen, &update_tuple_fn));
 
@@ -1884,7 +1880,6 @@ Status PartitionedAggregationNode::CodegenProcessBatch(LlvmCodeGen* codegen,
 Status PartitionedAggregationNode::CodegenProcessBatchStreaming(
     LlvmCodeGen* codegen, TPrefetchMode::type prefetch_mode) {
   DCHECK(is_streaming_preagg_);
-  SCOPED_TIMER(codegen->codegen_timer());
 
   IRFunction::Type ir_fn = IRFunction::PART_AGG_NODE_PROCESS_BATCH_STREAMING;
   llvm::Function* process_batch_streaming_fn = codegen->GetFunction(ir_fn, true);

http://git-wip-us.apache.org/repos/asf/impala/blob/d9d3ce7c/be/src/exec/select-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/select-node.cc b/be/src/exec/select-node.cc
index 0f0683b..df57db0 100644
--- a/be/src/exec/select-node.cc
+++ b/be/src/exec/select-node.cc
@@ -48,7 +48,6 @@ void SelectNode::Codegen(RuntimeState* state) {
   DCHECK(state->ShouldCodegen());
   ExecNode::Codegen(state);
   if (IsNodeCodegenDisabled()) return;
-  SCOPED_TIMER(state->codegen()->codegen_timer());
   Status codegen_status = CodegenCopyRows(state);
   runtime_profile()->AddCodegenMsg(codegen_status.ok(), codegen_status);
 }

http://git-wip-us.apache.org/repos/asf/impala/blob/d9d3ce7c/be/src/exec/text-converter.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/text-converter.cc b/be/src/exec/text-converter.cc
index 9e919a3..783e384 100644
--- a/be/src/exec/text-converter.cc
+++ b/be/src/exec/text-converter.cc
@@ -112,7 +112,6 @@ Status TextConverter::CodegenWriteSlot(LlvmCodeGen* codegen,
     return Status("TextConverter::CodegenWriteSlot(): Char isn't supported for"
         " CodegenWriteSlot");
   }
-  SCOPED_TIMER(codegen->codegen_timer());
 
   // Codegen is_null_string
   bool is_default_null = (len == 2 && null_col_val[0] == '\\' && null_col_val[1] == 'N');

http://git-wip-us.apache.org/repos/asf/impala/blob/d9d3ce7c/be/src/runtime/fragment-instance-state.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/fragment-instance-state.cc b/be/src/runtime/fragment-instance-state.cc
index 7322519..1a0d452 100644
--- a/be/src/runtime/fragment-instance-state.cc
+++ b/be/src/runtime/fragment-instance-state.cc
@@ -244,11 +244,17 @@ Status FragmentInstanceState::Open() {
   if (runtime_state_->ShouldCodegen()) {
     UpdateState(StateEvent::CODEGEN_START);
     RETURN_IF_ERROR(runtime_state_->CreateCodegen());
-    exec_tree_->Codegen(runtime_state_);
-    // It shouldn't be fatal to fail codegen. However, until IMPALA-4233 is fixed,
-    // ScalarFnCall has no fall back to interpretation when codegen fails so propagates
-    // the error status for now.
-    RETURN_IF_ERROR(runtime_state_->CodegenScalarFns());
+    {
+      SCOPED_TIMER(runtime_state_->codegen()->ir_generation_timer());
+      SCOPED_TIMER(runtime_state_->codegen()->runtime_profile()->total_time_counter());
+      SCOPED_THREAD_COUNTER_MEASUREMENT(
+          runtime_state_->codegen()->llvm_thread_counters());
+      exec_tree_->Codegen(runtime_state_);
+      // It shouldn't be fatal to fail codegen. However, until IMPALA-4233 is fixed,
+      // ScalarFnCall has no fall back to interpretation when codegen fails so propagates
+      // the error status for now.
+      RETURN_IF_ERROR(runtime_state_->CodegenScalarFns());
+    }
 
     LlvmCodeGen* codegen = runtime_state_->codegen();
     DCHECK(codegen != nullptr);

http://git-wip-us.apache.org/repos/asf/impala/blob/d9d3ce7c/be/src/runtime/tuple.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/tuple.cc b/be/src/runtime/tuple.cc
index 627c7a4..0061419 100644
--- a/be/src/runtime/tuple.cc
+++ b/be/src/runtime/tuple.cc
@@ -311,7 +311,6 @@ Status Tuple::CodegenMaterializeExprs(LlvmCodeGen* codegen, bool collect_string_
   if (collect_string_vals) {
     return Status("CodegenMaterializeExprs() collect_string_vals == true NYI");
   }
-  SCOPED_TIMER(codegen->codegen_timer());
   llvm::LLVMContext& context = codegen->context();
 
   // Codegen each compute function from slot_materialize_exprs

http://git-wip-us.apache.org/repos/asf/impala/blob/d9d3ce7c/be/src/util/tuple-row-compare.cc
----------------------------------------------------------------------
diff --git a/be/src/util/tuple-row-compare.cc b/be/src/util/tuple-row-compare.cc
index 5620dae..f05a88e 100644
--- a/be/src/util/tuple-row-compare.cc
+++ b/be/src/util/tuple-row-compare.cc
@@ -203,7 +203,6 @@ Status TupleRowComparator::Codegen(RuntimeState* state) {
 //   ret i32 0
 // }
 Status TupleRowComparator::CodegenCompare(LlvmCodeGen* codegen, llvm::Function** fn) {
-  SCOPED_TIMER(codegen->codegen_timer());
   llvm::LLVMContext& context = codegen->context();
   const vector<ScalarExpr*>& ordering_exprs = ordering_exprs_;
   llvm::Function* key_fns[ordering_exprs.size()];