You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/04/19 18:12:07 UTC
[02/15] impala git commit: IMPALA-6120: Add thread timers for
reporting codegen time
IMPALA-6120: Add thread timers for reporting codegen time
Add thread times for accurate reporting of codegen time.
Also cleaned up a few places where time elapsed was being counted twice.
Sample Profile:
Query: SELECT count(*) FROM tpch_parquet.lineitem
WHERE l_partkey in (1,6,11,16,21,26,31,36,41);
CodeGen:(Total: 37.948ms, non-child: 37.948ms, % non-child: 100.00%)
- CodegenInvoluntaryContextSwitches: 0 (0)
- CodegenTotalWallClockTime: 37.942ms
- CodegenSysTime: 0.000ns
- CodegenUserTime: 36.938ms
- CodegenVoluntaryContextSwitches: 0 (0)
- CompileTime: 2.065ms
- IrGenerationTime: 392.351us
- LoadTime: 0.000ns
- ModuleBitcodeSize: 2.26 MB (2373148)
- NumFunctions: 22 (22)
- NumInstructions: 381 (381)
- OptimizationTime: 21.416ms
- PeakMemoryUsage: 190.50 KB (195072)
- PrepareTime: 13.496ms
Sample Profile with an added 2 sec sleep time to "OptimizationTime":
CodeGen:(Total: 2s037ms, non-child: 2s037ms, % non-child: 100.00%)
- CodegenInvoluntaryContextSwitches: 0 (0)
- CodegenTotalWallClockTime: 2s037ms
- CodegenSysTime: 0.000ns
- CodegenUserTime: 37.672ms
- CodegenVoluntaryContextSwitches: 1 (1)
- CompileTime: 2.032ms
- IrGenerationTime: 386.948us
- LoadTime: 0.000ns
- ModuleBitcodeSize: 2.26 MB (2373148)
- NumFunctions: 22 (22)
- NumInstructions: 381 (381)
- OptimizationTime: 2s023ms
- PeakMemoryUsage: 190.50 KB (195072)
- PrepareTime: 11.598ms
Change-Id: I24d5a46b8870bc959b89045432d2e86af72b30e5
Reviewed-on: http://gerrit.cloudera.org:8080/9960
Reviewed-by: Bikramjeet Vig <bi...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/d9d3ce7c
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/d9d3ce7c
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/d9d3ce7c
Branch: refs/heads/2.x
Commit: d9d3ce7c2ec124290d503880f3963fe7c4a78c58
Parents: 857d2b0
Author: Bikramjeet Vig <bi...@cloudera.com>
Authored: Mon Apr 9 14:03:52 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Apr 18 21:17:47 2018 +0000
----------------------------------------------------------------------
be/src/codegen/llvm-codegen.cc | 21 +++++++++------------
be/src/codegen/llvm-codegen.h | 18 ++++++++++--------
be/src/exec/hdfs-avro-scanner.cc | 2 --
be/src/exec/hdfs-parquet-scanner.cc | 1 -
be/src/exec/hdfs-scanner.cc | 2 --
be/src/exec/partitioned-aggregation-node.cc | 5 -----
be/src/exec/select-node.cc | 1 -
be/src/exec/text-converter.cc | 1 -
be/src/runtime/fragment-instance-state.cc | 16 +++++++++++-----
be/src/runtime/tuple.cc | 1 -
be/src/util/tuple-row-compare.cc | 1 -
11 files changed, 30 insertions(+), 39 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/impala/blob/d9d3ce7c/be/src/codegen/llvm-codegen.cc
----------------------------------------------------------------------
diff --git a/be/src/codegen/llvm-codegen.cc b/be/src/codegen/llvm-codegen.cc
index c8fd8eb..5d5ed15 100644
--- a/be/src/codegen/llvm-codegen.cc
+++ b/be/src/codegen/llvm-codegen.cc
@@ -201,11 +201,12 @@ LlvmCodeGen::LlvmCodeGen(RuntimeState* state, ObjectPool* pool,
load_module_timer_ = ADD_TIMER(profile_, "LoadTime");
prepare_module_timer_ = ADD_TIMER(profile_, "PrepareTime");
module_bitcode_size_ = ADD_COUNTER(profile_, "ModuleBitcodeSize", TUnit::BYTES);
- codegen_timer_ = ADD_TIMER(profile_, "CodegenTime");
+ ir_generation_timer_ = ADD_TIMER(profile_, "IrGenerationTime");
optimization_timer_ = ADD_TIMER(profile_, "OptimizationTime");
compile_timer_ = ADD_TIMER(profile_, "CompileTime");
num_functions_ = ADD_COUNTER(profile_, "NumFunctions", TUnit::UNIT);
num_instructions_ = ADD_COUNTER(profile_, "NumInstructions", TUnit::UNIT);
+ llvm_thread_counters_ = ADD_THREAD_COUNTERS(profile_, "Codegen");
}
Status LlvmCodeGen::CreateFromFile(RuntimeState* state, ObjectPool* pool,
@@ -213,6 +214,7 @@ Status LlvmCodeGen::CreateFromFile(RuntimeState* state, ObjectPool* pool,
scoped_ptr<LlvmCodeGen>* codegen) {
codegen->reset(new LlvmCodeGen(state, pool, parent_mem_tracker, id));
SCOPED_TIMER((*codegen)->profile_->total_time_counter());
+ SCOPED_THREAD_COUNTER_MEASUREMENT((*codegen)->llvm_thread_counters());
unique_ptr<llvm::Module> loaded_module;
Status status = (*codegen)->LoadModuleFromFile(file, &loaded_module);
@@ -229,6 +231,8 @@ Status LlvmCodeGen::CreateFromMemory(RuntimeState* state, ObjectPool* pool,
MemTracker* parent_mem_tracker, const string& id, scoped_ptr<LlvmCodeGen>* codegen) {
codegen->reset(new LlvmCodeGen(state, pool, parent_mem_tracker, id));
SCOPED_TIMER((*codegen)->profile_->total_time_counter());
+ SCOPED_TIMER((*codegen)->prepare_module_timer_);
+ SCOPED_THREAD_COUNTER_MEASUREMENT((*codegen)->llvm_thread_counters());
// Select the appropriate IR version. We cannot use LLVM IR with SSE4.2 instructions on
// a machine without SSE4.2 support.
@@ -282,7 +286,6 @@ Status LlvmCodeGen::LoadModuleFromFile(
Status LlvmCodeGen::LoadModuleFromMemory(unique_ptr<llvm::MemoryBuffer> module_ir_buf,
string module_name, unique_ptr<llvm::Module>* module) {
DCHECK(!module_name.empty());
- SCOPED_TIMER(prepare_module_timer_);
COUNTER_ADD(module_bitcode_size_, module_ir_buf->getMemBufferRef().getBufferSize());
llvm::Expected<unique_ptr<llvm::Module>> tmp_module =
getOwningLazyBitcodeModule(move(module_ir_buf), context());
@@ -305,7 +308,6 @@ Status LlvmCodeGen::LoadModuleFromMemory(unique_ptr<llvm::MemoryBuffer> module_i
// TODO: Create separate counters/timers (file size, load time) for each module linked
Status LlvmCodeGen::LinkModuleFromLocalFs(const string& file) {
- SCOPED_TIMER(profile_->total_time_counter());
unique_ptr<llvm::Module> new_module;
RETURN_IF_ERROR(LoadModuleFromFile(file, &new_module));
@@ -366,6 +368,7 @@ Status LlvmCodeGen::CreateImpalaCodegen(RuntimeState* state,
// Parse module for cross compiled functions and types
SCOPED_TIMER(codegen->profile_->total_time_counter());
SCOPED_TIMER(codegen->prepare_module_timer_);
+ SCOPED_THREAD_COUNTER_MEASUREMENT(codegen->llvm_thread_counters_);
// Get type for StringValue
codegen->string_value_type_ = codegen->GetStructType<StringValue>();
@@ -621,7 +624,7 @@ void LlvmCodeGen::CreateIfElseBlocks(llvm::Function* fn, const string& if_name,
*else_block = llvm::BasicBlock::Create(context(), else_name, fn, insert_before);
}
-Status LlvmCodeGen::MaterializeFunctionHelper(llvm::Function* fn) {
+Status LlvmCodeGen::MaterializeFunction(llvm::Function* fn) {
DCHECK(!is_compiled_);
if (fn->isIntrinsic() || !fn->isMaterializable()) return Status::OK();
@@ -642,18 +645,12 @@ Status LlvmCodeGen::MaterializeFunctionHelper(llvm::Function* fn) {
for (const string& callee : *callees) {
llvm::Function* callee_fn = module_->getFunction(callee);
DCHECK(callee_fn != nullptr);
- RETURN_IF_ERROR(MaterializeFunctionHelper(callee_fn));
+ RETURN_IF_ERROR(MaterializeFunction(callee_fn));
}
}
return Status::OK();
}
-Status LlvmCodeGen::MaterializeFunction(llvm::Function* fn) {
- SCOPED_TIMER(profile_->total_time_counter());
- SCOPED_TIMER(prepare_module_timer_);
- return MaterializeFunctionHelper(fn);
-}
-
llvm::Function* LlvmCodeGen::GetFunction(const string& symbol, bool clone) {
llvm::Function* fn = module_->getFunction(symbol.c_str());
if (fn == NULL) {
@@ -1038,7 +1035,6 @@ Status LlvmCodeGen::MaterializeModule() {
// It's okay to call this function even if the module has been materialized.
Status LlvmCodeGen::FinalizeLazyMaterialization() {
- SCOPED_TIMER(prepare_module_timer_);
for (llvm::Function& fn : module_->functions()) {
if (fn.isMaterializable()) {
DCHECK(!module_->isMaterialized());
@@ -1078,6 +1074,7 @@ Status LlvmCodeGen::FinalizeModule() {
if (is_corrupt_) return Status("Module is corrupt.");
SCOPED_TIMER(profile_->total_time_counter());
+ SCOPED_THREAD_COUNTER_MEASUREMENT(llvm_thread_counters_);
// Clean up handcrafted functions that have not been finalized. Clean up is done by
// deleting the function from the module. Any reference to deleted functions in the
http://git-wip-us.apache.org/repos/asf/impala/blob/d9d3ce7c/be/src/codegen/llvm-codegen.h
----------------------------------------------------------------------
diff --git a/be/src/codegen/llvm-codegen.h b/be/src/codegen/llvm-codegen.h
index 783269b..53569ca 100644
--- a/be/src/codegen/llvm-codegen.h
+++ b/be/src/codegen/llvm-codegen.h
@@ -169,7 +169,8 @@ class LlvmCodeGen {
void Close();
RuntimeProfile* runtime_profile() { return profile_; }
- RuntimeProfile::Counter* codegen_timer() { return codegen_timer_; }
+ RuntimeProfile::Counter* ir_generation_timer() { return ir_generation_timer_; }
+ RuntimeProfile::ThreadCounters* llvm_thread_counters() { return llvm_thread_counters_; }
/// Turns on/off optimization passes
void EnableOptimizations(bool enable);
@@ -688,10 +689,6 @@ class LlvmCodeGen {
/// This function parses the bitcode of 'fn' to populate basic blocks, instructions
/// and other data structures attached to the function object. Return error status
/// for any error.
- Status MaterializeFunctionHelper(llvm::Function* fn);
-
- /// Entry point for materializing function 'fn'. Invokes MaterializeFunctionHelper()
- /// to do the actual work. Return error status for any error.
Status MaterializeFunction(llvm::Function* fn);
/// Materialize the module owned by this codegen object. This will materialize all
@@ -754,11 +751,12 @@ class LlvmCodeGen {
/// Time spent reading the .ir file from the file system.
RuntimeProfile::Counter* load_module_timer_;
- /// Time spent constructing the in-memory module from the ir.
+ /// Time spent creating the initial module with the cross-compiled Impala IR.
RuntimeProfile::Counter* prepare_module_timer_;
- /// Time spent doing codegen (adding IR to the module)
- RuntimeProfile::Counter* codegen_timer_;
+ /// Time spent by ExecNodes while adding IR to the module. Update by
+ /// FragmentInstanceState during its 'CODEGEN_START' state.
+ RuntimeProfile::Counter* ir_generation_timer_;
/// Time spent optimizing the module.
RuntimeProfile::Counter* optimization_timer_;
@@ -774,6 +772,10 @@ class LlvmCodeGen {
RuntimeProfile::Counter* num_functions_;
RuntimeProfile::Counter* num_instructions_;
+ /// Aggregated llvm thread counters. Also includes the phase represented by
+ /// 'ir_generation_timer_' and hence is also updated by FragmentInstanceState.
+ RuntimeProfile::ThreadCounters* llvm_thread_counters_;
+
/// whether or not optimizations are enabled
bool optimizations_enabled_;
http://git-wip-us.apache.org/repos/asf/impala/blob/d9d3ce7c/be/src/exec/hdfs-avro-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-avro-scanner.cc b/be/src/exec/hdfs-avro-scanner.cc
index fe1bed4..e74b589 100644
--- a/be/src/exec/hdfs-avro-scanner.cc
+++ b/be/src/exec/hdfs-avro-scanner.cc
@@ -1062,8 +1062,6 @@ Status HdfsAvroScanner::CodegenReadScalar(const AvroSchemaElement& element,
Status HdfsAvroScanner::CodegenDecodeAvroData(const HdfsScanNodeBase* node,
LlvmCodeGen* codegen, const vector<ScalarExpr*>& conjuncts,
llvm::Function** decode_avro_data_fn) {
- SCOPED_TIMER(codegen->codegen_timer());
-
llvm::Function* materialize_tuple_fn;
RETURN_IF_ERROR(CodegenMaterializeTuple(node, codegen, &materialize_tuple_fn));
DCHECK(materialize_tuple_fn != nullptr);
http://git-wip-us.apache.org/repos/asf/impala/blob/d9d3ce7c/be/src/exec/hdfs-parquet-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-parquet-scanner.cc b/be/src/exec/hdfs-parquet-scanner.cc
index ae22149..73dd29b 100644
--- a/be/src/exec/hdfs-parquet-scanner.cc
+++ b/be/src/exec/hdfs-parquet-scanner.cc
@@ -1019,7 +1019,6 @@ Status HdfsParquetScanner::Codegen(HdfsScanNodeBase* node,
*process_scratch_batch_fn = nullptr;
LlvmCodeGen* codegen = node->runtime_state()->codegen();
DCHECK(codegen != nullptr);
- SCOPED_TIMER(codegen->codegen_timer());
llvm::Function* fn = codegen->GetFunction(IRFunction::PROCESS_SCRATCH_BATCH, true);
DCHECK(fn != nullptr);
http://git-wip-us.apache.org/repos/asf/impala/blob/d9d3ce7c/be/src/exec/hdfs-scanner.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-scanner.cc b/be/src/exec/hdfs-scanner.cc
index a4aee4d..d191d3f 100644
--- a/be/src/exec/hdfs-scanner.cc
+++ b/be/src/exec/hdfs-scanner.cc
@@ -323,7 +323,6 @@ Status HdfsScanner::CodegenWriteCompleteTuple(const HdfsScanNodeBase* node,
LlvmCodeGen* codegen, const vector<ScalarExpr*>& conjuncts,
llvm::Function** write_complete_tuple_fn) {
*write_complete_tuple_fn = NULL;
- SCOPED_TIMER(codegen->codegen_timer());
RuntimeState* state = node->runtime_state();
// Cast away const-ness. The codegen only sets the cached typed llvm struct.
@@ -531,7 +530,6 @@ Status HdfsScanner::CodegenWriteAlignedTuples(const HdfsScanNodeBase* node,
LlvmCodeGen* codegen, llvm::Function* write_complete_tuple_fn,
llvm::Function** write_aligned_tuples_fn) {
*write_aligned_tuples_fn = NULL;
- SCOPED_TIMER(codegen->codegen_timer());
DCHECK(write_complete_tuple_fn != NULL);
llvm::Function* write_tuples_fn =
http://git-wip-us.apache.org/repos/asf/impala/blob/d9d3ce7c/be/src/exec/partitioned-aggregation-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/partitioned-aggregation-node.cc b/be/src/exec/partitioned-aggregation-node.cc
index c6c6189..d7b8c0a 100644
--- a/be/src/exec/partitioned-aggregation-node.cc
+++ b/be/src/exec/partitioned-aggregation-node.cc
@@ -1721,8 +1721,6 @@ Status PartitionedAggregationNode::CodegenCallUda(LlvmCodeGen* codegen,
//
Status PartitionedAggregationNode::CodegenUpdateTuple(
LlvmCodeGen* codegen, llvm::Function** fn) {
- SCOPED_TIMER(codegen->codegen_timer());
-
for (const SlotDescriptor* slot_desc : intermediate_tuple_desc_->slots()) {
if (slot_desc->type().type == TYPE_CHAR) {
return Status::Expected("PartitionedAggregationNode::CodegenUpdateTuple(): cannot "
@@ -1811,8 +1809,6 @@ Status PartitionedAggregationNode::CodegenUpdateTuple(
Status PartitionedAggregationNode::CodegenProcessBatch(LlvmCodeGen* codegen,
TPrefetchMode::type prefetch_mode) {
- SCOPED_TIMER(codegen->codegen_timer());
-
llvm::Function* update_tuple_fn;
RETURN_IF_ERROR(CodegenUpdateTuple(codegen, &update_tuple_fn));
@@ -1884,7 +1880,6 @@ Status PartitionedAggregationNode::CodegenProcessBatch(LlvmCodeGen* codegen,
Status PartitionedAggregationNode::CodegenProcessBatchStreaming(
LlvmCodeGen* codegen, TPrefetchMode::type prefetch_mode) {
DCHECK(is_streaming_preagg_);
- SCOPED_TIMER(codegen->codegen_timer());
IRFunction::Type ir_fn = IRFunction::PART_AGG_NODE_PROCESS_BATCH_STREAMING;
llvm::Function* process_batch_streaming_fn = codegen->GetFunction(ir_fn, true);
http://git-wip-us.apache.org/repos/asf/impala/blob/d9d3ce7c/be/src/exec/select-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/select-node.cc b/be/src/exec/select-node.cc
index 0f0683b..df57db0 100644
--- a/be/src/exec/select-node.cc
+++ b/be/src/exec/select-node.cc
@@ -48,7 +48,6 @@ void SelectNode::Codegen(RuntimeState* state) {
DCHECK(state->ShouldCodegen());
ExecNode::Codegen(state);
if (IsNodeCodegenDisabled()) return;
- SCOPED_TIMER(state->codegen()->codegen_timer());
Status codegen_status = CodegenCopyRows(state);
runtime_profile()->AddCodegenMsg(codegen_status.ok(), codegen_status);
}
http://git-wip-us.apache.org/repos/asf/impala/blob/d9d3ce7c/be/src/exec/text-converter.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/text-converter.cc b/be/src/exec/text-converter.cc
index 9e919a3..783e384 100644
--- a/be/src/exec/text-converter.cc
+++ b/be/src/exec/text-converter.cc
@@ -112,7 +112,6 @@ Status TextConverter::CodegenWriteSlot(LlvmCodeGen* codegen,
return Status("TextConverter::CodegenWriteSlot(): Char isn't supported for"
" CodegenWriteSlot");
}
- SCOPED_TIMER(codegen->codegen_timer());
// Codegen is_null_string
bool is_default_null = (len == 2 && null_col_val[0] == '\\' && null_col_val[1] == 'N');
http://git-wip-us.apache.org/repos/asf/impala/blob/d9d3ce7c/be/src/runtime/fragment-instance-state.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/fragment-instance-state.cc b/be/src/runtime/fragment-instance-state.cc
index 7322519..1a0d452 100644
--- a/be/src/runtime/fragment-instance-state.cc
+++ b/be/src/runtime/fragment-instance-state.cc
@@ -244,11 +244,17 @@ Status FragmentInstanceState::Open() {
if (runtime_state_->ShouldCodegen()) {
UpdateState(StateEvent::CODEGEN_START);
RETURN_IF_ERROR(runtime_state_->CreateCodegen());
- exec_tree_->Codegen(runtime_state_);
- // It shouldn't be fatal to fail codegen. However, until IMPALA-4233 is fixed,
- // ScalarFnCall has no fall back to interpretation when codegen fails so propagates
- // the error status for now.
- RETURN_IF_ERROR(runtime_state_->CodegenScalarFns());
+ {
+ SCOPED_TIMER(runtime_state_->codegen()->ir_generation_timer());
+ SCOPED_TIMER(runtime_state_->codegen()->runtime_profile()->total_time_counter());
+ SCOPED_THREAD_COUNTER_MEASUREMENT(
+ runtime_state_->codegen()->llvm_thread_counters());
+ exec_tree_->Codegen(runtime_state_);
+ // It shouldn't be fatal to fail codegen. However, until IMPALA-4233 is fixed,
+ // ScalarFnCall has no fall back to interpretation when codegen fails so propagates
+ // the error status for now.
+ RETURN_IF_ERROR(runtime_state_->CodegenScalarFns());
+ }
LlvmCodeGen* codegen = runtime_state_->codegen();
DCHECK(codegen != nullptr);
http://git-wip-us.apache.org/repos/asf/impala/blob/d9d3ce7c/be/src/runtime/tuple.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/tuple.cc b/be/src/runtime/tuple.cc
index 627c7a4..0061419 100644
--- a/be/src/runtime/tuple.cc
+++ b/be/src/runtime/tuple.cc
@@ -311,7 +311,6 @@ Status Tuple::CodegenMaterializeExprs(LlvmCodeGen* codegen, bool collect_string_
if (collect_string_vals) {
return Status("CodegenMaterializeExprs() collect_string_vals == true NYI");
}
- SCOPED_TIMER(codegen->codegen_timer());
llvm::LLVMContext& context = codegen->context();
// Codegen each compute function from slot_materialize_exprs
http://git-wip-us.apache.org/repos/asf/impala/blob/d9d3ce7c/be/src/util/tuple-row-compare.cc
----------------------------------------------------------------------
diff --git a/be/src/util/tuple-row-compare.cc b/be/src/util/tuple-row-compare.cc
index 5620dae..f05a88e 100644
--- a/be/src/util/tuple-row-compare.cc
+++ b/be/src/util/tuple-row-compare.cc
@@ -203,7 +203,6 @@ Status TupleRowComparator::Codegen(RuntimeState* state) {
// ret i32 0
// }
Status TupleRowComparator::CodegenCompare(LlvmCodeGen* codegen, llvm::Function** fn) {
- SCOPED_TIMER(codegen->codegen_timer());
llvm::LLVMContext& context = codegen->context();
const vector<ScalarExpr*>& ordering_exprs = ordering_exprs_;
llvm::Function* key_fns[ordering_exprs.size()];