You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2022/08/08 23:14:36 UTC
[impala] 02/03: IMPALA-11474: Codegen Tuple size in sorter-ir.cc
This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit cdf62596846916e64ff9b0549b1eee8736185687
Author: noemi <np...@cloudera.com>
AuthorDate: Mon Jul 18 17:40:31 2022 +0200
IMPALA-11474: Codegen Tuple size in sorter-ir.cc
The number of bytes in a tuple is known before execution, it is
available in the query plan. However, currently the tuple size
is treated as a member variable of the TupleSorter.
Using Codegen to replace this member variable by a constant in
sorter-ir.cc can speed up the quicksort phase by up to 20% in the
most simple queries with small tuples.
Some examples using tpch_parquet lineitem, scale factor=8:
disable_outermost_topn=1;
Query: select _ from lineitem order by _ limit 1;
+---------------+-------------------------+------+----------+----------+-------------+
| Order by | Tuple | NDV | Constant | Variable | Improvement |
+---------------+-------------------------+------+----------+----------+-------------+
| rand() | int, rand | 48M | 12.21s | 14.21s | 14% |
| l_linenumber | int | 7 | 1.81s | 2.34s | 22% |
| l_orderkey | bigint | 12M | 5.38s | 6.69s | 19% |
| l_receiptdate | string, decimal, bigint | 2600 | 17.3s | 18.7s | 7% |
+---------------+-------------------------+------+----------+----------+-------------+
Change-Id: Ia4161a61db1782dc448dae9a1d4c1d120b055b3c
Reviewed-on: http://gerrit.cloudera.org:8080/18802
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
be/src/exec/partial-sort-node.cc | 4 +++-
be/src/exec/sort-node.cc | 4 +++-
be/src/exec/topn-node.cc | 3 ++-
be/src/runtime/sorter-internal.h | 5 ++++-
be/src/runtime/sorter-ir.cc | 6 +++---
be/src/runtime/sorter.cc | 6 +++++-
6 files changed, 20 insertions(+), 8 deletions(-)
diff --git a/be/src/exec/partial-sort-node.cc b/be/src/exec/partial-sort-node.cc
index 2ba5724ed..1fadbe523 100644
--- a/be/src/exec/partial-sort-node.cc
+++ b/be/src/exec/partial-sort-node.cc
@@ -94,7 +94,9 @@ void PartialSortPlanNode::Codegen(FragmentState* state) {
llvm::Function* compare_fn = nullptr;
AddCodegenStatus(row_comparator_config_->Codegen(state, &compare_fn));
AddCodegenStatus(
- Sorter::TupleSorter::Codegen(state, compare_fn, &codegend_sort_helper_fn_));
+ Sorter::TupleSorter::Codegen(state, compare_fn,
+ row_descriptor_->tuple_descriptors()[0]->byte_size(),
+ &codegend_sort_helper_fn_));
}
Status PartialSortNode::Open(RuntimeState* state) {
diff --git a/be/src/exec/sort-node.cc b/be/src/exec/sort-node.cc
index 739694c9e..223b689a9 100644
--- a/be/src/exec/sort-node.cc
+++ b/be/src/exec/sort-node.cc
@@ -111,7 +111,9 @@ void SortPlanNode::Codegen(FragmentState* state) {
Status codegen_status = row_comparator_config_->Codegen(state, &compare_fn);
if (codegen_status.ok()) {
codegen_status =
- Sorter::TupleSorter::Codegen(state, compare_fn, &codegend_sort_helper_fn_);
+ Sorter::TupleSorter::Codegen(state, compare_fn,
+ row_descriptor_->tuple_descriptors()[0]->byte_size(),
+ &codegend_sort_helper_fn_);
}
AddCodegenStatus(codegen_status);
}
diff --git a/be/src/exec/topn-node.cc b/be/src/exec/topn-node.cc
index f65af7df3..2e331a6a8 100644
--- a/be/src/exec/topn-node.cc
+++ b/be/src/exec/topn-node.cc
@@ -218,7 +218,8 @@ void TopNPlanNode::Codegen(FragmentState* state) {
Status codegen_status = ordering_comparator_config_->Codegen(state, &compare_fn);
if (codegen_status.ok() && is_partitioned()) {
codegen_status =
- Sorter::TupleSorter::Codegen(state, compare_fn, &codegend_sort_helper_fn_);
+ Sorter::TupleSorter::Codegen(state, compare_fn, output_tuple_desc_->byte_size(),
+ &codegend_sort_helper_fn_);
}
if (codegen_status.ok() && is_partitioned()) {
// TODO: IMPALA-10228: replace comparisons in std::map.
diff --git a/be/src/runtime/sorter-internal.h b/be/src/runtime/sorter-internal.h
index 5945526ba..931ee20b6 100644
--- a/be/src/runtime/sorter-internal.h
+++ b/be/src/runtime/sorter-internal.h
@@ -444,7 +444,7 @@ class Sorter::TupleSorter {
/// 'compare_fn' is the pointer to the code-gen version of the compare method with
/// which to replace all non-code-gen versions.
static Status Codegen(FragmentState* state, llvm::Function* compare_fn,
- CodegenFnPtr<SortHelperFn>* codegend_fn);
+ int tuple_byte_size, CodegenFnPtr<SortHelperFn>* codegend_fn);
/// Mangled name of SorterHelper().
static const char* SORTER_HELPER_SYMBOL;
@@ -460,6 +460,9 @@ class Sorter::TupleSorter {
/// Size of the tuples in memory.
const int tuple_size_;
+ /// Getter for the size of the tuples in memory. Replaced by a constant during codegen
+ int IR_NO_INLINE get_tuple_size() const { return tuple_size_; }
+
/// Tuple comparator with method Less() that returns true if lhs < rhs.
const TupleRowComparator& comparator_;
diff --git a/be/src/runtime/sorter-ir.cc b/be/src/runtime/sorter-ir.cc
index 16da09d61..44398fb5a 100644
--- a/be/src/runtime/sorter-ir.cc
+++ b/be/src/runtime/sorter-ir.cc
@@ -90,7 +90,7 @@ Status IR_ALWAYS_INLINE Sorter::TupleSorter::Partition2way(TupleIterator begin,
TupleIterator end, const Tuple* pivot, TupleIterator* cut) {
// Hoist member variable lookups out of loop to avoid extra loads inside loop.
Run* run = run_;
- int tuple_size = tuple_size_;
+ const int tuple_size = get_tuple_size();
Tuple* pivot_tuple = reinterpret_cast<Tuple*>(temp_tuple_buffer_);
TupleRow* pivot_tuple_row = reinterpret_cast<TupleRow*>(&pivot_tuple);
Tuple* swap_tuple = reinterpret_cast<Tuple*>(swap_buffer_);
@@ -136,7 +136,7 @@ Status IR_ALWAYS_INLINE Sorter::TupleSorter::Partition3way(TupleIterator begin,
TupleIterator* cut_right) {
// Hoist member variable lookups out of loop to avoid extra loads inside loop.
Run* run = run_;
- int tuple_size = tuple_size_;
+ const int tuple_size = get_tuple_size();
Tuple* pivot_tuple = reinterpret_cast<Tuple*>(temp_tuple_buffer_);
TupleRow* pivot_tuple_row = reinterpret_cast<TupleRow*>(&pivot_tuple);
Tuple* swap_tuple = reinterpret_cast<Tuple*>(swap_buffer_);
@@ -215,7 +215,7 @@ Status IR_ALWAYS_INLINE Sorter::TupleSorter::InsertionSort(const TupleIterator&
// Hoist member variable lookups out of loop to avoid extra loads inside loop.
Run* run = run_;
- int tuple_size = tuple_size_;
+ const int tuple_size = get_tuple_size();
uint8_t* temp_tuple_buffer = temp_tuple_buffer_;
TupleIterator insert_iter = begin;
diff --git a/be/src/runtime/sorter.cc b/be/src/runtime/sorter.cc
index 04fa338b0..5cb99d8e5 100644
--- a/be/src/runtime/sorter.cc
+++ b/be/src/runtime/sorter.cc
@@ -1211,7 +1211,7 @@ const char* Sorter::TupleSorter::LLVM_CLASS_NAME = "class.impala::Sorter::TupleS
// A method to code-gen for TupleSorter::SortHelper().
Status Sorter::TupleSorter::Codegen(FragmentState* state, llvm::Function* compare_fn,
- CodegenFnPtr<SortHelperFn>* codegened_fn) {
+ int tuple_size, CodegenFnPtr<SortHelperFn>* codegened_fn) {
LlvmCodeGen* codegen = state->codegen();
DCHECK(codegen != nullptr);
@@ -1228,6 +1228,10 @@ Status Sorter::TupleSorter::Codegen(FragmentState* state, llvm::Function* compar
replaced = codegen->ReplaceCallSites(fn, fn, SORTER_HELPER_SYMBOL);
DCHECK_REPLACE_COUNT(replaced, 2) << LlvmCodeGen::Print(fn);
+ replaced = codegen->ReplaceCallSitesWithValue(fn,
+ codegen->GetI32Constant(tuple_size), "get_tuple_size");
+ DCHECK_REPLACE_COUNT(replaced, 3) << LlvmCodeGen::Print(fn);
+
fn = codegen->FinalizeFunction(fn);
if (fn == nullptr) {
return Status("Sorter::TupleSorter::Codegen(): failed to finalize function");