You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2022/08/08 23:14:36 UTC

[impala] 02/03: IMPALA-11474: Codegen Tuple size in sorter-ir.cc

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit cdf62596846916e64ff9b0549b1eee8736185687
Author: noemi <np...@cloudera.com>
AuthorDate: Mon Jul 18 17:40:31 2022 +0200

    IMPALA-11474: Codegen Tuple size in sorter-ir.cc
    
    The number of bytes in a tuple is known before execution, it is
    available in the query plan. However, currently the tuple size
    is treated as a member variable of the TupleSorter.
    Using Codegen to replace this member variable by a constant in
    sorter-ir.cc can speed up the quicksort phase by up to 20% in the
    most simple queries with small tuples.
    
    Some examples using tpch_parquet lineitem, scale factor=8:
    disable_outermost_topn=1;
    Query: select _ from lineitem order by _ limit 1;
    +---------------+-------------------------+------+----------+----------+-------------+
    |   Order by    |          Tuple          | NDV  | Constant | Variable | Improvement |
    +---------------+-------------------------+------+----------+----------+-------------+
    | rand()        | int, rand               | 48M  | 12.21s   | 14.21s   | 14%         |
    | l_linenumber  | int                     | 7    | 1.81s    | 2.34s    | 22%         |
    | l_orderkey    | bigint                  | 12M  | 5.38s    | 6.69s    | 19%         |
    | l_receiptdate | string, decimal, bigint | 2600 | 17.3s    | 18.7s    | 7%          |
    +---------------+-------------------------+------+----------+----------+-------------+
    
    Change-Id: Ia4161a61db1782dc448dae9a1d4c1d120b055b3c
    Reviewed-on: http://gerrit.cloudera.org:8080/18802
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/exec/partial-sort-node.cc | 4 +++-
 be/src/exec/sort-node.cc         | 4 +++-
 be/src/exec/topn-node.cc         | 3 ++-
 be/src/runtime/sorter-internal.h | 5 ++++-
 be/src/runtime/sorter-ir.cc      | 6 +++---
 be/src/runtime/sorter.cc         | 6 +++++-
 6 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/be/src/exec/partial-sort-node.cc b/be/src/exec/partial-sort-node.cc
index 2ba5724ed..1fadbe523 100644
--- a/be/src/exec/partial-sort-node.cc
+++ b/be/src/exec/partial-sort-node.cc
@@ -94,7 +94,9 @@ void PartialSortPlanNode::Codegen(FragmentState* state) {
   llvm::Function* compare_fn = nullptr;
   AddCodegenStatus(row_comparator_config_->Codegen(state, &compare_fn));
   AddCodegenStatus(
-      Sorter::TupleSorter::Codegen(state, compare_fn, &codegend_sort_helper_fn_));
+      Sorter::TupleSorter::Codegen(state, compare_fn,
+          row_descriptor_->tuple_descriptors()[0]->byte_size(),
+          &codegend_sort_helper_fn_));
 }
 
 Status PartialSortNode::Open(RuntimeState* state) {
diff --git a/be/src/exec/sort-node.cc b/be/src/exec/sort-node.cc
index 739694c9e..223b689a9 100644
--- a/be/src/exec/sort-node.cc
+++ b/be/src/exec/sort-node.cc
@@ -111,7 +111,9 @@ void SortPlanNode::Codegen(FragmentState* state) {
   Status codegen_status = row_comparator_config_->Codegen(state, &compare_fn);
   if (codegen_status.ok()) {
     codegen_status =
-        Sorter::TupleSorter::Codegen(state, compare_fn, &codegend_sort_helper_fn_);
+        Sorter::TupleSorter::Codegen(state, compare_fn,
+            row_descriptor_->tuple_descriptors()[0]->byte_size(),
+            &codegend_sort_helper_fn_);
   }
   AddCodegenStatus(codegen_status);
 }
diff --git a/be/src/exec/topn-node.cc b/be/src/exec/topn-node.cc
index f65af7df3..2e331a6a8 100644
--- a/be/src/exec/topn-node.cc
+++ b/be/src/exec/topn-node.cc
@@ -218,7 +218,8 @@ void TopNPlanNode::Codegen(FragmentState* state) {
   Status codegen_status = ordering_comparator_config_->Codegen(state, &compare_fn);
   if (codegen_status.ok() && is_partitioned()) {
     codegen_status =
-        Sorter::TupleSorter::Codegen(state, compare_fn, &codegend_sort_helper_fn_);
+        Sorter::TupleSorter::Codegen(state, compare_fn, output_tuple_desc_->byte_size(),
+            &codegend_sort_helper_fn_);
   }
   if (codegen_status.ok() && is_partitioned()) {
     // TODO: IMPALA-10228: replace comparisons in std::map.
diff --git a/be/src/runtime/sorter-internal.h b/be/src/runtime/sorter-internal.h
index 5945526ba..931ee20b6 100644
--- a/be/src/runtime/sorter-internal.h
+++ b/be/src/runtime/sorter-internal.h
@@ -444,7 +444,7 @@ class Sorter::TupleSorter {
   /// 'compare_fn' is the pointer to the code-gen version of the compare method with
   /// which to replace all non-code-gen versions.
   static Status Codegen(FragmentState* state, llvm::Function* compare_fn,
-      CodegenFnPtr<SortHelperFn>* codegend_fn);
+      int tuple_byte_size, CodegenFnPtr<SortHelperFn>* codegend_fn);
 
   /// Mangled name of SorterHelper().
   static const char* SORTER_HELPER_SYMBOL;
@@ -460,6 +460,9 @@ class Sorter::TupleSorter {
   /// Size of the tuples in memory.
   const int tuple_size_;
 
+  /// Getter for the size of the tuples in memory. Replaced by a constant during codegen
+  int IR_NO_INLINE get_tuple_size() const { return tuple_size_; }
+
   /// Tuple comparator with method Less() that returns true if lhs < rhs.
   const TupleRowComparator& comparator_;
 
diff --git a/be/src/runtime/sorter-ir.cc b/be/src/runtime/sorter-ir.cc
index 16da09d61..44398fb5a 100644
--- a/be/src/runtime/sorter-ir.cc
+++ b/be/src/runtime/sorter-ir.cc
@@ -90,7 +90,7 @@ Status IR_ALWAYS_INLINE Sorter::TupleSorter::Partition2way(TupleIterator begin,
     TupleIterator end, const Tuple* pivot, TupleIterator* cut) {
   // Hoist member variable lookups out of loop to avoid extra loads inside loop.
   Run* run = run_;
-  int tuple_size = tuple_size_;
+  const int tuple_size = get_tuple_size();
   Tuple* pivot_tuple = reinterpret_cast<Tuple*>(temp_tuple_buffer_);
   TupleRow* pivot_tuple_row = reinterpret_cast<TupleRow*>(&pivot_tuple);
   Tuple* swap_tuple = reinterpret_cast<Tuple*>(swap_buffer_);
@@ -136,7 +136,7 @@ Status IR_ALWAYS_INLINE Sorter::TupleSorter::Partition3way(TupleIterator begin,
     TupleIterator* cut_right) {
   // Hoist member variable lookups out of loop to avoid extra loads inside loop.
   Run* run = run_;
-  int tuple_size = tuple_size_;
+  const int tuple_size = get_tuple_size();
   Tuple* pivot_tuple = reinterpret_cast<Tuple*>(temp_tuple_buffer_);
   TupleRow* pivot_tuple_row = reinterpret_cast<TupleRow*>(&pivot_tuple);
   Tuple* swap_tuple = reinterpret_cast<Tuple*>(swap_buffer_);
@@ -215,7 +215,7 @@ Status IR_ALWAYS_INLINE Sorter::TupleSorter::InsertionSort(const TupleIterator&
 
   // Hoist member variable lookups out of loop to avoid extra loads inside loop.
   Run* run = run_;
-  int tuple_size = tuple_size_;
+  const int tuple_size = get_tuple_size();
   uint8_t* temp_tuple_buffer = temp_tuple_buffer_;
 
   TupleIterator insert_iter = begin;
diff --git a/be/src/runtime/sorter.cc b/be/src/runtime/sorter.cc
index 04fa338b0..5cb99d8e5 100644
--- a/be/src/runtime/sorter.cc
+++ b/be/src/runtime/sorter.cc
@@ -1211,7 +1211,7 @@ const char* Sorter::TupleSorter::LLVM_CLASS_NAME = "class.impala::Sorter::TupleS
 
 // A method to code-gen for TupleSorter::SortHelper().
 Status Sorter::TupleSorter::Codegen(FragmentState* state, llvm::Function* compare_fn,
-    CodegenFnPtr<SortHelperFn>* codegened_fn) {
+    int tuple_size, CodegenFnPtr<SortHelperFn>* codegened_fn) {
   LlvmCodeGen* codegen = state->codegen();
   DCHECK(codegen != nullptr);
 
@@ -1228,6 +1228,10 @@ Status Sorter::TupleSorter::Codegen(FragmentState* state, llvm::Function* compar
   replaced = codegen->ReplaceCallSites(fn, fn, SORTER_HELPER_SYMBOL);
   DCHECK_REPLACE_COUNT(replaced, 2) << LlvmCodeGen::Print(fn);
 
+  replaced = codegen->ReplaceCallSitesWithValue(fn,
+      codegen->GetI32Constant(tuple_size), "get_tuple_size");
+  DCHECK_REPLACE_COUNT(replaced, 3) << LlvmCodeGen::Print(fn);
+
   fn = codegen->FinalizeFunction(fn);
   if (fn == nullptr) {
     return Status("Sorter::TupleSorter::Codegen(): failed to finalize function");