You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2020/07/16 00:27:08 UTC
[impala] branch master updated: IMPALA-9956: inline hot functions
in Sorter
This is an automated email from the ASF dual-hosted git repository.
tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new dabcede IMPALA-9956: inline hot functions in Sorter
dabcede is described below
commit dabcedeca363051a9d73f2abb1a88ff230e41cc7
Author: Tim Armstrong <ta...@cloudera.com>
AuthorDate: Wed Jul 15 10:51:36 2020 -0700
IMPALA-9956: inline hot functions in Sorter
Add some compiler hints to force inlining of small
functions into the hot Partition() loop.
Performance:
A single node perf run on TPC-H showed no perf change.
A single node performance run with the queries that target
sort performance showed up to a 19% reduction in time spent
in the sort.
+-------------------+-----------------------+---------+------------+------------+----------------+
| Workload | File Format | Avg (s) | Delta(Avg) | GeoMean(s) | Delta(GeoMean) |
+-------------------+-----------------------+---------+------------+------------+----------------+
| TARGETED-PERF(30) | parquet / none / none | 5.52 | -5.82% | 4.00 | -9.74% |
+-------------------+-----------------------+---------+------------+------------+----------------+
+-------------------+-------------------------------------+-----------------------+--------+-------------+------------+-----------+----------------+-------+----------------+---------+--------+
| Workload | Query | File Format | Avg(s) | Base Avg(s) | Delta(Avg) | StdDev(%) | Base StdDev(%) | Iters | Median Diff(%) | MW Zval | Tval |
+-------------------+-------------------------------------+-----------------------+--------+-------------+------------+-----------+----------------+-------+----------------+---------+--------+
| TARGETED-PERF(30) | primitive_orderby_all | parquet / none / none | 11.89 | 12.22 | -2.73% | 1.07% | 1.20% | 10 | -2.88% | -3.13 | -5.42 |
| TARGETED-PERF(30) | primitive_orderby_bigint_expression | parquet / none / none | 2.61 | 2.94 | I -11.27% | 0.83% | 1.14% | 10 | I -12.56% | -3.58 | -26.25 |
| TARGETED-PERF(30) | primitive_orderby_bigint | parquet / none / none | 2.06 | 2.42 | I -14.80% | 0.94% | 0.68% | 10 | I -17.43% | -3.58 | -44.37 |
+-------------------+-------------------------------------+-----------------------+--------+-------------+------------+-----------+----------------+-------+----------------+---------+--------+
(I) Improvement: TARGETED-PERF(30) primitive_orderby_bigint_expression [parquet / none / none] (2.94s -> 2.61s [-11.27%])
+---------------------+------------+----------+----------+------------+-----------+----------+----------+------------+--------+-------+-------+-----------+
| Operator | % of Query | Avg | Base Avg | Delta(Avg) | StdDev(%) | Max | Base Max | Delta(Max) | #Hosts | #Inst | #Rows | Est #Rows |
+---------------------+------------+----------+----------+------------+-----------+----------+----------+------------+--------+-------+-------+-----------+
| 02:ANALYTIC | 11.84% | 332.95ms | 337.56ms | -1.37% | 4.86% | 360.86ms | 379.52ms | -4.92% | 1 | 1 | 5.09M | 18.00M |
| F00:EXCHANGE SENDER | 15.61% | 439.03ms | 454.63ms | -3.43% | 4.86% | 478.29ms | 485.79ms | -1.55% | 1 | 1 | -1 | -1 |
| 01:SORT | 67.05% | 1.89s | 2.21s | -14.88% | 0.98% | 1.92s | 2.26s | -15.07% | 1 | 1 | 5.09M | 18.00M |
+---------------------+------------+----------+----------+------------+-----------+----------+----------+------------+--------+-------+-------+-----------+
(I) Improvement: TARGETED-PERF(30) primitive_orderby_bigint [parquet / none / none] (2.42s -> 2.06s [-14.80%])
+---------------------+------------+----------+----------+------------+-----------+----------+----------+------------+--------+-------+-------+-----------+
| Operator | % of Query | Avg | Base Avg | Delta(Avg) | StdDev(%) | Max | Base Max | Delta(Max) | #Hosts | #Inst | #Rows | Est #Rows |
+---------------------+------------+----------+----------+------------+-----------+----------+----------+------------+--------+-------+-------+-----------+
| 02:ANALYTIC | 15.39% | 367.90ms | 373.26ms | -1.44% | 3.48% | 390.03ms | 393.01ms | -0.76% | 1 | 1 | 5.09M | 18.00M |
| F00:EXCHANGE SENDER | 15.64% | 373.88ms | 374.12ms | -0.07% | 2.83% | 389.96ms | 386.36ms | +0.93% | 1 | 1 | -1 | -1 |
| 01:SORT | 56.28% | 1.35s | 1.68s | -20.10% | 1.14% | 1.38s | 1.70s | -18.92% | 1 | 1 | 5.09M | 18.00M |
| 00:SCAN HDFS | 9.67% | 231.18ms | 231.77ms | -0.25% | 7.06% | 247.79ms | 250.70ms | -1.16% | 1 | 1 | 5.09M | 18.00M |
+---------------------+------------+----------+----------+------------+-----------+----------+----------+------------+--------+-------+-------+-----------+
Change-Id: I7a8034ab6d2e3c71a2d2f2fcc3d6b788e9398194
Reviewed-on: http://gerrit.cloudera.org:8080/16202
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
be/src/runtime/sorter-internal.h | 5 ++++-
be/src/runtime/sorter-ir.cc | 14 +++++++++-----
2 files changed, 13 insertions(+), 6 deletions(-)
diff --git a/be/src/runtime/sorter-internal.h b/be/src/runtime/sorter-internal.h
index 492fc95..dbbf618 100644
--- a/be/src/runtime/sorter-internal.h
+++ b/be/src/runtime/sorter-internal.h
@@ -23,6 +23,8 @@
#include <random>
+#include "common/compiler-util.h"
+
namespace impala {
/// Wrapper around BufferPool::PageHandle that tracks additional info about the page.
@@ -496,7 +498,8 @@ class Sorter::TupleSorter {
Tuple* MedianOfThree(Tuple* t1, Tuple* t2, Tuple* t3);
/// Swaps tuples pointed to by left and right using 'swap_tuple'.
- static void Swap(Tuple* left, Tuple* right, Tuple* swap_tuple, int tuple_size);
+ static inline void Swap(Tuple* RESTRICT left, Tuple* RESTRICT right,
+ Tuple* RESTRICT swap_tuple, int tuple_size);
};
} // namespace impala
diff --git a/be/src/runtime/sorter-ir.cc b/be/src/runtime/sorter-ir.cc
index 4da6bdd..2c07efc 100644
--- a/be/src/runtime/sorter-ir.cc
+++ b/be/src/runtime/sorter-ir.cc
@@ -48,21 +48,24 @@ void Sorter::TupleIterator::PrevPage(Sorter::Run* run) {
tuple_ = run->fixed_len_pages_[page_index_].data() + last_tuple_page_offset;
}
-void Sorter::TupleIterator::Next(Sorter::Run* run, int tuple_size) {
+// IMPALA-9956: Function is not inlined into Partition() without inline hint.
+inline void Sorter::TupleIterator::Next(Sorter::Run* run, int tuple_size) {
DCHECK_LT(index_, run->num_tuples()) << "Can only advance one past end of run";
tuple_ += tuple_size;
++index_;
if (UNLIKELY(index_ >= buffer_end_index_)) NextPage(run);
}
-void Sorter::TupleIterator::Prev(Sorter::Run* run, int tuple_size) {
+// IMPALA-9956: Function is not inlined into Partition() without inline hint.
+inline void Sorter::TupleIterator::Prev(Sorter::Run* run, int tuple_size) {
DCHECK_GE(index_, 0) << "Can only advance one before start of run";
tuple_ -= tuple_size;
--index_;
if (UNLIKELY(index_ < buffer_start_index_)) PrevPage(run);
}
-bool Sorter::TupleSorter::Less(const TupleRow* lhs, const TupleRow* rhs) {
+// IMPALA-9956: Function is not inlined into Partition() without inline hint.
+inline bool Sorter::TupleSorter::Less(const TupleRow* lhs, const TupleRow* rhs) {
--num_comparisons_till_free_;
DCHECK_GE(num_comparisons_till_free_, 0);
if (UNLIKELY(num_comparisons_till_free_ == 0)) {
@@ -242,8 +245,9 @@ Tuple* Sorter::TupleSorter::MedianOfThree(Tuple* t1, Tuple* t2, Tuple* t3) {
}
}
-void Sorter::TupleSorter::Swap(Tuple* left, Tuple* right, Tuple* swap_tuple,
- int tuple_size) {
+// IMPALA-9956: Function is not inlined into Partition() without inline hint.
+inline void Sorter::TupleSorter::Swap(Tuple* RESTRICT left, Tuple* RESTRICT right,
+ Tuple* RESTRICT swap_tuple, int tuple_size) {
memcpy(swap_tuple, left, tuple_size);
memcpy(left, right, tuple_size);
memcpy(right, swap_tuple, tuple_size);