You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2022/10/11 02:42:46 UTC
[impala] 02/03: IMPALA-11504: Specializing DecimalUtil::GetScaleMultiplier().
This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 6e7cd605fdf3ce95a5993676a87a0209ae671973
Author: yx91490 <yx...@126.com>
AuthorDate: Wed Aug 17 13:08:06 2022 +0800
IMPALA-11504: Specializing DecimalUtil::GetScaleMultiplier<int256_t>().
Previously, decimal-util.h did not specialize
DecimalUtil::GetScaleMultiplier<int256_t>(), which caused a significant
performance loss when calculating Decimal16Value division.
This template specialization results in a speedup of approximately 5600X.
Testing:
- Ran existing jobs.
- Add decimal-util-benchmark.
Change-Id: I969e2977d51313e738f72c8246db003ae43a3782
Reviewed-on: http://gerrit.cloudera.org:8080/18861
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
be/src/benchmarks/CMakeLists.txt | 1 +
be/src/benchmarks/decimal-util-benchmark.cc | 92 +++++++++++++++++
be/src/runtime/decimal-test.cc | 19 ++++
be/src/util/decimal-util.h | 153 ++++++++++++++++++++++------
4 files changed, 235 insertions(+), 30 deletions(-)
diff --git a/be/src/benchmarks/CMakeLists.txt b/be/src/benchmarks/CMakeLists.txt
index f436cefee..2af5e9fe2 100644
--- a/be/src/benchmarks/CMakeLists.txt
+++ b/be/src/benchmarks/CMakeLists.txt
@@ -38,6 +38,7 @@ ADD_BE_BENCHMARK(bitmap-benchmark)
ADD_BE_BENCHMARK(bit-packing-benchmark)
ADD_BE_BENCHMARK(bloom-filter-benchmark)
ADD_BE_BENCHMARK(bswap-benchmark)
+ADD_BE_BENCHMARK(decimal-util-benchmark)
ADD_BE_BENCHMARK(expr-benchmark)
ADD_BE_BENCHMARK(free-lists-benchmark)
ADD_BE_BENCHMARK(hash-benchmark)
diff --git a/be/src/benchmarks/decimal-util-benchmark.cc b/be/src/benchmarks/decimal-util-benchmark.cc
new file mode 100644
index 000000000..e1395175a
--- /dev/null
+++ b/be/src/benchmarks/decimal-util-benchmark.cc
@@ -0,0 +1,92 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <iostream>
+#include <vector>
+
+#include "util/benchmark.h"
+#include "util/cpu-info.h"
+#include "util/decimal-util.h"
+
+// Summary result (ran within a Docker container):
+//
+// Machine Info: AMD Ryzen 9 5950X 16-Core Processor
+// ScaleMultiplierBenchmark: Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile
+// (relative) (relative) (relative)
+// ---------------------------------------------------------------------------------------------------------
+// raw 0.852 0.852 0.852 1X 1X 1X
+// specialized 4.71e+03 4.76e+03 4.81e+03 5.53e+03X 5.59e+03X 5.64e+03X
+
+using namespace impala;
+
+namespace int256_scale_multiplier {
+
+// Appends 'n' pseudo-random values from [0, INT256_SCALE_UPPER_BOUND) to 'data'.
+void AddTestData(vector<int>* data, int n) {
+  for (int remaining = n; remaining > 0; --remaining) {
+    data->push_back(rand() % DecimalUtil::INT256_SCALE_UPPER_BOUND);
+  }
+}
+
+/// Loop-based baseline, copied from Decimal::GetScaleMultiplier: computes 10^scale by
+/// repeated multiplication, so its cost grows linearly with 'scale'. Returns 1 when
+/// 'scale' <= 0.
+static int256_t GetScaleMultiplier(int scale) {
+  int256_t power_of_ten = 1;
+  for (int remaining = scale; remaining > 0; --remaining) power_of_ten *= 10;
+  return power_of_ten;
+}
+
+// Sink for the benchmarked functions' results, which are narrowed to int64_t first.
+// Declared volatile to prevent the compiler from optimising out the calls.
+static volatile int64_t volatile_var = 0;
+
+// Benchmark body for the loop-based baseline. 'd' points to the vector<int> of scales
+// built by AddTestData(); each of the 'batch_size' passes computes the multiplier for
+// every stored scale. Scales are read from the vector rather than derived from the
+// element index so that both benchmarks are measured on the same in-range inputs.
+static void TestRawTemplateCall(int batch_size, void* d) {
+  const vector<int>& scales = *static_cast<vector<int>*>(d);
+  for (int i = 0; i < batch_size; i++) {
+    for (const int scale : scales) {
+      // Store through the volatile sink so the call is not optimised out.
+      volatile_var = GetScaleMultiplier(scale).convert_to<int64_t>();
+    }
+  }
+}
+
+// Benchmark body for the table-based DecimalUtil::GetScaleMultiplier<int256_t>(). 'd'
+// points to the vector<int> of scales built by AddTestData(); each of the 'batch_size'
+// passes looks up the multiplier for every stored scale. Scales are read from the
+// vector rather than derived from the element index, so the lookups stay below
+// INT256_SCALE_UPPER_BOUND instead of mostly taking the overflow branch.
+static void TestSpecializedTemplateCall(int batch_size, void* d) {
+  const vector<int>& scales = *static_cast<vector<int>*>(d);
+  for (int i = 0; i < batch_size; i++) {
+    for (const int scale : scales) {
+      // Store through the volatile sink so the call is not optimised out.
+      volatile_var = DecimalUtil::GetScaleMultiplier<int256_t>(scale).convert_to<int64_t>();
+    }
+  }
+}
+
+} // namespace int256_scale_multiplier
+
+// Prints machine info, then runs both benchmarks over the same 1000 generated values.
+int main(int argc, char** argv) {
+  namespace bm = int256_scale_multiplier;
+  CpuInfo::Init();
+  std::cout << Benchmark::GetMachineInfo() << std::endl;
+  vector<int> scales;
+  bm::AddTestData(&scales, 1000);
+  Benchmark suite("ScaleMultiplierBenchmark");
+  suite.AddBenchmark("raw", bm::TestRawTemplateCall, &scales);
+  suite.AddBenchmark("specialized", bm::TestSpecializedTemplateCall, &scales);
+  std::cout << suite.Measure() << std::endl;
+  return 0;
+}
diff --git a/be/src/runtime/decimal-test.cc b/be/src/runtime/decimal-test.cc
index c5aed53c1..34624b134 100644
--- a/be/src/runtime/decimal-test.cc
+++ b/be/src/runtime/decimal-test.cc
@@ -1004,5 +1004,24 @@ TEST(DecimalTest, PrecisionScaleValidation) {
EXPECT_FALSE(ColumnType::ValidateDecimalParams(15, 16));
}
+// Checks GetScaleMultiplier<T>() against successive powers of ten, then the overflow value.
+template <typename T>
+static void TestGetScaleMultiplier(int scale_upper_bound, T overflow_val) {
+  T power_of_ten = 1;
+  for (int scale = 0; scale < scale_upper_bound; ++scale, power_of_ten *= 10) {
+    EXPECT_EQ(power_of_ten, DecimalUtil::GetScaleMultiplier<T>(scale));
+  }
+  // The first scale past the valid range must produce 'overflow_val'.
+  EXPECT_EQ(overflow_val, DecimalUtil::GetScaleMultiplier<T>(scale_upper_bound));
+}
+
+TEST(DecimalTest, GetScaleMultiplier) {
+  // The integer types are expected to report overflow as -1; for double, the value at
+  // INT64_SCALE_UPPER_BOUND is expected to be 10^19.
+  constexpr int kOverflow = -1;
+  TestGetScaleMultiplier<int32_t>(DecimalUtil::INT32_SCALE_UPPER_BOUND, kOverflow);
+  TestGetScaleMultiplier<int64_t>(DecimalUtil::INT64_SCALE_UPPER_BOUND, kOverflow);
+  TestGetScaleMultiplier<int128_t>(DecimalUtil::INT128_SCALE_UPPER_BOUND, kOverflow);
+  TestGetScaleMultiplier<int256_t>(DecimalUtil::INT256_SCALE_UPPER_BOUND, kOverflow);
+  TestGetScaleMultiplier<double>(DecimalUtil::INT64_SCALE_UPPER_BOUND, 1E19);
+}
+
}
diff --git a/be/src/util/decimal-util.h b/be/src/util/decimal-util.h
index 6c1d94f19..b9bb8bf1e 100644
--- a/be/src/util/decimal-util.h
+++ b/be/src/util/decimal-util.h
@@ -34,6 +34,15 @@ namespace impala {
class DecimalUtil {
public:
+ // Exclusive upper bound on 'scale' for GetScaleMultiplier<int32_t>(); also its table size.
+ static constexpr int INT32_SCALE_UPPER_BOUND = ColumnType::MAX_DECIMAL4_PRECISION + 1;
+ // Exclusive upper bound on 'scale' for GetScaleMultiplier<int64_t>(); also its table size.
+ static constexpr int INT64_SCALE_UPPER_BOUND = ColumnType::MAX_DECIMAL8_PRECISION + 1;
+ // Exclusive upper bound on 'scale' for GetScaleMultiplier<int128_t>(); also its table size.
+ static constexpr int INT128_SCALE_UPPER_BOUND = ColumnType::MAX_PRECISION + 1;
+ // Exclusive upper bound on 'scale' for GetScaleMultiplier<int256_t>(); its table ends at 10^76.
+ static constexpr int INT256_SCALE_UPPER_BOUND = 77;
+
// Helper function that checks for multiplication overflow. We only check for overflow
// if may_overflow is false.
template <typename T>
@@ -166,7 +175,7 @@ class DecimalUtil {
template <>
inline int32_t DecimalUtil::GetScaleMultiplier<int32_t>(int scale) {
DCHECK_GE(scale, 0);
- static const int32_t values[] = {
+ static constexpr int32_t values[INT32_SCALE_UPPER_BOUND] = {
1,
10,
100,
@@ -177,15 +186,14 @@ inline int32_t DecimalUtil::GetScaleMultiplier<int32_t>(int scale) {
10000000,
100000000,
1000000000};
- DCHECK_GE(sizeof(values) / sizeof(int32_t), ColumnType::MAX_DECIMAL4_PRECISION);
- if (LIKELY(scale < 10)) return values[scale];
+ if (LIKELY(scale < INT32_SCALE_UPPER_BOUND)) return values[scale];
return -1; // Overflow
}
template <>
inline int64_t DecimalUtil::GetScaleMultiplier<int64_t>(int scale) {
DCHECK_GE(scale, 0);
- static const int64_t values[] = {
+ static constexpr int64_t values[INT64_SCALE_UPPER_BOUND] = {
1ll,
10ll,
100ll,
@@ -205,15 +213,15 @@ inline int64_t DecimalUtil::GetScaleMultiplier<int64_t>(int scale) {
10000000000000000ll,
100000000000000000ll,
1000000000000000000ll};
- DCHECK_GE(sizeof(values) / sizeof(int64_t), ColumnType::MAX_DECIMAL8_PRECISION);
- if (LIKELY(scale < 19)) return values[scale];
+ if (LIKELY(scale < INT64_SCALE_UPPER_BOUND)) return values[scale];
return -1; // Overflow
}
template <>
inline int128_t DecimalUtil::GetScaleMultiplier<int128_t>(int scale) {
DCHECK_GE(scale, 0);
- static const int128_t values[] = {
+ static constexpr int128_t i10e18{1000000000000000000ll};
+ static constexpr int128_t values[INT128_SCALE_UPPER_BOUND] = {
static_cast<int128_t>(1ll),
static_cast<int128_t>(10ll),
static_cast<int128_t>(100ll),
@@ -232,29 +240,114 @@ inline int128_t DecimalUtil::GetScaleMultiplier<int128_t>(int scale) {
static_cast<int128_t>(1000000000000000ll),
static_cast<int128_t>(10000000000000000ll),
static_cast<int128_t>(100000000000000000ll),
- static_cast<int128_t>(1000000000000000000ll),
- static_cast<int128_t>(1000000000000000000ll) * 10ll,
- static_cast<int128_t>(1000000000000000000ll) * 100ll,
- static_cast<int128_t>(1000000000000000000ll) * 1000ll,
- static_cast<int128_t>(1000000000000000000ll) * 10000ll,
- static_cast<int128_t>(1000000000000000000ll) * 100000ll,
- static_cast<int128_t>(1000000000000000000ll) * 1000000ll,
- static_cast<int128_t>(1000000000000000000ll) * 10000000ll,
- static_cast<int128_t>(1000000000000000000ll) * 100000000ll,
- static_cast<int128_t>(1000000000000000000ll) * 1000000000ll,
- static_cast<int128_t>(1000000000000000000ll) * 10000000000ll,
- static_cast<int128_t>(1000000000000000000ll) * 100000000000ll,
- static_cast<int128_t>(1000000000000000000ll) * 1000000000000ll,
- static_cast<int128_t>(1000000000000000000ll) * 10000000000000ll,
- static_cast<int128_t>(1000000000000000000ll) * 100000000000000ll,
- static_cast<int128_t>(1000000000000000000ll) * 1000000000000000ll,
- static_cast<int128_t>(1000000000000000000ll) * 10000000000000000ll,
- static_cast<int128_t>(1000000000000000000ll) * 100000000000000000ll,
- static_cast<int128_t>(1000000000000000000ll) * 100000000000000000ll * 10ll,
- static_cast<int128_t>(1000000000000000000ll) * 100000000000000000ll * 100ll,
- static_cast<int128_t>(1000000000000000000ll) * 100000000000000000ll * 1000ll};
- DCHECK_GE(sizeof(values) / sizeof(int128_t), ColumnType::MAX_PRECISION);
- if (LIKELY(scale < 39)) return values[scale];
+ i10e18,
+ i10e18 * 10ll,
+ i10e18 * 100ll,
+ i10e18 * 1000ll,
+ i10e18 * 10000ll,
+ i10e18 * 100000ll,
+ i10e18 * 1000000ll,
+ i10e18 * 10000000ll,
+ i10e18 * 100000000ll,
+ i10e18 * 1000000000ll,
+ i10e18 * 10000000000ll,
+ i10e18 * 100000000000ll,
+ i10e18 * 1000000000000ll,
+ i10e18 * 10000000000000ll,
+ i10e18 * 100000000000000ll,
+ i10e18 * 1000000000000000ll,
+ i10e18 * 10000000000000000ll,
+ i10e18 * 100000000000000000ll,
+ i10e18 * i10e18,
+ i10e18 * i10e18 * 10ll,
+ i10e18 * i10e18 * 100ll};
+ if (LIKELY(scale < INT128_SCALE_UPPER_BOUND)) return values[scale];
+ return -1; // Overflow
+}
+
+template <> // Table-based specialization: returns 10^scale, or -1 once 'scale' is out of range.
+inline int256_t DecimalUtil::GetScaleMultiplier<int256_t>(int scale) {
+ DCHECK_GE(scale, 0); // Negative scales are not handled by the bounds check below.
+ static constexpr int256_t i10e18{1000000000000000000ll}; // 10^18, the building block for larger entries.
+ static constexpr int256_t values[INT256_SCALE_UPPER_BOUND] = { // values[i] == 10^i
+ static_cast<int256_t>(1ll), // 10^0 .. 10^17: plain 64-bit literals
+ static_cast<int256_t>(10ll),
+ static_cast<int256_t>(100ll),
+ static_cast<int256_t>(1000ll),
+ static_cast<int256_t>(10000ll),
+ static_cast<int256_t>(100000ll),
+ static_cast<int256_t>(1000000ll),
+ static_cast<int256_t>(10000000ll),
+ static_cast<int256_t>(100000000ll),
+ static_cast<int256_t>(1000000000ll),
+ static_cast<int256_t>(10000000000ll),
+ static_cast<int256_t>(100000000000ll),
+ static_cast<int256_t>(1000000000000ll),
+ static_cast<int256_t>(10000000000000ll),
+ static_cast<int256_t>(100000000000000ll),
+ static_cast<int256_t>(1000000000000000ll),
+ static_cast<int256_t>(10000000000000000ll),
+ static_cast<int256_t>(100000000000000000ll),
+ i10e18, // 10^18 .. 10^35
+ i10e18 * 10ll,
+ i10e18 * 100ll,
+ i10e18 * 1000ll,
+ i10e18 * 10000ll,
+ i10e18 * 100000ll,
+ i10e18 * 1000000ll,
+ i10e18 * 10000000ll,
+ i10e18 * 100000000ll,
+ i10e18 * 1000000000ll,
+ i10e18 * 10000000000ll,
+ i10e18 * 100000000000ll,
+ i10e18 * 1000000000000ll,
+ i10e18 * 10000000000000ll,
+ i10e18 * 100000000000000ll,
+ i10e18 * 1000000000000000ll,
+ i10e18 * 10000000000000000ll,
+ i10e18 * 100000000000000000ll,
+ i10e18 * i10e18, // 10^36 .. 10^53
+ i10e18 * i10e18 * 10ll,
+ i10e18 * i10e18 * 100ll,
+ i10e18 * i10e18 * 1000ll,
+ i10e18 * i10e18 * 10000ll,
+ i10e18 * i10e18 * 100000ll,
+ i10e18 * i10e18 * 1000000ll,
+ i10e18 * i10e18 * 10000000ll,
+ i10e18 * i10e18 * 100000000ll,
+ i10e18 * i10e18 * 1000000000ll,
+ i10e18 * i10e18 * 10000000000ll,
+ i10e18 * i10e18 * 100000000000ll,
+ i10e18 * i10e18 * 1000000000000ll,
+ i10e18 * i10e18 * 10000000000000ll,
+ i10e18 * i10e18 * 100000000000000ll,
+ i10e18 * i10e18 * 1000000000000000ll,
+ i10e18 * i10e18 * 10000000000000000ll,
+ i10e18 * i10e18 * 100000000000000000ll,
+ i10e18 * i10e18 * i10e18, // 10^54 .. 10^71
+ i10e18 * i10e18 * i10e18 * 10ll,
+ i10e18 * i10e18 * i10e18 * 100ll,
+ i10e18 * i10e18 * i10e18 * 1000ll,
+ i10e18 * i10e18 * i10e18 * 10000ll,
+ i10e18 * i10e18 * i10e18 * 100000ll,
+ i10e18 * i10e18 * i10e18 * 1000000ll,
+ i10e18 * i10e18 * i10e18 * 10000000ll,
+ i10e18 * i10e18 * i10e18 * 100000000ll,
+ i10e18 * i10e18 * i10e18 * 1000000000ll,
+ i10e18 * i10e18 * i10e18 * 10000000000ll,
+ i10e18 * i10e18 * i10e18 * 100000000000ll,
+ i10e18 * i10e18 * i10e18 * 1000000000000ll,
+ i10e18 * i10e18 * i10e18 * 10000000000000ll,
+ i10e18 * i10e18 * i10e18 * 100000000000000ll,
+ i10e18 * i10e18 * i10e18 * 1000000000000000ll,
+ i10e18 * i10e18 * i10e18 * 10000000000000000ll,
+ i10e18 * i10e18 * i10e18 * 100000000000000000ll,
+ i10e18 * i10e18 * i10e18 * i10e18, // 10^72 .. 10^76
+ i10e18 * i10e18 * i10e18 * i10e18 * 10ll,
+ i10e18 * i10e18 * i10e18 * i10e18 * 100ll,
+ i10e18 * i10e18 * i10e18 * i10e18 * 1000ll,
+ i10e18 * i10e18 * i10e18 * i10e18 * 10000ll}; // 10^76, the last table entry
+ if (LIKELY(scale < INT256_SCALE_UPPER_BOUND)) return values[scale];
return -1; // Overflow: scale >= INT256_SCALE_UPPER_BOUND
}
}