You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2023/02/14 22:36:49 UTC
[impala] 02/04: IMPALA-11916: Replace base::IsAarch64 with constant
This is an automated email from the ASF dual-hosted git repository.
michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 104c1ad5540ba655f54dd1f49a0ffc7c49367efb
Author: stiga-huang <hu...@gmail.com>
AuthorDate: Sun Feb 12 09:02:46 2023 +0800
IMPALA-11916: Replace base::IsAarch64 with constant
base::IsAarch64() returns true for Aarch64 platforms and returns false
for x86 platforms. It shows up in a perf test and consumes some
percents. Note that it's used in the hot path of hash computation. This
patch replaces this method with a constant, IS_AARCH64. Defines the
constant in cpu-info.h since the original place is in a file ported from
gutil.
Test:
- Redo the perf test and don't see base::IsAarch64 anymore.
Change-Id: Id6d62de63a0cb7b94244a1d4f8dcc6b2e65b6d9c
Reviewed-on: http://gerrit.cloudera.org:8080/19492
Reviewed-by: Wenzhe Zhou <wz...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
be/src/codegen/llvm-codegen-test.cc | 3 +--
be/src/codegen/llvm-codegen.cc | 2 +-
be/src/gutil/sysinfo.cc | 8 --------
be/src/gutil/sysinfo.h | 3 ---
be/src/util/bit-util-test.cc | 7 +++----
be/src/util/cpu-info.h | 6 ++++++
be/src/util/hash-util.h | 17 ++++++++---------
7 files changed, 19 insertions(+), 27 deletions(-)
diff --git a/be/src/codegen/llvm-codegen-test.cc b/be/src/codegen/llvm-codegen-test.cc
index 0e33aa912..43a1663e2 100644
--- a/be/src/codegen/llvm-codegen-test.cc
+++ b/be/src/codegen/llvm-codegen-test.cc
@@ -28,7 +28,6 @@
#include "runtime/string-value.h"
#include "runtime/test-env.h"
#include "service/fe-support.h"
-#include "gutil/sysinfo.h"
#include "util/cpu-info.h"
#include "util/filesystem-util.h"
#include "util/hash-util.h"
@@ -533,7 +532,7 @@ TEST_F(LlvmCodeGenTest, CpuAttrWhitelist) {
// arm does not have sse2
EXPECT_EQ(std::unordered_set<string>(
{"-dummy1", "-dummy2", "-dummy3", "-dummy4",
- base::IsAarch64() ? "-sse2" : "+sse2", "-lzcnt"}),
+ IS_AARCH64 ? "-sse2" : "+sse2", "-lzcnt"}),
LlvmCodeGen::ApplyCpuAttrWhitelist(
{"+dummy1", "+dummy2", "-dummy3", "+dummy4", "+sse2", "-lzcnt"}));
// IMPALA-6291: Test that all AVX512 attributes are disabled.
diff --git a/be/src/codegen/llvm-codegen.cc b/be/src/codegen/llvm-codegen.cc
index f0bfbaf7a..d31eee772 100644
--- a/be/src/codegen/llvm-codegen.cc
+++ b/be/src/codegen/llvm-codegen.cc
@@ -1769,7 +1769,7 @@ void LlvmCodeGen::ClearHashFns() {
// ret i32 %12
// }
llvm::Function* LlvmCodeGen::GetHashFunction(int num_bytes) {
- if (base::IsAarch64() || IsCPUFeatureEnabled(CpuInfo::SSE4_2)) {
+ if (IS_AARCH64 || IsCPUFeatureEnabled(CpuInfo::SSE4_2)) {
if (num_bytes == -1) {
// -1 indicates variable length, just return the generic loop based
// hash fn.
diff --git a/be/src/gutil/sysinfo.cc b/be/src/gutil/sysinfo.cc
index adc755440..01f259737 100644
--- a/be/src/gutil/sysinfo.cc
+++ b/be/src/gutil/sysinfo.cc
@@ -469,12 +469,4 @@ int MaxCPUIndex(void) {
return cpuinfo_max_cpu_index;
}
-bool IsAarch64(void) {
-#ifdef __aarch64__
- return true;
-#else
- return false;
-#endif
-}
-
} // namespace base
diff --git a/be/src/gutil/sysinfo.h b/be/src/gutil/sysinfo.h
index df37b720a..d46cfe550 100644
--- a/be/src/gutil/sysinfo.h
+++ b/be/src/gutil/sysinfo.h
@@ -65,8 +65,5 @@ extern double CyclesPerSecond(void);
// Exposed for testing.
extern int ParseMaxCpuIndex(const char* str);
-// Return current platform is aarch64 or not
-extern bool IsAarch64();
-
} // namespace base
#endif /* #ifndef _SYSINFO_H_ */
diff --git a/be/src/util/bit-util-test.cc b/be/src/util/bit-util-test.cc
index f602603ba..d1707a8d4 100644
--- a/be/src/util/bit-util-test.cc
+++ b/be/src/util/bit-util-test.cc
@@ -27,7 +27,6 @@
#include "runtime/multi-precision.h"
#include "testutil/gtest-util.h"
-#include "gutil/sysinfo.h"
#include "util/arithmetic-util.h"
#include "util/bit-util.h"
#include "util/cpu-info.h"
@@ -135,7 +134,7 @@ TEST(BitUtil, TrailingBits) {
void TestByteSwapSimd_Unit(const int64_t CpuFlag) {
void (*bswap_fptr)(const uint8_t* src, uint8_t* dst) = NULL;
int buf_size = 0;
- if (base::IsAarch64() || CpuFlag == CpuInfo::SSSE3) {
+ if (IS_AARCH64 || CpuFlag == CpuInfo::SSSE3) {
buf_size = 16;
bswap_fptr = SimdByteSwap::ByteSwap128;
} else {
@@ -180,7 +179,7 @@ void TestByteSwapSimd(const int64_t CpuFlag, const int buf_size) {
std::iota(src_buf, src_buf + buf_size, 0);
int start_size = 0;
- if (base::IsAarch64() || CpuFlag == CpuInfo::SSSE3) {
+ if (IS_AARCH64 || CpuFlag == CpuInfo::SSSE3) {
start_size = 16;
} else if (CpuFlag == CpuInfo::AVX2) {
start_size = 32;
@@ -189,7 +188,7 @@ void TestByteSwapSimd(const int64_t CpuFlag, const int buf_size) {
for (int i = start_size; i < buf_size; ++i) {
// Initialize dst buffer and swap i bytes.
memset(dst_buf, 0, buf_size);
- if (base::IsAarch64() || CpuFlag == CpuInfo::SSSE3) {
+ if (IS_AARCH64 || CpuFlag == CpuInfo::SSSE3) {
SimdByteSwap::ByteSwapSimd<16>(src_buf, i, dst_buf);
} else if (CpuFlag == CpuInfo::AVX2) {
SimdByteSwap::ByteSwapSimd<32>(src_buf, i, dst_buf);
diff --git a/be/src/util/cpu-info.h b/be/src/util/cpu-info.h
index 7b8cdb830..f3154b110 100644
--- a/be/src/util/cpu-info.h
+++ b/be/src/util/cpu-info.h
@@ -27,6 +27,12 @@
namespace impala {
+#ifdef __aarch64__
+#define IS_AARCH64 true
+#else
+#define IS_AARCH64 false
+#endif
+
/// CpuInfo is an interface to query for cpu information at runtime. The caller can
/// ask for the sizes of the caches and what hardware features are supported.
/// On Linux, this information is pulled from a couple of sys files (/proc/cpuinfo and
diff --git a/be/src/util/hash-util.h b/be/src/util/hash-util.h
index 714cbe376..bb60997ae 100644
--- a/be/src/util/hash-util.h
+++ b/be/src/util/hash-util.h
@@ -21,7 +21,6 @@
#include "common/logging.h"
#include "common/compiler-util.h"
-#include "gutil/sysinfo.h"
#include "util/cpu-info.h"
#include "util/sse-util.h"
@@ -39,7 +38,7 @@ class HashUtil {
/// The resulting hashes are correlated.
/// TODO: update this to also use SSE4_crc32_u64 and SSE4_crc32_u16 where appropriate.
static uint32_t CrcHash(const void* data, int32_t bytes, uint32_t hash) {
- DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || base::IsAarch64());
+ DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || IS_AARCH64);
uint32_t words = bytes / sizeof(uint32_t);
bytes = bytes % sizeof(uint32_t);
@@ -63,7 +62,7 @@ class HashUtil {
/// CrcHash() specialized for 1-byte data
static inline uint32_t CrcHash1(const void* v, uint32_t hash) {
- DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || base::IsAarch64());
+ DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || IS_AARCH64);
const uint8_t* s = reinterpret_cast<const uint8_t*>(v);
hash = SSE4_crc32_u8(hash, *s);
hash = (hash << 16) | (hash >> 16);
@@ -72,7 +71,7 @@ class HashUtil {
/// CrcHash() specialized for 2-byte data
static inline uint32_t CrcHash2(const void* v, uint32_t hash) {
- DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || base::IsAarch64());
+ DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || IS_AARCH64);
const uint16_t* s = reinterpret_cast<const uint16_t*>(v);
hash = SSE4_crc32_u16(hash, *s);
hash = (hash << 16) | (hash >> 16);
@@ -81,7 +80,7 @@ class HashUtil {
/// CrcHash() specialized for 4-byte data
static inline uint32_t CrcHash4(const void* v, uint32_t hash) {
- DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || base::IsAarch64());
+ DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || IS_AARCH64);
const uint32_t* p = reinterpret_cast<const uint32_t*>(v);
hash = SSE4_crc32_u32(hash, *p);
hash = (hash << 16) | (hash >> 16);
@@ -90,7 +89,7 @@ class HashUtil {
/// CrcHash() specialized for 8-byte data
static inline uint32_t CrcHash8(const void* v, uint32_t hash) {
- DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || base::IsAarch64());
+ DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || IS_AARCH64);
const uint64_t* p = reinterpret_cast<const uint64_t*>(v);
hash = SSE4_crc32_u64(hash, *p);
hash = (hash << 16) | (hash >> 16);
@@ -99,7 +98,7 @@ class HashUtil {
/// CrcHash() specialized for 12-byte data
static inline uint32_t CrcHash12(const void* v, uint32_t hash) {
- DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || base::IsAarch64());
+ DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || IS_AARCH64);
const uint64_t* p = reinterpret_cast<const uint64_t*>(v);
hash = SSE4_crc32_u64(hash, *p);
++p;
@@ -110,7 +109,7 @@ class HashUtil {
/// CrcHash() specialized for 16-byte data
static inline uint32_t CrcHash16(const void* v, uint32_t hash) {
- DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || base::IsAarch64());
+ DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || IS_AARCH64);
const uint64_t* p = reinterpret_cast<const uint64_t*>(v);
hash = SSE4_crc32_u64(hash, *p);
++p;
@@ -202,7 +201,7 @@ class HashUtil {
/// Seed values for different steps of the query execution should use different seeds
/// to prevent accidental key collisions. (See IMPALA-219 for more details).
static uint32_t Hash(const void* data, int32_t bytes, uint32_t seed) {
- if (base::IsAarch64() || LIKELY(CpuInfo::IsSupported(CpuInfo::SSE4_2))) {
+ if (IS_AARCH64 || LIKELY(CpuInfo::IsSupported(CpuInfo::SSE4_2))) {
return CrcHash(data, bytes, seed);
} else {
return MurmurHash2_64(data, bytes, seed);