You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2023/02/14 22:36:49 UTC
[impala] 02/04: IMPALA-11916: Replace base::IsAarch64 with constant

This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 104c1ad5540ba655f54dd1f49a0ffc7c49367efb
Author: stiga-huang <hu...@gmail.com>
AuthorDate: Sun Feb 12 09:02:46 2023 +0800

    IMPALA-11916: Replace base::IsAarch64 with constant
    
    base::IsAarch64() returns true for Aarch64 platforms and returns false
    for x86 platforms. It shows up in a perf test and consumes some
    percents. Note that it's used in the hot path of hash computation. This
    patch replaces this method with a constant, IS_AARCH64. Defines the
    constant in cpu-info.h since the original place is in a file ported from
    gutil.
    
    Test:
     - Redo the perf test and don't see base::IsAarch64 anymore.
    
    Change-Id: Id6d62de63a0cb7b94244a1d4f8dcc6b2e65b6d9c
    Reviewed-on: http://gerrit.cloudera.org:8080/19492
    Reviewed-by: Wenzhe Zhou <wz...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/codegen/llvm-codegen-test.cc |  3 +--
 be/src/codegen/llvm-codegen.cc      |  2 +-
 be/src/gutil/sysinfo.cc             |  8 --------
 be/src/gutil/sysinfo.h              |  3 ---
 be/src/util/bit-util-test.cc        |  7 +++----
 be/src/util/cpu-info.h              |  6 ++++++
 be/src/util/hash-util.h             | 17 ++++++++---------
 7 files changed, 19 insertions(+), 27 deletions(-)

diff --git a/be/src/codegen/llvm-codegen-test.cc b/be/src/codegen/llvm-codegen-test.cc
index 0e33aa912..43a1663e2 100644
--- a/be/src/codegen/llvm-codegen-test.cc
+++ b/be/src/codegen/llvm-codegen-test.cc
@@ -28,7 +28,6 @@
 #include "runtime/string-value.h"
 #include "runtime/test-env.h"
 #include "service/fe-support.h"
-#include "gutil/sysinfo.h"
 #include "util/cpu-info.h"
 #include "util/filesystem-util.h"
 #include "util/hash-util.h"
@@ -533,7 +532,7 @@ TEST_F(LlvmCodeGenTest, CpuAttrWhitelist) {
   // arm does not have sse2
   EXPECT_EQ(std::unordered_set<string>(
                 {"-dummy1", "-dummy2", "-dummy3", "-dummy4",
-                base::IsAarch64() ? "-sse2" : "+sse2", "-lzcnt"}),
+                IS_AARCH64 ? "-sse2" : "+sse2", "-lzcnt"}),
       LlvmCodeGen::ApplyCpuAttrWhitelist(
                 {"+dummy1", "+dummy2", "-dummy3", "+dummy4", "+sse2", "-lzcnt"}));
   // IMPALA-6291: Test that all AVX512 attributes are disabled.
diff --git a/be/src/codegen/llvm-codegen.cc b/be/src/codegen/llvm-codegen.cc
index f0bfbaf7a..d31eee772 100644
--- a/be/src/codegen/llvm-codegen.cc
+++ b/be/src/codegen/llvm-codegen.cc
@@ -1769,7 +1769,7 @@ void LlvmCodeGen::ClearHashFns() {
 //   ret i32 %12
 // }
 llvm::Function* LlvmCodeGen::GetHashFunction(int num_bytes) {
-  if (base::IsAarch64() || IsCPUFeatureEnabled(CpuInfo::SSE4_2)) {
+  if (IS_AARCH64 || IsCPUFeatureEnabled(CpuInfo::SSE4_2)) {
     if (num_bytes == -1) {
       // -1 indicates variable length, just return the generic loop based
       // hash fn.
diff --git a/be/src/gutil/sysinfo.cc b/be/src/gutil/sysinfo.cc
index adc755440..01f259737 100644
--- a/be/src/gutil/sysinfo.cc
+++ b/be/src/gutil/sysinfo.cc
@@ -469,12 +469,4 @@ int MaxCPUIndex(void) {
   return cpuinfo_max_cpu_index;
 }
 
-bool IsAarch64(void) {
-#ifdef __aarch64__
-  return true;
-#else
-  return false;
-#endif
-}
-
 } // namespace base
diff --git a/be/src/gutil/sysinfo.h b/be/src/gutil/sysinfo.h
index df37b720a..d46cfe550 100644
--- a/be/src/gutil/sysinfo.h
+++ b/be/src/gutil/sysinfo.h
@@ -65,8 +65,5 @@ extern double CyclesPerSecond(void);
 // Exposed for testing.
 extern int ParseMaxCpuIndex(const char* str);
 
-// Return current platform is aarch64 or not
-extern bool IsAarch64();
-
 } // namespace base
 #endif   /* #ifndef _SYSINFO_H_ */
diff --git a/be/src/util/bit-util-test.cc b/be/src/util/bit-util-test.cc
index f602603ba..d1707a8d4 100644
--- a/be/src/util/bit-util-test.cc
+++ b/be/src/util/bit-util-test.cc
@@ -27,7 +27,6 @@
 
 #include "runtime/multi-precision.h"
 #include "testutil/gtest-util.h"
-#include "gutil/sysinfo.h"
 #include "util/arithmetic-util.h"
 #include "util/bit-util.h"
 #include "util/cpu-info.h"
@@ -135,7 +134,7 @@ TEST(BitUtil, TrailingBits) {
 void TestByteSwapSimd_Unit(const int64_t CpuFlag) {
   void (*bswap_fptr)(const uint8_t* src, uint8_t* dst) = NULL;
   int buf_size = 0;
-  if (base::IsAarch64() || CpuFlag == CpuInfo::SSSE3) {
+  if (IS_AARCH64 || CpuFlag == CpuInfo::SSSE3) {
     buf_size = 16;
     bswap_fptr = SimdByteSwap::ByteSwap128;
   } else {
@@ -180,7 +179,7 @@ void TestByteSwapSimd(const int64_t CpuFlag, const int buf_size) {
   std::iota(src_buf, src_buf + buf_size, 0);
 
   int start_size = 0;
-  if (base::IsAarch64() || CpuFlag == CpuInfo::SSSE3) {
+  if (IS_AARCH64 || CpuFlag == CpuInfo::SSSE3) {
     start_size = 16;
   } else if (CpuFlag == CpuInfo::AVX2) {
     start_size = 32;
@@ -189,7 +188,7 @@ void TestByteSwapSimd(const int64_t CpuFlag, const int buf_size) {
   for (int i = start_size; i < buf_size; ++i) {
     // Initialize dst buffer and swap i bytes.
     memset(dst_buf, 0, buf_size);
-    if (base::IsAarch64() || CpuFlag == CpuInfo::SSSE3) {
+    if (IS_AARCH64 || CpuFlag == CpuInfo::SSSE3) {
       SimdByteSwap::ByteSwapSimd<16>(src_buf, i, dst_buf);
     } else if (CpuFlag == CpuInfo::AVX2) {
       SimdByteSwap::ByteSwapSimd<32>(src_buf, i, dst_buf);
diff --git a/be/src/util/cpu-info.h b/be/src/util/cpu-info.h
index 7b8cdb830..f3154b110 100644
--- a/be/src/util/cpu-info.h
+++ b/be/src/util/cpu-info.h
@@ -27,6 +27,12 @@
 
 namespace impala {
 
+#ifdef __aarch64__
+#define IS_AARCH64 true
+#else
+#define IS_AARCH64 false
+#endif
+
 /// CpuInfo is an interface to query for cpu information at runtime.  The caller can
 /// ask for the sizes of the caches and what hardware features are supported.
 /// On Linux, this information is pulled from a couple of sys files (/proc/cpuinfo and
diff --git a/be/src/util/hash-util.h b/be/src/util/hash-util.h
index 714cbe376..bb60997ae 100644
--- a/be/src/util/hash-util.h
+++ b/be/src/util/hash-util.h
@@ -21,7 +21,6 @@
 
 #include "common/logging.h"
 #include "common/compiler-util.h"
-#include "gutil/sysinfo.h"
 #include "util/cpu-info.h"
 #include "util/sse-util.h"
 
@@ -39,7 +38,7 @@ class HashUtil {
   /// The resulting hashes are correlated.
   /// TODO: update this to also use SSE4_crc32_u64 and SSE4_crc32_u16 where appropriate.
   static uint32_t CrcHash(const void* data, int32_t bytes, uint32_t hash) {
-    DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || base::IsAarch64());
+    DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || IS_AARCH64);
     uint32_t words = bytes / sizeof(uint32_t);
     bytes = bytes % sizeof(uint32_t);
 
@@ -63,7 +62,7 @@ class HashUtil {
 
   /// CrcHash() specialized for 1-byte data
   static inline uint32_t CrcHash1(const void* v, uint32_t hash) {
-    DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || base::IsAarch64());
+    DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || IS_AARCH64);
     const uint8_t* s = reinterpret_cast<const uint8_t*>(v);
     hash = SSE4_crc32_u8(hash, *s);
     hash = (hash << 16) | (hash >> 16);
@@ -72,7 +71,7 @@ class HashUtil {
 
   /// CrcHash() specialized for 2-byte data
   static inline uint32_t CrcHash2(const void* v, uint32_t hash) {
-    DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || base::IsAarch64());
+    DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || IS_AARCH64);
     const uint16_t* s = reinterpret_cast<const uint16_t*>(v);
     hash = SSE4_crc32_u16(hash, *s);
     hash = (hash << 16) | (hash >> 16);
@@ -81,7 +80,7 @@ class HashUtil {
 
   /// CrcHash() specialized for 4-byte data
   static inline uint32_t CrcHash4(const void* v, uint32_t hash) {
-    DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || base::IsAarch64());
+    DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || IS_AARCH64);
     const uint32_t* p = reinterpret_cast<const uint32_t*>(v);
     hash = SSE4_crc32_u32(hash, *p);
     hash = (hash << 16) | (hash >> 16);
@@ -90,7 +89,7 @@ class HashUtil {
 
   /// CrcHash() specialized for 8-byte data
   static inline uint32_t CrcHash8(const void* v, uint32_t hash) {
-    DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || base::IsAarch64());
+    DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || IS_AARCH64);
     const uint64_t* p = reinterpret_cast<const uint64_t*>(v);
     hash = SSE4_crc32_u64(hash, *p);
     hash = (hash << 16) | (hash >> 16);
@@ -99,7 +98,7 @@ class HashUtil {
 
   /// CrcHash() specialized for 12-byte data
   static inline uint32_t CrcHash12(const void* v, uint32_t hash) {
-    DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || base::IsAarch64());
+    DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || IS_AARCH64);
     const uint64_t* p = reinterpret_cast<const uint64_t*>(v);
     hash = SSE4_crc32_u64(hash, *p);
     ++p;
@@ -110,7 +109,7 @@ class HashUtil {
 
   /// CrcHash() specialized for 16-byte data
   static inline uint32_t CrcHash16(const void* v, uint32_t hash) {
-    DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || base::IsAarch64());
+    DCHECK(CpuInfo::IsSupported(CpuInfo::SSE4_2) || IS_AARCH64);
     const uint64_t* p = reinterpret_cast<const uint64_t*>(v);
     hash = SSE4_crc32_u64(hash, *p);
     ++p;
@@ -202,7 +201,7 @@ class HashUtil {
   /// Seed values for different steps of the query execution should use different seeds
   /// to prevent accidental key collisions. (See IMPALA-219 for more details).
   static uint32_t Hash(const void* data, int32_t bytes, uint32_t seed) {
-    if (base::IsAarch64() || LIKELY(CpuInfo::IsSupported(CpuInfo::SSE4_2))) {
+    if (IS_AARCH64 || LIKELY(CpuInfo::IsSupported(CpuInfo::SSE4_2))) {
       return CrcHash(data, bytes, seed);
     } else {
       return MurmurHash2_64(data, bytes, seed);