You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2020/08/12 14:34:14 UTC
[impala] 01/02: IMPALA-10061 Fix bugs of IMPALA-9645

This is an automated email from the ASF dual-hosted git repository.

stakiar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 7a02e370be2aada393d6ab062e1c959c712abb9f
Author: zhaorenhai <zh...@hotmail.com>
AuthorDate: Sun Apr 12 12:05:52 2020 +0000

    IMPALA-10061 Fix bugs of IMPALA-9645
    
    Fix one bug of IMPALA-9645.
    
    Also fix an issue where, when the return type is decimal,
    the codegen'd code lacks the 'StructRet' attribute.
    This is not an issue on x86, but on aarch64
    the 'StructRet' attribute is necessary.
    
    Also fix the hash function on aarch64.
    
    Change-Id: I219588992715b7d5c69cd7c0d48ff4d90b980338
    Reviewed-on: http://gerrit.cloudera.org:8080/16306
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/codegen/llvm-codegen.cc | 39 ++++++++++++++++++++++++++++++++++++---
 be/src/exprs/scalar-fn-call.cc |  4 +++-
 be/src/gutil/sysinfo.cc        |  8 ++++++++
 be/src/gutil/sysinfo.h         |  3 +++
 4 files changed, 50 insertions(+), 4 deletions(-)

diff --git a/be/src/codegen/llvm-codegen.cc b/be/src/codegen/llvm-codegen.cc
index 1834f8e..e36c22e 100644
--- a/be/src/codegen/llvm-codegen.cc
+++ b/be/src/codegen/llvm-codegen.cc
@@ -75,6 +75,7 @@
 #include "runtime/runtime-state.h"
 #include "runtime/string-value.h"
 #include "runtime/timestamp-value.h"
+#include "gutil/sysinfo.h"
 #include "util/cpu-info.h"
 #include "util/debug-util.h"
 #include "util/hdfs-util.h"
@@ -892,7 +893,12 @@ Status LlvmCodeGen::LoadFunction(const TFunction& fn, const string& symbol,
     // declaration, not a definition, since we do not create any basic blocks or
     // instructions in it.
     *llvm_fn = prototype.GeneratePrototype(nullptr, nullptr);
-
+#ifdef __aarch64__
+    if (is_decimal) {
+      // Mark first argument as sret
+      (*llvm_fn)->addAttribute(1, llvm::Attribute::StructRet);
+    }
+#endif
     // Associate the dynamically loaded function pointer with the Function* we defined.
     // This tells LLVM where the compiled function definition is located in memory.
     execution_engine_->addGlobalMapping(*llvm_fn, fn_ptr);
@@ -1483,10 +1489,17 @@ Status LlvmCodeGen::LoadIntrinsics() {
     llvm::Intrinsic::ID id;
     const char* error;
   } non_overloaded_intrinsics[] = {
+#ifdef __aarch64__
+      {llvm::Intrinsic::aarch64_crc32cb, "aarch64 crc32_u8"},
+      {llvm::Intrinsic::aarch64_crc32ch, "aarch64 crc32_u16"},
+      {llvm::Intrinsic::aarch64_crc32cw, "aarch64 crc32_u32"},
+      {llvm::Intrinsic::aarch64_crc32cx, "aarch64 crc32_u64"},
+#else
       {llvm::Intrinsic::x86_sse42_crc32_32_8, "sse4.2 crc32_u8"},
       {llvm::Intrinsic::x86_sse42_crc32_32_16, "sse4.2 crc32_u16"},
       {llvm::Intrinsic::x86_sse42_crc32_32_32, "sse4.2 crc32_u32"},
       {llvm::Intrinsic::x86_sse42_crc32_64_64, "sse4.2 crc32_u64"},
+#endif
   };
   const int num_intrinsics =
       sizeof(non_overloaded_intrinsics) / sizeof(non_overloaded_intrinsics[0]);
@@ -1597,7 +1610,7 @@ void LlvmCodeGen::ClearHashFns() {
 //   ret i32 %12
 // }
 llvm::Function* LlvmCodeGen::GetHashFunction(int num_bytes) {
-  if (IsCPUFeatureEnabled(CpuInfo::SSE4_2)) {
+  if (base::IsAarch64() || IsCPUFeatureEnabled(CpuInfo::SSE4_2)) {
     if (num_bytes == -1) {
       // -1 indicates variable length, just return the generic loop based
       // hash fn.
@@ -1622,25 +1635,39 @@ llvm::Function* LlvmCodeGen::GetHashFunction(int num_bytes) {
     llvm::Function* fn = prototype.GeneratePrototype(&builder, &args[0]);
     llvm::Value* data = args[0];
     llvm::Value* result = args[2];
-
+#ifdef __aarch64__
+    llvm::Function* crc8_fn = llvm_intrinsics_[llvm::Intrinsic::aarch64_crc32cb];
+    llvm::Function* crc16_fn = llvm_intrinsics_[llvm::Intrinsic::aarch64_crc32ch];
+    llvm::Function* crc32_fn = llvm_intrinsics_[llvm::Intrinsic::aarch64_crc32cw];
+    llvm::Function* crc64_fn = llvm_intrinsics_[llvm::Intrinsic::aarch64_crc32cx];
+#else
     llvm::Function* crc8_fn = llvm_intrinsics_[llvm::Intrinsic::x86_sse42_crc32_32_8];
     llvm::Function* crc16_fn = llvm_intrinsics_[llvm::Intrinsic::x86_sse42_crc32_32_16];
     llvm::Function* crc32_fn = llvm_intrinsics_[llvm::Intrinsic::x86_sse42_crc32_32_32];
     llvm::Function* crc64_fn = llvm_intrinsics_[llvm::Intrinsic::x86_sse42_crc32_64_64];
+#endif
 
     // Generate the crc instructions starting with the highest number of bytes
     if (num_bytes >= 8) {
+#ifndef __aarch64__
       llvm::Value* result_64 = builder.CreateZExt(result, i64_type());
+#endif
       llvm::Value* ptr = builder.CreateBitCast(data, i64_ptr_type());
       int i = 0;
       while (num_bytes >= 8) {
         llvm::Value* index[] = {GetI32Constant(i++)};
         llvm::Value* d = builder.CreateLoad(builder.CreateInBoundsGEP(ptr, index));
+#ifdef __aarch64__
+        result = builder.CreateCall(crc64_fn, llvm::ArrayRef<llvm::Value*>({result, d}));
+#else
         result_64 =
             builder.CreateCall(crc64_fn, llvm::ArrayRef<llvm::Value*>({result_64, d}));
+#endif
         num_bytes -= 8;
       }
+#ifndef __aarch64__
       result = builder.CreateTrunc(result_64, i32_type());
+#endif
       llvm::Value* index[] = {GetI32Constant(i * 8)};
       // Update data to past the 8-byte chunks
       data = builder.CreateInBoundsGEP(data, index);
@@ -1660,6 +1687,9 @@ llvm::Function* LlvmCodeGen::GetHashFunction(int num_bytes) {
       DCHECK_LT(num_bytes, 4);
       llvm::Value* ptr = builder.CreateBitCast(data, i16_ptr_type());
       llvm::Value* d = builder.CreateLoad(ptr);
+#ifdef __aarch64__
+      d = builder.CreateZExt(d, i32_type());
+#endif
       result = builder.CreateCall(crc16_fn, llvm::ArrayRef<llvm::Value*>({result, d}));
       llvm::Value* index[] = {GetI16Constant(2)};
       data = builder.CreateInBoundsGEP(data, index);
@@ -1669,6 +1699,9 @@ llvm::Function* LlvmCodeGen::GetHashFunction(int num_bytes) {
     if (num_bytes > 0) {
       DCHECK_EQ(num_bytes, 1);
       llvm::Value* d = builder.CreateLoad(data);
+#ifdef __aarch64__
+      d = builder.CreateZExt(d, i32_type());
+#endif
       result = builder.CreateCall(crc8_fn, llvm::ArrayRef<llvm::Value*>({result, d}));
       --num_bytes;
     }
diff --git a/be/src/exprs/scalar-fn-call.cc b/be/src/exprs/scalar-fn-call.cc
index 0c52bc6..aaae77e 100644
--- a/be/src/exprs/scalar-fn-call.cc
+++ b/be/src/exprs/scalar-fn-call.cc
@@ -348,13 +348,15 @@ Status ScalarFnCall::GetCodegendComputeFnImpl(LlvmCodeGen* codegen, llvm::Functi
     DCHECK(child_fn != NULL);
     llvm::Type* arg_type = CodegenAnyVal::GetUnloweredType(codegen, children_[i]->type());
     llvm::Value* arg_val_ptr;
+#ifdef __aarch64__
+    PrimitiveType col_type = children_[i]->type().type;
+#endif
     if (i < NumFixedArgs()) {
 #ifndef __aarch64__
       // Allocate space to store 'child_fn's result so we can pass the pointer to the UDF.
       arg_val_ptr = codegen->CreateEntryBlockAlloca(builder, arg_type, "arg_val_ptr");
       udf_args.push_back(arg_val_ptr);
 #else
-      PrimitiveType col_type = children_[i]->type().type;
       if (col_type != TYPE_BOOLEAN and col_type != TYPE_TINYINT
           and col_type != TYPE_SMALLINT) {
         arg_val_ptr = codegen->CreateEntryBlockAlloca(builder, arg_type, "arg_val_ptr");
diff --git a/be/src/gutil/sysinfo.cc b/be/src/gutil/sysinfo.cc
index 01f2597..adc7554 100644
--- a/be/src/gutil/sysinfo.cc
+++ b/be/src/gutil/sysinfo.cc
@@ -469,4 +469,12 @@ int MaxCPUIndex(void) {
   return cpuinfo_max_cpu_index;
 }
 
+bool IsAarch64(void) {  // Compile-time check: true iff __aarch64__ is defined.
+#ifdef __aarch64__
+  return true;  // This translation unit was built with __aarch64__ defined.
+#else
+  return false;  // Built without __aarch64__ defined.
+#endif
+}
+
 } // namespace base
diff --git a/be/src/gutil/sysinfo.h b/be/src/gutil/sysinfo.h
index d46cfe5..df37b72 100644
--- a/be/src/gutil/sysinfo.h
+++ b/be/src/gutil/sysinfo.h
@@ -65,5 +65,8 @@ extern double CyclesPerSecond(void);
 // Exposed for testing.
 extern int ParseMaxCpuIndex(const char* str);
 
+// Returns true if the current platform is aarch64, false otherwise.
+extern bool IsAarch64();
+
 } // namespace base
 #endif   /* #ifndef _SYSINFO_H_ */