You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2020/08/12 14:34:14 UTC
[impala] 01/02: IMPALA-10061 Fix bugs of IMPALA-9645
This is an automated email from the ASF dual-hosted git repository.
stakiar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 7a02e370be2aada393d6ab062e1c959c712abb9f
Author: zhaorenhai <zh...@hotmail.com>
AuthorDate: Sun Apr 12 12:05:52 2020 +0000
IMPALA-10061 Fix bugs of IMPALA-9645
Fix one bug in IMPALA-9645.
Also fix an issue when the return type is decimal:
the codegen'd code lacks a 'StructRet' attribute.
This is not an issue on x86, but on aarch64
the 'StructRet' attribute is necessary.
Also fix the hash function on aarch64.
Change-Id: I219588992715b7d5c69cd7c0d48ff4d90b980338
Reviewed-on: http://gerrit.cloudera.org:8080/16306
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
be/src/codegen/llvm-codegen.cc | 39 ++++++++++++++++++++++++++++++++++++---
be/src/exprs/scalar-fn-call.cc | 4 +++-
be/src/gutil/sysinfo.cc | 8 ++++++++
be/src/gutil/sysinfo.h | 3 +++
4 files changed, 50 insertions(+), 4 deletions(-)
diff --git a/be/src/codegen/llvm-codegen.cc b/be/src/codegen/llvm-codegen.cc
index 1834f8e..e36c22e 100644
--- a/be/src/codegen/llvm-codegen.cc
+++ b/be/src/codegen/llvm-codegen.cc
@@ -75,6 +75,7 @@
#include "runtime/runtime-state.h"
#include "runtime/string-value.h"
#include "runtime/timestamp-value.h"
+#include "gutil/sysinfo.h"
#include "util/cpu-info.h"
#include "util/debug-util.h"
#include "util/hdfs-util.h"
@@ -892,7 +893,12 @@ Status LlvmCodeGen::LoadFunction(const TFunction& fn, const string& symbol,
// declaration, not a definition, since we do not create any basic blocks or
// instructions in it.
*llvm_fn = prototype.GeneratePrototype(nullptr, nullptr);
-
+#ifdef __aarch64__
+ if (is_decimal) {
+ // Mark first argument as sret
+ (*llvm_fn)->addAttribute(1, llvm::Attribute::StructRet);
+ }
+#endif
// Associate the dynamically loaded function pointer with the Function* we defined.
// This tells LLVM where the compiled function definition is located in memory.
execution_engine_->addGlobalMapping(*llvm_fn, fn_ptr);
@@ -1483,10 +1489,17 @@ Status LlvmCodeGen::LoadIntrinsics() {
llvm::Intrinsic::ID id;
const char* error;
} non_overloaded_intrinsics[] = {
+#ifdef __aarch64__
+ {llvm::Intrinsic::aarch64_crc32cb, "aarch64 crc32_u8"},
+ {llvm::Intrinsic::aarch64_crc32ch, "aarch64 crc32_u16"},
+ {llvm::Intrinsic::aarch64_crc32cw, "aarch64 crc32_u32"},
+ {llvm::Intrinsic::aarch64_crc32cx, "aarch64 crc32_u64"},
+#else
{llvm::Intrinsic::x86_sse42_crc32_32_8, "sse4.2 crc32_u8"},
{llvm::Intrinsic::x86_sse42_crc32_32_16, "sse4.2 crc32_u16"},
{llvm::Intrinsic::x86_sse42_crc32_32_32, "sse4.2 crc32_u32"},
{llvm::Intrinsic::x86_sse42_crc32_64_64, "sse4.2 crc32_u64"},
+#endif
};
const int num_intrinsics =
sizeof(non_overloaded_intrinsics) / sizeof(non_overloaded_intrinsics[0]);
@@ -1597,7 +1610,7 @@ void LlvmCodeGen::ClearHashFns() {
// ret i32 %12
// }
llvm::Function* LlvmCodeGen::GetHashFunction(int num_bytes) {
- if (IsCPUFeatureEnabled(CpuInfo::SSE4_2)) {
+ if (base::IsAarch64() || IsCPUFeatureEnabled(CpuInfo::SSE4_2)) {
if (num_bytes == -1) {
// -1 indicates variable length, just return the generic loop based
// hash fn.
@@ -1622,25 +1635,39 @@ llvm::Function* LlvmCodeGen::GetHashFunction(int num_bytes) {
llvm::Function* fn = prototype.GeneratePrototype(&builder, &args[0]);
llvm::Value* data = args[0];
llvm::Value* result = args[2];
-
+#ifdef __aarch64__
+ llvm::Function* crc8_fn = llvm_intrinsics_[llvm::Intrinsic::aarch64_crc32cb];
+ llvm::Function* crc16_fn = llvm_intrinsics_[llvm::Intrinsic::aarch64_crc32ch];
+ llvm::Function* crc32_fn = llvm_intrinsics_[llvm::Intrinsic::aarch64_crc32cw];
+ llvm::Function* crc64_fn = llvm_intrinsics_[llvm::Intrinsic::aarch64_crc32cx];
+#else
llvm::Function* crc8_fn = llvm_intrinsics_[llvm::Intrinsic::x86_sse42_crc32_32_8];
llvm::Function* crc16_fn = llvm_intrinsics_[llvm::Intrinsic::x86_sse42_crc32_32_16];
llvm::Function* crc32_fn = llvm_intrinsics_[llvm::Intrinsic::x86_sse42_crc32_32_32];
llvm::Function* crc64_fn = llvm_intrinsics_[llvm::Intrinsic::x86_sse42_crc32_64_64];
+#endif
// Generate the crc instructions starting with the highest number of bytes
if (num_bytes >= 8) {
+#ifndef __aarch64__
llvm::Value* result_64 = builder.CreateZExt(result, i64_type());
+#endif
llvm::Value* ptr = builder.CreateBitCast(data, i64_ptr_type());
int i = 0;
while (num_bytes >= 8) {
llvm::Value* index[] = {GetI32Constant(i++)};
llvm::Value* d = builder.CreateLoad(builder.CreateInBoundsGEP(ptr, index));
+#ifdef __aarch64__
+ result = builder.CreateCall(crc64_fn, llvm::ArrayRef<llvm::Value*>({result, d}));
+#else
result_64 =
builder.CreateCall(crc64_fn, llvm::ArrayRef<llvm::Value*>({result_64, d}));
+#endif
num_bytes -= 8;
}
+#ifndef __aarch64__
result = builder.CreateTrunc(result_64, i32_type());
+#endif
llvm::Value* index[] = {GetI32Constant(i * 8)};
// Update data to past the 8-byte chunks
data = builder.CreateInBoundsGEP(data, index);
@@ -1660,6 +1687,9 @@ llvm::Function* LlvmCodeGen::GetHashFunction(int num_bytes) {
DCHECK_LT(num_bytes, 4);
llvm::Value* ptr = builder.CreateBitCast(data, i16_ptr_type());
llvm::Value* d = builder.CreateLoad(ptr);
+#ifdef __aarch64__
+ d = builder.CreateZExt(d, i32_type());
+#endif
result = builder.CreateCall(crc16_fn, llvm::ArrayRef<llvm::Value*>({result, d}));
llvm::Value* index[] = {GetI16Constant(2)};
data = builder.CreateInBoundsGEP(data, index);
@@ -1669,6 +1699,9 @@ llvm::Function* LlvmCodeGen::GetHashFunction(int num_bytes) {
if (num_bytes > 0) {
DCHECK_EQ(num_bytes, 1);
llvm::Value* d = builder.CreateLoad(data);
+#ifdef __aarch64__
+ d = builder.CreateZExt(d, i32_type());
+#endif
result = builder.CreateCall(crc8_fn, llvm::ArrayRef<llvm::Value*>({result, d}));
--num_bytes;
}
diff --git a/be/src/exprs/scalar-fn-call.cc b/be/src/exprs/scalar-fn-call.cc
index 0c52bc6..aaae77e 100644
--- a/be/src/exprs/scalar-fn-call.cc
+++ b/be/src/exprs/scalar-fn-call.cc
@@ -348,13 +348,15 @@ Status ScalarFnCall::GetCodegendComputeFnImpl(LlvmCodeGen* codegen, llvm::Functi
DCHECK(child_fn != NULL);
llvm::Type* arg_type = CodegenAnyVal::GetUnloweredType(codegen, children_[i]->type());
llvm::Value* arg_val_ptr;
+#ifdef __aarch64__
+ PrimitiveType col_type = children_[i]->type().type;
+#endif
if (i < NumFixedArgs()) {
#ifndef __aarch64__
// Allocate space to store 'child_fn's result so we can pass the pointer to the UDF.
arg_val_ptr = codegen->CreateEntryBlockAlloca(builder, arg_type, "arg_val_ptr");
udf_args.push_back(arg_val_ptr);
#else
- PrimitiveType col_type = children_[i]->type().type;
if (col_type != TYPE_BOOLEAN and col_type != TYPE_TINYINT
and col_type != TYPE_SMALLINT) {
arg_val_ptr = codegen->CreateEntryBlockAlloca(builder, arg_type, "arg_val_ptr");
diff --git a/be/src/gutil/sysinfo.cc b/be/src/gutil/sysinfo.cc
index 01f2597..adc7554 100644
--- a/be/src/gutil/sysinfo.cc
+++ b/be/src/gutil/sysinfo.cc
@@ -469,4 +469,12 @@ int MaxCPUIndex(void) {
return cpuinfo_max_cpu_index;
}
+bool IsAarch64(void) {
+#ifdef __aarch64__
+ return true;
+#else
+ return false;
+#endif
+}
+
} // namespace base
diff --git a/be/src/gutil/sysinfo.h b/be/src/gutil/sysinfo.h
index d46cfe5..df37b72 100644
--- a/be/src/gutil/sysinfo.h
+++ b/be/src/gutil/sysinfo.h
@@ -65,5 +65,8 @@ extern double CyclesPerSecond(void);
// Exposed for testing.
extern int ParseMaxCpuIndex(const char* str);
+// Returns true if the current platform is aarch64, false otherwise.
+extern bool IsAarch64();
+
} // namespace base
#endif /* #ifndef _SYSINFO_H_ */