You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2020/08/07 19:15:46 UTC
[impala] branch master updated: IMPALA-9645 Port LLVM codegen to
adapt aarch64
This is an automated email from the ASF dual-hosted git repository.
joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new fab251e IMPALA-9645 Port LLVM codegen to adapt aarch64
fab251e is described below
commit fab251efe3de449d22439dd17798cd414168748c
Author: zhaorenhai <zh...@hotmail.com>
AuthorDate: Sun Apr 12 12:05:52 2020 +0000
IMPALA-9645 Port LLVM codegen to adapt aarch64
On aarch64, the lowered type of struct {bool, int128} has the form
{ {i8}, {i128} }, with no padding added. This differs from x86-64,
where it is { {i8}, [15 x i8], {i128} } with padding added automatically.
This change also adds type conversions between the x86 and aarch64 data types,
and adds some aarch64 CPU features to the LLVM CPU attribute whitelist.
Change-Id: I3f30ee84ea9bf5245da88154632bb69079103d11
Reviewed-on: http://gerrit.cloudera.org:8080/15718
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
---
be/src/codegen/codegen-anyval.cc | 121 +++++++++++++++++++++++++++++++++++----
be/src/codegen/llvm-codegen.cc | 7 +++
be/src/exec/text-converter.cc | 19 ++++++
be/src/exprs/scalar-fn-call.cc | 39 +++++++++++++
4 files changed, 175 insertions(+), 11 deletions(-)
diff --git a/be/src/codegen/codegen-anyval.cc b/be/src/codegen/codegen-anyval.cc
index 66d79e7..1346f95 100644
--- a/be/src/codegen/codegen-anyval.cc
+++ b/be/src/codegen/codegen-anyval.cc
@@ -41,28 +41,56 @@ const char* CodegenAnyVal::LLVM_COLLECTIONVAL_NAME = "struct.impala_udf::Collect
llvm::Type* CodegenAnyVal::GetLoweredType(LlvmCodeGen* cg, const ColumnType& type) {
switch (type.type) {
case TYPE_BOOLEAN: // i16
+#ifndef __aarch64__
return cg->i16_type();
+#else
+ return cg->i64_type();
+#endif
case TYPE_TINYINT: // i16
+#ifndef __aarch64__
return cg->i16_type();
+#else
+ return cg->i64_type();
+#endif
case TYPE_SMALLINT: // i32
+#ifndef __aarch64__
return cg->i32_type();
+#else
+ return cg->i64_type();
+#endif
case TYPE_INT: // i64
return cg->i64_type();
case TYPE_BIGINT: // { i8, i64 }
+#ifndef __aarch64__
return llvm::StructType::get(cg->i8_type(), cg->i64_type());
+#else
+ return llvm::ArrayType::get(cg->i64_type(), 2);
+#endif
case TYPE_FLOAT: // i64
return cg->i64_type();
case TYPE_DOUBLE: // { i8, double }
+#ifndef __aarch64__
return llvm::StructType::get(cg->i8_type(), cg->double_type());
+#else
+ return llvm::ArrayType::get(cg->i64_type(), 2);
+#endif
case TYPE_STRING: // { i64, i8* }
case TYPE_VARCHAR: // { i64, i8* }
case TYPE_CHAR: // Uses StringVal, so same as STRING/VARCHAR.
case TYPE_FIXED_UDA_INTERMEDIATE: // { i64, i8* }
case TYPE_ARRAY: // CollectionVal has same memory layout as StringVal.
case TYPE_MAP: // CollectionVal has same memory layout as StringVal.
+#ifndef __aarch64__
return llvm::StructType::get(cg->i64_type(), cg->ptr_type());
+#else
+ return llvm::ArrayType::get(cg->i64_type(), 2);
+#endif
case TYPE_TIMESTAMP: // { i64, i64 }
+#ifndef __aarch64__
return llvm::StructType::get(cg->i64_type(), cg->i64_type());
+#else
+ return llvm::ArrayType::get(cg->i64_type(), 2);
+#endif
case TYPE_DECIMAL: // %"struct.impala_udf::DecimalVal" (isn't lowered)
// = { {i8}, [15 x i8], {i128} }
return cg->GetNamedType(LLVM_DECIMALVAL_NAME);
@@ -198,9 +226,14 @@ llvm::Value* CodegenAnyVal::GetIsNull(const char* name) const {
case TYPE_BIGINT:
case TYPE_DOUBLE: {
// Lowered type is of form { i8, * }. Get the i8 value.
- llvm::Value* is_null_i8 = builder_->CreateExtractValue(value_, 0);
- DCHECK(is_null_i8->getType() == codegen_->i8_type());
- return builder_->CreateTrunc(is_null_i8, codegen_->bool_type(), name);
+ // On aarch64, Lowered type is of form { i64, * }
+ llvm::Value* is_null = builder_->CreateExtractValue(value_, 0);
+#ifndef __aarch64__
+ DCHECK(is_null->getType() == codegen_->i8_type());
+#else
+ DCHECK(is_null->getType() == codegen_->i64_type());
+#endif
+ return builder_->CreateTrunc(is_null, codegen_->bool_type(), name);
}
case TYPE_DECIMAL: {
// Lowered type is of the form { {i8}, ... }
@@ -240,8 +273,14 @@ void CodegenAnyVal::SetIsNull(llvm::Value* is_null) {
case TYPE_BIGINT:
case TYPE_DOUBLE: {
// Lowered type is of form { i8, * }. Set the i8 value to 'is_null'.
+ // On aarch64, lowered type is of form { i64, * }
+#ifndef __aarch64__
llvm::Value* is_null_ext =
builder_->CreateZExt(is_null, codegen_->i8_type(), "is_null_ext");
+#else
+ llvm::Value* is_null_ext =
+ builder_->CreateZExt(is_null, codegen_->i64_type(), "is_null_ext");
+#endif
value_ = builder_->CreateInsertValue(value_, is_null_ext, 0, name_);
break;
}
@@ -322,14 +361,25 @@ llvm::Value* CodegenAnyVal::GetVal(const char* name) {
return builder_->CreateBitCast(val, codegen_->float_type());
}
case TYPE_BIGINT:
- case TYPE_DOUBLE:
- // Lowered type is of form { i8, * }. Get the second value.
return builder_->CreateExtractValue(value_, 1, name);
+ case TYPE_DOUBLE: {
+ // Lowered type is of form { i8, * }. Get the second value.
+ llvm::Value* val = builder_->CreateExtractValue(value_, 1, name);
+#ifdef __aarch64__
+ val = builder_->CreateBitCast(val, codegen_->double_type());
+#endif
+ return val;
+ }
case TYPE_DECIMAL: {
- // Lowered type is of form { {i8}, [15 x i8], {i128} }. Get the i128 value and
- // truncate it to the correct size. (The {i128} corresponds to the union of the
- // different width int types.)
+#ifdef __aarch64__
+ // On aarch64, the lowered type is of the form { {i8}, {i128} }, with no padding added.
+ uint32_t idxs[] = {1, 0};
+#else
+ // On x86-64, Lowered type is of form { {i8}, [15 x i8], {i128} }.
uint32_t idxs[] = {2, 0};
+#endif
+ // Get the i128 value and truncate it to the correct size.
+ // (The {i128} corresponds to the union of the different width int types.)
llvm::Value* val = builder_->CreateExtractValue(value_, idxs, name);
return builder_->CreateTrunc(val,
codegen_->GetSlotType(type_), name);
@@ -366,16 +416,27 @@ void CodegenAnyVal::SetVal(llvm::Value* val) {
value_ = SetHighBits(32, val, value_, name_);
break;
case TYPE_BIGINT:
+ value_ = builder_->CreateInsertValue(value_, val, 1, name_);
+ break;
case TYPE_DOUBLE:
+#ifdef __aarch64__
+ val = builder_->CreateBitCast(val, codegen_->i64_type());
+#endif
// Lowered type is of form { i8, * }. Set the second value to 'val'.
value_ = builder_->CreateInsertValue(value_, val, 1, name_);
break;
case TYPE_DECIMAL: {
- // Lowered type is of the form { {i8}, [15 x i8], {i128} }. Set the i128 value to
- // 'val'. (The {i128} corresponds to the union of the different width int types.)
+ // Set the i128 value to 'val'.
+ // (The {i128} corresponds to the union of the different width int types.)
DCHECK_EQ(val->getType()->getIntegerBitWidth(), type_.GetByteSize() * 8);
val = builder_->CreateSExt(val, llvm::Type::getIntNTy(codegen_->context(), 128));
+#ifdef __aarch64__
+ // On aarch64, the lowered type is of the form { {i8}, {i128} }, with no padding added.
+ uint32_t idxs[] = {1, 0};
+#else
+ // On X86-64, the Lowered type is of the form { {i8}, [15 x i8], {i128} }
uint32_t idxs[] = {2, 0};
+#endif
value_ = builder_->CreateInsertValue(value_, val, idxs, name_);
break;
}
@@ -430,7 +491,11 @@ void CodegenAnyVal::SetVal(double val) {
llvm::Value* CodegenAnyVal::GetPtr() {
// Set the second pointer value to 'ptr'.
DCHECK(type_.IsStringType() || type_.IsCollectionType());
- return builder_->CreateExtractValue(value_, 1, name_);
+ llvm::Value* val = builder_->CreateExtractValue(value_, 1, name_);
+#ifdef __aarch64__
+ val = builder_->CreateIntToPtr(val, codegen_->ptr_type());
+#endif
+ return val;
}
llvm::Value* CodegenAnyVal::GetLen() {
@@ -444,6 +509,9 @@ void CodegenAnyVal::SetPtr(llvm::Value* ptr) {
// Set the second pointer value to 'ptr'.
DCHECK(type_.IsStringType() || type_.type == TYPE_FIXED_UDA_INTERMEDIATE
|| type_.IsCollectionType());
+#ifdef __aarch64__
+ ptr = builder_->CreatePtrToInt(ptr, codegen_->i64_type());
+#endif
value_ = builder_->CreateInsertValue(value_, ptr, 1, name_);
}
@@ -835,7 +903,9 @@ void CodegenAnyVal::CodegenBranchIfNull(
}
llvm::Value* CodegenAnyVal::GetHighBits(int num_bits, llvm::Value* v, const char* name) {
+#ifndef __aarch64__
DCHECK_EQ(v->getType()->getIntegerBitWidth(), num_bits * 2);
+#endif
llvm::Value* shifted = builder_->CreateAShr(v, num_bits);
return builder_->CreateTrunc(
shifted, llvm::IntegerType::get(codegen_->context(), num_bits));
@@ -849,9 +919,14 @@ llvm::Value* CodegenAnyVal::GetHighBits(int num_bits, llvm::Value* v, const char
llvm::Value* CodegenAnyVal::SetHighBits(
int num_bits, llvm::Value* src, llvm::Value* dst, const char* name) {
DCHECK_LE(src->getType()->getIntegerBitWidth(), num_bits);
+#ifndef __aarch64__
DCHECK_EQ(dst->getType()->getIntegerBitWidth(), num_bits * 2);
llvm::Value* extended_src = builder_->CreateZExt(
src, llvm::IntegerType::get(codegen_->context(), num_bits * 2));
+#else
+ llvm::Value* extended_src = builder_->CreateZExt(src,
+ llvm::IntegerType::get(codegen_->context(), 64));
+#endif
llvm::Value* shifted_src = builder_->CreateShl(extended_src, num_bits);
llvm::Value* masked_dst = builder_->CreateAnd(dst, (1LL << num_bits) - 1);
return builder_->CreateOr(masked_dst, shifted_src, name);
@@ -879,6 +954,18 @@ llvm::Value* CodegenAnyVal::GetNullVal(LlvmCodeGen* codegen, llvm::Type* val_typ
return llvm::ConstantStruct::get(struct_type, null_anyval,
llvm::Constant::getNullValue(type2), llvm::Constant::getNullValue(type3));
}
+#ifdef __aarch64__
+ else if (struct_type->getElementType(0)->isStructTy()) {
+ llvm::StructType* anyval_struct_type =
+ llvm::cast<llvm::StructType>(struct_type->getElementType(0));
+ llvm::Type* is_null_type = anyval_struct_type->getElementType(0);
+ llvm::Constant* null_anyval = llvm::ConstantStruct::get(
+ anyval_struct_type, llvm::ConstantInt::get(is_null_type, 1));
+ llvm::Type* type1 = struct_type->getElementType(1);
+ return llvm::ConstantStruct::get(struct_type, null_anyval,
+ llvm::Constant::getNullValue(type1));
+ }
+#endif
// Return the struct { 1, 0 } (the 'is_null' byte, i.e. the first value's first byte,
// is set to 1, the other bytes don't matter)
DCHECK_EQ(struct_type->getNumElements(), 2);
@@ -888,6 +975,18 @@ llvm::Value* CodegenAnyVal::GetNullVal(LlvmCodeGen* codegen, llvm::Type* val_typ
return llvm::ConstantStruct::get(struct_type, llvm::ConstantInt::get(type1, 1),
llvm::Constant::getNullValue(type2));
}
+#ifdef __aarch64__
+ if (val_type->isArrayTy()) {
+ llvm::ArrayType* array_type = llvm::cast<llvm::ArrayType>(val_type);
+ DCHECK_EQ(array_type->getNumElements(), 2);
+ llvm::Type* type1 = array_type->getElementType();
+ DCHECK(type1->isIntegerTy()) << LlvmCodeGen::Print(type1);
+ std::vector<llvm::Constant *> arrayElts;
+ arrayElts.push_back(llvm::ConstantInt::get(type1, 1));
+ arrayElts.push_back(llvm::Constant::getNullValue(type1));
+ return llvm::ConstantArray::get(array_type, arrayElts);
+ }
+#endif
// Return the int 1 ('is_null' byte is 1, other bytes don't matter)
DCHECK(val_type->isIntegerTy());
return llvm::ConstantInt::get(val_type, 1);
diff --git a/be/src/codegen/llvm-codegen.cc b/be/src/codegen/llvm-codegen.cc
index f942cb9..1834f8e 100644
--- a/be/src/codegen/llvm-codegen.cc
+++ b/be/src/codegen/llvm-codegen.cc
@@ -111,6 +111,12 @@ DECLARE_string(local_library_dir);
// avx512ifma,avx512pf,avx512vbmi,avx512vl,clflushopt,clwb,fma4,mwaitx.1.2,pcommit,pku,
// prefetchwt1,sgx,sha,sse4a,tbm,xop,xsavec,xsaves. If new attrs are added to LLVM,
// they will be disabled until added to this whitelist.
+#ifdef __aarch64__
+DEFINE_string_hidden(llvm_cpu_attr_whitelist, "crc,neon,fp-armv8,crypto",
+ "(Experimental) a comma-separated list of LLVM CPU attribute flags that are enabled "
+ "for runtime code generation. This flag is provided to enable additional LLVM CPU "
+ "attribute flags for testing.");
+#else
DEFINE_string_hidden(llvm_cpu_attr_whitelist, "adx,aes,avx,avx2,bmi,bmi2,cmov,cx16,f16c,"
"fma,fsgsbase,hle,invpcid,lzcnt,mmx,movbe,pclmul,popcnt,prfchw,rdrnd,rdseed,rtm,smap,"
"sse,sse2,sse3,sse4.1,sse4.2,ssse3,xsave,xsaveopt",
@@ -118,6 +124,7 @@ DEFINE_string_hidden(llvm_cpu_attr_whitelist, "adx,aes,avx,avx2,bmi,bmi2,cmov,cx
"for runtime code generation. The default flags are a known-good set that are "
"routinely tested. This flag is provided to enable additional LLVM CPU attribute "
"flags for testing.");
+#endif
namespace impala {
diff --git a/be/src/exec/text-converter.cc b/be/src/exec/text-converter.cc
index 16d01f2..cb69bcc 100644
--- a/be/src/exec/text-converter.cc
+++ b/be/src/exec/text-converter.cc
@@ -302,11 +302,30 @@ Status TextConverter::CodegenWriteSlot(LlvmCodeGen* codegen,
builder.SetInsertPoint(parse_success_block);
// If the parsed value is in parse_return, move it into slot
if (slot_desc->type().type == TYPE_DECIMAL) {
+#ifdef __aarch64__
+ // On aarch64, a 4-byte decimal is still returned as i64, so truncation is needed here.
+ if (slot_desc->slot_size() == 4) {
+ llvm::Value* temp_slot = builder.CreateTrunc(parse_return, codegen->i32_type());
+ builder.CreateStore(temp_slot, slot);
+ } else {
+ llvm::Value* cast_slot =
+ builder.CreateBitCast(slot, parse_return->getType()->getPointerTo());
+ builder.CreateStore(parse_return, cast_slot);
+ }
+#else
// For Decimal values, the return type generated by Clang is struct type rather than
// integer so casting is necessary
llvm::Value* cast_slot =
builder.CreateBitCast(slot, parse_return->getType()->getPointerTo());
builder.CreateStore(parse_return, cast_slot);
+#endif
+#ifdef __aarch64__
+ } else if (slot_desc->type().type == TYPE_DATE) {
+ // On aarch64, for Date values, the return type generated by Clang is i64, not i32,
+ // so truncation is necessary.
+ llvm::Value* temp_slot = builder.CreateTrunc(parse_return, codegen->i32_type());
+ builder.CreateStore(temp_slot, slot);
+#endif
} else if (slot_desc->type().type != TYPE_TIMESTAMP) {
builder.CreateStore(parse_return, slot);
}
diff --git a/be/src/exprs/scalar-fn-call.cc b/be/src/exprs/scalar-fn-call.cc
index b3647ba..0c52bc6 100644
--- a/be/src/exprs/scalar-fn-call.cc
+++ b/be/src/exprs/scalar-fn-call.cc
@@ -349,21 +349,60 @@ Status ScalarFnCall::GetCodegendComputeFnImpl(LlvmCodeGen* codegen, llvm::Functi
llvm::Type* arg_type = CodegenAnyVal::GetUnloweredType(codegen, children_[i]->type());
llvm::Value* arg_val_ptr;
if (i < NumFixedArgs()) {
+#ifndef __aarch64__
// Allocate space to store 'child_fn's result so we can pass the pointer to the UDF.
arg_val_ptr = codegen->CreateEntryBlockAlloca(builder, arg_type, "arg_val_ptr");
udf_args.push_back(arg_val_ptr);
+#else
+ PrimitiveType col_type = children_[i]->type().type;
+ if (col_type != TYPE_BOOLEAN and col_type != TYPE_TINYINT
+ and col_type != TYPE_SMALLINT) {
+ arg_val_ptr = codegen->CreateEntryBlockAlloca(builder, arg_type, "arg_val_ptr");
+ udf_args.push_back(arg_val_ptr);
+ }
+#endif
} else {
// Store the result of 'child_fn' in varargs_buffer[i].
arg_val_ptr =
builder.CreateConstGEP1_32(varargs_buffer, i - NumFixedArgs(), "arg_val_ptr");
}
+#ifndef __aarch64__
DCHECK_EQ(arg_val_ptr->getType(), arg_type->getPointerTo());
// The result of the call must be stored in a lowered AnyVal
llvm::Value* lowered_arg_val_ptr = builder.CreateBitCast(arg_val_ptr,
CodegenAnyVal::GetLoweredPtrType(codegen, children_[i]->type()),
"lowered_arg_val_ptr");
+#else
+ llvm::Value* lowered_arg_val_ptr;
+ if (col_type == TYPE_BOOLEAN or col_type == TYPE_TINYINT
+ or col_type == TYPE_SMALLINT) {
+ lowered_arg_val_ptr = codegen->CreateEntryBlockAlloca(builder,
+ CodegenAnyVal::GetLoweredType(codegen, children_[i]->type()), 1,
+ FunctionContextImpl::VARARGS_BUFFER_ALIGNMENT, "lowered_arg_val_ptr");
+ } else {
+ lowered_arg_val_ptr = builder.CreateBitCast(arg_val_ptr,
+ CodegenAnyVal::GetLoweredPtrType(codegen, children_[i]->type()),
+ "lowered_arg_val_ptr");
+ }
+#endif
CodegenAnyVal::CreateCall(
codegen, &builder, child_fn, child_fn_args, "arg_val", lowered_arg_val_ptr);
+#ifdef __aarch64__
+ if (col_type == TYPE_BOOLEAN or col_type == TYPE_TINYINT
+ or col_type == TYPE_SMALLINT) {
+ if (i < NumFixedArgs()) {
+ arg_val_ptr = builder.CreateTruncOrBitCast(lowered_arg_val_ptr,
+ CodegenAnyVal::GetUnloweredPtrType(codegen, children_[i]->type()),
+ "arg_val_ptr");
+ udf_args.push_back(arg_val_ptr);
+ } else {
+ llvm::Value* tmp_ptr = builder.CreateTruncOrBitCast(lowered_arg_val_ptr,
+ CodegenAnyVal::GetUnloweredPtrType(codegen, children_[i]->type()),
+ "tmp_ptr");
+ builder.CreateStore(builder.CreateLoad(tmp_ptr), arg_val_ptr);
+ }
+ }
+#endif
}
if (vararg_start_idx_ != -1) {