You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2020/08/07 19:15:46 UTC

[impala] branch master updated: IMPALA-9645 Port LLVM codegen to adapt aarch64

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new fab251e  IMPALA-9645 Port LLVM codegen to adapt aarch64
fab251e is described below

commit fab251efe3de449d22439dd17798cd414168748c
Author: zhaorenhai <zh...@hotmail.com>
AuthorDate: Sun Apr 12 12:05:52 2020 +0000

    IMPALA-9645 Port LLVM codegen to adapt aarch64
    
    On aarch64, the lowered type of struct {bool, int128} has the form
    { {i8}, {i128} }, with no padding added. This differs from x86-64,
    where it is { {i8}, [15 x i8], {i128} }, with padding added automatically.
    
    This change also adds some type conversions between x86 and aarch64 data types.
    
    It also adds some aarch64 CPU features to the LLVM CPU attribute whitelist.
    
    Change-Id: I3f30ee84ea9bf5245da88154632bb69079103d11
    Reviewed-on: http://gerrit.cloudera.org:8080/15718
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Tim Armstrong <ta...@cloudera.com>
---
 be/src/codegen/codegen-anyval.cc | 121 +++++++++++++++++++++++++++++++++++----
 be/src/codegen/llvm-codegen.cc   |   7 +++
 be/src/exec/text-converter.cc    |  19 ++++++
 be/src/exprs/scalar-fn-call.cc   |  39 +++++++++++++
 4 files changed, 175 insertions(+), 11 deletions(-)

diff --git a/be/src/codegen/codegen-anyval.cc b/be/src/codegen/codegen-anyval.cc
index 66d79e7..1346f95 100644
--- a/be/src/codegen/codegen-anyval.cc
+++ b/be/src/codegen/codegen-anyval.cc
@@ -41,28 +41,56 @@ const char* CodegenAnyVal::LLVM_COLLECTIONVAL_NAME = "struct.impala_udf::Collect
 llvm::Type* CodegenAnyVal::GetLoweredType(LlvmCodeGen* cg, const ColumnType& type) {
   switch (type.type) {
     case TYPE_BOOLEAN: // i16
+#ifndef __aarch64__
       return cg->i16_type();
+#else
+      return cg->i64_type();
+#endif
     case TYPE_TINYINT: // i16
+#ifndef __aarch64__
       return cg->i16_type();
+#else
+      return cg->i64_type();
+#endif
     case TYPE_SMALLINT: // i32
+#ifndef __aarch64__
       return cg->i32_type();
+#else
+      return cg->i64_type();
+#endif
     case TYPE_INT: // i64
       return cg->i64_type();
     case TYPE_BIGINT: // { i8, i64 }
+#ifndef __aarch64__
       return llvm::StructType::get(cg->i8_type(), cg->i64_type());
+#else
+      return llvm::ArrayType::get(cg->i64_type(), 2);
+#endif
     case TYPE_FLOAT: // i64
       return cg->i64_type();
     case TYPE_DOUBLE: // { i8, double }
+#ifndef __aarch64__
       return llvm::StructType::get(cg->i8_type(), cg->double_type());
+#else
+      return llvm::ArrayType::get(cg->i64_type(), 2);
+#endif
     case TYPE_STRING: // { i64, i8* }
     case TYPE_VARCHAR: // { i64, i8* }
     case TYPE_CHAR: // Uses StringVal, so same as STRING/VARCHAR.
     case TYPE_FIXED_UDA_INTERMEDIATE: // { i64, i8* }
     case TYPE_ARRAY: // CollectionVal has same memory layout as StringVal.
     case TYPE_MAP: // CollectionVal has same memory layout as StringVal.
+#ifndef __aarch64__
       return llvm::StructType::get(cg->i64_type(), cg->ptr_type());
+#else
+      return llvm::ArrayType::get(cg->i64_type(), 2);
+#endif
     case TYPE_TIMESTAMP: // { i64, i64 }
+#ifndef __aarch64__
       return llvm::StructType::get(cg->i64_type(), cg->i64_type());
+#else
+      return llvm::ArrayType::get(cg->i64_type(), 2);
+#endif
     case TYPE_DECIMAL: // %"struct.impala_udf::DecimalVal" (isn't lowered)
                        // = { {i8}, [15 x i8], {i128} }
       return cg->GetNamedType(LLVM_DECIMALVAL_NAME);
@@ -198,9 +226,14 @@ llvm::Value* CodegenAnyVal::GetIsNull(const char* name) const {
     case TYPE_BIGINT:
     case TYPE_DOUBLE: {
       // Lowered type is of form { i8, * }. Get the i8 value.
-      llvm::Value* is_null_i8 = builder_->CreateExtractValue(value_, 0);
-      DCHECK(is_null_i8->getType() == codegen_->i8_type());
-      return builder_->CreateTrunc(is_null_i8, codegen_->bool_type(), name);
+      // On aarch64, Lowered type is of form { i64, * }
+      llvm::Value* is_null = builder_->CreateExtractValue(value_, 0);
+#ifndef __aarch64__
+      DCHECK(is_null->getType() == codegen_->i8_type());
+#else
+      DCHECK(is_null->getType() == codegen_->i64_type());
+#endif
+      return builder_->CreateTrunc(is_null, codegen_->bool_type(), name);
     }
     case TYPE_DECIMAL: {
       // Lowered type is of the form { {i8}, ... }
@@ -240,8 +273,14 @@ void CodegenAnyVal::SetIsNull(llvm::Value* is_null) {
     case TYPE_BIGINT:
     case TYPE_DOUBLE: {
       // Lowered type is of form { i8, * }. Set the i8 value to 'is_null'.
+      // On aarch64, lowered type is of form { i64, * }
+#ifndef __aarch64__
       llvm::Value* is_null_ext =
           builder_->CreateZExt(is_null, codegen_->i8_type(), "is_null_ext");
+#else
+      llvm::Value* is_null_ext =
+          builder_->CreateZExt(is_null, codegen_->i64_type(), "is_null_ext");
+#endif
       value_ = builder_->CreateInsertValue(value_, is_null_ext, 0, name_);
       break;
     }
@@ -322,14 +361,25 @@ llvm::Value* CodegenAnyVal::GetVal(const char* name) {
       return builder_->CreateBitCast(val, codegen_->float_type());
     }
     case TYPE_BIGINT:
-    case TYPE_DOUBLE:
-      // Lowered type is of form { i8, * }. Get the second value.
       return builder_->CreateExtractValue(value_, 1, name);
+    case TYPE_DOUBLE: {
+      // Lowered type is of form { i8, * }. Get the second value.
+      llvm::Value* val = builder_->CreateExtractValue(value_, 1, name);
+#ifdef __aarch64__
+      val = builder_->CreateBitCast(val, codegen_->double_type());
+#endif
+      return val;
+    }
     case TYPE_DECIMAL: {
-      // Lowered type is of form { {i8}, [15 x i8], {i128} }. Get the i128 value and
-      // truncate it to the correct size. (The {i128} corresponds to the union of the
-      // different width int types.)
+#ifdef __aarch64__
+      // On aarch64, the Lowered type is of form { {i8}, {i128} }. No padding add.
+      uint32_t idxs[] = {1, 0};
+#else
+      // On x86-64, Lowered type is of form { {i8}, [15 x i8], {i128} }.
       uint32_t idxs[] = {2, 0};
+#endif
+      // Get the i128 value and truncate it to the correct size.
+      // (The {i128} corresponds to the union of the different width int types.)
       llvm::Value* val = builder_->CreateExtractValue(value_, idxs, name);
       return builder_->CreateTrunc(val,
           codegen_->GetSlotType(type_), name);
@@ -366,16 +416,27 @@ void CodegenAnyVal::SetVal(llvm::Value* val) {
       value_ = SetHighBits(32, val, value_, name_);
       break;
     case TYPE_BIGINT:
+      value_ = builder_->CreateInsertValue(value_, val, 1, name_);
+      break;
     case TYPE_DOUBLE:
+#ifdef __aarch64__
+      val = builder_->CreateBitCast(val, codegen_->i64_type());
+#endif
       // Lowered type is of form { i8, * }. Set the second value to 'val'.
       value_ = builder_->CreateInsertValue(value_, val, 1, name_);
       break;
     case TYPE_DECIMAL: {
-      // Lowered type is of the form { {i8}, [15 x i8], {i128} }. Set the i128 value to
-      // 'val'. (The {i128} corresponds to the union of the different width int types.)
+      //  Set the i128 value to 'val'.
+      //  (The {i128} corresponds to the union of the different width int types.)
       DCHECK_EQ(val->getType()->getIntegerBitWidth(), type_.GetByteSize() * 8);
       val = builder_->CreateSExt(val, llvm::Type::getIntNTy(codegen_->context(), 128));
+#ifdef __aarch64__
+      // On aarch64, the Lowered type is of form { {i8}, {i128} }. No padding add.
+      uint32_t idxs[] = {1, 0};
+#else
+      // On X86-64, the Lowered type is of the form { {i8}, [15 x i8], {i128} }
       uint32_t idxs[] = {2, 0};
+#endif
       value_ = builder_->CreateInsertValue(value_, val, idxs, name_);
       break;
     }
@@ -430,7 +491,11 @@ void CodegenAnyVal::SetVal(double val) {
 llvm::Value* CodegenAnyVal::GetPtr() {
   // Set the second pointer value to 'ptr'.
   DCHECK(type_.IsStringType() || type_.IsCollectionType());
-  return builder_->CreateExtractValue(value_, 1, name_);
+  llvm::Value* val = builder_->CreateExtractValue(value_, 1, name_);
+#ifdef __aarch64__
+  val = builder_->CreateIntToPtr(val, codegen_->ptr_type());
+#endif
+  return val;
 }
 
 llvm::Value* CodegenAnyVal::GetLen() {
@@ -444,6 +509,9 @@ void CodegenAnyVal::SetPtr(llvm::Value* ptr) {
   // Set the second pointer value to 'ptr'.
   DCHECK(type_.IsStringType() || type_.type == TYPE_FIXED_UDA_INTERMEDIATE
       || type_.IsCollectionType());
+#ifdef __aarch64__
+  ptr = builder_->CreatePtrToInt(ptr, codegen_->i64_type());
+#endif
   value_ = builder_->CreateInsertValue(value_, ptr, 1, name_);
 }
 
@@ -835,7 +903,9 @@ void CodegenAnyVal::CodegenBranchIfNull(
 }
 
 llvm::Value* CodegenAnyVal::GetHighBits(int num_bits, llvm::Value* v, const char* name) {
+#ifndef __aarch64__
   DCHECK_EQ(v->getType()->getIntegerBitWidth(), num_bits * 2);
+#endif
   llvm::Value* shifted = builder_->CreateAShr(v, num_bits);
   return builder_->CreateTrunc(
       shifted, llvm::IntegerType::get(codegen_->context(), num_bits));
@@ -849,9 +919,14 @@ llvm::Value* CodegenAnyVal::GetHighBits(int num_bits, llvm::Value* v, const char
 llvm::Value* CodegenAnyVal::SetHighBits(
     int num_bits, llvm::Value* src, llvm::Value* dst, const char* name) {
   DCHECK_LE(src->getType()->getIntegerBitWidth(), num_bits);
+#ifndef __aarch64__
   DCHECK_EQ(dst->getType()->getIntegerBitWidth(), num_bits * 2);
   llvm::Value* extended_src = builder_->CreateZExt(
       src, llvm::IntegerType::get(codegen_->context(), num_bits * 2));
+#else
+  llvm::Value* extended_src = builder_->CreateZExt(src,
+        llvm::IntegerType::get(codegen_->context(), 64));
+#endif
   llvm::Value* shifted_src = builder_->CreateShl(extended_src, num_bits);
   llvm::Value* masked_dst = builder_->CreateAnd(dst, (1LL << num_bits) - 1);
   return builder_->CreateOr(masked_dst, shifted_src, name);
@@ -879,6 +954,18 @@ llvm::Value* CodegenAnyVal::GetNullVal(LlvmCodeGen* codegen, llvm::Type* val_typ
       return llvm::ConstantStruct::get(struct_type, null_anyval,
           llvm::Constant::getNullValue(type2), llvm::Constant::getNullValue(type3));
     }
+#ifdef __aarch64__
+    else if (struct_type->getElementType(0)->isStructTy()) {
+      llvm::StructType* anyval_struct_type =
+          llvm::cast<llvm::StructType>(struct_type->getElementType(0));
+      llvm::Type* is_null_type = anyval_struct_type->getElementType(0);
+      llvm::Constant* null_anyval = llvm::ConstantStruct::get(
+          anyval_struct_type, llvm::ConstantInt::get(is_null_type, 1));
+      llvm::Type* type1 = struct_type->getElementType(1);
+      return llvm::ConstantStruct::get(struct_type, null_anyval,
+          llvm::Constant::getNullValue(type1));
+    }
+#endif
     // Return the struct { 1, 0 } (the 'is_null' byte, i.e. the first value's first byte,
     // is set to 1, the other bytes don't matter)
     DCHECK_EQ(struct_type->getNumElements(), 2);
@@ -888,6 +975,18 @@ llvm::Value* CodegenAnyVal::GetNullVal(LlvmCodeGen* codegen, llvm::Type* val_typ
     return llvm::ConstantStruct::get(struct_type, llvm::ConstantInt::get(type1, 1),
         llvm::Constant::getNullValue(type2));
   }
+#ifdef __aarch64__
+  if (val_type->isArrayTy()) {
+    llvm::ArrayType* array_type = llvm::cast<llvm::ArrayType>(val_type);
+    DCHECK_EQ(array_type->getNumElements(), 2);
+    llvm::Type* type1 = array_type->getElementType();
+    DCHECK(type1->isIntegerTy()) << LlvmCodeGen::Print(type1);
+    std::vector<llvm::Constant *> arrayElts;
+    arrayElts.push_back(llvm::ConstantInt::get(type1, 1));
+    arrayElts.push_back(llvm::Constant::getNullValue(type1));
+    return llvm::ConstantArray::get(array_type, arrayElts);
+  }
+#endif
   // Return the int 1 ('is_null' byte is 1, other bytes don't matter)
   DCHECK(val_type->isIntegerTy());
   return llvm::ConstantInt::get(val_type, 1);
diff --git a/be/src/codegen/llvm-codegen.cc b/be/src/codegen/llvm-codegen.cc
index f942cb9..1834f8e 100644
--- a/be/src/codegen/llvm-codegen.cc
+++ b/be/src/codegen/llvm-codegen.cc
@@ -111,6 +111,12 @@ DECLARE_string(local_library_dir);
 // avx512ifma,avx512pf,avx512vbmi,avx512vl,clflushopt,clwb,fma4,mwaitx.1.2,pcommit,pku,
 // prefetchwt1,sgx,sha,sse4a,tbm,xop,xsavec,xsaves. If new attrs are added to LLVM,
 // they will be disabled until added to this whitelist.
+#ifdef __aarch64__
+DEFINE_string_hidden(llvm_cpu_attr_whitelist, "crc,neon,fp-armv8,crypto",
+    "(Experimental) a comma-separated list of LLVM CPU attribute flags that are enabled "
+    "for runtime code generation. This flag is provided to enable additional LLVM CPU "
+    "attribute flags for testing.");
+#else
 DEFINE_string_hidden(llvm_cpu_attr_whitelist, "adx,aes,avx,avx2,bmi,bmi2,cmov,cx16,f16c,"
     "fma,fsgsbase,hle,invpcid,lzcnt,mmx,movbe,pclmul,popcnt,prfchw,rdrnd,rdseed,rtm,smap,"
     "sse,sse2,sse3,sse4.1,sse4.2,ssse3,xsave,xsaveopt",
@@ -118,6 +124,7 @@ DEFINE_string_hidden(llvm_cpu_attr_whitelist, "adx,aes,avx,avx2,bmi,bmi2,cmov,cx
     "for runtime code generation. The default flags are a known-good set that are "
     "routinely tested. This flag is provided to enable additional LLVM CPU attribute "
     "flags for testing.");
+#endif
 
 namespace impala {
 
diff --git a/be/src/exec/text-converter.cc b/be/src/exec/text-converter.cc
index 16d01f2..cb69bcc 100644
--- a/be/src/exec/text-converter.cc
+++ b/be/src/exec/text-converter.cc
@@ -302,11 +302,30 @@ Status TextConverter::CodegenWriteSlot(LlvmCodeGen* codegen,
     builder.SetInsertPoint(parse_success_block);
     // If the parsed value is in parse_return, move it into slot
     if (slot_desc->type().type == TYPE_DECIMAL) {
+#ifdef __aarch64__
+      // On aarch64, the 4 bytes decimal still return i64 type, so here truncing is need
+      if (slot_desc->slot_size() == 4) {
+        llvm::Value* temp_slot = builder.CreateTrunc(parse_return, codegen->i32_type());
+        builder.CreateStore(temp_slot, slot);
+      } else {
+        llvm::Value* cast_slot =
+          builder.CreateBitCast(slot, parse_return->getType()->getPointerTo());
+        builder.CreateStore(parse_return, cast_slot);
+      }
+#else
       // For Decimal values, the return type generated by Clang is struct type rather than
       // integer so casting is necessary
       llvm::Value* cast_slot =
           builder.CreateBitCast(slot, parse_return->getType()->getPointerTo());
       builder.CreateStore(parse_return, cast_slot);
+#endif
+#ifdef __aarch64__
+    } else if (slot_desc->type().type == TYPE_DATE) {
+      // On aarch64, for Date Values, the return type generated by Clang is i64, not i32,
+      // so truncing is necessary.
+      llvm::Value* temp_slot = builder.CreateTrunc(parse_return, codegen->i32_type());
+      builder.CreateStore(temp_slot, slot);
+#endif
     } else if (slot_desc->type().type != TYPE_TIMESTAMP) {
       builder.CreateStore(parse_return, slot);
     }
diff --git a/be/src/exprs/scalar-fn-call.cc b/be/src/exprs/scalar-fn-call.cc
index b3647ba..0c52bc6 100644
--- a/be/src/exprs/scalar-fn-call.cc
+++ b/be/src/exprs/scalar-fn-call.cc
@@ -349,21 +349,60 @@ Status ScalarFnCall::GetCodegendComputeFnImpl(LlvmCodeGen* codegen, llvm::Functi
     llvm::Type* arg_type = CodegenAnyVal::GetUnloweredType(codegen, children_[i]->type());
     llvm::Value* arg_val_ptr;
     if (i < NumFixedArgs()) {
+#ifndef __aarch64__
       // Allocate space to store 'child_fn's result so we can pass the pointer to the UDF.
       arg_val_ptr = codegen->CreateEntryBlockAlloca(builder, arg_type, "arg_val_ptr");
       udf_args.push_back(arg_val_ptr);
+#else
+      PrimitiveType col_type = children_[i]->type().type;
+      if (col_type != TYPE_BOOLEAN and col_type != TYPE_TINYINT
+          and col_type != TYPE_SMALLINT) {
+        arg_val_ptr = codegen->CreateEntryBlockAlloca(builder, arg_type, "arg_val_ptr");
+        udf_args.push_back(arg_val_ptr);
+      }
+#endif
     } else {
       // Store the result of 'child_fn' in varargs_buffer[i].
       arg_val_ptr =
           builder.CreateConstGEP1_32(varargs_buffer, i - NumFixedArgs(), "arg_val_ptr");
     }
+#ifndef __aarch64__
     DCHECK_EQ(arg_val_ptr->getType(), arg_type->getPointerTo());
     // The result of the call must be stored in a lowered AnyVal
     llvm::Value* lowered_arg_val_ptr = builder.CreateBitCast(arg_val_ptr,
         CodegenAnyVal::GetLoweredPtrType(codegen, children_[i]->type()),
         "lowered_arg_val_ptr");
+#else
+    llvm::Value* lowered_arg_val_ptr;
+    if (col_type == TYPE_BOOLEAN or col_type == TYPE_TINYINT
+        or col_type == TYPE_SMALLINT) {
+      lowered_arg_val_ptr = codegen->CreateEntryBlockAlloca(builder,
+          CodegenAnyVal::GetLoweredType(codegen, children_[i]->type()), 1,
+          FunctionContextImpl::VARARGS_BUFFER_ALIGNMENT, "lowered_arg_val_ptr");
+    } else {
+      lowered_arg_val_ptr = builder.CreateBitCast(arg_val_ptr,
+          CodegenAnyVal::GetLoweredPtrType(codegen, children_[i]->type()),
+          "lowered_arg_val_ptr");
+    }
+#endif
     CodegenAnyVal::CreateCall(
         codegen, &builder, child_fn, child_fn_args, "arg_val", lowered_arg_val_ptr);
+#ifdef __aarch64__
+    if (col_type == TYPE_BOOLEAN or col_type == TYPE_TINYINT
+        or col_type == TYPE_SMALLINT) {
+      if (i < NumFixedArgs()) {
+        arg_val_ptr = builder.CreateTruncOrBitCast(lowered_arg_val_ptr,
+            CodegenAnyVal::GetUnloweredPtrType(codegen, children_[i]->type()),
+            "arg_val_ptr");
+        udf_args.push_back(arg_val_ptr);
+      } else {
+        llvm::Value* tmp_ptr = builder.CreateTruncOrBitCast(lowered_arg_val_ptr,
+            CodegenAnyVal::GetUnloweredPtrType(codegen, children_[i]->type()),
+            "tmp_ptr");
+        builder.CreateStore(builder.CreateLoad(tmp_ptr), arg_val_ptr);
+      }
+    }
+#endif
   }
 
   if (vararg_start_idx_ != -1) {