You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2016/10/25 20:16:19 UTC
[32/33] incubator-impala git commit: IMPALA-3884: Support
TYPE_TIMESTAMP for HashTableCtx::CodegenAssignNullValue()
IMPALA-3884: Support TYPE_TIMESTAMP for HashTableCtx::CodegenAssignNullValue()
This change implements support for TYPE_TIMESTAMP for
HashTableCtx::CodegenAssignNullValue(). TimestampValue itself
is 16 bytes in size. To match RawValue::Write() in the
interpreted path, CodegenAssignNullValue() emits code to assign
HashUtil::FNV_SEED to both the upper and lower 64-bit of the
destination value. This change also fixes the handling of 128-bit
Decimal16Value in CodegenAssignNullValue() so the emitted code
matches the behavior of the interpreted path.
Change-Id: I0211d38cbef46331e0006fa5ed0680e6e0867bc8
Reviewed-on: http://gerrit.cloudera.org:8080/4794
Reviewed-by: Michael Ho <kw...@cloudera.com>
Tested-by: Michael Ho <kw...@cloudera.com>
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/13455b5a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/13455b5a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/13455b5a
Branch: refs/heads/hadoop-next
Commit: 13455b5a24a9d4d009d1dd0d72944c6cacd54829
Parents: aa7741a
Author: Michael Ho <kw...@cloudera.com>
Authored: Fri Oct 21 15:00:56 2016 -0700
Committer: Michael Ho <kw...@cloudera.com>
Committed: Tue Oct 25 05:52:33 2016 +0000
----------------------------------------------------------------------
be/src/codegen/codegen-anyval.cc | 8 ++---
be/src/codegen/llvm-codegen.cc | 10 ++++--
be/src/codegen/llvm-codegen.h | 9 +++--
be/src/exec/hash-table.cc | 36 ++++++++++++--------
be/src/exec/old-hash-table.cc | 32 +++++++++++------
be/src/util/bit-util.h | 4 +++
.../queries/QueryTest/joins.test | 10 ++++++
7 files changed, 72 insertions(+), 37 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/13455b5a/be/src/codegen/codegen-anyval.cc
----------------------------------------------------------------------
diff --git a/be/src/codegen/codegen-anyval.cc b/be/src/codegen/codegen-anyval.cc
index a2df356..57c08b5 100644
--- a/be/src/codegen/codegen-anyval.cc
+++ b/be/src/codegen/codegen-anyval.cc
@@ -380,12 +380,8 @@ void CodegenAnyVal::SetVal(int64_t val) {
void CodegenAnyVal::SetVal(int128_t val) {
DCHECK_EQ(type_.type, TYPE_DECIMAL);
- // TODO: is there a better way to do this?
- // Set high bits
- Value* ir_val = ConstantInt::get(codegen_->i128_type(), HighBits(val));
- ir_val = builder_->CreateShl(ir_val, 64, "tmp");
- // Set low bits
- ir_val = builder_->CreateOr(ir_val, LowBits(val), "tmp");
+ vector<uint64_t> vals({LowBits(val), HighBits(val)});
+ Value* ir_val = ConstantInt::get(codegen_->context(), APInt(128, vals));
SetVal(ir_val);
}
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/13455b5a/be/src/codegen/llvm-codegen.cc
----------------------------------------------------------------------
diff --git a/be/src/codegen/llvm-codegen.cc b/be/src/codegen/llvm-codegen.cc
index d43ad6e..30cc7d0 100644
--- a/be/src/codegen/llvm-codegen.cc
+++ b/be/src/codegen/llvm-codegen.cc
@@ -564,7 +564,7 @@ Value* LlvmCodeGen::CastPtrToLlvmPtr(Type* type, const void* ptr) {
return ConstantExpr::getIntToPtr(const_int, type);
}
-Value* LlvmCodeGen::GetIntConstant(PrimitiveType type, int64_t val) {
+Value* LlvmCodeGen::GetIntConstant(PrimitiveType type, uint64_t val) {
switch (type) {
case TYPE_TINYINT:
return ConstantInt::get(context(), APInt(8, val));
@@ -580,8 +580,12 @@ Value* LlvmCodeGen::GetIntConstant(PrimitiveType type, int64_t val) {
}
}
-Value* LlvmCodeGen::GetIntConstant(int num_bytes, int64_t val) {
- return ConstantInt::get(context(), APInt(8 * num_bytes, val));
+Value* LlvmCodeGen::GetIntConstant(int num_bytes, uint64_t low_bits, uint64_t high_bits) {
+ DCHECK_GE(num_bytes, 1);
+ DCHECK_LE(num_bytes, 16);
+ DCHECK(BitUtil::IsPowerOf2(num_bytes));
+ vector<uint64_t> vals({low_bits, high_bits});
+ return ConstantInt::get(context(), APInt(8 * num_bytes, vals));
}
AllocaInst* LlvmCodeGen::CreateEntryBlockAlloca(Function* f, const NamedVariable& var) {
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/13455b5a/be/src/codegen/llvm-codegen.h
----------------------------------------------------------------------
diff --git a/be/src/codegen/llvm-codegen.h b/be/src/codegen/llvm-codegen.h
index 0b81701..602dc5d 100644
--- a/be/src/codegen/llvm-codegen.h
+++ b/be/src/codegen/llvm-codegen.h
@@ -380,10 +380,13 @@ class LlvmCodeGen {
llvm::Value* CastPtrToLlvmPtr(llvm::Type* type, const void* ptr);
/// Returns the constant 'val' of 'type'.
- llvm::Value* GetIntConstant(PrimitiveType type, int64_t val);
+ llvm::Value* GetIntConstant(PrimitiveType type, uint64_t val);
- /// Returns the constant 'val' of the int type of size 'byte_size'.
- llvm::Value* GetIntConstant(int byte_size, int64_t val);
+ /// Returns a constant int of 'byte_size' bytes based on 'low_bits' and 'high_bits'
+ /// which stand for the lower and upper 64-bits of the constant respectively. For
+ /// values less than or equal to 64-bits, 'high_bits' is not used. This function
+ /// can generate constant up to 128-bit wide. 'byte_size' must be power of 2.
+ llvm::Value* GetIntConstant(int byte_size, uint64_t low_bits, uint64_t high_bits);
/// Returns true/false constants (bool type)
llvm::Value* true_value() { return true_value_; }
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/13455b5a/be/src/exec/hash-table.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hash-table.cc b/be/src/exec/hash-table.cc
index c39e9e9..74fc534 100644
--- a/be/src/exec/hash-table.cc
+++ b/be/src/exec/hash-table.cc
@@ -569,41 +569,51 @@ string HashTable::PrintStats() const {
// we'll pick a more random value.
static void CodegenAssignNullValue(LlvmCodeGen* codegen,
LlvmCodeGen::LlvmBuilder* builder, Value* dst, const ColumnType& type) {
- int64_t fvn_seed = HashUtil::FNV_SEED;
+ uint64_t fnv_seed = HashUtil::FNV_SEED;
if (type.type == TYPE_STRING || type.type == TYPE_VARCHAR) {
Value* dst_ptr = builder->CreateStructGEP(NULL, dst, 0, "string_ptr");
Value* dst_len = builder->CreateStructGEP(NULL, dst, 1, "string_len");
- Value* null_len = codegen->GetIntConstant(TYPE_INT, fvn_seed);
+ Value* null_len = codegen->GetIntConstant(TYPE_INT, fnv_seed);
Value* null_ptr = builder->CreateIntToPtr(null_len, codegen->ptr_type());
builder->CreateStore(null_ptr, dst_ptr);
builder->CreateStore(null_len, dst_len);
} else {
Value* null_value = NULL;
- // Get a type specific representation of fvn_seed
+ int byte_size = type.GetByteSize();
+ // Get a type specific representation of fnv_seed
switch (type.type) {
case TYPE_BOOLEAN:
// In results, booleans are stored as 1 byte
dst = builder->CreateBitCast(dst, codegen->ptr_type());
- null_value = codegen->GetIntConstant(TYPE_TINYINT, fvn_seed);
+ null_value = codegen->GetIntConstant(TYPE_TINYINT, fnv_seed);
break;
+ case TYPE_TIMESTAMP: {
+ // Cast 'dst' to 'i128*'
+ DCHECK_EQ(byte_size, 16);
+ PointerType* fnv_seed_ptr_type =
+ codegen->GetPtrType(Type::getIntNTy(codegen->context(), byte_size * 8));
+ dst = builder->CreateBitCast(dst, fnv_seed_ptr_type);
+ null_value = codegen->GetIntConstant(byte_size, fnv_seed, fnv_seed);
+ break;
+ }
case TYPE_TINYINT:
case TYPE_SMALLINT:
case TYPE_INT:
case TYPE_BIGINT:
case TYPE_DECIMAL:
- null_value = codegen->GetIntConstant(type.GetByteSize(), fvn_seed);
+ null_value = codegen->GetIntConstant(byte_size, fnv_seed, fnv_seed);
break;
case TYPE_FLOAT: {
// Don't care about the value, just the bit pattern
- float fvn_seed_float = *reinterpret_cast<float*>(&fvn_seed);
- null_value = ConstantFP::get(codegen->context(), APFloat(fvn_seed_float));
+ float fnv_seed_float = *reinterpret_cast<float*>(&fnv_seed);
+ null_value = ConstantFP::get(codegen->context(), APFloat(fnv_seed_float));
break;
}
case TYPE_DOUBLE: {
// Don't care about the value, just the bit pattern
- double fvn_seed_double = *reinterpret_cast<double*>(&fvn_seed);
- null_value = ConstantFP::get(codegen->context(), APFloat(fvn_seed_double));
+ double fnv_seed_double = *reinterpret_cast<double*>(&fnv_seed);
+ null_value = ConstantFP::get(codegen->context(), APFloat(fnv_seed_double));
break;
}
default:
@@ -685,13 +695,11 @@ static void CodegenAssignNullValue(LlvmCodeGen* codegen,
// becomes the start of the next block for codegen (either the next expr or just the
// end of the function).
Status HashTableCtx::CodegenEvalRow(LlvmCodeGen* codegen, bool build, Function** fn) {
- // TODO: CodegenAssignNullValue() can't handle TYPE_TIMESTAMP or TYPE_DECIMAL yet
const vector<ExprContext*>& ctxs = build ? build_expr_ctxs_ : probe_expr_ctxs_;
for (int i = 0; i < ctxs.size(); ++i) {
- PrimitiveType type = ctxs[i]->root()->type().type;
- if (type == TYPE_TIMESTAMP || type == TYPE_CHAR) {
- return Status(Substitute("HashTableCtx::CodegenEvalRow(): type $0 NYI",
- TypeToString(type)));
+ // Disable codegen for CHAR
+ if (ctxs[i]->root()->type().type == TYPE_CHAR) {
+ return Status("HashTableCtx::CodegenEvalRow(): CHAR NYI");
}
}
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/13455b5a/be/src/exec/old-hash-table.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/old-hash-table.cc b/be/src/exec/old-hash-table.cc
index db89782..f51bc74 100644
--- a/be/src/exec/old-hash-table.cc
+++ b/be/src/exec/old-hash-table.cc
@@ -184,41 +184,52 @@ int OldHashTable::AddBloomFilters() {
// we'll pick a more random value.
static void CodegenAssignNullValue(LlvmCodeGen* codegen,
LlvmCodeGen::LlvmBuilder* builder, Value* dst, const ColumnType& type) {
- int64_t fvn_seed = HashUtil::FNV_SEED;
+ uint64_t fnv_seed = HashUtil::FNV_SEED;
if (type.type == TYPE_STRING || type.type == TYPE_VARCHAR) {
Value* dst_ptr = builder->CreateStructGEP(NULL, dst, 0, "string_ptr");
Value* dst_len = builder->CreateStructGEP(NULL, dst, 1, "string_len");
- Value* null_len = codegen->GetIntConstant(TYPE_INT, fvn_seed);
+ Value* null_len = codegen->GetIntConstant(TYPE_INT, fnv_seed);
Value* null_ptr = builder->CreateIntToPtr(null_len, codegen->ptr_type());
builder->CreateStore(null_ptr, dst_ptr);
builder->CreateStore(null_len, dst_len);
return;
} else {
Value* null_value = NULL;
- // Get a type specific representation of fvn_seed
+ int byte_size = type.GetByteSize();
+ // Get a type specific representation of fnv_seed
switch (type.type) {
case TYPE_BOOLEAN:
// In results, booleans are stored as 1 byte
dst = builder->CreateBitCast(dst, codegen->ptr_type());
- null_value = codegen->GetIntConstant(TYPE_TINYINT, fvn_seed);
+ null_value = codegen->GetIntConstant(TYPE_TINYINT, fnv_seed);
break;
+ case TYPE_TIMESTAMP: {
+ // Cast 'dst' to 'i128*'
+ DCHECK_EQ(byte_size, 16);
+ PointerType* fnv_seed_ptr_type =
+ codegen->GetPtrType(Type::getIntNTy(codegen->context(), byte_size * 8));
+ dst = builder->CreateBitCast(dst, fnv_seed_ptr_type);
+ null_value = codegen->GetIntConstant(byte_size, fnv_seed, fnv_seed);
+ break;
+ }
case TYPE_TINYINT:
case TYPE_SMALLINT:
case TYPE_INT:
case TYPE_BIGINT:
- null_value = codegen->GetIntConstant(type.type, fvn_seed);
+ case TYPE_DECIMAL:
+ null_value = codegen->GetIntConstant(byte_size, fnv_seed, fnv_seed);
break;
case TYPE_FLOAT: {
// Don't care about the value, just the bit pattern
- float fvn_seed_float = *reinterpret_cast<float*>(&fvn_seed);
- null_value = ConstantFP::get(codegen->context(), APFloat(fvn_seed_float));
+ float fnv_seed_float = *reinterpret_cast<float*>(&fnv_seed);
+ null_value = ConstantFP::get(codegen->context(), APFloat(fnv_seed_float));
break;
}
case TYPE_DOUBLE: {
// Don't care about the value, just the bit pattern
- double fvn_seed_double = *reinterpret_cast<double*>(&fvn_seed);
- null_value = ConstantFP::get(codegen->context(), APFloat(fvn_seed_double));
+ double fnv_seed_double = *reinterpret_cast<double*>(&fnv_seed);
+ null_value = ConstantFP::get(codegen->context(), APFloat(fnv_seed_double));
break;
}
default:
@@ -256,11 +267,10 @@ static void CodegenAssignNullValue(LlvmCodeGen* codegen,
// becomes the start of the next block for codegen (either the next expr or just the
// end of the function).
Function* OldHashTable::CodegenEvalTupleRow(LlvmCodeGen* codegen, bool build) {
- // TODO: CodegenAssignNullValue() can't handle TYPE_TIMESTAMP or TYPE_DECIMAL yet
const vector<ExprContext*>& ctxs = build ? build_expr_ctxs_ : probe_expr_ctxs_;
for (int i = 0; i < ctxs.size(); ++i) {
PrimitiveType type = ctxs[i]->root()->type().type;
- if (type == TYPE_TIMESTAMP || type == TYPE_DECIMAL || type == TYPE_CHAR) return NULL;
+ if (type == TYPE_CHAR) return NULL;
}
// Get types to generate function prototype
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/13455b5a/be/src/util/bit-util.h
----------------------------------------------------------------------
diff --git a/be/src/util/bit-util.h b/be/src/util/bit-util.h
index 33dd02b..9a5c4a4 100644
--- a/be/src/util/bit-util.h
+++ b/be/src/util/bit-util.h
@@ -82,6 +82,10 @@ class BitUtil {
return value & ~(factor - 1);
}
+ constexpr static inline bool IsPowerOf2(int64_t value) {
+ return (value & (value - 1)) == 0;
+ }
+
/// Specialized round up and down functions for frequently used factors,
/// like 8 (bits->bytes), 32 (bits->i32), and 64 (bits->i64).
/// Returns the rounded up number of bytes that fit the number of bits.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/13455b5a/testdata/workloads/functional-query/queries/QueryTest/joins.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/joins.test b/testdata/workloads/functional-query/queries/QueryTest/joins.test
index ebb6287..db915df 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/joins.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/joins.test
@@ -720,3 +720,13 @@ on extract(minute from t1.timestamp_col) = extract(hour from t2.timestamp_col);
---- TYPES
BIGINT
====
+---- QUERY
+# Regression for IMPALA-3884. Exercise HashTableCtx::AssignNullValue() for
+# 128-bit TimestampValue.
+select count(*) from functional.alltypes t1 right outer join functional.decimal_tbl t2 on
+t1.timestamp_col = cast(t2.d4 as TIMESTAMP);
+---- RESULTS
+5
+---- TYPES
+BIGINT
+====