You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by bo...@apache.org on 2018/11/19 10:45:12 UTC
[05/33] impala git commit: IMPALA-6661 Make NaN values equal for
grouping purposes.
IMPALA-6661 Make NaN values equal for grouping purposes.
Similar to the treatment of NULLs, we want to consider NaN values
as equal when grouping.
- When detecting a NaN in a set of row values, the NaN value must
be converted to a canonical value - so that all NaN values have
the same bit-pattern for hashing purposes.
- When doing equality evaluation, floating point types must have
additional logic to consider NaN values as equal.
- Existing logic for handling NULLs in this way is appropriate for
triggering this behavior for NaN values.
- Relabel "force null equality" as "inclusive equality" to expand
the scope of the concept to a more generic form that includes NaN.
Change-Id: I996c4a2e1934fd887046ed0c55457b7285375086
Reviewed-on: http://gerrit.cloudera.org:8080/11535
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Michael Ho <kw...@cloudera.com>
Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/6684fe19
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/6684fe19
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/6684fe19
Branch: refs/heads/branch-3.1.0
Commit: 6684fe1900d1c5655f449cf551aa798c1cad3c17
Parents: cb2574b
Author: Michal Ostrowski <mo...@cloudera.com>
Authored: Mon Sep 24 18:58:23 2018 -0700
Committer: Zoltan Borok-Nagy <bo...@cloudera.com>
Committed: Tue Nov 13 12:50:23 2018 +0100
----------------------------------------------------------------------
be/src/codegen/codegen-anyval.cc | 42 ++++++++++++++++++--
be/src/codegen/codegen-anyval.h | 21 +++++++++-
be/src/exec/hash-table.cc | 35 +++++++++++-----
be/src/exec/hash-table.h | 35 ++++++++--------
be/src/exec/hash-table.inline.h | 4 +-
be/src/runtime/raw-value.cc | 2 +
be/src/runtime/raw-value.h | 13 ++++++
be/src/runtime/raw-value.inline.h | 25 ++++++++++++
.../queries/QueryTest/aggregation.test | 23 +++++++++++
.../queries/QueryTest/exprs.test | 10 +++++
.../queries/QueryTest/joins.test | 27 +++++++++++++
11 files changed, 202 insertions(+), 35 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/impala/blob/6684fe19/be/src/codegen/codegen-anyval.cc
----------------------------------------------------------------------
diff --git a/be/src/codegen/codegen-anyval.cc b/be/src/codegen/codegen-anyval.cc
index 3e2eb1e..a44eccc 100644
--- a/be/src/codegen/codegen-anyval.cc
+++ b/be/src/codegen/codegen-anyval.cc
@@ -18,7 +18,7 @@
#include "codegen/codegen-anyval.h"
#include "codegen/codegen-util.h"
-
+#include "runtime/raw-value.h"
#include "common/names.h"
using namespace impala;
@@ -466,6 +466,29 @@ void CodegenAnyVal::SetDate(llvm::Value* date) {
value_ = builder_->CreateInsertValue(value_, v, 0, name_);
}
+void CodegenAnyVal::ConvertToCanonicalForm() {
+ // Convert the value to a bit pattern that is unambiguous.
+ // Specifically, for floating point type values, NaN values are converted to
+ // the same bit pattern.
+ switch(type_.type) {
+ case TYPE_FLOAT:
+ case TYPE_DOUBLE: {
+ llvm::Value* raw = GetVal();
+ llvm::Value* canonical_val;
+ if (type_.type == TYPE_FLOAT) {
+ canonical_val = llvm::ConstantFP::getNaN(codegen_->float_type());
+ } else {
+ canonical_val = llvm::ConstantFP::getNaN(codegen_->double_type());
+ }
+ llvm::Value* is_nan = builder_->CreateFCmpUNO(raw, raw, "cmp_nan");
+ SetVal(builder_->CreateSelect(is_nan, canonical_val, raw));
+ break;
+ }
+ default:
+ ;
+ }
+}
+
llvm::Value* CodegenAnyVal::GetLoweredPtr(const string& name) const {
llvm::Value* lowered_ptr =
codegen_->CreateEntryBlockAlloca(*builder_, value_->getType(), name.c_str());
@@ -702,7 +725,8 @@ llvm::Value* CodegenAnyVal::Eq(CodegenAnyVal* other) {
}
}
-llvm::Value* CodegenAnyVal::EqToNativePtr(llvm::Value* native_ptr) {
+llvm::Value* CodegenAnyVal::EqToNativePtr(llvm::Value* native_ptr,
+ bool inclusive_equality) {
llvm::Value* val = NULL;
if (!type_.IsStringType()) {
val = builder_->CreateLoad(native_ptr);
@@ -718,9 +742,19 @@ llvm::Value* CodegenAnyVal::EqToNativePtr(llvm::Value* native_ptr) {
case TYPE_DECIMAL:
return builder_->CreateICmpEQ(GetVal(), val, "cmp_raw");
case TYPE_FLOAT:
- case TYPE_DOUBLE:
+ case TYPE_DOUBLE:{
// Use the ordering version "OEQ" to ensure that 'nan' != 'nan'.
- return builder_->CreateFCmpOEQ(GetVal(), val, "cmp_raw");
+ llvm::Value* local_val = GetVal();
+ llvm::Value* cmp_raw = builder_->CreateFCmpOEQ(local_val, val, "cmp_raw");
+ if (!inclusive_equality) return cmp_raw;
+
+ // Mirror logic in HashTableCtx::Equals - IMPALA-6661
+ llvm::Value* local_is_nan = builder_->CreateFCmpUNO(local_val,
+ local_val, "local_val_is_nan");
+ llvm::Value* val_is_nan = builder_->CreateFCmpUNO(val, val, "val_is_nan");
+ llvm::Value* both_nan = builder_->CreateAnd(local_is_nan, val_is_nan);
+ return builder_->CreateOr(cmp_raw, both_nan, "cmp_raw_with_nan");
+ }
case TYPE_STRING:
case TYPE_VARCHAR:
case TYPE_FIXED_UDA_INTERMEDIATE: {
http://git-wip-us.apache.org/repos/asf/impala/blob/6684fe19/be/src/codegen/codegen-anyval.h
----------------------------------------------------------------------
diff --git a/be/src/codegen/codegen-anyval.h b/be/src/codegen/codegen-anyval.h
index 2bc800e..bf98fd9 100644
--- a/be/src/codegen/codegen-anyval.h
+++ b/be/src/codegen/codegen-anyval.h
@@ -226,14 +226,31 @@ class CodegenAnyVal {
void WriteToSlot(const SlotDescriptor& slot_desc, llvm::Value* tuple,
llvm::Value* pool_val, llvm::BasicBlock* insert_before = nullptr);
+ /// Rewrites the bit values of a value in a canonical form.
+ /// Floating point values may be "NaN". Nominally, NaN != NaN, but
+ /// for grouping purposes we want that to not be the case.
+ /// Therefore all NaN values need to be converted into a consistent
+ /// form where all bits are the same. This method will do that -
+ /// ensure that all NaN values have the same bit pattern.
+ ///
+ /// Generically speaking, a canonical form of a value ensures that
+ /// all ambiguity is removed from a value's bit settings -- if there
+ /// are bits that can be freely changed without changing the logical
+ /// value of the value. (Currently this only has an impact for NaN
+ /// float and double values.)
+ void ConvertToCanonicalForm();
+
/// Returns the i1 result of this == other. this and other must be non-null.
llvm::Value* Eq(CodegenAnyVal* other);
/// Compares this *Val to the value of 'native_ptr'. 'native_ptr' should be a pointer to
/// a native type, e.g. StringValue, or TimestampValue. This *Val should match
/// 'native_ptr's type (e.g. if this is an IntVal, 'native_ptr' should have type i32*).
- /// Returns the i1 result of the equality comparison.
- llvm::Value* EqToNativePtr(llvm::Value* native_ptr);
+ /// Returns the i1 result of the equality comparison. "inclusive_equality" means that
+ /// the scope of equality will be expanded to include considering as equal scenarios
+ /// that would otherwise resolve to not-equal, such as a comparison of floating-point
+ /// "NaN" values.
+ llvm::Value* EqToNativePtr(llvm::Value* native_ptr, bool inclusive_equality = false);
/// Returns the i32 result of comparing this value to 'other' (similar to
/// RawValue::Compare()). This and 'other' must be non-null. Return value is < 0 if
http://git-wip-us.apache.org/repos/asf/impala/blob/6684fe19/be/src/exec/hash-table.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hash-table.cc b/be/src/exec/hash-table.cc
index e315fd1..576deca 100644
--- a/be/src/exec/hash-table.cc
+++ b/be/src/exec/hash-table.cc
@@ -185,7 +185,7 @@ bool HashTableCtx::EvalRow(const TupleRow* row,
bool has_null = false;
for (int i = 0; i < evals.size(); ++i) {
void* loc = expr_values_cache_.ExprValuePtr(expr_values, i);
- void* val = evals[i]->GetValue(row);
+ const void* val = evals[i]->GetValue(row);
if (val == NULL) {
// If the table doesn't store nulls, no reason to keep evaluating
if (!stores_nulls_) return true;
@@ -195,8 +195,12 @@ bool HashTableCtx::EvalRow(const TupleRow* row,
} else {
expr_values_null[i] = false;
}
- DCHECK_LE(build_exprs_[i]->type().GetSlotSize(), sizeof(NULL_VALUE));
- RawValue::Write(val, loc, build_exprs_[i]->type(), NULL);
+ const ColumnType& expr_type = build_exprs_[i]->type();
+ DCHECK_LE(expr_type.GetSlotSize(), sizeof(NULL_VALUE));
+ if (RawValue::IsNaN(val, expr_type)) {
+ val = RawValue::CanonicalNaNValue(expr_type);
+ }
+ RawValue::Write(val, loc, expr_type, NULL);
}
return has_null;
}
@@ -230,13 +234,13 @@ uint32_t HashTableCtx::HashVariableLenRow(const uint8_t* expr_values,
return hash;
}
-template <bool FORCE_NULL_EQUALITY>
+template <bool INCLUSIVE_EQUALITY>
bool HashTableCtx::Equals(const TupleRow* build_row, const uint8_t* expr_values,
const uint8_t* expr_values_null) const noexcept {
for (int i = 0; i < build_expr_evals_.size(); ++i) {
void* val = build_expr_evals_[i]->GetValue(build_row);
if (val == NULL) {
- if (!(FORCE_NULL_EQUALITY || finds_nulls_[i])) return false;
+ if (!(INCLUSIVE_EQUALITY || finds_nulls_[i])) return false;
if (!expr_values_null[i]) return false;
continue;
} else {
@@ -245,7 +249,16 @@ bool HashTableCtx::Equals(const TupleRow* build_row, const uint8_t* expr_values,
const void* loc = expr_values_cache_.ExprValuePtr(expr_values, i);
DCHECK(build_exprs_[i] == &build_expr_evals_[i]->root());
- if (!RawValue::Eq(loc, val, build_exprs_[i]->type())) return false;
+
+ const ColumnType& expr_type = build_exprs_[i]->type();
+ if (!RawValue::Eq(loc, val, expr_type)) {
+ if (INCLUSIVE_EQUALITY) {
+ bool val_is_nan = RawValue::IsNaN(val, expr_type);
+ bool local_is_nan = RawValue::IsNaN(loc, expr_type);
+ if (val_is_nan && local_is_nan) continue;
+ }
+ return false;
+ }
}
return true;
}
@@ -786,6 +799,9 @@ Status HashTableCtx::CodegenEvalRow(
// Not null block
builder.SetInsertPoint(not_null_block);
+
+ result.ConvertToCanonicalForm();
+
result.StoreToNativePtr(llvm_loc);
builder.CreateBr(continue_block);
@@ -1059,7 +1075,7 @@ Status HashTableCtx::CodegenHashRow(
// br i1 %cmp_raw10, label %continue3, label %false_block
// }
Status HashTableCtx::CodegenEquals(
- LlvmCodeGen* codegen, bool force_null_equality, llvm::Function** fn) {
+ LlvmCodeGen* codegen, bool inclusive_equality, llvm::Function** fn) {
for (int i = 0; i < build_exprs_.size(); ++i) {
// Disable codegen for CHAR
if (build_exprs_[i]->type().type == TYPE_CHAR) {
@@ -1123,13 +1139,14 @@ Status HashTableCtx::CodegenEquals(
// We consider null values equal if we are comparing build rows or if the join
// predicate is <=>
- if (force_null_equality || finds_nulls_[i]) {
+ if (inclusive_equality || finds_nulls_[i]) {
llvm::Value* llvm_null_byte_loc = builder.CreateInBoundsGEP(
NULL, expr_values_null, codegen->GetI32Constant(i), "null_byte_loc");
llvm::Value* null_byte = builder.CreateLoad(llvm_null_byte_loc);
row_is_null =
builder.CreateICmpNE(null_byte, codegen->GetI8Constant(0));
}
+ if (inclusive_equality) result.ConvertToCanonicalForm();
// Get llvm value for row_val from 'expr_values'
int offset = expr_values_cache_.expr_values_offsets(i);
@@ -1154,7 +1171,7 @@ Status HashTableCtx::CodegenEquals(
builder.SetInsertPoint(cmp_block);
}
// Check result == row_val
- llvm::Value* is_equal = result.EqToNativePtr(row_val);
+ llvm::Value* is_equal = result.EqToNativePtr(row_val, inclusive_equality);
builder.CreateCondBr(is_equal, continue_block, false_block);
builder.SetInsertPoint(continue_block);
http://git-wip-us.apache.org/repos/asf/impala/blob/6684fe19/be/src/exec/hash-table.h
----------------------------------------------------------------------
diff --git a/be/src/exec/hash-table.h b/be/src/exec/hash-table.h
index c57b7a5..e98bdfd 100644
--- a/be/src/exec/hash-table.h
+++ b/be/src/exec/hash-table.h
@@ -165,10 +165,10 @@ class HashTableCtx {
Status CodegenEvalRow(LlvmCodeGen* codegen, bool build_row, llvm::Function** fn);
/// Codegen for evaluating a TupleRow and comparing equality. Function signature
- /// matches HashTable::Equals(). 'force_null_equality' is true if the generated
- /// equality function should treat all NULLs as equal. See the template parameter
- /// to HashTable::Equals().
- Status CodegenEquals(LlvmCodeGen* codegen, bool force_null_equality,
+ /// matches HashTable::Equals(). 'inclusive_equality' is true if the generated
+ /// equality function should treat all NULLs as equal and all NaNs as equal.
+ /// See the template parameter to HashTable::Equals().
+ Status CodegenEquals(LlvmCodeGen* codegen, bool inclusive_equality,
llvm::Function** fn);
/// Codegen for hashing expr values. Function prototype matches HashRow identically.
@@ -431,11 +431,11 @@ class HashTableCtx {
/// Wrapper function for calling correct HashUtil function in non-codegen'd case.
uint32_t Hash(const void* input, int len, uint32_t hash) const;
- /// Evaluate 'row' over build exprs, storing values into 'expr_values' and nullness into
- /// 'expr_values_null'. This will be replaced by codegen. We do not want this function
- /// inlined when cross compiled because we need to be able to differentiate between
- /// EvalBuildRow and EvalProbeRow by name and the build/probe exprs are baked into the
- /// codegen'd function.
+ /// Evaluate 'row' over build exprs, storing values into 'expr_values' and nullness
+ /// into 'expr_values_null'. This will be replaced by codegen. We do not want this
+ /// function inlined when cross compiled because we need to be able to differentiate
+ /// between EvalBuildRow and EvalProbeRow by name and the build/probe exprs are baked
+ /// into the codegen'd function.
bool IR_NO_INLINE EvalBuildRow(
const TupleRow* row, uint8_t* expr_values, uint8_t* expr_values_null) noexcept {
return EvalRow(row, build_expr_evals_, expr_values, expr_values_null);
@@ -460,18 +460,17 @@ class HashTableCtx {
uint8_t* expr_values, uint8_t* expr_values_null) noexcept;
/// Returns true if the values of build_exprs evaluated over 'build_row' equal the
- /// values in 'expr_values' with nullness 'expr_values_null'. FORCE_NULL_EQUALITY is
- /// true if all nulls should be treated as equal, regardless of the values of
- /// 'finds_nulls_'. This will be replaced by codegen.
- template <bool FORCE_NULL_EQUALITY>
+ /// values in 'expr_values' with nullness 'expr_values_null'. INCLUSIVE_EQUALITY
+ /// means "NULL==NULL" and "NaN==NaN". This will be replaced by codegen.
+ template <bool INCLUSIVE_EQUALITY>
bool IR_NO_INLINE Equals(const TupleRow* build_row, const uint8_t* expr_values,
const uint8_t* expr_values_null) const noexcept;
/// Helper function that calls Equals() with the current row. Always inlined so that
/// it does not appear in cross-compiled IR.
- template <bool FORCE_NULL_EQUALITY>
+ template <bool INCLUSIVE_EQUALITY>
bool ALWAYS_INLINE Equals(const TupleRow* build_row) const {
- return Equals<FORCE_NULL_EQUALITY>(build_row, expr_values_cache_.cur_expr_values(),
+ return Equals<INCLUSIVE_EQUALITY>(build_row, expr_values_cache_.cur_expr_values(),
expr_values_cache_.cur_expr_values_null());
}
@@ -848,14 +847,14 @@ class HashTable {
/// this function. The values of the expression values cache in 'ht_ctx' will be
/// used to probe the hash table.
///
- /// 'FORCE_NULL_EQUALITY' is true if NULLs should always be considered equal when
- /// comparing two rows.
+ /// 'INCLUSIVE_EQUALITY' is true if NULLs and NaNs should always be
+ /// considered equal when comparing two rows.
///
/// 'hash' is the hash computed by EvalAndHashBuild() or EvalAndHashProbe().
/// 'found' indicates that a bucket that contains an equal row is found.
///
/// There are wrappers of this function that perform the Find and Insert logic.
- template <bool FORCE_NULL_EQUALITY>
+ template <bool INCLUSIVE_EQUALITY>
int64_t IR_ALWAYS_INLINE Probe(Bucket* buckets, int64_t num_buckets,
HashTableCtx* ht_ctx, uint32_t hash, bool* found);
http://git-wip-us.apache.org/repos/asf/impala/blob/6684fe19/be/src/exec/hash-table.inline.h
----------------------------------------------------------------------
diff --git a/be/src/exec/hash-table.inline.h b/be/src/exec/hash-table.inline.h
index 205abe7..2bcecef 100644
--- a/be/src/exec/hash-table.inline.h
+++ b/be/src/exec/hash-table.inline.h
@@ -48,7 +48,7 @@ inline void HashTableCtx::ExprValuesCache::NextRow() {
DCHECK_LE(cur_expr_values_hash_ - expr_values_hash_array_.get(), capacity_);
}
-template <bool FORCE_NULL_EQUALITY>
+template <bool INCLUSIVE_EQUALITY>
inline int64_t HashTable::Probe(Bucket* buckets, int64_t num_buckets,
HashTableCtx* ht_ctx, uint32_t hash, bool* found) {
DCHECK(buckets != NULL);
@@ -66,7 +66,7 @@ inline int64_t HashTable::Probe(Bucket* buckets, int64_t num_buckets,
if (LIKELY(!bucket->filled)) return bucket_idx;
if (hash == bucket->hash) {
if (ht_ctx != NULL &&
- ht_ctx->Equals<FORCE_NULL_EQUALITY>(GetRow(bucket, ht_ctx->scratch_row_))) {
+ ht_ctx->Equals<INCLUSIVE_EQUALITY>(GetRow(bucket, ht_ctx->scratch_row_))) {
*found = true;
return bucket_idx;
}
http://git-wip-us.apache.org/repos/asf/impala/blob/6684fe19/be/src/runtime/raw-value.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/raw-value.cc b/be/src/runtime/raw-value.cc
index b5de7bc..f15986b 100644
--- a/be/src/runtime/raw-value.cc
+++ b/be/src/runtime/raw-value.cc
@@ -29,6 +29,8 @@
namespace impala {
const int RawValue::ASCII_PRECISION = 16; // print 16 digits for double/float
+const double RawValue::CANONICAL_DOUBLE_NAN = nan("");
+const float RawValue::CANONICAL_FLOAT_NAN = nanf("");
void RawValue::PrintValueAsBytes(const void* value, const ColumnType& type,
stringstream* stream) {
http://git-wip-us.apache.org/repos/asf/impala/blob/6684fe19/be/src/runtime/raw-value.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/raw-value.h b/be/src/runtime/raw-value.h
index 6ee9191..7bf52a5 100644
--- a/be/src/runtime/raw-value.h
+++ b/be/src/runtime/raw-value.h
@@ -37,6 +37,12 @@ class RawValue {
/// Ascii output precision for double/float
static const int ASCII_PRECISION;
+ /// Single NaN values to ensure all NaN values can be assigned one bit pattern
+ /// that will always compare and hash the same way. Allows for all NaN values
+ /// to be put into the same "group by" bucket.
+ static const double CANONICAL_DOUBLE_NAN;
+ static const float CANONICAL_FLOAT_NAN;
+
/// Convert 'value' into ascii and write to 'stream'. NULL turns into "NULL". 'scale'
/// determines how many digits after the decimal are printed for floating point numbers,
/// -1 indicates to use the stream's current formatting.
@@ -97,6 +103,13 @@ class RawValue {
/// This is more performant than Compare() == 0 for string equality, mostly because of
/// the length comparison check.
static inline bool Eq(const void* v1, const void* v2, const ColumnType& type);
+
+ /// Returns true if val/type correspond to a NaN floating point value.
+ static inline bool IsNaN(const void* val, const ColumnType& type);
+
+ /// Returns a canonical NaN value for a floating point type
+ /// (which will always have the same bit-pattern to maintain consistency in hashing).
+ static inline const void* CanonicalNaNValue(const ColumnType& type);
};
}
http://git-wip-us.apache.org/repos/asf/impala/blob/6684fe19/be/src/runtime/raw-value.inline.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/raw-value.inline.h b/be/src/runtime/raw-value.inline.h
index 03c3699..9206cf4 100644
--- a/be/src/runtime/raw-value.inline.h
+++ b/be/src/runtime/raw-value.inline.h
@@ -39,6 +39,29 @@ namespace impala {
static const uint32_t HASH_VAL_NULL = 0x58081667;
static const uint32_t HASH_VAL_EMPTY = 0x7dca7eee;
+inline bool RawValue::IsNaN(const void* val, const ColumnType& type) {
+ switch(type.type) {
+ case TYPE_FLOAT:
+ return std::isnan(*reinterpret_cast<const float*>(val));
+ case TYPE_DOUBLE:
+ return std::isnan(*reinterpret_cast<const double*>(val));
+ default:
+ return false;
+ }
+}
+
+inline const void* RawValue::CanonicalNaNValue(const ColumnType& type) {
+ switch(type.type) {
+ case TYPE_FLOAT:
+ return &CANONICAL_FLOAT_NAN;
+ case TYPE_DOUBLE:
+ return &CANONICAL_DOUBLE_NAN;
+ default:
+ DCHECK(false);
+ return nullptr;
+ }
+}
+
inline bool RawValue::Eq(const void* v1, const void* v2, const ColumnType& type) {
const StringValue* string_value1;
const StringValue* string_value2;
@@ -144,6 +167,7 @@ inline uint32_t RawValue::GetHashValueNonNull<float>(const float* v,
const ColumnType& type, uint32_t seed) {
DCHECK_EQ(type.type, TYPE_FLOAT);
DCHECK(v != NULL);
+ if (std::isnan(*v)) v = &RawValue::CANONICAL_FLOAT_NAN;
return HashUtil::MurmurHash2_64(v, 4, seed);
}
@@ -152,6 +176,7 @@ inline uint32_t RawValue::GetHashValueNonNull<double>(const double* v,
const ColumnType& type, uint32_t seed) {
DCHECK_EQ(type.type, TYPE_DOUBLE);
DCHECK(v != NULL);
+ if (std::isnan(*v)) v = &RawValue::CANONICAL_DOUBLE_NAN;
return HashUtil::MurmurHash2_64(v, 8, seed);
}
http://git-wip-us.apache.org/repos/asf/impala/blob/6684fe19/testdata/workloads/functional-query/queries/QueryTest/aggregation.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/aggregation.test b/testdata/workloads/functional-query/queries/QueryTest/aggregation.test
index 7aba138..69372f8 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/aggregation.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/aggregation.test
@@ -1378,3 +1378,26 @@ having extract(hour from timestamp_col) = int_col
TIMESTAMP,BIGINT
---- RESULTS
====
+---- QUERY
+# GROUP BY of NaN values aggregates NaN's as one grouping
+select count(*), sqrt(0.5-x) as Z
+from (VALUES((1.6 x, 2 y), (3.2, 4), (5.4,6))) T
+group by Z
+---- RESULTS
+3, NaN
+---- TYPES
+bigint, double
+====
+---- QUERY
+# GROUP BY of NaN values aggregates NaN's as one grouping
+select count(*), cast(sqrt(0.4-x) as FLOAT) as P, cast(sqrt(1.5-y) as FLOAT) as Q
+from (VALUES((1.6 x, 1.6 y, 0 z), (0.5, 0.5, 0), (5.4, 6, 0),
+ (0.5, 0.5, 0), (0.5, 0.5, 0), (-0.6, 0.5, 0))) T
+group by P, Q order by P, Q
+---- RESULTS
+2, NaN, NaN
+3, NaN, 1
+1, 1, 1
+---- TYPES
+bigint, float, float
+====
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/impala/blob/6684fe19/testdata/workloads/functional-query/queries/QueryTest/exprs.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/exprs.test b/testdata/workloads/functional-query/queries/QueryTest/exprs.test
index 9187b6a..8a25b85 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/exprs.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/exprs.test
@@ -3071,3 +3071,13 @@ where cast(get_json_object(t.json, '$.a.b.c') as int) > 1
---- TYPES
BIGINT
====
+---- QUERY
+# IMPALA-6661 - NaN should not evaluate as the same as any other NaN via <=>
+WITH W AS (SELECT T.*, CAST(SQRT(X) AS FLOAT) P, CAST(SQRT(Y) AS FLOAT) Q
+FROM (VALUES((CAST(-1.0 AS FLOAT) X, CAST(-1.0 AS FLOAT) Y), (-1.0, 0), (0, -1.0), (0, 0))) T )
+SELECT * FROM W WHERE W.Q<=>W.P
+---- RESULTS
+0,0,0,0
+---- TYPES
+FLOAT, FLOAT, FLOAT, FLOAT
+====
http://git-wip-us.apache.org/repos/asf/impala/blob/6684fe19/testdata/workloads/functional-query/queries/QueryTest/joins.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/joins.test b/testdata/workloads/functional-query/queries/QueryTest/joins.test
index 643c3b1..bbead6c 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/joins.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/joins.test
@@ -795,3 +795,30 @@ where sqrt(-a.id) = b.float_col / b.double_col
---- TYPES
INT
====
+---- QUERY
+# Test than 'nan' != 'nan' when joining.
+with t1 as (select cast(sqrt(0.5-x) as FLOAT) c1, y c2 from
+ (VALUES((1.6 x, 0 y), (3.2, 1), (5.4,2), (0.5, 3), (0.5, 4), (-0.5, 5))) XX),
+ t2 as (select * from t1)
+select t1.c1,t2.c1 from t1 right outer join t2 on t1.c1 = t2.c1;
+---- RESULTS
+0,0
+0,0
+0,0
+0,0
+1,1
+NULL,NaN
+NULL,NaN
+NULL,NaN
+---- TYPES
+FLOAT,FLOAT
+====
+---- QUERY
+# Test that NaN <=> NaN in joins
+select t1.float_col as v
+ from functional.alltypessmall t1, functional.alltypessmall t2
+ where sqrt(0.5-t1.float_col) <=> sqrt(0.5-t2.float_col) and t1.float_col > 0.5
+---- RESULTS
+---- TYPES
+FLOAT
+====