You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jr...@apache.org on 2018/01/10 20:51:38 UTC
[1/2] impala git commit: IMPALA-3651: Adds murmur_hash() built-in function
Repository: impala
Updated Branches:
refs/heads/master 31c6a1719 -> 409b58150
IMPALA-3651: Adds murmur_hash() built-in function
murmur_hash relies on HashUtil::MurmurHash2_64, which is the 64-bit
version of MurmurHash2.
Testing:
Add unit tests for primitive types: ExprTest.MurmurHashFunction
Add E2E tests to exprs.test
Change-Id: I14d56ffb8fab256f3f66a2669271fd4b3c50cc29
Reviewed-on: http://gerrit.cloudera.org:8080/8893
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/60418650
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/60418650
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/60418650
Branch: refs/heads/master
Commit: 6041865031c9298a6da00401bafe14553e1e0662
Parents: 31c6a17
Author: Jinchul <ji...@gmail.com>
Authored: Wed Dec 20 20:27:57 2017 +0900
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Jan 10 20:17:26 2018 +0000
----------------------------------------------------------------------
be/src/exprs/expr-test.cc | 46 ++++++++++++++++++
be/src/exprs/utility-functions-ir.cc | 49 ++++++++++++++++++++
be/src/exprs/utility-functions.h | 8 ++++
be/src/util/hash-util.h | 1 +
common/function-registry/impala_functions.py | 18 +++++++
.../queries/QueryTest/exprs.test | 12 +++++
6 files changed, 134 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/impala/blob/60418650/be/src/exprs/expr-test.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index 4322adc..7a2a65a 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -4578,6 +4578,52 @@ TEST_F(ExprTest, UtilityFunctions) {
TestIsNull("fnv_hash(NULL)", TYPE_BIGINT);
}
+TEST_F(ExprTest, MurmurHashFunction) {
+ string s("hello world");
+ int64_t expected = HashUtil::MurmurHash2_64(s.data(), s.size(),
+ HashUtil::MURMUR_DEFAULT_SEED);
+ // The comparison with the constant is to detect if MurmurHash2_64 accidentally
+ // changes behavior.
+ EXPECT_EQ(-3190198453633110066, expected);
+ TestValue("murmur_hash('hello world')", TYPE_BIGINT, expected);
+ s = string("");
+ expected = HashUtil::MurmurHash2_64(s.data(), s.size(), HashUtil::MURMUR_DEFAULT_SEED);
+ TestValue("murmur_hash('')", TYPE_BIGINT, expected);
+
+ IntValMap::iterator int_iter;
+ for(int_iter = min_int_values_.begin(); int_iter != min_int_values_.end();
+ ++int_iter) {
+ ColumnType t = ColumnType(static_cast<PrimitiveType>(int_iter->first));
+ expected = HashUtil::MurmurHash2_64(
+ &int_iter->second, t.GetByteSize(), HashUtil::MURMUR_DEFAULT_SEED);
+ string& val = default_type_strs_[int_iter->first];
+ TestValue("murmur_hash(" + val + ")", TYPE_BIGINT, expected);
+ }
+
+ // Don't use min_float_values_ for testing floats and doubles due to improper float
+ // and double literal handling, see IMPALA-669.
+ float float_val = 42;
+ expected = HashUtil::MurmurHash2_64(&float_val, sizeof(float),
+ HashUtil::MURMUR_DEFAULT_SEED);
+ TestValue("murmur_hash(CAST(42 as FLOAT))", TYPE_BIGINT, expected);
+
+ double double_val = 42;
+ expected = HashUtil::MurmurHash2_64(&double_val, sizeof(double),
+ HashUtil::MURMUR_DEFAULT_SEED);
+ TestValue("murmur_hash(CAST(42 as DOUBLE))", TYPE_BIGINT, expected);
+
+ expected = HashUtil::MurmurHash2_64(&default_timestamp_val_, 12,
+ HashUtil::MURMUR_DEFAULT_SEED);
+ TestValue("murmur_hash(" + default_timestamp_str_ + ")", TYPE_BIGINT, expected);
+
+ bool bool_val = false;
+ expected = HashUtil::MurmurHash2_64(&bool_val, 1, HashUtil::MURMUR_DEFAULT_SEED);
+ TestValue("murmur_hash(FALSE)", TYPE_BIGINT, expected);
+
+ // Test NULL input returns NULL
+ TestIsNull("murmur_hash(NULL)", TYPE_BIGINT);
+}
+
TEST_F(ExprTest, SessionFunctions) {
enum Session {S1, S2};
enum Query {Q1, Q2};
http://git-wip-us.apache.org/repos/asf/impala/blob/60418650/be/src/exprs/utility-functions-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/utility-functions-ir.cc b/be/src/exprs/utility-functions-ir.cc
index 18c4267..9806ec3 100644
--- a/be/src/exprs/utility-functions-ir.cc
+++ b/be/src/exprs/utility-functions-ir.cc
@@ -76,6 +76,55 @@ template BigIntVal UtilityFunctions::FnvHash(
template BigIntVal UtilityFunctions::FnvHash(
FunctionContext* ctx, const DoubleVal& input_val);
+BigIntVal UtilityFunctions::MurmurHashString(FunctionContext* ctx,
+ const StringVal& input_val) {
+ if (input_val.is_null) return BigIntVal::null();
+ return BigIntVal(HashUtil::MurmurHash2_64(input_val.ptr, input_val.len,
+ HashUtil::MURMUR_DEFAULT_SEED));
+}
+
+BigIntVal UtilityFunctions::MurmurHashTimestamp(FunctionContext* ctx,
+ const TimestampVal& input_val) {
+ if (input_val.is_null) return BigIntVal::null();
+ TimestampValue tv = TimestampValue::FromTimestampVal(input_val);
+ return BigIntVal(HashUtil::MurmurHash2_64(&tv, 12, HashUtil::MURMUR_DEFAULT_SEED));
+}
+
+template<typename T>
+BigIntVal UtilityFunctions::MurmurHash(FunctionContext* ctx, const T& input_val) {
+ if (input_val.is_null) return BigIntVal::null();
+ return BigIntVal(
+ HashUtil::MurmurHash2_64(&input_val.val, sizeof(input_val.val),
+ HashUtil::MURMUR_DEFAULT_SEED));
+}
+
+// Note that this only hashes the unscaled value and not the scale or precision, so this
+// function is only valid when used over a single decimal type.
+BigIntVal UtilityFunctions::MurmurHashDecimal(FunctionContext* ctx,
+ const DecimalVal& input_val) {
+ if (input_val.is_null) return BigIntVal::null();
+ const FunctionContext::TypeDesc& input_type = *ctx->GetArgType(0);
+ int byte_size = ColumnType::GetDecimalByteSize(input_type.precision);
+ // val4, val8 and val16 all start at the same memory address.
+ return BigIntVal(HashUtil::MurmurHash2_64(&input_val.val16, byte_size,
+ HashUtil::MURMUR_DEFAULT_SEED));
+}
+
+template BigIntVal UtilityFunctions::MurmurHash(
+ FunctionContext* ctx, const BooleanVal& input_val);
+template BigIntVal UtilityFunctions::MurmurHash(
+ FunctionContext* ctx, const TinyIntVal& input_val);
+template BigIntVal UtilityFunctions::MurmurHash(
+ FunctionContext* ctx, const SmallIntVal& input_val);
+template BigIntVal UtilityFunctions::MurmurHash(
+ FunctionContext* ctx, const IntVal& input_val);
+template BigIntVal UtilityFunctions::MurmurHash(
+ FunctionContext* ctx, const BigIntVal& input_val);
+template BigIntVal UtilityFunctions::MurmurHash(
+ FunctionContext* ctx, const FloatVal& input_val);
+template BigIntVal UtilityFunctions::MurmurHash(
+ FunctionContext* ctx, const DoubleVal& input_val);
+
StringVal UtilityFunctions::User(FunctionContext* ctx) {
StringVal user(ctx->user());
// An empty string indicates the user wasn't set in the session or in the query request.
http://git-wip-us.apache.org/repos/asf/impala/blob/60418650/be/src/exprs/utility-functions.h
----------------------------------------------------------------------
diff --git a/be/src/exprs/utility-functions.h b/be/src/exprs/utility-functions.h
index 41409c4..dbb1fb1 100644
--- a/be/src/exprs/utility-functions.h
+++ b/be/src/exprs/utility-functions.h
@@ -50,6 +50,14 @@ class UtilityFunctions {
static BigIntVal FnvHashTimestamp(FunctionContext* ctx, const TimestampVal& input_val);
static BigIntVal FnvHashDecimal(FunctionContext* ctx, const DecimalVal& input_val);
+ /// Implementations of the MurmurHash function. Returns the Murmur hash of the
+ /// input as an int64_t.
+ template <typename T> static BigIntVal MurmurHash(FunctionContext* ctx,
+ const T& input_val);
+ static BigIntVal MurmurHashString(FunctionContext* ctx, const StringVal& input_val);
+ static BigIntVal MurmurHashTimestamp(FunctionContext* ctx, const TimestampVal& input_val);
+ static BigIntVal MurmurHashDecimal(FunctionContext* ctx, const DecimalVal& input_val);
+
/// Implementation of the user() function. Returns the username of the user who executed
/// this function.
static StringVal User(FunctionContext* ctx);
http://git-wip-us.apache.org/repos/asf/impala/blob/60418650/be/src/util/hash-util.h
----------------------------------------------------------------------
diff --git a/be/src/util/hash-util.h b/be/src/util/hash-util.h
index 054cb8a..212669e 100644
--- a/be/src/util/hash-util.h
+++ b/be/src/util/hash-util.h
@@ -119,6 +119,7 @@ class HashUtil {
return hash;
}
+ static const uint64_t MURMUR_DEFAULT_SEED = 0x0;
static const uint64_t MURMUR_PRIME = 0xc6a4a7935bd1e995;
static const int MURMUR_R = 47;
http://git-wip-us.apache.org/repos/asf/impala/blob/60418650/common/function-registry/impala_functions.py
----------------------------------------------------------------------
diff --git a/common/function-registry/impala_functions.py b/common/function-registry/impala_functions.py
index 0e3a3b8..aa9bb49 100644
--- a/common/function-registry/impala_functions.py
+++ b/common/function-registry/impala_functions.py
@@ -588,6 +588,24 @@ visible_functions = [
'_ZN6impala16UtilityFunctions16FnvHashTimestampEPN10impala_udf15FunctionContextERKNS1_12TimestampValE'],
[['fnv_hash'], 'BIGINT', ['DECIMAL'],
'_ZN6impala16UtilityFunctions14FnvHashDecimalEPN10impala_udf15FunctionContextERKNS1_10DecimalValE'],
+ [['murmur_hash'], 'BIGINT', ['TINYINT'],
+ '_ZN6impala16UtilityFunctions10MurmurHashIN10impala_udf10TinyIntValEEENS2_9BigIntValEPNS2_15FunctionContextERKT_'],
+ [['murmur_hash'], 'BIGINT', ['SMALLINT'],
+ '_ZN6impala16UtilityFunctions10MurmurHashIN10impala_udf11SmallIntValEEENS2_9BigIntValEPNS2_15FunctionContextERKT_'],
+ [['murmur_hash'], 'BIGINT', ['INT'],
+ '_ZN6impala16UtilityFunctions10MurmurHashIN10impala_udf6IntValEEENS2_9BigIntValEPNS2_15FunctionContextERKT_'],
+ [['murmur_hash'], 'BIGINT', ['BIGINT'],
+ '_ZN6impala16UtilityFunctions10MurmurHashIN10impala_udf9BigIntValEEES3_PNS2_15FunctionContextERKT_'],
+ [['murmur_hash'], 'BIGINT', ['FLOAT'],
+ '_ZN6impala16UtilityFunctions10MurmurHashIN10impala_udf8FloatValEEENS2_9BigIntValEPNS2_15FunctionContextERKT_'],
+ [['murmur_hash'], 'BIGINT', ['DOUBLE'],
+ '_ZN6impala16UtilityFunctions10MurmurHashIN10impala_udf9DoubleValEEENS2_9BigIntValEPNS2_15FunctionContextERKT_'],
+ [['murmur_hash'], 'BIGINT', ['STRING'],
+ '_ZN6impala16UtilityFunctions16MurmurHashStringEPN10impala_udf15FunctionContextERKNS1_9StringValE'],
+ [['murmur_hash'], 'BIGINT', ['TIMESTAMP'],
+ '_ZN6impala16UtilityFunctions19MurmurHashTimestampEPN10impala_udf15FunctionContextERKNS1_12TimestampValE'],
+ [['murmur_hash'], 'BIGINT', ['DECIMAL'],
+ '_ZN6impala16UtilityFunctions17MurmurHashDecimalEPN10impala_udf15FunctionContextERKNS1_10DecimalValE'],
# (Non)NullValue functions
[['nullvalue'], 'BOOLEAN', ['BOOLEAN'], '_ZN6impala15IsNullPredicate6IsNullIN10impala_udf10BooleanValEEES3_PNS2_15FunctionContextERKT_'],
http://git-wip-us.apache.org/repos/asf/impala/blob/60418650/testdata/workloads/functional-query/queries/QueryTest/exprs.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/exprs.test b/testdata/workloads/functional-query/queries/QueryTest/exprs.test
index 4a0b7a7..a15f3b5 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/exprs.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/exprs.test
@@ -2919,3 +2919,15 @@ where cast(timestamp_col as string) = '2009-02-01 00:00:00'
---- TYPES
int, timestamp
====
+---- QUERY
+select
+murmur_hash(bool_col), murmur_hash(tinyint_col), murmur_hash(smallint_col),
+murmur_hash(int_col), murmur_hash(bigint_col), murmur_hash(float_col),
+murmur_hash(double_col), murmur_hash(date_string_col), murmur_hash(string_col),
+murmur_hash(timestamp_col), murmur_hash(year), murmur_hash(month)
+from functional.alltypes where id = 7
+---- RESULTS
+6351753276682545529,-8688181892109895221,5243888771994935971,988560926123810380,7108101660231151623,-8653637999116590182,-6387622242983883150,3788918177590065252,-7446916648201533712,7077699884854357665,7331012058162401363,-780611581681153783
+---- TYPES
+BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT
+====
[2/2] impala git commit: IMPALA-6278: [DOCS] Add release note subtopics
Posted by jr...@apache.org.
IMPALA-6278: [DOCS] Add release note subtopics
Primarily placeholders that link to the 2.11
CHANGELOG file on the web.
Change-Id: I968f53c6652197774cdec364c47bc10277e6877a
Reviewed-on: http://gerrit.cloudera.org:8080/8992
Reviewed-by: Michael Brown <mi...@cloudera.com>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/409b5815
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/409b5815
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/409b5815
Branch: refs/heads/master
Commit: 409b58150aaf6092107f1ad98813a9bd832e60fe
Parents: 6041865
Author: John Russell <jr...@cloudera.com>
Authored: Tue Jan 9 20:34:32 2018 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Jan 10 20:38:23 2018 +0000
----------------------------------------------------------------------
docs/impala_keydefs.ditamap | 1 +
docs/topics/impala_fixed_issues.xml | 16 ++++++++++++++++
docs/topics/impala_incompatible_changes.xml | 16 ++++++++++++++++
docs/topics/impala_new_features.xml | 17 +++++++++++++++++
4 files changed, 50 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/impala/blob/409b5815/docs/impala_keydefs.ditamap
----------------------------------------------------------------------
diff --git a/docs/impala_keydefs.ditamap b/docs/impala_keydefs.ditamap
index 02cff8a..39d65ff 100644
--- a/docs/impala_keydefs.ditamap
+++ b/docs/impala_keydefs.ditamap
@@ -10591,6 +10591,7 @@ under the License.
<keydef keys="impala13_full"><topicmeta><keywords><keyword>Impala 1.3</keyword></keywords></topicmeta></keydef>
<!-- Pointers to changelog pages -->
+ <keydef keys="changelog_211" href="https://impala.apache.org/docs/changelog-2.11.html" scope="external" format="html"/>
<keydef keys="changelog_210" href="https://impala.apache.org/docs/changelog-2.10.html" scope="external" format="html"/>
<keydef keys="changelog_29" href="https://impala.apache.org/docs/changelog-2.9.html" scope="external" format="html"/>
<keydef keys="changelog_28" href="https://impala.apache.org/docs/changelog-2.8.html" scope="external" format="html"/>
http://git-wip-us.apache.org/repos/asf/impala/blob/409b5815/docs/topics/impala_fixed_issues.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_fixed_issues.xml b/docs/topics/impala_fixed_issues.xml
index b2627fc..6f2b789 100644
--- a/docs/topics/impala_fixed_issues.xml
+++ b/docs/topics/impala_fixed_issues.xml
@@ -46,6 +46,22 @@ under the License.
<p outputclass="toc inpage"/>
</conbody>
+<!-- All 2.11.x subsections go under here -->
+
+ <concept rev="2.11.0" id="fixed_issues_2_11_0">
+
+ <title>Issues Fixed in <keyword keyref="impala2110"/></title>
+
+ <conbody>
+
+ <p>
+ For the full list of issues closed in this release, including bug fixes,
+ see the <xref keyref="changelog_211">changelog for <keyword keyref="impala211"/></xref>.
+ </p>
+
+ </conbody>
+ </concept>
+
<!-- All 2.10.x subsections go under here -->
<concept rev="2.10.0" id="fixed_issues_2100">
http://git-wip-us.apache.org/repos/asf/impala/blob/409b5815/docs/topics/impala_incompatible_changes.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_incompatible_changes.xml b/docs/topics/impala_incompatible_changes.xml
index 6d75a37..9d8d711 100644
--- a/docs/topics/impala_incompatible_changes.xml
+++ b/docs/topics/impala_incompatible_changes.xml
@@ -53,6 +53,22 @@ under the License.
<p outputclass="toc inpage"/>
</conbody>
+ <concept rev="2.11.0" id="incompatible_changes_211x">
+
+ <title>Incompatible Changes Introduced in Impala 2.11.x</title>
+
+ <conbody>
+
+ <p>
+ For the full list of issues closed in this release, including any that introduce
+ behavior changes or incompatibilities, see the
+ <xref keyref="changelog_211">changelog for <keyword keyref="impala211"/></xref>.
+ </p>
+
+ </conbody>
+
+ </concept>
+
<concept rev="2.10.0" id="incompatible_changes_210x">
<title>Incompatible Changes Introduced in Impala 2.10.x</title>
http://git-wip-us.apache.org/repos/asf/impala/blob/409b5815/docs/topics/impala_new_features.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_new_features.xml b/docs/topics/impala_new_features.xml
index bff3278..0deb311 100644
--- a/docs/topics/impala_new_features.xml
+++ b/docs/topics/impala_new_features.xml
@@ -46,6 +46,23 @@ under the License.
</conbody>
+<!-- All 2.11.x new features go under here -->
+
+ <concept rev="2.11.0" id="new_features_2110">
+
+ <title>New Features in <keyword keyref="impala211_full"/></title>
+
+ <conbody>
+
+ <p>
+ For the full list of issues closed in this release, including the issues
+ marked as <q>new features</q> or <q>improvements</q>, see the
+ <xref keyref="changelog_211">changelog for <keyword keyref="impala211"/></xref>.
+ </p>
+
+ </conbody>
+ </concept>
+
<!-- All 2.10.x new features go under here -->
<concept rev="2.10.0" id="new_features_2100">