You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jr...@apache.org on 2018/01/10 20:51:38 UTC

[1/2] impala git commit: IMPALA-3651: Adds murmur_hash() built-in function

Repository: impala
Updated Branches:
  refs/heads/master 31c6a1719 -> 409b58150


IMPALA-3651: Adds murmur_hash() built-in function

murmur_hash relies on HashUtil::MurmurHash2_64, which is the 64-bit
version of MurmurHash2.

Testing:
Added unit tests for primitive types: ExprTest.MurmurHashFunction
Added end-to-end (E2E) tests to exprs.test

Change-Id: I14d56ffb8fab256f3f66a2669271fd4b3c50cc29
Reviewed-on: http://gerrit.cloudera.org:8080/8893
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/60418650
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/60418650
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/60418650

Branch: refs/heads/master
Commit: 6041865031c9298a6da00401bafe14553e1e0662
Parents: 31c6a17
Author: Jinchul <ji...@gmail.com>
Authored: Wed Dec 20 20:27:57 2017 +0900
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Jan 10 20:17:26 2018 +0000

----------------------------------------------------------------------
 be/src/exprs/expr-test.cc                       | 46 ++++++++++++++++++
 be/src/exprs/utility-functions-ir.cc            | 49 ++++++++++++++++++++
 be/src/exprs/utility-functions.h                |  8 ++++
 be/src/util/hash-util.h                         |  1 +
 common/function-registry/impala_functions.py    | 18 +++++++
 .../queries/QueryTest/exprs.test                | 12 +++++
 6 files changed, 134 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/60418650/be/src/exprs/expr-test.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index 4322adc..7a2a65a 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -4578,6 +4578,52 @@ TEST_F(ExprTest, UtilityFunctions) {
   TestIsNull("fnv_hash(NULL)", TYPE_BIGINT);
 }
 
+TEST_F(ExprTest, MurmurHashFunction) {
+  string s("hello world");
+  int64_t expected = HashUtil::MurmurHash2_64(s.data(), s.size(),
+      HashUtil::MURMUR_DEFAULT_SEED);
+  // The comparison with the constant is to detect if MurmurHash2_64 accidentally
+  // changes behavior.
+  EXPECT_EQ(-3190198453633110066, expected);
+  TestValue("murmur_hash('hello world')", TYPE_BIGINT, expected);
+  s = string("");
+  expected = HashUtil::MurmurHash2_64(s.data(), s.size(), HashUtil::MURMUR_DEFAULT_SEED);
+  TestValue("murmur_hash('')", TYPE_BIGINT, expected);
+
+  IntValMap::iterator int_iter;
+  for(int_iter = min_int_values_.begin(); int_iter != min_int_values_.end();
+      ++int_iter) {
+    ColumnType t = ColumnType(static_cast<PrimitiveType>(int_iter->first));
+    expected = HashUtil::MurmurHash2_64(
+        &int_iter->second, t.GetByteSize(), HashUtil::MURMUR_DEFAULT_SEED);
+    string& val = default_type_strs_[int_iter->first];
+    TestValue("murmur_hash(" + val + ")", TYPE_BIGINT, expected);
+  }
+
+  // Don't use min_float_values_ for testing floats and doubles due to improper float
+  // and double literal handling, see IMPALA-669.
+  float float_val = 42;
+  expected = HashUtil::MurmurHash2_64(&float_val, sizeof(float),
+      HashUtil::MURMUR_DEFAULT_SEED);
+  TestValue("murmur_hash(CAST(42 as FLOAT))", TYPE_BIGINT, expected);
+
+  double double_val = 42;
+  expected = HashUtil::MurmurHash2_64(&double_val, sizeof(double),
+      HashUtil::MURMUR_DEFAULT_SEED);
+  TestValue("murmur_hash(CAST(42 as DOUBLE))", TYPE_BIGINT, expected);
+
+  expected = HashUtil::MurmurHash2_64(&default_timestamp_val_, 12,
+      HashUtil::MURMUR_DEFAULT_SEED);
+  TestValue("murmur_hash(" + default_timestamp_str_ + ")", TYPE_BIGINT, expected);
+
+  bool bool_val = false;
+  expected = HashUtil::MurmurHash2_64(&bool_val, 1, HashUtil::MURMUR_DEFAULT_SEED);
+  TestValue("murmur_hash(FALSE)", TYPE_BIGINT, expected);
+
+  // Test NULL input returns NULL
+  TestIsNull("murmur_hash(NULL)", TYPE_BIGINT);
+}
+
 TEST_F(ExprTest, SessionFunctions) {
   enum Session {S1, S2};
   enum Query {Q1, Q2};

http://git-wip-us.apache.org/repos/asf/impala/blob/60418650/be/src/exprs/utility-functions-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/utility-functions-ir.cc b/be/src/exprs/utility-functions-ir.cc
index 18c4267..9806ec3 100644
--- a/be/src/exprs/utility-functions-ir.cc
+++ b/be/src/exprs/utility-functions-ir.cc
@@ -76,6 +76,55 @@ template BigIntVal UtilityFunctions::FnvHash(
 template BigIntVal UtilityFunctions::FnvHash(
     FunctionContext* ctx, const DoubleVal& input_val);
 
+BigIntVal UtilityFunctions::MurmurHashString(FunctionContext* ctx,
+    const StringVal& input_val) {
+  if (input_val.is_null) return BigIntVal::null();
+  return BigIntVal(HashUtil::MurmurHash2_64(input_val.ptr, input_val.len,
+        HashUtil::MURMUR_DEFAULT_SEED));
+}
+
+BigIntVal UtilityFunctions::MurmurHashTimestamp(FunctionContext* ctx,
+    const TimestampVal& input_val) {
+  if (input_val.is_null) return BigIntVal::null();
+  TimestampValue tv = TimestampValue::FromTimestampVal(input_val);
+  return BigIntVal(HashUtil::MurmurHash2_64(&tv, 12, HashUtil::MURMUR_DEFAULT_SEED));
+}
+
+template<typename T>
+BigIntVal UtilityFunctions::MurmurHash(FunctionContext* ctx, const T& input_val) {
+  if (input_val.is_null) return BigIntVal::null();
+  return BigIntVal(
+      HashUtil::MurmurHash2_64(&input_val.val, sizeof(input_val.val),
+        HashUtil::MURMUR_DEFAULT_SEED));
+}
+
+// Note that this only hashes the unscaled value and not the scale or precision, so this
+// function is only valid when used over a single decimal type.
+BigIntVal UtilityFunctions::MurmurHashDecimal(FunctionContext* ctx,
+    const DecimalVal& input_val) {
+  if (input_val.is_null) return BigIntVal::null();
+  const FunctionContext::TypeDesc& input_type = *ctx->GetArgType(0);
+  int byte_size = ColumnType::GetDecimalByteSize(input_type.precision);
+  // val4, val8 and val16 all start at the same memory address.
+  return BigIntVal(HashUtil::MurmurHash2_64(&input_val.val16, byte_size,
+        HashUtil::MURMUR_DEFAULT_SEED));
+}
+
+template BigIntVal UtilityFunctions::MurmurHash(
+    FunctionContext* ctx, const BooleanVal& input_val);
+template BigIntVal UtilityFunctions::MurmurHash(
+    FunctionContext* ctx, const TinyIntVal& input_val);
+template BigIntVal UtilityFunctions::MurmurHash(
+    FunctionContext* ctx, const SmallIntVal& input_val);
+template BigIntVal UtilityFunctions::MurmurHash(
+    FunctionContext* ctx, const IntVal& input_val);
+template BigIntVal UtilityFunctions::MurmurHash(
+    FunctionContext* ctx, const BigIntVal& input_val);
+template BigIntVal UtilityFunctions::MurmurHash(
+    FunctionContext* ctx, const FloatVal& input_val);
+template BigIntVal UtilityFunctions::MurmurHash(
+    FunctionContext* ctx, const DoubleVal& input_val);
+
 StringVal UtilityFunctions::User(FunctionContext* ctx) {
   StringVal user(ctx->user());
   // An empty string indicates the user wasn't set in the session or in the query request.

http://git-wip-us.apache.org/repos/asf/impala/blob/60418650/be/src/exprs/utility-functions.h
----------------------------------------------------------------------
diff --git a/be/src/exprs/utility-functions.h b/be/src/exprs/utility-functions.h
index 41409c4..dbb1fb1 100644
--- a/be/src/exprs/utility-functions.h
+++ b/be/src/exprs/utility-functions.h
@@ -50,6 +50,14 @@ class UtilityFunctions {
   static BigIntVal FnvHashTimestamp(FunctionContext* ctx, const TimestampVal& input_val);
   static BigIntVal FnvHashDecimal(FunctionContext* ctx, const DecimalVal& input_val);
 
+  /// Implementations of the MurmurHash function. Returns the Murmur hash of the
+  /// input as an int64_t.
+  template <typename T> static BigIntVal MurmurHash(FunctionContext* ctx,
+      const T& input_val);
+  static BigIntVal MurmurHashString(FunctionContext* ctx, const StringVal& input_val);
+  static BigIntVal MurmurHashTimestamp(FunctionContext* ctx, const TimestampVal& input_val);
+  static BigIntVal MurmurHashDecimal(FunctionContext* ctx, const DecimalVal& input_val);
+
   /// Implementation of the user() function. Returns the username of the user who executed
   /// this function.
   static StringVal User(FunctionContext* ctx);

http://git-wip-us.apache.org/repos/asf/impala/blob/60418650/be/src/util/hash-util.h
----------------------------------------------------------------------
diff --git a/be/src/util/hash-util.h b/be/src/util/hash-util.h
index 054cb8a..212669e 100644
--- a/be/src/util/hash-util.h
+++ b/be/src/util/hash-util.h
@@ -119,6 +119,7 @@ class HashUtil {
     return hash;
   }
 
+  static const uint64_t MURMUR_DEFAULT_SEED = 0x0;
   static const uint64_t MURMUR_PRIME = 0xc6a4a7935bd1e995;
   static const int MURMUR_R = 47;
 

http://git-wip-us.apache.org/repos/asf/impala/blob/60418650/common/function-registry/impala_functions.py
----------------------------------------------------------------------
diff --git a/common/function-registry/impala_functions.py b/common/function-registry/impala_functions.py
index 0e3a3b8..aa9bb49 100644
--- a/common/function-registry/impala_functions.py
+++ b/common/function-registry/impala_functions.py
@@ -588,6 +588,24 @@ visible_functions = [
    '_ZN6impala16UtilityFunctions16FnvHashTimestampEPN10impala_udf15FunctionContextERKNS1_12TimestampValE'],
   [['fnv_hash'], 'BIGINT', ['DECIMAL'],
    '_ZN6impala16UtilityFunctions14FnvHashDecimalEPN10impala_udf15FunctionContextERKNS1_10DecimalValE'],
+  [['murmur_hash'], 'BIGINT', ['TINYINT'],
+   '_ZN6impala16UtilityFunctions10MurmurHashIN10impala_udf10TinyIntValEEENS2_9BigIntValEPNS2_15FunctionContextERKT_'],
+  [['murmur_hash'], 'BIGINT', ['SMALLINT'],
+   '_ZN6impala16UtilityFunctions10MurmurHashIN10impala_udf11SmallIntValEEENS2_9BigIntValEPNS2_15FunctionContextERKT_'],
+  [['murmur_hash'], 'BIGINT', ['INT'],
+   '_ZN6impala16UtilityFunctions10MurmurHashIN10impala_udf6IntValEEENS2_9BigIntValEPNS2_15FunctionContextERKT_'],
+  [['murmur_hash'], 'BIGINT', ['BIGINT'],
+   '_ZN6impala16UtilityFunctions10MurmurHashIN10impala_udf9BigIntValEEES3_PNS2_15FunctionContextERKT_'],
+  [['murmur_hash'], 'BIGINT', ['FLOAT'],
+   '_ZN6impala16UtilityFunctions10MurmurHashIN10impala_udf8FloatValEEENS2_9BigIntValEPNS2_15FunctionContextERKT_'],
+  [['murmur_hash'], 'BIGINT', ['DOUBLE'],
+   '_ZN6impala16UtilityFunctions10MurmurHashIN10impala_udf9DoubleValEEENS2_9BigIntValEPNS2_15FunctionContextERKT_'],
+  [['murmur_hash'], 'BIGINT', ['STRING'],
+   '_ZN6impala16UtilityFunctions16MurmurHashStringEPN10impala_udf15FunctionContextERKNS1_9StringValE'],
+  [['murmur_hash'], 'BIGINT', ['TIMESTAMP'],
+   '_ZN6impala16UtilityFunctions19MurmurHashTimestampEPN10impala_udf15FunctionContextERKNS1_12TimestampValE'],
+  [['murmur_hash'], 'BIGINT', ['DECIMAL'],
+   '_ZN6impala16UtilityFunctions17MurmurHashDecimalEPN10impala_udf15FunctionContextERKNS1_10DecimalValE'],
 
   # (Non)NullValue functions
   [['nullvalue'], 'BOOLEAN', ['BOOLEAN'], '_ZN6impala15IsNullPredicate6IsNullIN10impala_udf10BooleanValEEES3_PNS2_15FunctionContextERKT_'],

http://git-wip-us.apache.org/repos/asf/impala/blob/60418650/testdata/workloads/functional-query/queries/QueryTest/exprs.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/exprs.test b/testdata/workloads/functional-query/queries/QueryTest/exprs.test
index 4a0b7a7..a15f3b5 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/exprs.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/exprs.test
@@ -2919,3 +2919,15 @@ where cast(timestamp_col as string) = '2009-02-01 00:00:00'
 ---- TYPES
 int, timestamp
 ====
+---- QUERY
+select
+murmur_hash(bool_col), murmur_hash(tinyint_col), murmur_hash(smallint_col),
+murmur_hash(int_col), murmur_hash(bigint_col), murmur_hash(float_col),
+murmur_hash(double_col), murmur_hash(date_string_col), murmur_hash(string_col),
+murmur_hash(timestamp_col), murmur_hash(year), murmur_hash(month)
+from functional.alltypes where id = 7
+---- RESULTS
+6351753276682545529,-8688181892109895221,5243888771994935971,988560926123810380,7108101660231151623,-8653637999116590182,-6387622242983883150,3788918177590065252,-7446916648201533712,7077699884854357665,7331012058162401363,-780611581681153783
+---- TYPES
+BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT , BIGINT
+====


[2/2] impala git commit: IMPALA-6278: [DOCS] Add release note subtopics

Posted by jr...@apache.org.
IMPALA-6278: [DOCS] Add release note subtopics

Primarily placeholders that link to the 2.11
CHANGELOG file on the web.

Change-Id: I968f53c6652197774cdec364c47bc10277e6877a
Reviewed-on: http://gerrit.cloudera.org:8080/8992
Reviewed-by: Michael Brown <mi...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/409b5815
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/409b5815
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/409b5815

Branch: refs/heads/master
Commit: 409b58150aaf6092107f1ad98813a9bd832e60fe
Parents: 6041865
Author: John Russell <jr...@cloudera.com>
Authored: Tue Jan 9 20:34:32 2018 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Jan 10 20:38:23 2018 +0000

----------------------------------------------------------------------
 docs/impala_keydefs.ditamap                 |  1 +
 docs/topics/impala_fixed_issues.xml         | 16 ++++++++++++++++
 docs/topics/impala_incompatible_changes.xml | 16 ++++++++++++++++
 docs/topics/impala_new_features.xml         | 17 +++++++++++++++++
 4 files changed, 50 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/409b5815/docs/impala_keydefs.ditamap
----------------------------------------------------------------------
diff --git a/docs/impala_keydefs.ditamap b/docs/impala_keydefs.ditamap
index 02cff8a..39d65ff 100644
--- a/docs/impala_keydefs.ditamap
+++ b/docs/impala_keydefs.ditamap
@@ -10591,6 +10591,7 @@ under the License.
   <keydef keys="impala13_full"><topicmeta><keywords><keyword>Impala 1.3</keyword></keywords></topicmeta></keydef>
 
 <!-- Pointers to changelog pages -->
+  <keydef keys="changelog_211" href="https://impala.apache.org/docs/changelog-2.11.html" scope="external" format="html"/>
   <keydef keys="changelog_210" href="https://impala.apache.org/docs/changelog-2.10.html" scope="external" format="html"/>
   <keydef keys="changelog_29" href="https://impala.apache.org/docs/changelog-2.9.html" scope="external" format="html"/>
   <keydef keys="changelog_28" href="https://impala.apache.org/docs/changelog-2.8.html" scope="external" format="html"/>

http://git-wip-us.apache.org/repos/asf/impala/blob/409b5815/docs/topics/impala_fixed_issues.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_fixed_issues.xml b/docs/topics/impala_fixed_issues.xml
index b2627fc..6f2b789 100644
--- a/docs/topics/impala_fixed_issues.xml
+++ b/docs/topics/impala_fixed_issues.xml
@@ -46,6 +46,22 @@ under the License.
     <p outputclass="toc inpage"/>
   </conbody>
 
+<!-- All 2.11.x subsections go under here -->
+
+  <concept rev="2.11.0" id="fixed_issues_2_11_0">
+
+    <title>Issues Fixed in <keyword keyref="impala2110"/></title>
+
+    <conbody>
+
+      <p>
+        For the full list of issues closed in this release, including bug fixes,
+        see the <xref keyref="changelog_211">changelog for <keyword keyref="impala211"/></xref>.
+      </p>
+
+    </conbody>
+  </concept>
+
 <!-- All 2.10.x subsections go under here -->
 
   <concept rev="2.10.0" id="fixed_issues_2100">

http://git-wip-us.apache.org/repos/asf/impala/blob/409b5815/docs/topics/impala_incompatible_changes.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_incompatible_changes.xml b/docs/topics/impala_incompatible_changes.xml
index 6d75a37..9d8d711 100644
--- a/docs/topics/impala_incompatible_changes.xml
+++ b/docs/topics/impala_incompatible_changes.xml
@@ -53,6 +53,22 @@ under the License.
     <p outputclass="toc inpage"/>
   </conbody>
 
+  <concept rev="2.11.0" id="incompatible_changes_211x">
+
+    <title>Incompatible Changes Introduced in Impala 2.11.x</title>
+
+    <conbody>
+
+      <p>
+        For the full list of issues closed in this release, including any that introduce
+        behavior changes or incompatibilities, see the
+        <xref keyref="changelog_211">changelog for <keyword keyref="impala211"/></xref>.
+      </p>
+
+    </conbody>
+
+  </concept>
+
   <concept rev="2.10.0" id="incompatible_changes_210x">
 
     <title>Incompatible Changes Introduced in Impala 2.10.x</title>

http://git-wip-us.apache.org/repos/asf/impala/blob/409b5815/docs/topics/impala_new_features.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_new_features.xml b/docs/topics/impala_new_features.xml
index bff3278..0deb311 100644
--- a/docs/topics/impala_new_features.xml
+++ b/docs/topics/impala_new_features.xml
@@ -46,6 +46,23 @@ under the License.
 
   </conbody>
 
+<!-- All 2.11.x new features go under here -->
+
+  <concept rev="2.11.0" id="new_features_2110">
+
+    <title>New Features in <keyword keyref="impala211_full"/></title>
+
+    <conbody>
+
+      <p>
+        For the full list of issues closed in this release, including the issues
+        marked as <q>new features</q> or <q>improvements</q>, see the
+        <xref keyref="changelog_211">changelog for <keyword keyref="impala211"/></xref>.
+      </p>
+
+    </conbody>
+  </concept>
+
 <!-- All 2.10.x new features go under here -->
 
   <concept rev="2.10.0" id="new_features_2100">