You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2017/10/20 23:20:00 UTC

[1/2] incubator-impala git commit: IMPALA-6055: Fix hdfs encryption test far Hadoop 2.8+

Repository: incubator-impala
Updated Branches:
  refs/heads/master 0bccb3ea0 -> 78776e9b5


IMPALA-6055: Fix hdfs encryption test far Hadoop 2.8+

Hadoop changed behavior regarding encrypted partitions and started
automatically provisioning .Trash directories in encrypted partitions.
This interplays poorly with the current test.

Change-Id: I30234aa50fea93f316e75beea2ced002dcea0c24
Reviewed-on: http://gerrit.cloudera.org:8080/8274
Tested-by: Impala Public Jenkins
Reviewed-by: Tim Armstrong <ta...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/0e0f295e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/0e0f295e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/0e0f295e

Branch: refs/heads/master
Commit: 0e0f295e4653eb77fc30fd904d8ee54a69d3399f
Parents: 0bccb3e
Author: Zachary Amsden <za...@cloudera.com>
Authored: Thu Oct 12 18:24:58 2017 -0700
Committer: Zach Amsden <za...@cloudera.com>
Committed: Fri Oct 20 20:45:22 2017 +0000

----------------------------------------------------------------------
 tests/metadata/test_hdfs_encryption.py | 35 +++++++++++++++++++++++------
 1 file changed, 28 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/0e0f295e/tests/metadata/test_hdfs_encryption.py
----------------------------------------------------------------------
diff --git a/tests/metadata/test_hdfs_encryption.py b/tests/metadata/test_hdfs_encryption.py
index ff135b8..27eaaee 100644
--- a/tests/metadata/test_hdfs_encryption.py
+++ b/tests/metadata/test_hdfs_encryption.py
@@ -160,7 +160,7 @@ class TestHdfsEncryption(ImpalaTestSuite):
     self.client.execute("alter table {0}.t1 add partition(j=3)".format(TEST_DB));
     # Clean up the trash directory to create an encrypted zone
     rc, stdout, stderr = exec_process(
-            "hadoop fs -rmr /user/{0}/.Trash/*".format(getpass.getuser()))
+            "hadoop fs -rm -r /user/{0}/.Trash/*".format(getpass.getuser()))
     assert rc == 0, 'Error deleting Trash: %s %s' % (stdout, stderr)
     # Create the necessary encryption zones
     self.create_encryption_zone("testkey1", "/test-warehouse/{0}.db/t1/j=1"\
@@ -169,8 +169,16 @@ class TestHdfsEncryption(ImpalaTestSuite):
             .format(TEST_DB))
     self.create_encryption_zone("testkey1", "/test-warehouse/{0}.db/t1/j=3"\
             .format(TEST_DB))
-    self.create_encryption_zone("testkey2", "/user/{0}/.Trash/".format(\
-            getpass.getuser()))
+
+    # HDFS 2.8+ behavior is to create individual trash per encryption zone;
+    # don't create an encryption zone on .Trash in that case, otherwise
+    # recursive trash is created.
+    has_own_trash = self.hdfs_client.exists(
+        "/test-warehouse/{0}.db/t1/j=1/.Trash".format(TEST_DB))
+    if not has_own_trash:
+      self.create_encryption_zone("testkey2", "/user/{0}/.Trash/".format(\
+              getpass.getuser()))
+
     # Load sample data into the partition directories
     self.hdfs_client.create_file("test-warehouse/{0}.db/t1/j=1/j1.txt"\
             .format(TEST_DB), file_data='j1')
@@ -178,12 +186,25 @@ class TestHdfsEncryption(ImpalaTestSuite):
             .format(TEST_DB), file_data='j2')
     self.hdfs_client.create_file("test-warehouse/{0}.db/t1/j=3/j3.txt"\
             .format(TEST_DB), file_data='j3')
+
     # Drop the partition (j=1) without purge and make sure partition directory still
     # exists. This behavior is expected due to the difference in encryption zones
-    self.execute_query_expect_failure(self.client, "alter table {0}.t1 drop \
-            partition(j=1)".format(TEST_DB));
-    assert self.hdfs_client.exists("test-warehouse/{0}.db/t1/j=1/j1.txt".format(TEST_DB))
-    assert self.hdfs_client.exists("test-warehouse/{0}.db/t1/j=1".format(TEST_DB))
+    # between the .Trash and the warehouse directory (prior to HDFS 2.8)
+    if not has_own_trash:
+      self.execute_query_expect_failure(self.client, "alter table {0}.t1 drop \
+              partition(j=1)".format(TEST_DB));
+      assert self.hdfs_client.exists("test-warehouse/{0}.db/t1/j=1/j1.txt".format(TEST_DB))
+      assert self.hdfs_client.exists("test-warehouse/{0}.db/t1/j=1".format(TEST_DB))
+    else:
+      # HDFS 2.8+ behavior succeeds the query and creates trash; the partition removal
+      # ends up destroying the directories which moves this back to the user's trash
+      self.client.execute("alter table {0}.t1 drop partition(j=1)".format(TEST_DB));
+      assert self.hdfs_client.exists(
+        "/user/{0}/.Trash/Current/test-warehouse/{1}.db/t1/j=1/j1.txt"\
+        .format(getpass.getuser(), TEST_DB))
+      assert not self.hdfs_client.exists("test-warehouse/{0}.db/t1/j=1/j1.txt".format(TEST_DB))
+      assert not self.hdfs_client.exists("test-warehouse/{0}.db/t1/j=1".format(TEST_DB))
+
     # Drop the partition j=2 (with purge) and make sure the partition directory is deleted
     self.client.execute("alter table {0}.t1 drop partition(j=2) purge".format(TEST_DB))
     assert not self.hdfs_client.exists("test-warehouse/{0}.db/t1/j=2/j2.txt".format(TEST_DB))


[2/2] incubator-impala git commit: IMPALA-5668: Fix cast(X as timestamp) for negative subsecond Decimals

Posted by ta...@apache.org.
IMPALA-5668: Fix cast(X as timestamp) for negative subsecond Decimals

The timestamp conversion from negative fractional Decimal types
(interpreted as unix timestamp) was wrong - the whole part
was rounded toward zero, and fractional part was being added
instead of being subtracted. This is fixed by subtracting the
fractional part in case of negative decimals.

Example for the wrong behaviour:
+-------------------------------------------------+
| cast(cast(-0.1 as decimal(18,10)) as timestamp) |
+-------------------------------------------------+
| 1970-01-01 00:00:00.100000000                   |
+-------------------------------------------------+
while casting to double works correctly:
+-----------------------------------------+
| cast(cast(-0.1 as double) as timestamp) |
+-----------------------------------------+
| 1969-12-31 23:59:59.900000000           |
+-----------------------------------------+

Change-Id: I8216f4c0f100c1bd68891cd6048236bfe4c205f0
Reviewed-on: http://gerrit.cloudera.org:8080/8051
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/78776e9b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/78776e9b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/78776e9b

Branch: refs/heads/master
Commit: 78776e9b5f03d122dbc6ddda81aec6e292f75ab4
Parents: 0e0f295
Author: Csaba Ringhofer <cs...@cloudera.com>
Authored: Wed Sep 13 17:09:07 2017 +0200
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Oct 20 22:38:54 2017 +0000

----------------------------------------------------------------------
 be/src/exprs/decimal-operators-ir.cc | 62 ++++++++++++++-----------------
 be/src/exprs/decimal-operators.h     |  6 ++-
 be/src/exprs/expr-test.cc            | 10 +++++
 be/src/runtime/decimal-value.h       |  2 +
 be/src/runtime/timestamp-value.h     |  2 +-
 5 files changed, 45 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/78776e9b/be/src/exprs/decimal-operators-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/decimal-operators-ir.cc b/be/src/exprs/decimal-operators-ir.cc
index 070c70f..17ba972 100644
--- a/be/src/exprs/decimal-operators-ir.cc
+++ b/be/src/exprs/decimal-operators-ir.cc
@@ -588,8 +588,9 @@ StringVal DecimalOperators::CastToStringVal(
 }
 
 template <typename T>
-IR_ALWAYS_INLINE T DecimalOperators::ConvertToNanoseconds(T val, int scale) {
-  // Nanosecond scale means there should be 9 decimal digits.
+IR_ALWAYS_INLINE int32_t DecimalOperators::ConvertToNanoseconds(T val, int scale) {
+  // Nanosecond scale means there should be 9 decimal digits, which is representable
+  // with int32_t.
   const int NANOSECOND_SCALE = 9;
   T nanoseconds;
   if (LIKELY(scale <= NANOSECOND_SCALE)) {
@@ -599,9 +600,30 @@ IR_ALWAYS_INLINE T DecimalOperators::ConvertToNanoseconds(T val, int scale) {
     nanoseconds = val / DecimalUtil::GetScaleMultiplier<T>(
         scale - NANOSECOND_SCALE);
   }
+
+  DCHECK(nanoseconds >= numeric_limits<int32_t>::min()
+      && nanoseconds <= numeric_limits<int32_t>::max());
+
   return nanoseconds;
 }
 
+template <typename T>
+TimestampVal DecimalOperators::ConvertToTimestampVal(const T& decimal_value, int scale) {
+  typename T::StorageType seconds = decimal_value.whole_part(scale);
+  if (seconds < numeric_limits<int64_t>::min() ||
+      seconds > numeric_limits<int64_t>::max()) {
+    // TimeStampVal() takes int64_t.
+    return TimestampVal::null();
+  }
+  int32_t nanoseconds =
+      ConvertToNanoseconds(decimal_value.fractional_part(scale), scale);
+  if(decimal_value.is_negative()) nanoseconds *= -1;
+  TimestampVal result;
+  TimestampValue::FromUnixTimeNanos(seconds, nanoseconds).ToTimestampVal(&result);
+  return result;
+}
+
+
 TimestampVal DecimalOperators::CastToTimestampVal(
     FunctionContext* ctx, const DecimalVal& val) {
   if (val.is_null) return TimestampVal::null();
@@ -609,43 +631,13 @@ TimestampVal DecimalOperators::CastToTimestampVal(
   int scale = ctx->impl()->GetConstFnAttr(FunctionContextImpl::ARG_TYPE_SCALE, 0);
   TimestampVal result;
   switch (ColumnType::GetDecimalByteSize(precision)) {
-    case 4: {
-      Decimal4Value dv(val.val4);
-      int32_t seconds = dv.whole_part(scale);
-      int32_t nanoseconds = ConvertToNanoseconds(
-          dv.fractional_part(scale), scale);
-      TimestampValue tv = TimestampValue::FromUnixTimeNanos(seconds, nanoseconds);
-      tv.ToTimestampVal(&result);
-      break;
-    }
-    case 8: {
-      Decimal8Value dv(val.val8);
-      int64_t seconds = dv.whole_part(scale);
-      int64_t nanoseconds = ConvertToNanoseconds(
-          dv.fractional_part(scale), scale);
-      TimestampValue tv = TimestampValue::FromUnixTimeNanos(seconds, nanoseconds);
-      tv.ToTimestampVal(&result);
-      break;
-    }
-    case 16: {
-      Decimal16Value dv(val.val16);
-      int128_t seconds = dv.whole_part(scale);
-      if (seconds < numeric_limits<int64_t>::min() ||
-          seconds > numeric_limits<int64_t>::max()) {
-        // TimeStampVal() takes int64_t.
-        return TimestampVal::null();
-      }
-      int128_t nanoseconds = ConvertToNanoseconds(
-          dv.fractional_part(scale), scale);
-      TimestampValue tv = TimestampValue::FromUnixTimeNanos(seconds, nanoseconds);
-      tv.ToTimestampVal(&result);
-      break;
-    }
+    case 4: return ConvertToTimestampVal(Decimal4Value(val.val4), scale);
+    case 8: return ConvertToTimestampVal(Decimal8Value(val.val8), scale);
+    case 16: return ConvertToTimestampVal(Decimal16Value(val.val16), scale);
     default:
       DCHECK(false);
       return TimestampVal::null();
   }
-  return result;
 }
 
 BooleanVal DecimalOperators::CastToBooleanVal(

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/78776e9b/be/src/exprs/decimal-operators.h
----------------------------------------------------------------------
diff --git a/be/src/exprs/decimal-operators.h b/be/src/exprs/decimal-operators.h
index 34c11cb..c2d8779 100644
--- a/be/src/exprs/decimal-operators.h
+++ b/be/src/exprs/decimal-operators.h
@@ -163,9 +163,13 @@ class DecimalOperators {
   static T RoundDelta(const DecimalValue<T>& v, int src_scale,
       int target_scale, const DecimalRoundOp& op);
 
+  /// Converts a decimal value (interpreted as unix time) to TimestampVal.
+  template <typename T>
+  static TimestampVal ConvertToTimestampVal(const T& decimal_value, int scale);
+
   /// Converts fractional 'val' with the given 'scale' to nanoseconds.
   template <typename T>
-  static T ConvertToNanoseconds(T val, int scale);
+  static int32_t ConvertToNanoseconds(T val, int scale);
 };
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/78776e9b/be/src/exprs/expr-test.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index c45f4a8..d923b00 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -2646,9 +2646,15 @@ TEST_F(ExprTest, CastExprs) {
   // 32 bit Decimal.
   TestTimestampValue("cast(cast(123.45 as decimal(9,2)) as timestamp)",
       TimestampValue::Parse("1970-01-01 00:02:03.450000000", 29));
+  TestTimestampValue("cast(cast(-123.45 as decimal(9,2)) as timestamp)",
+      TimestampValue::Parse("1969-12-31 23:57:56.550000000", 29));
   // 64 bit Decimal.
   TestTimestampValue("cast(cast(123.45 as decimal(18,2)) as timestamp)",
       TimestampValue::Parse("1970-01-01 00:02:03.450000000", 29));
+  TestTimestampValue("cast(cast(-123.45 as decimal(18,2)) as timestamp)",
+      TimestampValue::Parse("1969-12-31 23:57:56.550000000", 29));
+  TestTimestampValue("cast(cast(-0.1 as decimal(18,10)) as timestamp)",
+      TimestampValue::Parse("1969-12-31 23:59:59.900000000", 29));
   TestTimestampValue("cast(cast(253402300799.99 as decimal(18, 2)) as timestamp)",
       TimestampValue::Parse("9999-12-31 23:59:59.990000000", 29));
   TestIsNull("cast(cast(260000000000.00 as decimal(18, 2)) as timestamp)",
@@ -2656,6 +2662,10 @@ TEST_F(ExprTest, CastExprs) {
   // 128 bit Decimal.
   TestTimestampValue("cast(cast(123.45 as decimal(38,2)) as timestamp)",
       TimestampValue::Parse("1970-01-01 00:02:03.450000000", 29));
+  TestTimestampValue("cast(cast(-123.45 as decimal(38,2)) as timestamp)",
+      TimestampValue::Parse("1969-12-31 23:57:56.550000000", 29));
+  TestTimestampValue("cast(cast(-0.1 as decimal(38,20)) as timestamp)",
+      TimestampValue::Parse("1969-12-31 23:59:59.900000000", 29));
   TestTimestampValue("cast(cast(253402300799.99 as decimal(38, 2)) as timestamp)",
       TimestampValue::Parse("9999-12-31 23:59:59.990000000", 29));
   TestTimestampValue("cast(cast(253402300799.99 as decimal(38, 26)) as timestamp)",

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/78776e9b/be/src/runtime/decimal-value.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/decimal-value.h b/be/src/runtime/decimal-value.h
index 88ec2e3..0432057 100644
--- a/be/src/runtime/decimal-value.h
+++ b/be/src/runtime/decimal-value.h
@@ -41,6 +41,8 @@ namespace impala {
 template<typename T>
 class DecimalValue {
  public:
+  typedef T StorageType;
+
   DecimalValue() : value_(0) { }
   DecimalValue(const T& s) : value_(s) { }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/78776e9b/be/src/runtime/timestamp-value.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/timestamp-value.h b/be/src/runtime/timestamp-value.h
index fa97f78..eb33195 100644
--- a/be/src/runtime/timestamp-value.h
+++ b/be/src/runtime/timestamp-value.h
@@ -95,7 +95,7 @@ class TimestampValue {
   }
 
   /// Same as FromUnixTime() above, but adds the specified number of nanoseconds to the
-  /// resulting TimestampValue.
+  /// resulting TimestampValue. Handles negative nanoseconds too.
   static TimestampValue FromUnixTimeNanos(time_t unix_time, int64_t nanos) {
     boost::posix_time::ptime temp = UnixTimeToPtime(unix_time);
     temp += boost::posix_time::nanoseconds(nanos);