You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2014/09/23 20:45:50 UTC

git commit: [SPARK-3598][SQL]cast to timestamp should be the same as hive

Repository: spark
Updated Branches:
  refs/heads/master 11c10df82 -> 66bc0f2d6


[SPARK-3598][SQL]cast to timestamp should be the same as hive

This patch fixes casting an integer to a timestamp and the handling of timestamps earlier than the epoch (values smaller than 0).

select cast(1000 as timestamp) from src limit 1;

should return 1970-01-01 00:00:01 (the integer is interpreted as milliseconds, as in Hive), but we currently treat it as 1000 seconds.
Also, the current implementation has a bug when the time is before 1970-01-01 00:00:00.
rxin marmbrus chenghao-intel

Author: Daoyuan Wang <da...@intel.com>

Closes #2458 from adrian-wang/timestamp and squashes the following commits:

4274b1d [Daoyuan Wang] set test not related to timezone
1234f66 [Daoyuan Wang] fix timestamp smaller than 0 and cast int as timestamp


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/66bc0f2d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/66bc0f2d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/66bc0f2d

Branch: refs/heads/master
Commit: 66bc0f2d675d06cdd48638f124a1ff32be2bf456
Parents: 11c10df
Author: Daoyuan Wang <da...@intel.com>
Authored: Tue Sep 23 11:45:44 2014 -0700
Committer: Michael Armbrust <mi...@databricks.com>
Committed: Tue Sep 23 11:45:44 2014 -0700

----------------------------------------------------------------------
 .../spark/sql/catalyst/expressions/Cast.scala   | 17 +++++++-------
 .../expressions/ExpressionEvaluationSuite.scala | 16 ++++++++-----
 ...p cast #1-0-69fc614ccea92bbe39f4decc299edcc6 |  1 +
 ...p cast #2-0-732ed232ac592c5e7f7c913a88874fd2 |  1 +
 ...mp cast #3-0-76ee270337f664b36cacfc6528ac109 |  1 +
 ...p cast #4-0-732ed232ac592c5e7f7c913a88874fd2 |  1 +
 ...p cast #5-0-dbd7bcd167d322d6617b884c02c7f247 |  1 +
 ...p cast #6-0-6d2da5cfada03605834e38bc4075bc79 |  1 +
 ...p cast #7-0-1d70654217035f8ce5f64344f4c5a80f |  1 +
 ...p cast #8-0-6d2da5cfada03605834e38bc4075bc79 |  1 +
 .../sql/hive/execution/HiveQuerySuite.scala     | 24 ++++++++++++++++++++
 11 files changed, 50 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/66bc0f2d/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index 0379275..f626d09 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -86,15 +86,15 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression {
         try Timestamp.valueOf(n) catch { case _: java.lang.IllegalArgumentException => null }
       })
     case BooleanType =>
-      buildCast[Boolean](_, b => new Timestamp((if (b) 1 else 0) * 1000))
+      buildCast[Boolean](_, b => new Timestamp((if (b) 1 else 0)))
     case LongType =>
-      buildCast[Long](_, l => new Timestamp(l * 1000))
+      buildCast[Long](_, l => new Timestamp(l))
     case IntegerType =>
-      buildCast[Int](_, i => new Timestamp(i * 1000))
+      buildCast[Int](_, i => new Timestamp(i))
     case ShortType =>
-      buildCast[Short](_, s => new Timestamp(s * 1000))
+      buildCast[Short](_, s => new Timestamp(s))
     case ByteType =>
-      buildCast[Byte](_, b => new Timestamp(b * 1000))
+      buildCast[Byte](_, b => new Timestamp(b))
     // TimestampWritable.decimalToTimestamp
     case DecimalType =>
       buildCast[BigDecimal](_, d => decimalToTimestamp(d))
@@ -107,11 +107,10 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression {
   }
 
   private[this]  def decimalToTimestamp(d: BigDecimal) = {
-    val seconds = d.longValue()
+    val seconds = Math.floor(d.toDouble).toLong
     val bd = (d - seconds) * 1000000000
     val nanos = bd.intValue()
 
-    // Convert to millis
     val millis = seconds * 1000
     val t = new Timestamp(millis)
 
@@ -121,11 +120,11 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression {
   }
 
   // Timestamp to long, converting milliseconds to seconds
-  private[this] def timestampToLong(ts: Timestamp) = ts.getTime / 1000
+  private[this] def timestampToLong(ts: Timestamp) = Math.floor(ts.getTime / 1000.0).toLong
 
   private[this] def timestampToDouble(ts: Timestamp) = {
     // First part is the seconds since the beginning of time, followed by nanosecs.
-    ts.getTime / 1000 + ts.getNanos.toDouble / 1000000000
+    Math.floor(ts.getTime / 1000.0).toLong + ts.getNanos.toDouble / 1000000000
   }
 
   // Converts Timestamp to string according to Hive TimestampWritable convention

http://git-wip-us.apache.org/repos/asf/spark/blob/66bc0f2d/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
index b961346..8b6721d 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
@@ -231,7 +231,9 @@ class ExpressionEvaluationSuite extends FunSuite {
     checkEvaluation("12.65" cast DecimalType, BigDecimal(12.65))
 
     checkEvaluation(Literal(1) cast LongType, 1)
-    checkEvaluation(Cast(Literal(1) cast TimestampType, LongType), 1)
+    checkEvaluation(Cast(Literal(1000) cast TimestampType, LongType), 1.toLong)
+    checkEvaluation(Cast(Literal(-1200) cast TimestampType, LongType), -2.toLong)
+    checkEvaluation(Cast(Literal(1.toDouble) cast TimestampType, DoubleType), 1.toDouble)
     checkEvaluation(Cast(Literal(1.toDouble) cast TimestampType, DoubleType), 1.toDouble)
 
     checkEvaluation(Cast(Literal(sts) cast TimestampType, StringType), sts)
@@ -242,11 +244,11 @@ class ExpressionEvaluationSuite extends FunSuite {
     checkEvaluation(Cast(Cast(Cast(Cast(
       Cast("5" cast ByteType, ShortType), IntegerType), FloatType), DoubleType), LongType), 5)
     checkEvaluation(Cast(Cast(Cast(Cast(
-      Cast("5" cast ByteType, TimestampType), DecimalType), LongType), StringType), ShortType), 5)
+      Cast("5" cast ByteType, TimestampType), DecimalType), LongType), StringType), ShortType), 0)
     checkEvaluation(Cast(Cast(Cast(Cast(
       Cast("5" cast TimestampType, ByteType), DecimalType), LongType), StringType), ShortType), null)
     checkEvaluation(Cast(Cast(Cast(Cast(
-      Cast("5" cast DecimalType, ByteType), TimestampType), LongType), StringType), ShortType), 5)
+      Cast("5" cast DecimalType, ByteType), TimestampType), LongType), StringType), ShortType), 0)
     checkEvaluation(Literal(true) cast IntegerType, 1)
     checkEvaluation(Literal(false) cast IntegerType, 0)
     checkEvaluation(Cast(Literal(1) cast BooleanType, IntegerType), 1)
@@ -293,16 +295,18 @@ class ExpressionEvaluationSuite extends FunSuite {
 
   test("timestamp casting") {
     val millis = 15 * 1000 + 2
+    val seconds = millis * 1000 + 2
     val ts = new Timestamp(millis)
     val ts1 = new Timestamp(15 * 1000)  // a timestamp without the milliseconds part
+    val tss = new Timestamp(seconds)
     checkEvaluation(Cast(ts, ShortType), 15)
     checkEvaluation(Cast(ts, IntegerType), 15)
     checkEvaluation(Cast(ts, LongType), 15)
     checkEvaluation(Cast(ts, FloatType), 15.002f)
     checkEvaluation(Cast(ts, DoubleType), 15.002)
-    checkEvaluation(Cast(Cast(ts, ShortType), TimestampType), ts1)
-    checkEvaluation(Cast(Cast(ts, IntegerType), TimestampType), ts1)
-    checkEvaluation(Cast(Cast(ts, LongType), TimestampType), ts1)
+    checkEvaluation(Cast(Cast(tss, ShortType), TimestampType), ts)
+    checkEvaluation(Cast(Cast(tss, IntegerType), TimestampType), ts)
+    checkEvaluation(Cast(Cast(tss, LongType), TimestampType), ts)
     checkEvaluation(Cast(Cast(millis.toFloat / 1000, TimestampType), FloatType),
       millis.toFloat / 1000)
     checkEvaluation(Cast(Cast(millis.toDouble / 1000, TimestampType), DoubleType),

http://git-wip-us.apache.org/repos/asf/spark/blob/66bc0f2d/sql/hive/src/test/resources/golden/timestamp cast #1-0-69fc614ccea92bbe39f4decc299edcc6
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/timestamp cast #1-0-69fc614ccea92bbe39f4decc299edcc6 b/sql/hive/src/test/resources/golden/timestamp cast #1-0-69fc614ccea92bbe39f4decc299edcc6
new file mode 100644
index 0000000..8ebf695
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/timestamp cast #1-0-69fc614ccea92bbe39f4decc299edcc6	
@@ -0,0 +1 @@
+0.001

http://git-wip-us.apache.org/repos/asf/spark/blob/66bc0f2d/sql/hive/src/test/resources/golden/timestamp cast #2-0-732ed232ac592c5e7f7c913a88874fd2
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/timestamp cast #2-0-732ed232ac592c5e7f7c913a88874fd2 b/sql/hive/src/test/resources/golden/timestamp cast #2-0-732ed232ac592c5e7f7c913a88874fd2
new file mode 100644
index 0000000..5625e59
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/timestamp cast #2-0-732ed232ac592c5e7f7c913a88874fd2	
@@ -0,0 +1 @@
+1.2

http://git-wip-us.apache.org/repos/asf/spark/blob/66bc0f2d/sql/hive/src/test/resources/golden/timestamp cast #3-0-76ee270337f664b36cacfc6528ac109
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/timestamp cast #3-0-76ee270337f664b36cacfc6528ac109 b/sql/hive/src/test/resources/golden/timestamp cast #3-0-76ee270337f664b36cacfc6528ac109
new file mode 100644
index 0000000..d00491f
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/timestamp cast #3-0-76ee270337f664b36cacfc6528ac109	
@@ -0,0 +1 @@
+1

http://git-wip-us.apache.org/repos/asf/spark/blob/66bc0f2d/sql/hive/src/test/resources/golden/timestamp cast #4-0-732ed232ac592c5e7f7c913a88874fd2
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/timestamp cast #4-0-732ed232ac592c5e7f7c913a88874fd2 b/sql/hive/src/test/resources/golden/timestamp cast #4-0-732ed232ac592c5e7f7c913a88874fd2
new file mode 100644
index 0000000..5625e59
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/timestamp cast #4-0-732ed232ac592c5e7f7c913a88874fd2	
@@ -0,0 +1 @@
+1.2

http://git-wip-us.apache.org/repos/asf/spark/blob/66bc0f2d/sql/hive/src/test/resources/golden/timestamp cast #5-0-dbd7bcd167d322d6617b884c02c7f247
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/timestamp cast #5-0-dbd7bcd167d322d6617b884c02c7f247 b/sql/hive/src/test/resources/golden/timestamp cast #5-0-dbd7bcd167d322d6617b884c02c7f247
new file mode 100644
index 0000000..27de46f
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/timestamp cast #5-0-dbd7bcd167d322d6617b884c02c7f247	
@@ -0,0 +1 @@
+-0.0010000000000000009

http://git-wip-us.apache.org/repos/asf/spark/blob/66bc0f2d/sql/hive/src/test/resources/golden/timestamp cast #6-0-6d2da5cfada03605834e38bc4075bc79
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/timestamp cast #6-0-6d2da5cfada03605834e38bc4075bc79 b/sql/hive/src/test/resources/golden/timestamp cast #6-0-6d2da5cfada03605834e38bc4075bc79
new file mode 100644
index 0000000..1d94c8a
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/timestamp cast #6-0-6d2da5cfada03605834e38bc4075bc79	
@@ -0,0 +1 @@
+-1.2

http://git-wip-us.apache.org/repos/asf/spark/blob/66bc0f2d/sql/hive/src/test/resources/golden/timestamp cast #7-0-1d70654217035f8ce5f64344f4c5a80f
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/timestamp cast #7-0-1d70654217035f8ce5f64344f4c5a80f b/sql/hive/src/test/resources/golden/timestamp cast #7-0-1d70654217035f8ce5f64344f4c5a80f
new file mode 100644
index 0000000..3fbedf6
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/timestamp cast #7-0-1d70654217035f8ce5f64344f4c5a80f	
@@ -0,0 +1 @@
+-2

http://git-wip-us.apache.org/repos/asf/spark/blob/66bc0f2d/sql/hive/src/test/resources/golden/timestamp cast #8-0-6d2da5cfada03605834e38bc4075bc79
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/timestamp cast #8-0-6d2da5cfada03605834e38bc4075bc79 b/sql/hive/src/test/resources/golden/timestamp cast #8-0-6d2da5cfada03605834e38bc4075bc79
new file mode 100644
index 0000000..1d94c8a
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/timestamp cast #8-0-6d2da5cfada03605834e38bc4075bc79	
@@ -0,0 +1 @@
+-1.2

http://git-wip-us.apache.org/repos/asf/spark/blob/66bc0f2d/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 56bcd95..6fc891b 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -303,6 +303,30 @@ class HiveQuerySuite extends HiveComparisonTest {
   createQueryTest("case statements WITHOUT key #4",
     "SELECT (CASE WHEN key > 2 THEN 3 WHEN 2 > key THEN 2 ELSE 0 END) FROM src WHERE key < 15")
 
+  createQueryTest("timestamp cast #1",
+    "SELECT CAST(CAST(1 AS TIMESTAMP) AS DOUBLE) FROM src LIMIT 1")
+
+  createQueryTest("timestamp cast #2",
+    "SELECT CAST(CAST(1.2 AS TIMESTAMP) AS DOUBLE) FROM src LIMIT 1")
+
+  createQueryTest("timestamp cast #3",
+    "SELECT CAST(CAST(1200 AS TIMESTAMP) AS INT) FROM src LIMIT 1")
+
+  createQueryTest("timestamp cast #4",
+    "SELECT CAST(CAST(1.2 AS TIMESTAMP) AS DOUBLE) FROM src LIMIT 1")
+
+  createQueryTest("timestamp cast #5",
+    "SELECT CAST(CAST(-1 AS TIMESTAMP) AS DOUBLE) FROM src LIMIT 1")
+
+  createQueryTest("timestamp cast #6",
+    "SELECT CAST(CAST(-1.2 AS TIMESTAMP) AS DOUBLE) FROM src LIMIT 1")
+
+  createQueryTest("timestamp cast #7",
+    "SELECT CAST(CAST(-1200 AS TIMESTAMP) AS INT) FROM src LIMIT 1")
+
+  createQueryTest("timestamp cast #8",
+    "SELECT CAST(CAST(-1.2 AS TIMESTAMP) AS DOUBLE) FROM src LIMIT 1")
+
   test("implement identity function using case statement") {
     val actual = sql("SELECT (CASE key WHEN key THEN key END) FROM src")
       .map { case Row(i: Int) => i }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org