You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2014/09/23 20:45:50 UTC
git commit: [SPARK-3598][SQL]cast to timestamp should be the same as
hive
Repository: spark
Updated Branches:
refs/heads/master 11c10df82 -> 66bc0f2d6
[SPARK-3598][SQL]cast to timestamp should be the same as hive
This patch fixes the handling of timestamps smaller than 0 and the casting of int to timestamp.
select cast(1000 as timestamp) from src limit 1;
should return 1970-01-01 00:00:01 (the integer is interpreted as milliseconds), but we currently treat it as 1000 seconds.
Also, the current implementation has a bug when the time is before 1970-01-01 00:00:00.
rxin marmbrus chenghao-intel
Author: Daoyuan Wang <da...@intel.com>
Closes #2458 from adrian-wang/timestamp and squashes the following commits:
4274b1d [Daoyuan Wang] set test not related to timezone
1234f66 [Daoyuan Wang] fix timestamp smaller than 0 and cast int as timestamp
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/66bc0f2d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/66bc0f2d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/66bc0f2d
Branch: refs/heads/master
Commit: 66bc0f2d675d06cdd48638f124a1ff32be2bf456
Parents: 11c10df
Author: Daoyuan Wang <da...@intel.com>
Authored: Tue Sep 23 11:45:44 2014 -0700
Committer: Michael Armbrust <mi...@databricks.com>
Committed: Tue Sep 23 11:45:44 2014 -0700
----------------------------------------------------------------------
.../spark/sql/catalyst/expressions/Cast.scala | 17 +++++++-------
.../expressions/ExpressionEvaluationSuite.scala | 16 ++++++++-----
...p cast #1-0-69fc614ccea92bbe39f4decc299edcc6 | 1 +
...p cast #2-0-732ed232ac592c5e7f7c913a88874fd2 | 1 +
...mp cast #3-0-76ee270337f664b36cacfc6528ac109 | 1 +
...p cast #4-0-732ed232ac592c5e7f7c913a88874fd2 | 1 +
...p cast #5-0-dbd7bcd167d322d6617b884c02c7f247 | 1 +
...p cast #6-0-6d2da5cfada03605834e38bc4075bc79 | 1 +
...p cast #7-0-1d70654217035f8ce5f64344f4c5a80f | 1 +
...p cast #8-0-6d2da5cfada03605834e38bc4075bc79 | 1 +
.../sql/hive/execution/HiveQuerySuite.scala | 24 ++++++++++++++++++++
11 files changed, 50 insertions(+), 15 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/66bc0f2d/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index 0379275..f626d09 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -86,15 +86,15 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression {
try Timestamp.valueOf(n) catch { case _: java.lang.IllegalArgumentException => null }
})
case BooleanType =>
- buildCast[Boolean](_, b => new Timestamp((if (b) 1 else 0) * 1000))
+ buildCast[Boolean](_, b => new Timestamp((if (b) 1 else 0)))
case LongType =>
- buildCast[Long](_, l => new Timestamp(l * 1000))
+ buildCast[Long](_, l => new Timestamp(l))
case IntegerType =>
- buildCast[Int](_, i => new Timestamp(i * 1000))
+ buildCast[Int](_, i => new Timestamp(i))
case ShortType =>
- buildCast[Short](_, s => new Timestamp(s * 1000))
+ buildCast[Short](_, s => new Timestamp(s))
case ByteType =>
- buildCast[Byte](_, b => new Timestamp(b * 1000))
+ buildCast[Byte](_, b => new Timestamp(b))
// TimestampWritable.decimalToTimestamp
case DecimalType =>
buildCast[BigDecimal](_, d => decimalToTimestamp(d))
@@ -107,11 +107,10 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression {
}
private[this] def decimalToTimestamp(d: BigDecimal) = {
- val seconds = d.longValue()
+ val seconds = Math.floor(d.toDouble).toLong
val bd = (d - seconds) * 1000000000
val nanos = bd.intValue()
- // Convert to millis
val millis = seconds * 1000
val t = new Timestamp(millis)
@@ -121,11 +120,11 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression {
}
// Timestamp to long, converting milliseconds to seconds
- private[this] def timestampToLong(ts: Timestamp) = ts.getTime / 1000
+ private[this] def timestampToLong(ts: Timestamp) = Math.floor(ts.getTime / 1000.0).toLong
private[this] def timestampToDouble(ts: Timestamp) = {
// First part is the seconds since the beginning of time, followed by nanosecs.
- ts.getTime / 1000 + ts.getNanos.toDouble / 1000000000
+ Math.floor(ts.getTime / 1000.0).toLong + ts.getNanos.toDouble / 1000000000
}
// Converts Timestamp to string according to Hive TimestampWritable convention
http://git-wip-us.apache.org/repos/asf/spark/blob/66bc0f2d/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
index b961346..8b6721d 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
@@ -231,7 +231,9 @@ class ExpressionEvaluationSuite extends FunSuite {
checkEvaluation("12.65" cast DecimalType, BigDecimal(12.65))
checkEvaluation(Literal(1) cast LongType, 1)
- checkEvaluation(Cast(Literal(1) cast TimestampType, LongType), 1)
+ checkEvaluation(Cast(Literal(1000) cast TimestampType, LongType), 1.toLong)
+ checkEvaluation(Cast(Literal(-1200) cast TimestampType, LongType), -2.toLong)
+ checkEvaluation(Cast(Literal(1.toDouble) cast TimestampType, DoubleType), 1.toDouble)
checkEvaluation(Cast(Literal(1.toDouble) cast TimestampType, DoubleType), 1.toDouble)
checkEvaluation(Cast(Literal(sts) cast TimestampType, StringType), sts)
@@ -242,11 +244,11 @@ class ExpressionEvaluationSuite extends FunSuite {
checkEvaluation(Cast(Cast(Cast(Cast(
Cast("5" cast ByteType, ShortType), IntegerType), FloatType), DoubleType), LongType), 5)
checkEvaluation(Cast(Cast(Cast(Cast(
- Cast("5" cast ByteType, TimestampType), DecimalType), LongType), StringType), ShortType), 5)
+ Cast("5" cast ByteType, TimestampType), DecimalType), LongType), StringType), ShortType), 0)
checkEvaluation(Cast(Cast(Cast(Cast(
Cast("5" cast TimestampType, ByteType), DecimalType), LongType), StringType), ShortType), null)
checkEvaluation(Cast(Cast(Cast(Cast(
- Cast("5" cast DecimalType, ByteType), TimestampType), LongType), StringType), ShortType), 5)
+ Cast("5" cast DecimalType, ByteType), TimestampType), LongType), StringType), ShortType), 0)
checkEvaluation(Literal(true) cast IntegerType, 1)
checkEvaluation(Literal(false) cast IntegerType, 0)
checkEvaluation(Cast(Literal(1) cast BooleanType, IntegerType), 1)
@@ -293,16 +295,18 @@ class ExpressionEvaluationSuite extends FunSuite {
test("timestamp casting") {
val millis = 15 * 1000 + 2
+ val seconds = millis * 1000 + 2
val ts = new Timestamp(millis)
val ts1 = new Timestamp(15 * 1000) // a timestamp without the milliseconds part
+ val tss = new Timestamp(seconds)
checkEvaluation(Cast(ts, ShortType), 15)
checkEvaluation(Cast(ts, IntegerType), 15)
checkEvaluation(Cast(ts, LongType), 15)
checkEvaluation(Cast(ts, FloatType), 15.002f)
checkEvaluation(Cast(ts, DoubleType), 15.002)
- checkEvaluation(Cast(Cast(ts, ShortType), TimestampType), ts1)
- checkEvaluation(Cast(Cast(ts, IntegerType), TimestampType), ts1)
- checkEvaluation(Cast(Cast(ts, LongType), TimestampType), ts1)
+ checkEvaluation(Cast(Cast(tss, ShortType), TimestampType), ts)
+ checkEvaluation(Cast(Cast(tss, IntegerType), TimestampType), ts)
+ checkEvaluation(Cast(Cast(tss, LongType), TimestampType), ts)
checkEvaluation(Cast(Cast(millis.toFloat / 1000, TimestampType), FloatType),
millis.toFloat / 1000)
checkEvaluation(Cast(Cast(millis.toDouble / 1000, TimestampType), DoubleType),
http://git-wip-us.apache.org/repos/asf/spark/blob/66bc0f2d/sql/hive/src/test/resources/golden/timestamp cast #1-0-69fc614ccea92bbe39f4decc299edcc6
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/timestamp cast #1-0-69fc614ccea92bbe39f4decc299edcc6 b/sql/hive/src/test/resources/golden/timestamp cast #1-0-69fc614ccea92bbe39f4decc299edcc6
new file mode 100644
index 0000000..8ebf695
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/timestamp cast #1-0-69fc614ccea92bbe39f4decc299edcc6
@@ -0,0 +1 @@
+0.001
http://git-wip-us.apache.org/repos/asf/spark/blob/66bc0f2d/sql/hive/src/test/resources/golden/timestamp cast #2-0-732ed232ac592c5e7f7c913a88874fd2
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/timestamp cast #2-0-732ed232ac592c5e7f7c913a88874fd2 b/sql/hive/src/test/resources/golden/timestamp cast #2-0-732ed232ac592c5e7f7c913a88874fd2
new file mode 100644
index 0000000..5625e59
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/timestamp cast #2-0-732ed232ac592c5e7f7c913a88874fd2
@@ -0,0 +1 @@
+1.2
http://git-wip-us.apache.org/repos/asf/spark/blob/66bc0f2d/sql/hive/src/test/resources/golden/timestamp cast #3-0-76ee270337f664b36cacfc6528ac109
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/timestamp cast #3-0-76ee270337f664b36cacfc6528ac109 b/sql/hive/src/test/resources/golden/timestamp cast #3-0-76ee270337f664b36cacfc6528ac109
new file mode 100644
index 0000000..d00491f
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/timestamp cast #3-0-76ee270337f664b36cacfc6528ac109
@@ -0,0 +1 @@
+1
http://git-wip-us.apache.org/repos/asf/spark/blob/66bc0f2d/sql/hive/src/test/resources/golden/timestamp cast #4-0-732ed232ac592c5e7f7c913a88874fd2
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/timestamp cast #4-0-732ed232ac592c5e7f7c913a88874fd2 b/sql/hive/src/test/resources/golden/timestamp cast #4-0-732ed232ac592c5e7f7c913a88874fd2
new file mode 100644
index 0000000..5625e59
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/timestamp cast #4-0-732ed232ac592c5e7f7c913a88874fd2
@@ -0,0 +1 @@
+1.2
http://git-wip-us.apache.org/repos/asf/spark/blob/66bc0f2d/sql/hive/src/test/resources/golden/timestamp cast #5-0-dbd7bcd167d322d6617b884c02c7f247
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/timestamp cast #5-0-dbd7bcd167d322d6617b884c02c7f247 b/sql/hive/src/test/resources/golden/timestamp cast #5-0-dbd7bcd167d322d6617b884c02c7f247
new file mode 100644
index 0000000..27de46f
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/timestamp cast #5-0-dbd7bcd167d322d6617b884c02c7f247
@@ -0,0 +1 @@
+-0.0010000000000000009
http://git-wip-us.apache.org/repos/asf/spark/blob/66bc0f2d/sql/hive/src/test/resources/golden/timestamp cast #6-0-6d2da5cfada03605834e38bc4075bc79
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/timestamp cast #6-0-6d2da5cfada03605834e38bc4075bc79 b/sql/hive/src/test/resources/golden/timestamp cast #6-0-6d2da5cfada03605834e38bc4075bc79
new file mode 100644
index 0000000..1d94c8a
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/timestamp cast #6-0-6d2da5cfada03605834e38bc4075bc79
@@ -0,0 +1 @@
+-1.2
http://git-wip-us.apache.org/repos/asf/spark/blob/66bc0f2d/sql/hive/src/test/resources/golden/timestamp cast #7-0-1d70654217035f8ce5f64344f4c5a80f
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/timestamp cast #7-0-1d70654217035f8ce5f64344f4c5a80f b/sql/hive/src/test/resources/golden/timestamp cast #7-0-1d70654217035f8ce5f64344f4c5a80f
new file mode 100644
index 0000000..3fbedf6
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/timestamp cast #7-0-1d70654217035f8ce5f64344f4c5a80f
@@ -0,0 +1 @@
+-2
http://git-wip-us.apache.org/repos/asf/spark/blob/66bc0f2d/sql/hive/src/test/resources/golden/timestamp cast #8-0-6d2da5cfada03605834e38bc4075bc79
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/resources/golden/timestamp cast #8-0-6d2da5cfada03605834e38bc4075bc79 b/sql/hive/src/test/resources/golden/timestamp cast #8-0-6d2da5cfada03605834e38bc4075bc79
new file mode 100644
index 0000000..1d94c8a
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/timestamp cast #8-0-6d2da5cfada03605834e38bc4075bc79
@@ -0,0 +1 @@
+-1.2
http://git-wip-us.apache.org/repos/asf/spark/blob/66bc0f2d/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 56bcd95..6fc891b 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -303,6 +303,30 @@ class HiveQuerySuite extends HiveComparisonTest {
createQueryTest("case statements WITHOUT key #4",
"SELECT (CASE WHEN key > 2 THEN 3 WHEN 2 > key THEN 2 ELSE 0 END) FROM src WHERE key < 15")
+ createQueryTest("timestamp cast #1",
+ "SELECT CAST(CAST(1 AS TIMESTAMP) AS DOUBLE) FROM src LIMIT 1")
+
+ createQueryTest("timestamp cast #2",
+ "SELECT CAST(CAST(1.2 AS TIMESTAMP) AS DOUBLE) FROM src LIMIT 1")
+
+ createQueryTest("timestamp cast #3",
+ "SELECT CAST(CAST(1200 AS TIMESTAMP) AS INT) FROM src LIMIT 1")
+
+ createQueryTest("timestamp cast #4",
+ "SELECT CAST(CAST(1.2 AS TIMESTAMP) AS DOUBLE) FROM src LIMIT 1")
+
+ createQueryTest("timestamp cast #5",
+ "SELECT CAST(CAST(-1 AS TIMESTAMP) AS DOUBLE) FROM src LIMIT 1")
+
+ createQueryTest("timestamp cast #6",
+ "SELECT CAST(CAST(-1.2 AS TIMESTAMP) AS DOUBLE) FROM src LIMIT 1")
+
+ createQueryTest("timestamp cast #7",
+ "SELECT CAST(CAST(-1200 AS TIMESTAMP) AS INT) FROM src LIMIT 1")
+
+ createQueryTest("timestamp cast #8",
+ "SELECT CAST(CAST(-1.2 AS TIMESTAMP) AS DOUBLE) FROM src LIMIT 1")
+
test("implement identity function using case statement") {
val actual = sql("SELECT (CASE key WHEN key THEN key END) FROM src")
.map { case Row(i: Int) => i }
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org