You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2015/02/02 03:51:45 UTC
spark git commit: [SPARK-5262] [SPARK-5244] [SQL] add coalesce in SQLParser and widen types for parameters of coalesce
Repository: spark
Updated Branches:
refs/heads/master 1b56f1d6b -> 8cf4a1f02
[SPARK-5262] [SPARK-5244] [SQL] add coalesce in SQLParser and widen types for parameters of coalesce
I'll add a test case in #4040.
Author: Daoyuan Wang <da...@intel.com>
Closes #4057 from adrian-wang/coal and squashes the following commits:
4d0111a [Daoyuan Wang] address Yin's comments
c393e18 [Daoyuan Wang] fix rebase conflicts
e47c03a [Daoyuan Wang] add coalesce in parser
c74828d [Daoyuan Wang] cast types for coalesce
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8cf4a1f0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8cf4a1f0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8cf4a1f0
Branch: refs/heads/master
Commit: 8cf4a1f02e40f37f940f6a347c078f5879585bf4
Parents: 1b56f1d
Author: Daoyuan Wang <da...@intel.com>
Authored: Sun Feb 1 18:51:38 2015 -0800
Committer: Michael Armbrust <mi...@databricks.com>
Committed: Sun Feb 1 18:51:38 2015 -0800
----------------------------------------------------------------------
.../apache/spark/sql/catalyst/SqlParser.scala | 2 ++
.../catalyst/analysis/HiveTypeCoercion.scala | 16 ++++++++++++
.../analysis/HiveTypeCoercionSuite.scala | 27 ++++++++++++++++++++
.../org/apache/spark/sql/SQLQuerySuite.scala | 12 +++++++++
.../org/apache/spark/sql/hive/HiveQl.scala | 2 ++
.../hive/execution/HiveTypeCoercionSuite.scala | 6 +++++
6 files changed, 65 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/8cf4a1f0/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
index 24a65f8..594a423 100755
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
@@ -50,6 +50,7 @@ class SqlParser extends AbstractSparkSQLParser {
protected val CACHE = Keyword("CACHE")
protected val CASE = Keyword("CASE")
protected val CAST = Keyword("CAST")
+ protected val COALESCE = Keyword("COALESCE")
protected val COUNT = Keyword("COUNT")
protected val DECIMAL = Keyword("DECIMAL")
protected val DESC = Keyword("DESC")
@@ -295,6 +296,7 @@ class SqlParser extends AbstractSparkSQLParser {
{ case s ~ p => Substring(s, p, Literal(Integer.MAX_VALUE)) }
| (SUBSTR | SUBSTRING) ~ "(" ~> expression ~ ("," ~> expression) ~ ("," ~> expression) <~ ")" ^^
{ case s ~ p ~ l => Substring(s, p, l) }
+ | COALESCE ~ "(" ~> repsep(expression, ",") <~ ")" ^^ { case exprs => Coalesce(exprs) }
| SQRT ~ "(" ~> expression <~ ")" ^^ { case exp => Sqrt(exp) }
| ABS ~ "(" ~> expression <~ ")" ^^ { case exp => Abs(exp) }
| ident ~ ("(" ~> repsep(expression, ",")) <~ ")" ^^
http://git-wip-us.apache.org/repos/asf/spark/blob/8cf4a1f0/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
index 6ef8577..34ef7d2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
@@ -503,6 +503,22 @@ trait HiveTypeCoercion {
// Hive lets you do aggregation of timestamps... for some reason
case Sum(e @ TimestampType()) => Sum(Cast(e, DoubleType))
case Average(e @ TimestampType()) => Average(Cast(e, DoubleType))
+
+ // Coalesce should return the first non-null value, which could be any column
+ // from the list. So we need to make sure the return type is deterministic and
+ // compatible with every child column.
+ case Coalesce(es) if es.map(_.dataType).distinct.size > 1 =>
+ val dt: Option[DataType] = Some(NullType)
+ val types = es.map(_.dataType)
+ val rt = types.foldLeft(dt)((r, c) => r match {
+ case None => None
+ case Some(d) => findTightestCommonType(d, c)
+ })
+ rt match {
+ case Some(finaldt) => Coalesce(es.map(Cast(_, finaldt)))
+ case None =>
+ sys.error(s"Could not determine return type of Coalesce for ${types.mkString(",")}")
+ }
}
}
http://git-wip-us.apache.org/repos/asf/spark/blob/8cf4a1f0/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala
index f5a502b..85798d0 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala
@@ -114,4 +114,31 @@ class HiveTypeCoercionSuite extends FunSuite {
// Stringify boolean when casting to string.
ruleTest(Cast(Literal(false), StringType), If(Literal(false), Literal("true"), Literal("false")))
}
+
+ test("coalesce casts") {
+ val fac = new HiveTypeCoercion { }.FunctionArgumentConversion
+ def ruleTest(initial: Expression, transformed: Expression) {
+ val testRelation = LocalRelation(AttributeReference("a", IntegerType)())
+ assert(fac(Project(Seq(Alias(initial, "a")()), testRelation)) ==
+ Project(Seq(Alias(transformed, "a")()), testRelation))
+ }
+ ruleTest(
+ Coalesce(Literal(1.0)
+ :: Literal(1)
+ :: Literal(1.0, FloatType)
+ :: Nil),
+ Coalesce(Cast(Literal(1.0), DoubleType)
+ :: Cast(Literal(1), DoubleType)
+ :: Cast(Literal(1.0, FloatType), DoubleType)
+ :: Nil))
+ ruleTest(
+ Coalesce(Literal(1L)
+ :: Literal(1)
+ :: Literal(new java.math.BigDecimal("1000000000000000000000"))
+ :: Nil),
+ Coalesce(Cast(Literal(1L), DecimalType())
+ :: Cast(Literal(1), DecimalType())
+ :: Cast(Literal(new java.math.BigDecimal("1000000000000000000000")), DecimalType())
+ :: Nil))
+ }
}
http://git-wip-us.apache.org/repos/asf/spark/blob/8cf4a1f0/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index d684278..d82c343 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -88,6 +88,18 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll {
setConf(SQLConf.CODEGEN_ENABLED, originalValue.toString)
}
+ test("Add Parser of SQL COALESCE()") {
+ checkAnswer(
+ sql("""SELECT COALESCE(1, 2)"""),
+ Row(1))
+ checkAnswer(
+ sql("SELECT COALESCE(null, 1, 1.5)"),
+ Row(1.toDouble))
+ checkAnswer(
+ sql("SELECT COALESCE(null, null, null)"),
+ Row(null))
+ }
+
test("SPARK-3176 Added Parser of SQL LAST()") {
checkAnswer(
sql("SELECT LAST(n) FROM lowerCaseData"),
http://git-wip-us.apache.org/repos/asf/spark/blob/8cf4a1f0/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
index 399e58b..30a64b4 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
@@ -965,6 +965,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
/* Case insensitive matches */
val ARRAY = "(?i)ARRAY".r
+ val COALESCE = "(?i)COALESCE".r
val COUNT = "(?i)COUNT".r
val AVG = "(?i)AVG".r
val SUM = "(?i)SUM".r
@@ -1140,6 +1141,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
Substring(nodeToExpr(string), nodeToExpr(pos), Literal(Integer.MAX_VALUE, IntegerType))
case Token("TOK_FUNCTION", Token(SUBSTR(), Nil) :: string :: pos :: length :: Nil) =>
Substring(nodeToExpr(string), nodeToExpr(pos), nodeToExpr(length))
+ case Token("TOK_FUNCTION", Token(COALESCE(), Nil) :: list) => Coalesce(list.map(nodeToExpr))
/* UDFs - Must be last otherwise will preempt built in functions */
case Token("TOK_FUNCTION", Token(name, Nil) :: args) =>
http://git-wip-us.apache.org/repos/asf/spark/blob/8cf4a1f0/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTypeCoercionSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTypeCoercionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTypeCoercionSuite.scala
index 48fffe5..ab0e044 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTypeCoercionSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTypeCoercionSuite.scala
@@ -57,4 +57,10 @@ class HiveTypeCoercionSuite extends HiveComparisonTest {
}
assert(numEquals === 1)
}
+
+ test("COALESCE with different types") {
+ intercept[RuntimeException] {
+ TestHive.sql("""SELECT COALESCE(1, true, "abc") FROM src limit 1""").collect()
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org