You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2016/04/18 21:26:59 UTC
spark git commit: [SPARK-14580][SPARK-14655][SQL] Hive IfCoercion
should preserve predicate.
Repository: spark
Updated Branches:
refs/heads/master b64482f49 -> d280d1da1
[SPARK-14580][SPARK-14655][SQL] Hive IfCoercion should preserve predicate.
## What changes were proposed in this pull request?
Currently, `HiveTypeCoercion.IfCoercion` removes all predicates whose return-type are null. However, some UDFs need evaluations because they are designed to throw exceptions. This PR fixes that to preserve the predicates. Also, `assert_true` is implemented as Spark SQL function.
**Before**
```
scala> sql("select if(assert_true(false),2,3)").head
res2: org.apache.spark.sql.Row = [3]
```
**After**
```
scala> sql("select if(assert_true(false),2,3)").head
... ASSERT_TRUE ...
```
**Hive**
```
hive> select if(assert_true(false),2,3);
OK
Failed with exception java.io.IOException:org.apache.hadoop.hive.ql.metadata.HiveException: ASSERT_TRUE(): assertion failed.
```
## How was this patch tested?
Pass the Jenkins tests (including a new testcase in `HivePlanTest`)
Author: Dongjoon Hyun <do...@apache.org>
Closes #12340 from dongjoon-hyun/SPARK-14580.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d280d1da
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d280d1da
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d280d1da
Branch: refs/heads/master
Commit: d280d1da1aec925687a0bfb496f3a6e0979e896f
Parents: b64482f
Author: Dongjoon Hyun <do...@apache.org>
Authored: Mon Apr 18 12:26:56 2016 -0700
Committer: Reynold Xin <rx...@databricks.com>
Committed: Mon Apr 18 12:26:56 2016 -0700
----------------------------------------------------------------------
.../catalyst/analysis/FunctionRegistry.scala | 1 +
.../catalyst/analysis/HiveTypeCoercion.scala | 6 ++--
.../spark/sql/catalyst/expressions/misc.scala | 38 ++++++++++++++++++++
.../analysis/HiveTypeCoercionSuite.scala | 15 +++++---
.../expressions/MiscFunctionsSuite.scala | 17 +++++++++
5 files changed, 70 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/d280d1da/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index ed19191..a444300 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -329,6 +329,7 @@ object FunctionRegistry {
expression[SortArray]("sort_array"),
// misc functions
+ expression[AssertTrue]("assert_true"),
expression[Crc32]("crc32"),
expression[Md5]("md5"),
expression[Murmur3Hash]("hash"),
http://git-wip-us.apache.org/repos/asf/spark/blob/d280d1da/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
index 823d249..5323b79 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
@@ -584,10 +584,10 @@ object HiveTypeCoercion {
val newRight = if (right.dataType == widestType) right else Cast(right, widestType)
If(pred, newLeft, newRight)
}.getOrElse(i) // If there is no applicable conversion, leave expression unchanged.
- // Convert If(null literal, _, _) into boolean type.
- // In the optimizer, we should short-circuit this directly into false value.
- case If(pred, left, right) if pred.dataType == NullType =>
+ case If(Literal(null, NullType), left, right) =>
If(Literal.create(null, BooleanType), left, right)
+ case If(pred, left, right) if pred.dataType == NullType =>
+ If(Cast(pred, BooleanType), left, right)
}
}
http://git-wip-us.apache.org/repos/asf/spark/blob/d280d1da/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
index 113fc86..f2f0c2d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
@@ -487,6 +487,44 @@ case class PrintToStderr(child: Expression) extends UnaryExpression {
}
/**
+ * A function throws an exception if 'condition' is not true.
+ */
+@ExpressionDescription(
+ usage = "_FUNC_(condition) - Throw an exception if 'condition' is not true.")
+case class AssertTrue(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
+
+ override def nullable: Boolean = true
+
+ override def inputTypes: Seq[DataType] = Seq(BooleanType)
+
+ override def dataType: DataType = NullType
+
+ override def prettyName: String = "assert_true"
+
+ override def eval(input: InternalRow) : Any = {
+ val v = child.eval(input)
+ if (v == null || java.lang.Boolean.FALSE.equals(v)) {
+ throw new RuntimeException(s"'${child.simpleString}' is not true!")
+ } else {
+ null
+ }
+ }
+
+ override def genCode(ctx: CodegenContext, ev: ExprCode): String = {
+ val eval = child.gen(ctx)
+ ev.isNull = "true"
+ ev.value = "null"
+ s"""${eval.code}
+ |if (${eval.isNull} || !${eval.value}) {
+ | throw new RuntimeException("'${child.simpleString}' is not true.");
+ |}
+ """.stripMargin
+ }
+
+ override def sql: String = s"assert_true(${child.sql})"
+}
+
+/**
* A xxHash64 64-bit hash expression.
*/
case class XxHash64(children: Seq[Expression], seed: Long) extends HashExpression[Long] {
http://git-wip-us.apache.org/repos/asf/spark/blob/d280d1da/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala
index 883ef48..18de8b1 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala
@@ -348,15 +348,22 @@ class HiveTypeCoercionSuite extends PlanTest {
test("type coercion for If") {
val rule = HiveTypeCoercion.IfCoercion
+
ruleTest(rule,
If(Literal(true), Literal(1), Literal(1L)),
- If(Literal(true), Cast(Literal(1), LongType), Literal(1L))
- )
+ If(Literal(true), Cast(Literal(1), LongType), Literal(1L)))
ruleTest(rule,
If(Literal.create(null, NullType), Literal(1), Literal(1)),
- If(Literal.create(null, BooleanType), Literal(1), Literal(1))
- )
+ If(Literal.create(null, BooleanType), Literal(1), Literal(1)))
+
+ ruleTest(rule,
+ If(AssertTrue(Literal.create(true, BooleanType)), Literal(1), Literal(2)),
+ If(Cast(AssertTrue(Literal.create(true, BooleanType)), BooleanType), Literal(1), Literal(2)))
+
+ ruleTest(rule,
+ If(AssertTrue(Literal.create(false, BooleanType)), Literal(1), Literal(2)),
+ If(Cast(AssertTrue(Literal.create(false, BooleanType)), BooleanType), Literal(1), Literal(2)))
}
test("type coercion for CaseKeyWhen") {
http://git-wip-us.apache.org/repos/asf/spark/blob/d280d1da/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala
index f5bafcc..56de822 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscFunctionsSuite.scala
@@ -69,6 +69,23 @@ class MiscFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
checkConsistencyBetweenInterpretedAndCodegen(Crc32, BinaryType)
}
+ test("assert_true") {
+ intercept[RuntimeException] {
+ checkEvaluation(AssertTrue(Literal(false, BooleanType)), null)
+ }
+ intercept[RuntimeException] {
+ checkEvaluation(AssertTrue(Cast(Literal(0), BooleanType)), null)
+ }
+ intercept[RuntimeException] {
+ checkEvaluation(AssertTrue(Literal.create(null, NullType)), null)
+ }
+ intercept[RuntimeException] {
+ checkEvaluation(AssertTrue(Literal.create(null, BooleanType)), null)
+ }
+ checkEvaluation(AssertTrue(Literal(true, BooleanType)), null)
+ checkEvaluation(AssertTrue(Cast(Literal(1), BooleanType)), null)
+ }
+
private val structOfString = new StructType().add("str", StringType)
private val structOfUDT = new StructType().add("udt", new ExamplePointUDT, false)
private val arrayOfString = ArrayType(StringType)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org