You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by yh...@apache.org on 2016/04/09 22:54:33 UTC

spark git commit: [SPARK-14335][SQL] Describe function command returns wrong output

Repository: spark
Updated Branches:
  refs/heads/master f7ec854f1 -> cd2fed701


[SPARK-14335][SQL] Describe function command returns wrong output

## What changes were proposed in this pull request?

…because some built-in functions are not in the function registry.

This fix addresses issues in the `describe function` command where some of the output
still shows Hive's functions because some built-in functions are not in FunctionRegistry.

The following built-in functions have been added to FunctionRegistry:
```
-
!
*
/
&
%
^
+
<
<=
<=>
=
==
>
>=
|
~
and
in
like
not
or
rlike
when
```

The following functions are not added to FunctionRegistry, but are hard-coded in `commands.scala` (hvanhovell):
```
!=
<>
between
case
```
Below are the existing results for the above functions that have not been added:
```
spark-sql> describe function `!=`;
Function: <>
Class: org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual
Usage: a <> b - Returns TRUE if a is not equal to b
```
```
spark-sql> describe function `<>`;
Function: <>
Class: org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual
Usage: a <> b - Returns TRUE if a is not equal to b
```
```
spark-sql> describe function `between`;
Function: between
Class: org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween
Usage: between a [NOT] BETWEEN b AND c - evaluate if a is [not] in between b and c
```
```
spark-sql> describe function `case`;
Function: case
Class: org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase
Usage: CASE a WHEN b THEN c [WHEN d THEN e]* [ELSE f] END - When a = b, returns c; when a = d, return e; else return f
```

## How was this patch tested?

Existing tests passed. Additional test cases added.

Author: Yong Tang <yo...@outlook.com>

Closes #12128 from yongtang/SPARK-14335.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cd2fed70
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cd2fed70
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cd2fed70

Branch: refs/heads/master
Commit: cd2fed70129ba601f8c849a93eeb44a5d69c2402
Parents: f7ec854
Author: Yong Tang <yo...@outlook.com>
Authored: Sat Apr 9 13:54:30 2016 -0700
Committer: Yin Huai <yh...@databricks.com>
Committed: Sat Apr 9 13:54:30 2016 -0700

----------------------------------------------------------------------
 .../catalyst/analysis/FunctionRegistry.scala    | 33 ++++++++++++++-
 .../spark/sql/execution/command/commands.scala  | 44 ++++++++++++++------
 .../sql/hive/execution/SQLQuerySuite.scala      | 30 +++++++++----
 3 files changed, 86 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/cd2fed70/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index f239b33..f2abf13 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -171,6 +171,7 @@ object FunctionRegistry {
     expression[Rand]("rand"),
     expression[Randn]("randn"),
     expression[CreateStruct]("struct"),
+    expression[CaseWhen]("when"),
 
     // math functions
     expression[Acos]("acos"),
@@ -217,6 +218,12 @@ object FunctionRegistry {
     expression[Tan]("tan"),
     expression[Tanh]("tanh"),
 
+    expression[Add]("+"),
+    expression[Subtract]("-"),
+    expression[Multiply]("*"),
+    expression[Divide]("/"),
+    expression[Remainder]("%"),
+
     // aggregate functions
     expression[HyperLogLogPlusPlus]("approx_count_distinct"),
     expression[Average]("avg"),
@@ -257,6 +264,7 @@ object FunctionRegistry {
     expression[Lower]("lcase"),
     expression[Length]("length"),
     expression[Levenshtein]("levenshtein"),
+    expression[Like]("like"),
     expression[Lower]("lower"),
     expression[StringLocate]("locate"),
     expression[StringLPad]("lpad"),
@@ -267,6 +275,7 @@ object FunctionRegistry {
     expression[RegExpReplace]("regexp_replace"),
     expression[StringRepeat]("repeat"),
     expression[StringReverse]("reverse"),
+    expression[RLike]("rlike"),
     expression[StringRPad]("rpad"),
     expression[StringTrimRight]("rtrim"),
     expression[SoundEx]("soundex"),
@@ -343,7 +352,29 @@ object FunctionRegistry {
     expression[NTile]("ntile"),
     expression[Rank]("rank"),
     expression[DenseRank]("dense_rank"),
-    expression[PercentRank]("percent_rank")
+    expression[PercentRank]("percent_rank"),
+
+    // predicates
+    expression[And]("and"),
+    expression[In]("in"),
+    expression[Not]("not"),
+    expression[Or]("or"),
+
+    expression[EqualNullSafe]("<=>"),
+    expression[EqualTo]("="),
+    expression[EqualTo]("=="),
+    expression[GreaterThan](">"),
+    expression[GreaterThanOrEqual](">="),
+    expression[LessThan]("<"),
+    expression[LessThanOrEqual]("<="),
+    expression[Not]("!"),
+
+    // bitwise
+    expression[BitwiseAnd]("&"),
+    expression[BitwiseNot]("~"),
+    expression[BitwiseOr]("|"),
+    expression[BitwiseXor]("^")
+
   )
 
   val builtin: SimpleFunctionRegistry = {

http://git-wip-us.apache.org/repos/asf/spark/blob/cd2fed70/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala
index 3fd2a93..5d00c80 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala
@@ -483,20 +483,38 @@ case class DescribeFunction(
   }
 
   override def run(sqlContext: SQLContext): Seq[Row] = {
-    sqlContext.sessionState.functionRegistry.lookupFunction(functionName) match {
-      case Some(info) =>
-        val result =
-          Row(s"Function: ${info.getName}") ::
-          Row(s"Class: ${info.getClassName}") ::
-          Row(s"Usage: ${replaceFunctionName(info.getUsage(), info.getName)}") :: Nil
-
-        if (isExtended) {
-          result :+ Row(s"Extended Usage:\n${replaceFunctionName(info.getExtended, info.getName)}")
-        } else {
-          result
-        }
+    // Hard code "<>", "!=", "between", and "case" for now as there is no corresponding functions.
+    functionName.toLowerCase match {
+      case "<>" =>
+        Row(s"Function: $functionName") ::
+        Row(s"Usage: a <> b - Returns TRUE if a is not equal to b") :: Nil
+      case "!=" =>
+        Row(s"Function: $functionName") ::
+        Row(s"Usage: a != b - Returns TRUE if a is not equal to b") :: Nil
+      case "between" =>
+        Row(s"Function: between") ::
+        Row(s"Usage: a [NOT] BETWEEN b AND c - " +
+          s"evaluate if a is [not] in between b and c") :: Nil
+      case "case" =>
+        Row(s"Function: case") ::
+        Row(s"Usage: CASE a WHEN b THEN c [WHEN d THEN e]* [ELSE f] END - " +
+          s"When a = b, returns c; when a = d, return e; else return f") :: Nil
+      case _ => sqlContext.sessionState.functionRegistry.lookupFunction(functionName) match {
+        case Some(info) =>
+          val result =
+            Row(s"Function: ${info.getName}") ::
+            Row(s"Class: ${info.getClassName}") ::
+            Row(s"Usage: ${replaceFunctionName(info.getUsage(), info.getName)}") :: Nil
+
+          if (isExtended) {
+            result :+
+              Row(s"Extended Usage:\n${replaceFunctionName(info.getExtended, info.getName)}")
+          } else {
+            result
+          }
 
-      case None => Seq(Row(s"Function: $functionName not found."))
+        case None => Seq(Row(s"Function: $functionName not found."))
+      }
     }
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/cd2fed70/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index 14a1d4c..d7ec85c 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -203,8 +203,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
     checkAnswer(sql("SHOW functions abc.abs"), Row("abs"))
     checkAnswer(sql("SHOW functions `abc`.`abs`"), Row("abs"))
     checkAnswer(sql("SHOW functions `abc`.`abs`"), Row("abs"))
-    // TODO: Re-enable this test after we fix SPARK-14335.
-    // checkAnswer(sql("SHOW functions `~`"), Row("~"))
+    checkAnswer(sql("SHOW functions `~`"), Row("~"))
     checkAnswer(sql("SHOW functions `a function doens't exist`"), Nil)
     checkAnswer(sql("SHOW functions `weekofyea*`"), Row("weekofyear"))
     // this probably will failed if we add more function with `sha` prefixing.
@@ -236,11 +235,28 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
     checkExistence(sql("describe functioN abcadf"), true,
       "Function: abcadf not found.")
 
-    // TODO: Re-enable this test after we fix SPARK-14335.
-    // checkExistence(sql("describe functioN  `~`"), true,
-    //  "Function: ~",
-    //  "Class: org.apache.hadoop.hive.ql.udf.UDFOPBitNot",
-    //  "Usage: ~ n - Bitwise not")
+    checkExistence(sql("describe functioN  `~`"), true,
+      "Function: ~",
+      "Class: org.apache.spark.sql.catalyst.expressions.BitwiseNot",
+      "Usage: To be added.")
+
+    // Hard coded describe functions
+    checkExistence(sql("describe function  `<>`"), true,
+      "Function: <>",
+      "Usage: a <> b - Returns TRUE if a is not equal to b")
+
+    checkExistence(sql("describe function  `!=`"), true,
+      "Function: !=",
+      "Usage: a != b - Returns TRUE if a is not equal to b")
+
+    checkExistence(sql("describe function  `between`"), true,
+      "Function: between",
+      "Usage: a [NOT] BETWEEN b AND c - evaluate if a is [not] in between b and c")
+
+    checkExistence(sql("describe function  `case`"), true,
+      "Function: case",
+      "Usage: CASE a WHEN b THEN c [WHEN d THEN e]* [ELSE f] END - " +
+        "When a = b, returns c; when a = d, return e; else return f")
   }
 
   test("SPARK-5371: union with null and sum") {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org