You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/04/21 18:24:37 UTC

[spark] branch branch-3.0 updated: [SPARK-31503][SQL] fix the SQL string of the TRIM functions

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new fc4b49c  [SPARK-31503][SQL] fix the SQL string of the TRIM functions
fc4b49c is described below

commit fc4b49c603f9ddef45c6e08144de06f6e4786208
Author: Wenchen Fan <we...@databricks.com>
AuthorDate: Tue Apr 21 11:22:18 2020 -0700

    [SPARK-31503][SQL] fix the SQL string of the TRIM functions
    
    ### What changes were proposed in this pull request?
    
    Override the `sql` method of `StringTrim`, `StringTrimLeft` and `StringTrimRight` so that it uses the standard SQL syntax.
    
    ### Why are the changes needed?
    
    The current implementation is wrong: it produces a SQL string that, when executed, returns a different result from the original expression.
    
    ### Does this PR introduce any user-facing change?
    
    No
    
    ### How was this patch tested?
    
    New tests in `string-functions.sql`.
    
    Closes #28281 from cloud-fan/sql.
    
    Authored-by: Wenchen Fan <we...@databricks.com>
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
    (cherry picked from commit b209b5f40677ee7b241362e3805da9897ee942b4)
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 .../catalyst/expressions/stringExpressions.scala   | 30 +++++++++++-----------
 .../sql-tests/inputs/string-functions.sql          |  1 +
 .../sql-tests/results/postgreSQL/strings.sql.out   |  2 +-
 .../sql-tests/results/string-functions.sql.out     | 26 ++++++++++++-------
 4 files changed, 34 insertions(+), 25 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
index 50a90ae..3723680 100755
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
@@ -683,11 +683,22 @@ case class FindInSet(left: Expression, right: Expression) extends BinaryExpressi
 
 trait String2TrimExpression extends Expression with ImplicitCastInputTypes {
 
+  protected def srcStr: Expression
+  protected def trimStr: Option[Expression]
+  protected def direction: String
+
+  override def children: Seq[Expression] = srcStr +: trimStr.toSeq
   override def dataType: DataType = StringType
   override def inputTypes: Seq[AbstractDataType] = Seq.fill(children.size)(StringType)
 
   override def nullable: Boolean = children.exists(_.nullable)
   override def foldable: Boolean = children.forall(_.foldable)
+
+  override def sql: String = if (trimStr.isDefined) {
+    s"TRIM($direction ${trimStr.get.sql} FROM ${srcStr.sql})"
+  } else {
+    super.sql
+  }
 }
 
 object StringTrim {
@@ -769,11 +780,8 @@ case class StringTrim(
 
   override def prettyName: String = "trim"
 
-  override def children: Seq[Expression] = if (trimStr.isDefined) {
-    srcStr :: trimStr.get :: Nil
-  } else {
-    srcStr :: Nil
-  }
+  override protected def direction: String = "BOTH"
+
   override def eval(input: InternalRow): Any = {
     val srcString = srcStr.eval(input).asInstanceOf[UTF8String]
     if (srcString == null) {
@@ -865,11 +873,7 @@ case class StringTrimLeft(
 
   override def prettyName: String = "ltrim"
 
-  override def children: Seq[Expression] = if (trimStr.isDefined) {
-    srcStr :: trimStr.get :: Nil
-  } else {
-    srcStr :: Nil
-  }
+  override protected def direction: String = "LEADING"
 
   override def eval(input: InternalRow): Any = {
     val srcString = srcStr.eval(input).asInstanceOf[UTF8String]
@@ -964,11 +968,7 @@ case class StringTrimRight(
 
   override def prettyName: String = "rtrim"
 
-  override def children: Seq[Expression] = if (trimStr.isDefined) {
-    srcStr :: trimStr.get :: Nil
-  } else {
-    srcStr :: Nil
-  }
+  override protected def direction: String = "TRAILING"
 
   override def eval(input: InternalRow): Any = {
     val srcString = srcStr.eval(input).asInstanceOf[UTF8String]
diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql
index fd6cc4d..8e33471 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql
@@ -40,6 +40,7 @@ SELECT substring('Spark SQL' from -3);
 SELECT substring('Spark SQL' from 5 for 1);
 
 -- trim
+SELECT trim(" xyz "), ltrim(" xyz "), rtrim(" xyz ");
 SELECT trim(BOTH 'xyz' FROM 'yxTomxx'), trim('xyz' FROM 'yxTomxx');
 SELECT trim(BOTH 'x' FROM 'xxxbarxxx'), trim('x' FROM 'xxxbarxxx');
 SELECT trim(LEADING 'xyz' FROM 'zzzytest');
diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/strings.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/strings.sql.out
index 5f89c79..e8a3a9b 100644
--- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/strings.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/strings.sql.out
@@ -977,7 +977,7 @@ struct<repeat(Pg, -4):string>
 -- !query
 SELECT trim(binary('\\000') from binary('\\000Tom\\000'))
 -- !query schema
-struct<trim(CAST(CAST(\000Tom\000 AS BINARY) AS STRING), CAST(CAST(\000 AS BINARY) AS STRING)):string>
+struct<TRIM(BOTH CAST(CAST(\000 AS BINARY) AS STRING) FROM CAST(CAST(\000Tom\000 AS BINARY) AS STRING)):string>
 -- !query output
 Tom
 
diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out
index 042d332..43c18f5 100644
--- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 33
+-- Number of queries: 34
 
 
 -- !query
@@ -205,9 +205,17 @@ k
 
 
 -- !query
+SELECT trim(" xyz "), ltrim(" xyz "), rtrim(" xyz ")
+-- !query schema
+struct<trim( xyz ):string,ltrim( xyz ):string,rtrim( xyz ):string>
+-- !query output
+xyz	xyz 	 xyz
+
+
+-- !query
 SELECT trim(BOTH 'xyz' FROM 'yxTomxx'), trim('xyz' FROM 'yxTomxx')
 -- !query schema
-struct<trim(yxTomxx, xyz):string,trim(yxTomxx, xyz):string>
+struct<TRIM(BOTH xyz FROM yxTomxx):string,TRIM(BOTH xyz FROM yxTomxx):string>
 -- !query output
 Tom	Tom
 
@@ -215,7 +223,7 @@ Tom	Tom
 -- !query
 SELECT trim(BOTH 'x' FROM 'xxxbarxxx'), trim('x' FROM 'xxxbarxxx')
 -- !query schema
-struct<trim(xxxbarxxx, x):string,trim(xxxbarxxx, x):string>
+struct<TRIM(BOTH x FROM xxxbarxxx):string,TRIM(BOTH x FROM xxxbarxxx):string>
 -- !query output
 bar	bar
 
@@ -223,7 +231,7 @@ bar	bar
 -- !query
 SELECT trim(LEADING 'xyz' FROM 'zzzytest')
 -- !query schema
-struct<ltrim(zzzytest, xyz):string>
+struct<TRIM(LEADING xyz FROM zzzytest):string>
 -- !query output
 test
 
@@ -231,7 +239,7 @@ test
 -- !query
 SELECT trim(LEADING 'xyz' FROM 'zzzytestxyz')
 -- !query schema
-struct<ltrim(zzzytestxyz, xyz):string>
+struct<TRIM(LEADING xyz FROM zzzytestxyz):string>
 -- !query output
 testxyz
 
@@ -239,7 +247,7 @@ testxyz
 -- !query
 SELECT trim(LEADING 'xy' FROM 'xyxXxyLAST WORD')
 -- !query schema
-struct<ltrim(xyxXxyLAST WORD, xy):string>
+struct<TRIM(LEADING xy FROM xyxXxyLAST WORD):string>
 -- !query output
 XxyLAST WORD
 
@@ -247,7 +255,7 @@ XxyLAST WORD
 -- !query
 SELECT trim(TRAILING 'xyz' FROM 'testxxzx')
 -- !query schema
-struct<rtrim(testxxzx, xyz):string>
+struct<TRIM(TRAILING xyz FROM testxxzx):string>
 -- !query output
 test
 
@@ -255,7 +263,7 @@ test
 -- !query
 SELECT trim(TRAILING 'xyz' FROM 'xyztestxxzx')
 -- !query schema
-struct<rtrim(xyztestxxzx, xyz):string>
+struct<TRIM(TRAILING xyz FROM xyztestxxzx):string>
 -- !query output
 xyztest
 
@@ -263,6 +271,6 @@ xyztest
 -- !query
 SELECT trim(TRAILING 'xy' FROM 'TURNERyxXxy')
 -- !query schema
-struct<rtrim(TURNERyxXxy, xy):string>
+struct<TRIM(TRAILING xy FROM TURNERyxXxy):string>
 -- !query output
 TURNERyxX


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org