You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/04/21 18:24:37 UTC
[spark] branch branch-3.0 updated: [SPARK-31503][SQL] fix the SQL
string of the TRIM functions
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new fc4b49c [SPARK-31503][SQL] fix the SQL string of the TRIM functions
fc4b49c is described below
commit fc4b49c603f9ddef45c6e08144de06f6e4786208
Author: Wenchen Fan <we...@databricks.com>
AuthorDate: Tue Apr 21 11:22:18 2020 -0700
[SPARK-31503][SQL] fix the SQL string of the TRIM functions
### What changes were proposed in this pull request?
Override the `sql` method of `StringTrim`, `StringTrimLeft` and `StringTrimRight` to use the standard SQL syntax.
### Why are the changes needed?
The current implementation is wrong. It gives you a SQL string that returns a different result.
### Does this PR introduce any user-facing change?
No
### How was this patch tested?
New tests: a query was added to `string-functions.sql`, and the affected golden result files were updated.
Closes #28281 from cloud-fan/sql.
Authored-by: Wenchen Fan <we...@databricks.com>
Signed-off-by: Dongjoon Hyun <do...@apache.org>
(cherry picked from commit b209b5f40677ee7b241362e3805da9897ee942b4)
Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
.../catalyst/expressions/stringExpressions.scala | 30 +++++++++++-----------
.../sql-tests/inputs/string-functions.sql | 1 +
.../sql-tests/results/postgreSQL/strings.sql.out | 2 +-
.../sql-tests/results/string-functions.sql.out | 26 ++++++++++++-------
4 files changed, 34 insertions(+), 25 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
index 50a90ae..3723680 100755
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
@@ -683,11 +683,22 @@ case class FindInSet(left: Expression, right: Expression) extends BinaryExpressi
trait String2TrimExpression extends Expression with ImplicitCastInputTypes {
+ protected def srcStr: Expression
+ protected def trimStr: Option[Expression]
+ protected def direction: String
+
+ override def children: Seq[Expression] = srcStr +: trimStr.toSeq
override def dataType: DataType = StringType
override def inputTypes: Seq[AbstractDataType] = Seq.fill(children.size)(StringType)
override def nullable: Boolean = children.exists(_.nullable)
override def foldable: Boolean = children.forall(_.foldable)
+
+ override def sql: String = if (trimStr.isDefined) {
+ s"TRIM($direction ${trimStr.get.sql} FROM ${srcStr.sql})"
+ } else {
+ super.sql
+ }
}
object StringTrim {
@@ -769,11 +780,8 @@ case class StringTrim(
override def prettyName: String = "trim"
- override def children: Seq[Expression] = if (trimStr.isDefined) {
- srcStr :: trimStr.get :: Nil
- } else {
- srcStr :: Nil
- }
+ override protected def direction: String = "BOTH"
+
override def eval(input: InternalRow): Any = {
val srcString = srcStr.eval(input).asInstanceOf[UTF8String]
if (srcString == null) {
@@ -865,11 +873,7 @@ case class StringTrimLeft(
override def prettyName: String = "ltrim"
- override def children: Seq[Expression] = if (trimStr.isDefined) {
- srcStr :: trimStr.get :: Nil
- } else {
- srcStr :: Nil
- }
+ override protected def direction: String = "LEADING"
override def eval(input: InternalRow): Any = {
val srcString = srcStr.eval(input).asInstanceOf[UTF8String]
@@ -964,11 +968,7 @@ case class StringTrimRight(
override def prettyName: String = "rtrim"
- override def children: Seq[Expression] = if (trimStr.isDefined) {
- srcStr :: trimStr.get :: Nil
- } else {
- srcStr :: Nil
- }
+ override protected def direction: String = "TRAILING"
override def eval(input: InternalRow): Any = {
val srcString = srcStr.eval(input).asInstanceOf[UTF8String]
diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql
index fd6cc4d..8e33471 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql
@@ -40,6 +40,7 @@ SELECT substring('Spark SQL' from -3);
SELECT substring('Spark SQL' from 5 for 1);
-- trim
+SELECT trim(" xyz "), ltrim(" xyz "), rtrim(" xyz ");
SELECT trim(BOTH 'xyz' FROM 'yxTomxx'), trim('xyz' FROM 'yxTomxx');
SELECT trim(BOTH 'x' FROM 'xxxbarxxx'), trim('x' FROM 'xxxbarxxx');
SELECT trim(LEADING 'xyz' FROM 'zzzytest');
diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/strings.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/strings.sql.out
index 5f89c79..e8a3a9b 100644
--- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/strings.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/strings.sql.out
@@ -977,7 +977,7 @@ struct<repeat(Pg, -4):string>
-- !query
SELECT trim(binary('\\000') from binary('\\000Tom\\000'))
-- !query schema
-struct<trim(CAST(CAST(\000Tom\000 AS BINARY) AS STRING), CAST(CAST(\000 AS BINARY) AS STRING)):string>
+struct<TRIM(BOTH CAST(CAST(\000 AS BINARY) AS STRING) FROM CAST(CAST(\000Tom\000 AS BINARY) AS STRING)):string>
-- !query output
Tom
diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out
index 042d332..43c18f5 100644
--- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 33
+-- Number of queries: 34
-- !query
@@ -205,9 +205,17 @@ k
-- !query
+SELECT trim(" xyz "), ltrim(" xyz "), rtrim(" xyz ")
+-- !query schema
+struct<trim( xyz ):string,ltrim( xyz ):string,rtrim( xyz ):string>
+-- !query output
+xyz xyz xyz
+
+
+-- !query
SELECT trim(BOTH 'xyz' FROM 'yxTomxx'), trim('xyz' FROM 'yxTomxx')
-- !query schema
-struct<trim(yxTomxx, xyz):string,trim(yxTomxx, xyz):string>
+struct<TRIM(BOTH xyz FROM yxTomxx):string,TRIM(BOTH xyz FROM yxTomxx):string>
-- !query output
Tom Tom
@@ -215,7 +223,7 @@ Tom Tom
-- !query
SELECT trim(BOTH 'x' FROM 'xxxbarxxx'), trim('x' FROM 'xxxbarxxx')
-- !query schema
-struct<trim(xxxbarxxx, x):string,trim(xxxbarxxx, x):string>
+struct<TRIM(BOTH x FROM xxxbarxxx):string,TRIM(BOTH x FROM xxxbarxxx):string>
-- !query output
bar bar
@@ -223,7 +231,7 @@ bar bar
-- !query
SELECT trim(LEADING 'xyz' FROM 'zzzytest')
-- !query schema
-struct<ltrim(zzzytest, xyz):string>
+struct<TRIM(LEADING xyz FROM zzzytest):string>
-- !query output
test
@@ -231,7 +239,7 @@ test
-- !query
SELECT trim(LEADING 'xyz' FROM 'zzzytestxyz')
-- !query schema
-struct<ltrim(zzzytestxyz, xyz):string>
+struct<TRIM(LEADING xyz FROM zzzytestxyz):string>
-- !query output
testxyz
@@ -239,7 +247,7 @@ testxyz
-- !query
SELECT trim(LEADING 'xy' FROM 'xyxXxyLAST WORD')
-- !query schema
-struct<ltrim(xyxXxyLAST WORD, xy):string>
+struct<TRIM(LEADING xy FROM xyxXxyLAST WORD):string>
-- !query output
XxyLAST WORD
@@ -247,7 +255,7 @@ XxyLAST WORD
-- !query
SELECT trim(TRAILING 'xyz' FROM 'testxxzx')
-- !query schema
-struct<rtrim(testxxzx, xyz):string>
+struct<TRIM(TRAILING xyz FROM testxxzx):string>
-- !query output
test
@@ -255,7 +263,7 @@ test
-- !query
SELECT trim(TRAILING 'xyz' FROM 'xyztestxxzx')
-- !query schema
-struct<rtrim(xyztestxxzx, xyz):string>
+struct<TRIM(TRAILING xyz FROM xyztestxxzx):string>
-- !query output
xyztest
@@ -263,6 +271,6 @@ xyztest
-- !query
SELECT trim(TRAILING 'xy' FROM 'TURNERyxXxy')
-- !query schema
-struct<rtrim(TURNERyxXxy, xy):string>
+struct<TRIM(TRAILING xy FROM TURNERyxXxy):string>
-- !query output
TURNERyxX
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org