You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2019/07/19 04:44:55 UTC
[spark] branch master updated: [SPARK-28277][SQL][PYTHON][TESTS]
Convert and port 'except.sql' into UDF test base
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new cd676e9f [SPARK-28277][SQL][PYTHON][TESTS] Convert and port 'except.sql' into UDF test base
cd676e9f is described below
commit cd676e9f5e233668c6171f016bdc81e969726ba5
Author: Huaxin Gao <hu...@us.ibm.com>
AuthorDate: Fri Jul 19 13:44:26 2019 +0900
[SPARK-28277][SQL][PYTHON][TESTS] Convert and port 'except.sql' into UDF test base
## What changes were proposed in this pull request?
This PR adds some tests converted from ```except.sql``` to test UDFs. Please see contribution guide of this umbrella ticket - [SPARK-27921](https://issues.apache.org/jira/browse/SPARK-27921).
<details><summary>Diff comparing to 'except.sql'</summary>
<p>
```diff
diff --git a/sql/core/src/test/resources/sql-tests/results/except.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-except.sql.out
index c9b712d4d2..27ca7ea226 100644
--- a/sql/core/src/test/resources/sql-tests/results/except.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-except.sql.out
-30,16 +30,16 struct<>
-- !query 2
-SELECT * FROM t1 EXCEPT SELECT * FROM t2
+SELECT udf(k), udf(v) FROM t1 EXCEPT SELECT udf(k), udf(v) FROM t2
-- !query 2 schema
-struct<k:string,v:int>
+struct<CAST(udf(cast(k as string)) AS STRING):string,CAST(udf(cast(v as string)) AS INT):int>
-- !query 2 output
three 3
two 2
-- !query 3
-SELECT * FROM t1 EXCEPT SELECT * FROM t1 where v <> 1 and v <> 2
+SELECT * FROM t1 EXCEPT SELECT * FROM t1 where udf(v) <> 1 and v <> udf(2)
-- !query 3 schema
struct<k:string,v:int>
-- !query 3 output
-49,7 +49,7 two 2
-- !query 4
-SELECT * FROM t1 where v <> 1 and v <> 22 EXCEPT SELECT * FROM t1 where v <> 2 and v >= 3
+SELECT * FROM t1 where udf(v) <> 1 and v <> udf(22) EXCEPT SELECT * FROM t1 where udf(v) <> 2 and v >= udf(3)
-- !query 4 schema
struct<k:string,v:int>
-- !query 4 output
-59,7 +59,7 two 2
-- !query 5
SELECT t1.* FROM t1, t2 where t1.k = t2.k
EXCEPT
-SELECT t1.* FROM t1, t2 where t1.k = t2.k and t1.k != 'one'
+SELECT t1.* FROM t1, t2 where t1.k = t2.k and t1.k != udf('one')
-- !query 5 schema
struct<k:string,v:int>
-- !query 5 output
-68,7 +68,7 one NULL
-- !query 6
-SELECT * FROM t2 where v >= 1 and v <> 22 EXCEPT SELECT * FROM t1
+SELECT * FROM t2 where v >= udf(1) and udf(v) <> 22 EXCEPT SELECT * FROM t1
-- !query 6 schema
struct<k:string,v:int>
-- !query 6 output
-77,9 +77,9 one 5
-- !query 7
-SELECT (SELECT min(k) FROM t2 WHERE t2.k = t1.k) min_t2 FROM t1
+SELECT (SELECT min(udf(k)) FROM t2 WHERE t2.k = t1.k) min_t2 FROM t1
MINUS
-SELECT (SELECT min(k) FROM t2) abs_min_t2 FROM t1 WHERE t1.k = 'one'
+SELECT (SELECT udf(min(k)) FROM t2) abs_min_t2 FROM t1 WHERE t1.k = udf('one')
-- !query 7 schema
struct<min_t2:string>
-- !query 7 output
-90,16 +90,17 two
-- !query 8
SELECT t1.k
FROM t1
-WHERE t1.v <= (SELECT max(t2.v)
+WHERE t1.v <= (SELECT udf(max(udf(t2.v)))
FROM t2
- WHERE t2.k = t1.k)
+ WHERE udf(t2.k) = udf(t1.k))
MINUS
SELECT t1.k
FROM t1
-WHERE t1.v >= (SELECT min(t2.v)
+WHERE udf(t1.v) >= (SELECT min(udf(t2.v))
FROM t2
WHERE t2.k = t1.k)
-- !query 8 schema
-struct<k:string>
+struct<>
-- !query 8 output
-two
+java.lang.UnsupportedOperationException
+Cannot evaluate expression: udf(cast(null as string))
```
</p>
</details>
## How was this patch tested?
Tested as guided in [SPARK-27921.](https://issues.apache.org/jira/browse/SPARK-27921)
Closes #25101 from huaxingao/spark-28277.
Authored-by: Huaxin Gao <hu...@us.ibm.com>
Signed-off-by: HyukjinKwon <gu...@apache.org>
---
.../resources/sql-tests/inputs/udf/udf-except.sql | 59 +++++++++++++++
.../sql-tests/results/udf/udf-except.sql.out | 87 ++++++++++++++++++++++
2 files changed, 146 insertions(+)
diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/udf-except.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/udf-except.sql
new file mode 100644
index 0000000..17e2488
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/udf/udf-except.sql
@@ -0,0 +1,59 @@
+-- This test file was converted from except.sql.
+-- Tests different scenarios of except operation
+create temporary view t1 as select * from values
+ ("one", 1),
+ ("two", 2),
+ ("three", 3),
+ ("one", NULL)
+ as t1(k, v);
+
+create temporary view t2 as select * from values
+ ("one", 1),
+ ("two", 22),
+ ("one", 5),
+ ("one", NULL),
+ (NULL, 5)
+ as t2(k, v);
+
+
+-- Except operation that will be replaced by left anti join
+SELECT udf(k), udf(v) FROM t1 EXCEPT SELECT udf(k), udf(v) FROM t2;
+
+
+-- Except operation that will be replaced by Filter: SPARK-22181
+SELECT * FROM t1 EXCEPT SELECT * FROM t1 where udf(v) <> 1 and v <> udf(2);
+
+
+-- Except operation that will be replaced by Filter: SPARK-22181
+SELECT * FROM t1 where udf(v) <> 1 and v <> udf(22) EXCEPT SELECT * FROM t1 where udf(v) <> 2 and v >= udf(3);
+
+
+-- Except operation that will be replaced by Filter: SPARK-22181
+SELECT t1.* FROM t1, t2 where t1.k = t2.k
+EXCEPT
+SELECT t1.* FROM t1, t2 where t1.k = t2.k and t1.k != udf('one');
+
+
+-- Except operation that will be replaced by left anti join
+SELECT * FROM t2 where v >= udf(1) and udf(v) <> 22 EXCEPT SELECT * FROM t1;
+
+
+-- Except operation that will be replaced by left anti join
+SELECT (SELECT min(udf(k)) FROM t2 WHERE t2.k = t1.k) min_t2 FROM t1
+MINUS
+SELECT (SELECT udf(min(k)) FROM t2) abs_min_t2 FROM t1 WHERE t1.k = udf('one');
+
+
+-- Except operation that will be replaced by left anti join
+--- [SPARK-28441] udf(max(udf(column))) throws java.lang.UnsupportedOperationException: Cannot evaluate expression: udf(null)
+--- SELECT t1.k
+--- FROM t1
+--- WHERE t1.v <= (SELECT udf(max(udf(t2.v)))
+--- FROM t2
+--- WHERE udf(t2.k) = udf(t1.k))
+--- MINUS
+--- SELECT t1.k
+--- FROM t1
+--- WHERE udf(t1.v) >= (SELECT min(udf(t2.v))
+--- FROM t2
+--- WHERE t2.k = t1.k);
diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-except.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-except.sql.out
new file mode 100644
index 0000000..cb8a4e8
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-except.sql.out
@@ -0,0 +1,87 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 8
+
+
+-- !query 0
+create temporary view t1 as select * from values
+ ("one", 1),
+ ("two", 2),
+ ("three", 3),
+ ("one", NULL)
+ as t1(k, v)
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+create temporary view t2 as select * from values
+ ("one", 1),
+ ("two", 22),
+ ("one", 5),
+ ("one", NULL),
+ (NULL, 5)
+ as t2(k, v)
+-- !query 1 schema
+struct<>
+-- !query 1 output
+
+
+
+-- !query 2
+SELECT udf(k), udf(v) FROM t1 EXCEPT SELECT udf(k), udf(v) FROM t2
+-- !query 2 schema
+struct<CAST(udf(cast(k as string)) AS STRING):string,CAST(udf(cast(v as string)) AS INT):int>
+-- !query 2 output
+three 3
+two 2
+
+
+-- !query 3
+SELECT * FROM t1 EXCEPT SELECT * FROM t1 where udf(v) <> 1 and v <> udf(2)
+-- !query 3 schema
+struct<k:string,v:int>
+-- !query 3 output
+one 1
+one NULL
+two 2
+
+
+-- !query 4
+SELECT * FROM t1 where udf(v) <> 1 and v <> udf(22) EXCEPT SELECT * FROM t1 where udf(v) <> 2 and v >= udf(3)
+-- !query 4 schema
+struct<k:string,v:int>
+-- !query 4 output
+two 2
+
+
+-- !query 5
+SELECT t1.* FROM t1, t2 where t1.k = t2.k
+EXCEPT
+SELECT t1.* FROM t1, t2 where t1.k = t2.k and t1.k != udf('one')
+-- !query 5 schema
+struct<k:string,v:int>
+-- !query 5 output
+one 1
+one NULL
+
+
+-- !query 6
+SELECT * FROM t2 where v >= udf(1) and udf(v) <> 22 EXCEPT SELECT * FROM t1
+-- !query 6 schema
+struct<k:string,v:int>
+-- !query 6 output
+NULL 5
+one 5
+
+
+-- !query 7
+SELECT (SELECT min(udf(k)) FROM t2 WHERE t2.k = t1.k) min_t2 FROM t1
+MINUS
+SELECT (SELECT udf(min(k)) FROM t2) abs_min_t2 FROM t1 WHERE t1.k = udf('one')
+-- !query 7 schema
+struct<min_t2:string>
+-- !query 7 output
+NULL
+two
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org