You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2019/07/11 00:57:52 UTC
[spark] branch master updated: [SPARK-28281][SQL][PYTHON][TESTS]
Convert and port 'having.sql' into UDF test base
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 3a94fb3 [SPARK-28281][SQL][PYTHON][TESTS] Convert and port 'having.sql' into UDF test base
3a94fb3 is described below
commit 3a94fb3dd92a05676a3c11cbcea314dd296ec059
Author: Huaxin Gao <hu...@us.ibm.com>
AuthorDate: Thu Jul 11 09:57:34 2019 +0900
[SPARK-28281][SQL][PYTHON][TESTS] Convert and port 'having.sql' into UDF test base
## What changes were proposed in this pull request?
This PR adds some tests converted from having.sql to test UDFs, following the combination guide in [SPARK-27921](https://issues.apache.org/jira/browse/SPARK-27921).
<details><summary>Diff comparing to 'having.sql'</summary>
<p>
```diff
diff --git a/sql/core/src/test/resources/sql-tests/results/having.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-having.sql.out
index d87ee52216..7cea2e5128 100644
--- a/sql/core/src/test/resources/sql-tests/results/having.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-having.sql.out
@@ -16,34 +16,34 @@ struct<>
-- !query 1
-SELECT k, sum(v) FROM hav GROUP BY k HAVING sum(v) > 2
+SELECT udf(k) AS k, udf(sum(v)) FROM hav GROUP BY k HAVING udf(sum(v)) > 2
-- !query 1 schema
-struct<k:string,sum(v):bigint>
+struct<k:string,udf(sum(cast(v as bigint))):string>
-- !query 1 output
one 6
three 3
-- !query 2
-SELECT count(k) FROM hav GROUP BY v + 1 HAVING v + 1 = 2
+SELECT udf(count(udf(k))) FROM hav GROUP BY v + 1 HAVING v + 1 = udf(2)
-- !query 2 schema
-struct<count(k):bigint>
+struct<udf(count(udf(k))):string>
-- !query 2 output
1
-- !query 3
-SELECT MIN(t.v) FROM (SELECT * FROM hav WHERE v > 0) t HAVING(COUNT(1) > 0)
+SELECT udf(MIN(t.v)) FROM (SELECT * FROM hav WHERE v > 0) t HAVING(udf(COUNT(udf(1))) > 0)
-- !query 3 schema
-struct<min(v):int>
+struct<udf(min(v)):string>
-- !query 3 output
1
-- !query 4
-SELECT a + b FROM VALUES (1L, 2), (3L, 4) AS T(a, b) GROUP BY a + b HAVING a + b > 1
+SELECT udf(a + b) FROM VALUES (1L, 2), (3L, 4) AS T(a, b) GROUP BY a + b HAVING a + b > udf(1)
-- !query 4 schema
-struct<(a + CAST(b AS BIGINT)):bigint>
+struct<udf((a + cast(b as bigint))):string>
-- !query 4 output
3
7
```
</p>
</details>
## How was this patch tested?
Tested as guided in SPARK-27921.
Closes #25093 from huaxingao/spark-28281.
Authored-by: Huaxin Gao <hu...@us.ibm.com>
Signed-off-by: HyukjinKwon <gu...@apache.org>
---
.../resources/sql-tests/inputs/udf/udf-having.sql | 22 ++++++++++
.../sql-tests/results/udf/udf-having.sql.out | 49 ++++++++++++++++++++++
2 files changed, 71 insertions(+)
diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/udf-having.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/udf-having.sql
new file mode 100644
index 0000000..6ae34ae
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/udf/udf-having.sql
@@ -0,0 +1,22 @@
+-- This test file was converted from having.sql.
+-- Note that the currently registered UDF returns a string, so there are some differences, for instance
+-- in the string cast within the UDF in Scala and Python.
+
+create temporary view hav as select * from values
+ ("one", 1),
+ ("two", 2),
+ ("three", 3),
+ ("one", 5)
+ as hav(k, v);
+
+-- having clause
+SELECT udf(k) AS k, udf(sum(v)) FROM hav GROUP BY k HAVING udf(sum(v)) > 2;
+
+-- having condition contains grouping column
+SELECT udf(count(udf(k))) FROM hav GROUP BY v + 1 HAVING v + 1 = udf(2);
+
+-- SPARK-11032: resolve having correctly
+SELECT udf(MIN(t.v)) FROM (SELECT * FROM hav WHERE v > 0) t HAVING(udf(COUNT(udf(1))) > 0);
+
+-- SPARK-20329: make sure we handle timezones correctly
+SELECT udf(a + b) FROM VALUES (1L, 2), (3L, 4) AS T(a, b) GROUP BY a + b HAVING a + b > udf(1);
diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-having.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-having.sql.out
new file mode 100644
index 0000000..7cea2e5
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-having.sql.out
@@ -0,0 +1,49 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 5
+
+
+-- !query 0
+create temporary view hav as select * from values
+ ("one", 1),
+ ("two", 2),
+ ("three", 3),
+ ("one", 5)
+ as hav(k, v)
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+SELECT udf(k) AS k, udf(sum(v)) FROM hav GROUP BY k HAVING udf(sum(v)) > 2
+-- !query 1 schema
+struct<k:string,udf(sum(cast(v as bigint))):string>
+-- !query 1 output
+one 6
+three 3
+
+
+-- !query 2
+SELECT udf(count(udf(k))) FROM hav GROUP BY v + 1 HAVING v + 1 = udf(2)
+-- !query 2 schema
+struct<udf(count(udf(k))):string>
+-- !query 2 output
+1
+
+
+-- !query 3
+SELECT udf(MIN(t.v)) FROM (SELECT * FROM hav WHERE v > 0) t HAVING(udf(COUNT(udf(1))) > 0)
+-- !query 3 schema
+struct<udf(min(v)):string>
+-- !query 3 output
+1
+
+
+-- !query 4
+SELECT udf(a + b) FROM VALUES (1L, 2), (3L, 4) AS T(a, b) GROUP BY a + b HAVING a + b > udf(1)
+-- !query 4 schema
+struct<udf((a + cast(b as bigint))):string>
+-- !query 4 output
+3
+7
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org