You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2019/07/11 00:57:52 UTC

[spark] branch master updated: [SPARK-28281][SQL][PYTHON][TESTS] Convert and port 'having.sql' into UDF test base

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 3a94fb3  [SPARK-28281][SQL][PYTHON][TESTS] Convert and port 'having.sql' into UDF test base
3a94fb3 is described below

commit 3a94fb3dd92a05676a3c11cbcea314dd296ec059
Author: Huaxin Gao <hu...@us.ibm.com>
AuthorDate: Thu Jul 11 09:57:34 2019 +0900

    [SPARK-28281][SQL][PYTHON][TESTS] Convert and port 'having.sql' into UDF test base
    
    ## What changes were proposed in this pull request?
    
    This PR adds some tests converted from having.sql to test UDFs, following the combination guide in [SPARK-27921](https://issues.apache.org/jira/browse/SPARK-27921).
    <details><summary>Diff comparing to 'having.sql'</summary>
    <p>
    
    ```diff
    diff --git a/sql/core/src/test/resources/sql-tests/results/having.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-having.sql.out
    index d87ee52216..7cea2e5128 100644
    --- a/sql/core/src/test/resources/sql-tests/results/having.sql.out
    +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-having.sql.out
    @@ -16,34 +16,34 @@ struct<>
    
     -- !query 1
    -SELECT k, sum(v) FROM hav GROUP BY k HAVING sum(v) > 2
    +SELECT udf(k) AS k, udf(sum(v)) FROM hav GROUP BY k HAVING udf(sum(v)) > 2
     -- !query 1 schema
    -struct<k:string,sum(v):bigint>
    +struct<k:string,udf(sum(cast(v as bigint))):string>
     -- !query 1 output
     one    6
     three  3
    
     -- !query 2
    -SELECT count(k) FROM hav GROUP BY v + 1 HAVING v + 1 = 2
    +SELECT udf(count(udf(k))) FROM hav GROUP BY v + 1 HAVING v + 1 = udf(2)
     -- !query 2 schema
    -struct<count(k):bigint>
    +struct<udf(count(udf(k))):string>
     -- !query 2 output
     1
    
     -- !query 3
    -SELECT MIN(t.v) FROM (SELECT * FROM hav WHERE v > 0) t HAVING(COUNT(1) > 0)
    +SELECT udf(MIN(t.v)) FROM (SELECT * FROM hav WHERE v > 0) t HAVING(udf(COUNT(udf(1))) > 0)
     -- !query 3 schema
    -struct<min(v):int>
    +struct<udf(min(v)):string>
     -- !query 3 output
     1
    
     -- !query 4
    -SELECT a + b FROM VALUES (1L, 2), (3L, 4) AS T(a, b) GROUP BY a + b HAVING a + b > 1
    +SELECT udf(a + b) FROM VALUES (1L, 2), (3L, 4) AS T(a, b) GROUP BY a + b HAVING a + b > udf(1)
     -- !query 4 schema
    -struct<(a + CAST(b AS BIGINT)):bigint>
    +struct<udf((a + cast(b as bigint))):string>
     -- !query 4 output
     3
     7
    
    ```
    
    </p>
    </details>
    
    ## How was this patch tested?
    
    Tested as guided in SPARK-27921.
    
    Closes #25093 from huaxingao/spark-28281.
    
    Authored-by: Huaxin Gao <hu...@us.ibm.com>
    Signed-off-by: HyukjinKwon <gu...@apache.org>
---
 .../resources/sql-tests/inputs/udf/udf-having.sql  | 22 ++++++++++
 .../sql-tests/results/udf/udf-having.sql.out       | 49 ++++++++++++++++++++++
 2 files changed, 71 insertions(+)

diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/udf-having.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/udf-having.sql
new file mode 100644
index 0000000..6ae34ae
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/udf/udf-having.sql
@@ -0,0 +1,22 @@
+-- This test file was converted from having.sql.
+-- Note that the currently registered UDF returns a string, so there are some differences from
+-- having.sql, for instance in how values are cast to strings within the UDF in Scala and Python.
+
+create temporary view hav as select * from values
+  ("one", 1),
+  ("two", 2),
+  ("three", 3),
+  ("one", 5)
+  as hav(k, v);
+
+-- having clause
+SELECT udf(k) AS k, udf(sum(v)) FROM hav GROUP BY k HAVING udf(sum(v)) > 2;
+
+-- having condition contains grouping column
+SELECT udf(count(udf(k))) FROM hav GROUP BY v + 1 HAVING v + 1 = udf(2);
+
+-- SPARK-11032: resolve having correctly
+SELECT udf(MIN(t.v)) FROM (SELECT * FROM hav WHERE v > 0) t HAVING(udf(COUNT(udf(1))) > 0);
+
+-- SPARK-20329: make sure HAVING resolves a GROUP BY expression that involves implicit type coercion
+SELECT udf(a + b) FROM VALUES (1L, 2), (3L, 4) AS T(a, b) GROUP BY a + b HAVING a + b > udf(1);
diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-having.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-having.sql.out
new file mode 100644
index 0000000..7cea2e5
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-having.sql.out
@@ -0,0 +1,49 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 5
+
+
+-- !query 0
+create temporary view hav as select * from values
+  ("one", 1),
+  ("two", 2),
+  ("three", 3),
+  ("one", 5)
+  as hav(k, v)
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+SELECT udf(k) AS k, udf(sum(v)) FROM hav GROUP BY k HAVING udf(sum(v)) > 2
+-- !query 1 schema
+struct<k:string,udf(sum(cast(v as bigint))):string>
+-- !query 1 output
+one	6
+three	3
+
+
+-- !query 2
+SELECT udf(count(udf(k))) FROM hav GROUP BY v + 1 HAVING v + 1 = udf(2)
+-- !query 2 schema
+struct<udf(count(udf(k))):string>
+-- !query 2 output
+1
+
+
+-- !query 3
+SELECT udf(MIN(t.v)) FROM (SELECT * FROM hav WHERE v > 0) t HAVING(udf(COUNT(udf(1))) > 0)
+-- !query 3 schema
+struct<udf(min(v)):string>
+-- !query 3 output
+1
+
+
+-- !query 4
+SELECT udf(a + b) FROM VALUES (1L, 2), (3L, 4) AS T(a, b) GROUP BY a + b HAVING a + b > udf(1)
+-- !query 4 schema
+struct<udf((a + cast(b as bigint))):string>
+-- !query 4 output
+3
+7


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org