You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2019/07/19 04:44:55 UTC

[spark] branch master updated: [SPARK-28277][SQL][PYTHON][TESTS] Convert and port 'except.sql' into UDF test base

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new cd676e9f [SPARK-28277][SQL][PYTHON][TESTS] Convert and port 'except.sql' into UDF test base
cd676e9f is described below

commit cd676e9f5e233668c6171f016bdc81e969726ba5
Author: Huaxin Gao <hu...@us.ibm.com>
AuthorDate: Fri Jul 19 13:44:26 2019 +0900

    [SPARK-28277][SQL][PYTHON][TESTS] Convert and port 'except.sql' into UDF test base
    
    ## What changes were proposed in this pull request?
    
    This PR adds some tests converted from ```except.sql``` to test UDFs. Please see contribution guide of this umbrella ticket - [SPARK-27921](https://issues.apache.org/jira/browse/SPARK-27921).
    <details><summary>Diff comparing to 'except.sql'</summary>
    <p>
    
    ```diff
    diff --git a/sql/core/src/test/resources/sql-tests/results/except.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-except.sql.out
    index c9b712d4d2..27ca7ea226 100644
    --- a/sql/core/src/test/resources/sql-tests/results/except.sql.out
    +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-except.sql.out
     -30,16 +30,16  struct<>
    
     -- !query 2
    -SELECT * FROM t1 EXCEPT SELECT * FROM t2
    +SELECT udf(k), udf(v) FROM t1 EXCEPT SELECT udf(k), udf(v) FROM t2
     -- !query 2 schema
    -struct<k:string,v:int>
    +struct<CAST(udf(cast(k as string)) AS STRING):string,CAST(udf(cast(v as string)) AS INT):int>
     -- !query 2 output
     three  3
     two    2
    
     -- !query 3
    -SELECT * FROM t1 EXCEPT SELECT * FROM t1 where v <> 1 and v <> 2
    +SELECT * FROM t1 EXCEPT SELECT * FROM t1 where udf(v) <> 1 and v <> udf(2)
     -- !query 3 schema
     struct<k:string,v:int>
     -- !query 3 output
     -49,7 +49,7  two   2
    
     -- !query 4
    -SELECT * FROM t1 where v <> 1 and v <> 22 EXCEPT SELECT * FROM t1 where v <> 2 and v >= 3
    +SELECT * FROM t1 where udf(v) <> 1 and v <> udf(22) EXCEPT SELECT * FROM t1 where udf(v) <> 2 and v >= udf(3)
     -- !query 4 schema
     struct<k:string,v:int>
     -- !query 4 output
     -59,7 +59,7  two   2
     -- !query 5
     SELECT t1.* FROM t1, t2 where t1.k = t2.k
     EXCEPT
    -SELECT t1.* FROM t1, t2 where t1.k = t2.k and t1.k != 'one'
    +SELECT t1.* FROM t1, t2 where t1.k = t2.k and t1.k != udf('one')
     -- !query 5 schema
     struct<k:string,v:int>
     -- !query 5 output
     -68,7 +68,7  one   NULL
    
     -- !query 6
    -SELECT * FROM t2 where v >= 1 and v <> 22 EXCEPT SELECT * FROM t1
    +SELECT * FROM t2 where v >= udf(1) and udf(v) <> 22 EXCEPT SELECT * FROM t1
     -- !query 6 schema
     struct<k:string,v:int>
     -- !query 6 output
     -77,9 +77,9  one   5
    
     -- !query 7
    -SELECT (SELECT min(k) FROM t2 WHERE t2.k = t1.k) min_t2 FROM t1
    +SELECT (SELECT min(udf(k)) FROM t2 WHERE t2.k = t1.k) min_t2 FROM t1
     MINUS
    -SELECT (SELECT min(k) FROM t2) abs_min_t2 FROM t1 WHERE  t1.k = 'one'
    +SELECT (SELECT udf(min(k)) FROM t2) abs_min_t2 FROM t1 WHERE  t1.k = udf('one')
     -- !query 7 schema
     struct<min_t2:string>
     -- !query 7 output
     -90,16 +90,17  two
     -- !query 8
     SELECT t1.k
     FROM   t1
    -WHERE  t1.v <= (SELECT   max(t2.v)
    +WHERE  t1.v <= (SELECT   udf(max(udf(t2.v)))
                     FROM     t2
    -                WHERE    t2.k = t1.k)
    +                WHERE    udf(t2.k) = udf(t1.k))
     MINUS
     SELECT t1.k
     FROM   t1
    -WHERE  t1.v >= (SELECT   min(t2.v)
    +WHERE  udf(t1.v) >= (SELECT   min(udf(t2.v))
                     FROM     t2
                     WHERE    t2.k = t1.k)
     -- !query 8 schema
    -struct<k:string>
    +struct<>
     -- !query 8 output
    -two
    +java.lang.UnsupportedOperationException
    +Cannot evaluate expression: udf(cast(null as string))
    ```
    
    </p>
    </details>
    
    ## How was this patch tested?
    Tested as guided in [SPARK-27921.](https://issues.apache.org/jira/browse/SPARK-27921)
    
    Closes #25101 from huaxingao/spark-28277.
    
    Authored-by: Huaxin Gao <hu...@us.ibm.com>
    Signed-off-by: HyukjinKwon <gu...@apache.org>
---
 .../resources/sql-tests/inputs/udf/udf-except.sql  | 59 +++++++++++++++
 .../sql-tests/results/udf/udf-except.sql.out       | 87 ++++++++++++++++++++++
 2 files changed, 146 insertions(+)

diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/udf-except.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/udf-except.sql
new file mode 100644
index 0000000..17e2488
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/udf/udf-except.sql
@@ -0,0 +1,59 @@
+-- This test file was converted from except.sql.
+-- Tests different scenarios of except operation
+create temporary view t1 as select * from values
+  ("one", 1),
+  ("two", 2),
+  ("three", 3),
+  ("one", NULL)
+  as t1(k, v);
+
+create temporary view t2 as select * from values
+  ("one", 1),
+  ("two", 22),
+  ("one", 5),
+  ("one", NULL),
+  (NULL, 5)
+  as t2(k, v);
+
+
+-- Except operation that will be replaced by left anti join
+SELECT udf(k), udf(v) FROM t1 EXCEPT SELECT udf(k), udf(v) FROM t2;
+
+
+-- Except operation that will be replaced by Filter: SPARK-22181
+SELECT * FROM t1 EXCEPT SELECT * FROM t1 where udf(v) <> 1 and v <> udf(2);
+
+
+-- Except operation that will be replaced by Filter: SPARK-22181
+SELECT * FROM t1 where udf(v) <> 1 and v <> udf(22) EXCEPT SELECT * FROM t1 where udf(v) <> 2 and v >= udf(3);
+
+
+-- Except operation that will be replaced by Filter: SPARK-22181
+SELECT t1.* FROM t1, t2 where t1.k = t2.k
+EXCEPT
+SELECT t1.* FROM t1, t2 where t1.k = t2.k and t1.k != udf('one');
+
+
+-- Except operation that will be replaced by left anti join
+SELECT * FROM t2 where v >= udf(1) and udf(v) <> 22 EXCEPT SELECT * FROM t1;
+
+
+-- Except operation that will be replaced by left anti join
+SELECT (SELECT min(udf(k)) FROM t2 WHERE t2.k = t1.k) min_t2 FROM t1
+MINUS
+SELECT (SELECT udf(min(k)) FROM t2) abs_min_t2 FROM t1 WHERE  t1.k = udf('one');
+
+
+-- Except operation that will be replaced by left anti join
+--- [SPARK-28441] udf(max(udf(column))) throws java.lang.UnsupportedOperationException: Cannot evaluate expression: udf(null)
+--- SELECT t1.k
+--- FROM   t1
+--- WHERE  t1.v <= (SELECT   udf(max(udf(t2.v)))
+---                 FROM     t2
+---                 WHERE    udf(t2.k) = udf(t1.k))
+--- MINUS
+--- SELECT t1.k
+--- FROM   t1
+--- WHERE  udf(t1.v) >= (SELECT   min(udf(t2.v))
+---                 FROM     t2
+---                 WHERE    t2.k = t1.k);
diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-except.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-except.sql.out
new file mode 100644
index 0000000..cb8a4e8
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-except.sql.out
@@ -0,0 +1,87 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 8
+
+
+-- !query 0
+create temporary view t1 as select * from values
+  ("one", 1),
+  ("two", 2),
+  ("three", 3),
+  ("one", NULL)
+  as t1(k, v)
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+create temporary view t2 as select * from values
+  ("one", 1),
+  ("two", 22),
+  ("one", 5),
+  ("one", NULL),
+  (NULL, 5)
+  as t2(k, v)
+-- !query 1 schema
+struct<>
+-- !query 1 output
+
+
+
+-- !query 2
+SELECT udf(k), udf(v) FROM t1 EXCEPT SELECT udf(k), udf(v) FROM t2
+-- !query 2 schema
+struct<CAST(udf(cast(k as string)) AS STRING):string,CAST(udf(cast(v as string)) AS INT):int>
+-- !query 2 output
+three	3
+two	2
+
+
+-- !query 3
+SELECT * FROM t1 EXCEPT SELECT * FROM t1 where udf(v) <> 1 and v <> udf(2)
+-- !query 3 schema
+struct<k:string,v:int>
+-- !query 3 output
+one	1
+one	NULL
+two	2
+
+
+-- !query 4
+SELECT * FROM t1 where udf(v) <> 1 and v <> udf(22) EXCEPT SELECT * FROM t1 where udf(v) <> 2 and v >= udf(3)
+-- !query 4 schema
+struct<k:string,v:int>
+-- !query 4 output
+two	2
+
+
+-- !query 5
+SELECT t1.* FROM t1, t2 where t1.k = t2.k
+EXCEPT
+SELECT t1.* FROM t1, t2 where t1.k = t2.k and t1.k != udf('one')
+-- !query 5 schema
+struct<k:string,v:int>
+-- !query 5 output
+one	1
+one	NULL
+
+
+-- !query 6
+SELECT * FROM t2 where v >= udf(1) and udf(v) <> 22 EXCEPT SELECT * FROM t1
+-- !query 6 schema
+struct<k:string,v:int>
+-- !query 6 output
+NULL	5
+one	5
+
+
+-- !query 7
+SELECT (SELECT min(udf(k)) FROM t2 WHERE t2.k = t1.k) min_t2 FROM t1
+MINUS
+SELECT (SELECT udf(min(k)) FROM t2) abs_min_t2 FROM t1 WHERE  t1.k = udf('one')
+-- !query 7 schema
+struct<min_t2:string>
+-- !query 7 output
+NULL
+two


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org