You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2019/07/15 07:20:38 UTC
[spark] branch master updated: [SPARK-28392][SQL][TESTS] Add traits
for UDF and PostgreSQL tests to share initialization
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new a7a02a8 [SPARK-28392][SQL][TESTS] Add traits for UDF and PostgreSQL tests to share initialization
a7a02a8 is described below
commit a7a02a86adafd3808051d843cf7e70176a7c4099
Author: HyukjinKwon <gu...@apache.org>
AuthorDate: Mon Jul 15 16:20:09 2019 +0900
[SPARK-28392][SQL][TESTS] Add traits for UDF and PostgreSQL tests to share initialization
## What changes were proposed in this pull request?
This PR adds some traits so that we can deduplicate initialization stuff for each type of test case. For instance, see [SPARK-28343](https://issues.apache.org/jira/browse/SPARK-28343).
It's a little bit overkill but I think it will make adding test cases easier and cause less confusion.
This PR adds both:
```
private trait PgSQLTest
private trait UDFTest
```
To indicate and share the logic related to each combination of test types.
## How was this patch tested?
Manually tested.
Closes #25155 from HyukjinKwon/SPARK-28392.
Authored-by: HyukjinKwon <gu...@apache.org>
Signed-off-by: HyukjinKwon <gu...@apache.org>
---
.../sql-tests/inputs/udf/pgSQL/udf-case.sql | 5 -
.../sql-tests/results/udf/pgSQL/udf-case.sql.out | 190 ++++++++++-----------
.../org/apache/spark/sql/SQLQueryTestSuite.scala | 56 ++++--
3 files changed, 129 insertions(+), 122 deletions(-)
diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-case.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-case.sql
index b05c21d..a2aab79 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-case.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-case.sql
@@ -6,14 +6,10 @@
-- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/case.sql
-- Test the CASE statement
--
--- This test suite contains two Cartesian products without using explicit CROSS JOIN syntax.
--- Thus, we set spark.sql.crossJoin.enabled to true.
-
-- This test file was converted from pgSQL/case.sql.
-- Note that currently registered UDF returns a string. So there are some differences, for instance
-- in string cast within UDF in Scala and Python.
-set spark.sql.crossJoin.enabled=true;
CREATE TABLE CASE_TBL (
i integer,
f double
@@ -269,4 +265,3 @@ SELECT CASE
DROP TABLE CASE_TBL;
DROP TABLE CASE2_TBL;
-set spark.sql.crossJoin.enabled=false;
diff --git a/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-case.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-case.sql.out
index 55bef64..6bb7a78 100644
--- a/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-case.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-case.sql.out
@@ -1,19 +1,22 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 37
+-- Number of queries: 35
-- !query 0
-set spark.sql.crossJoin.enabled=true
+CREATE TABLE CASE_TBL (
+ i integer,
+ f double
+) USING parquet
-- !query 0 schema
-struct<key:string,value:string>
+struct<>
-- !query 0 output
-spark.sql.crossJoin.enabled true
+
-- !query 1
-CREATE TABLE CASE_TBL (
+CREATE TABLE CASE2_TBL (
i integer,
- f double
+ j integer
) USING parquet
-- !query 1 schema
struct<>
@@ -22,10 +25,7 @@ struct<>
-- !query 2
-CREATE TABLE CASE2_TBL (
- i integer,
- j integer
-) USING parquet
+INSERT INTO CASE_TBL VALUES (1, 10.1)
-- !query 2 schema
struct<>
-- !query 2 output
@@ -33,7 +33,7 @@ struct<>
-- !query 3
-INSERT INTO CASE_TBL VALUES (1, 10.1)
+INSERT INTO CASE_TBL VALUES (2, 20.2)
-- !query 3 schema
struct<>
-- !query 3 output
@@ -41,7 +41,7 @@ struct<>
-- !query 4
-INSERT INTO CASE_TBL VALUES (2, 20.2)
+INSERT INTO CASE_TBL VALUES (3, -30.3)
-- !query 4 schema
struct<>
-- !query 4 output
@@ -49,7 +49,7 @@ struct<>
-- !query 5
-INSERT INTO CASE_TBL VALUES (3, -30.3)
+INSERT INTO CASE_TBL VALUES (4, NULL)
-- !query 5 schema
struct<>
-- !query 5 output
@@ -57,7 +57,7 @@ struct<>
-- !query 6
-INSERT INTO CASE_TBL VALUES (4, NULL)
+INSERT INTO CASE2_TBL VALUES (1, -1)
-- !query 6 schema
struct<>
-- !query 6 output
@@ -65,7 +65,7 @@ struct<>
-- !query 7
-INSERT INTO CASE2_TBL VALUES (1, -1)
+INSERT INTO CASE2_TBL VALUES (2, -2)
-- !query 7 schema
struct<>
-- !query 7 output
@@ -73,7 +73,7 @@ struct<>
-- !query 8
-INSERT INTO CASE2_TBL VALUES (2, -2)
+INSERT INTO CASE2_TBL VALUES (3, -3)
-- !query 8 schema
struct<>
-- !query 8 output
@@ -81,7 +81,7 @@ struct<>
-- !query 9
-INSERT INTO CASE2_TBL VALUES (3, -3)
+INSERT INTO CASE2_TBL VALUES (2, -4)
-- !query 9 schema
struct<>
-- !query 9 output
@@ -89,7 +89,7 @@ struct<>
-- !query 10
-INSERT INTO CASE2_TBL VALUES (2, -4)
+INSERT INTO CASE2_TBL VALUES (1, NULL)
-- !query 10 schema
struct<>
-- !query 10 output
@@ -97,7 +97,7 @@ struct<>
-- !query 11
-INSERT INTO CASE2_TBL VALUES (1, NULL)
+INSERT INTO CASE2_TBL VALUES (NULL, -6)
-- !query 11 schema
struct<>
-- !query 11 output
@@ -105,148 +105,140 @@ struct<>
-- !query 12
-INSERT INTO CASE2_TBL VALUES (NULL, -6)
--- !query 12 schema
-struct<>
--- !query 12 output
-
-
-
--- !query 13
SELECT '3' AS `One`,
CASE
WHEN CAST(udf(1 < 2) AS boolean) THEN 3
END AS `Simple WHEN`
--- !query 13 schema
+-- !query 12 schema
struct<One:string,Simple WHEN:int>
--- !query 13 output
+-- !query 12 output
3 3
--- !query 14
+-- !query 13
SELECT '<NULL>' AS `One`,
CASE
WHEN 1 > 2 THEN udf(3)
END AS `Simple default`
--- !query 14 schema
+-- !query 13 schema
struct<One:string,Simple default:string>
--- !query 14 output
+-- !query 13 output
<NULL> NULL
--- !query 15
+-- !query 14
SELECT '3' AS `One`,
CASE
WHEN udf(1) < 2 THEN udf(3)
ELSE udf(4)
END AS `Simple ELSE`
--- !query 15 schema
+-- !query 14 schema
struct<One:string,Simple ELSE:string>
--- !query 15 output
+-- !query 14 output
3 3
--- !query 16
+-- !query 15
SELECT udf('4') AS `One`,
CASE
WHEN 1 > 2 THEN 3
ELSE 4
END AS `ELSE default`
--- !query 16 schema
+-- !query 15 schema
struct<One:string,ELSE default:int>
--- !query 16 output
+-- !query 15 output
4 4
--- !query 17
+-- !query 16
SELECT udf('6') AS `One`,
CASE
WHEN CAST(udf(1 > 2) AS boolean) THEN 3
WHEN udf(4) < 5 THEN 6
ELSE 7
END AS `Two WHEN with default`
--- !query 17 schema
+-- !query 16 schema
struct<One:string,Two WHEN with default:int>
--- !query 17 output
+-- !query 16 output
6 6
--- !query 18
+-- !query 17
SELECT '7' AS `None`,
CASE WHEN rand() < udf(0) THEN 1
END AS `NULL on no matches`
--- !query 18 schema
+-- !query 17 schema
struct<None:string,NULL on no matches:int>
--- !query 18 output
+-- !query 17 output
7 NULL
--- !query 19
+-- !query 18
SELECT CASE WHEN CAST(udf(1=0) AS boolean) THEN 1/0 WHEN 1=1 THEN 1 ELSE 2/0 END
--- !query 19 schema
+-- !query 18 schema
struct<CASE WHEN CAST(udf((1 = 0)) AS BOOLEAN) THEN (CAST(1 AS DOUBLE) / CAST(0 AS DOUBLE)) WHEN (1 = 1) THEN CAST(1 AS DOUBLE) ELSE (CAST(2 AS DOUBLE) / CAST(0 AS DOUBLE)) END:double>
--- !query 19 output
+-- !query 18 output
1.0
--- !query 20
+-- !query 19
SELECT CASE 1 WHEN 0 THEN 1/udf(0) WHEN 1 THEN 1 ELSE 2/0 END
--- !query 20 schema
+-- !query 19 schema
struct<CASE WHEN (1 = 0) THEN (CAST(1 AS DOUBLE) / CAST(CAST(udf(0) AS DOUBLE) AS DOUBLE)) WHEN (1 = 1) THEN CAST(1 AS DOUBLE) ELSE (CAST(2 AS DOUBLE) / CAST(0 AS DOUBLE)) END:double>
--- !query 20 output
+-- !query 19 output
1.0
--- !query 21
+-- !query 20
SELECT CASE WHEN i > 100 THEN udf(1/0) ELSE udf(0) END FROM case_tbl
--- !query 21 schema
+-- !query 20 schema
struct<CASE WHEN (i > 100) THEN udf((cast(1 as double) / cast(0 as double))) ELSE udf(0) END:string>
--- !query 21 output
+-- !query 20 output
0
0
0
0
--- !query 22
+-- !query 21
SELECT CASE 'a' WHEN 'a' THEN udf(1) ELSE udf(2) END
--- !query 22 schema
+-- !query 21 schema
struct<CASE WHEN (a = a) THEN udf(1) ELSE udf(2) END:string>
--- !query 22 output
+-- !query 21 output
1
--- !query 23
+-- !query 22
SELECT '' AS `Five`,
CASE
WHEN i >= 3 THEN i
END AS `>= 3 or Null`
FROM CASE_TBL
--- !query 23 schema
+-- !query 22 schema
struct<Five:string,>= 3 or Null:int>
--- !query 23 output
+-- !query 22 output
3
4
NULL
NULL
--- !query 24
+-- !query 23
SELECT '' AS `Five`,
CASE WHEN i >= 3 THEN (i + i)
ELSE i
END AS `Simplest Math`
FROM CASE_TBL
--- !query 24 schema
+-- !query 23 schema
struct<Five:string,Simplest Math:int>
--- !query 24 output
+-- !query 23 output
1
2
6
8
--- !query 25
+-- !query 24
SELECT '' AS `Five`, i AS `Value`,
CASE WHEN (i < 0) THEN 'small'
WHEN (i = 0) THEN 'zero'
@@ -255,16 +247,16 @@ SELECT '' AS `Five`, i AS `Value`,
ELSE 'big'
END AS `Category`
FROM CASE_TBL
--- !query 25 schema
+-- !query 24 schema
struct<Five:string,Value:int,Category:string>
--- !query 25 output
+-- !query 24 output
1 one
2 two
3 big
4 big
--- !query 26
+-- !query 25
SELECT '' AS `Five`,
CASE WHEN ((i < 0) or (i < 0)) THEN 'small'
WHEN ((i = 0) or (i = 0)) THEN 'zero'
@@ -273,37 +265,37 @@ SELECT '' AS `Five`,
ELSE 'big'
END AS `Category`
FROM CASE_TBL
--- !query 26 schema
+-- !query 25 schema
struct<Five:string,Category:string>
--- !query 26 output
+-- !query 25 output
big
big
one
two
--- !query 27
+-- !query 26
SELECT * FROM CASE_TBL WHERE udf(COALESCE(f,i)) = 4
--- !query 27 schema
+-- !query 26 schema
struct<i:int,f:double>
--- !query 27 output
+-- !query 26 output
4 NULL
--- !query 28
+-- !query 27
SELECT * FROM CASE_TBL WHERE udf(NULLIF(f,i)) = 2
--- !query 28 schema
+-- !query 27 schema
struct<i:int,f:double>
--- !query 28 output
+-- !query 27 output
--- !query 29
+-- !query 28
SELECT udf(COALESCE(a.f, b.i, b.j))
FROM CASE_TBL a, CASE2_TBL b
--- !query 29 schema
+-- !query 28 schema
struct<udf(coalesce(f, cast(i as double), cast(j as double))):string>
--- !query 29 output
+-- !query 28 output
-30.3
-30.3
-30.3
@@ -330,24 +322,24 @@ struct<udf(coalesce(f, cast(i as double), cast(j as double))):string>
3.0
--- !query 30
+-- !query 29
SELECT *
FROM CASE_TBL a, CASE2_TBL b
WHERE udf(COALESCE(a.f, b.i, b.j)) = 2
--- !query 30 schema
+-- !query 29 schema
struct<i:int,f:double,i:int,j:int>
--- !query 30 output
+-- !query 29 output
4 NULL 2 -2
4 NULL 2 -4
--- !query 31
+-- !query 30
SELECT udf('') AS Five, NULLIF(a.i,b.i) AS `NULLIF(a.i,b.i)`,
NULLIF(b.i, 4) AS `NULLIF(b.i,4)`
FROM CASE_TBL a, CASE2_TBL b
--- !query 31 schema
+-- !query 30 schema
struct<Five:string,NULLIF(a.i,b.i):int,NULLIF(b.i,4):int>
--- !query 31 output
+-- !query 30 output
1 2
1 2
1 3
@@ -374,18 +366,18 @@ struct<Five:string,NULLIF(a.i,b.i):int,NULLIF(b.i,4):int>
NULL 3
--- !query 32
+-- !query 31
SELECT '' AS `Two`, *
FROM CASE_TBL a, CASE2_TBL b
WHERE CAST(udf(COALESCE(f,b.i) = 2) AS boolean)
--- !query 32 schema
+-- !query 31 schema
struct<Two:string,i:int,f:double,i:int,j:int>
--- !query 32 output
+-- !query 31 output
4 NULL 2 -2
4 NULL 2 -4
--- !query 33
+-- !query 32
SELECT CASE
(CASE vol('bar')
WHEN udf('foo') THEN 'it was foo!'
@@ -395,31 +387,23 @@ SELECT CASE
WHEN udf('it was foo!') THEN 'foo recognized'
WHEN 'it was bar!' THEN udf('bar recognized')
ELSE 'unrecognized' END AS col
--- !query 33 schema
+-- !query 32 schema
struct<col:string>
--- !query 33 output
+-- !query 32 output
bar recognized
--- !query 34
+-- !query 33
DROP TABLE CASE_TBL
--- !query 34 schema
+-- !query 33 schema
struct<>
--- !query 34 output
+-- !query 33 output
--- !query 35
+-- !query 34
DROP TABLE CASE2_TBL
--- !query 35 schema
+-- !query 34 schema
struct<>
--- !query 35 output
-
-
+-- !query 34 output
--- !query 36
-set spark.sql.crossJoin.enabled=false
--- !query 36 schema
-struct<key:string,value:string>
--- !query 36 output
-spark.sql.crossJoin.enabled false
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
index 6e40fcf..1085f03 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
@@ -151,17 +151,37 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext {
val resultFile: String
}
+ /**
+ * traits that indicate UDF or PgSQL to trigger the code path specific to each. For instance,
+ * PgSQL tests require to register some UDF functions.
+ */
+ private trait PgSQLTest
+
+ private trait UDFTest {
+ val udf: TestUDF
+ }
+
/** A regular test case. */
private case class RegularTestCase(
name: String, inputFile: String, resultFile: String) extends TestCase
/** A PostgreSQL test case. */
private case class PgSQLTestCase(
- name: String, inputFile: String, resultFile: String) extends TestCase
+ name: String, inputFile: String, resultFile: String) extends TestCase with PgSQLTest
/** A UDF test case. */
private case class UDFTestCase(
- name: String, inputFile: String, resultFile: String, udf: TestUDF) extends TestCase
+ name: String,
+ inputFile: String,
+ resultFile: String,
+ udf: TestUDF) extends TestCase with UDFTest
+
+ /** A UDF PostgreSQL test case. */
+ private case class UDFPgSQLTestCase(
+ name: String,
+ inputFile: String,
+ resultFile: String,
+ udf: TestUDF) extends TestCase with UDFTest with PgSQLTest
private def createScalaTestCase(testCase: TestCase): Unit = {
if (blackList.exists(t =>
@@ -169,12 +189,14 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext {
// Create a test case to ignore this case.
ignore(testCase.name) { /* Do nothing */ }
} else testCase match {
- case UDFTestCase(_, _, _, udf: TestPythonUDF) if !shouldTestPythonUDFs =>
+ case udfTestCase: UDFTest
+ if udfTestCase.udf.isInstanceOf[TestPythonUDF] && !shouldTestPythonUDFs =>
ignore(s"${testCase.name} is skipped because " +
s"[$pythonExec] and/or pyspark were not available.") {
/* Do nothing */
}
- case UDFTestCase(_, _, _, udf: TestScalarPandasUDF) if !shouldTestScalarPandasUDFs =>
+ case udfTestCase: UDFTest
+ if udfTestCase.udf.isInstanceOf[TestScalarPandasUDF] && !shouldTestScalarPandasUDFs =>
ignore(s"${testCase.name} is skipped because pyspark," +
s"pandas and/or pyarrow were not available in [$pythonExec].") {
/* Do nothing */
@@ -254,12 +276,15 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext {
// This does not isolate catalog changes.
val localSparkSession = spark.newSession()
loadTestData(localSparkSession)
+
testCase match {
- case udfTestCase: UDFTestCase =>
- // vol used by udf-case.sql.
- localSparkSession.udf.register("vol", (s: String) => s)
+ case udfTestCase: UDFTest =>
registerTestUDF(udfTestCase.udf, localSparkSession)
- case _: PgSQLTestCase =>
+ case _ =>
+ }
+
+ testCase match {
+ case _: PgSQLTest =>
// booleq/boolne used by boolean.sql
localSparkSession.udf.register("booleq", (b1: Boolean, b2: Boolean) => b1 == b2)
localSparkSession.udf.register("boolne", (b1: Boolean, b2: Boolean) => b1 != b2)
@@ -268,7 +293,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext {
// PostgreSQL enabled cartesian product by default.
localSparkSession.conf.set(SQLConf.CROSS_JOINS_ENABLED.key, true)
localSparkSession.conf.set(SQLConf.ANSI_SQL_PARSER.key, true)
- case _ => // Don't add UDFs in Regular tests.
+ case _ =>
}
if (configSet.isDefined) {
@@ -388,13 +413,16 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext {
val absPath = file.getAbsolutePath
val testCaseName = absPath.stripPrefix(inputFilePath).stripPrefix(File.separator)
- if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}udf")) {
+ if (file.getAbsolutePath.startsWith(
+ s"$inputFilePath${File.separator}udf${File.separator}pgSQL")) {
+ Seq(TestScalaUDF("udf"), TestPythonUDF("udf"), TestScalarPandasUDF("udf")).map { udf =>
+ UDFPgSQLTestCase(
+ s"$testCaseName - ${udf.prettyName}", absPath, resultFile, udf)
+ }
+ } else if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}udf")) {
Seq(TestScalaUDF("udf"), TestPythonUDF("udf"), TestScalarPandasUDF("udf")).map { udf =>
UDFTestCase(
- s"$testCaseName - ${udf.prettyName}",
- absPath,
- resultFile,
- udf)
+ s"$testCaseName - ${udf.prettyName}", absPath, resultFile, udf)
}
} else if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}pgSQL")) {
PgSQLTestCase(testCaseName, absPath, resultFile) :: Nil
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org