Posted to commits@spark.apache.org by gu...@apache.org on 2019/07/15 07:20:38 UTC

[spark] branch master updated: [SPARK-28392][SQL][TESTS] Add traits for UDF and PostgreSQL tests to share initialization

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new a7a02a8  [SPARK-28392][SQL][TESTS] Add traits for UDF and PostgreSQL tests to share initialization
a7a02a8 is described below

commit a7a02a86adafd3808051d843cf7e70176a7c4099
Author: HyukjinKwon <gu...@apache.org>
AuthorDate: Mon Jul 15 16:20:09 2019 +0900

    [SPARK-28392][SQL][TESTS] Add traits for UDF and PostgreSQL tests to share initialization
    
    ## What changes were proposed in this pull request?
    
    This PR adds some traits so that the initialization logic for each type of test case can be deduplicated. For instance, see [SPARK-28343](https://issues.apache.org/jira/browse/SPARK-28343).
    
    It's a little bit overkill but I think it will make adding test cases easier and cause less confusion.
    
    This PR adds both:
    
    ```
      private trait PgSQLTest
      private trait UDFTest
    ```
    
    These traits indicate the test type and share the logic related to each combination of test types.
    
    ## How was this patch tested?
    
    Manually tested.
    
    Closes #25155 from HyukjinKwon/SPARK-28392.
    
    Authored-by: HyukjinKwon <gu...@apache.org>
    Signed-off-by: HyukjinKwon <gu...@apache.org>
---
 .../sql-tests/inputs/udf/pgSQL/udf-case.sql        |   5 -
 .../sql-tests/results/udf/pgSQL/udf-case.sql.out   | 190 ++++++++++-----------
 .../org/apache/spark/sql/SQLQueryTestSuite.scala   |  56 ++++--
 3 files changed, 129 insertions(+), 122 deletions(-)
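
For orientation before reading the diff, here is a minimal, illustrative Scala sketch of the trait-mixing pattern this commit introduces. It is not part of the commit: `TestUDF` is stubbed so the snippet stands alone, and the real definitions appear in the SQLQueryTestSuite.scala hunk below.

```
// Illustrative sketch only; the actual definitions live in SQLQueryTestSuite.scala (see the diff below).
// TestUDF is stubbed here so that the snippet compiles on its own.
trait TestUDF { def prettyName: String }

trait TestCase {
  val name: String
  val inputFile: String
  val resultFile: String
}

// Marker trait: PgSQL-converted tests need PostgreSQL-compatible UDFs and configs registered.
trait PgSQLTest

// UDF tests carry the UDF that must be registered before the queries run.
trait UDFTest { val udf: TestUDF }

// A test that is both: mixing in the two traits shares the initialization of UDFTest and PgSQLTest.
case class UDFPgSQLTestCase(
    name: String,
    inputFile: String,
    resultFile: String,
    udf: TestUDF) extends TestCase with UDFTest with PgSQLTest
```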

diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-case.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-case.sql
index b05c21d..a2aab79 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-case.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-case.sql
@@ -6,14 +6,10 @@
 -- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/case.sql
 -- Test the CASE statement
 --
--- This test suite contains two Cartesian products without using explicit CROSS JOIN syntax.
--- Thus, we set spark.sql.crossJoin.enabled to true.
-
 -- This test file was converted from pgSQL/case.sql.
 -- Note that currently registered UDF returns a string. So there are some differences, for instance
 -- in string cast within UDF in Scala and Python.
 
-set spark.sql.crossJoin.enabled=true;
 CREATE TABLE CASE_TBL (
   i integer,
   f double
@@ -269,4 +265,3 @@ SELECT CASE
 
 DROP TABLE CASE_TBL;
 DROP TABLE CASE2_TBL;
-set spark.sql.crossJoin.enabled=false;
diff --git a/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-case.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-case.sql.out
index 55bef64..6bb7a78 100644
--- a/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-case.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-case.sql.out
@@ -1,19 +1,22 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 37
+-- Number of queries: 35
 
 
 -- !query 0
-set spark.sql.crossJoin.enabled=true
+CREATE TABLE CASE_TBL (
+  i integer,
+  f double
+) USING parquet
 -- !query 0 schema
-struct<key:string,value:string>
+struct<>
 -- !query 0 output
-spark.sql.crossJoin.enabled	true
+
 
 
 -- !query 1
-CREATE TABLE CASE_TBL (
+CREATE TABLE CASE2_TBL (
   i integer,
-  f double
+  j integer
 ) USING parquet
 -- !query 1 schema
 struct<>
@@ -22,10 +25,7 @@ struct<>
 
 
 -- !query 2
-CREATE TABLE CASE2_TBL (
-  i integer,
-  j integer
-) USING parquet
+INSERT INTO CASE_TBL VALUES (1, 10.1)
 -- !query 2 schema
 struct<>
 -- !query 2 output
@@ -33,7 +33,7 @@ struct<>
 
 
 -- !query 3
-INSERT INTO CASE_TBL VALUES (1, 10.1)
+INSERT INTO CASE_TBL VALUES (2, 20.2)
 -- !query 3 schema
 struct<>
 -- !query 3 output
@@ -41,7 +41,7 @@ struct<>
 
 
 -- !query 4
-INSERT INTO CASE_TBL VALUES (2, 20.2)
+INSERT INTO CASE_TBL VALUES (3, -30.3)
 -- !query 4 schema
 struct<>
 -- !query 4 output
@@ -49,7 +49,7 @@ struct<>
 
 
 -- !query 5
-INSERT INTO CASE_TBL VALUES (3, -30.3)
+INSERT INTO CASE_TBL VALUES (4, NULL)
 -- !query 5 schema
 struct<>
 -- !query 5 output
@@ -57,7 +57,7 @@ struct<>
 
 
 -- !query 6
-INSERT INTO CASE_TBL VALUES (4, NULL)
+INSERT INTO CASE2_TBL VALUES (1, -1)
 -- !query 6 schema
 struct<>
 -- !query 6 output
@@ -65,7 +65,7 @@ struct<>
 
 
 -- !query 7
-INSERT INTO CASE2_TBL VALUES (1, -1)
+INSERT INTO CASE2_TBL VALUES (2, -2)
 -- !query 7 schema
 struct<>
 -- !query 7 output
@@ -73,7 +73,7 @@ struct<>
 
 
 -- !query 8
-INSERT INTO CASE2_TBL VALUES (2, -2)
+INSERT INTO CASE2_TBL VALUES (3, -3)
 -- !query 8 schema
 struct<>
 -- !query 8 output
@@ -81,7 +81,7 @@ struct<>
 
 
 -- !query 9
-INSERT INTO CASE2_TBL VALUES (3, -3)
+INSERT INTO CASE2_TBL VALUES (2, -4)
 -- !query 9 schema
 struct<>
 -- !query 9 output
@@ -89,7 +89,7 @@ struct<>
 
 
 -- !query 10
-INSERT INTO CASE2_TBL VALUES (2, -4)
+INSERT INTO CASE2_TBL VALUES (1, NULL)
 -- !query 10 schema
 struct<>
 -- !query 10 output
@@ -97,7 +97,7 @@ struct<>
 
 
 -- !query 11
-INSERT INTO CASE2_TBL VALUES (1, NULL)
+INSERT INTO CASE2_TBL VALUES (NULL, -6)
 -- !query 11 schema
 struct<>
 -- !query 11 output
@@ -105,148 +105,140 @@ struct<>
 
 
 -- !query 12
-INSERT INTO CASE2_TBL VALUES (NULL, -6)
--- !query 12 schema
-struct<>
--- !query 12 output
-
-
-
--- !query 13
 SELECT '3' AS `One`,
   CASE
     WHEN CAST(udf(1 < 2) AS boolean) THEN 3
   END AS `Simple WHEN`
--- !query 13 schema
+-- !query 12 schema
 struct<One:string,Simple WHEN:int>
--- !query 13 output
+-- !query 12 output
 3	3
 
 
--- !query 14
+-- !query 13
 SELECT '<NULL>' AS `One`,
   CASE
     WHEN 1 > 2 THEN udf(3)
   END AS `Simple default`
--- !query 14 schema
+-- !query 13 schema
 struct<One:string,Simple default:string>
--- !query 14 output
+-- !query 13 output
 <NULL>	NULL
 
 
--- !query 15
+-- !query 14
 SELECT '3' AS `One`,
   CASE
     WHEN udf(1) < 2 THEN udf(3)
     ELSE udf(4)
   END AS `Simple ELSE`
--- !query 15 schema
+-- !query 14 schema
 struct<One:string,Simple ELSE:string>
--- !query 15 output
+-- !query 14 output
 3	3
 
 
--- !query 16
+-- !query 15
 SELECT udf('4') AS `One`,
   CASE
     WHEN 1 > 2 THEN 3
     ELSE 4
   END AS `ELSE default`
--- !query 16 schema
+-- !query 15 schema
 struct<One:string,ELSE default:int>
--- !query 16 output
+-- !query 15 output
 4	4
 
 
--- !query 17
+-- !query 16
 SELECT udf('6') AS `One`,
   CASE
     WHEN CAST(udf(1 > 2) AS boolean) THEN 3
     WHEN udf(4) < 5 THEN 6
     ELSE 7
   END AS `Two WHEN with default`
--- !query 17 schema
+-- !query 16 schema
 struct<One:string,Two WHEN with default:int>
--- !query 17 output
+-- !query 16 output
 6	6
 
 
--- !query 18
+-- !query 17
 SELECT '7' AS `None`,
   CASE WHEN rand() < udf(0) THEN 1
   END AS `NULL on no matches`
--- !query 18 schema
+-- !query 17 schema
 struct<None:string,NULL on no matches:int>
--- !query 18 output
+-- !query 17 output
 7	NULL
 
 
--- !query 19
+-- !query 18
 SELECT CASE WHEN CAST(udf(1=0) AS boolean) THEN 1/0 WHEN 1=1 THEN 1 ELSE 2/0 END
--- !query 19 schema
+-- !query 18 schema
 struct<CASE WHEN CAST(udf((1 = 0)) AS BOOLEAN) THEN (CAST(1 AS DOUBLE) / CAST(0 AS DOUBLE)) WHEN (1 = 1) THEN CAST(1 AS DOUBLE) ELSE (CAST(2 AS DOUBLE) / CAST(0 AS DOUBLE)) END:double>
--- !query 19 output
+-- !query 18 output
 1.0
 
 
--- !query 20
+-- !query 19
 SELECT CASE 1 WHEN 0 THEN 1/udf(0) WHEN 1 THEN 1 ELSE 2/0 END
--- !query 20 schema
+-- !query 19 schema
 struct<CASE WHEN (1 = 0) THEN (CAST(1 AS DOUBLE) / CAST(CAST(udf(0) AS DOUBLE) AS DOUBLE)) WHEN (1 = 1) THEN CAST(1 AS DOUBLE) ELSE (CAST(2 AS DOUBLE) / CAST(0 AS DOUBLE)) END:double>
--- !query 20 output
+-- !query 19 output
 1.0
 
 
--- !query 21
+-- !query 20
 SELECT CASE WHEN i > 100 THEN udf(1/0) ELSE udf(0) END FROM case_tbl
--- !query 21 schema
+-- !query 20 schema
 struct<CASE WHEN (i > 100) THEN udf((cast(1 as double) / cast(0 as double))) ELSE udf(0) END:string>
--- !query 21 output
+-- !query 20 output
 0
 0
 0
 0
 
 
--- !query 22
+-- !query 21
 SELECT CASE 'a' WHEN 'a' THEN udf(1) ELSE udf(2) END
--- !query 22 schema
+-- !query 21 schema
 struct<CASE WHEN (a = a) THEN udf(1) ELSE udf(2) END:string>
--- !query 22 output
+-- !query 21 output
 1
 
 
--- !query 23
+-- !query 22
 SELECT '' AS `Five`,
   CASE
     WHEN i >= 3 THEN i
   END AS `>= 3 or Null`
   FROM CASE_TBL
--- !query 23 schema
+-- !query 22 schema
 struct<Five:string,>= 3 or Null:int>
--- !query 23 output
+-- !query 22 output
 3
 	4
 	NULL
 	NULL
 
 
--- !query 24
+-- !query 23
 SELECT '' AS `Five`,
   CASE WHEN i >= 3 THEN (i + i)
        ELSE i
   END AS `Simplest Math`
   FROM CASE_TBL
--- !query 24 schema
+-- !query 23 schema
 struct<Five:string,Simplest Math:int>
--- !query 24 output
+-- !query 23 output
 1
 	2
 	6
 	8
 
 
--- !query 25
+-- !query 24
 SELECT '' AS `Five`, i AS `Value`,
   CASE WHEN (i < 0) THEN 'small'
        WHEN (i = 0) THEN 'zero'
@@ -255,16 +247,16 @@ SELECT '' AS `Five`, i AS `Value`,
        ELSE 'big'
   END AS `Category`
   FROM CASE_TBL
--- !query 25 schema
+-- !query 24 schema
 struct<Five:string,Value:int,Category:string>
--- !query 25 output
+-- !query 24 output
 1	one
 	2	two
 	3	big
 	4	big
 
 
--- !query 26
+-- !query 25
 SELECT '' AS `Five`,
   CASE WHEN ((i < 0) or (i < 0)) THEN 'small'
        WHEN ((i = 0) or (i = 0)) THEN 'zero'
@@ -273,37 +265,37 @@ SELECT '' AS `Five`,
        ELSE 'big'
   END AS `Category`
   FROM CASE_TBL
--- !query 26 schema
+-- !query 25 schema
 struct<Five:string,Category:string>
--- !query 26 output
+-- !query 25 output
 big
 	big
 	one
 	two
 
 
--- !query 27
+-- !query 26
 SELECT * FROM CASE_TBL WHERE udf(COALESCE(f,i)) = 4
--- !query 27 schema
+-- !query 26 schema
 struct<i:int,f:double>
--- !query 27 output
+-- !query 26 output
 4	NULL
 
 
--- !query 28
+-- !query 27
 SELECT * FROM CASE_TBL WHERE udf(NULLIF(f,i)) = 2
--- !query 28 schema
+-- !query 27 schema
 struct<i:int,f:double>
--- !query 28 output
+-- !query 27 output
 
 
 
--- !query 29
+-- !query 28
 SELECT udf(COALESCE(a.f, b.i, b.j))
   FROM CASE_TBL a, CASE2_TBL b
--- !query 29 schema
+-- !query 28 schema
 struct<udf(coalesce(f, cast(i as double), cast(j as double))):string>
--- !query 29 output
+-- !query 28 output
 -30.3
 -30.3
 -30.3
@@ -330,24 +322,24 @@ struct<udf(coalesce(f, cast(i as double), cast(j as double))):string>
 3.0
 
 
--- !query 30
+-- !query 29
 SELECT *
    FROM CASE_TBL a, CASE2_TBL b
    WHERE udf(COALESCE(a.f, b.i, b.j)) = 2
--- !query 30 schema
+-- !query 29 schema
 struct<i:int,f:double,i:int,j:int>
--- !query 30 output
+-- !query 29 output
 4	NULL	2	-2
 4	NULL	2	-4
 
 
--- !query 31
+-- !query 30
 SELECT udf('') AS Five, NULLIF(a.i,b.i) AS `NULLIF(a.i,b.i)`,
   NULLIF(b.i, 4) AS `NULLIF(b.i,4)`
   FROM CASE_TBL a, CASE2_TBL b
--- !query 31 schema
+-- !query 30 schema
 struct<Five:string,NULLIF(a.i,b.i):int,NULLIF(b.i,4):int>
--- !query 31 output
+-- !query 30 output
 1	2
 	1	2
 	1	3
@@ -374,18 +366,18 @@ struct<Five:string,NULLIF(a.i,b.i):int,NULLIF(b.i,4):int>
 	NULL	3
 
 
--- !query 32
+-- !query 31
 SELECT '' AS `Two`, *
   FROM CASE_TBL a, CASE2_TBL b
   WHERE CAST(udf(COALESCE(f,b.i) = 2) AS boolean)
--- !query 32 schema
+-- !query 31 schema
 struct<Two:string,i:int,f:double,i:int,j:int>
--- !query 32 output
+-- !query 31 output
 4	NULL	2	-2
 	4	NULL	2	-4
 
 
--- !query 33
+-- !query 32
 SELECT CASE
   (CASE vol('bar')
     WHEN udf('foo') THEN 'it was foo!'
@@ -395,31 +387,23 @@ SELECT CASE
   WHEN udf('it was foo!') THEN 'foo recognized'
   WHEN 'it was bar!' THEN udf('bar recognized')
   ELSE 'unrecognized' END AS col
--- !query 33 schema
+-- !query 32 schema
 struct<col:string>
--- !query 33 output
+-- !query 32 output
 bar recognized
 
 
--- !query 34
+-- !query 33
 DROP TABLE CASE_TBL
--- !query 34 schema
+-- !query 33 schema
 struct<>
--- !query 34 output
+-- !query 33 output
 
 
 
--- !query 35
+-- !query 34
 DROP TABLE CASE2_TBL
--- !query 35 schema
+-- !query 34 schema
 struct<>
--- !query 35 output
-
-
+-- !query 34 output
 
--- !query 36
-set spark.sql.crossJoin.enabled=false
--- !query 36 schema
-struct<key:string,value:string>
--- !query 36 output
-spark.sql.crossJoin.enabled	false
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
index 6e40fcf..1085f03 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
@@ -151,17 +151,37 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext {
     val resultFile: String
   }
 
+  /**
+   * traits that indicate UDF or PgSQL to trigger the code path specific to each. For instance,
+   * PgSQL tests require to register some UDF functions.
+   */
+  private trait PgSQLTest
+
+  private trait UDFTest {
+    val udf: TestUDF
+  }
+
   /** A regular test case. */
   private case class RegularTestCase(
       name: String, inputFile: String, resultFile: String) extends TestCase
 
   /** A PostgreSQL test case. */
   private case class PgSQLTestCase(
-      name: String, inputFile: String, resultFile: String) extends TestCase
+      name: String, inputFile: String, resultFile: String) extends TestCase with PgSQLTest
 
   /** A UDF test case. */
   private case class UDFTestCase(
-      name: String, inputFile: String, resultFile: String, udf: TestUDF) extends TestCase
+      name: String,
+      inputFile: String,
+      resultFile: String,
+      udf: TestUDF) extends TestCase with UDFTest
+
+  /** A UDF PostgreSQL test case. */
+  private case class UDFPgSQLTestCase(
+      name: String,
+      inputFile: String,
+      resultFile: String,
+      udf: TestUDF) extends TestCase with UDFTest with PgSQLTest
 
   private def createScalaTestCase(testCase: TestCase): Unit = {
     if (blackList.exists(t =>
@@ -169,12 +189,14 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext {
       // Create a test case to ignore this case.
       ignore(testCase.name) { /* Do nothing */ }
     } else testCase match {
-      case UDFTestCase(_, _, _, udf: TestPythonUDF) if !shouldTestPythonUDFs =>
+      case udfTestCase: UDFTest
+          if udfTestCase.udf.isInstanceOf[TestPythonUDF] && !shouldTestPythonUDFs =>
         ignore(s"${testCase.name} is skipped because " +
           s"[$pythonExec] and/or pyspark were not available.") {
           /* Do nothing */
         }
-      case UDFTestCase(_, _, _, udf: TestScalarPandasUDF) if !shouldTestScalarPandasUDFs =>
+      case udfTestCase: UDFTest
+          if udfTestCase.udf.isInstanceOf[TestScalarPandasUDF] && !shouldTestScalarPandasUDFs =>
         ignore(s"${testCase.name} is skipped because pyspark," +
           s"pandas and/or pyarrow were not available in [$pythonExec].") {
           /* Do nothing */
@@ -254,12 +276,15 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext {
     // This does not isolate catalog changes.
     val localSparkSession = spark.newSession()
     loadTestData(localSparkSession)
+
     testCase match {
-      case udfTestCase: UDFTestCase =>
-        // vol used by udf-case.sql.
-        localSparkSession.udf.register("vol", (s: String) => s)
+      case udfTestCase: UDFTest =>
         registerTestUDF(udfTestCase.udf, localSparkSession)
-      case _: PgSQLTestCase =>
+      case _ =>
+    }
+
+    testCase match {
+      case _: PgSQLTest =>
         // booleq/boolne used by boolean.sql
         localSparkSession.udf.register("booleq", (b1: Boolean, b2: Boolean) => b1 == b2)
         localSparkSession.udf.register("boolne", (b1: Boolean, b2: Boolean) => b1 != b2)
@@ -268,7 +293,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext {
         // PostgreSQL enabled cartesian product by default.
         localSparkSession.conf.set(SQLConf.CROSS_JOINS_ENABLED.key, true)
         localSparkSession.conf.set(SQLConf.ANSI_SQL_PARSER.key, true)
-      case _ => // Don't add UDFs in Regular tests.
+      case _ =>
     }
 
     if (configSet.isDefined) {
@@ -388,13 +413,16 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext {
       val absPath = file.getAbsolutePath
       val testCaseName = absPath.stripPrefix(inputFilePath).stripPrefix(File.separator)
 
-      if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}udf")) {
+      if (file.getAbsolutePath.startsWith(
+        s"$inputFilePath${File.separator}udf${File.separator}pgSQL")) {
+        Seq(TestScalaUDF("udf"), TestPythonUDF("udf"), TestScalarPandasUDF("udf")).map { udf =>
+          UDFPgSQLTestCase(
+            s"$testCaseName - ${udf.prettyName}", absPath, resultFile, udf)
+        }
+      } else if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}udf")) {
         Seq(TestScalaUDF("udf"), TestPythonUDF("udf"), TestScalarPandasUDF("udf")).map { udf =>
           UDFTestCase(
-            s"$testCaseName - ${udf.prettyName}",
-            absPath,
-            resultFile,
-            udf)
+            s"$testCaseName - ${udf.prettyName}", absPath, resultFile, udf)
         }
       } else if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}pgSQL")) {
         PgSQLTestCase(testCaseName, absPath, resultFile) :: Nil
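
To close with a usage sketch (again illustrative, building on the stubbed traits above; `SharedTestInit`, `prepareSession`, and the no-op `registerTestUDF` are hypothetical stand-ins for the suite's real plumbing): the shared setup can now dispatch on the traits rather than on each concrete case class, which is why the per-file `set spark.sql.crossJoin.enabled` statements could be dropped from udf-case.sql.

```
import org.apache.spark.sql.SparkSession

object SharedTestInit {
  // Stand-in for the suite's real helper that registers the Scala/Python/Pandas test UDF.
  def registerTestUDF(udf: TestUDF, session: SparkSession): Unit = ()

  def prepareSession(testCase: TestCase, localSparkSession: SparkSession): Unit = {
    // Any UDF test, plain or PgSQL-converted, gets its UDF registered the same way.
    testCase match {
      case udfTestCase: UDFTest => registerTestUDF(udfTestCase.udf, localSparkSession)
      case _ =>
    }
    // Any PgSQL test, with or without UDFs, gets the PostgreSQL-compatible settings,
    // so individual .sql files no longer need to toggle the cross-join flag themselves.
    testCase match {
      case _: PgSQLTest =>
        localSparkSession.conf.set("spark.sql.crossJoin.enabled", "true")
      case _ =>
    }
  }
}
```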

