You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2022/01/11 23:37:52 UTC

[spark] branch master updated: [SPARK-37871][TESTS] Use `python3` instead of `python` in BaseScriptTransformation tests

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 9227231  [SPARK-37871][TESTS] Use `python3` instead of `python` in BaseScriptTransformation tests
9227231 is described below

commit 922723189e61d7228d6b5669d836ed86ef95e3ef
Author: Dongjoon Hyun <do...@apache.org>
AuthorDate: Tue Jan 11 15:36:05 2022 -0800

    [SPARK-37871][TESTS] Use `python3` instead of `python` in BaseScriptTransformation tests
    
    ### What changes were proposed in this pull request?
    
    This PR aims to use `python3` instead of `python` in `BaseScriptTransformation` tests.
    
    ### Why are the changes needed?
    
    Since Apache Spark has deprecated `Python 2`, this PR makes sure these tests invoke `python3` explicitly.
    
    In addition, some environments provide only the `python3` or `python3.x` command and no `python` command, which causes test failures like the following:
    ```
    [info] - SPARK-25158: Executor accidentally exit because ScriptTransformationWriterThread throw Exception *** FAILED *** (248 milliseconds)
    [info]   "Job aborted due to stage failure: Task 0 in stage 2162.0 failed 1 times, most recent failure: Lost task 0.0 in stage 2162.0 (TID 2627) (0ac7628d09c6 executor driver): org.apache.spark.SparkException:
    Subprocess exited with status 127.
    Error: /bin/bash: python: command not found
    ```
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Pass the CIs.
    
    Closes #35171 from dongjoon-hyun/SPARK-37871.
    
    Authored-by: Dongjoon Hyun <do...@apache.org>
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 .../execution/BaseScriptTransformationSuite.scala  | 26 +++++++++++-----------
 .../sql/hive/execution/BigDataBenchmarkSuite.scala |  2 +-
 .../spark/sql/hive/execution/SQLQuerySuite.scala   |  7 +++---
 3 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala
index 488a0fd..f774c45 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala
@@ -109,7 +109,7 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU
   }
 
   test("SPARK-25990: TRANSFORM should handle different data types correctly") {
-    assume(TestUtils.testCommandAvailable("python"))
+    assume(TestUtils.testCommandAvailable("python3"))
     val scriptFilePath = copyAndGetResourceFile("test_script.py", ".py").getAbsoluteFile
 
     withTempView("v") {
@@ -126,7 +126,7 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU
            |TRANSFORM(a, b, c, d, e)
            |  ROW FORMAT DELIMITED
            |  FIELDS TERMINATED BY '\t'
-           |  USING 'python $scriptFilePath' AS (a, b, c, d, e)
+           |  USING 'python3 $scriptFilePath' AS (a, b, c, d, e)
            |  ROW FORMAT DELIMITED
            |  FIELDS TERMINATED BY '\t'
            |FROM v
@@ -212,7 +212,7 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU
 
   def testBasicInputDataTypesWith(serde: ScriptTransformationIOSchema, testName: String): Unit = {
     test(s"SPARK-32400: TRANSFORM should support basic data types as input ($testName)") {
-      assume(TestUtils.testCommandAvailable("python"))
+      assume(TestUtils.testCommandAvailable("python3"))
       withTempView("v") {
         val df = Seq(
           (1, "1", 1.0f, 1.0, 11.toByte, BigDecimal(1.0), new Timestamp(1),
@@ -251,7 +251,7 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU
 
   test("SPARK-32400: TRANSFORM should support more data types (interval, array, map, struct " +
     "and udt) as input (no serde)") {
-    assume(TestUtils.testCommandAvailable("python"))
+    assume(TestUtils.testCommandAvailable("python3"))
     withTempView("v") {
       val df = Seq(
         (new CalendarInterval(7, 1, 1000), Array(0, 1, 2), Map("a" -> 1), (1, 2),
@@ -287,7 +287,7 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU
   }
 
   test("SPARK-32400: TRANSFORM should respect DATETIME_JAVA8API_ENABLED (no serde)") {
-    assume(TestUtils.testCommandAvailable("python"))
+    assume(TestUtils.testCommandAvailable("python3"))
     Array(false, true).foreach { java8AapiEnable =>
       withSQLConf(SQLConf.DATETIME_JAVA8API_ENABLED.key -> java8AapiEnable.toString) {
         withTempView("v") {
@@ -405,7 +405,7 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU
        """.stripMargin,
       s"""
          |SELECT TRANSFORM(a)
-         |USING 'python some_non_existent_file' AS (a)
+         |USING 'python3 some_non_existent_file' AS (a)
          |FROM VALUES (1) t(a)
        """.stripMargin).foreach { query =>
       intercept[SparkException] {
@@ -442,7 +442,7 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU
   }
 
   test("SPARK-31936: Script transform support ArrayType/MapType/StructType (no serde)") {
-    assume(TestUtils.testCommandAvailable("python"))
+    assume(TestUtils.testCommandAvailable("python3"))
     withTempView("v") {
       val df = Seq(
         (Array(0, 1, 2), Array(Array(0, 1), Array(2)),
@@ -488,7 +488,7 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU
   }
 
   test("SPARK-33934: Add SparkFile's root dir to env property PATH") {
-    assume(TestUtils.testCommandAvailable("python"))
+    assume(TestUtils.testCommandAvailable("python3"))
     val scriptFilePath = copyAndGetResourceFile("test_script.py", ".py").getAbsoluteFile
     withTempView("v") {
       val df = Seq(
@@ -498,7 +498,7 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU
       ).toDF("a", "b", "c", "d", "e") // Note column d's data type is Decimal(38, 18)
       df.createTempView("v")
 
-      // test 'python /path/to/script.py' with local file
+      // test 'python3 /path/to/script.py' with local file
       checkAnswer(
         sql(
           s"""
@@ -506,7 +506,7 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU
              |TRANSFORM(a, b, c, d, e)
              |  ROW FORMAT DELIMITED
              |  FIELDS TERMINATED BY '\t'
-             |  USING 'python $scriptFilePath' AS (a, b, c, d, e)
+             |  USING 'python3 $scriptFilePath' AS (a, b, c, d, e)
              |  ROW FORMAT DELIMITED
              |  FIELDS TERMINATED BY '\t'
              |FROM v
@@ -583,14 +583,14 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU
           'd.cast("string"),
           'e.cast("string")).collect())
 
-      // test `python script.py` when file added
+      // test `python3 script.py` when file added
       checkAnswer(
         sql(
           s"""
              |SELECT TRANSFORM(a, b, c, d, e)
              |  ROW FORMAT DELIMITED
              |  FIELDS TERMINATED BY '\t'
-             |  USING 'python ${scriptFilePath.getName}' AS (a, b, c, d, e)
+             |  USING 'python3 ${scriptFilePath.getName}' AS (a, b, c, d, e)
              |  ROW FORMAT DELIMITED
              |  FIELDS TERMINATED BY '\t'
              |FROM v
@@ -635,7 +635,7 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU
   }
 
   test("SPARK-36208: TRANSFORM should support ANSI interval (no serde)") {
-    assume(TestUtils.testCommandAvailable("python"))
+    assume(TestUtils.testCommandAvailable("python3"))
     withTempView("v") {
       val df = Seq(
         (Period.of(1, 2, 0), Duration.ofDays(1).plusHours(2).plusMinutes(3).plusSeconds(4))
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/BigDataBenchmarkSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/BigDataBenchmarkSuite.scala
index b495dae..591ac02a 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/BigDataBenchmarkSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/BigDataBenchmarkSuite.scala
@@ -114,7 +114,7 @@ class BigDataBenchmarkSuite extends HiveComparisonTest {
         |DROP TABLE IF EXISTS url_counts_partial;
         |CREATE TABLE url_counts_partial AS
         |  SELECT TRANSFORM (line)
-        |  USING 'python target/url_count.py' as (sourcePage,
+        |  USING 'python3 target/url_count.py' as (sourcePage,
         |    destPage, count) from documents;
         |DROP TABLE IF EXISTS url_counts_total;
         |CREATE TABLE url_counts_total AS
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index ba362d9..ba8e6cd 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -1319,7 +1319,7 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi
         sql(
           s"""FROM(
             |  FROM test SELECT TRANSFORM(a, b)
-            |  USING 'python $scriptFilePath/scripts/test_transform.py "\t"'
+            |  USING 'python3 $scriptFilePath/scripts/test_transform.py "\t"'
             |  AS (c STRING, d STRING)
             |) t
             |SELECT c
@@ -1341,7 +1341,7 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi
           |SELECT TRANSFORM(a, b)
           |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
           |WITH SERDEPROPERTIES('field.delim' = '|')
-          |USING 'python $scriptFilePath/scripts/test_transform.py "|"'
+          |USING 'python3 $scriptFilePath/scripts/test_transform.py "|"'
           |AS (c STRING, d STRING)
           |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
           |WITH SERDEPROPERTIES('field.delim' = '|')
@@ -2440,6 +2440,7 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi
 
   test("SPARK-25158: " +
     "Executor accidentally exit because ScriptTransformationWriterThread throw Exception") {
+    assume(TestUtils.testCommandAvailable("python3"))
     withTempView("test") {
       val defaultUncaughtExceptionHandler = Thread.getDefaultUncaughtExceptionHandler
       try {
@@ -2459,7 +2460,7 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi
         val e = intercept[SparkException] {
           sql(
             s"""FROM test SELECT TRANSFORM(a)
-               |USING 'python $scriptFilePath/scripts/test_transform.py "\t"'
+               |USING 'python3 $scriptFilePath/scripts/test_transform.py "\t"'
              """.stripMargin).collect()
         }
         assert(e.getMessage.contains("Failed to produce data."))

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org