Posted to commits@spark.apache.org by gu...@apache.org on 2023/10/18 08:10:59 UTC

[spark] branch master updated: [SPARK-45370][PYTHON][TESTS] Fix python test when ansi mode enabled

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 4a35a31c038f [SPARK-45370][PYTHON][TESTS] Fix python test when ansi mode enabled
4a35a31c038f is described below

commit 4a35a31c038f726f9329b4f28f3dde87286fb8d2
Author: panbingkun <pb...@gmail.com>
AuthorDate: Wed Oct 18 17:10:42 2023 +0900

    [SPARK-45370][PYTHON][TESTS] Fix python test when ansi mode enabled
    
    ### What changes were proposed in this pull request?
    This PR aims to fix some unit tests (doctests) in `pyspark.sql.functions` that fail when SPARK_ANSI_SQL_MODE=true.
    
    ### Why are the changes needed?
    Make the PySpark tests pass.
    When the daily ANSI-mode GitHub Actions workflow runs, the following error occurs, e.g.:
    https://github.com/apache/spark/actions/workflows/build_ansi.yml
    https://github.com/apache/spark/actions/runs/6333367232/job/17202251325
    Screenshot of the failure: https://github.com/apache/spark/assets/15246973/c22fb8c4-8b87-46fd-85f2-51f4c1d8d13d
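
    With ANSI mode enabled, string and integer columns are no longer implicitly
    coerced to a common type, so the old mixed-type doctests fail during analysis
    instead of printing a result table. A minimal sketch of the failure mode
    (assuming an active SparkSession; the exact error class and message may vary
    by version):

    ```
    >>> spark.conf.set("spark.sql.ansi.enabled", "true")
    >>> from pyspark.sql import functions as sf
    >>> df = spark.createDataFrame([("Alice", 2), ("Bob", 5)], ("name", "age"))
    >>> # A map mixing a string value with an int value has no ANSI-legal
    >>> # common type, so analysis fails:
    >>> df.select(sf.create_map(sf.lit('name'), df['name'],
    ...     sf.lit('age'), df['age'])).show()  # doctest: +SKIP
    Traceback (most recent call last):
        ...
    pyspark.errors.exceptions.captured.AnalysisException: ...
    ```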
    
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    ### How was this patch tested?
    Manually tested:
    ```
    python/run-tests --testnames 'pyspark.sql.functions'
    Running PySpark tests. Output is in /Users/panbingkun/Developer/spark/spark-community/python/unit-tests.log
    Will test against the following Python executables: ['python3.9']
    Will test the following Python tests: ['pyspark.sql.functions']
    python3.9 python_implementation is CPython
    python3.9 version is: Python 3.9.13
    Starting test(python3.9): pyspark.sql.functions (temp output: /Users/panbingkun/Developer/spark/spark-community/python/target/c8705d5c-d9f9-4bc5-babf-d3642736c70c/python3.9__pyspark.sql.functions__gcfwu3ik.log)
    Finished test(python3.9): pyspark.sql.functions (47s)
    Tests passed in 47 seconds
    
    ```
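
    To reproduce the ANSI-mode run locally, the same command can presumably be
    prefixed with the environment variable the daily workflow sets:

    ```
    SPARK_ANSI_SQL_MODE=true python/run-tests --testnames 'pyspark.sql.functions'
    ```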
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #43168 from panbingkun/SPARK-45370.
    
    Authored-by: panbingkun <pb...@gmail.com>
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
 python/pyspark/sql/functions.py | 114 +++++++++++++++++++++++++---------------
 1 file changed, 73 insertions(+), 41 deletions(-)

diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 31e5884e9ebd..7807919ce2c3 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -11740,15 +11740,29 @@ def create_map(
 
     >>> from pyspark.sql import functions as sf
     >>> df = spark.createDataFrame([("Alice", 2, "female"),
-    ...                             ("Bob", 5, "male")], ("name", "age", "gender"))
+    ...     ("Bob", 5, "male")], ("name", "age", "gender"))
     >>> df.select(sf.create_map(sf.lit('name'), df['name'],
-    ...                         sf.lit('age'), df['age'])).show(truncate=False)
-    +-------------------------+
-    |map(name, name, age, age)|
-    +-------------------------+
-    |{name -> Alice, age -> 2}|
-    |{name -> Bob, age -> 5}  |
-    +-------------------------+
+    ...     sf.lit('gender'), df['gender'])).show(truncate=False)
+    +---------------------------------+
+    |map(name, name, gender, gender)  |
+    +---------------------------------+
+    |{name -> Alice, gender -> female}|
+    |{name -> Bob, gender -> male}    |
+    +---------------------------------+
+
+    Example 4: Usage of create_map function with values of different types.
+
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([("Alice", 2, 22.2),
+    ...     ("Bob", 5, 36.1)], ("name", "age", "weight"))
+    >>> df.select(sf.create_map(sf.lit('age'), df['age'],
+    ...     sf.lit('weight'), df['weight'])).show(truncate=False)
+    +-----------------------------+
+    |map(age, age, weight, weight)|
+    +-----------------------------+
+    |{age -> 2.0, weight -> 22.2} |
+    |{age -> 5.0, weight -> 36.1} |
+    +-----------------------------+
     """
     if len(cols) == 1 and isinstance(cols[0], (list, set)):
         cols = cols[0]  # type: ignore[assignment]
@@ -11833,50 +11847,68 @@ def array(
     Example 1: Basic usage of array function with column names.
 
     >>> from pyspark.sql import functions as sf
-    >>> df = spark.createDataFrame([("Alice", 2), ("Bob", 5)], ("name", "age"))
-    >>> df.select(sf.array('name', 'age').alias("arr")).show()
-    +----------+
-    |       arr|
-    +----------+
-    |[Alice, 2]|
-    |  [Bob, 5]|
-    +----------+
+    >>> df = spark.createDataFrame([("Alice", "doctor"), ("Bob", "engineer")],
+    ...     ("name", "occupation"))
+    >>> df.select(sf.array('name', 'occupation').alias("arr")).show()
+    +---------------+
+    |            arr|
+    +---------------+
+    |[Alice, doctor]|
+    |[Bob, engineer]|
+    +---------------+
 
     Example 2: Usage of array function with Column objects.
 
     >>> from pyspark.sql import functions as sf
-    >>> df = spark.createDataFrame([("Alice", 2), ("Bob", 5)], ("name", "age"))
-    >>> df.select(sf.array(df.name, df.age).alias("arr")).show()
-    +----------+
-    |       arr|
-    +----------+
-    |[Alice, 2]|
-    |  [Bob, 5]|
-    +----------+
+    >>> df = spark.createDataFrame([("Alice", "doctor"), ("Bob", "engineer")],
+    ...     ("name", "occupation"))
+    >>> df.select(sf.array(df.name, df.occupation).alias("arr")).show()
+    +---------------+
+    |            arr|
+    +---------------+
+    |[Alice, doctor]|
+    |[Bob, engineer]|
+    +---------------+
 
     Example 3: Single argument as list of column names.
 
     >>> from pyspark.sql import functions as sf
-    >>> df = spark.createDataFrame([("Alice", 2), ("Bob", 5)], ("name", "age"))
-    >>> df.select(sf.array(['name', 'age']).alias("arr")).show()
-    +----------+
-    |       arr|
-    +----------+
-    |[Alice, 2]|
-    |  [Bob, 5]|
-    +----------+
+    >>> df = spark.createDataFrame([("Alice", "doctor"), ("Bob", "engineer")],
+    ...     ("name", "occupation"))
+    >>> df.select(sf.array(['name', 'occupation']).alias("arr")).show()
+    +---------------+
+    |            arr|
+    +---------------+
+    |[Alice, doctor]|
+    |[Bob, engineer]|
+    +---------------+
 
-    Example 4: array function with a column containing null values.
+    Example 4: Usage of array function with columns of different types.
 
     >>> from pyspark.sql import functions as sf
-    >>> df = spark.createDataFrame([("Alice", None), ("Bob", 5)], ("name", "age"))
-    >>> df.select(sf.array('name', 'age').alias("arr")).show()
-    +-------------+
-    |          arr|
-    +-------------+
-    |[Alice, NULL]|
-    |     [Bob, 5]|
-    +-------------+
+    >>> df = spark.createDataFrame(
+    ...     [("Alice", 2, 22.2), ("Bob", 5, 36.1)],
+    ...     ("name", "age", "weight"))
+    >>> df.select(sf.array(['age', 'weight']).alias("arr")).show()
+    +-----------+
+    |        arr|
+    +-----------+
+    |[2.0, 22.2]|
+    |[5.0, 36.1]|
+    +-----------+
+
+    Example 5: array function with a column containing null values.
+
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([("Alice", None), ("Bob", "engineer")],
+    ...     ("name", "occupation"))
+    >>> df.select(sf.array('name', 'occupation').alias("arr")).show()
+    +---------------+
+    |            arr|
+    +---------------+
+    |  [Alice, NULL]|
+    |[Bob, engineer]|
+    +---------------+
     """
     if len(cols) == 1 and isinstance(cols[0], (list, set)):
         cols = cols[0]  # type: ignore[assignment]
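
Note on the replacement examples above: mixing an integer column with a double
column still works, because ANSI mode still permits the implicit widening cast
from int to double; only the string/int mixtures had to be replaced. A minimal
sketch of that distinction (assuming an active SparkSession):

```
>>> from pyspark.sql import functions as sf
>>> spark.conf.set("spark.sql.ansi.enabled", "true")
>>> df = spark.createDataFrame([("Alice", 2, 22.2)], ("name", "age", "weight"))
>>> # int widens to double, so this remains legal under ANSI mode:
>>> df.select(sf.array('age', 'weight').alias("arr")).show()
+-----------+
|        arr|
+-----------+
|[2.0, 22.2]|
+-----------+
```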


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org