You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2023/10/18 08:10:59 UTC
[spark] branch master updated: [SPARK-45370][PYTHON][TESTS] Fix python test when ansi mode enabled
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 4a35a31c038f [SPARK-45370][PYTHON][TESTS] Fix python test when ansi mode enabled
4a35a31c038f is described below
commit 4a35a31c038f726f9329b4f28f3dde87286fb8d2
Author: panbingkun <pb...@gmail.com>
AuthorDate: Wed Oct 18 17:10:42 2023 +0900
[SPARK-45370][PYTHON][TESTS] Fix python test when ansi mode enabled
### What changes were proposed in this pull request?
This PR aims to fix some unit tests in `pyspark.sql.functions` that fail when SPARK_ANSI_SQL_MODE=true.
### Why are the changes needed?
Make the PySpark tests pass under ANSI mode.
When the daily ANSI GitHub Actions workflow runs, the following error occurs, e.g.:
https://github.com/apache/spark/actions/workflows/build_ansi.yml
https://github.com/apache/spark/actions/runs/6333367232/job/17202251325
<img width="1011" alt="image" src="https://github.com/apache/spark/assets/15246973/c22fb8c4-8b87-46fd-85f2-51f4c1d8d13d">
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Manual test:
```
python/run-tests --testnames 'pyspark.sql.functions'
Running PySpark tests. Output is in /Users/panbingkun/Developer/spark/spark-community/python/unit-tests.log
Will test against the following Python executables: ['python3.9']
Will test the following Python tests: ['pyspark.sql.functions']
python3.9 python_implementation is CPython
python3.9 version is: Python 3.9.13
Starting test(python3.9): pyspark.sql.functions (temp output: /Users/panbingkun/Developer/spark/spark-community/python/target/c8705d5c-d9f9-4bc5-babf-d3642736c70c/python3.9__pyspark.sql.functions__gcfwu3ik.log)
Finished test(python3.9): pyspark.sql.functions (47s)
Tests passed in 47 seconds
```
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #43168 from panbingkun/SPARK-45370.
Authored-by: panbingkun <pb...@gmail.com>
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
python/pyspark/sql/functions.py | 114 +++++++++++++++++++++++++---------------
1 file changed, 73 insertions(+), 41 deletions(-)
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 31e5884e9ebd..7807919ce2c3 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -11740,15 +11740,29 @@ def create_map(
>>> from pyspark.sql import functions as sf
>>> df = spark.createDataFrame([("Alice", 2, "female"),
- ... ("Bob", 5, "male")], ("name", "age", "gender"))
+ ... ("Bob", 5, "male")], ("name", "age", "gender"))
>>> df.select(sf.create_map(sf.lit('name'), df['name'],
- ... sf.lit('age'), df['age'])).show(truncate=False)
- +-------------------------+
- |map(name, name, age, age)|
- +-------------------------+
- |{name -> Alice, age -> 2}|
- |{name -> Bob, age -> 5} |
- +-------------------------+
+ ... sf.lit('gender'), df['gender'])).show(truncate=False)
+ +---------------------------------+
+ |map(name, name, gender, gender) |
+ +---------------------------------+
+ |{name -> Alice, gender -> female}|
+ |{name -> Bob, gender -> male} |
+ +---------------------------------+
+
+ Example 4: Usage of create_map function with values of different types.
+
+ >>> from pyspark.sql import functions as sf
+ >>> df = spark.createDataFrame([("Alice", 2, 22.2),
+ ... ("Bob", 5, 36.1)], ("name", "age", "weight"))
+ >>> df.select(sf.create_map(sf.lit('age'), df['age'],
+ ... sf.lit('weight'), df['weight'])).show(truncate=False)
+ +-----------------------------+
+ |map(age, age, weight, weight)|
+ +-----------------------------+
+ |{age -> 2.0, weight -> 22.2} |
+ |{age -> 5.0, weight -> 36.1} |
+ +-----------------------------+
"""
if len(cols) == 1 and isinstance(cols[0], (list, set)):
cols = cols[0] # type: ignore[assignment]
@@ -11833,50 +11847,68 @@ def array(
Example 1: Basic usage of array function with column names.
>>> from pyspark.sql import functions as sf
- >>> df = spark.createDataFrame([("Alice", 2), ("Bob", 5)], ("name", "age"))
- >>> df.select(sf.array('name', 'age').alias("arr")).show()
- +----------+
- | arr|
- +----------+
- |[Alice, 2]|
- | [Bob, 5]|
- +----------+
+ >>> df = spark.createDataFrame([("Alice", "doctor"), ("Bob", "engineer")],
+ ... ("name", "occupation"))
+ >>> df.select(sf.array('name', 'occupation').alias("arr")).show()
+ +---------------+
+ | arr|
+ +---------------+
+ |[Alice, doctor]|
+ |[Bob, engineer]|
+ +---------------+
Example 2: Usage of array function with Column objects.
>>> from pyspark.sql import functions as sf
- >>> df = spark.createDataFrame([("Alice", 2), ("Bob", 5)], ("name", "age"))
- >>> df.select(sf.array(df.name, df.age).alias("arr")).show()
- +----------+
- | arr|
- +----------+
- |[Alice, 2]|
- | [Bob, 5]|
- +----------+
+ >>> df = spark.createDataFrame([("Alice", "doctor"), ("Bob", "engineer")],
+ ... ("name", "occupation"))
+ >>> df.select(sf.array(df.name, df.occupation).alias("arr")).show()
+ +---------------+
+ | arr|
+ +---------------+
+ |[Alice, doctor]|
+ |[Bob, engineer]|
+ +---------------+
Example 3: Single argument as list of column names.
>>> from pyspark.sql import functions as sf
- >>> df = spark.createDataFrame([("Alice", 2), ("Bob", 5)], ("name", "age"))
- >>> df.select(sf.array(['name', 'age']).alias("arr")).show()
- +----------+
- | arr|
- +----------+
- |[Alice, 2]|
- | [Bob, 5]|
- +----------+
+ >>> df = spark.createDataFrame([("Alice", "doctor"), ("Bob", "engineer")],
+ ... ("name", "occupation"))
+ >>> df.select(sf.array(['name', 'occupation']).alias("arr")).show()
+ +---------------+
+ | arr|
+ +---------------+
+ |[Alice, doctor]|
+ |[Bob, engineer]|
+ +---------------+
- Example 4: array function with a column containing null values.
+ Example 4: Usage of array function with columns of different types.
>>> from pyspark.sql import functions as sf
- >>> df = spark.createDataFrame([("Alice", None), ("Bob", 5)], ("name", "age"))
- >>> df.select(sf.array('name', 'age').alias("arr")).show()
- +-------------+
- | arr|
- +-------------+
- |[Alice, NULL]|
- | [Bob, 5]|
- +-------------+
+ >>> df = spark.createDataFrame(
+ ... [("Alice", 2, 22.2), ("Bob", 5, 36.1)],
+ ... ("name", "age", "weight"))
+ >>> df.select(sf.array(['age', 'weight']).alias("arr")).show()
+ +-----------+
+ | arr|
+ +-----------+
+ |[2.0, 22.2]|
+ |[5.0, 36.1]|
+ +-----------+
+
+ Example 5: array function with a column containing null values.
+
+ >>> from pyspark.sql import functions as sf
+ >>> df = spark.createDataFrame([("Alice", None), ("Bob", "engineer")],
+ ... ("name", "occupation"))
+ >>> df.select(sf.array('name', 'occupation').alias("arr")).show()
+ +---------------+
+ | arr|
+ +---------------+
+ | [Alice, NULL]|
+ |[Bob, engineer]|
+ +---------------+
"""
if len(cols) == 1 and isinstance(cols[0], (list, set)):
cols = cols[0] # type: ignore[assignment]
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org