You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ru...@apache.org on 2023/09/16 00:37:21 UTC
[spark] branch master updated: [SPARK-45166][PYTHON][FOLLOWUP] Delete unused `pyarrow_version_less_than_minimum` from `pyspark.sql.pandas.utils`
This is an automated email from the ASF dual-hosted git repository.
ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 3536f3343b5 [SPARK-45166][PYTHON][FOLLOWUP] Delete unused `pyarrow_version_less_than_minimum` from `pyspark.sql.pandas.utils`
3536f3343b5 is described below
commit 3536f3343b57c14784f481b2a5163d8305952d79
Author: Ruifeng Zheng <ru...@apache.org>
AuthorDate: Sat Sep 16 08:37:02 2023 +0800
[SPARK-45166][PYTHON][FOLLOWUP] Delete unused `pyarrow_version_less_than_minimum` from `pyspark.sql.pandas.utils`
### What changes were proposed in this pull request?
Delete unused `pyarrow_version_less_than_minimum` from `pyspark.sql.pandas.utils`
### Why are the changes needed?
This method is only used to compare the PyArrow version with 2.0.0, which is no longer needed after the minimum version is set to 4.0.0
### Does this PR introduce _any_ user-facing change?
No, dev-only
### How was this patch tested?
CI
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #42948 from zhengruifeng/del_pyarrow_version_less_than_minimum.
Authored-by: Ruifeng Zheng <ru...@apache.org>
Signed-off-by: Ruifeng Zheng <ru...@apache.org>
---
python/pyspark/sql/pandas/utils.py | 13 -------------
python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py | 5 -----
python/pyspark/sql/tests/test_dataframe.py | 7 ++-----
3 files changed, 2 insertions(+), 23 deletions(-)
diff --git a/python/pyspark/sql/pandas/utils.py b/python/pyspark/sql/pandas/utils.py
index 74939f83d8f..c7504f901e6 100644
--- a/python/pyspark/sql/pandas/utils.py
+++ b/python/pyspark/sql/pandas/utils.py
@@ -71,16 +71,3 @@ def require_minimum_pyarrow_version() -> None:
"Arrow legacy IPC format is not supported in PySpark, "
"please unset ARROW_PRE_0_15_IPC_FORMAT"
)
-
-
-def pyarrow_version_less_than_minimum(minimum_pyarrow_version: str) -> bool:
- """Return False if the installed pyarrow version is less than minimum_pyarrow_version
- or if pyarrow is not installed."""
- from distutils.version import LooseVersion
-
- try:
- import pyarrow
- except ImportError:
- return False
-
- return LooseVersion(pyarrow.__version__) < LooseVersion(minimum_pyarrow_version)
diff --git a/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py b/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py
index 0e0018d80da..d37e6b0130f 100644
--- a/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py
+++ b/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py
@@ -28,7 +28,6 @@ from pyspark import TaskContext
from pyspark.rdd import PythonEvalType
from pyspark.sql import Column
from pyspark.sql.functions import array, col, expr, lit, sum, struct, udf, pandas_udf, PandasUDFType
-from pyspark.sql.pandas.utils import pyarrow_version_less_than_minimum
from pyspark.sql.types import (
IntegerType,
ByteType,
@@ -215,10 +214,6 @@ class ScalarPandasUDFTestsMixin:
Row(res="[array([1, 2, 3], dtype=int32) array([4, 5], dtype=int32)]"),
)
- @unittest.skipIf(
- pyarrow_version_less_than_minimum("2.0.0"),
- "Pyarrow version must be 2.0.0 or higher",
- )
def test_pandas_array_struct(self):
# SPARK-38098: Support Array of Struct for Pandas UDFs and toPandas
import numpy as np
diff --git a/python/pyspark/sql/tests/test_dataframe.py b/python/pyspark/sql/tests/test_dataframe.py
index 8aa6535e02b..2ffd4f312ff 100644
--- a/python/pyspark/sql/tests/test_dataframe.py
+++ b/python/pyspark/sql/tests/test_dataframe.py
@@ -28,7 +28,6 @@ from contextlib import redirect_stdout
from pyspark.sql import SparkSession, Row, functions
from pyspark.sql.functions import col, lit, count, sum, mean, struct
-from pyspark.sql.pandas.utils import pyarrow_version_less_than_minimum
from pyspark.sql.types import (
StringType,
IntegerType,
@@ -1479,10 +1478,8 @@ class DataFrameTestsMixin:
self.assertTrue(np.all(pdf_with_only_nulls.dtypes == pdf_with_some_nulls.dtypes))
@unittest.skipIf(
- not have_pandas or not have_pyarrow or pyarrow_version_less_than_minimum("2.0.0"),
- pandas_requirement_message
- or pyarrow_requirement_message
- or "Pyarrow version must be 2.0.0 or higher",
+ not have_pandas or not have_pyarrow,
+ pandas_requirement_message or pyarrow_requirement_message,
)
def test_to_pandas_for_array_of_struct(self):
for is_arrow_enabled in [True, False]:
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org