You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2023/10/10 05:27:03 UTC

[spark] branch master updated: [SPARK-45450][PYTHON] Fix imports according to PEP8: pyspark.pandas and pyspark (core)

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 4dbe4ffebfc [SPARK-45450][PYTHON] Fix imports according to PEP8: pyspark.pandas and pyspark (core)
4dbe4ffebfc is described below

commit 4dbe4ffebfc8cc3a894c9e798c5a7b364cf7a399
Author: Hyukjin Kwon <gu...@apache.org>
AuthorDate: Tue Oct 10 14:26:45 2023 +0900

    [SPARK-45450][PYTHON] Fix imports according to PEP8: pyspark.pandas and pyspark (core)
    
    ### What changes were proposed in this pull request?
    
    This PR proposes to fix imports according to PEP8 in `pyspark.pandas` and `pyspark.*` (core); see https://peps.python.org/pep-0008/#imports.
    
    ### Why are the changes needed?
    
    I have not been fixing them as they are too minor. However, this practice is being propagated across all of the PySpark packages, and I think we should fix them all so other users do not follow the non-standard practice.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Existing linters and tests should cover this change.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #43257 from HyukjinKwon/SPARK-45450.
    
    Authored-by: Hyukjin Kwon <gu...@apache.org>
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
 python/pyspark/conf.py                                        |  1 +
 python/pyspark/errors_doc_gen.py                              |  1 +
 python/pyspark/java_gateway.py                                |  1 +
 python/pyspark/join.py                                        |  3 ++-
 python/pyspark/pandas/accessors.py                            |  1 -
 python/pyspark/pandas/base.py                                 |  2 +-
 python/pyspark/pandas/config.py                               |  1 -
 python/pyspark/pandas/correlation.py                          |  1 -
 python/pyspark/pandas/data_type_ops/date_ops.py               |  1 -
 python/pyspark/pandas/data_type_ops/datetime_ops.py           |  1 -
 python/pyspark/pandas/data_type_ops/string_ops.py             |  1 -
 python/pyspark/pandas/frame.py                                |  7 ++-----
 python/pyspark/pandas/generic.py                              |  1 -
 python/pyspark/pandas/groupby.py                              |  2 --
 python/pyspark/pandas/indexes/base.py                         |  1 -
 python/pyspark/pandas/indexes/multi.py                        |  2 --
 python/pyspark/pandas/indexing.py                             |  6 ++----
 python/pyspark/pandas/internal.py                             | 11 +++--------
 python/pyspark/pandas/mlflow.py                               |  4 ++--
 python/pyspark/pandas/namespace.py                            |  2 +-
 python/pyspark/pandas/numpy_compat.py                         |  2 +-
 python/pyspark/pandas/plot/core.py                            |  6 +++---
 python/pyspark/pandas/plot/matplotlib.py                      |  1 -
 python/pyspark/pandas/resample.py                             |  2 --
 python/pyspark/pandas/series.py                               |  2 +-
 python/pyspark/pandas/spark/accessors.py                      |  3 ---
 python/pyspark/pandas/spark/functions.py                      |  2 --
 python/pyspark/pandas/sql_processor.py                        |  2 +-
 python/pyspark/pandas/strings.py                              |  3 +--
 python/pyspark/pandas/supported_api_gen.py                    |  6 +++---
 python/pyspark/pandas/tests/computation/test_corrwith.py      |  1 -
 python/pyspark/pandas/tests/computation/test_cov.py           |  1 -
 .../pandas/tests/connect/data_type_ops/testing_utils.py       |  1 -
 python/pyspark/pandas/tests/connect/test_parity_extension.py  |  1 +
 python/pyspark/pandas/tests/connect/test_parity_indexing.py   |  1 +
 .../pyspark/pandas/tests/connect/test_parity_numpy_compat.py  |  1 +
 python/pyspark/pandas/tests/data_type_ops/testing_utils.py    |  2 --
 python/pyspark/pandas/tests/frame/test_reshaping.py           |  1 -
 python/pyspark/pandas/tests/frame/test_spark.py               |  2 +-
 python/pyspark/pandas/tests/series/test_series.py             |  1 -
 python/pyspark/pandas/tests/series/test_stat.py               |  2 +-
 python/pyspark/pandas/tests/test_indexops_spark.py            |  2 +-
 python/pyspark/pandas/tests/test_stats.py                     |  1 +
 python/pyspark/pandas/utils.py                                |  7 +++----
 python/pyspark/pandas/window.py                               |  2 --
 python/pyspark/profiler.py                                    |  1 -
 python/pyspark/rdd.py                                         |  6 +++---
 python/pyspark/shuffle.py                                     |  2 +-
 python/pyspark/tests/test_statcounter.py                      |  3 ++-
 python/pyspark/util.py                                        |  4 ++--
 python/pyspark/worker.py                                      |  1 -
 51 files changed, 44 insertions(+), 78 deletions(-)

diff --git a/python/pyspark/conf.py b/python/pyspark/conf.py
index ddf8c22feea..ba43a506375 100644
--- a/python/pyspark/conf.py
+++ b/python/pyspark/conf.py
@@ -21,6 +21,7 @@ import sys
 from typing import Dict, List, Optional, Tuple, cast, overload
 
 from py4j.java_gateway import JVMView, JavaObject
+
 from pyspark.errors import PySparkRuntimeError
 
 
diff --git a/python/pyspark/errors_doc_gen.py b/python/pyspark/errors_doc_gen.py
index e9b229062ba..a30e2513f91 100644
--- a/python/pyspark/errors_doc_gen.py
+++ b/python/pyspark/errors_doc_gen.py
@@ -1,4 +1,5 @@
 import re
+
 from pyspark.errors.error_classes import ERROR_CLASSES_MAP
 
 
diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py
index 691120a1312..39a90a0afba 100644
--- a/python/pyspark/java_gateway.py
+++ b/python/pyspark/java_gateway.py
@@ -28,6 +28,7 @@ from subprocess import Popen, PIPE
 
 from py4j.java_gateway import java_import, JavaGateway, JavaObject, GatewayParameters
 from py4j.clientserver import ClientServer, JavaParameters, PythonParameters
+
 from pyspark.find_spark_home import _find_spark_home
 from pyspark.serializers import read_int, write_with_length, UTF8Deserializer
 from pyspark.errors import PySparkRuntimeError
diff --git a/python/pyspark/join.py b/python/pyspark/join.py
index 003e9ec2fc8..e12d674614d 100644
--- a/python/pyspark/join.py
+++ b/python/pyspark/join.py
@@ -31,9 +31,10 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 """
 
-from pyspark.resultiterable import ResultIterable
 from functools import reduce
 
+from pyspark.resultiterable import ResultIterable
+
 
 def _do_python_join(rdd, other, numPartitions, dispatch):
     vs = rdd.mapValues(lambda v: (1, v))
diff --git a/python/pyspark/pandas/accessors.py b/python/pyspark/pandas/accessors.py
index 4e96f4d4cf3..4c36f7976af 100644
--- a/python/pyspark/pandas/accessors.py
+++ b/python/pyspark/pandas/accessors.py
@@ -27,7 +27,6 @@ import pandas as pd
 from pyspark.sql import functions as F
 from pyspark.sql.functions import pandas_udf
 from pyspark.sql.types import DataType, LongType, StructField, StructType
-
 from pyspark.pandas._typing import DataFrameOrSeries, Name
 from pyspark.pandas.internal import (
     InternalField,
diff --git a/python/pyspark/pandas/base.py b/python/pyspark/pandas/base.py
index fa513e8b9b6..771d79dc6e0 100644
--- a/python/pyspark/pandas/base.py
+++ b/python/pyspark/pandas/base.py
@@ -27,9 +27,9 @@ from typing import Any, Callable, Optional, Sequence, Tuple, Union, cast, TYPE_C
 import numpy as np
 import pandas as pd
 from pandas.api.types import is_list_like, CategoricalDtype  # type: ignore[attr-defined]
+
 from pyspark.sql import functions as F, Column, Window
 from pyspark.sql.types import LongType, BooleanType, NumericType
-
 from pyspark import pandas as ps  # For running doctests and reference resolution in PyCharm.
 from pyspark.pandas._typing import Axis, Dtype, IndexOpsLike, Label, SeriesOrIndex
 from pyspark.pandas.config import get_option, option_context
diff --git a/python/pyspark/pandas/config.py b/python/pyspark/pandas/config.py
index 79cb859faa2..2228e41c1df 100644
--- a/python/pyspark/pandas/config.py
+++ b/python/pyspark/pandas/config.py
@@ -23,7 +23,6 @@ import json
 from typing import Any, Callable, Dict, Iterator, List, Tuple, Union
 
 from pyspark._globals import _NoValue, _NoValueType
-
 from pyspark.pandas.utils import default_session
 
 
diff --git a/python/pyspark/pandas/correlation.py b/python/pyspark/pandas/correlation.py
index 75d3a857a0f..da51dc2cc61 100644
--- a/python/pyspark/pandas/correlation.py
+++ b/python/pyspark/pandas/correlation.py
@@ -19,7 +19,6 @@ from typing import List
 
 from pyspark.sql import DataFrame as SparkDataFrame, functions as F
 from pyspark.sql.window import Window
-
 from pyspark.pandas.utils import verify_temp_column_name
 
 
diff --git a/python/pyspark/pandas/data_type_ops/date_ops.py b/python/pyspark/pandas/data_type_ops/date_ops.py
index 51d1018a304..771b5d38a17 100644
--- a/python/pyspark/pandas/data_type_ops/date_ops.py
+++ b/python/pyspark/pandas/data_type_ops/date_ops.py
@@ -26,7 +26,6 @@ from pandas.api.types import CategoricalDtype
 from pyspark.sql import functions as F
 from pyspark.sql.types import BooleanType, DateType, StringType
 from pyspark.sql.utils import get_column_class
-
 from pyspark.pandas._typing import Dtype, IndexOpsLike, SeriesOrIndex
 from pyspark.pandas.base import column_op, IndexOpsMixin
 from pyspark.pandas.data_type_ops.base import (
diff --git a/python/pyspark/pandas/data_type_ops/datetime_ops.py b/python/pyspark/pandas/data_type_ops/datetime_ops.py
index ea9b994076b..8d5853b6824 100644
--- a/python/pyspark/pandas/data_type_ops/datetime_ops.py
+++ b/python/pyspark/pandas/data_type_ops/datetime_ops.py
@@ -34,7 +34,6 @@ from pyspark.sql.types import (
     NumericType,
 )
 from pyspark.sql.utils import pyspark_column_op
-
 from pyspark.pandas._typing import Dtype, IndexOpsLike, SeriesOrIndex
 from pyspark.pandas.base import IndexOpsMixin
 from pyspark.pandas.data_type_ops.base import (
diff --git a/python/pyspark/pandas/data_type_ops/string_ops.py b/python/pyspark/pandas/data_type_ops/string_ops.py
index 53095c55e81..6c8bc754ac9 100644
--- a/python/pyspark/pandas/data_type_ops/string_ops.py
+++ b/python/pyspark/pandas/data_type_ops/string_ops.py
@@ -23,7 +23,6 @@ from pandas.api.types import CategoricalDtype
 from pyspark.sql import functions as F
 from pyspark.sql.types import IntegralType, StringType
 from pyspark.sql.utils import pyspark_column_op
-
 from pyspark.pandas._typing import Dtype, IndexOpsLike, SeriesOrIndex
 from pyspark.pandas.base import column_op, IndexOpsMixin
 from pyspark.pandas.data_type_ops.base import (
diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py
index faa595f80e3..8f3555685ff 100644
--- a/python/pyspark/pandas/frame.py
+++ b/python/pyspark/pandas/frame.py
@@ -59,14 +59,14 @@ from pandas.api.types import (  # type: ignore[attr-defined]
 )
 from pandas.tseries.frequencies import DateOffset, to_offset
 
-from pyspark.errors import PySparkValueError
-
 if TYPE_CHECKING:
     from pandas.io.formats.style import Styler
 
 from pandas.core.dtypes.common import infer_dtype_from_object
 from pandas.core.accessor import CachedAccessor
 from pandas.core.dtypes.inference import is_sequence
+
+from pyspark.errors import PySparkValueError
 from pyspark import StorageLevel
 from pyspark.sql import Column as PySparkColumn, DataFrame as PySparkDataFrame, functions as F
 from pyspark.sql.functions import pandas_udf
@@ -86,7 +86,6 @@ from pyspark.sql.types import (
     NullType,
 )
 from pyspark.sql.window import Window
-
 from pyspark import pandas as ps  # For running doctests and reference resolution in PyCharm.
 from pyspark.pandas._typing import (
     Axis,
@@ -150,8 +149,6 @@ from pyspark.pandas.typedef.typehints import (
     create_tuple_for_frame_type,
 )
 from pyspark.pandas.plot import PandasOnSparkPlotAccessor
-
-# For supporting Spark Connect
 from pyspark.sql.utils import get_column_class, get_dataframe_class
 
 if TYPE_CHECKING:
diff --git a/python/pyspark/pandas/generic.py b/python/pyspark/pandas/generic.py
index 81f4f6db7ed..300fd73e43f 100644
--- a/python/pyspark/pandas/generic.py
+++ b/python/pyspark/pandas/generic.py
@@ -48,7 +48,6 @@ from pyspark.sql.types import (
     LongType,
     NumericType,
 )
-
 from pyspark import pandas as ps  # For running doctests and reference resolution in PyCharm.
 from pyspark.pandas._typing import (
     Axis,
diff --git a/python/pyspark/pandas/groupby.py b/python/pyspark/pandas/groupby.py
index 3d51fabd4b2..b19a40b837a 100644
--- a/python/pyspark/pandas/groupby.py
+++ b/python/pyspark/pandas/groupby.py
@@ -44,7 +44,6 @@ import warnings
 
 import pandas as pd
 from pandas.api.types import is_number, is_hashable, is_list_like  # type: ignore[attr-defined]
-
 from pandas.core.common import _builtin_table  # type: ignore[attr-defined]
 
 from pyspark.sql import Column, DataFrame as SparkDataFrame, Window, functions as F
@@ -57,7 +56,6 @@ from pyspark.sql.types import (
     StructType,
     StringType,
 )
-
 from pyspark import pandas as ps  # For running doctests and reference resolution in PyCharm.
 from pyspark.pandas._typing import Axis, FrameLike, Label, Name
 from pyspark.pandas.typedef import infer_return_type, DataFrameType, ScalarType, SeriesType
diff --git a/python/pyspark/pandas/indexes/base.py b/python/pyspark/pandas/indexes/base.py
index 5652c6a8a85..2ec0a39dc71 100644
--- a/python/pyspark/pandas/indexes/base.py
+++ b/python/pyspark/pandas/indexes/base.py
@@ -53,7 +53,6 @@ from pyspark.sql.types import (
     TimestampType,
     TimestampNTZType,
 )
-
 from pyspark import pandas as ps  # For running doctests and reference resolution in PyCharm.
 from pyspark.pandas._typing import Dtype, Label, Name, Scalar
 from pyspark.pandas.config import get_option, option_context
diff --git a/python/pyspark/pandas/indexes/multi.py b/python/pyspark/pandas/indexes/multi.py
index 043d6762fb7..9917a42fb38 100644
--- a/python/pyspark/pandas/indexes/multi.py
+++ b/python/pyspark/pandas/indexes/multi.py
@@ -24,8 +24,6 @@ from pandas.api.types import is_hashable, is_list_like  # type: ignore[attr-defi
 from pyspark.sql import functions as F, Column as PySparkColumn, Window
 from pyspark.sql.types import DataType
 from pyspark.sql.utils import get_column_class
-
-# For running doctests and reference resolution in PyCharm.
 from pyspark import pandas as ps
 from pyspark.pandas._typing import Label, Name, Scalar
 from pyspark.pandas.exceptions import PandasNotImplementedError
diff --git a/python/pyspark/pandas/indexing.py b/python/pyspark/pandas/indexing.py
index c725d01d673..de5baa3fae1 100644
--- a/python/pyspark/pandas/indexing.py
+++ b/python/pyspark/pandas/indexing.py
@@ -25,11 +25,11 @@ from typing import Any, Optional, List, Tuple, TYPE_CHECKING, Union, cast, Sized
 
 import pandas as pd
 from pandas.api.types import is_list_like  # type: ignore[attr-defined]
+import numpy as np
+
 from pyspark.sql import functions as F, Column as PySparkColumn
 from pyspark.sql.types import BooleanType, LongType, DataType
 from pyspark.errors import AnalysisException
-import numpy as np
-
 from pyspark import pandas as ps  # noqa: F401
 from pyspark.pandas._typing import Label, Name, Scalar
 from pyspark.pandas.internal import (
@@ -50,8 +50,6 @@ from pyspark.pandas.utils import (
     spark_column_equals,
     verify_temp_column_name,
 )
-
-# For Supporting Spark Connect
 from pyspark.sql.utils import get_column_class
 
 if TYPE_CHECKING:
diff --git a/python/pyspark/pandas/internal.py b/python/pyspark/pandas/internal.py
index e025d91e7b7..2966db073d0 100644
--- a/python/pyspark/pandas/internal.py
+++ b/python/pyspark/pandas/internal.py
@@ -24,6 +24,7 @@ from typing import Any, Dict, List, Optional, Sequence, Tuple, Union, TYPE_CHECK
 import numpy as np
 import pandas as pd
 from pandas.api.types import CategoricalDtype  # noqa: F401
+
 from pyspark._globals import _NoValue, _NoValueType
 from pyspark.sql import (
     functions as F,
@@ -40,17 +41,9 @@ from pyspark.sql.types import (  # noqa: F401
     StringType,
 )
 from pyspark.sql.utils import is_timestamp_ntz_preferred
-
-# For supporting Spark Connect
 from pyspark.sql.utils import is_remote, get_column_class, get_dataframe_class
-
-# For running doctests and reference resolution in PyCharm.
 from pyspark import pandas as ps
 from pyspark.pandas._typing import Label
-
-if TYPE_CHECKING:
-    # This is required in old Python 3.5 to prevent circular reference.
-    from pyspark.pandas.series import Series
 from pyspark.pandas.spark.utils import as_nullable_spark_type, force_decimal_precision_scale
 from pyspark.pandas.data_type_ops.base import DataTypeOps
 from pyspark.pandas.typedef import (
@@ -71,6 +64,8 @@ from pyspark.pandas.utils import (
     spark_column_equals,
 )
 
+if TYPE_CHECKING:
+    from pyspark.pandas.series import Series
 
 # A function to turn given numbers to Spark columns that represent pandas-on-Spark index.
 SPARK_INDEX_NAME_FORMAT = "__index_level_{}__".format
diff --git a/python/pyspark/pandas/mlflow.py b/python/pyspark/pandas/mlflow.py
index b78ae934d74..a609f9b7069 100644
--- a/python/pyspark/pandas/mlflow.py
+++ b/python/pyspark/pandas/mlflow.py
@@ -19,12 +19,12 @@
 MLflow-related functions to load models and apply them to pandas-on-Spark dataframes.
 """
 from typing import List, Union
+from typing import Any
 
-from pyspark.sql.types import DataType
 import pandas as pd
 import numpy as np
-from typing import Any
 
+from pyspark.sql.types import DataType
 from pyspark.pandas._typing import Label, Dtype
 from pyspark.pandas.utils import lazy_property, default_session
 from pyspark.pandas.frame import DataFrame
diff --git a/python/pyspark/pandas/namespace.py b/python/pyspark/pandas/namespace.py
index a700a243e5d..e8898ab4893 100644
--- a/python/pyspark/pandas/namespace.py
+++ b/python/pyspark/pandas/namespace.py
@@ -48,6 +48,7 @@ from pandas.api.types import (  # type: ignore[attr-defined]
 from pandas.tseries.offsets import DateOffset
 import pyarrow as pa
 import pyarrow.parquet as pq
+
 from pyspark.sql import functions as F, Column as PySparkColumn
 from pyspark.sql.functions import pandas_udf
 from pyspark.sql.types import (
@@ -67,7 +68,6 @@ from pyspark.sql.types import (
     DataType,
 )
 from pyspark.sql.dataframe import DataFrame as PySparkDataFrame
-
 from pyspark import pandas as ps
 from pyspark.pandas._typing import Axis, Dtype, Label, Name
 from pyspark.pandas.base import IndexOpsMixin
diff --git a/python/pyspark/pandas/numpy_compat.py b/python/pyspark/pandas/numpy_compat.py
index efffaa7042c..6fed89157d2 100644
--- a/python/pyspark/pandas/numpy_compat.py
+++ b/python/pyspark/pandas/numpy_compat.py
@@ -17,10 +17,10 @@
 from typing import Any, Callable, no_type_check
 
 import numpy as np
+
 from pyspark.sql import functions as F
 from pyspark.sql.pandas.functions import pandas_udf
 from pyspark.sql.types import DoubleType, LongType, BooleanType
-
 from pyspark.pandas.base import IndexOpsMixin
 
 
diff --git a/python/pyspark/pandas/plot/core.py b/python/pyspark/pandas/plot/core.py
index ccae96a2ef8..e7e716f52ac 100644
--- a/python/pyspark/pandas/plot/core.py
+++ b/python/pyspark/pandas/plot/core.py
@@ -19,12 +19,12 @@ import importlib
 
 import pandas as pd
 import numpy as np
-from pyspark.ml.feature import Bucketizer
-from pyspark.mllib.stat import KernelDensity
-from pyspark.sql import functions as F
 from pandas.core.base import PandasObject
 from pandas.core.dtypes.inference import is_integer
 
+from pyspark.ml.feature import Bucketizer
+from pyspark.mllib.stat import KernelDensity
+from pyspark.sql import functions as F
 from pyspark.pandas.missing import unsupported_function
 from pyspark.pandas.config import get_option
 from pyspark.pandas.utils import name_like_string
diff --git a/python/pyspark/pandas/plot/matplotlib.py b/python/pyspark/pandas/plot/matplotlib.py
index 42f30ebf7ae..fe23f457187 100644
--- a/python/pyspark/pandas/plot/matplotlib.py
+++ b/python/pyspark/pandas/plot/matplotlib.py
@@ -22,7 +22,6 @@ import numpy as np
 from matplotlib.axes._base import _process_plot_format  # type: ignore[attr-defined]
 from pandas.core.dtypes.inference import is_list_like
 from pandas.io.formats.printing import pprint_thing
-
 from pandas.plotting._matplotlib import (  # type: ignore[attr-defined]
     BarPlot as PandasBarPlot,
     BoxPlot as PandasBoxPlot,
diff --git a/python/pyspark/pandas/resample.py b/python/pyspark/pandas/resample.py
index 5bb754d69b2..fdcfa3243c0 100644
--- a/python/pyspark/pandas/resample.py
+++ b/python/pyspark/pandas/resample.py
@@ -29,7 +29,6 @@ from typing import (
 )
 
 import numpy as np
-
 import pandas as pd
 from pandas.tseries.frequencies import to_offset
 
@@ -40,7 +39,6 @@ from pyspark.sql.types import (
     TimestampNTZType,
     DataType,
 )
-
 from pyspark import pandas as ps  # For running doctests and reference resolution in PyCharm.
 from pyspark.pandas._typing import FrameLike
 from pyspark.pandas.frame import DataFrame
diff --git a/python/pyspark/pandas/series.py b/python/pyspark/pandas/series.py
index e96e5c3b3dc..e0cdb1ea030 100644
--- a/python/pyspark/pandas/series.py
+++ b/python/pyspark/pandas/series.py
@@ -53,6 +53,7 @@ from pandas.api.types import (  # type: ignore[attr-defined]
     CategoricalDtype,
 )
 from pandas.tseries.frequencies import DateOffset
+
 from pyspark.sql import functions as F, Column as PySparkColumn, DataFrame as SparkDataFrame
 from pyspark.sql.types import (
     ArrayType,
@@ -71,7 +72,6 @@ from pyspark.sql.types import (
 )
 from pyspark.sql.window import Window
 from pyspark.sql.utils import get_column_class, get_window_class
-
 from pyspark import pandas as ps  # For running doctests and reference resolution in PyCharm.
 from pyspark.pandas._typing import Axis, Dtype, Label, Name, Scalar, T
 from pyspark.pandas.accessors import PandasOnSparkSeriesMethods
diff --git a/python/pyspark/pandas/spark/accessors.py b/python/pyspark/pandas/spark/accessors.py
index bcbe044185a..7fb14e6ed75 100644
--- a/python/pyspark/pandas/spark/accessors.py
+++ b/python/pyspark/pandas/spark/accessors.py
@@ -25,11 +25,8 @@ from typing import TYPE_CHECKING, Callable, Generic, List, Optional, Union
 from pyspark import StorageLevel
 from pyspark.sql import Column as PySparkColumn, DataFrame as PySparkDataFrame
 from pyspark.sql.types import DataType, StructType
-
 from pyspark.pandas._typing import IndexOpsLike
 from pyspark.pandas.internal import InternalField
-
-# For Supporting Spark Connect
 from pyspark.sql.utils import get_column_class, get_dataframe_class
 
 if TYPE_CHECKING:
diff --git a/python/pyspark/pandas/spark/functions.py b/python/pyspark/pandas/spark/functions.py
index 9fef983b46a..36ea007c4d7 100644
--- a/python/pyspark/pandas/spark/functions.py
+++ b/python/pyspark/pandas/spark/functions.py
@@ -19,8 +19,6 @@ Additional Spark functions used in pandas-on-Spark.
 """
 from pyspark import SparkContext
 from pyspark.sql.column import Column
-
-# For supporting Spark Connect
 from pyspark.sql.utils import is_remote
 
 
diff --git a/python/pyspark/pandas/sql_processor.py b/python/pyspark/pandas/sql_processor.py
index bce6921e029..1bd1cb9823c 100644
--- a/python/pyspark/pandas/sql_processor.py
+++ b/python/pyspark/pandas/sql_processor.py
@@ -18,10 +18,10 @@
 import _string  # type: ignore[import]
 from typing import Any, Dict, Optional, Union, List
 import inspect
+
 import pandas as pd
 
 from pyspark.sql import SparkSession, DataFrame as SDataFrame
-
 from pyspark import pandas as ps  # For running doctests and reference resolution in PyCharm.
 from pyspark.pandas.utils import default_session
 from pyspark.pandas.frame import DataFrame
diff --git a/python/pyspark/pandas/strings.py b/python/pyspark/pandas/strings.py
index bf9cafbaf09..abf3103dd43 100644
--- a/python/pyspark/pandas/strings.py
+++ b/python/pyspark/pandas/strings.py
@@ -30,12 +30,11 @@ from typing import (
 )
 
 import numpy as np
-
 import pandas as pd
+
 from pyspark.sql.types import StringType, BinaryType, ArrayType, LongType, MapType
 from pyspark.sql import functions as F
 from pyspark.sql.functions import pandas_udf
-
 import pyspark.pandas as ps
 
 
diff --git a/python/pyspark/pandas/supported_api_gen.py b/python/pyspark/pandas/supported_api_gen.py
index a43ad198011..c4471a0af36 100644
--- a/python/pyspark/pandas/supported_api_gen.py
+++ b/python/pyspark/pandas/supported_api_gen.py
@@ -19,7 +19,6 @@
 Generate 'Supported pandas APIs' documentation file
 """
 import warnings
-from pyspark.loose_version import LooseVersion
 from enum import Enum, unique
 from inspect import getmembers, isclass, isfunction, signature
 from typing import Any, Callable, Dict, List, NamedTuple, Set, TextIO, Tuple
@@ -27,12 +26,13 @@ from typing import Any, Callable, Dict, List, NamedTuple, Set, TextIO, Tuple
 import pyspark.pandas as ps
 import pyspark.pandas.groupby as psg
 import pyspark.pandas.window as psw
-from pyspark.pandas.exceptions import PandasNotImplementedError
-
 import pandas as pd
 import pandas.core.groupby as pdg
 import pandas.core.window as pdw
 
+from pyspark.loose_version import LooseVersion
+from pyspark.pandas.exceptions import PandasNotImplementedError
+
 MAX_MISSING_PARAMS_SIZE = 5
 COMMON_PARAMETER_SET = {
     "kwargs",
diff --git a/python/pyspark/pandas/tests/computation/test_corrwith.py b/python/pyspark/pandas/tests/computation/test_corrwith.py
index b64bf2d411b..a74c1f7c3df 100644
--- a/python/pyspark/pandas/tests/computation/test_corrwith.py
+++ b/python/pyspark/pandas/tests/computation/test_corrwith.py
@@ -16,7 +16,6 @@
 #
 import unittest
 
-
 import numpy as np
 import pandas as pd
 
diff --git a/python/pyspark/pandas/tests/computation/test_cov.py b/python/pyspark/pandas/tests/computation/test_cov.py
index 23e5ec587e9..fb40884e1c3 100644
--- a/python/pyspark/pandas/tests/computation/test_cov.py
+++ b/python/pyspark/pandas/tests/computation/test_cov.py
@@ -17,7 +17,6 @@
 import unittest
 import decimal
 
-
 import numpy as np
 import pandas as pd
 
diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py b/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py
index b62eb734a93..f1e36aecd19 100644
--- a/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py
+++ b/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py
@@ -22,7 +22,6 @@ import numpy as np
 import pandas as pd
 
 import pyspark.pandas as ps
-
 from pyspark.pandas.typedef.typehints import (
     extension_dtypes_available,
     extension_float_dtypes_available,
diff --git a/python/pyspark/pandas/tests/connect/test_parity_extension.py b/python/pyspark/pandas/tests/connect/test_parity_extension.py
index 849139980b2..7413801d3f8 100644
--- a/python/pyspark/pandas/tests/connect/test_parity_extension.py
+++ b/python/pyspark/pandas/tests/connect/test_parity_extension.py
@@ -18,6 +18,7 @@ import unittest
 
 import pandas as pd
 import numpy as np
+
 from pyspark import pandas as ps
 from pyspark.pandas.tests.test_extension import ExtensionTestsMixin
 from pyspark.testing.connectutils import ReusedConnectTestCase
diff --git a/python/pyspark/pandas/tests/connect/test_parity_indexing.py b/python/pyspark/pandas/tests/connect/test_parity_indexing.py
index 9a14978539f..950bd2d0b2d 100644
--- a/python/pyspark/pandas/tests/connect/test_parity_indexing.py
+++ b/python/pyspark/pandas/tests/connect/test_parity_indexing.py
@@ -17,6 +17,7 @@
 import unittest
 
 import pandas as pd
+
 from pyspark import pandas as ps
 from pyspark.pandas.tests.test_indexing import BasicIndexingTestsMixin
 from pyspark.testing.connectutils import ReusedConnectTestCase
diff --git a/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py b/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py
index 6bf0a02589b..6cc0a277718 100644
--- a/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py
+++ b/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py
@@ -17,6 +17,7 @@
 import unittest
 
 import pandas as pd
+
 from pyspark import pandas as ps
 from pyspark.pandas.tests.test_numpy_compat import NumPyCompatTestsMixin
 from pyspark.testing.connectutils import ReusedConnectTestCase
diff --git a/python/pyspark/pandas/tests/data_type_ops/testing_utils.py b/python/pyspark/pandas/tests/data_type_ops/testing_utils.py
index 44673053bc2..8bafc86c8dd 100644
--- a/python/pyspark/pandas/tests/data_type_ops/testing_utils.py
+++ b/python/pyspark/pandas/tests/data_type_ops/testing_utils.py
@@ -23,13 +23,11 @@ import pandas as pd
 
 import pyspark.pandas as ps
 from pyspark.pandas.typedef import extension_dtypes
-
 from pyspark.pandas.typedef.typehints import (
     extension_dtypes_available,
     extension_float_dtypes_available,
     extension_object_dtypes_available,
 )
-
 from pyspark.testing.pandasutils import ComparisonTestBase
 
 if extension_dtypes_available:
diff --git a/python/pyspark/pandas/tests/frame/test_reshaping.py b/python/pyspark/pandas/tests/frame/test_reshaping.py
index b5176a87ca6..0a1c5e5b098 100644
--- a/python/pyspark/pandas/tests/frame/test_reshaping.py
+++ b/python/pyspark/pandas/tests/frame/test_reshaping.py
@@ -22,7 +22,6 @@ import pandas as pd
 
 from pyspark import pandas as ps
 from pyspark.pandas.config import option_context
-
 from pyspark.testing.pandasutils import ComparisonTestBase
 from pyspark.testing.sqlutils import SQLTestUtils
 
diff --git a/python/pyspark/pandas/tests/frame/test_spark.py b/python/pyspark/pandas/tests/frame/test_spark.py
index da16e943578..4413279e32f 100644
--- a/python/pyspark/pandas/tests/frame/test_spark.py
+++ b/python/pyspark/pandas/tests/frame/test_spark.py
@@ -21,10 +21,10 @@ from io import StringIO
 
 import numpy as np
 import pandas as pd
+
 from pyspark import StorageLevel
 from pyspark.ml.linalg import SparseVector
 from pyspark.sql.types import StructType
-
 from pyspark import pandas as ps
 from pyspark.pandas.frame import CachedDataFrame
 from pyspark.pandas.exceptions import PandasNotImplementedError
diff --git a/python/pyspark/pandas/tests/series/test_series.py b/python/pyspark/pandas/tests/series/test_series.py
index 75c81431d32..b9fa6f6063f 100644
--- a/python/pyspark/pandas/tests/series/test_series.py
+++ b/python/pyspark/pandas/tests/series/test_series.py
@@ -18,7 +18,6 @@
 import unittest
 from collections import defaultdict
 import inspect
-
 from datetime import datetime, timedelta
 
 import numpy as np
diff --git a/python/pyspark/pandas/tests/series/test_stat.py b/python/pyspark/pandas/tests/series/test_stat.py
index 62672b1e8d5..1e379d32d56 100644
--- a/python/pyspark/pandas/tests/series/test_stat.py
+++ b/python/pyspark/pandas/tests/series/test_stat.py
@@ -15,10 +15,10 @@
 # limitations under the License.
 #
 import unittest
+from decimal import Decimal
 
 import numpy as np
 import pandas as pd
-from decimal import Decimal
 
 from pyspark import pandas as ps
 from pyspark.testing.pandasutils import ComparisonTestBase
diff --git a/python/pyspark/pandas/tests/test_indexops_spark.py b/python/pyspark/pandas/tests/test_indexops_spark.py
index 3f7691a3863..2a0bcf242fa 100644
--- a/python/pyspark/pandas/tests/test_indexops_spark.py
+++ b/python/pyspark/pandas/tests/test_indexops_spark.py
@@ -16,9 +16,9 @@
 #
 
 import pandas as pd
+
 from pyspark.errors import AnalysisException
 from pyspark.sql import functions as F
-
 from pyspark import pandas as ps
 from pyspark.testing.pandasutils import PandasOnSparkTestCase
 from pyspark.testing.sqlutils import SQLTestUtils
diff --git a/python/pyspark/pandas/tests/test_stats.py b/python/pyspark/pandas/tests/test_stats.py
index 40ee64a5f68..bdc83ad7d5f 100644
--- a/python/pyspark/pandas/tests/test_stats.py
+++ b/python/pyspark/pandas/tests/test_stats.py
@@ -16,6 +16,7 @@
 #
 
 import unittest
+
 import numpy as np
 import pandas as pd
 
diff --git a/python/pyspark/pandas/utils.py b/python/pyspark/pandas/utils.py
index b647697edf9..9f372a53079 100644
--- a/python/pyspark/pandas/utils.py
+++ b/python/pyspark/pandas/utils.py
@@ -37,14 +37,13 @@ from typing import (
 )
 import warnings
 
+import pandas as pd
+from pandas.api.types import is_list_like  # type: ignore[attr-defined]
+
 from pyspark.sql import functions as F, Column, DataFrame as PySparkDataFrame, SparkSession
 from pyspark.sql.types import DoubleType
 from pyspark.sql.utils import is_remote, get_dataframe_class
 from pyspark.errors import PySparkTypeError
-import pandas as pd
-from pandas.api.types import is_list_like  # type: ignore[attr-defined]
-
-# For running doctests and reference resolution in PyCharm.
 from pyspark import pandas as ps  # noqa: F401
 from pyspark.pandas._typing import (
     Axis,
diff --git a/python/pyspark/pandas/window.py b/python/pyspark/pandas/window.py
index db98867674a..0aaeb7df89b 100644
--- a/python/pyspark/pandas/window.py
+++ b/python/pyspark/pandas/window.py
@@ -30,8 +30,6 @@ from pyspark.pandas.missing.window import (
     MissingPandasLikeExponentialMoving,
     MissingPandasLikeExponentialMovingGroupby,
 )
-
-# For running doctests and reference resolution in PyCharm.
 from pyspark import pandas as ps  # noqa: F401
 from pyspark.pandas._typing import FrameLike
 from pyspark.pandas.groupby import GroupBy, DataFrameGroupBy
diff --git a/python/pyspark/profiler.py b/python/pyspark/profiler.py
index 6aa504c7c08..b7ea6a19063 100644
--- a/python/pyspark/profiler.py
+++ b/python/pyspark/profiler.py
@@ -27,7 +27,6 @@ from typing import (
     Union,
     cast,
 )
-
 import cProfile
 import inspect
 import pstats
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index 0e0d958f46a..d2a8bc4b111 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -94,6 +94,9 @@ if TYPE_CHECKING:
     import socket
     import io
 
+    from py4j.java_gateway import JavaObject
+    from py4j.java_collections import JavaArray
+
     from pyspark._typing import NonUDFType
     from pyspark._typing import S, NumberOrArray
     from pyspark.context import SparkContext
@@ -119,9 +122,6 @@ if TYPE_CHECKING:
         SQLTableUDFType,
     )
 
-    from py4j.java_gateway import JavaObject
-    from py4j.java_collections import JavaArray
-
 T = TypeVar("T")
 T_co = TypeVar("T_co", covariant=True)
 U = TypeVar("U")
diff --git a/python/pyspark/shuffle.py b/python/pyspark/shuffle.py
index da03110c321..57964654963 100644
--- a/python/pyspark/shuffle.py
+++ b/python/pyspark/shuffle.py
@@ -24,8 +24,8 @@ import itertools
 import operator
 import random
 import sys
-
 import heapq
+
 from pyspark.serializers import (
     BatchedSerializer,
     CPickleSerializer,
diff --git a/python/pyspark/tests/test_statcounter.py b/python/pyspark/tests/test_statcounter.py
index 747f42e67b2..1879ea6974e 100644
--- a/python/pyspark/tests/test_statcounter.py
+++ b/python/pyspark/tests/test_statcounter.py
@@ -14,9 +14,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import math
+
 from pyspark.statcounter import StatCounter
 from pyspark.testing.utils import ReusedPySparkTestCase
-import math
 
 
 class StatCounterTests(ReusedPySparkTestCase):
diff --git a/python/pyspark/util.py b/python/pyspark/util.py
index 47f5933079e..9c70bac2a3d 100644
--- a/python/pyspark/util.py
+++ b/python/pyspark/util.py
@@ -28,10 +28,10 @@ import typing
 from types import TracebackType
 from typing import Any, Callable, IO, Iterator, List, Optional, TextIO, Tuple, Union
 
-from pyspark.errors import PySparkRuntimeError
-
 from py4j.clientserver import ClientServer
 
+from pyspark.errors import PySparkRuntimeError
+
 __all__: List[str] = []
 
 from py4j.java_gateway import JavaObject
diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py
index a073942adb6..3d08f6c4bae 100644
--- a/python/pyspark/worker.py
+++ b/python/pyspark/worker.py
@@ -24,7 +24,6 @@ import time
 from inspect import getfullargspec
 import json
 from typing import Any, Callable, Iterable, Iterator
-
 import faulthandler
 
 from pyspark.accumulators import _accumulatorRegistry


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org