You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2023/10/10 05:27:03 UTC
[spark] branch master updated: [SPARK-45450][PYTHON] Fix imports according to PEP8: pyspark.pandas and pyspark (core)
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 4dbe4ffebfc [SPARK-45450][PYTHON] Fix imports according to PEP8: pyspark.pandas and pyspark (core)
4dbe4ffebfc is described below
commit 4dbe4ffebfc8cc3a894c9e798c5a7b364cf7a399
Author: Hyukjin Kwon <gu...@apache.org>
AuthorDate: Tue Oct 10 14:26:45 2023 +0900
[SPARK-45450][PYTHON] Fix imports according to PEP8: pyspark.pandas and pyspark (core)
### What changes were proposed in this pull request?
This PR proposes to fix imports according to PEP8 in `pyspark.pandas` and `pyspark.*` (core), see https://peps.python.org/pep-0008/#imports.
### Why are the changes needed?
I have not been fixing these import-ordering issues because they are minor. However, the practice is spreading across all PySpark packages, and I think we should fix them all so that other contributors do not follow the non-standard practice.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Existing linters and tests should cover this change.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #43257 from HyukjinKwon/SPARK-45450.
Authored-by: Hyukjin Kwon <gu...@apache.org>
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
python/pyspark/conf.py | 1 +
python/pyspark/errors_doc_gen.py | 1 +
python/pyspark/java_gateway.py | 1 +
python/pyspark/join.py | 3 ++-
python/pyspark/pandas/accessors.py | 1 -
python/pyspark/pandas/base.py | 2 +-
python/pyspark/pandas/config.py | 1 -
python/pyspark/pandas/correlation.py | 1 -
python/pyspark/pandas/data_type_ops/date_ops.py | 1 -
python/pyspark/pandas/data_type_ops/datetime_ops.py | 1 -
python/pyspark/pandas/data_type_ops/string_ops.py | 1 -
python/pyspark/pandas/frame.py | 7 ++-----
python/pyspark/pandas/generic.py | 1 -
python/pyspark/pandas/groupby.py | 2 --
python/pyspark/pandas/indexes/base.py | 1 -
python/pyspark/pandas/indexes/multi.py | 2 --
python/pyspark/pandas/indexing.py | 6 ++----
python/pyspark/pandas/internal.py | 11 +++--------
python/pyspark/pandas/mlflow.py | 4 ++--
python/pyspark/pandas/namespace.py | 2 +-
python/pyspark/pandas/numpy_compat.py | 2 +-
python/pyspark/pandas/plot/core.py | 6 +++---
python/pyspark/pandas/plot/matplotlib.py | 1 -
python/pyspark/pandas/resample.py | 2 --
python/pyspark/pandas/series.py | 2 +-
python/pyspark/pandas/spark/accessors.py | 3 ---
python/pyspark/pandas/spark/functions.py | 2 --
python/pyspark/pandas/sql_processor.py | 2 +-
python/pyspark/pandas/strings.py | 3 +--
python/pyspark/pandas/supported_api_gen.py | 6 +++---
python/pyspark/pandas/tests/computation/test_corrwith.py | 1 -
python/pyspark/pandas/tests/computation/test_cov.py | 1 -
.../pandas/tests/connect/data_type_ops/testing_utils.py | 1 -
python/pyspark/pandas/tests/connect/test_parity_extension.py | 1 +
python/pyspark/pandas/tests/connect/test_parity_indexing.py | 1 +
.../pyspark/pandas/tests/connect/test_parity_numpy_compat.py | 1 +
python/pyspark/pandas/tests/data_type_ops/testing_utils.py | 2 --
python/pyspark/pandas/tests/frame/test_reshaping.py | 1 -
python/pyspark/pandas/tests/frame/test_spark.py | 2 +-
python/pyspark/pandas/tests/series/test_series.py | 1 -
python/pyspark/pandas/tests/series/test_stat.py | 2 +-
python/pyspark/pandas/tests/test_indexops_spark.py | 2 +-
python/pyspark/pandas/tests/test_stats.py | 1 +
python/pyspark/pandas/utils.py | 7 +++----
python/pyspark/pandas/window.py | 2 --
python/pyspark/profiler.py | 1 -
python/pyspark/rdd.py | 6 +++---
python/pyspark/shuffle.py | 2 +-
python/pyspark/tests/test_statcounter.py | 3 ++-
python/pyspark/util.py | 4 ++--
python/pyspark/worker.py | 1 -
51 files changed, 44 insertions(+), 78 deletions(-)
diff --git a/python/pyspark/conf.py b/python/pyspark/conf.py
index ddf8c22feea..ba43a506375 100644
--- a/python/pyspark/conf.py
+++ b/python/pyspark/conf.py
@@ -21,6 +21,7 @@ import sys
from typing import Dict, List, Optional, Tuple, cast, overload
from py4j.java_gateway import JVMView, JavaObject
+
from pyspark.errors import PySparkRuntimeError
diff --git a/python/pyspark/errors_doc_gen.py b/python/pyspark/errors_doc_gen.py
index e9b229062ba..a30e2513f91 100644
--- a/python/pyspark/errors_doc_gen.py
+++ b/python/pyspark/errors_doc_gen.py
@@ -1,4 +1,5 @@
import re
+
from pyspark.errors.error_classes import ERROR_CLASSES_MAP
diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py
index 691120a1312..39a90a0afba 100644
--- a/python/pyspark/java_gateway.py
+++ b/python/pyspark/java_gateway.py
@@ -28,6 +28,7 @@ from subprocess import Popen, PIPE
from py4j.java_gateway import java_import, JavaGateway, JavaObject, GatewayParameters
from py4j.clientserver import ClientServer, JavaParameters, PythonParameters
+
from pyspark.find_spark_home import _find_spark_home
from pyspark.serializers import read_int, write_with_length, UTF8Deserializer
from pyspark.errors import PySparkRuntimeError
diff --git a/python/pyspark/join.py b/python/pyspark/join.py
index 003e9ec2fc8..e12d674614d 100644
--- a/python/pyspark/join.py
+++ b/python/pyspark/join.py
@@ -31,9 +31,10 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
-from pyspark.resultiterable import ResultIterable
from functools import reduce
+from pyspark.resultiterable import ResultIterable
+
def _do_python_join(rdd, other, numPartitions, dispatch):
vs = rdd.mapValues(lambda v: (1, v))
diff --git a/python/pyspark/pandas/accessors.py b/python/pyspark/pandas/accessors.py
index 4e96f4d4cf3..4c36f7976af 100644
--- a/python/pyspark/pandas/accessors.py
+++ b/python/pyspark/pandas/accessors.py
@@ -27,7 +27,6 @@ import pandas as pd
from pyspark.sql import functions as F
from pyspark.sql.functions import pandas_udf
from pyspark.sql.types import DataType, LongType, StructField, StructType
-
from pyspark.pandas._typing import DataFrameOrSeries, Name
from pyspark.pandas.internal import (
InternalField,
diff --git a/python/pyspark/pandas/base.py b/python/pyspark/pandas/base.py
index fa513e8b9b6..771d79dc6e0 100644
--- a/python/pyspark/pandas/base.py
+++ b/python/pyspark/pandas/base.py
@@ -27,9 +27,9 @@ from typing import Any, Callable, Optional, Sequence, Tuple, Union, cast, TYPE_C
import numpy as np
import pandas as pd
from pandas.api.types import is_list_like, CategoricalDtype # type: ignore[attr-defined]
+
from pyspark.sql import functions as F, Column, Window
from pyspark.sql.types import LongType, BooleanType, NumericType
-
from pyspark import pandas as ps # For running doctests and reference resolution in PyCharm.
from pyspark.pandas._typing import Axis, Dtype, IndexOpsLike, Label, SeriesOrIndex
from pyspark.pandas.config import get_option, option_context
diff --git a/python/pyspark/pandas/config.py b/python/pyspark/pandas/config.py
index 79cb859faa2..2228e41c1df 100644
--- a/python/pyspark/pandas/config.py
+++ b/python/pyspark/pandas/config.py
@@ -23,7 +23,6 @@ import json
from typing import Any, Callable, Dict, Iterator, List, Tuple, Union
from pyspark._globals import _NoValue, _NoValueType
-
from pyspark.pandas.utils import default_session
diff --git a/python/pyspark/pandas/correlation.py b/python/pyspark/pandas/correlation.py
index 75d3a857a0f..da51dc2cc61 100644
--- a/python/pyspark/pandas/correlation.py
+++ b/python/pyspark/pandas/correlation.py
@@ -19,7 +19,6 @@ from typing import List
from pyspark.sql import DataFrame as SparkDataFrame, functions as F
from pyspark.sql.window import Window
-
from pyspark.pandas.utils import verify_temp_column_name
diff --git a/python/pyspark/pandas/data_type_ops/date_ops.py b/python/pyspark/pandas/data_type_ops/date_ops.py
index 51d1018a304..771b5d38a17 100644
--- a/python/pyspark/pandas/data_type_ops/date_ops.py
+++ b/python/pyspark/pandas/data_type_ops/date_ops.py
@@ -26,7 +26,6 @@ from pandas.api.types import CategoricalDtype
from pyspark.sql import functions as F
from pyspark.sql.types import BooleanType, DateType, StringType
from pyspark.sql.utils import get_column_class
-
from pyspark.pandas._typing import Dtype, IndexOpsLike, SeriesOrIndex
from pyspark.pandas.base import column_op, IndexOpsMixin
from pyspark.pandas.data_type_ops.base import (
diff --git a/python/pyspark/pandas/data_type_ops/datetime_ops.py b/python/pyspark/pandas/data_type_ops/datetime_ops.py
index ea9b994076b..8d5853b6824 100644
--- a/python/pyspark/pandas/data_type_ops/datetime_ops.py
+++ b/python/pyspark/pandas/data_type_ops/datetime_ops.py
@@ -34,7 +34,6 @@ from pyspark.sql.types import (
NumericType,
)
from pyspark.sql.utils import pyspark_column_op
-
from pyspark.pandas._typing import Dtype, IndexOpsLike, SeriesOrIndex
from pyspark.pandas.base import IndexOpsMixin
from pyspark.pandas.data_type_ops.base import (
diff --git a/python/pyspark/pandas/data_type_ops/string_ops.py b/python/pyspark/pandas/data_type_ops/string_ops.py
index 53095c55e81..6c8bc754ac9 100644
--- a/python/pyspark/pandas/data_type_ops/string_ops.py
+++ b/python/pyspark/pandas/data_type_ops/string_ops.py
@@ -23,7 +23,6 @@ from pandas.api.types import CategoricalDtype
from pyspark.sql import functions as F
from pyspark.sql.types import IntegralType, StringType
from pyspark.sql.utils import pyspark_column_op
-
from pyspark.pandas._typing import Dtype, IndexOpsLike, SeriesOrIndex
from pyspark.pandas.base import column_op, IndexOpsMixin
from pyspark.pandas.data_type_ops.base import (
diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py
index faa595f80e3..8f3555685ff 100644
--- a/python/pyspark/pandas/frame.py
+++ b/python/pyspark/pandas/frame.py
@@ -59,14 +59,14 @@ from pandas.api.types import ( # type: ignore[attr-defined]
)
from pandas.tseries.frequencies import DateOffset, to_offset
-from pyspark.errors import PySparkValueError
-
if TYPE_CHECKING:
from pandas.io.formats.style import Styler
from pandas.core.dtypes.common import infer_dtype_from_object
from pandas.core.accessor import CachedAccessor
from pandas.core.dtypes.inference import is_sequence
+
+from pyspark.errors import PySparkValueError
from pyspark import StorageLevel
from pyspark.sql import Column as PySparkColumn, DataFrame as PySparkDataFrame, functions as F
from pyspark.sql.functions import pandas_udf
@@ -86,7 +86,6 @@ from pyspark.sql.types import (
NullType,
)
from pyspark.sql.window import Window
-
from pyspark import pandas as ps # For running doctests and reference resolution in PyCharm.
from pyspark.pandas._typing import (
Axis,
@@ -150,8 +149,6 @@ from pyspark.pandas.typedef.typehints import (
create_tuple_for_frame_type,
)
from pyspark.pandas.plot import PandasOnSparkPlotAccessor
-
-# For supporting Spark Connect
from pyspark.sql.utils import get_column_class, get_dataframe_class
if TYPE_CHECKING:
diff --git a/python/pyspark/pandas/generic.py b/python/pyspark/pandas/generic.py
index 81f4f6db7ed..300fd73e43f 100644
--- a/python/pyspark/pandas/generic.py
+++ b/python/pyspark/pandas/generic.py
@@ -48,7 +48,6 @@ from pyspark.sql.types import (
LongType,
NumericType,
)
-
from pyspark import pandas as ps # For running doctests and reference resolution in PyCharm.
from pyspark.pandas._typing import (
Axis,
diff --git a/python/pyspark/pandas/groupby.py b/python/pyspark/pandas/groupby.py
index 3d51fabd4b2..b19a40b837a 100644
--- a/python/pyspark/pandas/groupby.py
+++ b/python/pyspark/pandas/groupby.py
@@ -44,7 +44,6 @@ import warnings
import pandas as pd
from pandas.api.types import is_number, is_hashable, is_list_like # type: ignore[attr-defined]
-
from pandas.core.common import _builtin_table # type: ignore[attr-defined]
from pyspark.sql import Column, DataFrame as SparkDataFrame, Window, functions as F
@@ -57,7 +56,6 @@ from pyspark.sql.types import (
StructType,
StringType,
)
-
from pyspark import pandas as ps # For running doctests and reference resolution in PyCharm.
from pyspark.pandas._typing import Axis, FrameLike, Label, Name
from pyspark.pandas.typedef import infer_return_type, DataFrameType, ScalarType, SeriesType
diff --git a/python/pyspark/pandas/indexes/base.py b/python/pyspark/pandas/indexes/base.py
index 5652c6a8a85..2ec0a39dc71 100644
--- a/python/pyspark/pandas/indexes/base.py
+++ b/python/pyspark/pandas/indexes/base.py
@@ -53,7 +53,6 @@ from pyspark.sql.types import (
TimestampType,
TimestampNTZType,
)
-
from pyspark import pandas as ps # For running doctests and reference resolution in PyCharm.
from pyspark.pandas._typing import Dtype, Label, Name, Scalar
from pyspark.pandas.config import get_option, option_context
diff --git a/python/pyspark/pandas/indexes/multi.py b/python/pyspark/pandas/indexes/multi.py
index 043d6762fb7..9917a42fb38 100644
--- a/python/pyspark/pandas/indexes/multi.py
+++ b/python/pyspark/pandas/indexes/multi.py
@@ -24,8 +24,6 @@ from pandas.api.types import is_hashable, is_list_like # type: ignore[attr-defi
from pyspark.sql import functions as F, Column as PySparkColumn, Window
from pyspark.sql.types import DataType
from pyspark.sql.utils import get_column_class
-
-# For running doctests and reference resolution in PyCharm.
from pyspark import pandas as ps
from pyspark.pandas._typing import Label, Name, Scalar
from pyspark.pandas.exceptions import PandasNotImplementedError
diff --git a/python/pyspark/pandas/indexing.py b/python/pyspark/pandas/indexing.py
index c725d01d673..de5baa3fae1 100644
--- a/python/pyspark/pandas/indexing.py
+++ b/python/pyspark/pandas/indexing.py
@@ -25,11 +25,11 @@ from typing import Any, Optional, List, Tuple, TYPE_CHECKING, Union, cast, Sized
import pandas as pd
from pandas.api.types import is_list_like # type: ignore[attr-defined]
+import numpy as np
+
from pyspark.sql import functions as F, Column as PySparkColumn
from pyspark.sql.types import BooleanType, LongType, DataType
from pyspark.errors import AnalysisException
-import numpy as np
-
from pyspark import pandas as ps # noqa: F401
from pyspark.pandas._typing import Label, Name, Scalar
from pyspark.pandas.internal import (
@@ -50,8 +50,6 @@ from pyspark.pandas.utils import (
spark_column_equals,
verify_temp_column_name,
)
-
-# For Supporting Spark Connect
from pyspark.sql.utils import get_column_class
if TYPE_CHECKING:
diff --git a/python/pyspark/pandas/internal.py b/python/pyspark/pandas/internal.py
index e025d91e7b7..2966db073d0 100644
--- a/python/pyspark/pandas/internal.py
+++ b/python/pyspark/pandas/internal.py
@@ -24,6 +24,7 @@ from typing import Any, Dict, List, Optional, Sequence, Tuple, Union, TYPE_CHECK
import numpy as np
import pandas as pd
from pandas.api.types import CategoricalDtype # noqa: F401
+
from pyspark._globals import _NoValue, _NoValueType
from pyspark.sql import (
functions as F,
@@ -40,17 +41,9 @@ from pyspark.sql.types import ( # noqa: F401
StringType,
)
from pyspark.sql.utils import is_timestamp_ntz_preferred
-
-# For supporting Spark Connect
from pyspark.sql.utils import is_remote, get_column_class, get_dataframe_class
-
-# For running doctests and reference resolution in PyCharm.
from pyspark import pandas as ps
from pyspark.pandas._typing import Label
-
-if TYPE_CHECKING:
- # This is required in old Python 3.5 to prevent circular reference.
- from pyspark.pandas.series import Series
from pyspark.pandas.spark.utils import as_nullable_spark_type, force_decimal_precision_scale
from pyspark.pandas.data_type_ops.base import DataTypeOps
from pyspark.pandas.typedef import (
@@ -71,6 +64,8 @@ from pyspark.pandas.utils import (
spark_column_equals,
)
+if TYPE_CHECKING:
+ from pyspark.pandas.series import Series
# A function to turn given numbers to Spark columns that represent pandas-on-Spark index.
SPARK_INDEX_NAME_FORMAT = "__index_level_{}__".format
diff --git a/python/pyspark/pandas/mlflow.py b/python/pyspark/pandas/mlflow.py
index b78ae934d74..a609f9b7069 100644
--- a/python/pyspark/pandas/mlflow.py
+++ b/python/pyspark/pandas/mlflow.py
@@ -19,12 +19,12 @@
MLflow-related functions to load models and apply them to pandas-on-Spark dataframes.
"""
from typing import List, Union
+from typing import Any
-from pyspark.sql.types import DataType
import pandas as pd
import numpy as np
-from typing import Any
+from pyspark.sql.types import DataType
from pyspark.pandas._typing import Label, Dtype
from pyspark.pandas.utils import lazy_property, default_session
from pyspark.pandas.frame import DataFrame
diff --git a/python/pyspark/pandas/namespace.py b/python/pyspark/pandas/namespace.py
index a700a243e5d..e8898ab4893 100644
--- a/python/pyspark/pandas/namespace.py
+++ b/python/pyspark/pandas/namespace.py
@@ -48,6 +48,7 @@ from pandas.api.types import ( # type: ignore[attr-defined]
from pandas.tseries.offsets import DateOffset
import pyarrow as pa
import pyarrow.parquet as pq
+
from pyspark.sql import functions as F, Column as PySparkColumn
from pyspark.sql.functions import pandas_udf
from pyspark.sql.types import (
@@ -67,7 +68,6 @@ from pyspark.sql.types import (
DataType,
)
from pyspark.sql.dataframe import DataFrame as PySparkDataFrame
-
from pyspark import pandas as ps
from pyspark.pandas._typing import Axis, Dtype, Label, Name
from pyspark.pandas.base import IndexOpsMixin
diff --git a/python/pyspark/pandas/numpy_compat.py b/python/pyspark/pandas/numpy_compat.py
index efffaa7042c..6fed89157d2 100644
--- a/python/pyspark/pandas/numpy_compat.py
+++ b/python/pyspark/pandas/numpy_compat.py
@@ -17,10 +17,10 @@
from typing import Any, Callable, no_type_check
import numpy as np
+
from pyspark.sql import functions as F
from pyspark.sql.pandas.functions import pandas_udf
from pyspark.sql.types import DoubleType, LongType, BooleanType
-
from pyspark.pandas.base import IndexOpsMixin
diff --git a/python/pyspark/pandas/plot/core.py b/python/pyspark/pandas/plot/core.py
index ccae96a2ef8..e7e716f52ac 100644
--- a/python/pyspark/pandas/plot/core.py
+++ b/python/pyspark/pandas/plot/core.py
@@ -19,12 +19,12 @@ import importlib
import pandas as pd
import numpy as np
-from pyspark.ml.feature import Bucketizer
-from pyspark.mllib.stat import KernelDensity
-from pyspark.sql import functions as F
from pandas.core.base import PandasObject
from pandas.core.dtypes.inference import is_integer
+from pyspark.ml.feature import Bucketizer
+from pyspark.mllib.stat import KernelDensity
+from pyspark.sql import functions as F
from pyspark.pandas.missing import unsupported_function
from pyspark.pandas.config import get_option
from pyspark.pandas.utils import name_like_string
diff --git a/python/pyspark/pandas/plot/matplotlib.py b/python/pyspark/pandas/plot/matplotlib.py
index 42f30ebf7ae..fe23f457187 100644
--- a/python/pyspark/pandas/plot/matplotlib.py
+++ b/python/pyspark/pandas/plot/matplotlib.py
@@ -22,7 +22,6 @@ import numpy as np
from matplotlib.axes._base import _process_plot_format # type: ignore[attr-defined]
from pandas.core.dtypes.inference import is_list_like
from pandas.io.formats.printing import pprint_thing
-
from pandas.plotting._matplotlib import ( # type: ignore[attr-defined]
BarPlot as PandasBarPlot,
BoxPlot as PandasBoxPlot,
diff --git a/python/pyspark/pandas/resample.py b/python/pyspark/pandas/resample.py
index 5bb754d69b2..fdcfa3243c0 100644
--- a/python/pyspark/pandas/resample.py
+++ b/python/pyspark/pandas/resample.py
@@ -29,7 +29,6 @@ from typing import (
)
import numpy as np
-
import pandas as pd
from pandas.tseries.frequencies import to_offset
@@ -40,7 +39,6 @@ from pyspark.sql.types import (
TimestampNTZType,
DataType,
)
-
from pyspark import pandas as ps # For running doctests and reference resolution in PyCharm.
from pyspark.pandas._typing import FrameLike
from pyspark.pandas.frame import DataFrame
diff --git a/python/pyspark/pandas/series.py b/python/pyspark/pandas/series.py
index e96e5c3b3dc..e0cdb1ea030 100644
--- a/python/pyspark/pandas/series.py
+++ b/python/pyspark/pandas/series.py
@@ -53,6 +53,7 @@ from pandas.api.types import ( # type: ignore[attr-defined]
CategoricalDtype,
)
from pandas.tseries.frequencies import DateOffset
+
from pyspark.sql import functions as F, Column as PySparkColumn, DataFrame as SparkDataFrame
from pyspark.sql.types import (
ArrayType,
@@ -71,7 +72,6 @@ from pyspark.sql.types import (
)
from pyspark.sql.window import Window
from pyspark.sql.utils import get_column_class, get_window_class
-
from pyspark import pandas as ps # For running doctests and reference resolution in PyCharm.
from pyspark.pandas._typing import Axis, Dtype, Label, Name, Scalar, T
from pyspark.pandas.accessors import PandasOnSparkSeriesMethods
diff --git a/python/pyspark/pandas/spark/accessors.py b/python/pyspark/pandas/spark/accessors.py
index bcbe044185a..7fb14e6ed75 100644
--- a/python/pyspark/pandas/spark/accessors.py
+++ b/python/pyspark/pandas/spark/accessors.py
@@ -25,11 +25,8 @@ from typing import TYPE_CHECKING, Callable, Generic, List, Optional, Union
from pyspark import StorageLevel
from pyspark.sql import Column as PySparkColumn, DataFrame as PySparkDataFrame
from pyspark.sql.types import DataType, StructType
-
from pyspark.pandas._typing import IndexOpsLike
from pyspark.pandas.internal import InternalField
-
-# For Supporting Spark Connect
from pyspark.sql.utils import get_column_class, get_dataframe_class
if TYPE_CHECKING:
diff --git a/python/pyspark/pandas/spark/functions.py b/python/pyspark/pandas/spark/functions.py
index 9fef983b46a..36ea007c4d7 100644
--- a/python/pyspark/pandas/spark/functions.py
+++ b/python/pyspark/pandas/spark/functions.py
@@ -19,8 +19,6 @@ Additional Spark functions used in pandas-on-Spark.
"""
from pyspark import SparkContext
from pyspark.sql.column import Column
-
-# For supporting Spark Connect
from pyspark.sql.utils import is_remote
diff --git a/python/pyspark/pandas/sql_processor.py b/python/pyspark/pandas/sql_processor.py
index bce6921e029..1bd1cb9823c 100644
--- a/python/pyspark/pandas/sql_processor.py
+++ b/python/pyspark/pandas/sql_processor.py
@@ -18,10 +18,10 @@
import _string # type: ignore[import]
from typing import Any, Dict, Optional, Union, List
import inspect
+
import pandas as pd
from pyspark.sql import SparkSession, DataFrame as SDataFrame
-
from pyspark import pandas as ps # For running doctests and reference resolution in PyCharm.
from pyspark.pandas.utils import default_session
from pyspark.pandas.frame import DataFrame
diff --git a/python/pyspark/pandas/strings.py b/python/pyspark/pandas/strings.py
index bf9cafbaf09..abf3103dd43 100644
--- a/python/pyspark/pandas/strings.py
+++ b/python/pyspark/pandas/strings.py
@@ -30,12 +30,11 @@ from typing import (
)
import numpy as np
-
import pandas as pd
+
from pyspark.sql.types import StringType, BinaryType, ArrayType, LongType, MapType
from pyspark.sql import functions as F
from pyspark.sql.functions import pandas_udf
-
import pyspark.pandas as ps
diff --git a/python/pyspark/pandas/supported_api_gen.py b/python/pyspark/pandas/supported_api_gen.py
index a43ad198011..c4471a0af36 100644
--- a/python/pyspark/pandas/supported_api_gen.py
+++ b/python/pyspark/pandas/supported_api_gen.py
@@ -19,7 +19,6 @@
Generate 'Supported pandas APIs' documentation file
"""
import warnings
-from pyspark.loose_version import LooseVersion
from enum import Enum, unique
from inspect import getmembers, isclass, isfunction, signature
from typing import Any, Callable, Dict, List, NamedTuple, Set, TextIO, Tuple
@@ -27,12 +26,13 @@ from typing import Any, Callable, Dict, List, NamedTuple, Set, TextIO, Tuple
import pyspark.pandas as ps
import pyspark.pandas.groupby as psg
import pyspark.pandas.window as psw
-from pyspark.pandas.exceptions import PandasNotImplementedError
-
import pandas as pd
import pandas.core.groupby as pdg
import pandas.core.window as pdw
+from pyspark.loose_version import LooseVersion
+from pyspark.pandas.exceptions import PandasNotImplementedError
+
MAX_MISSING_PARAMS_SIZE = 5
COMMON_PARAMETER_SET = {
"kwargs",
diff --git a/python/pyspark/pandas/tests/computation/test_corrwith.py b/python/pyspark/pandas/tests/computation/test_corrwith.py
index b64bf2d411b..a74c1f7c3df 100644
--- a/python/pyspark/pandas/tests/computation/test_corrwith.py
+++ b/python/pyspark/pandas/tests/computation/test_corrwith.py
@@ -16,7 +16,6 @@
#
import unittest
-
import numpy as np
import pandas as pd
diff --git a/python/pyspark/pandas/tests/computation/test_cov.py b/python/pyspark/pandas/tests/computation/test_cov.py
index 23e5ec587e9..fb40884e1c3 100644
--- a/python/pyspark/pandas/tests/computation/test_cov.py
+++ b/python/pyspark/pandas/tests/computation/test_cov.py
@@ -17,7 +17,6 @@
import unittest
import decimal
-
import numpy as np
import pandas as pd
diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py b/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py
index b62eb734a93..f1e36aecd19 100644
--- a/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py
+++ b/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py
@@ -22,7 +22,6 @@ import numpy as np
import pandas as pd
import pyspark.pandas as ps
-
from pyspark.pandas.typedef.typehints import (
extension_dtypes_available,
extension_float_dtypes_available,
diff --git a/python/pyspark/pandas/tests/connect/test_parity_extension.py b/python/pyspark/pandas/tests/connect/test_parity_extension.py
index 849139980b2..7413801d3f8 100644
--- a/python/pyspark/pandas/tests/connect/test_parity_extension.py
+++ b/python/pyspark/pandas/tests/connect/test_parity_extension.py
@@ -18,6 +18,7 @@ import unittest
import pandas as pd
import numpy as np
+
from pyspark import pandas as ps
from pyspark.pandas.tests.test_extension import ExtensionTestsMixin
from pyspark.testing.connectutils import ReusedConnectTestCase
diff --git a/python/pyspark/pandas/tests/connect/test_parity_indexing.py b/python/pyspark/pandas/tests/connect/test_parity_indexing.py
index 9a14978539f..950bd2d0b2d 100644
--- a/python/pyspark/pandas/tests/connect/test_parity_indexing.py
+++ b/python/pyspark/pandas/tests/connect/test_parity_indexing.py
@@ -17,6 +17,7 @@
import unittest
import pandas as pd
+
from pyspark import pandas as ps
from pyspark.pandas.tests.test_indexing import BasicIndexingTestsMixin
from pyspark.testing.connectutils import ReusedConnectTestCase
diff --git a/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py b/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py
index 6bf0a02589b..6cc0a277718 100644
--- a/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py
+++ b/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py
@@ -17,6 +17,7 @@
import unittest
import pandas as pd
+
from pyspark import pandas as ps
from pyspark.pandas.tests.test_numpy_compat import NumPyCompatTestsMixin
from pyspark.testing.connectutils import ReusedConnectTestCase
diff --git a/python/pyspark/pandas/tests/data_type_ops/testing_utils.py b/python/pyspark/pandas/tests/data_type_ops/testing_utils.py
index 44673053bc2..8bafc86c8dd 100644
--- a/python/pyspark/pandas/tests/data_type_ops/testing_utils.py
+++ b/python/pyspark/pandas/tests/data_type_ops/testing_utils.py
@@ -23,13 +23,11 @@ import pandas as pd
import pyspark.pandas as ps
from pyspark.pandas.typedef import extension_dtypes
-
from pyspark.pandas.typedef.typehints import (
extension_dtypes_available,
extension_float_dtypes_available,
extension_object_dtypes_available,
)
-
from pyspark.testing.pandasutils import ComparisonTestBase
if extension_dtypes_available:
diff --git a/python/pyspark/pandas/tests/frame/test_reshaping.py b/python/pyspark/pandas/tests/frame/test_reshaping.py
index b5176a87ca6..0a1c5e5b098 100644
--- a/python/pyspark/pandas/tests/frame/test_reshaping.py
+++ b/python/pyspark/pandas/tests/frame/test_reshaping.py
@@ -22,7 +22,6 @@ import pandas as pd
from pyspark import pandas as ps
from pyspark.pandas.config import option_context
-
from pyspark.testing.pandasutils import ComparisonTestBase
from pyspark.testing.sqlutils import SQLTestUtils
diff --git a/python/pyspark/pandas/tests/frame/test_spark.py b/python/pyspark/pandas/tests/frame/test_spark.py
index da16e943578..4413279e32f 100644
--- a/python/pyspark/pandas/tests/frame/test_spark.py
+++ b/python/pyspark/pandas/tests/frame/test_spark.py
@@ -21,10 +21,10 @@ from io import StringIO
import numpy as np
import pandas as pd
+
from pyspark import StorageLevel
from pyspark.ml.linalg import SparseVector
from pyspark.sql.types import StructType
-
from pyspark import pandas as ps
from pyspark.pandas.frame import CachedDataFrame
from pyspark.pandas.exceptions import PandasNotImplementedError
diff --git a/python/pyspark/pandas/tests/series/test_series.py b/python/pyspark/pandas/tests/series/test_series.py
index 75c81431d32..b9fa6f6063f 100644
--- a/python/pyspark/pandas/tests/series/test_series.py
+++ b/python/pyspark/pandas/tests/series/test_series.py
@@ -18,7 +18,6 @@
import unittest
from collections import defaultdict
import inspect
-
from datetime import datetime, timedelta
import numpy as np
diff --git a/python/pyspark/pandas/tests/series/test_stat.py b/python/pyspark/pandas/tests/series/test_stat.py
index 62672b1e8d5..1e379d32d56 100644
--- a/python/pyspark/pandas/tests/series/test_stat.py
+++ b/python/pyspark/pandas/tests/series/test_stat.py
@@ -15,10 +15,10 @@
# limitations under the License.
#
import unittest
+from decimal import Decimal
import numpy as np
import pandas as pd
-from decimal import Decimal
from pyspark import pandas as ps
from pyspark.testing.pandasutils import ComparisonTestBase
diff --git a/python/pyspark/pandas/tests/test_indexops_spark.py b/python/pyspark/pandas/tests/test_indexops_spark.py
index 3f7691a3863..2a0bcf242fa 100644
--- a/python/pyspark/pandas/tests/test_indexops_spark.py
+++ b/python/pyspark/pandas/tests/test_indexops_spark.py
@@ -16,9 +16,9 @@
#
import pandas as pd
+
from pyspark.errors import AnalysisException
from pyspark.sql import functions as F
-
from pyspark import pandas as ps
from pyspark.testing.pandasutils import PandasOnSparkTestCase
from pyspark.testing.sqlutils import SQLTestUtils
diff --git a/python/pyspark/pandas/tests/test_stats.py b/python/pyspark/pandas/tests/test_stats.py
index 40ee64a5f68..bdc83ad7d5f 100644
--- a/python/pyspark/pandas/tests/test_stats.py
+++ b/python/pyspark/pandas/tests/test_stats.py
@@ -16,6 +16,7 @@
#
import unittest
+
import numpy as np
import pandas as pd
diff --git a/python/pyspark/pandas/utils.py b/python/pyspark/pandas/utils.py
index b647697edf9..9f372a53079 100644
--- a/python/pyspark/pandas/utils.py
+++ b/python/pyspark/pandas/utils.py
@@ -37,14 +37,13 @@ from typing import (
)
import warnings
+import pandas as pd
+from pandas.api.types import is_list_like # type: ignore[attr-defined]
+
from pyspark.sql import functions as F, Column, DataFrame as PySparkDataFrame, SparkSession
from pyspark.sql.types import DoubleType
from pyspark.sql.utils import is_remote, get_dataframe_class
from pyspark.errors import PySparkTypeError
-import pandas as pd
-from pandas.api.types import is_list_like # type: ignore[attr-defined]
-
-# For running doctests and reference resolution in PyCharm.
from pyspark import pandas as ps # noqa: F401
from pyspark.pandas._typing import (
Axis,
diff --git a/python/pyspark/pandas/window.py b/python/pyspark/pandas/window.py
index db98867674a..0aaeb7df89b 100644
--- a/python/pyspark/pandas/window.py
+++ b/python/pyspark/pandas/window.py
@@ -30,8 +30,6 @@ from pyspark.pandas.missing.window import (
MissingPandasLikeExponentialMoving,
MissingPandasLikeExponentialMovingGroupby,
)
-
-# For running doctests and reference resolution in PyCharm.
from pyspark import pandas as ps # noqa: F401
from pyspark.pandas._typing import FrameLike
from pyspark.pandas.groupby import GroupBy, DataFrameGroupBy
diff --git a/python/pyspark/profiler.py b/python/pyspark/profiler.py
index 6aa504c7c08..b7ea6a19063 100644
--- a/python/pyspark/profiler.py
+++ b/python/pyspark/profiler.py
@@ -27,7 +27,6 @@ from typing import (
Union,
cast,
)
-
import cProfile
import inspect
import pstats
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index 0e0d958f46a..d2a8bc4b111 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -94,6 +94,9 @@ if TYPE_CHECKING:
import socket
import io
+ from py4j.java_gateway import JavaObject
+ from py4j.java_collections import JavaArray
+
from pyspark._typing import NonUDFType
from pyspark._typing import S, NumberOrArray
from pyspark.context import SparkContext
@@ -119,9 +122,6 @@ if TYPE_CHECKING:
SQLTableUDFType,
)
- from py4j.java_gateway import JavaObject
- from py4j.java_collections import JavaArray
-
T = TypeVar("T")
T_co = TypeVar("T_co", covariant=True)
U = TypeVar("U")
diff --git a/python/pyspark/shuffle.py b/python/pyspark/shuffle.py
index da03110c321..57964654963 100644
--- a/python/pyspark/shuffle.py
+++ b/python/pyspark/shuffle.py
@@ -24,8 +24,8 @@ import itertools
import operator
import random
import sys
-
import heapq
+
from pyspark.serializers import (
BatchedSerializer,
CPickleSerializer,
diff --git a/python/pyspark/tests/test_statcounter.py b/python/pyspark/tests/test_statcounter.py
index 747f42e67b2..1879ea6974e 100644
--- a/python/pyspark/tests/test_statcounter.py
+++ b/python/pyspark/tests/test_statcounter.py
@@ -14,9 +14,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+import math
+
from pyspark.statcounter import StatCounter
from pyspark.testing.utils import ReusedPySparkTestCase
-import math
class StatCounterTests(ReusedPySparkTestCase):
diff --git a/python/pyspark/util.py b/python/pyspark/util.py
index 47f5933079e..9c70bac2a3d 100644
--- a/python/pyspark/util.py
+++ b/python/pyspark/util.py
@@ -28,10 +28,10 @@ import typing
from types import TracebackType
from typing import Any, Callable, IO, Iterator, List, Optional, TextIO, Tuple, Union
-from pyspark.errors import PySparkRuntimeError
-
from py4j.clientserver import ClientServer
+from pyspark.errors import PySparkRuntimeError
+
__all__: List[str] = []
from py4j.java_gateway import JavaObject
diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py
index a073942adb6..3d08f6c4bae 100644
--- a/python/pyspark/worker.py
+++ b/python/pyspark/worker.py
@@ -24,7 +24,6 @@ import time
from inspect import getfullargspec
import json
from typing import Any, Callable, Iterable, Iterator
-
import faulthandler
from pyspark.accumulators import _accumulatorRegistry
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org