You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2023/02/16 03:04:42 UTC
[spark] branch master updated: [SPARK-42459][CONNECT] Create pyspark.sql.connect.utils to keep common codes
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 7ee8a32077b [SPARK-42459][CONNECT] Create pyspark.sql.connect.utils to keep common codes
7ee8a32077b is described below
commit 7ee8a32077b09cb847b6ac41cdc5067cf7bd83e9
Author: Hyukjin Kwon <gu...@apache.org>
AuthorDate: Thu Feb 16 12:04:26 2023 +0900
[SPARK-42459][CONNECT] Create pyspark.sql.connect.utils to keep common codes
### What changes were proposed in this pull request?
This PR proposes to create `pyspark.sql.connect.utils` to keep common code, especially code related to dependencies.
### Why are the changes needed?
For example, [SPARK-41457](https://issues.apache.org/jira/browse/SPARK-41457) added `require_minimum_grpc_version` in `pyspark.sql.pandas.utils`, which is actually unrelated to the connect module. We should move all such code into a separate utils module for better readability and maintenance.
### Does this PR introduce _any_ user-facing change?
No, dev-only.
### How was this patch tested?
Existing tests should cover this.
Closes #40047 from HyukjinKwon/refactor-utils.
Authored-by: Hyukjin Kwon <gu...@apache.org>
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
python/pyspark/sql/connect/__init__.py | 23 ------------------
python/pyspark/sql/connect/catalog.py | 2 +-
python/pyspark/sql/connect/client.py | 2 +-
python/pyspark/sql/connect/column.py | 2 +-
python/pyspark/sql/connect/conversion.py | 2 +-
python/pyspark/sql/connect/dataframe.py | 2 +-
python/pyspark/sql/connect/expressions.py | 2 +-
python/pyspark/sql/connect/functions.py | 2 +-
python/pyspark/sql/connect/group.py | 2 +-
python/pyspark/sql/connect/plan.py | 2 +-
python/pyspark/sql/connect/readwriter.py | 2 +-
python/pyspark/sql/connect/session.py | 2 +-
python/pyspark/sql/connect/types.py | 2 +-
python/pyspark/sql/connect/udf.py | 2 +-
.../pyspark/sql/connect/{__init__.py => utils.py} | 28 +++++++++++++++-------
python/pyspark/sql/connect/window.py | 2 +-
python/pyspark/sql/pandas/utils.py | 19 ---------------
17 files changed, 34 insertions(+), 64 deletions(-)
diff --git a/python/pyspark/sql/connect/__init__.py b/python/pyspark/sql/connect/__init__.py
index aaf52e57f03..9bd4513db22 100644
--- a/python/pyspark/sql/connect/__init__.py
+++ b/python/pyspark/sql/connect/__init__.py
@@ -17,26 +17,3 @@
"""Currently Spark Connect is very experimental and the APIs to interact with
Spark through this API are can be changed at any time without warning."""
-import sys
-
-from pyspark.sql.pandas.utils import (
- require_minimum_pandas_version,
- require_minimum_pyarrow_version,
- require_minimum_grpc_version,
-)
-
-
-def check_dependencies(mod_name: str, file_name: str) -> None:
- if mod_name == "__main__":
- from pyspark.testing.connectutils import should_test_connect, connect_requirement_message
-
- if not should_test_connect:
- print(
- f"Skipping {file_name} doctests: {connect_requirement_message}",
- file=sys.stderr,
- )
- sys.exit(0)
- else:
- require_minimum_pandas_version()
- require_minimum_pyarrow_version()
- require_minimum_grpc_version()
diff --git a/python/pyspark/sql/connect/catalog.py b/python/pyspark/sql/connect/catalog.py
index 233fb904529..f2bbae344f2 100644
--- a/python/pyspark/sql/connect/catalog.py
+++ b/python/pyspark/sql/connect/catalog.py
@@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-from pyspark.sql.connect import check_dependencies
+from pyspark.sql.connect.utils import check_dependencies
check_dependencies(__name__, __file__)
diff --git a/python/pyspark/sql/connect/client.py b/python/pyspark/sql/connect/client.py
index 2c07596fec0..aade0f6e050 100644
--- a/python/pyspark/sql/connect/client.py
+++ b/python/pyspark/sql/connect/client.py
@@ -19,7 +19,7 @@ __all__ = [
"SparkConnectClient",
]
-from pyspark.sql.connect import check_dependencies
+from pyspark.sql.connect.utils import check_dependencies
check_dependencies(__name__, __file__)
diff --git a/python/pyspark/sql/connect/column.py b/python/pyspark/sql/connect/column.py
index 79e1e81992b..f5c82336bb7 100644
--- a/python/pyspark/sql/connect/column.py
+++ b/python/pyspark/sql/connect/column.py
@@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-from pyspark.sql.connect import check_dependencies
+from pyspark.sql.connect.utils import check_dependencies
check_dependencies(__name__, __file__)
diff --git a/python/pyspark/sql/connect/conversion.py b/python/pyspark/sql/connect/conversion.py
index 4dbdb5db212..40679b80291 100644
--- a/python/pyspark/sql/connect/conversion.py
+++ b/python/pyspark/sql/connect/conversion.py
@@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-from pyspark.sql.connect import check_dependencies
+from pyspark.sql.connect.utils import check_dependencies
check_dependencies(__name__, __file__)
diff --git a/python/pyspark/sql/connect/dataframe.py b/python/pyspark/sql/connect/dataframe.py
index 667295e8667..3564f5def17 100644
--- a/python/pyspark/sql/connect/dataframe.py
+++ b/python/pyspark/sql/connect/dataframe.py
@@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-from pyspark.sql.connect import check_dependencies
+from pyspark.sql.connect.utils import check_dependencies
check_dependencies(__name__, __file__)
diff --git a/python/pyspark/sql/connect/expressions.py b/python/pyspark/sql/connect/expressions.py
index 6e34719042d..876748d06d8 100644
--- a/python/pyspark/sql/connect/expressions.py
+++ b/python/pyspark/sql/connect/expressions.py
@@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-from pyspark.sql.connect import check_dependencies
+from pyspark.sql.connect.utils import check_dependencies
check_dependencies(__name__, __file__)
diff --git a/python/pyspark/sql/connect/functions.py b/python/pyspark/sql/connect/functions.py
index 42b59d18a5b..7d61a86c8b5 100644
--- a/python/pyspark/sql/connect/functions.py
+++ b/python/pyspark/sql/connect/functions.py
@@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-from pyspark.sql.connect import check_dependencies
+from pyspark.sql.connect.utils import check_dependencies
check_dependencies(__name__, __file__)
diff --git a/python/pyspark/sql/connect/group.py b/python/pyspark/sql/connect/group.py
index f3841edf1d4..8d876762804 100644
--- a/python/pyspark/sql/connect/group.py
+++ b/python/pyspark/sql/connect/group.py
@@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-from pyspark.sql.connect import check_dependencies
+from pyspark.sql.connect.utils import check_dependencies
check_dependencies(__name__, __file__)
diff --git a/python/pyspark/sql/connect/plan.py b/python/pyspark/sql/connect/plan.py
index 3e12ef03515..0f27b214502 100644
--- a/python/pyspark/sql/connect/plan.py
+++ b/python/pyspark/sql/connect/plan.py
@@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-from pyspark.sql.connect import check_dependencies
+from pyspark.sql.connect.utils import check_dependencies
check_dependencies(__name__, __file__)
diff --git a/python/pyspark/sql/connect/readwriter.py b/python/pyspark/sql/connect/readwriter.py
index d43d8e5488f..292e58b3552 100644
--- a/python/pyspark/sql/connect/readwriter.py
+++ b/python/pyspark/sql/connect/readwriter.py
@@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-from pyspark.sql.connect import check_dependencies
+from pyspark.sql.connect.utils import check_dependencies
check_dependencies(__name__, __file__)
diff --git a/python/pyspark/sql/connect/session.py b/python/pyspark/sql/connect/session.py
index 75c8e61752e..08e63f544e2 100644
--- a/python/pyspark/sql/connect/session.py
+++ b/python/pyspark/sql/connect/session.py
@@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-from pyspark.sql.connect import check_dependencies
+from pyspark.sql.connect.utils import check_dependencies
check_dependencies(__name__, __file__)
diff --git a/python/pyspark/sql/connect/types.py b/python/pyspark/sql/connect/types.py
index 6b9975c52cd..28eb51d72cc 100644
--- a/python/pyspark/sql/connect/types.py
+++ b/python/pyspark/sql/connect/types.py
@@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-from pyspark.sql.connect import check_dependencies
+from pyspark.sql.connect.utils import check_dependencies
check_dependencies(__name__, __file__)
diff --git a/python/pyspark/sql/connect/udf.py b/python/pyspark/sql/connect/udf.py
index bef5a99a65b..a60f9e516c7 100644
--- a/python/pyspark/sql/connect/udf.py
+++ b/python/pyspark/sql/connect/udf.py
@@ -17,7 +17,7 @@
"""
User-defined function related classes and functions
"""
-from pyspark.sql.connect import check_dependencies
+from pyspark.sql.connect.utils import check_dependencies
check_dependencies(__name__, __file__)
diff --git a/python/pyspark/sql/connect/__init__.py b/python/pyspark/sql/connect/utils.py
similarity index 63%
copy from python/pyspark/sql/connect/__init__.py
copy to python/pyspark/sql/connect/utils.py
index aaf52e57f03..fbc34aa8d59 100644
--- a/python/pyspark/sql/connect/__init__.py
+++ b/python/pyspark/sql/connect/utils.py
@@ -14,16 +14,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-
-"""Currently Spark Connect is very experimental and the APIs to interact with
-Spark through this API are can be changed at any time without warning."""
import sys
-from pyspark.sql.pandas.utils import (
- require_minimum_pandas_version,
- require_minimum_pyarrow_version,
- require_minimum_grpc_version,
-)
+from pyspark.sql.pandas.utils import require_minimum_pandas_version, require_minimum_pyarrow_version
def check_dependencies(mod_name: str, file_name: str) -> None:
@@ -40,3 +33,22 @@ def check_dependencies(mod_name: str, file_name: str) -> None:
require_minimum_pandas_version()
require_minimum_pyarrow_version()
require_minimum_grpc_version()
+
+
+def require_minimum_grpc_version() -> None:
+ """Raise ImportError if minimum version of grpc is not installed"""
+ minimum_grpc_version = "1.48.1"
+
+ from distutils.version import LooseVersion
+
+ try:
+ import grpc
+ except ImportError as error:
+ raise ImportError(
+ "grpc >= %s must be installed; however, " "it was not found." % minimum_grpc_version
+ ) from error
+ if LooseVersion(grpc.__version__) < LooseVersion(minimum_grpc_version):
+ raise ImportError(
+ "gRPC >= %s must be installed; however, "
+ "your version was %s." % (minimum_grpc_version, grpc.__version__)
+ )
diff --git a/python/pyspark/sql/connect/window.py b/python/pyspark/sql/connect/window.py
index 39df41bae3f..51a9452e611 100644
--- a/python/pyspark/sql/connect/window.py
+++ b/python/pyspark/sql/connect/window.py
@@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-from pyspark.sql.connect import check_dependencies
+from pyspark.sql.connect.utils import check_dependencies
check_dependencies(__name__, __file__)
diff --git a/python/pyspark/sql/pandas/utils.py b/python/pyspark/sql/pandas/utils.py
index f2d76395c25..c51a90ca57a 100644
--- a/python/pyspark/sql/pandas/utils.py
+++ b/python/pyspark/sql/pandas/utils.py
@@ -73,25 +73,6 @@ def require_minimum_pyarrow_version() -> None:
)
-def require_minimum_grpc_version() -> None:
- """Raise ImportError if minimum version of grpc is not installed"""
- minimum_grpc_version = "1.48.1"
-
- from distutils.version import LooseVersion
-
- try:
- import grpc
- except ImportError as error:
- raise ImportError(
- "grpc >= %s must be installed; however, " "it was not found." % minimum_grpc_version
- ) from error
- if LooseVersion(grpc.__version__) < LooseVersion(minimum_grpc_version):
- raise ImportError(
- "gRPC >= %s must be installed; however, "
- "your version was %s." % (minimum_grpc_version, grpc.__version__)
- )
-
-
def pyarrow_version_less_than_minimum(minimum_pyarrow_version: str) -> bool:
"""Return False if the installed pyarrow version is less than minimum_pyarrow_version
or if pyarrow is not installed."""
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org