You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2023/02/16 03:04:42 UTC

[spark] branch master updated: [SPARK-42459][CONNECT] Create pyspark.sql.connect.utils to keep common codes

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 7ee8a32077b [SPARK-42459][CONNECT] Create pyspark.sql.connect.utils to keep common codes
7ee8a32077b is described below

commit 7ee8a32077b09cb847b6ac41cdc5067cf7bd83e9
Author: Hyukjin Kwon <gu...@apache.org>
AuthorDate: Thu Feb 16 12:04:26 2023 +0900

    [SPARK-42459][CONNECT] Create pyspark.sql.connect.utils to keep common codes
    
    ### What changes were proposed in this pull request?
    
    This PR proposes to add `pyspark.sql.connect.utils` to keep common code, especially code related to dependencies.
    
    ### Why are the changes needed?
    
    For example, [SPARK-41457](https://issues.apache.org/jira/browse/SPARK-41457) added `require_minimum_grpc_version` to `pyspark.sql.pandas.utils`, which is actually unrelated to the connect module. We should move all such helpers to a separate utils module for better readability and maintenance.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No, dev-only.
    
    ### How was this patch tested?
    
    Existing tests should cover this.
    
    Closes #40047 from HyukjinKwon/refactor-utils.
    
    Authored-by: Hyukjin Kwon <gu...@apache.org>
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
 python/pyspark/sql/connect/__init__.py             | 23 ------------------
 python/pyspark/sql/connect/catalog.py              |  2 +-
 python/pyspark/sql/connect/client.py               |  2 +-
 python/pyspark/sql/connect/column.py               |  2 +-
 python/pyspark/sql/connect/conversion.py           |  2 +-
 python/pyspark/sql/connect/dataframe.py            |  2 +-
 python/pyspark/sql/connect/expressions.py          |  2 +-
 python/pyspark/sql/connect/functions.py            |  2 +-
 python/pyspark/sql/connect/group.py                |  2 +-
 python/pyspark/sql/connect/plan.py                 |  2 +-
 python/pyspark/sql/connect/readwriter.py           |  2 +-
 python/pyspark/sql/connect/session.py              |  2 +-
 python/pyspark/sql/connect/types.py                |  2 +-
 python/pyspark/sql/connect/udf.py                  |  2 +-
 .../pyspark/sql/connect/{__init__.py => utils.py}  | 28 +++++++++++++++-------
 python/pyspark/sql/connect/window.py               |  2 +-
 python/pyspark/sql/pandas/utils.py                 | 19 ---------------
 17 files changed, 34 insertions(+), 64 deletions(-)

diff --git a/python/pyspark/sql/connect/__init__.py b/python/pyspark/sql/connect/__init__.py
index aaf52e57f03..9bd4513db22 100644
--- a/python/pyspark/sql/connect/__init__.py
+++ b/python/pyspark/sql/connect/__init__.py
@@ -17,26 +17,3 @@
 
 """Currently Spark Connect is very experimental and the APIs to interact with
 Spark through this API are can be changed at any time without warning."""
-import sys
-
-from pyspark.sql.pandas.utils import (
-    require_minimum_pandas_version,
-    require_minimum_pyarrow_version,
-    require_minimum_grpc_version,
-)
-
-
-def check_dependencies(mod_name: str, file_name: str) -> None:
-    if mod_name == "__main__":
-        from pyspark.testing.connectutils import should_test_connect, connect_requirement_message
-
-        if not should_test_connect:
-            print(
-                f"Skipping {file_name} doctests: {connect_requirement_message}",
-                file=sys.stderr,
-            )
-            sys.exit(0)
-    else:
-        require_minimum_pandas_version()
-        require_minimum_pyarrow_version()
-        require_minimum_grpc_version()
diff --git a/python/pyspark/sql/connect/catalog.py b/python/pyspark/sql/connect/catalog.py
index 233fb904529..f2bbae344f2 100644
--- a/python/pyspark/sql/connect/catalog.py
+++ b/python/pyspark/sql/connect/catalog.py
@@ -14,7 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from pyspark.sql.connect import check_dependencies
+from pyspark.sql.connect.utils import check_dependencies
 
 check_dependencies(__name__, __file__)
 
diff --git a/python/pyspark/sql/connect/client.py b/python/pyspark/sql/connect/client.py
index 2c07596fec0..aade0f6e050 100644
--- a/python/pyspark/sql/connect/client.py
+++ b/python/pyspark/sql/connect/client.py
@@ -19,7 +19,7 @@ __all__ = [
     "SparkConnectClient",
 ]
 
-from pyspark.sql.connect import check_dependencies
+from pyspark.sql.connect.utils import check_dependencies
 
 check_dependencies(__name__, __file__)
 
diff --git a/python/pyspark/sql/connect/column.py b/python/pyspark/sql/connect/column.py
index 79e1e81992b..f5c82336bb7 100644
--- a/python/pyspark/sql/connect/column.py
+++ b/python/pyspark/sql/connect/column.py
@@ -14,7 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from pyspark.sql.connect import check_dependencies
+from pyspark.sql.connect.utils import check_dependencies
 
 check_dependencies(__name__, __file__)
 
diff --git a/python/pyspark/sql/connect/conversion.py b/python/pyspark/sql/connect/conversion.py
index 4dbdb5db212..40679b80291 100644
--- a/python/pyspark/sql/connect/conversion.py
+++ b/python/pyspark/sql/connect/conversion.py
@@ -14,7 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from pyspark.sql.connect import check_dependencies
+from pyspark.sql.connect.utils import check_dependencies
 
 check_dependencies(__name__, __file__)
 
diff --git a/python/pyspark/sql/connect/dataframe.py b/python/pyspark/sql/connect/dataframe.py
index 667295e8667..3564f5def17 100644
--- a/python/pyspark/sql/connect/dataframe.py
+++ b/python/pyspark/sql/connect/dataframe.py
@@ -14,7 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from pyspark.sql.connect import check_dependencies
+from pyspark.sql.connect.utils import check_dependencies
 
 check_dependencies(__name__, __file__)
 
diff --git a/python/pyspark/sql/connect/expressions.py b/python/pyspark/sql/connect/expressions.py
index 6e34719042d..876748d06d8 100644
--- a/python/pyspark/sql/connect/expressions.py
+++ b/python/pyspark/sql/connect/expressions.py
@@ -14,7 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from pyspark.sql.connect import check_dependencies
+from pyspark.sql.connect.utils import check_dependencies
 
 check_dependencies(__name__, __file__)
 
diff --git a/python/pyspark/sql/connect/functions.py b/python/pyspark/sql/connect/functions.py
index 42b59d18a5b..7d61a86c8b5 100644
--- a/python/pyspark/sql/connect/functions.py
+++ b/python/pyspark/sql/connect/functions.py
@@ -14,7 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from pyspark.sql.connect import check_dependencies
+from pyspark.sql.connect.utils import check_dependencies
 
 check_dependencies(__name__, __file__)
 
diff --git a/python/pyspark/sql/connect/group.py b/python/pyspark/sql/connect/group.py
index f3841edf1d4..8d876762804 100644
--- a/python/pyspark/sql/connect/group.py
+++ b/python/pyspark/sql/connect/group.py
@@ -14,7 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from pyspark.sql.connect import check_dependencies
+from pyspark.sql.connect.utils import check_dependencies
 
 check_dependencies(__name__, __file__)
 
diff --git a/python/pyspark/sql/connect/plan.py b/python/pyspark/sql/connect/plan.py
index 3e12ef03515..0f27b214502 100644
--- a/python/pyspark/sql/connect/plan.py
+++ b/python/pyspark/sql/connect/plan.py
@@ -14,7 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from pyspark.sql.connect import check_dependencies
+from pyspark.sql.connect.utils import check_dependencies
 
 check_dependencies(__name__, __file__)
 
diff --git a/python/pyspark/sql/connect/readwriter.py b/python/pyspark/sql/connect/readwriter.py
index d43d8e5488f..292e58b3552 100644
--- a/python/pyspark/sql/connect/readwriter.py
+++ b/python/pyspark/sql/connect/readwriter.py
@@ -14,7 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from pyspark.sql.connect import check_dependencies
+from pyspark.sql.connect.utils import check_dependencies
 
 check_dependencies(__name__, __file__)
 
diff --git a/python/pyspark/sql/connect/session.py b/python/pyspark/sql/connect/session.py
index 75c8e61752e..08e63f544e2 100644
--- a/python/pyspark/sql/connect/session.py
+++ b/python/pyspark/sql/connect/session.py
@@ -14,7 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from pyspark.sql.connect import check_dependencies
+from pyspark.sql.connect.utils import check_dependencies
 
 check_dependencies(__name__, __file__)
 
diff --git a/python/pyspark/sql/connect/types.py b/python/pyspark/sql/connect/types.py
index 6b9975c52cd..28eb51d72cc 100644
--- a/python/pyspark/sql/connect/types.py
+++ b/python/pyspark/sql/connect/types.py
@@ -14,7 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from pyspark.sql.connect import check_dependencies
+from pyspark.sql.connect.utils import check_dependencies
 
 check_dependencies(__name__, __file__)
 
diff --git a/python/pyspark/sql/connect/udf.py b/python/pyspark/sql/connect/udf.py
index bef5a99a65b..a60f9e516c7 100644
--- a/python/pyspark/sql/connect/udf.py
+++ b/python/pyspark/sql/connect/udf.py
@@ -17,7 +17,7 @@
 """
 User-defined function related classes and functions
 """
-from pyspark.sql.connect import check_dependencies
+from pyspark.sql.connect.utils import check_dependencies
 
 check_dependencies(__name__, __file__)
 
diff --git a/python/pyspark/sql/connect/__init__.py b/python/pyspark/sql/connect/utils.py
similarity index 63%
copy from python/pyspark/sql/connect/__init__.py
copy to python/pyspark/sql/connect/utils.py
index aaf52e57f03..fbc34aa8d59 100644
--- a/python/pyspark/sql/connect/__init__.py
+++ b/python/pyspark/sql/connect/utils.py
@@ -14,16 +14,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-
-"""Currently Spark Connect is very experimental and the APIs to interact with
-Spark through this API are can be changed at any time without warning."""
 import sys
 
-from pyspark.sql.pandas.utils import (
-    require_minimum_pandas_version,
-    require_minimum_pyarrow_version,
-    require_minimum_grpc_version,
-)
+from pyspark.sql.pandas.utils import require_minimum_pandas_version, require_minimum_pyarrow_version
 
 
 def check_dependencies(mod_name: str, file_name: str) -> None:
@@ -40,3 +33,22 @@ def check_dependencies(mod_name: str, file_name: str) -> None:
         require_minimum_pandas_version()
         require_minimum_pyarrow_version()
         require_minimum_grpc_version()
+
+
+def require_minimum_grpc_version() -> None:
+    """Raise ImportError if minimum version of grpc is not installed"""
+    minimum_grpc_version = "1.48.1"
+
+    from distutils.version import LooseVersion
+
+    try:
+        import grpc
+    except ImportError as error:
+        raise ImportError(
+            "grpc >= %s must be installed; however, " "it was not found." % minimum_grpc_version
+        ) from error
+    if LooseVersion(grpc.__version__) < LooseVersion(minimum_grpc_version):
+        raise ImportError(
+            "gRPC >= %s must be installed; however, "
+            "your version was %s." % (minimum_grpc_version, grpc.__version__)
+        )
diff --git a/python/pyspark/sql/connect/window.py b/python/pyspark/sql/connect/window.py
index 39df41bae3f..51a9452e611 100644
--- a/python/pyspark/sql/connect/window.py
+++ b/python/pyspark/sql/connect/window.py
@@ -14,7 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from pyspark.sql.connect import check_dependencies
+from pyspark.sql.connect.utils import check_dependencies
 
 check_dependencies(__name__, __file__)
 
diff --git a/python/pyspark/sql/pandas/utils.py b/python/pyspark/sql/pandas/utils.py
index f2d76395c25..c51a90ca57a 100644
--- a/python/pyspark/sql/pandas/utils.py
+++ b/python/pyspark/sql/pandas/utils.py
@@ -73,25 +73,6 @@ def require_minimum_pyarrow_version() -> None:
         )
 
 
-def require_minimum_grpc_version() -> None:
-    """Raise ImportError if minimum version of grpc is not installed"""
-    minimum_grpc_version = "1.48.1"
-
-    from distutils.version import LooseVersion
-
-    try:
-        import grpc
-    except ImportError as error:
-        raise ImportError(
-            "grpc >= %s must be installed; however, " "it was not found." % minimum_grpc_version
-        ) from error
-    if LooseVersion(grpc.__version__) < LooseVersion(minimum_grpc_version):
-        raise ImportError(
-            "gRPC >= %s must be installed; however, "
-            "your version was %s." % (minimum_grpc_version, grpc.__version__)
-        )
-
-
 def pyarrow_version_less_than_minimum(minimum_pyarrow_version: str) -> bool:
     """Return False if the installed pyarrow version is less than minimum_pyarrow_version
     or if pyarrow is not installed."""


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org