Posted to commits@spark.apache.org by ru...@apache.org on 2023/06/07 01:39:47 UTC

[spark] branch master updated: [SPARK-43970][PYTHON][CONNECT] Hide unsupported dataframe methods from auto-completion

This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new e3957ce8718 [SPARK-43970][PYTHON][CONNECT] Hide unsupported dataframe methods from auto-completion
e3957ce8718 is described below

commit e3957ce8718697be2fcb2a95ede439bb49ceadad
Author: Ruifeng Zheng <ru...@apache.org>
AuthorDate: Wed Jun 7 09:39:27 2023 +0800

    [SPARK-43970][PYTHON][CONNECT] Hide unsupported dataframe methods from auto-completion
    
    ### What changes were proposed in this pull request?
    Hide the unsupported Spark Connect DataFrame methods (`rdd`, `toJSON`, `foreach`, `foreachPartition`, `checkpoint`, `localCheckpoint`) from auto-completion by raising `PySparkNotImplementedError` for these names in `__getattr__` instead of defining placeholder methods on the class.
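    
    Background on why this hides the names: tab completion is driven by `dir()`, which only lists attributes actually defined on the class, whereas `__getattr__` is consulted only after normal lookup fails and is invisible to `dir()`. A minimal, self-contained sketch of the idea (the class and attribute names here are illustrative, not the real Spark Connect code):
    
    ```python
    class ConnectLikeDataFrame:
        _UNSUPPORTED = {"rdd", "toJSON", "foreach"}  # illustrative subset
    
        def select(self, *cols):  # a normal, supported method
            return self
    
        def __getattr__(self, name):
            # Only reached when normal attribute lookup fails, so these
            # names never appear in dir() / tab completion.
            if name in self._UNSUPPORTED:
                raise NotImplementedError(f"{name}() is not implemented")
            raise AttributeError(name)
    
    df = ConnectLikeDataFrame()
    print("rdd" in dir(df))  # False: hidden from auto-completion
    try:
        df.toJSON            # the access itself raises
    except NotImplementedError as e:
        print(e)             # toJSON() is not implemented
    ```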
    
    ### Why are the changes needed?
    
    For a better user experience: placeholder methods that exist only to raise NOT_IMPLEMENTED no longer show up in auto-completion in interactive shells such as IPython.
    
    Before:
    <img width="507" alt="image" src="https://github.com/apache/spark/assets/7322292/8f4f228c-e30e-4027-8d52-768f1657f19e">
    
    After:
    <img width="892" alt="image" src="https://github.com/apache/spark/assets/7322292/1d308937-dd57-4ca6-b37a-29b09348bda5">
    
    ### Does this PR introduce _any_ user-facing change?
    Yes. The unsupported methods listed above no longer appear in auto-completion; accessing them now raises `PySparkNotImplementedError` directly.
    
    ### How was this patch tested?
    Manually checked in IPython.
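    
    One way to verify the change (a sketch, assuming a Spark Connect server is reachable; the connection string below is illustrative):
    
    ```python
    from pyspark.sql import SparkSession
    
    spark = SparkSession.builder.remote("sc://localhost").getOrCreate()
    df = spark.range(3)
    
    # None of the unsupported methods are offered by dir()-based tab completion.
    print([m for m in ("rdd", "toJSON", "foreach", "foreachPartition",
                       "checkpoint", "localCheckpoint") if m in dir(df)])  # []
    ```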
    
    Closes #41462 from zhengruifeng/connect_hide_unsupported_df_functions.
    
    Authored-by: Ruifeng Zheng <ru...@apache.org>
    Signed-off-by: Ruifeng Zheng <ru...@apache.org>
---
 python/pyspark/sql/connect/dataframe.py | 48 +++++++++------------------------
 1 file changed, 12 insertions(+), 36 deletions(-)

diff --git a/python/pyspark/sql/connect/dataframe.py b/python/pyspark/sql/connect/dataframe.py
index a8a4612aec7..6429645f0e0 100644
--- a/python/pyspark/sql/connect/dataframe.py
+++ b/python/pyspark/sql/connect/dataframe.py
@@ -1572,6 +1572,18 @@ class DataFrame:
             raise PySparkAttributeError(
                 error_class="JVM_ATTRIBUTE_NOT_SUPPORTED", message_parameters={"attr_name": name}
             )
+        elif name in [
+            "rdd",
+            "toJSON",
+            "foreach",
+            "foreachPartition",
+            "checkpoint",
+            "localCheckpoint",
+        ]:
+            raise PySparkNotImplementedError(
+                error_class="NOT_IMPLEMENTED",
+                message_parameters={"feature": f"{name}()"},
+            )
         return self[name]
 
     @overload
@@ -1817,12 +1829,6 @@ class DataFrame:
 
     createOrReplaceGlobalTempView.__doc__ = PySparkDataFrame.createOrReplaceGlobalTempView.__doc__
 
-    def rdd(self, *args: Any, **kwargs: Any) -> None:
-        raise PySparkNotImplementedError(
-            error_class="NOT_IMPLEMENTED",
-            message_parameters={"feature": "RDD Support for Spark Connect"},
-        )
-
     def cache(self) -> "DataFrame":
         if self._plan is None:
             raise Exception("Cannot cache on empty plan.")
@@ -1870,18 +1876,6 @@ class DataFrame:
     def is_cached(self) -> bool:
         return self.storageLevel != StorageLevel.NONE
 
-    def foreach(self, *args: Any, **kwargs: Any) -> None:
-        raise PySparkNotImplementedError(
-            error_class="NOT_IMPLEMENTED",
-            message_parameters={"feature": "foreach()"},
-        )
-
-    def foreachPartition(self, *args: Any, **kwargs: Any) -> None:
-        raise PySparkNotImplementedError(
-            error_class="NOT_IMPLEMENTED",
-            message_parameters={"feature": "foreachPartition()"},
-        )
-
     def toLocalIterator(self, prefetchPartitions: bool = False) -> Iterator[Row]:
         from pyspark.sql.connect.conversion import ArrowTableToRowsConversion
 
@@ -1905,18 +1899,6 @@ class DataFrame:
 
     toLocalIterator.__doc__ = PySparkDataFrame.toLocalIterator.__doc__
 
-    def checkpoint(self, *args: Any, **kwargs: Any) -> None:
-        raise PySparkNotImplementedError(
-            error_class="NOT_IMPLEMENTED",
-            message_parameters={"feature": "checkpoint()"},
-        )
-
-    def localCheckpoint(self, *args: Any, **kwargs: Any) -> None:
-        raise PySparkNotImplementedError(
-            error_class="NOT_IMPLEMENTED",
-            message_parameters={"feature": "localCheckpoint()"},
-        )
-
     def to_pandas_on_spark(
         self, index_col: Optional[Union[str, List[str]]] = None
     ) -> "PandasOnSparkDataFrame":
@@ -2001,12 +1983,6 @@ class DataFrame:
 
     writeStream.__doc__ = PySparkDataFrame.writeStream.__doc__
 
-    def toJSON(self, *args: Any, **kwargs: Any) -> None:
-        raise PySparkNotImplementedError(
-            error_class="NOT_IMPLEMENTED",
-            message_parameters={"feature": "toJSON()"},
-        )
-
     def sameSemantics(self, other: "DataFrame") -> bool:
         assert self._plan is not None
         assert other._plan is not None

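One subtle consequence of the hunks above: the removed placeholders were ordinary methods, so previously the error was raised only when they were called (e.g. `df.toJSON()`); with the `__getattr__` approach the error is raised as soon as the attribute is accessed. A sketch of the new behavior, using the same illustrative session setup as in the testing section:

```python
from pyspark.sql import SparkSession
from pyspark.errors import PySparkNotImplementedError

spark = SparkSession.builder.remote("sc://localhost").getOrCreate()  # illustrative URL
df = spark.range(3)

try:
    df.rdd  # with the placeholder method removed, the attribute access itself raises
except PySparkNotImplementedError as e:
    print(e)
```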

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org