You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ru...@apache.org on 2023/06/07 01:39:47 UTC
[spark] branch master updated: [SPARK-43970][PYTHON][CONNECT] Hide unsupported dataframe methods from auto-completion
This is an automated email from the ASF dual-hosted git repository.
ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new e3957ce8718 [SPARK-43970][PYTHON][CONNECT] Hide unsupported dataframe methods from auto-completion
e3957ce8718 is described below
commit e3957ce8718697be2fcb2a95ede439bb49ceadad
Author: Ruifeng Zheng <ru...@apache.org>
AuthorDate: Wed Jun 7 09:39:27 2023 +0800
[SPARK-43970][PYTHON][CONNECT] Hide unsupported dataframe methods from auto-completion
### What changes were proposed in this pull request?
Hide unsupported dataframe methods from auto-completion
### Why are the changes needed?
For a better user experience: unsupported methods should not be suggested by auto-completion.
before
<img width="507" alt="image" src="https://github.com/apache/spark/assets/7322292/8f4f228c-e30e-4027-8d52-768f1657f19e">
after
<img width="892" alt="image" src="https://github.com/apache/spark/assets/7322292/1d308937-dd57-4ca6-b37a-29b09348bda5">
### Does this PR introduce _any_ user-facing change?
Yes — unsupported DataFrame methods (e.g. `rdd`, `toJSON`, `foreach`) no longer appear in auto-completion; calling them still raises `PySparkNotImplementedError`.
### How was this patch tested?
Manually checked in IPython.
Closes #41462 from zhengruifeng/connect_hide_unsupported_df_functions.
Authored-by: Ruifeng Zheng <ru...@apache.org>
Signed-off-by: Ruifeng Zheng <ru...@apache.org>
---
python/pyspark/sql/connect/dataframe.py | 48 +++++++++------------------------
1 file changed, 12 insertions(+), 36 deletions(-)
diff --git a/python/pyspark/sql/connect/dataframe.py b/python/pyspark/sql/connect/dataframe.py
index a8a4612aec7..6429645f0e0 100644
--- a/python/pyspark/sql/connect/dataframe.py
+++ b/python/pyspark/sql/connect/dataframe.py
@@ -1572,6 +1572,18 @@ class DataFrame:
raise PySparkAttributeError(
error_class="JVM_ATTRIBUTE_NOT_SUPPORTED", message_parameters={"attr_name": name}
)
+ elif name in [
+ "rdd",
+ "toJSON",
+ "foreach",
+ "foreachPartition",
+ "checkpoint",
+ "localCheckpoint",
+ ]:
+ raise PySparkNotImplementedError(
+ error_class="NOT_IMPLEMENTED",
+ message_parameters={"feature": f"{name}()"},
+ )
return self[name]
@overload
@@ -1817,12 +1829,6 @@ class DataFrame:
createOrReplaceGlobalTempView.__doc__ = PySparkDataFrame.createOrReplaceGlobalTempView.__doc__
- def rdd(self, *args: Any, **kwargs: Any) -> None:
- raise PySparkNotImplementedError(
- error_class="NOT_IMPLEMENTED",
- message_parameters={"feature": "RDD Support for Spark Connect"},
- )
-
def cache(self) -> "DataFrame":
if self._plan is None:
raise Exception("Cannot cache on empty plan.")
@@ -1870,18 +1876,6 @@ class DataFrame:
def is_cached(self) -> bool:
return self.storageLevel != StorageLevel.NONE
- def foreach(self, *args: Any, **kwargs: Any) -> None:
- raise PySparkNotImplementedError(
- error_class="NOT_IMPLEMENTED",
- message_parameters={"feature": "foreach()"},
- )
-
- def foreachPartition(self, *args: Any, **kwargs: Any) -> None:
- raise PySparkNotImplementedError(
- error_class="NOT_IMPLEMENTED",
- message_parameters={"feature": "foreachPartition()"},
- )
-
def toLocalIterator(self, prefetchPartitions: bool = False) -> Iterator[Row]:
from pyspark.sql.connect.conversion import ArrowTableToRowsConversion
@@ -1905,18 +1899,6 @@ class DataFrame:
toLocalIterator.__doc__ = PySparkDataFrame.toLocalIterator.__doc__
- def checkpoint(self, *args: Any, **kwargs: Any) -> None:
- raise PySparkNotImplementedError(
- error_class="NOT_IMPLEMENTED",
- message_parameters={"feature": "checkpoint()"},
- )
-
- def localCheckpoint(self, *args: Any, **kwargs: Any) -> None:
- raise PySparkNotImplementedError(
- error_class="NOT_IMPLEMENTED",
- message_parameters={"feature": "localCheckpoint()"},
- )
-
def to_pandas_on_spark(
self, index_col: Optional[Union[str, List[str]]] = None
) -> "PandasOnSparkDataFrame":
@@ -2001,12 +1983,6 @@ class DataFrame:
writeStream.__doc__ = PySparkDataFrame.writeStream.__doc__
- def toJSON(self, *args: Any, **kwargs: Any) -> None:
- raise PySparkNotImplementedError(
- error_class="NOT_IMPLEMENTED",
- message_parameters={"feature": "toJSON()"},
- )
-
def sameSemantics(self, other: "DataFrame") -> bool:
assert self._plan is not None
assert other._plan is not None
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org