You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2021/07/24 07:59:48 UTC
[spark] branch branch-3.1 updated: [SPARK-36225][PYTHON][DOCS] Use DataFrame in python docstrings
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 824426e [SPARK-36225][PYTHON][DOCS] Use DataFrame in python docstrings
824426e is described below
commit 824426e7da86a3effccef1b99f0bd7e5c776e496
Author: Dominik Gehl <do...@open.ch>
AuthorDate: Sat Jul 24 16:58:10 2021 +0900
[SPARK-36225][PYTHON][DOCS] Use DataFrame in python docstrings
### What changes were proposed in this pull request?
Change references to `Dataset` in the Python docstrings to `DataFrame`.
### Why are the changes needed?
There is no `Dataset` class in PySpark, so docstrings that refer to `Dataset` should refer to `DataFrame` instead.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Doc change only
Closes #33438 from dominikgehl/feature/SPARK-36225.
Lead-authored-by: Dominik Gehl <do...@open.ch>
Co-authored-by: Dominik Gehl <ge...@fastmail.fm>
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
(cherry picked from commit ae1c20ee0dc24bd35cd15380e814f06e07314af2)
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
python/pyspark/ml/util.py | 7 ++++---
python/pyspark/sql/dataframe.py | 26 +++++++++++++-------------
2 files changed, 17 insertions(+), 16 deletions(-)
diff --git a/python/pyspark/ml/util.py b/python/pyspark/ml/util.py
index 21d2725..553b834 100644
--- a/python/pyspark/ml/util.py
+++ b/python/pyspark/ml/util.py
@@ -351,7 +351,8 @@ class DefaultParamsWritable(MLWritable):
class stores all data as :py:class:`Param` values, then extending this trait will provide
a default implementation of writing saved instances of the class.
This only handles simple :py:class:`Param` types; e.g., it will not handle
- :py:class:`Dataset`. See :py:class:`DefaultParamsReadable`, the counterpart to this trait.
+ :py:class:`pyspark.sql.DataFrame`. See :py:class:`DefaultParamsReadable`, the counterpart
+ to this class.
.. versionadded:: 2.3.0
"""
@@ -460,8 +461,8 @@ class DefaultParamsReadable(MLReadable):
If a :py:class:`Params` class stores all data as :py:class:`Param` values,
then extending this trait will provide a default implementation of reading saved
instances of the class. This only handles simple :py:class:`Param` types;
- e.g., it will not handle :py:class:`Dataset`. See :py:class:`DefaultParamsWritable`,
- the counterpart to this trait.
+ e.g., it will not handle :py:class:`pyspark.sql.DataFrame`. See
+ :py:class:`DefaultParamsWritable`, the counterpart to this class.
.. versionadded:: 2.3.0
"""
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 3085092..4e45a6b 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -116,7 +116,7 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
return RDD(rdd.toJavaRDD(), self._sc, UTF8Deserializer(use_unicode))
def registerTempTable(self, name):
- """Registers this DataFrame as a temporary table using the given name.
+ """Registers this :class:`DataFrame` as a temporary table using the given name.
The lifetime of this temporary table is tied to the :class:`SparkSession`
that was used to create this :class:`DataFrame`.
@@ -422,12 +422,12 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
@property
def isStreaming(self):
- """Returns ``True`` if this :class:`Dataset` contains one or more sources that continuously
- return data as it arrives. A :class:`Dataset` that reads data from a streaming source
- must be executed as a :class:`StreamingQuery` using the :func:`start` method in
- :class:`DataStreamWriter`. Methods that return a single answer, (e.g., :func:`count` or
- :func:`collect`) will throw an :class:`AnalysisException` when there is a streaming
- source present.
+ """Returns ``True`` if this :class:`DataFrame` contains one or more sources that
+ continuously return data as it arrives. A :class:`DataFrame` that reads data from a
+ streaming source must be executed as a :class:`StreamingQuery` using the :func:`start`
+ method in :class:`DataStreamWriter`. Methods that return a single answer, (e.g.,
+ :func:`count` or :func:`collect`) will throw an :class:`AnalysisException` when there
+ is a streaming source present.
.. versionadded:: 2.0.0
@@ -527,10 +527,10 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
return None
def checkpoint(self, eager=True):
- """Returns a checkpointed version of this Dataset. Checkpointing can be used to truncate the
- logical plan of this :class:`DataFrame`, which is especially useful in iterative algorithms
- where the plan may grow exponentially. It will be saved to files inside the checkpoint
- directory set with :meth:`SparkContext.setCheckpointDir`.
+ """Returns a checkpointed version of this :class:`DataFrame`. Checkpointing can be used to
+ truncate the logical plan of this :class:`DataFrame`, which is especially useful in
+ iterative algorithms where the plan may grow exponentially. It will be saved to files
+ inside the checkpoint directory set with :meth:`SparkContext.setCheckpointDir`.
.. versionadded:: 2.1.0
@@ -547,8 +547,8 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
return DataFrame(jdf, self.sql_ctx)
def localCheckpoint(self, eager=True):
- """Returns a locally checkpointed version of this Dataset. Checkpointing can be used to
- truncate the logical plan of this :class:`DataFrame`, which is especially useful in
+ """Returns a locally checkpointed version of this :class:`DataFrame`. Checkpointing can be
+ used to truncate the logical plan of this :class:`DataFrame`, which is especially useful in
iterative algorithms where the plan may grow exponentially. Local checkpoints are
stored in the executors using the caching subsystem and therefore they are not reliable.
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org