You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2017/09/28 01:24:57 UTC

spark git commit: [MINOR] Fixed up pandas_udf related docs and formatting

Repository: spark
Updated Branches:
  refs/heads/master 9244957b5 -> 7bf4da8a3


[MINOR] Fixed up pandas_udf related docs and formatting

## What changes were proposed in this pull request?

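Fixed some minor issues with pandas_udf-related docs and formatting: corrected docstring wording in the Arrow serializers, fixed inline-comment spacing in the `pandas_udf` doctest, and removed a redundant local variable before the return in `pandas_udf`.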

## How was this patch tested?

N/A (docstring and formatting changes only; no functional change)

Author: Bryan Cutler <cu...@gmail.com>

Closes #19375 from BryanCutler/arrow-pandas_udf-cleanup-minor.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7bf4da8a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7bf4da8a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7bf4da8a

Branch: refs/heads/master
Commit: 7bf4da8a33c33b03bbfddc698335fe9b86ce1e0e
Parents: 9244957
Author: Bryan Cutler <cu...@gmail.com>
Authored: Thu Sep 28 10:24:51 2017 +0900
Committer: hyukjinkwon <gu...@gmail.com>
Committed: Thu Sep 28 10:24:51 2017 +0900

----------------------------------------------------------------------
 python/pyspark/serializers.py   | 6 +++---
 python/pyspark/sql/functions.py | 6 ++----
 2 files changed, 5 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/7bf4da8a/python/pyspark/serializers.py
----------------------------------------------------------------------
diff --git a/python/pyspark/serializers.py b/python/pyspark/serializers.py
index db77b7e..ad18bd0 100644
--- a/python/pyspark/serializers.py
+++ b/python/pyspark/serializers.py
@@ -191,7 +191,7 @@ class FramedSerializer(Serializer):
 
 class ArrowSerializer(FramedSerializer):
     """
-    Serializes an Arrow stream.
+    Serializes bytes as Arrow data with the Arrow file format.
     """
 
     def dumps(self, batch):
@@ -239,7 +239,7 @@ class ArrowStreamPandasSerializer(Serializer):
 
     def dump_stream(self, iterator, stream):
         """
-        Make ArrowRecordBatches from Pandas Serieses and serialize. Input is a single series or
+        Make ArrowRecordBatches from Pandas Series and serialize. Input is a single series or
         a list of series accompanied by an optional pyarrow type to coerce the data to.
         """
         import pyarrow as pa
@@ -257,7 +257,7 @@ class ArrowStreamPandasSerializer(Serializer):
 
     def load_stream(self, stream):
         """
-        Deserialize ArrowRecordBatchs to an Arrow table and return as a list of pandas.Series.
+        Deserialize ArrowRecordBatches to an Arrow table and return as a list of pandas.Series.
         """
         import pyarrow as pa
         reader = pa.open_stream(stream)

http://git-wip-us.apache.org/repos/asf/spark/blob/7bf4da8a/python/pyspark/sql/functions.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 63e9a83..b45a59d 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -2199,16 +2199,14 @@ def pandas_udf(f=None, returnType=StringType()):
     ...
     >>> df = spark.createDataFrame([(1, "John Doe", 21)], ("id", "name", "age"))
     >>> df.select(slen("name").alias("slen(name)"), to_upper("name"), add_one("age")) \\
-    ...     .show() # doctest: +SKIP
+    ...     .show()  # doctest: +SKIP
     +----------+--------------+------------+
     |slen(name)|to_upper(name)|add_one(age)|
     +----------+--------------+------------+
     |         8|      JOHN DOE|          22|
     +----------+--------------+------------+
     """
-    wrapped_udf = _create_udf(f, returnType=returnType, vectorized=True)
-
-    return wrapped_udf
+    return _create_udf(f, returnType=returnType, vectorized=True)
 
 
 blacklist = ['map', 'since', 'ignore_unicode_prefix']


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org