Posted to commits@spark.apache.org by ma...@apache.org on 2016/03/15 03:25:53 UTC
spark git commit: [SPARK-10380][SQL] Fix confusing documentation examples for astype/drop_duplicates.
Repository: spark
Updated Branches:
refs/heads/master 4bf460979 -> 8e0b03060
[SPARK-10380][SQL] Fix confusing documentation examples for astype/drop_duplicates.
## What changes were proposed in this pull request?
We have seen users getting confused by the documentation for astype and drop_duplicates, because the examples in them do not use these functions (but instead use their aliases). This patch simply removes all examples for these functions and states that they are aliases.
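The underlying Python behavior, as a minimal standalone sketch (the toy cast function below is illustrative, not the Spark implementation): a bare assignment like astype = cast binds a second name to the very same function object, so help() and the generated API docs show the original's docstring and examples under the alias's name.

def cast(dataType):
    """Casts the column to a new type. Example: df.age.cast("string")"""
    pass

astype = cast             # plain assignment alias: the very same function object
print(astype.__doc__)     # prints cast's docstring, which never mentions astype
print(astype is cast)     # True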
## How was this patch tested?
Existing PySpark unit tests.
Closes #11543.
Author: Reynold Xin <rx...@databricks.com>
Closes #11698 from rxin/SPARK-10380.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8e0b0306
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8e0b0306
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8e0b0306
Branch: refs/heads/master
Commit: 8e0b030606927741f91317660cd14a8a5ed6e5f9
Parents: 4bf4609
Author: Reynold Xin <rx...@databricks.com>
Authored: Mon Mar 14 19:25:49 2016 -0700
Committer: Michael Armbrust <mi...@databricks.com>
Committed: Mon Mar 14 19:25:49 2016 -0700
----------------------------------------------------------------------
python/pyspark/__init__.py | 20 ++++++++++++++++++++
python/pyspark/sql/column.py | 4 ++--
python/pyspark/sql/dataframe.py | 20 +++++++++++++++-----
3 files changed, 37 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/8e0b0306/python/pyspark/__init__.py
----------------------------------------------------------------------
diff --git a/python/pyspark/__init__.py b/python/pyspark/__init__.py
index d530723..111ebaa 100644
--- a/python/pyspark/__init__.py
+++ b/python/pyspark/__init__.py
@@ -37,6 +37,8 @@ Public classes:
"""
+import types
+
from pyspark.conf import SparkConf
from pyspark.context import SparkContext
from pyspark.rdd import RDD
@@ -64,6 +66,24 @@ def since(version):
return deco
+def copy_func(f, name=None, sinceversion=None, doc=None):
+ """
+ Returns a function with same code, globals, defaults, closure, and
+ name (or provide a new name).
+ """
+ # See
+ # http://stackoverflow.com/questions/6527633/how-can-i-make-a-deepcopy-of-a-function-in-python
+ fn = types.FunctionType(f.__code__, f.__globals__, name or f.__name__, f.__defaults__,
+ f.__closure__)
+ # in case f was given attrs (note this dict is a shallow copy):
+ fn.__dict__.update(f.__dict__)
+ if doc is not None:
+ fn.__doc__ = doc
+ if sinceversion is not None:
+ fn = since(sinceversion)(fn)
+ return fn
+
+
# for back compatibility
from pyspark.sql import SQLContext, HiveContext, Row
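For illustration, a hedged standalone sketch of the copy_func pattern above, simplified to drop the since decorator; the greet/hi names are made up for this example:

import types

def copy_func(f, name=None, doc=None):
    # Build a new function object sharing f's code, globals, defaults, closure.
    fn = types.FunctionType(f.__code__, f.__globals__, name or f.__name__,
                            f.__defaults__, f.__closure__)
    fn.__dict__.update(f.__dict__)  # shallow-copy any attributes set on f
    if doc is not None:
        fn.__doc__ = doc
    return fn

def greet(name):
    """Returns a greeting for name."""
    return "hello, " + name

hi = copy_func(greet, name="hi", doc="hi() is an alias for greet().")
print(hi("spark"))    # "hello, spark" -- identical behavior
print(hi.__doc__)     # its own docstring, unlike a plain "hi = greet" alias
print(hi is greet)    # False -- a distinct function object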
http://git-wip-us.apache.org/repos/asf/spark/blob/8e0b0306/python/pyspark/sql/column.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index 3866a49..19ec6fc 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -22,7 +22,7 @@ if sys.version >= '3':
basestring = str
long = int
-from pyspark import since
+from pyspark import copy_func, since
from pyspark.context import SparkContext
from pyspark.rdd import ignore_unicode_prefix
from pyspark.sql.types import *
@@ -337,7 +337,7 @@ class Column(object):
raise TypeError("unexpected type: %s" % type(dataType))
return Column(jc)
- astype = cast
+ astype = copy_func(cast, sinceversion=1.4, doc=":func:`astype` is an alias for :func:`cast`.")
@since(1.3)
def between(self, lowerBound, upperBound):
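With this change, Column.astype is a distinct function object carrying its own one-line docstring, while runtime behavior is unchanged. A brief usage sketch (assumes a PySpark shell of this era with sqlContext in scope; the DataFrame is illustrative):

df = sqlContext.createDataFrame([(1, "a")], ["id", "val"])
df.select(df.id.astype("string")).show()   # same result as df.id.cast("string")
print(df.id.astype.__doc__)                # now states that astype is an alias for cast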
http://git-wip-us.apache.org/repos/asf/spark/blob/8e0b0306/python/pyspark/sql/dataframe.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 7008e8f..7e1854c 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -26,7 +26,7 @@ if sys.version >= '3':
else:
from itertools import imap as map
-from pyspark import since
+from pyspark import copy_func, since
from pyspark.rdd import RDD, _load_from_socket, ignore_unicode_prefix
from pyspark.serializers import BatchedSerializer, PickleSerializer, UTF8Deserializer
from pyspark.storagelevel import StorageLevel
@@ -829,8 +829,6 @@ class DataFrame(object):
raise TypeError("condition should be string or Column")
return DataFrame(jdf, self.sql_ctx)
- where = filter
-
@ignore_unicode_prefix
@since(1.3)
def groupBy(self, *cols):
@@ -1361,8 +1359,20 @@ class DataFrame(object):
# Pandas compatibility
##########################################################################################
- groupby = groupBy
- drop_duplicates = dropDuplicates
+ groupby = copy_func(
+ groupBy,
+ sinceversion=1.4,
+ doc=":func:`groupby` is an alias for :func:`groupBy`.")
+
+ drop_duplicates = copy_func(
+ dropDuplicates,
+ sinceversion=1.4,
+ doc=":func:`drop_duplicates` is an alias for :func:`dropDuplicates`.")
+
+ where = copy_func(
+ filter,
+ sinceversion=1.3,
+ doc=":func:`where` is an alias for :func:`filter`.")
def _to_scala_map(sc, jm):
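The DataFrame aliases get the same treatment: groupby, drop_duplicates, and where now carry their own docstrings while delegating to groupBy, dropDuplicates, and filter. A brief hedged usage sketch (again assuming a PySpark shell with sqlContext in scope):

df = sqlContext.createDataFrame([(1, "a"), (1, "a"), (2, "b")], ["id", "val"])
df.drop_duplicates().show()          # same as df.dropDuplicates()
df.groupby("id").count().show()      # same as df.groupBy("id").count()
df.where(df.id > 1).show()           # same as df.filter(df.id > 1)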