You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2018/07/20 09:53:24 UTC
spark git commit: [SPARK-24868][PYTHON] add sequence function in Python
Repository: spark
Updated Branches:
refs/heads/master 2b91d9918 -> 0ab07b357
[SPARK-24868][PYTHON] add sequence function in Python
## What changes were proposed in this pull request?
Add ```sequence``` in functions.py
## How was this patch tested?
Add doctest.
Author: Huaxin Gao <hu...@us.ibm.com>
Closes #21820 from huaxingao/spark-24868.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0ab07b35
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0ab07b35
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0ab07b35
Branch: refs/heads/master
Commit: 0ab07b357b5ddae29f815734237013c21d2d2b4e
Parents: 2b91d99
Author: Huaxin Gao <hu...@us.ibm.com>
Authored: Fri Jul 20 17:53:14 2018 +0800
Committer: hyukjinkwon <gu...@apache.org>
Committed: Fri Jul 20 17:53:14 2018 +0800
----------------------------------------------------------------------
python/pyspark/sql/functions.py | 22 ++++++++++++++++++++++
1 file changed, 22 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/0ab07b35/python/pyspark/sql/functions.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 5ef7398..f2e6633 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -2551,6 +2551,28 @@ def map_concat(*cols):
return Column(jc)
@since(2.4)
def sequence(start, stop, step=None):
    """
    Generate a sequence of integers from `start` to `stop`, incrementing by `step`.
    When `step` is omitted, the increment defaults to 1 if `start` <= `stop`
    and to -1 otherwise.

    >>> df1 = spark.createDataFrame([(-2, 2)], ('C1', 'C2'))
    >>> df1.select(sequence('C1', 'C2').alias('r')).collect()
    [Row(r=[-2, -1, 0, 1, 2])]
    >>> df2 = spark.createDataFrame([(4, -4, -2)], ('C1', 'C2', 'C3'))
    >>> df2.select(sequence('C1', 'C2', 'C3').alias('r')).collect()
    [Row(r=[4, 2, 0, -2, -4])]
    """
    sc = SparkContext._active_spark_context
    # py4j dispatches on argument count, so building the argument tuple once
    # and unpacking it selects the same 2-arg / 3-arg JVM overload as an
    # explicit if/else would.
    cols = (start, stop) if step is None else (start, stop, step)
    jcols = [_to_java_column(c) for c in cols]
    return Column(sc._jvm.functions.sequence(*jcols))
+
+
# ---------------------------- User Defined Function ----------------------------------
class PandasUDFType(object):
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org