You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2018/07/20 09:53:24 UTC

spark git commit: [SPARK-24868][PYTHON] add sequence function in Python

Repository: spark
Updated Branches:
  refs/heads/master 2b91d9918 -> 0ab07b357


[SPARK-24868][PYTHON] add sequence function in Python

## What changes were proposed in this pull request?

Add ```sequence``` in functions.py

## How was this patch tested?

Add doctest.

Author: Huaxin Gao <hu...@us.ibm.com>

Closes #21820 from huaxingao/spark-24868.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0ab07b35
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0ab07b35
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0ab07b35

Branch: refs/heads/master
Commit: 0ab07b357b5ddae29f815734237013c21d2d2b4e
Parents: 2b91d99
Author: Huaxin Gao <hu...@us.ibm.com>
Authored: Fri Jul 20 17:53:14 2018 +0800
Committer: hyukjinkwon <gu...@apache.org>
Committed: Fri Jul 20 17:53:14 2018 +0800

----------------------------------------------------------------------
 python/pyspark/sql/functions.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/0ab07b35/python/pyspark/sql/functions.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 5ef7398..f2e6633 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -2551,6 +2551,28 @@ def map_concat(*cols):
     return Column(jc)
 
 
@since(2.4)
def sequence(start, stop, step=None):
    """
    Generate a sequence of integers from `start` to `stop`, incrementing by `step`.
    If `step` is not set, incrementing by 1 if `start` is less than or equal to `stop`,
    otherwise -1.

    >>> df1 = spark.createDataFrame([(-2, 2)], ('C1', 'C2'))
    >>> df1.select(sequence('C1', 'C2').alias('r')).collect()
    [Row(r=[-2, -1, 0, 1, 2])]
    >>> df2 = spark.createDataFrame([(4, -4, -2)], ('C1', 'C2', 'C3'))
    >>> df2.select(sequence('C1', 'C2', 'C3').alias('r')).collect()
    [Row(r=[4, 2, 0, -2, -4])]
    """
    sc = SparkContext._active_spark_context
    # Dispatch to the matching JVM overload: two-arg form lets Spark pick the
    # step direction (+1 or -1); three-arg form passes the caller's step column.
    if step is None:
        jc = sc._jvm.functions.sequence(_to_java_column(start), _to_java_column(stop))
    else:
        jc = sc._jvm.functions.sequence(
            _to_java_column(start), _to_java_column(stop), _to_java_column(step))
    return Column(jc)
+
+
 # ---------------------------- User Defined Function ----------------------------------
 
 class PandasUDFType(object):


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org