Posted to commits@spark.apache.org by ho...@apache.org on 2017/02/13 23:24:01 UTC

spark git commit: [SPARK-19429][PYTHON][SQL] Support slice arguments in Column.__getitem__

Repository: spark
Updated Branches:
  refs/heads/master 0169360ef -> e02ac303c


[SPARK-19429][PYTHON][SQL] Support slice arguments in Column.__getitem__

## What changes were proposed in this pull request?

- Add support for `slice` arguments in `Column.__getitem__` (usage sketch below).
- Remove obsolete `__getslice__` bindings.
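
A minimal usage sketch of the new behaviour (the local `spark` session and example data below are illustrative, not part of this patch). As the diff shows, a slice delegates to `Column.substr(k.start, k.stop)`, so the arguments follow `substr` semantics (1-based start position and length) rather than Python list slicing, while non-slice keys keep the previous `apply` behaviour:

```python
from pyspark.sql import SparkSession

# Illustrative local session and data; any existing SparkSession/DataFrame behaves the same way.
spark = SparkSession.builder.master("local[1]").appName("column-getitem-slice").getOrCreate()
df = spark.createDataFrame([("Alice", {"age": 2})], ["name", "props"])

# slice -> Column.substr(1, 3): three characters starting at 1-based position 1 -> "Ali"
df.select(df.name[1:3].alias("prefix")).show()

# non-slice key -> the old "apply" behaviour (map/array/struct element access)
df.select(df.props["age"].alias("age")).show()

# a slice with a step is rejected before anything is sent to the JVM
try:
    df.name[1:3:2]
except ValueError as e:
    print(e)  # slice with step is not supported.

spark.stop()
```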

## How was this patch tested?

Existing unit tests, plus additional tests covering `[]` with `slice` arguments.
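
For reference, a sketch of running just the new test case with the standard `unittest` machinery (this assumes a local PySpark development setup, i.e. `python/` and py4j on `PYTHONPATH` and a reachable local JVM; the `python/run-tests` script exercises the full suite):

```python
import unittest
from pyspark.sql.tests import SQLTests

# SQLTests starts a local SparkContext in setUpClass, so a working Spark build is required.
suite = unittest.TestSuite()
suite.addTest(SQLTests("test_column_getitem"))
unittest.TextTestRunner(verbosity=2).run(suite)
```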

Author: zero323 <ze...@users.noreply.github.com>

Closes #16771 from zero323/SPARK-19429.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e02ac303
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e02ac303
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e02ac303

Branch: refs/heads/master
Commit: e02ac303c6356cdf7fffec7361311d828a723afe
Parents: 0169360
Author: zero323 <ze...@users.noreply.github.com>
Authored: Mon Feb 13 15:23:56 2017 -0800
Committer: Holden Karau <ho...@us.ibm.com>
Committed: Mon Feb 13 15:23:56 2017 -0800

----------------------------------------------------------------------
 python/pyspark/sql/column.py | 11 ++++++++---
 python/pyspark/sql/tests.py  |  8 ++++++++
 2 files changed, 16 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/e02ac303/python/pyspark/sql/column.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index ec059d6..73c8672 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -180,7 +180,6 @@ class Column(object):
 
     # container operators
     __contains__ = _bin_op("contains")
-    __getitem__ = _bin_op("apply")
 
     # bitwise operators
     bitwiseOR = _bin_op("bitwiseOR")
@@ -236,6 +235,14 @@ class Column(object):
             raise AttributeError(item)
         return self.getField(item)
 
+    def __getitem__(self, k):
+        if isinstance(k, slice):
+            if k.step is not None:
+                raise ValueError("slice with step is not supported.")
+            return self.substr(k.start, k.stop)
+        else:
+            return _bin_op("apply")(self, k)
+
     def __iter__(self):
         raise TypeError("Column is not iterable")
 
@@ -267,8 +274,6 @@ class Column(object):
             raise TypeError("Unexpected type: %s" % type(startPos))
         return Column(jc)
 
-    __getslice__ = substr
-
     @ignore_unicode_prefix
     @since(1.5)
     def isin(self, *cols):

http://git-wip-us.apache.org/repos/asf/spark/blob/e02ac303/python/pyspark/sql/tests.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index ab9d3f6..d9d0333 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -874,6 +874,14 @@ class SQLTests(ReusedPySparkTestCase):
         self.assertTrue(all(isinstance(c, Column) for c in css))
         self.assertTrue(isinstance(ci.cast(LongType()), Column))
 
+    def test_column_getitem(self):
+        from pyspark.sql.functions import col
+
+        self.assertIsInstance(col("foo")[1:3], Column)
+        self.assertIsInstance(col("foo")[0], Column)
+        self.assertIsInstance(col("foo")["bar"], Column)
+        self.assertRaises(ValueError, lambda: col("foo")[0:10:2])
+
     def test_column_select(self):
         df = self.df
         self.assertEqual(self.testData, df.select("*").collect())

