You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/03/22 18:06:49 UTC

arrow git commit: ARROW-552: [Python] Implement getitem for DictionaryArray by returning a value from the dictionary

Repository: arrow
Updated Branches:
  refs/heads/master ced9d766d -> 2406d4eed


ARROW-552: [Python] Implement getitem for DictionaryArray by returning a value from the dictionary

Author: Miki Tebeka <mi...@gmail.com>
Author: Wes McKinney <we...@twosigma.com>

Closes #414 from wesm/ARROW-552 and squashes the following commits:

8a039b5 [Wes McKinney] Implement DictionaryArray.getitem by indexing into the dictionary. Add indices and dictionary properties
e700b45 [Miki Tebeka] ARROW-552: [Python] Add scalar value support for Dictionary type (WIP)


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/2406d4ee
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/2406d4ee
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/2406d4ee

Branch: refs/heads/master
Commit: 2406d4eed9af41b1ef60c53834aced036a933327
Parents: ced9d76
Author: Miki Tebeka <mi...@gmail.com>
Authored: Wed Mar 22 14:06:42 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Wed Mar 22 14:06:42 2017 -0400

----------------------------------------------------------------------
 python/pyarrow/array.pxd             |  4 +++-
 python/pyarrow/array.pyx             | 25 +++++++++++++++++++++++++
 python/pyarrow/scalar.pyx            |  2 +-
 python/pyarrow/tests/test_scalars.py | 13 +++++++++++++
 4 files changed, 42 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/2406d4ee/python/pyarrow/array.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/array.pxd b/python/pyarrow/array.pxd
index 56bb53d..c3e7997 100644
--- a/python/pyarrow/array.pxd
+++ b/python/pyarrow/array.pxd
@@ -109,7 +109,9 @@ cdef class BinaryArray(Array):
 
 
 cdef class DictionaryArray(Array):
-    pass
+    cdef:
+        object _indices, _dictionary  # caches for the boxed indices / dictionary arrays; filled on first access by the properties in array.pyx
+
 
 
 cdef wrap_array_output(PyObject* output)

http://git-wip-us.apache.org/repos/asf/arrow/blob/2406d4ee/python/pyarrow/array.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/array.pyx b/python/pyarrow/array.pyx
index 6afeaa0..795076c 100644
--- a/python/pyarrow/array.pyx
+++ b/python/pyarrow/array.pyx
@@ -406,6 +406,31 @@ cdef class BinaryArray(Array):
 
 cdef class DictionaryArray(Array):
 
+    cdef getitem(self, int64_t i):  # box the dictionary entry selected by the index stored at position i
+        cdef Array dictionary = self.dictionary
+        cdef int64_t index = self.indices[i].as_py()  # NOTE(review): presumably raises if slot i is null (None -> int64_t); confirm null handling
+        return scalar.box_scalar(dictionary.type, dictionary.sp_array, index)
+
+    property dictionary:  # boxed dictionary array; created on first access, then cached in _dictionary
+
+        def __get__(self):
+            cdef CDictionaryArray* darr = <CDictionaryArray*>(self.ap)  # view the wrapped array pointer as a CDictionaryArray
+
+            if self._dictionary is None:  # not boxed yet
+                self._dictionary = box_array(darr.dictionary())
+
+            return self._dictionary
+
+    property indices:  # boxed indices array; created on first access, then cached in _indices
+
+        def __get__(self):
+            cdef CDictionaryArray* darr = <CDictionaryArray*>(self.ap)  # view the wrapped array pointer as a CDictionaryArray
+
+            if self._indices is None:  # not boxed yet
+                self._indices = box_array(darr.indices())
+
+            return self._indices
+
     @staticmethod
     def from_arrays(indices, dictionary, mask=None,
                     MemoryPool memory_pool=None):

http://git-wip-us.apache.org/repos/asf/arrow/blob/2406d4ee/python/pyarrow/scalar.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/scalar.pyx b/python/pyarrow/scalar.pyx
index 8c88f90..1b7e67b 100644
--- a/python/pyarrow/scalar.pyx
+++ b/python/pyarrow/scalar.pyx
@@ -241,7 +241,7 @@ cdef dict _scalar_classes = {
     Type_DOUBLE: DoubleValue,
     Type_LIST: ListValue,
     Type_BINARY: BinaryValue,
-    Type_STRING: StringValue,
+    Type_STRING: StringValue
 }
 
 cdef object box_scalar(DataType type, const shared_ptr[CArray]& sp_array,

http://git-wip-us.apache.org/repos/asf/arrow/blob/2406d4ee/python/pyarrow/tests/test_scalars.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py
index ef600a0..d56481c 100644
--- a/python/pyarrow/tests/test_scalars.py
+++ b/python/pyarrow/tests/test_scalars.py
@@ -16,6 +16,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import pandas as pd
+
 from pyarrow.compat import unittest, u, unicode_type
 import pyarrow as A
 
@@ -100,3 +102,14 @@ class TestScalars(unittest.TestCase):
 
         v = arr[3]
         assert len(v) == 0
+
+    def test_dictionary(self):  # each DictionaryArray item should unbox to the original categorical value
+        colors = ['red', 'green', 'blue']
+        values = pd.Series(colors * 4)
+
+        categorical = pd.Categorical(values, categories=colors)
+
+        v = A.DictionaryArray.from_arrays(categorical.codes,  # codes are passed as the indices
+                                          categorical.categories)  # categories are passed as the dictionary
+        for i, c in enumerate(values):
+            assert v[i].as_py() == c