You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/01/02 13:48:49 UTC

arrow git commit: ARROW-449: Python: Conversion from pyarrow.{Table, RecordBatch} to a Python dict

Repository: arrow
Updated Branches:
  refs/heads/master e8b6231b2 -> 806239fdd


ARROW-449: Python: Conversion from pyarrow.{Table,RecordBatch} to a Python dict

Author: Uwe L. Korn <uw...@xhochy.com>

Closes #262 from xhochy/ARROW-449 and squashes the following commits:

5f15533 [Uwe L. Korn] Fix string conversion routines
9d72c85 [Uwe L. Korn] ARROW-449: Python: Conversion from pyarrow.{Table,RecordBatch} to a Python dict


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/806239fd
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/806239fd
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/806239fd

Branch: refs/heads/master
Commit: 806239fdd102649b7afa1dbe9aa1c09911f2885e
Parents: e8b6231
Author: Uwe L. Korn <uw...@xhochy.com>
Authored: Mon Jan 2 08:48:20 2017 -0500
Committer: Wes McKinney <we...@twosigma.com>
Committed: Mon Jan 2 08:48:20 2017 -0500

----------------------------------------------------------------------
 python/pyarrow/table.pyx           | 36 ++++++++++++++++++++++++++++++++-
 python/pyarrow/tests/test_table.py | 10 ++++++++-
 2 files changed, 44 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/806239fd/python/pyarrow/table.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/table.pyx b/python/pyarrow/table.pyx
index 20137e3..9255431 100644
--- a/python/pyarrow/table.pyx
+++ b/python/pyarrow/table.pyx
@@ -36,6 +36,9 @@ from pyarrow.compat import frombytes, tobytes
 
 cimport cpython
 
+from collections import OrderedDict
+
+
 cdef class ChunkedArray:
     """
     Array backed via one or more memory chunks.
@@ -204,7 +207,7 @@ cdef class Column:
         -------
         str
         """
-        return frombytes(self.column.name())
+        return bytes(self.column.name()).decode('utf8')
 
     @property
     def type(self):
@@ -345,6 +348,22 @@ cdef class RecordBatch:
 
         return self.batch.Equals(deref(other.batch))
 
+    def to_pydict(self):
+        """
+        Convert the arrow::RecordBatch to an OrderedDict
+
+        Returns
+        -------
+        OrderedDict
+        """
+        entries = []
+        for i in range(self.batch.num_columns()):
+            name = bytes(self.batch.column_name(i)).decode('utf8')
+            column = self[i].to_pylist()
+            entries.append((name, column))
+        return OrderedDict(entries)
+
+
     def to_pandas(self):
         """
         Convert the arrow::RecordBatch to a pandas DataFrame
@@ -635,6 +654,21 @@ cdef class Table:
         mgr = table_to_blockmanager(self.sp_table, nthreads)
         return pd.DataFrame(mgr)
 
+    def to_pydict(self):
+        """
+        Convert the arrow::Table to an OrderedDict
+
+        Returns
+        -------
+        OrderedDict
+        """
+        entries = []
+        for i in range(self.table.num_columns()):
+            name = self.column(i).name
+            column = self.column(i).to_pylist()
+            entries.append((name, column))
+        return OrderedDict(entries)
+
     @property
     def name(self):
         """

http://git-wip-us.apache.org/repos/asf/arrow/blob/806239fd/python/pyarrow/tests/test_table.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index 2546314..9985b3e 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -15,8 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from collections import OrderedDict
 import numpy as np
-
 from pandas.util.testing import assert_frame_equal
 import pandas as pd
 import pytest
@@ -35,6 +35,10 @@ def test_recordbatch_basics():
     assert len(batch) == 5
     assert batch.num_rows == 5
     assert batch.num_columns == len(data)
+    assert batch.to_pydict() == OrderedDict([
+        ('c0', [0, 1, 2, 3, 4]),
+        ('c1', [-10, -5, 0, 5, 10])
+    ])
 
 
 def test_recordbatch_from_to_pandas():
@@ -97,6 +101,10 @@ def test_table_basics():
     assert table.num_rows == 5
     assert table.num_columns == 2
     assert table.shape == (5, 2)
+    assert table.to_pydict() == OrderedDict([
+        ('a', [0, 1, 2, 3, 4]),
+        ('b', [-10, -5, 0, 5, 10])
+    ])
 
     for col in table.itercolumns():
         for chunk in col.data.iterchunks():