You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/01/02 13:48:49 UTC
arrow git commit: ARROW-449: Python: Conversion from pyarrow.{Table,RecordBatch} to a Python dict
Repository: arrow
Updated Branches:
refs/heads/master e8b6231b2 -> 806239fdd
ARROW-449: Python: Conversion from pyarrow.{Table,RecordBatch} to a Python dict
Author: Uwe L. Korn <uw...@xhochy.com>
Closes #262 from xhochy/ARROW-449 and squashes the following commits:
5f15533 [Uwe L. Korn] Fix string conversion routines
9d72c85 [Uwe L. Korn] ARROW-449: Python: Conversion from pyarrow.{Table,RecordBatch} to a Python dict
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/806239fd
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/806239fd
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/806239fd
Branch: refs/heads/master
Commit: 806239fdd102649b7afa1dbe9aa1c09911f2885e
Parents: e8b6231
Author: Uwe L. Korn <uw...@xhochy.com>
Authored: Mon Jan 2 08:48:20 2017 -0500
Committer: Wes McKinney <we...@twosigma.com>
Committed: Mon Jan 2 08:48:20 2017 -0500
----------------------------------------------------------------------
python/pyarrow/table.pyx | 36 ++++++++++++++++++++++++++++++++-
python/pyarrow/tests/test_table.py | 10 ++++++++-
2 files changed, 44 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/806239fd/python/pyarrow/table.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/table.pyx b/python/pyarrow/table.pyx
index 20137e3..9255431 100644
--- a/python/pyarrow/table.pyx
+++ b/python/pyarrow/table.pyx
@@ -36,6 +36,9 @@ from pyarrow.compat import frombytes, tobytes
cimport cpython
+from collections import OrderedDict
+
+
cdef class ChunkedArray:
"""
Array backed via one or more memory chunks.
@@ -204,7 +207,7 @@ cdef class Column:
-------
str
"""
- return frombytes(self.column.name())
+ return bytes(self.column.name()).decode('utf8')
@property
def type(self):
@@ -345,6 +348,22 @@ cdef class RecordBatch:
return self.batch.Equals(deref(other.batch))
+ def to_pydict(self):
+ """
+ Convert the arrow::RecordBatch to an OrderedDict
+
+ Returns
+ -------
+ OrderedDict
+ """
+ entries = []
+ for i in range(self.batch.num_columns()):
+ name = bytes(self.batch.column_name(i)).decode('utf8')
+ column = self[i].to_pylist()
+ entries.append((name, column))
+ return OrderedDict(entries)
+
+
def to_pandas(self):
"""
Convert the arrow::RecordBatch to a pandas DataFrame
@@ -635,6 +654,21 @@ cdef class Table:
mgr = table_to_blockmanager(self.sp_table, nthreads)
return pd.DataFrame(mgr)
+ def to_pydict(self):
+ """
+ Convert the arrow::Table to an OrderedDict
+
+ Returns
+ -------
+ OrderedDict
+ """
+ entries = []
+ for i in range(self.table.num_columns()):
+ name = self.column(i).name
+ column = self.column(i).to_pylist()
+ entries.append((name, column))
+ return OrderedDict(entries)
+
@property
def name(self):
"""
http://git-wip-us.apache.org/repos/asf/arrow/blob/806239fd/python/pyarrow/tests/test_table.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index 2546314..9985b3e 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -15,8 +15,8 @@
# specific language governing permissions and limitations
# under the License.
+from collections import OrderedDict
import numpy as np
-
from pandas.util.testing import assert_frame_equal
import pandas as pd
import pytest
@@ -35,6 +35,10 @@ def test_recordbatch_basics():
assert len(batch) == 5
assert batch.num_rows == 5
assert batch.num_columns == len(data)
+ assert batch.to_pydict() == OrderedDict([
+ ('c0', [0, 1, 2, 3, 4]),
+ ('c1', [-10, -5, 0, 5, 10])
+ ])
def test_recordbatch_from_to_pandas():
@@ -97,6 +101,10 @@ def test_table_basics():
assert table.num_rows == 5
assert table.num_columns == 2
assert table.shape == (5, 2)
+ assert table.to_pydict() == OrderedDict([
+ ('a', [0, 1, 2, 3, 4]),
+ ('b', [-10, -5, 0, 5, 10])
+ ])
for col in table.itercolumns():
for chunk in col.data.iterchunks():