You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ks...@apache.org on 2019/04/19 12:48:23 UTC
[arrow] branch master updated: ARROW-5178: [Python] Add Table.from_pydict()
This is an automated email from the ASF dual-hosted git repository.
kszucs pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 5a022d8 ARROW-5178: [Python] Add Table.from_pydict()
5a022d8 is described below
commit 5a022d8547fbadd6e562a1c786f294539b2d18f0
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Fri Apr 19 14:48:04 2019 +0200
ARROW-5178: [Python] Add Table.from_pydict()
Author: Antoine Pitrou <an...@python.org>
Closes #4164 from pitrou/ARROW-5178-table-from-pydict and squashes the following commits:
7b8fad5a3 <Antoine Pitrou> ARROW-5178: Add Table.from_pydict()
---
python/pyarrow/lib.pyx | 1 -
python/pyarrow/table.pxi | 38 +++++++++++++++++++++++++++-
python/pyarrow/tests/test_table.py | 52 ++++++++++++++++++++++++++++++++++++++
3 files changed, 89 insertions(+), 2 deletions(-)
diff --git a/python/pyarrow/lib.pyx b/python/pyarrow/lib.pyx
index b7c2a93..766811e 100644
--- a/python/pyarrow/lib.pyx
+++ b/python/pyarrow/lib.pyx
@@ -23,7 +23,6 @@ from collections import OrderedDict
import datetime
import decimal as _pydecimal
import json
-import multiprocessing
import numpy as np
import os
import six
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index 53678f2..b8434a8 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -1160,6 +1160,8 @@ cdef class Table(_PandasConvertible):
inferred. If Arrays passed, this argument is required
schema : Schema, default None
If not passed, will be inferred from the arrays
+ metadata : dict or Mapping, default None
+ Optional metadata for the schema (if inferred).
Returns
-------
@@ -1176,7 +1178,9 @@ cdef class Table(_PandasConvertible):
_schema_from_arrays(arrays, names, metadata, &c_schema)
elif schema is not None:
if names is not None:
- raise ValueError('Cannot pass schema and arrays')
+ raise ValueError('Cannot pass both schema and names')
+ if metadata is not None:
+ raise ValueError('Cannot pass both schema and metadata')
cy_schema = schema
if len(schema) != len(arrays):
@@ -1215,6 +1219,38 @@ cdef class Table(_PandasConvertible):
return pyarrow_wrap_table(CTable.Make(c_schema, columns))
@staticmethod
+ def from_pydict(mapping, schema=None, metadata=None):
+ """
+ Construct a Table from a mapping of strings to Arrays or Python lists
+
+ Parameters
+ ----------
+ mapping : dict or Mapping
+ A mapping of strings to Arrays or Python lists.
+ schema : Schema, default None
+ If not passed, will be inferred from the Mapping values
+ metadata : dict or Mapping, default None
+ Optional metadata for the schema (if inferred).
+
+ Returns
+ -------
+ pyarrow.Table
+
+ """
+ names = []
+ arrays = []
+ for k, v in mapping.items():
+ names.append(k)
+ if not isinstance(v, (Array, ChunkedArray)):
+ v = array(v)
+ arrays.append(v)
+ if schema is None:
+ return Table.from_arrays(arrays, names, metadata=metadata)
+ else:
+ # from_arrays will raise if metadata is not None
+ return Table.from_arrays(arrays, schema=schema, metadata=metadata)
+
+ @staticmethod
def from_batches(batches, Schema schema=None):
"""
Construct a Table from a sequence or iterator of Arrow RecordBatches
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index 91a87bf..4fcc7e3 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -906,3 +906,55 @@ def test_invalid_table_construct():
with pytest.raises(pa.lib.ArrowInvalid):
pa.Table.from_arrays(arrays, names=["a1", "a2"])
+
+
+def test_table_from_pydict():
+ table = pa.Table.from_pydict({})
+ assert table.num_columns == 0
+ assert table.num_rows == 0
+ assert table.schema == pa.schema([])
+ assert table.to_pydict() == {}
+
+ # With Arrow arrays as values
+ data = OrderedDict([('strs', pa.array([u'', u'foo', u'bar'])),
+ ('floats', pa.array([4.5, 5, None]))])
+ schema = pa.schema([('strs', pa.utf8()), ('floats', pa.float64())])
+ table = pa.Table.from_pydict(data)
+ assert table.num_columns == 2
+ assert table.num_rows == 3
+ assert table.schema == schema
+
+ # With chunked arrays as values
+ data = OrderedDict([('strs', pa.chunked_array([[u''], [u'foo', u'bar']])),
+ ('floats', pa.chunked_array([[4.5], [5, None]]))])
+ table = pa.Table.from_pydict(data)
+ assert table.num_columns == 2
+ assert table.num_rows == 3
+ assert table.schema == schema
+
+ # With plain Python lists as values (converted via pa.array)
+ data = OrderedDict([('strs', [u'', u'foo', u'bar']),
+ ('floats', [4.5, 5, None])])
+ table = pa.Table.from_pydict(data)
+ assert table.num_columns == 2
+ assert table.num_rows == 3
+ assert table.schema == schema
+ assert table.to_pydict() == data
+
+ # With metadata and inferred schema
+ metadata = {b'foo': b'bar'}
+ schema = schema.add_metadata(metadata)
+ table = pa.Table.from_pydict(data, metadata=metadata)
+ assert table.schema == schema
+ assert table.schema.metadata == metadata
+ assert table.to_pydict() == data
+
+ # With explicit schema
+ table = pa.Table.from_pydict(data, schema=schema)
+ assert table.schema == schema
+ assert table.schema.metadata == metadata
+ assert table.to_pydict() == data
+
+ # Passing both an explicit schema and metadata must raise
+ with pytest.raises(ValueError):
+ pa.Table.from_pydict(data, schema=schema, metadata=metadata)