You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ks...@apache.org on 2019/04/19 12:48:23 UTC
[arrow] branch master updated: ARROW-5178: [Python] Add Table.from_pydict()

This is an automated email from the ASF dual-hosted git repository.

kszucs pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 5a022d8  ARROW-5178: [Python] Add Table.from_pydict()
5a022d8 is described below

commit 5a022d8547fbadd6e562a1c786f294539b2d18f0
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Fri Apr 19 14:48:04 2019 +0200

    ARROW-5178: [Python] Add Table.from_pydict()
    
    Author: Antoine Pitrou <an...@python.org>
    
    Closes #4164 from pitrou/ARROW-5178-table-from-pydict and squashes the following commits:
    
    7b8fad5a3 <Antoine Pitrou> ARROW-5178:  Add Table.from_pydict()
---
 python/pyarrow/lib.pyx             |  1 -
 python/pyarrow/table.pxi           | 38 +++++++++++++++++++++++++++-
 python/pyarrow/tests/test_table.py | 52 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 89 insertions(+), 2 deletions(-)

diff --git a/python/pyarrow/lib.pyx b/python/pyarrow/lib.pyx
index b7c2a93..766811e 100644
--- a/python/pyarrow/lib.pyx
+++ b/python/pyarrow/lib.pyx
@@ -23,7 +23,6 @@ from collections import OrderedDict
 import datetime
 import decimal as _pydecimal
 import json
-import multiprocessing
 import numpy as np
 import os
 import six
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index 53678f2..b8434a8 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -1160,6 +1160,8 @@ cdef class Table(_PandasConvertible):
             inferred. If Arrays passed, this argument is required
         schema : Schema, default None
             If not passed, will be inferred from the arrays
+        metadata : dict or Mapping, default None
+            Optional metadata for the schema (if inferred).
 
         Returns
         -------
@@ -1176,7 +1178,9 @@ cdef class Table(_PandasConvertible):
             _schema_from_arrays(arrays, names, metadata, &c_schema)
         elif schema is not None:
             if names is not None:
-                raise ValueError('Cannot pass schema and arrays')
+                raise ValueError('Cannot pass both schema and names')
+            if metadata is not None:
+                raise ValueError('Cannot pass both schema and metadata')
             cy_schema = schema
 
             if len(schema) != len(arrays):
@@ -1215,6 +1219,38 @@ cdef class Table(_PandasConvertible):
         return pyarrow_wrap_table(CTable.Make(c_schema, columns))
 
     @staticmethod
+    def from_pydict(mapping, schema=None, metadata=None):
+        """
+        Construct a Table from a mapping of column names to column values
+
+        Parameters
+        ----------
+        mapping : dict or Mapping
+            A mapping of strings to Arrays, ChunkedArrays or Python lists.
+        schema : Schema, default None
+            If not passed, will be inferred from the Mapping values
+        metadata : dict or Mapping, default None
+            Optional metadata for the schema (if inferred).
+
+        Returns
+        -------
+        pyarrow.Table
+
+        """
+        names = []
+        arrays = []
+        for k, v in mapping.items():
+            names.append(k)
+            if not isinstance(v, (Array, ChunkedArray)):
+                v = array(v)  # anything else is converted with array()
+            arrays.append(v)
+        if schema is None:
+            return Table.from_arrays(arrays, names, metadata=metadata)
+        else:
+            # Raises ValueError if metadata is also given
+            return Table.from_arrays(arrays, schema=schema, metadata=metadata)
+
+    @staticmethod
     def from_batches(batches, Schema schema=None):
         """
         Construct a Table from a sequence or iterator of Arrow RecordBatches
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index 91a87bf..4fcc7e3 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -906,3 +906,55 @@ def test_invalid_table_construct():
 
     with pytest.raises(pa.lib.ArrowInvalid):
         pa.Table.from_arrays(arrays, names=["a1", "a2"])
+
+
+def test_table_from_pydict():
+    table = pa.Table.from_pydict({})  # empty mapping gives an empty table
+    assert table.num_columns == 0
+    assert table.num_rows == 0
+    assert table.schema == pa.schema([])
+    assert table.to_pydict() == {}
+
+    # With arrays as values
+    data = OrderedDict([('strs', pa.array([u'', u'foo', u'bar'])),
+                        ('floats', pa.array([4.5, 5, None]))])
+    schema = pa.schema([('strs', pa.utf8()), ('floats', pa.float64())])
+    table = pa.Table.from_pydict(data)
+    assert table.num_columns == 2
+    assert table.num_rows == 3
+    assert table.schema == schema
+
+    # With chunked arrays as values
+    data = OrderedDict([('strs', pa.chunked_array([[u''], [u'foo', u'bar']])),
+                        ('floats', pa.chunked_array([[4.5], [5, None]]))])
+    table = pa.Table.from_pydict(data)
+    assert table.num_columns == 2
+    assert table.num_rows == 3
+    assert table.schema == schema
+
+    # With lists as values (also checks the to_pydict() round trip)
+    data = OrderedDict([('strs', [u'', u'foo', u'bar']),
+                        ('floats', [4.5, 5, None])])
+    table = pa.Table.from_pydict(data)
+    assert table.num_columns == 2
+    assert table.num_rows == 3
+    assert table.schema == schema
+    assert table.to_pydict() == data
+
+    # With metadata and inferred schema
+    metadata = {b'foo': b'bar'}
+    schema = schema.add_metadata(metadata)
+    table = pa.Table.from_pydict(data, metadata=metadata)
+    assert table.schema == schema
+    assert table.schema.metadata == metadata
+    assert table.to_pydict() == data
+
+    # With explicit schema (which already carries the metadata)
+    table = pa.Table.from_pydict(data, schema=schema)
+    assert table.schema == schema
+    assert table.schema.metadata == metadata
+    assert table.to_pydict() == data
+
+    # Cannot pass both schema and metadata: must raise ValueError
+    with pytest.raises(ValueError):
+        pa.Table.from_pydict(data, schema=schema, metadata=metadata)