You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2019/05/13 13:38:09 UTC

[arrow] branch master updated: ARROW-5286: [Python] support struct type in from_pandas

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 3939252  ARROW-5286: [Python] support struct type in from_pandas
3939252 is described below

commit 3939252a895fdc77102f3849012e5fec08603ce0
Author: Joris Van den Bossche <jo...@gmail.com>
AuthorDate: Mon May 13 15:38:00 2019 +0200

    ARROW-5286: [Python] support struct type in from_pandas
    
    https://issues.apache.org/jira/browse/ARROW-5286
    
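    We can already infer a struct type from dicts, but `from_pandas` currently does not allow such columns because getting a "logical pandas type" is not implemented for struct types (`get_logical_type` raises `NotImplementedError`). For now, I opted to let `get_logical_type` return "object" as a fallback, as that is how pandas stores such data.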
    
    Author: Joris Van den Bossche <jo...@gmail.com>
    
    Closes #4297 from jorisvandenbossche/ARROW-5286-from_pandas-structs and squashes the following commits:
    
    e803f9b4a <Joris Van den Bossche> ARROW-5286:  support struct type in from_pandas
---
 python/pyarrow/pandas_compat.py     |  2 +-
 python/pyarrow/tests/test_pandas.py | 38 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
index 7712b89..d90c8a2 100644
--- a/python/pyarrow/pandas_compat.py
+++ b/python/pyarrow/pandas_compat.py
@@ -75,7 +75,7 @@ def get_logical_type(arrow_type):
             return 'datetimetz' if arrow_type.tz is not None else 'datetime'
         elif isinstance(arrow_type, pa.lib.Decimal128Type):
             return 'decimal'
-        raise NotImplementedError(str(arrow_type))
+        return 'object'
 
 
 _numpy_logical_type_map = {
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index 9c0603e..6d002db 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -428,6 +428,12 @@ class TestConvertMetadata(object):
         assert data_column['pandas_type'] == 'list[int64]'
         assert data_column['numpy_type'] == 'object'
 
+    def test_struct_metadata(self):
+        df = pd.DataFrame({'dicts': [{'a': 1, 'b': 2}, {'a': 3, 'b': 4}]})
+        table = pa.Table.from_pandas(df)
+        pandas_metadata = table.schema.pandas_metadata
+        assert pandas_metadata['columns'][0]['pandas_type'] == 'object'
+
     def test_decimal_metadata(self):
         expected = pd.DataFrame({
             'decimals': [
@@ -1579,7 +1585,7 @@ class TestConvertDecimalTypes(object):
         _check_pandas_roundtrip(df)
 
 
-class TestListTypes(object):
+class TestConvertListTypes(object):
     """
     Conversion tests for list<> types.
     """
@@ -1827,6 +1833,19 @@ class TestConvertStructTypes(object):
     Conversion tests for struct types.
     """
 
+    def test_pandas_roundtrip(self):
+        df = pd.DataFrame({'dicts': [{'a': 1, 'b': 2}, {'a': 3, 'b': 4}]})
+
+        expected_schema = pa.schema([
+            ('dicts', pa.struct([('a', pa.int64()), ('b', pa.int64())])),
+        ])
+
+        _check_pandas_roundtrip(df, expected_schema=expected_schema)
+
+        # specifying schema explicitly in from_pandas
+        _check_pandas_roundtrip(
+            df, schema=expected_schema, expected_schema=expected_schema)
+
     def test_to_pandas(self):
         ints = pa.array([None, 2, 3], type=pa.int64())
         strs = pa.array([u'a', None, u'c'], type=pa.string())
@@ -1970,6 +1989,23 @@ class TestConvertStructTypes(object):
                            match="Expected struct array"):
             pa.array(data, type=ty)
 
+    def test_from_tuples(self):
+        df = pd.DataFrame({'tuples': [(1, 2), (3, 4)]})
+        expected_df = pd.DataFrame(
+            {'tuples': [{'a': 1, 'b': 2}, {'a': 3, 'b': 4}]})
+
+        # conversion from tuples works when specifying expected struct type
+        struct_type = pa.struct([('a', pa.int64()), ('b', pa.int64())])
+
+        arr = np.asarray(df['tuples'])
+        _check_array_roundtrip(
+            arr, expected=expected_df['tuples'], type=struct_type)
+
+        expected_schema = pa.schema([('tuples', struct_type)])
+        _check_pandas_roundtrip(
+            df, expected=expected_df, schema=expected_schema,
+            expected_schema=expected_schema)
+
 
 class TestZeroCopyConversion(object):
     """