You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/06/24 23:39:14 UTC

[arrow] branch master updated: ARROW-5335: [Python] Raise exception on variable dictionaries in conversion to Python/pandas

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new bd0cbc9  ARROW-5335: [Python] Raise exception on variable dictionaries in conversion to Python/pandas
bd0cbc9 is described below

commit bd0cbc95862c0a22b80413ca535057417235cd58
Author: Joris Van den Bossche <jo...@gmail.com>
AuthorDate: Mon Jun 24 18:39:06 2019 -0500

    ARROW-5335: [Python] Raise exception on variable dictionaries in conversion to Python/pandas
    
    Unification will happen later per ARROW-5717
    
    Author: Joris Van den Bossche <jo...@gmail.com>
    
    Closes #4615 from jorisvandenbossche/ARROW-5335-variable-dict-to-python and squashes the following commits:
    
    b630f11c9 <Joris Van den Bossche> raise error if dictionaries are not equal for all chunks
---
 cpp/src/arrow/python/arrow_to_pandas.cc | 13 +++++++++++++
 python/pyarrow/tests/test_pandas.py     | 15 +++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/cpp/src/arrow/python/arrow_to_pandas.cc b/cpp/src/arrow/python/arrow_to_pandas.cc
index 5d84c00..d992001 100644
--- a/cpp/src/arrow/python/arrow_to_pandas.cc
+++ b/cpp/src/arrow/python/arrow_to_pandas.cc
@@ -1147,6 +1147,19 @@ class CategoricalBlock : public PandasBlock {
       converted_col =
           std::make_shared<Column>(field(col->name(), out.type()), out.chunked_array());
     } else {
+      // check if all dictionaries are equal
+      const ChunkedArray& data = *col->data().get();
+      const std::shared_ptr<Array> arr_first = data.chunk(0);
+      const auto& dict_arr_first = checked_cast<const DictionaryArray&>(*arr_first);
+
+      for (int c = 1; c < data.num_chunks(); c++) {
+        const std::shared_ptr<Array> arr = data.chunk(c);
+        const auto& dict_arr = checked_cast<const DictionaryArray&>(*arr);
+
+        if (!(dict_arr_first.dictionary()->Equals(dict_arr.dictionary()))) {
+          return Status::NotImplemented("Variable dictionary type not supported");
+        }
+      }
       converted_col = col;
     }
 
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index ebcf624..3cc04dc 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -2807,6 +2807,21 @@ def test_dictionary_with_pandas():
     tm.assert_series_equal(pd.Series(pandas2), pd.Series(ex_pandas2))
 
 
+def test_variable_dictionary_with_pandas():
+    a1 = pa.DictionaryArray.from_arrays([0, 1, 2], ['a', 'b', 'c'])
+    a2 = pa.DictionaryArray.from_arrays([0, 1], ['a', 'c'])
+
+    a = pa.chunked_array([a1, a2])
+    assert a.to_pylist() == ['a', 'b', 'c', 'a', 'c']
+    with pytest.raises(NotImplementedError):
+        a.to_pandas()
+
+    a = pa.chunked_array([a2, a1])
+    assert a.to_pylist() == ['a', 'c', 'a', 'b', 'c']
+    with pytest.raises(NotImplementedError):
+        a.to_pandas()
+
+
 # ----------------------------------------------------------------------
 # Legacy metadata compatibility tests