You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/06/24 23:39:14 UTC
[arrow] branch master updated: ARROW-5335: [Python] Raise exception on variable dictionaries in conversion to Python/pandas
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new bd0cbc9 ARROW-5335: [Python] Raise exception on variable dictionaries in conversion to Python/pandas
bd0cbc9 is described below
commit bd0cbc95862c0a22b80413ca535057417235cd58
Author: Joris Van den Bossche <jo...@gmail.com>
AuthorDate: Mon Jun 24 18:39:06 2019 -0500
ARROW-5335: [Python] Raise exception on variable dictionaries in conversion to Python/pandas
Unification will happen later per ARROW-5717
Author: Joris Van den Bossche <jo...@gmail.com>
Closes #4615 from jorisvandenbossche/ARROW-5335-variable-dict-to-python and squashes the following commits:
b630f11c9 <Joris Van den Bossche> raise error if dictionaries are not equal for all chunks
---
cpp/src/arrow/python/arrow_to_pandas.cc | 13 +++++++++++++
python/pyarrow/tests/test_pandas.py | 15 +++++++++++++++
2 files changed, 28 insertions(+)
diff --git a/cpp/src/arrow/python/arrow_to_pandas.cc b/cpp/src/arrow/python/arrow_to_pandas.cc
index 5d84c00..d992001 100644
--- a/cpp/src/arrow/python/arrow_to_pandas.cc
+++ b/cpp/src/arrow/python/arrow_to_pandas.cc
@@ -1147,6 +1147,19 @@ class CategoricalBlock : public PandasBlock {
converted_col =
std::make_shared<Column>(field(col->name(), out.type()), out.chunked_array());
} else {
+ // check if all dictionaries are equal
+ const ChunkedArray& data = *col->data().get();
+ const std::shared_ptr<Array> arr_first = data.chunk(0);
+ const auto& dict_arr_first = checked_cast<const DictionaryArray&>(*arr_first);
+
+ for (int c = 1; c < data.num_chunks(); c++) {
+ const std::shared_ptr<Array> arr = data.chunk(c);
+ const auto& dict_arr = checked_cast<const DictionaryArray&>(*arr);
+
+ if (!(dict_arr_first.dictionary()->Equals(dict_arr.dictionary()))) {
+ return Status::NotImplemented("Variable dictionary type not supported");
+ }
+ }
converted_col = col;
}
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index ebcf624..3cc04dc 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -2807,6 +2807,21 @@ def test_dictionary_with_pandas():
tm.assert_series_equal(pd.Series(pandas2), pd.Series(ex_pandas2))
+def test_variable_dictionary_with_pandas():
+ a1 = pa.DictionaryArray.from_arrays([0, 1, 2], ['a', 'b', 'c'])
+ a2 = pa.DictionaryArray.from_arrays([0, 1], ['a', 'c'])
+
+ a = pa.chunked_array([a1, a2])
+ assert a.to_pylist() == ['a', 'b', 'c', 'a', 'c']
+ with pytest.raises(NotImplementedError):
+ a.to_pandas()
+
+ a = pa.chunked_array([a2, a1])
+ assert a.to_pylist() == ['a', 'c', 'a', 'b', 'c']
+ with pytest.raises(NotImplementedError):
+ a.to_pandas()
+
+
# ----------------------------------------------------------------------
# Legacy metadata compatibility tests