You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2018/01/28 16:33:58 UTC
[arrow] branch master updated: ARROW-1992: [C++/Python] Fix segfault when string to categorical empty string array
This is an automated email from the ASF dual-hosted git repository.
uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new d322634 ARROW-1992: [C++/Python] Fix segfault when string to categorical empty string array
d322634 is described below
commit d3226349fc61a0ffbb2139f259053ae787e500c8
Author: Licht-T <li...@outlook.jp>
AuthorDate: Sun Jan 28 17:33:20 2018 +0100
ARROW-1992: [C++/Python] Fix segfault when string to categorical empty string array
This closes [ARROW-1992](https://issues.apache.org/jira/browse/ARROW-1992).
Author: Licht-T <li...@outlook.jp>
Closes #1508 from Licht-T/fix-segfault-when-string_to_categorical-empty-string-array and squashes the following commits:
afea4be [Licht-T] BUG: Fix segfault when calling to_pandas on an array of empty strings with strings_to_categorical=True
f90e7b8 [Licht-T] TST: Add test for calling to_pandas on an array of empty strings with strings_to_categorical=True
---
cpp/src/arrow/compute/kernels/hash.cc | 8 +++++++-
python/pyarrow/tests/test_convert_pandas.py | 15 +++++++++++++++
2 files changed, 22 insertions(+), 1 deletion(-)
diff --git a/cpp/src/arrow/compute/kernels/hash.cc b/cpp/src/arrow/compute/kernels/hash.cc
index 8fac796..acbf403 100644
--- a/cpp/src/arrow/compute/kernels/hash.cc
+++ b/cpp/src/arrow/compute/kernels/hash.cc
@@ -407,12 +407,18 @@ class HashTableKernel<Type, Action, enable_if_binary<Type>> : public HashTable {
}
Status Append(const ArrayData& arr) override {
+ constexpr uint8_t empty_value = 0;
if (!initialized_) {
RETURN_NOT_OK(Init());
}
const int32_t* offsets = GetValues<int32_t>(arr, 1);
- const uint8_t* data = GetValues<uint8_t>(arr, 2);
+ const uint8_t* data;
+ if (arr.buffers[2].get() == nullptr) {
+ data = &empty_value;
+ } else {
+ data = GetValues<uint8_t>(arr, 2);
+ }
auto action = static_cast<Action*>(this);
RETURN_NOT_OK(action->Reserve(arr.length));
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index 5acb9c3..fa265e5 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -1237,6 +1237,21 @@ class TestPandasConversion(object):
assert data_column['numpy_type'] == 'object'
assert data_column['metadata'] == {'precision': 26, 'scale': 11}
+ def test_table_empty_str(self):
+ values = ['', '', '', '', '']
+ df = pd.DataFrame({'strings': values})
+ field = pa.field('strings', pa.string())
+ schema = pa.schema([field])
+ table = pa.Table.from_pandas(df, schema=schema)
+
+ result1 = table.to_pandas(strings_to_categorical=False)
+ expected1 = pd.DataFrame({'strings': values})
+ tm.assert_frame_equal(result1, expected1, check_dtype=True)
+
+ result2 = table.to_pandas(strings_to_categorical=True)
+ expected2 = pd.DataFrame({'strings': pd.Categorical(values)})
+ tm.assert_frame_equal(result2, expected2, check_dtype=True)
+
def test_table_str_to_categorical_without_na(self):
values = ['a', 'a', 'b', 'b', 'c']
df = pd.DataFrame({'strings': values})
--
To stop receiving notification emails like this one, please contact
uwe@apache.org.