You are viewing a plain text version of this content. The canonical link to the original message was not preserved in this plain-text copy.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/06/23 14:12:13 UTC
arrow git commit: ARROW-1143: C++: Fix comparison of NullArray
Repository: arrow
Updated Branches:
refs/heads/master c1ec0c723 -> 074dde41c
ARROW-1143: C++: Fix comparison of NullArray
Change-Id: Ib18dc6b00c9806aaf541c61cb63673ac51b0525c
Author: Uwe L. Korn <uw...@xhochy.com>
Closes #772 from xhochy/ARROW-1143 and squashes the following commits:
eff44306 [Uwe L. Korn] ARROW-1143: C++: Fix comparison of NullArray
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/074dde41
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/074dde41
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/074dde41
Branch: refs/heads/master
Commit: 074dde41c6ba70daeac07b2f88413e2eb21ec9f0
Parents: c1ec0c7
Author: Uwe L. Korn <uw...@xhochy.com>
Authored: Fri Jun 23 10:12:07 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Fri Jun 23 10:12:07 2017 -0400
----------------------------------------------------------------------
cpp/src/arrow/array-test.cc | 10 ++++++++++
cpp/src/arrow/compare.cc | 7 +++++--
python/pyarrow/tests/test_convert_pandas.py | 6 ++++++
python/pyarrow/tests/test_parquet.py | 6 +++++-
4 files changed, 26 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/074dde41/cpp/src/arrow/array-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc
index beffa1b..8f6323b 100644
--- a/cpp/src/arrow/array-test.cc
+++ b/cpp/src/arrow/array-test.cc
@@ -95,6 +95,16 @@ TEST_F(TestArray, TestEquality) {
EXPECT_FALSE(array->RangeEquals(1, 2, 1, unequal_array));
}
+TEST_F(TestArray, TestNullArrayEquality) {
+ auto array_1 = std::make_shared<NullArray>(10);
+ auto array_2 = std::make_shared<NullArray>(10);
+ auto array_3 = std::make_shared<NullArray>(20);
+
+ EXPECT_TRUE(array_1->Equals(array_1));
+ EXPECT_TRUE(array_1->Equals(array_2));
+ EXPECT_FALSE(array_1->Equals(array_3));
+}
+
TEST_F(TestArray, SliceRecomputeNullCount) {
vector<uint8_t> valid_bytes = {1, 0, 1, 1, 0, 1, 0, 0, 0};
http://git-wip-us.apache.org/repos/asf/arrow/blob/074dde41/cpp/src/arrow/compare.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc
index 562d4e1..c2f4f84 100644
--- a/cpp/src/arrow/compare.cc
+++ b/cpp/src/arrow/compare.cc
@@ -322,7 +322,10 @@ class ArrayEqualsVisitor : public RangeEqualsVisitor {
explicit ArrayEqualsVisitor(const Array& right)
: RangeEqualsVisitor(right, 0, right.length(), 0) {}
- Status Visit(const NullArray& left) { return Status::OK(); }
+ Status Visit(const NullArray& left) {
+ result_ = true;
+ return Status::OK();
+ }
Status Visit(const BooleanArray& left) {
const auto& right = static_cast<const BooleanArray&>(right_);
@@ -529,7 +532,7 @@ static bool BaseDataEquals(const Array& left, const Array& right) {
left.type_id() != right.type_id()) {
return false;
}
- if (left.null_count() > 0) {
+ if (left.null_count() > 0 && left.null_count() < left.length()) {
return BitmapEquals(left.null_bitmap()->data(), left.offset(),
right.null_bitmap()->data(), right.offset(), left.length());
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/074dde41/python/pyarrow/tests/test_convert_pandas.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index d17ef3c..f6ada09 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -103,6 +103,12 @@ class TestPandasConversion(unittest.TestCase):
self._check_pandas_roundtrip(df)
+ def test_all_none_category(self):
+ df = pd.DataFrame({'a': [None, None, None]})
+ df['a'] = df['a'].astype('category')
+ self._check_pandas_roundtrip(df)
+
+
def test_float_no_nulls(self):
data = {}
fields = []
http://git-wip-us.apache.org/repos/asf/arrow/blob/074dde41/python/pyarrow/tests/test_parquet.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index 3c2b73e..1cf5495 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -225,8 +225,12 @@ def _test_dataframe(size=10000, seed=0):
'float32': np.random.randn(size).astype(np.float32),
'float64': np.arange(size, dtype=np.float64),
'bool': np.random.randn(size) > 0,
- 'strings': [tm.rands(10) for i in range(size)]
+ 'strings': [tm.rands(10) for i in range(size)],
+ 'all_none': [None] * size,
+ 'all_none_category': [None] * size
})
+ # TODO(PARQUET-1015)
+ # df['all_none_category'] = df['all_none_category'].astype('category')
return df