You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/06/23 14:12:13 UTC

arrow git commit: ARROW-1143: C++: Fix comparison of NullArray

Repository: arrow
Updated Branches:
  refs/heads/master c1ec0c723 -> 074dde41c


ARROW-1143: C++: Fix comparison of NullArray

Change-Id: Ib18dc6b00c9806aaf541c61cb63673ac51b0525c

Author: Uwe L. Korn <uw...@xhochy.com>

Closes #772 from xhochy/ARROW-1143 and squashes the following commits:

eff44306 [Uwe L. Korn] ARROW-1143: C++: Fix comparison of NullArray


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/074dde41
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/074dde41
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/074dde41

Branch: refs/heads/master
Commit: 074dde41c6ba70daeac07b2f88413e2eb21ec9f0
Parents: c1ec0c7
Author: Uwe L. Korn <uw...@xhochy.com>
Authored: Fri Jun 23 10:12:07 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Fri Jun 23 10:12:07 2017 -0400

----------------------------------------------------------------------
 cpp/src/arrow/array-test.cc                 | 10 ++++++++++
 cpp/src/arrow/compare.cc                    |  7 +++++--
 python/pyarrow/tests/test_convert_pandas.py |  6 ++++++
 python/pyarrow/tests/test_parquet.py        |  6 +++++-
 4 files changed, 26 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/074dde41/cpp/src/arrow/array-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc
index beffa1b..8f6323b 100644
--- a/cpp/src/arrow/array-test.cc
+++ b/cpp/src/arrow/array-test.cc
@@ -95,6 +95,16 @@ TEST_F(TestArray, TestEquality) {
   EXPECT_FALSE(array->RangeEquals(1, 2, 1, unequal_array));
 }
 
+TEST_F(TestArray, TestNullArrayEquality) {
+  auto array_1 = std::make_shared<NullArray>(10);
+  auto array_2 = std::make_shared<NullArray>(10);
+  auto array_3 = std::make_shared<NullArray>(20);
+
+  EXPECT_TRUE(array_1->Equals(array_1));
+  EXPECT_TRUE(array_1->Equals(array_2));
+  EXPECT_FALSE(array_1->Equals(array_3));
+}
+
 TEST_F(TestArray, SliceRecomputeNullCount) {
   vector<uint8_t> valid_bytes = {1, 0, 1, 1, 0, 1, 0, 0, 0};
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/074dde41/cpp/src/arrow/compare.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc
index 562d4e1..c2f4f84 100644
--- a/cpp/src/arrow/compare.cc
+++ b/cpp/src/arrow/compare.cc
@@ -322,7 +322,10 @@ class ArrayEqualsVisitor : public RangeEqualsVisitor {
   explicit ArrayEqualsVisitor(const Array& right)
       : RangeEqualsVisitor(right, 0, right.length(), 0) {}
 
-  Status Visit(const NullArray& left) { return Status::OK(); }
+  Status Visit(const NullArray& left) {
+      result_ = true;
+      return Status::OK();
+  }
 
   Status Visit(const BooleanArray& left) {
     const auto& right = static_cast<const BooleanArray&>(right_);
@@ -529,7 +532,7 @@ static bool BaseDataEquals(const Array& left, const Array& right) {
       left.type_id() != right.type_id()) {
     return false;
   }
-  if (left.null_count() > 0) {
+  if (left.null_count() > 0 && left.null_count() < left.length()) {
     return BitmapEquals(left.null_bitmap()->data(), left.offset(),
         right.null_bitmap()->data(), right.offset(), left.length());
   }

http://git-wip-us.apache.org/repos/asf/arrow/blob/074dde41/python/pyarrow/tests/test_convert_pandas.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index d17ef3c..f6ada09 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -103,6 +103,12 @@ class TestPandasConversion(unittest.TestCase):
         self._check_pandas_roundtrip(df)
 
 
+    def test_all_none_category(self):
+        df = pd.DataFrame({'a': [None, None, None]})
+        df['a'] = df['a'].astype('category')
+        self._check_pandas_roundtrip(df)
+
+
     def test_float_no_nulls(self):
         data = {}
         fields = []

http://git-wip-us.apache.org/repos/asf/arrow/blob/074dde41/python/pyarrow/tests/test_parquet.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index 3c2b73e..1cf5495 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -225,8 +225,12 @@ def _test_dataframe(size=10000, seed=0):
         'float32': np.random.randn(size).astype(np.float32),
         'float64': np.arange(size, dtype=np.float64),
         'bool': np.random.randn(size) > 0,
-        'strings': [tm.rands(10) for i in range(size)]
+        'strings': [tm.rands(10) for i in range(size)],
+        'all_none': [None] * size,
+        'all_none_category': [None] * size
     })
+    # TODO(PARQUET-1015)
+    # df['all_none_category'] = df['all_none_category'].astype('category')
     return df