You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/01/03 12:23:25 UTC
arrow git commit: ARROW-417: Add Equals implementation to compare ChunkedArrays
Repository: arrow
Updated Branches:
refs/heads/master 26140dca8 -> fdbc57941
ARROW-417: Add Equals implementation to compare ChunkedArrays
Author: Uwe L. Korn <uw...@xhochy.com>
Closes #259 from xhochy/ARROW-417 and squashes the following commits:
ffc076a [Uwe L. Korn] Add interface for non-shared_ptr-based Equals
3686d6c [Uwe L. Korn] ARROW-415: C++: Add Equals implementation to compare Tables
54cbf54 [Uwe L. Korn] ARROW-416: C++: Add Equals implementation to compare Columns
21e73a0 [Uwe L. Korn] Make signed comparison explicit
8563cb2 [Uwe L. Korn] ARROW-417: Add Equals implementation to compare ChunkedArrays
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/fdbc5794
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/fdbc5794
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/fdbc5794
Branch: refs/heads/master
Commit: fdbc57941fd3615c71b3a61b409b63eb6a48a817
Parents: 26140dc
Author: Uwe L. Korn <uw...@xhochy.com>
Authored: Tue Jan 3 07:23:17 2017 -0500
Committer: Wes McKinney <we...@twosigma.com>
Committed: Tue Jan 3 07:23:17 2017 -0500
----------------------------------------------------------------------
cpp/src/arrow/column-test.cc | 121 ++++++++++++++++++++++++++++++++++++--
cpp/src/arrow/column.cc | 51 ++++++++++++++++
cpp/src/arrow/column.h | 7 +++
cpp/src/arrow/table-test.cc | 44 ++++++++++----
cpp/src/arrow/table.cc | 17 ++++++
cpp/src/arrow/table.h | 3 +
cpp/src/arrow/test-util.h | 2 +-
7 files changed, 228 insertions(+), 17 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/fdbc5794/cpp/src/arrow/column-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/column-test.cc b/cpp/src/arrow/column-test.cc
index 9005245..1e722ed 100644
--- a/cpp/src/arrow/column-test.cc
+++ b/cpp/src/arrow/column-test.cc
@@ -33,12 +33,92 @@ using std::vector;
namespace arrow {
-const auto INT32 = std::make_shared<Int32Type>();
+// Fixture for ChunkedArray tests. A test fills arrays_one_ / arrays_another_
+// and then calls Construct() to (re)build one_ / another_ from those vectors.
+class TestChunkedArray : public TestBase {
+ protected:
+  // Rebuilds both chunked arrays from the current contents of the chunk
+  // vectors. Virtual so that derived fixtures can build more on top.
+  virtual void Construct() {
+    one_ = std::make_shared<ChunkedArray>(arrays_one_);
+    another_ = std::make_shared<ChunkedArray>(arrays_another_);
+  }
+
+  // Chunks used to build one_ and another_ respectively.
+  ArrayVector arrays_one_;
+  ArrayVector arrays_another_;
+
+  // The chunked arrays under test; set by Construct().
+  std::shared_ptr<ChunkedArray> one_;
+  std::shared_ptr<ChunkedArray> another_;
+};
+
+// Two chunked arrays built from the very same single chunk must compare equal
+// through both Equals overloads; a null pointer must compare unequal.
+TEST_F(TestChunkedArray, BasicEquals) {
+  std::vector<bool> bitmap(100, true);
+  std::vector<int32_t> values(100, 1);
+  std::shared_ptr<Array> chunk;
+  ArrayFromVector<Int32Type, int32_t>(int32(), bitmap, values, &chunk);
+
+  // The same chunk instance backs both sides.
+  arrays_one_.push_back(chunk);
+  arrays_another_.push_back(chunk);
+  Construct();
+
+  ASSERT_TRUE(one_->Equals(one_));
+  ASSERT_FALSE(one_->Equals(nullptr));
+  ASSERT_TRUE(one_->Equals(another_));
+  ASSERT_TRUE(one_->Equals(*another_));
+}
+
+// Chunked arrays of equal length and equal numeric values, but built from
+// int32 vs. int64 chunks, must not compare equal.
+TEST_F(TestChunkedArray, EqualsDifferingTypes) {
+  std::vector<bool> bitmap(100, true);
+  std::shared_ptr<Array> chunk;
+
+  // Left side: 100 int32 values.
+  std::vector<int32_t> values32(100, 1);
+  ArrayFromVector<Int32Type, int32_t>(int32(), bitmap, values32, &chunk);
+  arrays_one_.push_back(chunk);
+
+  // Right side: 100 int64 values.
+  std::vector<int64_t> values64(100, 1);
+  ArrayFromVector<Int64Type, int64_t>(int64(), bitmap, values64, &chunk);
+  arrays_another_.push_back(chunk);
+
+  Construct();
+  ASSERT_FALSE(one_->Equals(another_));
+  ASSERT_FALSE(one_->Equals(*another_));
+}
+
+// Equality is expected to follow the total logical content rather than the
+// chunk layout: 100 elements differ from 101 elements, while chunks of
+// 100 + 1 elements equal a single chunk of 101 elements.
+TEST_F(TestChunkedArray, EqualsDifferingLengths) {
+ std::vector<bool> null_bitmap100(100, true);
+ std::vector<bool> null_bitmap101(101, true);
+ std::vector<int32_t> data100(100, 1);
+ std::vector<int32_t> data101(101, 1);
+ std::shared_ptr<Array> array;
+ ArrayFromVector<Int32Type, int32_t>(int32(), null_bitmap100, data100, &array);
+ arrays_one_.push_back(array);
+ ArrayFromVector<Int32Type, int32_t>(int32(), null_bitmap101, data101, &array);
+ arrays_another_.push_back(array);
+
+ // At this point one_ holds 100 elements and another_ holds 101.
+ Construct();
+ ASSERT_FALSE(one_->Equals(another_));
+ ASSERT_FALSE(one_->Equals(*another_.get()));
+
+ // Append a one-element chunk so that one_ totals 101 elements in two chunks.
+ std::vector<bool> null_bitmap1(1, true);
+ std::vector<int32_t> data1(1, 1);
+ ArrayFromVector<Int32Type, int32_t>(int32(), null_bitmap1, data1, &array);
+ arrays_one_.push_back(array);
-class TestColumn : public TestBase {
+ Construct();
+ ASSERT_TRUE(one_->Equals(another_));
+ ASSERT_TRUE(one_->Equals(*another_.get()));
+}
+
+// Fixture for Column tests. Builds on the ChunkedArray fixture: Construct()
+// first rebuilds one_ / another_ and then wraps them into one_col_ /
+// another_col_ using one_field_ / another_field_, which the test must set.
+class TestColumn : public TestChunkedArray {
protected:
+ void Construct() override {
+ TestChunkedArray::Construct();
+
+ one_col_ = std::make_shared<Column>(one_field_, one_);
+ another_col_ = std::make_shared<Column>(another_field_, another_);
+ }
+
std::shared_ptr<ChunkedArray> data_;
std::unique_ptr<Column> column_;
+
+ // Fields paired with one_ / another_ when Construct() builds the columns.
+ std::shared_ptr<Field> one_field_;
+ std::shared_ptr<Field> another_field_;
+
+ // Columns under test; set by Construct().
+ std::shared_ptr<Column> one_col_;
+ std::shared_ptr<Column> another_col_;
};
TEST_F(TestColumn, BasicAPI) {
@@ -47,11 +127,11 @@ TEST_F(TestColumn, BasicAPI) {
arrays.push_back(MakePrimitive<Int32Array>(100, 10));
arrays.push_back(MakePrimitive<Int32Array>(100, 20));
- auto field = std::make_shared<Field>("c0", INT32);
+ auto field = std::make_shared<Field>("c0", int32());
column_.reset(new Column(field, arrays));
ASSERT_EQ("c0", column_->name());
- ASSERT_TRUE(column_->type()->Equals(INT32));
+ ASSERT_TRUE(column_->type()->Equals(int32()));
ASSERT_EQ(300, column_->length());
ASSERT_EQ(30, column_->null_count());
ASSERT_EQ(3, column_->data()->num_chunks());
@@ -62,7 +142,7 @@ TEST_F(TestColumn, ChunksInhomogeneous) {
arrays.push_back(MakePrimitive<Int32Array>(100));
arrays.push_back(MakePrimitive<Int32Array>(100, 10));
- auto field = std::make_shared<Field>("c0", INT32);
+ auto field = std::make_shared<Field>("c0", int32());
column_.reset(new Column(field, arrays));
ASSERT_OK(column_->ValidateData());
@@ -72,4 +152,35 @@ TEST_F(TestColumn, ChunksInhomogeneous) {
ASSERT_RAISES(Invalid, column_->ValidateData());
}
+// Columns compare equal only when both their field and their chunked data
+// compare equal.
+TEST_F(TestColumn, Equals) {
+  std::vector<bool> bitmap(100, true);
+  std::vector<int32_t> values(100, 1);
+  std::shared_ptr<Array> chunk;
+  ArrayFromVector<Int32Type, int32_t>(int32(), bitmap, values, &chunk);
+
+  // Same chunk and equally named/typed fields on both sides.
+  arrays_one_.push_back(chunk);
+  arrays_another_.push_back(chunk);
+  one_field_ = std::make_shared<Field>("column", int32());
+  another_field_ = std::make_shared<Field>("column", int32());
+  Construct();
+
+  ASSERT_TRUE(one_col_->Equals(one_col_));
+  ASSERT_FALSE(one_col_->Equals(nullptr));
+  ASSERT_TRUE(one_col_->Equals(another_col_));
+  ASSERT_TRUE(one_col_->Equals(*another_col_));
+
+  // Field is different
+  another_field_ = std::make_shared<Field>("two", int32());
+  Construct();
+  ASSERT_FALSE(one_col_->Equals(another_col_));
+  ASSERT_FALSE(one_col_->Equals(*another_col_));
+
+  // ChunkedArray is different: restore the field name, then give the other
+  // side a second chunk.
+  another_field_ = std::make_shared<Field>("column", int32());
+  arrays_another_.push_back(chunk);
+  Construct();
+  ASSERT_FALSE(one_col_->Equals(another_col_));
+  ASSERT_FALSE(one_col_->Equals(*another_col_));
+}
+
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/fdbc5794/cpp/src/arrow/column.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/column.cc b/cpp/src/arrow/column.cc
index 1d136e7..3e89956 100644
--- a/cpp/src/arrow/column.cc
+++ b/cpp/src/arrow/column.cc
@@ -35,6 +35,45 @@ ChunkedArray::ChunkedArray(const ArrayVector& chunks) : chunks_(chunks) {
}
}
+// Compares the logical content of two chunked arrays, independently of how
+// each side is split into chunks.
+//
+// @param other: the chunked array to compare against
+// @returns: true if total length and null count match and every compared
+//   element range is reported equal by Array::RangeEquals
+bool ChunkedArray::Equals(const ChunkedArray& other) const {
+  if (length_ != other.length()) { return false; }
+  if (null_count_ != other.null_count()) { return false; }
+
+  // Check contents of the underlying arrays. This checks for equality of
+  // the underlying data independently of the chunk size. Each side keeps a
+  // (chunk index, offset within chunk) cursor and both are walked in lockstep.
+  const int this_num_chunks = static_cast<int>(chunks_.size());
+  const int other_num_chunks = other.num_chunks();
+  int this_chunk_idx = 0;
+  int32_t this_start_idx = 0;
+  int other_chunk_idx = 0;
+  int32_t other_start_idx = 0;
+  // Stop as soon as either side runs out of chunks. The total lengths are
+  // equal, so any chunks left over on the other side can only be empty;
+  // indexing past the end of `other` must be avoided in that case.
+  while (this_chunk_idx < this_num_chunks && other_chunk_idx < other_num_chunks) {
+    const std::shared_ptr<Array> this_array = chunks_[this_chunk_idx];
+    const std::shared_ptr<Array> other_array = other.chunk(other_chunk_idx);
+    // Longest run that is still available in both current chunks.
+    const int32_t common_length = std::min(
+        this_array->length() - this_start_idx, other_array->length() - other_start_idx);
+    if (!this_array->RangeEquals(this_start_idx, this_start_idx + common_length,
+            other_start_idx, other_array)) {
+      return false;
+    }
+
+    // Move each cursor past the run just compared. A chunk that is only
+    // partially consumed keeps its advanced offset so the next comparison
+    // resumes where this one stopped; an exhausted chunk hands over to the
+    // next chunk, starting at offset 0.
+    this_start_idx += common_length;
+    if (this_start_idx == this_array->length()) {
+      this_chunk_idx++;
+      this_start_idx = 0;
+    }
+    other_start_idx += common_length;
+    if (other_start_idx == other_array->length()) {
+      other_chunk_idx++;
+      other_start_idx = 0;
+    }
+  }
+  return true;
+}
+
+// Pointer overload: a null pointer is never equal, the same object is always
+// equal, anything else is delegated to the by-reference comparison.
+bool ChunkedArray::Equals(const std::shared_ptr<ChunkedArray>& other) const {
+  const ChunkedArray* rhs = other.get();
+  if (rhs == nullptr) { return false; }
+  return rhs == this || Equals(*rhs);
+}
+
Column::Column(const std::shared_ptr<Field>& field, const ArrayVector& chunks)
: field_(field) {
data_ = std::make_shared<ChunkedArray>(chunks);
@@ -49,6 +88,18 @@ Column::Column(
const std::shared_ptr<Field>& field, const std::shared_ptr<ChunkedArray>& data)
: field_(field), data_(data) {}
+// Two columns are equal when their fields are equal and their chunked data
+// is equal. The data is only compared if the fields already match.
+bool Column::Equals(const Column& other) const {
+  return field_->Equals(other.field()) && data_->Equals(other.data());
+}
+
+// Pointer overload: a null pointer is never equal, the same object is always
+// equal, anything else is delegated to the by-reference comparison.
+bool Column::Equals(const std::shared_ptr<Column>& other) const {
+  const Column* rhs = other.get();
+  if (rhs == nullptr) { return false; }
+  return rhs == this || Equals(*rhs);
+}
+
Status Column::ValidateData() {
for (int i = 0; i < data_->num_chunks(); ++i) {
std::shared_ptr<DataType> type = data_->chunk(i)->type();
http://git-wip-us.apache.org/repos/asf/arrow/blob/fdbc5794/cpp/src/arrow/column.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/column.h b/cpp/src/arrow/column.h
index 1caafec..f716473 100644
--- a/cpp/src/arrow/column.h
+++ b/cpp/src/arrow/column.h
@@ -48,6 +48,9 @@ class ARROW_EXPORT ChunkedArray {
std::shared_ptr<Array> chunk(int i) const { return chunks_[i]; }
+ bool Equals(const ChunkedArray& other) const;
+ bool Equals(const std::shared_ptr<ChunkedArray>& other) const;
+
protected:
ArrayVector chunks_;
int64_t length_;
@@ -78,6 +81,10 @@ class ARROW_EXPORT Column {
// @returns: the column's data as a chunked logical array
std::shared_ptr<ChunkedArray> data() const { return data_; }
+
+ bool Equals(const Column& other) const;
+ bool Equals(const std::shared_ptr<Column>& other) const;
+
// Verify that the column's array data is consistent with the passed field's
// metadata
Status ValidateData();
http://git-wip-us.apache.org/repos/asf/arrow/blob/fdbc5794/cpp/src/arrow/table-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table-test.cc b/cpp/src/arrow/table-test.cc
index f62336d..734b941 100644
--- a/cpp/src/arrow/table-test.cc
+++ b/cpp/src/arrow/table-test.cc
@@ -34,16 +34,12 @@ using std::vector;
namespace arrow {
-const auto INT16 = std::make_shared<Int16Type>();
-const auto UINT8 = std::make_shared<UInt8Type>();
-const auto INT32 = std::make_shared<Int32Type>();
-
class TestTable : public TestBase {
public:
void MakeExample1(int length) {
- auto f0 = std::make_shared<Field>("f0", INT32);
- auto f1 = std::make_shared<Field>("f1", UINT8);
- auto f2 = std::make_shared<Field>("f2", INT16);
+ auto f0 = std::make_shared<Field>("f0", int32());
+ auto f1 = std::make_shared<Field>("f1", uint8());
+ auto f2 = std::make_shared<Field>("f2", int16());
vector<shared_ptr<Field>> fields = {f0, f1, f2};
schema_ = std::make_shared<Schema>(fields);
@@ -55,7 +51,7 @@ class TestTable : public TestBase {
}
protected:
- std::unique_ptr<Table> table_;
+ std::shared_ptr<Table> table_;
shared_ptr<Schema> schema_;
vector<std::shared_ptr<Column>> columns_;
};
@@ -123,14 +119,40 @@ TEST_F(TestTable, InvalidColumns) {
ASSERT_RAISES(Invalid, table_->ValidateColumns());
}
+// A table equals itself, never equals a null pointer, and stops being equal
+// as soon as the name, the schema or the columns differ.
+TEST_F(TestTable, Equals) {
+  const int length = 100;
+  MakeExample1(length);
+
+  const std::string name = "data";
+  table_ = std::make_shared<Table>(name, schema_, columns_);
+
+  ASSERT_TRUE(table_->Equals(table_));
+  ASSERT_FALSE(table_->Equals(nullptr));
+
+  // Differing name
+  auto renamed = std::make_shared<Table>("other_name", schema_, columns_);
+  ASSERT_FALSE(table_->Equals(renamed));
+
+  // Differing schema: same types, different field names
+  auto f0 = std::make_shared<Field>("f3", int32());
+  auto f1 = std::make_shared<Field>("f4", uint8());
+  auto f2 = std::make_shared<Field>("f5", int16());
+  vector<shared_ptr<Field>> fields = {f0, f1, f2};
+  auto other_schema = std::make_shared<Schema>(fields);
+  auto reschemed = std::make_shared<Table>(name, other_schema, columns_);
+  ASSERT_FALSE(table_->Equals(reschemed));
+
+  // Differing columns
+  std::vector<std::shared_ptr<Column>> other_columns = {
+      std::make_shared<Column>(schema_->field(0), MakePrimitive<Int32Array>(length, 10)),
+      std::make_shared<Column>(schema_->field(1), MakePrimitive<UInt8Array>(length, 10)),
+      std::make_shared<Column>(schema_->field(2), MakePrimitive<Int16Array>(length, 10))};
+  auto recolumned = std::make_shared<Table>(name, schema_, other_columns);
+  ASSERT_FALSE(table_->Equals(recolumned));
+}
+
class TestRecordBatch : public TestBase {};
TEST_F(TestRecordBatch, Equals) {
const int length = 10;
- auto f0 = std::make_shared<Field>("f0", INT32);
- auto f1 = std::make_shared<Field>("f1", UINT8);
- auto f2 = std::make_shared<Field>("f2", INT16);
+ auto f0 = std::make_shared<Field>("f0", int32());
+ auto f1 = std::make_shared<Field>("f1", uint8());
+ auto f2 = std::make_shared<Field>("f2", int16());
vector<shared_ptr<Field>> fields = {f0, f1, f2};
auto schema = std::make_shared<Schema>(fields);
http://git-wip-us.apache.org/repos/asf/arrow/blob/fdbc5794/cpp/src/arrow/table.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc
index 855d4ec..45f672e 100644
--- a/cpp/src/arrow/table.cc
+++ b/cpp/src/arrow/table.cc
@@ -77,6 +77,23 @@ Table::Table(const std::string& name, const std::shared_ptr<Schema>& schema,
const std::vector<std::shared_ptr<Column>>& columns, int64_t num_rows)
: name_(name), schema_(schema), columns_(columns), num_rows_(num_rows) {}
+// Two tables are equal when name, schema and column count match and every
+// column equals the column at the same position in the other table.
+bool Table::Equals(const Table& other) const {
+  // Cheap metadata checks first, in the order name -> schema -> column count.
+  if (name_ != other.name() || !schema_->Equals(other.schema())) { return false; }
+  const size_t num_cols = columns_.size();
+  if (static_cast<int64_t>(num_cols) != other.num_columns()) { return false; }
+
+  // Advance while columns match; stopping early means a mismatch was found.
+  size_t idx = 0;
+  while (idx < num_cols && columns_[idx]->Equals(other.column(idx))) {
+    ++idx;
+  }
+  return idx == num_cols;
+}
+
+// Pointer overload: a null pointer is never equal, the same object is always
+// equal, anything else is delegated to the by-reference comparison.
+bool Table::Equals(const std::shared_ptr<Table>& other) const {
+  const Table* rhs = other.get();
+  if (rhs == nullptr) { return false; }
+  return rhs == this || Equals(*rhs);
+}
+
Status Table::ValidateColumns() const {
if (num_columns() != schema_->num_fields()) {
return Status::Invalid("Number of columns did not match schema");
http://git-wip-us.apache.org/repos/asf/arrow/blob/fdbc5794/cpp/src/arrow/table.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table.h b/cpp/src/arrow/table.h
index bf5c39f..0f2418d 100644
--- a/cpp/src/arrow/table.h
+++ b/cpp/src/arrow/table.h
@@ -100,6 +100,9 @@ class ARROW_EXPORT Table {
// @returns: the number of rows (the corresponding length of each column)
int64_t num_rows() const { return num_rows_; }
+ bool Equals(const Table& other) const;
+ bool Equals(const std::shared_ptr<Table>& other) const;
+
// After construction, perform any checks to validate the input arguments
Status ValidateColumns() const;
http://git-wip-us.apache.org/repos/asf/arrow/blob/fdbc5794/cpp/src/arrow/test-util.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/test-util.h b/cpp/src/arrow/test-util.h
index ce9327d..70e9333 100644
--- a/cpp/src/arrow/test-util.h
+++ b/cpp/src/arrow/test-util.h
@@ -81,7 +81,7 @@ class TestBase : public ::testing::Test {
auto null_bitmap = std::make_shared<PoolBuffer>(pool_);
EXPECT_OK(data->Resize(length * sizeof(typename ArrayType::value_type)));
EXPECT_OK(null_bitmap->Resize(BitUtil::BytesForBits(length)));
- return std::make_shared<ArrayType>(length, data, 10, null_bitmap);
+ return std::make_shared<ArrayType>(length, data, null_count, null_bitmap);
}
protected: