You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/03/31 14:20:27 UTC
arrow git commit: ARROW-603: [C++] Add RecordBatch::Validate method,
call in RecordBatch ctor in debug builds
Repository: arrow
Updated Branches:
refs/heads/master 4915ecf1e -> f5967ed68
ARROW-603: [C++] Add RecordBatch::Validate method, call in RecordBatch ctor in debug builds
This function will help catch malformed RecordBatch objects during development
Author: Wes McKinney <we...@twosigma.com>
Closes #466 from wesm/ARROW-603 and squashes the following commits:
dfdb048 [Wes McKinney] Fix incorrect clang-tidy name
5a51f69 [Wes McKinney] Add RecordBatch::Validate method, call in RecordBatch ctor in debug builds
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/f5967ed6
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/f5967ed6
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/f5967ed6
Branch: refs/heads/master
Commit: f5967ed682e63dd752d0120573bb33f42dd56e27
Parents: 4915ecf
Author: Wes McKinney <we...@twosigma.com>
Authored: Fri Mar 31 10:20:20 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Fri Mar 31 10:20:20 2017 -0400
----------------------------------------------------------------------
cpp/cmake_modules/FindClangTools.cmake | 29 +++++++----
cpp/src/arrow/io/io-hdfs-test.cc | 7 ++-
cpp/src/arrow/table-test.cc | 76 ++++++++++++++++++++---------
cpp/src/arrow/table.cc | 25 ++++++++++
cpp/src/arrow/table.h | 4 ++
5 files changed, 104 insertions(+), 37 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/f5967ed6/cpp/cmake_modules/FindClangTools.cmake
----------------------------------------------------------------------
diff --git a/cpp/cmake_modules/FindClangTools.cmake b/cpp/cmake_modules/FindClangTools.cmake
index c07c7d2..0e9430b 100644
--- a/cpp/cmake_modules/FindClangTools.cmake
+++ b/cpp/cmake_modules/FindClangTools.cmake
@@ -27,16 +27,21 @@
# This module defines
# CLANG_TIDY_BIN, The path to the clang tidy binary
# CLANG_TIDY_FOUND, Whether clang tidy was found
-# CLANG_FORMAT_BIN, The path to the clang format binary
+# CLANG_FORMAT_BIN, The path to the clang format binary
# CLANG_TIDY_FOUND, Whether clang format was found
-find_program(CLANG_TIDY_BIN
- NAMES clang-tidy-3.8 clang-tidy-3.7 clang-tidy-3.6 clang-tidy
- PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} /usr/local/bin /usr/bin
+find_program(CLANG_TIDY_BIN
+ NAMES clang-tidy-4.0
+ clang-tidy-3.9
+ clang-tidy-3.8
+ clang-tidy-3.7
+ clang-tidy-3.6
+ clang-tidy
+ PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} /usr/local/bin /usr/bin
NO_DEFAULT_PATH
)
-if ( "${CLANG_TIDY_BIN}" STREQUAL "CLANG_TIDY_BIN-NOTFOUND" )
+if ( "${CLANG_TIDY_BIN}" STREQUAL "CLANG_TIDY_BIN-NOTFOUND" )
set(CLANG_TIDY_FOUND 0)
message("clang-tidy not found")
else()
@@ -44,17 +49,21 @@ else()
message("clang-tidy found at ${CLANG_TIDY_BIN}")
endif()
-find_program(CLANG_FORMAT_BIN
- NAMES clang-format-3.8 clang-format-3.7 clang-format-3.6 clang-format
- PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} /usr/local/bin /usr/bin
+find_program(CLANG_FORMAT_BIN
+ NAMES clang-format-4.0
+ clang-format-3.9
+ clang-format-3.8
+ clang-format-3.7
+ clang-format-3.6
+ clang-format
+ PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} /usr/local/bin /usr/bin
NO_DEFAULT_PATH
)
-if ( "${CLANG_FORMAT_BIN}" STREQUAL "CLANG_FORMAT_BIN-NOTFOUND" )
+if ( "${CLANG_FORMAT_BIN}" STREQUAL "CLANG_FORMAT_BIN-NOTFOUND" )
set(CLANG_FORMAT_FOUND 0)
message("clang-format not found")
else()
set(CLANG_FORMAT_FOUND 1)
message("clang-format found at ${CLANG_FORMAT_BIN}")
endif()
-
http://git-wip-us.apache.org/repos/asf/arrow/blob/f5967ed6/cpp/src/arrow/io/io-hdfs-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/io/io-hdfs-test.cc b/cpp/src/arrow/io/io-hdfs-test.cc
index af59e96..f3140be 100644
--- a/cpp/src/arrow/io/io-hdfs-test.cc
+++ b/cpp/src/arrow/io/io-hdfs-test.cc
@@ -78,10 +78,9 @@ class TestHdfsClient : public ::testing::Test {
LibHdfsShim* driver_shim;
client_ = nullptr;
- scratch_dir_ =
- boost::filesystem::unique_path(
- boost::filesystem::temp_directory_path() / "arrow-hdfs/scratch-%%%%")
- .string();
+ scratch_dir_ = boost::filesystem::unique_path(
+ boost::filesystem::temp_directory_path() / "arrow-hdfs/scratch-%%%%")
+ .string();
loaded_driver_ = false;
http://git-wip-us.apache.org/repos/asf/arrow/blob/f5967ed6/cpp/src/arrow/table-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table-test.cc b/cpp/src/arrow/table-test.cc
index 3853306..cd32f4a 100644
--- a/cpp/src/arrow/table-test.cc
+++ b/cpp/src/arrow/table-test.cc
@@ -127,8 +127,8 @@ TEST_F(TestColumn, BasicAPI) {
arrays.push_back(MakePrimitive<Int32Array>(100, 10));
arrays.push_back(MakePrimitive<Int32Array>(100, 20));
- auto field = std::make_shared<Field>("c0", int32());
- column_.reset(new Column(field, arrays));
+ auto f0 = field("c0", int32());
+ column_.reset(new Column(f0, arrays));
ASSERT_EQ("c0", column_->name());
ASSERT_TRUE(column_->type()->Equals(int32()));
@@ -137,7 +137,7 @@ TEST_F(TestColumn, BasicAPI) {
ASSERT_EQ(3, column_->data()->num_chunks());
// nullptr array should not break
- column_.reset(new Column(field, std::shared_ptr<Array>(nullptr)));
+ column_.reset(new Column(f0, std::shared_ptr<Array>(nullptr)));
ASSERT_NE(column_.get(), nullptr);
}
@@ -146,13 +146,13 @@ TEST_F(TestColumn, ChunksInhomogeneous) {
arrays.push_back(MakePrimitive<Int32Array>(100));
arrays.push_back(MakePrimitive<Int32Array>(100, 10));
- auto field = std::make_shared<Field>("c0", int32());
- column_.reset(new Column(field, arrays));
+ auto f0 = field("c0", int32());
+ column_.reset(new Column(f0, arrays));
ASSERT_OK(column_->ValidateData());
arrays.push_back(MakePrimitive<Int16Array>(100, 10));
- column_.reset(new Column(field, arrays));
+ column_.reset(new Column(f0, arrays));
ASSERT_RAISES(Invalid, column_->ValidateData());
}
@@ -164,8 +164,8 @@ TEST_F(TestColumn, Equals) {
arrays_one_.push_back(array);
arrays_another_.push_back(array);
- one_field_ = std::make_shared<Field>("column", int32());
- another_field_ = std::make_shared<Field>("column", int32());
+ one_field_ = field("column", int32());
+ another_field_ = field("column", int32());
Construct();
ASSERT_TRUE(one_col_->Equals(one_col_));
@@ -174,13 +174,13 @@ TEST_F(TestColumn, Equals) {
ASSERT_TRUE(one_col_->Equals(*another_col_.get()));
// Field is different
- another_field_ = std::make_shared<Field>("two", int32());
+ another_field_ = field("two", int32());
Construct();
ASSERT_FALSE(one_col_->Equals(another_col_));
ASSERT_FALSE(one_col_->Equals(*another_col_.get()));
// ChunkedArray is different
- another_field_ = std::make_shared<Field>("column", int32());
+ another_field_ = field("column", int32());
arrays_another_.push_back(array);
Construct();
ASSERT_FALSE(one_col_->Equals(another_col_));
@@ -190,9 +190,9 @@ TEST_F(TestColumn, Equals) {
class TestTable : public TestBase {
public:
void MakeExample1(int length) {
- auto f0 = std::make_shared<Field>("f0", int32());
- auto f1 = std::make_shared<Field>("f1", uint8());
- auto f2 = std::make_shared<Field>("f2", int16());
+ auto f0 = field("f0", int32());
+ auto f1 = field("f1", uint8());
+ auto f2 = field("f2", int16());
vector<shared_ptr<Field>> fields = {f0, f1, f2};
schema_ = std::make_shared<Schema>(fields);
@@ -279,9 +279,9 @@ TEST_F(TestTable, Equals) {
ASSERT_TRUE(table_->Equals(*table_));
// Differing schema
- auto f0 = std::make_shared<Field>("f3", int32());
- auto f1 = std::make_shared<Field>("f4", uint8());
- auto f2 = std::make_shared<Field>("f5", int16());
+ auto f0 = field("f3", int32());
+ auto f1 = field("f4", uint8());
+ auto f2 = field("f5", int16());
vector<shared_ptr<Field>> fields = {f0, f1, f2};
auto other_schema = std::make_shared<Schema>(fields);
ASSERT_FALSE(table_->Equals(Table(other_schema, columns_)));
@@ -389,9 +389,9 @@ class TestRecordBatch : public TestBase {};
TEST_F(TestRecordBatch, Equals) {
const int length = 10;
- auto f0 = std::make_shared<Field>("f0", int32());
- auto f1 = std::make_shared<Field>("f1", uint8());
- auto f2 = std::make_shared<Field>("f2", int16());
+ auto f0 = field("f0", int32());
+ auto f1 = field("f1", uint8());
+ auto f2 = field("f2", int16());
vector<shared_ptr<Field>> fields = {f0, f1, f2};
auto schema = std::make_shared<Schema>(fields);
@@ -401,21 +401,51 @@ TEST_F(TestRecordBatch, Equals) {
auto a2 = MakePrimitive<Int16Array>(length);
RecordBatch b1(schema, length, {a0, a1, a2});
- RecordBatch b2(schema, 5, {a0, a1, a2});
RecordBatch b3(schema, length, {a0, a1});
RecordBatch b4(schema, length, {a0, a1, a1});
ASSERT_TRUE(b1.Equals(b1));
- ASSERT_FALSE(b1.Equals(b2));
ASSERT_FALSE(b1.Equals(b3));
ASSERT_FALSE(b1.Equals(b4));
}
+#ifdef NDEBUG
+// In debug builds, RecordBatch ctor aborts if you construct an invalid one
+
+TEST_F(TestRecordBatch, Validate) {
+ const int length = 10;
+
+ auto f0 = field("f0", int32());
+ auto f1 = field("f1", uint8());
+ auto f2 = field("f2", int16());
+
+ auto schema = std::shared_ptr<Schema>(new Schema({f0, f1, f2}));
+
+ auto a0 = MakePrimitive<Int32Array>(length);
+ auto a1 = MakePrimitive<UInt8Array>(length);
+ auto a2 = MakePrimitive<Int16Array>(length);
+ auto a3 = MakePrimitive<Int16Array>(5);
+
+ RecordBatch b1(schema, length, {a0, a1, a2});
+
+ ASSERT_OK(b1.Validate());
+
+ // Length mismatch
+ RecordBatch b2(schema, length, {a0, a1, a3});
+ ASSERT_RAISES(Invalid, b2.Validate());
+
+ // Type mismatch
+ RecordBatch b3(schema, length, {a0, a1, a0});
+ ASSERT_RAISES(Invalid, b3.Validate());
+}
+
+#endif
+
TEST_F(TestRecordBatch, Slice) {
const int length = 10;
- auto f0 = std::make_shared<Field>("f0", int32());
- auto f1 = std::make_shared<Field>("f1", uint8());
+ auto f0 = field("f0", int32());
+ auto f1 = field("f1", uint8());
vector<shared_ptr<Field>> fields = {f0, f1};
auto schema = std::make_shared<Schema>(fields);
http://git-wip-us.apache.org/repos/asf/arrow/blob/f5967ed6/cpp/src/arrow/table.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc
index 8e283f4..da61fbb 100644
--- a/cpp/src/arrow/table.cc
+++ b/cpp/src/arrow/table.cc
@@ -139,6 +139,11 @@ Status Column::ValidateData() {
// ----------------------------------------------------------------------
// RecordBatch methods
+void AssertBatchValid(const RecordBatch& batch) {
+ Status s = batch.Validate();
+ if (!s.ok()) { DCHECK(false) << s.ToString(); }
+}
+
RecordBatch::RecordBatch(const std::shared_ptr<Schema>& schema, int64_t num_rows,
const std::vector<std::shared_ptr<Array>>& columns)
: schema_(schema), num_rows_(num_rows), columns_(columns) {}
@@ -190,6 +195,26 @@ std::shared_ptr<RecordBatch> RecordBatch::Slice(int64_t offset, int64_t length)
return std::make_shared<RecordBatch>(schema_, num_rows, arrays);
}
+Status RecordBatch::Validate() const {
+ for (int i = 0; i < num_columns(); ++i) {
+ const Array& arr = *columns_[i];
+ if (arr.length() != num_rows_) {
+ std::stringstream ss;
+ ss << "Number of rows in column " << i << " did not match batch: " << arr.length()
+ << " vs " << num_rows_;
+ return Status::Invalid(ss.str());
+ }
+ const auto& schema_type = *schema_->field(i)->type;
+ if (!arr.type()->Equals(schema_type)) {
+ std::stringstream ss;
+ ss << "Column " << i << " type not match schema: " << arr.type()->ToString()
+ << " vs " << schema_type.ToString();
+ return Status::Invalid(ss.str());
+ }
+ }
+ return Status::OK();
+}
+
// ----------------------------------------------------------------------
// Table methods
http://git-wip-us.apache.org/repos/asf/arrow/blob/f5967ed6/cpp/src/arrow/table.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table.h b/cpp/src/arrow/table.h
index 7b739c9..0f35dd8 100644
--- a/cpp/src/arrow/table.h
+++ b/cpp/src/arrow/table.h
@@ -140,6 +140,10 @@ class ARROW_EXPORT RecordBatch {
std::shared_ptr<RecordBatch> Slice(int64_t offset);
std::shared_ptr<RecordBatch> Slice(int64_t offset, int64_t length);
+ /// Returns error status is there is something wrong with the record batch
+ /// contents, like a schema/array mismatch or inconsistent lengths
+ Status Validate() const;
+
private:
std::shared_ptr<Schema> schema_;
int64_t num_rows_;