You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2016/12/12 22:17:54 UTC
[5/5] arrow git commit: ARROW-418: [C++] Array / Builder class code reorganization, flattening
ARROW-418: [C++] Array / Builder class code reorganization, flattening
I've been wanting to do this for a while -- it feels cleaner to me. I am also going to promote modules from arrow/util to the top level. I'm open to other ideas, too.
Author: Wes McKinney <we...@twosigma.com>
Closes #236 from wesm/ARROW-418 and squashes the following commits:
6f556ea [Wes McKinney] Add missing math.h include for clang
9dc2e22 [Wes McKinney] Fix remaining old includes
6f7ae77 [Wes McKinney] Fixes, cpplint
66ac3f7 [Wes McKinney] Promote buffer.h/status.h/memory-pool.h to top level directory
8cdf059 [Wes McKinney] Consolidate Array and Builder classes in array.h, builder.h. Remove arrow/types subdirectory
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/2c10d7cc
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/2c10d7cc
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/2c10d7cc
Branch: refs/heads/master
Commit: 2c10d7ccec3c07fb061e1988be16aecaf9916af4
Parents: 73fe556
Author: Wes McKinney <we...@twosigma.com>
Authored: Mon Dec 12 17:17:31 2016 -0500
Committer: Wes McKinney <we...@twosigma.com>
Committed: Mon Dec 12 17:17:31 2016 -0500
----------------------------------------------------------------------
cpp/CMakeLists.txt | 15 +-
cpp/src/arrow/CMakeLists.txt | 11 +
cpp/src/arrow/api.h | 13 +-
cpp/src/arrow/array-decimal-test.cc | 40 ++
cpp/src/arrow/array-list-test.cc | 237 ++++++++++++
cpp/src/arrow/array-primitive-test.cc | 476 +++++++++++++++++++++++
cpp/src/arrow/array-string-test.cc | 358 ++++++++++++++++++
cpp/src/arrow/array-struct-test.cc | 391 +++++++++++++++++++
cpp/src/arrow/array-test.cc | 5 +-
cpp/src/arrow/array.cc | 443 +++++++++++++++++++++-
cpp/src/arrow/array.h | 373 +++++++++++++++++-
cpp/src/arrow/buffer-test.cc | 140 +++++++
cpp/src/arrow/buffer.cc | 102 +++++
cpp/src/arrow/buffer.h | 232 ++++++++++++
cpp/src/arrow/builder.cc | 329 +++++++++++++++-
cpp/src/arrow/builder.h | 315 +++++++++++++++-
cpp/src/arrow/column-benchmark.cc | 4 +-
cpp/src/arrow/column-test.cc | 1 -
cpp/src/arrow/column.cc | 2 +-
cpp/src/arrow/io/file.cc | 6 +-
cpp/src/arrow/io/hdfs.cc | 6 +-
cpp/src/arrow/io/interfaces.cc | 4 +-
cpp/src/arrow/io/io-file-test.cc | 2 +-
cpp/src/arrow/io/io-hdfs-test.cc | 2 +-
cpp/src/arrow/io/libhdfs_shim.cc | 2 +-
cpp/src/arrow/io/memory.cc | 5 +-
cpp/src/arrow/io/test-common.h | 4 +-
cpp/src/arrow/ipc/adapter.cc | 9 +-
cpp/src/arrow/ipc/file.cc | 4 +-
cpp/src/arrow/ipc/ipc-adapter-test.cc | 10 +-
cpp/src/arrow/ipc/ipc-file-test.cc | 11 +-
cpp/src/arrow/ipc/ipc-json-test.cc | 18 +-
cpp/src/arrow/ipc/ipc-metadata-test.cc | 2 +-
cpp/src/arrow/ipc/json-integration-test.cc | 2 +-
cpp/src/arrow/ipc/json-internal.cc | 10 +-
cpp/src/arrow/ipc/json.cc | 6 +-
cpp/src/arrow/ipc/metadata-internal.cc | 4 +-
cpp/src/arrow/ipc/metadata.cc | 4 +-
cpp/src/arrow/ipc/test-common.h | 9 +-
cpp/src/arrow/ipc/util.h | 2 +-
cpp/src/arrow/memory_pool-test.cc | 69 ++++
cpp/src/arrow/memory_pool.cc | 111 ++++++
cpp/src/arrow/memory_pool.h | 43 +++
cpp/src/arrow/pretty_print-test.cc | 5 +-
cpp/src/arrow/pretty_print.cc | 5 +-
cpp/src/arrow/status-test.cc | 38 ++
cpp/src/arrow/status.cc | 86 +++++
cpp/src/arrow/status.h | 192 ++++++++++
cpp/src/arrow/table-test.cc | 4 +-
cpp/src/arrow/table.cc | 2 +-
cpp/src/arrow/test-util.h | 43 ++-
cpp/src/arrow/type.cc | 8 +-
cpp/src/arrow/type.h | 2 +-
cpp/src/arrow/types/CMakeLists.txt | 39 --
cpp/src/arrow/types/construct.cc | 124 ------
cpp/src/arrow/types/construct.h | 47 ---
cpp/src/arrow/types/datetime.h | 27 --
cpp/src/arrow/types/decimal-test.cc | 40 --
cpp/src/arrow/types/decimal.cc | 31 --
cpp/src/arrow/types/decimal.h | 28 --
cpp/src/arrow/types/list-test.cc | 241 ------------
cpp/src/arrow/types/list.cc | 162 --------
cpp/src/arrow/types/list.h | 170 ---------
cpp/src/arrow/types/primitive-test.cc | 478 ------------------------
cpp/src/arrow/types/primitive.cc | 294 ---------------
cpp/src/arrow/types/primitive.h | 371 ------------------
cpp/src/arrow/types/string-test.cc | 360 ------------------
cpp/src/arrow/types/string.cc | 150 --------
cpp/src/arrow/types/string.h | 149 --------
cpp/src/arrow/types/struct-test.cc | 396 --------------------
cpp/src/arrow/types/struct.cc | 108 ------
cpp/src/arrow/types/struct.h | 116 ------
cpp/src/arrow/types/test-common.h | 70 ----
cpp/src/arrow/types/union.cc | 27 --
cpp/src/arrow/types/union.h | 48 ---
cpp/src/arrow/util/CMakeLists.txt | 6 -
cpp/src/arrow/util/bit-util.cc | 4 +-
cpp/src/arrow/util/buffer-test.cc | 140 -------
cpp/src/arrow/util/buffer.cc | 102 -----
cpp/src/arrow/util/buffer.h | 232 ------------
cpp/src/arrow/util/memory-pool-test.cc | 69 ----
cpp/src/arrow/util/memory-pool.cc | 111 ------
cpp/src/arrow/util/memory-pool.h | 43 ---
cpp/src/arrow/util/status-test.cc | 38 --
cpp/src/arrow/util/status.cc | 86 -----
cpp/src/arrow/util/status.h | 192 ----------
python/src/pyarrow/adapters/builtin.cc | 2 +-
python/src/pyarrow/adapters/pandas.cc | 2 +-
python/src/pyarrow/common.cc | 4 +-
python/src/pyarrow/common.h | 5 +-
python/src/pyarrow/io.cc | 4 +-
91 files changed, 4103 insertions(+), 4630 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/2c10d7cc/cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 798d75f..adcca0e 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -743,25 +743,17 @@ set(ARROW_PRIVATE_LINK_LIBS
set(ARROW_SRCS
src/arrow/array.cc
+ src/arrow/buffer.cc
src/arrow/builder.cc
src/arrow/column.cc
+ src/arrow/memory_pool.cc
src/arrow/pretty_print.cc
src/arrow/schema.cc
+ src/arrow/status.cc
src/arrow/table.cc
src/arrow/type.cc
- src/arrow/types/construct.cc
- src/arrow/types/decimal.cc
- src/arrow/types/list.cc
- src/arrow/types/primitive.cc
- src/arrow/types/string.cc
- src/arrow/types/struct.cc
- src/arrow/types/union.cc
-
src/arrow/util/bit-util.cc
- src/arrow/util/buffer.cc
- src/arrow/util/memory-pool.cc
- src/arrow/util/status.cc
)
add_library(arrow_objlib OBJECT
@@ -823,7 +815,6 @@ endif()
add_subdirectory(src/arrow)
add_subdirectory(src/arrow/io)
add_subdirectory(src/arrow/util)
-add_subdirectory(src/arrow/types)
#----------------------------------------------------------------------
# IPC library
http://git-wip-us.apache.org/repos/asf/arrow/blob/2c10d7cc/cpp/src/arrow/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 6c0dea2..7d7bc29 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -20,9 +20,12 @@ install(FILES
api.h
array.h
column.h
+ buffer.h
builder.h
+ memory_pool.h
pretty_print.h
schema.h
+ status.h
table.h
type.h
type_fwd.h
@@ -37,9 +40,17 @@ install(FILES
set(ARROW_TEST_LINK_LIBS ${ARROW_MIN_TEST_LIBS})
ADD_ARROW_TEST(array-test)
+ADD_ARROW_TEST(array-decimal-test)
+ADD_ARROW_TEST(array-list-test)
+ADD_ARROW_TEST(array-primitive-test)
+ADD_ARROW_TEST(array-string-test)
+ADD_ARROW_TEST(array-struct-test)
+ADD_ARROW_TEST(buffer-test)
ADD_ARROW_TEST(column-test)
+ADD_ARROW_TEST(memory_pool-test)
ADD_ARROW_TEST(pretty_print-test)
ADD_ARROW_TEST(schema-test)
+ADD_ARROW_TEST(status-test)
ADD_ARROW_TEST(table-test)
ADD_ARROW_BENCHMARK(column-benchmark)
http://git-wip-us.apache.org/repos/asf/arrow/blob/2c10d7cc/cpp/src/arrow/api.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/api.h b/cpp/src/arrow/api.h
index 2d317b4..51437d8 100644
--- a/cpp/src/arrow/api.h
+++ b/cpp/src/arrow/api.h
@@ -21,20 +21,13 @@
#define ARROW_API_H
#include "arrow/array.h"
+#include "arrow/buffer.h"
#include "arrow/builder.h"
#include "arrow/column.h"
+#include "arrow/memory_pool.h"
#include "arrow/schema.h"
+#include "arrow/status.h"
#include "arrow/table.h"
#include "arrow/type.h"
-#include "arrow/types/construct.h"
-#include "arrow/types/list.h"
-#include "arrow/types/primitive.h"
-#include "arrow/types/string.h"
-#include "arrow/types/struct.h"
-
-#include "arrow/util/buffer.h"
-#include "arrow/util/memory-pool.h"
-#include "arrow/util/status.h"
-
#endif // ARROW_API_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/2c10d7cc/cpp/src/arrow/array-decimal-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array-decimal-test.cc b/cpp/src/arrow/array-decimal-test.cc
new file mode 100644
index 0000000..9e00fd9
--- /dev/null
+++ b/cpp/src/arrow/array-decimal-test.cc
@@ -0,0 +1,40 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "gtest/gtest.h"
+
+#include "arrow/type.h"
+
+namespace arrow {
+
+TEST(TypesTest, TestDecimalType) {
+ DecimalType t1(8, 4);
+
+ ASSERT_EQ(t1.type, Type::DECIMAL);
+ ASSERT_EQ(t1.precision, 8);
+ ASSERT_EQ(t1.scale, 4);
+
+ ASSERT_EQ(t1.ToString(), std::string("decimal(8, 4)"));
+
+ // Test copy constructor
+ DecimalType t2 = t1;
+ ASSERT_EQ(t2.type, Type::DECIMAL);
+ ASSERT_EQ(t2.precision, 8);
+ ASSERT_EQ(t2.scale, 4);
+}
+
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/2c10d7cc/cpp/src/arrow/array-list-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array-list-test.cc b/cpp/src/arrow/array-list-test.cc
new file mode 100644
index 0000000..8baaf06
--- /dev/null
+++ b/cpp/src/arrow/array-list-test.cc
@@ -0,0 +1,237 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <cstdlib>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "arrow/array.h"
+#include "arrow/builder.h"
+#include "arrow/status.h"
+#include "arrow/test-util.h"
+#include "arrow/type.h"
+
+using std::shared_ptr;
+using std::string;
+using std::unique_ptr;
+using std::vector;
+
+namespace arrow {
+
+TEST(TypesTest, TestListType) {
+ std::shared_ptr<DataType> vt = std::make_shared<UInt8Type>();
+
+ ListType list_type(vt);
+ ASSERT_EQ(list_type.type, Type::LIST);
+
+ ASSERT_EQ(list_type.name(), string("list"));
+ ASSERT_EQ(list_type.ToString(), string("list<item: uint8>"));
+
+ ASSERT_EQ(list_type.value_type()->type, vt->type);
+ ASSERT_EQ(list_type.value_type()->type, vt->type);
+
+ std::shared_ptr<DataType> st = std::make_shared<StringType>();
+ std::shared_ptr<DataType> lt = std::make_shared<ListType>(st);
+ ASSERT_EQ(lt->ToString(), string("list<item: string>"));
+
+ ListType lt2(lt);
+ ASSERT_EQ(lt2.ToString(), string("list<item: list<item: string>>"));
+}
+
+// ----------------------------------------------------------------------
+// List tests
+
+class TestListBuilder : public TestBuilder {
+ public:
+ void SetUp() {
+ TestBuilder::SetUp();
+
+ value_type_ = TypePtr(new Int32Type());
+ type_ = TypePtr(new ListType(value_type_));
+
+ std::shared_ptr<ArrayBuilder> tmp;
+ ASSERT_OK(MakeBuilder(pool_, type_, &tmp));
+ builder_ = std::dynamic_pointer_cast<ListBuilder>(tmp);
+ }
+
+ void Done() {
+ std::shared_ptr<Array> out;
+ EXPECT_OK(builder_->Finish(&out));
+ result_ = std::dynamic_pointer_cast<ListArray>(out);
+ }
+
+ protected:
+ TypePtr value_type_;
+ TypePtr type_;
+
+ shared_ptr<ListBuilder> builder_;
+ shared_ptr<ListArray> result_;
+};
+
+TEST_F(TestListBuilder, Equality) {
+ Int32Builder* vb = static_cast<Int32Builder*>(builder_->value_builder().get());
+
+ ArrayPtr array, equal_array, unequal_array;
+ vector<int32_t> equal_offsets = {0, 1, 2, 5};
+ vector<int32_t> equal_values = {1, 2, 3, 4, 5, 2, 2, 2};
+ vector<int32_t> unequal_offsets = {0, 1, 4};
+ vector<int32_t> unequal_values = {1, 2, 2, 2, 3, 4, 5};
+
+ // setup two equal arrays
+ ASSERT_OK(builder_->Append(equal_offsets.data(), equal_offsets.size()));
+ ASSERT_OK(vb->Append(equal_values.data(), equal_values.size()));
+
+ ASSERT_OK(builder_->Finish(&array));
+ ASSERT_OK(builder_->Append(equal_offsets.data(), equal_offsets.size()));
+ ASSERT_OK(vb->Append(equal_values.data(), equal_values.size()));
+
+ ASSERT_OK(builder_->Finish(&equal_array));
+ // now an unequal one
+ ASSERT_OK(builder_->Append(unequal_offsets.data(), unequal_offsets.size()));
+ ASSERT_OK(vb->Append(unequal_values.data(), unequal_values.size()));
+
+ ASSERT_OK(builder_->Finish(&unequal_array));
+
+ // Test array equality
+ EXPECT_TRUE(array->Equals(array));
+ EXPECT_TRUE(array->Equals(equal_array));
+ EXPECT_TRUE(equal_array->Equals(array));
+ EXPECT_FALSE(equal_array->Equals(unequal_array));
+ EXPECT_FALSE(unequal_array->Equals(equal_array));
+
+ // Test range equality
+ EXPECT_TRUE(array->RangeEquals(0, 1, 0, unequal_array));
+ EXPECT_FALSE(array->RangeEquals(0, 2, 0, unequal_array));
+ EXPECT_FALSE(array->RangeEquals(1, 2, 1, unequal_array));
+ EXPECT_TRUE(array->RangeEquals(2, 3, 2, unequal_array));
+ EXPECT_TRUE(array->RangeEquals(3, 4, 1, unequal_array));
+}
+
+TEST_F(TestListBuilder, TestResize) {}
+
+TEST_F(TestListBuilder, TestAppendNull) {
+ ASSERT_OK(builder_->AppendNull());
+ ASSERT_OK(builder_->AppendNull());
+
+ Done();
+
+ ASSERT_OK(result_->Validate());
+ ASSERT_TRUE(result_->IsNull(0));
+ ASSERT_TRUE(result_->IsNull(1));
+
+ ASSERT_EQ(0, result_->raw_offsets()[0]);
+ ASSERT_EQ(0, result_->offset(1));
+ ASSERT_EQ(0, result_->offset(2));
+
+ Int32Array* values = static_cast<Int32Array*>(result_->values().get());
+ ASSERT_EQ(0, values->length());
+}
+
+void ValidateBasicListArray(const ListArray* result, const vector<int32_t>& values,
+ const vector<uint8_t>& is_valid) {
+ ASSERT_OK(result->Validate());
+ ASSERT_EQ(1, result->null_count());
+ ASSERT_EQ(0, result->values()->null_count());
+
+ ASSERT_EQ(3, result->length());
+ vector<int32_t> ex_offsets = {0, 3, 3, 7};
+ for (size_t i = 0; i < ex_offsets.size(); ++i) {
+ ASSERT_EQ(ex_offsets[i], result->offset(i));
+ }
+
+ for (int i = 0; i < result->length(); ++i) {
+ ASSERT_EQ(!static_cast<bool>(is_valid[i]), result->IsNull(i));
+ }
+
+ ASSERT_EQ(7, result->values()->length());
+ Int32Array* varr = static_cast<Int32Array*>(result->values().get());
+
+ for (size_t i = 0; i < values.size(); ++i) {
+ ASSERT_EQ(values[i], varr->Value(i));
+ }
+}
+
+TEST_F(TestListBuilder, TestBasics) {
+ vector<int32_t> values = {0, 1, 2, 3, 4, 5, 6};
+ vector<int> lengths = {3, 0, 4};
+ vector<uint8_t> is_valid = {1, 0, 1};
+
+ Int32Builder* vb = static_cast<Int32Builder*>(builder_->value_builder().get());
+
+ ASSERT_OK(builder_->Reserve(lengths.size()));
+ ASSERT_OK(vb->Reserve(values.size()));
+
+ int pos = 0;
+ for (size_t i = 0; i < lengths.size(); ++i) {
+ ASSERT_OK(builder_->Append(is_valid[i] > 0));
+ for (int j = 0; j < lengths[i]; ++j) {
+ vb->Append(values[pos++]);
+ }
+ }
+
+ Done();
+ ValidateBasicListArray(result_.get(), values, is_valid);
+}
+
+TEST_F(TestListBuilder, BulkAppend) {
+ vector<int32_t> values = {0, 1, 2, 3, 4, 5, 6};
+ vector<int> lengths = {3, 0, 4};
+ vector<uint8_t> is_valid = {1, 0, 1};
+ vector<int32_t> offsets = {0, 3, 3};
+
+ Int32Builder* vb = static_cast<Int32Builder*>(builder_->value_builder().get());
+ ASSERT_OK(vb->Reserve(values.size()));
+
+ builder_->Append(offsets.data(), offsets.size(), is_valid.data());
+ for (int32_t value : values) {
+ vb->Append(value);
+ }
+ Done();
+ ValidateBasicListArray(result_.get(), values, is_valid);
+}
+
+TEST_F(TestListBuilder, BulkAppendInvalid) {
+ vector<int32_t> values = {0, 1, 2, 3, 4, 5, 6};
+ vector<int> lengths = {3, 0, 4};
+ vector<uint8_t> is_null = {0, 1, 0};
+ vector<uint8_t> is_valid = {1, 0, 1};
+ vector<int32_t> offsets = {0, 2, 4}; // should be 0, 3, 3 given the is_null array
+
+ Int32Builder* vb = static_cast<Int32Builder*>(builder_->value_builder().get());
+ ASSERT_OK(vb->Reserve(values.size()));
+
+ builder_->Append(offsets.data(), offsets.size(), is_valid.data());
+ builder_->Append(offsets.data(), offsets.size(), is_valid.data());
+ for (int32_t value : values) {
+ vb->Append(value);
+ }
+
+ Done();
+ ASSERT_RAISES(Invalid, result_->Validate());
+}
+
+TEST_F(TestListBuilder, TestZeroLength) {
+ // All buffers are null
+ Done();
+ ASSERT_OK(result_->Validate());
+}
+
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/2c10d7cc/cpp/src/arrow/array-primitive-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array-primitive-test.cc b/cpp/src/arrow/array-primitive-test.cc
new file mode 100644
index 0000000..a10e240
--- /dev/null
+++ b/cpp/src/arrow/array-primitive-test.cc
@@ -0,0 +1,476 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "arrow/array.h"
+#include "arrow/buffer.h"
+#include "arrow/builder.h"
+#include "arrow/status.h"
+#include "arrow/test-util.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/bit-util.h"
+
+using std::string;
+using std::shared_ptr;
+using std::unique_ptr;
+using std::vector;
+
+namespace arrow {
+
+class Array;
+
+#define PRIMITIVE_TEST(KLASS, ENUM, NAME) \
+ TEST(TypesTest, TestPrimitive_##ENUM) { \
+ KLASS tp; \
+ \
+ ASSERT_EQ(tp.type, Type::ENUM); \
+ ASSERT_EQ(tp.ToString(), string(NAME)); \
+ \
+ KLASS tp_copy = tp; \
+ ASSERT_EQ(tp_copy.type, Type::ENUM); \
+ }
+
+PRIMITIVE_TEST(Int8Type, INT8, "int8");
+PRIMITIVE_TEST(Int16Type, INT16, "int16");
+PRIMITIVE_TEST(Int32Type, INT32, "int32");
+PRIMITIVE_TEST(Int64Type, INT64, "int64");
+PRIMITIVE_TEST(UInt8Type, UINT8, "uint8");
+PRIMITIVE_TEST(UInt16Type, UINT16, "uint16");
+PRIMITIVE_TEST(UInt32Type, UINT32, "uint32");
+PRIMITIVE_TEST(UInt64Type, UINT64, "uint64");
+
+PRIMITIVE_TEST(FloatType, FLOAT, "float");
+PRIMITIVE_TEST(DoubleType, DOUBLE, "double");
+
+PRIMITIVE_TEST(BooleanType, BOOL, "bool");
+
+// ----------------------------------------------------------------------
+// Primitive type tests
+
+TEST_F(TestBuilder, TestReserve) {
+ builder_->Init(10);
+ ASSERT_EQ(2, builder_->null_bitmap()->size());
+
+ builder_->Reserve(30);
+ ASSERT_EQ(4, builder_->null_bitmap()->size());
+}
+
+template <typename Attrs>
+class TestPrimitiveBuilder : public TestBuilder {
+ public:
+ typedef typename Attrs::ArrayType ArrayType;
+ typedef typename Attrs::BuilderType BuilderType;
+ typedef typename Attrs::T T;
+ typedef typename Attrs::Type Type;
+
+ virtual void SetUp() {
+ TestBuilder::SetUp();
+
+ type_ = Attrs::type();
+
+ std::shared_ptr<ArrayBuilder> tmp;
+ ASSERT_OK(MakeBuilder(pool_, type_, &tmp));
+ builder_ = std::dynamic_pointer_cast<BuilderType>(tmp);
+
+ ASSERT_OK(MakeBuilder(pool_, type_, &tmp));
+ builder_nn_ = std::dynamic_pointer_cast<BuilderType>(tmp);
+ }
+
+ void RandomData(int N, double pct_null = 0.1) {
+ Attrs::draw(N, &draws_);
+
+ valid_bytes_.resize(N);
+ test::random_null_bytes(N, pct_null, valid_bytes_.data());
+ }
+
+ void Check(const std::shared_ptr<BuilderType>& builder, bool nullable) {
+ int size = builder->length();
+
+ auto ex_data = std::make_shared<Buffer>(
+ reinterpret_cast<uint8_t*>(draws_.data()), size * sizeof(T));
+
+ std::shared_ptr<Buffer> ex_null_bitmap;
+ int32_t ex_null_count = 0;
+
+ if (nullable) {
+ ex_null_bitmap = test::bytes_to_null_buffer(valid_bytes_);
+ ex_null_count = test::null_count(valid_bytes_);
+ } else {
+ ex_null_bitmap = nullptr;
+ }
+
+ auto expected =
+ std::make_shared<ArrayType>(size, ex_data, ex_null_count, ex_null_bitmap);
+
+ std::shared_ptr<Array> out;
+ ASSERT_OK(builder->Finish(&out));
+
+ std::shared_ptr<ArrayType> result = std::dynamic_pointer_cast<ArrayType>(out);
+
+ // Builder is now reset
+ ASSERT_EQ(0, builder->length());
+ ASSERT_EQ(0, builder->capacity());
+ ASSERT_EQ(0, builder->null_count());
+ ASSERT_EQ(nullptr, builder->data());
+
+ ASSERT_EQ(ex_null_count, result->null_count());
+ ASSERT_TRUE(result->EqualsExact(*expected.get()));
+ }
+
+ protected:
+ std::shared_ptr<DataType> type_;
+ shared_ptr<BuilderType> builder_;
+ shared_ptr<BuilderType> builder_nn_;
+
+ vector<T> draws_;
+ vector<uint8_t> valid_bytes_;
+};
+
+#define PTYPE_DECL(CapType, c_type) \
+ typedef CapType##Array ArrayType; \
+ typedef CapType##Builder BuilderType; \
+ typedef CapType##Type Type; \
+ typedef c_type T; \
+ \
+ static std::shared_ptr<DataType> type() { \
+ return std::shared_ptr<DataType>(new Type()); \
+ }
+
+#define PINT_DECL(CapType, c_type, LOWER, UPPER) \
+ struct P##CapType { \
+ PTYPE_DECL(CapType, c_type); \
+ static void draw(int N, vector<T>* draws) { \
+ test::randint<T>(N, LOWER, UPPER, draws); \
+ } \
+ }
+
+#define PFLOAT_DECL(CapType, c_type, LOWER, UPPER) \
+ struct P##CapType { \
+ PTYPE_DECL(CapType, c_type); \
+ static void draw(int N, vector<T>* draws) { \
+ test::random_real<T>(N, 0, LOWER, UPPER, draws); \
+ } \
+ }
+
+PINT_DECL(UInt8, uint8_t, 0, UINT8_MAX);
+PINT_DECL(UInt16, uint16_t, 0, UINT16_MAX);
+PINT_DECL(UInt32, uint32_t, 0, UINT32_MAX);
+PINT_DECL(UInt64, uint64_t, 0, UINT64_MAX);
+
+PINT_DECL(Int8, int8_t, INT8_MIN, INT8_MAX);
+PINT_DECL(Int16, int16_t, INT16_MIN, INT16_MAX);
+PINT_DECL(Int32, int32_t, INT32_MIN, INT32_MAX);
+PINT_DECL(Int64, int64_t, INT64_MIN, INT64_MAX);
+
+PFLOAT_DECL(Float, float, -1000, 1000);
+PFLOAT_DECL(Double, double, -1000, 1000);
+
+struct PBoolean {
+ PTYPE_DECL(Boolean, uint8_t);
+};
+
+template <>
+void TestPrimitiveBuilder<PBoolean>::RandomData(int N, double pct_null) {
+ draws_.resize(N);
+ valid_bytes_.resize(N);
+
+ test::random_null_bytes(N, 0.5, draws_.data());
+ test::random_null_bytes(N, pct_null, valid_bytes_.data());
+}
+
+template <>
+void TestPrimitiveBuilder<PBoolean>::Check(
+ const std::shared_ptr<BooleanBuilder>& builder, bool nullable) {
+ int size = builder->length();
+
+ auto ex_data = test::bytes_to_null_buffer(draws_);
+
+ std::shared_ptr<Buffer> ex_null_bitmap;
+ int32_t ex_null_count = 0;
+
+ if (nullable) {
+ ex_null_bitmap = test::bytes_to_null_buffer(valid_bytes_);
+ ex_null_count = test::null_count(valid_bytes_);
+ } else {
+ ex_null_bitmap = nullptr;
+ }
+
+ auto expected =
+ std::make_shared<BooleanArray>(size, ex_data, ex_null_count, ex_null_bitmap);
+
+ std::shared_ptr<Array> out;
+ ASSERT_OK(builder->Finish(&out));
+ std::shared_ptr<BooleanArray> result = std::dynamic_pointer_cast<BooleanArray>(out);
+
+ // Builder is now reset
+ ASSERT_EQ(0, builder->length());
+ ASSERT_EQ(0, builder->capacity());
+ ASSERT_EQ(0, builder->null_count());
+ ASSERT_EQ(nullptr, builder->data());
+
+ ASSERT_EQ(ex_null_count, result->null_count());
+
+ ASSERT_EQ(expected->length(), result->length());
+
+ for (int i = 0; i < result->length(); ++i) {
+ if (nullable) { ASSERT_EQ(valid_bytes_[i] == 0, result->IsNull(i)) << i; }
+ bool actual = BitUtil::GetBit(result->raw_data(), i);
+ ASSERT_EQ(static_cast<bool>(draws_[i]), actual) << i;
+ }
+ ASSERT_TRUE(result->EqualsExact(*expected.get()));
+}
+
+typedef ::testing::Types<PBoolean, PUInt8, PUInt16, PUInt32, PUInt64, PInt8, PInt16,
+ PInt32, PInt64, PFloat, PDouble>
+ Primitives;
+
+TYPED_TEST_CASE(TestPrimitiveBuilder, Primitives);
+
+#define DECL_T() typedef typename TestFixture::T T;
+
+#define DECL_TYPE() typedef typename TestFixture::Type Type;
+
+#define DECL_ARRAYTYPE() typedef typename TestFixture::ArrayType ArrayType;
+
+TYPED_TEST(TestPrimitiveBuilder, TestInit) {
+ DECL_TYPE();
+
+ int n = 1000;
+ ASSERT_OK(this->builder_->Reserve(n));
+ ASSERT_EQ(BitUtil::NextPower2(n), this->builder_->capacity());
+ ASSERT_EQ(BitUtil::NextPower2(TypeTraits<Type>::bytes_required(n)),
+ this->builder_->data()->size());
+
+ // unsure if this should go in all builder classes
+ ASSERT_EQ(0, this->builder_->num_children());
+}
+
+TYPED_TEST(TestPrimitiveBuilder, TestAppendNull) {
+ int size = 1000;
+ for (int i = 0; i < size; ++i) {
+ ASSERT_OK(this->builder_->AppendNull());
+ }
+
+ std::shared_ptr<Array> result;
+ ASSERT_OK(this->builder_->Finish(&result));
+
+ for (int i = 0; i < size; ++i) {
+ ASSERT_TRUE(result->IsNull(i)) << i;
+ }
+}
+
+TYPED_TEST(TestPrimitiveBuilder, TestArrayDtorDealloc) {
+ DECL_T();
+
+ int size = 1000;
+
+ vector<T>& draws = this->draws_;
+ vector<uint8_t>& valid_bytes = this->valid_bytes_;
+
+ int64_t memory_before = this->pool_->bytes_allocated();
+
+ this->RandomData(size);
+
+ this->builder_->Reserve(size);
+
+ int i;
+ for (i = 0; i < size; ++i) {
+ if (valid_bytes[i] > 0) {
+ this->builder_->Append(draws[i]);
+ } else {
+ this->builder_->AppendNull();
+ }
+ }
+
+ do {
+ std::shared_ptr<Array> result;
+ ASSERT_OK(this->builder_->Finish(&result));
+ } while (false);
+
+ ASSERT_EQ(memory_before, this->pool_->bytes_allocated());
+}
+
+TYPED_TEST(TestPrimitiveBuilder, Equality) {
+ DECL_T();
+
+ const int size = 1000;
+ this->RandomData(size);
+ vector<T>& draws = this->draws_;
+ vector<uint8_t>& valid_bytes = this->valid_bytes_;
+ ArrayPtr array, equal_array, unequal_array;
+ auto builder = this->builder_.get();
+ ASSERT_OK(MakeArray(valid_bytes, draws, size, builder, &array));
+ ASSERT_OK(MakeArray(valid_bytes, draws, size, builder, &equal_array));
+
+ // Make the not equal array by negating the first valid element with itself.
+ const auto first_valid = std::find_if(
+ valid_bytes.begin(), valid_bytes.end(), [](uint8_t valid) { return valid > 0; });
+ const int first_valid_idx = std::distance(valid_bytes.begin(), first_valid);
+ // This should be true with a very high probability, but might introduce flakiness
+ ASSERT_LT(first_valid_idx, size - 1);
+ draws[first_valid_idx] = ~*reinterpret_cast<int64_t*>(&draws[first_valid_idx]);
+ ASSERT_OK(MakeArray(valid_bytes, draws, size, builder, &unequal_array));
+
+ // test normal equality
+ EXPECT_TRUE(array->Equals(array));
+ EXPECT_TRUE(array->Equals(equal_array));
+ EXPECT_TRUE(equal_array->Equals(array));
+ EXPECT_FALSE(equal_array->Equals(unequal_array));
+ EXPECT_FALSE(unequal_array->Equals(equal_array));
+
+ // Test range equality
+ EXPECT_FALSE(array->RangeEquals(0, first_valid_idx + 1, 0, unequal_array));
+ EXPECT_FALSE(array->RangeEquals(first_valid_idx, size, first_valid_idx, unequal_array));
+ EXPECT_TRUE(array->RangeEquals(0, first_valid_idx, 0, unequal_array));
+ EXPECT_TRUE(
+ array->RangeEquals(first_valid_idx + 1, size, first_valid_idx + 1, unequal_array));
+}
+
+TYPED_TEST(TestPrimitiveBuilder, TestAppendScalar) {
+ DECL_T();
+
+ const int size = 10000;
+
+ vector<T>& draws = this->draws_;
+ vector<uint8_t>& valid_bytes = this->valid_bytes_;
+
+ this->RandomData(size);
+
+ this->builder_->Reserve(1000);
+ this->builder_nn_->Reserve(1000);
+
+ int i;
+ int null_count = 0;
+ // Append the first 1000
+ for (i = 0; i < 1000; ++i) {
+ if (valid_bytes[i] > 0) {
+ this->builder_->Append(draws[i]);
+ } else {
+ this->builder_->AppendNull();
+ ++null_count;
+ }
+ this->builder_nn_->Append(draws[i]);
+ }
+
+ ASSERT_EQ(null_count, this->builder_->null_count());
+
+ ASSERT_EQ(1000, this->builder_->length());
+ ASSERT_EQ(1024, this->builder_->capacity());
+
+ ASSERT_EQ(1000, this->builder_nn_->length());
+ ASSERT_EQ(1024, this->builder_nn_->capacity());
+
+ this->builder_->Reserve(size - 1000);
+ this->builder_nn_->Reserve(size - 1000);
+
+ // Append the next 9000
+ for (i = 1000; i < size; ++i) {
+ if (valid_bytes[i] > 0) {
+ this->builder_->Append(draws[i]);
+ } else {
+ this->builder_->AppendNull();
+ }
+ this->builder_nn_->Append(draws[i]);
+ }
+
+ ASSERT_EQ(size, this->builder_->length());
+ ASSERT_EQ(BitUtil::NextPower2(size), this->builder_->capacity());
+
+ ASSERT_EQ(size, this->builder_nn_->length());
+ ASSERT_EQ(BitUtil::NextPower2(size), this->builder_nn_->capacity());
+
+ this->Check(this->builder_, true);
+ this->Check(this->builder_nn_, false);
+}
+
+TYPED_TEST(TestPrimitiveBuilder, TestAppendVector) {
+ DECL_T();
+
+ int size = 10000;
+ this->RandomData(size);
+
+ vector<T>& draws = this->draws_;
+ vector<uint8_t>& valid_bytes = this->valid_bytes_;
+
+ // first slug
+ int K = 1000;
+
+ ASSERT_OK(this->builder_->Append(draws.data(), K, valid_bytes.data()));
+ ASSERT_OK(this->builder_nn_->Append(draws.data(), K));
+
+ ASSERT_EQ(1000, this->builder_->length());
+ ASSERT_EQ(1024, this->builder_->capacity());
+
+ ASSERT_EQ(1000, this->builder_nn_->length());
+ ASSERT_EQ(1024, this->builder_nn_->capacity());
+
+ // Append the next 9000
+ ASSERT_OK(this->builder_->Append(draws.data() + K, size - K, valid_bytes.data() + K));
+ ASSERT_OK(this->builder_nn_->Append(draws.data() + K, size - K));
+
+ ASSERT_EQ(size, this->builder_->length());
+ ASSERT_EQ(BitUtil::NextPower2(size), this->builder_->capacity());
+
+ this->Check(this->builder_, true);
+ this->Check(this->builder_nn_, false);
+}
+
+TYPED_TEST(TestPrimitiveBuilder, TestAdvance) {
+ int n = 1000;
+ ASSERT_OK(this->builder_->Reserve(n));
+
+ ASSERT_OK(this->builder_->Advance(100));
+ ASSERT_EQ(100, this->builder_->length());
+
+ ASSERT_OK(this->builder_->Advance(900));
+
+ int too_many = this->builder_->capacity() - 1000 + 1;
+ ASSERT_RAISES(Invalid, this->builder_->Advance(too_many));
+}
+
+TYPED_TEST(TestPrimitiveBuilder, TestResize) {
+ DECL_TYPE();
+
+ int cap = kMinBuilderCapacity * 2;
+
+ ASSERT_OK(this->builder_->Reserve(cap));
+ ASSERT_EQ(cap, this->builder_->capacity());
+
+ ASSERT_EQ(TypeTraits<Type>::bytes_required(cap), this->builder_->data()->size());
+ ASSERT_EQ(BitUtil::BytesForBits(cap), this->builder_->null_bitmap()->size());
+}
+
+TYPED_TEST(TestPrimitiveBuilder, TestReserve) {
+ ASSERT_OK(this->builder_->Reserve(10));
+ ASSERT_EQ(0, this->builder_->length());
+ ASSERT_EQ(kMinBuilderCapacity, this->builder_->capacity());
+
+ ASSERT_OK(this->builder_->Reserve(90));
+ ASSERT_OK(this->builder_->Advance(100));
+ ASSERT_OK(this->builder_->Reserve(kMinBuilderCapacity));
+
+ ASSERT_EQ(BitUtil::NextPower2(kMinBuilderCapacity + 100), this->builder_->capacity());
+}
+
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/2c10d7cc/cpp/src/arrow/array-string-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array-string-test.cc b/cpp/src/arrow/array-string-test.cc
new file mode 100644
index 0000000..b144c63
--- /dev/null
+++ b/cpp/src/arrow/array-string-test.cc
@@ -0,0 +1,358 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "arrow/array.h"
+#include "arrow/builder.h"
+#include "arrow/test-util.h"
+#include "arrow/type.h"
+
+namespace arrow {
+
+class Buffer;
+
+TEST(TypesTest, BinaryType) {
+  // Two binary types compare equal to each other but not to a string type.
+  BinaryType binary;
+  BinaryType other_binary;
+  StringType utf8;
+  EXPECT_TRUE(binary.Equals(&other_binary));
+  EXPECT_FALSE(binary.Equals(&utf8));
+  ASSERT_EQ(binary.type, Type::BINARY);
+  ASSERT_EQ(binary.ToString(), std::string("binary"));
+}
+
+TEST(TypesTest, TestStringType) {
+  // A default-constructed StringType reports the STRING enum and the name "string".
+  StringType string_type;
+  ASSERT_EQ(string_type.type, Type::STRING);
+  ASSERT_EQ(string_type.ToString(), std::string("string"));
+}
+
+// ----------------------------------------------------------------------
+// String container
+
+// Fixture that assembles a five-element StringArray directly from raw buffers.
+class TestStringContainer : public ::testing::Test {
+ public:
+ // Fills the raw vectors with the reference data and builds strings_ from them.
+ void SetUp() {
+ chars_ = {'a', 'b', 'b', 'c', 'c', 'c'};
+ // One more offset than elements; element i spans [offsets_[i], offsets_[i + 1])
+ // of chars_, which yields the strings listed in expected_.
+ offsets_ = {0, 1, 1, 1, 3, 6};
+ // One byte per element; the tests treat a 0 entry (index 2) as a null slot.
+ valid_bytes_ = {1, 1, 0, 1, 1};
+ expected_ = {"a", "", "", "bb", "ccc"};
+
+ MakeArray();
+ }
+
+ // Rebuilds the buffers and strings_ from the current contents of the vectors.
+ void MakeArray() {
+ length_ = offsets_.size() - 1;
+ value_buf_ = test::GetBufferFromVector(chars_);
+ offsets_buf_ = test::GetBufferFromVector(offsets_);
+ null_bitmap_ = test::bytes_to_null_buffer(valid_bytes_);
+ null_count_ = test::null_count(valid_bytes_);
+
+ strings_ = std::make_shared<StringArray>(
+ length_, offsets_buf_, value_buf_, null_count_, null_bitmap_);
+ }
+
+ protected:
+ // Raw inputs the array is built from.
+ std::vector<int32_t> offsets_;
+ std::vector<char> chars_;
+ std::vector<uint8_t> valid_bytes_;
+
+ // Expected string at each index.
+ std::vector<std::string> expected_;
+
+ // Buffers handed to the array constructor.
+ std::shared_ptr<Buffer> value_buf_;
+ std::shared_ptr<Buffer> offsets_buf_;
+ std::shared_ptr<Buffer> null_bitmap_;
+
+ int null_count_;
+ int length_;
+
+ // Array under test.
+ std::shared_ptr<StringArray> strings_;
+};
+
+// The fixture array must report the fixture's length, exactly one null, and validate.
+TEST_F(TestStringContainer, TestArrayBasics) {
+ ASSERT_EQ(length_, strings_->length());
+ ASSERT_EQ(1, strings_->null_count());
+ ASSERT_OK(strings_->Validate());
+}
+
+TEST_F(TestStringContainer, TestType) {
+  // Both the DataType object and the array's own enum accessor must say STRING.
+  ASSERT_EQ(Type::STRING, strings_->type()->type);
+  ASSERT_EQ(Type::STRING, strings_->type_enum());
+}
+
+TEST_F(TestStringContainer, TestListFunctions) {
+  // Walk the expected strings, tracking the running offset each one should start at.
+  int expected_offset = 0;
+  size_t index = 0;
+  for (const std::string& value : expected_) {
+    ASSERT_EQ(expected_offset, strings_->value_offset(index));
+    ASSERT_EQ(static_cast<int>(value.size()), strings_->value_length(index));
+    expected_offset += value.size();
+    ++index;
+  }
+}
+
+// Builds a second array over the fixture's buffers and lets it go out of scope.
+// There are no assertions; the test only exercises construction and destruction.
+TEST_F(TestStringContainer, TestDestructor) {
+ auto arr = std::make_shared<StringArray>(
+ length_, offsets_buf_, value_buf_, null_count_, null_bitmap_);
+}
+
+TEST_F(TestStringContainer, TestGetString) {
+  // Valid slots must round-trip their string; slots flagged 0 must read as null.
+  const size_t count = expected_.size();
+  for (size_t idx = 0; idx < count; ++idx) {
+    if (valid_bytes_[idx] != 0) {
+      ASSERT_EQ(expected_[idx], strings_->GetString(idx));
+    } else {
+      ASSERT_TRUE(strings_->IsNull(idx));
+    }
+  }
+}
+
+TEST_F(TestStringContainer, TestEmptyStringComparison) {
+  // All-zero offsets describe five empty strings, so no value buffer is supplied.
+  offsets_ = {0, 0, 0, 0, 0, 0};
+  offsets_buf_ = test::GetBufferFromVector(offsets_);
+  length_ = offsets_.size() - 1;
+
+  auto lhs = std::make_shared<StringArray>(
+      length_, offsets_buf_, nullptr, null_count_, null_bitmap_);
+  auto rhs = std::make_shared<StringArray>(
+      length_, offsets_buf_, nullptr, null_count_, null_bitmap_);
+  ASSERT_TRUE(lhs->Equals(rhs));
+}
+
+// ----------------------------------------------------------------------
+// String builder tests
+
+// Fixture owning a StringBuilder and the StringArray produced from it.
+class TestStringBuilder : public TestBuilder {
+ public:
+  // Creates a fresh StringBuilder on the fixture's memory pool.
+  void SetUp() {
+    TestBuilder::SetUp();
+    type_ = TypePtr(new StringType());
+    builder_.reset(new StringBuilder(pool_, type_));
+  }
+
+  // Finishes builder_ and stores the outcome in result_.
+  //
+  // Every step is asserted: a failed Finish() or an unexpected array type stops here
+  // instead of dereferencing a null result_, and the Status returned by Validate()
+  // is no longer discarded.
+  void Done() {
+    std::shared_ptr<Array> out;
+    ASSERT_OK(builder_->Finish(&out));
+
+    result_ = std::dynamic_pointer_cast<StringArray>(out);
+    ASSERT_TRUE(result_ != nullptr);
+    ASSERT_OK(result_->Validate());
+  }
+
+ protected:
+  TypePtr type_;
+
+  std::unique_ptr<StringBuilder> builder_;
+  std::shared_ptr<StringArray> result_;
+};
+
+// Appends the same five-slot pattern (one null, six characters in total) `reps`
+// times and checks length, null count, data size, offsets and values of the result.
+TEST_F(TestStringBuilder, TestScalarAppend) {
+  std::vector<std::string> strings = {"", "bb", "a", "", "ccc"};
+  std::vector<uint8_t> is_null = {0, 0, 0, 1, 0};
+
+  int N = strings.size();
+  int reps = 1000;
+
+  // Each append returns a Status; assert on it so a failed append is reported where
+  // it happens rather than as a confusing mismatch further down.
+  for (int j = 0; j < reps; ++j) {
+    for (int i = 0; i < N; ++i) {
+      if (is_null[i]) {
+        ASSERT_OK(builder_->AppendNull());
+      } else {
+        ASSERT_OK(builder_->Append(strings[i]));
+      }
+    }
+  }
+  Done();
+
+  ASSERT_EQ(reps * N, result_->length());
+  ASSERT_EQ(reps, result_->null_count());
+  // "bb" + "a" + "ccc" contribute six bytes per repetition.
+  ASSERT_EQ(reps * 6, result_->data()->size());
+
+  int32_t length = 0;
+  int32_t pos = 0;
+  for (int i = 0; i < N * reps; ++i) {
+    if (is_null[i % N]) {
+      ASSERT_TRUE(result_->IsNull(i));
+    } else {
+      ASSERT_FALSE(result_->IsNull(i));
+      result_->GetValue(i, &length);
+      ASSERT_EQ(pos, result_->offset(i));
+      ASSERT_EQ(static_cast<int>(strings[i % N].size()), length);
+      ASSERT_EQ(strings[i % N], result_->GetString(i));
+
+      pos += length;
+    }
+  }
+}
+
+// Finishing a builder that never received a value must not fail.
+TEST_F(TestStringBuilder, TestZeroLength) {
+ // All buffers are null
+ Done();
+}
+
+// Binary container type
+// TODO(emkornfield) there should be some way to refactor these to avoid duplicating
+// code with String
+// Fixture that assembles a five-element BinaryArray directly from raw buffers.
+class TestBinaryContainer : public ::testing::Test {
+ public:
+ // Fills the raw vectors with the reference data and builds strings_ from them.
+ void SetUp() {
+ chars_ = {'a', 'b', 'b', 'c', 'c', 'c'};
+ // One more offset than elements; element i spans [offsets_[i], offsets_[i + 1])
+ // of chars_, which yields the values listed in expected_.
+ offsets_ = {0, 1, 1, 1, 3, 6};
+ // One byte per element; the tests treat a 0 entry (index 2) as a null slot.
+ valid_bytes_ = {1, 1, 0, 1, 1};
+ expected_ = {"a", "", "", "bb", "ccc"};
+
+ MakeArray();
+ }
+
+ // Rebuilds the buffers and strings_ from the current contents of the vectors.
+ void MakeArray() {
+ length_ = offsets_.size() - 1;
+ value_buf_ = test::GetBufferFromVector(chars_);
+ offsets_buf_ = test::GetBufferFromVector(offsets_);
+
+ null_bitmap_ = test::bytes_to_null_buffer(valid_bytes_);
+ null_count_ = test::null_count(valid_bytes_);
+
+ strings_ = std::make_shared<BinaryArray>(
+ length_, offsets_buf_, value_buf_, null_count_, null_bitmap_);
+ }
+
+ protected:
+ // Raw inputs the array is built from.
+ std::vector<int32_t> offsets_;
+ std::vector<char> chars_;
+ std::vector<uint8_t> valid_bytes_;
+
+ // Expected value at each index.
+ std::vector<std::string> expected_;
+
+ // Buffers handed to the array constructor.
+ std::shared_ptr<Buffer> value_buf_;
+ std::shared_ptr<Buffer> offsets_buf_;
+ std::shared_ptr<Buffer> null_bitmap_;
+
+ int null_count_;
+ int length_;
+
+ // Array under test (named strings_ like its StringArray counterpart).
+ std::shared_ptr<BinaryArray> strings_;
+};
+
+// The fixture array must report the fixture's length, exactly one null, and validate.
+TEST_F(TestBinaryContainer, TestArrayBasics) {
+ ASSERT_EQ(length_, strings_->length());
+ ASSERT_EQ(1, strings_->null_count());
+ ASSERT_OK(strings_->Validate());
+}
+
+TEST_F(TestBinaryContainer, TestType) {
+  // Both the DataType object and the array's own enum accessor must say BINARY.
+  ASSERT_EQ(Type::BINARY, strings_->type()->type);
+  ASSERT_EQ(Type::BINARY, strings_->type_enum());
+}
+
+TEST_F(TestBinaryContainer, TestListFunctions) {
+  // Walk the expected values, tracking the running offset each one should start at.
+  int expected_offset = 0;
+  size_t index = 0;
+  for (const std::string& value : expected_) {
+    ASSERT_EQ(expected_offset, strings_->value_offset(index));
+    ASSERT_EQ(static_cast<int>(value.size()), strings_->value_length(index));
+    expected_offset += value.size();
+    ++index;
+  }
+}
+
+// Builds a second array over the fixture's buffers and lets it go out of scope.
+// There are no assertions; the test only exercises construction and destruction.
+TEST_F(TestBinaryContainer, TestDestructor) {
+ auto arr = std::make_shared<BinaryArray>(
+ length_, offsets_buf_, value_buf_, null_count_, null_bitmap_);
+}
+
+// Every valid slot must return the expected bytes with the expected length; slots
+// flagged 0 in valid_bytes_ must read as null.
+TEST_F(TestBinaryContainer, TestGetValue) {
+  for (size_t i = 0; i < expected_.size(); ++i) {
+    if (valid_bytes_[i] == 0) {
+      ASSERT_TRUE(strings_->IsNull(i));
+    } else {
+      int32_t len = -1;
+      const uint8_t* bytes = strings_->GetValue(i, &len);
+      // Check the reported length first: comparing only `len` bytes would pass
+      // vacuously when the array reports a length that is too short (or zero).
+      ASSERT_EQ(static_cast<int32_t>(expected_[i].size()), len);
+      ASSERT_EQ(0, std::memcmp(expected_[i].data(), bytes, len));
+    }
+  }
+}
+
+// Fixture owning a BinaryBuilder and the BinaryArray produced from it.
+class TestBinaryBuilder : public TestBuilder {
+ public:
+  // Creates a fresh BinaryBuilder on the fixture's memory pool.
+  void SetUp() {
+    TestBuilder::SetUp();
+    type_ = TypePtr(new BinaryType());
+    builder_.reset(new BinaryBuilder(pool_, type_));
+  }
+
+  // Finishes builder_ and stores the outcome in result_.
+  //
+  // Every step is asserted: a failed Finish() or an unexpected array type stops here
+  // instead of dereferencing a null result_, and the Status returned by Validate()
+  // is no longer discarded.
+  void Done() {
+    std::shared_ptr<Array> out;
+    ASSERT_OK(builder_->Finish(&out));
+
+    result_ = std::dynamic_pointer_cast<BinaryArray>(out);
+    ASSERT_TRUE(result_ != nullptr);
+    ASSERT_OK(result_->Validate());
+  }
+
+ protected:
+  TypePtr type_;
+
+  std::unique_ptr<BinaryBuilder> builder_;
+  std::shared_ptr<BinaryArray> result_;
+};
+
+// Appends the same five-slot pattern (one null, six bytes in total) `reps` times and
+// checks length, null count, data size and the bytes of every valid slot.
+TEST_F(TestBinaryBuilder, TestScalarAppend) {
+  std::vector<std::string> strings = {"", "bb", "a", "", "ccc"};
+  std::vector<uint8_t> is_null = {0, 0, 0, 1, 0};
+
+  int N = strings.size();
+  int reps = 1000;
+
+  // Each append returns a Status; assert on it so a failed append is reported where
+  // it happens rather than as a confusing mismatch further down.
+  for (int j = 0; j < reps; ++j) {
+    for (int i = 0; i < N; ++i) {
+      if (is_null[i]) {
+        ASSERT_OK(builder_->AppendNull());
+      } else {
+        ASSERT_OK(builder_->Append(
+            reinterpret_cast<const uint8_t*>(strings[i].data()), strings[i].size()));
+      }
+    }
+  }
+  Done();
+  ASSERT_OK(result_->Validate());
+  ASSERT_EQ(reps * N, result_->length());
+  ASSERT_EQ(reps, result_->null_count());
+  // "bb" + "a" + "ccc" contribute six bytes per repetition.
+  ASSERT_EQ(reps * 6, result_->data()->size());
+
+  int32_t length = 0;
+  for (int i = 0; i < N * reps; ++i) {
+    if (is_null[i % N]) {
+      ASSERT_TRUE(result_->IsNull(i));
+    } else {
+      ASSERT_FALSE(result_->IsNull(i));
+      const uint8_t* vals = result_->GetValue(i, &length);
+      ASSERT_EQ(static_cast<int>(strings[i % N].size()), length);
+      ASSERT_EQ(0, std::memcmp(vals, strings[i % N].data(), length));
+    }
+  }
+}
+
+// Finishing a builder that never received a value must not fail.
+TEST_F(TestBinaryBuilder, TestZeroLength) {
+ // All buffers are null
+ Done();
+}
+
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/2c10d7cc/cpp/src/arrow/array-struct-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array-struct-test.cc b/cpp/src/arrow/array-struct-test.cc
new file mode 100644
index 0000000..58386fe
--- /dev/null
+++ b/cpp/src/arrow/array-struct-test.cc
@@ -0,0 +1,391 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "arrow/array.h"
+#include "arrow/builder.h"
+#include "arrow/status.h"
+#include "arrow/test-util.h"
+#include "arrow/type.h"
+
+using std::shared_ptr;
+using std::string;
+using std::vector;
+
+namespace arrow {
+
+TEST(TestStructType, Basics) {
+  // Three fields of different types, in the order the struct should expose them.
+  const TypePtr f0_type = std::make_shared<Int32Type>();
+  const auto f0 = std::make_shared<Field>("f0", f0_type);
+
+  const TypePtr f1_type = std::make_shared<StringType>();
+  const auto f1 = std::make_shared<Field>("f1", f1_type);
+
+  const TypePtr f2_type = std::make_shared<UInt8Type>();
+  const auto f2 = std::make_shared<Field>("f2", f2_type);
+
+  const vector<shared_ptr<Field>> fields{f0, f1, f2};
+
+  StructType struct_type(fields);
+
+  // Children come back in declaration order.
+  ASSERT_TRUE(struct_type.child(0)->Equals(f0));
+  ASSERT_TRUE(struct_type.child(1)->Equals(f1));
+  ASSERT_TRUE(struct_type.child(2)->Equals(f2));
+
+  ASSERT_EQ(struct_type.ToString(), "struct<f0: int32, f1: string, f2: uint8>");
+
+  // TODO(wesm): out of bounds for field(...)
+}
+
+// Checks that `result` is the four-row struct<list<int8>, int32> array the struct
+// builder tests produce.
+//
+// The row count (4), child value count (10) and null counts are fixed by the tests'
+// shared data set and are asserted as literals. All of the vector arguments are
+// compared against the array, including the validity flags and list lengths.
+void ValidateBasicStructArray(const StructArray* result,
+    const vector<uint8_t>& struct_is_valid, const vector<char>& list_values,
+    const vector<uint8_t>& list_is_valid, const vector<int>& list_lengths,
+    const vector<int>& list_offsets, const vector<int32_t>& int_values) {
+  ASSERT_EQ(4, result->length());
+  ASSERT_OK(result->Validate());
+
+  auto list_char_arr = static_cast<ListArray*>(result->field(0).get());
+  auto char_arr = static_cast<Int8Array*>(list_char_arr->values().get());
+  auto int32_arr = static_cast<Int32Array*>(result->field(1).get());
+
+  ASSERT_EQ(0, result->null_count());
+  ASSERT_EQ(1, list_char_arr->null_count());
+  ASSERT_EQ(0, int32_arr->null_count());
+
+  // Struct-level validity, one flag per row (0 means null).
+  for (size_t i = 0; i < struct_is_valid.size(); ++i) {
+    ASSERT_EQ(struct_is_valid[i] == 0, result->IsNull(i));
+  }
+
+  // List<char>
+  ASSERT_EQ(4, list_char_arr->length());
+  ASSERT_EQ(10, list_char_arr->values()->length());
+  for (size_t i = 0; i < list_is_valid.size(); ++i) {
+    ASSERT_EQ(list_is_valid[i] == 0, list_char_arr->IsNull(i));
+  }
+  for (size_t i = 0; i < list_offsets.size(); ++i) {
+    ASSERT_EQ(list_offsets[i], list_char_arr->raw_offsets()[i]);
+  }
+  // Each list's length is the distance between consecutive offsets.
+  for (size_t i = 0; i < list_lengths.size(); ++i) {
+    const int32_t* offsets = list_char_arr->raw_offsets();
+    ASSERT_EQ(list_lengths[i], offsets[i + 1] - offsets[i]);
+  }
+  for (size_t i = 0; i < list_values.size(); ++i) {
+    ASSERT_EQ(list_values[i], char_arr->Value(i));
+  }
+
+  // Int32
+  ASSERT_EQ(4, int32_arr->length());
+  for (size_t i = 0; i < int_values.size(); ++i) {
+    ASSERT_EQ(int_values[i], int32_arr->Value(i));
+  }
+}
+
+// ----------------------------------------------------------------------------------
+// Struct test
+// Fixture owning a StructBuilder for struct<list: list<int8>, int: int32> and the
+// StructArray produced from it.
+class TestStructBuilder : public TestBuilder {
+ public:
+  // Builds the struct type and obtains a matching builder through MakeBuilder().
+  void SetUp() {
+    TestBuilder::SetUp();
+
+    auto int32_type = TypePtr(new Int32Type());
+    auto char_type = TypePtr(new Int8Type());
+    auto list_type = TypePtr(new ListType(char_type));
+
+    std::vector<FieldPtr> fields;
+    fields.push_back(FieldPtr(new Field("list", list_type)));
+    fields.push_back(FieldPtr(new Field("int", int32_type)));
+
+    type_ = TypePtr(new StructType(fields));
+    value_fields_ = fields;
+
+    std::shared_ptr<ArrayBuilder> tmp;
+    ASSERT_OK(MakeBuilder(pool_, type_, &tmp));
+
+    // Fail here, not on a later null dereference, if the builder has another type.
+    builder_ = std::dynamic_pointer_cast<StructBuilder>(tmp);
+    ASSERT_TRUE(builder_ != nullptr);
+    ASSERT_EQ(2, static_cast<int>(builder_->field_builders().size()));
+  }
+
+  // Finishes builder_ and stores the resulting StructArray in result_.
+  void Done() {
+    std::shared_ptr<Array> out;
+    ASSERT_OK(builder_->Finish(&out));
+    result_ = std::dynamic_pointer_cast<StructArray>(out);
+    ASSERT_TRUE(result_ != nullptr);
+  }
+
+ protected:
+  std::vector<FieldPtr> value_fields_;
+  TypePtr type_;
+
+  std::shared_ptr<StructBuilder> builder_;
+  std::shared_ptr<StructArray> result_;
+};
+
+TEST_F(TestStructBuilder, TestAppendNull) {
+  // Two null struct slots, each matched by a null in both child builders.
+  ASSERT_OK(builder_->AppendNull());
+  ASSERT_OK(builder_->AppendNull());
+  ASSERT_EQ(2, static_cast<int>(builder_->field_builders().size()));
+
+  auto list_vb = static_cast<ListBuilder*>(builder_->field_builder(0).get());
+  ASSERT_OK(list_vb->AppendNull());
+  ASSERT_OK(list_vb->AppendNull());
+  ASSERT_EQ(2, list_vb->length());
+
+  auto int_vb = static_cast<Int32Builder*>(builder_->field_builder(1).get());
+  ASSERT_OK(int_vb->AppendNull());
+  ASSERT_OK(int_vb->AppendNull());
+  ASSERT_EQ(2, int_vb->length());
+
+  Done();
+
+  ASSERT_OK(result_->Validate());
+
+  ASSERT_EQ(2, static_cast<int>(result_->fields().size()));
+  ASSERT_EQ(2, result_->length());
+  for (int child = 0; child < 2; ++child) {
+    ASSERT_EQ(2, result_->field(child)->length());
+  }
+
+  // Both rows are null at the struct level and in each child.
+  for (int row = 0; row < 2; ++row) {
+    ASSERT_TRUE(result_->IsNull(row));
+  }
+  for (int child = 0; child < 2; ++child) {
+    for (int row = 0; row < 2; ++row) {
+      ASSERT_TRUE(result_->field(child)->IsNull(row));
+    }
+  }
+
+  ASSERT_EQ(Type::LIST, result_->field(0)->type_enum());
+  ASSERT_EQ(Type::INT32, result_->field(1)->type_enum());
+}
+
+TEST_F(TestStructBuilder, TestBasics) {
+  // Reference data: four rows, the second of which holds a null (empty) list.
+  const vector<int32_t> int_values = {1, 2, 3, 4};
+  const vector<char> list_values = {'j', 'o', 'e', 'b', 'o', 'b', 'm', 'a', 'r', 'k'};
+  const vector<int> list_lengths = {3, 0, 3, 4};
+  const vector<int> list_offsets = {0, 3, 3, 6, 10};
+  const vector<uint8_t> list_is_valid = {1, 0, 1, 1};
+  const vector<uint8_t> struct_is_valid = {1, 1, 1, 1};
+
+  auto list_vb = static_cast<ListBuilder*>(builder_->field_builder(0).get());
+  auto char_vb = static_cast<Int8Builder*>(list_vb->value_builder().get());
+  auto int_vb = static_cast<Int32Builder*>(builder_->field_builder(1).get());
+  ASSERT_EQ(2, static_cast<int>(builder_->field_builders().size()));
+
+  EXPECT_OK(builder_->Resize(list_lengths.size()));
+  EXPECT_OK(char_vb->Resize(list_values.size()));
+  EXPECT_OK(int_vb->Resize(int_values.size()));
+
+  // Fill the children row by row: open the list slot, add the int, then copy this
+  // row's share of the characters.
+  int next_char = 0;
+  for (size_t row = 0; row < list_lengths.size(); ++row) {
+    ASSERT_OK(list_vb->Append(list_is_valid[row] > 0));
+    int_vb->UnsafeAppend(int_values[row]);
+    const int row_end = next_char + list_lengths[row];
+    while (next_char < row_end) {
+      char_vb->UnsafeAppend(list_values[next_char]);
+      ++next_char;
+    }
+  }
+
+  // Then mark every struct slot.
+  for (const uint8_t valid : struct_is_valid) {
+    ASSERT_OK(builder_->Append(valid > 0));
+  }
+
+  Done();
+
+  ValidateBasicStructArray(result_.get(), struct_is_valid, list_values, list_is_valid,
+      list_lengths, list_offsets, int_values);
+}
+
+// Builds the reference struct array with the bulk Append overloads and validates it.
+TEST_F(TestStructBuilder, BulkAppend) {
+  vector<int32_t> int_values = {1, 2, 3, 4};
+  vector<char> list_values = {'j', 'o', 'e', 'b', 'o', 'b', 'm', 'a', 'r', 'k'};
+  vector<int> list_lengths = {3, 0, 3, 4};
+  vector<int> list_offsets = {0, 3, 3, 6};
+  vector<uint8_t> list_is_valid = {1, 0, 1, 1};
+  vector<uint8_t> struct_is_valid = {1, 1, 1, 1};
+
+  ListBuilder* list_vb = static_cast<ListBuilder*>(builder_->field_builder(0).get());
+  Int8Builder* char_vb = static_cast<Int8Builder*>(list_vb->value_builder().get());
+  Int32Builder* int_vb = static_cast<Int32Builder*>(builder_->field_builder(1).get());
+
+  ASSERT_OK(builder_->Resize(list_lengths.size()));
+  ASSERT_OK(char_vb->Resize(list_values.size()));
+  ASSERT_OK(int_vb->Resize(int_values.size()));
+
+  // The bulk appends return a Status; assert on it instead of dropping it.
+  ASSERT_OK(builder_->Append(struct_is_valid.size(), struct_is_valid.data()));
+
+  ASSERT_OK(
+      list_vb->Append(list_offsets.data(), list_offsets.size(), list_is_valid.data()));
+  for (int8_t value : list_values) {
+    char_vb->UnsafeAppend(value);
+  }
+  for (int32_t value : int_values) {
+    int_vb->UnsafeAppend(value);
+  }
+
+  Done();
+  ValidateBasicStructArray(result_.get(), struct_is_valid, list_values, list_is_valid,
+      list_lengths, list_offsets, int_values);
+}
+
+// Bulk-appends a struct validity bitmap that disagrees with the data in the children
+// and checks that the finished array still validates.
+TEST_F(TestStructBuilder, BulkAppendInvalid) {
+  vector<int32_t> int_values = {1, 2, 3, 4};
+  vector<char> list_values = {'j', 'o', 'e', 'b', 'o', 'b', 'm', 'a', 'r', 'k'};
+  vector<int> list_lengths = {3, 0, 3, 4};
+  vector<int> list_offsets = {0, 3, 3, 6};
+  vector<uint8_t> list_is_valid = {1, 0, 1, 1};
+  vector<uint8_t> struct_is_valid = {1, 0, 1, 1};  // should be 1, 1, 1, 1
+
+  ListBuilder* list_vb = static_cast<ListBuilder*>(builder_->field_builder(0).get());
+  Int8Builder* char_vb = static_cast<Int8Builder*>(list_vb->value_builder().get());
+  Int32Builder* int_vb = static_cast<Int32Builder*>(builder_->field_builder(1).get());
+
+  ASSERT_OK(builder_->Reserve(list_lengths.size()));
+  ASSERT_OK(char_vb->Reserve(list_values.size()));
+  ASSERT_OK(int_vb->Reserve(int_values.size()));
+
+  // The bulk appends return a Status; assert on it instead of dropping it.
+  ASSERT_OK(builder_->Append(struct_is_valid.size(), struct_is_valid.data()));
+
+  ASSERT_OK(
+      list_vb->Append(list_offsets.data(), list_offsets.size(), list_is_valid.data()));
+  for (int8_t value : list_values) {
+    char_vb->UnsafeAppend(value);
+  }
+  for (int32_t value : int_values) {
+    int_vb->UnsafeAppend(value);
+  }
+
+  Done();
+  // Even though the null bitmap of the parent struct is not valid, Validate() is
+  // expected to ignore it.
+  ASSERT_OK(result_->Validate());
+}
+
+// Builds five struct arrays from one builder (a reference, an equal copy, and three
+// that differ in struct bitmap, list offsets and values respectively) and checks
+// Equals() and RangeEquals() between them.
+TEST_F(TestStructBuilder, TestEquality) {
+  ArrayPtr array, equal_array;
+  ArrayPtr unequal_bitmap_array, unequal_offsets_array, unequal_values_array;
+
+  vector<int32_t> int_values = {1, 2, 3, 4};
+  vector<char> list_values = {'j', 'o', 'e', 'b', 'o', 'b', 'm', 'a', 'r', 'k'};
+  vector<int> list_lengths = {3, 0, 3, 4};
+  vector<int> list_offsets = {0, 3, 3, 6};
+  vector<uint8_t> list_is_valid = {1, 0, 1, 1};
+  vector<uint8_t> struct_is_valid = {1, 1, 1, 1};
+
+  vector<int32_t> unequal_int_values = {4, 2, 3, 1};
+  vector<char> unequal_list_values = {'j', 'o', 'e', 'b', 'o', 'b', 'l', 'u', 'c', 'y'};
+  vector<int> unequal_list_offsets = {0, 3, 4, 6};
+  vector<uint8_t> unequal_list_is_valid = {1, 1, 1, 1};
+  vector<uint8_t> unequal_struct_is_valid = {1, 0, 0, 1};
+
+  ListBuilder* list_vb = static_cast<ListBuilder*>(builder_->field_builder(0).get());
+  Int8Builder* char_vb = static_cast<Int8Builder*>(list_vb->value_builder().get());
+  Int32Builder* int_vb = static_cast<Int32Builder*>(builder_->field_builder(1).get());
+
+  // Appends one four-row batch to the struct builder and its children, asserting on
+  // the Status of the bulk appends.
+  auto append_batch = [&](const vector<uint8_t>& struct_valid,
+      const vector<int>& offsets, const vector<uint8_t>& list_valid,
+      const vector<char>& chars, const vector<int32_t>& ints) -> void {
+    ASSERT_OK(builder_->Append(struct_valid.size(), struct_valid.data()));
+    ASSERT_OK(list_vb->Append(offsets.data(), offsets.size(), list_valid.data()));
+    for (int8_t value : chars) {
+      char_vb->UnsafeAppend(value);
+    }
+    for (int32_t value : ints) {
+      int_vb->UnsafeAppend(value);
+    }
+  };
+
+  // Resizes all three builders for the next batch after a Finish().
+  auto resize_builders = [&]() -> void {
+    ASSERT_OK(builder_->Resize(list_lengths.size()));
+    ASSERT_OK(char_vb->Resize(list_values.size()));
+    ASSERT_OK(int_vb->Resize(int_values.size()));
+  };
+
+  ASSERT_OK(builder_->Reserve(list_lengths.size()));
+  ASSERT_OK(char_vb->Reserve(list_values.size()));
+  ASSERT_OK(int_vb->Reserve(int_values.size()));
+
+  // set up two equal arrays
+  ASSERT_NO_FATAL_FAILURE(append_batch(
+      struct_is_valid, list_offsets, list_is_valid, list_values, int_values));
+  ASSERT_OK(builder_->Finish(&array));
+
+  ASSERT_NO_FATAL_FAILURE(resize_builders());
+  ASSERT_NO_FATAL_FAILURE(append_batch(
+      struct_is_valid, list_offsets, list_is_valid, list_values, int_values));
+  ASSERT_OK(builder_->Finish(&equal_array));
+
+  // set up an unequal one with the unequal bitmap
+  ASSERT_NO_FATAL_FAILURE(resize_builders());
+  ASSERT_NO_FATAL_FAILURE(append_batch(
+      unequal_struct_is_valid, list_offsets, list_is_valid, list_values, int_values));
+  ASSERT_OK(builder_->Finish(&unequal_bitmap_array));
+
+  // set up an unequal one with unequal offsets
+  ASSERT_NO_FATAL_FAILURE(resize_builders());
+  ASSERT_NO_FATAL_FAILURE(append_batch(struct_is_valid, unequal_list_offsets,
+      unequal_list_is_valid, list_values, int_values));
+  ASSERT_OK(builder_->Finish(&unequal_offsets_array));
+
+  // set up an unequal one with unequal values
+  ASSERT_NO_FATAL_FAILURE(resize_builders());
+  ASSERT_NO_FATAL_FAILURE(append_batch(struct_is_valid, list_offsets, list_is_valid,
+      unequal_list_values, unequal_int_values));
+  ASSERT_OK(builder_->Finish(&unequal_values_array));
+
+  // Test array equality
+  EXPECT_TRUE(array->Equals(array));
+  EXPECT_TRUE(array->Equals(equal_array));
+  EXPECT_TRUE(equal_array->Equals(array));
+  EXPECT_FALSE(equal_array->Equals(unequal_bitmap_array));
+  EXPECT_FALSE(unequal_bitmap_array->Equals(equal_array));
+  EXPECT_FALSE(unequal_bitmap_array->Equals(unequal_values_array));
+  EXPECT_FALSE(unequal_values_array->Equals(unequal_bitmap_array));
+  EXPECT_FALSE(unequal_bitmap_array->Equals(unequal_offsets_array));
+  EXPECT_FALSE(unequal_offsets_array->Equals(unequal_bitmap_array));
+
+  // Test range equality
+  EXPECT_TRUE(array->RangeEquals(0, 4, 0, equal_array));
+  EXPECT_TRUE(array->RangeEquals(3, 4, 3, unequal_bitmap_array));
+  EXPECT_TRUE(array->RangeEquals(0, 1, 0, unequal_offsets_array));
+  EXPECT_FALSE(array->RangeEquals(0, 2, 0, unequal_offsets_array));
+  EXPECT_FALSE(array->RangeEquals(1, 2, 1, unequal_offsets_array));
+  EXPECT_FALSE(array->RangeEquals(0, 1, 0, unequal_values_array));
+  EXPECT_TRUE(array->RangeEquals(1, 3, 1, unequal_values_array));
+  EXPECT_FALSE(array->RangeEquals(3, 4, 3, unequal_values_array));
+}
+
+// Finishing a builder that never received a value must yield an array that validates.
+TEST_F(TestStructBuilder, TestZeroLength) {
+ // All buffers are null
+ Done();
+ ASSERT_OK(result_->Validate());
+}
+
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/2c10d7cc/cpp/src/arrow/array-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc
index 1581244..783104e 100644
--- a/cpp/src/arrow/array-test.cc
+++ b/cpp/src/arrow/array-test.cc
@@ -24,11 +24,10 @@
#include "gtest/gtest.h"
#include "arrow/array.h"
+#include "arrow/buffer.h"
+#include "arrow/memory_pool.h"
#include "arrow/test-util.h"
#include "arrow/type.h"
-#include "arrow/types/primitive.h"
-#include "arrow/util/buffer.h"
-#include "arrow/util/memory-pool.h"
namespace arrow {
http://git-wip-us.apache.org/repos/asf/arrow/blob/2c10d7cc/cpp/src/arrow/array.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc
index 1f0bb66..7ab61f5 100644
--- a/cpp/src/arrow/array.cc
+++ b/cpp/src/arrow/array.cc
@@ -19,10 +19,13 @@
#include <cstdint>
#include <cstring>
+#include <sstream>
+#include "arrow/buffer.h"
+#include "arrow/status.h"
+#include "arrow/type_traits.h"
#include "arrow/util/bit-util.h"
-#include "arrow/util/buffer.h"
-#include "arrow/util/status.h"
+#include "arrow/util/logging.h"
namespace arrow {
@@ -85,4 +88,440 @@ Status NullArray::Accept(ArrayVisitor* visitor) const {
return visitor->Visit(*this);
}
+// ----------------------------------------------------------------------
+// Primitive array base
+
+// Wraps `data` as the value buffer of a primitive array and caches a raw
+// pointer to its bytes. A null `data` leaves the cached pointer null.
+PrimitiveArray::PrimitiveArray(const TypePtr& type, int32_t length,
+    const std::shared_ptr<Buffer>& data, int32_t null_count,
+    const std::shared_ptr<Buffer>& null_bitmap)
+    : Array(type, length, null_count, null_bitmap) {
+  data_ = data;
+  if (data_) {
+    raw_data_ = data_->data();
+  } else {
+    raw_data_ = nullptr;
+  }
+}
+
+// Returns true when `other` has the same length, null count, validity bitmap
+// and values as this array. Bytes stored in null slots are ignored.
+bool PrimitiveArray::EqualsExact(const PrimitiveArray& other) const {
+  if (this == &other) { return true; }
+  // Arrays of different length can never be equal; checking up front also
+  // keeps the per-slot loop below from walking past the shorter array.
+  if (length_ != other.length_) { return false; }
+  if (null_count_ != other.null_count_) { return false; }
+
+  const auto size_meta = dynamic_cast<const FixedWidthType*>(type_.get());
+
+  if (null_count_ > 0) {
+    const bool equal_bitmap =
+        null_bitmap_->Equals(*other.null_bitmap_, BitUtil::CeilByte(length_) / 8);
+    if (!equal_bitmap) { return false; }
+
+    const int value_byte_size = size_meta->bit_width() / 8;
+    DCHECK_GT(value_byte_size, 0);
+
+    // Compare slot by slot so that garbage in null slots is not considered.
+    const uint8_t* this_data = raw_data_;
+    const uint8_t* other_data = other.raw_data_;
+    for (int i = 0; i < length_; ++i) {
+      if (!IsNull(i) && memcmp(this_data, other_data, value_byte_size)) { return false; }
+      this_data += value_byte_size;
+      other_data += value_byte_size;
+    }
+    return true;
+  }
+
+  // Zero-length arrays may have no data buffer at all.
+  if (length_ == 0) { return true; }
+
+  // No nulls: every slot is meaningful, so the whole value region must match.
+  // That region is length_ * value_byte_size bytes, not length_ bytes.
+  // NOTE(review): if the type does not expose a byte width through
+  // FixedWidthType, fall back to the previous length_-byte comparison --
+  // confirm whether such types can reach this path.
+  int64_t nbytes = length_;
+  if (size_meta != nullptr && size_meta->bit_width() >= 8) {
+    nbytes = static_cast<int64_t>(length_) * (size_meta->bit_width() / 8);
+  }
+  return data_->Equals(*other.data_, nbytes);
+}
+
+// Type-checked equality against an arbitrary array: identical objects are
+// equal, a null or differently-typed argument is not, and everything else is
+// decided by EqualsExact.
+bool PrimitiveArray::Equals(const std::shared_ptr<Array>& arr) const {
+  const Array* candidate = arr.get();
+  if (candidate == this) { return true; }
+  if (candidate == nullptr) { return false; }
+  if (candidate->type_enum() != this->type_enum()) { return false; }
+  const auto& as_primitive = *static_cast<const PrimitiveArray*>(candidate);
+  return EqualsExact(as_primitive);
+}
+
+// Visitor entry point: hands this array to the visitor overload that matches
+// its concrete NumericArray<T> type.
+template <typename T>
+Status NumericArray<T>::Accept(ArrayVisitor* visitor) const {
+  const NumericArray<T>& self = *this;
+  return visitor->Visit(self);
+}
+
+// Explicit instantiations of NumericArray for each supported value type.
+// Signed integers
+template class NumericArray<Int8Type>;
+template class NumericArray<Int16Type>;
+template class NumericArray<Int32Type>;
+template class NumericArray<Int64Type>;
+// Unsigned integers
+template class NumericArray<UInt8Type>;
+template class NumericArray<UInt16Type>;
+template class NumericArray<UInt32Type>;
+template class NumericArray<UInt64Type>;
+// Floating point
+template class NumericArray<HalfFloatType>;
+template class NumericArray<FloatType>;
+template class NumericArray<DoubleType>;
+// Temporal
+template class NumericArray<TimestampType>;
+
+// ----------------------------------------------------------------------
+// BooleanArray
+
+// Convenience constructor: creates a fresh BooleanType and forwards to the
+// type-taking constructor.
+BooleanArray::BooleanArray(int32_t length, const std::shared_ptr<Buffer>& data,
+    int32_t null_count, const std::shared_ptr<Buffer>& null_bitmap)
+    : BooleanArray(
+          std::make_shared<BooleanType>(), length, data, null_count, null_bitmap) {}
+
+// Constructs a boolean array with a caller-supplied type instance.
+BooleanArray::BooleanArray(const TypePtr& type, int32_t length,
+    const std::shared_ptr<Buffer>& data, int32_t null_count,
+    const std::shared_ptr<Buffer>& null_bitmap)
+    : PrimitiveArray(type, length, data, null_count, null_bitmap) {
+}
+
+// Returns true when `other` has the same length, null count, validity bitmap
+// and bit-packed values as this array. Bits stored in null slots are ignored.
+bool BooleanArray::EqualsExact(const BooleanArray& other) const {
+  if (this == &other) { return true; }
+  // Without a length check the bit loop below could read past the end of a
+  // shorter `other`.
+  if (length_ != other.length_) { return false; }
+  if (null_count_ != other.null_count_) { return false; }
+  // Zero-length arrays may have no data buffer; mirror the guard used by
+  // PrimitiveArray::EqualsExact instead of dereferencing data_.
+  if (length_ == 0) { return true; }
+
+  if (null_count_ > 0) {
+    const bool equal_bitmap =
+        null_bitmap_->Equals(*other.null_bitmap_, BitUtil::BytesForBits(length_));
+    if (!equal_bitmap) { return false; }
+
+    const uint8_t* this_data = raw_data_;
+    const uint8_t* other_data = other.raw_data_;
+
+    // Compare bit by bit so that values in null slots do not matter.
+    for (int i = 0; i < length_; ++i) {
+      if (!IsNull(i) && BitUtil::GetBit(this_data, i) != BitUtil::GetBit(other_data, i)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  return data_->Equals(*other.data_, BitUtil::BytesForBits(length_));
+}
+
+// Type-checked equality against an arbitrary array. A null `arr` compares
+// unequal, matching the other Equals implementations in this file.
+bool BooleanArray::Equals(const ArrayPtr& arr) const {
+  if (this == arr.get()) { return true; }
+  // Guard before arr->type_enum(): a null argument must not be dereferenced.
+  if (!arr) { return false; }
+  if (Type::BOOL != arr->type_enum()) { return false; }
+  return EqualsExact(*static_cast<const BooleanArray*>(arr.get()));
+}
+
+// Compares slots [start_idx, end_idx) of this array with the equally long
+// range of `arr` that begins at other_start_idx. Two slots match when both
+// are null or both hold the same boolean value.
+bool BooleanArray::RangeEquals(int32_t start_idx, int32_t end_idx,
+    int32_t other_start_idx, const ArrayPtr& arr) const {
+  // Identity only implies equality when both ranges start at the same slot;
+  // different ranges of one array still have to be compared.
+  if (this == arr.get() && start_idx == other_start_idx) { return true; }
+  if (!arr) { return false; }
+  if (this->type_enum() != arr->type_enum()) { return false; }
+  const auto other = static_cast<const BooleanArray*>(arr.get());
+  for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) {
+    const bool is_null = IsNull(i);
+    if (is_null != other->IsNull(o_i)) { return false; }
+    if (!is_null && Value(i) != other->Value(o_i)) { return false; }
+  }
+  return true;
+}
+
+// Visitor entry point for boolean arrays.
+Status BooleanArray::Accept(ArrayVisitor* visitor) const {
+  const BooleanArray& self = *this;
+  return visitor->Visit(self);
+}
+
+// ----------------------------------------------------------------------
+// ListArray
+
+// Returns true when `other` has the same length, null count, offsets,
+// validity bitmap and child values as this list array.
+bool ListArray::EqualsExact(const ListArray& other) const {
+  if (this == &other) { return true; }
+  // The offsets comparison below is sized from length_, so a longer `other`
+  // sharing our prefix would otherwise be reported equal.
+  if (length_ != other.length_) { return false; }
+  if (null_count_ != other.null_count_) { return false; }
+
+  const bool equal_offsets =
+      offset_buffer_->Equals(*other.offset_buffer_, (length_ + 1) * sizeof(int32_t));
+  if (!equal_offsets) { return false; }
+
+  if (null_count_ > 0) {
+    const bool equal_null_bitmap =
+        null_bitmap_->Equals(*other.null_bitmap_, BitUtil::BytesForBits(length_));
+    if (!equal_null_bitmap) { return false; }
+  }
+
+  // Validate() accepts a null child array when the final offset is zero, so
+  // the child may be absent. Treat an absent child like an empty one rather
+  // than dereferencing it.
+  if (!values_ || !other.values_) {
+    const int32_t this_child_length = values_ ? values_->length() : 0;
+    const int32_t other_child_length = other.values_ ? other.values_->length() : 0;
+    return this_child_length == other_child_length;
+  }
+
+  return values_->Equals(other.values_);
+}
+
+// Type-checked equality against an arbitrary array. A null `arr` compares
+// unequal, matching ListArray::RangeEquals.
+bool ListArray::Equals(const std::shared_ptr<Array>& arr) const {
+  if (this == arr.get()) { return true; }
+  // Guard before arr->type_enum(): a null argument must not be dereferenced.
+  if (!arr) { return false; }
+  if (this->type_enum() != arr->type_enum()) { return false; }
+  return EqualsExact(*static_cast<const ListArray*>(arr.get()));
+}
+
+// Compares list slots [start_idx, end_idx) of this array with the equally
+// long range of `arr` that begins at other_start_idx. Two slots match when
+// both are null, or both are non-null lists of the same size whose child
+// ranges are equal.
+bool ListArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx,
+    const std::shared_ptr<Array>& arr) const {
+  // Identity only implies equality when both ranges start at the same slot;
+  // different ranges of one array still have to be compared.
+  if (this == arr.get() && start_idx == other_start_idx) { return true; }
+  if (!arr) { return false; }
+  if (this->type_enum() != arr->type_enum()) { return false; }
+  const auto other = static_cast<const ListArray*>(arr.get());
+  for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) {
+    const bool is_null = IsNull(i);
+    if (is_null != other->IsNull(o_i)) { return false; }
+    if (is_null) { continue; }
+    const int32_t begin_offset = offset(i);
+    const int32_t end_offset = offset(i + 1);
+    const int32_t other_begin_offset = other->offset(o_i);
+    const int32_t other_end_offset = other->offset(o_i + 1);
+    // Underlying can't be equal if the size isn't equal
+    const int32_t num_values = end_offset - begin_offset;
+    if (num_values != other_end_offset - other_begin_offset) { return false; }
+    // Two empty lists are trivially equal. Skipping them also avoids touching
+    // values_, which Validate() allows to be null when no list has elements.
+    if (num_values == 0) { continue; }
+    if (!values_->RangeEquals(
+            begin_offset, end_offset, other_begin_offset, other->values())) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Checks the structural invariants of a list array:
+//   * the length is non-negative and the offsets buffer exists and holds the
+//     length_ + 1 offsets that are read below;
+//   * when the final offset is positive, the child array exists, has exactly
+//     that many values and is itself valid;
+//   * the first offset is zero, offsets never decrease, and a null slot does
+//     not advance the offset.
+// Returns Status::Invalid describing the first violated invariant.
+Status ListArray::Validate() const {
+  if (length_ < 0) { return Status::Invalid("Length was negative"); }
+  if (!offset_buffer_) { return Status::Invalid("offset_buffer_ was null"); }
+  // offset(length_) is read below, so length_ + 1 entries are required, not
+  // just length_.
+  const int64_t num_offsets =
+      offset_buffer_->size() / static_cast<int64_t>(sizeof(int32_t));
+  if (num_offsets < static_cast<int64_t>(length_) + 1) {
+    std::stringstream ss;
+    ss << "offset buffer size (bytes): " << offset_buffer_->size()
+       << " isn't large enough for length: " << length_;
+    return Status::Invalid(ss.str());
+  }
+  const int32_t last_offset = offset(length_);
+  if (last_offset > 0) {
+    if (!values_) {
+      return Status::Invalid("last offset was non-zero and values was null");
+    }
+    if (values_->length() != last_offset) {
+      std::stringstream ss;
+      ss << "Final offset invariant not equal to values length: " << last_offset
+         << "!=" << values_->length();
+      return Status::Invalid(ss.str());
+    }
+
+    const Status child_valid = values_->Validate();
+    if (!child_valid.ok()) {
+      std::stringstream ss;
+      ss << "Child array invalid: " << child_valid.ToString();
+      return Status::Invalid(ss.str());
+    }
+  }
+
+  int32_t prev_offset = offset(0);
+  if (prev_offset != 0) { return Status::Invalid("The first offset wasn't zero"); }
+  for (int32_t i = 1; i <= length_; ++i) {
+    const int32_t current_offset = offset(i);
+    // A null slot owns no child values, so its offset must not move.
+    if (IsNull(i - 1) && current_offset != prev_offset) {
+      std::stringstream ss;
+      ss << "Offset invariant failure at: " << i
+         << " inconsistent offsets for null slot: " << current_offset
+         << "!=" << prev_offset;
+      return Status::Invalid(ss.str());
+    }
+    if (current_offset < prev_offset) {
+      std::stringstream ss;
+      ss << "Offset invariant failure: " << i
+         << " inconsistent offset for non-null slot: " << current_offset << "<"
+         << prev_offset;
+      return Status::Invalid(ss.str());
+    }
+    prev_offset = current_offset;
+  }
+  return Status::OK();
+}
+
+// Visitor entry point for list arrays.
+Status ListArray::Accept(ArrayVisitor* visitor) const {
+  const ListArray& self = *this;
+  return visitor->Visit(self);
+}
+
+// ----------------------------------------------------------------------
+// String and binary
+
+// File-local type instances shared by every array built through the
+// BinaryArray / StringArray constructors that take no explicit type.
+static std::shared_ptr<DataType> kBinary(std::make_shared<BinaryType>());
+static std::shared_ptr<DataType> kString(std::make_shared<StringType>());
+
+// Convenience constructor: builds a binary array using the shared kBinary
+// type instance.
+BinaryArray::BinaryArray(int32_t length, const std::shared_ptr<Buffer>& offsets,
+    const std::shared_ptr<Buffer>& data, int32_t null_count,
+    const std::shared_ptr<Buffer>& null_bitmap)
+    : BinaryArray(kBinary, length, offsets, data, null_count, null_bitmap) {}
+
+// Builds a variable-length binary array over an offsets buffer and a data
+// buffer, caching raw pointers into both. Either buffer may be null, in which
+// case the corresponding cached pointer is null.
+BinaryArray::BinaryArray(const TypePtr& type, int32_t length,
+    const std::shared_ptr<Buffer>& offsets, const std::shared_ptr<Buffer>& data,
+    int32_t null_count, const std::shared_ptr<Buffer>& null_bitmap)
+    : Array(type, length, null_count, null_bitmap),
+      offset_buffer_(offsets),
+      // Read from the constructor arguments rather than the members: this
+      // tolerates a null buffer and does not depend on the order in which the
+      // members are declared.
+      offsets_(offsets ? reinterpret_cast<const int32_t*>(offsets->data()) : nullptr),
+      data_buffer_(data),
+      data_(data ? data->data() : nullptr) {}
+
+// No structural checks are performed yet; every binary array is reported as
+// valid.
+Status BinaryArray::Validate() const {
+  // TODO(wesm): what to do here?
+  const Status ok = Status::OK();
+  return ok;
+}
+
+// Returns true when `other` passes the base-class comparison and has the same
+// offsets and the same value bytes as this array.
+bool BinaryArray::EqualsExact(const BinaryArray& other) const {
+  if (!Array::EqualsExact(other)) { return false; }
+  // The offsets comparison below is sized from length_, so make sure both
+  // arrays agree on it.
+  if (length_ != other.length_) { return false; }
+
+  const bool equal_offsets =
+      offset_buffer_->Equals(*other.offset_buffer_, (length_ + 1) * sizeof(int32_t));
+  if (!equal_offsets) { return false; }
+
+  if (!data_buffer_ && !(other.data_buffer_)) { return true; }
+
+  // Offsets are equal, so both arrays reference the same number of value
+  // bytes. With no value bytes the data buffers are irrelevant.
+  const int32_t total_value_bytes = offset(length_);
+  if (total_value_bytes == 0) { return true; }
+
+  // One side has values but no buffer to hold them: not equal, and must not
+  // be dereferenced.
+  if (!data_buffer_ || !other.data_buffer_) { return false; }
+
+  // Compare only the bytes the offsets reference; anything beyond them in
+  // either buffer is not part of the array's contents.
+  return data_buffer_->Equals(*other.data_buffer_, total_value_bytes);
+}
+
+// Type-checked equality against an arbitrary array. A null `arr` compares
+// unequal, matching BinaryArray::RangeEquals.
+bool BinaryArray::Equals(const std::shared_ptr<Array>& arr) const {
+  if (this == arr.get()) { return true; }
+  // Guard before arr->type_enum(): a null argument must not be dereferenced.
+  if (!arr) { return false; }
+  if (this->type_enum() != arr->type_enum()) { return false; }
+  return EqualsExact(*static_cast<const BinaryArray*>(arr.get()));
+}
+
+// Compares slots [start_idx, end_idx) of this array with the equally long
+// range of `arr` that begins at other_start_idx. Two slots match when both
+// are null, or both are non-null values of the same size with identical
+// bytes.
+bool BinaryArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx,
+    const std::shared_ptr<Array>& arr) const {
+  // Identity only implies equality when both ranges start at the same slot;
+  // different ranges of one array still have to be compared.
+  if (this == arr.get() && start_idx == other_start_idx) { return true; }
+  if (!arr) { return false; }
+  if (this->type_enum() != arr->type_enum()) { return false; }
+  const auto other = static_cast<const BinaryArray*>(arr.get());
+  for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) {
+    const bool is_null = IsNull(i);
+    if (is_null != other->IsNull(o_i)) { return false; }
+    if (is_null) { continue; }
+    const int32_t begin_offset = offset(i);
+    const int32_t end_offset = offset(i + 1);
+    const int32_t other_begin_offset = other->offset(o_i);
+    const int32_t other_end_offset = other->offset(o_i + 1);
+    // Underlying can't be equal if the size isn't equal
+    const int32_t value_size = end_offset - begin_offset;
+    if (value_size != other_end_offset - other_begin_offset) { return false; }
+
+    // Two empty values are equal. Skipping them also keeps memcmp away from
+    // data_, which is null when the array has no data buffer.
+    if (value_size == 0) { continue; }
+
+    if (std::memcmp(data_ + begin_offset, other->data_ + other_begin_offset,
+            value_size)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Visitor entry point for binary arrays.
+Status BinaryArray::Accept(ArrayVisitor* visitor) const {
+  const BinaryArray& self = *this;
+  return visitor->Visit(self);
+}
+
+// Builds a string array: a BinaryArray tagged with the shared kString type
+// instance.
+StringArray::StringArray(int32_t length, const std::shared_ptr<Buffer>& offsets,
+    const std::shared_ptr<Buffer>& data, int32_t null_count,
+    const std::shared_ptr<Buffer>& null_bitmap)
+    : BinaryArray(kString, length, offsets, data, null_count, null_bitmap) {
+}
+
+// Currently performs exactly the checks of BinaryArray::Validate.
+Status StringArray::Validate() const {
+  // TODO(emkornfield) Validate proper UTF8 code points?
+  const Status base_status = BinaryArray::Validate();
+  return base_status;
+}
+
+// Visitor entry point for string arrays.
+Status StringArray::Accept(ArrayVisitor* visitor) const {
+  const StringArray& self = *this;
+  return visitor->Visit(self);
+}
+
+// ----------------------------------------------------------------------
+// Struct
+
+// Returns the child array at position `pos`.
+std::shared_ptr<Array> StructArray::field(int32_t pos) const {
+  // Debug-check that `pos` addresses an existing child. The cast makes a
+  // negative `pos` wrap to a huge value, so it fails the check as well; a
+  // passing check also implies there is at least one child.
+  DCHECK_GT(field_arrays_.size(), static_cast<size_t>(pos));
+  return field_arrays_[pos];
+}
+
+// Type-checked equality against an arbitrary array: the arrays must agree on
+// type, length and null count, and every slot must match per RangeEquals.
+bool StructArray::Equals(const std::shared_ptr<Array>& arr) const {
+  if (this == arr.get()) { return true; }
+  if (!arr) { return false; }
+  if (this->type_enum() != arr->type_enum()) { return false; }
+  // RangeEquals only inspects the first length_ slots of `arr`, so a longer
+  // array with a matching prefix must be rejected here.
+  if (length_ != arr->length()) { return false; }
+  if (null_count_ != arr->null_count()) { return false; }
+  return RangeEquals(0, length_, 0, arr);
+}
+
+// Compares struct slots [start_idx, end_idx) of this array with the equally
+// long range of `arr` that begins at other_start_idx. Two slots match when
+// both are null, or both are non-null and every child array agrees on that
+// slot.
+bool StructArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx,
+    const std::shared_ptr<Array>& arr) const {
+  // Identity only implies equality when both ranges start at the same slot;
+  // different ranges of one array still have to be compared.
+  if (this == arr.get() && start_idx == other_start_idx) { return true; }
+  if (!arr) { return false; }
+  if (Type::STRUCT != arr->type_enum()) { return false; }
+  const auto other = static_cast<const StructArray*>(arr.get());
+  // Structs with a different number of children cannot be equal, and the loop
+  // below indexes both child vectors with the same position.
+  if (field_arrays_.size() != other->field_arrays_.size()) { return false; }
+
+  for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) {
+    const bool is_null = IsNull(i);
+    if (is_null != other->IsNull(o_i)) { return false; }
+    if (is_null) { continue; }
+    for (size_t j = 0; j < field_arrays_.size(); ++j) {
+      // TODO: really we should be comparing stretches of non-null data rather
+      // than looking at one value at a time.
+      if (!field_arrays_[j]->RangeEquals(i, i + 1, o_i, other->field_arrays_[j])) {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+// Checks the structural invariants of a struct array:
+//   * the length is non-negative and the null count does not exceed it;
+//   * every child array exists, is itself valid, and all children have the
+//     same length;
+//   * a non-zero child length equals the struct's own length.
+// Returns Status::Invalid describing the first violated invariant.
+Status StructArray::Validate() const {
+  if (length_ < 0) { return Status::Invalid("Length was negative"); }
+
+  if (null_count() > length_) {
+    return Status::Invalid("Null count exceeds the length of this struct");
+  }
+
+  if (!field_arrays_.empty()) {
+    // Validate fields
+    int32_t array_length = 0;
+    size_t idx = 0;
+    // Iterate by reference: copying each shared_ptr would only add reference
+    // count traffic.
+    for (const auto& child : field_arrays_) {
+      if (!child) {
+        std::stringstream ss;
+        ss << "Child array was null at position {" << idx << "}";
+        return Status::Invalid(ss.str());
+      }
+      // The first child defines the length every other child must have.
+      if (idx == 0) { array_length = child->length(); }
+
+      if (child->length() != array_length) {
+        std::stringstream ss;
+        ss << "Length is not equal from field " << child->type()->ToString()
+           << " at position {" << idx << "}";
+        return Status::Invalid(ss.str());
+      }
+
+      const Status child_valid = child->Validate();
+      if (!child_valid.ok()) {
+        std::stringstream ss;
+        ss << "Child array invalid: " << child_valid.ToString() << " at position {" << idx
+           << "}";
+        return Status::Invalid(ss.str());
+      }
+      ++idx;
+    }
+
+    if (array_length > 0 && array_length != length_) {
+      return Status::Invalid("Struct's length is not equal to its child arrays");
+    }
+  }
+  return Status::OK();
+}
+
+// Visitor entry point for struct arrays.
+Status StructArray::Accept(ArrayVisitor* visitor) const {
+  const StructArray& self = *this;
+  return visitor->Visit(self);
+}
+
+// ----------------------------------------------------------------------
+
+// Expands to one `case` of the switch in MakePrimitiveArray: constructs an
+// ArrayType from the function's arguments and stores it in *out.
+#define MAKE_PRIMITIVE_ARRAY_CASE(ENUM, ArrayType)                                   \
+  case Type::ENUM:                                                                   \
+    *out = std::make_shared<ArrayType>(type, length, data, null_count, null_bitmap); \
+    break;
+
+// Creates the concrete primitive array class that corresponds to `type` and
+// stores it in *out. Returns NotImplemented, carrying the type's string form,
+// for types without a case below. When NDEBUG is not defined the new array is
+// also validated and the validation status is returned.
+Status MakePrimitiveArray(const TypePtr& type, int32_t length,
+    const std::shared_ptr<Buffer>& data, int32_t null_count,
+    const std::shared_ptr<Buffer>& null_bitmap, ArrayPtr* out) {
+  switch (type->type) {
+    MAKE_PRIMITIVE_ARRAY_CASE(BOOL, BooleanArray);
+    MAKE_PRIMITIVE_ARRAY_CASE(UINT8, UInt8Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(INT8, Int8Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(UINT16, UInt16Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(INT16, Int16Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(UINT32, UInt32Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(INT32, Int32Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(UINT64, UInt64Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(INT64, Int64Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(FLOAT, FloatArray);
+    MAKE_PRIMITIVE_ARRAY_CASE(DOUBLE, DoubleArray);
+    MAKE_PRIMITIVE_ARRAY_CASE(TIME, Int64Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(TIMESTAMP, TimestampArray);
+    MAKE_PRIMITIVE_ARRAY_CASE(TIMESTAMP_DOUBLE, DoubleArray);
+    default:
+      return Status::NotImplemented(type->ToString());
+  }
+#ifndef NDEBUG
+  return (*out)->Validate();
+#else
+  return Status::OK();
+#endif
+}
+
} // namespace arrow