You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2016/03/23 02:45:23 UTC
[2/3] arrow git commit: ARROW-67: C++ metadata flatbuffer
serialization and data movement to memory maps
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/metadata.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/metadata.cc b/cpp/src/arrow/ipc/metadata.cc
new file mode 100644
index 0000000..642f21a
--- /dev/null
+++ b/cpp/src/arrow/ipc/metadata.cc
@@ -0,0 +1,238 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/ipc/metadata.h"
+
+#include <flatbuffers/flatbuffers.h>
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+// Generated C++ flatbuffer IDL
+#include "arrow/ipc/Message_generated.h"
+#include "arrow/ipc/metadata-internal.h"
+
+#include "arrow/schema.h"
+#include "arrow/util/buffer.h"
+#include "arrow/util/status.h"
+
+namespace arrow {
+
+namespace flatbuf = apache::arrow::flatbuf;
+
+namespace ipc {
+
+// Serializes `schema` as a flatbuffer message and hands the resulting buffer
+// back through `out`. Each builder step is wrapped in RETURN_NOT_OK; the
+// Status of the final GetBuffer call is returned directly.
+Status WriteSchema(const Schema* schema, std::shared_ptr<Buffer>* out) {
+  MessageBuilder builder;
+  RETURN_NOT_OK(builder.SetSchema(schema));
+  RETURN_NOT_OK(builder.Finish());
+  return builder.GetBuffer(out);
+}
+
+//----------------------------------------------------------------------
+// Message reader
+
+// Private implementation of Message. Stores the flatbuffer root pointer
+// together with the buffer handed in at construction.
+class Message::Impl {
+ public:
+  explicit Impl(const std::shared_ptr<Buffer>& buffer,
+      const flatbuf::Message* message)
+      : buffer_(buffer), message_(message) {}
+
+  // Maps the flatbuffer header tag onto the public Message::Type enum. Any
+  // tag other than Schema, DictionaryBatch or RecordBatch yields
+  // Message::NONE.
+  Message::Type type() const {
+    const auto tag = message_->header_type();
+    if (tag == flatbuf::MessageHeader_Schema) {
+      return Message::SCHEMA;
+    }
+    if (tag == flatbuf::MessageHeader_DictionaryBatch) {
+      return Message::DICTIONARY_BATCH;
+    }
+    if (tag == flatbuf::MessageHeader_RecordBatch) {
+      return Message::RECORD_BATCH;
+    }
+    return Message::NONE;
+  }
+
+  // Untyped pointer to the message header table
+  const void* header() const { return message_->header(); }
+
+  // Value of the bodyLength entry of the flatbuffer message
+  int64_t body_length() const { return message_->bodyLength(); }
+
+ private:
+  // Owns the memory this message accesses
+  std::shared_ptr<Buffer> buffer_;
+
+  const flatbuf::Message* message_;
+};
+
+// Private implementation of SchemaMessage: a typed view over the flatbuffer
+// Schema table that is handed in as an opaque pointer.
+class SchemaMessage::Impl {
+ public:
+  explicit Impl(const void* schema) :
+      schema_(static_cast<const flatbuf::Schema*>(schema)) {}
+
+  // Returns the i-th serialized field. Does not boundscheck.
+  const flatbuf::Field* field(int i) const {
+    return schema_->fields()->Get(i);
+  }
+
+  // Number of serialized fields. Flatbuffers accessors return nullptr for a
+  // vector that was never written, so a missing field list is reported as
+  // empty instead of being dereferenced.
+  int num_fields() const {
+    const auto* fields = schema_->fields();
+    return fields == nullptr ? 0 : static_cast<int>(fields->size());
+  }
+
+ private:
+  const flatbuf::Schema* schema_;
+};
+
+// Nothing to set up here; Message::Open installs the implementation object.
+Message::Message() = default;
+
+// Wraps `buffer` in a Message without copying it and stores the result in
+// `out`. The buffer is expected to start with an int32_t size prefix that is
+// immediately followed by the flatbuffer-encoded message.
+//
+// Returns Status::Invalid when `buffer` (or its data pointer) is null, and
+// Status::OK otherwise.
+// NOTE(review): the buffer length is not compared against the prefix size
+// and the flatbuffer itself is still unverified (see the TODO below).
+Status Message::Open(const std::shared_ptr<Buffer>& buffer,
+    std::shared_ptr<Message>* out) {
+  if (buffer == nullptr || buffer->data() == nullptr) {
+    return Status::Invalid("Cannot open an IPC message from a null buffer");
+  }
+
+  // The buffer is prefixed by its size as int32_t
+  const uint8_t* fb_head = buffer->data() + sizeof(int32_t);
+  const flatbuf::Message* message = flatbuf::GetMessage(fb_head);
+
+  // The constructor is private, so std::make_shared cannot be used here
+  std::shared_ptr<Message> result(new Message());
+
+  // TODO(wesm): verify message
+  result->impl_.reset(new Impl(buffer, message));
+  *out = result;
+
+  return Status::OK();
+}
+
+// Kind of header this message carries, as reported by the implementation.
+Message::Type Message::type() const {
+  const Impl& impl = *impl_;
+  return impl.type();
+}
+
+// Body length recorded in the message, forwarded from the implementation.
+int64_t Message::body_length() const {
+  const Impl& impl = *impl_;
+  return impl.body_length();
+}
+
+// Returns a shared_ptr to this message via enable_shared_from_this.
+std::shared_ptr<Message> Message::get_shared_ptr() {
+  std::shared_ptr<Message> self = shared_from_this();
+  return self;
+}
+
+// Builds a SchemaMessage view over this message's header. The view receives
+// a shared_ptr to this message along with the untyped header pointer.
+std::shared_ptr<SchemaMessage> Message::GetSchema() {
+  std::shared_ptr<Message> self = shared_from_this();
+  const void* header = impl_->header();
+  return std::make_shared<SchemaMessage>(self, header);
+}
+
+// Keeps a reference to the parent message and wraps the opaque `schema`
+// pointer in the private implementation.
+SchemaMessage::SchemaMessage(const std::shared_ptr<Message>& message,
+    const void* schema)
+    : message_(message), impl_(new Impl(schema)) {}
+
+// Number of fields in the serialized schema.
+int SchemaMessage::num_fields() const {
+  const Impl& impl = *impl_;
+  return impl.num_fields();
+}
+
+// Converts the i-th serialized field through FieldFromFlatbuffer, writing the
+// result to `out` and returning that helper's Status.
+Status SchemaMessage::GetField(int i, std::shared_ptr<Field>* out) const {
+  return FieldFromFlatbuffer(impl_->field(i), out);
+}
+
+// Converts every serialized field with GetField and assembles the results
+// into a new Schema stored in `out`. Each conversion is wrapped in
+// RETURN_NOT_OK.
+Status SchemaMessage::GetSchema(std::shared_ptr<Schema>* out) const {
+  const int count = num_fields();
+  std::vector<std::shared_ptr<Field>> converted(count);
+  for (int idx = 0; idx < count; ++idx) {
+    RETURN_NOT_OK(GetField(idx, &converted[idx]));
+  }
+  *out = std::make_shared<Schema>(converted);
+  return Status::OK();
+}
+
+// Private implementation of RecordBatchMessage: a typed view over the
+// flatbuffer RecordBatch table that is handed in as an opaque pointer. The
+// node and buffer vectors are looked up once and cached.
+class RecordBatchMessage::Impl {
+ public:
+  explicit Impl(const void* batch) :
+      batch_(static_cast<const flatbuf::RecordBatch*>(batch)),
+      nodes_(batch_->nodes()),
+      buffers_(batch_->buffers()) {}
+
+  // Returns the i-th field node. Does not boundscheck.
+  const flatbuf::FieldNode* field(int i) const {
+    return nodes_->Get(i);
+  }
+
+  // Returns the i-th buffer entry. Does not boundscheck.
+  const flatbuf::Buffer* buffer(int i) const {
+    return buffers_->Get(i);
+  }
+
+  int32_t length() const {
+    return batch_->length();
+  }
+
+  // Flatbuffers accessors return nullptr for a vector that was never written,
+  // so the counts report an absent vector as empty instead of dereferencing
+  // it. Both counts read the vectors cached by the constructor.
+  int num_buffers() const {
+    return buffers_ == nullptr ? 0 : static_cast<int>(buffers_->size());
+  }
+
+  int num_fields() const {
+    return nodes_ == nullptr ? 0 : static_cast<int>(nodes_->size());
+  }
+
+ private:
+  const flatbuf::RecordBatch* batch_;
+  const flatbuffers::Vector<const flatbuf::FieldNode*>* nodes_;
+  const flatbuffers::Vector<const flatbuf::Buffer*>* buffers_;
+};
+
+// Builds a RecordBatchMessage view over this message's header. The view
+// receives a shared_ptr to this message along with the untyped header pointer.
+std::shared_ptr<RecordBatchMessage> Message::GetRecordBatch() {
+  std::shared_ptr<Message> self = shared_from_this();
+  const void* header = impl_->header();
+  return std::make_shared<RecordBatchMessage>(self, header);
+}
+
+// Keeps a reference to the parent message and wraps the opaque `batch`
+// pointer in the private implementation.
+RecordBatchMessage::RecordBatchMessage(const std::shared_ptr<Message>& message,
+    const void* batch)
+    : message_(message), impl_(new Impl(batch)) {}
+
+// Copies the length and null count of the i-th field node into a
+// FieldMetadata value.
+// TODO(wesm): Copying the flatbuffer data isn't great, but this will do for
+// now
+FieldMetadata RecordBatchMessage::field(int i) const {
+  const flatbuf::FieldNode* src = impl_->field(i);
+
+  FieldMetadata meta;
+  meta.length = src->length();
+  meta.null_count = src->null_count();
+  return meta;
+}
+
+// Copies the page, offset and length of the i-th buffer entry into a
+// BufferMetadata value.
+BufferMetadata RecordBatchMessage::buffer(int i) const {
+  const flatbuf::Buffer* src = impl_->buffer(i);
+
+  BufferMetadata meta;
+  meta.page = src->page();
+  meta.offset = src->offset();
+  meta.length = src->length();
+  return meta;
+}
+
+// Length recorded in the serialized record batch.
+int32_t RecordBatchMessage::length() const {
+  const Impl& impl = *impl_;
+  return impl.length();
+}
+
+// Number of buffer entries in the serialized record batch.
+int RecordBatchMessage::num_buffers() const {
+  const Impl& impl = *impl_;
+  return impl.num_buffers();
+}
+
+// Number of field nodes in the serialized record batch.
+int RecordBatchMessage::num_fields() const {
+  const Impl& impl = *impl_;
+  return impl.num_fields();
+}
+
+} // namespace ipc
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/metadata.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/metadata.h b/cpp/src/arrow/ipc/metadata.h
new file mode 100644
index 0000000..c728852
--- /dev/null
+++ b/cpp/src/arrow/ipc/metadata.h
@@ -0,0 +1,146 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// C++ object model and user API for interprocess schema messaging
+
+#ifndef ARROW_IPC_METADATA_H
+#define ARROW_IPC_METADATA_H
+
+#include <cstdint>
+#include <memory>
+
+namespace arrow {
+
+class Buffer;
+struct Field;
+class Schema;
+class Status;
+
+namespace ipc {
+
+//----------------------------------------------------------------------
+// Message read/write APIs
+
+// Serialize arrow::Schema as a Flatbuffer
+Status WriteSchema(const Schema* schema, std::shared_ptr<Buffer>* out);
+
+//----------------------------------------------------------------------
+
+// Read interface classes. We do not fully deserialize the flatbuffers so that
+// individual fields' metadata can be retrieved from a very large schema without
+// materializing every field
+
+class Message;
+
+// Container for serialized Schema metadata contained in an IPC message.
+// Fields are converted to arrow::Field objects only when asked for.
+class SchemaMessage {
+ public:
+ // Accepts an opaque flatbuffer pointer. `message` is stored as the parent
+ // that owns the flatbuffer data `schema` points to.
+ SchemaMessage(const std::shared_ptr<Message>& message, const void* schema);
+
+ // Number of fields in the serialized schema
+ int num_fields() const;
+
+ // Construct an arrow::Field for the i-th value in the metadata
+ Status GetField(int i, std::shared_ptr<Field>* out) const;
+
+ // Construct a complete Schema from the message. May be expensive for very
+ // large schemas if you are only interested in a few fields
+ Status GetSchema(std::shared_ptr<Schema>* out) const;
+
+ private:
+ // Parent, owns the flatbuffer data
+ std::shared_ptr<Message> message_;
+
+ // Flatbuffer-specific details are kept out of this header
+ class Impl;
+ std::unique_ptr<Impl> impl_;
+};
+
+// Field metadata: plain copy of one field node of a serialized record batch,
+// as returned by RecordBatchMessage::field
+struct FieldMetadata {
+ int32_t length;
+ int32_t null_count;
+};
+
+// Buffer metadata: plain copy of one buffer entry of a serialized record
+// batch, as returned by RecordBatchMessage::buffer
+struct BufferMetadata {
+ int32_t page;
+ int64_t offset;
+ int64_t length;
+};
+
+// Container for serialized record batch metadata contained in an IPC message
+class RecordBatchMessage {
+ public:
+ // Accepts an opaque flatbuffer pointer. `message` is stored as the parent
+ // that owns the flatbuffer data `batch_meta` points to.
+ RecordBatchMessage(const std::shared_ptr<Message>& message,
+ const void* batch_meta);
+
+ // Copy of the i-th field node / buffer entry. Neither accessor boundschecks.
+ FieldMetadata field(int i) const;
+ BufferMetadata buffer(int i) const;
+
+ // Length recorded in the record batch, and the number of buffer entries and
+ // field nodes it lists
+ int32_t length() const;
+ int num_buffers() const;
+ int num_fields() const;
+
+ private:
+ // Parent, owns the flatbuffer data
+ std::shared_ptr<Message> message_;
+
+ // Flatbuffer-specific details are kept out of this header
+ class Impl;
+ std::unique_ptr<Impl> impl_;
+};
+
+// Read interface for a dictionary batch message.
+// NOTE(review): only declarations are visible here; no definitions of id()
+// or data() appear in the accompanying metadata.cc -- confirm before use.
+class DictionaryBatchMessage {
+ public:
+ int64_t id() const;
+ std::unique_ptr<RecordBatchMessage> data() const;
+};
+
+// Read-side handle for a serialized IPC message. Instances are created only
+// through Message::Open (the constructor is private) and are held by
+// shared_ptr so that the typed views below can keep their parent alive.
+class Message : public std::enable_shared_from_this<Message> {
+ public:
+ // Kind of header carried by the message; NONE is reported for any header
+ // that is not one of the three kinds listed
+ enum Type {
+ NONE,
+ SCHEMA,
+ DICTIONARY_BATCH,
+ RECORD_BATCH
+ };
+
+ // Wraps `buffer` in a Message and stores it in `out`. The buffer must begin
+ // with an int32_t size prefix followed by the flatbuffer data.
+ static Status Open(const std::shared_ptr<Buffer>& buffer,
+ std::shared_ptr<Message>* out);
+
+ // Returns shared_from_this()
+ std::shared_ptr<Message> get_shared_ptr();
+
+ // Body length recorded in the message
+ int64_t body_length() const;
+
+ Type type() const;
+
+ // These methods only to be invoked if you have checked the message type
+ // NOTE(review): GetDictionaryBatch has no definition in the accompanying
+ // metadata.cc -- confirm it is implemented before calling it.
+ std::shared_ptr<SchemaMessage> GetSchema();
+ std::shared_ptr<RecordBatchMessage> GetRecordBatch();
+ std::shared_ptr<DictionaryBatchMessage> GetDictionaryBatch();
+
+ private:
+ Message();
+
+ // Hide serialization details from user API
+ class Impl;
+ std::unique_ptr<Impl> impl_;
+};
+
+} // namespace ipc
+} // namespace arrow
+
+#endif // ARROW_IPC_METADATA_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/test-common.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/test-common.h b/cpp/src/arrow/ipc/test-common.h
new file mode 100644
index 0000000..0fccce9
--- /dev/null
+++ b/cpp/src/arrow/ipc/test-common.h
@@ -0,0 +1,53 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_IPC_TEST_COMMON_H
+#define ARROW_IPC_TEST_COMMON_H
+
+#include <unistd.h>
+
+#include <cstdint>
+#include <cstdio>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace arrow {
+namespace ipc {
+
+// Test helper that creates files of a requested size and deletes them again
+// in TearDown.
+class MemoryMapFixture {
+ public:
+  // Removes every file recorded by CreateFile, then forgets them so that a
+  // second call does not try to remove the same paths again.
+  void TearDown() {
+    for (const auto& path : tmp_files_) {
+      std::remove(path.c_str());
+    }
+    tmp_files_.clear();
+  }
+
+  // Creates (or truncates) the file at `path`, resizes it to `size` bytes and
+  // records it for removal in TearDown. If the file cannot be opened nothing
+  // is recorded and the call returns without touching the null handle.
+  void CreateFile(const std::string& path, int64_t size) {
+    FILE* file = fopen(path.c_str(), "w");
+    if (file == nullptr) {
+      return;
+    }
+    tmp_files_.push_back(path);
+
+    // Best effort, as before: this helper has no way to report a failed
+    // resize, so the result is discarded explicitly.
+    const int rc = ftruncate(fileno(file), size);
+    static_cast<void>(rc);
+    fclose(file);
+  }
+
+ private:
+  std::vector<std::string> tmp_files_;
+};
+
+} // namespace ipc
+} // namespace arrow
+
+#endif // ARROW_IPC_TEST_COMMON_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/schema-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/schema-test.cc b/cpp/src/arrow/schema-test.cc
new file mode 100644
index 0000000..a1de1dc
--- /dev/null
+++ b/cpp/src/arrow/schema-test.cc
@@ -0,0 +1,104 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "arrow/schema.h"
+#include "arrow/type.h"
+
+using std::shared_ptr;
+using std::vector;
+
+namespace arrow {
+
+// Int32 type instance shared by the tests in this file
+const auto INT32 = std::make_shared<Int32Type>();
+
+// A Field exposes its name and type, and is nullable unless the third
+// constructor argument says otherwise.
+TEST(TestField, Basics) {
+  Field nullable_field("f0", INT32);
+  Field required_field("f0", INT32, false);
+
+  ASSERT_EQ(nullable_field.name, "f0");
+  ASSERT_EQ(nullable_field.type->ToString(), INT32->ToString());
+
+  ASSERT_TRUE(nullable_field.nullable);
+  ASSERT_FALSE(required_field.nullable);
+}
+
+// Fields with the same name and type compare equal; a differing nullable
+// flag makes them unequal.
+TEST(TestField, Equals) {
+  Field base("f0", INT32);
+  Field required("f0", INT32, false);
+  Field same_as_base("f0", INT32);
+
+  ASSERT_EQ(base, same_as_base);
+  ASSERT_NE(base, required);
+}
+
+// Fixture for the Schema tests; it currently needs no shared state
+class TestSchema : public ::testing::Test {
+ public:
+ // Intentionally empty
+ void SetUp() {}
+};
+
+// Checks field access by position and both Equals overloads, using a schema
+// that differs from the reference only in the nullability of f1.
+TEST_F(TestSchema, Basics) {
+  auto f0 = std::make_shared<Field>("f0", INT32);
+  auto f1 = std::make_shared<Field>("f1", std::make_shared<UInt8Type>(), false);
+  auto f1_optional = std::make_shared<Field>("f1", std::make_shared<UInt8Type>());
+  auto f2 = std::make_shared<Field>("f2", std::make_shared<StringType>());
+
+  vector<shared_ptr<Field>> reference_fields = {f0, f1, f2};
+  auto schema = std::make_shared<Schema>(reference_fields);
+
+  ASSERT_EQ(3, schema->num_fields());
+  ASSERT_EQ(f0, schema->field(0));
+  ASSERT_EQ(f1, schema->field(1));
+  ASSERT_EQ(f2, schema->field(2));
+
+  // Same fields again: must compare equal
+  auto same_schema = std::make_shared<Schema>(reference_fields);
+
+  // f1 is nullable here: must compare unequal
+  vector<shared_ptr<Field>> changed_fields = {f0, f1_optional, f2};
+  auto changed_schema = std::make_shared<Schema>(changed_fields);
+
+  ASSERT_TRUE(schema->Equals(same_schema));
+  ASSERT_FALSE(schema->Equals(changed_schema));
+
+  ASSERT_TRUE(schema->Equals(*same_schema));
+  ASSERT_FALSE(schema->Equals(*changed_schema));
+}
+
+// The string form lists one field per line, with no trailing newline.
+TEST_F(TestSchema, ToString) {
+  vector<shared_ptr<Field>> fields = {
+      std::make_shared<Field>("f0", INT32),
+      std::make_shared<Field>("f1", std::make_shared<UInt8Type>(), false),
+      std::make_shared<Field>("f2", std::make_shared<StringType>()),
+      std::make_shared<Field>("f3",
+          std::make_shared<ListType>(std::make_shared<Int16Type>()))};
+  auto schema = std::make_shared<Schema>(fields);
+
+  const std::string expected = R"(f0: int32
+f1: uint8 not null
+f2: string
+f3: list<item: int16>)";
+
+  const std::string actual = schema->ToString();
+  ASSERT_EQ(expected, actual);
+}
+
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/schema.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/schema.cc b/cpp/src/arrow/schema.cc
new file mode 100644
index 0000000..18aad0e
--- /dev/null
+++ b/cpp/src/arrow/schema.cc
@@ -0,0 +1,63 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/schema.h"
+
+#include <memory>
+#include <string>
+#include <sstream>
+#include <vector>
+
+#include "arrow/type.h"
+
+namespace arrow {
+
+// Stores a copy of the given field list.
+Schema::Schema(const std::vector<std::shared_ptr<Field>>& fields)
+    : fields_(fields) {}
+
+// Two schemas are equal when they are the same object, or when they have the
+// same number of fields and each pair of corresponding fields is equal.
+bool Schema::Equals(const Schema& other) const {
+  if (&other == this) {
+    return true;
+  }
+  const int count = num_fields();
+  if (other.num_fields() != count) {
+    return false;
+  }
+  for (int idx = 0; idx < count; ++idx) {
+    if (!field(idx)->Equals(*other.field(idx))) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// shared_ptr convenience overload. A null pointer refers to no schema at all
+// and therefore compares unequal instead of being dereferenced.
+bool Schema::Equals(const std::shared_ptr<Schema>& other) const {
+  if (other == nullptr) {
+    return false;
+  }
+  return Equals(*other);
+}
+
+// Renders the schema as one Field::ToString() result per line, separated by
+// newlines and without a trailing newline.
+std::string Schema::ToString() const {
+  std::stringstream buffer;
+
+  bool first = true;
+  // Iterate by reference: copying each shared_ptr would cost a reference
+  // count update per field for no benefit.
+  for (const auto& field : fields_) {
+    if (!first) {
+      // Plain newline; std::endl would also flush the in-memory stream
+      buffer << '\n';
+    }
+    first = false;
+    buffer << field->ToString();
+  }
+  return buffer.str();
+}
+
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/schema.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/schema.h b/cpp/src/arrow/schema.h
new file mode 100644
index 0000000..52f3c1c
--- /dev/null
+++ b/cpp/src/arrow/schema.h
@@ -0,0 +1,55 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_SCHEMA_H
+#define ARROW_SCHEMA_H
+
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace arrow {
+
+struct Field;
+
+// An ordered list of shared Field objects.
+class Schema {
+ public:
+  explicit Schema(const std::vector<std::shared_ptr<Field>>& fields);
+
+  // True when every field of this schema equals the field at the same
+  // position in `other`
+  bool Equals(const Schema& other) const;
+  bool Equals(const std::shared_ptr<Schema>& other) const;
+
+  // Field at position i. No bounds checking is performed
+  const std::shared_ptr<Field>& field(int i) const { return fields_[i]; }
+
+  // Human-readable rendering of the schema, intended for debugging
+  std::string ToString() const;
+
+  // Number of fields in the schema
+  int num_fields() const { return static_cast<int>(fields_.size()); }
+
+ private:
+  std::vector<std::shared_ptr<Field>> fields_;
+};
+
+} // namespace arrow
+
+#endif // ARROW_SCHEMA_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table-test.cc b/cpp/src/arrow/table-test.cc
new file mode 100644
index 0000000..4c7b8f8
--- /dev/null
+++ b/cpp/src/arrow/table-test.cc
@@ -0,0 +1,128 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "arrow/column.h"
+#include "arrow/schema.h"
+#include "arrow/table.h"
+#include "arrow/test-util.h"
+#include "arrow/type.h"
+#include "arrow/types/primitive.h"
+#include "arrow/util/status.h"
+
+using std::shared_ptr;
+using std::vector;
+
+namespace arrow {
+
+// Type instances shared by the table tests in this file
+const auto INT16 = std::make_shared<Int16Type>();
+const auto UINT8 = std::make_shared<UInt8Type>();
+const auto INT32 = std::make_shared<Int32Type>();
+
+// Fixture for the Table tests. MakeExample1 fills schema_ and columns_ with
+// a three-column example (int32, uint8, int16) of the requested length.
+class TestTable : public TestBase {
+ public:
+  void MakeExample1(int length) {
+    vector<shared_ptr<Field>> fields = {
+        std::make_shared<Field>("f0", INT32),
+        std::make_shared<Field>("f1", UINT8),
+        std::make_shared<Field>("f2", INT16)};
+    schema_ = std::make_shared<Schema>(fields);
+
+    // Rebuild the column list from scratch, one column per schema field
+    columns_.clear();
+    columns_.push_back(
+        std::make_shared<Column>(schema_->field(0), MakePrimitive<Int32Array>(length)));
+    columns_.push_back(
+        std::make_shared<Column>(schema_->field(1), MakePrimitive<UInt8Array>(length)));
+    columns_.push_back(
+        std::make_shared<Column>(schema_->field(2), MakePrimitive<Int16Array>(length)));
+  }
+
+ protected:
+  std::unique_ptr<Table> table_;
+  shared_ptr<Schema> schema_;
+  vector<std::shared_ptr<Column>> columns_;
+};
+
+// A table built from an empty schema and no columns validates and reports
+// zero rows and zero columns.
+TEST_F(TestTable, EmptySchema) {
+  shared_ptr<Schema> no_fields(new Schema({}));
+  table_.reset(new Table("data", no_fields, columns_));
+
+  ASSERT_OK(table_->ValidateColumns());
+  ASSERT_EQ(0, table_->num_rows());
+  ASSERT_EQ(0, table_->num_columns());
+}
+
+// Exercises both constructors: one infers the row count from the columns,
+// the other receives it explicitly.
+TEST_F(TestTable, Ctors) {
+  const int length = 100;
+  MakeExample1(length);
+
+  const std::string name = "data";
+
+  // Row count taken from the columns
+  table_.reset(new Table(name, schema_, columns_));
+  ASSERT_OK(table_->ValidateColumns());
+  ASSERT_EQ(name, table_->name());
+  ASSERT_EQ(length, table_->num_rows());
+  ASSERT_EQ(3, table_->num_columns());
+
+  // Row count passed explicitly
+  table_.reset(new Table(name, schema_, columns_, length));
+  ASSERT_OK(table_->ValidateColumns());
+  ASSERT_EQ(name, table_->name());
+  ASSERT_EQ(length, table_->num_rows());
+}
+
+// The table hands back the schema it was built with, and its first column
+// carries the name and type of the first schema field.
+TEST_F(TestTable, Metadata) {
+  const int length = 100;
+  MakeExample1(length);
+
+  const std::string name = "data";
+  table_.reset(new Table(name, schema_, columns_));
+
+  ASSERT_TRUE(table_->schema()->Equals(schema_));
+
+  auto first_column = table_->column(0);
+  ASSERT_EQ(schema_->field(0)->name, first_column->name());
+  ASSERT_EQ(schema_->field(0)->type, first_column->type());
+}
+
+// ValidateColumns must report Invalid for a mismatched row count, a missing
+// column list, and a single column of the wrong length.
+TEST_F(TestTable, InvalidColumns) {
+  const int length = 100;
+  MakeExample1(length);
+
+  // Declared row count disagrees with the column lengths
+  table_.reset(new Table("data", schema_, columns_, length - 1));
+  ASSERT_RAISES(Invalid, table_->ValidateColumns());
+
+  // Wrong number of columns
+  columns_.clear();
+  table_.reset(new Table("data", schema_, columns_, length));
+  ASSERT_RAISES(Invalid, table_->ValidateColumns());
+
+  // Last column is one element short
+  columns_.push_back(
+      std::make_shared<Column>(schema_->field(0), MakePrimitive<Int32Array>(length)));
+  columns_.push_back(
+      std::make_shared<Column>(schema_->field(1), MakePrimitive<UInt8Array>(length)));
+  columns_.push_back(
+      std::make_shared<Column>(schema_->field(2), MakePrimitive<Int16Array>(length - 1)));
+
+  table_.reset(new Table("data", schema_, columns_, length));
+  ASSERT_RAISES(Invalid, table_->ValidateColumns());
+}
+
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc
new file mode 100644
index 0000000..e405c1d
--- /dev/null
+++ b/cpp/src/arrow/table.cc
@@ -0,0 +1,86 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/table.h"
+
+#include <cstdlib>
+#include <memory>
+#include <sstream>
+
+#include "arrow/column.h"
+#include "arrow/schema.h"
+#include "arrow/util/status.h"
+
+namespace arrow {
+
+// Stores the schema, the row count and a copy of the column list as given;
+// nothing is validated here.
+RowBatch::RowBatch(const std::shared_ptr<Schema>& schema, int num_rows,
+    const std::vector<std::shared_ptr<Array>>& columns)
+    : schema_(schema), num_rows_(num_rows), columns_(columns) {}
+
+// Name of the i-th column, read from the i-th schema field.
+const std::string& RowBatch::column_name(int i) const {
+  const std::shared_ptr<Field>& column_field = schema_->field(i);
+  return column_field->name;
+}
+
+// Builds a table whose row count is inferred from the columns: zero when
+// there are no columns, otherwise the length of the first column.
+Table::Table(const std::string& name, const std::shared_ptr<Schema>& schema,
+    const std::vector<std::shared_ptr<Column>>& columns)
+    : name_(name), schema_(schema), columns_(columns) {
+  num_rows_ = columns.empty() ? 0 : columns[0]->length();
+}
+
+// Builds a table with an explicitly supplied row count. The count is stored
+// as given; Table::ValidateColumns compares it against the columns.
+Table::Table(const std::string& name, const std::shared_ptr<Schema>& schema,
+    const std::vector<std::shared_ptr<Column>>& columns, int64_t num_rows)
+    : name_(name), schema_(schema), columns_(columns), num_rows_(num_rows) {}
+
+// Checks that the table is internally consistent.
+//
+// Returns Status::Invalid when the number of columns differs from the number
+// of schema fields, when a column pointer is null, or when a column's length
+// differs from the table's row count. Returns Status::OK otherwise.
+Status Table::ValidateColumns() const {
+  if (num_columns() != schema_->num_fields()) {
+    return Status::Invalid("Number of columns did not match schema");
+  }
+
+  // Make sure columns are all the same length
+  for (size_t i = 0; i < columns_.size(); ++i) {
+    const Column* col = columns_[i].get();
+    if (col == nullptr) {
+      // A null column has no name to report, so identify it by position only
+      std::stringstream ss;
+      ss << "Column " << i << " was null";
+      return Status::Invalid(ss.str());
+    }
+    if (col->length() != num_rows_) {
+      std::stringstream ss;
+      ss << "Column " << i << " named " << col->name()
+         << " expected length "
+         << num_rows_
+         << " but got length "
+         << col->length();
+      return Status::Invalid(ss.str());
+    }
+  }
+  return Status::OK();
+}
+
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table.h b/cpp/src/arrow/table.h
new file mode 100644
index 0000000..e2f73a2
--- /dev/null
+++ b/cpp/src/arrow/table.h
@@ -0,0 +1,128 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_TABLE_H
+#define ARROW_TABLE_H
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace arrow {
+
+class Array;
+class Column;
+class Schema;
+class Status;
+
+// A row batch is a simpler and more rigid table data structure intended for
+// use primarily in shared memory IPC. It contains a schema (metadata) and a
+// corresponding vector of equal-length Arrow arrays
+class RowBatch {
+ public:
+  // num_rows is passed explicitly so that a row batch can have a size without
+  // having any materialized columns. Each array should have the same length
+  // as num_rows
+  RowBatch(const std::shared_ptr<Schema>& schema, int num_rows,
+      const std::vector<std::shared_ptr<Array>>& columns);
+
+  // @returns: the row batch's schema
+  const std::shared_ptr<Schema>& schema() const { return schema_; }
+
+  // @returns: the i-th column
+  // Note: Does not boundscheck
+  const std::shared_ptr<Array>& column(int i) const { return columns_[i]; }
+
+  // @returns: the name of the i-th column, taken from the schema
+  const std::string& column_name(int i) const;
+
+  // @returns: the number of columns in the row batch
+  int num_columns() const { return static_cast<int>(columns_.size()); }
+
+  // @returns: the number of rows (the corresponding length of each column)
+  int64_t num_rows() const { return num_rows_; }
+
+ private:
+  std::shared_ptr<Schema> schema_;
+  int num_rows_;
+  std::vector<std::shared_ptr<Array>> columns_;
+};
+
+// Immutable container of fixed-length columns conforming to a particular schema
+class Table {
+ public:
+  // The row count is inferred from the columns. If columns is zero-length,
+  // the table's number of rows is zero
+  Table(const std::string& name, const std::shared_ptr<Schema>& schema,
+      const std::vector<std::shared_ptr<Column>>& columns);
+
+  // num_rows is passed explicitly so that a table can have a size without
+  // having any materialized columns. Each column should therefore have the
+  // same length as num_rows -- you can validate this using
+  // Table::ValidateColumns
+  Table(const std::string& name, const std::shared_ptr<Schema>& schema,
+      const std::vector<std::shared_ptr<Column>>& columns, int64_t num_rows);
+
+  // @returns: the table's name, if any (may be length 0)
+  const std::string& name() const { return name_; }
+
+  // @returns: the table's schema
+  const std::shared_ptr<Schema>& schema() const { return schema_; }
+
+  // Note: Does not boundscheck
+  // @returns: the i-th column
+  const std::shared_ptr<Column>& column(int i) const { return columns_[i]; }
+
+  // @returns: the number of columns in the table
+  int num_columns() const { return static_cast<int>(columns_.size()); }
+
+  // @returns: the number of rows (the corresponding length of each column)
+  int64_t num_rows() const { return num_rows_; }
+
+  // After construction, perform any checks to validate the input arguments
+  Status ValidateColumns() const;
+
+ private:
+  // The table's name, optional
+  std::string name_;
+
+  std::shared_ptr<Schema> schema_;
+  std::vector<std::shared_ptr<Column>> columns_;
+
+  int64_t num_rows_;
+};
+
+} // namespace arrow
+
+#endif // ARROW_TABLE_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/CMakeLists.txt b/cpp/src/arrow/table/CMakeLists.txt
deleted file mode 100644
index d9f00e7..0000000
--- a/cpp/src/arrow/table/CMakeLists.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-#######################################
-# arrow_table
-#######################################
-
-# Headers: top level
-install(FILES
- column.h
- schema.h
- table.h
- DESTINATION include/arrow/table)
-
-ADD_ARROW_TEST(column-test)
-ADD_ARROW_TEST(schema-test)
-ADD_ARROW_TEST(table-test)
-
-ADD_ARROW_BENCHMARK(column-benchmark)
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/column-benchmark.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/column-benchmark.cc b/cpp/src/arrow/table/column-benchmark.cc
deleted file mode 100644
index c01146d..0000000
--- a/cpp/src/arrow/table/column-benchmark.cc
+++ /dev/null
@@ -1,55 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-
-#include "benchmark/benchmark.h"
-
-#include "arrow/test-util.h"
-#include "arrow/table/test-common.h"
-#include "arrow/types/integer.h"
-#include "arrow/util/memory-pool.h"
-
-namespace arrow {
-namespace {
- template <typename ArrayType>
- std::shared_ptr<Array> MakePrimitive(int32_t length, int32_t null_count = 0) {
- auto pool = GetDefaultMemoryPool();
- auto data = std::make_shared<PoolBuffer>(pool);
- auto nulls = std::make_shared<PoolBuffer>(pool);
- data->Resize(length * sizeof(typename ArrayType::value_type));
- nulls->Resize(util::bytes_for_bits(length));
- return std::make_shared<ArrayType>(length, data, 10, nulls);
- }
-} // anonymous namespace
-
-
-static void BM_BuildInt32ColumnByChunk(benchmark::State& state) { //NOLINT non-const reference
- ArrayVector arrays;
- for (int chunk_n = 0; chunk_n < state.range_x(); ++chunk_n) {
- arrays.push_back(MakePrimitive<Int32Array>(100, 10));
- }
- const auto INT32 = std::make_shared<Int32Type>();
- const auto field = std::make_shared<Field>("c0", INT32);
- std::unique_ptr<Column> column;
- while (state.KeepRunning()) {
- column.reset(new Column(field, arrays));
- }
-}
-
-BENCHMARK(BM_BuildInt32ColumnByChunk)->Range(5, 50000);
-
-} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/column-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/column-test.cc b/cpp/src/arrow/table/column-test.cc
deleted file mode 100644
index 3b102e4..0000000
--- a/cpp/src/arrow/table/column-test.cc
+++ /dev/null
@@ -1,75 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <gtest/gtest.h>
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/table/column.h"
-#include "arrow/table/schema.h"
-#include "arrow/table/test-common.h"
-#include "arrow/test-util.h"
-#include "arrow/type.h"
-#include "arrow/types/integer.h"
-
-using std::shared_ptr;
-using std::vector;
-
-namespace arrow {
-
-const auto INT32 = std::make_shared<Int32Type>();
-
-class TestColumn : public TestBase {
- protected:
- std::shared_ptr<ChunkedArray> data_;
- std::unique_ptr<Column> column_;
-};
-
-TEST_F(TestColumn, BasicAPI) {
- ArrayVector arrays;
- arrays.push_back(MakePrimitive<Int32Array>(100));
- arrays.push_back(MakePrimitive<Int32Array>(100, 10));
- arrays.push_back(MakePrimitive<Int32Array>(100, 20));
-
- auto field = std::make_shared<Field>("c0", INT32);
- column_.reset(new Column(field, arrays));
-
- ASSERT_EQ("c0", column_->name());
- ASSERT_TRUE(column_->type()->Equals(INT32));
- ASSERT_EQ(300, column_->length());
- ASSERT_EQ(30, column_->null_count());
- ASSERT_EQ(3, column_->data()->num_chunks());
-}
-
-TEST_F(TestColumn, ChunksInhomogeneous) {
- ArrayVector arrays;
- arrays.push_back(MakePrimitive<Int32Array>(100));
- arrays.push_back(MakePrimitive<Int32Array>(100, 10));
-
- auto field = std::make_shared<Field>("c0", INT32);
- column_.reset(new Column(field, arrays));
-
- ASSERT_OK(column_->ValidateData());
-
- arrays.push_back(MakePrimitive<Int16Array>(100, 10));
- column_.reset(new Column(field, arrays));
- ASSERT_RAISES(Invalid, column_->ValidateData());
-}
-
-} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/column.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/column.cc b/cpp/src/arrow/table/column.cc
deleted file mode 100644
index 573e650..0000000
--- a/cpp/src/arrow/table/column.cc
+++ /dev/null
@@ -1,68 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/table/column.h"
-
-#include <memory>
-#include <sstream>
-
-#include "arrow/type.h"
-#include "arrow/util/status.h"
-
-namespace arrow {
-
-ChunkedArray::ChunkedArray(const ArrayVector& chunks) :
- chunks_(chunks) {
- length_ = 0;
- for (const std::shared_ptr<Array>& chunk : chunks) {
- length_ += chunk->length();
- null_count_ += chunk->null_count();
- }
-}
-
-Column::Column(const std::shared_ptr<Field>& field, const ArrayVector& chunks) :
- field_(field) {
- data_ = std::make_shared<ChunkedArray>(chunks);
-}
-
-Column::Column(const std::shared_ptr<Field>& field,
- const std::shared_ptr<Array>& data) :
- field_(field) {
- data_ = std::make_shared<ChunkedArray>(ArrayVector({data}));
-}
-
-Column::Column(const std::shared_ptr<Field>& field,
- const std::shared_ptr<ChunkedArray>& data) :
- field_(field),
- data_(data) {}
-
-Status Column::ValidateData() {
- for (int i = 0; i < data_->num_chunks(); ++i) {
- const std::shared_ptr<DataType>& type = data_->chunk(i)->type();
- if (!this->type()->Equals(type)) {
- std::stringstream ss;
- ss << "In chunk " << i << " expected type "
- << this->type()->ToString()
- << " but saw "
- << type->ToString();
- return Status::Invalid(ss.str());
- }
- }
- return Status::OK();
-}
-
-} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/column.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/column.h b/cpp/src/arrow/table/column.h
deleted file mode 100644
index dfc7516..0000000
--- a/cpp/src/arrow/table/column.h
+++ /dev/null
@@ -1,105 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef ARROW_TABLE_COLUMN_H
-#define ARROW_TABLE_COLUMN_H
-
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/array.h"
-#include "arrow/type.h"
-
-namespace arrow {
-
-typedef std::vector<std::shared_ptr<Array> > ArrayVector;
-
-// A data structure managing a list of primitive Arrow arrays logically as one
-// large array
-class ChunkedArray {
- public:
- explicit ChunkedArray(const ArrayVector& chunks);
-
- // @returns: the total length of the chunked array; computed on construction
- int64_t length() const {
- return length_;
- }
-
- int64_t null_count() const {
- return null_count_;
- }
-
- int num_chunks() const {
- return chunks_.size();
- }
-
- const std::shared_ptr<Array>& chunk(int i) const {
- return chunks_[i];
- }
-
- protected:
- ArrayVector chunks_;
- int64_t length_;
- int64_t null_count_;
-};
-
-// An immutable column data structure consisting of a field (type metadata) and
-// a logical chunked data array (which can be validated as all being the same
-// type).
-class Column {
- public:
- Column(const std::shared_ptr<Field>& field, const ArrayVector& chunks);
- Column(const std::shared_ptr<Field>& field,
- const std::shared_ptr<ChunkedArray>& data);
-
- Column(const std::shared_ptr<Field>& field, const std::shared_ptr<Array>& data);
-
- int64_t length() const {
- return data_->length();
- }
-
- int64_t null_count() const {
- return data_->null_count();
- }
-
- // @returns: the column's name in the passed metadata
- const std::string& name() const {
- return field_->name;
- }
-
- // @returns: the column's type according to the metadata
- const std::shared_ptr<DataType>& type() const {
- return field_->type;
- }
-
- // @returns: the column's data as a chunked logical array
- const std::shared_ptr<ChunkedArray>& data() const {
- return data_;
- }
- // Verify that the column's array data is consistent with the passed field's
- // metadata
- Status ValidateData();
-
- protected:
- std::shared_ptr<Field> field_;
- std::shared_ptr<ChunkedArray> data_;
-};
-
-} // namespace arrow
-
-#endif // ARROW_TABLE_COLUMN_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/schema-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/schema-test.cc b/cpp/src/arrow/table/schema-test.cc
deleted file mode 100644
index 9dfade2..0000000
--- a/cpp/src/arrow/table/schema-test.cc
+++ /dev/null
@@ -1,110 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <gtest/gtest.h>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/table/schema.h"
-#include "arrow/type.h"
-#include "arrow/types/string.h"
-
-using std::shared_ptr;
-using std::vector;
-
-namespace arrow {
-
-const auto INT32 = std::make_shared<Int32Type>();
-
-TEST(TestField, Basics) {
- shared_ptr<DataType> ftype = INT32;
- shared_ptr<DataType> ftype_nn = std::make_shared<Int32Type>(false);
- Field f0("f0", ftype);
- Field f0_nn("f0", ftype_nn);
-
- ASSERT_EQ(f0.name, "f0");
- ASSERT_EQ(f0.type->ToString(), ftype->ToString());
-
- ASSERT_TRUE(f0.nullable());
- ASSERT_FALSE(f0_nn.nullable());
-}
-
-TEST(TestField, Equals) {
- shared_ptr<DataType> ftype = INT32;
- shared_ptr<DataType> ftype_nn = std::make_shared<Int32Type>(false);
-
- Field f0("f0", ftype);
- Field f0_nn("f0", ftype_nn);
- Field f0_other("f0", ftype);
-
- ASSERT_EQ(f0, f0_other);
- ASSERT_NE(f0, f0_nn);
-}
-
-class TestSchema : public ::testing::Test {
- public:
- void SetUp() {}
-};
-
-TEST_F(TestSchema, Basics) {
- auto f0 = std::make_shared<Field>("f0", INT32);
- auto f1 = std::make_shared<Field>("f1", std::make_shared<UInt8Type>(false));
- auto f1_optional = std::make_shared<Field>("f1", std::make_shared<UInt8Type>());
-
- auto f2 = std::make_shared<Field>("f2", std::make_shared<StringType>());
-
- vector<shared_ptr<Field> > fields = {f0, f1, f2};
- auto schema = std::make_shared<Schema>(fields);
-
- ASSERT_EQ(3, schema->num_fields());
- ASSERT_EQ(f0, schema->field(0));
- ASSERT_EQ(f1, schema->field(1));
- ASSERT_EQ(f2, schema->field(2));
-
- auto schema2 = std::make_shared<Schema>(fields);
-
- vector<shared_ptr<Field> > fields3 = {f0, f1_optional, f2};
- auto schema3 = std::make_shared<Schema>(fields3);
- ASSERT_TRUE(schema->Equals(schema2));
- ASSERT_FALSE(schema->Equals(schema3));
-
- ASSERT_TRUE(schema->Equals(*schema2.get()));
- ASSERT_FALSE(schema->Equals(*schema3.get()));
-}
-
-TEST_F(TestSchema, ToString) {
- auto f0 = std::make_shared<Field>("f0", std::make_shared<Int32Type>());
- auto f1 = std::make_shared<Field>("f1", std::make_shared<UInt8Type>(false));
- auto f2 = std::make_shared<Field>("f2", std::make_shared<StringType>());
- auto f3 = std::make_shared<Field>("f3",
- std::make_shared<ListType>(std::make_shared<Int16Type>()));
-
- vector<shared_ptr<Field> > fields = {f0, f1, f2, f3};
- auto schema = std::make_shared<Schema>(fields);
-
- std::string result = schema->ToString();
- std::string expected = R"(f0 int32
-f1 uint8 not null
-f2 string
-f3 list<int16>
-)";
-
- ASSERT_EQ(expected, result);
-}
-
-} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/schema.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/schema.cc b/cpp/src/arrow/table/schema.cc
deleted file mode 100644
index d49d0a7..0000000
--- a/cpp/src/arrow/table/schema.cc
+++ /dev/null
@@ -1,58 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/table/schema.h"
-
-#include <memory>
-#include <string>
-#include <sstream>
-#include <vector>
-
-#include "arrow/type.h"
-
-namespace arrow {
-
-Schema::Schema(const std::vector<std::shared_ptr<Field> >& fields) :
- fields_(fields) {}
-
-bool Schema::Equals(const Schema& other) const {
- if (this == &other) return true;
- if (num_fields() != other.num_fields()) {
- return false;
- }
- for (int i = 0; i < num_fields(); ++i) {
- if (!field(i)->Equals(*other.field(i).get())) {
- return false;
- }
- }
- return true;
-}
-
-bool Schema::Equals(const std::shared_ptr<Schema>& other) const {
- return Equals(*other.get());
-}
-
-std::string Schema::ToString() const {
- std::stringstream buffer;
-
- for (auto field : fields_) {
- buffer << field->ToString() << std::endl;
- }
- return buffer.str();
-}
-
-} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/schema.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/schema.h b/cpp/src/arrow/table/schema.h
deleted file mode 100644
index 103f01b..0000000
--- a/cpp/src/arrow/table/schema.h
+++ /dev/null
@@ -1,55 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef ARROW_SCHEMA_H
-#define ARROW_SCHEMA_H
-
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/type.h"
-
-namespace arrow {
-
-class Schema {
- public:
- explicit Schema(const std::vector<std::shared_ptr<Field> >& fields);
-
- // Returns true if all of the schema fields are equal
- bool Equals(const Schema& other) const;
- bool Equals(const std::shared_ptr<Schema>& other) const;
-
- // Return the ith schema element. Does not boundscheck
- const std::shared_ptr<Field>& field(int i) const {
- return fields_[i];
- }
-
- // Render a string representation of the schema suitable for debugging
- std::string ToString() const;
-
- int num_fields() const {
- return fields_.size();
- }
-
- private:
- std::vector<std::shared_ptr<Field> > fields_;
-};
-
-} // namespace arrow
-
-#endif // ARROW_FIELD_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/table-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/table-test.cc b/cpp/src/arrow/table/table-test.cc
deleted file mode 100644
index 8b354e8..0000000
--- a/cpp/src/arrow/table/table-test.cc
+++ /dev/null
@@ -1,128 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <gtest/gtest.h>
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/table/column.h"
-#include "arrow/table/schema.h"
-#include "arrow/table/table.h"
-#include "arrow/table/test-common.h"
-#include "arrow/test-util.h"
-#include "arrow/type.h"
-#include "arrow/types/integer.h"
-
-using std::shared_ptr;
-using std::vector;
-
-namespace arrow {
-
-const auto INT16 = std::make_shared<Int16Type>();
-const auto UINT8 = std::make_shared<UInt8Type>();
-const auto INT32 = std::make_shared<Int32Type>();
-
-class TestTable : public TestBase {
- public:
- void MakeExample1(int length) {
- auto f0 = std::make_shared<Field>("f0", INT32);
- auto f1 = std::make_shared<Field>("f1", UINT8);
- auto f2 = std::make_shared<Field>("f2", INT16);
-
- vector<shared_ptr<Field> > fields = {f0, f1, f2};
- schema_ = std::make_shared<Schema>(fields);
-
- columns_ = {
- std::make_shared<Column>(schema_->field(0), MakePrimitive<Int32Array>(length)),
- std::make_shared<Column>(schema_->field(1), MakePrimitive<UInt8Array>(length)),
- std::make_shared<Column>(schema_->field(2), MakePrimitive<Int16Array>(length))
- };
- }
-
- protected:
- std::unique_ptr<Table> table_;
- shared_ptr<Schema> schema_;
- vector<std::shared_ptr<Column> > columns_;
-};
-
-TEST_F(TestTable, EmptySchema) {
- auto empty_schema = shared_ptr<Schema>(new Schema({}));
- table_.reset(new Table("data", empty_schema, columns_));
- ASSERT_OK(table_->ValidateColumns());
- ASSERT_EQ(0, table_->num_rows());
- ASSERT_EQ(0, table_->num_columns());
-}
-
-TEST_F(TestTable, Ctors) {
- int length = 100;
- MakeExample1(length);
-
- std::string name = "data";
-
- table_.reset(new Table(name, schema_, columns_));
- ASSERT_OK(table_->ValidateColumns());
- ASSERT_EQ(name, table_->name());
- ASSERT_EQ(length, table_->num_rows());
- ASSERT_EQ(3, table_->num_columns());
-
- table_.reset(new Table(name, schema_, columns_, length));
- ASSERT_OK(table_->ValidateColumns());
- ASSERT_EQ(name, table_->name());
- ASSERT_EQ(length, table_->num_rows());
-}
-
-TEST_F(TestTable, Metadata) {
- int length = 100;
- MakeExample1(length);
-
- std::string name = "data";
- table_.reset(new Table(name, schema_, columns_));
-
- ASSERT_TRUE(table_->schema()->Equals(schema_));
-
- auto col = table_->column(0);
- ASSERT_EQ(schema_->field(0)->name, col->name());
- ASSERT_EQ(schema_->field(0)->type, col->type());
-}
-
-TEST_F(TestTable, InvalidColumns) {
- // Check that columns are all the same length
- int length = 100;
- MakeExample1(length);
-
- table_.reset(new Table("data", schema_, columns_, length - 1));
- ASSERT_RAISES(Invalid, table_->ValidateColumns());
-
- columns_.clear();
-
- // Wrong number of columns
- table_.reset(new Table("data", schema_, columns_, length));
- ASSERT_RAISES(Invalid, table_->ValidateColumns());
-
- columns_ = {
- std::make_shared<Column>(schema_->field(0), MakePrimitive<Int32Array>(length)),
- std::make_shared<Column>(schema_->field(1), MakePrimitive<UInt8Array>(length)),
- std::make_shared<Column>(schema_->field(2), MakePrimitive<Int16Array>(length - 1))
- };
-
- table_.reset(new Table("data", schema_, columns_, length));
- ASSERT_RAISES(Invalid, table_->ValidateColumns());
-}
-
-} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/table.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/table.cc b/cpp/src/arrow/table/table.cc
deleted file mode 100644
index 0c788b8..0000000
--- a/cpp/src/arrow/table/table.cc
+++ /dev/null
@@ -1,73 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/table/table.h"
-
-#include <memory>
-#include <sstream>
-
-#include "arrow/table/column.h"
-#include "arrow/table/schema.h"
-#include "arrow/type.h"
-#include "arrow/util/status.h"
-
-namespace arrow {
-
-Table::Table(const std::string& name, const std::shared_ptr<Schema>& schema,
- const std::vector<std::shared_ptr<Column> >& columns) :
- name_(name),
- schema_(schema),
- columns_(columns) {
- if (columns.size() == 0) {
- num_rows_ = 0;
- } else {
- num_rows_ = columns[0]->length();
- }
-}
-
-Table::Table(const std::string& name, const std::shared_ptr<Schema>& schema,
- const std::vector<std::shared_ptr<Column> >& columns, int64_t num_rows) :
- name_(name),
- schema_(schema),
- columns_(columns),
- num_rows_(num_rows) {}
-
-Status Table::ValidateColumns() const {
- if (num_columns() != schema_->num_fields()) {
- return Status::Invalid("Number of columns did not match schema");
- }
-
- if (columns_.size() == 0) {
- return Status::OK();
- }
-
- // Make sure columns are all the same length
- for (size_t i = 0; i < columns_.size(); ++i) {
- const Column* col = columns_[i].get();
- if (col->length() != num_rows_) {
- std::stringstream ss;
- ss << "Column " << i << " expected length "
- << num_rows_
- << " but got length "
- << col->length();
- return Status::Invalid(ss.str());
- }
- }
- return Status::OK();
-}
-
-} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/table.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/table.h b/cpp/src/arrow/table/table.h
deleted file mode 100644
index b012938..0000000
--- a/cpp/src/arrow/table/table.h
+++ /dev/null
@@ -1,82 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef ARROW_TABLE_TABLE_H
-#define ARROW_TABLE_TABLE_H
-
-#include <memory>
-#include <string>
-#include <vector>
-
-namespace arrow {
-
-class Column;
-class Schema;
-class Status;
-
-// Immutable container of fixed-length columns conforming to a particular schema
-class Table {
- public:
- // If columns is zero-length, the table's number of rows is zero
- Table(const std::string& name, const std::shared_ptr<Schema>& schema,
- const std::vector<std::shared_ptr<Column> >& columns);
-
- Table(const std::string& name, const std::shared_ptr<Schema>& schema,
- const std::vector<std::shared_ptr<Column> >& columns, int64_t num_rows);
-
- // @returns: the table's name, if any (may be length 0)
- const std::string& name() const {
- return name_;
- }
-
- // @returns: the table's schema
- const std::shared_ptr<Schema>& schema() const {
- return schema_;
- }
-
- // Note: Does not boundscheck
- // @returns: the i-th column
- const std::shared_ptr<Column>& column(int i) const {
- return columns_[i];
- }
-
- // @returns: the number of columns in the table
- int num_columns() const {
- return columns_.size();
- }
-
- // @returns: the number of rows (the corresponding length of each column)
- int64_t num_rows() const {
- return num_rows_;
- }
-
- // After construction, perform any checks to validate the input arguments
- Status ValidateColumns() const;
-
- private:
- // The table's name, optional
- std::string name_;
-
- std::shared_ptr<Schema> schema_;
- std::vector<std::shared_ptr<Column> > columns_;
-
- int64_t num_rows_;
-};
-
-} // namespace arrow
-
-#endif // ARROW_TABLE_TABLE_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/test-common.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/test-common.h b/cpp/src/arrow/table/test-common.h
deleted file mode 100644
index 50a5f6a..0000000
--- a/cpp/src/arrow/table/test-common.h
+++ /dev/null
@@ -1,54 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <gtest/gtest.h>
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/table/column.h"
-#include "arrow/table/schema.h"
-#include "arrow/table/table.h"
-#include "arrow/test-util.h"
-#include "arrow/type.h"
-#include "arrow/util/bit-util.h"
-#include "arrow/util/buffer.h"
-#include "arrow/util/memory-pool.h"
-
-namespace arrow {
-
-class TestBase : public ::testing::Test {
- public:
- void SetUp() {
- pool_ = GetDefaultMemoryPool();
- }
-
- template <typename ArrayType>
- std::shared_ptr<Array> MakePrimitive(int32_t length, int32_t null_count = 0) {
- auto data = std::make_shared<PoolBuffer>(pool_);
- auto nulls = std::make_shared<PoolBuffer>(pool_);
- EXPECT_OK(data->Resize(length * sizeof(typename ArrayType::value_type)));
- EXPECT_OK(nulls->Resize(util::bytes_for_bits(length)));
- return std::make_shared<ArrayType>(length, data, 10, nulls);
- }
-
- protected:
- MemoryPool* pool_;
-};
-
-} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/test-util.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/test-util.h b/cpp/src/arrow/test-util.h
index 0898c8e..a9fb2a7 100644
--- a/cpp/src/arrow/test-util.h
+++ b/cpp/src/arrow/test-util.h
@@ -18,26 +18,39 @@
#ifndef ARROW_TEST_UTIL_H_
#define ARROW_TEST_UTIL_H_
-#include <gtest/gtest.h>
+#include <cstdint>
#include <memory>
+#include <random>
#include <string>
#include <vector>
+#include "gtest/gtest.h"
+
+#include "arrow/type.h"
+#include "arrow/column.h"
+#include "arrow/schema.h"
+#include "arrow/table.h"
#include "arrow/util/bit-util.h"
+#include "arrow/util/buffer.h"
+#include "arrow/util/memory-pool.h"
#include "arrow/util/random.h"
#include "arrow/util/status.h"
#define ASSERT_RAISES(ENUM, expr) \
do { \
Status s = (expr); \
- ASSERT_TRUE(s.Is##ENUM()); \
+ if (!s.Is##ENUM()) { \
+ FAIL() << s.ToString(); \
+ } \
} while (0)
#define ASSERT_OK(expr) \
do { \
Status s = (expr); \
- ASSERT_TRUE(s.ok()); \
+ if (!s.ok()) { \
+ FAIL() << s.ToString(); \
+ } \
} while (0)
@@ -50,6 +63,27 @@
namespace arrow {
+// Shared gtest fixture: stores the default memory pool in pool_ during SetUp
+// and provides a helper for building primitive arrays backed by pool buffers.
+class TestBase : public ::testing::Test {
+ public:
+  void SetUp() {
+    pool_ = default_memory_pool();
+  }
+
+  // Creates two PoolBuffers from pool_, resizes the value buffer to
+  // length * sizeof(ArrayType::value_type) bytes and the null buffer to
+  // util::bytes_for_bits(length) bytes, and wraps both in an ArrayType.
+  //
+  // null_count is forwarded to the ArrayType constructor. An earlier form of
+  // this helper ignored the argument and always passed the literal 10, so the
+  // resulting array reported a null count unrelated to what the test asked for.
+  template <typename ArrayType>
+  std::shared_ptr<Array> MakePrimitive(int32_t length, int32_t null_count = 0) {
+    auto data = std::make_shared<PoolBuffer>(pool_);
+    auto nulls = std::make_shared<PoolBuffer>(pool_);
+    EXPECT_OK(data->Resize(length * sizeof(typename ArrayType::value_type)));
+    EXPECT_OK(nulls->Resize(util::bytes_for_bits(length)));
+    return std::make_shared<ArrayType>(length, data, null_count, nulls);
+  }
+
+ protected:
+  // Pool used for every buffer allocated by this fixture; assigned in SetUp.
+  MemoryPool* pool_;
+};
+
+namespace test {
+
template <typename T>
void randint(int64_t N, T lower, T upper, std::vector<T>* out) {
Random rng(random_seed());
@@ -84,6 +118,33 @@ void random_nulls(int64_t n, double pct_null, std::vector<bool>* nulls) {
}
}
+// Writes n pseudo-random bytes to out, each drawn from [0, 255] with a
+// std::mt19937 engine seeded by `seed`. Repeating a call with the same seed
+// reproduces the same bytes (on a given standard library implementation).
+// Nothing is written when n <= 0.
+static inline void random_bytes(int n, uint32_t seed, uint8_t* out) {
+  std::mt19937 engine(seed);
+  std::uniform_int_distribution<int> byte_dist(0, 255);
+
+  int remaining = n;
+  while (remaining-- > 0) {
+    // The distribution already yields 0..255, so the cast keeps every bit.
+    *out++ = static_cast<uint8_t>(byte_dist(engine));
+  }
+}
+
+// Fills out[0..n) with integers drawn uniformly from the closed interval
+// [min_value, max_value], using a std::mt19937 engine seeded by `seed`.
+// Nothing is written when n <= 0.
+//
+// NOTE(review): std::uniform_int_distribution is only specified for short,
+// int, long, long long and their unsigned counterparts; instantiating this
+// helper with an 8-bit T is outside what the standard guarantees.
+template <typename T>
+void rand_uniform_int(int n, uint32_t seed, T min_value, T max_value, T* out) {
+  std::mt19937 engine(seed);
+  std::uniform_int_distribution<T> dist(min_value, max_value);
+  T* cursor = out;
+  for (int produced = 0; produced < n; ++produced, ++cursor) {
+    *cursor = dist(engine);
+  }
+}
+
+// Returns how many of the first `length` bits of the bitmap at `data` are
+// set, probing each bit individually through util::get_bit.
+static inline int bitmap_popcount(const uint8_t* data, int length) {
+  int num_set = 0;
+  // TODO: accelerate this
+  for (int bit = 0; bit < length; ++bit) {
+    num_set += util::get_bit(data, bit) ? 1 : 0;
+  }
+  return num_set;
+}
+
static inline int null_count(const std::vector<uint8_t>& nulls) {
int result = 0;
for (size_t i = 0; i < nulls.size(); ++i) {
@@ -102,6 +163,7 @@ std::shared_ptr<Buffer> bytes_to_null_buffer(uint8_t* bytes, int length) {
return out;
}
+} // namespace test
} // namespace arrow
#endif // ARROW_TEST_UTIL_H_
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/type.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index 0a2e817..f7f835e 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -24,45 +24,37 @@ namespace arrow {
std::string Field::ToString() const {
std::stringstream ss;
- ss << this->name << " " << this->type->ToString();
+ ss << this->name << ": " << this->type->ToString();
+ if (!this->nullable) {
+ ss << " not null";
+ }
return ss.str();
}
DataType::~DataType() {}
-StringType::StringType(bool nullable)
- : DataType(LogicalType::STRING, nullable) {}
-
-StringType::StringType(const StringType& other)
- : StringType(other.nullable) {}
+StringType::StringType() : DataType(Type::STRING) {}
std::string StringType::ToString() const {
std::string result(name());
- if (!nullable) {
- result.append(" not null");
- }
return result;
}
std::string ListType::ToString() const {
std::stringstream s;
- s << "list<" << value_type->ToString() << ">";
- if (!this->nullable) {
- s << " not null";
- }
+ s << "list<" << value_field()->ToString() << ">";
return s.str();
}
std::string StructType::ToString() const {
std::stringstream s;
s << "struct<";
- for (size_t i = 0; i < fields_.size(); ++i) {
+ for (int i = 0; i < this->num_children(); ++i) {
if (i > 0) s << ", ";
- const std::shared_ptr<Field>& field = fields_[i];
+ const std::shared_ptr<Field>& field = this->child(i);
s << field->name << ": " << field->type->ToString();
}
s << ">";
- if (!nullable) s << " not null";
return s.str();
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/type.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 00b01ea..5984b67 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -18,62 +18,34 @@
#ifndef ARROW_TYPE_H
#define ARROW_TYPE_H
+#include <cstdint>
#include <memory>
#include <string>
#include <vector>
namespace arrow {
-// Physical data type that describes the memory layout of values. See details
-// for each type
-enum class LayoutEnum: char {
- // A physical type consisting of some non-negative number of bytes
- BYTE = 0,
-
- // A physical type consisting of some non-negative number of bits
- BIT = 1,
-
- // A parametric variable-length value type. Full specification requires a
- // child logical type
- LIST = 2,
-
- // A collection of multiple equal-length child arrays. Parametric type taking
- // 1 or more child logical types
- STRUCT = 3,
-
- // An array with heterogeneous value types. Parametric types taking 1 or more
- // child logical types
- DENSE_UNION = 4,
- SPARSE_UNION = 5
-};
-
-
-struct LayoutType {
- LayoutEnum type;
- explicit LayoutType(LayoutEnum type) : type(type) {}
-};
-
// Data types in this library are all *logical*. They can be expressed as
// either a primitive physical type (bytes or bits of some fixed size), a
// nested type consisting of other data types, or another data type (e.g. a
// timestamp encoded as an int64)
-struct LogicalType {
+struct Type {
enum type {
// A degenerate NULL type represented as 0 bytes/bits
NA = 0,
- // Little-endian integer types
- UINT8 = 1,
- INT8 = 2,
- UINT16 = 3,
- INT16 = 4,
- UINT32 = 5,
- INT32 = 6,
- UINT64 = 7,
- INT64 = 8,
-
// A boolean value represented as 1 bit
- BOOL = 9,
+ BOOL = 1,
+
+ // Little-endian integer types
+ UINT8 = 2,
+ INT8 = 3,
+ UINT16 = 4,
+ INT16 = 5,
+ UINT32 = 6,
+ INT32 = 7,
+ UINT64 = 8,
+ INT64 = 9,
// 4-byte floating point value
FLOAT = 10,
@@ -131,30 +103,38 @@ struct LogicalType {
};
};
+struct Field;
+
struct DataType {
- LogicalType::type type;
- bool nullable;
+ Type::type type;
- explicit DataType(LogicalType::type type, bool nullable = true) :
- type(type),
- nullable(nullable) {}
+ std::vector<std::shared_ptr<Field>> children_;
+
+ explicit DataType(Type::type type) :
+ type(type) {}
virtual ~DataType();
bool Equals(const DataType* other) {
// Call with a pointer so more friendly to subclasses
- return this == other || (this->type == other->type &&
- this->nullable == other->nullable);
+ return this == other || (this->type == other->type);
}
bool Equals(const std::shared_ptr<DataType>& other) {
return Equals(other.get());
}
+ const std::shared_ptr<Field>& child(int i) const {
+ return children_[i];
+ }
+
+ int num_children() const {
+ return children_.size();
+ }
+
virtual std::string ToString() const = 0;
};
-typedef std::shared_ptr<LayoutType> LayoutPtr;
typedef std::shared_ptr<DataType> TypePtr;
// A field is a piece of metadata that includes (for now) a name and a data
@@ -166,9 +146,13 @@ struct Field {
// The field's data type
TypePtr type;
- Field(const std::string& name, const TypePtr& type) :
+ // Fields can be nullable
+ bool nullable;
+
+ Field(const std::string& name, const TypePtr& type, bool nullable = true) :
name(name),
- type(type) {}
+ type(type),
+ nullable(nullable) {}
bool operator==(const Field& other) const {
return this->Equals(other);
@@ -180,6 +164,7 @@ struct Field {
bool Equals(const Field& other) const {
return (this == &other) || (this->name == other.name &&
+ this->nullable == other.nullable &&
this->type->Equals(other.type.get()));
}
@@ -187,36 +172,12 @@ struct Field {
return Equals(*other.get());
}
- bool nullable() const {
- return this->type->nullable;
- }
-
std::string ToString() const;
};
-struct BytesType : public LayoutType {
- int size;
-
- explicit BytesType(int size)
- : LayoutType(LayoutEnum::BYTE),
- size(size) {}
-
- BytesType(const BytesType& other)
- : BytesType(other.size) {}
-};
-
-struct ListLayoutType : public LayoutType {
- LayoutPtr value_type;
-
- explicit ListLayoutType(const LayoutPtr& value_type)
- : LayoutType(LayoutEnum::BYTE),
- value_type(value_type) {}
-};
-
template <typename Derived>
struct PrimitiveType : public DataType {
- explicit PrimitiveType(bool nullable = true)
- : DataType(Derived::type_enum, nullable) {}
+ PrimitiveType() : DataType(Derived::type_enum) {}
std::string ToString() const override;
};
@@ -224,22 +185,19 @@ struct PrimitiveType : public DataType {
template <typename Derived>
inline std::string PrimitiveType<Derived>::ToString() const {
std::string result(static_cast<const Derived*>(this)->name());
- if (!nullable) {
- result.append(" not null");
- }
return result;
}
-#define PRIMITIVE_DECL(TYPENAME, C_TYPE, ENUM, SIZE, NAME) \
- typedef C_TYPE c_type; \
- static constexpr LogicalType::type type_enum = LogicalType::ENUM; \
- static constexpr int size = SIZE; \
- \
- explicit TYPENAME(bool nullable = true) \
- : PrimitiveType<TYPENAME>(nullable) {} \
- \
- static const char* name() { \
- return NAME; \
+#define PRIMITIVE_DECL(TYPENAME, C_TYPE, ENUM, SIZE, NAME) \
+ typedef C_TYPE c_type; \
+ static constexpr Type::type type_enum = Type::ENUM; \
+ static constexpr int size = SIZE; \
+ \
+ TYPENAME() \
+ : PrimitiveType<TYPENAME>() {} \
+ \
+ static const char* name() { \
+ return NAME; \
}
struct NullType : public PrimitiveType<NullType> {
@@ -292,11 +250,23 @@ struct DoubleType : public PrimitiveType<DoubleType> {
struct ListType : public DataType {
// List can contain any other logical value type
- TypePtr value_type;
+ explicit ListType(const std::shared_ptr<DataType>& value_type)
+ : DataType(Type::LIST) {
+ children_ = {std::make_shared<Field>("item", value_type)};
+ }
+
+ explicit ListType(const std::shared_ptr<Field>& value_field)
+ : DataType(Type::LIST) {
+ children_ = {value_field};
+ }
- explicit ListType(const TypePtr& value_type, bool nullable = true)
- : DataType(LogicalType::LIST, nullable),
- value_type(value_type) {}
+ const std::shared_ptr<Field>& value_field() const {
+ return children_[0];
+ }
+
+ const std::shared_ptr<DataType>& value_type() const {
+ return children_[0]->type;
+ }
static char const *name() {
return "list";
@@ -307,9 +277,7 @@ struct ListType : public DataType {
// String is a logical type consisting of a physical list of 1-byte values
struct StringType : public DataType {
- explicit StringType(bool nullable = true);
-
- StringType(const StringType& other);
+ StringType();
static char const *name() {
return "string";
@@ -319,20 +287,9 @@ struct StringType : public DataType {
};
struct StructType : public DataType {
- std::vector<std::shared_ptr<Field> > fields_;
-
- explicit StructType(const std::vector<std::shared_ptr<Field> >& fields,
- bool nullable = true)
- : DataType(LogicalType::STRUCT, nullable) {
- fields_ = fields;
- }
-
- const std::shared_ptr<Field>& field(int i) const {
- return fields_[i];
- }
-
- int num_children() const {
- return fields_.size();
+ explicit StructType(const std::vector<std::shared_ptr<Field>>& fields)
+ : DataType(Type::STRUCT) {
+ children_ = fields;
}
std::string ToString() const override;
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/CMakeLists.txt b/cpp/src/arrow/types/CMakeLists.txt
index 57cabde..595b3be 100644
--- a/cpp/src/arrow/types/CMakeLists.txt
+++ b/cpp/src/arrow/types/CMakeLists.txt
@@ -26,8 +26,6 @@ install(FILES
construct.h
datetime.h
decimal.h
- floating.h
- integer.h
json.h
list.h
primitive.h
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/boolean.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/boolean.h b/cpp/src/arrow/types/boolean.h
index a5023d7..1cb91f9 100644
--- a/cpp/src/arrow/types/boolean.h
+++ b/cpp/src/arrow/types/boolean.h
@@ -22,7 +22,7 @@
namespace arrow {
-typedef PrimitiveArrayImpl<BooleanType> BooleanArray;
+// typedef PrimitiveArrayImpl<BooleanType> BooleanArray;
class BooleanBuilder : public ArrayBuilder {
};
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/collection.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/collection.h b/cpp/src/arrow/types/collection.h
index 42a9c92..46d84f1 100644
--- a/cpp/src/arrow/types/collection.h
+++ b/cpp/src/arrow/types/collection.h
@@ -25,7 +25,7 @@
namespace arrow {
-template <LogicalType::type T>
+template <Type::type T>
struct CollectionType : public DataType {
std::vector<TypePtr> child_types_;
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/construct.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/construct.cc b/cpp/src/arrow/types/construct.cc
index 43f01a3..290decd 100644
--- a/cpp/src/arrow/types/construct.cc
+++ b/cpp/src/arrow/types/construct.cc
@@ -19,24 +19,26 @@
#include <memory>
-#include "arrow/types/floating.h"
-#include "arrow/types/integer.h"
+#include "arrow/type.h"
+#include "arrow/types/primitive.h"
#include "arrow/types/list.h"
#include "arrow/types/string.h"
+#include "arrow/util/buffer.h"
#include "arrow/util/status.h"
namespace arrow {
class ArrayBuilder;
-// Initially looked at doing this with vtables, but shared pointers makes it
-// difficult
-
#define BUILDER_CASE(ENUM, BuilderType) \
- case LogicalType::ENUM: \
+ case Type::ENUM: \
out->reset(new BuilderType(pool, type)); \
return Status::OK();
+// Initially looked at doing this with vtables, but shared pointers makes it
+// difficult
+//
+// TODO(wesm): come up with a less monolithic strategy
Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
std::shared_ptr<ArrayBuilder>* out) {
switch (type->type) {
@@ -56,30 +58,41 @@ Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
BUILDER_CASE(STRING, StringBuilder);
- case LogicalType::LIST:
+ case Type::LIST:
{
std::shared_ptr<ArrayBuilder> value_builder;
const std::shared_ptr<DataType>& value_type = static_cast<ListType*>(
- type.get())->value_type;
+ type.get())->value_type();
RETURN_NOT_OK(MakeBuilder(pool, value_type, &value_builder));
out->reset(new ListBuilder(pool, type, value_builder));
return Status::OK();
}
- // BUILDER_CASE(CHAR, CharBuilder);
-
- // BUILDER_CASE(VARCHAR, VarcharBuilder);
- // BUILDER_CASE(BINARY, BinaryBuilder);
-
- // BUILDER_CASE(DATE, DateBuilder);
- // BUILDER_CASE(TIMESTAMP, TimestampBuilder);
- // BUILDER_CASE(TIME, TimeBuilder);
+ default:
+ return Status::NotImplemented(type->ToString());
+ }
+}
- // BUILDER_CASE(LIST, ListBuilder);
- // BUILDER_CASE(STRUCT, StructBuilder);
- // BUILDER_CASE(DENSE_UNION, DenseUnionBuilder);
- // BUILDER_CASE(SPARSE_UNION, SparseUnionBuilder);
+#define MAKE_PRIMITIVE_ARRAY_CASE(ENUM, ArrayType) \
+ case Type::ENUM: \
+ out->reset(new ArrayType(type, length, data, null_count, nulls)); \
+ return Status::OK();
+Status MakePrimitiveArray(const std::shared_ptr<DataType>& type,
+ int32_t length, const std::shared_ptr<Buffer>& data,
+ int32_t null_count, const std::shared_ptr<Buffer>& nulls,
+ std::shared_ptr<Array>* out) {
+ switch (type->type) {
+ MAKE_PRIMITIVE_ARRAY_CASE(UINT8, UInt8Array);
+ MAKE_PRIMITIVE_ARRAY_CASE(INT8, Int8Array);
+ MAKE_PRIMITIVE_ARRAY_CASE(UINT16, UInt16Array);
+ MAKE_PRIMITIVE_ARRAY_CASE(INT16, Int16Array);
+ MAKE_PRIMITIVE_ARRAY_CASE(UINT32, UInt32Array);
+ MAKE_PRIMITIVE_ARRAY_CASE(INT32, Int32Array);
+ MAKE_PRIMITIVE_ARRAY_CASE(UINT64, UInt64Array);
+ MAKE_PRIMITIVE_ARRAY_CASE(INT64, Int64Array);
+ MAKE_PRIMITIVE_ARRAY_CASE(FLOAT, FloatArray);
+ MAKE_PRIMITIVE_ARRAY_CASE(DOUBLE, DoubleArray);
default:
return Status::NotImplemented(type->ToString());
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/construct.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/construct.h b/cpp/src/arrow/types/construct.h
index 59ebe1a..089c484 100644
--- a/cpp/src/arrow/types/construct.h
+++ b/cpp/src/arrow/types/construct.h
@@ -18,19 +18,26 @@
#ifndef ARROW_TYPES_CONSTRUCT_H
#define ARROW_TYPES_CONSTRUCT_H
+#include <cstdint>
#include <memory>
-#include "arrow/type.h"
-
namespace arrow {
+class Array;
class ArrayBuilder;
+class Buffer;
+struct DataType;
class MemoryPool;
class Status;
Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
std::shared_ptr<ArrayBuilder>* out);
+Status MakePrimitiveArray(const std::shared_ptr<DataType>& type,
+ int32_t length, const std::shared_ptr<Buffer>& data,
+ int32_t null_count, const std::shared_ptr<Buffer>& nulls,
+ std::shared_ptr<Array>* out);
+
} // namespace arrow
#endif // ARROW_BUILDER_H_
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/datetime.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/datetime.h b/cpp/src/arrow/types/datetime.h
index 765fc29..e57b66a 100644
--- a/cpp/src/arrow/types/datetime.h
+++ b/cpp/src/arrow/types/datetime.h
@@ -31,8 +31,8 @@ struct DateType : public DataType {
Unit unit;
- explicit DateType(Unit unit = Unit::DAY, bool nullable = true)
- : DataType(LogicalType::DATE, nullable),
+ explicit DateType(Unit unit = Unit::DAY)
+ : DataType(Type::DATE),
unit(unit) {}
DateType(const DateType& other)
@@ -41,10 +41,6 @@ struct DateType : public DataType {
static char const *name() {
return "date";
}
-
- // virtual std::string ToString() {
- // return name();
- // }
};
@@ -58,8 +54,8 @@ struct TimestampType : public DataType {
Unit unit;
- explicit TimestampType(Unit unit = Unit::MILLI, bool nullable = true)
- : DataType(LogicalType::TIMESTAMP, nullable),
+ explicit TimestampType(Unit unit = Unit::MILLI)
+ : DataType(Type::TIMESTAMP),
unit(unit) {}
TimestampType(const TimestampType& other)
@@ -68,10 +64,6 @@ struct TimestampType : public DataType {
static char const *name() {
return "timestamp";
}
-
- // virtual std::string ToString() {
- // return name();
- // }
};
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/floating.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/floating.cc b/cpp/src/arrow/types/floating.cc
deleted file mode 100644
index bde2826..0000000
--- a/cpp/src/arrow/types/floating.cc
+++ /dev/null
@@ -1,22 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/types/floating.h"
-
-namespace arrow {
-
-} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/floating.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/floating.h b/cpp/src/arrow/types/floating.h
deleted file mode 100644
index e752278..0000000
--- a/cpp/src/arrow/types/floating.h
+++ /dev/null
@@ -1,36 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef ARROW_TYPES_FLOATING_H
-#define ARROW_TYPES_FLOATING_H
-
-#include <string>
-
-#include "arrow/types/primitive.h"
-#include "arrow/type.h"
-
-namespace arrow {
-
-typedef PrimitiveArrayImpl<FloatType> FloatArray;
-typedef PrimitiveArrayImpl<DoubleType> DoubleArray;
-
-typedef PrimitiveBuilder<FloatType, FloatArray> FloatBuilder;
-typedef PrimitiveBuilder<DoubleType, DoubleArray> DoubleBuilder;
-
-} // namespace arrow
-
-#endif // ARROW_TYPES_FLOATING_H