You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2016/03/23 02:45:22 UTC
[1/3] arrow git commit: ARROW-67: C++ metadata flatbuffer
serialization and data movement to memory maps
Repository: arrow
Updated Branches:
refs/heads/master 093f9bd8c -> 65db0da80
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/integer.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/integer.cc b/cpp/src/arrow/types/integer.cc
deleted file mode 100644
index 4696536..0000000
--- a/cpp/src/arrow/types/integer.cc
+++ /dev/null
@@ -1,22 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/types/integer.h"
-
-namespace arrow {
-
-} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/integer.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/integer.h b/cpp/src/arrow/types/integer.h
deleted file mode 100644
index 5684191..0000000
--- a/cpp/src/arrow/types/integer.h
+++ /dev/null
@@ -1,57 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef ARROW_TYPES_INTEGER_H
-#define ARROW_TYPES_INTEGER_H
-
-#include <cstdint>
-#include <string>
-
-#include "arrow/types/primitive.h"
-#include "arrow/type.h"
-
-namespace arrow {
-
-// Array containers
-
-typedef PrimitiveArrayImpl<UInt8Type> UInt8Array;
-typedef PrimitiveArrayImpl<Int8Type> Int8Array;
-
-typedef PrimitiveArrayImpl<UInt16Type> UInt16Array;
-typedef PrimitiveArrayImpl<Int16Type> Int16Array;
-
-typedef PrimitiveArrayImpl<UInt32Type> UInt32Array;
-typedef PrimitiveArrayImpl<Int32Type> Int32Array;
-
-typedef PrimitiveArrayImpl<UInt64Type> UInt64Array;
-typedef PrimitiveArrayImpl<Int64Type> Int64Array;
-
-// Builders
-
-typedef PrimitiveBuilder<UInt8Type, UInt8Array> UInt8Builder;
-typedef PrimitiveBuilder<UInt16Type, UInt16Array> UInt16Builder;
-typedef PrimitiveBuilder<UInt32Type, UInt32Array> UInt32Builder;
-typedef PrimitiveBuilder<UInt64Type, UInt64Array> UInt64Builder;
-
-typedef PrimitiveBuilder<Int8Type, Int8Array> Int8Builder;
-typedef PrimitiveBuilder<Int16Type, Int16Array> Int16Builder;
-typedef PrimitiveBuilder<Int32Type, Int32Array> Int32Builder;
-typedef PrimitiveBuilder<Int64Type, Int64Array> Int64Builder;
-
-} // namespace arrow
-
-#endif // ARROW_TYPES_INTEGER_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/json.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/json.cc b/cpp/src/arrow/types/json.cc
index 168e370..fb731ed 100644
--- a/cpp/src/arrow/types/json.cc
+++ b/cpp/src/arrow/types/json.cc
@@ -20,7 +20,6 @@
#include <vector>
#include "arrow/type.h"
-#include "arrow/types/string.h"
#include "arrow/types/union.h"
namespace arrow {
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/json.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/json.h b/cpp/src/arrow/types/json.h
index b67fb38..9c850af 100644
--- a/cpp/src/arrow/types/json.h
+++ b/cpp/src/arrow/types/json.h
@@ -28,8 +28,8 @@ struct JSONScalar : public DataType {
static TypePtr dense_type;
static TypePtr sparse_type;
- explicit JSONScalar(bool dense = true, bool nullable = true)
- : DataType(LogicalType::JSON_SCALAR, nullable),
+ explicit JSONScalar(bool dense = true)
+ : DataType(Type::JSON_SCALAR),
dense(dense) {}
};
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/list-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list-test.cc b/cpp/src/arrow/types/list-test.cc
index 02991de..eb55ca8 100644
--- a/cpp/src/arrow/types/list-test.cc
+++ b/cpp/src/arrow/types/list-test.cc
@@ -15,20 +15,21 @@
// specific language governing permissions and limitations
// under the License.
-#include <gtest/gtest.h>
#include <cstdlib>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
+#include "gtest/gtest.h"
+
#include "arrow/array.h"
+#include "arrow/builder.h"
#include "arrow/test-util.h"
#include "arrow/type.h"
#include "arrow/types/construct.h"
-#include "arrow/types/integer.h"
#include "arrow/types/list.h"
-#include "arrow/types/string.h"
+#include "arrow/types/primitive.h"
#include "arrow/types/test-common.h"
#include "arrow/util/status.h"
@@ -39,27 +40,24 @@ using std::vector;
namespace arrow {
-class ArrayBuilder;
-
TEST(TypesTest, TestListType) {
std::shared_ptr<DataType> vt = std::make_shared<UInt8Type>();
ListType list_type(vt);
- ASSERT_EQ(list_type.type, LogicalType::LIST);
+ ASSERT_EQ(list_type.type, Type::LIST);
ASSERT_EQ(list_type.name(), string("list"));
- ASSERT_EQ(list_type.ToString(), string("list<uint8>"));
+ ASSERT_EQ(list_type.ToString(), string("list<item: uint8>"));
- ASSERT_EQ(list_type.value_type->type, vt->type);
- ASSERT_EQ(list_type.value_type->type, vt->type);
+ ASSERT_EQ(list_type.value_type()->type, vt->type);
+ ASSERT_EQ(list_type.value_type()->type, vt->type);
- std::shared_ptr<DataType> st = std::make_shared<StringType>(false);
- std::shared_ptr<DataType> lt = std::make_shared<ListType>(st, false);
- ASSERT_EQ(lt->ToString(), string("list<string not null> not null"));
+ std::shared_ptr<DataType> st = std::make_shared<StringType>();
+ std::shared_ptr<DataType> lt = std::make_shared<ListType>(st);
+ ASSERT_EQ(lt->ToString(), string("list<item: string>"));
- ListType lt2(lt, false);
- ASSERT_EQ(lt2.ToString(),
- string("list<list<string not null> not null> not null"));
+ ListType lt2(lt);
+ ASSERT_EQ(lt2.ToString(), string("list<item: list<item: string>>"));
}
// ----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/list.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list.cc b/cpp/src/arrow/types/list.cc
index 69a79a7..670ee4d 100644
--- a/cpp/src/arrow/types/list.cc
+++ b/cpp/src/arrow/types/list.cc
@@ -19,4 +19,33 @@
namespace arrow {
+bool ListArray::EqualsExact(const ListArray& other) const {
+ if (this == &other) return true;
+ if (null_count_ != other.null_count_) {
+ return false;
+ }
+
+ bool equal_offsets = offset_buf_->Equals(*other.offset_buf_,
+ length_ + 1);
+ bool equal_nulls = true;
+ if (null_count_ > 0) {
+ equal_nulls = nulls_->Equals(*other.nulls_,
+ util::bytes_for_bits(length_));
+ }
+
+ if (!(equal_offsets && equal_nulls)) {
+ return false;
+ }
+
+ return values()->Equals(other.values());
+}
+
+bool ListArray::Equals(const std::shared_ptr<Array>& arr) const {
+ if (this == arr.get()) return true;
+ if (this->type_enum() != arr->type_enum()) {
+ return false;
+ }
+ return EqualsExact(*static_cast<const ListArray*>(arr.get()));
+}
+
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/list.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list.h b/cpp/src/arrow/types/list.h
index 210c76a..141f762 100644
--- a/cpp/src/arrow/types/list.h
+++ b/cpp/src/arrow/types/list.h
@@ -21,12 +21,10 @@
#include <cstdint>
#include <cstring>
#include <memory>
-#include <string>
#include "arrow/array.h"
#include "arrow/builder.h"
#include "arrow/type.h"
-#include "arrow/types/integer.h"
#include "arrow/types/primitive.h"
#include "arrow/util/bit-util.h"
#include "arrow/util/buffer.h"
@@ -38,29 +36,19 @@ class MemoryPool;
class ListArray : public Array {
public:
- ListArray() : Array(), offset_buf_(nullptr), offsets_(nullptr) {}
-
ListArray(const TypePtr& type, int32_t length, std::shared_ptr<Buffer> offsets,
const ArrayPtr& values,
int32_t null_count = 0,
- std::shared_ptr<Buffer> nulls = nullptr) {
- Init(type, length, offsets, values, null_count, nulls);
- }
-
- virtual ~ListArray() {}
-
- void Init(const TypePtr& type, int32_t length, std::shared_ptr<Buffer> offsets,
- const ArrayPtr& values,
- int32_t null_count = 0,
- std::shared_ptr<Buffer> nulls = nullptr) {
+ std::shared_ptr<Buffer> nulls = nullptr) :
+ Array(type, length, null_count, nulls) {
offset_buf_ = offsets;
offsets_ = offsets == nullptr? nullptr :
reinterpret_cast<const int32_t*>(offset_buf_->data());
-
values_ = values;
- Array::Init(type, length, null_count, nulls);
}
+ virtual ~ListArray() {}
+
// Return a shared pointer in case the requestor desires to share ownership
// with this array.
const std::shared_ptr<Array>& values() const {return values_;}
@@ -77,6 +65,9 @@ class ListArray : public Array {
int32_t value_offset(int i) { return offsets_[i];}
int32_t value_length(int i) { return offsets_[i + 1] - offsets_[i];}
+ bool EqualsExact(const ListArray& other) const;
+ bool Equals(const std::shared_ptr<Array>& arr) const override;
+
protected:
std::shared_ptr<Buffer> offset_buf_;
const int32_t* offsets_;
@@ -137,8 +128,6 @@ class ListBuilder : public Int32Builder {
template <typename Container>
std::shared_ptr<Array> Transfer() {
- auto result = std::make_shared<Container>();
-
std::shared_ptr<Array> items = value_builder_->Finish();
// Add final offset if the length is non-zero
@@ -146,8 +135,9 @@ class ListBuilder : public Int32Builder {
raw_buffer()[length_] = items->length();
}
- result->Init(type_, length_, values_, items,
+ auto result = std::make_shared<Container>(type_, length_, values_, items,
null_count_, nulls_);
+
values_ = nulls_ = nullptr;
capacity_ = length_ = null_count_ = 0;
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/primitive-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive-test.cc b/cpp/src/arrow/types/primitive-test.cc
index f35a258..7eae8cd 100644
--- a/cpp/src/arrow/types/primitive-test.cc
+++ b/cpp/src/arrow/types/primitive-test.cc
@@ -15,21 +15,17 @@
// specific language governing permissions and limitations
// under the License.
-#include <gtest/gtest.h>
-
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
-#include "arrow/array.h"
+#include "gtest/gtest.h"
+
#include "arrow/builder.h"
#include "arrow/test-util.h"
#include "arrow/type.h"
-#include "arrow/types/boolean.h"
#include "arrow/types/construct.h"
-#include "arrow/types/floating.h"
-#include "arrow/types/integer.h"
#include "arrow/types/primitive.h"
#include "arrow/types/test-common.h"
#include "arrow/util/bit-util.h"
@@ -43,23 +39,17 @@ using std::vector;
namespace arrow {
-TEST(TypesTest, TestBytesType) {
- BytesType t1(3);
-
- ASSERT_EQ(t1.type, LayoutEnum::BYTE);
- ASSERT_EQ(t1.size, 3);
-}
-
+class Array;
#define PRIMITIVE_TEST(KLASS, ENUM, NAME) \
TEST(TypesTest, TestPrimitive_##ENUM) { \
KLASS tp; \
\
- ASSERT_EQ(tp.type, LogicalType::ENUM); \
+ ASSERT_EQ(tp.type, Type::ENUM); \
ASSERT_EQ(tp.name(), string(NAME)); \
\
KLASS tp_copy = tp; \
- ASSERT_EQ(tp_copy.type, LogicalType::ENUM); \
+ ASSERT_EQ(tp_copy.type, Type::ENUM); \
}
PRIMITIVE_TEST(Int8Type, INT8, "int8");
@@ -109,22 +99,20 @@ class TestPrimitiveBuilder : public TestBuilder {
void RandomData(int N, double pct_null = 0.1) {
Attrs::draw(N, &draws_);
- random_nulls(N, pct_null, &nulls_);
+ test::random_nulls(N, pct_null, &nulls_);
}
void CheckNullable() {
- ArrayType expected;
int size = builder_->length();
auto ex_data = std::make_shared<Buffer>(
reinterpret_cast<uint8_t*>(draws_.data()),
size * sizeof(T));
- auto ex_nulls = bytes_to_null_buffer(nulls_.data(), size);
-
- int32_t ex_null_count = null_count(nulls_);
+ auto ex_nulls = test::bytes_to_null_buffer(nulls_.data(), size);
+ int32_t ex_null_count = test::null_count(nulls_);
- expected.Init(size, ex_data, ex_null_count, ex_nulls);
+ auto expected = std::make_shared<ArrayType>(size, ex_data, ex_null_count, ex_nulls);
std::shared_ptr<ArrayType> result = std::dynamic_pointer_cast<ArrayType>(
builder_->Finish());
@@ -135,18 +123,17 @@ class TestPrimitiveBuilder : public TestBuilder {
ASSERT_EQ(0, builder_->null_count());
ASSERT_EQ(nullptr, builder_->buffer());
- ASSERT_TRUE(result->Equals(expected));
+ ASSERT_TRUE(result->EqualsExact(*expected.get()));
ASSERT_EQ(ex_null_count, result->null_count());
}
void CheckNonNullable() {
- ArrayType expected;
int size = builder_nn_->length();
auto ex_data = std::make_shared<Buffer>(reinterpret_cast<uint8_t*>(draws_.data()),
size * sizeof(T));
- expected.Init(size, ex_data);
+ auto expected = std::make_shared<ArrayType>(size, ex_data);
std::shared_ptr<ArrayType> result = std::dynamic_pointer_cast<ArrayType>(
builder_nn_->Finish());
@@ -156,7 +143,7 @@ class TestPrimitiveBuilder : public TestBuilder {
ASSERT_EQ(0, builder_nn_->capacity());
ASSERT_EQ(nullptr, builder_nn_->buffer());
- ASSERT_TRUE(result->Equals(expected));
+ ASSERT_TRUE(result->EqualsExact(*expected.get()));
ASSERT_EQ(0, result->null_count());
}
@@ -183,8 +170,8 @@ class TestPrimitiveBuilder : public TestBuilder {
#define PINT_DECL(CapType, c_type, LOWER, UPPER) \
struct P##CapType { \
PTYPE_DECL(CapType, c_type); \
- static void draw(int N, vector<T>* draws) { \
- randint<T>(N, LOWER, UPPER, draws); \
+ static void draw(int N, vector<T>* draws) { \
+ test::randint<T>(N, LOWER, UPPER, draws); \
} \
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/primitive.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive.cc b/cpp/src/arrow/types/primitive.cc
index c86260b..32b8bfa 100644
--- a/cpp/src/arrow/types/primitive.cc
+++ b/cpp/src/arrow/types/primitive.cc
@@ -26,16 +26,16 @@ namespace arrow {
// ----------------------------------------------------------------------
// Primitive array base
-void PrimitiveArray::Init(const TypePtr& type, int32_t length,
+PrimitiveArray::PrimitiveArray(const TypePtr& type, int32_t length,
const std::shared_ptr<Buffer>& data,
int32_t null_count,
- const std::shared_ptr<Buffer>& nulls) {
- Array::Init(type, length, null_count, nulls);
+ const std::shared_ptr<Buffer>& nulls) :
+ Array(type, length, null_count, nulls) {
data_ = data;
raw_data_ = data == nullptr? nullptr : data_->data();
}
-bool PrimitiveArray::Equals(const PrimitiveArray& other) const {
+bool PrimitiveArray::EqualsExact(const PrimitiveArray& other) const {
if (this == &other) return true;
if (null_count_ != other.null_count_) {
return false;
@@ -50,4 +50,12 @@ bool PrimitiveArray::Equals(const PrimitiveArray& other) const {
}
}
+bool PrimitiveArray::Equals(const std::shared_ptr<Array>& arr) const {
+ if (this == arr.get()) return true;
+ if (this->type_enum() != arr->type_enum()) {
+ return false;
+ }
+ return EqualsExact(*static_cast<const PrimitiveArray*>(arr.get()));
+}
+
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/primitive.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive.h b/cpp/src/arrow/types/primitive.h
index 22ab59c..e01027c 100644
--- a/cpp/src/arrow/types/primitive.h
+++ b/cpp/src/arrow/types/primitive.h
@@ -21,7 +21,6 @@
#include <cstdint>
#include <cstring>
#include <memory>
-#include <string>
#include "arrow/array.h"
#include "arrow/builder.h"
@@ -38,64 +37,57 @@ class MemoryPool;
// Base class for fixed-size logical types
class PrimitiveArray : public Array {
public:
- PrimitiveArray() : Array(), data_(nullptr), raw_data_(nullptr) {}
-
- virtual ~PrimitiveArray() {}
-
- void Init(const TypePtr& type, int32_t length,
+ PrimitiveArray(const TypePtr& type, int32_t length,
const std::shared_ptr<Buffer>& data,
int32_t null_count = 0,
const std::shared_ptr<Buffer>& nulls = nullptr);
+ virtual ~PrimitiveArray() {}
const std::shared_ptr<Buffer>& data() const { return data_;}
- bool Equals(const PrimitiveArray& other) const;
+ bool EqualsExact(const PrimitiveArray& other) const;
+ bool Equals(const std::shared_ptr<Array>& arr) const override;
protected:
std::shared_ptr<Buffer> data_;
const uint8_t* raw_data_;
};
-
-template <typename TypeClass>
-class PrimitiveArrayImpl : public PrimitiveArray {
- public:
- typedef typename TypeClass::c_type value_type;
-
- PrimitiveArrayImpl() : PrimitiveArray() {}
-
- virtual ~PrimitiveArrayImpl() {}
-
- PrimitiveArrayImpl(int32_t length, const std::shared_ptr<Buffer>& data,
- int32_t null_count = 0,
- const std::shared_ptr<Buffer>& nulls = nullptr) {
- Init(length, data, null_count, nulls);
- }
-
- void Init(int32_t length, const std::shared_ptr<Buffer>& data,
- int32_t null_count = 0,
- const std::shared_ptr<Buffer>& nulls = nullptr) {
- TypePtr type(new TypeClass());
- PrimitiveArray::Init(type, length, data, null_count, nulls);
- }
-
- bool Equals(const PrimitiveArrayImpl& other) const {
- return PrimitiveArray::Equals(*static_cast<const PrimitiveArray*>(&other));
- }
-
- const value_type* raw_data() const {
- return reinterpret_cast<const value_type*>(raw_data_);
- }
-
- value_type Value(int i) const {
- return raw_data()[i];
- }
-
- TypeClass* exact_type() const {
- return static_cast<TypeClass*>(type_);
- }
+#define NUMERIC_ARRAY_DECL(NAME, TypeClass, T) \
+class NAME : public PrimitiveArray { \
+ public: \
+ using value_type = T; \
+ using PrimitiveArray::PrimitiveArray; \
+ NAME(int32_t length, const std::shared_ptr<Buffer>& data, \
+ int32_t null_count = 0, \
+ const std::shared_ptr<Buffer>& nulls = nullptr) : \
+ PrimitiveArray(std::make_shared<TypeClass>(), length, data, \
+ null_count, nulls) {} \
+ \
+ bool EqualsExact(const NAME& other) const { \
+ return PrimitiveArray::EqualsExact( \
+ *static_cast<const PrimitiveArray*>(&other)); \
+ } \
+ \
+ const T* raw_data() const { \
+ return reinterpret_cast<const T*>(raw_data_); \
+ } \
+ \
+ T Value(int i) const { \
+ return raw_data()[i]; \
+ } \
};
+NUMERIC_ARRAY_DECL(UInt8Array, UInt8Type, uint8_t);
+NUMERIC_ARRAY_DECL(Int8Array, Int8Type, int8_t);
+NUMERIC_ARRAY_DECL(UInt16Array, UInt16Type, uint16_t);
+NUMERIC_ARRAY_DECL(Int16Array, Int16Type, int16_t);
+NUMERIC_ARRAY_DECL(UInt32Array, UInt32Type, uint32_t);
+NUMERIC_ARRAY_DECL(Int32Array, Int32Type, int32_t);
+NUMERIC_ARRAY_DECL(UInt64Array, UInt64Type, uint64_t);
+NUMERIC_ARRAY_DECL(Int64Array, Int64Type, int64_t);
+NUMERIC_ARRAY_DECL(FloatArray, FloatType, float);
+NUMERIC_ARRAY_DECL(DoubleArray, DoubleType, double);
template <typename Type, typename ArrayType>
class PrimitiveBuilder : public ArrayBuilder {
@@ -202,8 +194,9 @@ class PrimitiveBuilder : public ArrayBuilder {
}
std::shared_ptr<Array> Finish() override {
- std::shared_ptr<ArrayType> result = std::make_shared<ArrayType>();
- result->PrimitiveArray::Init(type_, length_, values_, null_count_, nulls_);
+ std::shared_ptr<ArrayType> result = std::make_shared<ArrayType>(
+ type_, length_, values_, null_count_, nulls_);
+
values_ = nulls_ = nullptr;
capacity_ = length_ = null_count_ = 0;
return result;
@@ -222,6 +215,21 @@ class PrimitiveBuilder : public ArrayBuilder {
int elsize_;
};
+// Builders
+
+typedef PrimitiveBuilder<UInt8Type, UInt8Array> UInt8Builder;
+typedef PrimitiveBuilder<UInt16Type, UInt16Array> UInt16Builder;
+typedef PrimitiveBuilder<UInt32Type, UInt32Array> UInt32Builder;
+typedef PrimitiveBuilder<UInt64Type, UInt64Array> UInt64Builder;
+
+typedef PrimitiveBuilder<Int8Type, Int8Array> Int8Builder;
+typedef PrimitiveBuilder<Int16Type, Int16Array> Int16Builder;
+typedef PrimitiveBuilder<Int32Type, Int32Array> Int32Builder;
+typedef PrimitiveBuilder<Int64Type, Int64Array> Int64Builder;
+
+typedef PrimitiveBuilder<FloatType, FloatArray> FloatBuilder;
+typedef PrimitiveBuilder<DoubleType, DoubleArray> DoubleBuilder;
+
} // namespace arrow
#endif // ARROW_TYPES_PRIMITIVE_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/string-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/string-test.cc b/cpp/src/arrow/types/string-test.cc
index 6381093..7dc3d68 100644
--- a/cpp/src/arrow/types/string-test.cc
+++ b/cpp/src/arrow/types/string-test.cc
@@ -15,21 +15,20 @@
// specific language governing permissions and limitations
// under the License.
-#include <gtest/gtest.h>
#include <cstdint>
+#include <cstdlib>
#include <memory>
#include <string>
#include <vector>
+#include "gtest/gtest.h"
+
#include "arrow/array.h"
-#include "arrow/builder.h"
#include "arrow/test-util.h"
#include "arrow/type.h"
-#include "arrow/types/construct.h"
-#include "arrow/types/integer.h"
+#include "arrow/types/primitive.h"
#include "arrow/types/string.h"
#include "arrow/types/test-common.h"
-#include "arrow/util/status.h"
namespace arrow {
@@ -38,14 +37,14 @@ class Buffer;
TEST(TypesTest, TestCharType) {
CharType t1(5);
- ASSERT_EQ(t1.type, LogicalType::CHAR);
+ ASSERT_EQ(t1.type, Type::CHAR);
ASSERT_EQ(t1.size, 5);
ASSERT_EQ(t1.ToString(), std::string("char(5)"));
// Test copy constructor
CharType t2 = t1;
- ASSERT_EQ(t2.type, LogicalType::CHAR);
+ ASSERT_EQ(t2.type, Type::CHAR);
ASSERT_EQ(t2.size, 5);
}
@@ -53,22 +52,20 @@ TEST(TypesTest, TestCharType) {
TEST(TypesTest, TestVarcharType) {
VarcharType t1(5);
- ASSERT_EQ(t1.type, LogicalType::VARCHAR);
+ ASSERT_EQ(t1.type, Type::VARCHAR);
ASSERT_EQ(t1.size, 5);
- ASSERT_EQ(t1.physical_type.size, 6);
ASSERT_EQ(t1.ToString(), std::string("varchar(5)"));
// Test copy constructor
VarcharType t2 = t1;
- ASSERT_EQ(t2.type, LogicalType::VARCHAR);
+ ASSERT_EQ(t2.type, Type::VARCHAR);
ASSERT_EQ(t2.size, 5);
- ASSERT_EQ(t2.physical_type.size, 6);
}
TEST(TypesTest, TestStringType) {
StringType str;
- ASSERT_EQ(str.type, LogicalType::STRING);
+ ASSERT_EQ(str.type, Type::STRING);
ASSERT_EQ(str.name(), std::string("string"));
}
@@ -90,15 +87,16 @@ class TestStringContainer : public ::testing::Test {
length_ = offsets_.size() - 1;
int nchars = chars_.size();
- value_buf_ = to_buffer(chars_);
+ value_buf_ = test::to_buffer(chars_);
values_ = ArrayPtr(new UInt8Array(nchars, value_buf_));
- offsets_buf_ = to_buffer(offsets_);
+ offsets_buf_ = test::to_buffer(offsets_);
- nulls_buf_ = bytes_to_null_buffer(nulls_.data(), nulls_.size());
- null_count_ = null_count(nulls_);
+ nulls_buf_ = test::bytes_to_null_buffer(nulls_.data(), nulls_.size());
+ null_count_ = test::null_count(nulls_);
- strings_.Init(length_, offsets_buf_, values_, null_count_, nulls_buf_);
+ strings_ = std::make_shared<StringArray>(length_, offsets_buf_, values_,
+ null_count_, nulls_buf_);
}
protected:
@@ -116,28 +114,28 @@ class TestStringContainer : public ::testing::Test {
int length_;
ArrayPtr values_;
- StringArray strings_;
+ std::shared_ptr<StringArray> strings_;
};
TEST_F(TestStringContainer, TestArrayBasics) {
- ASSERT_EQ(length_, strings_.length());
- ASSERT_EQ(1, strings_.null_count());
+ ASSERT_EQ(length_, strings_->length());
+ ASSERT_EQ(1, strings_->null_count());
}
TEST_F(TestStringContainer, TestType) {
- TypePtr type = strings_.type();
+ TypePtr type = strings_->type();
- ASSERT_EQ(LogicalType::STRING, type->type);
- ASSERT_EQ(LogicalType::STRING, strings_.logical_type());
+ ASSERT_EQ(Type::STRING, type->type);
+ ASSERT_EQ(Type::STRING, strings_->type_enum());
}
TEST_F(TestStringContainer, TestListFunctions) {
int pos = 0;
for (size_t i = 0; i < expected_.size(); ++i) {
- ASSERT_EQ(pos, strings_.value_offset(i));
- ASSERT_EQ(expected_[i].size(), strings_.value_length(i));
+ ASSERT_EQ(pos, strings_->value_offset(i));
+ ASSERT_EQ(expected_[i].size(), strings_->value_length(i));
pos += expected_[i].size();
}
}
@@ -151,9 +149,9 @@ TEST_F(TestStringContainer, TestDestructor) {
TEST_F(TestStringContainer, TestGetString) {
for (size_t i = 0; i < expected_.size(); ++i) {
if (nulls_[i]) {
- ASSERT_TRUE(strings_.IsNull(i));
+ ASSERT_TRUE(strings_->IsNull(i));
} else {
- ASSERT_EQ(expected_[i], strings_.GetString(i));
+ ASSERT_EQ(expected_[i], strings_->GetString(i));
}
}
}
@@ -199,7 +197,7 @@ TEST_F(TestStringBuilder, TestScalarAppend) {
Done();
ASSERT_EQ(reps * N, result_->length());
- ASSERT_EQ(reps * null_count(is_null), result_->null_count());
+ ASSERT_EQ(reps * test::null_count(is_null), result_->null_count());
ASSERT_EQ(reps * 6, result_->values()->length());
int32_t length;
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/string.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/string.h b/cpp/src/arrow/types/string.h
index 8ccc0a9..2b3fba5 100644
--- a/cpp/src/arrow/types/string.h
+++ b/cpp/src/arrow/types/string.h
@@ -25,25 +25,21 @@
#include "arrow/array.h"
#include "arrow/type.h"
-#include "arrow/types/integer.h"
#include "arrow/types/list.h"
+#include "arrow/types/primitive.h"
#include "arrow/util/status.h"
namespace arrow {
-class ArrayBuilder;
class Buffer;
class MemoryPool;
struct CharType : public DataType {
int size;
- BytesType physical_type;
-
- explicit CharType(int size, bool nullable = true)
- : DataType(LogicalType::CHAR, nullable),
- size(size),
- physical_type(BytesType(size)) {}
+ explicit CharType(int size)
+ : DataType(Type::CHAR),
+ size(size) {}
CharType(const CharType& other)
: CharType(other.size) {}
@@ -56,54 +52,36 @@ struct CharType : public DataType {
struct VarcharType : public DataType {
int size;
- BytesType physical_type;
-
- explicit VarcharType(int size, bool nullable = true)
- : DataType(LogicalType::VARCHAR, nullable),
- size(size),
- physical_type(BytesType(size + 1)) {}
+ explicit VarcharType(int size)
+ : DataType(Type::VARCHAR),
+ size(size) {}
VarcharType(const VarcharType& other)
: VarcharType(other.size) {}
virtual std::string ToString() const;
};
-static const LayoutPtr byte1(new BytesType(1));
-static const LayoutPtr physical_string = LayoutPtr(new ListLayoutType(byte1));
-
// TODO: add a BinaryArray layer in between
class StringArray : public ListArray {
public:
- StringArray() : ListArray(), bytes_(nullptr), raw_bytes_(nullptr) {}
-
- StringArray(int32_t length, const std::shared_ptr<Buffer>& offsets,
- const ArrayPtr& values,
- int32_t null_count = 0,
- const std::shared_ptr<Buffer>& nulls = nullptr) {
- Init(length, offsets, values, null_count, nulls);
- }
-
- void Init(const TypePtr& type, int32_t length,
+ StringArray(const TypePtr& type, int32_t length,
const std::shared_ptr<Buffer>& offsets,
const ArrayPtr& values,
int32_t null_count = 0,
- const std::shared_ptr<Buffer>& nulls = nullptr) {
- ListArray::Init(type, length, offsets, values, null_count, nulls);
-
- // TODO: type validation for values array
-
+ const std::shared_ptr<Buffer>& nulls = nullptr) :
+ ListArray(type, length, offsets, values, null_count, nulls) {
// For convenience
bytes_ = static_cast<UInt8Array*>(values.get());
raw_bytes_ = bytes_->raw_data();
}
- void Init(int32_t length, const std::shared_ptr<Buffer>& offsets,
+ StringArray(int32_t length,
+ const std::shared_ptr<Buffer>& offsets,
const ArrayPtr& values,
int32_t null_count = 0,
- const std::shared_ptr<Buffer>& nulls = nullptr) {
- TypePtr type(new StringType());
- Init(type, length, offsets, values, null_count, nulls);
- }
+ const std::shared_ptr<Buffer>& nulls = nullptr) :
+ StringArray(std::make_shared<StringType>(), length, offsets, values,
+ null_count, nulls) {}
// Compute the pointer t
const uint8_t* GetValue(int i, int32_t* out_length) const {
@@ -125,9 +103,6 @@ class StringArray : public ListArray {
};
// Array builder
-
-
-
class StringBuilder : public ListBuilder {
public:
explicit StringBuilder(MemoryPool* pool, const TypePtr& type) :
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/struct-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/struct-test.cc b/cpp/src/arrow/types/struct-test.cc
index 9a4777e..d94396f 100644
--- a/cpp/src/arrow/types/struct-test.cc
+++ b/cpp/src/arrow/types/struct-test.cc
@@ -15,16 +15,13 @@
// specific language governing permissions and limitations
// under the License.
-#include <gtest/gtest.h>
-
#include <memory>
#include <string>
#include <vector>
+#include "gtest/gtest.h"
+
#include "arrow/type.h"
-#include "arrow/types/integer.h"
-#include "arrow/types/string.h"
-#include "arrow/types/struct.h"
using std::shared_ptr;
using std::string;
@@ -42,13 +39,13 @@ TEST(TestStructType, Basics) {
TypePtr f2_type = TypePtr(new UInt8Type());
auto f2 = std::make_shared<Field>("f2", f2_type);
- vector<shared_ptr<Field> > fields = {f0, f1, f2};
+ vector<shared_ptr<Field>> fields = {f0, f1, f2};
StructType struct_type(fields);
- ASSERT_TRUE(struct_type.field(0)->Equals(f0));
- ASSERT_TRUE(struct_type.field(1)->Equals(f1));
- ASSERT_TRUE(struct_type.field(2)->Equals(f2));
+ ASSERT_TRUE(struct_type.child(0)->Equals(f0));
+ ASSERT_TRUE(struct_type.child(1)->Equals(f1));
+ ASSERT_TRUE(struct_type.child(2)->Equals(f2));
ASSERT_EQ(struct_type.ToString(), "struct<f0: int32, f1: string, f2: uint8>");
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/test-common.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/test-common.h b/cpp/src/arrow/types/test-common.h
index 1744efc..227aca6 100644
--- a/cpp/src/arrow/types/test-common.h
+++ b/cpp/src/arrow/types/test-common.h
@@ -18,11 +18,12 @@
#ifndef ARROW_TYPES_TEST_COMMON_H
#define ARROW_TYPES_TEST_COMMON_H
-#include <gtest/gtest.h>
#include <memory>
#include <string>
#include <vector>
+#include "gtest/gtest.h"
+
#include "arrow/test-util.h"
#include "arrow/type.h"
#include "arrow/util/memory-pool.h"
@@ -34,7 +35,7 @@ namespace arrow {
class TestBuilder : public ::testing::Test {
public:
void SetUp() {
- pool_ = GetDefaultMemoryPool();
+ pool_ = default_memory_pool();
type_ = TypePtr(new UInt8Type());
builder_.reset(new UInt8Builder(pool_, type_));
builder_nn_.reset(new UInt8Builder(pool_, type_));
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/union.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/union.h b/cpp/src/arrow/types/union.h
index 9aff780..29cda90 100644
--- a/cpp/src/arrow/types/union.h
+++ b/cpp/src/arrow/types/union.h
@@ -30,8 +30,8 @@ namespace arrow {
class Buffer;
-struct DenseUnionType : public CollectionType<LogicalType::DENSE_UNION> {
- typedef CollectionType<LogicalType::DENSE_UNION> Base;
+struct DenseUnionType : public CollectionType<Type::DENSE_UNION> {
+ typedef CollectionType<Type::DENSE_UNION> Base;
explicit DenseUnionType(const std::vector<TypePtr>& child_types) :
Base() {
@@ -42,8 +42,8 @@ struct DenseUnionType : public CollectionType<LogicalType::DENSE_UNION> {
};
-struct SparseUnionType : public CollectionType<LogicalType::SPARSE_UNION> {
- typedef CollectionType<LogicalType::SPARSE_UNION> Base;
+struct SparseUnionType : public CollectionType<Type::SPARSE_UNION> {
+ typedef CollectionType<Type::SPARSE_UNION> Base;
explicit SparseUnionType(const std::vector<TypePtr>& child_types) :
Base() {
@@ -55,28 +55,20 @@ struct SparseUnionType : public CollectionType<LogicalType::SPARSE_UNION> {
class UnionArray : public Array {
- public:
- UnionArray() : Array() {}
-
protected:
// The data are types encoded as int16
Buffer* types_;
- std::vector<std::shared_ptr<Array> > children_;
+ std::vector<std::shared_ptr<Array>> children_;
};
class DenseUnionArray : public UnionArray {
- public:
- DenseUnionArray() : UnionArray() {}
-
protected:
Buffer* offset_buf_;
};
class SparseUnionArray : public UnionArray {
- public:
- SparseUnionArray() : UnionArray() {}
};
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/bit-util-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/bit-util-test.cc b/cpp/src/arrow/util/bit-util-test.cc
index 7506ca5..220bff0 100644
--- a/cpp/src/arrow/util/bit-util-test.cc
+++ b/cpp/src/arrow/util/bit-util-test.cc
@@ -15,10 +15,10 @@
// specific language governing permissions and limitations
// under the License.
-#include <gtest/gtest.h>
-
#include "arrow/util/bit-util.h"
+#include "gtest/gtest.h"
+
namespace arrow {
TEST(UtilTests, TestNextPower2) {
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/bit-util.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h
index 5e7197f..1d2d1d5 100644
--- a/cpp/src/arrow/util/bit-util.h
+++ b/cpp/src/arrow/util/bit-util.h
@@ -19,7 +19,6 @@
#define ARROW_UTIL_BIT_UTIL_H
#include <cstdint>
-#include <cstdlib>
#include <memory>
namespace arrow {
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/buffer-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/buffer-test.cc b/cpp/src/arrow/util/buffer-test.cc
index 9f1fd91..1d58226 100644
--- a/cpp/src/arrow/util/buffer-test.cc
+++ b/cpp/src/arrow/util/buffer-test.cc
@@ -15,11 +15,12 @@
// specific language governing permissions and limitations
// under the License.
-#include <gtest/gtest.h>
#include <cstdint>
#include <limits>
#include <string>
+#include "gtest/gtest.h"
+
#include "arrow/test-util.h"
#include "arrow/util/buffer.h"
#include "arrow/util/status.h"
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/buffer.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/buffer.cc b/cpp/src/arrow/util/buffer.cc
index 50f4716..04cdcd7 100644
--- a/cpp/src/arrow/util/buffer.cc
+++ b/cpp/src/arrow/util/buffer.cc
@@ -40,7 +40,7 @@ std::shared_ptr<Buffer> MutableBuffer::GetImmutableView() {
PoolBuffer::PoolBuffer(MemoryPool* pool) :
ResizableBuffer(nullptr, 0) {
if (pool == nullptr) {
- pool = GetDefaultMemoryPool();
+ pool = default_memory_pool();
}
pool_ = pool;
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/memory-pool-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/memory-pool-test.cc b/cpp/src/arrow/util/memory-pool-test.cc
index 954b5f9..6ef07a0 100644
--- a/cpp/src/arrow/util/memory-pool-test.cc
+++ b/cpp/src/arrow/util/memory-pool-test.cc
@@ -15,10 +15,11 @@
// specific language governing permissions and limitations
// under the License.
-#include <gtest/gtest.h>
#include <cstdint>
#include <limits>
+#include "gtest/gtest.h"
+
#include "arrow/test-util.h"
#include "arrow/util/memory-pool.h"
#include "arrow/util/status.h"
@@ -26,7 +27,7 @@
namespace arrow {
TEST(DefaultMemoryPool, MemoryTracking) {
- MemoryPool* pool = GetDefaultMemoryPool();
+ MemoryPool* pool = default_memory_pool();
uint8_t* data;
ASSERT_OK(pool->Allocate(100, &data));
@@ -37,7 +38,7 @@ TEST(DefaultMemoryPool, MemoryTracking) {
}
TEST(DefaultMemoryPool, OOM) {
- MemoryPool* pool = GetDefaultMemoryPool();
+ MemoryPool* pool = default_memory_pool();
uint8_t* data;
int64_t to_alloc = std::numeric_limits<int64_t>::max();
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/memory-pool.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/memory-pool.cc b/cpp/src/arrow/util/memory-pool.cc
index 5820346..0b885e9 100644
--- a/cpp/src/arrow/util/memory-pool.cc
+++ b/cpp/src/arrow/util/memory-pool.cc
@@ -70,9 +70,9 @@ void InternalMemoryPool::Free(uint8_t* buffer, int64_t size) {
InternalMemoryPool::~InternalMemoryPool() {}
-MemoryPool* GetDefaultMemoryPool() {
- static InternalMemoryPool default_memory_pool;
- return &default_memory_pool;
+MemoryPool* default_memory_pool() {
+ static InternalMemoryPool default_memory_pool_;
+ return &default_memory_pool_;
}
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/memory-pool.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/memory-pool.h b/cpp/src/arrow/util/memory-pool.h
index a7cb10d..0d24786 100644
--- a/cpp/src/arrow/util/memory-pool.h
+++ b/cpp/src/arrow/util/memory-pool.h
@@ -34,7 +34,7 @@ class MemoryPool {
virtual int64_t bytes_allocated() const = 0;
};
-MemoryPool* GetDefaultMemoryPool();
+MemoryPool* default_memory_pool();
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/status.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/status.cc b/cpp/src/arrow/util/status.cc
index c6e113e..43cb87e 100644
--- a/cpp/src/arrow/util/status.cc
+++ b/cpp/src/arrow/util/status.cc
@@ -54,6 +54,9 @@ std::string Status::CodeAsString() const {
case StatusCode::Invalid:
type = "Invalid";
break;
+ case StatusCode::IOError:
+ type = "IOError";
+ break;
case StatusCode::NotImplemented:
type = "NotImplemented";
break;
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/status.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/status.h b/cpp/src/arrow/util/status.h
index 47fda40..b593123 100644
--- a/cpp/src/arrow/util/status.h
+++ b/cpp/src/arrow/util/status.h
@@ -63,6 +63,7 @@ enum class StatusCode: char {
OutOfMemory = 1,
KeyError = 2,
Invalid = 3,
+ IOError = 4,
NotImplemented = 10,
};
@@ -97,12 +98,17 @@ class Status {
return Status(StatusCode::Invalid, msg, -1);
}
+ static Status IOError(const std::string& msg) {
+ return Status(StatusCode::IOError, msg, -1);
+ }
+
// Returns true iff the status indicates success.
bool ok() const { return (state_ == NULL); }
bool IsOutOfMemory() const { return code() == StatusCode::OutOfMemory; }
bool IsKeyError() const { return code() == StatusCode::KeyError; }
bool IsInvalid() const { return code() == StatusCode::Invalid; }
+ bool IsIOError() const { return code() == StatusCode::IOError; }
// Return a string representation of this status suitable for printing.
// Returns the string "OK" for success.
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/test_main.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/test_main.cc b/cpp/src/arrow/util/test_main.cc
index 00139f3..adc8466 100644
--- a/cpp/src/arrow/util/test_main.cc
+++ b/cpp/src/arrow/util/test_main.cc
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-#include <gtest/gtest.h>
+#include "gtest/gtest.h"
int main(int argc, char **argv) {
::testing::InitGoogleTest(&argc, argv);
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/thirdparty/build_thirdparty.sh
----------------------------------------------------------------------
diff --git a/cpp/thirdparty/build_thirdparty.sh b/cpp/thirdparty/build_thirdparty.sh
index 294737c..3d5f532 100755
--- a/cpp/thirdparty/build_thirdparty.sh
+++ b/cpp/thirdparty/build_thirdparty.sh
@@ -17,6 +17,7 @@ else
case $arg in
"gtest") F_GTEST=1 ;;
"gbenchmark") F_GBENCHMARK=1 ;;
+ "flatbuffers") F_FLATBUFFERS=1 ;;
*) echo "Unknown module: $arg"; exit 1 ;;
esac
done
@@ -78,6 +79,14 @@ if [ -n "$F_ALL" -o -n "$F_GBENCHMARK" ]; then
make VERBOSE=1 install || { echo "make $GBENCHMARK_ERROR" ; exit 1; }
fi
+FLATBUFFERS_ERROR="failed for flatbuffers"
+if [ -n "$F_ALL" -o -n "$F_FLATBUFFERS" ]; then
+ cd $TP_DIR/$FLATBUFFERS_BASEDIR
+ # Each step aborts on failure so the success message below is never printed after a broken build
+ CXXFLAGS=-fPIC cmake -DCMAKE_INSTALL_PREFIX:PATH=$PREFIX -DFLATBUFFERS_BUILD_TESTS=OFF . || { echo "cmake $FLATBUFFERS_ERROR" ; exit 1; }
+ make -j$PARALLEL || { echo "make $FLATBUFFERS_ERROR" ; exit 1; }
+ make install || { echo "install $FLATBUFFERS_ERROR" ; exit 1; }
+fi
echo "---------------------"
echo "Thirdparty dependencies built and installed into $PREFIX successfully"
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/thirdparty/download_thirdparty.sh
----------------------------------------------------------------------
diff --git a/cpp/thirdparty/download_thirdparty.sh b/cpp/thirdparty/download_thirdparty.sh
index d22c559..d299afc 100755
--- a/cpp/thirdparty/download_thirdparty.sh
+++ b/cpp/thirdparty/download_thirdparty.sh
@@ -25,3 +25,8 @@ if [ ! -d ${GBENCHMARK_BASEDIR} ]; then
echo "Fetching google benchmark"
download_extract_and_cleanup $GBENCHMARK_URL
fi
+
+if [ ! -d ${FLATBUFFERS_BASEDIR} ]; then
+ echo "Fetching flatbuffers"
+ download_extract_and_cleanup $FLATBUFFERS_URL
+fi
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/thirdparty/versions.sh
----------------------------------------------------------------------
diff --git a/cpp/thirdparty/versions.sh b/cpp/thirdparty/versions.sh
index 9cfc7cd..cb455b4 100755
--- a/cpp/thirdparty/versions.sh
+++ b/cpp/thirdparty/versions.sh
@@ -5,3 +5,7 @@ GTEST_BASEDIR=googletest-release-$GTEST_VERSION
GBENCHMARK_VERSION=1.0.0
GBENCHMARK_URL="https://github.com/google/benchmark/archive/v${GBENCHMARK_VERSION}.tar.gz"
GBENCHMARK_BASEDIR=benchmark-$GBENCHMARK_VERSION
+
+FLATBUFFERS_VERSION=1.3.0
+FLATBUFFERS_URL="https://github.com/google/flatbuffers/archive/v${FLATBUFFERS_VERSION}.tar.gz"
+FLATBUFFERS_BASEDIR=flatbuffers-$FLATBUFFERS_VERSION
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/format/Message.fbs
----------------------------------------------------------------------
diff --git a/format/Message.fbs b/format/Message.fbs
new file mode 100644
index 0000000..3ffd203
--- /dev/null
+++ b/format/Message.fbs
@@ -0,0 +1,183 @@
+namespace apache.arrow.flatbuf;
+
+/// ----------------------------------------------------------------------
+/// Logical types and their metadata (if any)
+///
+/// These are stored in the flatbuffer in the Type union below
+
+/// A Tuple in the flatbuffer metadata is the same as an Arrow Struct
+/// (according to the physical memory layout). We used Tuple here as Struct is
+/// a reserved word in Flatbuffers
+table Tuple {
+}
+
+table List {
+}
+
+enum UnionMode:int { Sparse, Dense }
+
+table Union {
+ mode: UnionMode;
+}
+
+table Bit {
+}
+
+table Int {
+ bitWidth: int; // 1 to 64
+ is_signed: bool;
+}
+
+enum Precision:int {SINGLE, DOUBLE}
+
+table FloatingPoint {
+ precision: Precision;
+}
+
+table Utf8 {
+}
+
+table Binary {
+}
+
+table Bool {
+}
+
+table Decimal {
+ precision: int;
+ scale: int;
+}
+
+table Timestamp {
+ timezone: string;
+}
+
+table JSONScalar {
+ dense:bool=true;
+}
+
+/// ----------------------------------------------------------------------
+/// Top-level Type value, enabling extensible type-specific metadata. We can
+/// add new logical types to Type without breaking backwards compatibility
+
+union Type {
+ Int,
+ Bit,
+ FloatingPoint,
+ Binary,
+ Utf8,
+ Bool,
+ Decimal,
+ Timestamp,
+ List,
+ Tuple,
+ Union,
+ JSONScalar
+}
+
+/// ----------------------------------------------------------------------
+/// A field represents a named column in a record / row batch or child of a
+/// nested type.
+///
+/// - children is only for nested Arrow arrays
+/// - For primitive types, children will have length 0
+/// - nullable should default to true in general
+
+table Field {
+ // Name is not required, e.g. in a List
+ name: string;
+ nullable: bool;
+ type: Type;
+ children: [Field];
+}
+
+/// ----------------------------------------------------------------------
+/// A Schema describes the columns in a row batch
+
+table Schema {
+ fields: [Field];
+}
+
+/// ----------------------------------------------------------------------
+/// Data structures for describing a table row batch (a collection of
+/// equal-length Arrow arrays)
+
+/// A Buffer represents a single contiguous memory segment
+struct Buffer {
+ /// The shared memory page id where this buffer is located. Currently this is
+ /// not used
+ page: int;
+
+ /// The relative offset into the shared memory page where the bytes for this
+ /// buffer start
+ offset: long;
+
+ /// The absolute length (in bytes) of the memory buffer. The memory is found
+ /// from offset (inclusive) to offset + length (non-inclusive).
+ length: long;
+}
+
+/// Metadata about a field at some level of a nested type tree (but not
+/// its children).
+///
+/// For example, a List<Int16> with values [[1, 2, 3], null, [4], [5, 6], null]
+/// would have {length: 5, null_count: 2} for its List node, and {length: 6,
+/// null_count: 0} for its Int16 node, as separate FieldNode structs
+struct FieldNode {
+ /// The number of value slots in the Arrow array at this level of a nested
+ /// tree
+ length: int;
+
+ /// The number of observed nulls. Fields with null_count == 0 may choose not
+ /// to write their physical null bitmap out as a materialized buffer, instead
+ /// setting the length of the null buffer to 0.
+ null_count: int;
+}
+
+/// A data header describing the shared memory layout of a "record" or "row"
+/// batch. Some systems call this a "row batch" internally and others a "record
+/// batch".
+table RecordBatch {
+ /// number of records / rows. The arrays in the batch should all have this
+ /// length
+ length: int;
+
+ /// Nodes correspond to the pre-ordered flattened logical schema
+ nodes: [FieldNode];
+
+ /// Buffers correspond to the pre-ordered flattened buffer tree
+ ///
+ /// The number of buffers appended to this list depends on the schema. For
+ /// example, most primitive arrays will have 2 buffers, 1 for the null bitmap
+ /// and 1 for the values. For struct arrays, there will only be a single
+ /// buffer for the null bitmap
+ buffers: [Buffer];
+}
+
+/// ----------------------------------------------------------------------
+/// For sending dictionary encoding information. Any Field can be
+/// dictionary-encoded, but in this case none of its children may be
+/// dictionary-encoded.
+///
+/// TODO(wesm): To be documented in more detail
+
+table DictionaryBatch {
+ id: long;
+ data: RecordBatch;
+}
+
+/// ----------------------------------------------------------------------
+/// The root Message type
+
+/// This union enables us to easily send different message types without
+/// redundant storage, and in the future we can easily add new message types.
+union MessageHeader {
+ Schema, DictionaryBatch, RecordBatch
+}
+
+table Message {
+ header: MessageHeader;
+ bodyLength: long;
+}
+
+root_type Message;
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/__init__.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 8d93a15..9a08070 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -35,4 +35,6 @@ from pyarrow.schema import (null, bool_,
uint8, uint16, uint32, uint64,
float_, double, string,
list_, struct, field,
- DataType, Field, Schema)
+ DataType, Field, Schema, schema)
+
+from pyarrow.array import RowBatch
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/array.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/array.pxd b/python/pyarrow/array.pxd
index d0d3486..de3c774 100644
--- a/python/pyarrow/array.pxd
+++ b/python/pyarrow/array.pxd
@@ -16,7 +16,7 @@
# under the License.
from pyarrow.includes.common cimport shared_ptr
-from pyarrow.includes.libarrow cimport CArray, LogicalType
+from pyarrow.includes.libarrow cimport CArray
from pyarrow.scalar import NA
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/array.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/array.pyx b/python/pyarrow/array.pyx
index bceb333..c5d40dd 100644
--- a/python/pyarrow/array.pyx
+++ b/python/pyarrow/array.pyx
@@ -28,6 +28,9 @@ from pyarrow.error cimport check_status
cimport pyarrow.scalar as scalar
from pyarrow.scalar import NA
+from pyarrow.schema cimport Schema
+import pyarrow.schema as schema
+
def total_allocated_bytes():
cdef MemoryPool* pool = pyarrow.GetMemoryPool()
return pool.bytes_allocated()
@@ -155,12 +158,12 @@ cdef class StringArray(Array):
cdef dict _array_classes = {
- LogicalType_NA: NullArray,
- LogicalType_BOOL: BooleanArray,
- LogicalType_INT64: Int64Array,
- LogicalType_DOUBLE: DoubleArray,
- LogicalType_LIST: ListArray,
- LogicalType_STRING: StringArray,
+ Type_NA: NullArray,
+ Type_BOOL: BooleanArray,
+ Type_INT64: Int64Array,
+ Type_DOUBLE: DoubleArray,
+ Type_LIST: ListArray,
+ Type_STRING: StringArray,
}
cdef object box_arrow_array(const shared_ptr[CArray]& sp_array):
@@ -190,3 +193,35 @@ def from_pylist(object list_obj, DataType type=None):
raise NotImplementedError
return box_arrow_array(sp_array)
+
+#----------------------------------------------------------------------
+# Table-like data structures
+
+cdef class RowBatch:
+ """
+ A Schema, a row count, and a list of data arrays (one per schema field)
+ """
+ cdef readonly:
+ Schema schema
+ int num_rows
+ list arrays
+
+ def __cinit__(self, Schema schema, int num_rows, list arrays):
+ self.schema = schema
+ self.num_rows = num_rows
+ self.arrays = arrays
+
+ if len(self.schema) != len(arrays):
+ raise ValueError('Mismatch number of data arrays and '
+ 'schema fields')
+
+ def __len__(self):
+ return self.num_rows
+
+ property num_columns:
+
+ def __get__(self):
+ return len(self.arrays)
+
+ def __getitem__(self, i):
+ return self.arrays[i]
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/includes/libarrow.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index baba112..e6afcbd 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -21,31 +21,30 @@ from pyarrow.includes.common cimport *
cdef extern from "arrow/api.h" namespace "arrow" nogil:
- enum LogicalType" arrow::LogicalType::type":
- LogicalType_NA" arrow::LogicalType::NA"
+ enum Type" arrow::Type::type":
+ Type_NA" arrow::Type::NA"
- LogicalType_BOOL" arrow::LogicalType::BOOL"
+ Type_BOOL" arrow::Type::BOOL"
- LogicalType_UINT8" arrow::LogicalType::UINT8"
- LogicalType_INT8" arrow::LogicalType::INT8"
- LogicalType_UINT16" arrow::LogicalType::UINT16"
- LogicalType_INT16" arrow::LogicalType::INT16"
- LogicalType_UINT32" arrow::LogicalType::UINT32"
- LogicalType_INT32" arrow::LogicalType::INT32"
- LogicalType_UINT64" arrow::LogicalType::UINT64"
- LogicalType_INT64" arrow::LogicalType::INT64"
+ Type_UINT8" arrow::Type::UINT8"
+ Type_INT8" arrow::Type::INT8"
+ Type_UINT16" arrow::Type::UINT16"
+ Type_INT16" arrow::Type::INT16"
+ Type_UINT32" arrow::Type::UINT32"
+ Type_INT32" arrow::Type::INT32"
+ Type_UINT64" arrow::Type::UINT64"
+ Type_INT64" arrow::Type::INT64"
- LogicalType_FLOAT" arrow::LogicalType::FLOAT"
- LogicalType_DOUBLE" arrow::LogicalType::DOUBLE"
+ Type_FLOAT" arrow::Type::FLOAT"
+ Type_DOUBLE" arrow::Type::DOUBLE"
- LogicalType_STRING" arrow::LogicalType::STRING"
+ Type_STRING" arrow::Type::STRING"
- LogicalType_LIST" arrow::LogicalType::LIST"
- LogicalType_STRUCT" arrow::LogicalType::STRUCT"
+ Type_LIST" arrow::Type::LIST"
+ Type_STRUCT" arrow::Type::STRUCT"
cdef cppclass CDataType" arrow::DataType":
- LogicalType type
- c_bool nullable
+ Type type
c_bool Equals(const CDataType* other)
@@ -55,8 +54,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
int64_t bytes_allocated()
cdef cppclass CListType" arrow::ListType"(CDataType):
- CListType(const shared_ptr[CDataType]& value_type,
- c_bool nullable)
+ CListType(const shared_ptr[CDataType]& value_type)
cdef cppclass CStringType" arrow::StringType"(CDataType):
pass
@@ -65,21 +63,26 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
c_string name
shared_ptr[CDataType] type
- CField(const c_string& name, const shared_ptr[CDataType]& type)
+ c_bool nullable
+
+ CField(const c_string& name, const shared_ptr[CDataType]& type,
+ c_bool nullable)
cdef cppclass CStructType" arrow::StructType"(CDataType):
- CStructType(const vector[shared_ptr[CField]]& fields,
- c_bool nullable)
+ CStructType(const vector[shared_ptr[CField]]& fields)
cdef cppclass CSchema" arrow::Schema":
- CSchema(const shared_ptr[CField]& fields)
+ CSchema(const vector[shared_ptr[CField]]& fields)
+ const shared_ptr[CField]& field(int i)
+ int num_fields()
+ c_string ToString()
cdef cppclass CArray" arrow::Array":
const shared_ptr[CDataType]& type()
int32_t length()
int32_t null_count()
- LogicalType logical_type()
+ Type type_enum()
c_bool IsNull(int i)
@@ -122,3 +125,57 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
cdef cppclass CStringArray" arrow::StringArray"(CListArray):
c_string GetString(int i)
+
+
+cdef extern from "arrow/api.h" namespace "arrow" nogil:
+ # We can later add more of the common status factory methods as needed
+ cdef CStatus CStatus_OK "Status::OK"()
+
+ cdef cppclass CStatus "arrow::Status":
+ CStatus()
+
+ c_string ToString()
+
+ c_bool ok()
+ c_bool IsOutOfMemory()
+ c_bool IsKeyError()
+ c_bool IsNotImplemented()
+ c_bool IsInvalid()
+
+ cdef cppclass Buffer:
+ uint8_t* data()
+ int64_t size()
+
+
+cdef extern from "arrow/ipc/metadata.h" namespace "arrow::ipc" nogil:
+ cdef cppclass SchemaMessage:
+ int num_fields()
+ CStatus GetField(int i, shared_ptr[CField]* out)
+ CStatus GetSchema(shared_ptr[CSchema]* out)
+
+ cdef cppclass FieldMetadata:
+ pass
+
+ cdef cppclass BufferMetadata:
+ pass
+
+ cdef cppclass RecordBatchMessage:
+ pass
+
+ cdef cppclass DictionaryBatchMessage:
+ pass
+
+ enum MessageType" arrow::ipc::Message::Type":
+ MessageType_SCHEMA" arrow::ipc::Message::SCHEMA"
+ MessageType_RECORD_BATCH" arrow::ipc::Message::RECORD_BATCH"
+ MessageType_DICTIONARY_BATCH" arrow::ipc::Message::DICTIONARY_BATCH"
+
+ cdef cppclass Message:
+ CStatus Open(const shared_ptr[Buffer]& buf,
+ shared_ptr[Message]* out)
+ int64_t body_length()
+ MessageType type()
+
+ shared_ptr[SchemaMessage] GetSchema()
+ shared_ptr[RecordBatchMessage] GetRecordBatch()
+ shared_ptr[DictionaryBatchMessage] GetDictionaryBatch()
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/includes/pyarrow.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/pyarrow.pxd b/python/pyarrow/includes/pyarrow.pxd
index 9a0c004..eedfc85 100644
--- a/python/pyarrow/includes/pyarrow.pxd
+++ b/python/pyarrow/includes/pyarrow.pxd
@@ -18,8 +18,7 @@
# distutils: language = c++
from pyarrow.includes.common cimport *
-from pyarrow.includes.libarrow cimport (CArray, CDataType, LogicalType,
- MemoryPool)
+from pyarrow.includes.libarrow cimport CArray, CDataType, Type, MemoryPool
cdef extern from "pyarrow/api.h" namespace "pyarrow" nogil:
# We can later add more of the common status factory methods as needed
@@ -39,7 +38,7 @@ cdef extern from "pyarrow/api.h" namespace "pyarrow" nogil:
c_bool IsNotImplemented()
c_bool IsArrowError()
- shared_ptr[CDataType] GetPrimitiveType(LogicalType type, c_bool nullable)
+ shared_ptr[CDataType] GetPrimitiveType(Type type)
Status ConvertPySequence(object obj, shared_ptr[CArray]* out)
MemoryPool* GetMemoryPool()
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/scalar.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/scalar.pyx b/python/pyarrow/scalar.pyx
index 261a389..04f013d 100644
--- a/python/pyarrow/scalar.pyx
+++ b/python/pyarrow/scalar.pyx
@@ -172,18 +172,18 @@ cdef class ListValue(ArrayValue):
cdef dict _scalar_classes = {
- LogicalType_UINT8: Int8Value,
- LogicalType_UINT16: Int16Value,
- LogicalType_UINT32: Int32Value,
- LogicalType_UINT64: Int64Value,
- LogicalType_INT8: Int8Value,
- LogicalType_INT16: Int16Value,
- LogicalType_INT32: Int32Value,
- LogicalType_INT64: Int64Value,
- LogicalType_FLOAT: FloatValue,
- LogicalType_DOUBLE: DoubleValue,
- LogicalType_LIST: ListValue,
- LogicalType_STRING: StringValue
+ Type_UINT8: Int8Value,
+ Type_UINT16: Int16Value,
+ Type_UINT32: Int32Value,
+ Type_UINT64: Int64Value,
+ Type_INT8: Int8Value,
+ Type_INT16: Int16Value,
+ Type_INT32: Int32Value,
+ Type_INT64: Int64Value,
+ Type_FLOAT: FloatValue,
+ Type_DOUBLE: DoubleValue,
+ Type_LIST: ListValue,
+ Type_STRING: StringValue
}
cdef object box_arrow_scalar(DataType type,
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/schema.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/schema.pxd b/python/pyarrow/schema.pxd
index 07b9bd0..61458b7 100644
--- a/python/pyarrow/schema.pxd
+++ b/python/pyarrow/schema.pxd
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-from pyarrow.includes.common cimport shared_ptr
+from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport CDataType, CField, CSchema
cdef class DataType:
@@ -33,9 +33,13 @@ cdef class Field:
cdef readonly:
DataType type
+ cdef init(self, const shared_ptr[CField]& field)
+
cdef class Schema:
cdef:
shared_ptr[CSchema] sp_schema
CSchema* schema
+ cdef init(self, const vector[shared_ptr[CField]]& fields)
+
cdef DataType box_data_type(const shared_ptr[CDataType]& type)
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/schema.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/schema.pyx b/python/pyarrow/schema.pyx
index ea87872..b3bf02a 100644
--- a/python/pyarrow/schema.pyx
+++ b/python/pyarrow/schema.pyx
@@ -54,94 +54,153 @@ cdef class DataType:
cdef class Field:
- def __cinit__(self, object name, DataType type):
- self.type = type
- self.sp_field.reset(new CField(tobytes(name), type.sp_type))
- self.field = self.sp_field.get()
+ def __cinit__(self):
+ pass
+
+ cdef init(self, const shared_ptr[CField]& field):
+ self.sp_field = field
+ self.field = field.get()
+
+ @classmethod
+ def from_py(cls, object name, DataType type, bint nullable=True):
+ cdef Field result = Field()
+ result.type = type
+ result.sp_field.reset(new CField(tobytes(name), type.sp_type,
+ nullable))
+ result.field = result.sp_field.get()
+
+ return result
def __repr__(self):
return 'Field({0!r}, type={1})'.format(self.name, str(self.type))
+ property nullable:
+
+ def __get__(self):
+ return self.field.nullable
+
property name:
def __get__(self):
return frombytes(self.field.name)
+cdef class Schema:
+
+ def __cinit__(self):
+ pass
+
+ def __len__(self):
+ return self.schema.num_fields()
+
+ def __getitem__(self, i):
+ if i < 0 or i >= len(self):
+ raise IndexError("{0} is out of bounds".format(i))
+
+ cdef Field result = Field()
+ result.init(self.schema.field(i))
+ result.type = box_data_type(result.field.type)
+
+ return result
+
+ cdef init(self, const vector[shared_ptr[CField]]& fields):
+ self.schema = new CSchema(fields)
+ self.sp_schema.reset(self.schema)
+
+ @classmethod
+ def from_fields(cls, fields):
+ cdef:
+ Schema result
+ Field field
+ vector[shared_ptr[CField]] c_fields
+
+ c_fields.resize(len(fields))
+
+ for i in range(len(fields)):
+ field = fields[i]
+ c_fields[i] = field.sp_field
+
+ result = Schema()
+ result.init(c_fields)
+
+ return result
+
+ def __repr__(self):
+ return frombytes(self.schema.ToString())
+
cdef dict _type_cache = {}
-cdef DataType primitive_type(LogicalType type, bint nullable=True):
- if (type, nullable) in _type_cache:
- return _type_cache[type, nullable]
+cdef DataType primitive_type(Type type):
+ if type in _type_cache:
+ return _type_cache[type]
cdef DataType out = DataType()
- out.init(pyarrow.GetPrimitiveType(type, nullable))
+ out.init(pyarrow.GetPrimitiveType(type))
- _type_cache[type, nullable] = out
+ _type_cache[type] = out
return out
#------------------------------------------------------------
# Type factory functions
-def field(name, type):
- return Field(name, type)
+def field(name, type, bint nullable=True):
+ return Field.from_py(name, type, nullable)
cdef set PRIMITIVE_TYPES = set([
- LogicalType_NA, LogicalType_BOOL,
- LogicalType_UINT8, LogicalType_INT8,
- LogicalType_UINT16, LogicalType_INT16,
- LogicalType_UINT32, LogicalType_INT32,
- LogicalType_UINT64, LogicalType_INT64,
- LogicalType_FLOAT, LogicalType_DOUBLE])
+ Type_NA, Type_BOOL,
+ Type_UINT8, Type_INT8,
+ Type_UINT16, Type_INT16,
+ Type_UINT32, Type_INT32,
+ Type_UINT64, Type_INT64,
+ Type_FLOAT, Type_DOUBLE])
def null():
- return primitive_type(LogicalType_NA)
+ return primitive_type(Type_NA)
-def bool_(c_bool nullable=True):
- return primitive_type(LogicalType_BOOL, nullable)
+def bool_():
+ return primitive_type(Type_BOOL)
-def uint8(c_bool nullable=True):
- return primitive_type(LogicalType_UINT8, nullable)
+def uint8():
+ return primitive_type(Type_UINT8)
-def int8(c_bool nullable=True):
- return primitive_type(LogicalType_INT8, nullable)
+def int8():
+ return primitive_type(Type_INT8)
-def uint16(c_bool nullable=True):
- return primitive_type(LogicalType_UINT16, nullable)
+def uint16():
+ return primitive_type(Type_UINT16)
-def int16(c_bool nullable=True):
- return primitive_type(LogicalType_INT16, nullable)
+def int16():
+ return primitive_type(Type_INT16)
-def uint32(c_bool nullable=True):
- return primitive_type(LogicalType_UINT32, nullable)
+def uint32():
+ return primitive_type(Type_UINT32)
-def int32(c_bool nullable=True):
- return primitive_type(LogicalType_INT32, nullable)
+def int32():
+ return primitive_type(Type_INT32)
-def uint64(c_bool nullable=True):
- return primitive_type(LogicalType_UINT64, nullable)
+def uint64():
+ return primitive_type(Type_UINT64)
-def int64(c_bool nullable=True):
- return primitive_type(LogicalType_INT64, nullable)
+def int64():
+ return primitive_type(Type_INT64)
-def float_(c_bool nullable=True):
- return primitive_type(LogicalType_FLOAT, nullable)
+def float_():
+ return primitive_type(Type_FLOAT)
-def double(c_bool nullable=True):
- return primitive_type(LogicalType_DOUBLE, nullable)
+def double():
+ return primitive_type(Type_DOUBLE)
-def string(c_bool nullable=True):
+def string():
"""
UTF8 string
"""
- return primitive_type(LogicalType_STRING, nullable)
+ return primitive_type(Type_STRING)
-def list_(DataType value_type, c_bool nullable=True):
+def list_(DataType value_type):
cdef DataType out = DataType()
- out.init(shared_ptr[CDataType](
- new CListType(value_type.sp_type, nullable)))
+ out.init(shared_ptr[CDataType](new CListType(value_type.sp_type)))
return out
-def struct(fields, c_bool nullable=True):
+def struct(fields):
"""
"""
@@ -154,9 +213,11 @@ def struct(fields, c_bool nullable=True):
c_fields.push_back(field.sp_field)
out.init(shared_ptr[CDataType](
- new CStructType(c_fields, nullable)))
+ new CStructType(c_fields)))
return out
+def schema(fields):
+ return Schema.from_fields(fields)
cdef DataType box_data_type(const shared_ptr[CDataType]& type):
cdef DataType out = DataType()
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/tests/test_schema.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_schema.py b/python/pyarrow/tests/test_schema.py
index 0235526..2894ea8 100644
--- a/python/pyarrow/tests/test_schema.py
+++ b/python/pyarrow/tests/test_schema.py
@@ -18,6 +18,8 @@
from pyarrow.compat import unittest
import pyarrow as arrow
+A = arrow
+
class TestTypes(unittest.TestCase):
@@ -28,15 +30,12 @@ class TestTypes(unittest.TestCase):
for name in dtypes:
factory = getattr(arrow, name)
t = factory()
- t_required = factory(False)
-
assert str(t) == name
- assert str(t_required) == '{0} not null'.format(name)
def test_list(self):
value_type = arrow.int32()
list_type = arrow.list_(value_type)
- assert str(list_type) == 'list<int32>'
+ assert str(list_type) == 'list<item: int32>'
def test_string(self):
t = arrow.string()
@@ -47,5 +46,26 @@ class TestTypes(unittest.TestCase):
f = arrow.field('foo', t)
assert f.name == 'foo'
+ assert f.nullable
assert f.type is t
assert repr(f) == "Field('foo', type=string)"
+
+ f = arrow.field('foo', t, False)
+ assert not f.nullable
+
+ def test_schema(self):
+ fields = [
+ A.field('foo', A.int32()),
+ A.field('bar', A.string()),
+ A.field('baz', A.list_(A.int8()))
+ ]
+ sch = A.schema(fields)
+
+ assert len(sch) == 3
+ assert sch[0].name == 'foo'
+ assert sch[0].type == fields[0].type
+
+ assert repr(sch) == """\
+foo: int32
+bar: string
+baz: list<item: int8>"""
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/tests/test_table.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
new file mode 100644
index 0000000..2e24445
--- /dev/null
+++ b/python/pyarrow/tests/test_table.py
@@ -0,0 +1,40 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow.compat import unittest
+import pyarrow as arrow
+
+A = arrow
+
+
+class TestRowBatch(unittest.TestCase):
+
+ def test_basics(self):
+ data = [
+ A.from_pylist(range(5)),
+ A.from_pylist([-10, -5, 0, 5, 10])
+ ]
+ num_rows = 5
+
+ descr = A.schema([A.field('c0', data[0].type),
+ A.field('c1', data[1].type)])
+
+ batch = A.RowBatch(descr, num_rows, data)
+
+ assert len(batch) == num_rows
+ assert batch.num_rows == num_rows
+ assert batch.num_columns == len(data)
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/src/pyarrow/adapters/builtin.cc
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/adapters/builtin.cc b/python/src/pyarrow/adapters/builtin.cc
index bb79052..acb13ac 100644
--- a/python/src/pyarrow/adapters/builtin.cc
+++ b/python/src/pyarrow/adapters/builtin.cc
@@ -27,7 +27,7 @@
using arrow::ArrayBuilder;
using arrow::DataType;
-using arrow::LogicalType;
+using arrow::Type;
namespace pyarrow {
@@ -356,17 +356,17 @@ class ListConverter : public TypedConverter<arrow::ListBuilder> {
// Dynamic constructor for sequence converters
std::shared_ptr<SeqConverter> GetConverter(const std::shared_ptr<DataType>& type) {
switch (type->type) {
- case LogicalType::BOOL:
+ case Type::BOOL:
return std::make_shared<BoolConverter>();
- case LogicalType::INT64:
+ case Type::INT64:
return std::make_shared<Int64Converter>();
- case LogicalType::DOUBLE:
+ case Type::DOUBLE:
return std::make_shared<DoubleConverter>();
- case LogicalType::STRING:
+ case Type::STRING:
return std::make_shared<StringConverter>();
- case LogicalType::LIST:
+ case Type::LIST:
return std::make_shared<ListConverter>();
- case LogicalType::STRUCT:
+ case Type::STRUCT:
default:
return nullptr;
break;
@@ -378,7 +378,7 @@ Status ListConverter::Init(const std::shared_ptr<ArrayBuilder>& builder) {
typed_builder_ = static_cast<arrow::ListBuilder*>(builder.get());
value_converter_ = GetConverter(static_cast<arrow::ListType*>(
- builder->type().get())->value_type);
+ builder->type().get())->value_type());
if (value_converter_ == nullptr) {
return Status::NotImplemented("value type not implemented");
}
@@ -393,8 +393,8 @@ Status ConvertPySequence(PyObject* obj, std::shared_ptr<arrow::Array>* out) {
PY_RETURN_NOT_OK(InferArrowType(obj, &size, &type));
// Handle NA / NullType case
- if (type->type == LogicalType::NA) {
- out->reset(new arrow::Array(type, size, size));
+ if (type->type == Type::NA) {
+ out->reset(new arrow::NullArray(type, size));
return Status::OK();
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/src/pyarrow/helpers.cc
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/helpers.cc b/python/src/pyarrow/helpers.cc
index 0921fc4..08003aa 100644
--- a/python/src/pyarrow/helpers.cc
+++ b/python/src/pyarrow/helpers.cc
@@ -37,19 +37,14 @@ const std::shared_ptr<FloatType> FLOAT = std::make_shared<FloatType>();
const std::shared_ptr<DoubleType> DOUBLE = std::make_shared<DoubleType>();
const std::shared_ptr<StringType> STRING = std::make_shared<StringType>();
-#define GET_PRIMITIVE_TYPE(NAME, Type) \
- case LogicalType::NAME: \
- if (nullable) { \
- return NAME; \
- } else { \
- return std::make_shared<Type>(nullable); \
- } \
+#define GET_PRIMITIVE_TYPE(NAME, Class) \
+ case Type::NAME: \
+ return NAME; \
break;
-std::shared_ptr<DataType> GetPrimitiveType(LogicalType::type type,
- bool nullable) {
+std::shared_ptr<DataType> GetPrimitiveType(Type::type type) {
switch (type) {
- case LogicalType::NA:
+ case Type::NA:
return NA;
GET_PRIMITIVE_TYPE(UINT8, UInt8Type);
GET_PRIMITIVE_TYPE(INT8, Int8Type);
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/src/pyarrow/helpers.h
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/helpers.h b/python/src/pyarrow/helpers.h
index e41568d..ec42bb3 100644
--- a/python/src/pyarrow/helpers.h
+++ b/python/src/pyarrow/helpers.h
@@ -24,7 +24,7 @@
namespace pyarrow {
using arrow::DataType;
-using arrow::LogicalType;
+using arrow::Type;
extern const std::shared_ptr<arrow::NullType> NA;
extern const std::shared_ptr<arrow::BooleanType> BOOL;
@@ -40,8 +40,7 @@ extern const std::shared_ptr<arrow::FloatType> FLOAT;
extern const std::shared_ptr<arrow::DoubleType> DOUBLE;
extern const std::shared_ptr<arrow::StringType> STRING;
-std::shared_ptr<DataType> GetPrimitiveType(LogicalType::type type,
- bool nullable);
+std::shared_ptr<DataType> GetPrimitiveType(Type::type type);
} // namespace pyarrow
[3/3] arrow git commit: ARROW-67: C++ metadata flatbuffer
serialization and data movement to memory maps
Posted by we...@apache.org.
ARROW-67: C++ metadata flatbuffer serialization and data movement to memory maps
Several things here:
* Add Google flatbuffers dependency
* Flatbuffers IDL draft in collaboration with @jacques-n and @stevenmphillips
* Add Schema wrapper in Cython
* arrow::Schema conversion to/from flatbuffer representation
* Remove unneeded physical layout types from type.h
* Refactor ListType to be a nested type with a single child
* Implement shared memory round-trip for numeric row batches
* mmap-based shared memory interface and MemorySource abstract API
Quite a bit of judicious code cleaning and consolidation as part of this. For example, List types are now internally equivalent to a nested type with 1 named child field (versus a struct, which can have any number of child fields).
Associated JIRAs: ARROW-48, ARROW-57, ARROW-58
Author: Wes McKinney <we...@apache.org>
Closes #28 from wesm/cpp-ipc-draft and squashes the following commits:
0cef7ea [Wes McKinney] Add NullArray type now that Array is virtual, fix pyarrow build
5e841f7 [Wes McKinney] Create explicit PrimitiveArray subclasses to avoid unwanted template instantiation
6fa6319 [Wes McKinney] ARROW-28: Draft C++ shared memory IPC workflow and related refactoring / scaffolding / cleaning.
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/65db0da8
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/65db0da8
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/65db0da8
Branch: refs/heads/master
Commit: 65db0da80b6a1fb6887b7ac1df24e2423d41dfb9
Parents: 093f9bd
Author: Wes McKinney <we...@apache.org>
Authored: Tue Mar 22 18:45:13 2016 -0700
Committer: Wes McKinney <we...@apache.org>
Committed: Tue Mar 22 18:45:13 2016 -0700
----------------------------------------------------------------------
ci/travis_before_script_cpp.sh | 9 +-
ci/travis_script_cpp.sh | 6 +-
cpp/CMakeLists.txt | 96 +++++---
cpp/cmake_modules/FindFlatbuffers.cmake | 95 ++++++++
cpp/setup_build_env.sh | 5 +-
cpp/src/arrow/CMakeLists.txt | 8 +
cpp/src/arrow/api.h | 11 +-
cpp/src/arrow/array-test.cc | 14 +-
cpp/src/arrow/array.cc | 26 ++-
cpp/src/arrow/array.h | 27 ++-
cpp/src/arrow/builder.h | 2 +-
cpp/src/arrow/column-benchmark.cc | 54 +++++
cpp/src/arrow/column-test.cc | 75 +++++++
cpp/src/arrow/column.cc | 70 ++++++
cpp/src/arrow/column.h | 108 +++++++++
cpp/src/arrow/ipc/.gitignore | 1 +
cpp/src/arrow/ipc/CMakeLists.txt | 51 +++++
cpp/src/arrow/ipc/adapter.cc | 305 ++++++++++++++++++++++++++
cpp/src/arrow/ipc/adapter.h | 86 ++++++++
cpp/src/arrow/ipc/ipc-adapter-test.cc | 112 ++++++++++
cpp/src/arrow/ipc/ipc-memory-test.cc | 82 +++++++
cpp/src/arrow/ipc/ipc-metadata-test.cc | 99 +++++++++
cpp/src/arrow/ipc/memory.cc | 162 ++++++++++++++
cpp/src/arrow/ipc/memory.h | 131 +++++++++++
cpp/src/arrow/ipc/metadata-internal.cc | 317 +++++++++++++++++++++++++++
cpp/src/arrow/ipc/metadata-internal.h | 69 ++++++
cpp/src/arrow/ipc/metadata.cc | 238 ++++++++++++++++++++
cpp/src/arrow/ipc/metadata.h | 146 ++++++++++++
cpp/src/arrow/ipc/test-common.h | 53 +++++
cpp/src/arrow/schema-test.cc | 104 +++++++++
cpp/src/arrow/schema.cc | 63 ++++++
cpp/src/arrow/schema.h | 55 +++++
cpp/src/arrow/table-test.cc | 128 +++++++++++
cpp/src/arrow/table.cc | 86 ++++++++
cpp/src/arrow/table.h | 128 +++++++++++
cpp/src/arrow/table/CMakeLists.txt | 33 ---
cpp/src/arrow/table/column-benchmark.cc | 55 -----
cpp/src/arrow/table/column-test.cc | 75 -------
cpp/src/arrow/table/column.cc | 68 ------
cpp/src/arrow/table/column.h | 105 ---------
cpp/src/arrow/table/schema-test.cc | 110 ----------
cpp/src/arrow/table/schema.cc | 58 -----
cpp/src/arrow/table/schema.h | 55 -----
cpp/src/arrow/table/table-test.cc | 128 -----------
cpp/src/arrow/table/table.cc | 73 ------
cpp/src/arrow/table/table.h | 82 -------
cpp/src/arrow/table/test-common.h | 54 -----
cpp/src/arrow/test-util.h | 68 +++++-
cpp/src/arrow/type.cc | 24 +-
cpp/src/arrow/type.h | 177 ++++++---------
cpp/src/arrow/types/CMakeLists.txt | 2 -
cpp/src/arrow/types/boolean.h | 2 +-
cpp/src/arrow/types/collection.h | 2 +-
cpp/src/arrow/types/construct.cc | 53 +++--
cpp/src/arrow/types/construct.h | 11 +-
cpp/src/arrow/types/datetime.h | 16 +-
cpp/src/arrow/types/floating.cc | 22 --
cpp/src/arrow/types/floating.h | 36 ---
cpp/src/arrow/types/integer.cc | 22 --
cpp/src/arrow/types/integer.h | 57 -----
cpp/src/arrow/types/json.cc | 1 -
cpp/src/arrow/types/json.h | 4 +-
cpp/src/arrow/types/list-test.cc | 28 ++-
cpp/src/arrow/types/list.cc | 29 +++
cpp/src/arrow/types/list.h | 28 +--
cpp/src/arrow/types/primitive-test.cc | 41 ++--
cpp/src/arrow/types/primitive.cc | 16 +-
cpp/src/arrow/types/primitive.h | 102 +++++----
cpp/src/arrow/types/string-test.cc | 54 +++--
cpp/src/arrow/types/string.h | 55 ++---
cpp/src/arrow/types/struct-test.cc | 15 +-
cpp/src/arrow/types/test-common.h | 5 +-
cpp/src/arrow/types/union.h | 18 +-
cpp/src/arrow/util/bit-util-test.cc | 4 +-
cpp/src/arrow/util/bit-util.h | 1 -
cpp/src/arrow/util/buffer-test.cc | 3 +-
cpp/src/arrow/util/buffer.cc | 2 +-
cpp/src/arrow/util/memory-pool-test.cc | 7 +-
cpp/src/arrow/util/memory-pool.cc | 6 +-
cpp/src/arrow/util/memory-pool.h | 2 +-
cpp/src/arrow/util/status.cc | 3 +
cpp/src/arrow/util/status.h | 6 +
cpp/src/arrow/util/test_main.cc | 2 +-
cpp/thirdparty/build_thirdparty.sh | 9 +
cpp/thirdparty/download_thirdparty.sh | 5 +
cpp/thirdparty/versions.sh | 4 +
format/Message.fbs | 183 ++++++++++++++++
python/pyarrow/__init__.py | 4 +-
python/pyarrow/array.pxd | 2 +-
python/pyarrow/array.pyx | 47 +++-
python/pyarrow/includes/libarrow.pxd | 107 ++++++---
python/pyarrow/includes/pyarrow.pxd | 5 +-
python/pyarrow/scalar.pyx | 24 +-
python/pyarrow/schema.pxd | 6 +-
python/pyarrow/schema.pyx | 155 +++++++++----
python/pyarrow/tests/test_schema.py | 28 ++-
python/pyarrow/tests/test_table.py | 40 ++++
python/src/pyarrow/adapters/builtin.cc | 20 +-
python/src/pyarrow/helpers.cc | 15 +-
python/src/pyarrow/helpers.h | 5 +-
100 files changed, 3888 insertions(+), 1613 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/ci/travis_before_script_cpp.sh
----------------------------------------------------------------------
diff --git a/ci/travis_before_script_cpp.sh b/ci/travis_before_script_cpp.sh
index 49dcc39..193c76f 100755
--- a/ci/travis_before_script_cpp.sh
+++ b/ci/travis_before_script_cpp.sh
@@ -19,7 +19,14 @@ echo $GTEST_HOME
: ${ARROW_CPP_INSTALL=$TRAVIS_BUILD_DIR/cpp-install}
-cmake -DARROW_BUILD_BENCHMARKS=ON -DCMAKE_INSTALL_PREFIX=$ARROW_CPP_INSTALL -DCMAKE_CXX_FLAGS="-Werror" $CPP_DIR
+CMAKE_COMMON_FLAGS="-DARROW_BUILD_BENCHMARKS=ON -DCMAKE_INSTALL_PREFIX=$ARROW_CPP_INSTALL"
+
+if [ $TRAVIS_OS_NAME == "linux" ]; then
+ cmake -DARROW_TEST_MEMCHECK=on $CMAKE_COMMON_FLAGS -DCMAKE_CXX_FLAGS="-Werror" $CPP_DIR
+else
+ cmake $CMAKE_COMMON_FLAGS -DCMAKE_CXX_FLAGS="-Werror" $CPP_DIR
+fi
+
make -j4
make install
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/ci/travis_script_cpp.sh
----------------------------------------------------------------------
diff --git a/ci/travis_script_cpp.sh b/ci/travis_script_cpp.sh
index d96b98f..997bdf3 100755
--- a/ci/travis_script_cpp.sh
+++ b/ci/travis_script_cpp.sh
@@ -8,10 +8,6 @@ pushd $CPP_BUILD_DIR
make lint
-if [ $TRAVIS_OS_NAME == "linux" ]; then
- valgrind --tool=memcheck --leak-check=yes --error-exitcode=1 ctest -L unittest
-else
- ctest -L unittest
-fi
+ctest -L unittest
popd
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 268c1d1..6d70107 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -51,7 +51,9 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
option(ARROW_PARQUET
"Build the Parquet adapter and link to libparquet"
OFF)
-
+ option(ARROW_TEST_MEMCHECK
+ "Run the test suite using valgrind --tool=memcheck"
+ OFF)
option(ARROW_BUILD_TESTS
"Build the Arrow googletest unit tests"
ON)
@@ -60,6 +62,10 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
"Build the Arrow micro benchmarks"
OFF)
+ option(ARROW_IPC
+ "Build the Arrow IPC extensions"
+ ON)
+
endif()
if(NOT ARROW_BUILD_TESTS)
@@ -260,17 +266,17 @@ set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}")
include_directories(src)
############################################################
-# Benchmarking
+# Benchmarking
############################################################
# Add a new micro benchmark, with or without an executable that should be built.
# If benchmarks are enabled then they will be run along side unit tests with ctest.
-# 'make runbenchmark' and 'make unittest' to build/run only benchmark or unittests,
+# 'make runbenchmark' and 'make unittest' to build/run only benchmark or unittests,
# respectively.
#
# REL_BENCHMARK_NAME is the name of the benchmark app. It may be a single component
# (e.g. monotime-benchmark) or contain additional components (e.g.
# net/net_util-benchmark). Either way, the last component must be a globally
-# unique name.
+# unique name.
# The benchmark will registered as unit test with ctest with a label
# of 'benchmark'.
@@ -281,7 +287,7 @@ function(ADD_ARROW_BENCHMARK REL_BENCHMARK_NAME)
return()
endif()
get_filename_component(BENCHMARK_NAME ${REL_BENCHMARK_NAME} NAME_WE)
-
+
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${REL_BENCHMARK_NAME}.cc)
# This benchmark has a corresponding .cc file, set it up as an executable.
set(BENCHMARK_PATH "${EXECUTABLE_OUTPUT_PATH}/${BENCHMARK_NAME}")
@@ -294,7 +300,7 @@ function(ADD_ARROW_BENCHMARK REL_BENCHMARK_NAME)
set(BENCHMARK_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${REL_BENCHMARK_NAME})
set(NO_COLOR "")
endif()
-
+
add_test(${BENCHMARK_NAME}
${BUILD_SUPPORT_DIR}/run-test.sh ${CMAKE_BINARY_DIR} benchmark ${BENCHMARK_PATH} ${NO_COLOR})
set_tests_properties(${BENCHMARK_NAME} PROPERTIES LABELS "benchmark")
@@ -345,9 +351,18 @@ function(ADD_ARROW_TEST REL_TEST_NAME)
set(TEST_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${REL_TEST_NAME})
endif()
- add_test(${TEST_NAME}
- ${BUILD_SUPPORT_DIR}/run-test.sh ${CMAKE_BINARY_DIR} test ${TEST_PATH})
+ if (ARROW_TEST_MEMCHECK)
+ SET_PROPERTY(TARGET ${TEST_NAME}
+ APPEND_STRING PROPERTY
+ COMPILE_FLAGS " -DARROW_VALGRIND")
+ add_test(${TEST_NAME}
+ valgrind --tool=memcheck --leak-check=full --error-exitcode=1 ${TEST_PATH})
+ else()
+ add_test(${TEST_NAME}
+ ${BUILD_SUPPORT_DIR}/run-test.sh ${CMAKE_BINARY_DIR} test ${TEST_PATH})
+ endif()
set_tests_properties(${TEST_NAME} PROPERTIES LABELS "unittest")
+
if(ARGN)
set_tests_properties(${TEST_NAME} PROPERTIES ${ARGN})
endif()
@@ -403,7 +418,7 @@ if ("$ENV{GTEST_HOME}" STREQUAL "")
set(GTest_HOME ${THIRDPARTY_DIR}/googletest-release-1.7.0)
endif()
-## Google Benchmark
+## Google Benchmark
if ("$ENV{GBENCHMARK_HOME}" STREQUAL "")
set(GBENCHMARK_HOME ${THIRDPARTY_DIR}/installed)
endif()
@@ -487,24 +502,10 @@ if (UNIX)
add_custom_target(lint ${BUILD_SUPPORT_DIR}/cpplint.py
--verbose=2
--linelength=90
- --filter=-whitespace/comments,-readability/todo,-build/header_guard,-build/c++11
- `find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc -or -name \\*.h`)
+ --filter=-whitespace/comments,-readability/todo,-build/header_guard,-build/c++11,-runtime/references
+ `find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc -or -name \\*.h | sed -e '/_generated/g'`)
endif (UNIX)
-#----------------------------------------------------------------------
-# Parquet adapter
-
-if(ARROW_PARQUET)
- find_package(Parquet REQUIRED)
- include_directories(SYSTEM ${PARQUET_INCLUDE_DIR})
- ADD_THIRDPARTY_LIB(parquet
- STATIC_LIB ${PARQUET_STATIC_LIB}
- SHARED_LIB ${PARQUET_SHARED_LIB})
-
- add_subdirectory(src/arrow/parquet)
- list(APPEND LINK_LIBS arrow_parquet parquet)
-endif()
-
############################################################
# Subdirectories
############################################################
@@ -515,15 +516,18 @@ set(LIBARROW_LINK_LIBS
set(ARROW_SRCS
src/arrow/array.cc
src/arrow/builder.cc
+ src/arrow/column.cc
+ src/arrow/schema.cc
+ src/arrow/table.cc
src/arrow/type.cc
- src/arrow/table/column.cc
- src/arrow/table/schema.cc
- src/arrow/table/table.cc
+ # IPC / Shared memory library; to be turned into an optional component
+ src/arrow/ipc/adapter.cc
+ src/arrow/ipc/memory.cc
+ src/arrow/ipc/metadata.cc
+ src/arrow/ipc/metadata-internal.cc
src/arrow/types/construct.cc
- src/arrow/types/floating.cc
- src/arrow/types/integer.cc
src/arrow/types/json.cc
src/arrow/types/list.cc
src/arrow/types/primitive.cc
@@ -559,9 +563,39 @@ target_link_libraries(arrow ${LIBARROW_LINK_LIBS})
add_subdirectory(src/arrow)
add_subdirectory(src/arrow/util)
-add_subdirectory(src/arrow/table)
add_subdirectory(src/arrow/types)
install(TARGETS arrow
LIBRARY DESTINATION lib
ARCHIVE DESTINATION lib)
+
+#----------------------------------------------------------------------
+# Parquet adapter library
+
+if(ARROW_PARQUET)
+ find_package(Parquet REQUIRED)
+ include_directories(SYSTEM ${PARQUET_INCLUDE_DIR})
+ ADD_THIRDPARTY_LIB(parquet
+ STATIC_LIB ${PARQUET_STATIC_LIB}
+ SHARED_LIB ${PARQUET_SHARED_LIB})
+
+ add_subdirectory(src/arrow/parquet)
+ list(APPEND LINK_LIBS arrow_parquet parquet)
+endif()
+
+#----------------------------------------------------------------------
+# IPC library
+
+## Flatbuffers
+if(ARROW_IPC)
+ find_package(Flatbuffers REQUIRED)
+ message(STATUS "Flatbuffers include dir: ${FLATBUFFERS_INCLUDE_DIR}")
+ message(STATUS "Flatbuffers static library: ${FLATBUFFERS_STATIC_LIB}")
+ message(STATUS "Flatbuffers compiler: ${FLATBUFFERS_COMPILER}")
+ include_directories(SYSTEM ${FLATBUFFERS_INCLUDE_DIR})
+ add_library(flatbuffers STATIC IMPORTED)
+ set_target_properties(flatbuffers PROPERTIES
+ IMPORTED_LOCATION ${FLATBUFFERS_STATIC_LIB})
+
+ add_subdirectory(src/arrow/ipc)
+endif()
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/cmake_modules/FindFlatbuffers.cmake
----------------------------------------------------------------------
diff --git a/cpp/cmake_modules/FindFlatbuffers.cmake b/cpp/cmake_modules/FindFlatbuffers.cmake
new file mode 100644
index 0000000..ee472d1
--- /dev/null
+++ b/cpp/cmake_modules/FindFlatbuffers.cmake
@@ -0,0 +1,95 @@
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Tries to find Flatbuffers headers and libraries.
+#
+# Usage of this module as follows:
+#
+# find_package(Flatbuffers)
+#
+# Variables used by this module, they can change the default behaviour and need
+# to be set before calling find_package:
+#
+# Flatbuffers_HOME -
+# When set, this path is inspected instead of standard library locations as
+# the root of the Flatbuffers installation. The environment variable
+# FLATBUFFERS_HOME overrides this variable.
+#
+# This module defines
+# FLATBUFFERS_INCLUDE_DIR, directory containing headers
+# FLATBUFFERS_LIBS, directory containing flatbuffers libraries
+# FLATBUFFERS_STATIC_LIB, path to libflatbuffers.a
+# FLATBUFFERS_FOUND, whether flatbuffers has been found
+
+if( NOT "$ENV{FLATBUFFERS_HOME}" STREQUAL "")
+ file( TO_CMAKE_PATH "$ENV{FLATBUFFERS_HOME}" _native_path )
+ list( APPEND _flatbuffers_roots ${_native_path} )
+elseif ( Flatbuffers_HOME )
+ list( APPEND _flatbuffers_roots ${Flatbuffers_HOME} )
+endif()
+
+# Try the parameterized roots, if they exist
+if ( _flatbuffers_roots )
+ find_path( FLATBUFFERS_INCLUDE_DIR NAMES flatbuffers/flatbuffers.h
+ PATHS ${_flatbuffers_roots} NO_DEFAULT_PATH
+ PATH_SUFFIXES "include" )
+ find_library( FLATBUFFERS_LIBRARIES NAMES flatbuffers
+ PATHS ${_flatbuffers_roots} NO_DEFAULT_PATH
+ PATH_SUFFIXES "lib" )
+else ()
+ find_path( FLATBUFFERS_INCLUDE_DIR NAMES flatbuffers/flatbuffers.h )
+ find_library( FLATBUFFERS_LIBRARIES NAMES flatbuffers )
+endif ()
+
+find_program(FLATBUFFERS_COMPILER flatc
+ $ENV{FLATBUFFERS_HOME}/bin
+ /usr/local/bin
+ /usr/bin
+ NO_DEFAULT_PATH
+)
+
+if (FLATBUFFERS_INCLUDE_DIR AND FLATBUFFERS_LIBRARIES)
+ set(FLATBUFFERS_FOUND TRUE)
+ get_filename_component( FLATBUFFERS_LIBS ${FLATBUFFERS_LIBRARIES} PATH )
+ set(FLATBUFFERS_LIB_NAME libflatbuffers)
+ set(FLATBUFFERS_STATIC_LIB ${FLATBUFFERS_LIBS}/${FLATBUFFERS_LIB_NAME}.a)
+else ()
+ set(FLATBUFFERS_FOUND FALSE)
+endif ()
+
+if (FLATBUFFERS_FOUND)
+ if (NOT Flatbuffers_FIND_QUIETLY)
+ message(STATUS "Found the Flatbuffers library: ${FLATBUFFERS_LIBRARIES}")
+ endif ()
+else ()
+ if (NOT Flatbuffers_FIND_QUIETLY)
+ set(FLATBUFFERS_ERR_MSG "Could not find the Flatbuffers library. Looked in ")
+ if ( _flatbuffers_roots )
+ set(FLATBUFFERS_ERR_MSG "${FLATBUFFERS_ERR_MSG} in ${_flatbuffers_roots}.")
+ else ()
+ set(FLATBUFFERS_ERR_MSG "${FLATBUFFERS_ERR_MSG} system search paths.")
+ endif ()
+ if (Flatbuffers_FIND_REQUIRED)
+ message(FATAL_ERROR "${FLATBUFFERS_ERR_MSG}")
+ else (Flatbuffers_FIND_REQUIRED)
+ message(STATUS "${FLATBUFFERS_ERR_MSG}")
+ endif (Flatbuffers_FIND_REQUIRED)
+ endif ()
+endif ()
+
+mark_as_advanced(
+ FLATBUFFERS_INCLUDE_DIR
+ FLATBUFFERS_LIBS
+ FLATBUFFERS_STATIC_LIB
+ FLATBUFFERS_COMPILER
+)
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/setup_build_env.sh
----------------------------------------------------------------------
diff --git a/cpp/setup_build_env.sh b/cpp/setup_build_env.sh
index 04688e7..6520dbd 100755
--- a/cpp/setup_build_env.sh
+++ b/cpp/setup_build_env.sh
@@ -2,11 +2,12 @@
SOURCE_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)
-./thirdparty/download_thirdparty.sh || { echo "download_thirdparty.sh failed" ; return; }
-./thirdparty/build_thirdparty.sh || { echo "build_thirdparty.sh failed" ; return; }
+./thirdparty/download_thirdparty.sh || { echo "download_thirdparty.sh failed" ; return; }
+./thirdparty/build_thirdparty.sh || { echo "build_thirdparty.sh failed" ; return; }
source thirdparty/versions.sh
export GTEST_HOME=$SOURCE_DIR/thirdparty/$GTEST_BASEDIR
export GBENCHMARK_HOME=$SOURCE_DIR/thirdparty/installed
+export FLATBUFFERS_HOME=$SOURCE_DIR/thirdparty/installed
echo "Build env initialized"
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 73e6a9b..2d42edc 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -19,7 +19,10 @@
install(FILES
api.h
array.h
+ column.h
builder.h
+ schema.h
+ table.h
type.h
DESTINATION include/arrow)
@@ -30,3 +33,8 @@ install(FILES
set(ARROW_TEST_LINK_LIBS ${ARROW_MIN_TEST_LIBS})
ADD_ARROW_TEST(array-test)
+ADD_ARROW_TEST(column-test)
+ADD_ARROW_TEST(schema-test)
+ADD_ARROW_TEST(table-test)
+
+ADD_ARROW_BENCHMARK(column-benchmark)
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/api.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/api.h b/cpp/src/arrow/api.h
index c73d4b3..7be7f88 100644
--- a/cpp/src/arrow/api.h
+++ b/cpp/src/arrow/api.h
@@ -22,20 +22,19 @@
#include "arrow/array.h"
#include "arrow/builder.h"
+#include "arrow/column.h"
+#include "arrow/schema.h"
+#include "arrow/table.h"
#include "arrow/type.h"
-#include "arrow/table/column.h"
-#include "arrow/table/schema.h"
-#include "arrow/table/table.h"
-
#include "arrow/types/boolean.h"
#include "arrow/types/construct.h"
-#include "arrow/types/floating.h"
-#include "arrow/types/integer.h"
#include "arrow/types/list.h"
+#include "arrow/types/primitive.h"
#include "arrow/types/string.h"
#include "arrow/types/struct.h"
+#include "arrow/util/buffer.h"
#include "arrow/util/memory-pool.h"
#include "arrow/util/status.h"
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/array-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc
index df827aa..eded594 100644
--- a/cpp/src/arrow/array-test.cc
+++ b/cpp/src/arrow/array-test.cc
@@ -15,30 +15,26 @@
// specific language governing permissions and limitations
// under the License.
-#include <gtest/gtest.h>
-
#include <cstdint>
#include <cstdlib>
#include <memory>
#include <vector>
+#include "gtest/gtest.h"
+
#include "arrow/array.h"
#include "arrow/test-util.h"
#include "arrow/type.h"
-#include "arrow/types/integer.h"
#include "arrow/types/primitive.h"
#include "arrow/util/buffer.h"
#include "arrow/util/memory-pool.h"
-#include "arrow/util/status.h"
namespace arrow {
-static TypePtr int32 = TypePtr(new Int32Type());
-
class TestArray : public ::testing::Test {
public:
void SetUp() {
- pool_ = GetDefaultMemoryPool();
+ pool_ = default_memory_pool();
}
protected:
@@ -75,10 +71,10 @@ TEST_F(TestArray, TestIsNull) {
if (x > 0) ++null_count;
}
- std::shared_ptr<Buffer> null_buf = bytes_to_null_buffer(nulls.data(),
+ std::shared_ptr<Buffer> null_buf = test::bytes_to_null_buffer(nulls.data(),
nulls.size());
std::unique_ptr<Array> arr;
- arr.reset(new Array(int32, nulls.size(), null_count, null_buf));
+ arr.reset(new Int32Array(nulls.size(), nullptr, null_count, null_buf));
ASSERT_EQ(null_count, arr->null_count());
ASSERT_EQ(5, null_buf->size());
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/array.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc
index ee4ef66..5a5bc10 100644
--- a/cpp/src/arrow/array.cc
+++ b/cpp/src/arrow/array.cc
@@ -28,11 +28,6 @@ namespace arrow {
Array::Array(const TypePtr& type, int32_t length, int32_t null_count,
const std::shared_ptr<Buffer>& nulls) {
- Init(type, length, null_count, nulls);
-}
-
-void Array::Init(const TypePtr& type, int32_t length, int32_t null_count,
- const std::shared_ptr<Buffer>& nulls) {
type_ = type;
length_ = length;
null_count_ = null_count;
@@ -42,4 +37,25 @@ void Array::Init(const TypePtr& type, int32_t length, int32_t null_count,
}
}
+bool Array::EqualsExact(const Array& other) const {
+ if (this == &other) return true;
+ if (length_ != other.length_ || null_count_ != other.null_count_ ||
+ type_enum() != other.type_enum()) {
+ return false;
+ }
+ if (null_count_ > 0) {
+ return nulls_->Equals(*other.nulls_, util::bytes_for_bits(length_));
+ } else {
+ return true;
+ }
+}
+
+bool NullArray::Equals(const std::shared_ptr<Array>& arr) const {
+ if (this == arr.get()) return true;
+ if (Type::NA != arr->type_enum()) {
+ return false;
+ }
+ return arr->length() == length_;
+}
+
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/array.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h
index 85e853e..65fc0aa 100644
--- a/cpp/src/arrow/array.h
+++ b/cpp/src/arrow/array.h
@@ -40,20 +40,11 @@ class Buffer;
// explicitly increment its reference count
class Array {
public:
- Array() :
- null_count_(0),
- length_(0),
- nulls_(nullptr),
- null_bits_(nullptr) {}
-
Array(const TypePtr& type, int32_t length, int32_t null_count = 0,
const std::shared_ptr<Buffer>& nulls = nullptr);
virtual ~Array() {}
- void Init(const TypePtr& type, int32_t length, int32_t null_count,
- const std::shared_ptr<Buffer>& nulls);
-
// Determine if a slot is null. For inner loops. Does *not* boundscheck
bool IsNull(int i) const {
return null_count_ > 0 && util::get_bit(null_bits_, i);
@@ -63,12 +54,15 @@ class Array {
int32_t null_count() const { return null_count_;}
const std::shared_ptr<DataType>& type() const { return type_;}
- LogicalType::type logical_type() const { return type_->type;}
+ Type::type type_enum() const { return type_->type;}
const std::shared_ptr<Buffer>& nulls() const {
return nulls_;
}
+ bool EqualsExact(const Array& arr) const;
+ virtual bool Equals(const std::shared_ptr<Array>& arr) const = 0;
+
protected:
TypePtr type_;
int32_t null_count_;
@@ -78,9 +72,22 @@ class Array {
const uint8_t* null_bits_;
private:
+ Array() {}
DISALLOW_COPY_AND_ASSIGN(Array);
};
+// Degenerate null type Array
+class NullArray : public Array {
+ public:
+ NullArray(const std::shared_ptr<DataType>& type, int32_t length) :
+ Array(type, length, length, nullptr) {}
+
+ explicit NullArray(int32_t length) :
+ NullArray(std::make_shared<NullType>(), length) {}
+
+ bool Equals(const std::shared_ptr<Array>& arr) const override;
+};
+
typedef std::shared_ptr<Array> ArrayPtr;
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/builder.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h
index 8cc689c..d5d1fdf 100644
--- a/cpp/src/arrow/builder.h
+++ b/cpp/src/arrow/builder.h
@@ -99,7 +99,7 @@ class ArrayBuilder {
int32_t capacity_;
// Child value array builders. These are owned by this class
- std::vector<std::unique_ptr<ArrayBuilder> > children_;
+ std::vector<std::unique_ptr<ArrayBuilder>> children_;
private:
DISALLOW_COPY_AND_ASSIGN(ArrayBuilder);
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/column-benchmark.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/column-benchmark.cc b/cpp/src/arrow/column-benchmark.cc
new file mode 100644
index 0000000..69ee52c
--- /dev/null
+++ b/cpp/src/arrow/column-benchmark.cc
@@ -0,0 +1,54 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+#include "benchmark/benchmark.h"
+
+#include "arrow/test-util.h"
+#include "arrow/types/primitive.h"
+#include "arrow/util/memory-pool.h"
+
+namespace arrow {
+namespace {
+  // Build an ArrayType with `length` slots and `null_count` reported nulls, backed
+  // by pool buffers resized for the values and the null bitmap. The buffer contents
+  // are not filled in here.
+  // NOTE(review): the Status results of Resize() are ignored in this helper.
+  template <typename ArrayType>
+  std::shared_ptr<Array> MakePrimitive(int32_t length, int32_t null_count = 0) {
+    auto pool = default_memory_pool();
+    auto data = std::make_shared<PoolBuffer>(pool);
+    auto nulls = std::make_shared<PoolBuffer>(pool);
+    data->Resize(length * sizeof(typename ArrayType::value_type));
+    nulls->Resize(util::bytes_for_bits(length));
+    // Forward the caller's null_count; it was previously hard-coded to 10, which
+    // silently ignored the argument (including its default of 0)
+    return std::make_shared<ArrayType>(length, data, null_count, nulls);
+  }
+} // anonymous namespace
+
+
+static void BM_BuildInt32ColumnByChunk(benchmark::State& state) { //NOLINT non-const reference
+ ArrayVector arrays;
+ for (int chunk_n = 0; chunk_n < state.range_x(); ++chunk_n) {  // range_x() chunks, built once
+ arrays.push_back(MakePrimitive<Int32Array>(100, 10));
+ }
+ const auto INT32 = std::make_shared<Int32Type>();
+ const auto field = std::make_shared<Field>("c0", INT32);
+ std::unique_ptr<Column> column;
+ while (state.KeepRunning()) {  // benchmark loop: rebuild the Column from the same chunks
+ column.reset(new Column(field, arrays));
+ }
+}
+
+BENCHMARK(BM_BuildInt32ColumnByChunk)->Range(5, 50000);
+
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/column-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/column-test.cc b/cpp/src/arrow/column-test.cc
new file mode 100644
index 0000000..0630785
--- /dev/null
+++ b/cpp/src/arrow/column-test.cc
@@ -0,0 +1,75 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "arrow/column.h"
+#include "arrow/schema.h"
+#include "arrow/test-util.h"
+#include "arrow/type.h"
+#include "arrow/types/primitive.h"
+
+using std::shared_ptr;
+using std::vector;
+
+namespace arrow {
+
+const auto INT32 = std::make_shared<Int32Type>();
+
+class TestColumn : public TestBase {  // fixture for the Column tests in this file
+ protected:
+ std::shared_ptr<ChunkedArray> data_;  // not referenced by the tests in this file
+ std::unique_ptr<Column> column_;  // each test resets this to the Column under test
+};
+
+TEST_F(TestColumn, BasicAPI) {  // accessors reflect the field and the chunk totals
+ ArrayVector arrays;
+ arrays.push_back(MakePrimitive<Int32Array>(100));
+ arrays.push_back(MakePrimitive<Int32Array>(100, 10));
+ arrays.push_back(MakePrimitive<Int32Array>(100, 20));
+
+ auto field = std::make_shared<Field>("c0", INT32);
+ column_.reset(new Column(field, arrays));
+
+ ASSERT_EQ("c0", column_->name());
+ ASSERT_TRUE(column_->type()->Equals(INT32));
+ ASSERT_EQ(300, column_->length());  // 3 chunks of 100 slots
+ ASSERT_EQ(30, column_->null_count());  // 0 + 10 + 20
+ ASSERT_EQ(3, column_->data()->num_chunks());
+}
+
+TEST_F(TestColumn, ChunksInhomogeneous) {  // ValidateData rejects a chunk of another type
+ ArrayVector arrays;
+ arrays.push_back(MakePrimitive<Int32Array>(100));
+ arrays.push_back(MakePrimitive<Int32Array>(100, 10));
+
+ auto field = std::make_shared<Field>("c0", INT32);
+ column_.reset(new Column(field, arrays));
+
+ ASSERT_OK(column_->ValidateData());  // every chunk is Int32Array, matching the field
+
+ arrays.push_back(MakePrimitive<Int16Array>(100, 10));  // mismatching chunk type
+ column_.reset(new Column(field, arrays));
+ ASSERT_RAISES(Invalid, column_->ValidateData());
+}
+
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/column.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/column.cc b/cpp/src/arrow/column.cc
new file mode 100644
index 0000000..46acf8d
--- /dev/null
+++ b/cpp/src/arrow/column.cc
@@ -0,0 +1,70 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/column.h"
+
+#include <memory>
+#include <sstream>
+
+#include "arrow/array.h"
+#include "arrow/type.h"
+#include "arrow/util/status.h"
+
+namespace arrow {
+
+ChunkedArray::ChunkedArray(const ArrayVector& chunks) :  // keeps a copy of the chunk vector
+ chunks_(chunks) {
+ length_ = 0;
+ null_count_ = 0;
+ for (const std::shared_ptr<Array>& chunk : chunks) {  // cache totals across all chunks
+ length_ += chunk->length();
+ null_count_ += chunk->null_count();
+ }
+}
+
+Column::Column(const std::shared_ptr<Field>& field, const ArrayVector& chunks) :
+ field_(field) {
+ data_ = std::make_shared<ChunkedArray>(chunks);  // wrap the chunks in a new ChunkedArray
+}
+
+Column::Column(const std::shared_ptr<Field>& field,
+ const std::shared_ptr<Array>& data) :
+ field_(field) {
+ data_ = std::make_shared<ChunkedArray>(ArrayVector({data}));  // single-chunk column
+}
+
+Column::Column(const std::shared_ptr<Field>& field,
+ const std::shared_ptr<ChunkedArray>& data) :
+ field_(field),
+ data_(data) {}  // shares the passed ChunkedArray instead of building a new one
+
+Status Column::ValidateData() {  // OK iff every chunk's type Equals() the field's type
+ for (int i = 0; i < data_->num_chunks(); ++i) {
+ const std::shared_ptr<DataType>& type = data_->chunk(i)->type();
+ if (!this->type()->Equals(type)) {  // stop at the first mismatching chunk
+ std::stringstream ss;
+ ss << "In chunk " << i << " expected type "
+ << this->type()->ToString()
+ << " but saw "
+ << type->ToString();
+ return Status::Invalid(ss.str());
+ }
+ }
+ return Status::OK();
+}
+
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/column.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/column.h b/cpp/src/arrow/column.h
new file mode 100644
index 0000000..1ad97b2
--- /dev/null
+++ b/cpp/src/arrow/column.h
@@ -0,0 +1,108 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_COLUMN_H
+#define ARROW_COLUMN_H
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/type.h"
+
+namespace arrow {
+
+class Array;
+class Status;
+
+typedef std::vector<std::shared_ptr<Array>> ArrayVector;
+
+// A data structure managing a list of primitive Arrow arrays logically as one
+// large array
+class ChunkedArray {
+ public:
+ explicit ChunkedArray(const ArrayVector& chunks);
+
+ // @returns: the total length of the chunked array; computed on construction
+ int64_t length() const {
+ return length_;
+ }
+
+ int64_t null_count() const {  // total null count over all chunks; computed on construction
+ return null_count_;
+ }
+
+ int num_chunks() const {  // note: the vector's size is narrowed to int here
+ return chunks_.size();
+ }
+
+ const std::shared_ptr<Array>& chunk(int i) const {  // no bounds check on i
+ return chunks_[i];
+ }
+
+ protected:
+ ArrayVector chunks_;
+ int64_t length_;
+ int64_t null_count_;
+};
+
+// An immutable column data structure consisting of a field (type metadata) and
+// a logical chunked data array (which can be validated as all being the same
+// type).
+class Column {
+ public:
+ Column(const std::shared_ptr<Field>& field, const ArrayVector& chunks);
+ Column(const std::shared_ptr<Field>& field,
+ const std::shared_ptr<ChunkedArray>& data);
+
+ Column(const std::shared_ptr<Field>& field, const std::shared_ptr<Array>& data);
+
+ int64_t length() const {  // forwards to the chunked data
+ return data_->length();
+ }
+
+ int64_t null_count() const {  // forwards to the chunked data
+ return data_->null_count();
+ }
+
+ // @returns: the column's name in the passed metadata
+ const std::string& name() const {
+ return field_->name;
+ }
+
+ // @returns: the column's type according to the metadata
+ const std::shared_ptr<DataType>& type() const {
+ return field_->type;
+ }
+
+ // @returns: the column's data as a chunked logical array
+ const std::shared_ptr<ChunkedArray>& data() const {
+ return data_;
+ }
+ // Verify that the column's array data is consistent with the passed field's
+ // metadata
+ Status ValidateData();  // not run by the constructors; callers invoke it explicitly
+
+ protected:
+ std::shared_ptr<Field> field_;
+ std::shared_ptr<ChunkedArray> data_;
+};
+
+} // namespace arrow
+
+#endif // ARROW_COLUMN_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/.gitignore
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/.gitignore b/cpp/src/arrow/ipc/.gitignore
new file mode 100644
index 0000000..8150d7e
--- /dev/null
+++ b/cpp/src/arrow/ipc/.gitignore
@@ -0,0 +1 @@
+*_generated.h
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/CMakeLists.txt b/cpp/src/arrow/ipc/CMakeLists.txt
new file mode 100644
index 0000000..383684f
--- /dev/null
+++ b/cpp/src/arrow/ipc/CMakeLists.txt
@@ -0,0 +1,51 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#######################################
+# arrow_ipc
+#######################################
+
+# Headers: top level
+install(FILES
+ adapter.h
+ metadata.h
+ memory.h
+ DESTINATION include/arrow/ipc)
+
+ADD_ARROW_TEST(ipc-adapter-test)
+ADD_ARROW_TEST(ipc-memory-test)
+ADD_ARROW_TEST(ipc-metadata-test)
+
+set(OUTPUT_DIR ${CMAKE_SOURCE_DIR}/src/arrow/ipc)
+set(FBS_OUTPUT_FILES "${OUTPUT_DIR}/Message_generated.h")
+
+# make clean will delete the generated file (flatc emits Message_generated.h)
+set_source_files_properties(${FBS_OUTPUT_FILES} PROPERTIES GENERATED TRUE)
+
+set(FBS_SRC ${CMAKE_SOURCE_DIR}/../format/Message.fbs)
+get_filename_component(ABS_FBS_SRC ${FBS_SRC} ABSOLUTE)
+
+add_custom_command(
+  OUTPUT ${FBS_OUTPUT_FILES}
+  COMMAND ${FLATBUFFERS_COMPILER} -c -o ${OUTPUT_DIR} ${ABS_FBS_SRC}
+  DEPENDS ${ABS_FBS_SRC}
+  COMMENT "Running flatc compiler on ${FBS_SRC}"
+  VERBATIM
+)
+
+add_custom_target(metadata_fbs DEPENDS ${FBS_OUTPUT_FILES})
+add_dependencies(arrow metadata_fbs)
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/adapter.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/adapter.cc b/cpp/src/arrow/ipc/adapter.cc
new file mode 100644
index 0000000..7cdb965
--- /dev/null
+++ b/cpp/src/arrow/ipc/adapter.cc
@@ -0,0 +1,305 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/ipc/adapter.h"
+
+#include <cstdint>
+#include <cstring>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/ipc/memory.h"
+#include "arrow/ipc/Message_generated.h"
+#include "arrow/ipc/metadata.h"
+#include "arrow/ipc/metadata-internal.h"
+#include "arrow/schema.h"
+#include "arrow/table.h"
+#include "arrow/type.h"
+#include "arrow/types/construct.h"
+#include "arrow/types/primitive.h"
+#include "arrow/util/buffer.h"
+#include "arrow/util/status.h"
+
+namespace arrow {
+
+namespace flatbuf = apache::arrow::flatbuf;
+
+namespace ipc {
+
+static bool IsPrimitive(const DataType* type) {  // true only for the type ids listed below
+ switch (type->type) {
+ // NA is null type or "no type", considered primitive for now
+ case Type::NA:
+ case Type::BOOL:
+ case Type::UINT8:
+ case Type::INT8:
+ case Type::UINT16:
+ case Type::INT16:
+ case Type::UINT32:
+ case Type::INT32:
+ case Type::UINT64:
+ case Type::INT64:
+ case Type::FLOAT:
+ case Type::DOUBLE:
+ return true;
+ default:  // every other type id is treated as non-primitive by the IPC code here
+ return false;
+ }
+}
+
+// ----------------------------------------------------------------------
+// Row batch write path
+
+// Append the flattened description of one array to the payload being assembled:
+// a FieldNode (length, null count) followed by two buffer slots, the null bitmap
+// (or a zero-length placeholder when there are no nulls) and then the values.
+//
+// Returns NotImplemented for every array type that cannot be written yet, so an
+// unsupported column is reported instead of being silently left out of the payload.
+Status VisitArray(const Array* arr, std::vector<flatbuf::FieldNode>* field_nodes,
+    std::vector<std::shared_ptr<Buffer>>* buffers) {
+  if (IsPrimitive(arr->type().get())) {
+    // NOTE(review): IsPrimitive() also accepts Type::NA; this cast assumes such an
+    // array really is a PrimitiveArray -- confirm for null-typed arrays
+    const PrimitiveArray* prim_arr = static_cast<const PrimitiveArray*>(arr);
+
+    field_nodes->push_back(
+        flatbuf::FieldNode(prim_arr->length(), prim_arr->null_count()));
+
+    if (prim_arr->null_count() > 0) {
+      buffers->push_back(prim_arr->nulls());
+    } else {
+      // Push a dummy zero-length buffer, not to be copied
+      buffers->push_back(std::make_shared<Buffer>(nullptr, 0));
+    }
+    buffers->push_back(prim_arr->data());
+  } else if (arr->type_enum() == Type::LIST) {
+    // TODO(wesm)
+    return Status::NotImplemented("List type");
+  } else if (arr->type_enum() == Type::STRUCT) {
+    // TODO(wesm)
+    return Status::NotImplemented("Struct type");
+  } else {
+    // Previously fell through to Status::OK() without emitting a field node or
+    // buffers, leaving the payload inconsistent with the schema
+    return Status::NotImplemented("Array type not supported by the IPC write path");
+  }
+
+  return Status::OK();
+}
+
+// Serializes one RowBatch into a MemorySource. AssemblePayload() flattens the
+// columns into field nodes and buffers; Write() then copies the buffers out
+// contiguously and appends the flatbuffer data header after them.
+class RowBatchWriter {
+ public:
+  explicit RowBatchWriter(const RowBatch* batch) :
+      batch_(batch) {}
+
+  // Collect the field nodes and buffers of every column. Previously collected
+  // state is discarded first, so calling this again does not duplicate entries.
+  Status AssemblePayload() {
+    field_nodes_.clear();
+    buffers_.clear();
+
+    // Perform depth-first traversal of the row-batch
+    for (int i = 0; i < batch_->num_columns(); ++i) {
+      const Array* arr = batch_->column(i).get();
+      RETURN_NOT_OK(VisitArray(arr, &field_nodes_, &buffers_));
+    }
+    return Status::OK();
+  }
+
+  // Write the assembled buffers to dst starting at position, then the data header
+  // directly after them. On success *data_header_offset receives the absolute
+  // offset of the data header. Must be called after AssemblePayload().
+  Status Write(MemorySource* dst, int64_t position, int64_t* data_header_offset) {
+    // Start from empty buffer metadata so that a repeated Write() does not append
+    // a second set of entries to the data header
+    buffer_meta_.clear();
+
+    // Write out all the buffers contiguously and compute the total size of the
+    // memory payload
+    int64_t offset = 0;
+    for (size_t i = 0; i < buffers_.size(); ++i) {
+      const Buffer* buffer = buffers_[i].get();
+      int64_t size = buffer->size();
+
+      // TODO(wesm): We currently have no notion of shared memory page id's,
+      // but we've included it in the metadata IDL for when we have it in the
+      // future. Use page=0 for now
+      //
+      // Note that page ids are a bespoke notion for Arrow and not a feature we
+      // are using from any OS-level shared memory. The thought is that systems
+      // may (in the future) associate integer page id's with physical memory
+      // pages (according to whatever is the desired shared memory mechanism)
+      buffer_meta_.push_back(flatbuf::Buffer(0, position + offset, size));
+
+      // Zero-length buffers still get a metadata entry but nothing is copied
+      if (size > 0) {
+        RETURN_NOT_OK(dst->Write(position + offset, buffer->data(), size));
+        offset += size;
+      }
+    }
+
+    // Now that we have computed the locations of all of the buffers in shared
+    // memory, the data header can be converted to a flatbuffer and written out
+    //
+    // Note: The memory written here is prefixed by the size of the flatbuffer
+    // itself as an int32_t. On reading from a MemorySource, you will have to
+    // determine the data header size then request a buffer such that you can
+    // construct the flatbuffer data accessor object (see arrow::ipc::Message)
+    std::shared_ptr<Buffer> data_header;
+    RETURN_NOT_OK(WriteDataHeader(batch_->num_rows(), offset,
+        field_nodes_, buffer_meta_, &data_header));
+
+    // Write the data header at the end
+    RETURN_NOT_OK(dst->Write(position + offset, data_header->data(),
+        data_header->size()));
+
+    *data_header_offset = position + offset;
+    return Status::OK();
+  }
+
+  // This must be called after invoking AssemblePayload
+  int64_t DataHeaderSize() {
+    // TODO(wesm): In case it is needed, compute the upper bound for the size
+    // of the buffer containing the flatbuffer data header.
+    return 0;
+  }
+
+  // Total footprint of buffers. This must be called after invoking
+  // AssemblePayload
+  int64_t TotalBytes() {
+    int64_t total = 0;
+    for (const std::shared_ptr<Buffer>& buffer : buffers_) {
+      total += buffer->size();
+    }
+    return total;
+  }
+
+ private:
+  const RowBatch* batch_;
+
+  // Flattened payload description, filled by AssemblePayload() / Write()
+  std::vector<flatbuf::FieldNode> field_nodes_;
+  std::vector<flatbuf::Buffer> buffer_meta_;
+  std::vector<std::shared_ptr<Buffer>> buffers_;
+};
+
+Status WriteRowBatch(MemorySource* dst, const RowBatch* batch, int64_t position,
+ int64_t* header_offset) {
+ RowBatchWriter serializer(batch);
+ RETURN_NOT_OK(serializer.AssemblePayload());  // flatten columns before any write
+ return serializer.Write(dst, position, header_offset);  // sets *header_offset on success
+}
+// ----------------------------------------------------------------------
+// Row batch read path
+
+static constexpr int64_t INIT_METADATA_SIZE = 4096;
+
+// Reassembles a RowBatch from record batch metadata plus the MemorySource that
+// holds the buffers the metadata refers to.
+class RowBatchReader::Impl {
+ public:
+  Impl(MemorySource* source, const std::shared_ptr<RecordBatchMessage>& metadata) :
+      source_(source),
+      metadata_(metadata),
+      field_index_(0),
+      buffer_index_(0) {
+    num_buffers_ = metadata->num_buffers();
+    num_flattened_fields_ = metadata->num_fields();
+  }
+
+  // Build one array per schema field, in order, and wrap them in a RowBatch whose
+  // row count comes from the metadata. Returns the first error encountered.
+  Status AssembleBatch(const std::shared_ptr<Schema>& schema,
+      std::shared_ptr<RowBatch>* out) {
+    std::vector<std::shared_ptr<Array>> arrays(schema->num_fields());
+
+    // The field_index and buffer_index are incremented in NextArray based on
+    // how much of the batch is "consumed" (through nested data reconstruction,
+    // for example)
+    field_index_ = 0;
+    buffer_index_ = 0;
+    for (int i = 0; i < schema->num_fields(); ++i) {
+      const Field* field = schema->field(i).get();
+      RETURN_NOT_OK(NextArray(field, &arrays[i]));
+    }
+
+    *out = std::make_shared<RowBatch>(schema, metadata_->length(),
+        arrays);
+    return Status::OK();
+  }
+
+ private:
+  // Traverse the flattened record batch metadata and reassemble the
+  // corresponding array containers
+  Status NextArray(const Field* field, std::shared_ptr<Array>* out) {
+    const std::shared_ptr<DataType>& type = field->type;
+
+    // pop off a field
+    if (field_index_ >= num_flattened_fields_) {
+      return Status::Invalid("Ran out of field metadata, likely malformed");
+    }
+
+    // This only contains the length and null count, which we need to figure
+    // out what to do with the buffers. For example, if null_count == 0, then
+    // we can skip that buffer without reading from shared memory
+    FieldMetadata field_meta = metadata_->field(field_index_++);
+
+    if (IsPrimitive(type.get())) {
+      // The write path emits two buffer slots for every primitive array (null
+      // bitmap, then values). Consume both slots here even when a buffer is not
+      // read, so the index stays aligned for the following fields.
+      const int nulls_index = buffer_index_++;
+      const int data_index = buffer_index_++;
+
+      std::shared_ptr<Buffer> nulls;
+      std::shared_ptr<Buffer> data;
+      if (field_meta.null_count != 0) {
+        RETURN_NOT_OK(GetBuffer(nulls_index, &nulls));
+      }
+      if (field_meta.length > 0) {
+        RETURN_NOT_OK(GetBuffer(data_index, &data));
+      } else {
+        // Empty array: substitute a zero-length buffer instead of reading
+        data.reset(new Buffer(nullptr, 0));
+      }
+      return MakePrimitiveArray(type, field_meta.length, data,
+          field_meta.null_count, nulls, out);
+    } else {
+      return Status::NotImplemented("Non-primitive types not complete yet");
+    }
+  }
+
+  // Read the buffer described by metadata entry buffer_index from the source.
+  // Returns Invalid when the index is outside the buffers listed in the metadata.
+  Status GetBuffer(int buffer_index, std::shared_ptr<Buffer>* out) {
+    if (buffer_index < 0 || buffer_index >= num_buffers_) {
+      return Status::Invalid("Ran out of buffer metadata, likely malformed");
+    }
+    BufferMetadata metadata = metadata_->buffer(buffer_index);
+    return source_->ReadAt(metadata.offset, metadata.length, out);
+  }
+
+  MemorySource* source_;
+  std::shared_ptr<RecordBatchMessage> metadata_;
+
+  // Cursors into the flattened field / buffer metadata; reset by AssembleBatch
+  int field_index_;
+  int buffer_index_;
+  int num_buffers_;
+  int num_flattened_fields_;
+};
+
+// Open a reader for the record batch whose data header starts at `position`.
+//
+// Reads an initial window of INIT_METADATA_SIZE bytes, takes the leading int32 as
+// the metadata size, and re-reads when the metadata does not fit in that window.
+// Returns Invalid when the size prefix is truncated or negative, or when the
+// message is not a record batch; errors from the MemorySource and from
+// Message::Open are propagated.
+Status RowBatchReader::Open(MemorySource* source, int64_t position,
+    std::shared_ptr<RowBatchReader>* out) {
+  std::shared_ptr<Buffer> metadata;
+  RETURN_NOT_OK(source->ReadAt(position, INIT_METADATA_SIZE, &metadata));
+
+  if (metadata->size() < static_cast<int64_t>(sizeof(int32_t))) {
+    return Status::Invalid("Metadata size prefix is truncated");
+  }
+
+  // Copy the prefix out rather than dereferencing a cast pointer: the address for
+  // an arbitrary `position` is not guaranteed to be suitably aligned for int32_t
+  int32_t metadata_size = 0;
+  std::memcpy(&metadata_size, metadata->data(), sizeof(metadata_size));
+  if (metadata_size < 0) {
+    return Status::Invalid("Negative metadata size, likely malformed");
+  }
+
+  // We may not need to call source->ReadAt again
+  if (metadata_size > static_cast<int>(INIT_METADATA_SIZE - sizeof(int32_t))) {
+    // We don't have enough data, read the indicated metadata size.
+    // NOTE(review): this re-read starts after the int32 size prefix, whereas the
+    // buffer from the first read still begins with it -- confirm that
+    // Message::Open is given the layout it expects in both cases
+    RETURN_NOT_OK(source->ReadAt(position + sizeof(int32_t),
+        metadata_size, &metadata));
+  }
+
+  // TODO(wesm): buffer slicing here would be better in case ReadAt returns
+  // allocated memory
+
+  std::shared_ptr<Message> message;
+  RETURN_NOT_OK(Message::Open(metadata, &message));
+
+  if (message->type() != Message::RECORD_BATCH) {
+    return Status::Invalid("Metadata message is not a record batch");
+  }
+
+  std::shared_ptr<RecordBatchMessage> batch_meta = message->GetRecordBatch();
+
+  std::shared_ptr<RowBatchReader> result(new RowBatchReader());
+  result->impl_.reset(new Impl(source, batch_meta));
+  *out = result;
+
+  return Status::OK();
+}
+
+Status RowBatchReader::GetRowBatch(const std::shared_ptr<Schema>& schema,
+ std::shared_ptr<RowBatch>* out) {
+ return impl_->AssembleBatch(schema, out);  // all work is delegated to the Impl
+}
+
+
+} // namespace ipc
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/adapter.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/adapter.h b/cpp/src/arrow/ipc/adapter.h
new file mode 100644
index 0000000..26dea6d
--- /dev/null
+++ b/cpp/src/arrow/ipc/adapter.h
@@ -0,0 +1,86 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Public API for writing and accessing (with zero copy, if possible) Arrow
+// data in shared memory
+
+#ifndef ARROW_IPC_ADAPTER_H
+#define ARROW_IPC_ADAPTER_H
+
+#include <cstdint>
+#include <memory>
+
+namespace arrow {
+
+class Array;
+class RowBatch;
+class Schema;
+class Status;
+
+namespace ipc {
+
+class MemorySource;
+class RecordBatchMessage;
+
+// ----------------------------------------------------------------------
+// Write path
+
+// Write the RowBatch (collection of equal-length Arrow arrays) to the memory
+// source at the indicated position
+//
+// First, each of the memory buffers is written out end-to-end, starting at
+// the indicated position.
+//
+// Then, this function writes the batch metadata as a flatbuffer (see
+// format/Message.fbs -- the RecordBatch message type) like so:
+//
+// <int32: metadata size> <uint8*: metadata>
+//
+// Finally, the memory offset to the start of the metadata / data header is
+// returned in an out-variable
+Status WriteRowBatch(MemorySource* dst, const RowBatch* batch, int64_t position,
+ int64_t* header_offset);
+
+// int64_t GetRowBatchMetadata(const RowBatch* batch);
+
+// Compute the precise number of bytes needed in a contiguous memory segment to
+// write the row batch. This involves generating the complete serialized
+// Flatbuffers metadata.
+int64_t GetRowBatchSize(const RowBatch* batch);
+
+// ----------------------------------------------------------------------
+// "Read" path; does not copy data if the MemorySource does not
+
+class RowBatchReader {
+ public:
+ static Status Open(MemorySource* source, int64_t position,  // position: data header offset
+ std::shared_ptr<RowBatchReader>* out);
+
+ // Reassemble the row batch. A Schema is required to be able to construct the
+ // right array containers
+ Status GetRowBatch(const std::shared_ptr<Schema>& schema,
+ std::shared_ptr<RowBatch>* out);
+
+ private:
+ class Impl;  // defined in adapter.cc
+ std::unique_ptr<Impl> impl_;  // set by Open()
+};
+
+} // namespace ipc
+} // namespace arrow
+
+#endif // ARROW_IPC_ADAPTER_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/ipc-adapter-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/ipc-adapter-test.cc b/cpp/src/arrow/ipc/ipc-adapter-test.cc
new file mode 100644
index 0000000..d75998f
--- /dev/null
+++ b/cpp/src/arrow/ipc/ipc-adapter-test.cc
@@ -0,0 +1,112 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+#include <limits>
+#include <memory>
+#include <random>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "arrow/ipc/adapter.h"
+#include "arrow/ipc/memory.h"
+#include "arrow/ipc/test-common.h"
+
+#include "arrow/test-util.h"
+#include "arrow/types/primitive.h"
+#include "arrow/util/bit-util.h"
+#include "arrow/util/buffer.h"
+#include "arrow/util/memory-pool.h"
+#include "arrow/util/status.h"
+
+namespace arrow {
+namespace ipc {
+
+class TestWriteRowBatch : public ::testing::Test, public MemoryMapFixture {
+ public:
+ void SetUp() {
+ pool_ = default_memory_pool();
+ }
+ void TearDown() {
+ MemoryMapFixture::TearDown();  // forward explicitly to the fixture base's TearDown
+ }
+
+ void InitMemoryMap(int64_t size) {  // create a file of `size` bytes and map it read-write
+ std::string path = "test-write-row-batch";
+ MemoryMapFixture::CreateFile(path, size);
+ ASSERT_OK(MemoryMappedSource::Open(path, MemorySource::READ_WRITE, &mmap_));
+ }
+
+ protected:
+ MemoryPool* pool_;
+ std::shared_ptr<MemoryMappedSource> mmap_;  // populated by InitMemoryMap()
+};
+
+const auto INT32 = std::make_shared<Int32Type>();
+
+TEST_F(TestWriteRowBatch, IntegerRoundTrip) {  // write two int32 columns, read back, compare
+ const int length = 1000;
+
+ // Make the schema
+ auto f0 = std::make_shared<Field>("f0", INT32);
+ auto f1 = std::make_shared<Field>("f1", INT32);
+ std::shared_ptr<Schema> schema(new Schema({f0, f1}));
+
+ // Example data
+
+ auto data = std::make_shared<PoolBuffer>(pool_);
+ ASSERT_OK(data->Resize(length * sizeof(int32_t)));
+ test::rand_uniform_int(length, 0, 0, std::numeric_limits<int32_t>::max(),
+ reinterpret_cast<int32_t*>(data->mutable_data()));
+
+ auto nulls = std::make_shared<PoolBuffer>(pool_);
+ int null_bytes = util::bytes_for_bits(length);
+ ASSERT_OK(nulls->Resize(null_bytes));
+ test::random_bytes(null_bytes, 0, nulls->mutable_data());
+
+ auto a0 = std::make_shared<Int32Array>(length, data);  // no null bitmap passed
+ auto a1 = std::make_shared<Int32Array>(length, data,  // same values plus the random bitmap
+ test::bitmap_popcount(nulls->data(), length), nulls);
+
+ RowBatch batch(schema, length, {a0, a1});
+
+ // TODO(wesm): computing memory requirements for a row batch
+ // 64k is plenty of space
+ InitMemoryMap(1 << 16);
+
+ int64_t header_location;
+ ASSERT_OK(WriteRowBatch(mmap_.get(), &batch, 0, &header_location));
+
+ std::shared_ptr<RowBatchReader> result;
+ ASSERT_OK(RowBatchReader::Open(mmap_.get(), header_location, &result));
+
+ std::shared_ptr<RowBatch> batch_result;
+ ASSERT_OK(result->GetRowBatch(schema, &batch_result));
+ EXPECT_EQ(batch.num_rows(), batch_result->num_rows());
+
+ for (int i = 0; i < batch.num_columns(); ++i) {  // column-wise comparison via Equals()
+ EXPECT_TRUE(batch.column(i)->Equals(batch_result->column(i)))
+ << i << batch.column_name(i);
+ }
+}
+
+} // namespace ipc
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/ipc-memory-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/ipc-memory-test.cc b/cpp/src/arrow/ipc/ipc-memory-test.cc
new file mode 100644
index 0000000..332ad2a
--- /dev/null
+++ b/cpp/src/arrow/ipc/ipc-memory-test.cc
@@ -0,0 +1,82 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "arrow/ipc/memory.h"
+#include "arrow/ipc/test-common.h"
+#include "arrow/test-util.h"
+#include "arrow/util/buffer.h"
+#include "arrow/util/status.h"
+
+namespace arrow {
+namespace ipc {
+
+// Test fixture for MemoryMappedSource; combines the gtest base class with
+// MemoryMapFixture and forwards teardown to the latter.
+class TestMemoryMappedSource : public ::testing::Test, public MemoryMapFixture {
+ public:
+ // Delegates cleanup to MemoryMapFixture::TearDown()
+ void TearDown() {
+ MemoryMapFixture::TearDown();
+ }
+};
+
+// Placeholder: the body is empty, no invalid-usage checks are implemented yet.
+TEST_F(TestMemoryMappedSource, InvalidUsages) {
+}
+
+// Creates a file large enough for `reps` buffers, memory-maps it read-write,
+// then repeatedly writes a random buffer at consecutive positions and checks
+// that reading the same range back yields identical bytes.
+TEST_F(TestMemoryMappedSource, WriteRead) {
+  const int64_t buffer_size = 1024;
+  std::vector<uint8_t> buffer(buffer_size);
+
+  // Fill exactly buffer_size bytes so this stays in sync with the vector's
+  // size if the constant is ever changed
+  test::random_bytes(buffer_size, 0, buffer.data());
+
+  const int reps = 5;
+
+  std::string path = "ipc-write-read-test";
+  CreateFile(path, reps * buffer_size);
+
+  std::shared_ptr<MemoryMappedSource> result;
+  ASSERT_OK(MemoryMappedSource::Open(path, MemorySource::READ_WRITE, &result));
+
+  int64_t position = 0;
+
+  std::shared_ptr<Buffer> out_buffer;
+  for (int i = 0; i < reps; ++i) {
+    ASSERT_OK(result->Write(position, buffer.data(), buffer_size));
+    ASSERT_OK(result->ReadAt(position, buffer_size, &out_buffer));
+
+    ASSERT_EQ(0, memcmp(out_buffer->data(), buffer.data(), buffer_size));
+
+    position += buffer_size;
+  }
+}
+
+// Opening a path that does not exist must fail with an IOError status.
+TEST_F(TestMemoryMappedSource, InvalidFile) {
+ std::string non_existent_path = "invalid-file-name-asfd";
+
+ std::shared_ptr<MemoryMappedSource> result;
+ ASSERT_RAISES(IOError, MemoryMappedSource::Open(non_existent_path,
+ MemorySource::READ_ONLY, &result));
+}
+
+} // namespace ipc
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/ipc-metadata-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/ipc-metadata-test.cc b/cpp/src/arrow/ipc/ipc-metadata-test.cc
new file mode 100644
index 0000000..ceabec0
--- /dev/null
+++ b/cpp/src/arrow/ipc/ipc-metadata-test.cc
@@ -0,0 +1,99 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <memory>
+#include <sstream>
+#include <string>
+
+#include "gtest/gtest.h"
+
+#include "arrow/ipc/metadata.h"
+#include "arrow/schema.h"
+#include "arrow/test-util.h"
+#include "arrow/type.h"
+#include "arrow/util/status.h"
+
+namespace arrow {
+
+class Buffer;
+
+// Fails the current test, printing both schemas, when lhs and rhs are not
+// equal according to Schema::Equals.
+static inline void assert_schema_equal(const Schema* lhs, const Schema* rhs) {
+  if (lhs->Equals(*rhs)) { return; }
+
+  std::stringstream message;
+  message << "left schema: " << lhs->ToString() << std::endl;
+  message << "right schema: " << rhs->ToString() << std::endl;
+  FAIL() << message.str();
+}
+
+// Fixture providing a schema serialization round-trip check.
+class TestSchemaMessage : public ::testing::Test {
+ public:
+ void SetUp() {}
+
+ // Serializes `schema` with ipc::WriteSchema, reopens the bytes as an
+ // ipc::Message, and asserts that the message is of SCHEMA type, reports the
+ // same number of fields, and reconstructs to a schema equal to the input.
+ void CheckRoundtrip(const Schema* schema) {
+ std::shared_ptr<Buffer> buffer;
+ ASSERT_OK(ipc::WriteSchema(schema, &buffer));
+
+ std::shared_ptr<ipc::Message> message;
+ ASSERT_OK(ipc::Message::Open(buffer, &message));
+
+ ASSERT_EQ(ipc::Message::SCHEMA, message->type());
+
+ std::shared_ptr<ipc::SchemaMessage> schema_msg = message->GetSchema();
+ ASSERT_EQ(schema->num_fields(), schema_msg->num_fields());
+
+ std::shared_ptr<Schema> schema2;
+ ASSERT_OK(schema_msg->GetSchema(&schema2));
+
+ assert_schema_equal(schema, schema2.get());
+ }
+};
+
+// Shared Int32Type instance used for the struct children in NestedFields.
+const std::shared_ptr<DataType> INT32 = std::make_shared<Int32Type>();
+
+// Round-trips a schema with one field per signed/unsigned integer width plus
+// float, double and boolean.
+TEST_F(TestSchemaMessage, PrimitiveFields) {
+ auto f0 = std::make_shared<Field>("f0", std::make_shared<Int8Type>());
+ auto f1 = std::make_shared<Field>("f1", std::make_shared<Int16Type>());
+ auto f2 = std::make_shared<Field>("f2", std::make_shared<Int32Type>());
+ auto f3 = std::make_shared<Field>("f3", std::make_shared<Int64Type>());
+ auto f4 = std::make_shared<Field>("f4", std::make_shared<UInt8Type>());
+ auto f5 = std::make_shared<Field>("f5", std::make_shared<UInt16Type>());
+ auto f6 = std::make_shared<Field>("f6", std::make_shared<UInt32Type>());
+ auto f7 = std::make_shared<Field>("f7", std::make_shared<UInt64Type>());
+ auto f8 = std::make_shared<Field>("f8", std::make_shared<FloatType>());
+ auto f9 = std::make_shared<Field>("f9", std::make_shared<DoubleType>());
+ auto f10 = std::make_shared<Field>("f10", std::make_shared<BooleanType>());
+
+ Schema schema({f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10});
+ CheckRoundtrip(&schema);
+}
+
+// Round-trips a schema containing a list-of-int32 field and a struct field
+// with three int32 children.
+TEST_F(TestSchemaMessage, NestedFields) {
+ auto type = std::make_shared<ListType>(std::make_shared<Int32Type>());
+ auto f0 = std::make_shared<Field>("f0", type);
+
+ std::shared_ptr<StructType> type2(new StructType({
+ std::make_shared<Field>("k1", INT32),
+ std::make_shared<Field>("k2", INT32),
+ std::make_shared<Field>("k3", INT32)}));
+ auto f1 = std::make_shared<Field>("f1", type2);
+
+ Schema schema({f0, f1});
+ CheckRoundtrip(&schema);
+}
+
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/memory.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/memory.cc b/cpp/src/arrow/ipc/memory.cc
new file mode 100644
index 0000000..e630ccd
--- /dev/null
+++ b/cpp/src/arrow/ipc/memory.cc
@@ -0,0 +1,162 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/ipc/memory.h"
+
+#include <sys/mman.h> // For memory-mapping
+#include <algorithm>
+#include <cerrno>
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+#include <sstream>
+#include <string>
+
+#include "arrow/util/buffer.h"
+#include "arrow/util/status.h"
+
+namespace arrow {
+namespace ipc {
+
+// Records the access mode requested for this memory source.
+MemorySource::MemorySource(AccessMode access_mode) :
+ access_mode_(access_mode) {}
+
+// Out-of-line definition of the virtual destructor; nothing to release here.
+MemorySource::~MemorySource() {}
+
+// Implement MemoryMappedSource
+
+// Private implementation of MemoryMappedSource. Owns the FILE handle and the
+// memory mapping covering the whole file, and releases both on destruction.
+class MemoryMappedSource::Impl {
+ public:
+  Impl() :
+      file_(nullptr),
+      size_(0),
+      is_open_(false),
+      data_(nullptr) {}
+
+  ~Impl() {
+    // data_ is only non-null after a successful mmap, so it is always safe to
+    // unmap here; likewise file_ is reset to nullptr on every failure path
+    if (data_ != nullptr) {
+      munmap(data_, size_);
+    }
+    if (file_ != nullptr) {
+      fclose(file_);
+    }
+  }
+
+  // Opens the file at `path` and maps its entire contents into memory.
+  //
+  // The file is opened and mapped writable only for MemorySource::READ_WRITE;
+  // otherwise the mapping is read-only. An empty file is opened successfully
+  // without creating a mapping (mmap rejects zero-length mappings).
+  //
+  // Returns Status::IOError if a file is already open, or if opening, seeking
+  // or mapping fails. On failure no file handle or mapping is retained.
+  Status Open(const std::string& path, MemorySource::AccessMode mode) {
+    if (is_open_) {
+      return Status::IOError("A file is already open");
+    }
+
+    path_ = path;
+
+    const bool writable = (mode == MemorySource::READ_WRITE);
+    file_ = fopen(path.c_str(), writable ? "r+b" : "rb");
+    if (file_ == nullptr) {
+      std::stringstream ss;
+      ss << "Unable to open file, errno: " << errno;
+      return Status::IOError(ss.str());
+    }
+
+    // Determine the file size by seeking to the end
+    if (fseek(file_, 0L, SEEK_END) != 0) {
+      return CloseAndFail("Unable to seek to end of file");
+    }
+    const int64_t file_size = ftell(file_);
+    if (file_size < 0) {
+      return CloseAndFail("Unable to determine file size");
+    }
+    if (fseek(file_, 0L, SEEK_SET) != 0) {
+      return CloseAndFail("Unable to seek to start of file");
+    }
+
+    if (file_size > 0) {
+      int protection = PROT_READ;
+      if (writable) {
+        protection |= PROT_WRITE;
+      }
+
+      void* mapping = mmap(nullptr, file_size, protection, MAP_SHARED,
+          fileno(file_), 0);
+      // mmap signals failure with MAP_FAILED, not with a null pointer
+      if (mapping == MAP_FAILED) {
+        std::stringstream ss;
+        ss << "Memory mapping file failed, errno: " << errno;
+        return CloseAndFail(ss.str());
+      }
+      data_ = static_cast<uint8_t*>(mapping);
+    }
+
+    size_ = file_size;
+    is_open_ = true;
+    return Status::OK();
+  }
+
+  // Size in bytes of the mapped file (0 before a successful Open)
+  int64_t size() const {
+    return size_;
+  }
+
+  // Start of the mapping, or nullptr if nothing is mapped
+  uint8_t* data() {
+    return data_;
+  }
+
+ private:
+  // Closes the file handle so it is not leaked and returns an IOError carrying
+  // `message`. Callers must capture errno into `message` before calling, as
+  // fclose may overwrite it.
+  Status CloseAndFail(const std::string& message) {
+    fclose(file_);
+    file_ = nullptr;
+    return Status::IOError(message);
+  }
+
+  std::string path_;
+  FILE* file_;
+  int64_t size_;
+  bool is_open_;
+
+  // The memory map
+  uint8_t* data_;
+};
+
+// Passes the access mode on to the MemorySource base; the file itself is
+// opened separately through the static Open() factory.
+MemoryMappedSource::MemoryMappedSource(AccessMode access_mode) :
+ MemorySource(access_mode) {}
+
+// Factory: creates a MemoryMappedSource for `path` with the given access mode.
+// `*out` is assigned only when the underlying Impl::Open succeeds; otherwise
+// its failing Status is returned.
+Status MemoryMappedSource::Open(const std::string& path, AccessMode access_mode,
+    std::shared_ptr<MemoryMappedSource>* out) {
+  std::shared_ptr<MemoryMappedSource> source(
+      new MemoryMappedSource(access_mode));
+  source->impl_.reset(new Impl());
+
+  RETURN_NOT_OK(source->impl_->Open(path, access_mode));
+
+  *out = source;
+  return Status::OK();
+}
+
+// Returns the size in bytes reported by the implementation object.
+int64_t MemoryMappedSource::Size() const {
+ return impl_->size();
+}
+
+// Currently a no-op that always returns OK; the mapping is released when the
+// Impl object is destroyed.
+Status MemoryMappedSource::Close() {
+ // munmap handled in ::Impl dtor
+ return Status::OK();
+}
+
+// Returns in `*out` a Buffer referencing the mapped memory starting at
+// `position`. The buffer length is `nbytes`, truncated so that it does not
+// extend past the end of the source.
+//
+// Returns Status::Invalid if `position` is outside [0, Size()) or if `nbytes`
+// is negative.
+Status MemoryMappedSource::ReadAt(int64_t position, int64_t nbytes,
+    std::shared_ptr<Buffer>* out) {
+  if (position < 0 || position >= impl_->size()) {
+    return Status::Invalid("position is out of bounds");
+  }
+  // A negative length would otherwise produce a buffer with a negative size
+  if (nbytes < 0) {
+    return Status::Invalid("nbytes must be non-negative");
+  }
+
+  nbytes = std::min(nbytes, impl_->size() - position);
+  *out = std::make_shared<Buffer>(impl_->data() + position, nbytes);
+  return Status::OK();
+}
+
+// Copies `nbytes` bytes from `data` into the mapped memory at `position`.
+//
+// Returns Status::IOError if the source was not opened READ_WRITE, and
+// Status::Invalid if `position` is outside [0, Size()), `nbytes` is negative,
+// or the write would extend past the end of the source.
+Status MemoryMappedSource::Write(int64_t position, const uint8_t* data,
+    int64_t nbytes) {
+  if (access_mode_ != MemorySource::READ_WRITE) {
+    return Status::IOError("Memory source is not writable");
+  }
+  if (position < 0 || position >= impl_->size()) {
+    return Status::Invalid("position is out of bounds");
+  }
+  // Written as a subtraction so the check cannot overflow
+  if (nbytes < 0 || nbytes > impl_->size() - position) {
+    return Status::Invalid("Cannot write past the end of the memory source");
+  }
+
+  uint8_t* dst = impl_->data() + position;
+  memcpy(dst, data, nbytes);
+
+  return Status::OK();
+}
+
+} // namespace ipc
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/memory.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/memory.h b/cpp/src/arrow/ipc/memory.h
new file mode 100644
index 0000000..0b4d834
--- /dev/null
+++ b/cpp/src/arrow/ipc/memory.h
@@ -0,0 +1,131 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Public API for different interprocess memory sharing mechanisms
+
+#ifndef ARROW_IPC_MEMORY_H
+#define ARROW_IPC_MEMORY_H
+
+#include <cstdint>
+#include <memory>
+#include <string>
+
+#include "arrow/util/macros.h"
+
+namespace arrow {
+
+class Buffer;
+class MutableBuffer;
+class Status;
+
+namespace ipc {
+
+// Abstract output stream: a pure-virtual interface for sequential byte
+// writes. All operations report errors through the returned Status.
+class OutputStream {
+ public:
+ virtual ~OutputStream() {}
+ // Close the output stream
+ virtual Status Close() = 0;
+
+ // The current position in the output stream
+ virtual int64_t Tell() const = 0;
+
+ // Write bytes to the stream
+ virtual Status Write(const uint8_t* data, int64_t length) = 0;
+};
+
+// An output stream that writes to a MutableBuffer, such as one obtained from a
+// memory map
+//
+// NOTE(review): the constructor below initializes only buffer_; capacity_ and
+// position_ are left uninitialized here. Confirm they are set before use by
+// the out-of-line member definitions (not visible in this header).
+class BufferOutputStream : public OutputStream {
+ public:
+ explicit BufferOutputStream(const std::shared_ptr<MutableBuffer>& buffer):
+ buffer_(buffer) {}
+
+ // Implement the OutputStream interface
+ Status Close() override;
+ int64_t Tell() const override;
+ Status Write(const uint8_t* data, int64_t length) override;
+
+ // Returns the number of bytes remaining in the buffer
+ int64_t bytes_remaining() const;
+
+ private:
+ // Destination buffer, shared with the caller
+ std::shared_ptr<MutableBuffer> buffer_;
+ int64_t capacity_;
+ int64_t position_;
+};
+
+// Abstract interface for a region of memory that can be read from and written
+// to at explicit positions. Not copyable.
+class MemorySource {
+ public:
+ // Indicates the access permissions of the memory source
+ enum AccessMode {
+ READ_ONLY,
+ READ_WRITE
+ };
+
+ virtual ~MemorySource();
+
+ // Retrieve a buffer of memory from the source of the indicated size and at
+ // the indicated location
+ // @returns: arrow::Status indicating success / failure. The buffer is set
+ // into the *out argument
+ virtual Status ReadAt(int64_t position, int64_t nbytes,
+ std::shared_ptr<Buffer>* out) = 0;
+
+ virtual Status Close() = 0;
+
+ // Write nbytes bytes from data into the source starting at position
+ virtual Status Write(int64_t position, const uint8_t* data, int64_t nbytes) = 0;
+
+ // @return: the size in bytes of the memory source
+ virtual int64_t Size() const = 0;
+
+ protected:
+ // Only constructible by subclasses; defaults to read-write access
+ explicit MemorySource(AccessMode access_mode = AccessMode::READ_WRITE);
+
+ AccessMode access_mode_;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(MemorySource);
+};
+
+// A memory source that uses memory-mapped files for memory interactions.
+// Instances are created through the static Open() factory; the constructor is
+// private.
+class MemoryMappedSource : public MemorySource {
+ public:
+ // Opens the file at path with the given access mode and, on success, stores
+ // the new source in *out
+ static Status Open(const std::string& path, AccessMode access_mode,
+ std::shared_ptr<MemoryMappedSource>* out);
+
+ Status Close() override;
+
+ Status ReadAt(int64_t position, int64_t nbytes,
+ std::shared_ptr<Buffer>* out) override;
+
+ Status Write(int64_t position, const uint8_t* data, int64_t nbytes) override;
+
+ // @return: the size in bytes of the memory source
+ int64_t Size() const override;
+
+ private:
+ explicit MemoryMappedSource(AccessMode access_mode);
+ // Hide the internal details of this class for now
+ class Impl;
+ std::unique_ptr<Impl> impl_;
+};
+
+} // namespace ipc
+} // namespace arrow
+
+#endif // ARROW_IPC_MEMORY_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/metadata-internal.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc
new file mode 100644
index 0000000..14b1869
--- /dev/null
+++ b/cpp/src/arrow/ipc/metadata-internal.cc
@@ -0,0 +1,317 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/ipc/metadata-internal.h"
+
+#include <flatbuffers/flatbuffers.h>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <sstream>
+#include <string>
+
+#include "arrow/ipc/Message_generated.h"
+#include "arrow/schema.h"
+#include "arrow/type.h"
+#include "arrow/util/buffer.h"
+#include "arrow/util/status.h"
+
+// Shorthand aliases for the flatbuffers builder and offset types used by the
+// conversion helpers in this file.
+typedef flatbuffers::FlatBufferBuilder FBB;
+typedef flatbuffers::Offset<arrow::flatbuf::Field> FieldOffset;
+typedef flatbuffers::Offset<void> Offset;
+
+namespace arrow {
+
+namespace flatbuf = apache::arrow::flatbuf;
+
+namespace ipc {
+
+// Shared instances of the primitive types; the *FromFlatbuffer converters
+// below hand these out instead of allocating a new type object per field.
+const std::shared_ptr<DataType> BOOL = std::make_shared<BooleanType>();
+const std::shared_ptr<DataType> INT8 = std::make_shared<Int8Type>();
+const std::shared_ptr<DataType> INT16 = std::make_shared<Int16Type>();
+const std::shared_ptr<DataType> INT32 = std::make_shared<Int32Type>();
+const std::shared_ptr<DataType> INT64 = std::make_shared<Int64Type>();
+const std::shared_ptr<DataType> UINT8 = std::make_shared<UInt8Type>();
+const std::shared_ptr<DataType> UINT16 = std::make_shared<UInt16Type>();
+const std::shared_ptr<DataType> UINT32 = std::make_shared<UInt32Type>();
+const std::shared_ptr<DataType> UINT64 = std::make_shared<UInt64Type>();
+const std::shared_ptr<DataType> FLOAT = std::make_shared<FloatType>();
+const std::shared_ptr<DataType> DOUBLE = std::make_shared<DoubleType>();
+
+// Maps flatbuffer Int metadata to the matching shared integer type.
+//
+// Only bit widths 8, 16, 32 and 64 are supported, in signed and unsigned
+// variants. Any other width returns Status::NotImplemented and leaves `*out`
+// untouched, so callers never receive OK together with a null type.
+static Status IntFromFlatbuffer(const flatbuf::Int* int_data,
+    std::shared_ptr<DataType>* out) {
+  if (int_data->bitWidth() % 8 != 0) {
+    return Status::NotImplemented("Integers not in cstdint are not implemented");
+  } else if (int_data->bitWidth() > 64) {
+    return Status::NotImplemented("Integers with more than 64 bits not implemented");
+  }
+
+  switch (int_data->bitWidth()) {
+    case 8:
+      *out = int_data->is_signed() ? INT8 : UINT8;
+      break;
+    case 16:
+      *out = int_data->is_signed() ? INT16 : UINT16;
+      break;
+    case 32:
+      *out = int_data->is_signed() ? INT32 : UINT32;
+      break;
+    case 64:
+      *out = int_data->is_signed() ? INT64 : UINT64;
+      break;
+    default:
+      // Byte-multiple widths without a cstdint type, e.g. 0, 24, 40, 48, 56
+      return Status::NotImplemented("Integers not in cstdint are not implemented");
+  }
+  return Status::OK();
+}
+
+// Maps flatbuffer FloatingPoint metadata to FLOAT for single precision and to
+// DOUBLE for every other precision value. Always returns OK.
+static Status FloatFromFlatuffer(const flatbuf::FloatingPoint* float_data,
+    std::shared_ptr<DataType>* out) {
+  const bool is_single =
+      float_data->precision() == flatbuf::Precision_SINGLE;
+  *out = is_single ? FLOAT : DOUBLE;
+  return Status::OK();
+}
+
+// Builds the arrow DataType described by a flatbuffer type union.
+//
+// `type` selects the union member, `type_data` points at that member's table,
+// and `children` holds the already-converted child fields (used by the nested
+// List and Tuple types).
+//
+// Returns Status::Invalid for Type_NONE, unknown type tags, and lists without
+// exactly one child; Status::NotImplemented for Bit, Binary, Utf8, Decimal,
+// Timestamp and Union.
+static Status TypeFromFlatbuffer(flatbuf::Type type,
+    const void* type_data, const std::vector<std::shared_ptr<Field>>& children,
+    std::shared_ptr<DataType>* out) {
+  switch (type) {
+    case flatbuf::Type_NONE:
+      return Status::Invalid("Type metadata cannot be none");
+    case flatbuf::Type_Int:
+      return IntFromFlatbuffer(static_cast<const flatbuf::Int*>(type_data), out);
+    case flatbuf::Type_Bit:
+      return Status::NotImplemented("Type is not implemented");
+    case flatbuf::Type_FloatingPoint:
+      return FloatFromFlatuffer(static_cast<const flatbuf::FloatingPoint*>(type_data),
+          out);
+    case flatbuf::Type_Binary:
+    case flatbuf::Type_Utf8:
+      return Status::NotImplemented("Type is not implemented");
+    case flatbuf::Type_Bool:
+      *out = BOOL;
+      return Status::OK();
+    // Decimal and Timestamp must not fall through into the List case below
+    case flatbuf::Type_Decimal:
+    case flatbuf::Type_Timestamp:
+      return Status::NotImplemented("Type is not implemented");
+    case flatbuf::Type_List:
+      if (children.size() != 1) {
+        return Status::Invalid("List must have exactly 1 child field");
+      }
+      *out = std::make_shared<ListType>(children[0]);
+      return Status::OK();
+    case flatbuf::Type_Tuple:
+      *out = std::make_shared<StructType>(children);
+      return Status::OK();
+    case flatbuf::Type_Union:
+      return Status::NotImplemented("Type is not implemented");
+    default:
+      return Status::Invalid("Unrecognized type");
+  }
+}
+
+// Forward declaration
+static Status FieldToFlatbuffer(FBB& fbb, const std::shared_ptr<Field>& field,
+ FieldOffset* offset);
+
+// Serializes integer type metadata (bit width and signedness) and returns it
+// as an untyped union offset.
+static Offset IntToFlatbuffer(FBB& fbb, int bitWidth,
+    bool is_signed) {
+  auto int_offset = flatbuf::CreateInt(fbb, bitWidth, is_signed);
+  return int_offset.Union();
+}
+
+// Serializes floating point type metadata with the given precision and
+// returns it as an untyped union offset.
+static Offset FloatToFlatbuffer(FBB& fbb,
+    flatbuf::Precision precision) {
+  auto float_offset = flatbuf::CreateFloatingPoint(fbb, precision);
+  return float_offset.Union();
+}
+
+// Serializes a list type: the single child field (child 0) is converted and
+// appended to `out_children`, then an empty List table is written and its
+// union offset stored in `*offset`. Propagates any child conversion error.
+static Status ListToFlatbuffer(FBB& fbb, const std::shared_ptr<DataType>& type,
+    std::vector<FieldOffset>* out_children, Offset* offset) {
+  FieldOffset value_field;
+  RETURN_NOT_OK(FieldToFlatbuffer(fbb, type->child(0), &value_field));
+  out_children->push_back(value_field);
+
+  *offset = flatbuf::CreateList(fbb).Union();
+  return Status::OK();
+}
+
+// Serializes a struct type: every child field is converted in order and
+// appended to `out_children`, then an empty Tuple table is written and its
+// union offset stored in `*offset`. Stops at the first child conversion error.
+static Status StructToFlatbuffer(FBB& fbb, const std::shared_ptr<DataType>& type,
+    std::vector<FieldOffset>* out_children, Offset* offset) {
+  const int num_children = type->num_children();
+  for (int child_index = 0; child_index < num_children; ++child_index) {
+    FieldOffset child_offset;
+    RETURN_NOT_OK(FieldToFlatbuffer(fbb, type->child(child_index), &child_offset));
+    out_children->push_back(child_offset);
+  }
+
+  *offset = flatbuf::CreateTuple(fbb).Union();
+  return Status::OK();
+}
+
+// Body of one integer case in TypeToFlatbuffer: sets *out_type and *offset for
+// the given bit width and signedness. The expansion already ends in `break;`,
+// so each use terminates its own case label.
+#define INT_TO_FB_CASE(BIT_WIDTH, IS_SIGNED) \
+ *out_type = flatbuf::Type_Int; \
+ *offset = IntToFlatbuffer(fbb, BIT_WIDTH, IS_SIGNED); \
+ break;
+
+
+// Serializes an arrow DataType into the flatbuffer builder.
+//
+// On success `*out_type` holds the flatbuffer type tag and `*offset` the union
+// offset of the written type table. For LIST and STRUCT the converted child
+// fields are appended to `children`. Types without a case below return
+// Status::NotImplemented.
+static Status TypeToFlatbuffer(FBB& fbb, const std::shared_ptr<DataType>& type,
+ std::vector<FieldOffset>* children,
+ flatbuf::Type* out_type, Offset* offset) {
+ switch (type->type) {
+ case Type::BOOL:
+ *out_type = flatbuf::Type_Bool;
+ *offset = flatbuf::CreateBool(fbb).Union();
+ break;
+ // Each INT_TO_FB_CASE expansion ends with its own break
+ case Type::UINT8:
+ INT_TO_FB_CASE(8, false);
+ case Type::INT8:
+ INT_TO_FB_CASE(8, true);
+ case Type::UINT16:
+ INT_TO_FB_CASE(16, false);
+ case Type::INT16:
+ INT_TO_FB_CASE(16, true);
+ case Type::UINT32:
+ INT_TO_FB_CASE(32, false);
+ case Type::INT32:
+ INT_TO_FB_CASE(32, true);
+ case Type::UINT64:
+ INT_TO_FB_CASE(64, false);
+ case Type::INT64:
+ INT_TO_FB_CASE(64, true);
+ case Type::FLOAT:
+ *out_type = flatbuf::Type_FloatingPoint;
+ *offset = FloatToFlatbuffer(fbb, flatbuf::Precision_SINGLE);
+ break;
+ case Type::DOUBLE:
+ *out_type = flatbuf::Type_FloatingPoint;
+ *offset = FloatToFlatbuffer(fbb, flatbuf::Precision_DOUBLE);
+ break;
+ // Nested types return directly with the status of the child conversion
+ case Type::LIST:
+ *out_type = flatbuf::Type_List;
+ return ListToFlatbuffer(fbb, type, children, offset);
+ case Type::STRUCT:
+ *out_type = flatbuf::Type_Tuple;
+ return StructToFlatbuffer(fbb, type, children, offset);
+ default:
+ std::stringstream ss;
+ ss << "Unable to convert type: " << type->ToString()
+ << std::endl;
+ return Status::NotImplemented(ss.str());
+ }
+ return Status::OK();
+}
+
+// Serializes a Field (name, nullable flag, type and child fields) into the
+// builder and stores the resulting table offset in `*offset`. Returns the
+// error from TypeToFlatbuffer if the field's type cannot be converted.
+static Status FieldToFlatbuffer(FBB& fbb, const std::shared_ptr<Field>& field,
+ FieldOffset* offset) {
+ auto fb_name = fbb.CreateString(field->name);
+
+ flatbuf::Type type_enum;
+ Offset type_data;
+ std::vector<FieldOffset> children;
+
+ // Fills `children` for nested types; it stays empty otherwise
+ RETURN_NOT_OK(TypeToFlatbuffer(fbb, field->type, &children, &type_enum, &type_data));
+ auto fb_children = fbb.CreateVector(children);
+
+ *offset = flatbuf::CreateField(fbb, fb_name, field->nullable, type_enum,
+ type_data, fb_children);
+
+ return Status::OK();
+}
+
+// Reconstructs an arrow Field from its flatbuffer representation.
+//
+// Child fields are converted recursively first, then the type is built from
+// the type union and those children. The nullable flag written by
+// FieldToFlatbuffer is carried over so it survives a round trip. Returns the
+// first error from a child or type conversion; `*out` is set only on success.
+Status FieldFromFlatbuffer(const flatbuf::Field* field,
+    std::shared_ptr<Field>* out) {
+  std::shared_ptr<DataType> type;
+
+  std::vector<std::shared_ptr<Field>> child_fields;
+  auto children = field->children();
+  // flatbuffers returns a null vector pointer when the field is absent
+  if (children != nullptr) {
+    child_fields.resize(children->size());
+    for (size_t i = 0; i < children->size(); ++i) {
+      RETURN_NOT_OK(FieldFromFlatbuffer(children->Get(i), &child_fields[i]));
+    }
+  }
+
+  RETURN_NOT_OK(TypeFromFlatbuffer(field->type_type(),
+          field->type(), child_fields, &type));
+
+  *out = std::make_shared<Field>(field->name()->str(), type, field->nullable());
+  return Status::OK();
+}
+
+// Implement MessageBuilder
+
+// Makes this message a Schema message: serializes every field of `schema` in
+// order, writes the Schema table as the header, and sets the body length to 0.
+// Returns the first field conversion error, if any.
+Status MessageBuilder::SetSchema(const Schema* schema) {
+  header_type_ = flatbuf::MessageHeader_Schema;
+
+  std::vector<FieldOffset> serialized_fields;
+  for (int i = 0; i < schema->num_fields(); ++i) {
+    FieldOffset field_offset;
+    RETURN_NOT_OK(FieldToFlatbuffer(fbb_, schema->field(i), &field_offset));
+    serialized_fields.push_back(field_offset);
+  }
+
+  auto fb_fields = fbb_.CreateVector(serialized_fields);
+  header_ = flatbuf::CreateSchema(fbb_, fb_fields).Union();
+  body_length_ = 0;
+  return Status::OK();
+}
+
+// Makes this message a RecordBatch message: writes a RecordBatch header table
+// holding `length` and the given field nodes and buffer descriptors, and
+// records `body_length` for the enclosing Message. Always returns OK.
+Status MessageBuilder::SetRecordBatch(int32_t length, int64_t body_length,
+ const std::vector<flatbuf::FieldNode>& nodes,
+ const std::vector<flatbuf::Buffer>& buffers) {
+ header_type_ = flatbuf::MessageHeader_RecordBatch;
+ header_ = flatbuf::CreateRecordBatch(fbb_, length,
+ fbb_.CreateVectorOfStructs(nodes),
+ fbb_.CreateVectorOfStructs(buffers)).Union();
+ body_length_ = body_length;
+
+ return Status::OK();
+}
+
+
+// Convenience wrapper: builds a complete RecordBatch message from the given
+// metadata with a MessageBuilder and returns its serialized bytes in `*out`.
+Status WriteDataHeader(int32_t length, int64_t body_length,
+ const std::vector<flatbuf::FieldNode>& nodes,
+ const std::vector<flatbuf::Buffer>& buffers,
+ std::shared_ptr<Buffer>* out) {
+ MessageBuilder message;
+ RETURN_NOT_OK(message.SetRecordBatch(length, body_length, nodes, buffers));
+ RETURN_NOT_OK(message.Finish());
+ return message.GetBuffer(out);
+}
+
+// Writes the top-level Message table from the header type, header offset and
+// body length set earlier, and finishes the flatbuffer. Always returns OK.
+Status MessageBuilder::Finish() {
+ auto message = flatbuf::CreateMessage(fbb_, header_type_, header_,
+ body_length_);
+ fbb_.Finish(message);
+ return Status::OK();
+}
+
+// Copies the finished flatbuffer into a newly allocated PoolBuffer and returns
+// it in `*out`. Layout of the result:
+//   <int32_t: flatbuffer size><uint8_t*: flatbuffer data>
+// Returns the Resize error if the allocation fails.
+Status MessageBuilder::GetBuffer(std::shared_ptr<Buffer>* out) {
+  int32_t flatbuffer_size = fbb_.GetSize();
+  const size_t prefix_size = sizeof(int32_t);
+
+  auto buffer = std::make_shared<PoolBuffer>();
+  RETURN_NOT_OK(buffer->Resize(flatbuffer_size + prefix_size));
+
+  // Size prefix first, flatbuffer bytes directly after it
+  uint8_t* dst = buffer->mutable_data();
+  memcpy(dst, &flatbuffer_size, prefix_size);
+  memcpy(dst + prefix_size, fbb_.GetBufferPointer(), flatbuffer_size);
+
+  *out = buffer;
+  return Status::OK();
+}
+
+} // namespace ipc
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/metadata-internal.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/metadata-internal.h b/cpp/src/arrow/ipc/metadata-internal.h
new file mode 100644
index 0000000..f7365d2
--- /dev/null
+++ b/cpp/src/arrow/ipc/metadata-internal.h
@@ -0,0 +1,69 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_IPC_METADATA_INTERNAL_H
+#define ARROW_IPC_METADATA_INTERNAL_H
+
+#include <flatbuffers/flatbuffers.h>
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include "arrow/ipc/Message_generated.h"
+
+namespace arrow {
+
+namespace flatbuf = apache::arrow::flatbuf;
+
+class Buffer;
+struct Field;
+class Schema;
+class Status;
+
+namespace ipc {
+
+Status FieldFromFlatbuffer(const flatbuf::Field* field,
+ std::shared_ptr<Field>* out);
+
+// Incrementally builds a serialized IPC Message. Usage, as done by the
+// callers in this patch: call one of the Set* methods, then Finish(), then
+// GetBuffer().
+class MessageBuilder {
+ public:
+ // Set the message header to the serialized form of the given schema
+ Status SetSchema(const Schema* schema);
+
+ // Set the message header to a record batch description
+ Status SetRecordBatch(int32_t length, int64_t body_length,
+ const std::vector<flatbuf::FieldNode>& nodes,
+ const std::vector<flatbuf::Buffer>& buffers);
+
+ // Write the top-level Message table and finish the flatbuffer
+ Status Finish();
+
+ // Retrieve the finished message bytes
+ Status GetBuffer(std::shared_ptr<Buffer>* out);
+
+ private:
+ // Type tag and offset of the header union set by SetSchema/SetRecordBatch
+ flatbuf::MessageHeader header_type_;
+ flatbuffers::Offset<void> header_;
+ int64_t body_length_;
+ flatbuffers::FlatBufferBuilder fbb_;
+};
+
+Status WriteDataHeader(int32_t length, int64_t body_length,
+ const std::vector<flatbuf::FieldNode>& nodes,
+ const std::vector<flatbuf::Buffer>& buffers,
+ std::shared_ptr<Buffer>* out);
+
+} // namespace ipc
+} // namespace arrow
+
+#endif // ARROW_IPC_METADATA_INTERNAL_H
[2/3] arrow git commit: ARROW-67: C++ metadata flatbuffer
serialization and data movement to memory maps
Posted by we...@apache.org.
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/metadata.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/metadata.cc b/cpp/src/arrow/ipc/metadata.cc
new file mode 100644
index 0000000..642f21a
--- /dev/null
+++ b/cpp/src/arrow/ipc/metadata.cc
@@ -0,0 +1,238 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/ipc/metadata.h"
+
+#include <flatbuffers/flatbuffers.h>
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+// Generated C++ flatbuffer IDL
+#include "arrow/ipc/Message_generated.h"
+#include "arrow/ipc/metadata-internal.h"
+
+#include "arrow/schema.h"
+#include "arrow/util/buffer.h"
+#include "arrow/util/status.h"
+
+namespace arrow {
+
+namespace flatbuf = apache::arrow::flatbuf;
+
+namespace ipc {
+
+// Serializes `schema` as a Schema message using a MessageBuilder and returns
+// the resulting bytes in `*out`. Propagates any error from the builder.
+Status WriteSchema(const Schema* schema, std::shared_ptr<Buffer>* out) {
+ MessageBuilder message;
+ RETURN_NOT_OK(message.SetSchema(schema));
+ RETURN_NOT_OK(message.Finish());
+ return message.GetBuffer(out);
+}
+
+//----------------------------------------------------------------------
+// Message reader
+
+// Private implementation of Message: keeps the backing buffer alive and wraps
+// the flatbuffer Message table located inside it.
+class Message::Impl {
+ public:
+ explicit Impl(const std::shared_ptr<Buffer>& buffer,
+ const flatbuf::Message* message) :
+ buffer_(buffer),
+ message_(message) {}
+
+ // Translates the flatbuffer header type tag into Message::Type; any tag
+ // without a case below maps to Message::NONE
+ Message::Type type() const {
+ switch (message_->header_type()) {
+ case flatbuf::MessageHeader_Schema:
+ return Message::SCHEMA;
+ case flatbuf::MessageHeader_DictionaryBatch:
+ return Message::DICTIONARY_BATCH;
+ case flatbuf::MessageHeader_RecordBatch:
+ return Message::RECORD_BATCH;
+ default:
+ return Message::NONE;
+ }
+ }
+
+ // Untyped pointer to the header table; callers cast it according to type()
+ const void* header() const {
+ return message_->header();
+ }
+
+ int64_t body_length() const {
+ return message_->bodyLength();
+ }
+
+ private:
+ // Owns the memory this message accesses
+ std::shared_ptr<Buffer> buffer_;
+
+ const flatbuf::Message* message_;
+};
+
+// Private implementation of SchemaMessage: a thin view over a flatbuffer
+// Schema table. It does not own the memory it points into.
+class SchemaMessage::Impl {
+ public:
+ // `schema` is an opaque pointer to a flatbuf::Schema table
+ explicit Impl(const void* schema) :
+ schema_(static_cast<const flatbuf::Schema*>(schema)) {}
+
+ // The i-th serialized field; i is not range-checked here
+ const flatbuf::Field* field(int i) const {
+ return schema_->fields()->Get(i);
+ }
+
+ int num_fields() const {
+ return schema_->fields()->size();
+ }
+
+ private:
+ const flatbuf::Schema* schema_;
+};
+
+// Creates an empty Message; impl_ is populated by Message::Open.
+Message::Message() {}
+
+// Creates a Message that reads from `buffer` and stores it in `*out`. The
+// first sizeof(int32_t) bytes are skipped as the size prefix and the rest is
+// interpreted as a flatbuffer Message. No verification of the buffer contents
+// or length is performed yet (see TODO); always returns OK.
+Status Message::Open(const std::shared_ptr<Buffer>& buffer,
+ std::shared_ptr<Message>* out) {
+ std::shared_ptr<Message> result(new Message());
+
+ // The buffer is prefixed by its size as int32_t
+ const uint8_t* fb_head = buffer->data() + sizeof(int32_t);
+ const flatbuf::Message* message = flatbuf::GetMessage(fb_head);
+
+ // TODO(wesm): verify message
+ result->impl_.reset(new Impl(buffer, message));
+ *out = result;
+
+ return Status::OK();
+}
+
+// Returns the kind of header this message carries.
+Message::Type Message::type() const {
+ return impl_->type();
+}
+
+// Returns the bodyLength value stored in the serialized message.
+int64_t Message::body_length() const {
+ return impl_->body_length();
+}
+
+// Returns a shared_ptr to this Message obtained through shared_from_this().
+std::shared_ptr<Message> Message::get_shared_ptr() {
+ return this->shared_from_this();
+}
+
+// Wraps this message's header as a SchemaMessage. The header pointer is passed
+// through without checking that type() is SCHEMA. The result holds a
+// shared_ptr to this Message.
+std::shared_ptr<SchemaMessage> Message::GetSchema() {
+ return std::make_shared<SchemaMessage>(this->shared_from_this(),
+ impl_->header());
+}
+
+// Stores a reference to the parent message and wraps the opaque flatbuffer
+// schema pointer in an Impl.
+SchemaMessage::SchemaMessage(const std::shared_ptr<Message>& message,
+ const void* schema) {
+ message_ = message;
+ impl_.reset(new Impl(schema));
+}
+
+// Number of top-level fields in the serialized schema.
+int SchemaMessage::num_fields() const {
+ return impl_->num_fields();
+}
+
+// Deserializes the i-th field of the schema into `*out`; returns the status
+// of FieldFromFlatbuffer.
+Status SchemaMessage::GetField(int i, std::shared_ptr<Field>* out) const {
+ const flatbuf::Field* field = impl_->field(i);
+ return FieldFromFlatbuffer(field, out);
+}
+
+// Deserializes every field and assembles them into a Schema stored in `*out`.
+// Stops at, and returns, the first field conversion error.
+Status SchemaMessage::GetSchema(std::shared_ptr<Schema>* out) const {
+  const int field_count = num_fields();
+
+  std::vector<std::shared_ptr<Field>> fields(field_count);
+  for (int index = 0; index < field_count; ++index) {
+    RETURN_NOT_OK(GetField(index, &fields[index]));
+  }
+
+  *out = std::make_shared<Schema>(fields);
+  return Status::OK();
+}
+
+// Private implementation of RecordBatchMessage: a view over a flatbuffer
+// RecordBatch table with its node and buffer vectors looked up once at
+// construction. It does not own the memory it points into.
+class RecordBatchMessage::Impl {
+ public:
+  // `batch` is an opaque pointer to a flatbuf::RecordBatch table
+  explicit Impl(const void* batch) :
+      batch_(static_cast<const flatbuf::RecordBatch*>(batch)),
+      nodes_(batch_->nodes()),
+      buffers_(batch_->buffers()) {}
+
+  // The i-th field node; i is not range-checked here
+  const flatbuf::FieldNode* field(int i) const {
+    return nodes_->Get(i);
+  }
+
+  // The i-th buffer descriptor; i is not range-checked here
+  const flatbuf::Buffer* buffer(int i) const {
+    return buffers_->Get(i);
+  }
+
+  int32_t length() const {
+    return batch_->length();
+  }
+
+  int num_buffers() const {
+    return buffers_->size();
+  }
+
+  int num_fields() const {
+    return nodes_->size();
+  }
+
+ private:
+  // Declaration order matters: nodes_ and buffers_ are initialized from batch_
+  const flatbuf::RecordBatch* batch_;
+  const flatbuffers::Vector<const flatbuf::FieldNode*>* nodes_;
+  const flatbuffers::Vector<const flatbuf::Buffer*>* buffers_;
+};
+
+// Wraps this message's header as a RecordBatchMessage. The header pointer is
+// passed through without checking that type() is RECORD_BATCH. The result
+// holds a shared_ptr to this Message.
+std::shared_ptr<RecordBatchMessage> Message::GetRecordBatch() {
+ return std::make_shared<RecordBatchMessage>(this->shared_from_this(),
+ impl_->header());
+}
+
+// Stores a reference to the parent message and wraps the opaque flatbuffer
+// record batch pointer in an Impl.
+RecordBatchMessage::RecordBatchMessage(const std::shared_ptr<Message>& message,
+ const void* batch) {
+ message_ = message;
+ impl_.reset(new Impl(batch));
+}
+
+// TODO(wesm): Copying the flatbuffer data isn't great, but this will do for
+// now
+// Returns a copy of the length and null count of the i-th field node.
+FieldMetadata RecordBatchMessage::field(int i) const {
+ const flatbuf::FieldNode* node = impl_->field(i);
+
+ FieldMetadata result;
+ result.length = node->length();
+ result.null_count = node->null_count();
+ return result;
+}
+
+// Returns a copy of the page, offset and length of the i-th buffer descriptor.
+BufferMetadata RecordBatchMessage::buffer(int i) const {
+ const flatbuf::Buffer* buffer = impl_->buffer(i);
+
+ BufferMetadata result;
+ result.page = buffer->page();
+ result.offset = buffer->offset();
+ result.length = buffer->length();
+ return result;
+}
+
+// Returns the length value stored in the serialized record batch.
+int32_t RecordBatchMessage::length() const {
+ return impl_->length();
+}
+
+// Number of buffer descriptors in the serialized record batch.
+int RecordBatchMessage::num_buffers() const {
+ return impl_->num_buffers();
+}
+
+// Number of field nodes in the serialized record batch.
+int RecordBatchMessage::num_fields() const {
+ return impl_->num_fields();
+}
+
+} // namespace ipc
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/metadata.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/metadata.h b/cpp/src/arrow/ipc/metadata.h
new file mode 100644
index 0000000..c728852
--- /dev/null
+++ b/cpp/src/arrow/ipc/metadata.h
@@ -0,0 +1,146 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// C++ object model and user API for interprocess schema messaging
+
+#ifndef ARROW_IPC_METADATA_H
+#define ARROW_IPC_METADATA_H
+
+#include <cstdint>
+#include <memory>
+
+namespace arrow {
+
+class Buffer;
+struct Field;
+class Schema;
+class Status;
+
+namespace ipc {
+
+//----------------------------------------------------------------------
+// Message read/write APIs
+
+// Serialize arrow::Schema as a Flatbuffer
+Status WriteSchema(const Schema* schema, std::shared_ptr<Buffer>* out);
+
+//----------------------------------------------------------------------
+
+// Read interface classes. We do not fully deserialize the flatbuffers so that
+// individual fields' metadata can be retrieved from a very large schema
+// without constructing every field
+
+class Message;
+
+// Container for serialized Schema metadata contained in an IPC message
+class SchemaMessage {
+ public:
+ // Accepts the parent message and an opaque flatbuffer pointer
+ SchemaMessage(const std::shared_ptr<Message>& message, const void* schema);
+
+ int num_fields() const;
+
+ // Construct an arrow::Field for the i-th value in the metadata
+ Status GetField(int i, std::shared_ptr<Field>* out) const;  // result is returned through out
+
+ // Construct a complete Schema from the message. May be expensive for very
+ // large schemas if you are only interested in a few fields
+ Status GetSchema(std::shared_ptr<Schema>* out) const;  // result is returned through out
+
+ private:
+ // Parent, owns the flatbuffer data
+ std::shared_ptr<Message> message_;
+
+ class Impl;  // only forward-declared in this header
+ std::unique_ptr<Impl> impl_;  // NOTE(review): Impl must be complete where this is destroyed
+};
+
+// Field metadata: by-value copy of a flatbuffer FieldNode's length and null count
+struct FieldMetadata {
+ int32_t length;
+ int32_t null_count;
+};
+
+struct BufferMetadata {  // by-value copy of one flatbuffer Buffer entry
+ int32_t page;
+ int64_t offset;
+ int64_t length;
+};
+
+// Container for serialized record batch metadata contained in an IPC message
+class RecordBatchMessage {
+ public:
+ // Accepts the parent message and an opaque flatbuffer pointer
+ RecordBatchMessage(const std::shared_ptr<Message>& message,
+ const void* batch_meta);
+
+ FieldMetadata field(int i) const;  // copies the i-th field node's metadata
+ BufferMetadata buffer(int i) const;  // copies the i-th buffer entry's metadata
+
+ int32_t length() const;
+ int num_buffers() const;  // number of buffer entries in the batch
+ int num_fields() const;  // number of field nodes in the batch
+
+ private:
+ // Parent, owns the flatbuffer data
+ std::shared_ptr<Message> message_;
+
+ class Impl;  // only forward-declared in this header
+ std::unique_ptr<Impl> impl_;  // NOTE(review): Impl must be complete where this is destroyed
+};
+
+class DictionaryBatchMessage {  // NOTE(review): no constructor or data members declared here
+ public:
+ int64_t id() const;
+ std::unique_ptr<RecordBatchMessage> data() const;  // note: unique_ptr, unlike Message's getters
+};
+
+class Message : public std::enable_shared_from_this<Message> {
+ public:
+ enum Type {
+ NONE,
+ SCHEMA,
+ DICTIONARY_BATCH,
+ RECORD_BATCH
+ };
+
+ static Status Open(const std::shared_ptr<Buffer>& buffer,
+ std::shared_ptr<Message>* out);  // the message is returned through out
+
+ std::shared_ptr<Message> get_shared_ptr();
+
+ int64_t body_length() const;
+
+ Type type() const;
+
+ // These methods only to be invoked if you have checked the message type
+ std::shared_ptr<SchemaMessage> GetSchema();
+ std::shared_ptr<RecordBatchMessage> GetRecordBatch();  // result holds shared_from_this()
+ std::shared_ptr<DictionaryBatchMessage> GetDictionaryBatch();
+
+ private:
+ Message();  // private; presumably instances are meant to come from Open()
+
+ // Hide serialization details from user API
+ class Impl;
+ std::unique_ptr<Impl> impl_;  // NOTE(review): Impl must be complete where this is destroyed
+};
+
+} // namespace ipc
+} // namespace arrow
+
+#endif // ARROW_IPC_METADATA_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/test-common.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/test-common.h b/cpp/src/arrow/ipc/test-common.h
new file mode 100644
index 0000000..0fccce9
--- /dev/null
+++ b/cpp/src/arrow/ipc/test-common.h
@@ -0,0 +1,53 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_IPC_TEST_COMMON_H
+#define ARROW_IPC_TEST_COMMON_H
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace arrow {
+namespace ipc {
+
+// Creates fixed-size scratch files for tests and removes them in TearDown
+class MemoryMapFixture {
+ public:
+  void TearDown() {
+    for (const auto& path : tmp_files_) { std::remove(path.c_str()); }
+  }
+
+  // Creates (or truncates) the file at path and sizes it to size bytes. Does
+  // nothing if the file cannot be opened, rather than passing null to fileno
+  void CreateFile(const std::string& path, int64_t size) {
+    FILE* file = fopen(path.c_str(), "w");
+    if (file == nullptr) { return; }
+    tmp_files_.push_back(path);
+    ftruncate(fileno(file), size);
+    fclose(file);
+  }
+
+ private:
+  std::vector<std::string> tmp_files_;
+};
+
+} // namespace ipc
+} // namespace arrow
+
+#endif // ARROW_IPC_TEST_COMMON_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/schema-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/schema-test.cc b/cpp/src/arrow/schema-test.cc
new file mode 100644
index 0000000..a1de1dc
--- /dev/null
+++ b/cpp/src/arrow/schema-test.cc
@@ -0,0 +1,104 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "arrow/schema.h"
+#include "arrow/type.h"
+
+using std::shared_ptr;
+using std::vector;
+
+namespace arrow {
+
+const auto INT32 = std::make_shared<Int32Type>();
+
+TEST(TestField, Basics) {  // name/type members; nullable unless false is passed
+ Field f0("f0", INT32);
+ Field f0_nn("f0", INT32, false);
+
+ ASSERT_EQ(f0.name, "f0");
+ ASSERT_EQ(f0.type->ToString(), INT32->ToString());
+
+ ASSERT_TRUE(f0.nullable);
+ ASSERT_FALSE(f0_nn.nullable);
+}
+
+TEST(TestField, Equals) {  // fields differing only in nullability compare unequal
+ Field f0("f0", INT32);
+ Field f0_nn("f0", INT32, false);
+ Field f0_other("f0", INT32);
+
+ ASSERT_EQ(f0, f0_other);
+ ASSERT_NE(f0, f0_nn);
+}
+
+class TestSchema : public ::testing::Test {  // fixture with no shared state
+ public:
+ void SetUp() {}  // nothing to prepare
+};
+
+TEST_F(TestSchema, Basics) {  // field access by index and both Equals overloads
+ auto f0 = std::make_shared<Field>("f0", INT32);
+ auto f1 = std::make_shared<Field>("f1", std::make_shared<UInt8Type>(), false);
+ auto f1_optional = std::make_shared<Field>("f1", std::make_shared<UInt8Type>());
+
+ auto f2 = std::make_shared<Field>("f2", std::make_shared<StringType>());
+
+ vector<shared_ptr<Field>> fields = {f0, f1, f2};
+ auto schema = std::make_shared<Schema>(fields);
+
+ ASSERT_EQ(3, schema->num_fields());
+ ASSERT_EQ(f0, schema->field(0));  // compares the shared_ptrs themselves
+ ASSERT_EQ(f1, schema->field(1));
+ ASSERT_EQ(f2, schema->field(2));
+
+ auto schema2 = std::make_shared<Schema>(fields);
+
+ vector<shared_ptr<Field>> fields3 = {f0, f1_optional, f2};  // differs only in f1's nullability
+ auto schema3 = std::make_shared<Schema>(fields3);
+ ASSERT_TRUE(schema->Equals(schema2));
+ ASSERT_FALSE(schema->Equals(schema3));
+
+ ASSERT_TRUE(schema->Equals(*schema2.get()));  // reference overload
+ ASSERT_FALSE(schema->Equals(*schema3.get()));
+}
+
+TEST_F(TestSchema, ToString) {  // expects one line per field and no trailing newline
+ auto f0 = std::make_shared<Field>("f0", INT32);
+ auto f1 = std::make_shared<Field>("f1", std::make_shared<UInt8Type>(), false);
+ auto f2 = std::make_shared<Field>("f2", std::make_shared<StringType>());
+ auto f3 = std::make_shared<Field>("f3",
+ std::make_shared<ListType>(std::make_shared<Int16Type>()));
+
+ vector<shared_ptr<Field>> fields = {f0, f1, f2, f3};
+ auto schema = std::make_shared<Schema>(fields);
+
+ std::string result = schema->ToString();
+ std::string expected = R"(f0: int32
+f1: uint8 not null
+f2: string
+f3: list<item: int16>)";
+
+ ASSERT_EQ(expected, result);
+}
+
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/schema.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/schema.cc b/cpp/src/arrow/schema.cc
new file mode 100644
index 0000000..18aad0e
--- /dev/null
+++ b/cpp/src/arrow/schema.cc
@@ -0,0 +1,63 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/schema.h"
+
+#include <memory>
+#include <string>
+#include <sstream>
+#include <vector>
+
+#include "arrow/type.h"
+
+namespace arrow {
+
+Schema::Schema(const std::vector<std::shared_ptr<Field>>& fields) :
+ fields_(fields) {}  // copies the vector of field pointers
+
+// Two schemas are equal when they are the same object, or when they have the
+// same number of fields and each pair of corresponding fields is equal
+bool Schema::Equals(const Schema& other) const {
+  if (this == &other) { return true; }
+
+  const int count = num_fields();
+  if (count != other.num_fields()) { return false; }
+
+  int pos = 0;
+  while (pos < count) {
+    if (!field(pos)->Equals(*other.field(pos).get())) { return false; }
+    ++pos;
+  }
+  return true;
+}
+
+// Pointer overload: a null other is never equal (dereferencing it would be
+// undefined behavior); otherwise defers to Equals(const Schema&)
+bool Schema::Equals(const std::shared_ptr<Schema>& other) const {
+  if (other == nullptr) { return false; }
+  return Equals(*other);
+}
+
+// Renders each field's ToString() on its own line, in schema order, with no
+// trailing line break after the last field
+std::string Schema::ToString() const {
+  std::stringstream out;
+
+  bool first = true;
+  for (auto f : fields_) {
+    if (!first) {
+      out << std::endl;
+    }
+    first = false;
+    out << f->ToString();
+  }
+  return out.str();
+}
+
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/schema.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/schema.h b/cpp/src/arrow/schema.h
new file mode 100644
index 0000000..52f3c1c
--- /dev/null
+++ b/cpp/src/arrow/schema.h
@@ -0,0 +1,55 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_SCHEMA_H
+#define ARROW_SCHEMA_H
+
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace arrow {
+
+struct Field;
+
+// An ordered list of fields
+class Schema {
+ public:
+  explicit Schema(const std::vector<std::shared_ptr<Field>>& fields);
+
+  // Number of fields held by the schema
+  int num_fields() const { return static_cast<int>(fields_.size()); }
+
+  // Access the i-th field. No bounds checking is performed
+  const std::shared_ptr<Field>& field(int i) const { return fields_[i]; }
+
+  // Returns true if all of the schema fields are equal
+  bool Equals(const Schema& other) const;
+  bool Equals(const std::shared_ptr<Schema>& other) const;
+
+  // Human-readable rendering of the schema, intended for debugging
+  std::string ToString() const;
+
+ private:
+  std::vector<std::shared_ptr<Field>> fields_;
+};
+
+} // namespace arrow
+
+#endif // ARROW_SCHEMA_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table-test.cc b/cpp/src/arrow/table-test.cc
new file mode 100644
index 0000000..4c7b8f8
--- /dev/null
+++ b/cpp/src/arrow/table-test.cc
@@ -0,0 +1,128 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "arrow/column.h"
+#include "arrow/schema.h"
+#include "arrow/table.h"
+#include "arrow/test-util.h"
+#include "arrow/type.h"
+#include "arrow/types/primitive.h"
+#include "arrow/util/status.h"
+
+using std::shared_ptr;
+using std::vector;
+
+namespace arrow {
+
+const auto INT16 = std::make_shared<Int16Type>();
+const auto UINT8 = std::make_shared<UInt8Type>();
+const auto INT32 = std::make_shared<Int32Type>();
+
+class TestTable : public TestBase {
+ public:
+ void MakeExample1(int length) {  // fills schema_ and columns_ with three int columns
+ auto f0 = std::make_shared<Field>("f0", INT32);
+ auto f1 = std::make_shared<Field>("f1", UINT8);
+ auto f2 = std::make_shared<Field>("f2", INT16);
+
+ vector<shared_ptr<Field>> fields = {f0, f1, f2};
+ schema_ = std::make_shared<Schema>(fields);
+
+ columns_ = {  // one column per schema field, each built with MakePrimitive(length)
+ std::make_shared<Column>(schema_->field(0), MakePrimitive<Int32Array>(length)),
+ std::make_shared<Column>(schema_->field(1), MakePrimitive<UInt8Array>(length)),
+ std::make_shared<Column>(schema_->field(2), MakePrimitive<Int16Array>(length))
+ };
+ }
+
+ protected:
+ std::unique_ptr<Table> table_;  // table under test, reset by each test case
+ shared_ptr<Schema> schema_;
+ vector<std::shared_ptr<Column>> columns_;
+};
+
+TEST_F(TestTable, EmptySchema) {  // no fields and no columns: valid, zero rows
+ auto empty_schema = shared_ptr<Schema>(new Schema({}));
+ table_.reset(new Table("data", empty_schema, columns_));  // columns_ is still empty here
+ ASSERT_OK(table_->ValidateColumns());
+ ASSERT_EQ(0, table_->num_rows());
+ ASSERT_EQ(0, table_->num_columns());
+}
+
+TEST_F(TestTable, Ctors) {  // exercises both Table constructors
+ int length = 100;
+ MakeExample1(length);
+
+ std::string name = "data";
+
+ table_.reset(new Table(name, schema_, columns_));  // row count taken from the columns
+ ASSERT_OK(table_->ValidateColumns());
+ ASSERT_EQ(name, table_->name());
+ ASSERT_EQ(length, table_->num_rows());
+ ASSERT_EQ(3, table_->num_columns());
+
+ table_.reset(new Table(name, schema_, columns_, length));  // row count passed explicitly
+ ASSERT_OK(table_->ValidateColumns());
+ ASSERT_EQ(name, table_->name());
+ ASSERT_EQ(length, table_->num_rows());
+}
+
+TEST_F(TestTable, Metadata) {  // table schema and column name/type match the inputs
+ int length = 100;
+ MakeExample1(length);
+
+ std::string name = "data";
+ table_.reset(new Table(name, schema_, columns_));
+
+ ASSERT_TRUE(table_->schema()->Equals(schema_));
+
+ auto col = table_->column(0);
+ ASSERT_EQ(schema_->field(0)->name, col->name());
+ ASSERT_EQ(schema_->field(0)->type, col->type());
+}
+
+TEST_F(TestTable, InvalidColumns) {  // three ways ValidateColumns() must report Invalid
+ // Check that columns are all the same length
+ int length = 100;
+ MakeExample1(length);
+
+ table_.reset(new Table("data", schema_, columns_, length - 1));  // declared rows != column length
+ ASSERT_RAISES(Invalid, table_->ValidateColumns());
+
+ columns_.clear();
+
+ // Wrong number of columns
+ table_.reset(new Table("data", schema_, columns_, length));
+ ASSERT_RAISES(Invalid, table_->ValidateColumns());
+
+ columns_ = {  // last column is one element shorter than the others
+ std::make_shared<Column>(schema_->field(0), MakePrimitive<Int32Array>(length)),
+ std::make_shared<Column>(schema_->field(1), MakePrimitive<UInt8Array>(length)),
+ std::make_shared<Column>(schema_->field(2), MakePrimitive<Int16Array>(length - 1))
+ };
+
+ table_.reset(new Table("data", schema_, columns_, length));
+ ASSERT_RAISES(Invalid, table_->ValidateColumns());
+}
+
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc
new file mode 100644
index 0000000..e405c1d
--- /dev/null
+++ b/cpp/src/arrow/table.cc
@@ -0,0 +1,86 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/table.h"
+
+#include <cstdlib>
+#include <memory>
+#include <sstream>
+
+#include "arrow/column.h"
+#include "arrow/schema.h"
+#include "arrow/util/status.h"
+
+namespace arrow {
+
+RowBatch::RowBatch(const std::shared_ptr<Schema>& schema, int num_rows,
+ const std::vector<std::shared_ptr<Array>>& columns) :
+ schema_(schema),
+ num_rows_(num_rows),
+ columns_(columns) {}  // stores the arguments as given; nothing is validated here
+
+const std::string& RowBatch::column_name(int i) const {  // name of the schema's i-th field
+ return schema_->field(i)->name;
+}
+
+// Builds a table whose row count is taken from the first column, or is zero
+// when no columns are supplied
+Table::Table(const std::string& name, const std::shared_ptr<Schema>& schema,
+    const std::vector<std::shared_ptr<Column>>& columns) :
+    name_(name),
+    schema_(schema),
+    columns_(columns) {
+  num_rows_ = (columns.size() == 0) ? 0 : columns[0]->length();
+}
+
+Table::Table(const std::string& name, const std::shared_ptr<Schema>& schema,
+ const std::vector<std::shared_ptr<Column>>& columns, int64_t num_rows) :
+ name_(name),
+ schema_(schema),
+ columns_(columns),
+ num_rows_(num_rows) {}  // num_rows is stored unchecked; ValidateColumns() compares it
+
+// Checks that the column count matches the schema and that every column is
+// non-null and has exactly num_rows_ entries; returns Invalid otherwise
+Status Table::ValidateColumns() const {
+  if (num_columns() != schema_->num_fields()) {
+    return Status::Invalid("Number of columns did not match schema");
+  }
+
+  // Make sure columns are all the same length
+  for (size_t i = 0; i < columns_.size(); ++i) {
+    const Column* col = columns_[i].get();
+    if (col == nullptr) {
+      // A null column has no name to report; calling col->name() would crash
+      std::stringstream ss;
+      ss << "Column " << i << " was null";
+      return Status::Invalid(ss.str());
+    }
+    if (col->length() != num_rows_) {
+      std::stringstream ss;
+      ss << "Column " << i << " named " << col->name()
+         << " expected length " << num_rows_
+         << " but got length " << col->length();
+      return Status::Invalid(ss.str());
+    }
+  }
+  return Status::OK();
+}
+
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table.h b/cpp/src/arrow/table.h
new file mode 100644
index 0000000..e2f73a2
--- /dev/null
+++ b/cpp/src/arrow/table.h
@@ -0,0 +1,128 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_TABLE_H
+#define ARROW_TABLE_H
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace arrow {
+
+class Array;
+class Column;
+class Schema;
+class Status;
+
+// A row batch is a simpler, more rigid counterpart to a table, intended
+// primarily for shared memory IPC: a schema (metadata) together with a vector
+// of Arrow arrays that are expected to all have the same length
+class RowBatch {
+ public:
+  // num_rows is passed explicitly so that a batch can carry a row count even
+  // with no materialized columns; every array should have length num_rows
+  RowBatch(const std::shared_ptr<Schema>& schema, int num_rows,
+      const std::vector<std::shared_ptr<Array>>& columns);
+
+  // @returns: the number of rows (the corresponding length of each column)
+  int64_t num_rows() const { return num_rows_; }
+
+  // @returns: the number of columns in the batch
+  int num_columns() const { return static_cast<int>(columns_.size()); }
+
+  // @returns: the batch's schema
+  const std::shared_ptr<Schema>& schema() const { return schema_; }
+
+  // @returns: the i-th column
+  // Note: Does not boundscheck
+  const std::shared_ptr<Array>& column(int i) const { return columns_[i]; }
+
+  // @returns: the name of the i-th column
+  const std::string& column_name(int i) const;
+
+ private:
+  std::shared_ptr<Schema> schema_;
+  // NOTE(review): held as int although num_rows() returns int64_t
+  int num_rows_;
+  std::vector<std::shared_ptr<Array>> columns_;
+};
+
+// Immutable container of fixed-length columns conforming to a particular schema
+class Table {
+ public:
+  // Row count is taken from the first column; with no columns it is zero
+  Table(const std::string& name, const std::shared_ptr<Schema>& schema,
+      const std::vector<std::shared_ptr<Column>>& columns);
+
+  // Row count is given explicitly, which allows a table of a particular size
+  // without any materialized columns. Each column should therefore have
+  // length num_rows -- this can be verified with Table::ValidateColumns
+  Table(const std::string& name, const std::shared_ptr<Schema>& schema,
+      const std::vector<std::shared_ptr<Column>>& columns, int64_t num_rows);
+
+  // After construction, perform any checks to validate the input arguments
+  Status ValidateColumns() const;
+
+  // @returns: the table's name, if any (may be length 0)
+  const std::string& name() const { return name_; }
+
+  // @returns: the table's schema
+  const std::shared_ptr<Schema>& schema() const { return schema_; }
+
+  // @returns: the i-th column
+  // Note: Does not boundscheck
+  const std::shared_ptr<Column>& column(int i) const { return columns_[i]; }
+
+  // @returns: the number of columns in the table
+  int num_columns() const { return static_cast<int>(columns_.size()); }
+
+  // @returns: the number of rows (the corresponding length of each column)
+  int64_t num_rows() const { return num_rows_; }
+
+ private:
+  // Optional table name
+  std::string name_;
+
+  std::shared_ptr<Schema> schema_;
+  std::vector<std::shared_ptr<Column>> columns_;
+
+  int64_t num_rows_;
+};
+
+} // namespace arrow
+
+#endif // ARROW_TABLE_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/CMakeLists.txt b/cpp/src/arrow/table/CMakeLists.txt
deleted file mode 100644
index d9f00e7..0000000
--- a/cpp/src/arrow/table/CMakeLists.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-#######################################
-# arrow_table
-#######################################
-
-# Headers: top level
-install(FILES
- column.h
- schema.h
- table.h
- DESTINATION include/arrow/table)
-
-ADD_ARROW_TEST(column-test)
-ADD_ARROW_TEST(schema-test)
-ADD_ARROW_TEST(table-test)
-
-ADD_ARROW_BENCHMARK(column-benchmark)
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/column-benchmark.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/column-benchmark.cc b/cpp/src/arrow/table/column-benchmark.cc
deleted file mode 100644
index c01146d..0000000
--- a/cpp/src/arrow/table/column-benchmark.cc
+++ /dev/null
@@ -1,55 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-
-#include "benchmark/benchmark.h"
-
-#include "arrow/test-util.h"
-#include "arrow/table/test-common.h"
-#include "arrow/types/integer.h"
-#include "arrow/util/memory-pool.h"
-
-namespace arrow {
-namespace {
- template <typename ArrayType>
- std::shared_ptr<Array> MakePrimitive(int32_t length, int32_t null_count = 0) {
- auto pool = GetDefaultMemoryPool();
- auto data = std::make_shared<PoolBuffer>(pool);
- auto nulls = std::make_shared<PoolBuffer>(pool);
- data->Resize(length * sizeof(typename ArrayType::value_type));
- nulls->Resize(util::bytes_for_bits(length));
- return std::make_shared<ArrayType>(length, data, 10, nulls);
- }
-} // anonymous namespace
-
-
-static void BM_BuildInt32ColumnByChunk(benchmark::State& state) { //NOLINT non-const reference
- ArrayVector arrays;
- for (int chunk_n = 0; chunk_n < state.range_x(); ++chunk_n) {
- arrays.push_back(MakePrimitive<Int32Array>(100, 10));
- }
- const auto INT32 = std::make_shared<Int32Type>();
- const auto field = std::make_shared<Field>("c0", INT32);
- std::unique_ptr<Column> column;
- while (state.KeepRunning()) {
- column.reset(new Column(field, arrays));
- }
-}
-
-BENCHMARK(BM_BuildInt32ColumnByChunk)->Range(5, 50000);
-
-} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/column-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/column-test.cc b/cpp/src/arrow/table/column-test.cc
deleted file mode 100644
index 3b102e4..0000000
--- a/cpp/src/arrow/table/column-test.cc
+++ /dev/null
@@ -1,75 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <gtest/gtest.h>
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/table/column.h"
-#include "arrow/table/schema.h"
-#include "arrow/table/test-common.h"
-#include "arrow/test-util.h"
-#include "arrow/type.h"
-#include "arrow/types/integer.h"
-
-using std::shared_ptr;
-using std::vector;
-
-namespace arrow {
-
-const auto INT32 = std::make_shared<Int32Type>();
-
-class TestColumn : public TestBase {
- protected:
- std::shared_ptr<ChunkedArray> data_;
- std::unique_ptr<Column> column_;
-};
-
-TEST_F(TestColumn, BasicAPI) {
- ArrayVector arrays;
- arrays.push_back(MakePrimitive<Int32Array>(100));
- arrays.push_back(MakePrimitive<Int32Array>(100, 10));
- arrays.push_back(MakePrimitive<Int32Array>(100, 20));
-
- auto field = std::make_shared<Field>("c0", INT32);
- column_.reset(new Column(field, arrays));
-
- ASSERT_EQ("c0", column_->name());
- ASSERT_TRUE(column_->type()->Equals(INT32));
- ASSERT_EQ(300, column_->length());
- ASSERT_EQ(30, column_->null_count());
- ASSERT_EQ(3, column_->data()->num_chunks());
-}
-
-TEST_F(TestColumn, ChunksInhomogeneous) {
- ArrayVector arrays;
- arrays.push_back(MakePrimitive<Int32Array>(100));
- arrays.push_back(MakePrimitive<Int32Array>(100, 10));
-
- auto field = std::make_shared<Field>("c0", INT32);
- column_.reset(new Column(field, arrays));
-
- ASSERT_OK(column_->ValidateData());
-
- arrays.push_back(MakePrimitive<Int16Array>(100, 10));
- column_.reset(new Column(field, arrays));
- ASSERT_RAISES(Invalid, column_->ValidateData());
-}
-
-} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/column.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/column.cc b/cpp/src/arrow/table/column.cc
deleted file mode 100644
index 573e650..0000000
--- a/cpp/src/arrow/table/column.cc
+++ /dev/null
@@ -1,68 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/table/column.h"
-
-#include <memory>
-#include <sstream>
-
-#include "arrow/type.h"
-#include "arrow/util/status.h"
-
-namespace arrow {
-
-ChunkedArray::ChunkedArray(const ArrayVector& chunks) :
- chunks_(chunks) {
- length_ = 0;
- for (const std::shared_ptr<Array>& chunk : chunks) {
- length_ += chunk->length();
- null_count_ += chunk->null_count();
- }
-}
-
-Column::Column(const std::shared_ptr<Field>& field, const ArrayVector& chunks) :
- field_(field) {
- data_ = std::make_shared<ChunkedArray>(chunks);
-}
-
-Column::Column(const std::shared_ptr<Field>& field,
- const std::shared_ptr<Array>& data) :
- field_(field) {
- data_ = std::make_shared<ChunkedArray>(ArrayVector({data}));
-}
-
-Column::Column(const std::shared_ptr<Field>& field,
- const std::shared_ptr<ChunkedArray>& data) :
- field_(field),
- data_(data) {}
-
-Status Column::ValidateData() {
- for (int i = 0; i < data_->num_chunks(); ++i) {
- const std::shared_ptr<DataType>& type = data_->chunk(i)->type();
- if (!this->type()->Equals(type)) {
- std::stringstream ss;
- ss << "In chunk " << i << " expected type "
- << this->type()->ToString()
- << " but saw "
- << type->ToString();
- return Status::Invalid(ss.str());
- }
- }
- return Status::OK();
-}
-
-} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/column.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/column.h b/cpp/src/arrow/table/column.h
deleted file mode 100644
index dfc7516..0000000
--- a/cpp/src/arrow/table/column.h
+++ /dev/null
@@ -1,105 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef ARROW_TABLE_COLUMN_H
-#define ARROW_TABLE_COLUMN_H
-
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/array.h"
-#include "arrow/type.h"
-
-namespace arrow {
-
-typedef std::vector<std::shared_ptr<Array> > ArrayVector;
-
-// A data structure managing a list of primitive Arrow arrays logically as one
-// large array
-class ChunkedArray {
- public:
- explicit ChunkedArray(const ArrayVector& chunks);
-
- // @returns: the total length of the chunked array; computed on construction
- int64_t length() const {
- return length_;
- }
-
- int64_t null_count() const {
- return null_count_;
- }
-
- int num_chunks() const {
- return chunks_.size();
- }
-
- const std::shared_ptr<Array>& chunk(int i) const {
- return chunks_[i];
- }
-
- protected:
- ArrayVector chunks_;
- int64_t length_;
- int64_t null_count_;
-};
-
-// An immutable column data structure consisting of a field (type metadata) and
-// a logical chunked data array (which can be validated as all being the same
-// type).
-class Column {
- public:
- Column(const std::shared_ptr<Field>& field, const ArrayVector& chunks);
- Column(const std::shared_ptr<Field>& field,
- const std::shared_ptr<ChunkedArray>& data);
-
- Column(const std::shared_ptr<Field>& field, const std::shared_ptr<Array>& data);
-
- int64_t length() const {
- return data_->length();
- }
-
- int64_t null_count() const {
- return data_->null_count();
- }
-
- // @returns: the column's name in the passed metadata
- const std::string& name() const {
- return field_->name;
- }
-
- // @returns: the column's type according to the metadata
- const std::shared_ptr<DataType>& type() const {
- return field_->type;
- }
-
- // @returns: the column's data as a chunked logical array
- const std::shared_ptr<ChunkedArray>& data() const {
- return data_;
- }
- // Verify that the column's array data is consistent with the passed field's
- // metadata
- Status ValidateData();
-
- protected:
- std::shared_ptr<Field> field_;
- std::shared_ptr<ChunkedArray> data_;
-};
-
-} // namespace arrow
-
-#endif // ARROW_TABLE_COLUMN_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/schema-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/schema-test.cc b/cpp/src/arrow/table/schema-test.cc
deleted file mode 100644
index 9dfade2..0000000
--- a/cpp/src/arrow/table/schema-test.cc
+++ /dev/null
@@ -1,110 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <gtest/gtest.h>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/table/schema.h"
-#include "arrow/type.h"
-#include "arrow/types/string.h"
-
-using std::shared_ptr;
-using std::vector;
-
-namespace arrow {
-
-const auto INT32 = std::make_shared<Int32Type>();
-
-TEST(TestField, Basics) {
- shared_ptr<DataType> ftype = INT32;
- shared_ptr<DataType> ftype_nn = std::make_shared<Int32Type>(false);
- Field f0("f0", ftype);
- Field f0_nn("f0", ftype_nn);
-
- ASSERT_EQ(f0.name, "f0");
- ASSERT_EQ(f0.type->ToString(), ftype->ToString());
-
- ASSERT_TRUE(f0.nullable());
- ASSERT_FALSE(f0_nn.nullable());
-}
-
-TEST(TestField, Equals) {
- shared_ptr<DataType> ftype = INT32;
- shared_ptr<DataType> ftype_nn = std::make_shared<Int32Type>(false);
-
- Field f0("f0", ftype);
- Field f0_nn("f0", ftype_nn);
- Field f0_other("f0", ftype);
-
- ASSERT_EQ(f0, f0_other);
- ASSERT_NE(f0, f0_nn);
-}
-
-class TestSchema : public ::testing::Test {
- public:
- void SetUp() {}
-};
-
-TEST_F(TestSchema, Basics) {
- auto f0 = std::make_shared<Field>("f0", INT32);
- auto f1 = std::make_shared<Field>("f1", std::make_shared<UInt8Type>(false));
- auto f1_optional = std::make_shared<Field>("f1", std::make_shared<UInt8Type>());
-
- auto f2 = std::make_shared<Field>("f2", std::make_shared<StringType>());
-
- vector<shared_ptr<Field> > fields = {f0, f1, f2};
- auto schema = std::make_shared<Schema>(fields);
-
- ASSERT_EQ(3, schema->num_fields());
- ASSERT_EQ(f0, schema->field(0));
- ASSERT_EQ(f1, schema->field(1));
- ASSERT_EQ(f2, schema->field(2));
-
- auto schema2 = std::make_shared<Schema>(fields);
-
- vector<shared_ptr<Field> > fields3 = {f0, f1_optional, f2};
- auto schema3 = std::make_shared<Schema>(fields3);
- ASSERT_TRUE(schema->Equals(schema2));
- ASSERT_FALSE(schema->Equals(schema3));
-
- ASSERT_TRUE(schema->Equals(*schema2.get()));
- ASSERT_FALSE(schema->Equals(*schema3.get()));
-}
-
-TEST_F(TestSchema, ToString) {
- auto f0 = std::make_shared<Field>("f0", std::make_shared<Int32Type>());
- auto f1 = std::make_shared<Field>("f1", std::make_shared<UInt8Type>(false));
- auto f2 = std::make_shared<Field>("f2", std::make_shared<StringType>());
- auto f3 = std::make_shared<Field>("f3",
- std::make_shared<ListType>(std::make_shared<Int16Type>()));
-
- vector<shared_ptr<Field> > fields = {f0, f1, f2, f3};
- auto schema = std::make_shared<Schema>(fields);
-
- std::string result = schema->ToString();
- std::string expected = R"(f0 int32
-f1 uint8 not null
-f2 string
-f3 list<int16>
-)";
-
- ASSERT_EQ(expected, result);
-}
-
-} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/schema.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/schema.cc b/cpp/src/arrow/table/schema.cc
deleted file mode 100644
index d49d0a7..0000000
--- a/cpp/src/arrow/table/schema.cc
+++ /dev/null
@@ -1,58 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/table/schema.h"
-
-#include <memory>
-#include <string>
-#include <sstream>
-#include <vector>
-
-#include "arrow/type.h"
-
-namespace arrow {
-
-Schema::Schema(const std::vector<std::shared_ptr<Field> >& fields) :
- fields_(fields) {}
-
-bool Schema::Equals(const Schema& other) const {
- if (this == &other) return true;
- if (num_fields() != other.num_fields()) {
- return false;
- }
- for (int i = 0; i < num_fields(); ++i) {
- if (!field(i)->Equals(*other.field(i).get())) {
- return false;
- }
- }
- return true;
-}
-
-bool Schema::Equals(const std::shared_ptr<Schema>& other) const {
- return Equals(*other.get());
-}
-
-std::string Schema::ToString() const {
- std::stringstream buffer;
-
- for (auto field : fields_) {
- buffer << field->ToString() << std::endl;
- }
- return buffer.str();
-}
-
-} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/schema.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/schema.h b/cpp/src/arrow/table/schema.h
deleted file mode 100644
index 103f01b..0000000
--- a/cpp/src/arrow/table/schema.h
+++ /dev/null
@@ -1,55 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef ARROW_SCHEMA_H
-#define ARROW_SCHEMA_H
-
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/type.h"
-
-namespace arrow {
-
-class Schema {
- public:
- explicit Schema(const std::vector<std::shared_ptr<Field> >& fields);
-
- // Returns true if all of the schema fields are equal
- bool Equals(const Schema& other) const;
- bool Equals(const std::shared_ptr<Schema>& other) const;
-
- // Return the ith schema element. Does not boundscheck
- const std::shared_ptr<Field>& field(int i) const {
- return fields_[i];
- }
-
- // Render a string representation of the schema suitable for debugging
- std::string ToString() const;
-
- int num_fields() const {
- return fields_.size();
- }
-
- private:
- std::vector<std::shared_ptr<Field> > fields_;
-};
-
-} // namespace arrow
-
-#endif // ARROW_FIELD_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/table-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/table-test.cc b/cpp/src/arrow/table/table-test.cc
deleted file mode 100644
index 8b354e8..0000000
--- a/cpp/src/arrow/table/table-test.cc
+++ /dev/null
@@ -1,128 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <gtest/gtest.h>
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/table/column.h"
-#include "arrow/table/schema.h"
-#include "arrow/table/table.h"
-#include "arrow/table/test-common.h"
-#include "arrow/test-util.h"
-#include "arrow/type.h"
-#include "arrow/types/integer.h"
-
-using std::shared_ptr;
-using std::vector;
-
-namespace arrow {
-
-const auto INT16 = std::make_shared<Int16Type>();
-const auto UINT8 = std::make_shared<UInt8Type>();
-const auto INT32 = std::make_shared<Int32Type>();
-
-class TestTable : public TestBase {
- public:
- void MakeExample1(int length) {
- auto f0 = std::make_shared<Field>("f0", INT32);
- auto f1 = std::make_shared<Field>("f1", UINT8);
- auto f2 = std::make_shared<Field>("f2", INT16);
-
- vector<shared_ptr<Field> > fields = {f0, f1, f2};
- schema_ = std::make_shared<Schema>(fields);
-
- columns_ = {
- std::make_shared<Column>(schema_->field(0), MakePrimitive<Int32Array>(length)),
- std::make_shared<Column>(schema_->field(1), MakePrimitive<UInt8Array>(length)),
- std::make_shared<Column>(schema_->field(2), MakePrimitive<Int16Array>(length))
- };
- }
-
- protected:
- std::unique_ptr<Table> table_;
- shared_ptr<Schema> schema_;
- vector<std::shared_ptr<Column> > columns_;
-};
-
-TEST_F(TestTable, EmptySchema) {
- auto empty_schema = shared_ptr<Schema>(new Schema({}));
- table_.reset(new Table("data", empty_schema, columns_));
- ASSERT_OK(table_->ValidateColumns());
- ASSERT_EQ(0, table_->num_rows());
- ASSERT_EQ(0, table_->num_columns());
-}
-
-TEST_F(TestTable, Ctors) {
- int length = 100;
- MakeExample1(length);
-
- std::string name = "data";
-
- table_.reset(new Table(name, schema_, columns_));
- ASSERT_OK(table_->ValidateColumns());
- ASSERT_EQ(name, table_->name());
- ASSERT_EQ(length, table_->num_rows());
- ASSERT_EQ(3, table_->num_columns());
-
- table_.reset(new Table(name, schema_, columns_, length));
- ASSERT_OK(table_->ValidateColumns());
- ASSERT_EQ(name, table_->name());
- ASSERT_EQ(length, table_->num_rows());
-}
-
-TEST_F(TestTable, Metadata) {
- int length = 100;
- MakeExample1(length);
-
- std::string name = "data";
- table_.reset(new Table(name, schema_, columns_));
-
- ASSERT_TRUE(table_->schema()->Equals(schema_));
-
- auto col = table_->column(0);
- ASSERT_EQ(schema_->field(0)->name, col->name());
- ASSERT_EQ(schema_->field(0)->type, col->type());
-}
-
-TEST_F(TestTable, InvalidColumns) {
- // Check that columns are all the same length
- int length = 100;
- MakeExample1(length);
-
- table_.reset(new Table("data", schema_, columns_, length - 1));
- ASSERT_RAISES(Invalid, table_->ValidateColumns());
-
- columns_.clear();
-
- // Wrong number of columns
- table_.reset(new Table("data", schema_, columns_, length));
- ASSERT_RAISES(Invalid, table_->ValidateColumns());
-
- columns_ = {
- std::make_shared<Column>(schema_->field(0), MakePrimitive<Int32Array>(length)),
- std::make_shared<Column>(schema_->field(1), MakePrimitive<UInt8Array>(length)),
- std::make_shared<Column>(schema_->field(2), MakePrimitive<Int16Array>(length - 1))
- };
-
- table_.reset(new Table("data", schema_, columns_, length));
- ASSERT_RAISES(Invalid, table_->ValidateColumns());
-}
-
-} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/table.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/table.cc b/cpp/src/arrow/table/table.cc
deleted file mode 100644
index 0c788b8..0000000
--- a/cpp/src/arrow/table/table.cc
+++ /dev/null
@@ -1,73 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/table/table.h"
-
-#include <memory>
-#include <sstream>
-
-#include "arrow/table/column.h"
-#include "arrow/table/schema.h"
-#include "arrow/type.h"
-#include "arrow/util/status.h"
-
-namespace arrow {
-
-Table::Table(const std::string& name, const std::shared_ptr<Schema>& schema,
- const std::vector<std::shared_ptr<Column> >& columns) :
- name_(name),
- schema_(schema),
- columns_(columns) {
- if (columns.size() == 0) {
- num_rows_ = 0;
- } else {
- num_rows_ = columns[0]->length();
- }
-}
-
-Table::Table(const std::string& name, const std::shared_ptr<Schema>& schema,
- const std::vector<std::shared_ptr<Column> >& columns, int64_t num_rows) :
- name_(name),
- schema_(schema),
- columns_(columns),
- num_rows_(num_rows) {}
-
-Status Table::ValidateColumns() const {
- if (num_columns() != schema_->num_fields()) {
- return Status::Invalid("Number of columns did not match schema");
- }
-
- if (columns_.size() == 0) {
- return Status::OK();
- }
-
- // Make sure columns are all the same length
- for (size_t i = 0; i < columns_.size(); ++i) {
- const Column* col = columns_[i].get();
- if (col->length() != num_rows_) {
- std::stringstream ss;
- ss << "Column " << i << " expected length "
- << num_rows_
- << " but got length "
- << col->length();
- return Status::Invalid(ss.str());
- }
- }
- return Status::OK();
-}
-
-} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/table.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/table.h b/cpp/src/arrow/table/table.h
deleted file mode 100644
index b012938..0000000
--- a/cpp/src/arrow/table/table.h
+++ /dev/null
@@ -1,82 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef ARROW_TABLE_TABLE_H
-#define ARROW_TABLE_TABLE_H
-
-#include <memory>
-#include <string>
-#include <vector>
-
-namespace arrow {
-
-class Column;
-class Schema;
-class Status;
-
-// Immutable container of fixed-length columns conforming to a particular schema
-class Table {
- public:
- // If columns is zero-length, the table's number of rows is zero
- Table(const std::string& name, const std::shared_ptr<Schema>& schema,
- const std::vector<std::shared_ptr<Column> >& columns);
-
- Table(const std::string& name, const std::shared_ptr<Schema>& schema,
- const std::vector<std::shared_ptr<Column> >& columns, int64_t num_rows);
-
- // @returns: the table's name, if any (may be length 0)
- const std::string& name() const {
- return name_;
- }
-
- // @returns: the table's schema
- const std::shared_ptr<Schema>& schema() const {
- return schema_;
- }
-
- // Note: Does not boundscheck
- // @returns: the i-th column
- const std::shared_ptr<Column>& column(int i) const {
- return columns_[i];
- }
-
- // @returns: the number of columns in the table
- int num_columns() const {
- return columns_.size();
- }
-
- // @returns: the number of rows (the corresponding length of each column)
- int64_t num_rows() const {
- return num_rows_;
- }
-
- // After construction, perform any checks to validate the input arguments
- Status ValidateColumns() const;
-
- private:
- // The table's name, optional
- std::string name_;
-
- std::shared_ptr<Schema> schema_;
- std::vector<std::shared_ptr<Column> > columns_;
-
- int64_t num_rows_;
-};
-
-} // namespace arrow
-
-#endif // ARROW_TABLE_TABLE_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/test-common.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/test-common.h b/cpp/src/arrow/table/test-common.h
deleted file mode 100644
index 50a5f6a..0000000
--- a/cpp/src/arrow/table/test-common.h
+++ /dev/null
@@ -1,54 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <gtest/gtest.h>
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/table/column.h"
-#include "arrow/table/schema.h"
-#include "arrow/table/table.h"
-#include "arrow/test-util.h"
-#include "arrow/type.h"
-#include "arrow/util/bit-util.h"
-#include "arrow/util/buffer.h"
-#include "arrow/util/memory-pool.h"
-
-namespace arrow {
-
-class TestBase : public ::testing::Test {
- public:
- void SetUp() {
- pool_ = GetDefaultMemoryPool();
- }
-
- template <typename ArrayType>
- std::shared_ptr<Array> MakePrimitive(int32_t length, int32_t null_count = 0) {
- auto data = std::make_shared<PoolBuffer>(pool_);
- auto nulls = std::make_shared<PoolBuffer>(pool_);
- EXPECT_OK(data->Resize(length * sizeof(typename ArrayType::value_type)));
- EXPECT_OK(nulls->Resize(util::bytes_for_bits(length)));
- return std::make_shared<ArrayType>(length, data, 10, nulls);
- }
-
- protected:
- MemoryPool* pool_;
-};
-
-} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/test-util.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/test-util.h b/cpp/src/arrow/test-util.h
index 0898c8e..a9fb2a7 100644
--- a/cpp/src/arrow/test-util.h
+++ b/cpp/src/arrow/test-util.h
@@ -18,26 +18,39 @@
#ifndef ARROW_TEST_UTIL_H_
#define ARROW_TEST_UTIL_H_
-#include <gtest/gtest.h>
+#include <cstdint>
#include <memory>
+#include <random>
#include <string>
#include <vector>
+#include "gtest/gtest.h"
+
+#include "arrow/type.h"
+#include "arrow/column.h"
+#include "arrow/schema.h"
+#include "arrow/table.h"
#include "arrow/util/bit-util.h"
+#include "arrow/util/buffer.h"
+#include "arrow/util/memory-pool.h"
#include "arrow/util/random.h"
#include "arrow/util/status.h"
#define ASSERT_RAISES(ENUM, expr) \
do { \
Status s = (expr); \
- ASSERT_TRUE(s.Is##ENUM()); \
+ if (!s.Is##ENUM()) { \
+ FAIL() << s.ToString(); \
+ } \
} while (0)
#define ASSERT_OK(expr) \
do { \
Status s = (expr); \
- ASSERT_TRUE(s.ok()); \
+ if (!s.ok()) { \
+ FAIL() << s.ToString(); \
+ } \
} while (0)
@@ -50,6 +63,27 @@
namespace arrow {
+class TestBase : public ::testing::Test {
+ public:
+ void SetUp() {
+ pool_ = default_memory_pool();
+ }
+
+ template <typename ArrayType>
+ std::shared_ptr<Array> MakePrimitive(int32_t length, int32_t null_count = 0) {
+ auto data = std::make_shared<PoolBuffer>(pool_);
+ auto nulls = std::make_shared<PoolBuffer>(pool_);
+ EXPECT_OK(data->Resize(length * sizeof(typename ArrayType::value_type)));
+ EXPECT_OK(nulls->Resize(util::bytes_for_bits(length)));
+ return std::make_shared<ArrayType>(length, data, 10, nulls);
+ }
+
+ protected:
+ MemoryPool* pool_;
+};
+
+namespace test {
+
template <typename T>
void randint(int64_t N, T lower, T upper, std::vector<T>* out) {
Random rng(random_seed());
@@ -84,6 +118,33 @@ void random_nulls(int64_t n, double pct_null, std::vector<bool>* nulls) {
}
}
+static inline void random_bytes(int n, uint32_t seed, uint8_t* out) {
+ std::mt19937 gen(seed);
+ std::uniform_int_distribution<int> d(0, 255);
+
+ for (int i = 0; i < n; ++i) {
+ out[i] = d(gen) & 0xFF;
+ }
+}
+
+template <typename T>
+void rand_uniform_int(int n, uint32_t seed, T min_value, T max_value, T* out) {
+ std::mt19937 gen(seed);
+ std::uniform_int_distribution<T> d(min_value, max_value);
+ for (int i = 0; i < n; ++i) {
+ out[i] = d(gen);
+ }
+}
+
+static inline int bitmap_popcount(const uint8_t* data, int length) {
+ int count = 0;
+ for (int i = 0; i < length; ++i) {
+ // TODO: accelerate this
+ if (util::get_bit(data, i)) ++count;
+ }
+ return count;
+}
+
static inline int null_count(const std::vector<uint8_t>& nulls) {
int result = 0;
for (size_t i = 0; i < nulls.size(); ++i) {
@@ -102,6 +163,7 @@ std::shared_ptr<Buffer> bytes_to_null_buffer(uint8_t* bytes, int length) {
return out;
}
+} // namespace test
} // namespace arrow
#endif // ARROW_TEST_UTIL_H_
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/type.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index 0a2e817..f7f835e 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -24,45 +24,37 @@ namespace arrow {
std::string Field::ToString() const {
std::stringstream ss;
- ss << this->name << " " << this->type->ToString();
+ ss << this->name << ": " << this->type->ToString();
+ if (!this->nullable) {
+ ss << " not null";
+ }
return ss.str();
}
DataType::~DataType() {}
-StringType::StringType(bool nullable)
- : DataType(LogicalType::STRING, nullable) {}
-
-StringType::StringType(const StringType& other)
- : StringType(other.nullable) {}
+StringType::StringType() : DataType(Type::STRING) {}
std::string StringType::ToString() const {
std::string result(name());
- if (!nullable) {
- result.append(" not null");
- }
return result;
}
std::string ListType::ToString() const {
std::stringstream s;
- s << "list<" << value_type->ToString() << ">";
- if (!this->nullable) {
- s << " not null";
- }
+ s << "list<" << value_field()->ToString() << ">";
return s.str();
}
std::string StructType::ToString() const {
std::stringstream s;
s << "struct<";
- for (size_t i = 0; i < fields_.size(); ++i) {
+ for (int i = 0; i < this->num_children(); ++i) {
if (i > 0) s << ", ";
- const std::shared_ptr<Field>& field = fields_[i];
+ const std::shared_ptr<Field>& field = this->child(i);
s << field->name << ": " << field->type->ToString();
}
s << ">";
- if (!nullable) s << " not null";
return s.str();
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/type.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 00b01ea..5984b67 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -18,62 +18,34 @@
#ifndef ARROW_TYPE_H
#define ARROW_TYPE_H
+#include <cstdint>
#include <memory>
#include <string>
#include <vector>
namespace arrow {
-// Physical data type that describes the memory layout of values. See details
-// for each type
-enum class LayoutEnum: char {
- // A physical type consisting of some non-negative number of bytes
- BYTE = 0,
-
- // A physical type consisting of some non-negative number of bits
- BIT = 1,
-
- // A parametric variable-length value type. Full specification requires a
- // child logical type
- LIST = 2,
-
- // A collection of multiple equal-length child arrays. Parametric type taking
- // 1 or more child logical types
- STRUCT = 3,
-
- // An array with heterogeneous value types. Parametric types taking 1 or more
- // child logical types
- DENSE_UNION = 4,
- SPARSE_UNION = 5
-};
-
-
-struct LayoutType {
- LayoutEnum type;
- explicit LayoutType(LayoutEnum type) : type(type) {}
-};
-
// Data types in this library are all *logical*. They can be expressed as
// either a primitive physical type (bytes or bits of some fixed size), a
// nested type consisting of other data types, or another data type (e.g. a
// timestamp encoded as an int64)
-struct LogicalType {
+struct Type {
enum type {
// A degenerate NULL type represented as 0 bytes/bits
NA = 0,
- // Little-endian integer types
- UINT8 = 1,
- INT8 = 2,
- UINT16 = 3,
- INT16 = 4,
- UINT32 = 5,
- INT32 = 6,
- UINT64 = 7,
- INT64 = 8,
-
// A boolean value represented as 1 bit
- BOOL = 9,
+ BOOL = 1,
+
+ // Little-endian integer types
+ UINT8 = 2,
+ INT8 = 3,
+ UINT16 = 4,
+ INT16 = 5,
+ UINT32 = 6,
+ INT32 = 7,
+ UINT64 = 8,
+ INT64 = 9,
// 4-byte floating point value
FLOAT = 10,
@@ -131,30 +103,38 @@ struct LogicalType {
};
};
+struct Field;
+
struct DataType {
- LogicalType::type type;
- bool nullable;
+ Type::type type;
- explicit DataType(LogicalType::type type, bool nullable = true) :
- type(type),
- nullable(nullable) {}
+ std::vector<std::shared_ptr<Field>> children_;
+
+ explicit DataType(Type::type type) :
+ type(type) {}
virtual ~DataType();
bool Equals(const DataType* other) {
// Call with a pointer so more friendly to subclasses
- return this == other || (this->type == other->type &&
- this->nullable == other->nullable);
+ return this == other || (this->type == other->type);
}
bool Equals(const std::shared_ptr<DataType>& other) {
return Equals(other.get());
}
+ const std::shared_ptr<Field>& child(int i) const {
+ return children_[i];
+ }
+
+ int num_children() const {
+ return children_.size();
+ }
+
virtual std::string ToString() const = 0;
};
-typedef std::shared_ptr<LayoutType> LayoutPtr;
typedef std::shared_ptr<DataType> TypePtr;
// A field is a piece of metadata that includes (for now) a name and a data
@@ -166,9 +146,13 @@ struct Field {
// The field's data type
TypePtr type;
- Field(const std::string& name, const TypePtr& type) :
+ // Fields can be nullable
+ bool nullable;
+
+ Field(const std::string& name, const TypePtr& type, bool nullable = true) :
name(name),
- type(type) {}
+ type(type),
+ nullable(nullable) {}
bool operator==(const Field& other) const {
return this->Equals(other);
@@ -180,6 +164,7 @@ struct Field {
bool Equals(const Field& other) const {
return (this == &other) || (this->name == other.name &&
+ this->nullable == other.nullable &&
this->type->Equals(other.type.get()));
}
@@ -187,36 +172,12 @@ struct Field {
return Equals(*other.get());
}
- bool nullable() const {
- return this->type->nullable;
- }
-
std::string ToString() const;
};
-struct BytesType : public LayoutType {
- int size;
-
- explicit BytesType(int size)
- : LayoutType(LayoutEnum::BYTE),
- size(size) {}
-
- BytesType(const BytesType& other)
- : BytesType(other.size) {}
-};
-
-struct ListLayoutType : public LayoutType {
- LayoutPtr value_type;
-
- explicit ListLayoutType(const LayoutPtr& value_type)
- : LayoutType(LayoutEnum::BYTE),
- value_type(value_type) {}
-};
-
template <typename Derived>
struct PrimitiveType : public DataType {
- explicit PrimitiveType(bool nullable = true)
- : DataType(Derived::type_enum, nullable) {}
+ PrimitiveType() : DataType(Derived::type_enum) {}
std::string ToString() const override;
};
@@ -224,22 +185,19 @@ struct PrimitiveType : public DataType {
template <typename Derived>
inline std::string PrimitiveType<Derived>::ToString() const {
std::string result(static_cast<const Derived*>(this)->name());
- if (!nullable) {
- result.append(" not null");
- }
return result;
}
-#define PRIMITIVE_DECL(TYPENAME, C_TYPE, ENUM, SIZE, NAME) \
- typedef C_TYPE c_type; \
- static constexpr LogicalType::type type_enum = LogicalType::ENUM; \
- static constexpr int size = SIZE; \
- \
- explicit TYPENAME(bool nullable = true) \
- : PrimitiveType<TYPENAME>(nullable) {} \
- \
- static const char* name() { \
- return NAME; \
+#define PRIMITIVE_DECL(TYPENAME, C_TYPE, ENUM, SIZE, NAME) \
+ typedef C_TYPE c_type; \
+ static constexpr Type::type type_enum = Type::ENUM; \
+ static constexpr int size = SIZE; \
+ \
+ TYPENAME() \
+ : PrimitiveType<TYPENAME>() {} \
+ \
+ static const char* name() { \
+ return NAME; \
}
struct NullType : public PrimitiveType<NullType> {
@@ -292,11 +250,23 @@ struct DoubleType : public PrimitiveType<DoubleType> {
struct ListType : public DataType {
// List can contain any other logical value type
- TypePtr value_type;
+ explicit ListType(const std::shared_ptr<DataType>& value_type)
+ : DataType(Type::LIST) {
+ children_ = {std::make_shared<Field>("item", value_type)};
+ }
+
+ explicit ListType(const std::shared_ptr<Field>& value_field)
+ : DataType(Type::LIST) {
+ children_ = {value_field};
+ }
- explicit ListType(const TypePtr& value_type, bool nullable = true)
- : DataType(LogicalType::LIST, nullable),
- value_type(value_type) {}
+ const std::shared_ptr<Field>& value_field() const {
+ return children_[0];
+ }
+
+ const std::shared_ptr<DataType>& value_type() const {
+ return children_[0]->type;
+ }
static char const *name() {
return "list";
@@ -307,9 +277,7 @@ struct ListType : public DataType {
// String is a logical type consisting of a physical list of 1-byte values
struct StringType : public DataType {
- explicit StringType(bool nullable = true);
-
- StringType(const StringType& other);
+ StringType();
static char const *name() {
return "string";
@@ -319,20 +287,9 @@ struct StringType : public DataType {
};
struct StructType : public DataType {
- std::vector<std::shared_ptr<Field> > fields_;
-
- explicit StructType(const std::vector<std::shared_ptr<Field> >& fields,
- bool nullable = true)
- : DataType(LogicalType::STRUCT, nullable) {
- fields_ = fields;
- }
-
- const std::shared_ptr<Field>& field(int i) const {
- return fields_[i];
- }
-
- int num_children() const {
- return fields_.size();
+ explicit StructType(const std::vector<std::shared_ptr<Field>>& fields)
+ : DataType(Type::STRUCT) {
+ children_ = fields;
}
std::string ToString() const override;
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/CMakeLists.txt b/cpp/src/arrow/types/CMakeLists.txt
index 57cabde..595b3be 100644
--- a/cpp/src/arrow/types/CMakeLists.txt
+++ b/cpp/src/arrow/types/CMakeLists.txt
@@ -26,8 +26,6 @@ install(FILES
construct.h
datetime.h
decimal.h
- floating.h
- integer.h
json.h
list.h
primitive.h
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/boolean.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/boolean.h b/cpp/src/arrow/types/boolean.h
index a5023d7..1cb91f9 100644
--- a/cpp/src/arrow/types/boolean.h
+++ b/cpp/src/arrow/types/boolean.h
@@ -22,7 +22,7 @@
namespace arrow {
-typedef PrimitiveArrayImpl<BooleanType> BooleanArray;
+// typedef PrimitiveArrayImpl<BooleanType> BooleanArray;
class BooleanBuilder : public ArrayBuilder {
};
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/collection.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/collection.h b/cpp/src/arrow/types/collection.h
index 42a9c92..46d84f1 100644
--- a/cpp/src/arrow/types/collection.h
+++ b/cpp/src/arrow/types/collection.h
@@ -25,7 +25,7 @@
namespace arrow {
-template <LogicalType::type T>
+template <Type::type T>
struct CollectionType : public DataType {
std::vector<TypePtr> child_types_;
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/construct.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/construct.cc b/cpp/src/arrow/types/construct.cc
index 43f01a3..290decd 100644
--- a/cpp/src/arrow/types/construct.cc
+++ b/cpp/src/arrow/types/construct.cc
@@ -19,24 +19,26 @@
#include <memory>
-#include "arrow/types/floating.h"
-#include "arrow/types/integer.h"
+#include "arrow/type.h"
+#include "arrow/types/primitive.h"
#include "arrow/types/list.h"
#include "arrow/types/string.h"
+#include "arrow/util/buffer.h"
#include "arrow/util/status.h"
namespace arrow {
class ArrayBuilder;
-// Initially looked at doing this with vtables, but shared pointers makes it
-// difficult
-
#define BUILDER_CASE(ENUM, BuilderType) \
- case LogicalType::ENUM: \
+ case Type::ENUM: \
out->reset(new BuilderType(pool, type)); \
return Status::OK();
+// Initially looked at doing this with vtables, but shared pointers makes it
+// difficult
+//
+// TODO(wesm): come up with a less monolithic strategy
Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
std::shared_ptr<ArrayBuilder>* out) {
switch (type->type) {
@@ -56,30 +58,41 @@ Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
BUILDER_CASE(STRING, StringBuilder);
- case LogicalType::LIST:
+ case Type::LIST:
{
std::shared_ptr<ArrayBuilder> value_builder;
const std::shared_ptr<DataType>& value_type = static_cast<ListType*>(
- type.get())->value_type;
+ type.get())->value_type();
RETURN_NOT_OK(MakeBuilder(pool, value_type, &value_builder));
out->reset(new ListBuilder(pool, type, value_builder));
return Status::OK();
}
- // BUILDER_CASE(CHAR, CharBuilder);
-
- // BUILDER_CASE(VARCHAR, VarcharBuilder);
- // BUILDER_CASE(BINARY, BinaryBuilder);
-
- // BUILDER_CASE(DATE, DateBuilder);
- // BUILDER_CASE(TIMESTAMP, TimestampBuilder);
- // BUILDER_CASE(TIME, TimeBuilder);
+ default:
+ return Status::NotImplemented(type->ToString());
+ }
+}
- // BUILDER_CASE(LIST, ListBuilder);
- // BUILDER_CASE(STRUCT, StructBuilder);
- // BUILDER_CASE(DENSE_UNION, DenseUnionBuilder);
- // BUILDER_CASE(SPARSE_UNION, SparseUnionBuilder);
+#define MAKE_PRIMITIVE_ARRAY_CASE(ENUM, ArrayType) \
+ case Type::ENUM: \
+ out->reset(new ArrayType(type, length, data, null_count, nulls)); \
+ return Status::OK();
+Status MakePrimitiveArray(const std::shared_ptr<DataType>& type,
+ int32_t length, const std::shared_ptr<Buffer>& data,
+ int32_t null_count, const std::shared_ptr<Buffer>& nulls,
+ std::shared_ptr<Array>* out) {
+ switch (type->type) {
+ MAKE_PRIMITIVE_ARRAY_CASE(UINT8, UInt8Array);
+ MAKE_PRIMITIVE_ARRAY_CASE(INT8, Int8Array);
+ MAKE_PRIMITIVE_ARRAY_CASE(UINT16, UInt16Array);
+ MAKE_PRIMITIVE_ARRAY_CASE(INT16, Int16Array);
+ MAKE_PRIMITIVE_ARRAY_CASE(UINT32, UInt32Array);
+ MAKE_PRIMITIVE_ARRAY_CASE(INT32, Int32Array);
+ MAKE_PRIMITIVE_ARRAY_CASE(UINT64, UInt64Array);
+ MAKE_PRIMITIVE_ARRAY_CASE(INT64, Int64Array);
+ MAKE_PRIMITIVE_ARRAY_CASE(FLOAT, FloatArray);
+ MAKE_PRIMITIVE_ARRAY_CASE(DOUBLE, DoubleArray);
default:
return Status::NotImplemented(type->ToString());
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/construct.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/construct.h b/cpp/src/arrow/types/construct.h
index 59ebe1a..089c484 100644
--- a/cpp/src/arrow/types/construct.h
+++ b/cpp/src/arrow/types/construct.h
@@ -18,19 +18,26 @@
#ifndef ARROW_TYPES_CONSTRUCT_H
#define ARROW_TYPES_CONSTRUCT_H
+#include <cstdint>
#include <memory>
-#include "arrow/type.h"
-
namespace arrow {
+class Array;
class ArrayBuilder;
+class Buffer;
+struct DataType;
class MemoryPool;
class Status;
Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
std::shared_ptr<ArrayBuilder>* out);
+Status MakePrimitiveArray(const std::shared_ptr<DataType>& type,
+ int32_t length, const std::shared_ptr<Buffer>& data,
+ int32_t null_count, const std::shared_ptr<Buffer>& nulls,
+ std::shared_ptr<Array>* out);
+
} // namespace arrow
#endif // ARROW_BUILDER_H_
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/datetime.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/datetime.h b/cpp/src/arrow/types/datetime.h
index 765fc29..e57b66a 100644
--- a/cpp/src/arrow/types/datetime.h
+++ b/cpp/src/arrow/types/datetime.h
@@ -31,8 +31,8 @@ struct DateType : public DataType {
Unit unit;
- explicit DateType(Unit unit = Unit::DAY, bool nullable = true)
- : DataType(LogicalType::DATE, nullable),
+ explicit DateType(Unit unit = Unit::DAY)
+ : DataType(Type::DATE),
unit(unit) {}
DateType(const DateType& other)
@@ -41,10 +41,6 @@ struct DateType : public DataType {
static char const *name() {
return "date";
}
-
- // virtual std::string ToString() {
- // return name();
- // }
};
@@ -58,8 +54,8 @@ struct TimestampType : public DataType {
Unit unit;
- explicit TimestampType(Unit unit = Unit::MILLI, bool nullable = true)
- : DataType(LogicalType::TIMESTAMP, nullable),
+ explicit TimestampType(Unit unit = Unit::MILLI)
+ : DataType(Type::TIMESTAMP),
unit(unit) {}
TimestampType(const TimestampType& other)
@@ -68,10 +64,6 @@ struct TimestampType : public DataType {
static char const *name() {
return "timestamp";
}
-
- // virtual std::string ToString() {
- // return name();
- // }
};
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/floating.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/floating.cc b/cpp/src/arrow/types/floating.cc
deleted file mode 100644
index bde2826..0000000
--- a/cpp/src/arrow/types/floating.cc
+++ /dev/null
@@ -1,22 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/types/floating.h"
-
-namespace arrow {
-
-} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/floating.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/floating.h b/cpp/src/arrow/types/floating.h
deleted file mode 100644
index e752278..0000000
--- a/cpp/src/arrow/types/floating.h
+++ /dev/null
@@ -1,36 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef ARROW_TYPES_FLOATING_H
-#define ARROW_TYPES_FLOATING_H
-
-#include <string>
-
-#include "arrow/types/primitive.h"
-#include "arrow/type.h"
-
-namespace arrow {
-
-typedef PrimitiveArrayImpl<FloatType> FloatArray;
-typedef PrimitiveArrayImpl<DoubleType> DoubleArray;
-
-typedef PrimitiveBuilder<FloatType, FloatArray> FloatBuilder;
-typedef PrimitiveBuilder<DoubleType, DoubleArray> DoubleBuilder;
-
-} // namespace arrow
-
-#endif // ARROW_TYPES_FLOATING_H