You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2016/03/23 02:45:22 UTC

[1/3] arrow git commit: ARROW-67: C++ metadata flatbuffer serialization and data movement to memory maps

Repository: arrow
Updated Branches:
  refs/heads/master 093f9bd8c -> 65db0da80


http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/integer.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/integer.cc b/cpp/src/arrow/types/integer.cc
deleted file mode 100644
index 4696536..0000000
--- a/cpp/src/arrow/types/integer.cc
+++ /dev/null
@@ -1,22 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/types/integer.h"
-
-namespace arrow {
-
-} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/integer.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/integer.h b/cpp/src/arrow/types/integer.h
deleted file mode 100644
index 5684191..0000000
--- a/cpp/src/arrow/types/integer.h
+++ /dev/null
@@ -1,57 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef ARROW_TYPES_INTEGER_H
-#define ARROW_TYPES_INTEGER_H
-
-#include <cstdint>
-#include <string>
-
-#include "arrow/types/primitive.h"
-#include "arrow/type.h"
-
-namespace arrow {
-
-// Array containers
-
-typedef PrimitiveArrayImpl<UInt8Type> UInt8Array;
-typedef PrimitiveArrayImpl<Int8Type> Int8Array;
-
-typedef PrimitiveArrayImpl<UInt16Type> UInt16Array;
-typedef PrimitiveArrayImpl<Int16Type> Int16Array;
-
-typedef PrimitiveArrayImpl<UInt32Type> UInt32Array;
-typedef PrimitiveArrayImpl<Int32Type> Int32Array;
-
-typedef PrimitiveArrayImpl<UInt64Type> UInt64Array;
-typedef PrimitiveArrayImpl<Int64Type> Int64Array;
-
-// Builders
-
-typedef PrimitiveBuilder<UInt8Type, UInt8Array> UInt8Builder;
-typedef PrimitiveBuilder<UInt16Type, UInt16Array> UInt16Builder;
-typedef PrimitiveBuilder<UInt32Type, UInt32Array> UInt32Builder;
-typedef PrimitiveBuilder<UInt64Type, UInt64Array> UInt64Builder;
-
-typedef PrimitiveBuilder<Int8Type, Int8Array> Int8Builder;
-typedef PrimitiveBuilder<Int16Type, Int16Array> Int16Builder;
-typedef PrimitiveBuilder<Int32Type, Int32Array> Int32Builder;
-typedef PrimitiveBuilder<Int64Type, Int64Array> Int64Builder;
-
-} // namespace arrow
-
-#endif // ARROW_TYPES_INTEGER_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/json.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/json.cc b/cpp/src/arrow/types/json.cc
index 168e370..fb731ed 100644
--- a/cpp/src/arrow/types/json.cc
+++ b/cpp/src/arrow/types/json.cc
@@ -20,7 +20,6 @@
 #include <vector>
 
 #include "arrow/type.h"
-#include "arrow/types/string.h"
 #include "arrow/types/union.h"
 
 namespace arrow {

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/json.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/json.h b/cpp/src/arrow/types/json.h
index b67fb38..9c850af 100644
--- a/cpp/src/arrow/types/json.h
+++ b/cpp/src/arrow/types/json.h
@@ -28,8 +28,8 @@ struct JSONScalar : public DataType {
   static TypePtr dense_type;
   static TypePtr sparse_type;
 
-  explicit JSONScalar(bool dense = true, bool nullable = true)
-      : DataType(LogicalType::JSON_SCALAR, nullable),
+  explicit JSONScalar(bool dense = true)
+      : DataType(Type::JSON_SCALAR),
         dense(dense) {}
 };
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/list-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list-test.cc b/cpp/src/arrow/types/list-test.cc
index 02991de..eb55ca8 100644
--- a/cpp/src/arrow/types/list-test.cc
+++ b/cpp/src/arrow/types/list-test.cc
@@ -15,20 +15,21 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <gtest/gtest.h>
 #include <cstdlib>
 #include <cstdint>
 #include <memory>
 #include <string>
 #include <vector>
 
+#include "gtest/gtest.h"
+
 #include "arrow/array.h"
+#include "arrow/builder.h"
 #include "arrow/test-util.h"
 #include "arrow/type.h"
 #include "arrow/types/construct.h"
-#include "arrow/types/integer.h"
 #include "arrow/types/list.h"
-#include "arrow/types/string.h"
+#include "arrow/types/primitive.h"
 #include "arrow/types/test-common.h"
 #include "arrow/util/status.h"
 
@@ -39,27 +40,24 @@ using std::vector;
 
 namespace arrow {
 
-class ArrayBuilder;
-
 TEST(TypesTest, TestListType) {
   std::shared_ptr<DataType> vt = std::make_shared<UInt8Type>();
 
   ListType list_type(vt);
-  ASSERT_EQ(list_type.type, LogicalType::LIST);
+  ASSERT_EQ(list_type.type, Type::LIST);
 
   ASSERT_EQ(list_type.name(), string("list"));
-  ASSERT_EQ(list_type.ToString(), string("list<uint8>"));
+  ASSERT_EQ(list_type.ToString(), string("list<item: uint8>"));
 
-  ASSERT_EQ(list_type.value_type->type, vt->type);
-  ASSERT_EQ(list_type.value_type->type, vt->type);
+  ASSERT_EQ(list_type.value_type()->type, vt->type);
+  ASSERT_EQ(list_type.value_type()->type, vt->type);
 
-  std::shared_ptr<DataType> st = std::make_shared<StringType>(false);
-  std::shared_ptr<DataType> lt = std::make_shared<ListType>(st, false);
-  ASSERT_EQ(lt->ToString(), string("list<string not null> not null"));
+  std::shared_ptr<DataType> st = std::make_shared<StringType>();
+  std::shared_ptr<DataType> lt = std::make_shared<ListType>(st);
+  ASSERT_EQ(lt->ToString(), string("list<item: string>"));
 
-  ListType lt2(lt, false);
-  ASSERT_EQ(lt2.ToString(),
-      string("list<list<string not null> not null> not null"));
+  ListType lt2(lt);
+  ASSERT_EQ(lt2.ToString(), string("list<item: list<item: string>>"));
 }
 
 // ----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/list.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list.cc b/cpp/src/arrow/types/list.cc
index 69a79a7..670ee4d 100644
--- a/cpp/src/arrow/types/list.cc
+++ b/cpp/src/arrow/types/list.cc
@@ -19,4 +19,33 @@
 
 namespace arrow {
 
+bool ListArray::EqualsExact(const ListArray& other) const {
+  if (this == &other) return true;
+  if (null_count_ != other.null_count_) {
+    return false;
+  }
+
+  bool equal_offsets = offset_buf_->Equals(*other.offset_buf_,
+      length_ + 1);
+  bool equal_nulls = true;
+  if (null_count_ > 0) {
+    equal_nulls = nulls_->Equals(*other.nulls_,
+        util::bytes_for_bits(length_));
+  }
+
+  if (!(equal_offsets && equal_nulls)) {
+    return false;
+  }
+
+  return values()->Equals(other.values());
+}
+
+bool ListArray::Equals(const std::shared_ptr<Array>& arr) const {
+  if (this == arr.get()) return true;
+  if (this->type_enum() != arr->type_enum()) {
+    return false;
+  }
+  return EqualsExact(*static_cast<const ListArray*>(arr.get()));
+}
+
 } // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/list.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list.h b/cpp/src/arrow/types/list.h
index 210c76a..141f762 100644
--- a/cpp/src/arrow/types/list.h
+++ b/cpp/src/arrow/types/list.h
@@ -21,12 +21,10 @@
 #include <cstdint>
 #include <cstring>
 #include <memory>
-#include <string>
 
 #include "arrow/array.h"
 #include "arrow/builder.h"
 #include "arrow/type.h"
-#include "arrow/types/integer.h"
 #include "arrow/types/primitive.h"
 #include "arrow/util/bit-util.h"
 #include "arrow/util/buffer.h"
@@ -38,29 +36,19 @@ class MemoryPool;
 
 class ListArray : public Array {
  public:
-  ListArray() : Array(), offset_buf_(nullptr), offsets_(nullptr) {}
-
   ListArray(const TypePtr& type, int32_t length, std::shared_ptr<Buffer> offsets,
       const ArrayPtr& values,
       int32_t null_count = 0,
-      std::shared_ptr<Buffer> nulls = nullptr) {
-    Init(type, length, offsets, values, null_count, nulls);
-  }
-
-  virtual ~ListArray() {}
-
-  void Init(const TypePtr& type, int32_t length, std::shared_ptr<Buffer> offsets,
-      const ArrayPtr& values,
-      int32_t null_count = 0,
-      std::shared_ptr<Buffer> nulls = nullptr) {
+      std::shared_ptr<Buffer> nulls = nullptr) :
+      Array(type, length, null_count, nulls) {
     offset_buf_ = offsets;
     offsets_ = offsets == nullptr? nullptr :
       reinterpret_cast<const int32_t*>(offset_buf_->data());
-
     values_ = values;
-    Array::Init(type, length, null_count, nulls);
   }
 
+  virtual ~ListArray() {}
+
   // Return a shared pointer in case the requestor desires to share ownership
   // with this array.
   const std::shared_ptr<Array>& values() const {return values_;}
@@ -77,6 +65,9 @@ class ListArray : public Array {
   int32_t value_offset(int i) { return offsets_[i];}
   int32_t value_length(int i) { return offsets_[i + 1] - offsets_[i];}
 
+  bool EqualsExact(const ListArray& other) const;
+  bool Equals(const std::shared_ptr<Array>& arr) const override;
+
  protected:
   std::shared_ptr<Buffer> offset_buf_;
   const int32_t* offsets_;
@@ -137,8 +128,6 @@ class ListBuilder : public Int32Builder {
 
   template <typename Container>
   std::shared_ptr<Array> Transfer() {
-    auto result = std::make_shared<Container>();
-
     std::shared_ptr<Array> items = value_builder_->Finish();
 
     // Add final offset if the length is non-zero
@@ -146,8 +135,9 @@ class ListBuilder : public Int32Builder {
       raw_buffer()[length_] = items->length();
     }
 
-    result->Init(type_, length_, values_, items,
+    auto result = std::make_shared<Container>(type_, length_, values_, items,
         null_count_, nulls_);
+
     values_ = nulls_ = nullptr;
     capacity_ = length_ = null_count_ = 0;
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/primitive-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive-test.cc b/cpp/src/arrow/types/primitive-test.cc
index f35a258..7eae8cd 100644
--- a/cpp/src/arrow/types/primitive-test.cc
+++ b/cpp/src/arrow/types/primitive-test.cc
@@ -15,21 +15,17 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <gtest/gtest.h>
-
 #include <cstdint>
 #include <memory>
 #include <string>
 #include <vector>
 
-#include "arrow/array.h"
+#include "gtest/gtest.h"
+
 #include "arrow/builder.h"
 #include "arrow/test-util.h"
 #include "arrow/type.h"
-#include "arrow/types/boolean.h"
 #include "arrow/types/construct.h"
-#include "arrow/types/floating.h"
-#include "arrow/types/integer.h"
 #include "arrow/types/primitive.h"
 #include "arrow/types/test-common.h"
 #include "arrow/util/bit-util.h"
@@ -43,23 +39,17 @@ using std::vector;
 
 namespace arrow {
 
-TEST(TypesTest, TestBytesType) {
-  BytesType t1(3);
-
-  ASSERT_EQ(t1.type, LayoutEnum::BYTE);
-  ASSERT_EQ(t1.size, 3);
-}
-
+class Array;
 
 #define PRIMITIVE_TEST(KLASS, ENUM, NAME)       \
   TEST(TypesTest, TestPrimitive_##ENUM) {       \
     KLASS tp;                                   \
                                                 \
-    ASSERT_EQ(tp.type, LogicalType::ENUM);      \
+    ASSERT_EQ(tp.type, Type::ENUM);             \
     ASSERT_EQ(tp.name(), string(NAME));         \
                                                 \
     KLASS tp_copy = tp;                         \
-    ASSERT_EQ(tp_copy.type, LogicalType::ENUM); \
+    ASSERT_EQ(tp_copy.type, Type::ENUM);        \
   }
 
 PRIMITIVE_TEST(Int8Type, INT8, "int8");
@@ -109,22 +99,20 @@ class TestPrimitiveBuilder : public TestBuilder {
 
   void RandomData(int N, double pct_null = 0.1) {
     Attrs::draw(N, &draws_);
-    random_nulls(N, pct_null, &nulls_);
+    test::random_nulls(N, pct_null, &nulls_);
   }
 
   void CheckNullable() {
-    ArrayType expected;
     int size = builder_->length();
 
     auto ex_data = std::make_shared<Buffer>(
         reinterpret_cast<uint8_t*>(draws_.data()),
         size * sizeof(T));
 
-    auto ex_nulls = bytes_to_null_buffer(nulls_.data(), size);
-
-    int32_t ex_null_count = null_count(nulls_);
+    auto ex_nulls = test::bytes_to_null_buffer(nulls_.data(), size);
+    int32_t ex_null_count = test::null_count(nulls_);
 
-    expected.Init(size, ex_data, ex_null_count, ex_nulls);
+    auto expected = std::make_shared<ArrayType>(size, ex_data, ex_null_count, ex_nulls);
 
     std::shared_ptr<ArrayType> result = std::dynamic_pointer_cast<ArrayType>(
         builder_->Finish());
@@ -135,18 +123,17 @@ class TestPrimitiveBuilder : public TestBuilder {
     ASSERT_EQ(0, builder_->null_count());
     ASSERT_EQ(nullptr, builder_->buffer());
 
-    ASSERT_TRUE(result->Equals(expected));
+    ASSERT_TRUE(result->EqualsExact(*expected.get()));
     ASSERT_EQ(ex_null_count, result->null_count());
   }
 
   void CheckNonNullable() {
-    ArrayType expected;
     int size = builder_nn_->length();
 
     auto ex_data = std::make_shared<Buffer>(reinterpret_cast<uint8_t*>(draws_.data()),
         size * sizeof(T));
 
-    expected.Init(size, ex_data);
+    auto expected = std::make_shared<ArrayType>(size, ex_data);
 
     std::shared_ptr<ArrayType> result = std::dynamic_pointer_cast<ArrayType>(
         builder_nn_->Finish());
@@ -156,7 +143,7 @@ class TestPrimitiveBuilder : public TestBuilder {
     ASSERT_EQ(0, builder_nn_->capacity());
     ASSERT_EQ(nullptr, builder_nn_->buffer());
 
-    ASSERT_TRUE(result->Equals(expected));
+    ASSERT_TRUE(result->EqualsExact(*expected.get()));
     ASSERT_EQ(0, result->null_count());
   }
 
@@ -183,8 +170,8 @@ class TestPrimitiveBuilder : public TestBuilder {
 #define PINT_DECL(CapType, c_type, LOWER, UPPER)    \
   struct P##CapType {                               \
     PTYPE_DECL(CapType, c_type);                    \
-    static void draw(int N, vector<T>* draws) {  \
-      randint<T>(N, LOWER, UPPER, draws);           \
+    static void draw(int N, vector<T>* draws) {     \
+      test::randint<T>(N, LOWER, UPPER, draws);     \
     }                                               \
   }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/primitive.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive.cc b/cpp/src/arrow/types/primitive.cc
index c86260b..32b8bfa 100644
--- a/cpp/src/arrow/types/primitive.cc
+++ b/cpp/src/arrow/types/primitive.cc
@@ -26,16 +26,16 @@ namespace arrow {
 // ----------------------------------------------------------------------
 // Primitive array base
 
-void PrimitiveArray::Init(const TypePtr& type, int32_t length,
+PrimitiveArray::PrimitiveArray(const TypePtr& type, int32_t length,
     const std::shared_ptr<Buffer>& data,
     int32_t null_count,
-    const std::shared_ptr<Buffer>& nulls) {
-  Array::Init(type, length, null_count, nulls);
+    const std::shared_ptr<Buffer>& nulls) :
+    Array(type, length, null_count, nulls) {
   data_ = data;
   raw_data_ = data == nullptr? nullptr : data_->data();
 }
 
-bool PrimitiveArray::Equals(const PrimitiveArray& other) const {
+bool PrimitiveArray::EqualsExact(const PrimitiveArray& other) const {
   if (this == &other) return true;
   if (null_count_ != other.null_count_) {
     return false;
@@ -50,4 +50,12 @@ bool PrimitiveArray::Equals(const PrimitiveArray& other) const {
   }
 }
 
+bool PrimitiveArray::Equals(const std::shared_ptr<Array>& arr) const {
+  if (this == arr.get()) return true;
+  if (this->type_enum() != arr->type_enum()) {
+    return false;
+  }
+  return EqualsExact(*static_cast<const PrimitiveArray*>(arr.get()));
+}
+
 } // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/primitive.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive.h b/cpp/src/arrow/types/primitive.h
index 22ab59c..e01027c 100644
--- a/cpp/src/arrow/types/primitive.h
+++ b/cpp/src/arrow/types/primitive.h
@@ -21,7 +21,6 @@
 #include <cstdint>
 #include <cstring>
 #include <memory>
-#include <string>
 
 #include "arrow/array.h"
 #include "arrow/builder.h"
@@ -38,64 +37,57 @@ class MemoryPool;
 // Base class for fixed-size logical types
 class PrimitiveArray : public Array {
  public:
-  PrimitiveArray() : Array(), data_(nullptr), raw_data_(nullptr) {}
-
-  virtual ~PrimitiveArray() {}
-
-  void Init(const TypePtr& type, int32_t length,
+  PrimitiveArray(const TypePtr& type, int32_t length,
       const std::shared_ptr<Buffer>& data,
       int32_t null_count = 0,
       const std::shared_ptr<Buffer>& nulls = nullptr);
+  virtual ~PrimitiveArray() {}
 
   const std::shared_ptr<Buffer>& data() const { return data_;}
 
-  bool Equals(const PrimitiveArray& other) const;
+  bool EqualsExact(const PrimitiveArray& other) const;
+  bool Equals(const std::shared_ptr<Array>& arr) const override;
 
  protected:
   std::shared_ptr<Buffer> data_;
   const uint8_t* raw_data_;
 };
 
-
-template <typename TypeClass>
-class PrimitiveArrayImpl : public PrimitiveArray {
- public:
-  typedef typename TypeClass::c_type value_type;
-
-  PrimitiveArrayImpl() : PrimitiveArray() {}
-
-  virtual ~PrimitiveArrayImpl() {}
-
-  PrimitiveArrayImpl(int32_t length, const std::shared_ptr<Buffer>& data,
-      int32_t null_count = 0,
-      const std::shared_ptr<Buffer>& nulls = nullptr) {
-    Init(length, data, null_count, nulls);
-  }
-
-  void Init(int32_t length, const std::shared_ptr<Buffer>& data,
-      int32_t null_count = 0,
-      const std::shared_ptr<Buffer>& nulls = nullptr) {
-    TypePtr type(new TypeClass());
-    PrimitiveArray::Init(type, length, data, null_count, nulls);
-  }
-
-  bool Equals(const PrimitiveArrayImpl& other) const {
-    return PrimitiveArray::Equals(*static_cast<const PrimitiveArray*>(&other));
-  }
-
-  const value_type* raw_data() const {
-    return reinterpret_cast<const value_type*>(raw_data_);
-  }
-
-  value_type Value(int i) const {
-    return raw_data()[i];
-  }
-
-  TypeClass* exact_type() const {
-    return static_cast<TypeClass*>(type_);
-  }
+#define NUMERIC_ARRAY_DECL(NAME, TypeClass, T)                      \
+class NAME : public PrimitiveArray {                                \
+ public:                                                            \
+  using value_type = T;                                             \
+  using PrimitiveArray::PrimitiveArray;                             \
+  NAME(int32_t length, const std::shared_ptr<Buffer>& data,         \
+      int32_t null_count = 0,                                       \
+      const std::shared_ptr<Buffer>& nulls = nullptr) :             \
+      PrimitiveArray(std::make_shared<TypeClass>(), length, data,   \
+          null_count, nulls) {}                                     \
+                                                                    \
+  bool EqualsExact(const NAME& other) const {                       \
+    return PrimitiveArray::EqualsExact(                             \
+        *static_cast<const PrimitiveArray*>(&other));               \
+  }                                                                 \
+                                                                    \
+  const T* raw_data() const {                                       \
+    return reinterpret_cast<const T*>(raw_data_);                   \
+  }                                                                 \
+                                                                    \
+  T Value(int i) const {                                            \
+    return raw_data()[i];                                           \
+  }                                                                 \
 };
 
+NUMERIC_ARRAY_DECL(UInt8Array, UInt8Type, uint8_t);
+NUMERIC_ARRAY_DECL(Int8Array, Int8Type, int8_t);
+NUMERIC_ARRAY_DECL(UInt16Array, UInt16Type, uint16_t);
+NUMERIC_ARRAY_DECL(Int16Array, Int16Type, int16_t);
+NUMERIC_ARRAY_DECL(UInt32Array, UInt32Type, uint32_t);
+NUMERIC_ARRAY_DECL(Int32Array, Int32Type, int32_t);
+NUMERIC_ARRAY_DECL(UInt64Array, UInt64Type, uint64_t);
+NUMERIC_ARRAY_DECL(Int64Array, Int64Type, int64_t);
+NUMERIC_ARRAY_DECL(FloatArray, FloatType, float);
+NUMERIC_ARRAY_DECL(DoubleArray, DoubleType, double);
 
 template <typename Type, typename ArrayType>
 class PrimitiveBuilder : public ArrayBuilder {
@@ -202,8 +194,9 @@ class PrimitiveBuilder : public ArrayBuilder {
   }
 
   std::shared_ptr<Array> Finish() override {
-    std::shared_ptr<ArrayType> result = std::make_shared<ArrayType>();
-    result->PrimitiveArray::Init(type_, length_, values_, null_count_, nulls_);
+    std::shared_ptr<ArrayType> result = std::make_shared<ArrayType>(
+        type_, length_, values_, null_count_, nulls_);
+
     values_ = nulls_ = nullptr;
     capacity_ = length_ = null_count_ = 0;
     return result;
@@ -222,6 +215,21 @@ class PrimitiveBuilder : public ArrayBuilder {
   int elsize_;
 };
 
+// Builders
+
+typedef PrimitiveBuilder<UInt8Type, UInt8Array> UInt8Builder;
+typedef PrimitiveBuilder<UInt16Type, UInt16Array> UInt16Builder;
+typedef PrimitiveBuilder<UInt32Type, UInt32Array> UInt32Builder;
+typedef PrimitiveBuilder<UInt64Type, UInt64Array> UInt64Builder;
+
+typedef PrimitiveBuilder<Int8Type, Int8Array> Int8Builder;
+typedef PrimitiveBuilder<Int16Type, Int16Array> Int16Builder;
+typedef PrimitiveBuilder<Int32Type, Int32Array> Int32Builder;
+typedef PrimitiveBuilder<Int64Type, Int64Array> Int64Builder;
+
+typedef PrimitiveBuilder<FloatType, FloatArray> FloatBuilder;
+typedef PrimitiveBuilder<DoubleType, DoubleArray> DoubleBuilder;
+
 } // namespace arrow
 
 #endif  // ARROW_TYPES_PRIMITIVE_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/string-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/string-test.cc b/cpp/src/arrow/types/string-test.cc
index 6381093..7dc3d68 100644
--- a/cpp/src/arrow/types/string-test.cc
+++ b/cpp/src/arrow/types/string-test.cc
@@ -15,21 +15,20 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <gtest/gtest.h>
 #include <cstdint>
+#include <cstdlib>
 #include <memory>
 #include <string>
 #include <vector>
 
+#include "gtest/gtest.h"
+
 #include "arrow/array.h"
-#include "arrow/builder.h"
 #include "arrow/test-util.h"
 #include "arrow/type.h"
-#include "arrow/types/construct.h"
-#include "arrow/types/integer.h"
+#include "arrow/types/primitive.h"
 #include "arrow/types/string.h"
 #include "arrow/types/test-common.h"
-#include "arrow/util/status.h"
 
 namespace arrow {
 
@@ -38,14 +37,14 @@ class Buffer;
 TEST(TypesTest, TestCharType) {
   CharType t1(5);
 
-  ASSERT_EQ(t1.type, LogicalType::CHAR);
+  ASSERT_EQ(t1.type, Type::CHAR);
   ASSERT_EQ(t1.size, 5);
 
   ASSERT_EQ(t1.ToString(), std::string("char(5)"));
 
   // Test copy constructor
   CharType t2 = t1;
-  ASSERT_EQ(t2.type, LogicalType::CHAR);
+  ASSERT_EQ(t2.type, Type::CHAR);
   ASSERT_EQ(t2.size, 5);
 }
 
@@ -53,22 +52,20 @@ TEST(TypesTest, TestCharType) {
 TEST(TypesTest, TestVarcharType) {
   VarcharType t1(5);
 
-  ASSERT_EQ(t1.type, LogicalType::VARCHAR);
+  ASSERT_EQ(t1.type, Type::VARCHAR);
   ASSERT_EQ(t1.size, 5);
-  ASSERT_EQ(t1.physical_type.size, 6);
 
   ASSERT_EQ(t1.ToString(), std::string("varchar(5)"));
 
   // Test copy constructor
   VarcharType t2 = t1;
-  ASSERT_EQ(t2.type, LogicalType::VARCHAR);
+  ASSERT_EQ(t2.type, Type::VARCHAR);
   ASSERT_EQ(t2.size, 5);
-  ASSERT_EQ(t2.physical_type.size, 6);
 }
 
 TEST(TypesTest, TestStringType) {
   StringType str;
-  ASSERT_EQ(str.type, LogicalType::STRING);
+  ASSERT_EQ(str.type, Type::STRING);
   ASSERT_EQ(str.name(), std::string("string"));
 }
 
@@ -90,15 +87,16 @@ class TestStringContainer : public ::testing::Test  {
     length_ = offsets_.size() - 1;
     int nchars = chars_.size();
 
-    value_buf_ = to_buffer(chars_);
+    value_buf_ = test::to_buffer(chars_);
     values_ = ArrayPtr(new UInt8Array(nchars, value_buf_));
 
-    offsets_buf_ = to_buffer(offsets_);
+    offsets_buf_ = test::to_buffer(offsets_);
 
-    nulls_buf_ = bytes_to_null_buffer(nulls_.data(), nulls_.size());
-    null_count_ = null_count(nulls_);
+    nulls_buf_ = test::bytes_to_null_buffer(nulls_.data(), nulls_.size());
+    null_count_ = test::null_count(nulls_);
 
-    strings_.Init(length_, offsets_buf_, values_, null_count_, nulls_buf_);
+    strings_ = std::make_shared<StringArray>(length_, offsets_buf_, values_,
+        null_count_, nulls_buf_);
   }
 
  protected:
@@ -116,28 +114,28 @@ class TestStringContainer : public ::testing::Test  {
   int length_;
 
   ArrayPtr values_;
-  StringArray strings_;
+  std::shared_ptr<StringArray> strings_;
 };
 
 
 TEST_F(TestStringContainer, TestArrayBasics) {
-  ASSERT_EQ(length_, strings_.length());
-  ASSERT_EQ(1, strings_.null_count());
+  ASSERT_EQ(length_, strings_->length());
+  ASSERT_EQ(1, strings_->null_count());
 }
 
 TEST_F(TestStringContainer, TestType) {
-  TypePtr type = strings_.type();
+  TypePtr type = strings_->type();
 
-  ASSERT_EQ(LogicalType::STRING, type->type);
-  ASSERT_EQ(LogicalType::STRING, strings_.logical_type());
+  ASSERT_EQ(Type::STRING, type->type);
+  ASSERT_EQ(Type::STRING, strings_->type_enum());
 }
 
 
 TEST_F(TestStringContainer, TestListFunctions) {
   int pos = 0;
   for (size_t i = 0; i < expected_.size(); ++i) {
-    ASSERT_EQ(pos, strings_.value_offset(i));
-    ASSERT_EQ(expected_[i].size(), strings_.value_length(i));
+    ASSERT_EQ(pos, strings_->value_offset(i));
+    ASSERT_EQ(expected_[i].size(), strings_->value_length(i));
     pos += expected_[i].size();
   }
 }
@@ -151,9 +149,9 @@ TEST_F(TestStringContainer, TestDestructor) {
 TEST_F(TestStringContainer, TestGetString) {
   for (size_t i = 0; i < expected_.size(); ++i) {
     if (nulls_[i]) {
-      ASSERT_TRUE(strings_.IsNull(i));
+      ASSERT_TRUE(strings_->IsNull(i));
     } else {
-      ASSERT_EQ(expected_[i], strings_.GetString(i));
+      ASSERT_EQ(expected_[i], strings_->GetString(i));
     }
   }
 }
@@ -199,7 +197,7 @@ TEST_F(TestStringBuilder, TestScalarAppend) {
   Done();
 
   ASSERT_EQ(reps * N, result_->length());
-  ASSERT_EQ(reps * null_count(is_null), result_->null_count());
+  ASSERT_EQ(reps * test::null_count(is_null), result_->null_count());
   ASSERT_EQ(reps * 6, result_->values()->length());
 
   int32_t length;

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/string.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/string.h b/cpp/src/arrow/types/string.h
index 8ccc0a9..2b3fba5 100644
--- a/cpp/src/arrow/types/string.h
+++ b/cpp/src/arrow/types/string.h
@@ -25,25 +25,21 @@
 
 #include "arrow/array.h"
 #include "arrow/type.h"
-#include "arrow/types/integer.h"
 #include "arrow/types/list.h"
+#include "arrow/types/primitive.h"
 #include "arrow/util/status.h"
 
 namespace arrow {
 
-class ArrayBuilder;
 class Buffer;
 class MemoryPool;
 
 struct CharType : public DataType {
   int size;
 
-  BytesType physical_type;
-
-  explicit CharType(int size, bool nullable = true)
-      : DataType(LogicalType::CHAR, nullable),
-        size(size),
-        physical_type(BytesType(size)) {}
+  explicit CharType(int size)
+      : DataType(Type::CHAR),
+        size(size) {}
 
   CharType(const CharType& other)
       : CharType(other.size) {}
@@ -56,54 +52,36 @@ struct CharType : public DataType {
 struct VarcharType : public DataType {
   int size;
 
-  BytesType physical_type;
-
-  explicit VarcharType(int size, bool nullable = true)
-      : DataType(LogicalType::VARCHAR, nullable),
-        size(size),
-        physical_type(BytesType(size + 1)) {}
+  explicit VarcharType(int size)
+      : DataType(Type::VARCHAR),
+        size(size) {}
   VarcharType(const VarcharType& other)
       : VarcharType(other.size) {}
 
   virtual std::string ToString() const;
 };
 
-static const LayoutPtr byte1(new BytesType(1));
-static const LayoutPtr physical_string = LayoutPtr(new ListLayoutType(byte1));
-
 // TODO: add a BinaryArray layer in between
 class StringArray : public ListArray {
  public:
-  StringArray() : ListArray(), bytes_(nullptr), raw_bytes_(nullptr) {}
-
-  StringArray(int32_t length, const std::shared_ptr<Buffer>& offsets,
-      const ArrayPtr& values,
-      int32_t null_count = 0,
-      const std::shared_ptr<Buffer>& nulls = nullptr) {
-    Init(length, offsets, values, null_count, nulls);
-  }
-
-  void Init(const TypePtr& type, int32_t length,
+  StringArray(const TypePtr& type, int32_t length,
       const std::shared_ptr<Buffer>& offsets,
       const ArrayPtr& values,
       int32_t null_count = 0,
-      const std::shared_ptr<Buffer>& nulls = nullptr) {
-    ListArray::Init(type, length, offsets, values, null_count, nulls);
-
-    // TODO: type validation for values array
-
+      const std::shared_ptr<Buffer>& nulls = nullptr) :
+      ListArray(type, length, offsets, values, null_count, nulls) {
     // For convenience
     bytes_ = static_cast<UInt8Array*>(values.get());
     raw_bytes_ = bytes_->raw_data();
   }
 
-  void Init(int32_t length, const std::shared_ptr<Buffer>& offsets,
+  StringArray(int32_t length,
+      const std::shared_ptr<Buffer>& offsets,
       const ArrayPtr& values,
       int32_t null_count = 0,
-      const std::shared_ptr<Buffer>& nulls = nullptr) {
-    TypePtr type(new StringType());
-    Init(type, length, offsets, values, null_count, nulls);
-  }
+      const std::shared_ptr<Buffer>& nulls = nullptr) :
+      StringArray(std::make_shared<StringType>(), length, offsets, values,
+          null_count, nulls) {}
 
   // Compute the pointer t
   const uint8_t* GetValue(int i, int32_t* out_length) const {
@@ -125,9 +103,6 @@ class StringArray : public ListArray {
 };
 
 // Array builder
-
-
-
 class StringBuilder : public ListBuilder {
  public:
   explicit StringBuilder(MemoryPool* pool, const TypePtr& type) :

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/struct-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/struct-test.cc b/cpp/src/arrow/types/struct-test.cc
index 9a4777e..d94396f 100644
--- a/cpp/src/arrow/types/struct-test.cc
+++ b/cpp/src/arrow/types/struct-test.cc
@@ -15,16 +15,13 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <gtest/gtest.h>
-
 #include <memory>
 #include <string>
 #include <vector>
 
+#include "gtest/gtest.h"
+
 #include "arrow/type.h"
-#include "arrow/types/integer.h"
-#include "arrow/types/string.h"
-#include "arrow/types/struct.h"
 
 using std::shared_ptr;
 using std::string;
@@ -42,13 +39,13 @@ TEST(TestStructType, Basics) {
   TypePtr f2_type = TypePtr(new UInt8Type());
   auto f2 = std::make_shared<Field>("f2", f2_type);
 
-  vector<shared_ptr<Field> > fields = {f0, f1, f2};
+  vector<shared_ptr<Field>> fields = {f0, f1, f2};
 
   StructType struct_type(fields);
 
-  ASSERT_TRUE(struct_type.field(0)->Equals(f0));
-  ASSERT_TRUE(struct_type.field(1)->Equals(f1));
-  ASSERT_TRUE(struct_type.field(2)->Equals(f2));
+  ASSERT_TRUE(struct_type.child(0)->Equals(f0));
+  ASSERT_TRUE(struct_type.child(1)->Equals(f1));
+  ASSERT_TRUE(struct_type.child(2)->Equals(f2));
 
   ASSERT_EQ(struct_type.ToString(), "struct<f0: int32, f1: string, f2: uint8>");
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/test-common.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/test-common.h b/cpp/src/arrow/types/test-common.h
index 1744efc..227aca6 100644
--- a/cpp/src/arrow/types/test-common.h
+++ b/cpp/src/arrow/types/test-common.h
@@ -18,11 +18,12 @@
 #ifndef ARROW_TYPES_TEST_COMMON_H
 #define ARROW_TYPES_TEST_COMMON_H
 
-#include <gtest/gtest.h>
 #include <memory>
 #include <string>
 #include <vector>
 
+#include "gtest/gtest.h"
+
 #include "arrow/test-util.h"
 #include "arrow/type.h"
 #include "arrow/util/memory-pool.h"
@@ -34,7 +35,7 @@ namespace arrow {
 class TestBuilder : public ::testing::Test {
  public:
   void SetUp() {
-    pool_ = GetDefaultMemoryPool();
+    pool_ = default_memory_pool();
     type_ = TypePtr(new UInt8Type());
     builder_.reset(new UInt8Builder(pool_, type_));
     builder_nn_.reset(new UInt8Builder(pool_, type_));

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/union.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/union.h b/cpp/src/arrow/types/union.h
index 9aff780..29cda90 100644
--- a/cpp/src/arrow/types/union.h
+++ b/cpp/src/arrow/types/union.h
@@ -30,8 +30,8 @@ namespace arrow {
 
 class Buffer;
 
-struct DenseUnionType : public CollectionType<LogicalType::DENSE_UNION> {
-  typedef CollectionType<LogicalType::DENSE_UNION> Base;
+struct DenseUnionType : public CollectionType<Type::DENSE_UNION> {
+  typedef CollectionType<Type::DENSE_UNION> Base;
 
   explicit DenseUnionType(const std::vector<TypePtr>& child_types) :
       Base() {
@@ -42,8 +42,8 @@ struct DenseUnionType : public CollectionType<LogicalType::DENSE_UNION> {
 };
 
 
-struct SparseUnionType : public CollectionType<LogicalType::SPARSE_UNION> {
-  typedef CollectionType<LogicalType::SPARSE_UNION> Base;
+struct SparseUnionType : public CollectionType<Type::SPARSE_UNION> {
+  typedef CollectionType<Type::SPARSE_UNION> Base;
 
   explicit SparseUnionType(const std::vector<TypePtr>& child_types) :
       Base() {
@@ -55,28 +55,20 @@ struct SparseUnionType : public CollectionType<LogicalType::SPARSE_UNION> {
 
 
 class UnionArray : public Array {
- public:
-  UnionArray() : Array() {}
-
  protected:
   // The data are types encoded as int16
   Buffer* types_;
-  std::vector<std::shared_ptr<Array> > children_;
+  std::vector<std::shared_ptr<Array>> children_;
 };
 
 
 class DenseUnionArray : public UnionArray {
- public:
-  DenseUnionArray() : UnionArray() {}
-
  protected:
   Buffer* offset_buf_;
 };
 
 
 class SparseUnionArray : public UnionArray {
- public:
-  SparseUnionArray() : UnionArray() {}
 };
 
 } // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/bit-util-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/bit-util-test.cc b/cpp/src/arrow/util/bit-util-test.cc
index 7506ca5..220bff0 100644
--- a/cpp/src/arrow/util/bit-util-test.cc
+++ b/cpp/src/arrow/util/bit-util-test.cc
@@ -15,10 +15,10 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <gtest/gtest.h>
-
 #include "arrow/util/bit-util.h"
 
+#include "gtest/gtest.h"
+
 namespace arrow {
 
 TEST(UtilTests, TestNextPower2) {

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/bit-util.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h
index 5e7197f..1d2d1d5 100644
--- a/cpp/src/arrow/util/bit-util.h
+++ b/cpp/src/arrow/util/bit-util.h
@@ -19,7 +19,6 @@
 #define ARROW_UTIL_BIT_UTIL_H
 
 #include <cstdint>
-#include <cstdlib>
 #include <memory>
 
 namespace arrow {

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/buffer-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/buffer-test.cc b/cpp/src/arrow/util/buffer-test.cc
index 9f1fd91..1d58226 100644
--- a/cpp/src/arrow/util/buffer-test.cc
+++ b/cpp/src/arrow/util/buffer-test.cc
@@ -15,11 +15,12 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <gtest/gtest.h>
 #include <cstdint>
 #include <limits>
 #include <string>
 
+#include "gtest/gtest.h"
+
 #include "arrow/test-util.h"
 #include "arrow/util/buffer.h"
 #include "arrow/util/status.h"

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/buffer.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/buffer.cc b/cpp/src/arrow/util/buffer.cc
index 50f4716..04cdcd7 100644
--- a/cpp/src/arrow/util/buffer.cc
+++ b/cpp/src/arrow/util/buffer.cc
@@ -40,7 +40,7 @@ std::shared_ptr<Buffer> MutableBuffer::GetImmutableView() {
 PoolBuffer::PoolBuffer(MemoryPool* pool) :
     ResizableBuffer(nullptr, 0) {
   if (pool == nullptr) {
-    pool = GetDefaultMemoryPool();
+    pool = default_memory_pool();
   }
   pool_ = pool;
 }

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/memory-pool-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/memory-pool-test.cc b/cpp/src/arrow/util/memory-pool-test.cc
index 954b5f9..6ef07a0 100644
--- a/cpp/src/arrow/util/memory-pool-test.cc
+++ b/cpp/src/arrow/util/memory-pool-test.cc
@@ -15,10 +15,11 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <gtest/gtest.h>
 #include <cstdint>
 #include <limits>
 
+#include "gtest/gtest.h"
+
 #include "arrow/test-util.h"
 #include "arrow/util/memory-pool.h"
 #include "arrow/util/status.h"
@@ -26,7 +27,7 @@
 namespace arrow {
 
 TEST(DefaultMemoryPool, MemoryTracking) {
-  MemoryPool* pool = GetDefaultMemoryPool();
+  MemoryPool* pool = default_memory_pool();
 
   uint8_t* data;
   ASSERT_OK(pool->Allocate(100, &data));
@@ -37,7 +38,7 @@ TEST(DefaultMemoryPool, MemoryTracking) {
 }
 
 TEST(DefaultMemoryPool, OOM) {
-  MemoryPool* pool = GetDefaultMemoryPool();
+  MemoryPool* pool = default_memory_pool();
 
   uint8_t* data;
   int64_t to_alloc = std::numeric_limits<int64_t>::max();

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/memory-pool.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/memory-pool.cc b/cpp/src/arrow/util/memory-pool.cc
index 5820346..0b885e9 100644
--- a/cpp/src/arrow/util/memory-pool.cc
+++ b/cpp/src/arrow/util/memory-pool.cc
@@ -70,9 +70,9 @@ void InternalMemoryPool::Free(uint8_t* buffer, int64_t size) {
 
 InternalMemoryPool::~InternalMemoryPool() {}
 
-MemoryPool* GetDefaultMemoryPool() {
-  static InternalMemoryPool default_memory_pool;
-  return &default_memory_pool;
+MemoryPool* default_memory_pool() {
+  static InternalMemoryPool default_memory_pool_;
+  return &default_memory_pool_;
 }
 
 } // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/memory-pool.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/memory-pool.h b/cpp/src/arrow/util/memory-pool.h
index a7cb10d..0d24786 100644
--- a/cpp/src/arrow/util/memory-pool.h
+++ b/cpp/src/arrow/util/memory-pool.h
@@ -34,7 +34,7 @@ class MemoryPool {
   virtual int64_t bytes_allocated() const = 0;
 };
 
-MemoryPool* GetDefaultMemoryPool();
+MemoryPool* default_memory_pool();
 
 } // namespace arrow
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/status.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/status.cc b/cpp/src/arrow/util/status.cc
index c6e113e..43cb87e 100644
--- a/cpp/src/arrow/util/status.cc
+++ b/cpp/src/arrow/util/status.cc
@@ -54,6 +54,9 @@ std::string Status::CodeAsString() const {
     case StatusCode::Invalid:
       type = "Invalid";
       break;
+    case StatusCode::IOError:
+      type = "IOError";
+      break;
     case StatusCode::NotImplemented:
       type = "NotImplemented";
       break;

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/status.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/status.h b/cpp/src/arrow/util/status.h
index 47fda40..b593123 100644
--- a/cpp/src/arrow/util/status.h
+++ b/cpp/src/arrow/util/status.h
@@ -63,6 +63,7 @@ enum class StatusCode: char {
   OutOfMemory = 1,
   KeyError = 2,
   Invalid = 3,
+  IOError = 4,
 
   NotImplemented = 10,
 };
@@ -97,12 +98,17 @@ class Status {
     return Status(StatusCode::Invalid, msg, -1);
   }
 
+  static Status IOError(const std::string& msg) {
+    return Status(StatusCode::IOError, msg, -1);
+  }
+
   // Returns true iff the status indicates success.
   bool ok() const { return (state_ == NULL); }
 
   bool IsOutOfMemory() const { return code() == StatusCode::OutOfMemory; }
   bool IsKeyError() const { return code() == StatusCode::KeyError; }
   bool IsInvalid() const { return code() == StatusCode::Invalid; }
+  bool IsIOError() const { return code() == StatusCode::IOError; }
 
   // Return a string representation of this status suitable for printing.
   // Returns the string "OK" for success.

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/util/test_main.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/test_main.cc b/cpp/src/arrow/util/test_main.cc
index 00139f3..adc8466 100644
--- a/cpp/src/arrow/util/test_main.cc
+++ b/cpp/src/arrow/util/test_main.cc
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <gtest/gtest.h>
+#include "gtest/gtest.h"
 
 int main(int argc, char **argv) {
   ::testing::InitGoogleTest(&argc, argv);

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/thirdparty/build_thirdparty.sh
----------------------------------------------------------------------
diff --git a/cpp/thirdparty/build_thirdparty.sh b/cpp/thirdparty/build_thirdparty.sh
index 294737c..3d5f532 100755
--- a/cpp/thirdparty/build_thirdparty.sh
+++ b/cpp/thirdparty/build_thirdparty.sh
@@ -17,6 +17,7 @@ else
     case $arg in
       "gtest")      F_GTEST=1 ;;
       "gbenchmark")      F_GBENCHMARK=1 ;;
+      "flatbuffers")      F_FLATBUFFERS=1 ;;
       *)            echo "Unknown module: $arg"; exit 1 ;;
     esac
   done
@@ -78,6 +79,14 @@ if [ -n "$F_ALL" -o -n "$F_GBENCHMARK" ]; then
   make VERBOSE=1 install || { echo "make $GBENCHMARK_ERROR" ; exit 1; }
 fi
 
+FLATBUFFERS_ERROR="failed for flatbuffers"
+if [ -n "$F_ALL" -o -n "$F_FLATBUFFERS" ]; then
+  cd $TP_DIR/$FLATBUFFERS_BASEDIR
+
+  CXXFLAGS=-fPIC cmake -DCMAKE_INSTALL_PREFIX:PATH=$PREFIX -DFLATBUFFERS_BUILD_TESTS=OFF . || { echo "cmake $FLATBUFFERS_ERROR" ; exit 1; }
+  make -j$PARALLEL
+  make install
+fi
 
 echo "---------------------"
 echo "Thirdparty dependencies built and installed into $PREFIX successfully"

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/thirdparty/download_thirdparty.sh
----------------------------------------------------------------------
diff --git a/cpp/thirdparty/download_thirdparty.sh b/cpp/thirdparty/download_thirdparty.sh
index d22c559..d299afc 100755
--- a/cpp/thirdparty/download_thirdparty.sh
+++ b/cpp/thirdparty/download_thirdparty.sh
@@ -25,3 +25,8 @@ if [ ! -d ${GBENCHMARK_BASEDIR} ]; then
   echo "Fetching google benchmark"
   download_extract_and_cleanup $GBENCHMARK_URL
 fi
+
+if [ ! -d ${FLATBUFFERS_BASEDIR} ]; then
+  echo "Fetching flatbuffers"
+  download_extract_and_cleanup $FLATBUFFERS_URL
+fi

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/thirdparty/versions.sh
----------------------------------------------------------------------
diff --git a/cpp/thirdparty/versions.sh b/cpp/thirdparty/versions.sh
index 9cfc7cd..cb455b4 100755
--- a/cpp/thirdparty/versions.sh
+++ b/cpp/thirdparty/versions.sh
@@ -5,3 +5,7 @@ GTEST_BASEDIR=googletest-release-$GTEST_VERSION
 GBENCHMARK_VERSION=1.0.0
 GBENCHMARK_URL="https://github.com/google/benchmark/archive/v${GBENCHMARK_VERSION}.tar.gz"
 GBENCHMARK_BASEDIR=benchmark-$GBENCHMARK_VERSION
+
+FLATBUFFERS_VERSION=1.3.0
+FLATBUFFERS_URL="https://github.com/google/flatbuffers/archive/v${FLATBUFFERS_VERSION}.tar.gz"
+FLATBUFFERS_BASEDIR=flatbuffers-$FLATBUFFERS_VERSION

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/format/Message.fbs
----------------------------------------------------------------------
diff --git a/format/Message.fbs b/format/Message.fbs
new file mode 100644
index 0000000..3ffd203
--- /dev/null
+++ b/format/Message.fbs
@@ -0,0 +1,183 @@
+namespace apache.arrow.flatbuf;
+
+/// ----------------------------------------------------------------------
+/// Logical types and their metadata (if any)
+///
+/// These are stored in the flatbuffer in the Type union below
+
+/// A Tuple in the flatbuffer metadata is the same as an Arrow Struct
+/// (according to the physical memory layout). We used Tuple here as Struct is
+/// a reserved word in Flatbuffers
+table Tuple {
+}
+
+table List {
+}
+
+enum UnionMode:int { Sparse, Dense }
+
+table Union {
+  mode: UnionMode;
+}
+
+table Bit {
+}
+
+table Int {
+  bitWidth: int; // 1 to 64
+  is_signed: bool;
+}
+
+enum Precision:int {SINGLE, DOUBLE}
+
+table FloatingPoint {
+  precision: Precision;
+}
+
+table Utf8 {
+}
+
+table Binary {
+}
+
+table Bool {
+}
+
+table Decimal {
+  precision: int;
+  scale: int;
+}
+
+table Timestamp {
+  timezone: string;
+}
+
+table JSONScalar {
+  dense:bool=true;
+}
+
+/// ----------------------------------------------------------------------
+/// Top-level Type value, enabling extensible type-specific metadata. We can
+/// add new logical types to Type without breaking backwards compatibility
+
+union Type {
+  Int,
+  Bit,
+  FloatingPoint,
+  Binary,
+  Utf8,
+  Bool,
+  Decimal,
+  Timestamp,
+  List,
+  Tuple,
+  Union,
+  JSONScalar
+}
+
+/// ----------------------------------------------------------------------
+/// A field represents a named column in a record / row batch or child of a
+/// nested type.
+///
+/// - children is only for nested Arrow arrays
+/// - For primitive types, children will have length 0
+/// - nullable should default to true in general
+
+table Field {
+  // Name is not required, e.g. in a List
+  name: string;
+  nullable: bool;
+  type: Type;
+  children: [Field];
+}
+
+/// ----------------------------------------------------------------------
+/// A Schema describes the columns in a row batch
+
+table Schema {
+  fields: [Field];
+}
+
+/// ----------------------------------------------------------------------
+/// Data structures for describing a table row batch (a collection of
+/// equal-length Arrow arrays)
+
+/// A Buffer represents a single contiguous memory segment
+struct Buffer {
+  /// The shared memory page id where this buffer is located. Currently this is
+  /// not used
+  page: int;
+
+  /// The relative offset into the shared memory page where the bytes for this
+  /// buffer start
+  offset: long;
+
+  /// The absolute length (in bytes) of the memory buffer. The memory is found
+  /// from offset (inclusive) to offset + length (non-inclusive).
+  length: long;
+}
+
+/// Metadata about a field at some level of a nested type tree (but not
+/// its children).
+///
+/// For example, a List<Int16> with values [[1, 2, 3], null, [4], [5, 6], null]
+/// would have {length: 5, null_count: 2} for its List node, and {length: 6,
+/// null_count: 0} for its Int16 node, as separate FieldNode structs
+struct FieldNode {
+  /// The number of value slots in the Arrow array at this level of a nested
+  /// tree
+  length: int;
+
+  /// The number of observed nulls. Fields with null_count == 0 may choose not
+  /// to write their physical null bitmap out as a materialized buffer, instead
+  /// setting the length of the null buffer to 0.
+  null_count: int;
+}
+
+/// A data header describing the shared memory layout of a "record" or "row"
+/// batch. Some systems call this a "row batch" internally and others a "record
+/// batch".
+table RecordBatch {
+  /// number of records / rows. The arrays in the batch should all have this
+  /// length
+  length: int;
+
+  /// Nodes correspond to the pre-ordered flattened logical schema
+  nodes: [FieldNode];
+
+  /// Buffers correspond to the pre-ordered flattened buffer tree
+  ///
+  /// The number of buffers appended to this list depends on the schema. For
+  /// example, most primitive arrays will have 2 buffers, 1 for the null bitmap
+  /// and 1 for the values. For struct arrays, there will only be a single
+  /// buffer for the null bitmap
+  buffers: [Buffer];
+}
+
+/// ----------------------------------------------------------------------
+/// For sending dictionary encoding information. Any Field can be
+/// dictionary-encoded, but in this case none of its children may be
+/// dictionary-encoded.
+///
+/// TODO(wesm): To be documented in more detail
+
+table DictionaryBatch {
+  id: long;
+  data: RecordBatch;
+}
+
+/// ----------------------------------------------------------------------
+/// The root Message type
+
+/// This union enables us to easily send different message types without
+/// redundant storage, and in the future we can easily add new message types.
+union MessageHeader {
+  Schema, DictionaryBatch, RecordBatch
+}
+
+table Message {
+  header: MessageHeader;
+  bodyLength: long;
+}
+
+root_type Message;

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/__init__.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 8d93a15..9a08070 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -35,4 +35,6 @@ from pyarrow.schema import (null, bool_,
                             uint8, uint16, uint32, uint64,
                             float_, double, string,
                             list_, struct, field,
-                            DataType, Field, Schema)
+                            DataType, Field, Schema, schema)
+
+from pyarrow.array import RowBatch

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/array.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/array.pxd b/python/pyarrow/array.pxd
index d0d3486..de3c774 100644
--- a/python/pyarrow/array.pxd
+++ b/python/pyarrow/array.pxd
@@ -16,7 +16,7 @@
 # under the License.
 
 from pyarrow.includes.common cimport shared_ptr
-from pyarrow.includes.libarrow cimport CArray, LogicalType
+from pyarrow.includes.libarrow cimport CArray
 
 from pyarrow.scalar import NA
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/array.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/array.pyx b/python/pyarrow/array.pyx
index bceb333..c5d40dd 100644
--- a/python/pyarrow/array.pyx
+++ b/python/pyarrow/array.pyx
@@ -28,6 +28,9 @@ from pyarrow.error cimport check_status
 cimport pyarrow.scalar as scalar
 from pyarrow.scalar import NA
 
+from pyarrow.schema cimport Schema
+import pyarrow.schema as schema
+
 def total_allocated_bytes():
     cdef MemoryPool* pool = pyarrow.GetMemoryPool()
     return pool.bytes_allocated()
@@ -155,12 +158,12 @@ cdef class StringArray(Array):
 
 
 cdef dict _array_classes = {
-    LogicalType_NA: NullArray,
-    LogicalType_BOOL: BooleanArray,
-    LogicalType_INT64: Int64Array,
-    LogicalType_DOUBLE: DoubleArray,
-    LogicalType_LIST: ListArray,
-    LogicalType_STRING: StringArray,
+    Type_NA: NullArray,
+    Type_BOOL: BooleanArray,
+    Type_INT64: Int64Array,
+    Type_DOUBLE: DoubleArray,
+    Type_LIST: ListArray,
+    Type_STRING: StringArray,
 }
 
 cdef object box_arrow_array(const shared_ptr[CArray]& sp_array):
@@ -190,3 +193,35 @@ def from_pylist(object list_obj, DataType type=None):
         raise NotImplementedError
 
     return box_arrow_array(sp_array)
+
+#----------------------------------------------------------------------
+# Table-like data structures
+
+cdef class RowBatch:
+    """
+    A schema, a row count, and a list of arrays (one per schema field)
+    """
+    cdef readonly:
+        Schema schema
+        int num_rows
+        list arrays
+
+    def __cinit__(self, Schema schema, int num_rows, list arrays):
+        self.schema = schema
+        self.num_rows = num_rows
+        self.arrays = arrays
+
+        if len(self.schema) != len(arrays):
+            raise ValueError('Mismatch number of data arrays and '
+                             'schema fields')
+
+    def __len__(self):
+        return self.num_rows
+
+    property num_columns:
+
+        def __get__(self):
+            return len(self.arrays)
+
+    def __getitem__(self, i):
+        return self.arrays[i]

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/includes/libarrow.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index baba112..e6afcbd 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -21,31 +21,30 @@ from pyarrow.includes.common cimport *
 
 cdef extern from "arrow/api.h" namespace "arrow" nogil:
 
-    enum LogicalType" arrow::LogicalType::type":
-        LogicalType_NA" arrow::LogicalType::NA"
+    enum Type" arrow::Type::type":
+        Type_NA" arrow::Type::NA"
 
-        LogicalType_BOOL" arrow::LogicalType::BOOL"
+        Type_BOOL" arrow::Type::BOOL"
 
-        LogicalType_UINT8" arrow::LogicalType::UINT8"
-        LogicalType_INT8" arrow::LogicalType::INT8"
-        LogicalType_UINT16" arrow::LogicalType::UINT16"
-        LogicalType_INT16" arrow::LogicalType::INT16"
-        LogicalType_UINT32" arrow::LogicalType::UINT32"
-        LogicalType_INT32" arrow::LogicalType::INT32"
-        LogicalType_UINT64" arrow::LogicalType::UINT64"
-        LogicalType_INT64" arrow::LogicalType::INT64"
+        Type_UINT8" arrow::Type::UINT8"
+        Type_INT8" arrow::Type::INT8"
+        Type_UINT16" arrow::Type::UINT16"
+        Type_INT16" arrow::Type::INT16"
+        Type_UINT32" arrow::Type::UINT32"
+        Type_INT32" arrow::Type::INT32"
+        Type_UINT64" arrow::Type::UINT64"
+        Type_INT64" arrow::Type::INT64"
 
-        LogicalType_FLOAT" arrow::LogicalType::FLOAT"
-        LogicalType_DOUBLE" arrow::LogicalType::DOUBLE"
+        Type_FLOAT" arrow::Type::FLOAT"
+        Type_DOUBLE" arrow::Type::DOUBLE"
 
-        LogicalType_STRING" arrow::LogicalType::STRING"
+        Type_STRING" arrow::Type::STRING"
 
-        LogicalType_LIST" arrow::LogicalType::LIST"
-        LogicalType_STRUCT" arrow::LogicalType::STRUCT"
+        Type_LIST" arrow::Type::LIST"
+        Type_STRUCT" arrow::Type::STRUCT"
 
     cdef cppclass CDataType" arrow::DataType":
-        LogicalType type
-        c_bool nullable
+        Type type
 
         c_bool Equals(const CDataType* other)
 
@@ -55,8 +54,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
         int64_t bytes_allocated()
 
     cdef cppclass CListType" arrow::ListType"(CDataType):
-        CListType(const shared_ptr[CDataType]& value_type,
-                  c_bool nullable)
+        CListType(const shared_ptr[CDataType]& value_type)
 
     cdef cppclass CStringType" arrow::StringType"(CDataType):
         pass
@@ -65,21 +63,26 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
         c_string name
         shared_ptr[CDataType] type
 
-        CField(const c_string& name, const shared_ptr[CDataType]& type)
+        c_bool nullable
+
+        CField(const c_string& name, const shared_ptr[CDataType]& type,
+               c_bool nullable)
 
     cdef cppclass CStructType" arrow::StructType"(CDataType):
-        CStructType(const vector[shared_ptr[CField]]& fields,
-                    c_bool nullable)
+        CStructType(const vector[shared_ptr[CField]]& fields)
 
     cdef cppclass CSchema" arrow::Schema":
-        CSchema(const shared_ptr[CField]& fields)
+        CSchema(const vector[shared_ptr[CField]]& fields)
+        const shared_ptr[CField]& field(int i)
+        int num_fields()
+        c_string ToString()
 
     cdef cppclass CArray" arrow::Array":
         const shared_ptr[CDataType]& type()
 
         int32_t length()
         int32_t null_count()
-        LogicalType logical_type()
+        Type type_enum()
 
         c_bool IsNull(int i)
 
@@ -122,3 +125,57 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
 
     cdef cppclass CStringArray" arrow::StringArray"(CListArray):
         c_string GetString(int i)
+
+
+cdef extern from "arrow/api.h" namespace "arrow" nogil:
+    # We can later add more of the common status factory methods as needed
+    cdef CStatus CStatus_OK "Status::OK"()
+
+    cdef cppclass CStatus "arrow::Status":
+        CStatus()
+
+        c_string ToString()
+
+        c_bool ok()
+        c_bool IsOutOfMemory()
+        c_bool IsKeyError()
+        c_bool IsNotImplemented()
+        c_bool IsInvalid()
+
+    cdef cppclass Buffer:
+        uint8_t* data()
+        int64_t size()
+
+
+cdef extern from "arrow/ipc/metadata.h" namespace "arrow::ipc" nogil:
+    cdef cppclass SchemaMessage:
+        int num_fields()
+        CStatus GetField(int i, shared_ptr[CField]* out)
+        CStatus GetSchema(shared_ptr[CSchema]* out)
+
+    cdef cppclass FieldMetadata:
+        pass
+
+    cdef cppclass BufferMetadata:
+        pass
+
+    cdef cppclass RecordBatchMessage:
+        pass
+
+    cdef cppclass DictionaryBatchMessage:
+        pass
+
+    enum MessageType" arrow::ipc::Message::Type":
+        MessageType_SCHEMA" arrow::ipc::Message::SCHEMA"
+        MessageType_RECORD_BATCH" arrow::ipc::Message::RECORD_BATCH"
+        MessageType_DICTIONARY_BATCH" arrow::ipc::Message::DICTIONARY_BATCH"
+
+    cdef cppclass Message:
+        CStatus Open(const shared_ptr[Buffer]& buf,
+                     shared_ptr[Message]* out)
+        int64_t body_length()
+        MessageType type()
+
+        shared_ptr[SchemaMessage] GetSchema()
+        shared_ptr[RecordBatchMessage] GetRecordBatch()
+        shared_ptr[DictionaryBatchMessage] GetDictionaryBatch()

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/includes/pyarrow.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/pyarrow.pxd b/python/pyarrow/includes/pyarrow.pxd
index 9a0c004..eedfc85 100644
--- a/python/pyarrow/includes/pyarrow.pxd
+++ b/python/pyarrow/includes/pyarrow.pxd
@@ -18,8 +18,7 @@
 # distutils: language = c++
 
 from pyarrow.includes.common cimport *
-from pyarrow.includes.libarrow cimport (CArray, CDataType, LogicalType,
-                                        MemoryPool)
+from pyarrow.includes.libarrow cimport CArray, CDataType, Type, MemoryPool
 
 cdef extern from "pyarrow/api.h" namespace "pyarrow" nogil:
     # We can later add more of the common status factory methods as needed
@@ -39,7 +38,7 @@ cdef extern from "pyarrow/api.h" namespace "pyarrow" nogil:
         c_bool IsNotImplemented()
         c_bool IsArrowError()
 
-    shared_ptr[CDataType] GetPrimitiveType(LogicalType type, c_bool nullable)
+    shared_ptr[CDataType] GetPrimitiveType(Type type)
     Status ConvertPySequence(object obj, shared_ptr[CArray]* out)
 
     MemoryPool* GetMemoryPool()

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/scalar.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/scalar.pyx b/python/pyarrow/scalar.pyx
index 261a389..04f013d 100644
--- a/python/pyarrow/scalar.pyx
+++ b/python/pyarrow/scalar.pyx
@@ -172,18 +172,18 @@ cdef class ListValue(ArrayValue):
 
 
 cdef dict _scalar_classes = {
-    LogicalType_UINT8: Int8Value,
-    LogicalType_UINT16: Int16Value,
-    LogicalType_UINT32: Int32Value,
-    LogicalType_UINT64: Int64Value,
-    LogicalType_INT8: Int8Value,
-    LogicalType_INT16: Int16Value,
-    LogicalType_INT32: Int32Value,
-    LogicalType_INT64: Int64Value,
-    LogicalType_FLOAT: FloatValue,
-    LogicalType_DOUBLE: DoubleValue,
-    LogicalType_LIST: ListValue,
-    LogicalType_STRING: StringValue
+    Type_UINT8: Int8Value,
+    Type_UINT16: Int16Value,
+    Type_UINT32: Int32Value,
+    Type_UINT64: Int64Value,
+    Type_INT8: Int8Value,
+    Type_INT16: Int16Value,
+    Type_INT32: Int32Value,
+    Type_INT64: Int64Value,
+    Type_FLOAT: FloatValue,
+    Type_DOUBLE: DoubleValue,
+    Type_LIST: ListValue,
+    Type_STRING: StringValue
 }
 
 cdef object box_arrow_scalar(DataType type,

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/schema.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/schema.pxd b/python/pyarrow/schema.pxd
index 07b9bd0..61458b7 100644
--- a/python/pyarrow/schema.pxd
+++ b/python/pyarrow/schema.pxd
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from pyarrow.includes.common cimport shared_ptr
+from pyarrow.includes.common cimport *
 from pyarrow.includes.libarrow cimport CDataType, CField, CSchema
 
 cdef class DataType:
@@ -33,9 +33,13 @@ cdef class Field:
     cdef readonly:
         DataType type
 
+    cdef init(self, const shared_ptr[CField]& field)
+
 cdef class Schema:
     cdef:
         shared_ptr[CSchema] sp_schema
         CSchema* schema
 
+    cdef init(self, const vector[shared_ptr[CField]]& fields)
+
 cdef DataType box_data_type(const shared_ptr[CDataType]& type)

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/schema.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/schema.pyx b/python/pyarrow/schema.pyx
index ea87872..b3bf02a 100644
--- a/python/pyarrow/schema.pyx
+++ b/python/pyarrow/schema.pyx
@@ -54,94 +54,153 @@ cdef class DataType:
 
 cdef class Field:
 
-    def __cinit__(self, object name, DataType type):
-        self.type = type
-        self.sp_field.reset(new CField(tobytes(name), type.sp_type))
-        self.field = self.sp_field.get()
+    def __cinit__(self):
+        pass
+
+    cdef init(self, const shared_ptr[CField]& field):
+        self.sp_field = field
+        self.field = field.get()
+
+    @classmethod
+    def from_py(cls, object name, DataType type, bint nullable=True):
+        cdef Field result = Field()
+        result.type = type
+        result.sp_field.reset(new CField(tobytes(name), type.sp_type,
+                                         nullable))
+        result.field = result.sp_field.get()
+
+        return result
 
     def __repr__(self):
         return 'Field({0!r}, type={1})'.format(self.name, str(self.type))
 
+    property nullable:
+
+        def __get__(self):
+            return self.field.nullable
+
     property name:
 
         def __get__(self):
             return frombytes(self.field.name)
 
+cdef class Schema:
+
+    def __cinit__(self):
+        pass
+
+    def __len__(self):
+        return self.schema.num_fields()
+
+    def __getitem__(self, i):
+        if i < 0 or i >= len(self):
+            raise IndexError("{0} is out of bounds".format(i))
+
+        cdef Field result = Field()
+        result.init(self.schema.field(i))
+        result.type = box_data_type(result.field.type)
+
+        return result
+
+    cdef init(self, const vector[shared_ptr[CField]]& fields):
+        self.schema = new CSchema(fields)
+        self.sp_schema.reset(self.schema)
+
+    @classmethod
+    def from_fields(cls, fields):
+        cdef:
+            Schema result
+            Field field
+            vector[shared_ptr[CField]] c_fields
+
+        c_fields.resize(len(fields))
+
+        for i in range(len(fields)):
+            field = fields[i]
+            c_fields[i] = field.sp_field
+
+        result = Schema()
+        result.init(c_fields)
+
+        return result
+
+    def __repr__(self):
+        return frombytes(self.schema.ToString())
+
 cdef dict _type_cache = {}
 
-cdef DataType primitive_type(LogicalType type, bint nullable=True):
-    if (type, nullable) in _type_cache:
-        return _type_cache[type, nullable]
+cdef DataType primitive_type(Type type):
+    if type in _type_cache:
+        return _type_cache[type]
 
     cdef DataType out = DataType()
-    out.init(pyarrow.GetPrimitiveType(type, nullable))
+    out.init(pyarrow.GetPrimitiveType(type))
 
-    _type_cache[type, nullable] = out
+    _type_cache[type] = out
     return out
 
 #------------------------------------------------------------
 # Type factory functions
 
-def field(name, type):
-    return Field(name, type)
+def field(name, type, bint nullable=True):
+    return Field.from_py(name, type, nullable)
 
 cdef set PRIMITIVE_TYPES = set([
-    LogicalType_NA, LogicalType_BOOL,
-    LogicalType_UINT8, LogicalType_INT8,
-    LogicalType_UINT16, LogicalType_INT16,
-    LogicalType_UINT32, LogicalType_INT32,
-    LogicalType_UINT64, LogicalType_INT64,
-    LogicalType_FLOAT, LogicalType_DOUBLE])
+    Type_NA, Type_BOOL,
+    Type_UINT8, Type_INT8,
+    Type_UINT16, Type_INT16,
+    Type_UINT32, Type_INT32,
+    Type_UINT64, Type_INT64,
+    Type_FLOAT, Type_DOUBLE])
 
 def null():
-    return primitive_type(LogicalType_NA)
+    return primitive_type(Type_NA)
 
-def bool_(c_bool nullable=True):
-    return primitive_type(LogicalType_BOOL, nullable)
+def bool_():
+    return primitive_type(Type_BOOL)
 
-def uint8(c_bool nullable=True):
-    return primitive_type(LogicalType_UINT8, nullable)
+def uint8():
+    return primitive_type(Type_UINT8)
 
-def int8(c_bool nullable=True):
-    return primitive_type(LogicalType_INT8, nullable)
+def int8():
+    return primitive_type(Type_INT8)
 
-def uint16(c_bool nullable=True):
-    return primitive_type(LogicalType_UINT16, nullable)
+def uint16():
+    return primitive_type(Type_UINT16)
 
-def int16(c_bool nullable=True):
-    return primitive_type(LogicalType_INT16, nullable)
+def int16():
+    return primitive_type(Type_INT16)
 
-def uint32(c_bool nullable=True):
-    return primitive_type(LogicalType_UINT32, nullable)
+def uint32():
+    return primitive_type(Type_UINT32)
 
-def int32(c_bool nullable=True):
-    return primitive_type(LogicalType_INT32, nullable)
+def int32():
+    return primitive_type(Type_INT32)
 
-def uint64(c_bool nullable=True):
-    return primitive_type(LogicalType_UINT64, nullable)
+def uint64():
+    return primitive_type(Type_UINT64)
 
-def int64(c_bool nullable=True):
-    return primitive_type(LogicalType_INT64, nullable)
+def int64():
+    return primitive_type(Type_INT64)
 
-def float_(c_bool nullable=True):
-    return primitive_type(LogicalType_FLOAT, nullable)
+def float_():
+    return primitive_type(Type_FLOAT)
 
-def double(c_bool nullable=True):
-    return primitive_type(LogicalType_DOUBLE, nullable)
+def double():
+    return primitive_type(Type_DOUBLE)
 
-def string(c_bool nullable=True):
+def string():
     """
     UTF8 string
     """
-    return primitive_type(LogicalType_STRING, nullable)
+    return primitive_type(Type_STRING)
 
-def list_(DataType value_type, c_bool nullable=True):
+def list_(DataType value_type):
     cdef DataType out = DataType()
-    out.init(shared_ptr[CDataType](
-        new CListType(value_type.sp_type, nullable)))
+    out.init(shared_ptr[CDataType](new CListType(value_type.sp_type)))
     return out
 
-def struct(fields, c_bool nullable=True):
+def struct(fields):
     """
 
     """
@@ -154,9 +213,11 @@ def struct(fields, c_bool nullable=True):
         c_fields.push_back(field.sp_field)
 
     out.init(shared_ptr[CDataType](
-        new CStructType(c_fields, nullable)))
+        new CStructType(c_fields)))
     return out
 
+def schema(fields):
+    return Schema.from_fields(fields)
 
 cdef DataType box_data_type(const shared_ptr[CDataType]& type):
     cdef DataType out = DataType()

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/tests/test_schema.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_schema.py b/python/pyarrow/tests/test_schema.py
index 0235526..2894ea8 100644
--- a/python/pyarrow/tests/test_schema.py
+++ b/python/pyarrow/tests/test_schema.py
@@ -18,6 +18,8 @@
 from pyarrow.compat import unittest
 import pyarrow as arrow
 
+A = arrow
+
 
 class TestTypes(unittest.TestCase):
 
@@ -28,15 +30,12 @@ class TestTypes(unittest.TestCase):
         for name in dtypes:
             factory = getattr(arrow, name)
             t = factory()
-            t_required = factory(False)
-
             assert str(t) == name
-            assert str(t_required) == '{0} not null'.format(name)
 
     def test_list(self):
         value_type = arrow.int32()
         list_type = arrow.list_(value_type)
-        assert str(list_type) == 'list<int32>'
+        assert str(list_type) == 'list<item: int32>'
 
     def test_string(self):
         t = arrow.string()
@@ -47,5 +46,26 @@ class TestTypes(unittest.TestCase):
         f = arrow.field('foo', t)
 
         assert f.name == 'foo'
+        assert f.nullable
         assert f.type is t
         assert repr(f) == "Field('foo', type=string)"
+
+        f = arrow.field('foo', t, False)
+        assert not f.nullable
+
+    def test_schema(self):
+        fields = [
+            A.field('foo', A.int32()),
+            A.field('bar', A.string()),
+            A.field('baz', A.list_(A.int8()))
+        ]
+        sch = A.schema(fields)
+
+        assert len(sch) == 3
+        assert sch[0].name == 'foo'
+        assert sch[0].type == fields[0].type
+
+        assert repr(sch) == """\
+foo: int32
+bar: string
+baz: list<item: int8>"""

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/pyarrow/tests/test_table.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
new file mode 100644
index 0000000..2e24445
--- /dev/null
+++ b/python/pyarrow/tests/test_table.py
@@ -0,0 +1,40 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow.compat import unittest
+import pyarrow as arrow
+
+A = arrow
+
+
+class TestRowBatch(unittest.TestCase):
+
+    def test_basics(self):
+        data = [
+            A.from_pylist(range(5)),
+            A.from_pylist([-10, -5, 0, 5, 10])
+        ]
+        num_rows = 5
+
+        descr = A.schema([A.field('c0', data[0].type),
+                          A.field('c1', data[1].type)])
+
+        batch = A.RowBatch(descr, num_rows, data)
+
+        assert len(batch) == num_rows
+        assert batch.num_rows == num_rows
+        assert batch.num_columns == len(data)

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/src/pyarrow/adapters/builtin.cc
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/adapters/builtin.cc b/python/src/pyarrow/adapters/builtin.cc
index bb79052..acb13ac 100644
--- a/python/src/pyarrow/adapters/builtin.cc
+++ b/python/src/pyarrow/adapters/builtin.cc
@@ -27,7 +27,7 @@
 
 using arrow::ArrayBuilder;
 using arrow::DataType;
-using arrow::LogicalType;
+using arrow::Type;
 
 namespace pyarrow {
 
@@ -356,17 +356,17 @@ class ListConverter : public TypedConverter<arrow::ListBuilder> {
 // Dynamic constructor for sequence converters
 std::shared_ptr<SeqConverter> GetConverter(const std::shared_ptr<DataType>& type) {
   switch (type->type) {
-    case LogicalType::BOOL:
+    case Type::BOOL:
       return std::make_shared<BoolConverter>();
-    case LogicalType::INT64:
+    case Type::INT64:
       return std::make_shared<Int64Converter>();
-    case LogicalType::DOUBLE:
+    case Type::DOUBLE:
       return std::make_shared<DoubleConverter>();
-    case LogicalType::STRING:
+    case Type::STRING:
       return std::make_shared<StringConverter>();
-    case LogicalType::LIST:
+    case Type::LIST:
       return std::make_shared<ListConverter>();
-    case LogicalType::STRUCT:
+    case Type::STRUCT:
     default:
       return nullptr;
       break;
@@ -378,7 +378,7 @@ Status ListConverter::Init(const std::shared_ptr<ArrayBuilder>& builder) {
   typed_builder_ = static_cast<arrow::ListBuilder*>(builder.get());
 
   value_converter_ = GetConverter(static_cast<arrow::ListType*>(
-          builder->type().get())->value_type);
+          builder->type().get())->value_type());
   if (value_converter_ == nullptr) {
     return Status::NotImplemented("value type not implemented");
   }
@@ -393,8 +393,8 @@ Status ConvertPySequence(PyObject* obj, std::shared_ptr<arrow::Array>* out) {
   PY_RETURN_NOT_OK(InferArrowType(obj, &size, &type));
 
   // Handle NA / NullType case
-  if (type->type == LogicalType::NA) {
-    out->reset(new arrow::Array(type, size, size));
+  if (type->type == Type::NA) {
+    out->reset(new arrow::NullArray(type, size));
     return Status::OK();
   }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/src/pyarrow/helpers.cc
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/helpers.cc b/python/src/pyarrow/helpers.cc
index 0921fc4..08003aa 100644
--- a/python/src/pyarrow/helpers.cc
+++ b/python/src/pyarrow/helpers.cc
@@ -37,19 +37,14 @@ const std::shared_ptr<FloatType> FLOAT = std::make_shared<FloatType>();
 const std::shared_ptr<DoubleType> DOUBLE = std::make_shared<DoubleType>();
 const std::shared_ptr<StringType> STRING = std::make_shared<StringType>();
 
-#define GET_PRIMITIVE_TYPE(NAME, Type)          \
-  case LogicalType::NAME:                       \
-    if (nullable) {                             \
-      return NAME;                              \
-    } else {                                    \
-      return std::make_shared<Type>(nullable);  \
-    }                                           \
+#define GET_PRIMITIVE_TYPE(NAME, Class)         \
+  case Type::NAME:                              \
+    return NAME;                                \
     break;
 
-std::shared_ptr<DataType> GetPrimitiveType(LogicalType::type type,
-    bool nullable) {
+std::shared_ptr<DataType> GetPrimitiveType(Type::type type) {
   switch (type) {
-    case LogicalType::NA:
+    case Type::NA:
       return NA;
     GET_PRIMITIVE_TYPE(UINT8, UInt8Type);
     GET_PRIMITIVE_TYPE(INT8, Int8Type);

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/python/src/pyarrow/helpers.h
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/helpers.h b/python/src/pyarrow/helpers.h
index e41568d..ec42bb3 100644
--- a/python/src/pyarrow/helpers.h
+++ b/python/src/pyarrow/helpers.h
@@ -24,7 +24,7 @@
 namespace pyarrow {
 
 using arrow::DataType;
-using arrow::LogicalType;
+using arrow::Type;
 
 extern const std::shared_ptr<arrow::NullType> NA;
 extern const std::shared_ptr<arrow::BooleanType> BOOL;
@@ -40,8 +40,7 @@ extern const std::shared_ptr<arrow::FloatType> FLOAT;
 extern const std::shared_ptr<arrow::DoubleType> DOUBLE;
 extern const std::shared_ptr<arrow::StringType> STRING;
 
-std::shared_ptr<DataType> GetPrimitiveType(LogicalType::type type,
-    bool nullable);
+std::shared_ptr<DataType> GetPrimitiveType(Type::type type);
 
 } // namespace pyarrow
 


[3/3] arrow git commit: ARROW-67: C++ metadata flatbuffer serialization and data movement to memory maps

Posted by we...@apache.org.
ARROW-67: C++ metadata flatbuffer serialization and data movement to memory maps

Several things here:

* Add Google flatbuffers dependency
* Flatbuffers IDL draft in collaboration with @jacques-n and @stevenmphillips
* Add Schema wrapper in Cython
* arrow::Schema conversion to/from flatbuffer representation
* Remove unneeded physical layout types from type.h
* Refactor ListType to be a nested type with a single child
* Implement shared memory round-trip for numeric row batches
* mmap-based shared memory interface and MemorySource abstract API

Quite a bit of judicious code cleaning and consolidation as part of this. For example, List types are now internally equivalent to a nested type with 1 named child field (versus a struct, which can have any number of child fields).

Associated JIRAs: ARROW-48, ARROW-57, ARROW-58

Author: Wes McKinney <we...@apache.org>

Closes #28 from wesm/cpp-ipc-draft and squashes the following commits:

0cef7ea [Wes McKinney] Add NullArray type now that Array is virtual, fix pyarrow build
5e841f7 [Wes McKinney] Create explicit PrimitiveArray subclasses to avoid unwanted template instantiation
6fa6319 [Wes McKinney] ARROW-28: Draft C++ shared memory IPC workflow and related refactoring / scaffolding / cleaning.


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/65db0da8
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/65db0da8
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/65db0da8

Branch: refs/heads/master
Commit: 65db0da80b6a1fb6887b7ac1df24e2423d41dfb9
Parents: 093f9bd
Author: Wes McKinney <we...@apache.org>
Authored: Tue Mar 22 18:45:13 2016 -0700
Committer: Wes McKinney <we...@apache.org>
Committed: Tue Mar 22 18:45:13 2016 -0700

----------------------------------------------------------------------
 ci/travis_before_script_cpp.sh          |   9 +-
 ci/travis_script_cpp.sh                 |   6 +-
 cpp/CMakeLists.txt                      |  96 +++++---
 cpp/cmake_modules/FindFlatbuffers.cmake |  95 ++++++++
 cpp/setup_build_env.sh                  |   5 +-
 cpp/src/arrow/CMakeLists.txt            |   8 +
 cpp/src/arrow/api.h                     |  11 +-
 cpp/src/arrow/array-test.cc             |  14 +-
 cpp/src/arrow/array.cc                  |  26 ++-
 cpp/src/arrow/array.h                   |  27 ++-
 cpp/src/arrow/builder.h                 |   2 +-
 cpp/src/arrow/column-benchmark.cc       |  54 +++++
 cpp/src/arrow/column-test.cc            |  75 +++++++
 cpp/src/arrow/column.cc                 |  70 ++++++
 cpp/src/arrow/column.h                  | 108 +++++++++
 cpp/src/arrow/ipc/.gitignore            |   1 +
 cpp/src/arrow/ipc/CMakeLists.txt        |  51 +++++
 cpp/src/arrow/ipc/adapter.cc            | 305 ++++++++++++++++++++++++++
 cpp/src/arrow/ipc/adapter.h             |  86 ++++++++
 cpp/src/arrow/ipc/ipc-adapter-test.cc   | 112 ++++++++++
 cpp/src/arrow/ipc/ipc-memory-test.cc    |  82 +++++++
 cpp/src/arrow/ipc/ipc-metadata-test.cc  |  99 +++++++++
 cpp/src/arrow/ipc/memory.cc             | 162 ++++++++++++++
 cpp/src/arrow/ipc/memory.h              | 131 +++++++++++
 cpp/src/arrow/ipc/metadata-internal.cc  | 317 +++++++++++++++++++++++++++
 cpp/src/arrow/ipc/metadata-internal.h   |  69 ++++++
 cpp/src/arrow/ipc/metadata.cc           | 238 ++++++++++++++++++++
 cpp/src/arrow/ipc/metadata.h            | 146 ++++++++++++
 cpp/src/arrow/ipc/test-common.h         |  53 +++++
 cpp/src/arrow/schema-test.cc            | 104 +++++++++
 cpp/src/arrow/schema.cc                 |  63 ++++++
 cpp/src/arrow/schema.h                  |  55 +++++
 cpp/src/arrow/table-test.cc             | 128 +++++++++++
 cpp/src/arrow/table.cc                  |  86 ++++++++
 cpp/src/arrow/table.h                   | 128 +++++++++++
 cpp/src/arrow/table/CMakeLists.txt      |  33 ---
 cpp/src/arrow/table/column-benchmark.cc |  55 -----
 cpp/src/arrow/table/column-test.cc      |  75 -------
 cpp/src/arrow/table/column.cc           |  68 ------
 cpp/src/arrow/table/column.h            | 105 ---------
 cpp/src/arrow/table/schema-test.cc      | 110 ----------
 cpp/src/arrow/table/schema.cc           |  58 -----
 cpp/src/arrow/table/schema.h            |  55 -----
 cpp/src/arrow/table/table-test.cc       | 128 -----------
 cpp/src/arrow/table/table.cc            |  73 ------
 cpp/src/arrow/table/table.h             |  82 -------
 cpp/src/arrow/table/test-common.h       |  54 -----
 cpp/src/arrow/test-util.h               |  68 +++++-
 cpp/src/arrow/type.cc                   |  24 +-
 cpp/src/arrow/type.h                    | 177 ++++++---------
 cpp/src/arrow/types/CMakeLists.txt      |   2 -
 cpp/src/arrow/types/boolean.h           |   2 +-
 cpp/src/arrow/types/collection.h        |   2 +-
 cpp/src/arrow/types/construct.cc        |  53 +++--
 cpp/src/arrow/types/construct.h         |  11 +-
 cpp/src/arrow/types/datetime.h          |  16 +-
 cpp/src/arrow/types/floating.cc         |  22 --
 cpp/src/arrow/types/floating.h          |  36 ---
 cpp/src/arrow/types/integer.cc          |  22 --
 cpp/src/arrow/types/integer.h           |  57 -----
 cpp/src/arrow/types/json.cc             |   1 -
 cpp/src/arrow/types/json.h              |   4 +-
 cpp/src/arrow/types/list-test.cc        |  28 ++-
 cpp/src/arrow/types/list.cc             |  29 +++
 cpp/src/arrow/types/list.h              |  28 +--
 cpp/src/arrow/types/primitive-test.cc   |  41 ++--
 cpp/src/arrow/types/primitive.cc        |  16 +-
 cpp/src/arrow/types/primitive.h         | 102 +++++----
 cpp/src/arrow/types/string-test.cc      |  54 +++--
 cpp/src/arrow/types/string.h            |  55 ++---
 cpp/src/arrow/types/struct-test.cc      |  15 +-
 cpp/src/arrow/types/test-common.h       |   5 +-
 cpp/src/arrow/types/union.h             |  18 +-
 cpp/src/arrow/util/bit-util-test.cc     |   4 +-
 cpp/src/arrow/util/bit-util.h           |   1 -
 cpp/src/arrow/util/buffer-test.cc       |   3 +-
 cpp/src/arrow/util/buffer.cc            |   2 +-
 cpp/src/arrow/util/memory-pool-test.cc  |   7 +-
 cpp/src/arrow/util/memory-pool.cc       |   6 +-
 cpp/src/arrow/util/memory-pool.h        |   2 +-
 cpp/src/arrow/util/status.cc            |   3 +
 cpp/src/arrow/util/status.h             |   6 +
 cpp/src/arrow/util/test_main.cc         |   2 +-
 cpp/thirdparty/build_thirdparty.sh      |   9 +
 cpp/thirdparty/download_thirdparty.sh   |   5 +
 cpp/thirdparty/versions.sh              |   4 +
 format/Message.fbs                      | 183 ++++++++++++++++
 python/pyarrow/__init__.py              |   4 +-
 python/pyarrow/array.pxd                |   2 +-
 python/pyarrow/array.pyx                |  47 +++-
 python/pyarrow/includes/libarrow.pxd    | 107 ++++++---
 python/pyarrow/includes/pyarrow.pxd     |   5 +-
 python/pyarrow/scalar.pyx               |  24 +-
 python/pyarrow/schema.pxd               |   6 +-
 python/pyarrow/schema.pyx               | 155 +++++++++----
 python/pyarrow/tests/test_schema.py     |  28 ++-
 python/pyarrow/tests/test_table.py      |  40 ++++
 python/src/pyarrow/adapters/builtin.cc  |  20 +-
 python/src/pyarrow/helpers.cc           |  15 +-
 python/src/pyarrow/helpers.h            |   5 +-
 100 files changed, 3888 insertions(+), 1613 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/ci/travis_before_script_cpp.sh
----------------------------------------------------------------------
diff --git a/ci/travis_before_script_cpp.sh b/ci/travis_before_script_cpp.sh
index 49dcc39..193c76f 100755
--- a/ci/travis_before_script_cpp.sh
+++ b/ci/travis_before_script_cpp.sh
@@ -19,7 +19,14 @@ echo $GTEST_HOME
 
 : ${ARROW_CPP_INSTALL=$TRAVIS_BUILD_DIR/cpp-install}
 
-cmake -DARROW_BUILD_BENCHMARKS=ON -DCMAKE_INSTALL_PREFIX=$ARROW_CPP_INSTALL -DCMAKE_CXX_FLAGS="-Werror" $CPP_DIR
+CMAKE_COMMON_FLAGS="-DARROW_BUILD_BENCHMARKS=ON -DCMAKE_INSTALL_PREFIX=$ARROW_CPP_INSTALL"
+
+if [ $TRAVIS_OS_NAME == "linux" ]; then
+  cmake -DARROW_TEST_MEMCHECK=on $CMAKE_COMMON_FLAGS -DCMAKE_CXX_FLAGS="-Werror" $CPP_DIR
+else
+  cmake $CMAKE_COMMON_FLAGS -DCMAKE_CXX_FLAGS="-Werror" $CPP_DIR
+fi
+
 make -j4
 make install
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/ci/travis_script_cpp.sh
----------------------------------------------------------------------
diff --git a/ci/travis_script_cpp.sh b/ci/travis_script_cpp.sh
index d96b98f..997bdf3 100755
--- a/ci/travis_script_cpp.sh
+++ b/ci/travis_script_cpp.sh
@@ -8,10 +8,6 @@ pushd $CPP_BUILD_DIR
 
 make lint
 
-if [ $TRAVIS_OS_NAME == "linux" ]; then
-  valgrind --tool=memcheck --leak-check=yes --error-exitcode=1 ctest -L unittest
-else
-  ctest -L unittest
-fi
+ctest -L unittest
 
 popd

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 268c1d1..6d70107 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -51,7 +51,9 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
   option(ARROW_PARQUET
     "Build the Parquet adapter and link to libparquet"
     OFF)
-
+  option(ARROW_TEST_MEMCHECK
+	"Run the test suite using valgrind --tool=memcheck"
+	OFF)
   option(ARROW_BUILD_TESTS
     "Build the Arrow googletest unit tests"
     ON)
@@ -60,6 +62,10 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
     "Build the Arrow micro benchmarks"
     OFF)
 
+  option(ARROW_IPC
+    "Build the Arrow IPC extensions"
+    ON)
+
 endif()
 
 if(NOT ARROW_BUILD_TESTS)
@@ -260,17 +266,17 @@ set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}")
 include_directories(src)
 
 ############################################################
-# Benchmarking 
+# Benchmarking
 ############################################################
 # Add a new micro benchmark, with or without an executable that should be built.
 # If benchmarks are enabled then they will be run along side unit tests with ctest.
-# 'make runbenchmark' and 'make unittest' to build/run only benchmark or unittests, 
+# 'make runbenchmark' and 'make unittest' to build/run only benchmark or unittests,
 # respectively.
 #
 # REL_BENCHMARK_NAME is the name of the benchmark app. It may be a single component
 # (e.g. monotime-benchmark) or contain additional components (e.g.
 # net/net_util-benchmark). Either way, the last component must be a globally
-# unique name.  
+# unique name.
 
 # The benchmark will registered as unit test with ctest with a label
 # of 'benchmark'.
@@ -281,7 +287,7 @@ function(ADD_ARROW_BENCHMARK REL_BENCHMARK_NAME)
     return()
   endif()
   get_filename_component(BENCHMARK_NAME ${REL_BENCHMARK_NAME} NAME_WE)
-   
+
   if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${REL_BENCHMARK_NAME}.cc)
     # This benchmark has a corresponding .cc file, set it up as an executable.
     set(BENCHMARK_PATH "${EXECUTABLE_OUTPUT_PATH}/${BENCHMARK_NAME}")
@@ -294,7 +300,7 @@ function(ADD_ARROW_BENCHMARK REL_BENCHMARK_NAME)
     set(BENCHMARK_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${REL_BENCHMARK_NAME})
     set(NO_COLOR "")
   endif()
-  
+
   add_test(${BENCHMARK_NAME}
     ${BUILD_SUPPORT_DIR}/run-test.sh ${CMAKE_BINARY_DIR} benchmark ${BENCHMARK_PATH} ${NO_COLOR})
   set_tests_properties(${BENCHMARK_NAME} PROPERTIES LABELS "benchmark")
@@ -345,9 +351,18 @@ function(ADD_ARROW_TEST REL_TEST_NAME)
     set(TEST_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${REL_TEST_NAME})
   endif()
 
-  add_test(${TEST_NAME}
-    ${BUILD_SUPPORT_DIR}/run-test.sh ${CMAKE_BINARY_DIR} test ${TEST_PATH})
+  if (ARROW_TEST_MEMCHECK)
+	SET_PROPERTY(TARGET ${TEST_NAME}
+	  APPEND_STRING PROPERTY
+	  COMPILE_FLAGS " -DARROW_VALGRIND")
+	add_test(${TEST_NAME}
+	  valgrind --tool=memcheck --leak-check=full --error-exitcode=1 ${TEST_PATH})
+  else()
+	add_test(${TEST_NAME}
+      ${BUILD_SUPPORT_DIR}/run-test.sh ${CMAKE_BINARY_DIR} test ${TEST_PATH})
+  endif()
   set_tests_properties(${TEST_NAME} PROPERTIES LABELS "unittest")
+
   if(ARGN)
     set_tests_properties(${TEST_NAME} PROPERTIES ${ARGN})
   endif()
@@ -403,7 +418,7 @@ if ("$ENV{GTEST_HOME}" STREQUAL "")
   set(GTest_HOME ${THIRDPARTY_DIR}/googletest-release-1.7.0)
 endif()
 
-## Google Benchmark 
+## Google Benchmark
 if ("$ENV{GBENCHMARK_HOME}" STREQUAL "")
   set(GBENCHMARK_HOME ${THIRDPARTY_DIR}/installed)
 endif()
@@ -487,24 +502,10 @@ if (UNIX)
   add_custom_target(lint ${BUILD_SUPPORT_DIR}/cpplint.py
   --verbose=2
   --linelength=90
-  --filter=-whitespace/comments,-readability/todo,-build/header_guard,-build/c++11
-    `find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc -or -name \\*.h`)
+  --filter=-whitespace/comments,-readability/todo,-build/header_guard,-build/c++11,-runtime/references
+    `find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc -or -name \\*.h | sed -e '/_generated/g'`)
 endif (UNIX)
 
-#----------------------------------------------------------------------
-# Parquet adapter
-
-if(ARROW_PARQUET)
-  find_package(Parquet REQUIRED)
-  include_directories(SYSTEM ${PARQUET_INCLUDE_DIR})
-  ADD_THIRDPARTY_LIB(parquet
-    STATIC_LIB ${PARQUET_STATIC_LIB}
-    SHARED_LIB ${PARQUET_SHARED_LIB})
-
-  add_subdirectory(src/arrow/parquet)
-  list(APPEND LINK_LIBS arrow_parquet parquet)
-endif()
-
 ############################################################
 # Subdirectories
 ############################################################
@@ -515,15 +516,18 @@ set(LIBARROW_LINK_LIBS
 set(ARROW_SRCS
   src/arrow/array.cc
   src/arrow/builder.cc
+  src/arrow/column.cc
+  src/arrow/schema.cc
+  src/arrow/table.cc
   src/arrow/type.cc
 
-  src/arrow/table/column.cc
-  src/arrow/table/schema.cc
-  src/arrow/table/table.cc
+  # IPC / Shared memory library; to be turned into an optional component
+  src/arrow/ipc/adapter.cc
+  src/arrow/ipc/memory.cc
+  src/arrow/ipc/metadata.cc
+  src/arrow/ipc/metadata-internal.cc
 
   src/arrow/types/construct.cc
-  src/arrow/types/floating.cc
-  src/arrow/types/integer.cc
   src/arrow/types/json.cc
   src/arrow/types/list.cc
   src/arrow/types/primitive.cc
@@ -559,9 +563,39 @@ target_link_libraries(arrow ${LIBARROW_LINK_LIBS})
 
 add_subdirectory(src/arrow)
 add_subdirectory(src/arrow/util)
-add_subdirectory(src/arrow/table)
 add_subdirectory(src/arrow/types)
 
 install(TARGETS arrow
   LIBRARY DESTINATION lib
   ARCHIVE DESTINATION lib)
+
+#----------------------------------------------------------------------
+# Parquet adapter library
+
+if(ARROW_PARQUET)
+  find_package(Parquet REQUIRED)
+  include_directories(SYSTEM ${PARQUET_INCLUDE_DIR})
+  ADD_THIRDPARTY_LIB(parquet
+    STATIC_LIB ${PARQUET_STATIC_LIB}
+    SHARED_LIB ${PARQUET_SHARED_LIB})
+
+  add_subdirectory(src/arrow/parquet)
+  list(APPEND LINK_LIBS arrow_parquet parquet)
+endif()
+
+#----------------------------------------------------------------------
+# IPC library
+
+## Flatbuffers
+if(ARROW_IPC)
+  find_package(Flatbuffers REQUIRED)
+  message(STATUS "Flatbuffers include dir: ${FLATBUFFERS_INCLUDE_DIR}")
+  message(STATUS "Flatbuffers static library: ${FLATBUFFERS_STATIC_LIB}")
+  message(STATUS "Flatbuffers compiler: ${FLATBUFFERS_COMPILER}")
+  include_directories(SYSTEM ${FLATBUFFERS_INCLUDE_DIR})
+  add_library(flatbuffers STATIC IMPORTED)
+  set_target_properties(flatbuffers PROPERTIES
+	IMPORTED_LOCATION ${FLATBUFFERS_STATIC_LIB})
+
+  add_subdirectory(src/arrow/ipc)
+endif()

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/cmake_modules/FindFlatbuffers.cmake
----------------------------------------------------------------------
diff --git a/cpp/cmake_modules/FindFlatbuffers.cmake b/cpp/cmake_modules/FindFlatbuffers.cmake
new file mode 100644
index 0000000..ee472d1
--- /dev/null
+++ b/cpp/cmake_modules/FindFlatbuffers.cmake
@@ -0,0 +1,95 @@
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Tries to find Flatbuffers headers and libraries.
+#
+# Usage of this module as follows:
+#
+#  find_package(Flatbuffers)
+#
+# Variables used by this module, they can change the default behaviour and need
+# to be set before calling find_package:
+#
+#  Flatbuffers_HOME -
+#   When set, this path is inspected instead of standard library locations as
+#   the root of the Flatbuffers installation.  The environment variable
+#   FLATBUFFERS_HOME overrides this variable.
+#
+# This module defines
+#  FLATBUFFERS_INCLUDE_DIR, directory containing headers
+#  FLATBUFFERS_LIBS, directory containing flatbuffers libraries
+#  FLATBUFFERS_STATIC_LIB, path to libflatbuffers.a
+#  FLATBUFFERS_FOUND, whether flatbuffers has been found
+
+if( NOT "$ENV{FLATBUFFERS_HOME}" STREQUAL "")
+    file( TO_CMAKE_PATH "$ENV{FLATBUFFERS_HOME}" _native_path )
+    list( APPEND _flatbuffers_roots ${_native_path} )
+elseif ( Flatbuffers_HOME )
+    list( APPEND _flatbuffers_roots ${Flatbuffers_HOME} )
+endif()
+
+# Try the parameterized roots, if they exist
+if ( _flatbuffers_roots )
+    find_path( FLATBUFFERS_INCLUDE_DIR NAMES flatbuffers/flatbuffers.h
+        PATHS ${_flatbuffers_roots} NO_DEFAULT_PATH
+        PATH_SUFFIXES "include" )
+    find_library( FLATBUFFERS_LIBRARIES NAMES flatbuffers
+        PATHS ${_flatbuffers_roots} NO_DEFAULT_PATH
+        PATH_SUFFIXES "lib" )
+else ()
+    find_path( FLATBUFFERS_INCLUDE_DIR NAMES flatbuffers/flatbuffers.h )
+    find_library( FLATBUFFERS_LIBRARIES NAMES flatbuffers )
+endif ()
+
+find_program(FLATBUFFERS_COMPILER flatc
+  HINTS ${_flatbuffers_roots}  # FLATBUFFERS_HOME env var or Flatbuffers_HOME
+  PATHS /usr/local /usr
+  PATH_SUFFIXES bin
+  NO_DEFAULT_PATH
+)
+
+if (FLATBUFFERS_INCLUDE_DIR AND FLATBUFFERS_LIBRARIES)
+  set(FLATBUFFERS_FOUND TRUE)
+  get_filename_component( FLATBUFFERS_LIBS ${FLATBUFFERS_LIBRARIES} PATH )
+  set(FLATBUFFERS_LIB_NAME libflatbuffers)
+  set(FLATBUFFERS_STATIC_LIB ${FLATBUFFERS_LIBS}/${FLATBUFFERS_LIB_NAME}.a)
+else ()
+  set(FLATBUFFERS_FOUND FALSE)
+endif ()
+
+if (FLATBUFFERS_FOUND)
+  if (NOT Flatbuffers_FIND_QUIETLY)
+    message(STATUS "Found the Flatbuffers library: ${FLATBUFFERS_LIBRARIES}")
+  endif ()
+else ()  # not found: report which locations were searched
+  if (NOT Flatbuffers_FIND_QUIETLY)
+    set(FLATBUFFERS_ERR_MSG "Could not find the Flatbuffers library. Looked in")
+    if ( _flatbuffers_roots )
+      set(FLATBUFFERS_ERR_MSG "${FLATBUFFERS_ERR_MSG} ${_flatbuffers_roots}.")
+    else ()
+      set(FLATBUFFERS_ERR_MSG "${FLATBUFFERS_ERR_MSG} system search paths.")
+    endif ()
+    if (Flatbuffers_FIND_REQUIRED)
+      message(FATAL_ERROR "${FLATBUFFERS_ERR_MSG}")
+    else (Flatbuffers_FIND_REQUIRED)
+      message(STATUS "${FLATBUFFERS_ERR_MSG}")
+    endif (Flatbuffers_FIND_REQUIRED)
+  endif ()
+endif ()
+
+mark_as_advanced(
+  FLATBUFFERS_INCLUDE_DIR
+  FLATBUFFERS_LIBS
+  FLATBUFFERS_STATIC_LIB
+  FLATBUFFERS_COMPILER
+)

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/setup_build_env.sh
----------------------------------------------------------------------
diff --git a/cpp/setup_build_env.sh b/cpp/setup_build_env.sh
index 04688e7..6520dbd 100755
--- a/cpp/setup_build_env.sh
+++ b/cpp/setup_build_env.sh
@@ -2,11 +2,12 @@
 
 SOURCE_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)
 
-./thirdparty/download_thirdparty.sh || { echo "download_thirdparty.sh failed" ; return; } 
-./thirdparty/build_thirdparty.sh || { echo "build_thirdparty.sh failed" ; return; } 
+./thirdparty/download_thirdparty.sh || { echo "download_thirdparty.sh failed" ; return; }
+./thirdparty/build_thirdparty.sh || { echo "build_thirdparty.sh failed" ; return; }
 source thirdparty/versions.sh
 
 export GTEST_HOME=$SOURCE_DIR/thirdparty/$GTEST_BASEDIR
 export GBENCHMARK_HOME=$SOURCE_DIR/thirdparty/installed
+export FLATBUFFERS_HOME=$SOURCE_DIR/thirdparty/installed
 
 echo "Build env initialized"

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 73e6a9b..2d42edc 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -19,7 +19,10 @@
 install(FILES
   api.h
   array.h
+  column.h
   builder.h
+  schema.h
+  table.h
   type.h
   DESTINATION include/arrow)
 
@@ -30,3 +33,8 @@ install(FILES
 set(ARROW_TEST_LINK_LIBS ${ARROW_MIN_TEST_LIBS})
 
 ADD_ARROW_TEST(array-test)
+ADD_ARROW_TEST(column-test)
+ADD_ARROW_TEST(schema-test)
+ADD_ARROW_TEST(table-test)
+
+ADD_ARROW_BENCHMARK(column-benchmark)

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/api.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/api.h b/cpp/src/arrow/api.h
index c73d4b3..7be7f88 100644
--- a/cpp/src/arrow/api.h
+++ b/cpp/src/arrow/api.h
@@ -22,20 +22,19 @@
 
 #include "arrow/array.h"
 #include "arrow/builder.h"
+#include "arrow/column.h"
+#include "arrow/schema.h"
+#include "arrow/table.h"
 #include "arrow/type.h"
 
-#include "arrow/table/column.h"
-#include "arrow/table/schema.h"
-#include "arrow/table/table.h"
-
 #include "arrow/types/boolean.h"
 #include "arrow/types/construct.h"
-#include "arrow/types/floating.h"
-#include "arrow/types/integer.h"
 #include "arrow/types/list.h"
+#include "arrow/types/primitive.h"
 #include "arrow/types/string.h"
 #include "arrow/types/struct.h"
 
+#include "arrow/util/buffer.h"
 #include "arrow/util/memory-pool.h"
 #include "arrow/util/status.h"
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/array-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc
index df827aa..eded594 100644
--- a/cpp/src/arrow/array-test.cc
+++ b/cpp/src/arrow/array-test.cc
@@ -15,30 +15,26 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <gtest/gtest.h>
-
 #include <cstdint>
 #include <cstdlib>
 #include <memory>
 #include <vector>
 
+#include "gtest/gtest.h"
+
 #include "arrow/array.h"
 #include "arrow/test-util.h"
 #include "arrow/type.h"
-#include "arrow/types/integer.h"
 #include "arrow/types/primitive.h"
 #include "arrow/util/buffer.h"
 #include "arrow/util/memory-pool.h"
-#include "arrow/util/status.h"
 
 namespace arrow {
 
-static TypePtr int32 = TypePtr(new Int32Type());
-
 class TestArray : public ::testing::Test {
  public:
   void SetUp() {
-    pool_ = GetDefaultMemoryPool();
+    pool_ = default_memory_pool();
   }
 
  protected:
@@ -75,10 +71,10 @@ TEST_F(TestArray, TestIsNull) {
     if (x > 0) ++null_count;
   }
 
-  std::shared_ptr<Buffer> null_buf = bytes_to_null_buffer(nulls.data(),
+  std::shared_ptr<Buffer> null_buf = test::bytes_to_null_buffer(nulls.data(),
       nulls.size());
   std::unique_ptr<Array> arr;
-  arr.reset(new Array(int32, nulls.size(), null_count, null_buf));
+  arr.reset(new Int32Array(nulls.size(), nullptr, null_count, null_buf));
 
   ASSERT_EQ(null_count, arr->null_count());
   ASSERT_EQ(5, null_buf->size());

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/array.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc
index ee4ef66..5a5bc10 100644
--- a/cpp/src/arrow/array.cc
+++ b/cpp/src/arrow/array.cc
@@ -28,11 +28,6 @@ namespace arrow {
 
 Array::Array(const TypePtr& type, int32_t length, int32_t null_count,
     const std::shared_ptr<Buffer>& nulls) {
-  Init(type, length, null_count, nulls);
-}
-
-void Array::Init(const TypePtr& type, int32_t length, int32_t null_count,
-    const std::shared_ptr<Buffer>& nulls) {
   type_ = type;
   length_ = length;
   null_count_ = null_count;
@@ -42,4 +37,25 @@ void Array::Init(const TypePtr& type, int32_t length, int32_t null_count,
   }
 }
 
+bool Array::EqualsExact(const Array& other) const {
+  if (this == &other) return true;
+  if (length_ != other.length_ || null_count_ != other.null_count_ ||
+      type_enum() != other.type_enum()) {
+    return false;
+  }
+  if (null_count_ == 0) return true;
+  // An array may report nulls without owning a bitmap (NullArray passes
+  // nullptr), so only dereference the buffers when both sides have one.
+  if (!nulls_ || !other.nulls_) return nulls_ == other.nulls_;
+  return nulls_->Equals(*other.nulls_, util::bytes_for_bits(length_));
+}
+
+bool NullArray::Equals(const std::shared_ptr<Array>& arr) const {
+  if (this == arr.get()) return true;
+  // An empty pointer never compares equal; test it before dereferencing.
+  // Otherwise only the type id and the length are compared.
+  if (!arr || Type::NA != arr->type_enum()) return false;
+  return arr->length() == length_;
+}
+
 } // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/array.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h
index 85e853e..65fc0aa 100644
--- a/cpp/src/arrow/array.h
+++ b/cpp/src/arrow/array.h
@@ -40,20 +40,11 @@ class Buffer;
 // explicitly increment its reference count
 class Array {
  public:
-  Array() :
-      null_count_(0),
-      length_(0),
-      nulls_(nullptr),
-      null_bits_(nullptr) {}
-
   Array(const TypePtr& type, int32_t length, int32_t null_count = 0,
       const std::shared_ptr<Buffer>& nulls = nullptr);
 
   virtual ~Array() {}
 
-  void Init(const TypePtr& type, int32_t length, int32_t null_count,
-      const std::shared_ptr<Buffer>& nulls);
-
   // Determine if a slot is null. For inner loops. Does *not* boundscheck
   bool IsNull(int i) const {
     return null_count_ > 0 && util::get_bit(null_bits_, i);
@@ -63,12 +54,15 @@ class Array {
   int32_t null_count() const { return null_count_;}
 
   const std::shared_ptr<DataType>& type() const { return type_;}
-  LogicalType::type logical_type() const { return type_->type;}
+  Type::type type_enum() const { return type_->type;}
 
   const std::shared_ptr<Buffer>& nulls() const {
     return nulls_;
   }
 
+  bool EqualsExact(const Array& arr) const;
+  virtual bool Equals(const std::shared_ptr<Array>& arr) const = 0;
+
  protected:
   TypePtr type_;
   int32_t null_count_;
@@ -78,9 +72,22 @@ class Array {
   const uint8_t* null_bits_;
 
  private:
+  Array() {}
   DISALLOW_COPY_AND_ASSIGN(Array);
 };
 
+// Degenerate null type Array
+class NullArray : public Array {
+ public:
+  NullArray(const std::shared_ptr<DataType>& type, int32_t length) :
+      Array(type, length, length, nullptr) {}
+
+  explicit NullArray(int32_t length) :
+      NullArray(std::make_shared<NullType>(), length) {}
+
+  bool Equals(const std::shared_ptr<Array>& arr) const override;
+};
+
 typedef std::shared_ptr<Array> ArrayPtr;
 
 } // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/builder.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h
index 8cc689c..d5d1fdf 100644
--- a/cpp/src/arrow/builder.h
+++ b/cpp/src/arrow/builder.h
@@ -99,7 +99,7 @@ class ArrayBuilder {
   int32_t capacity_;
 
   // Child value array builders. These are owned by this class
-  std::vector<std::unique_ptr<ArrayBuilder> > children_;
+  std::vector<std::unique_ptr<ArrayBuilder>> children_;
 
  private:
   DISALLOW_COPY_AND_ASSIGN(ArrayBuilder);

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/column-benchmark.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/column-benchmark.cc b/cpp/src/arrow/column-benchmark.cc
new file mode 100644
index 0000000..69ee52c
--- /dev/null
+++ b/cpp/src/arrow/column-benchmark.cc
@@ -0,0 +1,54 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+#include "benchmark/benchmark.h"
+
+#include "arrow/test-util.h"
+#include "arrow/types/primitive.h"
+#include "arrow/util/memory-pool.h"
+
+namespace arrow {
+namespace {
+  template <typename ArrayType>
+  std::shared_ptr<Array> MakePrimitive(int32_t length, int32_t null_count = 0) {
+    auto pool = default_memory_pool();
+    auto data = std::make_shared<PoolBuffer>(pool);
+    auto nulls = std::make_shared<PoolBuffer>(pool);
+    data->Resize(length * sizeof(typename ArrayType::value_type));
+    nulls->Resize(util::bytes_for_bits(length));  // sized only; no bits are set here
+    return std::make_shared<ArrayType>(length, data, null_count, nulls);
+  }
+}  // anonymous namespace
+
+
+static void BM_BuildInt32ColumnByChunk(benchmark::State& state) { //NOLINT non-const reference
+  ArrayVector arrays;
+  for (int chunk_n = 0; chunk_n < state.range_x(); ++chunk_n) {
+    arrays.push_back(MakePrimitive<Int32Array>(100, 10));
+  }
+  const auto INT32 = std::make_shared<Int32Type>();
+  const auto field = std::make_shared<Field>("c0", INT32);
+  std::unique_ptr<Column> column;
+  while (state.KeepRunning()) {
+    column.reset(new Column(field, arrays));
+  }
+}
+
+BENCHMARK(BM_BuildInt32ColumnByChunk)->Range(5, 50000);
+
+}  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/column-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/column-test.cc b/cpp/src/arrow/column-test.cc
new file mode 100644
index 0000000..0630785
--- /dev/null
+++ b/cpp/src/arrow/column-test.cc
@@ -0,0 +1,75 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "arrow/column.h"
+#include "arrow/schema.h"
+#include "arrow/test-util.h"
+#include "arrow/type.h"
+#include "arrow/types/primitive.h"
+
+using std::shared_ptr;
+using std::vector;
+
+namespace arrow {
+
+const auto INT32 = std::make_shared<Int32Type>();
+
+class TestColumn : public TestBase {
+ protected:
+  std::shared_ptr<ChunkedArray> data_;
+  std::unique_ptr<Column> column_;
+};
+
+TEST_F(TestColumn, BasicAPI) {
+  ArrayVector arrays;
+  arrays.push_back(MakePrimitive<Int32Array>(100));
+  arrays.push_back(MakePrimitive<Int32Array>(100, 10));
+  arrays.push_back(MakePrimitive<Int32Array>(100, 20));
+
+  auto field = std::make_shared<Field>("c0", INT32);
+  column_.reset(new Column(field, arrays));
+
+  ASSERT_EQ("c0", column_->name());
+  ASSERT_TRUE(column_->type()->Equals(INT32));
+  ASSERT_EQ(300, column_->length());
+  ASSERT_EQ(30, column_->null_count());
+  ASSERT_EQ(3, column_->data()->num_chunks());
+}
+
+TEST_F(TestColumn, ChunksInhomogeneous) {
+  ArrayVector arrays;
+  arrays.push_back(MakePrimitive<Int32Array>(100));
+  arrays.push_back(MakePrimitive<Int32Array>(100, 10));
+
+  auto field = std::make_shared<Field>("c0", INT32);
+  column_.reset(new Column(field, arrays));
+
+  ASSERT_OK(column_->ValidateData());
+
+  arrays.push_back(MakePrimitive<Int16Array>(100, 10));
+  column_.reset(new Column(field, arrays));
+  ASSERT_RAISES(Invalid, column_->ValidateData());
+}
+
+} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/column.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/column.cc b/cpp/src/arrow/column.cc
new file mode 100644
index 0000000..46acf8d
--- /dev/null
+++ b/cpp/src/arrow/column.cc
@@ -0,0 +1,70 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/column.h"
+
+#include <memory>
+#include <sstream>
+
+#include "arrow/array.h"
+#include "arrow/type.h"
+#include "arrow/util/status.h"
+
+namespace arrow {
+
+ChunkedArray::ChunkedArray(const ArrayVector& chunks) :
+    chunks_(chunks) {
+  length_ = 0;
+  null_count_ = 0;
+  for (const std::shared_ptr<Array>& chunk : chunks) {
+    length_ += chunk->length();
+    null_count_ += chunk->null_count();
+  }
+}
+
+Column::Column(const std::shared_ptr<Field>& field, const ArrayVector& chunks) :
+    field_(field) {
+  data_ = std::make_shared<ChunkedArray>(chunks);
+}
+
+Column::Column(const std::shared_ptr<Field>& field,
+    const std::shared_ptr<Array>& data) :
+    field_(field) {
+  data_ = std::make_shared<ChunkedArray>(ArrayVector({data}));
+}
+
+Column::Column(const std::shared_ptr<Field>& field,
+    const std::shared_ptr<ChunkedArray>& data) :
+    field_(field),
+    data_(data) {}
+
+Status Column::ValidateData() {
+  for (int i = 0; i < data_->num_chunks(); ++i) {
+    const std::shared_ptr<DataType>& type = data_->chunk(i)->type();
+    if (!this->type()->Equals(type)) {
+      std::stringstream ss;
+      ss << "In chunk " << i << " expected type "
+         << this->type()->ToString()
+         << " but saw "
+         << type->ToString();
+      return Status::Invalid(ss.str());
+    }
+  }
+  return Status::OK();
+}
+
+} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/column.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/column.h b/cpp/src/arrow/column.h
new file mode 100644
index 0000000..1ad97b2
--- /dev/null
+++ b/cpp/src/arrow/column.h
@@ -0,0 +1,108 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_COLUMN_H
+#define ARROW_COLUMN_H
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/type.h"
+
+namespace arrow {
+
+class Array;
+class Status;
+
+typedef std::vector<std::shared_ptr<Array>> ArrayVector;
+
+// A data structure managing a list of primitive Arrow arrays logically as one
+// large array
+class ChunkedArray {
+ public:
+  explicit ChunkedArray(const ArrayVector& chunks);
+
+  // @returns: the total length of the chunked array; computed on construction
+  int64_t length() const {
+    return length_;
+  }
+
+  int64_t null_count() const {
+    return null_count_;
+  }
+
+  int num_chunks() const {
+    return chunks_.size();
+  }
+
+  const std::shared_ptr<Array>& chunk(int i) const {
+    return chunks_[i];
+  }
+
+ protected:
+  ArrayVector chunks_;
+  int64_t length_;
+  int64_t null_count_;
+};
+
+// An immutable column data structure consisting of a field (type metadata) and
+// a logical chunked data array (which can be validated as all being the same
+// type).
+class Column {
+ public:
+  Column(const std::shared_ptr<Field>& field, const ArrayVector& chunks);
+  Column(const std::shared_ptr<Field>& field,
+      const std::shared_ptr<ChunkedArray>& data);
+
+  Column(const std::shared_ptr<Field>& field, const std::shared_ptr<Array>& data);
+
+  int64_t length() const {
+    return data_->length();
+  }
+
+  int64_t null_count() const {
+    return data_->null_count();
+  }
+
+  // @returns: the column's name in the passed metadata
+  const std::string& name() const {
+    return field_->name;
+  }
+
+  // @returns: the column's type according to the metadata
+  const std::shared_ptr<DataType>& type() const {
+    return field_->type;
+  }
+
+  // @returns: the column's data as a chunked logical array
+  const std::shared_ptr<ChunkedArray>& data() const {
+    return data_;
+  }
+  // Verify that the column's array data is consistent with the passed field's
+  // metadata
+  Status ValidateData();
+
+ protected:
+  std::shared_ptr<Field> field_;
+  std::shared_ptr<ChunkedArray> data_;
+};
+
+} // namespace arrow
+
+#endif  // ARROW_COLUMN_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/.gitignore
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/.gitignore b/cpp/src/arrow/ipc/.gitignore
new file mode 100644
index 0000000..8150d7e
--- /dev/null
+++ b/cpp/src/arrow/ipc/.gitignore
@@ -0,0 +1 @@
+*_generated.h
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/CMakeLists.txt b/cpp/src/arrow/ipc/CMakeLists.txt
new file mode 100644
index 0000000..383684f
--- /dev/null
+++ b/cpp/src/arrow/ipc/CMakeLists.txt
@@ -0,0 +1,51 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#######################################
+# arrow_ipc
+#######################################
+
+# Headers: top level
+install(FILES
+  adapter.h
+  metadata.h
+  memory.h
+  DESTINATION include/arrow/ipc)
+
+ADD_ARROW_TEST(ipc-adapter-test)
+ADD_ARROW_TEST(ipc-memory-test)
+ADD_ARROW_TEST(ipc-metadata-test)
+
+# Message_generated.h is written by flatc (see below); make clean will delete it
+set_source_files_properties(Message_generated.h PROPERTIES GENERATED TRUE)
+
+set(OUTPUT_DIR ${CMAKE_SOURCE_DIR}/src/arrow/ipc)
+set(FBS_OUTPUT_FILES "${OUTPUT_DIR}/Message_generated.h")
+
+set(FBS_SRC ${CMAKE_SOURCE_DIR}/../format/Message.fbs)
+get_filename_component(ABS_FBS_SRC ${FBS_SRC} ABSOLUTE)
+
+add_custom_command(
+  OUTPUT ${FBS_OUTPUT_FILES}
+  COMMAND ${FLATBUFFERS_COMPILER} -c -o ${OUTPUT_DIR} ${ABS_FBS_SRC}
+  DEPENDS ${ABS_FBS_SRC}
+  COMMENT "Running flatc compiler on ${FBS_SRC}"
+  VERBATIM
+)
+
+add_custom_target(metadata_fbs DEPENDS ${FBS_OUTPUT_FILES})
+add_dependencies(arrow metadata_fbs)

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/adapter.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/adapter.cc b/cpp/src/arrow/ipc/adapter.cc
new file mode 100644
index 0000000..7cdb965
--- /dev/null
+++ b/cpp/src/arrow/ipc/adapter.cc
@@ -0,0 +1,305 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/ipc/adapter.h"
+
+#include <cstdint>
+#include <cstring>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/ipc/memory.h"
+#include "arrow/ipc/Message_generated.h"
+#include "arrow/ipc/metadata.h"
+#include "arrow/ipc/metadata-internal.h"
+#include "arrow/schema.h"
+#include "arrow/table.h"
+#include "arrow/type.h"
+#include "arrow/types/construct.h"
+#include "arrow/types/primitive.h"
+#include "arrow/util/buffer.h"
+#include "arrow/util/status.h"
+
+namespace arrow {
+
+namespace flatbuf = apache::arrow::flatbuf;
+
+namespace ipc {
+
+static bool IsPrimitive(const DataType* type) {
+  switch (type->type) {
+    // NA is null type or "no type", considered primitive for now
+    case Type::NA:
+    case Type::BOOL:
+    case Type::UINT8:
+    case Type::INT8:
+    case Type::UINT16:
+    case Type::INT16:
+    case Type::UINT32:
+    case Type::INT32:
+    case Type::UINT64:
+    case Type::INT64:
+    case Type::FLOAT:
+    case Type::DOUBLE:
+      return true;
+    default:
+      return false;
+  }
+}
+
+// ----------------------------------------------------------------------
+// Row batch write path
+
+Status VisitArray(const Array* arr, std::vector<flatbuf::FieldNode>* field_nodes,
+    std::vector<std::shared_ptr<Buffer>>* buffers) {
+  if (IsPrimitive(arr->type().get())) {
+    const PrimitiveArray* prim_arr = static_cast<const PrimitiveArray*>(arr);
+    // A primitive array appends one field node and two buffers (nulls, data)
+    field_nodes->push_back(
+        flatbuf::FieldNode(prim_arr->length(), prim_arr->null_count()));
+
+    if (prim_arr->null_count() > 0) {
+      buffers->push_back(prim_arr->nulls());
+    } else {
+      // Push a dummy zero-length buffer, not to be copied
+      buffers->push_back(std::make_shared<Buffer>(nullptr, 0));
+    }
+    buffers->push_back(prim_arr->data());
+  } else if (arr->type_enum() == Type::LIST) {
+    return Status::NotImplemented("List type");  // TODO(wesm)
+  } else if (arr->type_enum() == Type::STRUCT) {
+    return Status::NotImplemented("Struct type");  // TODO(wesm)
+  } else {
+    // Reject anything else instead of silently emitting no field node/buffers
+    return Status::NotImplemented(arr->type()->ToString());
+  }
+  return Status::OK();
+}
+
+class RowBatchWriter {
+ public:
+  explicit RowBatchWriter(const RowBatch* batch) :
+      batch_(batch) {}
+
+  Status AssemblePayload() {
+    // Perform depth-first traversal of the row-batch
+    for (int i = 0; i < batch_->num_columns(); ++i) {
+      const Array* arr = batch_->column(i).get();
+      RETURN_NOT_OK(VisitArray(arr, &field_nodes_, &buffers_));
+    }
+    return Status::OK();
+  }
+
+  Status Write(MemorySource* dst, int64_t position, int64_t* data_header_offset) {
+    // Write out all the buffers contiguously and compute the total size of the
+    // memory payload
+    int64_t offset = 0;
+    for (size_t i = 0; i < buffers_.size(); ++i) {
+      const Buffer* buffer = buffers_[i].get();
+      int64_t size = buffer->size();
+
+      // TODO(wesm): We currently have no notion of shared memory page id's,
+      // but we've included it in the metadata IDL for when we have it in the
+      // future. Use page=0 for now
+      //
+      // Note that page ids are a bespoke notion for Arrow and not a feature we
+      // are using from any OS-level shared memory. The thought is that systems
+      // may (in the future) associate integer page id's with physical memory
+      // pages (according to whatever is the desired shared memory mechanism)
+      buffer_meta_.push_back(flatbuf::Buffer(0, position + offset, size));
+
+      if (size > 0) {
+        RETURN_NOT_OK(dst->Write(position + offset, buffer->data(), size));
+        offset += size;
+      }
+    }
+
+    // Now that we have computed the locations of all of the buffers in shared
+    // memory, the data header can be converted to a flatbuffer and written out
+    //
+    // Note: The memory written here is prefixed by the size of the flatbuffer
+    // itself as an int32_t. On reading from a MemorySource, you will have to
+    // determine the data header size then request a buffer such that you can
+    // construct the flatbuffer data accessor object (see arrow::ipc::Message)
+    std::shared_ptr<Buffer> data_header;
+    RETURN_NOT_OK(WriteDataHeader(batch_->num_rows(), offset,
+            field_nodes_, buffer_meta_, &data_header));
+
+    // Write the data header at the end
+    RETURN_NOT_OK(dst->Write(position + offset, data_header->data(),
+            data_header->size()));
+
+    *data_header_offset = position + offset;
+    return Status::OK();
+  }
+
+  // This must be called after invoking AssemblePayload
+  int64_t DataHeaderSize() {
+    // TODO(wesm): In case it is needed, compute the upper bound for the size
+    // of the buffer containing the flatbuffer data header.
+    return 0;
+  }
+
+  // Total footprint of buffers. This must be called after invoking
+  // AssemblePayload
+  int64_t TotalBytes() {
+    int64_t total = 0;
+    for (const std::shared_ptr<Buffer>& buffer : buffers_) {
+      total += buffer->size();
+    }
+    return total;
+  }
+
+ private:
+  const RowBatch* batch_;
+
+  std::vector<flatbuf::FieldNode> field_nodes_;
+  std::vector<flatbuf::Buffer> buffer_meta_;
+  std::vector<std::shared_ptr<Buffer>> buffers_;
+};
+
+Status WriteRowBatch(MemorySource* dst, const RowBatch* batch, int64_t position,
+    int64_t* header_offset) {
+  RowBatchWriter serializer(batch);
+  RETURN_NOT_OK(serializer.AssemblePayload());
+  return serializer.Write(dst, position, header_offset);
+}
+// ----------------------------------------------------------------------
+// Row batch read path
+
+static constexpr int64_t INIT_METADATA_SIZE = 4096;
+
+// Rebuilds Arrow arrays from flattened record batch metadata, fetching the
+// referenced buffers from a MemorySource. The source is held as a raw pointer
+// and is not owned by this class.
+class RowBatchReader::Impl {
+ public:
+  Impl(MemorySource* source, const std::shared_ptr<RecordBatchMessage>& metadata) :
+      source_(source),
+      metadata_(metadata),
+      field_index_(0),
+      buffer_index_(0) {
+    num_buffers_ = metadata->num_buffers();
+    num_flattened_fields_ = metadata->num_fields();
+  }
+
+  // Build one array per top-level schema field and wrap them in a RowBatch
+  // whose length is taken from the metadata. Returns the first error
+  // reported while reconstructing an array.
+  Status AssembleBatch(const std::shared_ptr<Schema>& schema,
+      std::shared_ptr<RowBatch>* out) {
+    std::vector<std::shared_ptr<Array>> arrays(schema->num_fields());
+
+    // The field_index and buffer_index are incremented in NextArray based on
+    // how much of the batch is "consumed" (through nested data reconstruction,
+    // for example)
+    field_index_ = 0;
+    buffer_index_ = 0;
+    for (int i = 0; i < schema->num_fields(); ++i) {
+      const Field* field = schema->field(i).get();
+      RETURN_NOT_OK(NextArray(field, &arrays[i]));
+    }
+
+    *out = std::make_shared<RowBatch>(schema, metadata_->length(),
+        arrays);
+    return Status::OK();
+  }
+
+ private:
+  // Traverse the flattened record batch metadata and reassemble the
+  // corresponding array containers. Returns Invalid when the metadata has
+  // fewer field entries than the schema requires, and NotImplemented for
+  // non-primitive types.
+  Status NextArray(const Field* field, std::shared_ptr<Array>* out) {
+    const std::shared_ptr<DataType>& type = field->type;
+
+    // pop off a field
+    if (field_index_ >= num_flattened_fields_) {
+      return Status::Invalid("Ran out of field metadata, likely malformed");
+    }
+
+    // This only contains the length and null count, which we need to figure
+    // out what to do with the buffers. For example, if null_count == 0, then
+    // we can skip that buffer without reading from shared memory
+    FieldMetadata field_meta = metadata_->field(field_index_++);
+
+    if (IsPrimitive(type.get())) {
+      std::shared_ptr<Buffer> nulls;
+      std::shared_ptr<Buffer> data;
+      if (field_meta.null_count == 0) {
+        // No nulls: step over the null buffer slot without reading it
+        nulls = nullptr;
+        ++buffer_index_;
+      } else {
+        RETURN_NOT_OK(GetBuffer(buffer_index_++, &nulls));
+      }
+      if (field_meta.length > 0) {
+        RETURN_NOT_OK(GetBuffer(buffer_index_++, &data));
+      } else {
+        // NOTE(review): buffer_index_ is not advanced on this path, unlike
+        // the null_count == 0 path above -- confirm this matches the buffer
+        // layout produced by the writer
+        data.reset(new Buffer(nullptr, 0));
+      }
+      return MakePrimitiveArray(type, field_meta.length, data,
+          field_meta.null_count, nulls, out);
+    } else {
+      return Status::NotImplemented("Non-primitive types not complete yet");
+    }
+  }
+
+  // Read the buffer described by metadata entry `buffer_index` from the
+  // memory source. Returns Invalid when the index is outside the range of
+  // buffers recorded in the metadata, mirroring the field check in NextArray.
+  Status GetBuffer(int buffer_index, std::shared_ptr<Buffer>* out) {
+    if (buffer_index < 0 || buffer_index >= num_buffers_) {
+      return Status::Invalid("Ran out of buffer metadata, likely malformed");
+    }
+    BufferMetadata metadata = metadata_->buffer(buffer_index);
+    return source_->ReadAt(metadata.offset, metadata.length, out);
+  }
+
+  MemorySource* source_;
+  std::shared_ptr<RecordBatchMessage> metadata_;
+
+  // Cursors into the flattened field / buffer metadata, reset by AssembleBatch
+  int field_index_;
+  int buffer_index_;
+  // Entry counts reported by the metadata, used to bound the cursors
+  int num_buffers_;
+  int num_flattened_fields_;
+};
+
+// Create a reader for the record batch whose size-prefixed metadata starts at
+// `position` in `source`.
+//
+// The metadata is laid out as <int32: metadata size> <metadata bytes>. An
+// initial read of INIT_METADATA_SIZE bytes is attempted; if it does not cover
+// the prefix plus the whole metadata, the exact range is read again. In both
+// cases the buffer handed to Message::Open starts at the size prefix.
+//
+// Returns Invalid when the source does not contain the complete metadata,
+// when the size prefix is negative, or when the message is not a record
+// batch; errors from the memory source and Message::Open are propagated.
+Status RowBatchReader::Open(MemorySource* source, int64_t position,
+    std::shared_ptr<RowBatchReader>* out) {
+  std::shared_ptr<Buffer> metadata;
+  RETURN_NOT_OK(source->ReadAt(position, INIT_METADATA_SIZE, &metadata));
+
+  // A memory source may return fewer bytes than requested, so make sure the
+  // size prefix is really there before dereferencing it
+  const int64_t prefix_size = sizeof(int32_t);
+  if (metadata->size() < prefix_size) {
+    return Status::Invalid("Not enough data to read the metadata size");
+  }
+
+  int32_t metadata_size = *reinterpret_cast<const int32_t*>(metadata->data());
+  if (metadata_size < 0) {
+    return Status::Invalid("Negative metadata size, likely malformed");
+  }
+
+  // We may not need to call source->ReadAt again
+  const int64_t total_size = prefix_size + metadata_size;
+  if (total_size > metadata->size()) {
+    // We don't have enough data, read the prefix and the indicated metadata
+    // size so that this buffer has the same layout as the initial read
+    RETURN_NOT_OK(source->ReadAt(position, total_size, &metadata));
+    if (metadata->size() < total_size) {
+      return Status::Invalid("Metadata extends past the end of the source");
+    }
+  }
+
+  // TODO(wesm): buffer slicing here would be better in case ReadAt returns
+  // allocated memory
+
+  std::shared_ptr<Message> message;
+  RETURN_NOT_OK(Message::Open(metadata, &message));
+
+  if (message->type() != Message::RECORD_BATCH) {
+    return Status::Invalid("Metadata message is not a record batch");
+  }
+
+  std::shared_ptr<RecordBatchMessage> batch_meta = message->GetRecordBatch();
+
+  std::shared_ptr<RowBatchReader> result(new RowBatchReader());
+  result->impl_.reset(new Impl(source, batch_meta));
+  *out = result;
+
+  return Status::OK();
+}
+
+Status RowBatchReader::GetRowBatch(const std::shared_ptr<Schema>& schema,
+    std::shared_ptr<RowBatch>* out) {
+  return impl_->AssembleBatch(schema, out);
+}
+
+
+} // namespace ipc
+} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/adapter.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/adapter.h b/cpp/src/arrow/ipc/adapter.h
new file mode 100644
index 0000000..26dea6d
--- /dev/null
+++ b/cpp/src/arrow/ipc/adapter.h
@@ -0,0 +1,86 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Public API for writing and accessing (with zero copy, if possible) Arrow
+// data in shared memory
+
+#ifndef ARROW_IPC_ADAPTER_H
+#define ARROW_IPC_ADAPTER_H
+
+#include <cstdint>
+#include <memory>
+
+namespace arrow {
+
+class Array;
+class RowBatch;
+class Schema;
+class Status;
+
+namespace ipc {
+
+class MemorySource;
+class RecordBatchMessage;
+
+// ----------------------------------------------------------------------
+// Write path
+
+// Write the RowBatch (collection of equal-length Arrow arrays) to the memory
+// source at the indicated position
+//
+// First, each of the memory buffers is written out end-to-end, starting at
+// the indicated position.
+//
+// Then, this function writes the batch metadata as a flatbuffer (see
+// format/Message.fbs -- the RecordBatch message type) like so:
+//
+// <int32: metadata size> <uint8*: metadata>
+//
+// Finally, the memory offset to the start of the metadata / data header is
+// returned in an out-variable
+Status WriteRowBatch(MemorySource* dst, const RowBatch* batch, int64_t position,
+    int64_t* header_offset);
+
+// int64_t GetRowBatchMetadata(const RowBatch* batch);
+
+// Compute the precise number of bytes needed in a contiguous memory segment to
+// write the row batch. This involves generating the complete serialized
+// Flatbuffers metadata.
+int64_t GetRowBatchSize(const RowBatch* batch);
+
+// ----------------------------------------------------------------------
+// "Read" path; does not copy data if the MemorySource does not
+
+// Reads a row batch back out of a MemorySource. Instances are created with
+// Open; the implementation is hidden behind the private Impl class.
+class RowBatchReader {
+ public:
+  // Create a reader for the batch whose metadata starts at `position` in
+  // `source`; on success the reader is stored in *out. The implementation
+  // keeps `source` as a raw pointer, so the source must stay alive for as
+  // long as the reader is used.
+  static Status Open(MemorySource* source, int64_t position,
+      std::shared_ptr<RowBatchReader>* out);
+
+  // Reassemble the row batch. A Schema is required to be able to construct the
+  // right array containers
+  Status GetRowBatch(const std::shared_ptr<Schema>& schema,
+      std::shared_ptr<RowBatch>* out);
+
+ private:
+  // Only forward-declared here.
+  // NOTE(review): no destructor is declared, so the implicit one needs Impl
+  // to be complete wherever a RowBatchReader is destroyed -- confirm
+  class Impl;
+  std::unique_ptr<Impl> impl_;
+};
+
+} // namespace ipc
+} // namespace arrow
+
+#endif // ARROW_IPC_ADAPTER_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/ipc-adapter-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/ipc-adapter-test.cc b/cpp/src/arrow/ipc/ipc-adapter-test.cc
new file mode 100644
index 0000000..d75998f
--- /dev/null
+++ b/cpp/src/arrow/ipc/ipc-adapter-test.cc
@@ -0,0 +1,112 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+#include <limits>
+#include <memory>
+#include <random>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "arrow/ipc/adapter.h"
+#include "arrow/ipc/memory.h"
+#include "arrow/ipc/test-common.h"
+
+#include "arrow/test-util.h"
+#include "arrow/types/primitive.h"
+#include "arrow/util/bit-util.h"
+#include "arrow/util/buffer.h"
+#include "arrow/util/memory-pool.h"
+#include "arrow/util/status.h"
+
+namespace arrow {
+namespace ipc {
+
+// Fixture for row batch round-trip tests: holds the default memory pool and
+// a read-write memory map opened over a file created through MemoryMapFixture.
+class TestWriteRowBatch : public ::testing::Test, public MemoryMapFixture {
+ public:
+  void SetUp() {
+    pool_ = default_memory_pool();
+  }
+  void TearDown() {
+    MemoryMapFixture::TearDown();
+  }
+
+  // Create the backing file with the requested size, then map it into mmap_
+  void InitMemoryMap(int64_t size) {
+    std::string file_name = "test-write-row-batch";
+    MemoryMapFixture::CreateFile(file_name, size);
+    ASSERT_OK(MemoryMappedSource::Open(file_name, MemorySource::READ_WRITE,
+        &mmap_));
+  }
+
+ protected:
+  MemoryPool* pool_;
+  std::shared_ptr<MemoryMappedSource> mmap_;
+};
+
+const auto INT32 = std::make_shared<Int32Type>();
+
+// Write a batch of two int32 columns (one without nulls, one with a random
+// null bitmap) into a memory map, read it back and compare column by column.
+TEST_F(TestWriteRowBatch, IntegerRoundTrip) {
+  const int length = 1000;
+
+  // Make the schema
+  auto f0 = std::make_shared<Field>("f0", INT32);
+  auto f1 = std::make_shared<Field>("f1", INT32);
+  std::shared_ptr<Schema> schema(new Schema({f0, f1}));
+
+  // Example data
+
+  auto data = std::make_shared<PoolBuffer>(pool_);
+  ASSERT_OK(data->Resize(length * sizeof(int32_t)));
+  test::rand_uniform_int(length, 0, 0, std::numeric_limits<int32_t>::max(),
+      reinterpret_cast<int32_t*>(data->mutable_data()));
+
+  auto nulls = std::make_shared<PoolBuffer>(pool_);
+  int null_bytes = util::bytes_for_bits(length);
+  ASSERT_OK(nulls->Resize(null_bytes));
+  test::random_bytes(null_bytes, 0, nulls->mutable_data());
+
+  auto a0 = std::make_shared<Int32Array>(length, data);
+  auto a1 = std::make_shared<Int32Array>(length, data,
+      test::bitmap_popcount(nulls->data(), length), nulls);
+
+  RowBatch batch(schema, length, {a0, a1});
+
+  // TODO(wesm): computing memory requirements for a row batch
+  // 64k is plenty of space
+  InitMemoryMap(1 << 16);
+
+  int64_t header_location;
+  ASSERT_OK(WriteRowBatch(mmap_.get(), &batch, 0, &header_location));
+
+  std::shared_ptr<RowBatchReader> result;
+  ASSERT_OK(RowBatchReader::Open(mmap_.get(), header_location, &result));
+
+  std::shared_ptr<RowBatch> batch_result;
+  ASSERT_OK(result->GetRowBatch(schema, &batch_result));
+  EXPECT_EQ(batch.num_rows(), batch_result->num_rows());
+
+  // Stop here on a column count mismatch so that the loop below never asks
+  // the result for a column it does not have
+  ASSERT_EQ(batch.num_columns(), batch_result->num_columns());
+
+  for (int i = 0; i < batch.num_columns(); ++i) {
+    EXPECT_TRUE(batch.column(i)->Equals(batch_result->column(i)))
+      << "column " << i << ": " << batch.column_name(i);
+  }
+}
+
+} // namespace ipc
+} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/ipc-memory-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/ipc-memory-test.cc b/cpp/src/arrow/ipc/ipc-memory-test.cc
new file mode 100644
index 0000000..332ad2a
--- /dev/null
+++ b/cpp/src/arrow/ipc/ipc-memory-test.cc
@@ -0,0 +1,82 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "arrow/ipc/memory.h"
+#include "arrow/ipc/test-common.h"
+#include "arrow/test-util.h"
+#include "arrow/util/buffer.h"
+#include "arrow/util/status.h"
+
+namespace arrow {
+namespace ipc {
+
+// Fixture for MemoryMappedSource tests. TearDown is forwarded to
+// MemoryMapFixture.
+class TestMemoryMappedSource : public ::testing::Test, public MemoryMapFixture {
+ public:
+  void TearDown() {
+    MemoryMapFixture::TearDown();
+  }
+};
+
+// Placeholder: this test has no assertions yet
+TEST_F(TestMemoryMappedSource, InvalidUsages) {
+}
+
+// Write the same random block at consecutive positions of a memory map and
+// check that each block reads back unchanged.
+TEST_F(TestMemoryMappedSource, WriteRead) {
+  const int64_t buffer_size = 1024;
+  std::vector<uint8_t> buffer(buffer_size);
+
+  // Fill exactly the buffer; tied to buffer_size rather than a repeated
+  // literal so the two cannot drift apart
+  test::random_bytes(buffer_size, 0, buffer.data());
+
+  const int reps = 5;
+
+  std::string path = "ipc-write-read-test";
+  CreateFile(path, reps * buffer_size);
+
+  std::shared_ptr<MemoryMappedSource> result;
+  ASSERT_OK(MemoryMappedSource::Open(path, MemorySource::READ_WRITE, &result));
+
+  int64_t position = 0;
+
+  std::shared_ptr<Buffer> out_buffer;
+  for (int i = 0; i < reps; ++i) {
+    ASSERT_OK(result->Write(position, buffer.data(), buffer_size));
+    ASSERT_OK(result->ReadAt(position, buffer_size, &out_buffer));
+
+    ASSERT_EQ(0, memcmp(out_buffer->data(), buffer.data(), buffer_size));
+
+    position += buffer_size;
+  }
+}
+
+// Opening a path that does not exist is expected to raise an IOError
+TEST_F(TestMemoryMappedSource, InvalidFile) {
+  const std::string missing_path("invalid-file-name-asfd");
+
+  std::shared_ptr<MemoryMappedSource> source;
+  ASSERT_RAISES(IOError,
+      MemoryMappedSource::Open(missing_path, MemorySource::READ_ONLY, &source));
+}
+
+} // namespace ipc
+} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/ipc-metadata-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/ipc-metadata-test.cc b/cpp/src/arrow/ipc/ipc-metadata-test.cc
new file mode 100644
index 0000000..ceabec0
--- /dev/null
+++ b/cpp/src/arrow/ipc/ipc-metadata-test.cc
@@ -0,0 +1,99 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <memory>
+#include <sstream>
+#include <string>
+
+#include "gtest/gtest.h"
+
+#include "arrow/ipc/metadata.h"
+#include "arrow/schema.h"
+#include "arrow/test-util.h"
+#include "arrow/type.h"
+#include "arrow/util/status.h"
+
+namespace arrow {
+
+class Buffer;
+
+// Fail the current test, printing both schemas, unless `lhs` equals `rhs`
+static inline void assert_schema_equal(const Schema* lhs, const Schema* rhs) {
+  if (lhs->Equals(*rhs)) {
+    return;
+  }
+
+  std::stringstream message;
+  message << "left schema: " << lhs->ToString() << std::endl;
+  message << "right schema: " << rhs->ToString() << std::endl;
+  FAIL() << message.str();
+}
+
+// Fixture that checks a Schema survives being written as a metadata message
+// and read back.
+class TestSchemaMessage : public ::testing::Test {
+ public:
+  void SetUp() {}
+
+  // Write `schema` with ipc::WriteSchema, reopen the buffer as a Message and
+  // verify the message type, the field count and the reconstructed schema
+  void CheckRoundtrip(const Schema* schema) {
+    std::shared_ptr<Buffer> serialized;
+    ASSERT_OK(ipc::WriteSchema(schema, &serialized));
+
+    std::shared_ptr<ipc::Message> opened;
+    ASSERT_OK(ipc::Message::Open(serialized, &opened));
+    ASSERT_EQ(ipc::Message::SCHEMA, opened->type());
+
+    std::shared_ptr<ipc::SchemaMessage> schema_message = opened->GetSchema();
+    ASSERT_EQ(schema->num_fields(), schema_message->num_fields());
+
+    std::shared_ptr<Schema> roundtripped;
+    ASSERT_OK(schema_message->GetSchema(&roundtripped));
+
+    assert_schema_equal(schema, roundtripped.get());
+  }
+};
+
+const std::shared_ptr<DataType> INT32 = std::make_shared<Int32Type>();
+
+// Round-trip a schema holding one field of each listed primitive type
+TEST_F(TestSchemaMessage, PrimitiveFields) {
+  auto int8_field = std::make_shared<Field>("f0", std::make_shared<Int8Type>());
+  auto int16_field = std::make_shared<Field>("f1", std::make_shared<Int16Type>());
+  auto int32_field = std::make_shared<Field>("f2", std::make_shared<Int32Type>());
+  auto int64_field = std::make_shared<Field>("f3", std::make_shared<Int64Type>());
+  auto uint8_field = std::make_shared<Field>("f4", std::make_shared<UInt8Type>());
+  auto uint16_field =
+      std::make_shared<Field>("f5", std::make_shared<UInt16Type>());
+  auto uint32_field =
+      std::make_shared<Field>("f6", std::make_shared<UInt32Type>());
+  auto uint64_field =
+      std::make_shared<Field>("f7", std::make_shared<UInt64Type>());
+  auto float_field = std::make_shared<Field>("f8", std::make_shared<FloatType>());
+  auto double_field =
+      std::make_shared<Field>("f9", std::make_shared<DoubleType>());
+  auto bool_field =
+      std::make_shared<Field>("f10", std::make_shared<BooleanType>());
+
+  Schema schema({int8_field, int16_field, int32_field, int64_field,
+      uint8_field, uint16_field, uint32_field, uint64_field,
+      float_field, double_field, bool_field});
+  CheckRoundtrip(&schema);
+}
+
+// Round-trip a schema with a list-of-int32 field and a three-member struct
+TEST_F(TestSchemaMessage, NestedFields) {
+  auto list_type = std::make_shared<ListType>(std::make_shared<Int32Type>());
+  auto list_field = std::make_shared<Field>("f0", list_type);
+
+  std::shared_ptr<StructType> struct_type(new StructType({
+      std::make_shared<Field>("k1", INT32),
+      std::make_shared<Field>("k2", INT32),
+      std::make_shared<Field>("k3", INT32)}));
+  auto struct_field = std::make_shared<Field>("f1", struct_type);
+
+  Schema schema({list_field, struct_field});
+  CheckRoundtrip(&schema);
+}
+
+} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/memory.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/memory.cc b/cpp/src/arrow/ipc/memory.cc
new file mode 100644
index 0000000..e630ccd
--- /dev/null
+++ b/cpp/src/arrow/ipc/memory.cc
@@ -0,0 +1,162 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/ipc/memory.h"
+
+#include <sys/mman.h> // For memory-mapping
+#include <algorithm>
+#include <cerrno>
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+#include <sstream>
+#include <string>
+
+#include "arrow/util/buffer.h"
+#include "arrow/util/status.h"
+
+namespace arrow {
+namespace ipc {
+
+// Record the access mode requested for this source
+MemorySource::MemorySource(AccessMode access_mode) :
+    access_mode_(access_mode) {}
+
+// Out-of-line definition of the virtual destructor; nothing to release here
+MemorySource::~MemorySource() {}
+
+// Implement MemoryMappedSource
+
+// Owns the FILE handle and the memory mapping behind a MemoryMappedSource.
+// Both are released in the destructor once a file has been opened.
+class MemoryMappedSource::Impl {
+ public:
+  Impl() :
+      file_(nullptr),
+      size_(0),
+      is_open_(false),
+      data_(nullptr) {}
+
+  ~Impl() {
+    if (is_open_) {
+      munmap(data_, size_);
+      fclose(file_);
+    }
+  }
+
+  // Open `path` and map its entire contents. READ_WRITE opens the file for
+  // update and maps it readable and writable; any other mode opens and maps
+  // it read-only.
+  //
+  // Returns IOError if a file is already open, or if the file cannot be
+  // opened, sized or mapped. On failure no file handle is left open and the
+  // object stays closed.
+  Status Open(const std::string& path, MemorySource::AccessMode mode) {
+    if (is_open_) {
+      return Status::IOError("A file is already open");
+    }
+
+    path_ = path;
+
+    const bool writable = (mode == MemorySource::READ_WRITE);
+    file_ = fopen(path.c_str(), writable ? "r+b" : "rb");
+    if (file_ == nullptr) {
+      std::stringstream ss;
+      ss << "Unable to open file, errno: " << errno;
+      return Status::IOError(ss.str());
+    }
+
+    if (fseek(file_, 0L, SEEK_END) != 0) {
+      return CloseAndFail("Unable to seek to end of file");
+    }
+    // ftell reports failure with a negative value
+    const int64_t file_size = ftell(file_);
+    if (file_size < 0) {
+      return CloseAndFail("Unable to determine the size of the file");
+    }
+    if (fseek(file_, 0L, SEEK_SET) != 0) {
+      return CloseAndFail("Unable to seek to start of file");
+    }
+
+    // Only ask for writable pages when the file itself was opened for
+    // writing; a writable shared mapping of a read-only descriptor fails
+    const int protection = writable ? (PROT_READ | PROT_WRITE) : PROT_READ;
+    void* mapping = mmap(nullptr, static_cast<size_t>(file_size), protection,
+        MAP_SHARED, fileno(file_), 0);
+    // mmap signals failure with MAP_FAILED, not with a null pointer
+    if (mapping == MAP_FAILED) {
+      std::stringstream ss;
+      ss << "Memory mapping file failed, errno: " << errno;
+      return CloseAndFail(ss.str());
+    }
+
+    data_ = static_cast<uint8_t*>(mapping);
+    size_ = file_size;
+    // Mark the source open only once both the file and the mapping exist, so
+    // that the destructor never unmaps an invalid region
+    is_open_ = true;
+
+    return Status::OK();
+  }
+
+  int64_t size() const {
+    return size_;
+  }
+
+  uint8_t* data() {
+    return data_;
+  }
+
+ private:
+  // Close the partially opened file and report `message` as an IOError
+  Status CloseAndFail(const std::string& message) {
+    fclose(file_);
+    file_ = nullptr;
+    return Status::IOError(message);
+  }
+
+  std::string path_;
+  FILE* file_;
+  // Size of the file in bytes, which is also the length of the mapping
+  int64_t size_;
+  bool is_open_;
+
+  // The memory map
+  uint8_t* data_;
+};
+
+// Forwards the access mode to the MemorySource base; impl_ is set up in Open
+MemoryMappedSource::MemoryMappedSource(AccessMode access_mode) :
+    MemorySource(access_mode) {}
+
+// Factory: construct a MemoryMappedSource over the file at `path` with the
+// given access mode. *out is assigned only when opening succeeds; otherwise
+// the error from Impl::Open is returned.
+Status MemoryMappedSource::Open(const std::string& path, AccessMode access_mode,
+    std::shared_ptr<MemoryMappedSource>* out) {
+  std::shared_ptr<MemoryMappedSource> source(
+      new MemoryMappedSource(access_mode));
+  source->impl_.reset(new Impl());
+
+  RETURN_NOT_OK(source->impl_->Open(path, access_mode));
+
+  *out = source;
+  return Status::OK();
+}
+
+// Size in bytes as recorded by the implementation object
+int64_t MemoryMappedSource::Size() const {
+  return impl_->size();
+}
+
+// Currently a no-op that always returns OK: the mapping and the file are
+// released when the Impl object is destroyed, not here.
+Status MemoryMappedSource::Close() {
+  // munmap handled in ::Impl dtor
+  return Status::OK();
+}
+
+// Return a buffer that points directly into the mapping, starting at
+// `position`. The request is truncated at the end of the mapping, so the
+// returned buffer may be shorter than `nbytes`. Returns Invalid when
+// `position` is outside the mapping or `nbytes` is negative.
+Status MemoryMappedSource::ReadAt(int64_t position, int64_t nbytes,
+    std::shared_ptr<Buffer>* out) {
+  if (position < 0 || position >= impl_->size()) {
+    return Status::Invalid("position is out of bounds");
+  }
+  // A negative length would otherwise produce a buffer with a negative size
+  if (nbytes < 0) {
+    return Status::Invalid("nbytes must not be negative");
+  }
+
+  nbytes = std::min(nbytes, impl_->size() - position);
+  *out = std::make_shared<Buffer>(impl_->data() + position, nbytes);
+  return Status::OK();
+}
+
+// Copy `nbytes` bytes from `data` into the mapping at `position`.
+//
+// Nothing is written and an error is returned when the source was not opened
+// READ_WRITE (IOError), when `position` is outside the mapping (Invalid), or
+// when `nbytes` is negative or the write would run past the end of the
+// mapping (Invalid).
+Status MemoryMappedSource::Write(int64_t position, const uint8_t* data,
+    int64_t nbytes) {
+  if (access_mode_ != MemorySource::READ_WRITE) {
+    return Status::IOError("memory source is not writable");
+  }
+  if (position < 0 || position >= impl_->size()) {
+    return Status::Invalid("position is out of bounds");
+  }
+  // Phrased as a subtraction so that the check itself cannot overflow
+  if (nbytes < 0 || nbytes > impl_->size() - position) {
+    return Status::Invalid("write extends past the end of the memory source");
+  }
+
+  // Skip the copy for empty writes, where `data` may be a null pointer
+  if (nbytes > 0) {
+    uint8_t* dst = impl_->data() + position;
+    memcpy(dst, data, nbytes);
+  }
+
+  return Status::OK();
+}
+
+} // namespace ipc
+} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/memory.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/memory.h b/cpp/src/arrow/ipc/memory.h
new file mode 100644
index 0000000..0b4d834
--- /dev/null
+++ b/cpp/src/arrow/ipc/memory.h
@@ -0,0 +1,131 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Public API for different interprocess memory sharing mechanisms
+
+#ifndef ARROW_IPC_MEMORY_H
+#define ARROW_IPC_MEMORY_H
+
+#include <cstdint>
+#include <memory>
+#include <string>
+
+#include "arrow/util/macros.h"
+
+namespace arrow {
+
+class Buffer;
+class MutableBuffer;
+class Status;
+
+namespace ipc {
+
+// Abstract output stream: a pure-virtual interface for writing bytes in
+// sequence. Every operation is implemented by subclasses.
+class OutputStream {
+ public:
+  virtual ~OutputStream() {}
+  // Close the output stream
+  virtual Status Close() = 0;
+
+  // The current position in the output stream
+  virtual int64_t Tell() const = 0;
+
+  // Write `length` bytes starting at `data` to the stream
+  virtual Status Write(const uint8_t* data, int64_t length) = 0;
+};
+
+// An output stream that writes to a MutableBuffer, such as one obtained from a
+// memory map
+// NOTE(review): the constructor initializes only buffer_; capacity_ and
+// position_ are left unset here, and memory.cc defines none of the methods
+// declared below -- confirm where they are meant to be implemented
+class BufferOutputStream : public OutputStream {
+ public:
+  explicit BufferOutputStream(const std::shared_ptr<MutableBuffer>& buffer):
+      buffer_(buffer) {}
+
+  // Implement the OutputStream interface
+  Status Close() override;
+  int64_t Tell() const override;
+  Status Write(const uint8_t* data, int64_t length) override;
+
+  // Returns the number of bytes remaining in the buffer
+  int64_t bytes_remaining() const;
+
+ private:
+  // Destination buffer, shared with whoever created the stream
+  std::shared_ptr<MutableBuffer> buffer_;
+  int64_t capacity_;
+  int64_t position_;
+};
+
+// Abstract interface to a region of memory that can be read, and written,
+// at explicit byte positions. Not copyable.
+class MemorySource {
+ public:
+  // Indicates the access permissions of the memory source
+  enum AccessMode {
+    READ_ONLY,
+    READ_WRITE
+  };
+
+  virtual ~MemorySource();
+
+  // Retrieve a buffer of memory from the source of the indicated size and at
+  // the indicated location
+  // @returns: arrow::Status indicating success / failure. The buffer is set
+  // into the *out argument
+  virtual Status ReadAt(int64_t position, int64_t nbytes,
+      std::shared_ptr<Buffer>* out) = 0;
+
+  virtual Status Close() = 0;
+
+  // Write `nbytes` bytes from `data` into the source at `position`
+  virtual Status Write(int64_t position, const uint8_t* data, int64_t nbytes) = 0;
+
+  // @return: the size in bytes of the memory source
+  virtual int64_t Size() const = 0;
+
+ protected:
+  // Only subclasses construct a MemorySource; the mode defaults to READ_WRITE
+  explicit MemorySource(AccessMode access_mode = AccessMode::READ_WRITE);
+
+  AccessMode access_mode_;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(MemorySource);
+};
+
+// A memory source that uses memory-mapped files for memory interactions.
+// The constructor is private; instances are created through Open.
+class MemoryMappedSource : public MemorySource {
+ public:
+  // Open the file at `path` with the given access mode; on success the new
+  // source is stored in *out
+  static Status Open(const std::string& path, AccessMode access_mode,
+      std::shared_ptr<MemoryMappedSource>* out);
+
+  Status Close() override;
+
+  Status ReadAt(int64_t position, int64_t nbytes,
+      std::shared_ptr<Buffer>* out) override;
+
+  Status Write(int64_t position, const uint8_t* data, int64_t nbytes) override;
+
+  // @return: the size in bytes of the memory source
+  int64_t Size() const override;
+
+ private:
+  explicit MemoryMappedSource(AccessMode access_mode);
+  // Hide the internal details of this class for now
+  class Impl;
+  std::unique_ptr<Impl> impl_;
+};
+
+} // namespace ipc
+} // namespace arrow
+
+#endif // ARROW_IPC_MEMORY_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/metadata-internal.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc
new file mode 100644
index 0000000..14b1869
--- /dev/null
+++ b/cpp/src/arrow/ipc/metadata-internal.cc
@@ -0,0 +1,317 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/ipc/metadata-internal.h"
+
+#include <flatbuffers/flatbuffers.h>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <sstream>
+#include <string>
+
+#include "arrow/ipc/Message_generated.h"
+#include "arrow/schema.h"
+#include "arrow/type.h"
+#include "arrow/util/buffer.h"
+#include "arrow/util/status.h"
+
+// Shorthand names for the flatbuffers builder and offset types used in this
+// file
+typedef flatbuffers::FlatBufferBuilder FBB;
+typedef flatbuffers::Offset<arrow::flatbuf::Field> FieldOffset;
+typedef flatbuffers::Offset<void> Offset;
+
+namespace arrow {
+
+namespace flatbuf = apache::arrow::flatbuf;
+
+namespace ipc {
+
+// Shared instances of the primitive types. The *FromFlatbuffer functions
+// below hand these out instead of allocating a new type object per field
+const std::shared_ptr<DataType> BOOL = std::make_shared<BooleanType>();
+const std::shared_ptr<DataType> INT8 = std::make_shared<Int8Type>();
+const std::shared_ptr<DataType> INT16 = std::make_shared<Int16Type>();
+const std::shared_ptr<DataType> INT32 = std::make_shared<Int32Type>();
+const std::shared_ptr<DataType> INT64 = std::make_shared<Int64Type>();
+const std::shared_ptr<DataType> UINT8 = std::make_shared<UInt8Type>();
+const std::shared_ptr<DataType> UINT16 = std::make_shared<UInt16Type>();
+const std::shared_ptr<DataType> UINT32 = std::make_shared<UInt32Type>();
+const std::shared_ptr<DataType> UINT64 = std::make_shared<UInt64Type>();
+const std::shared_ptr<DataType> FLOAT = std::make_shared<FloatType>();
+const std::shared_ptr<DataType> DOUBLE = std::make_shared<DoubleType>();
+
+// Convert flatbuffer integer metadata into the matching Arrow integer type.
+//
+// Only 8-, 16-, 32- and 64-bit integers are supported, signed or unsigned.
+// @returns: Status::OK with the type set into the *out argument, or
+// Status::NotImplemented for every other bit width (*out is then left
+// untouched)
+static Status IntFromFlatbuffer(const flatbuf::Int* int_data,
+    std::shared_ptr<DataType>* out) {
+  const auto bit_width = int_data->bitWidth();
+  if (bit_width % 8 != 0) {
+    return Status::NotImplemented("Integers not in cstdint are not implemented");
+  } else if (bit_width > 64) {
+    return Status::NotImplemented("Integers with more than 64 bits not implemented");
+  }
+
+  const bool is_signed = int_data->is_signed();
+  switch (bit_width) {
+    case 8:
+      *out = is_signed ? INT8 : UINT8;
+      break;
+    case 16:
+      *out = is_signed ? INT16 : UINT16;
+      break;
+    case 32:
+      *out = is_signed ? INT32 : UINT32;
+      break;
+    case 64:
+      *out = is_signed ? INT64 : UINT64;
+      break;
+    default:
+      // Widths such as 0, 24 or 48 pass both checks above but have no
+      // corresponding type; fail instead of returning OK with a null type
+      return Status::NotImplemented("Integers not in cstdint are not implemented");
+  }
+  return Status::OK();
+}
+
+// Convert flatbuffer floating point metadata into an Arrow type: single
+// precision yields FLOAT, every other precision value yields DOUBLE.
+// @returns: always Status::OK; the type is set into the *out argument
+static Status FloatFromFlatuffer(const flatbuf::FloatingPoint* float_data,
+    std::shared_ptr<DataType>* out) {
+  const bool is_single = float_data->precision() == flatbuf::Precision_SINGLE;
+  *out = is_single ? FLOAT : DOUBLE;
+  return Status::OK();
+}
+
+// Build an Arrow DataType from a flatbuffer type union.
+//
+// type is the union discriminator, type_data the opaque union payload, and
+// children the already converted child fields of the enclosing Field.
+// @returns: Status::OK with the type set into the *out argument;
+// Status::Invalid for a NONE or unrecognized discriminator and for a List
+// that does not have exactly one child; Status::NotImplemented for types
+// that have no conversion yet
+static Status TypeFromFlatbuffer(flatbuf::Type type,
+    const void* type_data, const std::vector<std::shared_ptr<Field>>& children,
+    std::shared_ptr<DataType>* out) {
+  switch (type) {
+    case flatbuf::Type_NONE:
+      return Status::Invalid("Type metadata cannot be none");
+    case flatbuf::Type_Int:
+      return IntFromFlatbuffer(static_cast<const flatbuf::Int*>(type_data), out);
+    case flatbuf::Type_FloatingPoint:
+      return FloatFromFlatuffer(static_cast<const flatbuf::FloatingPoint*>(type_data),
+          out);
+    case flatbuf::Type_Bool:
+      *out = BOOL;
+      return Status::OK();
+    case flatbuf::Type_List:
+      if (children.size() != 1) {
+        return Status::Invalid("List must have exactly 1 child field");
+      }
+      *out = std::make_shared<ListType>(children[0]);
+      return Status::OK();
+    case flatbuf::Type_Tuple:
+      *out = std::make_shared<StructType>(children);
+      return Status::OK();
+    // Decimal and Timestamp are listed here explicitly so that they cannot
+    // fall through into the List case and be mistaken for a list
+    case flatbuf::Type_Bit:
+    case flatbuf::Type_Binary:
+    case flatbuf::Type_Utf8:
+    case flatbuf::Type_Decimal:
+    case flatbuf::Type_Timestamp:
+    case flatbuf::Type_Union:
+      return Status::NotImplemented("Type is not implemented");
+    default:
+      return Status::Invalid("Unrecognized type");
+  }
+}
+
+// Forward declaration
+static Status FieldToFlatbuffer(FBB& fbb, const std::shared_ptr<Field>& field,
+    FieldOffset* offset);
+
+// Serialize an Int type with the given width and signedness and return its
+// offset as an untyped union offset
+static Offset IntToFlatbuffer(FBB& fbb, int bitWidth,
+    bool is_signed) {
+  auto int_offset = flatbuf::CreateInt(fbb, bitWidth, is_signed);
+  return int_offset.Union();
+}
+
+// Serialize a FloatingPoint type with the given precision and return its
+// offset as an untyped union offset
+static Offset FloatToFlatbuffer(FBB& fbb,
+    flatbuf::Precision precision) {
+  auto float_offset = flatbuf::CreateFloatingPoint(fbb, precision);
+  return float_offset.Union();
+}
+
+// Serialize a list type: child 0 of the type is converted and appended to
+// *out_children, then an empty List table is written and its union offset is
+// stored in *offset.
+// @returns: the failure of the child conversion, otherwise Status::OK
+static Status ListToFlatbuffer(FBB& fbb, const std::shared_ptr<DataType>& type,
+    std::vector<FieldOffset>* out_children, Offset* offset) {
+  FieldOffset child_offset;
+  RETURN_NOT_OK(FieldToFlatbuffer(fbb, type->child(0), &child_offset));
+  out_children->push_back(child_offset);
+
+  auto list_offset = flatbuf::CreateList(fbb);
+  *offset = list_offset.Union();
+  return Status::OK();
+}
+
+// Serialize a struct type: every child of the type is converted in order and
+// appended to *out_children, then an empty Tuple table is written and its
+// union offset is stored in *offset.
+// @returns: the first child conversion failure, otherwise Status::OK
+static Status StructToFlatbuffer(FBB& fbb, const std::shared_ptr<DataType>& type,
+    std::vector<FieldOffset>* out_children, Offset* offset) {
+  for (int i = 0; i < type->num_children(); ++i) {
+    FieldOffset child_offset;
+    RETURN_NOT_OK(FieldToFlatbuffer(fbb, type->child(i), &child_offset));
+    out_children->push_back(child_offset);
+  }
+
+  auto tuple_offset = flatbuf::CreateTuple(fbb);
+  *offset = tuple_offset.Union();
+  return Status::OK();
+}
+
+// Body of one integer case label in TypeToFlatbuffer: records the Int union
+// type, serializes an Int of the given width / signedness, and breaks out of
+// the switch. Expects out_type, offset and fbb to be in scope where it is
+// expanded
+#define INT_TO_FB_CASE(BIT_WIDTH, IS_SIGNED)            \
+  *out_type = flatbuf::Type_Int;                        \
+  *offset = IntToFlatbuffer(fbb, BIT_WIDTH, IS_SIGNED); \
+  break;
+
+
+// Serialize the type metadata of an Arrow DataType.
+//
+// The union discriminator is written to *out_type and the offset of the
+// union payload to *offset. For list and struct types the serialized child
+// fields are appended to *children.
+// @returns: Status::NotImplemented for types without a conversion, the
+// failure of a nested conversion, otherwise Status::OK
+static Status TypeToFlatbuffer(FBB& fbb, const std::shared_ptr<DataType>& type,
+    std::vector<FieldOffset>* children,
+    flatbuf::Type* out_type, Offset* offset) {
+  switch (type->type) {
+    case Type::BOOL:
+      *out_type = flatbuf::Type_Bool;
+      *offset = flatbuf::CreateBool(fbb).Union();
+      return Status::OK();
+    case Type::UINT8:
+      INT_TO_FB_CASE(8, false);
+    case Type::INT8:
+      INT_TO_FB_CASE(8, true);
+    case Type::UINT16:
+      INT_TO_FB_CASE(16, false);
+    case Type::INT16:
+      INT_TO_FB_CASE(16, true);
+    case Type::UINT32:
+      INT_TO_FB_CASE(32, false);
+    case Type::INT32:
+      INT_TO_FB_CASE(32, true);
+    case Type::UINT64:
+      INT_TO_FB_CASE(64, false);
+    case Type::INT64:
+      INT_TO_FB_CASE(64, true);
+    case Type::FLOAT:
+    case Type::DOUBLE: {
+      // Both floating point types share one table and differ in precision
+      const flatbuf::Precision precision = (type->type == Type::FLOAT) ?
+          flatbuf::Precision_SINGLE : flatbuf::Precision_DOUBLE;
+      *out_type = flatbuf::Type_FloatingPoint;
+      *offset = FloatToFlatbuffer(fbb, precision);
+      return Status::OK();
+    }
+    case Type::LIST:
+      *out_type = flatbuf::Type_List;
+      return ListToFlatbuffer(fbb, type, children, offset);
+    case Type::STRUCT:
+      *out_type = flatbuf::Type_Tuple;
+      return StructToFlatbuffer(fbb, type, children, offset);
+    default: {
+      std::stringstream message;
+      message << "Unable to convert type: " << type->ToString()
+              << std::endl;
+      return Status::NotImplemented(message.str());
+    }
+  }
+  // Reached by the integer cases, which break out of the switch
+  return Status::OK();
+}
+
+// Serialize one Field (name, nullability, type and child fields) into the
+// builder and store the offset of the resulting table in *offset.
+// @returns: the failure of the type conversion, otherwise Status::OK
+static Status FieldToFlatbuffer(FBB& fbb, const std::shared_ptr<Field>& field,
+    FieldOffset* offset) {
+  auto name_offset = fbb.CreateString(field->name);
+
+  // The type conversion also collects the serialized child fields
+  std::vector<FieldOffset> child_offsets;
+  flatbuf::Type type_tag;
+  Offset type_offset;
+  RETURN_NOT_OK(TypeToFlatbuffer(fbb, field->type, &child_offsets, &type_tag,
+          &type_offset));
+
+  auto children_offset = fbb.CreateVector(child_offsets);
+  *offset = flatbuf::CreateField(fbb, name_offset, field->nullable, type_tag,
+      type_offset, children_offset);
+  return Status::OK();
+}
+
+// Reconstruct an arrow::Field (including all nested child fields) from its
+// flatbuffer representation.
+// @returns: the first failure encountered while converting a child or the
+// field's type, otherwise Status::OK with the field set into the *out
+// argument
+Status FieldFromFlatbuffer(const flatbuf::Field* field,
+    std::shared_ptr<Field>* out) {
+  // The children vector is optional in the serialized form; treat a missing
+  // vector as "no children" instead of dereferencing a null pointer
+  std::vector<std::shared_ptr<Field>> child_fields;
+  const auto children = field->children();
+  if (children != nullptr) {
+    child_fields.resize(children->size());
+    for (size_t i = 0; i < children->size(); ++i) {
+      RETURN_NOT_OK(FieldFromFlatbuffer(children->Get(i), &child_fields[i]));
+    }
+  }
+
+  std::shared_ptr<DataType> type;
+  RETURN_NOT_OK(TypeFromFlatbuffer(field->type_type(),
+          field->type(), child_fields, &type));
+
+  // Carry over the nullable flag written by FieldToFlatbuffer; without it
+  // every field would come back as nullable
+  *out = std::make_shared<Field>(field->name()->str(), type, field->nullable());
+  return Status::OK();
+}
+
+// Implement MessageBuilder
+
+// Make this message a Schema message: every field of the schema is
+// serialized in order and collected into the Schema header. A schema message
+// has a body length of 0.
+// @returns: the first field conversion failure, otherwise Status::OK
+Status MessageBuilder::SetSchema(const Schema* schema) {
+  header_type_ = flatbuf::MessageHeader_Schema;
+
+  const int num_fields = schema->num_fields();
+  std::vector<FieldOffset> field_offsets;
+  field_offsets.reserve(num_fields);
+  for (int i = 0; i < num_fields; ++i) {
+    FieldOffset field_offset;
+    RETURN_NOT_OK(FieldToFlatbuffer(fbb_, schema->field(i), &field_offset));
+    field_offsets.push_back(field_offset);
+  }
+
+  auto fields_vector = fbb_.CreateVector(field_offsets);
+  header_ = flatbuf::CreateSchema(fbb_, fields_vector).Union();
+  body_length_ = 0;
+  return Status::OK();
+}
+
+// Make this message a RecordBatch message with the given length, field
+// nodes and buffer descriptors, and remember body_length for Finish().
+// @returns: always Status::OK
+Status MessageBuilder::SetRecordBatch(int32_t length, int64_t body_length,
+    const std::vector<flatbuf::FieldNode>& nodes,
+    const std::vector<flatbuf::Buffer>& buffers) {
+  header_type_ = flatbuf::MessageHeader_RecordBatch;
+  body_length_ = body_length;
+
+  auto nodes_vector = fbb_.CreateVectorOfStructs(nodes);
+  auto buffers_vector = fbb_.CreateVectorOfStructs(buffers);
+  header_ = flatbuf::CreateRecordBatch(fbb_, length, nodes_vector,
+      buffers_vector).Union();
+
+  return Status::OK();
+}
+
+
+// Build a complete, size-prefixed RecordBatch message from the given
+// metadata.
+// @returns: the first failing step, otherwise the status of GetBuffer, which
+// sets the serialized message into the *out argument
+Status WriteDataHeader(int32_t length, int64_t body_length,
+    const std::vector<flatbuf::FieldNode>& nodes,
+    const std::vector<flatbuf::Buffer>& buffers,
+    std::shared_ptr<Buffer>* out) {
+  MessageBuilder builder;
+  RETURN_NOT_OK(builder.SetRecordBatch(length, body_length, nodes, buffers));
+  RETURN_NOT_OK(builder.Finish());
+  return builder.GetBuffer(out);
+}
+
+// Write the top-level Message table from the header type, header offset and
+// body length recorded so far, and finish the flatbuffer with it as root.
+// @returns: always Status::OK
+Status MessageBuilder::Finish() {
+  fbb_.Finish(flatbuf::CreateMessage(fbb_, header_type_, header_, body_length_));
+  return Status::OK();
+}
+
+// Copy the finished flatbuffer into a newly allocated PoolBuffer.
+//
+// The message buffer is prefixed by the size of the complete flatbuffer as
+// int32_t
+// <int32_t: flatbuffer size><uint8_t*: flatbuffer data>
+// @returns: the failure of the buffer resize, otherwise Status::OK with the
+// buffer set into the *out argument
+Status MessageBuilder::GetBuffer(std::shared_ptr<Buffer>* out) {
+  int32_t flatbuffer_size = fbb_.GetSize();
+  const size_t prefix_size = sizeof(int32_t);
+
+  auto buffer = std::make_shared<PoolBuffer>();
+  RETURN_NOT_OK(buffer->Resize(flatbuffer_size + prefix_size));
+
+  uint8_t* dst = buffer->mutable_data();
+  memcpy(dst, &flatbuffer_size, prefix_size);
+  memcpy(dst + prefix_size, fbb_.GetBufferPointer(), flatbuffer_size);
+
+  *out = buffer;
+  return Status::OK();
+}
+
+} // namespace ipc
+} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/metadata-internal.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/metadata-internal.h b/cpp/src/arrow/ipc/metadata-internal.h
new file mode 100644
index 0000000..f7365d2
--- /dev/null
+++ b/cpp/src/arrow/ipc/metadata-internal.h
@@ -0,0 +1,69 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_IPC_METADATA_INTERNAL_H
+#define ARROW_IPC_METADATA_INTERNAL_H
+
+#include <flatbuffers/flatbuffers.h>
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include "arrow/ipc/Message_generated.h"
+
+namespace arrow {
+
+namespace flatbuf = apache::arrow::flatbuf;
+
+class Buffer;
+struct Field;
+class Schema;
+class Status;
+
+namespace ipc {
+
+// Construct an arrow::Field from its flatbuffer representation. The field is
+// set into the *out argument
+Status FieldFromFlatbuffer(const flatbuf::Field* field,
+    std::shared_ptr<Field>* out);
+
+// Assembles one flatbuffer Message.
+//
+// Usage: call SetSchema or SetRecordBatch to choose the header, then
+// Finish() to write the Message table, then GetBuffer() to obtain the
+// serialized bytes.
+class MessageBuilder {
+ public:
+  // Use the given schema as the message header
+  Status SetSchema(const Schema* schema);
+
+  // Use record batch metadata as the message header; body_length is stored
+  // in the message
+  Status SetRecordBatch(int32_t length, int64_t body_length,
+      const std::vector<flatbuf::FieldNode>& nodes,
+      const std::vector<flatbuf::Buffer>& buffers);
+
+  // Write the Message table and finish the flatbuffer
+  Status Finish();
+
+  // Copy the finished, size-prefixed message into the *out argument
+  Status GetBuffer(std::shared_ptr<Buffer>* out);
+
+ private:
+  // Start out as a header-less message with an empty body so that Finish()
+  // never reads an indeterminate value when no Set* method was called
+  flatbuf::MessageHeader header_type_ = flatbuf::MessageHeader_NONE;
+  flatbuffers::Offset<void> header_;
+  int64_t body_length_ = 0;
+  flatbuffers::FlatBufferBuilder fbb_;
+};
+
+// Serialize record batch metadata (length, body length, field nodes and
+// buffer descriptors) as a message. The result is set into the *out argument
+Status WriteDataHeader(int32_t length, int64_t body_length,
+    const std::vector<flatbuf::FieldNode>& nodes,
+    const std::vector<flatbuf::Buffer>& buffers,
+    std::shared_ptr<Buffer>* out);
+
+} // namespace ipc
+} // namespace arrow
+
+#endif // ARROW_IPC_METADATA_INTERNAL_H


[2/3] arrow git commit: ARROW-67: C++ metadata flatbuffer serialization and data movement to memory maps

Posted by we...@apache.org.
http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/metadata.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/metadata.cc b/cpp/src/arrow/ipc/metadata.cc
new file mode 100644
index 0000000..642f21a
--- /dev/null
+++ b/cpp/src/arrow/ipc/metadata.cc
@@ -0,0 +1,238 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/ipc/metadata.h"
+
+#include <flatbuffers/flatbuffers.h>
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+// Generated C++ flatbuffer IDL
+#include "arrow/ipc/Message_generated.h"
+#include "arrow/ipc/metadata-internal.h"
+
+#include "arrow/schema.h"
+#include "arrow/util/buffer.h"
+#include "arrow/util/status.h"
+
+namespace arrow {
+
+namespace flatbuf = apache::arrow::flatbuf;
+
+namespace ipc {
+
+// Serialize the schema as a complete message.
+// @returns: the first failing step, otherwise the status of GetBuffer, which
+// sets the serialized message into the *out argument
+Status WriteSchema(const Schema* schema, std::shared_ptr<Buffer>* out) {
+  MessageBuilder builder;
+  RETURN_NOT_OK(builder.SetSchema(schema));
+  RETURN_NOT_OK(builder.Finish());
+  return builder.GetBuffer(out);
+}
+
+//----------------------------------------------------------------------
+// Message reader
+
+// Implementation of Message: holds on to the buffer that backs the
+// flatbuffer and exposes accessors over the flatbuffer Message stored in it
+class Message::Impl {
+ public:
+  explicit Impl(const std::shared_ptr<Buffer>& buffer,
+      const flatbuf::Message* message) :
+      buffer_(buffer),
+      message_(message) {}
+
+  // Translate the flatbuffer header discriminator into the public enum;
+  // every value without a counterpart maps to NONE
+  Message::Type type() const {
+    const auto header_type = message_->header_type();
+    if (header_type == flatbuf::MessageHeader_Schema) {
+      return Message::SCHEMA;
+    }
+    if (header_type == flatbuf::MessageHeader_DictionaryBatch) {
+      return Message::DICTIONARY_BATCH;
+    }
+    if (header_type == flatbuf::MessageHeader_RecordBatch) {
+      return Message::RECORD_BATCH;
+    }
+    return Message::NONE;
+  }
+
+  // Untyped pointer to the header table; type() tells how to interpret it
+  const void* header() const {
+    return message_->header();
+  }
+
+  int64_t body_length() const {
+    return message_->bodyLength();
+  }
+
+ private:
+  // Owns the memory this message accesses
+  std::shared_ptr<Buffer> buffer_;
+
+  // Points into the memory owned by buffer_
+  const flatbuf::Message* message_;
+};
+
+// Implementation of SchemaMessage: a typed view over the flatbuffer Schema
+// table. Does not own the memory it points to
+class SchemaMessage::Impl {
+ public:
+  explicit Impl(const void* schema) :
+      schema_(static_cast<const flatbuf::Schema*>(schema)) {}
+
+  // The i-th serialized field. Does not boundscheck
+  const flatbuf::Field* field(int i) const {
+    const auto fields = schema_->fields();
+    return fields->Get(i);
+  }
+
+  int num_fields() const {
+    const auto fields = schema_->fields();
+    return fields->size();
+  }
+
+ private:
+  const flatbuf::Schema* schema_;
+};
+
+// The implementation object is attached afterwards by Message::Open
+Message::Message() = default;
+
+// Wrap a serialized message without copying it; the resulting Message keeps
+// a reference to the buffer.
+// @returns: always Status::OK; the message is set into the *out argument
+Status Message::Open(const std::shared_ptr<Buffer>& buffer,
+    std::shared_ptr<Message>* out) {
+  std::shared_ptr<Message> message(new Message());
+
+  // The buffer is prefixed by its size as int32_t, so the flatbuffer itself
+  // starts right after the prefix
+  const uint8_t* flatbuffer_start = buffer->data() + sizeof(int32_t);
+
+  // TODO(wesm): verify message
+  message->impl_.reset(new Impl(buffer, flatbuf::GetMessage(flatbuffer_start)));
+
+  *out = message;
+  return Status::OK();
+}
+
+// Kind of header carried by this message, as reported by the implementation
+Message::Type Message::type() const {
+  return impl_->type();
+}
+
+// Body length recorded in the serialized message
+int64_t Message::body_length() const {
+  return impl_->body_length();
+}
+
+// Another shared_ptr to this message, obtained through
+// enable_shared_from_this
+std::shared_ptr<Message> Message::get_shared_ptr() {
+  return shared_from_this();
+}
+
+// View the header of this message as schema metadata. The returned object
+// shares ownership of this message
+std::shared_ptr<SchemaMessage> Message::GetSchema() {
+  std::shared_ptr<Message> self = shared_from_this();
+  return std::make_shared<SchemaMessage>(self, impl_->header());
+}
+
+// Keeps a reference to the parent message and wraps the opaque schema
+// pointer in the implementation object
+SchemaMessage::SchemaMessage(const std::shared_ptr<Message>& message,
+    const void* schema) :
+    message_(message),
+    impl_(new Impl(schema)) {}
+
+// Number of fields in the serialized schema
+int SchemaMessage::num_fields() const {
+  return impl_->num_fields();
+}
+
+// Convert the i-th serialized field into an arrow::Field set into the *out
+// argument. Does not boundscheck
+Status SchemaMessage::GetField(int i, std::shared_ptr<Field>* out) const {
+  return FieldFromFlatbuffer(impl_->field(i), out);
+}
+
+// Convert every serialized field and assemble them into a Schema.
+// @returns: the first field conversion failure, otherwise Status::OK with
+// the schema set into the *out argument
+Status SchemaMessage::GetSchema(std::shared_ptr<Schema>* out) const {
+  const int field_count = num_fields();
+  std::vector<std::shared_ptr<Field>> fields(field_count);
+  for (int i = 0; i < field_count; ++i) {
+    RETURN_NOT_OK(GetField(i, &fields[i]));
+  }
+
+  *out = std::make_shared<Schema>(fields);
+  return Status::OK();
+}
+
+// Implementation of RecordBatchMessage: a typed view over the flatbuffer
+// RecordBatch table with its node and buffer vectors looked up once. Does
+// not own the memory it points to
+class RecordBatchMessage::Impl {
+ public:
+  explicit Impl(const void* batch) :
+      batch_(static_cast<const flatbuf::RecordBatch*>(batch)),
+      nodes_(batch_->nodes()),
+      buffers_(batch_->buffers()) {}
+
+  // The i-th field node. Does not boundscheck
+  const flatbuf::FieldNode* field(int i) const {
+    return nodes_->Get(i);
+  }
+
+  // The i-th buffer descriptor. Does not boundscheck
+  const flatbuf::Buffer* buffer(int i) const {
+    return buffers_->Get(i);
+  }
+
+  int32_t length() const {
+    return batch_->length();
+  }
+
+  int num_buffers() const {
+    return buffers_->size();
+  }
+
+  int num_fields() const {
+    return nodes_->size();
+  }
+
+ private:
+  // Declared first: the two vectors below are initialized from it
+  const flatbuf::RecordBatch* batch_;
+  const flatbuffers::Vector<const flatbuf::FieldNode*>* nodes_;
+  const flatbuffers::Vector<const flatbuf::Buffer*>* buffers_;
+};
+
+// View the header of this message as record batch metadata. The returned
+// object shares ownership of this message
+std::shared_ptr<RecordBatchMessage> Message::GetRecordBatch() {
+  std::shared_ptr<Message> self = shared_from_this();
+  return std::make_shared<RecordBatchMessage>(self, impl_->header());
+}
+
+// Keeps a reference to the parent message and wraps the opaque record batch
+// pointer in the implementation object
+RecordBatchMessage::RecordBatchMessage(const std::shared_ptr<Message>& message,
+    const void* batch) :
+    message_(message),
+    impl_(new Impl(batch)) {}
+
+// Copy the length and null count of the i-th field node into a plain struct.
+// Does not boundscheck.
+// TODO(wesm): Copying the flatbuffer data isn't great, but this will do for
+// now
+FieldMetadata RecordBatchMessage::field(int i) const {
+  const flatbuf::FieldNode* fb_node = impl_->field(i);
+
+  FieldMetadata metadata;
+  metadata.length = fb_node->length();
+  metadata.null_count = fb_node->null_count();
+  return metadata;
+}
+
+// Copy the page, offset and length of the i-th buffer descriptor into a
+// plain struct. Does not boundscheck
+BufferMetadata RecordBatchMessage::buffer(int i) const {
+  const flatbuf::Buffer* fb_buffer = impl_->buffer(i);
+
+  BufferMetadata metadata;
+  metadata.page = fb_buffer->page();
+  metadata.offset = fb_buffer->offset();
+  metadata.length = fb_buffer->length();
+  return metadata;
+}
+
+// Length recorded in the serialized record batch metadata
+int32_t RecordBatchMessage::length() const {
+  return impl_->length();
+}
+
+// Number of buffer descriptors in the serialized record batch metadata
+int RecordBatchMessage::num_buffers() const {
+  return impl_->num_buffers();
+}
+
+// Number of field nodes in the serialized record batch metadata
+int RecordBatchMessage::num_fields() const {
+  return impl_->num_fields();
+}
+
+} // namespace ipc
+} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/metadata.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/metadata.h b/cpp/src/arrow/ipc/metadata.h
new file mode 100644
index 0000000..c728852
--- /dev/null
+++ b/cpp/src/arrow/ipc/metadata.h
@@ -0,0 +1,146 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// C++ object model and user API for interprocess schema messaging
+
+#ifndef ARROW_IPC_METADATA_H
+#define ARROW_IPC_METADATA_H
+
+#include <cstdint>
+#include <memory>
+
+namespace arrow {
+
+class Buffer;
+struct Field;
+class Schema;
+class Status;
+
+namespace ipc {
+
+//----------------------------------------------------------------------
+// Message read/write APIs
+
+// Serialize arrow::Schema as a Flatbuffer
+Status WriteSchema(const Schema* schema, std::shared_ptr<Buffer>* out);
+
+//----------------------------------------------------------------------
+
+// Read interface classes. We do not fully deserialize the flatbuffers so that
+// individual fields metadata can be retrieved from very large schema without
+// materializing the complete schema
+
+class Message;
+
+// Container for serialized Schema metadata contained in an IPC message
+//
+// Fields are converted on request, so a single field can be inspected
+// without building the whole schema.
+class SchemaMessage {
+ public:
+  // Accepts an opaque flatbuffer pointer
+  SchemaMessage(const std::shared_ptr<Message>& message, const void* schema);
+
+  // Number of fields in the serialized schema
+  int num_fields() const;
+
+  // Construct an arrow::Field for the i-th value in the metadata
+  Status GetField(int i, std::shared_ptr<Field>* out) const;
+
+  // Construct a complete Schema from the message. May be expensive for very
+  // large schemas if you are only interested in a few fields
+  Status GetSchema(std::shared_ptr<Schema>* out) const;
+
+ private:
+  // Parent, owns the flatbuffer data
+  std::shared_ptr<Message> message_;
+
+  // Hide serialization details from user API
+  class Impl;
+  std::unique_ptr<Impl> impl_;
+};
+
+// Field metadata: a plain copy of the values stored in one field node of a
+// record batch message
+struct FieldMetadata {
+  // Length reported by the field node
+  int32_t length;
+  // Null count reported by the field node
+  int32_t null_count;
+};
+
+// Buffer metadata: a plain copy of the values stored in one buffer
+// descriptor of a record batch message
+struct BufferMetadata {
+  // Page value of the descriptor
+  int32_t page;
+  // Offset value of the descriptor (presumably in bytes -- TODO confirm)
+  int64_t offset;
+  // Length value of the descriptor (presumably in bytes -- TODO confirm)
+  int64_t length;
+};
+
+// Container for serialized record batch metadata contained in an IPC message
+class RecordBatchMessage {
+ public:
+  // Accepts an opaque flatbuffer pointer
+  RecordBatchMessage(const std::shared_ptr<Message>& message,
+      const void* batch_meta);
+
+  // Copies of the i-th field node / buffer descriptor
+  FieldMetadata field(int i) const;
+  BufferMetadata buffer(int i) const;
+
+  // Length recorded in the record batch metadata
+  int32_t length() const;
+  // Number of buffer descriptors / field nodes in the metadata
+  int num_buffers() const;
+  int num_fields() const;
+
+ private:
+  // Parent, owns the flatbuffer data
+  std::shared_ptr<Message> message_;
+
+  // Hide serialization details from user API
+  class Impl;
+  std::unique_ptr<Impl> impl_;
+};
+
+// Interface for dictionary batch metadata contained in an IPC message.
+// NOTE(review): only declarations are given here -- confirm that definitions
+// exist before calling these members
+class DictionaryBatchMessage {
+ public:
+  int64_t id() const;
+  std::unique_ptr<RecordBatchMessage> data() const;
+};
+
+// A serialized IPC message. Instances are created with Open(); the
+// constructor is private.
+class Message : public std::enable_shared_from_this<Message> {
+ public:
+  // Kind of header carried by a message
+  enum Type {
+    NONE,
+    SCHEMA,
+    DICTIONARY_BATCH,
+    RECORD_BATCH
+  };
+
+  // Wrap the serialized message held in buffer. The message is set into the
+  // *out argument
+  static Status Open(const std::shared_ptr<Buffer>& buffer,
+    std::shared_ptr<Message>* out);
+
+  // Another shared_ptr to this message
+  std::shared_ptr<Message> get_shared_ptr();
+
+  // Body length recorded in the message
+  int64_t body_length() const;
+
+  // Kind of header carried by this message
+  Type type() const;
+
+  // These methods only to be invoked if you have checked the message type
+  std::shared_ptr<SchemaMessage> GetSchema();
+  std::shared_ptr<RecordBatchMessage> GetRecordBatch();
+  std::shared_ptr<DictionaryBatchMessage> GetDictionaryBatch();
+
+ private:
+  Message();
+
+  // Hide serialization details from user API
+  class Impl;
+  std::unique_ptr<Impl> impl_;
+};
+
+} // namespace ipc
+} // namespace arrow
+
+#endif // ARROW_IPC_METADATA_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/ipc/test-common.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/test-common.h b/cpp/src/arrow/ipc/test-common.h
new file mode 100644
index 0000000..0fccce9
--- /dev/null
+++ b/cpp/src/arrow/ipc/test-common.h
@@ -0,0 +1,53 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_IPC_TEST_COMMON_H
+#define ARROW_IPC_TEST_COMMON_H
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace arrow {
+namespace ipc {
+
+// Test helper that creates files of a given size and deletes them again in
+// TearDown()
+class MemoryMapFixture {
+ public:
+  // Delete every file that CreateFile managed to open
+  void TearDown() {
+    for (const auto& path : tmp_files_) {
+      std::remove(path.c_str());
+    }
+  }
+
+  // Create (or truncate) the file at path and set its length to size bytes.
+  // Does nothing when the file cannot be opened
+  void CreateFile(const std::string path, int64_t size) {
+    FILE* file = fopen(path.c_str(), "w");
+    if (file == nullptr) {
+      // Nothing was opened, so there is nothing to resize, close or clean up
+      return;
+    }
+    tmp_files_.push_back(path);
+    ftruncate(fileno(file), size);
+    fclose(file);
+  }
+
+ private:
+  // Paths of the files to delete in TearDown()
+  std::vector<std::string> tmp_files_;
+};
+
+} // namespace ipc
+} // namespace arrow
+
+#endif // ARROW_IPC_TEST_COMMON_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/schema-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/schema-test.cc b/cpp/src/arrow/schema-test.cc
new file mode 100644
index 0000000..a1de1dc
--- /dev/null
+++ b/cpp/src/arrow/schema-test.cc
@@ -0,0 +1,104 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "arrow/schema.h"
+#include "arrow/type.h"
+
+using std::shared_ptr;
+using std::vector;
+
+namespace arrow {
+
+// Type instance shared by the tests in this file
+const auto INT32 = std::make_shared<Int32Type>();
+
+// A field exposes its name and type, and is nullable unless constructed with
+// nullable = false
+TEST(TestField, Basics) {
+  Field f0("f0", INT32);
+  Field f0_nn("f0", INT32, false);
+
+  ASSERT_EQ(f0.name, "f0");
+  ASSERT_EQ(f0.type->ToString(), INT32->ToString());
+
+  ASSERT_TRUE(f0.nullable);
+  ASSERT_FALSE(f0_nn.nullable);
+}
+
+// Fields with the same name and type compare equal only if their
+// nullability matches as well
+TEST(TestField, Equals) {
+  Field f0("f0", INT32);
+  Field f0_nn("f0", INT32, false);
+  Field f0_other("f0", INT32);
+
+  ASSERT_EQ(f0, f0_other);
+  ASSERT_NE(f0, f0_nn);
+}
+
+// Fixture for the Schema tests; needs no per-test setup
+class TestSchema : public ::testing::Test {
+ public:
+  void SetUp() {}
+};
+
+// A schema hands back the field pointers it was built from, and two schemas
+// are equal only if all of their fields are equal (including nullability)
+TEST_F(TestSchema, Basics) {
+  auto f0 = std::make_shared<Field>("f0", INT32);
+  auto f1 = std::make_shared<Field>("f1", std::make_shared<UInt8Type>(), false);
+  auto f1_optional = std::make_shared<Field>("f1", std::make_shared<UInt8Type>());
+
+  auto f2 = std::make_shared<Field>("f2", std::make_shared<StringType>());
+
+  vector<shared_ptr<Field>> fields = {f0, f1, f2};
+  auto schema = std::make_shared<Schema>(fields);
+
+  ASSERT_EQ(3, schema->num_fields());
+  ASSERT_EQ(f0, schema->field(0));
+  ASSERT_EQ(f1, schema->field(1));
+  ASSERT_EQ(f2, schema->field(2));
+
+  auto schema2 = std::make_shared<Schema>(fields);
+
+  // Same as fields except that f1 is nullable
+  vector<shared_ptr<Field>> fields3 = {f0, f1_optional, f2};
+  auto schema3 = std::make_shared<Schema>(fields3);
+  ASSERT_TRUE(schema->Equals(schema2));
+  ASSERT_FALSE(schema->Equals(schema3));
+
+  // Exercise the overload taking a reference as well
+  ASSERT_TRUE(schema->Equals(*schema2.get()));
+  ASSERT_FALSE(schema->Equals(*schema3.get()));
+}
+
+// The string form of a schema lists one field per line, without a trailing
+// line break
+TEST_F(TestSchema, ToString) {
+  auto f0 = std::make_shared<Field>("f0", INT32);
+  auto f1 = std::make_shared<Field>("f1", std::make_shared<UInt8Type>(), false);
+  auto f2 = std::make_shared<Field>("f2", std::make_shared<StringType>());
+  auto f3 = std::make_shared<Field>("f3",
+      std::make_shared<ListType>(std::make_shared<Int16Type>()));
+
+  vector<shared_ptr<Field>> fields = {f0, f1, f2, f3};
+  auto schema = std::make_shared<Schema>(fields);
+
+  std::string result = schema->ToString();
+  std::string expected = R"(f0: int32
f1: uint8 not null
f2: string
f3: list<item: int16>)";
+
+  ASSERT_EQ(expected, result);
+}
+
+} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/schema.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/schema.cc b/cpp/src/arrow/schema.cc
new file mode 100644
index 0000000..18aad0e
--- /dev/null
+++ b/cpp/src/arrow/schema.cc
@@ -0,0 +1,63 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/schema.h"
+
+#include <memory>
+#include <string>
+#include <sstream>
+#include <vector>
+
+#include "arrow/type.h"
+
+namespace arrow {
+
+// Stores a copy of the given vector of field pointers
+Schema::Schema(const std::vector<std::shared_ptr<Field>>& fields) :
+    fields_(fields) {}
+
+// Two schemas are equal when they have the same number of fields and every
+// pair of fields at the same position is equal
+bool Schema::Equals(const Schema& other) const {
+  // Comparing a schema with itself needs no field-by-field check
+  if (&other == this) {
+    return true;
+  }
+
+  const int field_count = num_fields();
+  if (other.num_fields() != field_count) {
+    return false;
+  }
+
+  for (int i = 0; i < field_count; ++i) {
+    const bool same_field = field(i)->Equals(*other.field(i));
+    if (!same_field) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Pointer overload of Equals. A null pointer is never equal to a schema
+bool Schema::Equals(const std::shared_ptr<Schema>& other) const {
+  if (!other) {
+    // Avoid dereferencing a null pointer
+    return false;
+  }
+  return Equals(*other);
+}
+
+// Render one field per line; lines are separated by a line break and there
+// is none after the last field
+std::string Schema::ToString() const {
+  std::stringstream buffer;
+
+  for (size_t i = 0; i < fields_.size(); ++i) {
+    if (i != 0) {
+      buffer << std::endl;
+    }
+    buffer << fields_[i]->ToString();
+  }
+  return buffer.str();
+}
+
+} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/schema.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/schema.h b/cpp/src/arrow/schema.h
new file mode 100644
index 0000000..52f3c1c
--- /dev/null
+++ b/cpp/src/arrow/schema.h
@@ -0,0 +1,55 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_SCHEMA_H
+#define ARROW_SCHEMA_H
+
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace arrow {
+
+struct Field;
+
+class Schema {
+ public:
+  explicit Schema(const std::vector<std::shared_ptr<Field>>& fields);
+
+  // Returns true if all of the schema fields are equal
+  bool Equals(const Schema& other) const;
+  bool Equals(const std::shared_ptr<Schema>& other) const;
+
+  // Return the ith schema element. Does not boundscheck
+  const std::shared_ptr<Field>& field(int i) const {
+    return fields_[i];
+  }
+
+  // Render a string representation of the schema suitable for debugging
+  std::string ToString() const;
+
+  int num_fields() const {
+    return fields_.size();
+  }
+
+ private:
+  std::vector<std::shared_ptr<Field>> fields_;
+};
+
+} // namespace arrow
+
+#endif  // ARROW_SCHEMA_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table-test.cc b/cpp/src/arrow/table-test.cc
new file mode 100644
index 0000000..4c7b8f8
--- /dev/null
+++ b/cpp/src/arrow/table-test.cc
@@ -0,0 +1,128 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "arrow/column.h"
+#include "arrow/schema.h"
+#include "arrow/table.h"
+#include "arrow/test-util.h"
+#include "arrow/type.h"
+#include "arrow/types/primitive.h"
+#include "arrow/util/status.h"
+
+using std::shared_ptr;
+using std::vector;
+
+namespace arrow {
+
+const auto INT16 = std::make_shared<Int16Type>();
+const auto UINT8 = std::make_shared<UInt8Type>();
+const auto INT32 = std::make_shared<Int32Type>();
+
+class TestTable : public TestBase {
+ public:
+  void MakeExample1(int length) {
+    auto f0 = std::make_shared<Field>("f0", INT32);
+    auto f1 = std::make_shared<Field>("f1", UINT8);
+    auto f2 = std::make_shared<Field>("f2", INT16);
+
+    vector<shared_ptr<Field>> fields = {f0, f1, f2};
+    schema_ = std::make_shared<Schema>(fields);
+
+    columns_ = {
+      std::make_shared<Column>(schema_->field(0), MakePrimitive<Int32Array>(length)),
+      std::make_shared<Column>(schema_->field(1), MakePrimitive<UInt8Array>(length)),
+      std::make_shared<Column>(schema_->field(2), MakePrimitive<Int16Array>(length))
+    };
+  }
+
+ protected:
+  std::unique_ptr<Table> table_;
+  shared_ptr<Schema> schema_;
+  vector<std::shared_ptr<Column>> columns_;
+};
+
+TEST_F(TestTable, EmptySchema) {
+  auto empty_schema = shared_ptr<Schema>(new Schema({}));
+  table_.reset(new Table("data", empty_schema, columns_));
+  ASSERT_OK(table_->ValidateColumns());
+  ASSERT_EQ(0, table_->num_rows());
+  ASSERT_EQ(0, table_->num_columns());
+}
+
+TEST_F(TestTable, Ctors) {
+  int length = 100;
+  MakeExample1(length);
+
+  std::string name = "data";
+
+  table_.reset(new Table(name, schema_, columns_));
+  ASSERT_OK(table_->ValidateColumns());
+  ASSERT_EQ(name, table_->name());
+  ASSERT_EQ(length, table_->num_rows());
+  ASSERT_EQ(3, table_->num_columns());
+
+  table_.reset(new Table(name, schema_, columns_, length));
+  ASSERT_OK(table_->ValidateColumns());
+  ASSERT_EQ(name, table_->name());
+  ASSERT_EQ(length, table_->num_rows());
+}
+
+TEST_F(TestTable, Metadata) {
+  int length = 100;
+  MakeExample1(length);
+
+  std::string name = "data";
+  table_.reset(new Table(name, schema_, columns_));
+
+  ASSERT_TRUE(table_->schema()->Equals(schema_));
+
+  auto col = table_->column(0);
+  ASSERT_EQ(schema_->field(0)->name, col->name());
+  ASSERT_EQ(schema_->field(0)->type, col->type());
+}
+
+TEST_F(TestTable, InvalidColumns) {
+  // Check that columns are all the same length
+  int length = 100;
+  MakeExample1(length);
+
+  table_.reset(new Table("data", schema_, columns_, length - 1));
+  ASSERT_RAISES(Invalid, table_->ValidateColumns());
+
+  columns_.clear();
+
+  // Wrong number of columns
+  table_.reset(new Table("data", schema_, columns_, length));
+  ASSERT_RAISES(Invalid, table_->ValidateColumns());
+
+  columns_ = {
+    std::make_shared<Column>(schema_->field(0), MakePrimitive<Int32Array>(length)),
+    std::make_shared<Column>(schema_->field(1), MakePrimitive<UInt8Array>(length)),
+    std::make_shared<Column>(schema_->field(2), MakePrimitive<Int16Array>(length - 1))
+  };
+
+  table_.reset(new Table("data", schema_, columns_, length));
+  ASSERT_RAISES(Invalid, table_->ValidateColumns());
+}
+
+} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc
new file mode 100644
index 0000000..e405c1d
--- /dev/null
+++ b/cpp/src/arrow/table.cc
@@ -0,0 +1,86 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/table.h"
+
+#include <cstdlib>
+#include <memory>
+#include <sstream>
+
+#include "arrow/column.h"
+#include "arrow/schema.h"
+#include "arrow/util/status.h"
+
+namespace arrow {
+
+RowBatch::RowBatch(const std::shared_ptr<Schema>& schema, int num_rows,
+    const std::vector<std::shared_ptr<Array>>& columns) :
+    schema_(schema),
+    num_rows_(num_rows),
+    columns_(columns) {}
+
+const std::string& RowBatch::column_name(int i) const {
+  return schema_->field(i)->name;
+}
+
+Table::Table(const std::string& name, const std::shared_ptr<Schema>& schema,
+    const std::vector<std::shared_ptr<Column>>& columns) :
+    name_(name),
+    schema_(schema),
+    columns_(columns) {
+  if (columns.size() == 0) {
+    num_rows_ = 0;
+  } else {
+    num_rows_ = columns[0]->length();
+  }
+}
+
+Table::Table(const std::string& name, const std::shared_ptr<Schema>& schema,
+    const std::vector<std::shared_ptr<Column>>& columns, int64_t num_rows) :
+    name_(name),
+    schema_(schema),
+    columns_(columns),
+    num_rows_(num_rows) {}
+
+Status Table::ValidateColumns() const {
+  if (num_columns() != schema_->num_fields()) {
+    return Status::Invalid("Number of columns did not match schema");
+  }
+
+  // Make sure columns are all non-null and the same length as num_rows_
+  for (size_t i = 0; i < columns_.size(); ++i) {
+    const Column* col = columns_[i].get();
+    if (col == nullptr) {
+      std::stringstream ss;
+      // A null column cannot be asked for its name; identify it by index only
+      ss << "Column " << i << " was null";
+      return Status::Invalid(ss.str());
+    }
+    if (col->length() != num_rows_) {
+      std::stringstream ss;
+      ss << "Column " << i << " named " << col->name()
+         << " expected length "
+         << num_rows_
+         << " but got length "
+         << col->length();
+      return Status::Invalid(ss.str());
+    }
+  }
+  return Status::OK();
+}
+
+} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table.h b/cpp/src/arrow/table.h
new file mode 100644
index 0000000..e2f73a2
--- /dev/null
+++ b/cpp/src/arrow/table.h
@@ -0,0 +1,128 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_TABLE_H
+#define ARROW_TABLE_H
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace arrow {
+
+class Array;
+class Column;
+class Schema;
+class Status;
+
+// A row batch is a simpler and more rigid table data structure intended for
+// use primarily in shared memory IPC. It contains a schema (metadata) and a
+// corresponding vector of equal-length Arrow arrays
+class RowBatch {
+ public:
+  // num_rows is a parameter to allow for row batches of a particular size not
+  // having any materialized columns. Each array should have the same length as
+  // num_rows
+  RowBatch(const std::shared_ptr<Schema>& schema, int num_rows,
+      const std::vector<std::shared_ptr<Array>>& columns);
+
+  // @returns: the table's schema
+  const std::shared_ptr<Schema>& schema() const {
+    return schema_;
+  }
+
+  // @returns: the i-th column
+  // Note: Does not boundscheck
+  const std::shared_ptr<Array>& column(int i) const {
+    return columns_[i];
+  }
+
+  const std::string& column_name(int i) const;
+
+  // @returns: the number of columns in the table
+  int num_columns() const {
+    return columns_.size();
+  }
+
+  // @returns: the number of rows (the corresponding length of each column)
+  int64_t num_rows() const {
+    return num_rows_;
+  }
+
+ private:
+  std::shared_ptr<Schema> schema_;
+  int num_rows_;
+  std::vector<std::shared_ptr<Array>> columns_;
+};
+
+// Immutable container of fixed-length columns conforming to a particular schema
+class Table {
+ public:
+  // If columns is zero-length, the table's number of rows is zero
+  Table(const std::string& name, const std::shared_ptr<Schema>& schema,
+      const std::vector<std::shared_ptr<Column>>& columns);
+
+  // num_rows is a parameter to allow for tables of a particular size not
+  // having any materialized columns. Each column should therefore have the
+  // same length as num_rows -- you can validate this using
+  // Table::ValidateColumns
+  Table(const std::string& name, const std::shared_ptr<Schema>& schema,
+      const std::vector<std::shared_ptr<Column>>& columns, int64_t num_rows);
+
+  // @returns: the table's name, if any (may be length 0)
+  const std::string& name() const {
+    return name_;
+  }
+
+  // @returns: the table's schema
+  const std::shared_ptr<Schema>& schema() const {
+    return schema_;
+  }
+
+  // Note: Does not boundscheck
+  // @returns: the i-th column
+  const std::shared_ptr<Column>& column(int i) const {
+    return columns_[i];
+  }
+
+  // @returns: the number of columns in the table
+  int num_columns() const {
+    return columns_.size();
+  }
+
+  // @returns: the number of rows (the corresponding length of each column)
+  int64_t num_rows() const {
+    return num_rows_;
+  }
+
+  // After construction, perform any checks to validate the input arguments
+  Status ValidateColumns() const;
+
+ private:
+  // The table's name, optional
+  std::string name_;
+
+  std::shared_ptr<Schema> schema_;
+  std::vector<std::shared_ptr<Column>> columns_;
+
+  int64_t num_rows_;
+};
+
+} // namespace arrow
+
+#endif  // ARROW_TABLE_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/CMakeLists.txt b/cpp/src/arrow/table/CMakeLists.txt
deleted file mode 100644
index d9f00e7..0000000
--- a/cpp/src/arrow/table/CMakeLists.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-#######################################
-# arrow_table
-#######################################
-
-# Headers: top level
-install(FILES
-  column.h
-  schema.h
-  table.h
-  DESTINATION include/arrow/table)
-
-ADD_ARROW_TEST(column-test)
-ADD_ARROW_TEST(schema-test)
-ADD_ARROW_TEST(table-test)
-
-ADD_ARROW_BENCHMARK(column-benchmark)

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/column-benchmark.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/column-benchmark.cc b/cpp/src/arrow/table/column-benchmark.cc
deleted file mode 100644
index c01146d..0000000
--- a/cpp/src/arrow/table/column-benchmark.cc
+++ /dev/null
@@ -1,55 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-
-#include "benchmark/benchmark.h"
-
-#include "arrow/test-util.h"
-#include "arrow/table/test-common.h"
-#include "arrow/types/integer.h"
-#include "arrow/util/memory-pool.h"
-
-namespace arrow {
-namespace {
-  template <typename ArrayType>
-  std::shared_ptr<Array> MakePrimitive(int32_t length, int32_t null_count = 0) {
-    auto pool = GetDefaultMemoryPool();
-    auto data = std::make_shared<PoolBuffer>(pool);
-    auto nulls = std::make_shared<PoolBuffer>(pool);
-    data->Resize(length * sizeof(typename ArrayType::value_type));
-    nulls->Resize(util::bytes_for_bits(length));
-    return std::make_shared<ArrayType>(length, data, 10, nulls);
-  }
-}  // anonymous namespace
-
-
-static void BM_BuildInt32ColumnByChunk(benchmark::State& state) { //NOLINT non-const reference
-  ArrayVector arrays;
-  for (int chunk_n = 0; chunk_n < state.range_x(); ++chunk_n) {
-    arrays.push_back(MakePrimitive<Int32Array>(100, 10));
-  }
-  const auto INT32 = std::make_shared<Int32Type>();
-  const auto field = std::make_shared<Field>("c0", INT32);
-  std::unique_ptr<Column> column;
-  while (state.KeepRunning()) {
-    column.reset(new Column(field, arrays));
-  }
-}
-
-BENCHMARK(BM_BuildInt32ColumnByChunk)->Range(5, 50000);
-
-}  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/column-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/column-test.cc b/cpp/src/arrow/table/column-test.cc
deleted file mode 100644
index 3b102e4..0000000
--- a/cpp/src/arrow/table/column-test.cc
+++ /dev/null
@@ -1,75 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <gtest/gtest.h>
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/table/column.h"
-#include "arrow/table/schema.h"
-#include "arrow/table/test-common.h"
-#include "arrow/test-util.h"
-#include "arrow/type.h"
-#include "arrow/types/integer.h"
-
-using std::shared_ptr;
-using std::vector;
-
-namespace arrow {
-
-const auto INT32 = std::make_shared<Int32Type>();
-
-class TestColumn : public TestBase {
- protected:
-  std::shared_ptr<ChunkedArray> data_;
-  std::unique_ptr<Column> column_;
-};
-
-TEST_F(TestColumn, BasicAPI) {
-  ArrayVector arrays;
-  arrays.push_back(MakePrimitive<Int32Array>(100));
-  arrays.push_back(MakePrimitive<Int32Array>(100, 10));
-  arrays.push_back(MakePrimitive<Int32Array>(100, 20));
-
-  auto field = std::make_shared<Field>("c0", INT32);
-  column_.reset(new Column(field, arrays));
-
-  ASSERT_EQ("c0", column_->name());
-  ASSERT_TRUE(column_->type()->Equals(INT32));
-  ASSERT_EQ(300, column_->length());
-  ASSERT_EQ(30, column_->null_count());
-  ASSERT_EQ(3, column_->data()->num_chunks());
-}
-
-TEST_F(TestColumn, ChunksInhomogeneous) {
-  ArrayVector arrays;
-  arrays.push_back(MakePrimitive<Int32Array>(100));
-  arrays.push_back(MakePrimitive<Int32Array>(100, 10));
-
-  auto field = std::make_shared<Field>("c0", INT32);
-  column_.reset(new Column(field, arrays));
-
-  ASSERT_OK(column_->ValidateData());
-
-  arrays.push_back(MakePrimitive<Int16Array>(100, 10));
-  column_.reset(new Column(field, arrays));
-  ASSERT_RAISES(Invalid, column_->ValidateData());
-}
-
-} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/column.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/column.cc b/cpp/src/arrow/table/column.cc
deleted file mode 100644
index 573e650..0000000
--- a/cpp/src/arrow/table/column.cc
+++ /dev/null
@@ -1,68 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/table/column.h"
-
-#include <memory>
-#include <sstream>
-
-#include "arrow/type.h"
-#include "arrow/util/status.h"
-
-namespace arrow {
-
-ChunkedArray::ChunkedArray(const ArrayVector& chunks) :
-    chunks_(chunks) {
-  length_ = 0;
-  for (const std::shared_ptr<Array>& chunk : chunks) {
-    length_ += chunk->length();
-    null_count_ += chunk->null_count();
-  }
-}
-
-Column::Column(const std::shared_ptr<Field>& field, const ArrayVector& chunks) :
-    field_(field) {
-  data_ = std::make_shared<ChunkedArray>(chunks);
-}
-
-Column::Column(const std::shared_ptr<Field>& field,
-    const std::shared_ptr<Array>& data) :
-    field_(field) {
-  data_ = std::make_shared<ChunkedArray>(ArrayVector({data}));
-}
-
-Column::Column(const std::shared_ptr<Field>& field,
-    const std::shared_ptr<ChunkedArray>& data) :
-    field_(field),
-    data_(data) {}
-
-Status Column::ValidateData() {
-  for (int i = 0; i < data_->num_chunks(); ++i) {
-    const std::shared_ptr<DataType>& type = data_->chunk(i)->type();
-    if (!this->type()->Equals(type)) {
-      std::stringstream ss;
-      ss << "In chunk " << i << " expected type "
-         << this->type()->ToString()
-         << " but saw "
-         << type->ToString();
-      return Status::Invalid(ss.str());
-    }
-  }
-  return Status::OK();
-}
-
-} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/column.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/column.h b/cpp/src/arrow/table/column.h
deleted file mode 100644
index dfc7516..0000000
--- a/cpp/src/arrow/table/column.h
+++ /dev/null
@@ -1,105 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef ARROW_TABLE_COLUMN_H
-#define ARROW_TABLE_COLUMN_H
-
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/array.h"
-#include "arrow/type.h"
-
-namespace arrow {
-
-typedef std::vector<std::shared_ptr<Array> > ArrayVector;
-
-// A data structure managing a list of primitive Arrow arrays logically as one
-// large array
-class ChunkedArray {
- public:
-  explicit ChunkedArray(const ArrayVector& chunks);
-
-  // @returns: the total length of the chunked array; computed on construction
-  int64_t length() const {
-    return length_;
-  }
-
-  int64_t null_count() const {
-    return null_count_;
-  }
-
-  int num_chunks() const {
-    return chunks_.size();
-  }
-
-  const std::shared_ptr<Array>& chunk(int i) const {
-    return chunks_[i];
-  }
-
- protected:
-  ArrayVector chunks_;
-  int64_t length_;
-  int64_t null_count_;
-};
-
-// An immutable column data structure consisting of a field (type metadata) and
-// a logical chunked data array (which can be validated as all being the same
-// type).
-class Column {
- public:
-  Column(const std::shared_ptr<Field>& field, const ArrayVector& chunks);
-  Column(const std::shared_ptr<Field>& field,
-      const std::shared_ptr<ChunkedArray>& data);
-
-  Column(const std::shared_ptr<Field>& field, const std::shared_ptr<Array>& data);
-
-  int64_t length() const {
-    return data_->length();
-  }
-
-  int64_t null_count() const {
-    return data_->null_count();
-  }
-
-  // @returns: the column's name in the passed metadata
-  const std::string& name() const {
-    return field_->name;
-  }
-
-  // @returns: the column's type according to the metadata
-  const std::shared_ptr<DataType>& type() const {
-    return field_->type;
-  }
-
-  // @returns: the column's data as a chunked logical array
-  const std::shared_ptr<ChunkedArray>& data() const {
-    return data_;
-  }
-  // Verify that the column's array data is consistent with the passed field's
-  // metadata
-  Status ValidateData();
-
- protected:
-  std::shared_ptr<Field> field_;
-  std::shared_ptr<ChunkedArray> data_;
-};
-
-} // namespace arrow
-
-#endif  // ARROW_TABLE_COLUMN_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/schema-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/schema-test.cc b/cpp/src/arrow/table/schema-test.cc
deleted file mode 100644
index 9dfade2..0000000
--- a/cpp/src/arrow/table/schema-test.cc
+++ /dev/null
@@ -1,110 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <gtest/gtest.h>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/table/schema.h"
-#include "arrow/type.h"
-#include "arrow/types/string.h"
-
-using std::shared_ptr;
-using std::vector;
-
-namespace arrow {
-
-const auto INT32 = std::make_shared<Int32Type>();
-
-TEST(TestField, Basics) {
-  shared_ptr<DataType> ftype = INT32;
-  shared_ptr<DataType> ftype_nn = std::make_shared<Int32Type>(false);
-  Field f0("f0", ftype);
-  Field f0_nn("f0", ftype_nn);
-
-  ASSERT_EQ(f0.name, "f0");
-  ASSERT_EQ(f0.type->ToString(), ftype->ToString());
-
-  ASSERT_TRUE(f0.nullable());
-  ASSERT_FALSE(f0_nn.nullable());
-}
-
-TEST(TestField, Equals) {
-  shared_ptr<DataType> ftype = INT32;
-  shared_ptr<DataType> ftype_nn = std::make_shared<Int32Type>(false);
-
-  Field f0("f0", ftype);
-  Field f0_nn("f0", ftype_nn);
-  Field f0_other("f0", ftype);
-
-  ASSERT_EQ(f0, f0_other);
-  ASSERT_NE(f0, f0_nn);
-}
-
-class TestSchema : public ::testing::Test {
- public:
-  void SetUp() {}
-};
-
-TEST_F(TestSchema, Basics) {
-  auto f0 = std::make_shared<Field>("f0", INT32);
-  auto f1 = std::make_shared<Field>("f1", std::make_shared<UInt8Type>(false));
-  auto f1_optional = std::make_shared<Field>("f1", std::make_shared<UInt8Type>());
-
-  auto f2 = std::make_shared<Field>("f2", std::make_shared<StringType>());
-
-  vector<shared_ptr<Field> > fields = {f0, f1, f2};
-  auto schema = std::make_shared<Schema>(fields);
-
-  ASSERT_EQ(3, schema->num_fields());
-  ASSERT_EQ(f0, schema->field(0));
-  ASSERT_EQ(f1, schema->field(1));
-  ASSERT_EQ(f2, schema->field(2));
-
-  auto schema2 = std::make_shared<Schema>(fields);
-
-  vector<shared_ptr<Field> > fields3 = {f0, f1_optional, f2};
-  auto schema3 = std::make_shared<Schema>(fields3);
-  ASSERT_TRUE(schema->Equals(schema2));
-  ASSERT_FALSE(schema->Equals(schema3));
-
-  ASSERT_TRUE(schema->Equals(*schema2.get()));
-  ASSERT_FALSE(schema->Equals(*schema3.get()));
-}
-
-TEST_F(TestSchema, ToString) {
-  auto f0 = std::make_shared<Field>("f0", std::make_shared<Int32Type>());
-  auto f1 = std::make_shared<Field>("f1", std::make_shared<UInt8Type>(false));
-  auto f2 = std::make_shared<Field>("f2", std::make_shared<StringType>());
-  auto f3 = std::make_shared<Field>("f3",
-      std::make_shared<ListType>(std::make_shared<Int16Type>()));
-
-  vector<shared_ptr<Field> > fields = {f0, f1, f2, f3};
-  auto schema = std::make_shared<Schema>(fields);
-
-  std::string result = schema->ToString();
-  std::string expected = R"(f0 int32
-f1 uint8 not null
-f2 string
-f3 list<int16>
-)";
-
-  ASSERT_EQ(expected, result);
-}
-
-} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/schema.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/schema.cc b/cpp/src/arrow/table/schema.cc
deleted file mode 100644
index d49d0a7..0000000
--- a/cpp/src/arrow/table/schema.cc
+++ /dev/null
@@ -1,58 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/table/schema.h"
-
-#include <memory>
-#include <string>
-#include <sstream>
-#include <vector>
-
-#include "arrow/type.h"
-
-namespace arrow {
-
-Schema::Schema(const std::vector<std::shared_ptr<Field> >& fields) :
-    fields_(fields) {}
-
-bool Schema::Equals(const Schema& other) const {
-  if (this == &other) return true;
-  if (num_fields() != other.num_fields()) {
-    return false;
-  }
-  for (int i = 0; i < num_fields(); ++i) {
-    if (!field(i)->Equals(*other.field(i).get())) {
-      return false;
-    }
-  }
-  return true;
-}
-
-bool Schema::Equals(const std::shared_ptr<Schema>& other) const {
-  return Equals(*other.get());
-}
-
-std::string Schema::ToString() const {
-  std::stringstream buffer;
-
-  for (auto field : fields_) {
-    buffer << field->ToString() << std::endl;
-  }
-  return buffer.str();
-}
-
-} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/schema.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/schema.h b/cpp/src/arrow/table/schema.h
deleted file mode 100644
index 103f01b..0000000
--- a/cpp/src/arrow/table/schema.h
+++ /dev/null
@@ -1,55 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef ARROW_SCHEMA_H
-#define ARROW_SCHEMA_H
-
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/type.h"
-
-namespace arrow {
-
-class Schema {
- public:
-  explicit Schema(const std::vector<std::shared_ptr<Field> >& fields);
-
-  // Returns true if all of the schema fields are equal
-  bool Equals(const Schema& other) const;
-  bool Equals(const std::shared_ptr<Schema>& other) const;
-
-  // Return the ith schema element. Does not boundscheck
-  const std::shared_ptr<Field>& field(int i) const {
-    return fields_[i];
-  }
-
-  // Render a string representation of the schema suitable for debugging
-  std::string ToString() const;
-
-  int num_fields() const {
-    return fields_.size();
-  }
-
- private:
-  std::vector<std::shared_ptr<Field> > fields_;
-};
-
-} // namespace arrow
-
-#endif  // ARROW_FIELD_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/table-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/table-test.cc b/cpp/src/arrow/table/table-test.cc
deleted file mode 100644
index 8b354e8..0000000
--- a/cpp/src/arrow/table/table-test.cc
+++ /dev/null
@@ -1,128 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <gtest/gtest.h>
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/table/column.h"
-#include "arrow/table/schema.h"
-#include "arrow/table/table.h"
-#include "arrow/table/test-common.h"
-#include "arrow/test-util.h"
-#include "arrow/type.h"
-#include "arrow/types/integer.h"
-
-using std::shared_ptr;
-using std::vector;
-
-namespace arrow {
-
-const auto INT16 = std::make_shared<Int16Type>();
-const auto UINT8 = std::make_shared<UInt8Type>();
-const auto INT32 = std::make_shared<Int32Type>();
-
-class TestTable : public TestBase {
- public:
-  void MakeExample1(int length) {
-    auto f0 = std::make_shared<Field>("f0", INT32);
-    auto f1 = std::make_shared<Field>("f1", UINT8);
-    auto f2 = std::make_shared<Field>("f2", INT16);
-
-    vector<shared_ptr<Field> > fields = {f0, f1, f2};
-    schema_ = std::make_shared<Schema>(fields);
-
-    columns_ = {
-      std::make_shared<Column>(schema_->field(0), MakePrimitive<Int32Array>(length)),
-      std::make_shared<Column>(schema_->field(1), MakePrimitive<UInt8Array>(length)),
-      std::make_shared<Column>(schema_->field(2), MakePrimitive<Int16Array>(length))
-    };
-  }
-
- protected:
-  std::unique_ptr<Table> table_;
-  shared_ptr<Schema> schema_;
-  vector<std::shared_ptr<Column> > columns_;
-};
-
-TEST_F(TestTable, EmptySchema) {
-  auto empty_schema = shared_ptr<Schema>(new Schema({}));
-  table_.reset(new Table("data", empty_schema, columns_));
-  ASSERT_OK(table_->ValidateColumns());
-  ASSERT_EQ(0, table_->num_rows());
-  ASSERT_EQ(0, table_->num_columns());
-}
-
-TEST_F(TestTable, Ctors) {
-  int length = 100;
-  MakeExample1(length);
-
-  std::string name = "data";
-
-  table_.reset(new Table(name, schema_, columns_));
-  ASSERT_OK(table_->ValidateColumns());
-  ASSERT_EQ(name, table_->name());
-  ASSERT_EQ(length, table_->num_rows());
-  ASSERT_EQ(3, table_->num_columns());
-
-  table_.reset(new Table(name, schema_, columns_, length));
-  ASSERT_OK(table_->ValidateColumns());
-  ASSERT_EQ(name, table_->name());
-  ASSERT_EQ(length, table_->num_rows());
-}
-
-TEST_F(TestTable, Metadata) {
-  int length = 100;
-  MakeExample1(length);
-
-  std::string name = "data";
-  table_.reset(new Table(name, schema_, columns_));
-
-  ASSERT_TRUE(table_->schema()->Equals(schema_));
-
-  auto col = table_->column(0);
-  ASSERT_EQ(schema_->field(0)->name, col->name());
-  ASSERT_EQ(schema_->field(0)->type, col->type());
-}
-
-TEST_F(TestTable, InvalidColumns) {
-  // Check that columns are all the same length
-  int length = 100;
-  MakeExample1(length);
-
-  table_.reset(new Table("data", schema_, columns_, length - 1));
-  ASSERT_RAISES(Invalid, table_->ValidateColumns());
-
-  columns_.clear();
-
-  // Wrong number of columns
-  table_.reset(new Table("data", schema_, columns_, length));
-  ASSERT_RAISES(Invalid, table_->ValidateColumns());
-
-  columns_ = {
-    std::make_shared<Column>(schema_->field(0), MakePrimitive<Int32Array>(length)),
-    std::make_shared<Column>(schema_->field(1), MakePrimitive<UInt8Array>(length)),
-    std::make_shared<Column>(schema_->field(2), MakePrimitive<Int16Array>(length - 1))
-  };
-
-  table_.reset(new Table("data", schema_, columns_, length));
-  ASSERT_RAISES(Invalid, table_->ValidateColumns());
-}
-
-} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/table.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/table.cc b/cpp/src/arrow/table/table.cc
deleted file mode 100644
index 0c788b8..0000000
--- a/cpp/src/arrow/table/table.cc
+++ /dev/null
@@ -1,73 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/table/table.h"
-
-#include <memory>
-#include <sstream>
-
-#include "arrow/table/column.h"
-#include "arrow/table/schema.h"
-#include "arrow/type.h"
-#include "arrow/util/status.h"
-
-namespace arrow {
-
-Table::Table(const std::string& name, const std::shared_ptr<Schema>& schema,
-    const std::vector<std::shared_ptr<Column> >& columns) :
-    name_(name),
-    schema_(schema),
-    columns_(columns) {
-  if (columns.size() == 0) {
-    num_rows_ = 0;
-  } else {
-    num_rows_ = columns[0]->length();
-  }
-}
-
-Table::Table(const std::string& name, const std::shared_ptr<Schema>& schema,
-    const std::vector<std::shared_ptr<Column> >& columns, int64_t num_rows) :
-    name_(name),
-    schema_(schema),
-    columns_(columns),
-    num_rows_(num_rows) {}
-
-Status Table::ValidateColumns() const {
-  if (num_columns() != schema_->num_fields()) {
-    return Status::Invalid("Number of columns did not match schema");
-  }
-
-  if (columns_.size() == 0) {
-    return Status::OK();
-  }
-
-  // Make sure columns are all the same length
-  for (size_t i = 0; i < columns_.size(); ++i) {
-    const Column* col = columns_[i].get();
-    if (col->length() != num_rows_) {
-      std::stringstream ss;
-      ss << "Column " << i << " expected length "
-         << num_rows_
-         << " but got length "
-         << col->length();
-      return Status::Invalid(ss.str());
-    }
-  }
-  return Status::OK();
-}
-
-} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/table.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/table.h b/cpp/src/arrow/table/table.h
deleted file mode 100644
index b012938..0000000
--- a/cpp/src/arrow/table/table.h
+++ /dev/null
@@ -1,82 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef ARROW_TABLE_TABLE_H
-#define ARROW_TABLE_TABLE_H
-
-#include <memory>
-#include <string>
-#include <vector>
-
-namespace arrow {
-
-class Column;
-class Schema;
-class Status;
-
-// Immutable container of fixed-length columns conforming to a particular schema
-class Table {
- public:
-  // If columns is zero-length, the table's number of rows is zero
-  Table(const std::string& name, const std::shared_ptr<Schema>& schema,
-      const std::vector<std::shared_ptr<Column> >& columns);
-
-  Table(const std::string& name, const std::shared_ptr<Schema>& schema,
-      const std::vector<std::shared_ptr<Column> >& columns, int64_t num_rows);
-
-  // @returns: the table's name, if any (may be length 0)
-  const std::string& name() const {
-    return name_;
-  }
-
-  // @returns: the table's schema
-  const std::shared_ptr<Schema>& schema() const {
-    return schema_;
-  }
-
-  // Note: Does not boundscheck
-  // @returns: the i-th column
-  const std::shared_ptr<Column>& column(int i) const {
-    return columns_[i];
-  }
-
-  // @returns: the number of columns in the table
-  int num_columns() const {
-    return columns_.size();
-  }
-
-  // @returns: the number of rows (the corresponding length of each column)
-  int64_t num_rows() const {
-    return num_rows_;
-  }
-
-  // After construction, perform any checks to validate the input arguments
-  Status ValidateColumns() const;
-
- private:
-  // The table's name, optional
-  std::string name_;
-
-  std::shared_ptr<Schema> schema_;
-  std::vector<std::shared_ptr<Column> > columns_;
-
-  int64_t num_rows_;
-};
-
-} // namespace arrow
-
-#endif  // ARROW_TABLE_TABLE_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/table/test-common.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/test-common.h b/cpp/src/arrow/table/test-common.h
deleted file mode 100644
index 50a5f6a..0000000
--- a/cpp/src/arrow/table/test-common.h
+++ /dev/null
@@ -1,54 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <gtest/gtest.h>
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "arrow/table/column.h"
-#include "arrow/table/schema.h"
-#include "arrow/table/table.h"
-#include "arrow/test-util.h"
-#include "arrow/type.h"
-#include "arrow/util/bit-util.h"
-#include "arrow/util/buffer.h"
-#include "arrow/util/memory-pool.h"
-
-namespace arrow {
-
-class TestBase : public ::testing::Test {
- public:
-  void SetUp() {
-    pool_ = GetDefaultMemoryPool();
-  }
-
-  template <typename ArrayType>
-  std::shared_ptr<Array> MakePrimitive(int32_t length, int32_t null_count = 0) {
-    auto data = std::make_shared<PoolBuffer>(pool_);
-    auto nulls = std::make_shared<PoolBuffer>(pool_);
-    EXPECT_OK(data->Resize(length * sizeof(typename ArrayType::value_type)));
-    EXPECT_OK(nulls->Resize(util::bytes_for_bits(length)));
-    return std::make_shared<ArrayType>(length, data, 10, nulls);
-  }
-
- protected:
-  MemoryPool* pool_;
-};
-
-} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/test-util.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/test-util.h b/cpp/src/arrow/test-util.h
index 0898c8e..a9fb2a7 100644
--- a/cpp/src/arrow/test-util.h
+++ b/cpp/src/arrow/test-util.h
@@ -18,26 +18,39 @@
 #ifndef ARROW_TEST_UTIL_H_
 #define ARROW_TEST_UTIL_H_
 
-#include <gtest/gtest.h>
+#include <cstdint>
 #include <memory>
+#include <random>
 #include <string>
 #include <vector>
 
+#include "gtest/gtest.h"
+
+#include "arrow/type.h"
+#include "arrow/column.h"
+#include "arrow/schema.h"
+#include "arrow/table.h"
 #include "arrow/util/bit-util.h"
+#include "arrow/util/buffer.h"
+#include "arrow/util/memory-pool.h"
 #include "arrow/util/random.h"
 #include "arrow/util/status.h"
 
 #define ASSERT_RAISES(ENUM, expr)               \
   do {                                          \
     Status s = (expr);                          \
-    ASSERT_TRUE(s.Is##ENUM());                  \
+    if (!s.Is##ENUM()) {                        \
+      FAIL() << s.ToString();                   \
+    }                                           \
   } while (0)
 
 
 #define ASSERT_OK(expr)                         \
   do {                                          \
     Status s = (expr);                          \
-    ASSERT_TRUE(s.ok());                        \
+    if (!s.ok()) {                              \
+        FAIL() << s.ToString();                 \
+    }                                           \
   } while (0)
 
 
@@ -50,6 +63,27 @@
 
 namespace arrow {
 
+class TestBase : public ::testing::Test {
+ public:
+  void SetUp() {
+    pool_ = default_memory_pool();
+  }
+
+  template <typename ArrayType>
+  std::shared_ptr<Array> MakePrimitive(int32_t length, int32_t null_count = 0) {
+    auto data = std::make_shared<PoolBuffer>(pool_);
+    auto nulls = std::make_shared<PoolBuffer>(pool_);
+    EXPECT_OK(data->Resize(length * sizeof(typename ArrayType::value_type)));
+    EXPECT_OK(nulls->Resize(util::bytes_for_bits(length)));
+    return std::make_shared<ArrayType>(length, data, 10, nulls);
+  }
+
+ protected:
+  MemoryPool* pool_;
+};
+
+namespace test {
+
 template <typename T>
 void randint(int64_t N, T lower, T upper, std::vector<T>* out) {
   Random rng(random_seed());
@@ -84,6 +118,33 @@ void random_nulls(int64_t n, double pct_null, std::vector<bool>* nulls) {
   }
 }
 
+static inline void random_bytes(int n, uint32_t seed, uint8_t* out) {
+  std::mt19937 gen(seed);
+  std::uniform_int_distribution<int> d(0, 255);
+
+  for (int i = 0; i < n; ++i) {
+    out[i] = d(gen) & 0xFF;
+  }
+}
+
+template <typename T>
+void rand_uniform_int(int n, uint32_t seed, T min_value, T max_value, T* out) {
+  std::mt19937 gen(seed);
+  std::uniform_int_distribution<T> d(min_value, max_value);
+  for (int i = 0; i < n; ++i) {
+    out[i] = d(gen);
+  }
+}
+
+static inline int bitmap_popcount(const uint8_t* data, int length) {
+  int count = 0;
+  for (int i = 0; i < length; ++i) {
+    // TODO: accelerate this
+    if (util::get_bit(data, i)) ++count;
+  }
+  return count;
+}
+
 static inline int null_count(const std::vector<uint8_t>& nulls) {
   int result = 0;
   for (size_t i = 0; i < nulls.size(); ++i) {
@@ -102,6 +163,7 @@ std::shared_ptr<Buffer> bytes_to_null_buffer(uint8_t* bytes, int length) {
   return out;
 }
 
+} // namespace test
 } // namespace arrow
 
 #endif // ARROW_TEST_UTIL_H_

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/type.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index 0a2e817..f7f835e 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -24,45 +24,37 @@ namespace arrow {
 
 std::string Field::ToString() const {
   std::stringstream ss;
-  ss << this->name << " " << this->type->ToString();
+  ss << this->name << ": " << this->type->ToString();
+  if (!this->nullable) {
+    ss << " not null";
+  }
   return ss.str();
 }
 
 DataType::~DataType() {}
 
-StringType::StringType(bool nullable)
-    : DataType(LogicalType::STRING, nullable) {}
-
-StringType::StringType(const StringType& other)
-    : StringType(other.nullable) {}
+StringType::StringType() : DataType(Type::STRING) {}
 
 std::string StringType::ToString() const {
   std::string result(name());
-  if (!nullable) {
-    result.append(" not null");
-  }
   return result;
 }
 
 std::string ListType::ToString() const {
   std::stringstream s;
-  s << "list<" << value_type->ToString() << ">";
-  if (!this->nullable) {
-    s << " not null";
-  }
+  s << "list<" << value_field()->ToString() << ">";
   return s.str();
 }
 
 std::string StructType::ToString() const {
   std::stringstream s;
   s << "struct<";
-  for (size_t i = 0; i < fields_.size(); ++i) {
+  for (int i = 0; i < this->num_children(); ++i) {
     if (i > 0) s << ", ";
-    const std::shared_ptr<Field>& field = fields_[i];
+    const std::shared_ptr<Field>& field = this->child(i);
     s << field->name << ": " << field->type->ToString();
   }
   s << ">";
-  if (!nullable) s << " not null";
   return s.str();
 }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/type.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 00b01ea..5984b67 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -18,62 +18,34 @@
 #ifndef ARROW_TYPE_H
 #define ARROW_TYPE_H
 
+#include <cstdint>
 #include <memory>
 #include <string>
 #include <vector>
 
 namespace arrow {
 
-// Physical data type that describes the memory layout of values. See details
-// for each type
-enum class LayoutEnum: char {
-  // A physical type consisting of some non-negative number of bytes
-  BYTE = 0,
-
-  // A physical type consisting of some non-negative number of bits
-  BIT = 1,
-
-  // A parametric variable-length value type. Full specification requires a
-  // child logical type
-  LIST = 2,
-
-  // A collection of multiple equal-length child arrays. Parametric type taking
-  // 1 or more child logical types
-  STRUCT = 3,
-
-  // An array with heterogeneous value types. Parametric types taking 1 or more
-  // child logical types
-  DENSE_UNION = 4,
-  SPARSE_UNION = 5
-};
-
-
-struct LayoutType {
-  LayoutEnum type;
-  explicit LayoutType(LayoutEnum type) : type(type) {}
-};
-
 // Data types in this library are all *logical*. They can be expressed as
 // either a primitive physical type (bytes or bits of some fixed size), a
 // nested type consisting of other data types, or another data type (e.g. a
 // timestamp encoded as an int64)
-struct LogicalType {
+struct Type {
   enum type {
     // A degenerate NULL type represented as 0 bytes/bits
     NA = 0,
 
-    // Little-endian integer types
-    UINT8 = 1,
-    INT8 = 2,
-    UINT16 = 3,
-    INT16 = 4,
-    UINT32 = 5,
-    INT32 = 6,
-    UINT64 = 7,
-    INT64 = 8,
-
     // A boolean value represented as 1 bit
-    BOOL = 9,
+    BOOL = 1,
+
+    // Little-endian integer types
+    UINT8 = 2,
+    INT8 = 3,
+    UINT16 = 4,
+    INT16 = 5,
+    UINT32 = 6,
+    INT32 = 7,
+    UINT64 = 8,
+    INT64 = 9,
 
     // 4-byte floating point value
     FLOAT = 10,
@@ -131,30 +103,38 @@ struct LogicalType {
   };
 };
 
+struct Field;
+
 struct DataType {
-  LogicalType::type type;
-  bool nullable;
+  Type::type type;
 
-  explicit DataType(LogicalType::type type, bool nullable = true) :
-      type(type),
-      nullable(nullable) {}
+  std::vector<std::shared_ptr<Field>> children_;
+
+  explicit DataType(Type::type type) :
+      type(type) {}
 
   virtual ~DataType();
 
   bool Equals(const DataType* other) {
     // Call with a pointer so more friendly to subclasses
-    return this == other || (this->type == other->type &&
-        this->nullable == other->nullable);
+    return this == other || (this->type == other->type);
   }
 
   bool Equals(const std::shared_ptr<DataType>& other) {
     return Equals(other.get());
   }
 
+  const std::shared_ptr<Field>& child(int i) const {
+    return children_[i];
+  }
+
+  int num_children() const {
+    return children_.size();
+  }
+
   virtual std::string ToString() const = 0;
 };
 
-typedef std::shared_ptr<LayoutType> LayoutPtr;
 typedef std::shared_ptr<DataType> TypePtr;
 
 // A field is a piece of metadata that includes (for now) a name and a data
@@ -166,9 +146,13 @@ struct Field {
   // The field's data type
   TypePtr type;
 
-  Field(const std::string& name, const TypePtr& type) :
+  // Fields can be nullable
+  bool nullable;
+
+  Field(const std::string& name, const TypePtr& type, bool nullable = true) :
       name(name),
-      type(type) {}
+      type(type),
+      nullable(nullable) {}
 
   bool operator==(const Field& other) const {
     return this->Equals(other);
@@ -180,6 +164,7 @@ struct Field {
 
   bool Equals(const Field& other) const {
     return (this == &other) || (this->name == other.name &&
+        this->nullable == other.nullable &&
         this->type->Equals(other.type.get()));
   }
 
@@ -187,36 +172,12 @@ struct Field {
     return Equals(*other.get());
   }
 
-  bool nullable() const {
-    return this->type->nullable;
-  }
-
   std::string ToString() const;
 };
 
-struct BytesType : public LayoutType {
-  int size;
-
-  explicit BytesType(int size)
-      : LayoutType(LayoutEnum::BYTE),
-        size(size) {}
-
-  BytesType(const BytesType& other)
-      : BytesType(other.size) {}
-};
-
-struct ListLayoutType : public LayoutType {
-  LayoutPtr value_type;
-
-  explicit ListLayoutType(const LayoutPtr& value_type)
-      : LayoutType(LayoutEnum::BYTE),
-        value_type(value_type) {}
-};
-
 template <typename Derived>
 struct PrimitiveType : public DataType {
-  explicit PrimitiveType(bool nullable = true)
-      : DataType(Derived::type_enum, nullable) {}
+  PrimitiveType() : DataType(Derived::type_enum) {}
 
   std::string ToString() const override;
 };
@@ -224,22 +185,19 @@ struct PrimitiveType : public DataType {
 template <typename Derived>
 inline std::string PrimitiveType<Derived>::ToString() const {
   std::string result(static_cast<const Derived*>(this)->name());
-  if (!nullable) {
-    result.append(" not null");
-  }
   return result;
 }
 
-#define PRIMITIVE_DECL(TYPENAME, C_TYPE, ENUM, SIZE, NAME)          \
-  typedef C_TYPE c_type;                                            \
-  static constexpr LogicalType::type type_enum = LogicalType::ENUM; \
-  static constexpr int size = SIZE;                                 \
-                                                                    \
-  explicit TYPENAME(bool nullable = true)                           \
-      : PrimitiveType<TYPENAME>(nullable) {}                        \
-                                                                    \
-  static const char* name() {                                       \
-    return NAME;                                                    \
+#define PRIMITIVE_DECL(TYPENAME, C_TYPE, ENUM, SIZE, NAME)  \
+  typedef C_TYPE c_type;                                    \
+  static constexpr Type::type type_enum = Type::ENUM;       \
+  static constexpr int size = SIZE;                         \
+                                                            \
+  TYPENAME()                                                \
+      : PrimitiveType<TYPENAME>() {}                        \
+                                                            \
+  static const char* name() {                               \
+    return NAME;                                            \
   }
 
 struct NullType : public PrimitiveType<NullType> {
@@ -292,11 +250,23 @@ struct DoubleType : public PrimitiveType<DoubleType> {
 
 struct ListType : public DataType {
   // List can contain any other logical value type
-  TypePtr value_type;
+  explicit ListType(const std::shared_ptr<DataType>& value_type)
+      : DataType(Type::LIST) {
+    children_ = {std::make_shared<Field>("item", value_type)};
+  }
+
+  explicit ListType(const std::shared_ptr<Field>& value_field)
+      : DataType(Type::LIST) {
+    children_ = {value_field};
+  }
 
-  explicit ListType(const TypePtr& value_type, bool nullable = true)
-      : DataType(LogicalType::LIST, nullable),
-        value_type(value_type) {}
+  const std::shared_ptr<Field>& value_field() const {
+    return children_[0];
+  }
+
+  const std::shared_ptr<DataType>& value_type() const {
+    return children_[0]->type;
+  }
 
   static char const *name() {
     return "list";
@@ -307,9 +277,7 @@ struct ListType : public DataType {
 
 // String is a logical type consisting of a physical list of 1-byte values
 struct StringType : public DataType {
-  explicit StringType(bool nullable = true);
-
-  StringType(const StringType& other);
+  StringType();
 
   static char const *name() {
     return "string";
@@ -319,20 +287,9 @@ struct StringType : public DataType {
 };
 
 struct StructType : public DataType {
-  std::vector<std::shared_ptr<Field> > fields_;
-
-  explicit StructType(const std::vector<std::shared_ptr<Field> >& fields,
-      bool nullable = true)
-      : DataType(LogicalType::STRUCT, nullable) {
-    fields_ = fields;
-  }
-
-  const std::shared_ptr<Field>& field(int i) const {
-    return fields_[i];
-  }
-
-  int num_children() const {
-    return fields_.size();
+  explicit StructType(const std::vector<std::shared_ptr<Field>>& fields)
+      : DataType(Type::STRUCT) {
+    children_ = fields;
   }
 
   std::string ToString() const override;

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/CMakeLists.txt b/cpp/src/arrow/types/CMakeLists.txt
index 57cabde..595b3be 100644
--- a/cpp/src/arrow/types/CMakeLists.txt
+++ b/cpp/src/arrow/types/CMakeLists.txt
@@ -26,8 +26,6 @@ install(FILES
   construct.h
   datetime.h
   decimal.h
-  floating.h
-  integer.h
   json.h
   list.h
   primitive.h

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/boolean.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/boolean.h b/cpp/src/arrow/types/boolean.h
index a5023d7..1cb91f9 100644
--- a/cpp/src/arrow/types/boolean.h
+++ b/cpp/src/arrow/types/boolean.h
@@ -22,7 +22,7 @@
 
 namespace arrow {
 
-typedef PrimitiveArrayImpl<BooleanType> BooleanArray;
+// typedef PrimitiveArrayImpl<BooleanType> BooleanArray;
 
 class BooleanBuilder : public ArrayBuilder {
 };

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/collection.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/collection.h b/cpp/src/arrow/types/collection.h
index 42a9c92..46d84f1 100644
--- a/cpp/src/arrow/types/collection.h
+++ b/cpp/src/arrow/types/collection.h
@@ -25,7 +25,7 @@
 
 namespace arrow {
 
-template <LogicalType::type T>
+template <Type::type T>
 struct CollectionType : public DataType {
   std::vector<TypePtr> child_types_;
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/construct.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/construct.cc b/cpp/src/arrow/types/construct.cc
index 43f01a3..290decd 100644
--- a/cpp/src/arrow/types/construct.cc
+++ b/cpp/src/arrow/types/construct.cc
@@ -19,24 +19,26 @@
 
 #include <memory>
 
-#include "arrow/types/floating.h"
-#include "arrow/types/integer.h"
+#include "arrow/type.h"
+#include "arrow/types/primitive.h"
 #include "arrow/types/list.h"
 #include "arrow/types/string.h"
+#include "arrow/util/buffer.h"
 #include "arrow/util/status.h"
 
 namespace arrow {
 
 class ArrayBuilder;
 
-// Initially looked at doing this with vtables, but shared pointers makes it
-// difficult
-
 #define BUILDER_CASE(ENUM, BuilderType)         \
-    case LogicalType::ENUM:                     \
+    case Type::ENUM:                            \
       out->reset(new BuilderType(pool, type));  \
       return Status::OK();
 
+// Initially looked at doing this with vtables, but shared pointers makes it
+// difficult
+//
+// TODO(wesm): come up with a less monolithic strategy
 Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
     std::shared_ptr<ArrayBuilder>* out) {
   switch (type->type) {
@@ -56,30 +58,41 @@ Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
 
     BUILDER_CASE(STRING, StringBuilder);
 
-    case LogicalType::LIST:
+    case Type::LIST:
       {
         std::shared_ptr<ArrayBuilder> value_builder;
 
         const std::shared_ptr<DataType>& value_type = static_cast<ListType*>(
-            type.get())->value_type;
+            type.get())->value_type();
         RETURN_NOT_OK(MakeBuilder(pool, value_type, &value_builder));
         out->reset(new ListBuilder(pool, type, value_builder));
         return Status::OK();
       }
-    // BUILDER_CASE(CHAR, CharBuilder);
-
-    // BUILDER_CASE(VARCHAR, VarcharBuilder);
-    // BUILDER_CASE(BINARY, BinaryBuilder);
-
-    // BUILDER_CASE(DATE, DateBuilder);
-    // BUILDER_CASE(TIMESTAMP, TimestampBuilder);
-    // BUILDER_CASE(TIME, TimeBuilder);
+    default:
+      return Status::NotImplemented(type->ToString());
+  }
+}
 
-    // BUILDER_CASE(LIST, ListBuilder);
-    // BUILDER_CASE(STRUCT, StructBuilder);
-    // BUILDER_CASE(DENSE_UNION, DenseUnionBuilder);
-    // BUILDER_CASE(SPARSE_UNION, SparseUnionBuilder);
+#define MAKE_PRIMITIVE_ARRAY_CASE(ENUM, ArrayType)                      \
+    case Type::ENUM:                                                    \
+      out->reset(new ArrayType(type, length, data, null_count, nulls)); \
+      return Status::OK();
 
+Status MakePrimitiveArray(const std::shared_ptr<DataType>& type,
+    int32_t length, const std::shared_ptr<Buffer>& data,
+    int32_t null_count, const std::shared_ptr<Buffer>& nulls,
+    std::shared_ptr<Array>* out) {
+  switch (type->type) {
+    MAKE_PRIMITIVE_ARRAY_CASE(UINT8, UInt8Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(INT8, Int8Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(UINT16, UInt16Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(INT16, Int16Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(UINT32, UInt32Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(INT32, Int32Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(UINT64, UInt64Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(INT64, Int64Array);
+    MAKE_PRIMITIVE_ARRAY_CASE(FLOAT, FloatArray);
+    MAKE_PRIMITIVE_ARRAY_CASE(DOUBLE, DoubleArray);
     default:
       return Status::NotImplemented(type->ToString());
   }

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/construct.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/construct.h b/cpp/src/arrow/types/construct.h
index 59ebe1a..089c484 100644
--- a/cpp/src/arrow/types/construct.h
+++ b/cpp/src/arrow/types/construct.h
@@ -18,19 +18,26 @@
 #ifndef ARROW_TYPES_CONSTRUCT_H
 #define ARROW_TYPES_CONSTRUCT_H
 
+#include <cstdint>
 #include <memory>
 
-#include "arrow/type.h"
-
 namespace arrow {
 
+class Array;
 class ArrayBuilder;
+class Buffer;
+struct DataType;
 class MemoryPool;
 class Status;
 
 Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
     std::shared_ptr<ArrayBuilder>* out);
 
+Status MakePrimitiveArray(const std::shared_ptr<DataType>& type,
+    int32_t length, const std::shared_ptr<Buffer>& data,
+    int32_t null_count, const std::shared_ptr<Buffer>& nulls,
+    std::shared_ptr<Array>* out);
+
 } // namespace arrow
 
 #endif // ARROW_BUILDER_H_

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/datetime.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/datetime.h b/cpp/src/arrow/types/datetime.h
index 765fc29..e57b66a 100644
--- a/cpp/src/arrow/types/datetime.h
+++ b/cpp/src/arrow/types/datetime.h
@@ -31,8 +31,8 @@ struct DateType : public DataType {
 
   Unit unit;
 
-  explicit DateType(Unit unit = Unit::DAY, bool nullable = true)
-      : DataType(LogicalType::DATE, nullable),
+  explicit DateType(Unit unit = Unit::DAY)
+      : DataType(Type::DATE),
         unit(unit) {}
 
   DateType(const DateType& other)
@@ -41,10 +41,6 @@ struct DateType : public DataType {
   static char const *name() {
     return "date";
   }
-
-  // virtual std::string ToString() {
-  //   return name();
-  // }
 };
 
 
@@ -58,8 +54,8 @@ struct TimestampType : public DataType {
 
   Unit unit;
 
-  explicit TimestampType(Unit unit = Unit::MILLI, bool nullable = true)
-      : DataType(LogicalType::TIMESTAMP, nullable),
+  explicit TimestampType(Unit unit = Unit::MILLI)
+      : DataType(Type::TIMESTAMP),
         unit(unit) {}
 
   TimestampType(const TimestampType& other)
@@ -68,10 +64,6 @@ struct TimestampType : public DataType {
   static char const *name() {
     return "timestamp";
   }
-
-  // virtual std::string ToString() {
-  //   return name();
-  // }
 };
 
 } // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/floating.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/floating.cc b/cpp/src/arrow/types/floating.cc
deleted file mode 100644
index bde2826..0000000
--- a/cpp/src/arrow/types/floating.cc
+++ /dev/null
@@ -1,22 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/types/floating.h"
-
-namespace arrow {
-
-} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/65db0da8/cpp/src/arrow/types/floating.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/floating.h b/cpp/src/arrow/types/floating.h
deleted file mode 100644
index e752278..0000000
--- a/cpp/src/arrow/types/floating.h
+++ /dev/null
@@ -1,36 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef ARROW_TYPES_FLOATING_H
-#define ARROW_TYPES_FLOATING_H
-
-#include <string>
-
-#include "arrow/types/primitive.h"
-#include "arrow/type.h"
-
-namespace arrow {
-
-typedef PrimitiveArrayImpl<FloatType> FloatArray;
-typedef PrimitiveArrayImpl<DoubleType> DoubleArray;
-
-typedef PrimitiveBuilder<FloatType, FloatArray> FloatBuilder;
-typedef PrimitiveBuilder<DoubleType, DoubleArray> DoubleBuilder;
-
-} // namespace arrow
-
-#endif // ARROW_TYPES_FLOATING_H