You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2016/11/18 19:58:59 UTC

[1/2] arrow git commit: ARROW-373: [C++] JSON serialization format for testing

Repository: arrow
Updated Branches:
  refs/heads/master 841709627 -> ed6ec3b76


http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/type.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index 4fd50b7..589bdad 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -20,6 +20,8 @@
 #include <sstream>
 #include <string>
 
+#include "arrow/util/status.h"
+
 namespace arrow {
 
 std::string Field::ToString() const {
@@ -44,9 +46,24 @@ bool DataType::Equals(const DataType* other) const {
   return equals;
 }
 
+std::string BooleanType::ToString() const {
+  return name();
+}
+
+FloatingPointMeta::Precision HalfFloatType::precision() const {
+  return FloatingPointMeta::HALF;
+}
+
+FloatingPointMeta::Precision FloatType::precision() const {
+  return FloatingPointMeta::SINGLE;
+}
+
+FloatingPointMeta::Precision DoubleType::precision() const {
+  return FloatingPointMeta::DOUBLE;
+}
+
 std::string StringType::ToString() const {
-  std::string result(name());
-  return result;
+  return std::string("string");
 }
 
 std::string ListType::ToString() const {
@@ -56,7 +73,7 @@ std::string ListType::ToString() const {
 }
 
 std::string BinaryType::ToString() const {
-  return std::string(name());
+  return std::string("binary");
 }
 
 std::string StructType::ToString() const {
@@ -71,4 +88,103 @@ std::string StructType::ToString() const {
   return s.str();
 }
 
+std::string UnionType::ToString() const {
+  std::stringstream s;
+
+  if (mode == UnionMode::SPARSE) {
+    s << "union[sparse]<";
+  } else {
+    s << "union[dense]<";
+  }
+
+  for (size_t i = 0; i < children_.size(); ++i) {
+    if (i) { s << ", "; }
+    s << children_[i]->ToString();
+  }
+  s << ">";
+  return s.str();
+}
+
+int NullType::bit_width() const {
+  return 0;
+}
+
+std::string NullType::ToString() const {
+  return name();
+}
+
+// Visitors and template instantiation
+
+#define ACCEPT_VISITOR(TYPE) \
+  Status TYPE::Accept(TypeVisitor* visitor) const { return visitor->Visit(*this); }
+
+ACCEPT_VISITOR(NullType);
+ACCEPT_VISITOR(BooleanType);
+ACCEPT_VISITOR(BinaryType);
+ACCEPT_VISITOR(StringType);
+ACCEPT_VISITOR(ListType);
+ACCEPT_VISITOR(StructType);
+ACCEPT_VISITOR(DecimalType);
+ACCEPT_VISITOR(UnionType);
+ACCEPT_VISITOR(DateType);
+ACCEPT_VISITOR(TimeType);
+ACCEPT_VISITOR(TimestampType);
+ACCEPT_VISITOR(IntervalType);
+
+#define TYPE_FACTORY(NAME, KLASS)                                        \
+  std::shared_ptr<DataType> NAME() {                                     \
+    static std::shared_ptr<DataType> result = std::make_shared<KLASS>(); \
+    return result;                                                       \
+  }
+
+TYPE_FACTORY(null, NullType);
+TYPE_FACTORY(boolean, BooleanType);
+TYPE_FACTORY(int8, Int8Type);
+TYPE_FACTORY(uint8, UInt8Type);
+TYPE_FACTORY(int16, Int16Type);
+TYPE_FACTORY(uint16, UInt16Type);
+TYPE_FACTORY(int32, Int32Type);
+TYPE_FACTORY(uint32, UInt32Type);
+TYPE_FACTORY(int64, Int64Type);
+TYPE_FACTORY(uint64, UInt64Type);
+TYPE_FACTORY(float16, HalfFloatType);
+TYPE_FACTORY(float32, FloatType);
+TYPE_FACTORY(float64, DoubleType);
+TYPE_FACTORY(utf8, StringType);
+TYPE_FACTORY(binary, BinaryType);
+TYPE_FACTORY(date, DateType);
+
+// Returns a TimestampType for `unit`; not cached, since the result depends on unit.
+std::shared_ptr<DataType> timestamp(TimeUnit unit) {
+  return std::make_shared<TimestampType>(unit);
+}
+
+// Returns a TimeType for `unit`; not cached, since the result depends on unit.
+std::shared_ptr<DataType> time(TimeUnit unit) {
+  return std::make_shared<TimeType>(unit);
+}
+
+std::shared_ptr<DataType> list(const std::shared_ptr<DataType>& value_type) {
+  return std::make_shared<ListType>(value_type);
+}
+
+std::shared_ptr<DataType> list(const std::shared_ptr<Field>& value_field) {
+  return std::make_shared<ListType>(value_field);
+}
+
+std::shared_ptr<DataType> struct_(const std::vector<std::shared_ptr<Field>>& fields) {
+  return std::make_shared<StructType>(fields);
+}
+
+std::shared_ptr<DataType> ARROW_EXPORT union_(
+    const std::vector<std::shared_ptr<Field>>& child_fields,
+    const std::vector<uint8_t>& type_ids, UnionMode mode) {
+  return std::make_shared<UnionType>(child_fields, type_ids, mode);
+}
+
+std::shared_ptr<Field> field(
+    const std::string& name, const TypePtr& type, bool nullable, int64_t dictionary) {
+  return std::make_shared<Field>(name, type, nullable, dictionary);
+}
+
 }  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/type.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index ea8516f..5b4d7bc 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -23,7 +23,9 @@
 #include <string>
 #include <vector>
 
+#include "arrow/type_fwd.h"
 #include "arrow/util/macros.h"
+#include "arrow/util/status.h"
 #include "arrow/util/visibility.h"
 
 namespace arrow {
@@ -50,17 +52,20 @@ struct Type {
     UINT64 = 8,
     INT64 = 9,
 
+    // 2-byte floating point value
+    HALF_FLOAT = 10,
+
     // 4-byte floating point value
-    FLOAT = 10,
+    FLOAT = 11,
 
     // 8-byte floating point value
-    DOUBLE = 11,
+    DOUBLE = 12,
 
     // UTF8 variable-length string as List<Char>
     STRING = 13,
 
     // Variable-length bytes (no guarantee of UTF8-ness)
-    BINARY = 15,
+    BINARY = 14,
 
     // By default, int32 days since the UNIX epoch
     DATE = 16,
@@ -69,19 +74,16 @@ struct Type {
     // Default unit millisecond
     TIMESTAMP = 17,
 
-    // Timestamp as double seconds since the UNIX epoch
-    TIMESTAMP_DOUBLE = 18,
-
     // Exact time encoded with int64, default unit millisecond
-    TIME = 19,
+    TIME = 18,
+
+    // YEAR_MONTH or DAY_TIME interval in SQL style
+    INTERVAL = 19,
 
     // Precision- and scale-based decimal type. Storage type depends on the
     // parameters.
     DECIMAL = 20,
 
-    // Decimal value encoded as a text string
-    DECIMAL_TEXT = 21,
-
     // A list of some logical data type
     LIST = 30,
 
@@ -89,19 +91,16 @@ struct Type {
     STRUCT = 31,
 
     // Unions of logical types
-    DENSE_UNION = 32,
-    SPARSE_UNION = 33,
+    UNION = 32,
 
-    // Union<Null, Int32, Double, String, Bool>
-    JSON_SCALAR = 50,
+    // Timestamp as double seconds since the UNIX epoch
+    TIMESTAMP_DOUBLE = 33,
 
-    // User-defined type
-    USER = 60
+    // Decimal value encoded as a text string
+    DECIMAL_TEXT = 34,
   };
 };
 
-struct Field;
-
 struct ARROW_EXPORT DataType {
   Type::type type;
 
@@ -123,15 +122,32 @@ struct ARROW_EXPORT DataType {
 
   const std::shared_ptr<Field>& child(int i) const { return children_[i]; }
 
+  const std::vector<std::shared_ptr<Field>>& children() const { return children_; }
+
   int num_children() const { return children_.size(); }
 
-  virtual int value_size() const { return -1; }
+  virtual Status Accept(TypeVisitor* visitor) const = 0;
 
   virtual std::string ToString() const = 0;
 };
 
 typedef std::shared_ptr<DataType> TypePtr;
 
+struct ARROW_EXPORT FixedWidthMeta {
+  virtual int bit_width() const = 0;
+};
+
+struct ARROW_EXPORT IntegerMeta {
+  virtual bool is_signed() const = 0;
+};
+
+struct ARROW_EXPORT FloatingPointMeta {
+  enum Precision { HALF, SINGLE, DOUBLE };
+  virtual Precision precision() const = 0;
+};
+
+struct NoExtraMeta {};
+
 // A field is a piece of metadata that includes (for now) a name and a data
 // type
 struct ARROW_EXPORT Field {
@@ -139,7 +155,7 @@ struct ARROW_EXPORT Field {
   std::string name;
 
   // The field's data type
-  TypePtr type;
+  std::shared_ptr<DataType> type;
 
   // Fields can be nullable
   bool nullable;
@@ -148,8 +164,8 @@ struct ARROW_EXPORT Field {
   // 0 means it's not dictionary encoded
   int64_t dictionary;
 
-  Field(const std::string& name, const TypePtr& type, bool nullable = true,
-      int64_t dictionary = 0)
+  Field(const std::string& name, const std::shared_ptr<DataType>& type,
+      bool nullable = true, int64_t dictionary = 0)
       : name(name), type(type), nullable(nullable), dictionary(dictionary) {}
 
   bool operator==(const Field& other) const { return this->Equals(other); }
@@ -168,78 +184,112 @@ struct ARROW_EXPORT Field {
 };
 typedef std::shared_ptr<Field> FieldPtr;
 
-template <typename Derived>
-struct ARROW_EXPORT PrimitiveType : public DataType {
-  PrimitiveType() : DataType(Derived::type_enum) {}
+struct PrimitiveCType : public DataType {
+  using DataType::DataType;
+};
+
+template <typename DERIVED, Type::type TYPE_ID, typename C_TYPE>
+struct ARROW_EXPORT CTypeImpl : public PrimitiveCType, public FixedWidthMeta {
+  using c_type = C_TYPE;
+  static constexpr Type::type type_id = TYPE_ID;
+
+  CTypeImpl() : PrimitiveCType(TYPE_ID) {}
 
+  int bit_width() const override { return sizeof(C_TYPE) * 8; }
+
+  Status Accept(TypeVisitor* visitor) const override {
+    return visitor->Visit(*static_cast<const DERIVED*>(this));
+  }
+
+  std::string ToString() const override { return std::string(DERIVED::name()); }
+};
+
+struct ARROW_EXPORT NullType : public DataType, public FixedWidthMeta {
+  static constexpr Type::type type_id = Type::NA;
+
+  NullType() : DataType(Type::NA) {}
+
+  int bit_width() const override;
+  Status Accept(TypeVisitor* visitor) const override;
   std::string ToString() const override;
+
+  static std::string name() { return "null"; }
+};
+
+template <typename DERIVED, Type::type TYPE_ID, typename C_TYPE>
+struct IntegerTypeImpl : public CTypeImpl<DERIVED, TYPE_ID, C_TYPE>, public IntegerMeta {
+  bool is_signed() const override { return std::is_signed<C_TYPE>::value; }
 };
 
-template <typename Derived>
-inline std::string PrimitiveType<Derived>::ToString() const {
-  std::string result(static_cast<const Derived*>(this)->name());
-  return result;
-}
+struct ARROW_EXPORT BooleanType : public DataType, FixedWidthMeta {
+  static constexpr Type::type type_id = Type::BOOL;
 
-#define PRIMITIVE_DECL(TYPENAME, C_TYPE, ENUM, SIZE, NAME) \
-  typedef C_TYPE c_type;                                   \
-  static constexpr Type::type type_enum = Type::ENUM;      \
-                                                           \
-  TYPENAME() : PrimitiveType<TYPENAME>() {}                \
-                                                           \
-  virtual int value_size() const { return SIZE; }          \
-                                                           \
-  static const char* name() { return NAME; }
+  BooleanType() : DataType(Type::BOOL) {}
 
-struct ARROW_EXPORT NullType : public PrimitiveType<NullType> {
-  PRIMITIVE_DECL(NullType, void, NA, 0, "null");
+  Status Accept(TypeVisitor* visitor) const override;
+  std::string ToString() const override;
+
+  int bit_width() const override { return 1; }
+  static std::string name() { return "bool"; }
 };
 
-struct ARROW_EXPORT BooleanType : public PrimitiveType<BooleanType> {
-  PRIMITIVE_DECL(BooleanType, uint8_t, BOOL, 1, "bool");
+struct ARROW_EXPORT UInt8Type : public IntegerTypeImpl<UInt8Type, Type::UINT8, uint8_t> {
+  static std::string name() { return "uint8"; }
 };
 
-struct ARROW_EXPORT UInt8Type : public PrimitiveType<UInt8Type> {
-  PRIMITIVE_DECL(UInt8Type, uint8_t, UINT8, 1, "uint8");
+struct ARROW_EXPORT Int8Type : public IntegerTypeImpl<Int8Type, Type::INT8, int8_t> {
+  static std::string name() { return "int8"; }
 };
 
-struct ARROW_EXPORT Int8Type : public PrimitiveType<Int8Type> {
-  PRIMITIVE_DECL(Int8Type, int8_t, INT8, 1, "int8");
+struct ARROW_EXPORT UInt16Type
+    : public IntegerTypeImpl<UInt16Type, Type::UINT16, uint16_t> {
+  static std::string name() { return "uint16"; }
 };
 
-struct ARROW_EXPORT UInt16Type : public PrimitiveType<UInt16Type> {
-  PRIMITIVE_DECL(UInt16Type, uint16_t, UINT16, 2, "uint16");
+struct ARROW_EXPORT Int16Type : public IntegerTypeImpl<Int16Type, Type::INT16, int16_t> {
+  static std::string name() { return "int16"; }
 };
 
-struct ARROW_EXPORT Int16Type : public PrimitiveType<Int16Type> {
-  PRIMITIVE_DECL(Int16Type, int16_t, INT16, 2, "int16");
+struct ARROW_EXPORT UInt32Type
+    : public IntegerTypeImpl<UInt32Type, Type::UINT32, uint32_t> {
+  static std::string name() { return "uint32"; }
 };
 
-struct ARROW_EXPORT UInt32Type : public PrimitiveType<UInt32Type> {
-  PRIMITIVE_DECL(UInt32Type, uint32_t, UINT32, 4, "uint32");
+struct ARROW_EXPORT Int32Type : public IntegerTypeImpl<Int32Type, Type::INT32, int32_t> {
+  static std::string name() { return "int32"; }
 };
 
-struct ARROW_EXPORT Int32Type : public PrimitiveType<Int32Type> {
-  PRIMITIVE_DECL(Int32Type, int32_t, INT32, 4, "int32");
+struct ARROW_EXPORT UInt64Type
+    : public IntegerTypeImpl<UInt64Type, Type::UINT64, uint64_t> {
+  static std::string name() { return "uint64"; }
 };
 
-struct ARROW_EXPORT UInt64Type : public PrimitiveType<UInt64Type> {
-  PRIMITIVE_DECL(UInt64Type, uint64_t, UINT64, 8, "uint64");
+struct ARROW_EXPORT Int64Type : public IntegerTypeImpl<Int64Type, Type::INT64, int64_t> {
+  static std::string name() { return "int64"; }
 };
 
-struct ARROW_EXPORT Int64Type : public PrimitiveType<Int64Type> {
-  PRIMITIVE_DECL(Int64Type, int64_t, INT64, 8, "int64");
+struct ARROW_EXPORT HalfFloatType
+    : public CTypeImpl<HalfFloatType, Type::HALF_FLOAT, uint16_t>,
+      public FloatingPointMeta {
+  Precision precision() const override;
+  static std::string name() { return "halffloat"; }
 };
 
-struct ARROW_EXPORT FloatType : public PrimitiveType<FloatType> {
-  PRIMITIVE_DECL(FloatType, float, FLOAT, 4, "float");
+struct ARROW_EXPORT FloatType : public CTypeImpl<FloatType, Type::FLOAT, float>,
+                                public FloatingPointMeta {
+  Precision precision() const override;
+  static std::string name() { return "float"; }
 };
 
-struct ARROW_EXPORT DoubleType : public PrimitiveType<DoubleType> {
-  PRIMITIVE_DECL(DoubleType, double, DOUBLE, 8, "double");
+struct ARROW_EXPORT DoubleType : public CTypeImpl<DoubleType, Type::DOUBLE, double>,
+                                 public FloatingPointMeta {
+  Precision precision() const override;
+  static std::string name() { return "double"; }
 };
 
-struct ARROW_EXPORT ListType : public DataType {
+struct ARROW_EXPORT ListType : public DataType, public NoExtraMeta {
+  static constexpr Type::type type_id = Type::LIST;
+
   // List can contain any other logical value type
   explicit ListType(const std::shared_ptr<DataType>& value_type)
       : ListType(std::make_shared<Field>("item", value_type)) {}
@@ -252,16 +302,21 @@ struct ARROW_EXPORT ListType : public DataType {
 
   const std::shared_ptr<DataType>& value_type() const { return children_[0]->type; }
 
-  static char const* name() { return "list"; }
-
+  Status Accept(TypeVisitor* visitor) const override;
   std::string ToString() const override;
+
+  static std::string name() { return "list"; }
 };
 
 // BinaryType type is reprsents lists of 1-byte values.
-struct ARROW_EXPORT BinaryType : public DataType {
+struct ARROW_EXPORT BinaryType : public DataType, public NoExtraMeta {
+  static constexpr Type::type type_id = Type::BINARY;
+
   BinaryType() : BinaryType(Type::BINARY) {}
-  static char const* name() { return "binary"; }
+
+  Status Accept(TypeVisitor* visitor) const override;
   std::string ToString() const override;
+  static std::string name() { return "binary"; }
 
  protected:
   // Allow subclasses to change the logical type.
@@ -270,25 +325,160 @@ struct ARROW_EXPORT BinaryType : public DataType {
 
 // UTF encoded strings
 struct ARROW_EXPORT StringType : public BinaryType {
-  StringType() : BinaryType(Type::STRING) {}
+  static constexpr Type::type type_id = Type::STRING;
 
-  static char const* name() { return "string"; }
+  StringType() : BinaryType(Type::STRING) {}
 
+  Status Accept(TypeVisitor* visitor) const override;
   std::string ToString() const override;
+  static std::string name() { return "utf8"; }
 };
 
-struct ARROW_EXPORT StructType : public DataType {
+struct ARROW_EXPORT StructType : public DataType, public NoExtraMeta {
+  static constexpr Type::type type_id = Type::STRUCT;
+
   explicit StructType(const std::vector<std::shared_ptr<Field>>& fields)
       : DataType(Type::STRUCT) {
     children_ = fields;
   }
 
+  Status Accept(TypeVisitor* visitor) const override;
   std::string ToString() const override;
+  static std::string name() { return "struct"; }
+};
+
+struct ARROW_EXPORT DecimalType : public DataType {
+  static constexpr Type::type type_id = Type::DECIMAL;
+
+  explicit DecimalType(int precision_, int scale_)
+      : DataType(Type::DECIMAL), precision(precision_), scale(scale_) {}
+  int precision;
+  int scale;
+
+  Status Accept(TypeVisitor* visitor) const override;
+  std::string ToString() const override;
+  static std::string name() { return "decimal"; }
+};
+
+enum class UnionMode : char { SPARSE, DENSE };
+
+struct ARROW_EXPORT UnionType : public DataType {
+  static constexpr Type::type type_id = Type::UNION;
+
+  UnionType(const std::vector<std::shared_ptr<Field>>& child_fields,
+      const std::vector<uint8_t>& type_ids, UnionMode mode = UnionMode::SPARSE)
+      : DataType(Type::UNION), mode(mode), type_ids(type_ids) {
+    children_ = child_fields;
+  }
+
+  std::string ToString() const override;
+  static std::string name() { return "union"; }
+  Status Accept(TypeVisitor* visitor) const override;
+
+  UnionMode mode;
+  std::vector<uint8_t> type_ids;
+};
+
+struct ARROW_EXPORT DateType : public DataType, public NoExtraMeta {
+  static constexpr Type::type type_id = Type::DATE;
+
+  DateType() : DataType(Type::DATE) {}
+
+  Status Accept(TypeVisitor* visitor) const override;
+  std::string ToString() const override { return name(); }
+  static std::string name() { return "date"; }
+};
+
+enum class TimeUnit : char { SECOND = 0, MILLI = 1, MICRO = 2, NANO = 3 };
+
+struct ARROW_EXPORT TimeType : public DataType {
+  static constexpr Type::type type_id = Type::TIME;
+  using Unit = TimeUnit;
+
+  TimeUnit unit;
+
+  explicit TimeType(TimeUnit unit = TimeUnit::MILLI) : DataType(Type::TIME), unit(unit) {}
+  TimeType(const TimeType& other) : TimeType(other.unit) {}
+
+  Status Accept(TypeVisitor* visitor) const override;
+  std::string ToString() const override { return name(); }
+  static std::string name() { return "time"; }
+};
+
+struct ARROW_EXPORT TimestampType : public DataType, public FixedWidthMeta {
+  using Unit = TimeUnit;
+
+  typedef int64_t c_type;
+  static constexpr Type::type type_id = Type::TIMESTAMP;
+
+  int bit_width() const override { return sizeof(int64_t) * 8; }
+
+  TimeUnit unit;
+
+  explicit TimestampType(TimeUnit unit = TimeUnit::MILLI)
+      : DataType(Type::TIMESTAMP), unit(unit) {}
+
+  TimestampType(const TimestampType& other) : TimestampType(other.unit) {}
+
+  Status Accept(TypeVisitor* visitor) const override;
+  std::string ToString() const override { return name(); }
+  static std::string name() { return "timestamp"; }
+};
+
+// Interval logical type (Type::INTERVAL); `unit` selects YEAR_MONTH or DAY_TIME.
+struct ARROW_EXPORT IntervalType : public DataType, public FixedWidthMeta {
+  enum class Unit : char { YEAR_MONTH = 0, DAY_TIME = 1 };
+
+  typedef int64_t c_type;
+  static constexpr Type::type type_id = Type::INTERVAL;
+
+  int bit_width() const override { return sizeof(int64_t) * 8; }
+
+  Unit unit;
+
+  explicit IntervalType(Unit unit = Unit::YEAR_MONTH)
+      : DataType(Type::INTERVAL), unit(unit) {}
+  IntervalType(const IntervalType& other) : IntervalType(other.unit) {}
+
+  Status Accept(TypeVisitor* visitor) const override;
+  std::string ToString() const override { return name(); }
+  static std::string name() { return "interval"; }  // distinct from DateType's "date"
+};
 
-// These will be defined elsewhere
-template <typename T>
-struct TypeTraits {};
+// Factory functions
+
+std::shared_ptr<DataType> ARROW_EXPORT null();
+std::shared_ptr<DataType> ARROW_EXPORT boolean();
+std::shared_ptr<DataType> ARROW_EXPORT int8();
+std::shared_ptr<DataType> ARROW_EXPORT int16();
+std::shared_ptr<DataType> ARROW_EXPORT int32();
+std::shared_ptr<DataType> ARROW_EXPORT int64();
+std::shared_ptr<DataType> ARROW_EXPORT uint8();
+std::shared_ptr<DataType> ARROW_EXPORT uint16();
+std::shared_ptr<DataType> ARROW_EXPORT uint32();
+std::shared_ptr<DataType> ARROW_EXPORT uint64();
+std::shared_ptr<DataType> ARROW_EXPORT float16();
+std::shared_ptr<DataType> ARROW_EXPORT float32();
+std::shared_ptr<DataType> ARROW_EXPORT float64();
+std::shared_ptr<DataType> ARROW_EXPORT utf8();
+std::shared_ptr<DataType> ARROW_EXPORT binary();
+
+std::shared_ptr<DataType> ARROW_EXPORT list(const std::shared_ptr<Field>& value_type);
+std::shared_ptr<DataType> ARROW_EXPORT list(const std::shared_ptr<DataType>& value_type);
+
+std::shared_ptr<DataType> ARROW_EXPORT date();
+std::shared_ptr<DataType> ARROW_EXPORT timestamp(TimeUnit unit);
+std::shared_ptr<DataType> ARROW_EXPORT time(TimeUnit unit);
+
+std::shared_ptr<DataType> ARROW_EXPORT struct_(
+    const std::vector<std::shared_ptr<Field>>& fields);
+
+std::shared_ptr<DataType> ARROW_EXPORT union_(
+    const std::vector<std::shared_ptr<Field>>& child_fields,
+    const std::vector<uint8_t>& type_ids, UnionMode mode = UnionMode::SPARSE);
+
+std::shared_ptr<Field> ARROW_EXPORT field(const std::string& name,
+    const std::shared_ptr<DataType>& type, bool nullable = true, int64_t dictionary = 0);
 
 }  // namespace arrow
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/type_fwd.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type_fwd.h b/cpp/src/arrow/type_fwd.h
new file mode 100644
index 0000000..6d660f4
--- /dev/null
+++ b/cpp/src/arrow/type_fwd.h
@@ -0,0 +1,157 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_TYPE_FWD_H
+#define ARROW_TYPE_FWD_H
+
+namespace arrow {
+
+class Status;
+
+struct DataType;
+class Array;
+class ArrayBuilder;
+struct Field;
+
+class Buffer;
+class MemoryPool;
+class RecordBatch;
+class Schema;
+
+struct NullType;
+class NullArray;
+
+struct BooleanType;
+class BooleanArray;
+class BooleanBuilder;
+
+struct BinaryType;
+class BinaryArray;
+class BinaryBuilder;
+
+struct StringType;
+class StringArray;
+class StringBuilder;
+
+struct ListType;
+class ListArray;
+class ListBuilder;
+
+struct StructType;
+class StructArray;
+class StructBuilder;
+
+struct DecimalType;
+class DecimalArray;
+
+struct UnionType;
+class UnionArray;
+
+template <typename TypeClass>
+class NumericArray;
+
+template <typename TypeClass>
+class NumericBuilder;
+
+#define _NUMERIC_TYPE_DECL(KLASS)                 \
+  struct KLASS##Type;                             \
+  using KLASS##Array = NumericArray<KLASS##Type>; \
+  using KLASS##Builder = NumericBuilder<KLASS##Type>;
+
+_NUMERIC_TYPE_DECL(Int8);
+_NUMERIC_TYPE_DECL(Int16);
+_NUMERIC_TYPE_DECL(Int32);
+_NUMERIC_TYPE_DECL(Int64);
+_NUMERIC_TYPE_DECL(UInt8);
+_NUMERIC_TYPE_DECL(UInt16);
+_NUMERIC_TYPE_DECL(UInt32);
+_NUMERIC_TYPE_DECL(UInt64);
+_NUMERIC_TYPE_DECL(HalfFloat);
+_NUMERIC_TYPE_DECL(Float);
+_NUMERIC_TYPE_DECL(Double);
+
+#undef _NUMERIC_TYPE_DECL
+
+struct DateType;
+class DateArray;
+
+struct TimeType;
+class TimeArray;
+
+struct TimestampType;
+using TimestampArray = NumericArray<TimestampType>;
+
+struct IntervalType;
+using IntervalArray = NumericArray<IntervalType>;
+
+class TypeVisitor {
+ public:
+  virtual Status Visit(const NullType& type) = 0;
+  virtual Status Visit(const BooleanType& type) = 0;
+  virtual Status Visit(const Int8Type& type) = 0;
+  virtual Status Visit(const Int16Type& type) = 0;
+  virtual Status Visit(const Int32Type& type) = 0;
+  virtual Status Visit(const Int64Type& type) = 0;
+  virtual Status Visit(const UInt8Type& type) = 0;
+  virtual Status Visit(const UInt16Type& type) = 0;
+  virtual Status Visit(const UInt32Type& type) = 0;
+  virtual Status Visit(const UInt64Type& type) = 0;
+  virtual Status Visit(const HalfFloatType& type) = 0;
+  virtual Status Visit(const FloatType& type) = 0;
+  virtual Status Visit(const DoubleType& type) = 0;
+  virtual Status Visit(const StringType& type) = 0;
+  virtual Status Visit(const BinaryType& type) = 0;
+  virtual Status Visit(const DateType& type) = 0;
+  virtual Status Visit(const TimeType& type) = 0;
+  virtual Status Visit(const TimestampType& type) = 0;
+  virtual Status Visit(const IntervalType& type) = 0;
+  virtual Status Visit(const DecimalType& type) = 0;
+  virtual Status Visit(const ListType& type) = 0;
+  virtual Status Visit(const StructType& type) = 0;
+  virtual Status Visit(const UnionType& type) = 0;
+};
+
+class ArrayVisitor {
+ public:
+  virtual Status Visit(const NullArray& array) = 0;
+  virtual Status Visit(const BooleanArray& array) = 0;
+  virtual Status Visit(const Int8Array& array) = 0;
+  virtual Status Visit(const Int16Array& array) = 0;
+  virtual Status Visit(const Int32Array& array) = 0;
+  virtual Status Visit(const Int64Array& array) = 0;
+  virtual Status Visit(const UInt8Array& array) = 0;
+  virtual Status Visit(const UInt16Array& array) = 0;
+  virtual Status Visit(const UInt32Array& array) = 0;
+  virtual Status Visit(const UInt64Array& array) = 0;
+  virtual Status Visit(const HalfFloatArray& array) = 0;
+  virtual Status Visit(const FloatArray& array) = 0;
+  virtual Status Visit(const DoubleArray& array) = 0;
+  virtual Status Visit(const StringArray& array) = 0;
+  virtual Status Visit(const BinaryArray& array) = 0;
+  virtual Status Visit(const DateArray& array) = 0;
+  virtual Status Visit(const TimeArray& array) = 0;
+  virtual Status Visit(const TimestampArray& array) = 0;
+  virtual Status Visit(const IntervalArray& array) = 0;
+  virtual Status Visit(const DecimalArray& array) = 0;
+  virtual Status Visit(const ListArray& array) = 0;
+  virtual Status Visit(const StructArray& array) = 0;
+  virtual Status Visit(const UnionArray& array) = 0;
+};
+
+}  // namespace arrow
+
+#endif  // ARROW_TYPE_FWD_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/type_traits.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h
new file mode 100644
index 0000000..bbb8074
--- /dev/null
+++ b/cpp/src/arrow/type_traits.h
@@ -0,0 +1,197 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_TYPE_TRAITS_H
+#define ARROW_TYPE_TRAITS_H
+
+#include <type_traits>
+
+#include "arrow/type_fwd.h"
+#include "arrow/util/bit-util.h"
+
+namespace arrow {
+
+template <typename T>
+struct TypeTraits {};
+
+template <>
+struct TypeTraits<UInt8Type> {
+  using ArrayType = UInt8Array;
+  using BuilderType = UInt8Builder;
+  static inline int bytes_required(int elements) { return elements; }
+};
+
+template <>
+struct TypeTraits<Int8Type> {
+  using ArrayType = Int8Array;
+  using BuilderType = Int8Builder;
+  static inline int bytes_required(int elements) { return elements; }
+};
+
+template <>
+struct TypeTraits<UInt16Type> {
+  using ArrayType = UInt16Array;
+  using BuilderType = UInt16Builder;
+
+  static inline int bytes_required(int elements) { return elements * sizeof(uint16_t); }
+};
+
+template <>
+struct TypeTraits<Int16Type> {
+  using ArrayType = Int16Array;
+  using BuilderType = Int16Builder;
+
+  static inline int bytes_required(int elements) { return elements * sizeof(int16_t); }
+};
+
+template <>
+struct TypeTraits<UInt32Type> {
+  using ArrayType = UInt32Array;
+  using BuilderType = UInt32Builder;
+
+  static inline int bytes_required(int elements) { return elements * sizeof(uint32_t); }
+};
+
+template <>
+struct TypeTraits<Int32Type> {
+  using ArrayType = Int32Array;
+  using BuilderType = Int32Builder;
+
+  static inline int bytes_required(int elements) { return elements * sizeof(int32_t); }
+};
+
+template <>
+struct TypeTraits<UInt64Type> {
+  using ArrayType = UInt64Array;
+  using BuilderType = UInt64Builder;
+
+  static inline int bytes_required(int elements) { return elements * sizeof(uint64_t); }
+};
+
+template <>
+struct TypeTraits<Int64Type> {
+  using ArrayType = Int64Array;
+  using BuilderType = Int64Builder;
+
+  static inline int bytes_required(int elements) { return elements * sizeof(int64_t); }
+};
+
+template <>
+struct TypeTraits<TimestampType> {
+  using ArrayType = TimestampArray;
+  // using BuilderType = TimestampBuilder;
+
+  static inline int bytes_required(int elements) { return elements * sizeof(int64_t); }
+};
+
+template <>
+struct TypeTraits<HalfFloatType> {
+  using ArrayType = HalfFloatArray;
+  using BuilderType = HalfFloatBuilder;
+
+  static inline int bytes_required(int elements) { return elements * sizeof(uint16_t); }
+};
+
+template <>
+struct TypeTraits<FloatType> {
+  using ArrayType = FloatArray;
+  using BuilderType = FloatBuilder;
+
+  static inline int bytes_required(int elements) { return elements * sizeof(float); }
+};
+
+template <>
+struct TypeTraits<DoubleType> {
+  using ArrayType = DoubleArray;
+  using BuilderType = DoubleBuilder;
+
+  static inline int bytes_required(int elements) { return elements * sizeof(double); }
+};
+
+template <>
+struct TypeTraits<BooleanType> {
+  using ArrayType = BooleanArray;
+  using BuilderType = BooleanBuilder;
+
+  static inline int bytes_required(int elements) {
+    return BitUtil::BytesForBits(elements);
+  }
+};
+
+template <>
+struct TypeTraits<StringType> {
+  using ArrayType = StringArray;
+  using BuilderType = StringBuilder;
+};
+
+template <>
+struct TypeTraits<BinaryType> {
+  using ArrayType = BinaryArray;
+  using BuilderType = BinaryBuilder;
+};
+
+// Not all type classes have a c_type
+template <typename T>
+struct as_void {
+  using type = void;
+};
+
+// The partial specialization will match if T has the ATTR_NAME member
+#define GET_ATTR(ATTR_NAME, DEFAULT)                                             \
+  template <typename T, typename Enable = void>                                  \
+  struct GetAttr_##ATTR_NAME {                                                   \
+    using type = DEFAULT;                                                        \
+  };                                                                             \
+                                                                                 \
+  template <typename T>                                                          \
+  struct GetAttr_##ATTR_NAME<T, typename as_void<typename T::ATTR_NAME>::type> { \
+    using type = typename T::ATTR_NAME;                                          \
+  };
+
+GET_ATTR(c_type, void);
+GET_ATTR(TypeClass, void);
+
+#undef GET_ATTR
+
+#define PRIMITIVE_TRAITS(T)                                                           \
+  using TypeClass = typename std::conditional<std::is_base_of<DataType, T>::value, T, \
+      typename GetAttr_TypeClass<T>::type>::type;                                     \
+  using c_type = typename GetAttr_c_type<TypeClass>::type;
+
+template <typename T>
+struct IsUnsignedInt {
+  PRIMITIVE_TRAITS(T);
+  static constexpr bool value =
+      std::is_integral<c_type>::value && std::is_unsigned<c_type>::value;
+};
+
+template <typename T>
+struct IsSignedInt {
+  PRIMITIVE_TRAITS(T);
+  static constexpr bool value =
+      std::is_integral<c_type>::value && std::is_signed<c_type>::value;
+};
+
+template <typename T>
+struct IsFloatingPoint {
+  PRIMITIVE_TRAITS(T);
+  static constexpr bool value = std::is_floating_point<c_type>::value;
+};
+
+}  // namespace arrow
+
+#endif  // ARROW_TYPE_TRAITS_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/CMakeLists.txt b/cpp/src/arrow/types/CMakeLists.txt
index 9f78169..6d59acf 100644
--- a/cpp/src/arrow/types/CMakeLists.txt
+++ b/cpp/src/arrow/types/CMakeLists.txt
@@ -21,7 +21,6 @@
 
 # Headers: top level
 install(FILES
-  collection.h
   construct.h
   datetime.h
   decimal.h

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/collection.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/collection.h b/cpp/src/arrow/types/collection.h
deleted file mode 100644
index 1712030..0000000
--- a/cpp/src/arrow/types/collection.h
+++ /dev/null
@@ -1,41 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef ARROW_TYPES_COLLECTION_H
-#define ARROW_TYPES_COLLECTION_H
-
-#include <string>
-#include <vector>
-
-#include "arrow/type.h"
-
-namespace arrow {
-
-template <Type::type T>
-struct CollectionType : public DataType {
-  std::vector<TypePtr> child_types_;
-
-  CollectionType() : DataType(T) {}
-
-  const TypePtr& child(int i) const { return child_types_[i]; }
-
-  int num_children() const { return child_types_.size(); }
-};
-
-}  // namespace arrow
-
-#endif  // ARROW_TYPES_COLLECTION_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/datetime.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/datetime.h b/cpp/src/arrow/types/datetime.h
index 241a126..a8f8639 100644
--- a/cpp/src/arrow/types/datetime.h
+++ b/cpp/src/arrow/types/datetime.h
@@ -22,41 +22,6 @@
 
 #include "arrow/type.h"
 
-namespace arrow {
-
-struct DateType : public DataType {
-  enum class Unit : char { DAY = 0, MONTH = 1, YEAR = 2 };
-
-  Unit unit;
-
-  explicit DateType(Unit unit = Unit::DAY) : DataType(Type::DATE), unit(unit) {}
-
-  DateType(const DateType& other) : DateType(other.unit) {}
-
-  static char const* name() { return "date"; }
-};
-
-struct ARROW_EXPORT TimestampType : public DataType {
-  enum class Unit : char { SECOND = 0, MILLI = 1, MICRO = 2, NANO = 3 };
-
-  typedef int64_t c_type;
-  static constexpr Type::type type_enum = Type::TIMESTAMP;
-
-  int value_size() const override { return sizeof(int64_t); }
-
-  Unit unit;
-
-  explicit TimestampType(Unit unit = Unit::MILLI)
-      : DataType(Type::TIMESTAMP), unit(unit) {}
-
-  TimestampType(const TimestampType& other) : TimestampType(other.unit) {}
-  virtual ~TimestampType() {}
-
-  std::string ToString() const override { return "timestamp"; }
-
-  static char const* name() { return "timestamp"; }
-};
-
-}  // namespace arrow
+namespace arrow {}  // namespace arrow
 
 #endif  // ARROW_TYPES_DATETIME_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/decimal.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/decimal.h b/cpp/src/arrow/types/decimal.h
index 6c497c5..b3ea3a5 100644
--- a/cpp/src/arrow/types/decimal.h
+++ b/cpp/src/arrow/types/decimal.h
@@ -23,18 +23,6 @@
 #include "arrow/type.h"
 #include "arrow/util/visibility.h"
 
-namespace arrow {
-
-struct ARROW_EXPORT DecimalType : public DataType {
-  explicit DecimalType(int precision_, int scale_)
-      : DataType(Type::DECIMAL), precision(precision_), scale(scale_) {}
-  int precision;
-  int scale;
-  static char const* name() { return "decimal"; }
-
-  std::string ToString() const override;
-};
-
-}  // namespace arrow
+namespace arrow {}  // namespace arrow
 
 #endif  // ARROW_TYPES_DECIMAL_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/list-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list-test.cc b/cpp/src/arrow/types/list-test.cc
index 12c5394..cb9a8c1 100644
--- a/cpp/src/arrow/types/list-test.cc
+++ b/cpp/src/arrow/types/list-test.cc
@@ -141,7 +141,7 @@ TEST_F(TestListBuilder, TestAppendNull) {
   ASSERT_TRUE(result_->IsNull(0));
   ASSERT_TRUE(result_->IsNull(1));
 
-  ASSERT_EQ(0, result_->offsets()[0]);
+  ASSERT_EQ(0, result_->raw_offsets()[0]);
   ASSERT_EQ(0, result_->offset(1));
   ASSERT_EQ(0, result_->offset(2));
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/list.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list.cc b/cpp/src/arrow/types/list.cc
index 4b1e821..d865632 100644
--- a/cpp/src/arrow/types/list.cc
+++ b/cpp/src/arrow/types/list.cc
@@ -155,4 +155,8 @@ void ListBuilder::Reset() {
   null_bitmap_ = nullptr;
 }
 
+Status ListArray::Accept(ArrayVisitor* visitor) const {  // hands *this to the visitor
+  return visitor->Visit(*this);  // visitor is dereferenced unchecked; Visit's Status is returned
+}
+
 }  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/list.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list.h b/cpp/src/arrow/types/list.h
index 9440ffe..bd93e8f 100644
--- a/cpp/src/arrow/types/list.h
+++ b/cpp/src/arrow/types/list.h
@@ -39,6 +39,8 @@ class MemoryPool;
 
 class ARROW_EXPORT ListArray : public Array {
  public:
+  using TypeClass = ListType;
+
   ListArray(const TypePtr& type, int32_t length, std::shared_ptr<Buffer> offsets,
       const ArrayPtr& values, int32_t null_count = 0,
       std::shared_ptr<Buffer> null_bitmap = nullptr)
@@ -56,13 +58,13 @@ class ARROW_EXPORT ListArray : public Array {
   // Return a shared pointer in case the requestor desires to share ownership
   // with this array.
   const std::shared_ptr<Array>& values() const { return values_; }
-  const std::shared_ptr<Buffer> offset_buffer() const {
+  std::shared_ptr<Buffer> offsets() const {
     return std::static_pointer_cast<Buffer>(offset_buffer_);
   }
 
   const std::shared_ptr<DataType>& value_type() const { return values_->type(); }
 
-  const int32_t* offsets() const { return offsets_; }
+  const int32_t* raw_offsets() const { return offsets_; }  // returns the offsets_ member pointer as-is
 
   int32_t offset(int i) const { return offsets_[i]; }
 
@@ -76,6 +78,8 @@ class ARROW_EXPORT ListArray : public Array {
   bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx,
       const ArrayPtr& arr) const override;
 
+  Status Accept(ArrayVisitor* visitor) const override;
+
  protected:
   std::shared_ptr<Buffer> offset_buffer_;
   const int32_t* offsets_;

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/primitive-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive-test.cc b/cpp/src/arrow/types/primitive-test.cc
index e47f6dc..bdc8ec0 100644
--- a/cpp/src/arrow/types/primitive-test.cc
+++ b/cpp/src/arrow/types/primitive-test.cc
@@ -25,6 +25,7 @@
 #include "arrow/builder.h"
 #include "arrow/test-util.h"
 #include "arrow/type.h"
+#include "arrow/type_traits.h"
 #include "arrow/types/construct.h"
 #include "arrow/types/primitive.h"
 #include "arrow/types/test-common.h"
@@ -41,15 +42,15 @@ namespace arrow {
 
 class Array;
 
-#define PRIMITIVE_TEST(KLASS, ENUM, NAME) \
-  TEST(TypesTest, TestPrimitive_##ENUM) { \
-    KLASS tp;                             \
-                                          \
-    ASSERT_EQ(tp.type, Type::ENUM);       \
-    ASSERT_EQ(tp.name(), string(NAME));   \
-                                          \
-    KLASS tp_copy = tp;                   \
-    ASSERT_EQ(tp_copy.type, Type::ENUM);  \
+#define PRIMITIVE_TEST(KLASS, ENUM, NAME)   \
+  TEST(TypesTest, TestPrimitive_##ENUM) {   \
+    KLASS tp;                               \
+                                            \
+    ASSERT_EQ(tp.type, Type::ENUM);         \
+    ASSERT_EQ(tp.ToString(), string(NAME)); \
+                                            \
+    KLASS tp_copy = tp;                     \
+    ASSERT_EQ(tp_copy.type, Type::ENUM);    \
   }
 
 PRIMITIVE_TEST(Int8Type, INT8, "int8");
@@ -243,7 +244,8 @@ void TestPrimitiveBuilder<PBoolean>::Check(
 }
 
 typedef ::testing::Types<PBoolean, PUInt8, PUInt16, PUInt32, PUInt64, PInt8, PInt16,
-    PInt32, PInt64, PFloat, PDouble> Primitives;
+    PInt32, PInt64, PFloat, PDouble>
+    Primitives;
 
 TYPED_TEST_CASE(TestPrimitiveBuilder, Primitives);
 
@@ -311,20 +313,6 @@ TYPED_TEST(TestPrimitiveBuilder, TestArrayDtorDealloc) {
   ASSERT_EQ(memory_before, this->pool_->bytes_allocated());
 }
 
-template <class T, class Builder>
-Status MakeArray(const vector<uint8_t>& valid_bytes, const vector<T>& draws, int size,
-    Builder* builder, ArrayPtr* out) {
-  // Append the first 1000
-  for (int i = 0; i < size; ++i) {
-    if (valid_bytes[i] > 0) {
-      RETURN_NOT_OK(builder->Append(draws[i]));
-    } else {
-      RETURN_NOT_OK(builder->AppendNull());
-    }
-  }
-  return builder->Finish(out);
-}
-
 TYPED_TEST(TestPrimitiveBuilder, Equality) {
   DECL_T();
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/primitive.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive.cc b/cpp/src/arrow/types/primitive.cc
index d2288ba..14667ee 100644
--- a/cpp/src/arrow/types/primitive.cc
+++ b/cpp/src/arrow/types/primitive.cc
@@ -19,6 +19,7 @@
 
 #include <memory>
 
+#include "arrow/type_traits.h"
 #include "arrow/util/bit-util.h"
 #include "arrow/util/buffer.h"
 #include "arrow/util/logging.h"
@@ -48,13 +49,14 @@ bool PrimitiveArray::EqualsExact(const PrimitiveArray& other) const {
     const uint8_t* this_data = raw_data_;
     const uint8_t* other_data = other.raw_data_;
 
-    int value_size = type_->value_size();
-    DCHECK_GT(value_size, 0);
+    auto size_meta = dynamic_cast<const FixedWidthMeta*>(type_.get());
+    int value_byte_size = size_meta->bit_width() / 8;
+    DCHECK_GT(value_byte_size, 0);
 
     for (int i = 0; i < length_; ++i) {
-      if (!IsNull(i) && memcmp(this_data, other_data, value_size)) { return false; }
-      this_data += value_size;
-      other_data += value_size;
+      if (!IsNull(i) && memcmp(this_data, other_data, value_byte_size)) { return false; }
+      this_data += value_byte_size;
+      other_data += value_byte_size;
     }
     return true;
   } else {
@@ -70,6 +72,11 @@ bool PrimitiveArray::Equals(const std::shared_ptr<Array>& arr) const {
   return EqualsExact(*static_cast<const PrimitiveArray*>(arr.get()));
 }
 
+template <typename T>
+Status NumericArray<T>::Accept(ArrayVisitor* visitor) const {  // hands *this to the visitor
+  return visitor->Visit(*this);  // visitor is dereferenced unchecked; Visit's Status is returned
+}
+
 template class NumericArray<UInt8Type>;
 template class NumericArray<UInt16Type>;
 template class NumericArray<UInt32Type>;
@@ -79,9 +86,9 @@ template class NumericArray<Int16Type>;
 template class NumericArray<Int32Type>;
 template class NumericArray<Int64Type>;
 template class NumericArray<TimestampType>;
+template class NumericArray<HalfFloatType>;
 template class NumericArray<FloatType>;
 template class NumericArray<DoubleType>;
-template class NumericArray<BooleanType>;
 
 template <typename T>
 Status PrimitiveBuilder<T>::Init(int32_t capacity) {
@@ -145,8 +152,65 @@ Status PrimitiveBuilder<T>::Finish(std::shared_ptr<Array>* out) {
   return Status::OK();
 }
 
-template <>
-Status PrimitiveBuilder<BooleanType>::Append(
+template class PrimitiveBuilder<UInt8Type>;
+template class PrimitiveBuilder<UInt16Type>;
+template class PrimitiveBuilder<UInt32Type>;
+template class PrimitiveBuilder<UInt64Type>;
+template class PrimitiveBuilder<Int8Type>;
+template class PrimitiveBuilder<Int16Type>;
+template class PrimitiveBuilder<Int32Type>;
+template class PrimitiveBuilder<Int64Type>;
+template class PrimitiveBuilder<TimestampType>;
+template class PrimitiveBuilder<HalfFloatType>;
+template class PrimitiveBuilder<FloatType>;
+template class PrimitiveBuilder<DoubleType>;
+
+Status BooleanBuilder::Init(int32_t capacity) {  // allocate zeroed storage sized from `capacity`
+  RETURN_NOT_OK(ArrayBuilder::Init(capacity));  // base-class Init runs first
+  data_ = std::make_shared<PoolBuffer>(pool_);  // new buffer constructed from pool_
+
+  int64_t nbytes = BitUtil::BytesForBits(capacity);  // presumably bytes needed for `capacity` bits
+  RETURN_NOT_OK(data_->Resize(nbytes));
+  // TODO(emkornfield) valgrind complains without this
+  memset(data_->mutable_data(), 0, nbytes);  // zero all nbytes of the buffer
+
+  raw_data_ = reinterpret_cast<uint8_t*>(data_->mutable_data());  // cache the buffer pointer
+  return Status::OK();
+}
+
+Status BooleanBuilder::Resize(int32_t capacity) {  // resize storage for `capacity` values
+  // XXX: Set floor size for now
+  if (capacity < kMinBuilderCapacity) { capacity = kMinBuilderCapacity; }
+  if (capacity_ == 0) {
+    RETURN_NOT_OK(Init(capacity));  // capacity_ is 0: take the Init path instead of resizing
+  } else {
+    RETURN_NOT_OK(ArrayBuilder::Resize(capacity));
+    const int64_t old_bytes = data_->size();
+    const int64_t new_bytes = BitUtil::BytesForBits(capacity);
+    RETURN_NOT_OK(data_->Resize(new_bytes));
+    raw_data_ = reinterpret_cast<uint8_t*>(data_->mutable_data());  // refresh cached pointer
+    if (new_bytes > old_bytes) {  // on shrink the length would wrap to a huge size_t
+      memset(raw_data_ + old_bytes, 0, new_bytes - old_bytes);  // zero only the added tail
+    }
+  }
+  return Status::OK();
+}
+
+Status BooleanBuilder::Finish(std::shared_ptr<Array>* out) {  // emit a BooleanArray, then reset
+  const int64_t bytes_required = BitUtil::BytesForBits(length_);
+
+  if (bytes_required > 0 && bytes_required < data_->size()) {  // short-circuits before data_ when 0
+    // Trim buffers
+    RETURN_NOT_OK(data_->Resize(bytes_required));
+  }
+  *out = std::make_shared<BooleanArray>(type_, length_, data_, null_count_, null_bitmap_);
+
+  data_ = null_bitmap_ = nullptr;  // drop this builder's references to both buffers
+  capacity_ = length_ = null_count_ = 0;  // counters back to zero
+  return Status::OK();
+}
+
+Status BooleanBuilder::Append(
     const uint8_t* values, int32_t length, const uint8_t* valid_bytes) {
   RETURN_NOT_OK(Reserve(length));
 
@@ -168,19 +232,6 @@ Status PrimitiveBuilder<BooleanType>::Append(
   return Status::OK();
 }
 
-template class PrimitiveBuilder<UInt8Type>;
-template class PrimitiveBuilder<UInt16Type>;
-template class PrimitiveBuilder<UInt32Type>;
-template class PrimitiveBuilder<UInt64Type>;
-template class PrimitiveBuilder<Int8Type>;
-template class PrimitiveBuilder<Int16Type>;
-template class PrimitiveBuilder<Int32Type>;
-template class PrimitiveBuilder<Int64Type>;
-template class PrimitiveBuilder<TimestampType>;
-template class PrimitiveBuilder<FloatType>;
-template class PrimitiveBuilder<DoubleType>;
-template class PrimitiveBuilder<BooleanType>;
-
 BooleanArray::BooleanArray(int32_t length, const std::shared_ptr<Buffer>& data,
     int32_t null_count, const std::shared_ptr<Buffer>& null_bitmap)
     : PrimitiveArray(
@@ -235,4 +286,8 @@ bool BooleanArray::RangeEquals(int32_t start_idx, int32_t end_idx,
   return true;
 }
 
+Status BooleanArray::Accept(ArrayVisitor* visitor) const {  // hands *this to the visitor
+  return visitor->Visit(*this);  // visitor is dereferenced unchecked; Visit's Status is returned
+}
+
 }  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/primitive.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive.h b/cpp/src/arrow/types/primitive.h
index c71df58..a5a3704 100644
--- a/cpp/src/arrow/types/primitive.h
+++ b/cpp/src/arrow/types/primitive.h
@@ -26,6 +26,7 @@
 #include "arrow/array.h"
 #include "arrow/builder.h"
 #include "arrow/type.h"
+#include "arrow/type_fwd.h"
 #include "arrow/types/datetime.h"
 #include "arrow/util/bit-util.h"
 #include "arrow/util/buffer.h"
@@ -54,9 +55,10 @@ class ARROW_EXPORT PrimitiveArray : public Array {
   const uint8_t* raw_data_;
 };
 
-template <class TypeClass>
+template <class TYPE>
 class ARROW_EXPORT NumericArray : public PrimitiveArray {
  public:
+  using TypeClass = TYPE;
   using value_type = typename TypeClass::c_type;
   NumericArray(int32_t length, const std::shared_ptr<Buffer>& data,
       int32_t null_count = 0, const std::shared_ptr<Buffer>& null_bitmap = nullptr)
@@ -88,29 +90,15 @@ class ARROW_EXPORT NumericArray : public PrimitiveArray {
     return reinterpret_cast<const value_type*>(raw_data_);
   }
 
+  Status Accept(ArrayVisitor* visitor) const override;
+
   value_type Value(int i) const { return raw_data()[i]; }
 };
 
-#define NUMERIC_ARRAY_DECL(NAME, TypeClass) \
-  using NAME = NumericArray<TypeClass>;     \
-  extern template class ARROW_EXPORT NumericArray<TypeClass>;
-
-NUMERIC_ARRAY_DECL(UInt8Array, UInt8Type);
-NUMERIC_ARRAY_DECL(Int8Array, Int8Type);
-NUMERIC_ARRAY_DECL(UInt16Array, UInt16Type);
-NUMERIC_ARRAY_DECL(Int16Array, Int16Type);
-NUMERIC_ARRAY_DECL(UInt32Array, UInt32Type);
-NUMERIC_ARRAY_DECL(Int32Array, Int32Type);
-NUMERIC_ARRAY_DECL(UInt64Array, UInt64Type);
-NUMERIC_ARRAY_DECL(Int64Array, Int64Type);
-NUMERIC_ARRAY_DECL(TimestampArray, TimestampType);
-NUMERIC_ARRAY_DECL(FloatArray, FloatType);
-NUMERIC_ARRAY_DECL(DoubleArray, DoubleType);
-
 template <typename Type>
 class ARROW_EXPORT PrimitiveBuilder : public ArrayBuilder {
  public:
-  typedef typename Type::c_type value_type;
+  using value_type = typename Type::c_type;
 
   explicit PrimitiveBuilder(MemoryPool* pool, const TypePtr& type)
       : ArrayBuilder(pool, type), data_(nullptr) {}
@@ -183,101 +171,27 @@ class ARROW_EXPORT NumericBuilder : public PrimitiveBuilder<T> {
   using PrimitiveBuilder<T>::raw_data_;
 };
 
-template <>
-struct TypeTraits<UInt8Type> {
-  typedef UInt8Array ArrayType;
-
-  static inline int bytes_required(int elements) { return elements; }
-};
-
-template <>
-struct TypeTraits<Int8Type> {
-  typedef Int8Array ArrayType;
-
-  static inline int bytes_required(int elements) { return elements; }
-};
-
-template <>
-struct TypeTraits<UInt16Type> {
-  typedef UInt16Array ArrayType;
-
-  static inline int bytes_required(int elements) { return elements * sizeof(uint16_t); }
-};
-
-template <>
-struct TypeTraits<Int16Type> {
-  typedef Int16Array ArrayType;
-
-  static inline int bytes_required(int elements) { return elements * sizeof(int16_t); }
-};
-
-template <>
-struct TypeTraits<UInt32Type> {
-  typedef UInt32Array ArrayType;
-
-  static inline int bytes_required(int elements) { return elements * sizeof(uint32_t); }
-};
-
-template <>
-struct TypeTraits<Int32Type> {
-  typedef Int32Array ArrayType;
-
-  static inline int bytes_required(int elements) { return elements * sizeof(int32_t); }
-};
-
-template <>
-struct TypeTraits<UInt64Type> {
-  typedef UInt64Array ArrayType;
-
-  static inline int bytes_required(int elements) { return elements * sizeof(uint64_t); }
-};
-
-template <>
-struct TypeTraits<Int64Type> {
-  typedef Int64Array ArrayType;
-
-  static inline int bytes_required(int elements) { return elements * sizeof(int64_t); }
-};
-
-template <>
-struct TypeTraits<TimestampType> {
-  typedef TimestampArray ArrayType;
-
-  static inline int bytes_required(int elements) { return elements * sizeof(int64_t); }
-};
-template <>
-
-struct TypeTraits<FloatType> {
-  typedef FloatArray ArrayType;
-
-  static inline int bytes_required(int elements) { return elements * sizeof(float); }
-};
-
-template <>
-struct TypeTraits<DoubleType> {
-  typedef DoubleArray ArrayType;
-
-  static inline int bytes_required(int elements) { return elements * sizeof(double); }
-};
-
 // Builders
 
-typedef NumericBuilder<UInt8Type> UInt8Builder;
-typedef NumericBuilder<UInt16Type> UInt16Builder;
-typedef NumericBuilder<UInt32Type> UInt32Builder;
-typedef NumericBuilder<UInt64Type> UInt64Builder;
+using UInt8Builder = NumericBuilder<UInt8Type>;
+using UInt16Builder = NumericBuilder<UInt16Type>;
+using UInt32Builder = NumericBuilder<UInt32Type>;
+using UInt64Builder = NumericBuilder<UInt64Type>;
 
-typedef NumericBuilder<Int8Type> Int8Builder;
-typedef NumericBuilder<Int16Type> Int16Builder;
-typedef NumericBuilder<Int32Type> Int32Builder;
-typedef NumericBuilder<Int64Type> Int64Builder;
-typedef NumericBuilder<TimestampType> TimestampBuilder;
+using Int8Builder = NumericBuilder<Int8Type>;
+using Int16Builder = NumericBuilder<Int16Type>;
+using Int32Builder = NumericBuilder<Int32Type>;
+using Int64Builder = NumericBuilder<Int64Type>;
+using TimestampBuilder = NumericBuilder<TimestampType>;
 
-typedef NumericBuilder<FloatType> FloatBuilder;
-typedef NumericBuilder<DoubleType> DoubleBuilder;
+using HalfFloatBuilder = NumericBuilder<HalfFloatType>;
+using FloatBuilder = NumericBuilder<FloatType>;
+using DoubleBuilder = NumericBuilder<DoubleType>;
 
 class ARROW_EXPORT BooleanArray : public PrimitiveArray {
  public:
+  using TypeClass = BooleanType;
+
   BooleanArray(int32_t length, const std::shared_ptr<Buffer>& data,
       int32_t null_count = 0, const std::shared_ptr<Buffer>& null_bitmap = nullptr);
   BooleanArray(const TypePtr& type, int32_t length, const std::shared_ptr<Buffer>& data,
@@ -288,28 +202,36 @@ class ARROW_EXPORT BooleanArray : public PrimitiveArray {
   bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx,
       const ArrayPtr& arr) const override;
 
+  Status Accept(ArrayVisitor* visitor) const override;
+
   const uint8_t* raw_data() const { return reinterpret_cast<const uint8_t*>(raw_data_); }
 
   bool Value(int i) const { return BitUtil::GetBit(raw_data(), i); }
 };
 
-template <>
-struct TypeTraits<BooleanType> {
-  typedef BooleanArray ArrayType;
-
-  static inline int bytes_required(int elements) {
-    return BitUtil::BytesForBits(elements);
-  }
-};
-
-class ARROW_EXPORT BooleanBuilder : public PrimitiveBuilder<BooleanType> {
+class ARROW_EXPORT BooleanBuilder : public ArrayBuilder {
  public:
   explicit BooleanBuilder(MemoryPool* pool, const TypePtr& type)
-      : PrimitiveBuilder<BooleanType>(pool, type) {}
+      : ArrayBuilder(pool, type), data_(nullptr) {}
 
   virtual ~BooleanBuilder() {}
 
-  using PrimitiveBuilder<BooleanType>::Append;
+  using ArrayBuilder::Advance;
+
+  // Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory
+  Status AppendNulls(const uint8_t* valid_bytes, int32_t length) {
+    RETURN_NOT_OK(Reserve(length));  // reserve `length` slots before the Unsafe* call
+    UnsafeAppendToBitmap(valid_bytes, length);  // data_ is not written by this method
+    return Status::OK();
+  }
+
+  Status AppendNull() {  // appends one slot flagged false in the bitmap
+    RETURN_NOT_OK(Reserve(1));  // reserve one slot before the Unsafe* call
+    UnsafeAppendToBitmap(false);  // data_ is not written by this method
+    return Status::OK();
+  }
+
+  std::shared_ptr<Buffer> data() const { return data_; }  // copy of data_, converted to Buffer
 
   // Scalar append
   Status Append(bool val) {
@@ -324,9 +246,39 @@ class ARROW_EXPORT BooleanBuilder : public PrimitiveBuilder<BooleanType> {
     return Status::OK();
   }
 
-  Status Append(uint8_t val) { return Append(static_cast<bool>(val)); }
+  // Vector append
+  //
+  // If passed, valid_bytes is of equal length to values, and any zero byte
+  // will be considered as a null for that slot
+  Status Append(
+      const uint8_t* values, int32_t length, const uint8_t* valid_bytes = nullptr);
+
+  Status Finish(std::shared_ptr<Array>* out) override;
+  Status Init(int32_t capacity) override;
+
+  // Increase the capacity of the builder to accommodate at least the indicated
+  // number of elements
+  Status Resize(int32_t capacity) override;
+
+ protected:
+  std::shared_ptr<PoolBuffer> data_;
+  uint8_t* raw_data_;
 };
 
+// Only instantiate these templates once
+extern template class ARROW_EXPORT NumericArray<Int8Type>;
+extern template class ARROW_EXPORT NumericArray<UInt8Type>;
+extern template class ARROW_EXPORT NumericArray<Int16Type>;
+extern template class ARROW_EXPORT NumericArray<UInt16Type>;
+extern template class ARROW_EXPORT NumericArray<Int32Type>;
+extern template class ARROW_EXPORT NumericArray<UInt32Type>;
+extern template class ARROW_EXPORT NumericArray<Int64Type>;
+extern template class ARROW_EXPORT NumericArray<UInt64Type>;
+extern template class ARROW_EXPORT NumericArray<HalfFloatType>;
+extern template class ARROW_EXPORT NumericArray<FloatType>;
+extern template class ARROW_EXPORT NumericArray<DoubleType>;
+extern template class ARROW_EXPORT NumericArray<TimestampType>;
+
 }  // namespace arrow
 
 #endif  // ARROW_TYPES_PRIMITIVE_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/string-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/string-test.cc b/cpp/src/arrow/types/string-test.cc
index af87a14..3c4b12b 100644
--- a/cpp/src/arrow/types/string-test.cc
+++ b/cpp/src/arrow/types/string-test.cc
@@ -47,7 +47,7 @@ TEST(TypesTest, BinaryType) {
 TEST(TypesTest, TestStringType) {
   StringType str;
   ASSERT_EQ(str.type, Type::STRING);
-  ASSERT_EQ(str.name(), std::string("string"));
+  ASSERT_EQ(str.ToString(), std::string("string"));
 }
 
 // ----------------------------------------------------------------------
@@ -66,8 +66,8 @@ class TestStringContainer : public ::testing::Test {
 
   void MakeArray() {
     length_ = offsets_.size() - 1;
-    value_buf_ = test::to_buffer(chars_);
-    offsets_buf_ = test::to_buffer(offsets_);
+    value_buf_ = test::GetBufferFromVector(chars_);
+    offsets_buf_ = test::GetBufferFromVector(offsets_);
     null_bitmap_ = test::bytes_to_null_buffer(valid_bytes_);
     null_count_ = test::null_count(valid_bytes_);
 
@@ -131,7 +131,7 @@ TEST_F(TestStringContainer, TestGetString) {
 
 TEST_F(TestStringContainer, TestEmptyStringComparison) {
   offsets_ = {0, 0, 0, 0, 0, 0};
-  offsets_buf_ = test::to_buffer(offsets_);
+  offsets_buf_ = test::GetBufferFromVector(offsets_);
   length_ = offsets_.size() - 1;
 
   auto strings_a = std::make_shared<StringArray>(
@@ -227,8 +227,8 @@ class TestBinaryContainer : public ::testing::Test {
 
   void MakeArray() {
     length_ = offsets_.size() - 1;
-    value_buf_ = test::to_buffer(chars_);
-    offsets_buf_ = test::to_buffer(offsets_);
+    value_buf_ = test::GetBufferFromVector(chars_);
+    offsets_buf_ = test::GetBufferFromVector(offsets_);
 
     null_bitmap_ = test::bytes_to_null_buffer(valid_bytes_);
     null_count_ = test::null_count(valid_bytes_);

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/string.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/string.cc b/cpp/src/arrow/types/string.cc
index f6d26df..db963df 100644
--- a/cpp/src/arrow/types/string.cc
+++ b/cpp/src/arrow/types/string.cc
@@ -94,6 +94,10 @@ bool BinaryArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_
   return true;
 }
 
+Status BinaryArray::Accept(ArrayVisitor* visitor) const {  // hands *this to the visitor
+  return visitor->Visit(*this);  // visitor is dereferenced unchecked; Visit's Status is returned
+}
+
 StringArray::StringArray(int32_t length, const std::shared_ptr<Buffer>& offsets,
     const std::shared_ptr<Buffer>& data, int32_t null_count,
     const std::shared_ptr<Buffer>& null_bitmap)
@@ -104,6 +108,10 @@ Status StringArray::Validate() const {
   return BinaryArray::Validate();
 }
 
+Status StringArray::Accept(ArrayVisitor* visitor) const {  // hands *this to the visitor
+  return visitor->Visit(*this);  // visitor is dereferenced unchecked; Visit's Status is returned
+}
+
 // This used to be a static member variable of BinaryBuilder, but it can cause
 // valgrind to report a (spurious?) memory leak when needed in other shared
 // libraries. The problem came up while adding explicit visibility to libarrow
@@ -122,8 +130,8 @@ Status BinaryBuilder::Finish(std::shared_ptr<Array>* out) {
   const auto list = std::dynamic_pointer_cast<ListArray>(result);
   auto values = std::dynamic_pointer_cast<UInt8Array>(list->values());
 
-  *out = std::make_shared<BinaryArray>(list->length(), list->offset_buffer(),
-      values->data(), list->null_count(), list->null_bitmap());
+  *out = std::make_shared<BinaryArray>(list->length(), list->offsets(), values->data(),
+      list->null_count(), list->null_bitmap());
   return Status::OK();
 }
 
@@ -134,8 +142,8 @@ Status StringBuilder::Finish(std::shared_ptr<Array>* out) {
   const auto list = std::dynamic_pointer_cast<ListArray>(result);
   auto values = std::dynamic_pointer_cast<UInt8Array>(list->values());
 
-  *out = std::make_shared<StringArray>(list->length(), list->offset_buffer(),
-      values->data(), list->null_count(), list->null_bitmap());
+  *out = std::make_shared<StringArray>(list->length(), list->offsets(), values->data(),
+      list->null_count(), list->null_bitmap());
   return Status::OK();
 }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/string.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/string.h b/cpp/src/arrow/types/string.h
index aaba49c..c875243 100644
--- a/cpp/src/arrow/types/string.h
+++ b/cpp/src/arrow/types/string.h
@@ -37,6 +37,8 @@ class MemoryPool;
 
 class ARROW_EXPORT BinaryArray : public Array {
  public:
+  using TypeClass = BinaryType;
+
   BinaryArray(int32_t length, const std::shared_ptr<Buffer>& offsets,
       const std::shared_ptr<Buffer>& data, int32_t null_count = 0,
       const std::shared_ptr<Buffer>& null_bitmap = nullptr);
@@ -60,6 +62,8 @@ class ARROW_EXPORT BinaryArray : public Array {
   std::shared_ptr<Buffer> data() const { return data_buffer_; }
   std::shared_ptr<Buffer> offsets() const { return offset_buffer_; }
 
+  const int32_t* raw_offsets() const { return offsets_; }  // returns the offsets_ member pointer as-is
+
   int32_t offset(int i) const { return offsets_[i]; }
 
   // Neither of these functions will perform boundschecking
@@ -73,6 +77,8 @@ class ARROW_EXPORT BinaryArray : public Array {
 
   Status Validate() const override;
 
+  Status Accept(ArrayVisitor* visitor) const override;
+
  private:
   std::shared_ptr<Buffer> offset_buffer_;
   const int32_t* offsets_;
@@ -83,6 +89,8 @@ class ARROW_EXPORT BinaryArray : public Array {
 
 class ARROW_EXPORT StringArray : public BinaryArray {
  public:
+  using TypeClass = StringType;
+
   StringArray(int32_t length, const std::shared_ptr<Buffer>& offsets,
       const std::shared_ptr<Buffer>& data, int32_t null_count = 0,
       const std::shared_ptr<Buffer>& null_bitmap = nullptr);
@@ -96,6 +104,8 @@ class ARROW_EXPORT StringArray : public BinaryArray {
   }
 
   Status Validate() const override;
+
+  Status Accept(ArrayVisitor* visitor) const override;
 };
 
 // BinaryBuilder : public ListBuilder
@@ -109,6 +119,12 @@ class ARROW_EXPORT BinaryBuilder : public ListBuilder {
     return byte_builder_->Append(value, length);
   }
 
+  Status Append(const char* value, int32_t length) {  // char* convenience overload
+    return Append(reinterpret_cast<const uint8_t*>(value), length);  // same bytes, viewed as uint8_t
+  }
+  // Appends value's bytes via c_str(); NOTE(review): value.size() narrows implicitly to int32_t.
+  Status Append(const std::string& value) { return Append(value.c_str(), value.size()); }
+
   Status Finish(std::shared_ptr<Array>* out) override;
 
  protected:
@@ -121,13 +137,9 @@ class ARROW_EXPORT StringBuilder : public BinaryBuilder {
   explicit StringBuilder(MemoryPool* pool, const TypePtr& type)
       : BinaryBuilder(pool, type) {}
 
-  Status Finish(std::shared_ptr<Array>* out) override;
-
-  Status Append(const std::string& value) { return Append(value.c_str(), value.size()); }
+  using BinaryBuilder::Append;
 
-  Status Append(const char* value, int32_t length) {
-    return BinaryBuilder::Append(reinterpret_cast<const uint8_t*>(value), length);
-  }
+  Status Finish(std::shared_ptr<Array>* out) override;
 
   Status Append(const std::vector<std::string>& values, uint8_t* null_bytes);
 };

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/struct-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/struct-test.cc b/cpp/src/arrow/types/struct-test.cc
index 8e82c38..197d7d4 100644
--- a/cpp/src/arrow/types/struct-test.cc
+++ b/cpp/src/arrow/types/struct-test.cc
@@ -80,7 +80,7 @@ void ValidateBasicStructArray(const StructArray* result,
   ASSERT_EQ(4, list_char_arr->length());
   ASSERT_EQ(10, list_char_arr->values()->length());
   for (size_t i = 0; i < list_offsets.size(); ++i) {
-    ASSERT_EQ(list_offsets[i], list_char_arr->offsets()[i]);
+    ASSERT_EQ(list_offsets[i], list_char_arr->raw_offsets()[i]);
   }
   for (size_t i = 0; i < list_values.size(); ++i) {
     ASSERT_EQ(list_values[i], char_arr->Value(i));

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/struct.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/struct.cc b/cpp/src/arrow/types/struct.cc
index 369c29d..0e0db23 100644
--- a/cpp/src/arrow/types/struct.cc
+++ b/cpp/src/arrow/types/struct.cc
@@ -87,6 +87,10 @@ Status StructArray::Validate() const {
   return Status::OK();
 }
 
+Status StructArray::Accept(ArrayVisitor* visitor) const {
+  return visitor->Visit(*this);
+}
+
 Status StructBuilder::Finish(std::shared_ptr<Array>* out) {
   std::vector<std::shared_ptr<Array>> fields(field_builders_.size());
   for (size_t i = 0; i < field_builders_.size(); ++i) {

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/struct.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/struct.h b/cpp/src/arrow/types/struct.h
index 65b8daf..035af05 100644
--- a/cpp/src/arrow/types/struct.h
+++ b/cpp/src/arrow/types/struct.h
@@ -31,6 +31,8 @@ namespace arrow {
 
 class ARROW_EXPORT StructArray : public Array {
  public:
+  using TypeClass = StructType;
+
   StructArray(const TypePtr& type, int32_t length, std::vector<ArrayPtr>& field_arrays,
       int32_t null_count = 0, std::shared_ptr<Buffer> null_bitmap = nullptr)
       : Array(type, length, null_count, null_bitmap) {
@@ -55,6 +57,8 @@ class ARROW_EXPORT StructArray : public Array {
   bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx,
       const std::shared_ptr<Array>& arr) const override;
 
+  Status Accept(ArrayVisitor* visitor) const override;
+
  protected:
   // The child arrays corresponding to each field of the struct data type.
   std::vector<ArrayPtr> field_arrays_;

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/test-common.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/test-common.h b/cpp/src/arrow/types/test-common.h
index 1957636..6e6ab85 100644
--- a/cpp/src/arrow/types/test-common.h
+++ b/cpp/src/arrow/types/test-common.h
@@ -24,6 +24,8 @@
 
 #include "gtest/gtest.h"
 
+#include "arrow/array.h"
+#include "arrow/builder.h"
 #include "arrow/test-util.h"
 #include "arrow/type.h"
 #include "arrow/util/memory-pool.h"
@@ -49,6 +51,20 @@ class TestBuilder : public ::testing::Test {
   unique_ptr<ArrayBuilder> builder_nn_;
 };
 
+template <class T, class Builder>
+Status MakeArray(const std::vector<uint8_t>& valid_bytes, const std::vector<T>& values,
+    int size, Builder* builder, ArrayPtr* out) {
+  // Append the first `size` values, writing a null wherever valid_bytes[i] is 0
+  for (int i = 0; i < size; ++i) {
+    if (valid_bytes[i] > 0) {
+      RETURN_NOT_OK(builder->Append(values[i]));
+    } else {
+      RETURN_NOT_OK(builder->AppendNull());
+    }
+  }
+  return builder->Finish(out);
+}
+
 }  // namespace arrow
 
 #endif  // ARROW_TYPES_TEST_COMMON_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/union.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/union.cc b/cpp/src/arrow/types/union.cc
index c891b4a..cc2934b 100644
--- a/cpp/src/arrow/types/union.cc
+++ b/cpp/src/arrow/types/union.cc
@@ -24,25 +24,4 @@
 
 #include "arrow/type.h"
 
-namespace arrow {
-
-static inline std::string format_union(const std::vector<TypePtr>& child_types) {
-  std::stringstream s;
-  s << "union<";
-  for (size_t i = 0; i < child_types.size(); ++i) {
-    if (i) { s << ", "; }
-    s << child_types[i]->ToString();
-  }
-  s << ">";
-  return s.str();
-}
-
-std::string DenseUnionType::ToString() const {
-  return format_union(child_types_);
-}
-
-std::string SparseUnionType::ToString() const {
-  return format_union(child_types_);
-}
-
-}  // namespace arrow
+namespace arrow {}  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/types/union.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/union.h b/cpp/src/arrow/types/union.h
index d2ee9bd..44f39cc 100644
--- a/cpp/src/arrow/types/union.h
+++ b/cpp/src/arrow/types/union.h
@@ -24,32 +24,11 @@
 
 #include "arrow/array.h"
 #include "arrow/type.h"
-#include "arrow/types/collection.h"
 
 namespace arrow {
 
 class Buffer;
 
-struct DenseUnionType : public CollectionType<Type::DENSE_UNION> {
-  typedef CollectionType<Type::DENSE_UNION> Base;
-
-  explicit DenseUnionType(const std::vector<TypePtr>& child_types) : Base() {
-    child_types_ = child_types;
-  }
-
-  virtual std::string ToString() const;
-};
-
-struct SparseUnionType : public CollectionType<Type::SPARSE_UNION> {
-  typedef CollectionType<Type::SPARSE_UNION> Base;
-
-  explicit SparseUnionType(const std::vector<TypePtr>& child_types) : Base() {
-    child_types_ = child_types;
-  }
-
-  virtual std::string ToString() const;
-};
-
 class UnionArray : public Array {
  protected:
   // The data are types encoded as int16

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/util/logging.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/logging.h b/cpp/src/arrow/util/logging.h
index 06ee841..b22f07d 100644
--- a/cpp/src/arrow/util/logging.h
+++ b/cpp/src/arrow/util/logging.h
@@ -118,9 +118,9 @@ class CerrLog {
 class FatalLog : public CerrLog {
  public:
   explicit FatalLog(int /* severity */)  // NOLINT
-      : CerrLog(ARROW_FATAL) {}          // NOLINT
+      : CerrLog(ARROW_FATAL){}           // NOLINT
 
-  [[noreturn]] ~FatalLog() {
+            [[noreturn]] ~FatalLog() {
     if (has_logged_) { std::cerr << std::endl; }
     std::exit(1);
   }

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/format/Metadata.md
----------------------------------------------------------------------
diff --git a/format/Metadata.md b/format/Metadata.md
index 653a4c7..a4878f3 100644
--- a/format/Metadata.md
+++ b/format/Metadata.md
@@ -98,6 +98,11 @@ Union:
   "typeIds" : [ /* integer */ ]
 }
 ```
+
+The `typeIds` field in the Union lists the codes used to denote each type, which
+may be different from the index of the child array. This is so that the union
+type ids do not have to be enumerated from 0.
+
 Int:
 ```
 {


[2/2] arrow git commit: ARROW-373: [C++] JSON serialization format for testing

Posted by we...@apache.org.
ARROW-373: [C++] JSON serialization format for testing

C++ version of ARROW-372

Author: Wes McKinney <we...@twosigma.com>

Closes #202 from wesm/ARROW-373 and squashes the following commits:

d13a05f [Wes McKinney] Compiler warning
72c24fe [Wes McKinney] Add a minimal literal JSON example
a2cf47b [Wes McKinney] cpplint
3d9fcc2 [Wes McKinney] Complete round trip json file test with multiple record batches
2753449 [Wes McKinney] Complete draft json roundtrip implementation. tests not complete yet
3d6bbbd [Wes McKinney] Start high level writer scaffold
6bbd669 [Wes McKinney] Tweaks
e2e86b5 [Wes McKinney] Test JSON array roundtrip for numeric types, strings, lists, structs
82f108b [Wes McKinney] Refactoring. Array test scaffold
0891378 [Wes McKinney] Declare loop variables
6566343 [Wes McKinney] Recursively construct children for list/struct
35c2f85 [Wes McKinney] Refactoring. Start drafting string/list reader
f26402a [Wes McKinney] Install type_traits.h. cpplint
4fc7294 [Wes McKinney] Refactoring, type attribute consistency. Array reader compiles
2c93cce [Wes McKinney] WIP JSON array reader code path
932ba7a [Wes McKinney] Add ArrayVisitor methods, add enough metaprogramming to detect presence of c_type type member
15c1094 [Wes McKinney] Add type traits, refactoring, drafting json array writing. not working yet
209ba48 [Wes McKinney] More types refactoring. Strange linker error in pyarrow
379da3c [Wes McKinney] Implement union metadata JSON serialization
5fbea41 [Wes McKinney] Implement some more json types and add convenience factory functions
1c08233 [Wes McKinney] JSON schema roundtrip passing for many types
86c9559 [Wes McKinney] Add convenience factory functions for common types
3b9d14e [Wes McKinney] Add type-specific JSON metadata to schema writer
820b0f2 [Wes McKinney] Drafting JSON schema read/write
68ee7ab [Wes McKinney] Move forward declarations into type_fwd.h
1edf2a9 [Wes McKinney] Prototyping out visitor pattern for json serialization
24c1d5d [Wes McKinney] Some Types refactoring, add TypeVisitor abstract class. Add RapidJSON as external project


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/ed6ec3b7
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/ed6ec3b7
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/ed6ec3b7

Branch: refs/heads/master
Commit: ed6ec3b76e1ac27fab85cd4bc74fbd61e8dfb27f
Parents: 8417096
Author: Wes McKinney <we...@twosigma.com>
Authored: Fri Nov 18 14:58:46 2016 -0500
Committer: Wes McKinney <we...@twosigma.com>
Committed: Fri Nov 18 14:58:46 2016 -0500

----------------------------------------------------------------------
 cpp/CMakeLists.txt                    |   19 +
 cpp/src/arrow/CMakeLists.txt          |    2 +
 cpp/src/arrow/array.cc                |   15 +
 cpp/src/arrow/array.h                 |   12 +
 cpp/src/arrow/column-test.cc          |    1 +
 cpp/src/arrow/io/hdfs.cc              |    8 +-
 cpp/src/arrow/io/libhdfs_shim.cc      |   26 +-
 cpp/src/arrow/ipc/CMakeLists.txt      |    7 +
 cpp/src/arrow/ipc/adapter.cc          |    2 +-
 cpp/src/arrow/ipc/ipc-json-test.cc    |  353 +++++++++
 cpp/src/arrow/ipc/json-internal.cc    | 1113 ++++++++++++++++++++++++++++
 cpp/src/arrow/ipc/json-internal.h     |  111 +++
 cpp/src/arrow/ipc/json.cc             |  219 ++++++
 cpp/src/arrow/ipc/json.h              |   92 +++
 cpp/src/arrow/ipc/test-common.h       |   14 +-
 cpp/src/arrow/schema-test.cc          |   52 +-
 cpp/src/arrow/schema.cc               |   15 +
 cpp/src/arrow/schema.h                |   12 +-
 cpp/src/arrow/test-util.h             |   51 +-
 cpp/src/arrow/type.cc                 |  122 ++-
 cpp/src/arrow/type.h                  |  338 +++++++--
 cpp/src/arrow/type_fwd.h              |  157 ++++
 cpp/src/arrow/type_traits.h           |  197 +++++
 cpp/src/arrow/types/CMakeLists.txt    |    1 -
 cpp/src/arrow/types/collection.h      |   41 -
 cpp/src/arrow/types/datetime.h        |   37 +-
 cpp/src/arrow/types/decimal.h         |   14 +-
 cpp/src/arrow/types/list-test.cc      |    2 +-
 cpp/src/arrow/types/list.cc           |    4 +
 cpp/src/arrow/types/list.h            |    8 +-
 cpp/src/arrow/types/primitive-test.cc |   36 +-
 cpp/src/arrow/types/primitive.cc      |   97 ++-
 cpp/src/arrow/types/primitive.h       |  190 ++---
 cpp/src/arrow/types/string-test.cc    |   12 +-
 cpp/src/arrow/types/string.cc         |   16 +-
 cpp/src/arrow/types/string.h          |   24 +-
 cpp/src/arrow/types/struct-test.cc    |    2 +-
 cpp/src/arrow/types/struct.cc         |    4 +
 cpp/src/arrow/types/struct.h          |    4 +
 cpp/src/arrow/types/test-common.h     |   16 +
 cpp/src/arrow/types/union.cc          |   23 +-
 cpp/src/arrow/types/union.h           |   21 -
 cpp/src/arrow/util/logging.h          |    4 +-
 format/Metadata.md                    |    5 +
 44 files changed, 3049 insertions(+), 450 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 6f95483..0bff752 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -545,6 +545,25 @@ if(ARROW_BUILD_BENCHMARKS)
   endif()
 endif()
 
+# RapidJSON, header only dependency
+if("$ENV{RAPIDJSON_HOME}" STREQUAL "")
+  ExternalProject_Add(rapidjson_ep
+    PREFIX "${CMAKE_BINARY_DIR}"
+    URL "https://github.com/miloyip/rapidjson/archive/v1.1.0.tar.gz"
+    URL_MD5 "badd12c511e081fec6c89c43a7027bce"
+    CONFIGURE_COMMAND ""
+    BUILD_COMMAND ""
+    BUILD_IN_SOURCE 1
+    INSTALL_COMMAND "")
+
+  ExternalProject_Get_Property(rapidjson_ep SOURCE_DIR)
+  set(RAPIDJSON_INCLUDE_DIR "${SOURCE_DIR}/include")
+else()
+  set(RAPIDJSON_INCLUDE_DIR "$ENV{RAPIDJSON_HOME}/include")
+endif()
+message(STATUS "RapidJSON include dir: ${RAPIDJSON_INCLUDE_DIR}")
+include_directories(SYSTEM ${RAPIDJSON_INCLUDE_DIR})
+
 ## Google PerfTools
 ##
 ## Disabled with TSAN/ASAN as well as with gold+dynamic linking (see comment

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index a9b2fec..81851bc 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -24,6 +24,8 @@ install(FILES
   schema.h
   table.h
   type.h
+  type_fwd.h
+  type_traits.h
   test-util.h
   DESTINATION include/arrow)
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/array.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc
index e432a53..3262425 100644
--- a/cpp/src/arrow/array.cc
+++ b/cpp/src/arrow/array.cc
@@ -18,6 +18,7 @@
 #include "arrow/array.h"
 
 #include <cstdint>
+#include <cstring>
 
 #include "arrow/util/bit-util.h"
 #include "arrow/util/buffer.h"
@@ -25,6 +26,16 @@
 
 namespace arrow {
 
+Status GetEmptyBitmap(
+    MemoryPool* pool, int32_t length, std::shared_ptr<MutableBuffer>* result) {
+  auto buffer = std::make_shared<PoolBuffer>(pool);
+  RETURN_NOT_OK(buffer->Resize(BitUtil::BytesForBits(length)));
+  memset(buffer->mutable_data(), 0, buffer->size());
+
+  *result = buffer;
+  return Status::OK();
+}
+
 // ----------------------------------------------------------------------
 // Base array class
 
@@ -66,4 +77,8 @@ bool NullArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_st
   return true;
 }
 
+Status NullArray::Accept(ArrayVisitor* visitor) const {
+  return visitor->Visit(*this);
+}
+
 }  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/array.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h
index ff37323..ff2b70e 100644
--- a/cpp/src/arrow/array.h
+++ b/cpp/src/arrow/array.h
@@ -29,6 +29,8 @@
 namespace arrow {
 
 class Buffer;
+class MemoryPool;
+class MutableBuffer;
 class Status;
 
 // Immutable data array with some logical type and some length. Any memory is
@@ -70,6 +72,8 @@ class ARROW_EXPORT Array {
   // returning Status::OK.  This can be an expensive check.
   virtual Status Validate() const;
 
+  virtual Status Accept(ArrayVisitor* visitor) const = 0;
+
  protected:
   std::shared_ptr<DataType> type_;
   int32_t null_count_;
@@ -86,6 +90,8 @@ class ARROW_EXPORT Array {
 // Degenerate null type Array
 class ARROW_EXPORT NullArray : public Array {
  public:
+  using TypeClass = NullType;
+
   NullArray(const std::shared_ptr<DataType>& type, int32_t length)
       : Array(type, length, length, nullptr) {}
 
@@ -94,9 +100,15 @@ class ARROW_EXPORT NullArray : public Array {
   bool Equals(const std::shared_ptr<Array>& arr) const override;
   bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_index,
       const std::shared_ptr<Array>& arr) const override;
+
+  Status Accept(ArrayVisitor* visitor) const override;
 };
 
 typedef std::shared_ptr<Array> ArrayPtr;
+
+Status ARROW_EXPORT GetEmptyBitmap(
+    MemoryPool* pool, int32_t length, std::shared_ptr<MutableBuffer>* result);
+
 }  // namespace arrow
 
 #endif

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/column-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/column-test.cc b/cpp/src/arrow/column-test.cc
index 1edf313..ac3636d 100644
--- a/cpp/src/arrow/column-test.cc
+++ b/cpp/src/arrow/column-test.cc
@@ -22,6 +22,7 @@
 
 #include "gtest/gtest.h"
 
+#include "arrow/array.h"
 #include "arrow/column.h"
 #include "arrow/schema.h"
 #include "arrow/test-util.h"

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/io/hdfs.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/io/hdfs.cc b/cpp/src/arrow/io/hdfs.cc
index 6490a75..13491e7 100644
--- a/cpp/src/arrow/io/hdfs.cc
+++ b/cpp/src/arrow/io/hdfs.cc
@@ -289,13 +289,9 @@ class HdfsClient::HdfsClientImpl {
 
     // connect to HDFS with the builder object
     hdfsBuilder* builder = hdfsNewBuilder();
-    if (!config->host.empty()) {
-      hdfsBuilderSetNameNode(builder, config->host.c_str());
-    }
+    if (!config->host.empty()) { hdfsBuilderSetNameNode(builder, config->host.c_str()); }
     hdfsBuilderSetNameNodePort(builder, config->port);
-    if (!config->user.empty()) {
-      hdfsBuilderSetUserName(builder, config->user.c_str());
-    }
+    if (!config->user.empty()) { hdfsBuilderSetUserName(builder, config->user.c_str()); }
     if (!config->kerb_ticket.empty()) {
       hdfsBuilderSetKerbTicketCachePath(builder, config->kerb_ticket.c_str());
     }

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/io/libhdfs_shim.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/io/libhdfs_shim.cc b/cpp/src/arrow/io/libhdfs_shim.cc
index 1fee595..36b8a4e 100644
--- a/cpp/src/arrow/io/libhdfs_shim.cc
+++ b/cpp/src/arrow/io/libhdfs_shim.cc
@@ -74,12 +74,9 @@ static HINSTANCE libjvm_handle = NULL;
 // NOTE(wesm): cpplint does not like use of short and other imprecise C types
 
 static hdfsBuilder* (*ptr_hdfsNewBuilder)(void) = NULL;
-static void (*ptr_hdfsBuilderSetNameNode)(
-    hdfsBuilder* bld, const char* nn) = NULL;
-static void (*ptr_hdfsBuilderSetNameNodePort)(
-    hdfsBuilder* bld, tPort port) = NULL;
-static void (*ptr_hdfsBuilderSetUserName)(
-    hdfsBuilder* bld, const char* userName) = NULL;
+static void (*ptr_hdfsBuilderSetNameNode)(hdfsBuilder* bld, const char* nn) = NULL;
+static void (*ptr_hdfsBuilderSetNameNodePort)(hdfsBuilder* bld, tPort port) = NULL;
+static void (*ptr_hdfsBuilderSetUserName)(hdfsBuilder* bld, const char* userName) = NULL;
 static void (*ptr_hdfsBuilderSetKerbTicketCachePath)(
     hdfsBuilder* bld, const char* kerbTicketCachePath) = NULL;
 static hdfsFS (*ptr_hdfsBuilderConnect)(hdfsBuilder* bld) = NULL;
@@ -173,9 +170,9 @@ void hdfsBuilderSetUserName(hdfsBuilder* bld, const char* userName) {
   ptr_hdfsBuilderSetUserName(bld, userName);
 }
 
-void hdfsBuilderSetKerbTicketCachePath(hdfsBuilder* bld,
-    const char* kerbTicketCachePath) {
-  ptr_hdfsBuilderSetKerbTicketCachePath(bld , kerbTicketCachePath);
+void hdfsBuilderSetKerbTicketCachePath(
+    hdfsBuilder* bld, const char* kerbTicketCachePath) {
+  ptr_hdfsBuilderSetKerbTicketCachePath(bld, kerbTicketCachePath);
 }
 
 hdfsFS hdfsBuilderConnect(hdfsBuilder* bld) {
@@ -364,7 +361,7 @@ static std::vector<fs::path> get_potential_libhdfs_paths() {
   std::vector<fs::path> libhdfs_potential_paths;
   std::string file_name;
 
-  // OS-specific file name
+// OS-specific file name
 #ifdef __WIN32
   file_name = "hdfs.dll";
 #elif __APPLE__
@@ -374,10 +371,7 @@ static std::vector<fs::path> get_potential_libhdfs_paths() {
 #endif
 
   // Common paths
-  std::vector<fs::path> search_paths = {
-      fs::path(""),
-      fs::path(".")
-  };
+  std::vector<fs::path> search_paths = {fs::path(""), fs::path(".")};
 
   // Path from environment variable
   const char* hadoop_home = std::getenv("HADOOP_HOME");
@@ -387,9 +381,7 @@ static std::vector<fs::path> get_potential_libhdfs_paths() {
   }
 
   const char* libhdfs_dir = std::getenv("ARROW_LIBHDFS_DIR");
-  if (libhdfs_dir != nullptr) {
-    search_paths.push_back(fs::path(libhdfs_dir));
-  }
+  if (libhdfs_dir != nullptr) { search_paths.push_back(fs::path(libhdfs_dir)); }
 
   // All paths with file name
   for (auto& path : search_paths) {

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/ipc/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/CMakeLists.txt b/cpp/src/arrow/ipc/CMakeLists.txt
index d2db339..6955bcb 100644
--- a/cpp/src/arrow/ipc/CMakeLists.txt
+++ b/cpp/src/arrow/ipc/CMakeLists.txt
@@ -34,6 +34,8 @@ set(ARROW_IPC_TEST_LINK_LIBS
 set(ARROW_IPC_SRCS
   adapter.cc
   file.cc
+  json.cc
+  json-internal.cc
   metadata.cc
   metadata-internal.cc
 )
@@ -79,6 +81,10 @@ ADD_ARROW_TEST(ipc-metadata-test)
 ARROW_TEST_LINK_LIBRARIES(ipc-metadata-test
   ${ARROW_IPC_TEST_LINK_LIBS})
 
+ADD_ARROW_TEST(ipc-json-test)
+ARROW_TEST_LINK_LIBRARIES(ipc-json-test
+  ${ARROW_IPC_TEST_LINK_LIBS})
+
 # make clean will delete the generated file
 set_source_files_properties(Metadata_generated.h PROPERTIES GENERATED TRUE)
 
@@ -114,6 +120,7 @@ add_dependencies(arrow_objlib metadata_fbs)
 install(FILES
   adapter.h
   file.h
+  json.h
   metadata.h
   DESTINATION include/arrow/ipc)
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/ipc/adapter.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/adapter.cc b/cpp/src/arrow/ipc/adapter.cc
index 74786bf..da718c0 100644
--- a/cpp/src/arrow/ipc/adapter.cc
+++ b/cpp/src/arrow/ipc/adapter.cc
@@ -106,7 +106,7 @@ Status VisitArray(const Array* arr, std::vector<flatbuf::FieldNode>* field_nodes
     buffers->push_back(binary_arr->data());
   } else if (arr->type_enum() == Type::LIST) {
     const auto list_arr = static_cast<const ListArray*>(arr);
-    buffers->push_back(list_arr->offset_buffer());
+    buffers->push_back(list_arr->offsets());
     RETURN_NOT_OK(VisitArray(
         list_arr->values().get(), field_nodes, buffers, max_recursion_depth - 1));
   } else if (arr->type_enum() == Type::STRUCT) {

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/ipc/ipc-json-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/ipc-json-test.cc b/cpp/src/arrow/ipc/ipc-json-test.cc
new file mode 100644
index 0000000..a51371c
--- /dev/null
+++ b/cpp/src/arrow/ipc/ipc-json-test.cc
@@ -0,0 +1,353 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "arrow/array.h"
+#include "arrow/ipc/json-internal.h"
+#include "arrow/ipc/json.h"
+#include "arrow/table.h"
+#include "arrow/test-util.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/types/primitive.h"
+#include "arrow/types/string.h"
+#include "arrow/types/struct.h"
+#include "arrow/util/memory-pool.h"
+#include "arrow/util/status.h"
+
+namespace arrow {
+namespace ipc {
+
+void TestSchemaRoundTrip(const Schema& schema) {
+  rj::StringBuffer sb;
+  rj::Writer<rj::StringBuffer> writer(sb);
+
+  ASSERT_OK(WriteJsonSchema(schema, &writer));
+
+  rj::Document d;
+  d.Parse(sb.GetString());
+
+  std::shared_ptr<Schema> out;
+  ASSERT_OK(ReadJsonSchema(d, &out));
+
+  ASSERT_TRUE(schema.Equals(out));
+}
+
+void TestArrayRoundTrip(const Array& array) {
+  static std::string name = "dummy";
+
+  rj::StringBuffer sb;
+  rj::Writer<rj::StringBuffer> writer(sb);
+
+  ASSERT_OK(WriteJsonArray(name, array, &writer));
+
+  std::string array_as_json = sb.GetString();
+
+  rj::Document d;
+  d.Parse(array_as_json);
+
+  if (d.HasParseError()) { FAIL() << "JSON parsing failed"; }
+
+  std::shared_ptr<Array> out;
+  ASSERT_OK(ReadJsonArray(default_memory_pool(), d, array.type(), &out));
+
+  ASSERT_TRUE(array.Equals(out)) << array_as_json;
+}
+
+template <typename T, typename ValueType>
+void CheckPrimitive(const std::shared_ptr<DataType>& type,
+    const std::vector<bool>& is_valid, const std::vector<ValueType>& values) {
+  MemoryPool* pool = default_memory_pool();
+  typename TypeTraits<T>::BuilderType builder(pool, type);
+
+  for (size_t i = 0; i < values.size(); ++i) {
+    if (is_valid[i]) {
+      ASSERT_OK(builder.Append(values[i]));
+    } else {
+      ASSERT_OK(builder.AppendNull());
+    }
+  }
+
+  std::shared_ptr<Array> array;
+  ASSERT_OK(builder.Finish(&array));
+  TestArrayRoundTrip(*array.get());
+}
+
+template <typename TYPE, typename C_TYPE>
+void MakeArray(const std::shared_ptr<DataType>& type, const std::vector<bool>& is_valid,
+    const std::vector<C_TYPE>& values, std::shared_ptr<Array>* out) {
+  std::shared_ptr<Buffer> values_buffer;
+  std::shared_ptr<Buffer> values_bitmap;
+
+  ASSERT_OK(test::CopyBufferFromVector(values, &values_buffer));
+  ASSERT_OK(test::GetBitmapFromBoolVector(is_valid, &values_bitmap));
+
+  using ArrayType = typename TypeTraits<TYPE>::ArrayType;
+
+  int32_t null_count = 0;
+  for (bool val : is_valid) {
+    if (!val) { ++null_count; }
+  }
+
+  *out = std::make_shared<ArrayType>(type, static_cast<int32_t>(values.size()),
+      values_buffer, null_count, values_bitmap);
+}
+
+TEST(TestJsonSchemaWriter, FlatTypes) {
+  std::vector<std::shared_ptr<Field>> fields = {field("f0", int8()),
+      field("f1", int16(), false), field("f2", int32()), field("f3", int64(), false),
+      field("f4", uint8()), field("f5", uint16()), field("f6", uint32()),
+      field("f7", uint64()), field("f8", float32()), field("f9", float64()),
+      field("f10", utf8()), field("f11", binary()), field("f12", list(int32())),
+      field("f13", struct_({field("s1", int32()), field("s2", utf8())})),
+      field("f14", date()), field("f15", timestamp(TimeUnit::NANO)),
+      field("f16", time(TimeUnit::MICRO)),
+      field("f17", union_({field("u1", int8()), field("u2", time(TimeUnit::MILLI))},
+                       {0, 1}, UnionMode::DENSE))};
+
+  Schema schema(fields);
+  TestSchemaRoundTrip(schema);
+}
+
+template <typename T>
+void PrimitiveTypesCheckOne() {
+  using c_type = typename T::c_type;
+
+  std::vector<bool> is_valid = {true, false, true, true, true, false, true, true};
+  std::vector<c_type> values = {0, 1, 2, 3, 4, 5, 6, 7};
+  CheckPrimitive<T, c_type>(std::make_shared<T>(), is_valid, values);
+}
+
+TEST(TestJsonArrayWriter, PrimitiveTypes) {
+  PrimitiveTypesCheckOne<Int8Type>();
+  PrimitiveTypesCheckOne<Int16Type>();
+  PrimitiveTypesCheckOne<Int32Type>();
+  PrimitiveTypesCheckOne<Int64Type>();
+  PrimitiveTypesCheckOne<UInt8Type>();
+  PrimitiveTypesCheckOne<UInt16Type>();
+  PrimitiveTypesCheckOne<UInt32Type>();
+  PrimitiveTypesCheckOne<UInt64Type>();
+  PrimitiveTypesCheckOne<FloatType>();
+  PrimitiveTypesCheckOne<DoubleType>();
+
+  std::vector<bool> is_valid = {true, false, true, true, true, false, true, true};
+  std::vector<std::string> values = {"foo", "bar", "", "baz", "qux", "foo", "a", "1"};
+
+  CheckPrimitive<StringType, std::string>(utf8(), is_valid, values);
+  CheckPrimitive<BinaryType, std::string>(binary(), is_valid, values);
+}
+
+TEST(TestJsonArrayWriter, NestedTypes) {
+  auto value_type = int32();
+
+  std::vector<bool> values_is_valid = {true, false, true, true, false, true, true};
+  std::vector<int32_t> values = {0, 1, 2, 3, 4, 5, 6};
+
+  std::shared_ptr<Array> values_array;
+  MakeArray<Int32Type, int32_t>(int32(), values_is_valid, values, &values_array);
+
+  // List
+  std::vector<bool> list_is_valid = {true, false, true, true, true};
+  std::vector<int32_t> offsets = {0, 0, 0, 1, 4, 7};
+
+  std::shared_ptr<Buffer> list_bitmap;
+  ASSERT_OK(test::GetBitmapFromBoolVector(list_is_valid, &list_bitmap));
+  std::shared_ptr<Buffer> offsets_buffer = test::GetBufferFromVector(offsets);
+
+  ListArray list_array(list(value_type), 5, offsets_buffer, values_array, 1, list_bitmap);
+
+  TestArrayRoundTrip(list_array);
+
+  // Struct
+  std::vector<bool> struct_is_valid = {true, false, true, true, true, false, true};
+  std::shared_ptr<Buffer> struct_bitmap;
+  ASSERT_OK(test::GetBitmapFromBoolVector(struct_is_valid, &struct_bitmap));
+
+  auto struct_type =
+      struct_({field("f1", int32()), field("f2", int32()), field("f3", int32())});
+
+  std::vector<std::shared_ptr<Array>> fields = {values_array, values_array, values_array};
+  StructArray struct_array(
+      struct_type, static_cast<int>(struct_is_valid.size()), fields, 2, struct_bitmap);
+  TestArrayRoundTrip(struct_array);
+}
+
+// Data generation for test case below
+void MakeBatchArrays(const std::shared_ptr<Schema>& schema, const int num_rows,
+    std::vector<std::shared_ptr<Array>>* arrays) {
+  std::vector<bool> is_valid;
+  test::random_is_valid(num_rows, 0.25, &is_valid);
+
+  std::vector<int8_t> v1_values;
+  std::vector<int32_t> v2_values;
+
+  test::randint<int8_t>(num_rows, 0, 100, &v1_values);
+  test::randint<int32_t>(num_rows, 0, 100, &v2_values);
+
+  std::shared_ptr<Array> v1;
+  MakeArray<Int8Type, int8_t>(schema->field(0)->type, is_valid, v1_values, &v1);
+
+  std::shared_ptr<Array> v2;
+  MakeArray<Int32Type, int32_t>(schema->field(1)->type, is_valid, v2_values, &v2);
+
+  static const int kBufferSize = 10;
+  static uint8_t buffer[kBufferSize];
+  static uint32_t seed = 0;
+  StringBuilder string_builder(default_memory_pool(), utf8());
+  for (int i = 0; i < num_rows; ++i) {
+    if (!is_valid[i]) {
+      string_builder.AppendNull();
+    } else {
+      test::random_ascii(kBufferSize, seed++, buffer);
+      string_builder.Append(buffer, kBufferSize);
+    }
+  }
+  std::shared_ptr<Array> v3;
+  ASSERT_OK(string_builder.Finish(&v3));
+
+  arrays->emplace_back(v1);
+  arrays->emplace_back(v2);
+  arrays->emplace_back(v3);
+}
+
+TEST(TestJsonFileReadWrite, BasicRoundTrip) {
+  auto v1_type = int8();
+  auto v2_type = int32();
+  auto v3_type = utf8();
+
+  std::shared_ptr<Schema> schema(
+      new Schema({field("f1", v1_type), field("f2", v2_type), field("f3", v3_type)}));
+
+  std::unique_ptr<JsonWriter> writer;
+  ASSERT_OK(JsonWriter::Open(schema, &writer));
+
+  const int nbatches = 3;
+  std::vector<std::shared_ptr<RecordBatch>> batches;
+  for (int i = 0; i < nbatches; ++i) {
+    int32_t num_rows = 5 + i * 5;
+    std::vector<std::shared_ptr<Array>> arrays;
+
+    MakeBatchArrays(schema, num_rows, &arrays);
+    batches.emplace_back(std::make_shared<RecordBatch>(schema, num_rows, arrays));
+    ASSERT_OK(writer->WriteRecordBatch(arrays, num_rows));
+  }
+
+  std::string result;
+  ASSERT_OK(writer->Finish(&result));
+
+  std::unique_ptr<JsonReader> reader;
+
+  auto buffer = std::make_shared<Buffer>(
+      reinterpret_cast<const uint8_t*>(result.c_str()), static_cast<int>(result.size()));
+
+  ASSERT_OK(JsonReader::Open(buffer, &reader));
+  ASSERT_TRUE(reader->schema()->Equals(*schema.get()));
+
+  ASSERT_EQ(nbatches, reader->num_record_batches());
+
+  for (int i = 0; i < nbatches; ++i) {
+    std::shared_ptr<RecordBatch> batch;
+    ASSERT_OK(reader->GetRecordBatch(i, &batch));
+    ASSERT_TRUE(batch->Equals(*batches[i].get()));
+  }
+}
+
+TEST(TestJsonFileReadWrite, MinimalFormatExample) {
+  static const char* example = R"example(
+{
+  "schema": {
+    "fields": [
+      {
+        "name": "foo",
+        "type": {"name": "int", "isSigned": true, "bitWidth": 32},
+        "nullable": true, "children": [],
+        "typeLayout": [
+          {"type": "VALIDITY", "typeBitWidth": 1},
+          {"type": "DATA", "typeBitWidth": 32}
+        ]
+      },
+      {
+        "name": "bar",
+        "type": {"name": "floatingpoint", "precision": "DOUBLE"},
+        "nullable": true, "children": [],
+        "typeLayout": [
+          {"type": "VALIDITY", "typeBitWidth": 1},
+          {"type": "DATA", "typeBitWidth": 64}
+        ]
+      }
+    ]
+  },
+  "batches": [
+    {
+      "count": 5,
+      "columns": [
+        {
+          "name": "foo",
+          "count": 5,
+          "DATA": [1, 2, 3, 4, 5],
+          "VALIDITY": [1, 0, 1, 1, 1]
+        },
+        {
+          "name": "bar",
+          "count": 5,
+          "DATA": [1.0, 2.0, 3.0, 4.0, 5.0],
+          "VALIDITY": [1, 0, 0, 1, 1]
+        }
+      ]
+    }
+  ]
+}
+)example";
+
+  auto buffer = std::make_shared<Buffer>(
+      reinterpret_cast<const uint8_t*>(example), strlen(example));
+
+  std::unique_ptr<JsonReader> reader;
+  ASSERT_OK(JsonReader::Open(buffer, &reader));
+
+  Schema ex_schema({field("foo", int32()), field("bar", float64())});
+
+  ASSERT_TRUE(reader->schema()->Equals(ex_schema));
+  ASSERT_EQ(1, reader->num_record_batches());
+
+  std::shared_ptr<RecordBatch> batch;
+  ASSERT_OK(reader->GetRecordBatch(0, &batch));
+
+  std::vector<bool> foo_valid = {true, false, true, true, true};
+  std::vector<int32_t> foo_values = {1, 2, 3, 4, 5};
+  std::shared_ptr<Array> foo;
+  MakeArray<Int32Type, int32_t>(int32(), foo_valid, foo_values, &foo);
+  ASSERT_TRUE(batch->column(0)->Equals(foo));
+
+  std::vector<bool> bar_valid = {true, false, false, true, true};
+  std::vector<double> bar_values = {1, 2, 3, 4, 5};
+  std::shared_ptr<Array> bar;
+  MakeArray<DoubleType, double>(float64(), bar_valid, bar_values, &bar);
+  ASSERT_TRUE(batch->column(1)->Equals(bar));
+}
+
+}  // namespace ipc
+}  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/ipc/json-internal.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/json-internal.cc b/cpp/src/arrow/ipc/json-internal.cc
new file mode 100644
index 0000000..31fe35b
--- /dev/null
+++ b/cpp/src/arrow/ipc/json-internal.cc
@@ -0,0 +1,1113 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/ipc/json-internal.h"
+
+#include <cstdint>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <type_traits>
+#include <vector>
+
+#include "rapidjson/stringbuffer.h"
+#include "rapidjson/writer.h"
+
+#include "arrow/array.h"
+#include "arrow/schema.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/types/list.h"
+#include "arrow/types/primitive.h"
+#include "arrow/types/string.h"
+#include "arrow/types/struct.h"
+#include "arrow/util/bit-util.h"
+#include "arrow/util/memory-pool.h"
+#include "arrow/util/status.h"
+
+namespace arrow {
+namespace ipc {
+
+// Read-only rapidjson views over a parsed JSON array / object.
+using RjArray = rj::Value::ConstArray;
+using RjObject = rj::Value::ConstObject;
+
+// Kinds of buffer that can be listed in a field's "typeLayout" description.
+enum class BufferType : char { DATA, OFFSET, TYPE, VALIDITY };
+
+// Maps a buffer kind to the upper-case label written into the JSON layout
+// description. Values outside the enumeration yield "UNKNOWN".
+static std::string GetBufferTypeName(BufferType type) {
+  const char* label = "UNKNOWN";
+  switch (type) {
+    case BufferType::DATA:
+      label = "DATA";
+      break;
+    case BufferType::OFFSET:
+      label = "OFFSET";
+      break;
+    case BufferType::TYPE:
+      label = "TYPE";
+      break;
+    case BufferType::VALIDITY:
+      label = "VALIDITY";
+      break;
+    default:
+      break;
+  }
+  return label;
+}
+
+// Maps a floating point precision to the label stored under the "precision"
+// key of a floating point type. Unrecognized values yield "UNKNOWN".
+static std::string GetFloatingPrecisionName(FloatingPointMeta::Precision precision) {
+  const char* label = "UNKNOWN";
+  switch (precision) {
+    case FloatingPointMeta::HALF:
+      label = "HALF";
+      break;
+    case FloatingPointMeta::SINGLE:
+      label = "SINGLE";
+      break;
+    case FloatingPointMeta::DOUBLE:
+      label = "DOUBLE";
+      break;
+    default:
+      break;
+  }
+  return label;
+}
+
+// Maps a time unit to the label stored under the "unit" key of time and
+// timestamp types. Unrecognized values yield "UNKNOWN".
+static std::string GetTimeUnitName(TimeUnit unit) {
+  const char* label = "UNKNOWN";
+  switch (unit) {
+    case TimeUnit::SECOND:
+      label = "SECOND";
+      break;
+    case TimeUnit::MILLI:
+      label = "MILLISECOND";
+      break;
+    case TimeUnit::MICRO:
+      label = "MICROSECOND";
+      break;
+    case TimeUnit::NANO:
+      label = "NANOSECOND";
+      break;
+    default:
+      break;
+  }
+  return label;
+}
+
+// Value type pairing a buffer kind with the bit width of one element of that
+// buffer. Both properties are fixed at construction.
+class BufferLayout {
+ public:
+  BufferLayout(BufferType buffer_type, int width)
+      : type_(buffer_type), bit_width_(width) {}
+
+  // Kind of buffer being described.
+  BufferType type() const { return type_; }
+
+  // Width in bits of a single element.
+  int bit_width() const { return bit_width_; }
+
+ private:
+  BufferType type_;
+  int bit_width_;
+};
+
+// Shared layout descriptors used when writing "typeLayout" entries.
+// Bookkeeping buffers:
+static const BufferLayout kValidityBuffer{BufferType::VALIDITY, 1};
+static const BufferLayout kOffsetBuffer{BufferType::OFFSET, 32};
+static const BufferLayout kTypeBuffer{BufferType::TYPE, 32};
+// Value buffers, by element bit width:
+static const BufferLayout kBooleanBuffer{BufferType::DATA, 1};
+static const BufferLayout kValues64{BufferType::DATA, 64};
+static const BufferLayout kValues32{BufferType::DATA, 32};
+static const BufferLayout kValues16{BufferType::DATA, 16};
+static const BufferLayout kValues8{BufferType::DATA, 8};
+
+// Serializes a Schema to JSON through an RjWriter. The output is an object
+// with a single "fields" array; each field is written by VisitField(), which
+// dispatches on the field's type through the TypeVisitor interface.
+class JsonSchemaWriter : public TypeVisitor {
+ public:
+  // The schema is held by reference and the writer by pointer; both must
+  // outlive this object.
+  explicit JsonSchemaWriter(const Schema& schema, RjWriter* writer)
+      : schema_(schema), writer_(writer) {}
+
+  // Writes the whole schema. Returns the first non-OK status produced while
+  // visiting a field.
+  Status Write() {
+    writer_->StartObject();
+    writer_->Key("fields");
+    writer_->StartArray();
+    for (const std::shared_ptr<Field>& field : schema_.fields()) {
+      RETURN_NOT_OK(VisitField(*field.get()));
+    }
+    writer_->EndArray();
+    writer_->EndObject();
+    return Status::OK();
+  }
+
+  // Writes one field object: "name", "nullable", then whatever the type's
+  // Visit() overload emits ("type", "children", "typeLayout").
+  Status VisitField(const Field& field) {
+    writer_->StartObject();
+
+    writer_->Key("name");
+    writer_->String(field.name.c_str());
+
+    writer_->Key("nullable");
+    writer_->Bool(field.nullable);
+
+    // Visit the type
+    RETURN_NOT_OK(field.type->Accept(this));
+    writer_->EndObject();
+
+    return Status::OK();
+  }
+
+  // Writes an empty "children" array.
+  void SetNoChildren() {
+    writer_->Key("children");
+    writer_->StartArray();
+    writer_->EndArray();
+  }
+
+  // WriteTypeMetadata overloads: selected by type category, each writes the
+  // extra keys that go inside the "type" object after "name".
+
+  // Types without extra metadata, booleans and null: nothing to write.
+  template <typename T>
+  typename std::enable_if<std::is_base_of<NoExtraMeta, T>::value ||
+                              std::is_base_of<BooleanType, T>::value ||
+                              std::is_base_of<NullType, T>::value,
+      void>::type
+  WriteTypeMetadata(const T& type) {}
+
+  // Integers: "bitWidth" and "isSigned".
+  template <typename T>
+  typename std::enable_if<std::is_base_of<IntegerMeta, T>::value, void>::type
+  WriteTypeMetadata(const T& type) {
+    writer_->Key("bitWidth");
+    writer_->Int(type.bit_width());
+    writer_->Key("isSigned");
+    writer_->Bool(type.is_signed());
+  }
+
+  // Floating point: "precision" as HALF / SINGLE / DOUBLE.
+  template <typename T>
+  typename std::enable_if<std::is_base_of<FloatingPointMeta, T>::value, void>::type
+  WriteTypeMetadata(const T& type) {
+    writer_->Key("precision");
+    writer_->String(GetFloatingPrecisionName(type.precision()));
+  }
+
+  // Interval: "unit" as YEAR_MONTH / DAY_TIME.
+  template <typename T>
+  typename std::enable_if<std::is_base_of<IntervalType, T>::value, void>::type
+  WriteTypeMetadata(const T& type) {
+    writer_->Key("unit");
+    switch (type.unit) {
+      case IntervalType::Unit::YEAR_MONTH:
+        writer_->String("YEAR_MONTH");
+        break;
+      case IntervalType::Unit::DAY_TIME:
+        writer_->String("DAY_TIME");
+        break;
+    }
+  }
+
+  // Time and timestamp: "unit" as a time unit name.
+  template <typename T>
+  typename std::enable_if<std::is_base_of<TimeType, T>::value ||
+                              std::is_base_of<TimestampType, T>::value,
+      void>::type
+  WriteTypeMetadata(const T& type) {
+    writer_->Key("unit");
+    writer_->String(GetTimeUnitName(type.unit));
+  }
+
+  // Decimal: "precision" and "scale".
+  template <typename T>
+  typename std::enable_if<std::is_base_of<DecimalType, T>::value, void>::type
+  WriteTypeMetadata(const T& type) {
+    writer_->Key("precision");
+    writer_->Int(type.precision);
+    writer_->Key("scale");
+    writer_->Int(type.scale);
+  }
+
+  // Union: "mode" (SPARSE / DENSE) and the "typeIds" array.
+  template <typename T>
+  typename std::enable_if<std::is_base_of<UnionType, T>::value, void>::type
+  WriteTypeMetadata(const T& type) {
+    writer_->Key("mode");
+    switch (type.mode) {
+      case UnionMode::SPARSE:
+        writer_->String("SPARSE");
+        break;
+      case UnionMode::DENSE:
+        writer_->String("DENSE");
+        break;
+    }
+
+    // Write type ids
+    writer_->Key("typeIds");
+    writer_->StartArray();
+    for (size_t i = 0; i < type.type_ids.size(); ++i) {
+      writer_->Uint(type.type_ids[i]);
+    }
+    writer_->EndArray();
+  }
+
+  // TODO(wesm): Other Type metadata
+
+  // Writes the "type" object: {"name": typeclass, <type metadata>}.
+  template <typename T>
+  void WriteName(const std::string& typeclass, const T& type) {
+    writer_->Key("type");
+    writer_->StartObject();
+    writer_->Key("name");
+    writer_->String(typeclass);
+    WriteTypeMetadata(type);
+    writer_->EndObject();
+  }
+
+  // Writes the type object, an empty children list and the given layout.
+  template <typename T>
+  Status WritePrimitive(const std::string& typeclass, const T& type,
+      const std::vector<BufferLayout>& buffer_layout) {
+    WriteName(typeclass, type);
+    SetNoChildren();
+    WriteBufferLayout(buffer_layout);
+    return Status::OK();
+  }
+
+  // Like WritePrimitive, with the fixed validity / offset / 8-bit data layout
+  // used for the variable-length byte types.
+  template <typename T>
+  Status WriteVarBytes(const std::string& typeclass, const T& type) {
+    WriteName(typeclass, type);
+    SetNoChildren();
+    WriteBufferLayout({kValidityBuffer, kOffsetBuffer, kValues8});
+    return Status::OK();
+  }
+
+  // Writes "typeLayout": one {"type", "typeBitWidth"} object per buffer.
+  void WriteBufferLayout(const std::vector<BufferLayout>& buffer_layout) {
+    writer_->Key("typeLayout");
+    writer_->StartArray();
+
+    for (const BufferLayout& buffer : buffer_layout) {
+      writer_->StartObject();
+      writer_->Key("type");
+      writer_->String(GetBufferTypeName(buffer.type()));
+
+      writer_->Key("typeBitWidth");
+      writer_->Int(buffer.bit_width());
+
+      writer_->EndObject();
+    }
+    writer_->EndArray();
+  }
+
+  // Writes "children" as an array of nested field objects. Stops at and
+  // returns the first child that fails to serialize.
+  Status WriteChildren(const std::vector<std::shared_ptr<Field>>& children) {
+    writer_->Key("children");
+    writer_->StartArray();
+    for (const std::shared_ptr<Field>& field : children) {
+      RETURN_NOT_OK(VisitField(*field.get()));
+    }
+    writer_->EndArray();
+    return Status::OK();
+  }
+
+  Status Visit(const NullType& type) override { return WritePrimitive("null", type, {}); }
+
+  Status Visit(const BooleanType& type) override {
+    return WritePrimitive("bool", type, {kValidityBuffer, kBooleanBuffer});
+  }
+
+  // All integer widths share the "int" type name; bitWidth / isSigned in the
+  // metadata tell them apart.
+  Status Visit(const Int8Type& type) override {
+    return WritePrimitive("int", type, {kValidityBuffer, kValues8});
+  }
+
+  Status Visit(const Int16Type& type) override {
+    return WritePrimitive("int", type, {kValidityBuffer, kValues16});
+  }
+
+  Status Visit(const Int32Type& type) override {
+    return WritePrimitive("int", type, {kValidityBuffer, kValues32});
+  }
+
+  Status Visit(const Int64Type& type) override {
+    return WritePrimitive("int", type, {kValidityBuffer, kValues64});
+  }
+
+  Status Visit(const UInt8Type& type) override {
+    return WritePrimitive("int", type, {kValidityBuffer, kValues8});
+  }
+
+  Status Visit(const UInt16Type& type) override {
+    return WritePrimitive("int", type, {kValidityBuffer, kValues16});
+  }
+
+  Status Visit(const UInt32Type& type) override {
+    return WritePrimitive("int", type, {kValidityBuffer, kValues32});
+  }
+
+  Status Visit(const UInt64Type& type) override {
+    return WritePrimitive("int", type, {kValidityBuffer, kValues64});
+  }
+
+  // Floating point widths share the "floatingpoint" type name.
+  Status Visit(const HalfFloatType& type) override {
+    return WritePrimitive("floatingpoint", type, {kValidityBuffer, kValues16});
+  }
+
+  Status Visit(const FloatType& type) override {
+    return WritePrimitive("floatingpoint", type, {kValidityBuffer, kValues32});
+  }
+
+  Status Visit(const DoubleType& type) override {
+    return WritePrimitive("floatingpoint", type, {kValidityBuffer, kValues64});
+  }
+
+  Status Visit(const StringType& type) override { return WriteVarBytes("utf8", type); }
+
+  Status Visit(const BinaryType& type) override { return WriteVarBytes("binary", type); }
+
+  Status Visit(const DateType& type) override {
+    return WritePrimitive("date", type, {kValidityBuffer, kValues64});
+  }
+
+  Status Visit(const TimeType& type) override {
+    return WritePrimitive("time", type, {kValidityBuffer, kValues64});
+  }
+
+  Status Visit(const TimestampType& type) override {
+    return WritePrimitive("timestamp", type, {kValidityBuffer, kValues64});
+  }
+
+  Status Visit(const IntervalType& type) override {
+    return WritePrimitive("interval", type, {kValidityBuffer, kValues64});
+  }
+
+  Status Visit(const DecimalType& type) override { return Status::NotImplemented("NYI"); }
+
+  Status Visit(const ListType& type) override {
+    WriteName("list", type);
+    RETURN_NOT_OK(WriteChildren(type.children()));
+    WriteBufferLayout({kValidityBuffer, kOffsetBuffer});
+    return Status::OK();
+  }
+
+  Status Visit(const StructType& type) override {
+    WriteName("struct", type);
+    // Propagate child failures (e.g. a not-yet-implemented child type)
+    // instead of reporting success for a partially written field.
+    RETURN_NOT_OK(WriteChildren(type.children()));
+    // A struct owns only a validity bitmap; its values live in the children.
+    WriteBufferLayout({kValidityBuffer});
+    return Status::OK();
+  }
+
+  Status Visit(const UnionType& type) override {
+    WriteName("union", type);
+    // Propagate child failures, as for struct and list.
+    RETURN_NOT_OK(WriteChildren(type.children()));
+
+    // Dense unions carry an offsets buffer in addition to the type ids.
+    if (type.mode == UnionMode::SPARSE) {
+      WriteBufferLayout({kValidityBuffer, kTypeBuffer});
+    } else {
+      WriteBufferLayout({kValidityBuffer, kTypeBuffer, kOffsetBuffer});
+    }
+    return Status::OK();
+  }
+
+ private:
+  const Schema& schema_;
+  RjWriter* writer_;
+};
+
+// Serializes one named Array (and, recursively, its children) to JSON through
+// an RjWriter, using the ArrayVisitor interface to dispatch on the concrete
+// array class.
+class JsonArrayWriter : public ArrayVisitor {
+ public:
+  // name and array are stored as references and writer as a pointer; all
+  // three must outlive this object.
+  explicit JsonArrayWriter(const std::string& name, const Array& array, RjWriter* writer)
+      : name_(name), array_(array), writer_(writer) {}
+
+  // Writes the array passed to the constructor.
+  Status Write() { return VisitArray(name_, array_); }
+
+  // Writes one array object: "name", "count", then whatever the array's
+  // Visit() overload emits. On a visitor error the object is left unclosed.
+  Status VisitArray(const std::string& name, const Array& arr) {
+    writer_->StartObject();
+    writer_->Key("name");
+    writer_->String(name);
+
+    writer_->Key("count");
+    writer_->Int(arr.length());
+
+    RETURN_NOT_OK(arr.Accept(this));
+
+    writer_->EndObject();
+    return Status::OK();
+  }
+
+  // WriteDataValues overloads: write every slot of the array (null slots
+  // included) as a JSON value; the caller opens and closes the array.
+
+  // Signed integers are written as 64-bit signed numbers.
+  template <typename T>
+  typename std::enable_if<IsSignedInt<T>::value, void>::type WriteDataValues(
+      const T& arr) {
+    const auto data = arr.raw_data();
+    for (int i = 0; i < arr.length(); ++i) {
+      writer_->Int64(data[i]);
+    }
+  }
+
+  // Unsigned integers are written as 64-bit unsigned numbers.
+  template <typename T>
+  typename std::enable_if<IsUnsignedInt<T>::value, void>::type WriteDataValues(
+      const T& arr) {
+    const auto data = arr.raw_data();
+    for (int i = 0; i < arr.length(); ++i) {
+      writer_->Uint64(data[i]);
+    }
+  }
+
+  // Floating point values are written as doubles.
+  // NOTE(review): the element type returned by raw_data() for half floats is
+  // not visible here -- confirm the implicit conversion to double is intended.
+  template <typename T>
+  typename std::enable_if<IsFloatingPoint<T>::value, void>::type WriteDataValues(
+      const T& arr) {
+    const auto data = arr.raw_data();
+    for (int i = 0; i < arr.length(); ++i) {
+      writer_->Double(data[i]);
+    }
+  }
+
+  // String (Utf8), Binary
+  // Each value is written as a JSON string of exactly `length` bytes.
+  template <typename T>
+  typename std::enable_if<std::is_base_of<BinaryArray, T>::value, void>::type
+  WriteDataValues(const T& arr) {
+    for (int i = 0; i < arr.length(); ++i) {
+      int32_t length;
+      const char* buf = reinterpret_cast<const char*>(arr.GetValue(i, &length));
+      writer_->String(buf, length);
+    }
+  }
+
+  // Booleans are written as JSON true / false.
+  template <typename T>
+  typename std::enable_if<std::is_base_of<BooleanArray, T>::value, void>::type
+  WriteDataValues(const T& arr) {
+    for (int i = 0; i < arr.length(); ++i) {
+      writer_->Bool(arr.Value(i));
+    }
+  }
+
+  // Writes the "DATA" member as an array of the array's values.
+  template <typename T>
+  void WriteDataField(const T& arr) {
+    writer_->Key("DATA");
+    writer_->StartArray();
+    WriteDataValues(arr);
+    writer_->EndArray();
+  }
+
+  // Writes the "OFFSETS" member from `length` consecutive offsets.
+  template <typename T>
+  void WriteOffsetsField(const T* offsets, int32_t length) {
+    writer_->Key("OFFSETS");
+    writer_->StartArray();
+    for (int i = 0; i < length; ++i) {
+      writer_->Int64(offsets[i]);
+    }
+    writer_->EndArray();
+  }
+
+  // Writes the "VALIDITY" member: one integer per slot, 1 for valid and 0
+  // for null. When null_count() is zero, IsNull() is not consulted and every
+  // slot is written as 1.
+  void WriteValidityField(const Array& arr) {
+    writer_->Key("VALIDITY");
+    writer_->StartArray();
+    if (arr.null_count() > 0) {
+      for (int i = 0; i < arr.length(); ++i) {
+        writer_->Int(arr.IsNull(i) ? 0 : 1);
+      }
+    } else {
+      for (int i = 0; i < arr.length(); ++i) {
+        writer_->Int(1);
+      }
+    }
+    writer_->EndArray();
+  }
+
+  // Writes an empty "children" array.
+  void SetNoChildren() {
+    writer_->Key("children");
+    writer_->StartArray();
+    writer_->EndArray();
+  }
+
+  // Fixed-width arrays: VALIDITY, DATA, empty children.
+  template <typename T>
+  Status WritePrimitive(const T& array) {
+    WriteValidityField(array);
+    WriteDataField(array);
+    SetNoChildren();
+    return Status::OK();
+  }
+
+  // Variable-length byte arrays: VALIDITY, OFFSETS (length + 1 entries),
+  // DATA, empty children.
+  template <typename T>
+  Status WriteVarBytes(const T& array) {
+    WriteValidityField(array);
+    WriteOffsetsField(array.raw_offsets(), array.length() + 1);
+    WriteDataField(array);
+    SetNoChildren();
+    return Status::OK();
+  }
+
+  // Writes "children": arrays[i] is written under the name of fields[i].
+  // The loop is driven by fields.size(); arrays is indexed without a bounds
+  // check, so callers must pass at least as many arrays as fields.
+  Status WriteChildren(const std::vector<std::shared_ptr<Field>>& fields,
+      const std::vector<std::shared_ptr<Array>>& arrays) {
+    writer_->Key("children");
+    writer_->StartArray();
+    for (size_t i = 0; i < fields.size(); ++i) {
+      RETURN_NOT_OK(VisitArray(fields[i]->name, *arrays[i].get()));
+    }
+    writer_->EndArray();
+    return Status::OK();
+  }
+
+  // Null arrays get only an empty children list (no VALIDITY or DATA).
+  Status Visit(const NullArray& array) override {
+    SetNoChildren();
+    return Status::OK();
+  }
+
+  Status Visit(const BooleanArray& array) override { return WritePrimitive(array); }
+
+  Status Visit(const Int8Array& array) override { return WritePrimitive(array); }
+
+  Status Visit(const Int16Array& array) override { return WritePrimitive(array); }
+
+  Status Visit(const Int32Array& array) override { return WritePrimitive(array); }
+
+  Status Visit(const Int64Array& array) override { return WritePrimitive(array); }
+
+  Status Visit(const UInt8Array& array) override { return WritePrimitive(array); }
+
+  Status Visit(const UInt16Array& array) override { return WritePrimitive(array); }
+
+  Status Visit(const UInt32Array& array) override { return WritePrimitive(array); }
+
+  Status Visit(const UInt64Array& array) override { return WritePrimitive(array); }
+
+  Status Visit(const HalfFloatArray& array) override { return WritePrimitive(array); }
+
+  Status Visit(const FloatArray& array) override { return WritePrimitive(array); }
+
+  Status Visit(const DoubleArray& array) override { return WritePrimitive(array); }
+
+  Status Visit(const StringArray& array) override { return WriteVarBytes(array); }
+
+  Status Visit(const BinaryArray& array) override { return WriteVarBytes(array); }
+
+  // The remaining value types are not serialized yet.
+  Status Visit(const DateArray& array) override { return Status::NotImplemented("date"); }
+
+  Status Visit(const TimeArray& array) override { return Status::NotImplemented("time"); }
+
+  Status Visit(const TimestampArray& array) override {
+    return Status::NotImplemented("timestamp");
+  }
+
+  Status Visit(const IntervalArray& array) override {
+    return Status::NotImplemented("interval");
+  }
+
+  Status Visit(const DecimalArray& array) override {
+    return Status::NotImplemented("decimal");
+  }
+
+  // Lists: VALIDITY, OFFSETS, and the values array as the single child.
+  Status Visit(const ListArray& array) override {
+    WriteValidityField(array);
+    WriteOffsetsField(array.raw_offsets(), array.length() + 1);
+    auto type = static_cast<const ListType*>(array.type().get());
+    return WriteChildren(type->children(), {array.values()});
+  }
+
+  // Structs: VALIDITY and one child per struct field.
+  Status Visit(const StructArray& array) override {
+    WriteValidityField(array);
+    auto type = static_cast<const StructType*>(array.type().get());
+    return WriteChildren(type->children(), array.fields());
+  }
+
+  Status Visit(const UnionArray& array) override {
+    return Status::NotImplemented("union");
+  }
+
+ private:
+  const std::string& name_;
+  const Array& array_;
+  RjWriter* writer_;
+};
+
+// Rebuilds a Schema from the JSON representation written by JsonSchemaWriter.
+// Missing or mistyped members are reported as error Statuses rather than
+// asserted on.
+class JsonSchemaReader {
+ public:
+  // json_schema is held by reference and must outlive the reader.
+  explicit JsonSchemaReader(const rj::Value& json_schema) : json_schema_(json_schema) {}
+
+  // Parses the top-level {"fields": [...]} object into *schema.
+  Status GetSchema(std::shared_ptr<Schema>* schema) {
+    // GetObject() is only valid on object values, so reject anything else
+    // before touching the members.
+    if (!json_schema_.IsObject()) {
+      return Status::Invalid("Schema was not a JSON object");
+    }
+    const auto& obj_schema = json_schema_.GetObject();
+
+    const auto& json_fields = obj_schema.FindMember("fields");
+    RETURN_NOT_ARRAY("fields", json_fields, obj_schema);
+
+    std::vector<std::shared_ptr<Field>> fields;
+    RETURN_NOT_OK(GetFieldsFromArray(json_fields->value, &fields));
+
+    *schema = std::make_shared<Schema>(fields);
+    return Status::OK();
+  }
+
+  // Parses every element of the JSON array `obj` as a field. Callers must
+  // have verified that obj is an array. *fields is resized to the array
+  // length before parsing starts.
+  Status GetFieldsFromArray(
+      const rj::Value& obj, std::vector<std::shared_ptr<Field>>* fields) {
+    const auto& values = obj.GetArray();
+
+    fields->resize(values.Size());
+    for (size_t i = 0; i < fields->size(); ++i) {
+      RETURN_NOT_OK(GetField(values[i], &(*fields)[i]));
+    }
+    return Status::OK();
+  }
+
+  // Parses one field object. "name", "nullable", "type" and "children" are
+  // all required; children are parsed first because nested types are built
+  // from them.
+  Status GetField(const rj::Value& obj, std::shared_ptr<Field>* field) {
+    if (!obj.IsObject()) { return Status::Invalid("Field was not a JSON object"); }
+    const auto& json_field = obj.GetObject();
+
+    const auto& json_name = json_field.FindMember("name");
+    RETURN_NOT_STRING("name", json_name, json_field);
+
+    const auto& json_nullable = json_field.FindMember("nullable");
+    RETURN_NOT_BOOL("nullable", json_nullable, json_field);
+
+    const auto& json_type = json_field.FindMember("type");
+    RETURN_NOT_OBJECT("type", json_type, json_field);
+
+    const auto& json_children = json_field.FindMember("children");
+    RETURN_NOT_ARRAY("children", json_children, json_field);
+
+    std::vector<std::shared_ptr<Field>> children;
+    RETURN_NOT_OK(GetFieldsFromArray(json_children->value, &children));
+
+    std::shared_ptr<DataType> type;
+    RETURN_NOT_OK(GetType(json_type->value.GetObject(), children, &type));
+
+    *field = std::make_shared<Field>(
+        json_name->value.GetString(), type, json_nullable->value.GetBool());
+    return Status::OK();
+  }
+
+  // Builds an integer type from "bitWidth" (8, 16, 32 or 64) and "isSigned".
+  Status GetInteger(
+      const rj::Value::ConstObject& json_type, std::shared_ptr<DataType>* type) {
+    const auto& json_bit_width = json_type.FindMember("bitWidth");
+    RETURN_NOT_INT("bitWidth", json_bit_width, json_type);
+
+    const auto& json_is_signed = json_type.FindMember("isSigned");
+    RETURN_NOT_BOOL("isSigned", json_is_signed, json_type);
+
+    bool is_signed = json_is_signed->value.GetBool();
+    int bit_width = json_bit_width->value.GetInt();
+
+    switch (bit_width) {
+      case 8:
+        *type = is_signed ? int8() : uint8();
+        break;
+      case 16:
+        *type = is_signed ? int16() : uint16();
+        break;
+      case 32:
+        *type = is_signed ? int32() : uint32();
+        break;
+      case 64:
+        *type = is_signed ? int64() : uint64();
+        break;
+      default:
+        std::stringstream ss;
+        ss << "Invalid bit width: " << bit_width;
+        return Status::Invalid(ss.str());
+    }
+    return Status::OK();
+  }
+
+  // Builds a floating point type from "precision" (HALF, SINGLE or DOUBLE).
+  Status GetFloatingPoint(const RjObject& json_type, std::shared_ptr<DataType>* type) {
+    const auto& json_precision = json_type.FindMember("precision");
+    RETURN_NOT_STRING("precision", json_precision, json_type);
+
+    std::string precision = json_precision->value.GetString();
+
+    if (precision == "DOUBLE") {
+      *type = float64();
+    } else if (precision == "SINGLE") {
+      *type = float32();
+    } else if (precision == "HALF") {
+      *type = float16();
+    } else {
+      std::stringstream ss;
+      ss << "Invalid precision: " << precision;
+      return Status::Invalid(ss.str());
+    }
+    return Status::OK();
+  }
+
+  // Builds a T (time or timestamp type) from the "unit" member.
+  template <typename T>
+  Status GetTimeLike(const RjObject& json_type, std::shared_ptr<DataType>* type) {
+    const auto& json_unit = json_type.FindMember("unit");
+    RETURN_NOT_STRING("unit", json_unit, json_type);
+
+    std::string unit_str = json_unit->value.GetString();
+
+    TimeUnit unit;
+
+    if (unit_str == "SECOND") {
+      unit = TimeUnit::SECOND;
+    } else if (unit_str == "MILLISECOND") {
+      unit = TimeUnit::MILLI;
+    } else if (unit_str == "MICROSECOND") {
+      unit = TimeUnit::MICRO;
+    } else if (unit_str == "NANOSECOND") {
+      unit = TimeUnit::NANO;
+    } else {
+      std::stringstream ss;
+      ss << "Invalid time unit: " << unit_str;
+      return Status::Invalid(ss.str());
+    }
+
+    *type = std::make_shared<T>(unit);
+
+    return Status::OK();
+  }
+
+  // Builds a union type from "mode" (SPARSE or DENSE), "typeIds" and the
+  // already-parsed child fields.
+  Status GetUnion(const RjObject& json_type,
+      const std::vector<std::shared_ptr<Field>>& children,
+      std::shared_ptr<DataType>* type) {
+    const auto& json_mode = json_type.FindMember("mode");
+    RETURN_NOT_STRING("mode", json_mode, json_type);
+
+    std::string mode_str = json_mode->value.GetString();
+    UnionMode mode;
+
+    if (mode_str == "SPARSE") {
+      mode = UnionMode::SPARSE;
+    } else if (mode_str == "DENSE") {
+      mode = UnionMode::DENSE;
+    } else {
+      std::stringstream ss;
+      ss << "Invalid union mode: " << mode_str;
+      return Status::Invalid(ss.str());
+    }
+
+    const auto& json_type_ids = json_type.FindMember("typeIds");
+    RETURN_NOT_ARRAY("typeIds", json_type_ids, json_type);
+
+    std::vector<uint8_t> type_ids;
+    const auto& id_array = json_type_ids->value.GetArray();
+    for (const rj::Value& val : id_array) {
+      // Checked at runtime: ids are stored as uint8_t, so a non-integer or
+      // out-of-range value would otherwise be silently mangled.
+      if (!val.IsUint() || val.GetUint() > UINT8_MAX) {
+        return Status::Invalid("Union type id was not an integer in [0, 255]");
+      }
+      type_ids.push_back(static_cast<uint8_t>(val.GetUint()));
+    }
+
+    *type = union_(children, type_ids, mode);
+
+    return Status::OK();
+  }
+
+  // Dispatches on the "name" member of a type object and builds the
+  // corresponding DataType. `children` are the fields parsed from the
+  // enclosing field's "children" array.
+  Status GetType(const RjObject& json_type,
+      const std::vector<std::shared_ptr<Field>>& children,
+      std::shared_ptr<DataType>* type) {
+    const auto& json_type_name = json_type.FindMember("name");
+    RETURN_NOT_STRING("name", json_type_name, json_type);
+
+    std::string type_name = json_type_name->value.GetString();
+
+    if (type_name == "int") {
+      return GetInteger(json_type, type);
+    } else if (type_name == "floatingpoint") {
+      return GetFloatingPoint(json_type, type);
+    } else if (type_name == "bool") {
+      *type = boolean();
+    } else if (type_name == "utf8") {
+      *type = utf8();
+    } else if (type_name == "binary") {
+      *type = binary();
+    } else if (type_name == "null") {
+      *type = null();
+    } else if (type_name == "date") {
+      *type = date();
+    } else if (type_name == "time") {
+      return GetTimeLike<TimeType>(json_type, type);
+    } else if (type_name == "timestamp") {
+      return GetTimeLike<TimestampType>(json_type, type);
+    } else if (type_name == "list") {
+      // The value type comes from children[0]; make sure it exists.
+      if (children.size() != 1) {
+        return Status::Invalid("List must have exactly one child");
+      }
+      *type = list(children[0]);
+    } else if (type_name == "struct") {
+      *type = struct_(children);
+    } else if (type_name == "union") {
+      return GetUnion(json_type, children, type);
+    } else {
+      // Report the offending name instead of falling through to the union
+      // parser, whose error would complain about an unrelated member.
+      std::stringstream ss;
+      ss << "Unrecognized type name: " << type_name;
+      return Status::Invalid(ss.str());
+    }
+    return Status::OK();
+  }
+
+ private:
+  const rj::Value& json_schema_;
+};
+
+// Rebuilds Arrays from the JSON representation written by JsonArrayWriter.
+// All memory is allocated from the MemoryPool given at construction.
+class JsonArrayReader {
+ public:
+  explicit JsonArrayReader(MemoryPool* pool) : pool_(pool) {}
+
+  // Packs is_valid into a freshly allocated bitmap (bit i set when slot i is
+  // valid) and counts the null slots into *null_count.
+  Status GetValidityBuffer(const std::vector<bool>& is_valid, int32_t* null_count,
+      std::shared_ptr<Buffer>* validity_buffer) {
+    int length = static_cast<int>(is_valid.size());
+
+    std::shared_ptr<MutableBuffer> out_buffer;
+    RETURN_NOT_OK(GetEmptyBitmap(pool_, length, &out_buffer));
+    uint8_t* bitmap = out_buffer->mutable_data();
+
+    *null_count = 0;
+    for (int i = 0; i < length; ++i) {
+      if (!is_valid[i]) {
+        ++(*null_count);
+        continue;
+      }
+      BitUtil::SetBit(bitmap, i);
+    }
+
+    *validity_buffer = out_buffer;
+    return Status::OK();
+  }
+
+  // Fixed-width numeric and boolean arrays: values come from the "DATA"
+  // member and are appended through the type's builder; slots marked invalid
+  // become nulls and their DATA entry is ignored.
+  template <typename T>
+  typename std::enable_if<std::is_base_of<PrimitiveCType, T>::value ||
+                              std::is_base_of<BooleanType, T>::value,
+      Status>::type
+  ReadArray(const RjObject& json_array, int32_t length, const std::vector<bool>& is_valid,
+      const std::shared_ptr<DataType>& type, std::shared_ptr<Array>* array) {
+    typename TypeTraits<T>::BuilderType builder(pool_, type);
+
+    const auto& json_data = json_array.FindMember("DATA");
+    RETURN_NOT_ARRAY("DATA", json_data, json_array);
+
+    const auto& json_data_arr = json_data->value.GetArray();
+
+    // Checked at runtime, not only in debug builds: the loop below indexes
+    // json_data_arr with every i in [0, length).
+    if (static_cast<int32_t>(json_data_arr.Size()) != length) {
+      std::stringstream ss;
+      ss << "DATA had " << json_data_arr.Size() << " values, expected " << length;
+      return Status::Invalid(ss.str());
+    }
+
+    for (int i = 0; i < length; ++i) {
+      if (!is_valid[i]) {
+        builder.AppendNull();
+        continue;
+      }
+
+      const rj::Value& val = json_data_arr[i];
+      // The conditions are compile-time constants; every branch must still
+      // compile for every T, hence the uniform builder.Append calls.
+      if (IsSignedInt<T>::value) {
+        // IsInt64, not IsInt: 64-bit columns hold values beyond int32 range.
+        DCHECK(val.IsInt64());
+        builder.Append(val.GetInt64());
+      } else if (IsUnsignedInt<T>::value) {
+        DCHECK(val.IsUint64());
+        builder.Append(val.GetUint64());
+      } else if (IsFloatingPoint<T>::value) {
+        // Read at full double precision so float64 columns are not rounded
+        // through float on the way in.
+        DCHECK(val.IsDouble());
+        builder.Append(val.GetDouble());
+      } else if (std::is_base_of<BooleanType, T>::value) {
+        DCHECK(val.IsBool());
+        builder.Append(val.GetBool());
+      } else {
+        // We are in the wrong function
+        return Status::Invalid(type->ToString());
+      }
+    }
+
+    return builder.Finish(array);
+  }
+
+  // String and binary arrays: each valid slot is a JSON string in "DATA".
+  template <typename T>
+  typename std::enable_if<std::is_base_of<BinaryType, T>::value, Status>::type ReadArray(
+      const RjObject& json_array, int32_t length, const std::vector<bool>& is_valid,
+      const std::shared_ptr<DataType>& type, std::shared_ptr<Array>* array) {
+    typename TypeTraits<T>::BuilderType builder(pool_, type);
+
+    const auto& json_data = json_array.FindMember("DATA");
+    RETURN_NOT_ARRAY("DATA", json_data, json_array);
+
+    const auto& json_data_arr = json_data->value.GetArray();
+
+    // Same bound as the numeric reader: DATA is indexed up to length - 1.
+    if (static_cast<int32_t>(json_data_arr.Size()) != length) {
+      std::stringstream ss;
+      ss << "DATA had " << json_data_arr.Size() << " values, expected " << length;
+      return Status::Invalid(ss.str());
+    }
+
+    for (int i = 0; i < length; ++i) {
+      if (!is_valid[i]) {
+        builder.AppendNull();
+        continue;
+      }
+
+      const rj::Value& val = json_data_arr[i];
+      DCHECK(val.IsString());
+      builder.Append(val.GetString());
+    }
+
+    return builder.Finish(array);
+  }
+
+  // List arrays: validity from is_valid, length + 1 offsets from "OFFSETS",
+  // and the values array as the single entry of "children".
+  template <typename T>
+  typename std::enable_if<std::is_base_of<ListType, T>::value, Status>::type ReadArray(
+      const RjObject& json_array, int32_t length, const std::vector<bool>& is_valid,
+      const std::shared_ptr<DataType>& type, std::shared_ptr<Array>* array) {
+    const auto& json_offsets = json_array.FindMember("OFFSETS");
+    RETURN_NOT_ARRAY("OFFSETS", json_offsets, json_array);
+    const auto& json_offsets_arr = json_offsets->value.GetArray();
+
+    // The copy loop below reads length + 1 entries; reject short (or long)
+    // offset arrays instead of indexing past the end.
+    if (static_cast<int32_t>(json_offsets_arr.Size()) != length + 1) {
+      std::stringstream ss;
+      ss << "OFFSETS had " << json_offsets_arr.Size() << " values, expected "
+         << (length + 1);
+      return Status::Invalid(ss.str());
+    }
+
+    int32_t null_count = 0;
+    std::shared_ptr<Buffer> validity_buffer;
+    RETURN_NOT_OK(GetValidityBuffer(is_valid, &null_count, &validity_buffer));
+
+    auto offsets_buffer = std::make_shared<PoolBuffer>(pool_);
+    RETURN_NOT_OK(offsets_buffer->Resize((length + 1) * sizeof(int32_t)));
+    int32_t* offsets = reinterpret_cast<int32_t*>(offsets_buffer->mutable_data());
+
+    for (int i = 0; i < length + 1; ++i) {
+      const rj::Value& val = json_offsets_arr[i];
+      DCHECK(val.IsInt());
+      offsets[i] = val.GetInt();
+    }
+
+    std::vector<std::shared_ptr<Array>> children;
+    RETURN_NOT_OK(GetChildren(json_array, type, &children));
+    DCHECK_EQ(children.size(), 1);
+
+    *array = std::make_shared<ListArray>(
+        type, length, offsets_buffer, children[0], null_count, validity_buffer);
+
+    return Status::OK();
+  }
+
+  // Struct arrays: validity from is_valid, one child array per struct field.
+  template <typename T>
+  typename std::enable_if<std::is_base_of<StructType, T>::value, Status>::type ReadArray(
+      const RjObject& json_array, int32_t length, const std::vector<bool>& is_valid,
+      const std::shared_ptr<DataType>& type, std::shared_ptr<Array>* array) {
+    int32_t null_count = 0;
+    std::shared_ptr<Buffer> validity_buffer;
+    RETURN_NOT_OK(GetValidityBuffer(is_valid, &null_count, &validity_buffer));
+
+    std::vector<std::shared_ptr<Array>> fields;
+    RETURN_NOT_OK(GetChildren(json_array, type, &fields));
+
+    *array =
+        std::make_shared<StructArray>(type, length, fields, null_count, validity_buffer);
+
+    return Status::OK();
+  }
+
+  // Null arrays carry no data; only the length is used.
+  template <typename T>
+  typename std::enable_if<std::is_base_of<NullType, T>::value, Status>::type ReadArray(
+      const RjObject& json_array, int32_t length, const std::vector<bool>& is_valid,
+      const std::shared_ptr<DataType>& type, std::shared_ptr<Array>* array) {
+    *array = std::make_shared<NullArray>(type, length);
+    return Status::OK();
+  }
+
+  // Reads the "children" member of json_array, appending one Array per child
+  // to *array. The number of JSON children must equal type->num_children();
+  // child i is read using the type of type->child(i).
+  Status GetChildren(const RjObject& json_array, const std::shared_ptr<DataType>& type,
+      std::vector<std::shared_ptr<Array>>* array) {
+    const auto& json_children = json_array.FindMember("children");
+    RETURN_NOT_ARRAY("children", json_children, json_array);
+    const auto& json_children_arr = json_children->value.GetArray();
+
+    if (type->num_children() != static_cast<int>(json_children_arr.Size())) {
+      std::stringstream ss;
+      ss << "Expected " << type->num_children() << " children, but got "
+         << json_children_arr.Size();
+      return Status::Invalid(ss.str());
+    }
+
+    for (int i = 0; i < static_cast<int>(json_children_arr.Size()); ++i) {
+      const rj::Value& json_child = json_children_arr[i];
+      // FindMember is only valid on objects, so check before looking up
+      // the child's name.
+      if (!json_child.IsObject()) {
+        return Status::Invalid("Array element was not a JSON object");
+      }
+
+      std::shared_ptr<Field> child_field = type->child(i);
+
+      auto it = json_child.FindMember("name");
+      RETURN_NOT_STRING("name", it, json_child);
+
+      DCHECK_EQ(it->value.GetString(), child_field->name);
+      std::shared_ptr<Array> child;
+      RETURN_NOT_OK(GetArray(json_children_arr[i], child_field->type, &child));
+      array->emplace_back(child);
+    }
+
+    return Status::OK();
+  }
+
+  // Reads one array object of the given type. "count" and "VALIDITY" are
+  // required for every type; the remaining members depend on the type and
+  // are consumed by the matching ReadArray overload. Types without a reader
+  // yield Status::NotImplemented carrying the type's string form.
+  // NOTE(review): JsonArrayWriter::Visit(const NullArray&) writes no VALIDITY
+  // member, so null arrays it produces appear to be rejected here -- confirm.
+  Status GetArray(const rj::Value& obj, const std::shared_ptr<DataType>& type,
+      std::shared_ptr<Array>* array) {
+    if (!obj.IsObject()) {
+      return Status::Invalid("Array element was not a JSON object");
+    }
+    const auto& json_array = obj.GetObject();
+
+    const auto& json_length = json_array.FindMember("count");
+    RETURN_NOT_INT("count", json_length, json_array);
+    int32_t length = json_length->value.GetInt();
+
+    const auto& json_valid_iter = json_array.FindMember("VALIDITY");
+    RETURN_NOT_ARRAY("VALIDITY", json_valid_iter, json_array);
+
+    const auto& json_validity = json_valid_iter->value.GetArray();
+
+    // The readers index is_valid with every i in [0, length), so the sizes
+    // must agree; this also rejects a negative count.
+    if (static_cast<int32_t>(json_validity.Size()) != length) {
+      std::stringstream ss;
+      ss << "VALIDITY had " << json_validity.Size() << " values, expected " << length;
+      return Status::Invalid(ss.str());
+    }
+
+    std::vector<bool> is_valid;
+    for (const rj::Value& val : json_validity) {
+      DCHECK(val.IsInt());
+      is_valid.push_back(static_cast<bool>(val.GetInt()));
+    }
+
+#define TYPE_CASE(TYPE) \
+  case TYPE::type_id:   \
+    return ReadArray<TYPE>(json_array, length, is_valid, type, array);
+
+#define NOT_IMPLEMENTED_CASE(TYPE_ENUM)      \
+  case Type::TYPE_ENUM: {                    \
+    std::stringstream ss;                    \
+    ss << type->ToString();                  \
+    return Status::NotImplemented(ss.str()); \
+  }
+
+    switch (type->type) {
+      TYPE_CASE(NullType);
+      TYPE_CASE(BooleanType);
+      TYPE_CASE(UInt8Type);
+      TYPE_CASE(Int8Type);
+      TYPE_CASE(UInt16Type);
+      TYPE_CASE(Int16Type);
+      TYPE_CASE(UInt32Type);
+      TYPE_CASE(Int32Type);
+      TYPE_CASE(UInt64Type);
+      TYPE_CASE(Int64Type);
+      TYPE_CASE(HalfFloatType);
+      TYPE_CASE(FloatType);
+      TYPE_CASE(DoubleType);
+      TYPE_CASE(StringType);
+      TYPE_CASE(BinaryType);
+      NOT_IMPLEMENTED_CASE(DATE);
+      NOT_IMPLEMENTED_CASE(TIMESTAMP);
+      NOT_IMPLEMENTED_CASE(TIME);
+      NOT_IMPLEMENTED_CASE(INTERVAL);
+      TYPE_CASE(ListType);
+      TYPE_CASE(StructType);
+      NOT_IMPLEMENTED_CASE(UNION);
+      default:
+        std::stringstream ss;
+        ss << type->ToString();
+        return Status::NotImplemented(ss.str());
+    }
+
+#undef TYPE_CASE
+#undef NOT_IMPLEMENTED_CASE
+
+    return Status::OK();
+  }
+
+ private:
+  MemoryPool* pool_;
+};
+
+// Writes `schema` to `json_writer` as a JSON object; see JsonSchemaWriter.
+Status WriteJsonSchema(const Schema& schema, RjWriter* json_writer) {
+  return JsonSchemaWriter(schema, json_writer).Write();
+}
+
+// Parses `json_schema` into `*schema`; see JsonSchemaReader.
+Status ReadJsonSchema(const rj::Value& json_schema, std::shared_ptr<Schema>* schema) {
+  return JsonSchemaReader(json_schema).GetSchema(schema);
+}
+
+// Writes `array` to `json_writer` as a JSON object labelled `name`; see
+// JsonArrayWriter.
+Status WriteJsonArray(
+    const std::string& name, const Array& array, RjWriter* json_writer) {
+  return JsonArrayWriter(name, array, json_writer).Write();
+}
+
+// Reads `json_array` as an array of the given `type`, allocating from `pool`;
+// see JsonArrayReader.
+Status ReadJsonArray(MemoryPool* pool, const rj::Value& json_array,
+    const std::shared_ptr<DataType>& type, std::shared_ptr<Array>* array) {
+  return JsonArrayReader(pool).GetArray(json_array, type, array);
+}
+
+// Reads `json_array` using the type of the schema field whose name equals the
+// object's "name" member. Returns Invalid when the value is not an object or
+// lacks a string name, and KeyError when no schema field has that name.
+Status ReadJsonArray(MemoryPool* pool, const rj::Value& json_array, const Schema& schema,
+    std::shared_ptr<Array>* array) {
+  if (!json_array.IsObject()) { return Status::Invalid("Element was not a JSON object"); }
+
+  const auto& column = json_array.GetObject();
+
+  const auto& name_member = column.FindMember("name");
+  RETURN_NOT_STRING("name", name_member, column);
+
+  const std::string name = name_member->value.GetString();
+
+  // Linear scan; the first field with a matching name decides the type.
+  for (const std::shared_ptr<Field>& field : schema.fields()) {
+    if (field->name == name) {
+      return ReadJsonArray(pool, json_array, field->type, array);
+    }
+  }
+
+  std::stringstream ss;
+  ss << "Field named " << name << " not found in schema";
+  return Status::KeyError(ss.str());
+}
+
+}  // namespace ipc
+}  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/ipc/json-internal.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/json-internal.h b/cpp/src/arrow/ipc/json-internal.h
new file mode 100644
index 0000000..0c167a4
--- /dev/null
+++ b/cpp/src/arrow/ipc/json-internal.h
@@ -0,0 +1,111 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_IPC_JSON_INTERNAL_H
+#define ARROW_IPC_JSON_INTERNAL_H
+
+#define RAPIDJSON_HAS_STDSTRING 1
+#define RAPIDJSON_HAS_CXX11_RVALUE_REFS 1
+#define RAPIDJSON_HAS_CXX11_RANGE_FOR 1
+
+#include <memory>
+#include <sstream>
+#include <string>
+
+#include "rapidjson/document.h"
+#include "rapidjson/stringbuffer.h"
+#include "rapidjson/writer.h"
+
+#include "arrow/type_fwd.h"
+#include "arrow/util/visibility.h"
+
+namespace rj = rapidjson;
+using RjWriter = rj::Writer<rj::StringBuffer>;
+
+// Returns Status::Invalid from the enclosing function when the member iterator
+// NAME equals PARENT.MemberEnd(), i.e. the key TOK was not found in PARENT.
+// Expands to a bare `if` block, so use it only as a statement of its own.
+// Parameters are parenthesized so that expression arguments expand safely.
+#define RETURN_NOT_FOUND(TOK, NAME, PARENT)  \
+  if ((NAME) == (PARENT).MemberEnd()) {      \
+    std::stringstream ss;                    \
+    ss << "field " << (TOK) << " not found"; \
+    return Status::Invalid(ss.str());        \
+  }
+
+// Runs RETURN_NOT_FOUND, then also returns Status::Invalid when the member's
+// value is not a JSON string. The message names the offending key (TOK) and
+// the source line of the macro invocation.
+#define RETURN_NOT_STRING(TOK, NAME, PARENT)       \
+  RETURN_NOT_FOUND(TOK, NAME, PARENT);             \
+  if (!(NAME)->value.IsString()) {                 \
+    std::stringstream ss;                          \
+    ss << "field " << (TOK) << " was not a string" \
+       << " line " << __LINE__;                    \
+    return Status::Invalid(ss.str());              \
+  }
+
+// Runs RETURN_NOT_FOUND, then also returns Status::Invalid when the member's
+// value is not a JSON boolean. The message names the offending key (TOK) and
+// the source line of the macro invocation.
+#define RETURN_NOT_BOOL(TOK, NAME, PARENT)          \
+  RETURN_NOT_FOUND(TOK, NAME, PARENT);              \
+  if (!(NAME)->value.IsBool()) {                    \
+    std::stringstream ss;                           \
+    ss << "field " << (TOK) << " was not a boolean" \
+       << " line " << __LINE__;                     \
+    return Status::Invalid(ss.str());               \
+  }
+
+// Runs RETURN_NOT_FOUND, then also returns Status::Invalid when the member's
+// value fails RapidJSON's IsInt() check. The message names the offending key
+// (TOK) and the source line of the macro invocation.
+#define RETURN_NOT_INT(TOK, NAME, PARENT)        \
+  RETURN_NOT_FOUND(TOK, NAME, PARENT);           \
+  if (!(NAME)->value.IsInt()) {                  \
+    std::stringstream ss;                        \
+    ss << "field " << (TOK) << " was not an int" \
+       << " line " << __LINE__;                  \
+    return Status::Invalid(ss.str());            \
+  }
+
+// Runs RETURN_NOT_FOUND, then also returns Status::Invalid when the member's
+// value is not a JSON array. The message names the offending key (TOK) and
+// the source line of the macro invocation.
+#define RETURN_NOT_ARRAY(TOK, NAME, PARENT)        \
+  RETURN_NOT_FOUND(TOK, NAME, PARENT);             \
+  if (!(NAME)->value.IsArray()) {                  \
+    std::stringstream ss;                          \
+    ss << "field " << (TOK) << " was not an array" \
+       << " line " << __LINE__;                    \
+    return Status::Invalid(ss.str());              \
+  }
+
+// Runs RETURN_NOT_FOUND, then also returns Status::Invalid when the member's
+// value is not a JSON object. The message names the offending key (TOK) and
+// the source line of the macro invocation.
+#define RETURN_NOT_OBJECT(TOK, NAME, PARENT)        \
+  RETURN_NOT_FOUND(TOK, NAME, PARENT);              \
+  if (!(NAME)->value.IsObject()) {                  \
+    std::stringstream ss;                           \
+    ss << "field " << (TOK) << " was not an object" \
+       << " line " << __LINE__;                     \
+    return Status::Invalid(ss.str());               \
+  }
+
+namespace arrow {
+namespace ipc {
+
+// TODO(wesm): Only exporting these because arrow_ipc does not have a static
+// library at the moment. Better to not export
+
+// Writes `schema` as JSON through `json_writer`.
+Status ARROW_EXPORT WriteJsonSchema(const Schema& schema, RjWriter* json_writer);
+
+// Writes `array` as JSON through `json_writer`, labelled with `name`.
+Status ARROW_EXPORT WriteJsonArray(
+    const std::string& name, const Array& array, RjWriter* json_writer);
+
+// Reconstructs a Schema from its JSON representation into *schema.
+Status ARROW_EXPORT ReadJsonSchema(
+    const rj::Value& json_obj, std::shared_ptr<Schema>* schema);
+
+// Reads a JSON-encoded array of the given `type` into *array, using `pool`.
+Status ARROW_EXPORT ReadJsonArray(MemoryPool* pool, const rj::Value& json_obj,
+    const std::shared_ptr<DataType>& type, std::shared_ptr<Array>* array);
+
+// Same as above, but the type is taken from the field of `schema` whose name
+// equals the JSON object's "name" member; KeyError if no such field exists.
+Status ARROW_EXPORT ReadJsonArray(MemoryPool* pool, const rj::Value& json_obj,
+    const Schema& schema, std::shared_ptr<Array>* array);
+
+}  // namespace ipc
+}  // namespace arrow
+
+#endif  // ARROW_IPC_JSON_INTERNAL_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/ipc/json.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/json.cc b/cpp/src/arrow/ipc/json.cc
new file mode 100644
index 0000000..2281611
--- /dev/null
+++ b/cpp/src/arrow/ipc/json.cc
@@ -0,0 +1,219 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/ipc/json.h"
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/ipc/json-internal.h"
+#include "arrow/schema.h"
+#include "arrow/table.h"
+#include "arrow/type.h"
+#include "arrow/util/buffer.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/memory-pool.h"
+#include "arrow/util/status.h"
+
+namespace arrow {
+namespace ipc {
+
+// ----------------------------------------------------------------------
+// Writer implementation
+
+// Private implementation of JsonWriter. Owns the RapidJSON string buffer and
+// writer, and emits a document of the form
+//   {"schema": <schema>, "batches": [{"count": N, "columns": [...]}, ...]}
+class JsonWriter::JsonWriterImpl {
+ public:
+  explicit JsonWriterImpl(const std::shared_ptr<Schema>& schema) : schema_(schema) {
+    writer_.reset(new RjWriter(string_buffer_));
+  }
+
+  // Opens the top-level object, writes the schema and opens the "batches"
+  // array that WriteRecordBatch appends to.
+  Status Start() {
+    writer_->StartObject();
+
+    writer_->Key("schema");
+    RETURN_NOT_OK(WriteJsonSchema(*schema_, writer_.get()));
+
+    // Record batches
+    writer_->Key("batches");
+    writer_->StartArray();
+    return Status::OK();
+  }
+
+  // Closes the "batches" array and the top-level object, then copies the
+  // buffered JSON text into *result.
+  Status Finish(std::string* result) {
+    writer_->EndArray();  // Record batches
+    writer_->EndObject();
+
+    *result = string_buffer_.GetString();
+    return Status::OK();
+  }
+
+  // Appends one record batch object with "count" and "columns" members.
+  //
+  // All inputs are validated before anything is emitted, so a rejected batch
+  // leaves the JSON written so far untouched. Returns Status::Invalid when the
+  // number of columns differs from the number of schema fields, when a column
+  // is null, or when a column's length differs from num_rows. These used to be
+  // debug-only checks, which let release builds index past `columns`.
+  Status WriteRecordBatch(
+      const std::vector<std::shared_ptr<Array>>& columns, int32_t num_rows) {
+    const int num_fields = schema_->num_fields();
+    if (static_cast<int>(columns.size()) != num_fields) {
+      return Status::Invalid("Number of columns did not match number of schema fields");
+    }
+    for (int i = 0; i < num_fields; ++i) {
+      if (columns[i] == nullptr) { return Status::Invalid("Column was null"); }
+      if (columns[i]->length() != num_rows) {
+        return Status::Invalid("Array length did not match record batch length");
+      }
+    }
+
+    writer_->StartObject();
+    writer_->Key("count");
+    writer_->Int(num_rows);
+
+    writer_->Key("columns");
+    writer_->StartArray();
+
+    for (int i = 0; i < num_fields; ++i) {
+      RETURN_NOT_OK(
+          WriteJsonArray(schema_->field(i)->name, *columns[i], writer_.get()));
+    }
+
+    writer_->EndArray();
+    writer_->EndObject();
+    return Status::OK();
+  }
+
+ private:
+  std::shared_ptr<Schema> schema_;
+
+  // string_buffer_ is declared before writer_ because the writer is
+  // constructed on top of it.
+  rj::StringBuffer string_buffer_;
+  std::unique_ptr<RjWriter> writer_;
+};
+
+// Builds the writer around a private implementation that holds `schema`.
+JsonWriter::JsonWriter(const std::shared_ptr<Schema>& schema)
+    : impl_(new JsonWriterImpl(schema)) {}
+
+// Defined out of line, after JsonWriterImpl is a complete type, so the
+// unique_ptr member can destroy the implementation object.
+JsonWriter::~JsonWriter() = default;
+
+// Creates a writer for `schema`, stores it in *writer and starts the JSON
+// document. The returned Status is that of the implementation's Start().
+Status JsonWriter::Open(
+    const std::shared_ptr<Schema>& schema, std::unique_ptr<JsonWriter>* writer) {
+  writer->reset(new JsonWriter(schema));
+  JsonWriter& opened = **writer;
+  return opened.impl_->Start();
+}
+
+// Completes the JSON document and stores its text in *result; forwards to the
+// private implementation.
+Status JsonWriter::Finish(std::string* result) {
+  JsonWriterImpl& impl = *impl_;
+  return impl.Finish(result);
+}
+
+// Appends one record batch made of `columns` with `num_rows` rows; forwards to
+// the private implementation.
+Status JsonWriter::WriteRecordBatch(
+    const std::vector<std::shared_ptr<Array>>& columns, int32_t num_rows) {
+  JsonWriterImpl& impl = *impl_;
+  return impl.WriteRecordBatch(columns, num_rows);
+}
+
+// ----------------------------------------------------------------------
+// Reader implementation
+
+// Private implementation of JsonReader. Parses the whole buffer with RapidJSON
+// in ParseAndReadSchema() and materializes record batches on request.
+class JsonReader::JsonReaderImpl {
+ public:
+  // record_batches_ starts out null so that use before (or after a failed)
+  // ParseAndReadSchema() is detectable instead of reading an indeterminate
+  // pointer.
+  JsonReaderImpl(MemoryPool* pool, const std::shared_ptr<Buffer>& data)
+      : pool_(pool), data_(data), record_batches_(nullptr) {}
+
+  // Parses data_ as JSON, reads the "schema" object into schema_ and remembers
+  // where the "batches" array lives. Returns IOError on a parse failure and
+  // Invalid when the root or the expected members have the wrong shape.
+  Status ParseAndReadSchema() {
+    doc_.Parse(reinterpret_cast<const rj::Document::Ch*>(data_->data()),
+        static_cast<size_t>(data_->size()));
+    if (doc_.HasParseError()) { return Status::IOError("JSON parsing failed"); }
+
+    // Member lookup is only meaningful on an object; reject any other root.
+    if (!doc_.IsObject()) { return Status::Invalid("JSON root was not an object"); }
+
+    auto it = doc_.FindMember("schema");
+    RETURN_NOT_OBJECT("schema", it, doc_);
+    RETURN_NOT_OK(ReadJsonSchema(it->value, &schema_));
+
+    it = doc_.FindMember("batches");
+    RETURN_NOT_ARRAY("batches", it, doc_);
+    record_batches_ = &it->value;
+
+    return Status::OK();
+  }
+
+  // Builds the i-th record batch from the parsed document.
+  //
+  // Returns Status::Invalid when i is out of range, when the batch element is
+  // not a JSON object, when "count"/"columns" are missing or mistyped, or when
+  // the number of JSON columns differs from the number of schema fields.
+  Status GetRecordBatch(int i, std::shared_ptr<RecordBatch>* batch) const {
+    if (i < 0 || i >= num_record_batches()) {
+      return Status::Invalid("Record batch index out of bounds");
+    }
+
+    const auto& batch_val = record_batches_->GetArray()[i];
+    if (!batch_val.IsObject()) {
+      return Status::Invalid("Record batch was not a JSON object");
+    }
+
+    const auto& batch_obj = batch_val.GetObject();
+
+    auto it = batch_obj.FindMember("count");
+    RETURN_NOT_INT("count", it, batch_obj);
+    int32_t num_rows = static_cast<int32_t>(it->value.GetInt());
+
+    it = batch_obj.FindMember("columns");
+    RETURN_NOT_ARRAY("columns", it, batch_obj);
+    const auto& json_columns = it->value.GetArray();
+
+    // Each JSON column is decoded with the type of the schema field at the
+    // same position, so the two counts must agree before indexing the schema.
+    const int num_columns = static_cast<int>(json_columns.Size());
+    if (num_columns != schema_->num_fields()) {
+      return Status::Invalid("Number of columns did not match number of schema fields");
+    }
+
+    std::vector<std::shared_ptr<Array>> columns(num_columns);
+    for (int col = 0; col < num_columns; ++col) {
+      const std::shared_ptr<DataType>& type = schema_->field(col)->type;
+      RETURN_NOT_OK(ReadJsonArray(
+          pool_, json_columns[static_cast<rj::SizeType>(col)], type, &columns[col]));
+    }
+
+    *batch = std::make_shared<RecordBatch>(schema_, num_rows, columns);
+    return Status::OK();
+  }
+
+  std::shared_ptr<Schema> schema() const { return schema_; }
+
+  // Number of elements in the "batches" array; 0 if nothing has been parsed.
+  int num_record_batches() const {
+    if (record_batches_ == nullptr) { return 0; }
+    return static_cast<int>(record_batches_->GetArray().Size());
+  }
+
+ private:
+  MemoryPool* pool_;
+  std::shared_ptr<Buffer> data_;
+  rj::Document doc_;
+
+  // Points into doc_; set by ParseAndReadSchema().
+  const rj::Value* record_batches_;
+
+  std::shared_ptr<Schema> schema_;
+};
+
+// Builds the reader around a private implementation holding `pool` and `data`.
+JsonReader::JsonReader(MemoryPool* pool, const std::shared_ptr<Buffer>& data)
+    : impl_(new JsonReaderImpl(pool, data)) {}
+
+// Defined out of line, after JsonReaderImpl is a complete type, so the
+// unique_ptr member can destroy the implementation object.
+JsonReader::~JsonReader() = default;
+
+// Convenience overload: opens the reader with the pool returned by
+// default_memory_pool().
+Status JsonReader::Open(
+    const std::shared_ptr<Buffer>& data, std::unique_ptr<JsonReader>* reader) {
+  MemoryPool* pool = default_memory_pool();
+  return Open(pool, data, reader);
+}
+
+// Creates a reader over `data` that allocates from `pool`, then parses the
+// document and its schema.
+//
+// *reader is assigned only after parsing succeeds. Previously it was set
+// first, so a failed Open left the caller holding a reader whose batch
+// pointer had never been initialized.
+Status JsonReader::Open(MemoryPool* pool, const std::shared_ptr<Buffer>& data,
+    std::unique_ptr<JsonReader>* reader) {
+  std::unique_ptr<JsonReader> result(new JsonReader(pool, data));
+  RETURN_NOT_OK(result->impl_->ParseAndReadSchema());
+
+  reader->reset(result.release());
+  return Status::OK();
+}
+
+// Returns the schema held by the private implementation.
+std::shared_ptr<Schema> JsonReader::schema() const {
+  const JsonReaderImpl& impl = *impl_;
+  return impl.schema();
+}
+
+// Returns the number of record batches reported by the private implementation.
+int JsonReader::num_record_batches() const {
+  const JsonReaderImpl& impl = *impl_;
+  return impl.num_record_batches();
+}
+
+// Materializes the i-th record batch into *batch; forwards to the private
+// implementation.
+Status JsonReader::GetRecordBatch(int i, std::shared_ptr<RecordBatch>* batch) const {
+  const JsonReaderImpl& impl = *impl_;
+  return impl.GetRecordBatch(i, batch);
+}
+
+}  // namespace ipc
+}  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/ipc/json.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/json.h b/cpp/src/arrow/ipc/json.h
new file mode 100644
index 0000000..7395be4
--- /dev/null
+++ b/cpp/src/arrow/ipc/json.h
@@ -0,0 +1,92 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Implement Arrow JSON serialization format
+
+#ifndef ARROW_IPC_JSON_H
+#define ARROW_IPC_JSON_H
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/type_fwd.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace io {
+
+class OutputStream;
+class ReadableFileInterface;
+
+}  // namespace io
+
+namespace ipc {
+
+// Writes a schema followed by a sequence of record batches as a single JSON
+// document held in memory. Instances are created with Open() and the finished
+// text is obtained from Finish().
+class ARROW_EXPORT JsonWriter {
+ public:
+  ~JsonWriter();
+
+  // Creates a writer for `schema` in *out and begins the document (the schema
+  // is written immediately).
+  static Status Open(
+      const std::shared_ptr<Schema>& schema, std::unique_ptr<JsonWriter>* out);
+
+  // TODO(wesm): Write dictionaries
+
+  // Appends one record batch. `columns` is expected to hold one array per
+  // schema field, each of length `num_rows`.
+  Status WriteRecordBatch(
+      const std::vector<std::shared_ptr<Array>>& columns, int32_t num_rows);
+
+  // Closes the document and stores the JSON text in *result.
+  Status Finish(std::string* result);
+
+ private:
+  // Construction goes through Open().
+  explicit JsonWriter(const std::shared_ptr<Schema>& schema);
+
+  // Hide RapidJSON details from public API
+  class JsonWriterImpl;
+  std::unique_ptr<JsonWriterImpl> impl_;
+};
+
+// Reads a schema and record batches from a JSON document held in an in-memory
+// buffer. Instances are created with Open(), which parses the document and
+// reads its schema.
+// TODO(wesm): Read from a file stream rather than an in-memory buffer
+class ARROW_EXPORT JsonReader {
+ public:
+  ~JsonReader();
+
+  // Creates a reader over `data` in *reader; `pool` is passed on to the array
+  // readers that build each batch's columns.
+  static Status Open(MemoryPool* pool, const std::shared_ptr<Buffer>& data,
+      std::unique_ptr<JsonReader>* reader);
+
+  // Use the default memory pool
+  static Status Open(
+      const std::shared_ptr<Buffer>& data, std::unique_ptr<JsonReader>* reader);
+
+  // Schema read from the document's "schema" member.
+  std::shared_ptr<Schema> schema() const;
+
+  // Number of elements in the document's "batches" array.
+  int num_record_batches() const;
+
+  // Read the i-th record batch from the document, where i is expected to
+  // satisfy 0 <= i < num_record_batches().
+  Status GetRecordBatch(int i, std::shared_ptr<RecordBatch>* batch) const;
+
+ private:
+  // Construction goes through Open().
+  JsonReader(MemoryPool* pool, const std::shared_ptr<Buffer>& data);
+
+  // Hide RapidJSON details from public API
+  class JsonReaderImpl;
+  std::unique_ptr<JsonReaderImpl> impl_;
+};
+
+}  // namespace ipc
+}  // namespace arrow
+
+#endif  // ARROW_IPC_JSON_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/ipc/test-common.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/test-common.h b/cpp/src/arrow/ipc/test-common.h
index 784e238..9abc20d 100644
--- a/cpp/src/arrow/ipc/test-common.h
+++ b/cpp/src/arrow/ipc/test-common.h
@@ -27,6 +27,7 @@
 #include "arrow/array.h"
 #include "arrow/table.h"
 #include "arrow/test-util.h"
+#include "arrow/type.h"
 #include "arrow/types/list.h"
 #include "arrow/types/primitive.h"
 #include "arrow/types/string.h"
@@ -39,15 +40,14 @@ namespace arrow {
 namespace ipc {
 
 const auto kInt32 = std::make_shared<Int32Type>();
-const auto kListInt32 = std::make_shared<ListType>(kInt32);
-const auto kListListInt32 = std::make_shared<ListType>(kListInt32);
+const auto kListInt32 = list(kInt32);
+const auto kListListInt32 = list(kListInt32);
 
 Status MakeRandomInt32Array(
     int32_t length, bool include_nulls, MemoryPool* pool, std::shared_ptr<Array>* out) {
   std::shared_ptr<PoolBuffer> data;
   test::MakeRandomInt32PoolBuffer(length, pool, &data);
-  const auto kInt32 = std::make_shared<Int32Type>();
-  Int32Builder builder(pool, kInt32);
+  Int32Builder builder(pool, int32());
   if (include_nulls) {
     std::shared_ptr<PoolBuffer> valid_bytes;
     test::MakeRandomBytePoolBuffer(length, pool, &valid_bytes);
@@ -134,8 +134,8 @@ Status MakeRandomBinaryArray(
 
 Status MakeStringTypesRecordBatch(std::shared_ptr<RecordBatch>* out) {
   const int32_t length = 500;
-  auto string_type = std::make_shared<StringType>();
-  auto binary_type = std::make_shared<BinaryType>();
+  auto string_type = utf8();
+  auto binary_type = binary();
   auto f0 = std::make_shared<Field>("f0", string_type);
   auto f1 = std::make_shared<Field>("f1", binary_type);
   std::shared_ptr<Schema> schema(new Schema({f0, f1}));
@@ -233,7 +233,7 @@ Status MakeDeeplyNestedList(std::shared_ptr<RecordBatch>* out) {
   const bool include_nulls = true;
   RETURN_NOT_OK(MakeRandomInt32Array(1000, include_nulls, pool, &array));
   for (int i = 0; i < 63; ++i) {
-    type = std::static_pointer_cast<DataType>(std::make_shared<ListType>(type));
+    type = std::static_pointer_cast<DataType>(list(type));
     RETURN_NOT_OK(MakeRandomListArray(array, batch_length, include_nulls, pool, &array));
   }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/schema-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/schema-test.cc b/cpp/src/arrow/schema-test.cc
index 8cc80be..4826199 100644
--- a/cpp/src/arrow/schema-test.cc
+++ b/cpp/src/arrow/schema-test.cc
@@ -29,23 +29,21 @@ using std::vector;
 
 namespace arrow {
 
-const auto INT32 = std::make_shared<Int32Type>();
-
 TEST(TestField, Basics) {
-  Field f0("f0", INT32);
-  Field f0_nn("f0", INT32, false);
+  Field f0("f0", int32());
+  Field f0_nn("f0", int32(), false);
 
   ASSERT_EQ(f0.name, "f0");
-  ASSERT_EQ(f0.type->ToString(), INT32->ToString());
+  ASSERT_EQ(f0.type->ToString(), int32()->ToString());
 
   ASSERT_TRUE(f0.nullable);
   ASSERT_FALSE(f0_nn.nullable);
 }
 
 TEST(TestField, Equals) {
-  Field f0("f0", INT32);
-  Field f0_nn("f0", INT32, false);
-  Field f0_other("f0", INT32);
+  Field f0("f0", int32());
+  Field f0_nn("f0", int32(), false);
+  Field f0_other("f0", int32());
 
   ASSERT_EQ(f0, f0_other);
   ASSERT_NE(f0, f0_nn);
@@ -57,11 +55,11 @@ class TestSchema : public ::testing::Test {
 };
 
 TEST_F(TestSchema, Basics) {
-  auto f0 = std::make_shared<Field>("f0", INT32);
-  auto f1 = std::make_shared<Field>("f1", std::make_shared<UInt8Type>(), false);
-  auto f1_optional = std::make_shared<Field>("f1", std::make_shared<UInt8Type>());
+  auto f0 = field("f0", int32());
+  auto f1 = field("f1", uint8(), false);
+  auto f1_optional = field("f1", uint8());
 
-  auto f2 = std::make_shared<Field>("f2", std::make_shared<StringType>());
+  auto f2 = field("f2", utf8());
 
   vector<shared_ptr<Field>> fields = {f0, f1, f2};
   auto schema = std::make_shared<Schema>(fields);
@@ -83,11 +81,10 @@ TEST_F(TestSchema, Basics) {
 }
 
 TEST_F(TestSchema, ToString) {
-  auto f0 = std::make_shared<Field>("f0", INT32);
-  auto f1 = std::make_shared<Field>("f1", std::make_shared<UInt8Type>(), false);
-  auto f2 = std::make_shared<Field>("f2", std::make_shared<StringType>());
-  auto f3 = std::make_shared<Field>(
-      "f3", std::make_shared<ListType>(std::make_shared<Int16Type>()));
+  auto f0 = field("f0", int32());
+  auto f1 = field("f1", uint8(), false);
+  auto f2 = field("f2", utf8());
+  auto f3 = field("f3", list(int16()));
 
   vector<shared_ptr<Field>> fields = {f0, f1, f2, f3};
   auto schema = std::make_shared<Schema>(fields);
@@ -101,4 +98,25 @@ f3: list<item: int16>)";
   ASSERT_EQ(expected, result);
 }
 
+// Schema::GetFieldByName returns the field with the requested name, or nullptr
+// when the schema has no such field.
+TEST_F(TestSchema, GetFieldByName) {
+  auto first = field("f0", int32());
+  auto second = field("f1", uint8(), false);
+  auto third = field("f2", utf8());
+  auto fourth = field("f3", list(int16()));
+
+  vector<shared_ptr<Field>> all_fields = {first, second, third, fourth};
+  auto schema = std::make_shared<Schema>(all_fields);
+
+  // Hits: one lookup in the middle of the schema, one at the end.
+  std::shared_ptr<Field> found = schema->GetFieldByName("f1");
+  ASSERT_TRUE(second->Equals(found));
+
+  found = schema->GetFieldByName("f3");
+  ASSERT_TRUE(fourth->Equals(found));
+
+  // Miss: unknown names yield a null pointer.
+  found = schema->GetFieldByName("not-found");
+  ASSERT_TRUE(found == nullptr);
+}
+
 }  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/schema.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/schema.cc b/cpp/src/arrow/schema.cc
index ff3ea19..cd8256e 100644
--- a/cpp/src/arrow/schema.cc
+++ b/cpp/src/arrow/schema.cc
@@ -42,6 +42,21 @@ bool Schema::Equals(const std::shared_ptr<Schema>& other) const {
   return Equals(*other.get());
 }
 
+// Returns the field called `name`, or nullptr when there is none.
+//
+// The name -> index map is filled on the first call that finds it empty while
+// the schema has fields. When several fields share a name, the entry written
+// last (the highest index) is the one kept.
+std::shared_ptr<Field> Schema::GetFieldByName(const std::string& name) {
+  const bool index_missing = name_to_index_.size() == 0;
+  if (index_missing && fields_.size() > 0) {
+    for (size_t pos = 0; pos < fields_.size(); ++pos) {
+      name_to_index_[fields_[pos]->name] = pos;
+    }
+  }
+
+  auto entry = name_to_index_.find(name);
+  if (entry != name_to_index_.end()) { return fields_[entry->second]; }
+  return nullptr;
+}
+
 std::string Schema::ToString() const {
   std::stringstream buffer;
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/schema.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/schema.h b/cpp/src/arrow/schema.h
index 4301968..0e1ab5c 100644
--- a/cpp/src/arrow/schema.h
+++ b/cpp/src/arrow/schema.h
@@ -20,14 +20,14 @@
 
 #include <memory>
 #include <string>
+#include <unordered_map>
 #include <vector>
 
+#include "arrow/type.h"
 #include "arrow/util/visibility.h"
 
 namespace arrow {
 
-struct Field;
-
 class ARROW_EXPORT Schema {
  public:
   explicit Schema(const std::vector<std::shared_ptr<Field>>& fields);
@@ -37,7 +37,12 @@ class ARROW_EXPORT Schema {
   bool Equals(const std::shared_ptr<Schema>& other) const;
 
   // Return the ith schema element. Does not boundscheck
-  const std::shared_ptr<Field>& field(int i) const { return fields_[i]; }
+  std::shared_ptr<Field> field(int i) const { return fields_[i]; }
+
+  // Returns nullptr if name not found
+  std::shared_ptr<Field> GetFieldByName(const std::string& name);
+
+  const std::vector<std::shared_ptr<Field>>& fields() const { return fields_; }
 
   // Render a string representation of the schema suitable for debugging
   std::string ToString() const;
@@ -46,6 +51,7 @@ class ARROW_EXPORT Schema {
 
  private:
   std::vector<std::shared_ptr<Field>> fields_;
+  std::unordered_map<std::string, int> name_to_index_;
 };
 
 }  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/ed6ec3b7/cpp/src/arrow/test-util.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/test-util.h b/cpp/src/arrow/test-util.h
index ac56f5e..ab4b980 100644
--- a/cpp/src/arrow/test-util.h
+++ b/cpp/src/arrow/test-util.h
@@ -27,6 +27,7 @@
 
 #include "gtest/gtest.h"
 
+#include "arrow/array.h"
 #include "arrow/column.h"
 #include "arrow/schema.h"
 #include "arrow/table.h"
@@ -102,20 +103,57 @@ void random_real(int n, uint32_t seed, T min_value, T max_value, std::vector<T>*
 }
 
 template <typename T>
-std::shared_ptr<Buffer> to_buffer(const std::vector<T>& values) {
+std::shared_ptr<Buffer> GetBufferFromVector(const std::vector<T>& values) {
   return std::make_shared<Buffer>(
       reinterpret_cast<const uint8_t*>(values.data()), values.size() * sizeof(T));
 }
 
+// Copies the contents of `values` into a newly allocated PoolBuffer (from the
+// default memory pool) and stores it in *result. Returns the Status of the
+// buffer resize if that fails.
+template <typename T>
+inline Status CopyBufferFromVector(
+    const std::vector<T>& values, std::shared_ptr<Buffer>* result) {
+  // Compute the byte count without first narrowing the element count to int,
+  // which would wrap for very large vectors.
+  const int64_t nbytes = static_cast<int64_t>(values.size() * sizeof(T));
+
+  auto buffer = std::make_shared<PoolBuffer>(default_memory_pool());
+  RETURN_NOT_OK(buffer->Resize(nbytes));
+  // An empty vector may report a null data(); skip the copy rather than pass
+  // that pointer to memcpy.
+  if (nbytes > 0) { memcpy(buffer->mutable_data(), values.data(), nbytes); }
+
+  *result = buffer;
+  return Status::OK();
+}
+
+// Packs `is_valid` into a bitmap obtained from GetEmptyBitmap on the default
+// memory pool: bit i is set when is_valid[i] is true. Other bits keep whatever
+// GetEmptyBitmap produced (presumably zero -- confirm against its definition).
+static inline Status GetBitmapFromBoolVector(
+    const std::vector<bool>& is_valid, std::shared_ptr<Buffer>* result) {
+  const int num_bits = static_cast<int>(is_valid.size());
+
+  std::shared_ptr<MutableBuffer> bitmap_buffer;
+  RETURN_NOT_OK(GetEmptyBitmap(default_memory_pool(), num_bits, &bitmap_buffer));
+
+  uint8_t* bits = bitmap_buffer->mutable_data();
+  for (int bit = 0; bit < num_bits; ++bit) {
+    if (!is_valid[bit]) { continue; }
+    BitUtil::SetBit(bits, bit);
+  }
+
+  *result = bitmap_buffer;
+  return Status::OK();
+}
+
 // Sets approximately pct_null of the first n bytes in null_bytes to zero
 // and the rest to non-zero (true) values.
-void random_null_bytes(int64_t n, double pct_null, uint8_t* null_bytes) {
+static inline void random_null_bytes(int64_t n, double pct_null, uint8_t* null_bytes) {
   Random rng(random_seed());
   for (int i = 0; i < n; ++i) {
     null_bytes[i] = rng.NextDoubleFraction() > pct_null;
   }
 }
 
+// Appends n entries to *is_valid; each is true when the generator's
+// NextDoubleFraction() draw exceeds pct_null. Nothing is appended when n <= 0.
+static inline void random_is_valid(
+    int64_t n, double pct_null, std::vector<bool>* is_valid) {
+  Random rng(random_seed());
+  // The counter matches the 64-bit type of n; an int counter would overflow
+  // before reaching a bound above INT_MAX.
+  for (int64_t i = 0; i < n; ++i) {
+    is_valid->push_back(rng.NextDoubleFraction() > pct_null);
+  }
+}
+
 static inline void random_bytes(int n, uint32_t seed, uint8_t* out) {
   std::mt19937 gen(seed);
   std::uniform_int_distribution<int> d(0, 255);
@@ -125,6 +163,15 @@ static inline void random_bytes(int n, uint32_t seed, uint8_t* out) {
   }
 }
 
+// Fills out[0..n) with pseudo-random bytes drawn uniformly from [65, 122]
+// (ASCII 'A' through 'z', including the punctuation between the two letter
+// ranges), using a std::mt19937 seeded with `seed`. Writes nothing if n <= 0.
+static inline void random_ascii(int n, uint32_t seed, uint8_t* out) {
+  std::mt19937 engine(seed);
+  std::uniform_int_distribution<int> ascii_code(65, 122);
+
+  uint8_t* cursor = out;
+  int remaining = n;
+  while (remaining > 0) {
+    *cursor = ascii_code(engine) & 0xFF;
+    ++cursor;
+    --remaining;
+  }
+}
+
 template <typename T>
 void rand_uniform_int(int n, uint32_t seed, T min_value, T max_value, T* out) {
   DCHECK(out);