You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2016/03/08 00:02:26 UTC

[1/2] arrow git commit: ARROW-31: Python: prototype user object model, add PyList conversion path with type inference

Repository: arrow
Updated Branches:
  refs/heads/master 571343bbe -> 9afb66778


http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/arrow/tests/test_array.py
----------------------------------------------------------------------
diff --git a/python/arrow/tests/test_array.py b/python/arrow/tests/test_array.py
new file mode 100644
index 0000000..8eaa533
--- /dev/null
+++ b/python/arrow/tests/test_array.py
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from arrow.compat import unittest
+import arrow
+
+
+class TestArrayAPI(unittest.TestCase):
+
+    def test_getitem_NA(self):
+        arr = arrow.from_pylist([1, None, 2])
+        assert arr[1] is arrow.NA

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/arrow/tests/test_convert_builtin.py
----------------------------------------------------------------------
diff --git a/python/arrow/tests/test_convert_builtin.py b/python/arrow/tests/test_convert_builtin.py
new file mode 100644
index 0000000..57e6ab9
--- /dev/null
+++ b/python/arrow/tests/test_convert_builtin.py
@@ -0,0 +1,85 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from arrow.compat import unittest
+import arrow
+
+
+class TestConvertList(unittest.TestCase):
+
+    def test_boolean(self):
+        pass
+
+    def test_empty_list(self):
+        arr = arrow.from_pylist([])
+        assert len(arr) == 0
+        assert arr.null_count == 0
+        assert arr.type == arrow.null()
+
+    def test_all_none(self):
+        arr = arrow.from_pylist([None, None])
+        assert len(arr) == 2
+        assert arr.null_count == 2
+        assert arr.type == arrow.null()
+
+    def test_integer(self):
+        arr = arrow.from_pylist([1, None, 3, None])
+        assert len(arr) == 4
+        assert arr.null_count == 2
+        assert arr.type == arrow.int64()
+
+    def test_garbage_collection(self):
+        import gc
+        bytes_before = arrow.total_allocated_bytes()
+        arrow.from_pylist([1, None, 3, None])
+        gc.collect()
+        assert arrow.total_allocated_bytes() == bytes_before
+
+    def test_double(self):
+        data = [1.5, 1, None, 2.5, None, None]
+        arr = arrow.from_pylist(data)
+        assert len(arr) == 6
+        assert arr.null_count == 3
+        assert arr.type == arrow.double()
+
+    def test_string(self):
+        data = ['foo', b'bar', None, 'arrow']
+        arr = arrow.from_pylist(data)
+        assert len(arr) == 4
+        assert arr.null_count == 1
+        assert arr.type == arrow.string()
+
+    def test_mixed_nesting_levels(self):
+        arrow.from_pylist([1, 2, None])
+        arrow.from_pylist([[1], [2], None])
+        arrow.from_pylist([[1], [2], [None]])
+
+        with self.assertRaises(arrow.ArrowException):
+            arrow.from_pylist([1, 2, [1]])
+
+        with self.assertRaises(arrow.ArrowException):
+            arrow.from_pylist([1, 2, []])
+
+        with self.assertRaises(arrow.ArrowException):
+            arrow.from_pylist([[1], [2], [None, [1]]])
+
+    def test_list_of_int(self):
+        data = [[1, 2, 3], [], None, [1, 2]]
+        arr = arrow.from_pylist(data)
+        assert len(arr) == 4
+        assert arr.null_count == 1
+        assert arr.type == arrow.list_(arrow.int64())

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/arrow/tests/test_schema.py
----------------------------------------------------------------------
diff --git a/python/arrow/tests/test_schema.py b/python/arrow/tests/test_schema.py
new file mode 100644
index 0000000..a89edd7
--- /dev/null
+++ b/python/arrow/tests/test_schema.py
@@ -0,0 +1,51 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from arrow.compat import unittest
+import arrow
+
+
+class TestTypes(unittest.TestCase):
+
+    def test_integers(self):
+        dtypes = ['int8', 'int16', 'int32', 'int64',
+                  'uint8', 'uint16', 'uint32', 'uint64']
+
+        for name in dtypes:
+            factory = getattr(arrow, name)
+            t = factory()
+            t_required = factory(False)
+
+            assert str(t) == name
+            assert str(t_required) == '{0} not null'.format(name)
+
+    def test_list(self):
+        value_type = arrow.int32()
+        list_type = arrow.list_(value_type)
+        assert str(list_type) == 'list<int32>'
+
+    def test_string(self):
+        t = arrow.string()
+        assert str(t) == 'string'
+
+    def test_field(self):
+        t = arrow.string()
+        f = arrow.field('foo', t)
+
+        assert f.name == 'foo'
+        assert f.type is t
+        assert repr(f) == "Field('foo', type=string)"

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/setup.py
----------------------------------------------------------------------
diff --git a/python/setup.py b/python/setup.py
index f6b0a4b..9a0de07 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -124,7 +124,10 @@ class build_ext(_build_ext):
                              static_lib_option, source]
 
             self.spawn(cmake_command)
-            self.spawn(['make'])
+            args = ['make']
+            if 'PYARROW_PARALLEL' in os.environ:
+                args.append('-j{0}'.format(os.environ['PYARROW_PARALLEL']))
+            self.spawn(args)
         else:
             import shlex
             cmake_generator = 'Visual Studio 14 2015'
@@ -207,7 +210,7 @@ class build_ext(_build_ext):
             return name + suffix
 
     def get_cmake_cython_names(self):
-        return ['config', 'parquet']
+        return ['array', 'config', 'error', 'parquet', 'scalar', 'schema']
 
     def get_names(self):
         return self._found_names

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/src/pyarrow/adapters/builtin.cc
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/adapters/builtin.cc b/python/src/pyarrow/adapters/builtin.cc
new file mode 100644
index 0000000..ae84fa1
--- /dev/null
+++ b/python/src/pyarrow/adapters/builtin.cc
@@ -0,0 +1,415 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <Python.h>
+#include <sstream>
+
+#include "pyarrow/adapters/builtin.h"
+
+#include <arrow/api.h>
+
+#include "pyarrow/status.h"
+
+using arrow::ArrayBuilder;
+using arrow::DataType;
+using arrow::LogicalType;
+
+namespace pyarrow {
+
+static inline bool IsPyInteger(PyObject* obj) {
+#if PYARROW_IS_PY2
+  return PyLong_Check(obj) || PyInt_Check(obj);
+#else
+  return PyLong_Check(obj);
+#endif
+}
+
+static inline bool IsPyBaseString(PyObject* obj) {
+#if PYARROW_IS_PY2
+  return PyString_Check(obj) || PyUnicode_Check(obj);
+#else
+  return PyUnicode_Check(obj);
+#endif
+}
+
+class ScalarVisitor {
+ public:
+  ScalarVisitor() :
+      total_count_(0),
+      none_count_(0),
+      bool_count_(0),
+      int_count_(0),
+      float_count_(0),
+      string_count_(0) {}
+
+  void Visit(PyObject* obj) {
+    ++total_count_;
+    if (obj == Py_None) {
+      ++none_count_;
+    } else if (PyFloat_Check(obj)) {
+      ++float_count_;
+    } else if (IsPyInteger(obj)) {
+      ++int_count_;
+    } else if (IsPyBaseString(obj)) {
+      ++string_count_;
+    } else {
+      // TODO(wesm): accumulate error information somewhere
+    }
+  }
+
+  std::shared_ptr<DataType> GetType() {
+    // TODO(wesm): handling mixed-type cases
+    if (float_count_) {
+      return arrow::DOUBLE;
+    } else if (int_count_) {
+      // TODO(wesm): tighter type later
+      return arrow::INT64;
+    } else if (bool_count_) {
+      return arrow::BOOL;
+    } else if (string_count_) {
+      return arrow::STRING;
+    } else {
+      return arrow::NA;
+    }
+  }
+
+  int64_t total_count() const {
+    return total_count_;
+  }
+
+ private:
+  int64_t total_count_;
+  int64_t none_count_;
+  int64_t bool_count_;
+  int64_t int_count_;
+  int64_t float_count_;
+  int64_t string_count_;
+
+  // Place to accumulate errors
+  // std::vector<Status> errors_;
+};
+
+static constexpr int MAX_NESTING_LEVELS = 32;
+
+class SeqVisitor {
+ public:
+  SeqVisitor() :
+      max_nesting_level_(0) {
+    memset(nesting_histogram_, 0, MAX_NESTING_LEVELS * sizeof(int));
+  }
+
+  Status Visit(PyObject* obj, int level=0) {
+    // nesting_histogram_ has MAX_NESTING_LEVELS slots; reject deeper input
+    if (level >= MAX_NESTING_LEVELS) {
+      return Status::ValueError("Sequence is nested too deeply");
+    }
+    Py_ssize_t size = PySequence_Size(obj);
+    if (level > max_nesting_level_) {
+      max_nesting_level_ = level;
+    }
+
+    for (int64_t i = 0; i < size; ++i) {
+      // TODO(wesm): Specialize for PyList_GET_ITEM?
+      OwnedRef item_ref(PySequence_GetItem(obj, i));
+      PyObject* item = item_ref.obj();
+      RETURN_IF_PYERROR();  // GetItem returns NULL with an exception set
+
+      if (PyList_Check(item)) {
+        PY_RETURN_NOT_OK(Visit(item, level + 1));
+      } else if (PyDict_Check(item)) {
+        return Status::NotImplemented("No type inference for dicts");
+      } else if (item != Py_None) {
+        // We permit nulls at any level of nesting, so only non-None
+        // scalars contribute to the histogram and the type inference
+        ++nesting_histogram_[level];
+        scalars_.Visit(item);
+      }
+    }
+    return Status::OK();
+  }
+
+  std::shared_ptr<DataType> GetType() {
+    if (scalars_.total_count() == 0) {
+      if (max_nesting_level_ == 0) {
+        return arrow::NA;
+      } else {
+        return nullptr;
+      }
+    } else {
+      std::shared_ptr<DataType> result = scalars_.GetType();
+      for (int i = 0; i < max_nesting_level_; ++i) {
+        result = std::make_shared<arrow::ListType>(result);
+      }
+      return result;
+    }
+  }
+
+  Status Validate() const {
+    if (scalars_.total_count() > 0) {
+      if (num_nesting_levels() > 1) {
+        return Status::ValueError("Mixed nesting levels not supported");
+      } else if (max_observed_level() < max_nesting_level_) {
+        return Status::ValueError("Mixed nesting levels not supported");
+      }
+    }
+    return Status::OK();
+  }
+
+  int max_observed_level() const {
+    int result = 0;
+    for (int i = 0; i < MAX_NESTING_LEVELS; ++i) {
+      if (nesting_histogram_[i] > 0) {
+        result = i;
+      }
+    }
+    return result;
+  }
+
+  int num_nesting_levels() const {
+    int result = 0;
+    for (int i = 0; i < MAX_NESTING_LEVELS; ++i) {
+      if (nesting_histogram_[i] > 0) {
+        ++result;
+      }
+    }
+    return result;
+  }
+
+ private:
+  ScalarVisitor scalars_;
+
+  // Track observed
+  int max_nesting_level_;
+  int nesting_histogram_[MAX_NESTING_LEVELS];
+};
+
+// Non-exhaustive type inference
+static Status InferArrowType(PyObject* obj, int64_t* size,
+    std::shared_ptr<DataType>* out_type) {
+  *size = PySequence_Size(obj);
+  if (PyErr_Occurred()) {
+    // Not a sequence
+    PyErr_Clear();
+    return Status::TypeError("Object is not a sequence");
+  }
+
+  SeqVisitor seq_visitor;
+  PY_RETURN_NOT_OK(seq_visitor.Visit(obj));
+  PY_RETURN_NOT_OK(seq_visitor.Validate());
+
+  // GetType() yields nullptr for nested sequences holding no scalars (e.g.
+  // [[]] or [[None]]); report that instead of letting callers dereference it
+  *out_type = seq_visitor.GetType();
+  if (*out_type == nullptr) {
+    return Status::NotImplemented("No type inference for empty nested lists");
+  }
+  return Status::OK();
+}
+
+// Marshal Python sequence (list, tuple, etc.) to Arrow array
+class SeqConverter {
+ public:
+  virtual Status Init(const std::shared_ptr<ArrayBuilder>& builder) {
+    builder_ = builder;
+    return Status::OK();
+  }
+
+  virtual Status AppendData(PyObject* seq) = 0;
+
+ protected:
+  std::shared_ptr<ArrayBuilder> builder_;
+};
+
+template <typename BuilderType>
+class TypedConverter : public SeqConverter {
+ public:
+  Status Init(const std::shared_ptr<ArrayBuilder>& builder) override {
+    builder_ = builder;
+    typed_builder_ = static_cast<BuilderType*>(builder.get());
+    return Status::OK();
+  }
+
+ protected:
+  BuilderType* typed_builder_;
+};
+
+class BoolConverter : public TypedConverter<arrow::BooleanBuilder> {
+ public:
+  Status AppendData(PyObject* seq) override {
+    return Status::OK();
+  }
+};
+
+class Int64Converter : public TypedConverter<arrow::Int64Builder> {
+ public:
+  Status AppendData(PyObject* seq) override {
+    int64_t val;
+    Py_ssize_t size = PySequence_Size(seq);
+    for (int64_t i = 0; i < size; ++i) {
+      OwnedRef item(PySequence_GetItem(seq, i));
+      if (item.obj() == Py_None) {
+        RETURN_ARROW_NOT_OK(typed_builder_->AppendNull());
+      } else {
+        val = PyLong_AsLongLong(item.obj());
+        RETURN_IF_PYERROR();
+        RETURN_ARROW_NOT_OK(typed_builder_->Append(val));
+      }
+    }
+    return Status::OK();
+  }
+};
+
+class DoubleConverter : public TypedConverter<arrow::DoubleBuilder> {
+ public:
+  Status AppendData(PyObject* seq) override {
+    double val;  // floating point so fractional values are not truncated
+    Py_ssize_t size = PySequence_Size(seq);
+    for (int64_t i = 0; i < size; ++i) {
+      OwnedRef item(PySequence_GetItem(seq, i));
+      if (item.obj() == Py_None) {
+        RETURN_ARROW_NOT_OK(typed_builder_->AppendNull());
+      } else {
+        val = PyFloat_AsDouble(item.obj());  // also converts Python ints
+        RETURN_IF_PYERROR();
+        RETURN_ARROW_NOT_OK(typed_builder_->Append(val));
+      }
+    }
+    return Status::OK();
+  }
+};
+
+class StringConverter : public TypedConverter<arrow::StringBuilder> {
+ public:
+  Status AppendData(PyObject* seq) override {
+    PyObject* item;
+    PyObject* bytes_obj;
+    OwnedRef tmp;
+    const char* bytes;
+    int32_t length;
+    Py_ssize_t size = PySequence_Size(seq);
+    for (int64_t i = 0; i < size; ++i) {
+      item = PySequence_GetItem(seq, i);
+      OwnedRef holder(item);
+
+      if (item == Py_None) {
+        RETURN_ARROW_NOT_OK(typed_builder_->AppendNull());
+        continue;
+      } else if (PyUnicode_Check(item)) {
+        tmp.reset(PyUnicode_AsUTF8String(item));
+        RETURN_IF_PYERROR();
+        bytes_obj = tmp.obj();
+      } else if (PyBytes_Check(item)) {
+        bytes_obj = item;
+      } else {
+        return Status::TypeError("Non-string value encountered");
+      }
+      // No error checking
+      length = PyBytes_GET_SIZE(bytes_obj);
+      bytes = PyBytes_AS_STRING(bytes_obj);
+      RETURN_ARROW_NOT_OK(typed_builder_->Append(bytes, length));
+    }
+    return Status::OK();
+  }
+};
+
+class ListConverter : public TypedConverter<arrow::ListBuilder> {
+ public:
+  Status Init(const std::shared_ptr<ArrayBuilder>& builder) override;
+
+  Status AppendData(PyObject* seq) override {
+    Py_ssize_t size = PySequence_Size(seq);
+    for (int64_t i = 0; i < size; ++i) {
+      OwnedRef item(PySequence_GetItem(seq, i));
+      if (item.obj() == Py_None) {
+        RETURN_ARROW_NOT_OK(typed_builder_->AppendNull());
+      } else {
+        typed_builder_->Append();
+        PY_RETURN_NOT_OK(value_converter_->AppendData(item.obj()));
+      }
+    }
+    return Status::OK();
+  }
+ protected:
+  std::shared_ptr<SeqConverter> value_converter_;
+};
+
+// Dynamic constructor for sequence converters
+std::shared_ptr<SeqConverter> GetConverter(const std::shared_ptr<DataType>& type) {
+  switch (type->type) {
+    case LogicalType::BOOL:
+      return std::make_shared<BoolConverter>();
+    case LogicalType::INT64:
+      return std::make_shared<Int64Converter>();
+    case LogicalType::DOUBLE:
+      return std::make_shared<DoubleConverter>();
+    case LogicalType::STRING:
+      return std::make_shared<StringConverter>();
+    case LogicalType::LIST:
+      return std::make_shared<ListConverter>();
+    case LogicalType::STRUCT:
+    default:
+      return nullptr;
+      break;
+  }
+}
+
+Status ListConverter::Init(const std::shared_ptr<ArrayBuilder>& builder) {
+  builder_ = builder;
+  typed_builder_ = static_cast<arrow::ListBuilder*>(builder.get());
+
+  value_converter_ = GetConverter(static_cast<arrow::ListType*>(
+          builder->type().get())->value_type);
+  if (value_converter_ == nullptr) {
+    return Status::NotImplemented("value type not implemented");
+  }
+
+  value_converter_->Init(typed_builder_->value_builder());
+  return Status::OK();
+}
+
+Status ConvertPySequence(PyObject* obj, std::shared_ptr<arrow::Array>* out) {
+  std::shared_ptr<DataType> type;
+  int64_t size;
+  PY_RETURN_NOT_OK(InferArrowType(obj, &size, &type));
+
+  // Handle NA / NullType case
+  if (type->type == LogicalType::NA) {
+    out->reset(new arrow::Array(type, size, size));
+    return Status::OK();
+  }
+
+  std::shared_ptr<SeqConverter> converter = GetConverter(type);
+  if (converter == nullptr) {
+    std::stringstream ss;
+    ss << "No type converter implemented for "
+       << type->ToString();
+    return Status::NotImplemented(ss.str());
+  }
+
+  // Give the sequence converter an array builder; Init can fail (e.g. a list
+  // whose value type has no converter), so its status must be propagated
+  std::shared_ptr<ArrayBuilder> builder;
+  RETURN_ARROW_NOT_OK(arrow::MakeBuilder(GetMemoryPool(), type, &builder));
+  PY_RETURN_NOT_OK(converter->Init(builder));
+
+  PY_RETURN_NOT_OK(converter->AppendData(obj));
+  *out = builder->Finish();
+
+  return Status::OK();
+}
+
+} // namespace pyarrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/src/pyarrow/adapters/builtin.h
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/adapters/builtin.h b/python/src/pyarrow/adapters/builtin.h
new file mode 100644
index 0000000..24886f4
--- /dev/null
+++ b/python/src/pyarrow/adapters/builtin.h
@@ -0,0 +1,40 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Functions for converting between CPython built-in data structures and Arrow
+// data structures
+
+#ifndef PYARROW_ADAPTERS_BUILTIN_H
+#define PYARROW_ADAPTERS_BUILTIN_H
+
+#include <Python.h>
+
+#include <memory>
+
+#include "pyarrow/common.h"
+
+namespace arrow { class Array; }
+
+namespace pyarrow {
+
+class Status;
+
+Status ConvertPySequence(PyObject* obj, std::shared_ptr<arrow::Array>* out);
+
+} // namespace pyarrow
+
+#endif // PYARROW_ADAPTERS_BUILTIN_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/src/pyarrow/adapters/pandas.h
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/adapters/pandas.h b/python/src/pyarrow/adapters/pandas.h
new file mode 100644
index 0000000..a4f4163
--- /dev/null
+++ b/python/src/pyarrow/adapters/pandas.h
@@ -0,0 +1,28 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Functions for converting between pandas's NumPy-based data representation
+// and Arrow data structures
+
+#ifndef PYARROW_ADAPTERS_PANDAS_H
+#define PYARROW_ADAPTERS_PANDAS_H
+
+namespace pyarrow {
+
+} // namespace pyarrow
+
+#endif // PYARROW_ADAPTERS_PANDAS_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/src/pyarrow/api.h
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/api.h b/python/src/pyarrow/api.h
index c2285de..72be6af 100644
--- a/python/src/pyarrow/api.h
+++ b/python/src/pyarrow/api.h
@@ -18,4 +18,11 @@
 #ifndef PYARROW_API_H
 #define PYARROW_API_H
 
+#include "pyarrow/status.h"
+
+#include "pyarrow/helpers.h"
+
+#include "pyarrow/adapters/builtin.h"
+#include "pyarrow/adapters/pandas.h"
+
 #endif // PYARROW_API_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/src/pyarrow/common.cc
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/common.cc b/python/src/pyarrow/common.cc
new file mode 100644
index 0000000..a2748f9
--- /dev/null
+++ b/python/src/pyarrow/common.cc
@@ -0,0 +1,71 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "pyarrow/common.h"
+
+#include <cstdlib>
+#include <mutex>
+#include <sstream>
+
+#include <arrow/util/memory-pool.h>
+#include <arrow/util/status.h>
+
+#include "pyarrow/status.h"
+
+namespace pyarrow {
+
+class PyArrowMemoryPool : public arrow::MemoryPool {
+ public:
+  PyArrowMemoryPool() : bytes_allocated_(0) {}
+  virtual ~PyArrowMemoryPool() {}
+
+  arrow::Status Allocate(int64_t size, uint8_t** out) override {
+    std::lock_guard<std::mutex> guard(pool_lock_);
+    *out = static_cast<uint8_t*>(std::malloc(size));
+    if (*out == nullptr) {
+      std::stringstream ss;
+      ss << "malloc of size " << size << " failed";
+      return arrow::Status::OutOfMemory(ss.str());
+    }
+
+    bytes_allocated_ += size;
+
+    return arrow::Status::OK();
+  }
+
+  int64_t bytes_allocated() const override {
+    std::lock_guard<std::mutex> guard(pool_lock_);
+    return bytes_allocated_;
+  }
+
+  void Free(uint8_t* buffer, int64_t size) override {
+    std::lock_guard<std::mutex> guard(pool_lock_);
+    std::free(buffer);
+    bytes_allocated_ -= size;
+  }
+
+ private:
+  mutable std::mutex pool_lock_;
+  int64_t bytes_allocated_;
+};
+
+arrow::MemoryPool* GetMemoryPool() {
+  static PyArrowMemoryPool memory_pool;
+  return &memory_pool;
+}
+
+} // namespace pyarrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/src/pyarrow/common.h
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/common.h b/python/src/pyarrow/common.h
new file mode 100644
index 0000000..a43e4d2
--- /dev/null
+++ b/python/src/pyarrow/common.h
@@ -0,0 +1,95 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef PYARROW_COMMON_H
+#define PYARROW_COMMON_H
+
+#include <Python.h>
+
+namespace arrow { class MemoryPool; }
+
+namespace pyarrow {
+
+#define PYARROW_IS_PY2 (PY_MAJOR_VERSION < 3)  // true only for Python 2.x
+
+#define RETURN_ARROW_NOT_OK(s) do {             \
+    arrow::Status _s = (s); /* evaluate once */ \
+    if (!_s.ok()) {                             \
+      return Status::ArrowError(_s.ToString()); \
+    }                                           \
+  } while (0);
+
+class OwnedRef {  // Holds one PyObject reference and releases it on destruction
+ public:
+  OwnedRef() : obj_(nullptr) {}
+
+  OwnedRef(PyObject* obj) :  // adopts the caller's reference (no INCREF here)
+      obj_(obj) {}
+
+  ~OwnedRef() {
+    Py_XDECREF(obj_);  // X variant because obj_ may be null
+  }
+
+  void reset(PyObject* obj) {  // release the held reference, then adopt obj
+    if (obj_ != nullptr) {
+      Py_XDECREF(obj_);
+    }
+    obj_ = obj;
+  }
+
+  PyObject* obj() const{  // raw pointer; the reference stays held by this object
+    return obj_;
+  }
+
+ private:
+  PyObject* obj_;  // NOTE(review): copying an OwnedRef would DECREF this twice
+};
+
+struct PyObjectStringify {
+  OwnedRef tmp_obj;
+  const char* bytes;
+
+  PyObjectStringify(PyObject* obj) {
+    PyObject* bytes_obj;
+    if (PyUnicode_Check(obj)) {
+      bytes_obj = PyUnicode_AsUTF8String(obj);
+      tmp_obj.reset(bytes_obj);
+    } else {
+      bytes_obj = obj;
+    }
+    bytes = PyBytes_AsString(bytes_obj);
+  }
+};
+
+// TODO(wesm): We can just let errors pass through. To be explored later
+#define RETURN_IF_PYERROR()                         \
+  if (PyErr_Occurred()) {                           \
+    PyObject *exc_type, *exc_value, *traceback;     \
+    PyErr_Fetch(&exc_type, &exc_value, &traceback); \
+    PyObjectStringify stringified(exc_value);       \
+    std::string message(stringified.bytes);         \
+    Py_XDECREF(exc_type);                           \
+    Py_XDECREF(exc_value);                          \
+    Py_XDECREF(traceback); /* may be NULL */        \
+    return Status::UnknownError(message);           \
+  }
+
+arrow::MemoryPool* GetMemoryPool();
+
+} // namespace pyarrow
+
+#endif // PYARROW_COMMON_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/src/pyarrow/helpers.cc
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/helpers.cc b/python/src/pyarrow/helpers.cc
new file mode 100644
index 0000000..d0969da
--- /dev/null
+++ b/python/src/pyarrow/helpers.cc
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "pyarrow/helpers.h"
+
+#include <arrow/api.h>
+
+using namespace arrow;
+
+namespace pyarrow {
+
+#define GET_PRIMITIVE_TYPE(NAME, Type)          \
+  case LogicalType::NAME:                       \
+    if (nullable) {                             \
+      return NAME;                              \
+    } else {                                    \
+      return std::make_shared<Type>(nullable);  \
+    }                                           \
+    break;
+
+std::shared_ptr<DataType> GetPrimitiveType(LogicalType::type type,
+    bool nullable) {
+  switch (type) {
+    case LogicalType::NA:
+      return NA;
+    GET_PRIMITIVE_TYPE(UINT8, UInt8Type);
+    GET_PRIMITIVE_TYPE(INT8, Int8Type);
+    GET_PRIMITIVE_TYPE(UINT16, UInt16Type);
+    GET_PRIMITIVE_TYPE(INT16, Int16Type);
+    GET_PRIMITIVE_TYPE(UINT32, UInt32Type);
+    GET_PRIMITIVE_TYPE(INT32, Int32Type);
+    GET_PRIMITIVE_TYPE(UINT64, UInt64Type);
+    GET_PRIMITIVE_TYPE(INT64, Int64Type);
+    GET_PRIMITIVE_TYPE(BOOL, BooleanType);
+    GET_PRIMITIVE_TYPE(FLOAT, FloatType);
+    GET_PRIMITIVE_TYPE(DOUBLE, DoubleType);
+    GET_PRIMITIVE_TYPE(STRING, StringType);
+    default:
+      return nullptr;
+  }
+}
+
+} // namespace pyarrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/src/pyarrow/helpers.h
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/helpers.h b/python/src/pyarrow/helpers.h
new file mode 100644
index 0000000..1a24f05
--- /dev/null
+++ b/python/src/pyarrow/helpers.h
@@ -0,0 +1,34 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef PYARROW_HELPERS_H
+#define PYARROW_HELPERS_H
+
+#include <arrow/api.h>
+#include <memory>
+
+namespace pyarrow {
+
+using arrow::DataType;
+using arrow::LogicalType;
+
+// Return a DataType instance for the given logical type with the requested
+// nullability. The implementation in helpers.cc returns nullptr when the
+// logical type is not one of the handled primitive types, so callers should
+// check the result before dereferencing it.
+std::shared_ptr<DataType> GetPrimitiveType(LogicalType::type type,
+    bool nullable);
+
+} // namespace pyarrow
+
+#endif // PYARROW_HELPERS_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/src/pyarrow/init.cc
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/init.cc b/python/src/pyarrow/init.cc
index c36f413..acd851e 100644
--- a/python/src/pyarrow/init.cc
+++ b/python/src/pyarrow/init.cc
@@ -17,13 +17,9 @@
 
 #include "pyarrow/init.h"
 
-namespace arrow {
-
-namespace py {
+namespace pyarrow {
 
 void pyarrow_init() {
 }
 
-} // namespace py
-
-} // namespace arrow
+} // namespace pyarrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/src/pyarrow/init.h
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/init.h b/python/src/pyarrow/init.h
index 1fc9f10..71e67a2 100644
--- a/python/src/pyarrow/init.h
+++ b/python/src/pyarrow/init.h
@@ -18,14 +18,10 @@
 #ifndef PYARROW_INIT_H
 #define PYARROW_INIT_H
 
-namespace arrow {
-
-namespace py {
+namespace pyarrow {
 
 void pyarrow_init();
 
-} // namespace py
-
-} // namespace arrow
+} // namespace pyarrow
 
 #endif // PYARROW_INIT_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/src/pyarrow/status.cc
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/status.cc b/python/src/pyarrow/status.cc
new file mode 100644
index 0000000..1cd54f6
--- /dev/null
+++ b/python/src/pyarrow/status.cc
@@ -0,0 +1,92 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// A Status encapsulates the result of an operation.  It may indicate success,
+// or it may indicate an error with an associated error message.
+//
+// Multiple threads can invoke const methods on a Status without
+// external synchronization, but if any of the threads may call a
+// non-const method, all threads accessing the same Status must use
+// external synchronization.
+
+#include "pyarrow/status.h"
+
+#include <assert.h>
+#include <cstdint>
+#include <cstring>
+
+namespace pyarrow {
+
+// Build a non-OK status. The state buffer is laid out as:
+//   [0..3] message length, [4] status code, [5..6] posix code, [7..] message
+// The message bytes are stored without a terminating NUL.
+Status::Status(StatusCode code, const std::string& msg, int16_t posix_code) {
+  assert(code != StatusCode::OK);
+  const uint32_t msg_len = msg.size();
+  char* buf = new char[msg_len + 7];
+  // Header: length, code, posix code.
+  memcpy(buf, &msg_len, sizeof(msg_len));
+  buf[4] = static_cast<char>(code);
+  memcpy(buf + 5, &posix_code, sizeof(posix_code));
+  // Payload: raw message bytes.
+  memcpy(buf + 7, msg.c_str(), msg.size());
+  state_ = buf;
+}
+
+// Return a newly allocated (new[]) byte-for-byte duplicate of `state`, which
+// must point at a buffer in the layout written by the Status constructor:
+// a 4-byte message length followed by 3 header bytes and the message.
+const char* Status::CopyState(const char* state) {
+  uint32_t msg_len;
+  memcpy(&msg_len, state, sizeof(msg_len));
+  // 7 header bytes (length + code + posix code) precede the message.
+  const uint32_t total_bytes = msg_len + 7;
+  char* duplicate = new char[total_bytes];
+  memcpy(duplicate, state, total_bytes);
+  return duplicate;
+}
+
+// Return a human-readable label for this status' code, without the message
+// text. An OK status (NULL state) yields "OK".
+std::string Status::CodeAsString() const {
+  if (state_ == NULL) {
+    return "OK";
+  }
+
+  const char* type;
+  switch (code()) {
+    case StatusCode::OK:
+      type = "OK";
+      break;
+    case StatusCode::OutOfMemory:
+      type = "Out of memory";
+      break;
+    case StatusCode::KeyError:
+      type = "Key error";
+      break;
+    case StatusCode::TypeError:
+      // Previously mislabelled as "Value error" (copy-paste slip).
+      type = "Type error";
+      break;
+    case StatusCode::ValueError:
+      type = "Value error";
+      break;
+    case StatusCode::IOError:
+      type = "IO error";
+      break;
+    case StatusCode::NotImplemented:
+      type = "Not implemented";
+      break;
+    case StatusCode::ArrowError:
+      type = "Arrow C++ error";
+      break;
+    case StatusCode::UnknownError:
+      type = "Unknown error";
+      break;
+    default:
+      // The code is read back from a raw byte, so guard against values that
+      // are not enumerators; otherwise `type` would be used uninitialized.
+      type = "Unknown";
+      break;
+  }
+  return std::string(type);
+}
+
+// Render the status as "<code label>: <message>", or just "OK" when the
+// status carries no state.
+std::string Status::ToString() const {
+  std::string text(CodeAsString());
+  if (state_ != NULL) {
+    // The message length lives in the first four bytes of the state buffer;
+    // the message itself starts after the 7-byte header.
+    uint32_t msg_len;
+    memcpy(&msg_len, state_, sizeof(msg_len));
+    text.append(": ");
+    text.append(state_ + 7, msg_len);
+  }
+  return text;
+}
+
+} // namespace pyarrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/src/pyarrow/status.h
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/status.h b/python/src/pyarrow/status.h
new file mode 100644
index 0000000..cb8c8ad
--- /dev/null
+++ b/python/src/pyarrow/status.h
@@ -0,0 +1,144 @@
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+//
+// A Status encapsulates the result of an operation.  It may indicate success,
+// or it may indicate an error with an associated error message.
+//
+// Multiple threads can invoke const methods on a Status without
+// external synchronization, but if any of the threads may call a
+// non-const method, all threads accessing the same Status must use
+// external synchronization.
+
+#ifndef PYARROW_STATUS_H_
+#define PYARROW_STATUS_H_
+
+#include <cstdint>
+#include <cstring>
+#include <string>
+
+namespace pyarrow {
+
+// Evaluate `s` (an expression yielding a Status) exactly once and return it
+// from the enclosing function if it is not OK.
+//
+// The macro deliberately does not end in a semicolon: the caller supplies it,
+// so `PY_RETURN_NOT_OK(x);` is a single statement and stays valid inside an
+// unbraced if/else.
+#define PY_RETURN_NOT_OK(s) do {                \
+    Status _s = (s);                            \
+    if (!_s.ok()) return _s;                    \
+  } while (0)
+
+// Error categories carried by a pyarrow::Status. The underlying type is
+// `char` because the code is stored in a single byte of the Status state
+// buffer (state_[4]).
+enum class StatusCode: char {
+  // Success; a Status with this code has a NULL state buffer.
+  OK = 0,
+  OutOfMemory = 1,
+  KeyError = 2,
+  TypeError = 3,
+  ValueError = 4,
+  IOError = 5,
+  NotImplemented = 6,
+
+  // Reported by Status::CodeAsString() as "Arrow C++ error".
+  ArrowError = 7,
+
+  // Values 8 and 9 are currently unassigned.
+  UnknownError = 10
+};
+
+class Status {
+ public:
+  // Create a success status.
+  Status() : state_(NULL) { }
+  ~Status() { delete[] state_; }
+
+  // Copy the specified status.
+  Status(const Status& s);
+  void operator=(const Status& s);
+
+  // Return a success status.
+  static Status OK() { return Status(); }
+
+  // Return error status of an appropriate type.
+  static Status OutOfMemory(const std::string& msg, int16_t posix_code = -1) {
+    return Status(StatusCode::OutOfMemory, msg, posix_code);
+  }
+
+  static Status KeyError(const std::string& msg) {
+    return Status(StatusCode::KeyError, msg, -1);
+  }
+
+  static Status TypeError(const std::string& msg) {
+    return Status(StatusCode::TypeError, msg, -1);
+  }
+
+  static Status IOError(const std::string& msg) {
+    return Status(StatusCode::IOError, msg, -1);
+  }
+
+  static Status ValueError(const std::string& msg) {
+    return Status(StatusCode::ValueError, msg, -1);
+  }
+
+  static Status NotImplemented(const std::string& msg) {
+    return Status(StatusCode::NotImplemented, msg, -1);
+  }
+
+  static Status UnknownError(const std::string& msg) {
+    return Status(StatusCode::UnknownError, msg, -1);
+  }
+
+  static Status ArrowError(const std::string& msg) {
+    return Status(StatusCode::ArrowError, msg, -1);
+  }
+
+  // Returns true iff the status indicates success.
+  bool ok() const { return (state_ == NULL); }
+
+  bool IsOutOfMemory() const { return code() == StatusCode::OutOfMemory; }
+  bool IsKeyError() const { return code() == StatusCode::KeyError; }
+  bool IsIOError() const { return code() == StatusCode::IOError; }
+  bool IsTypeError() const { return code() == StatusCode::TypeError; }
+  bool IsValueError() const { return code() == StatusCode::ValueError; }
+
+  bool IsUnknownError() const { return code() == StatusCode::UnknownError; }
+
+  bool IsArrowError() const { return code() == StatusCode::ArrowError; }
+
+  // Return a string representation of this status suitable for printing.
+  // Returns the string "OK" for success.
+  std::string ToString() const;
+
+  // Return a string representation of the status code, without the message
+  // text or posix code information.
+  std::string CodeAsString() const;
+
+  // Get the POSIX code associated with this Status, or -1 if there is none.
+  int16_t posix_code() const;
+
+ private:
+  // OK status has a NULL state_.  Otherwise, state_ is a new[] array
+  // of the following form:
+  //    state_[0..3] == length of message
+  //    state_[4]    == code
+  //    state_[5..6] == posix_code
+  //    state_[7..]  == message
+  const char* state_;
+
+  StatusCode code() const {
+    return ((state_ == NULL) ?
+        StatusCode::OK : static_cast<StatusCode>(state_[4]));
+  }
+
+  Status(StatusCode code, const std::string& msg, int16_t posix_code);
+  static const char* CopyState(const char* s);
+};
+
+// Copy constructor: an OK source stays allocation-free; otherwise the new
+// object receives its own duplicate of the source's state buffer.
+inline Status::Status(const Status& s)
+    : state_((s.state_ == NULL) ? NULL : CopyState(s.state_)) {}
+
+// Copy assignment. Releases the current state and takes a private copy of
+// the source's state (or NULL when the source is OK).
+inline void Status::operator=(const Status& s) {
+  // Equal pointers mean either self-assignment or that both sides are OK
+  // (both NULL); in both situations there is nothing to do.
+  if (state_ == s.state_) {
+    return;
+  }
+  delete[] state_;
+  if (s.state_ == NULL) {
+    state_ = NULL;
+  } else {
+    state_ = CopyState(s.state_);
+  }
+}
+
+}  // namespace pyarrow
+
+#endif // PYARROW_STATUS_H_


[2/2] arrow git commit: ARROW-31: Python: prototype user object model, add PyList conversion path with type inference

Posted by we...@apache.org.
ARROW-31: Python: prototype user object model, add PyList conversion path with type inference

Depends on ARROW-7. Pretty mundane stuff but got to start somewhere. I'm going to do a little more in this patch (handle normal lists of strings and lists of other supported Python types) before merging.

Author: Wes McKinney <we...@apache.org>

Closes #19 from wesm/ARROW-31 and squashes the following commits:

2345541 [Wes McKinney] Test basic conversion of nested lists
1d4618b [Wes McKinney] Prototype string and double converters
b02b296 [Wes McKinney] Type inference for lists and lists-of-lists
8c3891c [Wes McKinney] Smoke test that array garbage collection deallocates memory
c28bf09 [Wes McKinney] Build array successfully, without validating contents
731544a [Wes McKinney] Move PrimitiveType::ToString template back to type.h
b5b5b82 [Wes McKinney] Failing test stubs, raise on null array
edb451c [Wes McKinney] Add a few data type smoke tests
47fd78e [Wes McKinney] Add unit test stub
07c1379 [Wes McKinney] Move some bits from arrow/type.h to type.cc
3a774fb [Wes McKinney] Add Status::ToString impls. Unit test stub
4e206fc [Wes McKinney] Add pandas converter placeholder
102ed36 [Wes McKinney] Cython array box scaffold builds
94f122f [Wes McKinney] Basic object model for sequence->arrow conversions
bdb02e7 [Wes McKinney] Use shared_ptr with dynamic make_builder too
d5655ba [Wes McKinney] Clean up array builder API to return shared_ptr<Array>
4132bda [Wes McKinney] Essential scaffolding -- error handling, memory pools, etc. -- to work toward converting Python lists to Arrow arrays
55e69a2 [Wes McKinney] Typed array stubs
ac8c796 [Wes McKinney] Cache primitive data type instances
8f7edaf [Wes McKinney] Consolidate Field and data type subclasses. Add more Python stubs
ea2f3ec [Wes McKinney] Bootstrap end-to-end exposure in Python, wrap DataType and Field types


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/9afb6677
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/9afb6677
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/9afb6677

Branch: refs/heads/master
Commit: 9afb667783b8cedbe6e9d6ee5eb02d35cf1d0f79
Parents: 571343b
Author: Wes McKinney <we...@apache.org>
Authored: Mon Mar 7 15:02:56 2016 -0800
Committer: Wes McKinney <we...@apache.org>
Committed: Mon Mar 7 15:02:56 2016 -0800

----------------------------------------------------------------------
 cpp/CMakeLists.txt                         |  83 ++---
 cpp/src/arrow/CMakeLists.txt               |   1 -
 cpp/src/arrow/api.h                        |  21 ++
 cpp/src/arrow/builder.h                    |  10 +-
 cpp/src/arrow/field.cc                     |  31 --
 cpp/src/arrow/field.h                      |  63 ----
 cpp/src/arrow/table/CMakeLists.txt         |  15 -
 cpp/src/arrow/table/column-test.cc         |   1 -
 cpp/src/arrow/table/column.cc              |   2 +-
 cpp/src/arrow/table/column.h               |   2 +-
 cpp/src/arrow/table/schema-test.cc         |   9 +-
 cpp/src/arrow/table/schema.cc              |   2 +-
 cpp/src/arrow/table/schema.h               |   1 -
 cpp/src/arrow/table/table-test.cc          |   1 -
 cpp/src/arrow/table/table.cc               |   2 +-
 cpp/src/arrow/table/test-common.h          |   1 -
 cpp/src/arrow/type.cc                      |  49 +++
 cpp/src/arrow/type.h                       | 143 ++++++--
 cpp/src/arrow/types/CMakeLists.txt         |  22 +-
 cpp/src/arrow/types/boolean.h              |   3 +-
 cpp/src/arrow/types/construct.cc           |  21 +-
 cpp/src/arrow/types/construct.h            |   6 +-
 cpp/src/arrow/types/json.cc                |   5 +-
 cpp/src/arrow/types/list-test.cc           |  24 +-
 cpp/src/arrow/types/list.cc                |  12 -
 cpp/src/arrow/types/list.h                 |  51 +--
 cpp/src/arrow/types/null.h                 |  34 --
 cpp/src/arrow/types/primitive-test.cc      |  64 ++--
 cpp/src/arrow/types/primitive.h            |  22 +-
 cpp/src/arrow/types/string-test.cc         |  11 +-
 cpp/src/arrow/types/string.h               |  41 +--
 cpp/src/arrow/types/struct-test.cc         |  19 +-
 cpp/src/arrow/types/struct.cc              |  18 -
 cpp/src/arrow/types/struct.h               |  21 +-
 cpp/src/arrow/util/CMakeLists.txt          |  20 +-
 cpp/src/arrow/util/buffer.cc               |   8 +
 cpp/src/arrow/util/buffer.h                |   2 +
 cpp/src/arrow/util/status.cc               |  40 +++
 python/CMakeLists.txt                      |  21 +-
 python/arrow/__init__.py                   |  34 ++
 python/arrow/array.pxd                     |  85 +++++
 python/arrow/array.pyx                     | 179 ++++++++++
 python/arrow/config.pyx                    |   2 +-
 python/arrow/error.pxd                     |  20 ++
 python/arrow/error.pyx                     |  30 ++
 python/arrow/includes/arrow.pxd            |  75 ++++-
 python/arrow/includes/common.pxd           |   4 +-
 python/arrow/includes/pyarrow.pxd          |  24 +-
 python/arrow/scalar.pxd                    |  47 +++
 python/arrow/scalar.pyx                    |  28 ++
 python/arrow/schema.pxd                    |  39 +++
 python/arrow/schema.pyx                    | 150 +++++++++
 python/arrow/tests/test_array.py           |  26 ++
 python/arrow/tests/test_convert_builtin.py |  85 +++++
 python/arrow/tests/test_schema.py          |  51 +++
 python/setup.py                            |   7 +-
 python/src/pyarrow/adapters/builtin.cc     | 415 ++++++++++++++++++++++++
 python/src/pyarrow/adapters/builtin.h      |  40 +++
 python/src/pyarrow/adapters/pandas.h       |  28 ++
 python/src/pyarrow/api.h                   |   7 +
 python/src/pyarrow/common.cc               |  71 ++++
 python/src/pyarrow/common.h                |  95 ++++++
 python/src/pyarrow/helpers.cc              |  57 ++++
 python/src/pyarrow/helpers.h               |  34 ++
 python/src/pyarrow/init.cc                 |   8 +-
 python/src/pyarrow/init.h                  |   8 +-
 python/src/pyarrow/status.cc               |  92 ++++++
 python/src/pyarrow/status.h                | 144 ++++++++
 68 files changed, 2290 insertions(+), 497 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 8042661..e8cb88c 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -37,18 +37,17 @@ if ("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1")
   set(CMAKE_EXPORT_COMPILE_COMMANDS 1)
 endif()
 
-# Enable using a custom GCC toolchain to build Arrow
-if (NOT "$ENV{ARROW_GCC_ROOT}" STREQUAL "")
-  set(GCC_ROOT $ENV{ARROW_GCC_ROOT})
-  set(CMAKE_C_COMPILER ${GCC_ROOT}/bin/gcc)
-  set(CMAKE_CXX_COMPILER ${GCC_ROOT}/bin/g++)
-endif()
-
 if(APPLE)
   # In newer versions of CMake, this is the default setting
   set(CMAKE_MACOSX_RPATH 1)
 endif()
 
+find_program(CCACHE_FOUND ccache)
+if(CCACHE_FOUND)
+  set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
+  set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache)
+endif(CCACHE_FOUND)
+
 # ----------------------------------------------------------------------
 # cmake options
 
@@ -126,38 +125,16 @@ endif ()
 # Add common flags
 set(CMAKE_CXX_FLAGS "${CXX_COMMON_FLAGS} ${CMAKE_CXX_FLAGS}")
 
-# Required to avoid static linking errors with dependencies
-add_definitions(-fPIC)
-
 # Determine compiler version
 include(CompilerInfo)
 
 if ("${COMPILER_FAMILY}" STREQUAL "clang")
-  # Clang helpfully provides a few extensions from C++11 such as the 'override'
-  # keyword on methods. This doesn't change behavior, and we selectively enable
-  # it in src/gutil/port.h only on clang. So, we can safely use it, and don't want
-  # to trigger warnings when we do so.
-  # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-c++11-extensions")
-
   # Using Clang with ccache causes a bunch of spurious warnings that are
   # purportedly fixed in the next version of ccache. See the following for details:
   #
   #   http://petereisentraut.blogspot.com/2011/05/ccache-and-clang.html
   #   http://petereisentraut.blogspot.com/2011/09/ccache-and-clang-part-2.html
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Qunused-arguments")
-
-  # Only hardcode -fcolor-diagnostics if stderr is opened on a terminal. Otherwise
-  # the color codes show up as noisy artifacts.
-  #
-  # This test is imperfect because 'cmake' and 'make' can be run independently
-  # (with different terminal options), and we're testing during the former.
-  execute_process(COMMAND test -t 2 RESULT_VARIABLE ARROW_IS_TTY)
-  if ((${ARROW_IS_TTY} EQUAL 0) AND (NOT ("$ENV{TERM}" STREQUAL "dumb")))
-    message("Running in a controlling terminal")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fcolor-diagnostics")
-  else()
-    message("Running without a controlling terminal or in a dumb terminal")
-  endif()
 endif()
 
 # Sanity check linking option.
@@ -279,12 +256,6 @@ set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}")
 include_directories(src)
 
 ############################################################
-# Visibility
-############################################################
-# For generate_export_header() and add_compiler_export_flags().
-include(GenerateExportHeader)
-
-############################################################
 # Testing
 ############################################################
 
@@ -456,21 +427,32 @@ endif()
 # Subdirectories
 ############################################################
 
-add_subdirectory(src/arrow)
-add_subdirectory(src/arrow/util)
-add_subdirectory(src/arrow/table)
-add_subdirectory(src/arrow/types)
-
-set(LINK_LIBS
-  arrow_util
-  arrow_table
-  arrow_types)
+set(LIBARROW_LINK_LIBS
+)
 
 set(ARROW_SRCS
   src/arrow/array.cc
   src/arrow/builder.cc
-  src/arrow/field.cc
   src/arrow/type.cc
+
+  src/arrow/table/column.cc
+  src/arrow/table/schema.cc
+  src/arrow/table/table.cc
+
+  src/arrow/types/construct.cc
+  src/arrow/types/floating.cc
+  src/arrow/types/integer.cc
+  src/arrow/types/json.cc
+  src/arrow/types/list.cc
+  src/arrow/types/primitive.cc
+  src/arrow/types/string.cc
+  src/arrow/types/struct.cc
+  src/arrow/types/union.cc
+
+  src/arrow/util/bit-util.cc
+  src/arrow/util/buffer.cc
+  src/arrow/util/memory-pool.cc
+  src/arrow/util/status.cc
 )
 
 set(LIBARROW_LINKAGE "SHARED")
@@ -479,8 +461,15 @@ add_library(arrow
   ${LIBARROW_LINKAGE}
   ${ARROW_SRCS}
 )
-target_link_libraries(arrow ${LINK_LIBS})
-set_target_properties(arrow PROPERTIES LINKER_LANGUAGE CXX)
+set_target_properties(arrow
+  PROPERTIES
+  LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
+target_link_libraries(arrow ${LIBARROW_LINK_LIBS})
+
+add_subdirectory(src/arrow)
+add_subdirectory(src/arrow/util)
+add_subdirectory(src/arrow/table)
+add_subdirectory(src/arrow/types)
 
 install(TARGETS arrow
   LIBRARY DESTINATION lib

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 102a8a1..77326ce 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -20,7 +20,6 @@ install(FILES
   api.h
   array.h
   builder.h
-  field.h
   type.h
   DESTINATION include/arrow)
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/api.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/api.h b/cpp/src/arrow/api.h
index 899e8aa..c73d4b3 100644
--- a/cpp/src/arrow/api.h
+++ b/cpp/src/arrow/api.h
@@ -15,7 +15,28 @@
 // specific language governing permissions and limitations
 // under the License.
 
+// Coarse public API while the library is in development
+
 #ifndef ARROW_API_H
 #define ARROW_API_H
 
+#include "arrow/array.h"
+#include "arrow/builder.h"
+#include "arrow/type.h"
+
+#include "arrow/table/column.h"
+#include "arrow/table/schema.h"
+#include "arrow/table/table.h"
+
+#include "arrow/types/boolean.h"
+#include "arrow/types/construct.h"
+#include "arrow/types/floating.h"
+#include "arrow/types/integer.h"
+#include "arrow/types/list.h"
+#include "arrow/types/string.h"
+#include "arrow/types/struct.h"
+
+#include "arrow/util/memory-pool.h"
+#include "arrow/util/status.h"
+
 #endif // ARROW_API_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/builder.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h
index 491b913..8cc689c 100644
--- a/cpp/src/arrow/builder.h
+++ b/cpp/src/arrow/builder.h
@@ -32,7 +32,7 @@ class Array;
 class MemoryPool;
 class PoolBuffer;
 
-static constexpr int32_t MIN_BUILDER_CAPACITY = 1 << 8;
+static constexpr int32_t MIN_BUILDER_CAPACITY = 1 << 5;
 
 // Base class for all data array builders
 class ArrayBuilder {
@@ -78,12 +78,16 @@ class ArrayBuilder {
 
   // Creates new array object to hold the contents of the builder and transfers
   // ownership of the data
-  virtual Status ToArray(Array** out) = 0;
+  virtual std::shared_ptr<Array> Finish() = 0;
+
+  const std::shared_ptr<DataType>& type() const {
+    return type_;
+  }
 
  protected:
   MemoryPool* pool_;
 
-  TypePtr type_;
+  std::shared_ptr<DataType> type_;
 
   // When nulls are first appended to the builder, the null bitmap is allocated
   std::shared_ptr<PoolBuffer> nulls_;

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/field.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/field.cc b/cpp/src/arrow/field.cc
deleted file mode 100644
index 4568d90..0000000
--- a/cpp/src/arrow/field.cc
+++ /dev/null
@@ -1,31 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "arrow/field.h"
-
-#include <sstream>
-#include <string>
-
-namespace arrow {
-
-std::string Field::ToString() const {
-  std::stringstream ss;
-  ss << this->name << " " << this->type->ToString();
-  return ss.str();
-}
-
-} // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/field.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/field.h b/cpp/src/arrow/field.h
deleted file mode 100644
index 89a450c..0000000
--- a/cpp/src/arrow/field.h
+++ /dev/null
@@ -1,63 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef ARROW_FIELD_H
-#define ARROW_FIELD_H
-
-#include <string>
-
-#include "arrow/type.h"
-
-namespace arrow {
-
-// A field is a piece of metadata that includes (for now) a name and a data
-// type
-
-struct Field {
-  // Field name
-  std::string name;
-
-  // The field's data type
-  TypePtr type;
-
-  Field(const std::string& name, const TypePtr& type) :
-      name(name),
-      type(type) {}
-
-  bool operator==(const Field& other) const {
-    return this->Equals(other);
-  }
-
-  bool operator!=(const Field& other) const {
-    return !this->Equals(other);
-  }
-
-  bool Equals(const Field& other) const {
-    return (this == &other) || (this->name == other.name &&
-        this->type->Equals(other.type.get()));
-  }
-
-  bool nullable() const {
-    return this->type->nullable;
-  }
-
-  std::string ToString() const;
-};
-
-} // namespace arrow
-
-#endif  // ARROW_FIELD_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/table/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/CMakeLists.txt b/cpp/src/arrow/table/CMakeLists.txt
index 68bf314..26d843d 100644
--- a/cpp/src/arrow/table/CMakeLists.txt
+++ b/cpp/src/arrow/table/CMakeLists.txt
@@ -19,21 +19,6 @@
 # arrow_table
 #######################################
 
-set(TABLE_SRCS
-  column.cc
-  schema.cc
-  table.cc
-)
-
-set(TABLE_LIBS
-)
-
-add_library(arrow_table STATIC
-  ${TABLE_SRCS}
-)
-target_link_libraries(arrow_table ${TABLE_LIBS})
-SET_TARGET_PROPERTIES(arrow_table PROPERTIES LINKER_LANGUAGE CXX)
-
 # Headers: top level
 install(FILES
   column.h

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/table/column-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/column-test.cc b/cpp/src/arrow/table/column-test.cc
index 4959b82..bf95932 100644
--- a/cpp/src/arrow/table/column-test.cc
+++ b/cpp/src/arrow/table/column-test.cc
@@ -21,7 +21,6 @@
 #include <string>
 #include <vector>
 
-#include "arrow/field.h"
 #include "arrow/table/column.h"
 #include "arrow/table/schema.h"
 #include "arrow/table/test-common.h"

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/table/column.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/column.cc b/cpp/src/arrow/table/column.cc
index d68b491..573e650 100644
--- a/cpp/src/arrow/table/column.cc
+++ b/cpp/src/arrow/table/column.cc
@@ -20,7 +20,7 @@
 #include <memory>
 #include <sstream>
 
-#include "arrow/field.h"
+#include "arrow/type.h"
 #include "arrow/util/status.h"
 
 namespace arrow {

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/table/column.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/column.h b/cpp/src/arrow/table/column.h
index 64423bf..dfc7516 100644
--- a/cpp/src/arrow/table/column.h
+++ b/cpp/src/arrow/table/column.h
@@ -23,7 +23,7 @@
 #include <vector>
 
 #include "arrow/array.h"
-#include "arrow/field.h"
+#include "arrow/type.h"
 
 namespace arrow {
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/table/schema-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/schema-test.cc b/cpp/src/arrow/table/schema-test.cc
index 0cf1b3c..d6725cc 100644
--- a/cpp/src/arrow/table/schema-test.cc
+++ b/cpp/src/arrow/table/schema-test.cc
@@ -20,7 +20,6 @@
 #include <string>
 #include <vector>
 
-#include "arrow/field.h"
 #include "arrow/table/schema.h"
 #include "arrow/type.h"
 #include "arrow/types/string.h"
@@ -97,10 +96,10 @@ TEST_F(TestSchema, ToString) {
   auto schema = std::make_shared<Schema>(fields);
 
   std::string result = schema->ToString();
-  std::string expected = R"(f0 ?int32
-f1 uint8
-f2 ?string
-f3 ?list<?int16>
+  std::string expected = R"(f0 int32
+f1 uint8 not null
+f2 string
+f3 list<int16>
 )";
 
   ASSERT_EQ(expected, result);

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/table/schema.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/schema.cc b/cpp/src/arrow/table/schema.cc
index fb3b4d6..d49d0a7 100644
--- a/cpp/src/arrow/table/schema.cc
+++ b/cpp/src/arrow/table/schema.cc
@@ -22,7 +22,7 @@
 #include <sstream>
 #include <vector>
 
-#include "arrow/field.h"
+#include "arrow/type.h"
 
 namespace arrow {
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/table/schema.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/schema.h b/cpp/src/arrow/table/schema.h
index d04e3f6..103f01b 100644
--- a/cpp/src/arrow/table/schema.h
+++ b/cpp/src/arrow/table/schema.h
@@ -22,7 +22,6 @@
 #include <string>
 #include <vector>
 
-#include "arrow/field.h"
 #include "arrow/type.h"
 
 namespace arrow {

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/table/table-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/table-test.cc b/cpp/src/arrow/table/table-test.cc
index dd4f74c..c4fdb06 100644
--- a/cpp/src/arrow/table/table-test.cc
+++ b/cpp/src/arrow/table/table-test.cc
@@ -21,7 +21,6 @@
 #include <string>
 #include <vector>
 
-#include "arrow/field.h"
 #include "arrow/table/column.h"
 #include "arrow/table/schema.h"
 #include "arrow/table/table.h"

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/table/table.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/table.cc b/cpp/src/arrow/table/table.cc
index 4cefc92..0c788b8 100644
--- a/cpp/src/arrow/table/table.cc
+++ b/cpp/src/arrow/table/table.cc
@@ -20,9 +20,9 @@
 #include <memory>
 #include <sstream>
 
-#include "arrow/field.h"
 #include "arrow/table/column.h"
 #include "arrow/table/schema.h"
+#include "arrow/type.h"
 #include "arrow/util/status.h"
 
 namespace arrow {

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/table/test-common.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/table/test-common.h b/cpp/src/arrow/table/test-common.h
index efe2f22..50a5f6a 100644
--- a/cpp/src/arrow/table/test-common.h
+++ b/cpp/src/arrow/table/test-common.h
@@ -21,7 +21,6 @@
 #include <string>
 #include <vector>
 
-#include "arrow/field.h"
 #include "arrow/table/column.h"
 #include "arrow/table/schema.h"
 #include "arrow/table/table.h"

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/type.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index ff145e2..2657708 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -17,8 +17,56 @@
 
 #include "arrow/type.h"
 
+#include <sstream>
+#include <string>
+
 namespace arrow {
 
+std::string Field::ToString() const {
+  std::stringstream ss;
+  ss << this->name << " " << this->type->ToString();
+  return ss.str();
+}
+
+DataType::~DataType() {}
+
+StringType::StringType(bool nullable)
+    : DataType(LogicalType::STRING, nullable) {}
+
+StringType::StringType(const StringType& other)
+    : StringType(other.nullable) {}
+
+std::string StringType::ToString() const {
+  std::string result(name());
+  if (!nullable) {
+    result.append(" not null");
+  }
+  return result;
+}
+
+std::string ListType::ToString() const {
+  std::stringstream s;
+  s << "list<" << value_type->ToString() << ">";
+  if (!this->nullable) {
+    s << " not null";
+  }
+  return s.str();
+}
+
+std::string StructType::ToString() const {
+  std::stringstream s;
+  s << "struct<";
+  for (size_t i = 0; i < fields_.size(); ++i) {
+    if (i > 0) s << ", ";
+    const std::shared_ptr<Field>& field = fields_[i];
+    s << field->name << ": " << field->type->ToString();
+  }
+  s << ">";
+  if (!nullable) s << " not null";
+  return s.str();
+}
+
+const std::shared_ptr<NullType> NA = std::make_shared<NullType>();
 const std::shared_ptr<BooleanType> BOOL = std::make_shared<BooleanType>();
 const std::shared_ptr<UInt8Type> UINT8 = std::make_shared<UInt8Type>();
 const std::shared_ptr<UInt16Type> UINT16 = std::make_shared<UInt16Type>();
@@ -30,5 +78,6 @@ const std::shared_ptr<Int32Type> INT32 = std::make_shared<Int32Type>();
 const std::shared_ptr<Int64Type> INT64 = std::make_shared<Int64Type>();
 const std::shared_ptr<FloatType> FLOAT = std::make_shared<FloatType>();
 const std::shared_ptr<DoubleType> DOUBLE = std::make_shared<DoubleType>();
+const std::shared_ptr<StringType> STRING = std::make_shared<StringType>();
 
 } // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/type.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 4193a0e..e78e494 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -20,6 +20,7 @@
 
 #include <memory>
 #include <string>
+#include <vector>
 
 namespace arrow {
 
@@ -71,49 +72,46 @@ struct LogicalType {
     UINT64 = 7,
     INT64 = 8,
 
-    // A boolean value represented as 1 byte
-    BOOL = 9,
-
     // A boolean value represented as 1 bit
-    BIT = 10,
+    BOOL = 9,
 
     // 4-byte floating point value
-    FLOAT = 11,
+    FLOAT = 10,
 
     // 8-byte floating point value
-    DOUBLE = 12,
+    DOUBLE = 11,
 
     // CHAR(N): fixed-length UTF8 string with length N
-    CHAR = 13,
+    CHAR = 12,
 
     // UTF8 variable-length string as List<Char>
-    STRING = 14,
+    STRING = 13,
 
     // VARCHAR(N): Null-terminated string type embedded in a CHAR(N + 1)
-    VARCHAR = 15,
+    VARCHAR = 14,
 
     // Variable-length bytes (no guarantee of UTF8-ness)
-    BINARY = 16,
+    BINARY = 15,
 
     // By default, int32 days since the UNIX epoch
-    DATE = 17,
+    DATE = 16,
 
     // Exact timestamp encoded with int64 since UNIX epoch
     // Default unit millisecond
-    TIMESTAMP = 18,
+    TIMESTAMP = 17,
 
     // Timestamp as double seconds since the UNIX epoch
-    TIMESTAMP_DOUBLE = 19,
+    TIMESTAMP_DOUBLE = 18,
 
     // Exact time encoded with int64, default unit millisecond
-    TIME = 20,
+    TIME = 19,
 
     // Precision- and scale-based decimal type. Storage type depends on the
     // parameters.
-    DECIMAL = 21,
+    DECIMAL = 20,
 
     // Decimal value encoded as a text string
-    DECIMAL_TEXT = 22,
+    DECIMAL_TEXT = 21,
 
     // A list of some logical data type
     LIST = 30,
@@ -141,7 +139,9 @@ struct DataType {
       type(type),
       nullable(nullable) {}
 
-  virtual bool Equals(const DataType* other) {
+  virtual ~DataType();
+
+  bool Equals(const DataType* other) {
     // Call with a pointer so more friendly to subclasses
     return this == other || (this->type == other->type &&
         this->nullable == other->nullable);
@@ -154,10 +154,45 @@ struct DataType {
   virtual std::string ToString() const = 0;
 };
 
-
 typedef std::shared_ptr<LayoutType> LayoutPtr;
 typedef std::shared_ptr<DataType> TypePtr;
 
+// A field is a piece of metadata that includes (for now) a name and a data
+// type
+struct Field {
+  // Field name
+  std::string name;
+
+  // The field's data type
+  TypePtr type;
+
+  Field(const std::string& name, const TypePtr& type) :
+      name(name),
+      type(type) {}
+
+  bool operator==(const Field& other) const {
+    return this->Equals(other);
+  }
+
+  bool operator!=(const Field& other) const {
+    return !this->Equals(other);
+  }
+
+  bool Equals(const Field& other) const {
+    return (this == &other) || (this->name == other.name &&
+        this->type->Equals(other.type.get()));
+  }
+
+  bool Equals(const std::shared_ptr<Field>& other) const {
+    return Equals(*other.get());
+  }
+
+  bool nullable() const {
+    return this->type->nullable;
+  }
+
+  std::string ToString() const;
+};
 
 struct BytesType : public LayoutType {
   int size;
@@ -183,16 +218,18 @@ struct PrimitiveType : public DataType {
   explicit PrimitiveType(bool nullable = true)
       : DataType(Derived::type_enum, nullable) {}
 
-  virtual std::string ToString() const {
-    std::string result;
-    if (nullable) {
-      result.append("?");
-    }
-    result.append(static_cast<const Derived*>(this)->name());
-    return result;
-  }
+  std::string ToString() const override;
 };
 
+template <typename Derived>
+inline std::string PrimitiveType<Derived>::ToString() const {
+  std::string result(static_cast<const Derived*>(this)->name());
+  if (!nullable) {
+    result.append(" not null");
+  }
+  return result;
+}
+
 #define PRIMITIVE_DECL(TYPENAME, C_TYPE, ENUM, SIZE, NAME)          \
   typedef C_TYPE c_type;                                            \
   static constexpr LogicalType::type type_enum = LogicalType::ENUM; \
@@ -205,6 +242,10 @@ struct PrimitiveType : public DataType {
     return NAME;                                                    \
   }
 
+struct NullType : public PrimitiveType<NullType> {
+  PRIMITIVE_DECL(NullType, void, NA, 0, "null");
+};
+
 struct BooleanType : public PrimitiveType<BooleanType> {
   PRIMITIVE_DECL(BooleanType, uint8_t, BOOL, 1, "bool");
 };
@@ -249,6 +290,55 @@ struct DoubleType : public PrimitiveType<DoubleType> {
   PRIMITIVE_DECL(DoubleType, double, DOUBLE, 8, "double");
 };
 
+struct ListType : public DataType {
+  // List can contain any other logical value type
+  TypePtr value_type;
+
+  explicit ListType(const TypePtr& value_type, bool nullable = true)
+      : DataType(LogicalType::LIST, nullable),
+        value_type(value_type) {}
+
+  static char const *name() {
+    return "list";
+  }
+
+  std::string ToString() const override;
+};
+
+// String is a logical type consisting of a physical list of 1-byte values
+struct StringType : public DataType {
+  explicit StringType(bool nullable = true);
+
+  StringType(const StringType& other);
+
+  static char const *name() {
+    return "string";
+  }
+
+  std::string ToString() const override;
+};
+
+struct StructType : public DataType {
+  std::vector<std::shared_ptr<Field> > fields_;
+
+  explicit StructType(const std::vector<std::shared_ptr<Field> >& fields,
+      bool nullable = true)
+      : DataType(LogicalType::STRUCT, nullable) {
+    fields_ = fields;
+  }
+
+  const std::shared_ptr<Field>& field(int i) const {
+    return fields_[i];
+  }
+
+  int num_children() const {
+    return fields_.size();
+  }
+
+  std::string ToString() const override;
+};
+
+extern const std::shared_ptr<NullType> NA;
 extern const std::shared_ptr<BooleanType> BOOL;
 extern const std::shared_ptr<UInt8Type> UINT8;
 extern const std::shared_ptr<UInt16Type> UINT16;
@@ -260,6 +350,7 @@ extern const std::shared_ptr<Int32Type> INT32;
 extern const std::shared_ptr<Int64Type> INT64;
 extern const std::shared_ptr<FloatType> FLOAT;
 extern const std::shared_ptr<DoubleType> DOUBLE;
+extern const std::shared_ptr<StringType> STRING;
 
 } // namespace arrow
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/types/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/CMakeLists.txt b/cpp/src/arrow/types/CMakeLists.txt
index e090aea..57cabde 100644
--- a/cpp/src/arrow/types/CMakeLists.txt
+++ b/cpp/src/arrow/types/CMakeLists.txt
@@ -19,31 +19,11 @@
 # arrow_types
 #######################################
 
-set(TYPES_SRCS
-  construct.cc
-  floating.cc
-  integer.cc
-  json.cc
-  list.cc
-  primitive.cc
-  string.cc
-  struct.cc
-  union.cc
-)
-
-set(TYPES_LIBS
-)
-
-add_library(arrow_types STATIC
-  ${TYPES_SRCS}
-)
-target_link_libraries(arrow_types ${TYPES_LIBS})
-SET_TARGET_PROPERTIES(arrow_types PROPERTIES LINKER_LANGUAGE CXX)
-
 # Headers: top level
 install(FILES
   boolean.h
   collection.h
+  construct.h
   datetime.h
   decimal.h
   floating.h

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/types/boolean.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/boolean.h b/cpp/src/arrow/types/boolean.h
index 8fc9cfd..a5023d7 100644
--- a/cpp/src/arrow/types/boolean.h
+++ b/cpp/src/arrow/types/boolean.h
@@ -24,7 +24,8 @@ namespace arrow {
 
 typedef PrimitiveArrayImpl<BooleanType> BooleanArray;
 
-// typedef PrimitiveBuilder<BooleanType, BooleanArray> BooleanBuilder;
+class BooleanBuilder : public ArrayBuilder {
+};
 
 } // namespace arrow
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/types/construct.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/construct.cc b/cpp/src/arrow/types/construct.cc
index 05d6b27..43f01a3 100644
--- a/cpp/src/arrow/types/construct.cc
+++ b/cpp/src/arrow/types/construct.cc
@@ -32,13 +32,13 @@ class ArrayBuilder;
 // Initially looked at doing this with vtables, but shared pointers makes it
 // difficult
 
-#define BUILDER_CASE(ENUM, BuilderType)                                 \
-    case LogicalType::ENUM:                                             \
-      *out = static_cast<ArrayBuilder*>(new BuilderType(pool, type));   \
+#define BUILDER_CASE(ENUM, BuilderType)         \
+    case LogicalType::ENUM:                     \
+      out->reset(new BuilderType(pool, type));  \
       return Status::OK();
 
-Status make_builder(MemoryPool* pool, const TypePtr& type,
-    ArrayBuilder** out) {
+Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
+    std::shared_ptr<ArrayBuilder>* out) {
   switch (type->type) {
     BUILDER_CASE(UINT8, UInt8Builder);
     BUILDER_CASE(INT8, Int8Builder);
@@ -58,13 +58,12 @@ Status make_builder(MemoryPool* pool, const TypePtr& type,
 
     case LogicalType::LIST:
       {
-        ListType* list_type = static_cast<ListType*>(type.get());
-        ArrayBuilder* value_builder;
-        RETURN_NOT_OK(make_builder(pool, list_type->value_type, &value_builder));
+        std::shared_ptr<ArrayBuilder> value_builder;
 
-        // The ListBuilder takes ownership of the value_builder
-        ListBuilder* builder = new ListBuilder(pool, type, value_builder);
-        *out = static_cast<ArrayBuilder*>(builder);
+        const std::shared_ptr<DataType>& value_type = static_cast<ListType*>(
+            type.get())->value_type;
+        RETURN_NOT_OK(MakeBuilder(pool, value_type, &value_builder));
+        out->reset(new ListBuilder(pool, type, value_builder));
         return Status::OK();
       }
     // BUILDER_CASE(CHAR, CharBuilder);

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/types/construct.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/construct.h b/cpp/src/arrow/types/construct.h
index b5ba436..59ebe1a 100644
--- a/cpp/src/arrow/types/construct.h
+++ b/cpp/src/arrow/types/construct.h
@@ -18,6 +18,8 @@
 #ifndef ARROW_TYPES_CONSTRUCT_H
 #define ARROW_TYPES_CONSTRUCT_H
 
+#include <memory>
+
 #include "arrow/type.h"
 
 namespace arrow {
@@ -26,8 +28,8 @@ class ArrayBuilder;
 class MemoryPool;
 class Status;
 
-Status make_builder(MemoryPool* pool, const TypePtr& type,
-    ArrayBuilder** out);
+Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
+    std::shared_ptr<ArrayBuilder>* out);
 
 } // namespace arrow
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/types/json.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/json.cc b/cpp/src/arrow/types/json.cc
index b29b957..168e370 100644
--- a/cpp/src/arrow/types/json.cc
+++ b/cpp/src/arrow/types/json.cc
@@ -19,10 +19,7 @@
 
 #include <vector>
 
-#include "arrow/types/boolean.h"
-#include "arrow/types/integer.h"
-#include "arrow/types/floating.h"
-#include "arrow/types/null.h"
+#include "arrow/type.h"
 #include "arrow/types/string.h"
 #include "arrow/types/union.h"
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/types/list-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list-test.cc b/cpp/src/arrow/types/list-test.cc
index b4bbd28..02991de 100644
--- a/cpp/src/arrow/types/list-test.cc
+++ b/cpp/src/arrow/types/list-test.cc
@@ -32,6 +32,7 @@
 #include "arrow/types/test-common.h"
 #include "arrow/util/status.h"
 
+using std::shared_ptr;
 using std::string;
 using std::unique_ptr;
 using std::vector;
@@ -47,17 +48,18 @@ TEST(TypesTest, TestListType) {
   ASSERT_EQ(list_type.type, LogicalType::LIST);
 
   ASSERT_EQ(list_type.name(), string("list"));
-  ASSERT_EQ(list_type.ToString(), string("?list<?uint8>"));
+  ASSERT_EQ(list_type.ToString(), string("list<uint8>"));
 
   ASSERT_EQ(list_type.value_type->type, vt->type);
   ASSERT_EQ(list_type.value_type->type, vt->type);
 
   std::shared_ptr<DataType> st = std::make_shared<StringType>(false);
   std::shared_ptr<DataType> lt = std::make_shared<ListType>(st, false);
-  ASSERT_EQ(lt->ToString(), string("list<string>"));
+  ASSERT_EQ(lt->ToString(), string("list<string not null> not null"));
 
   ListType lt2(lt, false);
-  ASSERT_EQ(lt2.ToString(), string("list<list<string>>"));
+  ASSERT_EQ(lt2.ToString(),
+      string("list<list<string not null> not null> not null"));
 }
 
 // ----------------------------------------------------------------------
@@ -71,23 +73,21 @@ class TestListBuilder : public TestBuilder {
     value_type_ = TypePtr(new Int32Type());
     type_ = TypePtr(new ListType(value_type_));
 
-    ArrayBuilder* tmp;
-    ASSERT_OK(make_builder(pool_, type_, &tmp));
-    builder_.reset(static_cast<ListBuilder*>(tmp));
+    std::shared_ptr<ArrayBuilder> tmp;
+    ASSERT_OK(MakeBuilder(pool_, type_, &tmp));
+    builder_ = std::dynamic_pointer_cast<ListBuilder>(tmp);
   }
 
   void Done() {
-    Array* out;
-    ASSERT_OK(builder_->ToArray(&out));
-    result_.reset(static_cast<ListArray*>(out));
+    result_ = std::dynamic_pointer_cast<ListArray>(builder_->Finish());
   }
 
  protected:
   TypePtr value_type_;
   TypePtr type_;
 
-  unique_ptr<ListBuilder> builder_;
-  unique_ptr<ListArray> result_;
+  shared_ptr<ListBuilder> builder_;
+  shared_ptr<ListArray> result_;
 };
 
 
@@ -116,7 +116,7 @@ TEST_F(TestListBuilder, TestBasics) {
   vector<int> lengths = {3, 0, 4};
   vector<uint8_t> is_null = {0, 1, 0};
 
-  Int32Builder* vb = static_cast<Int32Builder*>(builder_->value_builder());
+  Int32Builder* vb = static_cast<Int32Builder*>(builder_->value_builder().get());
 
   int pos = 0;
   for (size_t i = 0; i < lengths.size(); ++i) {

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/types/list.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list.cc b/cpp/src/arrow/types/list.cc
index 577d71d..69a79a7 100644
--- a/cpp/src/arrow/types/list.cc
+++ b/cpp/src/arrow/types/list.cc
@@ -17,18 +17,6 @@
 
 #include "arrow/types/list.h"
 
-#include <sstream>
-#include <string>
-
 namespace arrow {
 
-std::string ListType::ToString() const {
-  std::stringstream s;
-  if (this->nullable) {
-    s << "?";
-  }
-  s << "list<" << value_type->ToString() << ">";
-  return s.str();
-}
-
 } // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/types/list.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/list.h b/cpp/src/arrow/types/list.h
index f39fe5c..f40a824 100644
--- a/cpp/src/arrow/types/list.h
+++ b/cpp/src/arrow/types/list.h
@@ -36,21 +36,6 @@ namespace arrow {
 
 class MemoryPool;
 
-struct ListType : public DataType {
-  // List can contain any other logical value type
-  TypePtr value_type;
-
-  explicit ListType(const TypePtr& value_type, bool nullable = true)
-      : DataType(LogicalType::LIST, nullable),
-        value_type(value_type) {}
-
-  static char const *name() {
-    return "list";
-  }
-
-  virtual std::string ToString() const;
-};
-
 class ListArray : public Array {
  public:
   ListArray() : Array(), offset_buf_(nullptr), offsets_(nullptr) {}
@@ -106,10 +91,9 @@ class ListArray : public Array {
 class ListBuilder : public Int32Builder {
  public:
   ListBuilder(MemoryPool* pool, const TypePtr& type,
-      ArrayBuilder* value_builder)
-      : Int32Builder(pool, type) {
-    value_builder_.reset(value_builder);
-  }
+      std::shared_ptr<ArrayBuilder> value_builder)
+      : Int32Builder(pool, type),
+        value_builder_(value_builder) {}
 
   Status Init(int32_t elements) {
     // One more than requested.
@@ -147,30 +131,27 @@ class ListBuilder : public Int32Builder {
     return Status::OK();
   }
 
-  // Initialize an array type instance with the results of this builder
-  // Transfers ownership of all buffers
   template <typename Container>
-  Status Transfer(Container* out) {
-    Array* child_values;
-    RETURN_NOT_OK(value_builder_->ToArray(&child_values));
+  std::shared_ptr<Array> Transfer() {
+    auto result = std::make_shared<Container>();
+
+    std::shared_ptr<Array> items = value_builder_->Finish();
 
     // Add final offset if the length is non-zero
     if (length_) {
-      raw_buffer()[length_] = child_values->length();
+      raw_buffer()[length_] = items->length();
     }
 
-    out->Init(type_, length_, values_, ArrayPtr(child_values),
+    result->Init(type_, length_, values_, items,
         null_count_, nulls_);
     values_ = nulls_ = nullptr;
     capacity_ = length_ = null_count_ = 0;
-    return Status::OK();
+
+    return result;
   }
 
-  virtual Status ToArray(Array** out) {
-    ListArray* result = new ListArray();
-    RETURN_NOT_OK(Transfer(result));
-    *out = static_cast<Array*>(result);
-    return Status::OK();
+  std::shared_ptr<Array> Finish() override {
+    return Transfer<ListArray>();
   }
 
   // Start a new variable-length list slot
@@ -198,10 +179,12 @@ class ListBuilder : public Int32Builder {
     return Append(true);
   }
 
-  ArrayBuilder* value_builder() const { return value_builder_.get();}
+  const std::shared_ptr<ArrayBuilder>& value_builder() const {
+    return value_builder_;
+  }
 
  protected:
-  std::unique_ptr<ArrayBuilder> value_builder_;
+  std::shared_ptr<ArrayBuilder> value_builder_;
 };
 
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/types/null.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/null.h b/cpp/src/arrow/types/null.h
deleted file mode 100644
index c67f752..0000000
--- a/cpp/src/arrow/types/null.h
+++ /dev/null
@@ -1,34 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef ARROW_TYPES_NULL_H
-#define ARROW_TYPES_NULL_H
-
-#include <string>
-#include <vector>
-
-#include "arrow/type.h"
-
-namespace arrow {
-
-struct NullType : public PrimitiveType<NullType> {
-  PRIMITIVE_DECL(NullType, void, NA, 0, "null");
-};
-
-} // namespace arrow
-
-#endif // ARROW_TYPES_NULL_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/types/primitive-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive-test.cc b/cpp/src/arrow/types/primitive-test.cc
index 02eaaa7..f35a258 100644
--- a/cpp/src/arrow/types/primitive-test.cc
+++ b/cpp/src/arrow/types/primitive-test.cc
@@ -37,6 +37,7 @@
 #include "arrow/util/status.h"
 
 using std::string;
+using std::shared_ptr;
 using std::unique_ptr;
 using std::vector;
 
@@ -98,12 +99,12 @@ class TestPrimitiveBuilder : public TestBuilder {
 
     type_ = Attrs::type();
 
-    ArrayBuilder* tmp;
-    ASSERT_OK(make_builder(pool_, type_, &tmp));
-    builder_.reset(static_cast<BuilderType*>(tmp));
+    std::shared_ptr<ArrayBuilder> tmp;
+    ASSERT_OK(MakeBuilder(pool_, type_, &tmp));
+    builder_ = std::dynamic_pointer_cast<BuilderType>(tmp);
 
-    ASSERT_OK(make_builder(pool_, type_, &tmp));
-    builder_nn_.reset(static_cast<BuilderType*>(tmp));
+    ASSERT_OK(MakeBuilder(pool_, type_, &tmp));
+    builder_nn_ = std::dynamic_pointer_cast<BuilderType>(tmp);
   }
 
   void RandomData(int N, double pct_null = 0.1) {
@@ -112,7 +113,6 @@ class TestPrimitiveBuilder : public TestBuilder {
   }
 
   void CheckNullable() {
-    ArrayType result;
     ArrayType expected;
     int size = builder_->length();
 
@@ -125,7 +125,9 @@ class TestPrimitiveBuilder : public TestBuilder {
     int32_t ex_null_count = null_count(nulls_);
 
     expected.Init(size, ex_data, ex_null_count, ex_nulls);
-    ASSERT_OK(builder_->Transfer(&result));
+
+    std::shared_ptr<ArrayType> result = std::dynamic_pointer_cast<ArrayType>(
+        builder_->Finish());
 
     // Builder is now reset
     ASSERT_EQ(0, builder_->length());
@@ -133,12 +135,11 @@ class TestPrimitiveBuilder : public TestBuilder {
     ASSERT_EQ(0, builder_->null_count());
     ASSERT_EQ(nullptr, builder_->buffer());
 
-    ASSERT_TRUE(result.Equals(expected));
-    ASSERT_EQ(ex_null_count, result.null_count());
+    ASSERT_TRUE(result->Equals(expected));
+    ASSERT_EQ(ex_null_count, result->null_count());
   }
 
   void CheckNonNullable() {
-    ArrayType result;
     ArrayType expected;
     int size = builder_nn_->length();
 
@@ -146,22 +147,24 @@ class TestPrimitiveBuilder : public TestBuilder {
         size * sizeof(T));
 
     expected.Init(size, ex_data);
-    ASSERT_OK(builder_nn_->Transfer(&result));
+
+    std::shared_ptr<ArrayType> result = std::dynamic_pointer_cast<ArrayType>(
+        builder_nn_->Finish());
 
     // Builder is now reset
     ASSERT_EQ(0, builder_nn_->length());
     ASSERT_EQ(0, builder_nn_->capacity());
     ASSERT_EQ(nullptr, builder_nn_->buffer());
 
-    ASSERT_TRUE(result.Equals(expected));
-    ASSERT_EQ(0, result.null_count());
+    ASSERT_TRUE(result->Equals(expected));
+    ASSERT_EQ(0, result->null_count());
   }
 
  protected:
   TypePtr type_;
   TypePtr type_nn_;
-  unique_ptr<BuilderType> builder_;
-  unique_ptr<BuilderType> builder_nn_;
+  shared_ptr<BuilderType> builder_;
+  shared_ptr<BuilderType> builder_nn_;
 
   vector<T> draws_;
   vector<uint8_t> nulls_;
@@ -225,15 +228,36 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendNull) {
     ASSERT_OK(this->builder_->AppendNull());
   }
 
-  Array* result;
-  ASSERT_OK(this->builder_->ToArray(&result));
-  unique_ptr<Array> holder(result);
+  auto result = this->builder_->Finish();
 
   for (int i = 0; i < size; ++i) {
     ASSERT_TRUE(result->IsNull(i));
   }
 }
 
+TYPED_TEST(TestPrimitiveBuilder, TestArrayDtorDealloc) {
+  DECL_T();
+
+  int size = 10000;
+
+  vector<T>& draws = this->draws_;
+  vector<uint8_t>& nulls = this->nulls_;
+
+  int64_t memory_before = this->pool_->bytes_allocated();
+
+  this->RandomData(size);
+
+  int i;
+  for (i = 0; i < size; ++i) {
+    ASSERT_OK(this->builder_->Append(draws[i], nulls[i] > 0));
+  }
+
+  do {
+    std::shared_ptr<Array> result = this->builder_->Finish();
+  } while (false);
+
+  ASSERT_EQ(memory_before, this->pool_->bytes_allocated());
+}
 
 TYPED_TEST(TestPrimitiveBuilder, TestAppendScalar) {
   DECL_T();
@@ -331,11 +355,11 @@ TYPED_TEST(TestPrimitiveBuilder, TestResize) {
 }
 
 TYPED_TEST(TestPrimitiveBuilder, TestReserve) {
-  int n = 100;
-  ASSERT_OK(this->builder_->Reserve(n));
+  ASSERT_OK(this->builder_->Reserve(10));
   ASSERT_EQ(0, this->builder_->length());
   ASSERT_EQ(MIN_BUILDER_CAPACITY, this->builder_->capacity());
 
+  ASSERT_OK(this->builder_->Reserve(90));
   ASSERT_OK(this->builder_->Advance(100));
   ASSERT_OK(this->builder_->Reserve(MIN_BUILDER_CAPACITY));
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/types/primitive.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/primitive.h b/cpp/src/arrow/types/primitive.h
index 09d43e7..1073bb6 100644
--- a/cpp/src/arrow/types/primitive.h
+++ b/cpp/src/arrow/types/primitive.h
@@ -64,6 +64,8 @@ class PrimitiveArrayImpl : public PrimitiveArray {
 
   PrimitiveArrayImpl() : PrimitiveArray() {}
 
+  virtual ~PrimitiveArrayImpl() {}
+
   PrimitiveArrayImpl(int32_t length, const std::shared_ptr<Buffer>& data,
       int32_t null_count = 0,
       const std::shared_ptr<Buffer>& nulls = nullptr) {
@@ -197,24 +199,12 @@ class PrimitiveBuilder : public ArrayBuilder {
     return Status::OK();
   }
 
-  // Initialize an array type instance with the results of this builder
-  // Transfers ownership of all buffers
-  Status Transfer(PrimitiveArray* out) {
-    out->Init(type_, length_, values_, null_count_, nulls_);
+  std::shared_ptr<Array> Finish() override {
+    std::shared_ptr<ArrayType> result = std::make_shared<ArrayType>();
+    result->PrimitiveArray::Init(type_, length_, values_, null_count_, nulls_);
     values_ = nulls_ = nullptr;
     capacity_ = length_ = null_count_ = 0;
-    return Status::OK();
-  }
-
-  Status Transfer(ArrayType* out) {
-    return Transfer(static_cast<PrimitiveArray*>(out));
-  }
-
-  virtual Status ToArray(Array** out) {
-    ArrayType* result = new ArrayType();
-    RETURN_NOT_OK(Transfer(result));
-    *out = static_cast<Array*>(result);
-    return Status::OK();
+    return result;
   }
 
   value_type* raw_buffer() {

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/types/string-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/string-test.cc b/cpp/src/arrow/types/string-test.cc
index 9af6672..8e82fd9 100644
--- a/cpp/src/arrow/types/string-test.cc
+++ b/cpp/src/arrow/types/string-test.cc
@@ -166,23 +166,18 @@ class TestStringBuilder : public TestBuilder {
   void SetUp() {
     TestBuilder::SetUp();
     type_ = TypePtr(new StringType());
-
-    ArrayBuilder* tmp;
-    ASSERT_OK(make_builder(pool_, type_, &tmp));
-    builder_.reset(static_cast<StringBuilder*>(tmp));
+    builder_.reset(new StringBuilder(pool_, type_));
   }
 
   void Done() {
-    Array* out;
-    ASSERT_OK(builder_->ToArray(&out));
-    result_.reset(static_cast<StringArray*>(out));
+    result_ = std::dynamic_pointer_cast<StringArray>(builder_->Finish());
   }
 
  protected:
   TypePtr type_;
 
   std::unique_ptr<StringBuilder> builder_;
-  std::unique_ptr<StringArray> result_;
+  std::shared_ptr<StringArray> result_;
 };
 
 TEST_F(TestStringBuilder, TestScalarAppend) {

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/types/string.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/string.h b/cpp/src/arrow/types/string.h
index 5795cfe..8ccc0a9 100644
--- a/cpp/src/arrow/types/string.h
+++ b/cpp/src/arrow/types/string.h
@@ -71,28 +71,6 @@ struct VarcharType : public DataType {
 static const LayoutPtr byte1(new BytesType(1));
 static const LayoutPtr physical_string = LayoutPtr(new ListLayoutType(byte1));
 
-// String is a logical type consisting of a physical list of 1-byte values
-struct StringType : public DataType {
-  explicit StringType(bool nullable = true)
-      : DataType(LogicalType::STRING, nullable) {}
-
-  StringType(const StringType& other)
-      : StringType() {}
-
-  static char const *name() {
-    return "string";
-  }
-
-  virtual std::string ToString() const {
-    std::string result;
-    if (nullable) {
-      result.append("?");
-    }
-    result.append(name());
-    return result;
-  }
-};
-
 // TODO: add a BinaryArray layer in between
 class StringArray : public ListArray {
  public:
@@ -153,26 +131,23 @@ class StringArray : public ListArray {
 class StringBuilder : public ListBuilder {
  public:
   explicit StringBuilder(MemoryPool* pool, const TypePtr& type) :
-      ListBuilder(pool, type,
-          static_cast<ArrayBuilder*>(new UInt8Builder(pool, value_type_))) {
+      ListBuilder(pool, type, std::make_shared<UInt8Builder>(pool, value_type_)) {
     byte_builder_ = static_cast<UInt8Builder*>(value_builder_.get());
   }
 
   Status Append(const std::string& value) {
-    RETURN_NOT_OK(ListBuilder::Append());
-    return byte_builder_->Append(reinterpret_cast<const uint8_t*>(value.c_str()),
-        value.size());
+    return Append(value.c_str(), value.size());
   }
 
-  Status Append(const uint8_t* value, int32_t length);
+  Status Append(const char* value, int32_t length) {
+    RETURN_NOT_OK(ListBuilder::Append());
+    return byte_builder_->Append(reinterpret_cast<const uint8_t*>(value), length);
+  }
   Status Append(const std::vector<std::string>& values,
                 uint8_t* null_bytes);
 
-  virtual Status ToArray(Array** out) {
-    StringArray* result = new StringArray();
-    RETURN_NOT_OK(ListBuilder::Transfer(result));
-    *out = static_cast<Array*>(result);
-    return Status::OK();
+  std::shared_ptr<Array> Finish() override {
+    return ListBuilder::Transfer<StringArray>();
   }
 
  protected:

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/types/struct-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/struct-test.cc b/cpp/src/arrow/types/struct-test.cc
index df61571..9a4777e 100644
--- a/cpp/src/arrow/types/struct-test.cc
+++ b/cpp/src/arrow/types/struct-test.cc
@@ -17,15 +17,16 @@
 
 #include <gtest/gtest.h>
 
+#include <memory>
 #include <string>
 #include <vector>
 
-#include "arrow/field.h"
 #include "arrow/type.h"
 #include "arrow/types/integer.h"
 #include "arrow/types/string.h"
 #include "arrow/types/struct.h"
 
+using std::shared_ptr;
 using std::string;
 using std::vector;
 
@@ -33,23 +34,23 @@ namespace arrow {
 
 TEST(TestStructType, Basics) {
   TypePtr f0_type = TypePtr(new Int32Type());
-  Field f0("f0", f0_type);
+  auto f0 = std::make_shared<Field>("f0", f0_type);
 
   TypePtr f1_type = TypePtr(new StringType());
-  Field f1("f1", f1_type);
+  auto f1 = std::make_shared<Field>("f1", f1_type);
 
   TypePtr f2_type = TypePtr(new UInt8Type());
-  Field f2("f2", f2_type);
+  auto f2 = std::make_shared<Field>("f2", f2_type);
 
-  vector<Field> fields = {f0, f1, f2};
+  vector<shared_ptr<Field> > fields = {f0, f1, f2};
 
   StructType struct_type(fields);
 
-  ASSERT_TRUE(struct_type.field(0).Equals(f0));
-  ASSERT_TRUE(struct_type.field(1).Equals(f1));
-  ASSERT_TRUE(struct_type.field(2).Equals(f2));
+  ASSERT_TRUE(struct_type.field(0)->Equals(f0));
+  ASSERT_TRUE(struct_type.field(1)->Equals(f1));
+  ASSERT_TRUE(struct_type.field(2)->Equals(f2));
 
-  ASSERT_EQ(struct_type.ToString(), "?struct<f0: ?int32, f1: ?string, f2: ?uint8>");
+  ASSERT_EQ(struct_type.ToString(), "struct<f0: int32, f1: string, f2: uint8>");
 
   // TODO: out of bounds for field(...)
 }

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/types/struct.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/struct.cc b/cpp/src/arrow/types/struct.cc
index 6b233bc..02af600 100644
--- a/cpp/src/arrow/types/struct.cc
+++ b/cpp/src/arrow/types/struct.cc
@@ -17,24 +17,6 @@
 
 #include "arrow/types/struct.h"
 
-#include <cstdlib>
-#include <memory>
-#include <sstream>
-#include <string>
-
 namespace arrow {
 
-std::string StructType::ToString() const {
-  std::stringstream s;
-  if (nullable) s << "?";
-  s << "struct<";
-  for (size_t i = 0; i < fields_.size(); ++i) {
-    if (i > 0) s << ", ";
-    const Field& field  = fields_[i];
-    s << field.name << ": " << field.type->ToString();
-  }
-  s << ">";
-  return s.str();
-}
-
 } // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/types/struct.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/types/struct.h b/cpp/src/arrow/types/struct.h
index e575c31..5842534 100644
--- a/cpp/src/arrow/types/struct.h
+++ b/cpp/src/arrow/types/struct.h
@@ -18,33 +18,14 @@
 #ifndef ARROW_TYPES_STRUCT_H
 #define ARROW_TYPES_STRUCT_H
 
+#include <memory>
 #include <string>
 #include <vector>
 
-#include "arrow/field.h"
 #include "arrow/type.h"
 
 namespace arrow {
 
-struct StructType : public DataType {
-  std::vector<Field> fields_;
-
-  explicit StructType(const std::vector<Field>& fields, bool nullable = true)
-      : DataType(LogicalType::STRUCT, nullable) {
-    fields_ = fields;
-  }
-
-  const Field& field(int i) const {
-    return fields_[i];
-  }
-
-  int num_children() const {
-    return fields_.size();
-  }
-
-  virtual std::string ToString() const;
-};
-
 } // namespace arrow
 
 #endif // ARROW_TYPES_STRUCT_H

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/util/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt
index c53f307..4272ce4 100644
--- a/cpp/src/arrow/util/CMakeLists.txt
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -19,22 +19,6 @@
 # arrow_util
 #######################################
 
-set(UTIL_SRCS
-  bit-util.cc
-  buffer.cc
-  memory-pool.cc
-  status.cc
-)
-
-set(UTIL_LIBS
-)
-
-add_library(arrow_util STATIC
-  ${UTIL_SRCS}
-)
-target_link_libraries(arrow_util ${UTIL_LIBS})
-SET_TARGET_PROPERTIES(arrow_util PROPERTIES LINKER_LANGUAGE CXX)
-
 # Headers: top level
 install(FILES
   bit-util.h
@@ -50,7 +34,7 @@ install(FILES
 
 add_library(arrow_test_util)
 target_link_libraries(arrow_test_util
-  arrow_util)
+)
 
 SET_TARGET_PROPERTIES(arrow_test_util PROPERTIES LINKER_LANGUAGE CXX)
 
@@ -64,7 +48,6 @@ add_library(arrow_test_main
 if (APPLE)
   target_link_libraries(arrow_test_main
     gtest
-	arrow_util
 	arrow_test_util
     dl)
   set_target_properties(arrow_test_main
@@ -72,7 +55,6 @@ if (APPLE)
 else()
   target_link_libraries(arrow_test_main
     gtest
-	arrow_util
 	arrow_test_util
     pthread
     dl

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/util/buffer.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/buffer.cc b/cpp/src/arrow/util/buffer.cc
index 3f3807d..50f4716 100644
--- a/cpp/src/arrow/util/buffer.cc
+++ b/cpp/src/arrow/util/buffer.cc
@@ -31,6 +31,8 @@ Buffer::Buffer(const std::shared_ptr<Buffer>& parent, int64_t offset,
   parent_ = parent;
 }
 
+Buffer::~Buffer() {}
+
 std::shared_ptr<Buffer> MutableBuffer::GetImmutableView() {
   return std::make_shared<Buffer>(this->get_shared_ptr(), 0, size());
 }
@@ -43,6 +45,12 @@ PoolBuffer::PoolBuffer(MemoryPool* pool) :
   pool_ = pool;
 }
 
+PoolBuffer::~PoolBuffer() {
+  // Nothing to release unless mutable_data_ was set; otherwise hand the
+  // block and its capacity_ back to pool_.
+  if (mutable_data_ == nullptr) { return; }
+  pool_->Free(mutable_data_, capacity_);
+}
+
 Status PoolBuffer::Reserve(int64_t new_capacity) {
   if (!mutable_data_ || new_capacity > capacity_) {
     uint8_t* new_data;

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/util/buffer.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/buffer.h b/cpp/src/arrow/util/buffer.h
index 8704723..0c3e210 100644
--- a/cpp/src/arrow/util/buffer.h
+++ b/cpp/src/arrow/util/buffer.h
@@ -39,6 +39,7 @@ class Buffer : public std::enable_shared_from_this<Buffer> {
   Buffer(const uint8_t* data, int64_t size) :
       data_(data),
       size_(size) {}
+  virtual ~Buffer();
 
   // An offset into data that is owned by another buffer, but we want to be
   // able to retain a valid pointer to it even after other shared_ptr's to the
@@ -136,6 +137,7 @@ class ResizableBuffer : public MutableBuffer {
 class PoolBuffer : public ResizableBuffer {
  public:
   explicit PoolBuffer(MemoryPool* pool = nullptr);
+  virtual ~PoolBuffer();
 
   virtual Status Resize(int64_t new_size);
   virtual Status Reserve(int64_t new_capacity);

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/cpp/src/arrow/util/status.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/status.cc b/cpp/src/arrow/util/status.cc
index c64b8a3..c6e113e 100644
--- a/cpp/src/arrow/util/status.cc
+++ b/cpp/src/arrow/util/status.cc
@@ -35,4 +35,44 @@ const char* Status::CopyState(const char* state) {
   return result;
 }
 
+std::string Status::CodeAsString() const {
+  // A NULL state_ is reported as "OK" without consulting code().
+  if (state_ == NULL) {
+    return "OK";
+  }
+
+  // Return the label directly from each case so that a code() value with no
+  // case below reaches the "Unknown" fallback instead of constructing a
+  // std::string from an uninitialized pointer (undefined behavior). There is
+  // deliberately no `default:` label, so compilers can still warn when an
+  // enumerator is left unhandled.
+  switch (code()) {
+    case StatusCode::OK:
+      return "OK";
+    case StatusCode::OutOfMemory:
+      return "Out of memory";
+    case StatusCode::KeyError:
+      return "Key error";
+    case StatusCode::Invalid:
+      return "Invalid";
+    case StatusCode::NotImplemented:
+      return "NotImplemented";
+  }
+  return "Unknown";
+}
+
+std::string Status::ToString() const {
+  std::string text(CodeAsString());
+  if (state_ != NULL) {
+    // state_ starts with the message length as a uint32_t; the message bytes
+    // are read from offset 7 (presumably after a code field -- TODO confirm
+    // against the state_ layout documented in status.h).
+    uint32_t message_size;
+    memcpy(&message_size, state_, sizeof(message_size));
+    text.append(": ");
+    text.append(reinterpret_cast<const char*>(state_ + 7), message_size);
+  }
+  return text;
+}
+
 } // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index df55bfa..8fdd829 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -45,6 +45,12 @@ if ("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1")
   set(CMAKE_EXPORT_COMPILE_COMMANDS 1)
 endif()
 
+find_program(CCACHE_FOUND ccache)
+if(CCACHE_FOUND)
+  set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
+  set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache)
+endif(CCACHE_FOUND)
+
 ############################################################
 # Compiler flags
 ############################################################
@@ -389,7 +395,12 @@ add_subdirectory(src/pyarrow)
 add_subdirectory(src/pyarrow/util)
 
 set(PYARROW_SRCS
+  src/pyarrow/common.cc
+  src/pyarrow/helpers.cc
   src/pyarrow/init.cc
+  src/pyarrow/status.cc
+
+  src/pyarrow/adapters/builtin.cc
 )
 
 set(LINK_LIBS
@@ -410,18 +421,16 @@ endif()
 # Setup and build Cython modules
 ############################################################
 
-foreach(pyx_api_file
-    arrow/config.pyx
-    arrow/parquet.pyx)
-    set_source_files_properties(${pyx_api_file} PROPERTIES CYTHON_API 1)
-endforeach(pyx_api_file)
-
 set(USE_RELATIVE_RPATH ON)
 set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
 
 set(CYTHON_EXTENSIONS
+  array
   config
+  error
   parquet
+  scalar
+  schema
 )
 
 foreach(module ${CYTHON_EXTENSIONS})

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/arrow/__init__.py
----------------------------------------------------------------------
diff --git a/python/arrow/__init__.py b/python/arrow/__init__.py
index e69de29..3c049b8 100644
--- a/python/arrow/__init__.py
+++ b/python/arrow/__init__.py
@@ -0,0 +1,34 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# flake8: noqa
+
+from arrow.array import (Array, from_pylist, total_allocated_bytes,
+                         BooleanArray, NumericArray,
+                         Int8Array, UInt8Array,
+                         ListArray, StringArray)
+
+from arrow.error import ArrowException
+
+from arrow.scalar import ArrayValue, NA, Scalar
+
+from arrow.schema import (null, bool_,
+                          int8, int16, int32, int64,
+                          uint8, uint16, uint32, uint64,
+                          float_, double, string,
+                          list_, struct, field,
+                          DataType, Field, Schema)

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/arrow/array.pxd
----------------------------------------------------------------------
diff --git a/python/arrow/array.pxd b/python/arrow/array.pxd
new file mode 100644
index 0000000..e32d277
--- /dev/null
+++ b/python/arrow/array.pxd
@@ -0,0 +1,85 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from arrow.includes.common cimport shared_ptr
+from arrow.includes.arrow cimport CArray, LogicalType
+
+from arrow.scalar import NA
+
+from arrow.schema cimport DataType
+
+cdef extern from "Python.h":
+    int PySlice_Check(object)
+
+cdef class Array:
+    cdef:
+        shared_ptr[CArray] sp_array
+        CArray* ap
+
+    cdef readonly:
+        DataType type
+
+    cdef init(self, const shared_ptr[CArray]& sp_array)
+    cdef _getitem(self, int i)
+
+
+cdef class BooleanArray(Array):
+    pass
+
+
+cdef class NumericArray(Array):
+    pass
+
+
+cdef class Int8Array(NumericArray):
+    pass
+
+
+cdef class UInt8Array(NumericArray):
+    pass
+
+
+cdef class Int16Array(NumericArray):
+    pass
+
+
+cdef class UInt16Array(NumericArray):
+    pass
+
+
+cdef class Int32Array(NumericArray):
+    pass
+
+
+cdef class UInt32Array(NumericArray):
+    pass
+
+
+cdef class Int64Array(NumericArray):
+    pass
+
+
+cdef class UInt64Array(NumericArray):
+    pass
+
+
+cdef class ListArray(Array):
+    pass
+
+
+cdef class StringArray(Array):
+    pass

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/arrow/array.pyx
----------------------------------------------------------------------
diff --git a/python/arrow/array.pyx b/python/arrow/array.pyx
new file mode 100644
index 0000000..3a3210d
--- /dev/null
+++ b/python/arrow/array.pyx
@@ -0,0 +1,179 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+
+from arrow.includes.arrow cimport *
+cimport arrow.includes.pyarrow as pyarrow
+
+from arrow.compat import frombytes, tobytes
+from arrow.error cimport check_status
+
+from arrow.scalar import NA
+
+def total_allocated_bytes():
+    """Return bytes_allocated() of the pool from pyarrow.GetMemoryPool()."""
+    return pyarrow.GetMemoryPool().bytes_allocated()
+
+
+cdef class Array:
+    """Python-facing wrapper around a shared_ptr[CArray]."""
+
+    cdef init(self, const shared_ptr[CArray]& sp_array):
+        self.sp_array = sp_array
+        self.ap = sp_array.get()
+        self.type = DataType()
+        self.type.init(self.sp_array.get().type())
+
+    property null_count:
+
+        def __get__(self):
+            return self.sp_array.get().null_count()
+
+    def __len__(self):
+        return self.sp_array.get().length()
+
+    def isnull(self):
+        # NotImplemented is a comparison sentinel, not an exception class.
+        raise NotImplementedError
+
+    def __getitem__(self, key):
+        """
+        Return element `key` (NA when null); step-1 slices go to slice().
+
+        Raises IndexError for an out-of-bounds integer key.
+        """
+        cdef:
+            Py_ssize_t n = len(self)
+
+        if PySlice_Check(key):
+            # indices() normalizes bounds without looping (no hang at n == 0).
+            start, stop, step = key.indices(n)
+            if step != 1:
+                raise NotImplementedError
+            return self.slice(start, stop)
+
+        if key < 0:
+            key += n
+        if key < 0 or key >= n:
+            raise IndexError('Array index out of bounds')
+
+        if self.ap.IsNull(key):
+            return NA
+        return self._getitem(key)
+
+    cdef _getitem(self, int i):
+        raise NotImplementedError
+
+    def slice(self, start, end):
+        pass
+
+
+cdef class NullArray(Array):
+    pass
+
+
+cdef class BooleanArray(Array):
+    pass
+
+
+cdef class NumericArray(Array):
+    pass
+
+
+cdef class Int8Array(NumericArray):
+    pass
+
+
+cdef class UInt8Array(NumericArray):
+    pass
+
+
+cdef class Int16Array(NumericArray):
+    pass
+
+
+cdef class UInt16Array(NumericArray):
+    pass
+
+
+cdef class Int32Array(NumericArray):
+    pass
+
+
+cdef class UInt32Array(NumericArray):
+    pass
+
+
+cdef class Int64Array(NumericArray):
+    pass
+
+
+cdef class UInt64Array(NumericArray):
+    pass
+
+
+cdef class FloatArray(NumericArray):
+    pass
+
+
+cdef class DoubleArray(NumericArray):
+    pass
+
+
+cdef class ListArray(Array):
+    pass
+
+
+cdef class StringArray(Array):
+    pass
+
+
+cdef dict _array_classes = {
+    LogicalType_NA: NullArray,
+    LogicalType_BOOL: BooleanArray,
+    LogicalType_INT64: Int64Array,
+    LogicalType_DOUBLE: DoubleArray,
+    LogicalType_LIST: ListArray,
+    LogicalType_STRING: StringArray,
+}
+
+cdef object box_arrow_array(const shared_ptr[CArray]& sp_array):
+    # Wrap sp_array in the Array subclass that _array_classes maps to its
+    # logical type; a type without an entry there surfaces as KeyError.
+    if sp_array.get() == NULL:
+        raise ValueError('Array was NULL')
+
+    cdef CDataType* data_type = sp_array.get().type().get()
+    if data_type == NULL:
+        raise ValueError('Array data type was NULL')
+    cdef Array arr = _array_classes[data_type.type]()
+    arr.init(sp_array)
+    return arr
+
+
+def from_pylist(object list_obj, type=None):
+    """
+    Convert Python list to Arrow array (`type` is accepted but not used yet)
+    """
+    cdef:
+        shared_ptr[CArray] sp_array
+
+    check_status(pyarrow.ConvertPySequence(list_obj, &sp_array))
+    return box_arrow_array(sp_array)

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/arrow/config.pyx
----------------------------------------------------------------------
diff --git a/python/arrow/config.pyx b/python/arrow/config.pyx
index 8f10beb..521bc06 100644
--- a/python/arrow/config.pyx
+++ b/python/arrow/config.pyx
@@ -2,7 +2,7 @@
 # distutils: language = c++
 # cython: embedsignature = True
 
-cdef extern from 'pyarrow/init.h' namespace 'arrow::py':
+cdef extern from 'pyarrow/init.h' namespace 'pyarrow':
     void pyarrow_init()
 
 pyarrow_init()

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/arrow/error.pxd
----------------------------------------------------------------------
diff --git a/python/arrow/error.pxd b/python/arrow/error.pxd
new file mode 100644
index 0000000..c18cb3e
--- /dev/null
+++ b/python/arrow/error.pxd
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from arrow.includes.pyarrow cimport *
+
+cdef check_status(const Status& status)

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/arrow/error.pyx
----------------------------------------------------------------------
diff --git a/python/arrow/error.pyx b/python/arrow/error.pyx
new file mode 100644
index 0000000..f1d5163
--- /dev/null
+++ b/python/arrow/error.pyx
@@ -0,0 +1,30 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from arrow.includes.common cimport c_string
+
+from arrow.compat import frombytes
+
+class ArrowException(Exception):
+    pass
+
+cdef check_status(const Status& status):
+    # Raise ArrowException carrying status.ToString() unless status is ok().
+    cdef c_string c_message
+    if not status.ok():
+        c_message = status.ToString()
+        raise ArrowException(frombytes(c_message))

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/arrow/includes/arrow.pxd
----------------------------------------------------------------------
diff --git a/python/arrow/includes/arrow.pxd b/python/arrow/includes/arrow.pxd
index 3635ceb..fde5de9 100644
--- a/python/arrow/includes/arrow.pxd
+++ b/python/arrow/includes/arrow.pxd
@@ -20,4 +20,77 @@
 from arrow.includes.common cimport *
 
 cdef extern from "arrow/api.h" namespace "arrow" nogil:
-    pass
+
+    enum LogicalType" arrow::LogicalType::type":
+        LogicalType_NA" arrow::LogicalType::NA"
+
+        LogicalType_BOOL" arrow::LogicalType::BOOL"
+
+        LogicalType_UINT8" arrow::LogicalType::UINT8"
+        LogicalType_INT8" arrow::LogicalType::INT8"
+        LogicalType_UINT16" arrow::LogicalType::UINT16"
+        LogicalType_INT16" arrow::LogicalType::INT16"
+        LogicalType_UINT32" arrow::LogicalType::UINT32"
+        LogicalType_INT32" arrow::LogicalType::INT32"
+        LogicalType_UINT64" arrow::LogicalType::UINT64"
+        LogicalType_INT64" arrow::LogicalType::INT64"
+
+        LogicalType_FLOAT" arrow::LogicalType::FLOAT"
+        LogicalType_DOUBLE" arrow::LogicalType::DOUBLE"
+
+        LogicalType_STRING" arrow::LogicalType::STRING"
+
+        LogicalType_LIST" arrow::LogicalType::LIST"
+        LogicalType_STRUCT" arrow::LogicalType::STRUCT"
+
+    cdef cppclass CDataType" arrow::DataType":
+        LogicalType type
+        c_bool nullable
+
+        c_bool Equals(const CDataType* other)
+
+        c_string ToString()
+
+    cdef cppclass MemoryPool" arrow::MemoryPool":
+        int64_t bytes_allocated()
+
+    cdef cppclass CListType" arrow::ListType"(CDataType):
+        CListType(const shared_ptr[CDataType]& value_type,
+                  c_bool nullable)
+
+    cdef cppclass CStringType" arrow::StringType"(CDataType):
+        pass
+
+    cdef cppclass CField" arrow::Field":
+        c_string name
+        shared_ptr[CDataType] type
+
+        CField(const c_string& name, const shared_ptr[CDataType]& type)
+
+    cdef cppclass CStructType" arrow::StructType"(CDataType):
+        CStructType(const vector[shared_ptr[CField]]& fields,
+                    c_bool nullable)
+
+    cdef cppclass CSchema" arrow::Schema":
+        CSchema(const shared_ptr[CField]& fields)
+
+    cdef cppclass CArray" arrow::Array":
+        const shared_ptr[CDataType]& type()
+
+        int32_t length()
+        int32_t null_count()
+        LogicalType logical_type()
+
+        c_bool IsNull(int i)
+
+    cdef cppclass CUInt8Array" arrow::UInt8Array"(CArray):
+        pass
+
+    cdef cppclass CInt8Array" arrow::Int8Array"(CArray):
+        pass
+
+    cdef cppclass CListArray" arrow::ListArray"(CArray):
+        pass
+
+    cdef cppclass CStringArray" arrow::StringArray"(CListArray):
+        pass

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/arrow/includes/common.pxd
----------------------------------------------------------------------
diff --git a/python/arrow/includes/common.pxd b/python/arrow/includes/common.pxd
index f2fc826..839427a 100644
--- a/python/arrow/includes/common.pxd
+++ b/python/arrow/includes/common.pxd
@@ -19,7 +19,7 @@
 
 from libc.stdint cimport *
 from libcpp cimport bool as c_bool
-from libcpp.string cimport string
+from libcpp.string cimport string as c_string
 from libcpp.vector cimport vector
 
 # This must be included for cerr and other things to work
@@ -29,6 +29,8 @@ cdef extern from "<iostream>":
 cdef extern from "<memory>" namespace "std" nogil:
 
     cdef cppclass shared_ptr[T]:
+        shared_ptr()
+        shared_ptr(T*)
         T* get()
         void reset()
         void reset(T* p)

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/arrow/includes/pyarrow.pxd
----------------------------------------------------------------------
diff --git a/python/arrow/includes/pyarrow.pxd b/python/arrow/includes/pyarrow.pxd
index dcef663..3eed5b8 100644
--- a/python/arrow/includes/pyarrow.pxd
+++ b/python/arrow/includes/pyarrow.pxd
@@ -18,6 +18,28 @@
 # distutils: language = c++
 
 from arrow.includes.common cimport *
+from arrow.includes.arrow cimport (CArray, CDataType, LogicalType,
+                                   MemoryPool)
 
 cdef extern from "pyarrow/api.h" namespace "pyarrow" nogil:
-    pass
+    # We can later add more of the common status factory methods as needed
+    cdef Status Status_OK "Status::OK"()
+
+    cdef cppclass Status:
+        Status()
+
+        c_string ToString()
+
+        c_bool ok()
+        c_bool IsOutOfMemory()
+        c_bool IsKeyError()
+        c_bool IsTypeError()
+        c_bool IsIOError()
+        c_bool IsValueError()
+        c_bool IsNotImplemented()
+        c_bool IsArrowError()
+
+    shared_ptr[CDataType] GetPrimitiveType(LogicalType type, c_bool nullable)
+    Status ConvertPySequence(object obj, shared_ptr[CArray]* out)
+
+    MemoryPool* GetMemoryPool()

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/arrow/scalar.pxd
----------------------------------------------------------------------
diff --git a/python/arrow/scalar.pxd b/python/arrow/scalar.pxd
new file mode 100644
index 0000000..e193c09
--- /dev/null
+++ b/python/arrow/scalar.pxd
@@ -0,0 +1,47 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from arrow.includes.common cimport *
+from arrow.includes.arrow cimport CArray, CListArray
+
+from arrow.schema cimport DataType
+
+cdef class Scalar:
+    cdef readonly:
+        DataType type
+
+
+cdef class NAType(Scalar):
+    pass
+
+
+cdef class ArrayValue(Scalar):
+    cdef:
+        shared_ptr[CArray] array
+        int index
+
+
+cdef class Int8Value(ArrayValue):
+    pass
+
+
+cdef class ListValue(ArrayValue):
+    pass
+
+
+cdef class StringValue(ArrayValue):
+    pass

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/arrow/scalar.pyx
----------------------------------------------------------------------
diff --git a/python/arrow/scalar.pyx b/python/arrow/scalar.pyx
new file mode 100644
index 0000000..78dadec
--- /dev/null
+++ b/python/arrow/scalar.pyx
@@ -0,0 +1,28 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import arrow.schema as schema
+
+cdef class NAType(Scalar):
+    """Scalar whose type is schema.null() and whose repr is 'NA'."""
+    def __cinit__(self):
+        self.type = schema.null()
+
+    def __repr__(self):
+        return 'NA'
+
+NA = NAType()

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/arrow/schema.pxd
----------------------------------------------------------------------
diff --git a/python/arrow/schema.pxd b/python/arrow/schema.pxd
new file mode 100644
index 0000000..487c246
--- /dev/null
+++ b/python/arrow/schema.pxd
@@ -0,0 +1,39 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from arrow.includes.common cimport shared_ptr
+from arrow.includes.arrow cimport CDataType, CField, CSchema
+
+cdef class DataType:
+    cdef:
+        shared_ptr[CDataType] sp_type
+        CDataType* type
+
+    cdef init(self, const shared_ptr[CDataType]& type)
+
+cdef class Field:
+    cdef:
+        shared_ptr[CField] sp_field
+        CField* field
+
+    cdef readonly:
+        DataType type
+
+cdef class Schema:
+    cdef:
+        shared_ptr[CSchema] sp_schema
+        CSchema* schema

http://git-wip-us.apache.org/repos/asf/arrow/blob/9afb6677/python/arrow/schema.pyx
----------------------------------------------------------------------
diff --git a/python/arrow/schema.pyx b/python/arrow/schema.pyx
new file mode 100644
index 0000000..63cd6e8
--- /dev/null
+++ b/python/arrow/schema.pyx
@@ -0,0 +1,150 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+########################################
+# Data types, fields, schemas, and so forth
+
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+
+from arrow.compat import frombytes, tobytes
+from arrow.includes.arrow cimport *
+cimport arrow.includes.pyarrow as pyarrow
+
+cimport cpython
+
+cdef class DataType:  # Python wrapper around a shared C++ CDataType
+
+    def __cinit__(self):
+        pass  # takes no arguments; the wrapped type is attached by init()
+
+    cdef init(self, const shared_ptr[CDataType]& type):
+        self.sp_type = type  # keep a shared_ptr copy on the wrapper
+        self.type = type.get()  # cache the raw pointer used by the methods
+
+    def __str__(self):  # NOTE(review): self.type is only assigned in init()
+        return frombytes(self.type.ToString())
+
+    def __repr__(self):
+        return 'DataType({0})'.format(str(self))
+
+    def __richcmp__(DataType self, DataType other not None, int op):
+        if op == cpython.Py_EQ:  # only == and != are supported
+            return self.type.Equals(other.type)  # safe: other is never None
+        elif op == cpython.Py_NE:
+            return not self.type.Equals(other.type)
+        else:
+            raise TypeError('Invalid comparison')  # <, <=, >, >= are rejected
+
+
+cdef class Field:  # a name paired with a DataType, backed by a C++ CField
+
+    def __cinit__(self, object name, DataType type not None):
+        self.type = type  # never None: the signature rejects it up front
+        self.sp_field.reset(new CField(tobytes(name), type.sp_type))
+        self.field = self.sp_field.get()  # cache the raw pointer
+
+    def __repr__(self):
+        return 'Field({0!r}, type={1})'.format(self.name, str(self.type))
+
+    property name:  # read-only; value is read back from the C++ field
+
+        def __get__(self):
+            return frombytes(self.field.name)
+
+cdef dict _type_cache = {}  # (LogicalType, nullable) -> shared DataType
+
+cdef DataType primitive_type(LogicalType type, bint nullable=True):  # memoized
+    cdef DataType wrapper
+    key = (type, nullable)
+    if key in _type_cache:
+        return _type_cache[key]
+    wrapper = DataType()
+    wrapper.init(pyarrow.GetPrimitiveType(type, nullable))
+    _type_cache[key] = wrapper  # stored only after init() has completed
+    return wrapper
+
+#------------------------------------------------------------
+# Type factory functions
+
+def field(name, type):  # function-style spelling of Field(name, type)
+    return Field(name, type)
+
+def null():  # no nullable option: uses primitive_type's default (True)
+    return primitive_type(LogicalType_NA)  # instance is shared via the cache
+
+def bool_(c_bool nullable=True):  # name presumably avoids the `bool` builtin
+    return primitive_type(LogicalType_BOOL, nullable)
+
+def uint8(c_bool nullable=True):
+    return primitive_type(LogicalType_UINT8, nullable)
+
+def int8(c_bool nullable=True):
+    return primitive_type(LogicalType_INT8, nullable)
+
+def uint16(c_bool nullable=True):
+    return primitive_type(LogicalType_UINT16, nullable)
+
+def int16(c_bool nullable=True):
+    return primitive_type(LogicalType_INT16, nullable)
+
+def uint32(c_bool nullable=True):
+    return primitive_type(LogicalType_UINT32, nullable)
+
+def int32(c_bool nullable=True):
+    return primitive_type(LogicalType_INT32, nullable)
+
+def uint64(c_bool nullable=True):
+    return primitive_type(LogicalType_UINT64, nullable)
+
+def int64(c_bool nullable=True):
+    return primitive_type(LogicalType_INT64, nullable)
+
+def float_(c_bool nullable=True):  # name presumably avoids the `float` builtin
+    return primitive_type(LogicalType_FLOAT, nullable)
+
+def double(c_bool nullable=True):
+    return primitive_type(LogicalType_DOUBLE, nullable)
+
+def string(c_bool nullable=True):
+    """
+    UTF8 string type (the DataType for LogicalType_STRING)
+    """
+    return primitive_type(LogicalType_STRING, nullable)
+
+def list_(DataType value_type not None, c_bool nullable=True):
+    cdef DataType out = DataType()  # new wrapper per call; not cached
+    out.init(shared_ptr[CDataType](
+        new CListType(value_type.sp_type, nullable)))  # value_type is not None
+    return out
+
+def struct(fields, c_bool nullable=True):
+    """
+    Build a struct DataType from an iterable of Field objects.
+    """
+    cdef:
+        DataType out = DataType()
+        Field field
+        vector[shared_ptr[CField]] c_fields
+    for field in fields:
+        if field is None:  # a typed local still accepts None; reject it here
+            raise TypeError('struct fields must be Field objects, not None')
+        c_fields.push_back(field.sp_field)
+    out.init(shared_ptr[CDataType](
+        new CStructType(c_fields, nullable)))
+    return out