You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/12/04 03:25:07 UTC

[arrow] branch master updated: ARROW-3834: [Doc] Merge C++ and Python documentation

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 80fd4ef  ARROW-3834: [Doc] Merge C++ and Python documentation
80fd4ef is described below

commit 80fd4efe39481699704ccb184ad97999ca233f25
Author: Uwe L. Korn <uw...@xhochy.com>
AuthorDate: Mon Dec 3 21:24:23 2018 -0600

    ARROW-3834: [Doc] Merge C++ and Python documentation
    
    Author: Uwe L. Korn <uw...@xhochy.com>
    Author: Korn, Uwe <Uw...@blue-yonder.com>
    
    Closes #2856 from xhochy/doc-merge and squashes the following commits:
    
    b2e620432 <Uwe L. Korn> Review comments
    3576a856b <Uwe L. Korn> Add more C++ docs
    0934d440a <Uwe L. Korn> Fix Python docs build
    7e0f97e29 <Uwe L. Korn> Rename doc to docs
    528c24a7a <Uwe L. Korn> Convert format docs to reST
    02b6242e1 <Uwe L. Korn> Add doc generation to docker-compose
    0bfadd2c8 <Uwe L. Korn> Add pre-commit check for RAT
    744e31287 <Uwe L. Korn> Fix references to format documents
    507a49676 <Uwe L. Korn> Move doc to top-level
    cd660999e <Uwe L. Korn> Move Sphinx doc to top-level directory
    6fe07c6c4 <Uwe L. Korn> Build C++ API docs
    8df9302b2 <Uwe L. Korn> Add breathe as a requirement
    a3af2c4e5 <Uwe L. Korn> Fix linter issues
    7f7e8ad33 <Korn, Uwe> Fix Sphinx build for sphinx>=1.8
    157b98132 <Korn, Uwe> Merge C++ and Python documentation
---
 .dockerignore                                      |   2 +-
 .gitignore                                         |   5 +
 .pre-commit-config.yaml                            |   8 +
 ci/conda_env_python.yml                            |   1 +
 python/doc/.gitignore => ci/conda_env_sphinx.yml   |   8 +-
 ci/docker_build_sphinx.sh                          |  30 +
 ci/travis_script_python.sh                         |  11 +-
 cpp/apidoc/Doxyfile                                |   2 +-
 cpp/apidoc/index.md                                |  57 --
 cpp/src/arrow/array.h                              |   1 +
 cpp/src/arrow/flight/CMakeLists.txt                |   2 +-
 cpp/src/arrow/ipc/CMakeLists.txt                   |   8 +-
 dev/gen_apidocs/create_documents.sh                |   9 -
 dev/release/rat_exclude_files.txt                  |   2 +-
 dev/release/run-rat.sh                             |   8 +-
 docker-compose.yml                                 |  16 +-
 {python/doc => docs}/.gitignore                    |   2 +-
 {python/doc => docs}/Benchmarks.md                 |   0
 .gitignore => docs/Dockerfile                      |  34 +-
 {python/doc => docs}/Makefile                      |   0
 {python/doc => docs}/environment.yml               |   0
 {python/doc => docs}/requirements.txt              |   1 +
 {python/doc => docs}/source/_static/stub           |   0
 {python/doc => docs}/source/_templates/layout.html |   0
 {python/doc => docs}/source/conf.py                |  44 +-
 .../source/install.rst => docs/source/cpp/api.rst  |  40 +-
 docs/source/cpp/api/array.rst                      |  81 +++
 .../install.rst => docs/source/cpp/api/memory.rst  |  54 +-
 docs/source/cpp/index.rst                          |  88 +++
 {format => docs/source/format}/Arrow.graffle       | Bin
 {format => docs/source/format}/Arrow.png           | Bin
 {format => docs/source/format}/File.fbs            |   0
 {format => docs/source/format}/Flight.proto        |   0
 docs/source/format/Guidelines.rst                  |  43 ++
 docs/source/format/IPC.rst                         | 237 ++++++++
 format/Layout.md => docs/source/format/Layout.rst  | 640 ++++++++++-----------
 {format => docs/source/format}/Message.fbs         |   0
 docs/source/format/Metadata.rst                    | 394 +++++++++++++
 docs/source/format/README.rst                      |  53 ++
 {format => docs/source/format}/Schema.fbs          |   0
 {format => docs/source/format}/Tensor.fbs          |   0
 {python/doc => docs}/source/index.rst              |  40 +-
 {python/doc/source => docs/source/python}/api.rst  |   0
 {python/doc/source => docs/source/python}/csv.rst  |   0
 {python/doc/source => docs/source/python}/data.rst |   0
 .../source => docs/source/python}/development.rst  |   0
 .../source => docs/source/python}/extending.rst    |   0
 .../source => docs/source/python}/filesystems.rst  |   0
 .../source/python}/getting_involved.rst            |   0
 .../doc/source => docs/source/python}/index.rst    |  22 +-
 .../doc/source => docs/source/python}/install.rst  |   0
 {python/doc/source => docs/source/python}/ipc.rst  |   0
 .../doc/source => docs/source/python}/memory.rst   |   0
 .../doc/source => docs/source/python}/numpy.rst    |   4 +-
 .../doc/source => docs/source/python}/pandas.rst   |   4 +-
 .../doc/source => docs/source/python}/parquet.rst  |   0
 .../doc/source => docs/source/python}/plasma.rst   |   0
 format/Guidelines.md                               |  35 --
 format/IPC.md                                      | 253 --------
 format/Metadata.md                                 | 409 -------------
 format/README.md                                   |  53 --
 java/flight/pom.xml                                |   2 +-
 java/format/pom.xml                                |   8 +-
 python/README.md                                   |   6 +-
 64 files changed, 1407 insertions(+), 1310 deletions(-)

diff --git a/.dockerignore b/.dockerignore
index 1c6bc1e..2c6db20 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -17,6 +17,7 @@
 
 .git
 docker_cache
+docs/_build
 
 # IDE
 .vscode
@@ -49,7 +50,6 @@ python/dist
 python/*.egg-info
 python/*.egg
 python/*.pyc
-python/doc/_build
 __pycache__/
 */__pycache__/
 */*/__pycache__/
diff --git a/.gitignore b/.gitignore
index 79a2a8e..5817efd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,6 +15,9 @@
 # specific language governing permissions and limitations
 # under the License.
 
+apache-rat-*.jar
+arrow-src.tar
+
 # Compiled source
 *.a
 *.dll
@@ -34,7 +37,9 @@ MANIFEST
 *.iml
 
 cpp/.idea/
+cpp/apidoc/xml/
 python/.eggs/
+python/doc/
 .vscode
 .idea/
 .pytest_cache/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 3066c5e..4e0c7b2 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -21,6 +21,14 @@
 # To run all hooks on all files use `pre-commit run -a`
 
 repos:
+  - repo: local
+    hooks:
+      - id: rat
+        name: rat
+        language: system
+        entry: bash -c "git archive HEAD --prefix=apache-arrow/ --output=arrow-src.tar && ./dev/release/run-rat.sh arrow-src.tar"
+        always_run: true
+        pass_filenames: false
   - repo: git://github.com/pre-commit/pre-commit-hooks
     sha: v1.2.3
     hooks:
diff --git a/ci/conda_env_python.yml b/ci/conda_env_python.yml
index 37ec654..429851e 100644
--- a/ci/conda_env_python.yml
+++ b/ci/conda_env_python.yml
@@ -21,5 +21,6 @@ numpy
 pandas
 pytest
 python
+rsync
 setuptools
 setuptools_scm
diff --git a/python/doc/.gitignore b/ci/conda_env_sphinx.yml
similarity index 89%
copy from python/doc/.gitignore
copy to ci/conda_env_sphinx.yml
index 3bee39f..af6b407 100644
--- a/python/doc/.gitignore
+++ b/ci/conda_env_sphinx.yml
@@ -15,5 +15,9 @@
 # specific language governing permissions and limitations
 # under the License.
 
-_build
-source/generated
\ No newline at end of file
+# Requirements for building the documentation
+breathe
+doxygen
+ipython
+sphinx
+sphinx_rtd_theme
diff --git a/ci/docker_build_sphinx.sh b/ci/docker_build_sphinx.sh
new file mode 100755
index 0000000..9578043
--- /dev/null
+++ b/ci/docker_build_sphinx.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+set -ex
+
+pushd /arrow/cpp/apidoc
+doxygen
+popd
+
+pushd /arrow/python
+python setup.py build_sphinx -s ../docs/source --build-dir ../docs/_build
+popd
+
+mkdir -p /arrow/site/asf-site/docs/latest
+rsync -r /arrow/docs/_build/html/ /arrow/site/asf-site/docs/latest/
diff --git a/ci/travis_script_python.sh b/ci/travis_script_python.sh
index 608e1ce..e4290ed 100755
--- a/ci/travis_script_python.sh
+++ b/ci/travis_script_python.sh
@@ -61,11 +61,7 @@ conda install -y -q pip \
 
 if [ "$ARROW_TRAVIS_PYTHON_DOCS" == "1" ] && [ "$PYTHON_VERSION" == "3.6" ]; then
   # Install documentation dependencies
-  conda install -y -q \
-        ipython \
-        numpydoc \
-        sphinx=1.7.9 \
-        sphinx_rtd_theme
+  conda install -y -c conda-forge --file ci/conda_env_sphinx.yml
 fi
 
 # ARROW-2093: PyTorch increases the size of our conda dependency stack
@@ -190,7 +186,10 @@ if [ "$ARROW_TRAVIS_COVERAGE" == "1" ]; then
 fi
 
 if [ "$ARROW_TRAVIS_PYTHON_DOCS" == "1" ] && [ "$PYTHON_VERSION" == "3.6" ]; then
-  cd doc
+  pushd ../cpp/apidoc
+  doxygen
+  popd
+  cd ../docs
   sphinx-build -q -b html -d _build/doctrees -W source _build/html
 fi
 
diff --git a/cpp/apidoc/Doxyfile b/cpp/apidoc/Doxyfile
index 3ec9af9..e528587 100644
--- a/cpp/apidoc/Doxyfile
+++ b/cpp/apidoc/Doxyfile
@@ -1919,7 +1919,7 @@ MAN_LINKS              = NO
 # captures the structure of the code including all documentation.
 # The default value is: NO.
 
-GENERATE_XML           = NO
+GENERATE_XML           = YES
 
 # The XML_OUTPUT tag is used to specify where the XML pages will be put. If a
 # relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
diff --git a/cpp/apidoc/index.md b/cpp/apidoc/index.md
index 46ee500..c887a74 100644
--- a/cpp/apidoc/index.md
+++ b/cpp/apidoc/index.md
@@ -41,60 +41,3 @@ Table of Contents
    * [Convert a vector of row-wise data into an Arrow table](tutorials/row_wise_conversion.md)
    * [Using the Plasma In-Memory Object Store](tutorials/plasma.md)
    * [Use Plasma to Access Tensors from C++ in Python](tutorials/tensor_to_py.md)
-
-Getting Started
----------------
-
-The most basic structure in Arrow is an `arrow::Array`. It holds a sequence
-of values with known length all having the same type. It consists of the data
-itself and an additional bitmap that indicates if the corresponding entry of
-array is a null-value. Note that for array with zero null entries, we can omit
-this bitmap.
-
-As Arrow objects are immutable, there are classes provided that should help you
-build these objects. To build an array of `int64_t` elements, we can use the
-`arrow::Int64Builder`. In the following example, we build an array of the range
-1 to 8 where the element that should hold the number 4 is nulled.
-
-    Int64Builder builder;
-    builder.Append(1);
-    builder.Append(2);
-    builder.Append(3);
-    builder.AppendNull();
-    builder.Append(5);
-    builder.Append(6);
-    builder.Append(7);
-    builder.Append(8);
-
-    std::shared_ptr<Array> array;
-    builder.Finish(&array);
-
-The resulting Array (which can be casted to `arrow::Int64Array` if you want
-to access its values) then consists of two `arrow::Buffer`. The first one is
-the null bitmap holding a single byte with the bits `0|0|0|0|1|0|0|0`.
-As we use [least-significant bit (LSB) numbering](https://en.wikipedia.org/wiki/Bit_numbering)
-this indicates that the fourth entry in the array is null. The second
-buffer is simply an `int64_t` array containing all the above values.
-As the fourth entry is null, the value at that position in the buffer is
-undefined.
-
-    // Cast the Array to its actual type to access its data
-    std::shared_ptr<Int64Array> int64_array = std::static_pointer_cast<Int64Array>(array);
-
-    // Get the pointer to the null bitmap.
-    const uint8_t* null_bitmap = int64_array->null_bitmap_data();
-
-    // Get the pointer to the actual data
-    const int64_t* data = int64_array->raw_values();
-
-In the above example, we have yet skipped explaining two things in the code.
-On constructing the builder, we have passed `arrow::int64()` to it. This is
-the type information with which the resulting array will be annotated. In
-this simple form, it is solely a `std::shared_ptr<arrow::Int64Type>`
-instantiation.
-
-Furthermore, we have passed `arrow::default_memory_pool()` to the constructor.
-This `arrow::MemoryPool` is used for the allocations of heap memory. Besides
-tracking the amount of memory allocated, the allocator also ensures that the
-allocated memory regions are 64-byte aligned (as required by the Arrow
-specification).
diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h
index 0274c15..73f49e8 100644
--- a/cpp/src/arrow/array.h
+++ b/cpp/src/arrow/array.h
@@ -387,6 +387,7 @@ class ARROW_EXPORT PrimitiveArray : public FlatArray {
   const uint8_t* raw_values_;
 };
 
+/// Concrete Array class for numeric data.
 template <typename TYPE>
 class ARROW_EXPORT NumericArray : public PrimitiveArray {
  public:
diff --git a/cpp/src/arrow/flight/CMakeLists.txt b/cpp/src/arrow/flight/CMakeLists.txt
index bc22d60..847884a 100644
--- a/cpp/src/arrow/flight/CMakeLists.txt
+++ b/cpp/src/arrow/flight/CMakeLists.txt
@@ -35,7 +35,7 @@ SET(ARROW_FLIGHT_STATIC_LINK_LIBS
 # TODO(wesm): Protobuf shared vs static linking
 
 set(FLIGHT_PROTO_PATH "${CMAKE_SOURCE_DIR}/../format")
-set(FLIGHT_PROTO ${CMAKE_SOURCE_DIR}/../format/Flight.proto)
+set(FLIGHT_PROTO ${CMAKE_SOURCE_DIR}/../docs/source/format/Flight.proto)
 
 set(FLIGHT_GENERATED_PROTO_FILES
   "${CMAKE_CURRENT_BINARY_DIR}/Flight.pb.cc"
diff --git a/cpp/src/arrow/ipc/CMakeLists.txt b/cpp/src/arrow/ipc/CMakeLists.txt
index 13ed9b9..c0ff87f 100644
--- a/cpp/src/arrow/ipc/CMakeLists.txt
+++ b/cpp/src/arrow/ipc/CMakeLists.txt
@@ -52,10 +52,10 @@ set(FBS_OUTPUT_FILES
   "${OUTPUT_DIR}/feather_generated.h")
 
 set(FBS_SRC
-  ${CMAKE_SOURCE_DIR}/../format/Message.fbs
-  ${CMAKE_SOURCE_DIR}/../format/File.fbs
-  ${CMAKE_SOURCE_DIR}/../format/Schema.fbs
-  ${CMAKE_SOURCE_DIR}/../format/Tensor.fbs
+  ${CMAKE_SOURCE_DIR}/../docs/source/format/Message.fbs
+  ${CMAKE_SOURCE_DIR}/../docs/source/format/File.fbs
+  ${CMAKE_SOURCE_DIR}/../docs/source/format/Schema.fbs
+  ${CMAKE_SOURCE_DIR}/../docs/source/format/Tensor.fbs
   ${CMAKE_CURRENT_SOURCE_DIR}/feather.fbs)
 
 foreach(FIL ${FBS_SRC})
diff --git a/dev/gen_apidocs/create_documents.sh b/dev/gen_apidocs/create_documents.sh
index 6a3b065..ee8f8c8 100755
--- a/dev/gen_apidocs/create_documents.sh
+++ b/dev/gen_apidocs/create_documents.sh
@@ -87,15 +87,6 @@ rsync -r doc/parquet-glib/html/ ../../site/asf-site/docs/c_glib/parquet-glib
 popd
 popd
 
-# Now Python documentation can be built
-pushd arrow/python
-python setup.py build_ext --build-type=$ARROW_BUILD_TYPE \
-    --with-plasma --with-parquet --inplace
-python setup.py build_sphinx -s doc/source
-mkdir -p ../site/asf-site/docs/python
-rsync -r doc/_build/html/ ../site/asf-site/docs/python
-popd
-
 # Make C++ documentation
 pushd arrow/cpp/apidoc
 rm -rf html/*
diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
index edf3b42..e5e0411 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -114,6 +114,7 @@ dev/tasks/linux-packages/debian/plasma-store-server.install
 dev/tasks/linux-packages/debian/rules
 dev/tasks/linux-packages/debian/source/format
 dev/tasks/linux-packages/debian/watch
+docs/requirements.txt
 go/arrow/go.sum
 go/arrow/Gopkg.lock
 go/arrow/internal/cpu/*
@@ -124,7 +125,6 @@ js/.npmignore
 js/closure-compiler-scripts/*
 python/cmake_modules
 python/cmake_modules/*
-python/doc/requirements.txt
 python/MANIFEST.in
 python/pyarrow/includes/__init__.pxd
 python/pyarrow/tests/__init__.py
diff --git a/dev/release/run-rat.sh b/dev/release/run-rat.sh
index 53a322a..587e93a 100755
--- a/dev/release/run-rat.sh
+++ b/dev/release/run-rat.sh
@@ -18,10 +18,14 @@
 # under the License.
 #
 
+RAT_VERSION=0.12
+
 # download apache rat
-curl -s https://repo1.maven.org/maven2/org/apache/rat/apache-rat/0.12/apache-rat-0.12.jar > apache-rat-0.12.jar
+if [ ! -f apache-rat-${RAT_VERSION}.jar ]; then
+  curl -s https://repo1.maven.org/maven2/org/apache/rat/apache-rat/${RAT_VERSION}/apache-rat-${RAT_VERSION}.jar > apache-rat-${RAT_VERSION}.jar
+fi
 
-RAT="java -jar apache-rat-0.12.jar -x "
+RAT="java -jar apache-rat-${RAT_VERSION}.jar -x "
 
 RELEASE_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd)
 
diff --git a/docker-compose.yml b/docker-compose.yml
index 7fa4e01..d6f1100 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -152,7 +152,7 @@ services:
   ######################### Tools and Linters #################################
 
   # TODO(kszucs): site
-  # TODO(kszucs): apidoc
+  # TODO(kszucs): {cpp,java,glib,js}-apidoc
 
   lint:
     # Usage:
@@ -178,12 +178,26 @@ services:
 
   clang-format:
     # Usage:
+    #   docker-compose build cpp
+    #   docker-compose build python
     #   docker-compose build lint
     #   docker-compose run clang-format
     image: arrow:lint
     command: arrow/dev/lint/run_clang_format.sh
     volumes: *ubuntu-volumes
 
+  docs:
+    # Usage:
+    #   docker-compose build cpp
+    #   docker-compose build python
+    #   docker-compose build docs
+    #   docker-compose run docs
+    image: arrow:docs
+    build:
+      context: .
+      dockerfile: docs/Dockerfile
+    volumes: *volumes
+
   ######################### Integration Tests #################################
 
   # impala:
diff --git a/python/doc/.gitignore b/docs/.gitignore
similarity index 97%
rename from python/doc/.gitignore
rename to docs/.gitignore
index 3bee39f..d2e9f6c 100644
--- a/python/doc/.gitignore
+++ b/docs/.gitignore
@@ -16,4 +16,4 @@
 # under the License.
 
 _build
-source/generated
\ No newline at end of file
+source/python/generated
diff --git a/python/doc/Benchmarks.md b/docs/Benchmarks.md
similarity index 100%
rename from python/doc/Benchmarks.md
rename to docs/Benchmarks.md
diff --git a/.gitignore b/docs/Dockerfile
similarity index 73%
copy from .gitignore
copy to docs/Dockerfile
index 79a2a8e..4908110 100644
--- a/.gitignore
+++ b/docs/Dockerfile
@@ -15,30 +15,12 @@
 # specific language governing permissions and limitations
 # under the License.
 
-# Compiled source
-*.a
-*.dll
-*.o
-*.py[ocd]
-*.so
-*.so.*
-*.dylib
-.build_cache_dir
-dependency-reduced-pom.xml
-MANIFEST
+FROM arrow:python-3.6
 
-# Generated Visual Studio files
-*.vcxproj
-*.vcxproj.*
-*.sln
-*.iml
-
-cpp/.idea/
-python/.eggs/
-.vscode
-.idea/
-.pytest_cache/
-pkgs
-.Rproj.user
-arrow.Rcheck/
-docker_cache
+ADD ci/conda_env_sphinx.yml /arrow/ci/
+RUN conda install -c conda-forge \
+        --file arrow/ci/conda_env_sphinx.yml && \
+    conda clean --all
+CMD arrow/ci/docker_build_cpp.sh && \
+    arrow/ci/docker_build_python.sh && \
+    arrow/ci/docker_build_sphinx.sh
diff --git a/python/doc/Makefile b/docs/Makefile
similarity index 100%
rename from python/doc/Makefile
rename to docs/Makefile
diff --git a/python/doc/environment.yml b/docs/environment.yml
similarity index 100%
rename from python/doc/environment.yml
rename to docs/environment.yml
diff --git a/python/doc/requirements.txt b/docs/requirements.txt
similarity index 86%
rename from python/doc/requirements.txt
rename to docs/requirements.txt
index f3c3414..7e33455 100644
--- a/python/doc/requirements.txt
+++ b/docs/requirements.txt
@@ -1,3 +1,4 @@
+breathe
 ipython
 matplotlib
 numpydoc
diff --git a/python/doc/source/_static/stub b/docs/source/_static/stub
similarity index 100%
rename from python/doc/source/_static/stub
rename to docs/source/_static/stub
diff --git a/python/doc/source/_templates/layout.html b/docs/source/_templates/layout.html
similarity index 100%
rename from python/doc/source/_templates/layout.html
rename to docs/source/_templates/layout.html
diff --git a/python/doc/source/conf.py b/docs/source/conf.py
similarity index 93%
rename from python/doc/source/conf.py
rename to docs/source/conf.py
index f832790..1cadef1 100644
--- a/python/doc/source/conf.py
+++ b/docs/source/conf.py
@@ -30,7 +30,7 @@
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
-import glob
+import pyarrow
 import os
 import sys
 
@@ -57,16 +57,17 @@ extensions = [
     'sphinx.ext.viewcode',
     'sphinx.ext.napoleon',
     'IPython.sphinxext.ipython_directive',
-    'IPython.sphinxext.ipython_console_highlighting'
+    'IPython.sphinxext.ipython_console_highlighting',
+    'breathe'
 ]
 
 # Show members for classes in .. autosummary
-autodoc_default_flags = [
-    'members',
-    'undoc-members',
-    'show-inheritance',
-    'inherited-members'
-]
+autodoc_default_options = {
+    'members': None,
+    'undoc-members': None,
+    'show-inheritance': None,
+    'inherited-members': None
+}
 
 # ipython directive options
 ipython_mplbackend = ''
@@ -77,13 +78,16 @@ napoleon_use_rtype = False
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
 
+breathe_projects = {"arrow_cpp": "../../cpp/apidoc/xml"}
+breathe_default_project = "arrow_cpp"
+
 # The suffix(es) of source filenames.
 # You can specify multiple suffix as a list of string:
 #
-# source_suffix = ['.rst', '.md']
-source_suffix = '.rst'
 
-autosummary_generate = glob.glob("*.rst")
+source_suffix = ['.rst']
+
+autosummary_generate = True
 
 # The encoding of source files.
 #
@@ -93,7 +97,7 @@ autosummary_generate = glob.glob("*.rst")
 master_doc = 'index'
 
 # General information about the project.
-project = u'pyarrow'
+project = u'Apache Arrow'
 copyright = u'2016-2018 Apache Software Foundation'
 author = u'Apache Software Foundation'
 
@@ -102,9 +106,9 @@ author = u'Apache Software Foundation'
 # built documents.
 #
 # The short X.Y version.
-version = u''
+version = pyarrow.__version__
 # The full version, including alpha/beta/rc tags.
-release = u''
+release = pyarrow.__version__
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
@@ -180,7 +184,7 @@ html_theme_options = {
 # The name for this set of Sphinx documents.
 # "<project> v<release> documentation" by default.
 #
-# html_title = u'pyarrow v0.1.0'
+html_title = u'Apache Arrow v{}'.format(version)
 
 # A shorter title for the navigation bar.  Default is the same as html_title.
 #
@@ -280,7 +284,7 @@ html_static_path = ['_static']
 # html_search_scorer = 'scorer.js'
 
 # Output file base name for HTML help builder.
-htmlhelp_basename = 'pyarrowdoc'
+htmlhelp_basename = 'arrowdoc'
 
 # -- Options for LaTeX output ---------------------------------------------
 
@@ -306,7 +310,7 @@ latex_elements = {
 # (source start file, target name, title,
 #  author, documentclass [howto, manual, or own class]).
 latex_documents = [
-    (master_doc, 'pyarrow.tex', u'pyarrow Documentation',
+    (master_doc, 'arrow.tex', u'Apache Arrow Documentation',
      u'Apache Arrow Team', 'manual'),
 ]
 
@@ -348,7 +352,7 @@ latex_documents = [
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
 man_pages = [
-    (master_doc, 'pyarrow', u'pyarrow Documentation',
+    (master_doc, 'arrow', u'Apache Arrow Documentation',
      [author], 1)
 ]
 
@@ -363,8 +367,8 @@ man_pages = [
 # (source start file, target name, title, author,
 #  dir menu entry, description, category)
 texinfo_documents = [
-    (master_doc, 'pyarrow', u'pyarrow Documentation',
-     author, 'pyarrow', 'One line description of project.',
+    (master_doc, 'arrow', u'Apache Arrow Documentation',
+     author, 'Apache Arrow', 'One line description of project.',
      'Miscellaneous'),
 ]
 
diff --git a/python/doc/source/install.rst b/docs/source/cpp/api.rst
similarity index 51%
copy from python/doc/source/install.rst
copy to docs/source/cpp/api.rst
index d07d900..894ed1f 100644
--- a/python/doc/source/install.rst
+++ b/docs/source/cpp/api.rst
@@ -15,37 +15,13 @@
 .. specific language governing permissions and limitations
 .. under the License.
 
-Install PyArrow
-===============
+*************
+API Reference
+*************
 
-Conda
------
+.. toctree::
+   :maxdepth: 2
+   :caption: Getting Started
 
-To install the latest version of PyArrow from conda-forge using conda:
-
-.. code-block:: bash
-
-    conda install -c conda-forge pyarrow
-
-Pip
----
-
-Install the latest version from PyPI (Windows, Linux, and macOS):
-
-.. code-block:: bash
-
-    pip install pyarrow
-
-If you encounter any importing issues of the pip wheels on Windows, you may
-need to install the `Visual C++ Redistributable for Visual Studio 2015
-<https://www.microsoft.com/en-us/download/details.aspx?id=48145>`_.
-
-.. note::
-
-   Windows packages are only available for Python 3.5 and higher (this is also
-   true for TensorFlow and any package that is implemented with modern C++).
-
-Installing from source
-----------------------
-
-See :ref:`development`.
+   api/array
+   api/memory
diff --git a/docs/source/cpp/api/array.rst b/docs/source/cpp/api/array.rst
new file mode 100644
index 0000000..aed1876
--- /dev/null
+++ b/docs/source/cpp/api/array.rst
@@ -0,0 +1,81 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements.  See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership.  The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License.  You may obtain a copy of the License at
+
+..   http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied.  See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+Array types
+=============
+
+.. doxygenclass:: arrow::Array
+   :project: arrow_cpp
+   :members:
+
+.. doxygenclass:: arrow::DictionaryArray
+   :project: arrow_cpp
+   :members:
+
+non-nested array types
+----------------------
+
+.. doxygenclass:: arrow::FlatArray
+   :project: arrow_cpp
+   :members:
+
+.. doxygenclass:: arrow::NullArray
+   :project: arrow_cpp
+   :members:
+
+.. doxygenclass:: arrow::BinaryArray
+   :project: arrow_cpp
+   :members:
+
+.. doxygenclass:: arrow::StringArray
+   :project: arrow_cpp
+   :members:
+
+.. doxygenclass:: arrow::PrimitiveArray
+   :project: arrow_cpp
+   :members:
+
+.. doxygenclass:: arrow::BooleanArray
+   :project: arrow_cpp
+   :members:
+
+.. doxygenclass:: arrow::FixedSizeBinaryArray
+   :project: arrow_cpp
+   :members:
+
+.. doxygenclass:: arrow::Decimal128Array
+   :project: arrow_cpp
+   :members:
+
+.. doxygenclass:: arrow::NumericArray
+   :project: arrow_cpp
+   :members:
+
+nested array types
+------------------
+
+.. doxygenclass:: arrow::UnionArray
+   :project: arrow_cpp
+   :members:
+
+.. doxygenclass:: arrow::ListArray
+   :project: arrow_cpp
+   :members:
+
+.. doxygenclass:: arrow::StructArray
+   :project: arrow_cpp
+   :members:
diff --git a/python/doc/source/install.rst b/docs/source/cpp/api/memory.rst
similarity index 51%
copy from python/doc/source/install.rst
copy to docs/source/cpp/api/memory.rst
index d07d900..fbb5dc8 100644
--- a/python/doc/source/install.rst
+++ b/docs/source/cpp/api/memory.rst
@@ -15,37 +15,43 @@
 .. specific language governing permissions and limitations
 .. under the License.
 
-Install PyArrow
-===============
+Memory (management)
+===================
 
-Conda
------
+Basic containers
+----------------
 
-To install the latest version of PyArrow from conda-forge using conda:
+.. doxygenclass:: arrow::Buffer
+   :project: arrow_cpp
+   :members:
 
-.. code-block:: bash
+.. doxygenclass:: arrow::MutableBuffer
+   :project: arrow_cpp
+   :members:
 
-    conda install -c conda-forge pyarrow
+.. doxygenclass:: arrow::ResizableBuffer
+   :project: arrow_cpp
+   :members:
 
-Pip
----
+.. doxygenclass:: arrow::BufferBuilder
+   :project: arrow_cpp
+   :members:
 
-Install the latest version from PyPI (Windows, Linux, and macOS):
+Memory Pools
+------------
 
-.. code-block:: bash
+.. doxygenfunction:: arrow::default_memory_pool
+   :project: arrow_cpp
+   :outline:
 
-    pip install pyarrow
+.. doxygenclass:: arrow::MemoryPool
+   :project: arrow_cpp
+   :members:
 
-If you encounter any importing issues of the pip wheels on Windows, you may
-need to install the `Visual C++ Redistributable for Visual Studio 2015
-<https://www.microsoft.com/en-us/download/details.aspx?id=48145>`_.
+.. doxygenclass:: arrow::LoggingMemoryPool
+   :project: arrow_cpp
+   :members:
 
-.. note::
-
-   Windows packages are only available for Python 3.5 and higher (this is also
-   true for TensorFlow and any package that is implemented with modern C++).
-
-Installing from source
-----------------------
-
-See :ref:`development`.
+.. doxygenclass:: arrow::ProxyMemoryPool
+   :project: arrow_cpp
+   :members:
diff --git a/docs/source/cpp/index.rst b/docs/source/cpp/index.rst
new file mode 100644
index 0000000..4f874ba
--- /dev/null
+++ b/docs/source/cpp/index.rst
@@ -0,0 +1,88 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements.  See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership.  The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License.  You may obtain a copy of the License at
+
+..   http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied.  See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+C++ Implementation
+==================
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Getting Started
+
+   api
+
+Getting Started
+---------------
+
+The most basic structure in Arrow is an :cpp:class:`arrow::Array`. It holds a sequence
+of values with known length all having the same type. It consists of the data
+itself and an additional bitmap that indicates if the corresponding entry of
+the array is a null-value. Note that for arrays with zero null entries, we can omit
+this bitmap.
+
+As Arrow objects are immutable, there are classes provided that should help you
+build these objects. To build an array of ``int64_t`` elements, we can use the
+:cpp:class:`arrow::Int64Builder`. In the following example, we build an array of
+the range 1 to 8 where the element that should hold the number 4 is nulled.
+
+.. code::
+
+    Int64Builder builder;
+    builder.Append(1);
+    builder.Append(2);
+    builder.Append(3);
+    builder.AppendNull();
+    builder.Append(5);
+    builder.Append(6);
+    builder.Append(7);
+    builder.Append(8);
+
+    std::shared_ptr<Array> array;
+    builder.Finish(&array);
+
+The resulting Array (which can be casted to :cpp:class:`arrow::Int64Array` if you want
+to access its values) then consists of two :cpp:class:`arrow::Buffer`. The first one is
+the null bitmap holding a single byte with the bits ``0|0|0|0|1|0|0|0``.
+As we use `least-significant bit (LSB) numbering`_,
+this indicates that the fourth entry in the array is null. The second
+buffer is simply an ``int64_t`` array containing all the above values.
+As the fourth entry is null, the value at that position in the buffer is
+undefined.
+
+.. code::
+
+    // Cast the Array to its actual type to access its data
+    std::shared_ptr<Int64Array> int64_array = std::static_pointer_cast<Int64Array>(array);
+
+    // Get the pointer to the null bitmap.
+    const uint8_t* null_bitmap = int64_array->null_bitmap_data();
+
+    // Get the pointer to the actual data
+    const int64_t* data = int64_array->raw_values();
+
+In the above example, we have not yet explained two things in the code.
+On constructing the builder, we have passed :cpp:func:`arrow::int64()` to it. This is
+the type information with which the resulting array will be annotated. In
+this simple form, it is solely a :cpp:class:`std::shared_ptr<arrow::Int64Type>`
+instantiation.
+
+Furthermore, we have passed :cpp:func:`arrow::default_memory_pool()` to the constructor.
+This :cpp:class:`arrow::MemoryPool` is used for the allocations of heap memory. Besides
+tracking the amount of memory allocated, the allocator also ensures that the
+allocated memory regions are 64-byte aligned (as required by the Arrow
+specification).
+
+.. _least-significant bit (LSB) numbering: https://en.wikipedia.org/wiki/Bit_numbering
diff --git a/format/Arrow.graffle b/docs/source/format/Arrow.graffle
similarity index 100%
rename from format/Arrow.graffle
rename to docs/source/format/Arrow.graffle
diff --git a/format/Arrow.png b/docs/source/format/Arrow.png
similarity index 100%
rename from format/Arrow.png
rename to docs/source/format/Arrow.png
diff --git a/format/File.fbs b/docs/source/format/File.fbs
similarity index 100%
rename from format/File.fbs
rename to docs/source/format/File.fbs
diff --git a/format/Flight.proto b/docs/source/format/Flight.proto
similarity index 100%
rename from format/Flight.proto
rename to docs/source/format/Flight.proto
diff --git a/docs/source/format/Guidelines.rst b/docs/source/format/Guidelines.rst
new file mode 100644
index 0000000..5b03220
--- /dev/null
+++ b/docs/source/format/Guidelines.rst
@@ -0,0 +1,43 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements.  See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership.  The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License.  You may obtain a copy of the License at
+
+..   http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied.  See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+Implementation guidelines
+=========================
+
+An execution engine (or framework, or UDF executor, or storage engine, etc) can implement only a subset of the Arrow spec and/or extend it given the following constraints:
+
+Implementing a subset of the spec
+---------------------------------
+
+If only producing (and not consuming) Arrow vectors
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Any subset of the vector spec and the corresponding metadata can be implemented.
+
+If consuming and producing vectors
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+There is a minimal subset of vectors to be supported.
+Production of a subset of vectors and their corresponding metadata is always fine.
+Consumption of vectors should at least convert the unsupported input vectors to the supported subset (for example timestamp.millis to timestamp.micros, or int32 to int64).
+
+Extensibility
+-------------
+
+An execution engine implementor can also extend their memory representation with their own vectors internally as long as they are never exposed. Before sending data to another system expecting Arrow data, these custom vectors should be converted to a type that exists in the Arrow spec.
+An example of this is operating on compressed data.
+These custom vectors are not exchanged externally and there is no support for custom metadata.
diff --git a/docs/source/format/IPC.rst b/docs/source/format/IPC.rst
new file mode 100644
index 0000000..8cb74b8
--- /dev/null
+++ b/docs/source/format/IPC.rst
@@ -0,0 +1,237 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements.  See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership.  The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License.  You may obtain a copy of the License at
+
+..   http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied.  See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+Interprocess messaging / communication (IPC)
+============================================
+
+Encapsulated message format
+---------------------------
+
+Data components in the stream and file formats are represented as encapsulated
+*messages* consisting of:
+
+* A length prefix indicating the metadata size
+* The message metadata as a `Flatbuffer`_
+* Padding bytes to an 8-byte boundary
+* The message body, which must be a multiple of 8 bytes
+
+Schematically, we have: ::
+
+    <metadata_size: int32>
+    <metadata_flatbuffer: bytes>
+    <padding>
+    <message body>
+
+The complete serialized message must be a multiple of 8 bytes so that messages
+can be relocated between streams. Otherwise the amount of padding between the
+metadata and the message body could be non-deterministic.
+
+The ``metadata_size`` includes the size of the flatbuffer plus padding. The
+``Message`` flatbuffer includes a version number, the particular message (as a
+flatbuffer union), and the size of the message body: ::
+
+    table Message {
+      version: org.apache.arrow.flatbuf.MetadataVersion;
+      header: MessageHeader;
+      bodyLength: long;
+    }
+
+Currently, we support 4 types of messages:
+
+* Schema
+* RecordBatch
+* DictionaryBatch
+* Tensor
+
+Streaming format
+----------------
+
+We provide a streaming format for record batches. It is presented as a sequence
+of encapsulated messages, each of which follows the format above. The schema
+comes first in the stream, and it is the same for all of the record batches
+that follow. If any fields in the schema are dictionary-encoded, one or more
+``DictionaryBatch`` messages will be included. ``DictionaryBatch`` and
+``RecordBatch`` messages may be interleaved, but before any dictionary key is used
+in a ``RecordBatch`` it should be defined in a ``DictionaryBatch``. ::
+
+    <SCHEMA>
+    <DICTIONARY 0>
+    ...
+    <DICTIONARY k - 1>
+    <RECORD BATCH 0>
+    ...
+    <DICTIONARY x DELTA>
+    ...
+    <DICTIONARY y DELTA>
+    ...
+    <RECORD BATCH n - 1>
+    <EOS [optional]: int32>
+
+When a stream reader implementation is reading a stream, after each message, it
+may read the next 4 bytes to know how large the message metadata that follows
+is. Once the message flatbuffer is read, you can then read the message body.
+
+The stream writer can signal end-of-stream (EOS) either by writing a 0 length
+as an ``int32`` or simply closing the stream interface.
+
+File format
+-----------
+
+We define a "file format" supporting random access in a very similar format to
+the streaming format. The file starts and ends with a magic string ``ARROW1``
+(plus padding). What follows in the file is identical to the stream format. At
+the end of the file, we write a *footer* containing a redundant copy of the
+schema (which is a part of the streaming format) plus memory offsets and sizes
+for each of the data blocks in the file. This enables random access to any record
+batch in the file. See ``File.fbs`` for the precise details of the file
+footer.
+
+Schematically we have: ::
+
+    <magic number "ARROW1">
+    <empty padding bytes [to 8 byte boundary]>
+    <STREAMING FORMAT>
+    <FOOTER>
+    <FOOTER SIZE: int32>
+    <magic number "ARROW1">
+
+In the file format, there is no requirement that dictionary keys should be
+defined in a ``DictionaryBatch`` before they are used in a ``RecordBatch``, as long
+as the keys are defined somewhere in the file.
+
+RecordBatch body structure
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The ``RecordBatch`` metadata contains a depth-first (pre-order) flattened set of
+field metadata and physical memory buffers (some comments from ``Message.fbs``
+have been shortened / removed): ::
+
+    table RecordBatch {
+      length: long;
+      nodes: [FieldNode];
+      buffers: [Buffer];
+    }
+
+    struct FieldNode {
+      length: long;
+      null_count: long;
+    }
+
+    struct Buffer {
+      /// The relative offset into the shared memory page where the bytes for this
+      /// buffer starts
+      offset: long;
+
+      /// The absolute length (in bytes) of the memory buffer. The memory is found
+      /// from offset (inclusive) to offset + length (non-inclusive).
+      length: long;
+    }
+
+In the context of a file, the ``page`` is not used, and the ``Buffer`` offsets use
+as a frame of reference the start of the message body. So, while in a general
+IPC setting these offsets may be anyplace in one or more shared memory regions,
+in the file format the offsets start from 0.
+
+The location of a record batch and the size of the metadata block as well as
+the body of buffers is stored in the file footer: ::
+
+    struct Block {
+      offset: long;
+      metaDataLength: int;
+      bodyLength: long;
+    }
+
+The ``metaDataLength`` here includes the metadata length prefix, serialized
+metadata, and any additional padding bytes, and by construction must be a
+multiple of 8 bytes.
+
+Some notes about this
+
+* The ``Block`` offset indicates the starting byte of the record batch.
+* The metadata length includes the flatbuffer size, the record batch metadata
+  flatbuffer, and any padding bytes
+
+Dictionary Batches
+~~~~~~~~~~~~~~~~~~
+
+Dictionaries are written in the stream and file formats as a sequence of record
+batches, each having a single field. The complete semantic schema for a
+sequence of record batches, therefore, consists of the schema along with all of
+the dictionaries. The dictionary types are found in the schema, so it is
+necessary to read the schema to first determine the dictionary types so that
+the dictionaries can be properly interpreted. ::
+
+    table DictionaryBatch {
+      id: long;
+      data: RecordBatch;
+      isDelta: boolean = false;
+    }
+
+The dictionary ``id`` in the message metadata can be referenced one or more times
+in the schema, so that dictionaries can even be used for multiple fields. See
+the :doc:`Layout` document for more about the semantics of
+dictionary-encoded data.
+
+The dictionary ``isDelta`` flag allows dictionary batches to be modified
+mid-stream.  A dictionary batch with ``isDelta`` set indicates that its vector
+should be concatenated with those of any previous batches with the same ``id``. A
+stream which encodes one column, the list of strings
+``["A", "B", "C", "B", "D", "C", "E", "A"]``, with a delta dictionary batch could
+take the form: ::
+
+    <SCHEMA>
+    <DICTIONARY 0>
+    (0) "A"
+    (1) "B"
+    (2) "C"
+
+    <RECORD BATCH 0>
+    0
+    1
+    2
+    1
+
+    <DICTIONARY 0 DELTA>
+    (3) "D"
+    (4) "E"
+
+    <RECORD BATCH 1>
+    3
+    2
+    4
+    0
+    EOS
+
+Tensor (Multi-dimensional Array) Message Format
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The ``Tensor`` message type provides a way to write a multidimensional array of
+fixed-size values (such as a NumPy ndarray) using Arrow's shared memory
+tools. Arrow implementations in general are not required to implement this data
+format, though we provide a reference implementation in C++.
+
+When writing a standalone encapsulated tensor message, we use the format as
+indicated above, but additionally align the starting offset of the metadata as
+well as the starting offset of the tensor body (if writing to a shared memory
+region) to be multiples of 64 bytes: ::
+
+    <PADDING>
+    <metadata size: int32>
+    <metadata>
+    <tensor body>
+
+.. _Flatbuffer: https://github.com/google/flatbuffers
diff --git a/format/Layout.md b/docs/source/format/Layout.rst
similarity index 53%
rename from format/Layout.md
rename to docs/source/format/Layout.rst
index 80af1d3..868a99b 100644
--- a/format/Layout.md
+++ b/docs/source/format/Layout.rst
@@ -1,25 +1,25 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements.  See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership.  The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License.  You may obtain a copy of the License at
 
-    http://www.apache.org/licenses/LICENSE-2.0
+..   http://www.apache.org/licenses/LICENSE-2.0
 
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied.  See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
 
-# Arrow: Physical memory layout
+Physical memory layout
+======================
 
-## Definitions / Terminology
+Definitions / Terminology
+-------------------------
 
 Since different projects have used different words to describe various
 concepts, here is a small glossary to help disambiguate.
@@ -35,21 +35,22 @@ concepts, here is a small glossary to help disambiguate.
   in bit width or byte width
 * Nested or parametric type: a data type whose full structure depends on one or
   more other child relative types. Two fully-specified nested types are equal
-  if and only if their child types are equal. For example, `List<U>` is distinct
-  from `List<V>` iff U and V are different relative types.
+  if and only if their child types are equal. For example, ``List<U>`` is distinct
+  from ``List<V>`` iff U and V are different relative types.
 * Relative type or simply type (unqualified): either a specific primitive type
   or a fully-specified nested type. When we say slot we mean a relative type
   value, not necessarily any physical storage region.
 * Logical type: A data type that is implemented using some relative (physical)
   type. For example, Decimal values are stored as 16 bytes in a fixed byte
-  size array. Similarly, strings can be stored as `List<1-byte>`.
+  size array. Similarly, strings can be stored as ``List<1-byte>``.
 * Parent and child arrays: names to express relationships between physical
-  value arrays in a nested type structure. For example, a `List<T>`-type parent
+  value arrays in a nested type structure. For example, a ``List<T>``-type parent
   array has a T-type array as its child (see more on lists below).
 * Leaf node or leaf: A primitive value array that may or may not be a child
   array of some array with a nested type.
 
-## Requirements, goals, and non-goals
+Requirements, goals, and non-goals
+----------------------------------
 
 Base requirements
 
@@ -59,7 +60,7 @@ Base requirements
   proprietary systems that utilize the open source components.
 * All array slots are accessible in constant time, with complexity growing
   linearly in the nesting level
-* Capable of representing fully-materialized and decoded / decompressed [Parquet][5]
+* Capable of representing fully-materialized and decoded / decompressed `Parquet`_
   data
 * It is required to have all the contiguous memory buffers in an IPC payload
   aligned at 8-byte boundaries. In other words, each buffer must start at
@@ -75,14 +76,16 @@ Base requirements
   be migrated to a different address space (e.g. via a memcpy-type of
   operation) without altering their contents.
 
-## Goals (for this document)
+Goals (for this document)
+-------------------------
 
 * To describe relative types (physical value types and a preliminary set of
   nested types) sufficient for an unambiguous implementation
 * Memory layout and random access patterns for each relative type
 * Null value representation
 
-## Non-goals (for this document)
+Non-goals (for this document)
+-----------------------------
 
 * To enumerate or specify logical types that can be implemented as primitive
   (fixed-width) value types. For example: signed and unsigned integers,
@@ -98,7 +101,8 @@ Base requirements
 * Any memory management or reference counting subsystem
 * To enumerate or specify types of encodings or compression support
 
-## Byte Order ([Endianness][3])
+Byte Order (`Endianness`_)
+---------------------------
 
 The Arrow format is little endian by default.
 The Schema metadata has an endianness field indicating endianness of RecordBatches.
@@ -109,7 +113,8 @@ that does not match the underlying system. The reference implementation is focus
 Little Endian and provides tests for it. Eventually we may provide automatic conversion
 via byte swapping.
 
-## Alignment and Padding
+Alignment and Padding
+---------------------
 
 As noted above, all buffers must be aligned in memory at 8-byte boundaries and padded
 to a length that is a multiple of 8 bytes.  The alignment requirement follows best
@@ -117,10 +122,10 @@ practices for optimized memory access:
 
 * Elements in numeric arrays will be guaranteed to be retrieved via aligned access.
 * On some architectures alignment can help limit partially used cache lines.
-* 64 byte alignment is recommended by the [Intel performance guide][2] for
+* 64 byte alignment is recommended by the `Intel performance guide`_ for
   data-structures over 64 bytes (which will be a common case for Arrow Arrays).
 
-Recommending padding to a multiple of 64 bytes allows for using [SIMD][4] instructions
+Recommending padding to a multiple of 64 bytes allows for using `SIMD`_ instructions
 consistently in loops without additional conditional checks.
 This should allow for simpler, efficient and CPU cache-friendly code.
 The specific padding length was chosen because it matches the largest known
@@ -129,27 +134,30 @@ words, we can load the entire 64-byte buffer into a 512-bit wide SIMD register
 and get data-level parallelism on all the columnar values packed into the 64-byte
 buffer. Guaranteed padding can also allow certain compilers
 to generate more optimized code directly (e.g. One can safely use Intel's
-`-qopt-assume-safe-padding`).
+``-qopt-assume-safe-padding``).
 
 Unless otherwise noted, padded bytes do not need to have a specific value.
 
-## Array lengths
+Array lengths
+-------------
 
 Array lengths are represented in the Arrow metadata as a 64-bit signed
 integer. An implementation of Arrow is considered valid even if it only
 supports lengths up to the maximum 32-bit signed integer, though. If using
 Arrow in a multi-language environment, we recommend limiting lengths to
-2<sup>31</sup> - 1 elements or less. Larger data sets can be represented using
+2 :sup:`31` - 1 elements or less. Larger data sets can be represented using
 multiple array chunks.
 
-## Null count
+Null count
+----------
 
 The number of null value slots is a property of the physical array and
 considered part of the data structure. The null count is represented in the
 Arrow metadata as a 64-bit signed integer, as it may be as large as the array
 length.
 
-## Null bitmaps
+Null bitmaps
+------------
 
 Any relative type can have null value slots, whether primitive or nested type.
 
@@ -159,25 +167,21 @@ and large enough to have at least 1 bit for each array
 slot.
 
 Whether any array slot is valid (non-null) is encoded in the respective bits of
-this bitmap. A 1 (set bit) for index `j` indicates that the value is not null,
+this bitmap. A 1 (set bit) for index ``j`` indicates that the value is not null,
 while a 0 (bit not set) indicates that it is null. Bitmaps are to be
-initialized to be all unset at allocation time (this includes padding).
+initialized to be all unset at allocation time (this includes padding).::
 
-```
-is_valid[j] -> bitmap[j / 8] & (1 << (j % 8))
-```
+    is_valid[j] -> bitmap[j / 8] & (1 << (j % 8))
 
-We use [least-significant bit (LSB) numbering][1] (also known as
+We use `least-significant bit (LSB) numbering`_ (also known as
 bit-endianness). This means that within a group of 8 bits, we read
-right-to-left:
+right-to-left: ::
 
-```
-values = [0, 1, null, 2, null, 3]
+    values = [0, 1, null, 2, null, 3]
 
-bitmap
-j mod 8   7  6  5  4  3  2  1  0
-          0  0  1  0  1  0  1  1
-```
+    bitmap
+    j mod 8   7  6  5  4  3  2  1  0
+              0  0  1  0  1  0  1  1
 
 Arrays having a 0 null count may choose to not allocate the null
 bitmap. Implementations may choose to always allocate one anyway as a matter of
@@ -186,7 +190,8 @@ convenience, but this should be noted when memory is being shared.
 Nested type arrays have their own null bitmap and null count regardless of
 the null count and null bits of their child arrays.
 
-## Primitive value arrays
+Primitive value arrays
+----------------------
 
 A primitive value array represents a fixed-length array of values each having
 the same physical slot width typically measured in bytes, though the spec also
@@ -200,66 +205,64 @@ The associated null bitmap is contiguously allocated (as described above) but
 does not need to be adjacent in memory to the values buffer.
 
 
-### Example Layout: Int32 Array
-For example a primitive array of int32s:
+Example Layout: Int32 Array
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-[1, null, 2, 4, 8]
+For example a primitive array of int32s: ::
 
-Would look like:
+    [1, null, 2, 4, 8]
 
-```
-* Length: 5, Null count: 1
-* Null bitmap buffer:
+Would look like: ::
 
-  |Byte 0 (validity bitmap) | Bytes 1-63            |
-  |-------------------------|-----------------------|
-  | 00011101                | 0 (padding)           |
+    * Length: 5, Null count: 1
+    * Null bitmap buffer:
 
-* Value Buffer:
+      |Byte 0 (validity bitmap) | Bytes 1-63            |
+      |-------------------------|-----------------------|
+      | 00011101                | 0 (padding)           |
+
+    * Value Buffer:
 
-  |Bytes 0-3   | Bytes 4-7   | Bytes 8-11  | Bytes 12-15 | Bytes 16-19 | Bytes 20-63 |
-  |------------|-------------|-------------|-------------|-------------|-------------|
-  | 1          | unspecified | 2           | 4           | 8           | unspecified |
-```
+      |Bytes 0-3   | Bytes 4-7   | Bytes 8-11  | Bytes 12-15 | Bytes 16-19 | Bytes 20-63 |
+      |------------|-------------|-------------|-------------|-------------|-------------|
+      | 1          | unspecified | 2           | 4           | 8           | unspecified |
 
-### Example Layout: Non-null int32 Array
+Example Layout: Non-null int32 Array
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-[1, 2, 3, 4, 8] has two possible layouts:
+``[1, 2, 3, 4, 8]`` has two possible layouts: ::
 
-```
-* Length: 5, Null count: 0
-* Null bitmap buffer:
+    * Length: 5, Null count: 0
+    * Null bitmap buffer:
 
-  | Byte 0 (validity bitmap) | Bytes 1-63            |
-  |--------------------------|-----------------------|
-  | 00011111                 | 0 (padding)           |
+      | Byte 0 (validity bitmap) | Bytes 1-63            |
+      |--------------------------|-----------------------|
+      | 00011111                 | 0 (padding)           |
 
-* Value Buffer:
+    * Value Buffer:
 
-  |Bytes 0-3   | Bytes 4-7   | Bytes 8-11  | bytes 12-15 | bytes 16-19 | Bytes 20-63 |
-  |------------|-------------|-------------|-------------|-------------|-------------|
-  | 1          | 2           | 3           | 4           | 8           | unspecified |
-```
+      |Bytes 0-3   | Bytes 4-7   | Bytes 8-11  | bytes 12-15 | bytes 16-19 | Bytes 20-63 |
+      |------------|-------------|-------------|-------------|-------------|-------------|
+      | 1          | 2           | 3           | 4           | 8           | unspecified |
 
-or with the bitmap elided:
+or with the bitmap elided: ::
 
-```
-* Length 5, Null count: 0
-* Null bitmap buffer: Not required
-* Value Buffer:
+    * Length 5, Null count: 0
+    * Null bitmap buffer: Not required
+    * Value Buffer:
 
-  |Bytes 0-3   | Bytes 4-7   | Bytes 8-11  | bytes 12-15 | bytes 16-19 | Bytes 20-63 |
-  |------------|-------------|-------------|-------------|-------------|-------------|
-  | 1          | 2           | 3           | 4           | 8           | unspecified |
-```
+      |Bytes 0-3   | Bytes 4-7   | Bytes 8-11  | bytes 12-15 | bytes 16-19 | Bytes 20-63 |
+      |------------|-------------|-------------|-------------|-------------|-------------|
+      | 1          | 2           | 3           | 4           | 8           | unspecified |
 
-## List type
+List type
+---------
 
 List is a nested type in which each array slot contains a variable-size
 sequence of values all having the same relative type (heterogeneity can be
 achieved through unions, described later).
 
-A list type is specified like `List<T>`, where `T` is any relative type
+A list type is specified like ``List<T>``, where ``T`` is any relative type
 (primitive or nested).
 
 A list-array is represented by the combination of the following:
@@ -267,93 +270,92 @@ A list-array is represented by the combination of the following:
 * A values array, a child array of type T. T may also be a nested type.
 * An offsets buffer containing 32-bit signed integers with length equal to the
   length of the top-level array plus one. Note that this limits the size of the
-  values array to 2<sup>31</sup>-1.
+  values array to 2 :sup:`31` -1.
 
 The offsets array encodes a start position in the values array, and the length
 of the value in each slot is computed using the first difference with the next
 element in the offsets array. For example, the position and length of slot j is
-computed as:
+computed as: ::
 
-```
-slot_position = offsets[j]
-slot_length = offsets[j + 1] - offsets[j]  // (for 0 <= j < length)
-```
+    slot_position = offsets[j]
+    slot_length = offsets[j + 1] - offsets[j]  // (for 0 <= j < length)
 
 The first value in the offsets array is 0, and the last element is the length
 of the values array.
 
-### Example Layout: `List<Char>` Array
-Let's consider an example, the type `List<Char>`, where Char is a 1-byte
+Example Layout: ``List<Char>`` Array
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Let's consider an example, the type ``List<Char>``, where Char is a 1-byte
 logical type.
 
-For an array of length 4 with respective values:
+For an array of length 4 with respective values: ::
 
-[['j', 'o', 'e'], null, ['m', 'a', 'r', 'k'], []]
+    [['j', 'o', 'e'], null, ['m', 'a', 'r', 'k'], []]
 
-will have the following representation:
+will have the following representation: ::
 
-```
-* Length: 4, Null count: 1
-* Null bitmap buffer:
+    * Length: 4, Null count: 1
+    * Null bitmap buffer:
 
-  | Byte 0 (validity bitmap) | Bytes 1-63            |
-  |--------------------------|-----------------------|
-  | 00001101                 | 0 (padding)           |
+      | Byte 0 (validity bitmap) | Bytes 1-63            |
+      |--------------------------|-----------------------|
+      | 00001101                 | 0 (padding)           |
 
-* Offsets buffer (int32)
+    * Offsets buffer (int32)
 
-  | Bytes 0-3  | Bytes 4-7   | Bytes 8-11  | Bytes 12-15 | Bytes 16-19 | Bytes 20-63 |
-  |------------|-------------|-------------|-------------|-------------|-------------|
-  | 0          | 3           | 3           | 7           | 7           | unspecified |
+      | Bytes 0-3  | Bytes 4-7   | Bytes 8-11  | Bytes 12-15 | Bytes 16-19 | Bytes 20-63 |
+      |------------|-------------|-------------|-------------|-------------|-------------|
+      | 0          | 3           | 3           | 7           | 7           | unspecified |
 
-* Values array (char array):
-  * Length: 7,  Null count: 0
-  * Null bitmap buffer: Not required
+    * Values array (char array):
+      * Length: 7,  Null count: 0
+      * Null bitmap buffer: Not required
 
-    | Bytes 0-6  | Bytes 7-63  |
-    |------------|-------------|
-    | joemark    | unspecified |
-```
+        | Bytes 0-6  | Bytes 7-63  |
+        |------------|-------------|
+        | joemark    | unspecified |
 
-### Example Layout: `List<List<byte>>`
-[[[1, 2], [3, 4]], [[5, 6, 7], null, [8]], [[9, 10]]]
+Example Layout: ``List<List<byte>>``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-will be be represented as follows:
+``[[[1, 2], [3, 4]], [[5, 6, 7], null, [8]], [[9, 10]]]``
 
-```
-* Length 3
-* Nulls count: 0
-* Null bitmap buffer: Not required
-* Offsets buffer (int32)
+will be represented as follows: ::
 
-  | Bytes 0-3  | Bytes 4-7  | Bytes 8-11 | Bytes 12-15 | Bytes 16-63 |
-  |------------|------------|------------|-------------|-------------|
-  | 0          |  2         |  5         |  6          | unspecified |
+    * Length 3
+    * Nulls count: 0
+    * Null bitmap buffer: Not required
+    * Offsets buffer (int32)
 
-* Values array (`List<byte>`)
-  * Length: 6, Null count: 1
-  * Null bitmap buffer:
+      | Bytes 0-3  | Bytes 4-7  | Bytes 8-11 | Bytes 12-15 | Bytes 16-63 |
+      |------------|------------|------------|-------------|-------------|
+      | 0          |  2         |  5         |  6          | unspecified |
 
-    | Byte 0 (validity bitmap) | Bytes 1-63  |
-    |--------------------------|-------------|
-    | 00110111                 | 0 (padding) |
+    * Values array (`List<byte>`)
+      * Length: 6, Null count: 1
+      * Null bitmap buffer:
 
-  * Offsets buffer (int32)
+        | Byte 0 (validity bitmap) | Bytes 1-63  |
+        |--------------------------|-------------|
+        | 00110111                 | 0 (padding) |
 
-    | Bytes 0-27           | Bytes 28-63 |
-    |----------------------|-------------|
-    | 0, 2, 4, 7, 7, 8, 10 | unspecified |
+      * Offsets buffer (int32)
 
-  * Values array (bytes):
-    * Length: 10, Null count: 0
-    * Null bitmap buffer: Not required
+        | Bytes 0-27           | Bytes 28-63 |
+        |----------------------|-------------|
+        | 0, 2, 4, 7, 7, 8, 10 | unspecified |
+
+      * Values array (bytes):
+        * Length: 10, Null count: 0
+        * Null bitmap buffer: Not required
 
-      | Bytes 0-9                     | Bytes 10-63 |
-      |-------------------------------|-------------|
-      | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 | unspecified |
-```
+          | Bytes 0-9                     | Bytes 10-63 |
+          |-------------------------------|-------------|
+          | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 | unspecified |
 
-## Struct type
+Struct type
+-----------
 
 A struct is a nested type parameterized by an ordered sequence of relative
 types (which can all be distinct), called its fields.
@@ -367,69 +369,66 @@ A struct array must still have an allocated null bitmap, if it has one or more n
 Physically, a struct type has one child array for each field. The child arrays are independent and need not be adjacent to each other in memory.
 
 For example, the struct (field names shown here as strings for illustration
-purposes)
+purposes)::
 
-```
-Struct <
-  name: String (= List<char>),
-  age: Int32
->
-```
+    Struct <
+      name: String (= List<char>),
+      age: Int32
+    >
 
-has two child arrays, one List<char> array (layout as above) and one 4-byte
-primitive value array having Int32 logical type.
+has two child arrays, one ``List<char>`` array (layout as above) and one 4-byte
+primitive value array having ``Int32`` logical type.
 
-### Example Layout: `Struct<List<char>, Int32>`:
-The layout for [{'joe', 1}, {null, 2}, null, {'mark', 4}] would be:
+Example Layout: ``Struct<List<char>, Int32>``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-```
-* Length: 4, Null count: 1
-* Null bitmap buffer:
+The layout for ``[{'joe', 1}, {null, 2}, null, {'mark', 4}]`` would be: ::
 
-  |Byte 0 (validity bitmap) | Bytes 1-63            |
-  |-------------------------|-----------------------|
-  | 00001011                | 0 (padding)           |
-
-* Children arrays:
-  * field-0 array (`List<char>`):
-    * Length: 4, Null count: 2
+    * Length: 4, Null count: 1
     * Null bitmap buffer:
 
-      | Byte 0 (validity bitmap) | Bytes 1-63            |
-      |--------------------------|-----------------------|
-      | 00001001                 | 0 (padding)           |
-
-    * Offsets buffer:
+      |Byte 0 (validity bitmap) | Bytes 1-63            |
+      |-------------------------|-----------------------|
+      | 00001011                | 0 (padding)           |
 
-      | Bytes 0-19     |
-      |----------------|
-      | 0, 3, 3, 3, 7  |
+    * Children arrays:
+      * field-0 array (`List<char>`):
+        * Length: 4, Null count: 2
+        * Null bitmap buffer:
 
-     * Values array:
-        * Length: 7, Null count: 0
-        * Null bitmap buffer: Not required
+          | Byte 0 (validity bitmap) | Bytes 1-63            |
+          |--------------------------|-----------------------|
+          | 00001001                 | 0 (padding)           |
 
-        * Value buffer:
+        * Offsets buffer:
 
-          | Bytes 0-6      |
+          | Bytes 0-19     |
           |----------------|
-          | joemark        |
+          | 0, 3, 3, 3, 7  |
 
-  * field-1 array (int32 array):
-    * Length: 4, Null count: 1
-    * Null bitmap buffer:
+         * Values array:
+            * Length: 7, Null count: 0
+            * Null bitmap buffer: Not required
 
-      | Byte 0 (validity bitmap) | Bytes 1-63            |
-      |--------------------------|-----------------------|
-      | 00001011                 | 0 (padding)           |
+            * Value buffer:
 
-    * Value Buffer:
+              | Bytes 0-6      |
+              |----------------|
+              | joemark        |
 
-      |Bytes 0-3   | Bytes 4-7   | Bytes 8-11  | Bytes 12-15 | Bytes 16-63 |
-      |------------|-------------|-------------|-------------|-------------|
-      | 1          | 2           | unspecified | 4           | unspecified |
+      * field-1 array (int32 array):
+        * Length: 4, Null count: 1
+        * Null bitmap buffer:
 
-```
+          | Byte 0 (validity bitmap) | Bytes 1-63            |
+          |--------------------------|-----------------------|
+          | 00001011                 | 0 (padding)           |
+
+        * Value Buffer:
+
+          |Bytes 0-3   | Bytes 4-7   | Bytes 8-11  | Bytes 12-15 | Bytes 16-63 |
+          |------------|-------------|-------------|-------------|-------------|
+          | 1          | 2           | unspecified | 4           | unspecified |
 
 While a struct does not have physical storage for each of its semantic slots
 (i.e. each scalar C-like struct), an entire struct slot can be set to null via
@@ -444,7 +443,8 @@ for the null struct but are 'hidden' from the consumer by the parent array's
 null bitmap.  However, when treated independently corresponding
 values of the children array will be non-null.
 
-## Dense union type
+Dense union type
+----------------
 
 A dense union is semantically similar to a struct, and contains an ordered
 sequence of relative types. While a struct contains multiple arrays, a union is
@@ -466,58 +466,58 @@ of overhead for each value. Its physical layout is as follows:
   offsets for each child value array must be in order / increasing.
 
 Critically, the dense union allows for minimal overhead in the ubiquitous
-union-of-structs with non-overlapping-fields use case (`Union<s1: Struct1, s2:
-Struct2, s3: Struct3, ...>`)
+union-of-structs with non-overlapping-fields use case (``Union<s1: Struct1, s2:
+Struct2, s3: Struct3, ...>``)
 
-### Example Layout: Dense union
+Example Layout: Dense union
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 An example layout for logical union of:
-`Union<f: float, i: int32>` having the values:
-[{f=1.2}, null, {f=3.4}, {i=5}]
+``Union<f: float, i: int32>`` having the values:
+``[{f=1.2}, null, {f=3.4}, {i=5}]``::
 
-```
-* Length: 4, Null count: 1
-* Null bitmap buffer:
-  |Byte 0 (validity bitmap) | Bytes 1-63            |
-  |-------------------------|-----------------------|
-  |00001101                 | 0 (padding)           |
+    * Length: 4, Null count: 1
+    * Null bitmap buffer:
+      |Byte 0 (validity bitmap) | Bytes 1-63            |
+      |-------------------------|-----------------------|
+      |00001101                 | 0 (padding)           |
 
-* Types buffer:
+    * Types buffer:
 
-  |Byte 0   | Byte 1      | Byte 2   | Byte 3   | Bytes 4-63  |
-  |---------|-------------|----------|----------|-------------|
-  | 0       | unspecified | 0        | 1        | unspecified |
+      |Byte 0   | Byte 1      | Byte 2   | Byte 3   | Bytes 4-63  |
+      |---------|-------------|----------|----------|-------------|
+      | 0       | unspecified | 0        | 1        | unspecified |
 
-* Offset buffer:
+    * Offset buffer:
 
-  |Byte 0-3 | Byte 4-7    | Byte 8-11 | Byte 12-15 | Bytes 16-63 |
-  |---------|-------------|-----------|------------|-------------|
-  | 0       | unspecified | 1         | 0          | unspecified |
+      |Byte 0-3 | Byte 4-7    | Byte 8-11 | Byte 12-15 | Bytes 16-63 |
+      |---------|-------------|-----------|------------|-------------|
+      | 0       | unspecified | 1         | 0          | unspecified |
 
-* Children arrays:
-  * Field-0 array (f: float):
-    * Length: 2, nulls: 0
-    * Null bitmap buffer: Not required
+    * Children arrays:
+      * Field-0 array (f: float):
+        * Length: 2, nulls: 0
+        * Null bitmap buffer: Not required
 
-    * Value Buffer:
+        * Value Buffer:
 
-      | Bytes 0-7 | Bytes 8-63  |
-      |-----------|-------------|
-      | 1.2, 3.4  | unspecified |
+          | Bytes 0-7 | Bytes 8-63  |
+          |-----------|-------------|
+          | 1.2, 3.4  | unspecified |
 
 
-  * Field-1 array (i: int32):
-    * Length: 1, nulls: 0
-    * Null bitmap buffer: Not required
+      * Field-1 array (i: int32):
+        * Length: 1, nulls: 0
+        * Null bitmap buffer: Not required
 
-    * Value Buffer:
+        * Value Buffer:
 
-      | Bytes 0-3 | Bytes 4-63  |
-      |-----------|-------------|
-      | 5         | unspecified |
-```
+          | Bytes 0-3 | Bytes 4-63  |
+          |-----------|-------------|
+          | 5         | unspecified |
 
-## Sparse union type
+Sparse union type
+-----------------
 
 A sparse union has the same structure as a dense union, with the omission of
 the offsets array. In this case, the child arrays are each equal in length to
@@ -529,75 +529,75 @@ union, it has some advantages that may be desirable in certain use cases:
 * A sparse union is more amenable to vectorized expression evaluation in some use cases.
 * Equal-length arrays can be interpreted as a union by only defining the types array.
 
-### Example layout: `SparseUnion<u0: Int32, u1: Float, u2: List<Char>>`
+Example layout: ``SparseUnion<u0: Int32, u1: Float, u2: List<Char>>``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-For the union array:
+For the union array: ::
 
-[{u0=5}, {u1=1.2}, {u2='joe'}, {u1=3.4}, {u0=4}, {u2='mark'}]
+    [{u0=5}, {u1=1.2}, {u2='joe'}, {u1=3.4}, {u0=4}, {u2='mark'}]
 
-will have the following layout:
-```
-* Length: 6, Null count: 0
-* Null bitmap buffer: Not required
+will have the following layout: ::
 
-* Types buffer:
+    * Length: 6, Null count: 0
+    * Null bitmap buffer: Not required
 
- | Byte 0     | Byte 1      | Byte 2      | Byte 3      | Byte 4      | Byte 5       | Bytes  6-63           |
- |------------|-------------|-------------|-------------|-------------|--------------|-----------------------|
- | 0          | 1           | 2           | 1           | 0           | 2            | unspecified (padding) |
+    * Types buffer:
 
-* Children arrays:
+     | Byte 0     | Byte 1      | Byte 2      | Byte 3      | Byte 4      | Byte 5       | Bytes  6-63           |
+     |------------|-------------|-------------|-------------|-------------|--------------|-----------------------|
+     | 0          | 1           | 2           | 1           | 0           | 2            | unspecified (padding) |
 
-  * u0 (Int32):
-    * Length: 6, Null count: 4
-    * Null bitmap buffer:
+    * Children arrays:
 
-      |Byte 0 (validity bitmap) | Bytes 1-63            |
-      |-------------------------|-----------------------|
-      |00010001                 | 0 (padding)           |
+      * u0 (Int32):
+        * Length: 6, Null count: 4
+        * Null bitmap buffer:
 
-    * Value buffer:
+          |Byte 0 (validity bitmap) | Bytes 1-63            |
+          |-------------------------|-----------------------|
+          |00010001                 | 0 (padding)           |
 
-      |Bytes 0-3   | Bytes 4-7   | Bytes 8-11  | Bytes 12-15 | Bytes 16-19 | Bytes 20-23  | Bytes 24-63           |
-      |------------|-------------|-------------|-------------|-------------|--------------|-----------------------|
-      | 5          | unspecified | unspecified | unspecified | 4           |  unspecified | unspecified (padding) |
+        * Value buffer:
 
-  * u1 (float):
-    * Length: 6, Null count: 4
-    * Null bitmap buffer:
+          |Bytes 0-3   | Bytes 4-7   | Bytes 8-11  | Bytes 12-15 | Bytes 16-19 | Bytes 20-23  | Bytes 24-63           |
+          |------------|-------------|-------------|-------------|-------------|--------------|-----------------------|
+          | 5          | unspecified | unspecified | unspecified | 4           |  unspecified | unspecified (padding) |
 
-      |Byte 0 (validity bitmap) | Bytes 1-63            |
-      |-------------------------|-----------------------|
-      | 00001010                | 0 (padding)           |
+      * u1 (float):
+        * Length: 6, Null count: 4
+        * Null bitmap buffer:
 
-    * Value buffer:
+          |Byte 0 (validity bitmap) | Bytes 1-63            |
+          |-------------------------|-----------------------|
+          | 00001010                | 0 (padding)           |
 
-      |Bytes 0-3    | Bytes 4-7   | Bytes 8-11  | Bytes 12-15 | Bytes 16-19 | Bytes 20-23  | Bytes 24-63           |
-      |-------------|-------------|-------------|-------------|-------------|--------------|-----------------------|
-      | unspecified |  1.2        | unspecified | 3.4         | unspecified |  unspecified | unspecified (padding) |
+        * Value buffer:
 
-  * u2 (`List<char>`)
-    * Length: 6, Null count: 4
-    * Null bitmap buffer:
+          |Bytes 0-3    | Bytes 4-7   | Bytes 8-11  | Bytes 12-15 | Bytes 16-19 | Bytes 20-23  | Bytes 24-63           |
+          |-------------|-------------|-------------|-------------|-------------|--------------|-----------------------|
+          | unspecified |  1.2        | unspecified | 3.4         | unspecified |  unspecified | unspecified (padding) |
 
-      | Byte 0 (validity bitmap) | Bytes 1-63            |
-      |--------------------------|-----------------------|
-      | 00100100                 | 0 (padding)           |
+      * u2 (`List<char>`)
+        * Length: 6, Null count: 4
+        * Null bitmap buffer:
 
-    * Offsets buffer (int32)
+          | Byte 0 (validity bitmap) | Bytes 1-63            |
+          |--------------------------|-----------------------|
+          | 00100100                 | 0 (padding)           |
 
-      | Bytes 0-3  | Bytes 4-7   | Bytes 8-11  | Bytes 12-15 | Bytes 16-19 | Bytes 20-23 | Bytes 24-27 | Bytes 28-63 |
-      |------------|-------------|-------------|-------------|-------------|-------------|-------------|-------------|
-      | 0          | 0           | 0           | 3           | 3           | 3           | 7           | unspecified |
+        * Offsets buffer (int32)
 
-    * Values array (char array):
-      * Length: 7,  Null count: 0
-      * Null bitmap buffer: Not required
+          | Bytes 0-3  | Bytes 4-7   | Bytes 8-11  | Bytes 12-15 | Bytes 16-19 | Bytes 20-23 | Bytes 24-27 | Bytes 28-63 |
+          |------------|-------------|-------------|-------------|-------------|-------------|-------------|-------------|
+          | 0          | 0           | 0           | 3           | 3           | 3           | 7           | unspecified |
 
-        | Bytes 0-7  | Bytes 8-63            |
-        |------------|-----------------------|
-        | joemark    | unspecified (padding) |
-```
+        * Values array (char array):
+          * Length: 7,  Null count: 0
+          * Null bitmap buffer: Not required
+
+            | Bytes 0-7  | Bytes 8-63            |
+            |------------|-----------------------|
+            | joemark    | unspecified (padding) |
 
 Note that nested types in a sparse union must be internally consistent
 (e.g. see the List in the diagram), i.e. random access at any index j
@@ -610,55 +610,55 @@ even if the null bitmap of the parent union array indicates the slot is
 null.  Additionally, a child array may have a non-null slot even if
 the types array indicates that a slot contains a different type at the index.
 
-## Dictionary encoding
+Dictionary encoding
+-------------------
 
 When a field is dictionary encoded, the values are represented by an array of
 Int32 representing the index of the value in the dictionary.  The Dictionary is
 received as one or more DictionaryBatches with the id referenced by a
-dictionary attribute defined in the metadata ([Message.fbs][7]) in the Field
+dictionary attribute defined in the metadata (Message.fbs) in the Field
 table.  The dictionary has the same layout as the type of the field would
 dictate. Each entry in the dictionary can be accessed by its index in the
 DictionaryBatches.  When a Schema references a Dictionary id, it must send at
 least one DictionaryBatch for this id.
 
-As an example, you could have the following data:
-```
-type: List<String>
-
-[
- ['a', 'b'],
- ['a', 'b'],
- ['a', 'b'],
- ['c', 'd', 'e'],
- ['c', 'd', 'e'],
- ['c', 'd', 'e'],
- ['c', 'd', 'e'],
- ['a', 'b']
-]
-```
-In dictionary-encoded form, this could appear as:
-```
-data List<String> (dictionary-encoded, dictionary id i)
-indices: [0, 0, 0, 1, 1, 1, 0]
-
-dictionary i
-
-type: List<String>
-
-[
- ['a', 'b'],
- ['c', 'd', 'e'],
-]
-```
-
-## References
-
-Apache Drill Documentation - [Value Vectors][6]
-
-[1]: https://en.wikipedia.org/wiki/Bit_numbering
-[2]: https://software.intel.com/en-us/articles/practical-intel-avx-optimization-on-2nd-generation-intel-core-processors
-[3]: https://en.wikipedia.org/wiki/Endianness
-[4]: https://software.intel.com/en-us/node/600110
-[5]: https://parquet.apache.org/documentation/latest/
-[6]: https://drill.apache.org/docs/value-vectors/
-[7]: https://github.com/apache/arrow/blob/master/format/Message.fbs
+As an example, you could have the following data: ::
+
+    type: List<String>
+
+    [
+     ['a', 'b'],
+     ['a', 'b'],
+     ['a', 'b'],
+     ['c', 'd', 'e'],
+     ['c', 'd', 'e'],
+     ['c', 'd', 'e'],
+     ['c', 'd', 'e'],
+     ['a', 'b']
+    ]
+
+In dictionary-encoded form, this could appear as: ::
+
+    data List<String> (dictionary-encoded, dictionary id i)
+    indices: [0, 0, 0, 1, 1, 1, 0]
+
+    dictionary i
+
+    type: List<String>
+
+    [
+     ['a', 'b'],
+     ['c', 'd', 'e'],
+    ]
+
+References
+----------
+
+Apache Drill Documentation - `Value Vectors`_
+
+.. _least-significant bit (LSB) numbering: https://en.wikipedia.org/wiki/Bit_numbering
+.. _Intel performance guide: https://software.intel.com/en-us/articles/practical-intel-avx-optimization-on-2nd-generation-intel-core-processors
+.. _Endianness: https://en.wikipedia.org/wiki/Endianness
+.. _SIMD: https://software.intel.com/en-us/node/600110
+.. _Parquet: https://parquet.apache.org/documentation/latest/
+.. _Value Vectors: https://drill.apache.org/docs/value-vectors/
diff --git a/format/Message.fbs b/docs/source/format/Message.fbs
similarity index 100%
rename from format/Message.fbs
rename to docs/source/format/Message.fbs
diff --git a/docs/source/format/Metadata.rst b/docs/source/format/Metadata.rst
new file mode 100644
index 0000000..4ed82e0
--- /dev/null
+++ b/docs/source/format/Metadata.rst
@@ -0,0 +1,394 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements.  See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership.  The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License.  You may obtain a copy of the License at
+
+..   http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied.  See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+Metadata: Logical types, schemas, data headers
+==============================================
+
+This is documentation for the Arrow metadata specification, which enables
+systems to communicate the
+
+* Logical array types (which are implemented using the physical memory layouts
+  specified in :doc:`Layout`)
+
+* Schemas for table-like collections of Arrow data structures
+
+* "Data headers" indicating the physical locations of memory buffers sufficient
+  to reconstruct Arrow data structures without copying memory.
+
+Canonical implementation
+------------------------
+
+We are using `Flatbuffers`_ for low-overhead reading and writing of the Arrow
+metadata. See ``Message.fbs``.
+
+Schemas
+-------
+
+The ``Schema`` type describes a table-like structure consisting of any number of
+Arrow arrays, each of which can be interpreted as a column in the table. A
+schema by itself does not describe the physical structure of any particular set
+of data.
+
+A schema consists of a sequence of **fields**, which are metadata describing
+the columns. The Flatbuffers IDL for a field is: ::
+
+    table Field {
+      // Name is not required, e.g. in a List
+      name: string;
+      nullable: bool;
+      type: Type;
+
+      // Present only if the field is dictionary encoded
+      dictionary: DictionaryEncoding;
+
+      // children apply only to Nested data types like Struct, List and Union
+      children: [Field];
+
+      // User-defined metadata
+      custom_metadata: [ KeyValue ];
+    }
+
+The ``type`` is the logical type of the field. Nested types, such as List,
+Struct, and Union, have a sequence of child fields.
+
+A JSON representation of the schema is also provided:
+
+Field: ::
+
+    {
+      "name" : "name_of_the_field",
+      "nullable" : false,
+      "type" : /* Type */,
+      "children" : [ /* Field */ ],
+    }
+
+Type: ::
+
+    {
+      "name" : "null|struct|list|union|int|floatingpoint|utf8|binary|fixedsizebinary|bool|decimal|date|time|timestamp|interval"
+      // fields as defined in the Flatbuffer depending on the type name
+    }
+
+Union: ::
+
+    {
+      "name" : "union",
+      "mode" : "Sparse|Dense",
+      "typeIds" : [ /* integer */ ]
+    }
+
+The ``typeIds`` field in the Union are the codes used to denote each type, which
+may be different from the index of the child array. This is so that the union
+type ids do not have to be enumerated from 0.
+
+Int: ::
+
+    {
+      "name" : "int",
+      "bitWidth" : /* integer */,
+      "isSigned" : /* boolean */
+    }
+
+FloatingPoint: ::
+
+    {
+      "name" : "floatingpoint",
+      "precision" : "HALF|SINGLE|DOUBLE"
+    }
+
+Decimal: ::
+
+    {
+      "name" : "decimal",
+      "precision" : /* integer */,
+      "scale" : /* integer */
+    }
+
+Timestamp: ::
+
+    {
+      "name" : "timestamp",
+      "unit" : "SECOND|MILLISECOND|MICROSECOND|NANOSECOND"
+    }
+
+Date: ::
+
+    {
+      "name" : "date",
+      "unit" : "DAY|MILLISECOND"
+    }
+
+Time: ::
+
+    {
+      "name" : "time",
+      "unit" : "SECOND|MILLISECOND|MICROSECOND|NANOSECOND",
+      "bitWidth": /* integer: 32 or 64 */
+    }
+
+Interval: ::
+
+    {
+      "name" : "interval",
+      "unit" : "YEAR_MONTH|DAY_TIME"
+    }
+
+Schema: ::
+
+    {
+      "fields" : [
+        /* Field */
+      ]
+    }
+
+Record data headers
+-------------------
+
+A record batch is a collection of top-level named, equal length Arrow arrays
+(or vectors). If one of the arrays contains nested data, its child arrays are
+not required to be the same length as the top-level arrays.
+
+One can be thought of as a realization of a particular schema. The metadata
+describing a particular record batch is called a "data header". Here is the
+Flatbuffers IDL for a record batch data header: ::
+
+    table RecordBatch {
+      length: long;
+      nodes: [FieldNode];
+      buffers: [Buffer];
+    }
+
+The ``RecordBatch`` metadata provides for record batches with length exceeding
+2 :sup:`31` - 1, but Arrow implementations are not required to implement support
+beyond this size.
+
+The ``nodes`` and ``buffers`` fields are produced by a depth-first traversal /
+flattening of a schema (possibly containing nested types) for a given in-memory
+data set.
+
+Buffers
+~~~~~~~
+
+A buffer is metadata describing a contiguous memory region relative to some
+virtual address space. This may include:
+
+* Shared memory, e.g. a memory-mapped file
+* An RPC message received in-memory
+* Data in a file
+
+The key form of the Buffer type is: ::
+
+    struct Buffer {
+      offset: long;
+      length: long;
+    }
+
+In the context of a record batch, each field has some number of buffers
+associated with it, which are derived from their physical memory layout.
+
+Each logical type (separate from its children, if it is a nested type) has a
+deterministic number of buffers associated with it. These will be specified in
+the logical types section.
+
+Field metadata
+~~~~~~~~~~~~~~
+
+The ``FieldNode`` values contain metadata about each level in a nested type
+hierarchy. ::
+
+    struct FieldNode {
+      /// The number of value slots in the Arrow array at this level of a nested
+      /// tree
+      length: long;
+
+      /// The number of observed nulls.
+      null_count: long;
+    }
+
+The ``FieldNode`` metadata provides for fields with length exceeding 2 :sup:`31` - 1,
+but Arrow implementations are not required to implement support for large
+arrays.
+
+Flattening of nested data
+-------------------------
+
+Nested types are flattened in the record batch in depth-first order. When
+visiting each field in the nested type tree, the metadata is appended to the
+top-level ``fields`` array and the buffers associated with that field (but not
+its children) are appended to the ``buffers`` array.
+
+For example, let's consider the schema ::
+
+    col1: Struct<a: Int32, b: List<Int64>, c: Float64>
+    col2: Utf8
+
+The flattened version of this is: ::
+
+    FieldNode 0: Struct name='col1'
+    FieldNode 1: Int32 name='a'
+    FieldNode 2: List name='b'
+    FieldNode 3: Int64 name='item'  # arbitrary
+    FieldNode 4: Float64 name='c'
+    FieldNode 5: Utf8 name='col2'
+
+For the buffers produced, we would have the following (as described in more
+detail for each type below): ::
+
+    buffer 0: field 0 validity bitmap
+
+    buffer 1: field 1 validity bitmap
+    buffer 2: field 1 values <int32_t*>
+
+    buffer 3: field 2 validity bitmap
+    buffer 4: field 2 list offsets <int32_t*>
+
+    buffer 5: field 3 validity bitmap
+    buffer 6: field 3 values <int64_t*>
+
+    buffer 7: field 4 validity bitmap
+    buffer 8: field 4 values <double*>
+
+    buffer 9: field 5 validity bitmap
+    buffer 10: field 5 offsets <int32_t*>
+    buffer 11: field 5 data <uint8_t*>
+
+Logical types
+-------------
+
+A logical type consists of a type name and metadata along with an explicit
+mapping to a physical memory representation. These may fall into some different
+categories:
+
+* Types represented as fixed-width primitive arrays (for example: C-style
+  integers and floating point numbers)
+* Types having equivalent memory layout to a physical nested type (e.g. strings
+  use the list representation, but logically are not nested types)
+
+Integers
+~~~~~~~~
+
+In the first version of Arrow we provide the standard 8-bit through 64-bit size
+standard C integer types, both signed and unsigned:
+
+* Signed types: Int8, Int16, Int32, Int64
+* Unsigned types: UInt8, UInt16, UInt32, UInt64
+
+The IDL looks like: ::
+
+    table Int {
+      bitWidth: int;
+      is_signed: bool;
+    }
+
+The integer endianness is currently set globally at the schema level. If a
+schema is set to be little-endian, then all integer types occurring within must
+be little-endian. Integers that are part of other data representations, such as
+list offsets and union types, must have the same endianness as the entire
+record batch.
+
+Floating point numbers
+~~~~~~~~~~~~~~~~~~~~~~
+
+We provide 3 types of floating point numbers as fixed bit-width primitive array
+
+- Half precision, 16-bit width
+- Single precision, 32-bit width
+- Double precision, 64-bit width
+
+The IDL looks like: ::
+
+    enum Precision:int {HALF, SINGLE, DOUBLE}
+
+    table FloatingPoint {
+      precision: Precision;
+    }
+
+Boolean
+~~~~~~~
+
+The Boolean logical type is represented as a 1-bit wide primitive physical
+type. The bits are numbered using least-significant bit (LSB) ordering.
+
+Like other fixed bit-width primitive types, boolean data appears as 2 buffers
+in the data header (one bitmap for the validity vector and one for the values).
+
+List
+~~~~
+
+The ``List`` logical type is the logical (and identically-named) counterpart to
+the List physical type.
+
+In data header form, the list field node contains 2 buffers:
+
+* Validity bitmap
+* List offsets
+
+The buffers associated with a list's child field are handled recursively
+according to the child logical type (e.g. ``List<Utf8>`` vs. ``List<Boolean>``).
+
+Utf8 and Binary
+~~~~~~~~~~~~~~~
+
+We specify two logical types for variable length bytes:
+
+* ``Utf8`` data is Unicode values with UTF-8 encoding
+* ``Binary`` is any other variable length bytes
+
+These types both have the same memory layout as the nested type ``List<UInt8>``,
+with the constraint that the inner bytes can contain no null values. From a
+logical type perspective they are primitive, not nested types.
+
+In data header form, while ``List<UInt8>`` would appear as 2 field nodes (``List``
+and ``UInt8``) and 4 buffers (2 for each of the nodes, as per above), these types
+have a simplified representation single field node (of ``Utf8`` or ``Binary``
+logical type, which have no children) and 3 buffers:
+
+* Validity bitmap
+* List offsets
+* Byte data
+
+Decimal
+~~~~~~~
+
+Decimals are represented as a 2's complement 128-bit (16 byte) signed integer
+in little-endian byte order.
+
+Timestamp
+~~~~~~~~~
+
+All timestamps are stored as a 64-bit integer, with one of four unit
+resolutions: second, millisecond, microsecond, and nanosecond.
+
+Date
+~~~~
+
+We support two different date types:
+
+* Days since the UNIX epoch as a 32-bit integer
+* Milliseconds since the UNIX epoch as a 64-bit integer
+
+Time
+~~~~
+
+Time supports the same unit resolutions: second, millisecond, microsecond, and
+nanosecond. We represent time as the smallest integer accommodating the
+indicated unit. For second and millisecond: 32-bit, for the others 64-bit.
+
+Dictionary encoding
+-------------------
+
+.. _Flatbuffers: http://github.com/google/flatbuffers
diff --git a/docs/source/format/README.rst b/docs/source/format/README.rst
new file mode 100644
index 0000000..f2f770b
--- /dev/null
+++ b/docs/source/format/README.rst
@@ -0,0 +1,53 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements.  See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership.  The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License.  You may obtain a copy of the License at
+
+..   http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied.  See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+Arrow specification documents
+=============================
+
+Currently, the Arrow specification consists of these pieces:
+
+- Metadata specification (see :doc:`Metadata`)
+- Physical memory layout specification (see :doc:`Layout`)
+- Logical Types, Schemas, and Record Batch Metadata (see Schema.fbs)
+- Encapsulated Messages (see Message.fbs)
+- Mechanics of messaging between Arrow systems (IPC, RPC, etc.) (see :doc:`IPC`)
+- Tensor (Multi-dimensional array) Metadata (see Tensor.fbs)
+
+The metadata currently uses Google's `flatbuffers library`_ for serializing a
+couple related pieces of information:
+
+- Schemas for tables or record (row) batches. This contains the logical types,
+  field names, and other metadata. Schemas do not contain any information about
+  actual data.
+- *Data headers* for record (row) batches. These must correspond to a known
+  schema, and enable a system to send and receive Arrow row batches in a form
+  that can be precisely disassembled or reconstructed.
+
+Arrow Format Maturity and Stability
+-----------------------------------
+
+We have made significant progress hardening the Arrow in-memory format and
+Flatbuffer metadata since the project started in February 2016. We have
+integration tests which verify binary compatibility between the Java and C++
+implementations, for example.
+
+Major versions may still include breaking changes to the memory format or
+metadata, so it is recommended to use the same released version of all
+libraries in your applications for maximum compatibility. Data stored in the
+Arrow IPC formats should not be used for long term storage.
+
+.. _flatbuffers library: http://github.com/google/flatbuffers
diff --git a/format/Schema.fbs b/docs/source/format/Schema.fbs
similarity index 100%
rename from format/Schema.fbs
rename to docs/source/format/Schema.fbs
diff --git a/format/Tensor.fbs b/docs/source/format/Tensor.fbs
similarity index 100%
rename from format/Tensor.fbs
rename to docs/source/format/Tensor.fbs
diff --git a/python/doc/source/index.rst b/docs/source/index.rst
similarity index 62%
copy from python/doc/source/index.rst
copy to docs/source/index.rst
index 712b105..fa6c683 100644
--- a/python/doc/source/index.rst
+++ b/docs/source/index.rst
@@ -15,8 +15,8 @@
 .. specific language governing permissions and limitations
 .. under the License.
 
-Python bindings for Apache Arrow
-================================
+Apache Arrow
+============
 
 Apache Arrow is a cross-language development platform for in-memory data. It
 specifies a standardized language-independent columnar memory format for flat
@@ -24,31 +24,19 @@ and hierarchical data, organized for efficient analytic operations on modern
 hardware. It also provides computational libraries and zero-copy streaming
 messaging and interprocess communication.
 
-The Arrow Python bindings have first-class integration with NumPy, pandas, and
-built-in Python objects.
+.. toctree::
+   :maxdepth: 1
+   :caption: Memory Format
 
-This is the documentation of the Python API of Apache Arrow. For more details
-on the format and other language bindings see
-`the main page for Arrow <https://arrow.apache.org/>`_. Here will we only
-detail the usage of the Python API for Arrow and the leaf libraries that add
-additional functionality such as reading Apache Parquet files into Arrow
-structures.
+   format/README
+   format/Guidelines
+   format/Layout
+   format/Metadata
+   format/IPC
 
 .. toctree::
    :maxdepth: 2
-   :caption: Getting Started
-
-   install
-   development
-   memory
-   data
-   ipc
-   filesystems
-   plasma
-   numpy
-   pandas
-   csv
-   parquet
-   extending
-   api
-   getting_involved
+   :caption: Languages
+
+   cpp/index
+   python/index
diff --git a/python/doc/source/api.rst b/docs/source/python/api.rst
similarity index 100%
rename from python/doc/source/api.rst
rename to docs/source/python/api.rst
diff --git a/python/doc/source/csv.rst b/docs/source/python/csv.rst
similarity index 100%
rename from python/doc/source/csv.rst
rename to docs/source/python/csv.rst
diff --git a/python/doc/source/data.rst b/docs/source/python/data.rst
similarity index 100%
rename from python/doc/source/data.rst
rename to docs/source/python/data.rst
diff --git a/python/doc/source/development.rst b/docs/source/python/development.rst
similarity index 100%
rename from python/doc/source/development.rst
rename to docs/source/python/development.rst
diff --git a/python/doc/source/extending.rst b/docs/source/python/extending.rst
similarity index 100%
rename from python/doc/source/extending.rst
rename to docs/source/python/extending.rst
diff --git a/python/doc/source/filesystems.rst b/docs/source/python/filesystems.rst
similarity index 100%
rename from python/doc/source/filesystems.rst
rename to docs/source/python/filesystems.rst
diff --git a/python/doc/source/getting_involved.rst b/docs/source/python/getting_involved.rst
similarity index 100%
rename from python/doc/source/getting_involved.rst
rename to docs/source/python/getting_involved.rst
diff --git a/python/doc/source/index.rst b/docs/source/python/index.rst
similarity index 61%
rename from python/doc/source/index.rst
rename to docs/source/python/index.rst
index 712b105..5628219 100644
--- a/python/doc/source/index.rst
+++ b/docs/source/python/index.rst
@@ -15,24 +15,17 @@
 .. specific language governing permissions and limitations
 .. under the License.
 
-Python bindings for Apache Arrow
-================================
-
-Apache Arrow is a cross-language development platform for in-memory data. It
-specifies a standardized language-independent columnar memory format for flat
-and hierarchical data, organized for efficient analytic operations on modern
-hardware. It also provides computational libraries and zero-copy streaming
-messaging and interprocess communication.
+Python bindings
+===============
 
 The Arrow Python bindings have first-class integration with NumPy, pandas, and
-built-in Python objects.
+built-in Python objects. They are based on the C++ implementation of Arrow.
 
 This is the documentation of the Python API of Apache Arrow. For more details
-on the format and other language bindings see
-`the main page for Arrow <https://arrow.apache.org/>`_. Here will we only
-detail the usage of the Python API for Arrow and the leaf libraries that add
-additional functionality such as reading Apache Parquet files into Arrow
-structures.
+on the format and other language bindings see the parent documentation.
+Here we will only detail the usage of the Python API for Arrow and the leaf
+libraries that add additional functionality such as reading Apache Parquet
+files into Arrow structures.
 
 .. toctree::
    :maxdepth: 2
@@ -52,3 +45,4 @@ structures.
    extending
    api
    getting_involved
+
diff --git a/python/doc/source/install.rst b/docs/source/python/install.rst
similarity index 100%
rename from python/doc/source/install.rst
rename to docs/source/python/install.rst
diff --git a/python/doc/source/ipc.rst b/docs/source/python/ipc.rst
similarity index 100%
rename from python/doc/source/ipc.rst
rename to docs/source/python/ipc.rst
diff --git a/python/doc/source/memory.rst b/docs/source/python/memory.rst
similarity index 100%
rename from python/doc/source/memory.rst
rename to docs/source/python/memory.rst
diff --git a/python/doc/source/numpy.rst b/docs/source/python/numpy.rst
similarity index 97%
rename from python/doc/source/numpy.rst
rename to docs/source/python/numpy.rst
index 303e182..870f9cb 100644
--- a/python/doc/source/numpy.rst
+++ b/docs/source/python/numpy.rst
@@ -17,8 +17,8 @@
 
 .. _numpy_interop:
 
-Using PyArrow with NumPy
-========================
+NumPy Integration
+=================
 
 PyArrow allows converting back and forth from
 `NumPy <https://www.numpy.org/>`_ arrays to Arrow :ref:`Arrays <data.array>`.
diff --git a/python/doc/source/pandas.rst b/docs/source/python/pandas.rst
similarity index 99%
rename from python/doc/source/pandas.rst
rename to docs/source/python/pandas.rst
index 6ade171..16b4ff6 100644
--- a/python/doc/source/pandas.rst
+++ b/docs/source/python/pandas.rst
@@ -17,8 +17,8 @@
 
 .. _pandas_interop:
 
-Using PyArrow with pandas
-=========================
+Pandas Integration
+==================
 
 To interface with `pandas <https://pandas.pydata.org/>`_, PyArrow provides
 various conversion routines to consume pandas structures and convert back
diff --git a/python/doc/source/parquet.rst b/docs/source/python/parquet.rst
similarity index 100%
rename from python/doc/source/parquet.rst
rename to docs/source/python/parquet.rst
diff --git a/python/doc/source/plasma.rst b/docs/source/python/plasma.rst
similarity index 100%
rename from python/doc/source/plasma.rst
rename to docs/source/python/plasma.rst
diff --git a/format/Guidelines.md b/format/Guidelines.md
deleted file mode 100644
index 7b5f3a1..0000000
--- a/format/Guidelines.md
+++ /dev/null
@@ -1,35 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-# Implementation guidelines
-
-An execution engine (or framework, or UDF executor, or storage engine, etc) can implements only a subset of the Arrow spec and/or extend it given the following constraints:
-
-## Implementing a subset the spec
-### If only producing (and not consuming) arrow vectors.
-Any subset of the vector spec and the corresponding metadata can be implemented.
-
-### If consuming and producing vectors
-There is a minimal subset of vectors to be supported.
-Production of a subset of vectors and their corresponding metadata is always fine.
-Consumption of vectors should at least convert the unsupported input vectors to the supported subset (for example Timestamp.millis to timestamp.micros or int32 to int64)
-
-## Extensibility
-An execution engine implementor can also extend their memory representation with their own vectors internally as long as they are never exposed. Before sending data to another system expecting Arrow data these custom vectors should be converted to a type that exist in the Arrow spec.
-An example of this is operating on compressed data.
-These custom vectors are not exchanged externally and there is no support for custom metadata.
diff --git a/format/IPC.md b/format/IPC.md
deleted file mode 100644
index 97c1790..0000000
--- a/format/IPC.md
+++ /dev/null
@@ -1,253 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-# Interprocess messaging / communication (IPC)
-
-## Encapsulated message format
-
-Data components in the stream and file formats are represented as encapsulated
-*messages* consisting of:
-
-* A length prefix indicating the metadata size
-* The message metadata as a [Flatbuffer][3]
-* Padding bytes to an 8-byte boundary
-* The message body, which must be a multiple of 8 bytes
-
-Schematically, we have:
-
-```
-<metadata_size: int32>
-<metadata_flatbuffer: bytes>
-<padding>
-<message body>
-```
-
-The complete serialized message must be a multiple of 8 bytes so that messages
-can be relocated between streams. Otherwise the amount of padding between the
-metadata and the message body could be non-deterministic.
-
-The `metadata_size` includes the size of the flatbuffer plus padding. The
-`Message` flatbuffer includes a version number, the particular message (as a
-flatbuffer union), and the size of the message body:
-
-```
-table Message {
-  version: org.apache.arrow.flatbuf.MetadataVersion;
-  header: MessageHeader;
-  bodyLength: long;
-}
-```
-
-Currently, we support 4 types of messages:
-
-* Schema
-* RecordBatch
-* DictionaryBatch
-* Tensor
-
-## Streaming format
-
-We provide a streaming format for record batches. It is presented as a sequence
-of encapsulated messages, each of which follows the format above. The schema
-comes first in the stream, and it is the same for all of the record batches
-that follow. If any fields in the schema are dictionary-encoded, one or more
-`DictionaryBatch` messages will be included. `DictionaryBatch` and
-`RecordBatch` messages may be interleaved, but before any dictionary key is used
-in a `RecordBatch` it should be defined in a `DictionaryBatch`.
-
-```
-<SCHEMA>
-<DICTIONARY 0>
-...
-<DICTIONARY k - 1>
-<RECORD BATCH 0>
-...
-<DICTIONARY x DELTA>
-...
-<DICTIONARY y DELTA>
-...
-<RECORD BATCH n - 1>
-<EOS [optional]: int32>
-```
-
-When a stream reader implementation is reading a stream, after each message, it
-may read the next 4 bytes to know how large the message metadata that follows
-is. Once the message flatbuffer is read, you can then read the message body.
-
-The stream writer can signal end-of-stream (EOS) either by writing a 0 length
-as an `int32` or simply closing the stream interface.
-
-## File format
-
-We define a "file format" supporting random access in a very similar format to
-the streaming format. The file starts and ends with a magic string `ARROW1`
-(plus padding). What follows in the file is identical to the stream format. At
-the end of the file, we write a *footer* containing a redundant copy of the
-schema (which is a part of the streaming format) plus memory offsets and sizes
-for each of the data blocks in the file. This enables random access any record
-batch in the file. See [format/File.fbs][1] for the precise details of the file
-footer.
-
-Schematically we have:
-
-```
-<magic number "ARROW1">
-<empty padding bytes [to 8 byte boundary]>
-<STREAMING FORMAT>
-<FOOTER>
-<FOOTER SIZE: int32>
-<magic number "ARROW1">
-```
-
-In the file format, there is no requirement that dictionary keys should be
-defined in a `DictionaryBatch` before they are used in a `RecordBatch`, as long
-as the keys are defined somewhere in the file.
-
-### RecordBatch body structure
-
-The `RecordBatch` metadata contains a depth-first (pre-order) flattened set of
-field metadata and physical memory buffers (some comments from [Message.fbs][2]
-have been shortened / removed):
-
-```
-table RecordBatch {
-  length: long;
-  nodes: [FieldNode];
-  buffers: [Buffer];
-}
-
-struct FieldNode {
-  length: long;
-  null_count: long;
-}
-
-struct Buffer {
-  /// The relative offset into the shared memory page where the bytes for this
-  /// buffer starts
-  offset: long;
-
-  /// The absolute length (in bytes) of the memory buffer. The memory is found
-  /// from offset (inclusive) to offset + length (non-inclusive).
-  length: long;
-}
-```
-
-In the context of a file, the `page` is not used, and the `Buffer` offsets use
-as a frame of reference the start of the message body. So, while in a general
-IPC setting these offsets may be anyplace in one or more shared memory regions,
-in the file format the offsets start from 0.
-
-The location of a record batch and the size of the metadata block as well as
-the body of buffers is stored in the file footer:
-
-```
-struct Block {
-  offset: long;
-  metaDataLength: int;
-  bodyLength: long;
-}
-```
-
-The `metaDataLength` here includes the metadata length prefix, serialized
-metadata, and any additional padding bytes, and by construction must be a
-multiple of 8 bytes.
-
-Some notes about this
-
-* The `Block` offset indicates the starting byte of the record batch.
-* The metadata length includes the flatbuffer size, the record batch metadata
-  flatbuffer, and any padding bytes
-
-### Dictionary Batches
-
-Dictionaries are written in the stream and file formats as a sequence of record
-batches, each having a single field. The complete semantic schema for a
-sequence of record batches, therefore, consists of the schema along with all of
-the dictionaries. The dictionary types are found in the schema, so it is
-necessary to read the schema to first determine the dictionary types so that
-the dictionaries can be properly interpreted.
-
-```
-table DictionaryBatch {
-  id: long;
-  data: RecordBatch;
-  isDelta: boolean = false;
-}
-```
-
-The dictionary `id` in the message metadata can be referenced one or more times
-in the schema, so that dictionaries can even be used for multiple fields. See
-the [Physical Layout][4] document for more about the semantics of
-dictionary-encoded data.
-
-The dictionary `isDelta` flag allows dictionary batches to be modified
-mid-stream.  A dictionary batch with `isDelta` set indicates that its vector
-should be concatenated with those of any previous batches with the same `id`. A
-stream which encodes one column, the list of strings
-`["A", "B", "C", "B", "D", "C", "E", "A"]`, with a delta dictionary batch could
-take the form:
-
-```
-<SCHEMA>
-<DICTIONARY 0>
-(0) "A"
-(1) "B"
-(2) "C"
-
-<RECORD BATCH 0>
-0
-1
-2
-1
-
-<DICTIONARY 0 DELTA>
-(3) "D"
-(4) "E"
-
-<RECORD BATCH 1>
-3
-2
-4
-0
-EOS
-```
-
-### Tensor (Multi-dimensional Array) Message Format
-
-The `Tensor` message types provides a way to write a multidimensional array of
-fixed-size values (such as a NumPy ndarray) using Arrow's shared memory
-tools. Arrow implementations in general are not required to implement this data
-format, though we provide a reference implementation in C++.
-
-When writing a standalone encapsulated tensor message, we use the format as
-indicated above, but additionally align the starting offset of the metadata as
-well as the starting offset of the tensor body (if writing to a shared memory
-region) to be multiples of 64 bytes:
-
-```
-<PADDING>
-<metadata size: int32>
-<metadata>
-<tensor body>
-```
-
-[1]: https://github.com/apache/arrow/blob/master/format/File.fbs
-[2]: https://github.com/apache/arrow/blob/master/format/Message.fbs
-[3]: https://github.com/google/flatbuffers
-[4]: https://github.com/apache/arrow/blob/master/format/Layout.md
diff --git a/format/Metadata.md b/format/Metadata.md
deleted file mode 100644
index 33d5065..0000000
--- a/format/Metadata.md
+++ /dev/null
@@ -1,409 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-# Metadata: Logical types, schemas, data headers
-
-This is documentation for the Arrow metadata specification, which enables
-systems to communicate the
-
-* Logical array types (which are implemented using the physical memory layouts
-  specified in [Layout.md][1])
-
-* Schemas for table-like collections of Arrow data structures
-
-* "Data headers" indicating the physical locations of memory buffers sufficient
-  to reconstruct a Arrow data structures without copying memory.
-
-## Canonical implementation
-
-We are using [Flatbuffers][2] for low-overhead reading and writing of the Arrow
-metadata. See [Message.fbs][3].
-
-## Schemas
-
-The `Schema` type describes a table-like structure consisting of any number of
-Arrow arrays, each of which can be interpreted as a column in the table. A
-schema by itself does not describe the physical structure of any particular set
-of data.
-
-A schema consists of a sequence of **fields**, which are metadata describing
-the columns. The Flatbuffers IDL for a field is:
-
-```
-table Field {
-  // Name is not required, in i.e. a List
-  name: string;
-  nullable: bool;
-  type: Type;
-
-  // Present only if the field is dictionary encoded
-  dictionary: DictionaryEncoding;
-
-  // children apply only to Nested data types like Struct, List and Union
-  children: [Field];
-
-  // User-defined metadata
-  custom_metadata: [ KeyValue ];
-}
-```
-
-The `type` is the logical type of the field. Nested types, such as List,
-Struct, and Union, have a sequence of child fields.
-
-A JSON representation of the schema is also provided:
-Field:
-```
-{
-  "name" : "name_of_the_field",
-  "nullable" : false,
-  "type" : /* Type */,
-  "children" : [ /* Field */ ],
-}
-```
-
-Type:
-```
-{
-  "name" : "null|struct|list|union|int|floatingpoint|utf8|binary|fixedsizebinary|bool|decimal|date|time|timestamp|interval"
-  // fields as defined in the Flatbuffer depending on the type name
-}
-```
-
-Union:
-```
-{
-  "name" : "union",
-  "mode" : "Sparse|Dense",
-  "typeIds" : [ /* integer */ ]
-}
-```
-
-The `typeIds` field in the Union are the codes used to denote each type, which
-may be different from the index of the child array. This is so that the union
-type ids do not have to be enumerated from 0.
-
-Int:
-```
-{
-  "name" : "int",
-  "bitWidth" : /* integer */,
-  "isSigned" : /* boolean */
-}
-```
-FloatingPoint:
-```
-{
-  "name" : "floatingpoint",
-  "precision" : "HALF|SINGLE|DOUBLE"
-}
-```
-Decimal:
-```
-{
-  "name" : "decimal",
-  "precision" : /* integer */,
-  "scale" : /* integer */
-}
-```
-
-Timestamp:
-
-```
-{
-  "name" : "timestamp",
-  "unit" : "SECOND|MILLISECOND|MICROSECOND|NANOSECOND"
-}
-```
-
-Date:
-
-```
-{
-  "name" : "date",
-  "unit" : "DAY|MILLISECOND"
-}
-```
-
-Time:
-
-```
-{
-  "name" : "time",
-  "unit" : "SECOND|MILLISECOND|MICROSECOND|NANOSECOND",
-  "bitWidth": /* integer: 32 or 64 */
-}
-```
-
-Interval:
-
-```
-{
-  "name" : "interval",
-  "unit" : "YEAR_MONTH|DAY_TIME"
-}
-```
-Schema:
-```
-{
-  "fields" : [
-    /* Field */
-  ]
-}
-```
-
-## Record data headers
-
-A record batch is a collection of top-level named, equal length Arrow arrays
-(or vectors). If one of the arrays contains nested data, its child arrays are
-not required to be the same length as the top-level arrays.
-
-One can be thought of as a realization of a particular schema. The metadata
-describing a particular record batch is called a "data header". Here is the
-Flatbuffers IDL for a record batch data header
-
-```
-table RecordBatch {
-  length: long;
-  nodes: [FieldNode];
-  buffers: [Buffer];
-}
-```
-
-The `RecordBatch` metadata provides for record batches with length exceeding
-2^31 - 1, but Arrow implementations are not required to implement support
-beyond this size.
-
-The `nodes` and `buffers` fields are produced by a depth-first traversal /
-flattening of a schema (possibly containing nested types) for a given in-memory
-data set.
-
-### Buffers
-
-A buffer is metadata describing a contiguous memory region relative to some
-virtual address space. This may include:
-
-* Shared memory, e.g. a memory-mapped file
-* An RPC message received in-memory
-* Data in a file
-
-The key form of the Buffer type is:
-
-```
-struct Buffer {
-  offset: long;
-  length: long;
-}
-```
-
-In the context of a record batch, each field has some number of buffers
-associated with it, which are derived from their physical memory layout.
-
-Each logical type (separate from its children, if it is a nested type) has a
-deterministic number of buffers associated with it. These will be specified in
-the logical types section.
-
-### Field metadata
-
-The `FieldNode` values contain metadata about each level in a nested type
-hierarchy.
-
-```
-struct FieldNode {
-  /// The number of value slots in the Arrow array at this level of a nested
-  /// tree
-  length: long;
-
-  /// The number of observed nulls.
-  null_count: lohng;
-}
-```
-
-The `FieldNode` metadata provides for fields with length exceeding 2^31 - 1,
-but Arrow implementations are not required to implement support for large
-arrays.
-
-## Flattening of nested data
-
-Nested types are flattened in the record batch in depth-first order. When
-visiting each field in the nested type tree, the metadata is appended to the
-top-level `fields` array and the buffers associated with that field (but not
-its children) are appended to the `buffers` array.
-
-For example, let's consider the schema
-
-```
-col1: Struct<a: Int32, b: List<Int64>, c: Float64>
-col2: Utf8
-```
-
-The flattened version of this is:
-
-```
-FieldNode 0: Struct name='col1'
-FieldNode 1: Int32 name=a'
-FieldNode 2: List name='b'
-FieldNode 3: Int64 name='item'  # arbitrary
-FieldNode 4: Float64 name='c'
-FieldNode 5: Utf8 name='col2'
-```
-
-For the buffers produced, we would have the following (as described in more
-detail for each type below):
-
-```
-buffer 0: field 0 validity bitmap
-
-buffer 1: field 1 validity bitmap
-buffer 2: field 1 values <int32_t*>
-
-buffer 3: field 2 validity bitmap
-buffer 4: field 2 list offsets <int32_t*>
-
-buffer 5: field 3 validity bitmap
-buffer 6: field 3 values <int64_t*>
-
-buffer 7: field 4 validity bitmap
-buffer 8: field 4 values <double*>
-
-buffer 9: field 5 validity bitmap
-buffer 10: field 5 offsets <int32_t*>
-buffer 11: field 5 data <uint8_t*>
-```
-
-## Logical types
-
-A logical type consists of a type name and metadata along with an explicit
-mapping to a physical memory representation. These may fall into some different
-categories:
-
-* Types represented as fixed-width primitive arrays (for example: C-style
-  integers and floating point numbers)
-* Types having equivalent memory layout to a physical nested type (e.g. strings
-  use the list representation, but logically are not nested types)
-
-### Integers
-
-In the first version of Arrow we provide the standard 8-bit through 64-bit size
-standard C integer types, both signed and unsigned:
-
-* Signed types: Int8, Int16, Int32, Int64
-* Unsigned types: UInt8, UInt16, UInt32, UInt64
-
-The IDL looks like:
-
-```
-table Int {
-  bitWidth: int;
-  is_signed: bool;
-}
-```
-
-The integer endianness is currently set globally at the schema level. If a
-schema is set to be little-endian, then all integer types occurring within must
-be little-endian. Integers that are part of other data representations, such as
-list offsets and union types, must have the same endianness as the entire
-record batch.
-
-### Floating point numbers
-
-We provide 3 types of floating point numbers as fixed bit-width primitive array
-
-- Half precision, 16-bit width
-- Single precision, 32-bit width
-- Double precision, 64-bit width
-
-The IDL looks like:
-
-```
-enum Precision:int {HALF, SINGLE, DOUBLE}
-
-table FloatingPoint {
-  precision: Precision;
-}
-```
-
-### Boolean
-
-The Boolean logical type is represented as a 1-bit wide primitive physical
-type. The bits are numbered using least-significant bit (LSB) ordering.
-
-Like other fixed bit-width primitive types, boolean data appears as 2 buffers
-in the data header (one bitmap for the validity vector and one for the values).
-
-### List
-
-The `List` logical type is the logical (and identically-named) counterpart to
-the List physical type.
-
-In data header form, the list field node contains 2 buffers:
-
-* Validity bitmap
-* List offsets
-
-The buffers associated with a list's child field are handled recursively
-according to the child logical type (e.g. `List<Utf8>` vs. `List<Boolean>`).
-
-### Utf8 and Binary
-
-We specify two logical types for variable length bytes:
-
-* `Utf8` data is Unicode values with UTF-8 encoding
-* `Binary` is any other variable length bytes
-
-These types both have the same memory layout as the nested type `List<UInt8>`,
-with the constraint that the inner bytes can contain no null values. From a
-logical type perspective they are primitive, not nested types.
-
-In data header form, while `List<UInt8>` would appear as 2 field nodes (`List`
-and `UInt8`) and 4 buffers (2 for each of the nodes, as per above), these types
-have a simplified representation single field node (of `Utf8` or `Binary`
-logical type, which have no children) and 3 buffers:
-
-* Validity bitmap
-* List offsets
-* Byte data
-
-### Decimal
-
-Decimals are represented as a 2's complement 128-bit (16 byte) signed integer
-in little-endian byte order.
-
-### Timestamp
-
-All timestamps are stored as a 64-bit integer, with one of four unit
-resolutions: second, millisecond, microsecond, and nanosecond.
-
-### Date
-
-We support two different date types:
-
-* Days since the UNIX epoch as a 32-bit integer
-* Milliseconds since the UNIX epoch as a 64-bit integer
-
-### Time
-
-Time supports the same unit resolutions: second, millisecond, microsecond, and
-nanosecond. We represent time as the smallest integer accommodating the
-indicated unit. For second and millisecond: 32-bit, for the others 64-bit.
-
-## Dictionary encoding
-
-[1]: https://github.com/apache/arrow/blob/master/format/Layout.md
-[2]: http://github.com/google/flatbuffers
-[3]: https://github.com/apache/arrow/blob/master/format/Message.fbs
diff --git a/format/README.md b/format/README.md
deleted file mode 100644
index c87ac2a..0000000
--- a/format/README.md
+++ /dev/null
@@ -1,53 +0,0 @@
-<!---
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-
-## Arrow specification documents
-
-Currently, the Arrow specification consists of these pieces:
-
-- Metadata specification (see Metadata.md)
-- Physical memory layout specification (see Layout.md)
-- Logical Types, Schemas, and Record Batch Metadata (see Schema.fbs)
-- Encapsulated Messages (see Message.fbs)
-- Mechanics of messaging between Arrow systems (IPC, RPC, etc.) (see IPC.md)
-- Tensor (Multi-dimensional array) Metadata (see Tensor.fbs)
-
-The metadata currently uses Google's [flatbuffers library][1] for serializing a
-couple related pieces of information:
-
-- Schemas for tables or record (row) batches. This contains the logical types,
-  field names, and other metadata. Schemas do not contain any information about
-  actual data.
-- *Data headers* for record (row) batches. These must correspond to a known
-   schema, and enable a system to send and receive Arrow row batches in a form
-   that can be precisely disassembled or reconstructed.
-
-## Arrow Format Maturity and Stability
-
-We have made significant progress hardening the Arrow in-memory format and
-Flatbuffer metadata since the project started in February 2016. We have
-integration tests which verify binary compatibility between the Java and C++
-implementations, for example.
-
-Major versions may still include breaking changes to the memory format or
-metadata, so it is recommended to use the same released version of all
-libraries in your applications for maximum compatibility. Data stored in the
-Arrow IPC formats should not be used for long term storage.
-
-[1]: http://github.com/google/flatbuffers
diff --git a/java/flight/pom.xml b/java/flight/pom.xml
index c6de29f..238d99f 100644
--- a/java/flight/pom.xml
+++ b/java/flight/pom.xml
@@ -165,7 +165,7 @@
           <execution>
             <id>src</id>
             <configuration>
-              <protoSourceRoot>${basedir}/../../format/</protoSourceRoot>
+              <protoSourceRoot>${basedir}/../../docs/source/format/</protoSourceRoot>
               <outputDirectory>${project.build.directory}/generated-sources/protobuf</outputDirectory>
             </configuration>
             <goals>
diff --git a/java/format/pom.xml b/java/format/pom.xml
index d5ccd5f..3adbc01 100644
--- a/java/format/pom.xml
+++ b/java/format/pom.xml
@@ -104,10 +104,10 @@
               <argument>-j</argument>
               <argument>-o</argument>
               <argument>${flatc.generated.files}</argument>
-              <argument>../../format/Schema.fbs</argument>
-              <argument>../../format/Tensor.fbs</argument>
-              <argument>../../format/File.fbs</argument>
-              <argument>../../format/Message.fbs</argument>
+              <argument>../../docs/source/format/Schema.fbs</argument>
+              <argument>../../docs/source/format/Tensor.fbs</argument>
+              <argument>../../docs/source/format/File.fbs</argument>
+              <argument>../../docs/source/format/Message.fbs</argument>
             </arguments>
           </configuration>
         </execution>
diff --git a/python/README.md b/python/README.md
index a0d727e..d91f02d 100644
--- a/python/README.md
+++ b/python/README.md
@@ -79,10 +79,10 @@ and look for the "custom options" section.
 ### Building the documentation
 
 ```bash
-pip install -r doc/requirements.txt
-python setup.py build_sphinx -s doc/source
+pip install -r ../docs/requirements.txt
+python setup.py build_sphinx -s ../docs/source
 ```
 
 [2]: https://github.com/apache/arrow/blob/master/python/doc/source/development.rst
 [3]: https://github.com/pandas-dev/pandas
-[4]: https://docs.pytest.org/en/latest/
\ No newline at end of file
+[4]: https://docs.pytest.org/en/latest/