You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by pa...@apache.org on 2023/06/15 01:23:27 UTC
[arrow-nanoarrow] branch main updated: feat(python): Python schema, array, and array view skeleton (#117)

This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 2f05e99  feat(python): Python schema, array, and array view skeleton (#117)
2f05e99 is described below

commit 2f05e99d18638cdd07675642f9a7dd2a73211066
Author: Dewey Dunnington <de...@dunnington.ca>
AuthorDate: Wed Jun 14 22:23:21 2023 -0300

    feat(python): Python schema, array, and array view skeleton (#117)
    
    This PR is an attempt to add minimum usable Python bindings to the
    nanoarrow C library. That minimum scope is essentially just the ability
    to extract field values from
    `ArrowSchema`/`ArrowArray`/`ArrowArrayStream` objects in a way that will
    not crash Python. This PR also includes bindings for nanoarrow's
    `ArrowSchemaView` (so that the parameters of parameterized types can be
    extracted) and `ArrowArrayView` (so that buffer types/sizes can be
    exported using the Python buffer protocol).
    
    I've updated [the
    README](https://github.com/paleolimbot/arrow-nanoarrow/tree/python-tidbits/python#readme)
    to showcase the extent of the bindings as implemented in this PR;
    several basic examples are also provided below.
    
    Example schema usage:
    
    ```python
    import nanoarrow as na
    import pyarrow as pa
    schema = na.schema(pa.decimal128(10, 3))
    print(schema.format)
    #> d:10,3
    print(schema.view().decimal_precision)
    #> 10
    print(schema.view().decimal_scale)
    #> 3
    ```
    
    Example Array usage:
    
    ```python
    array = na.array(pa.array(["one", "two", "three", None]))
    print(array.length)
    #> 4
    print(array.null_count)
    #> 1
    
    import numpy as np
    view = array.view()
    [np.array(buffer) for buffer in view.buffers]
    #> [array([7], dtype=uint8),
    #>  array([ 0,  3,  6, 11, 11], dtype=int32),
    #>  array([b'o', b'n', b'e', b't', b'w', b'o', b't', b'h', b'r', b'e', b'e'],
    #>        dtype='|S1')]
    ```
    
    Example ArrayStream usage:
    
    ```python
    pa_array_child = pa.array([1, 2, 3], pa.int32())
    pa_array = pa.record_batch([pa_array_child], names=["some_column"])
    reader = pa.RecordBatchReader.from_batches(pa_array.schema, [pa_array])
    array_stream = na.array_stream(reader)
    
    print(array_stream.get_schema())
    #> struct<some_column: int32>
    
    for array in array_stream:
        print(array.length)
    #> 3
    
    print(array_stream.get_next() is None)
    #> True
    ```
    
    ---------
    
    Co-authored-by: Joris Van den Bossche <jo...@gmail.com>
---
 .github/workflows/python.yaml                     |  30 +-
 python/{src/nanoarrow/__init__.py => .coveragerc} |   6 +-
 python/.gitignore                                 |   7 +-
 python/{src/nanoarrow/__init__.py => MANIFEST.in} |   7 +-
 python/README.ipynb                               | 392 ++++++++++
 python/README.md                                  | 192 ++++-
 python/bootstrap.py                               | 199 +++++
 python/{src => }/nanoarrow/__init__.py            |   5 +-
 python/nanoarrow/_lib.pyx                         | 903 ++++++++++++++++++++++
 python/nanoarrow/lib.py                           |  69 ++
 python/pyproject.toml                             |   8 +-
 python/setup.py                                   |  42 +-
 python/src/nanoarrow/_lib.pyx                     |  86 ---
 python/src/nanoarrow/nanoarrow_c.pxd              | 127 ---
 python/tests/test_nanoarrow.py                    | 302 +++++++-
 src/nanoarrow/nanoarrow_types.h                   |   6 +
 16 files changed, 2104 insertions(+), 277 deletions(-)

diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml
index 7d93578..4b599f7 100644
--- a/.github/workflows/python.yaml
+++ b/.github/workflows/python.yaml
@@ -40,7 +40,7 @@ jobs:
 
     steps:
       - uses: actions/checkout@v3
-      
+
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v4
         with:
@@ -57,3 +57,31 @@ jobs:
       - name: Run tests
         run: |
           pytest python/tests -v -s
+
+      - name: Run doctests
+        if: success() && matrix.python-version == '3.10'
+        run: |
+          # Needs editable install to run --doctest-cython
+          pip install pytest-cython
+          pip install -e python
+          pytest python --doctest-cython
+
+      - name: Coverage
+        if: success() && matrix.python-version == '3.10'
+        run: |
+          pip uninstall --yes nanoarrow
+          pip install pytest-cov Cython
+          pushd python
+
+          # Build with Cython + gcc coverage options
+          NANOARROW_PYTHON_COVERAGE=1 python setup.py build_ext --inplace
+
+          # Run tests + coverage.py (generates .coverage + coverage.xml files)
+          python -m pytest --cov ./nanoarrow
+          python -m coverage xml
+
+      - name: Upload coverage to codecov
+        if: success() && matrix.python-version == '3.10'
+        uses: codecov/codecov-action@v2
+        with:
+          files: 'python/coverage.xml'
diff --git a/python/src/nanoarrow/__init__.py b/python/.coveragerc
similarity index 91%
copy from python/src/nanoarrow/__init__.py
copy to python/.coveragerc
index 1586e60..1fb6a24 100644
--- a/python/src/nanoarrow/__init__.py
+++ b/python/.coveragerc
@@ -15,6 +15,6 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from ._lib import (  # noqa: F401
-    as_numpy_array,
-)
+# .coveragerc to control coverage.py
+[run]
+plugins = Cython.Coverage
diff --git a/python/.gitignore b/python/.gitignore
index fcf8363..b372452 100644
--- a/python/.gitignore
+++ b/python/.gitignore
@@ -16,9 +16,10 @@
 # specific language governing permissions and limitations
 # under the License.
 
-src/nanoarrow/nanoarrow.c
-src/nanoarrow/nanoarrow.h
-src/nanoarrow/*.cpp
+nanoarrow/nanoarrow.c
+nanoarrow/nanoarrow.h
+nanoarrow/nanoarrow_c.pxd
+nanoarrow/*.c
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
diff --git a/python/src/nanoarrow/__init__.py b/python/MANIFEST.in
similarity index 87%
copy from python/src/nanoarrow/__init__.py
copy to python/MANIFEST.in
index 1586e60..93ed2fd 100644
--- a/python/src/nanoarrow/__init__.py
+++ b/python/MANIFEST.in
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from ._lib import (  # noqa: F401
-    as_numpy_array,
-)
+exclude bootstrap.py
+include nanoarrow/nanoarrow.c
+include nanoarrow/nanoarrow.h
+include nanoarrow/nanoarrow_c.pxd
diff --git a/python/README.ipynb b/python/README.ipynb
new file mode 100644
index 0000000..d89d4c4
--- /dev/null
+++ b/python/README.ipynb
@@ -0,0 +1,392 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "<!---\n",
+    "  Licensed to the Apache Software Foundation (ASF) under one\n",
+    "  or more contributor license agreements.  See the NOTICE file\n",
+    "  distributed with this work for additional information\n",
+    "  regarding copyright ownership.  The ASF licenses this file\n",
+    "  to you under the Apache License, Version 2.0 (the\n",
+    "  \"License\"); you may not use this file except in compliance\n",
+    "  with the License.  You may obtain a copy of the License at\n",
+    "\n",
+    "    http://www.apache.org/licenses/LICENSE-2.0\n",
+    "\n",
+    "  Unless required by applicable law or agreed to in writing,\n",
+    "  software distributed under the License is distributed on an\n",
+    "  \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n",
+    "  KIND, either express or implied.  See the License for the\n",
+    "  specific language governing permissions and limitations\n",
+    "  under the License.\n",
+    "-->\n",
+    "\n",
+    "<!-- Render with jupyter nbconvert --to markdown README.ipynb -->\n",
+    "\n",
+    "# nanoarrow for Python\n",
+    "\n",
+    "The nanoarrow Python package provides bindings to the nanoarrow C library. Like\n",
+    "the nanoarrow C library, it provides tools to facilitate the use of the\n",
+    "[Arrow C Data](https://arrow.apache.org/docs/format/CDataInterface.html) \n",
+    "and [Arrow C Stream](https://arrow.apache.org/docs/format/CStreamInterface.html) \n",
+    "interfaces.\n",
+    "\n",
+    "## Installation\n",
+    "\n",
+    "Python bindings for nanoarrow are not yet available on PyPI. You can install via\n",
+    "URL (requires a C compiler):\n",
+    "\n",
+    "```bash\n",
+    "python -m pip install \"https://github.com/apache/arrow-nanoarrow/archive/refs/heads/main.zip#egg=nanoarrow&subdirectory=python\"\n",
+    "```\n",
+    "\n",
+    "If you can import the namespace, you're good to go!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import nanoarrow as na"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Example\n",
+    "\n",
+    "The Arrow C Data and Arrow C Stream interfaces are comprised of three structures: the `ArrowSchema` which represents a data type of an array, the `ArrowArray` which represents the values of an array, and an `ArrowArrayStream`, which represents zero or more `ArrowArray`s with a common `ArrowSchema`. All three can be wrapped by Python objects using the nanoarrow Python package.\n",
+    "\n",
+    "### Schemas\n",
+    "\n",
+    "Use `nanoarrow.schema()` to convert a data type-like object to an `ArrowSchema`. This is currently only implemented for pyarrow objects."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pyarrow as pa\n",
+    "schema = na.schema(pa.decimal128(10, 3))"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "You can extract the fields of a `Schema` object one at a time or parse it into a view to extract deserialized parameters."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "d:10,3\n",
+      "10\n",
+      "3\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(schema.format)\n",
+    "print(schema.view().decimal_precision)\n",
+    "print(schema.view().decimal_scale)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The `nanoarrow.schema()` helper is currently only implemented for pyarrow objects. If your data type has an `_export_to_c()`-like function, you can get the address of a freshly-allocated `ArrowSchema` as well:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'int32'"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "schema = na.Schema.allocate()\n",
+    "pa.int32()._export_to_c(schema._addr())\n",
+    "schema.view().type"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The `Schema` object cleans up after itself: when the object is deleted, the underlying `ArrowSchema` is released."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Arrays\n",
+    "\n",
+    "You can use `nanoarrow.array()` to convert an array-like object to a `nanoarrow.Array`, optionally attaching a `Schema` that can be used to interpret its contents. This is currently only implemented for pyarrow objects."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "array = na.array(pa.array([\"one\", \"two\", \"three\", None]))"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Like the `Schema`, you can inspect an `Array` by extracting fields individually:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "4\n",
+      "1\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(array.length)\n",
+    "print(array.null_count)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "...and parse the `Array`/`Schema` combination into a view whose contents are more readily accessible."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[array([7], dtype=uint8),\n",
+       " array([ 0,  3,  6, 11, 11], dtype=int32),\n",
+       " array([b'o', b'n', b'e', b't', b'w', b'o', b't', b'h', b'r', b'e', b'e'],\n",
+       "       dtype='|S1')]"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import numpy as np\n",
+    "view = array.view()\n",
+    "[np.array(buffer) for buffer in view.buffers]"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As with the `Schema`, you can allocate an empty `Array` and access its address with `_addr()` to pass to other array-exporting functions."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "3"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "array = na.Array.allocate(na.Schema.allocate())\n",
+    "pa.array([1, 2, 3])._export_to_c(array._addr(), array.schema._addr())\n",
+    "array.length"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Array streams\n",
+    "\n",
+    "You can use `nanoarrow.array_stream()` to convert an object representing a sequence of `Array`s with a common `Schema` to a `nanoarrow.ArrayStream`. This is currently only implemented for pyarrow objects."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pa_array_child = pa.array([1, 2, 3], pa.int32())\n",
+    "pa_array = pa.record_batch([pa_array_child], names=[\"some_column\"])\n",
+    "reader = pa.RecordBatchReader.from_batches(pa_array.schema, [pa_array])\n",
+    "array_stream = na.array_stream(reader)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "You can pull the next array from the stream using `.get_next()` or use it like an iterator. The `.get_next()` method will return `None` when there are no more arrays in the stream."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "struct<some_column: int32>\n",
+      "3\n",
+      "True\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(array_stream.get_schema())\n",
+    "\n",
+    "for array in array_stream:\n",
+    "    print(array.length)\n",
+    "\n",
+    "print(array_stream.get_next() is None)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "You can also get the address of a freshly-allocated stream to pass to a suitable exporting function:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "struct<some_column: int32>"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "array_stream = na.ArrayStream.allocate()\n",
+    "reader._export_to_c(array_stream._addr())\n",
+    "array_stream.get_schema()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Development\n",
+    "\n",
+    "Python bindings for nanoarrow are managed with [setuptools](https://setuptools.pypa.io/en/latest/index.html).\n",
+    "This means you can build the project using:\n",
+    "\n",
+    "```shell\n",
+    "git clone https://github.com/apache/arrow-nanoarrow.git\n",
+    "cd arrow-nanoarrow/python\n",
+    "pip install -e .\n",
+    "```\n",
+    "\n",
+    "Tests use [pytest](https://docs.pytest.org/):\n",
+    "\n",
+    "```shell\n",
+    "# Install dependencies\n",
+    "pip install -e .[test]\n",
+    "\n",
+    "# Run tests\n",
+    "pytest -vvx\n",
+    "```"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.6"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/python/README.md b/python/README.md
index 701896b..db898d2 100644
--- a/python/README.md
+++ b/python/README.md
@@ -17,28 +17,196 @@
   under the License.
 -->
 
+<!-- Render with jupyter nbconvert --to markdown README.ipynb -->
+
 # nanoarrow for Python
 
-Python bindings for nanoarrow.
-## Building
+The nanoarrow Python package provides bindings to the nanoarrow C library. Like
+the nanoarrow C library, it provides tools to facilitate the use of the
+[Arrow C Data](https://arrow.apache.org/docs/format/CDataInterface.html) 
+and [Arrow C Stream](https://arrow.apache.org/docs/format/CStreamInterface.html) 
+interfaces.
+
+## Installation
+
+Python bindings for nanoarrow are not yet available on PyPI. You can install via
+URL (requires a C compiler):
+
+```bash
+python -m pip install "https://github.com/apache/arrow-nanoarrow/archive/refs/heads/main.zip#egg=nanoarrow&subdirectory=python"
+```
+
+If you can import the namespace, you're good to go!
+
+
+```python
+import nanoarrow as na
+```
+
+## Example
+
+The Arrow C Data and Arrow C Stream interfaces are comprised of three structures: the `ArrowSchema` which represents a data type of an array, the `ArrowArray` which represents the values of an array, and an `ArrowArrayStream`, which represents zero or more `ArrowArray`s with a common `ArrowSchema`. All three can be wrapped by Python objects using the nanoarrow Python package.
+
+### Schemas
+
+Use `nanoarrow.schema()` to convert a data type-like object to an `ArrowSchema`. This is currently only implemented for pyarrow objects.
+
+
+```python
+import pyarrow as pa
+schema = na.schema(pa.decimal128(10, 3))
+```
+
+You can extract the fields of a `Schema` object one at a time or parse it into a view to extract deserialized parameters.
+
+
+```python
+print(schema.format)
+print(schema.view().decimal_precision)
+print(schema.view().decimal_scale)
+```
+
+    d:10,3
+    10
+    3
+
+
+The `nanoarrow.schema()` helper is currently only implemented for pyarrow objects. If your data type has an `_export_to_c()`-like function, you can get the address of a freshly-allocated `ArrowSchema` as well:
+
+
+```python
+schema = na.Schema.allocate()
+pa.int32()._export_to_c(schema._addr())
+schema.view().type
+```
+
+
+
+
+    'int32'
+
+
+
+The `Schema` object cleans up after itself: when the object is deleted, the underlying `ArrowSchema` is released.
+
+### Arrays
+
+You can use `nanoarrow.array()` to convert an array-like object to a `nanoarrow.Array`, optionally attaching a `Schema` that can be used to interpret its contents. This is currently only implemented for pyarrow objects.
+
+
+```python
+array = na.array(pa.array(["one", "two", "three", None]))
+```
+
+Like the `Schema`, you can inspect an `Array` by extracting fields individually:
+
+
+```python
+print(array.length)
+print(array.null_count)
+```
+
+    4
+    1
+
+
+...and parse the `Array`/`Schema` combination into a view whose contents are more readily accessible.
 
-Python libraries are managed with [setuptools][setuptools]. In general, that
-means all projects can be built as follows:
+
+```python
+import numpy as np
+view = array.view()
+[np.array(buffer) for buffer in view.buffers]
+```
+
+
+
+
+    [array([7], dtype=uint8),
+     array([ 0,  3,  6, 11, 11], dtype=int32),
+     array([b'o', b'n', b'e', b't', b'w', b'o', b't', b'h', b'r', b'e', b'e'],
+           dtype='|S1')]
+
+
+
+As with the `Schema`, you can allocate an empty `Array` and access its address with `_addr()` to pass to other array-exporting functions.
+
+
+```python
+array = na.Array.allocate(na.Schema.allocate())
+pa.array([1, 2, 3])._export_to_c(array._addr(), array.schema._addr())
+array.length
+```
+
+
+
+
+    3
+
+
+
+### Array streams
+
+You can use `nanoarrow.array_stream()` to convert an object representing a sequence of `Array`s with a common `Schema` to a `nanoarrow.ArrayStream`. This is currently only implemented for pyarrow objects.
+
+
+```python
+pa_array_child = pa.array([1, 2, 3], pa.int32())
+pa_array = pa.record_batch([pa_array_child], names=["some_column"])
+reader = pa.RecordBatchReader.from_batches(pa_array.schema, [pa_array])
+array_stream = na.array_stream(reader)
+```
+
+You can pull the next array from the stream using `.get_next()` or use it like an iterator. The `.get_next()` method will return `None` when there are no more arrays in the stream.
+
+
+```python
+print(array_stream.get_schema())
+
+for array in array_stream:
+    print(array.length)
+
+print(array_stream.get_next() is None)
+```
+
+    struct<some_column: int32>
+    3
+    True
+
+
+You can also get the address of a freshly-allocated stream to pass to a suitable exporting function:
+
+
+```python
+array_stream = na.ArrayStream.allocate()
+reader._export_to_c(array_stream._addr())
+array_stream.get_schema()
+```
+
+
+
+
+    struct<some_column: int32>
+
+
+
+## Development
+
+Python bindings for nanoarrow are managed with [setuptools](https://setuptools.pypa.io/en/latest/index.html).
+This means you can build the project using:
 
 ```shell
-$ cd python
-$ pip install -e .
+git clone https://github.com/apache/arrow-nanoarrow.git
+cd arrow-nanoarrow/python
+pip install -e .
 ```
 
-Tests use [pytest][pytest]:
+Tests use [pytest](https://docs.pytest.org/):
 
 ```shell
 # Install dependencies
-$ pip install -e .[test]
+pip install -e .[test]
 
 # Run tests
-$ pytest -vvx
+pytest -vvx
 ```
-
-[pytest]: https://docs.pytest.org/
-[setuptools]: https://setuptools.pypa.io/en/latest/index.html
\ No newline at end of file
diff --git a/python/bootstrap.py b/python/bootstrap.py
new file mode 100644
index 0000000..39b4fd9
--- /dev/null
+++ b/python/bootstrap.py
@@ -0,0 +1,199 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import re
+import os
+import shutil
+
+# Generate the nanoarrow_c.pxd file used by the Cython extension
+class NanoarrowPxdGenerator:
+    """Generate Cython ``cdef extern`` declarations from a nanoarrow C header.
+
+    The header is scanned with regular expressions (this is not a C parser):
+    ``//`` comments are removed, ``struct``/``union``/``enum`` definitions and
+    function declarations whose names start with ``Arrow`` are located, and
+    each one is rewritten as the equivalent line(s) of a ``.pxd`` file.
+    """
+
+    def __init__(self):
+        self._define_regexes()
+
+    def generate_nanoarrow_pxd(self, file_in, file_out):
+        """Read the C header ``file_in`` and write Cython declarations to ``file_out``.
+
+        The output consists of the license header from ``_pxd_header()``, a
+        ``cdef extern from "<basename of file_in>" nogil:`` line, two manually
+        added declarations (``ArrowErrorCode`` and ``NANOARROW_OK``), every
+        type found in the header and finally every function declaration found.
+        ``file_out`` is opened in ``'wb'`` mode, so existing content is replaced.
+        """
+        file_in_name = os.path.basename(file_in)
+
+        # Read the nanoarrow.h header
+        with open(file_in, 'r') as header_file:
+            content = header_file.read()
+
+        # Strip comments
+        content = self._strip_comments(content)
+
+        # Find types and function definitions
+        types = self._find_types(content)
+        func_defs = self._find_func_defs(content)
+
+        # Make corresponding cython definitions
+        types_cython = [self._type_to_cython(t, '    ') for t in types]
+        func_defs_cython = [self._func_def_to_cython(d, '    ') for d in func_defs]
+
+        # Unindent the header (it is written as an indented triple-quoted string)
+        header = self.re_newline_plus_indent.sub('\n', self._pxd_header())
+
+        # Write nanoarrow_c.pxd
+        with open(file_out, 'wb') as output:
+            output.write(header.encode('UTF-8'))
+
+            output.write(f'\ncdef extern from "{file_in_name}" nogil:\n'.encode("UTF-8"))
+
+            # A few things we add in manually
+            output.write(b'\n')
+            output.write(b'    ctypedef int ArrowErrorCode\n')
+            output.write(b'    cdef int NANOARROW_OK\n')
+            output.write(b'\n')
+
+            for type_def in types_cython:
+                output.write(type_def.encode('UTF-8'))
+                output.write(b'\n\n')
+
+            for func_def in func_defs_cython:
+                output.write(func_def.encode('UTF-8'))
+                output.write(b'\n')
+
+    def _define_regexes(self):
+        """Compile the regular expressions used to scan the header."""
+        # A `//` line comment together with any whitespace that precedes it
+        self.re_comment = re.compile(r'\s*//[^\n]*')
+        # A struct/union/enum definition named Arrow*; the body runs to the first `}`
+        self.re_type = re.compile(r'(?P<type>struct|union|enum) (?P<name>Arrow[^ ]+) {(?P<body>[^}]*)}')
+        # A function declaration that starts a line and ends with `);`. The optional
+        # `struct `/`enum ` tag must include its trailing space: without it, a
+        # declaration such as `struct ArrowFoo ArrowBar(void);` can never match.
+        self.re_func_def = re.compile(r'\n(static inline )?(?P<const>const )?(struct |enum )?(?P<return_type>[A-Za-z0-9_*]+) (?P<name>Arrow[A-Za-z]+)\((?P<args>[^\)]*)\);')
+        # A reference to a tagged type, e.g. `struct ArrowArray`; group 2 is the bare name
+        self.re_tagged_type = re.compile(r'(?P<type>struct|union|enum) (?P<name>Arrow[A-Za-z]+)')
+        # Separators between struct/union members and between enum items
+        self.re_struct_delim = re.compile(r';\s*')
+        self.re_enum_delim = re.compile(r',\s*')
+        self.re_whitespace = re.compile(r'\s+')
+        self.re_newline_plus_indent = re.compile(r'\n +')
+
+    def _strip_comments(self, content):
+        """Return ``content`` with every ``//`` line comment removed."""
+        return self.re_comment.sub('', content)
+
+    def _find_types(self, content):
+        """Return one dict (keys ``type``, ``name``, ``body``) per type definition."""
+        return [m.groupdict() for m in self.re_type.finditer(content)]
+
+    def _find_func_defs(self, content):
+        """Return one dict (keys ``const``, ``return_type``, ``name``, ``args``)
+        per function declaration."""
+        return [m.groupdict() for m in self.re_func_def.finditer(content)]
+
+    def _type_to_cython(self, t, indent=''):
+        """Render a dict from ``_find_types()`` as a Cython type block.
+
+        The first line is ``<indent><type> <name>:``; each member or enum item
+        follows on its own line, indented four spaces deeper than ``indent``.
+        """
+        kind = t['type']
+        name = t['name']
+        # Cython refers to tagged types by bare name: `struct ArrowX` -> `ArrowX`
+        body = self.re_tagged_type.sub(r'\2', t['body'].strip())
+        if kind == 'enum':
+            items = [item for item in self.re_enum_delim.split(body) if item]
+        else:
+            items = [item for item in self.re_struct_delim.split(body) if item]
+
+        # The leading '' makes join() emit the separator before the first item too
+        cython_body = f'\n{indent}    '.join([''] + items)
+        return f'{indent}{kind} {name}:{cython_body}'
+
+    def _func_def_to_cython(self, d, indent=''):
+        """Render a dict from ``_find_func_defs()`` as a one-line Cython declaration."""
+        return_type = d['return_type'].strip()
+        if d['const']:
+            return_type = 'const ' + return_type
+        name = d['name']
+        # Collapse multi-line argument lists onto a single line
+        args = self.re_whitespace.sub(' ', d['args'].strip())
+        args = self.re_tagged_type.sub(r'\2', args)
+
+        # Cython doesn't do (void)
+        if args == 'void':
+            args = ''
+
+        return f'{indent}{return_type} {name}({args})'
+
+    def _pxd_header(self):
+        """Return the (indented) license header and preamble of the pxd file."""
+        return """
+        # Licensed to the Apache Software Foundation (ASF) under one
+        # or more contributor license agreements.  See the NOTICE file
+        # distributed with this work for additional information
+        # regarding copyright ownership.  The ASF licenses this file
+        # to you under the Apache License, Version 2.0 (the
+        # "License"); you may not use this file except in compliance
+        # with the License.  You may obtain a copy of the License at
+        #
+        #   http://www.apache.org/licenses/LICENSE-2.0
+        #
+        # Unless required by applicable law or agreed to in writing,
+        # software distributed under the License is distributed on an
+        # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+        # KIND, either express or implied.  See the License for the
+        # specific language governing permissions and limitations
+        # under the License.
+
+        # cython: language_level = 3
+
+        from libc.stdint cimport int8_t, uint8_t, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t
+        """
+
+# Runs cmake -DNANOARROW_BUNDLE=ON if cmake exists or copies nanoarrow.c/h
+# from ../dist if it does not. Running cmake is safer because it will sync
+# any changes from nanoarrow C library sources in the checkout but is not
+# strictly necessary for things like installing from GitHub.
+def copy_or_generate_nanoarrow_c():
+    """Vendor nanoarrow.c and nanoarrow.h into the ``nanoarrow`` package directory.
+
+    Previously vendored copies are deleted first. If cmake is available and
+    this file lives inside a nanoarrow checkout, the bundled sources are
+    generated with cmake in a temporary ``_cmake`` directory; otherwise they
+    are copied from the checkout's ``dist`` directory. A vendored
+    ``nanoarrow.hpp``, if present afterwards, is removed.
+
+    Raises:
+        ValueError: If this file is not inside a nanoarrow checkout, or if
+            ``nanoarrow/nanoarrow.h`` does not exist after vendoring.
+    """
+    this_wd = os.getcwd()
+    this_dir = os.path.abspath(os.path.dirname(__file__))
+    source_dir = os.path.dirname(this_dir)
+
+    maybe_nanoarrow_h = os.path.join(this_dir, 'nanoarrow/nanoarrow.h')
+    maybe_nanoarrow_c = os.path.join(this_dir, 'nanoarrow/nanoarrow.c')
+    for f in (maybe_nanoarrow_c, maybe_nanoarrow_h):
+        if os.path.exists(f):
+            os.unlink(f)
+
+    is_cmake_dir = 'CMakeLists.txt' in os.listdir(source_dir)
+    # Test the path directly: os.listdir() would raise FileNotFoundError when
+    # src/nanoarrow is missing, hiding the ValueError raised below.
+    is_in_nanoarrow_repo = os.path.exists(
+        os.path.join(source_dir, 'src', 'nanoarrow', 'nanoarrow.h')
+    )
+    has_cmake = os.system('cmake --version') == 0
+    build_dir = os.path.join(this_dir, '_cmake')
+
+    if has_cmake and is_cmake_dir and is_in_nanoarrow_repo:
+        try:
+            # Tolerate a leftover directory from a run whose cleanup failed
+            os.makedirs(build_dir, exist_ok=True)
+            os.chdir(build_dir)
+            os.system('cmake ../.. -DNANOARROW_BUNDLE=ON -DNANOARROW_NAMESPACE=PythonPkg')
+            os.system('cmake --install . --prefix=../nanoarrow')
+        finally:
+            # Leave the build directory before deleting it: removing the
+            # current working directory is not possible on Windows.
+            os.chdir(this_wd)
+            if os.path.exists(build_dir):
+                # Can fail on Windows with permission issues
+                try:
+                    shutil.rmtree(build_dir)
+                except Exception as e:
+                    print(f'Failed to remove _cmake temp directory: {str(e)}')
+
+    elif is_in_nanoarrow_repo:
+        # No usable cmake: fall back to the pre-bundled sources in ../dist
+        dist_dir = os.path.join(source_dir, 'dist')
+        shutil.copyfile(os.path.join(dist_dir, 'nanoarrow.c'), maybe_nanoarrow_c)
+        shutil.copyfile(os.path.join(dist_dir, 'nanoarrow.h'), maybe_nanoarrow_h)
+    else:
+        raise ValueError('Attempt to build source distribution outside the nanoarrow repo')
+
+    # The cmake exit codes are not checked, so confirm the header actually arrived
+    if not os.path.exists(os.path.join(this_dir, 'nanoarrow/nanoarrow.h')):
+        raise ValueError('Attempt to vendor nanoarrow.c/h failed')
+
+    maybe_nanoarrow_hpp = os.path.join(this_dir, 'nanoarrow/nanoarrow.hpp')
+    if os.path.exists(maybe_nanoarrow_hpp):
+        os.unlink(maybe_nanoarrow_hpp)
+
+# Runs the pxd generator with some information about the file name
+def generate_nanoarrow_pxd():
+    """Generate ``nanoarrow/nanoarrow_c.pxd`` from ``nanoarrow/nanoarrow.h``.
+
+    Both paths are resolved relative to the directory containing this file.
+    """
+    here = os.path.abspath(os.path.dirname(__file__))
+    header_path = os.path.join(here, 'nanoarrow/nanoarrow.h')
+    pxd_path = os.path.join(here, 'nanoarrow/nanoarrow_c.pxd')
+
+    generator = NanoarrowPxdGenerator()
+    generator.generate_nanoarrow_pxd(header_path, pxd_path)
+
+# When run as a script: vendor the nanoarrow C sources first, then generate
+# nanoarrow_c.pxd from the vendored nanoarrow/nanoarrow.h header.
+if __name__ == '__main__':
+    copy_or_generate_nanoarrow_c()
+    generate_nanoarrow_pxd()
diff --git a/python/src/nanoarrow/__init__.py b/python/nanoarrow/__init__.py
similarity index 87%
rename from python/src/nanoarrow/__init__.py
rename to python/nanoarrow/__init__.py
index 1586e60..bb43726 100644
--- a/python/src/nanoarrow/__init__.py
+++ b/python/nanoarrow/__init__.py
@@ -15,6 +15,5 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from ._lib import (  # noqa: F401
-    as_numpy_array,
-)
+from ._lib import c_version, Schema, Array, ArrayView, ArrayStream
+from .lib import schema, array, array_stream
diff --git a/python/nanoarrow/_lib.pyx b/python/nanoarrow/_lib.pyx
new file mode 100644
index 0000000..b5210e3
--- /dev/null
+++ b/python/nanoarrow/_lib.pyx
@@ -0,0 +1,903 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# cython: language_level = 3
+# cython: linetrace=True
+
+"""Low-level nanoarrow Python bindings
+
+This Cython extension provides low-level Python wrappers around the
+Arrow C Data and Arrow C Stream interface structs. In general, there
+is one wrapper per C struct and pointer validity is managed by keeping
+strong references to Python objects. These wrappers are intended to
+be literal and stay close to the structure definitions.
+"""
+
+from libc.stdint cimport uintptr_t, int64_t
+from cpython.mem cimport PyMem_Malloc, PyMem_Free
+from cpython.bytes cimport PyBytes_FromStringAndSize
+from cpython cimport Py_buffer
+from nanoarrow_c cimport *
+
+def c_version():
+    """Return the nanoarrow C library version as a str
+
+    The value is whatever ArrowNanoarrowVersion() reports, decoded as UTF-8.
+    """
+    cdef const char* version = ArrowNanoarrowVersion()
+    return version.decode("UTF-8")
+
+
+cdef class SchemaHolder:
+    """Owner of the memory for a single ArrowSchema struct
+
+    The struct is stored inline in this object and starts out with a NULL
+    release callback. If the release callback is non-NULL when this object
+    is deallocated, it is invoked on the struct.
+    """
+    cdef ArrowSchema c_schema
+
+    def __cinit__(self):
+        # A NULL release callback makes __dealloc__ a no-op until the
+        # struct is populated by somebody else.
+        self.c_schema.release = NULL
+
+    def __dealloc__(self):
+        if self.c_schema.release == NULL:
+            return
+        self.c_schema.release(&self.c_schema)
+
+    def _addr(self):
+        """Return the address of the held ArrowSchema as an integer
+        """
+        cdef ArrowSchema* ptr = &self.c_schema
+        return <uintptr_t>ptr
+
+
+cdef class ArrayHolder:
+    """Owner of the memory for a single ArrowArray struct
+
+    The struct is stored inline in this object and starts out with a NULL
+    release callback. If the release callback is non-NULL when this object
+    is deallocated, it is invoked on the struct.
+    """
+    cdef ArrowArray c_array
+
+    def __cinit__(self):
+        # A NULL release callback makes __dealloc__ a no-op until the
+        # struct is populated by somebody else.
+        self.c_array.release = NULL
+
+    def __dealloc__(self):
+        if self.c_array.release == NULL:
+            return
+        self.c_array.release(&self.c_array)
+
+    def _addr(self):
+        """Return the address of the held ArrowArray as an integer
+        """
+        cdef ArrowArray* ptr = &self.c_array
+        return <uintptr_t>ptr
+
+cdef class ArrayStreamHolder:
+    """Owner of the memory for a single ArrowArrayStream struct
+
+    The struct is stored inline in this object and starts out with a NULL
+    release callback. If the release callback is non-NULL when this object
+    is deallocated, it is invoked on the struct.
+    """
+    cdef ArrowArrayStream c_array_stream
+
+    def __cinit__(self):
+        # A NULL release callback makes __dealloc__ a no-op until the
+        # struct is populated by somebody else.
+        self.c_array_stream.release = NULL
+
+    def __dealloc__(self):
+        if self.c_array_stream.release == NULL:
+            return
+        self.c_array_stream.release(&self.c_array_stream)
+
+    def _addr(self):
+        """Return the address of the held ArrowArrayStream as an integer
+        """
+        cdef ArrowArrayStream* ptr = &self.c_array_stream
+        return <uintptr_t>ptr
+
+
+cdef class ArrayViewHolder:
+    """Owner of the memory for a single ArrowArrayView struct
+
+    The struct is stored inline in this object. It is initialized as an
+    uninitialized-type view on construction and ArrowArrayViewReset() is
+    called on it when this object is deallocated.
+    """
+    cdef ArrowArrayView c_array_view
+
+    def __cinit__(self):
+        cdef ArrowArrayView* view = &self.c_array_view
+        ArrowArrayViewInitFromType(view, NANOARROW_TYPE_UNINITIALIZED)
+
+    def __dealloc__(self):
+        cdef ArrowArrayView* view = &self.c_array_view
+        ArrowArrayViewReset(view)
+
+    def _addr(self):
+        """Return the address of the held ArrowArrayView as an integer
+        """
+        cdef ArrowArrayView* view = &self.c_array_view
+        return <uintptr_t>view
+
+
+class NanoarrowException(RuntimeError):
+    """RuntimeError raised when a nanoarrow C call reports failure
+
+    The pieces of the failure are kept as attributes so that callers can
+    inspect them individually:
+
+    - ``what``: a label for the call that failed
+    - ``code``: the error code that was returned
+    - ``message``: extra detail, or ``""`` when none was provided
+
+    The exception text is ``"<what> failed (<code>)"`` with
+    ``": <message>"`` appended when the message is not empty.
+    """
+
+    def __init__(self, what, code, message=""):
+        self.what = what
+        self.code = code
+        self.message = message
+
+        description = f"{self.what} failed ({self.code})"
+        if self.message != "":
+            description = f"{description}: {self.message}"
+
+        super().__init__(description)
+
+
+cdef class Error:
+    """Memory holder for an ArrowError
+
+    An ArrowError is the C struct that nanoarrow functions optionally fill
+    with a detailed message on failure. This class stores one inline (with
+    the message initialized to the empty string) and provides helpers that
+    turn an error code, plus that message, into a NanoarrowException.
+    """
+    cdef ArrowError c_error
+
+    def __cinit__(self):
+        # Start with an empty (NUL-terminated) message
+        self.c_error.message[0] = 0
+
+    def raise_message(self, what, code):
+        """Raise a NanoarrowException that includes the held message
+        """
+        detail = self.c_error.message.decode("UTF-8")
+        raise NanoarrowException(what, code, detail)
+
+    @staticmethod
+    def raise_error(what, code):
+        """Raise a NanoarrowException with an empty message
+        """
+        no_detail = ""
+        raise NanoarrowException(what, code, no_detail)
+
+
+cdef class Schema:
+    """ArrowSchema wrapper
+
+    This class provides a user-facing interface to access the fields of
+    an ArrowSchema as defined in the Arrow C Data interface. These objects
+    are usually created using `nanoarrow.schema()`. This Python wrapper
+    allows access to schema fields but does not automatically deserialize
+    their content: use `.view()` to validate and deserialize the content
+    into a more easily inspectable object.
+
+    Field accessors raise RuntimeError if the underlying pointer is NULL
+    or the schema has been released.
+
+    Examples
+    --------
+
+    >>> import pyarrow as pa
+    >>> import nanoarrow as na
+    >>> schema = na.schema(pa.int32())
+    >>> schema.is_valid()
+    True
+    >>> schema.format
+    'i'
+    >>> schema.name
+    ''
+    >>> schema_view = schema.view()
+    >>> schema_view.type
+    'int32'
+    """
+    # Python object kept alive for as long as this wrapper exists
+    cdef object _base
+    # Borrowed pointer to the wrapped struct
+    cdef ArrowSchema* _ptr
+
+    @staticmethod
+    def allocate():
+        """Create a Schema backed by a freshly allocated SchemaHolder
+        """
+        base = SchemaHolder()
+        return Schema(base, base._addr())
+
+    def __cinit__(self, object base, uintptr_t addr):
+        # Store the object itself (a trailing comma here would store a
+        # 1-tuple instead)
+        self._base = base
+        self._ptr = <ArrowSchema*>addr
+
+    def _addr(self):
+        """Return the address of the wrapped ArrowSchema as an integer
+        """
+        return <uintptr_t>self._ptr
+
+    def is_valid(self):
+        """Return True if the pointer is non-NULL and the schema is not released
+        """
+        return self._ptr != NULL and self._ptr.release != NULL
+
+    def _assert_valid(self):
+        """Raise RuntimeError unless is_valid() would return True
+        """
+        if self._ptr == NULL:
+            raise RuntimeError("schema is NULL")
+        if self._ptr.release == NULL:
+            raise RuntimeError("schema is released")
+
+    def __repr__(self):
+        # NOTE(review): no _assert_valid() here, as in the original; this
+        # relies on ArrowSchemaToString() coping with a NULL or released
+        # schema so that repr() never raises for that reason - confirm.
+        # The first call only measures the required length.
+        cdef int64_t n_chars = ArrowSchemaToString(self._ptr, NULL, 0, True)
+        cdef char* out = <char*>PyMem_Malloc(n_chars + 1)
+        if not out:
+            raise MemoryError()
+
+        # try/finally so that the buffer is freed even if decoding fails
+        try:
+            ArrowSchemaToString(self._ptr, out, n_chars + 1, True)
+            return out.decode("UTF-8")
+        finally:
+            PyMem_Free(out)
+
+    @property
+    def format(self):
+        """The format string, or None if the C pointer is NULL
+        """
+        self._assert_valid()
+        if self._ptr.format != NULL:
+            return self._ptr.format.decode("UTF-8")
+        else:
+            return None
+
+    @property
+    def name(self):
+        """The field name, or None if the C pointer is NULL
+        """
+        self._assert_valid()
+        if self._ptr.name != NULL:
+            return self._ptr.name.decode("UTF-8")
+        else:
+            return None
+
+    @property
+    def flags(self):
+        """The integer flags field of the schema
+        """
+        # Checked like every other field accessor: reading through a NULL
+        # pointer would crash the interpreter
+        self._assert_valid()
+        return self._ptr.flags
+
+    @property
+    def metadata(self):
+        """A SchemaMetadata wrapper, or None if the C pointer is NULL
+        """
+        self._assert_valid()
+        if self._ptr.metadata != NULL:
+            return SchemaMetadata(self, <uintptr_t>self._ptr.metadata)
+        else:
+            return None
+
+    @property
+    def children(self):
+        """A SchemaChildren wrapper for this schema
+        """
+        self._assert_valid()
+        return SchemaChildren(self)
+
+    @property
+    def dictionary(self):
+        """The dictionary Schema, or None if the C pointer is NULL
+
+        The returned Schema keeps a reference to this object.
+        """
+        self._assert_valid()
+        if self._ptr.dictionary != NULL:
+            return Schema(self, <uintptr_t>self._ptr.dictionary)
+        else:
+            return None
+
+    def view(self):
+        """Validate this schema and return a SchemaView of its content
+
+        Raises NanoarrowException if ArrowSchemaViewInit() fails.
+        """
+        self._assert_valid()
+        schema_view = SchemaView()
+        cdef Error error = Error()
+        cdef int result = ArrowSchemaViewInit(&schema_view._schema_view, self._ptr, &error.c_error)
+        if result != NANOARROW_OK:
+            error.raise_message("ArrowSchemaViewInit()", result)
+
+        return schema_view
+
+
+cdef class SchemaView:
+    """ArrowSchemaView wrapper
+
+    The ArrowSchemaView is a nanoarrow C library structure that facilitates
+    access to the deserialized content of an ArrowSchema (e.g., parameter
+    values for parameterized types). This wrapper extends that facility to Python.
+
+    Properties that only apply to some types return None for every other
+    type.
+
+    Examples
+    --------
+
+    >>> import pyarrow as pa
+    >>> import nanoarrow as na
+    >>> schema = na.schema(pa.decimal128(10, 3))
+    >>> schema_view = schema.view()
+    >>> schema_view.type
+    'decimal128'
+    >>> schema_view.decimal_bitwidth
+    128
+    >>> schema_view.decimal_precision
+    10
+    >>> schema_view.decimal_scale
+    3
+    """
+    cdef ArrowSchemaView _schema_view
+
+    # Types for which fixed_size is reported
+    _fixed_size_types = (
+        NANOARROW_TYPE_FIXED_SIZE_LIST,
+        NANOARROW_TYPE_FIXED_SIZE_BINARY
+    )
+
+    # Types for which the decimal_* properties are reported
+    _decimal_types = (
+        NANOARROW_TYPE_DECIMAL128,
+        NANOARROW_TYPE_DECIMAL256
+    )
+
+    # Types for which time_unit is reported
+    _time_unit_types = (
+        NANOARROW_TYPE_TIME32,
+        NANOARROW_TYPE_TIME64,
+        NANOARROW_TYPE_DURATION,
+        NANOARROW_TYPE_TIMESTAMP
+    )
+
+    # Types for which union_type_ids is reported
+    _union_types = (
+        NANOARROW_TYPE_DENSE_UNION,
+        NANOARROW_TYPE_SPARSE_UNION
+    )
+
+    def __cinit__(self):
+        self._schema_view.type = NANOARROW_TYPE_UNINITIALIZED
+        self._schema_view.storage_type = NANOARROW_TYPE_UNINITIALIZED
+
+    @property
+    def type(self):
+        """The type name, or None if ArrowTypeString() returns NULL
+        """
+        cdef const char* type_str = ArrowTypeString(self._schema_view.type)
+        if type_str != NULL:
+            return type_str.decode('UTF-8')
+        else:
+            return None
+
+    @property
+    def storage_type(self):
+        """The storage type name, or None if ArrowTypeString() returns NULL
+        """
+        cdef const char* type_str = ArrowTypeString(self._schema_view.storage_type)
+        if type_str != NULL:
+            return type_str.decode('UTF-8')
+        else:
+            return None
+
+    @property
+    def fixed_size(self):
+        """The fixed size for fixed-size list/binary types, otherwise None
+        """
+        if self._schema_view.type in SchemaView._fixed_size_types:
+            return self._schema_view.fixed_size
+        else:
+            return None
+
+    @property
+    def decimal_bitwidth(self):
+        """The bit width for decimal types, otherwise None
+        """
+        if self._schema_view.type in SchemaView._decimal_types:
+            return self._schema_view.decimal_bitwidth
+        else:
+            return None
+
+    @property
+    def decimal_precision(self):
+        """The precision for decimal types, otherwise None
+        """
+        if self._schema_view.type in SchemaView._decimal_types:
+            return self._schema_view.decimal_precision
+        else:
+            return None
+
+    @property
+    def decimal_scale(self):
+        """The scale for decimal types, otherwise None
+        """
+        if self._schema_view.type in SchemaView._decimal_types:
+            return self._schema_view.decimal_scale
+        else:
+            return None
+
+    @property
+    def time_unit(self):
+        """The time unit name for time/duration/timestamp types, otherwise None
+        """
+        if self._schema_view.type in SchemaView._time_unit_types:
+            return ArrowTimeUnitString(self._schema_view.time_unit).decode('UTF-8')
+        else:
+            return None
+
+    @property
+    def timezone(self):
+        """The timezone string for timestamp types, otherwise None
+        """
+        if self._schema_view.type == NANOARROW_TYPE_TIMESTAMP:
+            return self._schema_view.timezone.decode('UTF-8')
+        else:
+            return None
+
+    @property
+    def union_type_ids(self):
+        """A tuple of integer type ids for union types, otherwise None
+
+        The ids are parsed from the comma-separated string stored in the
+        view. They are parsed eagerly so that a malformed string raises
+        here rather than when the result is later iterated.
+        """
+        if self._schema_view.type not in SchemaView._union_types:
+            return None
+
+        type_ids_str = self._schema_view.union_type_ids.decode('UTF-8')
+        # ''.split(',') is [''], which int() cannot parse
+        if type_ids_str == '':
+            return ()
+
+        return tuple(int(type_id) for type_id in type_ids_str.split(','))
+
+    @property
+    def extension_name(self):
+        """The extension name as a str, or None if its data pointer is NULL
+        """
+        if self._schema_view.extension_name.data != NULL:
+            name_bytes = PyBytes_FromStringAndSize(
+                self._schema_view.extension_name.data,
+                self._schema_view.extension_name.size_bytes
+            )
+            return name_bytes.decode('UTF-8')
+        else:
+            return None
+
+    @property
+    def extension_metadata(self):
+        """The extension metadata as bytes, or None if there is no extension name
+        """
+        # NOTE(review): guarded on extension_name (not extension_metadata),
+        # as in the original; presumably the name is what marks a schema as
+        # an extension type - confirm.
+        if self._schema_view.extension_name.data != NULL:
+            return PyBytes_FromStringAndSize(
+                self._schema_view.extension_metadata.data,
+                self._schema_view.extension_metadata.size_bytes
+            )
+        else:
+            return None
+
+cdef class Array:
+    """ArrowArray wrapper
+
+    This class provides a user-facing interface to access the fields of
+    an ArrowArray as defined in the Arrow C Data interface, holding an
+    optional reference to a Schema that can be used to safely deserialize
+    the content. These objects are usually created using `nanoarrow.array()`.
+    This Python wrapper allows access to array fields but does not
+    automatically deserialize their content: use `.view()` to validate and
+    deserialize the content into a more easily inspectable object.
+
+    Field accessors raise RuntimeError if the underlying pointer is NULL
+    or the array has been released.
+
+    Examples
+    --------
+
+    >>> import pyarrow as pa
+    >>> import numpy as np
+    >>> import nanoarrow as na
+    >>> array = na.array(pa.array(["one", "two", "three", None]))
+    >>> array.length
+    4
+    >>> array.null_count
+    1
+    >>> array_view = array.view()
+    """
+    # Python object kept alive for as long as this wrapper exists
+    cdef object _base
+    # Borrowed pointer to the wrapped struct
+    cdef ArrowArray* _ptr
+    # Schema describing the array (may be None)
+    cdef Schema _schema
+
+    @staticmethod
+    def allocate(Schema schema):
+        """Create an Array backed by a freshly allocated ArrayHolder
+        """
+        base = ArrayHolder()
+        return Array(base, base._addr(), schema)
+
+    def __cinit__(self, object base, uintptr_t addr, Schema schema):
+        # Store the object itself (a trailing comma here would store a
+        # 1-tuple instead)
+        self._base = base
+        self._ptr = <ArrowArray*>addr
+        self._schema = schema
+
+    def _addr(self):
+        """Return the address of the wrapped ArrowArray as an integer
+        """
+        return <uintptr_t>self._ptr
+
+    def is_valid(self):
+        """Return True if the pointer is non-NULL and the array is not released
+        """
+        return self._ptr != NULL and self._ptr.release != NULL
+
+    def _assert_valid(self):
+        """Raise RuntimeError unless is_valid() would return True
+        """
+        if self._ptr == NULL:
+            raise RuntimeError("Array is NULL")
+        if self._ptr.release == NULL:
+            raise RuntimeError("Array is released")
+
+    @property
+    def schema(self):
+        """The Schema passed at construction
+        """
+        return self._schema
+
+    @property
+    def length(self):
+        self._assert_valid()
+        return self._ptr.length
+
+    @property
+    def offset(self):
+        self._assert_valid()
+        return self._ptr.offset
+
+    @property
+    def null_count(self):
+        # Checked like length/offset: reading through a NULL pointer
+        # would crash the interpreter
+        self._assert_valid()
+        return self._ptr.null_count
+
+    @property
+    def buffers(self):
+        """A tuple of buffer addresses (as integers), one per buffer
+        """
+        self._assert_valid()
+        return tuple(<uintptr_t>self._ptr.buffers[i] for i in range(self._ptr.n_buffers))
+
+    @property
+    def children(self):
+        """An ArrayChildren wrapper for this array
+        """
+        # ArrayChildren reads n_children through the pointer on construction
+        self._assert_valid()
+        return ArrayChildren(self)
+
+    @property
+    def dictionary(self):
+        """The dictionary Array, or None if the C pointer is NULL
+
+        The returned Array keeps a reference to this object and is paired
+        with the dictionary of this array's schema.
+        """
+        self._assert_valid()
+        if self._ptr.dictionary != NULL:
+            return Array(self, <uintptr_t>self._ptr.dictionary, self._schema.dictionary)
+        else:
+            return None
+
+    def view(self):
+        """Validate this array against its schema and return an ArrayView
+
+        Raises RuntimeError if the array or its schema is missing, NULL or
+        released, and NanoarrowException if nanoarrow rejects the
+        schema/array combination.
+        """
+        self._assert_valid()
+        # self._schema._ptr is read below without a None check by Cython,
+        # so a missing schema has to be rejected explicitly
+        if self._schema is None:
+            raise RuntimeError("Array has no schema")
+        self._schema._assert_valid()
+
+        cdef ArrayViewHolder holder = ArrayViewHolder()
+
+        cdef Error error = Error()
+        cdef int result = ArrowArrayViewInitFromSchema(&holder.c_array_view,
+                                                       self._schema._ptr, &error.c_error)
+        if result != NANOARROW_OK:
+            error.raise_message("ArrowArrayViewInitFromSchema()", result)
+
+        result = ArrowArrayViewSetArray(&holder.c_array_view, self._ptr, &error.c_error)
+        if result != NANOARROW_OK:
+            error.raise_message("ArrowArrayViewSetArray()", result)
+
+        # The view keeps both the holder (owner of the view struct) and
+        # this array alive
+        return ArrayView(holder, holder._addr(), self._schema, self)
+
+
+cdef class ArrayView:
+    """ArrowArrayView wrapper
+
+    The ArrowArrayView is a nanoarrow C library structure that provides
+    structured access to buffer addresses, buffer sizes, and buffer
+    data types. The buffer data is usually propagated from an ArrowArray
+    but can also be propagated from other types of objects (e.g., serialized
+    IPC). The offset and length of this view are independent of its parent
+    (i.e., this object can also represent a slice of its parent).
+
+    Examples
+    --------
+
+    >>> import pyarrow as pa
+    >>> import numpy as np
+    >>> import nanoarrow as na
+    >>> array_view = na.array(pa.array(["one", "two", "three", None])).view()
+    >>> np.array(array_view.buffers[1])
+    array([ 0,  3,  6, 11, 11], dtype=int32)
+    >>> np.array(array_view.buffers[2])
+    array([b'o', b'n', b'e', b't', b'w', b'o', b't', b'h', b'r', b'e', b'e'],
+          dtype='|S1')
+    """
+    # Python object kept alive for as long as this wrapper exists
+    cdef object _base
+    # Borrowed pointer to the wrapped struct
+    cdef ArrowArrayView* _ptr
+    cdef Schema _schema
+    # Second Python object kept alive alongside _base (may be None)
+    cdef object _base_buffer
+
+    def __cinit__(self, object base, uintptr_t addr, Schema schema, object base_buffer):
+        self._ptr = <ArrowArrayView*>addr
+        self._schema = schema
+        self._base = base
+        self._base_buffer = base_buffer
+
+    @property
+    def length(self):
+        return self._ptr.length
+
+    @property
+    def offset(self):
+        return self._ptr.offset
+
+    @property
+    def null_count(self):
+        return self._ptr.null_count
+
+    @property
+    def children(self):
+        """An ArrayViewChildren wrapper for this view
+        """
+        return ArrayViewChildren(self)
+
+    @property
+    def buffers(self):
+        """An ArrayViewBuffers wrapper for this view
+        """
+        return ArrayViewBuffers(self)
+
+    @property
+    def dictionary(self):
+        """The dictionary ArrayView, or None if the C pointer is NULL
+        """
+        if self._ptr.dictionary != NULL:
+            dictionary_addr = <uintptr_t>self._ptr.dictionary
+            return ArrayView(self, dictionary_addr, self._schema.dictionary, None)
+
+        return None
+
+    @property
+    def schema(self):
+        """The Schema passed at construction
+        """
+        return self._schema
+
+
+cdef class SchemaChildren:
+    """Sequence-like accessor for the children of a Schema
+
+    The number of children is read once at construction; each child
+    Schema wrapper is only created when it is indexed. Negative indices
+    are not supported.
+    """
+    cdef Schema _parent
+    cdef int64_t _length
+
+    def __cinit__(self, Schema parent):
+        self._length = parent._ptr.n_children
+        self._parent = parent
+
+    def __len__(self):
+        return self._length
+
+    def __getitem__(self, k):
+        k = int(k)
+        if not (0 <= k < self._length):
+            raise IndexError(f"{k} out of range [0, {self._length})")
+
+        # The child wrapper holds a reference to the parent Schema
+        child_addr = self._child_addr(k)
+        return Schema(self._parent, child_addr)
+
+    cdef _child_addr(self, int64_t i):
+        # Address of the i-th child struct as a Python integer
+        return <uintptr_t>self._parent._ptr.children[i]
+
+
+cdef class SchemaMetadata:
+    """Wrapper for a lazily-parsed Schema.metadata string
+
+    Iterating yields ``(key, value)`` pairs where the key is a str (decoded
+    as UTF-8) and the value is bytes. ``len()`` gives the number of keys
+    reported by the metadata reader.
+    """
+
+    # Python object kept alive for as long as this wrapper exists
+    cdef object _parent
+    # Borrowed pointer to the encoded metadata
+    cdef const char* _metadata
+    # Reader used by __len__() (iteration uses its own reader)
+    cdef ArrowMetadataReader _reader
+
+    def __cinit__(self, object parent, uintptr_t ptr):
+        self._parent = parent
+        self._metadata = <const char*>ptr
+
+    def _init_reader(self):
+        """(Re)initialize the shared reader at the start of the metadata
+        """
+        cdef int result = ArrowMetadataReaderInit(&self._reader, self._metadata)
+        if result != NANOARROW_OK:
+            Error.raise_error("ArrowMetadataReaderInit()", result)
+
+    def __len__(self):
+        self._init_reader()
+        return self._reader.remaining_keys
+
+    def __iter__(self):
+        # Each iteration gets a private reader: sharing self._reader would
+        # let len() or a second iterator rewind an iteration in progress.
+        cdef ArrowMetadataReader reader
+        cdef ArrowStringView key
+        cdef ArrowStringView value
+        cdef int result = ArrowMetadataReaderInit(&reader, self._metadata)
+        if result != NANOARROW_OK:
+            Error.raise_error("ArrowMetadataReaderInit()", result)
+
+        while reader.remaining_keys > 0:
+            result = ArrowMetadataReaderRead(&reader, &key, &value)
+            if result != NANOARROW_OK:
+                Error.raise_error("ArrowMetadataReaderRead()", result)
+
+            key_obj = PyBytes_FromStringAndSize(key.data, key.size_bytes).decode('UTF-8')
+            value_obj = PyBytes_FromStringAndSize(value.data, value.size_bytes)
+            yield key_obj, value_obj
+
+
+cdef class ArrayChildren:
+    """Sequence-like accessor for the children of an Array
+
+    The number of children is read once at construction; each child
+    Array wrapper is only created when it is indexed. Negative indices
+    are not supported.
+    """
+    cdef Array _parent
+    cdef int64_t _length
+
+    def __cinit__(self, Array parent):
+        self._length = parent._ptr.n_children
+        self._parent = parent
+
+    def __getitem__(self, k):
+        k = int(k)
+        if not (0 <= k < self._length):
+            raise IndexError(f"{k} out of range [0, {self._length})")
+
+        # The child wrapper holds a reference to the parent Array and is
+        # paired with the matching child of the parent's schema
+        child_addr = self._child_addr(k)
+        return Array(self._parent, child_addr, self._parent.schema.children[k])
+
+    def __len__(self):
+        return self._length
+
+    cdef _child_addr(self, int64_t i):
+        # Address of the i-th child struct as a Python integer
+        return <uintptr_t>self._parent._ptr.children[i]
+
+
+cdef class ArrayViewChildren:
+    """Sequence-like accessor for the children of an ArrayView
+
+    The number of children is read once at construction; each child
+    ArrayView wrapper is only created when it is indexed. Negative indices
+    are not supported.
+    """
+    cdef ArrayView _parent
+    cdef int64_t _length
+
+    def __cinit__(self, ArrayView parent):
+        self._length = parent._ptr.n_children
+        self._parent = parent
+
+    def __getitem__(self, k):
+        k = int(k)
+        if not (0 <= k < self._length):
+            raise IndexError(f"{k} out of range [0, {self._length})")
+
+        # The child wrapper holds a reference to the parent ArrayView and is
+        # paired with the matching child of the parent's schema
+        child_addr = self._child_addr(k)
+        child_schema = self._parent._schema.children[k]
+        return ArrayView(self._parent, child_addr, child_schema, None)
+
+    def __len__(self):
+        return self._length
+
+    cdef _child_addr(self, int64_t i):
+        # Address of the i-th child struct as a Python integer
+        return <uintptr_t>self._parent._ptr.children[i]
+
+
+cdef class BufferView:
+    """Wrapper for Array buffer content
+
+    This object is a Python wrapper around a buffer held by an Array.
+    It implements the Python buffer protocol and is best accessed through
+    another implementor (e.g., `np.array(array_view.buffers[1])`). Note that
+    this buffer content does not apply any parent offset.
+
+    The buffer is exported as one-dimensional and read-only.
+    """
+    # Python object kept alive for as long as this wrapper exists
+    cdef object _base
+    # Borrowed pointer to the wrapped struct
+    cdef ArrowBufferView* _ptr
+    cdef ArrowBufferType _buffer_type
+    cdef ArrowType _buffer_data_type
+    cdef Py_ssize_t _element_size_bits
+    # Number of items and item size in bytes; stored on the object because
+    # __getbuffer__ hands out pointers to them
+    cdef Py_ssize_t _shape
+    cdef Py_ssize_t _strides
+
+    def __cinit__(self, object base, uintptr_t addr,
+                 ArrowBufferType buffer_type, ArrowType buffer_data_type,
+                 Py_ssize_t element_size_bits):
+        self._base = base
+        self._ptr = <ArrowBufferView*>addr
+        self._buffer_type = buffer_type
+        self._buffer_data_type = buffer_data_type
+        self._element_size_bits = element_size_bits
+        self._strides = self._item_size()
+        self._shape = self._ptr.size_bytes // self._strides
+
+
+    cdef Py_ssize_t _item_size(self):
+        # Bool, string and binary buffers are exported byte by byte; every
+        # other type uses its element size converted from bits to bytes
+        if self._buffer_data_type == NANOARROW_TYPE_BOOL:
+            return 1
+        elif self._buffer_data_type == NANOARROW_TYPE_STRING:
+            return 1
+        elif self._buffer_data_type == NANOARROW_TYPE_BINARY:
+            return 1
+        else:
+            return self._element_size_bits // 8
+
+    cdef const char* _get_format(self):
+        # struct-module style format codes (native mode). 64-bit integers
+        # use "q"/"Q" (C long long) rather than "l"/"L" (C long), because
+        # C long is only 4 bytes on some platforms (e.g. 64-bit Windows)
+        # and would then disagree with the 8-byte item size.
+        if self._buffer_data_type == NANOARROW_TYPE_INT8:
+            return "b"
+        elif self._buffer_data_type == NANOARROW_TYPE_UINT8:
+            return "B"
+        elif self._buffer_data_type == NANOARROW_TYPE_INT16:
+            return "h"
+        elif self._buffer_data_type == NANOARROW_TYPE_UINT16:
+            return "H"
+        elif self._buffer_data_type == NANOARROW_TYPE_INT32:
+            return "i"
+        elif self._buffer_data_type == NANOARROW_TYPE_UINT32:
+            return "I"
+        elif self._buffer_data_type == NANOARROW_TYPE_INT64:
+            return "q"
+        elif self._buffer_data_type == NANOARROW_TYPE_UINT64:
+            return "Q"
+        elif self._buffer_data_type == NANOARROW_TYPE_FLOAT:
+            return "f"
+        elif self._buffer_data_type == NANOARROW_TYPE_DOUBLE:
+            return "d"
+        elif self._buffer_data_type == NANOARROW_TYPE_STRING:
+            return "c"
+        else:
+            # Anything else is exported as unsigned bytes
+            return "B"
+
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        buffer.buf = <void*>self._ptr.data.data
+        buffer.format = self._get_format()
+        buffer.internal = NULL
+        buffer.itemsize = self._strides
+        buffer.len = self._ptr.size_bytes
+        buffer.ndim = 1
+        buffer.obj = self
+        buffer.readonly = 1
+        buffer.shape = &self._shape
+        buffer.strides = &self._strides
+        buffer.suboffsets = NULL
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        # Nothing to release: no per-export resources are allocated
+        pass
+
+
+cdef class ArrayViewBuffers:
+    """Sequence-like accessor for the buffers of an ArrayView
+
+    The length is the number of leading layout slots (out of three) whose
+    buffer type is not NANOARROW_BUFFER_TYPE_NONE. Indexing returns a
+    BufferView, or None when the buffer's data pointer is NULL. Negative
+    indices are not supported.
+    """
+    cdef ArrayView _array_view
+    cdef int64_t _length
+
+    def __cinit__(self, ArrayView array_view):
+        cdef int64_t n_buffers = 0
+        self._array_view = array_view
+
+        # Count slots up to (not including) the first unused one
+        while n_buffers < 3:
+            if array_view._ptr.layout.buffer_type[n_buffers] == NANOARROW_BUFFER_TYPE_NONE:
+                break
+            n_buffers += 1
+
+        self._length = n_buffers
+
+    def __len__(self):
+        return self._length
+
+    def __getitem__(self, k):
+        k = int(k)
+        if not (0 <= k < self._length):
+            raise IndexError(f"{k} out of range [0, {self._length})")
+
+        cdef ArrowArrayView* view_ptr = self._array_view._ptr
+        cdef ArrowBufferView* buffer_view = &(view_ptr.buffer_views[k])
+        if buffer_view.data.data == NULL:
+            return None
+
+        # The BufferView holds a reference to the ArrayView
+        return BufferView(
+            self._array_view,
+            <uintptr_t>buffer_view,
+            view_ptr.layout.buffer_type[k],
+            view_ptr.layout.buffer_data_type[k],
+            view_ptr.layout.element_size_bits[k]
+        )
+
+
+cdef class ArrayStream:
+    """ArrowArrayStream wrapper
+
+    This class provides a user-facing interface to access the fields of
+    an ArrowArrayStream as defined in the Arrow C Stream interface.
+    These objects are usually created using `nanoarrow.array_stream()`.
+
+    Iterating over this object yields each remaining Array and stops
+    cleanly when the stream is exhausted.
+
+    Examples
+    --------
+
+    >>> import pyarrow as pa
+    >>> import nanoarrow as na
+    >>> pa_column = pa.array([1, 2, 3], pa.int32())
+    >>> pa_batch = pa.record_batch([pa_column], names=["col1"])
+    >>> pa_reader = pa.RecordBatchReader.from_batches(pa_batch.schema, [pa_batch])
+    >>> array_stream = na.array_stream(pa_reader)
+    >>> array_stream.get_schema()
+    struct<col1: int32>
+    >>> array_stream.get_next().length
+    3
+    >>> array_stream.get_next() is None
+    Traceback (most recent call last):
+      ...
+    StopIteration
+    """
+    # Python object kept alive for as long as this wrapper exists
+    cdef object _base
+    # Borrowed pointer to the wrapped struct
+    cdef ArrowArrayStream* _ptr
+    # Schema shared by every Array returned from get_next()
+    cdef object _cached_schema
+
+    def __cinit__(self, object base, uintptr_t addr):
+        self._base = base
+        self._ptr = <ArrowArrayStream*>addr
+        self._cached_schema = None
+
+    def _addr(self):
+        """Return the address of the wrapped ArrowArrayStream as an integer
+        """
+        return <uintptr_t>self._ptr
+
+    def is_valid(self):
+        """Return True if the pointer is non-NULL and the stream is not released
+        """
+        return self._ptr != NULL and self._ptr.release != NULL
+
+    def _assert_valid(self):
+        """Raise RuntimeError unless is_valid() would return True
+        """
+        if self._ptr == NULL:
+            raise RuntimeError("array stream pointer is NULL")
+        if self._ptr.release == NULL:
+            raise RuntimeError("array stream is released")
+
+    def _raise_last_error(self, what, code):
+        """Raise a NanoarrowException using the stream's get_last_error()
+
+        The message is omitted when get_last_error() returns NULL.
+        """
+        cdef const char* message = self._ptr.get_last_error(self._ptr)
+        if message != NULL:
+            raise NanoarrowException(what, code, message.decode("UTF-8"))
+        else:
+            raise NanoarrowException(what, code)
+
+    def _get_schema(self, Schema schema not None):
+        """Call the stream's get_schema() callback to populate ``schema``
+
+        Raises NanoarrowException if the callback reports an error. This
+        does not touch the schema cached for get_next().
+        """
+        self._assert_valid()
+        cdef int code = self._ptr.get_schema(self._ptr, schema._ptr)
+        if code != NANOARROW_OK:
+            self._raise_last_error("ArrowArrayStream::get_schema()", code)
+
+    def get_schema(self):
+        """Get the schema associated with this stream
+
+        Every call invokes the stream's get_schema() callback and returns
+        a new Schema.
+        """
+        out = Schema.allocate()
+        self._get_schema(out)
+        return out
+
+    def get_next(self):
+        """Get the next Array from this stream
+
+        Raises StopIteration when there are no more arrays in this stream
+        and NanoarrowException if the stream reports an error.
+        """
+        self._assert_valid()
+
+        # We return a reference to the same Python Schema object for each
+        # Array that is returned. This is independent of get_schema(),
+        # which is guaranteed to call the C object's callback and
+        # faithfully pass on the returned value. The schema is only cached
+        # once it has been populated successfully so that a failed
+        # callback is retried on the next call.
+        if self._cached_schema is None:
+            schema = Schema.allocate()
+            self._get_schema(schema)
+            self._cached_schema = schema
+
+        cdef Array array = Array.allocate(self._cached_schema)
+        cdef int code = self._ptr.get_next(self._ptr, array._ptr)
+        if code != NANOARROW_OK:
+            self._raise_last_error("ArrowArrayStream::get_next()", code)
+
+        # An array that is still released after a successful call is how
+        # the end of the stream is detected
+        if not array.is_valid():
+            raise StopIteration()
+        else:
+            return array
+
+    def __iter__(self):
+        # StopIteration must not escape from a generator body (PEP 479
+        # turns it into RuntimeError), so end the generator explicitly.
+        while True:
+            try:
+                array = self.get_next()
+            except StopIteration:
+                return
+            yield array
+
+    @staticmethod
+    def allocate():
+        """Create an ArrayStream backed by a freshly allocated ArrayStreamHolder
+        """
+        base = ArrayStreamHolder()
+        return ArrayStream(base, base._addr())
diff --git a/python/nanoarrow/lib.py b/python/nanoarrow/lib.py
new file mode 100644
index 0000000..a3c27e7
--- /dev/null
+++ b/python/nanoarrow/lib.py
@@ -0,0 +1,69 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from ._lib import Schema, Array, ArrayStream
+
+
+def schema(obj):
+    """Convert obj to a nanoarrow Schema
+
+    A Schema is returned unchanged. Any other object must provide an
+    ``_export_to_c()`` method, which is called with the address of a newly
+    allocated Schema. Raises TypeError for objects without that method.
+    """
+    if isinstance(obj, Schema):
+        return obj
+
+    # Not particularly safe because _export_to_c() could be exporting an
+    # array, schema, or array_stream. The ideal
+    # solution here would be something like __arrow_c_schema__()
+    if not hasattr(obj, "_export_to_c"):
+        raise TypeError(
+            f"Can't convert object of type {type(obj).__name__} to nanoarrow.Schema"
+        )
+
+    out = Schema.allocate()
+    obj._export_to_c(out._addr())
+    return out
+
+
+def array(obj):
+    """Convert obj to a nanoarrow Array
+
+    An Array is returned unchanged. Any other object must provide an
+    ``_export_to_c()`` method, which is called with the addresses of a newly
+    allocated Array and of its (also newly allocated) Schema. Raises
+    TypeError for objects without that method.
+    """
+    if isinstance(obj, Array):
+        return obj
+
+    # Somewhat safe because calling _export_to_c() with two arguments will
+    # not fail with a crash (but will fail with a confusing error). The ideal
+    # solution here would be something like __arrow_c_array__()
+    if not hasattr(obj, "_export_to_c"):
+        raise TypeError(
+            f"Can't convert object of type {type(obj).__name__} to nanoarrow.Array"
+        )
+
+    out = Array.allocate(Schema.allocate())
+    obj._export_to_c(out._addr(), out.schema._addr())
+    return out
+
+
+def array_stream(obj):
+    """Convert obj to a nanoarrow ArrayStream
+
+    An ArrayStream is returned unchanged. Any other object must provide an
+    ``_export_to_c()`` method, which is called with the address of a newly
+    allocated ArrayStream. Raises TypeError for objects without that method.
+    """
+    # Pass through objects that are already the target type (this used to
+    # test for Schema, which returned schemas as if they were streams and
+    # never matched an actual ArrayStream)
+    if isinstance(obj, ArrayStream):
+        return obj
+
+    # Not particularly safe because _export_to_c() could be exporting an
+    # array, schema, or array_stream. The ideal
+    # solution here would be something like __arrow_c_array_stream__()
+    if hasattr(obj, "_export_to_c"):
+        out = ArrayStream.allocate()
+        obj._export_to_c(out._addr())
+        return out
+    else:
+        raise TypeError(
+            f"Can't convert object of type {type(obj).__name__} to nanoarrow.ArrayStream"
+        )
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 1cc2c17..743cebe 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -19,14 +19,13 @@
 [project]
 name = "nanoarrow"
 version = "1.0.0-alpha0"
-description = ""
+description = "Python bindings to the nanoarrow C library"
 authors = [{name = "Apache Arrow Developers", email = "dev@arrow.apache.org"}]
 license = {text = "Apache-2.0"}
 requires-python = ">=3.8"
-dependencies = ["numpy"]
 
 [project.optional-dependencies]
-test = ["pyarrow", "pytest"]
+test = ["pyarrow", "pytest", "numpy"]
 
 [project.urls]
 homepage = "https://arrow.apache.org"
@@ -36,7 +35,6 @@ repository = "https://github.com/apache/arrow-nanoarrow"
 requires = [
     "setuptools >= 61.0.0",
     "setuptools-scm",
-    "Cython",
-    "oldest-supported-numpy",
+    "Cython"
 ]
 build-backend = "setuptools.build_meta"
diff --git a/python/setup.py b/python/setup.py
index f6f7efb..4222cd8 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -17,33 +17,43 @@
 # specific language governing permissions and limitations
 # under the License.
 
-import shutil
-from pathlib import Path
-
+import os
+import sys
+import subprocess
 from setuptools import Extension, setup
 
-import numpy as np
+# Run bootstrap.py to run cmake generating a fresh bundle based on this
+# checkout or copy from ../dist if the caller doesn't have cmake available.
+# Note that bootstrap.py won't exist if building from sdist.
+this_dir = os.path.dirname(__file__)
+bootstrap_py = os.path.join(this_dir, "bootstrap.py")
+if os.path.exists(bootstrap_py):
+    subprocess.run([sys.executable, bootstrap_py], check=True)  # abort on failure
 
 
-# setuptools gets confused by relative paths that extend above the project root
-target = Path(__file__).parent / "src" / "nanoarrow"
-shutil.copy(
-    Path(__file__).parent / "../dist/nanoarrow.c", target / "nanoarrow.c"
-)
-shutil.copy(
-    Path(__file__).parent / "../dist/nanoarrow.h", target / "nanoarrow.h"
-)
+# Set some extra flags for compiling with coverage support
+if os.getenv("NANOARROW_PYTHON_COVERAGE") == "1":
+    coverage_compile_args = ["--coverage"]
+    coverage_link_args = ["--coverage"]
+    coverage_define_macros = [("CYTHON_TRACE", 1)]
+else:
+    coverage_compile_args = []
+    coverage_link_args = []
+    coverage_define_macros = []
 
 setup(
     ext_modules=[
         Extension(
             name="nanoarrow._lib",
-            include_dirs=[np.get_include(), "src/nanoarrow"],
-            language="c++",
+            include_dirs=["nanoarrow"],
+            language="c",
             sources=[
-                "src/nanoarrow/_lib.pyx",
-                "src/nanoarrow/nanoarrow.c",
+                "nanoarrow/_lib.pyx",
+                "nanoarrow/nanoarrow.c",
             ],
+            extra_compile_args=coverage_compile_args,
+            extra_link_args=coverage_link_args,
+            define_macros=coverage_define_macros,
         )
     ]
 )
diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
deleted file mode 100644
index a6b4da1..0000000
--- a/python/src/nanoarrow/_lib.pyx
+++ /dev/null
@@ -1,86 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# cython: language_level = 3
-
-"""Low-level nanoarrow Python bindings."""
-
-from libc.stdint cimport uint8_t, uintptr_t
-
-from nanoarrow_c cimport *
-
-import numpy as np
-cimport numpy as cnp
-
-cnp.import_array()
-
-
-cdef dict _numpy_type_map = {
-    NANOARROW_TYPE_UINT8: cnp.NPY_UINT8,
-    NANOARROW_TYPE_INT8: cnp.NPY_INT8,
-    NANOARROW_TYPE_UINT16: cnp.NPY_UINT16,
-    NANOARROW_TYPE_INT16: cnp.NPY_INT16,
-    NANOARROW_TYPE_UINT32: cnp.NPY_UINT32,
-    NANOARROW_TYPE_INT32: cnp.NPY_INT32,
-    NANOARROW_TYPE_UINT64: cnp.NPY_UINT64,
-    NANOARROW_TYPE_INT64: cnp.NPY_INT64,
-    NANOARROW_TYPE_HALF_FLOAT: cnp.NPY_FLOAT16,
-    NANOARROW_TYPE_FLOAT: cnp.NPY_FLOAT32,
-    NANOARROW_TYPE_DOUBLE: cnp.NPY_FLOAT64,
-}
-
-
-def as_numpy_array(arr):
-    cdef ArrowSchema schema
-    cdef ArrowArray array
-    cdef ArrowArrayView array_view
-    cdef ArrowError error
-
-    arr._export_to_c(<uintptr_t> &array, <uintptr_t> &schema)
-    ArrowArrayViewInitFromSchema(&array_view, &schema, &error)
-
-    # primitive arrays have DATA as the second buffer
-    if array_view.layout.buffer_type[1] != NANOARROW_BUFFER_TYPE_DATA:
-        raise TypeError("Cannot convert a non-primitive array")
-
-    # disallow nulls for this method
-    if array.null_count > 0:
-        raise ValueError("Cannot convert array with nulls")
-    elif array.null_count < 0:
-        # not yet computed
-        if array_view.layout.buffer_type[0] == NANOARROW_BUFFER_TYPE_VALIDITY:
-            if array.buffers[0] != NULL:
-                null_count = ArrowBitCountSet(
-                    <const uint8_t *>array.buffers[0], array.offset, array.length
-                )
-                if null_count > 0:
-                    raise ValueError("Cannot convert array with nulls")
-
-    cdef int type_num
-    if array_view.storage_type in _numpy_type_map:
-        type_num = _numpy_type_map[array_view.storage_type]
-    else:
-        raise NotImplementedError(array_view.storage_type)
-
-    cdef cnp.npy_intp dims[1]
-    dims[0] = array.length
-    cdef cnp.ndarray result = cnp.PyArray_New(
-        np.ndarray, 1, dims, type_num, NULL, <void *> array.buffers[1], -1, 0, <object>NULL
-    )
-    # TODO set base
-
-    return result
diff --git a/python/src/nanoarrow/nanoarrow_c.pxd b/python/src/nanoarrow/nanoarrow_c.pxd
deleted file mode 100644
index 440f449..0000000
--- a/python/src/nanoarrow/nanoarrow_c.pxd
+++ /dev/null
@@ -1,127 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# cython: language_level = 3
-
-from libc.stdint cimport int64_t, int8_t, uint8_t
-
-
-cdef extern from "nanoarrow.h":
-    struct ArrowSchema:
-        const char* format
-        int64_t n_children
-        void (*release)(ArrowSchema*)
-
-    struct ArrowArray:
-        int64_t length
-        int64_t null_count
-        int64_t offset
-        const void** buffers
-        void (*release)(ArrowArray*)
-
-    struct ArrowArrayStream:
-        int (*get_schema)(ArrowArrayStream* stream, ArrowSchema* out)
-
-    ctypedef int ArrowErrorCode
-
-    enum ArrowType:
-        NANOARROW_TYPE_UNINITIALIZED = 0
-        NANOARROW_TYPE_NA = 1
-        NANOARROW_TYPE_BOOL
-        NANOARROW_TYPE_UINT8
-        NANOARROW_TYPE_INT8
-        NANOARROW_TYPE_UINT16
-        NANOARROW_TYPE_INT16
-        NANOARROW_TYPE_UINT32
-        NANOARROW_TYPE_INT32
-        NANOARROW_TYPE_UINT64
-        NANOARROW_TYPE_INT64
-        NANOARROW_TYPE_HALF_FLOAT
-        NANOARROW_TYPE_FLOAT
-        NANOARROW_TYPE_DOUBLE
-        NANOARROW_TYPE_STRING
-        NANOARROW_TYPE_BINARY
-        NANOARROW_TYPE_FIXED_SIZE_BINARY
-        NANOARROW_TYPE_DATE32
-        NANOARROW_TYPE_DATE64
-        NANOARROW_TYPE_TIMESTAMP
-        NANOARROW_TYPE_TIME32
-        NANOARROW_TYPE_TIME64
-        NANOARROW_TYPE_INTERVAL_MONTHS
-        NANOARROW_TYPE_INTERVAL_DAY_TIME
-        NANOARROW_TYPE_DECIMAL128
-        NANOARROW_TYPE_DECIMAL256
-        NANOARROW_TYPE_LIST
-        NANOARROW_TYPE_STRUCT
-        NANOARROW_TYPE_SPARSE_UNION
-        NANOARROW_TYPE_DENSE_UNION
-        NANOARROW_TYPE_DICTIONARY
-        NANOARROW_TYPE_MAP
-        NANOARROW_TYPE_EXTENSION
-        NANOARROW_TYPE_FIXED_SIZE_LIST
-        NANOARROW_TYPE_DURATION
-        NANOARROW_TYPE_LARGE_STRING
-        NANOARROW_TYPE_LARGE_BINARY
-        NANOARROW_TYPE_LARGE_LIST
-        NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO
-
-    enum ArrowBufferType:
-        NANOARROW_BUFFER_TYPE_NONE
-        NANOARROW_BUFFER_TYPE_VALIDITY
-        NANOARROW_BUFFER_TYPE_TYPE_ID
-        NANOARROW_BUFFER_TYPE_UNION_OFFSET
-        NANOARROW_BUFFER_TYPE_DATA_OFFSET
-        NANOARROW_BUFFER_TYPE_DATA
-
-    struct ArrowError:
-        pass
-
-    const char* ArrowErrorMessage(ArrowError* error)
-
-    struct ArrowLayout:
-        ArrowBufferType buffer_type[3]
-        int64_t element_size_bits[3]
-        int64_t child_size_elements
-
-    cdef union buffer_data:
-        const void* data
-        const int8_t* as_int8
-        const uint8_t* as_uint8
-
-    struct ArrowBufferView:
-        buffer_data data
-        int64_t size_bytes
-
-    struct ArrowBuffer:
-        uint8_t* data
-        int64_t size_bytes
-
-    struct ArrowBitmap:
-        ArrowBuffer buffer
-        int64_t size_bits
-
-    struct ArrowArrayView:
-        ArrowArray* array
-        ArrowType storage_type
-        ArrowLayout layout
-        ArrowBufferView buffer_views[3]
-        int64_t n_children
-        ArrowArrayView** children
-
-    ArrowErrorCode ArrowArrayViewInitFromSchema(ArrowArrayView* array_view, ArrowSchema* schema, ArrowError* error)
-    ArrowErrorCode ArrowArrayViewSetArray(ArrowArrayView* array_view, ArrowArray* array, ArrowError* error)
-    int64_t ArrowBitCountSet(const uint8_t* bits, int64_t i_from, int64_t i_to)
diff --git a/python/tests/test_nanoarrow.py b/python/tests/test_nanoarrow.py
index fd76534..3162274 100644
--- a/python/tests/test_nanoarrow.py
+++ b/python/tests/test_nanoarrow.py
@@ -1,27 +1,293 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import sys
+import re
 import numpy as np
 import pyarrow as pa
+import pytest
 
-import nanoarrow
+import nanoarrow as na
 
-import pytest
 
+def test_c_version():
+    re_version = re.compile(r"^[0-9]+\.[0-9]+\.[0-9]+(-SNAPSHOT)?$")
+    assert re_version.match(na.c_version()) is not None
+
+
+def test_schema_helper():
+    schema = na.Schema.allocate()
+    assert na.schema(schema) is schema
+
+    schema = na.schema(pa.null())
+    assert isinstance(schema, na.Schema)
+
+    with pytest.raises(TypeError):
+        na.schema(None)
+
+
+def test_array_helper():
+    array = na.Array.allocate(na.Schema.allocate())
+    assert na.array(array) is array
+
+    array = na.array(pa.array([], pa.null()))
+    assert isinstance(array, na.Array)
+
+    with pytest.raises(TypeError):
+        na.array(None)
+
+
+def test_schema_basic():
+    schema = na.Schema.allocate()
+    assert schema.is_valid() is False
+    assert repr(schema) == "[invalid: schema is released]"
+
+    schema = na.schema(pa.schema([pa.field("some_name", pa.int32())]))
+
+    assert schema.format == "+s"
+    assert schema.flags == 0
+    assert schema.metadata is None
+    assert len(schema.children) == 1
+    assert schema.children[0].format == "i"
+    assert schema.children[0].name == "some_name"
+    assert repr(schema.children[0]) == "int32"
+    assert schema.dictionary is None
+
+    with pytest.raises(IndexError):
+        schema.children[1]
+
+
+def test_schema_dictionary():
+    schema = na.schema(pa.dictionary(pa.int32(), pa.utf8()))
+    assert schema.format == "i"
+    assert schema.dictionary.format == "u"
+
+
+def test_schema_metadata():
+    meta = {"key1": "value1", "key2": "value2"}
+    schema = na.schema(pa.field("", pa.int32(), metadata=meta))
+
+    assert len(schema.metadata) == 2
+
+    meta2 = {k: v for k, v in schema.metadata}
+    assert list(meta2.keys()) == ["key1", "key2"]
+    assert list(meta2.values()) == [b"value1", b"value2"]
+
+
+def test_schema_view():
+    schema = na.Schema.allocate()
+    with pytest.raises(RuntimeError):
+        schema.view()
+
+    schema = na.schema(pa.int32())
+    view = schema.view()
+    assert view.type == "int32"
+    assert view.storage_type == "int32"
+
+    assert view.fixed_size is None
+    assert view.decimal_bitwidth is None
+    assert view.decimal_scale is None
+    assert view.time_unit is None
+    assert view.timezone is None
+    assert view.union_type_ids is None
+    assert view.extension_name is None
+    assert view.extension_metadata is None
+
+
+def test_schema_view_extra_params():
+    schema = na.schema(pa.binary(12))
+    view = schema.view()
+    assert view.fixed_size == 12
+
+    schema = na.schema(pa.list_(pa.int32(), 12))
+    assert schema.view().fixed_size == 12  # view of the list schema, not the stale one
+
+    schema = na.schema(pa.decimal128(10, 3))
+    view = schema.view()
+    assert view.decimal_bitwidth == 128
+    assert view.decimal_precision == 10
+    assert view.decimal_scale == 3
+
+    schema = na.schema(pa.decimal256(10, 3))
+    view = schema.view()
+    assert view.decimal_bitwidth == 256
+    assert view.decimal_precision == 10
+    assert view.decimal_scale == 3
+
+    schema = na.schema(pa.duration("us"))
+    view = schema.view()
+    assert view.time_unit == "us"
+
+    schema = na.schema(pa.timestamp("us", tz="America/Halifax"))
+    view = schema.view()
+    assert view.type == "timestamp"
+    assert view.storage_type == "int64"
+    assert view.time_unit == "us"
+    assert view.timezone == "America/Halifax"
+
+    meta = {
+        "ARROW:extension:name": "some_name",
+        "ARROW:extension:metadata": "some_metadata",
+    }
+    schema = na.schema(pa.field("", pa.int32(), metadata=meta))
+    view = schema.view()
+    assert view.extension_name == "some_name"
+    assert view.extension_metadata == b"some_metadata"
+
+
+def test_array():
+    array = na.array(pa.array([1, 2, 3], pa.int32()))
+    assert array.is_valid() is True
+    assert array.length == 3
+    assert array.offset == 0
+    assert array.null_count == 0
+    assert len(array.buffers) == 2
+    assert array.buffers[0] == 0
+    assert len(array.children) == 0
+    assert array.dictionary is None
+
+    with pytest.raises(IndexError):
+        array.children[1]
+
+
+def test_array_view():
+    array = na.array(pa.array([1, 2, 3], pa.int32()))
+    view = array.view()
+
+    assert view.schema is array.schema
+
+    data_buffer = memoryview(view.buffers[1])
+    data_buffer_copy = bytes(data_buffer)
+    assert len(data_buffer_copy) == 12
+
+    if sys.byteorder == "little":
+        assert data_buffer_copy == b"\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00"
+    else:
+        assert data_buffer_copy == b"\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x03"
+
+    with pytest.raises(IndexError):
+        view.children[1]
+
+
+def test_array_view_recursive():
+    pa_array_child = pa.array([1, 2, 3], pa.int32())
+    pa_array = pa.record_batch([pa_array_child], names=["some_column"])
+
+    array = na.array(pa_array)
+
+    assert array.schema.format == "+s"
+    assert array.length == 3
+    assert len(array.children) == 1
+
+    assert array.children[0].schema.format == "i"
+    assert array.children[0].length == 3
+    assert array.children[0].schema._addr() == array.schema.children[0]._addr()
+
+    view = array.view()
+    assert len(view.buffers) == 1
+    assert len(view.children) == 1
+    assert view.schema._addr() == array.schema._addr()
+
+    assert len(view.children[0].buffers) == 2
+    assert view.children[0].schema._addr() == array.schema.children[0]._addr()
+    assert view.children[0].schema._addr() == array.children[0].schema._addr()
+
+
+def test_array_view_dictionary():
+    pa_array = pa.array(["a", "b", "b"], pa.dictionary(pa.int32(), pa.utf8()))
+    array = na.array(pa_array)
+
+    assert array.schema.format == "i"
+    assert array.dictionary.schema.format == "u"
+
+    view = array.view()
+    assert len(view.buffers) == 2
+    assert len(view.dictionary.buffers) == 3
+
+
+def test_buffers_data():
+    data_types = [
+        (pa.uint8(), np.uint8()),
+        (pa.int8(), np.int8()),
+        (pa.uint16(), np.uint16()),
+        (pa.int16(), np.int16()),
+        (pa.uint32(), np.uint32()),
+        (pa.int32(), np.int32()),
+        (pa.uint64(), np.uint64()),
+        (pa.int64(), np.int64()),
+        (pa.float32(), np.float32()),
+        (pa.float64(), np.float64()),
+    ]
+
+    for pa_type, np_type in data_types:
+        view = na.array(pa.array([0, 1, 2], pa_type)).view()
+        np.testing.assert_array_equal(
+            np.array(view.buffers[1]), np.array([0, 1, 2], np_type)
+        )
+
+
+def test_buffers_string():
+    view = na.array(pa.array(["a", "bc", "def"])).view()
+
+    assert view.buffers[0] is None
+    np.testing.assert_array_equal(
+        np.array(view.buffers[1]), np.array([0, 1, 3, 6], np.int32())
+    )
+    np.testing.assert_array_equal(
+        np.array(view.buffers[2]), np.array(list("abcdef"), dtype="|S1")
+    )
+
+
+def test_buffers_binary():
+    view = na.array(pa.array([b"a", b"bc", b"def"])).view()
+
+    assert view.buffers[0] is None
+    np.testing.assert_array_equal(
+        np.array(view.buffers[1]), np.array([0, 1, 3, 6], np.int32())
+    )
+    np.testing.assert_array_equal(np.array(view.buffers[2]), np.array(list(b"abcdef")))
+
+
+def test_array_stream():
+    array_stream = na.ArrayStream.allocate()
+    assert array_stream.is_valid() is False
+    with pytest.raises(RuntimeError):
+        array_stream.get_schema()
+    with pytest.raises(RuntimeError):
+        array_stream.get_next()
+
+    pa_array_child = pa.array([1, 2, 3], pa.int32())
+    pa_array = pa.record_batch([pa_array_child], names=["some_column"])
+    reader = pa.RecordBatchReader.from_batches(pa_array.schema, [pa_array])
+    array_stream = na.array_stream(reader)
 
-def test_as_numpy_array():
-    
-    arr = pa.array([1, 2, 3])
-    result = nanoarrow.as_numpy_array(arr)
-    expected = arr.to_numpy()
-    np.testing.assert_array_equal(result, expected)
+    assert array_stream.is_valid() is True
+    array = array_stream.get_next()
+    assert array.schema.children[0].name == "some_column"
+    with pytest.raises(StopIteration):
+        array_stream.get_next()
 
-    arr = pa.array([1, 2, 3], pa.uint8())
-    result = nanoarrow.as_numpy_array(arr)
-    expected = arr.to_numpy()
-    np.testing.assert_array_equal(result, expected)
 
-    arr = pa.array([1, 2, None])
-    with pytest.raises(ValueError, match="Cannot convert array with nulls"):
-        nanoarrow.as_numpy_array(arr)
+def test_array_stream_iter():
+    pa_array_child = pa.array([1, 2, 3], pa.int32())
+    pa_array = pa.record_batch([pa_array_child], names=["some_column"])
+    reader = pa.RecordBatchReader.from_batches(pa_array.schema, [pa_array])
+    array_stream = na.array_stream(reader)
 
-    arr = pa.array([[1], [2, 3]])
-    with pytest.raises(TypeError, match="Cannot convert a non-primitive array"):
-        nanoarrow.as_numpy_array(arr)
+    arrays = list(array_stream)
+    assert len(arrays) == 1
+    assert arrays[0].schema.children[0].name == "some_column"
diff --git a/src/nanoarrow/nanoarrow_types.h b/src/nanoarrow/nanoarrow_types.h
index 9fb3cc1..2408a52 100644
--- a/src/nanoarrow/nanoarrow_types.h
+++ b/src/nanoarrow/nanoarrow_types.h
@@ -301,6 +301,8 @@ enum ArrowType {
 /// \ingroup nanoarrow-utils
 ///
 /// Returns NULL for invalid values for type
+static inline const char* ArrowTypeString(enum ArrowType type);
+
 static inline const char* ArrowTypeString(enum ArrowType type) {
   switch (type) {
     case NANOARROW_TYPE_NA:
@@ -419,6 +421,8 @@ enum ArrowValidationLevel {
 /// \ingroup nanoarrow-utils
 ///
 /// Returns NULL for invalid values for time_unit
+static inline const char* ArrowTimeUnitString(enum ArrowTimeUnit time_unit);
+
 static inline const char* ArrowTimeUnitString(enum ArrowTimeUnit time_unit) {
   switch (time_unit) {
     case NANOARROW_TIME_UNIT_SECOND:
@@ -461,6 +465,8 @@ struct ArrowStringView {
 
 /// \brief Return a view of a const C string
 /// \ingroup nanoarrow-utils
+static inline struct ArrowStringView ArrowCharView(const char* value);
+
 static inline struct ArrowStringView ArrowCharView(const char* value) {
   struct ArrowStringView out;