You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2020/04/07 02:01:11 UTC
[arrow] branch master updated: ARROW-8275: [Python] Update Feather documentation for V2, Python IPC API cleanups / deprecations

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new c9f0a02  ARROW-8275: [Python] Update Feather documentation for V2, Python IPC API cleanups / deprecations
c9f0a02 is described below

commit c9f0a02be10b21659b3a2f6655ef454b678ec163
Author: Wes McKinney <we...@apache.org>
AuthorDate: Mon Apr 6 20:58:35 2020 -0500

    ARROW-8275: [Python] Update Feather documentation for V2, Python IPC API cleanups / deprecations
    
    This splits out the Feather documentation into its own section and explains the V2 changes (support for all Arrow types and compression).
    
    This adds a FutureWarning to most of the `pyarrow.ipc` functions that are in the `pyarrow.*` namespace. Since these functions may confuse less experienced users, their purpose is clearest when they are accessed via the `pyarrow.ipc` namespace, for example `pa.ipc.read_schema`. This is consistent with the prior deprecation of `pa.open_stream` and `pa.open_file`.
    
    Also disables failure-on-warning when using 'make html' to build the Sphinx docs, and fixes various Sphinx warnings. I had to pin Sphinx to 2.4.4 because the newly released Sphinx 3.0.0 is not compatible with our Sphinx project; see ARROW-8340.
    
    Closes #6843 from wesm/ARROW-8275
    
    Authored-by: Wes McKinney <we...@apache.org>
    Signed-off-by: Wes McKinney <we...@apache.org>
---
 ci/conda_env_sphinx.yml                     |   3 +-
 docs/Makefile                               |   6 +-
 docs/requirements.txt                       |   2 +-
 docs/source/developers/documentation.rst    |   6 ++
 docs/source/format/Integration.rst          |   7 ++
 docs/source/python/api/dataset.rst          |   7 +-
 docs/source/python/api/filesystems.rst      |   3 -
 docs/source/python/api/formats.rst          |   1 +
 docs/source/python/api/ipc.rst              |  26 ++++---
 docs/source/python/feather.rst              | 109 ++++++++++++++++++++++++++++
 docs/source/python/index.rst                |   1 +
 docs/source/python/ipc.rst                  |  65 +++--------------
 python/pyarrow/__init__.py                  |  36 +++++++--
 python/pyarrow/_hdfs.pyx                    |  11 +--
 python/pyarrow/ipc.py                       |  25 ++++++-
 python/pyarrow/tests/test_cuda.py           |   2 +-
 python/pyarrow/tests/test_extension_type.py |   2 +-
 python/pyarrow/tests/test_ipc.py            |  56 +++++++-------
 python/pyarrow/tests/test_tensor.py         |  20 ++---
 19 files changed, 252 insertions(+), 136 deletions(-)

diff --git a/ci/conda_env_sphinx.yml b/ci/conda_env_sphinx.yml
index af6b407..318ef75 100644
--- a/ci/conda_env_sphinx.yml
+++ b/ci/conda_env_sphinx.yml
@@ -19,5 +19,6 @@
 breathe
 doxygen
 ipython
-sphinx
+# Pinned per ARROW-8340
+sphinx=2.4.4
 sphinx_rtd_theme
diff --git a/docs/Makefile b/docs/Makefile
index 5798f27..e38bc91 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -20,7 +20,11 @@
 #
 
 # You can set these variables from the command line.
-SPHINXOPTS    = -j8 -W
+
+# Do not fail the build if there are warnings
+# SPHINXOPTS    = -j8 -W
+SPHINXOPTS    = -j8
+
 SPHINXBUILD   = sphinx-build
 PAPER         =
 BUILDDIR      = _build
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 77ca657..8041140 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,5 +1,5 @@
 breathe
 ipython
 numpydoc
-sphinx
+sphinx==2.4.4
 sphinx_rtd_theme
diff --git a/docs/source/developers/documentation.rst b/docs/source/developers/documentation.rst
index d2503b1..e2e9e88 100644
--- a/docs/source/developers/documentation.rst
+++ b/docs/source/developers/documentation.rst
@@ -75,6 +75,12 @@ These two steps are mandatory and must be executed in order.
       make html
       popd
 
+.. note::
+
+   Building the documentation may fail if your build of pyarrow is not
+   sufficiently comprehensive. Portions of the Python API documentation
+   will also fail to build unless pyarrow was built with CUDA support.
+
 After these steps are completed, the documentation is rendered in HTML
 format in ``docs/_build/html``.  In particular, you can point your browser
 at ``docs/_build/html/index.html`` to read the docs and review any changes
diff --git a/docs/source/format/Integration.rst b/docs/source/format/Integration.rst
index 53fd557..04161b9 100644
--- a/docs/source/format/Integration.rst
+++ b/docs/source/format/Integration.rst
@@ -38,6 +38,7 @@ distribution and environment for running the tests by using
 `miniconda <https://conda.io/miniconda.html>`_. On Linux this is:
 
 .. code-block:: shell
+
    MINICONDA_URL=https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
    wget -O miniconda.sh $MINICONDA_URL
    bash miniconda.sh -b -p miniconda
@@ -50,16 +51,19 @@ distribution and environment for running the tests by using
 If you are on macOS, instead use the URL:
 
 .. code-block:: shell
+
    MINICONDA_URL=https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
 
 Once you have Python, you can install archery
 
 .. code-block:: shell
+
    pip install -e dev/archery
 
 The integration tests are run using the ``archery integration`` command.
 
 .. code-block:: shell
+
    archery integration --help
 
 In order to run integration tests, you'll first need to build each component
@@ -74,12 +78,14 @@ Depending on which components you have built, you can enable and add them to
 the archery test run. For example, if you only have the C++ project built, run:
 
 .. code-block:: shell
+
    archery integration --with-cpp=1
 
 
 For Java, it may look like:
 
 .. code-block:: shell
+
    VERSION=0.11.0-SNAPSHOT
    export ARROW_JAVA_INTEGRATION_JAR=$JAVA_DIR/tools/target/arrow-tools-$VERSION-jar-with-dependencies.jar
    archery integration --with-cpp=1 --with-java=1
@@ -87,6 +93,7 @@ For Java, it may look like:
 To run all tests, including Flight integration tests, do:
 
 .. code-block:: shell
+
    archery integration --with-all --run-flight
 
 Note that we run these tests in continuous integration, and the CI job uses
diff --git a/docs/source/python/api/dataset.rst b/docs/source/python/api/dataset.rst
index e7dea33..4cc59ac 100644
--- a/docs/source/python/api/dataset.rst
+++ b/docs/source/python/api/dataset.rst
@@ -33,7 +33,6 @@ Factory functions
 .. autosummary::
    :toctree: ../generated/
 
-   source
    dataset
    partitioning
    field
@@ -51,11 +50,9 @@ Classes
    PartitioningFactory
    DirectoryPartitioning
    HivePartitioning
-   Source
-   FileSystemSource
+   FileSystemDataset
    FileSystemFactoryOptions
-   FileSystemSourceFactory
+   FileSystemDatasetFactory
    Dataset
-   ScannerBuilder
    Scanner
    Expression
diff --git a/docs/source/python/api/filesystems.rst b/docs/source/python/api/filesystems.rst
index f4b7923..8023a8a 100644
--- a/docs/source/python/api/filesystems.rst
+++ b/docs/source/python/api/filesystems.rst
@@ -38,9 +38,6 @@ Concrete Subclasses
 .. autosummary::
    :toctree: ../generated/
 
-   LocalFileSystemOptions
    LocalFileSystem
-   S3Options
    S3FileSystem
-   HdfsOptions
    HadoopFileSystem
diff --git a/docs/source/python/api/formats.rst b/docs/source/python/api/formats.rst
index f5736f6..bb95905 100644
--- a/docs/source/python/api/formats.rst
+++ b/docs/source/python/api/formats.rst
@@ -44,6 +44,7 @@ Feather Files
    :toctree: ../generated/
 
    read_feather
+   read_table
    write_feather
 
 .. _api.json:
diff --git a/docs/source/python/api/ipc.rst b/docs/source/python/api/ipc.rst
index bd14d30..f4c0a17 100644
--- a/docs/source/python/api/ipc.rst
+++ b/docs/source/python/api/ipc.rst
@@ -28,20 +28,22 @@ Inter-Process Communication
 .. autosummary::
    :toctree: ../generated/
 
+   ipc.new_file
    ipc.open_file
+   ipc.new_stream
    ipc.open_stream
-   Message
-   MessageReader
-   RecordBatchFileReader
-   RecordBatchFileWriter
-   RecordBatchStreamReader
-   RecordBatchStreamWriter
-   read_message
-   read_record_batch
-   get_record_batch_size
-   read_tensor
-   write_tensor
-   get_tensor_size
+   ipc.read_message
+   ipc.read_record_batch
+   ipc.get_record_batch_size
+   ipc.read_tensor
+   ipc.write_tensor
+   ipc.get_tensor_size
+   ipc.Message
+   ipc.MessageReader
+   ipc.RecordBatchFileReader
+   ipc.RecordBatchFileWriter
+   ipc.RecordBatchStreamReader
+   ipc.RecordBatchStreamWriter
 
 Serialization
 -------------
diff --git a/docs/source/python/feather.rst b/docs/source/python/feather.rst
new file mode 100644
index 0000000..026ea98
--- /dev/null
+++ b/docs/source/python/feather.rst
@@ -0,0 +1,109 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements.  See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership.  The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License.  You may obtain a copy of the License at
+
+..   http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied.  See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. currentmodule:: pyarrow
+
+.. _feather:
+
+Feather File Format
+===================
+
+Feather is a portable file format for storing Arrow tables or data frames (from
+languages like Python or R) that utilizes the :ref:`Arrow IPC format <ipc>`
+internally. Feather was created early in the Arrow project as a proof of
+concept for fast, language-agnostic data frame storage for Python (pandas) and
+R. There are two file format versions for Feather:
+
+* Version 2 (V2), the default version, which is exactly represented as the
+  Arrow IPC file format on disk. V2 files support storing all Arrow data types
+  as well as compression with LZ4 or ZSTD. V2 was first made available in
+  Apache Arrow 0.17.0.
+* Version 1 (V1), a legacy version available starting in 2016, replaced by
+  V2. V1 files are distinct from Arrow IPC files and lack many features, such
+  as the ability to store all Arrow data types. V1 files also lack compression
+  support. We intend to maintain read support for V1 for the foreseeable
+  future.
+
+The ``pyarrow.feather`` module contains the read and write functions for the
+format. :func:`~pyarrow.feather.write_feather` accepts either a
+:class:`~pyarrow.Table` or ``pandas.DataFrame`` object:
+
+.. code-block:: python
+
+   import pyarrow.feather as feather
+   feather.write_feather(df, '/path/to/file')
+
+:func:`~pyarrow.feather.read_feather` reads a Feather file as a
+``pandas.DataFrame``. :func:`~pyarrow.feather.read_table` reads a Feather file
+as a :class:`~pyarrow.Table`. Internally, :func:`~pyarrow.feather.read_feather`
+simply calls :func:`~pyarrow.feather.read_table` and the result is converted to
+pandas:
+
+.. code-block:: python
+
+   # Result is pandas.DataFrame
+   read_df = feather.read_feather('/path/to/file')
+
+   # Result is pyarrow.Table
+   read_arrow = feather.read_table('/path/to/file')
+
+These functions can read and write with file-paths or file-like objects. For
+example:
+
+.. code-block:: python
+
+   with open('/path/to/file', 'wb') as f:
+       feather.write_feather(df, f)
+
+   with open('/path/to/file', 'rb') as f:
+       read_df = feather.read_feather(f)
+
+A file input to ``read_feather`` must support seeking.
+
+Using Compression
+-----------------
+
+As of Apache Arrow version 0.17.0, Feather V2 files (the default version)
+support two fast compression libraries, LZ4 (using the frame format) and
+ZSTD. LZ4 is used by default if it is available (which it should be if you
+obtained pyarrow through a normal package manager):
+
+.. code-block:: python
+
+   # Uses LZ4 by default
+   feather.write_feather(df, file_path)
+
+   # Use LZ4 explicitly
+   feather.write_feather(df, file_path, compression='lz4')
+
+   # Use ZSTD
+   feather.write_feather(df, file_path, compression='zstd')
+
+   # Do not compress
+   feather.write_feather(df, file_path, compression='uncompressed')
+
+Note that the default LZ4 compression generally yields much smaller files
+without sacrificing much read or write performance. In some instances,
+LZ4-compressed files may be faster to read and write than uncompressed due to
+reduced disk IO requirements.
+
+Writing Version 1 (V1) Files
+----------------------------
+
+For compatibility with libraries without support for Version 2 files, you can
+write the Version 1 format by passing ``version=1`` to ``write_feather``. We
+intend to maintain read support for V1 for the foreseeable future.
diff --git a/docs/source/python/index.rst b/docs/source/python/index.rst
index 22a29be..2c7ec3d 100644
--- a/docs/source/python/index.rst
+++ b/docs/source/python/index.rst
@@ -44,6 +44,7 @@ files into Arrow structures.
    pandas
    timestamps
    csv
+   feather
    json
    parquet
    cuda
diff --git a/docs/source/python/ipc.rst b/docs/source/python/ipc.rst
index 2a45b96..b7a032d 100644
--- a/docs/source/python/ipc.rst
+++ b/docs/source/python/ipc.rst
@@ -53,18 +53,19 @@ First, let's create a small record batch:
        pa.array([True, None, False, True])
    ]
 
-   batch = pa.RecordBatch.from_arrays(data, ['f0', 'f1', 'f2'])
+   batch = pa.record_batch(data, names=['f0', 'f1', 'f2'])
    batch.num_rows
    batch.num_columns
 
 Now, we can begin writing a stream containing some number of these batches. For
-this we use :class:`~pyarrow.RecordBatchStreamWriter`, which can write to a writeable
-``NativeFile`` object or a writeable Python object:
+this we use :class:`~pyarrow.RecordBatchStreamWriter`, which can write to a
+writeable ``NativeFile`` object or a writeable Python object. For convenience,
+a stream writer can be created with :func:`~pyarrow.ipc.new_stream`:
 
 .. ipython:: python
 
    sink = pa.BufferOutputStream()
-   writer = pa.RecordBatchStreamWriter(sink, batch.schema)
+   writer = pa.ipc.new_stream(sink, batch.schema)
 
 Here we used an in-memory Arrow buffer stream, but this could have been a
 socket or some other IO sink.
@@ -108,12 +109,13 @@ Writing and Reading Random Access Files
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 The :class:`~pyarrow.RecordBatchFileWriter` has the same API as
-:class:`~pyarrow.RecordBatchStreamWriter`:
+:class:`~pyarrow.RecordBatchStreamWriter`. You can create one with
+:func:`~pyarrow.ipc.new_file`:
 
 .. ipython:: python
 
    sink = pa.BufferOutputStream()
-   writer = pa.RecordBatchFileWriter(sink, batch.schema)
+   writer = pa.ipc.new_file(sink, batch.schema)
 
    for i in range(10):
       writer.write_batch(batch)
@@ -125,7 +127,7 @@ The :class:`~pyarrow.RecordBatchFileWriter` has the same API as
 The difference between :class:`~pyarrow.RecordBatchFileReader` and
 :class:`~pyarrow.RecordBatchStreamReader` is that the input source must have a
 ``seek`` method for random access. The stream reader only requires read
-operations. We can also use the ``pyarrow.ipc.open_file`` method to open a file:
+operations. We can also use the :func:`~pyarrow.ipc.open_file` function to open a file:
 
 .. ipython:: python
 
@@ -338,52 +340,3 @@ objects not containing any Python objects:
    df_components = serialized_df.to_components()
    original_df = context.deserialize_components(df_components)
    original_df
-
-Feather Format
---------------
-
-Feather is a lightweight file-format for data frames that uses the Arrow memory
-layout for data representation on disk. It was created early in the Arrow
-project as a proof of concept for fast, language-agnostic data frame storage
-for Python (pandas) and R.
-
-Compared with Arrow streams and files, Feather has some limitations:
-
-* Only non-nested data types and categorical (dictionary-encoded) types are
-  supported
-* Supports only a single batch of rows, where general Arrow streams support an
-  arbitrary number
-* Supports limited scalar value types, adequate only for representing typical
-  data found in R and pandas
-
-We would like to continue to innovate in the Feather format, but we must wait
-for an R implementation for Arrow to mature.
-
-The ``pyarrow.feather`` module contains the read and write functions for the
-format. The input and output are ``pandas.DataFrame`` objects:
-
-.. code-block:: python
-
-   import pyarrow.feather as feather
-
-   feather.write_feather(df, '/path/to/file')
-   read_df = feather.read_feather('/path/to/file')
-
-``read_feather`` supports multithreaded reads, and may yield faster performance
-on some files:
-
-.. code-block:: python
-
-   read_df = feather.read_feather('/path/to/file', nthreads=4)
-
-These functions can read and write with file-like objects. For example:
-
-.. code-block:: python
-
-   with open('/path/to/file', 'wb') as f:
-       feather.write_feather(df, f)
-
-   with open('/path/to/file', 'rb') as f:
-       read_df = feather.read_feather(f)
-
-A file input to ``read_feather`` must support seeking.
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index c52ee51..a2d4a2c 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -162,13 +162,7 @@ from pyarrow.filesystem import FileSystem, LocalFileSystem
 from pyarrow.hdfs import HadoopFileSystem
 import pyarrow.hdfs as hdfs
 
-from pyarrow.ipc import (Message, MessageReader,
-                         RecordBatchFileReader, RecordBatchFileWriter,
-                         RecordBatchStreamReader, RecordBatchStreamWriter,
-                         read_message, read_record_batch, read_schema,
-                         read_tensor, write_tensor,
-                         get_record_batch_size, get_tensor_size,
-                         serialize_pandas, deserialize_pandas)
+from pyarrow.ipc import serialize_pandas, deserialize_pandas
 import pyarrow.ipc as ipc
 
 
@@ -200,6 +194,29 @@ def _plasma_store_entry_point():
 
 from pyarrow.util import _deprecate_api  # noqa
 
+read_message = _deprecate_api("read_message", "ipc.read_message",
+                              ipc.read_message, "0.17.0")
+
+read_record_batch = _deprecate_api("read_record_batch",
+                                   "ipc.read_record_batch",
+                                   ipc.read_record_batch, "0.17.0")
+
+read_schema = _deprecate_api("read_schema", "ipc.read_schema",
+                             ipc.read_schema, "0.17.0")
+
+read_tensor = _deprecate_api("read_tensor", "ipc.read_tensor",
+                             ipc.read_tensor, "0.17.0")
+
+write_tensor = _deprecate_api("write_tensor", "ipc.write_tensor",
+                             ipc.write_tensor, "0.17.0")
+
+get_record_batch_size = _deprecate_api("get_record_batch_size",
+                                       "ipc.get_record_batch_size",
+                                       ipc.get_record_batch_size, "0.17.0")
+
+get_tensor_size = _deprecate_api("get_tensor_size",
+                                 "ipc.get_tensor_size",
+                                 ipc.get_tensor_size, "0.17.0")
 
 open_stream = _deprecate_api("open_stream", "ipc.open_stream",
                              ipc.open_stream, "0.17.0")
@@ -207,6 +224,11 @@ open_stream = _deprecate_api("open_stream", "ipc.open_stream",
 open_file = _deprecate_api("open_file", "ipc.open_file", ipc.open_file,
                            "0.17.0")
 
+# TODO: Deprecate these somehow in the pyarrow namespace
+from pyarrow.ipc import (Message, MessageReader,
+                         RecordBatchFileReader, RecordBatchFileWriter,
+                         RecordBatchStreamReader, RecordBatchStreamWriter)
+
 # ----------------------------------------------------------------------
 # Returning absolute path to the pyarrow include directory (if bundled, e.g. in
 # wheels)
diff --git a/python/pyarrow/_hdfs.pyx b/python/pyarrow/_hdfs.pyx
index 4225095..8505515 100644
--- a/python/pyarrow/_hdfs.pyx
+++ b/python/pyarrow/_hdfs.pyx
@@ -81,13 +81,10 @@ cdef class HadoopFileSystem(FileSystem):
         """
         Instantiate HadoopFileSystem object from an URI string.
 
-        The following two calls are equivalent:
-
-        HadoopFileSystem.from_uri(
-            'hdfs://localhost:8020/?user=test&replication=1'
-        )
-
-        HadoopFileSystem('localhost', port=8020, user='test', replication=1)
+        The following two calls are equivalent
+        * HadoopFileSystem.from_uri('hdfs://localhost:8020/?user=test'
+                                    '&replication=1')
+        * HadoopFileSystem('localhost', port=8020, user='test', replication=1)
 
         Parameters
         ----------
diff --git a/python/pyarrow/ipc.py b/python/pyarrow/ipc.py
index e74428a..f76969b 100644
--- a/python/pyarrow/ipc.py
+++ b/python/pyarrow/ipc.py
@@ -119,6 +119,17 @@ def _get_legacy_format_default(use_legacy_format):
         return use_legacy_format
 
 
+def new_stream(sink, schema, use_legacy_format=None):
+    return RecordBatchStreamWriter(sink, schema,
+                                   use_legacy_format=use_legacy_format)
+
+
+new_stream.__doc__ = """\
+Create an Arrow columnar IPC stream writer instance
+
+{}""".format(_ipc_writer_class_doc)
+
+
 def open_stream(source):
     """
     Create reader for Arrow streaming format.
@@ -127,9 +138,6 @@ def open_stream(source):
     ----------
     source : bytes/buffer-like, pyarrow.NativeFile, or file-like Python object
         Either an in-memory buffer, or a readable file object.
-    footer_offset : int, default None
-        If the file is embedded in some larger file, this is the byte offset to
-        the very end of the file data.
 
     Returns
     -------
@@ -138,6 +146,17 @@ def open_stream(source):
     return RecordBatchStreamReader(source)
 
 
+def new_file(sink, schema, use_legacy_format=None):
+    return RecordBatchFileWriter(sink, schema,
+                                 use_legacy_format=use_legacy_format)
+
+
+new_file.__doc__ = """\
+Create an Arrow columnar IPC file writer instance
+
+{}""".format(_ipc_writer_class_doc)
+
+
 def open_file(source, footer_offset=None):
     """
     Create reader for Arrow file format.
diff --git a/python/pyarrow/tests/test_cuda.py b/python/pyarrow/tests/test_cuda.py
index 0e4d3c4..48e2f37 100644
--- a/python/pyarrow/tests/test_cuda.py
+++ b/python/pyarrow/tests/test_cuda.py
@@ -674,7 +674,7 @@ def test_batch_serialize():
     cuda.read_record_batch(cbuf, batch.schema)
     buf = cbuf.copy_to_host()
     assert hbuf.equals(buf)
-    batch2 = pa.read_record_batch(buf, batch.schema)
+    batch2 = pa.ipc.read_record_batch(buf, batch.schema)
     assert hbuf.equals(batch2.serialize())
     assert batch.num_columns == batch2.num_columns
     assert batch.num_rows == batch2.num_rows
diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py
index 0d07ac1..cf5021b 100644
--- a/python/pyarrow/tests/test_extension_type.py
+++ b/python/pyarrow/tests/test_extension_type.py
@@ -421,7 +421,7 @@ def test_parquet(tmpdir, registered_period_type):
 
     import base64
     decoded_schema = base64.b64decode(meta.metadata[b"ARROW:schema"])
-    schema = pa.read_schema(pa.BufferReader(decoded_schema))
+    schema = pa.ipc.read_schema(pa.BufferReader(decoded_schema))
     assert schema.field("ext").metadata == {
         b'ARROW:extension:metadata': b'freq=D',
         b'ARROW:extension:name': b'pandas.period'}
diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py
index 53d1fe4..cda38d6 100644
--- a/python/pyarrow/tests/test_ipc.py
+++ b/python/pyarrow/tests/test_ipc.py
@@ -55,7 +55,7 @@ class IpcFixture:
         df = pd.DataFrame({
             'one': np.random.randn(nrows),
             'two': ['foo', np.nan, 'bar', 'bazbaz', 'qux']})
-        batch = pa.RecordBatch.from_pandas(df)
+        batch = pa.record_batch(df)
 
         writer = self._get_writer(self.sink, batch.schema)
 
@@ -64,7 +64,7 @@ class IpcFixture:
         for i in range(num_batches):
             unique_df = df.copy()
             unique_df['one'] = np.random.randn(len(df))
-            batch = pa.RecordBatch.from_pandas(unique_df)
+            batch = pa.record_batch(unique_df)
             frames.append(unique_df)
             batches.append(batch)
 
@@ -82,7 +82,7 @@ class IpcFixture:
 class FileFormatFixture(IpcFixture):
 
     def _get_writer(self, sink, schema):
-        return pa.RecordBatchFileWriter(sink, schema)
+        return pa.ipc.new_file(sink, schema)
 
     def _check_roundtrip(self, as_table=False):
         _, batches = self.write_batches(as_table=as_table)
@@ -105,7 +105,7 @@ class StreamFormatFixture(IpcFixture):
     use_legacy_ipc_format = False
 
     def _get_writer(self, sink, schema):
-        return pa.RecordBatchStreamWriter(
+        return pa.ipc.new_stream(
             sink,
             schema,
             use_legacy_format=self.use_legacy_ipc_format
@@ -315,16 +315,16 @@ def test_stream_simple_roundtrip(stream_fixture, use_legacy_ipc_format):
 def test_envvar_set_legacy_ipc_format():
     schema = pa.schema([pa.field('foo', pa.int32())])
 
-    writer = pa.RecordBatchStreamWriter(pa.BufferOutputStream(), schema)
+    writer = pa.ipc.new_stream(pa.BufferOutputStream(), schema)
     assert not writer._use_legacy_format
-    writer = pa.RecordBatchFileWriter(pa.BufferOutputStream(), schema)
+    writer = pa.ipc.new_file(pa.BufferOutputStream(), schema)
     assert not writer._use_legacy_format
 
     import os
     os.environ['ARROW_PRE_0_15_IPC_FORMAT'] = '1'
-    writer = pa.RecordBatchStreamWriter(pa.BufferOutputStream(), schema)
+    writer = pa.ipc.new_stream(pa.BufferOutputStream(), schema)
     assert writer._use_legacy_format
-    writer = pa.RecordBatchFileWriter(pa.BufferOutputStream(), schema)
+    writer = pa.ipc.new_file(pa.BufferOutputStream(), schema)
     assert writer._use_legacy_format
 
     del os.environ['ARROW_PRE_0_15_IPC_FORMAT']
@@ -388,10 +388,10 @@ def test_message_serialize_read_message(example_messages):
     buf = msg.serialize()
     reader = pa.BufferReader(buf.to_pybytes() * 2)
 
-    restored = pa.read_message(buf)
-    restored2 = pa.read_message(reader)
-    restored3 = pa.read_message(buf.to_pybytes())
-    restored4 = pa.read_message(reader)
+    restored = pa.ipc.read_message(buf)
+    restored2 = pa.ipc.read_message(reader)
+    restored3 = pa.ipc.read_message(buf.to_pybytes())
+    restored4 = pa.ipc.read_message(reader)
 
     assert msg.equals(restored)
     assert msg.equals(restored2)
@@ -399,10 +399,10 @@ def test_message_serialize_read_message(example_messages):
     assert msg.equals(restored4)
 
     with pytest.raises(pa.ArrowInvalid, match="Corrupted message"):
-        pa.read_message(pa.BufferReader(b'ab'))
+        pa.ipc.read_message(pa.BufferReader(b'ab'))
 
     with pytest.raises(EOFError):
-        pa.read_message(reader)
+        pa.ipc.read_message(reader)
 
 
 def test_message_read_from_compressed(example_messages):
@@ -415,8 +415,8 @@ def test_message_read_from_compressed(example_messages):
 
         compressed_buf = raw_out.getvalue()
 
-        result = pa.read_message(pa.input_stream(compressed_buf,
-                                                 compression='gzip'))
+        result = pa.ipc.read_message(pa.input_stream(compressed_buf,
+                                                     compression='gzip'))
         assert result.equals(message)
 
 
@@ -424,7 +424,7 @@ def test_message_read_record_batch(example_messages):
     batches, messages = example_messages
 
     for batch, message in zip(batches, messages[1:]):
-        read_batch = pa.read_record_batch(message, batch.schema)
+        read_batch = pa.ipc.read_record_batch(message, batch.schema)
         assert read_batch.equals(batch)
 
 
@@ -433,12 +433,12 @@ def test_read_record_batch_on_stream_error_message():
     batch = pa.record_batch([pa.array([b"foo"], type=pa.utf8())],
                             names=['strs'])
     stream = pa.BufferOutputStream()
-    with pa.RecordBatchStreamWriter(stream, batch.schema) as writer:
+    with pa.ipc.new_stream(stream, batch.schema) as writer:
         writer.write_batch(batch)
     buf = stream.getvalue()
     with pytest.raises(IOError,
                        match="type record batch but got schema"):
-        pa.read_record_batch(buf, batch.schema)
+        pa.ipc.read_record_batch(buf, batch.schema)
 
 
 # ----------------------------------------------------------------------
@@ -576,7 +576,7 @@ def test_ipc_stream_no_batches():
                                  names=['a', 'b'])
 
     sink = pa.BufferOutputStream()
-    with pa.RecordBatchStreamWriter(sink, table.schema):
+    with pa.ipc.new_stream(sink, table.schema):
         pass
 
     source = sink.getvalue()
@@ -593,7 +593,7 @@ def test_get_record_batch_size():
     df = pd.DataFrame({'foo': np.random.randn(N)})
 
     batch = pa.RecordBatch.from_pandas(df)
-    assert pa.get_record_batch_size(batch) > (N * itemsize)
+    assert pa.ipc.get_record_batch_size(batch) > (N * itemsize)
 
 
 def _check_serialize_pandas_round_trip(df, use_threads=False):
@@ -692,8 +692,8 @@ def test_schema_batch_serialize_methods():
     s_schema = batch.schema.serialize()
     s_batch = batch.serialize()
 
-    recons_schema = pa.read_schema(s_schema)
-    recons_batch = pa.read_record_batch(s_batch, recons_schema)
+    recons_schema = pa.ipc.read_schema(s_schema)
+    recons_batch = pa.ipc.read_record_batch(s_batch, recons_schema)
     assert recons_batch.equals(batch)
 
 
@@ -707,7 +707,7 @@ def test_schema_serialization_with_metadata():
     schema = pa.schema([f0, f1], metadata=schema_metadata)
 
     s_schema = schema.serialize()
-    recons_schema = pa.read_schema(s_schema)
+    recons_schema = pa.ipc.read_schema(s_schema)
 
     assert recons_schema.equals(schema)
     assert recons_schema.metadata == schema_metadata
@@ -718,7 +718,7 @@ def test_schema_serialization_with_metadata():
 def test_deprecated_pyarrow_ns_apis():
     table = pa.table([pa.array([1, 2, 3, 4])], names=['a'])
     sink = pa.BufferOutputStream()
-    with pa.RecordBatchStreamWriter(sink, table.schema) as writer:
+    with pa.ipc.new_stream(sink, table.schema) as writer:
         writer.write(table)
 
     with pytest.warns(FutureWarning,
@@ -726,14 +726,14 @@ def test_deprecated_pyarrow_ns_apis():
         pa.open_stream(sink.getvalue())
 
     sink = pa.BufferOutputStream()
-    with pa.RecordBatchFileWriter(sink, table.schema) as writer:
+    with pa.ipc.new_file(sink, table.schema) as writer:
         writer.write(table)
     with pytest.warns(FutureWarning, match="please use pyarrow.ipc.open_file"):
         pa.open_file(sink.getvalue())
 
 
 def write_file(batch, sink):
-    with pa.RecordBatchFileWriter(sink, batch.schema) as writer:
+    with pa.ipc.new_file(sink, batch.schema) as writer:
         writer.write_batch(batch)
 
 
@@ -748,7 +748,7 @@ def test_write_empty_ipc_file():
     schema = pa.schema([('field', pa.int64())])
 
     sink = pa.BufferOutputStream()
-    with pa.RecordBatchFileWriter(sink, schema):
+    with pa.ipc.new_file(sink, schema):
         pass
 
     buf = sink.getvalue()
diff --git a/python/pyarrow/tests/test_tensor.py b/python/pyarrow/tests/test_tensor.py
index 1c8d7e5..0817f90 100644
--- a/python/pyarrow/tests/test_tensor.py
+++ b/python/pyarrow/tests/test_tensor.py
@@ -94,10 +94,10 @@ def test_tensor_ipc_roundtrip(tmpdir):
     path = os.path.join(str(tmpdir), 'pyarrow-tensor-ipc-roundtrip')
     mmap = pa.create_memory_map(path, 1024)
 
-    pa.write_tensor(tensor, mmap)
+    pa.ipc.write_tensor(tensor, mmap)
 
     mmap.seek(0)
-    result = pa.read_tensor(mmap)
+    result = pa.ipc.read_tensor(mmap)
 
     assert result.equals(tensor)
 
@@ -110,10 +110,10 @@ def test_tensor_ipc_read_from_compressed(tempdir):
     path = tempdir / 'tensor-compressed-file'
 
     out_stream = pa.output_stream(path, compression='gzip')
-    pa.write_tensor(tensor, out_stream)
+    pa.ipc.write_tensor(tensor, out_stream)
     out_stream.close()
 
-    result = pa.read_tensor(pa.input_stream(path, compression='gzip'))
+    result = pa.ipc.read_tensor(pa.input_stream(path, compression='gzip'))
     assert result.equals(tensor)
 
 
@@ -129,10 +129,10 @@ def test_tensor_ipc_strided(tmpdir):
 
     for tensor in [tensor1, tensor2]:
         mmap.seek(0)
-        pa.write_tensor(tensor, mmap)
+        pa.ipc.write_tensor(tensor, mmap)
 
         mmap.seek(0)
-        result = pa.read_tensor(mmap)
+        result = pa.ipc.read_tensor(mmap)
 
         assert result.equals(tensor)
 
@@ -167,20 +167,20 @@ def test_tensor_hashing():
 def test_tensor_size():
     data = np.random.randn(10, 4)
     tensor = pa.Tensor.from_numpy(data)
-    assert pa.get_tensor_size(tensor) > (data.size * 8)
+    assert pa.ipc.get_tensor_size(tensor) > (data.size * 8)
 
 
 def test_read_tensor(tmpdir):
     # Create and write tensor tensor
     data = np.random.randn(10, 4)
     tensor = pa.Tensor.from_numpy(data)
-    data_size = pa.get_tensor_size(tensor)
+    data_size = pa.ipc.get_tensor_size(tensor)
     path = os.path.join(str(tmpdir), 'pyarrow-tensor-ipc-read-tensor')
     write_mmap = pa.create_memory_map(path, data_size)
-    pa.write_tensor(tensor, write_mmap)
+    pa.ipc.write_tensor(tensor, write_mmap)
     # Try to read tensor
     read_mmap = pa.memory_map(path, mode='r')
-    array = pa.read_tensor(read_mmap).to_numpy()
+    array = pa.ipc.read_tensor(read_mmap).to_numpy()
     np.testing.assert_equal(data, array)