Posted to commits@arrow.apache.org by ap...@apache.org on 2020/05/19 07:34:12 UTC

[arrow] branch master updated: ARROW-8846: [Dev][Python] Autoformat Python files with archery

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 1164079  ARROW-8846: [Dev][Python] Autoformat Python files with archery
1164079 is described below

commit 1164079d5442c3910c18549bfcd2e68d4554b909
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Tue May 19 09:33:39 2020 +0200

    ARROW-8846: [Dev][Python] Autoformat Python files with archery
    
    `archery lint --flake8` becomes `archery lint --python` and now recognizes the `--fix` option.
    
    Reformatting involves running `autopep8`.
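    
    For example:
    
        archery lint --python          # report style issues (autopep8 + flake8)
        archery lint --python --fix    # also reformat the files in place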
    
    Closes #7215 from pitrou/ARROW-8846-archery-autopep8
    
    Authored-by: Antoine Pitrou <an...@python.org>
    Signed-off-by: Antoine Pitrou <an...@python.org>
---
 dev/archery/archery/cli.py                   |   3 +-
 dev/archery/archery/lang/python.py           |  11 ++-
 dev/archery/archery/utils/command.py         |   2 +-
 dev/archery/archery/utils/lint.py            |  66 ++++++++++++++---
 dev/archery/archery/utils/rat.py             |   2 +-
 dev/archery/requirements-lint.txt            |   3 +-
 docs/source/developers/archery.rst           |  84 ++++++++++++++++++++++
 docs/source/developers/benchmarks.rst        |  26 ++-----
 docs/source/developers/cpp/development.rst   |   6 +-
 docs/source/developers/docker.rst            |  30 ++------
 docs/source/developers/documentation.rst     |   8 ++-
 docs/source/developers/integration.rst       |  74 -------------------
 docs/source/developers/python.rst            |  20 ++----
 docs/source/format/Integration.rst           |  31 +-------
 docs/source/index.rst                        |   2 +-
 python/pyarrow/__init__.py                   |   6 +-
 python/pyarrow/_cuda.pyx                     |   2 +
 python/pyarrow/compat.py                     |   8 ++-
 python/pyarrow/feather.py                    |   5 +-
 python/pyarrow/includes/libarrow.pxd         |   2 +
 python/pyarrow/includes/libarrow_dataset.pxd |   4 ++
 python/pyarrow/includes/libarrow_flight.pxd  |   7 ++
 python/pyarrow/includes/libarrow_fs.pxd      |   2 +
 python/pyarrow/io.pxi                        |   1 +
 python/pyarrow/ipc.pxi                       |   1 +
 python/pyarrow/ipc.py                        |   1 +
 python/pyarrow/lib.pxd                       |   1 +
 python/pyarrow/orc.py                        |   1 +
 python/pyarrow/pandas_compat.py              |   8 +--
 python/pyarrow/parquet.py                    |   9 ++-
 python/pyarrow/plasma.py                     |   2 +-
 python/pyarrow/scalar.pxi                    |   1 +
 python/pyarrow/serialization.py              |  12 ++--
 python/pyarrow/tensor.pxi                    |   8 ++-
 python/pyarrow/tests/conftest.py             |   2 +-
 python/pyarrow/tests/strategies.py           |   2 +-
 python/pyarrow/tests/test_array.py           |   4 +-
 python/pyarrow/tests/test_csv.py             |  66 ++++++++---------
 python/pyarrow/tests/test_cython.py          |   2 +-
 python/pyarrow/tests/test_json.py            |   8 +--
 python/pyarrow/tests/test_orc.py             |  12 ++--
 python/pyarrow/tests/test_pandas.py          | 102 +++++++++++++--------------
 python/pyarrow/tests/test_parquet.py         |   4 +-
 python/pyarrow/tests/test_plasma.py          |  14 ++--
 python/pyarrow/tests/test_serialization.py   |   6 +-
 python/pyarrow/types.pxi                     |   3 +
 python/setup.cfg                             |   4 ++
 python/setup.py                              |   8 +--
 48 files changed, 368 insertions(+), 318 deletions(-)

diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index 7227af7..15c05a4 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -258,7 +258,8 @@ lint_checks = [
     LintCheck('clang-tidy', "Lint C++ files with clang-tidy."),
     LintCheck('cpplint', "Lint C++ files with cpplint."),
     LintCheck('iwyu', "Lint changed C++ files with Include-What-You-Use."),
-    LintCheck('flake8', "Lint Python files with flake8."),
+    LintCheck('python',
+              "Format and lint Python files with autopep8 and flake8."),
     LintCheck('numpydoc', "Lint Python files with numpydoc."),
     LintCheck('cmake-format', "Format CMake files with cmake-format.py."),
     LintCheck('rat',
diff --git a/dev/archery/archery/lang/python.py b/dev/archery/archery/lang/python.py
index 223354e..9b79971 100644
--- a/dev/archery/archery/lang/python.py
+++ b/dev/archery/archery/lang/python.py
@@ -26,7 +26,7 @@ except ImportError:
 else:
     have_numpydoc = True
 
-from ..utils.command import Command, default_bin
+from ..utils.command import Command, capture_stdout, default_bin
 
 
 class Flake8(Command):
@@ -34,6 +34,15 @@ class Flake8(Command):
         self.bin = default_bin(flake8_bin, "flake8")
 
 
+class Autopep8(Command):
+    def __init__(self, autopep8_bin=None):
+        self.bin = default_bin(autopep8_bin, "autopep8")
+
+    @capture_stdout()
+    def run_captured(self, *args, **kwargs):
+        return self.run(*args, **kwargs)
+
+
 def _tokenize_signature(s):
     lines = s.encode('ascii').splitlines()
     generator = iter(lines).__next__
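
A minimal usage sketch of the new `Autopep8` wrapper (the `available`
property and the byte-string capture behavior are taken from the lint.py
changes below; the file path is purely illustrative):

    from archery.lang.python import Autopep8

    autopep8 = Autopep8()
    if autopep8.available:
        # run_captured() returns autopep8's stdout as bytes;
        # empty output from --diff means the file is already clean
        diff = autopep8.run_captured("--diff", "python/setup.py")
        print(diff.decode("utf8"))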
diff --git a/dev/archery/archery/utils/command.py b/dev/archery/archery/utils/command.py
index 3ef6abe..2e27f08 100644
--- a/dev/archery/archery/utils/command.py
+++ b/dev/archery/archery/utils/command.py
@@ -25,7 +25,7 @@ from .logger import logger, ctx
 
 def default_bin(name, default):
     assert(default)
-    env_name = "ARCHERY_%s_BIN".format(default.upper())
+    env_name = "ARCHERY_{0}_BIN".format(default.upper())
     return name if name else os.environ.get(env_name, default)
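
The fix above matters because "%s" is not a str.format() placeholder: the
old code always looked up the literal environment variable name, e.g.

    >>> "ARCHERY_%s_BIN".format("FLAKE8")
    'ARCHERY_%s_BIN'
    >>> "ARCHERY_{0}_BIN".format("FLAKE8")
    'ARCHERY_FLAKE8_BIN'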
 
 
diff --git a/dev/archery/archery/utils/lint.py b/dev/archery/archery/utils/lint.py
index d24f55b..0101147 100644
--- a/dev/archery/archery/utils/lint.py
+++ b/dev/archery/archery/utils/lint.py
@@ -17,6 +17,7 @@
 
 import gzip
 import os
+from pathlib import Path
 
 import click
 
@@ -26,7 +27,7 @@ from .git import git
 from .logger import logger
 from ..lang.cpp import CppCMakeDefinition, CppConfiguration
 from ..lang.rust import Cargo
-from ..lang.python import Flake8, NumpyDoc
+from ..lang.python import Autopep8, Flake8, NumpyDoc
 from .rat import Rat, exclusion_from_globs
 from .tmpdir import tmpdir
 
@@ -106,16 +107,61 @@ def cmake_linter(src, fix=False):
     yield LintResult.from_cmd(cmake_format("--check"))
 
 
-def python_linter(src):
-    """Run flake8 linter on python/pyarrow, and dev/. """
-    logger.info("Running Python linters")
-    flake8 = Flake8()
+def python_linter(src, fix=False):
+    """Run Python linters on python/pyarrow, python/examples, setup.py
+    and dev/. """
+    setup_py = os.path.join(src.python, "setup.py")
+    setup_cfg = os.path.join(src.python, "setup.cfg")
+
+    logger.info("Running Python formatter (autopep8)")
+
+    autopep8 = Autopep8()
+    if not autopep8.available:
+        logger.error(
+            "Python formatter requested but autopep8 binary not found. "
+            "Please run `pip install -r dev/archery/requirements-lint.txt`")
+        return
 
+    # Gather files for autopep8
+    patterns = ["python/pyarrow/**/*.py",
+                "python/pyarrow/**/*.pyx",
+                "python/pyarrow/**/*.pxd",
+                "python/pyarrow/**/*.pxi",
+                "python/examples/**/*.py",
+                "dev/archery/**/*.py",
+                ]
+    files = [setup_py]
+    for pattern in patterns:
+        files += list(map(str, Path(src.path).glob(pattern)))
+
+    args = ['--global-config', setup_cfg, '--ignore-local-config']
+    if fix:
+        args += ['-j0', '--in-place']
+        args += sorted(files)
+        yield LintResult.from_cmd(autopep8(*args))
+    else:
+        # XXX `-j0` doesn't work well with `--exit-code`, so instead
+        # we capture the diff and check whether it's empty
+        # (https://github.com/hhatto/autopep8/issues/543)
+        args += ['-j0', '--diff']
+        args += sorted(files)
+        diff = autopep8.run_captured(*args)
+        if diff:
+            print(diff.decode('utf8'))
+            yield LintResult(success=False)
+        else:
+            yield LintResult(success=True)
+
+    # Run flake8 after autopep8 (the latter may have modified some files)
+    logger.info("Running Python linter (flake8)")
+
+    flake8 = Flake8()
     if not flake8.available:
-        logger.error("Python linter requested but flake8 binary not found.")
+        logger.error(
+            "Python linter requested but flake8 binary not found. "
+            "Please run `pip install -r dev/archery/requirements-lint.txt`")
         return
 
-    setup_py = os.path.join(src.python, "setup.py")
     yield LintResult.from_cmd(flake8(setup_py, src.pyarrow,
                                      os.path.join(src.python, "examples"),
                                      src.dev, check=False))
@@ -291,7 +337,7 @@ def docker_linter(src):
 
 def linter(src, fix=False, *, clang_format=False, cpplint=False,
            clang_tidy=False, iwyu=False, iwyu_all=False,
-           flake8=False, numpydoc=False, cmake_format=False, rat=False,
+           python=False, numpydoc=False, cmake_format=False, rat=False,
            r=False, rust=False, docker=False):
     """Run all linters."""
     with tmpdir(prefix="arrow-lint-") as root:
@@ -311,8 +357,8 @@ def linter(src, fix=False, *, clang_format=False, cpplint=False,
                                       iwyu_all=iwyu_all,
                                       fix=fix))
 
-        if flake8:
-            results.extend(python_linter(src))
+        if python:
+            results.extend(python_linter(src, fix=fix))
 
         if numpydoc:
             results.extend(python_numpydoc())
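
In check mode, the autopep8 call assembled in python_linter() boils down to
roughly the following command line, with FILES standing for the sorted glob
results:

    autopep8 --global-config python/setup.cfg --ignore-local-config -j0 --diff FILES

Any non-empty diff output fails the lint; with --fix, --diff is replaced by
--in-place so the files are rewritten directly.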
diff --git a/dev/archery/archery/utils/rat.py b/dev/archery/archery/utils/rat.py
index ce78f9f..e7fe19a 100644
--- a/dev/archery/archery/utils/rat.py
+++ b/dev/archery/archery/utils/rat.py
@@ -36,7 +36,7 @@ class Rat(Jar):
 
     @capture_stdout(strip=False)
     def run_report(self, archive_path, **kwargs):
-        return self.run("--xml",  archive_path, **kwargs)
+        return self.run("--xml", archive_path, **kwargs)
 
     def report(self, archive_path, **kwargs):
         return RatReport(self.run_report(archive_path, **kwargs))
diff --git a/dev/archery/requirements-lint.txt b/dev/archery/requirements-lint.txt
index b1c02f6..fc7f339 100644
--- a/dev/archery/requirements-lint.txt
+++ b/dev/archery/requirements-lint.txt
@@ -1,2 +1,3 @@
+autopep8
 flake8
-cmake_format==0.5.2
\ No newline at end of file
+cmake_format==0.5.2
diff --git a/docs/source/developers/archery.rst b/docs/source/developers/archery.rst
new file mode 100644
index 0000000..012dffb
--- /dev/null
+++ b/docs/source/developers/archery.rst
@@ -0,0 +1,84 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements.  See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership.  The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License.  You may obtain a copy of the License at
+
+..   http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied.  See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. _archery:
+
+Daily Development using Archery
+===============================
+
+To ease some of the daily development tasks, we developed a Python utility
+called Archery.
+
+Installation
+------------
+
+Archery requires Python 3.5 or later. It is recommended to install archery in
+*editable* mode with the ``-e`` flag to automatically update the installation
+when pulling the Arrow repository.
+
+.. code:: bash
+
+   pip install -e dev/archery
+
+Usage
+-----
+
+You can inspect Archery usage by passing the ``--help`` flag:
+
+.. code:: bash
+
+   $ archery --help
+   Usage: archery [OPTIONS] COMMAND [ARGS]...
+
+     Apache Arrow developer utilities.
+
+     See sub-commands help with `archery <cmd> --help`.
+
+   Options:
+     --debug      Increase logging with debugging output.
+     --pdb        Invoke pdb on uncaught exception.
+     -q, --quiet  Silence executed commands.
+     --help       Show this message and exit.
+
+   Commands:
+     benchmark    Arrow benchmarking.
+     build        Initialize an Arrow C++ build
+     docker       Interact with docker-compose based builds.
+     integration  Execute protocol and Flight integration tests
+     lint         Check Arrow source tree for errors
+     numpydoc     Lint python docstring with NumpyDoc
+     trigger-bot
+
+Archery exposes independent subcommands, each of which provides dedicated
+help output, for example:
+
+.. code:: bash
+
+   $ archery docker --help
+   Usage: archery docker [OPTIONS] COMMAND [ARGS]...
+
+     Interact with docker-compose based builds.
+
+   Options:
+     --src <arrow_src>  Specify Arrow source directory.
+     --help             Show this message and exit.
+
+   Commands:
+     images  List the available docker-compose images.
+     push    Push the generated docker-compose image.
+     run     Execute docker-compose builds.
+
diff --git a/docs/source/developers/benchmarks.rst b/docs/source/developers/benchmarks.rst
index d85dc2b..31dcc76 100644
--- a/docs/source/developers/benchmarks.rst
+++ b/docs/source/developers/benchmarks.rst
@@ -17,30 +17,14 @@
 
 .. _benchmarks:
 
-**********
+==========
 Benchmarks
-**********
-
-Archery
-=======
-
-``archery`` is a python library and command line utility made to interact with
-Arrow's sources. The main feature is the benchmarking process.
-
-Installation
-~~~~~~~~~~~~
-
-The simplest way to install archery is with pip from the top-level directory.
-It is recommended to use the ``-e,--editable`` flag so that pip don't copy
-the module files but uses the actual sources.
-
-.. code-block:: shell
+==========
 
-  pip install -e dev/archery
-  archery --help
+Setup
+=====
 
-  # optional: enable bash/zsh autocompletion
-  eval "$(_ARCHERY_COMPLETE=source archery)"
+First install the :ref:`Archery <archery>` utility to run the benchmark suite.
 
 Running the benchmark suite
 ===========================
diff --git a/docs/source/developers/cpp/development.rst b/docs/source/developers/cpp/development.rst
index f77abac..e8a3226 100644
--- a/docs/source/developers/cpp/development.rst
+++ b/docs/source/developers/cpp/development.rst
@@ -91,7 +91,8 @@ following checks:
   compiler warnings with ``-DBUILD_WARNING_LEVEL=CHECKIN``. Note that
   there are classes of warnings (such as ``-Wdocumentation``, see more
   on this below) that are not caught by ``gcc``.
-* Passes various C++ (and others) style checks, checked with ``archery lint``
+* Passes various C++ (and others) style checks, checked with the ``lint``
+  subcommand to :ref:`Archery <archery>`.
 * CMake files pass style checks, can be fixed by running
   ``run-cmake-format.py`` from the root of the repository. This requires Python
   3 and `cmake_format <https://github.com/cheshirekow/cmake_format>`_ (note:
@@ -114,9 +115,6 @@ target that is executable from the root of the repository:
 
    docker-compose run lint
 
-See :ref:`integration` for more information about the project's
-``docker-compose`` configuration.
-
 Cleaning includes with include-what-you-use (IWYU)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/docs/source/developers/docker.rst b/docs/source/developers/docker.rst
index 7bb4553..cdf77a7 100644
--- a/docs/source/developers/docker.rst
+++ b/docs/source/developers/docker.rst
@@ -15,37 +15,21 @@
 .. specific language governing permissions and limitations
 .. under the License.
 
+.. _docker-builds:
+
 Running Docker Builds
 =====================
 
-Most of our Linux based continuous integration tasks are decoupled from public
-CI services using docker and docker-compose. Keeping the CI configuration
+Most of our Linux based Continuous Integration tasks are decoupled from public
+CI services using `Docker <https://docs.docker.com/>`_ and
+`docker-compose <https://docs.docker.com/compose/>`_.  Keeping the CI configuration
 minimal makes local reproducibility possible.
 
 Usage
 -----
 
-There are multiple ways to execute the docker based builds. The recommended is
-to use the archery tool:
-
-Installation
-~~~~~~~~~~~~
-
-``archery`` requires ``python>=3.5``. It is recommended to install archery in
-``editable`` mode with the ``-e`` flag to automatically update the intallation
-by pulling the arrow repository.
-
-.. code:: bash
-
-    pip install -e dev/archery[docker]
-
-For the available commands and options invoke the installed archery commands
-with the ``--help`` flag:
-
-.. code:: bash
-
-    archery docker --help
-    archery docker run --help
+There are multiple ways to execute the Docker-based builds.
+The recommended way is to use the :ref:`Archery <archery>` tool:
 
 Examples
 ~~~~~~~~
diff --git a/docs/source/developers/documentation.rst b/docs/source/developers/documentation.rst
index 5878aa5..f024a1f 100644
--- a/docs/source/developers/documentation.rst
+++ b/docs/source/developers/documentation.rst
@@ -89,11 +89,15 @@ you made.
 Building with Docker
 --------------------
 
-You can use Archery to build the documentation within a docker container.
-For installation and usage see `Running Docker Builds`_ section.
+You can use :ref:`Archery <archery>` to build the documentation within a
+Docker container.
 
 .. code-block:: shell
 
   archery docker run ubuntu-docs
 
 The final output is located under ``docs/_build/html``.
+
+.. seealso::
+
+   :ref:`docker-builds`.
diff --git a/docs/source/developers/integration.rst b/docs/source/developers/integration.rst
deleted file mode 100644
index e6ce3be..0000000
--- a/docs/source/developers/integration.rst
+++ /dev/null
@@ -1,74 +0,0 @@
-.. Licensed to the Apache Software Foundation (ASF) under one
-.. or more contributor license agreements.  See the NOTICE file
-.. distributed with this work for additional information
-.. regarding copyright ownership.  The ASF licenses this file
-.. to you under the Apache License, Version 2.0 (the
-.. "License"); you may not use this file except in compliance
-.. with the License.  You may obtain a copy of the License at
-
-..   http://www.apache.org/licenses/LICENSE-2.0
-
-.. Unless required by applicable law or agreed to in writing,
-.. software distributed under the License is distributed on an
-.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-.. KIND, either express or implied.  See the License for the
-.. specific language governing permissions and limitations
-.. under the License.
-
-.. _integration:
-
-Integration Testing
-===================
-
-Prerequisites
--------------
-
-Arrow uses `Docker <https://docs.docker.com/>`_ and
-`docker-compose <https://docs.docker.com/compose/>`_ for integration testing.
-You can follow the installation `instructions <https://docs.docker.com/compose/install/>`_.
-
-Docker images (services)
-------------------------
-
-The docker-compose services are defined in the ``docker-compose.yml`` file.
-Each service usually correspond to a language binding or an important service
-to test with Arrow.
-
-Services are configured with 2 local mounts, ``/arrow`` for the top-level
-source directory and ``/build`` for caching build artifacts. The source level
-directory mount can be paired with git checkout to test a specific commit. The
-build mount is used for caching and sharing state between staged images.
-
-- *c_glib*: Builds the GLib bindings
-- *cpp*: Builds the C++ project
-- *go*: Builds the go project
-- *java*: Builds the Java project
-- *js*: Builds the Javascript project
-- *python*: Builds the python bindings
-- *r*: Builds the R bindings
-- *rust*: Builds the rust project
-- *lint*: Run various lint on the C++ sources
-- *iwyu*: Run include-what-you-use on the C++ sources
-- *clang-format*: Run clang-format on the C++ sources, modifying in place
-- *clang-tidy*: Run clang-tidy on the C++ sources, outputting recommendations
-- *docs*: Builds this documentation
-
-You can build and run a service by using the `build` and `run` docker-compose
-sub-command, e.g. `docker-compose build python && docker-compose run python`.
-We do not publish the build images, you need to build them manually. This
-method requires the user to build the images in reverse dependency order.
-
-.. code-block:: shell
-
-   # Build and run manually
-   docker-compose build conda-cpp
-   docker-compose build conda-python
-   docker-compose run conda-python
-
-To simplify this, Archery provides a command for it:
-
-.. code-block:: shell
-
-   archery docker run conda-python
-
-See `Running Docker Builds`_ for more details.
diff --git a/docs/source/developers/python.rst b/docs/source/developers/python.rst
index be5e9c6..75a51b4 100644
--- a/docs/source/developers/python.rst
+++ b/docs/source/developers/python.rst
@@ -18,9 +18,9 @@
 .. currentmodule:: pyarrow
 .. _python-development:
 
-******************
+==================
 Python Development
-******************
+==================
 
 This page provides general Python development guidelines and source build
 instructions for all platforms.
@@ -29,22 +29,18 @@ Coding Style
 ============
 
 We follow a similar PEP8-like coding style to the `pandas project
-<https://github.com/pandas-dev/pandas>`_.
-
-The code must pass ``flake8`` (available from pip or conda) or it will fail the
-build. Check for style errors before submitting your pull request with:
+<https://github.com/pandas-dev/pandas>`_.  To check style issues, use the
+:ref:`Archery <archery>` subcommand ``lint``:
 
 .. code-block:: shell
 
-   flake8 .
-   flake8 --config=.flake8.cython .
+   archery lint --python
 
-The package ``autopep8`` (also available from pip or conda) can automatically
-fix many of the errors reported by ``flake8``:
+Some of the issues can be automatically fixed by passing the ``--fix`` option:
 
 .. code-block:: shell
 
-   autopep8 --in-place --global-config=.flake8.cython pyarrow/table.pxi
+   archery lint --python --fix
 
 Unit Testing
 ============
@@ -55,9 +51,7 @@ like so:
 
 .. code-block:: shell
 
-   pushd arrow/python
    pytest pyarrow
-   popd
 
 Package requirements to run the unit tests are found in
 ``requirements-test.txt`` and can be installed if needed with ``pip install -r
diff --git a/docs/source/format/Integration.rst b/docs/source/format/Integration.rst
index 571d0fa..a3e4205 100644
--- a/docs/source/format/Integration.rst
+++ b/docs/source/format/Integration.rst
@@ -32,33 +32,8 @@ Our strategy for integration testing between Arrow implementations is:
 Running integration tests
 -------------------------
 
-The integration test data generator and runner uses ``archery``, a Python script
-that requires Python 3.6 or higher. You can create a standalone Python
-distribution and environment for running the tests by using
-`miniconda <https://conda.io/miniconda.html>`_. On Linux this is:
-
-.. code-block:: shell
-
-   MINICONDA_URL=https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
-   wget -O miniconda.sh $MINICONDA_URL
-   bash miniconda.sh -b -p miniconda
-   export PATH=`pwd`/miniconda/bin:$PATH
-
-   conda create -n arrow-integration python=3.6 nomkl numpy six
-   conda activate arrow-integration
-
-
-If you are on macOS, instead use the URL:
-
-.. code-block:: shell
-
-   MINICONDA_URL=https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
-
-Once you have Python, you can install archery
-
-.. code-block:: shell
-
-   pip install -e dev/archery
+The integration test data generator and runner are implemented inside
+the :ref:`Archery <archery>` utility.
 
 The integration tests are run using the ``archery integration`` command.
 
@@ -101,7 +76,7 @@ docker-compose. You may also run the docker-compose job locally, or at least
 refer to it if you have questions about how to build other languages or enable
 certain tests.
 
-See :ref:`integration` for more information about the project's
+See :ref:`docker-builds` for more information about the project's
 ``docker-compose`` configuration.
 
 JSON test data format
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 021e2d5..f83d763 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -67,7 +67,7 @@ such topics as:
    developers/contributing
    developers/cpp/index
    developers/python
-   developers/integration
+   developers/archery
    developers/crossbow
    developers/docker
    developers/benchmarks
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 9ae0d9c..e8b8c6e 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -40,6 +40,7 @@ except ImportError:
     try:
         import setuptools_scm
         # Code duplicated from setup.py to avoid a dependency on each other
+
         def parse_git(root, **kwargs):
             """
             Parse function for setuptools_scm that ignores tags for non-C++
@@ -186,6 +187,7 @@ import pyarrow.types as types
 
 # Entry point for starting the plasma store
 
+
 def _plasma_store_entry_point():
     """Entry point for starting the plasma store.
 
@@ -202,6 +204,7 @@ def _plasma_store_entry_point():
 # ----------------------------------------------------------------------
 # Deprecations
 
+
 from pyarrow.util import _deprecate_api  # noqa
 
 read_message = _deprecate_api("read_message", "ipc.read_message",
@@ -218,7 +221,7 @@ read_tensor = _deprecate_api("read_tensor", "ipc.read_tensor",
                              ipc.read_tensor, "0.17.0")
 
 write_tensor = _deprecate_api("write_tensor", "ipc.write_tensor",
-                             ipc.write_tensor, "0.17.0")
+                              ipc.write_tensor, "0.17.0")
 
 get_record_batch_size = _deprecate_api("get_record_batch_size",
                                        "ipc.get_record_batch_size",
@@ -243,6 +246,7 @@ from pyarrow.ipc import (Message, MessageReader,
 # Returning absolute path to the pyarrow include directory (if bundled, e.g. in
 # wheels)
 
+
 def get_include():
     """
     Return absolute path to directory containing Arrow C++ include
diff --git a/python/pyarrow/_cuda.pyx b/python/pyarrow/_cuda.pyx
index af9f422..c59b8dd 100644
--- a/python/pyarrow/_cuda.pyx
+++ b/python/pyarrow/_cuda.pyx
@@ -726,6 +726,7 @@ cdef class BufferReader(NativeFile):
     may expect to be able to do anything other than pointer arithmetic
     on the returned buffers.
     """
+
     def __cinit__(self, CudaBuffer obj):
         self.buffer = obj
         self.reader = new CCudaBufferReader(self.buffer.buffer)
@@ -774,6 +775,7 @@ cdef class BufferWriter(NativeFile):
     By default writes are unbuffered. Use set_buffer_size to enable
     buffering.
     """
+
     def __cinit__(self, CudaBuffer buffer):
         self.buffer = buffer
         self.writer = new CCudaBufferWriter(self.buffer.cuda_buffer)
diff --git a/python/pyarrow/compat.py b/python/pyarrow/compat.py
index 22931a2..c2e7c32 100644
--- a/python/pyarrow/compat.py
+++ b/python/pyarrow/compat.py
@@ -30,16 +30,19 @@ except ImportError:
 
 from collections.abc import Iterable, Mapping, Sequence
 
+
 def guid():
     from uuid import uuid4
     return uuid4().hex
 
+
 def tobytes(o):
     if isinstance(o, str):
         return o.encode('utf8')
     else:
         return o
 
+
 def frombytes(o, *, safe=False):
     if safe:
         return o.decode('utf8', errors='replace')
@@ -60,6 +63,7 @@ try:
 except ImportError:
     pickle = builtin_pickle
 
+
 def encode_file_path(path):
     if isinstance(path, str):
         # POSIX systems can handle utf-8. UTF8 is converted to utf16-le in
@@ -120,9 +124,9 @@ except ImportError:
 
         names, formats, offsets = zip(*fields)
         # names may be (title, names) tuples
-        nametups = (n  if isinstance(n, tuple) else (None, n) for n in names)
+        nametups = (n if isinstance(n, tuple) else (None, n) for n in names)
         titles, names = zip(*nametups)
         return np.dtype({'names': names, 'formats': formats, 'titles': titles,
-                            'offsets': offsets, 'itemsize': offset})
+                         'offsets': offsets, 'itemsize': offset})
 
 __all__ = []
diff --git a/python/pyarrow/feather.py b/python/pyarrow/feather.py
index 6071b5e..7b813af 100644
--- a/python/pyarrow/feather.py
+++ b/python/pyarrow/feather.py
@@ -40,6 +40,7 @@ class FeatherDataset:
     validate_schema : bool, default True
         Check that individual file schemas are all the same / compatible
     """
+
     def __init__(self, path_or_paths, validate_schema=True):
         _check_pandas_version()
         self.paths = path_or_paths
@@ -142,8 +143,8 @@ def write_feather(df, dest, compression=None, compression_level=None,
     """
     if _pandas_api.have_pandas:
         _check_pandas_version()
-        if (_pandas_api.has_sparse
-                and isinstance(df, _pandas_api.pd.SparseDataFrame)):
+        if (_pandas_api.has_sparse and
+                isinstance(df, _pandas_api.pd.SparseDataFrame)):
             df = df.to_dense()
 
     if _pandas_api.is_data_frame(df):
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index f888a30..8e1c512 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1460,8 +1460,10 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil:
     cdef cppclass CCastOptions" arrow::compute::CastOptions":
         CCastOptions()
         CCastOptions(c_bool safe)
+
         @staticmethod
         CCastOptions Safe()
+
         @staticmethod
         CCastOptions Unsafe()
         c_bool allow_int_overflow
diff --git a/python/pyarrow/includes/libarrow_dataset.pxd b/python/pyarrow/includes/libarrow_dataset.pxd
index a4731d7..0493892 100644
--- a/python/pyarrow/includes/libarrow_dataset.pxd
+++ b/python/pyarrow/includes/libarrow_dataset.pxd
@@ -96,6 +96,7 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:
 
     cdef cppclass CScanOptions "arrow::dataset::ScanOptions":
         CRecordBatchProjector projector
+
         @staticmethod
         shared_ptr[CScanOptions] Make(shared_ptr[CSchema] schema)
 
@@ -260,6 +261,7 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:
     cdef cppclass CDirectoryPartitioning \
             "arrow::dataset::DirectoryPartitioning"(CPartitioning):
         CDirectoryPartitioning(shared_ptr[CSchema] schema)
+
         @staticmethod
         shared_ptr[CPartitioningFactory] MakeFactory(
             vector[c_string] field_names)
@@ -267,6 +269,7 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:
     cdef cppclass CHivePartitioning \
             "arrow::dataset::HivePartitioning"(CPartitioning):
         CHivePartitioning(shared_ptr[CSchema] schema)
+
         @staticmethod
         shared_ptr[CPartitioningFactory] MakeFactory()
 
@@ -302,6 +305,7 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:
             shared_ptr[CFileFormat] format,
             CFileSystemFactoryOptions options
         )
+
         @staticmethod
         CResult[shared_ptr[CDatasetFactory]] MakeFromSelector "Make"(
             shared_ptr[CFileSystem] filesystem,
diff --git a/python/pyarrow/includes/libarrow_flight.pxd b/python/pyarrow/includes/libarrow_flight.pxd
index 0dba924..cd5f9d0 100644
--- a/python/pyarrow/includes/libarrow_flight.pxd
+++ b/python/pyarrow/includes/libarrow_flight.pxd
@@ -66,6 +66,7 @@ cdef extern from "arrow/flight/api.h" namespace "arrow" nogil:
         c_string cmd
         vector[c_string] path
         CStatus SerializeToString(c_string* out)
+
         @staticmethod
         CStatus Deserialize(const c_string& serialized,
                             CFlightDescriptor* out)
@@ -76,6 +77,7 @@ cdef extern from "arrow/flight/api.h" namespace "arrow" nogil:
         c_string ticket
         bint operator==(CTicket)
         CStatus SerializeToString(c_string* out)
+
         @staticmethod
         CStatus Deserialize(const c_string& serialized, CTicket* out)
 
@@ -90,10 +92,13 @@ cdef extern from "arrow/flight/api.h" namespace "arrow" nogil:
 
         @staticmethod
         CStatus Parse(c_string& uri_string, CLocation* location)
+
         @staticmethod
         CStatus ForGrpcTcp(c_string& host, int port, CLocation* location)
+
         @staticmethod
         CStatus ForGrpcTls(c_string& host, int port, CLocation* location)
+
         @staticmethod
         CStatus ForGrpcUnix(c_string& path, CLocation* location)
 
@@ -113,6 +118,7 @@ cdef extern from "arrow/flight/api.h" namespace "arrow" nogil:
         CFlightDescriptor& descriptor()
         const vector[CFlightEndpoint]& endpoints()
         CStatus SerializeToString(c_string* out)
+
         @staticmethod
         CStatus Deserialize(const c_string& serialized,
                             unique_ptr[CFlightInfo]* out)
@@ -327,6 +333,7 @@ cdef extern from "arrow/flight/api.h" namespace "arrow" nogil:
     cdef cppclass FlightStatusDetail" arrow::flight::FlightStatusDetail":
         CFlightStatusCode code()
         c_string extra_info()
+
         @staticmethod
         shared_ptr[FlightStatusDetail] UnwrapStatus(const CStatus& status)
 
diff --git a/python/pyarrow/includes/libarrow_fs.pxd b/python/pyarrow/includes/libarrow_fs.pxd
index 3483673..a794753 100644
--- a/python/pyarrow/includes/libarrow_fs.pxd
+++ b/python/pyarrow/includes/libarrow_fs.pxd
@@ -132,6 +132,7 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil:
 
         @staticmethod
         CS3Options Defaults()
+
         @staticmethod
         CS3Options FromAccessKey(const c_string& access_key,
                                  const c_string& secret_key)
@@ -150,6 +151,7 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil:
         int32_t buffer_size
         int16_t replication
         int64_t default_block_size
+
         @staticmethod
         CResult[CHdfsOptions] FromUriString "FromUri"(
             const c_string& uri_string)
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index 198bfb7..b1032d6 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -1194,6 +1194,7 @@ cdef class CompressedInputStream(NativeFile):
     compression : str
         The compression type ("bz2", "brotli", "gzip", "lz4" or "zstd").
     """
+
     def __init__(self, NativeFile stream, str compression not None):
         cdef:
             Codec codec = Codec(compression)
diff --git a/python/pyarrow/ipc.pxi b/python/pyarrow/ipc.pxi
index 1b395b3..a91eabd 100644
--- a/python/pyarrow/ipc.pxi
+++ b/python/pyarrow/ipc.pxi
@@ -22,6 +22,7 @@ cdef class Message:
     """
     Container for an Arrow IPC message with metadata and optional body
     """
+
     def __cinit__(self):
         pass
 
diff --git a/python/pyarrow/ipc.py b/python/pyarrow/ipc.py
index f76969b..fbbf98a 100644
--- a/python/pyarrow/ipc.py
+++ b/python/pyarrow/ipc.py
@@ -96,6 +96,7 @@ class RecordBatchFileReader(lib._RecordBatchFileReader, _ReadPandasOption):
         If the file is embedded in some larger file, this is the byte offset to
         the very end of the file data
     """
+
     def __init__(self, source, footer_offset=None):
         self._open(source, footer_offset=footer_offset)
 
diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd
index da6d28c..e629c0e 100644
--- a/python/pyarrow/lib.pxd
+++ b/python/pyarrow/lib.pxd
@@ -154,6 +154,7 @@ cdef class KeyValueMetadata(_Metadata):
         const CKeyValueMetadata* metadata
 
     cdef void init(self, const shared_ptr[const CKeyValueMetadata]& wrapped)
+
     @staticmethod
     cdef wrap(const shared_ptr[const CKeyValueMetadata]& sp)
     cdef inline shared_ptr[const CKeyValueMetadata] unwrap(self) nogil
diff --git a/python/pyarrow/orc.py b/python/pyarrow/orc.py
index f335ce7..d1f0925 100644
--- a/python/pyarrow/orc.py
+++ b/python/pyarrow/orc.py
@@ -68,6 +68,7 @@ class ORCFile:
         Readable source. For passing Python file objects or byte buffers,
         see pyarrow.io.PythonFileInterface or pyarrow.io.BufferReader.
     """
+
     def __init__(self, source):
         self.reader = _orc.ORCReader()
         self.reader.open(source)
diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
index 55c38c9..a2a461f 100644
--- a/python/pyarrow/pandas_compat.py
+++ b/python/pyarrow/pandas_compat.py
@@ -363,8 +363,8 @@ def _get_columns_to_convert(df, schema, preserve_index, columns):
     index_column_names = []
     for i, index_level in enumerate(index_levels):
         name = _index_level_name(index_level, i, column_names)
-        if (isinstance(index_level, _pandas_api.pd.RangeIndex)
-                and preserve_index is None):
+        if (isinstance(index_level, _pandas_api.pd.RangeIndex) and
+                preserve_index is None):
             descr = _get_range_index_descriptor(index_level)
         else:
             columns_to_convert.append(index_level)
@@ -773,8 +773,8 @@ def table_to_blockmanager(options, table, categories=None,
 # dataframe (complex not included since not supported by Arrow)
 _pandas_supported_numpy_types = {
     str(np.dtype(typ))
-    for typ in (np.sctypes['int'] + np.sctypes['uint'] + np.sctypes['float']
-                + ['object', 'bool'])
+    for typ in (np.sctypes['int'] + np.sctypes['uint'] + np.sctypes['float'] +
+                ['object', 'bool'])
 }
 
 
diff --git a/python/pyarrow/parquet.py b/python/pyarrow/parquet.py
index 9da33b1..51542ee 100644
--- a/python/pyarrow/parquet.py
+++ b/python/pyarrow/parquet.py
@@ -93,9 +93,9 @@ def _check_filters(filters, check_null_strings=True):
             for conjunction in filters:
                 for col, op, val in conjunction:
                     if (
-                        isinstance(val, list)
-                        and all(_check_contains_null(v) for v in val)
-                        or _check_contains_null(val)
+                        isinstance(val, list) and
+                        all(_check_contains_null(v) for v in val) or
+                        _check_contains_null(val)
                     ):
                         raise NotImplementedError(
                             "Null-terminated binary strings are not supported "
@@ -192,6 +192,7 @@ class ParquetFile:
         If positive, perform read buffering when deserializing individual
         column chunks. Otherwise IO calls are unbuffered.
     """
+
     def __init__(self, source, metadata=None, common_metadata=None,
                  read_dictionary=None, memory_map=False, buffer_size=0):
         self.reader = ParquetReader()
@@ -619,6 +620,7 @@ class ParquetDatasetPiece:
     row_group : int, default None
         Row group to load. By default, reads all row groups.
     """
+
     def __init__(self, path, open_file_func=partial(open, mode='rb'),
                  file_options=None, row_group=None, partition_keys=None):
         self.path = _stringify_path(path)
@@ -1366,6 +1368,7 @@ class _ParquetDatasetV2:
     """
     ParquetDataset shim using the Dataset API under the hood.
     """
+
     def __init__(self, path_or_paths, filesystem=None, filters=None,
                  partitioning="hive", read_dictionary=None, buffer_size=None,
                  memory_map=False, **kwargs):
diff --git a/python/pyarrow/plasma.py b/python/pyarrow/plasma.py
index 251b9db..a4bf79b 100644
--- a/python/pyarrow/plasma.py
+++ b/python/pyarrow/plasma.py
@@ -25,7 +25,7 @@ import sys
 import tempfile
 import time
 
-from pyarrow._plasma import (ObjectID, ObjectNotAvailable, # noqa
+from pyarrow._plasma import (ObjectID, ObjectNotAvailable,  # noqa
                              PlasmaBuffer, PlasmaClient, connect,
                              PlasmaObjectExists, PlasmaObjectNotFound,
                              PlasmaStoreFull)
diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi
index 3538483..64e7412 100644
--- a/python/pyarrow/scalar.pxi
+++ b/python/pyarrow/scalar.pxi
@@ -30,6 +30,7 @@ cdef class NullType(Scalar):
     Singleton for null array elements.
     """
     # TODO rename this NullValue?
+
     def __cinit__(self):
         global NA
         if NA is not None:
diff --git a/python/pyarrow/serialization.py b/python/pyarrow/serialization.py
index fb82f64..55d7260 100644
--- a/python/pyarrow/serialization.py
+++ b/python/pyarrow/serialization.py
@@ -150,8 +150,8 @@ def _register_custom_pandas_handlers(context):
     )
 
     def _serialize_pandas_dataframe(obj):
-        if (pdcompat._pandas_api.has_sparse
-                and isinstance(obj, pd.SparseDataFrame)):
+        if (pdcompat._pandas_api.has_sparse and
+                isinstance(obj, pd.SparseDataFrame)):
             raise NotImplementedError(
                 sparse_type_error_msg.format('SparseDataFrame')
             )
@@ -162,8 +162,8 @@ def _register_custom_pandas_handlers(context):
         return pdcompat.serialized_dict_to_dataframe(data)
 
     def _serialize_pandas_series(obj):
-        if (pdcompat._pandas_api.has_sparse
-                and isinstance(obj, pd.SparseSeries)):
+        if (pdcompat._pandas_api.has_sparse and
+                isinstance(obj, pd.SparseSeries)):
             raise NotImplementedError(
                 sparse_type_error_msg.format('SparseSeries')
             )
@@ -302,7 +302,7 @@ def _register_collections_serialization_handlers(serialization_context):
 def _register_scipy_handlers(serialization_context):
     try:
         from scipy.sparse import (csr_matrix, csc_matrix, coo_matrix,
-                                  isspmatrix_coo,  isspmatrix_csr,
+                                  isspmatrix_coo, isspmatrix_csr,
                                   isspmatrix_csc, isspmatrix)
 
         def _serialize_scipy_sparse(obj):
@@ -320,7 +320,7 @@ def _register_scipy_handlers(serialization_context):
 
             else:
                 raise NotImplementedError(
-                        "Serialization of {} is not supported.".format(obj[0]))
+                    "Serialization of {} is not supported.".format(obj[0]))
 
         def _deserialize_scipy_sparse(data):
             if data[0] == 'coo':
diff --git a/python/pyarrow/tensor.pxi b/python/pyarrow/tensor.pxi
index f74e3c8..edea4ef 100644
--- a/python/pyarrow/tensor.pxi
+++ b/python/pyarrow/tensor.pxi
@@ -605,8 +605,9 @@ shape: {0.shape}""".format(self)
         indices = np.require(obj.indices, dtype='i8')
 
         check_status(NdarraysToSparseCSCMatrix(c_default_memory_pool(),
-                     obj.data, indptr, indices, c_shape,
-                     c_dim_names, &csparse_tensor))
+                                               obj.data, indptr, indices,
+                                               c_shape, c_dim_names,
+                                               &csparse_tensor))
         return pyarrow_wrap_sparse_csc_matrix(csparse_tensor)
 
     @staticmethod
@@ -646,7 +647,8 @@ shape: {0.shape}""".format(self)
         cdef PyObject* out_indices
 
         check_status(SparseCSCMatrixToNdarray(self.sp_sparse_tensor, self,
-                     &out_data, &out_indptr, &out_indices))
+                                              &out_data, &out_indptr,
+                                              &out_indices))
 
         data = PyObject_to_object(out_data)
         indptr = PyObject_to_object(out_indptr)
diff --git a/python/pyarrow/tests/conftest.py b/python/pyarrow/tests/conftest.py
index ce72f2e..6b2ca56 100644
--- a/python/pyarrow/tests/conftest.py
+++ b/python/pyarrow/tests/conftest.py
@@ -104,7 +104,7 @@ except ImportError:
     pass
 
 try:
-    import pyarrow.orc # noqa
+    import pyarrow.orc  # noqa
     defaults['orc'] = True
 except ImportError:
     pass
diff --git a/python/pyarrow/tests/strategies.py b/python/pyarrow/tests/strategies.py
index 2d58080..088f291 100644
--- a/python/pyarrow/tests/strategies.py
+++ b/python/pyarrow/tests/strategies.py
@@ -110,7 +110,7 @@ def list_types(item_strategy=primitive_types):
     return (
         st.builds(pa.list_, item_strategy) |
         st.builds(pa.large_list, item_strategy)
-        )
+    )
 
 
 def struct_types(item_strategy=primitive_types):
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 3785d0e..dd9e9e6 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -1208,7 +1208,7 @@ def test_cast_from_null():
         pa.struct([pa.field('a', pa.int32()),
                    pa.field('b', pa.list_(pa.int8())),
                    pa.field('c', pa.string())]),
-        ]
+    ]
     for out_type in out_types:
         _check_cast_case((in_data, in_type, in_data, out_type))
 
@@ -1218,7 +1218,7 @@ def test_cast_from_null():
                   pa.field('b', pa.string())], mode=pa.lib.UnionMode_DENSE),
         pa.union([pa.field('a', pa.binary(10)),
                   pa.field('b', pa.string())], mode=pa.lib.UnionMode_SPARSE),
-        ]
+    ]
     in_arr = pa.array(in_data, type=pa.null())
     for out_type in out_types:
         with pytest.raises(NotImplementedError):
diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py
index 0992f14..abc9d31 100644
--- a/python/pyarrow/tests/test_csv.py
+++ b/python/pyarrow/tests/test_csv.py
@@ -234,7 +234,7 @@ class BaseTestCSVRead:
         assert table.to_pydict() == {
             "ef": ["ij", "mn"],
             "gh": ["kl", "op"],
-            }
+        }
 
         opts.skip_rows = 3
         table = self.read_bytes(rows, read_options=opts)
@@ -242,7 +242,7 @@ class BaseTestCSVRead:
         assert table.to_pydict() == {
             "mn": [],
             "op": [],
-            }
+        }
 
         opts.skip_rows = 4
         with pytest.raises(pa.ArrowInvalid):
@@ -257,7 +257,7 @@ class BaseTestCSVRead:
         assert table.to_pydict() == {
             "ij": ["mn"],
             "kl": ["op"],
-            }
+        }
 
     def test_header_column_names(self):
         rows = b"ab,cd\nef,gh\nij,kl\nmn,op\n"
@@ -269,7 +269,7 @@ class BaseTestCSVRead:
         assert table.to_pydict() == {
             "x": ["ab", "ef", "ij", "mn"],
             "y": ["cd", "gh", "kl", "op"],
-            }
+        }
 
         opts.skip_rows = 3
         table = self.read_bytes(rows, read_options=opts)
@@ -277,7 +277,7 @@ class BaseTestCSVRead:
         assert table.to_pydict() == {
             "x": ["mn"],
             "y": ["op"],
-            }
+        }
 
         opts.skip_rows = 4
         table = self.read_bytes(rows, read_options=opts)
@@ -285,7 +285,7 @@ class BaseTestCSVRead:
         assert table.to_pydict() == {
             "x": [],
             "y": [],
-            }
+        }
 
         opts.skip_rows = 5
         with pytest.raises(pa.ArrowInvalid):
@@ -308,7 +308,7 @@ class BaseTestCSVRead:
         assert table.to_pydict() == {
             "x": ["ij", "mn"],
             "y": ["kl", "op"],
-            }
+        }
 
     def test_header_autogenerate_column_names(self):
         rows = b"ab,cd\nef,gh\nij,kl\nmn,op\n"
@@ -320,7 +320,7 @@ class BaseTestCSVRead:
         assert table.to_pydict() == {
             "f0": ["ab", "ef", "ij", "mn"],
             "f1": ["cd", "gh", "kl", "op"],
-            }
+        }
 
         opts.skip_rows = 3
         table = self.read_bytes(rows, read_options=opts)
@@ -328,7 +328,7 @@ class BaseTestCSVRead:
         assert table.to_pydict() == {
             "f0": ["mn"],
             "f1": ["op"],
-            }
+        }
 
         # Not enough rows, impossible to infer number of columns
         opts.skip_rows = 4
@@ -344,7 +344,7 @@ class BaseTestCSVRead:
         self.check_names(table, ["ab"])
         assert table.to_pydict() == {
             "ab": ["ef", "ij", "mn"],
-            }
+        }
 
         # Order of include_columns is respected, regardless of CSV order
         convert_options.include_columns = ['cd', 'ab']
@@ -355,7 +355,7 @@ class BaseTestCSVRead:
         assert table.to_pydict() == {
             "cd": ["gh", "kl", "op"],
             "ab": ["ef", "ij", "mn"],
-            }
+        }
 
         # Include a column not in the CSV file => raises by default
         convert_options.include_columns = ['xx', 'ab', 'yy']
@@ -381,7 +381,7 @@ class BaseTestCSVRead:
             "xx": [None, None, None],
             "ab": ["ef", "ij", "mn"],
             "yy": [None, None, None],
-            }
+        }
 
         # Combining with `column_names`
         read_options.column_names = ["xx", "yy"]
@@ -394,7 +394,7 @@ class BaseTestCSVRead:
         assert table.to_pydict() == {
             "yy": ["cd", "gh", "kl", "op"],
             "cd": [None, None, None, None],
-            }
+        }
 
         # And with `column_types` as well
         convert_options.column_types = {"yy": pa.binary(),
@@ -407,7 +407,7 @@ class BaseTestCSVRead:
         assert table.to_pydict() == {
             "yy": [b"cd", b"gh", b"kl", b"op"],
             "cd": [None, None, None, None],
-            }
+        }
 
     def test_simple_ints(self):
         # Infer integer columns
@@ -421,7 +421,7 @@ class BaseTestCSVRead:
             'a': [1, 4],
             'b': [2, 5],
             'c': [3, 6],
-            }
+        }
 
     def test_simple_varied(self):
         # Infer various kinds of data
@@ -437,7 +437,7 @@ class BaseTestCSVRead:
             'b': [2, -5],
             'c': ["3", "foo"],
             'd': [False, True],
-            }
+        }
 
     def test_simple_nulls(self):
         # Infer various kinds of data, with nulls
@@ -460,7 +460,7 @@ class BaseTestCSVRead:
             'd': [None, None, None],
             'e': [b"3", b"nan", b"\xff"],
             'f': [None, True, False],
-            }
+        }
 
     def test_simple_timestamps(self):
         # Infer a timestamp column
@@ -472,7 +472,7 @@ class BaseTestCSVRead:
         assert table.to_pydict() == {
             'a': [1970, 1989],
             'b': [datetime(1970, 1, 1), datetime(1989, 7, 14)],
-            }
+        }
 
     def test_auto_dict_encode(self):
         opts = ConvertOptions(auto_dict_encode=True)
@@ -483,7 +483,7 @@ class BaseTestCSVRead:
         expected = {
             'a': ["ab", "cdé", "cdé", "ab"],
             'b': [1, 2, 3, 4],
-            }
+        }
         assert table.schema == schema
         assert table.to_pydict() == expected
 
@@ -518,7 +518,7 @@ class BaseTestCSVRead:
         expected = {
             'a': [b"ab", b"cd\xff", b"ab"],
             'b': [1, 2, 3],
-            }
+        }
         assert table.schema == schema
         assert table.to_pydict() == expected
 
@@ -537,7 +537,7 @@ class BaseTestCSVRead:
             'b': ["Xxx", "#N/A"],
             'c': ["1", ""],
             'd': [2, None],
-            }
+        }
 
         opts = ConvertOptions(null_values=['Xxx', 'Zzz'],
                               strings_can_be_null=True)
@@ -547,7 +547,7 @@ class BaseTestCSVRead:
             'b': [None, "#N/A"],
             'c': ["1", ""],
             'd': [2, None],
-            }
+        }
 
         opts = ConvertOptions(null_values=[])
         rows = b"a,b\n#N/A,\n"
@@ -558,7 +558,7 @@ class BaseTestCSVRead:
         assert table.to_pydict() == {
             'a': ["#N/A"],
             'b': [""],
-            }
+        }
 
     def test_custom_bools(self):
         # Infer booleans with custom values
@@ -579,7 +579,7 @@ class BaseTestCSVRead:
             'a': ["True", "False", "True", "False", "N/A"],
             'b': [True, False, True, False, None],
             'c': ["t", "f", "yes", "no", "N/A"],
-            }
+        }
 
     def test_column_types(self):
         # Ask for specific column types in ConvertOptions
@@ -601,7 +601,7 @@ class BaseTestCSVRead:
             'c': ["3", "6"],
             'd': [True, False],
             'e': [Decimal("1.00"), Decimal("0.00")]
-            }
+        }
         assert table.schema == schema
         assert table.to_pydict() == expected
         # Pass column_types as schema
@@ -636,7 +636,7 @@ class BaseTestCSVRead:
         assert table.to_pydict() == {
             'x': [b'a', b'c', b'e'],
             'y': ['b', 'd', 'f'],
-            }
+        }
 
     def test_no_ending_newline(self):
         # No \n after last line
@@ -646,7 +646,7 @@ class BaseTestCSVRead:
             'a': [1, 4],
             'b': [2, 5],
             'c': [3, 6],
-            }
+        }
 
     def test_trivial(self):
         # A bit pointless, but at least it shouldn't crash
@@ -660,20 +660,20 @@ class BaseTestCSVRead:
         assert table.to_pydict() == {
             'a': [1, 3],
             'b': [2, 4],
-            }
+        }
         parse_options = ParseOptions(ignore_empty_lines=False)
         table = self.read_bytes(rows, parse_options=parse_options)
         assert table.to_pydict() == {
             'a': [None, 1, None, 3],
             'b': [None, 2, None, 4],
-            }
+        }
         read_options = ReadOptions(skip_rows=2)
         table = self.read_bytes(rows, parse_options=parse_options,
                                 read_options=read_options)
         assert table.to_pydict() == {
             '1': [None, 3],
             '2': [None, 4],
-            }
+        }
 
     def test_invalid_csv(self):
         # Various CSV errors
@@ -693,13 +693,13 @@ class BaseTestCSVRead:
         assert table.to_pydict() == {
             'a;b': ['de'],
             'c': ['fg;eh'],
-            }
+        }
         opts = ParseOptions(delimiter=';')
         table = self.read_bytes(rows, parse_options=opts)
         assert table.to_pydict() == {
             'a': ['de,fg'],
             'b,c': ['eh'],
-            }
+        }
 
     def test_small_random_csv(self):
         csv, expected = make_random_csv(num_cols=2, num_rows=10)
@@ -1082,7 +1082,7 @@ class TestGZipCSVRead(BaseTestCompressedCSVRead, unittest.TestCase):
         assert table.to_pydict() == {
             'ab': ['ef', 'ij', 'mn'],
             'cd': ['gh', 'kl', 'op'],
-            }
+        }
 
 
 class TestBZ2CSVRead(BaseTestCompressedCSVRead, unittest.TestCase):
diff --git a/python/pyarrow/tests/test_cython.py b/python/pyarrow/tests/test_cython.py
index 202868d..30fd806 100644
--- a/python/pyarrow/tests/test_cython.py
+++ b/python/pyarrow/tests/test_cython.py
@@ -116,7 +116,7 @@ def test_cython_api(tmpdir):
             arr = mod.make_null_array(5)
             assert mod.get_array_length(arr) == 5
             assert arr.null_count == 5
-        """.format(mod_path=str(tmpdir), mod_name='pyarrow_cython_example')
+        """.format(mod_name='pyarrow_cython_example')
 
         if sys.platform == 'win32':
             delim, var = ';', 'PATH'
diff --git a/python/pyarrow/tests/test_json.py b/python/pyarrow/tests/test_json.py
index d373034..cfae932 100644
--- a/python/pyarrow/tests/test_json.py
+++ b/python/pyarrow/tests/test_json.py
@@ -131,7 +131,7 @@ class BaseTestJSONRead:
             'a': [1, 4],
             'b': [2, 5],
             'c': [3, 6],
-            }
+        }
 
     def test_simple_ints(self):
         # Infer integer columns
@@ -145,7 +145,7 @@ class BaseTestJSONRead:
             'a': [1, 4],
             'b': [2, 5],
             'c': [3, 6],
-            }
+        }
 
     def test_simple_varied(self):
         # Infer various kinds of data
@@ -162,7 +162,7 @@ class BaseTestJSONRead:
             'b': [2, -5],
             'c': ["3", "foo"],
             'd': [False, True],
-            }
+        }
 
     def test_simple_nulls(self):
         # Infer various kinds of data, with nulls
@@ -182,7 +182,7 @@ class BaseTestJSONRead:
             'c': [None, "foo", "nan"],
             'd': [None, None, None],
             'e': [None, True, False],
-            }
+        }
 
     def test_small_random_json(self):
         data, expected = make_random_json(num_cols=2, num_rows=10)
diff --git a/python/pyarrow/tests/test_orc.py b/python/pyarrow/tests/test_orc.py
index 44e98a0..cc75886 100644
--- a/python/pyarrow/tests/test_orc.py
+++ b/python/pyarrow/tests/test_orc.py
@@ -148,18 +148,18 @@ def test_orcfile_empty(datadir):
             ('list', pa.list_(pa.struct([
                 ('int1', pa.int32()),
                 ('string1', pa.string()),
-                ]))),
-            ])),
+            ]))),
+        ])),
         ('list', pa.list_(pa.struct([
             ('int1', pa.int32()),
             ('string1', pa.string()),
-            ]))),
+        ]))),
         ('map', pa.list_(pa.struct([
             ('key', pa.string()),
             ('value', pa.struct([
                 ('int1', pa.int32()),
                 ('string1', pa.string()),
-                ])),
-            ]))),
-        ])
+            ])),
+        ]))),
+    ])
     assert table.schema == expected_schema
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index a4bb37f..3a07ffa 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -1302,35 +1302,35 @@ class TestConvertDateTimeLikeTypes:
 
     def test_numpy_datetime64_columns(self):
         datetime64_ns = np.array([
-                '2007-07-13T01:23:34.123456789',
-                None,
-                '2006-01-13T12:34:56.432539784',
-                '2010-08-13T05:46:57.437699912'],
-                dtype='datetime64[ns]')
+            '2007-07-13T01:23:34.123456789',
+            None,
+            '2006-01-13T12:34:56.432539784',
+            '2010-08-13T05:46:57.437699912'],
+            dtype='datetime64[ns]')
         _check_array_from_pandas_roundtrip(datetime64_ns)
 
         datetime64_us = np.array([
-                '2007-07-13T01:23:34.123456',
-                None,
-                '2006-01-13T12:34:56.432539',
-                '2010-08-13T05:46:57.437699'],
-                dtype='datetime64[us]')
+            '2007-07-13T01:23:34.123456',
+            None,
+            '2006-01-13T12:34:56.432539',
+            '2010-08-13T05:46:57.437699'],
+            dtype='datetime64[us]')
         _check_array_from_pandas_roundtrip(datetime64_us)
 
         datetime64_ms = np.array([
-                '2007-07-13T01:23:34.123',
-                None,
-                '2006-01-13T12:34:56.432',
-                '2010-08-13T05:46:57.437'],
-                dtype='datetime64[ms]')
+            '2007-07-13T01:23:34.123',
+            None,
+            '2006-01-13T12:34:56.432',
+            '2010-08-13T05:46:57.437'],
+            dtype='datetime64[ms]')
         _check_array_from_pandas_roundtrip(datetime64_ms)
 
         datetime64_s = np.array([
-                '2007-07-13T01:23:34',
-                None,
-                '2006-01-13T12:34:56',
-                '2010-08-13T05:46:57'],
-                dtype='datetime64[s]')
+            '2007-07-13T01:23:34',
+            None,
+            '2006-01-13T12:34:56',
+            '2010-08-13T05:46:57'],
+            dtype='datetime64[s]')
         _check_array_from_pandas_roundtrip(datetime64_s)
 
     def test_timestamp_to_pandas_ns(self):
@@ -1378,11 +1378,11 @@ class TestConvertDateTimeLikeTypes:
     @pytest.mark.parametrize('dtype', [pa.date32(), pa.date64()])
     def test_numpy_datetime64_day_unit(self, dtype):
         datetime64_d = np.array([
-                '2007-07-13',
-                None,
-                '2006-01-15',
-                '2010-08-19'],
-                dtype='datetime64[D]')
+            '2007-07-13',
+            None,
+            '2006-01-15',
+            '2010-08-19'],
+            dtype='datetime64[D]')
         _check_array_from_pandas_roundtrip(datetime64_d, type=dtype)
 
     def test_array_from_pandas_date_with_mask(self):
@@ -1403,8 +1403,8 @@ class TestConvertDateTimeLikeTypes:
             'a': [
                 pd.Timestamp('2012-11-11 00:00:00+01:00'),
                 pd.NaT
-                ]
-             })
+            ]
+        })
         _check_pandas_roundtrip(df)
         _check_serialize_components_roundtrip(df)
 
@@ -1774,14 +1774,14 @@ class TestConvertListTypes:
 
     def test_column_of_decimal_list(self):
         array = pa.array([[decimal.Decimal('1'), decimal.Decimal('2')],
-                         [decimal.Decimal('3.3')]],
+                          [decimal.Decimal('3.3')]],
                          type=pa.list_(pa.decimal128(2, 1)))
         table = pa.Table.from_arrays([array], names=['col1'])
         df = table.to_pandas()
 
         expected_df = pd.DataFrame(
-                {'col1': [[decimal.Decimal('1'), decimal.Decimal('2')],
-                          [decimal.Decimal('3.3')]]})
+            {'col1': [[decimal.Decimal('1'), decimal.Decimal('2')],
+                      [decimal.Decimal('3.3')]]})
         tm.assert_frame_equal(df, expected_df)
 
     def test_nested_types_from_ndarray_null_entries(self):
@@ -3910,32 +3910,32 @@ def test_metadata_compat_missing_field_name():
 
     # metadata generated by fastparquet 0.3.2 with missing field_names
     table = table.replace_schema_metadata({
-        b'pandas': json.dumps(
-            {'column_indexes': [
+        b'pandas': json.dumps({
+            'column_indexes': [
                 {'field_name': None,
                  'metadata': None,
                  'name': None,
                  'numpy_type': 'object',
                  'pandas_type': 'mixed-integer'}
-                ],
-             'columns': [
-                 {'metadata': None,
-                  'name': 'a',
-                  'numpy_type': 'int64',
-                  'pandas_type': 'int64'},
-                 {'metadata': None,
-                  'name': 'b',
-                  'numpy_type': 'object',
-                  'pandas_type': 'unicode'}
-                 ],
-             'index_columns': [
-                 {'kind': 'range',
-                  'name': 'qux',
-                  'start': 0,
-                  'step': 2,
-                  'stop': 8}
-                 ],
-             'pandas_version': '0.25.0'}
+            ],
+            'columns': [
+                {'metadata': None,
+                 'name': 'a',
+                 'numpy_type': 'int64',
+                 'pandas_type': 'int64'},
+                {'metadata': None,
+                 'name': 'b',
+                 'numpy_type': 'object',
+                 'pandas_type': 'unicode'}
+            ],
+            'index_columns': [
+                {'kind': 'range',
+                 'name': 'qux',
+                 'start': 0,
+                 'step': 2,
+                 'stop': 8}
+            ],
+            'pandas_version': '0.25.0'}
 
         )})
     result = table.to_pandas()
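
The test_pandas.py churn above is pycodestyle's continuation-line family
(E12x): autopep8 collapses over-indented hanging indents to a single
4-space level and lines visual indents up under the opening bracket. A
runnable sketch of both accepted shapes, with values abridged from the
hunks:

    import numpy as np
    import pyarrow as pa

    # Hanging indent: nothing after the opening bracket, one extra level.
    datetime64_s = np.array([
        '2007-07-13T01:23:34',
        None],
        dtype='datetime64[s]')

    # Visual indent: continuation lines align under the first element.
    array = pa.array([[1, 2],
                      [3]],
                     type=pa.list_(pa.int64()))
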
diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index e146c08..6c7b6d4 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -341,8 +341,8 @@ def test_nested_list_nonnullable_roundtrip_bug(use_legacy_dataset):
     typ = pa.list_(pa.field("item", pa.float32(), False))
     num_rows = 10000
     t = pa.table([
-        pa.array(([[0] * ((i + 5) % 10) for i in range(0, 10)]
-                  * (num_rows // 10)), type=typ)
+        pa.array(([[0] * ((i + 5) % 10) for i in range(0, 10)] *
+                  (num_rows // 10)), type=typ)
     ], ['a'])
     _check_roundtrip(
         t, data_page_size=4096, use_legacy_dataset=use_legacy_dataset)
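
The hunk above shows the operator-placement convention the whole commit
adopts: break *after* a binary operator rather than before it. pycodestyle
can flag either shape (W503 "break before", W504 "break after"); the
setup.cfg hunk further down ignores W504, so the break-after shape becomes
the accepted one. A minimal sketch:

    first, second = 1, 2
    total = (first +        # operator kept at the end of the line
             second)
    assert total == 3
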
diff --git a/python/pyarrow/tests/test_plasma.py b/python/pyarrow/tests/test_plasma.py
index 967e69d..c574735 100644
--- a/python/pyarrow/tests/test_plasma.py
+++ b/python/pyarrow/tests/test_plasma.py
@@ -194,8 +194,8 @@ class TestPlasmaClient:
                                                           with_meta=True)
             assert data_tuple[1].to_pybytes() == i * b'a'
             assert (self.plasma_client.get_metadata(
-                        [object_ids[i]])[0].to_pybytes()
-                    == i * b'b')
+                [object_ids[i]])[0].to_pybytes() ==
+                i * b'b')
 
         # Make sure that creating the same object twice raises an exception.
         object_id = random_object_id()
@@ -262,7 +262,7 @@ class TestPlasmaClient:
             [object_id], timeout_ms=1, with_meta=True)[0][1] is None
         self.plasma_client.seal(object_id)
         assert self.plasma_client.get_buffers(
-            [object_id], timeout_ms=0, with_meta=True)[0][1]is not None
+            [object_id], timeout_ms=0, with_meta=True)[0][1] is not None
 
     def test_buffer_lifetime(self):
         # ARROW-2195
@@ -758,8 +758,8 @@ class TestPlasmaClient:
             data_sizes = [np.random.randint(1000) + 1 for _ in range(i)]
             for j in range(i):
                 x = self.plasma_client2.create(
-                        object_ids[j], data_sizes[j],
-                        metadata=bytearray(np.random.bytes(metadata_sizes[j])))
+                    object_ids[j], data_sizes[j],
+                    metadata=bytearray(np.random.bytes(metadata_sizes[j])))
                 self.plasma_client2.seal(object_ids[j])
             del x
             # Check that we received notifications for creating all of the
@@ -794,8 +794,8 @@ class TestPlasmaClient:
         data_sizes.append(np.random.randint(1000))
         for i in range(num_object_ids):
             x = self.plasma_client2.create(
-                    object_ids[i], data_sizes[i],
-                    metadata=bytearray(np.random.bytes(metadata_sizes[i])))
+                object_ids[i], data_sizes[i],
+                metadata=bytearray(np.random.bytes(metadata_sizes[i])))
             self.plasma_client2.seal(object_ids[i])
         del x
         for i in range(num_object_ids):
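
One fix in test_plasma.py deserves a note: `...[0][1]is not None` looks
like a syntax error but is not -- the tokenizer splits `]is` into `]` and
the keyword -- so the rewrite is purely cosmetic. A quick demonstration:

    pair = (b'data', b'meta')
    assert pair[1]is not None   # legal, but flagged for missing whitespace
    assert pair[1] is not None  # the autopep8 rewrite, identical behavior
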
diff --git a/python/pyarrow/tests/test_serialization.py b/python/pyarrow/tests/test_serialization.py
index ede2b33..f1e0cf4 100644
--- a/python/pyarrow/tests/test_serialization.py
+++ b/python/pyarrow/tests/test_serialization.py
@@ -134,7 +134,7 @@ def assert_equal(obj1, obj2):
         assert obj1.equals(obj2)
     else:
         assert type(obj1) == type(obj2) and obj1 == obj2, \
-                "Objects {} and {} are different.".format(obj1, obj2)
+            "Objects {} and {} are different.".format(obj1, obj2)
 
 
 PRIMITIVE_OBJECTS = [
@@ -832,7 +832,7 @@ def test_pyarrow_objects_serialization(large_buffer):
     # or it will affect 'test_total_bytes_allocated'.
     pyarrow_objects = [
         pa.array([1, 2, 3, 4]), pa.array(['1', 'never U+1F631', '',
-                                         "233 * U+1F600"]),
+                                          "233 * U+1F600"]),
         pa.array([1, None, 2, 3]),
         pa.Tensor.from_numpy(np.random.rand(2, 3, 4)),
         pa.RecordBatch.from_arrays(
@@ -841,7 +841,7 @@ def test_pyarrow_objects_serialization(large_buffer):
             ['a', 'b']),
         pa.Table.from_arrays([pa.array([1, None, 2, 3]),
                               pa.array(['1', 'never U+1F631', '',
-                                       "233 * u1F600"])],
+                                        "233 * u1F600"])],
                              ['a', 'b'])
     ]
     for obj in pyarrow_objects:
diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi
index 5f2e380..c2ba86a 100644
--- a/python/pyarrow/types.pxi
+++ b/python/pyarrow/types.pxi
@@ -100,6 +100,7 @@ cdef class DataType:
 
     Each data type is an *instance* of this class.
     """
+
     def __cinit__(self):
         pass
 
@@ -215,6 +216,7 @@ cdef class DictionaryMemo:
     """
     Tracking container for dictionary-encoded fields.
     """
+
     def __cinit__(self):
         self.sp_memo.reset(new CDictionaryMemo())
         self.memo = self.sp_memo.get()
@@ -966,6 +968,7 @@ cdef class Field:
     -----
     Do not use this class's constructor directly; use pyarrow.field
     """
+
     def __cinit__(self):
         pass
 
diff --git a/python/setup.cfg b/python/setup.cfg
index 162a507..9aaad4f 100644
--- a/python/setup.cfg
+++ b/python/setup.cfg
@@ -28,3 +28,7 @@ build-dir  = doc/_build
 addopts = --ignore=scripts
 filterwarnings =
     error:The SparseDataFrame:FutureWarning
+
+[pep8]
+ignore = E211,E225,E226,E227,E402,W504
+max_line_length = 79
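
autopep8 picks up a [pep8] section from setup.cfg (or tox.ini) in the
project directory, so the four lines above become project-wide defaults
rather than per-invocation flags. The same settings can be passed through
autopep8's documented library entry point, fix_code(); option names mirror
the CLI flags (the sample source here is illustrative):

    import autopep8

    source = "d = {'a': 1,\n     'b': 2,\n        }\n"
    fixed = autopep8.fix_code(
        source,
        options={'ignore': ['E211', 'E225', 'E226', 'E227', 'E402', 'W504'],
                 'max_line_length': 79})
    print(fixed)  # prints the reformatted source
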
diff --git a/python/setup.py b/python/setup.py
index 134d6a4..f6f6b45 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -281,8 +281,8 @@ class build_ext(_build_ext):
 
             # Do the build
             print("-- Running cmake --build for pyarrow")
-            self.spawn(['cmake', '--build', '.', '--config', self.build_type]
-                       + build_tool_args)
+            self.spawn(['cmake', '--build', '.', '--config', self.build_type] +
+                       build_tool_args)
             print("-- Finished cmake --build for pyarrow")
 
             if self.inplace:
@@ -513,8 +513,8 @@ def _move_shared_libs_unix(build_prefix, build_lib, lib_name):
 # In the event of not running from a git clone (e.g. from a git archive
 # or a Python sdist), see if we can set the version number ourselves
 default_version = '0.18.0-SNAPSHOT'
-if (not os.path.exists('../.git')
-        and not os.environ.get('SETUPTOOLS_SCM_PRETEND_VERSION')):
+if (not os.path.exists('../.git') and
+        not os.environ.get('SETUPTOOLS_SCM_PRETEND_VERSION')):
     if os.path.exists('PKG-INFO'):
         # We're probably in a Python sdist, setuptools_scm will handle fine
         pass