You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2021/01/12 06:50:27 UTC

[arrow] branch master updated: ARROW-10777: [Packaging][Python] Build sdist by Crossbow

This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 8e5d09e  ARROW-10777: [Packaging][Python] Build sdist by Crossbow
8e5d09e is described below

commit 8e5d09e0415aeb582e2a665b1d93912800a9ea8d
Author: Sutou Kouhei <ko...@clear-code.com>
AuthorDate: Tue Jan 12 15:49:30 2021 +0900

    ARROW-10777: [Packaging][Python] Build sdist by Crossbow
    
    Closes #9145 from kou/packaging-python-sdist
    
    Lead-authored-by: Sutou Kouhei <ko...@clear-code.com>
    Co-authored-by: Krisztián Szűcs <sz...@gmail.com>
    Signed-off-by: Sutou Kouhei <ko...@clear-code.com>
---
 ci/docker/python-sdist.dockerfile                  | 36 ++++++++++++
 ci/scripts/cpp_build.sh                            |  2 +-
 ci/scripts/python_sdist_build.sh                   | 27 +++++++++
 ci/scripts/python_sdist_test.sh                    | 52 +++++++++++++++++
 dev/release/binary-task.rb                         |  2 +-
 dev/release/download_rc_binaries.py                | 13 ++++-
 dev/release/post-10-python.sh                      | 44 +++++++++++++++
 dev/release/rat_exclude_files.txt                  |  1 +
 .../github.win.yml => python-sdist/github.yml}     | 65 ++++++++--------------
 dev/tasks/python-wheels/github.osx.yml             | 10 +---
 dev/tasks/python-wheels/github.win.yml             | 10 +---
 dev/tasks/requirements-crossbow.txt                |  7 +++
 dev/tasks/tasks.yml                                | 18 ++++++
 docker-compose.yml                                 | 49 ++++++++++++++++
 14 files changed, 272 insertions(+), 64 deletions(-)

diff --git a/ci/docker/python-sdist.dockerfile b/ci/docker/python-sdist.dockerfile
new file mode 100644
index 0000000..853b532
--- /dev/null
+++ b/ci/docker/python-sdist.dockerfile
@@ -0,0 +1,43 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Image for building the pyarrow sdist: git, pip and the Python packages
+# listed in python/requirements-build.txt.
+FROM amd64/ubuntu:20.04
+
+# Run RUN commands with bash and pipefail so that a failure on the left-hand
+# side of a pipe fails the build.
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+RUN echo "debconf debconf/frontend select Noninteractive" | \
+        debconf-set-selections
+
+RUN apt-get update -y -q && \
+    apt-get install -y -q --no-install-recommends \
+        git \
+        python3-pip && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists*
+
+COPY python/requirements-build.txt \
+     /arrow/python/requirements-build.txt
+# --no-cache-dir keeps pip's download cache out of the image layer.
+RUN pip3 install --no-cache-dir \
+        --requirement /arrow/python/requirements-build.txt
+
+# Interpreter picked up by ci/scripts/python_sdist_build.sh via ${PYTHON:-python}.
+ENV PYTHON=/usr/bin/python3
diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh
index a12c51c..144abbd 100755
--- a/ci/scripts/cpp_build.sh
+++ b/ci/scripts/cpp_build.sh
@@ -123,7 +123,7 @@ cmake -G "${CMAKE_GENERATOR:-Ninja}" \
       -DPARQUET_REQUIRE_ENCRYPTION=${PARQUET_REQUIRE_ENCRYPTION:-ON} \
       -DProtobuf_SOURCE=${Protobuf_SOURCE:-} \
       -DRapidJSON_SOURCE=${RapidJSON_SOURCE:-} \
-      -DRE2_SOURCE=${RE2_SOURCE:-} \
+      -Dre2_SOURCE=${re2_SOURCE:-} \
       -DSnappy_SOURCE=${Snappy_SOURCE:-} \
       -DThrift_SOURCE=${Thrift_SOURCE:-} \
       -Dutf8proc_SOURCE=${utf8proc_SOURCE:-} \
diff --git a/ci/scripts/python_sdist_build.sh b/ci/scripts/python_sdist_build.sh
new file mode 100755
index 0000000..f9e9359
--- /dev/null
+++ b/ci/scripts/python_sdist_build.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Build the pyarrow source distribution by running "setup.py sdist".
+#
+# Usage: python_sdist_build.sh <arrow-dir>
+#
+# PYARROW_VERSION (may be unset or empty) is exported as
+# SETUPTOOLS_SCM_PRETEND_VERSION before setup.py runs.
+
+set -eux
+
+source_dir=${1}/python
+
+# Quote the path so that a checkout path containing spaces still works.
+pushd "${source_dir}"
+export SETUPTOOLS_SCM_PRETEND_VERSION=${PYARROW_VERSION:-}
+${PYTHON:-python} setup.py sdist
+popd
diff --git a/ci/scripts/python_sdist_test.sh b/ci/scripts/python_sdist_test.sh
new file mode 100755
index 0000000..1388ca0
--- /dev/null
+++ b/ci/scripts/python_sdist_test.sh
@@ -0,0 +1,64 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Install pyarrow from the sdist under <arrow-dir>/python/dist with pip and
+# run the pyarrow test suite with pytest.
+#
+# Usage: python_sdist_test.sh <arrow-dir>
+#
+# PYARROW_VERSION selects the sdist to install; when it is unset or empty,
+# the pyarrow-*.tar.gz that sorts last in python/dist is used.
+
+set -eux
+
+arrow_dir=${1}
+
+export ARROW_SOURCE_DIR=${arrow_dir}
+export ARROW_TEST_DATA=${arrow_dir}/testing/data
+export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data
+
+# Derive the PYARROW_* build settings from the corresponding CMAKE_* and
+# ARROW_* variables; every optional component defaults to OFF.
+export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja}
+export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug}
+export PYARROW_WITH_S3=${ARROW_S3:-OFF}
+export PYARROW_WITH_ORC=${ARROW_ORC:-OFF}
+export PYARROW_WITH_CUDA=${ARROW_CUDA:-OFF}
+export PYARROW_WITH_HDFS=${ARROW_HDFS:-OFF}
+export PYARROW_WITH_FLIGHT=${ARROW_FLIGHT:-OFF}
+export PYARROW_WITH_PLASMA=${ARROW_PLASMA:-OFF}
+export PYARROW_WITH_GANDIVA=${ARROW_GANDIVA:-OFF}
+export PYARROW_WITH_PARQUET=${ARROW_PARQUET:-OFF}
+export PYARROW_WITH_DATASET=${ARROW_DATASET:-OFF}
+
+# TODO: Users should not require ARROW_HOME and pkg-config to find Arrow C++.
+# Related: ARROW-9171
+# unset ARROW_HOME
+# apt purge -y pkg-config
+
+if [ -n "${PYARROW_VERSION:-}" ]; then
+  sdist="${arrow_dir}/python/dist/pyarrow-${PYARROW_VERSION}.tar.gz"
+else
+  # The glob must stay outside the quotes, otherwise it is never expanded and
+  # ls looks for a file literally named "pyarrow-*.tar.gz".
+  sdist=$(ls "${arrow_dir}"/python/dist/pyarrow-*.tar.gz | sort -r | head -n1)
+fi
+${PYTHON:-python} -m pip install "${sdist}"
+
+pytest -r s ${PYTEST_ARGS:-} --pyargs pyarrow
diff --git a/dev/release/binary-task.rb b/dev/release/binary-task.rb
index dda0c62..8aedbcb 100644
--- a/dev/release/binary-task.rb
+++ b/dev/release/binary-task.rb
@@ -1833,7 +1833,7 @@ APT::FTPArchive::Release::Description "#{apt_repository_description}";
                               :python,
                               "#{rc_dir}/python/#{full_version}",
                               "#{release_dir}/python/#{full_version}",
-                              "{conda,wheel}-*/**/*")
+                              "{conda-*,wheel-*,python-sdist}/**/*")
   end
 
   def define_nuget_tasks
diff --git a/dev/release/download_rc_binaries.py b/dev/release/download_rc_binaries.py
index 1c3da0e..5ed8ece 100755
--- a/dev/release/download_rc_binaries.py
+++ b/dev/release/download_rc_binaries.py
@@ -135,11 +135,15 @@ ARROW_PACKAGE_TYPES = ['centos', 'debian', 'nuget', 'python', 'ubuntu']
 
 
 def download_rc_binaries(version, rc_number, re_match=None, dest=None,
-                         num_parallel=None):
+                         num_parallel=None, target_package_type=None):
     bintray = Bintray()
 
     version_string = '{}-rc{}'.format(version, rc_number)
-    for package_type in ARROW_PACKAGE_TYPES:
+    if target_package_type:
+        package_types = [target_package_type]
+    else:
+        package_types = ARROW_PACKAGE_TYPES
+    for package_type in package_types:
         files = bintray.get_file_list('{}-rc'.format(package_type),
                                       version_string)
         bintray.download_files(files, re_match=re_match, dest=dest,
@@ -160,7 +164,10 @@ if __name__ == '__main__':
                         help='The output folder for the downloaded files')
     parser.add_argument('--num_parallel', type=int, default=8,
                         help='The number of concurrent downloads to do')
+    parser.add_argument('--package_type', type=str, default=None,
+                        help='The package type to be downloaded')
     args = parser.parse_args()
 
     download_rc_binaries(args.version, args.rc_number, dest=args.dest,
-                         re_match=args.regexp, num_parallel=args.num_parallel)
+                         re_match=args.regexp, num_parallel=args.num_parallel,
+                         target_package_type=args.package_type)
diff --git a/dev/release/post-10-python.sh b/dev/release/post-10-python.sh
new file mode 100755
index 0000000..0f7a480
--- /dev/null
+++ b/dev/release/post-10-python.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Upload the Python packages of a release candidate to PyPI.
+#
+# Downloads the "python" package type with download_rc_binaries.py into a
+# temporary directory and uploads the *.whl and *.tar.gz files with twine.
+#
+# Usage: post-10-python.sh <version> <rc-num>
+
+set -e
+set -o pipefail
+
+SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+if [ "$#" -ne 2 ]; then
+  echo "Usage: $0 <version> <rc-num>" >&2
+  # Wrong usage is an error: a bare "exit" would return echo's status (0).
+  exit 1
+fi
+
+version=$1
+rc=$2
+
+tmp=$(mktemp -d -t "arrow-post-python.XXXXX")
+# Remove the download directory even when the download or the upload fails.
+trap 'rm -rf "${tmp}"' EXIT
+
+${PYTHON:-python} \
+  "${SOURCE_DIR}/download_rc_binaries.py" \
+  "${version}" \
+  "${rc}" \
+  --dest="${tmp}" \
+  --package_type=python
+twine upload "${tmp}/python-rc/${version}-rc${rc}"/*.{whl,tar.gz}
+
+echo "Success! The released PyPI packages are available here:"
+echo "  https://pypi.org/project/pyarrow/${version}"
diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
index 3865239..85ac2c2 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -164,6 +164,7 @@ dev/tasks/linux-packages/apache-arrow/debian/plasma-store-server.install
 dev/tasks/linux-packages/apache-arrow/debian/rules
 dev/tasks/linux-packages/apache-arrow/debian/source/format
 dev/tasks/linux-packages/apache-arrow/debian/watch
+dev/tasks/requirements*.txt
 dev/tasks/conda-recipes/*
 docs/requirements.txt
 go/arrow/flight/Flight_grpc.pb.go
diff --git a/dev/tasks/python-wheels/github.win.yml b/dev/tasks/python-sdist/github.yml
similarity index 65%
copy from dev/tasks/python-wheels/github.win.yml
copy to dev/tasks/python-sdist/github.yml
index f7638d8..b784fbd 100644
--- a/dev/tasks/python-wheels/github.win.yml
+++ b/dev/tasks/python-sdist/github.yml
@@ -26,16 +26,10 @@ on:
 
 jobs:
   build:
-    name: "Build wheel for Windows"
-    runs-on: windows-2016
-    env:
-      ARCH: "x64"
-      GENERATOR: Visual Studio 15 2017
-      PYTHON_VERSION: "{{ python_version }}"
-      PYARROW_VERSION: {{ arrow.no_rc_version }}
+    name: "Build sdist"
+    runs-on: ubuntu-20.04
     steps:
       - name: Checkout Arrow
-        shell: bash
         run: |
           git clone --no-checkout {{ arrow.remote }} arrow
           git -C arrow config core.symlinks true
@@ -43,49 +37,38 @@ jobs:
           git -C arrow checkout FETCH_HEAD
           git -C arrow submodule update --init --recursive
       - name: Fetch Submodules and Tags
-        shell: bash
         run: cd arrow && ci/scripts/util_checkout.sh
-      - uses: actions/setup-python@v2
-        with:
-          python-version: "{{ python_version }}"
-      - name: Set up Miniconda
-        shell: bash
-        run: |
-          echo "c:\\Miniconda\\condabin" >> $GITHUB_PATH
-      - name: Build wheel
-        shell: bash
-        run: |
-          arrow/dev/tasks/python-wheels/win-build.bat
-      - name: Prepare artifacts
-        # the artifacts must be uploaded from a directory relative to the build root
+      - name: Free Up Disk Space
         shell: bash
+        run: arrow/ci/scripts/util_cleanup.sh
+      - uses: actions/setup-python@v2
+      - name: Setup Archery
+        run: pip install -e arrow/dev/archery[docker]
+      - name: Build sdist
         run: |
-          mv arrow/python/dist/ wheels/
+          archery docker run python-sdist
+          {% if arrow.branch == 'master' %}
+          archery docker push python-sdist || :
+          {% endif %}
+        env:
+          PYARROW_VERSION: {{ arrow.no_rc_version }}
+      - name: Test sdist
+        run: archery docker run ubuntu-python-sdist-test
+        env:
+          PYARROW_VERSION: {{ arrow.no_rc_version }}
       {% if arrow.branch == 'master' %}
       - name: Upload to gemfury
-        shell: bash
         run: |
-          conda.bat install -y curl
-          WHEEL_PATH=$(echo wheels/*.whl)
-          curl.exe \
-            -F "package=@${WHEEL_PATH}" \
+          SDIST_PATH=$(echo arrow/python/dist/*.tar.gz)
+          curl \
+            -F "package=@${SDIST_PATH}" \
             "https://${CROSSBOW_GEMFURY_TOKEN}@push.fury.io/${CROSSBOW_GEMFURY_ORG}/"
         env:
           CROSSBOW_GEMFURY_ORG: {{ '${{ secrets.CROSSBOW_GEMFURY_ORG }}' }}
           CROSSBOW_GEMFURY_TOKEN: {{ '${{ secrets.CROSSBOW_GEMFURY_TOKEN }}' }}
       {% endif %}
-      - name: Set up Crossbow
-        shell: bash
-        run: |
-          pip install \
-            click \
-            github3.py \
-            jinja2 \
-            jira \
-            pygit2 \
-            ruamel.yaml \
-            setuptools_scm \
-            toolz
+      - name: Setup Crossbow
+        run: pip install --requirement arrow/dev/tasks/requirements-crossbow.txt
       - name: Upload artifacts
         shell: bash
         run: |
@@ -93,7 +76,7 @@ jobs:
             --queue-path . \
             --queue-remote {{ queue_remote_url }} \
             upload-artifacts \
-            --pattern "wheels/*.whl" \
+            --pattern "arrow/python/dist/*.tar.gz" \
             --sha {{ task.branch }} \
             --tag {{ task.tag }}
         env:
diff --git a/dev/tasks/python-wheels/github.osx.yml b/dev/tasks/python-wheels/github.osx.yml
index 5e9214f..f0edeb3 100644
--- a/dev/tasks/python-wheels/github.osx.yml
+++ b/dev/tasks/python-wheels/github.osx.yml
@@ -108,15 +108,7 @@ jobs:
 
       - name: Setup Crossbow
         run: |
-          pip3 install \
-            click \
-            github3.py \
-            jinja2 \
-            jira \
-            pygit2 \
-            ruamel.yaml \
-            setuptools_scm \
-            toolz
+          pip3 install --requirement arrow/dev/tasks/requirements-crossbow.txt
 
       - name: Upload artifacts
         run: |
diff --git a/dev/tasks/python-wheels/github.win.yml b/dev/tasks/python-wheels/github.win.yml
index f7638d8..4226432 100644
--- a/dev/tasks/python-wheels/github.win.yml
+++ b/dev/tasks/python-wheels/github.win.yml
@@ -77,15 +77,7 @@ jobs:
       - name: Set up Crossbow
         shell: bash
         run: |
-          pip install \
-            click \
-            github3.py \
-            jinja2 \
-            jira \
-            pygit2 \
-            ruamel.yaml \
-            setuptools_scm \
-            toolz
+          pip install --requirement arrow/dev/tasks/requirements-crossbow.txt
       - name: Upload artifacts
         shell: bash
         run: |
diff --git a/dev/tasks/requirements-crossbow.txt b/dev/tasks/requirements-crossbow.txt
new file mode 100644
index 0000000..2436b4d
--- /dev/null
+++ b/dev/tasks/requirements-crossbow.txt
@@ -0,0 +1,7 @@
+click>=7.1
+github3.py
+jinja2
+pygit2
+ruamel.yaml
+setuptools_scm
+toolz
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index c3b2139..f19e0a4 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -48,6 +48,15 @@ groups:
   homebrew:
     - homebrew-*
 
+  packaging:
+    - conda-*
+    - wheel-*
+    - debian-*
+    - ubuntu-*
+    - centos-*
+    - python-sdist
+    - nuget
+
   ############################# Testing tasks #################################
 
   test:
@@ -121,6 +130,7 @@ groups:
     - test-*
     - example-*
     - wheel-*
+    - python-sdist
 
 tasks:
   # arbitrary_task_name:
@@ -610,6 +620,14 @@ tasks:
     artifacts:
       - pyarrow-{no_rc_version}-cp38-cp38-win_amd64.whl
 
+  ############################ Python sdist ####################################
+
+  python-sdist:
+    ci: github
+    template: python-sdist/github.yml
+    artifacts:
+      - pyarrow-{no_rc_version}.tar.gz
+
   ############################## Linux PKGS ####################################
 
   debian-buster-amd64:
diff --git a/docker-compose.yml b/docker-compose.yml
index 0454c67..6aa195c 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -110,12 +110,14 @@ x-hierarchy:
     - ubuntu-lint
     - ubuntu-python:
       - ubuntu-docs
+    - ubuntu-python-sdist-test
     - ubuntu-r
   - ubuntu-cuda-cpp:
     - ubuntu-cuda-python
   - ubuntu-csharp
   - ubuntu-cpp-sanitizer
   - ubuntu-r-sanitizer
+  - python-sdist
   - r
   # helper services
   - impala
@@ -588,6 +590,53 @@ services:
     volumes: *fedora-volumes
     command: *python-command
 
+  ############################ Python sdist ###################################
+
+  python-sdist:
+    # Usage:
+    #   docker-compose build python-sdist
+    #   docker-compose run --rm python-sdist
+    # Parameters:
+    #   PYARROW_VERSION: The pyarrow version for sdist such as "3.0.0"
+    image: ${REPO}:python-sdist
+    build:
+      context: .
+      dockerfile: ci/docker/python-sdist.dockerfile
+      cache_from:
+        - ${REPO}:python-sdist
+    environment:
+      PYARROW_VERSION: ${PYARROW_VERSION:-}
+    volumes:
+      - .:/arrow:delegated
+    command: /arrow/ci/scripts/python_sdist_build.sh /arrow
+
+  ubuntu-python-sdist-test:
+    # Usage:
+    #   docker-compose build ubuntu-cpp
+    #   docker-compose build ubuntu-python-sdist-test
+    #   docker-compose run --rm ubuntu-python-sdist-test
+    # Parameters:
+    #   ARCH: amd64, arm64v8, ...
+    #   PYARROW_VERSION: The test target pyarrow version such as "3.0.0"
+    #   UBUNTU: 16.04, 18.04, 20.04
+    image: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-python-3
+    build:
+      context: .
+      dockerfile: ci/docker/linux-apt-python-3.dockerfile
+      cache_from:
+        - ${REPO}:${ARCH}-ubuntu-${UBUNTU}-python-3
+      args:
+        base: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp
+    shm_size: *shm-size
+    environment:
+      <<: *ccache
+      PYARROW_VERSION: ${PYARROW_VERSION:-}
+    volumes: *ubuntu-volumes
+    command: >
+      /bin/bash -c "
+        /arrow/ci/scripts/cpp_build.sh /arrow /build &&
+        /arrow/ci/scripts/python_sdist_test.sh /arrow"
+
   ##############################  Integration #################################
 
   conda-python-pandas: