You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2021/01/12 06:50:27 UTC
[arrow] branch master updated: ARROW-10777: [Packaging][Python] Build sdist by Crossbow
This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 8e5d09e ARROW-10777: [Packaging][Python] Build sdist by Crossbow
8e5d09e is described below
commit 8e5d09e0415aeb582e2a665b1d93912800a9ea8d
Author: Sutou Kouhei <ko...@clear-code.com>
AuthorDate: Tue Jan 12 15:49:30 2021 +0900
ARROW-10777: [Packaging][Python] Build sdist by Crossbow
Closes #9145 from kou/packaging-python-sdist
Lead-authored-by: Sutou Kouhei <ko...@clear-code.com>
Co-authored-by: Krisztián Szűcs <sz...@gmail.com>
Signed-off-by: Sutou Kouhei <ko...@clear-code.com>
---
ci/docker/python-sdist.dockerfile | 36 ++++++++++++
ci/scripts/cpp_build.sh | 2 +-
ci/scripts/python_sdist_build.sh | 27 +++++++++
ci/scripts/python_sdist_test.sh | 52 +++++++++++++++++
dev/release/binary-task.rb | 2 +-
dev/release/download_rc_binaries.py | 13 ++++-
dev/release/post-10-python.sh | 44 +++++++++++++++
dev/release/rat_exclude_files.txt | 1 +
.../github.win.yml => python-sdist/github.yml} | 65 ++++++++--------------
dev/tasks/python-wheels/github.osx.yml | 10 +---
dev/tasks/python-wheels/github.win.yml | 10 +---
dev/tasks/requirements-crossbow.txt | 7 +++
dev/tasks/tasks.yml | 18 ++++++
docker-compose.yml | 49 ++++++++++++++++
14 files changed, 272 insertions(+), 64 deletions(-)
diff --git a/ci/docker/python-sdist.dockerfile b/ci/docker/python-sdist.dockerfile
new file mode 100644
index 0000000..853b532
--- /dev/null
+++ b/ci/docker/python-sdist.dockerfile
@@ -0,0 +1,36 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+FROM amd64/ubuntu:20.04
+
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+RUN echo "debconf debconf/frontend select Noninteractive" | \
+ debconf-set-selections
+
+RUN apt-get update -y -q && \
+ apt-get install -y -q --no-install-recommends \
+ git \
+ python3-pip && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists*
+
+COPY python/requirements-build.txt \
+ /arrow/python/requirements-build.txt
+RUN pip3 install --requirement /arrow/python/requirements-build.txt
+
+ENV PYTHON=/usr/bin/python3
diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh
index a12c51c..144abbd 100755
--- a/ci/scripts/cpp_build.sh
+++ b/ci/scripts/cpp_build.sh
@@ -123,7 +123,7 @@ cmake -G "${CMAKE_GENERATOR:-Ninja}" \
-DPARQUET_REQUIRE_ENCRYPTION=${PARQUET_REQUIRE_ENCRYPTION:-ON} \
-DProtobuf_SOURCE=${Protobuf_SOURCE:-} \
-DRapidJSON_SOURCE=${RapidJSON_SOURCE:-} \
- -DRE2_SOURCE=${RE2_SOURCE:-} \
+ -Dre2_SOURCE=${re2_SOURCE:-} \
-DSnappy_SOURCE=${Snappy_SOURCE:-} \
-DThrift_SOURCE=${Thrift_SOURCE:-} \
-Dutf8proc_SOURCE=${utf8proc_SOURCE:-} \
diff --git a/ci/scripts/python_sdist_build.sh b/ci/scripts/python_sdist_build.sh
new file mode 100755
index 0000000..f9e9359
--- /dev/null
+++ b/ci/scripts/python_sdist_build.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eux
+
+python_dir=${1}/python
+
+export SETUPTOOLS_SCM_PRETEND_VERSION=${PYARROW_VERSION:-}
+pushd ${python_dir}
+${PYTHON:-python} setup.py sdist
+popd
diff --git a/ci/scripts/python_sdist_test.sh b/ci/scripts/python_sdist_test.sh
new file mode 100755
index 0000000..1388ca0
--- /dev/null
+++ b/ci/scripts/python_sdist_test.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eux
+
+arrow_dir=${1}
+
+export ARROW_SOURCE_DIR=${arrow_dir}
+export ARROW_TEST_DATA=${arrow_dir}/testing/data
+export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data
+
+export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja}
+export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug}
+export PYARROW_WITH_S3=${ARROW_S3:-OFF}
+export PYARROW_WITH_ORC=${ARROW_ORC:-OFF}
+export PYARROW_WITH_CUDA=${ARROW_CUDA:-OFF}
+export PYARROW_WITH_HDFS=${ARROW_HDFS:-OFF}
+export PYARROW_WITH_FLIGHT=${ARROW_FLIGHT:-OFF}
+export PYARROW_WITH_PLASMA=${ARROW_PLASMA:-OFF}
+export PYARROW_WITH_GANDIVA=${ARROW_GANDIVA:-OFF}
+export PYARROW_WITH_PARQUET=${ARROW_PARQUET:-OFF}
+export PYARROW_WITH_DATASET=${ARROW_DATASET:-OFF}
+
+# TODO: Users should not require ARROW_HOME and pkg-config to find Arrow C++.
+# Related: ARROW-9171
+# unset ARROW_HOME
+# apt purge -y pkg-config
+
+if [ -n "${PYARROW_VERSION:-}" ]; then
+  sdist="${arrow_dir}/python/dist/pyarrow-${PYARROW_VERSION}.tar.gz"
+else  # No version pinned: take the newest sdist; the glob must stay unquoted to expand.
+  sdist=$(ls "${arrow_dir}"/python/dist/pyarrow-*.tar.gz | sort -r | head -n1)
+fi
+${PYTHON:-python} -m pip install "${sdist}"
+
+pytest -r s ${PYTEST_ARGS:-} --pyargs pyarrow
diff --git a/dev/release/binary-task.rb b/dev/release/binary-task.rb
index dda0c62..8aedbcb 100644
--- a/dev/release/binary-task.rb
+++ b/dev/release/binary-task.rb
@@ -1833,7 +1833,7 @@ APT::FTPArchive::Release::Description "#{apt_repository_description}";
:python,
"#{rc_dir}/python/#{full_version}",
"#{release_dir}/python/#{full_version}",
- "{conda,wheel}-*/**/*")
+ "{conda-*,wheel-*,python-sdist}/**/*")
end
def define_nuget_tasks
diff --git a/dev/release/download_rc_binaries.py b/dev/release/download_rc_binaries.py
index 1c3da0e..5ed8ece 100755
--- a/dev/release/download_rc_binaries.py
+++ b/dev/release/download_rc_binaries.py
@@ -135,11 +135,15 @@ ARROW_PACKAGE_TYPES = ['centos', 'debian', 'nuget', 'python', 'ubuntu']
def download_rc_binaries(version, rc_number, re_match=None, dest=None,
- num_parallel=None):
+ num_parallel=None, target_package_type=None):
bintray = Bintray()
version_string = '{}-rc{}'.format(version, rc_number)
- for package_type in ARROW_PACKAGE_TYPES:
+ if target_package_type:
+ package_types = [target_package_type]
+ else:
+ package_types = ARROW_PACKAGE_TYPES
+ for package_type in package_types:
files = bintray.get_file_list('{}-rc'.format(package_type),
version_string)
bintray.download_files(files, re_match=re_match, dest=dest,
@@ -160,7 +164,10 @@ if __name__ == '__main__':
help='The output folder for the downloaded files')
parser.add_argument('--num_parallel', type=int, default=8,
help='The number of concurrent downloads to do')
+ parser.add_argument('--package_type', type=str, default=None,
+ help='The package type to be downloaded')
args = parser.parse_args()
download_rc_binaries(args.version, args.rc_number, dest=args.dest,
- re_match=args.regexp, num_parallel=args.num_parallel)
+ re_match=args.regexp, num_parallel=args.num_parallel,
+ target_package_type=args.package_type)
diff --git a/dev/release/post-10-python.sh b/dev/release/post-10-python.sh
new file mode 100755
index 0000000..0f7a480
--- /dev/null
+++ b/dev/release/post-10-python.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+set -o pipefail
+
+SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+if [ "$#" -ne 2 ]; then
+  echo "Usage: $0 <version> <rc-num>"
+  exit 1  # a bare exit would return echo's status (0) and hide the misuse
+fi
+
+version=$1
+rc=$2
+
+tmp=$(mktemp -d -t "arrow-post-python.XXXXX")
+trap 'rm -rf "${tmp}"' EXIT  # remove the download dir even if a step below fails
+${PYTHON:-python} \
+  "${SOURCE_DIR}/download_rc_binaries.py" \
+  "${version}" \
+  "${rc}" \
+  --dest="${tmp}" \
+  --package_type=python
+twine upload "${tmp}/python-rc/${version}-rc${rc}"/*.{whl,tar.gz}
+
+echo "Success! The released PyPI packages are available here:"
+echo " https://pypi.org/project/pyarrow/${version}"
diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
index 3865239..85ac2c2 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -164,6 +164,7 @@ dev/tasks/linux-packages/apache-arrow/debian/plasma-store-server.install
dev/tasks/linux-packages/apache-arrow/debian/rules
dev/tasks/linux-packages/apache-arrow/debian/source/format
dev/tasks/linux-packages/apache-arrow/debian/watch
+dev/tasks/requirements*.txt
dev/tasks/conda-recipes/*
docs/requirements.txt
go/arrow/flight/Flight_grpc.pb.go
diff --git a/dev/tasks/python-wheels/github.win.yml b/dev/tasks/python-sdist/github.yml
similarity index 65%
copy from dev/tasks/python-wheels/github.win.yml
copy to dev/tasks/python-sdist/github.yml
index f7638d8..b784fbd 100644
--- a/dev/tasks/python-wheels/github.win.yml
+++ b/dev/tasks/python-sdist/github.yml
@@ -26,16 +26,10 @@ on:
jobs:
build:
- name: "Build wheel for Windows"
- runs-on: windows-2016
- env:
- ARCH: "x64"
- GENERATOR: Visual Studio 15 2017
- PYTHON_VERSION: "{{ python_version }}"
- PYARROW_VERSION: {{ arrow.no_rc_version }}
+ name: "Build sdist"
+ runs-on: ubuntu-20.04
steps:
- name: Checkout Arrow
- shell: bash
run: |
git clone --no-checkout {{ arrow.remote }} arrow
git -C arrow config core.symlinks true
@@ -43,49 +37,38 @@ jobs:
git -C arrow checkout FETCH_HEAD
git -C arrow submodule update --init --recursive
- name: Fetch Submodules and Tags
- shell: bash
run: cd arrow && ci/scripts/util_checkout.sh
- - uses: actions/setup-python@v2
- with:
- python-version: "{{ python_version }}"
- - name: Set up Miniconda
- shell: bash
- run: |
- echo "c:\\Miniconda\\condabin" >> $GITHUB_PATH
- - name: Build wheel
- shell: bash
- run: |
- arrow/dev/tasks/python-wheels/win-build.bat
- - name: Prepare artifacts
- # the artifacts must be uploaded from a directory relative to the build root
+ - name: Free Up Disk Space
shell: bash
+ run: arrow/ci/scripts/util_cleanup.sh
+ - uses: actions/setup-python@v2
+ - name: Setup Archery
+ run: pip install -e arrow/dev/archery[docker]
+ - name: Build sdist
run: |
- mv arrow/python/dist/ wheels/
+ archery docker run python-sdist
+ {% if arrow.branch == 'master' %}
+ archery docker push python-sdist || :
+ {% endif %}
+ env:
+ PYARROW_VERSION: {{ arrow.no_rc_version }}
+ - name: Test sdist
+ run: archery docker run ubuntu-python-sdist-test
+ env:
+ PYARROW_VERSION: {{ arrow.no_rc_version }}
{% if arrow.branch == 'master' %}
- name: Upload to gemfury
- shell: bash
run: |
- conda.bat install -y curl
- WHEEL_PATH=$(echo wheels/*.whl)
- curl.exe \
- -F "package=@${WHEEL_PATH}" \
+ SDIST_PATH=$(echo arrow/python/dist/*.tar.gz)
+ curl \
+ -F "package=@${SDIST_PATH}" \
"https://${CROSSBOW_GEMFURY_TOKEN}@push.fury.io/${CROSSBOW_GEMFURY_ORG}/"
env:
CROSSBOW_GEMFURY_ORG: {{ '${{ secrets.CROSSBOW_GEMFURY_ORG }}' }}
CROSSBOW_GEMFURY_TOKEN: {{ '${{ secrets.CROSSBOW_GEMFURY_TOKEN }}' }}
{% endif %}
- - name: Set up Crossbow
- shell: bash
- run: |
- pip install \
- click \
- github3.py \
- jinja2 \
- jira \
- pygit2 \
- ruamel.yaml \
- setuptools_scm \
- toolz
+ - name: Setup Crossbow
+ run: pip install --requirement arrow/dev/tasks/requirements-crossbow.txt
- name: Upload artifacts
shell: bash
run: |
@@ -93,7 +76,7 @@ jobs:
--queue-path . \
--queue-remote {{ queue_remote_url }} \
upload-artifacts \
- --pattern "wheels/*.whl" \
+ --pattern "arrow/python/dist/*.tar.gz" \
--sha {{ task.branch }} \
--tag {{ task.tag }}
env:
diff --git a/dev/tasks/python-wheels/github.osx.yml b/dev/tasks/python-wheels/github.osx.yml
index 5e9214f..f0edeb3 100644
--- a/dev/tasks/python-wheels/github.osx.yml
+++ b/dev/tasks/python-wheels/github.osx.yml
@@ -108,15 +108,7 @@ jobs:
- name: Setup Crossbow
run: |
- pip3 install \
- click \
- github3.py \
- jinja2 \
- jira \
- pygit2 \
- ruamel.yaml \
- setuptools_scm \
- toolz
+ pip3 install --requirement arrow/dev/tasks/requirements-crossbow.txt
- name: Upload artifacts
run: |
diff --git a/dev/tasks/python-wheels/github.win.yml b/dev/tasks/python-wheels/github.win.yml
index f7638d8..4226432 100644
--- a/dev/tasks/python-wheels/github.win.yml
+++ b/dev/tasks/python-wheels/github.win.yml
@@ -77,15 +77,7 @@ jobs:
- name: Set up Crossbow
shell: bash
run: |
- pip install \
- click \
- github3.py \
- jinja2 \
- jira \
- pygit2 \
- ruamel.yaml \
- setuptools_scm \
- toolz
+ pip install --requirement arrow/dev/tasks/requirements-crossbow.txt
- name: Upload artifacts
shell: bash
run: |
diff --git a/dev/tasks/requirements-crossbow.txt b/dev/tasks/requirements-crossbow.txt
new file mode 100644
index 0000000..2436b4d
--- /dev/null
+++ b/dev/tasks/requirements-crossbow.txt
@@ -0,0 +1,7 @@
+click>=7.1
+github3.py
+jinja2
+pygit2
+ruamel.yaml
+setuptools_scm
+toolz
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index c3b2139..f19e0a4 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -48,6 +48,15 @@ groups:
homebrew:
- homebrew-*
+ packaging:
+ - conda-*
+ - wheel-*
+ - debian-*
+ - ubuntu-*
+ - centos-*
+ - python-sdist
+ - nuget
+
############################# Testing tasks #################################
test:
@@ -121,6 +130,7 @@ groups:
- test-*
- example-*
- wheel-*
+ - python-sdist
tasks:
# arbitrary_task_name:
@@ -610,6 +620,14 @@ tasks:
artifacts:
- pyarrow-{no_rc_version}-cp38-cp38-win_amd64.whl
+ ############################ Python sdist ####################################
+
+ python-sdist:
+ ci: github
+ template: python-sdist/github.yml
+ artifacts:
+ - pyarrow-{no_rc_version}.tar.gz
+
############################## Linux PKGS ####################################
debian-buster-amd64:
diff --git a/docker-compose.yml b/docker-compose.yml
index 0454c67..6aa195c 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -110,12 +110,14 @@ x-hierarchy:
- ubuntu-lint
- ubuntu-python:
- ubuntu-docs
+ - ubuntu-python-sdist-test
- ubuntu-r
- ubuntu-cuda-cpp:
- ubuntu-cuda-python
- ubuntu-csharp
- ubuntu-cpp-sanitizer
- ubuntu-r-sanitizer
+ - python-sdist
- r
# helper services
- impala
@@ -588,6 +590,53 @@ services:
volumes: *fedora-volumes
command: *python-command
+ ############################ Python sdist ###################################
+
+ python-sdist:
+ # Usage:
+ # docker-compose build python-sdist
+ # docker-compose run --rm python-sdist
+ # Parameters:
+ # PYARROW_VERSION: The pyarrow version for sdist such as "3.0.0"
+ image: ${REPO}:python-sdist
+ build:
+ context: .
+ dockerfile: ci/docker/python-sdist.dockerfile
+ cache_from:
+ - ${REPO}:python-sdist
+ environment:
+ PYARROW_VERSION: ${PYARROW_VERSION:-}
+ volumes:
+ - .:/arrow:delegated
+ command: /arrow/ci/scripts/python_sdist_build.sh /arrow
+
+ ubuntu-python-sdist-test:
+ # Usage:
+ # docker-compose build ubuntu-cpp
+ # docker-compose build ubuntu-python-sdist-test
+ # docker-compose run --rm ubuntu-python-sdist-test
+ # Parameters:
+ # ARCH: amd64, arm64v8, ...
+ # PYARROW_VERSION: The test target pyarrow version such as "3.0.0"
+ # UBUNTU: 16.04, 18.04, 20.04
+ image: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-python-3
+ build:
+ context: .
+ dockerfile: ci/docker/linux-apt-python-3.dockerfile
+ cache_from:
+ - ${REPO}:${ARCH}-ubuntu-${UBUNTU}-python-3
+ args:
+ base: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp
+ shm_size: *shm-size
+ environment:
+ <<: *ccache
+ PYARROW_VERSION: ${PYARROW_VERSION:-}
+ volumes: *ubuntu-volumes
+ command: >
+ /bin/bash -c "
+ /arrow/ci/scripts/cpp_build.sh /arrow /build &&
+ /arrow/ci/scripts/python_sdist_test.sh /arrow"
+
############################## Integration #################################
conda-python-pandas: