You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2022/11/15 15:52:00 UTC
[arrow-datafusion-python] branch master updated: Add release scripts, bump version to 0.7.0 (#74)
This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion-python.git
The following commit(s) were added to refs/heads/master by this push:
new 7f2cb01 Add release scripts, bump version to 0.7.0 (#74)
7f2cb01 is described below
commit 7f2cb0144b0e3b3b401341b43f3fb9f8dd87d59a
Author: Andy Grove <an...@gmail.com>
AuthorDate: Tue Nov 15 08:51:55 2022 -0700
Add release scripts, bump version to 0.7.0 (#74)
* prepare for next release
* rat
* update Cargo.lock
---
Cargo.lock | 2 +-
Cargo.toml | 8 +-
dev/release/README.md | 36 +++++
dev/release/create-tarball.sh | 136 ++++++++++++++++++
dev/release/release-tarball.sh | 74 ++++++++++
dev/release/update_change_log-datafusion-python.sh | 33 +++++
dev/release/update_change_log.sh | 87 ++++++++++++
dev/release/verify-release-candidate.sh | 155 +++++++++++++++++++++
8 files changed, 526 insertions(+), 5 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index 90bbf7c..db3a43e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -654,7 +654,7 @@ dependencies = [
[[package]]
name = "datafusion-python"
-version = "0.6.0"
+version = "0.7.0"
dependencies = [
"async-trait",
"datafusion",
diff --git a/Cargo.toml b/Cargo.toml
index b67cbc5..0e8db9c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -17,11 +17,11 @@
[package]
name = "datafusion-python"
-version = "0.6.0"
-homepage = "https://github.com/apache/arrow"
-repository = "https://github.com/apache/arrow"
+version = "0.7.0"
+homepage = "https://github.com/apache/arrow-datafusion-python"
+repository = "https://github.com/apache/arrow-datafusion-python"
authors = ["Apache Arrow <de...@arrow.apache.org>"]
-description = "Build and run queries against data"
+description = "Apache Arrow DataFusion DataFrame and SQL Query Engine"
readme = "README.md"
license = "Apache-2.0"
edition = "2021"
diff --git a/dev/release/README.md b/dev/release/README.md
new file mode 100644
index 0000000..6e4fc9a
--- /dev/null
+++ b/dev/release/README.md
@@ -0,0 +1,36 @@
+<!---
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+# DataFusion Python Release Process
+
+This is a work-in-progress that will be updated as we work through the next release.
+
+## Preparing a Release Candidate
+
+- Update the version number in Cargo.toml
+- Generate changelog
+- Tag the repo with an rc tag e.g. `0.7.0-rc1`
+- Create tarball and upload to ASF
+- Start the vote
+
+## Releasing Artifacts
+
+```bash
+maturin publish
+```
\ No newline at end of file
diff --git a/dev/release/create-tarball.sh b/dev/release/create-tarball.sh
new file mode 100755
index 0000000..64150f5
--- /dev/null
+++ b/dev/release/create-tarball.sh
@@ -0,0 +1,136 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# Adapted from https://github.com/apache/arrow-rs/tree/master/dev/release/create-tarball.sh
+
+# This script creates a signed tarball in
+# dev/dist/apache-arrow-datafusion-python-<version>-rc<rc>/apache-arrow-datafusion-python-<version>.tar.gz,
+# uploads it to the "dev" area of the dist.apache.org repository and
+# prepares an email for sending to the dev@arrow.apache.org list for a
+# formal vote.
+#
+# See release/README.md for full release instructions
+#
+# Requirements:
+#
+# 1. gpg setup for signing and have uploaded your public
+# signature to https://pgp.mit.edu/
+#
+# 2. Logged into the apache svn server with the appropriate
+# credentials
+#
+# 3. Install the requests python package
+#
+#
+# Based in part on 02-source.sh from apache/arrow
+#
+
+set -e
+
+SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)"
+
+if [ "$#" -ne 2 ]; then
+    echo "Usage: $0 <version> <rc>"
+    echo "ex. $0 4.1.0 2"
+    exit 1  # usage error: report failure to the caller
+fi
+
+if [[ -z "${GH_TOKEN}" ]]; then
+    echo "Please set personal github token through GH_TOKEN environment variable"
+    exit 1  # missing configuration is a failure, not a clean exit
+fi
+
+version=$1
+rc=$2
+tag="${version}-rc${rc}"
+release=apache-arrow-datafusion-python-${version}
+distdir=${SOURCE_TOP_DIR}/dev/dist/${release}-rc${rc}
+tarname=${release}.tar.gz
+tarball=${distdir}/${tarname}
+url="https://dist.apache.org/repos/dist/dev/arrow/${release}-rc${rc}"
+
+echo "Attempting to create ${tarball} from tag ${tag}"
+# An unknown tag makes rev-list fail; defer to the explicit check below.
+release_hash=$(cd "${SOURCE_TOP_DIR}" && git rev-list --max-count=1 "${tag}") || release_hash=""
+if [ -z "$release_hash" ]; then
+    echo "Cannot continue: unknown git tag: ${tag}"
+    exit 1
+fi
+
+echo "Draft email for dev@arrow.apache.org mailing list"
+echo ""
+echo "---------------------------------------------------------"
+cat <<MAIL
+To: dev@arrow.apache.org
+Subject: [VOTE][RUST][DataFusion] Release DataFusion Python Bindings ${version} RC${rc}
+Hi,
+
+I would like to propose a release of Apache Arrow DataFusion Python Bindings,
+version ${version}.
+
+This release candidate is based on commit: ${release_hash} [1]
+The proposed release tarball and signatures are hosted at [2].
+The changelog is located at [3].
+
+Please download, verify checksums and signatures, run the unit tests, and vote
+on the release. The vote will be open for at least 72 hours.
+
+Only votes from PMC members are binding, but all members of the community are
+encouraged to test the release and vote with "(non-binding)".
+
+The standard verification procedure is documented at https://github.com/apache/arrow-datafusion-python/blob/master/dev/release/README.md#verifying-release-candidates.
+
+[ ] +1 Release this as Apache Arrow DataFusion Python ${version}
+[ ] +0
+[ ] -1 Do not release this as Apache Arrow DataFusion Python ${version} because...
+
+Here is my vote:
+
++1
+
+[1]: https://github.com/apache/arrow-datafusion-python/tree/${release_hash}
+[2]: ${url}
+[3]: https://github.com/apache/arrow-datafusion-python/blob/${release_hash}/CHANGELOG.md
+MAIL
+echo "---------------------------------------------------------"
+
+
+# create <tarball> containing the files in git at $release_hash
+# the files in the tarball are prefixed with {version} (e.g. 4.0.1)
+mkdir -p ${distdir}
+(cd "${SOURCE_TOP_DIR}" && git archive ${release_hash} --prefix ${release}/ | gzip > ${tarball})
+
+echo "Running rat license checker on ${tarball}"
+${SOURCE_DIR}/run-rat.sh ${tarball}
+
+echo "Signing tarball and creating checksums"
+gpg --armor --output ${tarball}.asc --detach-sig ${tarball}
+# create signing with relative path of tarball
+# so that they can be verified with a command such as
+# shasum --check apache-arrow-datafusion-python-4.1.0-rc2.tar.gz.sha512
+(cd ${distdir} && shasum -a 256 ${tarname}) > ${tarball}.sha256
+(cd ${distdir} && shasum -a 512 ${tarname}) > ${tarball}.sha512
+
+
+echo "Uploading to apache dist/dev to ${url}"
+svn co --depth=empty https://dist.apache.org/repos/dist/dev/arrow ${SOURCE_TOP_DIR}/dev/dist
+svn add ${distdir}
+svn ci -m "Apache Arrow DataFusion Python ${version} ${rc}" ${distdir}
diff --git a/dev/release/release-tarball.sh b/dev/release/release-tarball.sh
new file mode 100755
index 0000000..15aa85e
--- /dev/null
+++ b/dev/release/release-tarball.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# Adapted from https://github.com/apache/arrow-rs/tree/master/dev/release/release-tarball.sh
+
+# This script copies a tarball from the "dev" area of the
+# dist.apache.arrow repository to the "release" area
+#
+# This script should only be run after the release has been approved
+# by the arrow PMC committee.
+#
+# See release/README.md for full release instructions
+#
+# Based in part on post-01-upload.sh from apache/arrow
+
+
+set -e
+set -u
+
+if [ "$#" -ne 2 ]; then
+    echo "Usage: $0 <version> <rc-num>"
+    echo "ex. $0 4.1.0 2"
+    exit 1  # wrong argument count is an error; do not report success
+fi
+
+version=$1
+rc=$2
+
+tmp_dir=tmp-apache-arrow-datafusion-python-dist
+
+echo "Recreate temporary directory: ${tmp_dir}"
+rm -rf "${tmp_dir}"
+mkdir -p "${tmp_dir}"
+
+echo "Clone dev dist repository"
+svn \
+    co \
+    "https://dist.apache.org/repos/dist/dev/arrow/apache-arrow-datafusion-python-${version}-rc${rc}" \
+    "${tmp_dir}/dev"
+
+echo "Clone release dist repository"
+svn co https://dist.apache.org/repos/dist/release/arrow "${tmp_dir}/release"
+# Name the release directory after this project so it cannot collide with arrow-datafusion-<version>.
+echo "Copy ${version}-rc${rc} to release working copy"
+release_version=arrow-datafusion-python-${version}
+mkdir -p "${tmp_dir}/release/${release_version}"
+cp -r "${tmp_dir}/dev/"* "${tmp_dir}/release/${release_version}/"
+svn add "${tmp_dir}/release/${release_version}"
+
+echo "Commit release"
+svn ci -m "Apache Arrow DataFusion Python ${version}" "${tmp_dir}/release"
+
+echo "Clean up"
+rm -rf "${tmp_dir}"
+
+echo "Success! The release is available here:"
+echo " https://dist.apache.org/repos/dist/release/arrow/${release_version}"
diff --git a/dev/release/update_change_log-datafusion-python.sh b/dev/release/update_change_log-datafusion-python.sh
new file mode 100755
index 0000000..d993536
--- /dev/null
+++ b/dev/release/update_change_log-datafusion-python.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# Usage:
+# CHANGELOG_GITHUB_TOKEN=<TOKEN> ./update_change_log-datafusion-python.sh master 8.0.0 7.1.0
+# CHANGELOG_GITHUB_TOKEN=<TOKEN> ./update_change_log-datafusion-python.sh maint-7.x 7.1.0 7.0.0
+
+RELEASE_BRANCH=${1:?missing argument 1: release branch, e.g. master}
+RELEASE_TAG=${2:?missing argument 2: tag of the release being prepared, e.g. 8.0.0}
+BASE_TAG=${3:?missing argument 3: tag of the previous release, e.g. 7.1.0}
+# ${N:?msg} aborts with msg when an argument is absent, instead of passing "" downstream.
+SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+"${SOURCE_DIR}/update_change_log.sh" \
+    "${BASE_TAG}" \
+    --future-release "${RELEASE_TAG}" \
+    --release-branch "${RELEASE_BRANCH}"
diff --git a/dev/release/update_change_log.sh b/dev/release/update_change_log.sh
new file mode 100755
index 0000000..a0b3981
--- /dev/null
+++ b/dev/release/update_change_log.sh
@@ -0,0 +1,87 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# Adapted from https://github.com/apache/arrow-rs/tree/master/dev/release/update_change_log.sh
+
+# invokes the changelog generator from
+# https://github.com/github-changelog-generator/github-changelog-generator
+#
+# With the config located in
+# arrow-datafusion/.github_changelog_generator
+#
+# Usage:
+# CHANGELOG_GITHUB_TOKEN=<TOKEN> ./update_change_log.sh <SINCE_TAG> <EXTRA_ARGS...>
+
+set -e
+
+SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)"
+
+if [[ "$#" -lt 1 ]]; then
+ echo "USAGE: $0 SINCE_TAG EXTRA_ARGS..."
+ exit 1
+fi
+
+SINCE_TAG=$1
+shift 1
+
+OUTPUT_PATH="CHANGELOG.md"
+
+# work from the repository root; the path is quoted in case it contains spaces
+pushd "${SOURCE_TOP_DIR}"
+# reset content in changelog
+git checkout "${SINCE_TAG}" "${OUTPUT_PATH}"
+# remove license header so github-changelog-generator has a clean base to append
+sed -i.bak '1,18d' "${OUTPUT_PATH}"
+
+docker run -it --rm \
+    --cpus "0.1" \
+    -e CHANGELOG_GITHUB_TOKEN="${CHANGELOG_GITHUB_TOKEN}" \
+    -v "$(pwd)":/usr/local/src/your-app \
+    githubchangeloggenerator/github-changelog-generator \
+    --user apache \
+    --project arrow-datafusion-python \
+    --since-tag "${SINCE_TAG}" \
+    --base "${OUTPUT_PATH}" \
+    --output "${OUTPUT_PATH}" \
+    "$@"
+
+sed -i.bak "s/\\\n/\n\n/" "${OUTPUT_PATH}"
+
+echo '<!---
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+' | cat - "${OUTPUT_PATH}" > "${OUTPUT_PATH}".tmp
+mv "${OUTPUT_PATH}".tmp "${OUTPUT_PATH}"
diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh
new file mode 100755
index 0000000..fee276c
--- /dev/null
+++ b/dev/release/verify-release-candidate.sh
@@ -0,0 +1,155 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+case $# in
+ 2) VERSION="$1"
+ RC_NUMBER="$2"
+ ;;
+ *) echo "Usage: $0 X.Y.Z RC_NUMBER"
+ exit 1
+ ;;
+esac
+
+set -e
+set -x
+set -o pipefail
+
+SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
+ARROW_DIR="$(dirname $(dirname ${SOURCE_DIR}))"
+ARROW_DIST_URL='https://dist.apache.org/repos/dist/dev/arrow'
+
+download_dist_file() {
+  # Download $ARROW_DIST_URL/$1 into the current directory, keeping its remote name.
+  curl \
+    --silent --show-error \
+    --fail \
+    --location \
+    --remote-name "${ARROW_DIST_URL}/$1"
+}
+
+download_rc_file() {  # fetch file $1 from this release candidate's directory on the dist server
+  download_dist_file "apache-arrow-datafusion-python-${VERSION}-rc${RC_NUMBER}/$1"
+}
+
+import_gpg_keys() {  # download the KEYS file from ARROW_DIST_URL and import it into gpg
+ download_dist_file KEYS
+ gpg --import KEYS
+}
+
+# Prefer shasum when it is available; otherwise use the sha256sum/sha512sum tools.
+sha256_verify="sha256sum -c"
+sha512_verify="sha512sum -c"
+if command -v shasum >/dev/null 2>&1; then
+  sha256_verify="shasum -a 256 -c"
+  sha512_verify="shasum -a 512 -c"
+fi
+
+fetch_archive() {
+ local dist_name=$1
+ download_rc_file ${dist_name}.tar.gz
+ download_rc_file ${dist_name}.tar.gz.asc
+ download_rc_file ${dist_name}.tar.gz.sha256
+ download_rc_file ${dist_name}.tar.gz.sha512
+ verify_dir_artifact_signatures
+}
+
+verify_dir_artifact_signatures() {
+  # verify the signature and the checksums of each artifact
+  # (-r keeps backslashes literal; %.asc strips only the trailing suffix)
+  find . -name '*.asc' | while IFS= read -r sigfile; do
+    artifact=${sigfile%.asc}
+    gpg --verify "$sigfile" "$artifact" || exit 1
+    # go into the directory because the checksum files contain only the
+    # basename of the artifact
+    pushd "$(dirname "$artifact")"
+    base_artifact=$(basename "$artifact")
+    ${sha256_verify} "$base_artifact.sha256" || exit 1
+    ${sha512_verify} "$base_artifact.sha512" || exit 1
+    popd
+  done
+}
+
+setup_tempdir() {
+  # EXIT handler: delete the sandbox after a successful run, keep it for inspection otherwise.
+  cleanup() {
+    if [ "${TEST_SUCCESS}" != "yes" ]; then
+      echo "Failed to verify release candidate. See ${ARROW_TMPDIR} for details."
+      return
+    fi
+    rm -fr "${ARROW_TMPDIR}"
+  }
+  if [ -n "${ARROW_TMPDIR}" ]; then
+    # caller chose the directory: create it, but don't clean up automatically
+    mkdir -p "${ARROW_TMPDIR}"
+    return
+  fi
+  # clean up automatically if ARROW_TMPDIR is not defined
+  ARROW_TMPDIR=$(mktemp -d -t "$1.XXXXX")
+  trap cleanup EXIT
+}
+
+test_source_distribution() {
+  # Build, test and dry-run publish the unpacked source tree in the current directory.
+  # install rust toolchain in a similar fashion like test-miniconda
+  export RUSTUP_HOME="$PWD/test-rustup"
+  export CARGO_HOME="$PWD/test-rustup"
+
+  curl https://sh.rustup.rs -sSf | sh -s -- -y --no-modify-path
+
+  export PATH="$RUSTUP_HOME/bin:$PATH"
+  source "$RUSTUP_HOME/env"
+
+  # build and test rust
+  # raises on any formatting errors
+  rustup component add rustfmt --toolchain stable
+  cargo fmt --all -- --check
+
+  # Clone testing repositories into the expected location
+  git clone https://github.com/apache/arrow-testing.git testing
+  git clone https://github.com/apache/parquet-testing.git parquet-testing
+
+  cargo build
+  cargo test --all
+
+  # "find ." (explicit start dir) also works with BSD/macOS find; NUL-delimit the paths
+  if ( find . -iname 'Cargo.toml' -print0 | xargs -0 grep SNAPSHOT ); then
+    echo "Cargo.toml version should not contain SNAPSHOT for releases"
+    exit 1
+  fi
+  cargo publish --dry-run
+}
+
+TEST_SUCCESS=no
+# TEST_SUCCESS stays "no" until every step below has passed.
+setup_tempdir "arrow-${VERSION}"
+echo "Working in sandbox ${ARROW_TMPDIR}"
+cd "${ARROW_TMPDIR}"  # quoted: a user-supplied ARROW_TMPDIR may contain spaces
+
+dist_name="apache-arrow-datafusion-python-${VERSION}"
+import_gpg_keys
+fetch_archive "${dist_name}"
+tar xf "${dist_name}.tar.gz"
+pushd "${dist_name}"
+  test_source_distribution
+popd
+
+TEST_SUCCESS=yes
+echo 'Release candidate looks good!'
+exit 0