You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2022/11/15 15:52:00 UTC

[arrow-datafusion-python] branch master updated: Add release scripts, bump version to 0.7.0 (#74)

This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion-python.git


The following commit(s) were added to refs/heads/master by this push:
     new 7f2cb01  Add release scripts, bump version to 0.7.0 (#74)
7f2cb01 is described below

commit 7f2cb0144b0e3b3b401341b43f3fb9f8dd87d59a
Author: Andy Grove <an...@gmail.com>
AuthorDate: Tue Nov 15 08:51:55 2022 -0700

    Add release scripts, bump version to 0.7.0 (#74)
    
    * prepare for next release
    
    * rat
    
    * update Cargo.lock
---
 Cargo.lock                                         |   2 +-
 Cargo.toml                                         |   8 +-
 dev/release/README.md                              |  36 +++++
 dev/release/create-tarball.sh                      | 136 ++++++++++++++++++
 dev/release/release-tarball.sh                     |  74 ++++++++++
 dev/release/update_change_log-datafusion-python.sh |  33 +++++
 dev/release/update_change_log.sh                   |  87 ++++++++++++
 dev/release/verify-release-candidate.sh            | 155 +++++++++++++++++++++
 8 files changed, 526 insertions(+), 5 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 90bbf7c..db3a43e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -654,7 +654,7 @@ dependencies = [
 
 [[package]]
 name = "datafusion-python"
-version = "0.6.0"
+version = "0.7.0"
 dependencies = [
  "async-trait",
  "datafusion",
diff --git a/Cargo.toml b/Cargo.toml
index b67cbc5..0e8db9c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -17,11 +17,11 @@
 
 [package]
 name = "datafusion-python"
-version = "0.6.0"
-homepage = "https://github.com/apache/arrow"
-repository = "https://github.com/apache/arrow"
+version = "0.7.0"
+homepage = "https://github.com/apache/arrow-datafusion-python"
+repository = "https://github.com/apache/arrow-datafusion-python"
 authors = ["Apache Arrow <de...@arrow.apache.org>"]
-description = "Build and run queries against data"
+description = "Apache Arrow DataFusion DataFrame and SQL Query Engine"
 readme = "README.md"
 license = "Apache-2.0"
 edition = "2021"
diff --git a/dev/release/README.md b/dev/release/README.md
new file mode 100644
index 0000000..6e4fc9a
--- /dev/null
+++ b/dev/release/README.md
@@ -0,0 +1,36 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+# DataFusion Python Release Process
+
+This is a work-in-progress that will be updated as we work through the next release.
+
+## Preparing a Release Candidate
+
+- Update the version number in Cargo.toml
+- Generate changelog
+- Tag the repo with an rc tag e.g. `0.7.0-rc1`
+- Create tarball and upload to ASF
+- Start the vote
+
+## Releasing Artifacts
+
+```bash
+maturin publish
+```
\ No newline at end of file
diff --git a/dev/release/create-tarball.sh b/dev/release/create-tarball.sh
new file mode 100755
index 0000000..64150f5
--- /dev/null
+++ b/dev/release/create-tarball.sh
@@ -0,0 +1,136 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# Adapted from https://github.com/apache/arrow-rs/tree/master/dev/release/create-tarball.sh
+
+# This script creates a signed tarball in
+# dev/dist/apache-arrow-datafusion-python-<version>-rc<rc>/, uploads it to
+# the "dev" area of the dist.apache.org repository and prepares an
+# email for sending to the dev@arrow.apache.org list for a formal
+# vote.
+#
+# See release/README.md for full release instructions
+#
+# Requirements:
+#
+# 1. gpg setup for signing and have uploaded your public
+# signature to https://pgp.mit.edu/
+#
+# 2. Logged into the apache svn server with the appropriate
+# credentials
+#
+# 3. Install the requests python package
+#
+#
+# Based in part on 02-source.sh from apache/arrow
+#
+
+set -e
+
+SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)"
+
+if [ "$#" -ne 2 ]; then
+    echo "Usage: $0 <version> <rc>" >&2
+    echo "ex. $0 4.1.0 2" >&2
+    exit 1
+fi
+
+if [[ -z "${GH_TOKEN}" ]]; then
+    echo "Please set personal github token through GH_TOKEN environment variable" >&2
+    exit 1
+fi
+
+version=$1
+rc=$2
+tag="${version}-rc${rc}"
+release=apache-arrow-datafusion-python-${version}
+distdir=${SOURCE_TOP_DIR}/dev/dist/${release}-rc${rc}
+tarname=${release}.tar.gz
+tarball=${distdir}/${tarname}
+url="https://dist.apache.org/repos/dist/dev/arrow/${release}-rc${rc}"
+
+echo "Attempting to create ${tarball} from tag ${tag}"
+# "|| true" defers failure handling to the explicit check below ("set -e" is on)
+release_hash=$(cd "${SOURCE_TOP_DIR}" && git rev-list --max-count=1 "${tag}") || true
+if [ -z "$release_hash" ]; then
+    echo "Cannot continue: unknown git tag: ${tag}" >&2
+    exit 1
+fi
+
+echo "Draft email for dev@arrow.apache.org mailing list"
+echo ""
+echo "---------------------------------------------------------"
+cat <<MAIL
+To: dev@arrow.apache.org
+Subject: [VOTE][RUST][DataFusion] Release DataFusion Python Bindings ${version} RC${rc}
+Hi,
+
+I would like to propose a release of Apache Arrow DataFusion Python Bindings,
+version ${version}.
+
+This release candidate is based on commit: ${release_hash} [1]
+The proposed release tarball and signatures are hosted at [2].
+The changelog is located at [3].
+
+Please download, verify checksums and signatures, run the unit tests, and vote
+on the release. The vote will be open for at least 72 hours.
+
+Only votes from PMC members are binding, but all members of the community are
+encouraged to test the release and vote with "(non-binding)".
+
+The standard verification procedure is documented at https://github.com/apache/arrow-datafusion-python/blob/master/dev/release/README.md#verifying-release-candidates.
+
+[ ] +1 Release this as Apache Arrow DataFusion Python ${version}
+[ ] +0
+[ ] -1 Do not release this as Apache Arrow DataFusion Python ${version} because...
+
+Here is my vote:
+
++1
+
+[1]: https://github.com/apache/arrow-datafusion-python/tree/${release_hash}
+[2]: ${url}
+[3]: https://github.com/apache/arrow-datafusion-python/blob/${release_hash}/CHANGELOG.md
+MAIL
+echo "---------------------------------------------------------"
+
+
+# create <tarball> containing the files in git at $release_hash, each prefixed
+# with ${release}/; pipefail makes a failing "git archive" abort the script
+mkdir -p "${distdir}"
+(cd "${SOURCE_TOP_DIR}" && set -o pipefail && git archive "${release_hash}" --prefix "${release}/" | gzip > "${tarball}")
+
+echo "Running rat license checker on ${tarball}"
+"${SOURCE_DIR}/run-rat.sh" "${tarball}"
+
+echo "Signing tarball and creating checksums"
+gpg --armor --output "${tarball}.asc" --detach-sig "${tarball}"
+# create checksums with the relative path of the tarball
+# so that they can be verified with a command such as
+#  shasum --check apache-arrow-datafusion-python-4.1.0.tar.gz.sha512
+(cd "${distdir}" && shasum -a 256 "${tarname}") > "${tarball}.sha256"
+(cd "${distdir}" && shasum -a 512 "${tarname}") > "${tarball}.sha512"
+
+
+echo "Uploading to apache dist/dev to ${url}"
+svn co --depth=empty https://dist.apache.org/repos/dist/dev/arrow "${SOURCE_TOP_DIR}/dev/dist"
+svn add "${distdir}"
+svn ci -m "Apache Arrow DataFusion Python ${version} ${rc}" "${distdir}"
diff --git a/dev/release/release-tarball.sh b/dev/release/release-tarball.sh
new file mode 100755
index 0000000..15aa85e
--- /dev/null
+++ b/dev/release/release-tarball.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# Adapted from https://github.com/apache/arrow-rs/tree/master/dev/release/release-tarball.sh
+
+# This script copies a tarball from the "dev" area of the
+# dist.apache.org repository to the "release" area
+#
+# This script should only be run after the release has been approved
+# by the arrow PMC committee.
+#
+# See release/README.md for full release instructions
+#
+# Based in part on post-01-upload.sh from apache/arrow
+
+
+set -e
+set -u
+
+if [ "$#" -ne 2 ]; then
+  echo "Usage: $0 <version> <rc-num>" >&2
+  echo "ex. $0 4.1.0 2" >&2
+  exit 1
+fi
+
+version=$1
+rc=$2
+
+tmp_dir=tmp-apache-arrow-datafusion-python-dist
+
+echo "Recreate temporary directory: ${tmp_dir}"
+rm -rf ${tmp_dir}
+mkdir -p ${tmp_dir}
+
+echo "Clone dev dist repository"
+svn \
+  co \
+  https://dist.apache.org/repos/dist/dev/arrow/apache-arrow-datafusion-python-${version}-rc${rc} \
+  ${tmp_dir}/dev
+
+echo "Clone release dist repository"
+svn co https://dist.apache.org/repos/dist/release/arrow ${tmp_dir}/release
+
+echo "Copy ${version}-rc${rc} to release working copy"
+release_version=arrow-datafusion-python-${version}
+mkdir -p "${tmp_dir}/release/${release_version}"
+cp -r "${tmp_dir}"/dev/* "${tmp_dir}/release/${release_version}/"
+svn add "${tmp_dir}/release/${release_version}"
+
+echo "Commit release"
+svn ci -m "Apache Arrow DataFusion Python ${version}" ${tmp_dir}/release
+
+echo "Clean up"
+rm -rf ${tmp_dir}
+
+echo "Success! The release is available here:"
+echo "  https://dist.apache.org/repos/dist/release/arrow/${release_version}"
diff --git a/dev/release/update_change_log-datafusion-python.sh b/dev/release/update_change_log-datafusion-python.sh
new file mode 100755
index 0000000..d993536
--- /dev/null
+++ b/dev/release/update_change_log-datafusion-python.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# Usage:
+# CHANGELOG_GITHUB_TOKEN=<TOKEN> ./update_change_log-datafusion-python.sh master 8.0.0 7.1.0
+# CHANGELOG_GITHUB_TOKEN=<TOKEN> ./update_change_log-datafusion-python.sh maint-7.x 7.1.0 7.0.0
+
+RELEASE_BRANCH="${1:?Usage: $0 <release-branch> <release-tag> <base-tag>}"
+RELEASE_TAG="${2:?Usage: $0 <release-branch> <release-tag> <base-tag>}"
+BASE_TAG="${3:?Usage: $0 <release-branch> <release-tag> <base-tag>}"
+
+SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+"${SOURCE_DIR}/update_change_log.sh" \
+    "${BASE_TAG}" \
+    --future-release "${RELEASE_TAG}" \
+    --release-branch "${RELEASE_BRANCH}"
diff --git a/dev/release/update_change_log.sh b/dev/release/update_change_log.sh
new file mode 100755
index 0000000..a0b3981
--- /dev/null
+++ b/dev/release/update_change_log.sh
@@ -0,0 +1,87 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# Adapted from https://github.com/apache/arrow-rs/tree/master/dev/release/update_change_log.sh
+
+# invokes the changelog generator from
+# https://github.com/github-changelog-generator/github-changelog-generator
+#
+# With the config located in
+# arrow-datafusion/.github_changelog_generator
+#
+# Usage:
+# CHANGELOG_GITHUB_TOKEN=<TOKEN> ./update_change_log.sh <SINCE_TAG> <EXTRA_ARGS...>
+
+set -e
+
+SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)"
+
+if [[ "$#" -lt 1 ]]; then
+    echo "USAGE: $0 SINCE_TAG EXTRA_ARGS..."
+    exit 1
+fi
+
+SINCE_TAG=$1
+shift 1
+
+OUTPUT_PATH="CHANGELOG.md"
+
+pushd "${SOURCE_TOP_DIR}"
+
+# reset content in changelog
+git checkout "${SINCE_TAG}" "${OUTPUT_PATH}"
+# remove license header so github-changelog-generator has a clean base to append
+sed -i.bak '1,18d' "${OUTPUT_PATH}"
+# pass the token by name only so its value never appears on the docker command line
+docker run -it --rm \
+    --cpus "0.1" \
+    -e CHANGELOG_GITHUB_TOKEN \
+    -v "$(pwd)":/usr/local/src/your-app \
+    githubchangeloggenerator/github-changelog-generator \
+    --user apache \
+    --project arrow-datafusion-python \
+    --since-tag "${SINCE_TAG}" \
+    --base "${OUTPUT_PATH}" \
+    --output "${OUTPUT_PATH}" \
+    "$@"
+
+sed -i.bak "s/\\\n/\n\n/" "${OUTPUT_PATH}"
+
+echo '<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+' | cat - "${OUTPUT_PATH}" > "${OUTPUT_PATH}".tmp
+mv "${OUTPUT_PATH}".tmp "${OUTPUT_PATH}"
diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh
new file mode 100755
index 0000000..fee276c
--- /dev/null
+++ b/dev/release/verify-release-candidate.sh
@@ -0,0 +1,155 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+case $# in
+  2) VERSION="$1"
+     RC_NUMBER="$2"
+     ;;
+  *) echo "Usage: $0 X.Y.Z RC_NUMBER"
+     exit 1
+     ;;
+esac
+
+set -e
+set -x
+set -o pipefail
+
+SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
+ARROW_DIR="$(dirname $(dirname ${SOURCE_DIR}))"
+ARROW_DIST_URL='https://dist.apache.org/repos/dist/dev/arrow'
+
+download_dist_file() {
+  curl \
+    --silent \
+    --show-error \
+    --fail \
+    --location \
+    --remote-name $ARROW_DIST_URL/$1
+}
+
+download_rc_file() {
+  download_dist_file apache-arrow-datafusion-python-${VERSION}-rc${RC_NUMBER}/$1
+}
+
+import_gpg_keys() {
+  download_dist_file KEYS
+  gpg --import KEYS
+}
+
+if type shasum >/dev/null 2>&1; then
+  sha256_verify="shasum -a 256 -c"
+  sha512_verify="shasum -a 512 -c"
+else
+  sha256_verify="sha256sum -c"
+  sha512_verify="sha512sum -c"
+fi
+
+fetch_archive() {
+  local dist_name=$1
+  download_rc_file ${dist_name}.tar.gz
+  download_rc_file ${dist_name}.tar.gz.asc
+  download_rc_file ${dist_name}.tar.gz.sha256
+  download_rc_file ${dist_name}.tar.gz.sha512
+  verify_dir_artifact_signatures
+}
+
+verify_dir_artifact_signatures() {
+  # verify the signature and the checksums of each artifact found under "."
+  find . -name '*.asc' | while IFS= read -r sigfile; do
+    artifact=${sigfile%.asc}  # strip only the trailing .asc suffix
+    gpg --verify "$sigfile" "$artifact" || exit 1
+
+    # go into the directory because the checksum files contain only the
+    # basename of the artifact
+    pushd "$(dirname "$artifact")"
+    base_artifact=$(basename "$artifact")
+    ${sha256_verify} "$base_artifact.sha256" || exit 1
+    ${sha512_verify} "$base_artifact.sha512" || exit 1
+    popd
+  done
+}
+
+setup_tempdir() {
+  cleanup() {
+    if [ "${TEST_SUCCESS}" = "yes" ]; then
+      rm -fr "${ARROW_TMPDIR}"
+    else
+      echo "Failed to verify release candidate. See ${ARROW_TMPDIR} for details."
+    fi
+  }
+
+  if [ -z "${ARROW_TMPDIR}" ]; then
+    # clean up automatically if ARROW_TMPDIR is not defined
+    ARROW_TMPDIR=$(mktemp -d -t "$1.XXXXX")
+    trap cleanup EXIT
+  else
+    # don't clean up automatically
+    mkdir -p "${ARROW_TMPDIR}"
+  fi
+}
+
+test_source_distribution() {
+  # install a private rust toolchain under the current directory
+  export RUSTUP_HOME=$PWD/test-rustup
+  export CARGO_HOME=$PWD/test-rustup
+
+  curl https://sh.rustup.rs -sSf | sh -s -- -y --no-modify-path
+
+  export PATH=$RUSTUP_HOME/bin:$PATH
+  source "$RUSTUP_HOME/env"
+
+  # build and test rust
+
+  # raises on any formatting errors
+  rustup component add rustfmt --toolchain stable
+  cargo fmt --all -- --check
+
+  # Clone testing repositories into the expected location
+  git clone https://github.com/apache/arrow-testing.git testing
+  git clone https://github.com/apache/parquet-testing.git parquet-testing
+
+  cargo build
+  cargo test --all
+  # an explicit "." start path is required by BSD/macOS find
+  if ( find . -iname 'Cargo.toml' -print0 | xargs -0 grep SNAPSHOT ); then
+    echo "Cargo.toml version should not contain SNAPSHOT for releases"
+    exit 1
+  fi
+
+  cargo publish --dry-run
+}
+
+TEST_SUCCESS=no
+
+setup_tempdir "arrow-${VERSION}"
+echo "Working in sandbox ${ARROW_TMPDIR}"
+cd ${ARROW_TMPDIR}
+
+dist_name="apache-arrow-datafusion-python-${VERSION}"
+import_gpg_keys
+fetch_archive ${dist_name}
+tar xf ${dist_name}.tar.gz
+pushd ${dist_name}
+    test_source_distribution
+popd
+
+TEST_SUCCESS=yes
+echo 'Release candidate looks good!'
+exit 0