You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/03/03 20:57:14 UTC

[GitHub] [arrow] kou commented on a change in pull request #12320: ARROW-15483: [Release] Revamp the verification scripts

kou commented on a change in pull request #12320:
URL: https://github.com/apache/arrow/pull/12320#discussion_r819047613



##########
File path: dev/release/verify-release-candidate.sh
##########
@@ -404,62 +636,102 @@ test_python() {
   if [ "${ARROW_GANDIVA}" = "ON" ]; then
     export PYARROW_WITH_GANDIVA=1
   fi
+  if [ "${ARROW_PLASMA}" = "ON" ]; then
+    export PYARROW_WITH_PLASMA=1
+  fi
+  if [ "${ARROW_S3}" = "ON" ]; then
+    export PYARROW_WITH_S3=1
+  fi
+
+  pushd python
 
+  # Build pyarrow
   python setup.py build_ext --inplace
-  pytest pyarrow -v --pdb
+
+  # Check mandatory and optional imports
+  python -c "
+import pyarrow
+import pyarrow._hdfs
+import pyarrow.csv
+import pyarrow.dataset
+import pyarrow.fs
+import pyarrow.json
+import pyarrow.orc
+import pyarrow.parquet
+"
+  if [ "${ARROW_CUDA}" == "ON" ]; then
+    python -c "import pyarrow.cuda"
+  fi
+  if [ "${ARROW_FLIGHT}" == "ON" ]; then
+    python -c "import pyarrow.flight"
+  fi
+  if [ "${ARROW_GANDIVA}" == "ON" ]; then
+    python -c "import pyarrow.gandiva"
+  fi
+  if [ "${ARROW_PLASMA}" == "ON" ]; then
+    python -c "import pyarrow.plasma"
+  fi
+  if [ "${ARROW_S3}" == "ON" ]; then
+    python -c "import pyarrow._s3fs"
+  fi
+
+  # Install test dependencies
+  pip install -r requirements-test.txt
+
+  # Execute pyarrow unittests
+  pytest pyarrow -v
 
   popd
 }
 
 test_glib() {
-  pushd c_glib
+  show_header "Build and test C Glib libraries"
 
-  pip install meson
-
-  meson build --prefix=$ARROW_HOME --libdir=lib
-  ninja -C build
-  ninja -C build install
-
-  export GI_TYPELIB_PATH=$ARROW_HOME/lib/girepository-1.0:$GI_TYPELIB_PATH
+  # Build and test C GLib
+  setup_conda glib gobject-introspection meson ninja ruby || exit 1
+  setup_virtualenv meson || exit 1

Review comment:
       I want to verify C GLib with system libraries (installed by `apt`).
   Could you add support for this case?
   
   We can confirm these dependencies by the following command lines:
   
   ```console
   $ pkg-config --exists glib-2.0
   $ pkg-config --exists gobject-introspection-1.0
   $ meson --version
   $ ninja --version
   $ ruby --version
   ```

##########
File path: dev/release/verify-release-candidate.sh
##########
@@ -34,47 +34,70 @@
 # a directory where the temporary files should be placed to, note that this
 # directory is not cleaned up automatically.
 
+set -e
+set -o pipefail
+
+if [ ${VERBOSE:-0} -gt 0 ]; then
+  set -x

Review comment:
       Great!!!

##########
File path: dev/release/verify-release-candidate.sh
##########
@@ -378,23 +353,280 @@ test_csharp() {
     fi
   fi
 
-  sourcelink test artifacts/Apache.Arrow/Release/netstandard1.3/Apache.Arrow.pdb
-  sourcelink test artifacts/Apache.Arrow/Release/netcoreapp2.1/Apache.Arrow.pdb
+  CSHARP_ALREADY_INSTALLED=1
+}
+
+install_go() {
+  # Install go
+  if [ "${GO_ALREADY_INSTALLED:-0}" -gt 0 ]; then
+    show_info "$(go version) already installed at $(which go)"
+    return 0
+  fi
 
+  local version=1.16.12
+  show_info "Installing go version ${version}..."
+
+  local arch="$(uname -m)"
+  if [ "$arch" == "x86_64" ]; then
+    arch=amd64
+  elif [ "$arch" == "aarch64" ]; then
+    arch=arm64
+  fi
+
+  if [ "$(uname)" == "Darwin" ]; then
+    local os=darwin
+  else
+    local os=linux
+  fi
+
+  local archive="go${version}.${os}-${arch}.tar.gz"
+  curl -sLO https://dl.google.com/go/$archive
+
+  local prefix=${ARROW_TMPDIR}/go
+  mkdir -p $prefix
+  tar -xzf $archive -C $prefix
+  rm -f $archive
+
+  export GOROOT=${prefix}/go
+  export GOPATH=${prefix}/gopath
+  export PATH=$GOROOT/bin:$GOPATH/bin:$PATH
+
+  show_info "$(go version) installed at $(which go)"
+
+  GO_ALREADY_INSTALLED=1
+}
+
+install_conda() {
+  # Setup short-lived miniconda for Python and integration tests
+  show_info "Ensuring that Conda is installed..."
+  local prefix=$ARROW_TMPDIR/mambaforge
+
+  # Setup miniconda only if the directory doesn't exist yet
+  if [ "${CONDA_ALREADY_INSTALLED:-0}" -eq 0 ]; then
+    if [ ! -d "${prefix}" ]; then
+      show_info "Installing miniconda at ${prefix}..."
+      local arch=$(uname -m)
+      local platform=$(uname)
+      local url="https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-${platform}-${arch}.sh"
+      curl -sL -o miniconda.sh $url
+      bash miniconda.sh -b -p $prefix
+      rm -f miniconda.sh
+    else
+      show_info "Miniconda already installed at ${prefix}"
+    fi
+  else
+    show_info "Conda installed at ${prefix}"
+  fi
+  CONDA_ALREADY_INSTALLED=1
+
+  # Creating a separate conda environment
+  . $prefix/etc/profile.d/conda.sh
+  conda activate base
+}
+
+setup_conda() {
+  # Optionally setup conda environment with the passed dependencies
+  local env="conda-${ENV:-source}"
+  local pyver=${PYTHON_VERSION:-3}
+
+  if [ "${USE_CONDA}" -gt 0 ]; then
+    show_info "Configuring Conda environment..."
+
+    # Deactivate previous env
+    if [ ! -z ${CONDA_PREFIX} ]; then
+      conda deactivate || :
+    fi
+    # Ensure that conda is installed
+    install_conda
+    # Create environment
+    if ! conda env list | cut -d" " -f 1 | grep $env; then
+      mamba create -y -n $env python=${pyver}
+    fi
+    # Install dependencies
+    if [ $# -gt 0 ]; then
+      mamba install -y -n $env $@
+    fi
+    # Activate the environment
+    conda activate $env
+  elif [ ! -z ${CONDA_PREFIX} ]; then
+    echo "Conda environment is active despite that USE_CONDA is set to 0."
+    echo "Deactivate the environment using `conda deactive` before running the verification script."
+    return 1
+  fi
+}
+
+setup_virtualenv() {
+  # Optionally setup pip virtualenv with the passed dependencies
+  local env="venv-${ENV:-source}"
+  local pyver=${PYTHON_VERSION:-3}
+  local python=${PYTHON:-"python${pyver}"}
+  local virtualenv="${ARROW_TMPDIR}/${env}"
+  local skip_missing_python=${SKIP_MISSING_PYTHON:-0}
+
+  if [ "${USE_CONDA}" -eq 0 ]; then
+    show_info "Configuring Python ${pyver} virtualenv..."
+
+    if [ ! -z ${CONDA_PREFIX} ]; then
+      echo "Conda environment is active despite that USE_CONDA is set to 0."
+      echo "Deactivate the environment before running the verification script."
+      return 1
+    fi
+    # Deactivate previous env
+    if command -v deactivate &> /dev/null; then
+      deactivate
+    fi
+    # Check that python interpreter exists
+    if ! command -v "${python}" &> /dev/null; then
+      echo "Couldn't locate python interpreter with version ${pyver}"
+      echo "Call the script with USE_CONDA=1 to test all of the python versions."
+      return 1
+    else
+      show_info "Found interpreter $($python --version): $(which $python)"
+    fi
+    # Create environment
+    if [ ! -d "${virtualenv}" ]; then
+      show_info "Creating python virtualenv at ${virtualenv}..."
+      $python -m venv ${virtualenv}
+      # Activate the environment
+      source "${virtualenv}/bin/activate"
+      # Upgrade pip
+      pip install -U pip
+    else
+      show_info "Using already created virtualenv at ${virtualenv}"
+      # Activate the environment
+      source "${virtualenv}/bin/activate"
+    fi
+    # Install dependencies
+    if [ $# -gt 0 ]; then
+      show_info "Installed pip packages $@..."
+      pip install $@

Review comment:
       Generally, we should surround `$@` with `"` to handle spaces correctly. (In this case, without `"` will work.)
   
   ```suggestion
         pip install "$@"
   ```
   
   ```shell
   ruby -e 'p ARGV' $@
   ruby -e 'p ARGV' "$@"
   ```
   
   ```console
   $ bash /tmp/a.sh 1 "2 3" 4
   ["1", "2", "3", "4"]
   ["1", "2 3", "4"]
   ```

##########
File path: dev/release/verify-release-candidate.sh
##########
@@ -481,98 +756,135 @@ test_ruby() {
   popd
 }
 
-test_go() {
-  local VERSION=1.16.12
+test_csharp() {
+  show_header "Build and test C# libraries"
 
-  local ARCH="$(uname -m)"
-  if [ "$ARCH" == "x86_64" ]; then
-    ARCH=amd64
-  elif [ "$ARCH" == "aarch64" ]; then
-    ARCH=arm64
-  fi
+  install_csharp
 
-  if [ "$(uname)" == "Darwin" ]; then
-    local OS=darwin
+  pushd csharp
+
+  dotnet test
+
+  if [ "${SOURCE_KIND}" = "local" -o "${SOURCE_KIND}" = "git" ]; then
+    dotnet pack -c Release
   else
-    local OS=linux
+    mv dummy.git ../.git
+    dotnet pack -c Release
+    mv ../.git dummy.git
   fi
 
-  local GO_ARCHIVE=go$VERSION.$OS-$ARCH.tar.gz
-  wget https://dl.google.com/go/$GO_ARCHIVE
+  sourcelink test artifacts/Apache.Arrow/Release/netstandard1.3/Apache.Arrow.pdb
+  sourcelink test artifacts/Apache.Arrow/Release/netcoreapp2.1/Apache.Arrow.pdb
+
+  popd
+}
 
-  mkdir -p local-go
-  tar -xzf $GO_ARCHIVE -C local-go
-  rm -f $GO_ARCHIVE
+test_js() {
+  show_header "Build and test JavaScript libraries"
 
-  export GOROOT=`pwd`/local-go/go
-  export GOPATH=`pwd`/local-go/gopath
-  export PATH=$GOROOT/bin:$GOPATH/bin:$PATH
+  setup_nodejs || exit 1
+  setup_conda nodejs=17 || exit 1
 
-  pushd go/arrow
+  if ! command -v yarn &> /dev/null; then
+    npm install -g yarn
+  fi
+
+  pushd js
+  yarn --frozen-lockfile
+  yarn clean:all
+  yarn lint
+  yarn build
+  yarn test
+  yarn test:bundle
+  popd
+}
+
+test_go() {
+  show_header "Build and test Go libraries"
+
+  setup_go || exit 1
+  setup_conda compilers go=1.17 || exit 1

Review comment:
       Is this needed?

##########
File path: dev/release/verify-release-candidate.sh
##########
@@ -481,98 +756,135 @@ test_ruby() {
   popd
 }
 
-test_go() {
-  local VERSION=1.16.12
+test_csharp() {
+  show_header "Build and test C# libraries"
 
-  local ARCH="$(uname -m)"
-  if [ "$ARCH" == "x86_64" ]; then
-    ARCH=amd64
-  elif [ "$ARCH" == "aarch64" ]; then
-    ARCH=arm64
-  fi
+  install_csharp
 
-  if [ "$(uname)" == "Darwin" ]; then
-    local OS=darwin
+  pushd csharp
+
+  dotnet test
+
+  if [ "${SOURCE_KIND}" = "local" -o "${SOURCE_KIND}" = "git" ]; then
+    dotnet pack -c Release
   else
-    local OS=linux
+    mv dummy.git ../.git
+    dotnet pack -c Release
+    mv ../.git dummy.git
   fi
 
-  local GO_ARCHIVE=go$VERSION.$OS-$ARCH.tar.gz
-  wget https://dl.google.com/go/$GO_ARCHIVE
+  sourcelink test artifacts/Apache.Arrow/Release/netstandard1.3/Apache.Arrow.pdb
+  sourcelink test artifacts/Apache.Arrow/Release/netcoreapp2.1/Apache.Arrow.pdb
+
+  popd
+}
 
-  mkdir -p local-go
-  tar -xzf $GO_ARCHIVE -C local-go
-  rm -f $GO_ARCHIVE
+test_js() {
+  show_header "Build and test JavaScript libraries"
 
-  export GOROOT=`pwd`/local-go/go
-  export GOPATH=`pwd`/local-go/gopath
-  export PATH=$GOROOT/bin:$GOPATH/bin:$PATH
+  setup_nodejs || exit 1
+  setup_conda nodejs=17 || exit 1

Review comment:
       I think that this is needless because `setup_nodejs` will install Node.js.

##########
File path: dev/release/verify-release-candidate.sh
##########
@@ -404,62 +636,102 @@ test_python() {
   if [ "${ARROW_GANDIVA}" = "ON" ]; then
     export PYARROW_WITH_GANDIVA=1
   fi
+  if [ "${ARROW_PLASMA}" = "ON" ]; then
+    export PYARROW_WITH_PLASMA=1
+  fi
+  if [ "${ARROW_S3}" = "ON" ]; then
+    export PYARROW_WITH_S3=1
+  fi
+
+  pushd python
 
+  # Build pyarrow
   python setup.py build_ext --inplace
-  pytest pyarrow -v --pdb
+
+  # Check mandatory and optional imports
+  python -c "
+import pyarrow
+import pyarrow._hdfs
+import pyarrow.csv
+import pyarrow.dataset
+import pyarrow.fs
+import pyarrow.json
+import pyarrow.orc
+import pyarrow.parquet
+"
+  if [ "${ARROW_CUDA}" == "ON" ]; then
+    python -c "import pyarrow.cuda"
+  fi
+  if [ "${ARROW_FLIGHT}" == "ON" ]; then
+    python -c "import pyarrow.flight"
+  fi
+  if [ "${ARROW_GANDIVA}" == "ON" ]; then
+    python -c "import pyarrow.gandiva"
+  fi
+  if [ "${ARROW_PLASMA}" == "ON" ]; then
+    python -c "import pyarrow.plasma"
+  fi
+  if [ "${ARROW_S3}" == "ON" ]; then
+    python -c "import pyarrow._s3fs"
+  fi
+
+  # Install test dependencies
+  pip install -r requirements-test.txt
+
+  # Execute pyarrow unittests
+  pytest pyarrow -v
 
   popd
 }
 
 test_glib() {
-  pushd c_glib
+  show_header "Build and test C Glib libraries"

Review comment:
       ```suggestion
     show_header "Build and test C GLib libraries"
   ```

##########
File path: dev/release/verify-release-candidate.sh
##########
@@ -404,62 +636,102 @@ test_python() {
   if [ "${ARROW_GANDIVA}" = "ON" ]; then
     export PYARROW_WITH_GANDIVA=1
   fi
+  if [ "${ARROW_PLASMA}" = "ON" ]; then
+    export PYARROW_WITH_PLASMA=1
+  fi
+  if [ "${ARROW_S3}" = "ON" ]; then
+    export PYARROW_WITH_S3=1
+  fi
+
+  pushd python
 
+  # Build pyarrow
   python setup.py build_ext --inplace
-  pytest pyarrow -v --pdb
+
+  # Check mandatory and optional imports
+  python -c "
+import pyarrow
+import pyarrow._hdfs
+import pyarrow.csv
+import pyarrow.dataset
+import pyarrow.fs
+import pyarrow.json
+import pyarrow.orc
+import pyarrow.parquet
+"
+  if [ "${ARROW_CUDA}" == "ON" ]; then
+    python -c "import pyarrow.cuda"
+  fi
+  if [ "${ARROW_FLIGHT}" == "ON" ]; then
+    python -c "import pyarrow.flight"
+  fi
+  if [ "${ARROW_GANDIVA}" == "ON" ]; then
+    python -c "import pyarrow.gandiva"
+  fi
+  if [ "${ARROW_PLASMA}" == "ON" ]; then
+    python -c "import pyarrow.plasma"
+  fi
+  if [ "${ARROW_S3}" == "ON" ]; then
+    python -c "import pyarrow._s3fs"
+  fi
+
+  # Install test dependencies
+  pip install -r requirements-test.txt
+
+  # Execute pyarrow unittests
+  pytest pyarrow -v
 
   popd
 }
 
 test_glib() {
-  pushd c_glib
+  show_header "Build and test C Glib libraries"
 
-  pip install meson
-
-  meson build --prefix=$ARROW_HOME --libdir=lib
-  ninja -C build
-  ninja -C build install
-
-  export GI_TYPELIB_PATH=$ARROW_HOME/lib/girepository-1.0:$GI_TYPELIB_PATH
+  # Build and test C GLib
+  setup_conda glib gobject-introspection meson ninja ruby || exit 1
+  setup_virtualenv meson || exit 1
 
+  # Install bundler if doesn't exist
   if ! bundle --version; then
     gem install --no-document bundler
   fi
 
+  local build_dir=$ARROW_TMPDIR/c-glib-build
+  mkdir -p $build_dir
+
+  pushd c_glib
+
+  # Build the C GLib bindings
+  meson \
+    --buildtype=${CMAKE_BUILD_TYPE:-release} \
+    --libdir=lib \
+    --prefix=$ARROW_HOME \
+    $build_dir
+  ninja -C $build_dir
+  ninja -C $build_dir install
+
+  # Test the C GLib bindings
+  export GI_TYPELIB_PATH=$ARROW_HOME/lib/girepository-1.0:$GI_TYPELIB_PATH
   bundle config set --local path 'vendor/bundle'
   bundle install
   bundle exec ruby test/run-test.rb
 
   popd
 }
 
-test_js() {
-  pushd js
-
-  if [ "${INSTALL_NODE}" -gt 0 ]; then
-    export NVM_DIR="`pwd`/.nvm"
-    mkdir -p $NVM_DIR
-    curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.0/install.sh | \
-      PROFILE=/dev/null bash
-    [ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh"
+test_ruby() {
+  show_header "Build and test Ruby libraries"
 
-    nvm install --lts
-    npm install -g yarn
-  fi
+  # required dependencies are installed by test_glib
+  setup_conda || exit 1
+  setup_virtualenv || exit 1

Review comment:
       Please don't call them if we use system libraries.
   
   BTW, I think that we can omit `|| exit 1` because we use `set -e`.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org