You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2020/04/15 20:24:51 UTC

[impala] branch master updated: IMPALA-9629: Add CentOS 8.1 support to bootstrap_system.sh

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 34018f6  IMPALA-9629: Add CentOS 8.1 support to bootstrap_system.sh
34018f6 is described below

commit 34018f62751b01c21e869ca7ff38e6d7d4a808d9
Author: Laszlo Gaal <la...@cloudera.com>
AuthorDate: Sat Mar 7 23:12:46 2020 +0100

    IMPALA-9629: Add CentOS 8.1 support to bootstrap_system.sh
    
    CentOS 8.1 is a new major version of the CentOS family.
    It is now stable and popular enough to start supporting it for Impala
    development.
    
    Prepare a raw CentOS 8.1 system to support Impala development and testing.
    This should work on a standalone computer, on a virtual machine,
    or inside a Docker container.
    
    Details:
    - snappy-devel moved to the PowerTools repo, so it needs to be installed
      from there
    - CentOS 8 has no default Python version. The bootstrap script installs
      (or configures) Python2 with pip2, then makes them the default via the
      "alternatives" mechanism. The installer is adaptive, it performs only
      the necessary steps, so it works in various environments.
      The installer logic is also shared between bin/bootstrap_system.sh and
      docker/entrypoint.sh
    - The toolchain package tag "ec2-package-centos-8" is added to
      bootstrap_toolchain.py
    - For some unknown reason, when the downloaded Maven tarball is extracted
      in a Docker-based test, the "bin" and "boot" directories are created
      with owner-only permissions. The 'impdev' user has no access to the
      maven executable, which then breaks the build.
      This patch forcibly restores the correct permissions on these
      directories; this is a no-op when the extraction happens correctly.
    - IMPALA_TOOLCHAIN_BUILD_ID is bumped to a build that already has CentOS 8 binaries.
    - Centos8-specific bootstrap code was added to the Docker-based tests.
    
    Tested:
    - ran the Docker-based tests with --base-image=centos:8 to verify the following build
      phases are successful:
      * system prep
      * build
      * dataload
      and that tests can start. Passing all tests was not a requirement for this step,
      although plausible test results (i.e. not all of the tests fail) were.
    
    - ran the Docker-based tests to verify nonregression with --base-image set to the
      following: centos:7, ubuntu:16.04, ubuntu:18.04.
      On centos:7 and ubuntu:16.04 the only failure was IMPALA-9097 (BE tests fail without
      the minicluster running); ubuntu:18.04 showed the same failures as the current upstream
      code.
    
    - passed a core-mode test run on private infrastructure on Centos 7.4
    
    - ran buildall.sh in core mode manually inside a Docker container, simulating a developer
      workflow (prep-build-dataload-test). There were several observed test failures, but
      the workflow itself was run to completion with no problems.
    
    Change-Id: I3df5d48eca7a10219264e3604a4f05f072188e6e
    Reviewed-on: http://gerrit.cloudera.org:8080/15623
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 bin/bootstrap_system.sh    | 85 ++++++++++++++++++++++++++++++++++++++++------
 bin/bootstrap_toolchain.py |  2 ++
 bin/impala-config.sh       |  4 +--
 docker/entrypoint.sh       | 39 +++++++++++++++++++--
 4 files changed, 116 insertions(+), 14 deletions(-)

diff --git a/bin/bootstrap_system.sh b/bin/bootstrap_system.sh
index 3f32a5e..262ead4 100755
--- a/bin/bootstrap_system.sh
+++ b/bin/bootstrap_system.sh
@@ -72,6 +72,7 @@ set -x
 REDHAT=
 REDHAT6=
 REDHAT7=
+REDHAT8=
 UBUNTU=
 UBUNTU16=
 UBUNTU18=
@@ -79,6 +80,10 @@ IN_DOCKER=
 if [[ -f /etc/redhat-release ]]; then
   REDHAT=true
   echo "Identified redhat system."
+  if grep 'release 8\.' /etc/redhat-release; then
+    REDHAT8=true
+    echo "Identified redhat8 system."
+  fi
   if grep 'release 7\.' /etc/redhat-release; then
     REDHAT7=true
     echo "Identified redhat7 system."
@@ -158,6 +163,12 @@ function redhat7 {
     "$@"
   fi
 }
+# Helper function to execute following command only on RedHat8
+function redhat8 {
+  if [[ "$REDHAT8" == true ]]; then
+    "$@"
+  fi
+}
 # Helper function to execute following command only in docker
 function indocker {
   if [[ "$IN_DOCKER" == true ]]; then
@@ -228,10 +239,43 @@ fi
 redhat sudo yum install -y curl gawk gcc gcc-c++ git krb5-devel krb5-server \
         krb5-workstation libevent-devel libffi-devel make openssl-devel cyrus-sasl \
         cyrus-sasl-gssapi cyrus-sasl-devel cyrus-sasl-plain \
-        python-devel python-setuptools postgresql postgresql-server \
-        wget vim-common nscd cmake lzo-devel fuse-devel snappy-devel zlib-devel \
+        postgresql postgresql-server \
+        wget vim-common nscd cmake lzo-devel fuse-devel zlib-devel \
         psmisc lsof openssh-server redhat-lsb java-1.8.0-openjdk-devel \
-        java-1.8.0-openjdk-src python-argparse
+        java-1.8.0-openjdk-src
+
+# Enable the Powertools repo for snappy-devel on RedHat 8
+redhat8 sudo yum install -y dnf-plugins-core
+redhat8 sudo yum install -y --enablerepo="PowerTools*" snappy-devel
+
+# RedHat / CentOS 8 exposes only specific versions of Python.
+# Set up unversioned default Python 2.x for older CentOS versions
+redhat6 sudo yum install -y python-devel python-setuptools python-argparse
+redhat7 sudo yum install -y python-devel python-setuptools python-argparse
+
+# Install Python 2.x explicitly for CentOS 8
+function setup_python2() {
+  if command -v python && [[ $(python --version 2>&1 | cut -d ' ' -f 2) =~ 2\. ]]; then
+    echo "We have Python 2.x";
+  else
+    if ! command -v python2; then
+      # Python2 needs to be installed
+      sudo dnf install -y python2
+    fi
+    # Here Python2 is installed, but is not the default Python.
+    # 1. Link pip's version to Python's version
+    sudo alternatives --add-slave python /usr/bin/python2 /usr/bin/pip pip /usr/bin/pip2
+    sudo alternatives --add-slave python /usr/libexec/no-python  /usr/bin/pip pip \
+        /usr/libexec/no-python
+    # 2. Set Python2 (with pip2) to be the system default.
+    sudo alternatives --set python /usr/bin/python2
+  fi
+  # Here the Python2 runtime is already installed, add the dev package
+  sudo dnf -y install python2-devel
+}
+
+redhat8 setup_python2
+redhat8 pip install --user argparse
 
 # CentOS repos don't contain ccache, so install from EPEL
 redhat sudo yum install -y epel-release
@@ -243,18 +287,24 @@ redhat sudo yum clean all
 # Download ant for centos
 redhat sudo wget -nv \
   https://downloads.apache.org/ant/binaries/apache-ant-1.9.14-bin.tar.gz
-redhat sha512sum -c - <<< '487dbd1d7f678a92924ba884a57e910ccb4fe565c554278795a8fdfc80c4e88d81ebc2ccecb5a8f353f0b2076572bb921499a2cadb064e0f44fc406a3c31da20  apache-ant-1.9.14-bin.tar.gz'
+redhat sudo sha512sum -c - <<< '487dbd1d7f678a92924ba884a57e910ccb4fe565c554278795a8fdfc80c4e88d81ebc2ccecb5a8f353f0b2076572bb921499a2cadb064e0f44fc406a3c31da20  apache-ant-1.9.14-bin.tar.gz'
 redhat sudo tar -C /usr/local -xzf apache-ant-1.9.14-bin.tar.gz
-redhat sudo ln -s /usr/local/apache-ant-1.9.14/bin/ant /usr/local/bin
+redhat sudo ln -sf /usr/local/apache-ant-1.9.14/bin/ant /usr/local/bin
 
 # Download maven for all OSes, since the OS-packaged version can be
 # pretty old.
 if [ ! -d /usr/local/apache-maven-3.5.4 ]; then
   sudo wget -nv \
     https://downloads.apache.org/maven/maven-3/3.5.4/binaries/apache-maven-3.5.4-bin.tar.gz
-  sha512sum -c - <<< '2a803f578f341e164f6753e410413d16ab60fabe31dc491d1fe35c984a5cce696bc71f57757d4538fe7738be04065a216f3ebad4ef7e0ce1bb4c51bc36d6be86  apache-maven-3.5.4-bin.tar.gz'
-  sudo tar -C /usr/local -xzf apache-maven-3.5.4-bin.tar.gz
-  sudo ln -s /usr/local/apache-maven-3.5.4/bin/mvn /usr/local/bin
+  sudo sha512sum -c - <<< '2a803f578f341e164f6753e410413d16ab60fabe31dc491d1fe35c984a5cce696bc71f57757d4538fe7738be04065a216f3ebad4ef7e0ce1bb4c51bc36d6be86  apache-maven-3.5.4-bin.tar.gz'
+  sudo tar -C /usr/local -x --no-same-owner -zf apache-maven-3.5.4-bin.tar.gz
+  sudo ln -sf /usr/local/apache-maven-3.5.4/bin/mvn /usr/local/bin
+
+  # reset permissions on redhat8
+  # TODO: figure out why this is necessary for redhat8
+  MAVEN_DIRECTORY="/usr/local/apache-maven-3.5.4"
+  redhat8 indocker sudo chmod 0755 ${MAVEN_DIRECTORY}
+  redhat8 indocker sudo chmod 0755 ${MAVEN_DIRECTORY}/{bin,boot}
 fi
 
 if ! { service --status-all | grep -E '^ \[ \+ \]  ssh$'; }
@@ -263,8 +313,14 @@ then
   # TODO: CentOS/RH 7 uses systemd, and this doesn't work.
   redhat6 sudo service sshd start
   redhat7 notindocker sudo service sshd start
+  redhat8 notindocker sudo service sshd start
   redhat7 indocker sudo /usr/bin/ssh-keygen -A
   redhat7 indocker sudo /usr/sbin/sshd
+  redhat8 indocker sudo /usr/bin/ssh-keygen -A
+  redhat8 indocker sudo /usr/sbin/sshd
+  # The CentOS 8.1 image includes /var/run/nologin by mistake; this file prevents
+  # SSH logins. See https://github.com/CentOS/sig-cloud-instance-images/issues/60
+  redhat8 indocker sudo rm -f /var/run/nologin
 fi
 
 # TODO: config ccache to give it plenty of space
@@ -286,6 +342,9 @@ redhat6 sudo service postgresql stop
 redhat7 notindocker sudo service postgresql initdb
 redhat7 notindocker sudo service postgresql stop
 redhat7 indocker sudo -u postgres PGDATA=/var/lib/pgsql/data pg_ctl init
+redhat8 notindocker sudo service postgresql initdb
+redhat8 notindocker sudo service postgresql stop
+redhat8 indocker sudo -u postgres PGDATA=/var/lib/pgsql/data pg_ctl init
 ubuntu sudo service postgresql stop
 
 # These configurations expose connectiong to PostgreSQL via md5-hashed
@@ -301,12 +360,15 @@ redhat sudo sed -i -e 's,\(host.*\)ident,\1md5,' /var/lib/pgsql/data/pg_hba.conf
 ubuntu sudo service postgresql start
 redhat6 sudo service postgresql start
 redhat7 notindocker sudo service postgresql start
+redhat8 notindocker sudo service postgresql start
 # Important to redirect pg_ctl to a logfile, lest it keep the stdout
 # file descriptor open, preventing the shell from exiting.
 redhat7 indocker sudo -u postgres PGDATA=/var/lib/pgsql/data bash -c \
   "pg_ctl start -w --timeout=120 >> /var/lib/pgsql/pg.log 2>&1"
+redhat8 indocker sudo -u postgres PGDATA=/var/lib/pgsql/data bash -c \
+  "pg_ctl start -w --timeout=120 >> /var/lib/pgsql/pg.log 2>&1"
 
-# Set up postgress for HMS
+# Set up postgres for HMS
 if ! [[ 1 = $(sudo -u postgres psql -At -c "SELECT count(*) FROM pg_roles WHERE rolname = 'hiveuser';") ]]
 then
   sudo -u postgres psql -c "CREATE ROLE hiveuser LOGIN PASSWORD 'password';"
@@ -364,8 +426,11 @@ echo -e "\n* - nofile 1048576" | sudo tee -a /etc/security/limits.conf
 
 # Default on CentOS limits a user to 1024 or 4096 processes (threads) , which isn't
 # enough for minicluster with all of its friends.
-redhat sudo sed -i 's,\*\s*soft\s*nproc\s*[0-9]*$,* soft nproc unlimited,' \
+redhat6 sudo sed -i 's,\*\s*soft\s*nproc\s*[0-9]*$,* soft nproc unlimited,' \
+  /etc/security/limits.d/*-nproc.conf
+redhat7 sudo sed -i 's,\*\s*soft\s*nproc\s*[0-9]*$,* soft nproc unlimited,' \
   /etc/security/limits.d/*-nproc.conf
+redhat8 echo -e "* soft nproc unlimited" | sudo tee -a /etc/security/limits.conf
 
 echo ">>> Checking out Impala"
 
diff --git a/bin/bootstrap_toolchain.py b/bin/bootstrap_toolchain.py
index 1b31872..69812e2 100755
--- a/bin/bootstrap_toolchain.py
+++ b/bin/bootstrap_toolchain.py
@@ -86,9 +86,11 @@ OS_MAPPING = [
   OsMapping("centos5", "ec2-package-centos-5", None),
   OsMapping("centos6", "ec2-package-centos-6", "redhat6"),
   OsMapping("centos7", "ec2-package-centos-7", "redhat7"),
+  OsMapping("centos8", "ec2-package-centos-8", "redhat8"),
   OsMapping("redhatenterpriseserver5", "ec2-package-centos-5", None),
   OsMapping("redhatenterpriseserver6", "ec2-package-centos-6", "redhat6"),
   OsMapping("redhatenterpriseserver7", "ec2-package-centos-7", "redhat7"),
+  OsMapping("redhatenterpriseserver8", "ec2-package-centos-8", "redhat8"),
   OsMapping("debian6", "ec2-package-debian-6", None),
   OsMapping("debian7", "ec2-package-debian-7", None),
   OsMapping("debian8", "ec2-package-debian-8", "debian8"),
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 66bf94b..49278db 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -68,7 +68,7 @@ fi
 # moving to a different build of the toolchain, e.g. when a version is bumped or a
 # compile option is changed. The build id can be found in the output of the toolchain
 # build jobs, it is constructed from the build number and toolchain git hash prefix.
-export IMPALA_TOOLCHAIN_BUILD_ID=159-0989d236fd
+export IMPALA_TOOLCHAIN_BUILD_ID=7-f2ddef91e9
 # Versions of toolchain dependencies.
 # -----------------------------------
 export IMPALA_AVRO_VERSION=1.7.4-p5
@@ -148,7 +148,7 @@ export IMPALA_TPC_DS_VERSION=2.1.0
 unset IMPALA_TPC_DS_URL
 export IMPALA_TPC_H_VERSION=2.17.0
 unset IMPALA_TPC_H_URL
-export IMPALA_THRIFT_VERSION=0.9.3-p7
+export IMPALA_THRIFT_VERSION=0.9.3-p8
 unset IMPALA_THRIFT_URL
 export IMPALA_THRIFT11_VERSION=0.11.0-p2
 unset IMPALA_THRIFT11_URL
diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh
index 2d55a30..6584748 100755
--- a/docker/entrypoint.sh
+++ b/docker/entrypoint.sh
@@ -55,6 +55,32 @@ function _pg_ctl() {
   sudo service postgresql $1
 }
 
+# Install Python2 with pip2 and make them the default Python and pip commands
+# on RedHat / CentOS 8.
+# This has no notion of "default" Python, and can install both Python2 and Python3
+# side by side. Impala currently needs Python2 as the default version.
+# The function is adaptive; it performs only the necessary steps; it shares the installer
+# logic with bin/bootstrap_system.sh
+function install_python2_for_centos8() {
+  if command -v python && [[ $(python --version 2>&1 | cut -d ' ' -f 2) =~ 2\. ]]; then
+    echo "We have Python 2.x";
+  else
+    if ! command -v python2; then
+      # Python2 needs to be installed
+      dnf install -y python2
+    fi
+    # Here Python2 is installed, but is not the default Python.
+    # 1. Link pip's version to Python's version
+    alternatives --add-slave python /usr/bin/python2 /usr/bin/pip pip /usr/bin/pip2
+    alternatives --add-slave python /usr/libexec/no-python  /usr/bin/pip pip \
+        /usr/libexec/no-python
+    # 2. Set Python2 (with pip2) to be the system default.
+    alternatives --set python /usr/bin/python2
+  fi
+  # Here the Python2 runtime is already installed, add the dev package
+  dnf -y install python2-devel
+}
+
 # Boostraps the container by creating a user and adding basic tools like Python and git.
 # Takes a uid as an argument for the user to be created.
 function build() {
@@ -80,11 +106,20 @@ function build() {
     paste <(cut -d : -f3 /etc/passwd) <(cut -d : -f1 /etc/passwd) | sort -n
     exit 1
   fi
-  if which apt-get > /dev/null; then
+  if command -v apt-get > /dev/null; then
     apt-get update
     apt-get install -y sudo git lsb-release python
+  elif grep 'release 8\.' /etc/redhat-release; then
+    # WARNING: Install the following packages one by one!
+    # Installing them in a common transaction breaks something inside yum/dnf,
+    # and the subsequent step installing Python2 will fail with a GPG signature error.
+    dnf -y install sudo
+    dnf -y install which
+    dnf -y install git-core
+
+    install_python2_for_centos8
   else
-    yum -y install sudo git python
+    yum -y install which sudo git python
   fi
 
   if ! id impdev; then