You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2020/06/12 01:54:43 UTC
[impala] branch master updated: IMPALA-9107 (part 2): Add script to
use the m2 archive tarball
This is an automated email from the ASF dual-hosted git repository.
joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new fb28285 IMPALA-9107 (part 2): Add script to use the m2 archive tarball
fb28285 is described below
commit fb282852ef52d72079a86c55a90982ffac567cc7
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Thu Apr 2 17:28:45 2020 -0700
IMPALA-9107 (part 2): Add script to use the m2 archive tarball
This adds a script to find an appropriate m2 archive
tarball, download it, and use it to prepopulate the
~/.m2 directory.
The script uses the JSON interface for Jenkins to search through
the all-build-options-ub1604 builds on jenkins.impala.io to
find one that:
1. Is building the "master" branch
2. Has the m2_archive.tar.gz
Then, it downloads the m2 archive and uses it to populate ~/.m2.
It does not overwrite or remove any files already in ~/.m2.
The build scripts that call populate_m2_directory.py do not
rely on the script succeeding. They will continue even if
the script fails.
This also modifies the build-all-flag-combinations.sh script
to only build the m2 archive if the GENERATE_M2_ARCHIVE
environment variable is true. GENERATE_M2_ARCHIVE=true will
clear out the ~/.m2 directory to build an accurate m2 archive.
Precommit jobs will use GENERATE_M2_ARCHIVE=false, which
will allow them to use the m2 archive to speed up the build.
Testing:
- Ran gerrit-verify-dryrun
- Tested locally
Change-Id: I5065658d8c0514550927161855b0943fa7b3a402
Reviewed-on: http://gerrit.cloudera.org:8080/15735
Reviewed-by: Joe McDonnell <jo...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
bin/bootstrap_build.sh | 5 +
bin/bootstrap_system.sh | 5 +
bin/jenkins/build-all-flag-combinations.sh | 17 ++-
bin/jenkins/populate_m2_directory.py | 172 +++++++++++++++++++++++++++++
4 files changed, 195 insertions(+), 4 deletions(-)
diff --git a/bin/bootstrap_build.sh b/bin/bootstrap_build.sh
index 1168bb0..a450ef7 100755
--- a/bin/bootstrap_build.sh
+++ b/bin/bootstrap_build.sh
@@ -54,4 +54,9 @@ if [ ! -d /usr/local/apache-maven-3.5.4 ]; then
sudo ln -s /usr/local/apache-maven-3.5.4/bin/mvn /usr/local/bin
fi
+# Try to prepopulate the m2 directory to save time
+if ! bin/jenkins/populate_m2_directory.py ; then
+ echo "Failed to prepopulate the m2 directory. Continuing..."
+fi
+
./buildall.sh -notests -so
diff --git a/bin/bootstrap_system.sh b/bin/bootstrap_system.sh
index a52083d..18cce2b 100755
--- a/bin/bootstrap_system.sh
+++ b/bin/bootstrap_system.sh
@@ -471,3 +471,8 @@ fi
cd "$HADOOP_LZO_HOME"
time -p ant package
cd "$IMPALA_HOME"
+
+# Try to prepopulate the m2 directory to save time
+if ! bin/jenkins/populate_m2_directory.py ; then
+ echo "Failed to prepopulate the m2 directory. Continuing..."
+fi
diff --git a/bin/jenkins/build-all-flag-combinations.sh b/bin/jenkins/build-all-flag-combinations.sh
index a6a0d2c..9209e48 100755
--- a/bin/jenkins/build-all-flag-combinations.sh
+++ b/bin/jenkins/build-all-flag-combinations.sh
@@ -32,6 +32,8 @@ export IMPALA_MAVEN_OPTIONS="-U"
. bin/impala-config.sh
+: ${GENERATE_M2_ARCHIVE:=false}
+
# These are configurations for buildall.
CONFIGS=(
# Test gcc builds with and without -so:
@@ -46,6 +48,13 @@ CONFIGS=(
FAILED=""
+if [[ "$GENERATE_M2_ARCHIVE" == true ]]; then
+ # The m2 archive relies on parsing the maven log to get a list of jars downloaded
+ # from particular repositories. To accurately produce the archive every time, we
+ # need to clear out the ~/.m2 directory before producing the archive.
+ rm -rf ~/.m2
+fi
+
TMP_DIR=$(mktemp -d)
function onexit {
echo "$0: Cleaning up temporary directory"
@@ -53,8 +62,6 @@ function onexit {
}
trap onexit EXIT
-mkdir -p ${TMP_DIR}
-
for CONFIG in "${CONFIGS[@]}"; do
DESCRIPTION="Options $CONFIG"
@@ -91,7 +98,9 @@ then
exit 1
fi
-# Make a tarball of the .m2 directory
-bin/jenkins/archive_m2_directory.sh logs/mvn/mvn_accumulated.log logs/m2_archive.tar.gz
+if [[ "$GENERATE_M2_ARCHIVE" == true ]]; then
+ # Make a tarball of the .m2 directory
+ bin/jenkins/archive_m2_directory.sh logs/mvn/mvn_accumulated.log logs/m2_archive.tar.gz
+fi
# Note: The exit callback handles cleanup of the temp directory.
diff --git a/bin/jenkins/populate_m2_directory.py b/bin/jenkins/populate_m2_directory.py
new file mode 100755
index 0000000..1570189
--- /dev/null
+++ b/bin/jenkins/populate_m2_directory.py
@@ -0,0 +1,172 @@
+#!/usr/bin/python
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import json
+import subprocess
+import os
+import shutil
+from tempfile import mkdtemp
+
+# Name of the Jenkins job whose builds are searched for an m2 archive
+ALL_BUILD_OPTIONS_JOB = "all-build-options-ub1604"
+# Hostname of the Jenkins server that is queried for that job
+JENKINS_IMPALA_IO = "jenkins.impala.io"
+# File name of the build artifact that holds the m2 archive
+M2_ARCHIVE_NAME = "m2_archive.tar.gz"
+
+
+class JenkinsBuild(object):
+  """
+  Lightweight handle for one Jenkins build: its build number together with
+  the URL from which more detailed information can be fetched.
+  """
+  def __init__(self, number, url):
+    self.number, self.url = number, url
+
+
+class JenkinsBuildDetails(object):
+  """
+  Holds the parameters and the artifacts of one particular Jenkins build,
+  each as a dictionary.
+  """
+  def __init__(self, parameter_dict, artifact_dict):
+    self.parameter_dict, self.artifact_dict = parameter_dict, artifact_dict
+
+
+def get_build_list(jenkins_server, job):
+  """
+  Fetch the builds that Jenkins lists for 'job' on 'jenkins_server' and return
+  them as a list of JenkinsBuild objects (build number and build URL).
+  """
+  # The listing comes from Jenkins' JSON API. It is downloaded with wget, to
+  # avoid any python dependencies, into a scratch directory that is removed on
+  # every exit path.
+  scratch_dir = mkdtemp()
+  try:
+    listing_url_tmpl = "https://{0}/job/{1}/api/json?tree=builds[number,url]&pretty=true"
+    listing_url = listing_url_tmpl.format(jenkins_server, job)
+    listing_path = os.path.join(scratch_dir, "job_{0}_build_list.json".format(job))
+    subprocess.check_call(["wget", "-q", listing_url, "-O", listing_path])
+    with open(listing_path) as listing_file:
+      listing = json.load(listing_file)
+  finally:
+    shutil.rmtree(scratch_dir)
+
+  # Wrap each JSON build entry in a JenkinsBuild
+  return [JenkinsBuild(entry["number"], entry["url"]) for entry in listing["builds"]]
+
+
+def get_build_details(build):
+  """
+  Download detailed information for 'build' (a JenkinsBuild) using the Jenkins
+  JSON API and return it as a JenkinsBuildDetails containing:
+   - parameter_dict: build parameter name -> value
+   - artifact_dict: artifact file name -> URL of that artifact
+  A build that has no "parameters" action yields an empty parameter_dict rather
+  than an error, so callers can simply skip it.
+  Raises subprocess.CalledProcessError if the wget download fails.
+  """
+  tmpdir = mkdtemp()
+  try:
+    # Download the build's JSON to the temporary directory and read it back.
+    # This uses wget to avoid any python dependencies.
+    json_url = "{0}/api/json?&pretty=true".format(build.url)
+    json_filename = os.path.join(tmpdir, "build_details_{0}.json".format(build.number))
+    subprocess.check_call(["wget", "-q", json_url, "-O", json_filename])
+    with open(json_filename) as f:
+      json_dict = json.load(f)
+  finally:
+    shutil.rmtree(tmpdir)
+
+  # The parameters live in the first "actions" entry that has a "parameters"
+  # key. Not every build has such an entry, so default to an empty list instead
+  # of iterating over None. Empty/null action entries are skipped.
+  parameter_section = []
+  for section in json_dict.get("actions", []):
+    if section and "parameters" in section:
+      parameter_section = section["parameters"]
+      break
+  # Tolerate parameter entries that carry no "value" field by mapping them to
+  # None rather than failing the whole lookup.
+  parameter_dict = dict(
+      (parameter["name"], parameter.get("value")) for parameter in parameter_section)
+
+  artifact_dict = {}
+  for artifact in json_dict.get("artifacts", []):
+    artifact_url = "{0}/artifact/{1}".format(build.url, artifact["relativePath"])
+    artifact_dict[artifact["fileName"]] = artifact_url
+
+  return JenkinsBuildDetails(parameter_dict, artifact_dict)
+
+
+def get_m2_archive_url(jenkins_server, jenkins_job):
+  """
+  Go through the builds of 'jenkins_job' on 'jenkins_server' in the order
+  Jenkins lists them and return the m2 archive URL of the first build that
+  qualifies, or None when no build does. A build qualifies when:
+   1. It is based on the master branch
+   2. It has the m2 archive artifact
+  This depends on how the Jenkins job is structured (i.e. parameters,
+  archives), so it is not generic.
+  """
+  for candidate in get_build_list(jenkins_server, jenkins_job):
+    # Fetch the parameters and artifacts of this build
+    details = get_build_details(candidate)
+    if details.parameter_dict.get("IMPALA_REPO_BRANCH") != "master":
+      continue
+    if M2_ARCHIVE_NAME not in details.artifact_dict:
+      continue
+    return details.artifact_dict[M2_ARCHIVE_NAME]
+
+  return None
+
+
+def download_and_unpack_m2_archive(url, directory):
+  """
+  Download the m2 archive tarball at 'url' into 'directory' and extract it
+  into ~/.m2, creating ~/.m2 first if it does not exist. Files already present
+  in ~/.m2 are not overwritten.
+  """
+  print("Downloading m2 archive from {0} to {1}".format(url, directory))
+  archive_name = os.path.basename(url)
+  archive_path = os.path.join(directory, archive_name)
+  subprocess.check_call(["wget", "-q", url, "-O", archive_path])
+
+  m2_home = os.path.expanduser("~/.m2")
+  if not os.path.exists(m2_home):
+    print("{0} does not exist, creating...".format(m2_home))
+    os.makedirs(m2_home)
+
+  print("Unpacking {0} to {1}".format(archive_name, m2_home))
+  # --skip-old-files keeps tar from replacing anything already in ~/.m2
+  subprocess.check_call(
+      ["tar", "-zxf", archive_path, "-C", m2_home, "--skip-old-files"])
+
+
+def main():
+  """
+  Look up an m2 archive on the Jenkins server and unpack it into ~/.m2.
+  Returns 0 after a successful download and unpack, or 1 if no suitable m2
+  archive could be found. Download/unpack failures propagate as exceptions.
+  """
+  m2_archive_url = get_m2_archive_url(JENKINS_IMPALA_IO, ALL_BUILD_OPTIONS_JOB)
+  if not m2_archive_url:
+    print("Could not find any m2 archive for {0} {1}".format(JENKINS_IMPALA_IO,
+                                                             ALL_BUILD_OPTIONS_JOB))
+    # Stop here: there is nothing to download. Falling through would pass None
+    # to the download step and die with an unrelated traceback.
+    return 1
+  tmpdir = mkdtemp()
+  try:
+    download_and_unpack_m2_archive(m2_archive_url, tmpdir)
+  finally:
+    shutil.rmtree(tmpdir)
+  return 0
+
+
+if __name__ == "__main__":
+  # Propagate main()'s status as the process exit code so that the calling
+  # build scripts can tell whether the m2 directory was prepopulated.
+  raise SystemExit(main())