You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2017/10/11 21:52:18 UTC
incubator-impala git commit: IMPALA-6027: Retry downloading toolchain
components.
Repository: incubator-impala
Updated Branches:
refs/heads/master f03900a80 -> 3bdde74a7
IMPALA-6027: Retry downloading toolchain components.
We've seen intermittent HTTP 500 errors when downloading the toolchain from
S3 over the HTTPS URLs. As a first stab, this commit attempts each download
up to 3 times, sleeping 10 to 15 seconds (with random jitter) between attempts.
I also changed the thread pool introduced previously to use at most
4 threads (fewer on machines with fewer than 4 CPUs), because that's
sufficient to get the speed improvement. The 500 errors have been observed
both before and after the thread pool change.
For testing, I ran the straightforward case directly. I also introduced
a broken version string to confirm that the download is retried on
any error from wget.
Change-Id: I7669c7d41240aa0eb43c30d5bf2bd5c01b66180b
Reviewed-on: http://gerrit.cloudera.org:8080/8258
Reviewed-by: Thomas Tauber-Marshall <tm...@cloudera.com>
Reviewed-by: Michael Brown <mi...@cloudera.com>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/3bdde74a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/3bdde74a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/3bdde74a
Branch: refs/heads/master
Commit: 3bdde74a70ce8f202faee8ce52b157ea88860ecf
Parents: f03900a
Author: Philip Zeyliger <ph...@cloudera.com>
Authored: Wed Oct 11 09:28:37 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Oct 11 21:45:40 2017 +0000
----------------------------------------------------------------------
bin/bootstrap_toolchain.py | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3bdde74a/bin/bootstrap_toolchain.py
----------------------------------------------------------------------
diff --git a/bin/bootstrap_toolchain.py b/bin/bootstrap_toolchain.py
index 76d4c5a..bea3a99 100755
--- a/bin/bootstrap_toolchain.py
+++ b/bin/bootstrap_toolchain.py
@@ -29,12 +29,14 @@
#
# python bootstrap_toolchain.py
import os
+import random
import re
import sh
import shutil
import subprocess
import sys
import tempfile
+import time
HOST = "https://native-toolchain.s3.amazonaws.com/build"
@@ -77,9 +79,18 @@ def get_platform_release_label(release=None):
def wget_and_unpack_package(download_path, file_name, destination, wget_no_clobber):
print "URL {0}".format(download_path)
- print "Downloading {0} to {1}".format(file_name, destination)
- # --no-clobber avoids downloading the file if a file with the name already exists
- sh.wget(download_path, directory_prefix=destination, no_clobber=wget_no_clobber)
+ NUM_ATTEMPTS = 3
+ for attempt in range(1, NUM_ATTEMPTS + 1):
+ print "Downloading {0} to {1} (attempt {2})".format(file_name, destination, attempt)
+ # --no-clobber avoids downloading the file if a file with the name already exists
+ try:
+ sh.wget(download_path, directory_prefix=destination, no_clobber=wget_no_clobber)
+ break
+ except Exception, e:
+ if attempt == NUM_ATTEMPTS:
+ raise
+ print "Download failed; retrying after sleep: " + str(e)
+ time.sleep(10 + random.random() * 5) # Sleep between 10 and 15 seconds.
print "Extracting {0}".format(file_name)
sh.tar(z=True, x=True, f=os.path.join(destination, file_name), directory=destination)
sh.rm(os.path.join(destination, file_name))
@@ -310,7 +321,7 @@ def execute_many(f, args):
pool = None
try:
import multiprocessing.pool
- pool = multiprocessing.pool.ThreadPool()
+ pool = multiprocessing.pool.ThreadPool(processes=min(multiprocessing.cpu_count(), 4))
return pool.map(f, args, 1)
except ImportError:
# multiprocessing was introduced in Python 2.6.