You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/03/16 03:43:20 UTC
[1/2] impala git commit: IMPALA-6654: [DOCS] Updated the
Kudu/Sentry/Impala limitations
Repository: impala
Updated Branches:
refs/heads/master 241031d33 -> 8dde41e80
IMPALA-6654: [DOCS] Updated the Kudu/Sentry/Impala limitations
Change-Id: I8991d85e77c7f5075525734145291457d50a7633
Reviewed-on: http://gerrit.cloudera.org:8080/9618
Reviewed-by: Thomas Tauber-Marshall <tm...@cloudera.com>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/b5b207ee
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/b5b207ee
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/b5b207ee
Branch: refs/heads/master
Commit: b5b207eefaf8160c6e17704a883f80c313de7b0b
Parents: 241031d
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Tue Mar 13 17:10:19 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Mar 16 01:48:41 2018 +0000
----------------------------------------------------------------------
docs/shared/impala_common.xml | 32 +++++++++++++++++++++++++++-----
1 file changed, 27 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/impala/blob/b5b207ee/docs/shared/impala_common.xml
----------------------------------------------------------------------
diff --git a/docs/shared/impala_common.xml b/docs/shared/impala_common.xml
index e2842a3..4a96565 100644
--- a/docs/shared/impala_common.xml
+++ b/docs/shared/impala_common.xml
@@ -3972,11 +3972,33 @@ CREATE EXTERNAL TABLE impala_name STORED AS KUDU
</p>
<p id="kudu_sentry_limitations" rev="IMPALA-4000">
- Access to Kudu tables must be granted to and revoked from roles as usual.
- Only users with <codeph>ALL</codeph> privileges on <codeph>SERVER</codeph> can create external Kudu tables.
- Currently, access to a Kudu table is <q>all or nothing</q>:
- enforced at the table level rather than the column level, and applying to all
- SQL operations rather than individual statements such as <codeph>INSERT</codeph>.
+ Access to Kudu tables must be granted to and revoked from roles with the
+ following considerations:
+ <ul>
+ <li>
+ Only users with the <codeph>ALL</codeph> privilege on
+ <codeph>SERVER</codeph> can create external Kudu tables.
+ </li>
+ <li>
+ The <codeph>ALL</codeph> privilege on <codeph>SERVER</codeph> is
+ required to specify the <codeph>kudu.master_addresses</codeph>
+ property in the <codeph>CREATE TABLE</codeph> statements for managed
+ tables as well as external tables.
+ </li>
+ <li>
+ Access to Kudu tables is enforced at the table level and at the
+ column level.
+ </li>
+ <li>
+ The <codeph>SELECT</codeph>- and <codeph>INSERT</codeph>-specific
+ permissions are supported.
+ </li>
+ <li>
+ The <codeph>DELETE</codeph>, <codeph>UPDATE</codeph>, and
+ <codeph>UPSERT</codeph> operations require the <codeph>ALL</codeph>
+ privilege.
+ </li>
+ </ul>
Because non-SQL APIs can access Kudu data without going through Sentry
authorization, currently the Sentry support is considered preliminary
and subject to change.
[2/2] impala git commit: IMPALA-6682: Remove MD5 assumption from pypi
download script
Posted by ta...@apache.org.
IMPALA-6682: Remove MD5 assumption from pypi download script
pip_download.py assumes that the Python package repository uses md5 as the
hash algorithm, which is not required by PEP 503 and is not always true in
practice. This patch removes that assumption and supports every hash
algorithm available in Python's hashlib.
Testing: buildall.sh works with two repos, one of which uses md5 and the
other sha-256.
Change-Id: Ie78f851490cbab10daa654aece36dab6e6c4329b
Reviewed-on: http://gerrit.cloudera.org:8080/9683
Reviewed-by: Tianyi Wang <tw...@cloudera.com>
Reviewed-by: Taras Bobrovytsky <tb...@cloudera.com>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/8dde41e8
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/8dde41e8
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/8dde41e8
Branch: refs/heads/master
Commit: 8dde41e802e3566d07e2db7b2bf5cd76030ab3d3
Parents: b5b207e
Author: Tianyi Wang <tw...@cloudera.com>
Authored: Thu Mar 15 16:41:05 2018 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Mar 16 03:39:33 2018 +0000
----------------------------------------------------------------------
infra/python/deps/pip_download.py | 48 +++++++++++++++++++++-------------
1 file changed, 30 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/impala/blob/8dde41e8/infra/python/deps/pip_download.py
----------------------------------------------------------------------
diff --git a/infra/python/deps/pip_download.py b/infra/python/deps/pip_download.py
index cc9b412..5a79b80 100755
--- a/infra/python/deps/pip_download.py
+++ b/infra/python/deps/pip_download.py
@@ -21,12 +21,11 @@
# over which archive type is downloaded and what post-download steps are executed.
# This script requires Python 2.6+.
-import json
+import hashlib
import os
import os.path
import re
import sys
-from hashlib import md5
from random import randint
from time import sleep
from urllib import urlopen, URLopener
@@ -39,9 +38,21 @@ PYPI_MIRROR = os.environ.get('PYPI_MIRROR', 'https://pypi.python.org')
REQUIREMENTS_FILES = ['requirements.txt', 'compiled-requirements.txt',
'kudu-requirements.txt', 'adls-requirements.txt']
-def check_md5sum(filename, expected_md5):
- actual_md5 = md5(open(filename).read()).hexdigest()
- return actual_md5 == expected_md5
+
+def check_digest(filename, algorithm, expected_digest):
+ try:
+ supported_algorithms = hashlib.algorithms_available
+ except AttributeError:
+ # Fall back to a hardcoded set if hashlib.algorithms_available doesn't exist.
+ supported_algorithms = {'md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512'}
+ if algorithm not in supported_algorithms:
+ print 'Hash algorithm {0} is not supported by hashlib'.format(algorithm)
+ return False
+ h = hashlib.new(algorithm)
+ h.update(open(filename).read())
+ actual_digest = h.hexdigest()
+ return actual_digest == expected_digest
+
def retry(func):
'''Retry decorator.'''
@@ -64,7 +75,7 @@ def retry(func):
return wrapper
def get_package_info(pkg_name, pkg_version):
- '''Returns the file name, path and md5 digest of the package.'''
+ '''Returns the file name, path, hash algorithm and digest of the package.'''
# We store the matching result in the candidates list instead of returning right away
# to sort them and return the first value in alphabetical order. This ensures that the
# same result is always returned even if the ordering changed on the server.
@@ -76,39 +87,40 @@ def get_package_info(pkg_name, pkg_version):
# downloading an extra package before running this script. Since the HTML is guaranteed
# to be formatted according to PEP 503, this is acceptable.
pkg_info = urlopen(url).read()
- # We assume that the URL includes a hash and the hash function is md5. This not strictly
- # required by PEP 503.
- regex = r'<a href=\".*?packages/(.*?)#md5=(.*?)\".*?>(.*?)<\/a>'
+ regex = r'<a href=\".*?packages/(.*?)#(.*?)=(.*?)\".*?>(.*?)<\/a>'
for match in re.finditer(regex, pkg_info):
path = match.group(1)
- md5_digest = match.group(2)
- file_name = match.group(3)
+ hash_algorithm = match.group(2)
+ digest = match.group(3)
+ file_name = match.group(4)
# Make sure that we consider only non Wheel archives, because those are not supported.
if (file_name.endswith('-{0}.tar.gz'.format(pkg_version)) or
file_name.endswith('-{0}.tar.bz2'.format(pkg_version)) or
file_name.endswith('-{0}.zip'.format(pkg_version))):
- candidates.append((file_name, path, md5_digest))
+ candidates.append((file_name, path, hash_algorithm, digest))
if not candidates:
print 'Could not find archive to download for {0} {1}'.format(pkg_name, pkg_version)
- return (None, None, None)
+ return (None, None, None, None)
return sorted(candidates)[0]
@retry
def download_package(pkg_name, pkg_version):
- file_name, path, expected_md5 = get_package_info(pkg_name, pkg_version)
+ file_name, path, hash_algorithm, expected_digest = get_package_info(pkg_name,
+ pkg_version)
if not file_name:
return False
- if os.path.isfile(file_name) and check_md5sum(file_name, expected_md5):
- print 'File with matching md5sum already exists, skipping {0}'.format(file_name)
+ if os.path.isfile(file_name) and check_digest(file_name, hash_algorithm,
+ expected_digest):
+ print 'File with matching digest already exists, skipping {0}'.format(file_name)
return True
downloader = URLopener()
pkg_url = '{0}/packages/{1}'.format(PYPI_MIRROR, path)
print 'Downloading {0} from {1}'.format(file_name, pkg_url)
downloader.retrieve(pkg_url, file_name)
- if check_md5sum(file_name, expected_md5):
+ if check_digest(file_name, hash_algorithm, expected_digest):
return True
else:
- print 'MD5 mismatch in file {0}.'.format(file_name)
+ print 'Hash digest check failed in file {0}.'.format(file_name)
return False
def main():