You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2018/08/23 02:46:34 UTC
impala git commit: IMPALA-7460 part 1: require user to install
Paramiko and Fabric
Repository: impala
Updated Branches:
refs/heads/master 649f175df -> 971cf179f
IMPALA-7460 part 1: require user to install Paramiko and Fabric
- Remove Fabric and Paramiko as requirements. They aren't needed by
anything in buildall.sh.
- Add a means to install into the impala-python virtual environment by hand.
impala-pip is fine for this.
- Add another requirements file for extended testing. The dependency
situation is messy and untangling that out of impala-python and into
lib/python should be out of the scope of IMPALA-7460.
- Update core tests, which cover real regressions that have happened in
the past, to run against locations that don't require a Paramiko
import. This moves some logic out of concurrent_select.py into a
thinner module.
- Insulate ssh_util from globally-scoped import so that it only imports
when needed.
Testing:
- This works in my development environment.
- This works in my downstream stress and query gen environments.
- This works when doing a full data load.
- Impala still builds on a variety of OSs.
Todo:
- A subsequent review will update the versions.
Change-Id: Ibf9010a0387b52c95b7bda5d1d4606eba1008b65
Reviewed-on: http://gerrit.cloudera.org:8080/11264
Reviewed-by: Michael Brown <mi...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/971cf179
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/971cf179
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/971cf179
Branch: refs/heads/master
Commit: 971cf179f674b312cd133397fe74ee8c5f29a215
Parents: 649f175
Author: Michael Brown <mi...@cloudera.com>
Authored: Mon Aug 20 10:24:37 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Thu Aug 23 00:20:15 2018 +0000
----------------------------------------------------------------------
bin/impala-pip | 21 ++++++
infra/python/deps/compiled-requirements.txt | 4 --
.../python/deps/extended-test-requirements.txt | 24 +++++++
tests/comparison/cluster.py | 4 +-
tests/comparison/leopard/impala_docker_env.py | 7 +-
tests/infra/test_stress_infra.py | 10 ++-
tests/stress/concurrent_select.py | 69 ++------------------
tests/util/cluster_controller.py | 14 ++--
tests/util/parse_util.py | 42 ++++++++++++
tests/util/ssh_util.py | 10 ++-
tests/util/test_file_parser.py | 34 +++++++++-
11 files changed, 157 insertions(+), 82 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/impala/blob/971cf179/bin/impala-pip
----------------------------------------------------------------------
diff --git a/bin/impala-pip b/bin/impala-pip
new file mode 100755
index 0000000..0041600
--- /dev/null
+++ b/bin/impala-pip
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+source "$(dirname "$0")/impala-python-common.sh"
+exec "$PY_DIR/env/bin/python" "$PY_DIR/env/bin/pip" "$@"
http://git-wip-us.apache.org/repos/asf/impala/blob/971cf179/infra/python/deps/compiled-requirements.txt
----------------------------------------------------------------------
diff --git a/infra/python/deps/compiled-requirements.txt b/infra/python/deps/compiled-requirements.txt
index 2c5590e..72e939a 100644
--- a/infra/python/deps/compiled-requirements.txt
+++ b/infra/python/deps/compiled-requirements.txt
@@ -19,10 +19,6 @@
# after the toolchain is bootstrapped. Installed after requirements.txt
argparse == 1.4.0
-Fabric == 1.10.2
- paramiko == 1.15.2
- ecdsa == 0.13
- pycrypto == 2.6.1
impyla == 0.14.0
bitarray == 0.8.1
sasl == 0.1.3
http://git-wip-us.apache.org/repos/asf/impala/blob/971cf179/infra/python/deps/extended-test-requirements.txt
----------------------------------------------------------------------
diff --git a/infra/python/deps/extended-test-requirements.txt b/infra/python/deps/extended-test-requirements.txt
new file mode 100644
index 0000000..c62c99a
--- /dev/null
+++ b/infra/python/deps/extended-test-requirements.txt
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Requirements for use of test tooling (tests/comparison and tests/stress).
+
+# IMPALA-7460 TODO: upgrade Fabric and Paramiko
+Fabric == 1.10.2
+ paramiko == 1.15.2
+ ecdsa == 0.13
+ pycrypto == 2.6.1
http://git-wip-us.apache.org/repos/asf/impala/blob/971cf179/tests/comparison/cluster.py
----------------------------------------------------------------------
diff --git a/tests/comparison/cluster.py b/tests/comparison/cluster.py
index 1cf9516..a2d8fc0 100644
--- a/tests/comparison/cluster.py
+++ b/tests/comparison/cluster.py
@@ -48,7 +48,6 @@ from zipfile import ZipFile
from db_connection import HiveConnection, ImpalaConnection
from tests.common.errors import Timeout
from tests.util.shell_util import shell as local_shell
-from tests.util.ssh_util import SshClient
from tests.util.parse_util import parse_glog, parse_mem_to_mb
LOG = logging.getLogger(os.path.splitext(os.path.basename(__file__))[0])
@@ -292,6 +291,9 @@ class CmCluster(Cluster):
if clients:
client = clients.pop()
else:
+ # IMPALA-7460: Insulate this import away from the global context so as to avoid
+ # requiring Paramiko unless it's absolutely needed.
+ from tests.util.ssh_util import SshClient
LOG.debug("Creating new SSH client for %s", host_name)
client = SshClient()
client.connect(host_name, username=self.ssh_user, key_filename=self.ssh_key_file)
http://git-wip-us.apache.org/repos/asf/impala/blob/971cf179/tests/comparison/leopard/impala_docker_env.py
----------------------------------------------------------------------
diff --git a/tests/comparison/leopard/impala_docker_env.py b/tests/comparison/leopard/impala_docker_env.py
index 38555a4..a837c00 100755
--- a/tests/comparison/leopard/impala_docker_env.py
+++ b/tests/comparison/leopard/impala_docker_env.py
@@ -18,7 +18,12 @@
'''This module generates a docker environment for a job'''
from __future__ import division
-from fabric.api import sudo, run, settings
+try:
+ from fabric.api import sudo, run, settings
+except ImportError as e:
+ raise Exception(
+ "Please run impala-pip install -r $IMPALA_HOME/infra/python/deps/extended-test-"
+ "requirements.txt:\n{0}".format(str(e)))
from logging import getLogger
from os.path import (
join as join_path,
http://git-wip-us.apache.org/repos/asf/impala/blob/971cf179/tests/infra/test_stress_infra.py
----------------------------------------------------------------------
diff --git a/tests/infra/test_stress_infra.py b/tests/infra/test_stress_infra.py
index 58f7625..7e97ffa 100644
--- a/tests/infra/test_stress_infra.py
+++ b/tests/infra/test_stress_infra.py
@@ -24,12 +24,10 @@ import pytest
from decimal import Decimal
from tests.common.impala_test_suite import ImpalaTestSuite
-from tests.stress.concurrent_select import (
- EXPECTED_TPCDS_QUERIES_COUNT,
- EXPECTED_TPCH_NESTED_QUERIES_COUNT,
- EXPECTED_TPCH_QUERIES_COUNT,
- load_tpc_queries,
- match_memory_estimate)
+from tests.util.parse_util import (
+ EXPECTED_TPCDS_QUERIES_COUNT, EXPECTED_TPCH_NESTED_QUERIES_COUNT,
+ EXPECTED_TPCH_QUERIES_COUNT, match_memory_estimate)
+from tests.util.test_file_parser import load_tpc_queries
class TestStressInfra(ImpalaTestSuite):
http://git-wip-us.apache.org/repos/asf/impala/blob/971cf179/tests/stress/concurrent_select.py
----------------------------------------------------------------------
diff --git a/tests/stress/concurrent_select.py b/tests/stress/concurrent_select.py
index 64e06e1..71fc146 100755
--- a/tests/stress/concurrent_select.py
+++ b/tests/stress/concurrent_select.py
@@ -85,25 +85,13 @@ from tests.comparison.db_types import Int, TinyInt, SmallInt, BigInt
from tests.comparison.model_translator import SqlWriter
from tests.comparison.query_generator import QueryGenerator
from tests.comparison.query_profile import DefaultProfile
-from tests.util.parse_util import parse_mem_to_mb
+from tests.util.parse_util import (
+ EXPECTED_TPCDS_QUERIES_COUNT, EXPECTED_TPCH_NESTED_QUERIES_COUNT,
+ EXPECTED_TPCH_QUERIES_COUNT, match_memory_estimate, parse_mem_to_mb)
from tests.util.thrift_util import op_handle_to_query_id
LOG = logging.getLogger(os.path.splitext(os.path.basename(__file__))[0])
-# IMPALA-6715: Every so often the stress test or the TPC workload directories get
-# changed, and the stress test loses the ability to run the full set of queries. Set
-# these constants and assert that when a workload is used, all the queries we expect to
-# use are there.
-EXPECTED_TPCDS_QUERIES_COUNT = 71
-EXPECTED_TPCH_NESTED_QUERIES_COUNT = 22
-EXPECTED_TPCH_QUERIES_COUNT = 22
-
-# Regex to extract the estimated memory from an explain plan.
-# The unit prefixes can be found in
-# fe/src/main/java/org/apache/impala/common/PrintUtils.java
-MEM_ESTIMATE_PATTERN = re.compile(
- r"Per-Host Resource Estimates: Memory=(\d+\.?\d*)(P|T|G|M|K)?B")
-
PROFILES_DIR = "profiles"
RESULT_HASHES_DIR = "result_hashes"
@@ -1230,25 +1218,10 @@ class QueryRunner(object):
def load_tpc_queries(workload):
"""Returns a list of TPC queries. 'workload' should either be 'tpch' or 'tpcds'."""
LOG.info("Loading %s queries", workload)
- queries = list()
- query_dir = os.path.join(
- os.path.dirname(__file__), "..", "..", "testdata", "workloads", workload, "queries")
- # IMPALA-6715 and others from the past: This pattern enforces the queries we actually
- # find. Both workload directories contain other queries that are not part of the TPC
- # spec.
- file_name_pattern = re.compile(r"^{0}-(q.*).test$".format(workload))
- for query_file in os.listdir(query_dir):
- match = file_name_pattern.search(query_file)
- if not match:
- continue
- file_path = os.path.join(query_dir, query_file)
- file_queries = load_queries_from_test_file(file_path)
- if len(file_queries) != 1:
- raise Exception(
- "Expected exactly 1 query to be in file %s but got %s"
- % (file_path, len(file_queries)))
- query = file_queries[0]
- query.name = match.group(1)
+ queries = []
+ for query_text in test_file_parser.load_tpc_queries(workload):
+ query = Query()
+ query.sql = query_text
queries.append(query)
return queries
@@ -1537,34 +1510,6 @@ def populate_runtime_info(query, impala, converted_args, timeout_secs=maxint):
LOG.debug("Query after populating runtime info: %s", query)
-def match_memory_estimate(explain_lines):
- """
- Given a list of strings from EXPLAIN output, find the estimated memory needed. This is
- used as a binary search start point.
-
- Params:
- explain_lines: list of str
-
- Returns:
- 2-tuple str of memory limit in decimal string and units (one of 'P', 'T', 'G', 'M',
- 'K', '' bytes)
-
- Raises:
- Exception if no match found
- """
- # IMPALA-6441: This method is a public, first class method so it can be importable and
- # tested with actual EXPLAIN output to make sure we always find the start point.
- mem_limit, units = None, None
- for line in explain_lines:
- regex_result = MEM_ESTIMATE_PATTERN.search(line)
- if regex_result:
- mem_limit, units = regex_result.groups()
- break
- if None in (mem_limit, units):
- raise Exception('could not parse explain string:\n' + '\n'.join(explain_lines))
- return mem_limit, units
-
-
def estimate_query_mem_mb_usage(query, query_runner):
"""Runs an explain plan then extracts and returns the estimated memory needed to run
the query.
http://git-wip-us.apache.org/repos/asf/impala/blob/971cf179/tests/util/cluster_controller.py
----------------------------------------------------------------------
diff --git a/tests/util/cluster_controller.py b/tests/util/cluster_controller.py
index 6b45429..388fa49 100644
--- a/tests/util/cluster_controller.py
+++ b/tests/util/cluster_controller.py
@@ -15,17 +15,23 @@
# specific language governing permissions and limitations
# under the License.
-import fabric.decorators
+try:
+ import fabric.decorators
+ from fabric.context_managers import hide, settings
+ from fabric.operations import local, run, sudo
+ from fabric.tasks import execute
+except ImportError as e:
+ raise Exception(
+ "Please run impala-pip install -r $IMPALA_HOME/infra/python/deps/extended-test-"
+ "requirements.txt:\n{0}".format(str(e)))
import logging
import os
from contextlib import contextmanager
-from fabric.context_managers import hide, settings
-from fabric.operations import local, run, sudo
-from fabric.tasks import execute
from textwrap import dedent
LOG = logging.getLogger('cluster_controller')
+
class ClusterController(object):
"""A convenience wrapper around fabric."""
http://git-wip-us.apache.org/repos/asf/impala/blob/971cf179/tests/util/parse_util.py
----------------------------------------------------------------------
diff --git a/tests/util/parse_util.py b/tests/util/parse_util.py
index 592716b..b84f635 100644
--- a/tests/util/parse_util.py
+++ b/tests/util/parse_util.py
@@ -18,8 +18,21 @@
import re
from datetime import datetime
+# IMPALA-6715: Every so often the stress test or the TPC workload directories get
+# changed, and the stress test loses the ability to run the full set of queries. Set
+# these constants and assert that when a workload is used, all the queries we expect to
+# use are there.
+EXPECTED_TPCDS_QUERIES_COUNT = 71
+EXPECTED_TPCH_NESTED_QUERIES_COUNT = 22
+EXPECTED_TPCH_QUERIES_COUNT = 22
+# Regex to extract the estimated memory from an explain plan.
+# The unit prefixes can be found in
+# fe/src/main/java/org/apache/impala/common/PrintUtils.java
+MEM_ESTIMATE_PATTERN = re.compile(
+ r"Per-Host Resource Estimates: Memory=(\d+\.?\d*)(P|T|G|M|K)?B")
NEW_GLOG_ENTRY_PATTERN = re.compile(r"[IWEF](?P<Time>\d{4} \d{2}:\d{2}:\d{2}\.\d{6}).*")
+
def parse_glog(text, start_time=None):
'''Parses the log 'text' and returns a list of log entries. If a 'start_time' is
provided only log entries that are after the time will be returned.
@@ -71,6 +84,7 @@ def parse_mem_to_mb(mem, units):
raise Exception('Unexpected memory unit "%s"' % units)
return int(mem)
+
def parse_duration_string_ms(duration):
"""Parses a duration string of the form 1h2h3m4s5.6ms4.5us7.8ns into milliseconds."""
pattern = r'(?P<value>[0-9]+\.?[0-9]*?)(?P<units>\D+)'
@@ -83,3 +97,31 @@ def parse_duration_string_ms(duration):
times[parsed['units']] = float(parsed['value'])
return (times['h'] * 60 * 60 + times['m'] * 60 + times['s']) * 1000 + times['ms']
+
+
+def match_memory_estimate(explain_lines):
+ """
+ Given a list of strings from EXPLAIN output, find the estimated memory needed. This is
+ used as a binary search start point.
+
+ Params:
+ explain_lines: list of str
+
+ Returns:
+ 2-tuple str of memory limit in decimal string and units (one of 'P', 'T', 'G', 'M',
+ 'K', '' bytes)
+
+ Raises:
+ Exception if no match found
+ """
+ # IMPALA-6441: This method is a public, first class method so it can be importable and
+ # tested with actual EXPLAIN output to make sure we always find the start point.
+ mem_limit, units = None, None
+ for line in explain_lines:
+ regex_result = MEM_ESTIMATE_PATTERN.search(line)
+ if regex_result:
+ mem_limit, units = regex_result.groups()
+ break
+ if None in (mem_limit, units):
+ raise Exception('could not parse explain string:\n' + '\n'.join(explain_lines))
+ return mem_limit, units
http://git-wip-us.apache.org/repos/asf/impala/blob/971cf179/tests/util/ssh_util.py
----------------------------------------------------------------------
diff --git a/tests/util/ssh_util.py b/tests/util/ssh_util.py
index 8a56d96..a26f1bc 100644
--- a/tests/util/ssh_util.py
+++ b/tests/util/ssh_util.py
@@ -18,13 +18,19 @@
import atexit
import logging
import os
-import paramiko
+try:
+ import paramiko
+except ImportError as e:
+ raise Exception(
+ "Please run impala-pip install -r $IMPALA_HOME/infra/python/deps/extended-test-"
+ "requirements.txt:\n{0}".format(str(e)))
import textwrap
import time
LOG = logging.getLogger(os.path.splitext(os.path.basename(__file__))[0])
-from tests.common.errors import Timeout
+from tests.common.errors import Timeout # noqa:E402
+
class SshClient(paramiko.SSHClient):
"""A paramiko SSH client modified to:
http://git-wip-us.apache.org/repos/asf/impala/blob/971cf179/tests/util/test_file_parser.py
----------------------------------------------------------------------
diff --git a/tests/util/test_file_parser.py b/tests/util/test_file_parser.py
index 9fa8d70..8d3c58a 100644
--- a/tests/util/test_file_parser.py
+++ b/tests/util/test_file_parser.py
@@ -20,9 +20,11 @@
import codecs
import collections
import logging
+import os
+import os.path
import re
+
from collections import defaultdict
-from os.path import isfile
from textwrap import dedent
LOG = logging.getLogger('impala_test_suite')
@@ -102,7 +104,7 @@ def parse_table_constraints(constraints_file):
schema_include = defaultdict(list)
schema_exclude = defaultdict(list)
schema_only = defaultdict(list)
- if not isfile(constraints_file):
+ if not os.path.isfile(constraints_file):
LOG.info('No schema constraints file file found')
else:
with open(constraints_file, 'rb') as constraints_file:
@@ -313,3 +315,31 @@ def write_test_file(test_file_name, test_file_sections, encoding=None):
test_file_text.append(section_value)
test_file_text.append(SECTION_DELIMITER)
test_file.write(('\n').join(test_file_text))
+
+
+def load_tpc_queries(workload):
+ """Returns a list of TPC queries. 'workload' should either be 'tpch' or 'tpcds'."""
+ LOG.info("Loading %s queries", workload)
+ queries = list()
+ query_dir = os.path.join(
+ os.environ['IMPALA_HOME'], "testdata", "workloads", workload, "queries")
+ # IMPALA-6715 and others from the past: This pattern enforces the queries we actually
+ # find. Both workload directories contain other queries that are not part of the TPC
+ # spec.
+ file_name_pattern = re.compile(r"^{0}-(q.*).test$".format(workload))
+ for query_file in os.listdir(query_dir):
+ match = file_name_pattern.search(query_file)
+ if not match:
+ continue
+ file_path = os.path.join(query_dir, query_file)
+ test_cases = parse_query_test_file(file_path)
+ file_queries = list()
+ for test_case in test_cases:
+ query_sql = remove_comments(test_case["QUERY"])
+ file_queries.append(query_sql)
+ if len(file_queries) != 1:
+ raise Exception(
+ "Expected exactly 1 query to be in file %s but got %s"
+ % (file_path, len(file_queries)))
+ queries.append(file_queries[0])
+ return queries