You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ph...@apache.org on 2018/02/02 18:51:35 UTC
[11/19] impala git commit: IMPALA-6455: unique tmpdirs for
test_partition_metadata_compatibility
IMPALA-6455: unique tmpdirs for test_partition_metadata_compatibility
Concurrent Hive statements running in local mode can race to modify
the contents of temporary directories - see IMPALA-6108. This applies
the workaround for IMPALA-6108 to the run_stmt_in_hive() utility
function, which is used by test_partition_metadata_compatibility.
Testing:
I wasn't able to reproduce the race locally, but I ran the test and
confirmed that it still passed. While the tests were running, I also
used "ls" to confirm that the temporary directories
/tmp/impala-tests-* were created.
Change-Id: Ibabff859d19ddbb2a3048ecc02897a611d8ddb20
Reviewed-on: http://gerrit.cloudera.org:8080/9165
Reviewed-by: Philip Zeyliger <ph...@cloudera.com>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/5aab4d4a
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/5aab4d4a
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/5aab4d4a
Branch: refs/heads/2.x
Commit: 5aab4d4ad69e91e065a07459a01b7d370e799175
Parents: ca01c9b
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Wed Jan 31 08:18:52 2018 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Feb 2 01:10:15 2018 +0000
----------------------------------------------------------------------
bin/load-data.py | 1 +
tests/common/impala_test_suite.py | 47 ++++++++++++++++++++++++----------
2 files changed, 35 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/impala/blob/5aab4d4a/bin/load-data.py
----------------------------------------------------------------------
diff --git a/bin/load-data.py b/bin/load-data.py
index 273fe4d..ed51487 100755
--- a/bin/load-data.py
+++ b/bin/load-data.py
@@ -121,6 +121,7 @@ HIVE_ARGS = '-n %s -u "jdbc:hive2://%s/default;%s" --verbose=true'\
# running MR jobs locally), we move the temporary directory into a unique
# directory via configuration. This block can be removed when
# https://issues.apache.org/jira/browse/MAPREDUCE-6441 is resolved.
+# A similar workaround is used in tests/common/impala_test_suite.py.
if options.hive_hs2_hostport.startswith("localhost:"):
HIVE_ARGS += ' --hiveconf "mapreduce.cluster.local.dir=%s"' % (tempfile.mkdtemp(
prefix="impala-data-load-"))
http://git-wip-us.apache.org/repos/asf/impala/blob/5aab4d4a/tests/common/impala_test_suite.py
----------------------------------------------------------------------
diff --git a/tests/common/impala_test_suite.py b/tests/common/impala_test_suite.py
index 86bbf71..bdd524f 100644
--- a/tests/common/impala_test_suite.py
+++ b/tests/common/impala_test_suite.py
@@ -24,7 +24,9 @@ import pprint
import pwd
import pytest
import re
+import shutil
import subprocess
+import tempfile
import time
from functools import wraps
from getpass import getuser
@@ -651,19 +653,38 @@ class ImpalaTestSuite(BaseTestSuite):
Run a statement in Hive, returning stdout if successful and throwing
RuntimeError(stderr) if not.
"""
- call = subprocess.Popen(
- ['beeline',
- '--outputformat=csv2',
- '-u', 'jdbc:hive2://' + pytest.config.option.hive_server2,
- '-n', username,
- '-e', stmt],
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
- (stdout, stderr) = call.communicate()
- call.wait()
- if call.returncode != 0:
- raise RuntimeError(stderr)
- return stdout
+ # When HiveServer2 is configured to use "local" mode (i.e., MR jobs are run
+ # in-process rather than on YARN), Hadoop's LocalDistributedCacheManager has a
+ # race, wherein it tries to localize jars into
+ # /tmp/hadoop-$USER/mapred/local/<millis>. Two simultaneous Hive queries
+ # against HS2 can conflict here. Weirdly LocalJobRunner handles a similar issue
+ # (with the staging directory) by appending a random number. To overcome this,
+ # in the case that HS2 is on the local machine (which we conflate with also
+ # running MR jobs locally), we move the temporary directory into a unique
+ # directory via configuration. This workaround can be removed when
+ # https://issues.apache.org/jira/browse/MAPREDUCE-6441 is resolved.
+ # A similar workaround is used in bin/load-data.py.
+ tmpdir = None
+ beeline_opts = []
+ if pytest.config.option.hive_server2.startswith("localhost:"):
+ tmpdir = tempfile.mkdtemp(prefix="impala-tests-")
+ beeline_opts += ['--hiveconf', 'mapreduce.cluster.local.dir={0}'.format(tmpdir)]
+ try:
+ call = subprocess.Popen(
+ ['beeline',
+ '--outputformat=csv2',
+ '-u', 'jdbc:hive2://' + pytest.config.option.hive_server2,
+ '-n', username,
+ '-e', stmt] + beeline_opts,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ (stdout, stderr) = call.communicate()
+ call.wait()
+ if call.returncode != 0:
+ raise RuntimeError(stderr)
+ return stdout
+ finally:
+ if tmpdir is not None: shutil.rmtree(tmpdir)
def hive_partition_names(self, table_name):
"""Find the names of the partitions of a table, as Hive sees them.