You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ph...@apache.org on 2018/02/02 18:51:35 UTC

[11/19] impala git commit: IMPALA-6455: unique tmpdirs for test_partition_metadata_compatibility

IMPALA-6455: unique tmpdirs for test_partition_metadata_compatibility

Concurrent Hive statements running in local mode can race to modify
the contents of temporary directories - see IMPALA-6108. This applies
the workaround for IMPALA-6108 to the run_stmt_in_hive() utility
function, which is used by test_partition_metadata_compatibility.

Testing:
I wasn't able to reproduce the race locally, but I ran the test and
confirmed that it still passed. While the tests were running, I also
used "ls" to confirm that the temporary directories
/tmp/impala-tests-* were created.

Change-Id: Ibabff859d19ddbb2a3048ecc02897a611d8ddb20
Reviewed-on: http://gerrit.cloudera.org:8080/9165
Reviewed-by: Philip Zeyliger <ph...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/5aab4d4a
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/5aab4d4a
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/5aab4d4a

Branch: refs/heads/2.x
Commit: 5aab4d4ad69e91e065a07459a01b7d370e799175
Parents: ca01c9b
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Wed Jan 31 08:18:52 2018 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Feb 2 01:10:15 2018 +0000

----------------------------------------------------------------------
 bin/load-data.py                  |  1 +
 tests/common/impala_test_suite.py | 47 ++++++++++++++++++++++++----------
 2 files changed, 35 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/5aab4d4a/bin/load-data.py
----------------------------------------------------------------------
diff --git a/bin/load-data.py b/bin/load-data.py
index 273fe4d..ed51487 100755
--- a/bin/load-data.py
+++ b/bin/load-data.py
@@ -121,6 +121,7 @@ HIVE_ARGS = '-n %s -u "jdbc:hive2://%s/default;%s" --verbose=true'\
 # running MR jobs locally), we move the temporary directory into a unique
 # directory via configuration. This block can be removed when
 # https://issues.apache.org/jira/browse/MAPREDUCE-6441 is resolved.
+# A similar workaround is used in tests/common/impala_test_suite.py.
 if options.hive_hs2_hostport.startswith("localhost:"):
   HIVE_ARGS += ' --hiveconf "mapreduce.cluster.local.dir=%s"' % (tempfile.mkdtemp(
     prefix="impala-data-load-"))

http://git-wip-us.apache.org/repos/asf/impala/blob/5aab4d4a/tests/common/impala_test_suite.py
----------------------------------------------------------------------
diff --git a/tests/common/impala_test_suite.py b/tests/common/impala_test_suite.py
index 86bbf71..bdd524f 100644
--- a/tests/common/impala_test_suite.py
+++ b/tests/common/impala_test_suite.py
@@ -24,7 +24,9 @@ import pprint
 import pwd
 import pytest
 import re
+import shutil
 import subprocess
+import tempfile
 import time
 from functools import wraps
 from getpass import getuser
@@ -651,19 +653,38 @@ class ImpalaTestSuite(BaseTestSuite):
     Run a statement in Hive, returning stdout if successful and throwing
     RuntimeError(stderr) if not.
     """
-    call = subprocess.Popen(
-        ['beeline',
-         '--outputformat=csv2',
-         '-u', 'jdbc:hive2://' + pytest.config.option.hive_server2,
-         '-n', username,
-         '-e', stmt],
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE)
-    (stdout, stderr) = call.communicate()
-    call.wait()
-    if call.returncode != 0:
-      raise RuntimeError(stderr)
-    return stdout
+    # When HiveServer2 is configured to use "local" mode (i.e., MR jobs are run
+    # in-process rather than on YARN), Hadoop's LocalDistributedCacheManager has a
+    # race, wherein it tries to localize jars into
+    # /tmp/hadoop-$USER/mapred/local/<millis>. Two simultaneous Hive queries
+    # against HS2 can conflict here. Weirdly LocalJobRunner handles a similar issue
+    # (with the staging directory) by appending a random number. To overcome this,
+    # in the case that HS2 is on the local machine (which we conflate with also
+    # running MR jobs locally), we move the temporary directory into a unique
+    # directory via configuration. This workaround can be removed when
+    # https://issues.apache.org/jira/browse/MAPREDUCE-6441 is resolved.
+    # A similar workaround is used in bin/load-data.py.
+    tmpdir = None
+    beeline_opts = []
+    if pytest.config.option.hive_server2.startswith("localhost:"):
+      tmpdir = tempfile.mkdtemp(prefix="impala-tests-")
+      beeline_opts += ['--hiveconf', 'mapreduce.cluster.local.dir={0}'.format(tmpdir)]
+    try:
+      call = subprocess.Popen(
+          ['beeline',
+           '--outputformat=csv2',
+           '-u', 'jdbc:hive2://' + pytest.config.option.hive_server2,
+           '-n', username,
+           '-e', stmt] + beeline_opts,
+          stdout=subprocess.PIPE,
+          stderr=subprocess.PIPE)
+      (stdout, stderr) = call.communicate()
+      call.wait()
+      if call.returncode != 0:
+        raise RuntimeError(stderr)
+      return stdout
+    finally:
+      if tmpdir is not None: shutil.rmtree(tmpdir)
 
   def hive_partition_names(self, table_name):
     """Find the names of the partitions of a table, as Hive sees them.