You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by ad...@apache.org on 2018/05/04 22:23:17 UTC
[ambari] branch trunk updated: AMBARI-23766. Corrupt mapreduce/tez tar.gz may be uploaded to HDFS if parallel execution is enabled (#1181)
This is an automated email from the ASF dual-hosted git repository.
adoroszlai pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/ambari.git
The following commit(s) were added to refs/heads/trunk by this push:
new 2365c1f AMBARI-23766. Corrupt mapreduce/tez tar.gz may be uploaded to HDFS if parallel execution is enabled (#1181)
2365c1f is described below
commit 2365c1f290613fb9ccb76ac4b9dd9bcbdc0f8d43
Author: Doroszlai, Attila <64...@users.noreply.github.com>
AuthorDate: Sat May 5 00:23:13 2018 +0200
AMBARI-23766. Corrupt mapreduce/tez tar.gz may be uploaded to HDFS if parallel execution is enabled (#1181)
---
.../python/resource_management/TestTarArchive.py | 66 ++++++++++++++++++++++
.../libraries/functions/copy_tarball.py | 17 +-----
.../libraries/functions/tar_archive.py | 43 +++++++++-----
3 files changed, 97 insertions(+), 29 deletions(-)
diff --git a/ambari-agent/src/test/python/resource_management/TestTarArchive.py b/ambari-agent/src/test/python/resource_management/TestTarArchive.py
new file mode 100644
index 0000000..66d8f2e
--- /dev/null
+++ b/ambari-agent/src/test/python/resource_management/TestTarArchive.py
@@ -0,0 +1,66 @@
+'''
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+
+from mock.mock import patch, MagicMock
+from unittest import TestCase
+
+from ambari_commons.os_check import OSCheck
+from only_for_platform import os_distro_value
+from resource_management.core.environment import Environment
+from resource_management.libraries.functions import tar_archive
+
+@patch.object(OSCheck, "os_distribution", new = MagicMock(return_value = os_distro_value))
+class TestTarArchive(TestCase):
+
+ @patch("resource_management.core.providers.system.ExecuteProvider")
+ def test_archive_dir(self, execute_mock):
+ archive = '/home/etc.tar.gz'
+ directory = '/etc'
+
+ with Environment():
+ tar_archive.archive_dir(archive, directory)
+
+ self.assertEqual(execute_mock.call_count, 1)
+ self.assertEqual(execute_mock.call_args[0][0].command, ('tar', '-zcf', archive, '-C', directory, '.'))
+
+
+ @patch("resource_management.core.providers.system.ExecuteProvider")
+ def test_archive_directory_dereference(self, execute_mock):
+ archive = '/home/etc.tar.gz'
+ directory = '/etc'
+
+ with Environment():
+ tar_archive.archive_directory_dereference(archive, directory)
+
+ self.assertEqual(execute_mock.call_count, 1)
+ self.assertEqual(execute_mock.call_args[0][0].command, ('tar', '-zchf', archive, '-C', directory, '.'))
+
+
+ @patch("resource_management.core.providers.system.ExecuteProvider")
+ def test_archive_dir_via_temp_file(self, execute_mock):
+ archive = '/home/etc.tar.gz'
+ directory = '/etc'
+
+ with Environment():
+ tar_archive.archive_dir_via_temp_file(archive, directory)
+
+ self.assertEqual(execute_mock.call_count, 2)
+ self.assertEqual(execute_mock.call_args_list[0][0][0].command[:2], ('tar', '-zchf'))
+ self.assertEqual(execute_mock.call_args_list[0][0][0].command[3:], ('-C', directory, '.'))
+ temp_file = execute_mock.call_args_list[0][0][0].command[2]
+ self.assertEqual(execute_mock.call_args_list[1][0][0].command, ('mv', temp_file, archive))
diff --git a/ambari-common/src/main/python/resource_management/libraries/functions/copy_tarball.py b/ambari-common/src/main/python/resource_management/libraries/functions/copy_tarball.py
index 63bd065..d480f72 100644
--- a/ambari-common/src/main/python/resource_management/libraries/functions/copy_tarball.py
+++ b/ambari-common/src/main/python/resource_management/libraries/functions/copy_tarball.py
@@ -23,11 +23,8 @@ __all__ = ["copy_to_hdfs", "get_sysprep_skip_copy_tarballs_hdfs"]
import os
import tempfile
import re
-import tarfile
-from contextlib import closing
from resource_management.libraries.script.script import Script
-from resource_management.libraries.resources.hdfs_resource import HdfsResource
from resource_management.libraries.functions import component_version
from resource_management.libraries.functions import lzo_utils
from resource_management.libraries.functions.default import default
@@ -126,14 +123,11 @@ def _prepare_tez_tarball():
tez_tarball_with_native_lib = os.path.join(tez_native_tarball_staging_dir, "tez-native.tar.gz")
Logger.info("Creating a new Tez tarball at {0}".format(tez_tarball_with_native_lib))
-
- # tar up Tez, making sure to specify nothing for the arcname so that it does not include an absolute path
- with closing(tarfile.open(tez_tarball_with_native_lib, "w:gz")) as new_tez_tarball:
- new_tez_tarball.add(tez_temp_dir, arcname=os.path.sep)
+ tar_archive.archive_dir_via_temp_file(tez_tarball_with_native_lib, tez_temp_dir)
# ensure that the tarball can be read and uploaded
sudo.chmod(tez_tarball_with_native_lib, 0744)
-
+
# cleanup
sudo.rmtree(mapreduce_temp_dir)
sudo.rmtree(tez_temp_dir)
@@ -193,10 +187,7 @@ def _prepare_mapreduce_tarball():
mapreduce_tarball_with_native_lib = os.path.join(mapreduce_native_tarball_staging_dir, "mapreduce-native.tar.gz")
Logger.info("Creating a new mapreduce tarball at {0}".format(mapreduce_tarball_with_native_lib))
-
- # tar up mapreduce, making sure to specify nothing for the arcname so that it does not include an absolute path
- with closing(tarfile.open(mapreduce_tarball_with_native_lib, "w:gz")) as new_tarball:
- new_tarball.add(mapreduce_temp_dir, arcname = os.path.sep)
+ tar_archive.archive_dir_via_temp_file(mapreduce_tarball_with_native_lib, mapreduce_temp_dir)
# ensure that the tarball can be read and uploaded
sudo.chmod(mapreduce_tarball_with_native_lib, 0744)
@@ -290,7 +281,6 @@ SERVICE_TO_CONFIG_MAP = {
}
def get_sysprep_skip_copy_tarballs_hdfs():
- import params
host_sys_prepped = default("/ambariLevelParams/host_sys_prepped", False)
# By default, copy the tarballs to HDFS. If the cluster is sysprepped, then set based on the config.
@@ -396,7 +386,6 @@ def _get_single_version_from_stack_select():
:return: Returns a version string if successful, and None otherwise.
"""
# Ubuntu returns: "stdin: is not a tty", as subprocess32 output, so must use a temporary file to store the output.
- tmpfile = tempfile.NamedTemporaryFile()
tmp_dir = Script.get_tmp_dir()
tmp_file = os.path.join(tmp_dir, "copy_tarball_out.txt")
stack_version = None
diff --git a/ambari-common/src/main/python/resource_management/libraries/functions/tar_archive.py b/ambari-common/src/main/python/resource_management/libraries/functions/tar_archive.py
index 3be1ab6..3313288 100644
--- a/ambari-common/src/main/python/resource_management/libraries/functions/tar_archive.py
+++ b/ambari-common/src/main/python/resource_management/libraries/functions/tar_archive.py
@@ -20,32 +20,45 @@ limitations under the License.
import os
import tarfile
-import zipfile
+import tempfile
from contextlib import closing
-from resource_management.core.resources.system import Execute
-def archive_dir(output_filename, input_dir):
- Execute(('tar', '-zcf', output_filename, '-C', input_dir, '.'),
- sudo = True,
- tries = 3,
- try_sleep = 1,
- )
+from ambari_commons import os_utils
+from resource_management.core.resources.system import Execute
-def archive_directory_dereference(archive, directory):
+def archive_dir(output_filename, input_dir, follow_links=False):
"""
- Creates an archive of the specified directory. This will ensure that
- symlinks are not included, but instead are followed for recursive inclusion.
- :param archive: the name of the archive to create, including path
- :param directory: the directory to include
+ Creates an archive of the specified directory.
+ :param output_filename: the name of the archive to create, including path
+ :param input_dir: the directory to include
+ :param follow_links: if True, symlinks are followed and the files/directories they point to will be included in the archive
:return: None
"""
- Execute(('tar', '-zchf', archive, '-C', directory, '.'),
+ options = '-zchf' if follow_links else '-zcf'
+
+ Execute(('tar', options, output_filename, '-C', input_dir, '.'),
sudo = True,
tries = 3,
try_sleep = 1,
)
+
+def archive_directory_dereference(archive, directory):
+ archive_dir(archive, directory, follow_links=True)
+
+
+def archive_dir_via_temp_file(archive, directory):
+ _, temp_output = tempfile.mkstemp()
+ try:
+ archive_directory_dereference(temp_output, directory)
+ except:
+ os_utils.remove_file(temp_output)
+ raise
+ else:
+ Execute(("mv", temp_output, archive))
+
+
def untar_archive(archive, directory, silent=True):
"""
Extracts a tarball using the system's tar utility. This is more
@@ -78,4 +91,4 @@ def mode(archive):
elif archive.endswith('.tar.bz2') or archive.endswith('.tbz'):
return 'r:bz2'
else:
- raise ValueError("Could not extract `%s` as no appropriate extractor is found" % archive)
\ No newline at end of file
+ raise ValueError("Could not extract `%s` as no appropriate extractor is found" % archive)
--
To stop receiving notification emails like this one, please contact
adoroszlai@apache.org.