You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by ad...@apache.org on 2018/05/04 22:23:17 UTC

[ambari] branch trunk updated: AMBARI-23766. Corrupt mapreduce/tez tar.gz may be uploaded to HDFS if parallel execution is enabled (#1181)

This is an automated email from the ASF dual-hosted git repository.

adoroszlai pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/ambari.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 2365c1f  AMBARI-23766. Corrupt mapreduce/tez tar.gz may be uploaded to HDFS if parallel execution is enabled (#1181)
2365c1f is described below

commit 2365c1f290613fb9ccb76ac4b9dd9bcbdc0f8d43
Author: Doroszlai, Attila <64...@users.noreply.github.com>
AuthorDate: Sat May 5 00:23:13 2018 +0200

    AMBARI-23766. Corrupt mapreduce/tez tar.gz may be uploaded to HDFS if parallel execution is enabled (#1181)
---
 .../python/resource_management/TestTarArchive.py   | 66 ++++++++++++++++++++++
 .../libraries/functions/copy_tarball.py            | 17 +-----
 .../libraries/functions/tar_archive.py             | 43 +++++++++-----
 3 files changed, 97 insertions(+), 29 deletions(-)

diff --git a/ambari-agent/src/test/python/resource_management/TestTarArchive.py b/ambari-agent/src/test/python/resource_management/TestTarArchive.py
new file mode 100644
index 0000000..66d8f2e
--- /dev/null
+++ b/ambari-agent/src/test/python/resource_management/TestTarArchive.py
@@ -0,0 +1,66 @@
+'''
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+
+from mock.mock import patch, MagicMock
+from unittest import TestCase
+
+from ambari_commons.os_check import OSCheck
+from only_for_platform import os_distro_value
+from resource_management.core.environment import Environment
+from resource_management.libraries.functions import tar_archive
+
+@patch.object(OSCheck, "os_distribution", new = MagicMock(return_value = os_distro_value))
+class TestTarArchive(TestCase):
+
+  @patch("resource_management.core.providers.system.ExecuteProvider")
+  def test_archive_dir(self, execute_mock):
+    archive = '/home/etc.tar.gz'
+    directory = '/etc'
+
+    with Environment():
+      tar_archive.archive_dir(archive, directory)
+
+    self.assertEqual(execute_mock.call_count, 1)
+    self.assertEqual(execute_mock.call_args[0][0].command, ('tar', '-zcf', archive, '-C', directory, '.'))
+
+
+  @patch("resource_management.core.providers.system.ExecuteProvider")
+  def test_archive_directory_dereference(self, execute_mock):
+    archive = '/home/etc.tar.gz'
+    directory = '/etc'
+
+    with Environment():
+      tar_archive.archive_directory_dereference(archive, directory)
+
+    self.assertEqual(execute_mock.call_count, 1)
+    self.assertEqual(execute_mock.call_args[0][0].command, ('tar', '-zchf', archive, '-C', directory, '.'))
+
+
+  @patch("resource_management.core.providers.system.ExecuteProvider")
+  def test_archive_dir_via_temp_file(self, execute_mock):
+    archive = '/home/etc.tar.gz'
+    directory = '/etc'
+
+    with Environment():
+      tar_archive.archive_dir_via_temp_file(archive, directory)
+
+    self.assertEqual(execute_mock.call_count, 2)
+    self.assertEqual(execute_mock.call_args_list[0][0][0].command[:2], ('tar', '-zchf'))
+    self.assertEqual(execute_mock.call_args_list[0][0][0].command[3:], ('-C', directory, '.'))
+    temp_file = execute_mock.call_args_list[0][0][0].command[2]
+    self.assertEqual(execute_mock.call_args_list[1][0][0].command, ('mv', temp_file, archive))
diff --git a/ambari-common/src/main/python/resource_management/libraries/functions/copy_tarball.py b/ambari-common/src/main/python/resource_management/libraries/functions/copy_tarball.py
index 63bd065..d480f72 100644
--- a/ambari-common/src/main/python/resource_management/libraries/functions/copy_tarball.py
+++ b/ambari-common/src/main/python/resource_management/libraries/functions/copy_tarball.py
@@ -23,11 +23,8 @@ __all__ = ["copy_to_hdfs", "get_sysprep_skip_copy_tarballs_hdfs"]
 import os
 import tempfile
 import re
-import tarfile
-from contextlib import closing
 
 from resource_management.libraries.script.script import Script
-from resource_management.libraries.resources.hdfs_resource import HdfsResource
 from resource_management.libraries.functions import component_version
 from resource_management.libraries.functions import lzo_utils
 from resource_management.libraries.functions.default import default
@@ -126,14 +123,11 @@ def _prepare_tez_tarball():
 
   tez_tarball_with_native_lib = os.path.join(tez_native_tarball_staging_dir, "tez-native.tar.gz")
   Logger.info("Creating a new Tez tarball at {0}".format(tez_tarball_with_native_lib))
-
-  # tar up Tez, making sure to specify nothing for the arcname so that it does not include an absolute path
-  with closing(tarfile.open(tez_tarball_with_native_lib, "w:gz")) as new_tez_tarball:
-    new_tez_tarball.add(tez_temp_dir, arcname=os.path.sep)
+  tar_archive.archive_dir_via_temp_file(tez_tarball_with_native_lib, tez_temp_dir)
 
   # ensure that the tarball can be read and uploaded
   sudo.chmod(tez_tarball_with_native_lib, 0744)
-  
+
   # cleanup
   sudo.rmtree(mapreduce_temp_dir)
   sudo.rmtree(tez_temp_dir)
@@ -193,10 +187,7 @@ def _prepare_mapreduce_tarball():
 
   mapreduce_tarball_with_native_lib = os.path.join(mapreduce_native_tarball_staging_dir, "mapreduce-native.tar.gz")
   Logger.info("Creating a new mapreduce tarball at {0}".format(mapreduce_tarball_with_native_lib))
-
-  # tar up mapreduce, making sure to specify nothing for the arcname so that it does not include an absolute path
-  with closing(tarfile.open(mapreduce_tarball_with_native_lib, "w:gz")) as new_tarball:
-    new_tarball.add(mapreduce_temp_dir, arcname = os.path.sep)
+  tar_archive.archive_dir_via_temp_file(mapreduce_tarball_with_native_lib, mapreduce_temp_dir)
 
   # ensure that the tarball can be read and uploaded
   sudo.chmod(mapreduce_tarball_with_native_lib, 0744)
@@ -290,7 +281,6 @@ SERVICE_TO_CONFIG_MAP = {
 }
 
 def get_sysprep_skip_copy_tarballs_hdfs():
-  import params
   host_sys_prepped = default("/ambariLevelParams/host_sys_prepped", False)
 
   # By default, copy the tarballs to HDFS. If the cluster is sysprepped, then set based on the config.
@@ -396,7 +386,6 @@ def _get_single_version_from_stack_select():
   :return: Returns a version string if successful, and None otherwise.
   """
   # Ubuntu returns: "stdin: is not a tty", as subprocess32 output, so must use a temporary file to store the output.
-  tmpfile = tempfile.NamedTemporaryFile()
   tmp_dir = Script.get_tmp_dir()
   tmp_file = os.path.join(tmp_dir, "copy_tarball_out.txt")
   stack_version = None
diff --git a/ambari-common/src/main/python/resource_management/libraries/functions/tar_archive.py b/ambari-common/src/main/python/resource_management/libraries/functions/tar_archive.py
index 3be1ab6..3313288 100644
--- a/ambari-common/src/main/python/resource_management/libraries/functions/tar_archive.py
+++ b/ambari-common/src/main/python/resource_management/libraries/functions/tar_archive.py
@@ -20,32 +20,45 @@ limitations under the License.
 
 import os
 import tarfile
-import zipfile
+import tempfile
 from contextlib import closing
-from resource_management.core.resources.system import Execute
 
-def archive_dir(output_filename, input_dir):
-  Execute(('tar', '-zcf', output_filename, '-C', input_dir, '.'),
-    sudo = True,
-    tries = 3,
-    try_sleep = 1,
-  )
+from ambari_commons import os_utils
+from resource_management.core.resources.system import Execute
 
-def archive_directory_dereference(archive, directory):
+def archive_dir(output_filename, input_dir, follow_links=False):
   """
-  Creates an archive of the specified directory. This will ensure that
-  symlinks are not included, but instead are followed for recursive inclusion.
-  :param archive:   the name of the archive to create, including path
-  :param directory:   the directory to include
+  Creates an archive of the specified directory.
+  :param output_filename: the name of the archive to create, including path
+  :param input_dir: the directory to include
+  :param follow_links: if True, symlinks are followed and the files/directories they point to will be included in the archive
   :return:  None
   """
 
-  Execute(('tar', '-zchf', archive, '-C', directory, '.'),
+  options = '-zchf' if follow_links else '-zcf'
+
+  Execute(('tar', options, output_filename, '-C', input_dir, '.'),
     sudo = True,
     tries = 3,
     try_sleep = 1,
   )
 
+
+def archive_directory_dereference(archive, directory):
+  archive_dir(archive, directory, follow_links=True)
+
+
+def archive_dir_via_temp_file(archive, directory):
+  _, temp_output = tempfile.mkstemp()
+  try:
+    archive_directory_dereference(temp_output, directory)
+  except:
+    os_utils.remove_file(temp_output)
+    raise
+  else:
+    Execute(("mv", temp_output, archive))
+
+
 def untar_archive(archive, directory, silent=True):
   """
   Extracts a tarball using the system's tar utility. This is more
@@ -78,4 +91,4 @@ def mode(archive):
   elif archive.endswith('.tar.bz2') or archive.endswith('.tbz'):
     return 'r:bz2'
   else:
-    raise ValueError("Could not extract `%s` as no appropriate extractor is found" % archive)
\ No newline at end of file
+    raise ValueError("Could not extract `%s` as no appropriate extractor is found" % archive)

-- 
To stop receiving notification emails like this one, please contact
adoroszlai@apache.org.