You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by ad...@apache.org on 2018/05/05 16:35:07 UTC

[ambari] branch branch-2.6 updated: AMBARI-23766. Corrupt mapreduce/tez tar.gz may be uploaded to HDFS if parallel execution is enabled (#1188)

This is an automated email from the ASF dual-hosted git repository.

adoroszlai pushed a commit to branch branch-2.6
in repository https://gitbox.apache.org/repos/asf/ambari.git


The following commit(s) were added to refs/heads/branch-2.6 by this push:
     new 4d99242  AMBARI-23766. Corrupt mapreduce/tez tar.gz may be uploaded to HDFS if parallel execution is enabled (#1188)
4d99242 is described below

commit 4d9924261b1a70c0891c48b335a01532bd066a39
Author: Doroszlai, Attila <64...@users.noreply.github.com>
AuthorDate: Sat May 5 18:35:03 2018 +0200

    AMBARI-23766. Corrupt mapreduce/tez tar.gz may be uploaded to HDFS if parallel execution is enabled (#1188)
---
 .../python/resource_management/TestTarArchive.py   | 66 ++++++++++++++++++++++
 .../libraries/functions/copy_tarball.py            | 16 +-----
 .../libraries/functions/tar_archive.py             | 41 +++++++++-----
 3 files changed, 96 insertions(+), 27 deletions(-)

diff --git a/ambari-agent/src/test/python/resource_management/TestTarArchive.py b/ambari-agent/src/test/python/resource_management/TestTarArchive.py
new file mode 100644
index 0000000..66d8f2e
--- /dev/null
+++ b/ambari-agent/src/test/python/resource_management/TestTarArchive.py
@@ -0,0 +1,66 @@
+'''
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+
+from mock.mock import patch, MagicMock
+from unittest import TestCase
+
+from ambari_commons.os_check import OSCheck
+from only_for_platform import os_distro_value
+from resource_management.core.environment import Environment
+from resource_management.libraries.functions import tar_archive
+
+@patch.object(OSCheck, "os_distribution", new = MagicMock(return_value = os_distro_value))
+class TestTarArchive(TestCase):
+
+  @patch("resource_management.core.providers.system.ExecuteProvider")
+  def test_archive_dir(self, execute_mock):
+    archive = '/home/etc.tar.gz'
+    directory = '/etc'
+
+    with Environment():
+      tar_archive.archive_dir(archive, directory)
+
+    self.assertEqual(execute_mock.call_count, 1)
+    self.assertEqual(execute_mock.call_args[0][0].command, ('tar', '-zcf', archive, '-C', directory, '.'))
+
+
+  @patch("resource_management.core.providers.system.ExecuteProvider")
+  def test_archive_directory_dereference(self, execute_mock):
+    archive = '/home/etc.tar.gz'
+    directory = '/etc'
+
+    with Environment():
+      tar_archive.archive_directory_dereference(archive, directory)
+
+    self.assertEqual(execute_mock.call_count, 1)
+    self.assertEqual(execute_mock.call_args[0][0].command, ('tar', '-zchf', archive, '-C', directory, '.'))
+
+
+  @patch("resource_management.core.providers.system.ExecuteProvider")
+  def test_archive_dir_via_temp_file(self, execute_mock):
+    archive = '/home/etc.tar.gz'
+    directory = '/etc'
+
+    with Environment():
+      tar_archive.archive_dir_via_temp_file(archive, directory)
+
+    self.assertEqual(execute_mock.call_count, 2)
+    self.assertEqual(execute_mock.call_args_list[0][0][0].command[:2], ('tar', '-zchf'))
+    self.assertEqual(execute_mock.call_args_list[0][0][0].command[3:], ('-C', directory, '.'))
+    temp_file = execute_mock.call_args_list[0][0][0].command[2]
+    self.assertEqual(execute_mock.call_args_list[1][0][0].command, ('mv', temp_file, archive))
diff --git a/ambari-common/src/main/python/resource_management/libraries/functions/copy_tarball.py b/ambari-common/src/main/python/resource_management/libraries/functions/copy_tarball.py
index b008db2..e33052d 100644
--- a/ambari-common/src/main/python/resource_management/libraries/functions/copy_tarball.py
+++ b/ambari-common/src/main/python/resource_management/libraries/functions/copy_tarball.py
@@ -23,11 +23,8 @@ __all__ = ["copy_to_hdfs", "get_sysprep_skip_copy_tarballs_hdfs"]
 import os
 import tempfile
 import re
-import tarfile
-from contextlib import closing
 
 from resource_management.libraries.script.script import Script
-from resource_management.libraries.resources.hdfs_resource import HdfsResource
 from resource_management.libraries.functions import component_version
 from resource_management.libraries.functions import lzo_utils
 from resource_management.libraries.functions.default import default
@@ -126,14 +123,11 @@ def _prepare_tez_tarball():
 
   tez_tarball_with_native_lib = os.path.join(tez_native_tarball_staging_dir, "tez-native.tar.gz")
   Logger.info("Creating a new Tez tarball at {0}".format(tez_tarball_with_native_lib))
-
-  # tar up Tez, making sure to specify nothing for the arcname so that it does not include an absolute path
-  with closing(tarfile.open(tez_tarball_with_native_lib, "w:gz")) as new_tez_tarball:
-    new_tez_tarball.add(tez_temp_dir, arcname=os.path.sep)
+  tar_archive.archive_dir_via_temp_file(tez_tarball_with_native_lib, tez_temp_dir)
 
   # ensure that the tarball can be read and uploaded
   sudo.chmod(tez_tarball_with_native_lib, 0744)
-  
+
   # cleanup
   sudo.rmtree(mapreduce_temp_dir)
   sudo.rmtree(tez_temp_dir)
@@ -193,10 +187,7 @@ def _prepare_mapreduce_tarball():
 
   mapreduce_tarball_with_native_lib = os.path.join(mapreduce_native_tarball_staging_dir, "mapreduce-native.tar.gz")
   Logger.info("Creating a new mapreduce tarball at {0}".format(mapreduce_tarball_with_native_lib))
-
-  # tar up mapreduce, making sure to specify nothing for the arcname so that it does not include an absolute path
-  with closing(tarfile.open(mapreduce_tarball_with_native_lib, "w:gz")) as new_tarball:
-    new_tarball.add(mapreduce_temp_dir, arcname = os.path.sep)
+  tar_archive.archive_dir_via_temp_file(mapreduce_tarball_with_native_lib, mapreduce_temp_dir)
 
   # ensure that the tarball can be read and uploaded
   sudo.chmod(mapreduce_tarball_with_native_lib, 0744)
@@ -382,7 +373,6 @@ def _get_single_version_from_stack_select():
   :return: Returns a version string if successful, and None otherwise.
   """
   # Ubuntu returns: "stdin: is not a tty", as subprocess32 output, so must use a temporary file to store the output.
-  tmpfile = tempfile.NamedTemporaryFile()
   tmp_dir = Script.get_tmp_dir()
   tmp_file = os.path.join(tmp_dir, "copy_tarball_out.txt")
   stack_version = None
diff --git a/ambari-common/src/main/python/resource_management/libraries/functions/tar_archive.py b/ambari-common/src/main/python/resource_management/libraries/functions/tar_archive.py
index 5e369cf..e6e5222 100644
--- a/ambari-common/src/main/python/resource_management/libraries/functions/tar_archive.py
+++ b/ambari-common/src/main/python/resource_management/libraries/functions/tar_archive.py
@@ -20,32 +20,45 @@ limitations under the License.
 
 import os
 import tarfile
-import zipfile
+import tempfile
 from contextlib import closing
-from resource_management.core.resources.system import Execute
 
-def archive_dir(output_filename, input_dir):
-  Execute(('tar', '-zcf', output_filename, '-C', input_dir, '.'),
-    sudo = True,
-    tries = 3,
-    try_sleep = 1,
-  )
+from ambari_commons import os_utils
+from resource_management.core.resources.system import Execute
 
-def archive_directory_dereference(archive, directory):
+def archive_dir(output_filename, input_dir, follow_links=False):
   """
-  Creates an archive of the specified directory. This will ensure that
-  symlinks are not included, but instead are followed for recursive inclusion.
-  :param archive:   the name of the archive to create, including path
-  :param directory:   the directory to include
+  Creates an archive of the specified directory.
+  :param output_filename: the name of the archive to create, including path
+  :param input_dir: the directory to include
+  :param follow_links: if True, symlinks are followed and the files/directories they point to will be included in the archive
   :return:  None
   """
 
-  Execute(('tar', '-zchf', archive, '-C', directory, '.'),
+  options = '-zchf' if follow_links else '-zcf'
+
+  Execute(('tar', options, output_filename, '-C', input_dir, '.'),
     sudo = True,
     tries = 3,
     try_sleep = 1,
   )
 
+
+def archive_directory_dereference(archive, directory):
+  archive_dir(archive, directory, follow_links=True)
+
+
+def archive_dir_via_temp_file(archive, directory):
+  _, temp_output = tempfile.mkstemp()
+  try:
+    archive_directory_dereference(temp_output, directory)
+  except:
+    os_utils.remove_file(temp_output)
+    raise
+  else:
+    Execute(("mv", temp_output, archive))
+
+
 def untar_archive(archive, directory, silent=True):
   """
   :param directory:   can be a symlink and is followed

-- 
To stop receiving notification emails like this one, please contact
adoroszlai@apache.org.