You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by jo...@apache.org on 2017/11/28 21:20:31 UTC

[12/24] ambari git commit: AMBARI-22486 - Conditionally Rebuild MapReduce and Tez Tarballs with LZO if Enabled (jonathanhurley)

AMBARI-22486 - Conditionally Rebuild MapReduce and Tez Tarballs with LZO if Enabled (jonathanhurley)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/58c7f784
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/58c7f784
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/58c7f784

Branch: refs/heads/branch-2.6
Commit: 58c7f784abe9b6f05a1358c14a8f94afc6725f79
Parents: 519f527
Author: Jonathan Hurley <jh...@hortonworks.com>
Authored: Mon Nov 20 14:23:20 2017 -0500
Committer: Jonathan Hurley <jh...@hortonworks.com>
Committed: Tue Nov 21 20:32:13 2017 -0500

----------------------------------------------------------------------
 .../libraries/functions/copy_tarball.py         | 99 +++++++++++++++++++-
 .../TEZ/0.4.0.2.1/package/scripts/tez.py        |  5 +-
 .../configuration-mapred/mapred-site.xml        |  2 +-
 .../2.2/services/TEZ/configuration/tez-site.xml |  4 +-
 .../YARN/configuration-mapred/mapred-site.xml   |  2 +-
 .../stacks/HDP/2.6/upgrades/config-upgrade.xml  |  6 +-
 6 files changed, 106 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/58c7f784/ambari-common/src/main/python/resource_management/libraries/functions/copy_tarball.py
----------------------------------------------------------------------
diff --git a/ambari-common/src/main/python/resource_management/libraries/functions/copy_tarball.py b/ambari-common/src/main/python/resource_management/libraries/functions/copy_tarball.py
index b05c97c..bf0701c 100644
--- a/ambari-common/src/main/python/resource_management/libraries/functions/copy_tarball.py
+++ b/ambari-common/src/main/python/resource_management/libraries/functions/copy_tarball.py
@@ -28,6 +28,8 @@ from contextlib import closing
 
 from resource_management.libraries.script.script import Script
 from resource_management.libraries.resources.hdfs_resource import HdfsResource
+from resource_management.libraries.functions import component_version
+from resource_management.libraries.functions import lzo_utils
 from resource_management.libraries.functions.default import default
 from resource_management.core import shell
 from resource_management.core import sudo
@@ -45,6 +47,9 @@ STACK_VERSION_PATTERN = "{{ stack_version }}"
 def _prepare_tez_tarball():
   """
   Prepares the Tez tarball by adding the Hadoop native libraries found in the mapreduce tarball.
+  It's very important to use the version of mapreduce which matches tez here.
+  Additionally, this will also copy native LZO to the tez tarball if LZO is enabled and the
+  GPL license has been accepted.
   :return:  the full path of the newly created tez tarball to use
   """
   import tempfile
@@ -79,12 +84,31 @@ def _prepare_tez_tarball():
   if not os.path.exists(tez_lib_dir):
     raise Fail("Unable to seed the Tez tarball with native libraries since the target Tez lib directory {0} does not exist".format(tez_lib_dir))
 
-  # ensure that the tez/lib directory is readable by non-root (which it typically is not)
-  sudo.chmod(tez_lib_dir, 0755)
-
   # copy native libraries from hadoop to tez
   Execute(("cp", "-a", hadoop_lib_native_dir, tez_lib_dir), sudo = True)
 
+  # if enabled, LZO GPL libraries must be copied as well
+  if lzo_utils.should_install_lzo():
+    stack_root = Script.get_stack_root()
+    tez_version = component_version.get_component_repository_version("TEZ")
+    hadoop_lib_native_lzo_dir = os.path.join(stack_root, tez_version, "hadoop", "lib", "native")
+
+    if not sudo.path_isdir(hadoop_lib_native_lzo_dir):
+      Logger.warning("Unable to located native LZO libraries at {0}, falling back to hadoop home".format(hadoop_lib_native_lzo_dir))
+      hadoop_lib_native_lzo_dir = os.path.join(stack_root, "current", "hadoop-client", "lib", "native")
+
+    if not sudo.path_isdir(hadoop_lib_native_lzo_dir):
+      raise Fail("Unable to seed the Tez tarball with native libraries since LZO is enabled but the native LZO libraries could not be found at {0}".format(hadoop_lib_native_lzo_dir))
+
+    Execute(("cp", "-a", hadoop_lib_native_lzo_dir, tez_lib_dir), sudo = True)
+
+
+  # ensure that the tez/lib directory is readable by non-root (which it typically is not)
+  Directory(tez_lib_dir,
+    mode = 0755,
+    cd_access = 'a',
+    recursive_ownership = True)
+
   # create the staging directory so that non-root agents can write to it
   tez_native_tarball_staging_dir = os.path.join(temp_dir, "tez-native-tarball-staging")
   if not os.path.exists(tez_native_tarball_staging_dir):
@@ -111,6 +135,72 @@ def _prepare_tez_tarball():
   return tez_tarball_with_native_lib
 
 
+def _prepare_mapreduce_tarball():
+  """
+  Prepares the mapreduce tarball by including the native LZO libraries if necessary. If LZO is
+  not enabled or has not been opted-in, then this will do nothing and return the original
+  tarball to upload to HDFS.
+  :return:  the full path of the newly created mapreduce tarball to use or the original path
+  if no changes were made
+  """
+  # get the mapreduce tarball to crack open and add LZO libraries to
+  _, mapreduce_source_file, _, _ = get_tarball_paths("mapreduce")
+
+  if not lzo_utils.should_install_lzo():
+    return mapreduce_source_file
+
+  Logger.info("Preparing the mapreduce tarball with native LZO libraries...")
+
+  temp_dir = Script.get_tmp_dir()
+
+  # create the temp staging directories ensuring that non-root agents using tarfile can work with them
+  mapreduce_temp_dir = tempfile.mkdtemp(prefix="mapreduce-tarball-", dir=temp_dir)
+  sudo.chmod(mapreduce_temp_dir, 0777)
+
+  # calculate the source directory for LZO
+  hadoop_lib_native_source_dir = os.path.join(os.path.dirname(mapreduce_source_file), "lib", "native")
+  if not sudo.path_exists(hadoop_lib_native_source_dir):
+    raise Fail("Unable to seed the mapreduce tarball with native LZO libraries since the source Hadoop native lib directory {0} does not exist".format(hadoop_lib_native_source_dir))
+
+  Logger.info("Extracting {0} to {1}".format(mapreduce_source_file, mapreduce_temp_dir))
+  tar_archive.extract_archive(mapreduce_source_file, mapreduce_temp_dir)
+
+  mapreduce_lib_dir = os.path.join(mapreduce_temp_dir, "hadoop", "lib")
+
+  # copy native libraries from source hadoop to target
+  Execute(("cp", "-af", hadoop_lib_native_source_dir, mapreduce_lib_dir), sudo = True)
+
+  # ensure that the extracted hadoop/lib directory is readable by non-root (which it typically is not)
+  Directory(mapreduce_lib_dir,
+    mode = 0755,
+    cd_access = 'a',
+    recursive_ownership = True)
+
+  # create the staging directory so that non-root agents can write to it
+  mapreduce_native_tarball_staging_dir = os.path.join(temp_dir, "mapreduce-native-tarball-staging")
+  if not os.path.exists(mapreduce_native_tarball_staging_dir):
+    Directory(mapreduce_native_tarball_staging_dir,
+      mode = 0777,
+      cd_access = 'a',
+      create_parents = True,
+      recursive_ownership = True)
+
+  mapreduce_tarball_with_native_lib = os.path.join(mapreduce_native_tarball_staging_dir, "mapreduce-native.tar.gz")
+  Logger.info("Creating a new mapreduce tarball at {0}".format(mapreduce_tarball_with_native_lib))
+
+  # tar up mapreduce, passing only the path separator as the arcname so that the archive does not include an absolute path
+  with closing(tarfile.open(mapreduce_tarball_with_native_lib, "w:gz")) as new_tarball:
+    new_tarball.add(mapreduce_temp_dir, arcname = os.path.sep)
+
+  # ensure that the tarball can be read and uploaded
+  sudo.chmod(mapreduce_tarball_with_native_lib, 0744)
+
+  # cleanup
+  sudo.rmtree(mapreduce_temp_dir)
+
+  return mapreduce_tarball_with_native_lib
+
+
 # TODO, in the future, each stack can define its own mapping of tarballs
 # inside the stack definition directory in some sort of xml file.
 # PLEASE DO NOT put this in cluster-env since it becomes much harder to change,
@@ -163,7 +253,8 @@ TARBALL_MAP = {
   "mapreduce": {
     "dirs": ("{0}/{1}/hadoop/mapreduce.tar.gz".format(STACK_ROOT_PATTERN, STACK_VERSION_PATTERN),
                 "/{0}/apps/{1}/mapreduce/mapreduce.tar.gz".format(STACK_NAME_PATTERN, STACK_VERSION_PATTERN)),
-    "service": "MAPREDUCE2"
+    "service": "MAPREDUCE2",
+    "prepare_function": _prepare_mapreduce_tarball
   },
 
   "spark": {

http://git-wip-us.apache.org/repos/asf/ambari/blob/58c7f784/ambari-server/src/main/resources/common-services/TEZ/0.4.0.2.1/package/scripts/tez.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/TEZ/0.4.0.2.1/package/scripts/tez.py b/ambari-server/src/main/resources/common-services/TEZ/0.4.0.2.1/package/scripts/tez.py
index dfa6501..35647e4 100644
--- a/ambari-server/src/main/resources/common-services/TEZ/0.4.0.2.1/package/scripts/tez.py
+++ b/ambari-server/src/main/resources/common-services/TEZ/0.4.0.2.1/package/scripts/tez.py
@@ -23,8 +23,8 @@ import os
 
 # Local Imports
 from resource_management.core.resources.system import Directory, File
+from resource_management.libraries.functions import lzo_utils
 from resource_management.libraries.resources.xml_config import XmlConfig
-from resource_management.libraries.functions.format import format
 from resource_management.core.source import InlineTemplate
 from ambari_commons import OSConst
 from ambari_commons.os_family_impl import OsFamilyFuncImpl, OsFamilyImpl
@@ -37,6 +37,9 @@ def tez(config_dir):
   """
   import params
 
+  # ensure that matching LZO libraries are installed for Tez
+  lzo_utils.install_lzo_if_needed()
+
   Directory(params.tez_etc_dir, mode=0755)
 
   Directory(config_dir,

http://git-wip-us.apache.org/repos/asf/ambari/blob/58c7f784/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/configuration-mapred/mapred-site.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/configuration-mapred/mapred-site.xml b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/configuration-mapred/mapred-site.xml
index 3438c45..398c9d7 100644
--- a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/configuration-mapred/mapred-site.xml
+++ b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/configuration-mapred/mapred-site.xml
@@ -438,7 +438,7 @@
   </property>
   <property>
     <name>mapreduce.admin.user.env</name>
-    <value>LD_LIBRARY_PATH=./mr-framework/hadoop/lib/native:{{hadoop_lib_home}}/native/Linux-{{architecture}}-64</value>
+    <value>LD_LIBRARY_PATH=./mr-framework/hadoop/lib/native:./mr-framework/hadoop/lib/native/Linux-{{architecture}}-64:{{hadoop_lib_home}}/native/Linux-{{architecture}}-64</value>
     <description>
       Additional execution environment entries for map and reduce task processes.
       This is not an additive property. You must preserve the original value if

http://git-wip-us.apache.org/repos/asf/ambari/blob/58c7f784/ambari-server/src/main/resources/stacks/HDP/2.2/services/TEZ/configuration/tez-site.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.2/services/TEZ/configuration/tez-site.xml b/ambari-server/src/main/resources/stacks/HDP/2.2/services/TEZ/configuration/tez-site.xml
index 4ffb7a4..5513ab1 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.2/services/TEZ/configuration/tez-site.xml
+++ b/ambari-server/src/main/resources/stacks/HDP/2.2/services/TEZ/configuration/tez-site.xml
@@ -78,7 +78,7 @@
   </property>
   <property>
     <name>tez.am.launch.env</name>
-    <value>LD_LIBRARY_PATH=./tezlib/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native/Linux-{{architecture}}-64</value>
+    <value>LD_LIBRARY_PATH=./tezlib/lib/native:./tezlib/lib/native/Linux-{{architecture}}-64:/usr/hdp/${hdp.version}/hadoop/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native/Linux-{{architecture}}-64</value>
     <description>
         Additional execution environment entries for tez. This is not an additive property. You must preserve the original value if
         you want to have access to native libraries.
@@ -124,7 +124,7 @@
   </property>
   <property>
     <name>tez.task.launch.env</name>
-    <value>LD_LIBRARY_PATH=./tezlib/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native/Linux-{{architecture}}-64</value>
+    <value>LD_LIBRARY_PATH=./tezlib/lib/native:./tezlib/lib/native/Linux-{{architecture}}-64:/usr/hdp/${hdp.version}/hadoop/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native/Linux-{{architecture}}-64</value>
     <description>
       Additional execution environment entries for tez. This is not an additive property. You must preserve the original value if
       you want to have access to native libraries.

http://git-wip-us.apache.org/repos/asf/ambari/blob/58c7f784/ambari-server/src/main/resources/stacks/HDP/2.2/services/YARN/configuration-mapred/mapred-site.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.2/services/YARN/configuration-mapred/mapred-site.xml b/ambari-server/src/main/resources/stacks/HDP/2.2/services/YARN/configuration-mapred/mapred-site.xml
index 084e912..099e388 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.2/services/YARN/configuration-mapred/mapred-site.xml
+++ b/ambari-server/src/main/resources/stacks/HDP/2.2/services/YARN/configuration-mapred/mapred-site.xml
@@ -20,7 +20,7 @@
 <configuration xmlns:xi="http://www.w3.org/2001/XInclude" supports_final="true">
   <property>
     <name>mapreduce.admin.user.env</name>
-    <value>LD_LIBRARY_PATH=./mr-framework/hadoop/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native/Linux-{{architecture}}-64</value>
+    <value>LD_LIBRARY_PATH=./mr-framework/hadoop/lib/native:./mr-framework/hadoop/lib/native/Linux-{{architecture}}-64:/usr/hdp/${hdp.version}/hadoop/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native/Linux-{{architecture}}-64</value>
     <description>
       Additional execution environment entries for map and reduce task processes.
       This is not an additive property. You must preserve the original value if

http://git-wip-us.apache.org/repos/asf/ambari/blob/58c7f784/ambari-server/src/main/resources/stacks/HDP/2.6/upgrades/config-upgrade.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.6/upgrades/config-upgrade.xml b/ambari-server/src/main/resources/stacks/HDP/2.6/upgrades/config-upgrade.xml
index 4538072..5c672ba 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.6/upgrades/config-upgrade.xml
+++ b/ambari-server/src/main/resources/stacks/HDP/2.6/upgrades/config-upgrade.xml
@@ -269,8 +269,8 @@
         <changes>
           <definition xsi:type="configure" id="hdp_2_6_tez_tarball_ld_library">
             <type>tez-site</type>
-            <set key="tez.am.launch.env" value="LD_LIBRARY_PATH=./tezlib/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native/Linux-{{architecture}}-64"/>
-            <set key="tez.task.launch.env" value="LD_LIBRARY_PATH=./tezlib/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native/Linux-{{architecture}}-64"/>
+            <set key="tez.am.launch.env" value="LD_LIBRARY_PATH=./tezlib/lib/native:./tezlib/lib/native/Linux-{{architecture}}-64:/usr/hdp/${hdp.version}/hadoop/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native/Linux-{{architecture}}-64"/>
+            <set key="tez.task.launch.env" value="LD_LIBRARY_PATH=./tezlib/lib/native:./tezlib/lib/native/Linux-{{architecture}}-64:/usr/hdp/${hdp.version}/hadoop/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native/Linux-{{architecture}}-64"/>
           </definition>
         </changes>
       </component>
@@ -281,7 +281,7 @@
         <changes>
           <definition xsi:type="configure" id="hdp_2_6_mapreduce_tarball_ld_library">
             <type>mapred-site</type>
-            <set key="mapreduce.admin.user.env" value="LD_LIBRARY_PATH=./mr-framework/hadoop/lib/native:{{hadoop_lib_home}}/native/Linux-{{architecture}}-64"/>
+            <set key="mapreduce.admin.user.env" value="LD_LIBRARY_PATH=./mr-framework/hadoop/lib/native:./mr-framework/hadoop/lib/native/Linux-{{architecture}}-64:{{hadoop_lib_home}}/native/Linux-{{architecture}}-64"/>
           </definition>
         </changes>
       </component>