Posted to commits@dlab.apache.org by om...@apache.org on 2019/03/26 09:53:15 UTC

[incubator-dlab] 01/01: [DLAB-483]: fixed issue with reconfiguring Spark

This is an automated email from the ASF dual-hosted git repository.

omartushevskyi pushed a commit to branch DLAB-483-RC2
in repository https://gitbox.apache.org/repos/asf/incubator-dlab.git

commit 25aae258c8b390234c7db690c9e82d5877a87a68
Author: Oleh Martushevskyi <Ol...@epam.com>
AuthorDate: Tue Mar 26 11:53:02 2019 +0200

    [DLAB-483]: fixed issue with reconfiguring Spark
---
 .../src/general/lib/aws/actions_lib.py                 | 18 ++++++++++++++----
 .../src/general/lib/azure/actions_lib.py               | 12 ++++++++++--
 .../src/general/lib/gcp/actions_lib.py                 | 14 +++++++++++---
 .../src/general/scripts/os/reconfigure_spark.py        |  3 +++
 4 files changed, 38 insertions(+), 9 deletions(-)
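The substance of the fix: previously each cloud's configure_dataengine_spark() overwrote the live spark-defaults.conf with a freshly generated template, discarding any properties added to the cluster since deployment. The change below diffs the template against the existing file and appends non-comment lines found only in the deployed conf before copying the merged file back. A minimal standalone Python sketch of that merge idea (the file paths are hypothetical, and a set difference stands in for GNU diff, so unlike the real command it ignores line order):

    # merge_spark_defaults.py - illustrative only
    template_path = '/tmp/cluster1/notebook_spark-defaults_local.conf'  # hypothetical
    live_path = '/opt/cluster1/spark/conf/spark-defaults.conf'          # hypothetical

    with open(template_path) as f:
        template_lines = f.read().splitlines()
    with open(live_path) as f:
        live_lines = f.read().splitlines()

    # Keep properties that exist only in the live file, skipping comments
    # and blanks, so user-added settings survive the reconfiguration.
    extra = [line for line in live_lines
             if line not in set(template_lines)
             and line.strip() and not line.startswith('#')]

    with open(template_path, 'a') as f:
        for line in extra:
            f.write(line + '\n')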

diff --git a/infrastructure-provisioning/src/general/lib/aws/actions_lib.py b/infrastructure-provisioning/src/general/lib/aws/actions_lib.py
index 2209bee..f39f64e 100644
--- a/infrastructure-provisioning/src/general/lib/aws/actions_lib.py
+++ b/infrastructure-provisioning/src/general/lib/aws/actions_lib.py
@@ -1648,7 +1648,7 @@ def configure_zeppelin_emr_interpreter(emr_version, cluster_name, region, spark_
 
 
 def configure_dataengine_spark(cluster_name, jars_dir, cluster_dir, datalake_enabled, spark_configs=''):
-    local("jar_list=`find {0} -name '*.jar' | tr '\\n' ','` ; echo \"spark.jars   $jar_list\" >> \
+    local("jar_list=`find {0} -name '*.jar' | tr '\\n' ',' | sed 's/,$//'` ; echo \"spark.jars   $jar_list\" >> \
           /tmp/{1}/notebook_spark-defaults_local.conf".format(jars_dir, cluster_name))
     region = local('curl http://169.254.169.254/latest/meta-data/placement/availability-zone', capture=True)[:-1]
     if region == 'us-east-1':
@@ -1657,9 +1657,19 @@ def configure_dataengine_spark(cluster_name, jars_dir, cluster_dir, datalake_ena
         endpoint_url = "https://s3.{}.amazonaws.com.cn".format(region)
     else:
         endpoint_url = 'https://s3-' + region + '.amazonaws.com'
-    local("""bash -c 'echo "spark.hadoop.fs.s3a.endpoint    """ + endpoint_url + """" >> /tmp/{}/notebook_spark-defaults_local.conf'""".format(cluster_name))
-    local('echo "spark.hadoop.fs.s3a.server-side-encryption-algorithm   AES256" >> /tmp/{}/notebook_spark-defaults_local.conf'.format(cluster_name))
-    local('cp -f /tmp/{0}/notebook_spark-defaults_local.conf  {1}spark/conf/spark-defaults.conf'.format(cluster_name, cluster_dir))
+    local("""bash -c 'echo "spark.hadoop.fs.s3a.endpoint    """ + endpoint_url +
+          """" >> /tmp/{}/notebook_spark-defaults_local.conf'""".format(cluster_name))
+    local('echo "spark.hadoop.fs.s3a.server-side-encryption-algorithm   AES256" >> '
+          '/tmp/{}/notebook_spark-defaults_local.conf'.format(cluster_name))
+    if os.path.exists('{0}spark/conf/spark-defaults.conf'.format(cluster_dir)):
+        additional_spark_properties = local('diff --changed-group-format="%>" --unchanged-group-format="" '
+                                            '/tmp/{0}/notebook_spark-defaults_local.conf '
+                                            '{1}spark/conf/spark-defaults.conf | grep -v "^#"'.format(
+            cluster_name, cluster_dir), capture=True)
+        for property in additional_spark_properties.split('\n'):
+            local('echo "{0}" >> /tmp/{1}/notebook_spark-defaults_local.conf'.format(property, cluster_name))
+    local('cp -f /tmp/{0}/notebook_spark-defaults_local.conf  {1}spark/conf/spark-defaults.conf'.format(cluster_name,
+                                                                                                        cluster_dir))
     if spark_configs:
         spark_configurations = ast.literal_eval(spark_configs)
         new_spark_defaults = list()
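A smaller fix rides along in the first changed line of this hunk: tr '\n' ',' leaves a trailing comma on the jar list, which Spark would read as an empty entry in spark.jars, and the added sed 's/,$//' trims it. A hedged pure-Python equivalent of the jar-list construction (jars_dir is hypothetical; str.join never emits a trailing separator, so no sed-style cleanup is needed):

    # join_jars.py - illustrative only
    import glob
    import os

    jars_dir = '/opt/cluster1/jars'  # hypothetical
    # Rough equivalent of: find {jars_dir} -name '*.jar' | tr '\n' ',' | sed 's/,$//'
    jar_paths = glob.glob(os.path.join(jars_dir, '**', '*.jar'), recursive=True)
    jar_list = ','.join(jar_paths)
    print('spark.jars   ' + jar_list)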
diff --git a/infrastructure-provisioning/src/general/lib/azure/actions_lib.py b/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
index c9195ab..2719ec4 100644
--- a/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
+++ b/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
@@ -1150,9 +1150,17 @@ def configure_local_spark(jars_dir, templates_dir, memory_type='driver'):
 
 
 def configure_dataengine_spark(cluster_name, jars_dir, cluster_dir, datalake_enabled, spark_configs=''):
-    local("jar_list=`find {0} -name '*.jar' | tr '\\n' ','` ; echo \"spark.jars   $jar_list\" >> \
+    local("jar_list=`find {0} -name '*.jar' | tr '\\n' ',' | sed 's/,$//'` ; echo \"spark.jars   $jar_list\" >> \
           /tmp/{1}/notebook_spark-defaults_local.conf".format(jars_dir, cluster_name))
-    local('cp -f /tmp/{0}/notebook_spark-defaults_local.conf  {1}spark/conf/spark-defaults.conf'.format(cluster_name, cluster_dir))
+    if os.path.exists('{0}spark/conf/spark-defaults.conf'.format(cluster_dir)):
+        additional_spark_properties = local('diff --changed-group-format="%>" --unchanged-group-format="" '
+                                            '/tmp/{0}/notebook_spark-defaults_local.conf '
+                                            '{1}spark/conf/spark-defaults.conf | grep -v "^#"'.format(
+            cluster_name, cluster_dir), capture=True)
+        for property in additional_spark_properties.split('\n'):
+            local('echo "{0}" >> /tmp/{1}/notebook_spark-defaults_local.conf'.format(property, cluster_name))
+    local('cp -f /tmp/{0}/notebook_spark-defaults_local.conf  {1}spark/conf/spark-defaults.conf'.format(cluster_name,
+                                                                                                        cluster_dir))
     if datalake_enabled == 'false':
         local('cp -f /opt/spark/conf/core-site.xml {}spark/conf/'.format(cluster_dir))
     else:
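The diff invocation shared by all three cloud hunks leans on GNU diff's group formats: --unchanged-group-format="" suppresses lines common to both files, and --changed-group-format="%>" prints only lines taken from the second file, i.e. properties present in the deployed conf but absent from the template; grep -v "^#" then drops comments. A hedged subprocess equivalent for trying the command outside Fabric (paths hypothetical):

    # show_extra_properties.py - illustrative only
    import subprocess

    cmd = ('diff --changed-group-format="%>" --unchanged-group-format="" '
           '/tmp/cluster1/notebook_spark-defaults_local.conf '
           '/opt/cluster1/spark/conf/spark-defaults.conf | grep -v "^#"')
    # diff exits non-zero when the files differ, so don't use check=True here.
    result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
    for line in result.stdout.splitlines():
        if line.strip():
            print('extra property:', line)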
diff --git a/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py b/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py
index 396b466..23a3941 100644
--- a/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py
+++ b/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py
@@ -1338,9 +1338,17 @@ def install_dataengine_spark(cluster_name, spark_link, spark_version, hadoop_ver
 
 
 def configure_dataengine_spark(cluster_name, jars_dir, cluster_dir, datalake_enabled, spark_configs=''):
-    local("jar_list=`find {0} -name '*.jar' | tr '\\n' ','` ; echo \"spark.jars   $jar_list\" >> \
-          /tmp/{1}notebook_spark-defaults_local.conf".format(jars_dir, cluster_name))
-    local('cp -f /tmp/{0}/notebook_spark-defaults_local.conf  {1}spark/conf/spark-defaults.conf'.format(cluster_name, cluster_dir))
+    local("jar_list=`find {0} -name '*.jar' | tr '\\n' ',' | sed 's/,$//'` ; echo \"spark.jars $jar_list\" >> \
+    	          /tmp/{1}/notebook_spark-defaults_local.conf".format(jars_dir, cluster_name))
+    if os.path.exists('{0}spark/conf/spark-defaults.conf'.format(cluster_dir)):
+        additional_spark_properties = local('diff --changed-group-format="%>" --unchanged-group-format="" '
+                                            '/tmp/{0}/notebook_spark-defaults_local.conf '
+                                            '{1}spark/conf/spark-defaults.conf | grep -v "^#"'.format(
+            cluster_name, cluster_dir), capture=True)
+        for property in additional_spark_properties.split('\n'):
+            local('echo "{0}" >> /tmp/{1}/notebook_spark-defaults_local.conf'.format(property, cluster_name))
+    local('cp -f /tmp/{0}/notebook_spark-defaults_local.conf  {1}spark/conf/spark-defaults.conf'.format(cluster_name,
+                                                                                                        cluster_dir))
     local('cp -f /opt/spark/conf/core-site.xml {}spark/conf/'.format(cluster_dir))
     if spark_configs:
         spark_configurations = ast.literal_eval(spark_configs)
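One caveat shared by these three hunks: when the template and the deployed conf are identical, the captured diff output is an empty string, ''.split('\n') still yields [''], and the loop echoes a blank line into the conf; the loop variable "property" also shadows a Python builtin. A hedged, slightly hardened version of the loop, assuming the same Fabric local() and variables as the surrounding code:

    # Append only non-empty lines; avoid shadowing the builtin 'property'.
    for prop in additional_spark_properties.split('\n'):
        if prop.strip():
            local('echo "{0}" >> /tmp/{1}/notebook_spark-defaults_local.conf'.format(
                prop, cluster_name))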
diff --git a/infrastructure-provisioning/src/general/scripts/os/reconfigure_spark.py b/infrastructure-provisioning/src/general/scripts/os/reconfigure_spark.py
index 0d645ff..9be3147 100644
--- a/infrastructure-provisioning/src/general/scripts/os/reconfigure_spark.py
+++ b/infrastructure-provisioning/src/general/scripts/os/reconfigure_spark.py
@@ -60,6 +60,9 @@ if __name__ == "__main__":
                 '/tmp/notebook_reconfigure_dataengine_spark.py')
             sudo('mv /tmp/notebook_reconfigure_dataengine_spark.py '
                  '/usr/local/bin/notebook_reconfigure_dataengine_spark.py')
+        sudo('mkdir -p /tmp/{}'.format(args.cluster_name))
+        put('{}notebook_spark-defaults_local.conf'.format(templates_dir),
+            '/tmp/{}/notebook_spark-defaults_local.conf'.format(args.cluster_name), use_sudo=True)
         cluster_dir = '/opt/' + args.cluster_name + '/'
         if 'azure_datalake_enable' in os.environ:
             datalake_enabled = os.environ['azure_datalake_enable']
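The reconfigure-script change stages the template on the remote notebook host before the per-cloud merge runs: sudo('mkdir -p ...') creates the per-cluster temp directory, and put(..., use_sudo=True) uploads via a temporary file so the copy into a root-owned path succeeds. A minimal Fabric 1.x sketch of the same staging pattern (host, user, and paths are hypothetical):

    # stage_template.py - illustrative only, Fabric 1.x API
    from fabric.api import env, sudo, put

    env.host_string = 'dlab-user@10.0.0.5'  # hypothetical host
    cluster_name = 'cluster1'               # hypothetical

    # Ensure the per-cluster staging directory exists on the remote host.
    sudo('mkdir -p /tmp/{}'.format(cluster_name))
    # use_sudo=True lets put() write to paths the SSH user cannot touch directly.
    put('templates/notebook_spark-defaults_local.conf',
        '/tmp/{}/notebook_spark-defaults_local.conf'.format(cluster_name),
        use_sudo=True)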


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@dlab.apache.org
For additional commands, e-mail: commits-help@dlab.apache.org