Posted to commits@dlab.apache.org by om...@apache.org on 2019/03/26 09:53:14 UTC

[incubator-dlab] branch DLAB-483-RC2 created (now 25aae25)

This is an automated email from the ASF dual-hosted git repository.

omartushevskyi pushed a change to branch DLAB-483-RC2
in repository https://gitbox.apache.org/repos/asf/incubator-dlab.git.


      at 25aae25  [DLAB-483]: fixed issue with reconfiguring Spark

This branch includes the following new commits:

     new 25aae25  [DLAB-483]: fixed issue with reconfiguring Spark

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email. Revisions
listed as "add" were already present in the repository and have only
been added to this reference.





[incubator-dlab] 01/01: [DLAB-483]: fixed issue with reconfiguring Spark

Posted by om...@apache.org.

omartushevskyi pushed a commit to branch DLAB-483-RC2
in repository https://gitbox.apache.org/repos/asf/incubator-dlab.git

commit 25aae258c8b390234c7db690c9e82d5877a87a68
Author: Oleh Martushevskyi <Ol...@epam.com>
AuthorDate: Tue Mar 26 11:53:02 2019 +0200

    [DLAB-483]: fixed issue with reconfiguring Spark
---
 .../src/general/lib/aws/actions_lib.py                 | 18 ++++++++++++++----
 .../src/general/lib/azure/actions_lib.py               | 12 ++++++++++--
 .../src/general/lib/gcp/actions_lib.py                 | 14 +++++++++++---
 .../src/general/scripts/os/reconfigure_spark.py        |  3 +++
 4 files changed, 38 insertions(+), 9 deletions(-)
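
Editor's note: the change is the same in all three cloud variants of
actions_lib.py (AWS, Azure, GCP). Before the freshly generated
/tmp/<cluster>/notebook_spark-defaults_local.conf overwrites an existing
spark-defaults.conf, any non-comment properties present only in the current
file are appended to the new one, so settings added since the last run
survive a reconfigure. The GCP variant additionally restores the missing "/"
in the /tmp path. Below is a minimal, self-contained sketch of that merge
step; it uses subprocess instead of fabric's local(), and
merge_spark_defaults is a hypothetical name, not a function from this
repository.

    import os
    import subprocess

    def merge_spark_defaults(new_conf, current_conf):
        """Append properties found only in current_conf to new_conf.

        Mirrors the GNU diff trick used in the commit:
        --changed-group-format="%>" prints only lines coming from the
        second file, --unchanged-group-format="" suppresses everything
        the two files share, and grep -v "^#" filters out comments.
        """
        if not os.path.exists(current_conf):
            return
        # diff exits 1 when the files differ, so no check=True here.
        result = subprocess.run(
            ['bash', '-c',
             'diff --changed-group-format="%>" --unchanged-group-format="" '
             '{0} {1} | grep -v "^#"'.format(new_conf, current_conf)],
            stdout=subprocess.PIPE, universal_newlines=True)
        with open(new_conf, 'a') as conf:
            for prop in result.stdout.splitlines():
                if prop:
                    conf.write(prop + '\n')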

diff --git a/infrastructure-provisioning/src/general/lib/aws/actions_lib.py b/infrastructure-provisioning/src/general/lib/aws/actions_lib.py
index 2209bee..f39f64e 100644
--- a/infrastructure-provisioning/src/general/lib/aws/actions_lib.py
+++ b/infrastructure-provisioning/src/general/lib/aws/actions_lib.py
@@ -1648,7 +1648,7 @@ def configure_zeppelin_emr_interpreter(emr_version, cluster_name, region, spark_
 
 
 def configure_dataengine_spark(cluster_name, jars_dir, cluster_dir, datalake_enabled, spark_configs=''):
-    local("jar_list=`find {0} -name '*.jar' | tr '\\n' ','` ; echo \"spark.jars   $jar_list\" >> \
+    local("jar_list=`find {0} -name '*.jar' | tr '\\n' ',' | sed 's/,$//'` ; echo \"spark.jars   $jar_list\" >> \
           /tmp/{1}/notebook_spark-defaults_local.conf".format(jars_dir, cluster_name))
     region = local('curl http://169.254.169.254/latest/meta-data/placement/availability-zone', capture=True)[:-1]
     if region == 'us-east-1':
@@ -1657,9 +1657,19 @@ def configure_dataengine_spark(cluster_name, jars_dir, cluster_dir, datalake_ena
         endpoint_url = "https://s3.{}.amazonaws.com.cn".format(region)
     else:
         endpoint_url = 'https://s3-' + region + '.amazonaws.com'
-    local("""bash -c 'echo "spark.hadoop.fs.s3a.endpoint    """ + endpoint_url + """" >> /tmp/{}/notebook_spark-defaults_local.conf'""".format(cluster_name))
-    local('echo "spark.hadoop.fs.s3a.server-side-encryption-algorithm   AES256" >> /tmp/{}/notebook_spark-defaults_local.conf'.format(cluster_name))
-    local('cp -f /tmp/{0}/notebook_spark-defaults_local.conf  {1}spark/conf/spark-defaults.conf'.format(cluster_name, cluster_dir))
+    local("""bash -c 'echo "spark.hadoop.fs.s3a.endpoint    """ + endpoint_url +
+          """" >> /tmp/{}/notebook_spark-defaults_local.conf'""".format(cluster_name))
+    local('echo "spark.hadoop.fs.s3a.server-side-encryption-algorithm   AES256" >> '
+          '/tmp/{}/notebook_spark-defaults_local.conf'.format(cluster_name))
+    if os.path.exists('{0}spark/conf/spark-defaults.conf'.format(cluster_dir)):
+        additional_spark_properties = local('diff --changed-group-format="%>" --unchanged-group-format="" '
+                                            '/tmp/{0}/notebook_spark-defaults_local.conf '
+                                            '{1}spark/conf/spark-defaults.conf | grep -v "^#"'.format(
+            cluster_name, cluster_dir), capture=True)
+        for property in additional_spark_properties.split('\n'):
+            local('echo "{0}" >> /tmp/{1}/notebook_spark-defaults_local.conf'.format(property, cluster_name))
+    local('cp -f /tmp/{0}/notebook_spark-defaults_local.conf  {1}spark/conf/spark-defaults.conf'.format(cluster_name,
+                                                                                                        cluster_dir))
     if spark_configs:
         spark_configurations = ast.literal_eval(spark_configs)
         new_spark_defaults = list()
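
Editor's note: the other fix in the hunk above is the added sed 's/,$//'.
The original pipeline (find ... | tr '\n' ',') always left a dangling comma
at the end of the spark.jars value, because tr also converts the final
newline. A quick illustration of the before/after values (jar names are
hypothetical):

    jars = ['first.jar', 'second.jar']
    print(','.join(jars) + ',')  # old: first.jar,second.jar,  (trailing comma)
    print(','.join(jars))        # new: first.jar,second.jar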
diff --git a/infrastructure-provisioning/src/general/lib/azure/actions_lib.py b/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
index c9195ab..2719ec4 100644
--- a/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
+++ b/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
@@ -1150,9 +1150,17 @@ def configure_local_spark(jars_dir, templates_dir, memory_type='driver'):
 
 
 def configure_dataengine_spark(cluster_name, jars_dir, cluster_dir, datalake_enabled, spark_configs=''):
-    local("jar_list=`find {0} -name '*.jar' | tr '\\n' ','` ; echo \"spark.jars   $jar_list\" >> \
+    local("jar_list=`find {0} -name '*.jar' | tr '\\n' ',' | sed 's/,$//'` ; echo \"spark.jars   $jar_list\" >> \
           /tmp/{1}/notebook_spark-defaults_local.conf".format(jars_dir, cluster_name))
-    local('cp -f /tmp/{0}/notebook_spark-defaults_local.conf  {1}spark/conf/spark-defaults.conf'.format(cluster_name, cluster_dir))
+    if os.path.exists('{0}spark/conf/spark-defaults.conf'.format(cluster_dir)):
+        additional_spark_properties = local('diff --changed-group-format="%>" --unchanged-group-format="" '
+                                            '/tmp/{0}/notebook_spark-defaults_local.conf '
+                                            '{1}spark/conf/spark-defaults.conf | grep -v "^#"'.format(
+            cluster_name, cluster_dir), capture=True)
+        for property in additional_spark_properties.split('\n'):
+            local('echo "{0}" >> /tmp/{1}/notebook_spark-defaults_local.conf'.format(property, cluster_name))
+    local('cp -f /tmp/{0}/notebook_spark-defaults_local.conf  {1}spark/conf/spark-defaults.conf'.format(cluster_name,
+                                                                                                        cluster_dir))
     if datalake_enabled == 'false':
         local('cp -f /opt/spark/conf/core-site.xml {}spark/conf/'.format(cluster_dir))
     else:
diff --git a/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py b/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py
index 396b466..23a3941 100644
--- a/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py
+++ b/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py
@@ -1338,9 +1338,17 @@ def install_dataengine_spark(cluster_name, spark_link, spark_version, hadoop_ver
 
 
 def configure_dataengine_spark(cluster_name, jars_dir, cluster_dir, datalake_enabled, spark_configs=''):
-    local("jar_list=`find {0} -name '*.jar' | tr '\\n' ','` ; echo \"spark.jars   $jar_list\" >> \
-          /tmp/{1}notebook_spark-defaults_local.conf".format(jars_dir, cluster_name))
-    local('cp -f /tmp/{0}/notebook_spark-defaults_local.conf  {1}spark/conf/spark-defaults.conf'.format(cluster_name, cluster_dir))
+    local("jar_list=`find {0} -name '*.jar' | tr '\\n' ',' | sed 's/,$//'` ; echo \"spark.jars $jar_list\" >> \
+          /tmp/{1}/notebook_spark-defaults_local.conf".format(jars_dir, cluster_name))
+    if os.path.exists('{0}spark/conf/spark-defaults.conf'.format(cluster_dir)):
+        additional_spark_properties = local('diff --changed-group-format="%>" --unchanged-group-format="" '
+                                            '/tmp/{0}/notebook_spark-defaults_local.conf '
+                                            '{1}spark/conf/spark-defaults.conf | grep -v "^#"'.format(
+            cluster_name, cluster_dir), capture=True)
+        for property in additional_spark_properties.split('\n'):
+            local('echo "{0}" >> /tmp/{1}/notebook_spark-defaults_local.conf'.format(property, cluster_name))
+    local('cp -f /tmp/{0}/notebook_spark-defaults_local.conf  {1}spark/conf/spark-defaults.conf'.format(cluster_name,
+                                                                                                        cluster_dir))
     local('cp -f /opt/spark/conf/core-site.xml {}spark/conf/'.format(cluster_dir))
     if spark_configs:
         spark_configurations = ast.literal_eval(spark_configs)
diff --git a/infrastructure-provisioning/src/general/scripts/os/reconfigure_spark.py b/infrastructure-provisioning/src/general/scripts/os/reconfigure_spark.py
index 0d645ff..9be3147 100644
--- a/infrastructure-provisioning/src/general/scripts/os/reconfigure_spark.py
+++ b/infrastructure-provisioning/src/general/scripts/os/reconfigure_spark.py
@@ -60,6 +60,9 @@ if __name__ == "__main__":
                 '/tmp/notebook_reconfigure_dataengine_spark.py')
             sudo('mv /tmp/notebook_reconfigure_dataengine_spark.py '
                  '/usr/local/bin/notebook_reconfigure_dataengine_spark.py')
+        sudo('mkdir -p /tmp/{}'.format(args.cluster_name))
+        put('{}notebook_spark-defaults_local.conf'.format(templates_dir),
+            '/tmp/{}/notebook_spark-defaults_local.conf'.format(args.cluster_name), use_sudo=True)
         cluster_dir = '/opt/' + args.cluster_name + '/'
         if 'azure_datalake_enable' in os.environ:
             datalake_enabled = os.environ['azure_datalake_enable']

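Editor's note: the reconfigure_spark.py change supplies the baseline for the
merge described above. It creates /tmp/<cluster> on the instance and uploads
the template notebook_spark-defaults_local.conf there, so that
configure_dataengine_spark has a fresh file to diff against the current
spark-defaults.conf. An end-to-end illustration using the hypothetical
merge_spark_defaults sketch above (all paths and values are made up):

    # A generated template plus a current conf carrying a user tweak.
    with open('/tmp/new.conf', 'w') as f:
        f.write('spark.jars /opt/jars/first.jar,/opt/jars/second.jar\n')
    with open('/tmp/current.conf', 'w') as f:
        f.write('spark.jars /opt/jars/first.jar,/opt/jars/second.jar\n'
                'spark.executor.memory 4g\n')

    merge_spark_defaults('/tmp/new.conf', '/tmp/current.conf')
    # /tmp/new.conf now also contains "spark.executor.memory 4g", so
    # copying it over spark-defaults.conf preserves the user's setting.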

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@dlab.apache.org
For additional commands, e-mail: commits-help@dlab.apache.org