You are viewing a plain text version of this content; the canonical HTML version (with its original link) is available in the mailing-list archive.
Posted to commits@dlab.apache.org by om...@apache.org on 2019/03/26 09:53:15 UTC
[incubator-dlab] 01/01: [DLAB-483]: fixed issue with reconfiguring Spark
This is an automated email from the ASF dual-hosted git repository.
omartushevskyi pushed a commit to branch DLAB-483-RC2
in repository https://gitbox.apache.org/repos/asf/incubator-dlab.git
commit 25aae258c8b390234c7db690c9e82d5877a87a68
Author: Oleh Martushevskyi <Ol...@epam.com>
AuthorDate: Tue Mar 26 11:53:02 2019 +0200
[DLAB-483]: fixed issue with reconfiguring Spark
---
.../src/general/lib/aws/actions_lib.py | 18 ++++++++++++++----
.../src/general/lib/azure/actions_lib.py | 12 ++++++++++--
.../src/general/lib/gcp/actions_lib.py | 14 +++++++++++---
.../src/general/scripts/os/reconfigure_spark.py | 3 +++
4 files changed, 38 insertions(+), 9 deletions(-)
diff --git a/infrastructure-provisioning/src/general/lib/aws/actions_lib.py b/infrastructure-provisioning/src/general/lib/aws/actions_lib.py
index 2209bee..f39f64e 100644
--- a/infrastructure-provisioning/src/general/lib/aws/actions_lib.py
+++ b/infrastructure-provisioning/src/general/lib/aws/actions_lib.py
@@ -1648,7 +1648,7 @@ def configure_zeppelin_emr_interpreter(emr_version, cluster_name, region, spark_
def configure_dataengine_spark(cluster_name, jars_dir, cluster_dir, datalake_enabled, spark_configs=''):
- local("jar_list=`find {0} -name '*.jar' | tr '\\n' ','` ; echo \"spark.jars $jar_list\" >> \
+ local("jar_list=`find {0} -name '*.jar' | tr '\\n' ',' | sed 's/,$//'` ; echo \"spark.jars $jar_list\" >> \
/tmp/{1}/notebook_spark-defaults_local.conf".format(jars_dir, cluster_name))
region = local('curl http://169.254.169.254/latest/meta-data/placement/availability-zone', capture=True)[:-1]
if region == 'us-east-1':
@@ -1657,9 +1657,19 @@ def configure_dataengine_spark(cluster_name, jars_dir, cluster_dir, datalake_ena
endpoint_url = "https://s3.{}.amazonaws.com.cn".format(region)
else:
endpoint_url = 'https://s3-' + region + '.amazonaws.com'
- local("""bash -c 'echo "spark.hadoop.fs.s3a.endpoint """ + endpoint_url + """" >> /tmp/{}/notebook_spark-defaults_local.conf'""".format(cluster_name))
- local('echo "spark.hadoop.fs.s3a.server-side-encryption-algorithm AES256" >> /tmp/{}/notebook_spark-defaults_local.conf'.format(cluster_name))
- local('cp -f /tmp/{0}/notebook_spark-defaults_local.conf {1}spark/conf/spark-defaults.conf'.format(cluster_name, cluster_dir))
+ local("""bash -c 'echo "spark.hadoop.fs.s3a.endpoint """ + endpoint_url +
+ """" >> /tmp/{}/notebook_spark-defaults_local.conf'""".format(cluster_name))
+ local('echo "spark.hadoop.fs.s3a.server-side-encryption-algorithm AES256" >> '
+ '/tmp/{}/notebook_spark-defaults_local.conf'.format(cluster_name))
+ if os.path.exists('{0}spark/conf/spark-defaults.conf'.format(cluster_dir)):
+ additional_spark_properties = local('diff --changed-group-format="%>" --unchanged-group-format="" '
+ '/tmp/{0}/notebook_spark-defaults_local.conf '
+ '{1}spark/conf/spark-defaults.conf | grep -v "^#"'.format(
+ cluster_name, cluster_dir), capture=True)
+ for property in additional_spark_properties.split('\n'):
+ local('echo "{0}" >> /tmp/{1}/notebook_spark-defaults_local.conf'.format(property, cluster_name))
+ local('cp -f /tmp/{0}/notebook_spark-defaults_local.conf {1}spark/conf/spark-defaults.conf'.format(cluster_name,
+ cluster_dir))
if spark_configs:
spark_configurations = ast.literal_eval(spark_configs)
new_spark_defaults = list()
diff --git a/infrastructure-provisioning/src/general/lib/azure/actions_lib.py b/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
index c9195ab..2719ec4 100644
--- a/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
+++ b/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
@@ -1150,9 +1150,17 @@ def configure_local_spark(jars_dir, templates_dir, memory_type='driver'):
def configure_dataengine_spark(cluster_name, jars_dir, cluster_dir, datalake_enabled, spark_configs=''):
- local("jar_list=`find {0} -name '*.jar' | tr '\\n' ','` ; echo \"spark.jars $jar_list\" >> \
+ local("jar_list=`find {0} -name '*.jar' | tr '\\n' ',' | sed 's/,$//'` ; echo \"spark.jars $jar_list\" >> \
/tmp/{1}/notebook_spark-defaults_local.conf".format(jars_dir, cluster_name))
- local('cp -f /tmp/{0}/notebook_spark-defaults_local.conf {1}spark/conf/spark-defaults.conf'.format(cluster_name, cluster_dir))
+ if os.path.exists('{0}spark/conf/spark-defaults.conf'.format(cluster_dir)):
+ additional_spark_properties = local('diff --changed-group-format="%>" --unchanged-group-format="" '
+ '/tmp/{0}/notebook_spark-defaults_local.conf '
+ '{1}spark/conf/spark-defaults.conf | grep -v "^#"'.format(
+ cluster_name, cluster_dir), capture=True)
+ for property in additional_spark_properties.split('\n'):
+ local('echo "{0}" >> /tmp/{1}/notebook_spark-defaults_local.conf'.format(property, cluster_name))
+ local('cp -f /tmp/{0}/notebook_spark-defaults_local.conf {1}spark/conf/spark-defaults.conf'.format(cluster_name,
+ cluster_dir))
if datalake_enabled == 'false':
local('cp -f /opt/spark/conf/core-site.xml {}spark/conf/'.format(cluster_dir))
else:
diff --git a/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py b/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py
index 396b466..23a3941 100644
--- a/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py
+++ b/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py
@@ -1338,9 +1338,17 @@ def install_dataengine_spark(cluster_name, spark_link, spark_version, hadoop_ver
def configure_dataengine_spark(cluster_name, jars_dir, cluster_dir, datalake_enabled, spark_configs=''):
- local("jar_list=`find {0} -name '*.jar' | tr '\\n' ','` ; echo \"spark.jars $jar_list\" >> \
- /tmp/{1}notebook_spark-defaults_local.conf".format(jars_dir, cluster_name))
- local('cp -f /tmp/{0}/notebook_spark-defaults_local.conf {1}spark/conf/spark-defaults.conf'.format(cluster_name, cluster_dir))
+ local("jar_list=`find {0} -name '*.jar' | tr '\\n' ',' | sed 's/,$//'` ; echo \"spark.jars $jar_list\" >> \
+ /tmp/{1}/notebook_spark-defaults_local.conf".format(jars_dir, cluster_name))
+ if os.path.exists('{0}spark/conf/spark-defaults.conf'.format(cluster_dir)):
+ additional_spark_properties = local('diff --changed-group-format="%>" --unchanged-group-format="" '
+ '/tmp/{0}/notebook_spark-defaults_local.conf '
+ '{1}spark/conf/spark-defaults.conf | grep -v "^#"'.format(
+ cluster_name, cluster_dir), capture=True)
+ for property in additional_spark_properties.split('\n'):
+ local('echo "{0}" >> /tmp/{1}/notebook_spark-defaults_local.conf'.format(property, cluster_name))
+ local('cp -f /tmp/{0}/notebook_spark-defaults_local.conf {1}spark/conf/spark-defaults.conf'.format(cluster_name,
+ cluster_dir))
local('cp -f /opt/spark/conf/core-site.xml {}spark/conf/'.format(cluster_dir))
if spark_configs:
spark_configurations = ast.literal_eval(spark_configs)
diff --git a/infrastructure-provisioning/src/general/scripts/os/reconfigure_spark.py b/infrastructure-provisioning/src/general/scripts/os/reconfigure_spark.py
index 0d645ff..9be3147 100644
--- a/infrastructure-provisioning/src/general/scripts/os/reconfigure_spark.py
+++ b/infrastructure-provisioning/src/general/scripts/os/reconfigure_spark.py
@@ -60,6 +60,9 @@ if __name__ == "__main__":
'/tmp/notebook_reconfigure_dataengine_spark.py')
sudo('mv /tmp/notebook_reconfigure_dataengine_spark.py '
'/usr/local/bin/notebook_reconfigure_dataengine_spark.py')
+ sudo('mkdir -p /tmp/{}'.format(args.cluster_name))
+ put('{}notebook_spark-defaults_local.conf'.format(templates_dir),
+ '/tmp/{}/notebook_spark-defaults_local.conf'.format(args.cluster_name), use_sudo=True)
cluster_dir = '/opt/' + args.cluster_name + '/'
if 'azure_datalake_enable' in os.environ:
datalake_enabled = os.environ['azure_datalake_enable']
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@dlab.apache.org
For additional commands, e-mail: commits-help@dlab.apache.org