Posted to commits@dlab.apache.org by my...@apache.org on 2020/07/28 06:53:39 UTC

[incubator-dlab] branch DLAB-515 updated: [DLAB-515] - [GCP] EDGE node as NAT implemented, [GCP] Dataproc kernels connection via sparkmagic/livy implemented, [AWS] [GCP] [Azure] Spark Cluster connection via sparkmagic/livy implemented

This is an automated email from the ASF dual-hosted git repository.

mykolabodnar pushed a commit to branch DLAB-515
in repository https://gitbox.apache.org/repos/asf/incubator-dlab.git


The following commit(s) were added to refs/heads/DLAB-515 by this push:
     new dc8e24c  [DLAB-515] - [GCP] EDGE node as NAT implemented, [GCP] Dataproc kernels connection via sparkmagic/livy implemented, [AWS] [GCP] [Azure] Spark Cluster connection via sparkmagic/livy implemented
dc8e24c is described below

commit dc8e24ca7937734ef8c580590962d56ea89532b5
Author: bodnarmykola <bo...@gmail.com>
AuthorDate: Tue Jul 28 09:53:11 2020 +0300

    [DLAB-515] - [GCP] EDGE node as NAT implemented, [GCP] Dataproc kernels connection via sparkmagic/livy implemented, [AWS] [GCP] [Azure] Spark Cluster connection via sparkmagic/livy implemented
---
 .../src/dataengine/scripts/configure_dataengine.py | 22 ++++++++
 .../src/general/files/aws/dataengine_Dockerfile    |  2 +
 .../src/general/files/azure/dataengine_Dockerfile  |  2 +
 .../src/general/files/gcp/dataengine_Dockerfile    |  2 +
 .../src/general/files/gcp/jupyter_Dockerfile       |  1 +
 .../src/general/files/gcp/ssn_policy.json          |  4 +-
 .../src/general/lib/gcp/actions_lib.py             | 31 ++++++++++++
 .../src/general/lib/gcp/meta_lib.py                | 19 +++++++
 .../src/general/lib/os/debian/edge_lib.py          |  5 ++
 .../scripts/aws/dataengine-service_configure.py    |  2 +-
 .../jupyter_dataengine-service_create_configs.py   |  2 +-
 .../general/scripts/gcp/common_create_nat_route.py | 58 ++++++++++++++++++++++
 .../scripts/gcp/dataengine-service_prepare.py      |  2 +
 .../src/general/scripts/gcp/edge_configure.py      | 19 +++++++
 .../jupyter_dataengine-service_create_configs.py   | 53 +++++++++++++++-----
 .../jupyter_install_dataengine-service_kernels.py  | 24 ++++++---
 .../src/general/scripts/gcp/project_prepare.py     | 38 ++++++++++++++
 .../os/jupyter_dataengine_create_configs.py        | 49 ++++++++++++++----
 .../os/jupyter_install_dataengine_kernels.py       | 17 ++++---
 .../templates/gcp/dataengine-service_cluster.json  | 12 ++++-
 .../os/debian/livy.service}                        | 31 +++++-------
 .../src/general/templates/os/livy-env.sh           | 22 ++++++++
 .../src/project/templates/nftables.conf            |  2 +-
 23 files changed, 357 insertions(+), 62 deletions(-)

diff --git a/infrastructure-provisioning/src/dataengine/scripts/configure_dataengine.py b/infrastructure-provisioning/src/dataengine/scripts/configure_dataengine.py
index e5aacb8..2cdc730 100644
--- a/infrastructure-provisioning/src/dataengine/scripts/configure_dataengine.py
+++ b/infrastructure-provisioning/src/dataengine/scripts/configure_dataengine.py
@@ -202,3 +202,25 @@ if __name__ == "__main__":
     if os.environ['application'] == 'zeppelin' and os.environ['notebook_r_enabled'] == 'true':
         print("Install additional R packages")
         install_r_packages(args.os_user)
+
+    # INSTALL LIVY
+    if not exists('/home/{0}/.ensure_dir/livy_ensured'.format(args.os_user)):
+        livy_version = '0.7.0'
+        sudo(
+            'wget -nv --timeout=30 --tries=5 --retry-connrefused https://archive.apache.org/dist/incubator/livy/{0}-incubating/apache-livy-{0}-incubating-bin.zip -P /tmp/'.format(
+                livy_version))
+        sudo('unzip -q /tmp/apache-livy-{}-incubating-bin.zip -d /tmp/'.format(livy_version))
+        sudo('mv /tmp/apache-livy-{}-incubating-bin /opt/livy'.format(livy_version))
+        sudo('mkdir /var/log/livy')
+        put('~/templates/livy-env.sh', '/tmp/livy-env.sh')
+        sudo('mv /tmp/livy-env.sh /opt/livy/conf/livy-env.sh')
+        sudo('chown -R -L {0}:{0} /opt/livy/'.format(args.os_user))
+        sudo('chown -R {0}:{0} /var/log/livy'.format(args.os_user))
+        put('~/templates/livy.service', '/tmp/livy.service')
+        sudo("sed -i 's|OS_USER|{}|' /tmp/livy.service".format(args.os_user))
+        sudo('mv /tmp/livy.service /etc/systemd/system/livy.service')
+        sudo('systemctl daemon-reload')
+        sudo('systemctl enable livy.service')
+        sudo('systemctl start livy.service')
+        sudo('touch /home/{0}/.ensure_dir/livy_ensured'.format(args.os_user))
+
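
Once this block has run, Livy listens on its default REST port (8998). A minimal sketch of checking the new service from the notebook host, assuming the default port and a reachable master; the host value below is hypothetical and not set by this commit:

    # Hedged sketch: poll the Livy REST API on its default port 8998.
    # 'livy_host' is a hypothetical example value, not something configured by this patch.
    import requests

    livy_host = '10.128.0.12'
    resp = requests.get('http://{}:8998/sessions'.format(livy_host), timeout=10)
    print(resp.status_code, resp.json())  # expect 200 and a JSON document listing Livy sessions
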
diff --git a/infrastructure-provisioning/src/general/files/aws/dataengine_Dockerfile b/infrastructure-provisioning/src/general/files/aws/dataengine_Dockerfile
index a0be3e1..313ddd2 100644
--- a/infrastructure-provisioning/src/general/files/aws/dataengine_Dockerfile
+++ b/infrastructure-provisioning/src/general/files/aws/dataengine_Dockerfile
@@ -36,6 +36,8 @@ COPY general/scripts/os/notebook_reconfigure_dataengine_spark.py /root/scripts/
 COPY general/templates/os/notebook_spark-defaults_local.conf /root/templates/
 COPY general/templates/os/tensorboard.service /root/templates/
 COPY general/templates/os/${OS}/spark-* /root/templates/
+COPY general/templates/os/${OS}/livy.service /root/templates/
+COPY general/templates/os/livy-env.sh /root/templates/
 
 RUN chmod a+x /root/fabfile.py; \
     chmod a+x /root/scripts/*
diff --git a/infrastructure-provisioning/src/general/files/azure/dataengine_Dockerfile b/infrastructure-provisioning/src/general/files/azure/dataengine_Dockerfile
index 8e394cc..daefa72 100644
--- a/infrastructure-provisioning/src/general/files/azure/dataengine_Dockerfile
+++ b/infrastructure-provisioning/src/general/files/azure/dataengine_Dockerfile
@@ -37,6 +37,8 @@ COPY general/templates/os/notebook_spark-defaults_local.conf /root/templates/
 COPY general/templates/os/tensorboard.service /root/templates/
 COPY general/templates/azure/core-site* /root/templates/
 COPY general/templates/os/${OS}/spark-* /root/templates/
+COPY general/templates/os/${OS}/livy.service /root/templates/
+COPY general/templates/os/livy-env.sh /root/templates/
 
 RUN chmod a+x /root/fabfile.py; \
     chmod a+x /root/scripts/*
diff --git a/infrastructure-provisioning/src/general/files/gcp/dataengine_Dockerfile b/infrastructure-provisioning/src/general/files/gcp/dataengine_Dockerfile
index 0f4f14a..139e565 100644
--- a/infrastructure-provisioning/src/general/files/gcp/dataengine_Dockerfile
+++ b/infrastructure-provisioning/src/general/files/gcp/dataengine_Dockerfile
@@ -37,6 +37,8 @@ COPY general/templates/os/notebook_spark-defaults_local.conf /root/templates/
 COPY general/templates/os/tensorboard.service /root/templates/
 COPY general/templates/gcp/core-site.xml /root/templates/
 COPY general/templates/os/${OS}/spark-* /root/templates/
+COPY general/templates/os/${OS}/livy.service /root/templates/
+COPY general/templates/os/livy-env.sh /root/templates/
 
 RUN chmod a+x /root/fabfile.py; \
     chmod a+x /root/scripts/*
diff --git a/infrastructure-provisioning/src/general/files/gcp/jupyter_Dockerfile b/infrastructure-provisioning/src/general/files/gcp/jupyter_Dockerfile
index cff0b7f..bd4b1bd 100644
--- a/infrastructure-provisioning/src/general/files/gcp/jupyter_Dockerfile
+++ b/infrastructure-provisioning/src/general/files/gcp/jupyter_Dockerfile
@@ -34,6 +34,7 @@ COPY general/templates/os/notebook_spark-defaults_local.conf /root/templates/
 COPY general/templates/os/pyspark_local_template.json /root/templates/
 COPY general/templates/os/py3spark_local_template.json /root/templates/
 COPY general/templates/os/pyspark_dataengine-service_template.json /root/templates/
+COPY general/templates/os/sparkmagic_config_template.json /root/templates/
 COPY general/templates/os/r_dataengine-service_template.json /root/templates/
 COPY general/templates/os/r_template.json /root/templates/
 COPY general/templates/os/run_template.sh /root/templates/
diff --git a/infrastructure-provisioning/src/general/files/gcp/ssn_policy.json b/infrastructure-provisioning/src/general/files/gcp/ssn_policy.json
index bd95d12..4f0ad6a 100644
--- a/infrastructure-provisioning/src/general/files/gcp/ssn_policy.json
+++ b/infrastructure-provisioning/src/general/files/gcp/ssn_policy.json
@@ -16,5 +16,7 @@
     "compute.images.get",
     "compute.images.delete",
     "compute.images.setLabels",
-    "compute.images.list"
+    "compute.images.list",
+    "compute.routes.create",
+    "compute.routes.get"
 ]
\ No newline at end of file
diff --git a/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py b/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py
index b1d0acb..7310f37 100644
--- a/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py
+++ b/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py
@@ -177,6 +177,37 @@ class GCPActions:
                                    file=sys.stdout)}))
             traceback.print_exc(file=sys.stdout)
 
+    def create_nat_route(self, nat_route_params):
+        request = self.service.routes().insert(project=self.project, body=nat_route_params)
+        try:
+            result = request.execute()
+            meta_lib.GCPMeta().wait_for_operation(result['name'])
+            print('NAT route {} created.'.format(nat_route_params['name']))
+            return result
+        except Exception as err:
+            logging.info(
+                "Unable to create NAT route: " + str(err) + "\n Traceback: " + traceback.print_exc(file=sys.stdout))
+            append_result(str({"error": "Unable to create NAT route",
+                               "error_message": str(err) + "\n Traceback: " + traceback.print_exc(
+                                   file=sys.stdout)}))
+            traceback.print_exc(file=sys.stdout)
+
+    def delete_nat_route(self, nat_route_name):
+        request = self.service.routes().delete(project=self.project, route=nat_route_name)
+        try:
+            result = request.execute()
+            meta_lib.GCPMeta().wait_for_operation(result['name'])
+            print('NAT route {} deleted.'.format(nat_route_name))
+            return result
+        except Exception as err:
+            logging.info(
+                "Unable to delete NAT route: " + str(err) + "\n Traceback: " + traceback.print_exc(file=sys.stdout))
+            append_result(str({"error": "Unable to delete NAT route",
+                               "error_message": str(err) + "\n Traceback: " + traceback.print_exc(
+                                   file=sys.stdout)}))
+            traceback.print_exc(file=sys.stdout)
+
+
     def create_bucket(self, bucket_name):
         try:
             bucket = self.storage_client.create_bucket(bucket_name)
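
The create_nat_route call site appears in common_create_nat_route.py further down in this patch; the matching cleanup is not part of this diff. A hedged sketch of how delete_nat_route could be driven on project termination, reusing the route-naming pattern from project_prepare.py (all names below are hypothetical):

    # Hedged sketch only: the termination-side call is not part of this patch.
    from dlab.actions_lib import GCPActions

    # Route name follows the '<service_base_name>-<project>-<endpoint>-nat-route' pattern
    # used in project_prepare.py; the concrete values here are placeholders.
    nat_route_name = '{0}-{1}-{2}-nat-route'.format('dlab', 'project1', 'endpoint1')
    GCPActions().delete_nat_route(nat_route_name)
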
diff --git a/infrastructure-provisioning/src/general/lib/gcp/meta_lib.py b/infrastructure-provisioning/src/general/lib/gcp/meta_lib.py
index cc16028..6b7582b 100644
--- a/infrastructure-provisioning/src/general/lib/gcp/meta_lib.py
+++ b/infrastructure-provisioning/src/general/lib/gcp/meta_lib.py
@@ -157,6 +157,25 @@ class GCPMeta:
                                    file=sys.stdout)}))
             traceback.print_exc(file=sys.stdout)
 
+    def get_route(self, route_name):
+        request = self.service.routes().get(
+            project=self.project,
+            route=route_name)
+        try:
+            return request.execute()
+        except errors.HttpError as err:
+            if err.resp.status == 404:
+                return ''
+            else:
+                raise err
+        except Exception as err:
+            logging.info(
+                "Unable to get Route: " + str(err) + "\n Traceback: " + traceback.print_exc(file=sys.stdout))
+            append_result(str({"error": "Unable to get Route",
+                               "error_message": str(err) + "\n Traceback: " + traceback.print_exc(
+                                   file=sys.stdout)}))
+            traceback.print_exc(file=sys.stdout)
+
     def get_bucket(self, bucket_name):
         try:
             bucket = self.storage_client.get_bucket(bucket_name)
diff --git a/infrastructure-provisioning/src/general/lib/os/debian/edge_lib.py b/infrastructure-provisioning/src/general/lib/os/debian/edge_lib.py
index 1130054..958a53c 100644
--- a/infrastructure-provisioning/src/general/lib/os/debian/edge_lib.py
+++ b/infrastructure-provisioning/src/general/lib/os/debian/edge_lib.py
@@ -160,8 +160,13 @@ def configure_nftables(config):
             sudo('systemctl enable nftables.service')
             sudo('systemctl start nftables')
             sudo('sysctl net.ipv4.ip_forward=1')
+            if os.environ['conf_cloud_provider'] == 'aws':
+                interface = 'eth0'
+            elif os.environ['conf_cloud_provider'] == 'gcp':
+                interface = 'ens4'
             sudo('sed -i \'s/#net.ipv4.ip_forward=1/net.ipv4.ip_forward=1/g\' /etc/sysctl.conf')
             sudo('sed -i \'s/EDGE_IP/{}/g\' /opt/dlab/templates/nftables.conf'.format(config['edge_ip']))
+            sudo('sed -i "s|INTERFACE|{}|g" /opt/dlab/templates/nftables.conf'.format(interface))
             sudo('sed -i "s|SUBNET_CIDR|{}|g" /opt/dlab/templates/nftables.conf'.format(config['exploratory_subnet']))
             sudo('cp /opt/dlab/templates/nftables.conf /etc/')
             sudo('systemctl restart nftables')
diff --git a/infrastructure-provisioning/src/general/scripts/aws/dataengine-service_configure.py b/infrastructure-provisioning/src/general/scripts/aws/dataengine-service_configure.py
index e442e45..147247a 100644
--- a/infrastructure-provisioning/src/general/scripts/aws/dataengine-service_configure.py
+++ b/infrastructure-provisioning/src/general/scripts/aws/dataengine-service_configure.py
@@ -176,7 +176,7 @@ if __name__ == "__main__":
             emr_conf['computational_name'] = os.environ['computational_name']
         else:
             emr_conf['computational_name'] = ''
-        emr_conf['apps'] = 'Hadoop Hive Hue Spark'
+        emr_conf['apps'] = 'Hadoop Hive Hue Spark Livy'
         emr_conf['service_base_name'] = os.environ['conf_service_base_name']
         emr_conf['project_name'] = os.environ['project_name']
         emr_conf['endpoint_name'] = os.environ['endpoint_name']
diff --git a/infrastructure-provisioning/src/general/scripts/aws/jupyter_dataengine-service_create_configs.py b/infrastructure-provisioning/src/general/scripts/aws/jupyter_dataengine-service_create_configs.py
index 9a95b71..99cc46a 100644
--- a/infrastructure-provisioning/src/general/scripts/aws/jupyter_dataengine-service_create_configs.py
+++ b/infrastructure-provisioning/src/general/scripts/aws/jupyter_dataengine-service_create_configs.py
@@ -175,7 +175,7 @@ def install_sparkamagic_kernels(args):
                                                                          args.cluster_name)
         local('sed -i \'s|PySpark|{0}|g\' /home/{1}/.local/share/jupyter/kernels/pysparkkernel/kernel.json'.format(
             pyspark_kernel_name, args.os_user))
-        spark_kernel_name = 'PySpark (Scala-{0} / Spark-{1} ) [{2}]'.format(args.scala_version, args.spark_version,
+        spark_kernel_name = 'Spark (Scala-{0} / Spark-{1} ) [{2}]'.format(args.scala_version, args.spark_version,
                                                                          args.cluster_name)
         local('sed -i \'s|Spark|{0}|g\' /home/{1}/.local/share/jupyter/kernels/sparkkernel/kernel.json'.format(
             spark_kernel_name, args.os_user))
diff --git a/infrastructure-provisioning/src/general/scripts/gcp/common_create_nat_route.py b/infrastructure-provisioning/src/general/scripts/gcp/common_create_nat_route.py
new file mode 100644
index 0000000..8f03ea8
--- /dev/null
+++ b/infrastructure-provisioning/src/general/scripts/gcp/common_create_nat_route.py
@@ -0,0 +1,58 @@
+#!/usr/bin/python
+
+# *****************************************************************************
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# ******************************************************************************
+
+import json
+import argparse
+from dlab.actions_lib import *
+from dlab.meta_lib import *
+import sys
+from botocore.exceptions import ClientError
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--nat_route_name', type=str)
+parser.add_argument('--vpc', type=str)
+parser.add_argument('--tag', type=str)
+parser.add_argument('--edge_instance', type=str)
+args = parser.parse_args()
+
+
+if __name__ == "__main__":
+    if GCPMeta().get_route(args.nat_route_name):
+        print("REQUESTED ROUTE {} ALREADY EXISTS".format(args.nat_route_name))
+    else:
+        print("Creating NAT ROUTE {}".format(args.nat_route_name))
+        params = {
+            "destRange": "0.0.0.0/0",
+            "name": args.nat_route_name,
+            "network": args.vpc,
+            "priority": 0,
+            "tags": [
+                args.tag
+            ],
+            "nextHopInstance": args.edge_instance
+        }
+        GCPActions().create_nat_route(params)
+else:
+    parser.print_help()
+    sys.exit(2)
\ No newline at end of file
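
The script is driven from project_prepare.py later in this patch; a condensed sketch of that invocation, with placeholder argument values (the selfLinks and tag are resolved from project metadata at runtime, not hard-coded):

    # Hedged sketch of the call made in project_prepare.py; argument values are placeholders.
    from fabric.api import local

    local('~/scripts/common_create_nat_route.py --nat_route_name {} --vpc {} --tag {} --edge_instance {}'.format(
        'dlab-project1-endpoint1-nat-route', '<vpc selfLink>', '<ps firewall target tag>', '<edge instance selfLink>'))
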
diff --git a/infrastructure-provisioning/src/general/scripts/gcp/dataengine-service_prepare.py b/infrastructure-provisioning/src/general/scripts/gcp/dataengine-service_prepare.py
index 993b8e7..91d16e2 100644
--- a/infrastructure-provisioning/src/general/scripts/gcp/dataengine-service_prepare.py
+++ b/infrastructure-provisioning/src/general/scripts/gcp/dataengine-service_prepare.py
@@ -152,6 +152,8 @@ if __name__ == "__main__":
     dataproc_cluster['config']['workerConfig']['machineTypeUri'] = os.environ['dataproc_slave_instance_type']
     dataproc_cluster['config']['masterConfig']['numInstances'] = int(os.environ['dataproc_master_count'])
     dataproc_cluster['config']['workerConfig']['numInstances'] = int(os.environ['dataproc_slave_count'])
+    livy_init = 'gs://goog-dataproc-initialization-actions-{}/livy/livy.sh'.format(dataproc_conf['region'])
+    dataproc_cluster['config']['initializationActions'][0]['executableFile'] = livy_init
     if int(os.environ['dataproc_preemptible_count']) != 0:
         dataproc_cluster['config']['secondaryWorkerConfig']['numInstances'] = int(
             os.environ['dataproc_preemptible_count'])
diff --git a/infrastructure-provisioning/src/general/scripts/gcp/edge_configure.py b/infrastructure-provisioning/src/general/scripts/gcp/edge_configure.py
index 110efb9..7b2286d 100644
--- a/infrastructure-provisioning/src/general/scripts/gcp/edge_configure.py
+++ b/infrastructure-provisioning/src/general/scripts/gcp/edge_configure.py
@@ -267,6 +267,25 @@ if __name__ == "__main__":
         sys.exit(1)
 
     try:
+        print('[CONFIGURING EDGE AS NAT]')
+        if os.environ['edge_is_nat'] == 'true':
+            print('Installing nftables')
+            additional_config = {"exploratory_subnet": edge_conf['private_subnet_cidr'],
+                                 "edge_ip": edge_conf['private_ip']}
+            params = "--hostname {} --keyfile {} --additional_config '{}' --user {}".format(
+                edge_conf['instance_hostname'], edge_conf['ssh_key_path'], json.dumps(additional_config),
+                edge_conf['dlab_ssh_user'])
+            try:
+                local("~/scripts/{}.py {}".format('configure_nftables', params))
+            except:
+                traceback.print_exc()
+                raise Exception
+    except Exception as err:
+        dlab.fab.append_result("Failed to configure NAT." + str(err))
+        clear_resources()
+        sys.exit(1)
+
+    try:
         print('[SUMMARY]')
         logging.info('[SUMMARY]')
         print("Instance name: {}".format(edge_conf['instance_name']))
diff --git a/infrastructure-provisioning/src/general/scripts/gcp/jupyter_dataengine-service_create_configs.py b/infrastructure-provisioning/src/general/scripts/gcp/jupyter_dataengine-service_create_configs.py
index 3f10360..30ee470 100644
--- a/infrastructure-provisioning/src/general/scripts/gcp/jupyter_dataengine-service_create_configs.py
+++ b/infrastructure-provisioning/src/general/scripts/gcp/jupyter_dataengine-service_create_configs.py
@@ -51,6 +51,8 @@ parser.add_argument('--application', type=str, default='')
 parser.add_argument('--r_version', type=str, default='')
 parser.add_argument('--r_enabled', type=str, default='')
 parser.add_argument('--scala_version', type=str, default='')
+parser.add_argument('--python_version', type=str, default='')
+parser.add_argument('--master_ip', type=str, default='')
 args = parser.parse_args()
 
 dataproc_dir = '/opt/{}/jars/'.format(args.dataproc_version)
@@ -111,20 +113,47 @@ def toree_kernel(args):
     with open(run_sh_path, 'w') as f:
         f.write(text)
 
+def install_sparkamagic_kernels(args):
+    try:
+        local('sudo jupyter nbextension enable --py --sys-prefix widgetsnbextension')
+        sparkmagic_dir = local("sudo pip3 show sparkmagic | grep 'Location: ' | awk '{print $2}'", capture=True)
+        local('sudo jupyter-kernelspec install {}/sparkmagic/kernels/sparkkernel --user'.format(sparkmagic_dir))
+        local('sudo jupyter-kernelspec install {}/sparkmagic/kernels/pysparkkernel --user'.format(sparkmagic_dir))
+        local('sudo jupyter-kernelspec install {}/sparkmagic/kernels/sparkrkernel --user'.format(sparkmagic_dir))
+        pyspark_kernel_name = 'PySpark (Python-{0} / Spark-{1} ) [{2}]'.format(args.python_version, args.spark_version,
+                                                                         args.cluster_name)
+        local('sed -i \'s|PySpark|{0}|g\' /home/{1}/.local/share/jupyter/kernels/pysparkkernel/kernel.json'.format(
+            pyspark_kernel_name, args.os_user))
+        spark_kernel_name = 'Spark (Scala-{0} / Spark-{1} ) [{2}]'.format(args.scala_version, args.spark_version,
+                                                                         args.cluster_name)
+        local('sed -i \'s|Spark|{0}|g\' /home/{1}/.local/share/jupyter/kernels/sparkkernel/kernel.json'.format(
+            spark_kernel_name, args.os_user))
+        sparkr_kernel_name = 'SparkR (R-{0} / Spark-{1} ) [{2}]'.format(args.r_version, args.spark_version,
+                                                                            args.cluster_name)
+        local('sed -i \'s|SparkR|{0}|g\' /home/{1}/.local/share/jupyter/kernels/sparkrkernel/kernel.json'.format(
+            sparkr_kernel_name, args.os_user))
+        local('mkdir -p /home/' + args.os_user + '/.sparkmagic')
+        local('cp -f /tmp/sparkmagic_config_template.json /home/' + args.os_user + '/.sparkmagic/config.json')
+        local('sed -i \'s|LIVY_HOST|{0}|g\' /home/{1}/.sparkmagic/config.json'.format(
+                args.master_ip, args.os_user))
+        local('sudo chown -R {0}:{0} /home/{0}/.sparkmagic/'.format(args.os_user))
+    except:
+        sys.exit(1)
 
 if __name__ == "__main__":
     if args.dry_run == 'true':
         parser.print_help()
     else:
-        result = prepare(dataproc_dir, yarn_dir)
-        if result == False :
-            actions_lib.GCPActions().jars(args, dataproc_dir)
-        actions_lib.GCPActions().yarn(args, yarn_dir)
-        actions_lib.GCPActions().install_dataproc_spark(args)
-        pyspark_kernel(kernels_dir, args.dataproc_version, args.cluster_name, args.spark_version, args.bucket,
-                       args.user_name, args.region, args.os_user, args.application, args.pip_mirror)
-        toree_kernel(args)
-        if args.r_enabled == 'true':
-            r_kernel(args)
-        actions_lib.GCPActions().spark_defaults(args)
-        configuring_notebook(args.dataproc_version)
+        install_sparkamagic_kernels(args)
+        #result = prepare(dataproc_dir, yarn_dir)
+        #if result == False :
+        #    actions_lib.GCPActions().jars(args, dataproc_dir)
+        #actions_lib.GCPActions().yarn(args, yarn_dir)
+        #actions_lib.GCPActions().install_dataproc_spark(args)
+        #pyspark_kernel(kernels_dir, args.dataproc_version, args.cluster_name, args.spark_version, args.bucket,
+        #               args.user_name, args.region, args.os_user, args.application, args.pip_mirror)
+        #toree_kernel(args)
+        #if args.r_enabled == 'true':
+        #    r_kernel(args)
+        #actions_lib.GCPActions().spark_defaults(args)
+        #configuring_notebook(args.dataproc_version)
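
install_sparkamagic_kernels points sparkmagic at the cluster by copying sparkmagic_config_template.json to ~/.sparkmagic/config.json and rewriting the LIVY_HOST placeholder to the Dataproc master's private IP. The template itself is not part of this diff; assuming it follows sparkmagic's standard endpoint layout, the substitution amounts to:

    # Hedged sketch of the LIVY_HOST substitution, assuming standard sparkmagic endpoint entries;
    # the template content and the IP below are illustrative, not taken from this commit.
    master_ip = '10.128.0.12'  # Dataproc master private IP resolved by the kernels-install script
    template = '{"kernel_python_credentials": {"url": "http://LIVY_HOST:8998"}}'
    print(template.replace('LIVY_HOST', master_ip))
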
diff --git a/infrastructure-provisioning/src/general/scripts/gcp/jupyter_install_dataengine-service_kernels.py b/infrastructure-provisioning/src/general/scripts/gcp/jupyter_install_dataengine-service_kernels.py
index cb17668..24586af 100644
--- a/infrastructure-provisioning/src/general/scripts/gcp/jupyter_install_dataengine-service_kernels.py
+++ b/infrastructure-provisioning/src/general/scripts/gcp/jupyter_install_dataengine-service_kernels.py
@@ -51,13 +51,14 @@ def configure_notebook(args):
     templates_dir = '/root/templates/'
     files_dir = '/root/files/'
     scripts_dir = '/root/scripts/'
-    put(templates_dir + 'pyspark_dataengine-service_template.json', '/tmp/pyspark_dataengine-service_template.json')
-    put(templates_dir + 'r_dataengine-service_template.json', '/tmp/r_dataengine-service_template.json')
-    put(templates_dir + 'toree_dataengine-service_template.json','/tmp/toree_dataengine-service_template.json')
+    put(templates_dir + 'sparkmagic_config_template.json', '/tmp/sparkmagic_config_template.json')
+    #put(templates_dir + 'pyspark_dataengine-service_template.json', '/tmp/pyspark_dataengine-service_template.json')
+    #put(templates_dir + 'r_dataengine-service_template.json', '/tmp/r_dataengine-service_template.json')
+    #put(templates_dir + 'toree_dataengine-service_template.json','/tmp/toree_dataengine-service_template.json')
     put(scripts_dir + '{}_dataengine-service_create_configs.py'.format(args.application), '/tmp/create_configs.py')
-    put(files_dir + 'toree_kernel.tar.gz', '/tmp/toree_kernel.tar.gz')
-    put(templates_dir + 'toree_dataengine-service_templatev2.json', '/tmp/toree_dataengine-service_templatev2.json')
-    put(templates_dir + 'run_template.sh', '/tmp/run_template.sh')
+    #put(files_dir + 'toree_kernel.tar.gz', '/tmp/toree_kernel.tar.gz')
+    #put(templates_dir + 'toree_dataengine-service_templatev2.json', '/tmp/toree_dataengine-service_templatev2.json')
+    #put(templates_dir + 'run_template.sh', '/tmp/run_template.sh')
     sudo('\cp /tmp/create_configs.py /usr/local/bin/create_configs.py')
     sudo('chmod 755 /usr/local/bin/create_configs.py')
     sudo('mkdir -p /usr/lib/python2.7/dlab/')
@@ -81,14 +82,21 @@ if __name__ == "__main__":
     configure_notebook(args)
     spark_version = actions_lib.GCPActions().get_cluster_app_version(args.bucket, args.project_name,
                                                                      args.cluster_name, 'spark')
+    python_version = actions_lib.GCPActions().get_cluster_app_version(args.bucket, args.project_name,
+                                                                     args.cluster_name, 'python')
     hadoop_version = actions_lib.GCPActions().get_cluster_app_version(args.bucket, args.project_name,
                                                                       args.cluster_name, 'hadoop')
     r_version = actions_lib.GCPActions().get_cluster_app_version(args.bucket, args.project_name,
                                                                  args.cluster_name, 'r')
     r_enabled = os.environ['notebook_r_enabled']
+    master_host = '{}-m'.format(args.cluster_name)
+    master_ip = get_instance_private_ip_address(os.environ['gcp_zone'], master_host)
     sudo('echo "[global]" > /etc/pip.conf; echo "proxy = $(cat /etc/profile | grep proxy | head -n1 | cut -f2 -d=)" >> /etc/pip.conf')
     sudo('echo "use_proxy=yes" > ~/.wgetrc; proxy=$(cat /etc/profile | grep proxy | head -n1 | cut -f2 -d=); echo "http_proxy=$proxy" >> ~/.wgetrc; echo "https_proxy=$proxy" >> ~/.wgetrc')
-    sudo('unset http_proxy https_proxy; export gcp_project_id="{0}"; export conf_resource="{1}"; /usr/bin/python /usr/local/bin/create_configs.py --bucket {2} --cluster_name {3} --dataproc_version {4} --spark_version {5} --hadoop_version {6} --region {7} --user_name {8} --os_user {9} --pip_mirror {10} --application {11} --r_version {12} --r_enabled {13} --scala_version {14}'
+    sudo('unset http_proxy https_proxy; export gcp_project_id="{0}"; export conf_resource="{1}"; '
+         '/usr/bin/python /usr/local/bin/create_configs.py --bucket {2} --cluster_name {3} --dataproc_version {4}'
+         ' --spark_version {5} --hadoop_version {6} --region {7} --user_name {8} --os_user {9} --pip_mirror {10} '
+         '--application {11} --r_version {12} --r_enabled {13} --python_version {14} --master_ip {15} --scala_version {16}'
          .format(os.environ['gcp_project_id'], os.environ['conf_resource'], args.bucket, args.cluster_name,
                  args.dataproc_version, spark_version, hadoop_version, args.region, args.project_name, args.os_user,
-                 args.pip_mirror, args.application, r_version, r_enabled, scala_version))
+                 args.pip_mirror, args.application, r_version, r_enabled, python_version, master_ip, scala_version))
diff --git a/infrastructure-provisioning/src/general/scripts/gcp/project_prepare.py b/infrastructure-provisioning/src/general/scripts/gcp/project_prepare.py
index f9822a0..d42ffdf 100644
--- a/infrastructure-provisioning/src/general/scripts/gcp/project_prepare.py
+++ b/infrastructure-provisioning/src/general/scripts/gcp/project_prepare.py
@@ -539,3 +539,41 @@ if __name__ == "__main__":
         GCPActions.remove_role(project_conf['edge_role_name'])
         GCPActions.remove_subnet(project_conf['private_subnet_name'], project_conf['region'])
         sys.exit(1)
+
+    if os.environ['edge_is_nat'] == 'true':
+        try:
+            logging.info('[CREATE NAT ROUTE]')
+            print('[CREATE NAT ROUTE]')
+            nat_route_name = '{0}-{1}-{2}-nat-route'.format(project_conf['service_base_name'],
+                                                                  project_conf['project_name'],
+                                                                  project_conf['endpoint_name'])
+            edge_instance = GCPMeta.get_instance(project_conf['instance_name'])['selfLink']
+            params = "--nat_route_name {} --vpc {} --tag {} --edge_instance {}".format(nat_route_name,
+                                                                                                       project_conf['vpc_selflink'],
+                                                                                                       project_conf['ps_firewall_target'],
+                                                                                                       edge_instance)
+            try:
+                local("~/scripts/{}.py {}".format('common_create_nat_route', params))
+            except:
+                traceback.print_exc()
+                raise Exception
+        except Exception as err:
+            dlab.fab.append_result("Failed to create nat route.", str(err))
+            GCPActions.remove_instance(project_conf['instance_name'], project_conf['zone'])
+            GCPActions.remove_static_address(project_conf['static_address_name'], project_conf['region'])
+            GCPActions.remove_bucket(project_conf['bucket_name'])
+            GCPActions.remove_firewall(project_conf['fw_edge_ingress_public'])
+            GCPActions.remove_firewall(project_conf['fw_edge_ingress_internal'])
+            GCPActions.remove_firewall(project_conf['fw_edge_egress_public'])
+            GCPActions.remove_firewall(project_conf['fw_edge_egress_internal'])
+            GCPActions.remove_firewall(project_conf['fw_ps_ingress'])
+            GCPActions.remove_firewall(project_conf['fw_ps_egress_private'])
+            GCPActions.remove_firewall(project_conf['fw_ps_egress_public'])
+            GCPActions.remove_service_account(project_conf['ps_service_account_name'],
+                                              project_conf['service_base_name'])
+            GCPActions.remove_role(project_conf['ps_role_name'])
+            GCPActions.remove_service_account(project_conf['edge_service_account_name'],
+                                              project_conf['service_base_name'])
+            GCPActions.remove_role(project_conf['edge_role_name'])
+            GCPActions.remove_subnet(project_conf['private_subnet_name'], project_conf['region'])
+            sys.exit(1)
\ No newline at end of file
diff --git a/infrastructure-provisioning/src/general/scripts/os/jupyter_dataengine_create_configs.py b/infrastructure-provisioning/src/general/scripts/os/jupyter_dataengine_create_configs.py
index 0f007ef..529076a 100644
--- a/infrastructure-provisioning/src/general/scripts/os/jupyter_dataengine_create_configs.py
+++ b/infrastructure-provisioning/src/general/scripts/os/jupyter_dataengine_create_configs.py
@@ -154,17 +154,48 @@ def pyspark_kernel(args):
         format(args.cluster_name, kernel_path, args.os_user))
     local('sudo mv /tmp/{}/kernel_var.json '.format(args.cluster_name) + kernel_path)
 
+def install_sparkamagic_kernels(args):
+    try:
+        local('sudo jupyter nbextension enable --py --sys-prefix widgetsnbextension')
+        sparkmagic_dir = local("sudo pip3 show sparkmagic | grep 'Location: ' | awk '{print $2}'", capture=True)
+        local('sudo jupyter-kernelspec install {}/sparkmagic/kernels/sparkkernel --user'.format(sparkmagic_dir))
+        local('sudo jupyter-kernelspec install {}/sparkmagic/kernels/pysparkkernel --user'.format(sparkmagic_dir))
+        local('sudo jupyter-kernelspec install {}/sparkmagic/kernels/sparkrkernel --user'.format(sparkmagic_dir))
+        pyspark_kernel_name = 'PySpark (Python-3.6 / Spark-{0} ) [{1}]'.format(args.spark_version,
+                                                                         args.cluster_name)
+        local('sed -i \'s|PySpark|{0}|g\' /home/{1}/.local/share/jupyter/kernels/pysparkkernel/kernel.json'.format(
+            pyspark_kernel_name, args.os_user))
+        scala_version = local('spark-submit --version 2>&1 | grep -o -P "Scala version \K.{0,7}"', capture=True)
+        spark_kernel_name = 'Spark (Scala-{0} / Spark-{1} ) [{2}]'.format(scala_version, args.spark_version,
+                                                                         args.cluster_name)
+        local('sed -i \'s|Spark|{0}|g\' /home/{1}/.local/share/jupyter/kernels/sparkkernel/kernel.json'.format(
+            spark_kernel_name, args.os_user))
+        r_version = local("R --version | awk '/version / {print $3}'", capture=True)
+        sparkr_kernel_name = 'SparkR (R-{0} / Spark-{1} ) [{2}]'.format(str(r_version), args.spark_version,
+                                                                            args.cluster_name)
+        local('sed -i \'s|SparkR|{0}|g\' /home/{1}/.local/share/jupyter/kernels/sparkrkernel/kernel.json'.format(
+            sparkr_kernel_name, args.os_user))
+        local('mkdir -p /home/' + args.os_user + '/.sparkmagic')
+        local('cp -f /tmp/sparkmagic_config_template.json /home/' + args.os_user + '/.sparkmagic/config.json')
+        spark_master_ip = args.spark_master.split('//')[1].split(':')[0]
+        local('sed -i \'s|LIVY_HOST|{0}|g\' /home/{1}/.sparkmagic/config.json'.format(
+                spark_master_ip, args.os_user))
+        local('sudo chown -R {0}:{0} /home/{0}/.sparkmagic/'.format(args.os_user))
+    except:
+        sys.exit(1)
+
 
 if __name__ == "__main__":
     if args.dry_run == 'true':
         parser.print_help()
     else:
-        dataengine_dir_prepare('/opt/{}/'.format(args.cluster_name))
-        install_dataengine_spark(args.cluster_name, spark_link, spark_version, hadoop_version, cluster_dir, args.os_user,
-                                 args.datalake_enabled)
-        configure_dataengine_spark(args.cluster_name, local_jars_dir, cluster_dir, args.datalake_enabled,
-                                   args.spark_configurations)
-        pyspark_kernel(args)
-        toree_kernel(args)
-        if args.r_enabled == 'true':
-            r_kernel(args)
+        install_sparkamagic_kernels(args)
+        #dataengine_dir_prepare('/opt/{}/'.format(args.cluster_name))
+        #install_dataengine_spark(args.cluster_name, spark_link, spark_version, hadoop_version, cluster_dir, args.os_user,
+        #                         args.datalake_enabled)
+        #configure_dataengine_spark(args.cluster_name, local_jars_dir, cluster_dir, args.datalake_enabled,
+        #                           args.spark_configurations)
+        #pyspark_kernel(args)
+        #toree_kernel(args)
+        #if args.r_enabled == 'true':
+        #    r_kernel(args)
diff --git a/infrastructure-provisioning/src/general/scripts/os/jupyter_install_dataengine_kernels.py b/infrastructure-provisioning/src/general/scripts/os/jupyter_install_dataengine_kernels.py
index 9b984af..01afe7b 100644
--- a/infrastructure-provisioning/src/general/scripts/os/jupyter_install_dataengine_kernels.py
+++ b/infrastructure-provisioning/src/general/scripts/os/jupyter_install_dataengine_kernels.py
@@ -46,12 +46,13 @@ def configure_notebook(keyfile, hoststring):
     files_dir = '/root/files/'
     scripts_dir = '/root/scripts/'
     run('mkdir -p /tmp/{}/'.format(args.cluster_name))
-    put(templates_dir + 'pyspark_dataengine_template.json', '/tmp/{}/pyspark_dataengine_template.json'.format(args.cluster_name))
-    put(templates_dir + 'r_dataengine_template.json', '/tmp/{}/r_dataengine_template.json'.format(args.cluster_name))
-    put(templates_dir + 'toree_dataengine_template.json','/tmp/{}/toree_dataengine_template.json'.format(args.cluster_name))
-    put(files_dir + 'toree_kernel.tar.gz', '/tmp/{}/toree_kernel.tar.gz'.format(args.cluster_name))
-    put(templates_dir + 'toree_dataengine_template.json', '/tmp/{}/toree_dataengine_template.json'.format(args.cluster_name))
-    put(templates_dir + 'run_template.sh', '/tmp/{}/run_template.sh'.format(args.cluster_name))
+    put(templates_dir + 'sparkmagic_config_template.json', '/tmp/sparkmagic_config_template.json')
+    #put(templates_dir + 'pyspark_dataengine_template.json', '/tmp/{}/pyspark_dataengine_template.json'.format(args.cluster_name))
+    #put(templates_dir + 'r_dataengine_template.json', '/tmp/{}/r_dataengine_template.json'.format(args.cluster_name))
+    #put(templates_dir + 'toree_dataengine_template.json','/tmp/{}/toree_dataengine_template.json'.format(args.cluster_name))
+    #put(files_dir + 'toree_kernel.tar.gz', '/tmp/{}/toree_kernel.tar.gz'.format(args.cluster_name))
+    #put(templates_dir + 'toree_dataengine_template.json', '/tmp/{}/toree_dataengine_template.json'.format(args.cluster_name))
+    #put(templates_dir + 'run_template.sh', '/tmp/{}/run_template.sh'.format(args.cluster_name))
     put(templates_dir + 'notebook_spark-defaults_local.conf', '/tmp/{}/notebook_spark-defaults_local.conf'.format(args.cluster_name))
     spark_master_ip = args.spark_master.split('//')[1].split(':')[0]
     spark_memory = get_spark_memory(True, args.os_user, spark_master_ip, keyfile)
@@ -86,7 +87,7 @@ if __name__ == "__main__":
     create_inactivity_log(args.spark_master_ip, env.host_string)
     sudo('/usr/bin/python /usr/local/bin/jupyter_dataengine_create_configs.py '
          '--cluster_name {} --spark_version {} --hadoop_version {} --os_user {} \
-         --spark_master {} --region {} --datalake_enabled {} --r_enabled {} --spark_configurations "{}"'.
+         --spark_master {} --datalake_enabled {} --r_enabled {} --spark_configurations "{}"'.
          format(args.cluster_name, args.spark_version, args.hadoop_version, args.os_user, args.spark_master,
-                region, args.datalake_enabled, r_enabled, os.environ['spark_configurations']))
+                args.datalake_enabled, r_enabled, os.environ['spark_configurations']))
 
diff --git a/infrastructure-provisioning/src/general/templates/gcp/dataengine-service_cluster.json b/infrastructure-provisioning/src/general/templates/gcp/dataengine-service_cluster.json
index 9f8367d..9391323 100644
--- a/infrastructure-provisioning/src/general/templates/gcp/dataengine-service_cluster.json
+++ b/infrastructure-provisioning/src/general/templates/gcp/dataengine-service_cluster.json
@@ -14,7 +14,10 @@
             },
             "tags": [
                 "CLUSTER_TAG"
-            ]
+            ],
+            "metadata": {
+                "livy-version": "0.6.0",
+            }
         },
         "masterConfig": {
             "numInstances": "NUM_MASTERS",
@@ -38,6 +41,11 @@
         },
         "softwareConfig": {
             "imageVersion": "IMAGE_VERSION"
-        }
+        },
+        "initializationActions": [
+            {
+                "executableFile": "LIVY"
+            }
+        ]
     }
 }
\ No newline at end of file
diff --git a/infrastructure-provisioning/src/general/files/aws/dataengine_Dockerfile b/infrastructure-provisioning/src/general/templates/os/debian/livy.service
similarity index 50%
copy from infrastructure-provisioning/src/general/files/aws/dataengine_Dockerfile
copy to infrastructure-provisioning/src/general/templates/os/debian/livy.service
index a0be3e1..a37f1f8 100644
--- a/infrastructure-provisioning/src/general/files/aws/dataengine_Dockerfile
+++ b/infrastructure-provisioning/src/general/templates/os/debian/livy.service
@@ -19,24 +19,17 @@
 #
 # ******************************************************************************
 
-FROM docker.dlab-base:latest
+[Unit]
+Description=Apache Livy service
+After=network.target
 
-ARG OS
-
-COPY dataengine/ /root/
-COPY general/scripts/os/dataengine_* /root/scripts/
-COPY general/scripts/os/update_inactivity_on_start.py /root/scripts/
-COPY general/scripts/os/reconfigure_spark.py /root/scripts/
-COPY general/scripts/os/install_additional_libs.py /root/scripts/install_additional_libs.py
-COPY general/scripts/os/get_list_available_pkgs.py /root/scripts/get_list_available_pkgs.py
-COPY general/lib/os/${OS}/notebook_lib.py /usr/lib/python2.7/dlab/notebook_lib.py
-COPY general/scripts/os/common_* /root/scripts/
-COPY general/scripts/aws/dataengine_* /root/scripts/
-COPY general/scripts/os/notebook_reconfigure_dataengine_spark.py /root/scripts/
-COPY general/templates/os/notebook_spark-defaults_local.conf /root/templates/
-COPY general/templates/os/tensorboard.service /root/templates/
-COPY general/templates/os/${OS}/spark-* /root/templates/
-
-RUN chmod a+x /root/fabfile.py; \
-    chmod a+x /root/scripts/*
+[Service]
+Group=OS_USER
+User=OS_USER
+Type=forking
+ExecStart=/opt/livy/bin/livy-server start
+ExecStop=/opt/livy/bin/livy-server stop
+Restart=on-failure
 
+[Install]
+WantedBy=multi-user.target
\ No newline at end of file
diff --git a/infrastructure-provisioning/src/general/templates/os/livy-env.sh b/infrastructure-provisioning/src/general/templates/os/livy-env.sh
new file mode 100644
index 0000000..e9a5738
--- /dev/null
+++ b/infrastructure-provisioning/src/general/templates/os/livy-env.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# LIVY ENVIRONMENT VARIABLES
+#
+export SPARK_HOME=/opt/spark
+export SPARK_CONF_DIR=/opt/spark/conf
+export LIVY_LOG_DIR=/var/log/livy
\ No newline at end of file
diff --git a/infrastructure-provisioning/src/project/templates/nftables.conf b/infrastructure-provisioning/src/project/templates/nftables.conf
index adde923..f5d4404 100644
--- a/infrastructure-provisioning/src/project/templates/nftables.conf
+++ b/infrastructure-provisioning/src/project/templates/nftables.conf
@@ -17,6 +17,6 @@ table inet filter {
 table ip nat {
         chain postrouting {
                 type nat hook postrouting priority 100; policy accept;
-                ip saddr SUBNET_CIDR oif "eth0" snat to EDGE_IP
+                ip saddr SUBNET_CIDR oif "INTERFACE" snat to EDGE_IP
         }
 }
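
With the substitutions performed in edge_lib.py above (EDGE_IP, SUBNET_CIDR, and INTERFACE, which resolves to eth0 on AWS and ens4 on GCP), the rendered rule takes a form like the following, with hypothetical addresses:

                ip saddr 172.16.1.0/24 oif "ens4" snat to 172.16.0.5
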


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@dlab.apache.org
For additional commands, e-mail: commits-help@dlab.apache.org