You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datalab.apache.org by lf...@apache.org on 2021/04/09 14:57:55 UTC

[incubator-datalab] branch DATALAB-2091 updated: [DATALAB-2091]: fixed error during notebook creation from custom ami

This is an automated email from the ASF dual-hosted git repository.

lfrolov pushed a commit to branch DATALAB-2091
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git


The following commit(s) were added to refs/heads/DATALAB-2091 by this push:
     new ec298a6  [DATALAB-2091]: fixed error during notebook creation from custom ami
ec298a6 is described below

commit ec298a6b468bc6573c9c162bb23e9e4ee038cf95
Author: leonidfrolov <fr...@gmail.com>
AuthorDate: Fri Apr 9 17:57:43 2021 +0300

    [DATALAB-2091]: fixed error during notebook creation from custom ami
---
 .../src/general/lib/aws/actions_lib.py             | 86 +++++++++++-----------
 .../src/general/lib/os/fab.py                      | 10 +--
 .../scripts/aws/common_remove_remote_kernels.py    |  6 +-
 3 files changed, 49 insertions(+), 53 deletions(-)

diff --git a/infrastructure-provisioning/src/general/lib/aws/actions_lib.py b/infrastructure-provisioning/src/general/lib/aws/actions_lib.py
index 58b6032..631437a 100644
--- a/infrastructure-provisioning/src/general/lib/aws/actions_lib.py
+++ b/infrastructure-provisioning/src/general/lib/aws/actions_lib.py
@@ -1254,26 +1254,24 @@ def remove_kernels(emr_name, tag_name, nb_tag_value, ssh_user, key_path, emr_ver
         if instances:
             for instance in instances:
                 private = getattr(instance, 'private_dns_name')
-                env.hosts = "{}".format(private)
-                env.user = "{}".format(ssh_user)
-                env.key_filename = "{}".format(key_path)
-                env.host_string = env.user + "@" + env.hosts
-                datalab.fab.conn.sudo('rm -rf /home/{}/.local/share/jupyter/kernels/*_{}'.format(ssh_user, emr_name))
-                if exists(datalab.fab.conn, '/home/{}/.ensure_dir/dataengine-service_{}_interpreter_ensured'.format(ssh_user, emr_name)):
+                global con
+                con = datalab.fab.init_datalab_connection(private, ssh_user, key_path)
+                con.sudo('rm -rf /home/{}/.local/share/jupyter/kernels/*_{}'.format(ssh_user, emr_name))
+                if exists(con, '/home/{}/.ensure_dir/dataengine-service_{}_interpreter_ensured'.format(ssh_user, emr_name)):
                     if os.environ['notebook_multiple_clusters'] == 'true':
                         try:
-                            livy_port = datalab.fab.conn.sudo("cat /opt/" + emr_version + "/" + emr_name +
+                            livy_port = con.sudo("cat /opt/" + emr_version + "/" + emr_name +
                                              "/livy/conf/livy.conf | grep livy.server.port | tail -n 1 | "
                                              "awk '{printf $3}'").stdout.replace('\n','')
-                            process_number = datalab.fab.conn.sudo("netstat -natp 2>/dev/null | grep ':" + livy_port +
+                            process_number = con.sudo("netstat -natp 2>/dev/null | grep ':" + livy_port +
                                                   "' | awk '{print $7}' | sed 's|/.*||g'").stdout.replace('\n','')
-                            datalab.fab.conn.sudo('kill -9 ' + process_number)
-                            datalab.fab.conn.sudo('systemctl disable livy-server-' + livy_port)
+                            con.sudo('kill -9 ' + process_number)
+                            con.sudo('systemctl disable livy-server-' + livy_port)
                         except:
                             print("Wasn't able to find Livy server for this EMR!")
-                    datalab.fab.conn.sudo('sed -i \"s/^export SPARK_HOME.*/export SPARK_HOME=\/opt\/spark/\" '
+                    con.sudo('sed -i \"s/^export SPARK_HOME.*/export SPARK_HOME=\/opt\/spark/\" '
                          '/opt/zeppelin/conf/zeppelin-env.sh')
-                    datalab.fab.conn.sudo("rm -rf /home/{}/.ensure_dir/dataengine-service_interpreter_ensure".format(ssh_user))
+                    con.sudo("rm -rf /home/{}/.ensure_dir/dataengine-service_interpreter_ensure".format(ssh_user))
                     zeppelin_url = 'http://' + private + ':8080/api/interpreter/setting/'
                     opener = urllib3.build_opener(urllib3.ProxyHandler({}))
                     req = opener.open(urllib3.Request(zeppelin_url))
@@ -1288,24 +1286,25 @@ def remove_kernels(emr_name, tag_name, nb_tag_value, ssh_user, key_path, emr_ver
                             request.get_method = lambda: 'DELETE'
                             url = opener.open(request)
                             print(url.read())
-                    datalab.fab.conn.sudo('chown ' + ssh_user + ':' + ssh_user + ' -R /opt/zeppelin/')
-                    datalab.fab.conn.sudo('systemctl daemon-reload')
-                    datalab.fab.conn.sudo("service zeppelin-notebook stop")
-                    datalab.fab.conn.sudo("service zeppelin-notebook start")
+                    con.sudo('chown ' + ssh_user + ':' + ssh_user + ' -R /opt/zeppelin/')
+                    con.sudo('systemctl daemon-reload')
+                    con.sudo("service zeppelin-notebook stop")
+                    con.sudo("service zeppelin-notebook start")
                     zeppelin_restarted = False
                     while not zeppelin_restarted:
-                        datalab.fab.conn.sudo('sleep 5')
-                        result = datalab.fab.conn.sudo('nmap -p 8080 localhost | grep "closed" > /dev/null; echo $?').stdout
+                        con.sudo('sleep 5')
+                        result = con.sudo('nmap -p 8080 localhost | grep "closed" > /dev/null; echo $?').stdout
                         result = result[:1]
                         if result == '1':
                             zeppelin_restarted = True
-                    datalab.fab.conn.sudo('sleep 5')
-                    datalab.fab.conn.sudo('rm -rf /home/{}/.ensure_dir/dataengine-service_{}_interpreter_ensured'.format(ssh_user,
+                    con.sudo('sleep 5')
+                    con.sudo('rm -rf /home/{}/.ensure_dir/dataengine-service_{}_interpreter_ensured'.format(ssh_user,
                                                                                                         emr_name))
-                if exists(datalab.fab.conn, '/home/{}/.ensure_dir/rstudio_dataengine-service_ensured'.format(ssh_user)):
+                if exists(con, '/home/{}/.ensure_dir/rstudio_dataengine-service_ensured'.format(ssh_user)):
                     datalab.fab.remove_rstudio_dataengines_kernel(computational_name, ssh_user)
-                datalab.fab.conn.sudo('rm -rf  /opt/' + emr_version + '/' + emr_name + '/')
+                con.sudo('rm -rf  /opt/' + emr_version + '/' + emr_name + '/')
                 print("Notebook's {} kernels were removed".format(env.hosts))
+                con.close()
         else:
             print("There are no notebooks to clean kernels.")
     except Exception as err:
@@ -1866,25 +1865,23 @@ def configure_dataengine_spark(cluster_name, jars_dir, cluster_dir, datalake_ena
 def remove_dataengine_kernels(tag_name, notebook_name, os_user, key_path, cluster_name):
     try:
         private = datalab.meta_lib.get_instance_private_ip_address(tag_name, notebook_name)
-        env.hosts = "{}".format(private)
-        env.user = "{}".format(os_user)
-        env.key_filename = "{}".format(key_path)
-        env.host_string = env.user + "@" + env.hosts
-        datalab.fab.conn.sudo('rm -rf /home/{}/.local/share/jupyter/kernels/*_{}'.format(os_user, cluster_name))
-        if exists(datalab.fab.conn, '/home/{}/.ensure_dir/dataengine_{}_interpreter_ensured'.format(os_user, cluster_name)):
+        global con
+        con = datalab.fab.init_datalab_connection(private, os_user, key_path)
+        con.sudo('rm -rf /home/{}/.local/share/jupyter/kernels/*_{}'.format(os_user, cluster_name))
+        if exists(con, '/home/{}/.ensure_dir/dataengine_{}_interpreter_ensured'.format(os_user, cluster_name)):
             if os.environ['notebook_multiple_clusters'] == 'true':
                 try:
-                    livy_port = datalab.fab.conn.sudo("cat /opt/" + cluster_name +
+                    livy_port = con.sudo("cat /opt/" + cluster_name +
                                      "/livy/conf/livy.conf | grep livy.server.port | tail -n 1 | awk '{printf $3}'").stdout.replace('\n','')
-                    process_number = datalab.fab.conn.sudo("netstat -natp 2>/dev/null | grep ':" + livy_port +
+                    process_number = con.sudo("netstat -natp 2>/dev/null | grep ':" + livy_port +
                                           "' | awk '{print $7}' | sed 's|/.*||g'").stdout.replace('\n','')
-                    datalab.fab.conn.sudo('kill -9 ' + process_number)
-                    datalab.fab.conn.sudo('systemctl disable livy-server-' + livy_port)
+                    con.sudo('kill -9 ' + process_number)
+                    con.sudo('systemctl disable livy-server-' + livy_port)
                 except:
                     print("Wasn't able to find Livy server for this EMR!")
-            datalab.fab.conn.sudo(
+            con.sudo(
                 'sed -i \"s/^export SPARK_HOME.*/export SPARK_HOME=\/opt\/spark/\" /opt/zeppelin/conf/zeppelin-env.sh')
-            datalab.fab.conn.sudo("rm -rf /home/{}/.ensure_dir/dataengine_interpreter_ensure".format(os_user))
+            con.sudo("rm -rf /home/{}/.ensure_dir/dataengine_interpreter_ensure".format(os_user))
             zeppelin_url = 'http://' + private + ':8080/api/interpreter/setting/'
             opener = urllib3.build_opener(urllib3.ProxyHandler({}))
             req = opener.open(urllib3.Request(zeppelin_url))
@@ -1899,23 +1896,24 @@ def remove_dataengine_kernels(tag_name, notebook_name, os_user, key_path, cluste
                     request.get_method = lambda: 'DELETE'
                     url = opener.open(request)
                     print(url.read())
-            datalab.fab.conn.sudo('chown ' + os_user + ':' + os_user + ' -R /opt/zeppelin/')
-            datalab.fab.conn.sudo('systemctl daemon-reload')
-            datalab.fab.conn.sudo("service zeppelin-notebook stop")
-            datalab.fab.conn.sudo("service zeppelin-notebook start")
+            con.sudo('chown ' + os_user + ':' + os_user + ' -R /opt/zeppelin/')
+            con.sudo('systemctl daemon-reload')
+            con.sudo("service zeppelin-notebook stop")
+            con.sudo("service zeppelin-notebook start")
             zeppelin_restarted = False
             while not zeppelin_restarted:
-                datalab.fab.conn.sudo('sleep 5')
-                result = datalab.fab.conn.sudo('nmap -p 8080 localhost | grep "closed" > /dev/null; echo $?').stdout
+                con.sudo('sleep 5')
+                result = con.sudo('nmap -p 8080 localhost | grep "closed" > /dev/null; echo $?').stdout
                 result = result[:1]
                 if result == '1':
                     zeppelin_restarted = True
-            datalab.fab.conn.sudo('sleep 5')
-            datalab.fab.conn.sudo('rm -rf /home/{}/.ensure_dir/dataengine_{}_interpreter_ensured'.format(os_user, cluster_name))
-        if exists(datalab.fab.conn, '/home/{}/.ensure_dir/rstudio_dataengine_ensured'.format(os_user)):
+            con.sudo('sleep 5')
+            con.sudo('rm -rf /home/{}/.ensure_dir/dataengine_{}_interpreter_ensured'.format(os_user, cluster_name))
+        if exists(con, '/home/{}/.ensure_dir/rstudio_dataengine_ensured'.format(os_user)):
             datalab.fab.remove_rstudio_dataengines_kernel(os.environ['computational_name'], os_user)
-        datalab.fab.conn.sudo('rm -rf  /opt/' + cluster_name + '/')
+        con.sudo('rm -rf  /opt/' + cluster_name + '/')
         print("Notebook's {} kernels were removed".format(env.hosts))
+        con.close()
     except Exception as err:
         logging.info("Unable to remove kernels on Notebook: " + str(err) + "\n Traceback: " + traceback.print_exc(
             file=sys.stdout))
diff --git a/infrastructure-provisioning/src/general/lib/os/fab.py b/infrastructure-provisioning/src/general/lib/os/fab.py
index fddbd58..4b9c66b 100644
--- a/infrastructure-provisioning/src/general/lib/os/fab.py
+++ b/infrastructure-provisioning/src/general/lib/os/fab.py
@@ -879,13 +879,11 @@ def update_pyopenssl_lib(os_user):
 
 def find_cluster_kernels():
     try:
-        with settings(sudo_user='root'):
-            de = [i for i in conn.sudo('find /opt/ -maxdepth 1 -name "*-de-*" -type d | rev | '
-                                  'cut -f 1 -d "/" | rev | xargs -r').split(' ') if i != '']
-            des =  [i for i in conn.sudo('find /opt/ -maxdepth 2 -name "*-des-*" -type d | rev | '
-                                    'cut -f 1,2 -d "/" | rev | xargs -r').split(' ') if i != '']
+        de = [i for i in conn.sudo('''bash -l -c 'find /opt/ -maxdepth 1 -name "*-de-*" -type d | rev | cut -f 1 -d "/" | rev | xargs -r' ''').stdout.replace('\n', '').split(' ') if i != '']
+        des =  [i for i in conn.sudo('''bash -l -c 'find /opt/ -maxdepth 2 -name "*-des-*" -type d | rev | cut -f 1,2 -d "/" | rev | xargs -r' ''').stdout.replace('\n', '').split(' ') if i != '']
         return (de, des)
-    except:
+    except Exception as err:
+        print('Failed to find cluster kernels.', str(err))
         sys.exit(1)
 
 
diff --git a/infrastructure-provisioning/src/general/scripts/aws/common_remove_remote_kernels.py b/infrastructure-provisioning/src/general/scripts/aws/common_remove_remote_kernels.py
index f903a92..e1af215 100644
--- a/infrastructure-provisioning/src/general/scripts/aws/common_remove_remote_kernels.py
+++ b/infrastructure-provisioning/src/general/scripts/aws/common_remove_remote_kernels.py
@@ -48,8 +48,8 @@ if __name__ == "__main__":
         for cluster in des_clusters:
             remove_kernels(cluster.split('/')[1], args.nb_tag_name, args.nb_tag_value,
                            args.os_user, args.keyfile, cluster.split('/')[0])
+        conn.close()
+        sys.exit(0)
     except Exception as err:
         print('Failed to remove cluster kernels.', str(err))
-        sys.exit(1)
-    conn.close()
-    sys.exit(0)
\ No newline at end of file
+        sys.exit(1)
\ No newline at end of file

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datalab.apache.org
For additional commands, e-mail: commits-help@datalab.apache.org