You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datalab.apache.org by lf...@apache.org on 2022/09/07 15:12:41 UTC

[incubator-datalab] branch DATALAB-1408 updated (3f54a31f2 -> bd5f36720)

This is an automated email from the ASF dual-hosted git repository.

lfrolov pushed a change to branch DATALAB-1408
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git


    from 3f54a31f2 Merge branch 'develop' into DATALAB-1408
     new 1e9664da1 [DATALAB-1408]: changed print to logging
     new bd5f36720 [DATALAB-1408]: fixed minor bugs, changed zookeeper shape to A2

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../src/general/lib/azure/actions_lib.py           |  8 +-
 .../scripts/azure/dataengine-service_configure.py  | 90 +++++++++++++++-------
 .../scripts/azure/dataengine-service_create.py     | 16 ++--
 .../scripts/azure/dataengine-service_terminate.py  |  2 +-
 .../src/general/scripts/azure/project_terminate.py |  3 +-
 5 files changed, 79 insertions(+), 40 deletions(-)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datalab.apache.org
For additional commands, e-mail: commits-help@datalab.apache.org


[incubator-datalab] 02/02: [DATALAB-1408]: fixed minor bugs, changed zookeeper shape to A2

Posted by lf...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

lfrolov pushed a commit to branch DATALAB-1408
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git

commit bd5f36720bed6ac7fcca3097eac68e8f3d493066
Author: leonidfrolov <fr...@gmail.com>
AuthorDate: Wed Sep 7 18:12:30 2022 +0300

    [DATALAB-1408]: fixed minor bugs, changed zookeeper shape to A2
---
 .../scripts/azure/dataengine-service_configure.py  | 90 +++++++++++++++-------
 .../scripts/azure/dataengine-service_create.py     | 16 ++--
 .../scripts/azure/dataengine-service_terminate.py  |  2 +-
 .../src/general/scripts/azure/project_terminate.py |  3 +-
 4 files changed, 75 insertions(+), 36 deletions(-)

diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py
index 9973c3d5c..ed0a9ab85 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py
@@ -21,40 +21,78 @@
 #
 # ******************************************************************************
 
+import datalab.actions_lib
+import datalab.fab
+import datalab.meta_lib
+import json
+from datalab.logger import logging
+import multiprocessing
+import os
+import sys
+import traceback
+import subprocess
+from Crypto.PublicKey import RSA
+from fabric import *
+
 if __name__ == "__main__":
     try:
-        data_engine['service_base_name'] = os.environ['conf_service_base_name']
-        data_engine['resource_group_name'] = os.environ['azure_resource_group_name']
-        data_engine['region'] = os.environ['azure_region']
-        data_engine['key_name'] = os.environ['conf_key_name']
-        data_engine['vpc_name'] = os.environ['azure_vpc_name']
-        data_engine['user_name'] = os.environ['edge_user_name']
-        data_engine['project_name'] = os.environ['project_name']
-        data_engine['project_tag'] = data_engine['project_name']
-        data_engine['endpoint_name'] = os.environ['endpoint_name']
-        data_engine['endpoint_tag'] = data_engine['endpoint_name']
-        data_engine['master_node_name'] = '{}-m'.format(data_engine['cluster_name'])
-        data_engine['key_name'] = os.environ['conf_key_name']
+        AzureMeta = datalab.meta_lib.AzureMeta()
+        AzureActions = datalab.actions_lib.AzureActions()
+        logging.info('Generating infrastructure names and tags')
+        hdinsight_conf = dict()
+        hdinsight_conf['service_base_name'] = os.environ['conf_service_base_name']
+        hdinsight_conf['resource_group_name'] = os.environ['azure_resource_group_name']
+        hdinsight_conf['region'] = os.environ['azure_region']
+        hdinsight_conf['key_name'] = os.environ['conf_key_name']
+        hdinsight_conf['vpc_name'] = os.environ['azure_vpc_name']
+        hdinsight_conf['user_name'] = os.environ['edge_user_name']
+        hdinsight_conf['project_name'] = os.environ['project_name']
+        hdinsight_conf['project_tag'] = hdinsight_conf['project_name']
+        hdinsight_conf['endpoint_name'] = os.environ['endpoint_name']
+        hdinsight_conf['endpoint_tag'] = hdinsight_conf['endpoint_name']
+        hdinsight_conf['key_name'] = os.environ['conf_key_name']
+        hdinsight_conf['hdinsight_master_instance_type'] = os.environ['hdinsight_master_instance_type']
+        hdinsight_conf['hdinsight_slave_instance_type'] = os.environ['hdinsight_slave_instance_type']
         if 'computational_name' in os.environ:
-            data_engine['computational_name'] = os.environ['computational_name']
+            hdinsight_conf['computational_name'] = os.environ['computational_name']
         else:
-            data_engine['computational_name'] = ''
-        data_engine['cluster_name'] = '{}-{}-{}-des-{}'.format(data_engine['service_base_name'],
-                                                              data_engine['project_name'],
-                                                              data_engine['endpoint_name'],
-                                                              data_engine['computational_name'])
+            hdinsight_conf['computational_name'] = ''
+        hdinsight_conf['cluster_name'] = '{}-{}-{}-des-{}'.format(hdinsight_conf['service_base_name'],
+                                                               hdinsight_conf['project_name'],
+                                                               hdinsight_conf['endpoint_name'],
+                                                               hdinsight_conf['computational_name'])
+        hdinsight_conf['cluster_url'] = 'https://{}.azurehdinsight.net'.format(hdinsight_conf['cluster_name'])
+        hdinsight_conf['cluster_jupyter_url'] = '{}/jupyter/'.format(hdinsight_conf['cluster_url'])
+        hdinsight_conf['cluster_sparkhistory_url'] = '{}/sparkhistory/'.format(hdinsight_conf['cluster_url'])
+        hdinsight_conf['cluster_zeppelin_url'] = '{}/zeppelin/'.format(hdinsight_conf['cluster_url'])
+        logging.info('[SUMMARY]')
+        logging.info("Service base name: {}".format(hdinsight_conf['service_base_name']))
+        logging.info("Region: {}".format(hdinsight_conf['region']))
+        logging.info("Cluster name: {}".format(hdinsight_conf['cluster_name']))
+        logging.info("Master node shape: {}".format(hdinsight_conf['hdinsight_master_instance_type']))
+        logging.info("Slave node shape: {}".format(hdinsight_conf['hdinsight_slave_instance_type']))
+        logging.info("Instance count: {}".format(str(os.environ['hdinsight_count'])))
+        logging.info("URL access username: datalab-user")
+        logging.info("URL access password: {}".format(os.environ['access_password']))
+
         with open("/root/result.json", 'w') as result:
-            res = {"hostname": data_engine['cluster_name'],
-                   "instance_id": data_engine['master_node_name'],
-                   "key_name": data_engine['key_name'],
+            res = {"hostname": hdinsight_conf['cluster_name'],
+                   "key_name": hdinsight_conf['key_name'],
                    "Action": "Create new HDInsight cluster",
                    "computational_url": [
                        {"description": "HDInsight cluster",
-                        "url": "spark_master_access_url"}
-                       # {"description": "Apache Spark Master (via tunnel)",
-                       # "url": spark_master_url}
+                        "url": hdinsight_conf['cluster_url']},
+                       {"description": "Apache Spark History",
+                        "url": hdinsight_conf['cluster_sparkhistory_url']},
+                       {"description": "Jupyter notebook",
+                        "url": hdinsight_conf['cluster_jupyter_url']},
+                       {"description": "Zeppelin notebook",
+                        "url": hdinsight_conf['cluster_zeppelin_url']}
                    ]
                    }
             result.write(json.dumps(res))
-    except:
-        pass
\ No newline at end of file
+    except Exception as err:
+        traceback.print_exc()
+        datalab.fab.append_result("Error with writing results", str(err))
+        AzureActions.terminate_hdinsight_cluster(hdinsight_conf['resource_group_name'], hdinsight_conf['cluster_name'])
+        sys.exit(1)
diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
index fd0aca6a0..a754e99e2 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
@@ -25,6 +25,7 @@ import argparse
 import json
 import sys
 import secrets
+import os
 from datalab.actions_lib import *
 from datalab.meta_lib import *
 from datalab.logger import logging
@@ -123,7 +124,7 @@ def create_cluster_parameters(location, tags, cluster_version, cluster_login_use
                     Role(
                         name="zookeepernode",
                         target_instance_count=3,
-                        hardware_profile=HardwareProfile(vm_size="Standard_A4_v2"),
+                        hardware_profile=HardwareProfile(vm_size="Standard_A2_v2"),
                         os_profile=OsProfile(
                             linux_operating_system_profile=LinuxOperatingSystemProfile(
                                 username=cluster_login_username,
@@ -161,6 +162,7 @@ def create_cluster_parameters(location, tags, cluster_version, cluster_login_use
 if __name__ == "__main__":
     #parser.print_help()
     password = secrets.token_urlsafe(20)
+    os.environ['access_password'] = password
     params = create_cluster_parameters(args.location, json.loads(args.tags), args.cluster_version, 'datalab-user',
                                        password, args.master_instance_type, args.worker_count,
                                        args.worker_instance_type, args.storage_account_name, args.storage_account_key,
@@ -168,9 +170,9 @@ if __name__ == "__main__":
 
     build_hdinsight_cluster(args.resource_group_name, args.cluster_name, params)
 
-    logfile = '{}_creation.log'.format(args.cluster_name)
-    logpath = '/response/' + logfile
-    out = open(logpath, 'w')
-    out.close()
-
-    sys.exit(0)
+    # logfile = '{}_creation.log'.format(args.cluster_name)
+    # logpath = '/response/' + logfile
+    # out = open(logpath, 'w')
+    # out.close()
+    #
+    # sys.exit(0)
diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py
index 4a168cf9a..1c7701644 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py
@@ -67,7 +67,7 @@ if __name__ == "__main__":
         logging.info('[TERMINATE HDINSIGHT CLUSTER AND ASSOCIATED RESOURCES]')
         try:
             cluster = AzureMeta.get_hdinsight_cluster(hdinsight_conf['resource_group_name'], hdinsight_conf['cluster_name'])
-            if cluster.properties.cluster_state == 'Running':
+            if cluster and cluster.properties.cluster_state == 'Running':
                 AzureActions.terminate_hdinsight_cluster(hdinsight_conf['resource_group_name'],
                                                          hdinsight_conf['cluster_name'])
                 for storage_account in AzureMeta.list_storage_accounts(hdinsight_conf['resource_group_name']):
diff --git a/infrastructure-provisioning/src/general/scripts/azure/project_terminate.py b/infrastructure-provisioning/src/general/scripts/azure/project_terminate.py
index bd400ac43..092310b62 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/project_terminate.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/project_terminate.py
@@ -40,8 +40,7 @@ def terminate_edge_node(resource_group_name, service_base_name, project_tag, sub
             for cluster in clusters_list:
                 if "sbn" in cluster.tags and service_base_name == cluster.tags["sbn"] and \
                         "project" in cluster.tags and cluster.tags['project'] == project_tag:
-                    print(cluster.name + ' found for termination')
-                    #AzureActions.terminate_hdinsight_cluster(cluster.name, region)
+                    AzureActions.terminate_hdinsight_cluster(resource_group_name, cluster.name)
                     logging.info('The HDinsight cluster {} has been terminated successfully'.format(cluster.name))
         else:
             logging.info("There are no HDinsight clusters to terminate.")


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datalab.apache.org
For additional commands, e-mail: commits-help@datalab.apache.org


[incubator-datalab] 01/02: [DATALAB-1408]: changed print to logging

Posted by lf...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

lfrolov pushed a commit to branch DATALAB-1408
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git

commit 1e9664da1f1fc4e66ce7bf655a3b38b01293bf13
Author: leonidfrolov <fr...@gmail.com>
AuthorDate: Wed Sep 7 16:56:46 2022 +0300

    [DATALAB-1408]: changed print to logging
---
 infrastructure-provisioning/src/general/lib/azure/actions_lib.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/infrastructure-provisioning/src/general/lib/azure/actions_lib.py b/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
index 1559a7468..3f45aa41d 100644
--- a/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
+++ b/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
@@ -1174,12 +1174,12 @@ class AzureActions:
 
     def create_hdinsight_cluster(self, resource_group_name, cluster_name, cluster_parameters):
         try:
-            print('Starting to create HDInsight Spark cluster {}'.format(cluster_name))
+            logging.info('Starting to create HDInsight Spark cluster {}'.format(cluster_name))
             result = self.hdinsight_client.clusters.begin_create(resource_group_name, cluster_name, cluster_parameters)
             cluster = datalab.meta_lib.AzureMeta().get_hdinsight_cluster(resource_group_name, cluster_name)
             while cluster.properties.cluster_state != 'Running':
                 time.sleep(15)
-                print('The cluster is being provisioned... Please wait')
+                logging.info('The cluster is being provisioned... Please wait')
                 cluster = datalab.meta_lib.AzureMeta().get_hdinsight_cluster(resource_group_name, cluster_name)
             return result
         except Exception as err:
@@ -1193,12 +1193,12 @@ class AzureActions:
 
     def terminate_hdinsight_cluster(self, resource_group_name, cluster_name):
         try:
-            print('Starting to terminate HDInsight cluster {}'.format(cluster_name))
+            logging.info('Starting to terminate HDInsight cluster {}'.format(cluster_name))
             result = self.hdinsight_client.clusters.begin_delete(resource_group_name, cluster_name)
             cluster_status = datalab.meta_lib.AzureMeta().get_hdinsight_cluster(resource_group_name, cluster_name)
             while cluster_status:
                 time.sleep(15)
-                print('The cluster is being terminated... Please wait')
+                logging.info('The cluster is being terminated... Please wait')
                 cluster_status = datalab.meta_lib.AzureMeta().get_hdinsight_cluster(resource_group_name, cluster_name)
             return result
         except Exception as err:


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datalab.apache.org
For additional commands, e-mail: commits-help@datalab.apache.org