You are viewing a plain text version of this content. The canonical link to the original message was not preserved in this plain-text copy.
Posted to commits@datalab.apache.org by lf...@apache.org on 2022/09/07 15:12:43 UTC

[incubator-datalab] 02/02: [DATALAB-1408]: fixed minor bugs, changed zookeeper shape to A2

This is an automated email from the ASF dual-hosted git repository.

lfrolov pushed a commit to branch DATALAB-1408
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git

commit bd5f36720bed6ac7fcca3097eac68e8f3d493066
Author: leonidfrolov <fr...@gmail.com>
AuthorDate: Wed Sep 7 18:12:30 2022 +0300

    [DATALAB-1408]: fixed minor bugs, changed zookeeper shape to A2
---
 .../scripts/azure/dataengine-service_configure.py  | 90 +++++++++++++++-------
 .../scripts/azure/dataengine-service_create.py     | 16 ++--
 .../scripts/azure/dataengine-service_terminate.py  |  2 +-
 .../src/general/scripts/azure/project_terminate.py |  3 +-
 4 files changed, 75 insertions(+), 36 deletions(-)

diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py
index 9973c3d5c..ed0a9ab85 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py
@@ -21,40 +21,78 @@
 #
 # ******************************************************************************
 
+import datalab.actions_lib
+import datalab.fab
+import datalab.meta_lib
+import json
+from datalab.logger import logging
+import multiprocessing
+import os
+import sys
+import traceback
+import subprocess
+from Crypto.PublicKey import RSA
+from fabric import *
+
 if __name__ == "__main__":
     try:
-        data_engine['service_base_name'] = os.environ['conf_service_base_name']
-        data_engine['resource_group_name'] = os.environ['azure_resource_group_name']
-        data_engine['region'] = os.environ['azure_region']
-        data_engine['key_name'] = os.environ['conf_key_name']
-        data_engine['vpc_name'] = os.environ['azure_vpc_name']
-        data_engine['user_name'] = os.environ['edge_user_name']
-        data_engine['project_name'] = os.environ['project_name']
-        data_engine['project_tag'] = data_engine['project_name']
-        data_engine['endpoint_name'] = os.environ['endpoint_name']
-        data_engine['endpoint_tag'] = data_engine['endpoint_name']
-        data_engine['master_node_name'] = '{}-m'.format(data_engine['cluster_name'])
-        data_engine['key_name'] = os.environ['conf_key_name']
+        AzureMeta = datalab.meta_lib.AzureMeta()
+        AzureActions = datalab.actions_lib.AzureActions()
+        logging.info('Generating infrastructure names and tags')
+        hdinsight_conf = dict()
+        hdinsight_conf['service_base_name'] = os.environ['conf_service_base_name']
+        hdinsight_conf['resource_group_name'] = os.environ['azure_resource_group_name']
+        hdinsight_conf['region'] = os.environ['azure_region']
+        hdinsight_conf['key_name'] = os.environ['conf_key_name']
+        hdinsight_conf['vpc_name'] = os.environ['azure_vpc_name']
+        hdinsight_conf['user_name'] = os.environ['edge_user_name']
+        hdinsight_conf['project_name'] = os.environ['project_name']
+        hdinsight_conf['project_tag'] = hdinsight_conf['project_name']
+        hdinsight_conf['endpoint_name'] = os.environ['endpoint_name']
+        hdinsight_conf['endpoint_tag'] = hdinsight_conf['endpoint_name']
+        hdinsight_conf['key_name'] = os.environ['conf_key_name']
+        hdinsight_conf['hdinsight_master_instance_type'] = os.environ['hdinsight_master_instance_type']
+        hdinsight_conf['hdinsight_slave_instance_type'] = os.environ['hdinsight_slave_instance_type']
         if 'computational_name' in os.environ:
-            data_engine['computational_name'] = os.environ['computational_name']
+            hdinsight_conf['computational_name'] = os.environ['computational_name']
         else:
-            data_engine['computational_name'] = ''
-        data_engine['cluster_name'] = '{}-{}-{}-des-{}'.format(data_engine['service_base_name'],
-                                                              data_engine['project_name'],
-                                                              data_engine['endpoint_name'],
-                                                              data_engine['computational_name'])
+            hdinsight_conf['computational_name'] = ''
+        hdinsight_conf['cluster_name'] = '{}-{}-{}-des-{}'.format(hdinsight_conf['service_base_name'],
+                                                               hdinsight_conf['project_name'],
+                                                               hdinsight_conf['endpoint_name'],
+                                                               hdinsight_conf['computational_name'])
+        hdinsight_conf['cluster_url'] = 'https://{}.azurehdinsight.net'.format(hdinsight_conf['cluster_name'])
+        hdinsight_conf['cluster_jupyter_url'] = '{}/jupyter/'.format(hdinsight_conf['cluster_url'])
+        hdinsight_conf['cluster_sparkhistory_url'] = '{}/sparkhistory/'.format(hdinsight_conf['cluster_url'])
+        hdinsight_conf['cluster_zeppelin_url'] = '{}/zeppelin/'.format(hdinsight_conf['cluster_url'])
+        logging.info('[SUMMARY]')
+        logging.info("Service base name: {}".format(hdinsight_conf['service_base_name']))
+        logging.info("Region: {}".format(hdinsight_conf['region']))
+        logging.info("Cluster name: {}".format(hdinsight_conf['cluster_name']))
+        logging.info("Master node shape: {}".format(hdinsight_conf['hdinsight_master_instance_type']))
+        logging.info("Slave node shape: {}".format(hdinsight_conf['hdinsight_slave_instance_type']))
+        logging.info("Instance count: {}".format(str(os.environ['hdinsight_count'])))
+        logging.info("URL access username: datalab-user")
+        logging.info("URL access password: {}".format(os.environ['access_password']))
+
         with open("/root/result.json", 'w') as result:
-            res = {"hostname": data_engine['cluster_name'],
-                   "instance_id": data_engine['master_node_name'],
-                   "key_name": data_engine['key_name'],
+            res = {"hostname": hdinsight_conf['cluster_name'],
+                   "key_name": hdinsight_conf['key_name'],
                    "Action": "Create new HDInsight cluster",
                    "computational_url": [
                        {"description": "HDInsight cluster",
-                        "url": "spark_master_access_url"}
-                       # {"description": "Apache Spark Master (via tunnel)",
-                       # "url": spark_master_url}
+                        "url": hdinsight_conf['cluster_url']},
+                       {"description": "Apache Spark History",
+                        "url": hdinsight_conf['cluster_sparkhistory_url']},
+                       {"description": "Jupyter notebook",
+                        "url": hdinsight_conf['cluster_jupyter_url']},
+                       {"description": "Zeppelin notebook",
+                        "url": hdinsight_conf['cluster_zeppelin_url']}
                    ]
                    }
             result.write(json.dumps(res))
-    except:
-        pass
\ No newline at end of file
+    except Exception as err:
+        traceback.print_exc()
+        datalab.fab.append_result("Error with writing results", str(err))
+        AzureActions.terminate_hdinsight_cluster(hdinsight_conf['resource_group_name'], hdinsight_conf['cluster_name'])
+        sys.exit(1)
diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
index fd0aca6a0..a754e99e2 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
@@ -25,6 +25,7 @@ import argparse
 import json
 import sys
 import secrets
+import os
 from datalab.actions_lib import *
 from datalab.meta_lib import *
 from datalab.logger import logging
@@ -123,7 +124,7 @@ def create_cluster_parameters(location, tags, cluster_version, cluster_login_use
                     Role(
                         name="zookeepernode",
                         target_instance_count=3,
-                        hardware_profile=HardwareProfile(vm_size="Standard_A4_v2"),
+                        hardware_profile=HardwareProfile(vm_size="Standard_A2_v2"),
                         os_profile=OsProfile(
                             linux_operating_system_profile=LinuxOperatingSystemProfile(
                                 username=cluster_login_username,
@@ -161,6 +162,7 @@ def create_cluster_parameters(location, tags, cluster_version, cluster_login_use
 if __name__ == "__main__":
     #parser.print_help()
     password = secrets.token_urlsafe(20)
+    os.environ['access_password'] = password
     params = create_cluster_parameters(args.location, json.loads(args.tags), args.cluster_version, 'datalab-user',
                                        password, args.master_instance_type, args.worker_count,
                                        args.worker_instance_type, args.storage_account_name, args.storage_account_key,
@@ -168,9 +170,9 @@ if __name__ == "__main__":
 
     build_hdinsight_cluster(args.resource_group_name, args.cluster_name, params)
 
-    logfile = '{}_creation.log'.format(args.cluster_name)
-    logpath = '/response/' + logfile
-    out = open(logpath, 'w')
-    out.close()
-
-    sys.exit(0)
+    # logfile = '{}_creation.log'.format(args.cluster_name)
+    # logpath = '/response/' + logfile
+    # out = open(logpath, 'w')
+    # out.close()
+    #
+    # sys.exit(0)
diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py
index 4a168cf9a..1c7701644 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py
@@ -67,7 +67,7 @@ if __name__ == "__main__":
         logging.info('[TERMINATE HDINSIGHT CLUSTER AND ASSOCIATED RESOURCES]')
         try:
             cluster = AzureMeta.get_hdinsight_cluster(hdinsight_conf['resource_group_name'], hdinsight_conf['cluster_name'])
-            if cluster.properties.cluster_state == 'Running':
+            if cluster and cluster.properties.cluster_state == 'Running':
                 AzureActions.terminate_hdinsight_cluster(hdinsight_conf['resource_group_name'],
                                                          hdinsight_conf['cluster_name'])
                 for storage_account in AzureMeta.list_storage_accounts(hdinsight_conf['resource_group_name']):
diff --git a/infrastructure-provisioning/src/general/scripts/azure/project_terminate.py b/infrastructure-provisioning/src/general/scripts/azure/project_terminate.py
index bd400ac43..092310b62 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/project_terminate.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/project_terminate.py
@@ -40,8 +40,7 @@ def terminate_edge_node(resource_group_name, service_base_name, project_tag, sub
             for cluster in clusters_list:
                 if "sbn" in cluster.tags and service_base_name == cluster.tags["sbn"] and \
                         "project" in cluster.tags and cluster.tags['project'] == project_tag:
-                    print(cluster.name + ' found for termination')
-                    #AzureActions.terminate_hdinsight_cluster(cluster.name, region)
+                    AzureActions.terminate_hdinsight_cluster(resource_group_name, cluster.name)
                     logging.info('The HDinsight cluster {} has been terminated successfully'.format(cluster.name))
         else:
             logging.info("There are no HDinsight clusters to terminate.")


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datalab.apache.org
For additional commands, e-mail: commits-help@datalab.apache.org