You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datalab.apache.org by lf...@apache.org on 2022/09/07 15:12:43 UTC
[incubator-datalab] 02/02: [DATALAB-1408]: fixed minor bugs in the Azure HDInsight scripts, changed ZooKeeper node VM size from Standard_A4_v2 to Standard_A2_v2
This is an automated email from the ASF dual-hosted git repository.
lfrolov pushed a commit to branch DATALAB-1408
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git
commit bd5f36720bed6ac7fcca3097eac68e8f3d493066
Author: leonidfrolov <fr...@gmail.com>
AuthorDate: Wed Sep 7 18:12:30 2022 +0300
[DATALAB-1408]: fixed minor bugs in the Azure HDInsight scripts, changed ZooKeeper node VM size from Standard_A4_v2 to Standard_A2_v2
---
.../scripts/azure/dataengine-service_configure.py | 90 +++++++++++++++-------
.../scripts/azure/dataengine-service_create.py | 16 ++--
.../scripts/azure/dataengine-service_terminate.py | 2 +-
.../src/general/scripts/azure/project_terminate.py | 3 +-
4 files changed, 75 insertions(+), 36 deletions(-)
diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py
index 9973c3d5c..ed0a9ab85 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py
@@ -21,40 +21,78 @@
#
# ******************************************************************************
+import datalab.actions_lib
+import datalab.fab
+import datalab.meta_lib
+import json
+from datalab.logger import logging
+import multiprocessing
+import os
+import sys
+import traceback
+import subprocess
+from Crypto.PublicKey import RSA
+from fabric import *
+
if __name__ == "__main__":
try:
- data_engine['service_base_name'] = os.environ['conf_service_base_name']
- data_engine['resource_group_name'] = os.environ['azure_resource_group_name']
- data_engine['region'] = os.environ['azure_region']
- data_engine['key_name'] = os.environ['conf_key_name']
- data_engine['vpc_name'] = os.environ['azure_vpc_name']
- data_engine['user_name'] = os.environ['edge_user_name']
- data_engine['project_name'] = os.environ['project_name']
- data_engine['project_tag'] = data_engine['project_name']
- data_engine['endpoint_name'] = os.environ['endpoint_name']
- data_engine['endpoint_tag'] = data_engine['endpoint_name']
- data_engine['master_node_name'] = '{}-m'.format(data_engine['cluster_name'])
- data_engine['key_name'] = os.environ['conf_key_name']
+ AzureMeta = datalab.meta_lib.AzureMeta()
+ AzureActions = datalab.actions_lib.AzureActions()
+ logging.info('Generating infrastructure names and tags')
+ hdinsight_conf = dict()
+ hdinsight_conf['service_base_name'] = os.environ['conf_service_base_name']
+ hdinsight_conf['resource_group_name'] = os.environ['azure_resource_group_name']
+ hdinsight_conf['region'] = os.environ['azure_region']
+ hdinsight_conf['key_name'] = os.environ['conf_key_name']
+ hdinsight_conf['vpc_name'] = os.environ['azure_vpc_name']
+ hdinsight_conf['user_name'] = os.environ['edge_user_name']
+ hdinsight_conf['project_name'] = os.environ['project_name']
+ hdinsight_conf['project_tag'] = hdinsight_conf['project_name']
+ hdinsight_conf['endpoint_name'] = os.environ['endpoint_name']
+ hdinsight_conf['endpoint_tag'] = hdinsight_conf['endpoint_name']
+ hdinsight_conf['key_name'] = os.environ['conf_key_name']
+ hdinsight_conf['hdinsight_master_instance_type'] = os.environ['hdinsight_master_instance_type']
+ hdinsight_conf['hdinsight_slave_instance_type'] = os.environ['hdinsight_slave_instance_type']
if 'computational_name' in os.environ:
- data_engine['computational_name'] = os.environ['computational_name']
+ hdinsight_conf['computational_name'] = os.environ['computational_name']
else:
- data_engine['computational_name'] = ''
- data_engine['cluster_name'] = '{}-{}-{}-des-{}'.format(data_engine['service_base_name'],
- data_engine['project_name'],
- data_engine['endpoint_name'],
- data_engine['computational_name'])
+ hdinsight_conf['computational_name'] = ''
+ hdinsight_conf['cluster_name'] = '{}-{}-{}-des-{}'.format(hdinsight_conf['service_base_name'],
+ hdinsight_conf['project_name'],
+ hdinsight_conf['endpoint_name'],
+ hdinsight_conf['computational_name'])
+ hdinsight_conf['cluster_url'] = 'https://{}.azurehdinsight.net'.format(hdinsight_conf['cluster_name'])
+ hdinsight_conf['cluster_jupyter_url'] = '{}/jupyter/'.format(hdinsight_conf['cluster_url'])
+ hdinsight_conf['cluster_sparkhistory_url'] = '{}/sparkhistory/'.format(hdinsight_conf['cluster_url'])
+ hdinsight_conf['cluster_zeppelin_url'] = '{}/zeppelin/'.format(hdinsight_conf['cluster_url'])
+ logging.info('[SUMMARY]')
+ logging.info("Service base name: {}".format(hdinsight_conf['service_base_name']))
+ logging.info("Region: {}".format(hdinsight_conf['region']))
+ logging.info("Cluster name: {}".format(hdinsight_conf['cluster_name']))
+ logging.info("Master node shape: {}".format(hdinsight_conf['hdinsight_master_instance_type']))
+ logging.info("Slave node shape: {}".format(hdinsight_conf['hdinsight_slave_instance_type']))
+ logging.info("Instance count: {}".format(str(os.environ['hdinsight_count'])))
+ logging.info("URL access username: datalab-user")
+ logging.info("URL access password: {}".format(os.environ['access_password']))
+
with open("/root/result.json", 'w') as result:
- res = {"hostname": data_engine['cluster_name'],
- "instance_id": data_engine['master_node_name'],
- "key_name": data_engine['key_name'],
+ res = {"hostname": hdinsight_conf['cluster_name'],
+ "key_name": hdinsight_conf['key_name'],
"Action": "Create new HDInsight cluster",
"computational_url": [
{"description": "HDInsight cluster",
- "url": "spark_master_access_url"}
- # {"description": "Apache Spark Master (via tunnel)",
- # "url": spark_master_url}
+ "url": hdinsight_conf['cluster_url']},
+ {"description": "Apache Spark History",
+ "url": hdinsight_conf['cluster_sparkhistory_url']},
+ {"description": "Jupyter notebook",
+ "url": hdinsight_conf['cluster_jupyter_url']},
+ {"description": "Zeppelin notebook",
+ "url": hdinsight_conf['cluster_zeppelin_url']}
]
}
result.write(json.dumps(res))
- except:
- pass
\ No newline at end of file
+ except Exception as err:
+ traceback.print_exc()
+ datalab.fab.append_result("Error with writing results", str(err))
+ AzureActions.terminate_hdinsight_cluster(hdinsight_conf['resource_group_name'], hdinsight_conf['cluster_name'])
+ sys.exit(1)
diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
index fd0aca6a0..a754e99e2 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
@@ -25,6 +25,7 @@ import argparse
import json
import sys
import secrets
+import os
from datalab.actions_lib import *
from datalab.meta_lib import *
from datalab.logger import logging
@@ -123,7 +124,7 @@ def create_cluster_parameters(location, tags, cluster_version, cluster_login_use
Role(
name="zookeepernode",
target_instance_count=3,
- hardware_profile=HardwareProfile(vm_size="Standard_A4_v2"),
+ hardware_profile=HardwareProfile(vm_size="Standard_A2_v2"),
os_profile=OsProfile(
linux_operating_system_profile=LinuxOperatingSystemProfile(
username=cluster_login_username,
@@ -161,6 +162,7 @@ def create_cluster_parameters(location, tags, cluster_version, cluster_login_use
if __name__ == "__main__":
#parser.print_help()
password = secrets.token_urlsafe(20)
+ os.environ['access_password'] = password
params = create_cluster_parameters(args.location, json.loads(args.tags), args.cluster_version, 'datalab-user',
password, args.master_instance_type, args.worker_count,
args.worker_instance_type, args.storage_account_name, args.storage_account_key,
@@ -168,9 +170,9 @@ if __name__ == "__main__":
build_hdinsight_cluster(args.resource_group_name, args.cluster_name, params)
- logfile = '{}_creation.log'.format(args.cluster_name)
- logpath = '/response/' + logfile
- out = open(logpath, 'w')
- out.close()
-
- sys.exit(0)
+ # logfile = '{}_creation.log'.format(args.cluster_name)
+ # logpath = '/response/' + logfile
+ # out = open(logpath, 'w')
+ # out.close()
+ #
+ # sys.exit(0)
diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py
index 4a168cf9a..1c7701644 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py
@@ -67,7 +67,7 @@ if __name__ == "__main__":
logging.info('[TERMINATE HDINSIGHT CLUSTER AND ASSOCIATED RESOURCES]')
try:
cluster = AzureMeta.get_hdinsight_cluster(hdinsight_conf['resource_group_name'], hdinsight_conf['cluster_name'])
- if cluster.properties.cluster_state == 'Running':
+ if cluster and cluster.properties.cluster_state == 'Running':
AzureActions.terminate_hdinsight_cluster(hdinsight_conf['resource_group_name'],
hdinsight_conf['cluster_name'])
for storage_account in AzureMeta.list_storage_accounts(hdinsight_conf['resource_group_name']):
diff --git a/infrastructure-provisioning/src/general/scripts/azure/project_terminate.py b/infrastructure-provisioning/src/general/scripts/azure/project_terminate.py
index bd400ac43..092310b62 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/project_terminate.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/project_terminate.py
@@ -40,8 +40,7 @@ def terminate_edge_node(resource_group_name, service_base_name, project_tag, sub
for cluster in clusters_list:
if "sbn" in cluster.tags and service_base_name == cluster.tags["sbn"] and \
"project" in cluster.tags and cluster.tags['project'] == project_tag:
- print(cluster.name + ' found for termination')
- #AzureActions.terminate_hdinsight_cluster(cluster.name, region)
+ AzureActions.terminate_hdinsight_cluster(resource_group_name, cluster.name)
logging.info('The HDinsight cluster {} has been terminated successfully'.format(cluster.name))
else:
logging.info("There are no HDinsight clusters to terminate.")
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datalab.apache.org
For additional commands, e-mail: commits-help@datalab.apache.org