You are viewing a plain text version of this content. The canonical link to the original message is available in the HTML version of this page.
Posted to commits@datalab.apache.org by lf...@apache.org on 2022/09/07 15:12:41 UTC
[incubator-datalab] branch DATALAB-1408 updated (3f54a31f2 -> bd5f36720)
This is an automated email from the ASF dual-hosted git repository.
lfrolov pushed a change to branch DATALAB-1408
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git
from 3f54a31f2 Merge branch 'develop' into DATALAB-1408
new 1e9664da1 [DATALAB-1408]: changed print to logging
new bd5f36720 [DATALAB-1408]: fixed minor bugs, changed zookeeper shape to A2
The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
.../src/general/lib/azure/actions_lib.py | 8 +-
.../scripts/azure/dataengine-service_configure.py | 90 +++++++++++++++-------
.../scripts/azure/dataengine-service_create.py | 16 ++--
.../scripts/azure/dataengine-service_terminate.py | 2 +-
.../src/general/scripts/azure/project_terminate.py | 3 +-
5 files changed, 79 insertions(+), 40 deletions(-)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datalab.apache.org
For additional commands, e-mail: commits-help@datalab.apache.org
[incubator-datalab] 02/02: [DATALAB-1408]: fixed minor bugs, changed zookeeper shape to A2
Posted by lf...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
lfrolov pushed a commit to branch DATALAB-1408
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git
commit bd5f36720bed6ac7fcca3097eac68e8f3d493066
Author: leonidfrolov <fr...@gmail.com>
AuthorDate: Wed Sep 7 18:12:30 2022 +0300
[DATALAB-1408]: fixed minor bugs, changed zookeeper shape to A2
---
.../scripts/azure/dataengine-service_configure.py | 90 +++++++++++++++-------
.../scripts/azure/dataengine-service_create.py | 16 ++--
.../scripts/azure/dataengine-service_terminate.py | 2 +-
.../src/general/scripts/azure/project_terminate.py | 3 +-
4 files changed, 75 insertions(+), 36 deletions(-)
diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py
index 9973c3d5c..ed0a9ab85 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py
@@ -21,40 +21,78 @@
#
# ******************************************************************************
+import datalab.actions_lib
+import datalab.fab
+import datalab.meta_lib
+import json
+from datalab.logger import logging
+import multiprocessing
+import os
+import sys
+import traceback
+import subprocess
+from Crypto.PublicKey import RSA
+from fabric import *
+
if __name__ == "__main__":
try:
- data_engine['service_base_name'] = os.environ['conf_service_base_name']
- data_engine['resource_group_name'] = os.environ['azure_resource_group_name']
- data_engine['region'] = os.environ['azure_region']
- data_engine['key_name'] = os.environ['conf_key_name']
- data_engine['vpc_name'] = os.environ['azure_vpc_name']
- data_engine['user_name'] = os.environ['edge_user_name']
- data_engine['project_name'] = os.environ['project_name']
- data_engine['project_tag'] = data_engine['project_name']
- data_engine['endpoint_name'] = os.environ['endpoint_name']
- data_engine['endpoint_tag'] = data_engine['endpoint_name']
- data_engine['master_node_name'] = '{}-m'.format(data_engine['cluster_name'])
- data_engine['key_name'] = os.environ['conf_key_name']
+ AzureMeta = datalab.meta_lib.AzureMeta()
+ AzureActions = datalab.actions_lib.AzureActions()
+ logging.info('Generating infrastructure names and tags')
+ hdinsight_conf = dict()
+ hdinsight_conf['service_base_name'] = os.environ['conf_service_base_name']
+ hdinsight_conf['resource_group_name'] = os.environ['azure_resource_group_name']
+ hdinsight_conf['region'] = os.environ['azure_region']
+ hdinsight_conf['key_name'] = os.environ['conf_key_name']
+ hdinsight_conf['vpc_name'] = os.environ['azure_vpc_name']
+ hdinsight_conf['user_name'] = os.environ['edge_user_name']
+ hdinsight_conf['project_name'] = os.environ['project_name']
+ hdinsight_conf['project_tag'] = hdinsight_conf['project_name']
+ hdinsight_conf['endpoint_name'] = os.environ['endpoint_name']
+ hdinsight_conf['endpoint_tag'] = hdinsight_conf['endpoint_name']
+ hdinsight_conf['key_name'] = os.environ['conf_key_name']
+ hdinsight_conf['hdinsight_master_instance_type'] = os.environ['hdinsight_master_instance_type']
+ hdinsight_conf['hdinsight_slave_instance_type'] = os.environ['hdinsight_slave_instance_type']
if 'computational_name' in os.environ:
- data_engine['computational_name'] = os.environ['computational_name']
+ hdinsight_conf['computational_name'] = os.environ['computational_name']
else:
- data_engine['computational_name'] = ''
- data_engine['cluster_name'] = '{}-{}-{}-des-{}'.format(data_engine['service_base_name'],
- data_engine['project_name'],
- data_engine['endpoint_name'],
- data_engine['computational_name'])
+ hdinsight_conf['computational_name'] = ''
+ hdinsight_conf['cluster_name'] = '{}-{}-{}-des-{}'.format(hdinsight_conf['service_base_name'],
+ hdinsight_conf['project_name'],
+ hdinsight_conf['endpoint_name'],
+ hdinsight_conf['computational_name'])
+ hdinsight_conf['cluster_url'] = 'https://{}.azurehdinsight.net'.format(hdinsight_conf['cluster_name'])
+ hdinsight_conf['cluster_jupyter_url'] = '{}/jupyter/'.format(hdinsight_conf['cluster_url'])
+ hdinsight_conf['cluster_sparkhistory_url'] = '{}/sparkhistory/'.format(hdinsight_conf['cluster_url'])
+ hdinsight_conf['cluster_zeppelin_url'] = '{}/zeppelin/'.format(hdinsight_conf['cluster_url'])
+ logging.info('[SUMMARY]')
+ logging.info("Service base name: {}".format(hdinsight_conf['service_base_name']))
+ logging.info("Region: {}".format(hdinsight_conf['region']))
+ logging.info("Cluster name: {}".format(hdinsight_conf['cluster_name']))
+ logging.info("Master node shape: {}".format(hdinsight_conf['hdinsight_master_instance_type']))
+ logging.info("Slave node shape: {}".format(hdinsight_conf['hdinsight_slave_instance_type']))
+ logging.info("Instance count: {}".format(str(os.environ['hdinsight_count'])))
+ logging.info("URL access username: datalab-user")
+ logging.info("URL access password: {}".format(os.environ['access_password']))
+
with open("/root/result.json", 'w') as result:
- res = {"hostname": data_engine['cluster_name'],
- "instance_id": data_engine['master_node_name'],
- "key_name": data_engine['key_name'],
+ res = {"hostname": hdinsight_conf['cluster_name'],
+ "key_name": hdinsight_conf['key_name'],
"Action": "Create new HDInsight cluster",
"computational_url": [
{"description": "HDInsight cluster",
- "url": "spark_master_access_url"}
- # {"description": "Apache Spark Master (via tunnel)",
- # "url": spark_master_url}
+ "url": hdinsight_conf['cluster_url']},
+ {"description": "Apache Spark History",
+ "url": hdinsight_conf['cluster_sparkhistory_url']},
+ {"description": "Jupyter notebook",
+ "url": hdinsight_conf['cluster_jupyter_url']},
+ {"description": "Zeppelin notebook",
+ "url": hdinsight_conf['cluster_zeppelin_url']}
]
}
result.write(json.dumps(res))
- except:
- pass
\ No newline at end of file
+ except Exception as err:
+ traceback.print_exc()
+ datalab.fab.append_result("Error with writing results", str(err))
+ AzureActions.terminate_hdinsight_cluster(hdinsight_conf['resource_group_name'], hdinsight_conf['cluster_name'])
+ sys.exit(1)
diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
index fd0aca6a0..a754e99e2 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
@@ -25,6 +25,7 @@ import argparse
import json
import sys
import secrets
+import os
from datalab.actions_lib import *
from datalab.meta_lib import *
from datalab.logger import logging
@@ -123,7 +124,7 @@ def create_cluster_parameters(location, tags, cluster_version, cluster_login_use
Role(
name="zookeepernode",
target_instance_count=3,
- hardware_profile=HardwareProfile(vm_size="Standard_A4_v2"),
+ hardware_profile=HardwareProfile(vm_size="Standard_A2_v2"),
os_profile=OsProfile(
linux_operating_system_profile=LinuxOperatingSystemProfile(
username=cluster_login_username,
@@ -161,6 +162,7 @@ def create_cluster_parameters(location, tags, cluster_version, cluster_login_use
if __name__ == "__main__":
#parser.print_help()
password = secrets.token_urlsafe(20)
+ os.environ['access_password'] = password
params = create_cluster_parameters(args.location, json.loads(args.tags), args.cluster_version, 'datalab-user',
password, args.master_instance_type, args.worker_count,
args.worker_instance_type, args.storage_account_name, args.storage_account_key,
@@ -168,9 +170,9 @@ if __name__ == "__main__":
build_hdinsight_cluster(args.resource_group_name, args.cluster_name, params)
- logfile = '{}_creation.log'.format(args.cluster_name)
- logpath = '/response/' + logfile
- out = open(logpath, 'w')
- out.close()
-
- sys.exit(0)
+ # logfile = '{}_creation.log'.format(args.cluster_name)
+ # logpath = '/response/' + logfile
+ # out = open(logpath, 'w')
+ # out.close()
+ #
+ # sys.exit(0)
diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py
index 4a168cf9a..1c7701644 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py
@@ -67,7 +67,7 @@ if __name__ == "__main__":
logging.info('[TERMINATE HDINSIGHT CLUSTER AND ASSOCIATED RESOURCES]')
try:
cluster = AzureMeta.get_hdinsight_cluster(hdinsight_conf['resource_group_name'], hdinsight_conf['cluster_name'])
- if cluster.properties.cluster_state == 'Running':
+ if cluster and cluster.properties.cluster_state == 'Running':
AzureActions.terminate_hdinsight_cluster(hdinsight_conf['resource_group_name'],
hdinsight_conf['cluster_name'])
for storage_account in AzureMeta.list_storage_accounts(hdinsight_conf['resource_group_name']):
diff --git a/infrastructure-provisioning/src/general/scripts/azure/project_terminate.py b/infrastructure-provisioning/src/general/scripts/azure/project_terminate.py
index bd400ac43..092310b62 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/project_terminate.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/project_terminate.py
@@ -40,8 +40,7 @@ def terminate_edge_node(resource_group_name, service_base_name, project_tag, sub
for cluster in clusters_list:
if "sbn" in cluster.tags and service_base_name == cluster.tags["sbn"] and \
"project" in cluster.tags and cluster.tags['project'] == project_tag:
- print(cluster.name + ' found for termination')
- #AzureActions.terminate_hdinsight_cluster(cluster.name, region)
+ AzureActions.terminate_hdinsight_cluster(resource_group_name, cluster.name)
logging.info('The HDinsight cluster {} has been terminated successfully'.format(cluster.name))
else:
logging.info("There are no HDinsight clusters to terminate.")
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datalab.apache.org
For additional commands, e-mail: commits-help@datalab.apache.org
[incubator-datalab] 01/02: [DATALAB-1408]: changed print to logging
Posted by lf...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
lfrolov pushed a commit to branch DATALAB-1408
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git
commit 1e9664da1f1fc4e66ce7bf655a3b38b01293bf13
Author: leonidfrolov <fr...@gmail.com>
AuthorDate: Wed Sep 7 16:56:46 2022 +0300
[DATALAB-1408]: changed print to logging
---
infrastructure-provisioning/src/general/lib/azure/actions_lib.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/infrastructure-provisioning/src/general/lib/azure/actions_lib.py b/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
index 1559a7468..3f45aa41d 100644
--- a/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
+++ b/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
@@ -1174,12 +1174,12 @@ class AzureActions:
def create_hdinsight_cluster(self, resource_group_name, cluster_name, cluster_parameters):
try:
- print('Starting to create HDInsight Spark cluster {}'.format(cluster_name))
+ logging.info('Starting to create HDInsight Spark cluster {}'.format(cluster_name))
result = self.hdinsight_client.clusters.begin_create(resource_group_name, cluster_name, cluster_parameters)
cluster = datalab.meta_lib.AzureMeta().get_hdinsight_cluster(resource_group_name, cluster_name)
while cluster.properties.cluster_state != 'Running':
time.sleep(15)
- print('The cluster is being provisioned... Please wait')
+ logging.info('The cluster is being provisioned... Please wait')
cluster = datalab.meta_lib.AzureMeta().get_hdinsight_cluster(resource_group_name, cluster_name)
return result
except Exception as err:
@@ -1193,12 +1193,12 @@ class AzureActions:
def terminate_hdinsight_cluster(self, resource_group_name, cluster_name):
try:
- print('Starting to terminate HDInsight cluster {}'.format(cluster_name))
+ logging.info('Starting to terminate HDInsight cluster {}'.format(cluster_name))
result = self.hdinsight_client.clusters.begin_delete(resource_group_name, cluster_name)
cluster_status = datalab.meta_lib.AzureMeta().get_hdinsight_cluster(resource_group_name, cluster_name)
while cluster_status:
time.sleep(15)
- print('The cluster is being terminated... Please wait')
+ logging.info('The cluster is being terminated... Please wait')
cluster_status = datalab.meta_lib.AzureMeta().get_hdinsight_cluster(resource_group_name, cluster_name)
return result
except Exception as err:
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datalab.apache.org
For additional commands, e-mail: commits-help@datalab.apache.org