You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datalab.apache.org by lf...@apache.org on 2022/09/01 07:12:02 UTC
[incubator-datalab] 33/36: [DATALAB-2982]: added new files for hdinsight
This is an automated email from the ASF dual-hosted git repository.
lfrolov pushed a commit to branch DATALAB-1408
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git
commit c8092d2e2f0c552a702ba285c3bd907b43311183
Author: leonidfrolov <fr...@gmail.com>
AuthorDate: Tue Aug 16 11:15:32 2022 +0300
[DATALAB-2982]: added new files for hdinsight
---
.../files/azure/dataengine-service_Dockerfile | 39 +++++++++
.../azure/dataengine-service_description.json | 27 ++++++
.../scripts/azure/dataengine-service_configure.py | 60 +++++++++++++
.../scripts/azure/dataengine-service_prepare.py | 4 +-
.../scripts/azure/dataengine-service_terminate.py | 99 ++++++++++++++++++++++
5 files changed, 227 insertions(+), 2 deletions(-)
diff --git a/infrastructure-provisioning/src/general/files/azure/dataengine-service_Dockerfile b/infrastructure-provisioning/src/general/files/azure/dataengine-service_Dockerfile
new file mode 100644
index 000000000..2b443239b
--- /dev/null
+++ b/infrastructure-provisioning/src/general/files/azure/dataengine-service_Dockerfile
@@ -0,0 +1,39 @@
+# *****************************************************************************
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# ******************************************************************************
+
# Image for provisioning/managing an Azure HDInsight dataengine-service,
# layered on the common DataLab base image.
FROM docker.datalab-base:latest

# OS flavour build argument (e.g. debian/redhat) — selects which
# notebook_lib.py implementation is baked into the image.
ARG OS

# Fabric entry point and template description for this resource type.
COPY dataengine-service/fabfile.py /root/
COPY dataengine-service/description.json /root/
# All HDInsight lifecycle scripts (prepare/configure/terminate/...).
COPY general/scripts/azure/dataengine-service_* /root/scripts/
# NOTE(review): python3.8 path is hardcoded — breaks silently if the base
# image moves to another Python minor version; confirm against datalab-base.
COPY general/lib/os/${OS}/notebook_lib.py /usr/lib/python3.8/datalab/notebook_lib.py
COPY general/scripts/os/common_* /root/scripts/
COPY general/scripts/os/install_additional_libs.py /root/scripts/install_additional_libs.py
COPY general/scripts/os/get_list_available_pkgs.py /root/scripts/get_list_available_pkgs.py
# Inactivity-tracking systemd units and helper script templates.
COPY general/templates/os/inactive.sh /root/templates/
COPY general/templates/os/inactive.service /root/templates/
COPY general/templates/os/inactive.timer /root/templates/

# Make the entry point and every lifecycle script executable.
RUN chmod a+x /root/fabfile.py; \
    chmod a+x /root/scripts/*
+
diff --git a/infrastructure-provisioning/src/general/files/azure/dataengine-service_description.json b/infrastructure-provisioning/src/general/files/azure/dataengine-service_description.json
new file mode 100644
index 000000000..d8ad248e5
--- /dev/null
+++ b/infrastructure-provisioning/src/general/files/azure/dataengine-service_description.json
@@ -0,0 +1,27 @@
+{
+ "template_name": "HDInsight cluster",
+ "description": "HDInsight cluster",
+ "environment_type": "computational",
+ "computation_resources_shapes":
+ {
+ "For testing" : [
+ {"Size": "S", "Description": "Standard_F4s", "Type": "Standard_F4s","Ram": "8.0 GB","Cpu": "4"}
+ ],
+ "Memory optimized" : [
+ {"Size": "S", "Description": "Standard_E4s_v3", "Type": "Standard_E4s_v3","Ram": "32 GB","Cpu": "4"},
+ {"Size": "M", "Description": "Standard_E16s_v3", "Type": "Standard_E16s_v3","Ram": "128 GB","Cpu": "16"},
+ {"Size": "L", "Description": "Standard_E32s_v3", "Type": "Standard_E32s_v3","Ram": "256 GB","Cpu": "32"}
+ ],
+ "Compute optimized": [
+ {"Size": "S", "Description": "Standard_F4s", "Type": "Standard_F4s","Ram": "8.0 GB","Cpu": "4"},
+ {"Size": "M", "Description": "Standard_F8s", "Type": "Standard_F8s","Ram": "16.0 GB","Cpu": "8"},
+ {"Size": "L", "Description": "Standard_F16s", "Type": "Standard_F16s","Ram": "32.0 GB","Cpu": "16"}
+ ],
+ "GPU optimized": [
+ {"Size": "S", "Description": "Standard_NC6", "Type": "Standard_NC6","Ram": "56.0 GB","Cpu": "6"}
+ ]
+ },
+ "templates":
+ [
+ ]
+}
\ No newline at end of file
diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py
index e69de29bb..9973c3d5c 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py
@@ -0,0 +1,60 @@
+#!/usr/bin/python3
+
+# *****************************************************************************
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# ******************************************************************************
+
if __name__ == "__main__":
    # Script-level imports: the file has no import section, so bring in the
    # stdlib names this block needs (fix: os/json were used but never imported).
    import json
    import os
    import sys
    import traceback

    try:
        # Fix: data_engine was never initialised — every assignment below
        # raised NameError in the original.
        data_engine = dict()

        # Collect cluster configuration from the provisioning environment.
        data_engine['service_base_name'] = os.environ['conf_service_base_name']
        data_engine['resource_group_name'] = os.environ['azure_resource_group_name']
        data_engine['region'] = os.environ['azure_region']
        data_engine['key_name'] = os.environ['conf_key_name']
        data_engine['vpc_name'] = os.environ['azure_vpc_name']
        data_engine['user_name'] = os.environ['edge_user_name']
        data_engine['project_name'] = os.environ['project_name']
        data_engine['project_tag'] = data_engine['project_name']
        data_engine['endpoint_name'] = os.environ['endpoint_name']
        data_engine['endpoint_tag'] = data_engine['endpoint_name']
        # 'computational_name' is optional; default to empty string.
        data_engine['computational_name'] = os.environ.get('computational_name', '')

        # Fix: cluster_name must be built BEFORE master_node_name — the
        # original read data_engine['cluster_name'] before assigning it
        # (guaranteed KeyError). Duplicate key_name assignment dropped.
        data_engine['cluster_name'] = '{}-{}-{}-des-{}'.format(data_engine['service_base_name'],
                                                               data_engine['project_name'],
                                                               data_engine['endpoint_name'],
                                                               data_engine['computational_name'])
        data_engine['master_node_name'] = '{}-m'.format(data_engine['cluster_name'])

        # Publish the provisioning result for the orchestrator to pick up.
        with open("/root/result.json", 'w') as result:
            res = {"hostname": data_engine['cluster_name'],
                   "instance_id": data_engine['master_node_name'],
                   "key_name": data_engine['key_name'],
                   "Action": "Create new HDInsight cluster",
                   "computational_url": [
                       {"description": "HDInsight cluster",
                        "url": "spark_master_access_url"}
                       # {"description": "Apache Spark Master (via tunnel)",
                       #  "url": spark_master_url}
                   ]
                   }
            result.write(json.dumps(res))
    except Exception:
        # Fix: the original bare 'except: pass' swallowed every error and let
        # the orchestrator believe configuration succeeded. Report and fail.
        traceback.print_exc()
        sys.exit(1)
\ No newline at end of file
diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py
index 140495368..177275f78 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py
@@ -103,5 +103,5 @@ def create_cluster_parameters():
)
if __name__ == "__main__":
    # TODO(DATALAB-2982): re-enable once HDInsight cluster creation is ready.
    # params = create_cluster_parameters()
    # create_hdinsight_cluster(RESOURCE_GROUP_NAME, CLUSTER_NAME, params)
    # Fix: an 'if' whose body is only comments is an IndentationError —
    # the commented-out calls left the script unimportable.
    pass
diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py
new file mode 100644
index 000000000..02466e026
--- /dev/null
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py
@@ -0,0 +1,99 @@
+#!/usr/bin/python3
+
+# *****************************************************************************
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# ******************************************************************************
+
+import boto3
+import datalab.actions_lib
+import datalab.fab
+import datalab.meta_lib
+import json
+import os
+import sys
+import traceback
+from datalab.logger import logging
+
+
def terminate_hdin_cluster(hdin_name, bucket_name, tag_name, nb_tag_value, ssh_user, key_path):
    """Terminate an HDInsight cluster and clean its config from storage.

    Not yet implemented: the commented-out body below was carried over from
    the AWS/EMR terminate script (boto3, S3, EMR kernels) and does not apply
    to Azure HDInsight — TODO(DATALAB-2982): replace with
    azure-mgmt-hdinsight ClusterManagementClient calls.

    :param hdin_name: computational (cluster) name tag value
    :param bucket_name: project bucket holding cluster configuration
    :param tag_name: service-base-name tag used to locate resources
    :param nb_tag_value: notebook instance the cluster is attached to
    :param ssh_user: OS user for SSH access to the notebook
    :param key_path: path to the SSH private key (.pem)
    :raises SystemExit: on any failure during termination
    """
    logging.info('Terminating hdin cluster and cleaning hdin config from S3 bucket')
    try:
        # Fix: the original had 'except:' with NO matching 'try:' (the try was
        # commented out together with the body) — a SyntaxError that made the
        # whole module unimportable. 'pass' keeps the try body non-empty until
        # the Azure implementation lands.
        # clusters_list = datalab.meta_lib.get_hdin_list(hdin_name, 'Value')
        # if clusters_list:
        #     for cluster_id in clusters_list:
        #         ... describe cluster, clean bucket, terminate, remove kernels
        # else:
        #     logging.info("There are no hdin clusters to terminate.")
        pass
    except Exception:
        # Narrowed from bare 'except' so SystemExit/KeyboardInterrupt
        # propagate; report the cause instead of exiting silently.
        traceback.print_exc()
        sys.exit(1)
+
+
if __name__ == "__main__":
    # generating variables dictionary
    # NOTE(review): AWS config-file generation in an Azure HDInsight script —
    # looks copied from the EMR terminate script; confirm it is intentional.
    datalab.actions_lib.create_aws_config_files()
    logging.info('Generating infrastructure names and tags')
    # Assemble naming inputs from the provisioning environment variables.
    hdin_conf = dict()
    hdin_conf['service_base_name'] = (os.environ['conf_service_base_name'])
    hdin_conf['hdin_name'] = os.environ['computational_name']
    hdin_conf['notebook_name'] = os.environ['notebook_instance_name']
    hdin_conf['project_name'] = os.environ['project_name']
    hdin_conf['endpoint_name'] = os.environ['endpoint_name']
    # Bucket naming: lowercase with '-' instead of '_' — presumably to meet
    # storage naming restrictions; verify against the bucket-creation script.
    hdin_conf['bucket_name'] = '{0}-{1}-{2}-bucket'.format(hdin_conf['service_base_name'], hdin_conf['project_name'],
                                                           hdin_conf['endpoint_name']).lower().replace('_', '-')
    hdin_conf['key_path'] = os.environ['conf_key_dir'] + '/' + os.environ['conf_key_name'] + '.pem'
    hdin_conf['tag_name'] = hdin_conf['service_base_name'] + '-tag'

    # TODO(DATALAB-2982): actual termination is disabled until the Azure
    # HDInsight implementation of terminate_hdin_cluster is ready.
    # try:
    #     logging.info('[TERMINATE hdin CLUSTER]')
    #     try:
    #         terminate_hdin_cluster(hdin_conf['hdin_name'], hdin_conf['bucket_name'], hdin_conf['tag_name'],
    #                                hdin_conf['notebook_name'], os.environ['conf_os_user'], hdin_conf['key_path'])
    #     except Exception as err:
    #         traceback.print_exc()
    #         datalab.fab.append_result("Failed to terminate hdin cluster.", str(err))
    #         raise Exception
    # except:
    #     sys.exit(1)

    # Publish the termination result for the orchestrator to pick up.
    try:
        with open("/root/result.json", 'w') as result:
            res = {"dataengine-service_name": hdin_conf['hdin_name'],
                   "notebook_name": hdin_conf['notebook_name'],
                   "Action": "Terminate HDInsight cluster"}
            print(json.dumps(res))
            result.write(json.dumps(res))
    except Exception as err:
        # Record the failure in the shared result log and signal failure.
        datalab.fab.append_result("Error with writing results", str(err))
        sys.exit(1)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datalab.apache.org
For additional commands, e-mail: commits-help@datalab.apache.org