You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datalab.apache.org by lf...@apache.org on 2022/09/01 07:12:02 UTC

[incubator-datalab] 33/36: [DATALAB-2982]: added new files for hdinsight

This is an automated email from the ASF dual-hosted git repository.

lfrolov pushed a commit to branch DATALAB-1408
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git

commit c8092d2e2f0c552a702ba285c3bd907b43311183
Author: leonidfrolov <fr...@gmail.com>
AuthorDate: Tue Aug 16 11:15:32 2022 +0300

    [DATALAB-2982]: added new files for hdinsight
---
 .../files/azure/dataengine-service_Dockerfile      | 39 +++++++++
 .../azure/dataengine-service_description.json      | 27 ++++++
 .../scripts/azure/dataengine-service_configure.py  | 60 +++++++++++++
 .../scripts/azure/dataengine-service_prepare.py    |  4 +-
 .../scripts/azure/dataengine-service_terminate.py  | 99 ++++++++++++++++++++++
 5 files changed, 227 insertions(+), 2 deletions(-)

diff --git a/infrastructure-provisioning/src/general/files/azure/dataengine-service_Dockerfile b/infrastructure-provisioning/src/general/files/azure/dataengine-service_Dockerfile
new file mode 100644
index 000000000..2b443239b
--- /dev/null
+++ b/infrastructure-provisioning/src/general/files/azure/dataengine-service_Dockerfile
@@ -0,0 +1,39 @@
+# *****************************************************************************
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# ******************************************************************************
+
# Image for provisioning Azure HDInsight (dataengine-service) clusters,
# built on top of the common DataLab base image.
FROM docker.datalab-base:latest

# Target OS flavour (e.g. debian/redhat); selects the matching notebook_lib.py below.
ARG OS

# Fabric entry point and template description for this computational resource.
COPY dataengine-service/fabfile.py /root/
COPY dataengine-service/description.json /root/
# All HDInsight lifecycle scripts (prepare/configure/terminate/...).
COPY general/scripts/azure/dataengine-service_* /root/scripts/
# NOTE(review): python3.8 path is hard-coded — confirm it matches the base image's interpreter.
COPY general/lib/os/${OS}/notebook_lib.py /usr/lib/python3.8/datalab/notebook_lib.py
COPY general/scripts/os/common_* /root/scripts/
COPY general/scripts/os/install_additional_libs.py /root/scripts/install_additional_libs.py
COPY general/scripts/os/get_list_available_pkgs.py /root/scripts/get_list_available_pkgs.py
# Inactivity-detection units used to stop idle resources.
COPY general/templates/os/inactive.sh /root/templates/
COPY general/templates/os/inactive.service /root/templates/
COPY general/templates/os/inactive.timer /root/templates/

# Make the entry point and all lifecycle scripts executable.
RUN chmod a+x /root/fabfile.py; \
    chmod a+x /root/scripts/*
+
diff --git a/infrastructure-provisioning/src/general/files/azure/dataengine-service_description.json b/infrastructure-provisioning/src/general/files/azure/dataengine-service_description.json
new file mode 100644
index 000000000..d8ad248e5
--- /dev/null
+++ b/infrastructure-provisioning/src/general/files/azure/dataengine-service_description.json
@@ -0,0 +1,27 @@
+{
+  "template_name": "HDInsight cluster",
+  "description": "HDInsight cluster",
+  "environment_type": "computational",
+    "computation_resources_shapes":
+    {
+      "For testing" : [
+        {"Size": "S", "Description": "Standard_F4s", "Type": "Standard_F4s","Ram": "8.0 GB","Cpu": "4"}
+      ],
+      "Memory optimized" : [
+        {"Size": "S", "Description": "Standard_E4s_v3", "Type": "Standard_E4s_v3","Ram": "32 GB","Cpu": "4"},
+        {"Size": "M", "Description": "Standard_E16s_v3", "Type": "Standard_E16s_v3","Ram": "128 GB","Cpu": "16"},
+        {"Size": "L", "Description": "Standard_E32s_v3", "Type": "Standard_E32s_v3","Ram": "256 GB","Cpu": "32"}
+      ],
+      "Compute optimized": [
+        {"Size": "S", "Description": "Standard_F4s", "Type": "Standard_F4s","Ram": "8.0 GB","Cpu": "4"},
+        {"Size": "M", "Description": "Standard_F8s", "Type": "Standard_F8s","Ram": "16.0 GB","Cpu": "8"},
+        {"Size": "L", "Description": "Standard_F16s", "Type": "Standard_F16s","Ram": "32.0 GB","Cpu": "16"}
+      ],
+      "GPU optimized": [
+        {"Size": "S", "Description": "Standard_NC6", "Type": "Standard_NC6","Ram": "56.0 GB","Cpu": "6"}
+      ]
+    },
+  "templates":
+  [
+  ]
+}
\ No newline at end of file
diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py
index e69de29bb..9973c3d5c 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py
@@ -0,0 +1,60 @@
+#!/usr/bin/python3
+
+# *****************************************************************************
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# ******************************************************************************
+
if __name__ == "__main__":
    # Local imports: this new file declared none, so os/json/sys were
    # unresolved names in the original.
    import json
    import os
    import sys
    import traceback

    try:
        # Collect HDInsight cluster configuration from the environment
        # variables supplied by the provisioning framework.
        # The original referenced data_engine without ever initializing it
        # (guaranteed NameError, masked by the bare `except: pass`).
        data_engine = dict()
        data_engine['service_base_name'] = os.environ['conf_service_base_name']
        data_engine['resource_group_name'] = os.environ['azure_resource_group_name']
        data_engine['region'] = os.environ['azure_region']
        data_engine['key_name'] = os.environ['conf_key_name']
        data_engine['vpc_name'] = os.environ['azure_vpc_name']
        data_engine['user_name'] = os.environ['edge_user_name']
        data_engine['project_name'] = os.environ['project_name']
        data_engine['project_tag'] = data_engine['project_name']
        data_engine['endpoint_name'] = os.environ['endpoint_name']
        data_engine['endpoint_tag'] = data_engine['endpoint_name']
        # Optional: absent when the cluster has no explicit computational name.
        data_engine['computational_name'] = os.environ.get('computational_name', '')
        data_engine['cluster_name'] = '{}-{}-{}-des-{}'.format(data_engine['service_base_name'],
                                                              data_engine['project_name'],
                                                              data_engine['endpoint_name'],
                                                              data_engine['computational_name'])
        # Master node name derives from the cluster name, so it must be
        # computed after cluster_name (the original read cluster_name before
        # it was assigned).
        data_engine['master_node_name'] = '{}-m'.format(data_engine['cluster_name'])

        # Publish the provisioning result for the framework to pick up.
        with open("/root/result.json", 'w') as result:
            res = {"hostname": data_engine['cluster_name'],
                   "instance_id": data_engine['master_node_name'],
                   "key_name": data_engine['key_name'],
                   "Action": "Create new HDInsight cluster",
                   "computational_url": [
                       {"description": "HDInsight cluster",
                        "url": "spark_master_access_url"}
                       # {"description": "Apache Spark Master (via tunnel)",
                       # "url": spark_master_url}
                   ]
                   }
            result.write(json.dumps(res))
    except Exception:
        # Surface failures instead of silently swallowing them
        # (the original used a bare `except: pass`).
        traceback.print_exc()
        sys.exit(1)
diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py
index 140495368..177275f78 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py
@@ -103,5 +103,5 @@ def create_cluster_parameters():
     )
 
 if __name__ == "__main__":
-    params = create_cluster_parameters()
-    create_hdinsight_cluster(RESOURCE_GROUP_NAME,CLUSTER_NAME, params)
\ No newline at end of file
+    #params = create_cluster_parameters()
+    #create_hdinsight_cluster(RESOURCE_GROUP_NAME,CLUSTER_NAME, params)
\ No newline at end of file
diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py
new file mode 100644
index 000000000..02466e026
--- /dev/null
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py
@@ -0,0 +1,99 @@
+#!/usr/bin/python3
+
+# *****************************************************************************
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# ******************************************************************************
+
+import boto3
+import datalab.actions_lib
+import datalab.fab
+import datalab.meta_lib
+import json
+import os
+import sys
+import traceback
+from datalab.logger import logging
+
+
def terminate_hdin_cluster(hdin_name, bucket_name, tag_name, nb_tag_value, ssh_user, key_path):
    """Terminate an HDInsight cluster and clean up its storage artifacts.

    Currently a stub: the termination flow has not been ported from the AWS
    EMR template yet (see DATALAB-2982 / the EMR dataengine-service_terminate
    script for the reference implementation). The original version kept the
    whole ``try`` body commented out, leaving an ``except:`` with no matching
    ``try:`` — a SyntaxError that made the entire module unimportable.

    :param hdin_name: computational (cluster) name to terminate
    :param bucket_name: storage bucket to clean of cluster configs
    :param tag_name: tag key used to locate the attached notebook
    :param nb_tag_value: tag value (notebook instance name)
    :param ssh_user: OS user for notebook access when removing kernels
    :param key_path: path to the SSH private key
    """
    logging.info('Terminating hdin cluster and cleaning hdin config from S3 bucket')
    try:
        # TODO(DATALAB-2982): port the EMR flow — enumerate clusters via
        # meta_lib, clean the bucket, terminate each cluster, then remove
        # its kernels from the attached notebook.
        pass
    except Exception:
        # Match the error convention of the other lifecycle scripts:
        # exit non-zero so the framework records the failure.
        sys.exit(1)
+
+
if __name__ == "__main__":
    # generating variables dictionary
    # NOTE(review): AWS config files in an Azure HDInsight script — presumably
    # a leftover from the EMR template this file was copied from; confirm
    # whether Azure credentials setup is needed here instead.
    datalab.actions_lib.create_aws_config_files()
    logging.info('Generating infrastructure names and tags')
    # Assemble all names/paths needed for termination from the environment
    # supplied by the provisioning framework.
    hdin_conf = dict()
    hdin_conf['service_base_name'] = (os.environ['conf_service_base_name'])
    hdin_conf['hdin_name'] = os.environ['computational_name']
    hdin_conf['notebook_name'] = os.environ['notebook_instance_name']
    hdin_conf['project_name'] = os.environ['project_name']
    hdin_conf['endpoint_name'] = os.environ['endpoint_name']
    # Bucket naming follows the <sbn>-<project>-<endpoint>-bucket convention,
    # lower-cased with underscores replaced to satisfy bucket naming rules.
    hdin_conf['bucket_name'] = '{0}-{1}-{2}-bucket'.format(hdin_conf['service_base_name'], hdin_conf['project_name'],
                                                           hdin_conf['endpoint_name']).lower().replace('_', '-')
    hdin_conf['key_path'] = os.environ['conf_key_dir'] + '/' + os.environ['conf_key_name'] + '.pem'
    hdin_conf['tag_name'] = hdin_conf['service_base_name'] + '-tag'

    # NOTE(review): actual termination is disabled below (WIP, DATALAB-2982);
    # the script currently only writes the result record.
    # try:
    #     logging.info('[TERMINATE hdin CLUSTER]')
    #     try:
    #         terminate_hdin_cluster(hdin_conf['hdin_name'], hdin_conf['bucket_name'], hdin_conf['tag_name'],
    #                               hdin_conf['notebook_name'], os.environ['conf_os_user'], hdin_conf['key_path'])
    #     except Exception as err:
    #         traceback.print_exc()
    #         datalab.fab.append_result("Failed to terminate hdin cluster.", str(err))
    #         raise Exception
    # except:
    #     sys.exit(1)

    # Publish the termination result for the framework to pick up.
    try:
        with open("/root/result.json", 'w') as result:
            res = {"dataengine-service_name": hdin_conf['hdin_name'],
                   "notebook_name": hdin_conf['notebook_name'],
                   "Action": "Terminate HDInsight cluster"}
            print(json.dumps(res))
            result.write(json.dumps(res))
    except Exception as err:
        datalab.fab.append_result("Error with writing results", str(err))
        sys.exit(1)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datalab.apache.org
For additional commands, e-mail: commits-help@datalab.apache.org