You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datalab.apache.org by lf...@apache.org on 2022/10/19 10:02:46 UTC

[incubator-datalab] branch DATALAB-3079 created (now 28592bee6)

This is an automated email from the ASF dual-hosted git repository.

lfrolov pushed a change to branch DATALAB-3079
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git


      at 28592bee6 [DATALAB-3079]: added attachment of edge and shared storage accounts to hdinsight cluster

This branch includes the following new commits:

     new 28592bee6 [DATALAB-3079]: added attachment of edge and shared storage accounts to hdinsight cluster

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datalab.apache.org
For additional commands, e-mail: commits-help@datalab.apache.org


[incubator-datalab] 01/01: [DATALAB-3079]: added attachment of edge and shared storage accounts to hdinsight cluster

Posted by lf...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

lfrolov pushed a commit to branch DATALAB-3079
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git

commit 28592bee6aedcb25eb239f6abd08e5c61ecd5588
Author: leonidfrolov <fr...@gmail.com>
AuthorDate: Wed Oct 19 13:02:27 2022 +0300

    [DATALAB-3079]: added attachment of edge and shared storage accounts to hdinsight cluster
---
 .../scripts/azure/dataengine-service_create.py     | 41 +++++++++++----
 .../scripts/azure/dataengine-service_prepare.py    | 61 ++++++++++++++++------
 2 files changed, 75 insertions(+), 27 deletions(-)

diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
index b4075318b..b2e2c8b62 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
@@ -42,9 +42,15 @@ parser.add_argument('--location', type=str, help='')
 parser.add_argument('--master_instance_type', type=str, help='')
 parser.add_argument('--worker_instance_type', type=str, help='')
 parser.add_argument('--worker_count', type=str, help='')
-parser.add_argument('--storage_account_name', type=str, help='')
-parser.add_argument('--storage_account_key', type=str, help='')
-parser.add_argument('--container_name', type=str, help='')
+parser.add_argument('--cluster_storage_account_name', type=str, help='')
+parser.add_argument('--cluster_storage_account_key', type=str, help='')
+parser.add_argument('--cluster_container_name', type=str, help='')
+parser.add_argument('--edge_storage_account_name', type=str, help='')
+parser.add_argument('--edge_storage_account_key', type=str, help='')
+parser.add_argument('--edge_container_name', type=str, help='')
+parser.add_argument('--shared_storage_account_name', type=str, help='')
+parser.add_argument('--shared_storage_account_key', type=str, help='')
+parser.add_argument('--shared_container_name', type=str, help='')
 parser.add_argument('--tags', type=str, help='')
 parser.add_argument('--public_key', type=str, help='')
 parser.add_argument('--vpc_id', type=str, help='')
@@ -59,8 +65,10 @@ def build_hdinsight_cluster(resource_group_name, cluster_name, params):
 
 
 def create_cluster_parameters(location, tags, cluster_version, cluster_login_username, password, master_instance_type,
-                              worker_count, worker_instance_type, storage_account_name, storage_account_key,
-                              container_name, public_key, vpc_id, subnet):
+                              worker_count, worker_instance_type, cluster_storage_account_name, cluster_storage_account_key,
+                              cluster_container_name, public_key, vpc_id, subnet,
+                              edge_storage_account_name, edge_storage_account_key, edge_container_name,
+                              shared_storage_account_name, shared_storage_account_key, shared_container_name):
 
     # Returns cluster parameters
 
@@ -149,10 +157,20 @@ def create_cluster_parameters(location, tags, cluster_version, cluster_login_use
             storage_profile=StorageProfile(
                 storageaccounts=[
                     StorageAccount(
-                        name=storage_account_name + ".blob.core.windows.net",
-                        key=storage_account_key,
-                        container=container_name.lower(),
+                        name=cluster_storage_account_name + ".blob.core.windows.net",
+                        key=cluster_storage_account_key,
+                        container=cluster_container_name.lower(),
                         is_default=True
+                    ),
+                    StorageAccount(
+                        name=edge_storage_account_name + ".blob.core.windows.net",
+                        key=edge_storage_account_key,
+                        container=edge_container_name.lower()
+                    ),
+                    StorageAccount(
+                        name=shared_storage_account_name + ".blob.core.windows.net",
+                        key=shared_storage_account_key,
+                        container=shared_container_name.lower()
                     )
                 ]
             )
@@ -167,8 +185,11 @@ if __name__ == "__main__":
     #parser.print_help()
     params = create_cluster_parameters(args.location, json.loads(args.tags), args.cluster_version, 'datalab-user',
                                        args.access_password, args.master_instance_type, args.worker_count,
-                                       args.worker_instance_type, args.storage_account_name, args.storage_account_key,
-                                       args.container_name, args.public_key, args.vpc_id, args.subnet)
+                                       args.worker_instance_type, args.cluster_storage_account_name, args.cluster_storage_account_key,
+                                       args.cluster_container_name, args.public_key, args.vpc_id, args.subnet,
+                                       args.edge_storage_account_name, args.edge_storage_account_key,
+                                       args.edge_container_name, args.shared_storage_account_name,
+                                       args.shared_storage_account_key, args.shared_container_name)
 
     build_hdinsight_cluster(args.resource_group_name, args.cluster_name, params)
 
diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py
index 89a6f6d4f..c57d0ff75 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py
@@ -87,14 +87,28 @@ if __name__ == "__main__":
         hdinsight_conf['release_label'] = os.environ['hdinsight_version']
         key = RSA.importKey(open(hdinsight_conf['key_path'], 'rb').read())
         ssh_admin_pubkey = key.publickey().exportKey("OpenSSH").decode('UTF-8')
-        hdinsight_conf['container_name'] = ('{}-bucket'.format(hdinsight_conf['cluster_name'])).lower()
-        hdinsight_conf['storage_account_name_tag'] = ('{}-bucket'.format(hdinsight_conf['cluster_name'])).lower()
-        hdinsight_conf['storage_account_tags'] = {"Name": hdinsight_conf['storage_account_name_tag'],
+        hdinsight_conf['cluster_container_name'] = ('{}-bucket'.format(hdinsight_conf['cluster_name'])).lower()
+        hdinsight_conf['cluster_storage_account_name_tag'] = ('{}-bucket'.format(hdinsight_conf['cluster_name'])).lower()
+        hdinsight_conf['cluster_storage_account_tags'] = {"Name": hdinsight_conf['cluster_storage_account_name_tag'],
                                                   "SBN": hdinsight_conf['service_base_name'],
                                                   "project_tag": hdinsight_conf['project_name'],
                                                   "endpoint_tag": hdinsight_conf['endpoint_name'],
                                                   os.environ['conf_billing_tag_key']: os.environ['conf_billing_tag_value'],
-                                                  hdinsight_conf['tag_name']: hdinsight_conf['storage_account_name_tag']}
+                                                  hdinsight_conf['tag_name']: hdinsight_conf['cluster_storage_account_name_tag']}
+
+        hdinsight_conf['edge_storage_account_name'] = ('{0}-{1}-{2}-bucket'.format(hdinsight_conf['service_base_name'],
+                                                                                   hdinsight_conf['project_name'],
+                                                                                   hdinsight_conf['endpoint_name'])).lower()
+        hdinsight_conf['edge_container_name'] = ('{0}-{1}-{2}-bucket'.format(hdinsight_conf['service_base_name'],
+                                                                             hdinsight_conf['project_name'],
+                                                                             hdinsight_conf['endpoint_name'])).lower()
+        hdinsight_conf['edge_storage_account_name_tag'] = hdinsight_conf['edge_storage_account_name']
+
+        hdinsight_conf['shared_storage_account_name'] = ('{0}-{1}-shared-bucket'.format(
+            hdinsight_conf['service_base_name'], hdinsight_conf['endpoint_name'])).lower()
+        hdinsight_conf['shared_container_name'] = ('{}-{}-shared-bucket'.format(hdinsight_conf['service_base_name'],
+                                                                                hdinsight_conf['endpoint_name'])).lower()
+        hdinsight_conf['shared_storage_account_name_tag'] = hdinsight_conf['shared_storage_account_name']
 
         hdinsight_conf['vpc_name'] = os.environ['azure_vpc_name']
 
@@ -121,7 +135,7 @@ if __name__ == "__main__":
 
         params = "--container_name {} --account_tags '{}' --resource_group_name {} --region {} " \
                  "--storage_account_kind StorageV2". \
-            format(hdinsight_conf['container_name'], json.dumps(hdinsight_conf['storage_account_tags']),
+            format(hdinsight_conf['cluster_container_name'], json.dumps(hdinsight_conf['cluster_storage_account_tags']),
                    hdinsight_conf['resource_group_name'], hdinsight_conf['region'])
         try:
             subprocess.run("~/scripts/{}.py {}".format('common_create_storage_account', params), shell=True, check=True)
@@ -131,30 +145,43 @@ if __name__ == "__main__":
     except Exception as err:
         datalab.fab.append_result("Failed to create storage account.", str(err))
         for storage_account in AzureMeta.list_storage_accounts(hdinsight_conf['resource_group_name']):
-            if hdinsight_conf['storage_account_name_tag'] == storage_account.tags["Name"]:
+            if hdinsight_conf['cluster_storage_account_name_tag'] == storage_account.tags["Name"]:
                 AzureActions.remove_storage_account(hdinsight_conf['resource_group_name'], storage_account.name)
         sys.exit(1)
 
     try:
         logging.info('[Creating HDInsight Cluster]')
         for storage_account in AzureMeta.list_storage_accounts(hdinsight_conf['resource_group_name']):
-            if hdinsight_conf['storage_account_name_tag'] == storage_account.tags["Name"]:
-                hdinsight_conf['storage_account_name'] = storage_account.name
-        hdinsight_conf['storage_account_key'] = AzureMeta.list_storage_keys(
-            hdinsight_conf['resource_group_name'], hdinsight_conf['storage_account_name'])[0]
+            if hdinsight_conf['cluster_storage_account_name_tag'] == storage_account.tags["Name"]:
+                hdinsight_conf['cluster_storage_account_name'] = storage_account.name
+            if hdinsight_conf['edge_storage_account_name_tag'] == storage_account.tags["Name"]:
+                hdinsight_conf['edge_storage_account_name'] = storage_account.name
+            if hdinsight_conf['shared_storage_account_name_tag'] == storage_account.tags["Name"]:
+                hdinsight_conf['shared_storage_account_name'] = storage_account.name
+        hdinsight_conf['cluster_storage_account_key'] = AzureMeta.list_storage_keys(
+            hdinsight_conf['resource_group_name'], hdinsight_conf['cluster_storage_account_name'])[0]
+        hdinsight_conf['edge_storage_account_key'] = AzureMeta.list_storage_keys(
+            hdinsight_conf['resource_group_name'], hdinsight_conf['edge_storage_account_name'])[0]
+        hdinsight_conf['shared_storage_account_key'] = AzureMeta.list_storage_keys(
+            hdinsight_conf['resource_group_name'], hdinsight_conf['shared_storage_account_name'])[0]
         params = "--resource_group_name {} --cluster_name {} " \
                  "--cluster_version {} --location {} " \
                  "--master_instance_type {} --worker_instance_type {} " \
-                 "--worker_count {} --storage_account_name {} " \
-                 "--storage_account_key '{}' --container_name {} " \
-                 "--tags '{}' --public_key '{}' --vpc_id {} --subnet {} --access_password {}"\
+                 "--worker_count {} --cluster_storage_account_name {} " \
+                 "--cluster_storage_account_key '{}' --cluster_container_name {} " \
+                 "--tags '{}' --public_key '{}' --vpc_id {} --subnet {} --access_password {} " \
+                 "--edge_storage_account_name {} --edge_storage_account_key '{}' --edge_container_name {} " \
+                 "--shared_storage_account_name {} --shared_storage_account_key '{}' --shared_container_name {}"\
             .format(hdinsight_conf['resource_group_name'], hdinsight_conf['cluster_name'],
                     hdinsight_conf['release_label'], hdinsight_conf['region'],
                     hdinsight_conf['hdinsight_master_instance_type'], hdinsight_conf['hdinsight_slave_instance_type'],
-                    hdinsight_conf['hdinsight_worker_count'], hdinsight_conf['storage_account_name'],
-                    hdinsight_conf['storage_account_key'], hdinsight_conf['container_name'],
+                    hdinsight_conf['hdinsight_worker_count'], hdinsight_conf['cluster_storage_account_name'],
+                    hdinsight_conf['cluster_storage_account_key'], hdinsight_conf['cluster_container_name'],
                     json.dumps(hdinsight_conf['cluster_tags']), ssh_admin_pubkey, hdinsight_conf['vpc_id'],
-                    hdinsight_conf['edge_network_id'], args.access_password)
+                    hdinsight_conf['edge_network_id'], args.access_password,
+                    hdinsight_conf['edge_storage_account_name'], hdinsight_conf['edge_storage_account_key'],
+                    hdinsight_conf['edge_container_name'],  hdinsight_conf['shared_storage_account_name'],
+                    hdinsight_conf['shared_storage_account_key'], hdinsight_conf['shared_container_name'])
 
         try:
             subprocess.run("~/scripts/{}.py {}".format('dataengine-service_create', params), shell=True, check=True)
@@ -165,7 +192,7 @@ if __name__ == "__main__":
     except Exception as err:
         datalab.fab.append_result("Failed to create hdinsight Cluster.", str(err))
         for storage_account in AzureMeta.list_storage_accounts(hdinsight_conf['resource_group_name']):
-            if hdinsight_conf['storage_account_name_tag'] == storage_account.tags["Name"]:
+            if hdinsight_conf['cluster_storage_account_name_tag'] == storage_account.tags["Name"]:
                 AzureActions.remove_storage_account(hdinsight_conf['resource_group_name'], storage_account.name)
         #subprocess.run('rm /response/.hdinsight_creating_{}'.format(os.environ['exploratory_name']), shell=True, check=True)
         sys.exit(1)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datalab.apache.org
For additional commands, e-mail: commits-help@datalab.apache.org