Posted to commits@datalab.apache.org by lf...@apache.org on 2022/09/06 13:09:18 UTC

[incubator-datalab] branch DATALAB-1408 updated (b1cbe3004 -> 76ca78cf4)

This is an automated email from the ASF dual-hosted git repository.

lfrolov pushed a change to branch DATALAB-1408
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git


    from b1cbe3004 [DATALAB-1408]: fixed some variables and imports
     new 9f9d1045d [DATALAB-1408]: fixed container name
     new 49f5dbbc1 [DATALAB-1408]: added vpc and edge subnet usage
     new f153bbeb3 [DATALAB-1408]: changed container type
     new 3a1f63280 [DATALAB-1408]: added zookeeper nodes
     new a3b6d915e [DATALAB-1408]: fixed shapes
     new 76ca78cf4 [DATALAB-1408]: removed cluster parameters from hdinsight terminate

The 6 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../azure/dataengine-service_description.json      | 18 +++------
 .../src/general/lib/azure/actions_lib.py           | 30 +++++++--------
 .../scripts/azure/common_create_storage_account.py |  7 ++--
 .../scripts/azure/dataengine-service_create.py     | 40 +++++++++++++++++--
 .../scripts/azure/dataengine-service_prepare.py    | 45 +++++++++++++++-------
 5 files changed, 91 insertions(+), 49 deletions(-)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datalab.apache.org
For additional commands, e-mail: commits-help@datalab.apache.org


[incubator-datalab] 06/06: [DATALAB-1408]: removed cluster parameters from hdinsight terminate

Posted by lf...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

lfrolov pushed a commit to branch DATALAB-1408
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git

commit 76ca78cf46fcbcefcc2aeae697ae2c5acf9f3b5d
Author: leonidfrolov <fr...@gmail.com>
AuthorDate: Tue Sep 6 16:09:07 2022 +0300

    [DATALAB-1408]: removed cluster parameters from hdinsight terminate
---
 infrastructure-provisioning/src/general/lib/azure/actions_lib.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/infrastructure-provisioning/src/general/lib/azure/actions_lib.py b/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
index 1b3086e69..0c38da8e5 100644
--- a/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
+++ b/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
@@ -1185,10 +1185,10 @@ class AzureActions:
             traceback.print_exc(file=sys.stdout)
 
 
-    def terminate_hdinsight_cluster(self, resource_group_name, cluster_name, cluster_parameters):
+    def terminate_hdinsight_cluster(self, resource_group_name, cluster_name):
         try:
             print('Starting to terminate HDInsight Spark cluster {}'.format(cluster_name))
-            return self.hdinsight_client.clusters.begin_delete(resource_group_name, cluster_name, cluster_parameters)
+            return self.hdinsight_client.clusters.begin_delete(resource_group_name, cluster_name)
         except Exception as err:
             logging.info(
                 "Unable to terminate HDInsight Spark cluster: " + str(err) + "\n Traceback: " + traceback.print_exc(file=sys.stdout))




[incubator-datalab] 04/06: [DATALAB-1408]: added zookeeper nodes

Posted by lf...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

lfrolov pushed a commit to branch DATALAB-1408
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git

commit 3a1f6328059a93f684da71bdc86b24bb85b8adc6
Author: leonidfrolov <fr...@gmail.com>
AuthorDate: Tue Sep 6 14:31:12 2022 +0300

    [DATALAB-1408]: added zookeeper nodes
---
 .../scripts/azure/dataengine-service_create.py        | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
index d31b001a1..741424eed 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
@@ -118,6 +118,25 @@ def create_cluster_parameters(location, tags, cluster_version, cluster_login_use
                             id=vpc_id,
                             subnet=subnet
                         )
+                    ),
+                    Role(
+                        name="zookeepernode",
+                        target_instance_count=3,
+                        hardware_profile=HardwareProfile(vm_size=Small),
+                        os_profile=OsProfile(
+                            linux_operating_system_profile=LinuxOperatingSystemProfile(
+                                username=cluster_login_username,
+                                ssh_profile={
+                                    "publicKeys": [
+                                        {"certificateData": public_key}
+                                    ]
+                                }
+                            )
+                        ),
+                        virtual_network_profile=VirtualNetworkProfile(
+                            id=vpc_id,
+                            subnet=subnet
+                        )
                     )
                 ]
             ),
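
Note that vm_size=Small in this hunk appears to reference a name that is not defined in the diff context; commit a3b6d915e (05/06) replaces it with the string "Standard_A4_v2". A self-contained sketch of the added role, assuming the model classes come from azure.mgmt.hdinsight.models as in the rest of the script (the VM size and argument values are illustrative):

    from azure.mgmt.hdinsight.models import (
        Role, HardwareProfile, OsProfile, LinuxOperatingSystemProfile, VirtualNetworkProfile
    )

    def zookeeper_role(cluster_login_username, public_key, vpc_id, subnet):
        # Three ZooKeeper nodes back the HDInsight coordination quorum.
        return Role(
            name="zookeepernode",
            target_instance_count=3,
            # String VM size, as introduced by the follow-up commit a3b6d915e.
            hardware_profile=HardwareProfile(vm_size="Standard_A4_v2"),
            os_profile=OsProfile(
                linux_operating_system_profile=LinuxOperatingSystemProfile(
                    username=cluster_login_username,
                    ssh_profile={"publicKeys": [{"certificateData": public_key}]}
                )
            ),
            virtual_network_profile=VirtualNetworkProfile(id=vpc_id, subnet=subnet)
        )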




[incubator-datalab] 03/06: [DATALAB-1408]: changed container type

Posted by lf...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

lfrolov pushed a commit to branch DATALAB-1408
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git

commit f153bbeb365ae70bead702f44b0aa153b41c85c6
Author: leonidfrolov <fr...@gmail.com>
AuthorDate: Tue Sep 6 14:30:14 2022 +0300

    [DATALAB-1408]: changed container type
---
 .../src/general/lib/azure/actions_lib.py           | 25 +++++++++++-----------
 .../scripts/azure/common_create_storage_account.py |  7 +++---
 .../scripts/azure/dataengine-service_prepare.py    | 16 ++++++--------
 3 files changed, 23 insertions(+), 25 deletions(-)

diff --git a/infrastructure-provisioning/src/general/lib/azure/actions_lib.py b/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
index 563095bf4..db9088aa6 100644
--- a/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
+++ b/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
@@ -450,25 +450,23 @@ class AzureActions:
                                    file=sys.stdout)}))
             traceback.print_exc(file=sys.stdout)
 
-    def create_storage_account(self, resource_group_name, account_name, region, tags):
+    def create_storage_account(self, resource_group_name, account_name, region, tags, kind='BlobStorage'):
         try:
             ssn_network_id = datalab.meta_lib.AzureMeta().get_subnet(resource_group_name,
                                                                      vpc_name=os.environ['azure_vpc_name'],
-                                                                     subnet_name=os.environ['azure_subnet_name']
-                                                                     ).id
+                                                                     subnet_name=os.environ['azure_subnet_name']).id
             edge_network_id = datalab.meta_lib.AzureMeta().get_subnet(resource_group_name,
-                                                                     vpc_name=os.environ['azure_vpc_name'],
-                                                                     subnet_name='{}-{}-{}-subnet'.format(
-                                                                         os.environ['conf_service_base_name'],
-                                                                         (os.environ['project_name']),
-                                                                         (os.environ['endpoint_name']))
-                                                                       ).id
+                                                                      vpc_name=os.environ['azure_vpc_name'],
+                                                                      subnet_name='{}-{}-{}-subnet'.format(
+                                                                          os.environ['conf_service_base_name'],
+                                                                          (os.environ['project_name']),
+                                                                          (os.environ['endpoint_name']))).id
             result = self.storage_client.storage_accounts.begin_create(
                 resource_group_name,
                 account_name,
                 {
                     "sku": {"name": "Standard_LRS"},
-                    "kind": "BlobStorage",
+                    "kind": kind,
                     "location": region,
                     "tags": tags,
                     "access_tier": "Hot",
@@ -520,13 +518,14 @@ class AzureActions:
                                    file=sys.stdout)}))
             traceback.print_exc(file=sys.stdout)
 
-    def create_blob_container(self, resource_group_name, account_name, container_name):
+    def create_blob_container(self, account_name, container_name):
         try:
-            block_blob_service = BlobServiceClient(account_url="https://" + account_name + ".blob.core.windows.net/", credential=self.credential)
+            block_blob_service = BlobServiceClient(account_url="https://" + account_name + ".blob.core.windows.net/",
+                                                   credential=self.credential)
             result = block_blob_service.create_container(
                 container_name,
                 {
-                "public_access": "Off"
+                    "public_access": "Off"
                 }
             )
             return result
diff --git a/infrastructure-provisioning/src/general/scripts/azure/common_create_storage_account.py b/infrastructure-provisioning/src/general/scripts/azure/common_create_storage_account.py
index 17676b084..8c94592da 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/common_create_storage_account.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/common_create_storage_account.py
@@ -34,6 +34,7 @@ parser.add_argument('--container_name', type=str, default='')
 parser.add_argument('--account_tags', type=str, default='{"empty":"string"}')
 parser.add_argument('--resource_group_name', type=str, default='')
 parser.add_argument('--region', type=str, default='')
+parser.add_argument('--storage_account_kind', type=str, default='BlobStorage')
 args = parser.parse_args()
 
 if __name__ == "__main__":
@@ -50,9 +51,9 @@ if __name__ == "__main__":
             if check.name_available:
                 logging.info("Creating storage account {}.".format(account_name))
                 storage_account = AzureActions().create_storage_account(args.resource_group_name, account_name,
-                                                                        args.region, account_tags)
-                blob_container = AzureActions().create_blob_container(args.resource_group_name, account_name,
-                                                                      args.container_name)
+                                                                        args.region, account_tags,
+                                                                        args.storage_account_kind)
+                blob_container = AzureActions().create_blob_container(account_name, args.container_name)
                 logging.info("STORAGE ACCOUNT {} has been created".format(account_name))
                 logging.info("CONTAINER {} has been created".format(args.container_name))
             else:
diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py
index 5725c2c0c..49f636566 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py
@@ -77,14 +77,8 @@ if __name__ == "__main__":
         hdinsight_conf['release_label'] = os.environ['hdinsight_version']
         key = RSA.importKey(open(hdinsight_conf['key_path'], 'rb').read())
         ssh_admin_pubkey = key.publickey().exportKey("OpenSSH").decode('UTF-8')
-        hdinsight_conf['container_name'] = ('{0}-bucket'.format(hdinsight_conf['service_base_name'],
-                                                                hdinsight_conf['project_name'],
-                                                                hdinsight_conf['endpoint_name'],
-                                                                hdinsight_conf['cluster_name'])).lower()
-        hdinsight_conf['storage_account_name_tag'] = ('{0}-bucket'.format(hdinsight_conf['service_base_name'],
-                                                                          hdinsight_conf['project_name'],
-                                                                          hdinsight_conf['endpoint_name'],
-                                                                          hdinsight_conf['cluster_name'])).lower()
+        hdinsight_conf['container_name'] = ('{}-bucket'.format(hdinsight_conf['cluster_name'])).lower()
+        hdinsight_conf['storage_account_name_tag'] = ('{}-bucket'.format(hdinsight_conf['cluster_name'])).lower()
         hdinsight_conf['storage_account_tags'] = {"Name": hdinsight_conf['storage_account_name_tag'],
                                                   "SBN": hdinsight_conf['service_base_name'],
                                                   "project_tag": hdinsight_conf['project_name'],
@@ -112,7 +106,8 @@ if __name__ == "__main__":
     try:
         logging.info('[CREATE STORAGE ACCOUNT AND CONTAINERS]')
 
-        params = "--container_name {} --account_tags '{}' --resource_group_name {} --region {}". \
+        params = "--container_name {} --account_tags '{}' --resource_group_name {} --region {} " \
+                 "--storage_account_kind StorageV2". \
             format(hdinsight_conf['container_name'], json.dumps(hdinsight_conf['storage_account_tags']),
                    hdinsight_conf['resource_group_name'], hdinsight_conf['region'])
         try:
@@ -156,5 +151,8 @@ if __name__ == "__main__":
 
     except Exception as err:
         datalab.fab.append_result("Failed to create hdinsight Cluster.", str(err))
+        for storage_account in AzureMeta.list_storage_accounts(hdinsight_conf['resource_group_name']):
+            if hdinsight_conf['storage_account_name_tag'] == storage_account.tags["Name"]:
+                AzureActions.remove_storage_account(hdinsight_conf['resource_group_name'], storage_account.name)
         #subprocess.run('rm /response/.hdinsight_creating_{}'.format(os.environ['exploratory_name']), shell=True, check=True)
         sys.exit(1)
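
Taken together, the reworked helpers map onto the Azure SDK roughly as below; a minimal sketch with placeholder names and a DefaultAzureCredential standing in for the project's own credential handling:

    from azure.identity import DefaultAzureCredential
    from azure.mgmt.storage import StorageManagementClient
    from azure.storage.blob import BlobServiceClient

    # Placeholders for illustration only.
    credential = DefaultAzureCredential()
    subscription_id = "<subscription-id>"
    resource_group_name = "<resource-group>"
    account_name = "<storage-account>"
    container_name = "<container>"

    storage_client = StorageManagementClient(credential, subscription_id)

    # create_storage_account now exposes kind (default 'BlobStorage');
    # the HDInsight prepare script passes 'StorageV2' instead.
    poller = storage_client.storage_accounts.begin_create(
        resource_group_name,
        account_name,
        {
            "sku": {"name": "Standard_LRS"},
            "kind": "StorageV2",
            "location": "<region>",
            "access_tier": "Hot",
        },
    )
    account = poller.result()

    # create_blob_container dropped the resource group argument: the data-plane
    # BlobServiceClient is addressed by account URL alone.
    blob_service = BlobServiceClient(
        account_url="https://{}.blob.core.windows.net/".format(account_name),
        credential=credential,
    )
    container_client = blob_service.create_container(container_name)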




[incubator-datalab] 01/06: [DATALAB-1408]: fixed container name

Posted by lf...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

lfrolov pushed a commit to branch DATALAB-1408
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git

commit 9f9d1045d700c6c50ca1da40bde8feadbd5610a9
Author: leonidfrolov <fr...@gmail.com>
AuthorDate: Tue Sep 6 11:19:27 2022 +0300

    [DATALAB-1408]: fixed container name
---
 .../scripts/azure/dataengine-service_create.py        |  2 ++
 .../scripts/azure/dataengine-service_prepare.py       | 19 +++++++++----------
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
index 7b7af60b0..fe93fc030 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
@@ -48,10 +48,12 @@ parser.add_argument('--tags', type=str, help='')
 parser.add_argument('--public_key', type=str, help='')
 args = parser.parse_args()
 
+
 def build_hdinsight_cluster(resource_group_name, cluster_name, params):
     logging.info("Will be created cluster: {}".format(cluster_name))
     return datalab.actions_lib.AzureActions().create_hdinsight_cluster(resource_group_name, cluster_name, params)
 
+
 def create_cluster_parameters(location, tags, cluster_version, cluster_login_username, password, master_instance_type,
                               worker_count, worker_instance_type, storage_account_name, storage_account_key,
                               container_name, public_key):
diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py
index 2f47d74ca..d69929508 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py
@@ -77,15 +77,14 @@ if __name__ == "__main__":
         hdinsight_conf['release_label'] = os.environ['hdinsight_version']
         key = RSA.importKey(open(hdinsight_conf['key_path'], 'rb').read())
         ssh_admin_pubkey = key.publickey().exportKey("OpenSSH").decode('UTF-8')
-        hdinsight_conf['container_name'] = ('{0}-{1}-{2}-{3}-bucket'.format(hdinsight_conf['service_base_name'],
-                                                                            hdinsight_conf['project_name'],
-                                                                            hdinsight_conf['endpoint_name'],
-                                                                            hdinsight_conf['cluster_name'])).lower()
-        hdinsight_conf['storage_account_name_tag'] = ('{0}-{1}-{2}-{3}-bucket'.format(hdinsight_conf['service_base_name'],
-                                                                                      hdinsight_conf['project_name'],
-                                                                                      hdinsight_conf['endpoint_name'],
-                                                                                      hdinsight_conf['cluster_name']
-                                                                                      )).lower()
+        hdinsight_conf['container_name'] = ('{0}-bucket'.format(hdinsight_conf['service_base_name'],
+                                                                hdinsight_conf['project_name'],
+                                                                hdinsight_conf['endpoint_name'],
+                                                                hdinsight_conf['cluster_name'])).lower()
+        hdinsight_conf['storage_account_name_tag'] = ('{0}-bucket'.format(hdinsight_conf['service_base_name'],
+                                                                          hdinsight_conf['project_name'],
+                                                                          hdinsight_conf['endpoint_name'],
+                                                                          hdinsight_conf['cluster_name'])).lower()
         hdinsight_conf['storage_account_tags'] = {"Name": hdinsight_conf['storage_account_name_tag'],
                                                   "SBN": hdinsight_conf['service_base_name'],
                                                   "project_tag": hdinsight_conf['project_name'],
@@ -125,7 +124,7 @@ if __name__ == "__main__":
                  "--cluster_version {} --location {} " \
                  "--master_instance_type {} --worker_instance_type {} " \
                  "--worker_count {} --storage_account_name {} " \
-                 "--storage_account_key {} --container_name {} " \
+                 "--storage_account_key '{}' --container_name {} " \
                  "--tags '{}' --public_key '{}'"\
             .format(hdinsight_conf['resource_group_name'], hdinsight_conf['cluster_name'],
                     hdinsight_conf['release_label'], hdinsight_conf['region'],
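
As committed, the new format string consumes only its first argument, so the remaining project, endpoint and cluster values are silently ignored by str.format and the bucket name collapses to the service base name; commit f153bbeb3 (03/06) later derives it from the cluster name instead. A tiny illustration with placeholder values:

    # Placeholder values, for illustration only.
    service_base_name, project, endpoint, cluster = "sbn", "prj", "end", "sbn-prj-end-des1"

    # '{0}' picks the first positional argument; the other three are ignored.
    interim_name = ('{0}-bucket'.format(service_base_name, project, endpoint, cluster)).lower()
    assert interim_name == "sbn-bucket"

    # Commit f153bbeb3 switches the name to be derived from the cluster name.
    final_name = ('{}-bucket'.format(cluster)).lower()
    assert final_name == "sbn-prj-end-des1-bucket"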




[incubator-datalab] 05/06: [DATALAB-1408]: fixed shapes

Posted by lf...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

lfrolov pushed a commit to branch DATALAB-1408
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git

commit a3b6d915e175325064ebcea8583a98547d7c792c
Author: leonidfrolov <fr...@gmail.com>
AuthorDate: Tue Sep 6 16:08:25 2022 +0300

    [DATALAB-1408]: fixed shapes
---
 .../files/azure/dataengine-service_description.json    | 18 +++++-------------
 .../src/general/lib/azure/actions_lib.py               |  1 +
 .../general/scripts/azure/dataengine-service_create.py |  4 ++--
 .../scripts/azure/dataengine-service_prepare.py        |  7 ++++++-
 4 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/infrastructure-provisioning/src/general/files/azure/dataengine-service_description.json b/infrastructure-provisioning/src/general/files/azure/dataengine-service_description.json
index c41965004..faa7d1767 100644
--- a/infrastructure-provisioning/src/general/files/azure/dataengine-service_description.json
+++ b/infrastructure-provisioning/src/general/files/azure/dataengine-service_description.json
@@ -5,24 +5,16 @@
   "computation_resources_shapes":
     {
       "For testing" : [
-        {"Size": "S", "Description": "Standard_F4s", "Type": "Standard_F4s","Ram": "8.0 GB","Cpu": "4"}
+        {"Size": "S", "Description": "Standard_D12_v2", "Type": "Standard_D12_v2","Ram": "28.0 GB","Cpu": "4"}
       ],
       "Memory optimized" : [
-        {"Size": "S", "Description": "Standard_E4s_v3", "Type": "Standard_E4s_v3","Ram": "32 GB","Cpu": "4"},
-        {"Size": "M", "Description": "Standard_E16s_v3", "Type": "Standard_E16s_v3","Ram": "128 GB","Cpu": "16"},
-        {"Size": "L", "Description": "Standard_E32s_v3", "Type": "Standard_E32s_v3","Ram": "256 GB","Cpu": "32"}
-      ],
-      "Compute optimized": [
-        {"Size": "S", "Description": "Standard_F4s", "Type": "Standard_F4s","Ram": "8.0 GB","Cpu": "4"},
-        {"Size": "M", "Description": "Standard_F8s", "Type": "Standard_F8s","Ram": "16.0 GB","Cpu": "8"},
-        {"Size": "L", "Description": "Standard_F16s", "Type": "Standard_F16s","Ram": "32.0 GB","Cpu": "16"}
-      ],
-      "GPU optimized": [
-        {"Size": "S", "Description": "Standard_NC6", "Type": "Standard_NC6","Ram": "56.0 GB","Cpu": "6"}
+        {"Size": "S", "Description": "Standard_E4_v3", "Type": "Standard_E4_v3","Ram": "32 GB","Cpu": "4"},
+        {"Size": "M", "Description": "Standard_E16_v3", "Type": "Standard_E16_v3","Ram": "128 GB","Cpu": "16"},
+        {"Size": "L", "Description": "Standard_E32_v3", "Type": "Standard_E32_v3","Ram": "256 GB","Cpu": "32"}
       ]
     },
   "templates":
   [
-    {"version":"4.0", "applications": [{"Name":"Spark", "Version": "x.x.x"}]}
+    {"version":"4.0", "applications": [{"Name":"Spark", "Version": "2.4"}]}
   ]
 }
\ No newline at end of file
diff --git a/infrastructure-provisioning/src/general/lib/azure/actions_lib.py b/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
index db9088aa6..1b3086e69 100644
--- a/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
+++ b/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
@@ -509,6 +509,7 @@ class AzureActions:
                 resource_group_name,
                 account_name
             )
+            logging.info("Storage account {} was removed.".format(account_name))
             return result
         except Exception as err:
             logging.info(
diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
index 741424eed..8684a329b 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
@@ -52,7 +52,7 @@ args = parser.parse_args()
 
 
 def build_hdinsight_cluster(resource_group_name, cluster_name, params):
-    logging.info("Will be created cluster: {}".format(cluster_name))
+    logging.info("{} cluster creation".format(cluster_name))
     return datalab.actions_lib.AzureActions().create_hdinsight_cluster(resource_group_name, cluster_name, params)
 
 
@@ -122,7 +122,7 @@ def create_cluster_parameters(location, tags, cluster_version, cluster_login_use
                     Role(
                         name="zookeepernode",
                         target_instance_count=3,
-                        hardware_profile=HardwareProfile(vm_size=Small),
+                        hardware_profile=HardwareProfile(vm_size="Standard_A4_v2"),
                         os_profile=OsProfile(
                             linux_operating_system_profile=LinuxOperatingSystemProfile(
                                 username=cluster_login_username,
diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py
index 49f636566..af106da59 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py
@@ -23,6 +23,7 @@
 
 import datalab.fab
 import datalab.meta_lib
+import datalab.actions_lib
 import json
 import multiprocessing
 import os
@@ -35,6 +36,7 @@ from fabric import *
 
 if __name__ == "__main__":
     try:
+        AzureActions = datalab.actions_lib.AzureActions()
         AzureMeta = datalab.meta_lib.AzureMeta()
         logging.info('Generating infrastructure names and tags')
         hdinsight_conf = dict()
@@ -99,6 +101,9 @@ if __name__ == "__main__":
                                                                  hdinsight_conf['vpc_name'],
                                                                  hdinsight_conf['subnet_name']).id
 
+        hdinsight_conf['hdinsight_master_instance_type'] = os.environ['hdinsight_master_instance_type']
+        hdinsight_conf['hdinsight_slave_instance_type'] = os.environ['hdinsight_slave_instance_type']
+
     except Exception as err:
         datalab.fab.append_result("Failed to generate variables dictionary. Exception:" + str(err))
         sys.exit(1)
@@ -137,7 +142,7 @@ if __name__ == "__main__":
                  "--tags '{}' --public_key '{}' --vpc_id {} --subnet {}"\
             .format(hdinsight_conf['resource_group_name'], hdinsight_conf['cluster_name'],
                     hdinsight_conf['release_label'], hdinsight_conf['region'],
-                    os.environ['hdinsight_master_instance_type'], os.environ['hdinsight_slave_instance_type'],
+                    hdinsight_conf['hdinsight_master_instance_type'], hdinsight_conf['hdinsight_slave_instance_type'],
                     hdinsight_conf['hdinsight_worker_count'], hdinsight_conf['storage_account_name'],
                     hdinsight_conf['storage_account_key'], hdinsight_conf['container_name'],
                     json.dumps(hdinsight_conf['cluster_tags']), ssh_admin_pubkey, hdinsight_conf['vpc_id'],
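
A quick sanity check of the trimmed shape list, assuming the file path from the diff header and that it is run from the repository root:

    import json

    path = ("infrastructure-provisioning/src/general/files/azure/"
            "dataengine-service_description.json")
    with open(path) as f:
        description = json.load(f)

    # After this commit only HDInsight-supported sizes remain
    # (Standard_D12_v2 for testing, Standard_E*_v3 for memory optimized).
    for category, shapes in description["computation_resources_shapes"].items():
        for shape in shapes:
            assert {"Size", "Description", "Type", "Ram", "Cpu"}.issubset(shape)
            print(category, shape["Type"], shape["Ram"], shape["Cpu"])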




[incubator-datalab] 02/06: [DATALAB-1408]: added vpc and edge subnet usage

Posted by lf...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

lfrolov pushed a commit to branch DATALAB-1408
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git

commit 49f5dbbc1b336d4c49ed93153aac05ca97338b9d
Author: leonidfrolov <fr...@gmail.com>
AuthorDate: Tue Sep 6 12:20:25 2022 +0300

    [DATALAB-1408]: added vpc and edge subnet usage
---
 .../scripts/azure/dataengine-service_create.py        | 17 ++++++++++++++---
 .../scripts/azure/dataengine-service_prepare.py       | 19 +++++++++++++++++--
 2 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
index fe93fc030..d31b001a1 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
@@ -46,6 +46,8 @@ parser.add_argument('--storage_account_key', type=str, help='')
 parser.add_argument('--container_name', type=str, help='')
 parser.add_argument('--tags', type=str, help='')
 parser.add_argument('--public_key', type=str, help='')
+parser.add_argument('--vpc_id', type=str, help='')
+parser.add_argument('--subnet', type=str, help='')
 args = parser.parse_args()
 
 
@@ -56,7 +58,7 @@ def build_hdinsight_cluster(resource_group_name, cluster_name, params):
 
 def create_cluster_parameters(location, tags, cluster_version, cluster_login_username, password, master_instance_type,
                               worker_count, worker_instance_type, storage_account_name, storage_account_key,
-                              container_name, public_key):
+                              container_name, public_key, vpc_id, subnet):
 
     # Returns cluster parameters
 
@@ -92,6 +94,10 @@ def create_cluster_parameters(location, tags, cluster_version, cluster_login_use
                                     ]
                                 }
                             )
+                        ),
+                        virtual_network_profile=VirtualNetworkProfile(
+                            id=vpc_id,
+                            subnet=subnet
                         )
                     ),
                     Role(
@@ -107,6 +113,10 @@ def create_cluster_parameters(location, tags, cluster_version, cluster_login_use
                                     ]
                                 }
                             )
+                        ),
+                        virtual_network_profile=VirtualNetworkProfile(
+                            id=vpc_id,
+                            subnet=subnet
                         )
                     )
                 ]
@@ -129,12 +139,13 @@ def create_cluster_parameters(location, tags, cluster_version, cluster_login_use
 ##############
 
 if __name__ == "__main__":
-    parser.print_help()
+    #parser.print_help()
     password = ''
     params = create_cluster_parameters(args.location, json.loads(args.tags), args.cluster_version, 'datalab-user',
                                        password, args.master_instance_type, args.worker_count,
                                        args.worker_instance_type, args.storage_account_name, args.storage_account_key,
-                                       args.container_name, args.public_key)
+                                       args.container_name, args.public_key, args.vpc_id, args.subnet)
+
     build_hdinsight_cluster(args.resource_group_name, args.cluster_name, params)
 
     logfile = '{}_creation.log'.format(args.cluster_name)
diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py
index d69929508..5725c2c0c 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py
@@ -91,6 +91,20 @@ if __name__ == "__main__":
                                                   "endpoint_tag": hdinsight_conf['endpoint_name'],
                                                   os.environ['conf_billing_tag_key']: os.environ['conf_billing_tag_value'],
                                                   hdinsight_conf['tag_name']: hdinsight_conf['storage_account_name_tag']}
+
+        hdinsight_conf['vpc_name'] = os.environ['azure_vpc_name']
+
+        hdinsight_conf['vpc_id'] = AzureMeta.get_vpc(hdinsight_conf['resource_group_name'],
+                                                     hdinsight_conf['vpc_name']).id
+
+        hdinsight_conf['subnet_name'] = '{}-{}-{}-subnet'.format(hdinsight_conf['service_base_name'],
+                                                                 hdinsight_conf['project_name'],
+                                                                 hdinsight_conf['endpoint_name'])
+
+        hdinsight_conf['edge_network_id'] = AzureMeta.get_subnet(hdinsight_conf['resource_group_name'],
+                                                                 hdinsight_conf['vpc_name'],
+                                                                 hdinsight_conf['subnet_name']).id
+
     except Exception as err:
         datalab.fab.append_result("Failed to generate variables dictionary. Exception:" + str(err))
         sys.exit(1)
@@ -125,13 +139,14 @@ if __name__ == "__main__":
                  "--master_instance_type {} --worker_instance_type {} " \
                  "--worker_count {} --storage_account_name {} " \
                  "--storage_account_key '{}' --container_name {} " \
-                 "--tags '{}' --public_key '{}'"\
+                 "--tags '{}' --public_key '{}' --vpc_id {} --subnet {}"\
             .format(hdinsight_conf['resource_group_name'], hdinsight_conf['cluster_name'],
                     hdinsight_conf['release_label'], hdinsight_conf['region'],
                     os.environ['hdinsight_master_instance_type'], os.environ['hdinsight_slave_instance_type'],
                     hdinsight_conf['hdinsight_worker_count'], hdinsight_conf['storage_account_name'],
                     hdinsight_conf['storage_account_key'], hdinsight_conf['container_name'],
-                    json.dumps(hdinsight_conf['cluster_tags']), ssh_admin_pubkey)
+                    json.dumps(hdinsight_conf['cluster_tags']), ssh_admin_pubkey, hdinsight_conf['vpc_id'],
+                    hdinsight_conf['edge_network_id'])
 
         try:
             subprocess.run("~/scripts/{}.py {}".format('dataengine-service_create', params), shell=True, check=True)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datalab.apache.org
For additional commands, e-mail: commits-help@datalab.apache.org