You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datalab.apache.org by my...@apache.org on 2021/07/21 11:47:13 UTC

[incubator-datalab] branch DATALAB-2409 created (now 66eb986)

This is an automated email from the ASF dual-hosted git repository.

mykolabodnar pushed a change to branch DATALAB-2409
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git.


      at 66eb986  [DATALAB-2409] - DataLab deployment script refactored

This branch includes the following new commits:

     new 66eb986  [DATALAB-2409] - DataLab deployment script refactored

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datalab.apache.org
For additional commands, e-mail: commits-help@datalab.apache.org


[incubator-datalab] 01/01: [DATALAB-2409] - DataLab deployment script refactored

Posted by my...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

mykolabodnar pushed a commit to branch DATALAB-2409
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git

commit 66eb986f6220430c89829ec6349df35b1fc3046c
Author: bodnarmykola <bo...@gmail.com>
AuthorDate: Wed Jul 21 14:46:49 2021 +0300

    [DATALAB-2409] - DataLab deployment script refactored
---
 .../scripts/deploy_datalab.py                      | 399 +++++++++++++--------
 .../src/base/scripts/install_prerequisites.py      |  13 -
 .../src/general/conf/datalab.ini                   |   4 -
 .../src/general/files/aws/base_Dockerfile          |   3 +-
 .../src/general/files/azure/base_Dockerfile        |   3 +-
 .../src/general/files/gcp/base_Dockerfile          |   3 +-
 .../src/general/files/os/debian/sources.list       |  56 ---
 .../src/general/files/os/redhat/sources.list       |  83 -----
 .../src/general/lib/os/debian/common_lib.py        |   7 +-
 .../src/general/lib/os/redhat/common_lib.py        |   7 -
 ...common_notebook_configure_dataengine-service.py |   4 +-
 .../src/general/scripts/aws/ssn_prepare.py         |  15 +-
 ...common_notebook_configure_dataengine-service.py |   5 +-
 infrastructure-provisioning/src/ssn/fabfile.py     |   8 +-
 .../src/ssn/scripts/configure_docker.py            |   7 -
 15 files changed, 256 insertions(+), 361 deletions(-)

diff --git a/infrastructure-provisioning/scripts/deploy_datalab.py b/infrastructure-provisioning/scripts/deploy_datalab.py
index e3bdd4c..cf322a6 100644
--- a/infrastructure-provisioning/scripts/deploy_datalab.py
+++ b/infrastructure-provisioning/scripts/deploy_datalab.py
@@ -20,162 +20,236 @@
 #
 # ******************************************************************************
 
+"""Examples How to deploy DataLab for different cloud providers.
+
+``GCP`` example::
+
+        $ infrastructure-provisioning/scripts/deploy_datalab.py \
+        --conf_service_base_name <SERVICE_NAME> \
+        --conf_os_family debian \
+        --action create \
+        --key_path /home/ubuntu/.ssh \
+        --conf_key_name gcp \
+        --billing_dataset_name billing \
+        gcp \
+        --gcp_ssn_instance_size n1-standard-2 \
+        --gcp_project_id <PROJECT_ID>\
+        --gcp_service_account_path /home/ubuntu/secret.json\
+        --gcp_region us-west1\
+        --gcp_zone us-west1-a
+
+``AWS`` example::
+
+        $ infrastructure-provisioning/scripts/deploy_datalab.py\
+        --conf_service_base_name datalab-test\
+        --conf_os_family debian\
+        --action create \
+        --key_path /path/to/key/\
+        --conf_key_name key_name\
+        --conf_tag_resource_id datalab\
+        aws\
+        --aws_vpc_id vpc-xxxxx\
+        --aws_subnet_id subnet-xxxxx\
+        --aws_security_groups_ids sg-xxxxx,sg-xxxx\
+        --aws_access_key XXXXXXX\
+        --aws_secret_access_key XXXXXXXXXX\
+        --aws_region xx-xxxxx-x\
+        --aws_account_id xxxxxxxx\
+        --aws_billing_bucket billing_bucket\
+        --aws_report_path /billing/directory/\
+
+"""
 
 import argparse
 import os
 import subprocess
-from fabric import *
-from invoke import task
-
-parser = argparse.ArgumentParser()
-parser.add_argument('--conf_service_base_name', type=str, help='unique name for DataLab environment')
-parser.add_argument('--conf_network_type', type=str, default='',
-                    help='Define in which network DataLab will be deployed. '
-                         'Possible options: public|private')
-parser.add_argument('--conf_vpc_cidr', type=str, default='', help='CIDR of VPC')
-parser.add_argument('--conf_vpc2_cidr', type=str, default='', help='CIDR of secondary VPC')
-parser.add_argument('--conf_allowed_ip_cidr', type=str, default='', help='Comma-separated CIDR of IPs which will have '
-                                                                         'access to SSN')
-parser.add_argument('--conf_user_subnets_range', type=str, default='', help='Range of subnets which will be using for '
-                                                                            'users environments. For example: '
-                                                                            '10.10.0.0/24 - 10.10.10.0/24')
-parser.add_argument('--conf_private_subnet_prefix', type=str, default='24', help='Private subnet prefix')
-parser.add_argument('--conf_additional_tags', type=str, default='', help='Additional tags in format '
-                                                                         '"Key1:Value1;Key2:Value2"')
-parser.add_argument('--conf_image_enabled', type=str, default='', help='Enable or Disable creating image at first time')
-parser.add_argument('--aws_user_predefined_s3_policies', type=str, default='', help='Predefined policies for users '
-                                                                                    'instances')
-parser.add_argument('--aws_access_key', type=str, default='', help='AWS Access Key ID')
-parser.add_argument('--aws_secret_access_key', type=str, default='', help='AWS Secret Access Key')
-parser.add_argument('--aws_region', type=str, default='', help='AWS region')
-parser.add_argument('--aws_zone', type=str, default='', help='AWS zone')
-parser.add_argument('--azure_region', type=str, default='', help='Azure region')
-parser.add_argument('--gcp_region', type=str, default='', help='GCP region')
-parser.add_argument('--gcp_zone', type=str, default='', help='GCP zone')
-parser.add_argument('--conf_os_family', type=str, default='',
-                    help='Operating system type. Available options: debian, redhat')
-parser.add_argument('--conf_cloud_provider', type=str, default='',
-                    help='Where DataLab should be deployed. Available options: aws, azure, gcp')
-parser.add_argument('--ssn_hosted_zone_name', type=str, default='', help='Name of hosted zone')
-parser.add_argument('--ssn_hosted_zone_id', type=str, default='', help='ID of hosted zone')
-parser.add_argument('--ssn_subdomain', type=str, default='', help='Subdomain name')
-parser.add_argument('--ssn_assume_role_arn', type=str, default='', help='Role ARN for creating Route53 record in '
-                                                                        'different AWS account')
-parser.add_argument('--ssl_cert_path', type=str, default='', help='Full path to SSL certificate')
-parser.add_argument('--ssl_key_path', type=str, default='', help='Full path to key for SSL certificate')
-parser.add_argument('--aws_vpc_id', type=str, default='', help='AWS VPC ID')
-parser.add_argument('--conf_duo_vpc_enable', type=str, default='false', help='Duo VPC scheme enable(true|false)')
-parser.add_argument('--aws_vpc2_id', type=str, default='', help='Secondary AWS VPC ID')
-parser.add_argument('--aws_peering_id', type=str, default='', help='Amazon peering connection id')
-parser.add_argument('--azure_vpc_name', type=str, default='', help='Azure VPC Name')
-parser.add_argument('--gcp_vpc_name', type=str, default='', help='GCP VPC Name')
-parser.add_argument('--aws_subnet_id', type=str, default='', help='AWS Subnet ID')
-parser.add_argument('--azure_subnet_name', type=str, default='', help='Azure Subnet Name')
-parser.add_argument('--gcp_subnet_name', type=str, default='', help='GCP Subnet Name')
-parser.add_argument('--aws_security_groups_ids', type=str, default='', help='One of more comma-separated Security '
-                                                                            'groups IDs for SSN')
-parser.add_argument('--azure_security_group_name', type=str, default='', help='One Security'
-                                                                              'group name for SSN')
-parser.add_argument('--azure_edge_security_group_name', type=str, default='', help='One Security '
-                                                                              'group name for Edge node')
-parser.add_argument('--gcp_firewall_name', type=str, default='', help='One of more comma-separated GCP Firewall rules '
-                                                                      'for SSN')
-parser.add_argument('--key_path', type=str, default='', help='Path to admin key (WITHOUT KEY NAME)')
-parser.add_argument('--conf_key_name', type=str, default='', help='Admin key name (WITHOUT ".pem")')
-parser.add_argument('--workspace_path', type=str, default='', help='Admin key name (WITHOUT ".pem")')
-parser.add_argument('--conf_tag_resource_id', type=str, default='datalab', help='The name of user tag')
-parser.add_argument('--conf_billing_tag', type=str, default='datalab', help='Billing tag')
-parser.add_argument('--aws_ssn_instance_size', type=str, default='t2.large', help='The SSN instance shape')
-parser.add_argument('--azure_ssn_instance_size', type=str, default='Standard_DS2_v2', help='The SSN instance shape')
-parser.add_argument('--gcp_ssn_instance_size', type=str, default='n1-standard-2', help='The SSN instance shape')
-parser.add_argument('--aws_account_id', type=str, default='', help='The ID of Amazon account')
-parser.add_argument('--aws_billing_bucket', type=str, default='', help='The name of S3 bucket where billing reports '
-                                                                       'will be placed.')
-parser.add_argument('--aws_job_enabled', type=str, default='false', help='Billing format. Available options: '
-                                                                         'true (aws), false(epam)')
-parser.add_argument('--aws_report_path', type=str, default='', help='The path to billing reports directory in S3 '
-                                                                    'bucket')
-parser.add_argument('--azure_resource_group_name', type=str, default='', help='Name of Resource group in Azure')
-parser.add_argument('--azure_auth_path', type=str, default='', help='Full path to Azure credentials JSON file')
-parser.add_argument('--azure_datalake_enable', type=str, default='', help='Provision DataLake storage account')
-parser.add_argument('--azure_ad_group_id', type=str, default='', help='ID of Azure AD group')
-parser.add_argument('--azure_offer_number', type=str, default='', help='Azure offer number')
-parser.add_argument('--azure_currency', type=str, default='', help='Azure currency code')
-parser.add_argument('--azure_locale', type=str, default='', help='Azure locale')
-parser.add_argument('--azure_application_id', type=str, default='', help='Azure login application ID')
-parser.add_argument('--azure_validate_permission_scope', type=str, default='true', help='Azure permission scope '
-                                                                                        'validation(true|false).')
-parser.add_argument('--azure_oauth2_enabled', type=str, default='false', help='Using OAuth2 for logging in DataLab')
-parser.add_argument('--azure_region_info', type=str, default='', help='Azure region info')
-parser.add_argument('--azure_source_vpc_name', type=str, default='', help='Azure VPC source Name')
-parser.add_argument('--azure_source_resource_group_name', type=str, default='', help='Azure source resource group')
-parser.add_argument('--gcp_project_id', type=str, default='', help='The project ID in Google Cloud Platform')
-parser.add_argument('--gcp_service_account_path', type=str, default='', help='The project ID in Google Cloud Platform')
-parser.add_argument('--datalab_id', type=str, default="'resource_tags_user_user_tag'",
-                    help='Column name in report file that contains '
-                         'datalab id tag')
-parser.add_argument('--usage_date', type=str, default='line_item_usage_start_date',
-                    help='Column name in report file that contains '
-                         'usage date tag')
-parser.add_argument('--product', type=str, default='product_product_name',
-                    help='Column name in report file that contains '
-                         'product name tag')
-parser.add_argument('--usage_type', type=str, default='line_item_usage_type',
-                    help='Column name in report file that contains '
-                         'usage type tag')
-parser.add_argument('--usage', type=str, default='line_item_usage_amount',
-                    help='Column name in report file that contains '
-                         'usage tag')
-parser.add_argument('--cost', type=str, default='line_item_blended_cost',
-                    help='Column name in report file that contains cost tag')
-parser.add_argument('--resource_id', type=str, default='line_item_resource_id',
-                    help='Column name in report file that contains '
-                         'datalab resource id tag')
-parser.add_argument('--ldap_hostname', type=str, default='localhost', help='Ldap instance hostname')
-parser.add_argument('--ldap_dn', type=str, default='dc=example,dc=com',
-                    help='Ldap distinguished name')
-parser.add_argument('--ldap_ou', type=str, default='ou=People', help='Ldap organisation unit')
-parser.add_argument('--ldap_service_username', type=str, default='cn=service-user', help='Ldap service user name')
-parser.add_argument('--ldap_service_password', type=str, default='service-user-password',
-                    help='Ldap password for admin user')
-parser.add_argument('--keycloak_realm_name', type=str, default='datalab', help='Keycloak Realm name')
-parser.add_argument('--keycloak_auth_server_url', type=str, default='datalab', help='Keycloak auth server URL')
-parser.add_argument('--keycloak_client_name', type=str, default='datalab', help='Keycloak client name')
-parser.add_argument('--keycloak_client_secret', type=str, default='datalab', help='Keycloak client secret')
-parser.add_argument('--keycloak_user', type=str, default='datalab', help='Keycloak user')
-parser.add_argument('--keycloak_user_password', type=str, default='keycloak-user-password',
-                    help='Keycloak user password')
-parser.add_argument('--tags', type=str, default='line_item_operation,line_item_line_item_description',
-                    help='Column name in report file that '
-                         'contains tags')
-parser.add_argument('--billing_dataset_name', type=str, default='', help='Name of GCP dataset (BigQuery service)'
-                                                                         ' for billing')
-parser.add_argument('--default_endpoint_name', type=str, default='local', help='Name of localhost provisioning service,'
-                                                                               'that created by default')
-parser.add_argument('--conf_stepcerts_enabled', type=str, default='false', help='Enable or disable step certificates')
-parser.add_argument('--conf_stepcerts_root_ca', type=str, default='', help='Step root CA')
-parser.add_argument('--conf_stepcerts_kid', type=str, default='', help='Step KID')
-parser.add_argument('--conf_stepcerts_kid_password', type=str, default='', help='Step KID password')
-parser.add_argument('--conf_stepcerts_ca_url', type=str, default='', help='Step CA URL')
-parser.add_argument('--conf_letsencrypt_enabled', type=str, default='false',
-                    help='Enable or disable Let`s Encrypt certificates')
-parser.add_argument('--conf_repository_user', type=str, default='',
-                    help='user to access repository (used for jars download)')
-parser.add_argument('--conf_release_tag', type=str, default='2.5',
-                    help='tag used for jars download')
-parser.add_argument('--conf_repository_pass', type=str, default='',
-                    help='password to access repository (used for jars download)')
-parser.add_argument('--conf_repository_address', type=str, default='',
-                    help='address to access repository (used for jars download)')
-parser.add_argument('--conf_letsencrypt_domain_name', type=str, default='', help='Domain names to apply. '
-                                                                                 'For multiple domains enter a comma separated list of domains as a parameter'
-                                                                                 'ssn.domain_name will be used for ssn_node, DNS A record have to exist during deployment')
-parser.add_argument('--conf_letsencrypt_email', type=str, default='', help='Email that will be entered during '
-                                                                           'certificate obtaining and can be user for urgent renewal and security notices. '
-                                                                           'Use comma to register multiple emails, e.g. u1@example.com,u2@example.com.')
-parser.add_argument('--action', required=True, type=str, default='', choices=['build', 'deploy', 'create', 'terminate'],
-                    help='Available options: build, deploy, create, terminate')
-args = parser.parse_args()
+import sys
+
+BOOL_CHOICES_LIST = ['true', 'false']
+OS_DISTRO_LIST = ['debian', 'redhat']
+NETWORK_TYPE_LIST = ['public', 'private']
+
+
+def build_parser():
+    parser = argparse.ArgumentParser(description='DataLab Self-Service Node deployment',
+                                     prog='deploy_datalab')
+    # optional arguments
+    parser.add_argument('--conf_network_type', type=str, default='public',
+                        help='''Type of network. Define in which network DataLab will be deployed.
+                        (valid choices: %s)''' % NETWORK_TYPE_LIST,
+                        choices=NETWORK_TYPE_LIST)
+    parser.add_argument('--conf_vpc_cidr', type=str, default='172.31.0.0/16', help='CIDR of VPC')
+    parser.add_argument('--conf_vpc2_cidr', type=str, help='CIDR of secondary VPC')
+    parser.add_argument('--conf_allowed_ip_cidr', type=str, default='0.0.0.0/0',
+                        help='Comma-separated CIDR of IPs which will have access to SSN')
+    parser.add_argument('--conf_user_subnets_range', type=str,
+                        help='''Range of subnets which will be using for users environments.
+                        For example: 10.10.0.0/24 - 10.10.10.0/24''')
+    parser.add_argument('--conf_private_subnet_prefix', type=str, default='24', help='Private subnet prefix')
+    parser.add_argument('--conf_additional_tags', type=str,
+                        help='Additional tags in format "Key1:Value1;Key2:Value2"')
+    parser.add_argument('--conf_image_enabled', type=str,
+                        help='Enable or Disable creating image at first time')
+    parser.add_argument('--conf_os_family', type=str, default='debian', choices=OS_DISTRO_LIST,
+                        help='Operating system distribution. (valid choices: %s)' % OS_DISTRO_LIST)
+    parser.add_argument('--ssn_hosted_zone_name', type=str, help='Name of hosted zone')
+    parser.add_argument('--ssn_hosted_zone_id', type=str, help='ID of hosted zone')
+    parser.add_argument('--ssn_subdomain', type=str, help='Subdomain name')
+    parser.add_argument('--ssl_cert_path', type=str, help='Full path to SSL certificate')
+    parser.add_argument('--ssl_key_path', type=str, help='Full path to key for SSL certificate')
+    parser.add_argument('--workspace_path', type=str, default='', help='Docker workspace path')
+    parser.add_argument('--conf_tag_resource_id', type=str, default='datalab', help='The name of user tag')
+    parser.add_argument('--conf_billing_tag', type=str, default='datalab', help='Billing tag')
+    parser.add_argument('--datalab_id', type=str, default='resource_tags_user_user_tag',
+                        help='Column name in report file that contains datalab id tag')
+    parser.add_argument('--usage_date', type=str, default='line_item_usage_start_date',
+                        help='Column name in report file that contains usage date tag')
+    parser.add_argument('--product', type=str, default='product_product_name',
+                        help='Column name in report file that contains product name tag')
+    parser.add_argument('--usage_type', type=str, default='line_item_usage_type',
+                        help='Column name in report file that contains usage type tag')
+    parser.add_argument('--usage', type=str, default='line_item_usage_amount',
+                        help='Column name in report file that contains usage tag')
+    parser.add_argument('--cost', type=str, default='line_item_blended_cost',
+                        help='Column name in report file that contains cost tag')
+    parser.add_argument('--resource_id', type=str, default='line_item_resource_id',
+                        help='Column name in report file that contains datalab resource id tag')
+
+    parser.add_argument('--tags', type=str, default='line_item_operation,line_item_line_item_description',
+                        help='Column name in report file that contains tags')
+    parser.add_argument('--conf_stepcerts_enabled', type=str, default='false',
+                        help='Enable or disable step certificates. (valid choices: %s)' % BOOL_CHOICES_LIST,
+                        choices=BOOL_CHOICES_LIST)
+    parser.add_argument('--conf_stepcerts_root_ca', type=str, help='Step root CA')
+    parser.add_argument('--conf_stepcerts_kid', type=str, help='Step KID')
+    parser.add_argument('--conf_stepcerts_kid_password', type=str, help='Step KID password')
+    parser.add_argument('--conf_stepcerts_ca_url', type=str, help='Step CA URL')
+    parser.add_argument('--conf_letsencrypt_enabled', type=str, default='false',
+                        help='Enable or disable Let`s Encrypt certificates. (valid choices: %s)' % BOOL_CHOICES_LIST,
+                        choices=BOOL_CHOICES_LIST)
+    parser.add_argument('--conf_letsencrypt_domain_name', type=str,
+                        help='''Domain names to apply. For multiple domains enter a comma separated list of domains
+        as a parameter. ssn.domain_name will be used for ssn_node; a DNS A record has to exist during deployment''')
+    parser.add_argument('--conf_letsencrypt_email', type=str, help='''Email that will be entered during
+        certificate obtaining and can be used for urgent renewal and security notices. Use comma to register
+        multiple emails, e.g. u1@example.com,u2@example.com.''')
+    parser.add_argument('--conf_repository_user', type=str, default='',
+                        help='user to access repository (used for jars download)')
+    parser.add_argument('--conf_release_tag', type=str, default='2.5',
+                        help='tag used for jars download')
+    parser.add_argument('--conf_repository_pass', type=str, default='',
+                        help='password to access repository (used for jars download)')
+    parser.add_argument('--conf_repository_address', type=str, default='',
+                        help='address to access repository (used for jars download)')
+
+    required_args = parser.add_argument_group('Required arguments')
+    required_args.add_argument('--conf_service_base_name', type=str,
+                               help='Unique name for DataLab environment', required=True)
+    required_args.add_argument('--action', type=str, help='Action to perform',
+                               choices=['build', 'deploy', 'create', 'terminate'], required=True)
+    required_args.add_argument('--key_path', type=str, help='Path to admin key (WITHOUT KEY NAME)', required=True)
+    required_args.add_argument('--conf_key_name', type=str, help='Admin key name (WITHOUT ".pem")', required=True)
+    required_args.add_argument('--keycloak_auth_server_url', type=str, default='datalab',
+                               help='Keycloak auth server URL', required=True)
+    required_args.add_argument('--keycloak_realm_name', type=str, help='Keycloak Realm name', required=True)
+    required_args.add_argument('--keycloak_client_name', type=str, default='datalab',
+                               help='Keycloak client name', required=True)
+    required_args.add_argument('--keycloak_client_secret', type=str, default='datalab',
+                               help='Keycloak client secret', required=True)
+    required_args.add_argument('--keycloak_user', type=str, default='datalab', help='Keycloak user', required=True)
+    required_args.add_argument('--keycloak_user_password', type=str, default='keycloak-user-password',
+                               help='Keycloak user password', required=True)
+    required_args.add_argument('--default_endpoint_name', type=str, default='local',
+                               help='Name of localhost provisioning service, that created by default', required=True)
+
+    # subparsers
+    subparsers = parser.add_subparsers(dest='conf_cloud_provider', required=True, help='sub-command help',
+                                       description='''These are the subcommands for deploying resources
+                                       in a specific cloud provider''')
+
+    # --------- aws subcommand ----------------------
+    aws_parser = subparsers.add_parser('aws')
+    aws_parser.add_argument('--aws_user_predefined_s3_policies', type=str,
+                            help='Predefined policies for users instances')
+    aws_parser.add_argument('--aws_access_key', type=str,
+                            help='''AWS Access Key ID. Required in case of deployment with an IAM user: the DataLab
+                            deployment script is executed on a local machine and uses
+                            IAM user permissions to create resources in AWS.''')
+    aws_parser.add_argument('--aws_secret_access_key', type=str, help='AWS Secret Access Key')
+    aws_parser.add_argument('--ssn_assume_role_arn', type=str,
+                            help='Role ARN for creating Route53 record in different AWS account')
+    aws_parser.add_argument('--aws_vpc_id', type=str, help='AWS VPC ID')
+    aws_parser.add_argument('--conf_duo_vpc_enable', type=str, default='false',
+                            help='Duo VPC scheme enable. (valid choices: %s)' % BOOL_CHOICES_LIST,
+                            choices=BOOL_CHOICES_LIST)
+    aws_parser.add_argument('--aws_vpc2_id', type=str, help='Secondary AWS VPC ID')
+    aws_parser.add_argument('--aws_peering_id', type=str, help='Amazon peering connection id')
+    aws_parser.add_argument('--aws_subnet_id', type=str, help='AWS Subnet ID')
+    aws_parser.add_argument('--aws_security_groups_ids', type=str,
+                            help='One or more comma-separated Security group IDs for SSN')
+    aws_parser.add_argument('--aws_billing_bucket', type=str,
+                            help='The name of S3 bucket where billing reports will be placed.')
+    aws_parser.add_argument('--aws_job_enabled', type=str, default='false', choices=BOOL_CHOICES_LIST,
+                            help='Billing format. (valid choices: %s)' % BOOL_CHOICES_LIST)
+    aws_parser.add_argument('--aws_report_path', type=str, help='The path to billing reports directory in S3 bucket')
+
+    aws_required_args = aws_parser.add_argument_group('Required arguments')
+    aws_required_args.add_argument('--aws_region', type=str, required=True, help='AWS region')
+    aws_required_args.add_argument('--aws_zone', type=str, required=True, help='AWS zone')
+    aws_required_args.add_argument('--aws_ssn_instance_size', type=str, required=True, default='t2.large',
+                                   help='The SSN instance shape')
+    aws_required_args.add_argument('--aws_account_id', type=str, required=True, help='The ID of Amazon account')
+
+    # --------azure subcommand -------------------------
+    azure_parser = subparsers.add_parser('azure')
+    azure_parser.add_argument('--azure_vpc_name', type=str, help='Azure VPC Name')
+    azure_parser.add_argument('--azure_subnet_name', type=str, help='Azure Subnet Name')
+    azure_parser.add_argument('--azure_security_group_name', type=str, help='One Security group name for SSN')
+    azure_parser.add_argument('--azure_edge_security_group_name', type=str,
+                              help='One Security group name for Edge node')
+    azure_parser.add_argument('--azure_resource_group_name', type=str, help='Name of Resource group in Azure')
+    azure_parser.add_argument('--azure_datalake_enable', type=str, default='false', choices=BOOL_CHOICES_LIST,
+                              help='Provision DataLake storage account. (valid choices: %s)' % BOOL_CHOICES_LIST)
+    azure_parser.add_argument('--azure_ad_group_id', type=str, help='ID of Azure AD group')
+    azure_parser.add_argument('--azure_offer_number', type=str, help='Azure offer number')
+    azure_parser.add_argument('--azure_currency', type=str, help='Azure currency code')
+    azure_parser.add_argument('--azure_locale', type=str, help='Azure locale')
+    azure_parser.add_argument('--azure_application_id', type=str, help='Azure login application ID')
+    azure_parser.add_argument('--azure_validate_permission_scope', type=str, default='true',
+                              choices=BOOL_CHOICES_LIST,
+                              help='Azure permission scope validation. (valid choices: %s)' % BOOL_CHOICES_LIST)
+    azure_parser.add_argument('--azure_oauth2_enabled', type=str, default='false', choices=BOOL_CHOICES_LIST,
+                              help='Using OAuth2 for logging in DataLab. (valid choices: %s)' % BOOL_CHOICES_LIST)
+    azure_parser.add_argument('--azure_region_info', type=str, help='Azure region info')
+    azure_parser.add_argument('--azure_source_vpc_name', type=str, help='Azure VPC source Name')
+    azure_parser.add_argument('--azure_source_resource_group_name', type=str, help='Azure source resource group')
+
+    azure_required_args = azure_parser.add_argument_group('Required arguments')
+    azure_required_args.add_argument('--azure_region', type=str, required=True, help='Azure region')
+    azure_required_args.add_argument('--azure_ssn_instance_size', type=str, default='Standard_DS2_v2', required=True,
+                                     help='The SSN instance shape')
+    azure_required_args.add_argument('--azure_auth_path', type=str, required=True,
+                                     help='Full path to Azure credentials JSON file')
+
+    # --------gcp subcommand -----------------------------
+    gcp_parser = subparsers.add_parser('gcp')
+    gcp_parser.add_argument('--billing_dataset_name', type=str,
+                            help='Name of GCP dataset (BigQuery service) for billing')
+    gcp_parser.add_argument('--gcp_subnet_name', type=str, help='GCP Subnet Name')
+    gcp_parser.add_argument('--gcp_vpc_name', type=str, help='GCP VPC Name')
+    gcp_parser.add_argument('--gcp_firewall_name', type=str,
+                            help='One or more comma-separated GCP Firewall rules for SSN')
+
+    gcp_required_args = gcp_parser.add_argument_group('Required arguments')
+    gcp_required_args.add_argument('--gcp_region', type=str, required=True, help='GCP region')
+    gcp_required_args.add_argument('--gcp_zone', type=str, required=True, help='GCP zone')
+    gcp_required_args.add_argument('--gcp_ssn_instance_size', type=str, required=True, default='n1-standard-2',
+                                   help='The SSN instance shape')
+    gcp_required_args.add_argument('--gcp_project_id', type=str, required=True,
+                                   help='The project ID in Google Cloud Platform')
+    gcp_required_args.add_argument('--gcp_service_account_path', type=str, required=True,
+                                   help='Full path to the GCP service account JSON file')
+    return parser
 
 
 def generate_docker_command():
@@ -207,11 +281,15 @@ def generate_docker_command():
 
 def build_docker_images(args):
     # Building base and ssn docker images
-    subprocess.run('cd {2}; sudo docker build --build-arg OS={0} --build-arg SRC_PATH="infrastructure-provisioning/src/" --file '
-              'infrastructure-provisioning/src/general/files/{1}/'
-              'base_Dockerfile -t docker.datalab-base .'.format(args.conf_os_family, args.conf_cloud_provider, args.workspace_path), shell=True, check=True)
-    subprocess.run('cd {2}; sudo docker build --build-arg OS={0} --file infrastructure-provisioning/src/general/files/{1}/'
-              'ssn_Dockerfile -t docker.datalab-ssn .'.format(args.conf_os_family, args.conf_cloud_provider, args.workspace_path), shell=True, check=True)
+    subprocess.run(
+        'cd {2}; sudo docker build --build-arg OS={0} --build-arg SRC_PATH="infrastructure-provisioning/src/" --file '
+        'infrastructure-provisioning/src/general/files/{1}/'
+        'base_Dockerfile -t docker.datalab-base .'.format(args.conf_os_family, args.conf_cloud_provider,
+                                                          args.workspace_path), shell=True, check=True)
+    subprocess.run(
+        'cd {2}; sudo docker build --build-arg OS={0} --file infrastructure-provisioning/src/general/files/{1}/'
+        'ssn_Dockerfile -t docker.datalab-ssn .'.format(args.conf_os_family, args.conf_cloud_provider,
+                                                        args.workspace_path), shell=True, check=True)
 
 
 def deploy_datalab(args):
@@ -227,6 +305,13 @@ def terminate_datalab(args):
 
 
 if __name__ == "__main__":
+    parser = build_parser()
+    args = parser.parse_args()
+
+    if args.aws_secret_access_key or args.aws_access_key:
+        if not (args.aws_secret_access_key and args.aws_access_key):
+            sys.exit('Please provide both arguments: --aws_secret_access_key and --aws_access_key')
+
     if not args.workspace_path:
         print("Workspace path isn't set, using current directory: {}".format(os.environ['PWD']))
         args.workspace_path = os.environ['PWD']
diff --git a/infrastructure-provisioning/src/base/scripts/install_prerequisites.py b/infrastructure-provisioning/src/base/scripts/install_prerequisites.py
index 7b747b2..b75ae24 100644
--- a/infrastructure-provisioning/src/base/scripts/install_prerequisites.py
+++ b/infrastructure-provisioning/src/base/scripts/install_prerequisites.py
@@ -43,25 +43,12 @@ parser.add_argument('--region', type=str, default='')
 args = parser.parse_args()
 
 
-def create_china_pip_conf_file(conn):
-    if not exists(conn,'/home/{}/pip_china_ensured'.format(args.user)):
-        conn.sudo('touch /etc/pip.conf')
-        conn.sudo('echo "[global]" >> /etc/pip.conf')
-        conn.sudo('echo "timeout = 600" >> /etc/pip.conf')
-        conn.sudo('echo "index-url = https://{}/simple/" >> /etc/pip.conf'.format(os.environ['conf_pypi_mirror']))
-        conn.sudo('echo "trusted-host = {}" >> /etc/pip.conf'.format(os.environ['conf_pypi_mirror']))
-        conn.sudo('touch /home/{}/pip_china_ensured'.format(args.user))
-
 if __name__ == "__main__":
     print("Configure connections")
     global conn
     conn = init_datalab_connection(args.hostname, args.user, args.keyfile)
     deeper_config = json.loads(args.additional_config)
 
-    if args.region == 'cn-north-1':
-        change_pkg_repos()
-        create_china_pip_conf_file()
-
     print("Updating hosts file")
     update_hosts_file(args.user)
 
diff --git a/infrastructure-provisioning/src/general/conf/datalab.ini b/infrastructure-provisioning/src/general/conf/datalab.ini
index f9d2d1b..4881a03 100644
--- a/infrastructure-provisioning/src/general/conf/datalab.ini
+++ b/infrastructure-provisioning/src/general/conf/datalab.ini
@@ -40,8 +40,6 @@ key_dir = /root/keys/
 lifecycle_stage = dev
 ### The name of user for tag, which will be set for all resources
 # tag_resource_id = user:tag
-### Pypi mirror for China
-pypi_mirror = pypi.doubanio.com
 ### Name of own GitLab SSL certificate
 gitlab_certfile = datalab-gitlab.crt
 ### Enable or Disable creating image at first time
@@ -262,8 +260,6 @@ scala_version = 2.12.8
 livy_version = 0.3.0
 ### If it is true, Livy will be used on Zeppelin notebook
 multiple_clusters = false
-### R China mirror
-r_mirror = http://mirror.lzu.edu.cn/CRAN/
 ### NVidia driver version for Tensor/DeepLearning notebooks
 nvidia_version = 418.126.02
 ### Caffe library version for DeepLearning notebook
diff --git a/infrastructure-provisioning/src/general/files/aws/base_Dockerfile b/infrastructure-provisioning/src/general/files/aws/base_Dockerfile
index 9cad9c4..70870b8 100644
--- a/infrastructure-provisioning/src/general/files/aws/base_Dockerfile
+++ b/infrastructure-provisioning/src/general/files/aws/base_Dockerfile
@@ -41,7 +41,7 @@ ENV LANGUAGE en_US:en
 ENV LC_ALL en_US.UTF-8
 
 # Install any python dependencies
-RUN python3 -m pip install -UI qtconsole==4.7.7 pip==21.0.1 && \
+RUN python3 -m pip install -UI qtconsole==5.1.1 pip==21.1.2 && \
     python3 -m pip install boto3 backoff patchwork fabric fabvenv awscli argparse requests ujson jupyter pycryptodome
 
 # Configuring ssh for user
@@ -79,7 +79,6 @@ COPY ${SRC_PATH}general/scripts/aws/common_* /root/scripts/
 COPY ${SRC_PATH}general/lib/aws/* /usr/lib/python3.8/datalab/
 COPY ${SRC_PATH}general/lib/os/${OS}/common_lib.py /usr/lib/python3.8/datalab/common_lib.py
 COPY ${SRC_PATH}general/lib/os/fab.py /usr/lib/python3.8/datalab/fab.py
-COPY ${SRC_PATH}general/files/os/${OS}/sources.list /root/files/
 COPY ${SRC_PATH}general/files/os/ivysettings.xml /root/templates/
 COPY ${SRC_PATH}general/files/os/local_endpoint.json /root/files/
 COPY ${SRC_PATH}project/templates/locations/ /root/locations/
diff --git a/infrastructure-provisioning/src/general/files/azure/base_Dockerfile b/infrastructure-provisioning/src/general/files/azure/base_Dockerfile
index 3d608de..2070c05 100644
--- a/infrastructure-provisioning/src/general/files/azure/base_Dockerfile
+++ b/infrastructure-provisioning/src/general/files/azure/base_Dockerfile
@@ -30,7 +30,7 @@ RUN apt-get update && \
     apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
 
 # Install any python dependencies
-RUN python3 -m pip install -UI qtconsole==4.7.7 pip==21.0.1 && \
+RUN python3 -m pip install -UI qtconsole==5.1.1 pip==21.1.2 && \
     python3 -m pip install backoff patchwork fabric fabvenv argparse requests ujson jupyter pycryptodome azure==2.0.0 azure-mgmt-authorization pyyaml
 
 # Configuring ssh for user
@@ -68,7 +68,6 @@ COPY ${SRC_PATH}general/scripts/azure/common_* /root/scripts/
 COPY ${SRC_PATH}general/lib/azure/* /usr/lib/python3.8/datalab/
 COPY ${SRC_PATH}general/lib/os/${OS}/common_lib.py /usr/lib/python3.8/datalab/common_lib.py
 COPY ${SRC_PATH}general/lib/os/fab.py /usr/lib/python3.8/datalab/fab.py
-COPY ${SRC_PATH}general/files/os/${OS}/sources.list /root/files/
 COPY ${SRC_PATH}general/files/os/ivysettings.xml /root/templates/
 COPY ${SRC_PATH}general/files/os/local_endpoint.json /root/files/
 COPY ${SRC_PATH}project/templates/locations/ /root/locations/
diff --git a/infrastructure-provisioning/src/general/files/gcp/base_Dockerfile b/infrastructure-provisioning/src/general/files/gcp/base_Dockerfile
index 2059bce..bb4a81c 100644
--- a/infrastructure-provisioning/src/general/files/gcp/base_Dockerfile
+++ b/infrastructure-provisioning/src/general/files/gcp/base_Dockerfile
@@ -41,7 +41,7 @@ ENV LANGUAGE en_US:en
 ENV LC_ALL en_US.UTF-8
 
 # Install any python dependencies
-RUN python3 -m pip install -UI pip==21.0.1 && \
+RUN python3 -m pip install -UI pip==21.1.2 && \
     python3 -m pip install -U six patchwork configparser boto3 backoff fabric fabvenv argparse ujson jupyter pycryptodome google-api-python-client google-cloud-storage \
     pyyaml google-auth-httplib2 oauth2client
 
@@ -79,7 +79,6 @@ COPY ${SRC_PATH}general/scripts/gcp/common_* /root/scripts/
 COPY ${SRC_PATH}general/lib/gcp/* /usr/lib/python3.8/datalab/
 COPY ${SRC_PATH}general/lib/os/${OS}/common_lib.py /usr/lib/python3.8/datalab/common_lib.py
 COPY ${SRC_PATH}general/lib/os/fab.py /usr/lib/python3.8/datalab/fab.py
-COPY ${SRC_PATH}general/files/os/${OS}/sources.list /root/files/
 COPY ${SRC_PATH}general/files/os/ivysettings.xml /root/templates/
 COPY ${SRC_PATH}general/files/os/local_endpoint.json /root/files/
 COPY ${SRC_PATH}project/templates/locations/ /root/locations/
diff --git a/infrastructure-provisioning/src/general/files/os/debian/sources.list b/infrastructure-provisioning/src/general/files/os/debian/sources.list
deleted file mode 100644
index 3b3e80a..0000000
--- a/infrastructure-provisioning/src/general/files/os/debian/sources.list
+++ /dev/null
@@ -1,56 +0,0 @@
-# *****************************************************************************
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-# ******************************************************************************
-
-# See http://help.ubuntu.com/community/UpgradeNotes for how to upgrade to
-# newer versions of the distribution.
-
-deb http://mirrors.aliyun.com/ubuntu/ xenial main restricted
-deb-src http://mirrors.aliyun.com/ubuntu/ xenial main restricted
-
-## Major bug fix updates produced after the final release of the
-## distribution.
-deb http://mirrors.aliyun.com/ubuntu/ xenial-updates main restricted
-deb-src http://mirrors.aliyun.com/ubuntu/ xenial-updates main restricted
-
-## Uncomment the following two lines to add software from the 'universe'
-## repository.
-## N.B. software from this repository is ENTIRELY UNSUPPORTED by the Ubuntu
-## team. Also, please note that software in universe WILL NOT receive any
-## review or updates from the Ubuntu security team.
-deb http://mirrors.aliyun.com/ubuntu/ xenial universe
-deb-src http://mirrors.aliyun.com/ubuntu/ xenial universe
-deb http://mirrors.aliyun.com/ubuntu/ xenial-updates universe
-deb-src http://mirrors.aliyun.com/ubuntu/ xenial-updates universe
-
-## N.B. software from this repository may not have been tested as
-## extensively as that contained in the main release, although it includes
-## newer versions of some applications which may provide useful features.
-## Also, please note that software in backports WILL NOT receive any review
-## or updates from the Ubuntu security team.
-# deb http://mirrors.aliyun.com/ubuntu/ xenial-backports main restricted
-# deb-src http://mirrors.aliyun.com/ubuntu/ xenial-backports main restricted
-
-deb http://mirrors.aliyun.com/ubuntu/ xenial-security main restricted
-deb-src http://mirrors.aliyun.com/ubuntu/ xenial-security main restricted
-deb http://mirrors.aliyun.com/ubuntu/ xenial-security universe
-deb-src http://mirrors.aliyun.com/ubuntu/ xenial-security universe
-# deb http://mirrors.aliyun.com/ubuntu/ xenial-security multiverse
-# deb-src http://mirrors.aliyun.com/ubuntu/ xenial-security multiverse
\ No newline at end of file
diff --git a/infrastructure-provisioning/src/general/files/os/redhat/sources.list b/infrastructure-provisioning/src/general/files/os/redhat/sources.list
deleted file mode 100644
index fe5549c..0000000
--- a/infrastructure-provisioning/src/general/files/os/redhat/sources.list
+++ /dev/null
@@ -1,83 +0,0 @@
-# *****************************************************************************
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-# ******************************************************************************
-
-# CentOS-Base.repo
-#
-# The mirror system uses the connecting IP address of the client and the
-# update status of each mirror to pick mirrors that are updated to and
-# geographically close to the client.  You should use this for CentOS updates
-# unless you are manually picking other mirrors.
-#
-# If the mirrorlist= does not work for you, as a fall back you can try the
-# remarked out baseurl= line instead.
-#
-#
-
-[base]
-name=CentOS-7 - Base - mirrors.aliyun.com
-failovermethod=priority
-baseurl=http://mirrors.aliyun.com/centos/7/os/$basearch/
-        http://mirrors.aliyuncs.com/centos/7/os/$basearch/
-#mirrorlist=http://mirrorlist.centos.org/?release=7&arch=$basearch&repo=os
-gpgcheck=1
-gpgkey=http://mirrors.aliyun.com/centos/RPM-GPG-KEY-CentOS-7
-
-#released updates
-[updates]
-name=CentOS-7 - Updates - mirrors.aliyun.com
-failovermethod=priority
-baseurl=http://mirrors.aliyun.com/centos/7/updates/$basearch/
-        http://mirrors.aliyuncs.com/centos/7/updates/$basearch/
-#mirrorlist=http://mirrorlist.centos.org/?release=7&arch=$basearch&repo=updates
-gpgcheck=1
-gpgkey=http://mirrors.aliyun.com/centos/RPM-GPG-KEY-CentOS-7
-
-#additional packages that may be useful
-[extras]
-name=CentOS-7 - Extras - mirrors.aliyun.com
-failovermethod=priority
-baseurl=http://mirrors.aliyun.com/centos/7/extras/$basearch/
-        http://mirrors.aliyuncs.com/centos/7/extras/$basearch/
-#mirrorlist=http://mirrorlist.centos.org/?release=7&arch=$basearch&repo=extras
-gpgcheck=1
-gpgkey=http://mirrors.aliyun.com/centos/RPM-GPG-KEY-CentOS-7
-
-#additional packages that extend functionality of existing packages
-[centosplus]
-name=CentOS-7 - Plus - mirrors.aliyun.com
-failovermethod=priority
-baseurl=http://mirrors.aliyun.com/centos/7/centosplus/$basearch/
-        http://mirrors.aliyuncs.com/centos/7/centosplus/$basearch/
-#mirrorlist=http://mirrorlist.centos.org/?release=7&arch=$basearch&repo=centosplus
-gpgcheck=1
-enabled=0
-gpgkey=http://mirrors.aliyun.com/centos/RPM-GPG-KEY-CentOS-7
-
-#contrib - packages by Centos Users
-[contrib]
-name=CentOS-7 - Contrib - mirrors.aliyun.com
-failovermethod=priority
-baseurl=http://mirrors.aliyun.com/centos/7/contrib/$basearch/
-        http://mirrors.aliyuncs.com/centos/7/contrib/$basearch/
-#mirrorlist=http://mirrorlist.centos.org/?release=7&arch=$basearch&repo=contrib
-gpgcheck=1
-enabled=0
-gpgkey=http://mirrors.aliyun.com/centos/RPM-GPG-KEY-CentOS-7
\ No newline at end of file
diff --git a/infrastructure-provisioning/src/general/lib/os/debian/common_lib.py b/infrastructure-provisioning/src/general/lib/os/debian/common_lib.py
index 29b504b..e62d301 100644
--- a/infrastructure-provisioning/src/general/lib/os/debian/common_lib.py
+++ b/infrastructure-provisioning/src/general/lib/os/debian/common_lib.py
@@ -156,12 +156,7 @@ def renew_gpg_key():
         sys.exit(1)
 
 
-def change_pkg_repos():
-    if not exists(datalab.fab.conn,'/tmp/pkg_china_ensured'):
-        datalab.fab.conn.put('/root/files/sources.list', '/tmp/sources.list')
-        datalab.fab.conn.sudo('mv /tmp/sources.list /etc/apt/sources.list')
-        manage_pkg('update', 'remote', '')
-        datalab.fab.conn.sudo('touch /tmp/pkg_china_ensured')
+
 
 
 def find_java_path_remote():
diff --git a/infrastructure-provisioning/src/general/lib/os/redhat/common_lib.py b/infrastructure-provisioning/src/general/lib/os/redhat/common_lib.py
index 6b432fd..15d51c5 100644
--- a/infrastructure-provisioning/src/general/lib/os/redhat/common_lib.py
+++ b/infrastructure-provisioning/src/general/lib/os/redhat/common_lib.py
@@ -89,13 +89,6 @@ def ensure_pkg(user, requisites='git vim gcc python-devel openssl-devel nmap lib
         sys.exit(1)
 
 
-def change_pkg_repos():
-    if not exists(datalab.fab.conn,'/tmp/pkg_china_ensured'):
-        datalab.fab.conn.put('/root/files/sources.list', '/tmp/sources.list')
-        datalab.fab.conn.sudo('mv /tmp/sources.list  /etc/yum.repos.d/CentOS-Base-aliyun.repo')
-        datalab.fab.conn.sudo('touch /tmp/pkg_china_ensured')
-
-
 def find_java_path_remote():
     java_path = datalab.fab.conn.sudo("alternatives --display java | grep 'slave jre: ' | awk '{print $3}'").stdout.replace('\n','')
     return java_path
diff --git a/infrastructure-provisioning/src/general/scripts/aws/common_notebook_configure_dataengine-service.py b/infrastructure-provisioning/src/general/scripts/aws/common_notebook_configure_dataengine-service.py
index 4d542b4..b265226 100644
--- a/infrastructure-provisioning/src/general/scripts/aws/common_notebook_configure_dataengine-service.py
+++ b/infrastructure-provisioning/src/general/scripts/aws/common_notebook_configure_dataengine-service.py
@@ -87,12 +87,12 @@ if __name__ == "__main__":
         print('[INSTALLING KERNELS INTO SPECIFIED NOTEBOOK]')
         params = "--bucket {} --cluster_name {} --emr_version {} --keyfile {} --notebook_ip {} --region {} " \
                  "--emr_excluded_spark_properties {} --project_name {} --os_user {}  --edge_hostname {} " \
-                 "--proxy_port {} --scala_version {} --application {} --pip_mirror {}" \
+                 "--proxy_port {} --scala_version {} --application {}" \
             .format(notebook_config['bucket_name'], notebook_config['cluster_name'], os.environ['emr_version'],
                     notebook_config['key_path'], notebook_config['notebook_ip'], os.environ['aws_region'],
                     os.environ['emr_excluded_spark_properties'], os.environ['project_name'],
                     os.environ['conf_os_user'], edge_instance_hostname, '3128', os.environ['notebook_scala_version'],
-                    os.environ['application'], os.environ['conf_pypi_mirror'])
+                    os.environ['application'])
         try:
             subprocess.run("~/scripts/{}_{}.py {}".format(application, 'install_dataengine-service_kernels', params), shell=True, check=True)
             datalab.actions_lib.remove_emr_tag(notebook_config['cluster_id'], ['State'])
diff --git a/infrastructure-provisioning/src/general/scripts/aws/ssn_prepare.py b/infrastructure-provisioning/src/general/scripts/aws/ssn_prepare.py
index bee2e3f..6ce0672 100644
--- a/infrastructure-provisioning/src/general/scripts/aws/ssn_prepare.py
+++ b/infrastructure-provisioning/src/general/scripts/aws/ssn_prepare.py
@@ -37,7 +37,8 @@ if __name__ == "__main__":
     local_log_filepath = "/logs/" + os.environ['conf_resource'] + "/" + local_log_filename
     logging.basicConfig(format='%(levelname)-8s [%(asctime)s]  %(message)s',
                         level=logging.DEBUG,
-                        filename=local_log_filepath)
+                        filename=local_log_filepath,
+                        handlers=[logging.StreamHandler()])
     ssn_conf = dict()
     ssn_conf['instance'] = 'ssn'
     ssn_conf['pre_defined_vpc'] = False
@@ -59,7 +60,6 @@ if __name__ == "__main__":
 
     try:
         logging.info('[DERIVING NAMES]')
-        print('[DERIVING NAMES]')
         ssn_conf['service_base_name'] = os.environ['conf_service_base_name'] = datalab.fab.replace_multi_symbols(
             os.environ['conf_service_base_name'][:20], '-', True)
         ssn_conf['role_name'] = '{}-ssn-role'.format(ssn_conf['service_base_name'])
@@ -101,7 +101,6 @@ if __name__ == "__main__":
         try:
             ssn_conf['pre_defined_vpc'] = True
             logging.info('[CREATE VPC AND ROUTE TABLE]')
-            print('[CREATE VPC AND ROUTE TABLE]')
             params = "--vpc {} --region {} --infra_tag_name {} --infra_tag_value {} --vpc_name {}".format(
                 ssn_conf['vpc_cidr'], ssn_conf['region'], ssn_conf['tag_name'], ssn_conf['service_base_name'],
                 ssn_conf['vpc_name'])
@@ -127,7 +126,6 @@ if __name__ == "__main__":
         try:
             ssn_conf['pre_defined_vpc2'] = True
             logging.info('[CREATE SECONDARY VPC AND ROUTE TABLE]')
-            print('[CREATE SECONDARY VPC AND ROUTE TABLE]')
             params = "--vpc {} --region {} --infra_tag_name {} --infra_tag_value {} --secondary " \
                      "--vpc_name {}".format(ssn_conf['vpc2_cidr'], ssn_conf['region'], ssn_conf['tag2_name'],
                                             ssn_conf['service_base_name'], ssn_conf['vpc2_name'])
@@ -154,7 +152,6 @@ if __name__ == "__main__":
         try:
             ssn_conf['pre_defined_subnet'] = True
             logging.info('[CREATE SUBNET]')
-            print('[CREATE SUBNET]')
             params = "--vpc_id {0} --username {1} --infra_tag_name {2} --infra_tag_value {3} --prefix {4} " \
                      "--ssn {5} --zone {6} --subnet_name {7}".format(
                       os.environ['aws_vpc_id'], 'ssn', ssn_conf['tag_name'],ssn_conf['service_base_name'], '20',
@@ -193,7 +190,6 @@ if __name__ == "__main__":
     except KeyError:
         try:
             logging.info('[CREATE PEERING CONNECTION]')
-            print('[CREATE PEERING CONNECTION]')
             os.environ['aws_peering_id'] = datalab.actions_lib.create_peering_connection(
                 os.environ['aws_vpc_id'], os.environ['aws_vpc2_id'], ssn_conf['service_base_name'])
             print('PEERING CONNECTION ID:' + os.environ['aws_peering_id'])
@@ -226,7 +222,6 @@ if __name__ == "__main__":
         try:
             ssn_conf['pre_defined_sg'] = True
             logging.info('[CREATE SG FOR SSN]')
-            print('[CREATE SG FOR SSN]')
             ssn_conf['ingress_sg_rules_template'] = datalab.meta_lib.format_sg([
                 {
                     "PrefixListIds": [],
@@ -301,7 +296,6 @@ if __name__ == "__main__":
 
     try:
         logging.info('[CREATE ROLES]')
-        print('[CREATE ROLES]')
         params = "--role_name {} --role_profile_name {} --policy_name {} --policy_file_name {} --region {} " \
                  "--infra_tag_name {} --infra_tag_value {} --user_tag_value {}".\
             format(ssn_conf['role_name'], ssn_conf['role_profile_name'], ssn_conf['policy_name'],
@@ -335,7 +329,6 @@ if __name__ == "__main__":
 
     try:
         logging.info('[CREATE ENDPOINT AND ROUTE-TABLE]')
-        print('[CREATE ENDPOINT AND ROUTE-TABLE]')
         params = "--vpc_id {} --region {} --infra_tag_name {} --infra_tag_value {}".format(
             os.environ['aws_vpc_id'], os.environ['aws_region'], ssn_conf['tag_name'], ssn_conf['service_base_name'])
         try:
@@ -368,7 +361,6 @@ if __name__ == "__main__":
     if os.environ['conf_duo_vpc_enable'] == 'true':
         try:
             logging.info('[CREATE ENDPOINT AND ROUTE-TABLE FOR NOTEBOOK VPC]')
-            print('[CREATE ENDPOINT AND ROUTE-TABLE FOR NOTEBOOK VPC]')
             params = "--vpc_id {} --region {} --infra_tag_name {} --infra_tag_value {}".format(
                 os.environ['aws_vpc2_id'], os.environ['aws_region'], ssn_conf['tag2_name'],
                 ssn_conf['service_base_name'])
@@ -401,7 +393,6 @@ if __name__ == "__main__":
 
     try:
         logging.info('[CREATE SSN INSTANCE]')
-        print('[CREATE SSN INSTANCE]')
         params = "--node_name {0} --ami_id {1} --instance_type {2} --key_name {3} --security_group_ids {4} " \
                  "--subnet_id {5} --iam_profile {6} --infra_tag_name {7} --infra_tag_value {8} --instance_class {9} " \
                  "--primary_disk_size {10}".\
@@ -441,7 +432,6 @@ if __name__ == "__main__":
     if ssn_conf['network_type'] == 'public':
         try:
             logging.info('[ASSOCIATING ELASTIC IP]')
-            print('[ASSOCIATING ELASTIC IP]')
             ssn_conf['ssn_id'] = datalab.meta_lib.get_instance_by_name(ssn_conf['tag_name'], ssn_conf['instance_name'])
             try:
                 ssn_conf['elastic_ip'] = os.environ['ssn_elastic_ip']
@@ -489,7 +479,6 @@ if __name__ == "__main__":
     if 'ssn_hosted_zone_id' in os.environ and 'ssn_hosted_zone_name' in os.environ and 'ssn_subdomain' in os.environ:
         try:
             logging.info('[CREATING ROUTE53 RECORD]')
-            print('[CREATING ROUTE53 RECORD]')
             try:
                 datalab.actions_lib.create_route_53_record(os.environ['ssn_hosted_zone_id'],
                                                            os.environ['ssn_hosted_zone_name'],
diff --git a/infrastructure-provisioning/src/general/scripts/gcp/common_notebook_configure_dataengine-service.py b/infrastructure-provisioning/src/general/scripts/gcp/common_notebook_configure_dataengine-service.py
index a79a4c4..7273709 100644
--- a/infrastructure-provisioning/src/general/scripts/gcp/common_notebook_configure_dataengine-service.py
+++ b/infrastructure-provisioning/src/general/scripts/gcp/common_notebook_configure_dataengine-service.py
@@ -100,12 +100,11 @@ if __name__ == "__main__":
         print('[INSTALLING KERNELS INTO SPECIFIED NOTEBOOK]')
         params = "--bucket {} --cluster_name {} --dataproc_version {} --keyfile {} --notebook_ip {} --region {} " \
                  "--edge_user_name {} --project_name {} --os_user {}  --edge_hostname {} --proxy_port {} " \
-                 "--scala_version {} --application {} --pip_mirror {}" \
+                 "--scala_version {} --application {}" \
             .format(notebook_config['bucket_name'], notebook_config['cluster_name'], os.environ['dataproc_version'],
                     notebook_config['key_path'], notebook_config['notebook_ip'], os.environ['gcp_region'],
                     notebook_config['edge_user_name'], notebook_config['project_name'], os.environ['conf_os_user'],
-                    edge_instance_hostname, '3128', os.environ['notebook_scala_version'], os.environ['application'],
-                    os.environ['conf_pypi_mirror'])
+                    edge_instance_hostname, '3128', os.environ['notebook_scala_version'], os.environ['application'])
         try:
             subprocess.run("~/scripts/{}_{}.py {}".format(application, 'install_dataengine-service_kernels', params), shell=True, check=True)
             GCPActions.update_dataproc_cluster(notebook_config['cluster_name'], notebook_config['cluster_labels'])
diff --git a/infrastructure-provisioning/src/ssn/fabfile.py b/infrastructure-provisioning/src/ssn/fabfile.py
index 79243cd..1c107b0 100644
--- a/infrastructure-provisioning/src/ssn/fabfile.py
+++ b/infrastructure-provisioning/src/ssn/fabfile.py
@@ -26,8 +26,6 @@ import os
 import sys
 import traceback
 import uuid
-from datalab.fab import *
-from fabric import *
 
 @task
 def run(ctx):
@@ -39,14 +37,16 @@ def run(ctx):
     ssn_config = dict()
     ssn_config['ssn_unique_index'] = str(uuid.uuid4())[:5]
     try:
-        subprocess.run("~/scripts/{}.py --ssn_unique_index {}".format('ssn_prepare', ssn_config['ssn_unique_index']), shell=True, check=True)
+        subprocess.run("~/scripts/{}.py --ssn_unique_index {}".format('ssn_prepare', ssn_config['ssn_unique_index']),
+                       shell=True, check=True)
     except Exception as err:
         traceback.print_exc()
         append_result("Failed preparing SSN node.", str(err))
         sys.exit(1)
 
     try:
-        subprocess.run("~/scripts/{}.py --ssn_unique_index {}".format('ssn_configure', ssn_config['ssn_unique_index']), shell=True, check=True)
+        subprocess.run("~/scripts/{}.py --ssn_unique_index {}".format('ssn_configure', ssn_config['ssn_unique_index']),
+                       shell=True, check=True)
     except Exception as err:
         traceback.print_exc()
         append_result("Failed configuring SSN node.", str(err))
diff --git a/infrastructure-provisioning/src/ssn/scripts/configure_docker.py b/infrastructure-provisioning/src/ssn/scripts/configure_docker.py
index aa20a68..8c0e4cb 100644
--- a/infrastructure-provisioning/src/ssn/scripts/configure_docker.py
+++ b/infrastructure-provisioning/src/ssn/scripts/configure_docker.py
@@ -73,11 +73,6 @@ def download_toree():
         sys.exit(1)
 
 
-def add_china_repository(datalab_path):
-    conn.sudo('''bash -c 'cd {1}sources/infrastructure-provisioning/src/base/ && sed -i "/pip install/s/$/ -i https\:\/\/{0}\/simple --trusted-host {0} --timeout 60000/g" Dockerfile' '''.format(os.environ['conf_pypi_mirror'], datalab_path))
-    conn.sudo('''bash -c 'cd {}sources/infrastructure-provisioning/src/base/ && sed -i "/pip install/s/jupyter/ipython==5.0.0 jupyter==1.0.0/g" Dockerfile' '''.format(datalab_path))
-    conn.sudo('''bash -c 'cd {}sources/infrastructure-provisioning/src/base/ && sed -i "22i COPY general/files/os/debian/sources.list /etc/apt/sources.list" Dockerfile' '''.format(datalab_path))
-
 def login_in_gcr(os_user, gcr_creds, odahu_image, datalab_path, cloud_provider):
     if gcr_creds != '':
         try:
@@ -119,8 +114,6 @@ def build_docker_images(image_list, region, datalab_path):
                   'azure_auth.json'.format(args.keyfile, host_string, args.datalab_path))
             conn.sudo('cp {0}sources/infrastructure-provisioning/src/base/azure_auth.json '
                  '/home/{1}/keys/azure_auth.json'.format(args.datalab_path, args.os_user))
-        if region == 'cn-north-1':
-            add_china_repository(datalab_path)
         for image in image_list:
             name = image['name']
             tag = image['tag']

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datalab.apache.org
For additional commands, e-mail: commits-help@datalab.apache.org