Posted to commits@airflow.apache.org by ka...@apache.org on 2020/06/25 15:36:16 UTC

[airflow] branch master updated: Use literal syntax instead of function calls to create data structure (#9516)

This is an automated email from the ASF dual-hosted git repository.

kaxilnaik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/master by this push:
     new 87fdbd0  Use literal syntax instead of function calls to create data structure (#9516)
87fdbd0 is described below

commit 87fdbd0708d942af98d35604fe5962962e25d246
Author: Kaxil Naik <ka...@gmail.com>
AuthorDate: Thu Jun 25 16:35:37 2020 +0100

    Use literal syntax instead of function calls to create data structure (#9516)
    
    It is slower to call e.g. dict() than to use the empty literal, because the name dict must be looked up in the global scope in case it has been rebound. The same applies to the other builtin constructors changed here, such as list(), tuple() and str().
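    
    A rough sketch of why this matters (assuming a CPython 3.x interpreter from
    around this release; opcode names and timings vary between versions and
    machines, so the snippet is illustrative rather than a benchmark):
    
        import dis
        import timeit
    
        # dict() compiles to a global name lookup (LOAD_GLOBAL) plus a call,
        # because "dict" may have been rebound to something else at runtime.
        dis.dis(lambda: dict())
    
        # The empty literal compiles straight to BUILD_MAP, with no lookup
        # and no call overhead.
        dis.dis(lambda: {})
    
        # timeit returns the total time in seconds for one million evaluations;
        # the literal form is consistently the cheaper of the two.
        print(timeit.timeit("dict()"))
        print(timeit.timeit("{}"))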
---
 .pre-commit-config.yaml                                      |  1 +
 airflow/executors/base_executor.py                           |  4 ++--
 airflow/jobs/backfill_job.py                                 |  2 +-
 airflow/models/dag.py                                        |  4 ++--
 airflow/models/dagcode.py                                    |  2 +-
 airflow/providers/amazon/aws/hooks/base_aws.py               |  4 ++--
 airflow/providers/amazon/aws/hooks/sagemaker.py              |  4 ++--
 airflow/providers/amazon/aws/operators/datasync.py           | 10 +++++-----
 airflow/providers/google/cloud/operators/bigquery.py         |  2 +-
 airflow/providers/google/cloud/operators/bigtable.py         |  2 +-
 airflow/providers/google/cloud/operators/dataproc.py         |  2 +-
 airflow/providers/google/cloud/transfers/cassandra_to_gcs.py |  2 +-
 airflow/providers/google/cloud/utils/credentials_provider.py |  2 +-
 airflow/utils/log/file_processor_handler.py                  |  2 +-
 airflow/utils/log/logging_mixin.py                           |  6 +++---
 airflow/utils/operator_helpers.py                            |  2 +-
 airflow/www/extensions/init_manifest_files.py                |  2 +-
 backport_packages/setup_backport_packages.py                 |  4 ++--
 dev/send_email.py                                            |  2 +-
 tests/models/test_dag.py                                     |  2 +-
 tests/test_stats.py                                          |  4 ++--
 tests/utils/log/elasticmock/fake_elasticsearch.py            |  2 +-
 22 files changed, 34 insertions(+), 33 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 293577a..1b4ce49 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -145,6 +145,7 @@ metastore_browser/templates/.*\\.html$|.*\\.jinja2"
     hooks:
       - id: check-merge-conflict
       - id: debug-statements
+      - id: check-builtin-literals
       - id: detect-private-key
       - id: end-of-file-fixer
       - id: mixed-line-ending
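
The hook added above, check-builtin-literals, presumably comes from the same
pre-commit-hooks repository as its siblings in this list (check-merge-conflict,
debug-statements, and so on); it flags constructor calls such as dict(), list()
and tuple() used to build empty values. As a hypothetical local usage example,
a contributor could run just this hook across the whole tree with:

    pre-commit run check-builtin-literals --all-files
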
diff --git a/airflow/executors/base_executor.py b/airflow/executors/base_executor.py
index 476eeec..2471c3f 100644
--- a/airflow/executors/base_executor.py
+++ b/airflow/executors/base_executor.py
@@ -226,10 +226,10 @@ class BaseExecutor(LoggingMixin):
         :param dag_ids: to dag_ids to return events for, if None returns all
         :return: a dict of events
         """
-        cleared_events: Dict[TaskInstanceKeyType, EventBufferValueType] = dict()
+        cleared_events: Dict[TaskInstanceKeyType, EventBufferValueType] = {}
         if dag_ids is None:
             cleared_events = self.event_buffer
-            self.event_buffer = dict()
+            self.event_buffer = {}
         else:
             for key in list(self.event_buffer.keys()):
                 dag_id, _, _, _ = key
diff --git a/airflow/jobs/backfill_job.py b/airflow/jobs/backfill_job.py
index 2d1986d..1638654 100644
--- a/airflow/jobs/backfill_job.py
+++ b/airflow/jobs/backfill_job.py
@@ -104,7 +104,7 @@ class BackfillJob(BaseJob):
                      total_runs=0,
                      ):
             self.to_run = to_run or OrderedDict()
-            self.running = running or dict()
+            self.running = running or {}
             self.skipped = skipped or set()
             self.succeeded = succeeded or set()
             self.failed = failed or set()
diff --git a/airflow/models/dag.py b/airflow/models/dag.py
index ccdb60a..ee3a475 100644
--- a/airflow/models/dag.py
+++ b/airflow/models/dag.py
@@ -257,7 +257,7 @@ class DAG(BaseDag, LoggingMixin):
         # set file location to caller source path
         back = sys._getframe().f_back
         self.fileloc = back.f_code.co_filename if back else ""
-        self.task_dict: Dict[str, BaseOperator] = dict()
+        self.task_dict: Dict[str, BaseOperator] = {}
 
         # set timezone from start_date
         if start_date and start_date.tzinfo:
@@ -1277,7 +1277,7 @@ class DAG(BaseDag, LoggingMixin):
         raise TaskNotFound("Task {task_id} not found".format(task_id=task_id))
 
     def pickle_info(self):
-        d = dict()
+        d = {}
         d['is_picklable'] = True
         try:
             dttm = timezone.utcnow()
diff --git a/airflow/models/dagcode.py b/airflow/models/dagcode.py
index 7a8520b..3e0e276 100644
--- a/airflow/models/dagcode.py
+++ b/airflow/models/dagcode.py
@@ -92,7 +92,7 @@ class DagCode(Base):
                 orm_dag_code.fileloc: orm_dag_code for orm_dag_code in existing_orm_dag_codes
             }
         else:
-            existing_orm_dag_codes_map = dict()
+            existing_orm_dag_codes_map = {}
 
         existing_orm_dag_codes_by_fileloc_hashes = {
             orm.fileloc_hash: orm for orm in existing_orm_dag_codes
diff --git a/airflow/providers/amazon/aws/hooks/base_aws.py b/airflow/providers/amazon/aws/hooks/base_aws.py
index 83bb17e..fe39c1d 100644
--- a/airflow/providers/amazon/aws/hooks/base_aws.py
+++ b/airflow/providers/amazon/aws/hooks/base_aws.py
@@ -89,7 +89,7 @@ class AwsBaseHook(BaseHook):
         aws_secret_access_key = None
         aws_session_token = None
         endpoint_url = None
-        session_kwargs = dict()
+        session_kwargs = {}
 
         if self.aws_conn_id:  # pylint: disable=too-many-nested-blocks
             self.log.info("Airflow Connection: aws_conn_id=%s",
@@ -187,7 +187,7 @@ class AwsBaseHook(BaseHook):
                     )
                     sts_client = sts_session.client("sts", config=self.config)
 
-                    assume_role_kwargs = dict()
+                    assume_role_kwargs = {}
                     if "assume_role_kwargs" in extra_config:
                         assume_role_kwargs = extra_config["assume_role_kwargs"]
 
diff --git a/airflow/providers/amazon/aws/hooks/sagemaker.py b/airflow/providers/amazon/aws/hooks/sagemaker.py
index 2b0c4a9..c7692f9 100644
--- a/airflow/providers/amazon/aws/hooks/sagemaker.py
+++ b/airflow/providers/amazon/aws/hooks/sagemaker.py
@@ -763,7 +763,7 @@ class SageMakerHook(AwsBaseHook):
         :return: results of the list_training_jobs request
         """
 
-        config = dict()
+        config = {}
 
         if name_contains:
             if "NameContains" in kwargs:
@@ -806,7 +806,7 @@ class SageMakerHook(AwsBaseHook):
         next_token = None
 
         while True:
-            kwargs = dict()
+            kwargs = {}
             if next_token is not None:
                 kwargs["NextToken"] = next_token
 
diff --git a/airflow/providers/amazon/aws/operators/datasync.py b/airflow/providers/amazon/aws/operators/datasync.py
index b670106..1bc3b9a 100644
--- a/airflow/providers/amazon/aws/operators/datasync.py
+++ b/airflow/providers/amazon/aws/operators/datasync.py
@@ -137,16 +137,16 @@ class AWSDataSyncOperator(BaseOperator):
         self.allow_random_task_choice = allow_random_task_choice
         self.allow_random_location_choice = allow_random_location_choice
 
-        self.create_task_kwargs = create_task_kwargs if create_task_kwargs else dict()
-        self.create_source_location_kwargs = dict()
+        self.create_task_kwargs = create_task_kwargs if create_task_kwargs else {}
+        self.create_source_location_kwargs = {}
         if create_source_location_kwargs:
             self.create_source_location_kwargs = create_source_location_kwargs
-        self.create_destination_location_kwargs = dict()
+        self.create_destination_location_kwargs = {}
         if create_destination_location_kwargs:
             self.create_destination_location_kwargs = create_destination_location_kwargs
 
-        self.update_task_kwargs = update_task_kwargs if update_task_kwargs else dict()
-        self.task_execution_kwargs = task_execution_kwargs if task_execution_kwargs else dict()
+        self.update_task_kwargs = update_task_kwargs if update_task_kwargs else {}
+        self.task_execution_kwargs = task_execution_kwargs if task_execution_kwargs else {}
         self.delete_task_after_execution = delete_task_after_execution
 
         # Validations
diff --git a/airflow/providers/google/cloud/operators/bigquery.py b/airflow/providers/google/cloud/operators/bigquery.py
index 01aacd6..5d633f9 100644
--- a/airflow/providers/google/cloud/operators/bigquery.py
+++ b/airflow/providers/google/cloud/operators/bigquery.py
@@ -1042,7 +1042,7 @@ class BigQueryCreateExternalTableOperator(BaseOperator):
         self.google_cloud_storage_conn_id = google_cloud_storage_conn_id
         self.delegate_to = delegate_to
 
-        self.src_fmt_configs = src_fmt_configs or dict()
+        self.src_fmt_configs = src_fmt_configs or {}
         self.labels = labels
         self.encryption_configuration = encryption_configuration
         self.location = location
diff --git a/airflow/providers/google/cloud/operators/bigtable.py b/airflow/providers/google/cloud/operators/bigtable.py
index 5fe07f7..9d697d6 100644
--- a/airflow/providers/google/cloud/operators/bigtable.py
+++ b/airflow/providers/google/cloud/operators/bigtable.py
@@ -255,7 +255,7 @@ class BigtableCreateTableOperator(BaseOperator, BigtableValidationMixin):
         self.instance_id = instance_id
         self.table_id = table_id
         self.initial_split_keys = initial_split_keys or []
-        self.column_families = column_families or dict()
+        self.column_families = column_families or {}
         self._validate_inputs()
         self.gcp_conn_id = gcp_conn_id
         super().__init__(*args, **kwargs)
diff --git a/airflow/providers/google/cloud/operators/dataproc.py b/airflow/providers/google/cloud/operators/dataproc.py
index b9b3da1..565c7e6 100644
--- a/airflow/providers/google/cloud/operators/dataproc.py
+++ b/airflow/providers/google/cloud/operators/dataproc.py
@@ -208,7 +208,7 @@ class ClusterGenerator:
         self.custom_image = custom_image
         self.custom_image_project_id = custom_image_project_id
         self.image_version = image_version
-        self.properties = properties or dict()
+        self.properties = properties or {}
         self.optional_components = optional_components
         self.master_machine_type = master_machine_type
         self.master_disk_type = master_disk_type
diff --git a/airflow/providers/google/cloud/transfers/cassandra_to_gcs.py b/airflow/providers/google/cloud/transfers/cassandra_to_gcs.py
index 5c20cb7..16f7e2b 100644
--- a/airflow/providers/google/cloud/transfers/cassandra_to_gcs.py
+++ b/airflow/providers/google/cloud/transfers/cassandra_to_gcs.py
@@ -307,7 +307,7 @@ class CassandraToGCSOperator(BaseOperator):
         """
         Generates BQ schema.
         """
-        field_schema: Dict[str, Any] = dict()
+        field_schema: Dict[str, Any] = {}
         field_schema.update({'name': name})
         field_schema.update({'type_': cls.get_bq_type(type_)})
         field_schema.update({'mode': cls.get_bq_mode(type_)})
diff --git a/airflow/providers/google/cloud/utils/credentials_provider.py b/airflow/providers/google/cloud/utils/credentials_provider.py
index 3e7927d..2555fba 100644
--- a/airflow/providers/google/cloud/utils/credentials_provider.py
+++ b/airflow/providers/google/cloud/utils/credentials_provider.py
@@ -59,7 +59,7 @@ def build_gcp_conn(
     conn = "google-cloud-platform://?{}"
     extras = "extra__google_cloud_platform"
 
-    query_params = dict()
+    query_params = {}
     if key_file_path:
         query_params["{}__key_path".format(extras)] = key_file_path
     if scopes:
diff --git a/airflow/utils/log/file_processor_handler.py b/airflow/utils/log/file_processor_handler.py
index 4297c98..9b1f60d 100644
--- a/airflow/utils/log/file_processor_handler.py
+++ b/airflow/utils/log/file_processor_handler.py
@@ -84,7 +84,7 @@ class FileProcessorHandler(logging.Handler):
 
     def _render_filename(self, filename):
         filename = os.path.relpath(filename, self.dag_dir)
-        ctx = dict()
+        ctx = {}
         ctx['filename'] = filename
 
         if self.filename_jinja_template:
diff --git a/airflow/utils/log/logging_mixin.py b/airflow/utils/log/logging_mixin.py
index 448c8f4..ac6ca59 100644
--- a/airflow/utils/log/logging_mixin.py
+++ b/airflow/utils/log/logging_mixin.py
@@ -72,7 +72,7 @@ class StreamLogWriter:
         """
         self.logger = logger
         self.level = level
-        self._buffer = str()
+        self._buffer = ''
 
     @property
     def closed(self):   # noqa: D402
@@ -101,7 +101,7 @@ class StreamLogWriter:
         else:
             self._buffer += message
             self._propagate_log(self._buffer.rstrip())
-            self._buffer = str()
+            self._buffer = ''
 
     def flush(self):
         """
@@ -109,7 +109,7 @@ class StreamLogWriter:
         """
         if len(self._buffer) > 0:
             self._propagate_log(self._buffer)
-            self._buffer = str()
+            self._buffer = ''
 
     def isatty(self):
         """
diff --git a/airflow/utils/operator_helpers.py b/airflow/utils/operator_helpers.py
index 5909fa4..c2e7228 100644
--- a/airflow/utils/operator_helpers.py
+++ b/airflow/utils/operator_helpers.py
@@ -46,7 +46,7 @@ def context_to_airflow_vars(context, in_env_var_format=False):
     :type in_env_var_format: bool
     :return: task_instance context as dict.
     """
-    params = dict()
+    params = {}
     if in_env_var_format:
         name_format = 'env_var_format'
     else:
diff --git a/airflow/www/extensions/init_manifest_files.py b/airflow/www/extensions/init_manifest_files.py
index 643e9a5..ced8b0d 100644
--- a/airflow/www/extensions/init_manifest_files.py
+++ b/airflow/www/extensions/init_manifest_files.py
@@ -29,7 +29,7 @@ def configure_manifest_files(app):
     :param app:
     :return:
     """
-    manifest = dict()
+    manifest = {}
 
     def parse_manifest_json():
         # noinspection PyBroadException
diff --git a/backport_packages/setup_backport_packages.py b/backport_packages/setup_backport_packages.py
index 0061e9e..2ad125c 100644
--- a/backport_packages/setup_backport_packages.py
+++ b/backport_packages/setup_backport_packages.py
@@ -604,7 +604,7 @@ def get_package_class_summary(full_package_name: str, imported_classes: List[str
     from airflow.hooks.base_hook import BaseHook
     from airflow.models.baseoperator import BaseOperator
 
-    all_verified_entities: Dict[EntityType, VerifiedEntities] = dict()
+    all_verified_entities: Dict[EntityType, VerifiedEntities] = {}
     all_verified_entities[EntityType.Operators] = find_all_entities(
         imported_classes=imported_classes,
         base_package=full_package_name,
@@ -657,7 +657,7 @@ def get_package_class_summary(full_package_name: str, imported_classes: List[str
     for entity in EntityType:
         print_wrong_naming(entity, all_verified_entities[entity].wrong_entities)
 
-    entities_summary: Dict[EntityType, EntityTypeSummary] = dict() # noqa
+    entities_summary: Dict[EntityType, EntityTypeSummary] = {} # noqa
 
     for entity_type in EntityType:
         entities_summary[entity_type] = get_details_about_classes(
diff --git a/dev/send_email.py b/dev/send_email.py
index 68d3240..6d698f7 100755
--- a/dev/send_email.py
+++ b/dev/send_email.py
@@ -127,7 +127,7 @@ class BaseParameters:
         self.password = password
         self.version = version
         self.version_rc = version_rc
-        self.template_arguments = dict()
+        self.template_arguments = {}
 
     def __repr__(self):
         return f"Apache Credentials: {self.email}/{self.username}/{self.version}/{self.version_rc}"
diff --git a/tests/models/test_dag.py b/tests/models/test_dag.py
index 6e57fae..250560a 100644
--- a/tests/models/test_dag.py
+++ b/tests/models/test_dag.py
@@ -317,7 +317,7 @@ class TestDag(unittest.TestCase):
             start_date=DEFAULT_DATE,
             default_args={'owner': 'owner1'})
 
-        self.assertEqual(tuple(), dag.topological_sort())
+        self.assertEqual((), dag.topological_sort())
 
     def test_dag_naive_start_date_string(self):
         DAG('DAG', default_args={'start_date': '2019-06-01'})
diff --git a/tests/test_stats.py b/tests/test_stats.py
index 57e2f6b..9df0aab 100644
--- a/tests/test_stats.py
+++ b/tests/test_stats.py
@@ -74,7 +74,7 @@ class TestStats(unittest.TestCase):
         self.statsd_client.incr.assert_called_once_with('test_stats_run', 1, 1)
 
     def test_stat_name_must_be_a_string(self):
-        self.stats.incr(list())
+        self.stats.incr([])
         self.statsd_client.assert_not_called()
 
     def test_stat_name_must_not_exceed_max_length(self):
@@ -152,7 +152,7 @@ class TestDogStats(unittest.TestCase):
         )
 
     def test_stat_name_must_be_a_string_with_dogstatsd(self):
-        self.dogstatsd.incr(list())
+        self.dogstatsd.incr([])
         self.dogstatsd_client.assert_not_called()
 
     def test_stat_name_must_not_exceed_max_length_with_dogstatsd(self):
diff --git a/tests/utils/log/elasticmock/fake_elasticsearch.py b/tests/utils/log/elasticmock/fake_elasticsearch.py
index 73764bb..c60db6a 100644
--- a/tests/utils/log/elasticmock/fake_elasticsearch.py
+++ b/tests/utils/log/elasticmock/fake_elasticsearch.py
@@ -81,7 +81,7 @@ class FakeElasticsearch(Elasticsearch):
                   'routing', 'timeout', 'timestamp', 'ttl', 'version', 'version_type')
     def index(self, index, doc_type, body, id=None, params=None):
         if index not in self.__documents_dict:
-            self.__documents_dict[index] = list()
+            self.__documents_dict[index] = []
 
         if id is None:
             id = get_random_id()