Posted to commits@beam.apache.org by yh...@apache.org on 2023/02/03 14:12:46 UTC

[beam] branch master updated: Fix flaky test due to create bigquery dataset conflict (#25266)

This is an automated email from the ASF dual-hosted git repository.

yhu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new 86480f33397 Fix flaky test due to create bigquery dataset conflict (#25266)
86480f33397 is described below

commit 86480f33397bd7984ae5cbb06ddcb3ffa1043ffe
Author: Yi Hu <ya...@google.com>
AuthorDate: Fri Feb 3 09:12:38 2023 -0500

    Fix flaky test due to create bigquery dataset conflict (#25266)
    
    * Fix test_hourly_team_score_output_checksum_on_small_input flaky
    
    * Include python bigquery integration test created tables to clean up job
---
 .test-infra/tools/stale_bq_datasets_cleaner.sh             |  2 +-
 .../apache_beam/io/gcp/big_query_query_to_table_it_test.py |  6 +++---
 sdks/python/apache_beam/io/gcp/bigquery_file_loads_test.py |  8 +++-----
 sdks/python/apache_beam/io/gcp/bigquery_read_it_test.py    | 10 ++++------
 sdks/python/apache_beam/io/gcp/bigquery_test.py            | 14 +++++---------
 sdks/python/apache_beam/io/gcp/bigquery_write_it_test.py   |  8 +++-----
 sdks/python/apache_beam/io/gcp/tests/utils.py              |  6 +++---
 7 files changed, 22 insertions(+), 32 deletions(-)
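
For context on the fix: the flaky tests named their scratch datasets with a Unix
timestamp plus random.randint(0, 10000), so two runs starting in the same second
could draw the same suffix and then race on get_or_create_dataset. The commit
swaps the suffix to secrets.token_hex(3). A minimal sketch of the two schemes as
they appear in the diff below (BIG_QUERY_DATASET_ID here is a stand-in prefix,
not the value used in the tests):

    import secrets
    import time

    BIG_QUERY_DATASET_ID = 'python_bq_it_'  # hypothetical prefix for illustration

    # Old scheme: the timestamp is identical for concurrent runs, leaving
    # only 10,001 distinct suffixes to avoid a collision.
    #   '%s%s%d' % (BIG_QUERY_DATASET_ID, str(int(time.time())),
    #               random.randint(0, 10000))

    # New scheme: token_hex(3) draws 3 random bytes, i.e. 6 hex characters,
    # giving 16,777,216 possible suffixes per second.
    dataset_id = '%s%d%s' % (
        BIG_QUERY_DATASET_ID, int(time.time()), secrets.token_hex(3))
    print(dataset_id)  # e.g. python_bq_it_1675433558a1b2c3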

diff --git a/.test-infra/tools/stale_bq_datasets_cleaner.sh b/.test-infra/tools/stale_bq_datasets_cleaner.sh
index fc68666e4aa..ee3f27ef671 100755
--- a/.test-infra/tools/stale_bq_datasets_cleaner.sh
+++ b/.test-infra/tools/stale_bq_datasets_cleaner.sh
@@ -24,7 +24,7 @@ PROJECT=apache-beam-testing
 MAX_RESULT=1500
 BQ_DATASETS=`bq --project_id=$PROJECT ls --max_results=$MAX_RESULT | tail -n $MAX_RESULT | sed s/^[[:space:]]*/${PROJECT}:/`
 
-CLEANUP_DATASET_TEMPLATES=(beam_bigquery_samples_ beam_temp_dataset_ FHIR_store_ bq_query_schema_update_options_16 bq_query_to_table_16 bq_read_all_[a-z0-9]*)
+CLEANUP_DATASET_TEMPLATES=(beam_bigquery_samples_ beam_temp_dataset_ FHIR_store_ bq_query_schema_update_options_16 bq_query_to_table_16 '\:(bq_read_all_|combine_per_key_examples|filter_examples|hourly_team_score_|leader_board_|leaderboard_|game_stats_|python_|temp_dataset)[a-z_]*[0-9a-f]{12,}$')
 
 # A grace period of 5 days
 GRACE_PERIOD=$((`date +%s` - 24 * 3600 * 5))
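
The new cleanup template above is an extended regex rather than a plain prefix.
Because the earlier sed prepends "PROJECT:" to each dataset name, anchoring on
':' plus a known prefix and requiring a trailing run of at least 12 hex
characters means only machine-generated names are swept up, while similarly
named hand-created datasets survive. An illustrative check in Python (assuming
grep -E semantics; re.search mirrors an unanchored grep match):

    import re

    pattern = re.compile(
        r'\:(bq_read_all_|combine_per_key_examples|filter_examples|'
        r'hourly_team_score_|leader_board_|leaderboard_|game_stats_|'
        r'python_|temp_dataset)[a-z_]*[0-9a-f]{12,}$')

    # Generated name: 10 timestamp digits + 6 hex chars satisfy {12,}.
    print(bool(pattern.search(
        'apache-beam-testing:hourly_team_score_1675433558a1b2c3')))  # True
    # Hand-named dataset: no long hex tail, so it is left alone.
    print(bool(pattern.search(
        'apache-beam-testing:hourly_team_score_prod')))  # False
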
diff --git a/sdks/python/apache_beam/io/gcp/big_query_query_to_table_it_test.py b/sdks/python/apache_beam/io/gcp/big_query_query_to_table_it_test.py
index eede4248ea0..501c2edee40 100644
--- a/sdks/python/apache_beam/io/gcp/big_query_query_to_table_it_test.py
+++ b/sdks/python/apache_beam/io/gcp/big_query_query_to_table_it_test.py
@@ -24,7 +24,7 @@ Integration test for Google Cloud BigQuery.
 import base64
 import datetime
 import logging
-import random
+import secrets
 import time
 import unittest
 
@@ -93,8 +93,8 @@ class BigQueryQueryToTableIT(unittest.TestCase):
     self.project = self.test_pipeline.get_option('project')
 
     self.bigquery_client = BigQueryWrapper()
-    self.dataset_id = '%s%s%d' % (
-        BIG_QUERY_DATASET_ID, str(int(time.time())), random.randint(0, 10000))
+    self.dataset_id = '%s%d%s' % (
+        BIG_QUERY_DATASET_ID, int(time.time()), secrets.token_hex(3))
     self.bigquery_client.get_or_create_dataset(self.project, self.dataset_id)
     self.output_table = "%s.output_table" % (self.dataset_id)
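
To see why the larger suffix space matters, a quick birthday-bound estimate
(illustrative numbers, not from the commit): with k runs starting in the same
second, the chance of at least one duplicate suffix is about
1 - exp(-k(k-1)/2n) for n possible suffixes.

    import math

    def collision_probability(k, n):
        """P(at least one duplicate) when k runs each draw from n suffixes."""
        return 1 - math.exp(-k * (k - 1) / (2 * n))

    # 10 concurrent runs landing in the same second:
    print(collision_probability(10, 10_001))      # ~0.0045 with randint(0, 10000)
    print(collision_probability(10, 16_777_216))  # ~2.7e-06 with token_hex(3)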
 
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_file_loads_test.py b/sdks/python/apache_beam/io/gcp/bigquery_file_loads_test.py
index 797ea0333ec..adff1dab333 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_file_loads_test.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_file_loads_test.py
@@ -21,7 +21,7 @@
 
 import logging
 import os
-import random
+import secrets
 import time
 import unittest
 
@@ -888,10 +888,8 @@ class BigQueryFileLoadsIT(unittest.TestCase):
     self.runner_name = type(self.test_pipeline.runner).__name__
     self.project = self.test_pipeline.get_option('project')
 
-    self.dataset_id = '%s%s%d' % (
-        self.BIG_QUERY_DATASET_ID,
-        str(int(time.time())),
-        random.randint(0, 10000))
+    self.dataset_id = '%s%d%s' % (
+        self.BIG_QUERY_DATASET_ID, int(time.time()), secrets.token_hex(3))
     self.bigquery_client = bigquery_tools.BigQueryWrapper()
     self.bigquery_client.get_or_create_dataset(self.project, self.dataset_id)
     self.output_table = "%s.output_table" % (self.dataset_id)
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_read_it_test.py b/sdks/python/apache_beam/io/gcp/bigquery_read_it_test.py
index c36c3a28e30..98a6d383190 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_read_it_test.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_read_it_test.py
@@ -23,7 +23,7 @@
 import base64
 import datetime
 import logging
-import random
+import secrets
 import time
 import unittest
 import uuid
@@ -98,10 +98,8 @@ class BigQueryReadIntegrationTests(unittest.TestCase):
     cls.project = cls.test_pipeline.get_option('project')
 
     cls.bigquery_client = BigQueryWrapper()
-    cls.dataset_id = '%s%s%d' % (
-        cls.BIG_QUERY_DATASET_ID,
-        str(int(time.time())),
-        random.randint(0, 10000))
+    cls.dataset_id = '%s%d%s' % (
+        cls.BIG_QUERY_DATASET_ID, int(time.time()), secrets.token_hex(3))
     cls.bigquery_client.get_or_create_dataset(cls.project, cls.dataset_id)
     _LOGGER.info(
         "Created dataset %s in project %s", cls.dataset_id, cls.project)
@@ -409,7 +407,7 @@ class ReadUsingStorageApiTests(BigQueryReadIntegrationTests):
         'materializing_table_before_reading',
         str(uuid.uuid4())[0:10],
         bigquery_tools.BigQueryJobTypes.QUERY,
-        '%s_%s' % (int(time.time()), random.randint(0, 1000)))
+        '%d_%s' % (int(time.time()), secrets.token_hex(3)))
     cls._setup_temporary_dataset(cls.project, cls.query)
     job = cls.bigquery_client._start_query_job(
         project,
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_test.py b/sdks/python/apache_beam/io/gcp/bigquery_test.py
index 18048e94054..08775d26075 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_test.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_test.py
@@ -25,8 +25,8 @@ import json
 import logging
 import os
 import pickle
-import random
 import re
+import secrets
 import time
 import unittest
 import uuid
@@ -1607,10 +1607,8 @@ class BigQueryStreamingInsertTransformIntegrationTests(unittest.TestCase):
     self.runner_name = type(self.test_pipeline.runner).__name__
     self.project = self.test_pipeline.get_option('project')
 
-    self.dataset_id = '%s%s%d' % (
-        self.BIG_QUERY_DATASET_ID,
-        str(int(time.time())),
-        random.randint(0, 10000))
+    self.dataset_id = '%s%d%s' % (
+        self.BIG_QUERY_DATASET_ID, int(time.time()), secrets.token_hex(3))
     self.bigquery_client = bigquery_tools.BigQueryWrapper()
     self.bigquery_client.get_or_create_dataset(self.project, self.dataset_id)
     self.output_table = "%s.output_table" % (self.dataset_id)
@@ -1907,10 +1905,8 @@ class BigQueryFileLoadsIntegrationTests(unittest.TestCase):
     self.runner_name = type(self.test_pipeline.runner).__name__
     self.project = self.test_pipeline.get_option('project')
 
-    self.dataset_id = '%s%s%s' % (
-        self.BIG_QUERY_DATASET_ID,
-        str(int(time.time())),
-        random.randint(0, 10000))
+    self.dataset_id = '%s%d%s' % (
+        self.BIG_QUERY_DATASET_ID, int(time.time()), secrets.token_hex(3))
     self.bigquery_client = bigquery_tools.BigQueryWrapper()
     self.bigquery_client.get_or_create_dataset(self.project, self.dataset_id)
     self.output_table = '%s.output_table' % (self.dataset_id)
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_write_it_test.py b/sdks/python/apache_beam/io/gcp/bigquery_write_it_test.py
index e75b698c651..a307e06ac5b 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_write_it_test.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_write_it_test.py
@@ -23,7 +23,7 @@
 import base64
 import datetime
 import logging
-import random
+import secrets
 import time
 import unittest
 from decimal import Decimal
@@ -66,10 +66,8 @@ class BigQueryWriteIntegrationTests(unittest.TestCase):
     self.project = self.test_pipeline.get_option('project')
 
     self.bigquery_client = BigQueryWrapper()
-    self.dataset_id = '%s%s%d' % (
-        self.BIG_QUERY_DATASET_ID,
-        str(int(time.time())),
-        random.randint(0, 10000))
+    self.dataset_id = '%s%d%s' % (
+        self.BIG_QUERY_DATASET_ID, int(time.time()), secrets.token_hex(3))
     self.bigquery_client.get_or_create_dataset(self.project, self.dataset_id)
     _LOGGER.info(
         "Created dataset %s in project %s", self.dataset_id, self.project)
diff --git a/sdks/python/apache_beam/io/gcp/tests/utils.py b/sdks/python/apache_beam/io/gcp/tests/utils.py
index 428ecd4e949..e348915c145 100644
--- a/sdks/python/apache_beam/io/gcp/tests/utils.py
+++ b/sdks/python/apache_beam/io/gcp/tests/utils.py
@@ -20,7 +20,7 @@
 # pytype: skip-file
 
 import logging
-import random
+import secrets
 import time
 
 from apache_beam.io import filesystems
@@ -58,8 +58,8 @@ def create_bq_dataset(project, dataset_base_name):
     new dataset.
   """
   client = bigquery.Client(project=project)
-  unique_dataset_name = '%s%s%d' % (
-      dataset_base_name, str(int(time.time())), random.randint(0, 10000))
+  unique_dataset_name = '%s%d%s' % (
+      dataset_base_name, int(time.time()), secrets.token_hex(3))
   dataset_ref = client.dataset(unique_dataset_name, project=project)
   dataset = bigquery.Dataset(dataset_ref)
   client.create_dataset(dataset)
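
Taken together, the two halves of the commit fit: every name produced by the
updated helpers ends in 10 timestamp digits plus 6 hex characters, at least 16
characters from [0-9a-f], which is what the cleanup template's [0-9a-f]{12,}
tail requires. A hedged end-to-end check (the project id and base name are
placeholders, not values from the commit):

    import re
    import secrets
    import time

    name = '%s%d%s' % (
        'filter_examples', int(time.time()), secrets.token_hex(3))
    template = (r'\:(bq_read_all_|combine_per_key_examples|filter_examples|'
                r'hourly_team_score_|leader_board_|leaderboard_|game_stats_|'
                r'python_|temp_dataset)[a-z_]*[0-9a-f]{12,}$')
    # The stale-dataset cleaner would match this name once prefixed with the
    # project id, so any dataset leaked by a failed test gets garbage-collected.
    assert re.search(template, 'apache-beam-testing:' + name)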