You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by tv...@apache.org on 2022/07/11 17:34:14 UTC
[beam] branch master updated: Allow BigQuery TableIds to have space in between (#22167)

This is an automated email from the ASF dual-hosted git repository.

tvalentyn pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new da84804bc01 Allow BigQuery TableIds to have space in between (#22167)
da84804bc01 is described below

commit da84804bc01d88096232cbc91b8e2fc758a770a5
Author: Dheeraj Gharde <45...@users.noreply.github.com>
AuthorDate: Mon Jul 11 23:04:08 2022 +0530

    Allow BigQuery TableIds to have space in between (#22167)
    
    * Updated Regex and test
    
    * Updated Regex and test
---
 sdks/python/apache_beam/io/gcp/bigquery.py          | 21 +++++++++++----------
 sdks/python/apache_beam/io/gcp/bigquery_tools.py    |  7 +++++--
 .../apache_beam/io/gcp/bigquery_tools_test.py       | 11 +++++++++++
 3 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/sdks/python/apache_beam/io/gcp/bigquery.py b/sdks/python/apache_beam/io/gcp/bigquery.py
index 5ce7519d3f2..5c2c832e3b0 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery.py
@@ -1022,9 +1022,9 @@ class _CustomBigQueryStorageSource(BoundedSource):
   using the BigQuery Storage API.
   Args:
     table (str, TableReference): The ID of the table. The ID must contain only
-      letters ``a-z``, ``A-Z``, numbers ``0-9``, or underscores ``_``  If
-      **dataset** argument is :data:`None` then the table argument must
-      contain the entire table reference specified as:
+      letters ``a-z``, ``A-Z``, numbers ``0-9``, underscores ``_`` or white
+      spaces. If **dataset** argument is :data:`None` then the table
+      argument must contain the entire table reference specified as:
       ``'PROJECT:DATASET.TABLE'`` or must specify a TableReference.
     dataset (str): Optional ID of the dataset containing this table or
       :data:`None` if the table argument specifies a TableReference.
@@ -1427,10 +1427,10 @@ class BigQuerySink(dataflow_io.NativeSink):
 
     Args:
       table (str): The ID of the table. The ID must contain only letters
-        ``a-z``, ``A-Z``, numbers ``0-9``, or underscores ``_``. If
-        **dataset** argument is :data:`None` then the table argument must
-        contain the entire table reference specified as: ``'DATASET.TABLE'`` or
-        ``'PROJECT:DATASET.TABLE'``.
+        ``a-z``, ``A-Z``, numbers ``0-9``, underscores ``_`` or white
+        spaces. If **dataset** argument is :data:`None` then the table
+        argument must contain the entire table reference specified
+        as: ``'DATASET.TABLE'`` or ``'PROJECT:DATASET.TABLE'``.
       dataset (str): The ID of the dataset containing this table or
         :data:`None` if the table reference is specified entirely by the table
         argument.
@@ -2602,7 +2602,7 @@ class ReadFromBigQuery(PTransform):
       'method' is 'DIRECT_READ'.
     table (str, callable, ValueProvider): The ID of the table, or a callable
       that returns it. The ID must contain only letters ``a-z``, ``A-Z``,
-      numbers ``0-9``, or underscores ``_``. If dataset argument is
+      numbers ``0-9``, underscores ``_`` or white spaces. If dataset argument is
       :data:`None` then the table argument must contain the entire table
       reference specified as: ``'DATASET.TABLE'``
       or ``'PROJECT:DATASET.TABLE'``. If it's a callable, it must receive one
@@ -2821,8 +2821,9 @@ class ReadFromBigQueryRequest:
       This parameter is ignored for table inputs.
     :param table:
       The ID of the table to read. The ID must contain only letters
-      ``a-z``, ``A-Z``, numbers ``0-9``, or underscores ``_``. Table should
-      define project and dataset (ex.: ``'PROJECT:DATASET.TABLE'``).
+      ``a-z``, ``A-Z``, numbers ``0-9``, underscores ``_`` or white spaces.
+      Table should define project and dataset
+      (ex.: ``'PROJECT:DATASET.TABLE'``).
     :param flatten_results:
       Flattens all nested and repeated fields in the query results.
       The default value is :data:`False`.
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_tools.py b/sdks/python/apache_beam/io/gcp/bigquery_tools.py
index bb3b6027340..8098209c7e0 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_tools.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_tools.py
@@ -254,8 +254,11 @@ def parse_table_reference(table, dataset=None, project=None):
   # table argument will contain a full table reference instead of just a
   # table name.
   if dataset is None:
-    match = re.match(
-        r'^((?P<project>.+):)?(?P<dataset>\w+)\.(?P<table>[-\w\$]+)$', table)
+    regex = re.compile(
+        r'''^((?P<project>.+):)?(?P<dataset>\w+)\.
+            (?P<table>[-\w\$]+(\s+\-*\w+)*)$''',
+        re.X)
+    match = regex.match(table)
     if not match:
       raise ValueError(
           'Expected a table reference (PROJECT:DATASET.TABLE or '
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_tools_test.py b/sdks/python/apache_beam/io/gcp/bigquery_tools_test.py
index 3ce8d0ff7de..8df6f09ba58 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_tools_test.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_tools_test.py
@@ -147,6 +147,17 @@ class TestTableReferenceParser(unittest.TestCase):
     self.assertEqual(parsed_ref.datasetId, datasetId)
     self.assertEqual(parsed_ref.tableId, tableId)
 
+  def test_calling_with_spaced_table_ref(self):
+    projectId = 'test_project'
+    datasetId = 'test_dataset'
+    tableId = 'test- -table 1'
+    fully_qualified_table = '{}:{}.{}'.format(projectId, datasetId, tableId)
+    parsed_ref = parse_table_reference(fully_qualified_table)
+    self.assertIsInstance(parsed_ref, bigquery.TableReference)
+    self.assertEqual(parsed_ref.projectId, projectId)
+    self.assertEqual(parsed_ref.datasetId, datasetId)
+    self.assertEqual(parsed_ref.tableId, tableId)
+
   def test_calling_with_partially_qualified_table_ref(self):
     datasetId = 'test_dataset'
     tableId = 'test_table'