You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by tv...@apache.org on 2022/07/11 17:34:14 UTC
[beam] branch master updated: Allow BigQuery TableIds to have space in between (#22167)
This is an automated email from the ASF dual-hosted git repository.
tvalentyn pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new da84804bc01 Allow BigQuery TableIds to have space in between (#22167)
da84804bc01 is described below
commit da84804bc01d88096232cbc91b8e2fc758a770a5
Author: Dheeraj Gharde <45...@users.noreply.github.com>
AuthorDate: Mon Jul 11 23:04:08 2022 +0530
Allow BigQuery TableIds to have space in between (#22167)
* Updated Regex and test
* Updated Regex and test
---
sdks/python/apache_beam/io/gcp/bigquery.py | 21 +++++++++++----------
sdks/python/apache_beam/io/gcp/bigquery_tools.py | 7 +++++--
.../apache_beam/io/gcp/bigquery_tools_test.py | 11 +++++++++++
3 files changed, 27 insertions(+), 12 deletions(-)
diff --git a/sdks/python/apache_beam/io/gcp/bigquery.py b/sdks/python/apache_beam/io/gcp/bigquery.py
index 5ce7519d3f2..5c2c832e3b0 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery.py
@@ -1022,9 +1022,9 @@ class _CustomBigQueryStorageSource(BoundedSource):
using the BigQuery Storage API.
Args:
table (str, TableReference): The ID of the table. The ID must contain only
- letters ``a-z``, ``A-Z``, numbers ``0-9``, or underscores ``_`` If
- **dataset** argument is :data:`None` then the table argument must
- contain the entire table reference specified as:
+ letters ``a-z``, ``A-Z``, numbers ``0-9``, underscores ``_`` or white
+ spaces. If **dataset** argument is :data:`None` then the table
+ argument must contain the entire table reference specified as:
``'PROJECT:DATASET.TABLE'`` or must specify a TableReference.
dataset (str): Optional ID of the dataset containing this table or
:data:`None` if the table argument specifies a TableReference.
@@ -1427,10 +1427,10 @@ class BigQuerySink(dataflow_io.NativeSink):
Args:
table (str): The ID of the table. The ID must contain only letters
- ``a-z``, ``A-Z``, numbers ``0-9``, or underscores ``_``. If
- **dataset** argument is :data:`None` then the table argument must
- contain the entire table reference specified as: ``'DATASET.TABLE'`` or
- ``'PROJECT:DATASET.TABLE'``.
+ ``a-z``, ``A-Z``, numbers ``0-9``, underscores ``_`` or white
+ spaces. If **dataset** argument is :data:`None` then the table
+ argument must contain the entire table reference specified
+ as: ``'DATASET.TABLE'`` or ``'PROJECT:DATASET.TABLE'``.
dataset (str): The ID of the dataset containing this table or
:data:`None` if the table reference is specified entirely by the table
argument.
@@ -2602,7 +2602,7 @@ class ReadFromBigQuery(PTransform):
'method' is 'DIRECT_READ'.
table (str, callable, ValueProvider): The ID of the table, or a callable
that returns it. The ID must contain only letters ``a-z``, ``A-Z``,
- numbers ``0-9``, or underscores ``_``. If dataset argument is
+ numbers ``0-9``, underscores ``_`` or white spaces. If dataset argument is
:data:`None` then the table argument must contain the entire table
reference specified as: ``'DATASET.TABLE'``
or ``'PROJECT:DATASET.TABLE'``. If it's a callable, it must receive one
@@ -2821,8 +2821,9 @@ class ReadFromBigQueryRequest:
This parameter is ignored for table inputs.
:param table:
The ID of the table to read. The ID must contain only letters
- ``a-z``, ``A-Z``, numbers ``0-9``, or underscores ``_``. Table should
- define project and dataset (ex.: ``'PROJECT:DATASET.TABLE'``).
+ ``a-z``, ``A-Z``, numbers ``0-9``, underscores ``_`` or white spaces.
+ Table should define project and dataset
+ (ex.: ``'PROJECT:DATASET.TABLE'``).
:param flatten_results:
Flattens all nested and repeated fields in the query results.
The default value is :data:`False`.
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_tools.py b/sdks/python/apache_beam/io/gcp/bigquery_tools.py
index bb3b6027340..8098209c7e0 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_tools.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_tools.py
@@ -254,8 +254,11 @@ def parse_table_reference(table, dataset=None, project=None):
# table argument will contain a full table reference instead of just a
# table name.
if dataset is None:
- match = re.match(
- r'^((?P<project>.+):)?(?P<dataset>\w+)\.(?P<table>[-\w\$]+)$', table)
+ regex = re.compile(
+ r'''^((?P<project>.+):)?(?P<dataset>\w+)\.
+ (?P<table>[-\w\$]+(\s+\-*\w+)*)$''',
+ re.X)
+ match = regex.match(table)
if not match:
raise ValueError(
'Expected a table reference (PROJECT:DATASET.TABLE or '
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_tools_test.py b/sdks/python/apache_beam/io/gcp/bigquery_tools_test.py
index 3ce8d0ff7de..8df6f09ba58 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_tools_test.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_tools_test.py
@@ -147,6 +147,17 @@ class TestTableReferenceParser(unittest.TestCase):
self.assertEqual(parsed_ref.datasetId, datasetId)
self.assertEqual(parsed_ref.tableId, tableId)
+ def test_calling_with_spaced_table_ref(self):
+ projectId = 'test_project'
+ datasetId = 'test_dataset'
+ tableId = 'test- -table 1'
+ fully_qualified_table = '{}:{}.{}'.format(projectId, datasetId, tableId)
+ parsed_ref = parse_table_reference(fully_qualified_table)
+ self.assertIsInstance(parsed_ref, bigquery.TableReference)
+ self.assertEqual(parsed_ref.projectId, projectId)
+ self.assertEqual(parsed_ref.datasetId, datasetId)
+ self.assertEqual(parsed_ref.tableId, tableId)
+
def test_calling_with_partially_qualified_table_ref(self):
datasetId = 'test_dataset'
tableId = 'test_table'