You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by pa...@apache.org on 2020/06/09 21:01:38 UTC
[beam] branch master updated: Support STRUCT, FLOAT64, INT64 BigQuery types
This is an automated email from the ASF dual-hosted git repository.
pabloem pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new 629c56f Support STRUCT, FLOAT64, INT64 BigQuery types
new 564cef0 Merge pull request #11923 from [BEAM-10176] Support STRUCT, FLOAT64, INT64 BigQuery types
629c56f is described below
commit 629c56f1c215e1d3b9e777496dcfbe274a9145fb
Author: Chuck Yang <ch...@getcruise.com>
AuthorDate: Tue Jun 2 13:06:29 2020 -0700
Support STRUCT, FLOAT64, INT64 BigQuery types
Support STRUCT, FLOAT64, INT64 BigQuery types in WriteToBigQuery with
Avro temp file format.
---
.../apache_beam/io/gcp/bigquery_avro_tools.py | 7 ++++
.../apache_beam/io/gcp/bigquery_avro_tools_test.py | 48 ++++++++++++++--------
2 files changed, 37 insertions(+), 18 deletions(-)
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_avro_tools.py b/sdks/python/apache_beam/io/gcp/bigquery_avro_tools.py
index 67be83b..f726b11 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_avro_tools.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_avro_tools.py
@@ -26,13 +26,20 @@ NOTHING IN THIS FILE HAS BACKWARDS COMPATIBILITY GUARANTEES.
from __future__ import absolute_import
from __future__ import division
+# BigQuery types as listed in
+# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
+# with aliases (RECORD, BOOLEAN, FLOAT, INTEGER) as defined in
+# https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/com/google/api/services/bigquery/model/TableFieldSchema.html#setType-java.lang.String-
BIG_QUERY_TO_AVRO_TYPES = {
+ "STRUCT": "record",
"RECORD": "record",
"STRING": "string",
"BOOL": "boolean",
"BOOLEAN": "boolean",
"BYTES": "bytes",
+ "FLOAT64": "double",
"FLOAT": "double",
+ "INT64": "long",
"INTEGER": "long",
"TIME": {
"type": "long",
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_avro_tools_test.py b/sdks/python/apache_beam/io/gcp/bigquery_avro_tools_test.py
index 01f3cae..229dde0 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_avro_tools_test.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_avro_tools_test.py
@@ -50,8 +50,12 @@ class TestBigQueryToAvroSchema(unittest.TestCase):
bigquery.TableFieldSchema(
name="quality", type="FLOAT"), # default to NULLABLE
bigquery.TableFieldSchema(
+ name="grade", type="FLOAT64"), # default to NULLABLE
+ bigquery.TableFieldSchema(
name="quantity", type="INTEGER"), # default to NULLABLE
bigquery.TableFieldSchema(
+ name="dependents", type="INT64"), # default to NULLABLE
+ bigquery.TableFieldSchema(
name="birthday", type="TIMESTAMP", mode="NULLABLE"),
bigquery.TableFieldSchema(
name="birthdayMoney", type="NUMERIC", mode="NULLABLE"),
@@ -70,6 +74,8 @@ class TestBigQueryToAvroSchema(unittest.TestCase):
bigquery.TableFieldSchema(
name="scion", type="RECORD", mode="NULLABLE", fields=subfields),
bigquery.TableFieldSchema(
+ name="family", type="STRUCT", mode="NULLABLE", fields=subfields),
+ bigquery.TableFieldSchema(
name="associates", type="RECORD", mode="REPEATED", fields=subfields),
bigquery.TableFieldSchema(
name="geoPositions", type="GEOGRAPHY", mode="NULLABLE"),
@@ -94,8 +100,12 @@ class TestBigQueryToAvroSchema(unittest.TestCase):
self.assertEqual(
field_map["quality"].type, Parse(json.dumps(["null", "double"])))
self.assertEqual(
+ field_map["grade"].type, Parse(json.dumps(["null", "double"])))
+ self.assertEqual(
field_map["quantity"].type, Parse(json.dumps(["null", "long"])))
self.assertEqual(
+ field_map["dependents"].type, Parse(json.dumps(["null", "long"])))
+ self.assertEqual(
field_map["birthday"].type,
Parse(
json.dumps(
@@ -137,24 +147,26 @@ class TestBigQueryToAvroSchema(unittest.TestCase):
self.assertEqual(
field_map["geoPositions"].type, Parse(json.dumps(["null", "string"])))
- self.assertEqual(
- field_map["scion"].type,
- Parse(
- json.dumps([
- "null",
- {
- "type": "record",
- "name": "scion",
- "fields": [
- {
- "type": ["null", "string"],
- "name": "species",
- },
- ],
- "doc": "Translated Avro Schema for scion",
- "namespace": "apache_beam.io.gcp.bigquery.root.scion",
- },
- ])))
+ for field in ("scion", "family"):
+ self.assertEqual(
+ field_map[field].type,
+ Parse(
+ json.dumps([
+ "null",
+ {
+ "type": "record",
+ "name": field,
+ "fields": [
+ {
+ "type": ["null", "string"],
+ "name": "species",
+ },
+ ],
+ "doc": "Translated Avro Schema for {}".format(field),
+ "namespace": "apache_beam.io.gcp.bigquery.root.{}".format(
+ field),
+ }
+ ])))
self.assertEqual(
field_map["associates"].type,