You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by pa...@apache.org on 2020/06/09 21:01:38 UTC

[beam] branch master updated: Support STRUCT, FLOAT64, INT64 BigQuery types

This is an automated email from the ASF dual-hosted git repository.

pabloem pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new 629c56f  Support STRUCT, FLOAT64, INT64 BigQuery types
     new 564cef0  Merge pull request #11923 from [BEAM-10176] Support STRUCT, FLOAT64, INT64 BigQuery types
629c56f is described below

commit 629c56f1c215e1d3b9e777496dcfbe274a9145fb
Author: Chuck Yang <ch...@getcruise.com>
AuthorDate: Tue Jun 2 13:06:29 2020 -0700

    Support STRUCT, FLOAT64, INT64 BigQuery types
    
    Add the STRUCT, FLOAT64, and INT64 type names to the BigQuery-to-Avro
    type mapping (alongside RECORD, FLOAT, and INTEGER) so that
    WriteToBigQuery supports them with the Avro temp file format.
---
 .../apache_beam/io/gcp/bigquery_avro_tools.py      |  7 ++++
 .../apache_beam/io/gcp/bigquery_avro_tools_test.py | 48 ++++++++++++++--------
 2 files changed, 37 insertions(+), 18 deletions(-)

diff --git a/sdks/python/apache_beam/io/gcp/bigquery_avro_tools.py b/sdks/python/apache_beam/io/gcp/bigquery_avro_tools.py
index 67be83b..f726b11 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_avro_tools.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_avro_tools.py
@@ -26,13 +26,20 @@ NOTHING IN THIS FILE HAS BACKWARDS COMPATIBILITY GUARANTEES.
 from __future__ import absolute_import
 from __future__ import division
 
+# BigQuery types as listed in
+# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
+# with aliases (RECORD, BOOLEAN, FLOAT, INTEGER) as defined in
+# https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/java/latest/com/google/api/services/bigquery/model/TableFieldSchema.html#setType-java.lang.String-
 BIG_QUERY_TO_AVRO_TYPES = {
+    "STRUCT": "record",
     "RECORD": "record",
     "STRING": "string",
     "BOOL": "boolean",
     "BOOLEAN": "boolean",
     "BYTES": "bytes",
+    "FLOAT64": "double",
     "FLOAT": "double",
+    "INT64": "long",
     "INTEGER": "long",
     "TIME": {
         "type": "long",
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_avro_tools_test.py b/sdks/python/apache_beam/io/gcp/bigquery_avro_tools_test.py
index 01f3cae..229dde0 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_avro_tools_test.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_avro_tools_test.py
@@ -50,8 +50,12 @@ class TestBigQueryToAvroSchema(unittest.TestCase):
       bigquery.TableFieldSchema(
         name="quality", type="FLOAT"),  # default to NULLABLE
       bigquery.TableFieldSchema(
+        name="grade", type="FLOAT64"),  # default to NULLABLE
+      bigquery.TableFieldSchema(
         name="quantity", type="INTEGER"),  # default to NULLABLE
       bigquery.TableFieldSchema(
+        name="dependents", type="INT64"),  # default to NULLABLE
+      bigquery.TableFieldSchema(
         name="birthday", type="TIMESTAMP", mode="NULLABLE"),
       bigquery.TableFieldSchema(
         name="birthdayMoney", type="NUMERIC", mode="NULLABLE"),
@@ -70,6 +74,8 @@ class TestBigQueryToAvroSchema(unittest.TestCase):
       bigquery.TableFieldSchema(
         name="scion", type="RECORD", mode="NULLABLE", fields=subfields),
       bigquery.TableFieldSchema(
+        name="family", type="STRUCT", mode="NULLABLE", fields=subfields),
+      bigquery.TableFieldSchema(
         name="associates", type="RECORD", mode="REPEATED", fields=subfields),
       bigquery.TableFieldSchema(
         name="geoPositions", type="GEOGRAPHY", mode="NULLABLE"),
@@ -94,8 +100,12 @@ class TestBigQueryToAvroSchema(unittest.TestCase):
     self.assertEqual(
         field_map["quality"].type, Parse(json.dumps(["null", "double"])))
     self.assertEqual(
+        field_map["grade"].type, Parse(json.dumps(["null", "double"])))
+    self.assertEqual(
         field_map["quantity"].type, Parse(json.dumps(["null", "long"])))
     self.assertEqual(
+        field_map["dependents"].type, Parse(json.dumps(["null", "long"])))
+    self.assertEqual(
         field_map["birthday"].type,
         Parse(
             json.dumps(
@@ -137,24 +147,26 @@ class TestBigQueryToAvroSchema(unittest.TestCase):
     self.assertEqual(
         field_map["geoPositions"].type, Parse(json.dumps(["null", "string"])))
 
-    self.assertEqual(
-        field_map["scion"].type,
-        Parse(
-            json.dumps([
-                "null",
-                {
-                    "type": "record",
-                    "name": "scion",
-                    "fields": [
-                        {
-                            "type": ["null", "string"],
-                            "name": "species",
-                        },
-                    ],
-                    "doc": "Translated Avro Schema for scion",
-                    "namespace": "apache_beam.io.gcp.bigquery.root.scion",
-                },
-            ])))
+    for field in ("scion", "family"):
+      self.assertEqual(
+          field_map[field].type,
+          Parse(
+              json.dumps([
+                  "null",
+                  {
+                      "type": "record",
+                      "name": field,
+                      "fields": [
+                          {
+                              "type": ["null", "string"],
+                              "name": "species",
+                          },
+                      ],
+                      "doc": "Translated Avro Schema for {}".format(field),
+                      "namespace": "apache_beam.io.gcp.bigquery.root.{}".format(
+                          field),
+                  }
+              ])))
 
     self.assertEqual(
         field_map["associates"].type,