Posted to commits@beam.apache.org by pa...@apache.org on 2019/05/02 01:13:45 UTC

[beam] branch master updated: [BEAM-6769] write bytes to bigquery in python 2

This is an automated email from the ASF dual-hosted git repository.

pabloem pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new a53180e  [BEAM-6769] write bytes to bigquery in python 2
     new 615013b  Merge pull request #8047 from Juta/bq-io
a53180e is described below

commit a53180e28c732032ee56e53f9883387a209d1ace
Author: Juta <ju...@gmail.com>
AuthorDate: Wed Mar 13 15:52:13 2019 +0100

    [BEAM-6769] write bytes to bigquery in python 2
---
 .../io/gcp/big_query_query_to_table_it_test.py     | 25 +++++++++++++++-------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/sdks/python/apache_beam/io/gcp/big_query_query_to_table_it_test.py b/sdks/python/apache_beam/io/gcp/big_query_query_to_table_it_test.py
index 5bdf8a8..fea2462 100644
--- a/sdks/python/apache_beam/io/gcp/big_query_query_to_table_it_test.py
+++ b/sdks/python/apache_beam/io/gcp/big_query_query_to_table_it_test.py
@@ -20,6 +20,7 @@ Integration test for Google Cloud BigQuery.
 
 from __future__ import absolute_import
 
+import base64
 import datetime
 import logging
 import os
@@ -50,12 +51,13 @@ NEW_TYPES_INPUT_TABLE = 'python_new_types_table'
 NEW_TYPES_OUTPUT_SCHEMA = (
     '{"fields": [{"name": "bytes","type": "BYTES"},'
     '{"name": "date","type": "DATE"},{"name": "time","type": "TIME"}]}')
-NEW_TYPES_OUTPUT_VERIFY_QUERY = ('SELECT date FROM `%s`;')
-# There are problems with query time and bytes with current version of bigquery.
+NEW_TYPES_OUTPUT_VERIFY_QUERY = ('SELECT bytes, date, time FROM `%s`;')
 NEW_TYPES_OUTPUT_EXPECTED = [
-    (datetime.date(2000, 1, 1),),
-    (datetime.date(2011, 1, 1),),
-    (datetime.date(3000, 12, 31),)]
+    (b'xyw', datetime.date(2011, 1, 1), datetime.time(23, 59, 59, 999999),),
+    (b'abc', datetime.date(2000, 1, 1), datetime.time(0, 0),),
+    (b'\xe4\xbd\xa0\xe5\xa5\xbd', datetime.date(3000, 12, 31),
+     datetime.time(23, 59, 59, 990000),),
+    (b'\xab\xac\xad', datetime.date(2000, 1, 1), datetime.time(0, 0),)]
 LEGACY_QUERY = (
     'SELECT * FROM (SELECT "apple" as fruit), (SELECT "orange" as fruit),')
 STANDARD_QUERY = (
@@ -115,10 +117,17 @@ class BigQueryQueryToTableIT(unittest.TestCase):
         projectId=self.project, datasetId=self.dataset_id, table=table)
     self.bigquery_client.client.tables.Insert(request)
     table_data = [
-        {'bytes':b'xyw=', 'date':'2011-01-01', 'time':'23:59:59.999999'},
-        {'bytes':b'abc=', 'date':'2000-01-01', 'time':'00:00:00'},
-        {'bytes':b'dec=', 'date':'3000-12-31', 'time':'23:59:59.990000'}
+        {'bytes':b'xyw', 'date':'2011-01-01', 'time':'23:59:59.999999'},
+        {'bytes':b'abc', 'date':'2000-01-01', 'time':'00:00:00'},
+        {'bytes':b'\xe4\xbd\xa0\xe5\xa5\xbd', 'date':'3000-12-31',
+         'time':'23:59:59.990000'},
+        {'bytes':b'\xab\xac\xad', 'date':'2000-01-01', 'time':'00:00:00'}
     ]
+    # the API Tools bigquery client expects byte values to be base-64 encoded
+    # TODO BEAM-4850: upgrade to google-cloud-bigquery which does not require
+    # handling the encoding in beam
+    for row in table_data:
+      row['bytes'] = base64.b64encode(row['bytes']).decode('utf-8')
     self.bigquery_client.insert_rows(
         self.project, self.dataset_id, NEW_TYPES_INPUT_TABLE, table_data)
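
For context on the encoding loop added in the hunk above: the apitools-based BigQuery client used by this test expects BYTES values to arrive as base64 text rather than raw Python bytes (presumably because the insert request is serialized as JSON). The standalone sketch below is not part of the commit; the `rows` list and the printed value are illustrative, but it shows the same encoding step in isolation:

    import base64

    # Rows shaped like the test data: raw bytes plus DATE/TIME strings.
    rows = [
        {'bytes': b'xyw', 'date': '2011-01-01', 'time': '23:59:59.999999'},
        {'bytes': b'\xe4\xbd\xa0\xe5\xa5\xbd', 'date': '3000-12-31',
         'time': '23:59:59.990000'},
    ]

    # BYTES columns must be base64-encoded text before the JSON insert request.
    for row in rows:
        row['bytes'] = base64.b64encode(row['bytes']).decode('utf-8')

    print(rows[1]['bytes'])  # 5L2g5aW9 (base64 of the UTF-8 bytes above)

This mirrors the loop the patch adds to the integration test; per the TODO referencing BEAM-4850, the explicit encoding should become unnecessary once the IO is moved to google-cloud-bigquery.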