You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by pa...@apache.org on 2022/09/30 06:17:54 UTC

[beam] branch master updated: [BEAM-10785] Change RowAsDictJsonCoder to not ensure ASCII while encoding (#22312)

This is an automated email from the ASF dual-hosted git repository.

pabloem pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new 31dab8179d8 [BEAM-10785] Change RowAsDictJsonCoder to not ensure ASCII while encoding (#22312)
31dab8179d8 is described below

commit 31dab8179d87925789a8fca638f7d457045f4abf
Author: Seunghwan Hong <ha...@gmail.com>
AuthorDate: Fri Sep 30 15:17:45 2022 +0900

    [BEAM-10785] Change RowAsDictJsonCoder to not ensure ASCII while encoding (#22312)
    
    * Change RowAsDictJsonCoder to not ensure ASCII while encoding
    
    Signed-off-by: Seunghwan Hong <ha...@gmail.com>
    
    * Format code, Refactor test for readability
    
    Signed-off-by: Seunghwan Hong <ha...@gmail.com>
    
    Signed-off-by: Seunghwan Hong <ha...@gmail.com>
    Co-authored-by: Pablo <pa...@users.noreply.github.com>
---
 CHANGES.md                                            | 1 +
 sdks/python/apache_beam/io/gcp/bigquery_tools.py      | 5 ++++-
 sdks/python/apache_beam/io/gcp/bigquery_tools_test.py | 7 +++++++
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/CHANGES.md b/CHANGES.md
index 8673b401005..87d3d685eb3 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -61,6 +61,7 @@
 * Support for X source added (Java/Python) ([#X](https://github.com/apache/beam/issues/X)).
 * Decreased TextSource CPU utilization by 2.3x (Java) ([#23193](https://github.com/apache/beam/issues/23193)).
 * Fixed bug when using SpannerIO with RuntimeValueProvider options (Java) ([#22146](https://github.com/apache/beam/issues/22146)).
+* Fixed Unicode rendering in WriteToBigQuery: rows are no longer ASCII-escaped when encoded as JSON (Python) ([#10785](https://github.com/apache/beam/issues/10785)).
 
 ## New Features / Improvements
 
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_tools.py b/sdks/python/apache_beam/io/gcp/bigquery_tools.py
index b5e50c1d42a..27428aca533 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_tools.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_tools.py
@@ -1538,7 +1538,10 @@ class RowAsDictJsonCoder(coders.Coder):
     # to the programmer that they have used NAN/INF values.
     try:
       return json.dumps(
-          table_row, allow_nan=False, default=default_encoder).encode('utf-8')
+          table_row,
+          allow_nan=False,
+          ensure_ascii=False,
+          default=default_encoder).encode('utf-8')
     except ValueError as e:
       raise ValueError(
           '%s. %s. Row: %r' % (e, JSON_COMPLIANCE_ERROR, table_row))
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_tools_test.py b/sdks/python/apache_beam/io/gcp/bigquery_tools_test.py
index 8c26e75a1ff..2ee4f374497 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_tools_test.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_tools_test.py
@@ -1052,6 +1052,13 @@ class TestRowAsDictJsonCoder(unittest.TestCase):
   def test_invalid_json_neg_inf(self):
     self.json_compliance_exception(float('-inf'))
 
+  def test_ensure_ascii(self):
+    coder = RowAsDictJsonCoder()
+    test_value = {'s': '🎉'}
+    output_value = b'{"s": "\xf0\x9f\x8e\x89"}'
+
+    self.assertEqual(output_value, coder.encode(test_value))
+
 
 @unittest.skipIf(HttpError is None, 'GCP dependencies are not installed')
 class TestJsonRowWriter(unittest.TestCase):