You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by pa...@apache.org on 2022/09/01 15:30:06 UTC

[beam] branch master updated: Improve BQ test utils to support JSON in a more simple manner (#22942)

This is an automated email from the ASF dual-hosted git repository.

pabloem pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new 9c1c316953a Improve BQ test utils to support JSON in a more simple manner (#22942)
9c1c316953a is described below

commit 9c1c316953aa4a7171dc03a9f5b52ded43b9fc00
Author: Pablo Estrada <pa...@users.noreply.github.com>
AuthorDate: Thu Sep 1 08:29:57 2022 -0700

    Improve BQ test utils to support JSON in a more simple manner (#22942)
    
    * Improve BQ test utils to support JSON in a more simple manner
    
    * Fix storage api path
    
    * fix boolean tests
    
    * fixup
    
    * double is representable in JSON
    
    * same
    
    * Doubles can be represented in JSON as well
---
 .../beam/sdk/io/gcp/bigquery/BigQueryUtils.java    | 10 +++++++--
 .../io/gcp/bigquery/TableRowToStorageApiProto.java | 11 +++++++++
 .../sdk/io/gcp/bigquery/BigQueryIOWriteTest.java   | 22 +++++++++---------
 .../sdk/io/gcp/bigquery/BigQueryUtilsTest.java     | 26 +++++++++++-----------
 4 files changed, 43 insertions(+), 26 deletions(-)

diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtils.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtils.java
index 256d6786ecd..06db56234b5 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtils.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtils.java
@@ -567,12 +567,18 @@ public class BigQueryUtils {
 
       case INT16:
       case INT32:
-      case INT64:
       case FLOAT:
+      case BOOLEAN:
       case DOUBLE:
+        // The above types have native representations in JSON for all their
+        // possible values.
+        return fieldValue;
+
       case STRING:
-      case BOOLEAN:
+      case INT64:
       case DECIMAL:
+        // The above types are converted to strings so they survive JSON encoding
+        // losslessly (JSON numbers are float-based and cannot hold every INT64/DECIMAL).
         return fieldValue.toString();
 
       case BYTES:
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowToStorageApiProto.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowToStorageApiProto.java
index 7088498eeda..be98154fb38 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowToStorageApiProto.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowToStorageApiProto.java
@@ -590,7 +590,18 @@ public class TableRowToStorageApiProto {
         return BaseEncoding.base64().encode(((ByteString) fieldValue).toByteArray());
       case ENUM:
         throw new RuntimeException("Enumerations not supported");
+      case INT32:
+      case FLOAT:
+      case BOOL:
+      case DOUBLE:
+        // The above types have native representations in JSON for all their
+        // possible values.
+        return fieldValue;
+      case STRING:
+      case INT64:
       default:
+        // The above types, and any type not handled explicitly, are converted to
+        // strings to be safely encoded in JSON (whose numbers are all float-based).
         return fieldValue.toString();
     }
   }
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOWriteTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOWriteTest.java
index 5efe8c04188..ae4285c19bb 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOWriteTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOWriteTest.java
@@ -300,7 +300,7 @@ public class BigQueryIOWriteTest implements Serializable {
 
   public void writeDynamicDestinations(boolean schemas, boolean autoSharding) throws Exception {
     final Schema schema =
-        Schema.builder().addField("name", FieldType.STRING).addField("id", FieldType.INT32).build();
+        Schema.builder().addField("name", FieldType.STRING).addField("id", FieldType.INT64).build();
 
     final Pattern userPattern = Pattern.compile("([a-z]+)([0-9]+)");
 
@@ -341,10 +341,10 @@ public class BigQueryIOWriteTest implements Serializable {
                 checkState(matcher.matches());
                 return Row.withSchema(schema)
                     .addValue(matcher.group(1))
-                    .addValue(Integer.valueOf(matcher.group(2)))
+                    .addValue(Long.valueOf(matcher.group(2)))
                     .build();
               },
-              r -> r.getString(0) + r.getInt32(1));
+              r -> r.getString(0) + r.getInt64(1));
     }
 
     // Use a partition decorator to verify that partition decorators are supported.
@@ -1328,10 +1328,10 @@ public class BigQueryIOWriteTest implements Serializable {
     assertThat(
         fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
         containsInAnyOrder(
-            new TableRow().set("name", "a").set("number", "1"),
-            new TableRow().set("name", "b").set("number", "2"),
-            new TableRow().set("name", "c").set("number", "3"),
-            new TableRow().set("name", "d").set("number", "4")));
+            new TableRow().set("name", "a").set("number", 1),
+            new TableRow().set("name", "b").set("number", 2),
+            new TableRow().set("name", "c").set("number", 3),
+            new TableRow().set("name", "d").set("number", 4)));
   }
 
   @Test
@@ -1369,10 +1369,10 @@ public class BigQueryIOWriteTest implements Serializable {
     assertThat(
         fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
         containsInAnyOrder(
-            new TableRow().set("name", "a").set("number", "1"),
-            new TableRow().set("name", "b").set("number", "2"),
-            new TableRow().set("name", "c").set("number", "3"),
-            new TableRow().set("name", "d").set("number", "4")));
+            new TableRow().set("name", "a").set("number", 1),
+            new TableRow().set("name", "b").set("number", 2),
+            new TableRow().set("name", "c").set("number", 3),
+            new TableRow().set("name", "d").set("number", 4)));
   }
 
   /**
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtilsTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtilsTest.java
index eacb95a9a68..b832a9b3612 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtilsTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtilsTest.java
@@ -599,7 +599,7 @@ public class BigQueryUtilsTest {
 
     assertThat(row.size(), equalTo(22));
     assertThat(row, hasEntry("id", "123"));
-    assertThat(row, hasEntry("value", "123.456"));
+    assertThat(row, hasEntry("value", 123.456));
     assertThat(row, hasEntry("datetime", "2020-11-02T12:34:56.789876"));
     assertThat(row, hasEntry("datetime0ms", "2020-11-02T12:34:56"));
     assertThat(row, hasEntry("datetime0s_ns", "2020-11-02T12:34:00.789876"));
@@ -610,12 +610,12 @@ public class BigQueryUtilsTest {
     assertThat(row, hasEntry("time0s_ns", "12:34:00.789876"));
     assertThat(row, hasEntry("time0s_0ns", "12:34:00"));
     assertThat(row, hasEntry("name", "test"));
-    assertThat(row, hasEntry("valid", "false"));
+    assertThat(row, hasEntry("valid", false));
     assertThat(row, hasEntry("binary", "ABCD1234"));
     assertThat(row, hasEntry("numeric", "123.456"));
-    assertThat(row, hasEntry("boolean", "true"));
+    assertThat(row, hasEntry("boolean", true));
     assertThat(row, hasEntry("long", "123"));
-    assertThat(row, hasEntry("double", "123.456"));
+    assertThat(row, hasEntry("double", 123.456));
   }
 
   @Test
@@ -642,7 +642,7 @@ public class BigQueryUtilsTest {
     row = ((List<TableRow>) row.get("map")).get(0);
     assertThat(row.size(), equalTo(2));
     assertThat(row, hasEntry("key", "test"));
-    assertThat(row, hasEntry("value", "123.456"));
+    assertThat(row, hasEntry("value", 123.456));
   }
 
   @Test
@@ -653,7 +653,7 @@ public class BigQueryUtilsTest {
     row = (TableRow) row.get("row");
     assertThat(row.size(), equalTo(22));
     assertThat(row, hasEntry("id", "123"));
-    assertThat(row, hasEntry("value", "123.456"));
+    assertThat(row, hasEntry("value", 123.456));
     assertThat(row, hasEntry("datetime", "2020-11-02T12:34:56.789876"));
     assertThat(row, hasEntry("datetime0ms", "2020-11-02T12:34:56"));
     assertThat(row, hasEntry("datetime0s_ns", "2020-11-02T12:34:00.789876"));
@@ -664,12 +664,12 @@ public class BigQueryUtilsTest {
     assertThat(row, hasEntry("time0s_ns", "12:34:00.789876"));
     assertThat(row, hasEntry("time0s_0ns", "12:34:00"));
     assertThat(row, hasEntry("name", "test"));
-    assertThat(row, hasEntry("valid", "false"));
+    assertThat(row, hasEntry("valid", false));
     assertThat(row, hasEntry("binary", "ABCD1234"));
     assertThat(row, hasEntry("numeric", "123.456"));
-    assertThat(row, hasEntry("boolean", "true"));
+    assertThat(row, hasEntry("boolean", true));
     assertThat(row, hasEntry("long", "123"));
-    assertThat(row, hasEntry("double", "123.456"));
+    assertThat(row, hasEntry("double", 123.456));
   }
 
   @Test
@@ -680,7 +680,7 @@ public class BigQueryUtilsTest {
     row = ((List<TableRow>) row.get("rows")).get(0);
     assertThat(row.size(), equalTo(22));
     assertThat(row, hasEntry("id", "123"));
-    assertThat(row, hasEntry("value", "123.456"));
+    assertThat(row, hasEntry("value", 123.456));
     assertThat(row, hasEntry("datetime", "2020-11-02T12:34:56.789876"));
     assertThat(row, hasEntry("datetime0ms", "2020-11-02T12:34:56"));
     assertThat(row, hasEntry("datetime0s_ns", "2020-11-02T12:34:00.789876"));
@@ -691,12 +691,12 @@ public class BigQueryUtilsTest {
     assertThat(row, hasEntry("time0s_ns", "12:34:00.789876"));
     assertThat(row, hasEntry("time0s_0ns", "12:34:00"));
     assertThat(row, hasEntry("name", "test"));
-    assertThat(row, hasEntry("valid", "false"));
+    assertThat(row, hasEntry("valid", false));
     assertThat(row, hasEntry("binary", "ABCD1234"));
     assertThat(row, hasEntry("numeric", "123.456"));
-    assertThat(row, hasEntry("boolean", "true"));
+    assertThat(row, hasEntry("boolean", true));
     assertThat(row, hasEntry("long", "123"));
-    assertThat(row, hasEntry("double", "123.456"));
+    assertThat(row, hasEntry("double", 123.456));
   }
 
   @Test