You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by pa...@apache.org on 2022/09/01 15:30:06 UTC
[beam] branch master updated: Improve BQ test utils to support JSON in a more simple manner (#22942)
This is an automated email from the ASF dual-hosted git repository.
pabloem pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new 9c1c316953a Improve BQ test utils to support JSON in a more simple manner (#22942)
9c1c316953a is described below
commit 9c1c316953aa4a7171dc03a9f5b52ded43b9fc00
Author: Pablo Estrada <pa...@users.noreply.github.com>
AuthorDate: Thu Sep 1 08:29:57 2022 -0700
Improve BQ test utils to support JSON in a more simple manner (#22942)
* Improve BQ test utils to support JSON in a more simple manner
* Fix storage api path
* fix boolean tests
* fixup
* double is representable in JSON
* same
* Doubles can be represented in JSON as well
---
.../beam/sdk/io/gcp/bigquery/BigQueryUtils.java | 10 +++++++--
.../io/gcp/bigquery/TableRowToStorageApiProto.java | 11 +++++++++
.../sdk/io/gcp/bigquery/BigQueryIOWriteTest.java | 22 +++++++++---------
.../sdk/io/gcp/bigquery/BigQueryUtilsTest.java | 26 +++++++++++-----------
4 files changed, 43 insertions(+), 26 deletions(-)
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtils.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtils.java
index 256d6786ecd..06db56234b5 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtils.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtils.java
@@ -567,12 +567,18 @@ public class BigQueryUtils {
case INT16:
case INT32:
- case INT64:
case FLOAT:
+ case BOOLEAN:
case DOUBLE:
+ // The above types have native representations in JSON for all their
+ // possible values.
+ return fieldValue;
+
case STRING:
- case BOOLEAN:
+ case INT64:
case DECIMAL:
+ // STRING is already text; INT64 and DECIMAL are converted to strings because
+ // JSON numbers are float-based and cannot hold all of their values exactly.
return fieldValue.toString();
case BYTES:
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowToStorageApiProto.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowToStorageApiProto.java
index 7088498eeda..be98154fb38 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowToStorageApiProto.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowToStorageApiProto.java
@@ -590,7 +590,18 @@ public class TableRowToStorageApiProto {
return BaseEncoding.base64().encode(((ByteString) fieldValue).toByteArray());
case ENUM:
throw new RuntimeException("Enumerations not supported");
+ case INT32:
+ case FLOAT:
+ case BOOL:
+ case DOUBLE:
+ // The above types have native representations in JSON for all their
+ // possible values.
+ return fieldValue;
+ case STRING:
+ case INT64:
default:
+ // STRING, INT64 and all remaining types are converted to strings, because
+ // JSON numbers are float-based and cannot safely hold every INT64 value.
return fieldValue.toString();
}
}
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOWriteTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOWriteTest.java
index 5efe8c04188..ae4285c19bb 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOWriteTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOWriteTest.java
@@ -300,7 +300,7 @@ public class BigQueryIOWriteTest implements Serializable {
public void writeDynamicDestinations(boolean schemas, boolean autoSharding) throws Exception {
final Schema schema =
- Schema.builder().addField("name", FieldType.STRING).addField("id", FieldType.INT32).build();
+ Schema.builder().addField("name", FieldType.STRING).addField("id", FieldType.INT64).build();
final Pattern userPattern = Pattern.compile("([a-z]+)([0-9]+)");
@@ -341,10 +341,10 @@ public class BigQueryIOWriteTest implements Serializable {
checkState(matcher.matches());
return Row.withSchema(schema)
.addValue(matcher.group(1))
- .addValue(Integer.valueOf(matcher.group(2)))
+ .addValue(Long.valueOf(matcher.group(2)))
.build();
},
- r -> r.getString(0) + r.getInt32(1));
+ r -> r.getString(0) + r.getInt64(1));
}
// Use a partition decorator to verify that partition decorators are supported.
@@ -1328,10 +1328,10 @@ public class BigQueryIOWriteTest implements Serializable {
assertThat(
fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
containsInAnyOrder(
- new TableRow().set("name", "a").set("number", "1"),
- new TableRow().set("name", "b").set("number", "2"),
- new TableRow().set("name", "c").set("number", "3"),
- new TableRow().set("name", "d").set("number", "4")));
+ new TableRow().set("name", "a").set("number", 1),
+ new TableRow().set("name", "b").set("number", 2),
+ new TableRow().set("name", "c").set("number", 3),
+ new TableRow().set("name", "d").set("number", 4)));
}
@Test
@@ -1369,10 +1369,10 @@ public class BigQueryIOWriteTest implements Serializable {
assertThat(
fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
containsInAnyOrder(
- new TableRow().set("name", "a").set("number", "1"),
- new TableRow().set("name", "b").set("number", "2"),
- new TableRow().set("name", "c").set("number", "3"),
- new TableRow().set("name", "d").set("number", "4")));
+ new TableRow().set("name", "a").set("number", 1),
+ new TableRow().set("name", "b").set("number", 2),
+ new TableRow().set("name", "c").set("number", 3),
+ new TableRow().set("name", "d").set("number", 4)));
}
/**
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtilsTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtilsTest.java
index eacb95a9a68..b832a9b3612 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtilsTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryUtilsTest.java
@@ -599,7 +599,7 @@ public class BigQueryUtilsTest {
assertThat(row.size(), equalTo(22));
assertThat(row, hasEntry("id", "123"));
- assertThat(row, hasEntry("value", "123.456"));
+ assertThat(row, hasEntry("value", 123.456));
assertThat(row, hasEntry("datetime", "2020-11-02T12:34:56.789876"));
assertThat(row, hasEntry("datetime0ms", "2020-11-02T12:34:56"));
assertThat(row, hasEntry("datetime0s_ns", "2020-11-02T12:34:00.789876"));
@@ -610,12 +610,12 @@ public class BigQueryUtilsTest {
assertThat(row, hasEntry("time0s_ns", "12:34:00.789876"));
assertThat(row, hasEntry("time0s_0ns", "12:34:00"));
assertThat(row, hasEntry("name", "test"));
- assertThat(row, hasEntry("valid", "false"));
+ assertThat(row, hasEntry("valid", false));
assertThat(row, hasEntry("binary", "ABCD1234"));
assertThat(row, hasEntry("numeric", "123.456"));
- assertThat(row, hasEntry("boolean", "true"));
+ assertThat(row, hasEntry("boolean", true));
assertThat(row, hasEntry("long", "123"));
- assertThat(row, hasEntry("double", "123.456"));
+ assertThat(row, hasEntry("double", 123.456));
}
@Test
@@ -642,7 +642,7 @@ public class BigQueryUtilsTest {
row = ((List<TableRow>) row.get("map")).get(0);
assertThat(row.size(), equalTo(2));
assertThat(row, hasEntry("key", "test"));
- assertThat(row, hasEntry("value", "123.456"));
+ assertThat(row, hasEntry("value", 123.456));
}
@Test
@@ -653,7 +653,7 @@ public class BigQueryUtilsTest {
row = (TableRow) row.get("row");
assertThat(row.size(), equalTo(22));
assertThat(row, hasEntry("id", "123"));
- assertThat(row, hasEntry("value", "123.456"));
+ assertThat(row, hasEntry("value", 123.456));
assertThat(row, hasEntry("datetime", "2020-11-02T12:34:56.789876"));
assertThat(row, hasEntry("datetime0ms", "2020-11-02T12:34:56"));
assertThat(row, hasEntry("datetime0s_ns", "2020-11-02T12:34:00.789876"));
@@ -664,12 +664,12 @@ public class BigQueryUtilsTest {
assertThat(row, hasEntry("time0s_ns", "12:34:00.789876"));
assertThat(row, hasEntry("time0s_0ns", "12:34:00"));
assertThat(row, hasEntry("name", "test"));
- assertThat(row, hasEntry("valid", "false"));
+ assertThat(row, hasEntry("valid", false));
assertThat(row, hasEntry("binary", "ABCD1234"));
assertThat(row, hasEntry("numeric", "123.456"));
- assertThat(row, hasEntry("boolean", "true"));
+ assertThat(row, hasEntry("boolean", true));
assertThat(row, hasEntry("long", "123"));
- assertThat(row, hasEntry("double", "123.456"));
+ assertThat(row, hasEntry("double", 123.456));
}
@Test
@@ -680,7 +680,7 @@ public class BigQueryUtilsTest {
row = ((List<TableRow>) row.get("rows")).get(0);
assertThat(row.size(), equalTo(22));
assertThat(row, hasEntry("id", "123"));
- assertThat(row, hasEntry("value", "123.456"));
+ assertThat(row, hasEntry("value", 123.456));
assertThat(row, hasEntry("datetime", "2020-11-02T12:34:56.789876"));
assertThat(row, hasEntry("datetime0ms", "2020-11-02T12:34:56"));
assertThat(row, hasEntry("datetime0s_ns", "2020-11-02T12:34:00.789876"));
@@ -691,12 +691,12 @@ public class BigQueryUtilsTest {
assertThat(row, hasEntry("time0s_ns", "12:34:00.789876"));
assertThat(row, hasEntry("time0s_0ns", "12:34:00"));
assertThat(row, hasEntry("name", "test"));
- assertThat(row, hasEntry("valid", "false"));
+ assertThat(row, hasEntry("valid", false));
assertThat(row, hasEntry("binary", "ABCD1234"));
assertThat(row, hasEntry("numeric", "123.456"));
- assertThat(row, hasEntry("boolean", "true"));
+ assertThat(row, hasEntry("boolean", true));
assertThat(row, hasEntry("long", "123"));
- assertThat(row, hasEntry("double", "123.456"));
+ assertThat(row, hasEntry("double", 123.456));
}
@Test