You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@gobblin.apache.org by wl...@apache.org on 2023/05/01 18:40:54 UTC
[gobblin] branch master updated: [GOBBLIN-1827] Add check that if nested field is optional and has a non-null default… (#3689)
This is an automated email from the ASF dual-hosted git repository.
wlo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/gobblin.git
The following commit(s) were added to refs/heads/master by this push:
new fa39f118a [GOBBLIN-1827] Add check that if nested field is optional and has a non-null default… (#3689)
fa39f118a is described below
commit fa39f118a9c3152bd89e4a3954ae35f38b8d68bb
Author: William Lo <lo...@gmail.com>
AuthorDate: Mon May 1 14:40:46 2023 -0400
[GOBBLIN-1827] Add check that if nested field is optional and has a non-null default… (#3689)
* Add a check: if a nested field is optional and has a non-null default, order the union types so that the default's type comes first instead of null
* Add default type check in test
---
.../org/apache/gobblin/util/AvroFlattener.java | 6 +++-
.../org/apache/gobblin/util/AvroFlattenerTest.java | 28 ++++++++++++++++
...lDefaultWithinOptionWithinRecord_flattened.json | 37 ++++++++++++++++++++++
...llDefaultWithinOptionWithinRecord_original.json | 33 +++++++++++++++++++
4 files changed, 103 insertions(+), 1 deletion(-)
diff --git a/gobblin-utility/src/main/java/org/apache/gobblin/util/AvroFlattener.java b/gobblin-utility/src/main/java/org/apache/gobblin/util/AvroFlattener.java
index f81ce3878..38c2a9aa6 100644
--- a/gobblin-utility/src/main/java/org/apache/gobblin/util/AvroFlattener.java
+++ b/gobblin-utility/src/main/java/org/apache/gobblin/util/AvroFlattener.java
@@ -23,13 +23,13 @@ import java.util.List;
import org.apache.avro.AvroRuntimeException;
import org.apache.avro.Schema;
-import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelper;
import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Logger;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
+import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelper;
/***
* This class provides methods to flatten an Avro Schema to make it more optimal for ORC
@@ -402,6 +402,10 @@ public class AvroFlattener {
}
// Wrap the Union, since parent Union is an option
else {
+ // If the field within the parent Union has a non-null default value, then null should not be the first member
+ if (f.hasDefaultValue() && f.defaultVal() != null) {
+ isNullFirstMember = false;
+ }
if (isNullFirstMember) {
flattenedFieldSchema =
Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), flattenedFieldSchema));
diff --git a/gobblin-utility/src/test/java/org/apache/gobblin/util/AvroFlattenerTest.java b/gobblin-utility/src/test/java/org/apache/gobblin/util/AvroFlattenerTest.java
index c6889da19..a4c9477b5 100644
--- a/gobblin-utility/src/test/java/org/apache/gobblin/util/AvroFlattenerTest.java
+++ b/gobblin-utility/src/test/java/org/apache/gobblin/util/AvroFlattenerTest.java
@@ -22,6 +22,8 @@ import org.apache.avro.Schema;
import org.testng.Assert;
import org.testng.annotations.Test;
+import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelper;
+
public class AvroFlattenerTest {
@@ -188,4 +190,30 @@ public class AvroFlattenerTest {
}
+ /**
+ * Test flattening for non-null default within an Option within another Record
+ * Record R1 {
+ * fields: {
+ * Union [ null,
+ * Record R2 {
+ * field: type
+ * default: type
+ * }
+ * ]
+ * }
+ * }
+ */
+ @Test
+ public void testNonNullDefaultWithinOptionWithinRecord () throws IOException {
+
+ Schema originalSchema = readSchemaFromJsonFile("nonNullDefaultWithinOptionWithinRecord_original.json");
+ Schema expectedSchema = readSchemaFromJsonFile("nonNullDefaultWithinOptionWithinRecord_flattened.json");
+ Schema flattenedSchema = new AvroFlattener().flatten(originalSchema, false);
+ Assert.assertEquals(AvroCompatibilityHelper.getSpecificDefaultValue(
+ flattenedSchema.getField("parentFieldUnion__unionRecordMemberFieldUnion__superNestedFieldString1")).toString(),
+ "defaultString1");
+ Assert.assertEquals(flattenedSchema.toString(), expectedSchema.toString());
+ }
+
+
}
diff --git a/gobblin-utility/src/test/resources/flattenAvro/nonNullDefaultWithinOptionWithinRecord_flattened.json b/gobblin-utility/src/test/resources/flattenAvro/nonNullDefaultWithinOptionWithinRecord_flattened.json
new file mode 100644
index 000000000..1ef589909
--- /dev/null
+++ b/gobblin-utility/src/test/resources/flattenAvro/nonNullDefaultWithinOptionWithinRecord_flattened.json
@@ -0,0 +1,37 @@
+{
+ "type":"record",
+ "name":"parentRecordName",
+ "fields":[
+ {
+ "name":"parentFieldUnion__unionRecordMemberFieldUnion__superNestedFieldString1",
+ "type":[
+ "string",
+ "null"
+ ],
+ "default":"defaultString1",
+ "flatten_source":"parentFieldUnion.unionRecordMemberFieldUnion.superNestedFieldString1"
+ },
+ {
+ "name":"parentFieldUnion__unionRecordMemberFieldUnion__superNestedFieldString2",
+ "type":[
+ "string",
+ "null"
+ ],
+ "default":"defaultString2",
+ "flatten_source":"parentFieldUnion.unionRecordMemberFieldUnion.superNestedFieldString2"
+ },
+ {
+ "name":"parentFieldUnion__unionRecordMemberFieldString",
+ "type":[
+ "null",
+ "string"
+ ],
+ "default":null,
+ "flatten_source":"parentFieldUnion.unionRecordMemberFieldString"
+ },
+ {
+ "name":"parentFieldInt",
+ "type":"int"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/gobblin-utility/src/test/resources/flattenAvro/nonNullDefaultWithinOptionWithinRecord_original.json b/gobblin-utility/src/test/resources/flattenAvro/nonNullDefaultWithinOptionWithinRecord_original.json
new file mode 100644
index 000000000..ae66543f1
--- /dev/null
+++ b/gobblin-utility/src/test/resources/flattenAvro/nonNullDefaultWithinOptionWithinRecord_original.json
@@ -0,0 +1,33 @@
+{
+ "type" : "record",
+ "name" : "parentRecordName",
+ "fields" : [ {
+ "name" : "parentFieldUnion",
+ "type" : [ "null", {
+ "type" : "record",
+ "name" : "unionRecordMember",
+ "fields" : [ {
+ "name" : "unionRecordMemberFieldUnion",
+ "type" : [ "null", {
+ "type" : "record",
+ "name" : "superNestedRecord",
+ "fields" : [ {
+ "name" : "superNestedFieldString1",
+ "type" : "string",
+ "default": "defaultString1"
+ }, {
+ "name" : "superNestedFieldString2",
+ "type" : "string",
+ "default": "defaultString2"
+ } ]
+ } ]
+ }, {
+ "name" : "unionRecordMemberFieldString",
+ "type" : "string"
+ } ]
+ } ]
+ }, {
+ "name" : "parentFieldInt",
+ "type" : "int"
+ } ]
+}
\ No newline at end of file