You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@gobblin.apache.org by wl...@apache.org on 2023/05/01 18:40:54 UTC

[gobblin] branch master updated: [GOBBLIN-1827] Add check that if nested field is optional and has a non-null default… (#3689)

This is an automated email from the ASF dual-hosted git repository.

wlo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/gobblin.git


The following commit(s) were added to refs/heads/master by this push:
     new fa39f118a [GOBBLIN-1827] Add check that if nested field is optional and has a non-null default… (#3689)
fa39f118a is described below

commit fa39f118a9c3152bd89e4a3954ae35f38b8d68bb
Author: William Lo <lo...@gmail.com>
AuthorDate: Mon May 1 14:40:46 2023 -0400

    [GOBBLIN-1827] Add check that if nested field is optional and has a non-null default… (#3689)
    
    * Add a check: if a nested field is optional and has a non-null default, order the union types with the default's type first instead of null
    
    * add default type check in test
---
 .../org/apache/gobblin/util/AvroFlattener.java     |  6 +++-
 .../org/apache/gobblin/util/AvroFlattenerTest.java | 28 ++++++++++++++++
 ...lDefaultWithinOptionWithinRecord_flattened.json | 37 ++++++++++++++++++++++
 ...llDefaultWithinOptionWithinRecord_original.json | 33 +++++++++++++++++++
 4 files changed, 103 insertions(+), 1 deletion(-)

diff --git a/gobblin-utility/src/main/java/org/apache/gobblin/util/AvroFlattener.java b/gobblin-utility/src/main/java/org/apache/gobblin/util/AvroFlattener.java
index f81ce3878..38c2a9aa6 100644
--- a/gobblin-utility/src/main/java/org/apache/gobblin/util/AvroFlattener.java
+++ b/gobblin-utility/src/main/java/org/apache/gobblin/util/AvroFlattener.java
@@ -23,13 +23,13 @@ import java.util.List;
 
 import org.apache.avro.AvroRuntimeException;
 import org.apache.avro.Schema;
-import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelper;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.log4j.Logger;
 
 import com.google.common.base.Optional;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
+import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelper;
 
 /***
  * This class provides methods to flatten an Avro Schema to make it more optimal for ORC
@@ -402,6 +402,10 @@ public class AvroFlattener {
           }
           // Wrap the Union, since parent Union is an option
           else {
+            // If the field within the parent Union has a non-null default value, then null should not be the first member
+            if (f.hasDefaultValue() && f.defaultVal() != null) {
+              isNullFirstMember = false;
+            }
             if (isNullFirstMember) {
               flattenedFieldSchema =
                   Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), flattenedFieldSchema));
diff --git a/gobblin-utility/src/test/java/org/apache/gobblin/util/AvroFlattenerTest.java b/gobblin-utility/src/test/java/org/apache/gobblin/util/AvroFlattenerTest.java
index c6889da19..a4c9477b5 100644
--- a/gobblin-utility/src/test/java/org/apache/gobblin/util/AvroFlattenerTest.java
+++ b/gobblin-utility/src/test/java/org/apache/gobblin/util/AvroFlattenerTest.java
@@ -22,6 +22,8 @@ import org.apache.avro.Schema;
 import org.testng.Assert;
 import org.testng.annotations.Test;
 
+import com.linkedin.avroutil1.compatibility.AvroCompatibilityHelper;
+
 
 public class AvroFlattenerTest {
 
@@ -188,4 +190,30 @@ public class AvroFlattenerTest {
   }
 
 
+  /**
+   * Test flattening for non-null default within an Option within another Record
+   * Record R1 {
+   *  fields: {
+   *    Union [ null,
+   *            Record R2 {
+   *              field: type
+   *              default: type
+   *            }
+   *          ]
+   *    }
+   * }
+   */
+  @Test
+  public void testNonNullDefaultWithinOptionWithinRecord () throws IOException {
+
+    Schema originalSchema = readSchemaFromJsonFile("nonNullDefaultWithinOptionWithinRecord_original.json");
+    Schema expectedSchema = readSchemaFromJsonFile("nonNullDefaultWithinOptionWithinRecord_flattened.json");
+    Schema flattenedSchema = new AvroFlattener().flatten(originalSchema, false);
+    Assert.assertEquals(AvroCompatibilityHelper.getSpecificDefaultValue(
+        flattenedSchema.getField("parentFieldUnion__unionRecordMemberFieldUnion__superNestedFieldString1")).toString(),
+        "defaultString1");
+    Assert.assertEquals(flattenedSchema.toString(), expectedSchema.toString());
+  }
+
+
 }
diff --git a/gobblin-utility/src/test/resources/flattenAvro/nonNullDefaultWithinOptionWithinRecord_flattened.json b/gobblin-utility/src/test/resources/flattenAvro/nonNullDefaultWithinOptionWithinRecord_flattened.json
new file mode 100644
index 000000000..1ef589909
--- /dev/null
+++ b/gobblin-utility/src/test/resources/flattenAvro/nonNullDefaultWithinOptionWithinRecord_flattened.json
@@ -0,0 +1,37 @@
+{
+  "type":"record",
+  "name":"parentRecordName",
+  "fields":[
+    {
+      "name":"parentFieldUnion__unionRecordMemberFieldUnion__superNestedFieldString1",
+      "type":[
+        "string",
+        "null"
+      ],
+      "default":"defaultString1",
+      "flatten_source":"parentFieldUnion.unionRecordMemberFieldUnion.superNestedFieldString1"
+    },
+    {
+      "name":"parentFieldUnion__unionRecordMemberFieldUnion__superNestedFieldString2",
+      "type":[
+        "string",
+        "null"
+      ],
+      "default":"defaultString2",
+      "flatten_source":"parentFieldUnion.unionRecordMemberFieldUnion.superNestedFieldString2"
+    },
+    {
+      "name":"parentFieldUnion__unionRecordMemberFieldString",
+      "type":[
+        "null",
+        "string"
+      ],
+      "default":null,
+      "flatten_source":"parentFieldUnion.unionRecordMemberFieldString"
+    },
+    {
+      "name":"parentFieldInt",
+      "type":"int"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/gobblin-utility/src/test/resources/flattenAvro/nonNullDefaultWithinOptionWithinRecord_original.json b/gobblin-utility/src/test/resources/flattenAvro/nonNullDefaultWithinOptionWithinRecord_original.json
new file mode 100644
index 000000000..ae66543f1
--- /dev/null
+++ b/gobblin-utility/src/test/resources/flattenAvro/nonNullDefaultWithinOptionWithinRecord_original.json
@@ -0,0 +1,33 @@
+{
+  "type" : "record",
+  "name" : "parentRecordName",
+  "fields" : [ {
+    "name" : "parentFieldUnion",
+    "type" : [ "null", {
+      "type" : "record",
+      "name" : "unionRecordMember",
+      "fields" : [ {
+        "name" : "unionRecordMemberFieldUnion",
+        "type" : [ "null", {
+          "type" : "record",
+          "name" : "superNestedRecord",
+          "fields" : [ {
+            "name" : "superNestedFieldString1",
+            "type" : "string",
+            "default": "defaultString1"
+          }, {
+            "name" : "superNestedFieldString2",
+            "type" : "string",
+            "default": "defaultString2"
+          } ]
+        } ]
+      }, {
+        "name" : "unionRecordMemberFieldString",
+        "type" : "string"
+      } ]
+    } ]
+  }, {
+    "name" : "parentFieldInt",
+    "type" : "int"
+  } ]
+}
\ No newline at end of file