You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2020/05/28 15:38:40 UTC

[iceberg] branch master updated: ORC: In BuildOrcProjection field should be optional if any parent is optional (#1071)

This is an automated email from the ASF dual-hosted git repository.

blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new d5c166b  ORC: In BuildOrcProjection field should be optional if any parent is optional (#1071)
d5c166b is described below

commit d5c166b1b29f10b0eafa2d27fcb41e8e77341061
Author: Shardul Mahadik <sm...@linkedin.com>
AuthorDate: Thu May 28 08:38:30 2020 -0700

    ORC: In BuildOrcProjection field should be optional if any parent is optional (#1071)
---
 .../iceberg/data/TestMetricsRowGroupFilter.java    | 19 +++-----
 .../apache/iceberg/data/TestReadProjection.java    | 29 ++++++++++++
 .../java/org/apache/iceberg/orc/ORCSchemaUtil.java | 10 ++--
 .../apache/iceberg/orc/TestBuildOrcProjection.java | 53 ++++++++++++++++++++++
 4 files changed, 94 insertions(+), 17 deletions(-)

diff --git a/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java b/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java
index 8fa4cae..5ab5959 100644
--- a/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java
+++ b/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java
@@ -112,12 +112,9 @@ public class TestMetricsRowGroupFilter {
       optional(6, "no_nulls", StringType.get()),
       optional(7, "struct_not_null", structFieldType),
       optional(9, "not_in_file", FloatType.get()),
-      optional(10, "str", StringType.get())
-      // TODO: Uncomment this after #961 is fixed
-      // ORCSchemaUtil.buildOrcProjection has a bug which does not allow addition of container types with nested
-      // required children. Enable this field after #961 is fixed along with commented tests for the same column below
-      //  optional(11, "map_not_null",
-      //      Types.MapType.ofRequired(12, 13, StringType.get(), IntegerType.get()))
+      optional(10, "str", StringType.get()),
+      optional(11, "map_not_null",
+          Types.MapType.ofRequired(12, 13, StringType.get(), IntegerType.get()))
   );
 
   private static final Types.StructType _structFieldType =
@@ -265,9 +262,8 @@ public class TestMetricsRowGroupFilter {
     shouldRead = shouldRead(notNull("no_nulls"));
     Assert.assertTrue("Should read: non-null column contains a non-null value", shouldRead);
 
-    // TODO: Enable this test after #961 is fixed
-    // shouldRead = shouldRead(notNull("map_not_null"));
-    // Assert.assertTrue("Should read: map type is not skipped", shouldRead);
+    shouldRead = shouldRead(notNull("map_not_null"));
+    Assert.assertTrue("Should read: map type is not skipped", shouldRead);
 
     shouldRead = shouldRead(notNull("struct_not_null"));
     Assert.assertTrue("Should read: struct type is not skipped", shouldRead);
@@ -284,9 +280,8 @@ public class TestMetricsRowGroupFilter {
     shouldRead = shouldRead(isNull("no_nulls"));
     Assert.assertFalse("Should skip: non-null column contains no null values", shouldRead);
 
-    // TODO: Enable this test after #961 is fixed
-    // shouldRead = shouldRead(isNull("map_not_null"));
-    // Assert.assertTrue("Should read: map type is not skipped", shouldRead);
+    shouldRead = shouldRead(isNull("map_not_null"));
+    Assert.assertTrue("Should read: map type is not skipped", shouldRead);
 
     shouldRead = shouldRead(isNull("struct_not_null"));
     Assert.assertTrue("Should read: struct type is not skipped", shouldRead);
diff --git a/data/src/test/java/org/apache/iceberg/data/TestReadProjection.java b/data/src/test/java/org/apache/iceberg/data/TestReadProjection.java
index 08dc733..2dee8d1 100644
--- a/data/src/test/java/org/apache/iceberg/data/TestReadProjection.java
+++ b/data/src/test/java/org/apache/iceberg/data/TestReadProjection.java
@@ -577,4 +577,33 @@ public abstract class TestReadProjection {
     Assert.assertNull("Should not project y", projectedP2.getField("y"));
     Assert.assertEquals("Should project null z", null, projectedP2.getField("z"));
   }
+
+  @Test
+  public void testAddedFieldsWithRequiredChildren() throws Exception {
+    Schema schema = new Schema(
+        Types.NestedField.required(1, "a", Types.LongType.get())
+    );
+
+    Record record = GenericRecord.create(schema.asStruct());
+    record.setField("a", 100L);
+
+    Schema addedFields = new Schema(
+        Types.NestedField.optional(1, "a", Types.LongType.get()),
+        Types.NestedField.optional(2, "b", Types.StructType.of(
+            Types.NestedField.required(3, "c", Types.LongType.get())
+        )),
+        Types.NestedField.optional(4, "d", Types.ListType.ofRequired(5, Types.LongType.get())),
+        Types.NestedField.optional(6, "e", Types.MapType.ofRequired(7, 8, Types.LongType.get(), Types.LongType.get()))
+    );
+
+    Record projected = writeAndRead("add_fields_with_required_children_projection", schema, addedFields, record);
+    Assert.assertEquals("Should contain the correct value in column 1", projected.get(0), 100L);
+    Assert.assertEquals("Should contain the correct value in column a", projected.getField("a"), 100L);
+    Assert.assertNull("Should contain empty value in new column 2", projected.get(1));
+    Assert.assertNull("Should contain empty value in column b", projected.getField("b"));
+    Assert.assertNull("Should contain empty value in new column 4", projected.get(2));
+    Assert.assertNull("Should contain empty value in column d", projected.getField("d"));
+    Assert.assertNull("Should contain empty value in new column 6", projected.get(3));
+    Assert.assertNull("Should contain empty value in column e", projected.getField("e"));
+  }
 }
diff --git a/orc/src/main/java/org/apache/iceberg/orc/ORCSchemaUtil.java b/orc/src/main/java/org/apache/iceberg/orc/ORCSchemaUtil.java
index 27645a4..07be70f 100644
--- a/orc/src/main/java/org/apache/iceberg/orc/ORCSchemaUtil.java
+++ b/orc/src/main/java/org/apache/iceberg/orc/ORCSchemaUtil.java
@@ -269,21 +269,21 @@ public final class ORCSchemaUtil {
               .map(OrcField::name)
               .orElse(nestedField.name() + "_r" + nestedField.fieldId());
           TypeDescription childType = buildOrcProjection(nestedField.fieldId(), nestedField.type(),
-              nestedField.isRequired(), mapping);
+              isRequired && nestedField.isRequired(), mapping);
           orcType.addField(name, childType);
         }
         break;
       case LIST:
         Types.ListType list = (Types.ListType) type;
         TypeDescription elementType = buildOrcProjection(list.elementId(), list.elementType(),
-            list.isElementRequired(), mapping);
+            isRequired && list.isElementRequired(), mapping);
         orcType = TypeDescription.createList(elementType);
         break;
       case MAP:
         Types.MapType map = (Types.MapType) type;
-        TypeDescription keyType = buildOrcProjection(map.keyId(), map.keyType(), true, mapping);
-        TypeDescription valueType = buildOrcProjection(map.valueId(), map.valueType(), map.isValueRequired(),
-            mapping);
+        TypeDescription keyType = buildOrcProjection(map.keyId(), map.keyType(), isRequired, mapping);
+        TypeDescription valueType = buildOrcProjection(map.valueId(), map.valueType(),
+            isRequired && map.isValueRequired(), mapping);
         orcType = TypeDescription.createMap(keyType, valueType);
         break;
       default:
diff --git a/orc/src/test/java/org/apache/iceberg/orc/TestBuildOrcProjection.java b/orc/src/test/java/org/apache/iceberg/orc/TestBuildOrcProjection.java
index 2a86f23..12b74af 100644
--- a/orc/src/test/java/org/apache/iceberg/orc/TestBuildOrcProjection.java
+++ b/orc/src/test/java/org/apache/iceberg/orc/TestBuildOrcProjection.java
@@ -22,9 +22,12 @@ package org.apache.iceberg.orc;
 import org.apache.iceberg.Schema;
 import org.apache.iceberg.types.Types;
 import org.apache.orc.TypeDescription;
+import org.junit.Rule;
 import org.junit.Test;
+import org.junit.rules.ExpectedException;
 
 import static org.apache.iceberg.types.Types.NestedField.optional;
+import static org.apache.iceberg.types.Types.NestedField.required;
 import static org.junit.Assert.assertEquals;
 
 /**
@@ -127,4 +130,54 @@ public class TestBuildOrcProjection {
     assertEquals(TypeDescription.Category.STRING, nestedCol.findSubtype("b").getCategory());
   }
 
+  @Test
+  public void testEvolutionAddContainerField() {
+    Schema baseSchema = new Schema(
+        required(1, "a", Types.IntegerType.get())
+    );
+    TypeDescription baseOrcSchema = ORCSchemaUtil.convert(baseSchema);
+
+    Schema evolvedSchema = new Schema(
+        required(1, "a", Types.IntegerType.get()),
+        optional(2, "b", Types.StructType.of(
+            required(3, "c", Types.LongType.get())
+        ))
+    );
+
+    TypeDescription newOrcSchema = ORCSchemaUtil.buildOrcProjection(evolvedSchema, baseOrcSchema);
+    assertEquals(2, newOrcSchema.getChildren().size());
+    assertEquals(TypeDescription.Category.INT, newOrcSchema.findSubtype("a").getCategory());
+    assertEquals(2, newOrcSchema.findSubtype("b_r2").getId());
+    assertEquals(TypeDescription.Category.STRUCT, newOrcSchema.findSubtype("b_r2").getCategory());
+    TypeDescription nestedCol = newOrcSchema.findSubtype("b_r2");
+    assertEquals(3, nestedCol.findSubtype("c_r3").getId());
+    assertEquals(TypeDescription.Category.LONG, nestedCol.findSubtype("c_r3").getCategory());
+  }
+
+  @Rule
+  public ExpectedException exceptionRule = ExpectedException.none();
+
+  @Test
+  public void testRequiredNestedFieldMissingInFile() {
+    exceptionRule.expect(IllegalArgumentException.class);
+    exceptionRule.expectMessage("Field 4 of type long is required and was not found");
+
+    Schema baseSchema = new Schema(
+        required(1, "a", Types.IntegerType.get()),
+        required(2, "b", Types.StructType.of(
+            required(3, "c", Types.LongType.get())
+        ))
+    );
+    TypeDescription baseOrcSchema = ORCSchemaUtil.convert(baseSchema);
+
+    Schema evolvedSchema = new Schema(
+        required(1, "a", Types.IntegerType.get()),
+        required(2, "b", Types.StructType.of(
+            required(3, "c", Types.LongType.get()),
+            required(4, "d", Types.LongType.get())
+        ))
+    );
+
+    ORCSchemaUtil.buildOrcProjection(evolvedSchema, baseOrcSchema);
+  }
 }