You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2020/05/28 15:38:40 UTC
[iceberg] branch master updated: ORC: In BuildOrcProjection field
should be optional if any parent is optional (#1071)
This is an automated email from the ASF dual-hosted git repository.
blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new d5c166b ORC: In BuildOrcProjection field should be optional if any parent is optional (#1071)
d5c166b is described below
commit d5c166b1b29f10b0eafa2d27fcb41e8e77341061
Author: Shardul Mahadik <sm...@linkedin.com>
AuthorDate: Thu May 28 08:38:30 2020 -0700
ORC: In BuildOrcProjection field should be optional if any parent is optional (#1071)
---
.../iceberg/data/TestMetricsRowGroupFilter.java | 19 +++-----
.../apache/iceberg/data/TestReadProjection.java | 29 ++++++++++++
.../java/org/apache/iceberg/orc/ORCSchemaUtil.java | 10 ++--
.../apache/iceberg/orc/TestBuildOrcProjection.java | 53 ++++++++++++++++++++++
4 files changed, 94 insertions(+), 17 deletions(-)
diff --git a/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java b/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java
index 8fa4cae..5ab5959 100644
--- a/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java
+++ b/data/src/test/java/org/apache/iceberg/data/TestMetricsRowGroupFilter.java
@@ -112,12 +112,9 @@ public class TestMetricsRowGroupFilter {
optional(6, "no_nulls", StringType.get()),
optional(7, "struct_not_null", structFieldType),
optional(9, "not_in_file", FloatType.get()),
- optional(10, "str", StringType.get())
- // TODO: Uncomment this after #961 is fixed
- // ORCSchemaUtil.buildOrcProjection has a bug which does not allow addition of container types with nested
- // required children. Enable this field after #961 is fixed along with commented tests for the same column below
- // optional(11, "map_not_null",
- // Types.MapType.ofRequired(12, 13, StringType.get(), IntegerType.get()))
+ optional(10, "str", StringType.get()),
+ optional(11, "map_not_null",
+ Types.MapType.ofRequired(12, 13, StringType.get(), IntegerType.get()))
);
private static final Types.StructType _structFieldType =
@@ -265,9 +262,8 @@ public class TestMetricsRowGroupFilter {
shouldRead = shouldRead(notNull("no_nulls"));
Assert.assertTrue("Should read: non-null column contains a non-null value", shouldRead);
- // TODO: Enable this test after #961 is fixed
- // shouldRead = shouldRead(notNull("map_not_null"));
- // Assert.assertTrue("Should read: map type is not skipped", shouldRead);
+ shouldRead = shouldRead(notNull("map_not_null"));
+ Assert.assertTrue("Should read: map type is not skipped", shouldRead);
shouldRead = shouldRead(notNull("struct_not_null"));
Assert.assertTrue("Should read: struct type is not skipped", shouldRead);
@@ -284,9 +280,8 @@ public class TestMetricsRowGroupFilter {
shouldRead = shouldRead(isNull("no_nulls"));
Assert.assertFalse("Should skip: non-null column contains no null values", shouldRead);
- // TODO: Enable this test after #961 is fixed
- // shouldRead = shouldRead(isNull("map_not_null"));
- // Assert.assertTrue("Should read: map type is not skipped", shouldRead);
+ shouldRead = shouldRead(isNull("map_not_null"));
+ Assert.assertTrue("Should read: map type is not skipped", shouldRead);
shouldRead = shouldRead(isNull("struct_not_null"));
Assert.assertTrue("Should read: struct type is not skipped", shouldRead);
diff --git a/data/src/test/java/org/apache/iceberg/data/TestReadProjection.java b/data/src/test/java/org/apache/iceberg/data/TestReadProjection.java
index 08dc733..2dee8d1 100644
--- a/data/src/test/java/org/apache/iceberg/data/TestReadProjection.java
+++ b/data/src/test/java/org/apache/iceberg/data/TestReadProjection.java
@@ -577,4 +577,33 @@ public abstract class TestReadProjection {
Assert.assertNull("Should not project y", projectedP2.getField("y"));
Assert.assertEquals("Should project null z", null, projectedP2.getField("z"));
}
+
+ @Test
+ public void testAddedFieldsWithRequiredChildren() throws Exception {
+ Schema schema = new Schema(
+ Types.NestedField.required(1, "a", Types.LongType.get())
+ );
+
+ Record record = GenericRecord.create(schema.asStruct());
+ record.setField("a", 100L);
+
+ Schema addedFields = new Schema(
+ Types.NestedField.optional(1, "a", Types.LongType.get()),
+ Types.NestedField.optional(2, "b", Types.StructType.of(
+ Types.NestedField.required(3, "c", Types.LongType.get())
+ )),
+ Types.NestedField.optional(4, "d", Types.ListType.ofRequired(5, Types.LongType.get())),
+ Types.NestedField.optional(6, "e", Types.MapType.ofRequired(7, 8, Types.LongType.get(), Types.LongType.get()))
+ );
+
+ Record projected = writeAndRead("add_fields_with_required_children_projection", schema, addedFields, record);
+ Assert.assertEquals("Should contain the correct value in column 1", projected.get(0), 100L);
+ Assert.assertEquals("Should contain the correct value in column a", projected.getField("a"), 100L);
+ Assert.assertNull("Should contain empty value in new column 2", projected.get(1));
+ Assert.assertNull("Should contain empty value in column b", projected.getField("b"));
+ Assert.assertNull("Should contain empty value in new column 4", projected.get(2));
+ Assert.assertNull("Should contain empty value in column d", projected.getField("d"));
+ Assert.assertNull("Should contain empty value in new column 6", projected.get(3));
+ Assert.assertNull("Should contain empty value in column e", projected.getField("e"));
+ }
}
diff --git a/orc/src/main/java/org/apache/iceberg/orc/ORCSchemaUtil.java b/orc/src/main/java/org/apache/iceberg/orc/ORCSchemaUtil.java
index 27645a4..07be70f 100644
--- a/orc/src/main/java/org/apache/iceberg/orc/ORCSchemaUtil.java
+++ b/orc/src/main/java/org/apache/iceberg/orc/ORCSchemaUtil.java
@@ -269,21 +269,21 @@ public final class ORCSchemaUtil {
.map(OrcField::name)
.orElse(nestedField.name() + "_r" + nestedField.fieldId());
TypeDescription childType = buildOrcProjection(nestedField.fieldId(), nestedField.type(),
- nestedField.isRequired(), mapping);
+ isRequired && nestedField.isRequired(), mapping);
orcType.addField(name, childType);
}
break;
case LIST:
Types.ListType list = (Types.ListType) type;
TypeDescription elementType = buildOrcProjection(list.elementId(), list.elementType(),
- list.isElementRequired(), mapping);
+ isRequired && list.isElementRequired(), mapping);
orcType = TypeDescription.createList(elementType);
break;
case MAP:
Types.MapType map = (Types.MapType) type;
- TypeDescription keyType = buildOrcProjection(map.keyId(), map.keyType(), true, mapping);
- TypeDescription valueType = buildOrcProjection(map.valueId(), map.valueType(), map.isValueRequired(),
- mapping);
+ TypeDescription keyType = buildOrcProjection(map.keyId(), map.keyType(), isRequired, mapping);
+ TypeDescription valueType = buildOrcProjection(map.valueId(), map.valueType(),
+ isRequired && map.isValueRequired(), mapping);
orcType = TypeDescription.createMap(keyType, valueType);
break;
default:
diff --git a/orc/src/test/java/org/apache/iceberg/orc/TestBuildOrcProjection.java b/orc/src/test/java/org/apache/iceberg/orc/TestBuildOrcProjection.java
index 2a86f23..12b74af 100644
--- a/orc/src/test/java/org/apache/iceberg/orc/TestBuildOrcProjection.java
+++ b/orc/src/test/java/org/apache/iceberg/orc/TestBuildOrcProjection.java
@@ -22,9 +22,12 @@ package org.apache.iceberg.orc;
import org.apache.iceberg.Schema;
import org.apache.iceberg.types.Types;
import org.apache.orc.TypeDescription;
+import org.junit.Rule;
import org.junit.Test;
+import org.junit.rules.ExpectedException;
import static org.apache.iceberg.types.Types.NestedField.optional;
+import static org.apache.iceberg.types.Types.NestedField.required;
import static org.junit.Assert.assertEquals;
/**
@@ -127,4 +130,54 @@ public class TestBuildOrcProjection {
assertEquals(TypeDescription.Category.STRING, nestedCol.findSubtype("b").getCategory());
}
+ @Test
+ public void testEvolutionAddContainerField() {
+ Schema baseSchema = new Schema(
+ required(1, "a", Types.IntegerType.get())
+ );
+ TypeDescription baseOrcSchema = ORCSchemaUtil.convert(baseSchema);
+
+ Schema evolvedSchema = new Schema(
+ required(1, "a", Types.IntegerType.get()),
+ optional(2, "b", Types.StructType.of(
+ required(3, "c", Types.LongType.get())
+ ))
+ );
+
+ TypeDescription newOrcSchema = ORCSchemaUtil.buildOrcProjection(evolvedSchema, baseOrcSchema);
+ assertEquals(2, newOrcSchema.getChildren().size());
+ assertEquals(TypeDescription.Category.INT, newOrcSchema.findSubtype("a").getCategory());
+ assertEquals(2, newOrcSchema.findSubtype("b_r2").getId());
+ assertEquals(TypeDescription.Category.STRUCT, newOrcSchema.findSubtype("b_r2").getCategory());
+ TypeDescription nestedCol = newOrcSchema.findSubtype("b_r2");
+ assertEquals(3, nestedCol.findSubtype("c_r3").getId());
+ assertEquals(TypeDescription.Category.LONG, nestedCol.findSubtype("c_r3").getCategory());
+ }
+
+ @Rule
+ public ExpectedException exceptionRule = ExpectedException.none();
+
+ @Test
+ public void testRequiredNestedFieldMissingInFile() {
+ exceptionRule.expect(IllegalArgumentException.class);
+ exceptionRule.expectMessage("Field 4 of type long is required and was not found");
+
+ Schema baseSchema = new Schema(
+ required(1, "a", Types.IntegerType.get()),
+ required(2, "b", Types.StructType.of(
+ required(3, "c", Types.LongType.get())
+ ))
+ );
+ TypeDescription baseOrcSchema = ORCSchemaUtil.convert(baseSchema);
+
+ Schema evolvedSchema = new Schema(
+ required(1, "a", Types.IntegerType.get()),
+ required(2, "b", Types.StructType.of(
+ required(3, "c", Types.LongType.get()),
+ required(4, "d", Types.LongType.get())
+ ))
+ );
+
+ ORCSchemaUtil.buildOrcProjection(evolvedSchema, baseOrcSchema);
+ }
}