You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by ju...@apache.org on 2016/02/06 20:57:27 UTC
parquet-mr git commit: PARQUET-385 PARQUET-379: Fixes strict schema
merging
Repository: parquet-mr
Updated Branches:
refs/heads/master a4acf5333 -> c26fa7881
PARQUET-385 PARQUET-379: Fixes strict schema merging
This PR fixes strict mode schema merging. To merge two `PrimitiveType` `t1` and `t2`, they must satisfy the following conditions:
1. `t1` and `t2` have the same primitive type name
1. `t1` and `t2` either
- don't have original type, or
- have the same original type
1. If `t1` and `t2` are both `FIXED_LEN_BYTE_ARRAY`, they should have the same length
Also, merged schema now preserves original name if there's any.
Author: Cheng Lian <li...@databricks.com>
Closes #315 from liancheng/fix-strict-schema-merge and squashes the following commits:
a29138c [Cheng Lian] Addresses PR comment
1ac804e [Cheng Lian] Fixes strict schema merging
Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/c26fa788
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/c26fa788
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/c26fa788
Branch: refs/heads/master
Commit: c26fa78817f30cc3eb91165b783e07fb80d80f59
Parents: a4acf53
Author: Cheng Lian <li...@databricks.com>
Authored: Sat Feb 6 11:57:19 2016 -0800
Committer: Julien Le Dem <ju...@dremio.com>
Committed: Sat Feb 6 11:57:19 2016 -0800
----------------------------------------------------------------------
.../apache/parquet/schema/PrimitiveType.java | 36 +++++++++++++++-----
.../apache/parquet/schema/TestMessageType.java | 35 ++++++++++++++++---
2 files changed, 59 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c26fa788/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
index 5c6e460..1cdc6c3 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -514,16 +514,36 @@ public final class PrimitiveType extends Type {
return union(toMerge, true);
}
+ private void reportSchemaMergeError(Type toMerge) {
+ throw new IncompatibleSchemaModificationException("can not merge type " + toMerge + " into " + this);
+ }
+
@Override
protected Type union(Type toMerge, boolean strict) {
- if (!toMerge.isPrimitive() || (strict && !primitive.equals(toMerge.asPrimitiveType().getPrimitiveTypeName()))) {
- throw new IncompatibleSchemaModificationException("can not merge type " + toMerge + " into " + this);
+ if (!toMerge.isPrimitive()) {
+ reportSchemaMergeError(toMerge);
+ }
+
+ if (strict) {
+ // Can't merge primitive fields of different type names or different original types
+ if (!primitive.equals(toMerge.asPrimitiveType().getPrimitiveTypeName()) ||
+ getOriginalType() != toMerge.getOriginalType()) {
+ reportSchemaMergeError(toMerge);
+ }
+
+ // Can't merge FIXED_LEN_BYTE_ARRAY fields of different lengths
+ int toMergeLength = toMerge.asPrimitiveType().getTypeLength();
+ if (primitive == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY && length != toMergeLength) {
+ reportSchemaMergeError(toMerge);
+ }
}
- Types.PrimitiveBuilder<PrimitiveType> builder = Types.primitive(
- primitive, toMerge.getRepetition());
+
+ Types.PrimitiveBuilder<PrimitiveType> builder = Types.primitive(primitive, toMerge.getRepetition());
+
if (PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY == primitive) {
builder.length(length);
}
- return builder.named(getName());
+
+ return builder.as(getOriginalType()).named(getName());
}
}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/c26fa788/parquet-column/src/test/java/org/apache/parquet/schema/TestMessageType.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestMessageType.java b/parquet-column/src/test/java/org/apache/parquet/schema/TestMessageType.java
index ca5d939..438fae9 100644
--- a/parquet-column/src/test/java/org/apache/parquet/schema/TestMessageType.java
+++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestMessageType.java
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -18,6 +18,7 @@
*/
package org.apache.parquet.schema;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
@@ -131,6 +132,33 @@ public class TestMessageType {
} catch (IncompatibleSchemaModificationException e) {
assertEquals("can not merge type optional int32 a into optional binary a", e.getMessage());
}
+
+ MessageType t9 = Types.buildMessage()
+ .addField(Types.optional(BINARY).as(OriginalType.UTF8).named("a"))
+ .named("root1");
+ MessageType t10 = Types.buildMessage()
+ .addField(Types.optional(BINARY).named("a"))
+ .named("root1");
+ assertEquals(t9.union(t9), t9);
+ try {
+ t9.union(t10);
+ fail("moving from BINARY (UTF8) to BINARY");
+ } catch (IncompatibleSchemaModificationException e) {
+ assertEquals("can not merge type optional binary a into optional binary a (UTF8)", e.getMessage());
+ }
+
+ MessageType t11 = Types.buildMessage()
+ .addField(Types.optional(FIXED_LEN_BYTE_ARRAY).length(10).named("a"))
+ .named("root1");
+ MessageType t12 = Types.buildMessage()
+ .addField(Types.optional(FIXED_LEN_BYTE_ARRAY).length(20).named("a"))
+ .named("root2");
+ try {
+ t11.union(t12);
+ fail("moving from FIXED_LEN_BYTE_ARRAY(10) to FIXED_LEN_BYTE_ARRAY(20)");
+ } catch (IncompatibleSchemaModificationException e) {
+ assertEquals("can not merge type optional fixed_len_byte_array(20) a into optional fixed_len_byte_array(10) a", e.getMessage());
+ }
}
@Test
@@ -145,5 +173,4 @@ public class TestMessageType {
assertEquals(schema, schema2);
assertEquals(schema.toString(), schema2.toString());
}
-
}