You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by bl...@apache.org on 2018/01/04 18:32:36 UTC
parquet-mr git commit: PARQUET-1141: Fix field ID handling
Repository: parquet-mr
Updated Branches:
refs/heads/master da3e8eb7e -> 9191fbd20
PARQUET-1141: Fix field ID handling
There are two places where field IDs were being dropped:
* Map and list type builders were not passing the field ID on to the group type they build
* ParquetMetadataConverter was not writing field IDs for any schema element, and was not reading the ID of the root schema
Author: Ryan Blue <bl...@apache.org>
Closes #428 from rdblue/PARQUET-1141-fix-column-ids and squashes the following commits:
475a90ed7 [Ryan Blue] PARQUET-1141: Fix tests by adding Type$ID#getId.
e110c00a7 [Ryan Blue] PARQUET-1141: Fix IDs in ParquetMetadataConverter.
a63066a8c [Ryan Blue] PARQUET-1141: Fix IDs for lists and maps.
Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/9191fbd2
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/9191fbd2
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/9191fbd2
Branch: refs/heads/master
Commit: 9191fbd202cd76d03fc23057c5a16cac547d90df
Parents: da3e8eb
Author: Ryan Blue <bl...@apache.org>
Authored: Thu Jan 4 10:32:31 2018 -0800
Committer: Ryan Blue <bl...@apache.org>
Committed: Thu Jan 4 10:32:31 2018 -0800
----------------------------------------------------------------------
.../main/java/org/apache/parquet/schema/Type.java | 10 ++++++++++
.../java/org/apache/parquet/schema/Types.java | 18 +++++++++++++++---
.../converter/ParquetMetadataConverter.java | 12 ++++++++++++
3 files changed, 37 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/9191fbd2/parquet-column/src/main/java/org/apache/parquet/schema/Type.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Type.java b/parquet-column/src/main/java/org/apache/parquet/schema/Type.java
index 99222f9..176b9a6 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/Type.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/Type.java
@@ -20,6 +20,7 @@ package org.apache.parquet.schema;
import static org.apache.parquet.Preconditions.checkNotNull;
+import java.io.Serializable;
import java.util.List;
import org.apache.parquet.io.InvalidRecordException;
@@ -45,6 +46,15 @@ abstract public class Type {
this.id = id;
}
+ /**
+ * For bean serialization, used by Cascading 3.
+ * @deprecated use {@link #intValue()} instead.
+ */
+ @Deprecated
+ public int getId() {
+ return id;
+ }
+
public int intValue() {
return id;
}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/9191fbd2/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
index 5526cfc..e81daae 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
@@ -1030,12 +1030,18 @@ public class Types {
if (keyType == null) {
keyType = STRING_KEY;
}
+
+ GroupBuilder<GroupType> builder = buildGroup(repetition).as(OriginalType.MAP);
+ if (id != null) {
+ builder.id(id.intValue());
+ }
+
if (valueType != null) {
- return buildGroup(repetition).as(OriginalType.MAP)
+ return builder
.repeatedGroup().addFields(keyType, valueType).named("map")
.named(name);
} else {
- return buildGroup(repetition).as(OriginalType.MAP)
+ return builder
.repeatedGroup().addFields(keyType).named("map")
.named(name);
}
@@ -1170,7 +1176,13 @@ public class Types {
Preconditions.checkState(originalType == null,
"LIST is already the logical type and can't be changed");
Preconditions.checkNotNull(elementType, "List element type");
- return buildGroup(repetition).as(OriginalType.LIST)
+
+ GroupBuilder<GroupType> builder = buildGroup(repetition).as(OriginalType.LIST);
+ if (id != null) {
+ builder.id(id.intValue());
+ }
+
+ return builder
.repeatedGroup().addFields(elementType).named("list")
.named(name);
}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/9191fbd2/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
index bba7e62..163056c 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
@@ -162,12 +162,18 @@ public class ParquetMetadataConverter {
if (primitiveType.getTypeLength() > 0) {
element.setType_length(primitiveType.getTypeLength());
}
+ if (primitiveType.getId() != null) {
+ element.setField_id(primitiveType.getId().intValue());
+ }
result.add(element);
}
@Override
public void visit(MessageType messageType) {
SchemaElement element = new SchemaElement(messageType.getName());
+ if (messageType.getId() != null) {
+ element.setField_id(messageType.getId().intValue());
+ }
visitChildren(result, messageType.asGroupType(), element);
}
@@ -178,6 +184,9 @@ public class ParquetMetadataConverter {
if (groupType.getOriginalType() != null) {
element.setConverted_type(getConvertedType(groupType.getOriginalType()));
}
+ if (groupType.getId() != null) {
+ element.setField_id(groupType.getId().intValue());
+ }
visitChildren(result, groupType, element);
}
@@ -881,6 +890,9 @@ public class ParquetMetadataConverter {
Iterator<SchemaElement> iterator = schema.iterator();
SchemaElement root = iterator.next();
Types.MessageTypeBuilder builder = Types.buildMessage();
+ if (root.isSetField_id()) {
+ builder.id(root.field_id);
+ }
buildChildren(builder, iterator, root.getNum_children());
return builder.named(root.name);
}