You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by bl...@apache.org on 2018/01/04 18:32:36 UTC

parquet-mr git commit: PARQUET-1141: Fix field ID handling

Repository: parquet-mr
Updated Branches:
  refs/heads/master da3e8eb7e -> 9191fbd20


PARQUET-1141: Fix field ID handling

There are two places where field IDs are dropped:
* The map and list type builders did not pass the field ID on to the group type they build
* ParquetMetadataConverter did not write field IDs to schema elements, and did not read the field ID of the root schema

Author: Ryan Blue <bl...@apache.org>

Closes #428 from rdblue/PARQUET-1141-fix-column-ids and squashes the following commits:

475a90ed7 [Ryan Blue] PARQUET-1141: Fix tests by adding Type$ID#getId.
e110c00a7 [Ryan Blue] PARQUET-1141: Fix IDs in ParquetMetadataConverter.
a63066a8c [Ryan Blue] PARQUET-1141: Fix IDs for lists and maps.


Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/9191fbd2
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/9191fbd2
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/9191fbd2

Branch: refs/heads/master
Commit: 9191fbd202cd76d03fc23057c5a16cac547d90df
Parents: da3e8eb
Author: Ryan Blue <bl...@apache.org>
Authored: Thu Jan 4 10:32:31 2018 -0800
Committer: Ryan Blue <bl...@apache.org>
Committed: Thu Jan 4 10:32:31 2018 -0800

----------------------------------------------------------------------
 .../main/java/org/apache/parquet/schema/Type.java | 10 ++++++++++
 .../java/org/apache/parquet/schema/Types.java     | 18 +++++++++++++++---
 .../converter/ParquetMetadataConverter.java       | 12 ++++++++++++
 3 files changed, 37 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/9191fbd2/parquet-column/src/main/java/org/apache/parquet/schema/Type.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Type.java b/parquet-column/src/main/java/org/apache/parquet/schema/Type.java
index 99222f9..176b9a6 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/Type.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/Type.java
@@ -20,6 +20,7 @@ package org.apache.parquet.schema;
 
 import static org.apache.parquet.Preconditions.checkNotNull;
 
+import java.io.Serializable;
 import java.util.List;
 
 import org.apache.parquet.io.InvalidRecordException;
@@ -45,6 +46,15 @@ abstract public class Type {
       this.id = id;
     }
 
+    /**
+     * For bean serialization, used by Cascading 3.
+     * @deprecated use {@link #intValue()} instead.
+     */
+    @Deprecated
+    public int getId() {
+      return id;
+    }
+
     public int intValue() {
       return id;
     }

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/9191fbd2/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
index 5526cfc..e81daae 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
@@ -1030,12 +1030,18 @@ public class Types {
       if (keyType == null) {
         keyType = STRING_KEY;
       }
+
+      GroupBuilder<GroupType> builder = buildGroup(repetition).as(OriginalType.MAP);
+      if (id != null) {
+        builder.id(id.intValue());
+      }
+
       if (valueType != null) {
-        return buildGroup(repetition).as(OriginalType.MAP)
+        return builder
             .repeatedGroup().addFields(keyType, valueType).named("map")
             .named(name);
       } else {
-        return buildGroup(repetition).as(OriginalType.MAP)
+        return builder
             .repeatedGroup().addFields(keyType).named("map")
             .named(name);
       }
@@ -1170,7 +1176,13 @@ public class Types {
       Preconditions.checkState(originalType == null,
           "LIST is already the logical type and can't be changed");
       Preconditions.checkNotNull(elementType, "List element type");
-      return buildGroup(repetition).as(OriginalType.LIST)
+
+      GroupBuilder<GroupType> builder = buildGroup(repetition).as(OriginalType.LIST);
+      if (id != null) {
+        builder.id(id.intValue());
+      }
+
+      return builder
           .repeatedGroup().addFields(elementType).named("list")
           .named(name);
     }

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/9191fbd2/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
index bba7e62..163056c 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
@@ -162,12 +162,18 @@ public class ParquetMetadataConverter {
         if (primitiveType.getTypeLength() > 0) {
           element.setType_length(primitiveType.getTypeLength());
         }
+        if (primitiveType.getId() != null) {
+          element.setField_id(primitiveType.getId().intValue());
+        }
         result.add(element);
       }
 
       @Override
       public void visit(MessageType messageType) {
         SchemaElement element = new SchemaElement(messageType.getName());
+        if (messageType.getId() != null) {
+          element.setField_id(messageType.getId().intValue());
+        }
         visitChildren(result, messageType.asGroupType(), element);
       }
 
@@ -178,6 +184,9 @@ public class ParquetMetadataConverter {
         if (groupType.getOriginalType() != null) {
           element.setConverted_type(getConvertedType(groupType.getOriginalType()));
         }
+        if (groupType.getId() != null) {
+          element.setField_id(groupType.getId().intValue());
+        }
         visitChildren(result, groupType, element);
       }
 
@@ -881,6 +890,9 @@ public class ParquetMetadataConverter {
     Iterator<SchemaElement> iterator = schema.iterator();
     SchemaElement root = iterator.next();
     Types.MessageTypeBuilder builder = Types.buildMessage();
+    if (root.isSetField_id()) {
+      builder.id(root.field_id);
+    }
     buildChildren(builder, iterator, root.getNum_children());
     return builder.named(root.name);
   }