You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by bl...@apache.org on 2015/12/08 19:15:39 UTC

parquet-mr git commit: PARQUET-352: Add object model property to file footers.

Repository: parquet-mr
Updated Branches:
  refs/heads/master f2615d9a6 -> dcd1c33f0


PARQUET-352: Add object model property to file footers.

WriteSupport now has a getName getter method. If it returns a non-null
string, that value is added to the file footer as writer.model.name. This
is intended to help identify files that were written incorrectly by an
object model.

Author: Ryan Blue <bl...@apache.org>

Closes #289 from rdblue/PARQUET-352-add-object-model-property and squashes the following commits:

23f8f67 [Ryan Blue] PARQUET-352: Add object model property to file footers.


Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/dcd1c33f
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/dcd1c33f
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/dcd1c33f

Branch: refs/heads/master
Commit: dcd1c33f0dba247b43418b922c1c3a2fc432dc11
Parents: f2615d9
Author: Ryan Blue <bl...@apache.org>
Authored: Tue Dec 8 10:15:30 2015 -0800
Committer: Ryan Blue <bl...@apache.org>
Committed: Tue Dec 8 10:15:30 2015 -0800

----------------------------------------------------------------------
 .../java/org/apache/parquet/avro/AvroWriteSupport.java  |  5 +++++
 .../org/apache/parquet/cascading/TupleWriteSupport.java |  5 +++++
 .../parquet/hadoop/InternalParquetRecordWriter.java     |  4 ++++
 .../java/org/apache/parquet/hadoop/ParquetWriter.java   |  2 ++
 .../parquet/hadoop/api/DelegatingWriteSupport.java      |  5 +++++
 .../org/apache/parquet/hadoop/api/WriteSupport.java     | 12 ++++++++++++
 .../parquet/hadoop/example/GroupWriteSupport.java       |  5 +++++
 .../org/apache/parquet/hadoop/TestParquetWriter.java    |  3 +++
 .../java/org/apache/parquet/pig/TupleWriteSupport.java  |  5 +++++
 .../org/apache/parquet/proto/ProtoWriteSupport.java     |  5 +++++
 .../org/apache/parquet/scrooge/ScroogeWriteSupport.java |  5 +++++
 .../apache/parquet/hadoop/thrift/TBaseWriteSupport.java |  5 +++++
 .../parquet/hadoop/thrift/ThriftBytesWriteSupport.java  |  5 +++++
 .../parquet/hadoop/thrift/ThriftWriteSupport.java       |  5 +++++
 .../parquet/thrift/pig/TupleToThriftWriteSupport.java   |  5 +++++
 15 files changed, 76 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-avro/src/main/java/org/apache/parquet/avro/AvroWriteSupport.java
----------------------------------------------------------------------
diff --git a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroWriteSupport.java b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroWriteSupport.java
index 48fc01e..c75bb03 100644
--- a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroWriteSupport.java
+++ b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroWriteSupport.java
@@ -92,6 +92,11 @@ public class AvroWriteSupport<T> extends WriteSupport<T> {
     this.model = model;
   }
 
+  @Override
+  public String getName() {
+    return "avro";
+  }
+
   /**
    * @see org.apache.parquet.avro.AvroParquetOutputFormat#setSchema(org.apache.hadoop.mapreduce.Job, org.apache.avro.Schema)
    */

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-cascading/src/main/java/org/apache/parquet/cascading/TupleWriteSupport.java
----------------------------------------------------------------------
diff --git a/parquet-cascading/src/main/java/org/apache/parquet/cascading/TupleWriteSupport.java b/parquet-cascading/src/main/java/org/apache/parquet/cascading/TupleWriteSupport.java
index 2489b2e..032f534 100644
--- a/parquet-cascading/src/main/java/org/apache/parquet/cascading/TupleWriteSupport.java
+++ b/parquet-cascading/src/main/java/org/apache/parquet/cascading/TupleWriteSupport.java
@@ -42,6 +42,11 @@ public class TupleWriteSupport extends WriteSupport<TupleEntry> {
   public static final String PARQUET_CASCADING_SCHEMA = "parquet.cascading.schema";
 
   @Override
+  public String getName() {
+    return "cascading";
+  }
+
+  @Override
   public WriteContext init(Configuration configuration) {
     String schema = configuration.get(PARQUET_CASCADING_SCHEMA);
     rootSchema = MessageTypeParser.parseMessageType(schema);

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java
index 87b23a2..2b1d48b 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java
@@ -119,6 +119,10 @@ class InternalParquetRecordWriter<T> {
     flushRowGroupToStore();
     FinalizedWriteContext finalWriteContext = writeSupport.finalizeWrite();
     Map<String, String> finalMetadata = new HashMap<String, String>(extraMetaData);
+    String modelName = writeSupport.getName();
+    if (modelName != null) {
+      finalMetadata.put(ParquetWriter.OBJECT_MODEL_NAME_PROP, modelName);
+    }
     finalMetadata.putAll(finalWriteContext.getExtraMetaData());
     parquetFileWriter.end(finalMetadata);
   }

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
index e2521fb..be8c0cd 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
@@ -45,6 +45,8 @@ public class ParquetWriter<T> implements Closeable {
   public static final WriterVersion DEFAULT_WRITER_VERSION =
       WriterVersion.PARQUET_1_0;
 
+  public static final String OBJECT_MODEL_NAME_PROP = "writer.model.name";
+
   // max size (bytes) to write as padding and the min size of a row group
   public static final int MAX_PADDING_SIZE_DEFAULT = 0;
 

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/api/DelegatingWriteSupport.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/api/DelegatingWriteSupport.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/api/DelegatingWriteSupport.java
index 207bb1a..66a4b01 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/api/DelegatingWriteSupport.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/api/DelegatingWriteSupport.java
@@ -55,6 +55,11 @@ public class DelegatingWriteSupport<T> extends WriteSupport<T> {
   }
 
   @Override
+  public String getName() {
+    return delegate.getName();
+  }
+
+  @Override
   public WriteSupport.FinalizedWriteContext finalizeWrite() {
     return delegate.finalizeWrite();
   }

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/api/WriteSupport.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/api/WriteSupport.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/api/WriteSupport.java
index 91c37c3..1a61faa 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/api/WriteSupport.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/api/WriteSupport.java
@@ -121,6 +121,18 @@ abstract public class WriteSupport<T> {
   public abstract void write(T record);
 
   /**
+   * Called to get a name to identify the WriteSupport object model.
+   * If not null, this is added to the file footer metadata.
+   * <p>
+   * Defining this method will be required in a future API version.
+   *
+   * @return a String name for file metadata.
+   */
+  public String getName() {
+    return null;
+  }
+
+  /**
    * called once in the end after the last record was written
    * @return information to be added in the file
    */

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/example/GroupWriteSupport.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/example/GroupWriteSupport.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/example/GroupWriteSupport.java
index ee59a6e..c038f25 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/example/GroupWriteSupport.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/example/GroupWriteSupport.java
@@ -64,6 +64,11 @@ public class GroupWriteSupport extends WriteSupport<Group> {
   }
 
   @Override
+  public String getName() {
+    return "example";
+  }
+
+  @Override
   public org.apache.parquet.hadoop.api.WriteSupport.WriteContext init(Configuration configuration) {
     // if present, prefer the schema passed to the constructor
     if (schema == null) {

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java
index e327643..6fc3c72 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java
@@ -135,6 +135,9 @@ public class TestParquetWriter {
             }
           }
         }
+        assertEquals("Object model property should be example",
+            "example", footer.getFileMetaData().getKeyValueMetaData()
+                .get(ParquetWriter.OBJECT_MODEL_NAME_PROP));
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-pig/src/main/java/org/apache/parquet/pig/TupleWriteSupport.java
----------------------------------------------------------------------
diff --git a/parquet-pig/src/main/java/org/apache/parquet/pig/TupleWriteSupport.java b/parquet-pig/src/main/java/org/apache/parquet/pig/TupleWriteSupport.java
index 829fe70..2cf676c 100644
--- a/parquet-pig/src/main/java/org/apache/parquet/pig/TupleWriteSupport.java
+++ b/parquet-pig/src/main/java/org/apache/parquet/pig/TupleWriteSupport.java
@@ -67,6 +67,11 @@ public class TupleWriteSupport extends WriteSupport<Tuple> {
     this.rootPigSchema = pigSchema;
   }
 
+  @Override
+  public String getName() {
+    return "pig";
+  }
+
   public Schema getPigSchema() {
     return rootPigSchema;
   }

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java
----------------------------------------------------------------------
diff --git a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java
index 40e36d5..d7f7a53 100644
--- a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java
+++ b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java
@@ -62,6 +62,11 @@ public class ProtoWriteSupport<T extends MessageOrBuilder> extends WriteSupport<
     this.protoMessage = protobufClass;
   }
 
+  @Override
+  public String getName() {
+    return "protobuf";
+  }
+
   public static void setSchema(Configuration configuration, Class<? extends Message> protoClass) {
     configuration.setClass(PB_CLASS_WRITE, protoClass, Message.class);
   }

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-scrooge/src/main/java/org/apache/parquet/scrooge/ScroogeWriteSupport.java
----------------------------------------------------------------------
diff --git a/parquet-scrooge/src/main/java/org/apache/parquet/scrooge/ScroogeWriteSupport.java b/parquet-scrooge/src/main/java/org/apache/parquet/scrooge/ScroogeWriteSupport.java
index a478bf7..7b72b73 100644
--- a/parquet-scrooge/src/main/java/org/apache/parquet/scrooge/ScroogeWriteSupport.java
+++ b/parquet-scrooge/src/main/java/org/apache/parquet/scrooge/ScroogeWriteSupport.java
@@ -49,6 +49,11 @@ public class ScroogeWriteSupport<T extends ThriftStruct> extends AbstractThriftW
   }
 
   @Override
+  public String getName() {
+    return "scrooge";
+  }
+
+  @Override
   protected StructType getThriftStruct() {
     ScroogeStructConverter schemaConverter = new ScroogeStructConverter();
     return schemaConverter.convert(thriftClass);

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/TBaseWriteSupport.java
----------------------------------------------------------------------
diff --git a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/TBaseWriteSupport.java b/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/TBaseWriteSupport.java
index b457278..56bf299 100644
--- a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/TBaseWriteSupport.java
+++ b/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/TBaseWriteSupport.java
@@ -46,6 +46,11 @@ public class TBaseWriteSupport<T extends TBase<?, ?>> extends AbstractThriftWrit
   }
 
   @Override
+  public String getName() {
+    return "thrift";
+  }
+
+  @Override
   protected StructType getThriftStruct() {
     return ThriftSchemaConverter.toStructType(thriftClass);
   }

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftBytesWriteSupport.java
----------------------------------------------------------------------
diff --git a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftBytesWriteSupport.java b/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftBytesWriteSupport.java
index 6db769e..f6f511b 100644
--- a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftBytesWriteSupport.java
+++ b/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftBytesWriteSupport.java
@@ -93,6 +93,11 @@ public class ThriftBytesWriteSupport extends WriteSupport<BytesWritable> {
   }
 
   @Override
+  public String getName() {
+    return "thrift";
+  }
+
+  @Override
   public WriteContext init(Configuration configuration) {
     if (this.protocolFactory == null) {
       try {

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftWriteSupport.java
----------------------------------------------------------------------
diff --git a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftWriteSupport.java b/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftWriteSupport.java
index e81caa8..a9864ff 100644
--- a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftWriteSupport.java
+++ b/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftWriteSupport.java
@@ -59,6 +59,11 @@ public class ThriftWriteSupport<T extends TBase<?,?>> extends WriteSupport<T> {
   }
 
   @Override
+  public String getName() {
+    return writeSupport.getName();
+  }
+
+  @Override
   public WriteContext init(Configuration configuration) {
     return this.writeSupport.init(configuration);
   }

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/TupleToThriftWriteSupport.java
----------------------------------------------------------------------
diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/TupleToThriftWriteSupport.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/TupleToThriftWriteSupport.java
index 53fc16d..b8add82 100644
--- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/TupleToThriftWriteSupport.java
+++ b/parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/TupleToThriftWriteSupport.java
@@ -49,6 +49,11 @@ public class TupleToThriftWriteSupport extends WriteSupport<Tuple> {
     this.className = className;
   }
 
+  @Override
+  public String getName() {
+    return "thrift";
+  }
+
   @SuppressWarnings({"rawtypes", "unchecked"})
   @Override
   public WriteContext init(Configuration configuration) {