You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by bl...@apache.org on 2015/12/08 19:15:39 UTC
parquet-mr git commit: PARQUET-352: Add object model property to file footers.
Repository: parquet-mr
Updated Branches:
refs/heads/master f2615d9a6 -> dcd1c33f0
PARQUET-352: Add object model property to file footers.
WriteSupport now has a getName getter method. If it returns a non-null
string, that string is added to the file footer metadata under the key
writer.model.name. This is intended to help identify files that were
written incorrectly by a particular object model.
Author: Ryan Blue <bl...@apache.org>
Closes #289 from rdblue/PARQUET-352-add-object-model-property and squashes the following commits:
23f8f67 [Ryan Blue] PARQUET-352: Add object model property to file footers.
Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/dcd1c33f
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/dcd1c33f
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/dcd1c33f
Branch: refs/heads/master
Commit: dcd1c33f0dba247b43418b922c1c3a2fc432dc11
Parents: f2615d9
Author: Ryan Blue <bl...@apache.org>
Authored: Tue Dec 8 10:15:30 2015 -0800
Committer: Ryan Blue <bl...@apache.org>
Committed: Tue Dec 8 10:15:30 2015 -0800
----------------------------------------------------------------------
.../java/org/apache/parquet/avro/AvroWriteSupport.java | 5 +++++
.../org/apache/parquet/cascading/TupleWriteSupport.java | 5 +++++
.../parquet/hadoop/InternalParquetRecordWriter.java | 4 ++++
.../java/org/apache/parquet/hadoop/ParquetWriter.java | 2 ++
.../parquet/hadoop/api/DelegatingWriteSupport.java | 5 +++++
.../org/apache/parquet/hadoop/api/WriteSupport.java | 12 ++++++++++++
.../parquet/hadoop/example/GroupWriteSupport.java | 5 +++++
.../org/apache/parquet/hadoop/TestParquetWriter.java | 3 +++
.../java/org/apache/parquet/pig/TupleWriteSupport.java | 5 +++++
.../org/apache/parquet/proto/ProtoWriteSupport.java | 5 +++++
.../org/apache/parquet/scrooge/ScroogeWriteSupport.java | 5 +++++
.../apache/parquet/hadoop/thrift/TBaseWriteSupport.java | 5 +++++
.../parquet/hadoop/thrift/ThriftBytesWriteSupport.java | 5 +++++
.../parquet/hadoop/thrift/ThriftWriteSupport.java | 5 +++++
.../parquet/thrift/pig/TupleToThriftWriteSupport.java | 5 +++++
15 files changed, 76 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-avro/src/main/java/org/apache/parquet/avro/AvroWriteSupport.java
----------------------------------------------------------------------
diff --git a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroWriteSupport.java b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroWriteSupport.java
index 48fc01e..c75bb03 100644
--- a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroWriteSupport.java
+++ b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroWriteSupport.java
@@ -92,6 +92,11 @@ public class AvroWriteSupport<T> extends WriteSupport<T> {
this.model = model;
}
+ @Override
+ public String getName() {
+ return "avro";
+ }
+
/**
* @see org.apache.parquet.avro.AvroParquetOutputFormat#setSchema(org.apache.hadoop.mapreduce.Job, org.apache.avro.Schema)
*/
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-cascading/src/main/java/org/apache/parquet/cascading/TupleWriteSupport.java
----------------------------------------------------------------------
diff --git a/parquet-cascading/src/main/java/org/apache/parquet/cascading/TupleWriteSupport.java b/parquet-cascading/src/main/java/org/apache/parquet/cascading/TupleWriteSupport.java
index 2489b2e..032f534 100644
--- a/parquet-cascading/src/main/java/org/apache/parquet/cascading/TupleWriteSupport.java
+++ b/parquet-cascading/src/main/java/org/apache/parquet/cascading/TupleWriteSupport.java
@@ -42,6 +42,11 @@ public class TupleWriteSupport extends WriteSupport<TupleEntry> {
public static final String PARQUET_CASCADING_SCHEMA = "parquet.cascading.schema";
@Override
+ public String getName() {
+ return "cascading";
+ }
+
+ @Override
public WriteContext init(Configuration configuration) {
String schema = configuration.get(PARQUET_CASCADING_SCHEMA);
rootSchema = MessageTypeParser.parseMessageType(schema);
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java
index 87b23a2..2b1d48b 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java
@@ -119,6 +119,10 @@ class InternalParquetRecordWriter<T> {
flushRowGroupToStore();
FinalizedWriteContext finalWriteContext = writeSupport.finalizeWrite();
Map<String, String> finalMetadata = new HashMap<String, String>(extraMetaData);
+ String modelName = writeSupport.getName();
+ if (modelName != null) {
+ finalMetadata.put(ParquetWriter.OBJECT_MODEL_NAME_PROP, modelName);
+ }
finalMetadata.putAll(finalWriteContext.getExtraMetaData());
parquetFileWriter.end(finalMetadata);
}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
index e2521fb..be8c0cd 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
@@ -45,6 +45,8 @@ public class ParquetWriter<T> implements Closeable {
public static final WriterVersion DEFAULT_WRITER_VERSION =
WriterVersion.PARQUET_1_0;
+ public static final String OBJECT_MODEL_NAME_PROP = "writer.model.name";
+
// max size (bytes) to write as padding and the min size of a row group
public static final int MAX_PADDING_SIZE_DEFAULT = 0;
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/api/DelegatingWriteSupport.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/api/DelegatingWriteSupport.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/api/DelegatingWriteSupport.java
index 207bb1a..66a4b01 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/api/DelegatingWriteSupport.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/api/DelegatingWriteSupport.java
@@ -55,6 +55,11 @@ public class DelegatingWriteSupport<T> extends WriteSupport<T> {
}
@Override
+ public String getName() {
+ return delegate.getName();
+ }
+
+ @Override
public WriteSupport.FinalizedWriteContext finalizeWrite() {
return delegate.finalizeWrite();
}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/api/WriteSupport.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/api/WriteSupport.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/api/WriteSupport.java
index 91c37c3..1a61faa 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/api/WriteSupport.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/api/WriteSupport.java
@@ -121,6 +121,18 @@ abstract public class WriteSupport<T> {
public abstract void write(T record);
/**
+ * Called to get a name to identify the WriteSupport object model.
+ * If not null, this is added to the file footer metadata.
+ * <p>
+ * Defining this method will be required in a future API version.
+ *
+ * @return a String name for file metadata.
+ */
+ public String getName() {
+ return null;
+ }
+
+ /**
* called once in the end after the last record was written
* @return information to be added in the file
*/
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/example/GroupWriteSupport.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/example/GroupWriteSupport.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/example/GroupWriteSupport.java
index ee59a6e..c038f25 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/example/GroupWriteSupport.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/example/GroupWriteSupport.java
@@ -64,6 +64,11 @@ public class GroupWriteSupport extends WriteSupport<Group> {
}
@Override
+ public String getName() {
+ return "example";
+ }
+
+ @Override
public org.apache.parquet.hadoop.api.WriteSupport.WriteContext init(Configuration configuration) {
// if present, prefer the schema passed to the constructor
if (schema == null) {
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java
index e327643..6fc3c72 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java
@@ -135,6 +135,9 @@ public class TestParquetWriter {
}
}
}
+ assertEquals("Object model property should be example",
+ "example", footer.getFileMetaData().getKeyValueMetaData()
+ .get(ParquetWriter.OBJECT_MODEL_NAME_PROP));
}
}
}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-pig/src/main/java/org/apache/parquet/pig/TupleWriteSupport.java
----------------------------------------------------------------------
diff --git a/parquet-pig/src/main/java/org/apache/parquet/pig/TupleWriteSupport.java b/parquet-pig/src/main/java/org/apache/parquet/pig/TupleWriteSupport.java
index 829fe70..2cf676c 100644
--- a/parquet-pig/src/main/java/org/apache/parquet/pig/TupleWriteSupport.java
+++ b/parquet-pig/src/main/java/org/apache/parquet/pig/TupleWriteSupport.java
@@ -67,6 +67,11 @@ public class TupleWriteSupport extends WriteSupport<Tuple> {
this.rootPigSchema = pigSchema;
}
+ @Override
+ public String getName() {
+ return "pig";
+ }
+
public Schema getPigSchema() {
return rootPigSchema;
}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java
----------------------------------------------------------------------
diff --git a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java
index 40e36d5..d7f7a53 100644
--- a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java
+++ b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java
@@ -62,6 +62,11 @@ public class ProtoWriteSupport<T extends MessageOrBuilder> extends WriteSupport<
this.protoMessage = protobufClass;
}
+ @Override
+ public String getName() {
+ return "protobuf";
+ }
+
public static void setSchema(Configuration configuration, Class<? extends Message> protoClass) {
configuration.setClass(PB_CLASS_WRITE, protoClass, Message.class);
}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-scrooge/src/main/java/org/apache/parquet/scrooge/ScroogeWriteSupport.java
----------------------------------------------------------------------
diff --git a/parquet-scrooge/src/main/java/org/apache/parquet/scrooge/ScroogeWriteSupport.java b/parquet-scrooge/src/main/java/org/apache/parquet/scrooge/ScroogeWriteSupport.java
index a478bf7..7b72b73 100644
--- a/parquet-scrooge/src/main/java/org/apache/parquet/scrooge/ScroogeWriteSupport.java
+++ b/parquet-scrooge/src/main/java/org/apache/parquet/scrooge/ScroogeWriteSupport.java
@@ -49,6 +49,11 @@ public class ScroogeWriteSupport<T extends ThriftStruct> extends AbstractThriftW
}
@Override
+ public String getName() {
+ return "scrooge";
+ }
+
+ @Override
protected StructType getThriftStruct() {
ScroogeStructConverter schemaConverter = new ScroogeStructConverter();
return schemaConverter.convert(thriftClass);
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/TBaseWriteSupport.java
----------------------------------------------------------------------
diff --git a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/TBaseWriteSupport.java b/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/TBaseWriteSupport.java
index b457278..56bf299 100644
--- a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/TBaseWriteSupport.java
+++ b/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/TBaseWriteSupport.java
@@ -46,6 +46,11 @@ public class TBaseWriteSupport<T extends TBase<?, ?>> extends AbstractThriftWrit
}
@Override
+ public String getName() {
+ return "thrift";
+ }
+
+ @Override
protected StructType getThriftStruct() {
return ThriftSchemaConverter.toStructType(thriftClass);
}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftBytesWriteSupport.java
----------------------------------------------------------------------
diff --git a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftBytesWriteSupport.java b/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftBytesWriteSupport.java
index 6db769e..f6f511b 100644
--- a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftBytesWriteSupport.java
+++ b/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftBytesWriteSupport.java
@@ -93,6 +93,11 @@ public class ThriftBytesWriteSupport extends WriteSupport<BytesWritable> {
}
@Override
+ public String getName() {
+ return "thrift";
+ }
+
+ @Override
public WriteContext init(Configuration configuration) {
if (this.protocolFactory == null) {
try {
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftWriteSupport.java
----------------------------------------------------------------------
diff --git a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftWriteSupport.java b/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftWriteSupport.java
index e81caa8..a9864ff 100644
--- a/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftWriteSupport.java
+++ b/parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftWriteSupport.java
@@ -59,6 +59,11 @@ public class ThriftWriteSupport<T extends TBase<?,?>> extends WriteSupport<T> {
}
@Override
+ public String getName() {
+ return writeSupport.getName();
+ }
+
+ @Override
public WriteContext init(Configuration configuration) {
return this.writeSupport.init(configuration);
}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/dcd1c33f/parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/TupleToThriftWriteSupport.java
----------------------------------------------------------------------
diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/TupleToThriftWriteSupport.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/TupleToThriftWriteSupport.java
index 53fc16d..b8add82 100644
--- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/TupleToThriftWriteSupport.java
+++ b/parquet-thrift/src/main/java/org/apache/parquet/thrift/pig/TupleToThriftWriteSupport.java
@@ -49,6 +49,11 @@ public class TupleToThriftWriteSupport extends WriteSupport<Tuple> {
this.className = className;
}
+ @Override
+ public String getName() {
+ return "thrift";
+ }
+
@SuppressWarnings({"rawtypes", "unchecked"})
@Override
public WriteContext init(Configuration configuration) {