You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by ju...@apache.org on 2016/09/08 21:48:46 UTC
parquet-mr git commit: PARQUET-660: Ignore extension fields in
protobuf messages.
Repository: parquet-mr
Updated Branches:
refs/heads/master 044de16c1 -> e54ca615f
PARQUET-660: Ignore extension fields in protobuf messages.
Currently, converting protobuf messages with extension can result in an uninformative error or a data corruption. A more detailed explanation in the corresponding [jira](https://issues.apache.org/jira/browse/PARQUET-660).
This patch simply ignores extension fields in protobuf messages.
In the longer run, I'd like to add a proper support for Protobuf extensions. This might take a little longer though, so I've decided to improve the current situation with this patch.
Author: Jakub Kukul <ja...@gmail.com>
Closes #351 from jkukul/master and squashes the following commits:
27580ab [Jakub Kukul] PARQUET-660: Throw Unsupported exception for messages with extensions.
db6e08b [Jakub Kukul] PARQUET-660: Refactor: Don't use additional variable for indexing fieldWriters.
e910a8a [Jakub Kukul] PARQUET-660: Refactor: Add missing indentation.
Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/e54ca615
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/e54ca615
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/e54ca615
Branch: refs/heads/master
Commit: e54ca615f213f5db6d34d9163c97eec98920d7a7
Parents: 044de16
Author: Jakub Kukul <ja...@gmail.com>
Authored: Thu Sep 8 14:48:42 2016 -0700
Committer: Julien Le Dem <ju...@dremio.com>
Committed: Thu Sep 8 14:48:42 2016 -0700
----------------------------------------------------------------------
.../parquet/proto/ProtoSchemaConverter.java | 30 ++++++++++----------
.../apache/parquet/proto/ProtoWriteSupport.java | 13 ++++++---
.../parquet/proto/ProtoWriteSupportTest.java | 15 ++++++++++
.../src/test/resources/TestProtobuf.proto | 11 +++++++
4 files changed, 50 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/e54ca615/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java
----------------------------------------------------------------------
diff --git a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java
index 8c9685a..3f6ed6b 100644
--- a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java
+++ b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java
@@ -86,21 +86,21 @@ public class ProtoSchemaConverter {
Type.Repetition repetition = getRepetition(descriptor);
JavaType javaType = descriptor.getJavaType();
switch (javaType) {
- case BOOLEAN : return builder.primitive(BOOLEAN, repetition);
- case INT : return builder.primitive(INT32, repetition);
- case LONG : return builder.primitive(INT64, repetition);
- case FLOAT : return builder.primitive(FLOAT, repetition);
- case DOUBLE: return builder.primitive(DOUBLE, repetition);
- case BYTE_STRING: return builder.primitive(BINARY, repetition);
- case STRING: return builder.primitive(BINARY, repetition).as(UTF8);
- case MESSAGE: {
- GroupBuilder<GroupBuilder<T>> group = builder.group(repetition);
- convertFields(group, descriptor.getMessageType().getFields());
- return group;
- }
- case ENUM: return builder.primitive(BINARY, repetition).as(ENUM);
- default:
- throw new UnsupportedOperationException("Cannot convert Protocol Buffer: unknown type " + javaType);
+ case BOOLEAN: return builder.primitive(BOOLEAN, repetition);
+ case INT: return builder.primitive(INT32, repetition);
+ case LONG: return builder.primitive(INT64, repetition);
+ case FLOAT: return builder.primitive(FLOAT, repetition);
+ case DOUBLE: return builder.primitive(DOUBLE, repetition);
+ case BYTE_STRING: return builder.primitive(BINARY, repetition);
+ case STRING: return builder.primitive(BINARY, repetition).as(UTF8);
+ case MESSAGE: {
+ GroupBuilder<GroupBuilder<T>> group = builder.group(repetition);
+ convertFields(group, descriptor.getMessageType().getFields());
+ return group;
+ }
+ case ENUM: return builder.primitive(BINARY, repetition).as(ENUM);
+ default:
+ throw new UnsupportedOperationException("Cannot convert Protocol Buffer: unknown type " + javaType);
}
}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/e54ca615/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java
----------------------------------------------------------------------
diff --git a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java
index d7f7a53..cef2b93 100644
--- a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java
+++ b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java
@@ -156,7 +156,6 @@ public class ProtoWriteSupport<T extends MessageOrBuilder> extends WriteSupport<
List<Descriptors.FieldDescriptor> fields = descriptor.getFields();
fieldWriters = (FieldWriter[]) Array.newInstance(FieldWriter.class, fields.size());
- int i = 0;
for (Descriptors.FieldDescriptor fieldDescriptor: fields) {
String name = fieldDescriptor.getName();
Type type = schema.getType(name);
@@ -169,8 +168,7 @@ public class ProtoWriteSupport<T extends MessageOrBuilder> extends WriteSupport<
writer.setFieldName(name);
writer.setIndex(schema.getFieldIndex(name));
- fieldWriters[i] = writer;
- i++;
+ fieldWriters[fieldDescriptor.getIndex()] = writer;
}
}
@@ -220,6 +218,13 @@ public class ProtoWriteSupport<T extends MessageOrBuilder> extends WriteSupport<
for (Map.Entry<Descriptors.FieldDescriptor, Object> entry : changedPbFields.entrySet()) {
Descriptors.FieldDescriptor fieldDescriptor = entry.getKey();
+
+ if(fieldDescriptor.isExtension()) {
+ // Field index of an extension field might overlap with a base field.
+ throw new UnsupportedOperationException(
+ "Cannot convert Protobuf message with extension field(s)");
+ }
+
int fieldIndex = fieldDescriptor.getIndex();
fieldWriters[fieldIndex].writeField(entry.getValue());
}
@@ -276,7 +281,7 @@ public class ProtoWriteSupport<T extends MessageOrBuilder> extends WriteSupport<
}
class IntWriter extends FieldWriter {
- @Override
+ @Override
final void writeRawValue(Object value) {
recordConsumer.addInteger((Integer) value);
}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/e54ca615/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java
----------------------------------------------------------------------
diff --git a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java
index 73f7734..3a273c9 100644
--- a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java
+++ b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java
@@ -165,4 +165,19 @@ public class ProtoWriteSupportTest {
inOrder.verify(readConsumerMock).endMessage();
Mockito.verifyNoMoreInteractions(readConsumerMock);
}
+
+ @Test(expected = UnsupportedOperationException.class)
+ public void testMessageWithExtensions() throws Exception {
+ RecordConsumer readConsumerMock = Mockito.mock(RecordConsumer.class);
+ ProtoWriteSupport instance = createReadConsumerInstance(TestProtobuf.Vehicle.class, readConsumerMock);
+
+ TestProtobuf.Vehicle.Builder msg = TestProtobuf.Vehicle.newBuilder();
+ msg.setHorsePower(300);
+ // Currently there's no support for extension fields. This test tests that the extension field
+ // will cause an exception.
+ msg.setExtension(TestProtobuf.Airplane.wingSpan, 50);
+
+ instance.write(msg.build());
+ }
+
}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/e54ca615/parquet-protobuf/src/test/resources/TestProtobuf.proto
----------------------------------------------------------------------
diff --git a/parquet-protobuf/src/test/resources/TestProtobuf.proto b/parquet-protobuf/src/test/resources/TestProtobuf.proto
index afa0f63..d7cdf03 100644
--- a/parquet-protobuf/src/test/resources/TestProtobuf.proto
+++ b/parquet-protobuf/src/test/resources/TestProtobuf.proto
@@ -137,3 +137,14 @@ message SecondCustomClassMessage {
}
//please place your unit test Protocol Buffer definitions here.
+
+message Vehicle {
+ optional int32 horsePower = 1;
+ extensions 100 to 199;
+}
+
+message Airplane {
+ extend Vehicle {
+ optional int32 wingSpan = 101;
+ }
+}