You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by ju...@apache.org on 2017/04/21 23:07:58 UTC

parquet-mr git commit: PARQUET-665 Adds support for proto3

Repository: parquet-mr
Updated Branches:
  refs/heads/master 2fd62ee4d -> 70f28810a


PARQUET-665 Adds support for proto3

This change bumps the protobuf version and adds
tests to show compatibility with proto3. It does
not actually change anything else.

Tests are mostly identical to existing tests, and tests
that tested functionality not present in proto3 are not
present (such as groups and extensions). Proto3
oneof and map are represented in the tests.

Tested by running `mvn test --am --projects parquet-protobuf`

Author: Mark Chua <ma...@asana.com>

Closes #407 from markchua/mkc/proto3 and squashes the following commits:

40ef997 [Mark Chua] PARQUET-665 Adds support for proto3


Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/70f28810
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/70f28810
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/70f28810

Branch: refs/heads/master
Commit: 70f28810a5547219e18ffc3465f519c454fee6e5
Parents: 2fd62ee
Author: Mark Chua <ma...@asana.com>
Authored: Fri Apr 21 16:07:55 2017 -0700
Committer: Julien Le Dem <ju...@apache.org>
Committed: Fri Apr 21 16:07:55 2017 -0700

----------------------------------------------------------------------
 .travis.yml                                     |   9 +-
 parquet-protobuf/pom.xml                        |   5 +-
 .../proto/ProtoInputOutputFormatTest.java       |  79 +++++++++-
 .../parquet/proto/ProtoRecordConverterTest.java | 156 ++++++++++++++++---
 .../parquet/proto/ProtoSchemaConverterTest.java |  63 +++++++-
 .../parquet/proto/ProtoWriteSupportTest.java    | 138 +++++++++++++++-
 .../src/test/resources/TestProto3.proto         | 139 +++++++++++++++++
 7 files changed, 556 insertions(+), 33 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/70f28810/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index 231405e..72d7030 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,11 +4,14 @@ before_install:
   - sudo apt-get install build-essential
   - mkdir protobuf_install
   - pushd protobuf_install
-  - wget https://github.com/google/protobuf/releases/download/v2.5.0/protobuf-2.5.0.tar.gz
-  - tar xzf protobuf-2.5.0.tar.gz
-  - cd  protobuf-2.5.0
+  - wget https://github.com/google/protobuf/archive/v3.2.0.tar.gz -O protobuf-3.2.0.tar.gz
+  - tar xzf protobuf-3.2.0.tar.gz
+  - cd protobuf-3.2.0
+  - sudo apt-get install autoconf automake libtool curl make g++ unzip
+  - ./autogen.sh
   - ./configure
   - make
+  - make check
   - sudo make install
   - sudo ldconfig
   - protoc --version

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/70f28810/parquet-protobuf/pom.xml
----------------------------------------------------------------------
diff --git a/parquet-protobuf/pom.xml b/parquet-protobuf/pom.xml
index f1932e6..978a964 100644
--- a/parquet-protobuf/pom.xml
+++ b/parquet-protobuf/pom.xml
@@ -31,7 +31,7 @@
 
   <properties>
     <elephant-bird.version>4.4</elephant-bird.version>
-    <protobuf.version>2.6.1</protobuf.version>
+    <protobuf.version>3.2.0</protobuf.version>
   </properties>
 
 
@@ -87,7 +87,7 @@
   <developers>
     <developer>
       <id>lukasnalezenec</id>
-      <name>Lukas Nalezenec</name>      
+      <name>Lukas Nalezenec</name>
     </developer>
   </developers>
 
@@ -158,6 +158,7 @@
                 <exec failonerror="true" executable="protoc">
                   <arg value="--java_out=${project.build.directory}/generated-test-sources/java" />
                   <arg value="src/test/resources/TestProtobuf.proto" />
+                  <arg value="src/test/resources/TestProto3.proto" />
                   <arg value="-I." />
                 </exec>
               </tasks>

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/70f28810/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java
----------------------------------------------------------------------
diff --git a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java
index 5c6ebca..6c01d7b 100644
--- a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java
+++ b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -20,6 +20,7 @@ package org.apache.parquet.proto;
 
 import com.google.protobuf.Message;
 import org.apache.hadoop.fs.Path;
+import org.apache.parquet.proto.test.TestProto3;
 import org.apache.parquet.proto.test.TestProtobuf;
 import org.apache.parquet.proto.test.TestProtobuf.FirstCustomClassMessage;
 import org.apache.parquet.proto.test.TestProtobuf.SecondCustomClassMessage;
@@ -61,7 +62,31 @@ public class ProtoInputOutputFormatTest {
     assertEquals("Msg2", output.getRepeatedString(1));
 
     assertEquals(input, output);
+  }
+
+  @Test
+  public void testProto3InputOutput() throws Exception {
+    TestProto3.IOFormatMessage input;
+    {
+      TestProto3.IOFormatMessage.Builder msg = TestProto3.IOFormatMessage.newBuilder();
+      msg.setOptionalDouble(666);
+      msg.addRepeatedString("Msg1");
+      msg.addRepeatedString("Msg2");
+      msg.getMsgBuilder().setSomeId(323);
+      input = msg.build();
+    }
+
+    List<Message> result = runMRJobs(input);
+
+    assertEquals(1, result.size());
+    TestProto3.IOFormatMessage output = (TestProto3.IOFormatMessage) result.get(0);
 
+    assertEquals(666, output.getOptionalDouble(), 0.00001);
+    assertEquals(323, output.getMsg().getSomeId());
+    assertEquals("Msg1", output.getRepeatedString(0));
+    assertEquals("Msg2", output.getRepeatedString(1));
+
+    assertEquals(input, output);
   }
 
 
@@ -92,6 +117,30 @@ public class ProtoInputOutputFormatTest {
     assertTrue("Found data outside projection.", readDocument.getNameCount() == 0);
   }
 
+  @Test
+  public void testProto3Projection() throws Exception {
+
+    TestProto3.Document.Builder writtenDocument = TestProto3.Document.newBuilder();
+    writtenDocument.setDocId(12345);
+    writtenDocument.addNameBuilder().setUrl("http://goout.cz/");
+
+    Path outputPath = new WriteUsingMR().write(writtenDocument.build());
+
+    //lets prepare reading with schema
+    ReadUsingMR reader = new ReadUsingMR();
+
+    String projection = "message Document {optional int64 DocId; }";
+    reader.setRequestedProjection(projection);
+    List<Message> output = reader.read(outputPath);
+    TestProto3.Document readDocument = (TestProto3.Document) output.get(0);
+
+
+    //test that only requested fields were deserialized
+    assertTrue(readDocument.getDocId() == 12345);
+    assertTrue(readDocument.getNameCount() == 0);
+    assertTrue("Found data outside projection.", readDocument.getNameCount() == 0);
+  }
+
   /**
    * When user specified protobuffer class in configuration,
    * It should replace class specified in header.
@@ -120,6 +169,30 @@ public class ProtoInputOutputFormatTest {
     assertEquals("writtenString", stringValue);
   }
 
+  @Test
+  public void testProto3CustomProtoClass() throws Exception {
+    TestProto3.FirstCustomClassMessage.Builder inputMessage;
+    inputMessage = TestProto3.FirstCustomClassMessage.newBuilder();
+    inputMessage.setString("writtenString");
+
+    Path outputPath = new WriteUsingMR().write(new Message[]{inputMessage.build()});
+    ReadUsingMR readUsingMR = new ReadUsingMR();
+    String customClass = TestProto3.SecondCustomClassMessage.class.getName();
+    ProtoReadSupport.setProtobufClass(readUsingMR.getConfiguration(), customClass);
+    List<Message> result = readUsingMR.read(outputPath);
+
+    assertEquals(1, result.size());
+    Message msg = result.get(0);
+    assertFalse("Class from header returned.",
+      msg instanceof TestProto3.FirstCustomClassMessage);
+    assertTrue("Custom class was not used",
+      msg instanceof TestProto3.SecondCustomClassMessage);
+
+    String stringValue;
+    stringValue = ((TestProto3.SecondCustomClassMessage) msg).getString();
+    assertEquals("writtenString", stringValue);
+  }
+
   /**
    * Runs job that writes input to file and then job reading data back.
    */

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/70f28810/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoRecordConverterTest.java
----------------------------------------------------------------------
diff --git a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoRecordConverterTest.java b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoRecordConverterTest.java
index 5318bd2..e042f96 100644
--- a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoRecordConverterTest.java
+++ b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoRecordConverterTest.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -20,20 +20,21 @@ package org.apache.parquet.proto;
 
 import com.google.protobuf.ByteString;
 import org.junit.Test;
+import org.apache.parquet.proto.test.TestProto3;
 import org.apache.parquet.proto.test.TestProtobuf;
 
 import java.util.List;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
 import static org.apache.parquet.proto.TestUtils.testData;
 import static org.apache.parquet.proto.test.TestProtobuf.SchemaConverterAllDatatypes;
 
 public class ProtoRecordConverterTest {
 
-
   @Test
-  public void testSimple() throws Exception {
+  public void testAllTypes() throws Exception {
     SchemaConverterAllDatatypes.Builder data;
     data = SchemaConverterAllDatatypes.newBuilder();
 
@@ -53,27 +54,48 @@ public class ProtoRecordConverterTest {
     data.setOptionalString("Good Will Hunting");
     data.setOptionalUInt32(1000 * 1000 * 8);
     data.setOptionalUInt64(1000L * 1000 * 1000 * 9);
-//    data.getOptionalMessageBuilder().setSomeId(1984);
-//    data.getPbGroupBuilder().setGroupInt(1492);
+    data.getOptionalMessageBuilder().setSomeId(1984);
+    data.getPbGroupBuilder().setGroupInt(1492);
 
     SchemaConverterAllDatatypes dataBuilt = data.build();
     data.clear();
 
     List<TestProtobuf.SchemaConverterAllDatatypes> result;
     result = testData(dataBuilt);
-  }
 
+    //data are fully checked in testData function. Lets do one more check.
+    SchemaConverterAllDatatypes o = result.get(0);
+    assertEquals("Good Will Hunting", o.getOptionalString());
+
+    assertEquals(true, o.getOptionalBool());
+    assertEquals(ByteString.copyFrom("someText", "UTF-8"), o.getOptionalBytes());
+    assertEquals(0.577, o.getOptionalDouble(), 0.00001);
+    assertEquals(3.1415f, o.getOptionalFloat(), 0.00001);
+    assertEquals(SchemaConverterAllDatatypes.TestEnum.FIRST, o.getOptionalEnum());
+    assertEquals(1000 * 1000 * 1, o.getOptionalFixed32());
+    assertEquals(1000 * 1000 * 1000 * 2, o.getOptionalFixed64());
+    assertEquals(1000 * 1000 * 3, o.getOptionalInt32());
+    assertEquals(1000L * 1000 * 1000 * 4, o.getOptionalInt64());
+    assertEquals(1000 * 1000 * 5, o.getOptionalSFixed32());
+    assertEquals(1000L * 1000 * 1000 * 6, o.getOptionalSFixed64());
+    assertEquals(1000 * 1000 * 56, o.getOptionalSInt32());
+    assertEquals(1000L * 1000 * 1000 * 7, o.getOptionalSInt64());
+    assertEquals(1000 * 1000 * 8, o.getOptionalUInt32());
+    assertEquals(1000L * 1000 * 1000 * 9, o.getOptionalUInt64());
+    assertEquals(1984, o.getOptionalMessage().getSomeId());
+    assertEquals(1492, o.getPbGroup().getGroupInt());
+  }
 
   @Test
-  public void testAllTypes() throws Exception {
-    SchemaConverterAllDatatypes.Builder data;
-    data = SchemaConverterAllDatatypes.newBuilder();
+  public void testProto3AllTypes() throws Exception {
+    TestProto3.SchemaConverterAllDatatypes.Builder data;
+    data = TestProto3.SchemaConverterAllDatatypes.newBuilder();
 
     data.setOptionalBool(true);
     data.setOptionalBytes(ByteString.copyFrom("someText", "UTF-8"));
     data.setOptionalDouble(0.577);
     data.setOptionalFloat(3.1415f);
-    data.setOptionalEnum(SchemaConverterAllDatatypes.TestEnum.FIRST);
+    data.setOptionalEnum(TestProto3.SchemaConverterAllDatatypes.TestEnum.FIRST);
     data.setOptionalFixed32(1000 * 1000 * 1);
     data.setOptionalFixed64(1000 * 1000 * 1000 * 2);
     data.setOptionalInt32(1000 * 1000 * 3);
@@ -86,23 +108,22 @@ public class ProtoRecordConverterTest {
     data.setOptionalUInt32(1000 * 1000 * 8);
     data.setOptionalUInt64(1000L * 1000 * 1000 * 9);
     data.getOptionalMessageBuilder().setSomeId(1984);
-    data.getPbGroupBuilder().setGroupInt(1492);
 
-    SchemaConverterAllDatatypes dataBuilt = data.build();
+    TestProto3.SchemaConverterAllDatatypes dataBuilt = data.build();
     data.clear();
 
-    List<TestProtobuf.SchemaConverterAllDatatypes> result;
+    List<TestProto3.SchemaConverterAllDatatypes> result;
     result = testData(dataBuilt);
 
     //data are fully checked in testData function. Lets do one more check.
-    SchemaConverterAllDatatypes o = result.get(0);
+    TestProto3.SchemaConverterAllDatatypes o = result.get(0);
     assertEquals("Good Will Hunting", o.getOptionalString());
 
     assertEquals(true, o.getOptionalBool());
     assertEquals(ByteString.copyFrom("someText", "UTF-8"), o.getOptionalBytes());
     assertEquals(0.577, o.getOptionalDouble(), 0.00001);
     assertEquals(3.1415f, o.getOptionalFloat(), 0.00001);
-    assertEquals(SchemaConverterAllDatatypes.TestEnum.FIRST, o.getOptionalEnum());
+    assertEquals(TestProto3.SchemaConverterAllDatatypes.TestEnum.FIRST, o.getOptionalEnum());
     assertEquals(1000 * 1000 * 1, o.getOptionalFixed32());
     assertEquals(1000 * 1000 * 1000 * 2, o.getOptionalFixed64());
     assertEquals(1000 * 1000 * 3, o.getOptionalInt32());
@@ -114,7 +135,6 @@ public class ProtoRecordConverterTest {
     assertEquals(1000 * 1000 * 8, o.getOptionalUInt32());
     assertEquals(1000L * 1000 * 1000 * 9, o.getOptionalUInt64());
     assertEquals(1984, o.getOptionalMessage().getSomeId());
-    assertEquals(1492, o.getPbGroup().getGroupInt());
   }
 
   @Test
@@ -154,6 +174,41 @@ public class ProtoRecordConverterTest {
     assertEquals("Good Will Hunting 90", result.get(90).getOptionalString());
   }
 
+  @Test
+  public void testProto3AllTypesMultiple() throws Exception {
+    int count = 100;
+    TestProto3.SchemaConverterAllDatatypes[] input = new TestProto3.SchemaConverterAllDatatypes[count];
+
+    for (int i = 0; i < count; i++) {
+      TestProto3.SchemaConverterAllDatatypes.Builder d = TestProto3.SchemaConverterAllDatatypes.newBuilder();
+
+      if (i % 2 != 0) d.setOptionalBool(true);
+      if (i % 3 != 0) d.setOptionalBytes(ByteString.copyFrom("someText " + i, "UTF-8"));
+      if (i % 4 != 0) d.setOptionalDouble(0.577 * i);
+      if (i % 5 != 0) d.setOptionalFloat(3.1415f * i);
+      if (i % 6 != 0) d.setOptionalEnum(TestProto3.SchemaConverterAllDatatypes.TestEnum.FIRST);
+      if (i % 7 != 0) d.setOptionalFixed32(1000 * i * 1);
+      if (i % 8 != 0) d.setOptionalFixed64(1000 * i * 1000 * 2);
+      if (i % 9 != 0) d.setOptionalInt32(1000 * i * 3);
+      if (i % 2 != 1) d.setOptionalSFixed32(1000 * i * 5);
+      if (i % 3 != 1) d.setOptionalSFixed64(1000 * i * 1000 * 6);
+      if (i % 4 != 1) d.setOptionalSInt32(1000 * i * 56);
+      if (i % 5 != 1) d.setOptionalSInt64(1000 * i * 1000 * 7);
+      if (i % 6 != 1) d.setOptionalString("Good Will Hunting " + i);
+      if (i % 7 != 1) d.setOptionalUInt32(1000 * i * 8);
+      if (i % 8 != 1) d.setOptionalUInt64(1000 * i * 1000 * 9);
+      if (i % 9 != 1) d.getOptionalMessageBuilder().setSomeId(1984 * i);
+      if (i % 3 != 1) d.setOptionalInt64(1000 * i * 1000 * 4);
+      input[i] = d.build();
+    }
+
+    List<TestProto3.SchemaConverterAllDatatypes> result;
+    result = testData(input);
+
+    //data are fully checked in testData function. Lets do one more check.
+    assertEquals("Good Will Hunting 0", result.get(0).getOptionalString());
+    assertEquals("Good Will Hunting 90", result.get(90).getOptionalString());
+  }
 
   @Test
   public void testDefaults() throws Exception {
@@ -168,6 +223,18 @@ public class ProtoRecordConverterTest {
   }
 
   @Test
+  public void testProto3Defaults() throws Exception {
+    TestProto3.SchemaConverterAllDatatypes.Builder data;
+    data = TestProto3.SchemaConverterAllDatatypes.newBuilder();
+
+    List<TestProto3.SchemaConverterAllDatatypes> result = testData(data.build());
+    TestProto3.SchemaConverterAllDatatypes message = result.get(0);
+    assertEquals("", message.getOptionalString());
+    assertEquals(false, message.getOptionalBool());
+    assertEquals(0, message.getOptionalFixed32());
+  }
+
+  @Test
   public void testRepeatedMessages() throws Exception {
     TestProtobuf.TopMessage.Builder top = TestProtobuf.TopMessage.newBuilder();
     top.addInnerBuilder().setOne("First inner");
@@ -195,6 +262,33 @@ public class ProtoRecordConverterTest {
     assertFalse(third.hasTwo());
   }
 
+  @Test
+  public void testProto3RepeatedMessages() throws Exception {
+    TestProto3.TopMessage.Builder top = TestProto3.TopMessage.newBuilder();
+    top.addInnerBuilder().setOne("First inner");
+    top.addInnerBuilder().setTwo("Second inner");
+    top.addInnerBuilder().setThree("Third inner");
+
+    TestProto3.TopMessage result = testData(top.build()).get(0);
+
+    assertEquals(3, result.getInnerCount());
+
+    TestProto3.InnerMessage first = result.getInner(0);
+    TestProto3.InnerMessage second = result.getInner(1);
+    TestProto3.InnerMessage third = result.getInner(2);
+
+    assertEquals("First inner", first.getOne());
+    assertTrue(first.getTwo().isEmpty());
+    assertTrue(first.getThree().isEmpty());
+
+    assertEquals("Second inner", second.getTwo());
+    assertTrue(second.getOne().isEmpty());
+    assertTrue(second.getThree().isEmpty());
+
+    assertEquals("Third inner", third.getThree());
+    assertTrue(third.getOne().isEmpty());
+    assertTrue(third.getTwo().isEmpty());
+  }
 
   @Test
   public void testRepeatedInt() throws Exception {
@@ -214,6 +308,23 @@ public class ProtoRecordConverterTest {
   }
 
   @Test
+  public void testProto3RepeatedInt() throws Exception {
+    TestProto3.RepeatedIntMessage.Builder top = TestProto3.RepeatedIntMessage.newBuilder();
+
+    top.addRepeatedInt(1);
+    top.addRepeatedInt(2);
+    top.addRepeatedInt(3);
+
+    TestProto3.RepeatedIntMessage result = testData(top.build()).get(0);
+
+    assertEquals(3, result.getRepeatedIntCount());
+
+    assertEquals(1, result.getRepeatedInt(0));
+    assertEquals(2, result.getRepeatedInt(1));
+    assertEquals(3, result.getRepeatedInt(2));
+  }
+
+  @Test
   public void testLargeProtobufferFieldId() throws Exception {
     TestProtobuf.HighIndexMessage.Builder builder = TestProtobuf.HighIndexMessage.newBuilder();
     builder.addRepeatedInt(1);
@@ -221,4 +332,13 @@ public class ProtoRecordConverterTest {
 
     testData(builder.build());
   }
+
+  @Test
+  public void testProto3LargeProtobufferFieldId() throws Exception {
+    TestProto3.HighIndexMessage.Builder builder = TestProto3.HighIndexMessage.newBuilder();
+    builder.addRepeatedInt(1);
+    builder.addRepeatedInt(2);
+
+    testData(builder.build());
+  }
 }

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/70f28810/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoSchemaConverterTest.java
----------------------------------------------------------------------
diff --git a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoSchemaConverterTest.java b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoSchemaConverterTest.java
index 1b17334..6f5ff53 100644
--- a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoSchemaConverterTest.java
+++ b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoSchemaConverterTest.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -20,6 +20,7 @@ package org.apache.parquet.proto;
 
 import com.google.protobuf.Message;
 import org.junit.Test;
+import org.apache.parquet.proto.test.TestProto3;
 import org.apache.parquet.proto.test.TestProtobuf;
 import org.apache.parquet.schema.MessageType;
 import org.apache.parquet.schema.MessageTypeParser;
@@ -74,6 +75,45 @@ public class ProtoSchemaConverterTest {
     testConversion(TestProtobuf.SchemaConverterAllDatatypes.class, expectedSchema);
   }
 
+  /**
+   * Tests that all protocol buffer datatypes are converted to correct parquet datatypes.
+   */
+  @Test
+  public void testProto3ConvertAllDatatypes() throws Exception {
+    String expectedSchema =
+      "message TestProto3.SchemaConverterAllDatatypes {\n" +
+        "  optional double optionalDouble = 1;\n" +
+        "  optional float optionalFloat = 2;\n" +
+        "  optional int32 optionalInt32 = 3;\n" +
+        "  optional int64 optionalInt64 = 4;\n" +
+        "  optional int32 optionalUInt32 = 5;\n" +
+        "  optional int64 optionalUInt64 = 6;\n" +
+        "  optional int32 optionalSInt32 = 7;\n" +
+        "  optional int64 optionalSInt64 = 8;\n" +
+        "  optional int32 optionalFixed32 = 9;\n" +
+        "  optional int64 optionalFixed64 = 10;\n" +
+        "  optional int32 optionalSFixed32 = 11;\n" +
+        "  optional int64 optionalSFixed64 = 12;\n" +
+        "  optional boolean optionalBool = 13;\n" +
+        "  optional binary optionalString (UTF8) = 14;\n" +
+        "  optional binary optionalBytes = 15;\n" +
+        "  optional group optionalMessage = 16 {\n" +
+        "    optional int32 someId = 3;\n" +
+        "  }\n" +
+        "  optional binary optionalEnum (ENUM) = 18;" +
+        "  optional int32 someInt32 = 19;" +
+        "  optional binary someString (UTF8) = 20;" +
+        "  repeated group optionalMap = 21 {\n" +
+        "    optional int64 key = 1;\n" +
+        "    optional group value = 2 {\n" +
+        "      optional int32 someId = 3;\n" +
+        "    }\n" +
+        "  }\n" +
+        "}";
+
+    testConversion(TestProto3.SchemaConverterAllDatatypes.class, expectedSchema);
+  }
+
   @Test
   public void testConvertRepetition() throws Exception {
     String expectedSchema =
@@ -94,4 +134,21 @@ public class ProtoSchemaConverterTest {
 
     testConversion(TestProtobuf.SchemaConverterRepetition.class, expectedSchema);
   }
+
+  @Test
+  public void testProto3ConvertRepetition() throws Exception {
+    String expectedSchema =
+      "message TestProto3.SchemaConverterRepetition {\n" +
+        "  optional int32 optionalPrimitive = 1;\n" +
+        "  repeated int32 repeatedPrimitive = 3;\n" +
+        "  optional group optionalMessage = 7 {\n" +
+        "    optional int32 someId = 3;\n" +
+        "  }\n" +
+        "  repeated group repeatedMessage = 9 {" +
+        "    optional int32 someId = 3;\n" +
+        "  }\n" +
+        "}";
+
+    testConversion(TestProto3.SchemaConverterRepetition.class, expectedSchema);
+  }
 }

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/70f28810/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java
----------------------------------------------------------------------
diff --git a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java
index 3a273c9..b937618 100644
--- a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java
+++ b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoWriteSupportTest.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -25,6 +25,7 @@ import org.mockito.InOrder;
 import org.mockito.Mockito;
 import org.apache.parquet.io.api.Binary;
 import org.apache.parquet.io.api.RecordConsumer;
+import org.apache.parquet.proto.test.TestProto3;
 import org.apache.parquet.proto.test.TestProtobuf;
 
 public class ProtoWriteSupportTest {
@@ -58,6 +59,27 @@ public class ProtoWriteSupportTest {
   }
 
   @Test
+  public void testProto3SimplestMessage() throws Exception {
+    RecordConsumer readConsumerMock =  Mockito.mock(RecordConsumer.class);
+    ProtoWriteSupport instance = createReadConsumerInstance(TestProto3.InnerMessage.class, readConsumerMock);
+
+    TestProto3.InnerMessage.Builder msg = TestProto3.InnerMessage.newBuilder();
+    msg.setOne("oneValue");
+
+    instance.write(msg.build());
+
+    InOrder inOrder = Mockito.inOrder(readConsumerMock);
+
+    inOrder.verify(readConsumerMock).startMessage();
+    inOrder.verify(readConsumerMock).startField("one", 0);
+    inOrder.verify(readConsumerMock).addBinary(Binary.fromString("oneValue"));
+    inOrder.verify(readConsumerMock).endField("one", 0);
+
+    inOrder.verify(readConsumerMock).endMessage();
+    Mockito.verifyNoMoreInteractions(readConsumerMock);
+  }
+
+  @Test
   public void testRepeatedIntMessage() throws Exception {
     RecordConsumer readConsumerMock =  Mockito.mock(RecordConsumer.class);
     ProtoWriteSupport instance = createReadConsumerInstance(TestProtobuf.RepeatedIntMessage.class, readConsumerMock);
@@ -80,6 +102,28 @@ public class ProtoWriteSupportTest {
   }
 
   @Test
+  public void testProto3RepeatedIntMessage() throws Exception {
+    RecordConsumer readConsumerMock =  Mockito.mock(RecordConsumer.class);
+    ProtoWriteSupport instance = createReadConsumerInstance(TestProto3.RepeatedIntMessage.class, readConsumerMock);
+
+    TestProto3.RepeatedIntMessage.Builder msg = TestProto3.RepeatedIntMessage.newBuilder();
+    msg.addRepeatedInt(1323);
+    msg.addRepeatedInt(54469);
+
+    instance.write(msg.build());
+
+    InOrder inOrder = Mockito.inOrder(readConsumerMock);
+
+    inOrder.verify(readConsumerMock).startMessage();
+    inOrder.verify(readConsumerMock).startField("repeatedInt", 0);
+    inOrder.verify(readConsumerMock).addInteger(1323);
+    inOrder.verify(readConsumerMock).addInteger(54469);
+    inOrder.verify(readConsumerMock).endField("repeatedInt", 0);
+    inOrder.verify(readConsumerMock).endMessage();
+    Mockito.verifyNoMoreInteractions(readConsumerMock);
+  }
+
+  @Test
   public void testRepeatedInnerMessageMessage_message() throws Exception {
     RecordConsumer readConsumerMock =  Mockito.mock(RecordConsumer.class);
     ProtoWriteSupport instance = createReadConsumerInstance(TestProtobuf.TopMessage.class, readConsumerMock);
@@ -107,6 +151,33 @@ public class ProtoWriteSupportTest {
   }
 
   @Test
+  public void testProto3RepeatedInnerMessageMessage_message() throws Exception {
+    RecordConsumer readConsumerMock =  Mockito.mock(RecordConsumer.class);
+    ProtoWriteSupport instance = createReadConsumerInstance(TestProto3.TopMessage.class, readConsumerMock);
+
+    TestProto3.TopMessage.Builder msg = TestProto3.TopMessage.newBuilder();
+    msg.addInnerBuilder().setOne("one").setTwo("two");
+
+    instance.write(msg.build());
+
+    InOrder inOrder = Mockito.inOrder(readConsumerMock);
+
+    inOrder.verify(readConsumerMock).startMessage();
+    inOrder.verify(readConsumerMock).startField("inner", 0);
+    inOrder.verify(readConsumerMock).startGroup();
+    inOrder.verify(readConsumerMock).startField("one", 0);
+    inOrder.verify(readConsumerMock).addBinary(Binary.fromConstantByteArray("one".getBytes()));
+    inOrder.verify(readConsumerMock).endField("one", 0);
+    inOrder.verify(readConsumerMock).startField("two", 1);
+    inOrder.verify(readConsumerMock).addBinary(Binary.fromConstantByteArray("two".getBytes()));
+    inOrder.verify(readConsumerMock).endField("two", 1);
+    inOrder.verify(readConsumerMock).endGroup();
+    inOrder.verify(readConsumerMock).endField("inner", 0);
+    inOrder.verify(readConsumerMock).endMessage();
+    Mockito.verifyNoMoreInteractions(readConsumerMock);
+  }
+
+  @Test
   public void testRepeatedInnerMessageMessage_scalar() throws Exception {
     RecordConsumer readConsumerMock =  Mockito.mock(RecordConsumer.class);
     ProtoWriteSupport instance = createReadConsumerInstance(TestProtobuf.TopMessage.class, readConsumerMock);
@@ -141,6 +212,40 @@ public class ProtoWriteSupportTest {
   }
 
   @Test
+  public void testProto3RepeatedInnerMessageMessage_scalar() throws Exception {
+    RecordConsumer readConsumerMock =  Mockito.mock(RecordConsumer.class);
+    ProtoWriteSupport instance = createReadConsumerInstance(TestProto3.TopMessage.class, readConsumerMock);
+
+    TestProto3.TopMessage.Builder msg = TestProto3.TopMessage.newBuilder();
+    msg.addInnerBuilder().setOne("one");
+    msg.addInnerBuilder().setTwo("two");
+
+    instance.write(msg.build());
+
+    InOrder inOrder = Mockito.inOrder(readConsumerMock);
+
+    inOrder.verify(readConsumerMock).startMessage();
+    inOrder.verify(readConsumerMock).startField("inner", 0);
+    //first inner message
+    inOrder.verify(readConsumerMock).startGroup();
+    inOrder.verify(readConsumerMock).startField("one", 0);
+    inOrder.verify(readConsumerMock).addBinary(Binary.fromConstantByteArray("one".getBytes()));
+    inOrder.verify(readConsumerMock).endField("one", 0);
+    inOrder.verify(readConsumerMock).endGroup();
+
+    //second inner message
+    inOrder.verify(readConsumerMock).startGroup();
+    inOrder.verify(readConsumerMock).startField("two", 1);
+    inOrder.verify(readConsumerMock).addBinary(Binary.fromConstantByteArray("two".getBytes()));
+    inOrder.verify(readConsumerMock).endField("two", 1);
+    inOrder.verify(readConsumerMock).endGroup();
+
+    inOrder.verify(readConsumerMock).endField("inner", 0);
+    inOrder.verify(readConsumerMock).endMessage();
+    Mockito.verifyNoMoreInteractions(readConsumerMock);
+  }
+
+  @Test
   public void testOptionalInnerMessage() throws Exception {
     RecordConsumer readConsumerMock =  Mockito.mock(RecordConsumer.class);
     ProtoWriteSupport instance = createReadConsumerInstance(TestProtobuf.MessageA.class, readConsumerMock);
@@ -166,6 +271,32 @@ public class ProtoWriteSupportTest {
     Mockito.verifyNoMoreInteractions(readConsumerMock);
   }
 
+  @Test
+  public void testProto3OptionalInnerMessage() throws Exception {
+    RecordConsumer readConsumerMock =  Mockito.mock(RecordConsumer.class);
+    ProtoWriteSupport instance = createReadConsumerInstance(TestProto3.MessageA.class, readConsumerMock);
+
+    TestProto3.MessageA.Builder msg = TestProto3.MessageA.newBuilder();
+    msg.getInnerBuilder().setOne("one");
+
+    instance.write(msg.build());
+
+    InOrder inOrder = Mockito.inOrder(readConsumerMock);
+
+    inOrder.verify(readConsumerMock).startMessage();
+    inOrder.verify(readConsumerMock).startField("inner", 0);
+
+    inOrder.verify(readConsumerMock).startGroup();
+    inOrder.verify(readConsumerMock).startField("one", 0);
+    inOrder.verify(readConsumerMock).addBinary(Binary.fromConstantByteArray("one".getBytes()));
+    inOrder.verify(readConsumerMock).endField("one", 0);
+    inOrder.verify(readConsumerMock).endGroup();
+
+    inOrder.verify(readConsumerMock).endField("inner", 0);
+    inOrder.verify(readConsumerMock).endMessage();
+    Mockito.verifyNoMoreInteractions(readConsumerMock);
+  }
+
   @Test(expected = UnsupportedOperationException.class)
   public void testMessageWithExtensions() throws Exception {
     RecordConsumer readConsumerMock =  Mockito.mock(RecordConsumer.class);
@@ -179,5 +310,4 @@ public class ProtoWriteSupportTest {
 
     instance.write(msg.build());
   }
-
 }

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/70f28810/parquet-protobuf/src/test/resources/TestProto3.proto
----------------------------------------------------------------------
diff --git a/parquet-protobuf/src/test/resources/TestProto3.proto b/parquet-protobuf/src/test/resources/TestProto3.proto
new file mode 100644
index 0000000..1896445
--- /dev/null
+++ b/parquet-protobuf/src/test/resources/TestProto3.proto
@@ -0,0 +1,139 @@
+syntax = "proto3";
+//
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//
+package TestProto3;
+
+option java_package = "org.apache.parquet.proto.test";
+
+// original Dremel paper structures: Original paper used groups, not internal
+// messages but groups were deprecated.
+
+message Document {
+    int64 DocId = 1;
+    Links links = 32;
+    repeated Name Name = 24;
+}
+
+message Name {
+    repeated Language name = 4;
+    string url = 5;
+}
+
+message Language {
+    string code = 12;
+    string Country = 14;
+}
+
+message Links {
+    repeated int64 Backward = 1;
+    repeated int64 Forward = 2;
+}
+
+
+// begin - protocol buffers for ProtoSchemaConverterTest
+
+message SchemaConverterSimpleMessage {
+    int32 someId = 3;
+}
+
+message SchemaConverterAllDatatypes {
+    double optionalDouble = 1;
+    float optionalFloat = 2;
+    int32 optionalInt32 = 3;
+    int64 optionalInt64 = 4;
+    uint32 optionalUInt32 = 5;
+    uint64 optionalUInt64 = 6;
+    sint32 optionalSInt32 = 7;
+    sint64 optionalSInt64 = 8;
+    fixed32 optionalFixed32 = 9;
+    fixed64 optionalFixed64 = 10;
+    sfixed32 optionalSFixed32 = 11;
+    sfixed64 optionalSFixed64 = 12;
+    bool optionalBool = 13;
+    string optionalString = 14;
+    bytes optionalBytes = 15;
+    SchemaConverterSimpleMessage optionalMessage = 16;
+    enum TestEnum {
+        FIRST = 0;
+        SECOND = 1;
+    }
+    TestEnum optionalEnum = 18;
+    oneof oneof {
+        int32 someInt32 = 19;
+        string someString = 20;
+    }
+    map<int64, SchemaConverterSimpleMessage> optionalMap = 21;
+}
+
+message SchemaConverterRepetition {
+    int32 optionalPrimitive = 1;
+    repeated int32 repeatedPrimitive = 3;
+    SchemaConverterSimpleMessage optionalMessage = 7;
+    repeated SchemaConverterSimpleMessage repeatedMessage = 9;
+}
+
+// end - protocol buffers for ProtoSchemaConverterTest
+
+
+//begin protocol buffers for ProtoInputOutputFormatTest
+
+message InputOutputMsgFormat {
+    int32 someId = 3;
+}
+
+message IOFormatMessage {
+    double optionalDouble = 1;
+    repeated string repeatedString = 2;
+    InputOutputMsgFormat msg = 3;
+ }
+
+//end protocol buffers for ProtoInputOutputFormatTest
+
+
+message InnerMessage {
+    string one = 1;
+    string two = 2;
+    string three = 3;
+}
+
+message TopMessage {
+    repeated InnerMessage inner = 1;
+}
+
+message MessageA {
+    InnerMessage inner = 123;
+}
+
+message RepeatedIntMessage {
+    repeated int32 repeatedInt = 1;
+}
+
+message HighIndexMessage {
+    repeated int32 repeatedInt = 50000;
+}
+
+//custom proto class - ProtoInputOutputFormatTest
+
+message FirstCustomClassMessage {
+    string string = 11;
+}
+
+message SecondCustomClassMessage {
+    string string = 11;
+}