You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@parquet.apache.org by "Julien Le Dem (JIRA)" <ji...@apache.org> on 2017/04/21 23:12:04 UTC

[jira] [Assigned] (PARQUET-665) Parquet-mr: Protobuf 3 support

     [ https://issues.apache.org/jira/browse/PARQUET-665?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Julien Le Dem reassigned PARQUET-665:
-------------------------------------

    Assignee: Wael Nasreddine

> Parquet-mr: Protobuf 3 support
> ------------------------------
>
>                 Key: PARQUET-665
>                 URL: https://issues.apache.org/jira/browse/PARQUET-665
>             Project: Parquet
>          Issue Type: Improvement
>          Components: parquet-mr
>            Reporter: Wael Nasreddine
>            Assignee: Wael Nasreddine
>             Fix For: 1.9.0
>
>
> Does parquet-mr support Protobuf version 3? I've applied the following patch and the tests are failing mostly due to optional vs required.
> {code}
> diff --git a/parquet-protobuf/pom.xml b/parquet-protobuf/pom.xml
> index b3e4e50..aa67423 100644
> --- a/parquet-protobuf/pom.xml
> +++ b/parquet-protobuf/pom.xml
> @@ -31,7 +31,7 @@
>  
>    <properties>
>      <elephant-bird.version>4.4</elephant-bird.version>
> -    <protobuf.version>2.5.0</protobuf.version>
> +    <protobuf.version>3.0.0-beta-4</protobuf.version>
>    </properties>
>  
>  
> diff --git a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java
> index 5c6ebca..7e2557f 100644
> --- a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java
> +++ b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoInputOutputFormatTest.java
> @@ -88,7 +88,7 @@ public class ProtoInputOutputFormatTest {
>  
>  
>      //test that only requested fields were deserialized
> -    assertTrue(readDocument.hasDocId());
> +    assertTrue(readDocument.getDocId() == 12345);
>      assertTrue("Found data outside projection.", readDocument.getNameCount() == 0);
>    }
>  
> diff --git a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoRecordConverterTest.java b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoRecordConverterTest.java
> index 5318bd2..1cbb972 100644
> --- a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoRecordConverterTest.java
> +++ b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoRecordConverterTest.java
> @@ -183,16 +183,16 @@ public class ProtoRecordConverterTest {
>      TestProtobuf.InnerMessage third = result.getInner(2);
>  
>      assertEquals("First inner", first.getOne());
> -    assertFalse(first.hasTwo());
> -    assertFalse(first.hasThree());
> +    assertEquals(first.getTwo(), "");
> +    assertEquals(first.getThree(), "");
>  
>      assertEquals("Second inner", second.getTwo());
> -    assertFalse(second.hasOne());
> -    assertFalse(second.hasThree());
> +    assertEquals(second.getOne(), "");
> +    assertEquals(second.getThree(), "");
>  
>      assertEquals("Third inner", third.getThree());
> -    assertFalse(third.hasOne());
> -    assertFalse(third.hasTwo());
> +    assertEquals(third.getOne(), "");
> +    assertEquals(third.getTwo(), "");
>    }
>  
>  
> diff --git a/parquet-protobuf/src/test/resources/TestProtobuf.proto b/parquet-protobuf/src/test/resources/TestProtobuf.proto
> index afa0f63..caf7926 100644
> --- a/parquet-protobuf/src/test/resources/TestProtobuf.proto
> +++ b/parquet-protobuf/src/test/resources/TestProtobuf.proto
> @@ -9,7 +9,7 @@
>  //
>  //   http://www.apache.org/licenses/LICENSE-2.0
>  //
> -// Unless required by applicable law or agreed to in writing,
> +// Unless by applicable law or agreed to in writing,
>  // software distributed under the License is distributed on an
>  // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
>  // KIND, either express or implied.  See the License for the
> @@ -17,6 +17,8 @@
>  // under the License.
>  //
>  
> +syntax = "proto3";
> +
>  package TestProtobuf;
>  
>  option java_package = "org.apache.parquet.proto.test";
> @@ -25,17 +27,18 @@ option java_package = "org.apache.parquet.proto.test";
>  // messages but groups were deprecated.
>  
>  message Document {
> -    required int64 DocId = 1;
> -    optional Links links = 32;
> -    repeated group Name = 24 {
> +    int64 DocId = 1;
> +    Links links = 32;
> +    message Name  {
>          repeated Language name = 4;
> -        optional string url = 5;
> +        string url = 5;
>      }
> +    repeated Name name = 24;
>  }
>  
>  message Language {
> -    required string code = 12;
> -    optional string Country = 14;
> +    string code = 12;
> +    string Country = 14;
>  }
>  
>  message Links {
> @@ -47,42 +50,43 @@ message Links {
>  // begin - protocol buffers for ProtoSchemaConverterTest
>  
>   message SchemaConverterSimpleMessage {
> -     optional int32 someId = 3;
> +     int32 someId = 3;
>   }
>  
>   message SchemaConverterAllDatatypes {
> -     optional double optionalDouble = 1;
> -     optional float optionalFloat = 2;
> -     optional int32 optionalInt32 = 3;
> -     optional int64 optionalInt64 = 4;
> -     optional uint32 optionalUInt32 = 5;
> -     optional uint64 optionalUInt64 = 6;
> -     optional sint32 optionalSInt32 = 7;
> -     optional sint64 optionalSInt64 = 8;
> -     optional fixed32 optionalFixed32 = 9;
> -     optional fixed64 optionalFixed64 = 10;
> -     optional sfixed32 optionalSFixed32 = 11;
> -     optional sfixed64 optionalSFixed64 = 12;
> -     optional bool optionalBool = 13;
> -     optional string optionalString = 14;
> -     optional bytes optionalBytes = 15;
> -     optional SchemaConverterSimpleMessage optionalMessage = 16;
> -     optional group PbGroup  = 17 {
> -       optional int32 groupInt = 2;
> +     double optionalDouble = 1;
> +     float optionalFloat = 2;
> +     int32 optionalInt32 = 3;
> +     int64 optionalInt64 = 4;
> +     uint32 optionalUInt32 = 5;
> +     uint64 optionalUInt64 = 6;
> +     sint32 optionalSInt32 = 7;
> +     sint64 optionalSInt64 = 8;
> +     fixed32 optionalFixed32 = 9;
> +     fixed64 optionalFixed64 = 10;
> +     sfixed32 optionalSFixed32 = 11;
> +     sfixed64 optionalSFixed64 = 12;
> +     bool optionalBool = 13;
> +     string optionalString = 14;
> +     bytes optionalBytes = 15;
> +     SchemaConverterSimpleMessage optionalMessage = 16;
> +     message PbGroup {
> +       int32 groupInt = 2;
>       }
> +     PbGroup pbGroup = 17;
>      enum TestEnum {
>          FIRST = 0;
>          SECOND = 1;
>      }
> -    optional TestEnum optionalEnum = 18;
> +    TestEnum optionalEnum = 18;
>   }
>  
>   message SchemaConverterRepetition {
> -     optional int32 optionalPrimitive = 1;
> -     required int32 requiredPrimitive = 2;
> +     int32 optionalPrimitive = 1;
> +     int32 requiredPrimitive = 2;
>       repeated int32 repeatedPrimitive = 3;
> -     optional SchemaConverterSimpleMessage optionalMessage = 7;
> -     required SchemaConverterSimpleMessage requiredMessage = 8;
> +     SchemaConverterSimpleMessage optionalMessage = 7;
> +     SchemaConverterSimpleMessage requiredMessage = 8;
>       repeated SchemaConverterSimpleMessage repeatedMessage = 9;
>   }
>  
> @@ -92,22 +96,22 @@ message Links {
>  //begin protocol buffers for ProtoInputOutputFormatTest
>  
>  message InputOutputMsgFormat {
> -    optional int32 someId = 3;
> +    int32 someId = 3;
>  }
>  
>  message IOFormatMessage {
> -    optional double optionalDouble = 1;
> +    double optionalDouble = 1;
>      repeated string repeatedString = 2;
> -    optional InputOutputMsgFormat msg = 3;
> +    InputOutputMsgFormat msg = 3;
>   }
>  
>  //end protocol buffers for ProtoInputOutputFormatTest
>  
>  
>  message InnerMessage {
> -    optional string one = 1;
> -    optional string two = 2;
> -    optional string three = 3;
> +    string one = 1;
> +    string two = 2;
> +    string three = 3;
>  }
>  
>  message TopMessage {
> @@ -115,7 +119,7 @@ message TopMessage {
>  }
>  
>  message MessageA {
> -    optional InnerMessage inner = 123;
> +    InnerMessage inner = 123;
>  }
>  
>  message RepeatedIntMessage {
> @@ -129,11 +133,11 @@ message HighIndexMessage {
>  //custom proto class - ProtoInputOutputFormatTest
>  
>  message FirstCustomClassMessage {
> -    optional string string = 11;
> +    string string = 11;
>  }
>  
>  message SecondCustomClassMessage {
> -    optional string string = 11;
> +    string string = 11;
>  }
>  
>  //please place your unit test Protocol Buffer definitions here.
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)