You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@avro.apache.org by "Doug Cutting (JIRA)" <ji...@apache.org> on 2013/02/20 00:31:13 UTC

[jira] [Commented] (AVRO-1176) ResolvingDecoder fails to resolve or parse schemas

    [ https://issues.apache.org/jira/browse/AVRO-1176?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13581751#comment-13581751 ] 

Doug Cutting commented on AVRO-1176:
------------------------------------

This needs some tests, as existing tests pass without the patch.  Also, we shouldn't add comments about problems that have been fixed.
                
> ResolvingDecoder fails to resolve or parse schemas
> --------------------------------------------------
>
>                 Key: AVRO-1176
>                 URL: https://issues.apache.org/jira/browse/AVRO-1176
>             Project: Avro
>          Issue Type: Bug
>          Components: c++
>    Affects Versions: 1.7.0
>            Reporter: Keh-Li Sheng
>              Labels: patch
>             Fix For: 1.7.4
>
>         Attachments: AVRO-1176.patch
>
>
> We have encountered a number of problems using ResolvingDecoder in the C++ project that we can trace to:
> 1. Incorrectly swapped reader/writer arguments passed to ResolvingGrammarGenerator::generate()
> 2. Using the wrong tree in ResolvingGrammarGenerator::generate() to generate the backup parsing stack
> 3. A decoder has no "hook" into the generated codec_traits decode methods for Specific that advances the resolved parse tree through the Symbol::sSkipStart nodes to ignore extra or unknown fields in the writer's data.
> 4. A resolving decoder can generate a valid decoded object even if there are garbage characters at the end of the input stream if those characters appear in a field that the reader schema is unaware of
> Reader/writer schemas that fail to parse properly are shown below. The first example is the writer adding a field to a record that is inside an array.
> {code:title=Added field to record inside array}
> {
>     std::string readerString("{\"type\":\"record\",\"name\":\"Outer\",\"fields\":[{\"name\":\"outerArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}}]}}}]}");
>     std::string writerString("{\"type\":\"record\",\"name\":\"Outer\",\"fields\":[{\"name\":\"outerArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"third\",\"type\":{\"type\":\"record\",\"name\":\"Inner3\",\"fields\":[{\"name\":\"number\",\"type\":\"int\"}]}}]}}}]}");
>     std::stringstream readerStream(readerString);
>     std::stringstream writerStream(writerString);
>     
>     avro::ValidSchema readerSchema;
>     avro::ValidSchema writerSchema;
>     
>     avro::compileJsonSchema(readerStream, readerSchema);
>     avro::compileJsonSchema(writerStream, writerSchema);
>     
>     avro::DecoderPtr decoder = avro::resolvingDecoder(writerSchema, readerSchema, avro::jsonDecoder(writerSchema));
>     struct Outer outer;
>     
>     std::stringstream jsonStream("{\"outerArray\":[{\"first\":{\"field\":\"here is a string field\"},\"second\":{\"field\":\"here is another string field\"},\"third\":{\"number\":3}},{\"first\":{\"field\":\"cool\"},\"second\":{\"field\":\"beans\"},\"third\":{\"number\":4}}]}");
>     std::auto_ptr<avro::InputStream> input = avro::istreamInputStream(jsonStream);
>     decoder->init(*input);
>     avro::decode(*decoder, outer);
> }
> {code}
> {code:title=Additional array of writer-only record}
> {
>     std::string readerString("{\"type\":\"record\",\"name\":\"OuterExtra\",\"fields\":[{\"name\":\"extraArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArrayExtraArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}}]}}}]}");
>     std::string writerString("{\"type\":\"record\",\"name\":\"OuterExtra\",\"fields\":[{\"name\":\"extraArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArrayExtraArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"innerArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"Inner3\",\"fields\":[{\"name\":\"number\",\"type\":\"int\"}]}}}]}}}]}");
>     std::stringstream readerStream(readerString);
>     std::stringstream writerStream(writerString);
>     
>     avro::ValidSchema readerSchema;
>     avro::ValidSchema writerSchema;
>     
>     avro::compileJsonSchema(readerStream, readerSchema);
>     avro::compileJsonSchema(writerStream, writerSchema);
>     
>     avro::DecoderPtr decoder = avro::resolvingDecoder(writerSchema, readerSchema, avro::jsonDecoder(writerSchema));
>     struct Outer outer;
>     
>     std::stringstream jsonStream("{\"extraArray\":[{\"first\":{\"field\":\"here is a string field\"},\"second\":{\"field\":\"here is another string field\"},\"innerArray\":[{\"number\":1},{\"number\":2},{\"number\":3}]},{\"first\":{\"field\":\"second item in array\"},\"second\":{\"field\":\"inner2 field of 2\"},\"innerArray\":[{\"number\":4},{\"number\":5}]},{\"first\":{\"field\":\"third item in array\"},\"second\":{\"field\":\"inner2 field of 3\"},\"innerArray\":[{\"number\":6}]}]}");
>     std::auto_ptr<avro::InputStream> input = avro::istreamInputStream(jsonStream);
>     decoder->init(*input);
>     avro::decode(*decoder, outer);
> }
> {code}
> {code:title=Multiple nesting of unknown records}
> {
>     std::string readerString("{\"type\":\"record\",\"name\":\"CombinationExtra\",\"fields\":[{\"name\":\"outerAsField\",\"type\":{\"type\":\"record\",\"name\":\"OuterExtra\",\"fields\":[{\"name\":\"extraArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArrayExtraArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}}]}}}]}}]}");
>     std::string writerString("{\"type\":\"record\",\"name\":\"CombinationExtra\",\"fields\":[{\"name\":\"outerAsField\",\"type\":{\"type\":\"record\",\"name\":\"OuterExtra\",\"fields\":[{\"name\":\"extraArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArrayExtraArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"innerArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"Inner3\",\"fields\":[{\"name\":\"number\",\"type\":\"int\"}]}}}]}}}]}}]}");
>     std::stringstream readerStream(readerString);
>     std::stringstream writerStream(writerString);
>     
>     avro::ValidSchema readerSchema;
>     avro::ValidSchema writerSchema;
>     
>     avro::compileJsonSchema(readerStream, readerSchema);
>     avro::compileJsonSchema(writerStream, writerSchema);
>     
>     avro::DecoderPtr decoder = avro::resolvingDecoder(writerSchema, readerSchema, avro::jsonDecoder(writerSchema));
>     struct Outer outer;
>     
>     std::stringstream jsonStream("{\"outerAsField\":{\"extraArray\":[{\"first\":{\"field\":\"here is a string field\"},\"second\":{\"field\":\"here is another string field\"},\"innerArray\":[{\"number\":1},{\"number\":2},{\"number\":3}]}]}}");
>     std::auto_ptr<avro::InputStream> input = avro::istreamInputStream(jsonStream);
>     decoder->init(*input);
>     avro::decode(*decoder, outer);
> }
> {code}
> The following will generate a proper object according to the reader schema and completely ignore the extraneous characters at the end of the stream.
> {code:title=Garbage after appended field of new record}
> {
>     std::string readerString("{\"type\":\"record\",\"name\":\"InArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}}]}");
>     std::string writerString("{\"type\":\"record\",\"name\":\"InArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"third\",\"type\":{\"type\":\"record\",\"name\":\"Inner3\",\"fields\":[{\"name\":\"number\",\"type\":\"int\"}]}}]}");
>     std::stringstream readerStream(readerString);
>     std::stringstream writerStream(writerString);
>     
>     avro::ValidSchema readerSchema;
>     avro::ValidSchema writerSchema;
>     
>     avro::compileJsonSchema(readerStream, readerSchema);
>     avro::compileJsonSchema(writerStream, writerSchema);
>     
>     avro::DecoderPtr decoder = avro::resolvingDecoder(writerSchema, readerSchema, avro::jsonDecoder(writerSchema));
>     struct Outer outer;
>     
>     std::stringstream jsonStream("{\"first\":{\"field\":\"here is a string field\"},\"second\":{\"field\":\"here is another string field\"},\"third\":{\"number\":3} GARBAGE_HERE}");
>     std::auto_ptr<avro::InputStream> input = avro::istreamInputStream(jsonStream);
>     decoder->init(*input);
>     avro::decode(*decoder, outer);
> }
> {code}

--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators
For more information on JIRA, see: http://www.atlassian.com/software/jira