You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@avro.apache.org by "Thiruvalluvan M. G. (JIRA)" <ji...@apache.org> on 2019/01/03 02:13:00 UTC
[jira] [Updated] (AVRO-1176) ResolvingDecoder fails to resolve or parse schemas
[ https://issues.apache.org/jira/browse/AVRO-1176?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Thiruvalluvan M. G. updated AVRO-1176:
--------------------------------------
Resolution: Fixed
Status: Resolved (was: Patch Available)
Merged Pull Request
> ResolvingDecoder fails to resolve or parse schemas
> --------------------------------------------------
>
> Key: AVRO-1176
> URL: https://issues.apache.org/jira/browse/AVRO-1176
> Project: Apache Avro
> Issue Type: Bug
> Components: c++
> Affects Versions: 1.7.0
> Reporter: Keh-Li Sheng
> Assignee: Thiruvalluvan M. G.
> Priority: Major
> Labels: patch
> Fix For: 1.9.0
>
> Attachments: AVRO-1176.patch
>
>
> We have encountered a number of problems using ResolvingDecoder in the C++ project that we can trace to:
> 1. Incorrectly swapped reader/writer arguments passed to ResolvingGrammarGenerator::generate()
> 2. Using the wrong tree in ResolvingGrammarGenerator::generate() to generate the backup parsing stack
> 3. A decoder has no "hook" into the generated codec_traits decode methods for Specific that advances the resolved parse tree through the Symbol::sSkipStart nodes to ignore extra or unknown fields in the writer's data.
> 4. A resolving decoder can generate a valid decoded object even if there are garbage characters at the end of the input stream if those characters appear in a field that the reader schema is unaware of
> Reader/writer schemas that fail to parse properly are shown below. In the first example, the writer adds a field to a record that is inside an array.
> {code:title=Added field to record inside array}
> {
> std::string readerString("{\"type\":\"record\",\"name\":\"Outer\",\"fields\":[{\"name\":\"outerArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}}]}}}]}");
> std::string writerString("{\"type\":\"record\",\"name\":\"Outer\",\"fields\":[{\"name\":\"outerArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"third\",\"type\":{\"type\":\"record\",\"name\":\"Inner3\",\"fields\":[{\"name\":\"number\",\"type\":\"int\"}]}}]}}}]}");
> std::stringstream readerStream(readerString);
> std::stringstream writerStream(writerString);
>
> avro::ValidSchema readerSchema;
> avro::ValidSchema writerSchema;
>
> avro::compileJsonSchema(readerStream, readerSchema);
> avro::compileJsonSchema(writerStream, writerSchema);
>
> avro::DecoderPtr decoder = avro::resolvingDecoder(writerSchema, readerSchema, avro::jsonDecoder(writerSchema));
> struct Outer outer;
>
> std::stringstream jsonStream("{\"outerArray\":[{\"first\":{\"field\":\"here is a string field\"},\"second\":{\"field\":\"here is another string field\"},\"third\":{\"number\":3}},{\"first\":{\"field\":\"cool\"},\"second\":{\"field\":\"beans\"},\"third\":{\"number\":4}}]}");
> std::auto_ptr<avro::InputStream> input = avro::istreamInputStream(jsonStream);
> decoder->init(*input);
> avro::decode(*decoder, outer);
> }
> {code}
> {code:title=Additional array of writer-only record}
> {
> std::string readerString("{\"type\":\"record\",\"name\":\"OuterExtra\",\"fields\":[{\"name\":\"extraArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArrayExtraArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}}]}}}]}");
> std::string writerString("{\"type\":\"record\",\"name\":\"OuterExtra\",\"fields\":[{\"name\":\"extraArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArrayExtraArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"innerArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"Inner3\",\"fields\":[{\"name\":\"number\",\"type\":\"int\"}]}}}]}}}]}");
> std::stringstream readerStream(readerString);
> std::stringstream writerStream(writerString);
>
> avro::ValidSchema readerSchema;
> avro::ValidSchema writerSchema;
>
> avro::compileJsonSchema(readerStream, readerSchema);
> avro::compileJsonSchema(writerStream, writerSchema);
>
> avro::DecoderPtr decoder = avro::resolvingDecoder(writerSchema, readerSchema, avro::jsonDecoder(writerSchema));
> struct Outer outer;
>
> std::stringstream jsonStream("{\"extraArray\":[{\"first\":{\"field\":\"here is a string field\"},\"second\":{\"field\":\"here is another string field\"},\"innerArray\":[{\"number\":1},{\"number\":2},{\"number\":3}]},{\"first\":{\"field\":\"second item in array\"},\"second\":{\"field\":\"inner2 field of 2\"},\"innerArray\":[{\"number\":4},{\"number\":5}]},{\"first\":{\"field\":\"third item in array\"},\"second\":{\"field\":\"inner2 field of 3\"},\"innerArray\":[{\"number\":6}]}]}");
> std::auto_ptr<avro::InputStream> input = avro::istreamInputStream(jsonStream);
> decoder->init(*input);
> avro::decode(*decoder, outer);
> }
> {code}
> {code:title=Multiple nesting of unknown records}
> {
> std::string readerString("{\"type\":\"record\",\"name\":\"CombinationExtra\",\"fields\":[{\"name\":\"outerAsField\",\"type\":{\"type\":\"record\",\"name\":\"OuterExtra\",\"fields\":[{\"name\":\"extraArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArrayExtraArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}}]}}}]}}]}");
> std::string writerString("{\"type\":\"record\",\"name\":\"CombinationExtra\",\"fields\":[{\"name\":\"outerAsField\",\"type\":{\"type\":\"record\",\"name\":\"OuterExtra\",\"fields\":[{\"name\":\"extraArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"InArrayExtraArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"innerArray\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"Inner3\",\"fields\":[{\"name\":\"number\",\"type\":\"int\"}]}}}]}}}]}}]}");
> std::stringstream readerStream(readerString);
> std::stringstream writerStream(writerString);
>
> avro::ValidSchema readerSchema;
> avro::ValidSchema writerSchema;
>
> avro::compileJsonSchema(readerStream, readerSchema);
> avro::compileJsonSchema(writerStream, writerSchema);
>
> avro::DecoderPtr decoder = avro::resolvingDecoder(writerSchema, readerSchema, avro::jsonDecoder(writerSchema));
> struct Outer outer;
>
> std::stringstream jsonStream("{\"outerAsField\":{\"extraArray\":[{\"first\":{\"field\":\"here is a string field\"},\"second\":{\"field\":\"here is another string field\"},\"innerArray\":[{\"number\":1},{\"number\":2},{\"number\":3}]}]}}");
> std::auto_ptr<avro::InputStream> input = avro::istreamInputStream(jsonStream);
> decoder->init(*input);
> avro::decode(*decoder, outer);
> }
> {code}
> The following will generate a proper object according to the reader schema and completely ignore the extraneous characters at the end of the stream:
> {code:title=Garbage after appended field of new record}
> {
> std::string readerString("{\"type\":\"record\",\"name\":\"InArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}}]}");
> std::string writerString("{\"type\":\"record\",\"name\":\"InArray\",\"fields\":[{\"name\":\"first\",\"type\":{\"type\":\"record\",\"name\":\"Inner1\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"second\",\"type\":{\"type\":\"record\",\"name\":\"Inner2\",\"fields\":[{\"name\":\"field\",\"type\":\"string\"}]}},{\"name\":\"third\",\"type\":{\"type\":\"record\",\"name\":\"Inner3\",\"fields\":[{\"name\":\"number\",\"type\":\"int\"}]}}]}");
> std::stringstream readerStream(readerString);
> std::stringstream writerStream(writerString);
>
> avro::ValidSchema readerSchema;
> avro::ValidSchema writerSchema;
>
> avro::compileJsonSchema(readerStream, readerSchema);
> avro::compileJsonSchema(writerStream, writerSchema);
>
> avro::DecoderPtr decoder = avro::resolvingDecoder(writerSchema, readerSchema, avro::jsonDecoder(writerSchema));
> struct Outer outer;
>
> std::stringstream jsonStream("{\"first\":{\"field\":\"here is a string field\"},\"second\":{\"field\":\"here is another string field\"},\"third\":{\"number\":3} GARBAGE_HERE}");
> std::auto_ptr<avro::InputStream> input = avro::istreamInputStream(jsonStream);
> decoder->init(*input);
> avro::decode(*decoder, outer);
> }
> {code}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)