You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@avro.apache.org by "Jerome Delrieu (Jira)" <ji...@apache.org> on 2023/07/21 13:34:00 UTC

[jira] [Created] (AVRO-3810) Incorrect JSON generated by avro::jsonEncoder from avro::GenericDatum when schema contains as last value a record with 0 field

Jerome Delrieu created AVRO-3810:
------------------------------------

             Summary: Incorrect JSON generated by avro::jsonEncoder from avro::GenericDatum when schema contains as last value a record with 0 field
                 Key: AVRO-3810
                 URL: https://issues.apache.org/jira/browse/AVRO-3810
             Project: Apache Avro
          Issue Type: Bug
          Components: c++
         Environment: Windows, 64bit, compiled with msvc
            Reporter: Jerome Delrieu


When the schema's last value is a record with zero fields, avro::jsonEncoder does not emit the final characters of the JSON string.

The obtained JSON string is malformed and cannot be parsed with a standard JSON parser.

Note: avro::jsonDecoder accepts this string even though it is not valid JSON.

small C++ example:

const char *sSchema = "{\"type\": \"record\",\"name\" : \"Event\",\"namespace\" : \"test\",\"doc\" : \"\", \"fields\" : "
    "[ { \"name\": \"Event\", \"type\" : [ {  \"type\": \"record\",  \"name\" : \"ABEvent\", \"doc\" : \"unused\","
    "\"fields\" : [ ] } ] } ] }";

avro::ValidSchema schema(avro::compileJsonSchemaFromString(sSchema));
avro::GenericDatum datumDecode(schema);
avro::DecoderPtr decoder = avro::binaryDecoder();

// only one byte, 0, as source for decoding
uint8_t binarySource = 0;
std::unique_ptr<avro::InputStream> in = avro::memoryInputStream((uint8_t *)&binarySource, 1);
decoder->init(*in);
avro::decode(*decoder, datumDecode);

std::unique_ptr<avro::OutputStream> out = avro::memoryOutputStream();

std::shared_ptr<avro::Encoder> encoder = avro::jsonEncoder(schema);

encoder->init(*out);
avro::encode(*encoder, datumDecode);
encoder->flush();

const std::shared_ptr<std::vector<uint8_t> > &buffer = avro::snapshot(*out.get());

std::string result(reinterpret_cast<char *>(buffer.get()->data()), buffer.get()->size());

 

Example Schema:

Schema:
{
  "type": "record",
  "name": "Event",
  "namespace": "test",
  "doc": "",
  "fields": [
    {
      "name": "Event",
      "type": [
        {
          "type": "record",
          "name": "ABEvent",
          "doc": "unused",
          "fields": [
            
          ]
        }
      ]
    }
  ]
}

 

Binary source : 1 byte 00

Result:

{"Event":{"test.ABEvent":

 

 

A small patch to lang\c++\impl\parsing\Symbol.hh can solve this issue
Before:
    // Repeatedly inspects the symbol on top of parsingStack and consumes it
    // while it is an implicit action or a SkipStart marker; returns as soon
    // as any other kind of symbol is on top.
    // NOTE(review): top() is called without an emptiness check — presumably
    // the stack always holds a symbol that ends the loop; confirm.
    void processImplicitActions() {
        for (;;) {
            Symbol &s = parsingStack.top();
            if (s.isImplicitAction()) {
                // Handle first, pop second: `s` is a reference to the stack top.
                handler_.handle(s);
                parsingStack.pop();
            } else if (s.kind() == Symbol::Kind::SkipStart) {
                // Drop the marker, then run skip() against the decoder.
                parsingStack.pop();
                skip(*decoder_);
            } else {
                // Every other kind (including Symbol::Kind::Indirect) is left
                // on the stack untouched.
                break;
            }
        }
    }

After:

    // Repeatedly inspects the symbol on top of parsingStack and consumes it
    // while it is an implicit action, a SkipStart marker, or an Indirect
    // symbol; returns as soon as any other kind of symbol is on top.
    // NOTE(review): top() is called without an emptiness check — presumably
    // the stack always holds a symbol that ends the loop; confirm.
    void processImplicitActions() {
        for (;;) {
            Symbol &s = parsingStack.top();
            if (s.isImplicitAction()) {
                // Handle first, pop second: `s` is a reference to the stack top.
                handler_.handle(s);
                parsingStack.pop();
            } else if (s.kind() == Symbol::Kind::SkipStart) {
                // Drop the marker, then run skip() against the decoder.
                parsingStack.pop();
                skip(*decoder_);
            } else if (s.kind() == Symbol::Kind::Indirect) {
                // Replace the Indirect symbol with a record start/end pair.
                // End is pushed first so that start is on top and is seen by
                // the next loop iteration.
                // NOTE(review): the Indirect symbol is discarded without
                // looking at what it refers to — this assumes any Indirect
                // reaching this point stands for a record with no fields;
                // confirm against non-empty records.
                parsingStack.pop();
                parsingStack.push(Symbol::recordEndSymbol());
                parsingStack.push(Symbol::recordStartSymbol());
            } else {
                // Any other kind of symbol is left on the stack untouched.
                break;
            }
        }
    }

 

 

 



--
This message was sent by Atlassian Jira
(v8.20.10#820010)