You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@avro.apache.org by "Kengo Seki (JIRA)" <ji...@apache.org> on 2019/07/07 14:34:00 UTC
[jira] [Created] (AVRO-2468) Fix broken data interoperability on
the Perl bindings
Kengo Seki created AVRO-2468:
--------------------------------
Summary: Fix broken data interoperability on the Perl bindings
Key: AVRO-2468
URL: https://issues.apache.org/jira/browse/AVRO-2468
Project: Apache Avro
Issue Type: Bug
Components: interop, perl
Reporter: Kengo Seki
Assignee: Kengo Seki
I found some data interoperability problems in the Perl bindings.
1. They fail to parse a schema if it has an array/map/union that contains a named type referenced by its simple (not fully-qualified) name. For example, they can't parse {{share/test/schemas/interop.avsc}} or {{share/schemas/org/apache/avro/data/Json.avsc}}, because those schemas have a named type ("Node" and "Json", respectively) inside arrays/maps. This seems to be because the parser doesn't take the namespace into consideration when parsing an array/map/union.
{code}
$ cd lang/perl
$ perl -Ilib -de 1
(snip)
DB<1> open FH, '../../share/test/schemas/interop.avsc'; local $/ = undef; $s = <FH>; close FH; print $s
{"type": "record", "name":"Interop", "namespace": "org.apache.avro",
"fields": [
{"name": "intField", "type": "int"},
{"name": "longField", "type": "long"},
{"name": "stringField", "type": "string"},
{"name": "boolField", "type": "boolean"},
{"name": "floatField", "type": "float"},
{"name": "doubleField", "type": "double"},
{"name": "bytesField", "type": "bytes"},
{"name": "nullField", "type": "null"},
{"name": "arrayField", "type": {"type": "array", "items": "double"}},
{"name": "mapField", "type":
{"type": "map", "values":
{"type": "record", "name": "Foo",
"fields": [{"name": "label", "type": "string"}]}}},
{"name": "unionField", "type":
["boolean", "double", {"type": "array", "items": "bytes"}]},
{"name": "enumField", "type":
{"type": "enum", "name": "Kind", "symbols": ["A","B","C"]}},
{"name": "fixedField", "type":
{"type": "fixed", "name": "MD5", "size": 16}},
{"name": "recordField", "type":
{"type": "record", "name": "Node",
"fields": [
{"name": "label", "type": "string"},
{"name": "children", "type": {"type": "array", "items": "Node"}}]}}
]
}
DB<2> use Avro::Schema; Avro::Schema->parse($s)
Not a primitive type Node at lib/Avro/Schema.pm line 257.
{code}
2. They serialize the size of a fixed type as a JSON string rather than a number (e.g. "size":"16" instead of "size":16) when writing the schema, so other language bindings fail to parse it.
{code}
$ cd lang/perl
$ perl -Ilib -de 1
(snip)
DB<1> use Avro::Schema; $s = Avro::Schema->parse('{"type": "fixed", "size": 16, "name": "md5"}')
DB<2> open($fh, '>/tmp/output')
DB<3> use Avro::DataFileWriter; $w = Avro::DataFileWriter->new(fh => $fh, writer_schema => $s)
DB<4> $w->print('0123456789abcdef')
DB<5> $w->close
{code}
{code}
$ ipython
(snip)
In [1]: from avro.datafile import DataFileReader
In [2]: from avro.io import DatumReader
In [3]: DataFileReader(datum_reader=DatumReader(), reader=open("/tmp/output"))
---------------------------------------------------------------------------
AvroException Traceback (most recent call last)
<ipython-input-3-13da25c7d572> in <module>()
----> 1 DataFileReader(datum_reader=DatumReader(), reader=open("/tmp/output"))
/home/sekikn/repo/avro/lang/py/src/avro/datafile.pyc in __init__(self, reader, datum_reader)
255 # get ready to read
256 self._block_count = 0
--> 257 self.datum_reader.writers_schema = schema.parse(self.get_meta(SCHEMA_KEY))
258
259 def __enter__(self):
/home/sekikn/repo/avro/lang/py/src/avro/schema.pyc in parse(json_string)
984
985 # construct the Avro Schema object
--> 986 return make_avsc_object(json_data, names)
/home/sekikn/repo/avro/lang/py/src/avro/schema.pyc in make_avsc_object(json_data, names)
931 scale = 0 if json_data.get('scale') is None else json_data.get('scale')
932 return FixedDecimalSchema(size, name, precision, scale, namespace, names, other_props)
--> 933 return FixedSchema(name, namespace, size, names, other_props)
934 elif type == 'enum':
935 symbols = json_data.get('symbols')
/home/sekikn/repo/avro/lang/py/src/avro/schema.pyc in __init__(self, name, namespace, size, names, other_props)
482 if not isinstance(size, int) or size < 0:
483 fail_msg = 'Fixed Schema requires a valid positive integer for size property.'
--> 484 raise AvroException(fail_msg)
485
486 # Call parent ctor
AvroException: Fixed Schema requires a valid positive integer for size property.
{code}
{code}
$ strings /tmp/output
avro.schemaR{"size":"16","type":"fixed","name":"md5"}
(snip)
{code}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)