You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by rs...@apache.org on 2022/02/11 10:21:53 UTC

[avro] branch branch-1.11 updated: AVRO-3380: Raise an exception if invalid number of bytes read (#1529)

This is an automated email from the ASF dual-hosted git repository.

rskraba pushed a commit to branch branch-1.11
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/branch-1.11 by this push:
     new a9de24e  AVRO-3380: Raise an exception if invalid number of bytes read (#1529)
a9de24e is described below

commit a9de24e44874642c114593ca0e4122954505dd2b
Author: Jarkko Jaakola <91...@users.noreply.github.com>
AuthorDate: Fri Feb 11 12:20:08 2022 +0200

    AVRO-3380: Raise an exception if invalid number of bytes read (#1529)
    
    * AVRO-3380: Raise an exception if invalid number of bytes read
    
    The Python 3 compatibility layer in Python Avro 1.10.0 had an assertion
    that the number of bytes read matched the expected number of bytes.
    In cases where the schema is incompatible between reader and writer,
    or something unexpected happens, fewer bytes are read than
    expected and the BytesIO position can be at the end of the buffer.
    This causes, for example, a TypeError if the expected field is int/long,
    as ord() is called with empty data.
    
    * AVRO-3380: Rename InvalidBytesRead error to InvalidAvroBinaryEncoding
    
    * AVRO-3380: Raise InvalidAvroBinaryEncoding if < 0 bytes requested for read
    
    * AVRO-3380: Remove InvalidAvroBinaryEncoding catch in read_data
---
 lang/py/avro/errors.py       |  4 ++++
 lang/py/avro/io.py           |  7 +++++-
 lang/py/avro/test/test_io.py | 53 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/lang/py/avro/errors.py b/lang/py/avro/errors.py
index b8828d0..2c76751 100644
--- a/lang/py/avro/errors.py
+++ b/lang/py/avro/errors.py
@@ -32,6 +32,10 @@ class AvroException(Exception):
     """The base class for exceptions in avro."""
 
 
+class InvalidAvroBinaryEncoding(AvroException):
+    """For invalid numbers of bytes read."""
+
+
 class SchemaParseException(AvroException):
     """Raised when a schema failed to parse."""
 
diff --git a/lang/py/avro/io.py b/lang/py/avro/io.py
index cd6a7ae..be81d9b 100644
--- a/lang/py/avro/io.py
+++ b/lang/py/avro/io.py
@@ -222,7 +222,12 @@ class BinaryDecoder:
         """
         Read n bytes.
         """
-        return self.reader.read(n)
+        if n < 0:
+            raise avro.errors.InvalidAvroBinaryEncoding(f"Requested {n} bytes to read, expected positive integer.")
+        read_bytes = self.reader.read(n)
+        if len(read_bytes) != n:
+            raise avro.errors.InvalidAvroBinaryEncoding(f"Read {len(read_bytes)} bytes, expected {n} bytes")
+        return read_bytes
 
     def read_null(self) -> None:
         """
diff --git a/lang/py/avro/test/test_io.py b/lang/py/avro/test/test_io.py
index a07d122..99f9aa5 100644
--- a/lang/py/avro/test/test_io.py
+++ b/lang/py/avro/test/test_io.py
@@ -426,6 +426,58 @@ class DefaultValueTestCase(unittest.TestCase):
             self.assertEqual(datum_to_read, datum_read)
 
 
+class TestIncompatibleSchemaReading(unittest.TestCase):
+    def test_deserialization_fails(self) -> None:
+
+        reader_schema = avro.schema.parse(
+            json.dumps(
+                {
+                    "namespace": "example.avro",
+                    "type": "record",
+                    "name": "User",
+                    "fields": [
+                        {"name": "name", "type": "string"},
+                        {"name": "age", "type": "int"},
+                        {"name": "location", "type": "string"},
+                    ],
+                }
+            )
+        )
+        writer_schema = avro.schema.parse(
+            json.dumps(
+                {
+                    "namespace": "example.avro",
+                    "type": "record",
+                    "name": "IncompatibleUser",
+                    "fields": [
+                        {"name": "name", "type": "int"},
+                        {"name": "age", "type": "int"},
+                        {"name": "location", "type": "string"},
+                    ],
+                }
+            )
+        )
+
+        incompatibleUserRecord = {"name": 100, "age": 21, "location": "Woodford"}
+        writer = avro.io.DatumWriter(writer_schema)
+        with io.BytesIO() as writer_bio:
+            enc = avro.io.BinaryEncoder(writer_bio)
+            writer.write(incompatibleUserRecord, enc)
+            enc_bytes = writer_bio.getvalue()
+        reader = avro.io.DatumReader(reader_schema)
+        with io.BytesIO(enc_bytes) as reader_bio:
+            self.assertRaises(avro.errors.InvalidAvroBinaryEncoding, reader.read, avro.io.BinaryDecoder(reader_bio))
+
+        incompatibleUserRecord = {"name": -10, "age": 21, "location": "Woodford"}
+        with io.BytesIO() as writer_bio:
+            enc = avro.io.BinaryEncoder(writer_bio)
+            writer.write(incompatibleUserRecord, enc)
+            enc_bytes = writer_bio.getvalue()
+        reader = avro.io.DatumReader(reader_schema)
+        with io.BytesIO(enc_bytes) as reader_bio:
+            self.assertRaises(avro.errors.InvalidAvroBinaryEncoding, reader.read, avro.io.BinaryDecoder(reader_bio))
+
+
 class TestMisc(unittest.TestCase):
     def test_decimal_bytes_small_scale(self) -> None:
         """Avro should raise an AvroTypeException when attempting to write a decimal with a larger exponent than the schema's scale."""
@@ -585,6 +637,7 @@ def load_tests(loader: unittest.TestLoader, default_tests: None, pattern: None)
         SchemaPromotionTestCase(write_type, read_type) for write_type, read_type in itertools.combinations(("int", "long", "float", "double"), 2)
     )
     suite.addTests(DefaultValueTestCase(field_type, default) for field_type, default in DEFAULT_VALUE_EXAMPLES)
+    suite.addTests(loader.loadTestsFromTestCase(TestIncompatibleSchemaReading))
     return suite