You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by fo...@apache.org on 2022/09/20 18:07:43 UTC
[iceberg] branch master updated: Python: Handle optional Avro fields in conversion. (#5796)
This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new 53e056a277 Python: Handle optional Avro fields in conversion. (#5796)
53e056a277 is described below
commit 53e056a2771fd8aa6b88d6e21e0586c1b6a92ac8
Author: Joshua Robinson <33...@users.noreply.github.com>
AuthorDate: Tue Sep 20 20:07:36 2022 +0200
Python: Handle optional Avro fields in conversion. (#5796)
Found while processing a manifest entry with an empty split_offsets field.
In _convert_pos_to_dict, check whether values is None before processing it as a
list, struct, or dict. Added unit tests to verify the behavior.
Thanks to @fokko for the fix.
---
python/pyiceberg/manifest.py | 20 +++++++++++------
python/tests/avro/test_reader.py | 46 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 60 insertions(+), 6 deletions(-)
diff --git a/python/pyiceberg/manifest.py b/python/pyiceberg/manifest.py
index 751dfbef2a..8619d18abf 100644
--- a/python/pyiceberg/manifest.py
+++ b/python/pyiceberg/manifest.py
@@ -173,22 +173,30 @@ def _(schema: Schema, struct: AvroStruct) -> Dict[str, Any]:
@_convert_pos_to_dict.register
def _(struct_type: StructType, values: AvroStruct) -> Dict[str, Any]:
"""Iterates over all the fields in the dict, and gets the data from the struct"""
- return {field.name: _convert_pos_to_dict(field.field_type, values.get(pos)) for pos, field in enumerate(struct_type.fields)}
+ return (
+ {field.name: _convert_pos_to_dict(field.field_type, values.get(pos)) for pos, field in enumerate(struct_type.fields)}
+ if values is not None
+ else None
+ )
@_convert_pos_to_dict.register
def _(list_type: ListType, values: List[Any]) -> Any:
"""In the case of a list, we'll go over the elements in the list to handle complex types"""
- return [_convert_pos_to_dict(list_type.element_type, value) for value in values]
+ return [_convert_pos_to_dict(list_type.element_type, value) for value in values] if values is not None else None
@_convert_pos_to_dict.register
def _(map_type: MapType, values: Dict) -> Dict:
"""In the case of a map, we both traverse over the key and value to handle complex types"""
- return {
- _convert_pos_to_dict(map_type.key_type, key): _convert_pos_to_dict(map_type.value_type, value)
- for key, value in values.items()
- }
+ return (
+ {
+ _convert_pos_to_dict(map_type.key_type, key): _convert_pos_to_dict(map_type.value_type, value)
+ for key, value in values.items()
+ }
+ if values is not None
+ else None
+ )
@_convert_pos_to_dict.register
diff --git a/python/tests/avro/test_reader.py b/python/tests/avro/test_reader.py
index 44505a6174..77e186b4f7 100644
--- a/python/tests/avro/test_reader.py
+++ b/python/tests/avro/test_reader.py
@@ -36,6 +36,7 @@ from pyiceberg.avro.reader import (
TimestamptzReader,
primitive_reader,
)
+from pyiceberg.manifest import _convert_pos_to_dict
from pyiceberg.schema import Schema
from pyiceberg.types import (
BinaryType,
@@ -46,9 +47,13 @@ from pyiceberg.types import (
FixedType,
FloatType,
IntegerType,
+ ListType,
LongType,
+ MapType,
+ NestedField,
PrimitiveType,
StringType,
+ StructType,
TimestampType,
TimestamptzType,
TimeType,
@@ -395,6 +400,47 @@ def test_read_manifest_file_file(generated_manifest_file_file: str):
assert actual == expected
+def test_null_list_convert_pos_to_dict():
+ data = _convert_pos_to_dict(
+ Schema(
+ NestedField(name="field", field_id=1, field_type=ListType(element_id=2, element=StringType(), element_required=False))
+ ),
+ AvroStruct([None]),
+ )
+ assert data["field"] is None
+
+
+def test_null_dict_convert_pos_to_dict():
+ data = _convert_pos_to_dict(
+ Schema(
+ NestedField(
+ name="field",
+ field_id=1,
+ field_type=MapType(key_id=2, key_type=StringType(), value_id=3, value_type=StringType(), value_required=False),
+ )
+ ),
+ AvroStruct([None]),
+ )
+ assert data["field"] is None
+
+
+def test_null_struct_convert_pos_to_dict():
+ data = _convert_pos_to_dict(
+ Schema(
+ NestedField(
+ name="field",
+ field_id=1,
+ field_type=StructType(
+ NestedField(2, "required_field", StringType(), True), NestedField(3, "optional_field", IntegerType())
+ ),
+ required=False,
+ )
+ ),
+ AvroStruct([None]),
+ )
+ assert data["field"] is None
+
+
def test_fixed_reader():
assert primitive_reader(FixedType(22)) == FixedReader(22)