You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@iceberg.apache.org by GitBox <gi...@apache.org> on 2019/10/02 19:34:19 UTC

[GitHub] [incubator-iceberg] danielcweeks commented on a change in pull request #407: [python] Parquet read path

danielcweeks commented on a change in pull request #407: [python] Parquet read path
URL: https://github.com/apache/incubator-iceberg/pull/407#discussion_r330652615
 
 

 ##########
 File path: python/iceberg/core/avro/avro_to_iceberg.py
 ##########
 @@ -228,37 +232,41 @@ def is_option_schema(field_type):
         return False
 
     @staticmethod
-    def read_avro_row(iceberg_schema, avro_reader):
-        try:
-            avro_row = avro_reader.__next__()
+    def read_avro_file(iceberg_schema, data_file):
+        fo = data_file.new_fo()
+        avro_reader = fastavro.reader(fo)
+        for avro_row in avro_reader:
             iceberg_row = dict()
             for field in iceberg_schema.as_struct().fields:
                 iceberg_row[field.name] = AvroToIceberg.get_field_from_avro(avro_row, field)
             yield iceberg_row
+        fo.close()
+
+    @staticmethod
+    def read_avro_row(iceberg_schema, avro_reader):
+        try:
+            for avro_row in avro_reader:
+                iceberg_row = dict()
+                for field in iceberg_schema.as_struct().fields:
+                    iceberg_row[field.name] = AvroToIceberg.get_field_from_avro(avro_row, field)
+                yield iceberg_row
         except StopIteration:
             return
 
     @staticmethod
     def get_field_from_avro(avro_row, field):
-        process_funcs = {TypeID.STRUCT: lambda avro_row, field: AvroToIceberg.get_field_from_struct(avro_row, field),
-                         TypeID.LIST: lambda avro_row, field: AvroToIceberg.get_field_from_list(avro_row, field),
-                         TypeID.MAP: lambda avro_row, field: AvroToIceberg.get_field_from_map(avro_row, field)}
-        if field.type.is_primitive_type():
-            processing_func = AvroToIceberg.get_field_from_primitive
-        else:
-            processing_func = process_funcs.get(field.type.type_id)
-
-        if processing_func is None:
+        try:
 
 Review comment:
   I'm not clear on how we handle values that are missing from the Avro record here. If an optional field is not present in the Avro record, we should emit "null", but it appears that we always fail.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org
For additional commands, e-mail: issues-help@iceberg.apache.org