Posted to issues@beam.apache.org by "ASF GitHub Bot (JIRA)" <ji...@apache.org> on 2019/04/16 05:57:00 UTC

[jira] [Work logged] (BEAM-6522) Avro RecordSchema class is not picklable

     [ https://issues.apache.org/jira/browse/BEAM-6522?focusedWorklogId=228147&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-228147 ]

ASF GitHub Bot logged work on BEAM-6522:
----------------------------------------

                Author: ASF GitHub Bot
            Created on: 16/Apr/19 05:56
            Start Date: 16/Apr/19 05:56
    Worklog Time Spent: 10m 
      Work Description: tvalentyn commented on pull request #8130: [BEAM-6522] Fix fastavro on Python 3
URL: https://github.com/apache/beam/pull/8130#discussion_r275099438
 
 

 ##########
 File path: sdks/python/apache_beam/io/avroio.py
 ##########
 @@ -65,20 +64,28 @@
 
 # pylint: disable=wrong-import-order, wrong-import-position, ungrouped-imports
 try:
-  from avro.schema import Parse # avro-python3 library for python3
+  from avro.schema import parse # avro library for python2
+  import avro
+  from avro import io as avroio
+  from avro import datafile
 except ImportError:
-  from avro.schema import parse as Parse # avro library for python2
+  pass
 # pylint: enable=wrong-import-order, wrong-import-position, ungrouped-imports
 
+
 __all__ = ['ReadFromAvro', 'ReadAllFromAvro', 'WriteToAvro']
+if sys.version_info[0] >= 3:
+  fastavro_default = True
 
 Review comment:
   Let's introduce a function `_use_fastavro()` instead of `fastavro_default`, and pass it as a default value, e.g. `..., use_fastavro=_use_fastavro()`. Let's make sure to document the behavior of the default value in the docstrings of the public API.
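
   A minimal sketch of what that suggestion could look like (illustrative only: `_use_fastavro()` is the name proposed above, while the `WriteToAvro` signature shown here is trimmed down and is not the actual Beam code):

import sys

import apache_beam as beam


def _use_fastavro():
  """Returns True when fastavro should be used by default (i.e. on Python 3)."""
  return sys.version_info[0] >= 3


class WriteToAvro(beam.PTransform):
  """Writes a PCollection to Avro files (parameters trimmed for this sketch)."""

  def __init__(self, file_path_prefix, schema, use_fastavro=_use_fastavro()):
    """
    Args:
      use_fastavro: when not passed explicitly, defaults to _use_fastavro(),
        i.e. True on Python 3 and False on Python 2.
    """
    self._file_path_prefix = file_path_prefix
    self._schema = schema
    self._use_fastavro = use_fastavro

   Because the default is evaluated once when the module is imported, callers on Python 3 get fastavro unless they pass use_fastavro=False explicitly.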
   
 
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 228147)
    Time Spent: 1h 40m  (was: 1.5h)

> Avro RecordSchema class is not picklable
> ----------------------------------------
>
>                 Key: BEAM-6522
>                 URL: https://issues.apache.org/jira/browse/BEAM-6522
>             Project: Beam
>          Issue Type: Sub-task
>          Components: sdk-py-core
>            Reporter: Robbe
>            Assignee: Robbe
>            Priority: Major
>              Labels: triaged
>          Time Spent: 1h 40m
>  Remaining Estimate: 0h
>
> The avroio module still has 4 failing tests. These are really the same 2 tests run twice, once against Avro and once against Fastavro.
> *apache_beam.io.avroio_test.TestAvro.test_sink_transform*
>  *apache_beam.io.avroio_test.TestFastAvro.test_sink_transform*
> fail with:
> {code}
> Traceback (most recent call last):
>   File "/home/robbe/workspace/beam/sdks/python/apache_beam/io/avroio_test.py", line 432, in test_sink_transform
>     | avroio.WriteToAvro(path, self.SCHEMA, use_fastavro=self.use_fastavro)
>   File "/home/robbe/workspace/beam/sdks/python/apache_beam/pvalue.py", line 112, in __or__
>     return self.pipeline.apply(ptransform, self)
>   File "/home/robbe/workspace/beam/sdks/python/apache_beam/pipeline.py", line 515, in apply
>     pvalueish_result = self.runner.apply(transform, pvalueish, self._options)
>   File "/home/robbe/workspace/beam/sdks/python/apache_beam/runners/runner.py", line 193, in apply
>     return m(transform, input, options)
>   File "/home/robbe/workspace/beam/sdks/python/apache_beam/runners/runner.py", line 199, in apply_PTransform
>     return transform.expand(input)
>   File "/home/robbe/workspace/beam/sdks/python/apache_beam/io/avroio.py", line 528, in expand
>     return pcoll | beam.io.iobase.Write(self._sink)
>   File "/home/robbe/workspace/beam/sdks/python/apache_beam/pvalue.py", line 112, in __or__
>     return self.pipeline.apply(ptransform, self)
>   File "/home/robbe/workspace/beam/sdks/python/apache_beam/pipeline.py", line 515, in apply
>     pvalueish_result = self.runner.apply(transform, pvalueish, self._options)
>   File "/home/robbe/workspace/beam/sdks/python/apache_beam/runners/runner.py", line 193, in apply
>     return m(transform, input, options)
>   File "/home/robbe/workspace/beam/sdks/python/apache_beam/runners/runner.py", line 199, in apply_PTransform
>     return transform.expand(input)
>   File "/home/robbe/workspace/beam/sdks/python/apache_beam/io/iobase.py", line 960, in expand
>     return pcoll | WriteImpl(self.sink)
>   File "/home/robbe/workspace/beam/sdks/python/apache_beam/pvalue.py", line 112, in __or__
>     return self.pipeline.apply(ptransform, self)
>   File "/home/robbe/workspace/beam/sdks/python/apache_beam/pipeline.py", line 515, in apply
>     pvalueish_result = self.runner.apply(transform, pvalueish, self._options)
>   File "/home/robbe/workspace/beam/sdks/python/apache_beam/runners/runner.py", line 193, in apply
>     return m(transform, input, options)
>   File "/home/robbe/workspace/beam/sdks/python/apache_beam/runners/runner.py", line 199, in apply_PTransform
>     return transform.expand(input)
>   File "/home/robbe/workspace/beam/sdks/python/apache_beam/io/iobase.py", line 979, in expand
>     lambda _, sink: sink.initialize_write(), self.sink)
>   File "/home/robbe/workspace/beam/sdks/python/apache_beam/transforms/core.py", line 1103, in Map
>     pardo = FlatMap(wrapper, *args, **kwargs)
>   File "/home/robbe/workspace/beam/sdks/python/apache_beam/transforms/core.py", line 1054, in FlatMap
>     pardo = ParDo(CallableWrapperDoFn(fn), *args, **kwargs)
>   File "/home/robbe/workspace/beam/sdks/python/apache_beam/transforms/core.py", line 864, in __init__
>     super(ParDo, self).__init__(fn, *args, **kwargs)
>   File "/home/robbe/workspace/beam/sdks/python/apache_beam/transforms/ptransform.py", line 646, in __init__
>     self.args = pickler.loads(pickler.dumps(self.args))
>   File "/home/robbe/workspace/beam/sdks/python/apache_beam/internal/pickler.py", line 247, in loads
>     return dill.loads(s)
>   File "/home/robbe/workspace/beam/sdks/python/.eggs/dill-0.2.9-py3.5.egg/dill/_dill.py", line 317, in loads
>     return load(file, ignore)
>   File "/home/robbe/workspace/beam/sdks/python/.eggs/dill-0.2.9-py3.5.egg/dill/_dill.py", line 305, in load
>     obj = pik.load()
>   File "/home/robbe/workspace/beam/sdks/python/target/.tox/py3/lib/python3.5/site-packages/avro/schema.py", line 173, in __setitem__
>     % (key, value, self))
> Exception: Attempting to map key 'favorite_color' to value <avro.schema.Field object at 0x7f8f72d0d0b8> in ImmutableDict {}
> {code}
>  
> *apache_beam.io.avroio_test.TestAvro.test_split_points*
> *apache_beam.io.avroio_test.TestFastAvro.test_split_points*
> fail with:
>  
> {code}
> Traceback (most recent call last):
>   File "/home/robbe/workspace/beam/sdks/python/apache_beam/io/avroio_test.py", line 308, in test_split_points
>     self.assertEquals(split_points_report[-10:], [(2, 1)] * 10)
> AssertionError: Lists differ: [(10, 1), (10, 1), (10, 1), (10, 1), (10, 1[42 chars], 1)] != [(2, 1), (2, 1), (2, 1), (2, 1), (2, 1), (2[32 chars], 1)]
> First differing element 0:
> (10, 1)
> (2, 1)
> + [(2, 1), (2, 1), (2, 1), (2, 1), (2, 1), (2, 1), (2, 1), (2, 1), (2, 1), (2, 1)]
> - [(10, 1),
> - (10, 1),
> - (10, 1),
> - (10, 1),
> - (10, 1),
> - (10, 1),
> - (10, 1),
> - (10, 1),
> - (10, 1),
> - (10, 1)] 
> {code}
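
A minimal reproduction sketch for the first failure outside of Beam (assumptions: the avro-python3 package on Python 3, dill installed since that is what Beam's internal pickler wraps, and a reduced schema; the exact error text may differ between avro versions):

# Illustrative reproduction, not part of the Beam test suite.
import dill
from avro import schema

# avro-python3 exposes Parse; the Python 2 avro package exposes parse.
parsed = schema.Parse('''
{
  "type": "record",
  "name": "User",
  "fields": [{"name": "favorite_color", "type": ["string", "null"]}]
}
''')

# Beam's pickler round-trips transform arguments through dill; deserializing
# the RecordSchema fails while its internal ImmutableDict of fields is being
# rebuilt, which is the exception shown in the first traceback above.
dill.loads(dill.dumps(parsed))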



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)