You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@beam.apache.org by "Jeff Webb (Jira)" <ji...@apache.org> on 2021/09/14 22:51:00 UTC

[jira] [Updated] (BEAM-9386) _ReadRange in filebasedsource.py fails with IndexError

     [ https://issues.apache.org/jira/browse/BEAM-9386?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Jeff Webb updated BEAM-9386:
----------------------------
    Resolution: Won't Fix
        Status: Resolved  (was: Triage Needed)

old issue - resolving

 

> _ReadRange in filebasedsource.py fails with IndexError 
> -------------------------------------------------------
>
>                 Key: BEAM-9386
>                 URL: https://issues.apache.org/jira/browse/BEAM-9386
>             Project: Beam
>          Issue Type: Bug
>          Components: io-py-files, io-py-gcp, runner-dataflow, sdk-py-core
>    Affects Versions: 2.17.0
>            Reporter: Fabian Rosenthal
>            Priority: P3
>
> We are using Apache Beam 2.17.0 (Python SDK on Python 3.7) with the Dataflow runner on Google Cloud Platform.
> We frequently get an "index out of range" exception (IndexError) in filebasedsource.py, specifically at this line: [https://github.com/apache/beam/blob/release-2.17.0/sdks/python/apache_beam/io/filebasedsource.py#L370]
> The whole stack trace:
> {code:java}
> Traceback (most recent call last):
>   File "/usr/local/lib/python3.7/site-packages/dataflow_worker/batchworker.py", line 650, in do_work
>     work_executor.execute()
>   File "/usr/local/lib/python3.7/site-packages/dataflow_worker/executor.py", line 176, in execute
>     op.start()
>   File "dataflow_worker/shuffle_operations.py", line 50, in dataflow_worker.shuffle_operations.GroupedShuffleReadOperation.start
>   File "dataflow_worker/shuffle_operations.py", line 51, in dataflow_worker.shuffle_operations.GroupedShuffleReadOperation.start
>   File "dataflow_worker/shuffle_operations.py", line 66, in dataflow_worker.shuffle_operations.GroupedShuffleReadOperation.start
>   File "dataflow_worker/shuffle_operations.py", line 67, in dataflow_worker.shuffle_operations.GroupedShuffleReadOperation.start
>   File "dataflow_worker/shuffle_operations.py", line 71, in dataflow_worker.shuffle_operations.GroupedShuffleReadOperation.start
>   File "apache_beam/runners/worker/operations.py", line 256, in apache_beam.runners.worker.operations.Operation.output
>   File "apache_beam/runners/worker/operations.py", line 143, in apache_beam.runners.worker.operations.SingletonConsumerSet.receive
>   File "dataflow_worker/shuffle_operations.py", line 234, in dataflow_worker.shuffle_operations.BatchGroupAlsoByWindowsOperation.process
>   File "dataflow_worker/shuffle_operations.py", line 241, in dataflow_worker.shuffle_operations.BatchGroupAlsoByWindowsOperation.process
>   File "apache_beam/runners/worker/operations.py", line 256, in apache_beam.runners.worker.operations.Operation.output
>   File "apache_beam/runners/worker/operations.py", line 143, in apache_beam.runners.worker.operations.SingletonConsumerSet.receive
>   File "apache_beam/runners/worker/operations.py", line 593, in apache_beam.runners.worker.operations.DoOperation.process
>   File "apache_beam/runners/worker/operations.py", line 594, in apache_beam.runners.worker.operations.DoOperation.process
>   File "apache_beam/runners/common.py", line 776, in apache_beam.runners.common.DoFnRunner.receive
>   File "apache_beam/runners/common.py", line 782, in apache_beam.runners.common.DoFnRunner.process
>   File "apache_beam/runners/common.py", line 834, in apache_beam.runners.common.DoFnRunner._reraise_augmented
>   File "apache_beam/runners/common.py", line 780, in apache_beam.runners.common.DoFnRunner.process
>   File "apache_beam/runners/common.py", line 440, in apache_beam.runners.common.SimpleInvoker.invoke_process
>   File "apache_beam/runners/common.py", line 919, in apache_beam.runners.common._OutputProcessor.process_outputs
>   File "apache_beam/runners/worker/operations.py", line 143, in apache_beam.runners.worker.operations.SingletonConsumerSet.receive
>   File "apache_beam/runners/worker/operations.py", line 593, in apache_beam.runners.worker.operations.DoOperation.process
>   File "apache_beam/runners/worker/operations.py", line 594, in apache_beam.runners.worker.operations.DoOperation.process
>   File "apache_beam/runners/common.py", line 776, in apache_beam.runners.common.DoFnRunner.receive
>   File "apache_beam/runners/common.py", line 782, in apache_beam.runners.common.DoFnRunner.process
>   File "apache_beam/runners/common.py", line 834, in apache_beam.runners.common.DoFnRunner._reraise_augmented
>   File "apache_beam/runners/common.py", line 780, in apache_beam.runners.common.DoFnRunner.process
>   File "apache_beam/runners/common.py", line 440, in apache_beam.runners.common.SimpleInvoker.invoke_process
>   File "apache_beam/runners/common.py", line 919, in apache_beam.runners.common._OutputProcessor.process_outputs
>   File "apache_beam/runners/worker/operations.py", line 143, in apache_beam.runners.worker.operations.SingletonConsumerSet.receive
>   File "apache_beam/runners/worker/operations.py", line 593, in apache_beam.runners.worker.operations.DoOperation.process
>   File "apache_beam/runners/worker/operations.py", line 594, in apache_beam.runners.worker.operations.DoOperation.process
>   File "apache_beam/runners/common.py", line 776, in apache_beam.runners.common.DoFnRunner.receive
>   File "apache_beam/runners/common.py", line 782, in apache_beam.runners.common.DoFnRunner.process
>   File "apache_beam/runners/common.py", line 849, in apache_beam.runners.common.DoFnRunner._reraise_augmented
>   File "/usr/local/lib/python3.7/site-packages/future/utils/__init__.py", line 421, in raise_with_traceback
>     raise exc.with_traceback(traceback)
>   File "apache_beam/runners/common.py", line 780, in apache_beam.runners.common.DoFnRunner.process
>   File "apache_beam/runners/common.py", line 440, in apache_beam.runners.common.SimpleInvoker.invoke_process
>   File "apache_beam/runners/common.py", line 895, in apache_beam.runners.common._OutputProcessor.process_outputs
>   File "/usr/local/lib/python3.7/site-packages/apache_beam/io/filebasedsource.py", line 370, in process
>     source = list(source.split(float('inf')))[0].source
> IndexError: list index out of range [while running 'example/new_data/read/ReadAllFiles/ReadRange']
> {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)