Posted to issues@beam.apache.org by "Brian Hulette (Jira)" <ji...@apache.org> on 2022/04/20 16:39:00 UTC
[jira] [Work started] (BEAM-14336) Access Denied: Table bigquery-samples:airline_ontime_data.flights
[ https://issues.apache.org/jira/browse/BEAM-14336?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Work on BEAM-14336 started by Brian Hulette.
--------------------------------------------
> Access Denied: Table bigquery-samples:airline_ontime_data.flights
> -----------------------------------------------------------------
>
> Key: BEAM-14336
> URL: https://issues.apache.org/jira/browse/BEAM-14336
> Project: Beam
> Issue Type: Bug
> Components: dsl-dataframe, examples-python
> Reporter: Andrew Pilloud
> Assignee: Brian Hulette
> Priority: P0
>
> This test has been perma-red for the last week:
> https://ci-beam.apache.org/job/beam_PostCommit_Python36/5189/testReport/junit/apache_beam.examples.dataframe.flight_delays_it_test/FlightDelaysTest/test_flight_delays/
> https://ci-beam.apache.org/job/beam_PostCommit_Python36/5215/testReport/apache_beam.examples.dataframe.flight_delays_it_test/FlightDelaysTest/test_flight_delays/
> {code}
> self = <apache_beam.examples.dataframe.flight_delays_it_test.FlightDelaysTest testMethod=test_flight_delays>
>
>     @pytest.mark.examples_postcommit
>     @pytest.mark.it_postcommit
>     def test_flight_delays(self):
>       flight_delays.run_flight_delay_pipeline(
>           self.test_pipeline,
>           start_date='2012-12-23',
>           end_date='2012-12-25',
> >         output=self.output_path)
> apache_beam/examples/dataframe/flight_delays_it_test.py:110:
> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> apache_beam/examples/dataframe/flight_delays.py:105: in run_flight_delay_pipeline
>     result.to_csv(output)
> apache_beam/pipeline.py:596: in __exit__
>     self.result = self.run()
> apache_beam/testing/test_pipeline.py:114: in run
>     False if self.not_use_test_runner_api else test_runner_api))
> apache_beam/pipeline.py:549: in run
>     self._options).run(False)
> apache_beam/pipeline.py:573: in run
>     return self.runner.run_pipeline(self, self._options)
> apache_beam/runners/dataflow/test_dataflow_runner.py:64: in run_pipeline
>     self.result.wait_until_finish(duration=wait_duration)
> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> self = <DataflowPipelineResult <Job
>  clientRequestId: '20220420090044988852-7153'
>  createTime: '2022-04-20T09:00:55.136265Z'
>  ...022-04-20T09:00:55.136265Z'
>  steps: []
>  tempFiles: []
>  type: TypeValueValuesEnum(JOB_TYPE_BATCH, 1)> at 0x7fcf0b6f0e80>
> duration = None
>     def wait_until_finish(self, duration=None):
>       if not self.is_in_terminal_state():
>         if not self.has_job:
>           raise IOError('Failed to get the Dataflow job id.')
>
>         thread = threading.Thread(
>             target=DataflowRunner.poll_for_job_completion,
>             args=(self._runner, self, duration))
>
>         # Mark the thread as a daemon thread so a keyboard interrupt on the main
>         # thread will terminate everything. This is also the reason we will not
>         # use thread.join() to wait for the polling thread.
>         thread.daemon = True
>         thread.start()
>         while thread.is_alive():
>           time.sleep(5.0)
>
>         # TODO: Merge the termination code in poll_for_job_completion and
>         # is_in_terminal_state.
>         terminated = self.is_in_terminal_state()
>         assert duration or terminated, (
>             'Job did not reach to a terminal state after waiting indefinitely.')
>
>         # TODO(BEAM-14291): Also run this check if wait_until_finish was called
>         # after the pipeline completed.
>         if terminated and self.state != PipelineState.DONE:
>           # TODO(BEAM-1290): Consider converting this to an error log based on
>           # the resolution of the issue.
>           raise DataflowRuntimeException(
>               'Dataflow pipeline failed. State: %s, Error:\n%s' %
>               (self.state, getattr(self._runner, 'last_error_msg', None)),
> >             self)
> E       apache_beam.runners.dataflow.dataflow_runner.DataflowRuntimeException: Dataflow pipeline failed. State: FAILED, Error:
> E       Traceback (most recent call last):
> E         File "/usr/local/lib/python3.6/site-packages/dataflow_worker/batchworker.py", line 646, in do_work
> E           work_executor.execute()
> E         File "/usr/local/lib/python3.6/site-packages/dataflow_worker/executor.py", line 255, in execute
> E           self._split_task)
> E         File "/usr/local/lib/python3.6/site-packages/dataflow_worker/executor.py", line 263, in _perform_source_split_considering_api_limits
> E           desired_bundle_size)
> E         File "/usr/local/lib/python3.6/site-packages/dataflow_worker/executor.py", line 300, in _perform_source_split
> E           for split in source.split(desired_bundle_size):
> E         File "/usr/local/lib/python3.6/site-packages/apache_beam/io/gcp/bigquery.py", line 810, in split
> E           self._setup_temporary_dataset(bq)
> E         File "/usr/local/lib/python3.6/site-packages/apache_beam/options/value_provider.py", line 193, in _f
> E           return fnc(self, *args, **kwargs)
> E         File "/usr/local/lib/python3.6/site-packages/apache_beam/io/gcp/bigquery.py", line 849, in _setup_temporary_dataset
> E           self._get_project(), self.query.get(), self.use_legacy_sql)
> E         File "/usr/local/lib/python3.6/site-packages/apache_beam/utils/retry.py", line 253, in wrapper
> E           return fun(*args, **kwargs)
> E         File "/usr/local/lib/python3.6/site-packages/apache_beam/io/gcp/bigquery_tools.py", line 416, in get_query_location
> E           response = self.client.jobs.Insert(request)
> E         File "/usr/local/lib/python3.6/site-packages/apache_beam/io/gcp/internal/clients/bigquery/bigquery_v2_client.py", line 345, in Insert
> E           upload=upload, upload_config=upload_config)
> E         File "/usr/local/lib/python3.6/site-packages/apitools/base/py/base_api.py", line 731, in _RunMethod
> E           return self.ProcessHttpResponse(method_config, http_response, request)
> E         File "/usr/local/lib/python3.6/site-packages/apitools/base/py/base_api.py", line 737, in ProcessHttpResponse
> E           self.__ProcessHttpResponse(method_config, http_response, request))
> E         File "/usr/local/lib/python3.6/site-packages/apitools/base/py/base_api.py", line 604, in __ProcessHttpResponse
> E           http_response, method_config=method_config, request=request)
> E       apitools.base.py.exceptions.HttpForbiddenError: HttpError accessing <https://bigquery.googleapis.com/bigquery/v2/projects/apache-beam-testing/jobs?alt=json>: response: <{'vary': 'Origin, X-Origin, Referer', 'content-type': 'application/json; charset=UTF-8', 'date': 'Wed, 20 Apr 2022 09:06:44 GMT', 'server': 'ESF', 'cache-control': 'private', 'x-xss-protection': '0', 'x-frame-options': 'SAMEORIGIN', 'x-content-type-options': 'nosniff', 'transfer-encoding': 'chunked', 'status': '403', 'content-length': '528', '-content-encoding': 'gzip'}>, content <{
> E         "error": {
> E           "code": 403,
> E           "message": "Access Denied: Table bigquery-samples:airline_ontime_data.flights: User does not have permission to query table bigquery-samples:airline_ontime_data.flights.",
> E           "errors": [
> E             {
> E               "message": "Access Denied: Table bigquery-samples:airline_ontime_data.flights: User does not have permission to query table bigquery-samples:airline_ontime_data.flights.",
> E               "domain": "global",
> E               "reason": "accessDenied"
> E             }
> E           ],
> E           "status": "PERMISSION_DENIED"
> E         }
> E       }
> E       >
> apache_beam/runners/dataflow/dataflow_runner.py:1661: DataflowRuntimeException
> {code}
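> To narrow this down, a minimal standalone sketch to check access to the table outside of Beam may help. This assumes the google-cloud-bigquery client library and that application default credentials resolve to the same service account the Dataflow workers use (both assumptions; this snippet is not part of the test suite):
> {code}
> # Hypothetical access check: issue a trivial query against the table the
> # example reads, authenticated as the account under investigation.
> from google.cloud import bigquery
>
> # 'apache-beam-testing' is the project the failing job ran in (see the
> # jobs URL in the HttpForbiddenError above).
> client = bigquery.Client(project='apache-beam-testing')
>
> query = """
>     SELECT COUNT(*)
>     FROM `bigquery-samples.airline_ontime_data.flights`
> """
>
> try:
>     # A 403 here reproduces the permission failure independently of Beam.
>     row = next(iter(client.query(query).result()))
>     print('Access OK, row count:', row[0])
> except Exception as e:
>     print('Query failed:', e)
> {code}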