You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by mi...@apache.org on 2019/12/02 19:06:24 UTC
[beam] branch release-2.17.0 updated: [BEAM-8803] Default behaviour is to always retry.
This is an automated email from the ASF dual-hosted git repository.
mikhail pushed a commit to branch release-2.17.0
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/release-2.17.0 by this push:
new 693c1c0 [BEAM-8803] Default behaviour is to always retry.
new a4dc606 Merge pull request #10207 from pabloem/release-2.17.0
693c1c0 is described below
commit 693c1c0b70aff9d392275eaf07aaf9baa21da0b1
Author: Pablo <pa...@users.noreply.github.com>
AuthorDate: Thu Nov 21 21:00:20 2019 -0800
[BEAM-8803] Default behaviour is to always retry.
---
sdks/python/apache_beam/io/gcp/bigquery.py | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/sdks/python/apache_beam/io/gcp/bigquery.py b/sdks/python/apache_beam/io/gcp/bigquery.py
index d3ac5ca..3eaf3db 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery.py
@@ -725,7 +725,7 @@ class BigQueryWriteFn(DoFn):
self._max_buffered_rows = (max_buffered_rows
or BigQueryWriteFn.DEFAULT_MAX_BUFFERED_ROWS)
self._retry_strategy = (
- retry_strategy or bigquery_tools.RetryStrategy.RETRY_ON_TRANSIENT_ERROR)
+ retry_strategy or bigquery_tools.RetryStrategy.RETRY_ALWAYS)
self.additional_bq_parameters = additional_bq_parameters or {}
@@ -865,7 +865,9 @@ class BigQueryWriteFn(DoFn):
insert_ids=insert_ids,
skip_invalid_rows=True)
- logging.debug("Passed: %s. Errors are %s", passed, errors)
+ if not passed:
+ logging.info("There were errors inserting to BigQuery: %s",
+ errors)
failed_rows = [rows[entry.index] for entry in errors]
should_retry = any(
bigquery_tools.RetryStrategy.should_retry(
@@ -1063,6 +1065,10 @@ bigquery_v2_messages.TableSchema`. or a `ValueProvider` that has a JSON string,
FILE_LOADS on Batch pipelines.
insert_retry_strategy: The strategy to use when retrying streaming inserts
into BigQuery. Options are shown in bigquery_tools.RetryStrategy attrs.
+ Default is to retry always. This means that whenever there are rows
+ that fail to be inserted to BigQuery, they will be retried indefinitely.
+ Other retry strategy settings will produce a deadletter PCollection
+ as output.
additional_bq_parameters (callable): A function that returns a dictionary
with additional parameters to pass to BQ when creating / loading data
into a table. These can be 'timePartitioning', 'clustering', etc. They