You are viewing a plain-text version of this content; the canonical link is not included in this copy.
Posted to commits@beam.apache.org by mi...@apache.org on 2019/12/02 19:06:24 UTC

[beam] branch release-2.17.0 updated: [BEAM-8803] Default behaviour is to always retry.

This is an automated email from the ASF dual-hosted git repository.

mikhail pushed a commit to branch release-2.17.0
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/release-2.17.0 by this push:
     new 693c1c0  [BEAM-8803] Default behaviour is to always retry.
     new a4dc606  Merge pull request #10207 from pabloem/release-2.17.0
693c1c0 is described below

commit 693c1c0b70aff9d392275eaf07aaf9baa21da0b1
Author: Pablo <pa...@users.noreply.github.com>
AuthorDate: Thu Nov 21 21:00:20 2019 -0800

    [BEAM-8803] Default behaviour is to always retry.
---
 sdks/python/apache_beam/io/gcp/bigquery.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/sdks/python/apache_beam/io/gcp/bigquery.py b/sdks/python/apache_beam/io/gcp/bigquery.py
index d3ac5ca..3eaf3db 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery.py
@@ -725,7 +725,7 @@ class BigQueryWriteFn(DoFn):
     self._max_buffered_rows = (max_buffered_rows
                                or BigQueryWriteFn.DEFAULT_MAX_BUFFERED_ROWS)
     self._retry_strategy = (
-        retry_strategy or bigquery_tools.RetryStrategy.RETRY_ON_TRANSIENT_ERROR)
+        retry_strategy or bigquery_tools.RetryStrategy.RETRY_ALWAYS)
 
     self.additional_bq_parameters = additional_bq_parameters or {}
 
@@ -865,7 +865,9 @@ class BigQueryWriteFn(DoFn):
           insert_ids=insert_ids,
           skip_invalid_rows=True)
 
-      logging.debug("Passed: %s. Errors are %s", passed, errors)
+      if not passed:
+        logging.info("There were errors inserting to BigQuery: %s",
+                     errors)
       failed_rows = [rows[entry.index] for entry in errors]
       should_retry = any(
           bigquery_tools.RetryStrategy.should_retry(
@@ -1063,6 +1065,10 @@ bigquery_v2_messages.TableSchema`. or a `ValueProvider` that has a JSON string,
         FILE_LOADS on Batch pipelines.
       insert_retry_strategy: The strategy to use when retrying streaming inserts
         into BigQuery. Options are shown in bigquery_tools.RetryStrategy attrs.
+        The default is RETRY_ALWAYS: whenever rows fail to be inserted into
+        BigQuery, they are retried indefinitely. Any other retry strategy
+        setting produces a dead-letter PCollection as an additional
+        output.
       additional_bq_parameters (callable): A function that returns a dictionary
         with additional parameters to pass to BQ when creating / loading data
         into a table. These can be 'timePartitioning', 'clustering', etc. They