You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by al...@apache.org on 2017/04/27 01:06:15 UTC
[1/2] beam git commit: Do not depend on message id in DataflowRunner
Repository: beam
Updated Branches:
refs/heads/master 650ee8ea6 -> 3961ce46c
Do not depend on message id in DataflowRunner
This field is deprecated and is causing messages to be repeated. Hash each
message instead to avoid printing duplicate messages.
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/bcde998f
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/bcde998f
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/bcde998f
Branch: refs/heads/master
Commit: bcde998f9fff72d59d74107db317de7b20a9f003
Parents: 650ee8e
Author: Ahmet Altay <al...@google.com>
Authored: Tue Apr 25 18:42:03 2017 -0700
Committer: Ahmet Altay <al...@google.com>
Committed: Wed Apr 26 18:06:03 2017 -0700
----------------------------------------------------------------------
.../apache_beam/runners/dataflow/dataflow_runner.py | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/bcde998f/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py b/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
index 4534895..05f6833 100644
--- a/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
+++ b/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
@@ -76,7 +76,7 @@ class DataflowRunner(PipelineRunner):
def poll_for_job_completion(runner, result):
"""Polls for the specified job to finish running (successfully or not)."""
last_message_time = None
- last_message_id = None
+ last_message_hash = None
last_error_rank = float('-inf')
last_error_msg = None
@@ -126,19 +126,20 @@ class DataflowRunner(PipelineRunner):
messages, page_token = runner.dataflow_client.list_messages(
job_id, page_token=page_token, start_time=last_message_time)
for m in messages:
- if last_message_id is not None and m.id == last_message_id:
+ message = '%s: %s: %s' % (m.time, m.messageImportance, m.messageText)
+ m_hash = hash(message)
+
+ if last_message_hash is not None and m_hash == last_message_hash:
# Skip the first message if it is the last message we got in the
# previous round. This can happen because we use the
# last_message_time as a parameter of the query for new messages.
continue
last_message_time = m.time
- last_message_id = m.id
+ last_message_hash = m_hash
# Skip empty messages.
if m.messageImportance is None:
continue
- logging.info(
- '%s: %s: %s: %s', m.id, m.time, m.messageImportance,
- m.messageText)
+ logging.info(message)
if str(m.messageImportance) == 'JOB_MESSAGE_ERROR':
if rank_error(m.messageText) >= last_error_rank:
last_error_rank = rank_error(m.messageText)
[2/2] beam git commit: This closes #2696
Posted by al...@apache.org.
This closes #2696
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/3961ce46
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/3961ce46
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/3961ce46
Branch: refs/heads/master
Commit: 3961ce46c6a77f8559cf9b582a9b42677a7cca07
Parents: 650ee8e bcde998
Author: Ahmet Altay <al...@google.com>
Authored: Wed Apr 26 18:06:05 2017 -0700
Committer: Ahmet Altay <al...@google.com>
Committed: Wed Apr 26 18:06:05 2017 -0700
----------------------------------------------------------------------
.../apache_beam/runners/dataflow/dataflow_runner.py | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
----------------------------------------------------------------------