You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by al...@apache.org on 2017/04/27 01:06:15 UTC

[1/2] beam git commit: Do not depend on message id in DataflowRunner

Repository: beam
Updated Branches:
  refs/heads/master 650ee8ea6 -> 3961ce46c


Do not depend on message id in DataflowRunner

The message id field is deprecated and is causing messages to be repeated.
Hash each message's contents instead, to avoid printing duplicate messages.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/bcde998f
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/bcde998f
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/bcde998f

Branch: refs/heads/master
Commit: bcde998f9fff72d59d74107db317de7b20a9f003
Parents: 650ee8e
Author: Ahmet Altay <al...@google.com>
Authored: Tue Apr 25 18:42:03 2017 -0700
Committer: Ahmet Altay <al...@google.com>
Committed: Wed Apr 26 18:06:03 2017 -0700

----------------------------------------------------------------------
 .../apache_beam/runners/dataflow/dataflow_runner.py    | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/bcde998f/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py b/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
index 4534895..05f6833 100644
--- a/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
+++ b/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
@@ -76,7 +76,7 @@ class DataflowRunner(PipelineRunner):
   def poll_for_job_completion(runner, result):
     """Polls for the specified job to finish running (successfully or not)."""
     last_message_time = None
-    last_message_id = None
+    last_message_hash = None
 
     last_error_rank = float('-inf')
     last_error_msg = None
@@ -126,19 +126,20 @@ class DataflowRunner(PipelineRunner):
         messages, page_token = runner.dataflow_client.list_messages(
             job_id, page_token=page_token, start_time=last_message_time)
         for m in messages:
-          if last_message_id is not None and m.id == last_message_id:
+          message = '%s: %s: %s' % (m.time, m.messageImportance, m.messageText)
+          m_hash = hash(message)
+
+          if last_message_hash is not None and m_hash == last_message_hash:
             # Skip the first message if it is the last message we got in the
             # previous round. This can happen because we use the
             # last_message_time as a parameter of the query for new messages.
             continue
           last_message_time = m.time
-          last_message_id = m.id
+          last_message_hash = m_hash
           # Skip empty messages.
           if m.messageImportance is None:
             continue
-          logging.info(
-              '%s: %s: %s: %s', m.id, m.time, m.messageImportance,
-              m.messageText)
+          logging.info(message)
           if str(m.messageImportance) == 'JOB_MESSAGE_ERROR':
             if rank_error(m.messageText) >= last_error_rank:
               last_error_rank = rank_error(m.messageText)


[2/2] beam git commit: This closes #2696

Posted by al...@apache.org.
This closes #2696


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/3961ce46
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/3961ce46
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/3961ce46

Branch: refs/heads/master
Commit: 3961ce46c6a77f8559cf9b582a9b42677a7cca07
Parents: 650ee8e bcde998
Author: Ahmet Altay <al...@google.com>
Authored: Wed Apr 26 18:06:05 2017 -0700
Committer: Ahmet Altay <al...@google.com>
Committed: Wed Apr 26 18:06:05 2017 -0700

----------------------------------------------------------------------
 .../apache_beam/runners/dataflow/dataflow_runner.py    | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)
----------------------------------------------------------------------