You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by bo...@apache.org on 2020/02/24 23:50:48 UTC

[beam] branch master updated: Update docstring of ManualWatermarkEstimator.set_watermark()

This is an automated email from the ASF dual-hosted git repository.

boyuanz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new f2f0282  Update docstring of ManualWatermarkEstimator.set_watermark()
     new 7139216  Merge pull request #10933 from boyuanzz/follow_up
f2f0282 is described below

commit f2f0282719194f31557d628d0fb1fc6a44d72c53
Author: Boyuan Zhang <bo...@google.com>
AuthorDate: Fri Feb 21 15:17:41 2020 -0800

    Update docstring of ManualWatermarkEstimator.set_watermark()
---
 sdks/python/apache_beam/io/watermark_estimators.py | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/sdks/python/apache_beam/io/watermark_estimators.py b/sdks/python/apache_beam/io/watermark_estimators.py
index 5a00ae7..65c4611 100644
--- a/sdks/python/apache_beam/io/watermark_estimators.py
+++ b/sdks/python/apache_beam/io/watermark_estimators.py
@@ -120,10 +120,19 @@ class ManualWatermarkEstimator(WatermarkEstimator):
     return self._watermark
 
   def set_watermark(self, timestamp):
-    # Please call set_watermark after calling restriction_tracker.try_claim() to
-    # prevent advancing watermark early.
-    # TODO(BEAM-7473): It's possible that getting a slightly stale watermark
-    # when performing split.
+    # pylint: disable=line-too-long
+
+    """Sets a timestamp before or at the timestamps of all future elements
+    produced by the associated DoFn.
+
+    This can be approximate. If records are output that violate this guarantee,
+    they will be considered late, which will affect how they will be processed.
+    See https://beam.apache.org/documentation/programming-guide/#watermarks-and-late-data
+    for more information on late data and how to handle it.
+
+    However, this value should be as late as possible. Downstream windows may
+    not be able to close until this watermark passes their end.
+    """
     if not isinstance(timestamp, Timestamp):
       raise ValueError('set_watermark expects a Timestamp as input')
     if self._watermark and self._watermark > timestamp: