You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by bo...@apache.org on 2020/02/24 23:50:48 UTC
[beam] branch master updated: Update docstring of ManualWatermarkEstimator.set_watermark()
This is an automated email from the ASF dual-hosted git repository.
boyuanz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new f2f0282 Update docstring of ManualWatermarkEstimator.set_watermark()
new 7139216 Merge pull request #10933 from boyuanzz/follow_up
f2f0282 is described below
commit f2f0282719194f31557d628d0fb1fc6a44d72c53
Author: Boyuan Zhang <bo...@google.com>
AuthorDate: Fri Feb 21 15:17:41 2020 -0800
Update docstring of ManualWatermarkEstimator.set_watermark()
---
sdks/python/apache_beam/io/watermark_estimators.py | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/sdks/python/apache_beam/io/watermark_estimators.py b/sdks/python/apache_beam/io/watermark_estimators.py
index 5a00ae7..65c4611 100644
--- a/sdks/python/apache_beam/io/watermark_estimators.py
+++ b/sdks/python/apache_beam/io/watermark_estimators.py
@@ -120,10 +120,19 @@ class ManualWatermarkEstimator(WatermarkEstimator):
return self._watermark
def set_watermark(self, timestamp):
- # Please call set_watermark after calling restriction_tracker.try_claim() to
- # prevent advancing watermark early.
- # TODO(BEAM-7473): It's possible that getting a slightly stale watermark
- # when performing split.
+ # pylint: disable=line-too-long
+
+ """Sets a timestamp before or at the timestamps of all future elements
+ produced by the associated DoFn.
+
+ This can be approximate. If records are output that violate this guarantee,
+ they will be considered late, which will affect how they will be processed.
+ See https://beam.apache.org/documentation/programming-guide/#watermarks-and-late-data
+ for more information on late data and how to handle it.
+
+ However, this value should be as late as possible. Downstream windows may
+ not be able to close until this watermark passes their end.
+ """
if not isinstance(timestamp, Timestamp):
raise ValueError('set_watermark expects a Timestamp as input')
if self._watermark and self._watermark > timestamp: