You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@beam.apache.org by GitBox <gi...@apache.org> on 2020/09/02 20:41:31 UTC

[GitHub] [beam] davidyan74 commented on a change in pull request #12703: [BEAM-10603] Add describe and cancel to RecordingManager

davidyan74 commented on a change in pull request #12703:
URL: https://github.com/apache/beam/pull/12703#discussion_r482420061



##########
File path: sdks/python/apache_beam/runners/interactive/recording_manager_test.py
##########
@@ -288,18 +347,122 @@ def test_basic_wordcount(self):
     # Create the recording objects. By calling `record` a new PipelineFragment
     # is started to compute the given PCollections and cache to disk.
     rm = RecordingManager(p)
-    recording = rm.record([elems], max_n=3, max_duration_secs=500)
-    stream = recording.stream(elems)
-    recording.wait_until_finish()
+    numbers_recording = rm.record([numbers], max_n=3, max_duration_secs=500)
+    numbers_stream = numbers_recording.stream(numbers)
+    numbers_recording.wait_until_finish()
 
     # Once the pipeline fragment completes, we can read from the stream and know
     # that all elements were written to cache.
-    elems = list(stream.read())
+    elems = list(numbers_stream.read())
     expected_elems = [
         WindowedValue(i, MIN_TIMESTAMP, [GlobalWindow()]) for i in range(3)
     ]
     self.assertListEqual(elems, expected_elems)
 
+    # Make an extra recording and test the description.
+    letters_recording = rm.record([letters], max_n=3, max_duration_secs=500)
+    letters_recording.wait_until_finish()
+
+    self.assertEqual(
+        rm.describe()['size'],
+        numbers_recording.describe()['size'] +
+        letters_recording.describe()['size'])
+
+    rm.cancel()
+
+  @unittest.skipIf(
+      sys.version_info < (3, 6, 0),
+      'This test requires at least Python 3.6 to work.')
+  def test_cancel_stops_recording(self):
+    # Add the TestStream so that it can be cached.
+    ib.options.capturable_sources.add(TestStream)
+
+    p = beam.Pipeline(
+        InteractiveRunner(), options=PipelineOptions(streaming=True))
+    elems = (
+        p
+        | TestStream().advance_watermark_to(0).advance_processing_time(
+            1).add_elements(list(range(10))).advance_processing_time(1))
+    squares = elems | beam.Map(lambda x: x**2)
+
+    # Watch the local scope for Interactive Beam so that referenced PCollections
+    # will be cached.
+    ib.watch(locals())
+
+    # This is normally done in the interactive_utils when a transform is
+    # applied but needs an IPython environment. So we manually run this here.
+    ie.current_env().track_user_pipelines()
+
+    # Get the recording then the BackgroundCachingJob.

Review comment:
       Are we still calling it BackgroundCachingJob?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org