You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@beam.apache.org by GitBox <gi...@apache.org> on 2022/03/28 20:56:07 UTC

[GitHub] [beam] rohdesamuel commented on a change in pull request #17127: [BEAM-14130] Implement Jupyterlab extension for managing Dataproc clusters

rohdesamuel commented on a change in pull request #17127:
URL: https://github.com/apache/beam/pull/17127#discussion_r836841823



##########
File path: sdks/python/apache_beam/runners/interactive/interactive_runner.py
##########
@@ -267,9 +267,12 @@ def _get_dataproc_cluster_master_url_if_applicable(
               category=DeprecationWarning)
         project_id = (user_pipeline.options.view_as(GoogleCloudOptions).project)
         region = (user_pipeline.options.view_as(GoogleCloudOptions).region)
-        cluster_name = ie.current_env().clusters.default_cluster_name
-        cluster_metadata = MasterURLIdentifier(
-            project_id=project_id, region=region, cluster_name=cluster_name)
+        if not project_id:
+          cluster_metadata = ie.current_env().clusters.default_cluster_metadata
+        else:
+          cluster_name = ie.current_env().clusters.default_cluster_name
+          cluster_metadata = MasterURLIdentifier(
+              project_id=project_id, region=region, cluster_name=cluster_name)

Review comment:
       Can you comment on why this if case is needed?

##########
File path: sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/src/SidePanel.ts
##########
@@ -119,6 +130,7 @@ export class SidePanel extends BoxPanel {
     this.dispose();
   }
 
+  private _clusters: ClustersWidget;
   private _inspector: InteractiveInspectorWidget;

Review comment:
       Once you pass in the widget, you can also turn this into a single variable.

##########
File path: sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/src/SidePanel.ts
##########
@@ -35,24 +40,30 @@ import {
 export class SidePanel extends BoxPanel {
   constructor(
     manager: ServiceManager.IManager,
-    rendermime: IRenderMimeRegistry
+    rendermime: IRenderMimeRegistry,
+    pageType: string
   ) {
     super({
       direction: 'top-to-bottom',
       alignment: 'end'
     });
     this.id = 'apache-beam-jupyterlab-sidepanel';
-    this.title.label = 'Interactive Beam Inspector';
     this.title.closable = true;
-
     this._sessionContext = new SessionContext({
       sessionManager: manager.sessions,
       specsManager: manager.kernelspecs,
       name: 'Interactive Beam Inspector Session'
     });
 
-    this._inspector = new InteractiveInspectorWidget(this._sessionContext);
-    this.addWidget(this._inspector);
+    if (pageType === 'Inspector') {
+      this.title.label = 'Interactive Beam Inspector';
+      this._inspector = new InteractiveInspectorWidget(this._sessionContext);
+      this.addWidget(this._inspector);
+    } else if (pageType === 'Clusters') {
+      this.title.label = 'Interactive Beam Cluster Manager';
+      this._clusters = new ClustersWidget(this._sessionContext);
+      this.addWidget(this._clusters);
+    }

Review comment:
       It's a little weird to have the SidePanel know how to construct a widget based on a string parameter. In the future if we want to add more widgets that each construct using different parameters this code could get messy. Instead, I suggest giving the sidepanel the widget and title as parameters.

##########
File path: sdks/python/apache_beam/runners/interactive/messaging/interactive_environment_inspector.py
##########
@@ -136,6 +138,60 @@ def get_pcoll_data(self, identifier, include_window_info=False):
       return dataframe.to_json(orient='table')
     return {}
 
+  @as_json
+  def list_clusters(self):
+    """Retrieves information for all clusters as a json.
+
+    The json object maps a unique obfuscated identifier of a cluster to
+    the corresponding cluster_name, project, region, master_url, dashboard,
+    and pipelines. Furthermore, copies the mapping to self._clusters.
+    """
+    from apache_beam.runners.interactive import interactive_environment as ie
+    clusters = ie.current_env().clusters
+    all_cluster_data = {}
+    for master_url in clusters.master_urls:
+      cluster_metadata = clusters.master_urls[master_url]
+      project = cluster_metadata.project_id
+      region = cluster_metadata.region
+      name = cluster_metadata.cluster_name
+
+      all_cluster_data[obfuscate(project, region, name)] = {
+          'cluster_name': name,
+          'project': project,
+          'region': region,
+          'master_url': master_url,
+          'dashboard': clusters.master_urls_to_dashboards[master_url],
+          'pipelines': clusters.master_urls_to_pipelines[master_url]
+      }
+    self._clusters = all_cluster_data
+    return all_cluster_data
+
+  def delete_cluster(self, id: str):
+    """Deletes the cluster with the given obfuscated identifier from the
+    Interactive Environment, as well as from Dataproc. Additionally,
+    unassigns the 'flink_master' pipeline option for all impacted pipelines.
+    """
+    from apache_beam.runners.interactive import interactive_environment as ie
+    pipelines = [
+        ie.current_env().pipeline_id_to_pipeline(pid)
+        for pid in self._clusters[id]['pipelines']
+    ]
+    for p in pipelines:
+      ie.current_env().clusters.cleanup(p)
+      p.options.view_as(FlinkRunnerOptions).flink_master = '[auto]'
+    del self._clusters[id]

Review comment:
       Will this create any dangling references?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@beam.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org