You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by GitBox <gi...@apache.org> on 2021/04/05 11:00:46 UTC

[GitHub] [airflow] mik-laj commented on a change in pull request #11802: Add Kubernetes cleanup-pods CLI command for Helm Chart

mik-laj commented on a change in pull request #11802:
URL: https://github.com/apache/airflow/pull/11802#discussion_r607034452



##########
File path: airflow/cli/commands/kubernetes_command.py
##########
@@ -61,3 +64,66 @@ def generate_pod_yaml(args):
             sanitized_pod = api_client.sanitize_for_serialization(pod)
             output.write(yaml.dump(sanitized_pod))
     print(f"YAML output can be found at {yaml_output_path}/airflow_yaml_output/")
+
+
+@cli_utils.action_logging
+def cleanup_pods(args):
+    """Clean up k8s pods in evicted/failed/succeeded states"""
+    namespace = args.namespace
+
+    # https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/
+    # All Containers in the Pod have terminated in success, and will not be restarted.
+    pod_succeeded = 'succeeded'
+
+    # All Containers in the Pod have terminated, and at least one Container has terminated in failure.
+    # That is, the Container either exited with non-zero status or was terminated by the system.
+    pod_failed = 'failed'
+
+    # https://kubernetes.io/docs/tasks/administer-cluster/out-of-resource/
+    pod_reason_evicted = 'evicted'
+    # If pod is failed and restartPolicy is:
+    # * Always: Restart Container; Pod phase stays Running.
+    # * OnFailure: Restart Container; Pod phase stays Running.
+    # * Never: Pod phase becomes Failed.
+    pod_restart_policy_never = 'never'
+
+    print('Loading Kubernetes configuration')
+    kube_client = get_kube_client()
+    print(f'Listing pods in namespace {namespace}')
+    continue_token = None
+    while True:  # pylint: disable=too-many-nested-blocks
+        pod_list = kube_client.list_namespaced_pod(namespace=namespace, limit=500, _continue=continue_token)
+        for pod in pod_list.items:
+            pod_name = pod.metadata.name
+            print(f'Inspecting pod {pod_name}')
+            pod_phase = pod.status.phase.lower()
+            pod_reason = pod.status.reason.lower() if pod.status.reason else ''
+            pod_restart_policy = pod.spec.restart_policy.lower()
+
+            if (
+                pod_phase == pod_succeeded
+                or (pod_phase == pod_failed and pod_restart_policy == pod_restart_policy_never)
+                or (pod_reason == pod_reason_evicted)
+            ):
+                print(
+                    f'Deleting pod "{pod_name}" phase "{pod_phase}" and reason "{pod_reason}", '
+                    f'restart policy "{pod_restart_policy}"'
+                )
+                try:
+                    _delete_pod(pod.metadata.name, namespace)
+                except ApiException as e:
+                    print(f"can't remove POD: {e}", file=sys.stderr)
+                continue
+            print(f'No action taken on pod {pod_name}')
+        continue_token = pod_list.metadata._continue  # pylint: disable=protected-access
+        if not continue_token:
+            break
+
+
+def _delete_pod(name, namespace):
+    """Helper Function for cleanup_pods"""
+    core_v1 = client.CoreV1Api()

Review comment:
       Should we use get_kube_client here?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org