Posted to commits@airflow.apache.org by as...@apache.org on 2021/03/15 10:16:44 UTC

[airflow-ci-infra] 01/01: Upload job output logs to CloudWatch too

This is an automated email from the ASF dual-hosted git repository.

ash pushed a commit to branch upload-task-logs-cloudwatch-too
in repository https://gitbox.apache.org/repos/asf/airflow-ci-infra.git

commit e46584050bc8c21178c1254655758f6c99f45e1d
Author: Ash Berlin-Taylor <as...@firemirror.com>
AuthorDate: Mon Mar 15 10:14:45 2021 +0000

    Upload job output logs to CloudWatch too

    We have some cases where logs aren't being uploaded to GitHub, which
    makes debugging failures hard.

    This is a problem with GitHub's hosted runners too, but for self-hosted
    runners we can at least do something about it.
---
 cloud-init.yml | 60 ++++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 48 insertions(+), 12 deletions(-)
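
For orientation, the diff below tails the runner's job output files under _diag/pages/, grok-parses a leading timestamp off each line, and feeds the result into the existing CloudWatch sink alongside the journald and runner diagnostic logs. Pulled out of cloud-init.yml, the new job-output path amounts to the sketch below; the group_name, stream_name and region values are placeholders, since the real sink settings live elsewhere in cloud-init.yml and are not part of this change.

    [sources.job-logs]
      type = "file"
      include = ["/home/runner/actions-runner/_diag/pages/*.log"]

    [transforms.grok-job-logs]
      type = "remap"
      inputs = ["job-logs"]
      source = '''
        # Pull the leading ISO8601 timestamp off each job output line and tag
        # the event so it can be told apart from runner diagnostic logs.
        structured, err = parse_grok(.message, "%{TIMESTAMP_ISO8601:timestamp} %{GREEDYDATA:message}")

        if err == null {
          . = merge(., structured)
          .type = "job-output"
        }
      '''

    [sinks.cloudwatch]
      inputs   = ["grok-job-logs"]
      type     = "aws_cloudwatch_logs"
      encoding = "json"
      create_missing_group = false
      # Placeholder values -- the real group/stream/region come from the
      # existing sink definition in cloud-init.yml, not from this commit.
      group_name  = "github-runner-logs"
      stream_name = "{{ host }}"
      region      = "us-east-1"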

diff --git a/cloud-init.yml b/cloud-init.yml
index b52aedd..e65b954 100644
--- a/cloud-init.yml
+++ b/cloud-init.yml
@@ -231,6 +231,15 @@ write_files:
         type = "journald"
         include_units = ["actions.runner.service", "actions.runner-supervisor.service"]
 
+      [transforms.without_systemd_fields]
+        type = "remove_fields"
+        inputs = ["logs"]
+        fields = ["_CAP_EFFECTIVE", "_SYSTEMD_SLICE", "_SYSTEMD_CGROUP",
+          "_SYSTEMD_INVOCATION_ID", "_SELINUX_CONTEXT", "_COMM", "_BOOT_ID",
+          "_MACHINE_ID", "_STREAM_ID", "_PID", "_GID", "_UID","_TRANSPORT",
+          "__MONOTONIC_TIMESTAMP", "SYSLOG_IDENTIFIER", "PRIORITY",
+          "source_type"]
+
       [sources.runner-logs]
         type = "file"
         include = ["/home/runner/actions-runner/_diag/*.log"]
@@ -242,23 +251,50 @@ write_files:
             timeout_ms = 250
 
       [transforms.grok-runner-logs]
-        type = "grok_parser"
+        type = "remap"
         inputs=["runner-logs"]
-        pattern = "(?m)\\[%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level} %{NOTSPACE:logger}\\] %{GREEDYDATA:message}"
-        types.timestamp = "timestamp|%F %TZ"
+        source = '''
+          structured, err = parse_grok(.message, "(?m)\\[%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level} %{NOTSPACE:logger}\\] %{GREEDYDATA:message}")
+
+          if err != null {
+            .err = err
+          } else {
+            . = merge(., structured)
+          }
+        '''
+      [transforms.filter-runner-logs]
+        type = "filter"
+        inputs = ['grok-runner-logs']
+        condition.type = "remap"
+        condition.source = '''
+          if .logger == "JobServerQueue" {
+            !match!(.message, r'Try to append \d+ batches web console lines for record')
+          } else if .logger == "HostContext" {
+            !starts_with!(.message, "Well known directory")
+          } else {
+            true
+          }
+        '''
+
+      [sources.job-logs]
+        type = "file"
+        include = ["/home/runner/actions-runner/_diag/pages/*.log"]
 
-      [transforms.without_systemd_fields]
-        type = "remove_fields"
-        inputs = ["logs"]
-        fields = ["_CAP_EFFECTIVE", "_SYSTEMD_SLICE", "_SYSTEMD_CGROUP",
-          "_SYSTEMD_INVOCATION_ID", "_SELINUX_CONTEXT", "_COMM", "_BOOT_ID",
-          "_MACHINE_ID", "_STREAM_ID", "_PID", "_GID", "_UID","_TRANSPORT",
-          "__MONOTONIC_TIMESTAMP", "SYSLOG_IDENTIFIER", "PRIORITY",
-          "source_type"]
+      [transforms.grok-job-logs]
+        type = "remap"
+        inputs = ["job-logs"]
+        source = '''
+          structured, err = parse_grok(.message, "%{TIMESTAMP_ISO8601:timestamp} %{GREEDYDATA:message}")
+
+          if err == null {
+            . = merge(., structured)
+            .type = "job-output"
+          }
+        '''
 
       # Output data
       [sinks.cloudwatch]
-      inputs   = ["without_systemd_fields", "grok-runner-logs"]
+      inputs   = ["without_systemd_fields", "filter-runner-logs", "grok-job-logs"]
       type     = "aws_cloudwatch_logs"
       encoding = "json"
       create_missing_group = false
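
If the parsed events need checking before they reach CloudWatch, a throwaway console sink can be dropped in next to the existing one and removed again once the output looks right. This is not part of the commit, just a debugging aid sketched here:

      [sinks.debug-console]
        inputs   = ["without_systemd_fields", "filter-runner-logs", "grok-job-logs"]
        type     = "console"
        encoding = "json"

Running vector with that sink prints each event as a JSON line on stdout, which makes it easy to confirm that the grok patterns match and that the JobServerQueue/HostContext noise really is filtered out.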