You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by as...@apache.org on 2021/03/15 10:16:44 UTC
[airflow-ci-infra] 01/01: Upload job output logs to Cloudwatch too
This is an automated email from the ASF dual-hosted git repository.
ash pushed a commit to branch upload-task-logs-cloudwatch-too
in repository https://gitbox.apache.org/repos/asf/airflow-ci-infra.git
commit e46584050bc8c21178c1254655758f6c99f45e1d
Author: Ash Berlin-Taylor <as...@firemirror.com>
AuthorDate: Mon Mar 15 10:14:45 2021 +0000
Upload job output logs to Cloudwatch too
We have some cases where logs aren't being uploaded to GitHub, which
makes debugging failures hard.
This is a problem with GitHub's hosted runners too, but for self-hosted
runners we can at least do something about it.
---
cloud-init.yml | 60 ++++++++++++++++++++++++++++++++++++++++++++++------------
1 file changed, 48 insertions(+), 12 deletions(-)
diff --git a/cloud-init.yml b/cloud-init.yml
index b52aedd..e65b954 100644
--- a/cloud-init.yml
+++ b/cloud-init.yml
@@ -231,6 +231,15 @@ write_files:
type = "journald"
include_units = ["actions.runner.service", "actions.runner-supervisor.service"]
+ [transforms.without_systemd_fields]
+ type = "remove_fields"
+ inputs = ["logs"]
+ fields = ["_CAP_EFFECTIVE", "_SYSTEMD_SLICE", "_SYSTEMD_CGROUP",
+ "_SYSTEMD_INVOCATION_ID", "_SELINUX_CONTEXT", "_COMM", "_BOOT_ID",
+ "_MACHINE_ID", "_STREAM_ID", "_PID", "_GID", "_UID","_TRANSPORT",
+ "__MONOTONIC_TIMESTAMP", "SYSLOG_IDENTIFIER", "PRIORITY",
+ "source_type"]
+
[sources.runner-logs]
type = "file"
include = ["/home/runner/actions-runner/_diag/*.log"]
@@ -242,23 +251,50 @@ write_files:
timeout_ms = 250
[transforms.grok-runner-logs]
- type = "grok_parser"
+ type = "remap"
inputs=["runner-logs"]
- pattern = "(?m)\\[%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level} %{NOTSPACE:logger}\\] %{GREEDYDATA:message}"
- types.timestamp = "timestamp|%F %TZ"
+ source = '''
+ structured, err = parse_grok(.message, "(?m)\\[%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level} %{NOTSPACE:logger}\\] %{GREEDYDATA:message}")
+
+ if err != null {
+ .err = err
+ } else {
+ . = merge(., structured)
+ }
+ '''
+ [transforms.filter-runner-logs]
+ type = "filter"
+ inputs = ['grok-runner-logs']
+ condition.type = "remap"
+ condition.source = '''
+ if .logger == "JobServerQueue" {
+ !match!(.message, r'Try to append \d+ batches web console lines for record')
+ } else if .logger == "HostContext" {
+ !starts_with!(.message, "Well known directory")
+ } else {
+ true
+ }
+ '''
+
+ [sources.job-logs]
+ type = "file"
+ include = ["/home/runner/actions-runner/_diag/pages/*.log"]
- [transforms.without_systemd_fields]
- type = "remove_fields"
- inputs = ["logs"]
- fields = ["_CAP_EFFECTIVE", "_SYSTEMD_SLICE", "_SYSTEMD_CGROUP",
- "_SYSTEMD_INVOCATION_ID", "_SELINUX_CONTEXT", "_COMM", "_BOOT_ID",
- "_MACHINE_ID", "_STREAM_ID", "_PID", "_GID", "_UID","_TRANSPORT",
- "__MONOTONIC_TIMESTAMP", "SYSLOG_IDENTIFIER", "PRIORITY",
- "source_type"]
+ [transforms.grok-job-logs]
+ type = "remap"
+ inputs = ["job-logs"]
+ source = '''
+ structured, err = parse_grok(.message, "%{TIMESTAMP_ISO8601:timestamp} %{GREEDYDATA:message}")
+
+ if err == null {
+ . = merge(., structured)
+ .type = "job-output"
+ }
+ '''
# Output data
[sinks.cloudwatch]
- inputs = ["without_systemd_fields", "grok-runner-logs"]
+ inputs = ["without_systemd_fields", "filter-runner-logs", "grok-job-logs"]
type = "aws_cloudwatch_logs"
encoding = "json"
create_missing_group = false