You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tez.apache.org by rb...@apache.org on 2020/01/06 06:12:40 UTC

[tez] branch master updated: TEZ-4098: tez-tools improvements: log-split, swimlane (László Bodor, reviewed by rbalamohan)

This is an automated email from the ASF dual-hosted git repository.

rbalamohan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tez.git


The following commit(s) were added to refs/heads/master by this push:
     new 271351b  TEZ-4098: tez-tools improvements: log-split, swimlane (László Bodor, reviewed by rbalamohan)
271351b is described below

commit 271351ba29382da08a24adb2ba2327cc68b95efb
Author: Rajesh Balamohan <rb...@apache.org>
AuthorDate: Mon Jan 6 11:41:57 2020 +0530

    TEZ-4098: tez-tools improvements: log-split, swimlane (László Bodor, reviewed by rbalamohan)
---
 tez-tools/swimlanes/swimlane.py                    |   1 +
 tez-tools/swimlanes/yarn-swimlanes.sh              |  17 +++-
 tez-tools/tez-log-split/README.md                  |  77 ++++++++++++++
 tez-tools/tez-log-split/logsplit.py                | 111 +++++++++++++++++++++
 .../tez-log-splitter.sh}                           |  19 ++--
 5 files changed, 214 insertions(+), 11 deletions(-)

diff --git a/tez-tools/swimlanes/swimlane.py b/tez-tools/swimlanes/swimlane.py
index bbd54df..11976da 100644
--- a/tez-tools/swimlanes/swimlane.py
+++ b/tez-tools/swimlanes/swimlane.py
@@ -195,6 +195,7 @@ def main(argv):
 			svg.text(marginRight+xdomain(percentX), y+marginTop+12, "%d%% (%0.1fs)" % (int(fraction*100), (percentX - dag.start)/1000.0), style="font-size:12px; text-anchor: middle")
 	out.write(svg.flush())
 	out.close()
+	print("Output svg is written into: " + str(out))
 
 if __name__ == "__main__":
 	sys.exit(main(sys.argv[1:]))
diff --git a/tez-tools/swimlanes/yarn-swimlanes.sh b/tez-tools/swimlanes/yarn-swimlanes.sh
index df4d071..02465b0 100644
--- a/tez-tools/swimlanes/yarn-swimlanes.sh
+++ b/tez-tools/swimlanes/yarn-swimlanes.sh
@@ -19,10 +19,17 @@
 set -e
 
 APPID=$1
-
-YARN=$(which yarn);
 TMP=$(mktemp)
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+if [[ -f $APPID ]]; then
+    echo "Reading yarn logs from local file: $APPID"
+    cat "$APPID" | grep HISTORY > "$TMP"
+else
+    YARN=$(which yarn);
+    echo "Fetching yarn logs for $APPID"
+    $YARN logs -applicationId "$APPID" | grep HISTORY > "$TMP"
+fi
+echo "History was written into $TMP"
 
-echo "Fetching yarn logs for $APPID"
-$YARN logs -applicationId $APPID | grep HISTORY > $TMP 
-python swimlane.py -o $APPID.svg $TMP
+python "$DIR/swimlane.py" -o "$APPID.svg" "$TMP"
\ No newline at end of file
diff --git a/tez-tools/tez-log-split/README.md b/tez-tools/tez-log-split/README.md
new file mode 100644
index 0000000..a7341a7
--- /dev/null
+++ b/tez-tools/tez-log-split/README.md
@@ -0,0 +1,77 @@
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+
+Tez log splitter
+=========
+
+This is a post-hoc analysis tool for Apache Tez which splits
+an aggregated YARN log file into separate files arranged in a hierarchical folder structure.
+
+```
+.
+├── vc0525.your.domain.com_8041
+│   └── container_e10_1575565459633_0004_01_000001
+│       ├── container-localizer-syslog
+│       ├── dag_1575565459633_0004_1-tez-dag.pb.txt
+│       ├── dag_1575565459633_0004_1.dot
+│       ├── prelaunch.err
+│       ├── prelaunch.out
+│       ├── stderr
+│       ├── stdout
+│       ├── syslog
+│       ├── syslog_dag_1575565459633_0004_1
+│       └── syslog_dag_1575565459633_0004_1_post
+├── vc0526.your.domain.com_8041
+│   └── container_e10_1575565459633_0004_01_000004
+│       ├── container-localizer-syslog
+│       ├── prelaunch.err
+│       ├── prelaunch.out
+│       ├── stderr
+│       ├── stdout
+│       ├── syslog
+│       └── syslog_attempt_1575565459633_0004_1_00_000000_2
+├── vc0528.your.domain.com_8041
+│   └── container_e10_1575565459633_0004_01_000002
+│       ├── container-localizer-syslog
+│       ├── prelaunch.err
+│       ├── prelaunch.out
+│       ├── stderr
+│       ├── stdout
+│       ├── syslog
+│       └── syslog_attempt_1575565459633_0004_1_00_000000_0
+├── vc0529.your.domain.com_8041
+│   └── container_e10_1575565459633_0004_01_000005
+│       ├── container-localizer-syslog
+│       ├── prelaunch.err
+│       ├── prelaunch.out
+│       ├── stderr
+│       ├── stdout
+│       ├── syslog
+│       └── syslog_attempt_1575565459633_0004_1_00_000000_3
+└── vc0536.your.domain.com_8041
+    └── container_e10_1575565459633_0004_01_000003
+        ├── container-localizer-syslog
+        ├── prelaunch.err
+        ├── prelaunch.out
+        ├── stderr
+        ├── stdout
+        ├── syslog
+        └── syslog_attempt_1575565459633_0004_1_00_000000_1
+```
+
+To use the tool, run e.g.
+
+- `tez-log-splitter.sh application_1576254620247_0010`  (app log is fetched from yarn)
+- `tez-log-splitter.sh ~/path/to/application_1576254620247_0010.log`  (...when app log is already on your computer)
+- `tez-log-splitter.sh ~/path/to/application_1576254620247_0010.log.gz`  (...when app log is already on your computer, gzip-compressed)
diff --git a/tez-tools/tez-log-split/logsplit.py b/tez-tools/tez-log-split/logsplit.py
new file mode 100644
index 0000000..47e17da
--- /dev/null
+++ b/tez-tools/tez-log-split/logsplit.py
@@ -0,0 +1,111 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+import sys
+import os
+import re
+from gzip import GzipFile as GZFile
+from getopt import getopt
+
+def usage():
+    """Write the command-line usage summary of logsplit.py to stderr."""
+    help_text = """
+usage: logsplit.py <log-file>
+
+Input files for this tool can be prepared by "yarn logs -applicationId <application_...>".
+"""
+    sys.stderr.write(help_text)
+
+def open_file(f):
+    """Open a log file for line-by-line text reading.
+
+    Paths ending in ".gz" are decompressed transparently. On Python 3 the
+    gzip stream is wrapped in a text layer, because GzipFile yields bytes
+    there and the str patterns used by AggregatedLog would raise TypeError
+    when matched against bytes.
+
+    f -- path of the (optionally gzip-compressed) log file.
+
+    Returns an open, iterable file object that exposes a "name" attribute.
+    """
+    if not f.endswith(".gz"):
+        return open(f)
+    gz_stream = GZFile(f)
+    if sys.version_info[0] < 3:
+        # Python 2: GzipFile already yields str lines, nothing to wrap.
+        return gz_stream
+    import io  # local import: only needed on this code path
+    return io.TextIOWrapper(gz_stream)
+
+class AggregatedLog(object):
+    """Split an aggregated yarn log into one file per container log.
+
+    Input lines are fed through a small state machine:
+
+    * outside a container, a "Container: <name> on <host>" line selects the
+      current host/container and creates its folder;
+    * inside a container, a "LogType:<file>" line opens the output file and
+      a "LogContents:" line marks the start of the log body;
+    * inside a log body, every line is copied to the output file until an
+      "End of LogType:" line closes it and leaves the container.
+
+    Output goes to "<input file name>_splitlogs/<host>/<container>/<file>".
+    """
+
+    def __init__(self):
+        self.in_container = False
+        self.in_logfile = False
+        self.current_container_header = None
+        self.current_container_name = None
+        self.current_host_name = None # as read from log line: "hello.my.host.com_8041"
+        self.current_file = None
+        self.HEADER_CONTAINER_RE = re.compile("Container: (container_[a-z0-9_]+) on (.*)")
+        self.HEADER_LAST_ROW_RE = re.compile("^LogContents:$")
+        self.HEADER_LOG_TYPE_RE = re.compile("^LogType:(.*)")
+        self.LAST_LOG_LINE_RE = re.compile("^End of LogType:.*")
+
+    def process(self, input_file):
+        """Split every line of input_file into the output folder.
+
+        input_file -- open, iterable file object with a "name" attribute;
+                      the output folder is "<name>_splitlogs".
+        """
+        self.output_folder = input_file.name + "_splitlogs"
+        # Tolerate a folder left behind by an earlier run on the same input,
+        # the same way start_container_folder() does for container folders.
+        if not os.path.exists(self.output_folder):
+            os.mkdir(self.output_folder)
+
+        try:
+            for line in input_file:
+                self.parse(line)
+        finally:
+            # Truncated input may end without "End of LogType:"; do not
+            # leave the last output file open in that case.
+            self._close_current_file()
+
+    def parse(self, line):
+        """Advance the state machine by one input line (newline included)."""
+        if self.in_container:
+            if self.in_logfile:
+                m = self.LAST_LOG_LINE_RE.match(line)
+                if m:
+                    self.in_container = False
+                    self.in_logfile = False
+                    self._close_current_file()
+                else:
+                    self.write_to_current_file(line)
+            else:
+                m = self.HEADER_LOG_TYPE_RE.match(line)
+                if m:
+                    file_name = m.group(1)
+                    self.create_file_in_current_container(file_name)
+                elif self.HEADER_LAST_ROW_RE.match(line):
+                    self.in_logfile = True
+                    self.write_to_current_file(self.current_container_header) #for host reference
+        else:
+            m = self.HEADER_CONTAINER_RE.match(line)
+            # Remember the line unconditionally; when it matches it is the
+            # container header later copied to the top of each split file.
+            self.current_container_header = line
+            if m:
+                self.in_container = True
+                self.current_container_name = m.group(1)
+                self.current_host_name = m.group(2)
+                self.start_container_folder()
+
+    def start_container_folder(self):
+        """Create "<output>/<host>/<container>" unless it already exists."""
+        container_dir = os.path.join(self.output_folder, self.get_current_container_dir_name())
+        if not os.path.exists(container_dir):
+            os.makedirs(container_dir)
+
+    def create_file_in_current_container(self, file_name):
+        """Open file_name in the current container folder as the output file."""
+        # A second "LogType:" header before the previous log ended would
+        # otherwise leak the earlier handle.
+        self._close_current_file()
+        file_to_be_created = os.path.join(self.output_folder, self.get_current_container_dir_name(), file_name)
+        self.current_file = open(file_to_be_created, "w+")
+
+    def write_to_current_file(self, line):
+        """Append one line to the currently open output file."""
+        self.current_file.write(line)
+
+    def get_current_container_dir_name(self):
+        """Return the relative folder "<host>/<container>" of the current container."""
+        return os.path.join(self.current_host_name, self.current_container_name)
+
+    def _close_current_file(self):
+        """Close the current output file, if any, and forget it."""
+        if self.current_file is not None:
+            self.current_file.close()
+            self.current_file = None
+
+def main(argv):
+    """Split the aggregated yarn log named on the command line.
+
+    argv -- command-line arguments without the program name; the first
+            positional argument is the path of the log file.
+
+    Returns 1 after printing the usage text when no log file was given,
+    otherwise None (which sys.exit() treats as success).
+    """
+    (_, args) = getopt(argv, "")
+    if not args:
+        # Without this guard args[0] raised IndexError and usage() was
+        # never shown.
+        usage()
+        return 1
+    input_file = args[0]
+    fp = open_file(input_file)
+    try:
+        aggregated_log = AggregatedLog()
+        aggregated_log.process(fp)
+        print ("Split application logs was written into folder " + aggregated_log.output_folder)
+    finally:
+        # Close the input even when processing fails part-way.
+        fp.close()
+
+if __name__ == "__main__":
+    # Hand main()'s return value to the interpreter as the process exit status.
+    exit_status = main(sys.argv[1:])
+    sys.exit(exit_status)
diff --git a/tez-tools/swimlanes/yarn-swimlanes.sh b/tez-tools/tez-log-split/tez-log-splitter.sh
similarity index 68%
copy from tez-tools/swimlanes/yarn-swimlanes.sh
copy to tez-tools/tez-log-split/tez-log-splitter.sh
index df4d071..712e499 100644
--- a/tez-tools/swimlanes/yarn-swimlanes.sh
+++ b/tez-tools/tez-log-split/tez-log-splitter.sh
@@ -16,13 +16,20 @@
 # limitations under the License.
 
 
-set -e
+#set -e
 
 APPID=$1
-
-YARN=$(which yarn);
 TMP=$(mktemp)
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+if [[ -f $APPID ]]; then
+    echo "Reading yarn logs from local file: $APPID"
+    TMP=$APPID
+else
+    YARN=$(which yarn);
+    echo "Fetching yarn logs for $APPID"
+    $YARN logs -applicationId "$APPID" > "$TMP"
+    echo "Application log was written into $TMP"
+fi
 
-echo "Fetching yarn logs for $APPID"
-$YARN logs -applicationId $APPID | grep HISTORY > $TMP 
-python swimlane.py -o $APPID.svg $TMP
+python "$DIR/logsplit.py" "$TMP"
\ No newline at end of file