You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tez.apache.org by rb...@apache.org on 2020/01/06 06:12:40 UTC
[tez] branch master updated: TEZ-4098: tez-tools improvements: log-split, swimlane (László Bodor, reviewed by rbalamohan)
This is an automated email from the ASF dual-hosted git repository.
rbalamohan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tez.git
The following commit(s) were added to refs/heads/master by this push:
new 271351b TEZ-4098: tez-tools improvements: log-split, swimlane (László Bodor, reviewed by rbalamohan)
271351b is described below
commit 271351ba29382da08a24adb2ba2327cc68b95efb
Author: Rajesh Balamohan <rb...@apache.org>
AuthorDate: Mon Jan 6 11:41:57 2020 +0530
TEZ-4098: tez-tools improvements: log-split, swimlane (László Bodor, reviewed by rbalamohan)
---
tez-tools/swimlanes/swimlane.py | 1 +
tez-tools/swimlanes/yarn-swimlanes.sh | 17 +++-
tez-tools/tez-log-split/README.md | 77 ++++++++++++++
tez-tools/tez-log-split/logsplit.py | 111 +++++++++++++++++++++
.../tez-log-splitter.sh} | 19 ++--
5 files changed, 214 insertions(+), 11 deletions(-)
diff --git a/tez-tools/swimlanes/swimlane.py b/tez-tools/swimlanes/swimlane.py
index bbd54df..11976da 100644
--- a/tez-tools/swimlanes/swimlane.py
+++ b/tez-tools/swimlanes/swimlane.py
@@ -195,6 +195,7 @@ def main(argv):
svg.text(marginRight+xdomain(percentX), y+marginTop+12, "%d%% (%0.1fs)" % (int(fraction*100), (percentX - dag.start)/1000.0), style="font-size:12px; text-anchor: middle")
out.write(svg.flush())
out.close()
+ print("Output svg is written into: " + str(out))
if __name__ == "__main__":
sys.exit(main(sys.argv[1:]))
diff --git a/tez-tools/swimlanes/yarn-swimlanes.sh b/tez-tools/swimlanes/yarn-swimlanes.sh
index df4d071..02465b0 100644
--- a/tez-tools/swimlanes/yarn-swimlanes.sh
+++ b/tez-tools/swimlanes/yarn-swimlanes.sh
@@ -19,10 +19,17 @@
set -e
APPID=$1
-
-YARN=$(which yarn);
TMP=$(mktemp)
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+if [[ -f $APPID ]]; then
+ echo "Reading yarn logs from local file: $APPID"
+ cat "$APPID" | grep HISTORY > "$TMP"
+else
+ YARN=$(which yarn);
+ echo "Fetching yarn logs for $APPID"
+ $YARN logs -applicationId "$APPID" | grep HISTORY > "$TMP"
+fi
+echo "History was written into $TMP"
-echo "Fetching yarn logs for $APPID"
-$YARN logs -applicationId $APPID | grep HISTORY > $TMP
-python swimlane.py -o $APPID.svg $TMP
+python "$DIR/swimlane.py" -o "$APPID.svg" "$TMP"
\ No newline at end of file
diff --git a/tez-tools/tez-log-split/README.md b/tez-tools/tez-log-split/README.md
new file mode 100644
index 0000000..a7341a7
--- /dev/null
+++ b/tez-tools/tez-log-split/README.md
@@ -0,0 +1,77 @@
+<!--
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. See accompanying LICENSE file.
+-->
+
+Tez log splitter
+=========
+
+This is a post-hoc analysis tool for Apache Tez which splits
+an aggregated yarn log file into separate files, arranged in a hierarchical folder structure (host / container / log file).
+
+```
+.
+├── vc0525.your.domain.com_8041
+│ └── container_e10_1575565459633_0004_01_000001
+│ ├── container-localizer-syslog
+│ ├── dag_1575565459633_0004_1-tez-dag.pb.txt
+│ ├── dag_1575565459633_0004_1.dot
+│ ├── prelaunch.err
+│ ├── prelaunch.out
+│ ├── stderr
+│ ├── stdout
+│ ├── syslog
+│ ├── syslog_dag_1575565459633_0004_1
+│ └── syslog_dag_1575565459633_0004_1_post
+├── vc0526.your.domain.com_8041
+│ └── container_e10_1575565459633_0004_01_000004
+│ ├── container-localizer-syslog
+│ ├── prelaunch.err
+│ ├── prelaunch.out
+│ ├── stderr
+│ ├── stdout
+│ ├── syslog
+│ └── syslog_attempt_1575565459633_0004_1_00_000000_2
+├── vc0528.your.domain.com_8041
+│ └── container_e10_1575565459633_0004_01_000002
+│ ├── container-localizer-syslog
+│ ├── prelaunch.err
+│ ├── prelaunch.out
+│ ├── stderr
+│ ├── stdout
+│ ├── syslog
+│ └── syslog_attempt_1575565459633_0004_1_00_000000_0
+├── vc0529.your.domain.com_8041
+│ └── container_e10_1575565459633_0004_01_000005
+│ ├── container-localizer-syslog
+│ ├── prelaunch.err
+│ ├── prelaunch.out
+│ ├── stderr
+│ ├── stdout
+│ ├── syslog
+│ └── syslog_attempt_1575565459633_0004_1_00_000000_3
+└── vc0536.your.domain.com_8041
+ └── container_e10_1575565459633_0004_01_000003
+ ├── container-localizer-syslog
+ ├── prelaunch.err
+ ├── prelaunch.out
+ ├── stderr
+ ├── stdout
+ ├── syslog
+ └── syslog_attempt_1575565459633_0004_1_00_000000_1
+```
+
+To use the tool, run e.g.
+
+- `tez-log-splitter.sh application_1576254620247_0010` (the app log is fetched from yarn)
+- `tez-log-splitter.sh ~/path/to/application_1576254620247_0010.log` (when the app log is already on your computer)
+- `tez-log-splitter.sh ~/path/to/application_1576254620247_0010.log.gz` (when the app log is already on your computer, gzip-compressed)
diff --git a/tez-tools/tez-log-split/logsplit.py b/tez-tools/tez-log-split/logsplit.py
new file mode 100644
index 0000000..47e17da
--- /dev/null
+++ b/tez-tools/tez-log-split/logsplit.py
@@ -0,0 +1,111 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+import sys
+import os
+import re
+from gzip import GzipFile as GZFile
+from getopt import getopt
+
def usage():
    """Write the command-line help text for logsplit.py to standard error."""
    help_lines = [
        "",
        "usage: logsplit.py <log-file>",
        "",
        'Input files for this tool can be prepared by "yarn logs -applicationId <application_...>".',
        "",
    ]
    sys.stderr.write("\n".join(help_lines))
+
def open_file(f):
    """Open the aggregated log at path *f* for line-by-line text reading.

    Paths ending in ".gz" are decompressed transparently; anything else is
    opened as a plain text file. In both cases the returned object yields
    text lines and exposes a ``name`` attribute holding the path, which
    AggregatedLog.process() uses to derive the output folder.
    """
    if f.endswith(".gz"):
        # Imported locally because the file only imports GzipFile by name.
        import gzip

        # GzipFile(f) reads in binary mode and yields bytes on Python 3, which
        # cannot be matched against the str regexes or written to the
        # text-mode output files. "rt" makes gzip input behave like open(f).
        return gzip.open(f, "rt")
    return open(f)
+
class AggregatedLog(object):
    """Split an aggregated yarn application log into one file per container log.

    The input is consumed line by line with a small state machine:

    * outside a container section, wait for a "Container: <id> on <host>" line;
    * inside a container section, a "LogType:<name>" line opens the output
      file <output_folder>/<host>/<container>/<name>, and a "LogContents:"
      line marks the start of the log body;
    * inside the log body, every line is copied to the output file until an
      "End of LogType:" line, which closes the file and ends the section.

    The container header line is written as the first line of every output
    file so the originating host can be looked up later.
    """

    # Compiled once for the class; still reachable as self.<NAME>.
    HEADER_CONTAINER_RE = re.compile(r"Container: (container_[a-z0-9_]+) on (.*)")
    HEADER_LAST_ROW_RE = re.compile(r"^LogContents:$")
    HEADER_LOG_TYPE_RE = re.compile(r"^LogType:(.*)")
    LAST_LOG_LINE_RE = re.compile(r"^End of LogType:.*")

    def __init__(self):
        self.in_container = False
        self.in_logfile = False
        self.current_container_header = None
        self.current_container_name = None
        self.current_host_name = None  # as read from log line: "hello.my.host.com_8041"
        self.current_file = None
        self.output_folder = None  # set by process()

    def process(self, input_file):
        """Split *input_file* into <input_file.name>_splitlogs/.

        *input_file* must be an open, iterable text file object with a
        ``name`` attribute. The output folder is reused if it already exists,
        so the tool can be re-run on the same input.
        """
        self.output_folder = input_file.name + "_splitlogs"
        if not os.path.isdir(self.output_folder):
            os.mkdir(self.output_folder)

        try:
            for line in input_file:
                self.parse(line)
        finally:
            # A truncated input never reaches "End of LogType:"; make sure
            # the last output file is flushed and closed anyway.
            self._close_current_file()

    def parse(self, line):
        """Feed one input line (including its newline) to the state machine."""
        if not self.in_container:
            # Remember the line: when it is a container header it is later
            # written at the top of each log file of that section.
            self.current_container_header = line
            header = self.HEADER_CONTAINER_RE.match(line)
            if header:
                self.in_container = True
                self.current_container_name = header.group(1)
                self.current_host_name = header.group(2)
                self.start_container_folder()
            return

        if self.in_logfile:
            if self.LAST_LOG_LINE_RE.match(line):
                self.in_container = False
                self.in_logfile = False
                self._close_current_file()
            else:
                self.write_to_current_file(line)
            return

        log_type = self.HEADER_LOG_TYPE_RE.match(line)
        if log_type:
            self.create_file_in_current_container(log_type.group(1))
        elif self.HEADER_LAST_ROW_RE.match(line):
            if self.current_file is None:
                # "LogContents:" without a preceding "LogType:": there is no
                # file to write to, so drop this section and resynchronise on
                # the next container header instead of crashing.
                self.in_container = False
                return
            self.in_logfile = True
            self.write_to_current_file(self.current_container_header)  # for host reference

    def start_container_folder(self):
        """Create the <host>/<container> folder for the current section if missing."""
        container_dir = os.path.join(self.output_folder, self.get_current_container_dir_name())
        if not os.path.exists(container_dir):
            os.makedirs(container_dir)

    def create_file_in_current_container(self, file_name):
        """Open *file_name* for writing inside the current container folder."""
        # Never leave a previously opened file dangling.
        self._close_current_file()
        file_to_be_created = os.path.join(
            self.output_folder, self.get_current_container_dir_name(), file_name)
        self.current_file = open(file_to_be_created, "w")

    def write_to_current_file(self, line):
        """Append *line* to the currently open output file."""
        self.current_file.write(line)

    def get_current_container_dir_name(self):
        """Return the relative folder of the current section: <host>/<container>."""
        return os.path.join(self.current_host_name, self.current_container_name)

    def _close_current_file(self):
        """Close the current output file, if any, and forget it."""
        if self.current_file is not None:
            self.current_file.close()
            self.current_file = None
+
def main(argv):
    """Split the aggregated yarn log named by the first positional argument.

    *argv* is the argument list without the program name. Returns the process
    exit status: 0 on success, 1 when no log file was given, 2 when an
    unsupported option was passed.
    """
    # Imported locally because the file only imports getopt() by name.
    from getopt import GetoptError

    try:
        (_, args) = getopt(argv, "")
    except GetoptError as err:
        sys.stderr.write(str(err) + "\n")
        usage()
        return 2

    if not args:
        # Previously this fell through to args[0] and died with an IndexError.
        usage()
        return 1

    fp = open_file(args[0])
    try:
        aggregated_log = AggregatedLog()
        aggregated_log.process(fp)
    finally:
        # Close the input even when splitting fails half-way.
        fp.close()
    print("Split application logs was written into folder " + aggregated_log.output_folder)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
diff --git a/tez-tools/swimlanes/yarn-swimlanes.sh b/tez-tools/tez-log-split/tez-log-splitter.sh
similarity index 68%
copy from tez-tools/swimlanes/yarn-swimlanes.sh
copy to tez-tools/tez-log-split/tez-log-splitter.sh
index df4d071..712e499 100644
--- a/tez-tools/swimlanes/yarn-swimlanes.sh
+++ b/tez-tools/tez-log-split/tez-log-splitter.sh
@@ -16,13 +16,20 @@
# limitations under the License.
-set -e
+#set -e
APPID=$1
-
-YARN=$(which yarn);
TMP=$(mktemp)
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+
+if [[ -f $APPID ]]; then
+ echo "Reading yarn logs from local file: $APPID"
+ TMP=$APPID
+else
+ YARN=$(which yarn);
+ echo "Fetching yarn logs for $APPID"
+ $YARN logs -applicationId "$APPID" > "$TMP"
+ echo "Application log was written into $TMP"
+fi
-echo "Fetching yarn logs for $APPID"
-$YARN logs -applicationId $APPID | grep HISTORY > $TMP
-python swimlane.py -o $APPID.svg $TMP
+python "$DIR/logsplit.py" "$TMP"
\ No newline at end of file