You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tez.apache.org by ss...@apache.org on 2015/05/06 09:41:05 UTC
[03/50] [abbrv] tez git commit: TEZ-2390. tez-tools swimlane tool fails to parse large jobs >8K containers (jeagles)
TEZ-2390. tez-tools swimlane tool fails to parse large jobs >8K containers (jeagles)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/a02a5ea9
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/a02a5ea9
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/a02a5ea9
Branch: refs/heads/TEZ-2003
Commit: a02a5ea9dd8af4e47114ef4145d8e6b75db6c119
Parents: 765afd2
Author: Jonathan Eagles <je...@gmail.com>
Authored: Thu Apr 30 16:16:10 2015 -0500
Committer: Jonathan Eagles <je...@gmail.com>
Committed: Thu Apr 30 16:16:10 2015 -0500
----------------------------------------------------------------------
CHANGES.txt | 1 +
tez-tools/swimlanes/amlogparser.py | 20 ++++++++++----------
tez-tools/swimlanes/swimlane.py | 6 ++----
3 files changed, 13 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/a02a5ea9/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index aa72320..5a5c21f 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -155,6 +155,7 @@ Release 0.6.1: Unreleased
INCOMPATIBLE CHANGES
ALL CHANGES:
+ TEZ-2390. tez-tools swimlane tool fails to parse large jobs >8K containers
TEZ-2256. Avoid use of BufferTooSmallException to signal end of buffer in UnorderedPartitionedKVWriter
TEZ-2380. Disable fall back to reading from timeline if timeline disabled.
TEZ-2226. Disable writing history to timeline if domain creation fails.
http://git-wip-us.apache.org/repos/asf/tez/blob/a02a5ea9/tez-tools/swimlanes/amlogparser.py
----------------------------------------------------------------------
diff --git a/tez-tools/swimlanes/amlogparser.py b/tez-tools/swimlanes/amlogparser.py
index 02f4892..8ab8e29 100644
--- a/tez-tools/swimlanes/amlogparser.py
+++ b/tez-tools/swimlanes/amlogparser.py
@@ -17,8 +17,7 @@
# under the License.
#
-import os,sys,re,math,os.path
-from collections import defaultdict
+import sys,re
from itertools import groupby
from bz2 import BZ2File
from gzip import GzipFile as GZFile
@@ -142,10 +141,15 @@ class Attempt(object):
def __init__(self, pair):
start = first(filter(lambda a: a.event == "TASK_ATTEMPT_STARTED", pair))
finish = first(filter(lambda a: a.event == "TASK_ATTEMPT_FINISHED", pair))
+ if start is None or finish is None:
+ print [start, finish];
self.raw = finish
- self.dag = finish.dag
self.kvs = csv_kv(start.args)
- self.kvs.update(csv_kv(finish.args))
+ if finish is not None:
+ self.dag = finish.dag
+ self.kvs.update(csv_kv(finish.args))
+ self.finish = (int)(self.kvs["finishTime"])
+ self.duration = (int)(self.kvs["timeTaken"])
self.name = self.kvs["taskAttemptId"]
self.task = self.name[:self.name.rfind("_")].replace("attempt","task")
(_, _, amid, dagid, vertexid, taskid, attemptid) = self.name.split("_")
@@ -153,8 +157,6 @@ class Attempt(object):
self.attemptnum = int(attemptid)
self.vertex = self.kvs["vertexName"]
self.start = (int)(self.kvs["startTime"])
- self.finish = (int)(self.kvs["finishTime"])
- self.duration = (int)(self.kvs["timeTaken"])
self.container = self.kvs["containerId"]
self.node = self.kvs["nodeId"]
def __repr__(self):
@@ -243,6 +245,7 @@ class AMLog(object):
def parse(self, l):
if(l.find("[HISTORY]") != -1):
m = self.MAIN_RE.match(l)
+ print(m);
ts = m.group("ts")
dag = m.group("dag")
event = m.group("event")
@@ -250,14 +253,11 @@ class AMLog(object):
return AMRawEvent(ts, dag, event, args)
def main(argv):
- f = argv[0]
tree = AMLog(argv[0]).structure()
# AM -> dag -> vertex -> task -> attempt
# AM -> container
- containers = set(tree.containers.keys())
- timeto = lambda a: (a - tree.zero)
for d in tree.dags:
- for a in d.attempts():
+ for a in d.attempts():
print [a.vertex, a.name, a.container, a.start, a.finish]
if __name__ == "__main__":
http://git-wip-us.apache.org/repos/asf/tez/blob/a02a5ea9/tez-tools/swimlanes/swimlane.py
----------------------------------------------------------------------
diff --git a/tez-tools/swimlanes/swimlane.py b/tez-tools/swimlanes/swimlane.py
index b739b1e..dc8dc6f 100644
--- a/tez-tools/swimlanes/swimlane.py
+++ b/tez-tools/swimlanes/swimlane.py
@@ -17,10 +17,9 @@
# under the License.
#
-import os,sys,re,math,os.path
+import sys,math,os.path
import StringIO
from amlogparser import AMLog
-import random
from getopt import getopt
class ColourManager(object):
@@ -133,7 +132,7 @@ def main(argv):
svg = SVGHelper(x+2*marginRight+256, y+2*marginTop)
a = marginTop
svg.text(x/2, 32, log.name, style="font-size: 32px; text-anchor: middle")
- containerMap = dict(zip(list(lanes), xrange(8192)))
+ containerMap = dict(zip(list(lanes), xrange(len(lanes))))
svg.text(marginRight - 16, marginTop - 32, "Container ID", "text-anchor:end; font-size: 16px;")
# draw a grid
for l in lanes:
@@ -193,7 +192,6 @@ def main(argv):
percentX = finishes[int(len(finishes)*fraction)]
svg.line(marginRight+xdomain(percentX), marginTop, marginRight+xdomain(percentX), y+marginTop, style="stroke: red")
svg.text(marginRight+xdomain(percentX), y+marginTop+12, "%d%% (%0.1fs)" % (int(fraction*100), (percentX - dag.start)/1000.0), style="font-size:12px; text-anchor: middle")
- prefix = lambda a: (a.find(".") == -1 and a) or (a[:a.find(".")])
out.write(svg.flush())
out.close()