You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tez.apache.org by ss...@apache.org on 2015/05/06 09:41:05 UTC

[03/50] [abbrv] tez git commit: TEZ-2390. tez-tools swimlane tool fails to parse large jobs >8K containers (jeagles)

TEZ-2390. tez-tools swimlane tool fails to parse large jobs >8K containers (jeagles)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/a02a5ea9
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/a02a5ea9
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/a02a5ea9

Branch: refs/heads/TEZ-2003
Commit: a02a5ea9dd8af4e47114ef4145d8e6b75db6c119
Parents: 765afd2
Author: Jonathan Eagles <je...@gmail.com>
Authored: Thu Apr 30 16:16:10 2015 -0500
Committer: Jonathan Eagles <je...@gmail.com>
Committed: Thu Apr 30 16:16:10 2015 -0500

----------------------------------------------------------------------
 CHANGES.txt                        |  1 +
 tez-tools/swimlanes/amlogparser.py | 20 ++++++++++----------
 tez-tools/swimlanes/swimlane.py    |  6 ++----
 3 files changed, 13 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/a02a5ea9/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index aa72320..5a5c21f 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -155,6 +155,7 @@ Release 0.6.1: Unreleased
 INCOMPATIBLE CHANGES
 
 ALL CHANGES:
+  TEZ-2390. tez-tools swimlane tool fails to parse large jobs >8K containers
   TEZ-2256. Avoid use of BufferTooSmallException to signal end of buffer in UnorderedPartitionedKVWriter
   TEZ-2380. Disable fall back to reading from timeline if timeline disabled.
   TEZ-2226. Disable writing history to timeline if domain creation fails.

http://git-wip-us.apache.org/repos/asf/tez/blob/a02a5ea9/tez-tools/swimlanes/amlogparser.py
----------------------------------------------------------------------
diff --git a/tez-tools/swimlanes/amlogparser.py b/tez-tools/swimlanes/amlogparser.py
index 02f4892..8ab8e29 100644
--- a/tez-tools/swimlanes/amlogparser.py
+++ b/tez-tools/swimlanes/amlogparser.py
@@ -17,8 +17,7 @@
 # under the License.
 #
 
-import os,sys,re,math,os.path
-from collections import defaultdict
+import sys,re
 from itertools import groupby
 from bz2 import BZ2File
 from gzip import GzipFile as GZFile
@@ -142,10 +141,15 @@ class Attempt(object):
 	def __init__(self, pair):
 		start = first(filter(lambda a: a.event == "TASK_ATTEMPT_STARTED", pair))
 		finish = first(filter(lambda a: a.event == "TASK_ATTEMPT_FINISHED", pair))
+		if start is None or finish is None:
+			print [start, finish];
 		self.raw = finish
-		self.dag = finish.dag
 		self.kvs = csv_kv(start.args)
-		self.kvs.update(csv_kv(finish.args))
+		if finish is not None:
+			self.dag = finish.dag
+			self.kvs.update(csv_kv(finish.args))
+			self.finish = (int)(self.kvs["finishTime"])
+			self.duration = (int)(self.kvs["timeTaken"])
 		self.name = self.kvs["taskAttemptId"]
 		self.task = self.name[:self.name.rfind("_")].replace("attempt","task")
 		(_, _, amid, dagid, vertexid, taskid, attemptid) = self.name.split("_")
@@ -153,8 +157,6 @@ class Attempt(object):
 		self.attemptnum = int(attemptid)
 		self.vertex = self.kvs["vertexName"]
 		self.start = (int)(self.kvs["startTime"])
-		self.finish = (int)(self.kvs["finishTime"])
-		self.duration = (int)(self.kvs["timeTaken"])
 		self.container = self.kvs["containerId"]
 		self.node = self.kvs["nodeId"]
 	def __repr__(self):
@@ -243,6 +245,7 @@ class AMLog(object):
 	def parse(self, l):		
 		if(l.find("[HISTORY]") != -1):
 			m = self.MAIN_RE.match(l)
+			print(m);
 			ts = m.group("ts")
 			dag = m.group("dag")
 			event = m.group("event")
@@ -250,14 +253,11 @@ class AMLog(object):
 			return AMRawEvent(ts, dag, event, args)
 
 def main(argv):
-	f = argv[0]
 	tree = AMLog(argv[0]).structure()
 	# AM -> dag -> vertex -> task -> attempt
 	# AM -> container
-	containers = set(tree.containers.keys())
-	timeto = lambda a: (a - tree.zero)
 	for d in tree.dags:
-		for a in d.attempts():			
+		for a in d.attempts():
 			print [a.vertex, a.name, a.container, a.start, a.finish]
 
 if __name__ == "__main__":

http://git-wip-us.apache.org/repos/asf/tez/blob/a02a5ea9/tez-tools/swimlanes/swimlane.py
----------------------------------------------------------------------
diff --git a/tez-tools/swimlanes/swimlane.py b/tez-tools/swimlanes/swimlane.py
index b739b1e..dc8dc6f 100644
--- a/tez-tools/swimlanes/swimlane.py
+++ b/tez-tools/swimlanes/swimlane.py
@@ -17,10 +17,9 @@
 # under the License.
 #
 
-import os,sys,re,math,os.path
+import sys,math,os.path
 import StringIO
 from amlogparser import AMLog
-import random
 from getopt import getopt
 
 class ColourManager(object):
@@ -133,7 +132,7 @@ def main(argv):
 	svg = SVGHelper(x+2*marginRight+256, y+2*marginTop)
 	a = marginTop
 	svg.text(x/2, 32, log.name, style="font-size: 32px; text-anchor: middle")	
-	containerMap = dict(zip(list(lanes), xrange(8192)))
+	containerMap = dict(zip(list(lanes), xrange(len(lanes))))
 	svg.text(marginRight - 16, marginTop - 32, "Container ID", "text-anchor:end; font-size: 16px;")
 	# draw a grid
 	for l in lanes:
@@ -193,7 +192,6 @@ def main(argv):
 			percentX = finishes[int(len(finishes)*fraction)]
 			svg.line(marginRight+xdomain(percentX), marginTop, marginRight+xdomain(percentX), y+marginTop, style="stroke: red")
 			svg.text(marginRight+xdomain(percentX), y+marginTop+12, "%d%% (%0.1fs)" % (int(fraction*100), (percentX - dag.start)/1000.0), style="font-size:12px; text-anchor: middle")
-	prefix = lambda a: (a.find(".") == -1 and a) or (a[:a.find(".")])
 	out.write(svg.flush())
 	out.close()