You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by aa...@apache.org on 2021/02/22 14:16:31 UTC
[hadoop] branch branch-3.3 updated: MAPREDUCE-7323. Remove job_history_summary.py. (#2712)
This is an automated email from the ASF dual-hosted git repository.
aajisaka pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.3 by this push:
new 3339ff5 MAPREDUCE-7323. Remove job_history_summary.py. (#2712)
3339ff5 is described below
commit 3339ff596699f17058becc100e2b82acb3d26c36
Author: Akira Ajisaka <aa...@apache.org>
AuthorDate: Mon Feb 22 23:15:04 2021 +0900
MAPREDUCE-7323. Remove job_history_summary.py. (#2712)
Reviewed-by: Takanobu Asanuma <ta...@apache.org>
(cherry picked from commit 8bca105655d91d949f8aa8daae656f34ed950e78)
---
.../examples/terasort/job_history_summary.py | 100 ---------------------
1 file changed, 100 deletions(-)
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/job_history_summary.py b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/job_history_summary.py
deleted file mode 100644
index 70725f8..0000000
--- a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/job_history_summary.py
+++ /dev/null
@@ -1,100 +0,0 @@
-#!/usr/bin/env python
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import re
-import sys
-
-pat = re.compile('(?P<name>[^=]+)="(?P<value>[^"]*)" *')
-counterPat = re.compile('(?P<name>[^:]+):(?P<value>[^,]*),?')
-
-def parse(tail):
- result = {}
- for n,v in re.findall(pat, tail):
- result[n] = v
- return result
-
-mapStartTime = {}
-mapEndTime = {}
-reduceStartTime = {}
-reduceShuffleTime = {}
-reduceSortTime = {}
-reduceEndTime = {}
-reduceBytes = {}
-
-for line in sys.stdin:
- words = line.split(" ",1)
- event = words[0]
- attrs = parse(words[1])
- if event == 'MapAttempt':
- if attrs.has_key("START_TIME"):
- mapStartTime[attrs["TASKID"]] = int(attrs["START_TIME"])/1000
- elif attrs.has_key("FINISH_TIME"):
- mapEndTime[attrs["TASKID"]] = int(attrs["FINISH_TIME"])/1000
- elif event == 'ReduceAttempt':
- if attrs.has_key("START_TIME"):
- reduceStartTime[attrs["TASKID"]] = int(attrs["START_TIME"]) / 1000
- elif attrs.has_key("FINISH_TIME"):
- reduceShuffleTime[attrs["TASKID"]] = int(attrs["SHUFFLE_FINISHED"])/1000
- reduceSortTime[attrs["TASKID"]] = int(attrs["SORT_FINISHED"])/1000
- reduceEndTime[attrs["TASKID"]] = int(attrs["FINISH_TIME"])/1000
- elif event == 'Task':
- if attrs["TASK_TYPE"] == "REDUCE" and attrs.has_key("COUNTERS"):
- for n,v in re.findall(counterPat, attrs["COUNTERS"]):
- if n == "File Systems.HDFS bytes written":
- reduceBytes[attrs["TASKID"]] = int(v)
-
-runningMaps = {}
-shufflingReduces = {}
-sortingReduces = {}
-runningReduces = {}
-startTime = min(reduce(min, mapStartTime.values()),
- reduce(min, reduceStartTime.values()))
-endTime = max(reduce(max, mapEndTime.values()),
- reduce(max, reduceEndTime.values()))
-
-reduces = reduceBytes.keys()
-reduces.sort()
-
-print "Name reduce-output-bytes shuffle-finish reduce-finish"
-for r in reduces:
- print r, reduceBytes[r], reduceShuffleTime[r] - startTime,
- print reduceEndTime[r] - startTime
-
-print
-
-for t in range(startTime, endTime):
- runningMaps[t] = 0
- shufflingReduces[t] = 0
- sortingReduces[t] = 0
- runningReduces[t] = 0
-
-for map in mapStartTime.keys():
- for t in range(mapStartTime[map], mapEndTime[map]):
- runningMaps[t] += 1
-for reduce in reduceStartTime.keys():
- for t in range(reduceStartTime[reduce], reduceShuffleTime[reduce]):
- shufflingReduces[t] += 1
- for t in range(reduceShuffleTime[reduce], reduceSortTime[reduce]):
- sortingReduces[t] += 1
- for t in range(reduceSortTime[reduce], reduceEndTime[reduce]):
- runningReduces[t] += 1
-
-print "time maps shuffle merge reduce"
-for t in range(startTime, endTime):
- print t - startTime, runningMaps[t], shufflingReduces[t], sortingReduces[t],
- print runningReduces[t]
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org