You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by aa...@apache.org on 2021/02/22 14:16:31 UTC

[hadoop] branch branch-3.3 updated: MAPREDUCE-7323. Remove job_history_summary.py. (#2712)

This is an automated email from the ASF dual-hosted git repository.

aajisaka pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/branch-3.3 by this push:
     new 3339ff5  MAPREDUCE-7323. Remove job_history_summary.py. (#2712)
3339ff5 is described below

commit 3339ff596699f17058becc100e2b82acb3d26c36
Author: Akira Ajisaka <aa...@apache.org>
AuthorDate: Mon Feb 22 23:15:04 2021 +0900

    MAPREDUCE-7323. Remove job_history_summary.py. (#2712)
    
    Reviewed-by: Takanobu Asanuma <ta...@apache.org>
    (cherry picked from commit 8bca105655d91d949f8aa8daae656f34ed950e78)
---
 .../examples/terasort/job_history_summary.py       | 100 ---------------------
 1 file changed, 100 deletions(-)

diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/job_history_summary.py b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/job_history_summary.py
deleted file mode 100644
index 70725f8..0000000
--- a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/job_history_summary.py
+++ /dev/null
@@ -1,100 +0,0 @@
-#!/usr/bin/env python
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import re
-import sys
-
-pat = re.compile('(?P<name>[^=]+)="(?P<value>[^"]*)" *')
-counterPat = re.compile('(?P<name>[^:]+):(?P<value>[^,]*),?')
-
-def parse(tail):
-  result = {}
-  for n,v in re.findall(pat, tail):
-    result[n] = v
-  return result
-
-mapStartTime = {}
-mapEndTime = {}
-reduceStartTime = {}
-reduceShuffleTime = {}
-reduceSortTime = {}
-reduceEndTime = {}
-reduceBytes = {}
-
-for line in sys.stdin:
-  words = line.split(" ",1)
-  event = words[0]
-  attrs = parse(words[1])
-  if event == 'MapAttempt':
-    if attrs.has_key("START_TIME"):
-      mapStartTime[attrs["TASKID"]] = int(attrs["START_TIME"])/1000
-    elif attrs.has_key("FINISH_TIME"):
-      mapEndTime[attrs["TASKID"]] = int(attrs["FINISH_TIME"])/1000
-  elif event == 'ReduceAttempt':
-    if attrs.has_key("START_TIME"):
-      reduceStartTime[attrs["TASKID"]] = int(attrs["START_TIME"]) / 1000
-    elif attrs.has_key("FINISH_TIME"):
-      reduceShuffleTime[attrs["TASKID"]] = int(attrs["SHUFFLE_FINISHED"])/1000
-      reduceSortTime[attrs["TASKID"]] = int(attrs["SORT_FINISHED"])/1000
-      reduceEndTime[attrs["TASKID"]] = int(attrs["FINISH_TIME"])/1000
-  elif event == 'Task':
-    if attrs["TASK_TYPE"] == "REDUCE" and attrs.has_key("COUNTERS"):
-      for n,v in re.findall(counterPat, attrs["COUNTERS"]):
-        if n == "File Systems.HDFS bytes written":
-          reduceBytes[attrs["TASKID"]] = int(v)
-
-runningMaps = {}
-shufflingReduces = {}
-sortingReduces = {}
-runningReduces = {}
-startTime = min(reduce(min, mapStartTime.values()),
-                reduce(min, reduceStartTime.values()))
-endTime = max(reduce(max, mapEndTime.values()),
-              reduce(max, reduceEndTime.values()))
-
-reduces = reduceBytes.keys()
-reduces.sort()
-
-print "Name reduce-output-bytes shuffle-finish reduce-finish"
-for r in reduces:
-  print r, reduceBytes[r], reduceShuffleTime[r] - startTime,
-  print reduceEndTime[r] - startTime
-
-print
-
-for t in range(startTime, endTime):
-  runningMaps[t] = 0
-  shufflingReduces[t] = 0
-  sortingReduces[t] = 0
-  runningReduces[t] = 0
-
-for map in mapStartTime.keys():
-  for t in range(mapStartTime[map], mapEndTime[map]):
-    runningMaps[t] += 1
-for reduce in reduceStartTime.keys():
-  for t in range(reduceStartTime[reduce], reduceShuffleTime[reduce]):
-    shufflingReduces[t] += 1
-  for t in range(reduceShuffleTime[reduce], reduceSortTime[reduce]):
-    sortingReduces[t] += 1
-  for t in range(reduceSortTime[reduce], reduceEndTime[reduce]):
-    runningReduces[t] += 1
-
-print "time maps shuffle merge reduce"
-for t in range(startTime, endTime):
-  print t - startTime, runningMaps[t], shufflingReduces[t], sortingReduces[t], 
-  print runningReduces[t]


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org