You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ae...@apache.org on 2016/02/01 19:40:35 UTC

[16/50] [abbrv] hadoop git commit: YARN-4612. Fix rumen and scheduler load simulator to handle killed tasks properly. Contributed by Ming Ma.

YARN-4612. Fix rumen and scheduler load simulator to handle killed tasks
properly. Contributed by Ming Ma.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/4efdf3a9
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/4efdf3a9
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/4efdf3a9

Branch: refs/heads/HDFS-7240
Commit: 4efdf3a979c361348612f817a3253be6d0de58f7
Parents: d323639
Author: Xuan <xg...@apache.org>
Authored: Tue Jan 26 18:17:12 2016 -0800
Committer: Xuan <xg...@apache.org>
Committed: Tue Jan 26 18:17:12 2016 -0800

----------------------------------------------------------------------
 .../apache/hadoop/tools/rumen/JobBuilder.java   |  11 +-
 .../src/main/data/2jobs2min-rumen-jh.json       | 606 +++++++++++++++++++
 .../org/apache/hadoop/yarn/sls/SLSRunner.java   |   6 +
 .../apache/hadoop/yarn/sls/utils/SLSUtils.java  |   6 +
 hadoop-yarn-project/CHANGES.txt                 |   3 +
 5 files changed, 628 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/4efdf3a9/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobBuilder.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobBuilder.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobBuilder.java
index c5ae2fc..890f388 100644
--- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobBuilder.java
+++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobBuilder.java
@@ -473,9 +473,12 @@ public class JobBuilder {
     task.setTaskStatus(getPre21Value(event.getTaskStatus()));
     TaskFailed t = (TaskFailed)(event.getDatum());
     task.putDiagnosticInfo(t.error.toString());
-    task.putFailedDueToAttemptId(t.failedDueToAttempt.toString());
+    // killed task wouldn't have failed attempt.
+    if (t.getFailedDueToAttempt() != null) {
+      task.putFailedDueToAttemptId(t.getFailedDueToAttempt().toString());
+    }
     org.apache.hadoop.mapreduce.jobhistory.JhCounters counters =
-        ((TaskFailed) event.getDatum()).counters;
+        ((TaskFailed) event.getDatum()).getCounters();
     task.incorporateCounters(
         counters == null ? EMPTY_COUNTERS : counters);
   }
@@ -500,7 +503,7 @@ public class JobBuilder {
 
     attempt.setFinishTime(event.getFinishTime());
     org.apache.hadoop.mapreduce.jobhistory.JhCounters counters =
-        ((TaskAttemptUnsuccessfulCompletion) event.getDatum()).counters;
+        ((TaskAttemptUnsuccessfulCompletion) event.getDatum()).getCounters();
     attempt.incorporateCounters(
         counters == null ? EMPTY_COUNTERS : counters);
     attempt.arraySetClockSplits(event.getClockSplits());
@@ -509,7 +512,7 @@ public class JobBuilder {
     attempt.arraySetPhysMemKbytes(event.getPhysMemKbytes());
     TaskAttemptUnsuccessfulCompletion t =
         (TaskAttemptUnsuccessfulCompletion) (event.getDatum());
-    attempt.putDiagnosticInfo(t.error.toString());
+    attempt.putDiagnosticInfo(t.getError().toString());
   }
 
   private void processTaskAttemptStartedEvent(TaskAttemptStartedEvent event) {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/4efdf3a9/hadoop-tools/hadoop-sls/src/main/data/2jobs2min-rumen-jh.json
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-sls/src/main/data/2jobs2min-rumen-jh.json b/hadoop-tools/hadoop-sls/src/main/data/2jobs2min-rumen-jh.json
index 83629ed..9d90deb 100644
--- a/hadoop-tools/hadoop-sls/src/main/data/2jobs2min-rumen-jh.json
+++ b/hadoop-tools/hadoop-sls/src/main/data/2jobs2min-rumen-jh.json
@@ -10208,4 +10208,610 @@
   "clusterReduceMB" : -1,
   "jobMapMB" : 200,
   "jobReduceMB" : 200
+} {
+"priority" : "NORMAL",
+"jobID" : "job_1369942127770_1207",
+"user" : "jenkins",
+"jobName" : "TeraGen",
+"submitTime" : 1371223054499,
+"finishTime" : 1371223153874,
+"queue" : "sls_queue_1",
+"mapTasks" : [ {
+"startTime" : 1371223059053,
+"taskID" : "task_1369942127770_1207_m_000000",
+"taskType" : "MAP",
+"finishTime" : 1371223078206,
+"attempts" : [ ],
+"preferredLocations" : [ ],
+"taskStatus" : "KILLED",
+"inputBytes" : -1,
+"inputRecords" : -1,
+"outputBytes" : -1,
+"outputRecords" : -1
+} ],
+"reduceTasks" : [ ],
+"launchTime" : 1371223058937,
+"totalMaps" : 1,
+"totalReduces" : 0,
+"otherTasks" : [ ],
+"jobProperties" : {
+"mapreduce.job.ubertask.enable" : "false",
+"yarn.resourcemanager.max-completed-applications" : "10000",
+"yarn.resourcemanager.delayed.delegation-token.removal-interval-ms" : "30000",
+"mapreduce.client.submit.file.replication" : "2",
+"yarn.nodemanager.container-manager.thread-count" : "20",
+"mapred.queue.default.acl-administer-jobs" : "*",
+"dfs.image.transfer.bandwidthPerSec" : "0",
+"mapreduce.tasktracker.healthchecker.interval" : "60000",
+"mapreduce.jobtracker.staging.root.dir" : "/user",
+"yarn.resourcemanager.recovery.enabled" : "false",
+"yarn.resourcemanager.am.max-retries" : "1",
+"dfs.block.access.token.lifetime" : "600",
+"fs.AbstractFileSystem.file.impl" : "org.apache.hadoop.fs.local.LocalFs",
+"mapreduce.client.completion.pollinterval" : "5000",
+"mapreduce.job.ubertask.maxreduces" : "1",
+"mapreduce.reduce.shuffle.memory.limit.percent" : "0.25",
+"dfs.domain.socket.path" : "/var/run/hdfs-sockets/dn",
+"hadoop.ssl.keystores.factory.class" : "org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory",
+"hadoop.http.authentication.kerberos.keytab" : "${user.home}/hadoop.keytab",
+"yarn.nodemanager.keytab" : "/etc/krb5.keytab",
+"io.seqfile.sorter.recordlimit" : "1000000",
+"s3.blocksize" : "67108864",
+"mapreduce.task.io.sort.factor" : "10",
+"yarn.nodemanager.disk-health-checker.interval-ms" : "120000",
+"mapreduce.job.working.dir" : "hdfs://a2115.smile.com:8020/user/jenkins",
+"yarn.admin.acl" : "*",
+"mapreduce.job.speculative.speculativecap" : "0.1",
+"dfs.namenode.num.checkpoints.retained" : "2",
+"dfs.namenode.delegation.token.renew-interval" : "86400000",
+"yarn.nodemanager.resource.memory-mb" : "8192",
+"io.map.index.interval" : "128",
+"s3.client-write-packet-size" : "65536",
+"mapreduce.task.files.preserve.failedtasks" : "false",
+"dfs.namenode.http-address" : "a2115.smile.com:20101",
+"ha.zookeeper.session-timeout.ms" : "5000",
+"hadoop.hdfs.configuration.version" : "1",
+"s3.replication" : "3",
+"dfs.datanode.balance.bandwidthPerSec" : "1048576",
+"mapreduce.reduce.shuffle.connect.timeout" : "180000",
+"hadoop.ssl.enabled" : "false",
+"dfs.journalnode.rpc-address" : "0.0.0.0:8485",
+"yarn.nodemanager.aux-services" : "mapreduce.shuffle",
+"mapreduce.job.counters.max" : "120",
+"dfs.datanode.readahead.bytes" : "4193404",
+"ipc.client.connect.max.retries.on.timeouts" : "45",
+"mapreduce.job.complete.cancel.delegation.tokens" : "true",
+"dfs.client.failover.max.attempts" : "15",
+"dfs.namenode.checkpoint.dir" : "file://${hadoop.tmp.dir}/dfs/namesecondary",
+"dfs.namenode.replication.work.multiplier.per.iteration" : "2",
+"fs.trash.interval" : "1",
+"yarn.resourcemanager.admin.address" : "a2115.smile.com:8033",
+"ha.health-monitor.check-interval.ms" : "1000",
+"mapreduce.job.outputformat.class" : "org.apache.hadoop.examples.terasort.TeraOutputFormat",
+"hadoop.jetty.logs.serve.aliases" : "true",
+"hadoop.http.authentication.kerberos.principal" : "HTTP/_HOST@LOCALHOST",
+"mapreduce.job.reduce.shuffle.consumer.plugin.class" : "org.apache.hadoop.mapreduce.task.reduce.Shuffle",
+"s3native.blocksize" : "67108864",
+"dfs.namenode.edits.dir" : "${dfs.namenode.name.dir}",
+"ha.health-monitor.sleep-after-disconnect.ms" : "1000",
+"dfs.encrypt.data.transfer" : "false",
+"dfs.datanode.http.address" : "0.0.0.0:50075",
+"mapreduce.terasort.num-rows" : "400000000",
+"mapreduce.job.map.class" : "org.apache.hadoop.examples.terasort.TeraGen$SortGenMapper",
+"mapreduce.jobtracker.jobhistory.task.numberprogresssplits" : "12",
+"dfs.namenode.write.stale.datanode.ratio" : "0.5f",
+"dfs.client.use.datanode.hostname" : "false",
+"yarn.acl.enable" : "true",
+"hadoop.security.instrumentation.requires.admin" : "false",
+"yarn.nodemanager.localizer.fetch.thread-count" : "4",
+"hadoop.security.authorization" : "false",
+"user.name" : "jenkins",
+"dfs.namenode.fs-limits.min-block-size" : "1048576",
+"dfs.client.failover.connection.retries.on.timeouts" : "0",
+"hadoop.security.group.mapping.ldap.search.filter.group" : "(objectClass=group)",
+"mapreduce.output.fileoutputformat.compress.codec" : "org.apache.hadoop.io.compress.DefaultCodec",
+"dfs.namenode.safemode.extension" : "30000",
+"mapreduce.shuffle.port" : "8080",
+"mapreduce.reduce.log.level" : "INFO",
+"yarn.log-aggregation-enable" : "false",
+"dfs.datanode.sync.behind.writes" : "false",
+"mapreduce.jobtracker.instrumentation" : "org.apache.hadoop.mapred.JobTrackerMetricsInst",
+"dfs.https.server.keystore.resource" : "ssl-server.xml",
+"hadoop.security.group.mapping.ldap.search.attr.group.name" : "cn",
+"dfs.namenode.replication.min" : "1",
+"mapreduce.map.java.opts" : " -Xmx825955249",
+"yarn.scheduler.fair.allocation.file" : "/etc/yarn/fair-scheduler.xml",
+"s3native.bytes-per-checksum" : "512",
+"mapreduce.tasktracker.tasks.sleeptimebeforesigkill" : "5000",
+"tfile.fs.output.buffer.size" : "262144",
+"yarn.nodemanager.local-dirs" : "${hadoop.tmp.dir}/nm-local-dir",
+"mapreduce.jobtracker.persist.jobstatus.active" : "false",
+"fs.AbstractFileSystem.hdfs.impl" : "org.apache.hadoop.fs.Hdfs",
+"mapreduce.job.map.output.collector.class" : "org.apache.hadoop.mapred.MapTask$MapOutputBuffer",
+"mapreduce.tasktracker.local.dir.minspacestart" : "0",
+"dfs.namenode.safemode.min.datanodes" : "0",
+"hadoop.security.uid.cache.secs" : "14400",
+"dfs.client.https.need-auth" : "false",
+"dfs.client.write.exclude.nodes.cache.expiry.interval.millis" : "600000",
+"dfs.client.https.keystore.resource" : "ssl-client.xml",
+"dfs.namenode.max.objects" : "0",
+"hadoop.ssl.client.conf" : "ssl-client.xml",
+"dfs.namenode.safemode.threshold-pct" : "0.999f",
+"mapreduce.tasktracker.local.dir.minspacekill" : "0",
+"mapreduce.jobtracker.retiredjobs.cache.size" : "1000",
+"dfs.blocksize" : "134217728",
+"yarn.resourcemanager.scheduler.class" : "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler",
+"mapreduce.job.reduce.slowstart.completedmaps" : "0.8",
+"mapreduce.job.end-notification.retry.attempts" : "5",
+"mapreduce.job.inputformat.class" : "org.apache.hadoop.examples.terasort.TeraGen$RangeInputFormat",
+"mapreduce.map.memory.mb" : "1024",
+"mapreduce.job.user.name" : "jenkins",
+"mapreduce.tasktracker.outofband.heartbeat" : "false",
+"io.native.lib.available" : "true",
+"mapreduce.jobtracker.persist.jobstatus.hours" : "0",
+"dfs.client-write-packet-size" : "65536",
+"mapreduce.client.progressmonitor.pollinterval" : "1000",
+"dfs.namenode.name.dir" : "file://${hadoop.tmp.dir}/dfs/name",
+"dfs.ha.log-roll.period" : "120",
+"mapreduce.reduce.input.buffer.percent" : "0.0",
+"mapreduce.map.output.compress.codec" : "org.apache.hadoop.io.compress.SnappyCodec",
+"dfs.client.failover.sleep.base.millis" : "500",
+"dfs.datanode.directoryscan.threads" : "1",
+"mapreduce.jobtracker.address" : "neededForHive:999999",
+"mapreduce.cluster.local.dir" : "${hadoop.tmp.dir}/mapred/local",
+"yarn.scheduler.fair.user-as-default-queue" : "true",
+"mapreduce.job.application.attempt.id" : "1",
+"dfs.permissions.enabled" : "true",
+"mapreduce.tasktracker.taskcontroller" : "org.apache.hadoop.mapred.DefaultTaskController",
+"yarn.scheduler.fair.preemption" : "true",
+"mapreduce.reduce.shuffle.parallelcopies" : "5",
+"dfs.support.append" : "true",
+"yarn.nodemanager.env-whitelist" : "JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,YARN_HOME",
+"mapreduce.jobtracker.heartbeats.in.second" : "100",
+"mapreduce.job.maxtaskfailures.per.tracker" : "3",
+"ipc.client.connection.maxidletime" : "10000",
+"mapreduce.shuffle.ssl.enabled" : "false",
+"dfs.namenode.invalidate.work.pct.per.iteration" : "0.32f",
+"dfs.blockreport.intervalMsec" : "21600000",
+"fs.s3.sleepTimeSeconds" : "10",
+"dfs.namenode.replication.considerLoad" : "true",
+"dfs.client.block.write.retries" : "3",
+"hadoop.ssl.server.conf" : "ssl-server.xml",
+"dfs.namenode.name.dir.restore" : "false",
+"rpc.engine.org.apache.hadoop.mapreduce.v2.api.MRClientProtocolPB" : "org.apache.hadoop.ipc.ProtobufRpcEngine",
+"dfs.datanode.hdfs-blocks-metadata.enabled" : "true",
+"ha.zookeeper.parent-znode" : "/hadoop-ha",
+"io.seqfile.lazydecompress" : "true",
+"mapreduce.reduce.merge.inmem.threshold" : "1000",
+"mapreduce.input.fileinputformat.split.minsize" : "0",
+"dfs.replication" : "3",
+"ipc.client.tcpnodelay" : "false",
+"dfs.namenode.accesstime.precision" : "3600000",
+"s3.stream-buffer-size" : "4096",
+"mapreduce.jobtracker.tasktracker.maxblacklists" : "4",
+"dfs.client.read.shortcircuit.skip.checksum" : "false",
+"mapreduce.job.jvm.numtasks" : "1",
+"mapreduce.task.io.sort.mb" : "100",
+"io.file.buffer.size" : "65536",
+"dfs.namenode.audit.loggers" : "default",
+"dfs.namenode.checkpoint.txns" : "1000000",
+"yarn.nodemanager.admin-env" : "MALLOC_ARENA_MAX=$MALLOC_ARENA_MAX",
+"mapreduce.job.jar" : "/user/jenkins/.staging/job_1369942127770_1207/job.jar",
+"mapreduce.job.split.metainfo.maxsize" : "10000000",
+"kfs.replication" : "3",
+"rpc.engine.org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolPB" : "org.apache.hadoop.ipc.ProtobufRpcEngine",
+"yarn.app.mapreduce.am.scheduler.heartbeat.interval-ms" : "1000",
+"mapreduce.reduce.maxattempts" : "4",
+"kfs.stream-buffer-size" : "4096",
+"dfs.ha.tail-edits.period" : "60",
+"hadoop.security.authentication" : "simple",
+"fs.s3.buffer.dir" : "${hadoop.tmp.dir}/s3",
+"rpc.engine.org.apache.hadoop.yarn.api.AMRMProtocolPB" : "org.apache.hadoop.ipc.ProtobufRpcEngine",
+"mapreduce.jobtracker.taskscheduler" : "org.apache.hadoop.mapred.JobQueueTaskScheduler",
+"yarn.app.mapreduce.am.job.task.listener.thread-count" : "30",
+"dfs.namenode.avoid.read.stale.datanode" : "false",
+"mapreduce.job.reduces" : "0",
+"mapreduce.map.sort.spill.percent" : "0.8",
+"dfs.client.file-block-storage-locations.timeout" : "60",
+"dfs.datanode.drop.cache.behind.writes" : "false",
+"mapreduce.job.end-notification.retry.interval" : "1",
+"mapreduce.job.maps" : "96",
+"mapreduce.job.speculative.slownodethreshold" : "1.0",
+"tfile.fs.input.buffer.size" : "262144",
+"mapreduce.map.speculative" : "false",
+"dfs.block.access.token.enable" : "false",
+"dfs.journalnode.http-address" : "0.0.0.0:8480",
+"mapreduce.job.acl-view-job" : " ",
+"mapreduce.reduce.shuffle.retry-delay.max.ms" : "60000",
+"mapreduce.job.end-notification.max.retry.interval" : "5",
+"ftp.blocksize" : "67108864",
+"mapreduce.tasktracker.http.threads" : "80",
+"mapreduce.reduce.java.opts" : " -Xmx825955249",
+"dfs.datanode.data.dir" : "file://${hadoop.tmp.dir}/dfs/data",
+"ha.failover-controller.cli-check.rpc-timeout.ms" : "20000",
+"dfs.namenode.max.extra.edits.segments.retained" : "10000",
+"dfs.https.port" : "20102",
+"dfs.namenode.replication.interval" : "3",
+"mapreduce.task.skip.start.attempts" : "2",
+"dfs.namenode.https-address" : "a2115.smile.com:20102",
+"mapreduce.jobtracker.persist.jobstatus.dir" : "/jobtracker/jobsInfo",
+"ipc.client.kill.max" : "10",
+"dfs.ha.automatic-failover.enabled" : "false",
+"mapreduce.jobhistory.keytab" : "/etc/security/keytab/jhs.service.keytab",
+"dfs.image.transfer.timeout" : "600000",
+"dfs.client.failover.sleep.max.millis" : "15000",
+"mapreduce.job.end-notification.max.attempts" : "5",
+"mapreduce.task.tmp.dir" : "./tmp",
+"dfs.default.chunk.view.size" : "32768",
+"kfs.bytes-per-checksum" : "512",
+"mapreduce.reduce.memory.mb" : "1024",
+"hadoop.http.filter.initializers" : "org.apache.hadoop.yarn.server.webproxy.amfilter.AmFilterInitializer",
+"dfs.datanode.failed.volumes.tolerated" : "0",
+"hadoop.http.authentication.type" : "simple",
+"dfs.datanode.data.dir.perm" : "700",
+"yarn.resourcemanager.client.thread-count" : "50",
+"ipc.server.listen.queue.size" : "128",
+"mapreduce.reduce.skip.maxgroups" : "0",
+"file.stream-buffer-size" : "4096",
+"dfs.namenode.fs-limits.max-directory-items" : "0",
+"io.mapfile.bloom.size" : "1048576",
+"yarn.nodemanager.container-executor.class" : "org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor",
+"mapreduce.map.maxattempts" : "4",
+"mapreduce.jobtracker.jobhistory.block.size" : "3145728",
+"yarn.log-aggregation.retain-seconds" : "-1",
+"yarn.app.mapreduce.am.job.committer.cancel-timeout" : "60000",
+"ftp.replication" : "3",
+"mapreduce.jobtracker.http.address" : "0.0.0.0:50030",
+"yarn.nodemanager.health-checker.script.timeout-ms" : "1200000",
+"mapreduce.jobhistory.address" : "a2115.smile.com:10020",
+"mapreduce.jobtracker.taskcache.levels" : "2",
+"dfs.datanode.dns.nameserver" : "default",
+"mapreduce.application.classpath" : "$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*,$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*",
+"yarn.nodemanager.log.retain-seconds" : "10800",
+"mapred.child.java.opts" : "-Xmx200m",
+"dfs.replication.max" : "512",
+"map.sort.class" : "org.apache.hadoop.util.QuickSort",
+"dfs.stream-buffer-size" : "4096",
+"dfs.namenode.backup.address" : "0.0.0.0:50100",
+"hadoop.util.hash.type" : "murmur",
+"dfs.block.access.key.update.interval" : "600",
+"dfs.datanode.dns.interface" : "default",
+"dfs.datanode.use.datanode.hostname" : "false",
+"mapreduce.job.output.key.class" : "org.apache.hadoop.io.Text",
+"dfs.client.read.shortcircuit" : "false",
+"dfs.namenode.backup.http-address" : "0.0.0.0:50105",
+"yarn.nodemanager.container-monitor.interval-ms" : "3000",
+"yarn.nodemanager.disk-health-checker.min-healthy-disks" : "0.25",
+"kfs.client-write-packet-size" : "65536",
+"ha.zookeeper.acl" : "world:anyone:rwcda",
+"yarn.nodemanager.sleep-delay-before-sigkill.ms" : "250",
+"mapreduce.job.dir" : "/user/jenkins/.staging/job_1369942127770_1207",
+"io.map.index.skip" : "0",
+"net.topology.node.switch.mapping.impl" : "org.apache.hadoop.net.ScriptBasedMapping",
+"fs.s3.maxRetries" : "4",
+"ha.failover-controller.new-active.rpc-timeout.ms" : "60000",
+"s3native.client-write-packet-size" : "65536",
+"yarn.resourcemanager.amliveliness-monitor.interval-ms" : "1000",
+"hadoop.http.staticuser.user" : "dr.who",
+"mapreduce.reduce.speculative" : "false",
+"mapreduce.client.output.filter" : "FAILED",
+"mapreduce.ifile.readahead.bytes" : "4194304",
+"mapreduce.tasktracker.report.address" : "127.0.0.1:0",
+"mapreduce.task.userlog.limit.kb" : "0",
+"mapreduce.tasktracker.map.tasks.maximum" : "2",
+"hadoop.http.authentication.simple.anonymous.allowed" : "true",
+"hadoop.fuse.timer.period" : "5",
+"dfs.namenode.num.extra.edits.retained" : "1000000",
+"hadoop.rpc.socket.factory.class.default" : "org.apache.hadoop.net.StandardSocketFactory",
+"mapreduce.job.submithostname" : "a2115.smile.com",
+"dfs.namenode.handler.count" : "10",
+"fs.automatic.close" : "false",
+"mapreduce.job.submithostaddress" : "10.20.206.115",
+"mapreduce.tasktracker.healthchecker.script.timeout" : "600000",
+"dfs.datanode.directoryscan.interval" : "21600",
+"yarn.resourcemanager.address" : "a2115.smile.com:8032",
+"yarn.nodemanager.health-checker.interval-ms" : "600000",
+"dfs.client.file-block-storage-locations.num-threads" : "10",
+"yarn.resourcemanager.container-tokens.master-key-rolling-interval-secs" : "86400",
+"mapreduce.reduce.markreset.buffer.percent" : "0.0",
+"hadoop.security.group.mapping.ldap.directory.search.timeout" : "10000",
+"mapreduce.map.log.level" : "INFO",
+"dfs.bytes-per-checksum" : "512",
+"yarn.nodemanager.localizer.address" : "0.0.0.0:8040",
+"dfs.namenode.checkpoint.max-retries" : "3",
+"ha.health-monitor.rpc-timeout.ms" : "45000",
+"yarn.resourcemanager.keytab" : "/etc/krb5.keytab",
+"ftp.stream-buffer-size" : "4096",
+"dfs.namenode.avoid.write.stale.datanode" : "false",
+"hadoop.security.group.mapping.ldap.search.attr.member" : "member",
+"mapreduce.output.fileoutputformat.outputdir" : "hdfs://a2115.smile.com:8020/user/jenkins/tera-gen-1",
+"dfs.blockreport.initialDelay" : "0",
+"yarn.nm.liveness-monitor.expiry-interval-ms" : "600000",
+"hadoop.http.authentication.token.validity" : "36000",
+"dfs.namenode.delegation.token.max-lifetime" : "604800000",
+"mapreduce.job.hdfs-servers" : "${fs.defaultFS}",
+"s3native.replication" : "3",
+"yarn.nodemanager.localizer.client.thread-count" : "5",
+"dfs.heartbeat.interval" : "3",
+"rpc.engine.org.apache.hadoop.ipc.ProtocolMetaInfoPB" : "org.apache.hadoop.ipc.ProtobufRpcEngine",
+"dfs.ha.fencing.ssh.connect-timeout" : "30000",
+"yarn.resourcemanager.container.liveness-monitor.interval-ms" : "600000",
+"yarn.am.liveness-monitor.expiry-interval-ms" : "600000",
+"mapreduce.task.profile" : "false",
+"mapreduce.tasktracker.http.address" : "0.0.0.0:50060",
+"mapreduce.tasktracker.instrumentation" : "org.apache.hadoop.mapred.TaskTrackerMetricsInst",
+"mapreduce.jobhistory.webapp.address" : "a2115.smile.com:19888",
+"ha.failover-controller.graceful-fence.rpc-timeout.ms" : "5000",
+"yarn.ipc.rpc.class" : "org.apache.hadoop.yarn.ipc.HadoopYarnProtoRPC",
+"mapreduce.job.name" : "TeraGen",
+"kfs.blocksize" : "67108864",
+"yarn.resourcemanager.am-rm-tokens.master-key-rolling-interval-secs" : "86400",
+"mapreduce.job.ubertask.maxmaps" : "9",
+"yarn.scheduler.maximum-allocation-mb" : "8192",
+"yarn.nodemanager.heartbeat.interval-ms" : "1000",
+"mapreduce.job.userlog.retain.hours" : "24",
+"dfs.namenode.secondary.http-address" : "0.0.0.0:50090",
+"mapreduce.task.timeout" : "600000",
+"mapreduce.framework.name" : "yarn",
+"ipc.client.idlethreshold" : "4000",
+"ftp.bytes-per-checksum" : "512",
+"ipc.server.tcpnodelay" : "false",
+"dfs.namenode.stale.datanode.interval" : "30000",
+"s3.bytes-per-checksum" : "512",
+"mapreduce.job.speculative.slowtaskthreshold" : "1.0",
+"yarn.nodemanager.localizer.cache.target-size-mb" : "10240",
+"yarn.nodemanager.remote-app-log-dir" : "/tmp/logs",
+"fs.s3.block.size" : "67108864",
+"mapreduce.job.queuename" : "sls_queue_1",
+"dfs.client.failover.connection.retries" : "0",
+"hadoop.rpc.protection" : "authentication",
+"yarn.scheduler.minimum-allocation-mb" : "1024",
+"yarn.app.mapreduce.client-am.ipc.max-retries" : "1",
+"hadoop.security.auth_to_local" : "DEFAULT",
+"dfs.secondary.namenode.kerberos.internal.spnego.principal" : "${dfs.web.authentication.kerberos.principal}",
+"ftp.client-write-packet-size" : "65536",
+"fs.defaultFS" : "hdfs://a2115.smile.com:8020",
+"yarn.nodemanager.address" : "0.0.0.0:0",
+"yarn.scheduler.fair.assignmultiple" : "true",
+"yarn.resourcemanager.scheduler.client.thread-count" : "50",
+"mapreduce.task.merge.progress.records" : "10000",
+"file.client-write-packet-size" : "65536",
+"yarn.nodemanager.delete.thread-count" : "4",
+"yarn.resourcemanager.scheduler.address" : "a2115.smile.com:8030",
+"fs.trash.checkpoint.interval" : "0",
+"hadoop.http.authentication.signature.secret.file" : "${user.home}/hadoop-http-auth-signature-secret",
+"s3native.stream-buffer-size" : "4096",
+"mapreduce.reduce.shuffle.read.timeout" : "180000",
+"mapreduce.admin.user.env" : "LD_LIBRARY_PATH=$HADOOP_COMMON_HOME/lib/native",
+"yarn.app.mapreduce.am.command-opts" : " -Xmx1238932873",
+"dfs.namenode.checkpoint.edits.dir" : "${dfs.namenode.checkpoint.dir}",
+"fs.permissions.umask-mode" : "022",
+"dfs.client.domain.socket.data.traffic" : "false",
+"hadoop.common.configuration.version" : "0.23.0",
+"mapreduce.tasktracker.dns.interface" : "default",
+"mapreduce.output.fileoutputformat.compress.type" : "BLOCK",
+"mapreduce.ifile.readahead" : "true",
+"hadoop.security.group.mapping.ldap.ssl" : "false",
+"io.serializations" : "org.apache.hadoop.io.serializer.WritableSerialization,org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization,org.apache.hadoop.io.serializer.avro.AvroReflectSerialization",
+"yarn.nodemanager.aux-services.mapreduce.shuffle.class" : "org.apache.hadoop.mapred.ShuffleHandler",
+"fs.df.interval" : "60000",
+"mapreduce.reduce.shuffle.input.buffer.percent" : "0.70",
+"io.seqfile.compress.blocksize" : "1000000",
+"hadoop.security.groups.cache.secs" : "300",
+"ipc.client.connect.max.retries" : "10",
+"dfs.namenode.delegation.key.update-interval" : "86400000",
+"yarn.nodemanager.process-kill-wait.ms" : "2000",
+"yarn.application.classpath" : "$HADOOP_CONF_DIR,$HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*,$HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*,$HADOOP_MAPRED_HOME/*,$HADOOP_MAPRED_HOME/lib/*,$YARN_HOME/*,$YARN_HOME/lib/*",
+"yarn.app.mapreduce.client.max-retries" : "3",
+"dfs.datanode.available-space-volume-choosing-policy.balanced-space-preference-fraction" : "0.75f",
+"yarn.nodemanager.log-aggregation.compression-type" : "none",
+"hadoop.security.group.mapping.ldap.search.filter.user" : "(&(objectClass=user)(sAMAccountName={0}))",
+"yarn.nodemanager.localizer.cache.cleanup.interval-ms" : "600000",
+"dfs.image.compress" : "false",
+"mapred.mapper.new-api" : "true",
+"yarn.nodemanager.log-dirs" : "${yarn.log.dir}/userlogs",
+"dfs.namenode.kerberos.internal.spnego.principal" : "${dfs.web.authentication.kerberos.principal}",
+"fs.s3n.block.size" : "67108864",
+"fs.ftp.host" : "0.0.0.0",
+"hadoop.security.group.mapping" : "org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback",
+"dfs.datanode.address" : "0.0.0.0:50010",
+"mapreduce.map.skip.maxrecords" : "0",
+"dfs.datanode.https.address" : "0.0.0.0:50475",
+"file.replication" : "1",
+"yarn.resourcemanager.resource-tracker.address" : "a2115.smile.com:8031",
+"dfs.datanode.drop.cache.behind.reads" : "false",
+"hadoop.fuse.connection.timeout" : "300",
+"hadoop.work.around.non.threadsafe.getpwuid" : "false",
+"mapreduce.jobtracker.restart.recover" : "false",
+"hadoop.tmp.dir" : "/tmp/hadoop-${user.name}",
+"mapreduce.output.fileoutputformat.compress" : "false",
+"mapreduce.tasktracker.indexcache.mb" : "10",
+"mapreduce.client.genericoptionsparser.used" : "true",
+"dfs.client.block.write.replace-datanode-on-failure.policy" : "DEFAULT",
+"mapreduce.job.committer.setup.cleanup.needed" : "true",
+"hadoop.kerberos.kinit.command" : "kinit",
+"dfs.datanode.du.reserved" : "0",
+"dfs.namenode.fs-limits.max-blocks-per-file" : "1048576",
+"file.bytes-per-checksum" : "512",
+"mapreduce.task.profile.reduces" : "0-2",
+"mapreduce.jobtracker.handler.count" : "10",
+"dfs.client.block.write.replace-datanode-on-failure.enable" : "true",
+"mapreduce.job.output.value.class" : "org.apache.hadoop.io.Text",
+"yarn.dispatcher.exit-on-error" : "true",
+"net.topology.script.number.args" : "100",
+"mapreduce.task.profile.maps" : "0-2",
+"dfs.namenode.decommission.interval" : "30",
+"dfs.image.compression.codec" : "org.apache.hadoop.io.compress.DefaultCodec",
+"yarn.resourcemanager.webapp.address" : "a2115.smile.com:8088",
+"mapreduce.jobtracker.system.dir" : "${hadoop.tmp.dir}/mapred/system",
+"hadoop.ssl.hostname.verifier" : "DEFAULT",
+"yarn.nodemanager.vmem-pmem-ratio" : "2.1",
+"dfs.namenode.support.allow.format" : "true",
+"mapreduce.jobhistory.principal" : "jhs/_HOST@REALM.TLD",
+"io.mapfile.bloom.error.rate" : "0.005",
+"mapreduce.shuffle.ssl.file.buffer.size" : "65536",
+"dfs.permissions.superusergroup" : "supergroup",
+"dfs.datanode.available-space-volume-choosing-policy.balanced-space-threshold" : "10737418240",
+"mapreduce.jobtracker.expire.trackers.interval" : "600000",
+"mapreduce.cluster.acls.enabled" : "false",
+"yarn.nodemanager.remote-app-log-dir-suffix" : "logs",
+"ha.failover-controller.graceful-fence.connection.retries" : "1",
+"ha.health-monitor.connect-retry-interval.ms" : "1000",
+"mapreduce.reduce.shuffle.merge.percent" : "0.66",
+"yarn.app.mapreduce.am.resource.mb" : "1536",
+"io.seqfile.local.dir" : "${hadoop.tmp.dir}/io/local",
+"dfs.namenode.checkpoint.check.period" : "60",
+"yarn.resourcemanager.nm.liveness-monitor.interval-ms" : "1000",
+"mapreduce.jobtracker.maxtasks.perjob" : "-1",
+"mapreduce.jobtracker.jobhistory.lru.cache.size" : "5",
+"file.blocksize" : "67108864",
+"tfile.io.chunk.size" : "1048576",
+"mapreduce.job.acl-modify-job" : " ",
+"yarn.nodemanager.webapp.address" : "0.0.0.0:8042",
+"mapreduce.tasktracker.reduce.tasks.maximum" : "2",
+"io.skip.checksum.errors" : "false",
+"mapreduce.cluster.temp.dir" : "${hadoop.tmp.dir}/mapred/temp",
+"yarn.app.mapreduce.am.staging-dir" : "/user",
+"dfs.namenode.edits.journal-plugin.qjournal" : "org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager",
+"dfs.datanode.handler.count" : "10",
+"fs.ftp.host.port" : "21",
+"dfs.namenode.decommission.nodes.per.interval" : "5",
+"yarn.resourcemanager.admin.client.thread-count" : "1",
+"dfs.namenode.fs-limits.max-component-length" : "0",
+"dfs.namenode.checkpoint.period" : "3600",
+"fs.AbstractFileSystem.viewfs.impl" : "org.apache.hadoop.fs.viewfs.ViewFs",
+"yarn.resourcemanager.resource-tracker.client.thread-count" : "50",
+"mapreduce.tasktracker.dns.nameserver" : "default",
+"mapreduce.map.output.compress" : "true",
+"dfs.datanode.ipc.address" : "0.0.0.0:50020",
+"hadoop.ssl.require.client.cert" : "false",
+"yarn.nodemanager.delete.debug-delay-sec" : "0",
+"dfs.datanode.max.transfer.threads" : "4096"
+},
+"computonsPerMapInputByte" : -1,
+"computonsPerMapOutputByte" : -1,
+"computonsPerReduceInputByte" : -1,
+"computonsPerReduceOutputByte" : -1,
+"heapMegabytes" : 200,
+"outcome" : "SUCCESS",
+"jobtype" : "JAVA",
+"directDependantJobs" : [ ],
+"successfulMapAttemptCDFs" : [ {
+"maximum" : 9223372036854775807,
+"minimum" : -9223372036854775808,
+"rankings" : [ ],
+"numberValues" : 0
+}, {
+"maximum" : 9223372036854775807,
+"minimum" : -9223372036854775808,
+"rankings" : [ ],
+"numberValues" : 0
+}, {
+"maximum" : 9223372036854775807,
+"minimum" : -9223372036854775808,
+"rankings" : [ ],
+"numberValues" : 0
+}, {
+"maximum" : 47021,
+"minimum" : 11143,
+"rankings" : [ {
+"datum" : 13354,
+"relativeRanking" : 0.05
+}, {
+"datum" : 14101,
+"relativeRanking" : 0.1
+}, {
+"datum" : 15609,
+"relativeRanking" : 0.15
+}, {
+"datum" : 15919,
+"relativeRanking" : 0.2
+}, {
+"datum" : 17003,
+"relativeRanking" : 0.25
+}, {
+"datum" : 17109,
+"relativeRanking" : 0.3
+}, {
+"datum" : 18342,
+"relativeRanking" : 0.35
+}, {
+"datum" : 18870,
+"relativeRanking" : 0.4
+}, {
+"datum" : 19127,
+"relativeRanking" : 0.45
+}, {
+"datum" : 19221,
+"relativeRanking" : 0.5
+}, {
+"datum" : 19481,
+"relativeRanking" : 0.55
+}, {
+"datum" : 19896,
+"relativeRanking" : 0.6
+}, {
+"datum" : 20585,
+"relativeRanking" : 0.65
+}, {
+"datum" : 20784,
+"relativeRanking" : 0.7
+}, {
+"datum" : 21452,
+"relativeRanking" : 0.75
+}, {
+"datum" : 21853,
+"relativeRanking" : 0.8
+}, {
+"datum" : 22436,
+"relativeRanking" : 0.85
+}, {
+"datum" : 32646,
+"relativeRanking" : 0.9
+}, {
+"datum" : 41553,
+"relativeRanking" : 0.95
+} ],
+"numberValues" : 96
+} ],
+"failedMapAttemptCDFs" : [ {
+"maximum" : 9223372036854775807,
+"minimum" : -9223372036854775808,
+"rankings" : [ ],
+"numberValues" : 0
+}, {
+"maximum" : 9223372036854775807,
+"minimum" : -9223372036854775808,
+"rankings" : [ ],
+"numberValues" : 0
+}, {
+"maximum" : 9223372036854775807,
+"minimum" : -9223372036854775808,
+"rankings" : [ ],
+"numberValues" : 0
+}, {
+"maximum" : 9223372036854775807,
+"minimum" : -9223372036854775808,
+"rankings" : [ ],
+"numberValues" : 0
+} ],
+"successfulReduceAttemptCDF" : {
+"maximum" : 9223372036854775807,
+"minimum" : -9223372036854775808,
+"rankings" : [ ],
+"numberValues" : 0
+},
+"failedReduceAttemptCDF" : {
+"maximum" : 9223372036854775807,
+"minimum" : -9223372036854775808,
+"rankings" : [ ],
+"numberValues" : 0
+},
+"mapperTriesToSucceed" : [ 1.0 ],
+"failedMapperFraction" : 0.0,
+"relativeTime" : 0,
+"clusterMapMB" : -1,
+"clusterReduceMB" : -1,
+"jobMapMB" : 200,
+"jobReduceMB" : 200
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/4efdf3a9/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/SLSRunner.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/SLSRunner.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/SLSRunner.java
index b36edc9..c792335 100644
--- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/SLSRunner.java
+++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/SLSRunner.java
@@ -389,6 +389,9 @@ public class SLSRunner {
                   new ArrayList<ContainerSimulator>();
           // map tasks
           for(LoggedTask mapTask : job.getMapTasks()) {
+            if (mapTask.getAttempts().size() == 0) {
+              continue;
+            }
             LoggedTaskAttempt taskAttempt = mapTask.getAttempts()
                     .get(mapTask.getAttempts().size() - 1);
             String hostname = taskAttempt.getHostName().getValue();
@@ -400,6 +403,9 @@ public class SLSRunner {
 
           // reduce tasks
           for(LoggedTask reduceTask : job.getReduceTasks()) {
+            if (reduceTask.getAttempts().size() == 0) {
+              continue;
+            }
             LoggedTaskAttempt taskAttempt = reduceTask.getAttempts()
                     .get(reduceTask.getAttempts().size() - 1);
             String hostname = taskAttempt.getHostName().getValue();

http://git-wip-us.apache.org/repos/asf/hadoop/blob/4efdf3a9/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/utils/SLSUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/utils/SLSUtils.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/utils/SLSUtils.java
index d950aee..f1b4f07 100644
--- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/utils/SLSUtils.java
+++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/utils/SLSUtils.java
@@ -72,11 +72,17 @@ public class SLSUtils {
       while ((job = reader.getNext()) != null) {
         for(LoggedTask mapTask : job.getMapTasks()) {
           // select the last attempt
+          if (mapTask.getAttempts().size() == 0) {
+            continue;
+          }
           LoggedTaskAttempt taskAttempt = mapTask.getAttempts()
                   .get(mapTask.getAttempts().size() - 1);
           nodeSet.add(taskAttempt.getHostName().getValue());
         }
         for(LoggedTask reduceTask : job.getReduceTasks()) {
+          if (reduceTask.getAttempts().size() == 0) {
+            continue;
+          }
           LoggedTaskAttempt taskAttempt = reduceTask.getAttempts()
                   .get(reduceTask.getAttempts().size() - 1);
           nodeSet.add(taskAttempt.getHostName().getValue());

http://git-wip-us.apache.org/repos/asf/hadoop/blob/4efdf3a9/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index c2f16d5..435eb68 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -159,6 +159,9 @@ Release 2.9.0 - UNRELEASED
     YARN-4613. Fix test failure in TestClientRMService#testGetClusterNodes.
     (Takashi Ohnishi via rohithsharmaks)
 
+    YARN-4612. Fix rumen and scheduler load simulator handle killed tasks properly.
+    (Ming Ma via xgong)
+
 Release 2.8.0 - UNRELEASED
 
   INCOMPATIBLE CHANGES