You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-issues@hadoop.apache.org by "Jonathan Eagles (JIRA)" <ji...@apache.org> on 2019/08/16 17:23:00 UTC

[jira] [Created] (MAPREDUCE-7232) Reduce excessive getFileLinkInfo calls in MR

Jonathan Eagles created MAPREDUCE-7232:
------------------------------------------

             Summary: Reduce excessive getFileLinkInfo calls in MR
                 Key: MAPREDUCE-7232
                 URL: https://issues.apache.org/jira/browse/MAPREDUCE-7232
             Project: Hadoop Map/Reduce
          Issue Type: Improvement
            Reporter: Jonathan Eagles
            Assignee: Jonathan Eagles


The MR client redundantly calls getFileLinkInfo for the same file, sometimes more than 10 times. The files in question originate from mapreduce.job.cache.files and mapreduce.job.classpath.files and their archive equivalents.

Client Observation
Case 1 & 2
{code}
19/08/15 10:57:55 INFO hdfs.DFSClient: getFileLinkInfo:/shared/tez-exploded/tez-0.9.2-SNAPSHOT-minimal/tez-runtime-internals-0.9.2-SNAPSHOT.jar
java.lang.Exception
	at org.apache.hadoop.hdfs.DFSClient.getFileLinkInfo(DFSClient.java:1683)
	at org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1486)
	at org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1483)
	at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
	at org.apache.hadoop.hdfs.DistributedFileSystem.getFileLinkStatus(DistributedFileSystem.java:1498)
	at org.apache.hadoop.fs.FileSystem.resolvePath(FileSystem.java:772)
	at org.apache.hadoop.mapreduce.v2.util.MRApps.addToClasspathIfNotJar(MRApps.java:341)
	at org.apache.hadoop.mapreduce.v2.util.MRApps.addClasspathToEnv(MRApps.java:301)
	at org.apache.hadoop.mapreduce.v2.util.MRApps.setClasspath(MRApps.java:262)
	at org.apache.hadoop.mapred.YARNRunner.createApplicationSubmissionContext(YARNRunner.java:467)
	at org.apache.hadoop.mapred.YARNRunner.submitJob(YARNRunner.java:296)
	at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:244)
	at org.apache.hadoop.mapreduce.Job$11.run(Job.java:1341)
	at org.apache.hadoop.mapreduce.Job$11.run(Job.java:1338)
	at java.security.AccessController.doPrivileged(Native Method)
	at javax.security.auth.Subject.doAs(Subject.java:422)
	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1953)
	at org.apache.hadoop.mapreduce.Job.submit(Job.java:1338)
	at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1359)
	at org.apache.hadoop.mapreduce.SleepJob.run(SleepJob.java:273)
	at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:76)
	at org.apache.hadoop.mapreduce.SleepJob.main(SleepJob.java:194)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at org.apache.hadoop.util.ProgramDriver$ProgramDescription.invoke(ProgramDriver.java:71)
	at org.apache.hadoop.util.ProgramDriver.run(ProgramDriver.java:144)
	at org.apache.hadoop.test.MapredTestDriver.run(MapredTestDriver.java:136)
	at org.apache.hadoop.test.MapredTestDriver.main(MapredTestDriver.java:144)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at org.apache.hadoop.util.RunJar.run(RunJar.java:239)
	at org.apache.hadoop.util.RunJar.main(RunJar.java:153)
{code}

{code}
19/08/15 10:57:55 INFO hdfs.DFSClient: getFileLinkInfo:/shared/tez-exploded/tez-0.9.2-SNAPSHOT-minimal/tez-runtime-internals-0.9.2-SNAPSHOT.jar
java.lang.Exception
	at org.apache.hadoop.hdfs.DFSClient.getFileLinkInfo(DFSClient.java:1683)
	at org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1486)
	at org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1483)
	at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
	at org.apache.hadoop.hdfs.DistributedFileSystem.getFileLinkStatus(DistributedFileSystem.java:1498)
	at org.apache.hadoop.fs.FileSystem.resolvePath(FileSystem.java:772)
	at org.apache.hadoop.mapreduce.v2.util.MRApps.addToClasspathIfNotJar(MRApps.java:341)
	at org.apache.hadoop.mapreduce.v2.util.MRApps.addClasspathToEnv(MRApps.java:301)
	at org.apache.hadoop.mapreduce.v2.util.MRApps.setClasspath(MRApps.java:263)
	at org.apache.hadoop.mapred.YARNRunner.createApplicationSubmissionContext(YARNRunner.java:467)
	at org.apache.hadoop.mapred.YARNRunner.submitJob(YARNRunner.java:296)
	at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:244)
	at org.apache.hadoop.mapreduce.Job$11.run(Job.java:1341)
	at org.apache.hadoop.mapreduce.Job$11.run(Job.java:1338)
	at java.security.AccessController.doPrivileged(Native Method)
	at javax.security.auth.Subject.doAs(Subject.java:422)
	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1953)
	at org.apache.hadoop.mapreduce.Job.submit(Job.java:1338)
	at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1359)
	at org.apache.hadoop.mapreduce.SleepJob.run(SleepJob.java:273)
	at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:76)
	at org.apache.hadoop.mapreduce.SleepJob.main(SleepJob.java:194)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at org.apache.hadoop.util.ProgramDriver$ProgramDescription.invoke(ProgramDriver.java:71)
	at org.apache.hadoop.util.ProgramDriver.run(ProgramDriver.java:144)
	at org.apache.hadoop.test.MapredTestDriver.run(MapredTestDriver.java:136)
	at org.apache.hadoop.test.MapredTestDriver.main(MapredTestDriver.java:144)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at org.apache.hadoop.util.RunJar.run(RunJar.java:239)
	at org.apache.hadoop.util.RunJar.main(RunJar.java:153)
{code}

AM Observation
Case 1 & 2
{code}
2019-08-15 10:58:00,987 INFO [AsyncDispatcher event handler] org.apache.hadoop.hdfs.DFSClient: getFileLinkInfo:/shared/tez-exploded/tez-0.9.2-SNAPSHOT-minimal/tez-runtime-internals-0.9.2-SNAPSHOT.jar
java.lang.Exception
        at org.apache.hadoop.hdfs.DFSClient.getFileLinkInfo(DFSClient.java:1683)
        at org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1486)
        at org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1483)
        at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
        at org.apache.hadoop.hdfs.DistributedFileSystem.getFileLinkStatus(DistributedFileSystem.java:1498)
        at org.apache.hadoop.fs.FileSystem.resolvePath(FileSystem.java:772)
        at org.apache.hadoop.mapreduce.v2.util.MRApps.addToClasspathIfNotJar(MRApps.java:341)
        at org.apache.hadoop.mapreduce.v2.util.MRApps.addClasspathToEnv(MRApps.java:301)
        at org.apache.hadoop.mapreduce.v2.util.MRApps.setClasspath(MRApps.java:262)
        at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl.getInitialClasspath(TaskAttemptImpl.java:744)
        at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl.createCommonContainerLaunchContext(TaskAttemptImpl.java:883)
        at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl.createContainerLaunchContext(TaskAttemptImpl.java:944)
        at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl$ContainerAssignedTransition.transition(TaskAttemptImpl.java:1711)
        at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl$ContainerAssignedTransition.transition(TaskAttemptImpl.java:1688)
        at org.apache.hadoop.yarn.state.StateMachineFactory$SingleInternalArc.doTransition(StateMachineFactory.java:362)
        at org.apache.hadoop.yarn.state.StateMachineFactory.doTransition(StateMachineFactory.java:302)
        at org.apache.hadoop.yarn.state.StateMachineFactory.access$300(StateMachineFactory.java:46)
        at org.apache.hadoop.yarn.state.StateMachineFactory$InternalStateMachine.doTransition(StateMachineFactory.java:448)
        at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl.handle(TaskAttemptImpl.java:1207)
        at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl.handle(TaskAttemptImpl.java:147)
        at org.apache.hadoop.mapreduce.v2.app.MRAppMaster$TaskAttemptEventDispatcher.handle(MRAppMaster.java:1450)
        at org.apache.hadoop.mapreduce.v2.app.MRAppMaster$TaskAttemptEventDispatcher.handle(MRAppMaster.java:1442)
        at org.apache.hadoop.yarn.event.AsyncDispatcher.dispatch(AsyncDispatcher.java:184)
        at org.apache.hadoop.yarn.event.AsyncDispatcher$1.run(AsyncDispatcher.java:110)
        at java.lang.Thread.run(Thread.java:748)
{code}

{code}
2019-08-15 10:58:00,990 INFO [AsyncDispatcher event handler] org.apache.hadoop.hdfs.DFSClient: getFileLinkInfo:/shared/tez-exploded/tez-0.9.2-SNAPSHOT-minimal/tez-runtime-internals-0.9.2-SNAPSHOT.jar
java.lang.Exception
        at org.apache.hadoop.hdfs.DFSClient.getFileLinkInfo(DFSClient.java:1683)
        at org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1486)
        at org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1483)
        at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
        at org.apache.hadoop.hdfs.DistributedFileSystem.getFileLinkStatus(DistributedFileSystem.java:1498)
        at org.apache.hadoop.fs.FileSystem.resolvePath(FileSystem.java:772)
        at org.apache.hadoop.mapreduce.v2.util.MRApps.addToClasspathIfNotJar(MRApps.java:341)
        at org.apache.hadoop.mapreduce.v2.util.MRApps.addClasspathToEnv(MRApps.java:301)
        at org.apache.hadoop.mapreduce.v2.util.MRApps.setClasspath(MRApps.java:263)
        at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl.getInitialClasspath(TaskAttemptImpl.java:744)
        at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl.createCommonContainerLaunchContext(TaskAttemptImpl.java:883)
        at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl.createContainerLaunchContext(TaskAttemptImpl.java:944)
        at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl$ContainerAssignedTransition.transition(TaskAttemptImpl.java:1711)
        at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl$ContainerAssignedTransition.transition(TaskAttemptImpl.java:1688)
        at org.apache.hadoop.yarn.state.StateMachineFactory$SingleInternalArc.doTransition(StateMachineFactory.java:362)
        at org.apache.hadoop.yarn.state.StateMachineFactory.doTransition(StateMachineFactory.java:302)
        at org.apache.hadoop.yarn.state.StateMachineFactory.access$300(StateMachineFactory.java:46)
        at org.apache.hadoop.yarn.state.StateMachineFactory$InternalStateMachine.doTransition(StateMachineFactory.java:448)
        at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl.handle(TaskAttemptImpl.java:1207)
        at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl.handle(TaskAttemptImpl.java:147)
        at org.apache.hadoop.mapreduce.v2.app.MRAppMaster$TaskAttemptEventDispatcher.handle(MRAppMaster.java:1450)
        at org.apache.hadoop.mapreduce.v2.app.MRAppMaster$TaskAttemptEventDispatcher.handle(MRAppMaster.java:1442)
        at org.apache.hadoop.yarn.event.AsyncDispatcher.dispatch(AsyncDispatcher.java:184)
        at org.apache.hadoop.yarn.event.AsyncDispatcher$1.run(AsyncDispatcher.java:110)
        at java.lang.Thread.run(Thread.java:748)
{code}

Client mapreduce.job.cache.files
{code}
19/08/15 15:14:11 INFO hdfs.DFSClient: getFileLinkInfo:/shared/tez-exploded/tez-0.9.2-SNAPSHOT-minimal/tez-runtime-internals-0.9.2-SNAPSHOT.jar
java.lang.Exception
	at org.apache.hadoop.hdfs.DFSClient.getFileLinkInfo(DFSClient.java:1683)
	at org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1486)
	at org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1483)
	at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
	at org.apache.hadoop.hdfs.DistributedFileSystem.getFileLinkStatus(DistributedFileSystem.java:1498)
	at org.apache.hadoop.fs.FileSystem.resolvePath(FileSystem.java:772)
	at org.apache.hadoop.mapreduce.v2.util.MRApps.parseDistributedCacheArtifacts(MRApps.java:601)
	at org.apache.hadoop.mapreduce.v2.util.MRApps.setupDistributedCache(MRApps.java:491)
	at org.apache.hadoop.mapred.YARNRunner.createApplicationSubmissionContext(YARNRunner.java:487)
	at org.apache.hadoop.mapred.YARNRunner.submitJob(YARNRunner.java:296)
	at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:244)
	at org.apache.hadoop.mapreduce.Job$11.run(Job.java:1341)
	at org.apache.hadoop.mapreduce.Job$11.run(Job.java:1338)
	at java.security.AccessController.doPrivileged(Native Method)
	at javax.security.auth.Subject.doAs(Subject.java:422)
	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1953)
	at org.apache.hadoop.mapreduce.Job.submit(Job.java:1338)
	at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1359)
	at org.apache.hadoop.mapreduce.SleepJob.run(SleepJob.java:273)
	at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:76)
	at org.apache.hadoop.mapreduce.SleepJob.main(SleepJob.java:194)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at org.apache.hadoop.util.ProgramDriver$ProgramDescription.invoke(ProgramDriver.java:71)
	at org.apache.hadoop.util.ProgramDriver.run(ProgramDriver.java:144)
	at org.apache.hadoop.test.MapredTestDriver.run(MapredTestDriver.java:136)
	at org.apache.hadoop.test.MapredTestDriver.main(MapredTestDriver.java:144)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at org.apache.hadoop.util.RunJar.run(RunJar.java:239)
	at org.apache.hadoop.util.RunJar.main(RunJar.java:153)
{code}

AM mapreduce.job.cache.files
{code}
2019-08-15 15:14:15,872 INFO [AsyncDispatcher event handler] org.apache.hadoop.hdfs.DFSClient: getFileLinkInfo:/shared/tez-exploded/tez-0.9.2-SNAPSHOT-minimal/tez-runtime-internals-0.9.2-SNAPSHOT.jar
java.lang.Exception
        at org.apache.hadoop.hdfs.DFSClient.getFileLinkInfo(DFSClient.java:1683)
        at org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1486)
        at org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1483)
        at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
        at org.apache.hadoop.hdfs.DistributedFileSystem.getFileLinkStatus(DistributedFileSystem.java:1498)
        at org.apache.hadoop.fs.FileSystem.resolvePath(FileSystem.java:772)
        at org.apache.hadoop.mapreduce.v2.util.MRApps.parseDistributedCacheArtifacts(MRApps.java:601)
        at org.apache.hadoop.mapreduce.v2.util.MRApps.setupDistributedCache(MRApps.java:491)
        at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl.createCommonContainerLaunchContext(TaskAttemptImpl.java:818)
        at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl.createContainerLaunchContext(TaskAttemptImpl.java:944)
        at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl$ContainerAssignedTransition.transition(TaskAttemptImpl.java:1711)
        at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl$ContainerAssignedTransition.transition(TaskAttemptImpl.java:1688)
        at org.apache.hadoop.yarn.state.StateMachineFactory$SingleInternalArc.doTransition(StateMachineFactory.java:362)
        at org.apache.hadoop.yarn.state.StateMachineFactory.doTransition(StateMachineFactory.java:302)
        at org.apache.hadoop.yarn.state.StateMachineFactory.access$300(StateMachineFactory.java:46)
        at org.apache.hadoop.yarn.state.StateMachineFactory$InternalStateMachine.doTransition(StateMachineFactory.java:448)
        at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl.handle(TaskAttemptImpl.java:1207)
        at org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl.handle(TaskAttemptImpl.java:147)
        at org.apache.hadoop.mapreduce.v2.app.MRAppMaster$TaskAttemptEventDispatcher.handle(MRAppMaster.java:1450)
        at org.apache.hadoop.mapreduce.v2.app.MRAppMaster$TaskAttemptEventDispatcher.handle(MRAppMaster.java:1442)
        at org.apache.hadoop.yarn.event.AsyncDispatcher.dispatch(AsyncDispatcher.java:184)
        at org.apache.hadoop.yarn.event.AsyncDispatcher$1.run(AsyncDispatcher.java:110)
        at java.lang.Thread.run(Thread.java:748)
{code}



--
This message was sent by Atlassian JIRA
(v7.6.14#76016)

---------------------------------------------------------------------
To unsubscribe, e-mail: mapreduce-issues-unsubscribe@hadoop.apache.org
For additional commands, e-mail: mapreduce-issues-help@hadoop.apache.org