You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@tez.apache.org by "Rajesh Balamohan (JIRA)" <ji...@apache.org> on 2017/07/12 03:48:00 UTC

[jira] [Created] (TEZ-3793) Consider reducing the number of times "DiskChecker.doDiskIo" needs to be invoked for writing to local folders

Rajesh Balamohan created TEZ-3793:
-------------------------------------

             Summary: Consider reducing the number of times "DiskChecker.doDiskIo" needs to be invoked for writing to local folders
                 Key: TEZ-3793
                 URL: https://issues.apache.org/jira/browse/TEZ-3793
             Project: Apache Tez
          Issue Type: Improvement
            Reporter: Rajesh Balamohan


For large jobs, {{LocalDirAllocator}} fairly often shows up as a bottleneck, because it performs a minimal write operation ({{DiskChecker.doDiskIo}}) when checking a directory. It would be good to consider reducing the number of times directories are checked for errors/issues.

Some sample stack traces are given below; each one ends in {{FileDescriptor.sync}}, reached via {{DiskChecker.checkDir}}:

{noformat}

	at java.io.FileDescriptor.sync(Native Method)
	at org.apache.hadoop.util.DiskChecker.diskIoCheckWithoutNativeIo(DiskChecker.java:249)
	at org.apache.hadoop.util.DiskChecker.doDiskIo(DiskChecker.java:220)
	at org.apache.hadoop.util.DiskChecker.checkDir(DiskChecker.java:82)
	at org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.createPath(LocalDirAllocator.java:350)
	at org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.getLocalPathForWrite(LocalDirAllocator.java:424)
	at org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:151)
	at org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:132)
	at org.apache.tez.runtime.library.common.task.local.output.TezTaskOutputFiles.getOutputFileForWrite(TezTaskOutputFiles.java:91)
	at org.apache.tez.runtime.library.common.writers.UnorderedPartitionedKVWriter.getSpillPathDetails(UnorderedPartitionedKVWriter.java:721)
	at org.apache.tez.runtime.library.common.writers.UnorderedPartitionedKVWriter.mergeAll(UnorderedPartitionedKVWriter.java:748)
	at org.apache.tez.runtime.library.common.writers.UnorderedPartitionedKVWriter.close(UnorderedPartitionedKVWriter.java:545)
	at org.apache.tez.runtime.library.output.UnorderedPartitionedKVOutput.close(UnorderedPartitionedKVOutput.java:105)
	- locked <0x00007f4823d5dc88> (a org.apache.tez.runtime.library.output.UnorderedPartitionedKVOutput)
	at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.close(LogicalIOProcessorRuntimeTask.java:393)
	at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:83)
	at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61)
	at java.security.AccessController.doPrivileged(Native Method)
	at javax.security.auth.Subject.doAs(Subject.java:422)
	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1866)
	at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61)
	at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37)
	at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
	at org.apache.hadoop.hive.llap.daemon.impl.StatsRecordingThreadPool$WrappedCallable.call(StatsRecordingThreadPool.java:110)
	at java.util.concurrent.FutureTask.run(FutureTask.java:266)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
	at java.lang.Thread.run(Thread.java:748)


	
	at java.io.FileDescriptor.sync(Native Method)
	at org.apache.hadoop.util.DiskChecker.diskIoCheckWithoutNativeIo(DiskChecker.java:249)
	at org.apache.hadoop.util.DiskChecker.doDiskIo(DiskChecker.java:220)
	at org.apache.hadoop.util.DiskChecker.checkDir(DiskChecker.java:82)
	at org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.createPath(LocalDirAllocator.java:350)
	at org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.getLocalPathForWrite(LocalDirAllocator.java:424)
	at org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:151)
	at org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:132)
	at org.apache.tez.runtime.library.common.task.local.output.TezTaskOutputFiles.getInputFileForWrite(TezTaskOutputFiles.java:250)
	at org.apache.tez.runtime.library.common.shuffle.DiskFetchedInput.<init>(DiskFetchedInput.java:52)
	at org.apache.tez.runtime.library.common.shuffle.impl.SimpleFetchedInputAllocator.allocate(SimpleFetchedInputAllocator.java:140)
	- locked <0x00007f4891288858> (a org.apache.tez.runtime.library.common.shuffle.impl.SimpleFetchedInputAllocator)
	at org.apache.tez.runtime.library.common.shuffle.Fetcher.fetchInputs(Fetcher.java:793)
	at org.apache.tez.runtime.library.common.shuffle.Fetcher.doHttpFetch(Fetcher.java:539)
	at org.apache.tez.runtime.library.common.shuffle.Fetcher.doHttpFetch(Fetcher.java:428)
	at org.apache.tez.runtime.library.common.shuffle.Fetcher.callInternal(Fetcher.java:226)
	at org.apache.tez.runtime.library.common.shuffle.Fetcher.callInternal(Fetcher.java:73)
	at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
	at java.util.concurrent.FutureTask.run(FutureTask.java:266)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
	at java.lang.Thread.run(Thread.java:748)

	at java.io.FileDescriptor.sync(Native Method)
	at org.apache.hadoop.util.DiskChecker.diskIoCheckWithoutNativeIo(DiskChecker.java:249)
	at org.apache.hadoop.util.DiskChecker.doDiskIo(DiskChecker.java:220)
	at org.apache.hadoop.util.DiskChecker.checkDir(DiskChecker.java:82)
	at org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.createPath(LocalDirAllocator.java:350)
	at org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.getLocalPathForWrite(LocalDirAllocator.java:424)
	at org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:151)
	at org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:132)
	at org.apache.tez.runtime.library.common.task.local.output.TezTaskOutputFiles.getSpillFileForWrite(TezTaskOutputFiles.java:207)
	at org.apache.tez.runtime.library.common.writers.UnorderedPartitionedKVWriter.getSpillPathDetails(UnorderedPartitionedKVWriter.java:728)
	at org.apache.tez.runtime.library.common.writers.UnorderedPartitionedKVWriter.setupNextBuffer(UnorderedPartitionedKVWriter.java:356)
	at org.apache.tez.runtime.library.common.writers.UnorderedPartitionedKVWriter.write(UnorderedPartitionedKVWriter.java:299)
	at org.apache.tez.runtime.library.common.writers.UnorderedPartitionedKVWriter.write(UnorderedPartitionedKVWriter.java:269)
	at org.apache.hadoop.hive.ql.exec.tez.TezProcessor$TezKVOutputCollector.collect(TezProcessor.java:260)
	at org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkCommonOperator.collect(VectorReduceSinkCommonOperator.java:432)
	at org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkCommonOperator.process(VectorReduceSinkCommonOperator.java:397)
	at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897)
	at org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator.process(VectorSelectOperator.java:145)
	at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897)
	at org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator.process(VectorFilterOperator.java:123)
	at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897)
	at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:130)
	at org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator.process(VectorMapOperator.java:783)
	at org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.processRow(MapRecordSource.java:86)

{noformat}	



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)