You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@hbase.apache.org by prem yadav <ip...@gmail.com> on 2013/01/07 10:23:44 UTC

datanodes not sending report

Hi,

We have been running hadoop without many issues for some time. Today we had
a problem where the datanodes had their disks full and the cluster stopped
working.
We fixed things, modified the config to add directories to dfs.data.dir and
restarted.

The hadoop version is 1.0.4.

The issue is:
The datanodes are not sending any block reports, and there are no errors in
the logs. The namenode shows that there are 6 datanodes, but it never leaves
safe mode and the report ratio never goes up from 0.000.

On one of the slaves, the jstack output is:

2013-01-07 09:13:04
Full thread dump Java HotSpot(TM) 64-Bit Server VM (23.5-b02 mixed mode):

"Attach Listener" daemon prio=10 tid=0x00007f40f0766800 nid=0x6268 waiting
on condition [0x0000000000000000]
   java.lang.Thread.State: RUNNABLE

"org.apache.hadoop.hdfs.server.datanode.DataBlockScanner@207a0c69" daemon
prio=10 tid=0x00007f40e001a000 nid=0x5f52 waiting on condition
[0x00007f40d9219000]
   java.lang.Thread.State: TIMED_WAITING (sleeping)
at java.lang.Thread.sleep(Native Method)
at
org.apache.hadoop.hdfs.server.datanode.DataBlockScanner.run(DataBlockScanner.java:620)
at java.lang.Thread.run(Thread.java:722)

"IPC Server handler 2 on 50020" daemon prio=10 tid=0x00007f40e0017800
nid=0x5f51 waiting on condition [0x00007f40d931a000]
   java.lang.Thread.State: WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for  <0x00000000eedc95b8> (a
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
at java.util.concurrent.locks.LockSupport.park(LockSupport.java:186)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2043)
at
java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:442)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:1364)

"IPC Server handler 1 on 50020" daemon prio=10 tid=0x00007f40e0015000
nid=0x5f50 waiting on condition [0x00007f40d941b000]
   java.lang.Thread.State: WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for  <0x00000000eedc95b8> (a
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
at java.util.concurrent.locks.LockSupport.park(LockSupport.java:186)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2043)
at
java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:442)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:1364)

"IPC Server handler 0 on 50020" daemon prio=10 tid=0x00007f40e0013000
nid=0x5f4f waiting on condition [0x00007f40d951c000]
   java.lang.Thread.State: WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for  <0x00000000eedc95b8> (a
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
at java.util.concurrent.locks.LockSupport.park(LockSupport.java:186)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2043)
at
java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:442)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:1364)

"IPC Server listener on 50020" daemon prio=10 tid=0x00007f40e000a000
nid=0x5f4e runnable [0x00007f40d961d000]
   java.lang.Thread.State: RUNNABLE
at sun.nio.ch.EPollArrayWrapper.epollWait(Native Method)
at sun.nio.ch.EPollArrayWrapper.poll(EPollArrayWrapper.java:228)
at sun.nio.ch.EPollSelectorImpl.doSelect(EPollSelectorImpl.java:81)
at sun.nio.ch.SelectorImpl.lockAndDoSelect(SelectorImpl.java:87)
- locked <0x00000000eeda0720> (a sun.nio.ch.Util$2)
- locked <0x00000000eeda0710> (a java.util.Collections$UnmodifiableSet)
- locked <0x00000000eeda04d0> (a sun.nio.ch.EPollSelectorImpl)
at sun.nio.ch.SelectorImpl.select(SelectorImpl.java:98)
at sun.nio.ch.SelectorImpl.select(SelectorImpl.java:102)
at org.apache.hadoop.ipc.Server$Listener.run(Server.java:439)

"IPC Server Responder" daemon prio=10 tid=0x00007f40e0008800 nid=0x5f4d
runnable [0x00007f40d971e000]
   java.lang.Thread.State: RUNNABLE
at sun.nio.ch.EPollArrayWrapper.epollWait(Native Method)
at sun.nio.ch.EPollArrayWrapper.poll(EPollArrayWrapper.java:228)
at sun.nio.ch.EPollSelectorImpl.doSelect(EPollSelectorImpl.java:81)
at sun.nio.ch.SelectorImpl.lockAndDoSelect(SelectorImpl.java:87)
- locked <0x00000000eedc99e0> (a sun.nio.ch.Util$2)
- locked <0x00000000eedc99d0> (a java.util.Collections$UnmodifiableSet)
- locked <0x00000000eedc97b0> (a sun.nio.ch.EPollSelectorImpl)
at sun.nio.ch.SelectorImpl.select(SelectorImpl.java:98)
at org.apache.hadoop.ipc.Server$Responder.run(Server.java:605)

"org.apache.hadoop.hdfs.server.datanode.DataXceiverServer@75a61582" daemon
prio=10 tid=0x00007f40e0007000 nid=0x5f4c runnable [0x00007f40d981f000]
   java.lang.Thread.State: RUNNABLE
at sun.nio.ch.ServerSocketChannelImpl.accept0(Native Method)
at
sun.nio.ch.ServerSocketChannelImpl.accept(ServerSocketChannelImpl.java:226)
- locked <0x00000000eeddb870> (a java.lang.Object)
at sun.nio.ch.ServerSocketAdaptor.accept(ServerSocketAdaptor.java:99)
- locked <0x00000000eeddb838> (a java.lang.Object)
at
org.apache.hadoop.hdfs.server.datanode.DataXceiverServer.run(DataXceiverServer.java:131)
at java.lang.Thread.run(Thread.java:722)

"DataNode:
[/data/hadoopfs,/data1/hadoopfs,/data2/hadoopfs,/data3/hadoopfs]" daemon
prio=10 tid=0x00007f40f0761000 nid=0x5f4b in Object.wait()
[0x00007f40d9920000]
   java.lang.Thread.State: TIMED_WAITING (on object monitor)
at java.lang.Object.wait(Native Method)
- waiting on <0x00000000eeddb4f8> (a java.util.LinkedList)
at
org.apache.hadoop.hdfs.server.datanode.DataNode.offerService(DataNode.java:1023)
- locked <0x00000000eeddb4f8> (a java.util.LinkedList)
at org.apache.hadoop.hdfs.server.datanode.DataNode.run(DataNode.java:1458)
at java.lang.Thread.run(Thread.java:722)

"pool-1-thread-1" prio=10 tid=0x00007f40f075d800 nid=0x5f4a runnable
[0x00007f40d9a21000]
   java.lang.Thread.State: RUNNABLE
at sun.nio.ch.EPollArrayWrapper.epollWait(Native Method)
at sun.nio.ch.EPollArrayWrapper.poll(EPollArrayWrapper.java:228)
at sun.nio.ch.EPollSelectorImpl.doSelect(EPollSelectorImpl.java:81)
at sun.nio.ch.SelectorImpl.lockAndDoSelect(SelectorImpl.java:87)
- locked <0x00000000eeda0d40> (a sun.nio.ch.Util$2)
- locked <0x00000000eeda0d30> (a java.util.Collections$UnmodifiableSet)
- locked <0x00000000eeda0b00> (a sun.nio.ch.EPollSelectorImpl)
at sun.nio.ch.SelectorImpl.select(SelectorImpl.java:98)
at sun.nio.ch.SelectorImpl.select(SelectorImpl.java:102)
at org.apache.hadoop.ipc.Server$Listener$Reader.run(Server.java:333)
- locked <0x00000000eeda0ae8> (a
org.apache.hadoop.ipc.Server$Listener$Reader)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1110)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:603)
at java.lang.Thread.run(Thread.java:722)

"Timer-0" daemon prio=10 tid=0x00007f40f019c800 nid=0x5f49 in Object.wait()
[0x00007f40d9d69000]
   java.lang.Thread.State: TIMED_WAITING (on object monitor)
at java.lang.Object.wait(Native Method)
- waiting on <0x00000000eede50c0> (a java.util.TaskQueue)
at java.util.TimerThread.mainLoop(Timer.java:552)
- locked <0x00000000eede50c0> (a java.util.TaskQueue)
at java.util.TimerThread.run(Timer.java:505)

"611753678@qtp-1701186867-1 - Acceptor0 SelectChannelConnector@0.0.0.0:50075"
prio=10 tid=0x00007f40f0653000 nid=0x5f48 runnable [0x00007f40d9e6a000]
   java.lang.Thread.State: RUNNABLE
at sun.nio.ch.EPollArrayWrapper.epollWait(Native Method)
at sun.nio.ch.EPollArrayWrapper.poll(EPollArrayWrapper.java:228)
at sun.nio.ch.EPollSelectorImpl.doSelect(EPollSelectorImpl.java:81)
at sun.nio.ch.SelectorImpl.lockAndDoSelect(SelectorImpl.java:87)
- locked <0x00000000eee000f0> (a sun.nio.ch.Util$2)
- locked <0x00000000eee00100> (a java.util.Collections$UnmodifiableSet)
- locked <0x00000000eee000a8> (a sun.nio.ch.EPollSelectorImpl)
at sun.nio.ch.SelectorImpl.select(SelectorImpl.java:98)
at
org.mortbay.io.nio.SelectorManager$SelectSet.doSelect(SelectorManager.java:498)
at org.mortbay.io.nio.SelectorManager.doSelect(SelectorManager.java:192)
at
org.mortbay.jetty.nio.SelectChannelConnector.accept(SelectChannelConnector.java:124)
at
org.mortbay.jetty.AbstractConnector$Acceptor.run(AbstractConnector.java:708)
at
org.mortbay.thread.QueuedThreadPool$PoolThread.run(QueuedThreadPool.java:582)

"1261953562@qtp-1701186867-0" prio=10 tid=0x00007f40f0651800 nid=0x5f47 in
Object.wait() [0x00007f40d9f6b000]
   java.lang.Thread.State: TIMED_WAITING (on object monitor)
at java.lang.Object.wait(Native Method)
- waiting on <0x00000000eede8068> (a
org.mortbay.thread.QueuedThreadPool$PoolThread)
at
org.mortbay.thread.QueuedThreadPool$PoolThread.run(QueuedThreadPool.java:626)
- locked <0x00000000eede8068> (a
org.mortbay.thread.QueuedThreadPool$PoolThread)

"Async Block Report Generator" daemon prio=10 tid=0x00007f40f05ec000
nid=0x5f46 in Object.wait() [0x00007f40da06c000]
   java.lang.Thread.State: TIMED_WAITING (on object monitor)
at java.lang.Object.wait(Native Method)
- waiting on <0x00000000eeddaed0> (a
org.apache.hadoop.hdfs.server.datanode.FSDataset$AsyncBlockReport)
at
org.apache.hadoop.hdfs.server.datanode.FSDataset$AsyncBlockReport.waitForReportRequest(FSDataset.java:2254)
- locked <0x00000000eeddaed0> (a
org.apache.hadoop.hdfs.server.datanode.FSDataset$AsyncBlockReport)
at
org.apache.hadoop.hdfs.server.datanode.FSDataset$AsyncBlockReport.run(FSDataset.java:2224)
at java.lang.Thread.run(Thread.java:722)

"refreshUsed-/data3/hadoopfs" daemon prio=10 tid=0x00007f40f05e7000
nid=0x5f45 waiting on condition [0x00007f40da16d000]
   java.lang.Thread.State: TIMED_WAITING (sleeping)
at java.lang.Thread.sleep(Native Method)
at org.apache.hadoop.fs.DU$DURefreshThread.run(DU.java:80)
at java.lang.Thread.run(Thread.java:722)

"refreshUsed-/data2/hadoopfs" daemon prio=10 tid=0x00007f40f05e5800
nid=0x5f42 waiting on condition [0x00007f40e41d7000]
   java.lang.Thread.State: TIMED_WAITING (sleeping)
at java.lang.Thread.sleep(Native Method)
at org.apache.hadoop.fs.DU$DURefreshThread.run(DU.java:80)
at java.lang.Thread.run(Thread.java:722)

"refreshUsed-/data1/hadoopfs" daemon prio=10 tid=0x00007f40f05e4800
nid=0x5f3f waiting on condition [0x00007f40e42d8000]
   java.lang.Thread.State: TIMED_WAITING (sleeping)
at java.lang.Thread.sleep(Native Method)
at org.apache.hadoop.fs.DU$DURefreshThread.run(DU.java:80)
at java.lang.Thread.run(Thread.java:722)

"refreshUsed-/data/hadoopfs" daemon prio=10 tid=0x00007f40f05df000
nid=0x5f3c waiting on condition [0x00007f40e43d9000]
   java.lang.Thread.State: TIMED_WAITING (sleeping)
at java.lang.Thread.sleep(Native Method)
at org.apache.hadoop.fs.DU$DURefreshThread.run(DU.java:80)
at java.lang.Thread.run(Thread.java:722)

"IPC Client (47) connection to master:54310 from hadoop" daemon prio=10
tid=0x00007f40f05bd000 nid=0x5f39 in Object.wait() [0x00007f40e44da000]
   java.lang.Thread.State: TIMED_WAITING (on object monitor)
at java.lang.Object.wait(Native Method)
- waiting on <0x00000000eedca5f0> (a
org.apache.hadoop.ipc.Client$Connection)
at org.apache.hadoop.ipc.Client$Connection.waitForWork(Client.java:706)
- locked <0x00000000eedca5f0> (a org.apache.hadoop.ipc.Client$Connection)
at org.apache.hadoop.ipc.Client$Connection.run(Client.java:748)

"Timer for 'DataNode' metrics system" daemon prio=10 tid=0x00007f40f0509800
nid=0x5f27 in Object.wait() [0x00007f40e4804000]
   java.lang.Thread.State: TIMED_WAITING (on object monitor)
at java.lang.Object.wait(Native Method)
- waiting on <0x00000000eedf86d0> (a java.util.TaskQueue)
at java.util.TimerThread.mainLoop(Timer.java:552)
- locked <0x00000000eedf86d0> (a java.util.TaskQueue)
at java.util.TimerThread.run(Timer.java:505)

"ganglia" daemon prio=10 tid=0x00007f40f0507000 nid=0x5f26 in Object.wait()
[0x00007f40e4905000]
   java.lang.Thread.State: WAITING (on object monitor)
at java.lang.Object.wait(Native Method)
- waiting on <0x00000000eedf8790> (a
org.apache.hadoop.metrics2.impl.SinkQueue)
at java.lang.Object.wait(Object.java:503)
at org.apache.hadoop.metrics2.impl.SinkQueue.waitForData(SinkQueue.java:109)
- locked <0x00000000eedf8790> (a org.apache.hadoop.metrics2.impl.SinkQueue)
at org.apache.hadoop.metrics2.impl.SinkQueue.consumeAll(SinkQueue.java:78)
at
org.apache.hadoop.metrics2.impl.MetricsSinkAdapter.publishMetricsFromQueue(MetricsSinkAdapter.java:113)
at
org.apache.hadoop.metrics2.impl.MetricsSinkAdapter$2.run(MetricsSinkAdapter.java:89)

"RMI TCP Accept-0" daemon prio=10 tid=0x00007f40f0350000 nid=0x5f23
runnable [0x00007f40e4d0d000]
   java.lang.Thread.State: RUNNABLE
at java.net.PlainSocketImpl.socketAccept(Native Method)
at java.net.AbstractPlainSocketImpl.accept(AbstractPlainSocketImpl.java:398)
at java.net.ServerSocket.implAccept(ServerSocket.java:522)
at java.net.ServerSocket.accept(ServerSocket.java:490)
at
sun.management.jmxremote.LocalRMIServerSocketFactory$1.accept(LocalRMIServerSocketFactory.java:52)
at
sun.rmi.transport.tcp.TCPTransport$AcceptLoop.executeAcceptLoop(TCPTransport.java:387)
at sun.rmi.transport.tcp.TCPTransport$AcceptLoop.run(TCPTransport.java:359)
at java.lang.Thread.run(Thread.java:722)

"Service Thread" daemon prio=10 tid=0x00007f40f00f1000 nid=0x5f22 runnable
[0x0000000000000000]
   java.lang.Thread.State: RUNNABLE

"C2 CompilerThread1" daemon prio=10 tid=0x00007f40f00ee800 nid=0x5f21
waiting on condition [0x0000000000000000]
   java.lang.Thread.State: RUNNABLE

"C2 CompilerThread0" daemon prio=10 tid=0x00007f40f00eb800 nid=0x5f20
waiting on condition [0x0000000000000000]
   java.lang.Thread.State: RUNNABLE

"Signal Dispatcher" daemon prio=10 tid=0x00007f40f00e9800 nid=0x5f1f
runnable [0x0000000000000000]
   java.lang.Thread.State: RUNNABLE

"Finalizer" daemon prio=10 tid=0x00007f40f009c800 nid=0x5f1e in
Object.wait() [0x00007f40e5d2d000]
   java.lang.Thread.State: WAITING (on object monitor)
at java.lang.Object.wait(Native Method)
- waiting on <0x00000000eecd1208> (a java.lang.ref.ReferenceQueue$Lock)
at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:135)
- locked <0x00000000eecd1208> (a java.lang.ref.ReferenceQueue$Lock)
at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:151)
at java.lang.ref.Finalizer$FinalizerThread.run(Finalizer.java:177)

"Reference Handler" daemon prio=10 tid=0x00007f40f009a800 nid=0x5f1d in
Object.wait() [0x00007f40e5e2e000]
   java.lang.Thread.State: WAITING (on object monitor)
at java.lang.Object.wait(Native Method)
- waiting on <0x00000000eecd0d90> (a java.lang.ref.Reference$Lock)
at java.lang.Object.wait(Object.java:503)
at java.lang.ref.Reference$ReferenceHandler.run(Reference.java:133)
- locked <0x00000000eecd0d90> (a java.lang.ref.Reference$Lock)

"main" prio=10 tid=0x00007f40f0009800 nid=0x5f17 in Object.wait()
[0x00007f40f5dce000]
   java.lang.Thread.State: WAITING (on object monitor)
at java.lang.Object.wait(Native Method)
- waiting on <0x00000000eedf8570> (a java.lang.Thread)
at java.lang.Thread.join(Thread.java:1258)
- locked <0x00000000eedf8570> (a java.lang.Thread)
at java.lang.Thread.join(Thread.java:1332)
at org.apache.hadoop.hdfs.server.datanode.DataNode.join(DataNode.java:1547)
at
org.apache.hadoop.hdfs.server.datanode.DataNode.secureMain(DataNode.java:1667)
at org.apache.hadoop.hdfs.server.datanode.DataNode.main(DataNode.java:1682)

"VM Thread" prio=10 tid=0x00007f40f0093000 nid=0x5f1c runnable

"GC task thread#0 (ParallelGC)" prio=10 tid=0x00007f40f0017800 nid=0x5f18
runnable

"GC task thread#1 (ParallelGC)" prio=10 tid=0x00007f40f0019000 nid=0x5f19
runnable

"GC task thread#2 (ParallelGC)" prio=10 tid=0x00007f40f001b000 nid=0x5f1a
runnable

"GC task thread#3 (ParallelGC)" prio=10 tid=0x00007f40f001d000 nid=0x5f1b
runnable

"VM Periodic Task Thread" prio=10 tid=0x00007f40f0376000 nid=0x5f24 waiting
on condition

JNI global references: 216



Any help would be great. Right now, I am not even sure where to look for
issues.

regards.

Re: datanodes not sending report

Posted by prem yadav <ip...@gmail.com>.
Sorry. I should have sent it to the hadoop list.
We have resolved the issue.
The cause was: earlier, hadoop was picking up <dfs.tmp.dir>/dfs/data as the
dfs data dir. Later, when we specified the <dfs.data.dir> property in the
config, hadoop did not append /dfs/data to the path, so the datanode was
looking for blocks directly in <dfs.data.dir> instead of under /dfs/data.
We changed the path to include /dfs/data and it worked fine.

regards,
./Prem


On Mon, Jan 7, 2013 at 2:53 PM, prem yadav <ip...@gmail.com> wrote:

> Hi,
>
> We have been running hadoop without much issues for some time. Today we
> has a problem where the datanodes has their disks full and the cluster
> stopped working.
> We fixed things, modified the config to add directories to dfs.data.dir
> and restarted.
>
> The hadoop version is 1.0.4.
>
> The issue is:
> the datanodes are not sending any block reports. No errors in the logs.
> The namenode shows there are 6 datanodes but never leaves the safe mode and
> the report ratio never goes up from 0.000.
>
> On one of the slave the jstack logs are:
>
> 2013-01-07 09:13:04
> Full thread dump Java HotSpot(TM) 64-Bit Server VM (23.5-b02 mixed mode):
>
> "Attach Listener" daemon prio=10 tid=0x00007f40f0766800 nid=0x6268 waiting
> on condition [0x0000000000000000]
>    java.lang.Thread.State: RUNNABLE
>
> "org.apache.hadoop.hdfs.server.datanode.DataBlockScanner@207a0c69" daemon
> prio=10 tid=0x00007f40e001a000 nid=0x5f52 waiting on condition
> [0x00007f40d9219000]
>    java.lang.Thread.State: TIMED_WAITING (sleeping)
> at java.lang.Thread.sleep(Native Method)
> at
> org.apache.hadoop.hdfs.server.datanode.DataBlockScanner.run(DataBlockScanner.java:620)
>  at java.lang.Thread.run(Thread.java:722)
>
> "IPC Server handler 2 on 50020" daemon prio=10 tid=0x00007f40e0017800
> nid=0x5f51 waiting on condition [0x00007f40d931a000]
>    java.lang.Thread.State: WAITING (parking)
> at sun.misc.Unsafe.park(Native Method)
> - parking to wait for  <0x00000000eedc95b8> (a
> java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
>  at java.util.concurrent.locks.LockSupport.park(LockSupport.java:186)
> at
> java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2043)
>  at
> java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:442)
> at org.apache.hadoop.ipc.Server$Handler.run(Server.java:1364)
>
> "IPC Server handler 1 on 50020" daemon prio=10 tid=0x00007f40e0015000
> nid=0x5f50 waiting on condition [0x00007f40d941b000]
>    java.lang.Thread.State: WAITING (parking)
> at sun.misc.Unsafe.park(Native Method)
>  - parking to wait for  <0x00000000eedc95b8> (a
> java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
> at java.util.concurrent.locks.LockSupport.park(LockSupport.java:186)
>  at
> java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2043)
> at
> java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:442)
>  at org.apache.hadoop.ipc.Server$Handler.run(Server.java:1364)
>
> "IPC Server handler 0 on 50020" daemon prio=10 tid=0x00007f40e0013000
> nid=0x5f4f waiting on condition [0x00007f40d951c000]
>    java.lang.Thread.State: WAITING (parking)
> at sun.misc.Unsafe.park(Native Method)
> - parking to wait for  <0x00000000eedc95b8> (a
> java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
>  at java.util.concurrent.locks.LockSupport.park(LockSupport.java:186)
> at
> java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2043)
>  at
> java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:442)
> at org.apache.hadoop.ipc.Server$Handler.run(Server.java:1364)
>
> "IPC Server listener on 50020" daemon prio=10 tid=0x00007f40e000a000
> nid=0x5f4e runnable [0x00007f40d961d000]
>    java.lang.Thread.State: RUNNABLE
> at sun.nio.ch.EPollArrayWrapper.epollWait(Native Method)
>  at sun.nio.ch.EPollArrayWrapper.poll(EPollArrayWrapper.java:228)
> at sun.nio.ch.EPollSelectorImpl.doSelect(EPollSelectorImpl.java:81)
>  at sun.nio.ch.SelectorImpl.lockAndDoSelect(SelectorImpl.java:87)
> - locked <0x00000000eeda0720> (a sun.nio.ch.Util$2)
>  - locked <0x00000000eeda0710> (a java.util.Collections$UnmodifiableSet)
> - locked <0x00000000eeda04d0> (a sun.nio.ch.EPollSelectorImpl)
>  at sun.nio.ch.SelectorImpl.select(SelectorImpl.java:98)
> at sun.nio.ch.SelectorImpl.select(SelectorImpl.java:102)
>  at org.apache.hadoop.ipc.Server$Listener.run(Server.java:439)
>
> "IPC Server Responder" daemon prio=10 tid=0x00007f40e0008800 nid=0x5f4d
> runnable [0x00007f40d971e000]
>    java.lang.Thread.State: RUNNABLE
> at sun.nio.ch.EPollArrayWrapper.epollWait(Native Method)
> at sun.nio.ch.EPollArrayWrapper.poll(EPollArrayWrapper.java:228)
>  at sun.nio.ch.EPollSelectorImpl.doSelect(EPollSelectorImpl.java:81)
> at sun.nio.ch.SelectorImpl.lockAndDoSelect(SelectorImpl.java:87)
>  - locked <0x00000000eedc99e0> (a sun.nio.ch.Util$2)
> - locked <0x00000000eedc99d0> (a java.util.Collections$UnmodifiableSet)
>  - locked <0x00000000eedc97b0> (a sun.nio.ch.EPollSelectorImpl)
> at sun.nio.ch.SelectorImpl.select(SelectorImpl.java:98)
>  at org.apache.hadoop.ipc.Server$Responder.run(Server.java:605)
>
> "org.apache.hadoop.hdfs.server.datanode.DataXceiverServer@75a61582"
> daemon prio=10 tid=0x00007f40e0007000 nid=0x5f4c runnable
> [0x00007f40d981f000]
>    java.lang.Thread.State: RUNNABLE
> at sun.nio.ch.ServerSocketChannelImpl.accept0(Native Method)
> at
> sun.nio.ch.ServerSocketChannelImpl.accept(ServerSocketChannelImpl.java:226)
>  - locked <0x00000000eeddb870> (a java.lang.Object)
> at sun.nio.ch.ServerSocketAdaptor.accept(ServerSocketAdaptor.java:99)
>  - locked <0x00000000eeddb838> (a java.lang.Object)
> at
> org.apache.hadoop.hdfs.server.datanode.DataXceiverServer.run(DataXceiverServer.java:131)
>  at java.lang.Thread.run(Thread.java:722)
>
> "DataNode:
> [/data/hadoopfs,/data1/hadoopfs,/data2/hadoopfs,/data3/hadoopfs]" daemon
> prio=10 tid=0x00007f40f0761000 nid=0x5f4b in Object.wait()
> [0x00007f40d9920000]
>    java.lang.Thread.State: TIMED_WAITING (on object monitor)
> at java.lang.Object.wait(Native Method)
> - waiting on <0x00000000eeddb4f8> (a java.util.LinkedList)
>  at
> org.apache.hadoop.hdfs.server.datanode.DataNode.offerService(DataNode.java:1023)
> - locked <0x00000000eeddb4f8> (a java.util.LinkedList)
>  at
> org.apache.hadoop.hdfs.server.datanode.DataNode.run(DataNode.java:1458)
> at java.lang.Thread.run(Thread.java:722)
>
> "pool-1-thread-1" prio=10 tid=0x00007f40f075d800 nid=0x5f4a runnable
> [0x00007f40d9a21000]
>    java.lang.Thread.State: RUNNABLE
> at sun.nio.ch.EPollArrayWrapper.epollWait(Native Method)
>  at sun.nio.ch.EPollArrayWrapper.poll(EPollArrayWrapper.java:228)
> at sun.nio.ch.EPollSelectorImpl.doSelect(EPollSelectorImpl.java:81)
>  at sun.nio.ch.SelectorImpl.lockAndDoSelect(SelectorImpl.java:87)
> - locked <0x00000000eeda0d40> (a sun.nio.ch.Util$2)
>  - locked <0x00000000eeda0d30> (a java.util.Collections$UnmodifiableSet)
> - locked <0x00000000eeda0b00> (a sun.nio.ch.EPollSelectorImpl)
>  at sun.nio.ch.SelectorImpl.select(SelectorImpl.java:98)
> at sun.nio.ch.SelectorImpl.select(SelectorImpl.java:102)
>  at org.apache.hadoop.ipc.Server$Listener$Reader.run(Server.java:333)
> - locked <0x00000000eeda0ae8> (a
> org.apache.hadoop.ipc.Server$Listener$Reader)
>  at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1110)
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:603)
>  at java.lang.Thread.run(Thread.java:722)
>
> "Timer-0" daemon prio=10 tid=0x00007f40f019c800 nid=0x5f49 in
> Object.wait() [0x00007f40d9d69000]
>    java.lang.Thread.State: TIMED_WAITING (on object monitor)
> at java.lang.Object.wait(Native Method)
> - waiting on <0x00000000eede50c0> (a java.util.TaskQueue)
>  at java.util.TimerThread.mainLoop(Timer.java:552)
> - locked <0x00000000eede50c0> (a java.util.TaskQueue)
>  at java.util.TimerThread.run(Timer.java:505)
>
> "611753678@qtp-1701186867-1 - Acceptor0
> SelectChannelConnector@0.0.0.0:50075" prio=10 tid=0x00007f40f0653000
> nid=0x5f48 runnable [0x00007f40d9e6a000]
>    java.lang.Thread.State: RUNNABLE
> at sun.nio.ch.EPollArrayWrapper.epollWait(Native Method)
> at sun.nio.ch.EPollArrayWrapper.poll(EPollArrayWrapper.java:228)
>  at sun.nio.ch.EPollSelectorImpl.doSelect(EPollSelectorImpl.java:81)
> at sun.nio.ch.SelectorImpl.lockAndDoSelect(SelectorImpl.java:87)
>  - locked <0x00000000eee000f0> (a sun.nio.ch.Util$2)
> - locked <0x00000000eee00100> (a java.util.Collections$UnmodifiableSet)
>  - locked <0x00000000eee000a8> (a sun.nio.ch.EPollSelectorImpl)
> at sun.nio.ch.SelectorImpl.select(SelectorImpl.java:98)
>  at
> org.mortbay.io.nio.SelectorManager$SelectSet.doSelect(SelectorManager.java:498)
> at org.mortbay.io.nio.SelectorManager.doSelect(SelectorManager.java:192)
>  at
> org.mortbay.jetty.nio.SelectChannelConnector.accept(SelectChannelConnector.java:124)
> at
> org.mortbay.jetty.AbstractConnector$Acceptor.run(AbstractConnector.java:708)
>  at
> org.mortbay.thread.QueuedThreadPool$PoolThread.run(QueuedThreadPool.java:582)
>
> "1261953562@qtp-1701186867-0" prio=10 tid=0x00007f40f0651800 nid=0x5f47
> in Object.wait() [0x00007f40d9f6b000]
>    java.lang.Thread.State: TIMED_WAITING (on object monitor)
> at java.lang.Object.wait(Native Method)
> - waiting on <0x00000000eede8068> (a
> org.mortbay.thread.QueuedThreadPool$PoolThread)
>  at
> org.mortbay.thread.QueuedThreadPool$PoolThread.run(QueuedThreadPool.java:626)
> - locked <0x00000000eede8068> (a
> org.mortbay.thread.QueuedThreadPool$PoolThread)
>
> "Async Block Report Generator" daemon prio=10 tid=0x00007f40f05ec000
> nid=0x5f46 in Object.wait() [0x00007f40da06c000]
>    java.lang.Thread.State: TIMED_WAITING (on object monitor)
>  at java.lang.Object.wait(Native Method)
> - waiting on <0x00000000eeddaed0> (a
> org.apache.hadoop.hdfs.server.datanode.FSDataset$AsyncBlockReport)
>  at
> org.apache.hadoop.hdfs.server.datanode.FSDataset$AsyncBlockReport.waitForReportRequest(FSDataset.java:2254)
> - locked <0x00000000eeddaed0> (a
> org.apache.hadoop.hdfs.server.datanode.FSDataset$AsyncBlockReport)
>  at
> org.apache.hadoop.hdfs.server.datanode.FSDataset$AsyncBlockReport.run(FSDataset.java:2224)
> at java.lang.Thread.run(Thread.java:722)
>
> "refreshUsed-/data3/hadoopfs" daemon prio=10 tid=0x00007f40f05e7000
> nid=0x5f45 waiting on condition [0x00007f40da16d000]
>    java.lang.Thread.State: TIMED_WAITING (sleeping)
>  at java.lang.Thread.sleep(Native Method)
> at org.apache.hadoop.fs.DU$DURefreshThread.run(DU.java:80)
> at java.lang.Thread.run(Thread.java:722)
>
> "refreshUsed-/data2/hadoopfs" daemon prio=10 tid=0x00007f40f05e5800
> nid=0x5f42 waiting on condition [0x00007f40e41d7000]
>    java.lang.Thread.State: TIMED_WAITING (sleeping)
>  at java.lang.Thread.sleep(Native Method)
> at org.apache.hadoop.fs.DU$DURefreshThread.run(DU.java:80)
> at java.lang.Thread.run(Thread.java:722)
>
> "refreshUsed-/data1/hadoopfs" daemon prio=10 tid=0x00007f40f05e4800
> nid=0x5f3f waiting on condition [0x00007f40e42d8000]
>    java.lang.Thread.State: TIMED_WAITING (sleeping)
>  at java.lang.Thread.sleep(Native Method)
> at org.apache.hadoop.fs.DU$DURefreshThread.run(DU.java:80)
> at java.lang.Thread.run(Thread.java:722)
>
> "refreshUsed-/data/hadoopfs" daemon prio=10 tid=0x00007f40f05df000
> nid=0x5f3c waiting on condition [0x00007f40e43d9000]
>    java.lang.Thread.State: TIMED_WAITING (sleeping)
>  at java.lang.Thread.sleep(Native Method)
> at org.apache.hadoop.fs.DU$DURefreshThread.run(DU.java:80)
> at java.lang.Thread.run(Thread.java:722)
>
> "IPC Client (47) connection to master:54310 from hadoop" daemon prio=10
> tid=0x00007f40f05bd000 nid=0x5f39 in Object.wait() [0x00007f40e44da000]
>    java.lang.Thread.State: TIMED_WAITING (on object monitor)
>  at java.lang.Object.wait(Native Method)
> - waiting on <0x00000000eedca5f0> (a
> org.apache.hadoop.ipc.Client$Connection)
>  at org.apache.hadoop.ipc.Client$Connection.waitForWork(Client.java:706)
> - locked <0x00000000eedca5f0> (a org.apache.hadoop.ipc.Client$Connection)
>  at org.apache.hadoop.ipc.Client$Connection.run(Client.java:748)
>
> "Timer for 'DataNode' metrics system" daemon prio=10
> tid=0x00007f40f0509800 nid=0x5f27 in Object.wait() [0x00007f40e4804000]
>    java.lang.Thread.State: TIMED_WAITING (on object monitor)
> at java.lang.Object.wait(Native Method)
> - waiting on <0x00000000eedf86d0> (a java.util.TaskQueue)
>  at java.util.TimerThread.mainLoop(Timer.java:552)
> - locked <0x00000000eedf86d0> (a java.util.TaskQueue)
>  at java.util.TimerThread.run(Timer.java:505)
>
> "ganglia" daemon prio=10 tid=0x00007f40f0507000 nid=0x5f26 in
> Object.wait() [0x00007f40e4905000]
>    java.lang.Thread.State: WAITING (on object monitor)
> at java.lang.Object.wait(Native Method)
> - waiting on <0x00000000eedf8790> (a
> org.apache.hadoop.metrics2.impl.SinkQueue)
>  at java.lang.Object.wait(Object.java:503)
> at
> org.apache.hadoop.metrics2.impl.SinkQueue.waitForData(SinkQueue.java:109)
>  - locked <0x00000000eedf8790> (a
> org.apache.hadoop.metrics2.impl.SinkQueue)
> at org.apache.hadoop.metrics2.impl.SinkQueue.consumeAll(SinkQueue.java:78)
>  at
> org.apache.hadoop.metrics2.impl.MetricsSinkAdapter.publishMetricsFromQueue(MetricsSinkAdapter.java:113)
> at
> org.apache.hadoop.metrics2.impl.MetricsSinkAdapter$2.run(MetricsSinkAdapter.java:89)
>
> "RMI TCP Accept-0" daemon prio=10 tid=0x00007f40f0350000 nid=0x5f23
> runnable [0x00007f40e4d0d000]
>    java.lang.Thread.State: RUNNABLE
> at java.net.PlainSocketImpl.socketAccept(Native Method)
>  at
> java.net.AbstractPlainSocketImpl.accept(AbstractPlainSocketImpl.java:398)
> at java.net.ServerSocket.implAccept(ServerSocket.java:522)
>  at java.net.ServerSocket.accept(ServerSocket.java:490)
> at
> sun.management.jmxremote.LocalRMIServerSocketFactory$1.accept(LocalRMIServerSocketFactory.java:52)
>  at
> sun.rmi.transport.tcp.TCPTransport$AcceptLoop.executeAcceptLoop(TCPTransport.java:387)
> at sun.rmi.transport.tcp.TCPTransport$AcceptLoop.run(TCPTransport.java:359)
>  at java.lang.Thread.run(Thread.java:722)
>
> "Service Thread" daemon prio=10 tid=0x00007f40f00f1000 nid=0x5f22 runnable
> [0x0000000000000000]
>    java.lang.Thread.State: RUNNABLE
>
> "C2 CompilerThread1" daemon prio=10 tid=0x00007f40f00ee800 nid=0x5f21
> waiting on condition [0x0000000000000000]
>    java.lang.Thread.State: RUNNABLE
>
> "C2 CompilerThread0" daemon prio=10 tid=0x00007f40f00eb800 nid=0x5f20
> waiting on condition [0x0000000000000000]
>    java.lang.Thread.State: RUNNABLE
>
> "Signal Dispatcher" daemon prio=10 tid=0x00007f40f00e9800 nid=0x5f1f
> runnable [0x0000000000000000]
>    java.lang.Thread.State: RUNNABLE
>
> "Finalizer" daemon prio=10 tid=0x00007f40f009c800 nid=0x5f1e in
> Object.wait() [0x00007f40e5d2d000]
>    java.lang.Thread.State: WAITING (on object monitor)
>  at java.lang.Object.wait(Native Method)
> - waiting on <0x00000000eecd1208> (a java.lang.ref.ReferenceQueue$Lock)
>  at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:135)
> - locked <0x00000000eecd1208> (a java.lang.ref.ReferenceQueue$Lock)
>  at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:151)
> at java.lang.ref.Finalizer$FinalizerThread.run(Finalizer.java:177)
>
> "Reference Handler" daemon prio=10 tid=0x00007f40f009a800 nid=0x5f1d in
> Object.wait() [0x00007f40e5e2e000]
>    java.lang.Thread.State: WAITING (on object monitor)
> at java.lang.Object.wait(Native Method)
>  - waiting on <0x00000000eecd0d90> (a java.lang.ref.Reference$Lock)
> at java.lang.Object.wait(Object.java:503)
>  at java.lang.ref.Reference$ReferenceHandler.run(Reference.java:133)
> - locked <0x00000000eecd0d90> (a java.lang.ref.Reference$Lock)
>
> "main" prio=10 tid=0x00007f40f0009800 nid=0x5f17 in Object.wait()
> [0x00007f40f5dce000]
>    java.lang.Thread.State: WAITING (on object monitor)
> at java.lang.Object.wait(Native Method)
>  - waiting on <0x00000000eedf8570> (a java.lang.Thread)
> at java.lang.Thread.join(Thread.java:1258)
>  - locked <0x00000000eedf8570> (a java.lang.Thread)
> at java.lang.Thread.join(Thread.java:1332)
> at org.apache.hadoop.hdfs.server.datanode.DataNode.join(DataNode.java:1547)
>  at
> org.apache.hadoop.hdfs.server.datanode.DataNode.secureMain(DataNode.java:1667)
> at org.apache.hadoop.hdfs.server.datanode.DataNode.main(DataNode.java:1682)
>
> "VM Thread" prio=10 tid=0x00007f40f0093000 nid=0x5f1c runnable
>
> "GC task thread#0 (ParallelGC)" prio=10 tid=0x00007f40f0017800 nid=0x5f18
> runnable
>
> "GC task thread#1 (ParallelGC)" prio=10 tid=0x00007f40f0019000 nid=0x5f19
> runnable
>
> "GC task thread#2 (ParallelGC)" prio=10 tid=0x00007f40f001b000 nid=0x5f1a
> runnable
>
> "GC task thread#3 (ParallelGC)" prio=10 tid=0x00007f40f001d000 nid=0x5f1b
> runnable
>
> "VM Periodic Task Thread" prio=10 tid=0x00007f40f0376000 nid=0x5f24
> waiting on condition
>
> JNI global references: 216
>
>
>
> Any help would be great. Right now, I am not even sure where to look for
> issues.
>
> regards.
>