You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@hama.apache.org by 김민호 <mi...@samsung.com> on 2015/07/07 12:13:27 UTC

Need to increase the default number of connections to zookeeper

Hi all,

 

Recently, I set up a Hama cluster using 2 machines.

Their specification is as follows:

- 8 GB RAM

- 12 TB HDD

- (I don’t remember CPU spec.)

 

In order to run a Hama job, I set bsp.tasks.maximum=40 and
bsp.child.java.opts=-Xmx4096m in hama-site.xml. (The rest of the settings are omitted.)

Then I ran the Pi Estimator and FastGraphGen examples, but I got the
errors below.

 

attempt_201507071627_0001_000023_0:
org.apache.zookeeper.KeeperException$ConnectionLossException:
KeeperErrorCode = ConnectionLoss for
/bsp/job_201507071627_0001/peers/cluster-0:61029

attempt_201507071627_0001_000023_0:      at
org.apache.zookeeper.KeeperException.create(KeeperException.java:99)

attempt_201507071627_0001_000023_0:      at
org.apache.zookeeper.KeeperException.create(KeeperException.java:51)

attempt_201507071627_0001_000023_0:      at
org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1041)

attempt_201507071627_0001_000023_0:      at
org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1069)

attempt_201507071627_0001_000023_0:      at
org.apache.hama.bsp.sync.ZKSyncClient.isExists(ZKSyncClient.java:108)

attempt_201507071627_0001_000023_0:      at
org.apache.hama.bsp.sync.ZKSyncClient.writeNode(ZKSyncClient.java:261)

attempt_201507071627_0001_000023_0:      at
org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.registerTask(ZooKeeperSyncC
lientImpl.java:279)

attempt_201507071627_0001_000023_0:      at
org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.register(ZooKeeperSyncClien
tImpl.java:261)

attempt_201507071627_0001_000023_0:      at org.apache.hama.bsp.BSPPeerImpl.
initializeSyncService(BSPPeerImpl.java:305)

attempt_201507071627_0001_000023_0:      at org.apache.hama.bsp.BSPPeerImpl.
<init>(BSPPeerImpl.java:185)

attempt_201507071627_0001_000023_0:     at
org.apache.hama.bsp.GroomServer$BSPPeerChild.main(GroomServer.java:1251)

attempt_201507071627_0001_000023_0: 15/07/07 16:27:40 ERROR
sync.ZKSyncClient: Error creating zk path
/bsp/job_201507071627_0001/peers/cluster-0:61029

attempt_201507071627_0001_000023_0:
org.apache.zookeeper.KeeperException$ConnectionLossException:
KeeperErrorCode = ConnectionLoss for /bsp

attempt_201507071627_0001_000023_0:      at
org.apache.zookeeper.KeeperException.create(KeeperException.java:99)

attempt_201507071627_0001_000023_0:      at
org.apache.zookeeper.KeeperException.create(KeeperException.java:51)

attempt_201507071627_0001_000023_0:      at
org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1041)

attempt_201507071627_0001_000023_0:      at
org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1069)

attempt_201507071627_0001_000023_0:      at
org.apache.hama.bsp.sync.ZKSyncClient.createZnode(ZKSyncClient.java:135)

attempt_201507071627_0001_000023_0:      at
org.apache.hama.bsp.sync.ZKSyncClient.writeNode(ZKSyncClient.java:281)

attempt_201507071627_0001_000023_0:      at
org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.registerTask(ZooKeeperSyncC
lientImpl.java:279)

attempt_201507071627_0001_000023_0:      at
org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.register(ZooKeeperSyncClien
tImpl.java:261)

attempt_201507071627_0001_000023_0:      at org.apache.hama.bsp.BSPPeerImpl.
initializeSyncService(BSPPeerImpl.java:305)

attempt_201507071627_0001_000023_0:      at org.apache.hama.bsp.BSPPeerImpl.
<init>(BSPPeerImpl.java:185)

attempt_201507071627_0001_000023_0:     at
org.apache.hama.bsp.GroomServer$BSPPeerChild.main(GroomServer.java:1251)

attempt_201507071627_0001_000023_0: 15/07/07 16:27:42 ERROR
sync.ZKSyncClient: Error checking zk path /bsp/job_201507071627_0001/sync/-1

attempt_201507071627_0001_000023_0:
org.apache.zookeeper.KeeperException$ConnectionLossException:
KeeperErrorCode = ConnectionLoss for /bsp/job_201507071627_0001/sync/-1

attempt_201507071627_0001_000023_0:      at
org.apache.zookeeper.KeeperException.create(KeeperException.java:99)

attempt_201507071627_0001_000023_0:      at
org.apache.zookeeper.KeeperException.create(KeeperException.java:51)

attempt_201507071627_0001_000023_0:      at
org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1041)

attempt_201507071627_0001_000023_0:      at
org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1069)

attempt_201507071627_0001_000023_0:      at
org.apache.hama.bsp.sync.ZKSyncClient.isExists(ZKSyncClient.java:108)

attempt_201507071627_0001_000023_0:      at
org.apache.hama.bsp.sync.ZKSyncClient.writeNode(ZKSyncClient.java:261)

attempt_201507071627_0001_000023_0:      at
org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.enterBarrier(ZooKeeperSyncC
lientImpl.java:100)

attempt_201507071627_0001_000023_0:      at org.apache.hama.bsp.BSPPeerImpl.
doFirstSync(BSPPeerImpl.java:312)

attempt_201507071627_0001_000023_0:      at org.apache.hama.bsp.BSPPeerImpl.
<init>(BSPPeerImpl.java:238)

attempt_201507071627_0001_000023_0:     at
org.apache.hama.bsp.GroomServer$BSPPeerChild.main(GroomServer.java:1251)

attempt_201507071627_0001_000023_0: 15/07/07 16:27:44 ERROR
sync.ZKSyncClient: Error creating zk path /bsp/job_201507071627_0001/sync/-1

attempt_201507071627_0001_000023_0:
org.apache.zookeeper.KeeperException$ConnectionLossException:
KeeperErrorCode = ConnectionLoss for /bsp

attempt_201507071627_0001_000023_0:      at
org.apache.zookeeper.KeeperException.create(KeeperException.java:99)

attempt_201507071627_0001_000023_0:      at
org.apache.zookeeper.KeeperException.create(KeeperException.java:51)

attempt_201507071627_0001_000023_0:      at
org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1041)

attempt_201507071627_0001_000023_0:      at
org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1069)

attempt_201507071627_0001_000023_0:      at
org.apache.hama.bsp.sync.ZKSyncClient.createZnode(ZKSyncClient.java:135)

attempt_201507071627_0001_000023_0:      at
org.apache.hama.bsp.sync.ZKSyncClient.writeNode(ZKSyncClient.java:281)

attempt_201507071627_0001_000023_0:      at
org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.enterBarrier(ZooKeeperSyncC
lientImpl.java:100)

attempt_201507071627_0001_000023_0:      at org.apache.hama.bsp.BSPPeerImpl.
doFirstSync(BSPPeerImpl.java:312)

attempt_201507071627_0001_000023_0:      at org.apache.hama.bsp.BSPPeerImpl.
<init>(BSPPeerImpl.java:238)

attempt_201507071627_0001_000023_0:     at
org.apache.hama.bsp.GroomServer$BSPPeerChild.main(GroomServer.java:1251)

attempt_201507071627_0001_000023_0: 15/07/07 16:27:46 FATAL
bsp.GroomServer: SyncError from child

attempt_201507071627_0001_000023_0: org.apache.hama.bsp.sync.SyncException

attempt_201507071627_0001_000023_0:      at
org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.enterBarrier(ZooKeeperSyncC
lientImpl.java:138)

attempt_201507071627_0001_000023_0:      at org.apache.hama.bsp.BSPPeerImpl.
doFirstSync(BSPPeerImpl.java:312)

attempt_201507071627_0001_000023_0:      at org.apache.hama.bsp.BSPPeerImpl.
<init>(BSPPeerImpl.java:238)

attempt_201507071627_0001_000023_0:     at
org.apache.hama.bsp.GroomServer$BSPPeerChild.main(GroomServer.java:1251)

15/07/07 16:27:48 INFO bsp.BSPJobClient: Job failed.

 

This is a ZK error. Hama tasks try to get the /bsp node from ZooKeeper and
fail.

This is because hama.zookeeper.property.maxClientCnxns is set to 30 in
hama-default.xml.

The problem occurs when the maximum number of tasks is larger than
that limit.

To solve the problem, Hama has a setting to increase the number of
connections to ZK.

 

<property>

    <name>hama.zookeeper.property.maxClientCnxns</name>

    <value>100</value>

</property>

 

So we should raise the default number of connections to more than 100,
because servers are much more capable than they used to be.

If you agree with my opinion, I will change the default value to 300.

 

Best regards,

Minho Kim

 


Re: Need to increase the default number of connections to zookeeper

Posted by Minho Kim <mi...@apache.org>.
Okay, Thanks Edward.

Best Regards,
Minho Kim

2015-07-08 14:21 GMT+09:00 Edward J. Yoon <ed...@apache.org>:

> Just FYI,
>
> I just committed below:
>
> Index: conf/hama-default.xml
> ===================================================================
> --- conf/hama-default.xml (revision 1689791)
> +++ conf/hama-default.xml (working copy)
> @@ -262,7 +262,7 @@
>    </property>
>    <property>
>      <name>hama.zookeeper.property.maxClientCnxns</name>
> -    <value>30</value>
> +    <value>100</value>
>      <description>Property from ZooKeeper's config zoo.cfg.
>      Limit on number of concurrent connections (at the socket level) that a
>
>
> On Tue, Jul 7, 2015 at 9:17 PM, Minho Kim <mi...@apache.org> wrote:
> > Oops,
> > I made a mistake. Edward is right. Each node has 192G RAM.
> >
> > Thanks,
> > Minho Kim
> >
> > 2015-07-07 19:50 GMT+09:00 Edward J. Yoon <ed...@apache.org>:
> >
> >> > - 8 GB RAM
> >>
> >> I guess it looks like a typo Minho. :-) AFAIK, each node has 192GB
> memory.
> >>
> >> +1 we need to increase the default maxClientCnxns since modern
> >> machines have enough RAM.
> >>
> >> On Tue, Jul 7, 2015 at 7:13 PM, 김민호 <mi...@samsung.com> wrote:
> >> > Hi all,
> >> >
> >> >
> >> >
> >> > Recently, I set up Hama cluster using 2 machines.
> >> >
> >> > This specification is as follows:
> >> >
> >> > - 8 GB RAM
> >> >
> >> > - 12 TB HDD
> >> >
> >> > - (I don’t remember CPU spec.)
> >> >
> >> >
> >> >
> >> > In order to run hama job, I set up configuration, bsp.tasks.maximum=40
> >> and
> >> > bsp.child.java.opts=-Xmx4096m, in hama-site.xml. (skip rests of
> >> settings.)
> >> >
> >> > So I performed examples which are pi Estimator and FastGraphGen but I
> got
> >> > below errors.
> >> >
> >> >
> >> >
> >> > attempt_201507071627_0001_000023_0:
> >> > org.apache.zookeeper.KeeperException$ConnectionLossException:
> >> > KeeperErrorCode = ConnectionLoss for
> >> > /bsp/job_201507071627_0001/peers/cluster-0:61029
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> > org.apache.zookeeper.KeeperException.create(KeeperException.java:99)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> > org.apache.zookeeper.KeeperException.create(KeeperException.java:51)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> > org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1041)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> > org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1069)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> > org.apache.hama.bsp.sync.ZKSyncClient.isExists(ZKSyncClient.java:108)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> > org.apache.hama.bsp.sync.ZKSyncClient.writeNode(ZKSyncClient.java:261)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> >
> >>
> org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.registerTask(ZooKeeperSyncC
> >> > lientImpl.java:279)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> >
> >>
> org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.register(ZooKeeperSyncClien
> >> > tImpl.java:261)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> org.apache.hama.bsp.BSPPeerImpl.
> >> > initializeSyncService(BSPPeerImpl.java:305)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> org.apache.hama.bsp.BSPPeerImpl.
> >> > <init>(BSPPeerImpl.java:185)
> >> >
> >> > attempt_201507071627_0001_000023_0:     at
> >> >
> org.apache.hama.bsp.GroomServer$BSPPeerChild.main(GroomServer.java:1251)
> >> >
> >> > attempt_201507071627_0001_000023_0: 15/07/07 16:27:40 ERROR
> >> > sync.ZKSyncClient: Error creating zk path
> >> > /bsp/job_201507071627_0001/peers/cluster-0:61029
> >> >
> >> > attempt_201507071627_0001_000023_0:
> >> > org.apache.zookeeper.KeeperException$ConnectionLossException:
> >> > KeeperErrorCode = ConnectionLoss for /bsp
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> > org.apache.zookeeper.KeeperException.create(KeeperException.java:99)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> > org.apache.zookeeper.KeeperException.create(KeeperException.java:51)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> > org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1041)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> > org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1069)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> >
> org.apache.hama.bsp.sync.ZKSyncClient.createZnode(ZKSyncClient.java:135)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> > org.apache.hama.bsp.sync.ZKSyncClient.writeNode(ZKSyncClient.java:281)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> >
> >>
> org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.registerTask(ZooKeeperSyncC
> >> > lientImpl.java:279)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> >
> >>
> org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.register(ZooKeeperSyncClien
> >> > tImpl.java:261)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> org.apache.hama.bsp.BSPPeerImpl.
> >> > initializeSyncService(BSPPeerImpl.java:305)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> org.apache.hama.bsp.BSPPeerImpl.
> >> > <init>(BSPPeerImpl.java:185)
> >> >
> >> > attempt_201507071627_0001_000023_0:     at
> >> >
> org.apache.hama.bsp.GroomServer$BSPPeerChild.main(GroomServer.java:1251)
> >> >
> >> > attempt_201507071627_0001_000023_0: 15/07/07 16:27:42 ERROR
> >> > sync.ZKSyncClient: Error checking zk path
> >> /bsp/job_201507071627_0001/sync/-1
> >> >
> >> > attempt_201507071627_0001_000023_0:
> >> > org.apache.zookeeper.KeeperException$ConnectionLossException:
> >> > KeeperErrorCode = ConnectionLoss for
> /bsp/job_201507071627_0001/sync/-1
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> > org.apache.zookeeper.KeeperException.create(KeeperException.java:99)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> > org.apache.zookeeper.KeeperException.create(KeeperException.java:51)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> > org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1041)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> > org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1069)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> > org.apache.hama.bsp.sync.ZKSyncClient.isExists(ZKSyncClient.java:108)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> > org.apache.hama.bsp.sync.ZKSyncClient.writeNode(ZKSyncClient.java:261)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> >
> >>
> org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.enterBarrier(ZooKeeperSyncC
> >> > lientImpl.java:100)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> org.apache.hama.bsp.BSPPeerImpl.
> >> > doFirstSync(BSPPeerImpl.java:312)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> org.apache.hama.bsp.BSPPeerImpl.
> >> > <init>(BSPPeerImpl.java:238)
> >> >
> >> > attempt_201507071627_0001_000023_0:     at
> >> >
> org.apache.hama.bsp.GroomServer$BSPPeerChild.main(GroomServer.java:1251)
> >> >
> >> > attempt_201507071627_0001_000023_0: 15/07/07 16:27:44 ERROR
> >> > sync.ZKSyncClient: Error creating zk path
> >> /bsp/job_201507071627_0001/sync/-1
> >> >
> >> > attempt_201507071627_0001_000023_0:
> >> > org.apache.zookeeper.KeeperException$ConnectionLossException:
> >> > KeeperErrorCode = ConnectionLoss for /bsp
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> > org.apache.zookeeper.KeeperException.create(KeeperException.java:99)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> > org.apache.zookeeper.KeeperException.create(KeeperException.java:51)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> > org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1041)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> > org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1069)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> >
> org.apache.hama.bsp.sync.ZKSyncClient.createZnode(ZKSyncClient.java:135)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> > org.apache.hama.bsp.sync.ZKSyncClient.writeNode(ZKSyncClient.java:281)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> >
> >>
> org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.enterBarrier(ZooKeeperSyncC
> >> > lientImpl.java:100)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> org.apache.hama.bsp.BSPPeerImpl.
> >> > doFirstSync(BSPPeerImpl.java:312)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> org.apache.hama.bsp.BSPPeerImpl.
> >> > <init>(BSPPeerImpl.java:238)
> >> >
> >> > attempt_201507071627_0001_000023_0:     at
> >> >
> org.apache.hama.bsp.GroomServer$BSPPeerChild.main(GroomServer.java:1251)
> >> >
> >> > attempt_201507071627_0001_000023_0: 15/07/07 16:27:46 FATAL
> >> > bsp.GroomServer: SyncError from child
> >> >
> >> > attempt_201507071627_0001_000023_0:
> >> org.apache.hama.bsp.sync.SyncException
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> >
> >>
> org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.enterBarrier(ZooKeeperSyncC
> >> > lientImpl.java:138)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> org.apache.hama.bsp.BSPPeerImpl.
> >> > doFirstSync(BSPPeerImpl.java:312)
> >> >
> >> > attempt_201507071627_0001_000023_0:      at
> >> org.apache.hama.bsp.BSPPeerImpl.
> >> > <init>(BSPPeerImpl.java:238)
> >> >
> >> > attempt_201507071627_0001_000023_0:     at
> >> >
> org.apache.hama.bsp.GroomServer$BSPPeerChild.main(GroomServer.java:1251)
> >> >
> >> > 15/07/07 16:27:48 INFO bsp.BSPJobClient: Job failed.
> >> >
> >> >
> >> >
> >> > This is a ZK error. Hama tasks try to get the /bsp node from zookeeper
> >> and
> >> > fails.
> >> >
> >> > This is just because hama.zookeeper.property.maxClientCnxns is 30 in
> >> hama-
> >> > default.xml.
> >> >
> >> > The problem has been encountered while the number of maximum tasks is
> >> > larger than it.
> >> >
> >> > To solve the problem, Hama has a setting to increase the number of
> >> > connections to ZK.
> >> >
> >> >
> >> >
> >> > <property>
> >> >
> >> >     <name>hama.zookeeper.property.maxClientCnxns</name>
> >> >
> >> >     <value>100</value>
> >> >
> >> > </property>
> >> >
> >> >
> >> >
> >> > So we should update the default number of connections which is over
> 100
> >> > because server’s performance has been more improved than before.
> >> >
> >> > If you agree my opinion, I will change the default value as 300.
> >> >
> >> >
> >> >
> >> > Best regards,
> >> >
> >> > Minho Kim
> >> >
> >> >
> >> >
> >>
> >>
> >>
> >> --
> >> Best Regards, Edward J. Yoon
> >>
>
>
>
> --
> Best Regards, Edward J. Yoon
>

Re: Need to increase the default number of connections to zookeeper

Posted by "Edward J. Yoon" <ed...@apache.org>.
Just FYI,

I just committed below:

Index: conf/hama-default.xml
===================================================================
--- conf/hama-default.xml (revision 1689791)
+++ conf/hama-default.xml (working copy)
@@ -262,7 +262,7 @@
   </property>
   <property>
     <name>hama.zookeeper.property.maxClientCnxns</name>
-    <value>30</value>
+    <value>100</value>
     <description>Property from ZooKeeper's config zoo.cfg.
     Limit on number of concurrent connections (at the socket level) that a


On Tue, Jul 7, 2015 at 9:17 PM, Minho Kim <mi...@apache.org> wrote:
> Oops,
> I made a mistake. Edward is right. Each node has 192G RAM.
>
> Thanks,
> Minho Kim
>
> 2015-07-07 19:50 GMT+09:00 Edward J. Yoon <ed...@apache.org>:
>
>> > - 8 GB RAM
>>
>> I guess it looks like a typo Minho. :-) AFAIK, each node has 192GB memory.
>>
>> +1 we need to increase the default maxClientCnxns since modern
>> machines have enough RAM.
>>
>> On Tue, Jul 7, 2015 at 7:13 PM, 김민호 <mi...@samsung.com> wrote:
>> > Hi all,
>> >
>> >
>> >
>> > Recently, I set up Hama cluster using 2 machines.
>> >
>> > This specification is as follows:
>> >
>> > - 8 GB RAM
>> >
>> > - 12 TB HDD
>> >
>> > - (I don’t remember CPU spec.)
>> >
>> >
>> >
>> > In order to run hama job, I set up configuration, bsp.tasks.maximum=40
>> and
>> > bsp.child.java.opts=-Xmx4096m, in hama-site.xml. (skip rests of
>> settings.)
>> >
>> > So I performed examples which are pi Estimator and FastGraphGen but I got
>> > below errors.
>> >
>> >
>> >
>> > attempt_201507071627_0001_000023_0:
>> > org.apache.zookeeper.KeeperException$ConnectionLossException:
>> > KeeperErrorCode = ConnectionLoss for
>> > /bsp/job_201507071627_0001/peers/cluster-0:61029
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> > org.apache.zookeeper.KeeperException.create(KeeperException.java:99)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> > org.apache.zookeeper.KeeperException.create(KeeperException.java:51)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> > org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1041)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> > org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1069)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> > org.apache.hama.bsp.sync.ZKSyncClient.isExists(ZKSyncClient.java:108)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> > org.apache.hama.bsp.sync.ZKSyncClient.writeNode(ZKSyncClient.java:261)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> >
>> org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.registerTask(ZooKeeperSyncC
>> > lientImpl.java:279)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> >
>> org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.register(ZooKeeperSyncClien
>> > tImpl.java:261)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> org.apache.hama.bsp.BSPPeerImpl.
>> > initializeSyncService(BSPPeerImpl.java:305)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> org.apache.hama.bsp.BSPPeerImpl.
>> > <init>(BSPPeerImpl.java:185)
>> >
>> > attempt_201507071627_0001_000023_0:     at
>> > org.apache.hama.bsp.GroomServer$BSPPeerChild.main(GroomServer.java:1251)
>> >
>> > attempt_201507071627_0001_000023_0: 15/07/07 16:27:40 ERROR
>> > sync.ZKSyncClient: Error creating zk path
>> > /bsp/job_201507071627_0001/peers/cluster-0:61029
>> >
>> > attempt_201507071627_0001_000023_0:
>> > org.apache.zookeeper.KeeperException$ConnectionLossException:
>> > KeeperErrorCode = ConnectionLoss for /bsp
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> > org.apache.zookeeper.KeeperException.create(KeeperException.java:99)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> > org.apache.zookeeper.KeeperException.create(KeeperException.java:51)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> > org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1041)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> > org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1069)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> > org.apache.hama.bsp.sync.ZKSyncClient.createZnode(ZKSyncClient.java:135)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> > org.apache.hama.bsp.sync.ZKSyncClient.writeNode(ZKSyncClient.java:281)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> >
>> org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.registerTask(ZooKeeperSyncC
>> > lientImpl.java:279)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> >
>> org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.register(ZooKeeperSyncClien
>> > tImpl.java:261)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> org.apache.hama.bsp.BSPPeerImpl.
>> > initializeSyncService(BSPPeerImpl.java:305)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> org.apache.hama.bsp.BSPPeerImpl.
>> > <init>(BSPPeerImpl.java:185)
>> >
>> > attempt_201507071627_0001_000023_0:     at
>> > org.apache.hama.bsp.GroomServer$BSPPeerChild.main(GroomServer.java:1251)
>> >
>> > attempt_201507071627_0001_000023_0: 15/07/07 16:27:42 ERROR
>> > sync.ZKSyncClient: Error checking zk path
>> /bsp/job_201507071627_0001/sync/-1
>> >
>> > attempt_201507071627_0001_000023_0:
>> > org.apache.zookeeper.KeeperException$ConnectionLossException:
>> > KeeperErrorCode = ConnectionLoss for /bsp/job_201507071627_0001/sync/-1
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> > org.apache.zookeeper.KeeperException.create(KeeperException.java:99)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> > org.apache.zookeeper.KeeperException.create(KeeperException.java:51)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> > org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1041)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> > org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1069)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> > org.apache.hama.bsp.sync.ZKSyncClient.isExists(ZKSyncClient.java:108)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> > org.apache.hama.bsp.sync.ZKSyncClient.writeNode(ZKSyncClient.java:261)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> >
>> org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.enterBarrier(ZooKeeperSyncC
>> > lientImpl.java:100)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> org.apache.hama.bsp.BSPPeerImpl.
>> > doFirstSync(BSPPeerImpl.java:312)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> org.apache.hama.bsp.BSPPeerImpl.
>> > <init>(BSPPeerImpl.java:238)
>> >
>> > attempt_201507071627_0001_000023_0:     at
>> > org.apache.hama.bsp.GroomServer$BSPPeerChild.main(GroomServer.java:1251)
>> >
>> > attempt_201507071627_0001_000023_0: 15/07/07 16:27:44 ERROR
>> > sync.ZKSyncClient: Error creating zk path
>> /bsp/job_201507071627_0001/sync/-1
>> >
>> > attempt_201507071627_0001_000023_0:
>> > org.apache.zookeeper.KeeperException$ConnectionLossException:
>> > KeeperErrorCode = ConnectionLoss for /bsp
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> > org.apache.zookeeper.KeeperException.create(KeeperException.java:99)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> > org.apache.zookeeper.KeeperException.create(KeeperException.java:51)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> > org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1041)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> > org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1069)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> > org.apache.hama.bsp.sync.ZKSyncClient.createZnode(ZKSyncClient.java:135)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> > org.apache.hama.bsp.sync.ZKSyncClient.writeNode(ZKSyncClient.java:281)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> >
>> org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.enterBarrier(ZooKeeperSyncC
>> > lientImpl.java:100)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> org.apache.hama.bsp.BSPPeerImpl.
>> > doFirstSync(BSPPeerImpl.java:312)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> org.apache.hama.bsp.BSPPeerImpl.
>> > <init>(BSPPeerImpl.java:238)
>> >
>> > attempt_201507071627_0001_000023_0:     at
>> > org.apache.hama.bsp.GroomServer$BSPPeerChild.main(GroomServer.java:1251)
>> >
>> > attempt_201507071627_0001_000023_0: 15/07/07 16:27:46 FATAL
>> > bsp.GroomServer: SyncError from child
>> >
>> > attempt_201507071627_0001_000023_0:
>> org.apache.hama.bsp.sync.SyncException
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> >
>> org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.enterBarrier(ZooKeeperSyncC
>> > lientImpl.java:138)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> org.apache.hama.bsp.BSPPeerImpl.
>> > doFirstSync(BSPPeerImpl.java:312)
>> >
>> > attempt_201507071627_0001_000023_0:      at
>> org.apache.hama.bsp.BSPPeerImpl.
>> > <init>(BSPPeerImpl.java:238)
>> >
>> > attempt_201507071627_0001_000023_0:     at
>> > org.apache.hama.bsp.GroomServer$BSPPeerChild.main(GroomServer.java:1251)
>> >
>> > 15/07/07 16:27:48 INFO bsp.BSPJobClient: Job failed.
>> >
>> >
>> >
>> > This is a ZK error. Hama tasks try to get the /bsp node from zookeeper
>> and
>> > fails.
>> >
>> > This is just because hama.zookeeper.property.maxClientCnxns is 30 in
>> hama-
>> > default.xml.
>> >
>> > The problem has been encountered while the number of maximum tasks is
>> > larger than it.
>> >
>> > To solve the problem, Hama has a setting to increase the number of
>> > connections to ZK.
>> >
>> >
>> >
>> > <property>
>> >
>> >     <name>hama.zookeeper.property.maxClientCnxns</name>
>> >
>> >     <value>100</value>
>> >
>> > </property>
>> >
>> >
>> >
>> > So we should update the default number of connections which is over 100
>> > because server’s performance has been more improved than before.
>> >
>> > If you agree my opinion, I will change the default value as 300.
>> >
>> >
>> >
>> > Best regards,
>> >
>> > Minho Kim
>> >
>> >
>> >
>>
>>
>>
>> --
>> Best Regards, Edward J. Yoon
>>



-- 
Best Regards, Edward J. Yoon

Re: Need to increase the default number of connections to zookeeper

Posted by Minho Kim <mi...@apache.org>.
Oops,
I made a mistake. Edward is right. Each node has 192G RAM.

Thanks,
Minho Kim

2015-07-07 19:50 GMT+09:00 Edward J. Yoon <ed...@apache.org>:

> > - 8 GB RAM
>
> I guess it looks like a typo Minho. :-) AFAIK, each node has 192GB memory.
>
> +1 we need to increase the default maxClientCnxns since modern
> machines have enough RAM.
>
> On Tue, Jul 7, 2015 at 7:13 PM, 김민호 <mi...@samsung.com> wrote:
> > Hi all,
> >
> >
> >
> > Recently, I set up Hama cluster using 2 machines.
> >
> > This specification is as follows:
> >
> > - 8 GB RAM
> >
> > - 12 TB HDD
> >
> > - (I don’t remember CPU spec.)
> >
> >
> >
> > In order to run hama job, I set up configuration, bsp.tasks.maximum=40
> and
> > bsp.child.java.opts=-Xmx4096m, in hama-site.xml. (skip rests of
> settings.)
> >
> > So I performed examples which are pi Estimator and FastGraphGen but I got
> > below errors.
> >
> >
> >
> > attempt_201507071627_0001_000023_0:
> > org.apache.zookeeper.KeeperException$ConnectionLossException:
> > KeeperErrorCode = ConnectionLoss for
> > /bsp/job_201507071627_0001/peers/cluster-0:61029
> >
> > attempt_201507071627_0001_000023_0:      at
> > org.apache.zookeeper.KeeperException.create(KeeperException.java:99)
> >
> > attempt_201507071627_0001_000023_0:      at
> > org.apache.zookeeper.KeeperException.create(KeeperException.java:51)
> >
> > attempt_201507071627_0001_000023_0:      at
> > org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1041)
> >
> > attempt_201507071627_0001_000023_0:      at
> > org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1069)
> >
> > attempt_201507071627_0001_000023_0:      at
> > org.apache.hama.bsp.sync.ZKSyncClient.isExists(ZKSyncClient.java:108)
> >
> > attempt_201507071627_0001_000023_0:      at
> > org.apache.hama.bsp.sync.ZKSyncClient.writeNode(ZKSyncClient.java:261)
> >
> > attempt_201507071627_0001_000023_0:      at
> >
> org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.registerTask(ZooKeeperSyncC
> > lientImpl.java:279)
> >
> > attempt_201507071627_0001_000023_0:      at
> >
> org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.register(ZooKeeperSyncClien
> > tImpl.java:261)
> >
> > attempt_201507071627_0001_000023_0:      at
> org.apache.hama.bsp.BSPPeerImpl.
> > initializeSyncService(BSPPeerImpl.java:305)
> >
> > attempt_201507071627_0001_000023_0:      at
> org.apache.hama.bsp.BSPPeerImpl.
> > <init>(BSPPeerImpl.java:185)
> >
> > attempt_201507071627_0001_000023_0:     at
> > org.apache.hama.bsp.GroomServer$BSPPeerChild.main(GroomServer.java:1251)
> >
> > attempt_201507071627_0001_000023_0: 15/07/07 16:27:40 ERROR
> > sync.ZKSyncClient: Error creating zk path
> > /bsp/job_201507071627_0001/peers/cluster-0:61029
> >
> > attempt_201507071627_0001_000023_0:
> > org.apache.zookeeper.KeeperException$ConnectionLossException:
> > KeeperErrorCode = ConnectionLoss for /bsp
> >
> > attempt_201507071627_0001_000023_0:      at
> > org.apache.zookeeper.KeeperException.create(KeeperException.java:99)
> >
> > attempt_201507071627_0001_000023_0:      at
> > org.apache.zookeeper.KeeperException.create(KeeperException.java:51)
> >
> > attempt_201507071627_0001_000023_0:      at
> > org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1041)
> >
> > attempt_201507071627_0001_000023_0:      at
> > org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1069)
> >
> > attempt_201507071627_0001_000023_0:      at
> > org.apache.hama.bsp.sync.ZKSyncClient.createZnode(ZKSyncClient.java:135)
> >
> > attempt_201507071627_0001_000023_0:      at
> > org.apache.hama.bsp.sync.ZKSyncClient.writeNode(ZKSyncClient.java:281)
> >
> > attempt_201507071627_0001_000023_0:      at
> >
> org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.registerTask(ZooKeeperSyncC
> > lientImpl.java:279)
> >
> > attempt_201507071627_0001_000023_0:      at
> >
> org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.register(ZooKeeperSyncClien
> > tImpl.java:261)
> >
> > attempt_201507071627_0001_000023_0:      at
> org.apache.hama.bsp.BSPPeerImpl.
> > initializeSyncService(BSPPeerImpl.java:305)
> >
> > attempt_201507071627_0001_000023_0:      at
> org.apache.hama.bsp.BSPPeerImpl.
> > <init>(BSPPeerImpl.java:185)
> >
> > attempt_201507071627_0001_000023_0:     at
> > org.apache.hama.bsp.GroomServer$BSPPeerChild.main(GroomServer.java:1251)
> >
> > attempt_201507071627_0001_000023_0: 15/07/07 16:27:42 ERROR
> > sync.ZKSyncClient: Error checking zk path
> /bsp/job_201507071627_0001/sync/-1
> >
> > attempt_201507071627_0001_000023_0:
> > org.apache.zookeeper.KeeperException$ConnectionLossException:
> > KeeperErrorCode = ConnectionLoss for /bsp/job_201507071627_0001/sync/-1
> >
> > attempt_201507071627_0001_000023_0:      at
> > org.apache.zookeeper.KeeperException.create(KeeperException.java:99)
> >
> > attempt_201507071627_0001_000023_0:      at
> > org.apache.zookeeper.KeeperException.create(KeeperException.java:51)
> >
> > attempt_201507071627_0001_000023_0:      at
> > org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1041)
> >
> > attempt_201507071627_0001_000023_0:      at
> > org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1069)
> >
> > attempt_201507071627_0001_000023_0:      at
> > org.apache.hama.bsp.sync.ZKSyncClient.isExists(ZKSyncClient.java:108)
> >
> > attempt_201507071627_0001_000023_0:      at
> > org.apache.hama.bsp.sync.ZKSyncClient.writeNode(ZKSyncClient.java:261)
> >
> > attempt_201507071627_0001_000023_0:      at
> >
> org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.enterBarrier(ZooKeeperSyncC
> > lientImpl.java:100)
> >
> > attempt_201507071627_0001_000023_0:      at
> org.apache.hama.bsp.BSPPeerImpl.
> > doFirstSync(BSPPeerImpl.java:312)
> >
> > attempt_201507071627_0001_000023_0:      at
> org.apache.hama.bsp.BSPPeerImpl.
> > <init>(BSPPeerImpl.java:238)
> >
> > attempt_201507071627_0001_000023_0:     at
> > org.apache.hama.bsp.GroomServer$BSPPeerChild.main(GroomServer.java:1251)
> >
> > attempt_201507071627_0001_000023_0: 15/07/07 16:27:44 ERROR
> > sync.ZKSyncClient: Error creating zk path
> /bsp/job_201507071627_0001/sync/-1
> >
> > attempt_201507071627_0001_000023_0:
> > org.apache.zookeeper.KeeperException$ConnectionLossException:
> > KeeperErrorCode = ConnectionLoss for /bsp
> >
> > attempt_201507071627_0001_000023_0:      at
> > org.apache.zookeeper.KeeperException.create(KeeperException.java:99)
> >
> > attempt_201507071627_0001_000023_0:      at
> > org.apache.zookeeper.KeeperException.create(KeeperException.java:51)
> >
> > attempt_201507071627_0001_000023_0:      at
> > org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1041)
> >
> > attempt_201507071627_0001_000023_0:      at
> > org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1069)
> >
> > attempt_201507071627_0001_000023_0:      at
> > org.apache.hama.bsp.sync.ZKSyncClient.createZnode(ZKSyncClient.java:135)
> >
> > attempt_201507071627_0001_000023_0:      at
> > org.apache.hama.bsp.sync.ZKSyncClient.writeNode(ZKSyncClient.java:281)
> >
> > attempt_201507071627_0001_000023_0:      at
> >
> org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.enterBarrier(ZooKeeperSyncC
> > lientImpl.java:100)
> >
> > attempt_201507071627_0001_000023_0:      at
> org.apache.hama.bsp.BSPPeerImpl.
> > doFirstSync(BSPPeerImpl.java:312)
> >
> > attempt_201507071627_0001_000023_0:      at
> org.apache.hama.bsp.BSPPeerImpl.
> > <init>(BSPPeerImpl.java:238)
> >
> > attempt_201507071627_0001_000023_0:     at
> > org.apache.hama.bsp.GroomServer$BSPPeerChild.main(GroomServer.java:1251)
> >
> > attempt_201507071627_0001_000023_0: 15/07/07 16:27:46 FATAL
> > bsp.GroomServer: SyncError from child
> >
> > attempt_201507071627_0001_000023_0:
> org.apache.hama.bsp.sync.SyncException
> >
> > attempt_201507071627_0001_000023_0:      at
> >
> org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.enterBarrier(ZooKeeperSyncC
> > lientImpl.java:138)
> >
> > attempt_201507071627_0001_000023_0:      at
> org.apache.hama.bsp.BSPPeerImpl.
> > doFirstSync(BSPPeerImpl.java:312)
> >
> > attempt_201507071627_0001_000023_0:      at
> org.apache.hama.bsp.BSPPeerImpl.
> > <init>(BSPPeerImpl.java:238)
> >
> > attempt_201507071627_0001_000023_0:     at
> > org.apache.hama.bsp.GroomServer$BSPPeerChild.main(GroomServer.java:1251)
> >
> > 15/07/07 16:27:48 INFO bsp.BSPJobClient: Job failed.
> >
> >
> >
> > This is a ZK error. Hama tasks try to get the /bsp node from ZooKeeper
> > and fail.
> >
> > This is just because hama.zookeeper.property.maxClientCnxns is 30 in
> > hama-default.xml.
> >
> > The problem occurs when the maximum number of tasks is
> > larger than that value.
> >
> > To solve the problem, Hama has a setting to increase the number of
> > connections to ZK.
> >
> >
> >
> > <property>
> >
> >     <name>hama.zookeeper.property.maxClientCnxns</name>
> >
> >     <value>100</value>
> >
> > </property>
> >
> >
> >
> > So we should raise the default number of connections to more than 100,
> > because servers have become much more powerful than before.
> >
> > If you agree with my opinion, I will change the default value to 300.
> >
> >
> >
> > Best regards,
> >
> > Minho Kim
> >
> >
> >
>
>
>
> --
> Best Regards, Edward J. Yoon
>

Re: Need to increase the default number of connections to zookeeper

Posted by "Edward J. Yoon" <ed...@apache.org>.
> - 8 GB RAM

I guess that looks like a typo, Minho. :-) AFAIK, each node has 192GB memory.

+1 we need to increase the default maxClientCnxns since modern
machines have enough RAM.

On Tue, Jul 7, 2015 at 7:13 PM, 김민호 <mi...@samsung.com> wrote:
> Hi all,
>
>
>
> Recently, I set up Hama cluster using 2 machines.
>
> This specification is as follows:
>
> - 8 GB RAM
>
> - 12 TB HDD
>
> - (I don’t remember CPU spec.)
>
>
>
> In order to run a Hama job, I set bsp.tasks.maximum=40 and
> bsp.child.java.opts=-Xmx4096m in hama-site.xml. (I skip the rest of the settings.)
>
> Then I ran the Pi Estimator and FastGraphGen examples, but I got the
> errors below.
>
>
>
> attempt_201507071627_0001_000023_0:
> org.apache.zookeeper.KeeperException$ConnectionLossException:
> KeeperErrorCode = ConnectionLoss for
> /bsp/job_201507071627_0001/peers/cluster-0:61029
>
> attempt_201507071627_0001_000023_0:      at
> org.apache.zookeeper.KeeperException.create(KeeperException.java:99)
>
> attempt_201507071627_0001_000023_0:      at
> org.apache.zookeeper.KeeperException.create(KeeperException.java:51)
>
> attempt_201507071627_0001_000023_0:      at
> org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1041)
>
> attempt_201507071627_0001_000023_0:      at
> org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1069)
>
> attempt_201507071627_0001_000023_0:      at
> org.apache.hama.bsp.sync.ZKSyncClient.isExists(ZKSyncClient.java:108)
>
> attempt_201507071627_0001_000023_0:      at
> org.apache.hama.bsp.sync.ZKSyncClient.writeNode(ZKSyncClient.java:261)
>
> attempt_201507071627_0001_000023_0:      at
> org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.registerTask(ZooKeeperSyncC
> lientImpl.java:279)
>
> attempt_201507071627_0001_000023_0:      at
> org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.register(ZooKeeperSyncClien
> tImpl.java:261)
>
> attempt_201507071627_0001_000023_0:      at org.apache.hama.bsp.BSPPeerImpl.
> initializeSyncService(BSPPeerImpl.java:305)
>
> attempt_201507071627_0001_000023_0:      at org.apache.hama.bsp.BSPPeerImpl.
> <init>(BSPPeerImpl.java:185)
>
> attempt_201507071627_0001_000023_0:     at
> org.apache.hama.bsp.GroomServer$BSPPeerChild.main(GroomServer.java:1251)
>
> attempt_201507071627_0001_000023_0: 15/07/07 16:27:40 ERROR
> sync.ZKSyncClient: Error creating zk path
> /bsp/job_201507071627_0001/peers/cluster-0:61029
>
> attempt_201507071627_0001_000023_0:
> org.apache.zookeeper.KeeperException$ConnectionLossException:
> KeeperErrorCode = ConnectionLoss for /bsp
>
> attempt_201507071627_0001_000023_0:      at
> org.apache.zookeeper.KeeperException.create(KeeperException.java:99)
>
> attempt_201507071627_0001_000023_0:      at
> org.apache.zookeeper.KeeperException.create(KeeperException.java:51)
>
> attempt_201507071627_0001_000023_0:      at
> org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1041)
>
> attempt_201507071627_0001_000023_0:      at
> org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1069)
>
> attempt_201507071627_0001_000023_0:      at
> org.apache.hama.bsp.sync.ZKSyncClient.createZnode(ZKSyncClient.java:135)
>
> attempt_201507071627_0001_000023_0:      at
> org.apache.hama.bsp.sync.ZKSyncClient.writeNode(ZKSyncClient.java:281)
>
> attempt_201507071627_0001_000023_0:      at
> org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.registerTask(ZooKeeperSyncC
> lientImpl.java:279)
>
> attempt_201507071627_0001_000023_0:      at
> org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.register(ZooKeeperSyncClien
> tImpl.java:261)
>
> attempt_201507071627_0001_000023_0:      at org.apache.hama.bsp.BSPPeerImpl.
> initializeSyncService(BSPPeerImpl.java:305)
>
> attempt_201507071627_0001_000023_0:      at org.apache.hama.bsp.BSPPeerImpl.
> <init>(BSPPeerImpl.java:185)
>
> attempt_201507071627_0001_000023_0:     at
> org.apache.hama.bsp.GroomServer$BSPPeerChild.main(GroomServer.java:1251)
>
> attempt_201507071627_0001_000023_0: 15/07/07 16:27:42 ERROR
> sync.ZKSyncClient: Error checking zk path /bsp/job_201507071627_0001/sync/-1
>
> attempt_201507071627_0001_000023_0:
> org.apache.zookeeper.KeeperException$ConnectionLossException:
> KeeperErrorCode = ConnectionLoss for /bsp/job_201507071627_0001/sync/-1
>
> attempt_201507071627_0001_000023_0:      at
> org.apache.zookeeper.KeeperException.create(KeeperException.java:99)
>
> attempt_201507071627_0001_000023_0:      at
> org.apache.zookeeper.KeeperException.create(KeeperException.java:51)
>
> attempt_201507071627_0001_000023_0:      at
> org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1041)
>
> attempt_201507071627_0001_000023_0:      at
> org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1069)
>
> attempt_201507071627_0001_000023_0:      at
> org.apache.hama.bsp.sync.ZKSyncClient.isExists(ZKSyncClient.java:108)
>
> attempt_201507071627_0001_000023_0:      at
> org.apache.hama.bsp.sync.ZKSyncClient.writeNode(ZKSyncClient.java:261)
>
> attempt_201507071627_0001_000023_0:      at
> org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.enterBarrier(ZooKeeperSyncC
> lientImpl.java:100)
>
> attempt_201507071627_0001_000023_0:      at org.apache.hama.bsp.BSPPeerImpl.
> doFirstSync(BSPPeerImpl.java:312)
>
> attempt_201507071627_0001_000023_0:      at org.apache.hama.bsp.BSPPeerImpl.
> <init>(BSPPeerImpl.java:238)
>
> attempt_201507071627_0001_000023_0:     at
> org.apache.hama.bsp.GroomServer$BSPPeerChild.main(GroomServer.java:1251)
>
> attempt_201507071627_0001_000023_0: 15/07/07 16:27:44 ERROR
> sync.ZKSyncClient: Error creating zk path /bsp/job_201507071627_0001/sync/-1
>
> attempt_201507071627_0001_000023_0:
> org.apache.zookeeper.KeeperException$ConnectionLossException:
> KeeperErrorCode = ConnectionLoss for /bsp
>
> attempt_201507071627_0001_000023_0:      at
> org.apache.zookeeper.KeeperException.create(KeeperException.java:99)
>
> attempt_201507071627_0001_000023_0:      at
> org.apache.zookeeper.KeeperException.create(KeeperException.java:51)
>
> attempt_201507071627_0001_000023_0:      at
> org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1041)
>
> attempt_201507071627_0001_000023_0:      at
> org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1069)
>
> attempt_201507071627_0001_000023_0:      at
> org.apache.hama.bsp.sync.ZKSyncClient.createZnode(ZKSyncClient.java:135)
>
> attempt_201507071627_0001_000023_0:      at
> org.apache.hama.bsp.sync.ZKSyncClient.writeNode(ZKSyncClient.java:281)
>
> attempt_201507071627_0001_000023_0:      at
> org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.enterBarrier(ZooKeeperSyncC
> lientImpl.java:100)
>
> attempt_201507071627_0001_000023_0:      at org.apache.hama.bsp.BSPPeerImpl.
> doFirstSync(BSPPeerImpl.java:312)
>
> attempt_201507071627_0001_000023_0:      at org.apache.hama.bsp.BSPPeerImpl.
> <init>(BSPPeerImpl.java:238)
>
> attempt_201507071627_0001_000023_0:     at
> org.apache.hama.bsp.GroomServer$BSPPeerChild.main(GroomServer.java:1251)
>
> attempt_201507071627_0001_000023_0: 15/07/07 16:27:46 FATAL
> bsp.GroomServer: SyncError from child
>
> attempt_201507071627_0001_000023_0: org.apache.hama.bsp.sync.SyncException
>
> attempt_201507071627_0001_000023_0:      at
> org.apache.hama.bsp.sync.ZooKeeperSyncClientImpl.enterBarrier(ZooKeeperSyncC
> lientImpl.java:138)
>
> attempt_201507071627_0001_000023_0:      at org.apache.hama.bsp.BSPPeerImpl.
> doFirstSync(BSPPeerImpl.java:312)
>
> attempt_201507071627_0001_000023_0:      at org.apache.hama.bsp.BSPPeerImpl.
> <init>(BSPPeerImpl.java:238)
>
> attempt_201507071627_0001_000023_0:     at
> org.apache.hama.bsp.GroomServer$BSPPeerChild.main(GroomServer.java:1251)
>
> 15/07/07 16:27:48 INFO bsp.BSPJobClient: Job failed.
>
>
>
> This is a ZK error. Hama tasks try to get the /bsp node from ZooKeeper and
> fail.
>
> This is just because hama.zookeeper.property.maxClientCnxns is 30 in
> hama-default.xml.
>
> The problem occurs when the maximum number of tasks is
> larger than that value.
>
> To solve the problem, Hama has a setting to increase the number of
> connections to ZK.
>
>
>
> <property>
>
>     <name>hama.zookeeper.property.maxClientCnxns</name>
>
>     <value>100</value>
>
> </property>
>
>
>
> So we should raise the default number of connections to more than 100,
> because servers have become much more powerful than before.
>
> If you agree with my opinion, I will change the default value to 300.
>
>
>
> Best regards,
>
> Minho Kim
>
>
>



-- 
Best Regards, Edward J. Yoon