You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@myriad.apache.org by mi...@accenture.com on 2016/02/03 20:12:57 UTC

Re: MapR yarn-site.xml and mapred-site.xml

Hi Santosh,

        I am running Myriad using Marathon. I am using Mesos-dns as well and the yarn-site.xml and mapred-site.xml are exactly the same for both RM and NM. I just only turned off my RM from MapR Control Console which explains my error logs below. My understanding is that maybe I should remove the rm1, rm2 and rm3 information from the yarn-site file but I want to make sure I am doing it correctly.

        Here is the command I have sent to start RM from Marathon. I am able to see Myriad running via this link in my environment http://rm.marathon.mesos:8192/#/.

{
  "id": "rm",
  "instances": 1,
  "cpus": 0.2,
  "mem": 2048,
  "cmd": "env && export YARN_RESOURCEMANAGER_OPTS=-Dyarn.resourcemanager.hostname=rm.marathon.mesos && yarn resourcemanager",
  "ports": [ 0 ]
}




        Here is the yarn-site.xml file from our MapR cluster:




root@`hostname`:/home/diuser# cat /opt/mapr/hadoop/hadoop-2.7.0/etc/hadoop/yarn-site.xml
<?xml version="1.0"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at


    http://www.apache.org/licenses/LICENSE-2.0


  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<configuration>
  <!-- Resource Manager HA Configs -->
  <property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.automatic-failover.embedded</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.recovery.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>yarn-dicluster.techlabs.accenture.com</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2,rm3</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.id</name>
    <value>rm1</value>
  </property>
  <property>
    <name>yarn.resourcemanager.zk-address</name>
    <value>otherhostname041:5181,otherhostname042:5181,otherhostname043:5181</value>
  </property>

  <!-- Configuration for rm1 -->
  <property>
    <name>yarn.resourcemanager.scheduler.address.rm1</name>
    <value>otherhostname041:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address.rm1</name>
    <value>otherhostname041:8031</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address.rm1</name>
    <value>otherhostname041:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address.rm1</name>
    <value>otherhostname041:8033</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm1</name>
    <value>otherhostname041:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.https.address.rm1</name>
    <value>otherhostname041:8090</value>
  </property>


  <!-- Configuration for rm2 -->
  <property>
    <name>yarn.resourcemanager.scheduler.address.rm2</name>
    <value>otherhostname042:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
    <value>otherhostname042:8031</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address.rm2</name>
    <value>otherhostname042:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address.rm2</name>
    <value>otherhostname042:8033</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm2</name>
    <value>otherhostname042:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.https.address.rm2</name>
    <value>otherhostname042:8090</value>
  </property>


  <!-- Configuration for rm3 -->
  <property>
    <name>yarn.resourcemanager.scheduler.address.rm3</name>
    <value>otherhostname043:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address.rm3</name>
    <value>otherhostname043:8031</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address.rm3</name>
    <value>otherhostname043:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address.rm3</name>
    <value>otherhostname043:8033</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm3</name>
    <value>otherhostname043:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.https.address.rm3</name>
    <value>otherhostname043:8090</value>
  </property>


  <!-- :::CAUTION::: DO NOT EDIT ANYTHING ON OR ABOVE THIS LINE -->

<property>
          <name>yarn.nodemanager.resource.cpu-vcores</name>
          <value>${nodemanager.resource.cpu-vcores}</value>
      </property>
      <property>
          <name>yarn.nodemanager.resource.memory-mb</name>
          <value>${nodemanager.resource.memory-mb}</value>
      </property>
      <!--These options enable dynamic port assignment by mesos -->
      <property>
          <name>yarn.nodemanager.address</name>
          <value>${myriad.yarn.nodemanager.address}</value>
      </property>
      <property>
          <name>yarn.nodemanager.webapp.address</name>
          <value>${myriad.yarn.nodemanager.webapp.address}</value>
      </property>
      <property>
          <name>yarn.nodemanager.webapp.https.address</name>
          <value>${myriad.yarn.nodemanager.webapp.address}</value>
      </property>
      <property>
          <name>yarn.nodemanager.localizer.address</name>
          <value>${myriad.yarn.nodemanager.localizer.address}</value>
      </property>
      <!-- Configure Myriad Scheduler here -->
      <property>
          <name>yarn.resourcemanager.scheduler.class</name>
          <value>org.apache.myriad.scheduler.yarn.MyriadFairScheduler</value>
          <description>One can configure other schedulers as well from following list: org.apache.myriad.scheduler.yarn.MyriadCapacityScheduler, org.apache.myriad.scheduler.yarn.MyriadFifoScheduler</description>
      </property>


</configuration>






        Here is the mapred-site.xml from the cluster as well:




root@`hostname`:/home/diuser# cat /opt/mapr/hadoop/hadoop-2.7.0/etc/hadoop/mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at


    http://www.apache.org/licenses/LICENSE-2.0


  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->


<!-- Put site-specific property overrides in this file. -->


<configuration>
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>otherhostname043:10020</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>otherhostname043:19888</value>
  </property>
  <!--
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn-tez</value>
  </property>
  -->

<!--This option enables dynamic port assignment by mesos -->
<property>
<name>mapreduce.shuffle.port</name>
<value>${myriad.mapreduce.shuffle.port}</value>
</property>



</configuration>








        The issue is when I try to run a job from one of the nodes in the cluster. I get this error below from the log $YARN_HOME/logs/yarn-mapr-nodemanager-`hostname`.log You can see below that it is reading from the yarn-site.xml for rm1, rm2, and rm3.





2016-02-02 15:33:52,116 INFO org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider: Failing over to rm1
2016-02-02 15:33:52,117 INFO org.apache.hadoop.io.retry.RetryInvocationHandler: Exception while invoking nodeHeartbeat of class ResourceTrackerPBClientImpl over rm1 after 27 fail over attempts. Trying to fail over after sleeping for 17384ms.
java.net.ConnectException: Call From `hostname`/10.1.194.49 to otherhostname041:8031 failed on connection exception: java.net.ConnectException: Connection refused; For more details see:  http://wiki.apache.org/hadoop/ConnectionRefused
        at sun.reflect.GeneratedConstructorAccessor22.newInstance(Unknown Source)
        at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
        at java.lang.reflect.Constructor.newInstance(Constructor.java:422)
        at org.apache.hadoop.net.NetUtils.wrapWithMessage(NetUtils.java:792)
        at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:732)
        at org.apache.hadoop.ipc.Client.call(Client.java:1482)
        at org.apache.hadoop.ipc.Client.call(Client.java:1409)
        at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:229)
        at com.sun.proxy.$Proxy28.nodeHeartbeat(Unknown Source)
        at org.apache.hadoop.yarn.server.api.impl.pb.client.ResourceTrackerPBClientImpl.nodeHeartbeat(ResourceTrackerPBClientImpl.java:80)
        at sun.reflect.GeneratedMethodAccessor3.invoke(Unknown Source)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:497)
        at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:187)
        at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102)
        at com.sun.proxy.$Proxy29.nodeHeartbeat(Unknown Source)
        at org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl$1.run(NodeStatusUpdaterImpl.java:622)
        at java.lang.Thread.run(Thread.java:745)
Caused by: java.net.ConnectException: Connection refused
        at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
        at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
        at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:206)
        at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:531)
        at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:495)
        at org.apache.hadoop.ipc.Client$Connection.setupConnection(Client.java:614)
        at org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:708)
        at org.apache.hadoop.ipc.Client$Connection.access$2900(Client.java:374)
        at org.apache.hadoop.ipc.Client.getConnection(Client.java:1531)
        at org.apache.hadoop.ipc.Client.call(Client.java:1448)
        ... 12 more
2016-02-02 15:34:09,502 INFO org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider: Failing over to rm2
2016-02-02 15:34:09,503 INFO org.apache.hadoop.io.retry.RetryInvocationHandler: Exception while invoking nodeHeartbeat of class ResourceTrackerPBClientImpl over rm2 after 28 fail over attempts. Trying to fail over after sleeping for 35554ms.
java.net.ConnectException: Call From `hostname`/10.1.194.49 to otherhostname042:8031 failed on connection exception: java.net.ConnectException: Connection refused; For more details see:  http://wiki.apache.org/hadoop/ConnectionRefused
        at sun.reflect.GeneratedConstructorAccessor22.newInstance(Unknown Source)
        at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
        at java.lang.reflect.Constructor.newInstance(Constructor.java:422)
        at org.apache.hadoop.net.NetUtils.wrapWithMessage(NetUtils.java:792)
        at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:732)
        at org.apache.hadoop.ipc.Client.call(Client.java:1482)
        at org.apache.hadoop.ipc.Client.call(Client.java:1409)
        at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:229)
        at com.sun.proxy.$Proxy28.nodeHeartbeat(Unknown Source)
        at org.apache.hadoop.yarn.server.api.impl.pb.client.ResourceTrackerPBClientImpl.nodeHeartbeat(ResourceTrackerPBClientImpl.java:80)
        at sun.reflect.GeneratedMethodAccessor3.invoke(Unknown Source)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:497)
        at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:187)
        at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102)
        at com.sun.proxy.$Proxy29.nodeHeartbeat(Unknown Source)
        at org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl$1.run(NodeStatusUpdaterImpl.java:622)
        at java.lang.Thread.run(Thread.java:745)
Caused by: java.net.ConnectException: Connection refused
        at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
        at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
        at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:206)
        at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:531)
        at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:495)
        at org.apache.hadoop.ipc.Client$Connection.setupConnection(Client.java:614)
        at org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:708)
        at org.apache.hadoop.ipc.Client$Connection.access$2900(Client.java:374)
        at org.apache.hadoop.ipc.Client.getConnection(Client.java:1531)
        at org.apache.hadoop.ipc.Client.call(Client.java:1448)
        ... 12 more
2016-02-02 15:34:45,058 INFO org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider: Failing over to rm3
2016-02-02 15:34:45,060 INFO org.apache.hadoop.io.retry.RetryInvocationHandler: Exception while invoking nodeHeartbeat of class ResourceTrackerPBClientImpl over rm3 after 29 fail over attempts. Trying to fail over after sleeping for 17219ms.
java.net.ConnectException: Call From `hostname`/10.1.194.49 to otherhostname043:8031 failed on connection exception: java.net.ConnectException: Connection refused; For more details see:  http://wiki.apache.org/hadoop/ConnectionRefused
        at sun.reflect.GeneratedConstructorAccessor22.newInstance(Unknown Source)
        at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
        at java.lang.reflect.Constructor.newInstance(Constructor.java:422)
        at org.apache.hadoop.net.NetUtils.wrapWithMessage(NetUtils.java:792)
        at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:732)
        at org.apache.hadoop.ipc.Client.call(Client.java:1482)
        at org.apache.hadoop.ipc.Client.call(Client.java:1409)
        at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:229)
        at com.sun.proxy.$Proxy28.nodeHeartbeat(Unknown Source)
        at org.apache.hadoop.yarn.server.api.impl.pb.client.ResourceTrackerPBClientImpl.nodeHeartbeat(ResourceTrackerPBClientImpl.java:80)
        at sun.reflect.GeneratedMethodAccessor3.invoke(Unknown Source)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:497)
        at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:187)
        at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102)
        at com.sun.proxy.$Proxy29.nodeHeartbeat(Unknown Source)
        at org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl$1.run(NodeStatusUpdaterImpl.java:622)
        at java.lang.Thread.run(Thread.java:745)
Caused by: java.net.ConnectException: Connection refused
        at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
        at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
        at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:206)
        at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:531)
        at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:495)
        at org.apache.hadoop.ipc.Client$Connection.setupConnection(Client.java:614)
        at org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:708)
        at org.apache.hadoop.ipc.Client$Connection.access$2900(Client.java:374)
        at org.apache.hadoop.ipc.Client.getConnection(Client.java:1531)
        at org.apache.hadoop.ipc.Client.call(Client.java:1448)
        ... 12 more
2016-02-02 15:35:02,280 INFO org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider: Failing over to rm1
2016-02-02 15:35:02,281 WARN org.apache.hadoop.io.retry.RetryInvocationHandler: Exception while invoking class org.apache.hadoop.yarn.server.api.impl.pb.client.ResourceTrackerPBClientImpl.nodeHeartbeat over rm1. Not retrying because failovers (30) exceeded maximum allowed (30)




> Miguel Bernadin Accenture Technology Labs – System Engineering
Contact: W (408) 817-2742 | M (631) 835-6345 | miguel.bernadin@accenture.com






On 2/3/16, 10:49 AM, "Santosh Marella" <sm...@maprtech.com> wrote:

>Hi Miguel,
>
>   Are you running the YARN cluster using Myriad (I assume so)? How did you
>launch your RM - manually/using marathon/using warden? How do NMs
>discover where RM is - perhaps you can paste your yarn-site.xml from the RM
>node and one of your NM nodes.
>
>Thanks,
>Santosh
>
>On Wed, Feb 3, 2016 at 10:36 AM, <mi...@accenture.com> wrote:
>
>> Hello guys,
>>
>> I wanted to know if anyone with a MapR environment can share with me their
>> yarn-site.xml and mapred-site.xml files with me. When running the terasort
>> job, it looks like its looking for rm1, rm2, and rm3. I modified the file
>> in place rather than taking it from template.
>>
>> > Miguel Bernadin Accenture Technology Labs – System Engineering
>> Contact: W (408) 817-2742 | M (631) 835-6345 |
>> miguel.bernadin@accenture.com<ma...@accenture.com>
>>
>> ________________________________
>>
>> This message is for the designated recipient only and may contain
>> privileged, proprietary, or otherwise confidential information. If you have
>> received it in error, please notify the sender immediately and delete the
>> original. Any other use of the e-mail by you is prohibited. Where allowed
>> by local law, electronic communications with Accenture and its affiliates,
>> including e-mail and instant messaging (including content), may be scanned
>> by our systems for the purposes of information security and assessment of
>> internal compliance with Accenture policy.
>>
>> ______________________________________________________________________________________
>>
>> www.accenture.com
>>

________________________________

This message is for the designated recipient only and may contain privileged, proprietary, or otherwise confidential information. If you have received it in error, please notify the sender immediately and delete the original. Any other use of the e-mail by you is prohibited. Where allowed by local law, electronic communications with Accenture and its affiliates, including e-mail and instant messaging (including content), may be scanned by our systems for the purposes of information security and assessment of internal compliance with Accenture policy.
______________________________________________________________________________________

www.accenture.com

Re: MapR yarn-site.xml and mapred-site.xml

Posted by mi...@accenture.com.
Thank you Santosh!

It works perfectly! I will have fun with this.

> Miguel Bernadin Accenture Technology Labs – System Engineering
Contact: W (408) 817-2742 | M (631) 835-6345 | miguel.bernadin@accenture.com






On 2/3/16, 11:33 AM, "Santosh Marella" <sm...@maprtech.com> wrote:

>Thanks for the information, Miguel. I suggest using the the following
>yarn-site.xml:
>
>  <property>
>    <name>yarn.resourcemanager.hostname</name>
>    <value>rm.marathon.mesos</value>
>  </property>
>
>Santosh
>
>On Wed, Feb 3, 2016 at 11:12 AM, <mi...@accenture.com> wrote:
>
>> Hi Santosh,
>>
>>         I am running Myriad using Marathon. I am using Mesos-dns as well
>> and the yarn-site.xml and mapred-site.xml are exactly the same for both RM
>> and NM. I just only turned off my RM from MapR Control Console which
>> explains my error logs below. My understanding is that maybe I should
>> remove the rm1, rm2 and rm3 information from the yarn-site file but I want
>> to make sure I am doing it correctly.
>>
>>         Here is the command I have sent to start RM from Marathon. I am
>> able to see Myriad running via this link in my environment
>> http://rm.marathon.mesos:8192/#/.
>>
>> {
>>   "id": "rm",
>>   "instances": 1,
>>   "cpus": 0.2,
>>   "mem": 2048,
>>   "cmd": "env && export
>> YARN_RESOURCEMANAGER_OPTS=-Dyarn.resourcemanager.hostname=rm.marathon.mesos
>> && yarn resourcemanager",
>>   "ports": [ 0 ]
>> }
>>
>>
>>
>>
>>         Here is the yarn-site.xml file from our MapR cluster:
>>
>>
>>
>>
>> root@`hostname`:/home/diuser# cat
>> /opt/mapr/hadoop/hadoop-2.7.0/etc/hadoop/yarn-site.xml
>> <?xml version="1.0"?>
>> <!--
>>   Licensed under the Apache License, Version 2.0 (the "License");
>>   you may not use this file except in compliance with the License.
>>   You may obtain a copy of the License at
>>
>>
>>     http://www.apache.org/licenses/LICENSE-2.0
>>
>>
>>   Unless required by applicable law or agreed to in writing, software
>>   distributed under the License is distributed on an "AS IS" BASIS,
>>   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>>   See the License for the specific language governing permissions and
>>   limitations under the License. See accompanying LICENSE file.
>> -->
>> <configuration>
>>   <!-- Resource Manager HA Configs -->
>>   <property>
>>     <name>yarn.resourcemanager.ha.enabled</name>
>>     <value>true</value>
>>   </property>
>>   <property>
>>     <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
>>     <value>true</value>
>>   </property>
>>   <property>
>>     <name>yarn.resourcemanager.ha.automatic-failover.embedded</name>
>>     <value>true</value>
>>   </property>
>>   <property>
>>     <name>yarn.resourcemanager.recovery.enabled</name>
>>     <value>true</value>
>>   </property>
>>   <property>
>>     <name>yarn.resourcemanager.cluster-id</name>
>>     <value>yarn-dicluster.techlabs.accenture.com</value>
>>   </property>
>>   <property>
>>     <name>yarn.resourcemanager.ha.rm-ids</name>
>>     <value>rm1,rm2,rm3</value>
>>   </property>
>>   <property>
>>     <name>yarn.resourcemanager.ha.id</name>
>>     <value>rm1</value>
>>   </property>
>>   <property>
>>     <name>yarn.resourcemanager.zk-address</name>
>>
>> <value>otherhostname041:5181,otherhostname042:5181,otherhostname043:5181</value>
>>   </property>
>>
>>   <!-- Configuration for rm1 -->
>>   <property>
>>     <name>yarn.resourcemanager.scheduler.address.rm1</name>
>>     <value>otherhostname041:8030</value>
>>   </property>
>>   <property>
>>     <name>yarn.resourcemanager.resource-tracker.address.rm1</name>
>>     <value>otherhostname041:8031</value>
>>   </property>
>>   <property>
>>     <name>yarn.resourcemanager.address.rm1</name>
>>     <value>otherhostname041:8032</value>
>>   </property>
>>   <property>
>>     <name>yarn.resourcemanager.admin.address.rm1</name>
>>     <value>otherhostname041:8033</value>
>>   </property>
>>   <property>
>>     <name>yarn.resourcemanager.webapp.address.rm1</name>
>>     <value>otherhostname041:8088</value>
>>   </property>
>>   <property>
>>     <name>yarn.resourcemanager.webapp.https.address.rm1</name>
>>     <value>otherhostname041:8090</value>
>>   </property>
>>
>>
>>   <!-- Configuration for rm2 -->
>>   <property>
>>     <name>yarn.resourcemanager.scheduler.address.rm2</name>
>>     <value>otherhostname042:8030</value>
>>   </property>
>>   <property>
>>     <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
>>     <value>otherhostname042:8031</value>
>>   </property>
>>   <property>
>>     <name>yarn.resourcemanager.address.rm2</name>
>>     <value>otherhostname042:8032</value>
>>   </property>
>>   <property>
>>     <name>yarn.resourcemanager.admin.address.rm2</name>
>>     <value>otherhostname042:8033</value>
>>   </property>
>>   <property>
>>     <name>yarn.resourcemanager.webapp.address.rm2</name>
>>     <value>otherhostname042:8088</value>
>>   </property>
>>   <property>
>>     <name>yarn.resourcemanager.webapp.https.address.rm2</name>
>>     <value>otherhostname042:8090</value>
>>   </property>
>>
>>
>>   <!-- Configuration for rm3 -->
>>   <property>
>>     <name>yarn.resourcemanager.scheduler.address.rm3</name>
>>     <value>otherhostname043:8030</value>
>>   </property>
>>   <property>
>>     <name>yarn.resourcemanager.resource-tracker.address.rm3</name>
>>     <value>otherhostname043:8031</value>
>>   </property>
>>   <property>
>>     <name>yarn.resourcemanager.address.rm3</name>
>>     <value>otherhostname043:8032</value>
>>   </property>
>>   <property>
>>     <name>yarn.resourcemanager.admin.address.rm3</name>
>>     <value>otherhostname043:8033</value>
>>   </property>
>>   <property>
>>     <name>yarn.resourcemanager.webapp.address.rm3</name>
>>     <value>otherhostname043:8088</value>
>>   </property>
>>   <property>
>>     <name>yarn.resourcemanager.webapp.https.address.rm3</name>
>>     <value>otherhostname043:8090</value>
>>   </property>
>>
>>
>>   <!-- :::CAUTION::: DO NOT EDIT ANYTHING ON OR ABOVE THIS LINE -->
>>
>> <property>
>>           <name>yarn.nodemanager.resource.cpu-vcores</name>
>>           <value>${nodemanager.resource.cpu-vcores}</value>
>>       </property>
>>       <property>
>>           <name>yarn.nodemanager.resource.memory-mb</name>
>>           <value>${nodemanager.resource.memory-mb}</value>
>>       </property>
>>       <!--These options enable dynamic port assignment by mesos -->
>>       <property>
>>           <name>yarn.nodemanager.address</name>
>>           <value>${myriad.yarn.nodemanager.address}</value>
>>       </property>
>>       <property>
>>           <name>yarn.nodemanager.webapp.address</name>
>>           <value>${myriad.yarn.nodemanager.webapp.address}</value>
>>       </property>
>>       <property>
>>           <name>yarn.nodemanager.webapp.https.address</name>
>>           <value>${myriad.yarn.nodemanager.webapp.address}</value>
>>       </property>
>>       <property>
>>           <name>yarn.nodemanager.localizer.address</name>
>>           <value>${myriad.yarn.nodemanager.localizer.address}</value>
>>       </property>
>>       <!-- Configure Myriad Scheduler here -->
>>       <property>
>>           <name>yarn.resourcemanager.scheduler.class</name>
>>
>> <value>org.apache.myriad.scheduler.yarn.MyriadFairScheduler</value>
>>           <description>One can configure other schedulers as well from
>> following list: org.apache.myriad.scheduler.yarn.MyriadCapacityScheduler,
>> org.apache.myriad.scheduler.yarn.MyriadFifoScheduler</description>
>>       </property>
>>
>>
>> </configuration>
>>
>>
>>
>>
>>
>>
>>         Here is the mapred-site.xml from the cluster as well:
>>
>>
>>
>>
>> root@`hostname`:/home/diuser# cat
>> /opt/mapr/hadoop/hadoop-2.7.0/etc/hadoop/mapred-site.xml
>> <?xml version="1.0"?>
>> <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
>> <!--
>>   Licensed under the Apache License, Version 2.0 (the "License");
>>   you may not use this file except in compliance with the License.
>>   You may obtain a copy of the License at
>>
>>
>>     http://www.apache.org/licenses/LICENSE-2.0
>>
>>
>>   Unless required by applicable law or agreed to in writing, software
>>   distributed under the License is distributed on an "AS IS" BASIS,
>>   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>>   See the License for the specific language governing permissions and
>>   limitations under the License. See accompanying LICENSE file.
>> -->
>>
>>
>> <!-- Put site-specific property overrides in this file. -->
>>
>>
>> <configuration>
>>   <property>
>>     <name>mapreduce.jobhistory.address</name>
>>     <value>otherhostname043:10020</value>
>>   </property>
>>   <property>
>>     <name>mapreduce.jobhistory.webapp.address</name>
>>     <value>otherhostname043:19888</value>
>>   </property>
>>   <!--
>>   <property>
>>     <name>mapreduce.framework.name</name>
>>     <value>yarn-tez</value>
>>   </property>
>>   -->
>>
>> <!--This option enables dynamic port assignment by mesos -->
>> <property>
>> <name>mapreduce.shuffle.port</name>
>> <value>${myriad.mapreduce.shuffle.port}</value>
>> </property>
>>
>>
>>
>> </configuration>
>>
>>
>>
>>
>>
>>
>>
>>
>>         The issue is when I try to run a job from one of the nodes in the
>> cluster. I get this error below from the log
>> $YARN_HOME/logs/yarn-mapr-nodemanager-`hostname`.log You can see below that
>> it is reading from the yarn-site.xml for rm1, rm2, and rm3.
>>
>>
>>
>>
>>
>> 2016-02-02 15:33:52,116 INFO
>> org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider: Failing
>> over to rm1
>> 2016-02-02 15:33:52,117 INFO
>> org.apache.hadoop.io.retry.RetryInvocationHandler: Exception while invoking
>> nodeHeartbeat of class ResourceTrackerPBClientImpl over rm1 after 27 fail
>> over attempts. Trying to fail over after sleeping for 17384ms.
>> java.net.ConnectException: Call From `hostname`/10.1.194.49 to
>> otherhostname041:8031 failed on connection exception:
>> java.net.ConnectException: Connection refused; For more details see:
>> http://wiki.apache.org/hadoop/ConnectionRefused
>>         at sun.reflect.GeneratedConstructorAccessor22.newInstance(Unknown
>> Source)
>>         at
>> sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
>>         at java.lang.reflect.Constructor.newInstance(Constructor.java:422)
>>         at
>> org.apache.hadoop.net.NetUtils.wrapWithMessage(NetUtils.java:792)
>>         at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:732)
>>         at org.apache.hadoop.ipc.Client.call(Client.java:1482)
>>         at org.apache.hadoop.ipc.Client.call(Client.java:1409)
>>         at
>> org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:229)
>>         at com.sun.proxy.$Proxy28.nodeHeartbeat(Unknown Source)
>>         at
>> org.apache.hadoop.yarn.server.api.impl.pb.client.ResourceTrackerPBClientImpl.nodeHeartbeat(ResourceTrackerPBClientImpl.java:80)
>>         at sun.reflect.GeneratedMethodAccessor3.invoke(Unknown Source)
>>         at
>> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>>         at java.lang.reflect.Method.invoke(Method.java:497)
>>         at
>> org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:187)
>>         at
>> org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102)
>>         at com.sun.proxy.$Proxy29.nodeHeartbeat(Unknown Source)
>>         at
>> org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl$1.run(NodeStatusUpdaterImpl.java:622)
>>         at java.lang.Thread.run(Thread.java:745)
>> Caused by: java.net.ConnectException: Connection refused
>>         at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
>>         at
>> sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
>>         at
>> org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:206)
>>         at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:531)
>>         at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:495)
>>         at
>> org.apache.hadoop.ipc.Client$Connection.setupConnection(Client.java:614)
>>         at
>> org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:708)
>>         at
>> org.apache.hadoop.ipc.Client$Connection.access$2900(Client.java:374)
>>         at org.apache.hadoop.ipc.Client.getConnection(Client.java:1531)
>>         at org.apache.hadoop.ipc.Client.call(Client.java:1448)
>>         ... 12 more
>> 2016-02-02 15:34:09,502 INFO
>> org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider: Failing
>> over to rm2
>> 2016-02-02 15:34:09,503 INFO
>> org.apache.hadoop.io.retry.RetryInvocationHandler: Exception while invoking
>> nodeHeartbeat of class ResourceTrackerPBClientImpl over rm2 after 28 fail
>> over attempts. Trying to fail over after sleeping for 35554ms.
>> java.net.ConnectException: Call From `hostname`/10.1.194.49 to
>> otherhostname042:8031 failed on connection exception:
>> java.net.ConnectException: Connection refused; For more details see:
>> http://wiki.apache.org/hadoop/ConnectionRefused
>>         at sun.reflect.GeneratedConstructorAccessor22.newInstance(Unknown
>> Source)
>>         at
>> sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
>>         at java.lang.reflect.Constructor.newInstance(Constructor.java:422)
>>         at
>> org.apache.hadoop.net.NetUtils.wrapWithMessage(NetUtils.java:792)
>>         at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:732)
>>         at org.apache.hadoop.ipc.Client.call(Client.java:1482)
>>         at org.apache.hadoop.ipc.Client.call(Client.java:1409)
>>         at
>> org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:229)
>>         at com.sun.proxy.$Proxy28.nodeHeartbeat(Unknown Source)
>>         at
>> org.apache.hadoop.yarn.server.api.impl.pb.client.ResourceTrackerPBClientImpl.nodeHeartbeat(ResourceTrackerPBClientImpl.java:80)
>>         at sun.reflect.GeneratedMethodAccessor3.invoke(Unknown Source)
>>         at
>> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>>         at java.lang.reflect.Method.invoke(Method.java:497)
>>         at
>> org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:187)
>>         at
>> org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102)
>>         at com.sun.proxy.$Proxy29.nodeHeartbeat(Unknown Source)
>>         at
>> org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl$1.run(NodeStatusUpdaterImpl.java:622)
>>         at java.lang.Thread.run(Thread.java:745)
>> Caused by: java.net.ConnectException: Connection refused
>>         at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
>>         at
>> sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
>>         at
>> org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:206)
>>         at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:531)
>>         at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:495)
>>         at
>> org.apache.hadoop.ipc.Client$Connection.setupConnection(Client.java:614)
>>         at
>> org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:708)
>>         at
>> org.apache.hadoop.ipc.Client$Connection.access$2900(Client.java:374)
>>         at org.apache.hadoop.ipc.Client.getConnection(Client.java:1531)
>>         at org.apache.hadoop.ipc.Client.call(Client.java:1448)
>>         ... 12 more
>> 2016-02-02 15:34:45,058 INFO
>> org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider: Failing
>> over to rm3
>> 2016-02-02 15:34:45,060 INFO
>> org.apache.hadoop.io.retry.RetryInvocationHandler: Exception while invoking
>> nodeHeartbeat of class ResourceTrackerPBClientImpl over rm3 after 29 fail
>> over attempts. Trying to fail over after sleeping for 17219ms.
>> java.net.ConnectException: Call From `hostname`/10.1.194.49 to
>> otherhostname043:8031 failed on connection exception:
>> java.net.ConnectException: Connection refused; For more details see:
>> http://wiki.apache.org/hadoop/ConnectionRefused
>>         at sun.reflect.GeneratedConstructorAccessor22.newInstance(Unknown
>> Source)
>>         at
>> sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
>>         at java.lang.reflect.Constructor.newInstance(Constructor.java:422)
>>         at
>> org.apache.hadoop.net.NetUtils.wrapWithMessage(NetUtils.java:792)
>>         at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:732)
>>         at org.apache.hadoop.ipc.Client.call(Client.java:1482)
>>         at org.apache.hadoop.ipc.Client.call(Client.java:1409)
>>         at
>> org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:229)
>>         at com.sun.proxy.$Proxy28.nodeHeartbeat(Unknown Source)
>>         at
>> org.apache.hadoop.yarn.server.api.impl.pb.client.ResourceTrackerPBClientImpl.nodeHeartbeat(ResourceTrackerPBClientImpl.java:80)
>>         at sun.reflect.GeneratedMethodAccessor3.invoke(Unknown Source)
>>         at
>> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>>         at java.lang.reflect.Method.invoke(Method.java:497)
>>         at
>> org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:187)
>>         at
>> org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102)
>>         at com.sun.proxy.$Proxy29.nodeHeartbeat(Unknown Source)
>>         at
>> org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl$1.run(NodeStatusUpdaterImpl.java:622)
>>         at java.lang.Thread.run(Thread.java:745)
>> Caused by: java.net.ConnectException: Connection refused
>>         at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
>>         at
>> sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
>>         at
>> org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:206)
>>         at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:531)
>>         at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:495)
>>         at
>> org.apache.hadoop.ipc.Client$Connection.setupConnection(Client.java:614)
>>         at
>> org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:708)
>>         at
>> org.apache.hadoop.ipc.Client$Connection.access$2900(Client.java:374)
>>         at org.apache.hadoop.ipc.Client.getConnection(Client.java:1531)
>>         at org.apache.hadoop.ipc.Client.call(Client.java:1448)
>>         ... 12 more
>> 2016-02-02 15:35:02,280 INFO
>> org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider: Failing
>> over to rm1
>> 2016-02-02 15:35:02,281 WARN
>> org.apache.hadoop.io.retry.RetryInvocationHandler: Exception while invoking
>> class
>> org.apache.hadoop.yarn.server.api.impl.pb.client.ResourceTrackerPBClientImpl.nodeHeartbeat
>> over rm1. Not retrying because failovers (30) exceeded maximum allowed (30)
>>
>>
>>
>>
>> > Miguel Bernadin Accenture Technology Labs – System Engineering
>> Contact: W (408) 817-2742 | M (631) 835-6345 |
>> miguel.bernadin@accenture.com
>>
>>
>>
>>
>>
>>
>> On 2/3/16, 10:49 AM, "Santosh Marella" <sm...@maprtech.com> wrote:
>>
>> >Hi Miguel,
>> >
>> >   Are you running the YARN cluster using Myriad (I assume so)? How did
>> you
>> >launch your RM - manually/using marathon/using warden? How does NMs
>> >discover where RM is - perhaps you can paste your yarn-site.xml from the
>> RM
>> >node and one of your NM nodes.
>> >
>> >Thanks,
>> >Santosh
>> >
>> >On Wed, Feb 3, 2016 at 10:36 AM, <mi...@accenture.com> wrote:
>> >
>> >> Hello guys,
>> >>
>> >> I wanted to know if anyone with a MapR environment can share with me
>> their
>> >> yarn-site.xml and mapred-site.xml files. When running the
>> terasort
>> >> job, it looks like it's looking for rm1, rm2, and rm3. I modified the
>> file
>> >> in place rather than taking it from template.
>> >>
>> >> > Miguel Bernadin Accenture Technology Labs – System Engineering
>> >> Contact: W (408) 817-2742 | M (631) 835-6345 |
>> >> miguel.bernadin@accenture.com<ma...@accenture.com>
>> >>
>> >> ________________________________
>> >>
>> >> This message is for the designated recipient only and may contain
>> >> privileged, proprietary, or otherwise confidential information. If you
>> have
>> >> received it in error, please notify the sender immediately and delete
>> the
>> >> original. Any other use of the e-mail by you is prohibited. Where
>> allowed
>> >> by local law, electronic communications with Accenture and its
>> affiliates,
>> >> including e-mail and instant messaging (including content), may be
>> scanned
>> >> by our systems for the purposes of information security and assessment
>> of
>> >> internal compliance with Accenture policy.
>> >>
>> >>
>> ______________________________________________________________________________________
>> >>
>> >> www.accenture.com
>> >>
>>
>> ________________________________
>>
>> This message is for the designated recipient only and may contain
>> privileged, proprietary, or otherwise confidential information. If you have
>> received it in error, please notify the sender immediately and delete the
>> original. Any other use of the e-mail by you is prohibited. Where allowed
>> by local law, electronic communications with Accenture and its affiliates,
>> including e-mail and instant messaging (including content), may be scanned
>> by our systems for the purposes of information security and assessment of
>> internal compliance with Accenture policy.
>>
>> ______________________________________________________________________________________
>>
>> www.accenture.com
>>

________________________________

This message is for the designated recipient only and may contain privileged, proprietary, or otherwise confidential information. If you have received it in error, please notify the sender immediately and delete the original. Any other use of the e-mail by you is prohibited. Where allowed by local law, electronic communications with Accenture and its affiliates, including e-mail and instant messaging (including content), may be scanned by our systems for the purposes of information security and assessment of internal compliance with Accenture policy.
______________________________________________________________________________________

www.accenture.com

Re: MapR yarn-site.xml and mapred-site.xml

Posted by Santosh Marella <sm...@maprtech.com>.
Thanks for the information, Miguel. I suggest using the following
yarn-site.xml:

  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>rm.marathon.mesos</value>
  </property>

Santosh

On Wed, Feb 3, 2016 at 11:12 AM, <mi...@accenture.com> wrote:

> Hi Santosh,
>
>         I am running Myriad using Marathon. I am using Mesos-dns as well
> and the yarn-site.xml and mapred-site.xml are exactly the same for both RM
> and NM. I just only turned off my RM from MapR Control Console which
> explains my error logs below. My understanding is that maybe I should
> remove the rm1, rm2 and rm3 information from the yarn-site file but I want
> to make sure I am doing it correctly.
>
>         Here is the command I have sent to start RM from Marathon. I am
> able to see Myriad running via this link in my environment
> http://rm.marathon.mesos:8192/#/.
>
> {
>   "id": "rm",
>   "instances": 1,
>   "cpus": 0.2,
>   "mem": 2048,
>   "cmd": "env && export
> YARN_RESOURCEMANAGER_OPTS=-Dyarn.resourcemanager.hostname=rm.marathon.mesos
> && yarn resourcemanager",
>   "ports": [ 0 ]
> }
>
>
>
>
>         Here is the yarn-site.xml file from our MapR cluster:
>
>
>
>
> root@`hostname`:/home/diuser# cat
> /opt/mapr/hadoop/hadoop-2.7.0/etc/hadoop/yarn-site.xml
> <?xml version="1.0"?>
> <!--
>   Licensed under the Apache License, Version 2.0 (the "License");
>   you may not use this file except in compliance with the License.
>   You may obtain a copy of the License at
>
>
>     http://www.apache.org/licenses/LICENSE-2.0
>
>
>   Unless required by applicable law or agreed to in writing, software
>   distributed under the License is distributed on an "AS IS" BASIS,
>   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>   See the License for the specific language governing permissions and
>   limitations under the License. See accompanying LICENSE file.
> -->
> <configuration>
>   <!-- Resource Manager HA Configs -->
>   <property>
>     <name>yarn.resourcemanager.ha.enabled</name>
>     <value>true</value>
>   </property>
>   <property>
>     <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
>     <value>true</value>
>   </property>
>   <property>
>     <name>yarn.resourcemanager.ha.automatic-failover.embedded</name>
>     <value>true</value>
>   </property>
>   <property>
>     <name>yarn.resourcemanager.recovery.enabled</name>
>     <value>true</value>
>   </property>
>   <property>
>     <name>yarn.resourcemanager.cluster-id</name>
>     <value>yarn-dicluster.techlabs.accenture.com</value>
>   </property>
>   <property>
>     <name>yarn.resourcemanager.ha.rm-ids</name>
>     <value>rm1,rm2,rm3</value>
>   </property>
>   <property>
>     <name>yarn.resourcemanager.ha.id</name>
>     <value>rm1</value>
>   </property>
>   <property>
>     <name>yarn.resourcemanager.zk-address</name>
>
> <value>otherhostname041:5181,otherhostname042:5181,otherhostname043:5181</value>
>   </property>
>
>   <!-- Configuration for rm1 -->
>   <property>
>     <name>yarn.resourcemanager.scheduler.address.rm1</name>
>     <value>otherhostname041:8030</value>
>   </property>
>   <property>
>     <name>yarn.resourcemanager.resource-tracker.address.rm1</name>
>     <value>otherhostname041:8031</value>
>   </property>
>   <property>
>     <name>yarn.resourcemanager.address.rm1</name>
>     <value>otherhostname041:8032</value>
>   </property>
>   <property>
>     <name>yarn.resourcemanager.admin.address.rm1</name>
>     <value>otherhostname041:8033</value>
>   </property>
>   <property>
>     <name>yarn.resourcemanager.webapp.address.rm1</name>
>     <value>otherhostname041:8088</value>
>   </property>
>   <property>
>     <name>yarn.resourcemanager.webapp.https.address.rm1</name>
>     <value>otherhostname041:8090</value>
>   </property>
>
>
>   <!-- Configuration for rm2 -->
>   <property>
>     <name>yarn.resourcemanager.scheduler.address.rm2</name>
>     <value>otherhostname042:8030</value>
>   </property>
>   <property>
>     <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
>     <value>otherhostname042:8031</value>
>   </property>
>   <property>
>     <name>yarn.resourcemanager.address.rm2</name>
>     <value>otherhostname042:8032</value>
>   </property>
>   <property>
>     <name>yarn.resourcemanager.admin.address.rm2</name>
>     <value>otherhostname042:8033</value>
>   </property>
>   <property>
>     <name>yarn.resourcemanager.webapp.address.rm2</name>
>     <value>otherhostname042:8088</value>
>   </property>
>   <property>
>     <name>yarn.resourcemanager.webapp.https.address.rm2</name>
>     <value>otherhostname042:8090</value>
>   </property>
>
>
>   <!-- Configuration for rm3 -->
>   <property>
>     <name>yarn.resourcemanager.scheduler.address.rm3</name>
>     <value>otherhostname043:8030</value>
>   </property>
>   <property>
>     <name>yarn.resourcemanager.resource-tracker.address.rm3</name>
>     <value>otherhostname043:8031</value>
>   </property>
>   <property>
>     <name>yarn.resourcemanager.address.rm3</name>
>     <value>otherhostname043:8032</value>
>   </property>
>   <property>
>     <name>yarn.resourcemanager.admin.address.rm3</name>
>     <value>otherhostname043:8033</value>
>   </property>
>   <property>
>     <name>yarn.resourcemanager.webapp.address.rm3</name>
>     <value>otherhostname043:8088</value>
>   </property>
>   <property>
>     <name>yarn.resourcemanager.webapp.https.address.rm3</name>
>     <value>otherhostname043:8090</value>
>   </property>
>
>
>   <!-- :::CAUTION::: DO NOT EDIT ANYTHING ON OR ABOVE THIS LINE -->
>
> <property>
>           <name>yarn.nodemanager.resource.cpu-vcores</name>
>           <value>${nodemanager.resource.cpu-vcores}</value>
>       </property>
>       <property>
>           <name>yarn.nodemanager.resource.memory-mb</name>
>           <value>${nodemanager.resource.memory-mb}</value>
>       </property>
>       <!--These options enable dynamic port assignment by mesos -->
>       <property>
>           <name>yarn.nodemanager.address</name>
>           <value>${myriad.yarn.nodemanager.address}</value>
>       </property>
>       <property>
>           <name>yarn.nodemanager.webapp.address</name>
>           <value>${myriad.yarn.nodemanager.webapp.address}</value>
>       </property>
>       <property>
>           <name>yarn.nodemanager.webapp.https.address</name>
>           <value>${myriad.yarn.nodemanager.webapp.address}</value>
>       </property>
>       <property>
>           <name>yarn.nodemanager.localizer.address</name>
>           <value>${myriad.yarn.nodemanager.localizer.address}</value>
>       </property>
>       <!-- Configure Myriad Scheduler here -->
>       <property>
>           <name>yarn.resourcemanager.scheduler.class</name>
>
> <value>org.apache.myriad.scheduler.yarn.MyriadFairScheduler</value>
>           <description>One can configure other schedulers as well from
> following list: org.apache.myriad.scheduler.yarn.MyriadCapacityScheduler,
> org.apache.myriad.scheduler.yarn.MyriadFifoScheduler</description>
>       </property>
>
>
> </configuration>
>
>
>
>
>
>
>         Here is the mapred-site.xml from the cluster as well:
>
>
>
>
> root@`hostname`:/home/diuser# cat
> /opt/mapr/hadoop/hadoop-2.7.0/etc/hadoop/mapred-site.xml
> <?xml version="1.0"?>
> <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
> <!--
>   Licensed under the Apache License, Version 2.0 (the "License");
>   you may not use this file except in compliance with the License.
>   You may obtain a copy of the License at
>
>
>     http://www.apache.org/licenses/LICENSE-2.0
>
>
>   Unless required by applicable law or agreed to in writing, software
>   distributed under the License is distributed on an "AS IS" BASIS,
>   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>   See the License for the specific language governing permissions and
>   limitations under the License. See accompanying LICENSE file.
> -->
>
>
> <!-- Put site-specific property overrides in this file. -->
>
>
> <configuration>
>   <property>
>     <name>mapreduce.jobhistory.address</name>
>     <value>otherhostname043:10020</value>
>   </property>
>   <property>
>     <name>mapreduce.jobhistory.webapp.address</name>
>     <value>otherhostname043:19888</value>
>   </property>
>   <!--
>   <property>
>     <name>mapreduce.framework.name</name>
>     <value>yarn-tez</value>
>   </property>
>   -->
>
> <!--This option enables dynamic port assignment by mesos -->
> <property>
> <name>mapreduce.shuffle.port</name>
> <value>${myriad.mapreduce.shuffle.port}</value>
> </property>
>
>
>
> </configuration>
>
>
>
>
>
>
>
>
>         The issue is when I try to run a job from one of the nodes in the
> cluster. I get this error below from the log
> $YARN_HOME/logs/yarn-mapr-nodemanager-`hostname`.log You can see below that
> it is reading from the yarn-site.xml for rm1, rm2, and rm3.
>
>
>
>
>
> 2016-02-02 15:33:52,116 INFO
> org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider: Failing
> over to rm1
> 2016-02-02 15:33:52,117 INFO
> org.apache.hadoop.io.retry.RetryInvocationHandler: Exception while invoking
> nodeHeartbeat of class ResourceTrackerPBClientImpl over rm1 after 27 fail
> over attempts. Trying to fail over after sleeping for 17384ms.
> java.net.ConnectException: Call From `hostname`/10.1.194.49 to
> otherhostname041:8031 failed on connection exception:
> java.net.ConnectException: Connection refused; For more details see:
> http://wiki.apache.org/hadoop/ConnectionRefused
>         at sun.reflect.GeneratedConstructorAccessor22.newInstance(Unknown
> Source)
>         at
> sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
>         at java.lang.reflect.Constructor.newInstance(Constructor.java:422)
>         at
> org.apache.hadoop.net.NetUtils.wrapWithMessage(NetUtils.java:792)
>         at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:732)
>         at org.apache.hadoop.ipc.Client.call(Client.java:1482)
>         at org.apache.hadoop.ipc.Client.call(Client.java:1409)
>         at
> org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:229)
>         at com.sun.proxy.$Proxy28.nodeHeartbeat(Unknown Source)
>         at
> org.apache.hadoop.yarn.server.api.impl.pb.client.ResourceTrackerPBClientImpl.nodeHeartbeat(ResourceTrackerPBClientImpl.java:80)
>         at sun.reflect.GeneratedMethodAccessor3.invoke(Unknown Source)
>         at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>         at java.lang.reflect.Method.invoke(Method.java:497)
>         at
> org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:187)
>         at
> org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102)
>         at com.sun.proxy.$Proxy29.nodeHeartbeat(Unknown Source)
>         at
> org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl$1.run(NodeStatusUpdaterImpl.java:622)
>         at java.lang.Thread.run(Thread.java:745)
> Caused by: java.net.ConnectException: Connection refused
>         at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
>         at
> sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
>         at
> org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:206)
>         at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:531)
>         at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:495)
>         at
> org.apache.hadoop.ipc.Client$Connection.setupConnection(Client.java:614)
>         at
> org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:708)
>         at
> org.apache.hadoop.ipc.Client$Connection.access$2900(Client.java:374)
>         at org.apache.hadoop.ipc.Client.getConnection(Client.java:1531)
>         at org.apache.hadoop.ipc.Client.call(Client.java:1448)
>         ... 12 more
> 2016-02-02 15:34:09,502 INFO
> org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider: Failing
> over to rm2
> 2016-02-02 15:34:09,503 INFO
> org.apache.hadoop.io.retry.RetryInvocationHandler: Exception while invoking
> nodeHeartbeat of class ResourceTrackerPBClientImpl over rm2 after 28 fail
> over attempts. Trying to fail over after sleeping for 35554ms.
> java.net.ConnectException: Call From `hostname`/10.1.194.49 to
> otherhostname042:8031 failed on connection exception:
> java.net.ConnectException: Connection refused; For more details see:
> http://wiki.apache.org/hadoop/ConnectionRefused
>         at sun.reflect.GeneratedConstructorAccessor22.newInstance(Unknown
> Source)
>         at
> sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
>         at java.lang.reflect.Constructor.newInstance(Constructor.java:422)
>         at
> org.apache.hadoop.net.NetUtils.wrapWithMessage(NetUtils.java:792)
>         at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:732)
>         at org.apache.hadoop.ipc.Client.call(Client.java:1482)
>         at org.apache.hadoop.ipc.Client.call(Client.java:1409)
>         at
> org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:229)
>         at com.sun.proxy.$Proxy28.nodeHeartbeat(Unknown Source)
>         at
> org.apache.hadoop.yarn.server.api.impl.pb.client.ResourceTrackerPBClientImpl.nodeHeartbeat(ResourceTrackerPBClientImpl.java:80)
>         at sun.reflect.GeneratedMethodAccessor3.invoke(Unknown Source)
>         at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>         at java.lang.reflect.Method.invoke(Method.java:497)
>         at
> org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:187)
>         at
> org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102)
>         at com.sun.proxy.$Proxy29.nodeHeartbeat(Unknown Source)
>         at
> org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl$1.run(NodeStatusUpdaterImpl.java:622)
>         at java.lang.Thread.run(Thread.java:745)
> Caused by: java.net.ConnectException: Connection refused
>         at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
>         at
> sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
>         at
> org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:206)
>         at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:531)
>         at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:495)
>         at
> org.apache.hadoop.ipc.Client$Connection.setupConnection(Client.java:614)
>         at
> org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:708)
>         at
> org.apache.hadoop.ipc.Client$Connection.access$2900(Client.java:374)
>         at org.apache.hadoop.ipc.Client.getConnection(Client.java:1531)
>         at org.apache.hadoop.ipc.Client.call(Client.java:1448)
>         ... 12 more
> 2016-02-02 15:34:45,058 INFO
> org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider: Failing
> over to rm3
> 2016-02-02 15:34:45,060 INFO
> org.apache.hadoop.io.retry.RetryInvocationHandler: Exception while invoking
> nodeHeartbeat of class ResourceTrackerPBClientImpl over rm3 after 29 fail
> over attempts. Trying to fail over after sleeping for 17219ms.
> java.net.ConnectException: Call From `hostname`/10.1.194.49 to
> otherhostname043:8031 failed on connection exception:
> java.net.ConnectException: Connection refused; For more details see:
> http://wiki.apache.org/hadoop/ConnectionRefused
>         at sun.reflect.GeneratedConstructorAccessor22.newInstance(Unknown
> Source)
>         at
> sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
>         at java.lang.reflect.Constructor.newInstance(Constructor.java:422)
>         at
> org.apache.hadoop.net.NetUtils.wrapWithMessage(NetUtils.java:792)
>         at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:732)
>         at org.apache.hadoop.ipc.Client.call(Client.java:1482)
>         at org.apache.hadoop.ipc.Client.call(Client.java:1409)
>         at
> org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:229)
>         at com.sun.proxy.$Proxy28.nodeHeartbeat(Unknown Source)
>         at
> org.apache.hadoop.yarn.server.api.impl.pb.client.ResourceTrackerPBClientImpl.nodeHeartbeat(ResourceTrackerPBClientImpl.java:80)
>         at sun.reflect.GeneratedMethodAccessor3.invoke(Unknown Source)
>         at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>         at java.lang.reflect.Method.invoke(Method.java:497)
>         at
> org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:187)
>         at
> org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102)
>         at com.sun.proxy.$Proxy29.nodeHeartbeat(Unknown Source)
>         at
> org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl$1.run(NodeStatusUpdaterImpl.java:622)
>         at java.lang.Thread.run(Thread.java:745)
> Caused by: java.net.ConnectException: Connection refused
>         at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
>         at
> sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
>         at
> org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:206)
>         at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:531)
>         at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:495)
>         at
> org.apache.hadoop.ipc.Client$Connection.setupConnection(Client.java:614)
>         at
> org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:708)
>         at
> org.apache.hadoop.ipc.Client$Connection.access$2900(Client.java:374)
>         at org.apache.hadoop.ipc.Client.getConnection(Client.java:1531)
>         at org.apache.hadoop.ipc.Client.call(Client.java:1448)
>         ... 12 more
> 2016-02-02 15:35:02,280 INFO
> org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider: Failing
> over to rm1
> 2016-02-02 15:35:02,281 WARN
> org.apache.hadoop.io.retry.RetryInvocationHandler: Exception while invoking
> class
> org.apache.hadoop.yarn.server.api.impl.pb.client.ResourceTrackerPBClientImpl.nodeHeartbeat
> over rm1. Not retrying because failovers (30) exceeded maximum allowed (30)
>
>
>
>
> > Miguel Bernadin Accenture Technology Labs – System Engineering
> Contact: W (408) 817-2742 | M (631) 835-6345 |
> miguel.bernadin@accenture.com
>
>
>
>
>
>
> On 2/3/16, 10:49 AM, "Santosh Marella" <sm...@maprtech.com> wrote:
>
> >Hi Miguel,
> >
> >   Are you running the YARN cluster using Myriad (I assume so)? How did
> you
> >launch your RM - manually/using marathon/using warden? How does NMs
> >discover where RM is - perhaps you can paste your yarn-site.xml from the
> RM
> >node and one of your NM nodes.
> >
> >Thanks,
> >Santosh
> >
> >On Wed, Feb 3, 2016 at 10:36 AM, <mi...@accenture.com> wrote:
> >
> >> Hello guys,
> >>
> >> I wanted to know if anyone with a MapR environment can share with me
> their
> >> yarn-site.xml and mapred-site.xml files. When running the
> terasort
> >> job, it looks like it's looking for rm1, rm2, and rm3. I modified the
> file
> >> in place rather than taking it from template.
> >>
> >> > Miguel Bernadin Accenture Technology Labs – System Engineering
> >> Contact: W (408) 817-2742 | M (631) 835-6345 |
> >> miguel.bernadin@accenture.com<ma...@accenture.com>
> >>
> >> ________________________________
> >>
> >> This message is for the designated recipient only and may contain
> >> privileged, proprietary, or otherwise confidential information. If you
> have
> >> received it in error, please notify the sender immediately and delete
> the
> >> original. Any other use of the e-mail by you is prohibited. Where
> allowed
> >> by local law, electronic communications with Accenture and its
> affiliates,
> >> including e-mail and instant messaging (including content), may be
> scanned
> >> by our systems for the purposes of information security and assessment
> of
> >> internal compliance with Accenture policy.
> >>
> >>
> ______________________________________________________________________________________
> >>
> >> www.accenture.com
> >>
>
> ________________________________
>
> This message is for the designated recipient only and may contain
> privileged, proprietary, or otherwise confidential information. If you have
> received it in error, please notify the sender immediately and delete the
> original. Any other use of the e-mail by you is prohibited. Where allowed
> by local law, electronic communications with Accenture and its affiliates,
> including e-mail and instant messaging (including content), may be scanned
> by our systems for the purposes of information security and assessment of
> internal compliance with Accenture policy.
>
> ______________________________________________________________________________________
>
> www.accenture.com
>