You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@ambari.apache.org by "Vincent.He (JIRA)" <ji...@apache.org> on 2015/08/05 04:43:05 UTC
[jira] [Commented] (AMBARI-12628) When HDFS HA enabled with Ambari
2.1, several service failed to start
[ https://issues.apache.org/jira/browse/AMBARI-12628?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14654725#comment-14654725 ]
Vincent.He commented on AMBARI-12628:
-------------------------------------
After further debugging, I made significant progress on this issue and can now work around it:
1. I had configured a ulimit setting in /etc/profile.
2. When jmx.py fetches the data, it does receive the JSON payload, but the output also contains the error message "/etc/profile: line 79: ulimit: open files: cannot modify limit: Operation not permitted", so the string passed to the JSON parser is
2015-08-04 22:29:38,386 - checked_call returned (0, '/etc/profile: line 79: ulimit: open files: cannot modify limit: Operation not permitted\n{\n "beans" : [ {\n "name" : "Hadoop:service=NameNode,name=NameNodeStatus",\n "modelerType" : "org.apache.hadoop.hdfs.server.namenode.NameNode",\n "State" : "active",\n "NNRole" : "NameNode",\n "HostAndPort" : "h02.bigdata.com:8020",\n "SecurityEnabled" : false,\n "LastHATransitionTime" : 1438739950089\n } ]\n}')
This is not valid JSON because the error message is prepended to the output.
So the real question is why shell.checked_call includes that error text in the string it returns alongside the normal output.
> When HDFS HA enabled with Ambari 2.1, several service failed to start
> ----------------------------------------------------------------------
>
> Key: AMBARI-12628
> URL: https://issues.apache.org/jira/browse/AMBARI-12628
> Project: Ambari
> Issue Type: Bug
> Components: ambari-server
> Affects Versions: 2.1.0
> Reporter: Vincent.He
> Priority: Critical
>
> Installed Ambari 2.1 with HDP 2.3. After enabling HA for HDFS, several services failed to start (e.g., the MapReduce History Server). Digging into the details, this is different from issue AMBARI-12374.
> The reported failure is an inability to decode the JSON string:
> 2015-08-03 02:09:35,420 - Getting jmx metrics from NN failed. URL: http://h03.bigdata.com:50070/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus
> Traceback (most recent call last):
> File "/usr/lib/python2.6/site-packages/resource_management/libraries/functions/jmx.py", line 40, in get_value_from_jmx
> data_dict = json.loads(data)
> File "/usr/lib/python2.6/site-packages/ambari_simplejson/__init__.py", line 307, in loads
> return _default_decoder.decode(s)
> File "/usr/lib/python2.6/site-packages/ambari_simplejson/decoder.py", line 335, in decode
> obj, end = self.raw_decode(s, idx=_w(s, 0).end())
> File "/usr/lib/python2.6/site-packages/ambari_simplejson/decoder.py", line 353, in raw_decode
> raise ValueError("No JSON object could be decoded")
> ValueError: No JSON object could be decoded
> 2015-08-03 02:09:35,494 - Getting jmx metrics from NN failed. URL: http://h02.bigdata.lenovo.com:50070/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus
> Traceback (most recent call last):
> File "/usr/lib/python2.6/site-packages/resource_management/libraries/functions/jmx.py", line 40, in get_value_from_jmx
> data_dict = json.loads(data)
> File "/usr/lib/python2.6/site-packages/ambari_simplejson/__init__.py", line 307, in loads
> return _default_decoder.decode(s)
> File "/usr/lib/python2.6/site-packages/ambari_simplejson/decoder.py", line 335, in decode
> obj, end = self.raw_decode(s, idx=_w(s, 0).end())
> File "/usr/lib/python2.6/site-packages/ambari_simplejson/decoder.py", line 353, in raw_decode
> raise ValueError("No JSON object could be decoded")
> ValueError: No JSON object could be decoded
> Traceback (most recent call last):
> File "/var/lib/ambari-agent/cache/common-services/YARN/2.1.0.2.0/package/scripts/historyserver.py", line 168, in <module>
> HistoryServer().execute()
> File "/usr/lib/python2.6/site-packages/resource_management/libraries/script/script.py", line 218, in execute
> method(env)
> File "/var/lib/ambari-agent/cache/common-services/YARN/2.1.0.2.0/package/scripts/historyserver.py", line 91, in start
> self.configure(env) # FOR SECURITY
> File "/var/lib/ambari-agent/cache/common-services/YARN/2.1.0.2.0/package/scripts/historyserver.py", line 55, in configure
> yarn(name="historyserver")
> File "/usr/lib/python2.6/site-packages/ambari_commons/os_family_impl.py", line 89, in thunk
> return fn(*args, **kwargs)
> File "/var/lib/ambari-agent/cache/common-services/YARN/2.1.0.2.0/package/scripts/yarn.py", line 72, in yarn
> recursive_chmod=True
> File "/usr/lib/python2.6/site-packages/resource_management/core/base.py", line 157, in __init__
> self.env.run()
> File "/usr/lib/python2.6/site-packages/resource_management/core/environment.py", line 152, in run
> self.run_action(resource, action)
> File "/usr/lib/python2.6/site-packages/resource_management/core/environment.py", line 118, in run_action
> provider_action()
> File "/usr/lib/python2.6/site-packages/resource_management/libraries/providers/hdfs_resource.py", line 390, in action_create_on_execute
> self.action_delayed("create")
> File "/usr/lib/python2.6/site-packages/resource_management/libraries/providers/hdfs_resource.py", line 387, in action_delayed
> self.get_hdfs_resource_executor().action_delayed(action_name, self)
> File "/usr/lib/python2.6/site-packages/resource_management/libraries/providers/hdfs_resource.py", line 239, in action_delayed
> main_resource.resource.security_enabled, main_resource.resource.logoutput)
> File "/usr/lib/python2.6/site-packages/resource_management/libraries/providers/hdfs_resource.py", line 126, in __init__
> security_enabled, run_user)
> File "/usr/lib/python2.6/site-packages/resource_management/libraries/functions/namenode_ha_utils.py", line 113, in get_property_for_active_namenode
> raise Fail("There is no active namenodes.")
> resource_management.core.exceptions.Fail: There is no active namenodes.
> The key issue is "File "/usr/lib/python2.6/site-packages/ambari_simplejson/decoder.py", line 353, in raw_decode
> raise ValueError("No JSON object could be decoded")
> "
> The output I got is
> [root@h02 patch]# curl -s http://h03.bigdata.com:50070/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus
> {
> "beans" : [ {
> "name" : "Hadoop:service=NameNode,name=NameNodeStatus",
> "modelerType" : "org.apache.hadoop.hdfs.server.namenode.NameNode",
> "State" : "standby",
> "NNRole" : "NameNode",
> "HostAndPort" : "h03.bigdata.com:8020",
> "SecurityEnabled" : false,
> "LastHATransitionTime" : 1438594046119
> } ]
> }
> [root@h02 patch]# curl -s http://h02.bigdata.com:50070/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus
> {
> "beans" : [ {
> "name" : "Hadoop:service=NameNode,name=NameNodeStatus",
> "modelerType" : "org.apache.hadoop.hdfs.server.namenode.NameNode",
> "State" : "active",
> "NNRole" : "NameNode",
> "HostAndPort" : "h02.bigdata.com:8020",
> "SecurityEnabled" : false,
> "LastHATransitionTime" : 1438594046591
> } ]
> }
> I also tried the patch in issue AMBARI-12374, got the same error, and the new URI, I got response,
> [root@h02 patch]# curl -s http://h03.bigdata.com:50070/jmx?qry=Hadoop:service=NameNode,name=FSNamesystem
> {
> "beans" : [ {
> "name" : "Hadoop:service=NameNode,name=FSNamesystem",
> "modelerType" : "FSNamesystem",
> "tag.Context" : "dfs",
> "tag.HAState" : "standby",
> "tag.Hostname" : "h03.bigdata.com",
> "MissingBlocks" : 0,
> "MissingReplOneBlocks" : 0,
> "ExpiredHeartbeats" : 0,
> "TransactionsSinceLastCheckpoint" : -756,
> "TransactionsSinceLastLogRoll" : 0,
> "LastWrittenTransactionId" : 5760,
> "LastCheckpointTime" : 1438637246806,
> "CapacityTotal" : 377945479446528,
> "CapacityTotalGB" : 351989.0,
> "CapacityUsed" : 2162847744,
> "CapacityUsedGB" : 2.0,
> "CapacityRemaining" : 374078076620800,
> "CapacityRemainingGB" : 348387.0,
> "CapacityUsedNonDFS" : 3865239977984,
> "TotalLoad" : 16,
> "SnapshottableDirectories" : 0,
> "Snapshots" : 0,
> "BlocksTotal" : 588,
> "FilesTotal" : 825,
> "PendingReplicationBlocks" : 0,
> "UnderReplicatedBlocks" : 0,
> "CorruptBlocks" : 0,
> "ScheduledReplicationBlocks" : 0,
> "PendingDeletionBlocks" : 0,
> "ExcessBlocks" : 0,
> "PostponedMisreplicatedBlocks" : 0,
> "PendingDataNodeMessageCount" : 0,
> "MillisSinceLastLoadedEdits" : 49071,
> "BlockCapacity" : 2097152,
> "StaleDataNodes" : 0,
> "TotalFiles" : 825
> } ]
> }
> [root@h02 patch]# curl -s http://h02.bigdata.com:50070/jmx?qry=Hadoop:service=NameNode,name=FSNamesystem
> {
> "beans" : [ {
> "name" : "Hadoop:service=NameNode,name=FSNamesystem",
> "modelerType" : "FSNamesystem",
> "tag.Context" : "dfs",
> "tag.HAState" : "active",
> "tag.Hostname" : "h02.bigdata.com",
> "MissingBlocks" : 0,
> "MissingReplOneBlocks" : 0,
> "ExpiredHeartbeats" : 0,
> "TransactionsSinceLastCheckpoint" : 227,
> "TransactionsSinceLastLogRoll" : 1,
> "LastWrittenTransactionId" : 6743,
> "LastCheckpointTime" : 1438637246983,
> "CapacityTotal" : 377945479446528,
> "CapacityTotalGB" : 351989.0,
> "CapacityUsed" : 2162847744,
> "CapacityUsedGB" : 2.0,
> "CapacityRemaining" : 374078076620800,
> "CapacityRemainingGB" : 348387.0,
> "CapacityUsedNonDFS" : 3865239977984,
> "TotalLoad" : 16,
> "SnapshottableDirectories" : 0,
> "Snapshots" : 0,
> "BlocksTotal" : 588,
> "FilesTotal" : 825,
> "PendingReplicationBlocks" : 0,
> "UnderReplicatedBlocks" : 0,
> "CorruptBlocks" : 0,
> "ScheduledReplicationBlocks" : 0,
> "PendingDeletionBlocks" : 0,
> "ExcessBlocks" : 0,
> "PostponedMisreplicatedBlocks" : 0,
> "PendingDataNodeMessageCount" : 0,
> "MillisSinceLastLoadedEdits" : 0,
> "BlockCapacity" : 2097152,
> "StaleDataNodes" : 0,
> "TotalFiles" : 825
> } ]
> }
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)