You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@ambari.apache.org by "Vincent.He (JIRA)" <ji...@apache.org> on 2015/08/04 03:34:04 UTC

[jira] [Created] (AMBARI-12628) When HDFS HA enabled with Ambari 2.1, several service failed to start

Vincent.He created AMBARI-12628:
-----------------------------------

             Summary: When HDFS HA enabled with Ambari 2.1, several service failed to start 
                 Key: AMBARI-12628
                 URL: https://issues.apache.org/jira/browse/AMBARI-12628
             Project: Ambari
          Issue Type: Bug
          Components: ambari-server
    Affects Versions: 2.1.0
            Reporter: Vincent.He
            Priority: Critical


After installing Ambari 2.1 with HDP 2.3 and enabling HA for HDFS, several services failed to start (e.g. the MapReduce History Server). Digging into more detail, this is different from issue AMBARI-12374.

The issue reported is a failure to decode the JSON string:
2015-08-03 02:09:35,420 - Getting jmx metrics from NN failed. URL: http://h03.bigdata.com:50070/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus
Traceback (most recent call last):
  File "/usr/lib/python2.6/site-packages/resource_management/libraries/functions/jmx.py", line 40, in get_value_from_jmx
    data_dict = json.loads(data)
  File "/usr/lib/python2.6/site-packages/ambari_simplejson/__init__.py", line 307, in loads
    return _default_decoder.decode(s)
  File "/usr/lib/python2.6/site-packages/ambari_simplejson/decoder.py", line 335, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/usr/lib/python2.6/site-packages/ambari_simplejson/decoder.py", line 353, in raw_decode
    raise ValueError("No JSON object could be decoded")
ValueError: No JSON object could be decoded
2015-08-03 02:09:35,494 - Getting jmx metrics from NN failed. URL: http://h02.bigdata.lenovo.com:50070/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus
Traceback (most recent call last):
  File "/usr/lib/python2.6/site-packages/resource_management/libraries/functions/jmx.py", line 40, in get_value_from_jmx
    data_dict = json.loads(data)
  File "/usr/lib/python2.6/site-packages/ambari_simplejson/__init__.py", line 307, in loads
    return _default_decoder.decode(s)
  File "/usr/lib/python2.6/site-packages/ambari_simplejson/decoder.py", line 335, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/usr/lib/python2.6/site-packages/ambari_simplejson/decoder.py", line 353, in raw_decode
    raise ValueError("No JSON object could be decoded")
ValueError: No JSON object could be decoded
Traceback (most recent call last):
  File "/var/lib/ambari-agent/cache/common-services/YARN/2.1.0.2.0/package/scripts/historyserver.py", line 168, in <module>
    HistoryServer().execute()
  File "/usr/lib/python2.6/site-packages/resource_management/libraries/script/script.py", line 218, in execute
    method(env)
  File "/var/lib/ambari-agent/cache/common-services/YARN/2.1.0.2.0/package/scripts/historyserver.py", line 91, in start
    self.configure(env) # FOR SECURITY
  File "/var/lib/ambari-agent/cache/common-services/YARN/2.1.0.2.0/package/scripts/historyserver.py", line 55, in configure
    yarn(name="historyserver")
  File "/usr/lib/python2.6/site-packages/ambari_commons/os_family_impl.py", line 89, in thunk
    return fn(*args, **kwargs)
  File "/var/lib/ambari-agent/cache/common-services/YARN/2.1.0.2.0/package/scripts/yarn.py", line 72, in yarn
    recursive_chmod=True
  File "/usr/lib/python2.6/site-packages/resource_management/core/base.py", line 157, in __init__
    self.env.run()
  File "/usr/lib/python2.6/site-packages/resource_management/core/environment.py", line 152, in run
    self.run_action(resource, action)
  File "/usr/lib/python2.6/site-packages/resource_management/core/environment.py", line 118, in run_action
    provider_action()
  File "/usr/lib/python2.6/site-packages/resource_management/libraries/providers/hdfs_resource.py", line 390, in action_create_on_execute
    self.action_delayed("create")
  File "/usr/lib/python2.6/site-packages/resource_management/libraries/providers/hdfs_resource.py", line 387, in action_delayed
    self.get_hdfs_resource_executor().action_delayed(action_name, self)
  File "/usr/lib/python2.6/site-packages/resource_management/libraries/providers/hdfs_resource.py", line 239, in action_delayed
    main_resource.resource.security_enabled, main_resource.resource.logoutput)
  File "/usr/lib/python2.6/site-packages/resource_management/libraries/providers/hdfs_resource.py", line 126, in __init__
    security_enabled, run_user)
  File "/usr/lib/python2.6/site-packages/resource_management/libraries/functions/namenode_ha_utils.py", line 113, in get_property_for_active_namenode
    raise Fail("There is no active namenodes.")
resource_management.core.exceptions.Fail: There is no active namenodes.

The key part of the failure is this quoted trace:
"File "/usr/lib/python2.6/site-packages/ambari_simplejson/decoder.py", line 353, in raw_decode
    raise ValueError("No JSON object could be decoded")
"

The output I got is:
[root@h02 patch]# curl -s http://h03.bigdata.com:50070/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus
{
  "beans" : [ {
    "name" : "Hadoop:service=NameNode,name=NameNodeStatus",
    "modelerType" : "org.apache.hadoop.hdfs.server.namenode.NameNode",
    "State" : "standby",
    "NNRole" : "NameNode",
    "HostAndPort" : "h03.bigdata.com:8020",
    "SecurityEnabled" : false,
    "LastHATransitionTime" : 1438594046119
  } ]
}
[root@h02 patch]# curl -s http://h02.bigdata.com:50070/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus
{
  "beans" : [ {
    "name" : "Hadoop:service=NameNode,name=NameNodeStatus",
    "modelerType" : "org.apache.hadoop.hdfs.server.namenode.NameNode",
    "State" : "active",
    "NNRole" : "NameNode",
    "HostAndPort" : "h02.bigdata.com:8020",
    "SecurityEnabled" : false,
    "LastHATransitionTime" : 1438594046591
  } ]
}

I also tried the patch from issue AMBARI-12374 and got the same error; querying the new URI directly, I got this response:

[root@h02 patch]# curl -s http://h03.bigdata.com:50070/jmx?qry=Hadoop:service=NameNode,name=FSNamesystem
{
  "beans" : [ {
    "name" : "Hadoop:service=NameNode,name=FSNamesystem",
    "modelerType" : "FSNamesystem",
    "tag.Context" : "dfs",
    "tag.HAState" : "standby",
    "tag.Hostname" : "h03.bigdata.com",
    "MissingBlocks" : 0,
    "MissingReplOneBlocks" : 0,
    "ExpiredHeartbeats" : 0,
    "TransactionsSinceLastCheckpoint" : -756,
    "TransactionsSinceLastLogRoll" : 0,
    "LastWrittenTransactionId" : 5760,
    "LastCheckpointTime" : 1438637246806,
    "CapacityTotal" : 377945479446528,
    "CapacityTotalGB" : 351989.0,
    "CapacityUsed" : 2162847744,
    "CapacityUsedGB" : 2.0,
    "CapacityRemaining" : 374078076620800,
    "CapacityRemainingGB" : 348387.0,
    "CapacityUsedNonDFS" : 3865239977984,
    "TotalLoad" : 16,
    "SnapshottableDirectories" : 0,
    "Snapshots" : 0,
    "BlocksTotal" : 588,
    "FilesTotal" : 825,
    "PendingReplicationBlocks" : 0,
    "UnderReplicatedBlocks" : 0,
    "CorruptBlocks" : 0,
    "ScheduledReplicationBlocks" : 0,
    "PendingDeletionBlocks" : 0,
    "ExcessBlocks" : 0,
    "PostponedMisreplicatedBlocks" : 0,
    "PendingDataNodeMessageCount" : 0,
    "MillisSinceLastLoadedEdits" : 49071,
    "BlockCapacity" : 2097152,
    "StaleDataNodes" : 0,
    "TotalFiles" : 825
  } ]
}
[root@h02 patch]# curl -s http://h02.bigdata.com:50070/jmx?qry=Hadoop:service=NameNode,name=FSNamesystem
{
  "beans" : [ {
    "name" : "Hadoop:service=NameNode,name=FSNamesystem",
    "modelerType" : "FSNamesystem",
    "tag.Context" : "dfs",
    "tag.HAState" : "active",
    "tag.Hostname" : "h02.bigdata.com",
    "MissingBlocks" : 0,
    "MissingReplOneBlocks" : 0,
    "ExpiredHeartbeats" : 0,
    "TransactionsSinceLastCheckpoint" : 227,
    "TransactionsSinceLastLogRoll" : 1,
    "LastWrittenTransactionId" : 6743,
    "LastCheckpointTime" : 1438637246983,
    "CapacityTotal" : 377945479446528,
    "CapacityTotalGB" : 351989.0,
    "CapacityUsed" : 2162847744,
    "CapacityUsedGB" : 2.0,
    "CapacityRemaining" : 374078076620800,
    "CapacityRemainingGB" : 348387.0,
    "CapacityUsedNonDFS" : 3865239977984,
    "TotalLoad" : 16,
    "SnapshottableDirectories" : 0,
    "Snapshots" : 0,
    "BlocksTotal" : 588,
    "FilesTotal" : 825,
    "PendingReplicationBlocks" : 0,
    "UnderReplicatedBlocks" : 0,
    "CorruptBlocks" : 0,
    "ScheduledReplicationBlocks" : 0,
    "PendingDeletionBlocks" : 0,
    "ExcessBlocks" : 0,
    "PostponedMisreplicatedBlocks" : 0,
    "PendingDataNodeMessageCount" : 0,
    "MillisSinceLastLoadedEdits" : 0,
    "BlockCapacity" : 2097152,
    "StaleDataNodes" : 0,
    "TotalFiles" : 825
  } ]
}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)