You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by jo...@apache.org on 2015/04/14 22:15:26 UTC
ambari git commit: AMBARI-10464 - Ambari Agent holding socket open on
50070 prevents NN from starting (jonathanhurley)
Repository: ambari
Updated Branches:
refs/heads/trunk 8669ec812 -> d2bc7bd37
AMBARI-10464 - Ambari Agent holding socket open on 50070 prevents NN from starting (jonathanhurley)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/d2bc7bd3
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/d2bc7bd3
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/d2bc7bd3
Branch: refs/heads/trunk
Commit: d2bc7bd37549ef29fa610c4687151df96e7b454b
Parents: 8669ec8
Author: Jonathan Hurley <jh...@hortonworks.com>
Authored: Tue Apr 14 11:00:59 2015 -0400
Committer: Jonathan Hurley <jh...@hortonworks.com>
Committed: Tue Apr 14 15:57:51 2015 -0400
----------------------------------------------------------------------
.../src/main/python/ambari_agent/alerts/metric_alert.py | 4 +++-
.../HDFS/2.1.0.2.0/package/alerts/alert_checkpoint_time.py | 4 +++-
.../HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py | 4 +++-
.../HIVE/0.12.0.2.0/package/alerts/alert_webhcat_server.py | 5 +++--
.../YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py | 4 +++-
.../2.1.0.2.0/package/alerts/alert_nodemanagers_summary.py | 6 ++++--
.../0.8/services/HDFS/package/files/alert_checkpoint_time.py | 4 +++-
.../services/HDFS/package/files/alert_ha_namenode_health.py | 4 +++-
.../0.8/services/WEBHCAT/package/files/alert_webhcat_server.py | 5 +++--
.../services/YARN/package/files/alert_nodemanager_health.py | 4 +++-
10 files changed, 31 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/ambari/blob/d2bc7bd3/ambari-agent/src/main/python/ambari_agent/alerts/metric_alert.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/alerts/metric_alert.py b/ambari-agent/src/main/python/ambari_agent/alerts/metric_alert.py
index 8b5f15d..33f7508 100644
--- a/ambari-agent/src/main/python/ambari_agent/alerts/metric_alert.py
+++ b/ambari-agent/src/main/python/ambari_agent/alerts/metric_alert.py
@@ -31,6 +31,8 @@ from resource_management.libraries.functions.get_port_from_url import get_port_f
logger = logging.getLogger()
+CONNECTION_TIMEOUT = 5.0
+
class MetricAlert(BaseAlert):
def __init__(self, alert_meta, alert_source_meta):
@@ -157,7 +159,7 @@ class MetricAlert(BaseAlert):
response = None
try:
url_opener = urllib2.build_opener(RefreshHeaderProcessor())
- response = url_opener.open(url)
+ response = url_opener.open(url, timeout=CONNECTION_TIMEOUT)
content = response.read()
finally:
# explicitly close the connection as we've seen python hold onto these
http://git-wip-us.apache.org/repos/asf/ambari/blob/d2bc7bd3/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_checkpoint_time.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_checkpoint_time.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_checkpoint_time.py
index 032310d..2455d3b 100644
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_checkpoint_time.py
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_checkpoint_time.py
@@ -36,6 +36,8 @@ PERCENT_CRITICAL = 200
CHECKPOINT_TX_DEFAULT = 1000000
CHECKPOINT_PERIOD_DEFAULT = 21600
+CONNECTION_TIMEOUT = 5.0
+
def get_tokens():
"""
Returns a tuple of tokens in the format {{site/property}} that will be used
@@ -133,7 +135,7 @@ def get_value_from_jmx(query, jmx_property):
response = None
try:
- response = urllib2.urlopen(query)
+ response = urllib2.urlopen(query, timeout=CONNECTION_TIMEOUT)
data = response.read()
data_dict = json.loads(data)
http://git-wip-us.apache.org/repos/asf/ambari/blob/d2bc7bd3/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py
index 058b7b2..2066d46 100644
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py
@@ -35,6 +35,8 @@ NN_HTTP_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.http-address}}'
NN_HTTPS_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.https-address}}'
DFS_POLICY_KEY = '{{hdfs-site/dfs.http.policy}}'
+CONNECTION_TIMEOUT = 5.0
+
def get_tokens():
"""
Returns a tuple of tokens in the format {{site/property}} that will be used
@@ -163,7 +165,7 @@ def get_value_from_jmx(query, jmx_property):
response = None
try:
- response = urllib2.urlopen(query)
+ response = urllib2.urlopen(query, timeout=CONNECTION_TIMEOUT)
data = response.read()
data_dict = json.loads(data)
http://git-wip-us.apache.org/repos/asf/ambari/blob/d2bc7bd3/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_webhcat_server.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_webhcat_server.py b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_webhcat_server.py
index e991f53..dd20be4 100644
--- a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_webhcat_server.py
+++ b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_webhcat_server.py
@@ -53,7 +53,8 @@ KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY = '{{kerberos-env/executable_search_paths}}
WEBHCAT_OK_RESPONSE = 'ok'
WEBHCAT_PORT_DEFAULT = 50111
-CURL_CONNECTION_TIMEOUT = '10'
+CURL_CONNECTION_TIMEOUT = '5'
+CONNECTION_TIMEOUT = 5.0
def get_tokens():
"""
@@ -177,7 +178,7 @@ def execute(parameters=None, host_name=None):
try:
# execute the query for the JSON that includes WebHCat status
start_time = time.time()
- url_response = urllib2.urlopen(query_url)
+ url_response = urllib2.urlopen(query_url, timeout=CONNECTION_TIMEOUT)
total_time = time.time() - start_time
json_response = json.loads(url_response.read())
http://git-wip-us.apache.org/repos/asf/ambari/blob/d2bc7bd3/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py
index 8c72f4c..516d858 100644
--- a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py
+++ b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py
@@ -40,6 +40,8 @@ CRITICAL_NODEMANAGER_UNKNOWN_JSON_MESSAGE = 'Unable to determine NodeManager hea
NODEMANAGER_DEFAULT_PORT = 8042
+CONNECTION_TIMEOUT = 5.0
+
def get_tokens():
"""
Returns a tuple of tokens in the format {{site/property}} that will be used
@@ -106,7 +108,7 @@ def execute(parameters=None, host_name=None):
try:
# execute the query for the JSON that includes NodeManager health status
- url_response = urllib2.urlopen(query)
+ url_response = urllib2.urlopen(query, timeout=CONNECTION_TIMEOUT)
except urllib2.HTTPError, httpError:
label = CRITICAL_HTTP_STATUS_MESSAGE.format(str(httpError.code), query,
str(httpError))
http://git-wip-us.apache.org/repos/asf/ambari/blob/d2bc7bd3/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanagers_summary.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanagers_summary.py b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanagers_summary.py
index b297b0c..7c00625 100644
--- a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanagers_summary.py
+++ b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanagers_summary.py
@@ -29,6 +29,8 @@ OK_LABEL = 'All NodeManagers are healthy'
NODEMANAGER_HTTP_ADDRESS_KEY = '{{yarn-site/yarn.resourcemanager.webapp.address}}'
NODEMANAGER_HTTPS_ADDRESS_KEY = '{{yarn-site/yarn.resourcemanager.webapp.https.address}}'
YARN_HTTP_POLICY_KEY = '{{yarn-site/yarn.http.policy}}'
+
+CONNECTION_TIMEOUT = 5.0
def get_tokens():
"""
@@ -99,7 +101,7 @@ def execute(parameters=None, host_name=None):
label = str(e)
result_code = 'UNKNOWN'
- return ((result_code, [label]))
+ return (result_code, [label])
def get_value_from_jmx(query, jmx_property):
@@ -109,7 +111,7 @@ def get_value_from_jmx(query, jmx_property):
# use a custom header processor that will look for the non-standard
# "Refresh" header and attempt to follow the redirect
url_opener = urllib2.build_opener(RefreshHeaderProcessor())
- response = url_opener.open(query)
+ response = url_opener.open(query, timeout=CONNECTION_TIMEOUT)
data = response.read()
data_dict = json.loads(data)
http://git-wip-us.apache.org/repos/asf/ambari/blob/d2bc7bd3/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_checkpoint_time.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_checkpoint_time.py b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_checkpoint_time.py
index 032310d..2455d3b 100644
--- a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_checkpoint_time.py
+++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_checkpoint_time.py
@@ -36,6 +36,8 @@ PERCENT_CRITICAL = 200
CHECKPOINT_TX_DEFAULT = 1000000
CHECKPOINT_PERIOD_DEFAULT = 21600
+CONNECTION_TIMEOUT = 5.0
+
def get_tokens():
"""
Returns a tuple of tokens in the format {{site/property}} that will be used
@@ -133,7 +135,7 @@ def get_value_from_jmx(query, jmx_property):
response = None
try:
- response = urllib2.urlopen(query)
+ response = urllib2.urlopen(query, timeout=CONNECTION_TIMEOUT)
data = response.read()
data_dict = json.loads(data)
http://git-wip-us.apache.org/repos/asf/ambari/blob/d2bc7bd3/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_ha_namenode_health.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_ha_namenode_health.py b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_ha_namenode_health.py
index 058b7b2..2066d46 100644
--- a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_ha_namenode_health.py
+++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_ha_namenode_health.py
@@ -35,6 +35,8 @@ NN_HTTP_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.http-address}}'
NN_HTTPS_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.https-address}}'
DFS_POLICY_KEY = '{{hdfs-site/dfs.http.policy}}'
+CONNECTION_TIMEOUT = 5.0
+
def get_tokens():
"""
Returns a tuple of tokens in the format {{site/property}} that will be used
@@ -163,7 +165,7 @@ def get_value_from_jmx(query, jmx_property):
response = None
try:
- response = urllib2.urlopen(query)
+ response = urllib2.urlopen(query, timeout=CONNECTION_TIMEOUT)
data = response.read()
data_dict = json.loads(data)
http://git-wip-us.apache.org/repos/asf/ambari/blob/d2bc7bd3/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/WEBHCAT/package/files/alert_webhcat_server.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/WEBHCAT/package/files/alert_webhcat_server.py b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/WEBHCAT/package/files/alert_webhcat_server.py
index e991f53..dd20be4 100644
--- a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/WEBHCAT/package/files/alert_webhcat_server.py
+++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/WEBHCAT/package/files/alert_webhcat_server.py
@@ -53,7 +53,8 @@ KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY = '{{kerberos-env/executable_search_paths}}
WEBHCAT_OK_RESPONSE = 'ok'
WEBHCAT_PORT_DEFAULT = 50111
-CURL_CONNECTION_TIMEOUT = '10'
+CURL_CONNECTION_TIMEOUT = '5'
+CONNECTION_TIMEOUT = 5.0
def get_tokens():
"""
@@ -177,7 +178,7 @@ def execute(parameters=None, host_name=None):
try:
# execute the query for the JSON that includes WebHCat status
start_time = time.time()
- url_response = urllib2.urlopen(query_url)
+ url_response = urllib2.urlopen(query_url, timeout=CONNECTION_TIMEOUT)
total_time = time.time() - start_time
json_response = json.loads(url_response.read())
http://git-wip-us.apache.org/repos/asf/ambari/blob/d2bc7bd3/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/package/files/alert_nodemanager_health.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/package/files/alert_nodemanager_health.py b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/package/files/alert_nodemanager_health.py
index 8c72f4c..516d858 100644
--- a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/package/files/alert_nodemanager_health.py
+++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/package/files/alert_nodemanager_health.py
@@ -40,6 +40,8 @@ CRITICAL_NODEMANAGER_UNKNOWN_JSON_MESSAGE = 'Unable to determine NodeManager hea
NODEMANAGER_DEFAULT_PORT = 8042
+CONNECTION_TIMEOUT = 5.0
+
def get_tokens():
"""
Returns a tuple of tokens in the format {{site/property}} that will be used
@@ -106,7 +108,7 @@ def execute(parameters=None, host_name=None):
try:
# execute the query for the JSON that includes NodeManager health status
- url_response = urllib2.urlopen(query)
+ url_response = urllib2.urlopen(query, timeout=CONNECTION_TIMEOUT)
except urllib2.HTTPError, httpError:
label = CRITICAL_HTTP_STATUS_MESSAGE.format(str(httpError.code), query,
str(httpError))