You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by jo...@apache.org on 2015/04/14 22:15:26 UTC

ambari git commit: AMBARI-10464 - Ambari Agent holding socket open on 50070 prevents NN from starting (jonathanhurley)

Repository: ambari
Updated Branches:
  refs/heads/trunk 8669ec812 -> d2bc7bd37


AMBARI-10464 - Ambari Agent holding socket open on 50070 prevents NN from starting (jonathanhurley)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/d2bc7bd3
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/d2bc7bd3
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/d2bc7bd3

Branch: refs/heads/trunk
Commit: d2bc7bd37549ef29fa610c4687151df96e7b454b
Parents: 8669ec8
Author: Jonathan Hurley <jh...@hortonworks.com>
Authored: Tue Apr 14 11:00:59 2015 -0400
Committer: Jonathan Hurley <jh...@hortonworks.com>
Committed: Tue Apr 14 15:57:51 2015 -0400

----------------------------------------------------------------------
 .../src/main/python/ambari_agent/alerts/metric_alert.py        | 4 +++-
 .../HDFS/2.1.0.2.0/package/alerts/alert_checkpoint_time.py     | 4 +++-
 .../HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py  | 4 +++-
 .../HIVE/0.12.0.2.0/package/alerts/alert_webhcat_server.py     | 5 +++--
 .../YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py  | 4 +++-
 .../2.1.0.2.0/package/alerts/alert_nodemanagers_summary.py     | 6 ++++--
 .../0.8/services/HDFS/package/files/alert_checkpoint_time.py   | 4 +++-
 .../services/HDFS/package/files/alert_ha_namenode_health.py    | 4 +++-
 .../0.8/services/WEBHCAT/package/files/alert_webhcat_server.py | 5 +++--
 .../services/YARN/package/files/alert_nodemanager_health.py    | 4 +++-
 10 files changed, 31 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/d2bc7bd3/ambari-agent/src/main/python/ambari_agent/alerts/metric_alert.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/alerts/metric_alert.py b/ambari-agent/src/main/python/ambari_agent/alerts/metric_alert.py
index 8b5f15d..33f7508 100644
--- a/ambari-agent/src/main/python/ambari_agent/alerts/metric_alert.py
+++ b/ambari-agent/src/main/python/ambari_agent/alerts/metric_alert.py
@@ -31,6 +31,8 @@ from resource_management.libraries.functions.get_port_from_url import get_port_f
 
 logger = logging.getLogger()
 
+CONNECTION_TIMEOUT = 5.0
+
 class MetricAlert(BaseAlert):
   
   def __init__(self, alert_meta, alert_source_meta):
@@ -157,7 +159,7 @@ class MetricAlert(BaseAlert):
       response = None
       try:
         url_opener = urllib2.build_opener(RefreshHeaderProcessor())
-        response = url_opener.open(url)
+        response = url_opener.open(url, timeout=CONNECTION_TIMEOUT)
         content = response.read()
       finally:
         # explicitely close the connection as we've seen python hold onto these

http://git-wip-us.apache.org/repos/asf/ambari/blob/d2bc7bd3/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_checkpoint_time.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_checkpoint_time.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_checkpoint_time.py
index 032310d..2455d3b 100644
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_checkpoint_time.py
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_checkpoint_time.py
@@ -36,6 +36,8 @@ PERCENT_CRITICAL = 200
 CHECKPOINT_TX_DEFAULT = 1000000
 CHECKPOINT_PERIOD_DEFAULT = 21600
 
+CONNECTION_TIMEOUT = 5.0
+
 def get_tokens():
   """
   Returns a tuple of tokens in the format {{site/property}} that will be used
@@ -133,7 +135,7 @@ def get_value_from_jmx(query, jmx_property):
   response = None
   
   try:
-    response = urllib2.urlopen(query)
+    response = urllib2.urlopen(query, timeout=CONNECTION_TIMEOUT)
     data = response.read()
 
     data_dict = json.loads(data)

http://git-wip-us.apache.org/repos/asf/ambari/blob/d2bc7bd3/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py
index 058b7b2..2066d46 100644
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py
@@ -35,6 +35,8 @@ NN_HTTP_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.http-address}}'
 NN_HTTPS_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.https-address}}'
 DFS_POLICY_KEY = '{{hdfs-site/dfs.http.policy}}'
 
+CONNECTION_TIMEOUT = 5.0
+
 def get_tokens():
   """
   Returns a tuple of tokens in the format {{site/property}} that will be used
@@ -163,7 +165,7 @@ def get_value_from_jmx(query, jmx_property):
   response = None
   
   try:
-    response = urllib2.urlopen(query)
+    response = urllib2.urlopen(query, timeout=CONNECTION_TIMEOUT)
     data = response.read()
 
     data_dict = json.loads(data)

http://git-wip-us.apache.org/repos/asf/ambari/blob/d2bc7bd3/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_webhcat_server.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_webhcat_server.py b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_webhcat_server.py
index e991f53..dd20be4 100644
--- a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_webhcat_server.py
+++ b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_webhcat_server.py
@@ -53,7 +53,8 @@ KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY = '{{kerberos-env/executable_search_paths}}
 WEBHCAT_OK_RESPONSE = 'ok'
 WEBHCAT_PORT_DEFAULT = 50111
 
-CURL_CONNECTION_TIMEOUT = '10'
+CURL_CONNECTION_TIMEOUT = '5'
+CONNECTION_TIMEOUT = 5.0
 
 def get_tokens():
   """
@@ -177,7 +178,7 @@ def execute(parameters=None, host_name=None):
     try:
       # execute the query for the JSON that includes WebHCat status
       start_time = time.time()
-      url_response = urllib2.urlopen(query_url)
+      url_response = urllib2.urlopen(query_url, timeout=CONNECTION_TIMEOUT)
       total_time = time.time() - start_time
 
       json_response = json.loads(url_response.read())

http://git-wip-us.apache.org/repos/asf/ambari/blob/d2bc7bd3/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py
index 8c72f4c..516d858 100644
--- a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py
+++ b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py
@@ -40,6 +40,8 @@ CRITICAL_NODEMANAGER_UNKNOWN_JSON_MESSAGE = 'Unable to determine NodeManager hea
 
 NODEMANAGER_DEFAULT_PORT = 8042
 
+CONNECTION_TIMEOUT = 5.0
+
 def get_tokens():
   """
   Returns a tuple of tokens in the format {{site/property}} that will be used
@@ -106,7 +108,7 @@ def execute(parameters=None, host_name=None):
 
   try:
     # execute the query for the JSON that includes templeton status
-    url_response = urllib2.urlopen(query)
+    url_response = urllib2.urlopen(query, timeout=CONNECTION_TIMEOUT)
   except urllib2.HTTPError, httpError:
     label = CRITICAL_HTTP_STATUS_MESSAGE.format(str(httpError.code), query,
       str(httpError))

http://git-wip-us.apache.org/repos/asf/ambari/blob/d2bc7bd3/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanagers_summary.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanagers_summary.py b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanagers_summary.py
index b297b0c..7c00625 100644
--- a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanagers_summary.py
+++ b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanagers_summary.py
@@ -29,6 +29,8 @@ OK_LABEL = 'All NodeManagers are healthy'
 NODEMANAGER_HTTP_ADDRESS_KEY = '{{yarn-site/yarn.resourcemanager.webapp.address}}'
 NODEMANAGER_HTTPS_ADDRESS_KEY = '{{yarn-site/yarn.resourcemanager.webapp.https.address}}'
 YARN_HTTP_POLICY_KEY = '{{yarn-site/yarn.http.policy}}'
+
+CONNECTION_TIMEOUT = 5.0
   
 def get_tokens():
   """
@@ -99,7 +101,7 @@ def execute(parameters=None, host_name=None):
     label = str(e)
     result_code = 'UNKNOWN'
 
-  return ((result_code, [label]))
+  return (result_code, [label])
 
 
 def get_value_from_jmx(query, jmx_property):
@@ -109,7 +111,7 @@ def get_value_from_jmx(query, jmx_property):
     # use a customer header process that will look for the non-standard
     # "Refresh" header and attempt to follow the redirect
     url_opener = urllib2.build_opener(RefreshHeaderProcessor())
-    response = url_opener.open(query)
+    response = url_opener.open(query, timeout=CONNECTION_TIMEOUT)
 
     data = response.read()
     data_dict = json.loads(data)

http://git-wip-us.apache.org/repos/asf/ambari/blob/d2bc7bd3/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_checkpoint_time.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_checkpoint_time.py b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_checkpoint_time.py
index 032310d..2455d3b 100644
--- a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_checkpoint_time.py
+++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_checkpoint_time.py
@@ -36,6 +36,8 @@ PERCENT_CRITICAL = 200
 CHECKPOINT_TX_DEFAULT = 1000000
 CHECKPOINT_PERIOD_DEFAULT = 21600
 
+CONNECTION_TIMEOUT = 5.0
+
 def get_tokens():
   """
   Returns a tuple of tokens in the format {{site/property}} that will be used
@@ -133,7 +135,7 @@ def get_value_from_jmx(query, jmx_property):
   response = None
   
   try:
-    response = urllib2.urlopen(query)
+    response = urllib2.urlopen(query, timeout=CONNECTION_TIMEOUT)
     data = response.read()
 
     data_dict = json.loads(data)

http://git-wip-us.apache.org/repos/asf/ambari/blob/d2bc7bd3/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_ha_namenode_health.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_ha_namenode_health.py b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_ha_namenode_health.py
index 058b7b2..2066d46 100644
--- a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_ha_namenode_health.py
+++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_ha_namenode_health.py
@@ -35,6 +35,8 @@ NN_HTTP_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.http-address}}'
 NN_HTTPS_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.https-address}}'
 DFS_POLICY_KEY = '{{hdfs-site/dfs.http.policy}}'
 
+CONNECTION_TIMEOUT = 5.0
+
 def get_tokens():
   """
   Returns a tuple of tokens in the format {{site/property}} that will be used
@@ -163,7 +165,7 @@ def get_value_from_jmx(query, jmx_property):
   response = None
   
   try:
-    response = urllib2.urlopen(query)
+    response = urllib2.urlopen(query, timeout=CONNECTION_TIMEOUT)
     data = response.read()
 
     data_dict = json.loads(data)

http://git-wip-us.apache.org/repos/asf/ambari/blob/d2bc7bd3/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/WEBHCAT/package/files/alert_webhcat_server.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/WEBHCAT/package/files/alert_webhcat_server.py b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/WEBHCAT/package/files/alert_webhcat_server.py
index e991f53..dd20be4 100644
--- a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/WEBHCAT/package/files/alert_webhcat_server.py
+++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/WEBHCAT/package/files/alert_webhcat_server.py
@@ -53,7 +53,8 @@ KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY = '{{kerberos-env/executable_search_paths}}
 WEBHCAT_OK_RESPONSE = 'ok'
 WEBHCAT_PORT_DEFAULT = 50111
 
-CURL_CONNECTION_TIMEOUT = '10'
+CURL_CONNECTION_TIMEOUT = '5'
+CONNECTION_TIMEOUT = 5.0
 
 def get_tokens():
   """
@@ -177,7 +178,7 @@ def execute(parameters=None, host_name=None):
     try:
       # execute the query for the JSON that includes WebHCat status
       start_time = time.time()
-      url_response = urllib2.urlopen(query_url)
+      url_response = urllib2.urlopen(query_url, timeout=CONNECTION_TIMEOUT)
       total_time = time.time() - start_time
 
       json_response = json.loads(url_response.read())

http://git-wip-us.apache.org/repos/asf/ambari/blob/d2bc7bd3/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/package/files/alert_nodemanager_health.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/package/files/alert_nodemanager_health.py b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/package/files/alert_nodemanager_health.py
index 8c72f4c..516d858 100644
--- a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/package/files/alert_nodemanager_health.py
+++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/package/files/alert_nodemanager_health.py
@@ -40,6 +40,8 @@ CRITICAL_NODEMANAGER_UNKNOWN_JSON_MESSAGE = 'Unable to determine NodeManager hea
 
 NODEMANAGER_DEFAULT_PORT = 8042
 
+CONNECTION_TIMEOUT = 5.0
+
 def get_tokens():
   """
   Returns a tuple of tokens in the format {{site/property}} that will be used
@@ -106,7 +108,7 @@ def execute(parameters=None, host_name=None):
 
   try:
     # execute the query for the JSON that includes templeton status
-    url_response = urllib2.urlopen(query)
+    url_response = urllib2.urlopen(query, timeout=CONNECTION_TIMEOUT)
   except urllib2.HTTPError, httpError:
     label = CRITICAL_HTTP_STATUS_MESSAGE.format(str(httpError.code), query,
       str(httpError))