You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by jo...@apache.org on 2018/08/31 19:49:10 UTC
[ambari] branch branch-2.7 updated: [AMBARI-24582] - Ambari Alert -
HiveServer2 Process - False negative in Certain Scenarios
This is an automated email from the ASF dual-hosted git repository.
jonathanhurley pushed a commit to branch branch-2.7
in repository https://gitbox.apache.org/repos/asf/ambari.git
The following commit(s) were added to refs/heads/branch-2.7 by this push:
new 5cadc1d [AMBARI-24582] - Ambari Alert - HiveServer2 Process - False negative in Certain Scenarios
5cadc1d is described below
commit 5cadc1d2ce6bc11e07f106c03466342a345919e4
Author: Jonathan Hurley <jo...@apache.org>
AuthorDate: Fri Aug 31 13:52:34 2018 -0400
[AMBARI-24582] - Ambari Alert - HiveServer2 Process - False negative in Certain Scenarios
---
.../libraries/functions/hive_check.py | 11 +--
.../alerts/alert_hive_interactive_thrift_port.py | 22 +++---
.../package/alerts/alert_hive_thrift_port.py | 84 +++-------------------
.../stacks/2.0.6/HIVE/test_hive_service_check.py | 12 ++--
4 files changed, 31 insertions(+), 98 deletions(-)
diff --git a/ambari-common/src/main/python/resource_management/libraries/functions/hive_check.py b/ambari-common/src/main/python/resource_management/libraries/functions/hive_check.py
index d8f6696..39df3b5 100644
--- a/ambari-common/src/main/python/resource_management/libraries/functions/hive_check.py
+++ b/ambari-common/src/main/python/resource_management/libraries/functions/hive_check.py
@@ -25,8 +25,9 @@ from resource_management.core.signal_utils import TerminateStrategy
def check_thrift_port_sasl(address, port, hive_auth="NOSASL", key=None, kinitcmd=None, smokeuser='ambari-qa',
- transport_mode="binary", http_endpoint="cliservice", ssl=False, ssl_keystore=None,
- ssl_password=None, check_command_timeout=30, ldap_username="", ldap_password=""):
+ hive_user='hive', transport_mode="binary", http_endpoint="cliservice",
+ ssl=False, ssl_keystore=None, ssl_password=None, check_command_timeout=30,
+ ldap_username="", ldap_password=""):
"""
Hive thrift SASL port check
"""
@@ -71,8 +72,10 @@ def check_thrift_port_sasl(address, port, hive_auth="NOSASL", key=None, kinitcmd
finally:
kinit_lock.release()
- cmd = "! beeline -u '%s' %s -e '' 2>&1| awk '{print}'|grep -i -e 'Connection refused' -e 'Invalid URL'" % \
- (format(";".join(beeline_url)), format(credential_str))
+ # -n the user to connect as (ignored when using the hive principal in the URL, can be different from the user running the beeline command)
+ # -e ';' executes a SQL command of NOOP
+ cmd = "beeline -n %s -u '%s' %s -e ';' 2>&1 | awk '{print}' | grep -i -e 'Connected to:' -e 'Transaction isolation:'" % \
+ (format(hive_user), format(";".join(beeline_url)), format(credential_str))
Execute(cmd,
user=smokeuser,
diff --git a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_interactive_thrift_port.py b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_interactive_thrift_port.py
index 8d48412..39c982e 100644
--- a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_interactive_thrift_port.py
+++ b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_interactive_thrift_port.py
@@ -69,8 +69,8 @@ SMOKEUSER_PRINCIPAL_DEFAULT = 'ambari-qa@EXAMPLE.COM'
SMOKEUSER_SCRIPT_PARAM_KEY = 'default.smoke.user'
SMOKEUSER_DEFAULT = 'ambari-qa'
-HADOOPUSER_KEY = '{{cluster-env/hadoop.user.name}}'
-HADOOPUSER_DEFAULT = 'hadoop'
+HIVE_USER_KEY = '{{hive-env/hive_user}}'
+HIVE_USER_DEFAULT = 'hive'
CHECK_COMMAND_TIMEOUT_KEY = 'check.command.timeout'
CHECK_COMMAND_TIMEOUT_DEFAULT = 60.0
@@ -88,13 +88,10 @@ def get_tokens():
HIVE_SERVER_INTERACTIVE_PRINCIPAL_KEY, SMOKEUSER_KEYTAB_KEY, SMOKEUSER_PRINCIPAL_KEY,
HIVE_SERVER_INTERACTIVE_THRIFT_HTTP_PORT_KEY, HIVE_SERVER_INTERACTIVE_TRANSPORT_MODE_KEY,
HIVE_SERVER_TRANSPORT_MODE_KEY, KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY, HIVE_SSL,
- HIVE_SSL_KEYSTORE_PATH, HIVE_SSL_KEYSTORE_PASSWORD, HIVE_LDAP_USERNAME, HIVE_LDAP_PASSWORD)
+ HIVE_SSL_KEYSTORE_PATH, HIVE_SSL_KEYSTORE_PASSWORD, HIVE_LDAP_USERNAME, HIVE_LDAP_PASSWORD,
+ HIVE_USER_KEY)
-@OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY)
-def get_tokens():
- pass
-
@OsFamilyFuncImpl(os_family=OsFamilyImpl.DEFAULT)
def execute(configurations={}, parameters={}, host_name=None):
"""
@@ -170,6 +167,10 @@ def execute(configurations={}, parameters={}, host_name=None):
if SMOKEUSER_KEY in configurations:
smokeuser = configurations[SMOKEUSER_KEY]
+ hive_user = HIVE_USER_DEFAULT
+ if HIVE_USER_KEY in configurations:
+ hive_user = configurations[HIVE_USER_KEY]
+
ldap_username = ""
ldap_password = ""
if HIVE_LDAP_USERNAME in configurations:
@@ -207,7 +208,7 @@ def execute(configurations={}, parameters={}, host_name=None):
try:
hive_check.check_thrift_port_sasl(host_name, port, hive_server2_authentication, hive_server_principal,
- kinitcmd, smokeuser, transport_mode=transport_mode, ssl=hive_ssl,
+ kinitcmd, smokeuser, hive_user = hive_user, transport_mode=transport_mode, ssl=hive_ssl,
ssl_keystore=hive_ssl_keystore_path, ssl_password=hive_ssl_keystore_password,
check_command_timeout=int(check_command_timeout), ldap_username=ldap_username,
ldap_password=ldap_password)
@@ -223,8 +224,3 @@ def execute(configurations={}, parameters={}, host_name=None):
result_code = 'UNKNOWN'
return (result_code, [label])
-
-
-@OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY)
-def execute(configurations={}, parameters={}, host_name=None):
- pass
\ No newline at end of file
diff --git a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_thrift_port.py b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_thrift_port.py
index 6db92b0..3f57947 100644
--- a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_thrift_port.py
+++ b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_thrift_port.py
@@ -69,8 +69,8 @@ SMOKEUSER_PRINCIPAL_DEFAULT = 'ambari-qa@EXAMPLE.COM'
SMOKEUSER_SCRIPT_PARAM_KEY = 'default.smoke.user'
SMOKEUSER_DEFAULT = 'ambari-qa'
-HADOOPUSER_KEY = '{{cluster-env/hadoop.user.name}}'
-HADOOPUSER_DEFAULT = 'hadoop'
+HIVE_USER_KEY = '{{hive-env/hive_user}}'
+HIVE_USER_DEFAULT = 'hive'
CHECK_COMMAND_TIMEOUT_KEY = 'check.command.timeout'
CHECK_COMMAND_TIMEOUT_DEFAULT = 60.0
@@ -87,16 +87,9 @@ def get_tokens():
HIVE_SERVER2_AUTHENTICATION_KEY, HIVE_SERVER_PRINCIPAL_KEY,
SMOKEUSER_KEYTAB_KEY, SMOKEUSER_PRINCIPAL_KEY, HIVE_SERVER_THRIFT_HTTP_PORT_KEY,
HIVE_SERVER_TRANSPORT_MODE_KEY, KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY, HIVE_SSL,
- HIVE_SSL_KEYSTORE_PATH, HIVE_SSL_KEYSTORE_PASSWORD, HIVE_LDAP_USERNAME, HIVE_LDAP_PASSWORD)
+ HIVE_SSL_KEYSTORE_PATH, HIVE_SSL_KEYSTORE_PASSWORD, HIVE_LDAP_USERNAME, HIVE_LDAP_PASSWORD,
+ HIVE_USER_KEY)
-@OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY)
-def get_tokens():
- """
- Returns a tuple of tokens in the format {{site/property}} that will be used
- to build the dictionary passed into execute
- """
- return (HIVE_SERVER_THRIFT_PORT_KEY, HIVE_SERVER_THRIFT_HTTP_PORT_KEY,
- HIVE_SERVER_TRANSPORT_MODE_KEY, HADOOPUSER_KEY)
@OsFamilyFuncImpl(os_family=OsFamilyImpl.DEFAULT)
def execute(configurations={}, parameters={}, host_name=None):
@@ -161,7 +154,6 @@ def execute(configurations={}, parameters={}, host_name=None):
if SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY in parameters:
smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY]
-
# check configurations last as they should always take precedence
if SMOKEUSER_PRINCIPAL_KEY in configurations:
smokeuser_principal = configurations[SMOKEUSER_PRINCIPAL_KEY]
@@ -169,6 +161,10 @@ def execute(configurations={}, parameters={}, host_name=None):
if SMOKEUSER_KEY in configurations:
smokeuser = configurations[SMOKEUSER_KEY]
+ hive_user = HIVE_USER_DEFAULT
+ if HIVE_USER_KEY in configurations:
+ hive_user = configurations[HIVE_USER_KEY]
+
ldap_username = ""
ldap_password = ""
if HIVE_LDAP_USERNAME in configurations:
@@ -206,7 +202,7 @@ def execute(configurations={}, parameters={}, host_name=None):
try:
hive_check.check_thrift_port_sasl(host_name, port, hive_server2_authentication, hive_server_principal,
- kinitcmd, smokeuser, transport_mode=transport_mode, ssl=hive_ssl,
+ kinitcmd, smokeuser, hive_user = hive_user, transport_mode=transport_mode, ssl=hive_ssl,
ssl_keystore=hive_ssl_keystore_path, ssl_password=hive_ssl_keystore_password,
check_command_timeout=int(check_command_timeout),ldap_username=ldap_username,
ldap_password=ldap_password)
@@ -222,65 +218,3 @@ def execute(configurations={}, parameters={}, host_name=None):
result_code = 'UNKNOWN'
return (result_code, [label])
-
-
-@OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY)
-def execute(configurations={}, parameters={}, host_name=None):
- """
- Returns a tuple containing the result code and a pre-formatted result label
-
- Keyword arguments:
- configurations (dictionary): a mapping of configuration key to value
- parameters (dictionary): a mapping of script parameter key to value
- host_name (string): the name of this host where the alert is running
- """
-
- from resource_management.libraries.functions import reload_windows_env
- from resource_management.core.resources import Execute
- reload_windows_env()
- hive_home = os.environ['HIVE_HOME']
-
- if configurations is None:
- return ('UNKNOWN', ['There were no configurations supplied to the script.'])
-
- transport_mode = HIVE_SERVER_TRANSPORT_MODE_DEFAULT
- if HIVE_SERVER_TRANSPORT_MODE_KEY in configurations:
- transport_mode = configurations[HIVE_SERVER_TRANSPORT_MODE_KEY]
-
- port = THRIFT_PORT_DEFAULT
- if transport_mode.lower() == 'binary' and HIVE_SERVER_THRIFT_PORT_KEY in configurations:
- port = int(configurations[HIVE_SERVER_THRIFT_PORT_KEY])
- elif transport_mode.lower() == 'http' and HIVE_SERVER_THRIFT_HTTP_PORT_KEY in configurations:
- port = int(configurations[HIVE_SERVER_THRIFT_HTTP_PORT_KEY])
-
- hiveuser = HADOOPUSER_DEFAULT
- if HADOOPUSER_KEY in configurations:
- hiveuser = configurations[HADOOPUSER_KEY]
-
- result_code = None
- try:
- if host_name is None:
- host_name = socket.getfqdn()
-
- beeline_url = ['jdbc:hive2://{host_name}:{port}/', "transportMode={transport_mode}"]
- # append url according to used transport
- if transport_mode == "http":
- beeline_url.append('httpPath=cliservice')
- beeline_url_string = format(";".join(beeline_url))
- beeline_cmd = os.path.join(hive_home, "bin", "beeline.cmd")
- cmd = format("cmd /c {beeline_cmd} -u {beeline_url_string} -e '' 2>&1 | findstr Connected")
-
- start_time = time.time()
- try:
- Execute(cmd, user=hiveuser, timeout=30, timeout_kill_strategy=TerminateStrategy.KILL_PROCESS_TREE)
- total_time = time.time() - start_time
- result_code = 'OK'
- label = OK_MESSAGE.format(total_time, port)
- except:
- result_code = 'CRITICAL'
- label = CRITICAL_MESSAGE.format(host_name, port, traceback.format_exc())
- except:
- label = traceback.format_exc()
- result_code = 'UNKNOWN'
-
- return (result_code, [label])
diff --git a/ambari-server/src/test/python/stacks/2.0.6/HIVE/test_hive_service_check.py b/ambari-server/src/test/python/stacks/2.0.6/HIVE/test_hive_service_check.py
index 157b25c..16ac833 100644
--- a/ambari-server/src/test/python/stacks/2.0.6/HIVE/test_hive_service_check.py
+++ b/ambari-server/src/test/python/stacks/2.0.6/HIVE/test_hive_service_check.py
@@ -46,7 +46,7 @@ class TestServiceCheck(RMFTestCase):
stack_version = self.STACK_VERSION,
target = RMFTestCase.TARGET_COMMON_SERVICES
)
- self.assertResourceCalled('Execute', "! beeline -u 'jdbc:hive2://c6402.ambari.apache.org:10000/;transportMode=binary;auth=noSasl' -e '' 2>&1| awk '{print}'|grep -i -e 'Connection refused' -e 'Invalid URL'",
+ self.assertResourceCalled('Execute', "beeline -n hive -u 'jdbc:hive2://c6402.ambari.apache.org:10000/;transportMode=binary;auth=noSasl' -e ';' 2>&1 | awk '{print}' | grep -i -e 'Connected to:' -e 'Transaction isolation:'",
path = ['/bin/', '/usr/bin/', '/usr/lib/hive/bin/', '/usr/sbin/'],
user = 'ambari-qa',
timeout = 30,
@@ -165,7 +165,7 @@ class TestServiceCheck(RMFTestCase):
self.assertResourceCalled('Execute', '/usr/bin/kinit -kt /etc/security/keytabs/smokeuser.headless.keytab ambari-qa@EXAMPLE.COM; ',
user = 'ambari-qa',
)
- self.assertResourceCalled('Execute', "! beeline -u 'jdbc:hive2://c6402.ambari.apache.org:10000/;transportMode=binary;principal=hive/_HOST@EXAMPLE.COM' -e '' 2>&1| awk '{print}'|grep -i -e 'Connection refused' -e 'Invalid URL'",
+ self.assertResourceCalled('Execute', "beeline -n hive -u 'jdbc:hive2://c6402.ambari.apache.org:10000/;transportMode=binary;principal=hive/_HOST@EXAMPLE.COM' -e ';' 2>&1 | awk '{print}' | grep -i -e 'Connected to:' -e 'Transaction isolation:'",
path = ['/bin/', '/usr/bin/', '/usr/lib/hive/bin/', '/usr/sbin/'],
user = 'ambari-qa',
timeout = 30,
@@ -283,7 +283,7 @@ class TestServiceCheck(RMFTestCase):
stack_version = self.STACK_VERSION,
target = RMFTestCase.TARGET_COMMON_SERVICES)
- self.assertResourceCalled('Execute', "! beeline -u 'jdbc:hive2://c6402.ambari.apache.org:10010/;transportMode=binary' -e '' 2>&1| awk '{print}'|grep -i -e 'Connection refused' -e 'Invalid URL'",
+ self.assertResourceCalled('Execute', "beeline -n hive -u 'jdbc:hive2://c6402.ambari.apache.org:10010/;transportMode=binary' -e ';' 2>&1 | awk '{print}' | grep -i -e 'Connected to:' -e 'Transaction isolation:'",
path = ['/bin/', '/usr/bin/', '/usr/lib/hive/bin/', '/usr/sbin/'],
timeout = 30,
user = 'ambari-qa',
@@ -322,7 +322,7 @@ class TestServiceCheck(RMFTestCase):
target = RMFTestCase.TARGET_COMMON_SERVICES)
self.assertResourceCalled('Execute',
- "! beeline -u 'jdbc:hive2://c6402.ambari.apache.org:10010/;transportMode=binary' -e '' 2>&1| awk '{print}'|grep -i -e 'Connection refused' -e 'Invalid URL'",
+ "beeline -n hive -u 'jdbc:hive2://c6402.ambari.apache.org:10010/;transportMode=binary' -e ';' 2>&1 | awk '{print}' | grep -i -e 'Connected to:' -e 'Transaction isolation:'",
path = ['/bin/', '/usr/bin/', '/usr/lib/hive/bin/', '/usr/sbin/'],
timeout = 30,
user = 'ambari-qa',
@@ -330,7 +330,7 @@ class TestServiceCheck(RMFTestCase):
)
self.assertResourceCalled('Execute',
- "! beeline -u 'jdbc:hive2://c6402.ambari.apache.org:10500/;transportMode=binary' -e '' 2>&1| awk '{print}'|grep -i -e 'Connection refused' -e 'Invalid URL'",
+ "beeline -n hive -u 'jdbc:hive2://c6402.ambari.apache.org:10500/;transportMode=binary' -e ';' 2>&1 | awk '{print}' | grep -i -e 'Connected to:' -e 'Transaction isolation:'",
path = ['/bin/', '/usr/bin/', '/usr/lib/hive/bin/', '/usr/sbin/'],
timeout = 30,
user = 'ambari-qa',
@@ -339,7 +339,7 @@ class TestServiceCheck(RMFTestCase):
# LLAP call
self.assertResourceCalled('Execute',
- "! beeline -u 'jdbc:hive2://c6402.ambari.apache.org:10500/;transportMode=binary' --hiveconf \"hiveLlapServiceCheck=\" -f /usr/hdp/current/hive-server2-hive2/scripts/llap/sql/serviceCheckScript.sql -e '' 2>&1| awk '{print}'|grep -i -e 'Invalid status\|Invalid URL\|command not found\|Connection refused'",
+ '! beeline -u \'jdbc:hive2://c6402.ambari.apache.org:10500/;transportMode=binary\' --hiveconf "hiveLlapServiceCheck=" -f /usr/hdp/current/hive-server2-hive2/scripts/llap/sql/serviceCheckScript.sql -e \'\' 2>&1| awk \'{print}\'|grep -i -e \'Invalid status\\|Invalid URL\\|command not found\\|Connection refused\'',
path = ['/usr/sbin', '/usr/local/bin', '/bin', '/usr/bin', '/bin:/usr/hdp/2.3.0.0-1234/hadoop/bin:/usr/hdp/current/hive-server2-hive2/bin'],
tries = 1,
stderr = -1,