You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by jo...@apache.org on 2018/08/31 19:49:10 UTC

[ambari] branch branch-2.7 updated: [AMBARI-24582] - Ambari Alert - HiveServer2 Process - False negative in Certain Scenarios

This is an automated email from the ASF dual-hosted git repository.

jonathanhurley pushed a commit to branch branch-2.7
in repository https://gitbox.apache.org/repos/asf/ambari.git


The following commit(s) were added to refs/heads/branch-2.7 by this push:
     new 5cadc1d  [AMBARI-24582] - Ambari Alert - HiveServer2 Process - False negative in Certain Scenarios
5cadc1d is described below

commit 5cadc1d2ce6bc11e07f106c03466342a345919e4
Author: Jonathan Hurley <jo...@apache.org>
AuthorDate: Fri Aug 31 13:52:34 2018 -0400

    [AMBARI-24582] - Ambari Alert - HiveServer2 Process - False negative in Certain Scenarios
---
 .../libraries/functions/hive_check.py              | 11 +--
 .../alerts/alert_hive_interactive_thrift_port.py   | 22 +++---
 .../package/alerts/alert_hive_thrift_port.py       | 84 +++-------------------
 .../stacks/2.0.6/HIVE/test_hive_service_check.py   | 12 ++--
 4 files changed, 31 insertions(+), 98 deletions(-)

diff --git a/ambari-common/src/main/python/resource_management/libraries/functions/hive_check.py b/ambari-common/src/main/python/resource_management/libraries/functions/hive_check.py
index d8f6696..39df3b5 100644
--- a/ambari-common/src/main/python/resource_management/libraries/functions/hive_check.py
+++ b/ambari-common/src/main/python/resource_management/libraries/functions/hive_check.py
@@ -25,8 +25,9 @@ from resource_management.core.signal_utils import TerminateStrategy
 
 
 def check_thrift_port_sasl(address, port, hive_auth="NOSASL", key=None, kinitcmd=None, smokeuser='ambari-qa',
-                           transport_mode="binary", http_endpoint="cliservice", ssl=False, ssl_keystore=None,
-                           ssl_password=None, check_command_timeout=30, ldap_username="", ldap_password=""):
+                           hive_user='hive', transport_mode="binary", http_endpoint="cliservice",
+                           ssl=False, ssl_keystore=None, ssl_password=None, check_command_timeout=30,
+                           ldap_username="", ldap_password=""):
   """
   Hive thrift SASL port check
   """
@@ -71,8 +72,10 @@ def check_thrift_port_sasl(address, port, hive_auth="NOSASL", key=None, kinitcmd
     finally:
       kinit_lock.release()
 
-  cmd = "! beeline -u '%s' %s -e '' 2>&1| awk '{print}'|grep -i -e 'Connection refused' -e 'Invalid URL'" % \
-        (format(";".join(beeline_url)), format(credential_str))
+  # -n the user to connect as (ignored when using the hive principal in the URL, can be different from the user running the beeline command)
+  # -e ';' executes a no-op SQL command
+  cmd = "beeline -n %s -u '%s' %s -e ';' 2>&1 | awk '{print}' | grep -i -e 'Connected to:' -e 'Transaction isolation:'" % \
+        (format(hive_user), format(";".join(beeline_url)), format(credential_str))
 
   Execute(cmd,
     user=smokeuser,
diff --git a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_interactive_thrift_port.py b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_interactive_thrift_port.py
index 8d48412..39c982e 100644
--- a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_interactive_thrift_port.py
+++ b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_interactive_thrift_port.py
@@ -69,8 +69,8 @@ SMOKEUSER_PRINCIPAL_DEFAULT = 'ambari-qa@EXAMPLE.COM'
 SMOKEUSER_SCRIPT_PARAM_KEY = 'default.smoke.user'
 SMOKEUSER_DEFAULT = 'ambari-qa'
 
-HADOOPUSER_KEY = '{{cluster-env/hadoop.user.name}}'
-HADOOPUSER_DEFAULT = 'hadoop'
+HIVE_USER_KEY = '{{hive-env/hive_user}}'
+HIVE_USER_DEFAULT = 'hive'
 
 CHECK_COMMAND_TIMEOUT_KEY = 'check.command.timeout'
 CHECK_COMMAND_TIMEOUT_DEFAULT = 60.0
@@ -88,13 +88,10 @@ def get_tokens():
           HIVE_SERVER_INTERACTIVE_PRINCIPAL_KEY, SMOKEUSER_KEYTAB_KEY, SMOKEUSER_PRINCIPAL_KEY,
           HIVE_SERVER_INTERACTIVE_THRIFT_HTTP_PORT_KEY, HIVE_SERVER_INTERACTIVE_TRANSPORT_MODE_KEY,
           HIVE_SERVER_TRANSPORT_MODE_KEY, KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY, HIVE_SSL,
-          HIVE_SSL_KEYSTORE_PATH, HIVE_SSL_KEYSTORE_PASSWORD, HIVE_LDAP_USERNAME, HIVE_LDAP_PASSWORD)
+          HIVE_SSL_KEYSTORE_PATH, HIVE_SSL_KEYSTORE_PASSWORD, HIVE_LDAP_USERNAME, HIVE_LDAP_PASSWORD,
+          HIVE_USER_KEY)
 
 
-@OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY)
-def get_tokens():
-  pass
-
 @OsFamilyFuncImpl(os_family=OsFamilyImpl.DEFAULT)
 def execute(configurations={}, parameters={}, host_name=None):
   """
@@ -170,6 +167,10 @@ def execute(configurations={}, parameters={}, host_name=None):
   if SMOKEUSER_KEY in configurations:
     smokeuser = configurations[SMOKEUSER_KEY]
 
+  hive_user = HIVE_USER_DEFAULT
+  if HIVE_USER_KEY in configurations:
+    hive_user = configurations[HIVE_USER_KEY]
+
   ldap_username = ""
   ldap_password = ""
   if HIVE_LDAP_USERNAME in configurations:
@@ -207,7 +208,7 @@ def execute(configurations={}, parameters={}, host_name=None):
 
     try:
       hive_check.check_thrift_port_sasl(host_name, port, hive_server2_authentication, hive_server_principal,
-                                        kinitcmd, smokeuser, transport_mode=transport_mode, ssl=hive_ssl,
+                                        kinitcmd, smokeuser, hive_user = hive_user, transport_mode=transport_mode, ssl=hive_ssl,
                                         ssl_keystore=hive_ssl_keystore_path, ssl_password=hive_ssl_keystore_password,
                                         check_command_timeout=int(check_command_timeout), ldap_username=ldap_username,
                                         ldap_password=ldap_password)
@@ -223,8 +224,3 @@ def execute(configurations={}, parameters={}, host_name=None):
     result_code = 'UNKNOWN'
 
   return (result_code, [label])
-
-
-@OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY)
-def execute(configurations={}, parameters={}, host_name=None):
-  pass
\ No newline at end of file
diff --git a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_thrift_port.py b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_thrift_port.py
index 6db92b0..3f57947 100644
--- a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_thrift_port.py
+++ b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_thrift_port.py
@@ -69,8 +69,8 @@ SMOKEUSER_PRINCIPAL_DEFAULT = 'ambari-qa@EXAMPLE.COM'
 SMOKEUSER_SCRIPT_PARAM_KEY = 'default.smoke.user'
 SMOKEUSER_DEFAULT = 'ambari-qa'
 
-HADOOPUSER_KEY = '{{cluster-env/hadoop.user.name}}'
-HADOOPUSER_DEFAULT = 'hadoop'
+HIVE_USER_KEY = '{{hive-env/hive_user}}'
+HIVE_USER_DEFAULT = 'hive'
 
 CHECK_COMMAND_TIMEOUT_KEY = 'check.command.timeout'
 CHECK_COMMAND_TIMEOUT_DEFAULT = 60.0
@@ -87,16 +87,9 @@ def get_tokens():
           HIVE_SERVER2_AUTHENTICATION_KEY, HIVE_SERVER_PRINCIPAL_KEY,
           SMOKEUSER_KEYTAB_KEY, SMOKEUSER_PRINCIPAL_KEY, HIVE_SERVER_THRIFT_HTTP_PORT_KEY,
           HIVE_SERVER_TRANSPORT_MODE_KEY, KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY, HIVE_SSL,
-          HIVE_SSL_KEYSTORE_PATH, HIVE_SSL_KEYSTORE_PASSWORD, HIVE_LDAP_USERNAME, HIVE_LDAP_PASSWORD)
+          HIVE_SSL_KEYSTORE_PATH, HIVE_SSL_KEYSTORE_PASSWORD, HIVE_LDAP_USERNAME, HIVE_LDAP_PASSWORD,
+          HIVE_USER_KEY)
 
-@OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY)
-def get_tokens():
-  """
-  Returns a tuple of tokens in the format {{site/property}} that will be used
-  to build the dictionary passed into execute
-  """
-  return (HIVE_SERVER_THRIFT_PORT_KEY, HIVE_SERVER_THRIFT_HTTP_PORT_KEY,
-          HIVE_SERVER_TRANSPORT_MODE_KEY, HADOOPUSER_KEY)
 
 @OsFamilyFuncImpl(os_family=OsFamilyImpl.DEFAULT)
 def execute(configurations={}, parameters={}, host_name=None):
@@ -161,7 +154,6 @@ def execute(configurations={}, parameters={}, host_name=None):
   if SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY in parameters:
     smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY]
 
-
   # check configurations last as they should always take precedence
   if SMOKEUSER_PRINCIPAL_KEY in configurations:
     smokeuser_principal = configurations[SMOKEUSER_PRINCIPAL_KEY]
@@ -169,6 +161,10 @@ def execute(configurations={}, parameters={}, host_name=None):
   if SMOKEUSER_KEY in configurations:
     smokeuser = configurations[SMOKEUSER_KEY]
 
+  hive_user = HIVE_USER_DEFAULT
+  if HIVE_USER_KEY in configurations:
+    hive_user = configurations[HIVE_USER_KEY]
+
   ldap_username = ""
   ldap_password = ""
   if HIVE_LDAP_USERNAME in configurations:
@@ -206,7 +202,7 @@ def execute(configurations={}, parameters={}, host_name=None):
 
     try:
       hive_check.check_thrift_port_sasl(host_name, port, hive_server2_authentication, hive_server_principal,
-                                        kinitcmd, smokeuser, transport_mode=transport_mode, ssl=hive_ssl,
+                                        kinitcmd, smokeuser, hive_user = hive_user, transport_mode=transport_mode, ssl=hive_ssl,
                                         ssl_keystore=hive_ssl_keystore_path, ssl_password=hive_ssl_keystore_password,
                                         check_command_timeout=int(check_command_timeout),ldap_username=ldap_username,
                                         ldap_password=ldap_password)
@@ -222,65 +218,3 @@ def execute(configurations={}, parameters={}, host_name=None):
     result_code = 'UNKNOWN'
 
   return (result_code, [label])
-
-
-@OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY)
-def execute(configurations={}, parameters={}, host_name=None):
-  """
-  Returns a tuple containing the result code and a pre-formatted result label
-
-  Keyword arguments:
-  configurations (dictionary): a mapping of configuration key to value
-  parameters (dictionary): a mapping of script parameter key to value
-  host_name (string): the name of this host where the alert is running
-  """
-
-  from resource_management.libraries.functions import reload_windows_env
-  from resource_management.core.resources import Execute
-  reload_windows_env()
-  hive_home = os.environ['HIVE_HOME']
-
-  if configurations is None:
-    return ('UNKNOWN', ['There were no configurations supplied to the script.'])
-
-  transport_mode = HIVE_SERVER_TRANSPORT_MODE_DEFAULT
-  if HIVE_SERVER_TRANSPORT_MODE_KEY in configurations:
-    transport_mode = configurations[HIVE_SERVER_TRANSPORT_MODE_KEY]
-
-  port = THRIFT_PORT_DEFAULT
-  if transport_mode.lower() == 'binary' and HIVE_SERVER_THRIFT_PORT_KEY in configurations:
-    port = int(configurations[HIVE_SERVER_THRIFT_PORT_KEY])
-  elif transport_mode.lower() == 'http' and HIVE_SERVER_THRIFT_HTTP_PORT_KEY in configurations:
-    port = int(configurations[HIVE_SERVER_THRIFT_HTTP_PORT_KEY])
-
-  hiveuser = HADOOPUSER_DEFAULT
-  if HADOOPUSER_KEY in configurations:
-    hiveuser = configurations[HADOOPUSER_KEY]
-
-  result_code = None
-  try:
-    if host_name is None:
-      host_name = socket.getfqdn()
-
-    beeline_url = ['jdbc:hive2://{host_name}:{port}/', "transportMode={transport_mode}"]
-    # append url according to used transport
-    if transport_mode == "http":
-      beeline_url.append('httpPath=cliservice')
-    beeline_url_string = format(";".join(beeline_url))
-    beeline_cmd = os.path.join(hive_home, "bin", "beeline.cmd")
-    cmd = format("cmd /c {beeline_cmd} -u {beeline_url_string} -e '' 2>&1 | findstr Connected")
-
-    start_time = time.time()
-    try:
-      Execute(cmd, user=hiveuser, timeout=30, timeout_kill_strategy=TerminateStrategy.KILL_PROCESS_TREE)
-      total_time = time.time() - start_time
-      result_code = 'OK'
-      label = OK_MESSAGE.format(total_time, port)
-    except:
-      result_code = 'CRITICAL'
-      label = CRITICAL_MESSAGE.format(host_name, port, traceback.format_exc())
-  except:
-    label = traceback.format_exc()
-    result_code = 'UNKNOWN'
-
-  return (result_code, [label])
diff --git a/ambari-server/src/test/python/stacks/2.0.6/HIVE/test_hive_service_check.py b/ambari-server/src/test/python/stacks/2.0.6/HIVE/test_hive_service_check.py
index 157b25c..16ac833 100644
--- a/ambari-server/src/test/python/stacks/2.0.6/HIVE/test_hive_service_check.py
+++ b/ambari-server/src/test/python/stacks/2.0.6/HIVE/test_hive_service_check.py
@@ -46,7 +46,7 @@ class TestServiceCheck(RMFTestCase):
                         stack_version = self.STACK_VERSION,
                         target = RMFTestCase.TARGET_COMMON_SERVICES
     )
-    self.assertResourceCalled('Execute', "! beeline -u 'jdbc:hive2://c6402.ambari.apache.org:10000/;transportMode=binary;auth=noSasl'  -e '' 2>&1| awk '{print}'|grep -i -e 'Connection refused' -e 'Invalid URL'",
+    self.assertResourceCalled('Execute', "beeline -n hive -u 'jdbc:hive2://c6402.ambari.apache.org:10000/;transportMode=binary;auth=noSasl'  -e ';' 2>&1 | awk '{print}' | grep -i -e 'Connected to:' -e 'Transaction isolation:'",
                               path = ['/bin/', '/usr/bin/', '/usr/lib/hive/bin/', '/usr/sbin/'],
                               user = 'ambari-qa',
                               timeout = 30,
@@ -165,7 +165,7 @@ class TestServiceCheck(RMFTestCase):
     self.assertResourceCalled('Execute', '/usr/bin/kinit -kt /etc/security/keytabs/smokeuser.headless.keytab ambari-qa@EXAMPLE.COM; ',
                               user = 'ambari-qa',
                               )
-    self.assertResourceCalled('Execute', "! beeline -u 'jdbc:hive2://c6402.ambari.apache.org:10000/;transportMode=binary;principal=hive/_HOST@EXAMPLE.COM'  -e '' 2>&1| awk '{print}'|grep -i -e 'Connection refused' -e 'Invalid URL'",
+    self.assertResourceCalled('Execute', "beeline -n hive -u 'jdbc:hive2://c6402.ambari.apache.org:10000/;transportMode=binary;principal=hive/_HOST@EXAMPLE.COM'  -e ';' 2>&1 | awk '{print}' | grep -i -e 'Connected to:' -e 'Transaction isolation:'",
                               path = ['/bin/', '/usr/bin/', '/usr/lib/hive/bin/', '/usr/sbin/'],
                               user = 'ambari-qa',
                               timeout = 30,
@@ -283,7 +283,7 @@ class TestServiceCheck(RMFTestCase):
       stack_version = self.STACK_VERSION,
       target = RMFTestCase.TARGET_COMMON_SERVICES)
 
-    self.assertResourceCalled('Execute', "! beeline -u 'jdbc:hive2://c6402.ambari.apache.org:10010/;transportMode=binary'  -e '' 2>&1| awk '{print}'|grep -i -e 'Connection refused' -e 'Invalid URL'",
+    self.assertResourceCalled('Execute', "beeline -n hive -u 'jdbc:hive2://c6402.ambari.apache.org:10010/;transportMode=binary'  -e ';' 2>&1 | awk '{print}' | grep -i -e 'Connected to:' -e 'Transaction isolation:'",
       path = ['/bin/', '/usr/bin/', '/usr/lib/hive/bin/', '/usr/sbin/'],
       timeout = 30,
       user = 'ambari-qa',
@@ -322,7 +322,7 @@ class TestServiceCheck(RMFTestCase):
       target = RMFTestCase.TARGET_COMMON_SERVICES)
 
     self.assertResourceCalled('Execute',
-      "! beeline -u 'jdbc:hive2://c6402.ambari.apache.org:10010/;transportMode=binary'  -e '' 2>&1| awk '{print}'|grep -i -e 'Connection refused' -e 'Invalid URL'",
+      "beeline -n hive -u 'jdbc:hive2://c6402.ambari.apache.org:10010/;transportMode=binary'  -e ';' 2>&1 | awk '{print}' | grep -i -e 'Connected to:' -e 'Transaction isolation:'",
       path = ['/bin/', '/usr/bin/', '/usr/lib/hive/bin/', '/usr/sbin/'],
       timeout = 30,
       user = 'ambari-qa',
@@ -330,7 +330,7 @@ class TestServiceCheck(RMFTestCase):
     )
 
     self.assertResourceCalled('Execute',
-      "! beeline -u 'jdbc:hive2://c6402.ambari.apache.org:10500/;transportMode=binary'  -e '' 2>&1| awk '{print}'|grep -i -e 'Connection refused' -e 'Invalid URL'",
+      "beeline -n hive -u 'jdbc:hive2://c6402.ambari.apache.org:10500/;transportMode=binary'  -e ';' 2>&1 | awk '{print}' | grep -i -e 'Connected to:' -e 'Transaction isolation:'",
       path = ['/bin/', '/usr/bin/', '/usr/lib/hive/bin/', '/usr/sbin/'],
       timeout = 30,
       user = 'ambari-qa',
@@ -339,7 +339,7 @@ class TestServiceCheck(RMFTestCase):
 
     # LLAP call
     self.assertResourceCalled('Execute',
-      "! beeline -u 'jdbc:hive2://c6402.ambari.apache.org:10500/;transportMode=binary' --hiveconf \"hiveLlapServiceCheck=\" -f /usr/hdp/current/hive-server2-hive2/scripts/llap/sql/serviceCheckScript.sql -e '' 2>&1| awk '{print}'|grep -i -e 'Invalid status\|Invalid URL\|command not found\|Connection refused'",
+      '! beeline -u \'jdbc:hive2://c6402.ambari.apache.org:10500/;transportMode=binary\' --hiveconf "hiveLlapServiceCheck=" -f /usr/hdp/current/hive-server2-hive2/scripts/llap/sql/serviceCheckScript.sql -e \'\' 2>&1| awk \'{print}\'|grep -i -e \'Invalid status\\|Invalid URL\\|command not found\\|Connection refused\'',
       path = ['/usr/sbin', '/usr/local/bin', '/bin', '/usr/bin', '/bin:/usr/hdp/2.3.0.0-1234/hadoop/bin:/usr/hdp/current/hive-server2-hive2/bin'],
       tries = 1,
       stderr = -1,