You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by jo...@apache.org on 2016/10/13 20:58:24 UTC

ambari git commit: AMBARI-18590 - RegionServer Registration Checks Fail During Upgrade If rDNS is Not Enabled (jonathanhurley)

Repository: ambari
Updated Branches:
  refs/heads/trunk 00d2f1193 -> 4140cc78a


AMBARI-18590 - RegionServer Registration Checks Fail During Upgrade If rDNS is Not Enabled (jonathanhurley)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/4140cc78
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/4140cc78
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/4140cc78

Branch: refs/heads/trunk
Commit: 4140cc78a0153799938c2dbe3f80c11ab3be2e30
Parents: 00d2f11
Author: Jonathan Hurley <jh...@hortonworks.com>
Authored: Thu Oct 13 11:16:15 2016 -0400
Committer: Jonathan Hurley <jh...@hortonworks.com>
Committed: Thu Oct 13 15:44:58 2016 -0400

----------------------------------------------------------------------
 .../HBASE/0.96.0.2.0/package/scripts/upgrade.py | 48 ++++++++++++++++++--
 1 file changed, 43 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/4140cc78/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/upgrade.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/upgrade.py b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/upgrade.py
index f1fa80c..703fe26 100644
--- a/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/upgrade.py
+++ b/ambari-server/src/main/resources/common-services/HBASE/0.96.0.2.0/package/scripts/upgrade.py
@@ -19,7 +19,10 @@ limitations under the License.
 
 """
 import re
+import socket
+
 from resource_management.core import shell
+from resource_management.core.exceptions import ComponentIsNotRunning
 from resource_management.core.exceptions import Fail
 from resource_management.core.logger import Logger
 from resource_management.libraries.functions import conf_select, stack_select
@@ -44,7 +47,7 @@ def post_regionserver(env):
   check_cmd = "echo 'status \"simple\"' | {0} shell".format(params.hbase_cmd)
 
   exec_cmd = "{0} {1}".format(params.kinit_cmd, check_cmd)
-  call_and_match(exec_cmd, params.hbase_user, params.hostname + ":", re.IGNORECASE)
+  is_regionserver_registered(exec_cmd, params.hbase_user, params.hostname, re.IGNORECASE)
 
 
 def is_region_server_process_running():
@@ -55,14 +58,49 @@ def is_region_server_process_running():
   except ComponentIsNotRunning:
     return False
 
-@retry(times=30, sleep_time=30, err_class=Fail) # keep trying for 15 mins
-def call_and_match(cmd, user, regex, regex_search_flags):
 
+@retry(times=30, sleep_time=30, err_class=Fail)
+def is_regionserver_registered(cmd, user, hostname, regex_search_flags):
+  """
+  Queries HBase through the HBase shell to see which servers have successfully registered. This is
+  useful in cases, such as upgrades, where we must ensure that a RegionServer has not only started,
+  but also completed its registration handshake before moving into upgrading the next RegionServer.
+
+  The hbase shell is used along with the "status 'simple'" command in order to determine if the
+  specified host has registered.
+  :param cmd:
+  :param user:
+  :param hostname:
+  :param regex_search_flags:
+  :return:
+  """
   if not is_region_server_process_running():
     Logger.info("RegionServer process is not running")
     raise Fail("RegionServer process is not running")
 
+  # use hbase shell with "status 'simple'" command
   code, out = shell.call(cmd, user=user)
 
-  if not (out and re.search(regex, out, regex_search_flags)):
-    raise Fail("Could not verify RS available")
+  # if we don't have output, then we can't check
+  if not out:
+    raise Fail("Unable to retrieve status information from the HBase shell")
+
+  # try matching the hostname with a colon (which indicates a bound port)
+  bound_hostname_to_match = hostname + ":"
+  match = re.search(bound_hostname_to_match, out, regex_search_flags)
+
+  # if there's no match, try again with the IP address
+  if not match:
+    try:
+      ip_address = socket.gethostbyname(hostname)
+      bound_ip_address_to_match = ip_address + ":"
+      match = re.search(bound_ip_address_to_match, out, regex_search_flags)
+    except socket.error:
+      # this is merely a backup, so just log that it failed
+      Logger.warning("Unable to lookup the IP address of {0}, reverse DNS lookup may not be working.".format(hostname))
+      pass
+
+  # failed with both a hostname and an IP address, so raise the Fail and let the function auto retry
+  if not match:
+    raise Fail(
+      "The RegionServer named {0} has not yet registered with the HBase Master".format(hostname))