You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by sw...@apache.org on 2016/12/01 08:14:16 UTC
[33/50] ambari git commit: AMBARI-18997 ambari-server.pid might not
be created on slow hardware (dsen)
AMBARI-18997 ambari-server.pid might not be created on slow hardware (dsen)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/67292971
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/67292971
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/67292971
Branch: refs/heads/branch-feature-AMBARI-18901
Commit: 67292971c4b87246bb9f1df96939e89073af6dbc
Parents: fbff7f7
Author: Dmytro Sen <ds...@apache.org>
Authored: Wed Nov 30 14:04:51 2016 +0200
Committer: Dmytro Sen <ds...@apache.org>
Committed: Wed Nov 30 14:04:51 2016 +0200
----------------------------------------------------------------------
.../src/main/python/ambari_server/utils.py | 24 ++++-------
.../src/main/python/ambari_server_main.py | 44 ++++++++++++--------
.../src/test/python/TestAmbariServer.py | 4 +-
ambari-server/src/test/python/TestUtils.py | 26 +-----------
4 files changed, 36 insertions(+), 62 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/ambari/blob/67292971/ambari-server/src/main/python/ambari_server/utils.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/python/ambari_server/utils.py b/ambari-server/src/main/python/ambari_server/utils.py
index f505444..62c93ae 100644
--- a/ambari-server/src/main/python/ambari_server/utils.py
+++ b/ambari-server/src/main/python/ambari_server/utils.py
@@ -117,19 +117,19 @@ def save_pid(pid, pidfile):
pass
-def save_main_pid_ex(pids, pidfile, exclude_list=[], kill_exclude_list=False, skip_daemonize=False):
+def save_main_pid_ex(pids, pidfile, exclude_list=[], skip_daemonize=False):
"""
Save pid which is not included to exclude_list to pidfile.
- If kill_exclude_list is set to true, all processes in that
- list would be killed. It's might be useful to daemonize child process
exclude_list contains list of full executable paths which should be excluded
"""
+ pid_saved = False
try:
pfile = open(pidfile, "w")
for item in pids:
if pid_exists(item["pid"]) and (item["exe"] not in exclude_list):
pfile.write("%s\n" % item["pid"])
+ pid_saved = True
logger.info("Ambari server started with PID " + str(item["pid"]))
if pid_exists(item["pid"]) and (item["exe"] in exclude_list) and not skip_daemonize:
try:
@@ -145,23 +145,13 @@ def save_main_pid_ex(pids, pidfile, exclude_list=[], kill_exclude_list=False, sk
except Exception as e:
logger.error("Failed to close PID file " + pidfile + " due to " + str(e))
pass
+ return pid_saved
-
-def wait_for_pid(pids, timeout):
+def get_live_pids_count(pids):
"""
- Check pid for existence during timeout
+ Return the number of pids in the given list that currently exist
"""
- tstart = time.time()
- pid_live = 0
- while int(time.time()-tstart) <= timeout and len(pids) > 0:
- sys.stdout.write('.')
- sys.stdout.flush()
- pid_live = 0
- for item in pids:
- if pid_exists(item["pid"]):
- pid_live += 1
- time.sleep(1)
- return pid_live
+ return len([pid for pid in pids if pid_exists(pid)])
def get_symlink_path(path_to_link):
http://git-wip-us.apache.org/repos/asf/ambari/blob/67292971/ambari-server/src/main/python/ambari_server_main.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/python/ambari_server_main.py b/ambari-server/src/main/python/ambari_server_main.py
index b35cfc9..572de4e 100644
--- a/ambari-server/src/main/python/ambari_server_main.py
+++ b/ambari-server/src/main/python/ambari_server_main.py
@@ -38,8 +38,8 @@ from ambari_server.serverUtils import refresh_stack_hash
from ambari_server.setupHttps import get_fqdn
from ambari_server.setupSecurity import generate_env, \
ensure_can_start_under_current_user
-from ambari_server.utils import check_reverse_lookup, save_pid, locate_file, locate_all_file_paths, looking_for_pid, wait_for_pid, \
- save_main_pid_ex, check_exitcode
+from ambari_server.utils import check_reverse_lookup, save_pid, locate_file, locate_all_file_paths, looking_for_pid, \
+ save_main_pid_ex, check_exitcode, get_live_pids_count
from ambari_server.serverClassPath import ServerClassPath
logger = logging.getLogger(__name__)
@@ -101,8 +101,8 @@ SERVER_START_CMD_DEBUG_WINDOWS = "{0} " \
"-cp {3} " \
"org.apache.ambari.server.controller.AmbariServer"
-SERVER_INIT_TIMEOUT = 5
-SERVER_START_TIMEOUT = 30
+SERVER_START_TIMEOUT = 5
+SERVER_START_RETRIES = 4
SERVER_PING_TIMEOUT_WINDOWS = 5
SERVER_PING_ATTEMPTS_WINDOWS = 4
@@ -114,6 +114,7 @@ EXITCODE_NAME = "ambari-server.exitcode"
CHECK_DATABASE_SKIPPED_PROPERTY = "check_database_skipped"
AMBARI_SERVER_DIE_MSG = "Ambari Server java process died with exitcode {0}. Check {1} for more information."
+AMBARI_SERVER_NOT_STARTED_MSG = "Ambari Server java process hasn't been started or can't be determined."
# linux open-file limit
ULIMIT_OPEN_FILES_KEY = 'ulimit.open.files'
@@ -200,12 +201,23 @@ def wait_for_server_start(pidFile, scmStatus):
#wait for server process for SERVER_START_TIMEOUT seconds
sys.stdout.write('Waiting for server start...')
sys.stdout.flush()
-
- pids = looking_for_pid(SERVER_SEARCH_PATTERN, SERVER_INIT_TIMEOUT)
- found_pids = wait_for_pid(pids, SERVER_START_TIMEOUT)
-
- sys.stdout.write('\n')
- sys.stdout.flush()
+ pids = []
+ server_started = False
+ # looking_for_pid() might return a partial pid list on slow hardware
+ for i in range(1, SERVER_START_RETRIES):
+ pids = looking_for_pid(SERVER_SEARCH_PATTERN, SERVER_START_TIMEOUT)
+
+ sys.stdout.write('\n')
+ sys.stdout.flush()
+
+ if save_main_pid_ex(pids, pidFile, locate_all_file_paths('sh', '/bin') +
+ locate_all_file_paths('bash', '/bin') +
+ locate_all_file_paths('dash', '/bin'), IS_FOREGROUND):
+ server_started = True
+ break
+ else:
+ sys.stdout.write("Unable to determine server PID. Retrying...\n")
+ sys.stdout.flush()
if 'Database consistency check: failed' in open(configDefaults.SERVER_OUT_FILE).read():
print "DB configs consistency check failed. Run \"ambari-server start --skip-database-check\" to skip. " \
@@ -218,15 +230,13 @@ def wait_for_server_start(pidFile, scmStatus):
else:
print "DB configs consistency check: no errors and warnings were found."
-
- if found_pids <= 0:
+ if server_started:
+ return
+ elif get_live_pids_count(pids) <= 0:
exitcode = check_exitcode(os.path.join(configDefaults.PID_DIR, EXITCODE_NAME))
raise FatalException(-1, AMBARI_SERVER_DIE_MSG.format(exitcode, configDefaults.SERVER_OUT_FILE))
else:
- save_main_pid_ex(pids, pidFile, locate_all_file_paths('sh', '/bin') +
- locate_all_file_paths('bash', '/bin') +
- locate_all_file_paths('dash', '/bin'), True, IS_FOREGROUND)
-
+ raise FatalException(-1, AMBARI_SERVER_NOT_STARTED_MSG)
def server_process_main(options, scmStatus=None):
properties = get_ambari_properties()
@@ -358,7 +368,7 @@ def server_process_main(options, scmStatus=None):
raise FatalException(-1, AMBARI_SERVER_DIE_MSG.format(exitcode, configDefaults.SERVER_OUT_FILE))
else:
pidfile = os.path.join(configDefaults.PID_DIR, PID_NAME)
- save_pid(pidJava, pidfile)
+
print "Server PID at: "+pidfile
print "Server out at: "+configDefaults.SERVER_OUT_FILE
print "Server log at: "+configDefaults.SERVER_LOG_FILE
http://git-wip-us.apache.org/repos/asf/ambari/blob/67292971/ambari-server/src/test/python/TestAmbariServer.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/TestAmbariServer.py b/ambari-server/src/test/python/TestAmbariServer.py
index 424ddde..59fc975 100644
--- a/ambari-server/src/test/python/TestAmbariServer.py
+++ b/ambari-server/src/test/python/TestAmbariServer.py
@@ -4398,7 +4398,6 @@ class TestAmbariServer(TestCase):
@patch("sys.stdout.flush")
@patch("sys.stdout.write")
@patch("ambari_server_main.looking_for_pid")
- @patch("ambari_server_main.wait_for_pid")
@patch("ambari_server_main.save_main_pid_ex")
@patch("ambari_server_main.check_exitcode")
@patch("os.makedirs")
@@ -4448,7 +4447,7 @@ class TestAmbariServer(TestCase):
save_master_key_method, get_master_key_location_method,
os_chown_mock, is_server_running_mock, locate_file_mock,
os_makedirs_mock, check_exitcode_mock, save_main_pid_ex_mock,
- wait_for_pid_mock, looking_for_pid_mock, stdout_write_mock, stdout_flush_mock,
+ looking_for_pid_mock, stdout_write_mock, stdout_flush_mock,
get_is_active_instance_mock):
def reset_mocks():
@@ -4485,7 +4484,6 @@ class TestAmbariServer(TestCase):
"exe": "/test",
"cmd": "test arg"
}]
- wait_for_pid_mock.return_value = 1
check_exitcode_mock.return_value = 0
p = Properties()
http://git-wip-us.apache.org/repos/asf/ambari/blob/67292971/ambari-server/src/test/python/TestUtils.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/TestUtils.py b/ambari-server/src/test/python/TestUtils.py
index db94d92..3f2ccf8 100644
--- a/ambari-server/src/test/python/TestUtils.py
+++ b/ambari-server/src/test/python/TestUtils.py
@@ -120,30 +120,6 @@ class TestUtils(TestCase):
normpath_mock.return_value = "test value"
self.assertEquals(utils.get_symlink_path("/"), "test value")
- @patch('time.time')
- @patch.object(utils, 'pid_exists')
- @patch('time.sleep')
- def test_wait_for_pid(self, sleep_mock, pid_exists_mock, time_mock):
- pid_exists_mock.return_value = True
- time_mock.side_effect = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11]
-
- out = StringIO.StringIO()
- sys.stdout = out
- live_pids = utils.wait_for_pid([
- {"pid": "111",
- "exe": "",
- "cmd": ""
- },
- {"pid": "222",
- "exe": "",
- "cmd": ""
- },
- ], 10)
- self.assertEqual("..........", out.getvalue())
- sys.stdout = sys.__stdout__
-
- self.assertEquals(2, live_pids)
-
@patch.object(utils, 'pid_exists')
@patch('__builtin__.open')
@patch('os.kill')
@@ -170,7 +146,7 @@ class TestUtils(TestCase):
"exe": "/exe2",
"cmd": ""
},
- ], "/pidfile", ["/exe1"], True)
+ ], "/pidfile", ["/exe1"])
self.assertEquals(open_mock.call_count, 1)
self.assertEquals(pid_exists_mock.call_count, 4)
self.assertEquals(kill_mock.call_count, 1)