You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by st...@apache.org on 2017/06/02 21:10:13 UTC
ambari git commit: AMBARI-21142. Log more info about heartbeat
message/response when server - agent communication gets out of sync.
(stoader)
Repository: ambari
Updated Branches:
refs/heads/branch-2.5 4418358f8 -> 91a7d0efa
AMBARI-21142. Log more info about heartbeat message/response when server - agent communication gets out of sync. (stoader)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/91a7d0ef
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/91a7d0ef
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/91a7d0ef
Branch: refs/heads/branch-2.5
Commit: 91a7d0efadd1522a9f736a1c8006f47457bef9af
Parents: 4418358
Author: Toader, Sebastian <st...@hortonworks.com>
Authored: Fri Jun 2 23:09:56 2017 +0200
Committer: Toader, Sebastian <st...@hortonworks.com>
Committed: Fri Jun 2 23:09:56 2017 +0200
----------------------------------------------------------------------
.../src/main/python/ambari_agent/Controller.py | 6 +++++-
.../ambari/server/agent/HeartBeatHandler.java | 18 ++++++++++++++----
2 files changed, 19 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/ambari/blob/91a7d0ef/ambari-agent/src/main/python/ambari_agent/Controller.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/Controller.py b/ambari-agent/src/main/python/ambari_agent/Controller.py
index 83f1da8..5fab595 100644
--- a/ambari-agent/src/main/python/ambari_agent/Controller.py
+++ b/ambari-agent/src/main/python/ambari_agent/Controller.py
@@ -320,6 +320,7 @@ class Controller(threading.Thread):
logger.log(logging_level, "Sending Heartbeat (id = %s)", self.responseId)
response = self.sendRequest(self.heartbeatUrl, data)
+
exitStatus = 0
if 'exitstatus' in response.keys():
exitStatus = int(response['exitstatus'])
@@ -365,7 +366,9 @@ class Controller(threading.Thread):
self.restartAgent()
if serverId != self.responseId + 1:
- logger.error("Error in responseId sequence - restarting")
+ logger.error("Error in responseId sequence - received responseId={0} from server while expecting {1} - restarting..."
+ .format(serverId, self.responseId + 1))
+
self.restartAgent()
else:
self.responseId = serverId
@@ -464,6 +467,7 @@ class Controller(threading.Thread):
#randomize the heartbeat
delay = randint(0, self.max_reconnect_retry_delay)
+ logger.info("Waiting {0} seconds before reconnecting to {1}".format(delay, self.heartbeatUrl))
time.sleep(delay)
# Sleep for some time
http://git-wip-us.apache.org/repos/asf/ambari/blob/91a7d0ef/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
index 6b93462..fd43de5 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
@@ -207,10 +207,20 @@ public class HeartBeatHandler {
+ ", receivedResponseId=" + heartbeat.getResponseId());
if (heartbeat.getResponseId() == currentResponseId - 1) {
- LOG.warn("Old responseId received - response was lost - returning cached response");
- return hostResponses.get(hostname);
+ HeartBeatResponse heartBeatResponse = hostResponses.get(hostname);
+
+ LOG.warn("Old responseId={} received form host {} - response was lost - returning cached response with responseId={}",
+ heartbeat.getResponseId(),
+ hostname,
+ heartBeatResponse.getResponseId());
+
+ return heartBeatResponse;
} else if (heartbeat.getResponseId() != currentResponseId) {
- LOG.error("Error in responseId sequence - sending agent restart command");
+ LOG.error("Error in responseId sequence - received responseId={} from host {} - sending agent restart command with responseId={}",
+ heartbeat.getResponseId(),
+ hostname,
+ currentResponseId);
+
return createRestartCommand(currentResponseId);
}
@@ -232,7 +242,7 @@ public class HeartBeatHandler {
if (hostObject.getState().equals(HostState.HEARTBEAT_LOST)) {
// After losing heartbeat agent should reregister
- LOG.warn("Host is in HEARTBEAT_LOST state - sending register command");
+ LOG.warn("Host {} is in HEARTBEAT_LOST state - sending register command", hostname);
return createRegisterCommand();
}