You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by rl...@apache.org on 2017/06/08 19:22:28 UTC
[14/50] [abbrv] ambari git commit: AMBARI-21142. Log more info about
heartbeat message/response when server - agent communication gets out of
sync. (stoader)
AMBARI-21142. Log more info about heartbeat message/response when server - agent communication gets out of sync. (stoader)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/b7101f78
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/b7101f78
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/b7101f78
Branch: refs/heads/branch-feature-AMBARI-20859
Commit: b7101f782be9a1291de589262f01083c70dfc935
Parents: c3c06ea
Author: Toader, Sebastian <st...@hortonworks.com>
Authored: Fri Jun 2 23:09:56 2017 +0200
Committer: Toader, Sebastian <st...@hortonworks.com>
Committed: Fri Jun 2 23:12:46 2017 +0200
----------------------------------------------------------------------
.../src/main/python/ambari_agent/Controller.py | 6 +++++-
.../ambari/server/agent/HeartBeatHandler.java | 18 ++++++++++++++----
2 files changed, 19 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/ambari/blob/b7101f78/ambari-agent/src/main/python/ambari_agent/Controller.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/Controller.py b/ambari-agent/src/main/python/ambari_agent/Controller.py
index 0297f74..bc923c3 100644
--- a/ambari-agent/src/main/python/ambari_agent/Controller.py
+++ b/ambari-agent/src/main/python/ambari_agent/Controller.py
@@ -321,6 +321,7 @@ class Controller(threading.Thread):
logger.log(logging_level, "Sending Heartbeat (id = %s)", self.responseId)
response = self.sendRequest(self.heartbeatUrl, data)
+
exitStatus = 0
if 'exitstatus' in response.keys():
exitStatus = int(response['exitstatus'])
@@ -366,7 +367,9 @@ class Controller(threading.Thread):
self.restartAgent()
if serverId != self.responseId + 1:
- logger.error("Error in responseId sequence - restarting")
+ logger.error("Error in responseId sequence - received responseId={0} from server while expecting {1} - restarting..."
+ .format(serverId, self.responseId + 1))
+
self.restartAgent()
else:
self.responseId = serverId
@@ -465,6 +468,7 @@ class Controller(threading.Thread):
#randomize the heartbeat
delay = randint(0, self.max_reconnect_retry_delay)
+ logger.info("Waiting {0} seconds before reconnecting to {1}".format(delay, self.heartbeatUrl))
time.sleep(delay)
# Sleep for some time
http://git-wip-us.apache.org/repos/asf/ambari/blob/b7101f78/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
index d800bc5..fc6e7a7 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
@@ -161,10 +161,20 @@ public class HeartBeatHandler {
+ ", receivedResponseId=" + heartbeat.getResponseId());
if (heartbeat.getResponseId() == currentResponseId - 1) {
- LOG.warn("Old responseId received - response was lost - returning cached response");
- return hostResponses.get(hostname);
+ HeartBeatResponse heartBeatResponse = hostResponses.get(hostname);
+
+ LOG.warn("Old responseId={} received form host {} - response was lost - returning cached response with responseId={}",
+ heartbeat.getResponseId(),
+ hostname,
+ heartBeatResponse.getResponseId());
+
+ return heartBeatResponse;
} else if (heartbeat.getResponseId() != currentResponseId) {
- LOG.error("Error in responseId sequence - sending agent restart command");
+ LOG.error("Error in responseId sequence - received responseId={} from host {} - sending agent restart command with responseId={}",
+ heartbeat.getResponseId(),
+ hostname,
+ currentResponseId);
+
return createRestartCommand(currentResponseId);
}
@@ -186,7 +196,7 @@ public class HeartBeatHandler {
if (hostObject.getState().equals(HostState.HEARTBEAT_LOST)) {
// After loosing heartbeat agent should reregister
- LOG.warn("Host is in HEARTBEAT_LOST state - sending register command");
+ LOG.warn("Host {} is in HEARTBEAT_LOST state - sending register command", hostname);
return createRegisterCommand();
}