You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafficserver.apache.org by zw...@apache.org on 2021/03/29 20:07:54 UTC

[trafficserver] branch 9.1.x updated: Add failed state to hostdb to better track failing origins (#7291)

This is an automated email from the ASF dual-hosted git repository.

zwoop pushed a commit to branch 9.1.x
in repository https://gitbox.apache.org/repos/asf/trafficserver.git


The following commit(s) were added to refs/heads/9.1.x by this push:
     new 280d4ea  Add failed state to hostdb to better track failing origins (#7291)
280d4ea is described below

commit 280d4eac94d3439c4e37219ca6347e658430f971
Author: Susan Hinrichs <sh...@verizonmedia.com>
AuthorDate: Thu Mar 25 11:47:00 2021 -0500

    Add failed state to hostdb to better track failing origins (#7291)
    
    (cherry picked from commit 660bd3b2e9a252a319f67ccd62bae91b176844e0)
---
 proxy/http/HttpSM.cc       | 47 ++++++++++++++++++++++++++++------------------
 proxy/http/HttpTransact.cc |  1 +
 2 files changed, 30 insertions(+), 18 deletions(-)

diff --git a/proxy/http/HttpSM.cc b/proxy/http/HttpSM.cc
index 8780443..b9babb2 100644
--- a/proxy/http/HttpSM.cc
+++ b/proxy/http/HttpSM.cc
@@ -4312,6 +4312,7 @@ HttpSM::do_hostdb_update_if_necessary()
   } else {
     if (t_state.host_db_info.app.http_data.last_failure != 0) {
       t_state.host_db_info.app.http_data.last_failure = 0;
+      t_state.host_db_info.app.http_data.fail_count   = 0;
       issue_update |= 1;
       char addrbuf[INET6_ADDRPORTSTRLEN];
       SMDebug("http", "[%" PRId64 "] hostdb update marking IP: %s as up", sm_id,
@@ -5396,30 +5397,40 @@ HttpSM::mark_host_failure(HostDBInfo *info, time_t time_down)
 {
   char addrbuf[INET6_ADDRPORTSTRLEN];
 
-  if (info->app.http_data.last_failure == 0) {
-    char *url_str = t_state.hdr_info.client_request.url_string_get(&t_state.arena, nullptr);
-    Log::error("%s", lbw()
-                       .clip(1)
-                       .print("CONNECT Error: {} connecting to {} for '{}' (setting last failure time)",
-                              ts::bwf::Errno(t_state.current.server->connect_result), t_state.current.server->dst_addr,
-                              ts::bwf::FirstOf(url_str, "<none>"))
-                       .extend(1)
-                       .write('\0')
-                       .data());
-
-    if (url_str) {
-      t_state.arena.str_free(url_str);
+  if (time_down) {
+    // Increment the fail_count
+    ++info->app.http_data.fail_count;
+    if (info->app.http_data.fail_count >= t_state.txn_conf->connect_attempts_rr_retries) {
+      if (info->app.http_data.last_failure == 0) {
+        char *url_str = t_state.hdr_info.client_request.url_string_get(&t_state.arena, nullptr);
+        Log::error("%s", lbw()
+                           .clip(1)
+                           .print("CONNECT Error: {} connecting to {} for '{}' (setting last failure time)",
+                                  ts::bwf::Errno(t_state.current.server->connect_result), t_state.current.server->dst_addr,
+                                  ts::bwf::FirstOf(url_str, "<none>"))
+                           .extend(1)
+                           .write('\0')
+                           .data());
+
+        if (url_str) {
+          t_state.arena.str_free(url_str);
+        }
+      }
+      info->app.http_data.last_failure = time_down;
+      SMDebug("http", "[%" PRId64 "] hostdb update marking IP: %s as down", sm_id,
+              ats_ip_nptop(&t_state.current.server->dst_addr.sa, addrbuf, sizeof(addrbuf)));
+    } else {
+      SMDebug("http", "[%" PRId64 "] hostdb increment IP failcount %s to %d", sm_id,
+              ats_ip_nptop(&t_state.current.server->dst_addr.sa, addrbuf, sizeof(addrbuf)), info->app.http_data.fail_count);
     }
+  } else { // Clear the failure
+    info->app.http_data.fail_count   = 0;
+    info->app.http_data.last_failure = time_down;
   }
 
-  info->app.http_data.last_failure = time_down;
-
 #ifdef DEBUG
   ink_assert(ink_local_time() + t_state.txn_conf->down_server_timeout > time_down);
 #endif
-
-  SMDebug("http", "[%" PRId64 "] hostdb update marking IP: %s as down", sm_id,
-          ats_ip_nptop(&t_state.current.server->dst_addr.sa, addrbuf, sizeof(addrbuf)));
 }
 
 void
diff --git a/proxy/http/HttpTransact.cc b/proxy/http/HttpTransact.cc
index a8bfd23..2e5e5c9 100644
--- a/proxy/http/HttpTransact.cc
+++ b/proxy/http/HttpTransact.cc
@@ -469,6 +469,7 @@ HttpTransact::is_server_negative_cached(State *s)
     //   down to 2*down_server_timeout
     if (s->client_request_time + s->txn_conf->down_server_timeout < s->host_db_info.app.http_data.last_failure) {
       s->host_db_info.app.http_data.last_failure = 0;
+      s->host_db_info.app.http_data.fail_count   = 0;
       ink_assert(!"extreme clock skew");
       return true;
     }