You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@apisix.apache.org by me...@apache.org on 2021/02/27 14:17:29 UTC
[apisix] branch master updated: fix(chash): ensure retry can try
every node (#3651)
This is an automated email from the ASF dual-hosted git repository.
membphis pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/apisix.git
The following commit(s) were added to refs/heads/master by this push:
new e58a55f fix(chash): ensure retry can try every node (#3651)
e58a55f is described below
commit e58a55fbc6e5ad2007ff206ac44096945c869390
Author: 罗泽轩 <sp...@gmail.com>
AuthorDate: Sat Feb 27 22:17:22 2021 +0800
fix(chash): ensure retry can try every node (#3651)
Previously the default number of retry is equal to the number of node,
but the same node will be tried again according to its weight.
Also ensure the same picker will be used in the whole request,
especially during the retry.
---
apisix/balancer.lua | 8 +++++--
apisix/balancer/chash.lua | 39 ++++++++++++++++++++++++++++--
t/admin/balancer.t | 2 ++
t/node/chash-balance.t | 60 +++++++++++++++++++++++++++++++++++++++++++++++
t/node/healthcheck.t | 2 +-
5 files changed, 106 insertions(+), 5 deletions(-)
diff --git a/apisix/balancer.lua b/apisix/balancer.lua
index 2540f2f..4be56fc 100644
--- a/apisix/balancer.lua
+++ b/apisix/balancer.lua
@@ -161,8 +161,12 @@ local function pick_server(route, ctx)
version = version .. "#" .. checker.status_ver
end
- local server_picker = lrucache_server_picker(key, version,
- create_server_picker, up_conf, checker)
+ -- the same picker will be used in the whole request, especially during the retry
+ local server_picker = ctx.server_picker
+ if not server_picker then
+ server_picker = lrucache_server_picker(key, version,
+ create_server_picker, up_conf, checker)
+ end
if not server_picker then
return nil, "failed to fetch server picker"
end
diff --git a/apisix/balancer/chash.lua b/apisix/balancer/chash.lua
index df1568a..f9dbdbb 100644
--- a/apisix/balancer/chash.lua
+++ b/apisix/balancer/chash.lua
@@ -22,6 +22,9 @@ local str_gsub = string.gsub
local pairs = pairs
+local CONSISTENT_POINTS = 160 -- points per server, taken from `resty.chash`
+
+
local _M = {}
@@ -62,27 +65,59 @@ end
function _M.new(up_nodes, upstream)
local str_null = str_char(0)
+ local nodes_count = 0
+ local safe_limit = 0
local servers, nodes = {}, {}
for serv, weight in pairs(up_nodes) do
local id = str_gsub(serv, ":", str_null)
+ nodes_count = nodes_count + 1
+ safe_limit = safe_limit + weight
servers[id] = serv
nodes[id] = weight
end
+ safe_limit = safe_limit * CONSISTENT_POINTS
local picker = resty_chash:new(nodes)
return {
upstream = upstream,
get = function (ctx)
local id
- if ctx.balancer_try_count > 1 and ctx.chash_last_server_index then
- id, ctx.chash_last_server_index = picker:next(ctx.chash_last_server_index)
+ if ctx.balancer_tried_servers then
+ if ctx.balancer_tried_servers_count == nodes_count then
+ return nil, "all upstream servers tried"
+ end
+
+ -- the 'safe_limit' is a best effort limit to prevent infinite loop caused by bug
+ for i = 1, safe_limit do
+ id, ctx.chash_last_server_index = picker:next(ctx.chash_last_server_index)
+ if not ctx.balancer_tried_servers[servers[id]] then
+ break
+ end
+ end
else
local chash_key = fetch_chash_hash_key(ctx, upstream)
id, ctx.chash_last_server_index = picker:find(chash_key)
end
-- core.log.warn("chash id: ", id, " val: ", servers[id])
return servers[id]
+ end,
+ after_balance = function (ctx, before_retry)
+ if not before_retry then
+ if ctx.balancer_tried_servers then
+ core.tablepool.release("balancer_tried_servers", ctx.balancer_tried_servers)
+ ctx.balancer_tried_servers = nil
+ end
+
+ return nil
+ end
+
+ if not ctx.balancer_tried_servers then
+ ctx.balancer_tried_servers = core.tablepool.fetch("balancer_tried_servers", 0, 2)
+ end
+
+ ctx.balancer_tried_servers[ctx.balancer_server] = true
+ ctx.balancer_tried_servers_count = (ctx.balancer_tried_servers_count or 0) + 1
end
}
end
diff --git a/t/admin/balancer.t b/t/admin/balancer.t
index b9a76c5..d1b9027 100644
--- a/t/admin/balancer.t
+++ b/t/admin/balancer.t
@@ -52,6 +52,8 @@ add_block_preprocessor(sub {
for _, key in ipairs(keys) do
ngx.say("host: ", key, " count: ", res[key])
end
+
+ ctx.server_picker = nil
end
_EOC_
$block->set_value("init_by_lua_block", $init_by_lua_block);
diff --git a/t/node/chash-balance.t b/t/node/chash-balance.t
index 02eee5f..dfde2aa 100644
--- a/t/node/chash-balance.t
+++ b/t/node/chash-balance.t
@@ -496,3 +496,63 @@ GET /t
--- error_code_like: ^(?:50\d)$
--- error_log
failed to find valid upstream server, no valid upstream node
+
+
+
+=== TEST 13: set route(ensure retry can try every node)
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/server_port",
+ "upstream": {
+ "key": "arg_device_id",
+ "type": "chash",
+ "nodes": {
+ "127.0.0.1:1979": 1000,
+ "127.0.0.1:1980": 1
+ }
+ }
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- request
+GET /t
+--- response_body
+passed
+--- no_error_log
+[error]
+
+
+
+=== TEST 14: hit routes
+--- config
+ location /t {
+ content_by_lua_block {
+ local http = require "resty.http"
+ local uri = "http://127.0.0.1:" .. ngx.var.server_port
+ .. "/server_port?device_id=1"
+
+ local httpc = http.new()
+ local res, err = httpc:request_uri(uri, {method = "GET"})
+ if not res then
+ ngx.say(err)
+ return
+ end
+
+ ngx.say(res.status)
+ }
+ }
+--- request
+GET /t
+--- response_body
+200
diff --git a/t/node/healthcheck.t b/t/node/healthcheck.t
index d9bacb8..9f77a43 100644
--- a/t/node/healthcheck.t
+++ b/t/node/healthcheck.t
@@ -488,7 +488,7 @@ qr{.*http://127.0.0.1:1960/server_port.*
.*http://127.0.0.1:1961/server_port.*
.*http://127.0.0.1:1961/server_port.*
.*http://127.0.0.1:1961/server_port.*
-.*http://127.0.0.1:1960/server_port.*}
+.*http://127.0.0.1:1961/server_port.*}
--- timeout: 10