You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@apisix.apache.org by "Fabriceli (via GitHub)" <gi...@apache.org> on 2023/05/03 10:12:22 UTC

[GitHub] [apisix] Fabriceli commented on a diff in pull request #9204: fix(consul): support to fetch only health endpoint

Fabriceli commented on code in PR #9204:
URL: https://github.com/apache/apisix/pull/9204#discussion_r1183493411


##########
apisix/discovery/consul/init.lua:
##########
@@ -175,81 +188,291 @@ local function get_retry_delay(retry_delay)
     return retry_delay
 end
 
+local function get_opts(consul_server, is_catalog)
+    local opts = {
+        host = consul_server.host,
+        port = consul_server.port,
+        connect_timeout = consul_server.connect_timeout,
+        read_timeout = consul_server.read_timeout,
+    }
+    if consul_server.keepalive then
+        if is_catalog then
+            opts.default_args = {
+                wait = consul_server.wait_timeout, --blocked wait!=0; unblocked by wait=0
+                index = consul_server.catalog_index,
+            }
+        else
+            opts.default_args = {
+                wait = consul_server.wait_timeout, --blocked wait!=0; unblocked by wait=0
+                index = consul_server.health_index,
+            }
+        end
+    end
+
+    return opts
+end
+
+local function watch_catalog(consul_server)
+    local client = resty_consul:new(get_opts(consul_server, true))
+
+    ::RETRY::
+    local watch_result, watch_err = client:get(consul_server.consul_watch_catalog_url)
+    local watch_error_info = (watch_err ~= nil and watch_err)
+            or ((watch_result ~= nil and watch_result.status ~= 200)
+            and watch_result.status)
+    if watch_error_info then
+        log.error("connect consul: ", consul_server.consul_server_url,
+            " by sub url: ", consul_server.consul_watch_catalog_url,
+            ", got watch result: ", json_delay_encode(watch_result, true),
+            ", with error: ", watch_error_info)
+
+        return watch_type_catalog, default_catalog_error_index
+    end
+    if consul_server.catalog_index > 0
+            and consul_server.catalog_index == tonumber(watch_result.headers['X-Consul-Index']) then
+        local random_delay = math_random(default_random_seed)
+        log.info("watch catalog has no change, re-watch consul after ", random_delay, " seconds")
+        core_sleep(random_delay)
+        goto RETRY
+    end
+    return watch_type_catalog, watch_result.headers['X-Consul-Index']
+end
+
+local function watch_health(consul_server)
+    local client = resty_consul:new(get_opts(consul_server, false))
+
+    ::RETRY::
+    local watch_result, watch_err = client:get(consul_server.consul_watch_health_url)
+    local watch_error_info = (watch_err ~= nil and watch_err)
+            or ((watch_result ~= nil and watch_result.status ~= 200)
+            and watch_result.status)
+    if watch_error_info then
+        log.error("connect consul: ", consul_server.consul_server_url,
+            " by sub url: ", consul_server.consul_watch_health_url,
+            ", got watch result: ", json_delay_encode(watch_result, true),
+            ", with error: ", watch_error_info)
+
+        return watch_type_health, default_health_error_index
+    end
+    if consul_server.health_index > 0
+            and consul_server.health_index == tonumber(watch_result.headers['X-Consul-Index']) then
+        local random_delay = math_random(default_random_seed)
+        log.info("watch health has no change, re-watch consul after ", random_delay, " seconds")
+        core_sleep(random_delay)
+        goto RETRY
+    end
+    return watch_type_health, watch_result.headers['X-Consul-Index']
+end
+
+local function check_keepalive(consul_server, retry_delay)
+    if consul_server.keepalive then
+        local ok, err = ngx_timer_at(0, _M.connect, consul_server, retry_delay)
+        if not ok then
+            log.error("create ngx_timer_at got error: ", err)
+            return
+        end
+    end
+end
+
+local function update_index(consul_server, catalog_index, health_index)
+    local c_index = 0
+    local h_index = 0
+    if catalog_index ~= nil then
+        c_index = tonumber(catalog_index)
+    end
+
+    if health_index ~= nil then
+        h_index = tonumber(health_index)
+    end
+
+    if c_index > 0 then
+        consul_server.catalog_index = c_index
+    end
+
+    if h_index > 0 then
+        consul_server.health_index = h_index
+    end
+end
+
+local function is_not_empty(value)
+    if value == nil or value == null
+            or (type(value) == "table" and not next(value))
+            or (type(value) == "string" and value == "") then
+        return false
+    end
+
+    return true
+end
+
+local function watch_result_is_valid(watch_type, index, catalog_index, health_index)
+    if index <= 0 then
+        return false
+    end
+
+    if watch_type == watch_type_catalog then
+        if index == catalog_index then
+            return false
+        end
+    else
+        if index == health_index then
+            return false
+        end
+    end
+
+    return true
+end
 
 function _M.connect(premature, consul_server, retry_delay)
     if premature then
         return
     end
 
+    local catalog_thread, spawn_catalog_err = thread_spawn(watch_catalog, consul_server)
+    if not catalog_thread then
+        local random_delay = math_random(default_random_seed)
+        log.error("failed to spawn thread watch catalog: ", spawn_catalog_err,
+            ", retry connecting consul after ", random_delay, " seconds")
+        core_sleep(random_delay)
+
+        check_keepalive(consul_server, retry_delay)
+        return
+    end
+
+    local health_thread, err = thread_spawn(watch_health, consul_server)
+    if not health_thread then
+        thread_kill(catalog_thread)
+        local random_delay = math_random(default_random_seed)
+        log.error("failed to spawn thread watch health: ", err, ", retry connecting consul after ",
+            random_delay, " seconds")
+        core_sleep(random_delay)
+
+        check_keepalive(consul_server, retry_delay)
+        return
+    end
+
+    local thread_wait_ok, watch_type, index = thread_wait(catalog_thread, health_thread)
+    thread_kill(catalog_thread)
+    thread_kill(health_thread)
+    if not thread_wait_ok then
+        log.error("failed to wait thread: ", watch_type)
+        local random_delay = math_random(default_random_seed)
+        log.warn("failed to wait thread, retry connecting consul after ", random_delay, " seconds")
+        core_sleep(random_delay)
+
+        check_keepalive(consul_server, retry_delay)
+        return
+    end
+
+    -- double check index has changed
+    if not watch_result_is_valid(tonumber(watch_type),
+            tonumber(index), consul_server.catalog_index, consul_server.health_index) then
+
+        retry_delay = get_retry_delay(retry_delay)
+        log.warn("get all svcs got err, retry connecting consul after ", retry_delay, " seconds")
+        core_sleep(retry_delay)
+
+        check_keepalive(consul_server, retry_delay)
+        return
+    end
+
     local consul_client = resty_consul:new({
         host = consul_server.host,
         port = consul_server.port,
         connect_timeout = consul_server.connect_timeout,
         read_timeout = consul_server.read_timeout,
-        default_args = consul_server.default_args,
     })
+    local catalog_success, catalog_res, catalog_err = pcall(function()
+        return consul_client:get(consul_server.consul_watch_catalog_url)
+    end)
+    if not catalog_success then
+        log.error("connect consul: ", consul_server.consul_server_url,
+            " by sub url: ", consul_server.consul_watch_catalog_url,
+            ", got catalog result: ", json_delay_encode(catalog_res, true))
+        check_keepalive(consul_server, retry_delay)
+        return
+    end
+    local catalog_error_info = (catalog_err ~= nil and catalog_err)
+            or ((catalog_res ~= nil and catalog_res.status ~= 200)
+            and catalog_res.status)
+    if catalog_error_info then
+        log.error("connect consul: ", consul_server.consul_server_url,
+            " by sub url: ", consul_server.consul_watch_catalog_url,
+            ", got catalog result: ", json_delay_encode(catalog_res, true),
+            ", with error: ", catalog_error_info)
 
-    log.info("consul_server: ", json_delay_encode(consul_server, true))
-    local watch_result, watch_err = consul_client:get(consul_server.consul_watch_sub_url)
-    local watch_error_info = (watch_err ~= nil and watch_err)
-            or ((watch_result ~= nil and watch_result.status ~= 200)
-            and watch_result.status)
-    if watch_error_info then
+        retry_delay = get_retry_delay(retry_delay)
+        log.warn("get all svcs got err, retry connecting consul after ", retry_delay, " seconds")
+        core_sleep(retry_delay)
+
+        check_keepalive(consul_server, retry_delay)
+        return
+    end
+
+    -- get health index
+    local success, health_res, health_err = pcall(function()
+        return consul_client:get(consul_server.consul_watch_health_url)
+    end)
+    if not success then
         log.error("connect consul: ", consul_server.consul_server_url,
-            " by sub url: ", consul_server.consul_watch_sub_url,
-            ", got watch result: ", json_delay_encode(watch_result, true),
-            ", with error: ", watch_error_info)
+            " by sub url: ", consul_server.consul_watch_health_url,
+            ", got health result: ", json_delay_encode(health_res, true))
+        check_keepalive(consul_server, retry_delay)
+        return
+    end
+    local health_error_info = (health_err ~= nil and health_err)
+            or ((health_res ~= nil and health_res.status ~= 200)
+            and health_res.status)
+    if health_error_info then
+        log.error("connect consul: ", consul_server.consul_server_url,
+            " by sub url: ", consul_server.consul_watch_health_url,
+            ", got health result: ", json_delay_encode(health_res, true),
+            ", with error: ", health_error_info)
 
         retry_delay = get_retry_delay(retry_delay)
-        log.warn("retry connecting consul after ", retry_delay, " seconds")
+        log.warn("get all svcs got err, retry connecting consul after ", retry_delay, " seconds")
         core_sleep(retry_delay)
 
-        goto ERR
+        goto ERROR

Review Comment:
   done



##########
apisix/discovery/consul/init.lua:
##########
@@ -175,81 +188,291 @@ local function get_retry_delay(retry_delay)
     return retry_delay
 end
 
+local function get_opts(consul_server, is_catalog)
+    local opts = {
+        host = consul_server.host,
+        port = consul_server.port,
+        connect_timeout = consul_server.connect_timeout,
+        read_timeout = consul_server.read_timeout,
+    }
+    if consul_server.keepalive then
+        if is_catalog then
+            opts.default_args = {
+                wait = consul_server.wait_timeout, --blocked wait!=0; unblocked by wait=0
+                index = consul_server.catalog_index,
+            }
+        else
+            opts.default_args = {
+                wait = consul_server.wait_timeout, --blocked wait!=0; unblocked by wait=0
+                index = consul_server.health_index,
+            }
+        end
+    end
+
+    return opts
+end
+
+local function watch_catalog(consul_server)
+    local client = resty_consul:new(get_opts(consul_server, true))
+
+    ::RETRY::
+    local watch_result, watch_err = client:get(consul_server.consul_watch_catalog_url)
+    local watch_error_info = (watch_err ~= nil and watch_err)
+            or ((watch_result ~= nil and watch_result.status ~= 200)
+            and watch_result.status)
+    if watch_error_info then
+        log.error("connect consul: ", consul_server.consul_server_url,
+            " by sub url: ", consul_server.consul_watch_catalog_url,
+            ", got watch result: ", json_delay_encode(watch_result, true),
+            ", with error: ", watch_error_info)
+
+        return watch_type_catalog, default_catalog_error_index
+    end
+    if consul_server.catalog_index > 0
+            and consul_server.catalog_index == tonumber(watch_result.headers['X-Consul-Index']) then
+        local random_delay = math_random(default_random_seed)
+        log.info("watch catalog has no change, re-watch consul after ", random_delay, " seconds")
+        core_sleep(random_delay)
+        goto RETRY
+    end
+    return watch_type_catalog, watch_result.headers['X-Consul-Index']
+end
+
+local function watch_health(consul_server)
+    local client = resty_consul:new(get_opts(consul_server, false))
+
+    ::RETRY::
+    local watch_result, watch_err = client:get(consul_server.consul_watch_health_url)
+    local watch_error_info = (watch_err ~= nil and watch_err)
+            or ((watch_result ~= nil and watch_result.status ~= 200)
+            and watch_result.status)
+    if watch_error_info then
+        log.error("connect consul: ", consul_server.consul_server_url,
+            " by sub url: ", consul_server.consul_watch_health_url,
+            ", got watch result: ", json_delay_encode(watch_result, true),
+            ", with error: ", watch_error_info)
+
+        return watch_type_health, default_health_error_index
+    end
+    if consul_server.health_index > 0
+            and consul_server.health_index == tonumber(watch_result.headers['X-Consul-Index']) then
+        local random_delay = math_random(default_random_seed)
+        log.info("watch health has no change, re-watch consul after ", random_delay, " seconds")
+        core_sleep(random_delay)
+        goto RETRY
+    end
+    return watch_type_health, watch_result.headers['X-Consul-Index']
+end
+
+local function check_keepalive(consul_server, retry_delay)
+    if consul_server.keepalive then
+        local ok, err = ngx_timer_at(0, _M.connect, consul_server, retry_delay)
+        if not ok then
+            log.error("create ngx_timer_at got error: ", err)
+            return
+        end
+    end
+end
+
+local function update_index(consul_server, catalog_index, health_index)
+    local c_index = 0
+    local h_index = 0
+    if catalog_index ~= nil then
+        c_index = tonumber(catalog_index)
+    end
+
+    if health_index ~= nil then
+        h_index = tonumber(health_index)
+    end
+
+    if c_index > 0 then
+        consul_server.catalog_index = c_index
+    end
+
+    if h_index > 0 then
+        consul_server.health_index = h_index
+    end
+end
+
+local function is_not_empty(value)
+    if value == nil or value == null
+            or (type(value) == "table" and not next(value))
+            or (type(value) == "string" and value == "") then
+        return false
+    end
+
+    return true
+end
+
+local function watch_result_is_valid(watch_type, index, catalog_index, health_index)
+    if index <= 0 then
+        return false
+    end
+
+    if watch_type == watch_type_catalog then
+        if index == catalog_index then
+            return false
+        end
+    else
+        if index == health_index then
+            return false
+        end
+    end
+
+    return true
+end
 
 function _M.connect(premature, consul_server, retry_delay)
     if premature then
         return
     end
 
+    local catalog_thread, spawn_catalog_err = thread_spawn(watch_catalog, consul_server)
+    if not catalog_thread then
+        local random_delay = math_random(default_random_seed)
+        log.error("failed to spawn thread watch catalog: ", spawn_catalog_err,
+            ", retry connecting consul after ", random_delay, " seconds")
+        core_sleep(random_delay)
+
+        check_keepalive(consul_server, retry_delay)
+        return
+    end
+
+    local health_thread, err = thread_spawn(watch_health, consul_server)
+    if not health_thread then
+        thread_kill(catalog_thread)
+        local random_delay = math_random(default_random_seed)
+        log.error("failed to spawn thread watch health: ", err, ", retry connecting consul after ",
+            random_delay, " seconds")
+        core_sleep(random_delay)
+
+        check_keepalive(consul_server, retry_delay)
+        return
+    end
+
+    local thread_wait_ok, watch_type, index = thread_wait(catalog_thread, health_thread)
+    thread_kill(catalog_thread)
+    thread_kill(health_thread)
+    if not thread_wait_ok then
+        log.error("failed to wait thread: ", watch_type)
+        local random_delay = math_random(default_random_seed)
+        log.warn("failed to wait thread, retry connecting consul after ", random_delay, " seconds")
+        core_sleep(random_delay)
+
+        check_keepalive(consul_server, retry_delay)
+        return
+    end
+
+    -- double check index has changed
+    if not watch_result_is_valid(tonumber(watch_type),
+            tonumber(index), consul_server.catalog_index, consul_server.health_index) then
+
+        retry_delay = get_retry_delay(retry_delay)
+        log.warn("get all svcs got err, retry connecting consul after ", retry_delay, " seconds")
+        core_sleep(retry_delay)
+
+        check_keepalive(consul_server, retry_delay)
+        return
+    end
+
     local consul_client = resty_consul:new({
         host = consul_server.host,
         port = consul_server.port,
         connect_timeout = consul_server.connect_timeout,
         read_timeout = consul_server.read_timeout,
-        default_args = consul_server.default_args,
     })
+    local catalog_success, catalog_res, catalog_err = pcall(function()
+        return consul_client:get(consul_server.consul_watch_catalog_url)
+    end)
+    if not catalog_success then
+        log.error("connect consul: ", consul_server.consul_server_url,
+            " by sub url: ", consul_server.consul_watch_catalog_url,
+            ", got catalog result: ", json_delay_encode(catalog_res, true))
+        check_keepalive(consul_server, retry_delay)
+        return
+    end
+    local catalog_error_info = (catalog_err ~= nil and catalog_err)
+            or ((catalog_res ~= nil and catalog_res.status ~= 200)
+            and catalog_res.status)
+    if catalog_error_info then
+        log.error("connect consul: ", consul_server.consul_server_url,
+            " by sub url: ", consul_server.consul_watch_catalog_url,
+            ", got catalog result: ", json_delay_encode(catalog_res, true),
+            ", with error: ", catalog_error_info)
 
-    log.info("consul_server: ", json_delay_encode(consul_server, true))
-    local watch_result, watch_err = consul_client:get(consul_server.consul_watch_sub_url)
-    local watch_error_info = (watch_err ~= nil and watch_err)
-            or ((watch_result ~= nil and watch_result.status ~= 200)
-            and watch_result.status)
-    if watch_error_info then
+        retry_delay = get_retry_delay(retry_delay)
+        log.warn("get all svcs got err, retry connecting consul after ", retry_delay, " seconds")
+        core_sleep(retry_delay)
+
+        check_keepalive(consul_server, retry_delay)
+        return
+    end
+
+    -- get health index
+    local success, health_res, health_err = pcall(function()
+        return consul_client:get(consul_server.consul_watch_health_url)
+    end)
+    if not success then
         log.error("connect consul: ", consul_server.consul_server_url,
-            " by sub url: ", consul_server.consul_watch_sub_url,
-            ", got watch result: ", json_delay_encode(watch_result, true),
-            ", with error: ", watch_error_info)
+            " by sub url: ", consul_server.consul_watch_health_url,
+            ", got health result: ", json_delay_encode(health_res, true))
+        check_keepalive(consul_server, retry_delay)
+        return
+    end
+    local health_error_info = (health_err ~= nil and health_err)
+            or ((health_res ~= nil and health_res.status ~= 200)
+            and health_res.status)
+    if health_error_info then
+        log.error("connect consul: ", consul_server.consul_server_url,
+            " by sub url: ", consul_server.consul_watch_health_url,
+            ", got health result: ", json_delay_encode(health_res, true),
+            ", with error: ", health_error_info)
 
         retry_delay = get_retry_delay(retry_delay)
-        log.warn("retry connecting consul after ", retry_delay, " seconds")
+        log.warn("get all svcs got err, retry connecting consul after ", retry_delay, " seconds")
         core_sleep(retry_delay)
 
-        goto ERR
+        goto ERROR
     end
 
     log.info("connect consul: ", consul_server.consul_server_url,
-        ", watch_result status: ", watch_result.status,
-        ", watch_result.headers.index: ", watch_result.headers['X-Consul-Index'],
+        ", catalog_result status: ", catalog_res.status,
+        ", catalog_result.headers.index: ", catalog_res.headers['X-Consul-Index'],
         ", consul_server.index: ", consul_server.index,
         ", consul_server: ", json_delay_encode(consul_server, true))
 
     -- if current index different last index then update service
-    if consul_server.index ~= watch_result.headers['X-Consul-Index'] then
-        local up_services = core.table.new(0, #watch_result.body)
-        local consul_client_svc = resty_consul:new({
-            host = consul_server.host,
-            port = consul_server.port,
-            connect_timeout = consul_server.connect_timeout,
-            read_timeout = consul_server.read_timeout,
-        })
-        for service_name, _ in pairs(watch_result.body) do
+    if (consul_server.catalog_index ~= tonumber(catalog_res.headers['X-Consul-Index']))
+            or (consul_server.health_index ~= tonumber(health_res.headers['X-Consul-Index'])) then
+        local up_services = core.table.new(0, #catalog_res.body)
+        for service_name, _ in pairs(catalog_res.body) do
             -- check if the service_name is 'skip service'
             if skip_service_map[service_name] then
                 goto CONTINUE
             end
             -- get node from service
             local svc_url = consul_server.consul_sub_url .. "/" .. service_name
-            local result, err = consul_client_svc:get(svc_url)
-            local error_info = (err ~= nil and err) or
+            local svc_success, result, get_err = pcall(function()
+                return consul_client:get(svc_url, {passing = true})
+            end)
+            local error_info = (get_err ~= nil and get_err) or
                     ((result ~= nil and result.status ~= 200) and result.status)
-            if error_info then
+            if not svc_success or error_info then
                 log.error("connect consul: ", consul_server.consul_server_url,
                     ", by service url: ", svc_url, ", with error: ", error_info)
                 goto CONTINUE
             end
 
             -- decode body, decode json, update service, error handling
-            if result.body then
-                log.notice("service url: ", svc_url,
-                    ", header: ", json_delay_encode(result.headers, true),
-                    ", body: ", json_delay_encode(result.body, true))
+            -- check result body is not nil and not empty
+            if is_not_empty(result.body) then
                 -- add services to table
                 local nodes = up_services[service_name]
-                for  _, node in ipairs(result.body) do
-                    local svc_address, svc_port = node.ServiceAddress, node.ServicePort
-                    if not svc_address then
-                        svc_address = node.Address
+                for _, node in ipairs(result.body) do
+                    if not node.Service then
+                        goto CONTINUE
                     end
+                    local svc_address, svc_port = node.Service.Address, node.Service.Port

Review Comment:
   done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@apisix.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org