You are viewing a plain text version of this content. The canonical link for it is here.
Posted to site-dev@apache.org by sebb <se...@gmail.com> on 2018/02/20 12:00:09 UTC

Re: svn commit: r1824832 - /infrastructure/site/trunk/content/dyn/dev_closer.lua

On 20 February 2018 at 06:47,  <he...@apache.org> wrote:
> Author: henkp
> Date: Tue Feb 20 06:47:17 2018
> New Revision: 1824832
>
> URL: http://svn.apache.org/viewvc?rev=1824832&view=rev
> Log:
> += dev_closer.lua
>
> Added:
>     infrastructure/site/trunk/content/dyn/dev_closer.lua   (with props)
>
> Added: infrastructure/site/trunk/content/dyn/dev_closer.lua
> URL: http://svn.apache.org/viewvc/infrastructure/site/trunk/content/dyn/dev_closer.lua?rev=1824832&view=auto
> ==============================================================================
> --- infrastructure/site/trunk/content/dyn/dev_closer.lua (added)
> +++ infrastructure/site/trunk/content/dyn/dev_closer.lua Tue Feb 20 06:47:17 2018
> @@ -0,0 +1,408 @@
> +--[[
> +
> +  This is a LUA CGI script that uses LibEZT to produce templated mirror content
> +
> +  It uses the output from the Apache GeoIP module to choose the appropriate mirror(s)
> +
> +  The script supports the following optional URL parameters:
> +  cca2 - override the country code
> +  preferred/Preferred - sets the preferred server if available, otherwise it is chosen at random
> +  as_json/asjson - don't process the template, but return the mirror data as JSON
> +  action=download together with filename - generate a redirect to the file on the preferred mirror
> +
> +]]
> +
> +-- version number of this file (automatically generated by SVN)
> +local VERSION = ("$Revision: 1820301 $"):match("(%d+)")
> +
> +function version()
> +    return VERSION
> +end
> +
> +local CACHE_TIMEOUT = 1800 -- should be 0 in test ; 1800 in production
> +local LOG_LOOKUPS   = 1    -- should be 1 in test ; 0 in production
> +
> +local JSON = require 'JSON'
> +local ezt = require 'libezt'
> +local lfs = require 'lfs'
> +local http = require 'socket.http' ; http.TIMEOUT = 1

`http` is re-used as a variable name later in the file, which would shadow this
module handle. Consider naming this one `HTTP` instead to avoid the clash.

> +local mirror_file = "/x1/www/www.apache.org/mirrors/mirrors.list"
> +local MAXAGE = 24*3600 -- max mirror age
> +local PID = nil
> +local ATTIC_URI = 'http://attic.apache.org/projects/'
> +local ATTIC_DIR = '/x1/www/attic.apache.org/projects/'
> +local DIST_DIR  = '/x1/www/www.apache.org/dist/'
> +local ARCH_URI  = 'http://archive.apache.org/dist/'
> +local cache_hit = nil
> +local cache_in_arch = {} -- cache of archive lookups
> +local mirror_stamp = 0 -- when mirror_file was last processed
> +local mirror_map = {} -- map of all recent mirrors. [ftp|http|rsync][cc|backup]=url
> +local mirror_templates = {} -- cache of unprocessed mirror templates
> +local mirror_templates_generated = {} -- cache of generated templates
> +local mymap -- map of mirrors for the current request (based on the country code)
> +
> +function get_mirrors()
> +    local now = os.time()
> +    local atleast = now - MAXAGE
> +    local f = io.open(mirror_file, "r")
> +    local mirrord = f:read("*a")
> +
> +    -- Check the age of the mirrors relative to the mirror list, rather than now. (As was done by mirrors.cgi)
> +    -- This allows the system to still work even if the list is a bit stale
> +    -- LUA does not have a standard API to get a file date
> +    -- However, the timestamp when the information was collected is more useful anyway
> +    -- Parse the file header: # date : Wed Sep  2 09:49:53 2015 [UTC]
> +    local mon, day, hh, mm, ss, yy = mirrord:match("# date : %w+ (%w+) +(%d+) (%d%d):(%d%d):(%d%d) (%d%d%d%d) %[UTC%]")
> +    if mon then
> +        local MON = {Jan=1,Feb=2,Mar=3,Apr=4,May=5,Jun=6,Jul=7,Aug=8,Sep=9,Oct=10,Nov=11,Dec=12}
> +        -- use isdst = false as the timestamp is UTC
> +        local filetime = os.time({year = yy, month = MON[mon], day = day, hour = hh, min = mm, sec = ss, isdst=false})
> +        atleast = filetime - MAXAGE
> +    end
> +
> +    mirror_map = {}
> +    f:close()
> +    for t, c, url, timestamp in mirrord:gmatch("([a-zA-Z]+)%s+([a-zA-Z]+)%s+(%S+)%s+(%d+)\r?\n") do
> +        if c then
> +            c = c:lower()
> +            -- Don't check the timestamp for backup mirrors
> +            if c == 'backup' or tonumber(timestamp) >= atleast then
> +                mirror_map[c] = mirror_map[c] or {}
> +                mirror_map[c][t] = mirror_map[c][t] or {}
> +                --url = url:gsub("/$", "")
> +                table.insert(mirror_map[c][t], url)
> +            end
> +        end
> +    end
> +    mirror_stamp = now
> +    return mirror_map
> +end
> +
> +function setpid()
> +  local f = io.open('/proc/self/stat')
> +  if f then
> +    local line = f:read()
> +    f:close()
> +    PID = line:sub(0,line:find(' ')-1)
> +  end
> +end
> +
> +function log_lookup(inarch,path)
> +  local f = io.open("/www/www.apache.org/dyn/stats/AAAA",'a')
> +  if f then
> +    if PID == nil then setpid() end
> +    f:write(os.date('%Y-%m-%d/%H:%M:%S')
> +      .. " [" .. ( PID or 'pid' ) .. ']'
> +      .. " look=" .. tostring(inarch)
> +      .. " hit="  .. tostring(cache_hit)
> +      .. ' ' .. path
> +      .. "\n"
> +      )
> +    f:close()
> +    end
> +end
> +
> +function is_in_attic(proj)
> +  return lfs.attributes(ATTIC_DIR .. proj .. '.html') ~= nil
> +end
> +function dist_path(path)  return DIST_DIR .. path end
> +function is_in_dist(path) return lfs.attributes(dist_path(path))~=nil end
> +function arch_uri(path)   return ARCH_URI .. path end
> +
> +function archive_url(path)
> +  local uri = arch_uri(path)
> +  return '<a href="' .. uri .. '" rel="nofollow">' .. uri .. '</a>'
> +end
> +
> +function is_in_arch(path)
> +  cache_hit = true
> +  if not cache_in_arch[path]
> +     or ( not cache_in_arch[path].result
> +          and cache_in_arch[path].timestamp < ( os.time() - CACHE_TIMEOUT )
> +        )
> +  then
> +    cache_hit = false
> +    r, c, h = http.request { method = "HEAD", url = arch_uri(path) }
> +    cache_in_arch[path] =
> +      { timestamp = os.time(), result = ( r and c == 200 ) }
> +  end
> +  return cache_in_arch[path].result
> +end
> +
> +function get_page(url)
> +    if not mirror_templates[url] or mirror_templates[url].timestamp < (os.time() - 2*CACHE_TIMEOUT) then
> +        local f = io.open(url, "r")
> +        mirror_templates[url] = {
> +            data = f and f:read("*a") or "No such page",
> +            timestamp = os.time()
> +        }
> +        if f then
> +            f:close()
> +        end
> +    end
> +    return mirror_templates[url]
> +end
> +
> +function get_output_cached(page, defs, r, ezt_defs)
> +    local pref = defs.preferred or ""
> +    local path_info = defs.path_info or ""
> +    local cacheKey = page .. ":" .. pref .. ":" .. path_info
> +    if not mirror_templates_generated[cacheKey] or mirror_templates_generated[cacheKey].timestamp < (os.time() - CACHE_TIMEOUT) then
> +        local template = get_page(page)
> +        local tdata = recurse(defs, template.data, r, ezt_defs)
> +        mirror_templates_generated[cacheKey] = {
> +            data = tdata,
> +            timestamp = os.time()
> +        }
> +    end
> +    return mirror_templates_generated[cacheKey]
> +end
> +
> +function recurse(defs, tdata, r, ezt_defs)
> +    -- SSI emulation
> +    tdata = tdata:gsub("<!%-%-%s*#include virtual=\"(.-)\"%s*%-%->",
> +        function(inc)
> +            local filepath = (defs.filepath .. inc):gsub("[/]+", "/")
> +            if r:stat(filepath) then
> +                local f = io.open(filepath, "r")
> +                local d = f:read("*a")
> +                f:close()
> +                return d
> +            else
> +                return ""
> +            end
> +        end
> +    )
> +
> +    -- Parse EZT
> +    local structure, error = ezt:import("[ezt]"..tdata.."[end]")
> +
> +    -- Render output
> +    if structure then return ezt:construct(structure, ezt_defs) else return error end
> +end
> +
> +-- true if the string (s) ends with (e)
> +function endsWith(s, e)
> +    return e == s:sub(-e:len())
> +end
> +
> +-- true if the string (s) begins with (b)
> +function beginsWith(s, b)
> +    return b == s:sub(1, b:len())
> +end
> +
> +-- return false if string is empty (or nil)
> +function nonEmpty(s)
> +    if s == null or s == '' then return nil end
> +    return s
> +end
> +
> +-- Temporary fix to extract the missing path_info for dyn/closer.cgi redirects only
> +function get_path_info(s)
> +    local CGI_SCRIPT = "/dyn/closer.cgi/" -- original CGI script name
> +    if beginsWith(s, CGI_SCRIPT) then
> +        return s:sub(CGI_SCRIPT:len()) -- keep just the suffix
> +    else
> +        return nil
> +    end
> +end
> +
> +-- The request parameter has the data structures and functions as described here:
> +-- http://httpd.apache.org/docs/trunk/mod/mod_lua.html#datastructures
> +-- http://httpd.apache.org/docs/trunk/mod/mod_lua.html#functions
> +
> +function handle(r)
> +    local get = r:parseargs()
> +
> +    local now = os.time()
> +    if mirror_stamp < (now - 3600) then
> +        get_mirrors()
> +    end
> +    local country = r.notes['GEOIP_COUNTRY_NAME'] or r.subprocess_env['GEOIP_COUNTRY_NAME'] or "Unknown"
> +    local cca2 = (get.cca2 or r.notes['GEOIP_COUNTRY_CODE'] or r.subprocess_env['GEOIP_COUNTRY_CODE'] or 'Backup'):lower()
> +    if cca2 == 'gb' then
> +        cca2 = 'uk'
> +    end
> +    local occa2 = cca2
> +    if not mirror_map[cca2] then
> +        cca2 = 'backup'
> +    end
> +    mymap = mirror_map[cca2] or mirror_map['backup']
> +    local bmap = mirror_map['backup']
> +    mymap['backup'] = bmap['http']
> +    local URL = {}
> +    for _, t in pairs({'http','ftp'}) do
> +        URL[t] = (mymap[t] and mymap[t][math.random(1, #mymap[t])]) or (bmap[t] and bmap[t][math.random(1, #bmap[t])])
> +    end
> +    local page = r.filename
> +    local got_f = get.f -- work on a copy of the parameter
> +    if got_f then
> +        local hname = r.hostname:gsub("www%.", "")
> +        got_f = got_f:gsub("^/var/www/html/", "/var/www/")
> +        got_f = got_f:gsub(hname, ""):gsub("/var/www//var/www/", "/var/www/")
> +        if r:stat(got_f) or r:stat(got_f:gsub("%.cgi", ".html"))  then
> +            page = got_f
> +        else
> +            page = got_f:gsub("/www/", "/www/" .. hname:gsub("%.[a-z][a-z]%.", ".") .. "/"):gsub("[/]+", "/")
> +        end
> +    end
> +    page = page:gsub("%.cgi", ".html"):gsub("%.lua", ".html")
> +    if not r:stat(page) or not (page:match("^/var/www/") or page:match("^/x1/www/")) then
> +        page = "/x1/www/www.apache.org/dyn/closer.html"
> +    end
> +
> +    local defs = {}
> +    local ezt_defs = {
> +        strings = {},
> +        arrays = {}
> +    }
> +
> +    defs.filepath = page:gsub("[^/]+$", "")
> +    defs.debug = get.debug and true or false
> +    defs.preferred = r:escape_html(get.preferred or get.Preferred or URL['http'] or "")
> +    defs.path_info = r:escape_html(get.path -- command-line override
> +         or nonEmpty(r.path_info) -- if path provided by server
> +         or get_path_info(r.uri) -- temporary fix to extract it from r.uri for dyn/closer.cgi calls
> +         -- Disable for now; it was previously effectively disabled because r.path_info was never false
> +--         or r.unparsed_uri:gsub("^.+%.cgi/*", ""):gsub("^.+%.lua/*", "") -- not sure what this is trying to do
> +         -- TODO in any case seems wrong to use the unparsed URI as that will include the query string
> +         or "/") -- default
> +        :gsub("^/","",1) -- trim leading "/" as per Python version
> +    defs.country = country
> +    defs.cca2 = cca2
> +    ezt_defs.strings = defs
> +    ezt_defs.arrays = {
> +        http = mymap['http'] or bmap['http'],
> +        ftp = mymap['ftp'] or bmap['ftp'],
> +        backup = bmap['http'],
> +    }
> +    -- Check that preferred http/ftp exists, otherwise default to none
> +    local prefIsOkay = false
> +    for _,b in ipairs({'http', 'ftp', 'backup'}) do
> +        for _, v in pairs(ezt_defs.arrays[b] or {}) do -- arrays[b] may not exist
> +            if r:escape_html(v) == defs.preferred then
> +                prefIsOkay = true
> +                break
> +            end
> +        end
> +        if prefIsOkay then
> +            break
> +        end
> +    end
> +    if not prefIsOkay then
> +        ezt_defs.preferred = ""
> +        defs.preferred = URL['http']
> +    end
> +
> +    -- string only repr of preferred URL
> +    if get.preferred and get.preferred == "true" then
> +        r.content_type = "text/plain"
> +        r:puts(defs.preferred)
> +        return apache2.OK
> +    end
> +
> +    local do_json = false
> +    if (get.as_json and not (get.as_json == "0")) or (get.asjson and not (get.asjson == "0")) then
> +        do_json = true
> +    end
> +    -- proj is the first path component of defs.path_info
> +    local proj = defs.path_info
> +    if proj and proj:find('/') then
> +      proj = proj:sub(1,proj:find('/')-1)
> +    end
> +    if get.action then
> +        if get.action == 'download' and get.filename then
> +            r.headers_out['Location'] = defs.preferred .. get.filename
> +            r.status = 302
> +            return apache2.OK
> +        elseif get.action == 'info' then
> +            r.content_type = "text/plain"
> +            r:puts(string.format("%s\ncloser revision: %s\nlibezt revision: %s\n",
> +                 _VERSION, -- LUA
> +                 version(), -- closer
> +                  ezt:version())) -- libezt
> +            -- Show any arguments
> +            for k, v in pairs( get ) do
> +                r:puts( string.format("%s: %s\n", k, v) )
> +            end
> +            -- Request parameters
> +            r:puts("r.hostname:     '",r.hostname or "nil", "'\n")
> +            r:puts("r.document_root:'",r.document_root or "nil", "'\n")
> +            r:puts("r.uri:          '",r.uri or "nil", "'\n")
> +            -- r:puts("r.the_request:  '",r.the_request or "nil", "'\n")
> +            -- r:puts("r.unparsed_uri: '",r.unparsed_uri or "nil", "'\n")
> +            r:puts("r.path_info:    '",r.path_info or "nil","'\n")
> +            r:puts("env[SCRIPT_URI] '",r.subprocess_env['SCRIPT_URI'] or "nil","'\n")
> +            -- calculated values
> +            r:puts("defs.path_info: '",defs.path_info or "nil","'\n")
> +            r:puts("defs.filepath:  '",defs.filepath or "nil","'\n")
> +            r:puts("proj         :  '",proj,"'\n")
> +            r:puts("proj in attic:  '",tostring(is_in_attic(proj)),"'\n")
> +            local in_dist = tostring(is_in_dist(defs.path_info))
> +            r:puts("dist/path    :  '",dist_path(defs.path_info),"'\n")
> +            r:puts("path in dist/:  '",in_dist,"'\n")
> +            local in_arch = tostring(is_in_arch(defs.path_info))
> +            r:puts("archive uri  :  '",arch_uri(defs.path_info),"'\n")
> +            r:puts("path in arch/:  '",in_arch,"'\n")
> +            r:puts("arch cachehit:  '",tostring(cache_hit),"'\n")
> +            return apache2.OK
> +        end
> +    end
> +    if not do_json then
> +        if is_in_attic(proj) then
> +            r.headers_out['Location'] = ATTIC_URI .. proj .. ".html"
> +            r.status = 302
> +            return apache2.OK
> +        end
> +        if not is_in_dist(defs.path_info) then
> +          local inarch = is_in_arch(defs.path_info)
> +          local arch_home = archive_url('') ;
> +          local arch_path = archive_url(defs.path_info) ;
> +          if inarch == nil then
> +            defs.lookup = 'A lookup on ' .. arch_home .. ' failed.'
> +          elseif inarch then
> +            defs.lookup = 'The object is in our archive : ' .. arch_path
> +          else
> +            defs.lookup = 'The object is in not in our archive ' .. arch_home
> +          end
> +          page = "/x1/www/www.apache.org/dyn/archive.html"
> +          if LOG_LOOKUPS then log_lookup(inarch,defs.path_info) end
> +        end
> +        local rootpath = defs.path_info:match("^([-a-z0-9]+)/")
> +        if rootpath and rootpath == "incubator" then
> +            rootpath = defs.path_info:match("^incubator/([-a-z0-9]+)/")
> +        end
> +        if rootpath then
> +            local f = io.open("/www/www.apache.org/dyn/stats/" .. rootpath .. ".log", "a")
> +            if f then
> +                -- get a bit of the IP to identify multiple unique request with same TS/CCA2
> +                local ipbit = r.useragent_ip:match("([a-f0-9]+):?:?$") or r.useragent_ip:match("^([a-f0-9]+)") or "000"
> +                f:write(os.time() .. " " .. ipbit .. " " .. occa2 .. " " .. defs.path_info .. "\n")
> +                f:close()
> +            end
> +        end
> +        local tdata = get_output_cached(page, defs, r, ezt_defs)
> +
> +        -- check for special content-type based on file name
> +        if endsWith(page,"--xml.html") then
> +            r.content_type = "text/xml"
> +        else
> +            r.content_type = "text/html"
> +        end
> +        r:puts(tdata.data)
> +        if r.hostname == 'www.apache.org' then
> +            r:puts("<!-- " .. occa2 .. " -->")
> +        end
> +    else
> +        r.content_type = "application/json"
> +        r:puts(JSON:encode_pretty({
> +            path_info = defs.path_info,
> +            preferred = defs.preferred,
> +            http = mymap['http'] or bmap['http'],
> +            ftp = mymap['ftp'] or bmap['ftp'],
> +            backup = bmap['http'],
> +            cca2 = occa2
> +        }))
> +    end
> +    return apache2.OK
> +end
>
> Propchange: infrastructure/site/trunk/content/dyn/dev_closer.lua
> ------------------------------------------------------------------------------
>     svn:executable = *
>
>