You are viewing a plain text version of this content. The canonical link for it is here.
Posted to site-dev@apache.org by sebb <se...@gmail.com> on 2018/02/20 12:00:09 UTC
Re: svn commit: r1824832 - /infrastructure/site/trunk/content/dyn/dev_closer.lua
On 20 February 2018 at 06:47, <he...@apache.org> wrote:
> Author: henkp
> Date: Tue Feb 20 06:47:17 2018
> New Revision: 1824832
>
> URL: http://svn.apache.org/viewvc?rev=1824832&view=rev
> Log:
> += dev_closer.lua
>
> Added:
> infrastructure/site/trunk/content/dyn/dev_closer.lua (with props)
>
> Added: infrastructure/site/trunk/content/dyn/dev_closer.lua
> URL: http://svn.apache.org/viewvc/infrastructure/site/trunk/content/dyn/dev_closer.lua?rev=1824832&view=auto
> ==============================================================================
> --- infrastructure/site/trunk/content/dyn/dev_closer.lua (added)
> +++ infrastructure/site/trunk/content/dyn/dev_closer.lua Tue Feb 20 06:47:17 2018
> @@ -0,0 +1,408 @@
> +--[[
> +
> + This is a LUA CGI script that uses LibEZT to produce templated mirror content
> +
> + It uses the output from the Apache GeoIP module to choose the appropriate mirror(s)
> +
> + The script supports the following optional URL parameters:
> + cca2 - override the country code
> + preferred/Preferred - sets the preferred server if available, otherwise it is chosen at random
> + as_json/asjson - don't process the template, but return the mirror data as JSON
> + action=download together with filename - generate a redirect to the file on the preferred mirror
> +
> +]]
> +
> +-- version number of this file (automatically generated by SVN)
> +local VERSION = ("$Revision: 1820301 $"):match("(%d+)")
> +
> +function version()
> + return VERSION
> +end
> +
> +local CACHE_TIMEOUT = 1800 -- should be 0 in test ; 1800 in production
> +local LOG_LOOKUPS = 1 -- should be 1 in test ; 0 in production
> +
> +local JSON = require 'JSON'
> +local ezt = require 'libezt'
> +local lfs = require 'lfs'
> +local http = require 'socket.http' ; http.TIMEOUT = 1
http is re-used as a variable below.
Try HTTP here instead.
> +local mirror_file = "/x1/www/www.apache.org/mirrors/mirrors.list"
> +local MAXAGE = 24*3600 -- max mirror age
> +local PID = nil
> +local ATTIC_URI = 'http://attic.apache.org/projects/'
> +local ATTIC_DIR = '/x1/www/attic.apache.org/projects/'
> +local DIST_DIR = '/x1/www/www.apache.org/dist/'
> +local ARCH_URI = 'http://archive.apache.org/dist/'
> +local cache_hit = nil
> +local cache_in_arch = {} -- cache of archive lookups
> +local mirror_stamp = 0 -- when mirror_file was last processed
> +local mirror_map = {} -- map of all recent mirrors. [ftp|http|rsync][cc|backup]=url
> +local mirror_templates = {} -- cache of unprocessed mirror templates
> +local mirror_templates_generated = {} -- cache of generated templates
> +local mymap -- map of mirrors for the current request (based on the country code)
> +
> +function get_mirrors()
> + local now = os.time()
> + local atleast = now - MAXAGE
> + local f = io.open(mirror_file, "r")
> + local mirrord = f:read("*a")
> +
> + -- Check the age of the mirrors relative to the mirror list, rather than now. (As was done by mirrors.cgi)
> + -- This allows the system to still work even if the list is a bit stale
> + -- LUA does not have a standard API to get a file date
> + -- However, the timestamp when the information was collected is more useful anyway
> + -- Parse the file header: # date : Wed Sep 2 09:49:53 2015 [UTC]
> + local mon, day, hh, mm, ss, yy = mirrord:match("# date : %w+ (%w+) +(%d+) (%d%d):(%d%d):(%d%d) (%d%d%d%d) %[UTC%]")
> + if mon then
> + local MON = {Jan=1,Feb=2,Mar=3,Apr=4,May=5,Jun=6,Jul=7,Aug=8,Sep=9,Oct=10,Nov=11,Dec=12}
> + -- use isdst = false as the timestamp is UTC
> + local filetime = os.time({year = yy, month = MON[mon], day = day, hour = hh, min = mm, sec = ss, isdst=false})
> + atleast = filetime - MAXAGE
> + end
> +
> + mirror_map = {}
> + f:close()
> + for t, c, url, timestamp in mirrord:gmatch("([a-zA-Z]+)%s+([a-zA-Z]+)%s+(%S+)%s+(%d+)\r?\n") do
> + if c then
> + c = c:lower()
> + -- Don't check the timestamp for backup mirrors
> + if c == 'backup' or tonumber(timestamp) >= atleast then
> + mirror_map[c] = mirror_map[c] or {}
> + mirror_map[c][t] = mirror_map[c][t] or {}
> + --url = url:gsub("/$", "")
> + table.insert(mirror_map[c][t], url)
> + end
> + end
> + end
> + mirror_stamp = now
> + return mirror_map
> +end
> +
> +function setpid()
> + local f = io.open('/proc/self/stat')
> + if f then
> + local line = f:read()
> + f:close()
> + PID = line:sub(0,line:find(' ')-1)
> + end
> +end
> +
> +function log_lookup(inarch,path)
> + local f = io.open("/www/www.apache.org/dyn/stats/AAAA",'a')
> + if f then
> + if PID == nil then setpid() end
> + f:write(os.date('%Y-%m-%d/%H:%M:%S')
> + .. " [" .. ( PID or 'pid' ) .. ']'
> + .. " look=" .. tostring(inarch)
> + .. " hit=" .. tostring(cache_hit)
> + .. ' ' .. path
> + .. "\n"
> + )
> + f:close()
> + end
> +end
> +
> +function is_in_attic(proj)
> + return lfs.attributes(ATTIC_DIR .. proj .. '.html') ~= nil
> +end
> +function dist_path(path) return DIST_DIR .. path end
> +function is_in_dist(path) return lfs.attributes(dist_path(path))~=nil end
> +function arch_uri(path) return ARCH_URI .. path end
> +
> +function archive_url(path)
> + local uri = arch_uri(path)
> + return '<a href="' .. uri .. '" rel="nofollow">' .. uri .. '</a>'
> +end
> +
> +function is_in_arch(path)
> + cache_hit = true
> + if not cache_in_arch[path]
> + or ( not cache_in_arch[path].result
> + and cache_in_arch[path].timestamp < ( os.time() - CACHE_TIMEOUT )
> + )
> + then
> + cache_hit = false
> + r, c, h = http.request { method = "HEAD", url = arch_uri(path) }
> + cache_in_arch[path] =
> + { timestamp = os.time(), result = ( r and c == 200 ) }
> + end
> + return cache_in_arch[path].result
> +end
> +
> +function get_page(url)
> + if not mirror_templates[url] or mirror_templates[url].timestamp < (os.time() - 2*CACHE_TIMEOUT) then
> + local f = io.open(url, "r")
> + mirror_templates[url] = {
> + data = f and f:read("*a") or "No such page",
> + timestamp = os.time()
> + }
> + if f then
> + f:close()
> + end
> + end
> + return mirror_templates[url]
> +end
> +
> +function get_output_cached(page, defs, r, ezt_defs)
> + local pref = defs.preferred or ""
> + local path_info = defs.path_info or ""
> + local cacheKey = page .. ":" .. pref .. ":" .. path_info
> + if not mirror_templates_generated[cacheKey] or mirror_templates_generated[cacheKey].timestamp < (os.time() - CACHE_TIMEOUT) then
> + local template = get_page(page)
> + local tdata = recurse(defs, template.data, r, ezt_defs)
> + mirror_templates_generated[cacheKey] = {
> + data = tdata,
> + timestamp = os.time()
> + }
> + end
> + return mirror_templates_generated[cacheKey]
> +end
> +
> +function recurse(defs, tdata, r, ezt_defs)
> + -- SSI emulation
> + tdata = tdata:gsub("<!%-%-%s*#include virtual=\"(.-)\"%s*%-%->",
> + function(inc)
> + local filepath = (defs.filepath .. inc):gsub("[/]+", "/")
> + if r:stat(filepath) then
> + local f = io.open(filepath, "r")
> + local d = f:read("*a")
> + f:close()
> + return d
> + else
> + return ""
> + end
> + end
> + )
> +
> + -- Parse EZT
> + local structure, error = ezt:import("[ezt]"..tdata.."[end]")
> +
> + -- Render output
> + if structure then return ezt:construct(structure, ezt_defs) else return error end
> +end
> +
> +-- true if the string (s) ends with (e)
> +function endsWith(s, e)
> + return e == s:sub(-e:len())
> +end
> +
> +-- true if the string (s) begins with (b)
> +function beginsWith(s, b)
> + return b == s:sub(1, b:len())
> +end
> +
> +-- return false if string is empty (or nil)
> +function nonEmpty(s)
> + if s == null or s == '' then return nil end
> + return s
> +end
> +
> +-- Temporary fix to extract the missing path_info for dyn/closer.cgi redirects only
> +function get_path_info(s)
> + local CGI_SCRIPT = "/dyn/closer.cgi/" -- original CGI script name
> + if beginsWith(s, CGI_SCRIPT) then
> + return s:sub(CGI_SCRIPT:len()) -- keep just the suffix
> + else
> + return nil
> + end
> +end
> +
> +-- The request parameter has the data structures and functions as described here:
> +-- http://httpd.apache.org/docs/trunk/mod/mod_lua.html#datastructures
> +-- http://httpd.apache.org/docs/trunk/mod/mod_lua.html#functions
> +
> +function handle(r)
> + local get = r:parseargs()
> +
> + local now = os.time()
> + if mirror_stamp < (now - 3600) then
> + get_mirrors()
> + end
> + local country = r.notes['GEOIP_COUNTRY_NAME'] or r.subprocess_env['GEOIP_COUNTRY_NAME'] or "Unknown"
> + local cca2 = (get.cca2 or r.notes['GEOIP_COUNTRY_CODE'] or r.subprocess_env['GEOIP_COUNTRY_CODE'] or 'Backup'):lower()
> + if cca2 == 'gb' then
> + cca2 = 'uk'
> + end
> + local occa2 = cca2
> + if not mirror_map[cca2] then
> + cca2 = 'backup'
> + end
> + mymap = mirror_map[cca2] or mirror_map['backup']
> + local bmap = mirror_map['backup']
> + mymap['backup'] = bmap['http']
> + local URL = {}
> + for _, t in pairs({'http','ftp'}) do
> + URL[t] = (mymap[t] and mymap[t][math.random(1, #mymap[t])]) or (bmap[t] and bmap[t][math.random(1, #bmap[t])])
> + end
> + local page = r.filename
> + local got_f = get.f -- work on a copy of the parameter
> + if got_f then
> + local hname = r.hostname:gsub("www%.", "")
> + got_f = got_f:gsub("^/var/www/html/", "/var/www/")
> + got_f = got_f:gsub(hname, ""):gsub("/var/www//var/www/", "/var/www/")
> + if r:stat(got_f) or r:stat(got_f:gsub("%.cgi", ".html")) then
> + page = got_f
> + else
> + page = got_f:gsub("/www/", "/www/" .. hname:gsub("%.[a-z][a-z]%.", ".") .. "/"):gsub("[/]+", "/")
> + end
> + end
> + page = page:gsub("%.cgi", ".html"):gsub("%.lua", ".html")
> + if not r:stat(page) or not (page:match("^/var/www/") or page:match("^/x1/www/")) then
> + page = "/x1/www/www.apache.org/dyn/closer.html"
> + end
> +
> + local defs = {}
> + local ezt_defs = {
> + strings = {},
> + arrays = {}
> + }
> +
> + defs.filepath = page:gsub("[^/]+$", "")
> + defs.debug = get.debug and true or false
> + defs.preferred = r:escape_html(get.preferred or get.Preferred or URL['http'] or "")
> + defs.path_info = r:escape_html(get.path -- command-line override
> + or nonEmpty(r.path_info) -- if path provided by server
> + or get_path_info(r.uri) -- temporary fix to extract it from r.uri for dyn/closer.cgi calls
> + -- Disable for now; it was previously effectively disabled because r.path_info was never false
> +-- or r.unparsed_uri:gsub("^.+%.cgi/*", ""):gsub("^.+%.lua/*", "") -- not sure what this is trying to do
> + -- TODO in any case seems wrong to use the unparsed URI as that will include the query string
> + or "/") -- default
> + :gsub("^/","",1) -- trim leading "/" as per Python version
> + defs.country = country
> + defs.cca2 = cca2
> + ezt_defs.strings = defs
> + ezt_defs.arrays = {
> + http = mymap['http'] or bmap['http'],
> + ftp = mymap['ftp'] or bmap['ftp'],
> + backup = bmap['http'],
> + }
> + -- Check that preferred http/ftp exists, otherwise default to none
> + local prefIsOkay = false
> + for _,b in ipairs({'http', 'ftp', 'backup'}) do
> + for _, v in pairs(ezt_defs.arrays[b] or {}) do -- arrays[b] may not exist
> + if r:escape_html(v) == defs.preferred then
> + prefIsOkay = true
> + break
> + end
> + end
> + if prefIsOkay then
> + break
> + end
> + end
> + if not prefIsOkay then
> + ezt_defs.preferred = ""
> + defs.preferred = URL['http']
> + end
> +
> + -- string only repr of preferred URL
> + if get.preferred and get.preferred == "true" then
> + r.content_type = "text/plain"
> + r:puts(defs.preferred)
> + return apache2.OK
> + end
> +
> + local do_json = false
> + if (get.as_json and not (get.as_json == "0")) or (get.asjson and not (get.asjson == "0")) then
> + do_json = true
> + end
> + -- proj is the first path component of defs.path_info
> + local proj = defs.path_info
> + if proj and proj:find('/') then
> + proj = proj:sub(1,proj:find('/')-1)
> + end
> + if get.action then
> + if get.action == 'download' and get.filename then
> + r.headers_out['Location'] = defs.preferred .. get.filename
> + r.status = 302
> + return apache2.OK
> + elseif get.action == 'info' then
> + r.content_type = "text/plain"
> + r:puts(string.format("%s\ncloser revision: %s\nlibezt revision: %s\n",
> + _VERSION, -- LUA
> + version(), -- closer
> + ezt:version())) -- libezt
> + -- Show any arguments
> + for k, v in pairs( get ) do
> + r:puts( string.format("%s: %s\n", k, v) )
> + end
> + -- Request parameters
> + r:puts("r.hostname: '",r.hostname or "nil", "'\n")
> + r:puts("r.document_root:'",r.document_root or "nil", "'\n")
> + r:puts("r.uri: '",r.uri or "nil", "'\n")
> + -- r:puts("r.the_request: '",r.the_request or "nil", "'\n")
> + -- r:puts("r.unparsed_uri: '",r.unparsed_uri or "nil", "'\n")
> + r:puts("r.path_info: '",r.path_info or "nil","'\n")
> + r:puts("env[SCRIPT_URI] '",r.subprocess_env['SCRIPT_URI'] or "nil","'\n")
> + -- calculated values
> + r:puts("defs.path_info: '",defs.path_info or "nil","'\n")
> + r:puts("defs.filepath: '",defs.filepath or "nil","'\n")
> + r:puts("proj : '",proj,"'\n")
> + r:puts("proj in attic: '",tostring(is_in_attic(proj)),"'\n")
> + local in_dist = tostring(is_in_dist(defs.path_info))
> + r:puts("dist/path : '",dist_path(defs.path_info),"'\n")
> + r:puts("path in dist/: '",in_dist,"'\n")
> + local in_arch = tostring(is_in_arch(defs.path_info))
> + r:puts("archive uri : '",arch_uri(defs.path_info),"'\n")
> + r:puts("path in arch/: '",in_arch,"'\n")
> + r:puts("arch cachehit: '",tostring(cache_hit),"'\n")
> + return apache2.OK
> + end
> + end
> + if not do_json then
> + if is_in_attic(proj) then
> + r.headers_out['Location'] = ATTIC_URI .. proj .. ".html"
> + r.status = 302
> + return apache2.OK
> + end
> + if not is_in_dist(defs.path_info) then
> + local inarch = is_in_arch(defs.path_info)
> + local arch_home = archive_url('') ;
> + local arch_path = archive_url(defs.path_info) ;
> + if inarch == nil then
> + defs.lookup = 'A lookup on ' .. arch_home .. ' failed.'
> + elseif inarch then
> + defs.lookup = 'The object is in our archive : ' .. arch_path
> + else
> + defs.lookup = 'The object is not in our archive ' .. arch_home
> + end
> + page = "/x1/www/www.apache.org/dyn/archive.html"
> + if LOG_LOOKUPS then log_lookup(inarch,defs.path_info) end
> + end
> + local rootpath = defs.path_info:match("^([-a-z0-9]+)/")
> + if rootpath and rootpath == "incubator" then
> + rootpath = defs.path_info:match("^incubator/([-a-z0-9]+)/")
> + end
> + if rootpath then
> + local f = io.open("/www/www.apache.org/dyn/stats/" .. rootpath .. ".log", "a")
> + if f then
> + -- get a bit of the IP to identify multiple unique requests with same TS/CCA2
> + local ipbit = r.useragent_ip:match("([a-f0-9]+):?:?$") or r.useragent_ip:match("^([a-f0-9]+)") or "000"
> + f:write(os.time() .. " " .. ipbit .. " " .. occa2 .. " " .. defs.path_info .. "\n")
> + f:close()
> + end
> + end
> + local tdata = get_output_cached(page, defs, r, ezt_defs)
> +
> + -- check for special content-type based on file name
> + if endsWith(page,"--xml.html") then
> + r.content_type = "text/xml"
> + else
> + r.content_type = "text/html"
> + end
> + r:puts(tdata.data)
> + if r.hostname == 'www.apache.org' then
> + r:puts("<!-- " .. occa2 .. " -->")
> + end
> + else
> + r.content_type = "application/json"
> + r:puts(JSON:encode_pretty({
> + path_info = defs.path_info,
> + preferred = defs.preferred,
> + http = mymap['http'] or bmap['http'],
> + ftp = mymap['ftp'] or bmap['ftp'],
> + backup = bmap['http'],
> + cca2 = occa2
> + }))
> + end
> + return apache2.OK
> +end
>
> Propchange: infrastructure/site/trunk/content/dyn/dev_closer.lua
> ------------------------------------------------------------------------------
> svn:executable = *
>
>