You are viewing a plain text version of this content. The canonical link for it is here.
Posted to general@attic.apache.org by se...@apache.org on 2021/03/11 00:42:22 UTC

svn commit: r1887459 - in /attic/site: jiratext.py retire.py urlutils.py

Author: sebb
Date: Thu Mar 11 00:42:22 2021
New Revision: 1887459

URL: http://svn.apache.org/viewvc?rev=1887459&view=rev
Log:
Extract common code

Added:
    attic/site/urlutils.py   (with props)
Modified:
    attic/site/jiratext.py
    attic/site/retire.py

Modified: attic/site/jiratext.py
URL: http://svn.apache.org/viewvc/attic/site/jiratext.py?rev=1887459&r1=1887458&r2=1887459&view=diff
==============================================================================
--- attic/site/jiratext.py (original)
+++ attic/site/jiratext.py Thu Mar 11 00:42:22 2021
@@ -12,19 +12,9 @@ Find project resources for retired proje
 
 """
 
-import re
 import sys
-import json
 import subprocess
-import errno
-import time
-
-# from contextlib import redirect_stdout
-from os.path import getmtime, basename, join
-from os import environ
-
-from urllib.request import urlopen, Request
-from urllib.error import HTTPError
+from urlutils import loadjson, urlexists
 
 DEV="https://dist.apache.org/repos/dist/dev/"
 REL="https://dist.apache.org/repos/dist/release/"
@@ -33,43 +23,6 @@ CWIKI='https://cwiki.apache.org/confluen
 EMAIL='https://whimsy.apache.org/public/committee-retired.json'
 GITBOX='https://gitbox.apache.org/repositories.json' # ['projects']
 
-CACHE=environ.get('CACHE')
-
-def isFileStale(filename):
-    try:
-        t = getmtime(filename)
-    except OSError as e:
-        if not e.errno == errno.ENOENT:
-            raise e
-        return True
-    diff = time.time() - t
-    return diff > 600 # 5 min in seconds
-
-def urlcache(url):
-    if CACHE:
-        # basename seems to work OK with URLs
-        cache = join(CACHE,basename(url)+".tmp")
-        if isFileStale(cache):
-#             print("Caching %s" % url, file=sys.stderr)
-            req = Request(url)
-            resp = urlopen(req)
-            with open(cache,'wb') as w:
-                w.write(resp.read())
-        else:
-#             print("Using cache for %s" % url, file=sys.stderr)
-            pass
-        with open(cache,'r') as r:
-            return r.read()
-    else:
-#         print("Fetching %s" % url, file=sys.stderr)
-        req = Request(url)
-        resp = urlopen(req)
-        return resp.read()
-
-def loadjson(url):
-#     req = Request(url)
-#     resp = urlopen(req)
-    return json.loads(urlcache(url))
 
 # =====================================
 
@@ -78,12 +31,8 @@ gitbox = loadjson(GITBOX)['projects']
 
 def check_wiki(pid):
     url = CWIKI + pid.upper()
-    req = Request(url)
-    try:
-        urlopen(req)
+    if urlexists(url):
         print("Make CWIKI readonly: %s" % url)
-    except HTTPError:
-        pass
 
 def check_mail(pid):
     try:
@@ -135,7 +84,7 @@ def check_ldap(pid):
         print(res.stderr)
     
 def check_jira(pid):
-    jira = json.loads(urlcache(JIRA))
+    jira = loadjson(JIRA)
     for project in jira:
         key = project['key']
         catname = ''

Modified: attic/site/retire.py
URL: http://svn.apache.org/viewvc/attic/site/retire.py?rev=1887459&r1=1887458&r2=1887459&view=diff
==============================================================================
--- attic/site/retire.py (original)
+++ attic/site/retire.py Thu Mar 11 00:42:22 2021
@@ -25,8 +25,7 @@ from inspect import getsourcefile
 from string import Template
 import os
 import re
-import yaml
-from urllib.request import urlopen, Request
+from urlutils import loadyaml
 
 if len(sys.argv) == 1:
     print("Please provide a list of project ids")
@@ -37,11 +36,6 @@ projects =    join((MYHOME), 'xdocs', 'p
 stylesheets = join((MYHOME), 'xdocs', 'stylesheets')
 flagged = join((MYHOME), 'xdocs', 'flagged')
 
-def loadyaml(url):
-    req = Request(url)
-    resp = urlopen(req)
-    return yaml.safe_load(resp.read())
-
 #  get details of the retired projects
 retirees = loadyaml('https://whimsy.apache.org/public/committee-retired.json')['retired']
 lists = {}
@@ -98,12 +92,12 @@ def create_project(pid):
     with open(join(projects, '_template.xml'), 'r') as t:
         template = Template(t.read())
     meta = retirees[pid]
-    names = lists[pid]
-    names.remove('dev')
+    mnames = lists[pid]
+    mnames.remove('dev')
     out = template.substitute(tlpid = pid, 
         FullName = meta['display_name'],
         Month_Year = meta['retired'],
-        mail_names = ",".join(sorted(names)),
+        mail_names = ",".join(sorted(mnames)),
         description = meta['description'])
     with open(outfile, 'w') as o:
         o.write(out)

Added: attic/site/urlutils.py
URL: http://svn.apache.org/viewvc/attic/site/urlutils.py?rev=1887459&view=auto
==============================================================================
--- attic/site/urlutils.py (added)
+++ attic/site/urlutils.py Thu Mar 11 00:42:22 2021
@@ -0,0 +1,84 @@
+#!/usr/bin/env python3
+
+"""
+    Utility URL methods
+"""
+
+import hashlib
+import sys
+import errno
+import time
+import yaml
+import json
+from os import environ
+from os.path import getmtime, join
+from urllib.request import urlopen, Request
+from urllib.error import HTTPError
+
+MAXAGE=int(environ.get('CACHE_AGE', '600'))  # max cache age in seconds (default 600 = 10 min)
+CACHE=environ.get('CACHE')  # cache directory; caching is disabled when unset or empty
+DEBUG=environ.get('CACHE_DEBUG')  # any non-empty value enables tracing to stderr
+
+def isFileStale(filename):
+    """ is file missing, or older than MAXAGE seconds (default 600 = 10 minutes)? """
+    try:
+        t = getmtime(filename)
+    except OSError as e:
+        if not e.errno == errno.ENOENT:
+            raise e  # anything other than "no such file" is a real error
+        return True  # a missing file counts as stale
+    diff = time.time() - t  # age of the file in seconds
+    return diff > MAXAGE
+
+def hashurl(url):
+    """ return the SHA-224 hex digest of the url string (encoded with str.encode() defaults) """
+    return hashlib.sha224(url.encode()).hexdigest()
+
+def geturl(url):
+    """ Get url contents as bytes -- no caching; urlopen errors propagate to the caller """
+    req = Request(url)
+    with urlopen(req) as resp:  # close the response instead of leaking the connection
+        return resp.read()
+
+def urlexists(url):
+    """ Does URL exist? True if it opens, False on HTTPError; other errors propagate """
+    req = Request(url)
+    try:
+        with urlopen(req):  # only the status matters; close the response right away
+            return True
+    except HTTPError:
+        return False
+
+def urlcache(url):
+    """ Get url contents as bytes -- cached under the CACHE directory when that is set """
+    if CACHE:
+        # the cache file name is the hash of the full URL (see hashurl)
+        cache = join(CACHE, hashurl(url)+".tmp")
+        if isFileStale(cache):
+            if DEBUG:
+                print("Caching %s" % url, file=sys.stderr)
+            data = geturl(url)
+            with open(cache,'wb') as w:
+                w.write(data)
+            return data
+        else:
+            if DEBUG:
+                print("Using cache for %s" % url, file=sys.stderr)
+        with open(cache,'rb') as r:  # binary read, so a cache hit returns bytes like a miss
+            return r.read()
+    else:
+        if DEBUG:
+            print("Fetching %s" % url, file=sys.stderr)
+        return geturl(url)
+
+def loadyaml(url):  # fetch url via urlcache() and parse the contents with yaml.safe_load
+    return yaml.safe_load(urlcache(url))
+
+def loadjson(url):  # fetch url via urlcache() and parse the contents with json.loads
+    return json.loads(urlcache(url))
+
+# Test code: for each URL given on the command line, print it and then its parsed contents
+if __name__ == '__main__':
+    for arg in sys.argv[1:]:
+        print(arg)
+        print(loadyaml(arg))

Propchange: attic/site/urlutils.py
------------------------------------------------------------------------------
    svn:eol-style = native