You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@whimsical.apache.org by se...@apache.org on 2016/04/05 01:08:57 UTC
[whimsy] branch master updated: Parse the www.a.o/dist site to get the target links
This is an automated email from the ASF dual-hosted git repository.
sebb pushed a commit to branch master
in repository https://git-dual.apache.org/repos/asf/whimsy.git
The following commit(s) were added to refs/heads/master by this push:
new f7425f3 Parse the www.a.o/dist site to get the target links
f7425f3 is described below
commit f7425f3b484660a5ff3130d73be3b2b4cbbc1f98
Author: Sebb <se...@apache.org>
AuthorDate: Tue Apr 5 00:08:47 2016 +0100
Parse the www.a.o/dist site to get the target links
---
tools/mirror_check.rb | 47 +++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 43 insertions(+), 4 deletions(-)
diff --git a/tools/mirror_check.rb b/tools/mirror_check.rb
index a4ff939..93cfb3a 100755
--- a/tools/mirror_check.rb
+++ b/tools/mirror_check.rb
@@ -141,7 +141,43 @@ def check_page(base, page, severity=:E, expectedStatus="200")
end
def checkIndex(page, type)
- # TODO check the page contains all the correct folders
+ if type == :tlps
+ fav = page.match('favicon.ico')
+ zzz = page.match('zzz/')
+ if fav.length == 1 and zzz.length == 1
+ if fav.begin(0) < zzz.begin(0)
+ W "Incorrect page order - found favicon.ico before zzz/; folders should be listed before files"
+ else
+ I "Found favicon.ico and zzz/ in the page in the correct order (i.e. folders are listed before files)"
+ end
+ else
+ W "Expecting to find favicon.ico and zzz/ in the page"
+ end
+ end
+ asfData = @pages[type]
+ links = parseIndexPage(page)
+ links.each {|l|
+ W "Index #{type} the link #{l} is not shown on ASF site" unless asfData.include? l
+ }
+ asfData.each {|l|
+ W "Index #{type} the link #{l} is not shown on SUT" unless links.include? l
+ }
+end
+
+# parse an HTTP server Index page => array of file/folder names
+def parseIndexPage(page)
+ folders = []
+ # ASF main page references currently look like this: <a href="abdera/">abdera/</a>
+ # the Perl script looked for this match: m!> ?$dir/?<!
+ links = page.scan(%r{<a href=['"]([.a-z0-9-]+)/?['"]>([.a-z0-9-]+)/?</a>})
+ links.each { |l|
+ if l[1] == l[0]
+ folders << l[1]
+ else
+ print "Mistmatched names: #{l}\n"
+ end
+ }
+ folders
end
# Check page has sensible headers and footers
@@ -195,11 +231,11 @@ def checkHTTP(base)
else
W "Missing or unexpected img icon tags"
end
- checkIndex(body, 'TLP')
+ checkIndex(body, :tlps)
ibody = check_page(base, 'incubator/')
checkHdrFtr(base+'incubator/', ibody)
- checkIndex(ibody, 'Incubator')
+ checkIndex(ibody, :podlings)
check_page(base, 'harmony/', :E, expectedStatus="301")
@@ -232,7 +268,10 @@ end
def init
# build a list of validation errors
@tests = []
- @fails=0
+ @fails = 0
+ tlps = parseIndexPage(check_page('http://www.apache.org/dist/',''))
+ podlings = parseIndexPage(check_page('http://www.apache.org/dist/incubator/',''))
+ @pages = {:tlps => tlps, :podlings => podlings}
end
def showList(list, header)
--
To stop receiving notification emails like this one, please contact
['"commits@whimsical.apache.org" <co...@whimsical.apache.org>'].