You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@whimsical.apache.org by se...@apache.org on 2016/04/05 01:08:57 UTC

[whimsy] branch master updated: Parse the www.a.o/dist site to get the target links

This is an automated email from the ASF dual-hosted git repository.

sebb pushed a commit to branch master
in repository https://git-dual.apache.org/repos/asf/whimsy.git

The following commit(s) were added to refs/heads/master by this push:
       new  f7425f3   Parse the www.a.o/dist site to get the target links
f7425f3 is described below

commit f7425f3b484660a5ff3130d73be3b2b4cbbc1f98
Author: Sebb <se...@apache.org>
AuthorDate: Tue Apr 5 00:08:47 2016 +0100

    Parse the www.a.o/dist site to get the target links
---
 tools/mirror_check.rb | 47 +++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 43 insertions(+), 4 deletions(-)

diff --git a/tools/mirror_check.rb b/tools/mirror_check.rb
index a4ff939..93cfb3a 100755
--- a/tools/mirror_check.rb
+++ b/tools/mirror_check.rb
@@ -141,7 +141,43 @@ def check_page(base, page, severity=:E, expectedStatus="200")
 end
 
 def checkIndex(page, type)
-  # TODO check the page contains all the correct folders
+  # Compare an index page from the mirror under test (SUT) with the ASF copy.
+  # page is the HTML of the index; type is the @pages key (:tlps or :podlings).
+  # For :tlps, first check that folders (zzz/) are listed before files (favicon.ico).
+  if type == :tlps
+    # String#index gives the offset of the literal text, or nil when it is absent;
+    # nil is tested explicitly so a missing entry yields a warning, not NoMethodError
+    fav = page.index('favicon.ico')
+    zzz = page.index('zzz/')
+    if fav.nil? || zzz.nil?
+      W "Expecting to find favicon.ico and zzz/ in the page"
+    elsif fav < zzz
+      W "Incorrect page order - found favicon.ico before zzz/; folders should be listed before files"
+    else
+      I "Found favicon.ico and zzz/ in the page in the correct order (i.e. folders are listed before files)"
+    end
+  end
+  asfData = @pages[type]
+  links = parseIndexPage(page)
+  # report (via W) every name that is listed on only one of the two sites
+  links.each {|l| W "Index #{type} the link #{l} is not shown on ASF site" unless asfData.include?(l) }
+  asfData.each {|l| W "Index #{type} the link #{l} is not shown on SUT" unless links.include?(l) }
+end
+
+# Parse an HTTP server index page into an array of file/folder names.
+# page - the HTML text of the index page.
+# Returns the names, in page order, of every anchor whose href and link text
+# agree once a trailing slash is ignored (so folders and files look alike).
+# Only names made of lowercase letters, digits, '.' and '-' are recognised;
+# anchors whose href and text differ are printed and left out of the result.
+def parseIndexPage(page)
+  # ASF main page references currently look like this: <a href="abdera/">abdera/</a>
+  # the Perl script looked for this match: m!> ?$dir/?<!
+  links = page.scan(%r{<a href=['"]([.a-z0-9-]+)/?['"]>([.a-z0-9-]+)/?</a>})
+  # each scan result is an [href, text] pair
+  matched, mismatched = links.partition { |href, text| href == text }
+  mismatched.each { |l| print "Mismatched names: #{l}\n" }
+  matched.map { |_href, text| text }
 end
 
 # Check page has sensible headers and footers
@@ -195,11 +231,11 @@ def checkHTTP(base)
   else
     W "Missing or unexpected img icon tags"
   end
-  checkIndex(body, 'TLP')
+  checkIndex(body, :tlps)
 
   ibody = check_page(base, 'incubator/')
   checkHdrFtr(base+'incubator/', ibody)
-  checkIndex(ibody, 'Incubator')
+  checkIndex(ibody, :podlings)
 
   check_page(base, 'harmony/', :E, expectedStatus="301")
 
@@ -232,7 +268,10 @@ end
 def init
   # build a list of validation errors
   @tests = []
-  @fails=0
+  @fails = 0
+  tlps = parseIndexPage(check_page('http://www.apache.org/dist/','')) # names listed on the main ASF dist index page
+  podlings = parseIndexPage(check_page('http://www.apache.org/dist/incubator/','')) # names listed on the ASF incubator index page
+  @pages = {:tlps => tlps, :podlings => podlings} # reference lists; checkIndex looks them up by its type argument
 end
 
 def showList(list, header)

-- 
To stop receiving notification emails like this one, please contact
['"commits@whimsical.apache.org" <co...@whimsical.apache.org>'].