You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@whimsical.apache.org by ru...@apache.org on 2017/04/23 22:01:21 UTC

[whimsy] branch master updated: "code spike" site checker

This is an automated email from the ASF dual-hosted git repository.

rubys pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/whimsy.git

The following commit(s) were added to refs/heads/master by this push:
       new  43b6cb0   "code spike" site checker
43b6cb0 is described below

commit 43b6cb06841f99c93eaca72c457d780f1959d749
Author: Sam Ruby <ru...@intertwingly.net>
AuthorDate: Sun Apr 23 18:00:38 2017 -0400

    "code spike" site checker
---
 tools/site-check.rb | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 87 insertions(+)

diff --git a/tools/site-check.rb b/tools/site-check.rb
new file mode 100755
index 0000000..a0ec9e2
--- /dev/null
+++ b/tools/site-check.rb
@@ -0,0 +1,87 @@
+#!/usr/bin/env ruby
+$LOAD_PATH.unshift File.realpath(File.expand_path('../../lib', __FILE__))
+
+#
+# Scans committee pages for compliance with requirements and recommendations:
+#   https://www.apache.org/foundation/marks/pmcs#navigation
+#   http://www.apache.org/events/README.txt
+#
+# Makes no value judgements.  Simply extracts raw data for offline analysis.
+#
+
+require 'whimsy/asf'
+require 'net/http'
+require 'nokogiri'
+require 'json'
+
+# fetch uri, followin redirects
+def fetch(uri)
+  uri = URI.parse(uri)
+  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
+    request = Net::HTTP::Get.new(uri.request_uri)
+    response = http.request(request)
+    if response.code =~ /^3\d\d/
+      fetch response['location']
+    else
+      return uri, request, response
+    end
+  end
+end
+
+# scan all committees, including non-pmcs
+ASF::Committee.load_committee_info
+committees = (ASF::Committee.list + ASF::Committee.nonpmcs).uniq
+
+results = {}
+
+committees.sort_by {|committee| committee.name}.each do |committee|
+  next unless committee.site
+
+  # fetch, parse committee site
+  uri, request, response = fetch(committee.site)
+  doc = Nokogiri::HTML(response.body)
+
+  # default data
+  data = {
+    display_name: committee.display_name,
+    uri: uri.to_s,
+    events: nil,
+    foundation: nil,
+    license: nil,
+    sponsorship: nil,
+    security: nil,
+  }
+
+  # scan each link
+  doc.css('a').each do |a|
+    if a['href'] =~ %r{^https?://(www\.)?apache\.org/?$}
+      img = a.at('img')
+      if img
+        data[:foundation] = uri + img['src'].strip
+      else
+        data[:foundation] = a.text 
+      end
+    end
+
+    if a['href'] and a['href'].include? 'apache.org/events/'
+      img = a.at('img')
+      if img
+        data[:events] = uri + img['src'].strip
+      else
+        data[:events] = uri + a['href'].strip
+      end
+    end
+
+    data[:license] = uri + a['href'].strip if a.text.downcase == 'license'
+    data[:thanks] = uri + a['href'].strip if a.text.downcase == 'thanks'
+    data[:security] = uri + a['href'].strip if a.text.downcase == 'security'
+
+    if %w(sponsorship donate).include? a.text.downcase
+      data[:sponsorship] = uri + a['href'].strip
+    end
+  end
+
+  results[committee.name] = data
+end
+
+puts JSON.pretty_generate(results)

-- 
To stop receiving notification emails like this one, please contact
['"commits@whimsical.apache.org" <co...@whimsical.apache.org>'].