You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@whimsical.apache.org by cu...@apache.org on 2018/05/09 16:35:14 UTC
[whimsy] 02/03: Encapsulate details of site checking data
This is an automated email from the ASF dual-hosted git repository.
curcuru pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/whimsy.git
commit 8abb8ca6757c8d4291d5cb73808dd6597f070949
Author: Shane Curcuru <as...@shanecurcuru.org>
AuthorDate: Wed May 9 12:33:04 2018 -0400
Encapsulate details of site checking data
---
lib/whimsy/sitestandards.rb | 203 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 203 insertions(+)
diff --git a/lib/whimsy/sitestandards.rb b/lib/whimsy/sitestandards.rb
new file mode 100644
index 0000000..5bda6d8
--- /dev/null
+++ b/lib/whimsy/sitestandards.rb
@@ -0,0 +1,203 @@
+#!/usr/bin/env ruby
+# Defines partial standards for Apache website checker
+# TODO better document with specific policies
+
+# Encapsulate (most) scans/validations done on website content
+module SiteStandards
+ extend self
+  # Keys used inside every per-check definition hash in this module.
+  # Frozen so the shared key strings cannot be mutated by accident.
+  CHECK_TEXT = 'text'.freeze # (optional) Regex of <a ...>Text to scan for</a>, of a.text.downcase.strip
+  CHECK_CAPTURE = 'capture'.freeze # a_href minimal regex to capture - for license, we capture the link if it points to apache.org somewhere
+  CHECK_VALIDATE = 'validate'.freeze # a_href detailed regex to expect for compliance; it must point to one of our actual licenses to pass
+  CHECK_TYPE = 'type'.freeze # true = validation checks href/url; false = checks text node
+  CHECK_POLICY = 'policy'.freeze # URL to policy statement for this check
+  CHECK_DOC = 'doc'.freeze # Explanation of what the check is looking for
+
+  # Checks done only for TLPs (i.e. not podlings)
+  # Maps check name => definition hash keyed by the CHECK_* constants.
+  # The outer hash is frozen; get_checks builds a fresh merged copy for callers.
+  TLP_CHECKS = {
+    'uri' => { # Custom: merely saves uri of site
+      CHECK_TEXT => nil,
+      CHECK_CAPTURE => nil,
+      CHECK_VALIDATE => %r{https?://[^.]+\.apache\.org},
+      CHECK_TYPE => true,
+      CHECK_POLICY => 'https://www.apache.org/foundation/marks/pmcs#websites',
+      CHECK_DOC => 'The homepage for any ProjectName must be served from http://ProjectName.apache.org',
+    },
+  }.freeze
+ # Checks done only for Incubator podlings
+ PODLING_CHECKS = {
+ 'uri' => {
+ CHECK_TEXT => nil,
+ CHECK_CAPTURE => %r{https?://[^.]+\.incubator\.apache\.org},
+ CHECK_VALIDATE => %r{https?://[^.]+\.incubator\.apache\.org},
+ CHECK_TYPE => true,
+ CHECK_POLICY => 'https://www.apache.org/foundation/marks/pmcs#websites',
+ CHECK_DOC => 'The homepage for any ProjectName must be served from http://ProjectName(.incubator).apache.org',
+ },
+ 'disclaimer' => { # textnode_check: txt =~ / Incubation is required of all newly accepted projects /
+ CHECK_TEXT => %r{Incubation is required of all newly accepted projects},
+ CHECK_CAPTURE => nil,
+ CHECK_VALIDATE => %r{Apache \S+( \S+)?( \([Ii]ncubating\))? is an effort undergoing [Ii]ncubation at [Tt]he Apache Software Foundation \(ASF\),? sponsored by the (Apache )?\S+( PMC)?. Incubation is required of all newly accepted projects until a further review indicates that the infrastructure, communications, and decision making process have stabilized in a manner consistent with other successful ASF projects. While incubation status is not necessarily a reflection of the complet [...]
+ CHECK_TYPE => false,
+ CHECK_POLICY => 'https://incubator.apache.org/guides/branding.html#disclaimers',
+ CHECK_DOC => 'All Apache Incubator Podling sites must contain the incubating disclaimer.',
+ },
+ }
+  # Checks done for all podlings|projects
+  # Maps check name => definition hash keyed by the CHECK_* constants.
+  # The dot in 'apache\.org' is escaped in every CHECK_VALIDATE regex so that
+  # only the literal host name matches (an unescaped '.' matches any character).
+  # The outer hash is frozen; get_checks builds a fresh merged copy for callers.
+  COMMON_CHECKS = {
+    'foundation' => { # Custom: a_href =~ ... then custom checking for hover/title text
+      CHECK_TEXT => %r{apache|asf|foundation}i,
+      CHECK_CAPTURE => %r{.}i,
+      CHECK_VALIDATE => %r{apache|asf|foundation}i,
+      CHECK_TYPE => false,
+      CHECK_POLICY => 'https://www.apache.org/foundation/marks/pmcs#navigation',
+      CHECK_DOC => 'All projects must feature some prominent link back to the main ASF homepage at http://www.apache.org/',
+    },
+    'events' => { # Custom: a_href.include? 'apache.org/events/' then custom check for img
+      CHECK_TEXT => nil,
+      CHECK_CAPTURE => %r{apache\.org\/events},
+      CHECK_VALIDATE => %r{^https?://.*apache\.org/events/current-event},
+      CHECK_TYPE => true,
+      CHECK_POLICY => 'https://www.apache.org/events/README.txt',
+      CHECK_DOC => 'Projects SHOULD include a link to any current ApacheCon event, as provided by VP, Conferences.',
+    },
+    'license' => { # link_check a_text =~ /^license$/ and a_href.include? 'apache.org'
+      CHECK_TEXT => /^license$/,
+      CHECK_CAPTURE => %r{apache\.org},
+      CHECK_VALIDATE => %r{^https?://.*apache\.org/licenses/$},
+      CHECK_TYPE => true,
+      CHECK_POLICY => 'https://www.apache.org/foundation/marks/pmcs#navigation',
+      CHECK_DOC => '"License" should link to: http://www.apache.org/licenses/',
+    },
+    'thanks' => { # link_check a_text =~ /\Athanks[!]?\z/
+      CHECK_TEXT => /\Athanks[!]?\z/,
+      CHECK_CAPTURE => nil,
+      CHECK_VALIDATE => %r{^https?://.*apache\.org/foundation/thanks},
+      CHECK_TYPE => true,
+      CHECK_POLICY => 'https://www.apache.org/foundation/marks/pmcs#navigation',
+      CHECK_DOC => '"Thanks" should link to: http://www.apache.org/foundation/thanks.html',
+    },
+    'security' => { # link_check a_text == 'security'
+      CHECK_TEXT => /security/,
+      CHECK_CAPTURE => nil,
+      CHECK_VALIDATE => %r{^https?://.*apache\.org/[Ss]ecurity},
+      CHECK_TYPE => true,
+      CHECK_POLICY => 'https://www.apache.org/foundation/marks/pmcs#navigation',
+      CHECK_DOC => '"Security" should link to either to a project-specific page [...], or to the main http://www.apache.org/security/ page.',
+    },
+    'sponsorship' => { # link_check ['sponsorship', 'donate', 'sponsor apache','sponsoring apache'].include? a_text
+      CHECK_TEXT => %r{sponsorship|donate|sponsor\sapache|sponsoring\sapache},
+      CHECK_CAPTURE => nil,
+      CHECK_VALIDATE => %r{^https?://.*apache\.org/foundation/sponsorship},
+      CHECK_TYPE => true,
+      CHECK_POLICY => 'https://www.apache.org/foundation/marks/pmcs#navigation',
+      CHECK_DOC => '"Sponsorship", "Sponsor Apache", or "Donate" should link to: http://www.apache.org/foundation/sponsorship.html',
+    },
+
+    'trademarks' => { # textnode_check: if (txt =~ /\btrademarks\b/ and not data[:trademarks]) or txt =~/are trademarks of [Tt]he Apache Software/
+      CHECK_TEXT => %r{\btrademarks\b},
+      CHECK_CAPTURE => nil,
+      CHECK_VALIDATE => %r{trademarks of [Tt]he Apache Software Foundation},
+      CHECK_TYPE => false,
+      CHECK_POLICY => 'https://www.apache.org/foundation/marks/pmcs#attributions',
+      CHECK_DOC => 'All project or product homepages must feature a prominent trademark attribution of all applicable Apache trademarks.',
+    },
+    'copyright' => { # textnode_check: txt =~ /Copyright / or txt =~ /©/
+      CHECK_TEXT => %r{Copyright|©},
+      CHECK_CAPTURE => nil,
+      CHECK_VALIDATE => %r{[Cc]opyright [^.]+ Apache Software Foundation}, # Do we need '[Tt]he ASF'?
+      CHECK_TYPE => false,
+      CHECK_POLICY => 'https://www.apache.org/legal/src-headers.html#headers',
+      CHECK_DOC => 'All website content SHOULD include a copyright notice for the ASF.',
+    },
+
+    'image' => { # Custom: merely looks in IMAGE_DIR for #{id}.*
+      CHECK_TEXT => nil,
+      CHECK_CAPTURE => nil,
+      CHECK_VALIDATE => %r{projectname.jpg},
+      CHECK_TYPE => true,
+      CHECK_POLICY => 'https://www.apache.org/img/',
+      CHECK_DOC => 'Projects SHOULD include a 212px wide copy of their logo in https://www.apache.org/img/ to be included in ASF homepage.',
+    },
+  }.freeze
+
+  # Status labels; used both as return values of label() and as the keys of
+  # the per-check count hashes built by analyze(). Frozen to prevent mutation.
+  SITE_PASS = 'label-success'.freeze
+  SITE_WARN = 'label-warning'.freeze
+  SITE_FAIL = 'label-danger'.freeze
+  # Pick the status label (color) for one table cell.
+  # @param analysis overall analysis of the sites; only the third element is
+  #   read: a hash of check name => list of projects that matched the check
+  # @param links hash of links found for the project in question
+  # @param col the column in question (i.e. the check being reported on)
+  # @param name the name of the project
+  # @return SITE_FAIL when the project has no entry for col, SITE_WARN when
+  #   col was analyzed but name is not among its matches, SITE_PASS otherwise
+  def label(analysis, links, col, name)
+    return SITE_FAIL unless links[col]
+
+    matched = analysis[2]
+    return SITE_WARN if matched.include?(col) && !matched[col].include?(name)
+
+    SITE_PASS
+  end
+
+  # Build the hash of checks to run for a tlp | podling:
+  # the type-specific checks merged with COMMON_CHECKS
+  # @param tlp true if project; podling otherwise
+  # @return a new hash of check name => check definition
+  def get_checks(tlp = true)
+    specific = tlp ? TLP_CHECKS : PODLING_CHECKS
+    specific.merge(COMMON_CHECKS)
+  end
+
+  # Name of the scan-results file for a tlp | podling
+  # @param tlp true if project; podling otherwise
+  # @return 'site-scan.json' for projects, 'pods-scan.json' for podlings
+  def get_filename(tlp = true)
+    if tlp
+      'site-scan.json'
+    else
+      'pods-scan.json'
+    end
+  end
+
+  # Location prefix under which the scan-results files are found
+  # @param is_local true for the relative local directory; false for the server URL
+  # @return '../public/' when local, 'https://whimsy.apache.org/public/' otherwise
+  def get_url(is_local = true)
+    if is_local
+      '../public/'
+    else
+      'https://whimsy.apache.org/public/'
+    end
+  end
+
+  # Get check data for tlp | podling
+  # Uses a local copy if that file exists; otherwise fetches it over HTTP
+  # from the server location given by get_url(false)
+  # NOTE(review): uses JSON, Net::HTTP and Time#httpdate, but no require
+  #   lines are visible in this file - presumably the caller loads
+  #   'json', 'net/http' and 'time'; confirm
+  # @param tlp true if project; podling otherwise
+  # @return [hash of site data, crawl_time]
+  def get_sites(tlp = true)
+    filename = get_filename(tlp)
+    local_copy = File.expand_path("#{get_url(true)}#{filename}", __FILE__)
+    # Object#untaint is gone in newer Rubies (removed in 3.2); only call it
+    # where it still exists so this method keeps working on both
+    local_copy = local_copy.untaint if local_copy.respond_to?(:untaint)
+    if File.exist? local_copy
+      crawl_time = File.mtime(local_copy).httpdate # show time in same format as last-mod
+      sites = JSON.parse(File.read(local_copy))
+    else
+      response = Net::HTTP.get_response(URI("#{get_url(false)}#{filename}"))
+      crawl_time = response['last-modified']
+      sites = JSON.parse(response.body)
+    end
+    [sites, crawl_time]
+  end
+
+  # Analyze data returned from site-scan.rb by using checks[CHECK_VALIDATE] regex
+  # If value matches CHECK_VALIDATE, SITE_PASS
+  # If value is present (presumably from CHECK_TEXT|CAPTURE), then SITE_WARN
+  # If value not present, SITE_FAIL (i.e. site-scan.rb didn't find it)
+  # Values that are not Strings can never pass validation; they are counted
+  # as SITE_WARN (or SITE_FAIL when nil) rather than raising
+  # @param sites hash of site-scan data collected (site name => hash of check name => value)
+  # @param checks to apply to sites to determine status
+  # @return [overall counts, description of statuses, success listings]
+  def analyze(sites, checks)
+    success = Hash.new { |h, k| h[k] = Hash.new(&h.default_proc) }
+    counts = Hash.new { |h, k| h[k] = Hash.new(&h.default_proc) }
+    checks.each do |nam, check_data|
+      validate = check_data[SiteStandards::CHECK_VALIDATE]
+      # Guard on String: calling =~ on other objects (true, numbers, arrays)
+      # raises NoMethodError on Ruby 3.2+, where Object#=~ was removed
+      passing = sites.select do |_, site|
+        value = site[nam]
+        validate && value.is_a?(String) && value.match?(validate)
+      end
+      success[nam] = passing.keys
+      passed = success[nam].count
+      failed = sites.count { |_, site| site[nam].nil? }
+      # Assign in pass, warn, fail order: hash insertion order is the output order
+      counts[nam][SITE_PASS] = passed
+      counts[nam][SITE_WARN] = sites.size - passed - failed
+      counts[nam][SITE_FAIL] = failed
+    end
+
+    [
+      counts, {
+        SITE_PASS => '# Sites with links to primary ASF page',
+        SITE_WARN => '# Sites with link, but not an expected ASF one',
+        SITE_FAIL => '# Sites with no link for this topic'
+      }, success
+    ]
+  end
+end
+
--
To stop receiving notification emails like this one, please contact
curcuru@apache.org.