You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@whimsical.apache.org by cu...@apache.org on 2018/05/09 16:35:14 UTC

[whimsy] 02/03: Encapsulate details of site checking data

This is an automated email from the ASF dual-hosted git repository.

curcuru pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/whimsy.git

commit 8abb8ca6757c8d4291d5cb73808dd6597f070949
Author: Shane Curcuru <as...@shanecurcuru.org>
AuthorDate: Wed May 9 12:33:04 2018 -0400

    Encapsulate details of site checking data
---
 lib/whimsy/sitestandards.rb | 203 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 203 insertions(+)

diff --git a/lib/whimsy/sitestandards.rb b/lib/whimsy/sitestandards.rb
new file mode 100644
index 0000000..5bda6d8
--- /dev/null
+++ b/lib/whimsy/sitestandards.rb
@@ -0,0 +1,203 @@
+#!/usr/bin/env ruby
+# Defines partial standards for Apache website checker
+# TODO better document with specific policies
+
+# Encapsulate (most) scans/validations done on website content
+module SiteStandards
+  extend self
+  CHECK_TEXT      = 'text'      # (optional) Regex of <a ...>Text to scan for</a>, of a.text.downcase.strip
+  CHECK_CAPTURE   = 'capture'   # a_href minimal regex to capture - for license, we capture the link if it points to apache.org somewhere
+  CHECK_VALIDATE  = 'validate'  # a_href detailed regex to expect for compliance; it must point to one of our actual licenses to pass
+  CHECK_TYPE      = 'type'      # true = validation checks href/url; false = checks text node
+  CHECK_POLICY    = 'policy'    # URL to policy statement for this check
+  CHECK_DOC       = 'doc'       # Explanation of what the check is looking for
+  
+  # Checks done only for TLPs (i.e. not podlings)
+  # Maps check name => definition hash keyed by the CHECK_* constants.
+  TLP_CHECKS = {
+    'uri' => { # Custom: merely saves uri of site
+      CHECK_TEXT => nil,
+      CHECK_CAPTURE => nil, 
+      # Unanchored: matches http(s)://<label without dots>.apache.org anywhere in the value
+      CHECK_VALIDATE => %r{https?://[^.]+\.apache\.org},
+      CHECK_TYPE => true,
+      CHECK_POLICY => 'https://www.apache.org/foundation/marks/pmcs#websites',
+      CHECK_DOC => 'The homepage for any ProjectName must be served from http://ProjectName.apache.org',
+      },
+  }
+  # Checks done only for Incubator podlings
+  # Maps check name => definition hash keyed by the CHECK_* constants.
+  PODLING_CHECKS = {
+    'uri' => {
+      CHECK_TEXT => nil,
+      # Capture and validate use the same unanchored pattern here
+      CHECK_CAPTURE => %r{https?://[^.]+\.incubator\.apache\.org},
+      CHECK_VALIDATE => %r{https?://[^.]+\.incubator\.apache\.org},
+      CHECK_TYPE => true,
+      CHECK_POLICY => 'https://www.apache.org/foundation/marks/pmcs#websites',
+      CHECK_DOC => 'The homepage for any ProjectName must be served from http://ProjectName(.incubator).apache.org',
+      },
+    'disclaimer' => { # textnode_check: txt =~ / Incubation is required of all newly accepted projects /
+      CHECK_TEXT => %r{Incubation is required of all newly accepted projects},
+      CHECK_CAPTURE => nil,
+      # NOTE(review): the regex on the next line is cut off in this archived copy
+      # (it ends in "[...]" with no closing brace); take the full pattern from the repository.
+      CHECK_VALIDATE =>  %r{Apache \S+( \S+)?( \([Ii]ncubating\))? is an effort undergoing [Ii]ncubation at [Tt]he Apache Software Foundation \(ASF\),? sponsored by the (Apache )?\S+( PMC)?. Incubation is required of all newly accepted projects until a further review indicates that the infrastructure, communications, and decision making process have stabilized in a manner consistent with other successful ASF projects. While incubation status is not necessarily a reflection of the complet [...]
+      CHECK_TYPE => false,
+      CHECK_POLICY => 'https://incubator.apache.org/guides/branding.html#disclaimers',
+      CHECK_DOC => 'All Apache Incubator Podling sites must contain the incubating disclaimer.',
+      },
+  }
+  # Checks done for all podlings|projects
+  # Maps check name => definition hash keyed by the CHECK_* constants.
+  # NOTE(review): several CHECK_VALIDATE patterns below write 'apache.org' with an
+  # unescaped dot (which matches any character) and use ^/$ (which anchor per line,
+  # not per string); confirm whether \. and \A/\z were intended.
+  COMMON_CHECKS = { 
+    'foundation' => { # Custom: a_href =~ ... then custom checking for hover/title text
+      CHECK_TEXT => %r{apache|asf|foundation}i,
+      CHECK_CAPTURE => %r{.}i,
+      CHECK_VALIDATE => %r{apache|asf|foundation}i,
+      CHECK_TYPE => false,
+      CHECK_POLICY => 'https://www.apache.org/foundation/marks/pmcs#navigation',
+      CHECK_DOC => 'All projects must feature some prominent link back to the main ASF homepage at http://www.apache.org/',
+    },
+    'events' => { # Custom: a_href.include? 'apache.org/events/' then custom check for img
+      CHECK_TEXT => nil,
+      CHECK_CAPTURE => %r{apache\.org\/events},
+      CHECK_VALIDATE => %r{^https?://.*apache.org/events/current-event},
+      CHECK_TYPE => true,
+      CHECK_POLICY => 'https://www.apache.org/events/README.txt',
+      CHECK_DOC => 'Projects SHOULD include a link to any current ApacheCon event, as provided by VP, Conferences.',
+    },
+    'license' => { # link_check a_text =~ /^license$/ and a_href.include? 'apache.org'
+      CHECK_TEXT => /^license$/,
+      CHECK_CAPTURE => %r{apache\.org},
+      CHECK_VALIDATE => %r{^https?://.*apache.org/licenses/$},
+      CHECK_TYPE => true,
+      CHECK_POLICY => 'https://www.apache.org/foundation/marks/pmcs#navigation',
+      CHECK_DOC => '"License" should link to: http://www.apache.org/licenses/',
+    },
+    'thanks' => { # link_check a_text =~ /\Athanks[!]?\z/
+        CHECK_TEXT => /\Athanks[!]?\z/,
+        CHECK_CAPTURE => nil,
+        CHECK_VALIDATE => %r{^https?://.*apache.org/foundation/thanks},
+        CHECK_TYPE => true,
+        CHECK_POLICY => 'https://www.apache.org/foundation/marks/pmcs#navigation',
+        CHECK_DOC => '"Thanks" should link to: http://www.apache.org/foundation/thanks.html',
+    },
+    'security' => { # link_check a_text == 'security'
+      # Unanchored, so any link text containing 'security' matches
+      CHECK_TEXT => /security/,
+      CHECK_CAPTURE => nil,
+      CHECK_VALIDATE => %r{^https?://.*apache.org/[Ss]ecurity},
+      CHECK_TYPE => true,
+      CHECK_POLICY => 'https://www.apache.org/foundation/marks/pmcs#navigation',
+      CHECK_DOC => '"Security" should link to either to a project-specific page [...], or to the main http://www.apache.org/security/ page.',
+    },
+    'sponsorship' => { # link_check ['sponsorship', 'donate', 'sponsor apache','sponsoring apache'].include? a_text
+      CHECK_TEXT => %r{sponsorship|donate|sponsor\sapache|sponsoring\sapache},
+      CHECK_CAPTURE => nil,
+      CHECK_VALIDATE => %r{^https?://.*apache.org/foundation/sponsorship},
+      CHECK_TYPE => true,
+      CHECK_POLICY => 'https://www.apache.org/foundation/marks/pmcs#navigation',
+      CHECK_DOC => '"Sponsorship", "Sponsor Apache", or "Donate" should link to: http://www.apache.org/foundation/sponsorship.html',
+    },
+
+    'trademarks' => { # textnode_check: if (txt =~ /\btrademarks\b/  and not data[:trademarks]) or txt =~/are trademarks of [Tt]he Apache Software/
+      CHECK_TEXT => %r{\btrademarks\b},
+      CHECK_CAPTURE => nil,
+      CHECK_VALIDATE => %r{trademarks of [Tt]he Apache Software Foundation},
+      CHECK_TYPE => false,
+      CHECK_POLICY => 'https://www.apache.org/foundation/marks/pmcs#attributions',
+      CHECK_DOC => 'All project or product homepages must feature a prominent trademark attribution of all applicable Apache trademarks.',
+    },  
+    'copyright' => { # textnode_check: txt =~ /Copyright / or txt =~ /©/ 
+      CHECK_TEXT => %r{Copyright|©},
+      CHECK_CAPTURE => nil,
+      CHECK_VALIDATE => %r{[Cc]opyright [^.]+ Apache Software Foundation}, # Do we need '[Tt]he ASF'?
+      CHECK_TYPE => false,
+      CHECK_POLICY => 'https://www.apache.org/legal/src-headers.html#headers',
+      CHECK_DOC => 'All website content SHOULD include a copyright notice for the ASF.',
+    },
+
+    'image' => { # Custom: merely looks in IMAGE_DIR for #{id}.*
+      CHECK_TEXT => nil,
+      CHECK_CAPTURE => nil,
+      # NOTE(review): looks like a placeholder pattern rather than a real validator - confirm
+      CHECK_VALIDATE => %r{projectname.jpg},
+      CHECK_TYPE => true,
+      CHECK_POLICY => 'https://www.apache.org/img/',
+      CHECK_DOC => 'Projects SHOULD include a 212px wide copy of their logo in https://www.apache.org/img/ to be included in ASF homepage.',
+    },
+  }
+  
+  SITE_PASS       = 'label-success'
+  SITE_WARN       = 'label-warning'
+  SITE_FAIL       = 'label-danger'
+  # Determine the color of a given table cell, given:
+  #   - overall analysis of the sites, in particular the third column
+  #     which is a list projects that successfully matched the check
+  #   - list of links for the project in question
+  #   - the column in question (which indicates the check being reported on)
+  #   - the name of the project
+  def label(analysis, links, col, name)
+    if not links[col]
+      SITE_FAIL
+    elsif analysis[2].include? col and not analysis[2][col].include? name
+      SITE_WARN
+    else
+      SITE_PASS
+    end
+  end
+  
+  # Get hash of checks to be done for tlp | podling
+  # @param tlp true if project; podling otherwise
+  def get_checks(tlp = true)
+    tlp ? (return TLP_CHECKS.merge(COMMON_CHECKS)) : (return PODLING_CHECKS.merge(COMMON_CHECKS))
+  end
+  
+  # Get filename of check data for tlp | podling
+  # @param tlp true if project; podling otherwise
+  def get_filename(tlp = true)
+    tlp ? (return 'site-scan.json') : (return 'pods-scan.json')
+  end
+  
+  # Get URL to default filename location on server
+  def get_url(is_local = true)
+    is_local ? (return '../public/') : (return 'https://whimsy.apache.org/public/')
+  end
+  
+  # Get check data for tlp | podling
+  #   Uses a local_copy if available; w.a.o/public otherwise
+  # @param tlp true if project; podling otherwise
+  # @return [hash of site data, crawl_time]
+  def get_sites(tlp = true)
+      local_copy = File.expand_path("#{get_url(true)}#{get_filename(tlp)}", __FILE__).untaint
+      if File.exist? local_copy
+        crawl_time = File.mtime(local_copy).httpdate # show time in same format as last-mod
+        sites = JSON.parse(File.read(local_copy))
+      else
+        response = Net::HTTP.get_response(URI("#{get_url(false)}#{get_filename(tlp)}"))
+        crawl_time = response['last-modified']
+        sites = JSON.parse(response.body)
+      end
+    return sites, crawl_time
+  end
+  
+  # Analyze data returned from site-scan.rb by using checks[CHECK_VALIDATE] regex
+  #   If value =~ CHECK_VALIDATE, SITE_PASS
+  #   If value is present (presumably from CHECK_TEXT|CAPTURE), then SITE_WARN
+  #   If value not present, SITE_FAIL (i.e. site-scan.rb didn't find it)
+  # @param sites hash of site-scan data collected
+  # @param checks to apply to sites to determine status
+  # @return [overall counts, description of statuses, success listings]
+  def analyze(sites, checks)
+      success = Hash.new { |h, k| h[k] = Hash.new(&h.default_proc) }
+      counts = Hash.new { |h, k| h[k] = Hash.new(&h.default_proc) }
+      checks.each do |nam, check_data|
+        success[nam] = sites.select{ |k, site| site[nam] =~ check_data[SiteStandards::CHECK_VALIDATE]  }.keys
+        counts[nam][SITE_PASS] = success[nam].count
+        counts[nam][SITE_WARN] = 0 # Reorder output 
+        counts[nam][SITE_FAIL] = sites.select{ |k, site| site[nam].nil? }.count
+        counts[nam][SITE_WARN] = sites.size - counts[nam][SITE_PASS] - counts[nam][SITE_FAIL]
+      end
+      
+      return [
+        counts, {
+        SITE_PASS => '# Sites with links to primary ASF page',
+        SITE_WARN => '# Sites with link, but not an expected ASF one',
+        SITE_FAIL => '# Sites with no link for this topic'
+        }, success
+      ]
+  end
+end
+

-- 
To stop receiving notification emails like this one, please contact
curcuru@apache.org.