You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2006/08/18 21:05:07 UTC

svn commit: r432683 [1/3] - in /spamassassin/branches/3.1: build/automc/ masses/rule-qa/ masses/rule-qa/automc/

Author: jm
Date: Fri Aug 18 12:05:06 2006
New Revision: 432683

URL: http://svn.apache.org/viewvc?rev=432683&view=rev
Log:
backport automc, nightly mass-check infrastructure code to 3.1.0

Added:
    spamassassin/branches/3.1/build/automc/
    spamassassin/branches/3.1/build/automc/buildbot.css
    spamassassin/branches/3.1/build/automc/buildbot_ready   (with props)
    spamassassin/branches/3.1/build/automc/etc-apache-local-conf-httpd.conf
    spamassassin/branches/3.1/build/automc/etc-init.d-bbmass   (with props)
    spamassassin/branches/3.1/build/automc/etc-init.d-buildbot   (with props)
    spamassassin/branches/3.1/build/automc/etc-init.d-freqsd   (with props)
    spamassassin/branches/3.1/build/automc/populate_cor_nightly   (with props)
    spamassassin/branches/3.1/build/automc/run_nightly   (with props)
    spamassassin/branches/3.1/build/automc/run_preflight   (with props)
    spamassassin/branches/3.1/masses/rule-qa/automc/gen_info_xml   (with props)
    spamassassin/branches/3.1/masses/rule-qa/get-rulemetadata-for-revision   (with props)
    spamassassin/branches/3.1/masses/rule-qa/rule-hits-over-time   (with props)
Modified:
    spamassassin/branches/3.1/masses/rule-qa/README.nightly
    spamassassin/branches/3.1/masses/rule-qa/automc/RUNME.after
    spamassassin/branches/3.1/masses/rule-qa/automc/RUNME.before
    spamassassin/branches/3.1/masses/rule-qa/automc/config.example
    spamassassin/branches/3.1/masses/rule-qa/automc/ruleqa.cgi
    spamassassin/branches/3.1/masses/rule-qa/corpus-hourly
    spamassassin/branches/3.1/masses/rule-qa/corpus-nightly
    spamassassin/branches/3.1/masses/rule-qa/corpus-tagtime
    spamassassin/branches/3.1/masses/rule-qa/corpus.example

Added: spamassassin/branches/3.1/build/automc/buildbot.css
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.1/build/automc/buildbot.css?rev=432683&view=auto
==============================================================================
--- spamassassin/branches/3.1/build/automc/buildbot.css (added)
+++ spamassassin/branches/3.1/build/automc/buildbot.css Fri Aug 18 12:05:06 2006
@@ -0,0 +1,53 @@
+* {
+	font-family: Verdana, Cursor;
+	font-size: 10px;
+	font-weight: bold;
+}
+
+a:link,a:visited,a:active {
+	color: #666666;
+}
+a:hover {
+	color: #FFFFFF;
+}
+
+.table {
+	border-spacing: 2px;
+}
+
+td.Event, td.Activity, td.Change, td.Time, td.Builder {
+	color: #333333;
+	border: 1px solid #666666;
+	background-color: #CCCCCC;
+}
+
+/* LastBuild, BuildStep states */
+.success {
+	color: #FFFFFF;
+	border: 1px solid #2f8f0f;
+	background-color: #8fdf5f;
+}
+
+.failure {
+	color: #FFFFFF;
+	border: 1px solid #f33636;
+	background-color: #e98080;
+}
+
+.warnings {
+	color: #FFFFFF;
+	border: 1px solid #fc901f;
+	background-color: #ffc343;
+}
+
+.exception, td.offline {
+	color: #FFFFFF;
+	border: 1px solid #8000c0;
+	background-color: #e0b0ff;
+}
+
+.start,.running, td.building {
+	color: #666666;
+	border: 1px solid #ffff00;
+	background-color: #fffc6c;
+}

Added: spamassassin/branches/3.1/build/automc/buildbot_ready
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.1/build/automc/buildbot_ready?rev=432683&view=auto
==============================================================================
--- spamassassin/branches/3.1/build/automc/buildbot_ready (added)
+++ spamassassin/branches/3.1/build/automc/buildbot_ready Fri Aug 18 12:05:06 2006
@@ -0,0 +1,242 @@
+#!/usr/bin/perl
+
+my $BBMHOME   = '/home/bbmass';
+my $BYMAILLOG = '/home/bbmass/bymail/extract.log';
+my $BYMAILCF  = '/home/bbmass/bymail/latest.cf';
+my $RULEQAURL = 'http://ruleqa.spamassassin.org/';
+
+  # where do the reporting scripts get run from?
+my $REPORTSCRIPTSADIR = "/home/automc/svn/spamassassin";
+
+# ---------------------------------------------------------------------------
+
+use strict;
+use warnings;
+sub run;
+
+# directory used to lock between slaves; no longer used
+my $LOCKDIR = "/not/in/use";
+my $got_lock = undef;
+
+$|=1;
+my $perl = $^X;
+if (!$perl) {
+  die "no perl path found in ARGV!";
+}
+
+my $command = shift @ARGV;
+
+if ($command eq 'start') {
+  do_start();
+}
+elsif ($command eq 'stop') {
+  do_stop();
+}
+exit;
+
+# ---------------------------------------------------------------------------
+
+sub do_start {
+  # ensure all pre-reqs (and rules) are built
+  system ("$perl Makefile.PL < /dev/null");
+  system ("make");
+
+  # for mass-check to report, without having to have a working "svn" client
+  # in the chroot
+
+  # --non-interactive not on the zone yet.  duh!
+  # system ("svn info --non-interactive > masses/svninfo.tmp");
+  system ("svn info > masses/svninfo.tmp < /dev/null");
+
+  # mass-check-by-mail support
+  # only use files that are < 1 hour old
+  #
+  if (-f $BYMAILCF && -M $BYMAILCF < (1/24)) {
+    print "\nFound recent rules-by-mail rules file, will mass-check:\n\n";
+    system ("ls -l $BYMAILCF");
+    system ("cp $BYMAILCF masses/mailed.cf");
+    print "".("-" x 75)."\n";
+    system ("cat masses/mailed.cf");
+    print "".("-" x 75)."\n";
+
+    if (-f $BYMAILLOG) {
+      print "\nRules-by-mail extraction log:\n\n";
+      system ("ls -l $BYMAILLOG");
+      system ("cat $BYMAILLOG");
+    }
+  } else {
+    unlink "masses/mailed.cf";
+  }
+
+  # if (!perform_singleton_actions()) {
+    # # give the "winning" slave time to do the work we also need done
+    # print "sleeping for 60 seconds so that corpus is rebuilt\n";
+    # sleep 60;
+  # }
+}
+
+sub perform_singleton_actions {
+  if (!try_get_exclusive_lock()) {
+    print "cannot get the exclusive lock; some other BB slave is\n".    
+        "doing the singleton actions for us.\n";
+    return 0;
+  }
+
+  $SIG{INT} = $SIG{TERM} = sub {
+    kill_lock(); die "killed!";
+  };
+
+  eval {
+    _perform_singleton_actions_unlocked();
+  };
+  if ($@) {
+    warn $@;
+    kill_lock();
+    die "aargh";
+  }
+
+  kill_lock();
+  $SIG{INT} = 'DEFAULT';
+  $SIG{TERM} = 'DEFAULT';
+  return 1;
+}
+
+sub _perform_singleton_actions_unlocked
+{
+  # print "got lock: performing singleton actions\n";
+  # run "build/automc/populate_cor";
+  # better off doing this periodically from the qproc
+}
+
+# ---------------------------------------------------------------------------
+
+# "stop" command: print a quick report, then publish this slave's logs
+sub do_stop {
+  my $rev = get_current_svn_revision();
+  my $slave = get_current_slave_name();
+  die "cannot work out the current SVN revision" unless $rev;
+
+  use POSIX qw(strftime);
+  my $daterev = strftime("%Y%m%d", gmtime(time)) . "-r$rev-b";
+  my $rurl = $RULEQAURL.$daterev;
+
+  chdir("masses") or die;
+
+  print "REPORTS\n\n";
+  print "Rule-QA results from this mass-check will be published at\n\n";
+  print "    $rurl\n\n";
+  print "QUICK FREQS REPORT (this mass-check only):\n\n";
+  system ("$perl hit-frequencies -c tstrules -x -p -T -s 0");
+  print "\n\nBUILDING SLOW REPORTS:\n\n";
+
+  my $logdir = "$BBMHOME/tmp/logs-r$rev";
+  # the directory is keyed on the revision only, so every slave uses the same
+  # one; "mkdir -p" does not fail if another slave has already created it
+  run ("mkdir -p $logdir");
+  run ("mv ham.log $logdir/ham-$slave.log");
+  run ("mv spam.log $logdir/spam-$slave.log");
+
+  # ensure those log files are readable by the freqsd
+  run ("chmod 755 $logdir");
+  run ("chmod 644 $logdir/ham-$slave.log $logdir/spam-$slave.log");
+
+  # run our fast report generator
+  $ENV{HOME} = $BBMHOME;
+  umask(022);
+
+  run ("$REPORTSCRIPTSADIR/masses/rule-qa/corpus-hourly --dir $logdir --tag b");
+  run ("$REPORTSCRIPTSADIR/masses/rule-qa/automc/gen_info_xml");
+
+  # enqueue a request for the slow stuff with the report-generation daemon
+  run ("$REPORTSCRIPTSADIR/build/automc/freqsd --enq $logdir");
+}
+
+# ---------------------------------------------------------------------------
+
+sub get_current_svn_revision {
+  open (my $svninfo, '-|', "(svn info --non-interactive || svn info) < /dev/null 2>&1")
+      or die "cannot run 'svn info': $!";
+  # note: use 'Last Changed Rev' instead of 'Revision'.  Because we share
+  # an SVN repository with other projects, this means that the same
+  # rev of *our* codebase may appear under multiple rev#s, as other projects
+  # check their changes in.
+
+  my $revision;
+  while (my $line = <$svninfo>) {
+    # Last Changed Rev: 332684
+    next unless $line =~ /^Last Changed Rev: (\d+)/;
+    $revision = $1;
+    last;
+  }
+  close $svninfo;    # status ignored on purpose: we stop reading early
+  return $revision;  # undef if no 'Last Changed Rev' line was seen
+}
+
+# run a shell command, echoing it first; dies on failure unless $ignoreexit
+sub run {
+  my ($cmd, $ignoreexit) = @_;
+
+  print "[$cmd]\n";
+  system ($cmd);
+
+  # test all of $?: a command killed by a signal has a zero exit-code byte
+  die "command '$cmd' failed with status $?" if (!$ignoreexit && $? != 0);
+}
+
+sub get_current_slave_name {
+  my $cwd = `pwd`;   # the slave name is the path component after "/slaves/"
+  return $1 if ($cwd =~ m{/slaves/([^/]+)/});
+  warn "cannot work out slave name from $cwd";
+  return "unknown";
+}
+
+
+sub try_get_exclusive_lock {
+  if (!-d $LOCKDIR) {
+    print "singleton lock: $LOCKDIR does not exist, so no locking is required.\n";
+    return 1;
+  }
+  if (!-w $LOCKDIR) {
+    die "cannot write to $LOCKDIR";
+  }
+
+  $got_lock = undef;
+  my $newf = $LOCKDIR."/singleton_lock.active";
+  if (-f $newf) {
+    open (IN, "<$newf");
+    my $pid = <IN> + 0; close IN;
+
+    if ($pid > 0 && kill(0, $pid)) {
+      print "singleton lock: locked by $pid, still active.\n";
+      return 0;
+    }
+    else {
+      print "singleton lock: locked by $pid, no longer active. killing lock\n";
+      # shouldn't have to do this too often, which is just as well, as there's
+      # a tiny little racelet here
+      unlink $newf;
+    }
+  }
+
+  my $tmpf = $LOCKDIR."/singleton_lock.new.$$";
+  open (OUT, ">$tmpf") or die "cannot write to $tmpf";
+  print OUT $$;
+  close OUT or die "cannot write to $tmpf";
+
+  if (!-f $newf && rename($tmpf, $newf)) {
+    print "singleton lock: taking\n";
+    $got_lock = $newf;
+    return 1;
+  }
+  else {
+    print "singleton lock: missed the lock\n";
+    unlink $tmpf;
+    return 0;
+  }
+}
+
+sub kill_lock {
+  return unless $got_lock;
+  unlink $got_lock or warn "singleton lock: unlink $got_lock failed: $!";
+}
+

Propchange: spamassassin/branches/3.1/build/automc/buildbot_ready
------------------------------------------------------------------------------
    svn:executable = *

Added: spamassassin/branches/3.1/build/automc/etc-apache-local-conf-httpd.conf
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.1/build/automc/etc-apache-local-conf-httpd.conf?rev=432683&view=auto
==============================================================================
--- spamassassin/branches/3.1/build/automc/etc-apache-local-conf-httpd.conf (added)
+++ spamassassin/branches/3.1/build/automc/etc-apache-local-conf-httpd.conf Fri Aug 18 12:05:06 2006
@@ -0,0 +1,161 @@
+# Included on the zone from /etc/apache2/httpd.conf:
+#   Include /etc/apache/local-conf/httpd.conf
+#
+# Update Apache config like so:
+#
+#   cd /home/automc/svn/spamassassin; sudo -u automc svn up
+#   sudo /etc/init.d/apache2 restart ; tail /var/apache2/logs/error_log
+
+# ---------------------------------------------------------------------------
+
+# May 12 2005 jm
+<IfModule mod_dir.c>
+    DirectoryIndex index.html index.htm index.shtml index.php index.php4 index.php3 index.phtml index.cgi
+</IfModule>
+
+UseCanonicalName On
+
+NameVirtualHost *:80
+
+# jm dec 9 : proxy shouldn't wait for EOF, we want partial files too
+# SetEnv force-proxy-request-1.0 1
+SetEnv proxy-nokeepalive 1
+# try to cause more frequent buf writes with a tiny buf size
+ProxyIOBufferSize 16
+# reverse proxying only!
+ProxyRequests Off
+
+# ---------------------------------------------------------------------------
+
+<VirtualHost *:80>
+  ServerName buildbot.spamassassin.org
+  <Directory /var/www/buildbot.spamassassin.org>
+    options indexes SymLinksIfOwnerMatch
+    AllowOverride indexes
+  </Directory>
+  DocumentRoot /var/www/buildbot.spamassassin.org
+  ServerAdmin webmaster@spamassassin.org
+  userdir disabled
+
+  # mod_rewrite debugging:
+  # RewriteLogLevel 9
+  # RewriteLog /var/apache2/logs/rewrite_log
+
+  # backwards compat for old URLs
+  RewriteEngine On
+  RewriteRule ^/(ruleqa|preflight|bb)$ $1/ [R]
+  RewriteRule ^/ruleqa/(.*) http://ruleqa.spamassassin.org/$1 [R,L]
+  RewriteRule ^/preflight/(.*) http://bbmass.spamassassin.org/$1 [R,L]
+  RewriteRule ^/bb/(.*) http://buildbot.spamassassin.org/$1 [R,L]
+
+  # index: straight through
+  RewriteRule ^/$ http://127.0.0.1:8010/ [P]
+
+  # anything else; proxy, if it doesn't exist as a file/dir
+  RewriteCond /var/www/buildbot.spamassassin.org/%{REQUEST_FILENAME} !-d
+  RewriteCond /var/www/buildbot.spamassassin.org/%{REQUEST_FILENAME} !-f
+  RewriteRule ^/(.*) http://127.0.0.1:8010/$1 [P]
+
+  ProxyRequests Off
+  <Proxy *>
+  Order deny,allow
+  Allow from all
+  </Proxy>
+
+</VirtualHost>
+
+# ---------------------------------------------------------------------------
+
+<VirtualHost *:80>
+  ServerName bbmass.spamassassin.org
+  <Directory /var/www/bbmass.spamassassin.org>
+    options indexes SymLinksIfOwnerMatch
+    AllowOverride indexes
+  </Directory>
+  DocumentRoot /var/www/bbmass.spamassassin.org
+  ServerAdmin webmaster@spamassassin.org
+  userdir disabled
+
+  RewriteEngine On
+
+  # index: straight through
+  RewriteRule ^/$ http://127.0.0.1:8011/ [P]
+
+  # anything else; proxy, if it doesn't exist as a file/dir
+  RewriteCond /var/www/bbmass.spamassassin.org/%{REQUEST_FILENAME} !-d
+  RewriteCond /var/www/bbmass.spamassassin.org/%{REQUEST_FILENAME} !-f
+  RewriteRule ^/(.*) http://127.0.0.1:8011/$1 [P]
+
+  ProxyRequests Off
+  <Proxy *>
+  Order deny,allow
+  Allow from all
+  </Proxy>
+
+</VirtualHost>
+
+# ---------------------------------------------------------------------------
+
+<VirtualHost *:80>
+  ServerName ruleqa.spamassassin.org
+  <Directory /var/www/ruleqa.spamassassin.org>
+    options indexes SymLinksIfOwnerMatch
+    AllowOverride indexes
+  </Directory>
+  DocumentRoot /var/www/ruleqa.spamassassin.org
+  ServerAdmin webmaster@spamassassin.org
+  userdir disabled
+
+  # debugging:
+  # RewriteLogLevel 9
+  # RewriteLog /var/apache2/logs/rewrite_log
+
+  # "%2F" is used to grep rule subsets, allow it
+  AllowEncodedSlashes On
+
+  # strip out known-local files and loops; all else goes to the ruleqa code
+  RewriteEngine On
+
+  # index: straight through
+  RewriteRule ^/$ /ruleqa.cgi [PT,L]
+
+  # satisfy local files
+  RewriteCond /var/www/ruleqa.spamassassin.org/%{REQUEST_FILENAME} !-d
+  RewriteCond /var/www/ruleqa.spamassassin.org/%{REQUEST_FILENAME} !-f
+
+  # otherwise the CGI
+  RewriteCond %{REQUEST_URI} !^/ruleqa\.cgi
+  RewriteRule ^/(.*) /ruleqa.cgi/$1 [PT,L]
+
+  ScriptAlias /ruleqa.cgi /home/automc/svn/spamassassin/masses/rule-qa/automc/ruleqa.cgi
+
+</VirtualHost>
+
+# ---------------------------------------------------------------------------
+
+<VirtualHost *:80>
+  ServerName rsync.spamassassin.org
+  <Directory /home/corpus-rsync/corpus>
+    options indexes SymLinksIfOwnerMatch
+    AllowOverride indexes
+  </Directory>
+  DocumentRoot /home/corpus-rsync/corpus
+  ServerAdmin webmaster@spamassassin.org
+  userdir disabled
+</VirtualHost>
+
+# ---------------------------------------------------------------------------
+
+<VirtualHost *:80>
+  ServerName updatedist.spamassassin.org
+  <Directory /var/www/buildbot.spamassassin.org/updatestage>
+    options indexes SymLinksIfOwnerMatch
+    AllowOverride indexes
+  </Directory>
+  DocumentRoot /var/www/buildbot.spamassassin.org/updatestage
+  ServerAdmin webmaster@spamassassin.org
+  userdir disabled
+</VirtualHost>
+
+# ---------------------------------------------------------------------------
+

Added: spamassassin/branches/3.1/build/automc/etc-init.d-bbmass
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.1/build/automc/etc-init.d-bbmass?rev=432683&view=auto
==============================================================================
--- spamassassin/branches/3.1/build/automc/etc-init.d-bbmass (added)
+++ spamassassin/branches/3.1/build/automc/etc-init.d-bbmass Fri Aug 18 12:05:06 2006
@@ -0,0 +1,69 @@
+#!/bin/sh
+
+# to create a new slave:
+#
+# PASSWORD=[whatever]
+# buildbot slave /home/buildbot/slaves/slave-name buildbot.spamassassin.org:9989 slave-name $PASSWORD
+
+# to use a real tmpfs, add this to /etc/vfstab:
+#
+# swap            -               /export/home/bbmass/tmpfs       tmpfs - no      -
+#
+# this is now off; the disk I/O speeds have improved, it's no longer necessary.
+
+. /etc/profile
+
+PATH=$PATH:/sbin:/usr/sbin:/usr/local/bin:/usr/sfw/bin:/opt/sfw/bin:/opt/SUNWspro/bin:/opt/subversion-1.1.4/bin:/usr/apache2/bin:/local/bin
+export PATH
+
+prog=bbmass
+runall () {
+    buildbot $1 /home/buildbot/bots/bbmass
+    nice buildbot $1 /home/bbmass/slaves/mc-fast
+    nice buildbot $1 /home/bbmass/slaves/mc-med
+    nice buildbot $1 /home/bbmass/slaves/mc-slow
+    nice buildbot $1 /home/bbmass/slaves/mc-slower
+}
+
+start() {
+    # echo "Mounting tmpfs: "
+    # mount /export/home/bbmass/tmpfs
+    # echo "Done."
+
+    chmod 1777 /export/home/bbmass/tmpfs
+
+    echo "Starting $prog: " 
+    runall start
+    RETVAL=$?
+    echo
+    return $RETVAL
+}
+
+stop() {
+    runall stop
+    RETVAL=$?
+    return $RETVAL
+
+    # umount /export/home/bbmass/tmpfs
+}
+
+case "$1" in
+	start)
+	    start
+	    ;;
+	
+	stop)
+	    stop
+	    ;;
+	
+	restart)
+	    stop; sleep 2; start
+	    ;;
+	
+	*)
+	    # plain quotes: $"..." is a bash-ism, not understood by plain /bin/sh
+	    echo "Usage: $0 {start|stop|restart}" >&2
+	    exit 1
+esac
+
+exit 0

Propchange: spamassassin/branches/3.1/build/automc/etc-init.d-bbmass
------------------------------------------------------------------------------
    svn:executable = *

Added: spamassassin/branches/3.1/build/automc/etc-init.d-buildbot
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.1/build/automc/etc-init.d-buildbot?rev=432683&view=auto
==============================================================================
--- spamassassin/branches/3.1/build/automc/etc-init.d-buildbot (added)
+++ spamassassin/branches/3.1/build/automc/etc-init.d-buildbot Fri Aug 18 12:05:06 2006
@@ -0,0 +1,53 @@
+#!/bin/sh
+
+# to create a new slave:
+#
+# PASSWORD=[whatever]
+# buildbot slave /home/buildbot/slaves/slave-name buildbot.spamassassin.org:9988 slave-name $PASSWORD
+
+. /etc/profile
+
+PATH=$PATH:/sbin:/usr/sbin:/usr/local/bin:/usr/sfw/bin:/opt/sfw/bin:/opt/SUNWspro/bin:/opt/subversion-1.1.4/bin:/usr/apache2/bin:/local/bin
+export PATH
+
+prog=buildbot
+runall () {
+    buildbot $1 /home/buildbot/bots/sa
+    nice buildbot $1 /home/buildbot/slaves/zone-sol10
+    nice buildbot $1 /home/buildbot/slaves/sol10-perl561
+}
+
+start() {
+    echo "Starting $prog: " 
+    runall start
+    RETVAL=$?
+    echo
+    return $RETVAL
+}
+
+stop() {
+    runall stop
+    RETVAL=$?
+    return $RETVAL
+}
+
+case "$1" in
+	start)
+	    start
+	    ;;
+	
+	stop)
+	    stop
+	    ;;
+	
+	restart)
+	    stop; sleep 2; start
+	    ;;
+	
+	*)
+	    # plain quotes: $"..." is a bash-ism, not understood by plain /bin/sh
+	    echo "Usage: $0 {start|stop|restart}" >&2
+	    exit 1
+esac
+
+exit 0

Propchange: spamassassin/branches/3.1/build/automc/etc-init.d-buildbot
------------------------------------------------------------------------------
    svn:executable = *

Added: spamassassin/branches/3.1/build/automc/etc-init.d-freqsd
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.1/build/automc/etc-init.d-freqsd?rev=432683&view=auto
==============================================================================
--- spamassassin/branches/3.1/build/automc/etc-init.d-freqsd (added)
+++ spamassassin/branches/3.1/build/automc/etc-init.d-freqsd Fri Aug 18 12:05:06 2006
@@ -0,0 +1,56 @@
+#!/bin/sh
+
+. /etc/profile
+
+PATH=$PATH:/sbin:/usr/sbin:/usr/local/bin:/usr/sfw/bin:/opt/sfw/bin:/opt/SUNWspro/bin:/opt/subversion-1.1.4/bin:/usr/apache2/bin:/local/bin
+export PATH
+
+prog=freqsd
+
+start() {
+    echo "Starting $prog: "
+
+    # freqsd is started via a relative path, so bail out if the cd fails
+    cd /home/automc/svn/spamassassin || return 1
+    rm -f /home/automc/freqsd/log      # -f: stay quiet if there is no old log
+    HOME=/home/automc
+    export HOME
+
+    su automc -c "./build/automc/freqsd -pidfile /home/automc/freqsd/pid" \
+	> /home/automc/freqsd/log 2>&1 \
+	< /dev/null &
+
+    echo
+    return 0
+}
+
+stop() {
+    cd /home/automc/svn/spamassassin
+    su automc -c "./build/automc/freqsd -pidfile /home/automc/freqsd/pid -kill"
+
+    sleep 1
+    pkill -15 -f automc/freqsd            # just in case
+    sleep 1
+    pkill -15 -u automc
+}
+
+case "$1" in
+	start)
+	    start
+	    ;;
+	
+	stop)
+	    stop
+	    ;;
+	
+	restart)
+	    stop; sleep 2; start
+	    ;;
+	
+	*)
+	    # plain quotes: $"..." is a bash-ism, not understood by plain /bin/sh
+	    echo "Usage: $0 {start|stop|restart}" >&2
+	    exit 1
+esac
+
+exit 0

Propchange: spamassassin/branches/3.1/build/automc/etc-init.d-freqsd
------------------------------------------------------------------------------
    svn:executable = *

Added: spamassassin/branches/3.1/build/automc/populate_cor_nightly
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.1/build/automc/populate_cor_nightly?rev=432683&view=auto
==============================================================================
--- spamassassin/branches/3.1/build/automc/populate_cor_nightly (added)
+++ spamassassin/branches/3.1/build/automc/populate_cor_nightly Fri Aug 18 12:05:06 2006
@@ -0,0 +1,33 @@
+#!/usr/bin/perl
+#
+# create "targets" files for the mass-check corpora on the zone
+# TODO: mass-check should be smart enough to work this out itself
+
+use strict;
+use warnings;
+
+my $SADIR="/home/automc/svn/spamassassin";
+
+# both of these must be writable
+my $TGTS_DIR = "/home/bbmass/mc-nightly";
+my $RAWCOR_DIR = "/home/bbmass/rawcor";
+
+if (!-d $TGTS_DIR) {
+  mkdir ($TGTS_DIR) or die "cannot create $TGTS_DIR";
+}
+
+chdir $RAWCOR_DIR or die "cd $RAWCOR_DIR failed";
+foreach my $submitter (<*>) {
+  next unless (-d $submitter);
+
+  # NOTE(review): the stripped name is also used in the corpus paths below,
+  # so they name the stripped form, not the real directory -- confirm intended
+  $submitter =~ s,[^A-Za-z0-9],,gs;
+  my $tgtfile = "$TGTS_DIR/targets.$submitter";
+
+  open (my $out, '>', $tgtfile) or die "cannot write to $tgtfile: $!";
+  print $out "ham:detect:$RAWCOR_DIR/$submitter/ham/*\n",
+            "spam:detect:$RAWCOR_DIR/$submitter/spam/*\n";
+  close $out or die "write to $tgtfile failed: $!";
+}
+

Propchange: spamassassin/branches/3.1/build/automc/populate_cor_nightly
------------------------------------------------------------------------------
    svn:executable = *

Added: spamassassin/branches/3.1/build/automc/run_nightly
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.1/build/automc/run_nightly?rev=432683&view=auto
==============================================================================
--- spamassassin/branches/3.1/build/automc/run_nightly (added)
+++ spamassassin/branches/3.1/build/automc/run_nightly Fri Aug 18 12:05:06 2006
@@ -0,0 +1,55 @@
+#!/bin/sh
+#
+# driver for nightly mass-checks on the zone. run from cron as:
+# 10 9,10 * * * /home/automc/svn/spamassassin/build/automc/run_nightly > /var/www/buildbot.spamassassin.org/bbmass/nightly_masschecks.txt 2>&1
+#
+# details: http://wiki.apache.org/spamassassin/NightlyMassCheck
+
+# add usernames who you want to do nightlies for here:
+nightly_users="
+  zmi
+  doc
+  jm
+"
+
+# and where the $HOMEs are:
+nightly_trees="/home/bbmass/mc-nightly"
+
+
+# ensure we're running after 0900 UTC; complex due to daylight savings.
+# If we're running in the 08xx UTC hour-space, sleep for 1 hour; when
+# we wake, we will be in 09xx.
+(
+  TIME="%e,%U,%S"; TZ=UTC; export TIME TZ
+  if date | egrep '^... ... .. 08:'; then
+    date; echo "sleeping for 1 hour to compensate for DST"
+    sleep 3600 ; date
+  fi
+)
+
+# create the targets files:
+/home/automc/svn/spamassassin/build/automc/populate_cor_nightly
+
+echo "run_nightly starting at" ; date
+
+# and run each mass-check using "corpus-nightly"
+for user in $nightly_users ; do
+  dir="$nightly_trees/$user"
+
+  echo "Starting corpus-nightly for $user ($dir)"
+
+  (
+    HOME=$dir
+
+    # oh Solaris, you really suck
+    PERL=/local/perl586/bin/perl
+    PATH=$PATH:/usr/sfw/bin:/opt/sfw/bin:/opt/SUNWspro/bin:/usr/X/bin:/usr/ucb:/usr/sbin:/usr/ccs/bin:/opt/subversion-1.1.4/bin:/usr/apache2/bin:/local/bin
+
+    export HOME PERL PATH
+
+    # use bash to work around Solaris breakage
+    nice bash $dir/svn/masses/rule-qa/corpus-nightly
+  )
+
+done
+

Propchange: spamassassin/branches/3.1/build/automc/run_nightly
------------------------------------------------------------------------------
    svn:executable = *

Added: spamassassin/branches/3.1/build/automc/run_preflight
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.1/build/automc/run_preflight?rev=432683&view=auto
==============================================================================
--- spamassassin/branches/3.1/build/automc/run_preflight (added)
+++ spamassassin/branches/3.1/build/automc/run_preflight Fri Aug 18 12:05:06 2006
@@ -0,0 +1,128 @@
+#!/usr/bin/perl -w
+
+use strict;
+sub run;
+
+my $BYMAILCF = '/home/bbmass/bymail/latest.cf';
+
+
+my $perl = $^X;
+if (!$perl) {
+  die "no perl path found in ARGV!";
+}
+
+my $slavename;
+
+$|=1;
+my $pwd = `pwd`;
+$pwd =~ /slaves\/([-_A-Za-z0-9]+)\//; if ($1) {
+  $slavename = $1;
+} else {
+  die "cannot work out slavename!  $pwd";
+}
+
+my %mass_check_args = (
+
+  'mc-fast' =>      '--tail=1000',
+  'mc-med' =>       '--tail=6000  --head=5000',
+  'mc-slow' =>      '--tail=16000 --head=10000',
+  'mc-slower' =>    '--tail=36000 --head=20000',
+
+);
+
+# super-nice please!
+#
+system ("renice +19 $$");
+
+# cd to masses
+#
+chdir "masses" or die;
+
+unlink ("ham.log", "spam.log");
+
+# just the sandbox rules, sandbox plugins, the timing plugin,
+# and the default system-wide plugins (so DNS evals can work)
+#
+system ("rm -rf tstrules");
+run "mkdir tstrules";
+run "cp ../rules/*.pre tstrules";
+run "cp ../rules/*.pm tstrules";
+run "cp plugins/*.* tstrules";
+
+# don't just copy; instead, transcribe while dropping score and describe lines
+# (to avoid '[26260] warn: config: warning: score set for non-existent rule
+# HTML_SHORT_LENGTH').
+## run "cp ../rules/70_sandbox.cf tstrules";
+my $sandbox_src = "../rules/70_sandbox.cf";
+my $sandbox_dst = "tstrules/70_sandbox.cf";
+open (my $in, '<', $sandbox_src) or die "cannot read $sandbox_src: $!";
+open (my $out, '>', $sandbox_dst) or die "cannot write $sandbox_dst: $!";
+
+while (my $line = <$in>) {
+  # strip leading whitespace so indented directives are matched too
+  $line =~ s/^\s+//;
+
+  # skip score, describe and lang lines; everything else is copied
+  next if $line =~ /^(?:score|describe|lang)/;
+  print $out $line;
+}
+
+close $in or die "close of $sandbox_src failed: $!";
+close $out or die "close of $sandbox_dst failed: $!";
+
+# well, ok just those, and anything that's been mailed-in
+# 
+if (-f 'mailed.cf') {
+  run "cp mailed.cf tstrules/70_mailed.cf";
+}
+
+run "ls -l tstrules";
+
+# lint those rule files, and abort if they fail; this is a sanity check
+# now that we have mailed-in rules used in this mass-check
+#
+if (run ("../spamassassin --lint -x -C tstrules", 1) != 0) {
+  warn "*** lint failed; exiting without mass-check! ***\n\n";
+  warn "Deleting 'mailed.cf', if it exists.\n"; unlink $BYMAILCF;
+  warn "Rule files:\n"; system ("cat tstrules/*.cf");
+  die "lint failure";
+} else {
+  print "lint passed.\n";
+}
+
+# notes on this mass-check command:
+#
+# this is run in a chroot jail, just in case there's hostile rule code in
+# there. 
+# de-encapsulate 'report_safe' messages from petuniapress.com.
+# produce lots of noisy output to stop the buildbot from timing out on
+# mass-checks of large corpora.
+# store AICache data in /tmpfs/aicache.
+
+run "/local/bbmasstools/masschroot $perl ".
+    "mass-check -c=tstrules --cache -j=1 ".
+    "--noisy --deencap='petuniapress.com' ".
+    "--cachedir=/tmpfs/aicache ".
+    $mass_check_args{$slavename}." ".
+    "ham:detect:/home/bbmass/rawcor/*/ham/* ".
+    "spam:detect:/home/bbmass/rawcor/*/spam/*";
+
+    # TODO: add --after="6 months ago"?
+
+exit;
+
+# ---------------------------------------------------------------------------
+
+# run a shell command, echoing it first.  Dies on failure, unless
+# $ignoreexit is set, in which case a zero/nonzero status is returned
+sub run {
+  my ($cmd, $ignoreexit) = @_;
+
+  print "[$cmd]\n";
+  system ($cmd);
+
+  # a signal death leaves the exit-code byte at 0, so look at all of $?
+  die "command '$cmd' failed with status $?" if (!$ignoreexit && $? != 0);
+  return ($? >> 8) || ($? & 127);
+}
+

Propchange: spamassassin/branches/3.1/build/automc/run_preflight
------------------------------------------------------------------------------
    svn:executable = *

Modified: spamassassin/branches/3.1/masses/rule-qa/README.nightly
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.1/masses/rule-qa/README.nightly?rev=432683&r1=432682&r2=432683&view=diff
==============================================================================
--- spamassassin/branches/3.1/masses/rule-qa/README.nightly (original)
+++ spamassassin/branches/3.1/masses/rule-qa/README.nightly Fri Aug 18 12:05:06 2006
@@ -22,10 +22,10 @@
    local time, this is US/Pacific, adjust appropriately for your
    timezone)
 
-   The "tagtime" and "corpus-nightly" cron jobs should be run twice a
-   day to handle daylight savings since cron does not.  They exit if
-   it's 0800-0859 or 1000-1059 UTC (which means you can "corpus-nightly"
-   any other time of day if you want).
+   The "tagtime" and "corpus-nightly" cron jobs will automatically handle
+   daylight savings, since cron does not.  They do this by sleeping for 1 hour
+   if they detect that they are run in the 0800-0859 UTC hour. (This means you
+   can run "corpus-nightly" at any other time of day, if you want.)
 
    The "corpus-hourly" script only needs to be run if you are producing
    optional mass-check summary reports.
@@ -34,8 +34,8 @@
 
 ------- start of cut text --------------
 PATH=/home/corpus/scripts:/usr/local/bin:/usr/local/sbin:/bin:/sbin:/usr/bin:/usr/sbin
-0 1,2 * * * corpus-tagtime
-10 1,2 * * * corpus-nightly >/home/corpus/log/nightly 2>&1
+0 1 * * * corpus-tagtime
+10 1 * * * corpus-nightly >/home/corpus/log/nightly 2>&1
 30 * * * * corpus-hourly >/home/corpus/log/hourly 2>&1
 5 1 * * * pyzor discover >/dev/null 2>/dev/null
 ------- end ----------------------------

Modified: spamassassin/branches/3.1/masses/rule-qa/automc/RUNME.after
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.1/masses/rule-qa/automc/RUNME.after?rev=432683&r1=432682&r2=432683&view=diff
==============================================================================
--- spamassassin/branches/3.1/masses/rule-qa/automc/RUNME.after (original)
+++ spamassassin/branches/3.1/masses/rule-qa/automc/RUNME.after Fri Aug 18 12:05:06 2006
@@ -1,24 +1,25 @@
-#!/bin/sh
+#!/usr/bin/bash
 
+. /etc/profile
 cd /home/automc/svn/spamassassin/masses/rule-qa/automc ; . config
 exec >> LOG.after 2>&1
 set -x
 
 echo "RUNME.after starting"; date
 
-PATH=$PATH:/usr/local/bin
+PATH=/local/perl586/bin:$PATH:/usr/local/bin
+date=date ; [ -x /opt/sfw/bin/gdate ] && date=gdate
 
-$SADIR/masses/rule-qa/corpus-hourly
+# rebuild the XML indexes of SVN commits, since it appears they
+# can get out-of-date due to some oddness of "svn log"
+$SADIR/masses/rule-qa/automc/gen_info_xml -f
+
+# hit-frequencies now requires up-to-date rules to generate valid reports!
+# so we need to do an svn update, and a make
+# TODO: it'd be nice if hit-frequencies didn't require this.
+( cd $SADIR ; svn update ; perl Makefile.PL ; make ) < /dev/null
 
-today=`date +"%Y%m%d"`
-mkdir -p $html/$today
-cp -p $html/*.* $html/$today
-date
-
-# only post the comments once, on the first time the results are read
-if [ ! -f $MCTMP/comments-posted ] ; then
-  ./post-comments
-  touch $MCTMP/comments-posted
-fi
+# then generate the various forms of 'freqs' files...
+$SADIR/masses/rule-qa/corpus-hourly
 
 echo "RUNME.after finished"; date

Modified: spamassassin/branches/3.1/masses/rule-qa/automc/RUNME.before
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.1/masses/rule-qa/automc/RUNME.before?rev=432683&r1=432682&r2=432683&view=diff
==============================================================================
--- spamassassin/branches/3.1/masses/rule-qa/automc/RUNME.before (original)
+++ spamassassin/branches/3.1/masses/rule-qa/automc/RUNME.before Fri Aug 18 12:05:06 2006
@@ -1,10 +1,14 @@
-#!/bin/sh
+#!/usr/bin/bash
 
+. /etc/profile
 cd /home/automc/svn/spamassassin/masses/rule-qa/automc ; . config
 
+date=date ; [ -x /opt/sfw/bin/gdate ] && date=gdate
+egrep=egrep ; [ -x /usr/sfw/bin/gegrep ] && egrep=gegrep
+
 # ensure we're running on or after 0830 UTC, because cron can't; it's
 # in a different timezone, and we have daylight savings to contend with
-if TZ=UTC date | egrep -q '^... ... .. (03|04|05|06|07|09|10|11|12|13):'; then
+if TZ=UTC $date | $egrep -q '^... ... .. (03|04|05|06|07|09|10|11|12|13):'; then
   exit
 fi
 
@@ -14,23 +18,6 @@
 
 PATH=$PATH:/usr/local/bin
 rm -rf $MCTMP; mkdir -p $MCTMP
-
-./scrape-bugzilla
-cp 70_scraped.cf $SADIR/rules
-(
-  cd $SADIR/rules
-  svn commit \
-	--username $SVN_USERNAME \
-	--file $MCTMP/commit.msg 70_scraped.cf
-)
-
-# update the "tagtime" file; of course, we're not tagging, but it helps for
-# the corpus scripts
-touch $tagtime
-
-# and remove the flag file that indicates that the comment-posting step
-# should be inhibited later
-rm -f $MCTMP/comments-posted
 
 # and discard the old logfile
 mv LOG.after LOG.after.old

Modified: spamassassin/branches/3.1/masses/rule-qa/automc/config.example
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.1/masses/rule-qa/automc/config.example?rev=432683&r1=432682&r2=432683&view=diff
==============================================================================
--- spamassassin/branches/3.1/masses/rule-qa/automc/config.example (original)
+++ spamassassin/branches/3.1/masses/rule-qa/automc/config.example Fri Aug 18 12:05:06 2006
@@ -9,17 +9,9 @@
 # by the automc user
 SADIR=/home/automc/svn/spamassassin
 
-# username and password for bugzilla account to comment with
-BZ_USERNAME=automc@example.com
-BZ_PASSWORD_FILE=/home/automc/.bz_passwd.cf
-
-# SVN username to commit changes to "rules/70_scraped.cf" with
-SVN_USERNAME=automc
-
 # URL of the "ruleqa.cgi" script as installed
 RULEQA_URL=http://buildbot.spamassassin.org/ruleqa
 
 # these must match what's in ~/.corpus, cf. "masses/rule-qa/README.nightly"
 html=/home/automc/corpus/html
-tagtime=/home/automc/corpus/tagtime
 

Added: spamassassin/branches/3.1/masses/rule-qa/automc/gen_info_xml
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.1/masses/rule-qa/automc/gen_info_xml?rev=432683&view=auto
==============================================================================
--- spamassassin/branches/3.1/masses/rule-qa/automc/gen_info_xml (added)
+++ spamassassin/branches/3.1/masses/rule-qa/automc/gen_info_xml Fri Aug 18 12:05:06 2006
@@ -0,0 +1,179 @@
+#!/local/perl586/bin/perl -w
+my $automcdir = "/home/automc/svn/spamassassin/masses/rule-qa/automc";
+
+# need this to ensure that 'svn log' will include ALL changes
+my $svn_checkins_root = "http://svn.apache.org/repos/asf/spamassassin/";
+
+my $full_rebuild = 0;
+if ($ARGV[0] && $ARGV[0] =~ /^-f/) {
+  $full_rebuild = 1;
+}
+
+use XML::Simple;
+
+use strict;
+use bytes;
+use POSIX qw(strftime);
+
+my $myperl = $^X;
+
+# read the shell-style KEY=VALUE settings from the automc "config" file
+open (CF, "<$automcdir/config") or die "cannot read $automcdir/config: $!";
+my %conf; while(<CF>) { /^(\S+)=(\S+)/ and $conf{$1} = $2; }
+close CF;
+die "no conf{html}: ".($conf{html} || '(unset)') unless ($conf{html} && -d $conf{html});
+
+my $svn_log;
+
+# all known date/revision combos.  warning: could get slow in future
+my @daterevs = get_all_daterevs();
+
+# write fastinfo.xml (always) and info.xml (when needed) for each daterev dir
+foreach my $dr (@daterevs) {
+  my $drdir = get_datadir_for_daterev($dr);
+  # fastinfo.xml is built from local files only, so it is always rebuilt.
+  # print "$drdir/fastinfo.xml: creating...\n";
+  my $fastinfo = get_fastinfo($dr, $drdir);
+  if (!defined $fastinfo) { next; }   # get_fastinfo() has warned about this dir
+
+  open (OUT, ">$drdir/fastinfo.xml") or die "cannot write $drdir/fastinfo.xml";
+  print OUT $fastinfo;
+  close OUT or die "failed to write to $drdir/fastinfo.xml";
+  chmod 0666, "$drdir/fastinfo.xml"; # or warn "failed to chmod $drdir/fastinfo.xml";
+
+  # info.xml needs the 'svn log' data and is quite expensive to build; so
+  # unless -f was given, keep any non-empty info.xml that already exists
+  if (!$full_rebuild && -f "$drdir/info.xml" && -s _) {
+    # print "$drdir/info.xml: good\n";
+    next;
+  }
+
+  print "$drdir/info.xml: creating...\n";
+  my $info = get_info($dr, $drdir);
+  open (OUT, ">$drdir/info.xml") or die "cannot write $drdir/info.xml";
+  print OUT $info;
+  close OUT or die "failed to write to $drdir/info.xml";
+  chmod 0666, "$drdir/info.xml"; # or warn "failed to chmod $drdir/info.xml";
+}
+
+# Build the info.xml text for one date/revision: the message, revision, date
+# and author of the first 'svn log' entry whose revision is <= the daterev's.
+# Fetches the svn log on first use.  $drdir is accepted but unused.
+sub get_info {
+  my ($dr, $drdir) = @_;
+
+  # list-assign the capture we need: if the match fails, $rev is simply undef
+  # instead of picking up a stale $2 from some earlier successful match
+  my (undef, $rev) = ($dr =~ /^(\d+)[-\/]r(\d+)-(\S+)$/);
+
+  my $info = { };
+  get_svn_log() if (!$svn_log);
+  return XMLout($info) if (!defined $rev);   # unparseable daterev: empty info
+
+  # NOTE(review): taking the first hit relies on newest-first 'svn log' order
+  foreach my $logentry (@{$svn_log->{logentry}}) {
+    next if ($logentry->{revision} > $rev);
+    $info->{msg} = $logentry->{msg} || '';
+    $info->{checkin_rev} = $logentry->{revision} || '';
+    $info->{checkin_date} = $logentry->{date} || '';   # '2005-10-31T04:20:10.686485Z'
+    $info->{author} = $logentry->{author} || '';
+    last;
+  }
+
+  return XMLout($info);
+}
+
+# Build the fastinfo.xml text for one date/revision directory: date, rev, tag,
+# an includes_net flag (set when NET.all exists), and submitter details read
+# from DETAILS.all (else NET.all).  Returns nothing if the daterev has no tag.
+sub get_fastinfo {
+  my ($dr, $drdir) = @_;
+
+  # list-assign the captures: a failed match leaves them undef, never stale
+  my ($date, $rev, $tag) = ($dr =~ /^(\d+)[-\/]r(\d+)-(\S+)$/);
+  if (!defined $tag) {
+    warn "dir with no tag! ignored: $dr\n";
+    return;
+  }
+
+  my $info = {
+    date => $date,
+    rev => $rev,
+    tag => $tag,
+    includes_net => 0
+  };
+
+  if (-f "$drdir/NET.all") {
+    $info->{includes_net} = 1;
+  }
+
+  $info->{mclogmds} = '';
+  $info->{submitters} = '';
+  if (-f "$drdir/DETAILS.all") {
+    read_submitters("$drdir/DETAILS.all", $info);
+  }
+  elsif (-f "$drdir/NET.all") {
+    # this can happen when a weekly mass-check shares a rev with a previous or
+    # later night's nightly m-c; all the non-weekly data winds up under another
+    # date.
+    read_submitters("$drdir/NET.all", $info);
+  }
+
+  return XMLout($info);
+}
+
+# Scan the leading '#' comment lines of a results file ($fname, e.g. a
+# DETAILS.all) and store the submitter names and any <mclogmds> XML in $info.
+sub read_submitters {
+  my ($fname, $info) = @_;
+  # bail out if unreadable, rather than reading from an unopened handle
+  open (my $in, '<', $fname) or do { warn "cannot read $fname: $!"; return; };
+  while (<$in>) {
+    # spam results used for 329933 DETAILS all: spam-mc-fast.log spam-foo.log
+    if (/^# spam results used for \S+ \S+ \S+: (.*)$/) {
+      my $sub = $1;
+      $sub =~ s/\bspam-/ /gs;
+      $sub =~ s/\.log\b/ /gs;
+      $sub =~ s/\s+/ /gs;
+      $sub =~ s/^ //gs;
+      $sub =~ s/ $//gs;
+      $info->{submitters} = $sub;
+    }
+    $info->{mclogmds} = XMLin($1) if (/^#\s*(<mclogmds>.*<\/mclogmds>)/);  # raw XML
+    last if (!/^#/);        # don't want anything after the comments end
+  }
+  close $in;
+}
+
+sub get_all_daterevs {
+  # every "<html>/<date>/r<rev>..." directory, as a sorted "date-rNNN..." list
+  my @dirs = grep { /\/(\d+\/r\d\S+)$/ && -d $_ } (<$conf{html}/2*/r*>);
+  return sort map { s/^.*\/(\d+)\/(r\d\S+)$/$1-$2/; $_; } @dirs;
+}
+
+sub get_datadir_for_daterev {
+  # "date-rev-tag" -> "<html>/date/rev-tag/": only the first '-' becomes '/'
+  (my $subdir = $_[0]) =~ s/-/\//;
+  return join("/", $conf{html}, $subdir, "");
+}
+
+# Fetch 'svn log --xml' for $svn_checkins_root and parse it into the
+# file-level $svn_log; dies if the log cannot be fetched or parsed.
+sub get_svn_log {
+  print "getting svn log...\n";
+  if (open (IN, "svn log --xml $svn_checkins_root |")) {
+    eval {
+      my $xml = join('', <IN>);
+      # ForceArray: keep {logentry} an array ref even for a one-commit log
+      $svn_log = XMLin($xml, ForceArray => ['logentry']);
+    };
+    die "svn xml: $@" if ($@);
+    # for a piped open, a non-zero exit leaves $! at 0; the status is in $?
+    close IN or die "svn failed: ".($! ? $! : "exit status $?");
+  }
+  die "no svn log --xml" if (!$svn_log);
+  print "got ".(scalar @{$svn_log->{logentry} || []})." log entries\n";
+
+  # use Data::Dumper; print Dumper($svn_log); die;
+}
+

Propchange: spamassassin/branches/3.1/masses/rule-qa/automc/gen_info_xml
------------------------------------------------------------------------------
    svn:executable = *