You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2005/10/22 02:19:08 UTC

svn commit: r327610 - in /spamassassin/trunk: build/automc/buildbot_ready masses/rule-hits-over-time masses/rule-qa/corpus-hourly masses/rule-qa/corpus.example

Author: jm
Date: Fri Oct 21 17:19:05 2005
New Revision: 327610

URL: http://svn.apache.org/viewcvs?rev=327610&view=rev
Log:
hook up the two parts of the preflight system

Added:
    spamassassin/trunk/masses/rule-hits-over-time   (with props)
Modified:
    spamassassin/trunk/build/automc/buildbot_ready
    spamassassin/trunk/masses/rule-qa/corpus-hourly
    spamassassin/trunk/masses/rule-qa/corpus.example

Modified: spamassassin/trunk/build/automc/buildbot_ready
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/build/automc/buildbot_ready?rev=327610&r1=327609&r2=327610&view=diff
==============================================================================
--- spamassassin/trunk/build/automc/buildbot_ready (original)
+++ spamassassin/trunk/build/automc/buildbot_ready Fri Oct 21 17:19:05 2005
@@ -35,8 +35,58 @@
 # ---------------------------------------------------------------------------
 
 sub do_stop {
+  # Print the fast hit-frequencies report, then move this slave's ham/spam
+  # logs into the per-revision log directory and run corpus-hourly on it.
+  my $rev = get_current_svn_revision();
+  my $slave = get_current_slave_name();
   chdir("masses") or die;
+
+  print "FAST FREQS REPORT:\n\n";
   system ("$perl hit-frequencies -c tstrules -x -p -s 0");
+
+  print "\n\nBUILDING SLOW FREQS REPORT:\n\n";
+  my $logdir = "/home/bbmass/tmp/logs-r$rev";
+  # -p rather than a -d test: the directory is keyed on the revision only
+  # (the slave name is in the file names), so another slave may create it first
+  run ("mkdir -p $logdir");
+  run ("mv ham.log $logdir/ham-$slave.log");
+  run ("mv spam.log $logdir/spam-$slave.log");
+  run ("./rule-qa/corpus-hourly --dir $logdir");
 }
 
+
+
+# ---------------------------------------------------------------------------
+
+sub get_current_svn_revision {
+  # Returns the number from the "Revision:" line of "svn info" output for
+  # the current directory, or undef if no such line is seen.
+  open (my $svninfo, '-|', "(svn info --non-interactive || svn info) < /dev/null 2>&1")
+      or die "cannot run svn info: $!";
+  my $revision;
+  while (<$svninfo>) {
+    # Revision: 320871
+    if (/^Revision: (\d+)/) { $revision = $1; last; }
+  }
+  close $svninfo;
+  return $revision;
+}
+
+sub run {
+  # Echo $cmd, run it with system(), and die if it failed -- unless the
+  # caller passed a true $ignoreexit.
+  my ($cmd, $ignoreexit) = @_;
+  print "[$cmd]\n";
+  system ($cmd);
+  # test all of $?, not just the exit code in its high byte: a command
+  # killed by a signal leaves that byte 0 and sets only the low bits
+  die "command '$cmd' failed with status $?" if (!$ignoreexit && $? != 0);
+}
+
+sub get_current_slave_name {
+  my $cwd = `pwd`;      # name is the path component following "/slaves/"
+  if ($cwd =~ /\/slaves\/([^\/]+)\//) { return $1; }
+  warn "cannot work out slave name from $cwd";
+  return "unknown";
+}
 

Added: spamassassin/trunk/masses/rule-hits-over-time
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/masses/rule-hits-over-time?rev=327610&view=auto
==============================================================================
--- spamassassin/trunk/masses/rule-hits-over-time (added)
+++ spamassassin/trunk/masses/rule-hits-over-time Fri Oct 21 17:19:05 2005
@@ -0,0 +1,43 @@
+#!/usr/bin/perl
+# Count, per $PERIOD-second bucket of the "time=" field, the input lines
+# that do and do not match the rule pattern below.  Each finished bucket is
+# printed as "bucket_start hits misses".
+use strict;
+use warnings;
+
+my $PERIOD = (24 * 60 * 60 * 7);    # bucket width: one week, in seconds
+my $lastbucket = 0;                 # start time of the bucket being filled
+my $nextbucket = 0;                 # start time of the bucket after that
+my ($seen_y, $seen_n) = (0, 0);     # lines with / without the pattern
+
+while (<>) {
+  my $found = /SARE_SUBJ/ ? 1 : 0;
+  # lines with no numeric time= field cannot be placed in a bucket
+  next unless /^.*\btime=(\d+)/;
+  my $t = $1;
+  if ($lastbucket == 0) {
+    $lastbucket = $t;
+    $nextbucket = $t + $PERIOD;
+  }
+  # close off every bucket that ended at or before this line's time ...
+  while ($t >= $nextbucket) {
+    completeline();
+    $lastbucket = $nextbucket;
+    $nextbucket += $PERIOD;
+  }
+  # ... and only then count the line, so it lands in the bucket it opened
+  if ($found) { $seen_y++; } else { $seen_n++; }
+}
+completeline() if $lastbucket;      # flush the final, partial bucket
+
+sub completeline {
+  print "$lastbucket $seen_y $seen_n\n";
+  $seen_y = 0;
+  $seen_n = 0;
+}
+
+print STDERR '
+
+plot "times" using 0:1, "times" using 0:2
+
+';

Propchange: spamassassin/trunk/masses/rule-hits-over-time
------------------------------------------------------------------------------
    svn:executable = *

Modified: spamassassin/trunk/masses/rule-qa/corpus-hourly
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/masses/rule-qa/corpus-hourly?rev=327610&r1=327609&r2=327610&view=diff
==============================================================================
--- spamassassin/trunk/masses/rule-qa/corpus-hourly (original)
+++ spamassassin/trunk/masses/rule-qa/corpus-hourly Fri Oct 21 17:19:05 2005
@@ -3,8 +3,19 @@
 # settings are located in $HOME/.corpus
 
 use strict;
+use Getopt::Long;
+
+use vars qw(
+    $corpusdir
+);
+GetOptions(
+    "dir=s" => \$corpusdir
+);
+
+
 use File::Path;
 use Time::ParseDate;
+use Cwd;
 use POSIX qw(nice strftime);
 
 use constant WEEK => 60*60*24*7;   # seconds in seven days
@@ -12,7 +23,6 @@
 
 my $configuration = "$ENV{HOME}/.corpus";
 my %opt;
-# my $revision = "unknown";
 my %revision = ();
 my %logs_by_rev = ();
 my %is_net_revision = ();
@@ -22,33 +32,31 @@
 my $time_start = time;
 
 &configure;
-# &version;
 &init;
-&update;
+
+# --dir given: read logs from there as-is; else use configured corpus + rsync
+if ($corpusdir) {
+  print "reading logs from '$corpusdir'\n";
+}
+else {
+  $corpusdir = $opt{corpus};
+  &update_rsync;
+}
 &locate;
-# &rename;
 &current;
 &clean_up;
 
-# sub version {
-# my $line;
-# if (open(TESTING, "$opt{tree}/rules/70_testing.cf")) {
-# chomp($line = <TESTING>);
-# if ($line =~ m/^#.*Rev(?:ision)?:\s*(\S+).*/) {
-# $revision = $1;
-# }
-# close(TESTING);
-# }
-# }
-
 sub configure {
   # does rough equivalent of source
   open(C, $configuration) || die "open failed: $configuration: $!\n";
+  my $pwd = getcwd;
   while(<C>) {
 	chomp;
 	s/#.*//;
 	if (/^\s*(.*?)\s*=\s*(.*?)\s*$/) {
-	  $opt{$1} = $2;
+          my ($key, $val) = ($1, $2);
+          $val =~ s/\$PWD/$pwd/gs;
+	  $opt{$key} = $val;
 	}
   }
   close(C);
@@ -67,8 +75,8 @@
   $ENV{TZ} = 'UTC';
 }
 
-sub update {
-  chdir $opt{corpus};
+sub update_rsync {
+  chdir $corpusdir;
 
   # allow non-running of rsync under some circumstances
   if ($opt{rsync_command}) {
@@ -77,7 +85,9 @@
     system "rsync -CPcvuzt --timeout=300 $opt{username}" . '@rsync.spamassassin.org::corpus/*.log .';
   }
 
-  if (!$opt{always_update_html}) {
+  # this block is no longer required -- we do sensible things with modtime
+  # comparisons to work it out!
+  if (0 && !$opt{always_update_html}) {
     if (-f "rsync.last") {
       open(FIND, "find . -type f -newer rsync.last |");
       my $files = "";
@@ -96,25 +106,27 @@
       }
     }
   }
+
   open(RSYNC, "> rsync.last");
   close(RSYNC);
   system "chmod +r *.log";
 }
 
 sub locate {
-  chdir "$opt{tree}/masses";
-  opendir(CORPUS, $opt{corpus});
+  # chdir "$opt{tree}/masses" or die "cannot chdir $opt{tree}/masses";
+
+  opendir(CORPUS, $corpusdir);
   @files = sort readdir(CORPUS);
   closedir(CORPUS);
 
   @files = grep {
-    /^(?:spam|ham)-(?:net-)?\w+\.log$/ && -f "$opt{corpus}/$_" && -M _ < 10 
+    /^(?:spam|ham)-(?:net-)?\w+\.log$/ && -f "$corpusdir/$_" && -M _ < 10 
   } @files;
 
   foreach my $file (@files) {
     # my $time = 0;
     my $tag = 0;
-    open(FILE, "$opt{corpus}/$file") or warn "cannot read $opt{corpus}/$file";
+    open(FILE, "$corpusdir/$file") or warn "cannot read $corpusdir/$file";
     while (my $line = <FILE>) {
       last if $line !~ /^#/;
       if ($line =~ /, on (... ... .. )(..)(:..:.. ... ....)/) {
@@ -123,7 +135,7 @@
         my $timet = Time::ParseDate::parsedate($datepre.$hh.$datepost,
                     GMT => 1, PREFER_PAST => 1);
         $time{$file} = $timet;
-        print "$opt{corpus}/$file: time=$timet\n";
+        print "$corpusdir/$file: time=$timet\n";
 
         # if ($hh != 8) { $time++; }
       }
@@ -136,10 +148,10 @@
 
         if ($file =~ /-net-/) {
           $is_net_revision{$rev} = 1;
-          print "$opt{corpus}/$file: rev=$rev (net)\n";
+          print "$corpusdir/$file: rev=$rev (net)\n";
         }
         else {
-          print "$opt{corpus}/$file: rev=$rev (non-net)\n";
+          print "$corpusdir/$file: rev=$rev (non-net)\n";
         }
       }
     }
@@ -150,22 +162,6 @@
   }
 }
 
-sub rename {
-  use File::Copy qw(move);
-
-  my $hour = (gmtime($time_start))[2];
-  if ($hour == 9) {
-	chdir $opt{html};
-	opendir(HTML, $opt{html});
-	my @html = readdir(HTML);
-	closedir(HTML);
-	@html = grep { -f } @html;
-	for (@html) {
-	  move($_, "last/$_");
-	}
-  }
-}
-
 sub sort_all {
   my ($a1, $a2) = ($a =~ m/(\(.*?\)|\S+)(?::(\S+))?$/);
   my ($b1, $b2) = ($b =~ m/(\(.*?\)|\S+)(?::(\S+))?$/);
@@ -224,7 +220,7 @@
   print STDERR "input h: " . join(' ', @ham) . "\n";
   print STDERR "input s: " . join(' ', @spam) . "\n";
 
-  chdir $opt{corpus};
+  chdir $corpusdir;
 
   # net vs. local
   if ($class eq "NET") {
@@ -355,12 +351,12 @@
       return;
     }
 
-    chdir "$opt{tree}/masses";
+    chdir "$opt{tree}/masses" or die "cannot chdir $opt{tree}/masses";
     for my $user (sort keys %spam) {
       next unless $ham{$user};
-      system("cat $opt{corpus}/$ham{$user} >> $opt{tmp}/ham.log.$$");
-      system("cat $opt{corpus}/$spam{$user} >> $opt{tmp}/spam.log.$$");
-      open(IN, "./hit-frequencies -xpa $flags $opt{corpus}/$spam{$user} $opt{corpus}/$ham{$user} |");
+      system("cat $corpusdir/$ham{$user} >> $opt{tmp}/ham.log.$$");
+      system("cat $corpusdir/$spam{$user} >> $opt{tmp}/spam.log.$$");
+      open(IN, "./hit-frequencies -xpa $flags $corpusdir/$spam{$user} $corpusdir/$ham{$user} |");
       while(<IN>) {
         chomp;
         push @output, "$_:$user\n";
@@ -380,7 +376,7 @@
     for my $which (("0-1", "1-2", "2-3", "3-6")) {
       my ($after, $before) = split(/-/, $which);
       # get and filter logs
-      chdir $opt{corpus};
+      chdir $corpusdir;
       for my $type (("ham", "spam")) {
         open(TMP, "> $opt{tmp}/$type.log.$$");
         my @array = ($type eq "ham") ? @ham : @spam;
@@ -394,7 +390,7 @@
         close (TMP);
       }
       # print out by age
-      chdir "$opt{tree}/masses";
+      chdir "$opt{tree}/masses" or die "cannot chdir $opt{tree}/masses";
       open(IN, "./hit-frequencies -xpa $flags $opt{tmp}/spam.log.$$ $opt{tmp}/ham.log.$$ |");
       while(<IN>) {
         chomp;
@@ -409,7 +405,7 @@
     system("cat " . join(" ", @ham) . " > $opt{tmp}/ham.log.$$");
     system("cat " . join(" ", @spam) . " > $opt{tmp}/spam.log.$$");
 
-    chdir "$opt{tree}/masses";
+    chdir "$opt{tree}/masses" or die "cannot chdir $opt{tree}/masses";
     open(IN, "./hit-frequencies -xpa $flags $opt{tmp}/spam.log.$$ $opt{tmp}/ham.log.$$ |");
     while(<IN>) { print(OUT); }
     close(IN);
@@ -422,11 +418,17 @@
 
 sub create_outputdir {
+  # Make sure $opt{html}/YYYYMMDD/rREV exists (date taken from $time, UTC),
+  # print the matching ruleqa URL, and return the directory path.
   my ($rev, $time) = @_;
-  my $dir = $opt{html} .'/'. strftime("%Y%m%d", gmtime($time)) . "/r$rev";
+  my $revpath = strftime("%Y%m%d", gmtime($time)) . "/r$rev";
+  my $dir = $opt{html} .'/'. $revpath;
   # print "output dir: $dir\n";
   if (!-d $dir) {
-    mkpath($dir) or warn "failed to mkdir $dir";
+    my $mode = oct($opt{html_mode} || "0777");  # fall back to mkpath's default
+    mkpath([$dir], 0, $mode) or warn "failed to mkdir $dir";
   }
+
+  print "URL: http://buildbot.spamassassin.org/ruleqa/ruleqa?daterev=$revpath\n";
   return $dir;
 }
 

Modified: spamassassin/trunk/masses/rule-qa/corpus.example
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/masses/rule-qa/corpus.example?rev=327610&r1=327609&r2=327610&view=diff
==============================================================================
--- spamassassin/trunk/masses/rule-qa/corpus.example (original)
+++ spamassassin/trunk/masses/rule-qa/corpus.example Fri Oct 21 17:19:05 2005
@@ -4,6 +4,9 @@
 # location for summary results
 html=/home/html/root/users/corpus
 
+# file mode for directories/files under "html".  keep the leading "0"!
+html_mode=0755
+
 # location of tagtime file
 tagtime=/home/corpus/log/tagtime