You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2005/10/22 02:19:08 UTC
svn commit: r327610 - in /spamassassin/trunk: build/automc/buildbot_ready
masses/rule-hits-over-time masses/rule-qa/corpus-hourly
masses/rule-qa/corpus.example
Author: jm
Date: Fri Oct 21 17:19:05 2005
New Revision: 327610
URL: http://svn.apache.org/viewcvs?rev=327610&view=rev
Log:
hook up the two parts of the preflight system
Added:
spamassassin/trunk/masses/rule-hits-over-time (with props)
Modified:
spamassassin/trunk/build/automc/buildbot_ready
spamassassin/trunk/masses/rule-qa/corpus-hourly
spamassassin/trunk/masses/rule-qa/corpus.example
Modified: spamassassin/trunk/build/automc/buildbot_ready
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/build/automc/buildbot_ready?rev=327610&r1=327609&r2=327610&view=diff
==============================================================================
--- spamassassin/trunk/build/automc/buildbot_ready (original)
+++ spamassassin/trunk/build/automc/buildbot_ready Fri Oct 21 17:19:05 2005
@@ -35,8 +35,58 @@
# ---------------------------------------------------------------------------
sub do_stop {
+ my $rev = get_current_svn_revision();
+ my $slave = get_current_slave_name();
+
chdir("masses") or die;
+
+ print "FAST FREQS REPORT:\n\n";
system ("$perl hit-frequencies -c tstrules -x -p -s 0");
+
+ print "\n\nBUILDING SLOW FREQS REPORT:\n\n";
+
+ # the logs are moved into a per-revision directory, named per slave, and
+ # corpus-hourly is then pointed at that directory.  "mkdir -p" succeeds
+ # when the directory already exists, so there is no window between an
+ # existence test and the mkdir in which another process could create it
+ # and make run() die.
+ my $logdir = "/home/bbmass/tmp/logs-r$rev";
+ run ("mkdir -p $logdir");
+ run ("mv ham.log $logdir/ham-$slave.log");
+ run ("mv spam.log $logdir/spam-$slave.log");
+ run ("./rule-qa/corpus-hourly --dir $logdir");
}
+
+
+# ---------------------------------------------------------------------------
+
+# Returns the revision number found on the "Revision:" line printed by
+# "svn info" in the current directory.  Returns undef, after a warning, if
+# the command cannot be started or prints no such line.
+sub get_current_svn_revision {
+  # try "svn info --non-interactive" first, and plain "svn info" if that
+  # fails; stdin comes from /dev/null and stderr is merged into the output
+  my $cmd = "(svn info --non-interactive || svn info) < /dev/null 2>&1";
+
+  my $svninfo;
+  if (!open ($svninfo, "$cmd |")) {
+    warn "cannot run '$cmd': $!";
+    return;
+  }
+
+  my $revision;
+  while (my $line = <$svninfo>) {
+    # Revision: 320871
+    if ($line =~ /^Revision: (\d+)/) {
+      $revision = $1;
+      last;
+    }
+  }
+  close $svninfo;
+
+  warn "cannot work out svn revision from '$cmd' output" unless $revision;
+  return $revision;
+}
+
+# Prints a command line to STDOUT inside square brackets, then executes it
+# with system().
+#   $cmd        - the command line, passed to system() as a single string
+#   $ignoreexit - if true, the outcome of the command is not checked
+# Unless $ignoreexit is set, dies if the command could not be started, was
+# killed by a signal, or exited with a non-zero status.
+sub run {
+  my ($cmd, $ignoreexit) = @_;
+
+  print "[$cmd]\n";
+  system ($cmd);
+  return if $ignoreexit;
+
+  # $? is -1 if the command could not be started; otherwise the low 7 bits
+  # hold the signal that killed it (if any) and the high byte its exit code.
+  # Testing only the high byte would treat a signal death as success.
+  if ($? == -1) {
+    die "command '$cmd' failed to start: $!";
+  }
+  if ($? & 127) {
+    die "command '$cmd' died with signal " . ($? & 127);
+  }
+  if (($? >> 8) != 0) {
+    die "command '$cmd' failed with status $?";
+  }
+}
+
+# Works out the buildbot slave name from the output of "pwd": the path
+# component that directly follows "/slaves/".  Warns and returns "unknown"
+# if the path contains no such component.
+sub get_current_slave_name {
+  my $cwd = `pwd`;
+
+  if ($cwd =~ /\/slaves\/([^\/]+)\//) {
+    return $1;
+  }
+
+  warn "cannot work out slave name from $cwd";
+  return "unknown";
+}
Added: spamassassin/trunk/masses/rule-hits-over-time
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/masses/rule-hits-over-time?rev=327610&view=auto
==============================================================================
--- spamassassin/trunk/masses/rule-hits-over-time (added)
+++ spamassassin/trunk/masses/rule-hits-over-time Fri Oct 21 17:19:05 2005
@@ -0,0 +1,43 @@
+#!/usr/bin/perl
+#
+# rule-hits-over-time - count, per one-week bucket, how many input lines
+# mention the rule of interest and how many do not.
+#
+# Input (files named on the command line, or STDIN): lines carrying a
+# "time=<epoch seconds>" field, expected in ascending time order.  Lines
+# without such a field are ignored.
+#
+# Output: one line per bucket on STDOUT, "<bucket start> <hits> <misses>",
+# followed by a gnuplot command hint on STDERR.
+
+use strict;
+use warnings;
+
+my $PERIOD = (24 * 60 * 60 * 7);    # bucket width in seconds: 7 days
+
+my $lastbucket = 0;                 # start time of the bucket being filled
+my $nextbucket = 0;                 # start time of the bucket after it
+my $seen_y = 0;                     # lines in this bucket that hit the rule
+my $seen_n = 0;                     # lines in this bucket that did not
+
+while (<>) {
+  my $found = /SARE_SUBJ/ ? 1 : 0;
+
+  # take the digits after the last "time=" on the line; a line without a
+  # timestamp cannot be assigned to a bucket
+  next unless /^.*\btime=(\d+)/;
+  my $t = $1;
+
+  if ($lastbucket == 0) {
+    $lastbucket = $t;
+    $nextbucket = $t + $PERIOD;
+  }
+
+  # close off every bucket that ends at or before this line's time, so that
+  # the line itself is then counted in the bucket it belongs to
+  while ($t >= $nextbucket) {
+    completeline();
+    $lastbucket = $nextbucket;
+    $nextbucket += $PERIOD;
+  }
+
+  if ($found) {
+    $seen_y++;
+  } else {
+    $seen_n++;
+  }
+}
+
+# the last bucket is never followed by a later line, so flush it here
+completeline() if $lastbucket != 0;
+
+# Prints the counts for the current bucket and resets them.
+sub completeline {
+  print "$lastbucket $seen_y $seen_n\n";
+  $seen_y = 0;
+  $seen_n = 0;
+}
+
+# column 1 of the output is the bucket start, 2 the hits and 3 the misses
+print STDERR '
+
+plot "times" using 1:2, "times" using 1:3
+
+';
Propchange: spamassassin/trunk/masses/rule-hits-over-time
------------------------------------------------------------------------------
svn:executable = *
Modified: spamassassin/trunk/masses/rule-qa/corpus-hourly
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/masses/rule-qa/corpus-hourly?rev=327610&r1=327609&r2=327610&view=diff
==============================================================================
--- spamassassin/trunk/masses/rule-qa/corpus-hourly (original)
+++ spamassassin/trunk/masses/rule-qa/corpus-hourly Fri Oct 21 17:19:05 2005
@@ -3,8 +3,19 @@
# settings are located in $HOME/.corpus
use strict;
+use Getopt::Long;
+
+use vars qw(
+ $corpusdir
+);
+GetOptions(
+ "dir=s" => \$corpusdir
+);
+
+
use File::Path;
use Time::ParseDate;
+use Cwd;
use POSIX qw(nice strftime);
use constant WEEK => 60*60*24;
@@ -12,7 +23,6 @@
my $configuration = "$ENV{HOME}/.corpus";
my %opt;
-# my $revision = "unknown";
my %revision = ();
my %logs_by_rev = ();
my %is_net_revision = ();
@@ -22,33 +32,31 @@
my $time_start = time;
&configure;
-# &version;
&init;
-&update;
+
+# an explicit --dir names a directory of logs to read as-is; without it,
+# use the configured corpus directory and refresh it with rsync first
+if ($corpusdir) {
+ print "reading logs from '$corpusdir'\n";
+}
+else {
+ $corpusdir = $opt{corpus};
+ &update_rsync;
+}
+
&locate;
-# &rename;
&current;
&clean_up;
-# sub version {
-# my $line;
-# if (open(TESTING, "$opt{tree}/rules/70_testing.cf")) {
-# chomp($line = <TESTING>);
-# if ($line =~ m/^#.*Rev(?:ision)?:\s*(\S+).*/) {
-# $revision = $1;
-# }
-# close(TESTING);
-# }
-# }
-
sub configure {
# does rough equivalent of source
open(C, $configuration) || die "open failed: $configuration: $!\n";
+ my $pwd = getcwd;
while(<C>) {
chomp;
s/#.*//;
if (/^\s*(.*?)\s*=\s*(.*?)\s*$/) {
- $opt{$1} = $2;
+ my ($key, $val) = ($1, $2);
+ $val =~ s/\$PWD/$pwd/gs;
+ $opt{$key} = $val;
}
}
close(C);
@@ -67,8 +75,8 @@
$ENV{TZ} = 'UTC';
}
-sub update {
- chdir $opt{corpus};
+sub update_rsync {
+ chdir $corpusdir;
# allow non-running of rsync under some circumstances
if ($opt{rsync_command}) {
@@ -77,7 +85,9 @@
system "rsync -CPcvuzt --timeout=300 $opt{username}" . '@rsync.spamassassin.org::corpus/*.log .';
}
- if (!$opt{always_update_html}) {
+ # this block is no longer required -- we do sensible things with modtime
+ # comparisons to work it out!
+ if (0 && !$opt{always_update_html}) {
if (-f "rsync.last") {
open(FIND, "find . -type f -newer rsync.last |");
my $files = "";
@@ -96,25 +106,27 @@
}
}
}
+
open(RSYNC, "> rsync.last");
close(RSYNC);
system "chmod +r *.log";
}
sub locate {
- chdir "$opt{tree}/masses";
- opendir(CORPUS, $opt{corpus});
+ # chdir "$opt{tree}/masses" or die "cannot chdir $opt{tree}/masses";
+
+ opendir(CORPUS, $corpusdir);
@files = sort readdir(CORPUS);
closedir(CORPUS);
@files = grep {
- /^(?:spam|ham)-(?:net-)?\w+\.log$/ && -f "$opt{corpus}/$_" && -M _ < 10
+ /^(?:spam|ham)-(?:net-)?\w+\.log$/ && -f "$corpusdir/$_" && -M _ < 10
} @files;
foreach my $file (@files) {
# my $time = 0;
my $tag = 0;
- open(FILE, "$opt{corpus}/$file") or warn "cannot read $opt{corpus}/$file";
+ open(FILE, "$corpusdir/$file") or warn "cannot read $corpusdir/$file";
while (my $line = <FILE>) {
last if $line !~ /^#/;
if ($line =~ /, on (... ... .. )(..)(:..:.. ... ....)/) {
@@ -123,7 +135,7 @@
my $timet = Time::ParseDate::parsedate($datepre.$hh.$datepost,
GMT => 1, PREFER_PAST => 1);
$time{$file} = $timet;
- print "$opt{corpus}/$file: time=$timet\n";
+ print "$corpusdir/$file: time=$timet\n";
# if ($hh != 8) { $time++; }
}
@@ -136,10 +148,10 @@
if ($file =~ /-net-/) {
$is_net_revision{$rev} = 1;
- print "$opt{corpus}/$file: rev=$rev (net)\n";
+ print "$corpusdir/$file: rev=$rev (net)\n";
}
else {
- print "$opt{corpus}/$file: rev=$rev (non-net)\n";
+ print "$corpusdir/$file: rev=$rev (non-net)\n";
}
}
}
@@ -150,22 +162,6 @@
}
}
-sub rename {
- use File::Copy qw(move);
-
- my $hour = (gmtime($time_start))[2];
- if ($hour == 9) {
- chdir $opt{html};
- opendir(HTML, $opt{html});
- my @html = readdir(HTML);
- closedir(HTML);
- @html = grep { -f } @html;
- for (@html) {
- move($_, "last/$_");
- }
- }
-}
-
sub sort_all {
my ($a1, $a2) = ($a =~ m/(\(.*?\)|\S+)(?::(\S+))?$/);
my ($b1, $b2) = ($b =~ m/(\(.*?\)|\S+)(?::(\S+))?$/);
@@ -224,7 +220,7 @@
print STDERR "input h: " . join(' ', @ham) . "\n";
print STDERR "input s: " . join(' ', @spam) . "\n";
- chdir $opt{corpus};
+ chdir $corpusdir;
# net vs. local
if ($class eq "NET") {
@@ -355,12 +351,12 @@
return;
}
- chdir "$opt{tree}/masses";
+ chdir "$opt{tree}/masses" or die "cannot chdir $opt{tree}/masses";
for my $user (sort keys %spam) {
next unless $ham{$user};
- system("cat $opt{corpus}/$ham{$user} >> $opt{tmp}/ham.log.$$");
- system("cat $opt{corpus}/$spam{$user} >> $opt{tmp}/spam.log.$$");
- open(IN, "./hit-frequencies -xpa $flags $opt{corpus}/$spam{$user} $opt{corpus}/$ham{$user} |");
+ system("cat $corpusdir/$ham{$user} >> $opt{tmp}/ham.log.$$");
+ system("cat $corpusdir/$spam{$user} >> $opt{tmp}/spam.log.$$");
+ open(IN, "./hit-frequencies -xpa $flags $corpusdir/$spam{$user} $corpusdir/$ham{$user} |");
while(<IN>) {
chomp;
push @output, "$_:$user\n";
@@ -380,7 +376,7 @@
for my $which (("0-1", "1-2", "2-3", "3-6")) {
my ($after, $before) = split(/-/, $which);
# get and filter logs
- chdir $opt{corpus};
+ chdir $corpusdir;
for my $type (("ham", "spam")) {
open(TMP, "> $opt{tmp}/$type.log.$$");
my @array = ($type eq "ham") ? @ham : @spam;
@@ -394,7 +390,7 @@
close (TMP);
}
# print out by age
- chdir "$opt{tree}/masses";
+ chdir "$opt{tree}/masses" or die "cannot chdir $opt{tree}/masses";
open(IN, "./hit-frequencies -xpa $flags $opt{tmp}/spam.log.$$ $opt{tmp}/ham.log.$$ |");
while(<IN>) {
chomp;
@@ -409,7 +405,7 @@
system("cat " . join(" ", @ham) . " > $opt{tmp}/ham.log.$$");
system("cat " . join(" ", @spam) . " > $opt{tmp}/spam.log.$$");
- chdir "$opt{tree}/masses";
+ chdir "$opt{tree}/masses" or die "cannot chdir $opt{tree}/masses";
open(IN, "./hit-frequencies -xpa $flags $opt{tmp}/spam.log.$$ $opt{tmp}/ham.log.$$ |");
while(<IN>) { print(OUT); }
close(IN);
@@ -422,11 +418,17 @@
+# Creates (if missing) and returns the output directory for one revision,
+# "<html>/<YYYYMMDD>/r<rev>", where the date is $time rendered in GMT.
+# Also prints the rule-QA URL for that date/revision to STDOUT.
+#   $rev  - revision number used in the "r<rev>" path component
+#   $time - epoch time used for the date path component
sub create_outputdir {
my ($rev, $time) = @_;
- my $dir = $opt{html} .'/'. strftime("%Y%m%d", gmtime($time)) . "/r$rev";
+ my $revpath = strftime("%Y%m%d", gmtime($time)) . "/r$rev";
+ my $dir = $opt{html} .'/'. $revpath;
+
# print "output dir: $dir\n";
if (!-d $dir) {
- mkpath($dir) or warn "failed to mkdir $dir";
+ # settings from the configuration file are held in %opt; "html_mode" is
+ # an octal string such as "0755", used as the default when it is unset
+ my $mode = oct($opt{html_mode} || "0755");
+ mkpath([$dir], 0, $mode) or warn "failed to mkdir $dir";
}
+
+ print "URL: http://buildbot.spamassassin.org/ruleqa/ruleqa?daterev=$revpath\n";
+
return $dir;
}
Modified: spamassassin/trunk/masses/rule-qa/corpus.example
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/masses/rule-qa/corpus.example?rev=327610&r1=327609&r2=327610&view=diff
==============================================================================
--- spamassassin/trunk/masses/rule-qa/corpus.example (original)
+++ spamassassin/trunk/masses/rule-qa/corpus.example Fri Oct 21 17:19:05 2005
@@ -4,6 +4,9 @@
# location for summary results
html=/home/html/root/users/corpus
+# file mode for directories/files under "html". keep the leading "0"!
+html_mode=0755
+
# location of tagtime file
tagtime=/home/corpus/log/tagtime