You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2006/04/30 17:07:00 UTC
svn commit: r398355 - /spamassassin/trunk/masses/rule-qa/corpus-hourly
Author: jm
Date: Sun Apr 30 08:06:58 2006
New Revision: 398355
URL: http://svn.apache.org/viewcvs?rev=398355&view=rev
Log:
Better way to expose metadata for log reporting: via XML in the hit-freqs reports.
Modified:
spamassassin/trunk/masses/rule-qa/corpus-hourly
Modified: spamassassin/trunk/masses/rule-qa/corpus-hourly
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/masses/rule-qa/corpus-hourly?rev=398355&r1=398354&r2=398355&view=diff
==============================================================================
--- spamassassin/trunk/masses/rule-qa/corpus-hourly (original)
+++ spamassassin/trunk/masses/rule-qa/corpus-hourly Sun Apr 30 08:06:58 2006
@@ -19,8 +19,6 @@
$opt_override ||= '';
$opt_tag ||= 'n'; # nightly is the default
-my $LOGS_REPORT_FILE = "/var/www/ruleqa.spamassassin.org/reports/logs_report.txt";
-
use File::Path;
use File::Copy;
use Time::ParseDate;
@@ -38,6 +36,9 @@
my $configuration = "$ENV{HOME}/.corpus";
my %opt;
my %revision = ();
+my %filesize = ();
+my %dateline = ();
+my %mtime = ();
my %logs_by_daterev = ();
my %is_net_daterev = ();
my %time = ();
@@ -137,8 +138,6 @@
@files = sort readdir(CORPUS);
closedir(CORPUS);
- open_logs_report();
-
@files = grep {
/^(?:spam|ham)-(?:net-)?[-\w]+\.log$/ && -f "$corpusdir/$_" && -M _ < 10
} @files;
@@ -161,14 +160,18 @@
}
elsif ($line =~ m/^# Date:\s*(\S+)/) {
# a better way to do the above. TODO: parse it instead
+ $dateline{$file} = $1;
}
elsif ($line =~ m/^# SVN revision:\s*(\S+)/) {
- my $rev = $1;
- $revision{$file} = $rev;
+ $revision{$file} = $1;
}
}
close(FILE);
+ my @s = stat("$corpusdir/$file");
+ $filesize{$file} = $s[7];
+ $mtime{$file} = $s[9];
+
if (!defined $time{$file}) {
warn "$corpusdir/$file: no time found, ignored\n"; next;
}
@@ -191,59 +194,7 @@
else {
print "$corpusdir/$file: rev=$daterev time=$time{$file} (set 0)\n";
}
-
- add_to_logs_report({
- file => "$corpusdir/$file",
- daterev => $daterev,
- timet => $time{$file},
- isnet => $is_net_daterev{$daterev},
- rev => $revision{$file},
- headers => $headers
- });
}
-
- complete_logs_report();
-}
-
-sub open_logs_report {
- open (LOGREP, ">>$LOGS_REPORT_FILE")
- or die "cannot write to $LOGS_REPORT_FILE";
-
- my $now = localtime time;
-
- print LOGREP qq{
-
----------------------------------------------------------------------------
-Reading logs at $now:
-
- };
-}
-
-sub add_to_logs_report {
- my ($o) = @_;
-
- $o->{file} =~ /(?:ham|spam)(?:-net)?-(\S+)\.log/
- or return;
-
- my $who = $1;
- my $ls = `ls -lL $o->{file}`;
-
- my $hdrs = $o->{headers};
- $hdrs =~ s/^\# / /gm;
- $hdrs =~ s/^(.{0,70}).*?$/$1 [...]/gm;
-
- print LOGREP qq{
-Contributor: $who:
- $ls
- DateRev: $o->{daterev}
-$hdrs
-
- };
-
-}
-
-sub complete_logs_report {
- close LOGREP;
}
sub sort_all {
@@ -390,11 +341,8 @@
open(OUT, "> $tmpfname") or warn "cannot write to $tmpfname";
print OUT "# ham results used for $daterev $class $age: " . join(" ", @ham) . "\n";
print OUT "# spam results used for $daterev $class $age: " . join(" ", @spam) . "\n";
- for (@ham) {
- print OUT "# $_ was at r$revision{$_}\n";
- }
- for (@spam) {
- print OUT "# $_ was at r$revision{$_}\n";
+ for (@ham, @spam) {
+ print OUT "# ".log_metadata_xml($daterev, $_);
}
push (@tmps, $tmpfname);
@@ -522,5 +470,25 @@
$output_revpath =~ s/\//-/; # looks nicer
return $dir;
+}
+
+sub log_metadata_xml {
+ my $daterev = shift;
+ my $f = shift;
+
+ # this is extracted into the info.xml file later by the gen_info_xml script
+
+ my $str = qq{
+ <mclogmd>
+ <daterev>$daterev</daterev>
+ <rev>$revision{$f}</rev>
+ <fsize>$filesize{$f}</fsize>
+ <mcstartdate>$dateline{$f}</mcstartdate>
+ <mtime>$mtime{$f}</mtime>
+ </mclogmd>
+ };
+
+ $str =~ s/\s+/ /gs; # on a single line please
+ return $str;
}