You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2005/10/25 02:51:37 UTC
svn commit: r328203 - /spamassassin/trunk/masses/rule-qa/corpus-hourly
Author: jm
Date: Mon Oct 24 17:51:35 2005
New Revision: 328203
URL: http://svn.apache.org/viewcvs?rev=328203&view=rev
Log:
add LOGS gzipping
Modified:
spamassassin/trunk/masses/rule-qa/corpus-hourly
Modified: spamassassin/trunk/masses/rule-qa/corpus-hourly
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/masses/rule-qa/corpus-hourly?rev=328203&r1=328202&r2=328203&view=diff
==============================================================================
--- spamassassin/trunk/masses/rule-qa/corpus-hourly (original)
+++ spamassassin/trunk/masses/rule-qa/corpus-hourly Mon Oct 24 17:51:35 2005
@@ -14,6 +14,7 @@
use File::Path;
+use File::Copy;
use Time::ParseDate;
use Cwd;
use POSIX qw(nice strftime);
@@ -317,116 +318,133 @@
}
}
- my $tmpfname = "$fname.$$";
- open(OUT, "> $tmpfname") or warn "cannot write to $tmpfname";
- print OUT "# ham results used for $rev $class $age: " . join(" ", @ham) . "\n";
- print OUT "# spam results used for $rev $class $age: " . join(" ", @spam) . "\n";
- for (@ham) {
- print OUT "# $_ was at r$revision{$_}\n";
- }
- for (@spam) {
- print OUT "# $_ was at r$revision{$_}\n";
- }
-
- push (@tmps, $tmpfname);
-
my $when = scalar localtime time;
print qq{creating: $fname
-started $when...
-};
-
- my $flags = "";
- $flags = "-t net -s 1" if $class eq "NET";
- $flags = "-M HTML_MESSAGE" if $class eq "HTML";
- $flags = "-o" if $class eq "OVERLAP";
- if ($opt{rules_dir}) {
- $flags .= " -c '$opt{rules_dir}'";
+ started $when...
+ };
+ my $bytes = 0;
+
+ if ($class eq 'LOGS') {
+ foreach my $f (@ham, @spam) {
+ $f =~ s/[^-_A-Za-z0-9]+/_/gs; # sanitize!
+
+ system("gzip -c < $f > $fname-$f.gz.$$");
+ if ($? >> 8 != 0) {
+ warn "gzip -c < $f > $fname-$f.gz.$$ failed";
+ }
+
+ rename("$fname-$f.gz.$$", "$fname-$f.gz") or
+ warn "cannot rename $fname-$f.gz.$$ to $fname-$f.gz";
+ $bytes += (-s "$fname-$f");
+ }
}
+ else {
+ my $tmpfname = "$fname.$$";
- if ($age eq "all") {
- my %spam;
- my %ham;
- my @output;
-
- for my $file (@spam) {
- $spam{$1} = $file if ($file =~ m/-(\w[-\w]+)\.log$/);
+ open(OUT, "> $tmpfname") or warn "cannot write to $tmpfname";
+ print OUT "# ham results used for $rev $class $age: " . join(" ", @ham) . "\n";
+ print OUT "# spam results used for $rev $class $age: " . join(" ", @spam) . "\n";
+ for (@ham) {
+ print OUT "# $_ was at r$revision{$_}\n";
}
- for my $file (@ham) {
- $ham{$1} = $file if ($file =~ m/-(\w[-\w]+)\.log$/);
+ for (@spam) {
+ print OUT "# $_ was at r$revision{$_}\n";
}
- unlink "$opt{tmp}/ham.log.$$";
- unlink "$opt{tmp}/spam.log.$$";
- if (scalar keys %spam <= 0 || scalar keys %ham <= 0) {
- warn "no files found for $class.$age";
- return;
+ push (@tmps, $tmpfname);
+
+ my $flags = "";
+ $flags = "-t net -s 1" if $class eq "NET";
+ $flags = "-M HTML_MESSAGE" if $class eq "HTML";
+ $flags = "-o" if $class eq "OVERLAP";
+ if ($opt{rules_dir}) {
+ $flags .= " -c '$opt{rules_dir}'";
}
- chdir "$opt{tree}/masses" or die "cannot chdir $opt{tree}/masses";
- for my $user (sort keys %spam) {
- next unless $ham{$user};
- system("cat $corpusdir/$ham{$user} >> $opt{tmp}/ham.log.$$");
- system("cat $corpusdir/$spam{$user} >> $opt{tmp}/spam.log.$$");
- open(IN, "./hit-frequencies -xpa $flags $corpusdir/$spam{$user} $corpusdir/$ham{$user} |");
+ if ($age eq "all") {
+ my %spam;
+ my %ham;
+ my @output;
+
+ for my $file (@spam) {
+ $spam{$1} = $file if ($file =~ m/-(\w[-\w]+)\.log$/);
+ }
+ for my $file (@ham) {
+ $ham{$1} = $file if ($file =~ m/-(\w[-\w]+)\.log$/);
+ }
+ unlink "$opt{tmp}/ham.log.$$";
+ unlink "$opt{tmp}/spam.log.$$";
+
+ if (scalar keys %spam <= 0 || scalar keys %ham <= 0) {
+ warn "no files found for $class.$age";
+ return;
+ }
+
+ chdir "$opt{tree}/masses" or die "cannot chdir $opt{tree}/masses";
+ for my $user (sort keys %spam) {
+ next unless $ham{$user};
+ system("cat $corpusdir/$ham{$user} >> $opt{tmp}/ham.log.$$");
+ system("cat $corpusdir/$spam{$user} >> $opt{tmp}/spam.log.$$");
+ open(IN, "./hit-frequencies -xpa $flags $corpusdir/$spam{$user} $corpusdir/$ham{$user} |");
+ while(<IN>) {
+ chomp;
+ push @output, "$_:$user\n";
+ }
+ close(IN);
+ }
+ open(IN, "./hit-frequencies -xpa $flags $opt{tmp}/spam.log.$$ $opt{tmp}/ham.log.$$ |");
while(<IN>) {
- chomp;
- push @output, "$_:$user\n";
+ push @output, $_;
}
close(IN);
+ for (sort sort_all @output) { print OUT; }
}
- open(IN, "./hit-frequencies -xpa $flags $opt{tmp}/spam.log.$$ $opt{tmp}/ham.log.$$ |");
- while(<IN>) {
- push @output, $_;
- }
- close(IN);
- for (sort sort_all @output) { print OUT; }
- }
- elsif ($age eq "age") {
- my @output;
-
- for my $which (("0-1", "1-2", "2-3", "3-6")) {
- my ($after, $before) = split(/-/, $which);
- # get and filter logs
- chdir $corpusdir;
- for my $type (("ham", "spam")) {
- open(TMP, "> $opt{tmp}/$type.log.$$");
- my @array = ($type eq "ham") ? @ham : @spam;
- for my $file (@array) {
- open(IN, $file) or warn "cannot read $file";
- while (<IN>) {
- print TMP $_ if time_filter($after, $before);
+ elsif ($age eq "age") {
+ my @output;
+
+ for my $which (("0-1", "1-2", "2-3", "3-6")) {
+ my ($after, $before) = split(/-/, $which);
+ # get and filter logs
+ chdir $corpusdir;
+ for my $type (("ham", "spam")) {
+ open(TMP, "> $opt{tmp}/$type.log.$$");
+ my @array = ($type eq "ham") ? @ham : @spam;
+ for my $file (@array) {
+ open(IN, $file) or warn "cannot read $file";
+ while (<IN>) {
+ print TMP $_ if time_filter($after, $before);
+ }
+ close(IN);
}
- close(IN);
+ close (TMP);
}
- close (TMP);
+ # print out by age
+ chdir "$opt{tree}/masses" or die "cannot chdir $opt{tree}/masses";
+ open(IN, "./hit-frequencies -xpa $flags $opt{tmp}/spam.log.$$ $opt{tmp}/ham.log.$$ |");
+ while(<IN>) {
+ chomp;
+ push @output, "$_:$which\n";
+ }
+ close(IN);
}
- # print out by age
+ for (sort sort_all @output) { print OUT; }
+ }
+ elsif (@ham && @spam) {
+ # get logs
+ system("cat " . join(" ", @ham) . " > $opt{tmp}/ham.log.$$");
+ system("cat " . join(" ", @spam) . " > $opt{tmp}/spam.log.$$");
+
chdir "$opt{tree}/masses" or die "cannot chdir $opt{tree}/masses";
open(IN, "./hit-frequencies -xpa $flags $opt{tmp}/spam.log.$$ $opt{tmp}/ham.log.$$ |");
- while(<IN>) {
- chomp;
- push @output, "$_:$which\n";
- }
+ while(<IN>) { print(OUT); }
close(IN);
}
- for (sort sort_all @output) { print OUT; }
- }
- elsif (@ham && @spam) {
- # get logs
- system("cat " . join(" ", @ham) . " > $opt{tmp}/ham.log.$$");
- system("cat " . join(" ", @spam) . " > $opt{tmp}/spam.log.$$");
- chdir "$opt{tree}/masses" or die "cannot chdir $opt{tree}/masses";
- open(IN, "./hit-frequencies -xpa $flags $opt{tmp}/spam.log.$$ $opt{tmp}/ham.log.$$ |");
- while(<IN>) { print(OUT); }
- close(IN);
+ $bytes = (-s OUT);
+ close(OUT);
+ rename($tmpfname, $fname) or warn "cannot rename $tmpfname to $fname";
}
- my $bytes = (-s OUT);
- close(OUT);
-
- rename($tmpfname, $fname) or warn "cannot rename $tmpfname to $fname";
-
$when = scalar localtime time;
print qq{created: $bytes bytes, finished at $when
URL:
@@ -444,7 +462,6 @@
# print "output dir: $dir\n";
if (!-d $dir) {
- my $mode = oct($opt{html_mode});
my $prevu = umask 0;
mkpath([$dir], 0, oct($opt{html_mode})) or warn "failed to mkdir $dir";
umask $prevu;