You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2006/06/29 16:01:38 UTC
svn commit: r418049 - in /spamassassin/trunk: build/automc/run_preflight
lib/Mail/SpamAssassin/AICache.pm lib/Mail/SpamAssassin/ArchiveIterator.pm
masses/mass-check
Author: jm
Date: Thu Jun 29 07:01:37 2006
New Revision: 418049
URL: http://svn.apache.org/viewvc?rev=418049&view=rev
Log:
allow cache files to be written in an entirely different directory tree
Modified:
spamassassin/trunk/build/automc/run_preflight
spamassassin/trunk/lib/Mail/SpamAssassin/AICache.pm
spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm
spamassassin/trunk/masses/mass-check
Modified: spamassassin/trunk/build/automc/run_preflight
URL: http://svn.apache.org/viewvc/spamassassin/trunk/build/automc/run_preflight?rev=418049&r1=418048&r2=418049&view=diff
==============================================================================
--- spamassassin/trunk/build/automc/run_preflight (original)
+++ spamassassin/trunk/build/automc/run_preflight Thu Jun 29 07:01:37 2006
@@ -91,14 +91,16 @@
# notes on this mass-check command:
#
# this is run in a chroot jail, just in case there's hostile rule code in
-# there. limit to the most recent messages of each type, as the corpora are
-# getting big. de-encapsulate 'report_safe' messages from petuniapress.com.
+# there.
+# de-encapsulate 'report_safe' messages from petuniapress.com.
# produce lots of noisy output to stop the buildbot from timing out on
# mass-checks of large corpora.
+# store AICache data in /tmpfs/aicache.
run "/local/bbmasstools/masschroot $perl ".
"mass-check -c=tstrules --cache -j=1 ".
"--noisy --deencap='petuniapress.com' ".
+ "--cachedir=/tmpfs/aicache ".
$mass_check_args{$slavename}." ".
"ham:detect:/home/bbmass/rawcor/*/ham/* ".
"spam:detect:/home/bbmass/rawcor/*/spam/*";
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/AICache.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/AICache.pm?rev=418049&r1=418048&r2=418049&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/AICache.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/AICache.pm Thu Jun 29 07:01:37 2006
@@ -35,6 +35,8 @@
package Mail::SpamAssassin::AICache;
use File::Spec;
+use File::Path;
+use File::Basename;
use strict;
use warnings;
@@ -54,17 +56,24 @@
$self->{cache} = {};
$self->{dirty} = 0;
+ $self->{prefix} ||= '/';
my $use_cache = 1;
if ($self->{type} eq 'dir') {
- $self->{cache_file} = File::Spec->catdir($self->{path}, '.spamassassin_cache');
+ $self->{cache_file} = File::Spec->catdir(
+ $self->{prefix},
+ $self->{path}, '.spamassassin_cache');
+
$self->{cache_mtime} = (stat($self->{cache_file}))[9] || 0;
}
else {
my @split = File::Spec->splitpath($self->{path});
- $self->{cache_file} = File::Spec->catdir($split[1], join('_',
- '.spamassassin_cache', $self->{type}, $split[2]));
+ $self->{cache_file} = File::Spec->catdir(
+ $self->{prefix},
+ $split[1],
+ join('_', '.spamassassin_cache', $self->{type}, $split[2]));
+
$self->{cache_mtime} = (stat($self->{cache_file}))[9] || 0;
# for mbox and mbx, verify whether mtime on cache file is >= mtime of
@@ -122,7 +131,16 @@
my ($self) = @_;
# Cache is dirty, so write out new file
- if ($self->{dirty}) {
+ if ($self->{dirty})
+ {
+ # create enclosing dir tree, if required
+ eval {
+ mkpath(dirname($self->{cache_file}));
+ };
+ if ($@) {
+ warn "Can't mkpath for AI cache file (".$self->{cache_file}."): $@ $!";
+ }
+
if (open(CACHE, ">" . $self->{cache_file})) {
while(my($k,$v) = each %{$self->{cache}}) {
print CACHE "$k\t$v\n";
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm?rev=418049&r1=418048&r2=418049&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm Thu Jun 29 07:01:37 2006
@@ -170,6 +170,12 @@
Set to 0 (default) if you don't want to use cached information to help speed
up ArchiveIterator. Set to 1 to enable.
+=item opt_cachedir
+
+Set to the path of a directory where you wish to store cached information for
+C<opt_cache>, if you don't want to mix it with the input files (as is the
+default). The directory must be both readable and writable.
+
=item wanted_sub
Reference to a subroutine which will process message data. Usually
@@ -915,11 +921,7 @@
return;
}
- if ($self->{opt_cache}) {
- $AICache = Mail::SpamAssassin::AICache->new({ 'type' => 'dir',
- 'path' => $folder,
- });
- }
+ $self->create_cache('dir', $folder);
foreach my $mail (@files) {
$self->scan_file($class, $mail);
@@ -997,10 +999,9 @@
my $info = {};
my $count;
+ $self->create_cache('mbox', $file);
+
if ($self->{opt_cache}) {
- $AICache = Mail::SpamAssassin::AICache->new({ 'type' => 'mbox',
- 'path' => $file,
- });
if ($count = $AICache->count()) {
$info = $AICache->check();
}
@@ -1100,10 +1101,9 @@
my $info = {};
my $count;
+ $self->create_cache('mbx', $file);
+
if ($self->{opt_cache}) {
- $AICache = Mail::SpamAssassin::AICache->new({ 'type' => 'mbx',
- 'path' => $file,
- });
if ($count = $AICache->count()) {
$info = $AICache->check();
}
@@ -1216,6 +1216,18 @@
sub min {
return ($_[0] < $_[1] ? $_[0] : $_[1]);
+}
+
+sub create_cache {
+ my ($self, $type, $path) = @_;
+
+ if ($self->{opt_cache}) {
+ $AICache = Mail::SpamAssassin::AICache->new({
+ 'type' => $type,
+ 'prefix' => $self->{opt_cachedir},
+ 'path' => $path,
+ });
+ }
}
############################################################################
Modified: spamassassin/trunk/masses/mass-check
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/mass-check?rev=418049&r1=418048&r2=418049&view=diff
==============================================================================
--- spamassassin/trunk/masses/mass-check (original)
+++ spamassassin/trunk/masses/mass-check Thu Jun 29 07:01:37 2006
@@ -53,6 +53,7 @@
message selection options
-n no date sorting or spam/ham interleaving
--cache use cache information when selecting messages
+ --cachedir=dir write cache info for --cache in this directory tree
--after=N only test mails received after time_t N (negative values
are an offset from current time, e.g. -86400 = last day)
or after date as parsed by Time::ParseDate (e.g. '-6 months')
@@ -93,7 +94,7 @@
$opt_spamlog $opt_tail $opt_rules $opt_restart $opt_loguris
$opt_logmem $opt_after $opt_before $opt_rewrite $opt_deencap
$opt_learn $opt_reuse $opt_lint $opt_cache $opt_noisy
- $total_messages $statusevery
+ $total_messages $statusevery $opt_cachedir
%reuse %orig_conf %reuse_conf $reuse_rules_loaded_p);
use FindBin;
@@ -126,7 +127,7 @@
"progress", "rewrite:s", "showdots", "spamlog=s", "tail=i",
"rules=s", "restart=i", "after=s", "before=s", "loguris",
"deencap=s", "logmem", "learn=i", "reuse", "lint", "cache",
- "noisy",
+ "cachedir=s", "noisy",
"dir" => sub { $opt_format = "dir"; },
"file" => sub { $opt_format = "file"; },
"mbox" => sub { $opt_format = "mbox"; },
@@ -291,6 +292,7 @@
'opt_head' => $opt_head,
'opt_tail' => $opt_tail,
'opt_cache' => $opt_cache,
+ 'opt_cachedir' => $opt_cachedir,
'opt_after' => $opt_after,
'opt_before' => $opt_before,
'opt_restart' => $opt_restart,