You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2006/06/29 16:01:38 UTC

svn commit: r418049 - in /spamassassin/trunk: build/automc/run_preflight lib/Mail/SpamAssassin/AICache.pm lib/Mail/SpamAssassin/ArchiveIterator.pm masses/mass-check

Author: jm
Date: Thu Jun 29 07:01:37 2006
New Revision: 418049

URL: http://svn.apache.org/viewvc?rev=418049&view=rev
Log:
allow cache files to be written in an entirely different directory tree

Modified:
    spamassassin/trunk/build/automc/run_preflight
    spamassassin/trunk/lib/Mail/SpamAssassin/AICache.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm
    spamassassin/trunk/masses/mass-check

Modified: spamassassin/trunk/build/automc/run_preflight
URL: http://svn.apache.org/viewvc/spamassassin/trunk/build/automc/run_preflight?rev=418049&r1=418048&r2=418049&view=diff
==============================================================================
--- spamassassin/trunk/build/automc/run_preflight (original)
+++ spamassassin/trunk/build/automc/run_preflight Thu Jun 29 07:01:37 2006
@@ -91,14 +91,16 @@
 # notes on this mass-check command:
 #
 # this is run in a chroot jail, just in case there's hostile rule code in
-# there. limit to the most recent messages of each type, as the corpora are
-# getting big.  de-encapsulate 'report_safe' messages from petuniapress.com.
+# there. 
+# de-encapsulate 'report_safe' messages from petuniapress.com.
 # produce lots of noisy output to stop the buildbot from timing out on
 # mass-checks of large corpora.
+# store AICache data in /tmpfs/aicache.
 
 run "/local/bbmasstools/masschroot $perl ".
     "mass-check -c=tstrules --cache -j=1 ".
     "--noisy --deencap='petuniapress.com' ".
+    "--cachedir=/tmpfs/aicache ".
     $mass_check_args{$slavename}." ".
     "ham:detect:/home/bbmass/rawcor/*/ham/* ".
     "spam:detect:/home/bbmass/rawcor/*/spam/*";

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/AICache.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/AICache.pm?rev=418049&r1=418048&r2=418049&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/AICache.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/AICache.pm Thu Jun 29 07:01:37 2006
@@ -35,6 +35,8 @@
 package Mail::SpamAssassin::AICache;
 
 use File::Spec;
+use File::Path;
+use File::Basename;
 
 use strict;
 use warnings;
@@ -54,17 +56,24 @@
 
   $self->{cache} = {};
   $self->{dirty} = 0;
+  $self->{prefix} ||= '/';
 
   my $use_cache = 1;
 
   if ($self->{type} eq 'dir') {
-    $self->{cache_file} = File::Spec->catdir($self->{path}, '.spamassassin_cache');
+    $self->{cache_file} = File::Spec->catdir(
+                $self->{prefix},
+                $self->{path}, '.spamassassin_cache');
+
     $self->{cache_mtime} = (stat($self->{cache_file}))[9] || 0;
   }
   else {
     my @split = File::Spec->splitpath($self->{path});
-    $self->{cache_file} = File::Spec->catdir($split[1], join('_',
-	'.spamassassin_cache', $self->{type}, $split[2]));
+    $self->{cache_file} = File::Spec->catdir(
+                $self->{prefix},
+                $split[1],
+                join('_', '.spamassassin_cache', $self->{type}, $split[2]));
+
     $self->{cache_mtime} = (stat($self->{cache_file}))[9] || 0;
 
     # for mbox and mbx, verify whether mtime on cache file is >= mtime of
@@ -122,7 +131,16 @@
   my ($self) = @_;
 
   # Cache is dirty, so write out new file
-  if ($self->{dirty}) {
+  if ($self->{dirty})
+  {
+    # create enclosing dir tree, if required
+    eval {
+      mkpath(dirname($self->{cache_file}));
+    };
+    if ($@) {
+      warn "Can't mkpath for AI cache file (".$self->{cache_file}."): $@ $!";
+    }
+
     if (open(CACHE, ">" . $self->{cache_file})) {
       while(my($k,$v) = each %{$self->{cache}}) {
 	print CACHE "$k\t$v\n";

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm?rev=418049&r1=418048&r2=418049&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm Thu Jun 29 07:01:37 2006
@@ -170,6 +170,12 @@
 Set to 0 (default) if you don't want to use cached information to help speed
 up ArchiveIterator.  Set to 1 to enable.
 
+=item opt_cachedir
+
+Set to the path of a directory where you wish to store cached information for
+C<opt_cache>, if you don't want cache files mixed in with the input files (as
+is the default).  The directory must be both readable and writable.
+
 =item wanted_sub
 
 Reference to a subroutine which will process message data.  Usually
@@ -915,11 +921,7 @@
     return;
   }
 
-  if ($self->{opt_cache}) {
-    $AICache = Mail::SpamAssassin::AICache->new({	'type' => 'dir',
-      							'path' => $folder,
-					        });
-  }
+  $self->create_cache('dir', $folder);
 
   foreach my $mail (@files) {
     $self->scan_file($class, $mail);
@@ -997,10 +999,9 @@
     my $info = {};
     my $count;
 
+    $self->create_cache('mbox', $file);
+
     if ($self->{opt_cache}) {
-      $AICache = Mail::SpamAssassin::AICache->new({	'type' => 'mbox',
-      							'path' => $file,
-					          });
       if ($count = $AICache->count()) {
         $info = $AICache->check();
       }
@@ -1100,10 +1101,9 @@
     my $info = {};
     my $count;
 
+    $self->create_cache('mbx', $file);
+
     if ($self->{opt_cache}) {
-      $AICache = Mail::SpamAssassin::AICache->new({	'type' => 'mbx',
-      							'path' => $file,
-					          });
       if ($count = $AICache->count()) {
         $info = $AICache->check();
       }
@@ -1216,6 +1216,18 @@
 
 sub min {
   return ($_[0] < $_[1] ? $_[0] : $_[1]);
+}
+
+sub create_cache {
+  my ($self, $type, $path) = @_;
+
+  if ($self->{opt_cache}) {
+    $AICache = Mail::SpamAssassin::AICache->new({
+                                    'type' => $type,
+                                    'prefix' => $self->{opt_cachedir},
+                                    'path' => $path,
+                              });
+  }
 }
 
 ############################################################################

Modified: spamassassin/trunk/masses/mass-check
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/mass-check?rev=418049&r1=418048&r2=418049&view=diff
==============================================================================
--- spamassassin/trunk/masses/mass-check (original)
+++ spamassassin/trunk/masses/mass-check Thu Jun 29 07:01:37 2006
@@ -53,6 +53,7 @@
   message selection options
   -n            no date sorting or spam/ham interleaving
   --cache	use cache information when selecting messages
+  --cachedir=dir write cache info for --cache in this directory tree
   --after=N     only test mails received after time_t N (negative values
                 are an offset from current time, e.g. -86400 = last day)
                 or after date as parsed by Time::ParseDate (e.g. '-6 months')
@@ -93,7 +94,7 @@
 	    $opt_spamlog $opt_tail $opt_rules $opt_restart $opt_loguris
 	    $opt_logmem $opt_after $opt_before $opt_rewrite $opt_deencap
 	    $opt_learn $opt_reuse $opt_lint $opt_cache $opt_noisy
-	    $total_messages $statusevery
+	    $total_messages $statusevery $opt_cachedir
 	    %reuse %orig_conf %reuse_conf $reuse_rules_loaded_p);
 
 use FindBin;
@@ -126,7 +127,7 @@
 	   "progress", "rewrite:s", "showdots", "spamlog=s", "tail=i",
 	   "rules=s", "restart=i", "after=s", "before=s", "loguris",
 	   "deencap=s", "logmem", "learn=i", "reuse", "lint", "cache",
-           "noisy",
+           "cachedir=s", "noisy",
 	   "dir" => sub { $opt_format = "dir"; },
 	   "file" => sub { $opt_format = "file"; },
 	   "mbox" => sub { $opt_format = "mbox"; },
@@ -291,6 +292,7 @@
 	'opt_head' => $opt_head,
 	'opt_tail' => $opt_tail,
 	'opt_cache' => $opt_cache,
+	'opt_cachedir' => $opt_cachedir,
 	'opt_after' => $opt_after,
 	'opt_before' => $opt_before,
 	'opt_restart' => $opt_restart,