You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2007/07/31 19:46:50 UTC

svn commit: r561408 - in /spamassassin/trunk/lib/Mail/SpamAssassin: AICache.pm ArchiveIterator.pm

Author: jm
Date: Tue Jul 31 10:46:49 2007
New Revision: 561408

URL: http://svn.apache.org/viewvc?view=rev&rev=561408
Log:
Optimize the 'dir' form of the mass-check cache; it now does its job without any sanity-check stat() operations at all. Hopefully this will increase rule-qa mass-check speeds. Also use the traditional 3-phase rename/rename/unlink file-replacement algorithm when updating the AICache file.

Modified:
    spamassassin/trunk/lib/Mail/SpamAssassin/AICache.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/AICache.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/AICache.pm?view=diff&rev=561408&r1=561407&r2=561408
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/AICache.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/AICache.pm Tue Jul 31 10:46:49 2007
@@ -112,7 +112,10 @@
 
   return $self->{cache} unless $name;
 
-  return if ($self->{type} eq 'dir' && (stat($name))[9] > $self->{cache_mtime});
+  # for dir collections: just use the info on a file, if an entry
+  # exists for that file.  it's very unlikely that a file will be
+  # changed to contain a different Date header, and it's slow to check.
+  # return if ($self->{type} eq 'dir' && (stat($name))[9] > $self->{cache_mtime});
 
   $name = $self->canon($name);
   return $self->{cache}->{$name};
@@ -134,26 +137,34 @@
 sub finish {
   my ($self) = @_;
 
+  return undef unless $self->{dirty};
+
   # Cache is dirty, so write out new file
-  if ($self->{dirty})
-  {
-    # create enclosing dir tree, if required
-    eval {
-      mkpath(dirname($self->{cache_file}));
-    };
-    if ($@) {
-      warn "Can't mkpath for AI cache file (".$self->{cache_file}."): $@ $!";
-    }
 
-    if (open(CACHE, ">" . $self->{cache_file})) {
-      while(my($k,$v) = each %{$self->{cache}}) {
-	print CACHE "$k\t$v\n";
-      }
-      close(CACHE);
-    }
-    else {
-      warn "Can't write AI cache file (".$self->{cache_file}."): $!";
+  # create enclosing dir tree, if required
+  eval {
+    mkpath(dirname($self->{cache_file}));
+  };
+  if ($@) {
+    warn "Can't mkpath for AI cache file (".$self->{cache_file}."): $@ $!";
+  }
+
+  # use trad unix 3-phase swapover, for safety
+  my $bakf = $self->{cache_file}.".bak";
+  my $oldf = $self->{cache_file};
+  my $newf = $self->{cache_file}.".new";
+  if (open(CACHE, ">$newf")) {
+    while(my($k,$v) = each %{$self->{cache}}) {
+      print CACHE "$k\t$v\n";
     }
+    close(CACHE);
+
+    rename $oldf, $bakf or warn "mv $oldf $bakf failed: $!";
+    rename $newf, $oldf or warn "mv $newf $oldf failed: $!";
+    unlink $bakf        or warn "rm $bakf failed: $!";
+  }
+  else {
+    warn "Can't write AI cache file ($newf): $!";
   }
 
   return undef;

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm?view=diff&rev=561408&r1=561407&r2=561408
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm Tue Jul 31 10:46:49 2007
@@ -650,7 +650,7 @@
   }
   closedir(DIR);
 
-  @files = grep { -f } map { "$folder/$_" } @files;
+  @files = map { "$folder/$_" } @files;
 
   if (!@files) {
     # this is not a problem; no need to warn about it
@@ -674,13 +674,20 @@
 
   $self->_bump_scan_progress();
 
-  my @s = stat($mail);
-  return unless $self->_message_is_useful_by_file_modtime($s[9]);
+  # only perform these stat() operations if we're not using a cache;
+  # it's faster to perform lookups in the cache, and more accurate
+  if (!defined $AICache) {
+    my @s = stat($mail);
+    return unless $self->_message_is_useful_by_file_modtime($s[9]);
+  }
 
   my $date = AI_TIME_UNKNOWN;
-
   if ($self->{determine_receive_date}) {
     unless (defined $AICache and $date = $AICache->check($mail)) {
+      # silently skip directories/non-files; some folders may
+      # contain extraneous dirs etc.
+      next if (!-f $mail);      
+
       my $header;
       if (!_mail_open($mail)) {
         $self->{access_problem} = 1;