You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2007/07/31 19:46:50 UTC
svn commit: r561408 - in /spamassassin/trunk/lib/Mail/SpamAssassin:
AICache.pm ArchiveIterator.pm
Author: jm
Date: Tue Jul 31 10:46:49 2007
New Revision: 561408
URL: http://svn.apache.org/viewvc?view=rev&rev=561408
Log:
Optimize the 'dir' form of the mass-check cache; it now does its job without any sanity-check stat() operations at all. Hopefully this can increase rule-qa mass-check speeds. Also use the traditional 3-phase rename/rename/unlink file-replacement algorithm when updating the AICache file.
Modified:
spamassassin/trunk/lib/Mail/SpamAssassin/AICache.pm
spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/AICache.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/AICache.pm?view=diff&rev=561408&r1=561407&r2=561408
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/AICache.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/AICache.pm Tue Jul 31 10:46:49 2007
@@ -112,7 +112,10 @@
return $self->{cache} unless $name;
- return if ($self->{type} eq 'dir' && (stat($name))[9] > $self->{cache_mtime});
+ # for dir collections: just use the info on a file, if an entry
+ # exists for that file. it's very unlikely that a file will be
+ # changed to contain a different Date header, and it's slow to check.
+ # return if ($self->{type} eq 'dir' && (stat($name))[9] > $self->{cache_mtime});
$name = $self->canon($name);
return $self->{cache}->{$name};
@@ -134,26 +137,34 @@
sub finish {
my ($self) = @_;
+ return undef unless $self->{dirty};
+
# Cache is dirty, so write out new file
- if ($self->{dirty})
- {
- # create enclosing dir tree, if required
- eval {
- mkpath(dirname($self->{cache_file}));
- };
- if ($@) {
- warn "Can't mkpath for AI cache file (".$self->{cache_file}."): $@ $!";
- }
- if (open(CACHE, ">" . $self->{cache_file})) {
- while(my($k,$v) = each %{$self->{cache}}) {
- print CACHE "$k\t$v\n";
- }
- close(CACHE);
- }
- else {
- warn "Can't write AI cache file (".$self->{cache_file}."): $!";
+ # create enclosing dir tree, if required
+ eval {
+ mkpath(dirname($self->{cache_file}));
+ };
+ if ($@) {
+ warn "Can't mkpath for AI cache file (".$self->{cache_file}."): $@ $!";
+ }
+
+ # use trad unix 3-phase swapover, for safety
+ my $bakf = $self->{cache_file}.".bak";
+ my $oldf = $self->{cache_file};
+ my $newf = $self->{cache_file}.".new";
+ if (open(CACHE, ">$newf")) {
+ while(my($k,$v) = each %{$self->{cache}}) {
+ print CACHE "$k\t$v\n";
}
+ close(CACHE);
+
+ rename $oldf, $bakf or warn "mv $oldf $bakf failed: $!";
+ rename $newf, $oldf or warn "mv $newf $oldf failed: $!";
+ unlink $bakf or warn "rm $bakf failed: $!";
+ }
+ else {
+ warn "Can't write AI cache file ($newf): $!";
}
return undef;
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm?view=diff&rev=561408&r1=561407&r2=561408
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm Tue Jul 31 10:46:49 2007
@@ -650,7 +650,7 @@
}
closedir(DIR);
- @files = grep { -f } map { "$folder/$_" } @files;
+ @files = map { "$folder/$_" } @files;
if (!@files) {
# this is not a problem; no need to warn about it
@@ -674,13 +674,20 @@
$self->_bump_scan_progress();
- my @s = stat($mail);
- return unless $self->_message_is_useful_by_file_modtime($s[9]);
+ # only perform these stat() operations if we're not using a cache;
+ # it's faster to perform lookups in the cache, and more accurate
+ if (!defined $AICache) {
+ my @s = stat($mail);
+ return unless $self->_message_is_useful_by_file_modtime($s[9]);
+ }
my $date = AI_TIME_UNKNOWN;
-
if ($self->{determine_receive_date}) {
unless (defined $AICache and $date = $AICache->check($mail)) {
+ # silently skip directories/non-files; some folders may
+ # contain extraneous dirs etc.
+ next if (!-f $mail);
+
my $header;
if (!_mail_open($mail)) {
$self->{access_problem} = 1;