You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2004/04/29 00:32:17 UTC
svn commit: rev 10381 - in incubator/spamassassin/trunk: . lib/Mail lib/Mail/SpamAssassin lib/Mail/SpamAssassin/Locker t
Author: jm
Date: Wed Apr 28 15:32:17 2004
New Revision: 10381
Added:
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker/Flock.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker/UnixNFSSafe.pm
- copied, changed from rev 10379, incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker/Unix.pm
incubator/spamassassin/trunk/t/bayesdbm_flock.t (contents, props changed)
Removed:
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker/Unix.pm
Modified:
incubator/spamassassin/trunk/MANIFEST
incubator/spamassassin/trunk/lib/Mail/SpamAssassin.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker/Win32.pm
Log:
bug 3242: NFS-unsafe locker added, using flock(). currently it's polling (attempt lock,sleep,attempt lock,sleep). controlled using the 'lock_method' config parameter.
Modified: incubator/spamassassin/trunk/MANIFEST
==============================================================================
--- incubator/spamassassin/trunk/MANIFEST (original)
+++ incubator/spamassassin/trunk/MANIFEST Wed Apr 28 15:32:17 2004
@@ -46,7 +46,8 @@
lib/Mail/SpamAssassin/HTML.pm
lib/Mail/SpamAssassin/Locales.pm
lib/Mail/SpamAssassin/Locker.pm
-lib/Mail/SpamAssassin/Locker/Unix.pm
+lib/Mail/SpamAssassin/Locker/UnixNFSSafe.pm
+lib/Mail/SpamAssassin/Locker/Flock.pm
lib/Mail/SpamAssassin/Locker/Win32.pm
lib/Mail/SpamAssassin/MailingList.pm
lib/Mail/SpamAssassin/Message.pm
@@ -341,3 +342,4 @@
t/spamd_syslog.t
t/prefs_include.t
t/body_mod.t
+t/bayesdbm_flock.t
Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin.pm Wed Apr 28 15:32:17 2004
@@ -279,27 +279,46 @@
# Make sure that we clean $PATH if we're tainted
Mail::SpamAssassin::Util::clean_path_in_taint_mode();
- # this could probably be made a little faster; for now I'm going
- # for slow but safe, by keeping in quotes
- if (Mail::SpamAssassin::Util::am_running_on_windows()) {
- eval '
- use Mail::SpamAssassin::Locker::Win32;
- $self->{locker} = new Mail::SpamAssassin::Locker::Win32 ($self);
- '; ($@) and die $@;
- } else {
- eval '
- use Mail::SpamAssassin::Locker::Unix;
- $self->{locker} = new Mail::SpamAssassin::Locker::Unix ($self);
- '; ($@) and die $@;
- }
-
+ # TODO: this should be in Conf!
$self->{encapsulated_content_description} = 'original message before SpamAssassin';
if (!defined $self->{username}) {
$self->{username} = (Mail::SpamAssassin::Util::portable_getpwuid ($>))[0];
}
+ $self->create_locker();
+
$self;
+}
+
+sub create_locker {
+ my ($self) = @_;
+
+ my $class;
+ my $m = $self->{conf}->{lock_method};
+
+ # let people choose what they want -- even if they may not work on their
+ # OS. (they could be using cygwin!)
+ if ($m eq 'win32') { $class = 'Win32'; }
+ elsif ($m eq 'flock') { $class = 'Flock'; }
+ elsif ($m eq 'nfssafe') { $class = 'UnixNFSSafe'; }
+ else {
+ # OS-specific defaults
+ if (Mail::SpamAssassin::Util::am_running_on_windows()) {
+ $class = 'Win32';
+ } else {
+ $class = 'UnixNFSSafe';
+ }
+ }
+
+ # this could probably be made a little faster; for now I'm going
+ # for slow but safe, by keeping in quotes
+ eval '
+ use Mail::SpamAssassin::Locker::'.$class.';
+ $self->{locker} = new Mail::SpamAssassin::Locker::'.$class.' ($self);
+ '; ($@) and die $@;
+
+ if (!defined $self->{locker}) { die "oops! no locker"; }
}
###########################################################################
Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm Wed Apr 28 15:32:17 2004
@@ -282,6 +282,7 @@
$self->{check_mx_delay} = 5;
$self->{ok_locales} = 'all';
$self->{ok_languages} = 'all';
+ $self->{lock_method} = '';
$self->{allow_user_rules} = 0;
$self->{user_rules_to_compile} = { };
$self->{fold_headers} = 1;
@@ -2249,8 +2250,9 @@
Used by BayesStore::SQL storage implementation.
-If this options is set the BayesStore::SQL module will override the set username with
-the value given. This could be useful for implementing global or group bayes databases.
+If this options is set the BayesStore::SQL module will override the set
+username with the value given. This could be useful for implementing global or
+group bayes databases.
=cut
@@ -2270,6 +2272,46 @@
# leave as RE for now?
if (/^pyzor_options\s+([-A-Za-z0-9_\/ ]+)$/) {
$self->{pyzor_options} = $1;
+ next;
+ }
+
+=item lock_method type
+
+Select the file-locking method used to protect database files on-disk. By
+default, SpamAssassin uses an NFS-safe locking method on UNIX; however, if you
+are sure that the database files you'll be using for Bayes and AWL storage will
+never be accessed over NFS, a non-NFS-safe locking system can be selected.
+
+This will be quite a bit faster, but may risk file corruption if the files are
+ever accessed by multiple clients at once, and one or more of them is accessing
+them through an NFS filesystem.
+
+Note that different platforms require different locking systems.
+
+The supported locking systems for C<type> are as follows:
+
+=over 4
+
+=item nfssafe - an NFS-safe locking system, UNIX-only, default
+
+=item flock - simple UNIX C<flock()> locking. NFS-unsafe, UNIX only
+
+=item win32 - Win32 locking using C<sysopen (..., O_CREAT|O_EXCL)>. Windows-only, default
+
+=back
+
+=cut
+
+ if ($key eq 'lock_method') {
+ if ($value !~ /^(nfssafe|flock|win32)$/) {
+ warn "invalid value for lock_method: $value\n";
+ goto failed_line;
+
+ } else {
+ $self->{lock_method} = $value;
+ # recreate the locker
+ $self->{main}->create_locker();
+ }
next;
}
Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker.pm Wed Apr 28 15:32:17 2004
@@ -62,4 +62,11 @@
###########################################################################
+sub jittery_one_second_sleep {
+ my ($self) = @_;
+ select(undef, undef, undef, (rand(1.0) + 0.5));
+}
+
+###########################################################################
+
1;
Added: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker/Flock.pm
==============================================================================
--- (empty file)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker/Flock.pm Wed Apr 28 15:32:17 2004
@@ -0,0 +1,144 @@
+# <@LICENSE>
+# Copyright 2004 Apache Software Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# </...@LICENSE>
+
+package Mail::SpamAssassin::Locker::Flock;
+
+use strict;
+use bytes;
+
+use Mail::SpamAssassin;
+use Mail::SpamAssassin::Locker;
+use Mail::SpamAssassin::Util;
+use File::Spec;
+use IO::File;
+use Fcntl qw(:DEFAULT :flock);
+
+use vars qw{
+ @ISA
+};
+
+@ISA = qw(Mail::SpamAssassin::Locker);
+
+###########################################################################
+
+sub new {
+ my $class = shift;
+ my $self = $class->SUPER::new(@_);
+ $self;
+}
+
+###########################################################################
+# NFS-safe locking (I hope!):
+# Attempt to create a file lock, using NFS-safe locking techniques.
+#
+# Locking code adapted from code by Alexis Rosen <al...@panix.com>
+# by Kelsey Cummings <kg...@sonic.net>, with mods by jm and quinlan
+
+sub safe_lock {
+ my ($self, $path, $max_retries) = @_;
+ my $is_locked = 0;
+ my @stat;
+
+ $max_retries ||= 30;
+
+ my $lock_file = "$path.lock";
+ my $umask = umask 077;
+ my $fh = new IO::File();
+
+ if (!$fh->open ("$lock_file", O_RDWR|O_CREAT)) {
+ umask $umask; # just in case
+ die "lock: $$ cannot create lockfile $lock_file: $!\n";
+ }
+
+ dbg("lock: $$ created $lock_file");
+
+ for (my $retries = 0; $retries < $max_retries; $retries++) {
+ if ($retries > 0) { $self->jittery_one_second_sleep(); }
+ dbg("lock: $$ trying to get lock on $path with $retries retries");
+
+ # HELLO!?! IO::File doesn't have a flock() method?!
+ if (flock ($fh, LOCK_EX|LOCK_NB)) {
+ dbg("lock: $$ link to $lock_file: link ok");
+ $is_locked = 1;
+ last;
+ }
+ }
+
+ # just to be nice: let people know when it was locked
+ $fh->print ("$$\n");
+ $fh->flush ();
+
+ # keep the FD around - we need to keep the lockfile open or the lock
+ # is unlocked!
+ $self->{lock_fhs} ||= { };
+ $self->{lock_fhs}->{$path} = $fh;
+ return $is_locked;
+}
+
+###########################################################################
+
+sub safe_unlock {
+ my ($self, $path) = @_;
+
+ if (!exists $self->{lock_fhs} || !defined $self->{lock_fhs}->{$path}) {
+ warn "unlock: $$ no lock handle for $path\n";
+ return;
+ }
+
+ my $fh = $self->{lock_fhs}->{$path};
+ delete $self->{lock_fhs}->{$path};
+
+ flock ($fh, LOCK_UN);
+ $fh->close();
+
+ dbg("unlock: $$ unlocked $path.lock");
+
+ # do NOT unlink! this would open a race, whereby:
+ # procA: ....unlock (unlocked lockfile)
+ # procB: lock (gets lock on lockfile)
+ # procA: unlink (deletes lockfile)
+ # (procB's lock is now deleted as well!)
+ # procC: create, lock (gets lock on new file)
+ #
+ # unlink ("$path.lock");
+ #
+ # side-effect: we leave a .lock file around. but hey!
+}
+
+###########################################################################
+
+sub refresh_lock {
+ my($self, $path) = @_;
+
+ return unless $path;
+
+ if (!exists $self->{lock_fhs} || !defined $self->{lock_fhs}->{$path}) {
+ warn "refresh_lock: $$ no lock handle for $path\n";
+ return;
+ }
+
+ my $fh = $self->{lock_fhs}->{$path};
+ $fh->print ("$$\n");
+ $fh->flush ();
+
+ dbg("refresh: $$ refresh $path.lock");
+}
+
+###########################################################################
+
+sub dbg { Mail::SpamAssassin::dbg (@_); }
+
+1;
Copied: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker/UnixNFSSafe.pm (from rev 10379, incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker/Unix.pm)
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker/Unix.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker/UnixNFSSafe.pm Wed Apr 28 15:32:17 2004
@@ -14,7 +14,7 @@
# limitations under the License.
# </...@LICENSE>
-package Mail::SpamAssassin::Locker::Unix;
+package Mail::SpamAssassin::Locker::UnixNFSSafe;
use strict;
use bytes;
@@ -70,9 +70,7 @@
dbg("lock: $$ created $lock_tmp");
for (my $retries = 0; $retries < $max_retries; $retries++) {
- if ($retries > 0) {
- select(undef, undef, undef, (rand(1.0) + 0.5));
- }
+ if ($retries > 0) { $self->jittery_one_second_sleep(); }
print LTMP "$hname.$$\n";
dbg("lock: $$ trying to get lock on $path with $retries retries");
if (link($lock_tmp, $lock_file)) {
Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker/Win32.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker/Win32.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker/Win32.pm Wed Apr 28 15:32:17 2004
@@ -59,6 +59,7 @@
for (my $retries = 0; $retries < $max_retries; $retries++) {
if ($retries > 0) {
sleep(1);
+ # TODO: $self->jittery_one_second_sleep();?
}
dbg("lock: $$ trying to get lock on $path with $retries retries");
if (sysopen(LOCKFILE, $lock_file, O_RDWR|O_CREAT|O_EXCL)) {
Added: incubator/spamassassin/trunk/t/bayesdbm_flock.t
==============================================================================
--- (empty file)
+++ incubator/spamassassin/trunk/t/bayesdbm_flock.t Wed Apr 28 15:32:17 2004
@@ -0,0 +1,256 @@
+#!/usr/bin/perl
+
+use Data::Dumper;
+use lib '.'; use lib 't';
+use SATest; sa_t_init("bayesdbm_flock");
+use Test;
+
+use constant HAS_DB_FILE => eval { require DB_File; };
+
+BEGIN {
+ if (-e 't/test_dir') {
+ chdir 't';
+ }
+
+ if (-e 'test_dir') {
+ unshift(@INC, '../blib/lib');
+ }
+
+ plan tests => (HAS_DB_FILE ? 42 : 0);
+};
+
+exit unless HAS_DB_FILE;
+
+tstlocalrules ("
+ bayes_learn_to_journal 0
+ lock_method flock
+");
+
+use Mail::SpamAssassin;
+
+my $sa = create_saobj();
+
+$sa->init();
+
+ok($sa);
+
+ok($sa->{bayes_scanner});
+
+ok(!$sa->{bayes_scanner}->is_scan_available());
+
+open(MAIL,"< data/spam/001");
+
+my $raw_message = do {
+ local $/;
+ <MAIL>;
+};
+
+close(MAIL);
+ok($raw_message);
+
+my $mail = $sa->parse( $raw_message );
+
+ok($mail);
+
+my $body = $sa->{bayes_scanner}->get_body_from_msg($mail);
+
+ok($body);
+
+my @toks = $sa->{bayes_scanner}->tokenize($mail, $body);
+
+ok(scalar(@toks) > 0);
+
+my($msgid,$msgid_hdr) = $sa->{bayes_scanner}->get_msgid($mail);
+
+# $msgid is the generated hash messageid, $msgid_hdr is the Message-Id header ...
+ok($msgid eq '502e12b89b9c74074744ffc18a95d80cff2effcd@sa_generated');
+ok($msgid_hdr eq '9PS291LhupY');
+
+ok($sa->{bayes_scanner}->{store}->tie_db_writable());
+
+ok(!$sa->{bayes_scanner}->{store}->seen_get($msgid));
+
+$sa->{bayes_scanner}->{store}->untie_db();
+
+ok($sa->{bayes_scanner}->learn(1, $mail));
+
+ok(!$sa->{bayes_scanner}->learn(1, $mail));
+
+ok($sa->{bayes_scanner}->{store}->tie_db_writable());
+
+ok($sa->{bayes_scanner}->{store}->seen_get($msgid) eq 's');
+
+$sa->{bayes_scanner}->{store}->untie_db();
+
+ok($sa->{bayes_scanner}->{store}->tie_db_writable());
+
+my $tokerror = 0;
+foreach my $tok (@toks) {
+ my ($spam, $ham, $atime) = $sa->{bayes_scanner}->{store}->tok_get($tok);
+ if ($spam == 0 || $ham > 0) {
+ $tokerror = 1;
+ }
+}
+ok(!$tokerror);
+
+$sa->{bayes_scanner}->{store}->untie_db();
+
+ok($sa->{bayes_scanner}->learn(0, $mail));
+
+ok($sa->{bayes_scanner}->{store}->tie_db_writable());
+
+ok($sa->{bayes_scanner}->{store}->seen_get($msgid) eq 'h');
+
+$sa->{bayes_scanner}->{store}->untie_db();
+
+ok($sa->{bayes_scanner}->{store}->tie_db_writable());
+
+$tokerror = 0;
+foreach my $tok (@toks) {
+ my ($spam, $ham, $atime) = $sa->{bayes_scanner}->{store}->tok_get($tok);
+ if ($spam > 0 || $ham == 0) {
+ $tokerror = 1;
+ }
+}
+ok(!$tokerror);
+
+$sa->{bayes_scanner}->{store}->untie_db();
+
+ok($sa->{bayes_scanner}->forget($mail));
+
+ok($sa->{bayes_scanner}->{store}->tie_db_writable());
+
+ok(!$sa->{bayes_scanner}->{store}->seen_get($msgid));
+
+$sa->{bayes_scanner}->{store}->untie_db();
+
+undef $sa;
+
+sa_t_init('bayes'); # this wipes out what is there and begins anew
+
+# make sure we learn to a journal
+tstlocalrules ("
+ bayes_learn_to_journal 1
+");
+
+$sa = create_saobj();
+
+$sa->init();
+
+ok(!-e 'log/user_state/bayes_journal');
+
+ok($sa->{bayes_scanner}->learn(1, $mail));
+
+ok(-e 'log/user_state/bayes_journal');
+
+$sa->{bayes_scanner}->sync(1); # always returns 0, so no need to check return
+
+ok(!-e 'log/user_state/bayes_journal');
+
+ok(-e 'log/user_state/bayes_seen');
+
+ok(-e 'log/user_state/bayes_toks');
+
+undef $sa;
+
+sa_t_init('bayes'); # this wipes out what is there and begins anew
+
+# make sure we learn to a journal
+tstlocalrules ("
+bayes_learn_to_journal 0
+bayes_min_spam_num 10
+bayes_min_ham_num 10
+");
+
+# we get to bastardize the existing pattern matching code here. It lets us provide
+# our own checking callback and keep using the existing ok_all_patterns call
+%patterns = ( 1 => 'Learned from message' );
+
+ok(salearnrun("--spam data/spam", \&check_examined));
+ok_all_patterns();
+
+ok(salearnrun("--ham data/nice", \&check_examined));
+ok_all_patterns();
+
+ok(salearnrun("--ham data/whitelists", \&check_examined));
+ok_all_patterns();
+
+%patterns = ( 'non-token data: bayes db version' => 'db version' );
+ok(salearnrun("--dump magic", \&patterns_run_cb));
+ok_all_patterns();
+
+use constant SCAN_USING_PERL_CODE_TEST => 1;
+# jm: off! not working for some reason. Mind you, this is
+# not a supported way to call these APIs! so no biggie
+
+if (SCAN_USING_PERL_CODE_TEST) {
+$sa = create_saobj();
+
+$sa->init();
+
+open(MAIL,"< ../sample-nonspam.txt");
+
+$raw_message = do {
+ local $/;
+ <MAIL>;
+};
+
+close(MAIL);
+
+$mail = $sa->parse( $raw_message );
+
+$body = $sa->{bayes_scanner}->get_body_from_msg($mail);
+
+my $msgstatus = Mail::SpamAssassin::PerMsgStatus->new($sa, $mail);
+
+ok($msgstatus);
+
+my $score = $sa->{bayes_scanner}->scan($msgstatus, $mail, $body);
+
+# Pretty much we can't count on the data returned with such little training
+# so just make sure that the score wasn't equal to .5 which is the default
+# return value.
+print "\treturned score: $score\n";
+ok($score != .5);
+
+open(MAIL,"< ../sample-spam.txt");
+
+$raw_message = do {
+ local $/;
+ <MAIL>;
+};
+
+close(MAIL);
+
+$mail = $sa->parse( $raw_message );
+
+$body = $sa->{bayes_scanner}->get_body_from_msg($mail);
+
+$msgstatus = Mail::SpamAssassin::PerMsgStatus->new($sa, $mail);
+
+$score = $sa->{bayes_scanner}->scan($msgstatus, $mail, $body);
+
+# Pretty much we can't count on the data returned with such little training
+# so just make sure that the score wasn't equal to .5 which is the default
+# return value.
+print "\treturned score: $score\n";
+ok($score != .5);
+
+}
+
+sub check_examined {
+ local ($_);
+ my $string = shift;
+
+ if (defined $string) {
+ $_ = $string;
+ } else {
+ $_ = join ('', <IN>);
+ }
+
+ if ($_ =~ /Learned from \d+ message\(s\) \(\d+ message\(s\) examined\)/) {
+ $found{'Learned from message'}++;
+ }
+}
+
+