You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2004/04/29 00:32:17 UTC

svn commit: rev 10381 - in incubator/spamassassin/trunk: . lib/Mail lib/Mail/SpamAssassin lib/Mail/SpamAssassin/Locker t

Author: jm
Date: Wed Apr 28 15:32:17 2004
New Revision: 10381

Added:
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker/Flock.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker/UnixNFSSafe.pm
      - copied, changed from rev 10379, incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker/Unix.pm
   incubator/spamassassin/trunk/t/bayesdbm_flock.t   (contents, props changed)
Removed:
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker/Unix.pm
Modified:
   incubator/spamassassin/trunk/MANIFEST
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker/Win32.pm
Log:
bug 3242: NFS-unsafe locker added, using flock().  currently it's polling (attempt lock,sleep,attempt lock,sleep).  controlled using the 'lock_method' config parameter.

Modified: incubator/spamassassin/trunk/MANIFEST
==============================================================================
--- incubator/spamassassin/trunk/MANIFEST	(original)
+++ incubator/spamassassin/trunk/MANIFEST	Wed Apr 28 15:32:17 2004
@@ -46,7 +46,8 @@
 lib/Mail/SpamAssassin/HTML.pm
 lib/Mail/SpamAssassin/Locales.pm
 lib/Mail/SpamAssassin/Locker.pm
-lib/Mail/SpamAssassin/Locker/Unix.pm
+lib/Mail/SpamAssassin/Locker/UnixNFSSafe.pm
+lib/Mail/SpamAssassin/Locker/Flock.pm
 lib/Mail/SpamAssassin/Locker/Win32.pm
 lib/Mail/SpamAssassin/MailingList.pm
 lib/Mail/SpamAssassin/Message.pm
@@ -341,3 +342,4 @@
 t/spamd_syslog.t
 t/prefs_include.t
 t/body_mod.t
+t/bayesdbm_flock.t

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin.pm	Wed Apr 28 15:32:17 2004
@@ -279,27 +279,46 @@
   # Make sure that we clean $PATH if we're tainted
   Mail::SpamAssassin::Util::clean_path_in_taint_mode();
 
-  # this could probably be made a little faster; for now I'm going
-  # for slow but safe, by keeping in quotes
-  if (Mail::SpamAssassin::Util::am_running_on_windows()) {
-    eval '
-      use Mail::SpamAssassin::Locker::Win32;
-      $self->{locker} = new Mail::SpamAssassin::Locker::Win32 ($self);
-    '; ($@) and die $@;
-  } else {
-    eval '
-      use Mail::SpamAssassin::Locker::Unix;
-      $self->{locker} = new Mail::SpamAssassin::Locker::Unix ($self);
-    '; ($@) and die $@;
-  }
-
+  # TODO: this should be in Conf!
   $self->{encapsulated_content_description} = 'original message before SpamAssassin';
 
   if (!defined $self->{username}) {
     $self->{username} = (Mail::SpamAssassin::Util::portable_getpwuid ($>))[0];
   }
 
+  $self->create_locker();
+
   $self;
+}
+
+sub create_locker {
+  my ($self) = @_;
+
+  my $class;
+  my $m = $self->{conf}->{lock_method};
+
+  # let people choose what they want -- even if they may not work on their
+  # OS.  (they could be using cygwin!)
+  if ($m eq 'win32') { $class = 'Win32'; }
+  elsif ($m eq 'flock') { $class = 'Flock'; }
+  elsif ($m eq 'nfssafe') { $class = 'UnixNFSSafe'; }
+  else {
+    # OS-specific defaults
+    if (Mail::SpamAssassin::Util::am_running_on_windows()) {
+      $class = 'Win32';
+    } else {
+      $class = 'UnixNFSSafe';
+    }
+  }
+
+  # this could probably be made a little faster; for now I'm going
+  # for slow but safe, by keeping in quotes
+  eval '
+    use Mail::SpamAssassin::Locker::'.$class.';
+    $self->{locker} = new Mail::SpamAssassin::Locker::'.$class.' ($self);
+  '; ($@) and die $@;
+
+  if (!defined $self->{locker}) { die "oops! no locker"; }
 }
 
 ###########################################################################

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm	Wed Apr 28 15:32:17 2004
@@ -282,6 +282,7 @@
   $self->{check_mx_delay} = 5;
   $self->{ok_locales} = 'all';
   $self->{ok_languages} = 'all';
+  $self->{lock_method} = '';
   $self->{allow_user_rules} = 0;
   $self->{user_rules_to_compile} = { };
   $self->{fold_headers} = 1;
@@ -2249,8 +2250,9 @@
 
 Used by BayesStore::SQL storage implementation.
 
-If this options is set the BayesStore::SQL module will override the set username with
-the value given.  This could be useful for implementing global or group bayes databases.
+If this options is set the BayesStore::SQL module will override the set
+username with the value given.  This could be useful for implementing global or
+group bayes databases.
 
 =cut
 
@@ -2270,6 +2272,46 @@
     # leave as RE for now?
     if (/^pyzor_options\s+([-A-Za-z0-9_\/ ]+)$/) {
       $self->{pyzor_options} = $1;
+      next;
+    }
+
+=item lock_method type
+
+Select the file-locking method used to protect database files on-disk. By
+default, SpamAssassin uses an NFS-safe locking method on UNIX; however, if you
+are sure that the database files you'll be using for Bayes and AWL storage will
+never be accessed over NFS, a non-NFS-safe locking system can be selected.
+
+This will be quite a bit faster, but may risk file corruption if the files are
+ever accessed by multiple clients at once, and one or more of them is accessing
+them through an NFS filesystem.
+
+Note that different platforms require different locking systems.
+
+The supported locking systems for C<type> are as follows:
+
+=over 4
+
+=item nfssafe - an NFS-safe locking system, UNIX-only, default
+
+=item flock - simple UNIX C<flock()> locking.  NFS-unsafe, UNIX only
+
+=item win32 - Win32 locking using C<sysopen (..., O_CREAT|O_EXCL)>.  Windows-only, default
+
+=back
+
+=cut
+
+    if ($key eq 'lock_method') {
+      if ($value !~ /^(nfssafe|flock|win32)$/) {
+        warn "invalid value for lock_method: $value\n";
+        goto failed_line;
+
+      } else {
+        $self->{lock_method} = $value;
+        # recreate the locker
+        $self->{main}->create_locker();
+      }
       next;
     }
 

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker.pm	Wed Apr 28 15:32:17 2004
@@ -62,4 +62,11 @@
 
 ###########################################################################
 
+sub jittery_one_second_sleep {
+  my ($self) = @_;
+  select(undef, undef, undef, (rand(1.0) + 0.5));
+}
+
+###########################################################################
+
 1;

Added: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker/Flock.pm
==============================================================================
--- (empty file)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker/Flock.pm	Wed Apr 28 15:32:17 2004
@@ -0,0 +1,144 @@
+# <@LICENSE>
+# Copyright 2004 Apache Software Foundation
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# </...@LICENSE>
+
+package Mail::SpamAssassin::Locker::Flock;
+
+use strict;
+use bytes;
+
+use Mail::SpamAssassin;
+use Mail::SpamAssassin::Locker;
+use Mail::SpamAssassin::Util;
+use File::Spec;
+use IO::File;
+use Fcntl qw(:DEFAULT :flock);
+
+use vars qw{
+  @ISA
+};
+
+@ISA = qw(Mail::SpamAssassin::Locker);
+
+###########################################################################
+
+sub new {
+  my $class = shift;
+  my $self = $class->SUPER::new(@_);
+  $self;
+}
+
+###########################################################################
+# NFS-safe locking (I hope!):
+# Attempt to create a file lock, using NFS-safe locking techniques.
+#
+# Locking code adapted from code by Alexis Rosen <al...@panix.com>
+# by Kelsey Cummings <kg...@sonic.net>, with mods by jm and quinlan
+
+sub safe_lock {
+  my ($self, $path, $max_retries) = @_;
+  my $is_locked = 0;
+  my @stat;
+
+  $max_retries ||= 30;
+
+  my $lock_file = "$path.lock";
+  my $umask = umask 077;
+  my $fh = new IO::File();
+
+  if (!$fh->open ("$lock_file", O_RDWR|O_CREAT)) {
+      umask $umask; # just in case
+      die "lock: $$ cannot create lockfile $lock_file: $!\n";
+  }
+
+  dbg("lock: $$ created $lock_file");
+
+  for (my $retries = 0; $retries < $max_retries; $retries++) {
+    if ($retries > 0) { $self->jittery_one_second_sleep(); }
+    dbg("lock: $$ trying to get lock on $path with $retries retries");
+
+    # HELLO!?! IO::File doesn't have a flock() method?!
+    if (flock ($fh, LOCK_EX|LOCK_NB)) {
+      dbg("lock: $$ link to $lock_file: link ok");
+      $is_locked = 1;
+      last;
+    }
+  }
+
+  # just to be nice: let people know when it was locked
+  $fh->print ("$$\n");
+  $fh->flush ();
+
+  # keep the FD around - we need to keep the lockfile open or the lock
+  # is unlocked!
+  $self->{lock_fhs} ||= { };
+  $self->{lock_fhs}->{$path} = $fh;
+  return $is_locked;
+}
+
+###########################################################################
+
+sub safe_unlock {
+  my ($self, $path) = @_;
+
+  if (!exists $self->{lock_fhs} || !defined $self->{lock_fhs}->{$path}) {
+    warn "unlock: $$ no lock handle for $path\n";
+    return;
+  }
+
+  my $fh = $self->{lock_fhs}->{$path};
+  delete $self->{lock_fhs}->{$path};
+
+  flock ($fh, LOCK_UN);
+  $fh->close();
+
+  dbg("unlock: $$ unlocked $path.lock");
+
+  # do NOT unlink! this would open a race, whereby:
+  # procA: ....unlock                           (unlocked lockfile)
+  # procB:            lock                      (gets lock on lockfile)
+  # procA:                 unlink               (deletes lockfile)
+  # (procB's lock is now deleted as well!)
+  # procC:                        create, lock  (gets lock on new file)
+  #
+  # unlink ("$path.lock"); 
+  #
+  # side-effect: we leave a .lock file around. but hey!
+}
+
+###########################################################################
+
+sub refresh_lock {
+  my($self, $path) = @_;
+
+  return unless $path;
+
+  if (!exists $self->{lock_fhs} || !defined $self->{lock_fhs}->{$path}) {
+    warn "refresh_lock: $$ no lock handle for $path\n";
+    return;
+  }
+
+  my $fh = $self->{lock_fhs}->{$path};
+  $fh->print ("$$\n");
+  $fh->flush ();
+
+  dbg("refresh: $$ refresh $path.lock");
+}
+
+###########################################################################
+
+sub dbg { Mail::SpamAssassin::dbg (@_); }
+
+1;

Copied: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker/UnixNFSSafe.pm (from rev 10379, incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker/Unix.pm)
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker/Unix.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker/UnixNFSSafe.pm	Wed Apr 28 15:32:17 2004
@@ -14,7 +14,7 @@
 # limitations under the License.
 # </...@LICENSE>
 
-package Mail::SpamAssassin::Locker::Unix;
+package Mail::SpamAssassin::Locker::UnixNFSSafe;
 
 use strict;
 use bytes;
@@ -70,9 +70,7 @@
   dbg("lock: $$ created $lock_tmp");
 
   for (my $retries = 0; $retries < $max_retries; $retries++) {
-    if ($retries > 0) {
-      select(undef, undef, undef, (rand(1.0) + 0.5));
-    }
+    if ($retries > 0) { $self->jittery_one_second_sleep(); }
     print LTMP "$hname.$$\n";
     dbg("lock: $$ trying to get lock on $path with $retries retries");
     if (link($lock_tmp, $lock_file)) {

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker/Win32.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker/Win32.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Locker/Win32.pm	Wed Apr 28 15:32:17 2004
@@ -59,6 +59,7 @@
   for (my $retries = 0; $retries < $max_retries; $retries++) {
     if ($retries > 0) {
       sleep(1);
+      # TODO: $self->jittery_one_second_sleep();?
     }
     dbg("lock: $$ trying to get lock on $path with $retries retries");
     if (sysopen(LOCKFILE, $lock_file, O_RDWR|O_CREAT|O_EXCL)) {

Added: incubator/spamassassin/trunk/t/bayesdbm_flock.t
==============================================================================
--- (empty file)
+++ incubator/spamassassin/trunk/t/bayesdbm_flock.t	Wed Apr 28 15:32:17 2004
@@ -0,0 +1,256 @@
+#!/usr/bin/perl
+
+use Data::Dumper;
+use lib '.'; use lib 't';
+use SATest; sa_t_init("bayesdbm_flock");
+use Test;
+
+use constant HAS_DB_FILE => eval { require DB_File; };
+
+BEGIN { 
+  if (-e 't/test_dir') {
+    chdir 't';
+  }
+
+  if (-e 'test_dir') {
+    unshift(@INC, '../blib/lib');
+  }
+
+  plan tests => (HAS_DB_FILE ? 42 : 0);
+};
+
+exit unless HAS_DB_FILE;
+
+tstlocalrules ("
+        bayes_learn_to_journal 0
+        lock_method flock
+");
+
+use Mail::SpamAssassin;
+
+my $sa = create_saobj();
+
+$sa->init();
+
+ok($sa);
+
+ok($sa->{bayes_scanner});
+
+ok(!$sa->{bayes_scanner}->is_scan_available());
+
+open(MAIL,"< data/spam/001");
+
+my $raw_message = do {
+  local $/;
+  <MAIL>;
+};
+
+close(MAIL);
+ok($raw_message);
+
+my $mail = $sa->parse( $raw_message );
+
+ok($mail);
+
+my $body = $sa->{bayes_scanner}->get_body_from_msg($mail);
+
+ok($body);
+
+my @toks = $sa->{bayes_scanner}->tokenize($mail, $body);
+
+ok(scalar(@toks) > 0);
+
+my($msgid,$msgid_hdr) = $sa->{bayes_scanner}->get_msgid($mail);
+
+# $msgid is the generated hash messageid, $msgid_hdr is the Message-Id header ...
+ok($msgid eq '502e12b89b9c74074744ffc18a95d80cff2effcd@sa_generated');
+ok($msgid_hdr eq '9PS291LhupY');
+
+ok($sa->{bayes_scanner}->{store}->tie_db_writable());
+
+ok(!$sa->{bayes_scanner}->{store}->seen_get($msgid));
+
+$sa->{bayes_scanner}->{store}->untie_db();
+
+ok($sa->{bayes_scanner}->learn(1, $mail));
+
+ok(!$sa->{bayes_scanner}->learn(1, $mail));
+
+ok($sa->{bayes_scanner}->{store}->tie_db_writable());
+
+ok($sa->{bayes_scanner}->{store}->seen_get($msgid) eq 's');
+
+$sa->{bayes_scanner}->{store}->untie_db();
+
+ok($sa->{bayes_scanner}->{store}->tie_db_writable());
+
+my $tokerror = 0;
+foreach my $tok (@toks) {
+  my ($spam, $ham, $atime) = $sa->{bayes_scanner}->{store}->tok_get($tok);
+  if ($spam == 0 || $ham > 0) {
+    $tokerror = 1;
+  }
+}
+ok(!$tokerror);
+
+$sa->{bayes_scanner}->{store}->untie_db();
+
+ok($sa->{bayes_scanner}->learn(0, $mail));
+
+ok($sa->{bayes_scanner}->{store}->tie_db_writable());
+
+ok($sa->{bayes_scanner}->{store}->seen_get($msgid) eq 'h');
+
+$sa->{bayes_scanner}->{store}->untie_db();
+
+ok($sa->{bayes_scanner}->{store}->tie_db_writable());
+
+$tokerror = 0;
+foreach my $tok (@toks) {
+  my ($spam, $ham, $atime) = $sa->{bayes_scanner}->{store}->tok_get($tok);
+  if ($spam  > 0 || $ham == 0) {
+    $tokerror = 1;
+  }
+}
+ok(!$tokerror);
+
+$sa->{bayes_scanner}->{store}->untie_db();
+
+ok($sa->{bayes_scanner}->forget($mail));
+
+ok($sa->{bayes_scanner}->{store}->tie_db_writable());
+
+ok(!$sa->{bayes_scanner}->{store}->seen_get($msgid));
+
+$sa->{bayes_scanner}->{store}->untie_db();
+
+undef $sa;
+
+sa_t_init('bayes'); # this wipes out what is there and begins anew
+
+# make sure we learn to a journal
+tstlocalrules ("
+        bayes_learn_to_journal 1
+");
+
+$sa = create_saobj();
+
+$sa->init();
+
+ok(!-e 'log/user_state/bayes_journal');
+
+ok($sa->{bayes_scanner}->learn(1, $mail));
+
+ok(-e 'log/user_state/bayes_journal');
+
+$sa->{bayes_scanner}->sync(1); # always returns 0, so no need to check return
+
+ok(!-e 'log/user_state/bayes_journal');
+
+ok(-e 'log/user_state/bayes_seen');
+
+ok(-e 'log/user_state/bayes_toks');
+
+undef $sa;
+
+sa_t_init('bayes'); # this wipes out what is there and begins anew
+
+# make sure we learn to a journal
+tstlocalrules ("
+bayes_learn_to_journal 0
+bayes_min_spam_num 10
+bayes_min_ham_num 10
+");
+
+# we get to bastardize the existing pattern matching code here.  It lets us provide
+# our own checking callback and keep using the existing ok_all_patterns call
+%patterns = ( 1 => 'Learned from message' );
+
+ok(salearnrun("--spam data/spam", \&check_examined));
+ok_all_patterns();
+
+ok(salearnrun("--ham data/nice", \&check_examined));
+ok_all_patterns();
+
+ok(salearnrun("--ham data/whitelists", \&check_examined));
+ok_all_patterns();
+
+%patterns = ( 'non-token data: bayes db version' => 'db version' );
+ok(salearnrun("--dump magic", \&patterns_run_cb));
+ok_all_patterns();
+
+use constant SCAN_USING_PERL_CODE_TEST => 1;
+# jm: off! not working for some reason.   Mind you, this is
+# not a supported way to call these APIs!  so no biggie
+
+if (SCAN_USING_PERL_CODE_TEST) {
+$sa = create_saobj();
+
+$sa->init();
+
+open(MAIL,"< ../sample-nonspam.txt");
+
+$raw_message = do {
+  local $/;
+  <MAIL>;
+};
+
+close(MAIL);
+
+$mail = $sa->parse( $raw_message );
+
+$body = $sa->{bayes_scanner}->get_body_from_msg($mail);
+
+my $msgstatus = Mail::SpamAssassin::PerMsgStatus->new($sa, $mail);
+
+ok($msgstatus);
+
+my $score = $sa->{bayes_scanner}->scan($msgstatus, $mail, $body);
+
+# Pretty much we can't count on the data returned with such little training
+# so just make sure that the score wasn't equal to .5 which is the default
+# return value.
+print "\treturned score: $score\n";
+ok($score != .5);
+
+open(MAIL,"< ../sample-spam.txt");
+
+$raw_message = do {
+  local $/;
+  <MAIL>;
+};
+
+close(MAIL);
+
+$mail = $sa->parse( $raw_message );
+
+$body = $sa->{bayes_scanner}->get_body_from_msg($mail);
+
+$msgstatus = Mail::SpamAssassin::PerMsgStatus->new($sa, $mail);
+
+$score = $sa->{bayes_scanner}->scan($msgstatus, $mail, $body);
+
+# Pretty much we can't count on the data returned with such little training
+# so just make sure that the score wasn't equal to .5 which is the default
+# return value.
+print "\treturned score: $score\n";
+ok($score != .5);
+
+}
+
+sub check_examined {
+  local ($_);
+  my $string = shift;
+
+  if (defined $string) {
+    $_ = $string;
+  } else {
+    $_ = join ('', <IN>);
+  }
+
+  if ($_ =~ /Learned from \d+ message\(s\) \(\d+ message\(s\) examined\)/) {
+    $found{'Learned from message'}++;
+  }
+}
+
+