You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2004/02/19 03:55:43 UTC

svn commit: rev 6756 - in incubator/spamassassin/trunk: . lib/Mail/SpamAssassin

Author: jm
Date: Wed Feb 18 18:55:42 2004
New Revision: 6756

Modified:
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/CmdLearn.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm
   incubator/spamassassin/trunk/sa-learn.raw
Log:
bug 2273: bayes_ignore_from, bayes_ignore_to added, thanks to David Koppelman

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm	Wed Feb 18 18:55:42 2004
@@ -229,6 +229,7 @@
     'main'              => $main,
     'conf'		=> $main->{conf},
     'log_raw_counts'	=> 0,
+    'use_ignores'       => 1,
     'tz'		=> Mail::SpamAssassin::Util::local_tz(),
   };
   bless ($self, $class);
@@ -645,11 +646,37 @@
 
 ###########################################################################
 
+sub ignore_message {
+  my ($Bayes,$PMS) = @_;
+
+  return 0 unless $Bayes->{use_ignores};
+
+  my $ignore = $PMS->check_from_in_list('bayes_ignore_from')
+    		|| $PMS->check_to_in_list('bayes_ignore_to');
+
+  dbg("Not using Bayes, bayes_ignore_from or _to rule") if $ignore;
+
+  return $ignore;
+}
+
+###########################################################################
+
 sub learn {
   my ($self, $isspam, $msg, $id) = @_;
 
   if (!$self->{conf}->{use_bayes}) { return; }
   if (!defined $msg) { return; }
+
+  if( $self->{use_ignores} )  # Remove test when PerMsgStatus available.
+  {
+    # DMK, koppel@ece.lsu.edu:  Hoping that the ultimate fix to bug 2263 will
+    # make it unnecessary to construct a PerMsgStatus here.
+    my $PMS = new Mail::SpamAssassin::PerMsgStatus $self->{main}, $msg;
+    my $ignore = $self->ignore_message($PMS);
+    $PMS->finish();
+    return if $ignore;
+  }
+
   my $body = $self->get_body_from_msg ($msg);
   my $ret;
 
@@ -1028,9 +1055,11 @@
 sub scan {
   my ($self, $permsgstatus, $msg, $body) = @_;
 
-  if ( !$self->is_scan_available() ) {
+  if( $self->ignore_message($permsgstatus) ) {
     goto skip;
   }
+
+  goto skip unless $self->is_scan_available();
 
   my ($ns, $nn) = $self->{store}->nspam_nham_get();
 

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/CmdLearn.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/CmdLearn.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/CmdLearn.pm	Wed Feb 18 18:55:42 2004
@@ -39,6 +39,7 @@
   my ($opts) = shift;
 
   %opt = ( 'force-expire' => 0,
+           'use-ignores'  => 0,
   	   'norebuild'    => 0,
 	 );
 
@@ -60,6 +61,7 @@
 	     'no-rebuild|norebuild'		=> \$opt{'norebuild'},
 	     'local|L'				=> \$opt{'local'},
 	     'force-expire'			=> \$opt{'force-expire'},
+             'use-ignores'                      => \$opt{'use-ignores'},
 
              'stopafter=i'                      => \$opt{'stopafter'},
 	     'learnprob=f'			=> \$opt{'learnprob'},
@@ -179,6 +181,8 @@
       wait_for_lock	=> 1,
       caller_will_untie	=> 1
   });
+
+  $spamtest->{bayes_scanner}{use_ignores} = $opt{'use-ignores'};
 
   if ($rebuildonly) {
     $spamtest->rebuild_learner_caches({

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm	Wed Feb 18 18:55:42 2004
@@ -239,6 +239,8 @@
   $self->{bayes_min_ham_num} = 200;
   $self->{bayes_min_spam_num} = 200;
   $self->{bayes_learn_during_report} = 1;
+  $self->{bayes_ignore_from} = { };
+  $self->{bayes_ignore_to} = { };
 
   # Allow alternate bayes storage implementation
   $self->{bayes_store_module} = '';
@@ -1619,6 +1621,46 @@
       $self->{check_mx_delay} = $value+0; next;
     }
 
+=item bayes_ignore_from add@ress.com
+
+Bayesian classification and autolearning will not be performed on mail
+from the listed addresses.  Program C<sa-learn> will also ignore the
+listed addresses if it is invoked using the C<--use-ignores> option.
+One or more addresses can be listed, see C<whitelist_from>.
+
+Spam messages from certain senders may contain many words that
+frequently occur in ham.  For example, one might read messages from a
+preferred bookstore but also get unwanted spam messages from other
+bookstores.  If the unwanted messages are learned as spam then any
+messages discussing books, including the preferred bookstore and
+antiquarian messages would be in danger of being marked as spam.  The
+addresses of the annoying bookstores would be listed.  (Assuming they
+were halfway legitimate and didn't send you mail through myriad
+affiliates.)
+
+Those who have pieces of spam in legitimate messages or otherwise
+receive ham messages containing potentially spammy words might fear
+that some spam messages might be in danger of being marked as ham.
+The addresses of the spam mailing lists, correspondents, etc.  would
+be listed.
+
+=cut
+
+
+    if (/^bayes_ignore_from\s+(.+)$/) {
+      $self->add_to_addrlist ('bayes_ignore_from', split (' ', $1)); next;
+    }
+
+=item bayes_ignore_to add@ress.com
+
+Bayesian classification and autolearning will not be performed on mail
+to the listed addresses.  See C<bayes_ignore_from> for details.
+
+=cut
+
+    if (/^bayes_ignore_to\s+(.+)$/) {
+      $self->add_to_addrlist ('bayes_ignore_to', split (' ', $1)); next;
+    }
 
 =item dns_available { yes | test[: name1 name2...] | no }   (default: test)
 

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/EvalTests.pm	Wed Feb 18 18:55:42 2004
@@ -913,6 +913,35 @@
 
 ###########################################################################
 
+sub check_from_in_list {
+  my ($self,$list) = @_;
+  my $list_ref = $self->{conf}{$list};
+  warn "Could not find list $list" unless defined $list_ref;
+
+  foreach my $addr ( all_from_addrs $self ) {
+    return 1 if _check_whitelist $self $list_ref, $addr;
+  }
+
+  return 0;
+}
+
+###########################################################################
+
+sub check_to_in_list {
+  my ($self,$list) = @_;
+  my $list_ref = $self->{conf}{$list};
+  warn "Could not find list $list" unless defined $list_ref;
+
+  foreach my $addr ( all_to_addrs $self ) {
+    return 1 if _check_whitelist $self $list_ref, $addr;
+  }
+
+  return 0;
+}
+
+
+###########################################################################
+
 sub check_from_in_whitelist {
   my ($self) = @_;
   local ($_);

Modified: incubator/spamassassin/trunk/sa-learn.raw
==============================================================================
--- incubator/spamassassin/trunk/sa-learn.raw	(original)
+++ incubator/spamassassin/trunk/sa-learn.raw	Wed Feb 18 18:55:42 2004
@@ -76,6 +76,7 @@
  --ham				   Learn messages as ham (non-spam)
  --spam				   Learn messages as spam
  --forget			   Forget a message
+ --use-ignores                     Use bayes_ignore_from and bayes_ignore_to
  --rebuild			   Rebuild the database if needed
  --force-expire			   Force an expiry run, rebuild every time
  --dbpath <path>                   Allows commandline override (in bayes_path form)
@@ -334,6 +335,14 @@
 them this time around.  If the messages have already been filtered through
 SpamAssassin, the learner will ignore any modifications SpamAssassin may have
 made.
+
+=item B<--use-ignores>
+
+Don't learn the message if a from address matches configuration file
+item C<bayes_ignore_from> or a to address matches C<bayes_ignore_to>.
+The option might be used when learning from a large file of messages
+from which the hammy spam messages or spammy ham messages have not
+been removed.
 
 =item B<--rebuild>