You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2022/04/16 05:18:35 UTC

svn commit: r1899897 - /spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Bayes.pm

Author: hege
Date: Sat Apr 16 05:18:35 2022
New Revision: 1899897

URL: http://svn.apache.org/viewvc?rev=1899897&view=rev
Log:
Allow disabling stopwords processing with "bayes_stopword_languages disable"

Modified:
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Bayes.pm

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Bayes.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Bayes.pm?rev=1899897&r1=1899896&r2=1899897&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Bayes.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Bayes.pm Sat Apr 16 05:18:35 2022
@@ -56,7 +56,10 @@ keyword like in the following example:
  bayes_stopword_en (?:you|me)
  bayes_stopword_se (?:du|mig)
 
-Regexps will be anchored automatically at beginning and end.
+Regexps are case-insensitive and will be anchored automatically at
+beginning and end.
+
+To disable stopwords usage, specify C<bayes_stopword_languages disable>.
 
 Only one bayes_stopword_languages or bayes_stopword_xx configuration line
 can be used.  New configuration line will override the old one, for example
@@ -294,16 +297,21 @@ sub set_config {
     code => sub {
       my ($self, $key, $value, $line) = @_;
       my @langs;
-      foreach my $lang (split(/(?:\s*,\s*|\s+)/, lc($value))) {
-        if ($lang !~ /^([a-z]{2})$/) {
-          return $Mail::SpamAssassin::Conf::INVALID_VALUE;
-        }
-        push @langs, $lang;
+      if ($value eq 'disable') {
+        @{$self->{bayes_stopword_languages}} = ();
       }
-      if (!@langs) {
-        return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
+      else {
+        foreach my $lang (split(/(?:\s*,\s*|\s+)/, lc($value))) {
+          if ($lang !~ /^([a-z]{2})$/) {
+            return $Mail::SpamAssassin::Conf::INVALID_VALUE;
+          }
+          push @langs, $lang;
+        }
+        if (!@langs) {
+          return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
+        }
+        @{$self->{bayes_stopword_languages}} = @langs;
       }
-      @{$self->{bayes_stopword_languages}} = @langs;
     }
   });
 
@@ -320,7 +328,7 @@ sub parse_config {
       $self->inhibit_further_callbacks();
       my $lang = lc($1);
       foreach my $re (split(/\s+/, $opts->{value})) {
-        my ($rec, $err) = compile_regexp($re, 0);
+        my ($rec, $err) = compile_regexp('^(?i)'.$re.'$', 0);
         if (!$rec) {
           warn "bayes: invalid regexp for $opts->{key}: $err\n";
           return 0;
@@ -1274,18 +1282,20 @@ TOKEN: foreach my $token (split) {
     next if $len < 3;
 
     # check stopwords regexp if not cached
-    if (!exists $self->{stopword_cache}{$token}) {
-      foreach my $lang (@{$conf->{bayes_stopword_languages}}) {
-        if ($token =~ /^$self->{bayes_stopword}{$lang}$/i) {
-          dbg("bayes: skipped token '$token' because it's in stopword list for language '$lang'");
-          $self->{stopword_cache}{$token} = 1;
-          next TOKEN;
+    if (@{$conf->{bayes_stopword_languages}}) {
+      if (!exists $self->{stopword_cache}{$token}) {
+        foreach my $lang (@{$conf->{bayes_stopword_languages}}) {
+          if ($token =~ $self->{bayes_stopword}{$lang}) {
+            dbg("bayes: skipped token '$token' because it's in stopword list for language '$lang'");
+            $self->{stopword_cache}{$token} = 1;
+            next TOKEN;
+          }
         }
+        $self->{stopword_cache}{$token} = 0;
+      } else {
+        # bail out if cached known
+        next if $self->{stopword_cache}{$token};
       }
-      $self->{stopword_cache}{$token} = 0;
-    } else {
-      # bail out if cached known
-      next if $self->{stopword_cache}{$token};
     }
 
     # are we in the body?  If so, apply some body-specific breakouts