You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by km...@apache.org on 2017/04/11 05:54:45 UTC

svn commit: r1790926 - in /spamassassin/branches/3.4/lib/Mail/SpamAssassin: Conf.pm Message.pm PerMsgStatus.pm Plugin/Bayes.pm Plugin/FreeMail.pm Util.pm

Author: kmcgrail
Date: Tue Apr 11 05:54:45 2017
New Revision: 1790926

URL: http://svn.apache.org/viewvc?rev=1790926&view=rev
Log:
KG: Syncing Trunk to 3.4: 

Revision 1707582 "random changes, cosmetic or trivial"

Revision 1707583 "Plugin/Bayes.pm: add missing $tokprefix to u8: and 8: tokens, shorten also tokens in Content-Disposition and Content-Transfer-Encoding"

Modified:
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf.pm
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/Message.pm
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Bayes.pm
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/FreeMail.pm
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/Util.pm

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf.pm?rev=1790926&r1=1790925&r2=1790926&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf.pm Tue Apr 11 05:54:45 2017
@@ -4615,12 +4615,12 @@ sub mtime {
 
 sub parse_scores_only {
   my ($self) = @_;
-  $_[0]->{parser}->parse ($_[1], 1);
+  $self->{parser}->parse ($_[1], 1);
 }
 
 sub parse_rules {
   my ($self) = @_;
-  $_[0]->{parser}->parse ($_[1], 0);
+  $self->{parser}->parse ($_[1], 0);
 }
 
 ###########################################################################

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Message.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Message.pm?rev=1790926&r1=1790925&r2=1790926&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Message.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Message.pm Tue Apr 11 05:54:45 2017
@@ -1040,7 +1040,7 @@ sub _parse_normal {
 
   # attempt to figure out a name for this attachment if there is one ...
   my $disp = $msg->header('content-disposition') || '';
-  if ($disp =~ /name="?([^\";]+)"?/i) {
+  if ($disp =~ /name=\s*"?([^";]+)"?/i) {
     $msg->{'name'} = $1;
   }
   elsif ($ct[3]) {

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm?rev=1790926&r1=1790925&r2=1790926&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm Tue Apr 11 05:54:45 2017
@@ -1291,8 +1291,8 @@ sub rewrite_no_report_safe {
 sub qp_encode_header {
   my ($self, $text) = @_;
 
-  # do nothing unless there's an 8-bit char
-  return $text unless ($text =~ /[\x80-\xff]/);
+  # return unchanged if there are no 8-bit characters
+  return $text  if $text !~ tr/\x00-\x7F//c;
 
   my $cs = 'ISO-8859-1';
   if ($self->{report_charset}) {
@@ -1982,7 +1982,8 @@ sub _get {
   else {
     my @results = $getraw ? $self->{msg}->raw_header($request)
                           : $self->{msg}->get_header($request);
-  # dbg("message: get(%s) = %s", $request, join(", ",@results));
+  # dbg("message: get(%s)%s = %s",
+  #     $request, $getraw?'raw':'', join(", ",@results));
     if (@results) {
       $result = join('', @results);
     } else {  # metadata

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Bayes.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Bayes.pm?rev=1790926&r1=1790925&r2=1790926&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Bayes.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Bayes.pm Tue Apr 11 05:54:45 2017
@@ -200,7 +200,9 @@ use constant ADD_INVIZ_TOKENS_NO_PREFIX
   'X-Authentication-Warning' => '*a',
   'Organization'	=> '*o',
   'Organisation'        => '*o',
-  'Content-Type'	=> '*c',
+  'Content-Type'	=> '*ct',
+  'Content-Disposition'	=> '*cd',
+  'Content-Transfer-Encoding' => '*ce',
   'x-spam-relays-trusted' => '*RT',
   'x-spam-relays-untrusted' => '*RU',
 );
@@ -1120,7 +1122,7 @@ sub tokenize {
   # generate an SHA1 hash and take the lower 40 bits as our token
   my %tokens;
   foreach my $token (@tokens) {
-    # skip empty tokens
+  # dbg("bayes: token: %s", $token);
     $tokens{substr(sha1($token), -5)} = $token  if $token ne '';
   }
 
@@ -1217,7 +1219,7 @@ sub _tokenize_line {
 	my(@t) = $token =~ /( (?: [\xE0-\xEF] | [\xF0-\xF4][\x80-\xBF] )
                               [\x80-\xBF]{2} )/xsg;
 	if (@t) {
-          push (@rettokens, map('u8:'.$_, @t));
+          push (@rettokens, map($tokprefix.'u8:'.$_, @t));
 	  next;
 	}
       }
@@ -1227,7 +1229,7 @@ sub _tokenize_line {
 	# but I'm doing tuples to keep the dbs small(er)."  Sounds like a plan
 	# to me! (jm)
 	while ($token =~ s/^(..?)//) {
-	  push (@rettokens, "8:$1");
+	  push (@rettokens, $tokprefix.'8:'.$1);
 	}
 	next;
       }

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/FreeMail.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/FreeMail.pm?rev=1790926&r1=1790925&r2=1790926&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/FreeMail.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/FreeMail.pm Tue Apr 11 05:54:45 2017
@@ -342,15 +342,15 @@ sub _parse_body {
         my $body = $pms->get_decoded_stripped_body_text_array();
         BODY: foreach (@$body) {
             # strip urls with possible emails inside
-            s#<?https?://\S{0,255}(?:\@|%40)\S{0,255}# #gi;
+            s{<?https?://\S{0,255}(?:\@|%40)\S{0,255}}{ }gi;
             # strip emails contained in <>, not mailto:
             # also strip ones followed by quote-like "wrote:" (but not fax: and tel: etc)
-            s#<?(?<!mailto:)$self->{email_regex}(?:>|\s{1,10}(?!(?:fa(?:x|csi)|tel|phone|e?-?mail))[a-z]{2,11}:)# #gi;
+            s{<?(?<!mailto:)$self->{email_regex}(?:>|\s{1,10}(?!(?:fa(?:x|csi)|tel|phone|e?-?mail))[a-z]{2,11}:)}{ }gi;
             while (/$self->{email_regex}/g) {
                 my $email = lc($1);
                 push(@body_emails, $email) unless defined $seen{$email};
                 $seen{$email} = 1;
-                last BODY if scalar @body_emails >= 40; # sanity
+                last BODY if @body_emails >= 40; # sanity
             }
         }
         my $count_all = 0;

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Util.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Util.pm?rev=1790926&r1=1790925&r2=1790926&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Util.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Util.pm Tue Apr 11 05:54:45 2017
@@ -692,6 +692,7 @@ sub base64_decode {
       m|^(?:[A-Za-z0-9+/=]{2,}={0,2})$|s)
   {
     # only use MIME::Base64 when the XS and Perl are both correct and quiet
+    local $1;
     s/(=+)(?!=*$)/'A' x length($1)/ge;
 
     # If only a certain number of bytes are requested, truncate the encoded
@@ -707,7 +708,7 @@ sub base64_decode {
   }
   tr{A-Za-z0-9+/=}{}cd;			# remove non-base64 characters
   s/=+$//;				# remove terminating padding
-  tr{A-Za-z0-9+/=}{ -_`};		# translate to uuencode
+  tr{A-Za-z0-9+/=}{ -_};		# translate to uuencode
   s/.$// if (length($_) % 4 == 1);	# unpack cannot cope with extra byte
 
   my $length;
@@ -728,19 +729,20 @@ sub base64_decode {
 }
 
 sub qp_decode {
-  local $_ = shift;
+  my $str = $_[0];
 
   # RFC 2045: when decoding a Quoted-Printable body, any trailing
   # white space on a line must be deleted
-  s/[ \t]+(?=\r?\n)//gs;
+  $str =~ s/[ \t]+(?=\r?\n)//gs;
 
-  s/=\r?\n//gs;  # soft line breaks
+  $str =~ s/=\r?\n//gs;  # soft line breaks
 
   # RFC 2045 explicitly prohibits lowercase characters a-f in QP encoding
   # do we really want to allow them???
-  s/=([0-9a-fA-F]{2})/chr(hex($1))/ge;
+  local $1;
+  $str =~ s/=([0-9a-fA-F]{2})/chr(hex($1))/ge;
 
-  return $_;
+  return $str;
 }
 
 sub base64_encode {