You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by km...@apache.org on 2017/04/11 05:54:45 UTC
svn commit: r1790926 - in /spamassassin/branches/3.4/lib/Mail/SpamAssassin:
Conf.pm Message.pm PerMsgStatus.pm Plugin/Bayes.pm Plugin/FreeMail.pm Util.pm
Author: kmcgrail
Date: Tue Apr 11 05:54:45 2017
New Revision: 1790926
URL: http://svn.apache.org/viewvc?rev=1790926&view=rev
Log:
KG: Syncing Trunk to 3.4:
Revision 1707582 "random changes, cosmetic or trivial"
Revision 1707583 "Plugin/Bayes.pm: add missing $tokprefix to u8: and 8: tokens, shorten also tokens in Content-Disposition and Content-Transfer-Encoding"
Modified:
spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf.pm
spamassassin/branches/3.4/lib/Mail/SpamAssassin/Message.pm
spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm
spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Bayes.pm
spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/FreeMail.pm
spamassassin/branches/3.4/lib/Mail/SpamAssassin/Util.pm
Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf.pm?rev=1790926&r1=1790925&r2=1790926&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf.pm Tue Apr 11 05:54:45 2017
@@ -4615,12 +4615,12 @@ sub mtime {
sub parse_scores_only {
my ($self) = @_;
- $_[0]->{parser}->parse ($_[1], 1);
+ $self->{parser}->parse ($_[1], 1);
}
sub parse_rules {
my ($self) = @_;
- $_[0]->{parser}->parse ($_[1], 0);
+ $self->{parser}->parse ($_[1], 0);
}
###########################################################################
Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Message.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Message.pm?rev=1790926&r1=1790925&r2=1790926&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Message.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Message.pm Tue Apr 11 05:54:45 2017
@@ -1040,7 +1040,7 @@ sub _parse_normal {
# attempt to figure out a name for this attachment if there is one ...
my $disp = $msg->header('content-disposition') || '';
- if ($disp =~ /name="?([^\";]+)"?/i) {
+ if ($disp =~ /name=\s*"?([^";]+)"?/i) {
$msg->{'name'} = $1;
}
elsif ($ct[3]) {
Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm?rev=1790926&r1=1790925&r2=1790926&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm Tue Apr 11 05:54:45 2017
@@ -1291,8 +1291,8 @@ sub rewrite_no_report_safe {
sub qp_encode_header {
my ($self, $text) = @_;
- # do nothing unless there's an 8-bit char
- return $text unless ($text =~ /[\x80-\xff]/);
+ # return unchanged if there are no 8-bit characters
+ return $text if $text !~ tr/\x00-\x7F//c;
my $cs = 'ISO-8859-1';
if ($self->{report_charset}) {
@@ -1982,7 +1982,8 @@ sub _get {
else {
my @results = $getraw ? $self->{msg}->raw_header($request)
: $self->{msg}->get_header($request);
- # dbg("message: get(%s) = %s", $request, join(", ",@results));
+ # dbg("message: get(%s)%s = %s",
+ # $request, $getraw?'raw':'', join(", ",@results));
if (@results) {
$result = join('', @results);
} else { # metadata
Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Bayes.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Bayes.pm?rev=1790926&r1=1790925&r2=1790926&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Bayes.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Bayes.pm Tue Apr 11 05:54:45 2017
@@ -200,7 +200,9 @@ use constant ADD_INVIZ_TOKENS_NO_PREFIX
'X-Authentication-Warning' => '*a',
'Organization' => '*o',
'Organisation' => '*o',
- 'Content-Type' => '*c',
+ 'Content-Type' => '*ct',
+ 'Content-Disposition' => '*cd',
+ 'Content-Transfer-Encoding' => '*ce',
'x-spam-relays-trusted' => '*RT',
'x-spam-relays-untrusted' => '*RU',
);
@@ -1120,7 +1122,7 @@ sub tokenize {
# generate an SHA1 hash and take the lower 40 bits as our token
my %tokens;
foreach my $token (@tokens) {
- # skip empty tokens
+ # dbg("bayes: token: %s", $token);
$tokens{substr(sha1($token), -5)} = $token if $token ne '';
}
@@ -1217,7 +1219,7 @@ sub _tokenize_line {
my(@t) = $token =~ /( (?: [\xE0-\xEF] | [\xF0-\xF4][\x80-\xBF] )
[\x80-\xBF]{2} )/xsg;
if (@t) {
- push (@rettokens, map('u8:'.$_, @t));
+ push (@rettokens, map($tokprefix.'u8:'.$_, @t));
next;
}
}
@@ -1227,7 +1229,7 @@ sub _tokenize_line {
# but I'm doing tuples to keep the dbs small(er)." Sounds like a plan
# to me! (jm)
while ($token =~ s/^(..?)//) {
- push (@rettokens, "8:$1");
+ push (@rettokens, $tokprefix.'8:'.$1);
}
next;
}
Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/FreeMail.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/FreeMail.pm?rev=1790926&r1=1790925&r2=1790926&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/FreeMail.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/FreeMail.pm Tue Apr 11 05:54:45 2017
@@ -342,15 +342,15 @@ sub _parse_body {
my $body = $pms->get_decoded_stripped_body_text_array();
BODY: foreach (@$body) {
# strip urls with possible emails inside
- s#<?https?://\S{0,255}(?:\@|%40)\S{0,255}# #gi;
+ s{<?https?://\S{0,255}(?:\@|%40)\S{0,255}}{ }gi;
# strip emails contained in <>, not mailto:
# also strip ones followed by quote-like "wrote:" (but not fax: and tel: etc)
- s#<?(?<!mailto:)$self->{email_regex}(?:>|\s{1,10}(?!(?:fa(?:x|csi)|tel|phone|e?-?mail))[a-z]{2,11}:)# #gi;
+ s{<?(?<!mailto:)$self->{email_regex}(?:>|\s{1,10}(?!(?:fa(?:x|csi)|tel|phone|e?-?mail))[a-z]{2,11}:)}{ }gi;
while (/$self->{email_regex}/g) {
my $email = lc($1);
push(@body_emails, $email) unless defined $seen{$email};
$seen{$email} = 1;
- last BODY if scalar @body_emails >= 40; # sanity
+ last BODY if @body_emails >= 40; # sanity
}
}
my $count_all = 0;
Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Util.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Util.pm?rev=1790926&r1=1790925&r2=1790926&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Util.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Util.pm Tue Apr 11 05:54:45 2017
@@ -692,6 +692,7 @@ sub base64_decode {
m|^(?:[A-Za-z0-9+/=]{2,}={0,2})$|s)
{
# only use MIME::Base64 when the XS and Perl are both correct and quiet
+ local $1;
s/(=+)(?!=*$)/'A' x length($1)/ge;
# If only a certain number of bytes are requested, truncate the encoded
@@ -707,7 +708,7 @@ sub base64_decode {
}
tr{A-Za-z0-9+/=}{}cd; # remove non-base64 characters
s/=+$//; # remove terminating padding
- tr{A-Za-z0-9+/=}{ -_`}; # translate to uuencode
+ tr{A-Za-z0-9+/=}{ -_}; # translate to uuencode
s/.$// if (length($_) % 4 == 1); # unpack cannot cope with extra byte
my $length;
@@ -728,19 +729,20 @@ sub base64_decode {
}
sub qp_decode {
- local $_ = shift;
+ my $str = $_[0];
# RFC 2045: when decoding a Quoted-Printable body, any trailing
# white space on a line must be deleted
- s/[ \t]+(?=\r?\n)//gs;
+ $str =~ s/[ \t]+(?=\r?\n)//gs;
- s/=\r?\n//gs; # soft line breaks
+ $str =~ s/=\r?\n//gs; # soft line breaks
# RFC 2045 explicitly prohibits lowercase characters a-f in QP encoding
# do we really want to allow them???
- s/=([0-9a-fA-F]{2})/chr(hex($1))/ge;
+ local $1;
+ $str =~ s/=([0-9a-fA-F]{2})/chr(hex($1))/ge;
- return $_;
+ return $str;
}
sub base64_encode {