You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2019/08/21 09:19:39 UTC
svn commit: r1865612 - in /spamassassin: branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
Author: hege
Date: Wed Aug 21 09:19:39 2019
New Revision: 1865612
URL: http://svn.apache.org/viewvc?rev=1865612&view=rev
Log:
Improve schemeless uri parser start boundary
Modified:
spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm
spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm?rev=1865612&r1=1865611&r2=1865612&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm Wed Aug 21 09:19:39 2019
@@ -2140,6 +2140,9 @@ sub _tbirdurire {
my $tbirdenddelim = '><"`}\]{[|\s' . "\x1b\xa0"; # The \x1b as per bug 4522 # \xa0 (nbsp) added 7/2019
my $nonASCII = '\x80-\xff';
+ # schemeless uri start delimiter, combo of most punctuations and delims above
+ my $scstartdelim = qr/[\!\"\#\$\&\'\(\)\*\+\,\/\:\;\<\=\>\?\@\[\\\]\^\`\{\|\}\~\s\x1b\xa0]/;
+
# bug 7100: we allow a comma to delimit the end of an email address because it will never appear in a domain name, and
# it's a common thing to find in text
my $tbirdenddelimemail = $tbirdenddelim . ',(\'' . $nonASCII; # tbird ignores non-ASCII mail addresses for now, until RFC changes
@@ -2157,7 +2160,7 @@ sub _tbirdurire {
$self->{tbirdurire} = qr/(?:\b|(?<=$iso2022shift)|(?<=[$tbirdstartdelim]))
(?:(?:($uriknownscheme)(?=(?:[$tbirdenddelim]|\z))) |
(?:($urimailscheme)(?=(?:[$tbirdenddelimemail]|\z))) |
- (?:(?<![a-z\d._-])($urischemeless)(?=(?:[$tbirdenddelim]|\z))))/ix;
+ (?:(?:^|(?<=$scstartdelim))($urischemeless)(?=(?:[$tbirdenddelim]|\z))))/ix;
return $self->{tbirdurire};
}
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm?rev=1865612&r1=1865611&r2=1865612&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm Wed Aug 21 09:19:39 2019
@@ -2272,6 +2272,9 @@ sub _tbirdurire {
my $tbirdenddelim = '><"`}\]{[|\s' . "\x1b\xa0"; # The \x1b as per bug 4522 # \xa0 (nbsp) added 7/2019
my $nonASCII = '\x80-\xff';
+ # schemeless uri start delimiter, combo of most punctuations and delims above
+ my $scstartdelim = qr/[\!\"\#\$\&\'\(\)\*\+\,\/\:\;\<\=\>\?\@\[\\\]\^\`\{\|\}\~\s\x1b\xa0]/;
+
# bug 7100: we allow a comma to delimit the end of an email address because it will never appear in a domain name, and
# it's a common thing to find in text
my $tbirdenddelimemail = $tbirdenddelim . ',(\'' . $nonASCII; # tbird ignores non-ASCII mail addresses for now, until RFC changes
@@ -2289,7 +2292,7 @@ sub _tbirdurire {
$self->{tbirdurire} = qr/(?:\b|(?<=$iso2022shift)|(?<=[$tbirdstartdelim]))
(?:(?:($uriknownscheme)(?=(?:[$tbirdenddelim]|\z))) |
(?:($urimailscheme)(?=(?:[$tbirdenddelimemail]|\z))) |
- (?:(?<![a-z\d._-])($urischemeless)(?=(?:[$tbirdenddelim]|\z))))/ix;
+ (?:(?:^|(?<=$scstartdelim))($urischemeless)(?=(?:[$tbirdenddelim]|\z))))/ix;
return $self->{tbirdurire};
}