You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2019/07/31 10:43:49 UTC
svn commit: r1864043 - in /spamassassin: branches/3.4/lib/Mail/SpamAssassin/
branches/3.4/t/ trunk/lib/Mail/SpamAssassin/ trunk/t/
Author: hege
Date: Wed Jul 31 10:43:49 2019
New Revision: 1864043
URL: http://svn.apache.org/viewvc?rev=1864043&view=rev
Log:
uri_to_domain - ignore cid:, fix mailto: parameter handling
Modified:
spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm
spamassassin/branches/3.4/lib/Mail/SpamAssassin/RegistryBoundaries.pm
spamassassin/branches/3.4/t/uri.t
spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
spamassassin/trunk/lib/Mail/SpamAssassin/RegistryBoundaries.pm
spamassassin/trunk/t/uri.t
Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm?rev=1864043&r1=1864042&r2=1864043&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm Wed Jul 31 10:43:49 2019
@@ -2424,9 +2424,9 @@ sub _get_parsed_uri_list {
}
if ($uri =~ /^mailto:/i) {
- # skip a mail link that does not have a valid TLD or other than one @ after decoding any URLEncoded characters
+ # skip a mail link that does not have a valid TLD or @ after decoding any URLEncoded characters
$uri = Mail::SpamAssassin::Util::url_encode($uri) if ($uri =~ /\%(?:2[1-9a-fA-F]|[3-6][0-9a-fA-F]|7[0-9a-eA-E])/);
- next if ($uri !~ /^[^@]+@[^@]+$/);
+ next unless $uri =~ /\@/;
my $domuri = $self->{main}->{registryboundaries}->uri_to_domain($uri);
next unless $domuri;
push (@uris, $rawuri);
Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/RegistryBoundaries.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/RegistryBoundaries.pm?rev=1864043&r1=1864042&r2=1864043&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/RegistryBoundaries.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/RegistryBoundaries.pm Wed Jul 31 10:43:49 2019
@@ -33,6 +33,9 @@ use re 'taint';
our @ISA = qw();
use Mail::SpamAssassin::Logger;
+use Mail::SpamAssassin::Constants qw(:ip);
+
+my $IP_ADDRESS = IP_ADDRESS;
# called from SpamAssassin->init() to create $self->{util_rb}
sub new {
@@ -216,18 +219,25 @@ sub uri_to_domain {
my $uri = lc shift;
# Javascript is not going to help us, so return.
- return if ($uri =~ /^javascript:/);
+ # Likewise ignore cid:
+ return if ($uri =~ /^(?:javascript|cid):/);
- $uri =~ s{\#.*$}{}gs; # drop fragment
- $uri =~ s{^[a-z]+:/{0,2}}{}gs; # drop the protocol
- $uri =~ s{^[^/]*\@}{}gs; # username/passwd
-
- # strip path and CGI params. note: bug 4213 shows that "&" should
- # *not* be likewise stripped here -- it's permitted in hostnames by
- # some common MUAs!
- $uri =~ s{[/?].*$}{}gs;
+ if ($uri =~ s/^mailto://) { # handle mailto: specially
+ $uri =~ s/\?.*//; # drop parameters ?subject= etc
+ return unless $uri =~ s/.*@//; # drop username or abort
+ } else {
+ $uri =~ s{\#.*$}{}gs; # drop fragment
+ $uri =~ s{^[a-z]+:/{0,2}}{}gs; # drop the protocol
+ $uri =~ s{^[^/]*\@}{}gs; # username/passwd
+ # strip path and CGI params. note: bug 4213 shows that "&" should
+ # *not* be likewise stripped here -- it's permitted in hostnames by
+ # some common MUAs!
+ $uri =~ s{[/?].*$}{}gs;
+ $uri =~ s{:\d*$}{}gs; # port, bug 4191: sometimes the # is missing
+ }
- $uri =~ s{:\d*$}{}gs; # port, bug 4191: sometimes the # is missing
+ # return if there's not at least a dot
+ return if $uri !~ /\./;
# skip undecoded URIs if the encoded bits shouldn't be.
# we'll see the decoded version as well. see url_encode()
@@ -236,12 +246,12 @@ sub uri_to_domain {
my $host = $uri; # unstripped/full domain name
# keep IPs intact
- if ($uri !~ /^\d+\.\d+\.\d+\.\d+$/) {
- # get rid of hostname part of domain, understanding delegation
- $uri = $self->trim_domain($uri);
-
+ if ($uri !~ /^$IP_ADDRESS$/) {
# ignore invalid domains
return unless ($self->is_domain_valid($uri));
+
+ # get rid of hostname part of domain, understanding delegation
+ $uri = $self->trim_domain($uri);
}
# $uri is now the domain only, optionally return unstripped host name
Modified: spamassassin/branches/3.4/t/uri.t
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/t/uri.t?rev=1864043&r1=1864042&r2=1864043&view=diff
==============================================================================
--- spamassassin/branches/3.4/t/uri.t (original)
+++ spamassassin/branches/3.4/t/uri.t Wed Jul 31 10:43:49 2019
@@ -16,7 +16,7 @@ if (-e 'test_dir') { # runnin
}
use strict;
-use Test::More tests => 95;
+use Test::More tests => 98;
use lib '.'; use lib 't';
use SATest; sa_t_init("uri");
@@ -85,6 +85,9 @@ sub try_domains {
ok(try_domains('javascript:{some crap}', undef));
ok(try_domains('mailto:nobody@example.com', 'example.com'));
+ok(try_domains('mailto:nobody@example.com?subject=foo', 'example.com'));
+ok(try_domains('mailto:nobody', undef));
+ok(try_domains('cid:foobar.net', undef));
ok(try_domains('http://66.92.69.221/', '66.92.69.221'));
ok(try_domains('http://www.spamassassin.org:8080/lists.html', 'spamassassin.org'));
ok(try_domains('http://www.spamassassin.org/lists.html#some_tag', 'spamassassin.org'));
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm?rev=1864043&r1=1864042&r2=1864043&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm Wed Jul 31 10:43:49 2019
@@ -2544,9 +2544,9 @@ sub _get_parsed_uri_list {
}
if ($uri =~ /^mailto:/i) {
- # skip a mail link that does not have a valid TLD or other than one @ after decoding any URLEncoded characters
+ # skip a mail link that does not have a valid TLD or @ after decoding any URLEncoded characters
$uri = Mail::SpamAssassin::Util::url_encode($uri) if ($uri =~ /\%(?:2[1-9a-fA-F]|[3-6][0-9a-fA-F]|7[0-9a-eA-E])/);
- next if ($uri !~ /^[^@]+@[^@]+$/);
+ next unless $uri =~ /\@/;
my $domuri = $self->{main}->{registryboundaries}->uri_to_domain($uri);
next unless $domuri;
push (@uris, $rawuri);
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/RegistryBoundaries.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/RegistryBoundaries.pm?rev=1864043&r1=1864042&r2=1864043&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/RegistryBoundaries.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/RegistryBoundaries.pm Wed Jul 31 10:43:49 2019
@@ -223,18 +223,25 @@ sub uri_to_domain {
my $uri = lc shift;
# Javascript is not going to help us, so return.
- return if ($uri =~ /^javascript:/);
+ # Likewise ignore cid:
+ return if ($uri =~ /^(?:javascript|cid):/);
- $uri =~ s{\#.*$}{}gs; # drop fragment
- $uri =~ s{^[a-z]+:/{0,2}}{}gs; # drop the protocol
- $uri =~ s{^[^/]*\@}{}gs; # username/passwd
-
- # strip path and CGI params. note: bug 4213 shows that "&" should
- # *not* be likewise stripped here -- it's permitted in hostnames by
- # some common MUAs!
- $uri =~ s{[/?].*$}{}gs;
+ if ($uri =~ s/^mailto://) { # handle mailto: specially
+ $uri =~ s/\?.*//; # drop parameters ?subject= etc
+ return unless $uri =~ s/.*@//; # drop username or abort
+ } else {
+ $uri =~ s{\#.*$}{}gs; # drop fragment
+ $uri =~ s{^[a-z]+:/{0,2}}{}gs; # drop the protocol
+ $uri =~ s{^[^/]*\@}{}gs; # username/passwd
+ # strip path and CGI params. note: bug 4213 shows that "&" should
+ # *not* be likewise stripped here -- it's permitted in hostnames by
+ # some common MUAs!
+ $uri =~ s{[/?].*$}{}gs;
+ $uri =~ s{:\d*$}{}gs; # port, bug 4191: sometimes the # is missing
+ }
- $uri =~ s{:\d*$}{}gs; # port, bug 4191: sometimes the # is missing
+ # return if there's not at least a dot
+ return if $uri !~ /\./;
# skip undecoded URIs if the encoded bits shouldn't be.
# we'll see the decoded version as well. see url_encode()
@@ -244,11 +251,11 @@ sub uri_to_domain {
# keep IPs intact
if ($uri !~ /^$IP_ADDRESS$/) {
- # get rid of hostname part of domain, understanding delegation
- $uri = $self->trim_domain($uri);
-
# ignore invalid domains
return unless ($self->is_domain_valid($uri));
+
+ # get rid of hostname part of domain, understanding delegation
+ $uri = $self->trim_domain($uri);
}
# $uri is now the domain only, optionally return unstripped host name
Modified: spamassassin/trunk/t/uri.t
URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/uri.t?rev=1864043&r1=1864042&r2=1864043&view=diff
==============================================================================
--- spamassassin/trunk/t/uri.t (original)
+++ spamassassin/trunk/t/uri.t Wed Jul 31 10:43:49 2019
@@ -16,7 +16,7 @@ if (-e 'test_dir') { # runnin
}
use strict;
-use Test::More tests => 95;
+use Test::More tests => 98;
use lib '.'; use lib 't';
use SATest; sa_t_init("uri");
@@ -85,6 +85,9 @@ sub try_domains {
ok(try_domains('javascript:{some crap}', undef));
ok(try_domains('mailto:nobody@example.com', 'example.com'));
+ok(try_domains('mailto:nobody@example.com?subject=foo', 'example.com'));
+ok(try_domains('mailto:nobody', undef));
+ok(try_domains('cid:foobar.net', undef));
ok(try_domains('http://66.92.69.221/', '66.92.69.221'));
ok(try_domains('http://www.spamassassin.org:8080/lists.html', 'spamassassin.org'));
ok(try_domains('http://www.spamassassin.org/lists.html#some_tag', 'spamassassin.org'));