You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2019/07/31 10:43:49 UTC

svn commit: r1864043 - in /spamassassin: branches/3.4/lib/Mail/SpamAssassin/ branches/3.4/t/ trunk/lib/Mail/SpamAssassin/ trunk/t/

Author: hege
Date: Wed Jul 31 10:43:49 2019
New Revision: 1864043

URL: http://svn.apache.org/viewvc?rev=1864043&view=rev
Log:
uri_to_domain - ignore cid:, fix mailto: parameter handling

Modified:
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/RegistryBoundaries.pm
    spamassassin/branches/3.4/t/uri.t
    spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/RegistryBoundaries.pm
    spamassassin/trunk/t/uri.t

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm?rev=1864043&r1=1864042&r2=1864043&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm Wed Jul 31 10:43:49 2019
@@ -2424,9 +2424,9 @@ sub _get_parsed_uri_list {
         }
 
         if ($uri =~ /^mailto:/i) {
-          # skip a mail link that does not have a valid TLD or other than one @ after decoding any URLEncoded characters
+          # skip a mail link that does not have a valid TLD or @ after decoding any URLEncoded characters
           $uri = Mail::SpamAssassin::Util::url_encode($uri) if ($uri =~ /\%(?:2[1-9a-fA-F]|[3-6][0-9a-fA-F]|7[0-9a-eA-E])/);
-          next if ($uri !~ /^[^@]+@[^@]+$/);
+          next unless $uri =~ /\@/;
           my $domuri = $self->{main}->{registryboundaries}->uri_to_domain($uri);
           next unless $domuri;
           push (@uris, $rawuri);

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/RegistryBoundaries.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/RegistryBoundaries.pm?rev=1864043&r1=1864042&r2=1864043&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/RegistryBoundaries.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/RegistryBoundaries.pm Wed Jul 31 10:43:49 2019
@@ -33,6 +33,9 @@ use re 'taint';
 our @ISA = qw();
 
 use Mail::SpamAssassin::Logger;
+use Mail::SpamAssassin::Constants qw(:ip);
+
+my $IP_ADDRESS = IP_ADDRESS;
 
 # called from SpamAssassin->init() to create $self->{util_rb}
 sub new {
@@ -216,18 +219,25 @@ sub uri_to_domain {
   my $uri = lc shift;
 
   # Javascript is not going to help us, so return.
-  return if ($uri =~ /^javascript:/);
+  # Likewise ignore cid:
+  return if ($uri =~ /^(?:javascript|cid):/);
 
-  $uri =~ s{\#.*$}{}gs;			# drop fragment
-  $uri =~ s{^[a-z]+:/{0,2}}{}gs;	# drop the protocol
-  $uri =~ s{^[^/]*\@}{}gs;		# username/passwd
-
-  # strip path and CGI params.  note: bug 4213 shows that "&" should
-  # *not* be likewise stripped here -- it's permitted in hostnames by
-  # some common MUAs!
-  $uri =~ s{[/?].*$}{}gs;              
+  if ($uri =~ s/^mailto://) { # handle mailto: specially
+    $uri =~ s/\?.*//;			# drop parameters ?subject= etc
+    return unless $uri =~ s/.*@//;	# drop username or abort
+  } else {
+    $uri =~ s{\#.*$}{}gs;		# drop fragment
+    $uri =~ s{^[a-z]+:/{0,2}}{}gs;	# drop the protocol
+    $uri =~ s{^[^/]*\@}{}gs;		# username/passwd
+    # strip path and CGI params.  note: bug 4213 shows that "&" should
+    # *not* be likewise stripped here -- it's permitted in hostnames by
+    # some common MUAs!
+    $uri =~ s{[/?].*$}{}gs;              
+    $uri =~ s{:\d*$}{}gs;		# port, bug 4191: sometimes the # is missing
+  }
 
-  $uri =~ s{:\d*$}{}gs;		# port, bug 4191: sometimes the # is missing
+  # return if there's not at least a dot
+  return if $uri !~ /\./;
 
   # skip undecoded URIs if the encoded bits shouldn't be.
   # we'll see the decoded version as well.  see url_encode()
@@ -236,12 +246,12 @@ sub uri_to_domain {
   my $host = $uri;  # unstripped/full domain name
 
   # keep IPs intact
-  if ($uri !~ /^\d+\.\d+\.\d+\.\d+$/) { 
-    # get rid of hostname part of domain, understanding delegation
-    $uri = $self->trim_domain($uri);
-
+  if ($uri !~ /^$IP_ADDRESS$/) { 
     # ignore invalid domains
     return unless ($self->is_domain_valid($uri));
+
+    # get rid of hostname part of domain, understanding delegation
+    $uri = $self->trim_domain($uri);
   }
   
   # $uri is now the domain only, optionally return unstripped host name

Modified: spamassassin/branches/3.4/t/uri.t
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/t/uri.t?rev=1864043&r1=1864042&r2=1864043&view=diff
==============================================================================
--- spamassassin/branches/3.4/t/uri.t (original)
+++ spamassassin/branches/3.4/t/uri.t Wed Jul 31 10:43:49 2019
@@ -16,7 +16,7 @@ if (-e 'test_dir') {            # runnin
 }
 
 use strict;
-use Test::More tests => 95;
+use Test::More tests => 98;
 use lib '.'; use lib 't';
 use SATest; sa_t_init("uri");
 
@@ -85,6 +85,9 @@ sub try_domains {
 
 ok(try_domains('javascript:{some crap}', undef));
 ok(try_domains('mailto:nobody@example.com', 'example.com'));
+ok(try_domains('mailto:nobody@example.com?subject=foo', 'example.com'));
+ok(try_domains('mailto:nobody', undef));
+ok(try_domains('cid:foobar.net', undef));
 ok(try_domains('http://66.92.69.221/', '66.92.69.221'));
 ok(try_domains('http://www.spamassassin.org:8080/lists.html', 'spamassassin.org'));
 ok(try_domains('http://www.spamassassin.org/lists.html#some_tag', 'spamassassin.org'));

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm?rev=1864043&r1=1864042&r2=1864043&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm Wed Jul 31 10:43:49 2019
@@ -2544,9 +2544,9 @@ sub _get_parsed_uri_list {
         }
 
         if ($uri =~ /^mailto:/i) {
-          # skip a mail link that does not have a valid TLD or other than one @ after decoding any URLEncoded characters
+          # skip a mail link that does not have a valid TLD or @ after decoding any URLEncoded characters
           $uri = Mail::SpamAssassin::Util::url_encode($uri) if ($uri =~ /\%(?:2[1-9a-fA-F]|[3-6][0-9a-fA-F]|7[0-9a-eA-E])/);
-          next if ($uri !~ /^[^@]+@[^@]+$/);
+          next unless $uri =~ /\@/;
           my $domuri = $self->{main}->{registryboundaries}->uri_to_domain($uri);
           next unless $domuri;
           push (@uris, $rawuri);

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/RegistryBoundaries.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/RegistryBoundaries.pm?rev=1864043&r1=1864042&r2=1864043&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/RegistryBoundaries.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/RegistryBoundaries.pm Wed Jul 31 10:43:49 2019
@@ -223,18 +223,25 @@ sub uri_to_domain {
   my $uri = lc shift;
 
   # Javascript is not going to help us, so return.
-  return if ($uri =~ /^javascript:/);
+  # Likewise ignore cid:
+  return if ($uri =~ /^(?:javascript|cid):/);
 
-  $uri =~ s{\#.*$}{}gs;			# drop fragment
-  $uri =~ s{^[a-z]+:/{0,2}}{}gs;	# drop the protocol
-  $uri =~ s{^[^/]*\@}{}gs;		# username/passwd
-
-  # strip path and CGI params.  note: bug 4213 shows that "&" should
-  # *not* be likewise stripped here -- it's permitted in hostnames by
-  # some common MUAs!
-  $uri =~ s{[/?].*$}{}gs;              
+  if ($uri =~ s/^mailto://) { # handle mailto: specially
+    $uri =~ s/\?.*//;			# drop parameters ?subject= etc
+    return unless $uri =~ s/.*@//;	# drop username or abort
+  } else {
+    $uri =~ s{\#.*$}{}gs;		# drop fragment
+    $uri =~ s{^[a-z]+:/{0,2}}{}gs;	# drop the protocol
+    $uri =~ s{^[^/]*\@}{}gs;		# username/passwd
+    # strip path and CGI params.  note: bug 4213 shows that "&" should
+    # *not* be likewise stripped here -- it's permitted in hostnames by
+    # some common MUAs!
+    $uri =~ s{[/?].*$}{}gs;              
+    $uri =~ s{:\d*$}{}gs;		# port, bug 4191: sometimes the # is missing
+  }
 
-  $uri =~ s{:\d*$}{}gs;		# port, bug 4191: sometimes the # is missing
+  # return if there's not at least a dot
+  return if $uri !~ /\./;
 
   # skip undecoded URIs if the encoded bits shouldn't be.
   # we'll see the decoded version as well.  see url_encode()
@@ -244,11 +251,11 @@ sub uri_to_domain {
 
   # keep IPs intact
   if ($uri !~ /^$IP_ADDRESS$/) {
-    # get rid of hostname part of domain, understanding delegation
-    $uri = $self->trim_domain($uri);
-
     # ignore invalid domains
     return unless ($self->is_domain_valid($uri));
+
+    # get rid of hostname part of domain, understanding delegation
+    $uri = $self->trim_domain($uri);
   }
   
   # $uri is now the domain only, optionally return unstripped host name

Modified: spamassassin/trunk/t/uri.t
URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/uri.t?rev=1864043&r1=1864042&r2=1864043&view=diff
==============================================================================
--- spamassassin/trunk/t/uri.t (original)
+++ spamassassin/trunk/t/uri.t Wed Jul 31 10:43:49 2019
@@ -16,7 +16,7 @@ if (-e 'test_dir') {            # runnin
 }
 
 use strict;
-use Test::More tests => 95;
+use Test::More tests => 98;
 use lib '.'; use lib 't';
 use SATest; sa_t_init("uri");
 
@@ -85,6 +85,9 @@ sub try_domains {
 
 ok(try_domains('javascript:{some crap}', undef));
 ok(try_domains('mailto:nobody@example.com', 'example.com'));
+ok(try_domains('mailto:nobody@example.com?subject=foo', 'example.com'));
+ok(try_domains('mailto:nobody', undef));
+ok(try_domains('cid:foobar.net', undef));
 ok(try_domains('http://66.92.69.221/', '66.92.69.221'));
 ok(try_domains('http://www.spamassassin.org:8080/lists.html', 'spamassassin.org'));
 ok(try_domains('http://www.spamassassin.org/lists.html#some_tag', 'spamassassin.org'));