You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by gb...@apache.org on 2022/04/14 11:09:46 UTC
svn commit: r1899844 - /spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DecodeShortURLs.pm
Author: gbechis
Date: Thu Apr 14 11:09:46 2022
New Revision: 1899844
URL: http://svn.apache.org/viewvc?rev=1899844&view=rev
Log:
Add support for url shorteners that create a random 3rd level subdomain under their main domain.
Modified:
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DecodeShortURLs.pm
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DecodeShortURLs.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DecodeShortURLs.pm?rev=1899844&r1=1899843&r2=1899844&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DecodeShortURLs.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DecodeShortURLs.pm Thu Apr 14 11:09:46 2022
@@ -133,6 +133,24 @@ sub new {
return $self;
}
+=head1 PRIVILEGED SETTINGS
+
+=over 4
+
+=item url_shortener (default: none)
+
+A domain that should be considered as a url shortener.
+If the domain begins with a '.', 3rd level subdomains of that
+domain will also be checked (e.g. random.page.link for '.page.link').
+
+Example:
+url_shortener bit.ly
+url_shortener .page.link
+
+=back
+
+=cut
+
sub set_config {
my($self, $conf) = @_;
my @cmds = ();
@@ -151,8 +169,6 @@ sub set_config {
}
});
-=head1 PRIVILEGED SETTINGS
-
=over 4
=item url_shortener_cache_type (default: none)
@@ -386,6 +402,7 @@ sub check_dnsbl {
# Sort short URLs into hash to de-dup them
my %short_urls;
my $uris = $pms->get_uri_detail_list();
+ my $tldsRE = $self->{main}->{registryboundaries}->{valid_tlds_re};
while (my($uri, $info) = each %{$uris}) {
next unless ($info->{domains});
foreach ( keys %{ $info->{domains} } ) {
@@ -402,6 +419,18 @@ sub check_dnsbl {
}
$short_urls{$uri} = 1;
next;
+ } elsif(/^(?!www)[a-z\d._-]{0,251}\.([a-z\d._-]{0,251}\.${tldsRE})/) {
+ # if the domain is a 3rd level domain, check whether its parent (2nd level)
+ # domain is configured as a url shortener with a leading '.'
+ my $dom = '.' . $1;
+ if (exists $self->{url_shorteners}->{$dom}) {
+ if ($uri !~ /^https?:\/\/(?:www\.)?$_\/.+$/i) {
+ dbg("Discarding URI: $uri");
+ next;
+ }
+ $short_urls{$uri} = 1;
+ next;
+ }
}
}
}
@@ -457,7 +486,6 @@ sub recursive_lookup {
$location = $response->headers->{location};
# Bail out if $short_url redirects to itself
return undef if ($short_url eq $location);
- dbg("Found $short_url => $location");
if ($self->{caching}) {
if ($self->cache_add($short_url, $location)) {
dbg("Added $short_url to cache");
@@ -499,10 +527,20 @@ sub recursive_lookup {
$self->{short_url_loop} = 1;
return $location;
} else {
+ my $tldsRE = $self->{main}->{registryboundaries}->{valid_tlds_re};
if (exists $self->{url_shorteners}->{$domain}) {
$been_here{$location} = 1;
# Recurse...
return $self->recursive_lookup($location, $pms, %been_here);
+ } elsif($domain =~ /^(?!www)[a-z\d._-]{0,251}\.([a-z\d._-]{0,251}\.${tldsRE})/) {
+ # if the domain is a 3rd level domain, check whether its parent (2nd level)
+ # domain is configured as a url shortener with a leading '.'
+ my $dom = '.' . $1;
+ if (exists $self->{url_shorteners}->{$dom}) {
+ $been_here{$location} = 1;
+ # Recurse...
+ return $self->recursive_lookup($location, $pms, %been_here);
+ }
}
}
}