You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by gb...@apache.org on 2021/05/28 08:04:45 UTC
svn commit: r1890266 - in /spamassassin/trunk:
lib/Mail/SpamAssassin/Plugin/DecodeShortURLs.pm t/decodeshorturl.t
Author: gbechis
Date: Fri May 28 08:04:45 2021
New Revision: 1890266
URL: http://svn.apache.org/viewvc?rev=1890266&view=rev
Log:
remove legacy "dynamic" rules and use proper 'eval' rules
Modified:
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DecodeShortURLs.pm
spamassassin/trunk/t/decodeshorturl.t
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DecodeShortURLs.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DecodeShortURLs.pm?rev=1890266&r1=1890265&r2=1890266&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DecodeShortURLs.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DecodeShortURLs.pm Fri May 28 08:04:45 2021
@@ -28,6 +28,11 @@ DecodeShortURLs - Expand shortened URLs
url_shortener bit.ly
url_shortener go.to
...
+ body HAS_SHORT_URL eval:short_url()
+ describe HAS_SHORT_URL Message contains one or more shortened URLs
+
+ body SHORT_URL_CHAINED eval:short_url_chained()
+ describe SHORT_URL_CHAINED Message has shortened URL chained to other shorteners
=head1 DESCRIPTION
@@ -40,12 +45,6 @@ SpamAssassin which can then be accessed
This plugin also sets the rule HAS_SHORT_URL if any matching short URLs are
found.
-Regular 'uri' rules can be used to detect and score links disabled by the
-shortening service for abuse and URL_BITLY_BLOCKED is supplied as an example.
-It should be safe to score this rule highly on a match as experience shows
-that bit.ly only blocks access to a URL if it has seen consistent abuse and
-problem reports.
-
This plug-in will follow 'chained' shorteners e.g.
from short URL to short URL to short URL and finally to the real URL
@@ -58,12 +57,10 @@ which point it will fire the rule 'SHORT
If a shortener returns a '404 Not Found' result for the short URL then the
rule 'SHORT_URL_404' will be fired.
-If a shortener does not return an HTTP redirect, then a dynamic rule will
-be fired: 'SHORT_C<SHORTENER>_C<CODE>' where C<SHORTENER> is the uppercase
-name of the shortener with dots converted to underscores. e.g.:
-'SHORT_T_CO_200' This is to handle the case of t.co which now returns an
-HTTP 200 and an abuse page instead of redirecting to an abuse page like
-every other shortener does...
+If a shortener returns a '200 OK' result for the short URL then the
+rule 'SHORT_URL_200' will be fired.
+
+This can cover the case when an abuse page is displayed.
=head1 NOTES
@@ -85,7 +82,7 @@ could not have been developed without hi
package Mail::SpamAssassin::Plugin::DecodeShortURLs;
-my $VERSION = 0.11;
+my $VERSION = 0.12;
use Mail::SpamAssassin::Plugin;
use strict;
@@ -124,7 +121,12 @@ sub new {
$self->set_config($mailsaobject->{conf});
$self->register_method_priority ('check_dnsbl', -10);
- $self->register_eval_rule('short_url_tests');
+ $self->register_eval_rule('short_url');
+ $self->register_eval_rule('short_url_200');
+ $self->register_eval_rule('short_url_404');
+ $self->register_eval_rule('short_url_chained');
+ $self->register_eval_rule('short_url_maxchain');
+ $self->register_eval_rule('short_url_loop');
return $self;
}
@@ -313,6 +315,42 @@ sub _connect_dbi_cache {
}
}
+sub short_url {
+ my ($self, $opts) = @_;
+
+ return $self->{short_url};
+}
+
+sub short_url_200 {
+ my ($self, $opts) = @_;
+
+ return $self->{short_url_200};
+}
+
+sub short_url_404 {
+ my ($self, $opts) = @_;
+
+ return $self->{short_url_404};
+}
+
+sub short_url_chained {
+ my ($self, $opts) = @_;
+
+ return $self->{short_url_chained};
+}
+
+sub short_url_maxchain {
+ my ($self, $opts) = @_;
+
+ return $self->{short_url_maxchain};
+}
+
+sub short_url_loop {
+ my ($self, $opts) = @_;
+
+ return $self->{short_url_loop};
+}
+
sub check_dnsbl {
my ($self, $opts) = @_;
my $pms = $opts->{permsgstatus};
@@ -377,7 +415,7 @@ sub recursive_lookup {
if ($count >= 10) {
dbg("Error: more than 10 shortener redirections");
# Fire test
- $pms->got_hit('SHORT_URL_MAXCHAIN');
+ $self->{short_url_maxchain} = 1;
return undef;
}
@@ -393,16 +431,8 @@ sub recursive_lookup {
my $response = $self->{ua}->head($short_url);
if (!$response->is_redirect) {
dbg("URL is not redirect: $short_url = ".$response->status_line);
- if ((my ($domain) = ($short_url =~ /^https?:\/\/(\S+)\//))) {
- if (exists $self->{url_shorteners}->{$domain}) {
- $domain =~ s/\./_/g;
- $domain = uc($domain);
- my $h = 'SHORT_' . $domain . '_' . $response->code;
- dbg("hit rule: $h");
- $pms->got_hit($h);
- }
- }
- $pms->got_hit('SHORT_URL_404') if($response->code == '404');
+ $self->{short_url_200} = 1 if($response->code == '200');
+ $self->{short_url_404} = 1 if($response->code == '404');
return undef;
}
$location = $response->headers->{location};
@@ -418,12 +448,12 @@ sub recursive_lookup {
}
# At this point we have a new URL in $response
- $pms->got_hit('HAS_SHORT_URL');
+ $self->{short_url} = 1;
$pms->add_uri_detail_list($location);
# Set chained here otherwise we might mark a disabled page or
# redirect back to the same host as chaining incorrectly.
- $pms->got_hit('SHORT_URL_CHAINED') if $count > 0;
+ $self->{short_url_chained} = 1 if $count > 0;
# Check if we are being redirected to a local page
# Don't recurse in this case...
@@ -440,7 +470,7 @@ sub recursive_lookup {
if (exists $been_here{$location}) {
# Loop detected
dbg("Error: loop detected");
- $pms->got_hit('SHORT_URL_LOOP');
+ $self->{short_url_loop} = 1;
return $location;
} else {
if (exists $self->{url_shorteners}->{$domain}) {
@@ -455,11 +485,6 @@ sub recursive_lookup {
return $location;
}
-sub short_url_tests {
- # Set by parsed_metadata
- return 0;
-}
-
sub cache_add {
my ($self, $short_url, $decoded_url) = @_;
return undef if not $self->{caching};
Modified: spamassassin/trunk/t/decodeshorturl.t
URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/decodeshorturl.t?rev=1890266&r1=1890265&r2=1890266&view=diff
==============================================================================
--- spamassassin/trunk/t/decodeshorturl.t (original)
+++ spamassassin/trunk/t/decodeshorturl.t Fri May 28 08:04:45 2021
@@ -17,10 +17,10 @@ url_shortener bit.ly
url_shortener tinyurl.com
ifplugin Mail::SpamAssassin::Plugin::DecodeShortURLs
- body HAS_SHORT_URL eval:short_url_tests()
+ body HAS_SHORT_URL eval:short_url()
describe HAS_SHORT_URL Message contains one or more shortened URLs
- body SHORT_URL_CHAINED eval:short_url_tests()
+ body SHORT_URL_CHAINED eval:short_url_chained()
describe SHORT_URL_CHAINED Message has shortened URL chained to other shorteners
endif
");