You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2022/05/01 17:51:21 UTC
svn commit: r1900462 - in /spamassassin/trunk: UPGRADE lib/Mail/SpamAssassin/Plugin/DecodeShortURLs.pm sql/decodeshorturl_mysql.sql sql/decodeshorturl_pg.sql sql/decodeshorturl_sqlite.sql t/data/spam/decodeshorturl/base.eml t/decodeshorturl.t
Author: hege
Date: Sun May 1 17:51:21 2022
New Revision: 1900462
URL: http://svn.apache.org/viewvc?rev=1900462&view=rev
Log:
Clean up DecodeShortURLs code. Add MySQL/Postgres support.
Added:
spamassassin/trunk/sql/decodeshorturl_pg.sql
Modified:
spamassassin/trunk/UPGRADE
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DecodeShortURLs.pm
spamassassin/trunk/sql/decodeshorturl_mysql.sql
spamassassin/trunk/sql/decodeshorturl_sqlite.sql
spamassassin/trunk/t/data/spam/decodeshorturl/base.eml
spamassassin/trunk/t/decodeshorturl.t
Modified: spamassassin/trunk/UPGRADE
URL: http://svn.apache.org/viewvc/spamassassin/trunk/UPGRADE?rev=1900462&r1=1900461&r2=1900462&view=diff
==============================================================================
--- spamassassin/trunk/UPGRADE (original)
+++ spamassassin/trunk/UPGRADE Sun May 1 17:51:21 2022
@@ -196,6 +196,8 @@ Note for Users Upgrading to SpamAssassin
- New DMARC policy check plugin
+- New DecodeShortURLs plugin
+
Note for Users Upgrading to SpamAssassin 3.4.5
----------------------------------------------
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DecodeShortURLs.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DecodeShortURLs.pm?rev=1900462&r1=1900461&r2=1900462&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DecodeShortURLs.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DecodeShortURLs.pm Sun May 1 17:51:21 2022
@@ -94,7 +94,6 @@ use vars qw(@ISA);
my $VERSION = 0.12;
use constant HAS_LWP_USERAGENT => eval { require LWP::UserAgent; };
-use constant HAS_DBI => eval { require DBI; };
sub dbg { my $msg = shift; return Mail::SpamAssassin::Logger::dbg("DecodeShortURLs: $msg", @_); }
sub info { my $msg = shift; return Mail::SpamAssassin::Logger::info("DecodeShortURLs: $msg", @_); }
@@ -108,17 +107,12 @@ sub new {
bless ($self, $class);
if ($mailsaobject->{local_tests_only} || !HAS_LWP_USERAGENT) {
+ dbg("local tests only, disabling checks");
$self->{disabled} = 1;
- } else {
- $self->{disabled} = 0;
}
-
- unless ($self->{disabled}) {
- $self->{ua} = new LWP::UserAgent;
- $self->{ua}->{max_redirect} = 0;
- $self->{ua}->{timeout} = 5;
- $self->{ua}->env_proxy;
- $self->{caching} = 0;
+ elsif (!HAS_LWP_USERAGENT) {
+ dbg("module LWP::UserAgent not installed, disabling checks");
+ $self->{disabled} = 1;
}
$self->set_config($mailsaobject->{conf});
@@ -160,7 +154,7 @@ sub set_config {
default => {},
code => sub {
my ($self, $key, $value, $line) = @_;
- if ($value =~ /^$/) {
+ if ($value eq '') {
return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
}
foreach my $domain (split(/\s+/, $value)) {
@@ -173,11 +167,12 @@ sub set_config {
=item url_shortener_cache_type (default: none)
-The specific type of cache type that is being utilized. Currently only sqlite
-is configured however plans to support redis cache is planned.
+The cache type that is being utilized. Currently only supported value is
+C<dbi> that implies C<url_shortener_cache_dsn> is a DBI connect string.
+DBI module is required.
Example:
-url_shortener_cache_type sqlite
+url_shortener_cache_type dbi
=back
@@ -185,7 +180,7 @@ url_shortener_cache_type sqlite
push (@cmds, {
setting => 'url_shortener_cache_type',
- default => undef,
+ default => '',
is_priv => 1,
type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING
});
@@ -194,15 +189,22 @@ url_shortener_cache_type sqlite
=item url_shortener_cache_dsn (default: none)
-The dsn to a database file to write cache entries to. The database will
-be created automatically if is does not already exist but the supplied path
-and file must be read/writable by the user running spamassassin or spamd.
+The DBI dsn of the database to use.
-Note: You will need to have the proper DBI version of the cache type installed.
+For SQLite, the database will be created automatically if it does not
+already exist, the supplied path and file must be read/writable by the
+user running spamassassin or spamd.
-Example:
+For MySQL/MariaDB or PostgreSQL, see sql-directory for database table
+creation clauses.
+
+You will need to have the proper DBI module for your database. For example
+DBD::SQLite, DBD::mysql, DBD::MariaDB or DBD::Pg.
-url_shortener_cache_dsn dbi:SQLite:dbname=/tmp/DecodeShortURLs.sq3
+Minimum required SQLite version is 3.24.0 (available from DBD::SQLite 1.59).
+
+Examples:
+url_shortener_cache_dsn dbi:SQLite:dbname=/var/lib/spamassassin/DecodeShortURLs.db
=back
@@ -219,7 +221,8 @@ url_shortener_cache_dsn dbi:SQLite:dbnam
=item url_shortener_cache_username (default: none)
-The username that should be used to connect to the database.
+The username that should be used to connect to the database. Not used for
+SQLite.
=back
@@ -236,7 +239,8 @@ The username that should be used to conn
=item url_shortener_cache_password (default: none)
-The password that should be used to connect to the database.
+The password that should be used to connect to the database. Not used for
+SQLite.
=back
@@ -256,12 +260,7 @@ The password that should be used to conn
The length of time a cache entry will be valid for in seconds.
Default is 86400 (1 day).
-NOTE: you will also need to run the following via cron to actually remove the
-records from the database:
-
-echo "DELETE FROM short_url_cache WHERE modified < NOW() - C<ttl>; | sqlite3 /path/to/database"
-
-NOTE: replace C<ttl> above with the same value you use for this option
+See C<url_shortener_cache_autoclean> for database cleaning.
=back
@@ -276,6 +275,28 @@ NOTE: replace C<ttl> above with the same
=over 4
+=item url_shortener_cache_autoclean (default: 1000)
+
+Automatically purge old entries from database. Value describes a random run
+chance of 1/x. The default value of 1000 means that cleaning is run
+approximately once for every 1000 messages processed. Value of 1 would mean
+database is cleaned every time a message is processed.
+
+Set 0 to disable automatic cleaning and to do it manually.
+
+=back
+
+=cut
+
+ push (@cmds, {
+ setting => 'url_shortener_cache_autoclean',
+ is_admin => 1,
+ default => 1000,
+ type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC
+ });
+
+=over 4
+
=item url_shortener_loginfo (default: 0 (off))
If this option is enabled (set to 1), then short URLs and the decoded URLs will be logged with info priority.
@@ -312,37 +333,156 @@ The max depth of short urls that will be
}
sub initialise_url_shortener_cache {
- my($self, $opts) = @_;
+ my ($self, $conf) = @_;
- if($self->{url_shortener_cache_type} eq "dbi" && defined $self->{url_shortener_cache_dsn} && HAS_DBI) {
- $self->{url_shortener_dbi_cache} = $self->{url_shortener_cache_dsn};
- return _connect_dbi_cache($self, $opts);
- } else {
- warn "Wrong cache type selected";
+ return if $self->{dbh};
+ return if !$conf->{url_shortener_cache_type};
+
+ if (!$conf->{url_shortener_cache_dsn}) {
+ warn "DecodeShortURLs: invalid cache configuration\n";
return;
}
-}
-sub _connect_dbi_cache {
- my($self, $opts) = @_;
-
- # Initialise cache if enabled
- if ($self->{url_shortener_dbi_cache} && HAS_DBI) {
+ ##
+ ## SQLite
+ ##
+ if ($conf->{url_shortener_cache_type} =~ /^(?:dbi|sqlite)$/i
+ && $conf->{url_shortener_cache_dsn} =~ /^dbi:SQLite/)
+ {
eval {
local $SIG{'__DIE__'};
+ require DBI;
+ require DBD::SQLite;
+ DBD::SQLite->VERSION(1.59_01); # Required for ON CONFLICT
$self->{dbh} = DBI->connect_cached(
- $self->{url_shortener_cache_dsn},
- $self->{url_shortener_cache_username},
- $self->{url_shortener_cache_password},
- {RaiseError => 1, PrintError => 0, InactiveDestroy => 1}
- ) or die $!;
+ $conf->{url_shortener_cache_dsn}, '', '',
+ {RaiseError => 1, PrintError => 0, InactiveDestroy => 1, AutoCommit => 1}
+ );
+ $self->{dbh}->do("
+ CREATE TABLE IF NOT EXISTS short_url_cache (
+ short_url TEXT PRIMARY KEY NOT NULL,
+ decoded_url TEXT NOT NULL,
+ hits INTEGER NOT NULL DEFAULT 1,
+ created INTEGER NOT NULL DEFAULT (strftime('%s','now')),
+ modified INTEGER NOT NULL DEFAULT (strftime('%s','now'))
+ )
+ ");
+ # Maintaining index for cleaning is likely more expensive than occasional full table scan
+ #$self->{dbh}->do("
+ # CREATE INDEX IF NOT EXISTS short_url_modified
+ # ON short_url_cache(modified)
+ #");
+ $self->{sth_insert} = $self->{dbh}->prepare("
+ INSERT INTO short_url_cache (short_url, decoded_url, modified)
+ VALUES (?,?,strftime('%s','now'))
+ ON CONFLICT(short_url) DO UPDATE
+ SET decoded_url = excluded.decoded_url,
+ modified = excluded.modified,
+ hits = hits + 1
+ ");
+ $self->{sth_select} = $self->{dbh}->prepare("
+ SELECT decoded_url FROM short_url_cache
+ WHERE short_url = ? AND modified >= strftime('%s','now') - $conf->{url_shortener_cache_ttl}
+ ");
+ $self->{sth_delete} = $self->{dbh}->prepare("
+ DELETE FROM short_url_cache
+ WHERE modified < strftime('%s','now') - $conf->{url_shortener_cache_ttl}
+ ");
};
if ($@) {
- dbg("warn: $@");
- } else {
- $self->{caching} = 1;
+ warn "DecodeShortURLs: cache connect failed: $@\n";
+ undef $self->{dbh};
+ undef $self->{sth_insert};
+ undef $self->{sth_select};
+ undef $self->{sth_delete};
+ }
+ }
+ ##
+ ## MySQL/MariaDB
+ ##
+ elsif (lc $conf->{url_shortener_cache_type} eq 'dbi'
+ && $conf->{url_shortener_cache_dsn} =~ /^dbi:(?:mysql|MariaDB)/)
+ {
+ eval {
+ local $SIG{'__DIE__'};
+ require DBI;
+ $self->{dbh} = DBI->connect_cached(
+ $conf->{url_shortener_cache_dsn},
+ $conf->{url_shortener_cache_username},
+ $conf->{url_shortener_cache_password},
+ {RaiseError => 1, PrintError => 0, InactiveDestroy => 1, AutoCommit => 1}
+ );
+ $self->{sth_insert} = $self->{dbh}->prepare("
+ INSERT INTO short_url_cache (short_url, decoded_url, modified)
+ VALUES (?,?,NOW())
+ ON DUPLICATE KEY UPDATE
+ decoded_url = VALUES(decoded_url),
+ modified = VALUES(modified),
+ hits = hits + 1
+ ");
+ $self->{sth_select} = $self->{dbh}->prepare("
+ SELECT decoded_url FROM short_url_cache
+ WHERE short_url = ? AND modified >= DATE_SUB(NOW(), INTERVAL $conf->{url_shortener_cache_ttl} SECOND)
+ ");
+ $self->{sth_delete} = $self->{dbh}->prepare("
+ DELETE FROM short_url_cache
+ WHERE modified < DATE_SUB(NOW(), INTERVAL $conf->{url_shortener_cache_ttl} SECOND)
+ ");
+ };
+ if ($@) {
+ warn "DecodeShortURLs: cache connect failed: $@\n";
+ undef $self->{dbh};
+ undef $self->{sth_insert};
+ undef $self->{sth_select};
+ undef $self->{sth_delete};
}
}
+ ##
+ ## PostgreSQL
+ ##
+ elsif (lc $conf->{url_shortener_cache_type} eq 'dbi'
+ && $conf->{url_shortener_cache_dsn} =~ /^dbi:Pg/)
+ {
+ eval {
+ local $SIG{'__DIE__'};
+ require DBI;
+ $self->{dbh} = DBI->connect_cached(
+ $conf->{url_shortener_cache_dsn},
+ $conf->{url_shortener_cache_username},
+ $conf->{url_shortener_cache_password},
+ {RaiseError => 1, PrintError => 0, InactiveDestroy => 1, AutoCommit => 1}
+ );
+ $self->{sth_insert} = $self->{dbh}->prepare("
+ INSERT INTO short_url_cache (short_url, decoded_url, modified)
+ VALUES (?,?,NOW())
+ ON CONFLICT (short_url) DO UPDATE SET
+ decoded_url = EXCLUDED.decoded_url,
+ modified = EXCLUDED.modified,
+ hits = short_url_cache.hits + 1
+ ");
+ $self->{sth_select} = $self->{dbh}->prepare("
+ SELECT decoded_url FROM short_url_cache
+ WHERE short_url = ? AND modified >= NOW() - INTERVAL '$conf->{url_shortener_cache_ttl} SECONDS'
+ ");
+ $self->{sth_delete} = $self->{dbh}->prepare("
+ DELETE FROM short_url_cache
+ WHERE modified < NOW() - INTERVAL '$conf->{url_shortener_cache_ttl} SECONDS'
+ ");
+ };
+ if ($@) {
+ warn "DecodeShortURLs: cache connect failed: $@\n";
+ undef $self->{dbh};
+ undef $self->{sth_insert};
+ undef $self->{sth_select};
+ undef $self->{sth_delete};
+ }
+ ##
+ ## ...
+ ##
+ } else {
+ warn "DecodeShortURLs: invalid cache configuration\n";
+ return;
+ }
}
sub short_url {
@@ -381,239 +521,201 @@ sub short_url_loop {
return $pms->{short_url_loop};
}
+sub _check_shortener_uri {
+ my ($uri, $conf) = @_;
+
+ return 0 unless $uri =~ m{^
+ https?:// # Only http
+ (?:[^\@/?#]*\@)? # Ignore user:pass@
+ ([^/?#:]+) # (Capture hostname)
+ (?::\d+)? # Possible port
+ .*?\w # Some path wanted
+ }ix;
+ my $host = lc $1;
+ if (exists $conf->{url_shorteners}->{$host}) {
+ return 1;
+ }
+ # if domain is a 3rd level domain check if there is a url shortener
+ # on the 2nd level tld
+ elsif ($host =~ /^(?!www)[^.]+(\.[^.]+\.[^.]+)$/i &&
+ exists $conf->{url_shorteners}->{$1}) {
+ return 1;
+ }
+ return 0;
+}
+
sub check_dnsbl {
my ($self, $opts) = @_;
- my $pms = $opts->{permsgstatus};
- my $msg = $opts->{msg};
return if $self->{disabled};
- # don't keep dereferencing these
- $self->{url_shorteners} = $pms->{main}->{conf}->{url_shorteners};
- if(defined $pms->{main}->{conf}->{url_shortener_cache_type}) {
- $self->{url_shortener_cache_type} = $pms->{main}->{conf}->{url_shortener_cache_type};
- $self->{url_shortener_cache_dsn} = $pms->{main}->{conf}->{url_shortener_cache_dsn};
- $self->{url_shortener_cache_username} = $pms->{main}->{conf}->{url_shortener_cache_username};
- $self->{url_shortener_cache_password} = $pms->{main}->{conf}->{url_shortener_cache_password};
- $self->{url_shortener_cache_ttl} = $pms->{main}->{conf}->{url_shortener_cache_ttl};
- }
- $self->{url_shortener_loginfo} = $pms->{main}->{conf}->{url_shortener_loginfo};
+ my $pms = $opts->{permsgstatus};
+ my $conf = $pms->{conf};
# Sort short URLs into hash to de-dup them
my %short_urls;
my $uris = $pms->get_uri_detail_list();
- my $tldsRE = $self->{main}->{registryboundaries}->{valid_tlds_re};
while (my($uri, $info) = each %{$uris}) {
- next unless ($info->{domains});
- foreach ( keys %{ $info->{domains} } ) {
- if (exists $self->{url_shorteners}->{lc $_}) {
- # NOTE: $info->{domains} appears to contain all the domains parsed
- # from the single input URI with no way to work out what the base
- # domain is. So to prevent someone from stuffing the URI with a
- # shortener to force this plug-in to follow a link that *isn't* on
- # the list of shorteners; we enforce that the shortener must be the
- # base URI and that a path must be present.
- if ($uri !~ /^https?:\/\/(?:www\.)?$_\/.+$/i) {
- dbg("Discarding URI: $uri");
- next;
- }
- $short_urls{$uri} = 1;
- next;
- } elsif(/^(?!www)[a-z\d._-]{0,251}\.([a-z\d._-]{0,251}\.${tldsRE})/) {
- # if domain is a 3rd level domain check if there is a url shortener
- # on the 2nd level tld
- my $dom = '.' . $1;
- if (exists $self->{url_shorteners}->{$dom}) {
- if ($uri !~ /^https?:\/\/(?:www\.)?$_\/.+$/i) {
- dbg("Discarding URI: $uri");
- next;
- }
- $short_urls{$uri} = 1;
- next;
- }
- }
+ next unless $info->{domains} && $info->{cleaned};
+ if (_check_shortener_uri($uri, $conf)) {
+ $short_urls{$uri} = 1;
}
}
# Make sure we have some work to do
# Before we open any log files etc.
- my $count = scalar keys %short_urls;
- return unless $count gt 0;
+ return unless %short_urls;
+
+ # Initialize cache
+ $self->initialise_url_shortener_cache($conf);
- $self->initialise_url_shortener_cache($opts) if defined $self->{url_shortener_cache_type};
+ # Initialize LWP
+ my $ua = LWP::UserAgent->new();
+ $ua->{max_redirect} = 0;
+ $ua->{timeout} = 5;
+ $ua->env_proxy;
- my $max_short_urls = $pms->{main}->{conf}->{max_short_urls};
+ # Launch HTTP queries
+ my $lookups = 0;
foreach my $short_url (keys %short_urls) {
- next if $max_short_urls <= 0;
- my $location = $self->recursive_lookup($short_url, $pms);
- $max_short_urls--;
+ $self->recursive_lookup($short_url, $pms, $ua);
+ last if ++$lookups >= $conf->{max_short_urls};
+ }
+
+ # Automatically purge old entries
+ if ($self->{dbh} && $conf->{url_shortener_cache_autoclean}
+ && rand() < 1/$conf->{url_shortener_cache_autoclean})
+ {
+ dbg("cleaning stale cache entries");
+ eval { $self->{sth_delete}->execute(); };
+ if ($@) { dbg("cache cleaning failed: $@"); }
}
}
sub recursive_lookup {
- my ($self, $short_url, $pms, %been_here) = @_;
+ my ($self, $short_url, $pms, $ua, %been_here) = @_;
+ my $conf = $pms->{conf};
my $count = scalar keys %been_here;
- dbg("Redirection count $count") if $count gt 0;
+ dbg("redirection count $count") if $count;
if ($count >= 10) {
- dbg("Error: more than 10 shortener redirections");
+ dbg("found more than 10 shortener redirections");
# Fire test
- $self->{short_url_maxchain} = 1;
+ $pms->{short_url_maxchain} = 1;
return;
}
my $location;
- if ($self->{caching} && ($location = $self->cache_get($short_url))) {
- if ($self->{url_shortener_loginfo}) {
- info("Found cached $short_url => $location");
+ if (defined($location = $self->cache_get($short_url))) {
+ if ($conf->{url_shortener_loginfo}) {
+ info("found cached $short_url => $location");
} else {
- dbg("Found cached $short_url => $location");
+ dbg("found cached $short_url => $location");
}
} else {
# Not cached; do lookup
- if($count eq 0) {
- undef $pms->{short_url_200};
- undef $pms->{short_url_404};
- undef $pms->{short_url_chained};
- }
- my $response = $self->{ua}->head($short_url);
+ my $response = $ua->head($short_url);
if (!$response->is_redirect) {
dbg("URL is not redirect: $short_url = ".$response->status_line);
- $pms->{short_url_200} = 1 if($response->code == '200');
- $pms->{short_url_404} = 1 if($response->code == '404');
+ if ($response->code eq '200') {
+ $pms->{short_url_200} = 1;
+ } elsif ($response->code eq '404') {
+ $pms->{short_url_404} = 1;
+ }
return;
}
$location = $response->headers->{location};
- # Bail out if $short_url redirects to itself
- return if ($short_url eq $location);
- if ($self->{caching}) {
- if ($self->cache_add($short_url, $location)) {
- dbg("Added $short_url to cache");
- } else {
- dbg("Cannot add $short_url to cache");
- }
- }
- if($self->{url_shortener_loginfo}) {
- info("Found $short_url => $location");
+ if ($self->{url_shortener_loginfo}) {
+ info("found $short_url => $location");
} else {
- dbg("Found $short_url => $location");
+ dbg("found $short_url => $location");
}
}
+ # Update cache
+ $self->cache_add($short_url, $location);
+
+ # Bail out if $short_url redirects to itself
+ if ($short_url eq $location) {
+ dbg("URL is redirect to itself");
+ return;
+ }
+
# At this point we have a new URL in $response
$pms->{short_url} = 1;
# Set chained here otherwise we might mark a disabled page or
# redirect back to the same host as chaining incorrectly.
- $pms->{short_url_chained} = 1 if $count > 0;
-
- $pms->add_uri_detail_list($location);
+ $pms->{short_url_chained} = 1 if $count;
# Check if we are being redirected to a local page
# Don't recurse in this case...
- if($location !~ /^https?:/) {
- my($host) = ($short_url =~ /^(https?:\/\/\S+)\//);
- $location = "$host/$location";
- dbg("Looks like a local redirection: $short_url => $location");
- $pms->add_uri_detail_list($location);
- return $location;
- }
-
- # Check for recursion
- if ((my ($domain) = ($location =~ /^https?:\/\/(\S+)\//))) {
- if (exists $been_here{$location}) {
- # Loop detected
- dbg("Error: loop detected");
- $self->{short_url_loop} = 1;
- return $location;
+ if ($location !~ m{^[a-z]+://}i) {
+ my $orig_location = $location;
+ my $orig_short_url = $short_url;
+ # Strip to..
+ if (index($location, '/') == 0) {
+ $short_url =~ s{^([a-z]+://.*?)[/?#].*}{$1}; # ..absolute path
} else {
- my $tldsRE = $self->{main}->{registryboundaries}->{valid_tlds_re};
- if (exists $self->{url_shorteners}->{$domain}) {
- $been_here{$location} = 1;
- # Recurse...
- return $self->recursive_lookup($location, $pms, %been_here);
- } elsif($domain =~ /^(?!www)[a-z\d._-]{0,251}\.([a-z\d._-]{0,251}\.${tldsRE})/) {
- # if domain is a 3rd level domain check if there is a url shortener
- # on the 2nd level tld
- my $dom = '.' . $1;
- if (exists $self->{url_shorteners}->{$dom}) {
- $been_here{$location} = 1;
- # Recurse...
- return $self->recursive_lookup($location, $pms, %been_here);
- }
- }
+ $short_url =~ s{^([a-z]+://.*)/}{$1}; # ..relative path
}
+ $location = "$short_url/$location";
+ dbg("looks like a local redirection: $orig_short_url => $location ($orig_location)");
+ $pms->add_uri_detail_list($location) if !$pms->{uri_detail_list}->{$location};
+ return;
}
- # No recursion; just return the final location...
- return $location;
+ if (exists $been_here{$location}) {
+ # Loop detected
+ dbg("error: loop detected: $location");
+ $pms->{short_url_loop} = 1;
+ return;
+ }
+ $been_here{$location} = 1;
+ $pms->add_uri_detail_list($location) if !$pms->{uri_detail_list}->{$location};
+
+ # Check for recursion
+ if (_check_shortener_uri($location, $conf)) {
+ # Recurse...
+ $self->recursive_lookup($location, $pms, $ua, %been_here);
+ }
}
sub cache_add {
my ($self, $short_url, $decoded_url) = @_;
- return 0 if not $self->{caching};
- return 0 if((length($short_url) > 256) or (length($decoded_url) > 512));
+ return if !$self->{dbh};
+ return if length($short_url) > 256 || length($decoded_url) > 512;
- eval {
- $self->{sth_insert} = $self->{dbh}->prepare_cached("
- INSERT INTO short_url_cache (short_url, decoded_url, created, modified)
- VALUES (?,?,?,?)
- ");
- };
+ eval { $self->{sth_insert}->execute($short_url, $decoded_url); };
if ($@) {
- dbg("warn: $@");
- return 0;
- };
+ dbg("could not add to cache: $@");
+ }
- $self->{sth_insert}->execute($short_url, $decoded_url, time(), time());
- return 1;
+ return;
}
sub cache_get {
my ($self, $key) = @_;
- return if not $self->{caching};
- eval {
- $self->{sth_select} = $self->{dbh}->prepare_cached("
- SELECT decoded_url FROM short_url_cache
- WHERE short_url = ? AND modified > ?
- ");
- };
- if ($@) {
- dbg("warn: $@");
- return;
- }
+ return if !$self->{dbh};
- eval {
- $self->{sth_update} = $self->{dbh}->prepare_cached("
- UPDATE short_url_cache
- SET modified=?, hits=hits+1
- WHERE short_url = ?
- ");
- };
+ eval { $self->{sth_select}->execute($key); };
if ($@) {
- dbg("warn: $@");
- return;
+ dbg("cache get failed: $@");
+ return;
}
- my $tcheck = time() - $self->{url_shortener_cache_ttl};
- $self->{sth_select}->execute($key, $tcheck);
- my $row = $self->{sth_select}->fetchrow_array();
- if($row) {
- # Found cache entry; touch it to prevent expiry
- $self->{sth_update}->execute(time(),$key);
- $self->{sth_select}->finish();
- $self->{sth_update}->finish();
- return $row;
+ my @row = $self->{sth_select}->fetchrow_array();
+ if (@row) {
+ return $row[0];
}
- $self->{sth_select}->finish();
- $self->{sth_update}->finish();
return;
}
# Version features
sub has_short_url { 1 }
+sub has_autoclean { 1 }
1;
Modified: spamassassin/trunk/sql/decodeshorturl_mysql.sql
URL: http://svn.apache.org/viewvc/spamassassin/trunk/sql/decodeshorturl_mysql.sql?rev=1900462&r1=1900461&r2=1900462&view=diff
==============================================================================
--- spamassassin/trunk/sql/decodeshorturl_mysql.sql (original)
+++ spamassassin/trunk/sql/decodeshorturl_mysql.sql Sun May 1 17:51:21 2022
@@ -1,10 +1,10 @@
CREATE TABLE `short_url_cache`
-( `short_url` VARCHAR(256) NOT NULL ,
- `decoded_url` VARCHAR(512) NOT NULL ,
- `hits` MEDIUMINT NOT NULL DEFAULT 1,
- `created` INT(11) NOT NULL ,
- `modified` INT(11) NOT NULL ,
+( `short_url` VARCHAR(256) NOT NULL,
+ `decoded_url` VARCHAR(512) NOT NULL,
+ `hits` INT NOT NULL DEFAULT 1,
+ `created` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
+ `modified` TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (`short_url`)
) ENGINE = InnoDB;
-ALTER TABLE `short_url_cache` ADD INDEX `short_url_by_modified` (`short_url`, `modified`);
-ALTER TABLE `short_url_cache` ADD INDEX `short_url_modified` (`modified`);
+-- Maintaining index for cleaning is likely more expensive than occasional full table scan
+-- ALTER TABLE `short_url_cache` ADD INDEX `short_url_modified` (`modified`);
Added: spamassassin/trunk/sql/decodeshorturl_pg.sql
URL: http://svn.apache.org/viewvc/spamassassin/trunk/sql/decodeshorturl_pg.sql?rev=1900462&view=auto
==============================================================================
--- spamassassin/trunk/sql/decodeshorturl_pg.sql (added)
+++ spamassassin/trunk/sql/decodeshorturl_pg.sql Sun May 1 17:51:21 2022
@@ -0,0 +1,10 @@
+CREATE TABLE short_url_cache (
+ short_url VARCHAR(256) NOT NULL,
+ decoded_url VARCHAR(512) NOT NULL,
+ hits INT NOT NULL DEFAULT 1,
+ created TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
+ modified TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
+ PRIMARY KEY (short_url)
+);
+-- Maintaining index for cleaning is likely more expensive than occasional full table scan
+-- ALTER TABLE short_url_cache ADD INDEX short_url_modified (modified);
Modified: spamassassin/trunk/sql/decodeshorturl_sqlite.sql
URL: http://svn.apache.org/viewvc/spamassassin/trunk/sql/decodeshorturl_sqlite.sql?rev=1900462&r1=1900461&r2=1900462&view=diff
==============================================================================
--- spamassassin/trunk/sql/decodeshorturl_sqlite.sql (original)
+++ spamassassin/trunk/sql/decodeshorturl_sqlite.sql Sun May 1 17:51:21 2022
@@ -1,11 +1,2 @@
-CREATE TABLE short_url_cache (
- short_url TEXT PRIMARY KEY NOT NULL,
- decoded_url TEXT NOT NULL,
- hits INTEGER NOT NULL DEFAULT 1,
- created INTEGER NOT NULL ,
- modified INTEGER NOT NULL
-);
-CREATE INDEX short_url_by_modified
- ON short_url_cache(short_url, modified);
-CREATE INDEX short_url_modified
- ON short_url_cache(modified);
+-- Manual database creation for SQLite is not necessary,
+-- DecodeShortURLs plugin will create and clean database automatically.
Modified: spamassassin/trunk/t/data/spam/decodeshorturl/base.eml
URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/data/spam/decodeshorturl/base.eml?rev=1900462&r1=1900461&r2=1900462&view=diff
==============================================================================
--- spamassassin/trunk/t/data/spam/decodeshorturl/base.eml (original)
+++ spamassassin/trunk/t/data/spam/decodeshorturl/base.eml Sun May 1 17:51:21 2022
@@ -11,3 +11,7 @@ http://bit.ly/30yH6WK
which should link to:
https://spamassassin.apache.org/
+
+should get 404:
+http://tinyurl.com/qqqxxxyyyzzz
+
Modified: spamassassin/trunk/t/decodeshorturl.t
URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/decodeshorturl.t?rev=1900462&r1=1900461&r2=1900462&view=diff
==============================================================================
--- spamassassin/trunk/t/decodeshorturl.t (original)
+++ spamassassin/trunk/t/decodeshorturl.t Sun May 1 17:51:21 2022
@@ -6,7 +6,7 @@ use SATest; sa_t_init("decodeshorturl");
use Test::More;
plan skip_all => "Net tests disabled" unless conf_bool('run_net_tests');
-plan tests => 2;
+plan tests => 3;
tstpre ("
loadplugin Mail::SpamAssassin::Plugin::DecodeShortURLs
@@ -19,21 +19,23 @@ dns_query_restriction allow tinyurl.com
url_shortener bit.ly
url_shortener tinyurl.com
-ifplugin Mail::SpamAssassin::Plugin::DecodeShortURLs
- body HAS_SHORT_URL eval:short_url()
- describe HAS_SHORT_URL Message contains one or more shortened URLs
-
- body SHORT_URL_CHAINED eval:short_url_chained()
- describe SHORT_URL_CHAINED Message has shortened URL chained to other shorteners
-endif
+body HAS_SHORT_URL eval:short_url()
+describe HAS_SHORT_URL Message contains one or more shortened URLs
+
+body SHORT_URL_CHAINED eval:short_url_chained()
+describe SHORT_URL_CHAINED Message has shortened URL chained to other shorteners
+
+body SHORT_URL_404 eval:short_url_404()
+describe SHORT_URL_404 Short URL is invalid
");
%patterns_url = (
- q{ HAS_SHORT_URL } => 'Message contains one or more shortened URLs',
+ q{ 1.0 HAS_SHORT_URL } => 'Message contains one or more shortened URLs',
+ q{ 1.0 HAS_SHORT_404 } => 'Message contains one or more shortened URLs',
);
%patterns_url_chain = (
- q{ SHORT_URL_CHAINED } => 'Message has shortened URL chained to other shorteners',
+ q{ 1.0 SHORT_URL_CHAINED } => 'Message has shortened URL chained to other shorteners',
);
%patterns = %patterns_url;