You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2022/05/25 09:46:03 UTC

svn commit: r1901227 - in /spamassassin/trunk: lib/Mail/SpamAssassin/Plugin/DecodeShortURLs.pm t/decodeshorturl.t

Author: hege
Date: Wed May 25 09:46:02 2022
New Revision: 1901227

URL: http://svn.apache.org/viewvc?rev=1901227&view=rev
Log:
- Add short_url_redir() function to check if a valid redirection was found
- short_url() now hits whenever a URL matching a url_shortener entry is found; no HTTP request is required (fixes the rule never hitting when running local tests only or when the LWP::UserAgent module is missing)

Modified:
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DecodeShortURLs.pm
    spamassassin/trunk/t/decodeshorturl.t

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DecodeShortURLs.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DecodeShortURLs.pm?rev=1901227&r1=1901226&r2=1901227&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DecodeShortURLs.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DecodeShortURLs.pm Wed May 25 09:46:02 2022
@@ -29,6 +29,9 @@ DecodeShortURLs - Check for shortened UR
   body HAS_SHORT_URL          eval:short_url()
   describe HAS_SHORT_URL      Message has one or more shortened URLs
 
+  body SHORT_URL_REDIR        eval:short_url_redir()
+  describe SHORT_URL_REDIR    Message has shortened URL that resulted in a valid redirection
+
   body SHORT_URL_CHAINED      eval:short_url_chained()
   describe SHORT_URL_CHAINED  Message has shortened URL chained to other shorteners
 
@@ -100,18 +103,19 @@ sub new {
   my $self = $class->SUPER::new($mailsaobject);
   bless ($self, $class);
 
-  if ($mailsaobject->{local_tests_only} || !HAS_LWP_USERAGENT) {
-    dbg("local tests only, disabling checks");
-    $self->{disabled} = 1;
+  if ($mailsaobject->{local_tests_only}) {
+    dbg("local tests only, disabling HTTP requests");
+    $self->{net_disabled} = 1;
   }
   elsif (!HAS_LWP_USERAGENT) {
-    dbg("module LWP::UserAgent not installed, disabling checks");
-    $self->{disabled} = 1;
+    dbg("module LWP::UserAgent not installed, disabling HTTP requests");
+    $self->{net_disabled} = 1;
   }
 
   $self->set_config($mailsaobject->{conf});
   $self->register_method_priority ('check_dnsbl', -10);
   $self->register_eval_rule('short_url', $Mail::SpamAssassin::Conf::TYPE_BODY_EVALS);
+  $self->register_eval_rule('short_url_redir', $Mail::SpamAssassin::Conf::TYPE_BODY_EVALS);
   $self->register_eval_rule('short_url_200', $Mail::SpamAssassin::Conf::TYPE_BODY_EVALS);
   $self->register_eval_rule('short_url_404', $Mail::SpamAssassin::Conf::TYPE_BODY_EVALS);
   $self->register_eval_rule('short_url_code', $Mail::SpamAssassin::Conf::TYPE_BODY_EVALS);
@@ -613,9 +617,20 @@ sub initialise_url_shortener_cache {
 sub short_url {
   my ($self, $pms) = @_;
 
+  # Run checks if check_dnsbl didn't
+  if ($self->{net_disabled}) {
+    $self->_check_short($pms);
+  }
+
   return $pms->{short_url};
 }
 
+sub short_url_redir {
+  my ($self, $pms) = @_;
+
+  return $pms->{short_url_redir};
+}
+
 sub short_url_200 {
   my ($self, $pms) = @_;
 
@@ -690,9 +705,13 @@ sub _check_shortener_uri {
 sub check_dnsbl {
   my ($self, $opts) = @_;
 
-  return if $self->{disabled};
+  $self->_check_short($opts->{permsgstatus});
+}
+
+sub _check_short {
+  my ($self, $pms) = @_;
 
-  my $pms = $opts->{permsgstatus};
+  return if $pms->{short_url_checked}++;
   my $conf = $pms->{conf};
 
   # Sort short URLs into hash to de-dup them
@@ -706,10 +725,15 @@ sub check_dnsbl {
     }
   }
 
-  # Make sure we have some work to do
-  # Before we open any log files etc.
+  # Bail out if no shortener was found
   return unless %short_urls;
 
+  # Mark that a URL shortener was found
+  $pms->{short_url} = 1;
+
+  # Bail out if network lookups not enabled
+  return if $self->{net_disabled};
+
   # Initialize cache
   $self->initialise_url_shortener_cache($conf);
 
@@ -795,8 +819,8 @@ sub recursive_lookup {
     return;
   }
 
-  # At this point we have a new URL in $response
-  $pms->{short_url} = 1;
+  # At this point we have a valid redirection and new URL in $response
+  $pms->{short_url_redir} = 1;
 
   # Set chained here otherwise we might mark a disabled page or
   # redirect back to the same host as chaining incorrectly.
@@ -888,5 +912,9 @@ sub has_get { 1 } # url_shortener_get
 sub has_clear { 1 } # clear_url_shortener
 sub has_timeout { 1 } # url_shortener_timeout
 sub has_max_redirections { 1 } # max_short_url_redirections
+# short_url() will always hit if matching url_shortener was found, even
+# without HTTP requests.  To check if a valid HTTP redirection response was
+# seen, use short_url_redir().
+sub has_short_url_redir { 1 }
 
 1;

Modified: spamassassin/trunk/t/decodeshorturl.t
URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/decodeshorturl.t?rev=1901227&r1=1901226&r2=1901227&view=diff
==============================================================================
--- spamassassin/trunk/t/decodeshorturl.t (original)
+++ spamassassin/trunk/t/decodeshorturl.t Wed May 25 09:46:02 2022
@@ -11,7 +11,7 @@ use constant HAS_DBD_SQLITE => eval { re
 use constant SQLITE => (HAS_DBI && HAS_DBD_SQLITE);
 
 plan skip_all => "Net tests disabled"                unless conf_bool('run_net_tests');
-my $tests = 6;
+my $tests = 8;
 $tests += 4 if (SQLITE);
 plan tests => $tests;
 
@@ -29,17 +29,10 @@ url_shortener .page.link
 url_shortener_get bit.ly
 
 body HAS_SHORT_URL              eval:short_url()
-describe HAS_SHORT_URL          Message contains one or more shortened URLs
-
+body HAS_SHORT_REDIR            eval:short_url_redir()
 body SHORT_URL_CHAINED          eval:short_url_chained()
-describe SHORT_URL_CHAINED      Message has shortened URL chained to other shorteners
-
 body SHORT_URL_404		eval:short_url_404()
-describe SHORT_URL_404		Short URL is invalid
-
 body SHORT_URL_C404		eval:short_url_code('404')
-describe SHORT_URL_C404		Short URL is invalid
-
 uri URI_BITLY_BLOCKED           m,^https://bitly\.com/a/blocked,
 uri URI_PAGE_LINK		m,^http://activity\.wps\.com/,
 });
@@ -50,6 +43,7 @@ uri URI_PAGE_LINK		m,^http://activity\.w
 
 %patterns = (
    q{ 1.0 HAS_SHORT_URL } => '',
+   q{ 1.0 HAS_SHORT_REDIR } => '',
    q{ 1.0 SHORT_URL_404 } => '',
    q{ 1.0 SHORT_URL_C404 } => '',
    q{ 1.0 URI_BITLY_BLOCKED } => '',
@@ -66,6 +60,16 @@ ok_all_patterns();
 
 
 ###
+### short_url() should hit even without network enabled
+###
+
+%patterns = (
+   q{ 1.0 HAS_SHORT_URL } => '',
+);
+sarun ("-t -L < data/spam/decodeshorturl/base.eml", \&patterns_run_cb);
+ok_all_patterns();
+
+###
 ### With SQLITE caching
 ###