You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jh...@apache.org on 2018/07/10 17:45:56 UTC

svn commit: r1835588 - /spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm

Author: jhardin
Date: Tue Jul 10 17:45:56 2018
New Revision: 1835588

URL: http://svn.apache.org/viewvc?rev=1835588&view=rev
Log:
Add capture of URI from HTTP "refresh" meta - observed in phishing spam. See bug 6784.

Modified:
    spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm?rev=1835588&r1=1835587&r2=1835588&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm Tue Jul 10 17:45:56 2018
@@ -66,7 +66,7 @@ my %elements_whitespace = map {; $_ => 1
 
 # elements that push URIs
 my %elements_uri = map {; $_ => 1 }
-  qw( body table tr td a area link img frame iframe embed script form base bgsound ),
+  qw( body table tr td a area link img frame iframe embed script form base bgsound meta ),
 ;
 
 # style attribute not accepted
@@ -406,6 +406,17 @@ sub html_uri {
       }
     }
   }
+  elsif ($tag eq "meta" &&
+    exists $attr->{'http-equiv'} &&
+    exists $attr->{content} &&
+    $attr->{'http-equiv'} =~ /refresh/i &&
+    $attr->{content} =~ /\burl\s*=/i)
+  {
+      my $uri = $attr->{content};
+      $uri =~ s/^.*\burl\s*=\s*//i;
+      $uri =~ s/\s*;.*//i;
+      $self->push_uri($tag, $uri);
+  }
 }
 
 # this might not be quite right, may need to pay attention to table nesting
@@ -680,6 +691,8 @@ sub html_tests {
   {
     $self->{charsets} .= exists $self->{charsets} ? " $1" : $1;
   }
+
+  # todo: capture URI from meta refresh tag
 }
 
 sub display_text {