You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by fe...@apache.org on 2005/03/14 21:49:41 UTC

svn commit: r157460 - in spamassassin/trunk: lib/Mail/SpamAssassin/HTML.pm lib/Mail/SpamAssassin/Message.pm rules/70_testing.cf

Author: felicity
Date: Mon Mar 14 12:49:37 2005
New Revision: 157460

URL: http://svn.apache.org/viewcvs?view=rev&rev=157460
Log:
bug 3661: add more support for <span ...>, display:none, color, and background-color.  fix issues with visible vs invisible rendered text.  add new test rules for invisible span and invisible text.

Modified:
    spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm
    spamassassin/trunk/rules/70_testing.cf

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm?view=diff&r1=157459&r2=157460
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm Mon Mar 14 12:49:37 2005
@@ -64,7 +64,7 @@
 
 # elements that change text style
 my %elements_text_style = map {; $_ => 1 }
-  qw( body font table tr th td big small basefont marquee ),
+  qw( body font table tr th td big small basefont marquee span ),
 ;
 
 # elements that insert whitespace
@@ -92,6 +92,7 @@
 $ok_attributes{td}{$_} = 1 for qw( bgcolor );
 $ok_attributes{th}{$_} = 1 for qw( bgcolor );
 $ok_attributes{tr}{$_} = 1 for qw( bgcolor );
+$ok_attributes{span}{$_} = 1 for qw( style );
 
 sub new {
   my ($class) = @_;
@@ -140,8 +141,9 @@
       $info->{cleaned} = \@tmp;
       # list out the URLs for debugging ...
       if (Mail::SpamAssassin::dbg_check('uri')) {
+        dbg("uri: html uri found, $uri");
         foreach my $nuri (@tmp) {
-          dbg("uri: uri found, $nuri");
+          dbg("uri: cleaned html uri, $nuri");
         }
       }
     }
@@ -250,7 +252,6 @@
   $self->{closed_body} = 0;
   $self->{closed_extra} = 0;
   $self->{text} = [];		# rendered text
-  $self->{text_invisible} = '';	# vec of invisibility state in $self->{text}
 
   $self->{length} += $1 if (length($text) =~ m/^(\d+)$/);	# untaint
 
@@ -449,7 +450,6 @@
   }
 }
 
-# body, font, table, tr, th, td, big, small
 sub text_style {
   my ($self, $tag, $attr, $num) = @_;
 
@@ -504,6 +504,39 @@
 	# relative font size
 	$new{size} = $self->{basefont} + $1;
       }
+      elsif ($tag eq "span" && $name eq "style") {
+        my $style = $new{style} = $attr->{style};
+	my @parts = split(/;/, $style);
+	foreach (@parts) {
+	  if (/\s*(background-)?color:\s*([^;]+)\s*/i) {
+	    my $whcolor = $1 ? 'bgcolor' : 'fgcolor';
+	    my $value = lc $2;
+
+	    if ($value =~ /rgb/) {
+	      $value =~ tr/0-9,//cd;
+	      my @rgb = split(/,/, $value);
+	      splice @rgb, 3;
+	      for(my $i=0; $i<3; $i++) {
+	        if (!defined $rgb[$i]) {
+	          $_ = 0;
+	        }
+	        elsif ($rgb[$i] > 255) {
+	          $rgb[$i] = 255;
+                }
+	      }
+
+              $new{$whcolor} = sprintf("#%02x%02x%02x", @rgb);
+            }
+	    else {
+	      $new{$whcolor} = name_to_rgb($value);
+	    }
+	  }
+	  elsif (/\s*display:\s*none\b/i) {
+	    $new{display} = 'none';
+            $self->put_results(span_invisible => 1);
+          }
+	}
+      }
       else {
 	if ($name eq "bgcolor") {
 	  # overwrite with hex value, $new{bgcolor} is set below
@@ -540,6 +573,7 @@
 
   my $fg = $self->{text_style}[-1]->{fgcolor};
   my $bg = $self->{text_style}[-1]->{bgcolor};
+  my $display = $self->{text_style}[-1]->{display};
 
   # invisibility
   if (substr($fg,-6) eq substr($bg,-6)) {
@@ -572,6 +606,11 @@
     }
   }
 
+  # <span style="display: none">
+  if ($display && lc $display eq 'none') {
+    return 1;
+  }
+
   return 0;
 }
 
@@ -699,6 +738,11 @@
   my $text = shift;
   my %display = @_;
 
+  # Unless it's specified to be invisible, then it's not invisible. ;)
+  if (!exists $display{invisible}) {
+    $display{invisible} = 0;
+  }
+
   if ($display{whitespace}) {
     # trim trailing whitespace from previous element if it was not whitespace
     if (@{ $self->{text} } &&
@@ -788,6 +832,7 @@
 
   if ($invisible_for_bayes) {
     $self->display_text($text, invisible => 1);
+    $self->put_results(invisible_text => 1);
   }
   else {
     $self->display_text($text);

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm?view=diff&r1=157459&r2=157460
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm Mon Mar 14 12:49:37 2005
@@ -794,6 +794,10 @@
   my @parts = $self->find_parts(qr/^(?:text|message)\b/i,1);
   return $self->{text_rendered} unless @parts;
 
+  # the html metadata may have already been set, so let's not bother if it's
+  # already been done.
+  my $html_needs_setting = !exists $self->{metadata}->{html};
+
   # Go through each part
   my $text = $self->get_header ('subject') || '';
   for(my $pt = 0 ; $pt <= $#parts ; $pt++ ) {
@@ -808,8 +812,12 @@
       $text .= $rnd;
 
       # TVD - if there are multiple parts, what should we do?
-      # right now, just use the last one ...
-      $self->{metadata}->{html} = $p->{html_results} if ( $type eq 'text/html' );
+      # right now, just use the last one.  we may need to give some priority
+      # at some point, ie: use text/html rendered if it exists, or
+      # text/plain rendered as html otherwise.
+      if ($html_needs_setting && $type eq 'text/html') {
+        $self->{metadata}->{html} = $p->{html_results};
+      }
     }
     else {
       $text .= $p->decode();
@@ -846,6 +854,10 @@
   my @parts = $self->find_parts(qr/^(?:text|message)\b/i,1);
   return $self->{text_visible_rendered} unless @parts;
 
+  # the html metadata may have already been set, so let's not bother if it's
+  # already been done.
+  my $html_needs_setting = !exists $self->{metadata}->{html};
+
   # Go through each part
   my $text = $self->get_header ('subject') || '';
   for(my $pt = 0 ; $pt <= $#parts ; $pt++ ) {
@@ -858,6 +870,14 @@
     if ( defined $rnd ) {
       # Only text/* types are rendered ...
       $text .= $rnd;
+
+      # TVD - if there are multiple parts, what should we do?
+      # right now, just use the last one.  we may need to give some priority
+      # at some point, ie: use text/html rendered if it exists, or
+      # text/plain rendered as html otherwise.
+      if ($html_needs_setting && $type eq 'text/html') {
+        $self->{metadata}->{html} = $p->{html_results};
+      }
     }
     else {
       $text .= $p->decode();
@@ -888,6 +908,10 @@
   my @parts = $self->find_parts(qr/^(?:text|message)\b/i,1);
   return $self->{text_invisible_rendered} unless @parts;
 
+  # the html metadata may have already been set, so let's not bother if it's
+  # already been done.
+  my $html_needs_setting = !exists $self->{metadata}->{html};
+
   # Go through each part
   my $text = '';
   for(my $pt = 0 ; $pt <= $#parts ; $pt++ ) {
@@ -900,6 +924,14 @@
     if ( defined $rnd ) {
       # Only text/* types are rendered ...
       $text .= $rnd;
+
+      # TVD - if there are multiple parts, what should we do?
+      # right now, just use the last one.  we may need to give some priority
+      # at some point, ie: use text/html rendered if it exists, or
+      # text/plain rendered as html otherwise.
+      if ($html_needs_setting && $type eq 'text/html') {
+        $self->{metadata}->{html} = $p->{html_results};
+      }
     }
   }
 

Modified: spamassassin/trunk/rules/70_testing.cf
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/rules/70_testing.cf?view=diff&r1=157459&r2=157460
==============================================================================
--- spamassassin/trunk/rules/70_testing.cf (original)
+++ spamassassin/trunk/rules/70_testing.cf Mon Mar 14 12:49:37 2005
@@ -880,3 +880,6 @@
 # bug 2733
 body T_HTML_WEB_BUGS               eval:html_test('web_bugs_2')
 
+# bug 3661
+body T_HTML_INVIS_SPAN	eval:html_test('span_invisible')
+body T_HTML_INVIS_TEXT	eval:html_test('invisible_text')