You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by fe...@apache.org on 2005/03/14 21:49:41 UTC
svn commit: r157460 - in spamassassin/trunk: lib/Mail/SpamAssassin/HTML.pm
lib/Mail/SpamAssassin/Message.pm rules/70_testing.cf
Author: felicity
Date: Mon Mar 14 12:49:37 2005
New Revision: 157460
URL: http://svn.apache.org/viewcvs?view=rev&rev=157460
Log:
bug 3661: add more support for <span ...>, display:none, color, and background-color. fix issues with visible vs invisible rendered text. add new test rules for invisible span and invisible text.
Modified:
spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm
spamassassin/trunk/rules/70_testing.cf
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm?view=diff&r1=157459&r2=157460
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm Mon Mar 14 12:49:37 2005
@@ -64,7 +64,7 @@
# elements that change text style
my %elements_text_style = map {; $_ => 1 }
- qw( body font table tr th td big small basefont marquee ),
+ qw( body font table tr th td big small basefont marquee span ),
;
# elements that insert whitespace
@@ -92,6 +92,7 @@
$ok_attributes{td}{$_} = 1 for qw( bgcolor );
$ok_attributes{th}{$_} = 1 for qw( bgcolor );
$ok_attributes{tr}{$_} = 1 for qw( bgcolor );
+$ok_attributes{span}{$_} = 1 for qw( style );
sub new {
my ($class) = @_;
@@ -140,8 +141,9 @@
$info->{cleaned} = \@tmp;
# list out the URLs for debugging ...
if (Mail::SpamAssassin::dbg_check('uri')) {
+ dbg("uri: html uri found, $uri");
foreach my $nuri (@tmp) {
- dbg("uri: uri found, $nuri");
+ dbg("uri: cleaned html uri, $nuri");
}
}
}
@@ -250,7 +252,6 @@
$self->{closed_body} = 0;
$self->{closed_extra} = 0;
$self->{text} = []; # rendered text
- $self->{text_invisible} = ''; # vec of invisibility state in $self->{text}
$self->{length} += $1 if (length($text) =~ m/^(\d+)$/); # untaint
@@ -449,7 +450,6 @@
}
}
-# body, font, table, tr, th, td, big, small
sub text_style {
my ($self, $tag, $attr, $num) = @_;
@@ -504,6 +504,39 @@
# relative font size
$new{size} = $self->{basefont} + $1;
}
+ elsif ($tag eq "span" && $name eq "style") { # inline CSS on <span style="...">
+ my $style = $new{style} = $attr->{style};
+ my @parts = split(/;/, $style); # one CSS declaration per element
+ foreach (@parts) {
+ if (/\s*(background-)?color:\s*([^;]+)\s*/i) {
+ my $whcolor = $1 ? 'bgcolor' : 'fgcolor';
+ my $value = lc $2;
+ # rgb(r,g,b) values become #rrggbb; anything else is handed to name_to_rgb()
+ if ($value =~ /rgb/) {
+ $value =~ tr/0-9,//cd; # keep only digits and the separating commas
+ my @rgb = split(/,/, $value);
+ splice @rgb, 3 if @rgb > 3; # drop extras; unguarded splice warns on short lists
+ for(my $i=0; $i<3; $i++) {
+ if (!defined $rgb[$i] || $rgb[$i] eq '') {
+ $rgb[$i] = 0; # fill the missing component ($_ here aliases the @parts element)
+ }
+ elsif ($rgb[$i] > 255) {
+ $rgb[$i] = 255;
+ }
+ }
+ # all three components are now defined digit strings no larger than 255
+ $new{$whcolor} = sprintf("#%02x%02x%02x", @rgb);
+ }
+ else {
+ $new{$whcolor} = name_to_rgb($value);
+ }
+ }
+ elsif (/\s*display:\s*none\b/i) {
+ $new{display} = 'none';
+ $self->put_results(span_invisible => 1);
+ }
+ }
+ }
else {
if ($name eq "bgcolor") {
# overwrite with hex value, $new{bgcolor} is set below
@@ -540,6 +573,7 @@
my $fg = $self->{text_style}[-1]->{fgcolor};
my $bg = $self->{text_style}[-1]->{bgcolor};
+ my $display = $self->{text_style}[-1]->{display};
# invisibility
if (substr($fg,-6) eq substr($bg,-6)) {
@@ -572,6 +606,11 @@
}
}
+ # <span style="display: none">
+ if ($display && lc $display eq 'none') {
+ return 1;
+ }
+
return 0;
}
@@ -699,6 +738,11 @@
my $text = shift;
my %display = @_;
+ # Unless it's specified to be invisible, then it's not invisible. ;)
+ if (!exists $display{invisible}) {
+ $display{invisible} = 0;
+ }
+
if ($display{whitespace}) {
# trim trailing whitespace from previous element if it was not whitespace
if (@{ $self->{text} } &&
@@ -788,6 +832,7 @@
if ($invisible_for_bayes) {
$self->display_text($text, invisible => 1);
+ $self->put_results(invisible_text => 1);
}
else {
$self->display_text($text);
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm?view=diff&r1=157459&r2=157460
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm Mon Mar 14 12:49:37 2005
@@ -794,6 +794,10 @@
my @parts = $self->find_parts(qr/^(?:text|message)\b/i,1);
return $self->{text_rendered} unless @parts;
+ # the html metadata may have already been set, so let's not bother if it's
+ # already been done.
+ my $html_needs_setting = !exists $self->{metadata}->{html};
+
# Go through each part
my $text = $self->get_header ('subject') || '';
for(my $pt = 0 ; $pt <= $#parts ; $pt++ ) {
@@ -808,8 +812,12 @@
$text .= $rnd;
# TVD - if there are multiple parts, what should we do?
- # right now, just use the last one ...
- $self->{metadata}->{html} = $p->{html_results} if ( $type eq 'text/html' );
+ # right now, just use the last one. we may need to give some priority
+ # at some point, ie: use text/html rendered if it exists, or
+ # text/plain rendered as html otherwise.
+ if ($html_needs_setting && $type eq 'text/html') {
+ $self->{metadata}->{html} = $p->{html_results};
+ }
}
else {
$text .= $p->decode();
@@ -846,6 +854,10 @@
my @parts = $self->find_parts(qr/^(?:text|message)\b/i,1);
return $self->{text_visible_rendered} unless @parts;
+ # the html metadata may have already been set, so let's not bother if it's
+ # already been done.
+ my $html_needs_setting = !exists $self->{metadata}->{html};
+
# Go through each part
my $text = $self->get_header ('subject') || '';
for(my $pt = 0 ; $pt <= $#parts ; $pt++ ) {
@@ -858,6 +870,14 @@
if ( defined $rnd ) {
# Only text/* types are rendered ...
$text .= $rnd;
+
+ # TVD - if there are multiple parts, what should we do?
+ # right now, just use the last one. we may need to give some priority
+ # at some point, ie: use text/html rendered if it exists, or
+ # text/plain rendered as html otherwise.
+ if ($html_needs_setting && $type eq 'text/html') {
+ $self->{metadata}->{html} = $p->{html_results};
+ }
}
else {
$text .= $p->decode();
@@ -888,6 +908,10 @@
my @parts = $self->find_parts(qr/^(?:text|message)\b/i,1);
return $self->{text_invisible_rendered} unless @parts;
+ # the html metadata may have already been set, so let's not bother if it's
+ # already been done.
+ my $html_needs_setting = !exists $self->{metadata}->{html};
+
# Go through each part
my $text = '';
for(my $pt = 0 ; $pt <= $#parts ; $pt++ ) {
@@ -900,6 +924,14 @@
if ( defined $rnd ) {
# Only text/* types are rendered ...
$text .= $rnd;
+
+ # TVD - if there are multiple parts, what should we do?
+ # right now, just use the last one. we may need to give some priority
+ # at some point, ie: use text/html rendered if it exists, or
+ # text/plain rendered as html otherwise.
+ if ($html_needs_setting && $type eq 'text/html') {
+ $self->{metadata}->{html} = $p->{html_results};
+ }
}
}
Modified: spamassassin/trunk/rules/70_testing.cf
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/rules/70_testing.cf?view=diff&r1=157459&r2=157460
==============================================================================
--- spamassassin/trunk/rules/70_testing.cf (original)
+++ spamassassin/trunk/rules/70_testing.cf Mon Mar 14 12:49:37 2005
@@ -880,3 +880,6 @@
# bug 2733
body T_HTML_WEB_BUGS eval:html_test('web_bugs_2')
+# bug 3661
+body T_HTML_INVIS_SPAN eval:html_test('span_invisible')
+body T_HTML_INVIS_TEXT eval:html_test('invisible_text')