You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by qu...@apache.org on 2004/02/28 04:11:34 UTC
svn commit: rev 6918 - incubator/spamassassin/trunk/lib/Mail/SpamAssassin
Author: quinlan
Date: Fri Feb 27 19:11:33 2004
New Revision: 6918
Modified:
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm
Log:
only process HTML elements
stub subroutine for CSS styles
Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm Fri Feb 27 19:11:33 2004
@@ -52,6 +52,9 @@
# attributes: stuff we accept
my $re_attr_extra = 'family|wrap|/';
+# style attribute not accepted
+my $re_attr_no_style = 'base|basefont|head|html|meta|param|script|style|title';
+
# style attributes
my %ok_attribute = (
text => [qw(body)],
@@ -171,17 +174,19 @@
sub html_tag {
my ($self, $tag, $attr, $num) = @_;
- if ($tag =~ /^(?:$re_strict|$re_loose|$re_other)$/io) {
+ my $is_element = ($tag =~ /^(?:$re_strict|$re_loose|$re_other)$/io);
+
+ # general tracking
+ if ($is_element) {
$self->{html}{elements}++;
$self->{html}{elements_seen}++ if !exists $self->{html}{"inside_$tag"};
}
$self->{html}{tags}++;
$self->{html}{tags_seen}++ if !exists $self->{html}{"inside_$tag"};
-
$self->{html}{"inside_$tag"} += $num;
$self->{html}{"inside_$tag"} = 0 if $self->{html}{"inside_$tag"} < 0;
- # attributes
+ # check attributes
for my $name (keys %$attr) {
if ($name !~ /^(?:$re_attr|$re_attr_extra)$/io) {
$self->{html}{attr_bad}++;
@@ -192,24 +197,30 @@
$self->{"attr_seen_$name"} = 1;
}
- # TODO: cover other changes
- if ($tag =~ /^(?:body|font|table|tr|th|td|big|small|basefont|marquee)$/) {
- $self->text_style($tag, $attr, $num);
- }
-
- if ($num == 1) {
- $self->html_format($tag, $attr, $num);
- $self->html_uri($tag, $attr, $num);
- $self->html_tests($tag, $attr, $num);
-
- $self->{html_last_tag} = $tag;
- }
-
- if ($tag =~ /^(?:b|i|u|strong|em|big|center|h\d)$/) {
- $self->{html}{shouting} += $num;
-
- if ($self->{html}{shouting} > $self->{html}{max_shouting}) {
- $self->{html}{max_shouting} = $self->{html}{shouting};
+ # ignore non-elements
+ if ($is_element) {
+ if ($tag =~ /^(?:body|font|table|tr|th|td|big|small|basefont|marquee)$/) {
+ $self->text_style($tag, $attr, $num);
+ }
+ # TODO: cover "style" and CSS
+ if ($tag !~ /^(?:$re_attr_no_style)$/ && exists $attr->{style}) {
+ $self->css_style($tag, $attr, $num);
+ }
+
+ # start tags
+ if ($num == 1) {
+ $self->html_format($tag, $attr, $num);
+ $self->html_uri($tag, $attr, $num);
+ $self->html_tests($tag, $attr, $num);
+ $self->{html_last_tag} = $tag;
+ }
+
+ # shouting
+ if ($tag =~ /^(?:b|i|u|strong|em|big|center|h\d)$/) {
+ $self->{html}{shouting} += $num;
+ if ($self->{html}{shouting} > $self->{html}{max_shouting}) {
+ $self->{html}{max_shouting} = $self->{html}{shouting};
+ }
}
}
}
@@ -647,6 +658,12 @@
while (my %current = %{ pop @{ $self->{text_style} } }) {
last if $current{tag} eq $tag;
}
+}
+
+# process CSS style attribute
+sub css_style {
+ my ($self, $tag, $attr, $num) = @_;
+
}
# body, font, table, tr, th, td, big, small