You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by qu...@apache.org on 2004/02/19 05:04:44 UTC

svn commit: rev 6761 - in incubator/spamassassin/trunk: lib/Mail/SpamAssassin rules

Author: quinlan
Date: Wed Feb 18 20:04:43 2004
New Revision: 6761

Modified:
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgContainer.pm
   incubator/spamassassin/trunk/rules/70_testing.cf
Log:
bug 2996: HTML attribute testing


Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/HTML.pm	Wed Feb 18 20:04:43 2004
@@ -46,6 +46,12 @@
 # other non-standard tags
 $re_other = 'o:\w+/?|x-sigsep|x-tab';
 
+# attributes: HTML 4.01 deprecated, loose DTD, frameset DTD
+my $re_attr = 'abbr|accept-charset|accept|accesskey|action|align|alink|alt|archive|axis|background|bgcolor|border|cellpadding|cellspacing|char|charoff|charset|checked|cite|class|classid|clear|code|codebase|codetype|color|cols|colspan|compact|content|coords|data|datetime|declare|defer|dir|disabled|enctype|face|for|frame|frameborder|headers|height|href|hreflang|hspace|http-equiv|id|ismap|label|lang|language|link|longdesc|marginheight|marginwidth|maxlength|media|method|multiple|name|nohref|noresize|noshade|nowrap|object|onblur|onchange|onclick|ondblclick|onfocus|onkeydown|onkeypress|onkeyup|onload|onmousedown|onmousemove|onmouseout|onmouseover|onmouseup|onreset|onselect|onsubmit|onunload|profile|prompt|readonly|rel|rev|rows|rowspan|rules|scheme|scope|scrolling|selected|shape|size|span|src|standby|start|style|summary|tabindex|target|text|title|type|usemap|valign|value|valuetype|version|vlink|vspace|width';
+
+# attributes: extra names, outside the list above, that we also accept
+my $re_attr_extra = 'family|wrap|/';
+
 # style attributes
 my %ok_attribute = (
 		 text => [qw(body)],
@@ -175,6 +181,17 @@
   $self->{html}{"inside_$tag"} += $num;
   $self->{html}{"inside_$tag"} = 0 if $self->{html}{"inside_$tag"} < 0;
 
+  # attributes: tally all vs. unrecognized names (total and first-seen)
+  for my $name (keys %$attr) {
+    if ($name !~ /^(?:$re_attr|$re_attr_extra)$/io) {
+      $self->{html}{attr_bad}++;
+      $self->{html}{attr_unique_bad}++ if !exists $self->{"attr_seen_$name"};
+    }
+    $self->{html}{attr_all}++;
+    $self->{html}{attr_unique_all}++ if !exists $self->{"attr_seen_$name"};
+    $self->{"attr_seen_$name"} = 1;
+  }
+
   # TODO: cover other changes
   if ($tag =~ /^(?:body|font|table|tr|th|td|big|small|basefont|marquee)$/) {
     $self->text_style($tag, $attr, $num);
@@ -828,7 +845,6 @@
 				    ($size =~ /\+(\d+)/ && $1 >= 1));
   }
   if ($tag eq "font" && exists $attr->{face}) {
-    #print STDERR "FONT " . $attr->{face} . "\n";
     if ($attr->{face} =~ /[A-Z]{3}/ && $attr->{face} !~ /M[ST][A-Z]|ITC/) {
       $self->{html}{font_face_caps} = 1;
     }

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgContainer.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgContainer.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgContainer.pm	Wed Feb 18 20:04:43 2004
@@ -374,6 +374,12 @@
       if (exists $r->{tags} && exists $r->{obfuscation}) {
 	$r->{obfuscation_ratio} = $r->{obfuscation} / $r->{tags};
       }
+      if (exists $r->{attr_bad} && exists $r->{attr_all}) {
+	$r->{attr_bad} = $r->{attr_bad} / $r->{attr_all};
+      }
+      if (exists $r->{attr_unique_bad} && exists $r->{attr_unique_all}) {
+	$r->{attr_unique_bad} = $r->{attr_unique_bad} / $r->{attr_unique_all};
+      }
     }
     else {
       $self->{'rendered_type'} = $self->{'type'};

Modified: incubator/spamassassin/trunk/rules/70_testing.cf
==============================================================================
--- incubator/spamassassin/trunk/rules/70_testing.cf	(original)
+++ incubator/spamassassin/trunk/rules/70_testing.cf	Wed Feb 18 20:04:43 2004
@@ -664,3 +664,15 @@
 uri T_URI_HTTP_TO_HEX_IP	/^https?:\/\/(?:[^\@]*\@|)0x[0-9a-f]{8}/i
 describe T_URI_HTTP_TO_HEX_IP	URI contains a link to a hexadecimal IP address
 
+# bug 2996: HTML attribute testing
+body T_HTML_ATTR_00	eval:html_range('attr_bad','0.0','0.2')
+body T_HTML_ATTR_20	eval:html_range('attr_bad','0.2','0.4')
+body T_HTML_ATTR_40	eval:html_range('attr_bad','0.4','0.6')
+body T_HTML_ATTR_60	eval:html_range('attr_bad','0.6','0.8')
+body T_HTML_ATTR_80	eval:html_range('attr_bad','0.8','1.0')
+		
+body T_HTML_ATTR_UNIQUE_00	eval:html_range('attr_unique_bad','0.0','0.2')
+body T_HTML_ATTR_UNIQUE_20	eval:html_range('attr_unique_bad','0.2','0.4')
+body T_HTML_ATTR_UNIQUE_40	eval:html_range('attr_unique_bad','0.4','0.6')
+body T_HTML_ATTR_UNIQUE_60	eval:html_range('attr_unique_bad','0.6','0.8')
+body T_HTML_ATTR_UNIQUE_80	eval:html_range('attr_unique_bad','0.8','1.0')