You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by mm...@apache.org on 2010/03/14 01:42:24 UTC

svn commit: r922698 - /spamassassin/branches/3.3/lib/Mail/SpamAssassin/Plugin/ImageInfo.pm

Author: mmartinec
Date: Sun Mar 14 00:42:24 2010
New Revision: 922698

URL: http://svn.apache.org/viewvc?rev=922698&view=rev
Log:
Bug 6370: update ImageInfo plugin to latest release

Modified:
    spamassassin/branches/3.3/lib/Mail/SpamAssassin/Plugin/ImageInfo.pm

Modified: spamassassin/branches/3.3/lib/Mail/SpamAssassin/Plugin/ImageInfo.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.3/lib/Mail/SpamAssassin/Plugin/ImageInfo.pm?rev=922698&r1=922697&r2=922698&view=diff
==============================================================================
--- spamassassin/branches/3.3/lib/Mail/SpamAssassin/Plugin/ImageInfo.pm (original)
+++ spamassassin/branches/3.3/lib/Mail/SpamAssassin/Plugin/ImageInfo.pm Sun Mar 14 00:42:24 2010
@@ -5,9 +5,9 @@
 # The ASF licenses this file to you under the Apache License, Version 2.0
 # (the "License"); you may not use this file except in compliance with
 # the License.  You may obtain a copy of the License at:
-# 
+#
 #     http://www.apache.org/licenses/LICENSE-2.0
-# 
+#
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -16,20 +16,36 @@
 # </...@LICENSE>
 #
 # -------------------------------------------------------
+# ImageInfo Plugin for SpamAssassin
+# Version: 0.7
+# Created: 2006-08-02
+# Modified: 2007-01-17
+#
+# Changes:
+#   0.7 - added image_name_regex to allow pattern matching on the image name
+#       - added support for image/pjpeg content types (progressive jpeg)
+#       - updated imageinfo.cf with a few sample rules for using image_name_regex()
+#   0.6 - fixed dems_ bug in image_size_range_
+#   0.5 - added image_named and image_to_text_ratio
+#   0.4 - added image_size_exact and image_size_range
+#   0.3 - added jpeg support
+#   0.2 - optimized by theo
+#   0.1 - added gif/png support
+#
 #
 # Usage:
 #  image_count()
 #
-#     body RULENAME  eval:image_count(<type>,<min>,[max]) 
-#        type: 'all','gif','png', or 'jpeg'  
-#        min: required, message contains at least this 
+#     body RULENAME  eval:image_count(<type>,<min>,[max])
+#        type: 'all','gif','png', or 'jpeg'
+#        min: required, message contains at least this
 #             many images
-#        max: optional, if specified, message must not 
+#        max: optional, if specified, message must not
 #             contain more than this number of images
 #
-#  examples
-# 
-#     body ONE_IMAGE  eval:image_count('all',1,1) 
+#  image_count() examples
+#
+#     body ONE_IMAGE  eval:image_count('all',1,1)
 #     body ONE_OR_MORE_IMAGES  eval:image_count('all',1)
 #     body ONE_PNG eval:image_count('png',1,1)
 #     body TWO_GIFS eval:image_count('gif',2,2)
@@ -44,13 +60,21 @@
 #        max: optional, if specified, message must not
 #             contain more than this much pixel area
 #
-#  examples
+#   pixel_coverage() examples
+#
+#     body LARGE_IMAGE_AREA  eval:pixel_coverage('all',150000)  # catches any images that are 150k pixels sq. or larger
+#     body SMALL_GIF_AREA  eval:pixel_coverage('gif',1,40000)   # catches only gifs that are 1 to 40k pixels sq.
+#
+#  image_name_regex()
+#
+#     body RULENAME  eval:image_name_regex(<regex>)
+#        regex: full quoted regexp, see examples below
+#
+#  image_name_regex() examples
+#
+#     body CG_DOUBLEDOT_GIF  eval:image_name_regex('/^\w{2,9}\.\.gif$/i') # catches double-dot gifs, e.g. abcd..gif
 #
-#     body LARGE_IMAGE_AREA  eval:pixel_coverage('all',150000)
-#     body SMALL_GIF_AREA  eval:pixel_coverage('gif',1,40000)
 #
-#  See the ruleset for ways to meta image_count() 
-#  and pixel_coverage() together.  
 #
 # -------------------------------------------------------
 
@@ -75,12 +99,13 @@ sub new {
   $class = ref($class) || $class;
   my $self = $class->SUPER::new($mailsaobject);
   bless ($self, $class);
-  
+
   $self->register_eval_rule ("image_count");
   $self->register_eval_rule ("pixel_coverage");
   $self->register_eval_rule ("image_size_exact");
   $self->register_eval_rule ("image_size_range");
   $self->register_eval_rule ("image_named");
+  $self->register_eval_rule ("image_name_regex");
   $self->register_eval_rule ("image_to_text_ratio");
 
   return $self;
@@ -105,13 +130,13 @@ my %get_details = (
     #my $has_global_color_table = $global_color_table ? 1 : 0;
     #my $sorted_colors = ($packed & 0x08)?1:0;
     #my $resolution = ((($packed & 0x70) >> 4) + 1);
- 
+
     if ($height && $width) {
       my $area = $width * $height;
       $pms->{imageinfo}->{pc_gif} += $area;
       $pms->{imageinfo}->{dems_gif}->{"${height}x${width}"} = 1;
       $pms->{imageinfo}->{names_all}->{$part->{'name'}} = 1 if $part->{'name'};
-      dbg("imageinfo: gif image ".($part->{'name'} ? $part->{'name'} : '')." is $height x $width pixels ($area pixels sq.), with $color_table_size color table"); 
+      dbg("imageinfo: gif image ".($part->{'name'} ? $part->{'name'} : '')." is $height x $width pixels ($area pixels sq.), with $color_table_size color table");
     }
   },
 
@@ -126,15 +151,15 @@ my %get_details = (
     my $chunksize = 8;
     my ($width, $height) = ( 0, 0 );
     my ($depth, $ctype, $compression, $filter, $interlace);
-  
+
     while ($pos < $datalen) {
       my ($len, $type) = unpack("Na4", substr($data, $pos, $chunksize));
       $pos += $chunksize;
- 
+
       last if $type eq "IEND";  # end of png image.
 
       next unless ( $type eq "IHDR" && $len == 13 );
-      
+
       my $bytes = substr($data, $pos, $len + 4);
       my $crc = unpack("N", substr($bytes, -4, 4, ""));
 
@@ -181,7 +206,7 @@ my %get_details = (
     }
 
     if ($height && $width) {
-      my $area = $height * $width; 
+      my $area = $height * $width;
       $pms->{imageinfo}->{pc_jpeg} += $area;
       $pms->{imageinfo}->{dems_jpeg}->{"${height}x${width}"} = 1;
       $pms->{imageinfo}->{names_all}->{$part->{'name'}} = 1 if $part->{'name'};
@@ -201,13 +226,12 @@ sub _get_images {
     $pms->{'imageinfo'}->{"count_$type"} = 0;
   }
 
-  foreach my $p ($pms->{msg}->find_parts(qr@^image/(?:gif|png|jpe?g)$@, 1)) {
+  foreach my $p ($pms->{msg}->find_parts(qr@^image/(?:gif|png|jpeg)$@, 1)) {
     # make sure its base64 encoded
     my $cte = lc $p->get_header('content-transfer-encoding') || '';
     next if ($cte !~ /^base64$/);
 
     my ($type) = $p->{'type'} =~ m@/(\w+)$@;
-    $type='jpeg' if $type eq 'jpg';
     if ($type && exists $get_details{$type}) {
        $get_details{$type}->($pms,$p);
        $pms->{'imageinfo'}->{"count_$type"} ++;
@@ -246,9 +270,36 @@ sub image_named {
 
 # -----------------------------------------
 
+sub image_name_regex {
+  my ($self,$pms,$body,$re) = @_;
+  return unless (defined $re);
+
+  # make sure we have image data read in.
+  if (!exists $pms->{'imageinfo'}) {
+    $self->_get_images($pms);
+  }
+
+  return 0 unless (exists $pms->{'imageinfo'}->{"names_all"});
+
+  my $hit = 0;
+  foreach my $name (keys %{$pms->{'imageinfo'}->{"names_all"}}) {
+    dbg("imageinfo: checking image named $name against regex $re");
+    if (eval { $name =~ /$re/ }) { $hit = 1 }
+    dbg("imageinfo: error in regex /$re/ - $@") if $@;
+    if ($hit) {
+      dbg("imageinfo: image_name_regex hit on $name");
+      return 1;
+    }
+  }
+  return 0;
+
+}
+
+# -----------------------------------------
+
 sub image_count {
   my ($self,$pms,$body,$type,$min,$max) = @_;
-  
+
   return unless defined $min;
 
   # make sure we have image data read in.
@@ -271,7 +322,7 @@ sub pixel_coverage {
   if (!exists $pms->{'imageinfo'}) {
     $self->_get_images($pms);
   }
-  
+
   # dbg("imageinfo: pc_$type: $min, ".($max ? $max:'').", $type, ".$pms->{'imageinfo'}->{"pc_$type"});
   return result_check($min, $max, $pms->{'imageinfo'}->{"pc_$type"});
 }
@@ -287,12 +338,12 @@ sub image_to_text_ratio {
     $self->_get_images($pms);
   }
 
-  # depending on how you call this eval (body vs rawbody), 
+  # depending on how you call this eval (body vs rawbody),
   # the $textlen will differ.
   my $textlen = length(join('',@$body));
 
   return 0 unless ( $textlen > 0 && exists $pms->{'imageinfo'}->{"pc_$type"} && $pms->{'imageinfo'}->{"pc_$type"} > 0);
-  
+
   my $ratio = $textlen / $pms->{'imageinfo'}->{"pc_$type"};
   dbg("imageinfo: image ratio=$ratio, min=$min max=$max");
   return result_check($min, $max, $ratio, 1);
@@ -325,7 +376,8 @@ sub image_size_range {
     $self->_get_images($pms);
   }
 
-  return unless (exists $pms->{'imageinfo'}->{"dems_$type"});
+  my $name = 'dems_'.$type;
+  return unless (exists $pms->{'imageinfo'}->{$name});
 
   foreach my $dem ( keys %{$pms->{'imageinfo'}->{"dems_$type"}}) {
     my ($h,$w) = split(/x/,$dem);