You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by mm...@apache.org on 2008/08/28 02:44:57 UTC

svn commit: r689682 - in /spamassassin/trunk/lib/Mail: ./ SpamAssassin/ SpamAssassin/Conf/ SpamAssassin/Message/ SpamAssassin/Plugin/

Author: mmartinec
Date: Wed Aug 27 17:44:56 2008
New Revision: 689682

URL: http://svn.apache.org/viewvc?rev=689682&view=rev
Log:
- continue work on avoiding testing user data as perl booleans;
  instead test for definedness or for an empty string as appropriate;
- pms->get can now distinguish between empty and nonexistent header
  fields; undef is returned for a nonexistent header field unless a
  default value argument is explicitly set to some defined value
  like an empty string;
- modified calls to pms->get to deal with undef as appropriate;
- Conf.pm, Conf/Parser.pm and Plugin/Check.pm now work together and turn
  a rule 'exists:name_of_header' into a defined(name_of_header) instead
  of a  name_of_header =~ /./  to match the documentation ("Define a
  header existence test") and make it possible to distinguish empty
  from nonexistent header fields; in principle the new code could allow
  operators like 'eq' and 'ne' or function calls in header tests
  in addition to regexp matching operators '=~' and '!~' (but this
  is currently not allowed by the parser);
- remove links to www.hughes-family.org/bugzilla in comments;
- DNSEval.pm: do not allow '@' within a domain name when parsing an
  e-mail address: "abc@xyz"@example.com is a legitimate address and
  its domain is example.com, not xyz"@example.com;
- HeaderEval.pm: turn a sloppy $from !~ /hotmail.com/ (and friends)
  into a somewhat better /\bhotmail\.com$/i (and friends)

Modified:
    spamassassin/trunk/lib/Mail/SpamAssassin.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/MailingList.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/AWL.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Check.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DCC.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DKIM.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DNSEval.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/RelayEval.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/SPF.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/WhiteListSubject.pm

Modified: spamassassin/trunk/lib/Mail/SpamAssassin.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin.pm?rev=689682&r1=689681&r2=689682&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin.pm Wed Aug 27 17:44:56 2008
@@ -981,7 +981,7 @@
   $self->init(1);
 
   my @addrlist;
-  my @hdrs = $mail_obj->get_header ('From');
+  my @hdrs = $mail_obj->get_header('From');
   if ($#hdrs >= 0) {
     push (@addrlist, $self->find_all_addrs_in_line (join (" ", @hdrs)));
   }
@@ -2003,14 +2003,14 @@
   foreach my $header (qw(To From Cc Reply-To Sender
   				Errors-To Mail-Followup-To))
   {
-    my @hdrs = $mail_obj->get_header ($header);
+    my @hdrs = $mail_obj->get_header($header);
     if ($#hdrs < 0) { next; }
-    push (@addrlist, $self->find_all_addrs_in_line (join (" ", @hdrs)));
+    push (@addrlist, $self->find_all_addrs_in_line(join (" ", @hdrs)));
   }
 
   # find addrs in body, too
   foreach my $line (@{$mail_obj->get_body()}) {
-    push (@addrlist, $self->find_all_addrs_in_line ($line));
+    push (@addrlist, $self->find_all_addrs_in_line($line));
   }
 
   my @ret;

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm?rev=689682&r1=689681&r2=689682&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm Wed Aug 27 17:44:56 2008
@@ -2119,7 +2119,7 @@
         }
       }
       elsif ($value =~ /^(\S+)\s+exists:(.*)$/) {
-        $self->{parser}->add_test ($1, "$2 =~ /./", $TYPE_HEAD_TESTS);
+        $self->{parser}->add_test ($1, "defined($2)", $TYPE_HEAD_TESTS);
         $self->{descriptions}->{$1} = "Found a $2 header";
       }
       else {

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm?rev=689682&r1=689681&r2=689682&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm Wed Aug 27 17:44:56 2008
@@ -1047,9 +1047,13 @@
   }
   if ($type == $Mail::SpamAssassin::Conf::TYPE_HEAD_TESTS)
   {
-    my ($pat) = ($text =~ /^\s*\S+\s*(?:\=|\!)\~\s*(\S.*?\S)\s*$/);
-    if ($pat) { $pat =~ s/\s+\[if-unset:\s+(.+)\]\s*$//; }
-    return unless $self->is_delimited_regexp_valid($name, $pat);
+    if ($text =~ /^!?defined\([A-Za-z][A-Za-z0-9-]*\)$/) {
+      # fine, implements 'exists:'
+    } else {
+      my ($pat) = ($text =~ /^\s*\S+\s*(?:\=|\!)\~\s*(\S.*?\S)\s*$/);
+      if ($pat) { $pat =~ s/\s+\[if-unset:\s+(.+)\]\s*$//; }
+      return unless $self->is_delimited_regexp_valid($name, $pat);
+    }
   }
   elsif ($type == $Mail::SpamAssassin::Conf::TYPE_META_TESTS)
   {

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/MailingList.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/MailingList.pm?rev=689682&r1=689681&r2=689682&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/MailingList.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/MailingList.pm Wed Aug 27 17:44:56 2008
@@ -44,12 +44,12 @@
 # List-Subscribe: <mailto:[a-zA-Z\.-]+-subscribe@
 sub detect_ml_ezmlm {
     my ($self) = @_;
-    return 0 unless $self->get('mailing-list') =~ /ezmlm$/;
-    return 0 unless $self->get('precedence') eq "bulk\n";
-    return 0 unless $self->get('list-post') =~ /^<mailto:/;
-    return 0 unless $self->get('list-help') =~ /^<mailto:/;
-    return 0 unless $self->get('list-unsubscribe') =~ /<mailto:[a-zA-Z\.-]+-unsubscribe\@/;
-    return 0 unless $self->get('list-subscribe') =~ /<mailto:[a-zA-Z\.-]+-subscribe\@/;
+    return 0 unless $self->get('mailing-list','') =~ /ezmlm$/;
+    return 0 unless $self->get('precedence','') eq "bulk\n";
+    return 0 unless $self->get('list-post','') =~ /^<mailto:/;
+    return 0 unless $self->get('list-help','') =~ /^<mailto:/;
+    return 0 unless $self->get('list-unsubscribe','') =~ /<mailto:[a-zA-Z\.-]+-unsubscribe\@/;
+    return 0 unless $self->get('list-subscribe','') =~ /<mailto:[a-zA-Z\.-]+-subscribe\@/;
     return 1; # assume ezmlm then.
 }
 
@@ -75,23 +75,23 @@
 #  X-BeenThere: 
 sub detect_ml_mailman {
     my ($self) = @_;
-    return 0 unless $self->get('x-mailman-version') =~ /^\d/;
-    return 0 unless $self->get('precedence') =~ /^(?:bulk|list)$/;
+    return 0 unless $self->get('x-mailman-version','') =~ /^\d/;
+    return 0 unless $self->get('precedence','') =~ /^(?:bulk|list)$/;
 
-    if ($self->get('x-list-administrivia') =~ /yes/ ||
-        $self->get('subject') =~ /mailing list memberships reminder$/)
+    if ($self->get('x-list-administrivia','') =~ /yes/ ||
+        $self->get('subject','') =~ /mailing list memberships reminder$/)
     {
         return 0 unless $self->get('errors-to');
         return 0 unless $self->get('x-beenthere');
-        return 0 unless $self->get('x-no-archive') =~ /yes/;
+        return 0 unless $self->get('x-no-archive','') =~ /yes/;
         return 1;
     }
 
     return 0 unless $self->get('list-id');
-    return 0 unless $self->get('list-help') =~ /^<mailto:/;
-    return 0 unless $self->get('list-post') =~ /^<mailto:/;
-    return 0 unless $self->get('list-subscribe') =~ /<mailto:.*=subscribe>/;
-    return 0 unless $self->get('list-unsubscribe') =~ /<mailto:.*=unsubscribe>/;
+    return 0 unless $self->get('list-help','') =~ /^<mailto:/;
+    return 0 unless $self->get('list-post','') =~ /^<mailto:/;
+    return 0 unless $self->get('list-subscribe','') =~ /<mailto:.*=subscribe>/;
+    return 0 unless $self->get('list-unsubscribe','') =~ /<mailto:.*=unsubscribe>/;
     return 1; # assume this is a valid mailman list
 }
 
@@ -123,11 +123,11 @@
 # sub detect_ml_listbuilder {
 #   my ($self, $full) = @_;
 # 
-#   my $reply = $self->get ('Reply-To:addr');
+#   my $reply = $self->get('Reply-To:addr','');
 #   if ($reply !~ /\@lb.bcentral.com/) { return 0; }
 # 
 #   # Received: from unknown (HELO lbrout14.listbuilder.com) (204.71.191.9)
-#   my $rcvd = $self->get('received');
+#   my $rcvd = $self->get('received','');
 #   return 0 unless ($rcvd =~ /\blbrout\d+\.listbuilder\.com\b/i);
 #   return 0 unless ($rcvd =~ /\b204\.71\.191\.\d+\b/);
 # 

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm?rev=689682&r1=689681&r2=689682&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm Wed Aug 27 17:44:56 2008
@@ -629,12 +629,12 @@
   my @hdrs;
   if ( $raw ) {
     if (@hdrs = $self->raw_header($hdr)) {
-      @hdrs = map { s/\015?\012\s+/ /gs; $_; } @hdrs;
+      s/\015?\012\s+/ /gs  for @hdrs;
     }
   }
   else {
     if (@hdrs = $self->header($hdr)) {
-      @hdrs = map { "$_\n" } @hdrs;
+      $_ .= "\n"  for @hdrs;
     }
   }
 
@@ -642,7 +642,7 @@
     return @hdrs;
   }
   else {
-     return @hdrs ? $hdrs[-1] : undef;
+    return @hdrs ? $hdrs[-1] : undef;
   }
 }
 

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm?rev=689682&r1=689681&r2=689682&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm Wed Aug 27 17:44:56 2008
@@ -1528,17 +1528,17 @@
   # ToCc: the combined recipients list
   elsif ($request eq 'ToCc') {
     $result = join("\n", $self->{msg}->get_header('To', $getraw));
-    if ($result) {
+    if ($result ne '') {
       chomp $result;
       $result .= ", " if $result =~ /\S/;
     }
     $result .= join("\n", $self->{msg}->get_header('Cc', $getraw));
-    $result = undef if !$result;
+    $result = undef if $result eq '';
   }
   # MESSAGEID: handle lists which move the real message-id to another
   # header for resending.
   elsif ($request eq 'MESSAGEID') {
-    $result = join("\n", grep { defined($_) && length($_) > 0 }
+    $result = join("\n", grep { defined($_) && $_ ne '' }
 		   $self->{msg}->get_header('X-Message-Id', $getraw),
 		   $self->{msg}->get_header('Resent-Message-Id', $getraw),
 		   $self->{msg}->get_header('X-Original-Message-ID', $getraw),
@@ -1552,7 +1552,6 @@
       $result = join('', @results);
     } else {  # metadata
       $result = $self->{msg}->get_metadata($request);
-    # undef $result  if defined $result && $result eq '';  # needed?
     }
   }
       
@@ -1601,23 +1600,25 @@
   return $result;
 }
 
-# heavily optimized for speed
+# optimized for speed
 # $_[0] is self
 # $_[1] is request
 # $_[2] is defval
 sub get {
-  # return cache entry if it is defined
-  return $_[0]->{c}->{$_[1]} if defined $_[0]->{c}->{$_[1]};
-
-  # fill in cache entry if it is empty
-  if (!exists $_[0]->{c}->{$_[1]}) {
-    $_[0]->{c}->{$_[1]} = _get(@_);
-    return $_[0]->{c}->{$_[1]} if defined $_[0]->{c}->{$_[1]};
-  }
-
-  # if the requested header wasn't found, we should return either
-  # a default value as specified by the caller, or the blank string ''
-  return defined $_[2] ? $_[2] : '';
+  my $cache = $_[0]->{c};
+  my $found;
+  if (exists $cache->{$_[1]}) {
+    # return cache entry if it is known
+    $found = $cache->{$_[1]};
+  } else {
+    # fill in a cache entry
+    $found = _get(@_);
+    $cache->{$_[1]} = $found;
+  }
+  # if the requested header wasn't found, we should return a default value
+  # as specified by the caller; if defval argument is present it represents
+  # a default value even if undef
+  return (defined $found ? $found : $_[2]);
 }
 
 ###########################################################################
@@ -1780,7 +1781,8 @@
   my %parsed = map { $_ => 'parsed' } $self->_get_parsed_uri_list();
 
   # Look for the domain in DK/DKIM headers
-  my $dk = join(" ", $self->get('DomainKey-Signature'), $self->get('DKIM-Signature'));
+  my $dk = join(" ", grep {defined} ( $self->get('DomainKey-Signature'),
+                                      $self->get('DKIM-Signature') ));
   while ($dk =~ /\bd\s*=\s*([^;]+)/g) {
     my $dom = $1;
     $dom =~ s/\s+//g;
@@ -2287,8 +2289,8 @@
   # lines, we cannot trust any Envelope-From headers, since they're likely to
   # be incorrect fetchmail guesses.
 
-  if ($self->get ("X-Sender") =~ /\@/) {
-    my $rcvd = join (' ', $self->get ("Received"));
+  if ($self->get("X-Sender",'') =~ /\@/) {
+    my $rcvd = join(' ', $self->get("Received",''));
     if ($rcvd =~ /\(fetchmail/) {
       dbg("message: X-Sender and fetchmail signatures found, cannot trust envelope-from");
       return;
@@ -2296,10 +2298,10 @@
   }
 
   # procmailrc notes this (we now recommend adding it to Received instead)
-  if ($envf = $self->get ("X-Envelope-From")) {
+  if ($envf = $self->get("X-Envelope-From",'')) {
     # heuristic: this could have been relayed via a list which then used
     # a *new* Envelope-from.  check
-    if ($self->get ("ALL") =~ /(?:^|\n)Received:\s.*\nX-Envelope-From:\s/s) {
+    if ($self->get("ALL",'') =~ /(?:^|\n)Received:\s.*\nX-Envelope-From:\s/s) {
       dbg("message: X-Envelope-From header found after 1 or more Received lines, cannot trust envelope-from");
       return;
     } else {
@@ -2308,10 +2310,10 @@
   }
 
   # qmail, new-inject(1)
-  if ($envf = $self->get ("Envelope-Sender")) {
+  if ($envf = $self->get("Envelope-Sender",'')) {
     # heuristic: this could have been relayed via a list which then used
     # a *new* Envelope-from.  check
-    if ($self->get ("ALL") =~ /(?:^|\n)Received:\s.*\nEnvelope-Sender:\s/s) {
+    if ($self->get("ALL",'') =~ /(?:^|\n)Received:\s.*\nEnvelope-Sender:\s/s) {
       dbg("message: Envelope-Sender header found after 1 or more Received lines, cannot trust envelope-from");
     } else {
       goto ok;
@@ -2325,10 +2327,10 @@
   #   data.  This use of return-path is required; mail systems MUST support
   #   it.  The return-path line preserves the information in the <reverse-
   #   path> from the MAIL command.
-  if ($envf = $self->get ("Return-Path")) {
+  if ($envf = $self->get("Return-Path",'')) {
     # heuristic: this could have been relayed via a list which then used
     # a *new* Envelope-from.  check
-    if ($self->get ("ALL") =~ /(?:^|\n)Received:\s.*\nReturn-Path:\s/s) {
+    if ($self->get("ALL",'') =~ /(?:^|\n)Received:\s.*\nReturn-Path:\s/s) {
       dbg("message: Return-Path header found after 1 or more Received lines, cannot trust envelope-from");
     } else {
       goto ok;
@@ -2368,7 +2370,7 @@
 
   my $cur_rcvd_index = -1;  # none found yet
   my $result = '';
-  foreach my $hdr (split("\n", $self->get('ALL'))) {
+  foreach my $hdr (split("\n", $self->get('ALL',''))) {
     if ($hdr =~ /^received: /i) {
       $cur_rcvd_index++;
       next if (defined $start_rcvd && !$include_start_rcvd &&
@@ -2449,11 +2451,9 @@
   my @addrs;
 
   # Resent- headers take priority, if present. see bug 672
-  # http://www.hughes-family.org/bugzilla/show_bug.cgi?id=672
   my $resent = $self->get('Resent-From');
   if (defined $resent && $resent =~ /\S/) {
     @addrs = $self->{main}->find_all_addrs_in_line ($resent);
-
   }
   else {
     # bug 2292: Used to use find_all_addrs_in_line() with the same
@@ -2465,7 +2465,7 @@
     # bug 3366: some addresses come in as 'foo@bar...', which is invalid.
     # so deal with the multiple periods.
     ## no critic
-    @addrs = grep { defined($_) && length($_) > 0 } map { tr/././s; $_; }
+    @addrs = map { tr/././s; $_ } grep { defined($_) && $_ ne '' }
         ($self->get('From:addr'),		# std
          $self->get('Envelope-Sender:addr'),	# qmail: new-inject(1)
          $self->get('Resent-Sender:addr'),	# procmailrc manpage
@@ -2491,20 +2491,17 @@
   my @addrs;
 
   # Resent- headers take priority, if present. see bug 672
-  # http://www.hughes-family.org/bugzilla/show_bug.cgi?id=672
-  my $resent = $self->get('Resent-To') . $self->get('Resent-Cc');
-  if (defined $resent && $resent =~ /\S/) {
-    @addrs = $self->{main}->find_all_addrs_in_line (
-  	 $self->get('Resent-To') .             # std, rfc822
-  	 $self->get('Resent-Cc'));             # std, rfc822
-
+  my $resent = join('', grep {defined} ($self->get('Resent-To'),
+                                        $self->get('Resent-Cc')));
+  if ($resent =~ /\S/) {
+    @addrs = $self->{main}->find_all_addrs_in_line($resent);
   } else {
     # OK, a fetchmail trick: try to find the recipient address from
     # the most recent 3 Received lines.  This is required for sendmail,
     # since it does not add a helpful header like exim, qmail
     # or Postfix do.
     #
-    my $rcvd = $self->get('Received');
+    my $rcvd = $self->get('Received','');
     $rcvd =~ s/\n[ \t]+/ /gs;
     $rcvd =~ s/\n+/\n/gs;
 
@@ -2515,19 +2512,20 @@
     }
 
     @addrs = $self->{main}->find_all_addrs_in_line (
-	 join(" ", @rcvdaddrs)."\n" .
-         $self->get('To') .			# std 
-  	 $self->get('Apparently-To') .		# sendmail, from envelope
-  	 $self->get('Delivered-To') .		# Postfix, poss qmail
-  	 $self->get('Envelope-Recipients') .	# qmail: new-inject(1)
-  	 $self->get('Apparently-Resent-To') .	# procmailrc manpage
-  	 $self->get('X-Envelope-To') .		# procmailrc manpage
-  	 $self->get('Envelope-To') .		# exim
-	 $self->get('X-Delivered-To') .		# procmail quick start
-	 $self->get('X-Original-To') .		# procmail quick start
-	 $self->get('X-Rcpt-To') .		# procmail quick start
-	 $self->get('X-Real-To') .		# procmail quick start
-	 $self->get('Cc'));			# std
+       join('', grep { defined($_) && $_ ne '' } (
+	 join(" ", @rcvdaddrs)."\n",
+         $self->get('To'),			# std 
+  	 $self->get('Apparently-To'),		# sendmail, from envelope
+  	 $self->get('Delivered-To'),		# Postfix, poss qmail
+  	 $self->get('Envelope-Recipients'),	# qmail: new-inject(1)
+  	 $self->get('Apparently-Resent-To'),	# procmailrc manpage
+  	 $self->get('X-Envelope-To'),		# procmailrc manpage
+  	 $self->get('Envelope-To'),		# exim
+	 $self->get('X-Delivered-To'),		# procmail quick start
+	 $self->get('X-Original-To'),		# procmail quick start
+	 $self->get('X-Rcpt-To'),		# procmail quick start
+	 $self->get('X-Real-To'),		# procmail quick start
+	 $self->get('Cc'))));			# std
     # those are taken from various sources; thanks to Nancy McGough, who
     # noted some in <http://www.ii.com/internet/robots/procmail/qs/#envelope>
   }

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/AWL.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/AWL.pm?rev=689682&r1=689681&r2=689682&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/AWL.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/AWL.pm Wed Aug 27 17:44:56 2008
@@ -339,7 +339,7 @@
     my $timer = $self->{main}->time_method("total_awl");
 
     my $from = lc $pms->get('From:addr');
-    return 0 unless $from =~ /\S/;
+    return 0 unless defined $from && $from =~ /\S/;
 
     # find the earliest usable "originating IP".  ignore private nets
     my $origip;

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Check.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Check.pm?rev=689682&r1=689681&r2=689682&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Check.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Check.pm Wed Aug 27 17:44:56 2008
@@ -464,7 +464,10 @@
   my ($self, $pms, $priority) = @_;
   # hash to hold the rules, "header\tdefault value" => rulename
   my %ordered;
-  my %testcode;
+  my %testcode;  # tuples: [op_type, op, arg]
+     # op_type: 1=infix, 0:prefix/function
+     # op: operator, e.g. '=~', '!~', or a function like 'defined'
+     # arg: additional argument like a regexp for a patt matching op
 
   $self->run_generic_tests ($pms, $priority,
     consttype => $Mail::SpamAssassin::Conf::TYPE_HEAD_TESTS,
@@ -474,23 +477,30 @@
     loop_body => sub
   {
     my ($self, $pms, $conf, $rulename, $rule, %opts) = @_;
-    my $def = '';
+    my $def;
     $rule = untaint_var($rule);  # presumably checked
-    my ($hdrname, $testtype, $pat) =
-        $rule =~ /^\s*(\S+)\s*(\=|\!)\~\s*(\S.*?\S)\s*$/;
-
-    if (!defined $pat) {
-      warn "rules: invalid rule: $rulename\n";
+    my ($hdrname, $op, $op_infix, $pat);
+    if ($rule =~ /^\s* (\S+) \s* ([=!]~) \s* (\S .*? \S) \s*$/x) {
+      ($hdrname, $op, $pat) = ($1,$2,$3);  # e.g.: Subject =~ /patt/
+      $op_infix = 1;
+      if (!defined $pat) {
+        warn "rules: invalid rule: $rulename\n";
+        $pms->{rule_errors}++;
+        next;
+      }
+      if ($pat =~ s/\s+\[if-unset:\s+(.+)\]\s*$//) { $def = $1 }
+    } elsif ($rule =~ /^\s* (\S+) \s* \( \s* (\S+) \s* \) \s*$/x) {
+      # implements exists:name_of_header (and similar function or prefix ops)
+      ($hdrname, $op) = ($2,$1);  # e.g.: !defined(Subject)
+      $op_infix = 0;
+    } else {
+      warn "rules: unrecognized rule: $rulename\n";
       $pms->{rule_errors}++;
       next;
     }
 
-    if ($pat =~ s/\s+\[if-unset:\s+(.+)\]\s*$//) { $def = $1; }
-
-    $hdrname =~ s/#/[HASH]/g;                # avoid probs with eval below
-    $def =~ s/#/[HASH]/g;
-
-    push(@{$ordered{"$hdrname\t$def"}}, $rulename);
+    push(@{ $ordered{$hdrname . (!defined $def ? '' : "\t".$def)} },
+         $rulename);
 
     next if ($opts{doing_user_rules} &&
             !$self->is_user_rule_sub($rulename.'_head_test'));
@@ -498,25 +508,33 @@
     # caller can set this member of the Mail::SpamAssassin object to
     # override this; useful for profiling rule runtimes, although I think
     # the HitFreqsRuleTiming.pm plugin is probably better nowadays anyway
-      if ($self->{main}->{use_rule_subs}) {
+    if ($self->{main}->{use_rule_subs}) {
+      my $expr;
+      if ($op =~ /^!?[A-Za-z_]+$/) {  # function or its negation
+        $expr = $op . '($text)';
+      } else {  # infix operator
+        $expr = '$text ' . $op . ' ' . $pat;
+        $expr .= 'g'  if $op eq '=~' || $op eq '!~';
+      }
       $self->add_temporary_method ($rulename.'_head_test', '{
           my($self,$text) = @_;
           '.$self->hash_line_for_rule($pms, $rulename).'
-	    while ($text '.$testtype.'~ '.$pat.'g) {
-            $self->got_hit(q#'.$rulename.'#, "", ruletype => "header");
+	    while ('.$expr.') {
+            $self->got_hit(q{'.$rulename.'}, "", ruletype => "header");
             '. $self->hit_rule_plugin_code($pms, $rulename, "header", "last") . '
             }
         }');
     }
     else {
       # store for use below
-      $testcode{$rulename} = $testtype.'~ '.$pat;
+      $testcode{$rulename} = [$op_infix, $op, $pat];
     }
   },
     pre_loop_body => sub
   {
     my ($self, $pms, $conf, %opts) = @_;
     $self->add_evalstr ('
+      no warnings q(uninitialized);
       my $hval;
     ');
   },
@@ -527,38 +545,50 @@
     while(my($k,$v) = each %ordered) {
       my($hdrname, $def) = split(/\t/, $k, 2);
       $self->add_evalstr ('
-        $hval = $self->get(q#'.$hdrname.'#, q#'.$def.'#);
+        $hval = $self->get(q{'.$hdrname.'}' .
+                           (!defined($def) ? '' : ', q{'.$def.'}') . ');
       ');
       foreach my $rulename (@{$v}) {
         if ($self->{main}->{use_rule_subs}) {
           $self->add_evalstr ('
-            if ($scoresptr->{q#'.$rulename.'#}) {
+            if ($scoresptr->{q{'.$rulename.'}}) {
               '.$rulename.'_head_test($self, $hval);
               '.$self->ran_rule_plugin_code($rulename, "header").'
             }
           ');
         }
         else {
-          my $testcode = $testcode{$rulename};
+          my $tc_ref = $testcode{$rulename};
+          my ($op_infix, $op, $pat);
+          ($op_infix, $op, $pat) = @$tc_ref  if defined $tc_ref;
 
           my $posline = '';
           my $ifwhile = 'if';
           my $hitdone = '';
           my $matchg = '';
-          if (($conf->{tflags}->{$rulename}||'') =~ /\bmultiple\b/)
-          {
-            $posline = 'pos $hval = 0;';
-            $ifwhile = 'while';
-            $hitdone = 'last';
-            $matchg = 'g';
+
+          my $expr;
+          if (!$op_infix) {  # function or its negation
+            $expr = $op . '($hval)';
+          }
+          else {  # infix operator
+            if ( ($conf->{tflags}->{$rulename}||'') =~ /\bmultiple\b/ &&
+                 ($op eq '=~' || $op eq '!~') )  # a pattern matching operator
+            {
+              $posline = 'pos $hval = 0;';
+              $ifwhile = 'while';
+              $hitdone = 'last';
+              $matchg = 'g';
+            }
+            $expr = '$hval ' . $op . ' ' . $pat . $matchg;
           }
 
           $self->add_evalstr ('
-          if ($scoresptr->{q#'.$rulename.'#}) {
+          if ($scoresptr->{q{'.$rulename.'}}) {
             '.$posline.'
             '.$self->hash_line_for_rule($pms, $rulename).'
-            '.$ifwhile.' ($hval '.$testcode.$matchg.') {
-              $self->got_hit(q#'.$rulename.'#, "", ruletype => "header");
+            '.$ifwhile.' ('.$expr.') {
+              $self->got_hit(q{'.$rulename.'}, "", ruletype => "header");
               '.$self->hit_rule_plugin_code($pms, $rulename, "header", $hitdone).'
             }
             '.$self->ran_rule_plugin_code($rulename, "header").'

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DCC.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DCC.pm?rev=689682&r1=689681&r2=689682&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DCC.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DCC.pm Wed Aug 27 17:44:56 2008
@@ -401,7 +401,7 @@
 
   # short-circuit if there's already a X-DCC header with value of
   # "bulk" from an upstream DCC check
-  if ($permsgstatus->get('ALL') =~
+  if ($permsgstatus->get('ALL','') =~
       /^(X-DCC-([^:]{1,80})?-?Metrics:.*bulk.*)$/m) {
     $permsgstatus->{dcc_response} = $1;
     return;

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DKIM.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DKIM.pm?rev=689682&r1=689681&r2=689682&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DKIM.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DKIM.pm Wed Aug 27 17:44:56 2008
@@ -361,7 +361,8 @@
     $author = '' if !defined $author;  # when a From header field is missing
     # Mail::DKIM sometimes leaves leading or trailing whitespace in address
     $author =~ s/^[ \t]+//s;  $author =~ s/[ \t]+\z//s;  # trim
-    if ($author ne $scan->{dkim_author_address}) {
+    if (defined($scan->{dkim_author_address}) &&
+        $author ne $scan->{dkim_author_address}) {
       dbg("dkim: author parsing inconsistency, SA: <%s>, DKIM: <%s>",
            $author, $scan->{dkim_author_address});
     # currently SpamAssassin's parsing is better than Mail::Address parsing
@@ -618,6 +619,7 @@
   my $any_match_at_all = 0;
   my $expiration_supported = Mail::DKIM->VERSION >= 0.29 ? 1 : 0;
   my $author = $scan->{dkim_author_address};  # address in a From header field
+  $author = ''  if !defined $author;
 
   # walk through all signatures present in a message
   foreach my $signature (@{$scan->{dkim_signatures}}) {

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DNSEval.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DNSEval.pm?rev=689682&r1=689681&r2=689682&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DNSEval.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DNSEval.pm Wed Aug 27 17:44:56 2008
@@ -116,12 +116,12 @@
   my ($self, $pms) = @_;
   my %acctags;
 
-  if ($pms->get('EnvelopeFrom:addr') =~ /[@.]a--([a-z0-9]{3,})\./i) {
+  if ($pms->get('EnvelopeFrom:addr','') =~ /[@.]a--([a-z0-9]{3,})\./i) {
     (my $tag = $1) =~ tr/A-Z/a-z/;
     $acctags{$tag} = -1;
   }
   my $accreditor_field = $pms->get('Accreditor');
-  if (defined($accreditor_field)) {
+  if (defined $accreditor_field) {
     my @accreditors = split(/,/, $accreditor_field);
     foreach my $accreditor (@accreditors) {
       my @terms = split(' ', $accreditor);
@@ -171,7 +171,7 @@
   my @originating;
   for my $header ('X-Yahoo-Post-IP', 'X-Originating-IP', 'X-Apparently-From', 'X-SenderIP') {
     my $str = $pms->get($header);
-    next unless $str;
+    next unless defined $str && $str ne '';
     push (@originating, ($str =~ m/($IP_ADDRESS)/g));
   }
 
@@ -310,7 +310,7 @@
 
   my %hosts;
   for my $address (@addresses) {
-    if ($address =~ m/\@(\S+\.\S+)/) {
+    if (defined $address && $address =~ m/ \@ ( [^\@\s]+ \. [^\@\s]+ )/x) {
       $hosts{lc($1)} = 1;
     }
   }
@@ -338,7 +338,7 @@
     next unless defined $from;
 
     $from =~ tr/././s;		# bug 3366
-    if ($from =~ /\@(\S+\.\S+)/) {
+    if ($from =~ m/ \@ ( [^\@\s]+ \. [^\@\s]+ )/x ) {
       $host = lc($1);
       last;
     }

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm?rev=689682&r1=689681&r2=689682&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm Wed Aug 27 17:44:56 2008
@@ -148,7 +148,7 @@
   my ($self, $pms, undef, $ratio) = @_;
 
   my $subject = $pms->get('Subject');
-  if (! $subject) {
+  if (!defined $subject || $subject eq '') {
     return 0;
   }
   my $max = 0;

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm?rev=689682&r1=689681&r2=689682&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm Wed Aug 27 17:44:56 2008
@@ -85,7 +85,8 @@
   my ($self, $pms) = @_;
   local ($_);
 
-  $_ = $pms->get('Received'); s/\s/ /gs;
+  $_ = $pms->get('Received','');
+  s/\s/ /gs;
 
   # this is the hostname format used by AOL for their relays. Spammers love 
   # forging it.  Don't make it more specific to match aol.com only, though --
@@ -121,7 +122,7 @@
   return 0 if grep { $_ eq "all" } @locales;
 
   for my $h (qw(From Subject)) {
-    my @hdrs = $pms->get("$h:raw");
+    my @hdrs = $pms->get("$h:raw");  # ??? get() returns a scalar ???
     if ($#hdrs >= 0) {
       $hdr = join(" ", @hdrs);
     } else {
@@ -138,7 +139,7 @@
 sub check_for_unique_subject_id {
   my ($self, $pms) = @_;
   local ($_);
-  $_ = lc $pms->get('Subject');
+  $_ = lc $pms->get('Subject','');
   study;
 
   my $id = 0;
@@ -265,7 +266,7 @@
 
   $header .= ":raw" unless ($header eq "ALL" || $header =~ /:raw$/);
   my $str = $pms->get($header);
-  return 0 unless $str;
+  return 0 unless defined $str && $str ne '';
 
   # avoid overlap between tests
   if ($header eq "ALL") {
@@ -295,8 +296,8 @@
 
   my $txt = $pms->get("Mailing-List");
   if (defined $txt && $txt =~ /^contact \S+\@\S+\; run by ezmlm$/) {
-    my $dlto = $pms->get("Delivered-To");
-    my $rcvd = $pms->get("Received");
+    my $dlto = $pms->get("Delivered-To",'');
+    my $rcvd = $pms->get("Received",'');
 
     # ensure we have other indicative headers too
     if ($dlto =~ /^mailing list \S+\@\S+/ &&
@@ -306,13 +307,14 @@
     }
   }
 
-  if ($pms->get("Received") !~ /\S/) {
+  my $rcvd = $pms->get("Received");
+  if (!defined $rcvd || $rcvd !~ /\S/) {
     # we have no Received headers!  These tests cannot run in that case
     return 1;
   }
 
   # MSN groups removes Received lines. thanks MSN
-  if ($pms->get("Received") =~ /from groups\.msn\.com \(\S+\.msn\.com /) {
+  if ($rcvd =~ /from groups\.msn\.com \(\S+\.msn\.com /) {
     return 1;
   }
 
@@ -328,7 +330,7 @@
   $pms->{hotmail_addr_with_forged_hotmail_received} = 0;
   $pms->{hotmail_addr_but_no_hotmail_received} = 0;
 
-  my $rcvd = $pms->get('Received');
+  my $rcvd = $pms->get('Received','');
   $rcvd =~ s/\s+/ /gs;		# just spaces, simplify the regexp
 
   return if ($rcvd =~
@@ -340,7 +342,7 @@
   my $ip = $pms->get('X-Originating-Ip');
   my $IP_ADDRESS = IP_ADDRESS;
 
-  if ($ip =~ /$IP_ADDRESS/) { $ip = 1; } else { $ip = 0; }
+  if (defined $ip && $ip =~ /$IP_ADDRESS/) { $ip = 1; } else { $ip = 0; }
 
   # Hotmail formats its received headers like this:
   # Received: from hotmail.com (f135.law8.hotmail.com [216.33.241.135])
@@ -363,7 +365,7 @@
   } else {
     # check to see if From claimed to be @hotmail.com
     my $from = $pms->get('From:addr');
-    if ($from !~ /hotmail.com/) { return; }
+    if (!defined $from || $from !~ /\bhotmail\.com$/i) { return; }
     $pms->{hotmail_addr_but_no_hotmail_received} = 1;
   }
 }
@@ -386,21 +388,22 @@
 sub check_for_msn_groups_headers {
   my ($self, $pms) = @_;
 
-  return 0 unless ($pms->get('To') =~ /<(\S+)\@groups\.msn\.com>/i);
+  my $to = $pms->get('To');
+  return 0 unless defined $to && $to =~ /<(\S+)\@groups\.msn\.com>/i;
   my $listname = $1;
 
-  # from Theo Van Dinter, see
-  # http://www.hughes-family.org/bugzilla/show_bug.cgi?id=591
+  # from Theo Van Dinter, see bug 591
   # Updated by DOS, based on messages from Bob Menschel, bug 4301
 
-  return 0 unless $pms->get('Received') =~ /from mail pickup service by ((?:p\d\d\.)groups\.msn\.com)\b/;
+  return 0 unless $pms->get('Received','') =~
+                 /from mail pickup service by ((?:p\d\d\.)groups\.msn\.com)\b/;
   my $server = $1;
 
   if ($listname =~ /^notifications$/) {
-    return 0 unless $pms->get('Message-Id') =~ /^<\S+\@$server>/;
+    return 0 unless $pms->get('Message-Id','') =~ /^<\S+\@$server>/;
   } else {
-    return 0 unless $pms->get('Message-Id') =~ /^<$listname-\S+\@groups\.msn\.com>/;
-    return 0 unless $pms->get('EnvelopeFrom:addr') =~ /$listname-bounce\@groups\.msn\.com/;
+    return 0 unless $pms->get('Message-Id','') =~ /^<$listname-\S+\@groups\.msn\.com>/;
+    return 0 unless $pms->get('EnvelopeFrom:addr','') =~ /$listname-bounce\@groups\.msn\.com/;
   }
   return 1;
 
@@ -453,14 +456,14 @@
   my ($self, $pms) = @_;
 
   my $from = $pms->get('From:addr');
-  if ($from !~ /eudoramail.com/) { return 0; }
+  if (!defined $from || $from !~ /\beudoramail\.com$/i) { return 0; }
 
-  my $rcvd = $pms->get('Received');
+  my $rcvd = $pms->get('Received','');
   $rcvd =~ s/\s+/ /gs;		# just spaces, simplify the regexp
 
   my $ip = $pms->get('X-Sender-Ip');
   my $IP_ADDRESS = IP_ADDRESS;
-  if ($ip =~ /$IP_ADDRESS/) { $ip = 1; } else { $ip = 0; }
+  if (defined $ip && $ip =~ /$IP_ADDRESS/) { $ip = 1; } else { $ip = 0; }
 
   # Eudoramail formats its received headers like this:
   # Received: from Unknown/Local ([?.?.?.?]) by shared1-mail.whowhere.com;
@@ -483,13 +486,13 @@
   my ($self, $pms) = @_;
 
   my $from = $pms->get('From:addr');
-  if ($from !~ /yahoo\.com$/) { return 0; }
+  if (!defined $from || $from !~ /\byahoo\.com$/i) { return 0; }
 
-  my $rcvd = $pms->get('Received');
+  my $rcvd = $pms->get('Received','');
   
-  if ($pms->get("Resent-From") && $pms->get("Resent-To")) {
+  if ($pms->get("Resent-From",'') ne '' && $pms->get("Resent-To",'') ne '') {
     my $xrcvd = $pms->get("X-Received");
-    $rcvd = $xrcvd if $xrcvd;
+    $rcvd = $xrcvd  if defined $xrcvd && $xrcvd ne '';
   }
   $rcvd =~ s/\s+/ /gs;		# just spaces, simplify the regexp
 
@@ -526,7 +529,7 @@
   # <http://xent.com/pipermail/fork/> for an example.
   #
   if ($rcvd =~ /\bmailer\d+\.bulk\.scd\.yahoo\.com\b/
-                && $from =~ /\@reply\.yahoo\.com$/) { return 0; }
+                && $from =~ /\@reply\.yahoo\.com$/i) { return 0; }
 
   if ($rcvd =~ /by \w+\.\w+\.yahoo\.com \(\d+\.\d+\.\d+\/\d+\.\d+\.\d+\)(?: with ESMTP)? id \w+/) {
       # possibly sent from "mail this story to a friend"
@@ -540,16 +543,17 @@
   my ($self, $pms) = @_;
 
   my $from = $pms->get('From:addr');
-  if($from !~ /\bjuno.com/) { return 0; }
+  if(!defined $from || $from !~ /\bjuno\.com$/i) { return 0; }
 
   if($self->gated_through_received_hdr_remover($pms)) { return 0; }
 
-  my $xmailer = $pms->get('X-Mailer');
   my $xorig = $pms->get('X-Originating-IP');
-  my $rcvd = $pms->get('Received');
+  my $xmailer = $pms->get('X-Mailer','');
+  my $rcvd = $pms->get('Received','');
   my $IP_ADDRESS = IP_ADDRESS;
 
-  if (!$xorig) {  # New style Juno has no X-Originating-IP header, and other changes
+  if (!defined $xorig || $xorig eq '') {
+    # New style Juno has no X-Originating-IP header, and other changes
     if($rcvd !~ /from.*\b(?:juno|untd)\.com.*[\[\(]$IP_ADDRESS[\]\)].*by/
         && $rcvd !~ / cookie\.(?:juno|untd)\.com /) { return 1; }
     if($xmailer !~ /Juno /) { return 1; }
@@ -570,7 +574,7 @@
 sub check_for_matching_env_and_hdr_from {
   my ($self, $pms) =@_;
   # two blank headers match so don't bother checking
-  return (lc $pms->get('EnvelopeFrom:addr') eq lc $pms->get('From:addr'));
+  return (lc $pms->get('EnvelopeFrom:addr','') eq lc $pms->get('From:addr',''));
 }
 
 sub sorted_recipients {
@@ -604,7 +608,7 @@
 
   # ToCc: pseudo-header works best, but sometimes Bcc: is better
   for ('ToCc', 'Bcc') {
-    my $to = $pms->get($_);	# get recipients
+    my $to = $pms->get($_,'');	# get recipients
     $to =~ s/\(.*?\)//g;	# strip out the (comments)
     push(@inputs, ($to =~ m/([\w.=-]+\@\w+(?:[\w.-]+\.)+\w+)/g));
     last if scalar(@inputs) >= TOCC_SIMILAR_COUNT;
@@ -648,8 +652,8 @@
   my ($self, $pms) = @_;
 
   my $hdr = $pms->get('To');
-  $hdr ||= $pms->get('Apparently-To');
-  return 1 if ($hdr eq '');
+  $hdr = $pms->get('Apparently-To')  if !defined $hdr || $hdr eq '';
+  return 1  if !defined $hdr || $hdr eq '';
 
   return 0;
 }
@@ -658,7 +662,7 @@
   my ($self, $pms) = @_;
   local ($_);
 
-  my $rcv = $pms->get('Received');
+  my $rcv = $pms->get('Received','');
 
   # e.g.
   # Received: from mail3.icytundra.com by gw05 with ESMTP; Thu, 21 Jun 2001 02:28:32 -0400
@@ -737,7 +741,7 @@
 
   my (@received);
   my $received = $pms->get('Received');
-  if (defined($received) && length($received)) {
+  if (defined $received && $received ne '') {
     @received = grep {$_ =~ m/\S/} (split(/\n/,$received));
   }
   # if we have no Received: headers, chances are we're archived mail
@@ -886,7 +890,7 @@
 
 sub subject_is_all_caps {
    my ($self, $pms) = @_;
-   my $subject = $pms->get('Subject');
+   my $subject = $pms->get('Subject','');
 
    $subject =~ s/^\s+//;
    $subject =~ s/\s+$//;
@@ -897,7 +901,7 @@
    # now, check to see if the subject is encoded using a non-ASCII charset.
    # If so, punt on this test to avoid FPs.  We just list the known charsets
    # this test will FP on, here.
-   my $subjraw = $pms->get('Subject:raw');
+   my $subjraw = $pms->get('Subject:raw','');
    my $CLTFAC = Mail::SpamAssassin::Constants::CHARSETS_LIKELY_TO_FP_AS_CAPS;
    if ($subjraw =~ /=\?${CLTFAC}\?/i) {
      return 0;
@@ -910,9 +914,9 @@
   my ($self, $pms, $test) = @_;
 
   my $full_to = $pms->get('To:addr');
-  return 0 unless $full_to;
+  return 0 unless defined $full_to && $full_to ne '';
 
-  my $subject = $pms->get('Subject');
+  my $subject = $pms->get('Subject','');
 
   if ($test eq "address") {
     return $subject =~ /\b\Q$full_to\E\b/i;	# "user@domain.com"
@@ -939,7 +943,8 @@
   local ($_);
 
   my $id = $pms->get('MESSAGEID');
-  return 0 if $id !~ /^<[0-9a-f]{4}([0-9a-f]{8})\$[0-9a-f]{8}\$[0-9a-f]{8}\@/;
+  return 0 if !defined $id ||
+              $id !~ /^<[0-9a-f]{4}([0-9a-f]{8})\$[0-9a-f]{8}\$[0-9a-f]{8}\@/;
 
   my $timetoken = hex($1);
   my $x = 0.0023283064365387;
@@ -947,13 +952,13 @@
 
   my $fudge = 250;
 
-  $_ = $pms->get('Date');
+  $_ = $pms->get('Date','');
   $_ = Mail::SpamAssassin::Util::parse_rfc822_date($_) || 0;
   my $expected = int (($_ * $x) + $y);
   my $diff = $timetoken - $expected;
   return 0 if (abs($diff) < $fudge);
 
-  $_ = $pms->get('Received');
+  $_ = $pms->get('Received','');
   /(\s.?\d+ \S\S\S \d+ \d+:\d+:\d+ \S+).*?$/;
   $_ = Mail::SpamAssassin::Util::parse_rfc822_date($_) || 0;
   $expected = int(($_ * $x) + $y);
@@ -967,14 +972,14 @@
   local ($_);
 
   # Lyris eats message-ids.  also some ezmlm, I think :(
-  $_ = $pms->get("List-Unsubscribe");
+  $_ = $pms->get("List-Unsubscribe",'');
   return 1 if (/<mailto:(?:leave-\S+|\S+-unsubscribe)\@\S+>$/);
 
   # ezmlm again
   if($self->gated_through_received_hdr_remover($pms)) { return 1; }
 
   # Allen notes this as 'Wacky sendmail version?'
-  $_ = $pms->get("Received");
+  $_ = $pms->get("Received",'');
   return 1 if /\/CWT\/DCE\)/;
 
   # Apr  2 2003 jm: iPlanet rewrites lots of stuff, including Message-IDs
@@ -994,7 +999,7 @@
 sub check_unresolved_template {
   my ($self, $pms) = @_;
 
-  my $all = $pms->get('ALL');	# cached access
+  my $all = $pms->get('ALL','');  # cached access
   $all =~ s/\n[ \t]+/ /gs;	# fix continuation lines
   
   for my $header (split(/\n/, $all)) {
@@ -1011,8 +1016,8 @@
 sub check_ratware_name_id {
   my ($self, $pms) = @_;
 
-  my $mid = $pms->get('MESSAGEID');
-  my $from = $pms->get('From');
+  my $mid = $pms->get('MESSAGEID','');
+  my $from = $pms->get('From','');
   if ($mid =~ m/<[A-Z]{28}\.([^>]+?)>/) {
      if ($from =~ m/\"[^\"]+\"\s*<\Q$1\E>/) {
        return 1;
@@ -1027,8 +1032,9 @@
   my $to = $pms->get('To:addr');
   my $from = $pms->get('EnvelopeFrom:addr');
 
-  return 0 unless ($to && $from);
-  return 0 if ($from =~ /^SRS\d=/);
+  return 0 unless defined $from && $from ne '';
+  return 0 unless defined $to   && $to   ne '';
+  return 0 if $from =~ /^SRS\d=/;
 
   if ($to =~ /^([^@]+)@(.+)$/) {
     my($user,$dom) = ($1,$2);

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm?rev=689682&r1=689681&r2=689682&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm Wed Aug 27 17:44:56 2008
@@ -75,7 +75,7 @@
 
   return 0 if grep { $_ eq "all" } @locales;
 
-  $type = get_charset_from_ct_line ($type);
+  $type = get_charset_from_ct_line($type)  if defined $type;
 
   if (defined $type &&
     !Mail::SpamAssassin::Locales::is_charset_ok_for_locales
@@ -428,10 +428,8 @@
   $self->_check_attachments($pms) unless exists $pms->{mime_checked_attachments};
   my @charsets;
   my $type = $pms->get('Content-Type');
-  $type = get_charset_from_ct_line ($type);
-  if (defined $type) {
-    push (@charsets, $type);
-  }
+  $type = get_charset_from_ct_line($type)  if defined $type;
+  push (@charsets, $type)  if defined $type;
   if (defined $pms->{mime_html_charsets}) {
     push (@charsets, split(' ', $pms->{mime_html_charsets}));
   }
@@ -447,6 +445,7 @@
 
 sub get_charset_from_ct_line {
   my $type = shift;
+  if (!defined $type) { return undef; }
   if ($type =~ /charset="([^"]+)"/i) { return $1; }
   if ($type =~ /charset='([^']+)'/i) { return $1; }
   if ($type =~ /charset=(\S+)/i) { return $1; }

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/RelayEval.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/RelayEval.pm?rev=689682&r1=689681&r2=689682&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/RelayEval.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/RelayEval.pm Wed Aug 27 17:44:56 2008
@@ -111,9 +111,9 @@
 sub sent_by_applemail {
   my ($self, $pms) = @_;
 
-  return 0 unless ($pms->get("MIME-Version") =~ /Apple Message framework/);
-  return 0 unless ($pms->get("X-Mailer") =~ /^Apple Mail \(\d+\.\d+\)/);
-  return 0 unless ($pms->get("Message-Id") =~
+  return 0 unless ($pms->get("MIME-Version",'') =~ /Apple Message framework/);
+  return 0 unless ($pms->get("X-Mailer",'') =~ /^Apple Mail \(\d+\.\d+\)/);
+  return 0 unless ($pms->get("Message-Id",'') =~
 		   /^<[A-F0-9]+(?:-[A-F0-9]+){4}\@\S+.\S+>$/);
   return 1;
 }
@@ -209,7 +209,7 @@
   }
 
   my $from = $pms->get('From:addr');
-  if ($from !~ /\b\Q$domain\E/i) {
+  if (!defined $from || $from !~ /\b\Q$domain\E/i) {
       # '0e0' is Perl idiom for "true but zero":
       $pms->{from_domain_in_received}->{$domain} = '0e0';
       return 0;

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/SPF.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/SPF.pm?rev=689682&r1=689681&r2=689682&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/SPF.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/SPF.pm Wed Aug 27 17:44:56 2008
@@ -308,7 +308,7 @@
     $scanner->{checked_for_received_spf_header} = 1;
     dbg("spf: checking to see if the message has a Received-SPF header that we can use");
 
-    my @internal_hdrs = split("\n", $scanner->get('ALL-INTERNAL'));
+    my @internal_hdrs = split("\n", $scanner->get('ALL-INTERNAL',''));
     unless ($scanner->{conf}->{use_newest_received_spf_header}) {
       # look for the LAST (earliest in time) header, it'll be the most accurate
       @internal_hdrs = reverse(@internal_hdrs);
@@ -658,7 +658,7 @@
     # from the Return-Path, X-Envelope-From, or whatever header.
     # it's better to get it from Received though, as that is updated
     # hop-by-hop.
-    $sender = $scanner->get ("EnvelopeFrom:addr");
+    $sender = $scanner->get("EnvelopeFrom:addr",'');
   }
 
   if (!$sender) {

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/WhiteListSubject.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/WhiteListSubject.pm?rev=689682&r1=689681&r2=689682&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/WhiteListSubject.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/WhiteListSubject.pm Wed Aug 27 17:44:56 2008
@@ -112,7 +112,7 @@
 
   my $subject = $permsgstatus->get('Subject');
 
-  return 0 unless $subject;
+  return 0 unless defined $subject && $subject ne '';
 
   return $self->_check_subject($permsgstatus->{conf}->{whitelist_subject}, $subject);
 }
@@ -122,7 +122,7 @@
 
   my $subject = $permsgstatus->get('Subject');
 
-  return 0 unless $subject;
+  return 0 unless defined $subject && $subject ne '';
 
   return $self->_check_subject($permsgstatus->{conf}->{blacklist_subject}, $subject);
 }