You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by mm...@apache.org on 2008/08/26 18:28:52 UTC

svn commit: r689129 - in /spamassassin/trunk/lib/Mail/SpamAssassin: Message.pm Message/Node.pm PerMsgStatus.pm

Author: mmartinec
Date: Tue Aug 26 09:28:52 2008
New Revision: 689129

URL: http://svn.apache.org/viewvc?rev=689129&view=rev
Log:
bug 5965: do not treat user data as Perl booleans (the string "0" evaluates as false); differentiate between missing and empty header fields; tweak header parsing

Modified:
    spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm?rev=689129&r1=689128&r2=689129&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm Tue Aug 26 09:28:52 2008
@@ -149,7 +149,8 @@
     # if we get here, it means that the input was null, so fake the message
     # content as a single newline...
     @message = ("\n");
-  } elsif ($message[0] =~ /^From\s/) {
+  } elsif ($message[0] =~ /^From\s+(?!:)/) {
+    # careful not to confuse with obsolete syntax which allowed WSP before ':'
     # mbox formated mailbox
     $self->{'mbox_sep'} = shift @message;
   } elsif ($message[0] =~ MBX_SEPARATOR) {
@@ -189,14 +190,13 @@
     dbg("message: line ending changed to CRLF");
   }
 
-  # Go through all the headers of the message
-  my $header = '';
-  while ( my $current = shift @message ) {
-    unless ($self->{'missing_head_body_separator'}) {
-      $self->{'pristine_headers'} .= $current;
-    }
+  # Go through all the header fields of the message
+  my $header;
+  for (;;) {
+    # make sure not to lose the last header field when there is no body
+    my $eof = !@message;
+    my $current = $eof ? "\n" : shift @message;
 
-    # NB: Really need to figure out special folding rules here!
     if ( $current =~ /^[ \t]/ ) {
       # This wasn't useful in terms of a rule, but we may want to treat it
       # specially at some point.  Perhaps ignore it?
@@ -204,14 +204,13 @@
       #  $self->{'obsolete_folding_whitespace'} = 1;
       #}
 
-      # append continuations if there's a header in process
-      if ($header) {
-        $header .= $current;
-      }
+      $header = ''  if !defined $header;  # header starts with a continuation!?
+      $header .= $current;  # append continuations, no matter what
+      $self->{'pristine_headers'} .= $current;
     }
-    else {
+    else {  # not a continuation
       # Ok, there's a header here, let's go ahead and add it in.
-      if ($header) {
+      if (defined $header) {  # deal with a previous header field
         my ($key, $value) = split (/:/s, $header, 2);
 
         # If it's not a valid header (aka: not in the form "foo: bar"), skip it.
@@ -229,31 +228,26 @@
         }
       }
 
-      # not a continuation...
-      $header = $current;
-    }
-
-    if ($header) {
-      if ($header =~ /^\r?$/) {
+      if ($current =~ /^\r?$/) {  # a regular end of header section
+        $self->{'pristine_headers'} .= $current  if !$eof;
         last;
-      }
-      else {
-        # Check for missing head/body separator
+
+      } elsif ($current !~ /^[\041-\071\073-\176]+[ \t]*:/ ||
+	       $current =~ /^--/) {  # mime boundary
+        # obsolete header field syntax allowed WSP before a colon;
+	# Check for missing head/body separator
 	# RFC 2822, s2.2:
 	# A field name MUST be composed of printable US-ASCII characters
-	# (i.e., characters that have values between 33 (041) and 126 (176), inclusive),
-	# except colon (072).
-	# FOR THIS NEXT PART: list off the valid REs for what can be next:
-	#	Header, header continuation, blank line
-        if (!@message || $message[0] !~ /^(?:[\041-\071\073-\176]+:|[ \t]|\r?$)/ || $message[0] =~ /^--/) {
-	  # No body or no separator before mime boundary is invalid
-          $self->{'missing_head_body_separator'} = 1;
-	  
-	  # we *have* to go back through again to make sure we catch the last
-	  # header, so fake a separator and loop again.
-	  unshift(@message, "\n");
-        }
+	# (i.e., characters that have values between 33 (041) and 126 (176),
+	# inclusive), except colon (072).
+
+	$self->{'missing_head_body_separator'} = 1;
+        last;
       }
+
+      # start collecting a new header field
+      $header = $current;
+      $self->{'pristine_headers'} .= $current;
     }
   }
   undef $header;

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm?rev=689129&r1=689128&r2=689129&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm Tue Aug 26 09:28:52 2008
@@ -98,7 +98,7 @@
   my ($self, $re, $onlyleaves, $recursive) = @_;
 
   # Didn't pass an RE?  Just abort.
-  return () unless $re;
+  return () unless defined $re && $re ne '';
 
   $onlyleaves = 0 unless defined $onlyleaves;
 
@@ -152,7 +152,7 @@
   my $self   = shift;
   my $rawkey = shift;
 
-  return unless ( defined $rawkey );
+  return unless defined $rawkey;
 
   # we're going to do things case insensitively
   my $key    = lc($rawkey);
@@ -581,7 +581,7 @@
 sub _decode_header {
   my($self, $header) = @_;
 
-  return '' unless $header;
+  return '' unless defined $header && $header ne '';
 
   # deal with folding and cream the newlines and such
   $header =~ s/\n[ \t]+/\n /g;

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm?rev=689129&r1=689128&r2=689129&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm Tue Aug 26 09:28:52 2008
@@ -1546,16 +1546,13 @@
   }
   # a conventional header
   else {
-    if ($getraw) {
-      $result = join('', $self->{msg}->raw_header($request));
-    } else {
-      $result = join('', $self->{msg}->get_header($request));
-    }
-
-    # metadata
-    if (!$result) {
+    my @results = $getraw ? $self->{msg}->raw_header($request)
+                          : $self->{msg}->get_header($request);
+    if (@results) {
+      $result = join('', @results);
+    } else {  # metadata
       $result = $self->{msg}->get_metadata($request);
-      $result = undef if !$result;
+    # undef $result  if defined $result && $result eq '';  # needed?
     }
   }
       
@@ -1620,7 +1617,7 @@
 
   # if the requested header wasn't found, we should return either
   # a default value as specified by the caller, or the blank string ''
-  return $_[2] || '';
+  return defined $_[2] ? $_[2] : '';
 }
 
 ###########################################################################