You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by mm...@apache.org on 2008/08/26 18:28:52 UTC
svn commit: r689129 - in /spamassassin/trunk/lib/Mail/SpamAssassin:
Message.pm Message/Node.pm PerMsgStatus.pm
Author: mmartinec
Date: Tue Aug 26 09:28:52 2008
New Revision: 689129
URL: http://svn.apache.org/viewvc?rev=689129&view=rev
Log:
bug 5965: do not treat user data as Perl booleans (the string "0" is false); differentiate between missing and empty header fields; tweak header parsing
Modified:
spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm
spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm?rev=689129&r1=689128&r2=689129&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm Tue Aug 26 09:28:52 2008
@@ -149,7 +149,8 @@
# if we get here, it means that the input was null, so fake the message
# content as a single newline...
@message = ("\n");
- } elsif ($message[0] =~ /^From\s/) {
+ } elsif ($message[0] =~ /^From\s+(?!:)/) {
+ # careful not to confuse with obsolete syntax which allowed WSP before ':'
# mbox formated mailbox
$self->{'mbox_sep'} = shift @message;
} elsif ($message[0] =~ MBX_SEPARATOR) {
@@ -189,14 +190,13 @@
dbg("message: line ending changed to CRLF");
}
- # Go through all the headers of the message
- my $header = '';
- while ( my $current = shift @message ) {
- unless ($self->{'missing_head_body_separator'}) {
- $self->{'pristine_headers'} .= $current;
- }
+ # Go through all the header fields of the message
+ my $header;
+ for (;;) {
+ # make sure not to lose the last header field when there is no body
+ my $eof = !@message;
+ my $current = $eof ? "\n" : shift @message;
- # NB: Really need to figure out special folding rules here!
if ( $current =~ /^[ \t]/ ) {
# This wasn't useful in terms of a rule, but we may want to treat it
# specially at some point. Perhaps ignore it?
@@ -204,14 +204,13 @@
# $self->{'obsolete_folding_whitespace'} = 1;
#}
- # append continuations if there's a header in process
- if ($header) {
- $header .= $current;
- }
+ $header = '' if !defined $header; # header starts with a continuation!?
+ $header .= $current; # append continuations, no matter what
+ $self->{'pristine_headers'} .= $current;
}
- else {
+ else { # not a continuation
# Ok, there's a header here, let's go ahead and add it in.
- if ($header) {
+ if (defined $header) { # deal with a previous header field
my ($key, $value) = split (/:/s, $header, 2);
# If it's not a valid header (aka: not in the form "foo: bar"), skip it.
@@ -229,31 +228,26 @@
}
}
- # not a continuation...
- $header = $current;
- }
-
- if ($header) {
- if ($header =~ /^\r?$/) {
+ if ($current =~ /^\r?$/) { # a regular end of header section
+ $self->{'pristine_headers'} .= $current if !$eof;
last;
- }
- else {
- # Check for missing head/body separator
+
+ } elsif ($current !~ /^[\041-\071\073-\176]+[ \t]*:/ ||
+ $current =~ /^--/) { # mime boundary
+ # obsolete header field syntax allowed WSP before a colon;
+ # Check for missing head/body separator
# RFC 2822, s2.2:
# A field name MUST be composed of printable US-ASCII characters
- # (i.e., characters that have values between 33 (041) and 126 (176), inclusive),
- # except colon (072).
- # FOR THIS NEXT PART: list off the valid REs for what can be next:
- # Header, header continuation, blank line
- if (!@message || $message[0] !~ /^(?:[\041-\071\073-\176]+:|[ \t]|\r?$)/ || $message[0] =~ /^--/) {
- # No body or no separator before mime boundary is invalid
- $self->{'missing_head_body_separator'} = 1;
-
- # we *have* to go back through again to make sure we catch the last
- # header, so fake a separator and loop again.
- unshift(@message, "\n");
- }
+ # (i.e., characters that have values between 33 (041) and 126 (176),
+ # inclusive), except colon (072).
+
+ $self->{'missing_head_body_separator'} = 1;
+ last;
}
+
+ # start collecting a new header field
+ $header = $current;
+ $self->{'pristine_headers'} .= $current;
}
}
undef $header;
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm?rev=689129&r1=689128&r2=689129&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm Tue Aug 26 09:28:52 2008
@@ -98,7 +98,7 @@
my ($self, $re, $onlyleaves, $recursive) = @_;
# Didn't pass an RE? Just abort.
- return () unless $re;
+ return () unless defined $re && $re ne '';
$onlyleaves = 0 unless defined $onlyleaves;
@@ -152,7 +152,7 @@
my $self = shift;
my $rawkey = shift;
- return unless ( defined $rawkey );
+ return unless defined $rawkey;
# we're going to do things case insensitively
my $key = lc($rawkey);
@@ -581,7 +581,7 @@
sub _decode_header {
my($self, $header) = @_;
- return '' unless $header;
+ return '' unless defined $header && $header ne '';
# deal with folding and cream the newlines and such
$header =~ s/\n[ \t]+/\n /g;
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm?rev=689129&r1=689128&r2=689129&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm Tue Aug 26 09:28:52 2008
@@ -1546,16 +1546,13 @@
}
# a conventional header
else {
- if ($getraw) {
- $result = join('', $self->{msg}->raw_header($request));
- } else {
- $result = join('', $self->{msg}->get_header($request));
- }
-
- # metadata
- if (!$result) {
+ my @results = $getraw ? $self->{msg}->raw_header($request)
+ : $self->{msg}->get_header($request);
+ if (@results) {
+ $result = join('', @results);
+ } else { # metadata
$result = $self->{msg}->get_metadata($request);
- $result = undef if !$result;
+ # undef $result if defined $result && $result eq ''; # needed?
}
}
@@ -1620,7 +1617,7 @@
# if the requested header wasn't found, we should return either
# a default value as specified by the caller, or the blank string ''
- return $_[2] || '';
+ return defined $_[2] ? $_[2] : '';
}
###########################################################################