You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by fe...@apache.org on 2004/02/29 23:12:05 UTC
svn commit: rev 6936 - in incubator/spamassassin/trunk: . lib/Mail lib/Mail/SpamAssassin lib/Mail/SpamAssassin/Message masses spamd t
Author: felicity
Date: Sun Feb 29 14:12:03 2004
New Revision: 6936
Added:
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Message/
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Message/Metadata.pm
- copied, changed from rev 6935, incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgMetadata.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm
- copied, changed from rev 6935, incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgNode.pm
Removed:
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgMetadata.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgNode.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgParser.pm
Modified:
incubator/spamassassin/trunk/MANIFEST
incubator/spamassassin/trunk/lib/Mail/SpamAssassin.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Constants.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Received.pm
incubator/spamassassin/trunk/masses/mass-check
incubator/spamassassin/trunk/spamassassin.raw
incubator/spamassassin/trunk/spamd/spamd.raw
incubator/spamassassin/trunk/t/mimeparse.t
Log:
Doing a bunch of code touchups... Made MsgNode and MsgMetadata Message::Node and Message::Metadata respectively. Merged MsgParser into Message. Made M::SA::parse() just call Message::new(). Added/reworked some documentation. Found a few places where Message->finish() wasn't being called. Etc.
Modified: incubator/spamassassin/trunk/MANIFEST
==============================================================================
--- incubator/spamassassin/trunk/MANIFEST (original)
+++ incubator/spamassassin/trunk/MANIFEST Sun Feb 29 14:12:03 2004
@@ -49,9 +49,8 @@
lib/Mail/SpamAssassin/Locker.pm
lib/Mail/SpamAssassin/MailingList.pm
lib/Mail/SpamAssassin/Message.pm
-lib/Mail/SpamAssassin/MsgNode.pm
-lib/Mail/SpamAssassin/MsgMetadata.pm
-lib/Mail/SpamAssassin/MsgParser.pm
+lib/Mail/SpamAssassin/Message/Node.pm
+lib/Mail/SpamAssassin/Message/Metadata.pm
lib/Mail/SpamAssassin/NetSet.pm
lib/Mail/SpamAssassin/PerMsgLearner.pm
lib/Mail/SpamAssassin/PerMsgStatus.pm
Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin.pm Sun Feb 29 14:12:03 2004
@@ -70,7 +70,6 @@
use Mail::SpamAssassin::ConfSourceLDAP;
use Mail::SpamAssassin::PerMsgStatus;
use Mail::SpamAssassin::Message;
-use Mail::SpamAssassin::MsgParser;
use Mail::SpamAssassin::Bayes;
use Mail::SpamAssassin::PluginHandler;
@@ -302,100 +301,32 @@
###########################################################################
-=item parse()
+=item parse($message, $parse_now)
-Parse will return a Mail::SpamAssassin::Message object. To use it,
-simply call C<Mail::SpamAssassin->parse($msg)>, where $msg is either undef
-(will use STDIN), a scalar of the entire message, an array reference
-of the message with 1 line per array element, or a file glob with the
-entire contents of the message.
-
-This function will return a base Message object with just the headers
-being parsed. M::SA::Message->find_parts() will end up doing a
-full recursive mime parse of the message as necessary. That procedure is
-recursive and ends up generating a tree of M::SA::MsgNode objects.
-parse() will generate the parent node of the tree, then pass the body of
-the message to M::SA::MsgParser->parse_body() which begins the recursive
-process.
+Parse will return a Mail::SpamAssassin::Message object with just the
+headers parsed. When calling this function, there are two optional
+parameters that can be passed in: $message is either undef (which will
+use STDIN), a scalar of the entire message, an array reference of the
+message with 1 line per array element, or a file glob which holds the
+entire contents of the message; and $parse_now, which specifies whether
+or not to create the MIME tree at parse time or later as necessary.
+
+The I<$parse_now> option, by default, is set to false (0).
+This allows SpamAssassin to not have to generate the tree of
+Mail::SpamAssassin::Message::Node objects and their related data if the
+tree is not going to be used. This is handy, for instance, when running
+C<spamassassin -d>, which only needs the pristine header and body which
+is always parsed and stored by this function.
=cut
-# NOTE: This function is allowed (in bad OO form) to modify the
-# Message object directly as Message doesn't really have a
-# constructor in the traditional OO way of things.
-
sub parse {
- my($self, $message) = @_;
- $message ||= \*STDIN;
-
- # protect it from abuse ...
- local $_;
-
- # Figure out how the message was passed to us, and deal with it.
- my @message;
- if (ref $message eq 'ARRAY') {
- @message = @{$message};
- }
- elsif (ref $message eq 'GLOB') {
- if (defined fileno $message) {
- @message = <$message>;
- }
- }
- else {
- @message = split ( /^/m, $message );
- }
-
- # Generate the main object and parse the appropriate MIME-related headers into it.
- my $msg = Mail::SpamAssassin::Message->new();
- my $header = '';
-
- # Go through all the headers of the message
- while ( my $last = shift @message ) {
- if ( $last =~ /^From\s/ ) {
- $msg->{'mbox_sep'} = $last;
- next;
- }
-
- # Store the non-modified headers in a scalar
- $msg->{'pristine_headers'} .= $last;
-
- # NB: Really need to figure out special folding rules here!
- if ( $last =~ /^[ \t]+/ ) { # if its a continuation
- $header .= $last; # fold continuations
- next;
- }
-
- # Ok, there's a header here, let's go ahead and add it in.
- if ($header) {
- my ( $key, $value ) = split ( /:\s*/, $header, 2 );
- $msg->header( $key, $value );
- }
-
- # not a continuation...
- $header = $last;
-
- # Ok, we found the header/body blank line ...
- last if ( $last =~ /^\r?$/m );
- }
-
- # Store the pristine body for later -- store as a copy since @message
- # will get modified below
- $msg->{'pristine_body'} = join('', @message);
-
- # CRLF -> LF
- for ( @message ) {
- s/\r\n/\n/;
- }
-
- # If the message does need to get parsed, save off a copy of the body
- # in a format we can easily parse later so we don't have to rip from
- # pristine_body ...
- #
- $msg->{'toparse'} = \@message;
-
+ my($self, $message, $parsenow) = @_;
+ my $msg = Mail::SpamAssassin::Message->new({message=>$message, parsenow=>$parsenow});
return $msg;
}
+
###########################################################################
=item $f->trim_rules ($regexp)
@@ -490,7 +421,6 @@
$self->init(1);
my $msg = Mail::SpamAssassin::PerMsgStatus->new($self, $mail_obj);
- # Message-Id is used for a filename on disk, so we can't have '/' in it.
$msg->check();
$msg;
}
@@ -687,23 +617,6 @@
###########################################################################
-=item $status = $f->check_message_text ($mailtext)
-
-Check a mail, encapsulated in a plain string, to determine if it is spam or
-not.
-
-Otherwise identical to C<$f->check()> above.
-
-=cut
-
-sub check_message_text {
- my $self = shift;
- my $mail_obj = $self->parse (shift);
- return $self->check ($mail_obj);
-}
-
-###########################################################################
-
=item $f->report_as_spam ($mail, $options)
Report a mail, encapsulated in a C<Mail::SpamAssassin::Message> object, as human-verified spam.
@@ -1197,12 +1110,13 @@
dbg ("ignore: test message to precompile patterns and load modules");
$self->init($use_user_prefs);
- my $mail = $self->parse(\@testmsg);
+ my $mail = $self->parse(\@testmsg, 1);
my $status = Mail::SpamAssassin::PerMsgStatus->new($self, $mail,
{ disable_auto_learning => 1 } );
$status->word_is_in_dictionary("aba"); # load triplets.txt into memory
$status->check();
$status->finish();
+ $mail->finish();
# load SQL modules now as well
my $dsn = $self->{conf}->{user_scores_dsn};
@@ -1243,13 +1157,14 @@
$self->init(1);
$self->{syntax_errors} += $self->{conf}->{errors};
- my $mail = $self->parse(\@testmsg);
+ my $mail = $self->parse(\@testmsg, 1);
my $status = Mail::SpamAssassin::PerMsgStatus->new($self, $mail,
{ disable_auto_learning => 1 } );
$status->check();
$self->{syntax_errors} += $status->{rule_errors};
$status->finish();
+ $mail->finish();
return ($self->{syntax_errors});
}
Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Constants.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Constants.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Constants.pm Sun Feb 29 14:12:03 2004
@@ -29,6 +29,7 @@
$IPV4_ADDRESS $IP_ADDRESS $IP_IN_RESERVED_RANGE $LOCALHOST
);
+
# ---------------------------------------------------------------------------
# Initialize a regexp for reserved IPs, i.e. ones that could be
# used inside a company and be the first or second relay hit by
Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm Sun Feb 29 14:12:03 2004
@@ -31,51 +31,135 @@
=cut
-# the message structure is now:
+# the message structure, after initiating a parse() cycle, is now:
#
-# Message object, also top-level node in MsgNode tree
+# Message object, also top-level node in Message::Node tree
# |
-# +---> MsgNode for other parts in MIME structure
-# | |---> [ more MsgNode parts ... ]
+# +---> Message::Node for other parts in MIME structure
+# | |---> [ more Message::Node parts ... ]
# | [ others ... ]
# |
-# +---> MsgMetadata object to hold metadata
+# +---> Message::Metadata object to hold metadata
package Mail::SpamAssassin::Message;
use strict;
use Mail::SpamAssassin;
-use Mail::SpamAssassin::MsgNode;
-use Mail::SpamAssassin::MsgMetadata;
+use Mail::SpamAssassin::Message::Node;
+use Mail::SpamAssassin::Message::Metadata;
use vars qw(@ISA);
-@ISA = qw(Mail::SpamAssassin::MsgNode);
+@ISA = qw(Mail::SpamAssassin::Message::Node);
+
+use constant MAX_BODY_LINE_LENGTH => 2048;
# ---------------------------------------------------------------------------
=item new()
+Creates a Mail::SpamAssassin::Message object. Takes a hash reference
+as a parameter. The used hash key/value pairs are as follows:
+
+C<message> is either undef (which will use STDIN), a scalar of the
+entire message, an array reference of the message with 1 line per array
+element, or a file glob which holds the entire contents of the message.
+
+C<parsenow> specifies whether or not to create the MIME tree
+at object-creation time or later as necessary.
+
+The I<parsenow> option, by default, is set to false (0).
+This allows SpamAssassin to not have to generate the tree of
+Mail::SpamAssassin::Message::Node objects and their related data if the
+tree is not going to be used. This is handy, for instance, when running
+C<spamassassin -d>, which only needs the pristine header and body which
+is always handled when the object is created.
+
=cut
sub new {
my $class = shift;
$class = ref($class) || $class;
-# my %opts = @_;
-# my $self = $class->SUPER::new(%opts);
my $self = $class->SUPER::new();
$self->{pristine_headers} = '';
$self->{pristine_body} = '';
-# # allow callers to set certain options ...
-# foreach ( 'already_parsed' ) {
-# $self->{$_} = $opts{$_} if ( exists $opts{$_} );
-# }
-
bless($self,$class);
# create the metadata holder class
- $self->{metadata} = Mail::SpamAssassin::MsgMetadata->new($self);
+ $self->{metadata} = Mail::SpamAssassin::Message::Metadata->new($self);
+
+ # Ok, go ahead and do the message "parsing"
+ my($opts) = @_;
+ my $message = $opts->{'message'} || \*STDIN;
+ my $parsenow = $opts->{'parsenow'} || 0;
+
+ # protect it from abuse ...
+ local $_;
+
+ # Figure out how the message was passed to us, and deal with it.
+ my @message;
+ if (ref $message eq 'ARRAY') {
+ @message = @{$message};
+ }
+ elsif (ref $message eq 'GLOB') {
+ if (defined fileno $message) {
+ @message = <$message>;
+ }
+ }
+ else {
+ @message = split ( /^/m, $message );
+ }
+
+ # Go through all the headers of the message
+ my $header = '';
+ while ( my $last = shift @message ) {
+ if ( $last =~ /^From\s/ ) {
+ $self->{'mbox_sep'} = $last;
+ next;
+ }
+
+ # Store the non-modified headers in a scalar
+ $self->{'pristine_headers'} .= $last;
+
+ # NB: Really need to figure out special folding rules here!
+ if ( $last =~ /^[ \t]+/ ) { # if its a continuation
+ $header .= $last; # fold continuations
+ next;
+ }
+
+ # Ok, there's a header here, let's go ahead and add it in.
+ if ($header) {
+ my ( $key, $value ) = split ( /:\s*/, $header, 2 );
+ $self->header( $key, $value );
+ }
+
+ # not a continuation...
+ $header = $last;
+
+ # Ok, we found the header/body blank line ...
+ last if ( $last =~ /^\r?$/m );
+ }
+
+ # Store the pristine body for later -- store as a copy since @message
+ # will get modified below
+ $self->{'pristine_body'} = join('', @message);
+
+ # CRLF -> LF
+ for ( @message ) {
+ s/\r\n/\n/;
+ }
+
+ # If the message does need to get parsed, save off a copy of the body
+ # in a format we can easily parse later so we don't have to rip from
+ # pristine_body ... If we do want to parse now, go ahead and do so ...
+ #
+ if ($parsenow) {
+ $self->_do_parse(\@message);
+ }
+ else {
+ $self->{'toparse'} = \@message;
+ }
$self;
}
@@ -91,12 +175,21 @@
=cut
sub _do_parse {
- my($self) = @_;
+ my($self, $array) = @_;
+
+ # We can either be passed the array to parse, or we may have to find it
+ # in the object data ...
+ my $toparse;
+ if (defined $array) {
+ $toparse = $array;
+ }
+ elsif (exists $self->{'toparse'}) {
+ $toparse = $self->{'toparse'};
+ delete $self->{'toparse'};
+ }
# If we're called when we don't need to be, then just go ahead and return.
- return if (!exists $self->{'toparse'});
- my $toparse = $self->{'toparse'};
- delete $self->{'toparse'};
+ return if (!defined $toparse);
dbg("---- MIME PARSER START ----");
@@ -106,13 +199,16 @@
dbg("main message type: ".$self->{'type'});
# Make the tree
- Mail::SpamAssassin::MsgParser->parse_body( $self, $self, $boundary, $toparse, 1 );
+ $self->parse_body( $self, $self, $boundary, $toparse, 1 );
dbg("---- MIME PARSER END ----");
}
=item find_parts()
+Used to search the tree for specific MIME parts. See
+I<Mail::SpamAssassin::Message::Node> for more details.
+
=cut
# Used to find any MIME parts whose simple content-type matches a given regexp
@@ -125,7 +221,7 @@
# ok, we need to do the parsing now...
$self->_do_parse() if (exists $self->{'toparse'});
- # and pass through to the MsgNode version of the method
+ # and pass through to the Message::Node version of the method
return $self->SUPER::find_parts($re, $onlyleaves, $recursive);
}
@@ -133,6 +229,17 @@
=item get_pristine_header()
+Returns pristine headers of the message. If no specific header name
+is given as a parameter (case-insensitive), then all headers will
+be returned. If called in an array context, an array will be returned
+with each header (specific or all) in a different element. In a scalar
+context, either all of the headers are returned as a scalar, or the last
+specific header is returned.
+
+ie: If 'Subject' is specified as the header, and there are 2 Subject
+headers in a message, the last/bottom one in the message is returned in
+scalar context or both are returned in array context.
+
=cut
sub get_pristine_header {
@@ -148,12 +255,21 @@
}
}
+=item get_mbox_seperator()
+
+Returns the mbox separator found in the message, or undef if there
+wasn't one.
+
+=cut
+
sub get_mbox_seperator {
return $_[0]->{mbox_sep};
}
=item get_body()
+Returns an array of the pristine message body, one line per array element.
+
=cut
sub get_body {
@@ -166,6 +282,8 @@
=item get_pristine()
+Returns a scalar of the entire pristine message.
+
=cut
sub get_pristine {
@@ -175,6 +293,8 @@
=item get_pristine_body()
+Returns a scalar of the pristine message body.
+
=cut
sub get_pristine_body {
@@ -183,6 +303,206 @@
}
# ---------------------------------------------------------------------------
+
+=head1 PARSING METHODS, NON-PUBLIC
+
+These methods take a RFC2822-esque formatted message and create a tree
+with all of the MIME body parts included. Those parts will be decoded
+as necessary, and text/html parts will be rendered into a standard text
+format, suitable for use in SpamAssassin.
+
+=item parse_body()
+
+parse_body() passes the body part that was passed in onto the
+correct part parser, either _parse_multipart() for multipart/* parts,
+or _parse_normal() for everything else. Multipart sections become the
+root of sub-trees, while everything else becomes a leaf in the tree.
+
+For multipart messages, the first call to parse_body() doesn't create a
+new sub-tree and just uses the parent node to contain children. All other
+calls to parse_body() will cause a new sub-tree root to be created and
+children will exist underneath that root. (this is just so the tree
+doesn't have a root node which points at the actual root node ...)
+
+=cut
+
+sub parse_body {
+ my($self, $msg, $_msg, $boundary, $body, $initial) = @_;
+
+ # Figure out the simple content-type, or set it to text/plain
+ my $type = $_msg->header('Content-Type') || 'text/plain; charset=us-ascii';
+
+ # multipart sections are required to have a boundary set ... If this
+ # one doesn't, assume it's malformed and send it to be parsed as a
+ # non-multipart section
+ #
+ if ( $type =~ /^multipart\//i && defined $boundary ) {
+ # Treat an initial multipart parse differently. This will keep the tree:
+ # obj(multipart->[ part1, part2 ]) instead of
+ # obj(obj(multipart ...))
+ #
+ if ( $initial ) {
+ $self->_parse_multipart( $msg, $_msg, $boundary, $body );
+ }
+ else {
+ $self->_parse_multipart( $_msg, $_msg, $boundary, $body );
+ $msg->add_body_part( $_msg );
+ }
+ }
+ else {
+ # If it's not multipart, go ahead and just deal with it.
+ $self->_parse_normal( $msg, $_msg, $boundary, $body );
+ }
+}
+
+=item _parse_multipart()
+
+Generate a root node, and for each child part call parse_body()
+to generate the tree.
+
+=cut
+
+sub _parse_multipart {
+ my($self, $msg, $_msg, $boundary, $body) = @_;
+
+ dbg("parsing multipart, got boundary: ".(defined $boundary ? $boundary : ''));
+
+ # ignore preamble per RFC 1521, unless there's no boundary ...
+ if ( defined $boundary ) {
+ my $line;
+ my $tmp_line = @{$body};
+ for ($line=0; $line < $tmp_line; $line++) {
+ last if $body->[$line] =~ /^\-\-\Q$boundary\E$/;
+ }
+
+ # Found a boundary, ignore the preamble
+ if ( $line < $tmp_line ) {
+ splice @{$body}, 0, $line+1;
+ }
+
+ # Else, there's no boundary, so leave the whole part...
+ }
+
+ my $part_msg = Mail::SpamAssassin::Message::Node->new(); # prepare a new tree node
+ my $in_body = 0;
+ my $header;
+ my $part_array;
+
+ my $line_count = @{$body};
+ foreach ( @{$body} ) {
+ # if we're on the last body line, or we find a boundary marker, deal with the mime part
+ if ( --$line_count == 0 || (defined $boundary && /^\-\-\Q$boundary\E/) ) {
+ my $line = $_; # remember the last line
+
+ # per rfc 1521, the CRLF before the boundary is part of the boundary:
+ # NOTE: The CRLF preceding the encapsulation line is conceptually
+ # attached to the boundary so that it is possible to have a part
+ # that does not end with a CRLF (line break). Body parts that must
+ # be considered to end with line breaks, therefore, must have two
+ # CRLFs preceding the encapsulation line, the first of which is part
+ # of the preceding body part, and the second of which is part of the
+ # encapsulation boundary.
+ if ($part_array) {
+ chomp( $part_array->[-1] ); # trim the CRLF that's part of the boundary
+ splice @{$part_array}, -1 if ( $part_array->[-1] eq '' ); # blank line for the boundary only ...
+
+ my($p_boundary);
+ ($part_msg->{'type'}, $p_boundary) = Mail::SpamAssassin::Util::parse_content_type($part_msg->header('content-type'));
+ $p_boundary ||= $boundary;
+ dbg("found part of type ".$part_msg->{'type'}.", boundary: ".(defined $p_boundary ? $p_boundary : ''));
+ $self->parse_body( $msg, $part_msg, $p_boundary, $part_array, 0 );
+ }
+
+ last if (defined $boundary && $line =~ /^\-\-\Q${boundary}\E\-\-$/);
+
+ # make sure we start with a new clean node
+ $in_body = 0;
+ $part_msg = Mail::SpamAssassin::Message::Node->new();
+ undef $part_array;
+ undef $header;
+
+ next;
+ }
+
+ if ($in_body) {
+ # we run into a perl bug if the lines are astronomically long (probably due
+ # to lots of regexp backtracking); so cut short any individual line over
+ # MAX_BODY_LINE_LENGTH bytes in length. This can wreck HTML totally -- but
+ # IMHO the only reason a luser would use MAX_BODY_LINE_LENGTH-byte lines is
+ # to crash filters, anyway.
+ while (length ($_) > MAX_BODY_LINE_LENGTH) {
+ push (@{$part_array}, substr($_, 0, MAX_BODY_LINE_LENGTH)."\n");
+ substr($_, 0, MAX_BODY_LINE_LENGTH) = '';
+ }
+ push ( @{$part_array}, $_ );
+ }
+ else {
+ s/\s+$//;
+ if (m/^\S/) {
+ if ($header) {
+ my ( $key, $value ) = split ( /:\s*/, $header, 2 );
+ $part_msg->header( $key, $value );
+ }
+ $header = $_;
+ }
+ elsif (/^$/) {
+ if ($header) {
+ my ( $key, $value ) = split ( /:\s*/, $header, 2 );
+ $part_msg->header( $key, $value );
+ }
+ $in_body = 1;
+ }
+ else {
+ $_ =~ s/^\s*//;
+ $header .= $_;
+ }
+ }
+ }
+
+}
+
+=item _parse_normal()
+
+Generate a leaf node and add it to the parent.
+
+=cut
+
+sub _parse_normal {
+ my ($self, $msg, $part_msg, $boundary, $body) = @_;
+
+ dbg("parsing normal part");
+
+ $part_msg->{'type'} =
+ Mail::SpamAssassin::Util::parse_content_type($part_msg->header('content-type'));
+
+ # multipart sections are required to have a boundary set ... If this
+ # one doesn't, assume it's malformed and revert to text/plain
+ $part_msg->{'type'} = 'text/plain' if ( $part_msg->{'type'} =~ /^multipart\//i && !defined $boundary );
+
+ # attempt to figure out a name for this attachment if there is one ...
+ my $disp = $part_msg->header('content-disposition') || '';
+ my($filename) = $disp =~ /name="?([^\";]+)"?/i || $part_msg->{'type'} =~ /name="?([^\";]+)"?/i;
+
+ $part_msg->{'raw'} = $body;
+ $part_msg->{'boundary'} = $boundary;
+ $part_msg->{'name'} = $filename if $filename;
+
+ $msg->add_body_part($part_msg);
+
+ # now that we've added the leaf node, let's go ahead and kill
+ # body_parts (used for sub-trees). it could end up being recursive,
+ # and well, let's avoid that. ;)
+ #
+ # BTW: please leave this after add_body_parts() since it'll add it back.
+ #
+ delete $part_msg->{body_parts};
+}
+
+# ---------------------------------------------------------------------------
+
+=item $str = get_metadata($hdr)
+
+=cut
sub extract_message_metadata {
my ($self, $main) = @_;
Copied: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Message/Metadata.pm (from rev 6935, incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgMetadata.pm)
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgMetadata.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Message/Metadata.pm Sun Feb 29 14:12:03 2004
@@ -18,7 +18,7 @@
=head1 NAME
-Mail::SpamAssassin::MsgMetadata - extract metadata from a message
+Mail::SpamAssassin::Message::Metadata - extract metadata from a message
=head1 SYNOPSIS
@@ -47,7 +47,7 @@
=cut
-package Mail::SpamAssassin::MsgMetadata;
+package Mail::SpamAssassin::Message::Metadata;
use strict;
use bytes;
Copied: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm (from rev 6935, incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgNode.pm)
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgNode.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm Sun Feb 29 14:12:03 2004
@@ -18,7 +18,7 @@
=head1 NAME
-Mail::SpamAssassin::MsgNode - decode, render, and make available MIME message parts
+Mail::SpamAssassin::Message::Node - decode, render, and make available MIME message parts
=head1 SYNOPSIS
@@ -33,7 +33,7 @@
=cut
-package Mail::SpamAssassin::MsgNode;
+package Mail::SpamAssassin::Message::Node;
use strict;
use Mail::SpamAssassin;
use Mail::SpamAssassin::HTML;
@@ -47,7 +47,6 @@
sub new {
my $class = shift;
$class = ref($class) || $class;
- my %opts = @_; # unused currently in MsgNode, used in Message
my $self = {
headers => {},
Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Received.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Received.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Received.pm Sun Feb 29 14:12:03 2004
@@ -41,7 +41,7 @@
package Mail::SpamAssassin::Received;
1;
-package Mail::SpamAssassin::MsgMetadata;
+package Mail::SpamAssassin::Message::Metadata;
use strict;
use bytes;
Modified: incubator/spamassassin/trunk/masses/mass-check
==============================================================================
--- incubator/spamassassin/trunk/masses/mass-check (original)
+++ incubator/spamassassin/trunk/masses/mass-check Sun Feb 29 14:12:03 2004
@@ -247,7 +247,7 @@
# remove SpamAssassin markup, if present and the mail was spam
$_ = $ma->get_header ("X-Spam-Status");
if (defined($_) && /^Yes,/) {
- my $new_ma = $spamtest->parse ($spamtest->remove_spamassassin_markup($ma));
+ my $new_ma = $spamtest->parse ($spamtest->remove_spamassassin_markup($ma), 1);
$ma->finish();
$ma = $new_ma;
}
Modified: incubator/spamassassin/trunk/spamassassin.raw
==============================================================================
--- incubator/spamassassin/trunk/spamassassin.raw (original)
+++ incubator/spamassassin/trunk/spamassassin.raw Sun Feb 29 14:12:03 2004
@@ -193,9 +193,8 @@
# Make sure the message is clean first ...
my $new_mail = $spamtest->parse ($spamtest->remove_spamassassin_markup ($mail));
$mail->finish();
- $mail = $new_mail;
- if ( $spamtest->report_as_spam ($mail) ) {
+ if ( $spamtest->report_as_spam ($new_mail) ) {
warn "Warning, unable to report spam\nFor more information, re-run with -D option to see debug output.\n";
}
exit(0);
@@ -206,9 +205,8 @@
# Make sure the message is clean first ...
my $new_mail = $spamtest->parse ($spamtest->remove_spamassassin_markup ($mail));
$mail->finish();
- $mail = $new_mail;
- if ( $spamtest->revoke_as_spam ($mail) ) {
+ if ( $spamtest->revoke_as_spam ($new_mail) ) {
warn "Warning, unable to revoke spam\nFor more information, re-run with -D option to see debug output.\n";
}
exit(0);
Modified: incubator/spamassassin/trunk/spamd/spamd.raw
==============================================================================
--- incubator/spamassassin/trunk/spamd/spamd.raw (original)
+++ incubator/spamassassin/trunk/spamd/spamd.raw Sun Feb 29 14:12:03 2004
@@ -826,15 +826,16 @@
"."
);
- my $mail = $spamtest->parse (\@msglines);
-
# Check length if we're supposed to
if($expected_length && ($actual_length != $expected_length)) {
protocol_error ("(Content-Length mismatch: Expected $expected_length bytes, got $actual_length bytes)");
return 1;
}
- # Now use copy-on-writed (hopefully) SA object
+ # Parse the input message
+ my $mail = $spamtest->parse (\@msglines, 1);
+
+ # Go ahead and check the message
my $status = $spamtest->check($mail);
my $msg_score = sprintf("%.1f",$status->get_score);
Modified: incubator/spamassassin/trunk/t/mimeparse.t
==============================================================================
--- incubator/spamassassin/trunk/t/mimeparse.t (original)
+++ incubator/spamassassin/trunk/t/mimeparse.t Sun Feb 29 14:12:03 2004
@@ -88,11 +88,8 @@
foreach my $k ( sort keys %files ) {
open(INP, $k) || die "Can't find $k:$!";
- my $mail = Mail::SpamAssassin->parse(\*INP);
+ my $mail = Mail::SpamAssassin->parse(\*INP, 1);
close(INP);
-
- # We know it's not parsed, so deal with it!
- $mail->_do_parse();
my $res = join("\n",$mail->content_summary());
#print "---\n$res\n---\n";