You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by fe...@apache.org on 2004/02/29 23:12:05 UTC

svn commit: rev 6936 - in incubator/spamassassin/trunk: . lib/Mail lib/Mail/SpamAssassin lib/Mail/SpamAssassin/Message masses spamd t

Author: felicity
Date: Sun Feb 29 14:12:03 2004
New Revision: 6936

Added:
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Message/
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Message/Metadata.pm
      - copied, changed from rev 6935, incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgMetadata.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm
      - copied, changed from rev 6935, incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgNode.pm
Removed:
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgMetadata.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgNode.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgParser.pm
Modified:
   incubator/spamassassin/trunk/MANIFEST
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Constants.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Received.pm
   incubator/spamassassin/trunk/masses/mass-check
   incubator/spamassassin/trunk/spamassassin.raw
   incubator/spamassassin/trunk/spamd/spamd.raw
   incubator/spamassassin/trunk/t/mimeparse.t
Log:
doing a bunch of code touchups...  Made MsgNode and MsgMetadata Message::Node and Message::Metadata respectively.  merged MsgParser into Message.  made M::SA::parse() just call Message::new().  added/reworked some documentation.  found a few places where Message->finish() wasn't being called.  etc.

Modified: incubator/spamassassin/trunk/MANIFEST
==============================================================================
--- incubator/spamassassin/trunk/MANIFEST	(original)
+++ incubator/spamassassin/trunk/MANIFEST	Sun Feb 29 14:12:03 2004
@@ -49,9 +49,8 @@
 lib/Mail/SpamAssassin/Locker.pm
 lib/Mail/SpamAssassin/MailingList.pm
 lib/Mail/SpamAssassin/Message.pm
-lib/Mail/SpamAssassin/MsgNode.pm
-lib/Mail/SpamAssassin/MsgMetadata.pm
-lib/Mail/SpamAssassin/MsgParser.pm
+lib/Mail/SpamAssassin/Message/Node.pm
+lib/Mail/SpamAssassin/Message/Metadata.pm
 lib/Mail/SpamAssassin/NetSet.pm
 lib/Mail/SpamAssassin/PerMsgLearner.pm
 lib/Mail/SpamAssassin/PerMsgStatus.pm

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin.pm	Sun Feb 29 14:12:03 2004
@@ -70,7 +70,6 @@
 use Mail::SpamAssassin::ConfSourceLDAP;
 use Mail::SpamAssassin::PerMsgStatus;
 use Mail::SpamAssassin::Message;
-use Mail::SpamAssassin::MsgParser;
 use Mail::SpamAssassin::Bayes;
 use Mail::SpamAssassin::PluginHandler;
 
@@ -302,100 +301,32 @@
 
 ###########################################################################
 
-=item parse()
+=item parse($message, $parse_now)
 
-Parse will return a Mail::SpamAssassin::Message object.  To use it,
-simply call C<Mail::SpamAssassin->parse($msg)>, where $msg is either undef
-(will use STDIN), a scalar of the entire message, an array reference
-of the message with 1 line per array element, or a file glob with the
-entire contents of the message.
-
-This function will return a base Message object with just the headers
-being parsed.  M::SA::Message->find_parts() will end up doing a
-full recursive mime parse of the message as necessary.  That procedure is
-recursive and ends up generating a tree of M::SA::MsgNode objects.
-parse() will generate the parent node of the tree, then pass the body of
-the message to M::SA::MsgParser->parse_body() which begins the recursive
-process.
+Parse will return a Mail::SpamAssassin::Message object with just the
+headers parsed.  When calling this function, there are two optional
+parameters that can be passed in: $message is either undef (which will
+use STDIN), a scalar of the entire message, an array reference of the
+message with 1 line per array element, or a file glob which holds the
+entire contents of the message; and $parse_now, which specifies whether
+or not to create the MIME tree at parse time or later as necessary.
+
+The I<$parse_now> option, by default, is set to false (0).
+This allows SpamAssassin to not have to generate the tree of
+Mail::SpamAssassin::Message::Node objects and their related data if the
+tree is not going to be used.  This is handy, for instance, when running
+C<spamassassin -d>, which only needs the pristine header and body which
+is always parsed and stored by this function.
 
 =cut
 
-# NOTE: This function is allowed (in bad OO form) to modify the
-# Message object directly as Message doesn't really have a
-# constructor in the traditional OO way of things.
-
 sub parse {
-  my($self, $message) = @_;
-  $message ||= \*STDIN;
-
-  # protect it from abuse ...
-  local $_;
-
-  # Figure out how the message was passed to us, and deal with it.
-  my @message;
-  if (ref $message eq 'ARRAY') {
-     @message = @{$message};
-  }
-  elsif (ref $message eq 'GLOB') {
-    if (defined fileno $message) {
-      @message = <$message>;
-    }
-  }
-  else {
-    @message = split ( /^/m, $message );
-  }
-
-  # Generate the main object and parse the appropriate MIME-related headers into it.
-  my $msg = Mail::SpamAssassin::Message->new();
-  my $header = '';
-
-  # Go through all the headers of the message
-  while ( my $last = shift @message ) {
-    if ( $last =~ /^From\s/ ) {
-      $msg->{'mbox_sep'} = $last;
-      next;
-    }
-
-    # Store the non-modified headers in a scalar
-    $msg->{'pristine_headers'} .= $last;
-
-    # NB: Really need to figure out special folding rules here!
-    if ( $last =~ /^[ \t]+/ ) {                    # if its a continuation
-      $header .= $last;                            # fold continuations
-      next;
-    }
-
-    # Ok, there's a header here, let's go ahead and add it in.
-    if ($header) {
-      my ( $key, $value ) = split ( /:\s*/, $header, 2 );
-      $msg->header( $key, $value );
-    }
-
-    # not a continuation...
-    $header = $last;
-
-    # Ok, we found the header/body blank line ...
-    last if ( $last =~ /^\r?$/m );
-  }
-
-  # Store the pristine body for later -- store as a copy since @message
-  # will get modified below
-  $msg->{'pristine_body'} = join('', @message);
-
-  # CRLF -> LF
-  for ( @message ) {
-    s/\r\n/\n/;
-  }
-
-  # If the message does need to get parsed, save off a copy of the body
-  # in a format we can easily parse later so we don't have to rip from
-  # pristine_body ...
-  #
-  $msg->{'toparse'} = \@message;
-
+  my($self, $message, $parsenow) = @_;
+  my $msg = Mail::SpamAssassin::Message->new({message=>$message, parsenow=>$parsenow});
   return $msg;
 }
 
+
 ###########################################################################
 
 =item $f->trim_rules ($regexp)
@@ -490,7 +421,6 @@
 
   $self->init(1);
   my $msg = Mail::SpamAssassin::PerMsgStatus->new($self, $mail_obj);
-  # Message-Id is used for a filename on disk, so we can't have '/' in it.
   $msg->check();
   $msg;
 }
@@ -687,23 +617,6 @@
 
 ###########################################################################
 
-=item $status = $f->check_message_text ($mailtext)
-
-Check a mail, encapsulated in a plain string, to determine if it is spam or
-not.
-
-Otherwise identical to C<$f->check()> above.
-
-=cut
-
-sub check_message_text {
-  my $self = shift;
-  my $mail_obj = $self->parse (shift);
-  return $self->check ($mail_obj);
-}
-
-###########################################################################
-
 =item $f->report_as_spam ($mail, $options)
 
 Report a mail, encapsulated in a C<Mail::SpamAssassin::Message> object, as human-verified spam.
@@ -1197,12 +1110,13 @@
   dbg ("ignore: test message to precompile patterns and load modules");
   $self->init($use_user_prefs);
 
-  my $mail = $self->parse(\@testmsg);
+  my $mail = $self->parse(\@testmsg, 1);
   my $status = Mail::SpamAssassin::PerMsgStatus->new($self, $mail,
                         { disable_auto_learning => 1 } );
   $status->word_is_in_dictionary("aba"); # load triplets.txt into memory
   $status->check();
   $status->finish();
+  $mail->finish();
 
   # load SQL modules now as well
   my $dsn = $self->{conf}->{user_scores_dsn};
@@ -1243,13 +1157,14 @@
   $self->init(1);
   $self->{syntax_errors} += $self->{conf}->{errors};
 
-  my $mail = $self->parse(\@testmsg);
+  my $mail = $self->parse(\@testmsg, 1);
   my $status = Mail::SpamAssassin::PerMsgStatus->new($self, $mail,
                         { disable_auto_learning => 1 } );
   $status->check();
 
   $self->{syntax_errors} += $status->{rule_errors};
   $status->finish();
+  $mail->finish();
 
   return ($self->{syntax_errors});
 }

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Constants.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Constants.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Constants.pm	Sun Feb 29 14:12:03 2004
@@ -29,6 +29,7 @@
   $IPV4_ADDRESS $IP_ADDRESS $IP_IN_RESERVED_RANGE $LOCALHOST
 );
 
+
 # ---------------------------------------------------------------------------
 # Initialize a regexp for reserved IPs, i.e. ones that could be
 # used inside a company and be the first or second relay hit by

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm	Sun Feb 29 14:12:03 2004
@@ -31,51 +31,135 @@
 
 =cut
 
-# the message structure is now:
+# the message structure, after initiating a parse() cycle, is now:
 #
-# Message object, also top-level node in MsgNode tree
+# Message object, also top-level node in Message::Node tree
 #    |
-#    +---> MsgNode for other parts in MIME structure
-#    |       |---> [ more MsgNode parts ... ]
+#    +---> Message::Node for other parts in MIME structure
+#    |       |---> [ more Message::Node parts ... ]
 #    |       [ others ... ]
 #    |
-#    +---> MsgMetadata object to hold metadata
+#    +---> Message::Metadata object to hold metadata
 
 package Mail::SpamAssassin::Message;
 use strict;
 use Mail::SpamAssassin;
-use Mail::SpamAssassin::MsgNode;
-use Mail::SpamAssassin::MsgMetadata;
+use Mail::SpamAssassin::Message::Node;
+use Mail::SpamAssassin::Message::Metadata;
 
 use vars qw(@ISA);
 
-@ISA = qw(Mail::SpamAssassin::MsgNode);
+@ISA = qw(Mail::SpamAssassin::Message::Node);
+
+use constant MAX_BODY_LINE_LENGTH =>        2048;
 
 # ---------------------------------------------------------------------------
 
 =item new()
 
+Creates a Mail::SpamAssassin::Message object.  Takes a hash reference
+as a parameter.  The used hash key/value pairs are as follows:
+
+C<message> is either undef (which will use STDIN), a scalar of the
+entire message, an array reference of the message with 1 line per array
+element, or a file glob which holds the entire contents of the message.
+
+C<parsenow> specifies whether or not to create the MIME tree
+at object-creation time or later as necessary.
+
+The I<parsenow> option, by default, is set to false (0).
+This allows SpamAssassin to not have to generate the tree of
+Mail::SpamAssassin::Message::Node objects and their related data if the
+tree is not going to be used.  This is handy, for instance, when running
+C<spamassassin -d>, which only needs the pristine header and body which
+is always handled when the object is created.
+
 =cut
 
 sub new {
   my $class = shift;
   $class = ref($class) || $class;
-#  my %opts = @_;
-#  my $self = $class->SUPER::new(%opts);
   my $self = $class->SUPER::new();
 
   $self->{pristine_headers} =	'';
   $self->{pristine_body} =	'';
 
-#  # allow callers to set certain options ...
-#  foreach ( 'already_parsed' ) {
-#    $self->{$_} = $opts{$_} if ( exists $opts{$_} );
-#  }
-
   bless($self,$class);
 
   # create the metadata holder class
-  $self->{metadata} = Mail::SpamAssassin::MsgMetadata->new($self);
+  $self->{metadata} = Mail::SpamAssassin::Message::Metadata->new($self);
+
+  # Ok, go ahead and do the message "parsing"
+  my($opts) = @_;
+  my $message = $opts->{'message'} || \*STDIN;
+  my $parsenow = $opts->{'parsenow'} || 0;
+
+  # protect it from abuse ...
+  local $_;
+
+  # Figure out how the message was passed to us, and deal with it.
+  my @message;
+  if (ref $message eq 'ARRAY') {
+     @message = @{$message};
+  }
+  elsif (ref $message eq 'GLOB') {
+    if (defined fileno $message) {
+      @message = <$message>;
+    }
+  }
+  else {
+    @message = split ( /^/m, $message );
+  }
+
+  # Go through all the headers of the message
+  my $header = '';
+  while ( my $last = shift @message ) {
+    if ( $last =~ /^From\s/ ) {
+      $self->{'mbox_sep'} = $last;
+      next;
+    }
+
+    # Store the non-modified headers in a scalar
+    $self->{'pristine_headers'} .= $last;
+
+    # NB: Really need to figure out special folding rules here!
+    if ( $last =~ /^[ \t]+/ ) {                    # if its a continuation
+      $header .= $last;                            # fold continuations
+      next;
+    }
+
+    # Ok, there's a header here, let's go ahead and add it in.
+    if ($header) {
+      my ( $key, $value ) = split ( /:\s*/, $header, 2 );
+      $self->header( $key, $value );
+    }
+
+    # not a continuation...
+    $header = $last;
+
+    # Ok, we found the header/body blank line ...
+    last if ( $last =~ /^\r?$/m );
+  }
+
+  # Store the pristine body for later -- store as a copy since @message
+  # will get modified below
+  $self->{'pristine_body'} = join('', @message);
+
+  # CRLF -> LF
+  for ( @message ) {
+    s/\r\n/\n/;
+  }
+
+  # If the message does need to get parsed, save off a copy of the body
+  # in a format we can easily parse later so we don't have to rip from
+  # pristine_body ...  If we do want to parse now, go ahead and do so ...
+  #
+  if ($parsenow) {
+    $self->_do_parse(\@message);
+  }
+  else {
+    $self->{'toparse'} = \@message;
+  }
 
   $self;
 }
@@ -91,12 +175,21 @@
 =cut
 
 sub _do_parse {
-  my($self) = @_;
+  my($self, $array) = @_;
+
+  # We can either be passed the array to parse, or we may have to find it
+  # in the object data ...
+  my $toparse;
+  if (defined $array) {
+    $toparse = $array;
+  }
+  elsif (exists $self->{'toparse'}) {
+    $toparse = $self->{'toparse'};
+    delete $self->{'toparse'};
+  }
 
   # If we're called when we don't need to be, then just go ahead and return.
-  return if (!exists $self->{'toparse'});
-  my $toparse = $self->{'toparse'};
-  delete $self->{'toparse'};
+  return if (!defined $toparse);
 
   dbg("---- MIME PARSER START ----");
 
@@ -106,13 +199,16 @@
   dbg("main message type: ".$self->{'type'});
 
   # Make the tree
-  Mail::SpamAssassin::MsgParser->parse_body( $self, $self, $boundary, $toparse, 1 );
+  $self->parse_body( $self, $self, $boundary, $toparse, 1 );
 
   dbg("---- MIME PARSER END ----");
 }
 
 =item find_parts()
 
+Used to search the tree for specific MIME parts.  See
+I<Mail::SpamAssassin::Message::Node> for more details.
+
 =cut
 
 # Used to find any MIME parts whose simple content-type matches a given regexp
@@ -125,7 +221,7 @@
   # ok, we need to do the parsing now...
   $self->_do_parse() if (exists $self->{'toparse'});
 
-  # and pass through to the MsgNode version of the method
+  # and pass through to the Message::Node version of the method
   return $self->SUPER::find_parts($re, $onlyleaves, $recursive);
 }
 
@@ -133,6 +229,17 @@
 
 =item get_pristine_header()
 
+Returns pristine headers of the message.  If no specific header name
+is given as a parameter (case-insensitive), then all headers will
+be returned.  If called in an array context, an array will be returned
+with each header (specific or all) in a different element.  In a scalar
+context, either all of the headers are returned as a scalar, or the last
+specific header is returned.
+
+ie: If 'Subject' is specified as the header, and there are 2 Subject
+headers in a message, the last/bottom one in the message is returned in
+scalar context or both are returned in array context.
+
 =cut
 
 sub get_pristine_header {
@@ -148,12 +255,21 @@
   }
 }
 
+=item get_mbox_seperator()
+
+Returns the mbox separator found in the message, or undef if there
+wasn't one.
+
+=cut
+
 sub get_mbox_seperator {
   return $_[0]->{mbox_sep};
 }
 
 =item get_body()
 
+Returns an array of the pristine message body, one line per array element.
+
 =cut
 
 sub get_body {
@@ -166,6 +282,8 @@
 
 =item get_pristine()
 
+Returns a scalar of the entire pristine message.
+
 =cut
 
 sub get_pristine {
@@ -175,6 +293,8 @@
 
 =item get_pristine_body()
 
+Returns a scalar of the pristine message body.
+
 =cut
 
 sub get_pristine_body {
@@ -183,6 +303,206 @@
 }
 
 # ---------------------------------------------------------------------------
+
+=head1 PARSING METHODS, NON-PUBLIC
+
+These methods take a RFC2822-esque formatted message and create a tree
+with all of the MIME body parts included.  Those parts will be decoded
+as necessary, and text/html parts will be rendered into a standard text
+format, suitable for use in SpamAssassin.
+
+=item parse_body()
+
+parse_body() passes the body part that was passed in onto the
+correct part parser, either _parse_multipart() for multipart/* parts,
+or _parse_normal() for everything else.  Multipart sections become the
+root of sub-trees, while everything else becomes a leaf in the tree.
+
+For multipart messages, the first call to parse_body() doesn't create a
+new sub-tree and just uses the parent node to contain children.  All other
+calls to parse_body() will cause a new sub-tree root to be created and
+children will exist underneath that root.  (this is just so the tree
+doesn't have a root node which points at the actual root node ...)
+
+=cut
+
+sub parse_body {
+  my($self, $msg, $_msg, $boundary, $body, $initial) = @_;
+
+  # Figure out the simple content-type, or set it to text/plain
+  my $type = $_msg->header('Content-Type') || 'text/plain; charset=us-ascii';
+
+  # multipart sections are required to have a boundary set ...  If this
+  # one doesn't, assume it's malformed and send it to be parsed as a
+  # non-multipart section
+  #
+  if ( $type =~ /^multipart\//i && defined $boundary ) {
+    # Treat an initial multipart parse differently.  This will keep the tree:
+    # obj(multipart->[ part1, part2 ]) instead of
+    # obj(obj(multipart ...))
+    #
+    if ( $initial ) {
+      $self->_parse_multipart( $msg, $_msg, $boundary, $body );
+    }
+    else {
+      $self->_parse_multipart( $_msg, $_msg, $boundary, $body );
+      $msg->add_body_part( $_msg );
+    }
+  }
+  else {
+    # If it's not multipart, go ahead and just deal with it.
+    $self->_parse_normal( $msg, $_msg, $boundary, $body );
+  }
+}
+
+=item _parse_multipart()
+
+Generate a root node, and for each child part call parse_body()
+to generate the tree.
+
+=cut
+
+sub _parse_multipart {
+  my($self, $msg, $_msg, $boundary, $body) = @_;
+
+  dbg("parsing multipart, got boundary: ".(defined $boundary ? $boundary : ''));
+
+  # ignore preamble per RFC 1521, unless there's no boundary ...
+  if ( defined $boundary ) {
+    my $line;
+    my $tmp_line = @{$body};
+    for ($line=0; $line < $tmp_line; $line++) {
+      last if $body->[$line] =~ /^\-\-\Q$boundary\E$/;
+    }
+
+    # Found a boundary, ignore the preamble
+    if ( $line < $tmp_line ) {
+      splice @{$body}, 0, $line+1;
+    }
+
+    # Else, there's no boundary, so leave the whole part...
+  }
+
+  my $part_msg = Mail::SpamAssassin::Message::Node->new();    # prepare a new tree node
+  my $in_body = 0;
+  my $header;
+  my $part_array;
+
+  my $line_count = @{$body};
+  foreach ( @{$body} ) {
+    # if we're on the last body line, or we find a boundary marker, deal with the mime part
+    if ( --$line_count == 0 || (defined $boundary && /^\-\-\Q$boundary\E/) ) {
+      my $line = $_; # remember the last line
+
+      # per rfc 1521, the CRLF before the boundary is part of the boundary:
+      # NOTE: The CRLF preceding the encapsulation line is conceptually
+      # attached to the boundary so that it is possible to have a part
+      # that does not end with a CRLF (line break). Body parts that must
+      # be considered to end with line breaks, therefore, must have two
+      # CRLFs preceding the encapsulation line, the first of which is part
+      # of the preceding body part, and the second of which is part of the
+      # encapsulation boundary.
+      if ($part_array) {
+        chomp( $part_array->[-1] );  # trim the CRLF that's part of the boundary
+        splice @{$part_array}, -1 if ( $part_array->[-1] eq '' ); # blank line for the boundary only ...
+
+        my($p_boundary);
+	($part_msg->{'type'}, $p_boundary) = Mail::SpamAssassin::Util::parse_content_type($part_msg->header('content-type'));
+        $p_boundary ||= $boundary;
+	dbg("found part of type ".$part_msg->{'type'}.", boundary: ".(defined $p_boundary ? $p_boundary : ''));
+        $self->parse_body( $msg, $part_msg, $p_boundary, $part_array, 0 );
+      }
+
+      last if (defined $boundary && $line =~ /^\-\-\Q${boundary}\E\-\-$/);
+
+      # make sure we start with a new clean node
+      $in_body  = 0;
+      $part_msg = Mail::SpamAssassin::Message::Node->new();
+      undef $part_array;
+      undef $header;
+
+      next;
+    }
+
+    if ($in_body) {
+      # we run into a perl bug if the lines are astronomically long (probably due
+      # to lots of regexp backtracking); so cut short any individual line over
+      # MAX_BODY_LINE_LENGTH bytes in length.  This can wreck HTML totally -- but
+      # IMHO the only reason a luser would use MAX_BODY_LINE_LENGTH-byte lines is
+      # to crash filters, anyway.
+      while (length ($_) > MAX_BODY_LINE_LENGTH) {
+        push (@{$part_array}, substr($_, 0, MAX_BODY_LINE_LENGTH)."\n");
+        substr($_, 0, MAX_BODY_LINE_LENGTH) = '';
+      }
+      push ( @{$part_array}, $_ );
+    }
+    else {
+      s/\s+$//;
+      if (m/^\S/) {
+        if ($header) {
+          my ( $key, $value ) = split ( /:\s*/, $header, 2 );
+          $part_msg->header( $key, $value );
+        }
+        $header = $_;
+      }
+      elsif (/^$/) {
+        if ($header) {
+          my ( $key, $value ) = split ( /:\s*/, $header, 2 );
+          $part_msg->header( $key, $value );
+        }
+        $in_body = 1;
+      }
+      else {
+        $_ =~ s/^\s*//;
+        $header .= $_;
+      }
+    }
+  }
+
+}
+
+=item _parse_normal()
+
+Generate a leaf node and add it to the parent.
+
+=cut
+
+sub _parse_normal {
+  my ($self, $msg, $part_msg, $boundary, $body) = @_;
+
+  dbg("parsing normal part");
+
+  $part_msg->{'type'} =
+    Mail::SpamAssassin::Util::parse_content_type($part_msg->header('content-type'));
+
+  # multipart sections are required to have a boundary set ...  If this
+  # one doesn't, assume it's malformed and revert to text/plain
+  $part_msg->{'type'} = 'text/plain' if ( $part_msg->{'type'} =~ /^multipart\//i && !defined $boundary );
+
+  # attempt to figure out a name for this attachment if there is one ...
+  my $disp = $part_msg->header('content-disposition') || '';
+  my($filename) = $disp =~ /name="?([^\";]+)"?/i || $part_msg->{'type'} =~ /name="?([^\";]+)"?/i;
+
+  $part_msg->{'raw'} = $body;
+  $part_msg->{'boundary'} = $boundary;
+  $part_msg->{'name'} = $filename if $filename;
+
+  $msg->add_body_part($part_msg);
+
+  # now that we've added the leaf node, let's go ahead and kill
+  # body_parts (used for sub-trees).  it could end up being recursive,
+  # and well, let's avoid that. ;)
+  #
+  # BTW: please leave this after add_body_parts() since it'll add it back.
+  #
+  delete $part_msg->{body_parts};
+}
+
+# ---------------------------------------------------------------------------
+
+=item $str = get_metadata($hdr)
+
+=cut
 
 sub extract_message_metadata {
   my ($self, $main) = @_;

Copied: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Message/Metadata.pm (from rev 6935, incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgMetadata.pm)
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgMetadata.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Message/Metadata.pm	Sun Feb 29 14:12:03 2004
@@ -18,7 +18,7 @@
 
 =head1 NAME
 
-Mail::SpamAssassin::MsgMetadata - extract metadata from a message
+Mail::SpamAssassin::Message::Metadata - extract metadata from a message
 
 =head1 SYNOPSIS
 
@@ -47,7 +47,7 @@
 
 =cut
 
-package Mail::SpamAssassin::MsgMetadata;
+package Mail::SpamAssassin::Message::Metadata;
 use strict;
 use bytes;
 

Copied: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm (from rev 6935, incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgNode.pm)
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgNode.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm	Sun Feb 29 14:12:03 2004
@@ -18,7 +18,7 @@
 
 =head1 NAME
 
-Mail::SpamAssassin::MsgNode - decode, render, and make available MIME message parts
+Mail::SpamAssassin::Message::Node - decode, render, and make available MIME message parts
 
 =head1 SYNOPSIS
 
@@ -33,7 +33,7 @@
 
 =cut
 
-package Mail::SpamAssassin::MsgNode;
+package Mail::SpamAssassin::Message::Node;
 use strict;
 use Mail::SpamAssassin;
 use Mail::SpamAssassin::HTML;
@@ -47,7 +47,6 @@
 sub new {
   my $class = shift;
   $class = ref($class) || $class;
-  my %opts = @_;	# unused currently in MsgNode, used in Message
 
   my $self = {
     headers		=> {},

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Received.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Received.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Received.pm	Sun Feb 29 14:12:03 2004
@@ -41,7 +41,7 @@
 package Mail::SpamAssassin::Received;
 1;
 
-package Mail::SpamAssassin::MsgMetadata;
+package Mail::SpamAssassin::Message::Metadata;
 use strict;
 use bytes;
 

Modified: incubator/spamassassin/trunk/masses/mass-check
==============================================================================
--- incubator/spamassassin/trunk/masses/mass-check	(original)
+++ incubator/spamassassin/trunk/masses/mass-check	Sun Feb 29 14:12:03 2004
@@ -247,7 +247,7 @@
   # remove SpamAssassin markup, if present and the mail was spam
   $_ = $ma->get_header ("X-Spam-Status");
   if (defined($_) && /^Yes,/) {
-    my $new_ma = $spamtest->parse ($spamtest->remove_spamassassin_markup($ma));
+    my $new_ma = $spamtest->parse ($spamtest->remove_spamassassin_markup($ma), 1);
     $ma->finish();
     $ma = $new_ma;
   }

Modified: incubator/spamassassin/trunk/spamassassin.raw
==============================================================================
--- incubator/spamassassin/trunk/spamassassin.raw	(original)
+++ incubator/spamassassin/trunk/spamassassin.raw	Sun Feb 29 14:12:03 2004
@@ -193,9 +193,8 @@
     # Make sure the message is clean first ...
     my $new_mail = $spamtest->parse ($spamtest->remove_spamassassin_markup ($mail));
     $mail->finish();
-    $mail = $new_mail;
 
-    if ( $spamtest->report_as_spam ($mail) ) {
+    if ( $spamtest->report_as_spam ($new_mail) ) {
       warn "Warning, unable to report spam\nFor more information, re-run with -D option to see debug output.\n";
     }
     exit(0);
@@ -206,9 +205,8 @@
     # Make sure the message is clean first ...
     my $new_mail = $spamtest->parse ($spamtest->remove_spamassassin_markup ($mail));
     $mail->finish();
-    $mail = $new_mail;
 
-    if ( $spamtest->revoke_as_spam ($mail) ) {
+    if ( $spamtest->revoke_as_spam ($new_mail) ) {
       warn "Warning, unable to revoke spam\nFor more information, re-run with -D option to see debug output.\n";
     }
     exit(0);

Modified: incubator/spamassassin/trunk/spamd/spamd.raw
==============================================================================
--- incubator/spamassassin/trunk/spamd/spamd.raw	(original)
+++ incubator/spamassassin/trunk/spamd/spamd.raw	Sun Feb 29 14:12:03 2004
@@ -826,15 +826,16 @@
       "."
     );
 
-    my $mail = $spamtest->parse (\@msglines);
-
     # Check length if we're supposed to
     if($expected_length && ($actual_length != $expected_length)) {
         protocol_error ("(Content-Length mismatch: Expected $expected_length bytes, got $actual_length bytes)");
         return 1;
     }
 
-    # Now use copy-on-writed (hopefully) SA object
+    # Parse the input message
+    my $mail = $spamtest->parse (\@msglines, 1);
+
+    # Go ahead and check the message
     my $status = $spamtest->check($mail);
 
     my $msg_score = sprintf("%.1f",$status->get_score);

Modified: incubator/spamassassin/trunk/t/mimeparse.t
==============================================================================
--- incubator/spamassassin/trunk/t/mimeparse.t	(original)
+++ incubator/spamassassin/trunk/t/mimeparse.t	Sun Feb 29 14:12:03 2004
@@ -88,11 +88,8 @@
 
 foreach my $k ( sort keys %files ) {
   open(INP, $k) || die "Can't find $k:$!";
-  my $mail = Mail::SpamAssassin->parse(\*INP);
+  my $mail = Mail::SpamAssassin->parse(\*INP, 1);
   close(INP);
-
-  # We know it's not parsed, so deal with it!
-  $mail->_do_parse();
 
   my $res = join("\n",$mail->content_summary());
   #print "---\n$res\n---\n";