You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by qu...@apache.org on 2004/01/31 04:28:19 UTC

svn commit: rev 6371 - incubator/spamassassin/trunk/lib/Mail/SpamAssassin

Author: quinlan
Date: Fri Jan 30 19:28:18 2004
New Revision: 6371

Modified:
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
Log:
store list of messages in a temporary file
restore offset as byte offset of message starting at "From "
move secure_tmpfile to Util.pm


Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm	Fri Jan 30 19:28:18 2004
@@ -84,9 +84,9 @@
     my $messages;
 
     # message-array
-    ($MESSAGES,$messages) = $self->message_array(\@targets);
+    $MESSAGES = $self->message_array(\@targets);
 
-    while ($message = (shift @{$messages})) {
+    while ($message = $self->next_message()) {
       my ($class, undef, $date) = index_unpack($message);
       $result = $self->run_message($message);
       &{$self->{result_sub}}($class, $result, $date) if $result;
@@ -122,8 +122,8 @@
 	    }
 
 	    # if messages remain, and we don't need to restart, send a message
-	    if (($MESSAGES>$total_count) && !$needs_restart) {
-	      print { $socket } (shift @{$messages}) . "\n";
+	    if (($MESSAGES > $total_count) && !$needs_restart) {
+	      print { $socket } $self->next_message() . "\n";
 	      $total_count++;
 	      #warn ">> recv: $MESSAGES $total_count\n";
 	    }
@@ -142,9 +142,9 @@
 	    last; # this will get out of the read for this client
 	  }
 	  elsif ($line eq "START\n") {
-	    if ($MESSAGES>$total_count) {
+	    if ($MESSAGES > $total_count) {
 	      # we still have messages, send one to child
-	      print { $socket } (shift @{$messages}) . "\n";
+	      print { $socket } $self->next_message() . "\n";
 	      $total_count++;
 	      #warn ">> new: $MESSAGES $total_count\n";
 	    }
@@ -163,7 +163,7 @@
 	}
 
         # some error happened during the read!
-        if ( !defined $line || !$line ) {
+        if (!defined $line || !$line) {
           $needs_restart = 1;
           warn "Got an undef from readline?!?  Restarting all children, probably lost some results. :(\n";
           $select->remove($socket);
@@ -174,7 +174,7 @@
 
       # If there are still messages to process, and we need to restart
       # the children, and all of the children are idle, let's go ahead.
-      if ($needs_restart && $select->count() == 0 && ($MESSAGES>$total_count)) {
+      if ($needs_restart && $select->count() == 0 && ($MESSAGES > $total_count)) {
 	$needs_restart = 0;
 
 	#warn "debug: Needs restart, $MESSAGES total, $total_count done.\n";
@@ -262,7 +262,23 @@
     }
     push @messages, (splice @s), (splice @h);
   }
-  return (scalar(@messages),\@messages);
+  my $tmpf;
+  ($tmpf, $self->{messageh}) = Mail::SpamAssassin::Util::secure_tmpfile();
+  unlink $tmpf;
+  my $count = scalar @messages;
+  my $message;
+  while ($message = shift @messages) {
+    print { $self->{messageh} } "$message\n";
+  }
+  seek ($self->{messageh}, 0, 0);
+  return $count;
+}
+
+sub next_message {
+  my ($self) = @_;
+  my $line = readline $self->{messageh};
+  chomp $line if defined $line;
+  return $line;
 }
 
 sub start_children {
@@ -523,33 +539,41 @@
     }
     mail_open($file) or return;
     
+    my $start = 0;		# start of a message
+    my $where = 0;		# current byte offset
+    my $first = '';		# first line of message
     my $header = '';		# header text
-    my $offset = undef;	# byte offset of this message
-    while (defined($_=<INPUT>)) {
-      # Note: This will give the start of the message as the start of
-      # the line _following_ the mbox seperator.
-      #
-      if ( /^From / .. /^\r?$/ ) {
-        if ( $_ eq "\n" || $_ eq "\r\n" ) {
-	  my $t;
-	  if ($self->{opt_n}) {
-	    $t = $no++;
-	  } else {
-	    $t = $self->receive_date($header);
-	    $header = '';
-	    if ( !$self->message_is_useful_by_date($t)) {
-	      undef $offset;
-	      next;
-	    }
+    my $in_header = 0;		# are we in a header?
+    while (!eof INPUT) {
+      my $offset = $start;	# byte offset of this message
+      my $header = $first;	# remember first line
+      while (<INPUT>) {
+	if ($in_header) {
+	  if (/^$/) {
+	    $in_header = 0;
+	  }
+	  else {
+	    $header .= $_;
 	  }
-	  $self->{$class}->{index_pack($class, "m", $t, "$file.$offset")} = $t;
-	  undef $offset;
-        }
-	elsif ( !defined $offset ) {
-	  $offset = tell INPUT;
 	}
-
-        $header .= $_;
+	if (substr($_,0,5) eq "From ") {
+	  $in_header = 1;
+	  $first = $_;
+	  $start = $where;
+	  $where = tell INPUT;
+	  last;
+	}
+	$where = tell INPUT;
+      }
+      if ($header) {
+	my $t;
+	if ($self->{opt_n}) {
+	  $t = $no++;
+	} else {
+	  $t = $self->receive_date($header);
+	  next if !$self->message_is_useful_by_date($t);
+	}
+	$self->{$class}->{index_pack($class, "m", $t, "$file.$offset")} = $t;
       }
     }
     close INPUT;

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm	Fri Jan 30 19:28:18 2004
@@ -2167,7 +2167,7 @@
     return $self->{fulltext_tmpfile};
   }
 
-  my ($tmpf, $tmpfh) = secure_tmpfile();
+  my ($tmpf, $tmpfh) = Mail::SpamAssassin::Util::secure_tmpfile();
   print $tmpfh $$fulltext;
   close $tmpfh;
 
@@ -2182,41 +2182,6 @@
     unlink $self->{fulltext_tmpfile};
     $self->{fulltext_tmpfile} = undef;
   }
-}
-
-use Fcntl;
-
-# thanks to http://www2.picante.com:81/~gtaylor/autobuse/ for this
-# code.
-sub secure_tmpfile {
-  my $tmpdir = File::Spec->tmpdir();
-  if (!$tmpdir) {
-    die "cannot write to a temporary directory! set TMP or TMPDIR in env";
-  }
-
-  $tmpdir = Mail::SpamAssassin::Util::untaint_file_path ($tmpdir);
-  my $template = $tmpdir."/sa.$$.";
-
-  my $reportfile;
-  my $umask = 0;
-  do {
-      # we do not rely on the obscurity of this name for security...
-      # we use a average-quality PRG since this is all we need
-      my $suffix = join ('',
-                         (0..9, 'A'..'Z','a'..'z')[rand 62,
-                                                   rand 62,
-                                                   rand 62,
-                                                   rand 62,
-                                                   rand 62,
-                                                   rand 62]);
-      $reportfile = $template . $suffix;
-
-      # ...rather, we require O_EXCL|O_CREAT to guarantee us proper
-      # ownership of our file; read the open(2) man page.
-  } while (! sysopen (TMPFILE, $reportfile, O_WRONLY|O_CREAT|O_EXCL, 0600));
-  umask $umask;
-
-  return ($reportfile, \*TMPFILE);
 }
 
 ###########################################################################

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm	Fri Jan 30 19:28:18 2004
@@ -42,6 +42,7 @@
 use File::Spec;
 use Time::Local;
 use Sys::Hostname (); # don't import hostname() into this namespace!
+use Fcntl;
 
 use constant HAS_MIME_BASE64 => eval { require MIME::Base64; };
 use constant RUNNING_ON_WINDOWS => ($^O =~ /^(?:mswin|dos|os2)/oi);
@@ -623,6 +624,41 @@
     else {
       return join("",@characters);
     }
+}
+
+###########################################################################
+
+# thanks to http://www2.picante.com:81/~gtaylor/autobuse/ for this
+# code.
+sub secure_tmpfile {
+  my $tmpdir = File::Spec->tmpdir();
+  if (!$tmpdir) {
+    die "cannot write to a temporary directory! set TMP or TMPDIR in env";
+  }
+
+  $tmpdir = Mail::SpamAssassin::Util::untaint_file_path ($tmpdir);
+  my $template = $tmpdir."/sa.$$.";
+
+  my $reportfile;
+  my $umask = 0;
+  do {
+    # we do not rely on the obscurity of this name for security...
+    # we use a average-quality PRG since this is all we need
+    my $suffix = join ('',
+		       (0..9, 'A'..'Z','a'..'z')[rand 62,
+						 rand 62,
+						 rand 62,
+						 rand 62,
+						 rand 62,
+						 rand 62]);
+    $reportfile = $template . $suffix;
+
+    # ...rather, we require O_EXCL|O_CREAT to guarantee us proper
+    # ownership of our file; read the open(2) man page.
+  } while (! sysopen (TMPFILE, $reportfile, O_RDWR|O_CREAT|O_EXCL, 0600));
+  umask $umask;
+
+  return ($reportfile, \*TMPFILE);
 }
 
 ###########################################################################