You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by fe...@apache.org on 2004/10/12 04:30:38 UTC

svn commit: rev 54644 - spamassassin/trunk/lib/Mail/SpamAssassin

Author: felicity
Date: Mon Oct 11 19:30:37 2004
New Revision: 54644

Modified:
   spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm
Log:
bug 3893: ArchiveIterator, when run with opt_n, would report the message number as the receive_date.  to fix this, we can allow receive_date to be determined at run time, then propagate the value back up the chain.

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm	(original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm	Mon Oct 11 19:30:37 2004
@@ -246,17 +246,14 @@
 
   # non-forking model (generally sa-learn), everything in a single process
   if ($self->{opt_j} == 0) {
-    my $message;
-    my $class;
-    my $result;
     my $messages;
 
     # message-array
     ($MESSAGES, $messages) = $self->message_array(\@targets);
 
-    while ($message = shift @{$messages}) {
-      my ($class, undef, $date) = index_unpack($message);
-      $result = $self->run_message($message);
+    while (my $message = shift @{$messages}) {
+#      my ($class, undef, $date) = index_unpack($message);
+      my($class, undef, $date, undef, $result) = $self->run_message($message);
       &{$self->{result_sub}}($class, $result, $date) if $result;
     }
   }
@@ -288,15 +285,11 @@
     # only do 1 process, message list in a temp file, no restarting
     if ($self->{opt_j} == 1 && !defined $self->{opt_restart}) {
       my $message;
-      my $class;
-      my $result;
       my $messages;
       my $total_count = 0;
 
-      while (($MESSAGES > $total_count) && ($message = $self->next_message()))
-      {
-        my ($class, undef, $date) = index_unpack($message);
-        $result = $self->run_message($message);
+      while (($MESSAGES > $total_count) && ($message = $self->next_message())) {
+        my($class, undef, $date, undef, $result) = $self->run_message($message);
         &{$self->{result_sub}}($class, $result, $date) if $result;
 	$total_count++;
       }
@@ -534,8 +527,15 @@
 	  close $parent;
 	  exit;
 	}
-	$result = $self->run_message($line);
+
+	my($class, $format, $date, $where, $result) = $self->run_message($line);
 	$result ||= '';
+
+	# If opt_n is set, the input date was -1, so reset it if possible
+        if ($self->{opt_n} && $class && $format && defined $date && $where) {
+	  $line = index_pack($class, $format, $date, $where);
+        }
+
 	print "$result\nRESULT $line\n";
       }
       exit;
@@ -635,8 +635,7 @@
 
   foreach my $mail (@files) {
     if ($self->{opt_n}) {
-      $self->{$class}->{index_pack($class, "f", $no, $mail)} = $no;
-      $no++;
+      $self->{$class}->{index_pack($class, "f", -1, $mail)} = $no++;
       next;
     }
     my $header;
@@ -656,8 +655,7 @@
   my ($self, $class, $mail) = @_;
 
   if ($self->{opt_n}) {
-    $self->{$class}->{index_pack($class, "f", $no, $mail)} = $no;
-    $no++;
+    $self->{$class}->{index_pack($class, "f", -1, $mail)} = $no++;
     return;
   }
   my $header;
@@ -727,13 +725,13 @@
       if ($header) {
 	my $t;
 	if ($self->{opt_n}) {
-	  $t = $no++;
+	  $self->{$class}->{index_pack($class, "m", -1, "$file.$offset")} = $no++;
 	}
 	else {
 	  $t = Mail::SpamAssassin::Util::receive_date($header);
 	  next if !$self->message_is_useful_by_date($t);
+	  $self->{$class}->{index_pack($class, "m", $t, "$file.$offset")} = $t;
 	}
-	$self->{$class}->{index_pack($class, "m", $t, "$file.$offset")} = $t;
       }
     }
     close INPUT;
@@ -789,12 +787,12 @@
 
 		my $t;
 		if ($self->{opt_n}) {
-		    $t = $no++;
+		  $self->{$class}->{index_pack($class, "b", -1, "$file.$offset")} = $no++;
 		} else {
-		    $t = Mail::SpamAssassin::Util::receive_date($header);
-		    next if !$self->message_is_useful_by_date($t);
+		  $t = Mail::SpamAssassin::Util::receive_date($header);
+		  next if !$self->message_is_useful_by_date($t);
+		  $self->{$class}->{index_pack($class, "b", $t, "$file.$offset")} = $t;
 		}
-		$self->{$class}->{index_pack($class, "b", $t, "$file.$offset")} = $t;
 		seek(INPUT, $offset + $size, 0);
 	    } else {
 		die "archive-iterator: error: failure to read message body!\n";
@@ -812,18 +810,18 @@
   my ($class, $format, $date, $mail) = index_unpack($msg);
 
   if ($format eq "f") {
-    return $self->run_file($class, $mail, $date);
+    return $self->run_file($class, $format, $mail, $date);
   }
   elsif ($format eq "m") {
-    return $self->run_mailbox($class, $mail, $date);
+    return $self->run_mailbox($class, $format, $mail, $date);
   }
   elsif ($format eq "b") {
-    return $self->run_mbx($class, $mail, $date);
+    return $self->run_mbx($class, $format, $mail, $date);
   }
 }
 
 sub run_file {
-  my ($self, $class, $where, $date) = @_;
+  my ($self, $class, $format, $where, $date) = @_;
 
   mail_open($where) or return;
   # skip too-big mails
@@ -831,17 +829,30 @@
     close INPUT;
     return;
   }
-  my @msg = (<INPUT>);
+  my @msg;
+  my $header = '';
+  while(<INPUT>) {
+    if (!$header && /^$/) {
+      $header = join('', @msg);
+    }
+
+    push(@msg, $_);
+  }
   close INPUT;
 
-  &{$self->{wanted_sub}}($class, $where, $date, \@msg);
+  if ($date == -1) {
+    $date = Mail::SpamAssassin::Util::receive_date($header);
+  }
+
+  return($class, $format, $date, $where, &{$self->{wanted_sub}}($class, $where, $date, \@msg));
 }
 
 sub run_mailbox {
-  my ($self, $class, $where, $date) = @_;
+  my ($self, $class, $format, $where, $date) = @_;
 
   my ($file, $offset) = ($where =~ m/(.*)\.(\d+)$/);
   my @msg;
+  my $header = '';
   mail_open($file) or return;
   seek(INPUT,$offset,0);
   my $past = 0;
@@ -852,38 +863,60 @@
     else {
       $past = 1;
     }
+
     # skip too-big mails
     if (! $self->{opt_all} && @msg > BIG_LINES) {
       close INPUT;
       return;
     }
+
+    if (!$header && /^$/) {
+      $header = join('', @msg);
+    }
+
     push (@msg, $_);
   }
   close INPUT;
-  &{$self->{wanted_sub}}($class, $where, $date, \@msg);
+
+  if ($date == -1) {
+    $date = Mail::SpamAssassin::Util::receive_date($header);
+  }
+
+  return($class, $format, $date, $where, &{$self->{wanted_sub}}($class, $where, $date, \@msg));
 }
 
 sub run_mbx {
-    my ($self, $class, $where, $date) = @_;
+  my ($self, $class, $format, $where, $date) = @_;
 
-    my ($file, $offset) = ($where =~ m/(.*)\.(\d+)$/);
-    my @msg;
+  my ($file, $offset) = ($where =~ m/(.*)\.(\d+)$/);
+  my @msg;
+  my $header = '';
 
-    mail_open($file) or return;
-    seek(INPUT, $offset, 0);
+  mail_open($file) or return;
+  seek(INPUT, $offset, 0);
     
-    while (<INPUT>) {
-	last if ($_ =~ MBX_SEPARATOR);
+  while (<INPUT>) {
+    last if ($_ =~ MBX_SEPARATOR);
 	
-	# skip mails that are too big
-	if (! $self->{opt_all} && @msg > BIG_LINES) {
-	    close INPUT;
-	    return;
-	}
-	push (@msg, $_);
+    # skip mails that are too big
+    if (! $self->{opt_all} && @msg > BIG_LINES) {
+      close INPUT;
+      return;
     }
-    close INPUT;
-    &{$self->{wanted_sub}}($class, $where, $date, \@msg);
+
+    if (!$header && /^$/) {
+      $header = join('', @msg);
+    }
+
+    push (@msg, $_);
+  }
+  close INPUT;
+
+  if ($date == -1) {
+    $date = Mail::SpamAssassin::Util::receive_date($header);
+  }
+
+  return($class, $format, $date, $where, &{$self->{wanted_sub}}($class, $where, $date, \@msg));
 }
 
 ############################################################################

Re: svn commit: rev 54644 - spamassassin/trunk/lib/Mail/SpamAssassin

Posted by Theo Van Dinter <fe...@kluge.net>.
On Wed, Oct 13, 2004 at 12:58:52PM -0700, Justin Mason wrote:
> I agree with Daniel that "magic numbers" make for hard-to-read code.
> How's about defining a constant TIME_UNKNOWN_YET or similar, which
> maps to -1, and using that instead?

Ok, I created AI_TIME_UNKNOWN and made it a generic constant.  Committed
r55898. :)

-- 
Randomly Generated Tagline:
"But you have to allow a little for the desire to evangelize when you
 think you have good news."         - Larry Wall

Re: svn commit: rev 54644 - spamassassin/trunk/lib/Mail/SpamAssassin

Posted by Theo Van Dinter <fe...@kluge.net>.
On Wed, Oct 13, 2004 at 11:53:52AM -0700, Dan Quinlan wrote:
> This code is really getting unreadable and is hard to follow and this
> patch makes it worse, the use of magic numbers like -1 is also bad.

I think it's like the rest of our code, if you're used to it, it makes sense.
If it doesn't, it takes a little bit.

I could have used undef instead of -1, but -1 is an invalid received date
which if used before the time is actually determined will at least make it
known what the state is -- undef would throw a warning/etc.

-- 
Randomly Generated Tagline:
"Fractions are like popcorn ... white and fluffy ..."   - Prof. Branche

Re: svn commit: rev 54644 - spamassassin/trunk/lib/Mail/SpamAssassin

Posted by Daniel Quinlan <qu...@pathname.com>.
felicity@apache.org writes:

> bug 3893: ArchiveIterator, when run with opt_n, would report the
> message number as the receive_date.  to fix this, we can allow
> receive_date to be determined at run time, then propagate the value
> back up the chain.

This code is really getting unreadable and is hard to follow and this
patch makes it worse, the use of magic numbers like -1 is also bad.

Daniel

-- 
Daniel Quinlan                     ApacheCon! 13-17 November (3 SpamAssassin
http://www.pathname.com/~quinlan/  http://www.apachecon.com/  sessions & more)