You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by fe...@apache.org on 2004/01/30 05:15:40 UTC

svn commit: rev 6359 - incubator/spamassassin/trunk/lib/Mail/SpamAssassin

Author: felicity
Date: Thu Jan 29 20:15:38 2004
New Revision: 6359

Modified:
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm
Log:
faster version of scan_mailbox.  brings the time for me from 55s to 44s.


Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm	Thu Jan 29 20:15:38 2004
@@ -523,41 +523,33 @@
     }
     mail_open($file) or return;
     
-    my $start = 0;		# start of a message
-    my $where = 0;		# current byte offset
-    my $first = '';		# first line of message
     my $header = '';		# header text
-    my $in_header = 0;		# are in we a header?
-    while (!eof INPUT) {
-      my $offset = $start;	# byte offset of this message
-      my $header = $first;	# remember first line
-      while (<INPUT>) {
-	if ($in_header) {
-	  if (/^$/) {
-	    $in_header = 0;
+    my $offset = undef;	# byte offset of this message
+    while (defined($_=<INPUT>)) {
+      # Note: This will give the start of the message as the start of
+      # the line _following_ the mbox separator.
+      #
+      if ( /^From / .. /^\r?$/ ) {
+        if ( $_ eq "\n" || $_ eq "\r\n" ) {
+	  my $t;
+	  if ($self->{opt_n}) {
+	    $t = $no++;
+	  } else {
+	    $t = $self->receive_date($header);
+	    $header = '';
+	    if ( !$self->message_is_useful_by_date($t)) {
+	      undef $offset;
+	      next;
+	    }
 	  }
-	  else {
-	    $header .= $_;
-	  }
-	}
-	if (substr($_,0,5) eq "From ") {
-	  $in_header = 1;
-	  $first = $_;
-	  $start = $where;
-	  $where = tell INPUT;
-	  last;
-	}
-	$where = tell INPUT;
-      }
-      if ($header) {
-	my $t;
-	if ($self->{opt_n}) {
-	  $t = $no++;
-	} else {
-	  $t = $self->receive_date($header);
-	  next if !$self->message_is_useful_by_date($t);
+	  $self->{$class}->{index_pack($class, "m", $t, "$file.$offset")} = $t;
+	  undef $offset;
+        }
+	elsif ( !defined $offset ) {
+	  $offset = tell INPUT;
 	}
-	$self->{$class}->{index_pack($class, "m", $t, "$file.$offset")} = $t;
+
+        $header .= $_;
       }
     }
     close INPUT;