You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by fe...@apache.org on 2004/05/02 19:30:38 UTC

svn commit: rev 10488 - in incubator/spamassassin/trunk: lib/Mail/SpamAssassin masses

Author: felicity
Date: Sun May  2 10:30:37 2004
New Revision: 10488

Modified:
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
   incubator/spamassassin/trunk/masses/mass-check
Log:
Two things: move receive_date out of Bayes and ArchiveIterator and put it in Util instead -- keeping two copies of the same logic makes no sense.  Add a --before option to mass-check, which works in the same way as --after.

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm	Sun May  2 10:30:37 2004
@@ -28,13 +28,7 @@
 use constant BIG_BYTES => 256*1024;	# 256k is a big email
 use constant BIG_LINES => BIG_BYTES/65;	# 65 bytes/line is a good approximation
 
-my $no;
-my $tz;
-
-BEGIN {
-  $no = 1;
-  $tz = local_tz();
-}
+my $no = 1;
 
 use vars qw {
   $MESSAGES
@@ -55,8 +49,6 @@
   $self->{s} = { };		# spam, of course
   $self->{h} = { };		# ham, as if you couldn't guess
 
-  $self->{opt_after} ||= 0;	# default to 0
-
   $self;
 }
 
@@ -414,79 +406,24 @@
   return 1;
 }
 
-sub first_date {
-  my (@strings) = @_;
-
-  foreach my $string (@strings) {
-    my $time = Mail::SpamAssassin::Util::parse_rfc822_date($string);
-    return $time if defined($time) && $time;
-  }
-  return undef;
-}
-
-sub receive_date {
-  my ($self, $header) = @_;
-
-  $header ||= '';
-  $header =~ s/\n[ \t]+/ /gs;	# fix continuation lines
-
-  my @rcvd = ($header =~ /^Received:(.*)/img);
-  my @local;
-  my $time;
-
-  if (@rcvd) {
-    if ($rcvd[0] =~ /qmail \d+ invoked by uid \d+/ ||
-	$rcvd[0] =~ /\bfrom (?:localhost\s|(?:\S+ ){1,2}\S*\b127\.0\.0\.1\b)/)
-    {
-      push @local, (shift @rcvd);
-    }
-    if (@rcvd && ($rcvd[0] =~ m/\bby localhost with \w+ \(fetchmail-[\d.]+/)) {
-      push @local, (shift @rcvd);
-    }
-    elsif (@local) {
-      unshift @rcvd, (shift @local);
-    }
-  }
-
-  if (@rcvd) {
-    $time = first_date(shift @rcvd);
-    return $time if defined($time);
-  }
-  if (@local) {
-    $time = first_date(@local);
-    return $time if defined($time);
-  }
-  if ($header =~ /^(?:From|X-From-Line:)\s+(.+)$/im) {
-    my $string = $1;
-    $string .= " $tz" unless $string =~ /(?:[-+]\d{4}|\b[A-Z]{2,4}\b)/;
-    $time = first_date($string);
-    return $time if defined($time);
-  }
-  if (@rcvd) {
-    $time = first_date(@rcvd);
-    return $time if defined($time);
-  }
-  if ($header =~ /^Resent-Date:\s*(.+)$/im) {
-    $time = first_date($1);
-    return $time if defined($time);
-  }
-  if ($header =~ /^Date:\s*(.+)$/im) {
-    $time = first_date($1);
-    return $time if defined($time);
-  }
-
-  return time;
-}
-
 ############################################################################
 
 sub message_is_useful_by_date  {
   my ($self, $date) = @_;
 
-  return 1 unless $self->{opt_after};	# not using that feature
-  return 0 unless $date;		# undef or 0 date = unusable
+  return 0 unless $date;	# undef or 0 date = unusable
 
-  return $date > $self->{opt_after};
+  if (!$self->{opt_after} && !$self->{opt_before}) {
+    # Not using the feature
+    return 1;
+  }
+  elsif (!$self->{opt_before}) {
+    # Just care about after
+    return $date > $self->{opt_after};
+  }
+  else {
+    return (($date < $self->{opt_before}) && ($date > $self->{opt_after}));
+  }
 }
 
 ############################################################################
@@ -531,7 +468,7 @@
       $header .= $_;
     }
     close(INPUT);
-    my $date = $self->receive_date($header);
+    my $date = Mail::SpamAssassin::Util::receive_date($header);
     next if !$self->message_is_useful_by_date($date);
     $self->{$class}->{index_pack($class, "f", $date, $mail)} = $date;
   }
@@ -552,7 +489,7 @@
     $header .= $_;
   }
   close(INPUT);
-  my $date = $self->receive_date($header);
+  my $date = Mail::SpamAssassin::Util::receive_date($header);
   return if !$self->message_is_useful_by_date($date);
   $self->{$class}->{index_pack($class, "f", $date, $mail)} = $date;
 }
@@ -615,7 +552,7 @@
 	  $t = $no++;
 	}
 	else {
-	  $t = $self->receive_date($header);
+	  $t = Mail::SpamAssassin::Util::receive_date($header);
 	  next if !$self->message_is_useful_by_date($t);
 	}
 	$self->{$class}->{index_pack($class, "m", $t, "$file.$offset")} = $t;

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm	Sun May  2 10:30:37 2004
@@ -801,7 +801,7 @@
     $self->{store}->nspam_nham_change (0, 1);
   }
 
-  my $msgatime = $self->receive_date(scalar $msg->get_all_headers(0,1));
+  my $msgatime = $msg->receive_date();
 
   # If the message atime comes back as being more than 1 day in the
   # future, something's messed up and we should revert to current time as
@@ -1180,7 +1180,7 @@
   # If the message atime comes back as being in the future, something's
   # messed up and we should revert to current time as a safety measure.
   #
-  my $msgatime = $self->receive_date(scalar $msg->get_all_headers(0,1));
+  my $msgatime = $msg->receive_date();
   my $now = time;
   $msgatime = $now if ( $msgatime > $now );
 
@@ -1409,72 +1409,6 @@
     $self->{store}->untie_db();
   }
   return 1;
-}
-
-# Stolen from Archive Iteraator ...  Should probably end up in M::SA::Util
-# Modified to call first_date via $self->first_date()
-sub receive_date {
-  my ($self, $header) = @_;
-
-  $header ||= '';
-  $header =~ s/\n[ \t]+/ /gs;	# fix continuation lines
-
-  my @rcvd = ($header =~ /^Received:(.*)/img);
-  my @local;
-  my $time;
-
-  if (@rcvd) {
-    if ($rcvd[0] =~ /qmail \d+ invoked by uid \d+/ ||
-	$rcvd[0] =~ /\bfrom (?:localhost\s|(?:\S+ ){1,2}\S*\b127\.0\.0\.1\b)/)
-    {
-      push @local, (shift @rcvd);
-    }
-    if (@rcvd && ($rcvd[0] =~ m/\bby localhost with \w+ \(fetchmail-[\d.]+/)) {
-      push @local, (shift @rcvd);
-    }
-    elsif (@local) {
-      unshift @rcvd, (shift @local);
-    }
-  }
-
-  if (@rcvd) {
-    $time = $self->first_date(shift @rcvd);
-    return $time if defined($time);
-  }
-  if (@local) {
-    $time = $self->first_date(@local);
-    return $time if defined($time);
-  }
-  if ($header =~ /^(?:From|X-From-Line:)\s+(.+)$/im) {
-    my $string = $1;
-    $string .= " ".$self->{tz} unless $string =~ /(?:[-+]\d{4}|\b[A-Z]{2,4}\b)/;
-    $time = $self->first_date($string);
-    return $time if defined($time);
-  }
-  if (@rcvd) {
-    $time = $self->first_date(@rcvd);
-    return $time if defined($time);
-  }
-  if ($header =~ /^Resent-Date:\s*(.+)$/im) {
-    $time = $self->first_date($1);
-    return $time if defined($time);
-  }
-  if ($header =~ /^Date:\s*(.+)$/im) {
-    $time = $self->first_date($1);
-    return $time if defined($time);
-  }
-
-  return time;
-}
-
-sub first_date {
-  my ($self, @strings) = @_;
-
-  foreach my $string (@strings) {
-    my $time = Mail::SpamAssassin::Util::parse_rfc822_date($string);
-    return $time if defined($time) && $time;
-  }
-  return undef;
 }
 
 1;

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm	Sun May  2 10:30:37 2004
@@ -804,6 +804,21 @@
 
 # ---------------------------------------------------------------------------
 
+=item receive_date()
+
+Return a time_t value with the received date of the current message,
+or current time if received time couldn't be determined.
+
+=cut
+
+sub receive_date {
+  my($self) = @_;
+
+  return Mail::SpamAssassin::Util::receive_date(scalar $self->get_all_headers(0,1));
+}
+
+# ---------------------------------------------------------------------------
+
 sub dbg { Mail::SpamAssassin::dbg (@_); }
 
 1;

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm	Sun May  2 10:30:37 2004
@@ -814,6 +814,72 @@
 
 ###########################################################################
 
+sub first_date {
+  my (@strings) = @_;
+
+  foreach my $string (@strings) {
+    my $time = parse_rfc822_date($string);
+    return $time if defined($time) && $time;
+  }
+  return undef;
+}
+
+sub receive_date {
+  my ($header) = @_;
+
+  $header ||= '';
+  $header =~ s/\n[ \t]+/ /gs;	# fix continuation lines
+
+  my @rcvd = ($header =~ /^Received:(.*)/img);
+  my @local;
+  my $time;
+
+  if (@rcvd) {
+    if ($rcvd[0] =~ /qmail \d+ invoked by uid \d+/ ||
+	$rcvd[0] =~ /\bfrom (?:localhost\s|(?:\S+ ){1,2}\S*\b127\.0\.0\.1\b)/)
+    {
+      push @local, (shift @rcvd);
+    }
+    if (@rcvd && ($rcvd[0] =~ m/\bby localhost with \w+ \(fetchmail-[\d.]+/)) {
+      push @local, (shift @rcvd);
+    }
+    elsif (@local) {
+      unshift @rcvd, (shift @local);
+    }
+  }
+
+  if (@rcvd) {
+    $time = first_date(shift @rcvd);
+    return $time if defined($time);
+  }
+  if (@local) {
+    $time = first_date(@local);
+    return $time if defined($time);
+  }
+  if ($header =~ /^(?:From|X-From-Line:)\s+(.+)$/im) {
+    my $string = $1;
+    $string .= " ".local_tz() unless $string =~ /(?:[-+]\d{4}|\b[A-Z]{2,4}\b)/;
+    $time = first_date($string);
+    return $time if defined($time);
+  }
+  if (@rcvd) {
+    $time = first_date(@rcvd);
+    return $time if defined($time);
+  }
+  if ($header =~ /^Resent-Date:\s*(.+)$/im) {
+    $time = first_date($1);
+    return $time if defined($time);
+  }
+  if ($header =~ /^Date:\s*(.+)$/im) {
+    $time = first_date($1);
+    return $time if defined($time);
+  }
+
+  return time;
+}
+
+###########################################################################
+
 sub dbg { Mail::SpamAssassin::dbg (@_); }
 
 1;

Modified: incubator/spamassassin/trunk/masses/mass-check
==============================================================================
--- incubator/spamassassin/trunk/masses/mass-check	(original)
+++ incubator/spamassassin/trunk/masses/mass-check	Sun May  2 10:30:37 2004
@@ -48,6 +48,7 @@
   --after=N     only test mails received after time_t N (negative values
                 are an offset from current time, e.g. -86400 = last day)
                 or after date as parsed by Time::ParseDate (e.g. '-6 months')
+  --before=N    same as --after, except received times are before time_t N
   --all         don't skip big messages
   --head=N      only check first N ham and N spam (N messages if -n used)
   --tail=N      only check last N ham and N spam (N messages if -n used)
@@ -75,7 +76,7 @@
 	    $opt_debug $opt_format $opt_hamlog $opt_head $opt_loghits
 	    $opt_mid $opt_mh $opt_ms $opt_net $opt_nosort $opt_progress
 	    $opt_showdots $opt_spamlog $opt_tail $opt_rules $opt_restart
-	    $opt_loguris $opt_after $opt_rewrite $opt_deencap);
+	    $opt_loguris $opt_after $opt_before $opt_rewrite $opt_deencap);
 
 use FindBin;
 use lib "$FindBin::Bin/../lib";
@@ -98,7 +99,7 @@
 GetOptions("c=s", "p=s", "f=s", "j=i", "n", "o", "all", "bayes", "debug",
 	   "hamlog=s", "head=i", "loghits", "mh", "mid", "ms", "net",
 	   "progress", "rewrite:s", "showdots", "spamlog=s", "tail=i",
-	   "rules=s", "restart=i", "after=s", "loguris", "deencap=s",
+	   "rules=s", "restart=i", "after=s", "before=s", "loguris", "deencap=s",
 	   "dir" => sub { $opt_format = "dir"; },
 	   "file" => sub { $opt_format = "file"; },
 	   "mbox" => sub { $opt_format = "mbox"; },
@@ -160,18 +161,25 @@
 my $ham_count = 0;
 my $init_results = 0;
 
-if ($opt_after && $opt_after =~ /^-\d+$/) {
-  $opt_after = time + $opt_after;
-}
-elsif ($opt_after && $opt_after !~ /^-?\d+$/) {
-  if (HAS_TIME_PARSEDATE) {
-    $opt_after = Time::ParseDate::parsedate($opt_after, GMT => 1, PREFER_PAST => 1);
+# Deal with --before and --after
+foreach my $time ($opt_before, $opt_after) {
+  if ($time && $time =~ /^-\d+$/) {
+    $time = time + $time;
   }
-  else { 
-    die "You need Time::ParseDate if you use the --after option.";
+  elsif ($time && $time !~ /^-?\d+$/) {
+    if (HAS_TIME_PARSEDATE) {
+      $time = Time::ParseDate::parsedate($time, GMT => 1, PREFER_PAST => 1);
+    }
+    else { 
+      die "You need Time::ParseDate if you use either the --before or --after option.";
+    }
   }
 }
 
+if ($opt_before && $opt_after && $opt_after >= $opt_before) {
+  die "--before ($opt_before) <= --after ($opt_after) -- conflict!";
+}
+
 my $iter = new Mail::SpamAssassin::ArchiveIterator({
 	'opt_j' => $opt_j,
 	'opt_n' => $opt_n,
@@ -179,6 +187,7 @@
 	'opt_head' => $opt_head,
 	'opt_tail' => $opt_tail,
 	'opt_after' => $opt_after,
+	'opt_before' => $opt_before,
 	'opt_restart' => $opt_restart,
 });