You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by fe...@apache.org on 2004/05/02 19:30:38 UTC
svn commit: rev 10488 - in incubator/spamassassin/trunk: lib/Mail/SpamAssassin masses
Author: felicity
Date: Sun May 2 10:30:37 2004
New Revision: 10488
Modified:
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
incubator/spamassassin/trunk/masses/mass-check
Log:
two things: move receive_date from Bayes and ArchiveIterator and put it in Util instead -- two copies of the same logic code makes no sense. add a --before option to mass-check, works in the same way as --after.
Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm Sun May 2 10:30:37 2004
@@ -28,13 +28,7 @@
use constant BIG_BYTES => 256*1024; # 256k is a big email
use constant BIG_LINES => BIG_BYTES/65; # 65 bytes/line is a good approximation
-my $no;
-my $tz;
-
-BEGIN {
- $no = 1;
- $tz = local_tz();
-}
+my $no = 1;
use vars qw {
$MESSAGES
@@ -55,8 +49,6 @@
$self->{s} = { }; # spam, of course
$self->{h} = { }; # ham, as if you couldn't guess
- $self->{opt_after} ||= 0; # default to 0
-
$self;
}
@@ -414,79 +406,24 @@
return 1;
}
-sub first_date {
- my (@strings) = @_;
-
- foreach my $string (@strings) {
- my $time = Mail::SpamAssassin::Util::parse_rfc822_date($string);
- return $time if defined($time) && $time;
- }
- return undef;
-}
-
-sub receive_date {
- my ($self, $header) = @_;
-
- $header ||= '';
- $header =~ s/\n[ \t]+/ /gs; # fix continuation lines
-
- my @rcvd = ($header =~ /^Received:(.*)/img);
- my @local;
- my $time;
-
- if (@rcvd) {
- if ($rcvd[0] =~ /qmail \d+ invoked by uid \d+/ ||
- $rcvd[0] =~ /\bfrom (?:localhost\s|(?:\S+ ){1,2}\S*\b127\.0\.0\.1\b)/)
- {
- push @local, (shift @rcvd);
- }
- if (@rcvd && ($rcvd[0] =~ m/\bby localhost with \w+ \(fetchmail-[\d.]+/)) {
- push @local, (shift @rcvd);
- }
- elsif (@local) {
- unshift @rcvd, (shift @local);
- }
- }
-
- if (@rcvd) {
- $time = first_date(shift @rcvd);
- return $time if defined($time);
- }
- if (@local) {
- $time = first_date(@local);
- return $time if defined($time);
- }
- if ($header =~ /^(?:From|X-From-Line:)\s+(.+)$/im) {
- my $string = $1;
- $string .= " $tz" unless $string =~ /(?:[-+]\d{4}|\b[A-Z]{2,4}\b)/;
- $time = first_date($string);
- return $time if defined($time);
- }
- if (@rcvd) {
- $time = first_date(@rcvd);
- return $time if defined($time);
- }
- if ($header =~ /^Resent-Date:\s*(.+)$/im) {
- $time = first_date($1);
- return $time if defined($time);
- }
- if ($header =~ /^Date:\s*(.+)$/im) {
- $time = first_date($1);
- return $time if defined($time);
- }
-
- return time;
-}
-
############################################################################
sub message_is_useful_by_date {
my ($self, $date) = @_;
- return 1 unless $self->{opt_after}; # not using that feature
- return 0 unless $date; # undef or 0 date = unusable
+ return 0 unless $date; # undef or 0 date = unusable
- return $date > $self->{opt_after};
+ if (!$self->{opt_after} && !$self->{opt_before}) {
+ # Not using the feature
+ return 1;
+ }
+ elsif (!$self->{opt_before}) {
+ # Just care about after
+ return $date > $self->{opt_after};
+ }
+ else {
+ return (($date < $self->{opt_before}) && ($date > $self->{opt_after}));
+ }
}
############################################################################
@@ -531,7 +468,7 @@
$header .= $_;
}
close(INPUT);
- my $date = $self->receive_date($header);
+ my $date = Mail::SpamAssassin::Util::receive_date($header);
next if !$self->message_is_useful_by_date($date);
$self->{$class}->{index_pack($class, "f", $date, $mail)} = $date;
}
@@ -552,7 +489,7 @@
$header .= $_;
}
close(INPUT);
- my $date = $self->receive_date($header);
+ my $date = Mail::SpamAssassin::Util::receive_date($header);
return if !$self->message_is_useful_by_date($date);
$self->{$class}->{index_pack($class, "f", $date, $mail)} = $date;
}
@@ -615,7 +552,7 @@
$t = $no++;
}
else {
- $t = $self->receive_date($header);
+ $t = Mail::SpamAssassin::Util::receive_date($header);
next if !$self->message_is_useful_by_date($t);
}
$self->{$class}->{index_pack($class, "m", $t, "$file.$offset")} = $t;
Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm Sun May 2 10:30:37 2004
@@ -801,7 +801,7 @@
$self->{store}->nspam_nham_change (0, 1);
}
- my $msgatime = $self->receive_date(scalar $msg->get_all_headers(0,1));
+ my $msgatime = $msg->receive_date();
# If the message atime comes back as being more than 1 day in the
# future, something's messed up and we should revert to current time as
@@ -1180,7 +1180,7 @@
# If the message atime comes back as being in the future, something's
# messed up and we should revert to current time as a safety measure.
#
- my $msgatime = $self->receive_date(scalar $msg->get_all_headers(0,1));
+ my $msgatime = $msg->receive_date();
my $now = time;
$msgatime = $now if ( $msgatime > $now );
@@ -1409,72 +1409,6 @@
$self->{store}->untie_db();
}
return 1;
-}
-
-# Stolen from Archive Iteraator ... Should probably end up in M::SA::Util
-# Modified to call first_date via $self->first_date()
-sub receive_date {
- my ($self, $header) = @_;
-
- $header ||= '';
- $header =~ s/\n[ \t]+/ /gs; # fix continuation lines
-
- my @rcvd = ($header =~ /^Received:(.*)/img);
- my @local;
- my $time;
-
- if (@rcvd) {
- if ($rcvd[0] =~ /qmail \d+ invoked by uid \d+/ ||
- $rcvd[0] =~ /\bfrom (?:localhost\s|(?:\S+ ){1,2}\S*\b127\.0\.0\.1\b)/)
- {
- push @local, (shift @rcvd);
- }
- if (@rcvd && ($rcvd[0] =~ m/\bby localhost with \w+ \(fetchmail-[\d.]+/)) {
- push @local, (shift @rcvd);
- }
- elsif (@local) {
- unshift @rcvd, (shift @local);
- }
- }
-
- if (@rcvd) {
- $time = $self->first_date(shift @rcvd);
- return $time if defined($time);
- }
- if (@local) {
- $time = $self->first_date(@local);
- return $time if defined($time);
- }
- if ($header =~ /^(?:From|X-From-Line:)\s+(.+)$/im) {
- my $string = $1;
- $string .= " ".$self->{tz} unless $string =~ /(?:[-+]\d{4}|\b[A-Z]{2,4}\b)/;
- $time = $self->first_date($string);
- return $time if defined($time);
- }
- if (@rcvd) {
- $time = $self->first_date(@rcvd);
- return $time if defined($time);
- }
- if ($header =~ /^Resent-Date:\s*(.+)$/im) {
- $time = $self->first_date($1);
- return $time if defined($time);
- }
- if ($header =~ /^Date:\s*(.+)$/im) {
- $time = $self->first_date($1);
- return $time if defined($time);
- }
-
- return time;
-}
-
-sub first_date {
- my ($self, @strings) = @_;
-
- foreach my $string (@strings) {
- my $time = Mail::SpamAssassin::Util::parse_rfc822_date($string);
- return $time if defined($time) && $time;
- }
- return undef;
}
1;
Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm Sun May 2 10:30:37 2004
@@ -804,6 +804,21 @@
# ---------------------------------------------------------------------------
+=item receive_date()
+
+Return a time_t value with the received date of the current message,
+or current time if received time couldn't be determined.
+
+=cut
+
+sub receive_date {
+ my($self) = @_;
+
+ return Mail::SpamAssassin::Util::receive_date(scalar $self->get_all_headers(0,1));
+}
+
+# ---------------------------------------------------------------------------
+
sub dbg { Mail::SpamAssassin::dbg (@_); }
1;
Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm Sun May 2 10:30:37 2004
@@ -814,6 +814,72 @@
###########################################################################
+sub first_date {
+ my (@strings) = @_;
+
+ foreach my $string (@strings) {
+ my $time = parse_rfc822_date($string);
+ return $time if defined($time) && $time;
+ }
+ return undef;
+}
+
+sub receive_date {
+ my ($header) = @_;
+
+ $header ||= '';
+ $header =~ s/\n[ \t]+/ /gs; # fix continuation lines
+
+ my @rcvd = ($header =~ /^Received:(.*)/img);
+ my @local;
+ my $time;
+
+ if (@rcvd) {
+ if ($rcvd[0] =~ /qmail \d+ invoked by uid \d+/ ||
+ $rcvd[0] =~ /\bfrom (?:localhost\s|(?:\S+ ){1,2}\S*\b127\.0\.0\.1\b)/)
+ {
+ push @local, (shift @rcvd);
+ }
+ if (@rcvd && ($rcvd[0] =~ m/\bby localhost with \w+ \(fetchmail-[\d.]+/)) {
+ push @local, (shift @rcvd);
+ }
+ elsif (@local) {
+ unshift @rcvd, (shift @local);
+ }
+ }
+
+ if (@rcvd) {
+ $time = first_date(shift @rcvd);
+ return $time if defined($time);
+ }
+ if (@local) {
+ $time = first_date(@local);
+ return $time if defined($time);
+ }
+ if ($header =~ /^(?:From|X-From-Line:)\s+(.+)$/im) {
+ my $string = $1;
+ $string .= " ".local_tz() unless $string =~ /(?:[-+]\d{4}|\b[A-Z]{2,4}\b)/;
+ $time = first_date($string);
+ return $time if defined($time);
+ }
+ if (@rcvd) {
+ $time = first_date(@rcvd);
+ return $time if defined($time);
+ }
+ if ($header =~ /^Resent-Date:\s*(.+)$/im) {
+ $time = first_date($1);
+ return $time if defined($time);
+ }
+ if ($header =~ /^Date:\s*(.+)$/im) {
+ $time = first_date($1);
+ return $time if defined($time);
+ }
+
+ return time;
+}
+
+###########################################################################
+
sub dbg { Mail::SpamAssassin::dbg (@_); }
1;
Modified: incubator/spamassassin/trunk/masses/mass-check
==============================================================================
--- incubator/spamassassin/trunk/masses/mass-check (original)
+++ incubator/spamassassin/trunk/masses/mass-check Sun May 2 10:30:37 2004
@@ -48,6 +48,7 @@
--after=N only test mails received after time_t N (negative values
are an offset from current time, e.g. -86400 = last day)
or after date as parsed by Time::ParseDate (e.g. '-6 months')
+ --before=N same as --after, except received times are before time_t N
--all don't skip big messages
--head=N only check first N ham and N spam (N messages if -n used)
--tail=N only check last N ham and N spam (N messages if -n used)
@@ -75,7 +76,7 @@
$opt_debug $opt_format $opt_hamlog $opt_head $opt_loghits
$opt_mid $opt_mh $opt_ms $opt_net $opt_nosort $opt_progress
$opt_showdots $opt_spamlog $opt_tail $opt_rules $opt_restart
- $opt_loguris $opt_after $opt_rewrite $opt_deencap);
+ $opt_loguris $opt_after $opt_before $opt_rewrite $opt_deencap);
use FindBin;
use lib "$FindBin::Bin/../lib";
@@ -98,7 +99,7 @@
GetOptions("c=s", "p=s", "f=s", "j=i", "n", "o", "all", "bayes", "debug",
"hamlog=s", "head=i", "loghits", "mh", "mid", "ms", "net",
"progress", "rewrite:s", "showdots", "spamlog=s", "tail=i",
- "rules=s", "restart=i", "after=s", "loguris", "deencap=s",
+ "rules=s", "restart=i", "after=s", "before=s", "loguris", "deencap=s",
"dir" => sub { $opt_format = "dir"; },
"file" => sub { $opt_format = "file"; },
"mbox" => sub { $opt_format = "mbox"; },
@@ -160,18 +161,25 @@
my $ham_count = 0;
my $init_results = 0;
-if ($opt_after && $opt_after =~ /^-\d+$/) {
- $opt_after = time + $opt_after;
-}
-elsif ($opt_after && $opt_after !~ /^-?\d+$/) {
- if (HAS_TIME_PARSEDATE) {
- $opt_after = Time::ParseDate::parsedate($opt_after, GMT => 1, PREFER_PAST => 1);
+# Deal with --before and --after
+foreach my $time ($opt_before, $opt_after) {
+ if ($time && $time =~ /^-\d+$/) {
+ $time = time + $time;
}
- else {
- die "You need Time::ParseDate if you use the --after option.";
+ elsif ($time && $time !~ /^-?\d+$/) {
+ if (HAS_TIME_PARSEDATE) {
+ $time = Time::ParseDate::parsedate($time, GMT => 1, PREFER_PAST => 1);
+ }
+ else {
+ die "You need Time::ParseDate if you use either the --before or --after option.";
+ }
}
}
+if ($opt_before && $opt_after && $opt_after >= $opt_before) {
+ die "--before ($opt_before) <= --after ($opt_after) -- conflict!";
+}
+
my $iter = new Mail::SpamAssassin::ArchiveIterator({
'opt_j' => $opt_j,
'opt_n' => $opt_n,
@@ -179,6 +187,7 @@
'opt_head' => $opt_head,
'opt_tail' => $opt_tail,
'opt_after' => $opt_after,
+ 'opt_before' => $opt_before,
'opt_restart' => $opt_restart,
});