You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by fe...@apache.org on 2004/02/01 22:14:40 UTC
svn commit: rev 6421 - incubator/spamassassin/trunk/lib/Mail/SpamAssassin
Author: felicity
Date: Sun Feb 1 13:14:39 2004
New Revision: 6421
Modified:
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/CmdLearn.pm
Log:
Use 3 different states for ArchiveIterator: 1 for sa-learn (opt_j == 0: do everything in memory) and 2 for mass-check, both of which use a tempfile to keep memory use down: 1 process (opt_j == 1: no forking, etc.) or 2+ processes (forking, IPC, etc.).
Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/ArchiveIterator.pm Sun Feb 1 13:14:39 2004
@@ -77,7 +77,7 @@
die "set_functions never called";
}
- if ($self->{opt_j} == 1) {
+ if ($self->{opt_j} == 0) { # sa-learn-style, all in memory, etc.
my $message;
my $class;
my $result;
@@ -92,18 +92,7 @@
&{$self->{result_sub}}($class, $result, $date) if $result;
}
}
- elsif ($self->{opt_j} > 1) {
- my $select = IO::Select->new();
-
- my $total_count = 0;
- my $needs_restart = 0;
- my @child = ();
- my @pid = ();
- my $messages;
-
- # Have some kids ...
- $self->start_children($self->{opt_j}, \@child, \@pid, $select);
-
+ else { # mass-check-style, keep minimum memory usage, allow fork(), etc.
my $tmpf;
($tmpf, $self->{messageh}) = Mail::SpamAssassin::Util::secure_tmpfile();
unlink $tmpf;
@@ -125,87 +114,114 @@
seek ($self->{messageh}, 0, 0);
$MESSAGES = $self->next_message();
- # feed childen, make them work for it, repeat.
- while ($select->count()) {
- foreach my $socket ($select->can_read()) {
- my $result = '';
- my $line;
- while ($line = readline $socket) {
- if ($line =~ /^RESULT (.+)$/) {
- my($class,$type,$date) = index_unpack($1);
- #warn ">> RESULT: $class, $type, $date\n";
-
- if (defined $self->{opt_restart} && ($total_count % $self->{opt_restart}) == 0) {
- $needs_restart = 1;
- }
-
- # if messages remain, and we don't need to restart, send a message
- if (($MESSAGES > $total_count) && !$needs_restart) {
- print { $socket } $self->next_message() . "\n";
- $total_count++;
- #warn ">> recv: $MESSAGES $total_count\n";
- }
- else {
- # stop listening on this child since we're done with it.
- #warn ">> removeresult: $needs_restart $MESSAGES $total_count\n";
- $select->remove($socket);
- }
+ if ($self->{opt_j} == 1) { # only one process
+ my $message;
+ my $class;
+ my $result;
+ my $messages;
+
+ while ($message = $self->next_message()) {
+ my ($class, undef, $date) = index_unpack($message);
+ $result = $self->run_message($message);
+ &{$self->{result_sub}}($class, $result, $date) if $result;
+ }
+ }
+ else { # more than one process
+ my $select = IO::Select->new();
- # Deal with the result we got.
- if ($result) {
- chop $result; # need to chop the \n before RESULT
- &{$self->{result_sub}}($class, $result, $date);
- }
+ my $total_count = 0;
+ my $needs_restart = 0;
+ my @child = ();
+ my @pid = ();
+ my $messages;
+
+ # Have some kids ...
+ $self->start_children($self->{opt_j}, \@child, \@pid, $select);
+
+ # feed childen, make them work for it, repeat.
+ while ($select->count()) {
+ foreach my $socket ($select->can_read()) {
+ my $result = '';
+ my $line;
+ while ($line = readline $socket) {
+ if ($line =~ /^RESULT (.+)$/) {
+ my($class,$type,$date) = index_unpack($1);
+ #warn ">> RESULT: $class, $type, $date\n";
+
+ if (defined $self->{opt_restart} && ($total_count % $self->{opt_restart}) == 0) {
+ $needs_restart = 1;
+ }
+
+ # if messages remain, and we don't need to restart, send a message
+ if (($MESSAGES > $total_count) && !$needs_restart) {
+ print { $socket } $self->next_message() . "\n";
+ $total_count++;
+ #warn ">> recv: $MESSAGES $total_count\n";
+ }
+ else {
+ # stop listening on this child since we're done with it.
+ #warn ">> removeresult: $needs_restart $MESSAGES $total_count\n";
+ $select->remove($socket);
+ }
+
+ # Deal with the result we got.
+ if ($result) {
+ chop $result; # need to chop the \n before RESULT
+ &{$self->{result_sub}}($class, $result, $date);
+ }
+
+ last; # this will get out of the read for this client
+ }
+ elsif ($line eq "START\n") {
+ if ($MESSAGES > $total_count) {
+ # we still have messages, send one to child
+ print { $socket } $self->next_message() . "\n";
+ $total_count++;
+ #warn ">> new: $MESSAGES $total_count\n";
+ }
+ else {
+ # no more messages, so stop listening on this child
+ #warn ">> removestart: $needs_restart $MESSAGES $total_count\n";
+ $select->remove($socket);
+ }
- last; # this will get out of the read for this client
- }
- elsif ($line eq "START\n") {
- if ($MESSAGES > $total_count) {
- # we still have messages, send one to child
- print { $socket } $self->next_message() . "\n";
- $total_count++;
- #warn ">> new: $MESSAGES $total_count\n";
+ last; # this will get out of the read for this client
}
else {
- # no more messages, so stop listening on this child
- #warn ">> removestart: $needs_restart $MESSAGES $total_count\n";
- $select->remove($socket);
+ # result line, remember it.
+ $result .= $line;
}
-
- last; # this will get out of the read for this client
}
- else {
- # result line, remember it.
- $result .= $line;
- }
- }
- # some error happened during the read!
- if (!defined $line || !$line) {
- $needs_restart = 1;
- warn "Got an undef from readline?!? Restarting all children, probably lost some results. :(\n";
- $select->remove($socket);
+ # some error happened during the read!
+ if (!defined $line || !$line) {
+ $needs_restart = 1;
+ warn "Got an undef from readline?!? Restarting all children, probably lost some results. :(\n";
+ $select->remove($socket);
+ }
}
- }
- #warn ">> out of loop, $MESSAGES $total_count $needs_restart ".$select->count()."\n";
+ #warn ">> out of loop, $MESSAGES $total_count $needs_restart ".$select->count()."\n";
- # If there are still messages to process, and we need to restart
- # the children, and all of the children are idle, let's go ahead.
- if ($needs_restart && $select->count() == 0 && ($MESSAGES > $total_count)) {
- $needs_restart = 0;
-
- #warn "debug: Needs restart, $MESSAGES total, $total_count done.\n";
- $self->reap_children($self->{opt_j}, \@child, \@pid);
- @child=();
- @pid=();
- $self->start_children($self->{opt_j}, \@child, \@pid, $select);
+ # If there are still messages to process, and we need to restart
+ # the children, and all of the children are idle, let's go ahead.
+ if ($needs_restart && $select->count() == 0 && ($MESSAGES > $total_count)) {
+ $needs_restart = 0;
+
+ #warn "debug: Needs restart, $MESSAGES total, $total_count done.\n";
+ $self->reap_children($self->{opt_j}, \@child, \@pid);
+ @child=();
+ @pid=();
+ $self->start_children($self->{opt_j}, \@child, \@pid, $select);
+ }
}
+
+ # reap children
+ $self->reap_children($self->{opt_j}, \@child, \@pid);
}
+ # Ok, get rid of the tempfile now ...
close($self->{messageh});
- # reap children
- $self->reap_children($self->{opt_j}, \@child, \@pid);
}
}
Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/CmdLearn.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/CmdLearn.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/CmdLearn.pm Sun Feb 1 13:14:39 2004
@@ -237,7 +237,7 @@
}
my $iter = new Mail::SpamAssassin::ArchiveIterator ({
- 'opt_j' => 1,
+ 'opt_j' => 0,
'opt_n' => 1,
'opt_all' => 1,
});