You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2018/10/18 14:52:58 UTC
svn commit: r1844251 - in /spamassassin/trunk/lib/Mail/SpamAssassin:
BayesStore/BDB.pm Message.pm NetSet.pm PerMsgStatus.pm Plugin/DCC.pm
Plugin/Pyzor.pm Plugin/Razor2.pm Plugin/Shortcircuit.pm
Author: hege
Date: Thu Oct 18 14:52:58 2018
New Revision: 1844251
URL: http://svn.apache.org/viewvc?rev=1844251&view=rev
Log:
Fork fixes for Pyzor/Razor2/DCC, handle shortcircuiting better, improve fulltext_tmpfile handling.
Modified:
spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/BDB.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm
spamassassin/trunk/lib/Mail/SpamAssassin/NetSet.pm
spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DCC.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Pyzor.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Razor2.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Shortcircuit.pm
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/BDB.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/BDB.pm?rev=1844251&r1=1844250&r2=1844251&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/BDB.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/BDB.pm Thu Oct 18 14:52:58 2018
@@ -78,9 +78,6 @@ sub new {
sub DESTROY {
my $self = shift;
- # Ignore exiting helper processes (razor_fork etc)
- return if defined $ENV{'IS_FORKED_HELPER_PROCESS'};
-
$self->_close_db;
}
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm?rev=1844251&r1=1844250&r2=1844251&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Message.pm Thu Oct 18 14:52:58 2018
@@ -680,9 +680,6 @@ sub finish {
sub DESTROY {
my $self = shift;
- # Ignore exiting helper processes (razor_fork etc)
- return if defined $ENV{'IS_FORKED_HELPER_PROCESS'};
-
# best practices: prevent potential calls to eval and to system routines
# in code of a DESTROY method from clobbering global variables $@ and $!
local($@,$!); # keep outer error handling unaffected by DESTROY
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/NetSet.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/NetSet.pm?rev=1844251&r1=1844250&r2=1844251&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/NetSet.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/NetSet.pm Thu Oct 18 14:52:58 2018
@@ -60,9 +60,6 @@ sub new {
sub DESTROY {
my($self) = shift;
- # Ignore exiting helper processes (razor_fork etc)
- return if defined $ENV{'IS_FORKED_HELPER_PROCESS'};
-
if (exists $self->{cache}) {
local($@, $!, $_); # protect outer layers from a potential surprise
my($hits, $attempts) = ($self->{cache_hits}, $self->{cache_attempts});
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm?rev=1844251&r1=1844250&r2=1844251&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm Thu Oct 18 14:52:58 2018
@@ -270,6 +270,7 @@ sub new {
'async' => Mail::SpamAssassin::AsyncLoop->new($main),
'master_deadline' => $msg->{master_deadline}, # dflt inherited from msg
'deadline_exceeded' => 0, # time limit exceeded, skipping further tests
+ 'tmpfiles' => { },
};
#$self->{main}->{use_rule_subs} = 1;
@@ -310,11 +311,13 @@ sub new {
sub DESTROY {
my ($self) = shift;
- # Ignore exiting helper processes (razor_fork etc)
- return if defined $ENV{'IS_FORKED_HELPER_PROCESS'};
-
- local $@;
- eval { $self->delete_fulltext_tmpfile() }; # Bug 5808
+ # best practices: prevent potential calls to eval and to system routines
+ # in code of a DESTROY method from clobbering global variables $@ and $!
+ local($@,$!); # keep outer error handling unaffected by DESTROY
+ # Bug 5808 - cleanup tmpfiles
+ foreach my $fn (keys %{$self->{tmpfiles}}) {
+ unlink($fn) or dbg("check: cannot unlink $fn: $!");
+ }
}
###########################################################################
@@ -2983,27 +2986,32 @@ sub sa_die { Mail::SpamAssassin::sa_die(
=item $status->create_fulltext_tmpfile (fulltext_ref)
This function creates a temporary file containing the passed scalar
-reference data (typically the full/pristine text of the message).
-This is typically used by external programs like pyzor and dccproc, to
-avoid hangs due to buffering issues. Methods that need this, should
-call $self->create_fulltext_tmpfile($fulltext) to retrieve the temporary
-filename; it will be created if it has not already been.
+reference data. If no scalar is passed, full/pristine message text is
+assumed. This is typically used by external programs like pyzor and
+dccproc, to avoid hangs due to buffering issues.
-Note: This can only be called once until $status->delete_fulltext_tmpfile() is
-called.
+All tempfiles are automatically cleaned up by PerMsgStatus destructor.
=cut
sub create_fulltext_tmpfile {
my ($self, $fulltext) = @_;
- if (defined $self->{fulltext_tmpfile}) {
- return $self->{fulltext_tmpfile};
+ my $pristine;
+ if (!defined $fulltext) {
+ if (defined $self->{fulltext_tmpfile}) {
+ return $self->{fulltext_tmpfile};
+ }
+ $fulltext = \$self->{msg}->get_pristine();
+ $pristine = 1;
}
my ($tmpf, $tmpfh) = Mail::SpamAssassin::Util::secure_tmpfile();
$tmpfh or die "failed to create a temporary file";
+ # record all created files so we can remove on DESTROY
+ $self->{tmpfiles}->{$tmpf} = 1;
+
# PerlIO's buffered print writes in 8 kB chunks - which can be slow.
# print $tmpfh $$fulltext or die "error writing to $tmpf: $!";
#
@@ -3016,30 +3024,33 @@ sub create_fulltext_tmpfile {
}
close $tmpfh or die "error closing $tmpf: $!";
- $self->{fulltext_tmpfile} = $tmpf;
+ $self->{fulltext_tmpfile} = $tmpf if $pristine;
dbg("check: create_fulltext_tmpfile, written %d bytes to file %s",
length($$fulltext), $tmpf);
- return $self->{fulltext_tmpfile};
+ return $tmpf;
}
-=item $status->delete_fulltext_tmpfile ()
+=item $status->delete_fulltext_tmpfile (tmpfile)
Will cleanup after a $status->create_fulltext_tmpfile() call. Deletes the
-temporary file and uncaches the filename.
+temporary file and uncaches the filename. Generally there no need to call
+this, PerMsgStatus destructor cleans up all tmpfiles.
=cut
sub delete_fulltext_tmpfile {
- my ($self) = @_;
- if (defined $self->{fulltext_tmpfile}) {
- if (!unlink $self->{fulltext_tmpfile}) {
- my $msg = sprintf("cannot unlink %s: %s", $self->{fulltext_tmpfile}, $!);
- # don't fuss too much if file is missing, perhaps it wasn't even created
- if ($! == ENOENT) { warn $msg } else { die $msg }
+ my ($self, $tmpfile) = @_;
+
+ $tmpfile = $self->{fulltext_tmpfile} if !defined $tmpfile;
+ if (defined $tmpfile && $self->{tmpfiles}->{$tmpfile}) {
+ unlink($tmpfile) or dbg("cannot unlink $tmpfile: $!");
+ if ($self->{fulltext_tmpfile} &&
+ $tmpfile eq $self->{fulltext_tmpfile}) {
+ delete $self->{fulltext_tmpfile};
}
- $self->{fulltext_tmpfile} = undef;
+ delete $self->{tmpfiles}->{$tmpfile};
}
}
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DCC.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DCC.pm?rev=1844251&r1=1844250&r2=1844251&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DCC.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DCC.pm Thu Oct 18 14:52:58 2018
@@ -671,8 +671,10 @@ sub _check_async {
my $timer = $self->{main}->time_method("check_dcc");
- if ($pms->{deadline_exceeded}) {
- $timeout = 1;
+ $pms->{dcc_abort} = $pms->{deadline_exceeded} || $pms->{shortcircuited};
+
+ if ($pms->{dcc_abort}) {
+ $timeout = 0;
} elsif ($timeout) {
# Calculate how much time left from original timeout
$timeout = $self->{main}->{conf}->{dcc_timeout} -
@@ -714,9 +716,14 @@ sub _check_async {
info("dcc: empty response from dccifd?");
}
}
+ } elsif ($pms->{dcc_abort}) {
+ dbg("dcc: bailing out due to deadline/shortcircuit");
+ delete $pms->{dcc_sock};
+ delete $pms->{dcc_range_callbacks};
} elsif ($timeout) {
dbg("dcc: no response from dccifd, timed out");
delete $pms->{dcc_sock};
+ delete $pms->{dcc_range_callbacks};
} else {
dbg("dcc: still waiting for dccifd response");
}
@@ -803,6 +810,7 @@ sub check_dcc_reputation_range {
return 0 if $self->{dcc_disabled};
return 0 if !$self->{main}->{conf}->{use_dcc};
return 0 if !$self->{main}->{conf}->{use_dcc_rep};
+ return 0 if $pms->{dcc_abort};
# Check if callback overriding rulename
my $cb;
@@ -1029,7 +1037,7 @@ sub ask_dcc {
local $SIG{PIPE} = sub { die "__brokenpipe__ignore__\n" };
# use a temp file -- open2() is unreliable, buffering-wise, under spamd
- my $tmpf = $pms->create_fulltext_tmpfile($fulltext);
+ my $tmpf = $pms->create_fulltext_tmpfile();
my @opts = split(/\s+/, $conf->{dcc_options} || '');
untaint_var(\@opts);
@@ -1090,8 +1098,6 @@ sub ask_dcc {
$pid, exit_status_str($?,$errno));
}
- $pms->delete_fulltext_tmpfile();
-
$pms->leave_helper_run_mode();
if ($timer->timed_out()) {
@@ -1123,6 +1129,9 @@ sub check_post_learn {
return if $self->{dcc_disabled};
return if !$self->{main}->{conf}->{use_dcc};
+ my $pms = $opts->{permsgstatus};
+ return if $pms->{dcc_abort};
+
# learn only if allowed
my $conf = $self->{main}->{conf};
my $learn_score = $conf->{dcc_learn_score};
@@ -1134,7 +1143,6 @@ sub check_post_learn {
# and if SpamAssassin concluded that the message is spam
# worse than our threshold
- my $pms = $opts->{permsgstatus};
if ($pms->is_spam()) {
my $score = $pms->get_score();
my $required_score = $pms->get_required_score();
@@ -1233,7 +1241,7 @@ sub dccsight_learn {
$pid, exit_status_str($?,$errno));
}
- $pms->delete_fulltext_tmpfile();
+ $pms->delete_fulltext_tmpfile($tmpf);
$pms->leave_helper_run_mode();
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Pyzor.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Pyzor.pm?rev=1844251&r1=1844250&r2=1844251&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Pyzor.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Pyzor.pm Thu Oct 18 14:52:58 2018
@@ -307,10 +307,13 @@ sub check_pyzor {
# initialize valid tags
$pms->{tag_data}->{PYZOR} = '';
+ # create fulltext tmpfile now (before possible forking)
+ $pms->{pyzor_tmpfile} = $pms->create_fulltext_tmpfile();
+
## non-forking method
if (!$self->{main}->{conf}->{pyzor_fork}) {
- my @results = $self->pyzor_lookup($pms, $full);
+ my @results = $self->pyzor_lookup($pms);
return $self->_check_result($pms, \@results);
}
@@ -329,26 +332,39 @@ sub check_pyzor {
};
my $pid = fork();
+ if (!defined $pid) {
+ info("pyzor: child fork failed: $!");
+ delete $pms->{pyzor_backchannel};
+ return 0;
+ }
if (!$pid) {
- # Must be set so DESTROY blocks etc can ignore us when exiting
- $ENV{'IS_FORKED_HELPER_PROCESS'} = 1;
+ $0 = "$0 (pyzor)";
+ $SIG{CHLD} = 'DEFAULT';
+ $SIG{PIPE} = 'IGNORE';
+ $SIG{$_} = sub {
+ eval { dbg("pyzor: child process $$ caught signal $_[0]"); };
+ _exit(6); # avoid END and destructor processing
+ kill('KILL',$$); # still kicking? die!
+ } foreach qw(INT HUP TERM TSTP QUIT USR1 USR2);
dbg("pyzor: child process $$ forked");
$pms->{pyzor_backchannel}->setup_backchannel_child_post_fork();
- my @results = $self->pyzor_lookup($pms, $full);
+ my @results = $self->pyzor_lookup($pms);
my $backmsg;
eval {
$backmsg = Storable::freeze(\@results);
};
if ($@) {
dbg("pyzor: child return value freeze failed: $@");
- _exit(0); # prevent touching filehandles as per perlfork(1)
+ _exit(0); # avoid END and destructor processing
}
if (!syswrite($pms->{pyzor_backchannel}->{parent}, $backmsg)) {
dbg("pyzor: child backchannel write failed: $!");
}
- _exit(0); # prevent touching filehandles as per perlfork(1)
+ _exit(0); # avoid END and destructor processing
}
+ $pms->{pyzor_pid} = $pid;
+
eval {
$pms->{pyzor_backchannel}->setup_backchannel_parent_post_fork($pid);
} or do {
@@ -361,7 +377,7 @@ sub check_pyzor {
}
sub pyzor_lookup {
- my ($self, $pms, $fulltext) = @_;
+ my ($self, $pms) = @_;
my $conf = $self->{main}->{conf};
my $timeout = $conf->{pyzor_timeout};
@@ -370,9 +386,6 @@ sub pyzor_lookup {
my $path = untaint_file_path($conf->{pyzor_path});
my $opts = untaint_var($conf->{pyzor_options}) || '';
- # use a temp file here -- open2() is unreliable, buffering-wise, under spamd
- my $tmpf = $pms->create_fulltext_tmpfile($fulltext);
-
$pms->enter_helper_run_mode();
my $pid;
@@ -382,10 +395,11 @@ sub pyzor_lookup {
my $err = $timer->run_and_catch(sub {
local $SIG{PIPE} = sub { die "__brokenpipe__ignore__\n" };
- dbg("pyzor: opening pipe: " . join(' ', $path, $opts, "check", "<$tmpf"));
+ dbg("pyzor: opening pipe: ".
+ join(' ', $path, $opts, "check", "<".$pms->{pyzor_tmpfile}));
$pid = Mail::SpamAssassin::Util::helper_app_pipe_open(*PYZOR,
- $tmpf, 1, $path, split(' ', $opts), "check");
+ $pms->{pyzor_tmpfile}, 1, $path, split(' ', $opts), "check");
$pid or die "$!\n";
# read+split avoids a Perl I/O bug (Bug 5985)
@@ -421,8 +435,6 @@ sub pyzor_lookup {
or info("pyzor: [%s] error: %s", $pid, exit_status_str($?, $errno));
}
- $pms->delete_fulltext_tmpfile();
-
$pms->leave_helper_run_mode();
if ($timer->timed_out()) {
@@ -451,14 +463,31 @@ sub _check_forked_result {
my ($self, $pms, $finish) = @_;
return 0 if !$pms->{pyzor_backchannel};
+ return 0 if !$pms->{pyzor_pid};
my $timer = $self->{main}->time_method("check_pyzor");
- my $kid_pid = (keys %{$pms->{pyzor_backchannel}->{kids}})[0];
+ $pms->{pyzor_abort} = $pms->{deadline_exceeded} || $pms->{shortcircuited};
+
+ my $kid_pid = $pms->{pyzor_pid};
# if $finish, force waiting for the child
- my $pid = waitpid($kid_pid, $finish ? 0 : WNOHANG);
+ my $pid = waitpid($kid_pid, $finish && !$pms->{pyzor_abort} ? 0 : WNOHANG);
if ($pid == 0) {
#dbg("pyzor: child process $kid_pid not finished yet, trying later");
+ if ($pms->{pyzor_abort}) {
+ dbg("pyzor: bailing out due to deadline/shortcircuit");
+ kill('TERM', $kid_pid);
+ if (waitpid($kid_pid, WNOHANG) == 0) {
+ sleep(1);
+ if (waitpid($kid_pid, WNOHANG) == 0) {
+ dbg("pyzor: child process $kid_pid still alive, KILL");
+ kill('KILL', $kid_pid);
+ waitpid($kid_pid, 0);
+ }
+ }
+ delete $pms->{pyzor_pid};
+ delete $pms->{pyzor_backchannel};
+ }
return 0;
} elsif ($pid == -1) {
# child does not exist?
@@ -573,7 +602,7 @@ sub plugin_report {
else {
info("reporter: could not report spam to Pyzor");
}
- $options->{report}->delete_fulltext_tmpfile();
+ $options->{report}->delete_fulltext_tmpfile($tmpf);
return 1;
}
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Razor2.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Razor2.pm?rev=1844251&r1=1844250&r2=1844251&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Razor2.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Razor2.pm Thu Oct 18 14:52:58 2018
@@ -457,9 +457,20 @@ sub check_razor2 {
};
my $pid = fork();
+ if (!defined $pid) {
+ info("razor2: child fork failed: $!");
+ delete $pms->{razor2_backchannel};
+ return 0;
+ }
if (!$pid) {
- # Must be set so DESTROY blocks etc can ignore us when exiting
- $ENV{'IS_FORKED_HELPER_PROCESS'} = 1;
+ $0 = "$0 (razor2)";
+ $SIG{CHLD} = 'DEFAULT';
+ $SIG{PIPE} = 'IGNORE';
+ $SIG{$_} = sub {
+ eval { dbg("razor2: child process $$ caught signal $_[0]"); };
+ _exit(6); # avoid END and destructor processing
+ kill('KILL',$$); # still kicking? die!
+ } foreach qw(INT HUP TERM TSTP QUIT USR1 USR2);
dbg("razor2: child process $$ forked");
$pms->{razor2_backchannel}->setup_backchannel_child_post_fork();
(undef, my @results) =
@@ -470,14 +481,16 @@ sub check_razor2 {
};
if ($@) {
dbg("razor2: child return value freeze failed: $@");
- _exit(0); # prevent touching filehandles as per perlfork(1)
+ _exit(0); # avoid END and destructor processing
}
if (!syswrite($pms->{razor2_backchannel}->{parent}, $backmsg)) {
dbg("razor2: child backchannel write failed: $!");
}
- _exit(0); # prevent touching filehandles as per perlfork(1)
+ _exit(0); # avoid END and destructor processing
}
+ $pms->{razor2_pid} = $pid;
+
eval {
$pms->{razor2_backchannel}->setup_backchannel_parent_post_fork($pid);
} or do {
@@ -503,14 +516,31 @@ sub _check_forked_result {
my ($self, $pms, $finish) = @_;
return 0 if !$pms->{razor2_backchannel};
+ return 0 if !$pms->{razor2_pid};
my $timer = $self->{main}->time_method("check_razor2");
- my $kid_pid = (keys %{$pms->{razor2_backchannel}->{kids}})[0];
+ $pms->{razor2_abort} = $pms->{deadline_exceeded} || $pms->{shortcircuited};
+
+ my $kid_pid = $pms->{razor2_pid};
# if $finish, force waiting for the child
- my $pid = waitpid($kid_pid, $finish ? 0 : WNOHANG);
+ my $pid = waitpid($kid_pid, $finish && !$pms->{razor2_abort} ? 0 : WNOHANG);
if ($pid == 0) {
#dbg("razor2: child process $kid_pid not finished yet, trying later");
+ if ($pms->{razor2_abort}) {
+ dbg("razor2: bailing out due to deadline/shortcircuit");
+ kill('TERM', $kid_pid);
+ if (waitpid($kid_pid, WNOHANG) == 0) {
+ sleep(1);
+ if (waitpid($kid_pid, WNOHANG) == 0) {
+ dbg("razor2: child process $kid_pid still alive, KILL");
+ kill('KILL', $kid_pid);
+ waitpid($kid_pid, 0);
+ }
+ }
+ delete $pms->{razor2_pid};
+ delete $pms->{razor2_backchannel};
+ }
return 0;
} elsif ($pid == -1) {
# child does not exist?
@@ -605,6 +635,7 @@ sub check_razor2_range {
# continue.
return unless $self->{razor2_available};
return unless $self->{main}->{conf}->{use_razor2};
+ return if $pms->{razor2_abort};
# Check if callback overriding rulename
if (!defined $rulename) {
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Shortcircuit.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Shortcircuit.pm?rev=1844251&r1=1844250&r2=1844251&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Shortcircuit.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Shortcircuit.pm Thu Oct 18 14:52:58 2018
@@ -256,6 +256,8 @@ sub hit_rule {
$scscore = $score;
}
+ $scan->{shortcircuited} = 1;
+
# bug 5256: if we short-circuit, don't do auto-learning
$scan->{disable_auto_learning} = 1;
$scan->got_hit('SHORTCIRCUIT', '', score => $scscore);