You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2018/10/01 13:20:03 UTC
svn commit: r1842494 - in /spamassassin/trunk:
lib/Mail/SpamAssassin/Plugin/Razor2.pm t/razor2.t
Author: hege
Date: Mon Oct 1 13:20:03 2018
New Revision: 1842494
URL: http://svn.apache.org/viewvc?rev=1842494&view=rev
Log:
Add razor_fork option for async check
Modified:
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Razor2.pm
spamassassin/trunk/t/razor2.t
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Razor2.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Razor2.pm?rev=1842494&r1=1842493&r2=1842494&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Razor2.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Razor2.pm Mon Oct 1 13:20:03 2018
@@ -43,11 +43,15 @@ package Mail::SpamAssassin::Plugin::Razo
use Mail::SpamAssassin::Plugin;
use Mail::SpamAssassin::Logger;
use Mail::SpamAssassin::Timeout;
+use Mail::SpamAssassin::SubProcBackChannel;
use strict;
use warnings;
# use bytes;
use re 'taint';
+use Storable;
+use POSIX qw(PIPE_BUF WNOHANG);
+
our @ISA = qw(Mail::SpamAssassin::Plugin);
sub new {
@@ -97,6 +101,20 @@ Whether to use Razor2, if it is availabl
type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
});
+=item razor_fork (0|1) (default: 0)
+
+Instead of running Razor2 synchronously, fork separate process for it and
+read the results in later (similar to async DNS lookups). Increases
+throughput. Experimental.
+
+=cut
+
+ push(@cmds, {
+ setting => 'razor_fork',
+ default => 0,
+ type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
+ });
+
=back
=head1 ADMINISTRATOR SETTINGS
@@ -376,33 +394,135 @@ sub plugin_revoke {
}
sub check_razor2 {
- my ($self, $permsgstatus, $full) = @_;
+ my ($self, $pms, $full) = @_;
- return $permsgstatus->{razor2_result} if (defined $permsgstatus->{razor2_result});
- $permsgstatus->{razor2_result} = 0;
- $permsgstatus->{razor2_cf_score} = { '4' => 0, '8' => 0 };
+ return 0 unless $self->{razor2_available};
+ return 0 unless $self->{main}->{conf}->{use_razor2};
- return unless $self->{razor2_available};
- return unless $self->{main}->{conf}->{use_razor2};
+ return $pms->{razor2_result} if (defined $pms->{razor2_result});
- my $timer = $self->{main}->time_method("check_razor2");
+ return 0 if $pms->{razor2_running};
+ $pms->{razor2_running} = 1;
+
+ ## non-forking method
+
+ if (!$self->{main}->{conf}->{razor_fork}) {
+ my $timer = $self->{main}->time_method("check_razor2");
+ # TODO: check for cache header, set results appropriately
+ # do it this way to make it easier to get out the results later from the
+ # netcache plugin ... what netcache plugin?
+ (undef, my @results) =
+ $self->razor2_access($full, 'check', $pms->{master_deadline});
+ return $self->_check_result($pms, \@results);
+ }
+
+ ## forking method
+
+ $pms->{razor2_rulename} = $pms->get_current_eval_rule_name();
+
+ # create socketpair for communication
+ $pms->{razor2_backchannel} = Mail::SpamAssassin::SubProcBackChannel->new();
+ eval {
+ $pms->{razor2_backchannel}->setup_backchannel_parent_pre_fork();
+ } or do {
+ dbg("razor2: backchannel pre-setup failed: $@");
+ delete $pms->{razor2_backchannel};
+ return 0;
+ };
+
+ my $pid = fork();
+ if (!$pid) {
+ dbg("razor2: child process $$ forked");
+ $pms->{razor2_backchannel}->setup_backchannel_child_post_fork();
+ (undef, my @results) =
+ $self->razor2_access($full, 'check', $pms->{master_deadline});
+ my $backmsg;
+ eval {
+ $backmsg = Storable::freeze(\@results);
+ };
+ if ($@) {
+ dbg("razor2: child return value freeze failed: $@");
+ exit;
+ }
+ if (!syswrite($pms->{razor2_backchannel}->{parent}, $backmsg)) {
+ dbg("razor2: child backchannel write failed: $!");
+ }
+ exit;
+ }
- my $return;
- my @results;
+ eval {
+ $pms->{razor2_backchannel}->setup_backchannel_parent_post_fork($pid);
+ } or do {
+ dbg("razor2: backchannel post-setup failed: $@");
+ delete $pms->{razor2_backchannel};
+ return 0;
+ };
+
+ return 0;
+}
+
+sub check_tick {
+ my ($self, $opts) = @_;
+ $self->_check_forked_result($opts->{permsgstatus}, 0);
+}
- # TODO: check for cache header, set results appropriately
+sub check_cleanup {
+ my ($self, $opts) = @_;
+ $self->_check_forked_result($opts->{permsgstatus}, 1);
+}
+
+sub _check_forked_result {
+ my ($self, $pms, $finish) = @_;
+
+ return 0 if !$pms->{razor2_backchannel};
+
+ my $kid_pid = (keys %{$pms->{razor2_backchannel}->{kids}})[0];
+ # if $finish, force waiting for the child
+ my $pid = waitpid($kid_pid, $finish ? 0 : WNOHANG);
+ if ($pid == 0) {
+ dbg("razor2: child process $kid_pid not finished yet, trying later");
+ return 0;
+ } elsif ($pid == -1) {
+ # child does not exist?
+ dbg("razor2: child process $kid_pid already handled?");
+ delete $pms->{razor2_backchannel};
+ return 0;
+ }
+
+ dbg("razor2: child process $kid_pid finished, reading results");
+
+ my $backmsg;
+ my $ret = sysread($pms->{razor2_backchannel}->{latest_kid_fh}, $backmsg, PIPE_BUF);
+ if (!defined $ret || $ret == 0) {
+ dbg("razor2: could not read result from child: ".($ret == 0 ? 0 : $!));
+ delete $pms->{razor2_backchannel};
+ return 0;
+ }
+
+ delete $pms->{razor2_backchannel};
+
+ my $results;
+ eval {
+ $results = Storable::thaw($backmsg);
+ };
+ if ($@) {
+ dbg("razor2: child return value thaw failed: $@");
+ return;
+ }
+
+ $self->_check_result($pms, $results);
+}
+
+sub _check_result {
+ my ($self, $pms, $results) = @_;
- # do it this way to make it easier to get out the results later from the
- # netcache plugin
- ($return, @results) =
- $self->razor2_access($full, 'check', $permsgstatus->{master_deadline});
$self->{main}->call_plugins ('process_razor_result',
- { results => \@results, permsgstatus => $permsgstatus }
+ { results => $results, permsgstatus => $pms }
);
- foreach my $result (@results) {
+ foreach my $result (@$results) {
if (exists $result->{result}) {
- $permsgstatus->{razor2_result} = $result->{result} if $result->{result};
+ $pms->{razor2_result} = $result->{result} if $result->{result};
}
elsif ($result->{noresponse}) {
dbg('razor2: part=' . $result->{part} . ' noresponse');
@@ -415,46 +535,73 @@ sub check_razor2 {
next if $result->{contested};
- my $cf = $permsgstatus->{razor2_cf_score}->{$result->{engine}} || 0;
+ my $cf = $pms->{razor2_cf_score}->{$result->{engine}} || 0;
if ($result->{confidence} > $cf) {
- $permsgstatus->{razor2_cf_score}->{$result->{engine}} = $result->{confidence};
+ $pms->{razor2_cf_score}->{$result->{engine}} = $result->{confidence};
}
}
}
- dbg("razor2: results: spam? " . $permsgstatus->{razor2_result});
- while(my ($engine, $cf) = each %{$permsgstatus->{razor2_cf_score}}) {
+ $pms->{razor2_result} ||= 0;
+ $pms->{razor2_cf_score} ||= {};
+
+ dbg("razor2: results: spam? " . $pms->{razor2_result});
+ while(my ($engine, $cf) = each %{$pms->{razor2_cf_score}}) {
dbg("razor2: results: engine $engine, highest cf score: $cf");
}
- return $permsgstatus->{razor2_result};
+ if ($self->{main}->{conf}->{razor_fork}) {
+ # forked needs to run got_hit()
+ if ($pms->{razor2_rulename} && $pms->{razor2_result}) {
+ $pms->got_hit($pms->{razor2_rulename}, "", ruletype => 'eval');
+ }
+ # forked needs to run range callbacks
+ if ($pms->{razor2_range_callbacks}) {
+ foreach (@{$pms->{razor2_range_callbacks}}) {
+ $self->check_razor2_range($pms, '', @$_);
+ }
+ }
+ }
+
+ return $pms->{razor2_result};
}
# Check the cf value of a given message and return if it's within the
# given range
sub check_razor2_range {
- my ($self, $permsgstatus, $body, $engine, $min, $max) = @_;
+ my ($self, $pms, $body, $engine, $min, $max, $rulename) = @_;
# If Razor2 isn't available, or the general test is disabled, don't
# continue.
return unless $self->{razor2_available};
return unless $self->{main}->{conf}->{use_razor2};
- return unless $self->{main}->{conf}->{scores}->{'RAZOR2_CHECK'};
+
+ # If forked, call back later unless results are in
+ if ($self->{main}->{conf}->{razor_fork}) {
+ if (!defined $pms->{razor2_result}) {
+ my $rulename = $pms->get_current_eval_rule_name();
+ dbg("razor2: delaying check_razor2_range call for $rulename");
+ # array matches check_razor2_range() argument order
+ push @{$pms->{razor2_range_callbacks}},
+ [$engine, $min, $max, $rulename];
+ return 0;
+ }
+ }
# If Razor2 hasn't been checked yet, go ahead and run it.
- unless (defined $permsgstatus->{razor2_result}) {
- $self->check_razor2($permsgstatus, $body);
+ if (!$pms->{razor2_running}) {
+ $self->check_razor2($pms, $body);
}
my $cf = 0;
if ($engine) {
- $cf = $permsgstatus->{razor2_cf_score}->{$engine};
- return unless defined $cf;
+ $cf = $pms->{razor2_cf_score}->{$engine};
+ return 0 unless defined $cf;
}
else {
# If no specific engine was given to the rule, find the highest cf
# determined and use that
- while(my ($engine, $ecf) = each %{$permsgstatus->{razor2_cf_score}}) {
+ while(my ($engine, $ecf) = each %{$pms->{razor2_cf_score}}) {
if ($ecf > $cf) {
$cf = $ecf;
}
@@ -462,11 +609,19 @@ sub check_razor2_range {
}
if ($cf >= $min && $cf <= $max) {
- $permsgstatus->test_log(sprintf("cf: %3d", $cf));
+ my $cf_str = sprintf("cf: %3d", $cf);
+ if ($self->{main}->{conf}->{razor_fork}) {
+ # TODO why doesn't test_log accept rulename as parameter?
+ my $desc = $pms->{conf}->{descriptions}->{$rulename} || '';
+ $desc .= " [$cf_str]"; $desc =~ s/^\s+//s;
+ $pms->got_hit($rulename, "", description => $desc, ruletype => 'eval');
+ } else {
+ $pms->test_log($cf_str);
+ }
return 1;
}
- return;
+ return 0;
}
1;
Modified: spamassassin/trunk/t/razor2.t
URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/razor2.t?rev=1842494&r1=1842493&r2=1842494&view=diff
==============================================================================
--- spamassassin/trunk/t/razor2.t (original)
+++ spamassassin/trunk/t/razor2.t Mon Oct 1 13:20:03 2018
@@ -12,7 +12,7 @@ use Test::More;
plan skip_all => "Net tests disabled" unless conf_bool('run_net_tests');
plan skip_all => "Needs Razor2" unless HAS_RAZOR2;
plan skip_all => "Needs Razor2 Identity File Needed. razor-register / razor-admin -register has not been run, or identity file ($ENV{'HOME'}/.razor/identity) is unreadable." unless HAS_RAZOR2_IDENT;
-plan tests => 2;
+plan tests => 4;
diag('Note: Failures may not be an SpamAssassin bug, as Razor tests can fail due to problems with the Razor servers.');
@@ -39,6 +39,9 @@ loadplugin Mail::SpamAssassin::Plugin::R
sarun ("-t < data/spam/razor2", \&patterns_run_cb);
ok_all_patterns();
+# Same with fork
+sarun ("--cf='razor_fork 1' -t < data/spam/razor2", \&patterns_run_cb);
+ok_all_patterns();
#TESTING FOR HAM
%patterns = ();
@@ -48,3 +51,6 @@ ok_all_patterns();
sarun ("-t < data/nice/001", \&patterns_run_cb);
ok_all_patterns();
+# same with fork
+sarun ("--cf='razor_fork 1' -t < data/nice/001", \&patterns_run_cb);
+ok_all_patterns();