You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2018/10/01 13:20:03 UTC

svn commit: r1842494 - in /spamassassin/trunk: lib/Mail/SpamAssassin/Plugin/Razor2.pm t/razor2.t

Author: hege
Date: Mon Oct  1 13:20:03 2018
New Revision: 1842494

URL: http://svn.apache.org/viewvc?rev=1842494&view=rev
Log:
Add razor_fork option for async check

Modified:
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Razor2.pm
    spamassassin/trunk/t/razor2.t

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Razor2.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Razor2.pm?rev=1842494&r1=1842493&r2=1842494&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Razor2.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Razor2.pm Mon Oct  1 13:20:03 2018
@@ -43,11 +43,15 @@ package Mail::SpamAssassin::Plugin::Razo
 use Mail::SpamAssassin::Plugin;
 use Mail::SpamAssassin::Logger;
 use Mail::SpamAssassin::Timeout;
+use Mail::SpamAssassin::SubProcBackChannel;
 use strict;
 use warnings;
 # use bytes;
 use re 'taint';
 
+use Storable;
+use POSIX qw(PIPE_BUF WNOHANG);
+
 our @ISA = qw(Mail::SpamAssassin::Plugin);
 
 sub new {
@@ -97,6 +101,20 @@ Whether to use Razor2, if it is availabl
     type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
   });
 
+=item razor_fork (0|1)		(default: 0)
+
+Instead of running Razor2 synchronously, fork separate process for it and
+read the results in later (similar to async DNS lookups).  Increases
+throughput.  Experimental.
+
+=cut
+
+  push(@cmds, {
+    setting => 'razor_fork',
+    default => 0,
+    type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
+  });
+
 =back
 
 =head1 ADMINISTRATOR SETTINGS
@@ -376,33 +394,135 @@ sub plugin_revoke {
 }
 
 sub check_razor2 {
-  my ($self, $permsgstatus, $full) = @_;
+  my ($self, $pms, $full) = @_;
 
-  return $permsgstatus->{razor2_result} if (defined $permsgstatus->{razor2_result});
-  $permsgstatus->{razor2_result} = 0;
-  $permsgstatus->{razor2_cf_score} = { '4' => 0, '8' => 0 };
+  return 0 unless $self->{razor2_available};
+  return 0 unless $self->{main}->{conf}->{use_razor2};
 
-  return unless $self->{razor2_available};
-  return unless $self->{main}->{conf}->{use_razor2};
+  return $pms->{razor2_result} if (defined $pms->{razor2_result});
 
-  my $timer = $self->{main}->time_method("check_razor2");
+  return 0 if $pms->{razor2_running};
+  $pms->{razor2_running} = 1;
+
+  ## non-forking method
+
+  if (!$self->{main}->{conf}->{razor_fork}) {
+    my $timer = $self->{main}->time_method("check_razor2");
+    # TODO: check for cache header, set results appropriately
+    # do it this way to make it easier to get out the results later from the
+    # netcache plugin ... what netcache plugin?
+    (undef, my @results) =
+      $self->razor2_access($full, 'check', $pms->{master_deadline});
+    return $self->_check_result($pms, \@results);
+  }
+
+  ## forking method
+
+  $pms->{razor2_rulename} = $pms->get_current_eval_rule_name();
+
+  # create socketpair for communication
+  $pms->{razor2_backchannel} = Mail::SpamAssassin::SubProcBackChannel->new();
+  eval {
+    $pms->{razor2_backchannel}->setup_backchannel_parent_pre_fork();
+  } or do {
+    dbg("razor2: backchannel pre-setup failed: $@");
+    delete $pms->{razor2_backchannel};
+    return 0;
+  };
+
+  my $pid = fork();
+  if (!$pid) {
+    dbg("razor2: child process $$ forked");
+    $pms->{razor2_backchannel}->setup_backchannel_child_post_fork();
+    (undef, my @results) =
+      $self->razor2_access($full, 'check', $pms->{master_deadline});
+    my $backmsg;
+    eval {
+      $backmsg = Storable::freeze(\@results);
+    };
+    if ($@) {
+      dbg("razor2: child return value freeze failed: $@");
+      exit;
+    }
+    if (!syswrite($pms->{razor2_backchannel}->{parent}, $backmsg)) {
+      dbg("razor2: child backchannel write failed: $!");
+    }
+    exit;
+  }
 
-  my $return;
-  my @results;
+  eval {
+    $pms->{razor2_backchannel}->setup_backchannel_parent_post_fork($pid);
+  } or do {
+    dbg("razor2: backchannel post-setup failed: $@");
+    delete $pms->{razor2_backchannel};
+    return 0;
+  };
+
+  return 0;
+}
+
+sub check_tick {
+  my ($self, $opts) = @_;
+  $self->_check_forked_result($opts->{permsgstatus}, 0);
+}
 
-  # TODO: check for cache header, set results appropriately
+sub check_cleanup {
+  my ($self, $opts) = @_;
+  $self->_check_forked_result($opts->{permsgstatus}, 1);
+}
+
+sub _check_forked_result {
+  my ($self, $pms, $finish) = @_;
+
+  return 0 if !$pms->{razor2_backchannel};
+
+  my $kid_pid = (keys %{$pms->{razor2_backchannel}->{kids}})[0];
+  # if $finish, force waiting for the child
+  my $pid = waitpid($kid_pid, $finish ? 0 : WNOHANG);
+  if ($pid == 0) {
+    dbg("razor2: child process $kid_pid not finished yet, trying later");
+    return 0;
+  } elsif ($pid == -1) {
+    # child does not exist?
+    dbg("razor2: child process $kid_pid already handled?");
+    delete $pms->{razor2_backchannel};
+    return 0;
+  }
+
+  dbg("razor2: child process $kid_pid finished, reading results");
+
+  my $backmsg;
+  my $ret = sysread($pms->{razor2_backchannel}->{latest_kid_fh}, $backmsg, PIPE_BUF);
+  if (!defined $ret || $ret == 0) {
+    dbg("razor2: could not read result from child: ".($ret == 0 ? 0 : $!));
+    delete $pms->{razor2_backchannel};
+    return 0;
+  }
+
+  delete $pms->{razor2_backchannel};
+
+  my $results;
+  eval {
+    $results = Storable::thaw($backmsg);
+  };
+  if ($@) {
+    dbg("razor2: child return value thaw failed: $@");
+    return;
+  }
+
+  $self->_check_result($pms, $results);
+}
+
+sub _check_result {
+  my ($self, $pms, $results) = @_;
 
-  # do it this way to make it easier to get out the results later from the
-  # netcache plugin
-  ($return, @results) =
-    $self->razor2_access($full, 'check', $permsgstatus->{master_deadline});
   $self->{main}->call_plugins ('process_razor_result',
-  	{ results => \@results, permsgstatus => $permsgstatus }
+  	{ results => $results, permsgstatus => $pms }
   );
 
-  foreach my $result (@results) {
+  foreach my $result (@$results) {
     if (exists $result->{result}) {
-      $permsgstatus->{razor2_result} = $result->{result} if $result->{result};
+      $pms->{razor2_result} = $result->{result} if $result->{result};
     }
     elsif ($result->{noresponse}) {
       dbg('razor2: part=' . $result->{part} . ' noresponse');
@@ -415,46 +535,73 @@ sub check_razor2 {
 
       next if $result->{contested};
 
-      my $cf = $permsgstatus->{razor2_cf_score}->{$result->{engine}} || 0;
+      my $cf = $pms->{razor2_cf_score}->{$result->{engine}} || 0;
       if ($result->{confidence} > $cf) {
-        $permsgstatus->{razor2_cf_score}->{$result->{engine}} = $result->{confidence};
+        $pms->{razor2_cf_score}->{$result->{engine}} = $result->{confidence};
       }
     }
   }
 
-  dbg("razor2: results: spam? " . $permsgstatus->{razor2_result});
-  while(my ($engine, $cf) = each %{$permsgstatus->{razor2_cf_score}}) {
+  $pms->{razor2_result} ||= 0;
+  $pms->{razor2_cf_score} ||= {};
+
+  dbg("razor2: results: spam? " . $pms->{razor2_result});
+  while(my ($engine, $cf) = each %{$pms->{razor2_cf_score}}) {
     dbg("razor2: results: engine $engine, highest cf score: $cf");
   }
 
-  return $permsgstatus->{razor2_result};
+  if ($self->{main}->{conf}->{razor_fork}) {
+    # forked needs to run got_hit()
+    if ($pms->{razor2_rulename} && $pms->{razor2_result}) {
+      $pms->got_hit($pms->{razor2_rulename}, "", ruletype => 'eval');
+    }
+    # forked needs to run range callbacks
+    if ($pms->{razor2_range_callbacks}) {
+      foreach (@{$pms->{razor2_range_callbacks}}) {
+        $self->check_razor2_range($pms, '', @$_);
+      }
+    }
+  }
+
+  return $pms->{razor2_result};
 }
 
 # Check the cf value of a given message and return if it's within the
 # given range
 sub check_razor2_range {
-  my ($self, $permsgstatus, $body, $engine, $min, $max) = @_;
+  my ($self, $pms, $body, $engine, $min, $max, $rulename) = @_;
 
   # If Razor2 isn't available, or the general test is disabled, don't
   # continue.
   return unless $self->{razor2_available};
   return unless $self->{main}->{conf}->{use_razor2};
-  return unless $self->{main}->{conf}->{scores}->{'RAZOR2_CHECK'};
+
+  # If forked, call back later unless results are in
+  if ($self->{main}->{conf}->{razor_fork}) {
+    if (!defined $pms->{razor2_result}) {
+      my $rulename = $pms->get_current_eval_rule_name();
+      dbg("razor2: delaying check_razor2_range call for $rulename");
+      # array matches check_razor2_range() argument order
+      push @{$pms->{razor2_range_callbacks}},
+        [$engine, $min, $max, $rulename];
+      return 0;
+    }
+  }
 
   # If Razor2 hasn't been checked yet, go ahead and run it.
-  unless (defined $permsgstatus->{razor2_result}) {
-    $self->check_razor2($permsgstatus, $body);
+  if (!$pms->{razor2_running}) {
+    $self->check_razor2($pms, $body);
   }
 
   my $cf = 0;
   if ($engine) {
-    $cf = $permsgstatus->{razor2_cf_score}->{$engine};
-    return unless defined $cf;
+    $cf = $pms->{razor2_cf_score}->{$engine};
+    return 0 unless defined $cf;
   }
   else {
     # If no specific engine was given to the rule, find the highest cf
     # determined and use that
-    while(my ($engine, $ecf) = each %{$permsgstatus->{razor2_cf_score}}) {
+    while(my ($engine, $ecf) = each %{$pms->{razor2_cf_score}}) {
       if ($ecf > $cf) {
         $cf = $ecf;
       }
@@ -462,11 +609,19 @@ sub check_razor2_range {
   }
 
   if ($cf >= $min && $cf <= $max) {
-    $permsgstatus->test_log(sprintf("cf: %3d", $cf));
+    my $cf_str = sprintf("cf: %3d", $cf);
+    if ($self->{main}->{conf}->{razor_fork}) {
+      # TODO why doesn't test_log accept rulename as parameter?
+      my $desc = $pms->{conf}->{descriptions}->{$rulename} || '';
+      $desc .= " [$cf_str]"; $desc =~ s/^\s+//s;
+      $pms->got_hit($rulename, "", description => $desc, ruletype => 'eval');
+    } else {
+      $pms->test_log($cf_str);
+    }
     return 1;
   }
 
-  return;
+  return 0;
 }
 
 1;

Modified: spamassassin/trunk/t/razor2.t
URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/razor2.t?rev=1842494&r1=1842493&r2=1842494&view=diff
==============================================================================
--- spamassassin/trunk/t/razor2.t (original)
+++ spamassassin/trunk/t/razor2.t Mon Oct  1 13:20:03 2018
@@ -12,7 +12,7 @@ use Test::More;
 plan skip_all => "Net tests disabled" unless conf_bool('run_net_tests');
 plan skip_all => "Needs Razor2" unless HAS_RAZOR2;
 plan skip_all => "Needs Razor2 Identity File Needed. razor-register / razor-admin -register has not been run, or identity file ($ENV{'HOME'}/.razor/identity) is unreadable." unless HAS_RAZOR2_IDENT;
-plan tests => 2;
+plan tests => 4;
 
 diag('Note: Failures may not be an SpamAssassin bug, as Razor tests can fail due to problems with the Razor servers.');
 
@@ -39,6 +39,9 @@ loadplugin Mail::SpamAssassin::Plugin::R
 
 sarun ("-t < data/spam/razor2", \&patterns_run_cb);
 ok_all_patterns();
+# Same with fork
+sarun ("--cf='razor_fork 1' -t < data/spam/razor2", \&patterns_run_cb);
+ok_all_patterns();
 
 #TESTING FOR HAM
 %patterns = ();
@@ -48,3 +51,6 @@ ok_all_patterns();
 
 sarun ("-t < data/nice/001", \&patterns_run_cb);
 ok_all_patterns();
+# same with fork
+sarun ("--cf='razor_fork 1' -t < data/nice/001", \&patterns_run_cb);
+ok_all_patterns();