You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2005/06/17 03:06:04 UTC

svn commit: r191042 - /spamassassin/trunk/lib/Mail/SpamAssassin/SpamdForkScaling.pm

Author: jm
Date: Thu Jun 16 18:06:03 2005
New Revision: 191042

URL: http://svn.apache.org/viewcvs?rev=191042&view=rev
Log:
bug 4407: really idle servers were exiting after 5 minutes of inactivity, due to a watchdog timeout.  implement a 'ping' operation, run every 2.5 minutes, to avoid this

Modified:
    spamassassin/trunk/lib/Mail/SpamAssassin/SpamdForkScaling.pm

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/SpamdForkScaling.pm
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/lib/Mail/SpamAssassin/SpamdForkScaling.pm?rev=191042&r1=191041&r2=191042&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/SpamdForkScaling.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/SpamdForkScaling.pm Thu Jun 16 18:06:03 2005
@@ -34,7 +34,7 @@
 
 @PFSTATE_VARS = qw(
   PFSTATE_ERROR PFSTATE_STARTING PFSTATE_IDLE PFSTATE_BUSY PFSTATE_KILLED
-  PFORDER_ACCEPT
+  PFORDER_ACCEPT 
 );
 
 %EXPORT_TAGS = (
@@ -50,11 +50,29 @@
 
 use constant PFORDER_ACCEPT      => 10;
 
+###########################################################################
+
+# we use the following protocol between the master and child processes to
+# control when they accept/who accepts: server tells a child to accept with a
+# PF_ACCEPT_ORDER, child responds with "B$pid\n" when it's busy, and "I$pid\n"
+# once it's idle again.  In addition, the parent sends PF_PING_ORDER
+# periodically to ping the child processes.  Very simple protocol.  Note that
+# the $pid values are packed into 4 bytes so that the buffers are always of a
+# known length; if you need to transfer longer data, assign a new protocol verb
+# (the first char) and use the length of the following data buffer as the
+# packed value.
+use constant PF_ACCEPT_ORDER     => "A....\n";
+use constant PF_PING_ORDER       => "P....\n";
 
 # timeout for a sysread() on the command channel.  if we go this long
 # without a message from the spamd parent or child, it's an error.
 use constant TOUT_READ_MAX       => 300;
 
+# interval between "ping" messages from the spamd parent to all children.
+# must stay below TOUT_READ_MAX, so that an idle but healthy child never
+# hits that read timeout.
+use constant TOUT_PING_INTERVAL  => 150;
+
 ###########################################################################
 
 sub new {
@@ -68,6 +86,7 @@
   $self->{kids} = { };
   $self->{overloaded} = 0;
   $self->{min_children} ||= 1;
+  $self->{server_last_ping} = time;
 
   $self;
 }
@@ -191,7 +210,16 @@
   }
 
   # any action?
-  return unless ($nfound);
+  if (!$nfound) {
+    # none.  periodically ping the children though just to ensure
+    # they're still alive and can hear us
+    
+    my $now = time;
+    if ($now - $self->{server_last_ping} > TOUT_PING_INTERVAL) {
+      $self->main_ping_kids($now);
+    }
+    return;
+  }
 
   # were the kids ready, or did we get signal?
   if (vec ($rout, $self->{server_fileno}, 1)) {
@@ -239,6 +267,23 @@
   $self->adapt_num_children();
 }
 
+sub main_ping_kids {  # parent side: write PF_PING_ORDER to every child's backchannel socket
+  my ($self, $now) = @_;  # $now: timestamp to record as the time of this ping round
+
+  $self->{server_last_ping} = $now;  # restarts the TOUT_PING_INTERVAL clock checked by the caller
+
+  my ($sock, $kid);
+  while (($kid, $sock) = each %{$self->{backchannel}->{kids}}) {  # hash maps kid => socket
+    $self->syswrite_with_retry($sock, PF_PING_ORDER) and next;  # write succeeded; move on
+
+    warn "prefork: write of ping failed to $kid fd=".$sock->fileno.": ".$!;
+
+    # note: this is safe according to the note in perldoc -f each; 'it is
+    # always safe to delete the item most recently returned by each()'
+    $self->child_error_kill($kid, $sock);  # presumably drops $kid from the hash above -- confirm
+  }
+}
+
 sub read_one_message_from_child_socket {
   my ($self, $sock) = @_;
 
@@ -282,14 +327,6 @@
 
 ###########################################################################
 
-# we use the following protocol between the master and child processes to
-# control when they accept/who accepts: server tells a child to accept with a
-# "A....\n", child responds with "B$pid\n" when it's busy, and "I$pid\n" once
-# it's idle again.  Very simple protocol.  Note that the $pid values are packed
-# into 4 bytes so that the buffers are always of a known length; if you need to
-# transfer longer data, assign a new protocol verb (the first char) and use the
-# length of the following data buffer as the packed value.
-
 sub order_idle_child_to_accept {
   my ($self) = @_;
 
@@ -308,7 +345,7 @@
       return $self->order_idle_child_to_accept();
     }
 
-    if (!$self->syswrite_with_retry($sock, "A....\n"))
+    if (!$self->syswrite_with_retry($sock, PF_ACCEPT_ORDER))
     {
       # failure to write to the child; bad news.  call it dead
       warn "prefork: killing rogue child $kid, failed to write on fd ".$sock->fileno.": $!\n";
@@ -351,7 +388,7 @@
   my ($self, $kid) = @_;
   if ($self->{waiting_for_idle_child}) {
     my $sock = $self->{backchannel}->get_socket_for_child($kid);
-    $self->syswrite_with_retry($sock, "A....\n")
+    $self->syswrite_with_retry($sock, PF_ACCEPT_ORDER)
         or die "prefork: $kid claimed it was ready, but write failed on fd ".
                             $sock->fileno.": ".$!;
     $self->{waiting_for_idle_child} = 0;
@@ -406,6 +443,10 @@
     }
 
     chomp $line;
+    if (index ($line, "P") == 0) {  # string starts with "P" = ping
+      dbg("prefork: periodic ping from spamd parent");
+      next;
+    }
     if (index ($line, "A") == 0) {  # string starts with "A" = accept
       return PFORDER_ACCEPT;
     }
@@ -454,6 +495,7 @@
     }
     else {
       $tout = $deadline - $now;     # the remaining timeout
+      $tout = 1 if ($tout <= 0);    # ensure it's > 0
     }
 
     dbg("prefork: sysread(".$sock->fileno.") not ready, wait max $tout secs");