You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2006/04/15 20:40:53 UTC

svn commit: r394348 - in /spamassassin/trunk: MANIFEST lib/Mail/SpamAssassin/AsyncLoop.pm lib/Mail/SpamAssassin/Dns.pm lib/Mail/SpamAssassin/DnsResolver.pm lib/Mail/SpamAssassin/PerMsgStatus.pm lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm t/dnsbl.t

Author: jm
Date: Sat Apr 15 11:40:50 2006
New Revision: 394348

URL: http://svn.apache.org/viewcvs?rev=394348&view=rev
Log:
bug 4860: consolidate DNS-lookup async-rule infrastructure, increasing code-sharing between URIDNSBL.pm and Dns.pm and allowing 19% speedup

Added:
    spamassassin/trunk/lib/Mail/SpamAssassin/AsyncLoop.pm
Modified:
    spamassassin/trunk/MANIFEST
    spamassassin/trunk/lib/Mail/SpamAssassin/Dns.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/DnsResolver.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm
    spamassassin/trunk/t/dnsbl.t

Modified: spamassassin/trunk/MANIFEST
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/MANIFEST?rev=394348&r1=394347&r2=394348&view=diff
==============================================================================
--- spamassassin/trunk/MANIFEST (original)
+++ spamassassin/trunk/MANIFEST Sat Apr 15 11:40:50 2006
@@ -27,6 +27,7 @@
 lib/Mail/SpamAssassin.pm
 lib/Mail/SpamAssassin/AICache.pm
 lib/Mail/SpamAssassin/ArchiveIterator.pm
+lib/Mail/SpamAssassin/AsyncLoop.pm
 lib/Mail/SpamAssassin/AutoWhitelist.pm
 lib/Mail/SpamAssassin/Bayes.pm
 lib/Mail/SpamAssassin/Bayes/CombineChi.pm

Added: spamassassin/trunk/lib/Mail/SpamAssassin/AsyncLoop.pm
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/lib/Mail/SpamAssassin/AsyncLoop.pm?rev=394348&view=auto
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/AsyncLoop.pm (added)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/AsyncLoop.pm Sat Apr 15 11:40:50 2006
@@ -0,0 +1,332 @@
+# <@LICENSE>
+# Copyright 2004 Apache Software Foundation
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# </...@LICENSE>
+
+=head1 NAME
+
+Mail::SpamAssassin::AsyncLoop - scanner asynchronous event loop
+
+=head1 DESCRIPTION
+
+An asynchronous event loop used for long-running operations, performed "in the
+background" during the Mail::SpamAssassin::check() scan operation, such as DNS
+blocklist lookups.
+
+=head1 METHODS
+
+=over 4
+
+=cut
+
+package Mail::SpamAssassin::AsyncLoop;
+
+use strict;
+use warnings;
+use bytes;
+
+use Mail::SpamAssassin;
+use Mail::SpamAssassin::Logger;
+
+our @ISA = qw();
+
+#############################################################################
+
+sub new {
+  my $class = shift;
+  $class = ref($class) || $class;
+
+  my ($main) = @_;
+  my $self = {
+    main                => $main,
+    last_count          => 0,
+    times_count_was_same => 0,
+    queries_started     => 0,
+    queries_completed   => 0,
+    pending_lookups     => { }
+  };
+
+  bless ($self, $class);
+  $self;
+}
+
+# ---------------------------------------------------------------------------
+
+=item $obj = $async->start_lookup($obj)
+
+Register the start a long-running asynchronous lookup operation. C<$obj>
+is a hash reference containing the following items:
+
+=over 4
+
+=item key (required)
+
+A key string, unique to this lookup.  This is what is reported in
+debug messages, used as the key for C<get_lookup()>, etc.
+
+=item id (required)
+
+An ID string, also unique to this lookup.  Typically, this is the DNS packet ID
+as returned by DnsResolver's C<bgsend> method.  Sadly, the Net::DNS
+architecture forces us to keep a separate ID string for this task instead of
+reusing C<key> -- if you are not using DNS lookups through DnsResolver, it
+should be OK to just reuse C<key>.
+
+=item type (required)
+
+A string, typically one word, used to describe the type of lookup in log
+messages, such as C<DNSBL>, C<MX>, C<TXT>.
+
+=item poll_callback (optional)
+
+A code reference, which will be called periodically during the
+background-processing period.  If you will be performing an async lookup on a
+non-DNS-based service, you will need to implement this so that it checks for
+new responses and calls C<set_response_packet()> or C<report_id_complete()> as
+appropriate.   DNS-based lookups can leave it undefined, since
+DnsResolver::poll_responses() will be called automatically anyway.
+
+The code reference will be called with one argument, the C<$ent> object.
+
+=item completed_callback (optional)
+
+A code reference, which will be called when the lookup has been reported as
+complete via C<set_response_packet()> or C<report_id_complete()>.
+
+The code reference will be called with one argument, the C<$ent> object.
+
+=back
+
+C<$obj> is returned by this method.
+
+=cut
+
+sub start_lookup {
+  my ($self, $ent) = @_;
+
+  die "oops, no id"  unless $ent->{id};
+  die "oops, no key" unless $ent->{key};
+  die "oops, no type" unless $ent->{type};
+
+  $self->{queries_started}++;
+  $self->{pending_lookups}->{$ent->{key}} = $ent;
+  $ent;
+}
+
+# ---------------------------------------------------------------------------
+
+=item $obj = $async->get_lookup($key)
+
+Retrieve the pending-lookup object for the given key C<$key>.
+
+If the lookup is complete, this will return C<undef>.
+
+Note that a lookup is still considered "pending" until C<complete_lookups()> is
+called, even if it has been reported as complete via C<set_response_packet()>
+or C<report_id_complete()>.
+
+=cut
+
+sub get_lookup {
+  my ($self, $key) = @_;
+  return $self->{pending_lookups}->{$key};
+}
+
+# ---------------------------------------------------------------------------
+
+=item @objs = $async->get_pending_lookups()
+
+Retrieve the lookup objects for all pending lookups.
+
+Note that a lookup is still considered "pending" until C<complete_lookups()> is
+called, even if it has been reported as complete via C<set_response_packet()>
+or C<report_id_complete()>.
+
+=cut
+
+sub get_pending_lookups {
+  my ($self) = @_;
+  return values %{$self->{pending_lookups}};
+}
+
+# ---------------------------------------------------------------------------
+
+=item $alldone = $async->complete_lookups()
+
+Perform a poll of the pending lookups, to see if any are completed; if they
+are, their <completed_callback> is called with the entry object for that
+lookup.
+
+If there are no lookups remaining, or if too long has elapsed since any results
+were returned, C<1> is returned, otherwise C<0>.
+
+=cut
+
+sub complete_lookups {
+  my ($self, $timeout) = @_;
+  my %typecount = ();
+  my $stillwaiting = 0;
+
+  my $pending = $self->{pending_lookups};
+  if (scalar keys %{$pending} <= 0) {
+    return 1;           # nothing left to do
+  }
+
+  $self->{queries_started} = 0;
+  $self->{queries_completed} = 0;
+
+  # trap this loop in an eval { } block, as Net::DNS could throw
+  # die()s our way; in particular, process_dnsbl_results() has
+  # thrown die()s before (bug 3794).
+  eval {
+
+    my $nfound = $self->{main}->{resolver}->poll_responses($timeout);
+    $nfound ||= 'no';
+    dbg ("async: select found $nfound socks ready");
+
+    foreach my $key (keys %{$pending}) {
+      my $ent = $pending->{$key};
+
+      # call a "poll_callback" sub, if one exists
+      if (defined $ent->{poll_callback}) {
+        $ent->{poll_callback}->($ent);
+      }
+
+      my $type = $ent->{type};
+      if (!exists ($self->{finished}->{$ent->{id}})) {
+        $typecount{$type}++;
+        next;
+      }
+
+      $ent->{response_packet} = delete $self->{finished}->{$ent->{id}};
+      if (defined $ent->{completed_callback}) {
+        $ent->{completed_callback}->($ent);
+      }
+
+      $self->{queries_completed}++;
+      delete $self->{pending_lookups}->{$key};
+    }
+
+    dbg("async: queries completed: ".$self->{queries_completed}.
+                  " started: ".$self->{queries_started});
+
+    if (1) {
+      dbg("async: queries active: ".
+          join (' ', map { "$_=$typecount{$_}" } sort keys %typecount)." at ".
+          localtime(time));
+    }
+
+    # ensure we don't get stuck if a request gets lost in the ether.
+    if (!$stillwaiting) {
+      my $numkeys = scalar keys %{$self->{pending_lookups}};
+      if ($numkeys == 0) {
+        $stillwaiting = 0;
+
+      } else {
+        $stillwaiting = 1;
+
+        # avoid looping forever if we haven't got all results.
+        if ($self->{last_count} == $numkeys) {
+          $self->{times_count_was_same}++;
+          if ($self->{times_count_was_same} > 20)
+          {
+            dbg("async: escaping: must have lost requests");
+            $self->abort_remaining_lookups ($self);
+            $stillwaiting = 0;
+          }
+        } else {
+          $self->{last_count} = $numkeys;
+          $self->{times_count_was_same} = 0;
+        }
+      }
+    }
+
+  };
+
+  if ($@) {
+    dbg("async: caught complete_lookups death, aborting: $@");
+    $stillwaiting = 0;      # abort remaining
+  }
+
+  return (!$stillwaiting);
+}
+
+# ---------------------------------------------------------------------------
+
+=item $async->abort_remaining_lookups()
+
+Abort any remaining lookups.
+
+=cut
+
+sub abort_remaining_lookups {
+  my ($self) = @_;
+
+  my $pending = $self->{pending_lookups};
+  my $foundone = 0;
+  foreach my $key (keys %{$pending})
+  {
+    if (!$foundone) {
+      dbg("async: aborting remaining lookups");
+      $foundone = 1;
+    }
+
+    delete $pending->{$key};
+  }
+  $self->{main}->{resolver}->bgabort();
+}
+
+# ---------------------------------------------------------------------------
+
+=item $async->set_response_packet($id, $pkt)
+
+Register a "response packet" for a given query.  C<$id> is the ID for the
+query, and must match the C<id> supplied in C<start_lookup()>. C<$pkt> is the
+packet object for the response.
+
+If this was called, C<$pkt> will be available in the C<completed_callback>
+function as C<$ent-<gt>{response_packet}>.
+
+One or the other of C<set_response_packet()> or C<report_id_complete()>
+should be called, but not both.
+
+=cut
+
+sub set_response_packet {
+  my ($self, $id, $pkt) = @_;
+  $self->{finished}->{$id} = $pkt;
+}
+
+=item $async->report_id_complete($id)
+
+Register that a query has completed, and is no longer "pending". C<$id> is the
+ID for the query, and must match the C<id> supplied in C<start_lookup()>.
+
+One or the other of C<set_response_packet()> or C<report_id_complete()>
+should be called, but not both.
+
+=cut
+
+sub report_id_complete {
+  my ($self, $id) = @_;
+  $self->{finished}->{$id} = undef;
+}
+
+# ---------------------------------------------------------------------------
+
+1;
+
+=back
+
+=cut

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Dns.pm
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/lib/Mail/SpamAssassin/Dns.pm?rev=394348&r1=394347&r2=394348&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Dns.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Dns.pm Sat Apr 15 11:40:50 2006
@@ -21,6 +21,7 @@
 
 use Mail::SpamAssassin::Conf;
 use Mail::SpamAssassin::PerMsgStatus;
+use Mail::SpamAssassin::AsyncLoop;
 use Mail::SpamAssassin::Constants qw(:ip);
 use File::Spec;
 use IO::Socket;
@@ -94,32 +95,49 @@
 
 ###########################################################################
 
-# DNS query array constants
-use constant ID => 0;
-use constant RULES => 1;
-use constant SETS => 2;
-
 # TODO: $server is currently unused
 sub do_rbl_lookup {
   my ($self, $rule, $set, $type, $server, $host, $subtest) = @_;
 
+  my $key = "dns:$type:$host";
+  my $existing = $self->{async}->get_lookup($key);
+
   # only make a specific query once
-  if (!defined $self->{dnspending}->{$type}->{$host}->[ID]) {
+  if (!$existing) {
     dbg("dns: launching DNS $type query for $host in background");
-    $self->{rbl_launch} = time;
-    $self->{dnspending}->{$type}->{$host}->[ID] = $self->res_bgsend($host, $type);
+    $self->{query_launch_time} = time;
+
+    my $ent = {
+      key => $key,
+      type => "DNSBL-".$type,
+      sets => [ ],  # filled in below
+      rules => [ ], # filled in below
+      # id is filled in after we send the query below
+    };
+
+    my $id = $self->{resolver}->bgsend($host, $type, undef, sub {
+        my $pkt = shift;
+        my $id = shift;
+        $self->process_dnsbl_result($ent, $pkt);
+        $self->{async}->report_id_complete($id);
+      });
+
+    $ent->{id} = $id;     # tie up the loose end
+    $existing = $self->{async}->start_lookup($ent);
   }
 
   # always add set
-  push @{$self->{dnspending}->{$type}->{$host}->[SETS]}, $set;
+  push @{$existing->{sets}}, $set;
 
   # sometimes match or always match
   if (defined $subtest) {
     $self->{dnspost}->{$set}->{$subtest} = $rule;
   }
   else {
-    push @{$self->{dnspending}->{$type}->{$host}->[RULES]}, $rule;
+    push @{$existing->{rules}}, $rule;
   }
+
+  $self->{rulename_to_key}->{$rule} = $key;
 }
 
 # TODO: these are constant so they should only be added once at startup
@@ -131,23 +149,31 @@
 sub do_dns_lookup {
   my ($self, $rule, $type, $host) = @_;
 
+  my $key = "dns:$type:$host";
+
   # only make a specific query once
-  if (!defined $self->{dnspending}->{$type}->{$host}->[ID]) {
-    dbg("dns: launching DNS $type query for $host in background");
-    $self->{rbl_launch} = time;
-    $self->{dnspending}->{$type}->{$host}->[ID] = $self->res_bgsend($host, $type);
-  }
-  push @{$self->{dnspending}->{$type}->{$host}->[RULES]}, $rule;
-}
+  return if $self->{async}->get_lookup($key);
+
+  dbg("dns: launching DNS $type query for $host in background");
+
+  my $ent = {
+    key => $key,
+    type => "DNSBL-".$type,
+    rules => [ $rule ],
+    # id is filled in after we send the query below
+  };
 
-sub res_bgsend {
-  my ($self, $host, $type) = @_;
+  my $id = $self->{resolver}->bgsend($host, $type, undef, sub {
+      my $pkt = shift;
+      my $id = shift;
+      $self->process_dnsbl_result($ent, $pkt);
+      $self->{async}->report_id_complete($id);
+    });
 
-  return $self->{resolver}->bgsend($host, $type, undef, sub {
-          my $pkt = shift;
-          my $id = shift;
-          $self->{dnsfinished}->{$id} = $pkt;
-        });
+  $ent->{id} = $id;     # tie up the loose end
+  $self->{async}->start_lookup($ent);
+
+  $self->{query_launch_time} = time;
 }
 
 ###########################################################################
@@ -166,7 +192,23 @@
       $log = "$4.$3.$2.$1 listed in $5";
     }
   }
-  $self->{dnsresult}->{$rule}->{$log} = 1;
+
+  # TODO: this may result in some log messages appearing under the
+  # wrong rules, since we could see this sequence: { test one hits,
+  # test one's message is logged, test two hits, test one fires again
+  # on another IP, test one's message is logged for that other IP --
+  # but under test two's heading }.   Right now though it's better
+  # than just not logging at all.
+
+  $self->{already_logged} ||= { };
+  if ($log && !$self->{already_logged}->{$log}) {
+    $self->test_log($log);
+    $self->{already_logged}->{$log} = 1;
+  }
+
+  if (!defined $self->{tests_already_hit}->{$rule}) {
+    $self->got_hit($rule, "RBL: ");
+  }
 }
 
 sub dnsbl_uri {
@@ -183,6 +225,8 @@
     push(@vals, "type=$qtype") if $qtype ne "A";
     my $uri = "dns:$qname" . (@vals ? "?" . join(";", @vals) : "");
     push @{ $self->{dnsuri}->{$uri} }, $rdatastr;
+
+    dbg ("dns: hit <$uri> $rdatastr");
   }
 }
 
@@ -193,6 +237,9 @@
   my $question = ($packet->question)[0];
   return if !defined $question;
 
+  my $sets = $query->{sets} || [];
+  my $rules = $query->{rules};
+
   # NO_DNS_FOR_FROM
   if ($self->{sender_host} &&
       $question->qname eq $self->{sender_host} &&
@@ -200,10 +247,11 @@
       $packet->header->rcode =~ /^(?:NXDOMAIN|SERVFAIL)$/ &&
       ++$self->{sender_host_fail} == 2)
   {
-    for my $rule (@{$query->[RULES]}) {
+    for my $rule (@{$rules}) {
       $self->got_hit($rule, "DNS: ");
     }
   }
+
   # DNSBL tests are here
   foreach my $answer ($packet->answer) {
     next if !defined $answer;
@@ -213,10 +261,10 @@
     next if ($answer->type ne 'A' && $answer->type ne 'TXT');
     # skip any A record that isn't on 127/8
     next if ($answer->type eq 'A' && $answer->rdatastr !~ /^127\./);
-    for my $rule (@{$query->[RULES]}) {
+    for my $rule (@{$rules}) {
       $self->dnsbl_hit($rule, $question, $answer);
     }
-    for my $set (@{$query->[SETS]}) {
+    for my $set (@{$sets}) {
       if ($self->{dnspost}->{$set}) {
 	$self->process_dnsbl_set($set, $question, $answer);
       }
@@ -284,73 +332,48 @@
 sub harvest_dnsbl_queries {
   my ($self) = @_;
 
-  return if !defined $self->{rbl_launch};
-
-  my $deadline = $self->{conf}->{rbl_timeout} + $self->{rbl_launch};
-  my @waiting = (values %{ $self->{dnspending}->{A} },
-		 values %{ $self->{dnspending}->{MX} },
-		 values %{ $self->{dnspending}->{TXT} });
-  my @left;
-  my $total;
+  return if !defined $self->{query_launch_time};
 
-  @waiting = grep { defined $_->[ID] } @waiting;
-  $total = scalar @waiting;
+  my $deadline = $self->{conf}->{rbl_timeout} + $self->{query_launch_time};
   my $now = time;
-  # trap this loop in an eval { } block, as Net::DNS could throw
-  # die()s our way; in particular, process_dnsbl_results() has
-  # thrown die()s before (bug 3794).
-  eval {
-    while (@waiting && ($now < $deadline)) {
-      @left = ();
-      for my $query (@waiting) {
-        if (exists $self->{dnsfinished}->{$query->[ID]}) {
-          my $pkt = delete $self->{dnsfinished}->{$query->[ID]};
-          $self->process_dnsbl_result($query, $pkt);
-        } else {
-          push(@left, $query);
-        }
-      }
-      $self->{main}->call_plugins ("check_tick", { permsgstatus => $self });
-      last unless @left;
-      @waiting = @left;
-      # dynamic timeout
-      my $dynamic = (int($self->{conf}->{rbl_timeout}
-                        * (1 - (($total - @left) / $total) ** 2) + 0.5)
-                    + $self->{rbl_launch});
-      $deadline = $dynamic if ($dynamic < $deadline);
-      until((($now = time) >= $deadline) || ($self->{resolver}->poll_responses(1) > 0)) {
-      }
-    }
-    dbg("dns: success for " . ($total - @left) . " of $total queries");
-  };
 
-  if ($@) {
-    dbg("dns: DNS harvest failed: $@");
-    # carry on and clean up the BGSOCKs anyway.
+  my @left = $self->{async}->get_pending_lookups();
+  my $total = scalar @left;
+
+  while (!$self->{async}->complete_lookups(1) && ($now < $deadline)) {
+    $self->{main}->call_plugins ("check_tick", { permsgstatus => $self });
+
+    @left = $self->{async}->get_pending_lookups();
+
+    # dynamic timeout
+    my $dynamic = (int($self->{conf}->{rbl_timeout}
+                      * (1 - (($total - scalar @left) / $total) ** 2) + 0.5)
+                  + $self->{query_launch_time});
+    $deadline = $dynamic if ($dynamic < $deadline);
+    $now = time;
   }
 
+  dbg("dns: success for " . ($total - @left) . " of $total queries");
+
   # timeouts
+  @left = $self->{async}->get_pending_lookups();
   for my $query (@left) {
     my $string = '';
-    if (defined @{$query->[SETS]}) {
-      $string = join(",", grep defined, @{$query->[SETS]});
+    if (defined @{$query->{sets}}) {
+      $string = join(",", grep defined, @{$query->{sets}});
     }
-    elsif (defined @{$query->[RULES]}) {
-      $string = join(",", grep defined, @{$query->[RULES]});
+    elsif (defined @{$query->{rules}}) {
+      $string = join(",", grep defined, @{$query->{rules}});
     }
-    my $delay = time - $self->{rbl_launch};
+    my $delay = time - $self->{query_launch_time};
     dbg("dns: timeout for $string after $delay seconds");
-    undef $query->[ID];
-  }
-  # register hits
-  while (my ($rule, $logs) = each %{ $self->{dnsresult} }) {
-    for my $log (keys %{$logs}) {
-      $self->test_log($log) if $log;
-    }
-    if (!defined $self->{tests_already_hit}->{$rule}) {
-      $self->got_hit($rule, "RBL: ");
-    }
+    # undef $query->[ID];   # TODO: seems unnecessary now
   }
+}
+
+sub set_rbl_tag_data {
+  my ($self) = @_;
+
   # DNS URIs
   while (my ($dnsuri, $answers) = each %{ $self->{dnsuri} }) {
     # when parsing, look for elements of \".*?\" or \S+ with ", " as separator
@@ -366,14 +389,11 @@
 sub rbl_finish {
   my ($self) = @_;
 
-  delete $self->{rbl_launch};
-  delete $self->{dnspending};
-  delete $self->{dnsfinished};
+  $self->set_rbl_tag_data();
 
-  # TODO: do not remove these since they can be retained!
+  delete $self->{query_launch_time};
   delete $self->{dnscache};
   delete $self->{dnspost};
-  delete $self->{dnsresult};
   delete $self->{dnsuri};
 }
 
@@ -564,11 +584,10 @@
 
   # undef $IS_DNS_AVAILABLE if we should be testing for
   # working DNS and our check interval time has passed
-  dbg("dns: dnsopt=$dnsopt dnsint=$dnsint diff=$diff");
-  $IS_DNS_AVAILABLE=undef if ($dnsopt eq "test" && $diff > $dnsint);
-
-  dbg("dns: is_dns_available() last checked $diff seconds ago; dns available=".
-            ($IS_DNS_AVAILABLE ? $IS_DNS_AVAILABLE : "(undef)"));
+  if ($dnsopt eq "test" && $diff > $dnsint) {
+    $IS_DNS_AVAILABLE = undef;
+    dbg("dns: is_dns_available() last checked $diff seconds ago; re-checking");
+  }
 
   return $IS_DNS_AVAILABLE if (defined $IS_DNS_AVAILABLE);
 
@@ -744,6 +763,26 @@
   if ($SIG{CHLD} && $SIG{CHLD} ne 'IGNORE') {	# running from spamd
     waitpid ($pid, 0);
   }
+}
+
+###########################################################################
+
+sub is_rule_complete {
+  my ($self, $rule) = @_;
+
+  my $key = $self->{rulename_to_key}->{$rule};
+  if (!defined $key) {
+    dbg("dns: $rule lookup complete, as it was never started");
+    return 1;
+  }
+
+  my $obj = $self->{async}->get_lookup($key);
+  if (!defined $obj) {
+    dbg("dns: $rule lookup complete, $key not in pending list");
+    return 1;
+  }
+
+  return 0;         # not yet complete
 }
 
 ###########################################################################

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/DnsResolver.pm
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/lib/Mail/SpamAssassin/DnsResolver.pm?rev=394348&r1=394347&r2=394348&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/DnsResolver.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/DnsResolver.pm Sat Apr 15 11:40:50 2006
@@ -315,7 +315,7 @@
   my $ques = $questions[0];
 
   if (defined $ques) {
-    return $id . $ques->qname . $ques->qtype . $ques->qclass;
+    return join '/', $id, $ques->qname, $ques->qtype, $ques->qclass;
   } else {
     # odd.  this should not happen, but clearly some DNS servers
     # can return something that Net::DNS interprets as having no

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm?rev=394348&r1=394347&r2=394348&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm Sat Apr 15 11:40:50 2006
@@ -54,6 +54,7 @@
 
 use Mail::SpamAssassin::Constants qw(:sa);
 use Mail::SpamAssassin::EvalTests;
+use Mail::SpamAssassin::AsyncLoop;
 use Mail::SpamAssassin::Conf;
 use Mail::SpamAssassin::Util;
 use Mail::SpamAssassin::Logger;
@@ -84,6 +85,7 @@
     'disable_auto_learning' => 0,
     'auto_learn_status' => undef,
     'conf'                => $main->{conf},
+    'async'             => Mail::SpamAssassin::AsyncLoop->new($main)
   };
 
   if (defined $opts && $opts->{disable_auto_learning}) {
@@ -2719,7 +2721,9 @@
 sub register_plugin_eval_glue {
   my ($self, $pluginobj, $function) = @_;
 
-  dbg("plugin: registering glue method for $function ($pluginobj)");
+  # stop reporting this -- it's too noisy!
+  # dbg("plugin: registering glue method for $function ($pluginobj)");
+
   my $evalstr = <<"ENDOFEVAL";
 {
     package Mail::SpamAssassin::PerMsgStatus;

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm?rev=394348&r1=394347&r2=394348&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm Sat Apr 15 11:40:50 2006
@@ -384,61 +384,11 @@
   $conf->{parser}->register_commands(\@cmds);
 }
 
-sub check_tick {
-  my ($self, $opts) = @_;
-
-  return if ($self->{dns_not_available});
-  $self->complete_lookups($opts->{permsgstatus}->{uribl_scanstate}, 0.3);
-  return 1;
-}
-
-sub check_post_dnsbl {
-  my ($self, $opts) = @_;
-
-  return if ($self->{dns_not_available});
-
-  my $scan = $opts->{permsgstatus};
-  my $scanstate = $scan->{uribl_scanstate};
-
-  # try to complete a few more
-  if (!$self->complete_lookups($scanstate, 0.1)) {
-    my $secs_to_wait = $scan->{conf}->{uridnsbl_timeout};
-    if ($secs_to_wait < 0) { $secs_to_wait = 0; }
-    my $now = time;
-    my $deadline = $now + $secs_to_wait;
-    dbg("uridnsbl: waiting $secs_to_wait seconds for URIDNSBL lookups to complete");
-
-    while ($now < $deadline) {
-      last if ($self->complete_lookups($scanstate, $deadline - $now));
-      $now = time;
-    }
-    dbg("uridnsbl: done waiting for URIDNSBL lookups to complete");
-  }
-
-  foreach my $rulename (keys %{$scanstate->{active_rules_revipbl}},
-                        keys %{$scanstate->{active_rules_rhsbl}})
-  {
-    $scan->clear_test_state();
-
-    if ($scanstate->{hits}->{$rulename}) {
-      my $uris = join (' ', keys %{$scanstate->{hits}->{$rulename}});
-      $scan->test_log ("URIs: $uris");
-      $scan->got_hit ($rulename, "");
-    }
-  }
-
-  $self->abort_remaining_lookups ($scanstate);
-}
-
 # ---------------------------------------------------------------------------
 
 sub setup {
   my ($self, $scanstate) = @_;
-
-  $scanstate->{pending_lookups} = { };
   $scanstate->{seen_domain} = { };
-  $scanstate->{last_count} = 0;
-  $scanstate->{times_count_was_same} = 0;
 }
 
 # ---------------------------------------------------------------------------
@@ -499,12 +449,11 @@
   my ($self, $scanstate, $obj, $dom) = @_;
 
   my $key = "NS:".$dom;
-  return if $scanstate->{pending_lookups}->{$key};
+  return if $scanstate->{scanner}->{async}->get_lookup($key);
 
   # dig $dom ns
-  my $ent = $self->start_lookup ($scanstate, 'NS', $self->res_bgsend($dom, 'NS'));
+  my $ent = $self->start_lookup ($scanstate, 'NS', $self->res_bgsend($scanstate, $dom, 'NS'), $key);
   $ent->{obj} = $obj;
-  $scanstate->{pending_lookups}->{$key} = $ent;
 }
 
 sub complete_ns_lookup {
@@ -544,12 +493,11 @@
   my ($self, $scanstate, $obj, $hname) = @_;
 
   my $key = "A:".$hname;
-  return if $scanstate->{pending_lookups}->{$key};
+  return if $scanstate->{scanner}->{async}->get_lookup($key);
 
   # dig $hname a
-  my $ent = $self->start_lookup ($scanstate, 'A', $self->res_bgsend($hname, 'A'));
+  my $ent = $self->start_lookup ($scanstate, 'A', $self->res_bgsend($scanstate, $hname, 'A'), $key);
   $ent->{obj} = $obj;
-  $scanstate->{pending_lookups}->{$key} = $ent;
 }
 
 sub complete_a_lookup {
@@ -585,16 +533,15 @@
   my ($self, $scanstate, $obj, $rulename, $lookupstr, $dnsbl, $qtype) = @_;
 
   my $key = "DNSBL:".$dnsbl.":".$lookupstr;
-  return if $scanstate->{pending_lookups}->{$key};
+  return if $scanstate->{scanner}->{async}->get_lookup($key);
   my $item = $lookupstr.".".$dnsbl;
 
   # dig $ip txt
   my $ent = $self->start_lookup ($scanstate, 'DNSBL',
-        $self->res_bgsend($item, $qtype));
+        $self->res_bgsend($scanstate, $item, $qtype), $key);
   $ent->{obj} = $obj;
   $ent->{rulename} = $rulename;
   $ent->{zone} = $dnsbl;
-  $scanstate->{pending_lookups}->{$key} = $ent;
 }
 
 sub complete_dnsbl_lookup {
@@ -675,18 +622,55 @@
     $scanstate->{hits}->{$rulename} = { };
   };
   $scanstate->{hits}->{$rulename}->{$dom} = 1;
+
+  my $scan = $scanstate->{scanner};
+  if ($scanstate->{active_rules_revipbl}->{$rulename}
+    || $scanstate->{active_rules_rhsbl}->{$rulename})
+  {
+    # TODO: this needs to handle multiple domain hits per rule
+    $scan->clear_test_state();
+    my $uris = join (' ', keys %{$scanstate->{hits}->{$rulename}});
+    $scan->test_log ("URIs: $uris");
+    $scan->got_hit ($rulename, "");
+  }
 }
 
 # ---------------------------------------------------------------------------
 
 sub start_lookup {
-  my ($self, $scanstate, $type, $id) = @_;
+  my ($self, $scanstate, $type, $id, $key) = @_;
+
   my $ent = {
-    type => $type,
-    id => $id
+    key => $key,
+    type => "URI-".$type,
+    id => $id,
+    completed_callback => sub {
+      my $ent = shift;
+      $self->completed_lookup_callback ($scanstate, $ent);
+    }
   };
-  $scanstate->{queries_started}++;
-  $ent;
+  $scanstate->{scanner}->{async}->start_lookup($ent);
+  return $ent;
+}
+
+sub completed_lookup_callback {
+  my ($self, $scanstate, $ent) = @_;
+  my $type = $ent->{type};
+  my $key = $ent->{key};
+  $key =~ /:(\S+?)$/; my $val = $1;
+
+  if ($type eq 'URI-NS') {
+    $self->complete_ns_lookup ($scanstate, $ent, $val);
+  }
+  elsif ($type eq 'URI-A') {
+    $self->complete_a_lookup ($scanstate, $ent, $val);
+  }
+  elsif ($type eq 'URI-DNSBL') {
+    $self->complete_dnsbl_lookup ($scanstate, $ent, $val);
+    my $totalsecs = (time - $ent->{obj}->{querystart});
+    dbg("uridnsbl: query for ".$ent->{obj}->{dom}." took ".
+              $totalsecs." seconds to look up ($val)");
+  }
 }
 
 # ---------------------------------------------------------------------------
@@ -696,122 +680,18 @@
 
 sub complete_lookups {
   my ($self, $scanstate, $timeout) = @_;
-  my %typecount = ();
-  my $stillwaiting = 0;
-
-  my $pending = $scanstate->{pending_lookups};
-  if (scalar keys %{$pending} <= 0) {
-    return 1;		# nothing left to do
-  }
-
-  $scanstate->{queries_started} = 0;
-  $scanstate->{queries_completed} = 0;
-
-  my $nfound = $self->{main}->{resolver}->poll_responses($timeout);
-  $nfound ||= 'no';
-  dbg ("uridnsbl: select found $nfound socks ready");
-
-  foreach my $key (keys %{$pending}) {
-    my $ent = $pending->{$key};
-    my $type = $ent->{type};
-
-    if (!exists ($self->{finished}->{$ent->{id}})) {
-      $typecount{$type}++;
-      #$stillwaiting = 1;
-      next;
-    }
-
-    $ent->{response_packet} = delete $self->{finished}->{$ent->{id}};
-    $key =~ /:(\S+)$/; my $val = $1;
-
-    if (LOG_COMPLETION_TIMES) {
-      my $secs = (time - $ent->{start});
-      my $totalsecs = (time - $ent->{obj}->{querystart});
-      printf "# time: %s %3.3f %3.3f %s\n",
-		$type, $secs, $totalsecs, $ent->{obj}->{dom};
-    }
-
-    if ($type eq 'NS') {
-      $self->complete_ns_lookup ($scanstate, $ent, $val);
-    }
-    elsif ($type eq 'A') {
-      $self->complete_a_lookup ($scanstate, $ent, $val);
-    }
-    elsif ($type eq 'DNSBL') {
-      $self->complete_dnsbl_lookup ($scanstate, $ent, $val);
-      my $totalsecs = (time - $ent->{obj}->{querystart});
-      dbg("uridnsbl: query for ".$ent->{obj}->{dom}." took ".
-		$totalsecs." seconds to look up ($val)");
-    }
-
-    $scanstate->{queries_completed}++;
-    delete $scanstate->{pending_lookups}->{$key};
-  }
-
-  dbg("uridnsbl: queries completed: ".$scanstate->{queries_completed}.
-		" started: ".$scanstate->{queries_started});
-
-  if (1) {
-    dbg("uridnsbl: queries active: ".
-	join (' ', map { "$_=$typecount{$_}" } sort keys %typecount)." at ".
-	localtime(time));
-  }
-
-  # ensure we don't get stuck if a request gets lost in the ether.
-  if (!$stillwaiting) {
-    my $numkeys = scalar keys %{$scanstate->{pending_lookups}};
-    if ($numkeys == 0) {
-      $stillwaiting = 0;
-
-    } else {
-      $stillwaiting = 1;
-
-      # avoid looping forever if we haven't got all results. 
-      if ($scanstate->{last_count} == $numkeys) {
-	$scanstate->{times_count_was_same}++;
-	if ($scanstate->{times_count_was_same} > 20) {
-	  dbg("uridnsbl: escaping: must have lost requests");
-	  $self->abort_remaining_lookups ($scanstate);
-	  $stillwaiting = 0;
-	}
-      } else {
-	$scanstate->{last_count} = $numkeys;
-	$scanstate->{times_count_was_same} = 0;
-      }
-    }
-  }
-
-  return (!$stillwaiting);
-}
-
-# ---------------------------------------------------------------------------
-
-sub abort_remaining_lookups  {
-  my ($self, $scanstate) = @_;
-
-  my $pending = $scanstate->{pending_lookups};
-  my $foundone = 0;
-  foreach my $key (keys %{$pending})
-  {
-    if (!$foundone) {
-      dbg("uridnsbl: aborting remaining lookups");
-      $foundone = 1;
-    }
-
-    delete $pending->{$key};
-  }
-  $self->{main}->{resolver}->bgabort();
+  return $scanstate->{scanner}->{async}->complete_lookups($timeout);
 }
 
 # ---------------------------------------------------------------------------
 
 sub res_bgsend {
-  my ($self, $host, $type) = @_;
+  my ($self, $scanstate, $host, $type) = @_;
 
   return $self->{main}->{resolver}->bgsend($host, $type, undef, sub {
         my $pkt = shift;
         my $id = shift;
-        $self->{finished}->{$id} = $pkt;
+        $scanstate->{scanner}->{async}->set_response_packet($id, $pkt);
       });
 }
 

Modified: spamassassin/trunk/t/dnsbl.t
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/t/dnsbl.t?rev=394348&r1=394347&r2=394348&view=diff
==============================================================================
--- spamassassin/trunk/t/dnsbl.t (original)
+++ spamassassin/trunk/t/dnsbl.t Sat Apr 15 11:40:50 2006
@@ -92,6 +92,9 @@
 );
 
 tstprefs("
+
+rbl_timeout 30
+
 add_header all RBL _RBL_
 add_header all Trusted _RELAYSTRUSTED_
 add_header all Untrusted _RELAYSUNTRUSTED_