You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2006/04/15 20:40:53 UTC
svn commit: r394348 - in /spamassassin/trunk: MANIFEST
lib/Mail/SpamAssassin/AsyncLoop.pm lib/Mail/SpamAssassin/Dns.pm
lib/Mail/SpamAssassin/DnsResolver.pm lib/Mail/SpamAssassin/PerMsgStatus.pm
lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm t/dnsbl.t
Author: jm
Date: Sat Apr 15 11:40:50 2006
New Revision: 394348
URL: http://svn.apache.org/viewcvs?rev=394348&view=rev
Log:
bug 4860: consolidate DNS-lookup async-rule infrastructure, increasing code-sharing between URIDNSBL.pm and Dns.pm and allowing 19% speedup
Added:
spamassassin/trunk/lib/Mail/SpamAssassin/AsyncLoop.pm
Modified:
spamassassin/trunk/MANIFEST
spamassassin/trunk/lib/Mail/SpamAssassin/Dns.pm
spamassassin/trunk/lib/Mail/SpamAssassin/DnsResolver.pm
spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm
spamassassin/trunk/t/dnsbl.t
Modified: spamassassin/trunk/MANIFEST
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/MANIFEST?rev=394348&r1=394347&r2=394348&view=diff
==============================================================================
--- spamassassin/trunk/MANIFEST (original)
+++ spamassassin/trunk/MANIFEST Sat Apr 15 11:40:50 2006
@@ -27,6 +27,7 @@
lib/Mail/SpamAssassin.pm
lib/Mail/SpamAssassin/AICache.pm
lib/Mail/SpamAssassin/ArchiveIterator.pm
+lib/Mail/SpamAssassin/AsyncLoop.pm
lib/Mail/SpamAssassin/AutoWhitelist.pm
lib/Mail/SpamAssassin/Bayes.pm
lib/Mail/SpamAssassin/Bayes/CombineChi.pm
Added: spamassassin/trunk/lib/Mail/SpamAssassin/AsyncLoop.pm
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/lib/Mail/SpamAssassin/AsyncLoop.pm?rev=394348&view=auto
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/AsyncLoop.pm (added)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/AsyncLoop.pm Sat Apr 15 11:40:50 2006
@@ -0,0 +1,332 @@
+# <@LICENSE>
+# Copyright 2004 Apache Software Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# </...@LICENSE>
+
+=head1 NAME
+
+Mail::SpamAssassin::AsyncLoop - scanner asynchronous event loop
+
+=head1 DESCRIPTION
+
+An asynchronous event loop used for long-running operations, performed "in the
+background" during the Mail::SpamAssassin::check() scan operation, such as DNS
+blocklist lookups.
+
+=head1 METHODS
+
+=over 4
+
+=cut
+
+package Mail::SpamAssassin::AsyncLoop;
+
+use strict;
+use warnings;
+use bytes;
+
+use Mail::SpamAssassin;
+use Mail::SpamAssassin::Logger;
+
+our @ISA = qw();
+
+#############################################################################
+
+sub new {
+ my $class = shift;
+ $class = ref($class) || $class;
+
+ my ($main) = @_;
+ my $self = {
+ main => $main,
+ last_count => 0,
+ times_count_was_same => 0,
+ queries_started => 0,
+ queries_completed => 0,
+ pending_lookups => { }
+ };
+
+ bless ($self, $class);
+ $self;
+}
+
+# ---------------------------------------------------------------------------
+
+=item $obj = $async->start_lookup($obj)
+
+Register the start a long-running asynchronous lookup operation. C<$obj>
+is a hash reference containing the following items:
+
+=over 4
+
+=item key (required)
+
+A key string, unique to this lookup. This is what is reported in
+debug messages, used as the key for C<get_lookup()>, etc.
+
+=item id (required)
+
+An ID string, also unique to this lookup. Typically, this is the DNS packet ID
+as returned by DnsResolver's C<bgsend> method. Sadly, the Net::DNS
+architecture forces us to keep a separate ID string for this task instead of
+reusing C<key> -- if you are not using DNS lookups through DnsResolver, it
+should be OK to just reuse C<key>.
+
+=item type (required)
+
+A string, typically one word, used to describe the type of lookup in log
+messages, such as C<DNSBL>, C<MX>, C<TXT>.
+
+=item poll_callback (optional)
+
+A code reference, which will be called periodically during the
+background-processing period. If you will be performing an async lookup on a
+non-DNS-based service, you will need to implement this so that it checks for
+new responses and calls C<set_response_packet()> or C<report_id_complete()> as
+appropriate. DNS-based lookups can leave it undefined, since
+DnsResolver::poll_responses() will be called automatically anyway.
+
+The code reference will be called with one argument, the C<$ent> object.
+
+=item completed_callback (optional)
+
+A code reference, which will be called when the lookup has been reported as
+complete via C<set_response_packet()> or C<report_id_complete()>.
+
+The code reference will be called with one argument, the C<$ent> object.
+
+=back
+
+C<$obj> is returned by this method.
+
+=cut
+
+sub start_lookup {
+ my ($self, $ent) = @_;
+
+ die "oops, no id" unless $ent->{id};
+ die "oops, no key" unless $ent->{key};
+ die "oops, no type" unless $ent->{type};
+
+ $self->{queries_started}++;
+ $self->{pending_lookups}->{$ent->{key}} = $ent;
+ $ent;
+}
+
+# ---------------------------------------------------------------------------
+
+=item $obj = $async->get_lookup($key)
+
+Retrieve the pending-lookup object for the given key C<$key>.
+
+If the lookup is complete, this will return C<undef>.
+
+Note that a lookup is still considered "pending" until C<complete_lookups()> is
+called, even if it has been reported as complete via C<set_response_packet()>
+or C<report_id_complete()>.
+
+=cut
+
+sub get_lookup {
+ my ($self, $key) = @_;
+ return $self->{pending_lookups}->{$key};
+}
+
+# ---------------------------------------------------------------------------
+
+=item @objs = $async->get_pending_lookups()
+
+Retrieve the lookup objects for all pending lookups.
+
+Note that a lookup is still considered "pending" until C<complete_lookups()> is
+called, even if it has been reported as complete via C<set_response_packet()>
+or C<report_id_complete()>.
+
+=cut
+
+sub get_pending_lookups {
+ my ($self) = @_;
+ return values %{$self->{pending_lookups}};
+}
+
+# ---------------------------------------------------------------------------
+
+=item $alldone = $async->complete_lookups()
+
+Perform a poll of the pending lookups, to see if any are completed; if they
+are, their <completed_callback> is called with the entry object for that
+lookup.
+
+If there are no lookups remaining, or if too long has elapsed since any results
+were returned, C<1> is returned, otherwise C<0>.
+
+=cut
+
+sub complete_lookups {
+ my ($self, $timeout) = @_;
+ my %typecount = ();
+ my $stillwaiting = 0;
+
+ my $pending = $self->{pending_lookups};
+ if (scalar keys %{$pending} <= 0) {
+ return 1; # nothing left to do
+ }
+
+ $self->{queries_started} = 0;
+ $self->{queries_completed} = 0;
+
+ # trap this loop in an eval { } block, as Net::DNS could throw
+ # die()s our way; in particular, process_dnsbl_results() has
+ # thrown die()s before (bug 3794).
+ eval {
+
+ my $nfound = $self->{main}->{resolver}->poll_responses($timeout);
+ $nfound ||= 'no';
+ dbg ("async: select found $nfound socks ready");
+
+ foreach my $key (keys %{$pending}) {
+ my $ent = $pending->{$key};
+
+ # call a "poll_callback" sub, if one exists
+ if (defined $ent->{poll_callback}) {
+ $ent->{poll_callback}->($ent);
+ }
+
+ my $type = $ent->{type};
+ if (!exists ($self->{finished}->{$ent->{id}})) {
+ $typecount{$type}++;
+ next;
+ }
+
+ $ent->{response_packet} = delete $self->{finished}->{$ent->{id}};
+ if (defined $ent->{completed_callback}) {
+ $ent->{completed_callback}->($ent);
+ }
+
+ $self->{queries_completed}++;
+ delete $self->{pending_lookups}->{$key};
+ }
+
+ dbg("async: queries completed: ".$self->{queries_completed}.
+ " started: ".$self->{queries_started});
+
+ if (1) {
+ dbg("async: queries active: ".
+ join (' ', map { "$_=$typecount{$_}" } sort keys %typecount)." at ".
+ localtime(time));
+ }
+
+ # ensure we don't get stuck if a request gets lost in the ether.
+ if (!$stillwaiting) {
+ my $numkeys = scalar keys %{$self->{pending_lookups}};
+ if ($numkeys == 0) {
+ $stillwaiting = 0;
+
+ } else {
+ $stillwaiting = 1;
+
+ # avoid looping forever if we haven't got all results.
+ if ($self->{last_count} == $numkeys) {
+ $self->{times_count_was_same}++;
+ if ($self->{times_count_was_same} > 20)
+ {
+ dbg("async: escaping: must have lost requests");
+ $self->abort_remaining_lookups ($self);
+ $stillwaiting = 0;
+ }
+ } else {
+ $self->{last_count} = $numkeys;
+ $self->{times_count_was_same} = 0;
+ }
+ }
+ }
+
+ };
+
+ if ($@) {
+ dbg("async: caught complete_lookups death, aborting: $@");
+ $stillwaiting = 0; # abort remaining
+ }
+
+ return (!$stillwaiting);
+}
+
+# ---------------------------------------------------------------------------
+
+=item $async->abort_remaining_lookups()
+
+Abort any remaining lookups.
+
+=cut
+
+sub abort_remaining_lookups {
+ my ($self) = @_;
+
+ my $pending = $self->{pending_lookups};
+ my $foundone = 0;
+ foreach my $key (keys %{$pending})
+ {
+ if (!$foundone) {
+ dbg("async: aborting remaining lookups");
+ $foundone = 1;
+ }
+
+ delete $pending->{$key};
+ }
+ $self->{main}->{resolver}->bgabort();
+}
+
+# ---------------------------------------------------------------------------
+
+=item $async->set_response_packet($id, $pkt)
+
+Register a "response packet" for a given query. C<$id> is the ID for the
+query, and must match the C<id> supplied in C<start_lookup()>. C<$pkt> is the
+packet object for the response.
+
+If this was called, C<$pkt> will be available in the C<completed_callback>
+function as C<$ent-<gt>{response_packet}>.
+
+One or the other of C<set_response_packet()> or C<report_id_complete()>
+should be called, but not both.
+
+=cut
+
+sub set_response_packet {
+ my ($self, $id, $pkt) = @_;
+ $self->{finished}->{$id} = $pkt;
+}
+
+=item $async->report_id_complete($id)
+
+Register that a query has completed, and is no longer "pending". C<$id> is the
+ID for the query, and must match the C<id> supplied in C<start_lookup()>.
+
+One or the other of C<set_response_packet()> or C<report_id_complete()>
+should be called, but not both.
+
+=cut
+
+sub report_id_complete {
+ my ($self, $id) = @_;
+ $self->{finished}->{$id} = undef;
+}
+
+# ---------------------------------------------------------------------------
+
+1;
+
+=back
+
+=cut
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Dns.pm
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/lib/Mail/SpamAssassin/Dns.pm?rev=394348&r1=394347&r2=394348&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Dns.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Dns.pm Sat Apr 15 11:40:50 2006
@@ -21,6 +21,7 @@
use Mail::SpamAssassin::Conf;
use Mail::SpamAssassin::PerMsgStatus;
+use Mail::SpamAssassin::AsyncLoop;
use Mail::SpamAssassin::Constants qw(:ip);
use File::Spec;
use IO::Socket;
@@ -94,32 +95,49 @@
###########################################################################
-# DNS query array constants
-use constant ID => 0;
-use constant RULES => 1;
-use constant SETS => 2;
-
# TODO: $server is currently unused
sub do_rbl_lookup {
my ($self, $rule, $set, $type, $server, $host, $subtest) = @_;
+ my $key = "dns:$type:$host";
+ my $existing = $self->{async}->get_lookup($key);
+
# only make a specific query once
- if (!defined $self->{dnspending}->{$type}->{$host}->[ID]) {
+ if (!$existing) {
dbg("dns: launching DNS $type query for $host in background");
- $self->{rbl_launch} = time;
- $self->{dnspending}->{$type}->{$host}->[ID] = $self->res_bgsend($host, $type);
+ $self->{query_launch_time} = time;
+
+ my $ent = {
+ key => $key,
+ type => "DNSBL-".$type,
+ sets => [ ], # filled in below
+ rules => [ ], # filled in below
+ # id is filled in after we send the query below
+ };
+
+ my $id = $self->{resolver}->bgsend($host, $type, undef, sub {
+ my $pkt = shift;
+ my $id = shift;
+ $self->process_dnsbl_result($ent, $pkt);
+ $self->{async}->report_id_complete($id);
+ });
+
+ $ent->{id} = $id; # tie up the loose end
+ $existing = $self->{async}->start_lookup($ent);
}
# always add set
- push @{$self->{dnspending}->{$type}->{$host}->[SETS]}, $set;
+ push @{$existing->{sets}}, $set;
# sometimes match or always match
if (defined $subtest) {
$self->{dnspost}->{$set}->{$subtest} = $rule;
}
else {
- push @{$self->{dnspending}->{$type}->{$host}->[RULES]}, $rule;
+ push @{$existing->{rules}}, $rule;
}
+
+ $self->{rulename_to_key}->{$rule} = $key;
}
# TODO: these are constant so they should only be added once at startup
@@ -131,23 +149,31 @@
sub do_dns_lookup {
my ($self, $rule, $type, $host) = @_;
+ my $key = "dns:$type:$host";
+
# only make a specific query once
- if (!defined $self->{dnspending}->{$type}->{$host}->[ID]) {
- dbg("dns: launching DNS $type query for $host in background");
- $self->{rbl_launch} = time;
- $self->{dnspending}->{$type}->{$host}->[ID] = $self->res_bgsend($host, $type);
- }
- push @{$self->{dnspending}->{$type}->{$host}->[RULES]}, $rule;
-}
+ return if $self->{async}->get_lookup($key);
+
+ dbg("dns: launching DNS $type query for $host in background");
+
+ my $ent = {
+ key => $key,
+ type => "DNSBL-".$type,
+ rules => [ $rule ],
+ # id is filled in after we send the query below
+ };
-sub res_bgsend {
- my ($self, $host, $type) = @_;
+ my $id = $self->{resolver}->bgsend($host, $type, undef, sub {
+ my $pkt = shift;
+ my $id = shift;
+ $self->process_dnsbl_result($ent, $pkt);
+ $self->{async}->report_id_complete($id);
+ });
- return $self->{resolver}->bgsend($host, $type, undef, sub {
- my $pkt = shift;
- my $id = shift;
- $self->{dnsfinished}->{$id} = $pkt;
- });
+ $ent->{id} = $id; # tie up the loose end
+ $self->{async}->start_lookup($ent);
+
+ $self->{query_launch_time} = time;
}
###########################################################################
@@ -166,7 +192,23 @@
$log = "$4.$3.$2.$1 listed in $5";
}
}
- $self->{dnsresult}->{$rule}->{$log} = 1;
+
+ # TODO: this may result in some log messages appearing under the
+ # wrong rules, since we could see this sequence: { test one hits,
+ # test one's message is logged, test two hits, test one fires again
+ # on another IP, test one's message is logged for that other IP --
+ # but under test two's heading }. Right now though it's better
+ # than just not logging at all.
+
+ $self->{already_logged} ||= { };
+ if ($log && !$self->{already_logged}->{$log}) {
+ $self->test_log($log);
+ $self->{already_logged}->{$log} = 1;
+ }
+
+ if (!defined $self->{tests_already_hit}->{$rule}) {
+ $self->got_hit($rule, "RBL: ");
+ }
}
sub dnsbl_uri {
@@ -183,6 +225,8 @@
push(@vals, "type=$qtype") if $qtype ne "A";
my $uri = "dns:$qname" . (@vals ? "?" . join(";", @vals) : "");
push @{ $self->{dnsuri}->{$uri} }, $rdatastr;
+
+ dbg ("dns: hit <$uri> $rdatastr");
}
}
@@ -193,6 +237,9 @@
my $question = ($packet->question)[0];
return if !defined $question;
+ my $sets = $query->{sets} || [];
+ my $rules = $query->{rules};
+
# NO_DNS_FOR_FROM
if ($self->{sender_host} &&
$question->qname eq $self->{sender_host} &&
@@ -200,10 +247,11 @@
$packet->header->rcode =~ /^(?:NXDOMAIN|SERVFAIL)$/ &&
++$self->{sender_host_fail} == 2)
{
- for my $rule (@{$query->[RULES]}) {
+ for my $rule (@{$rules}) {
$self->got_hit($rule, "DNS: ");
}
}
+
# DNSBL tests are here
foreach my $answer ($packet->answer) {
next if !defined $answer;
@@ -213,10 +261,10 @@
next if ($answer->type ne 'A' && $answer->type ne 'TXT');
# skip any A record that isn't on 127/8
next if ($answer->type eq 'A' && $answer->rdatastr !~ /^127\./);
- for my $rule (@{$query->[RULES]}) {
+ for my $rule (@{$rules}) {
$self->dnsbl_hit($rule, $question, $answer);
}
- for my $set (@{$query->[SETS]}) {
+ for my $set (@{$sets}) {
if ($self->{dnspost}->{$set}) {
$self->process_dnsbl_set($set, $question, $answer);
}
@@ -284,73 +332,48 @@
sub harvest_dnsbl_queries {
my ($self) = @_;
- return if !defined $self->{rbl_launch};
-
- my $deadline = $self->{conf}->{rbl_timeout} + $self->{rbl_launch};
- my @waiting = (values %{ $self->{dnspending}->{A} },
- values %{ $self->{dnspending}->{MX} },
- values %{ $self->{dnspending}->{TXT} });
- my @left;
- my $total;
+ return if !defined $self->{query_launch_time};
- @waiting = grep { defined $_->[ID] } @waiting;
- $total = scalar @waiting;
+ my $deadline = $self->{conf}->{rbl_timeout} + $self->{query_launch_time};
my $now = time;
- # trap this loop in an eval { } block, as Net::DNS could throw
- # die()s our way; in particular, process_dnsbl_results() has
- # thrown die()s before (bug 3794).
- eval {
- while (@waiting && ($now < $deadline)) {
- @left = ();
- for my $query (@waiting) {
- if (exists $self->{dnsfinished}->{$query->[ID]}) {
- my $pkt = delete $self->{dnsfinished}->{$query->[ID]};
- $self->process_dnsbl_result($query, $pkt);
- } else {
- push(@left, $query);
- }
- }
- $self->{main}->call_plugins ("check_tick", { permsgstatus => $self });
- last unless @left;
- @waiting = @left;
- # dynamic timeout
- my $dynamic = (int($self->{conf}->{rbl_timeout}
- * (1 - (($total - @left) / $total) ** 2) + 0.5)
- + $self->{rbl_launch});
- $deadline = $dynamic if ($dynamic < $deadline);
- until((($now = time) >= $deadline) || ($self->{resolver}->poll_responses(1) > 0)) {
- }
- }
- dbg("dns: success for " . ($total - @left) . " of $total queries");
- };
- if ($@) {
- dbg("dns: DNS harvest failed: $@");
- # carry on and clean up the BGSOCKs anyway.
+ my @left = $self->{async}->get_pending_lookups();
+ my $total = scalar @left;
+
+ while (!$self->{async}->complete_lookups(1) && ($now < $deadline)) {
+ $self->{main}->call_plugins ("check_tick", { permsgstatus => $self });
+
+ @left = $self->{async}->get_pending_lookups();
+
+ # dynamic timeout
+ my $dynamic = (int($self->{conf}->{rbl_timeout}
+ * (1 - (($total - scalar @left) / $total) ** 2) + 0.5)
+ + $self->{query_launch_time});
+ $deadline = $dynamic if ($dynamic < $deadline);
+ $now = time;
}
+ dbg("dns: success for " . ($total - @left) . " of $total queries");
+
# timeouts
+ @left = $self->{async}->get_pending_lookups();
for my $query (@left) {
my $string = '';
- if (defined @{$query->[SETS]}) {
- $string = join(",", grep defined, @{$query->[SETS]});
+ if (defined @{$query->{sets}}) {
+ $string = join(",", grep defined, @{$query->{sets}});
}
- elsif (defined @{$query->[RULES]}) {
- $string = join(",", grep defined, @{$query->[RULES]});
+ elsif (defined @{$query->{rules}}) {
+ $string = join(",", grep defined, @{$query->{rules}});
}
- my $delay = time - $self->{rbl_launch};
+ my $delay = time - $self->{query_launch_time};
dbg("dns: timeout for $string after $delay seconds");
- undef $query->[ID];
- }
- # register hits
- while (my ($rule, $logs) = each %{ $self->{dnsresult} }) {
- for my $log (keys %{$logs}) {
- $self->test_log($log) if $log;
- }
- if (!defined $self->{tests_already_hit}->{$rule}) {
- $self->got_hit($rule, "RBL: ");
- }
+ # undef $query->[ID]; # TODO: seems unnecessary now
}
+}
+
+sub set_rbl_tag_data {
+ my ($self) = @_;
+
# DNS URIs
while (my ($dnsuri, $answers) = each %{ $self->{dnsuri} }) {
# when parsing, look for elements of \".*?\" or \S+ with ", " as separator
@@ -366,14 +389,11 @@
sub rbl_finish {
my ($self) = @_;
- delete $self->{rbl_launch};
- delete $self->{dnspending};
- delete $self->{dnsfinished};
+ $self->set_rbl_tag_data();
- # TODO: do not remove these since they can be retained!
+ delete $self->{query_launch_time};
delete $self->{dnscache};
delete $self->{dnspost};
- delete $self->{dnsresult};
delete $self->{dnsuri};
}
@@ -564,11 +584,10 @@
# undef $IS_DNS_AVAILABLE if we should be testing for
# working DNS and our check interval time has passed
- dbg("dns: dnsopt=$dnsopt dnsint=$dnsint diff=$diff");
- $IS_DNS_AVAILABLE=undef if ($dnsopt eq "test" && $diff > $dnsint);
-
- dbg("dns: is_dns_available() last checked $diff seconds ago; dns available=".
- ($IS_DNS_AVAILABLE ? $IS_DNS_AVAILABLE : "(undef)"));
+ if ($dnsopt eq "test" && $diff > $dnsint) {
+ $IS_DNS_AVAILABLE = undef;
+ dbg("dns: is_dns_available() last checked $diff seconds ago; re-checking");
+ }
return $IS_DNS_AVAILABLE if (defined $IS_DNS_AVAILABLE);
@@ -744,6 +763,26 @@
if ($SIG{CHLD} && $SIG{CHLD} ne 'IGNORE') { # running from spamd
waitpid ($pid, 0);
}
+}
+
+###########################################################################
+
+sub is_rule_complete {
+ my ($self, $rule) = @_;
+
+ my $key = $self->{rulename_to_key}->{$rule};
+ if (!defined $key) {
+ dbg("dns: $rule lookup complete, as it was never started");
+ return 1;
+ }
+
+ my $obj = $self->{async}->get_lookup($key);
+ if (!defined $obj) {
+ dbg("dns: $rule lookup complete, $key not in pending list");
+ return 1;
+ }
+
+ return 0; # not yet complete
}
###########################################################################
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/DnsResolver.pm
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/lib/Mail/SpamAssassin/DnsResolver.pm?rev=394348&r1=394347&r2=394348&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/DnsResolver.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/DnsResolver.pm Sat Apr 15 11:40:50 2006
@@ -315,7 +315,7 @@
my $ques = $questions[0];
if (defined $ques) {
- return $id . $ques->qname . $ques->qtype . $ques->qclass;
+ return join '/', $id, $ques->qname, $ques->qtype, $ques->qclass;
} else {
# odd. this should not happen, but clearly some DNS servers
# can return something that Net::DNS interprets as having no
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm?rev=394348&r1=394347&r2=394348&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm Sat Apr 15 11:40:50 2006
@@ -54,6 +54,7 @@
use Mail::SpamAssassin::Constants qw(:sa);
use Mail::SpamAssassin::EvalTests;
+use Mail::SpamAssassin::AsyncLoop;
use Mail::SpamAssassin::Conf;
use Mail::SpamAssassin::Util;
use Mail::SpamAssassin::Logger;
@@ -84,6 +85,7 @@
'disable_auto_learning' => 0,
'auto_learn_status' => undef,
'conf' => $main->{conf},
+ 'async' => Mail::SpamAssassin::AsyncLoop->new($main)
};
if (defined $opts && $opts->{disable_auto_learning}) {
@@ -2719,7 +2721,9 @@
sub register_plugin_eval_glue {
my ($self, $pluginobj, $function) = @_;
- dbg("plugin: registering glue method for $function ($pluginobj)");
+ # stop reporting this -- it's too noisy!
+ # dbg("plugin: registering glue method for $function ($pluginobj)");
+
my $evalstr = <<"ENDOFEVAL";
{
package Mail::SpamAssassin::PerMsgStatus;
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm?rev=394348&r1=394347&r2=394348&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm Sat Apr 15 11:40:50 2006
@@ -384,61 +384,11 @@
$conf->{parser}->register_commands(\@cmds);
}
-sub check_tick {
- my ($self, $opts) = @_;
-
- return if ($self->{dns_not_available});
- $self->complete_lookups($opts->{permsgstatus}->{uribl_scanstate}, 0.3);
- return 1;
-}
-
-sub check_post_dnsbl {
- my ($self, $opts) = @_;
-
- return if ($self->{dns_not_available});
-
- my $scan = $opts->{permsgstatus};
- my $scanstate = $scan->{uribl_scanstate};
-
- # try to complete a few more
- if (!$self->complete_lookups($scanstate, 0.1)) {
- my $secs_to_wait = $scan->{conf}->{uridnsbl_timeout};
- if ($secs_to_wait < 0) { $secs_to_wait = 0; }
- my $now = time;
- my $deadline = $now + $secs_to_wait;
- dbg("uridnsbl: waiting $secs_to_wait seconds for URIDNSBL lookups to complete");
-
- while ($now < $deadline) {
- last if ($self->complete_lookups($scanstate, $deadline - $now));
- $now = time;
- }
- dbg("uridnsbl: done waiting for URIDNSBL lookups to complete");
- }
-
- foreach my $rulename (keys %{$scanstate->{active_rules_revipbl}},
- keys %{$scanstate->{active_rules_rhsbl}})
- {
- $scan->clear_test_state();
-
- if ($scanstate->{hits}->{$rulename}) {
- my $uris = join (' ', keys %{$scanstate->{hits}->{$rulename}});
- $scan->test_log ("URIs: $uris");
- $scan->got_hit ($rulename, "");
- }
- }
-
- $self->abort_remaining_lookups ($scanstate);
-}
-
# ---------------------------------------------------------------------------
sub setup {
my ($self, $scanstate) = @_;
-
- $scanstate->{pending_lookups} = { };
$scanstate->{seen_domain} = { };
- $scanstate->{last_count} = 0;
- $scanstate->{times_count_was_same} = 0;
}
# ---------------------------------------------------------------------------
@@ -499,12 +449,11 @@
my ($self, $scanstate, $obj, $dom) = @_;
my $key = "NS:".$dom;
- return if $scanstate->{pending_lookups}->{$key};
+ return if $scanstate->{scanner}->{async}->get_lookup($key);
# dig $dom ns
- my $ent = $self->start_lookup ($scanstate, 'NS', $self->res_bgsend($dom, 'NS'));
+ my $ent = $self->start_lookup ($scanstate, 'NS', $self->res_bgsend($scanstate, $dom, 'NS'), $key);
$ent->{obj} = $obj;
- $scanstate->{pending_lookups}->{$key} = $ent;
}
sub complete_ns_lookup {
@@ -544,12 +493,11 @@
my ($self, $scanstate, $obj, $hname) = @_;
my $key = "A:".$hname;
- return if $scanstate->{pending_lookups}->{$key};
+ return if $scanstate->{scanner}->{async}->get_lookup($key);
# dig $hname a
- my $ent = $self->start_lookup ($scanstate, 'A', $self->res_bgsend($hname, 'A'));
+ my $ent = $self->start_lookup ($scanstate, 'A', $self->res_bgsend($scanstate, $hname, 'A'), $key);
$ent->{obj} = $obj;
- $scanstate->{pending_lookups}->{$key} = $ent;
}
sub complete_a_lookup {
@@ -585,16 +533,15 @@
my ($self, $scanstate, $obj, $rulename, $lookupstr, $dnsbl, $qtype) = @_;
my $key = "DNSBL:".$dnsbl.":".$lookupstr;
- return if $scanstate->{pending_lookups}->{$key};
+ return if $scanstate->{scanner}->{async}->get_lookup($key);
my $item = $lookupstr.".".$dnsbl;
# dig $ip txt
my $ent = $self->start_lookup ($scanstate, 'DNSBL',
- $self->res_bgsend($item, $qtype));
+ $self->res_bgsend($scanstate, $item, $qtype), $key);
$ent->{obj} = $obj;
$ent->{rulename} = $rulename;
$ent->{zone} = $dnsbl;
- $scanstate->{pending_lookups}->{$key} = $ent;
}
sub complete_dnsbl_lookup {
@@ -675,18 +622,55 @@
$scanstate->{hits}->{$rulename} = { };
};
$scanstate->{hits}->{$rulename}->{$dom} = 1;
+
+ my $scan = $scanstate->{scanner};
+ if ($scanstate->{active_rules_revipbl}->{$rulename}
+ || $scanstate->{active_rules_rhsbl}->{$rulename})
+ {
+ # TODO: this needs to handle multiple domain hits per rule
+ $scan->clear_test_state();
+ my $uris = join (' ', keys %{$scanstate->{hits}->{$rulename}});
+ $scan->test_log ("URIs: $uris");
+ $scan->got_hit ($rulename, "");
+ }
}
# ---------------------------------------------------------------------------
sub start_lookup {
- my ($self, $scanstate, $type, $id) = @_;
+ my ($self, $scanstate, $type, $id, $key) = @_;
+
my $ent = {
- type => $type,
- id => $id
+ key => $key,
+ type => "URI-".$type,
+ id => $id,
+ completed_callback => sub {
+ my $ent = shift;
+ $self->completed_lookup_callback ($scanstate, $ent);
+ }
};
- $scanstate->{queries_started}++;
- $ent;
+ $scanstate->{scanner}->{async}->start_lookup($ent);
+ return $ent;
+}
+
+sub completed_lookup_callback {
+ my ($self, $scanstate, $ent) = @_;
+ my $type = $ent->{type};
+ my $key = $ent->{key};
+ $key =~ /:(\S+?)$/; my $val = $1;
+
+ if ($type eq 'URI-NS') {
+ $self->complete_ns_lookup ($scanstate, $ent, $val);
+ }
+ elsif ($type eq 'URI-A') {
+ $self->complete_a_lookup ($scanstate, $ent, $val);
+ }
+ elsif ($type eq 'URI-DNSBL') {
+ $self->complete_dnsbl_lookup ($scanstate, $ent, $val);
+ my $totalsecs = (time - $ent->{obj}->{querystart});
+ dbg("uridnsbl: query for ".$ent->{obj}->{dom}." took ".
+ $totalsecs." seconds to look up ($val)");
+ }
}
# ---------------------------------------------------------------------------
@@ -696,122 +680,18 @@
sub complete_lookups {
my ($self, $scanstate, $timeout) = @_;
- my %typecount = ();
- my $stillwaiting = 0;
-
- my $pending = $scanstate->{pending_lookups};
- if (scalar keys %{$pending} <= 0) {
- return 1; # nothing left to do
- }
-
- $scanstate->{queries_started} = 0;
- $scanstate->{queries_completed} = 0;
-
- my $nfound = $self->{main}->{resolver}->poll_responses($timeout);
- $nfound ||= 'no';
- dbg ("uridnsbl: select found $nfound socks ready");
-
- foreach my $key (keys %{$pending}) {
- my $ent = $pending->{$key};
- my $type = $ent->{type};
-
- if (!exists ($self->{finished}->{$ent->{id}})) {
- $typecount{$type}++;
- #$stillwaiting = 1;
- next;
- }
-
- $ent->{response_packet} = delete $self->{finished}->{$ent->{id}};
- $key =~ /:(\S+)$/; my $val = $1;
-
- if (LOG_COMPLETION_TIMES) {
- my $secs = (time - $ent->{start});
- my $totalsecs = (time - $ent->{obj}->{querystart});
- printf "# time: %s %3.3f %3.3f %s\n",
- $type, $secs, $totalsecs, $ent->{obj}->{dom};
- }
-
- if ($type eq 'NS') {
- $self->complete_ns_lookup ($scanstate, $ent, $val);
- }
- elsif ($type eq 'A') {
- $self->complete_a_lookup ($scanstate, $ent, $val);
- }
- elsif ($type eq 'DNSBL') {
- $self->complete_dnsbl_lookup ($scanstate, $ent, $val);
- my $totalsecs = (time - $ent->{obj}->{querystart});
- dbg("uridnsbl: query for ".$ent->{obj}->{dom}." took ".
- $totalsecs." seconds to look up ($val)");
- }
-
- $scanstate->{queries_completed}++;
- delete $scanstate->{pending_lookups}->{$key};
- }
-
- dbg("uridnsbl: queries completed: ".$scanstate->{queries_completed}.
- " started: ".$scanstate->{queries_started});
-
- if (1) {
- dbg("uridnsbl: queries active: ".
- join (' ', map { "$_=$typecount{$_}" } sort keys %typecount)." at ".
- localtime(time));
- }
-
- # ensure we don't get stuck if a request gets lost in the ether.
- if (!$stillwaiting) {
- my $numkeys = scalar keys %{$scanstate->{pending_lookups}};
- if ($numkeys == 0) {
- $stillwaiting = 0;
-
- } else {
- $stillwaiting = 1;
-
- # avoid looping forever if we haven't got all results.
- if ($scanstate->{last_count} == $numkeys) {
- $scanstate->{times_count_was_same}++;
- if ($scanstate->{times_count_was_same} > 20) {
- dbg("uridnsbl: escaping: must have lost requests");
- $self->abort_remaining_lookups ($scanstate);
- $stillwaiting = 0;
- }
- } else {
- $scanstate->{last_count} = $numkeys;
- $scanstate->{times_count_was_same} = 0;
- }
- }
- }
-
- return (!$stillwaiting);
-}
-
-# ---------------------------------------------------------------------------
-
-sub abort_remaining_lookups {
- my ($self, $scanstate) = @_;
-
- my $pending = $scanstate->{pending_lookups};
- my $foundone = 0;
- foreach my $key (keys %{$pending})
- {
- if (!$foundone) {
- dbg("uridnsbl: aborting remaining lookups");
- $foundone = 1;
- }
-
- delete $pending->{$key};
- }
- $self->{main}->{resolver}->bgabort();
+ return $scanstate->{scanner}->{async}->complete_lookups($timeout);
}
# ---------------------------------------------------------------------------
sub res_bgsend {
- my ($self, $host, $type) = @_;
+ my ($self, $scanstate, $host, $type) = @_;
return $self->{main}->{resolver}->bgsend($host, $type, undef, sub {
my $pkt = shift;
my $id = shift;
- $self->{finished}->{$id} = $pkt;
+ $scanstate->{scanner}->{async}->set_response_packet($id, $pkt);
});
}
Modified: spamassassin/trunk/t/dnsbl.t
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/t/dnsbl.t?rev=394348&r1=394347&r2=394348&view=diff
==============================================================================
--- spamassassin/trunk/t/dnsbl.t (original)
+++ spamassassin/trunk/t/dnsbl.t Sat Apr 15 11:40:50 2006
@@ -92,6 +92,9 @@
);
tstprefs("
+
+rbl_timeout 30
+
add_header all RBL _RBL_
add_header all Trusted _RELAYSTRUSTED_
add_header all Untrusted _RELAYSUNTRUSTED_