You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by fe...@apache.org on 2006/09/04 01:36:39 UTC

svn commit: r439873 - /spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm

Author: felicity
Date: Sun Sep  3 16:36:38 2006
New Revision: 439873

URL: http://svn.apache.org/viewvc?view=rev&rev=439873
Log:
Try to simplify URIDNSBL a bit: store the per-message scan state directly on the PerMsgStatus (PMS) object instead of in a separate hash with multiple redundant reference pointers, etc.

Modified:
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm?view=diff&rev=439873&r1=439872&r2=439873
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm Sun Sep  3 16:36:38 2006
@@ -161,31 +161,25 @@
     return;
   }
 
-  $self->{scanner} = $scanner;
-  my $scanstate = $scanner->{uribl_scanstate} = {
-    self => $self,
-    scanner => $scanner,
-    activerules => { },
-    hits => { }
-  };
+  $scanner->{'uridnsbl_activerules'} = { };
+  $scanner->{'uridnsbl_hits'} = { };
+  $scanner->{'uridnsbl_seen_domain'} = { };
 
   # only hit DNSBLs for active rules (defined and score != 0)
-  $scanstate->{active_rules_rhsbl} = { };
-  $scanstate->{active_rules_revipbl} = { };
+  $scanner->{'uridnsbl_active_rules_rhsbl'} = { };
+  $scanner->{'uridnsbl_active_rules_revipbl'} = { };
+
   foreach my $rulename (keys %{$scanner->{conf}->{uridnsbls}}) {
     next unless ($scanner->{conf}->is_rule_active('body_evals',$rulename));
 
-    my $rulecf = $scanstate->{scanner}->{conf}->{uridnsbls}->{$rulename};
+    my $rulecf = $scanner->{conf}->{uridnsbls}->{$rulename};
     if ($rulecf->{is_rhsbl}) {
-      $scanstate->{active_rules_rhsbl}->{$rulename} = 1;
+      $scanner->{uridnsbl_active_rules_rhsbl}->{$rulename} = 1;
     } else {
-      $scanstate->{active_rules_revipbl}->{$rulename} = 1;
+      $scanner->{uridnsbl_active_rules_revipbl}->{$rulename} = 1;
     }
   }
 
-  $self->setup ($scanstate);
-
-
   # get all domains in message
 
   # don't keep dereferencing this
@@ -247,7 +241,8 @@
   # at this point, @uri_ordered is an ordered array of uri hashes
 
   my %domlist = ();
-  while (keys %domlist < $scanner->{main}->{conf}->{uridnsbl_max_domains} && @uri_ordered) {
+  my $umd = $scanner->{main}->{conf}->{uridnsbl_max_domains};
+  while (keys %domlist < $umd && @uri_ordered) {
     my $array = shift @uri_ordered;
     next unless $array;
 
@@ -256,7 +251,7 @@
     next unless @domains;
 
     # the new domains are all useful, just add them in
-    if (keys(%domlist) + @domains <= $scanner->{main}->{conf}->{uridnsbl_max_domains}) {
+    if (keys(%domlist) + @domains <= $umd) {
       foreach (@domains) {
         $domlist{$_} = 1;
       }
@@ -264,7 +259,7 @@
     else {
       # trim down to a limited number - pick randomly
       my $i;
-      while (@domains && keys %domlist < $scanner->{main}->{conf}->{uridnsbl_max_domains}) {
+      while (@domains && keys %domlist < $umd) {
         my $r = int rand (scalar @domains);
         $domlist{splice (@domains, $r, 1)} = 1;
       }
@@ -274,7 +269,7 @@
   # and query
   dbg("uridnsbl: domains to query: ".join(' ',keys %domlist));
   foreach my $dom (keys %domlist) {
-    $self->query_domain ($scanstate, $dom);
+    $self->query_domain ($scanner, $dom);
   }
 
   return 1;
@@ -386,22 +381,15 @@
 
 # ---------------------------------------------------------------------------
 
-sub setup {
-  my ($self, $scanstate) = @_;
-  $scanstate->{seen_domain} = { };
-}
-
-# ---------------------------------------------------------------------------
-
 sub query_domain {
-  my ($self, $scanstate, $dom) = @_;
+  my ($self, $scanner, $dom) = @_;
 
   #warn "uridnsbl: domain $dom\n";
   #return;
 
   $dom = lc $dom;
-  return if $scanstate->{seen_domain}->{$dom};
-  $scanstate->{seen_domain}->{$dom} = 1;
+  return if $scanner->{uridnsbl_seen_domain}->{$dom};
+  $scanner->{uridnsbl_seen_domain}->{$dom} = 1;
   $self->log_dns_result("querying domain $dom");
 
   my $obj = {
@@ -415,7 +403,7 @@
     my $IP_PRIVATE = IP_PRIVATE;
     # only look up the IP if it is public and valid
     if ($dom =~ /^$IPV4_ADDRESS$/ && $dom !~ /^$IP_PRIVATE$/) {
-      $self->lookup_dnsbl_for_ip($scanstate, $obj, $dom);
+      $self->lookup_dnsbl_for_ip($scanner, $obj, $dom);
       # and check the IP in RHSBLs too
       if ($dom =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/) {
 	$dom = "$4.$3.$2.$1";
@@ -429,46 +417,46 @@
 
   if ($single_dnsbl) {
     # look up the domain in the RHSBL subset
-    my $cf = $scanstate->{active_rules_rhsbl};
+    my $cf = $scanner->{uridnsbl_active_rules_rhsbl};
     foreach my $rulename (keys %{$cf}) {
-      my $rulecf = $scanstate->{scanner}->{conf}->{uridnsbls}->{$rulename};
-      $self->lookup_single_dnsbl($scanstate, $obj, $rulename,
+      my $rulecf = $scanner->{conf}->{uridnsbls}->{$rulename};
+      $self->lookup_single_dnsbl($scanner, $obj, $rulename,
 				 $dom, $rulecf->{zone}, $rulecf->{type});
 
       # see comment below
-      $scanstate->{scanner}->register_async_rule_start($rulename);
+      $scanner->register_async_rule_start($rulename);
     }
 
     # perform NS, A lookups to look up the domain in the non-RHSBL subset
     if ($dom !~ /^\d+\.\d+\.\d+\.\d+$/) {
-      $self->lookup_domain_ns($scanstate, $obj, $dom);
+      $self->lookup_domain_ns($scanner, $obj, $dom);
     }
   }
 
   # note that these rules are now underway.   important: unless the
   # rule hits, in the current design, these will not be considered
   # "finished" until harvest_dnsbl_queries() completes
-  my $cf = $scanstate->{active_rules_revipbl};
+  my $cf = $scanner->{uridnsbl_active_rules_revipbl};
   foreach my $rulename (keys %{$cf}) {
-    $scanstate->{scanner}->register_async_rule_start($rulename);
+    $scanner->register_async_rule_start($rulename);
   }
 }
 
 # ---------------------------------------------------------------------------
 
 sub lookup_domain_ns {
-  my ($self, $scanstate, $obj, $dom) = @_;
+  my ($self, $scanner, $obj, $dom) = @_;
 
   my $key = "NS:".$dom;
-  return if $scanstate->{scanner}->{async}->get_lookup($key);
+  return if $scanner->{async}->get_lookup($key);
 
   # dig $dom ns
-  my $ent = $self->start_lookup ($scanstate, 'NS', $self->res_bgsend($scanstate, $dom, 'NS'), $key);
+  my $ent = $self->start_lookup ($scanner, 'NS', $self->res_bgsend($scanner, $dom, 'NS'), $key);
   $ent->{obj} = $obj;
 }
 
 sub complete_ns_lookup {
-  my ($self, $scanstate, $ent, $dom) = @_;
+  my ($self, $scanner, $ent, $dom) = @_;
 
   my $packet = $ent->{response_packet};
   my @answer = $packet->answer;
@@ -488,11 +476,11 @@
 	$nsmatch =~ s/\.$//;
 	# only look up the IP if it is public and valid
 	if ($nsmatch =~ /^$IPV4_ADDRESS$/ && $nsmatch !~ /^$IP_PRIVATE$/) {
-	  $self->lookup_dnsbl_for_ip($scanstate, $ent->{obj}, $nsmatch);
+	  $self->lookup_dnsbl_for_ip($scanner, $ent->{obj}, $nsmatch);
 	}
       }
       else {
-	$self->lookup_a_record($scanstate, $ent->{obj}, $nsmatch);
+	$self->lookup_a_record($scanner, $ent->{obj}, $nsmatch);
       }
     }
   }
@@ -501,25 +489,25 @@
 # ---------------------------------------------------------------------------
 
 sub lookup_a_record {
-  my ($self, $scanstate, $obj, $hname) = @_;
+  my ($self, $scanner, $obj, $hname) = @_;
 
   my $key = "A:".$hname;
-  return if $scanstate->{scanner}->{async}->get_lookup($key);
+  return if $scanner->{async}->get_lookup($key);
 
   # dig $hname a
-  my $ent = $self->start_lookup ($scanstate, 'A', $self->res_bgsend($scanstate, $hname, 'A'), $key);
+  my $ent = $self->start_lookup ($scanner, 'A', $self->res_bgsend($scanner, $hname, 'A'), $key);
   $ent->{obj} = $obj;
 }
 
 sub complete_a_lookup {
-  my ($self, $scanstate, $ent, $hname) = @_;
+  my ($self, $scanner, $ent, $hname) = @_;
 
   foreach my $rr ($ent->{response_packet}->answer) {
     my $str = $rr->string;
     $self->log_dns_result ("A for NS $hname: $str");
 
     if ($str =~ /IN\s+A\s+(\S+)/) {
-      $self->lookup_dnsbl_for_ip($scanstate, $ent->{obj}, $1);
+      $self->lookup_dnsbl_for_ip($scanner, $ent->{obj}, $1);
     }
   }
 }
@@ -527,39 +515,38 @@
 # ---------------------------------------------------------------------------
 
 sub lookup_dnsbl_for_ip {
-  my ($self, $scanstate, $obj, $ip) = @_;
+  my ($self, $scanner, $obj, $ip) = @_;
 
   $ip =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/;
   my $revip = "$4.$3.$2.$1";
 
-  my $cf = $scanstate->{active_rules_revipbl};
+  my $cf = $scanner->{uridnsbl_active_rules_revipbl};
   foreach my $rulename (keys %{$cf}) {
-    my $rulecf = $scanstate->{scanner}->{conf}->{uridnsbls}->{$rulename};
-    $self->lookup_single_dnsbl($scanstate, $obj, $rulename,
+    my $rulecf = $scanner->{conf}->{uridnsbls}->{$rulename};
+    $self->lookup_single_dnsbl($scanner, $obj, $rulename,
 			       $revip, $rulecf->{zone}, $rulecf->{type});
   }
 }
 
 sub lookup_single_dnsbl {
-  my ($self, $scanstate, $obj, $rulename, $lookupstr, $dnsbl, $qtype) = @_;
+  my ($self, $scanner, $obj, $rulename, $lookupstr, $dnsbl, $qtype) = @_;
 
   my $key = "DNSBL:".$dnsbl.":".$lookupstr;
-  return if $scanstate->{scanner}->{async}->get_lookup($key);
+  return if $scanner->{async}->get_lookup($key);
   my $item = $lookupstr.".".$dnsbl;
 
   # dig $ip txt
-  my $ent = $self->start_lookup ($scanstate, 'DNSBL',
-        $self->res_bgsend($scanstate, $item, $qtype), $key);
+  my $ent = $self->start_lookup ($scanner, 'DNSBL',
+        $self->res_bgsend($scanner, $item, $qtype), $key);
   $ent->{obj} = $obj;
   $ent->{rulename} = $rulename;
   $ent->{zone} = $dnsbl;
 }
 
 sub complete_dnsbl_lookup {
-  my ($self, $scanstate, $ent, $dnsblip) = @_;
+  my ($self, $scanner, $ent, $dnsblip) = @_;
 
-  my $scan = $scanstate->{scanner};
-  my $conf = $scan->{conf};
+  my $conf = $scanner->{conf};
   my @subtests = ();
   my $rulename = $ent->{rulename};
   my $rulecf = $conf->{uridnsbls}->{$rulename};
@@ -583,7 +570,7 @@
             $packet->header->id." rr=".$rr->string);
 	next;
       }
-      $self->got_dnsbl_hit($scanstate, $ent, $rdatastr, $dom, $rulename);
+      $self->got_dnsbl_hit($scanner, $ent, $rdatastr, $dom, $rulename);
     }
     else {
       foreach my $subtest (keys (%{$uridnsbl_subs}))
@@ -591,14 +578,14 @@
         my $subrulename = $uridnsbl_subs->{$subtest}->{rulename};
 
         if ($subtest eq $rdatastr) {
-          $self->got_dnsbl_hit($scanstate, $ent, $rdatastr, $dom, $subrulename);
+          $self->got_dnsbl_hit($scanner, $ent, $rdatastr, $dom, $subrulename);
         }
         # bitmask
         elsif ($subtest =~ /^\d+$/) {
 	  if ($rdatastr =~ m/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/ &&
 	      Mail::SpamAssassin::Util::my_inet_aton($rdatastr) & $subtest)
           {
-            $self->got_dnsbl_hit($scanstate, $ent, $rdatastr, $dom, $subrulename);
+            $self->got_dnsbl_hit($scanner, $ent, $rdatastr, $dom, $subrulename);
           }
         }
       }
@@ -607,35 +594,34 @@
 }
 
 sub got_dnsbl_hit {
-  my ($self, $scanstate, $ent, $str, $dom, $rulename) = @_;
+  my ($self, $scanner, $ent, $str, $dom, $rulename) = @_;
 
   $str =~ s/\s+/  /gs;	# long whitespace => short
   dbg("uridnsbl: domain \"$dom\" listed ($rulename): $str");
 
-  if (!defined $scanstate->{hits}->{$rulename}) {
-    $scanstate->{hits}->{$rulename} = { };
+  if (!defined $scanner->{uridnsbl_hits}->{$rulename}) {
+    $scanner->{uridnsbl_hits}->{$rulename} = { };
   };
-  $scanstate->{hits}->{$rulename}->{$dom} = 1;
+  $scanner->{uridnsbl_hits}->{$rulename}->{$dom} = 1;
 
-  my $scan = $scanstate->{scanner};
-  if ($scanstate->{active_rules_revipbl}->{$rulename}
-    || $scanstate->{active_rules_rhsbl}->{$rulename})
+  if ($scanner->{uridnsbl_active_rules_revipbl}->{$rulename}
+    || $scanner->{uridnsbl_active_rules_rhsbl}->{$rulename})
   {
     # TODO: this needs to handle multiple domain hits per rule
-    $scan->clear_test_state();
-    my $uris = join (' ', keys %{$scanstate->{hits}->{$rulename}});
-    $scan->test_log ("URIs: $uris");
-    $scan->got_hit ($rulename, "");
+    $scanner->clear_test_state();
+    my $uris = join (' ', keys %{$scanner->{uridnsbl_hits}->{$rulename}});
+    $scanner->test_log ("URIs: $uris");
+    $scanner->got_hit ($rulename, "");
 
     # note that this rule has completed (since it got at least 1 hit)
-    $scanstate->{scanner}->register_async_rule_finish($rulename);
+    $scanner->register_async_rule_finish($rulename);
   }
 }
 
 # ---------------------------------------------------------------------------
 
 sub start_lookup {
-  my ($self, $scanstate, $type, $id, $key) = @_;
+  my ($self, $scanner, $type, $id, $key) = @_;
 
   my $ent = {
     key => $key,
@@ -643,27 +629,27 @@
     id => $id,
     completed_callback => sub {
       my $ent = shift;
-      $self->completed_lookup_callback ($scanstate, $ent);
+      $self->completed_lookup_callback ($scanner, $ent);
     }
   };
-  $scanstate->{scanner}->{async}->start_lookup($ent);
+  $scanner->{async}->start_lookup($ent);
   return $ent;
 }
 
 sub completed_lookup_callback {
-  my ($self, $scanstate, $ent) = @_;
+  my ($self, $scanner, $ent) = @_;
   my $type = $ent->{type};
   my $key = $ent->{key};
   $key =~ /:(\S+?)$/; my $val = $1;
 
   if ($type eq 'URI-NS') {
-    $self->complete_ns_lookup ($scanstate, $ent, $val);
+    $self->complete_ns_lookup ($scanner, $ent, $val);
   }
   elsif ($type eq 'URI-A') {
-    $self->complete_a_lookup ($scanstate, $ent, $val);
+    $self->complete_a_lookup ($scanner, $ent, $val);
   }
   elsif ($type eq 'URI-DNSBL') {
-    $self->complete_dnsbl_lookup ($scanstate, $ent, $val);
+    $self->complete_dnsbl_lookup ($scanner, $ent, $val);
     my $totalsecs = (time - $ent->{obj}->{querystart});
     dbg("uridnsbl: query for ".$ent->{obj}->{dom}." took ".
               $totalsecs." seconds to look up ($val)");
@@ -673,12 +659,12 @@
 # ---------------------------------------------------------------------------
 
 sub res_bgsend {
-  my ($self, $scanstate, $host, $type) = @_;
+  my ($self, $scanner, $host, $type) = @_;
 
   return $self->{main}->{resolver}->bgsend($host, $type, undef, sub {
         my $pkt = shift;
         my $id = shift;
-        $scanstate->{scanner}->{async}->set_response_packet($id, $pkt);
+        $scanner->{async}->set_response_packet($id, $pkt);
       });
 }