You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2022/05/15 16:07:26 UTC

svn commit: r1900929 - in /spamassassin/trunk: MANIFEST lib/Mail/SpamAssassin/Plugin/HashBL.pm t/hashbl.t

Author: hege
Date: Sun May 15 16:07:26 2022
New Revision: 1900929

URL: http://svn.apache.org/viewvc?rev=1900929&view=rev
Log:
- Add options to check_hashbl_tag, ip/ipv4/ipv6/revip/fqdn/tld/trim
- Cleanup HashBL code
- Add basic HashBL tests

Added:
    spamassassin/trunk/t/hashbl.t   (with props)
Modified:
    spamassassin/trunk/MANIFEST
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm

Modified: spamassassin/trunk/MANIFEST
URL: http://svn.apache.org/viewvc/spamassassin/trunk/MANIFEST?rev=1900929&r1=1900928&r2=1900929&view=diff
==============================================================================
--- spamassassin/trunk/MANIFEST (original)
+++ spamassassin/trunk/MANIFEST Sun May 15 16:07:26 2022
@@ -517,6 +517,7 @@ t/get_headers.t
 t/gtube.t
 t/header.t
 t/header_utf8.t
+t/hashbl.t
 t/html_colors.t
 t/html_obfu.t
 t/html_utf8.t

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm?rev=1900929&r1=1900928&r2=1900929&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm Sun May 15 16:07:26 2022
@@ -142,6 +142,20 @@ DNS query type can be appended to list w
 
 Default OPTS: sha1/max=10/shuffle
 
+Additional supported OPTS:
+
+  ip        only query if value is valid IPv4/IPv6 address
+  ipv4      only query if value is valid IPv4 address
+  ipv6      only query if value is valid IPv6 address
+  revip     reverse IP before query
+  fqdn      only query if value is valid FQDN (is_fqdn_valid)
+  tld       only query if value has valid TLD (is_domain_valid)
+  trim      trim name from hostname to domain (trim_domain)
+
+If both ip/ipv4/ipv6 and fqdn/tld are enabled, only either of them is
+required to match.  Both fqdn and tld are needed for complete FQDN+TLD
+check.
+
 =back
 
 =cut
@@ -157,7 +171,8 @@ use Digest::MD5 qw(md5_hex);
 use Digest::SHA qw(sha1_hex);
 
 use Mail::SpamAssassin::Plugin;
-use Mail::SpamAssassin::Util qw(compile_regexp);
+use Mail::SpamAssassin::Constants qw(:ip);
+use Mail::SpamAssassin::Util qw(compile_regexp is_fqdn_valid reverse_ip_address);
 
 our @ISA = qw(Mail::SpamAssassin::Plugin);
 
@@ -276,37 +291,35 @@ sub set_config {
 }
 
 sub parse_config {
-    my ($self, $opt) = @_;
+  my ($self, $opt) = @_;
 
-    if ($opt->{key} =~ /^hashbl_acl_([a-z0-9]{1,32})$/i) {
-        $self->inhibit_further_callbacks();
-        return 1 unless $self->{hashbl_available};
-
-        my $acl = lc($1);
-        my @opts = split(/\s+/, $opt->{value});
-        foreach my $tmp (@opts)
-        {
-            if ($tmp =~ /^(\!)?(\S+)$/i) {
-                my $neg = $1;
-                my $value = lc($2);
-
-                if (defined $neg) {
-                    $self->{hashbl_acl}{$acl}{$value} = 0;
-                } else {
-                    next if $acl eq 'all';
-                    # exclusions overrides
-                    if ( not defined $self->{hashbl_acl}{$acl}{$value} ) {
-                      $self->{hashbl_acl}{$acl}{$value} = 1
-                    }
-                }
-            } else {
-                warn("invalid acl: $tmp");
-            }
+  if ($opt->{key} =~ /^hashbl_acl_([a-z0-9]{1,32})$/i) {
+    $self->inhibit_further_callbacks();
+    return 1 unless $self->{hashbl_available};
+
+    my $acl = lc($1);
+    my @opts = split(/\s+/, $opt->{value});
+    foreach my $tmp (@opts) {
+      if ($tmp =~ /^(\!)?(\S+)$/i) {
+        my $neg = $1;
+        my $value = lc($2);
+        if (defined $neg) {
+          $self->{hashbl_acl}{$acl}{$value} = 0;
+        } else {
+          next if $acl eq 'all';
+          # exclusions overrides
+          if (!defined $self->{hashbl_acl}{$acl}{$value}) {
+            $self->{hashbl_acl}{$acl}{$value} = 1
+          }
         }
-        return 1;
+      } else {
+        warn("invalid acl: $tmp");
+      }
     }
+    return 1;
+  }
 
-    return 0;
+  return 0;
 }
 
 sub finish_parsing_start {
@@ -339,28 +352,32 @@ sub finish_parsing_end {
   return 0;
 }
 
+sub _parse_opts {
+  my %opts;
+  foreach my $o (split(/\s*\/\s*/, lc $_[0])) {
+    my ($k, $v) = split(/=/, $o);
+    $opts{$k} = defined $v ? $v : 1;
+  }
+  return \%opts;
+}
+
 sub _get_emails {
   my ($self, $pms, $opts, $from, $acl) = @_;
 
   my @emails; # keep find order
   my %seen;
-  my @tmp_email;
-  my $domain;
 
-  foreach my $hdr (split(/\//, $from)) {
+  foreach my $hdr (split(/\s*\/\s*/, $from)) {
     my $parsed_emails = $self->_parse_emails($pms, $opts, $hdr);
     foreach (@$parsed_emails) {
-      next if exists $seen{$_};
-      my @tmp_email = split('@', $_);
-      my $domain = $tmp_email[1];
-      if (defined($acl) and ($acl ne "all") and defined($domain)) {
-        if (defined($self->{hashbl_acl}{$acl}{$domain}) and ($self->{hashbl_acl}{$acl}{$domain} eq 1)) {
+      next if $seen{$_}++;
+      my ($domain) = ($_ =~ /.*\@(.+)/);
+      if (defined $domain && defined $acl && $acl ne 'all') {
+        if ($self->{hashbl_acl}{$acl}{$domain}) {
           push @emails, $_;
-          $seen{$_} = 1;
         }
       } else {
         push @emails, $_;
-        $seen{$_} = 1;
       }
     }
   }
@@ -380,16 +397,14 @@ sub _parse_emails {
     return $pms->{hashbl_email_cache}{$hdr} = \@emails;
   }
 
-  if (not defined $pms->{hashbl_welcomelist}) {
+  if (!defined $pms->{hashbl_welcomelist}) {
     %{$pms->{hashbl_welcomelist}} = map { lc($_) => 1 }
         ( $pms->get("X-Original-To:addr"),
           $pms->get("Apparently-To:addr"),
           $pms->get("Delivered-To:addr"),
           $pms->get("Envelope-To:addr"),
         );
-    if ( defined $pms->{hashbl_welcomelist}{''} ) {
-      delete $pms->{hashbl_welcomelist}{''};
-    }
+    delete $pms->{hashbl_welcomelist}{''};
   }
 
   my $str = '';
@@ -406,11 +421,11 @@ sub _parse_emails {
       }
     }
     my $body = join('', @{$pms->get_decoded_stripped_body_text_array()});
-    if ($opts =~ /\bnouri\b/) {
+    if ($opts->{nouri}) {
       # strip urls with possible emails inside
       $body =~ s#<?https?://\S{0,255}(?:\@|%40)\S{0,255}# #gi;
     }
-    if ($opts =~ /\bnoquote\b/) {
+    if ($opts->{noquote}) {
       # strip emails contained in <>, not mailto:
       # also strip ones followed by quote-like "wrote:" (but not fax: and tel: etc)
       $body =~ s#<?(?<!mailto:)$pms->{conf}->{hashbl_email_regex}(?:>|\s{1,10}(?!(?:fa(?:x|csi)|tel|phone|e?-?mail))[a-z]{2,11}:)# #gi;
@@ -424,7 +439,7 @@ sub _parse_emails {
   my %seen;
 
   while ($str =~ /($pms->{conf}->{hashbl_email_regex})/g) {
-    next if exists $seen{$1};
+    next if $seen{$1}++;
     push @emails, $1;
   }
 
@@ -453,15 +468,14 @@ sub check_hashbl_emails {
     $subtest = $rec;
   }
 
-  # Defaults
-  $opts = 'sha1/notag/noquote/max=10/shuffle' if !$opts;
-
+  # Parse opts, defaults
+  $opts = _parse_opts($opts || 'sha1/notag/noquote/max=10/shuffle');
   $from = 'ALLFROM/Reply-To/body' if !$from;
 
   # Find all emails
   my $emails = $self->_get_emails($pms, $opts, $from, $acl);
   if (!@$emails) {
-    if(defined $acl) {
+    if (defined $acl) {
       dbg("$rulename: no emails found ($from) on acl $acl");
     } else {
       dbg("$rulename: no emails found ($from)");
@@ -472,38 +486,34 @@ sub check_hashbl_emails {
   }
 
   # Filter list
-  my $keep_case = $opts =~ /\bcase\b/i;
-  my $nodot = $opts =~ /\bnodot\b/i;
-  my $notag = $opts =~ /\bnotag\b/i;
   my @filtered_emails; # keep order
   my %seen;
   foreach my $email (@$emails) {
-    next if exists $seen{$email};
-    next if $email !~ /.*\@.*/;
+    next if $seen{$email}++;
+    next if index($email, '@') == -1;
     if ($email =~ $pms->{conf}->{hashbl_email_welcomelist}
         || defined $pms->{hashbl_welcomelist}{$email}) {
       dbg("Address welcomelisted: $email");
       next;
     }
-    if ($nodot || $notag) {
+    if ($opts->{nodot} || $opts->{notag}) {
       my ($username, $domain) = ($email =~ /(.*)(\@.*)/);
-      $username =~ tr/.//d if $nodot;
-      $username =~ s/\+.*// if $notag;
+      $username =~ tr/.//d if $opts->{nodot};
+      $username =~ s/\+.*// if $opts->{notag};
       $email = $username.$domain;
     }
-    push @filtered_emails, $keep_case ? $email : lc($email);
-    $seen{$email} = 1;
+    push @filtered_emails, $opts->{case} ? $email : lc($email);
   }
 
   return 0 unless @filtered_emails;
 
   # Randomize order
-  if ($opts =~ /\bshuffle\b/) {
+  if ($opts->{shuffle}) {
     Mail::SpamAssassin::Util::fisher_yates_shuffle(\@filtered_emails);
   }
 
   # Truncate list
-  my $max = $opts =~ /\bmax=(\d+)\b/ ? $1 : 10;
+  my $max = $opts->{max} || 10;
   $#filtered_emails = $max-1 if scalar @filtered_emails > $max;
 
   $pms->rule_pending($rulename); # mark async
@@ -537,13 +547,10 @@ sub check_hashbl_uris {
     $subtest = $rec;
   }
 
-  # Defaults
-  $opts = 'sha1/max=10/shuffle' if !$opts;
+  # Parse opts, defaults
+  $opts = _parse_opts($opts || 'sha1/max=10/shuffle');
 
-  # Filter list
-  my $keep_case = $opts =~ /\bcase\b/i;
-
-  if ($opts =~ /raw/) {
+  if ($opts->{raw}) {
     warn "HashBL: $rulename raw option invalid\n";
     return 0;
   }
@@ -555,7 +562,7 @@ sub check_hashbl_uris {
   while (my($uri, $info) = each %{$uris}) {
     # we want to skip mailto: uris
     next if ($uri =~ /^mailto:/i);
-    next if exists $seen{$uri};
+    next if $seen{$uri}++;
 
     # no hosts/domains were found via this uri, so skip
     next unless $info->{hosts};
@@ -563,20 +570,19 @@ sub check_hashbl_uris {
     next unless $info->{types}->{a} || $info->{types}->{parsed};
     foreach my $uri (@{$info->{cleaned}}) {
       # check url
-      push @filtered_uris, $keep_case ? $uri : lc($uri);
+      push @filtered_uris, $opts->{case} ? $uri : lc($uri);
     }
-    $seen{$uri} = 1;
   }
 
   return 0 unless @filtered_uris;
 
   # Randomize order
-  if ($opts =~ /\bshuffle\b/) {
+  if ($opts->{shuffle}) {
     Mail::SpamAssassin::Util::fisher_yates_shuffle(\@filtered_uris);
   }
 
   # Truncate list
-  my $max = $opts =~ /\bmax=(\d+)\b/ ? $1 : 10;
+  my $max = $opts->{max} || 10;
   $#filtered_uris = $max-1 if scalar @filtered_uris > $max;
 
   $pms->rule_pending($rulename); # mark async
@@ -621,22 +627,20 @@ sub check_hashbl_bodyre {
     $subtest = $rec;
   }
 
-  # Defaults
-  $opts = 'sha1/max=10/shuffle' if !$opts;
-
-  my $keep_case = $opts =~ /\bcase\b/i;
+  # Parse opts, defaults
+  $opts = _parse_opts($opts || 'sha1/max=10/shuffle');
 
   # Search body
   my @matches;
   my %seen;
+
   if (ref($bodyref) eq 'ARRAY') {
     # body, rawbody
-    foreach (@$bodyref) {
-      while ($_ =~ /$re/gs) {
+    foreach my $body (@$bodyref) {
+      while ($body =~ /$re/gs) {
         next if !defined $1;
-        my $match = $keep_case ? $1 : lc($1);
-        next if exists $seen{$match};
-        $seen{$match} = 1;
+        my $match = $opts->{case} ? $1 : lc($1);
+        next if $seen{$match}++;
         push @matches, $match;
       }
     }
@@ -644,9 +648,8 @@ sub check_hashbl_bodyre {
     # full
     while ($$bodyref =~ /$re/gs) {
       next if !defined $1;
-      my $match = $keep_case ? $1 : lc($1);
-      next if exists $seen{$match};
-      $seen{$match} = 1;
+      my $match = $opts->{case} ? $1 : lc($1);
+      next if $seen{$match}++;
       push @matches, $match;
     }
   }
@@ -659,12 +662,12 @@ sub check_hashbl_bodyre {
   }
 
   # Randomize order
-  if ($opts =~ /\bshuffle\b/) {
+  if ($opts->{shuffle}) {
     Mail::SpamAssassin::Util::fisher_yates_shuffle(\@matches);
   }
 
   # Truncate list
-  my $max = $opts =~ /\bmax=(\d+)\b/ ? $1 : 10;
+  my $max = $opts->{max} || 10;
   $#matches = $max-1 if scalar @matches > $max;
 
   $pms->rule_pending($rulename); # mark async
@@ -703,6 +706,10 @@ sub check_hashbl_tag {
     $subtest = $rec;
   }
 
+  # Parse opts, defaults
+  $opts = _parse_opts($opts || 'sha1/max=10/shuffle');
+  $opts->{fqdn} = $opts->{tld} = 1  if $opts->{trim};
+
   # Strip possible _ delimiters
   $tag =~ s/^_(.+)_$/$1/;
 
@@ -721,37 +728,69 @@ sub check_hashbl_tag {
 sub _check_hashbl_tag {
   my ($self, $pms, $list, $opts, $tag, $subtest, $rulename) = @_;
 
-  # Defaults
-  $opts = 'sha1/max=10/shuffle' if !$opts;
-
-  # Filter list
-  my $keep_case = $opts =~ /\bcase\b/i;
-
   # Get raw array of tag values, get_tag() returns joined string
   my $valref = $pms->get_tag_raw($tag);
+  my @vals = ref $valref ? @$valref : $valref;
 
-  # Filter empty
-  my @vals = grep { defined $_ && $_ ne '' }
-               (ref $valref ? @$valref : $valref);
+  # Lowercase
+  @vals = map { lc } @vals  if !$opts->{case};
+
+  # Options
+  foreach my $value (@vals) {
+    my $is_ip = $value =~ IS_IP_ADDRESS;
+    if ($opts->{ip}) {
+      if (!$is_ip) {
+        $value = undef;
+        next;
+      }
+    }
+    if ($opts->{ipv4}) {
+      if ($value =~ IS_IPV4_ADDRESS) {
+        $is_ip = 1;
+      } else {
+        $value = undef;
+        next;
+      }
+    }
+    if ($opts->{ipv6}) {
+      if (!$is_ip || $value =~ IS_IPV4_ADDRESS) {
+        $value = undef;
+        next;
+      }
+    }
+    if ($is_ip && $opts->{revip}) {
+      $value = reverse_ip_address($value);
+    }
+    if (!$is_ip) {
+      if ($opts->{fqdn} && !is_fqdn_valid($value)) {
+        $value = undef;
+        next;
+      }
+      if ($opts->{tld} && !$pms->{main}->{registryboundaries}->is_domain_valid($value)) {
+        $value = undef;
+        next;
+      }
+      if ($opts->{trim}) {
+        $value = $pms->{main}->{registryboundaries}->trim_domain($value);
+      }
+    }
+  }
+
+  # Unique (and remove empty)
+  @vals = do { my %seen; grep { defined $_ && !$seen{$_}++ } @vals; };
 
   if (!@vals) {
     $pms->rule_ready($rulename); # mark rule ready for metas
     return;
   }
 
-  # Lowercase
-  @vals = map { lc } @vals  if !$keep_case;
-
-  # Unique
-  @vals = do { my %seen; grep { !$seen{$_}++ } @vals; };
-
   # Randomize order
-  if ($opts =~ /\bshuffle\b/) {
+  if ($opts->{shuffle}) {
     Mail::SpamAssassin::Util::fisher_yates_shuffle(\@vals);
   }
 
   # Truncate list
-  my $max = $opts =~ /\bmax=(\d+)\b/ ? $1 : 10;
+  my $max = $opts->{max} || 10;
   $#vals = $max-1 if scalar @vals > $max;
 
   foreach my $value (@vals) {
@@ -764,11 +803,10 @@ sub _check_hashbl_tag {
 sub _hash {
   my ($self, $opts, $value) = @_;
 
-  my $hashtype = $opts =~ /\b(raw|sha1|md5)\b/i ? lc($1) : 'sha1';
-  if ($hashtype eq 'sha1') {
+  if ($opts->{sha1}) {
     utf8::encode($value) if utf8::is_utf8($value); # sha1_hex expects bytes
     return sha1_hex($value);
-  } elsif ($hashtype eq 'md5') {
+  } elsif ($opts->{md5}) {
     utf8::encode($value) if utf8::is_utf8($value); # md5_hex expects bytes
     return md5_hex($value);
   } else {

Added: spamassassin/trunk/t/hashbl.t
URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/hashbl.t?rev=1900929&view=auto
==============================================================================
--- spamassassin/trunk/t/hashbl.t (added)
+++ spamassassin/trunk/t/hashbl.t Sun May 15 16:07:26 2022
@@ -0,0 +1,102 @@
+#!/usr/bin/perl -T
+
+use lib '.'; use lib 't';
+use SATest; sa_t_init("hashbl");
+
+use Test::More;
+plan skip_all => "Net tests disabled"          unless conf_bool('run_net_tests');
+plan skip_all => "Can't use Net::DNS Safely"   unless can_use_net_dns_safely();
+
+# run many times to catch some random natured failures
+my $iterations = 5;
+plan tests => 10 * $iterations;
+
+# ---------------------------------------------------------------------------
+
+%patterns = (
+ q{ 1.0 X_HASHBL_EMAIL } => '',
+ q{ 1.0 X_HASHBL_OSENDR } => '',
+ q{ 1.0 X_HASHBL_BTC } => '',
+ q{ 1.0 X_HASHBL_URI } => '',
+ q{ 1.0 X_HASHBL_TAG } => '',
+ q{ 1.0 META_HASHBL_EMAIL } => '',
+ q{ 1.0 META_HASHBL_BTC } => '',
+ q{ 1.0 META_HASHBL_URI } => '',
+);
+%anti_patterns = ();
+
+# Check from debug output log that nothing else than these were queried
+@valid_queries = qw(
+cb565607a98fbdf1be52cdb86466ab34244bd6fc.hashbltest1.spamassassin.org
+bc9f1b35acd338b92b0659cc2111e6b661a8b2bc.hashbltest1.spamassassin.org
+62e12fbe4b32adc2e87147d74590372b461f35f6.hashbltest1.spamassassin.org
+96b802967118135ef048c2bc860e7b0deb7d2333.hashbltest1.spamassassin.org
+170d83ef2dc9c2de0e65ce4461a3a375.hashbltest2.spamassassin.org
+cc205dd956d568ff8524d7fc42868500e4d7d162.hashbltest3.spamassassin.org
+6a42acf4133289d595e3875a9d677f810e80b7b4.hashbltest4.spamassassin.org
+5c6205960a65b1f9078f0e12dcac970aab0015eb.hashbltest4.spamassassin.org
+1234567890.hashbltest5.spamassassin.org
+);
+
+sub check_queries {
+  my %invalid;
+  my %found;
+  if (!open(WL, $current_checkfile)) {
+    diag("LOGFILE OPEN FAILED");
+    return 0;
+  }
+  while (<WL>) {
+    my $line = $_;
+    while ($line =~ /\b(\w+\.hashbltest\d\.spamassassin\.org)\b/g) {
+      my $query = $1;
+      $found{$query}++;
+      if (!grep { $query eq $_ } @valid_queries) {
+        $invalid{$query}++;
+      }
+    }
+  }
+  close WL;
+  unless (keys %found == @valid_queries) {
+    diag("Not all queries launched");
+    return 0;
+  }
+  diag("Invalid query launched: $_") foreach (keys %invalid);
+  return !%invalid;
+}
+
+tstlocalrules(q{
+  rbl_timeout 30
+
+  header   X_HASHBL_EMAIL eval:check_hashbl_emails('hashbltest1.spamassassin.org')
+  tflags   X_HASHBL_EMAIL net
+
+  hashbl_acl_freemail gmail.com
+  header   X_HASHBL_OSENDR eval:check_hashbl_emails('hashbltest2.spamassassin.org/A', 'md5/max=10/shuffle', 'X-Original-Sender', '^127\.', 'freemail')
+  tflags   X_HASHBL_OSENDR net
+
+  body     X_HASHBL_BTC eval:check_hashbl_bodyre('hashbltest3.spamassassin.org', 'sha1/max=10/shuffle', '\b([13][a-km-zA-HJ-NP-Z1-9]{25,34})\b')
+  tflags   X_HASHBL_BTC net
+
+  header   X_HASHBL_URI eval:check_hashbl_uris('hashbltest4.spamassassin.org', 'sha1', '127.0.0.2')
+  tflags   X_HASHBL_URI net
+
+  header   __X_SOME_ID X-Some-ID =~ /^(?<XSOMEID>\d{10,20})$/
+  header   X_HASHBL_TAG eval:check_hashbl_tag('hashbltest5.spamassassin.org/A', 'raw', 'XSOMEID', '^127\.')
+
+  # Bug 7897 - test that meta rules depending on net rules hit
+  meta META_HASHBL_EMAIL X_HASHBL_EMAIL
+  # It also needs to hit even if priority is lower than dnsbl (-100)
+  meta META_HASHBL_BTC X_HASHBL_BTC
+  priority META_HASHBL_BTC -500
+  # Or super high
+  meta META_HASHBL_URI X_HASHBL_URI
+  priority META_HASHBL_URI 2000
+  priority X_HASHBL_URI 2000
+});
+
+for (1 .. $iterations) {
+  ok sarun ("-t -D async,dns,HashBL < data/spam/hashbl 2>&1", \&patterns_run_cb);
+  ok(check_queries());
+  ok_all_patterns();
+}
+

Propchange: spamassassin/trunk/t/hashbl.t
------------------------------------------------------------------------------
    svn:executable = *