You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2022/05/26 06:27:34 UTC

svn commit: r1901270 - in /spamassassin/trunk: lib/Mail/SpamAssassin/Plugin/HashBL.pm t/data/spam/hashbl t/hashbl.t

Author: hege
Date: Thu May 26 06:27:34 2022
New Revision: 1901270

URL: http://svn.apache.org/viewvc?rev=1901270&view=rev
Log:
Add user/host/domain options to check_hashbl_emails() and do some cleanup

Modified:
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm
    spamassassin/trunk/t/data/spam/hashbl
    spamassassin/trunk/t/hashbl.t

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm?rev=1901270&r1=1901269&r2=1901270&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm Thu May 26 06:27:34 2022
@@ -30,7 +30,7 @@ HashBL - query hashed (and unhashed) DNS
   describe HASHBL_EMAIL Message contains email address found on EBL
   tflags   HASHBL_EMAIL net
 
-  hashbl_acl_freemail gmail.com
+  hashbl_acl_freemail gmail.com # only query gmail.com addresses
   header   HASHBL_OSENDR eval:check_hashbl_emails('rbl.example.invalid/A', 'md5/max=10/shuffle', 'X-Original-Sender', '^127\.', 'freemail')
   describe HASHBL_OSENDR Message contains email address found on HASHBL
   tflags   HASHBL_OSENDR net
@@ -94,6 +94,9 @@ Additional supported OPTS:
   notag    strip username tags from email
   nouri    ignore emails inside uris
   noquote  ignore emails inside < > or possible quotings
+  user     query userpart of email only
+  host     query hostpart of email only
+  domain   query domain of email only (hostpart+trim_domain)
 
 Default HEADERS: ALLFROM/Reply-To/body
 
@@ -424,18 +427,6 @@ sub _get_emails {
       my ($domain) = ($email =~ /.*\@(.+)/);
       next unless defined $domain;
       next if defined $acl && $acl ne 'all' && !$self->{hashbl_acl}{$acl}{$domain};
-      # Don't check uridnsbl_skip_domains when explicit acl is used
-      if (!defined $acl) {
-        if (exists $conf->{uridnsbl_skip_domains}->{lc $domain}) {
-          dbg("query skipped, uridnsbl_skip_domains: $email");
-          next;
-        }
-        my $dom = $pms->{main}->{registryboundaries}->trim_domain($domain);
-        if (exists $conf->{uridnsbl_skip_domains}->{lc $dom}) {
-          dbg("query skipped, uridnsbl_skip_domains: $email");
-          next;
-        }
-      }
       push @emails, $email;
     }
   }
@@ -455,7 +446,7 @@ sub _parse_emails {
     return $pms->{hashbl_email_cache}{$hdr} = \@emails;
   }
 
-  if (!defined $pms->{hashbl_welcomelist}) {
+  if (!exists $pms->{hashbl_welcomelist}) {
     %{$pms->{hashbl_welcomelist}} = map { lc($_) => 1 }
         ( $pms->get("X-Original-To:addr"),
           $pms->get("Apparently-To:addr"),
@@ -510,6 +501,7 @@ sub check_hashbl_emails {
   return 0 if !$self->{hashbl_available};
   return 0 if !$pms->is_dns_available();
 
+  my $conf = $pms->{conf};
   my $rulename = $pms->get_current_eval_rule_name();
 
   if (!defined $list) {
@@ -548,23 +540,50 @@ sub check_hashbl_emails {
   my %seen;
   foreach my $email (@$emails) {
     next if $seen{$email}++;
-    next if index($email, '@') == -1;
-    if ($email =~ $pms->{conf}->{hashbl_email_welcomelist}
-        || defined $pms->{hashbl_welcomelist}{$email}) {
-      dbg("Address welcomelisted: $email");
+    if (exists $pms->{hashbl_welcomelist}{$email} ||
+        $email =~ $conf->{hashbl_email_welcomelist})
+    {
+      dbg("query skipped, address welcomelisted: $email");
       next;
     }
+    my ($username, $domain) = ($email =~ /(.*)\@(.*)/);
+    # Don't check uridnsbl_skip_domains when explicit acl is used
+    if (!defined $acl) {
+      if (exists $conf->{uridnsbl_skip_domains}->{lc $domain}) {
+        dbg("query skipped, uridnsbl_skip_domains: $email");
+        next;
+      }
+      my $dom = $pms->{main}->{registryboundaries}->trim_domain($domain);
+      if (exists $conf->{uridnsbl_skip_domains}->{lc $dom}) {
+        dbg("query skipped, uridnsbl_skip_domains: $email");
+        next;
+      }
+    }
     if ($opts->{nodot} || $opts->{notag}) {
-      my ($username, $domain) = ($email =~ /(.*)(\@.*)/);
       $username =~ tr/.//d if $opts->{nodot};
       $username =~ s/\+.*// if $opts->{notag};
-      $email = $username.$domain;
     }
-    push @filtered_emails, $opts->{case} ? $email : lc($email);
+    # Final query assembly
+    my $qmail;
+    if ($opts->{host} || $opts->{domain}) {
+      if ($opts->{domain}) {
+        $domain = $pms->{main}->{registryboundaries}->trim_domain($domain);
+      }
+      $qmail = $domain;
+    } elsif ($opts->{user}) {
+      $qmail = $username;
+    } else {
+      $qmail = $username.'@'.$domain;
+    }
+    $qmail = lc $qmail  if !$opts->{case};
+    push @filtered_emails, $qmail;
   }
 
   return 0 unless @filtered_emails;
 
+  # Unique
+  @filtered_emails = do { my %seen; grep { !$seen{$_}++ } @filtered_emails; };
+
   # Randomize order
   if ($opts->{shuffle}) {
     Mail::SpamAssassin::Util::fisher_yates_shuffle(\@filtered_emails);
@@ -645,6 +664,9 @@ URI:
 
   return 0 unless @filtered_uris;
 
+  # Unique
+  @filtered_uris = do { my %seen; grep { !$seen{$_}++ } @filtered_uris; };
+
   # Randomize order
   if ($opts->{shuffle}) {
     Mail::SpamAssassin::Util::fisher_yates_shuffle(\@filtered_uris);
@@ -731,6 +753,9 @@ sub check_hashbl_bodyre {
     dbg("$rulename: matches found: '".join("', '", @matches)."'");
   }
 
+  # Unique
+  @matches = do { my %seen; grep { !$seen{$_}++ } @matches; };
+
   # Randomize order
   if ($opts->{shuffle}) {
     Mail::SpamAssassin::Util::fisher_yates_shuffle(\@matches);
@@ -1050,5 +1075,6 @@ sub has_hashbl_email_whitelist { 1 }
 sub has_hashbl_tag { 1 }
 sub has_hashbl_sha256 { 1 }
 sub has_hashbl_attachments { 1 }
+sub has_hashbl_email_domain { 1 } # user/host/domain option for emails
 
 1;

Modified: spamassassin/trunk/t/data/spam/hashbl
URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/data/spam/hashbl?rev=1901270&r1=1901269&r2=1901270&view=diff
==============================================================================
--- spamassassin/trunk/t/data/spam/hashbl (original)
+++ spamassassin/trunk/t/data/spam/hashbl Thu May 26 06:27:34 2022
@@ -35,6 +35,8 @@ btc 1JaSs2bTZYVbj6jaqZ5Mjfs8gSLY9vYCrK
 
 uridnsbl_skip_domain  https://sub.trusted.com/  email@trusted.com
 
+email host/domain userpart@host.domain.com
+
 --ETDFsshmzrOmOVdZ
 Content-Type: application/octet-stream
 Content-Disposition: attachment; filename="macro.xlsm"

Modified: spamassassin/trunk/t/hashbl.t
URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/hashbl.t?rev=1901270&r1=1901269&r2=1901270&view=diff
==============================================================================
--- spamassassin/trunk/t/hashbl.t (original)
+++ spamassassin/trunk/t/hashbl.t Thu May 26 06:27:34 2022
@@ -33,6 +33,7 @@ cb565607a98fbdf1be52cdb86466ab34244bd6fc
 bc9f1b35acd338b92b0659cc2111e6b661a8b2bc.hashbltest1.spamassassin.org
 62e12fbe4b32adc2e87147d74590372b461f35f6.hashbltest1.spamassassin.org
 96b802967118135ef048c2bc860e7b0deb7d2333.hashbltest1.spamassassin.org
+1675677ba3d539bdfb0ae8940bf7e6c836f3ad17.hashbltest1.spamassassin.org
 170d83ef2dc9c2de0e65ce4461a3a375.hashbltest2.spamassassin.org
 cc205dd956d568ff8524d7fc42868500e4d7d162.hashbltest3.spamassassin.org
 jykf2a5v6asavfel3stymlmieh4e66jeroxuw52mc5xhdylnyb7a.hashbltest3.spamassassin.org
@@ -40,6 +41,9 @@ jykf2a5v6asavfel3stymlmieh4e66jeroxuw52m
 5c6205960a65b1f9078f0e12dcac970aab0015eb.hashbltest4.spamassassin.org
 1234567890.hashbltest5.spamassassin.org
 w3hcrlct6yshq5vq6gjv2hf3pzk3jvsk6ilj5iaks4qwewudrr6q.hashbltest6.spamassassin.org
+userpart.hashbltest7.spamassassin.org
+host.domain.com.hashbltest7.spamassassin.org
+domain.com.hashbltest7.spamassassin.org
 );
 
 sub check_queries {
@@ -51,7 +55,8 @@ sub check_queries {
   }
   while (<WL>) {
     my $line = $_;
-    while ($line =~ /\b(\w+\.hashbltest\d\.spamassassin\.org)\b/g) {
+    print STDERR $line if $line =~ /warn:/;
+    while ($line =~ m,([^\s/]+\.hashbltest\d\.spamassassin\.org)\b,g) {
       my $query = $1;
       $found{$query}++;
       if (!grep { $query eq $_ } @valid_queries) {
@@ -101,6 +106,12 @@ tstlocalrules(q{
   describe X_HASHBL_ATT Message contains attachment found on attbl
   tflags   X_HASHBL_ATT net
 
+  # email user/host/domain
+  hashbl_acl_domacl host.domain.com
+  header __X_HASHBL_UHD1 eval:check_hashbl_emails('hashbltest7.spamassassin.org', 'raw/user', 'body', '^', 'domacl')
+  header __X_HASHBL_UHD2 eval:check_hashbl_emails('hashbltest7.spamassassin.org', 'raw/host', 'body', '^', 'domacl')
+  header __X_HASHBL_UHD3 eval:check_hashbl_emails('hashbltest7.spamassassin.org', 'raw/domain', 'body', '^', 'domacl')
+
   # Bug 7897 - test that meta rules depending on net rules hit
   meta META_HASHBL_EMAIL X_HASHBL_EMAIL
   # It also needs to hit even if priority is lower than dnsbl (-100)