You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by gb...@apache.org on 2019/04/03 18:18:17 UTC

svn commit: r1856894 - in /spamassassin: branches/3.4/lib/Mail/SpamAssassin/Plugin/HashBL.pm trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm

Author: gbechis
Date: Wed Apr  3 18:18:17 2019
New Revision: 1856894

URL: http://svn.apache.org/viewvc?rev=1856894&view=rev
Log:
Add check_hashbl_emails from trunk
Add the possibility to specify an acl to be able
to check only some domains against an hashbl rbl

Modified:
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/HashBL.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/HashBL.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/HashBL.pm?rev=1856894&r1=1856893&r2=1856894&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/HashBL.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/HashBL.pm Wed Apr  3 18:18:17 2019
@@ -1,6 +1,3 @@
-# Author: Steve Freegard <st...@fsl.com>
-# Copyright 2016 Steve Freegard
-#
 # <@LICENSE>
 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with
@@ -21,32 +18,78 @@
 
 =head1 NAME
 
-HashBL - seearch email addresses in HashBL blocklists
+HashBL - query hashed (and unhashed) DNS blocklists
 
 =head1 SYNOPSIS
 
   loadplugin Mail::SpamAssassin::Plugin::HashBL
-  header   HASHBL_EMAIL       eval:check_hashbl_emails('ebl.msbl.org')
-  describe HASHBL_EMAIL       Message contains email address found on EBL
+
+  header   HASHBL_EMAIL eval:check_hashbl_emails('ebl.msbl.org')
+  describe HASHBL_EMAIL Message contains email address found on EBL
+
+  hashbl_acl_freemail gmail.com
+  header  HASHBL_OSENDR  eval:check_hashbl_emails('rbl.example.com/A', 'md5/max=10/shuffle', 'X-Original-Sender', '^127\.', 'freemail')
+  describe HASHBL_OSENDR Message contains email address found on HASHBL
+  tflags  HASHBL_OSENDR  net
 
 =head1 DESCRIPTION
 
-The Email Blocklist (EBL) contains email addresses used to receive responses to spam emails.
-These email addresses are sometimes called contact email addresses or 
-drop boxes.
-The initial target of this blocklist was "Nigerian" 419 Advance Fee Fraud spam. As time passed and more types of spam that used drop boxes was identified, 
-these drop boxes also were listed.
-The EBL now lists significant numbers of drop boxes used in spam sent 
-by Chinese manufacturers of high-tech and light industrial products, 
-SEO/web development companies, direct spam services, list sellers, and a number
-of fraudulent or outright illegal products sold by botnets.
+This plugin support multiple types of hashed or unhashed DNS blocklists.
+
+OPTS refers to multiple generic options:
+
+  raw      do not hash data, query as is
+  md5      hash query with MD5
+  sha1     hash query with SHA1
+  sha256   hash query with SHA256
+  case     keep case before hashing, default is to lowercase
+  max=x	   maximum number of queries
+  shuffle  if max exceeded, random shuffle queries before truncating to limit
+
+Multiple options can be separated with slash or other non-word character. 
+If OPTS is empty ('') or missing, default is used.
+
+HEADERS referes to slash separated list of Headers to process:
+
+  ALL           all headers
+  ALLFROM       all From headers as returned by $pms->all_from_addrs()
+  EnvelopeFrom  message envelope from (Return-Path etc)
+  HeaderName    any header as used with $pms->get()
+
+if HEADERS is empty ('') or missing, default is used.
+
+=over 4
+
+=item header RULE check_hashbl_emails('bl.example.com/A', 'OPTS', 'HEADERS/body', '^127\.')
+
+Check email addresses from DNS list, "body" can be specified along with
+headers to search body for emails.  Optional subtest regexp to match DNS
+answer.  Note that eval rule type must always be "header".
+
+DNS query type can be appended to list with /A (default) or /TXT.
+
+Additional supported OPTS:
+
+  nodot    strip username dots from email
+  notag    strip username tags from email
+  nouri    ignore emails inside uris
+  noquote  ignore emails inside < > or possible quotings
+
+Default OPTS: sha1/notag/noquote/max=10/shuffle
+
+Default HEADERS: ALLFROM/Reply-To/body
+
+For existing public email blacklist, see: http://msbl.org/ebl.html
+
+  header HASHBL_EBL check_hashbl_emails('ebl.msbl.org')
+  priority HASHBL_EBL -100 # required for async query
 
 =cut
 
 package Mail::SpamAssassin::Plugin::HashBL;
 use strict;
 use warnings;
-my $VERSION = 0.001;
+my $VERSION = 0.90;
 
 use Mail::SpamAssassin::Plugin;
 use Mail::SpamAssassin::PerMsgStatus;
@@ -60,24 +103,84 @@ use vars qw(@ISA $email_whitelist $skip_
 sub dbg { Mail::SpamAssassin::Plugin::dbg ("HashBL: @_"); }
 
 sub new {
-    my ($class, $mailsa) = @_;
+  my ($class, $mailsa) = @_;
+
+  $class = ref($class) || $class;
+  my $self = $class->SUPER::new($mailsa);
+  bless ($self, $class);
+
+  # are network tests enabled?
+  if ($mailsa->{local_tests_only}) {
+    $self->{hashbl_available} = 0;
+    dbg("local tests only, disabling HashBL");
+  } else {
+    $self->{hashbl_available} = 1;
+  }
+
+  $self->register_eval_rule("check_hashbl_emails");
+  $self->set_config($mailsa->{conf});
+
+  return $self;
+}
+
+sub set_config {
+  my($self, $conf) = @_;
+  my @cmds = ();
 
-    $class = ref($class) || $class;
-    my $self = $class->SUPER::new($mailsa);
-    bless ($self, $class);
+  $conf->{parser}->register_commands(\@cmds);
+}
+
+sub _parse_args {
+    my ($self, $acl) = @_;
 
-    # are network tests enabled?
-    if ($mailsa->{local_tests_only}) {
-      $self->{hashbl_available} = 0;
-      dbg("local tests only, disabling HashBL");
-    } else {
-      $self->{hashbl_available} = 1;
+    if (not defined $acl) {
+      return ();
+    }
+    $acl =~ s/\s+//g;
+    if ($acl !~ /^[a-z0-9]{1,32}$/) {
+        warn("invalid acl name: $acl");
+        return ();
+    }
+    if ($acl eq 'all') {
+        return ();
     }
+    if (defined $self->{hashbl_acl}{$acl}) {
+        warn("no such acl defined: $acl");
+        return ();
+    }
+}
 
-    $self->set_config($mailsa->{conf});
-    $self->register_eval_rule("check_hashbl_emails");
+sub parse_config {
+    my ($self, $opt) = @_;
 
-    return $self;
+    if ($opt->{key} =~ /^hashbl_acl_([a-z0-9]{1,32})$/i) {
+        $self->inhibit_further_callbacks();
+        return 1 unless $self->{hashbl_available};
+
+        my $acl = lc($1);
+        my @opts = split(/\s+/, $opt->{value});
+        foreach my $tmp (@opts)
+        {
+            if ($tmp =~ /^(\!)?(\S+)$/i) {
+                my $neg = $1;
+                my $value = lc($2);
+
+                if (defined $neg) {
+                    $self->{hashbl_acl}{$acl}{$value} = 0;
+                } else {
+                    next if $acl eq 'all';
+                    # exclusions overrides
+                    if ( not defined $self->{hashbl_acl}{$acl}{$value} ) {
+                      $self->{hashbl_acl}{$acl}{$value} = 1
+                    }
+                }
+            } else {
+                warn("invalid acl: $tmp");
+            }
+        }
+        return 1;
+    }
+    return 0;
 }
 
 sub finish_parsing_end {
@@ -99,187 +202,257 @@ sub _init_email_regex {
 
     # Some regexp tips courtesy of http://www.regular-expressions.info/email.html
     # full email regex v0.02
-    $self->{email_regex} = qr/
-      (?=.{0,64}\@)				# limit userpart to 64 chars (and speed up searching?)
-      (?<![a-z0-9!#\$%&'*+\/=?^_`{|}~-])	# start boundary
-      (						# capture email
-      [a-z0-9!#\$%&'*+\/=?^_`{|}~-]+		# no dot in beginning
-      (?:\.[a-z0-9!#\$%&'*+\/=?^_`{|}~-]+)*	# no consecutive dots, no ending dot
-      \@
-      (?:[a-z0-9](?:[a-z0-9-]{0,59}[a-z0-9])?\.){1,4} # max 4x61 char parts (should be enough?)
-      $self->{main}->{registryboundaries}->{valid_tlds_re}	# ends with valid tld
-      )
-    /xi;
-}
+  $self->{email_re} = qr/
+    (?=.{0,64}\@)			# limit userpart to 64 chars (and speed up searching?)
+    (?<![a-z0-9!#\$%&'*+\/=?^_`{|}~-])	# start boundary
+    (					# capture email
+    [a-z0-9!#\$%&'*+\/=?^_`{|}~-]+	# no dot in beginning
+    (?:\.[a-z0-9!#\$%&'*+\/=?^_`{|}~-]+)* # no consecutive dots, no ending dot
+    \@
+    (?:[a-z0-9](?:[a-z0-9-]{0,59}[a-z0-9])?\.){1,4} # max 4x61 char parts (should be enough?)
+    $self->{main}->{registryboundaries}->{valid_tlds_re} # ends with valid tld
+    )
+  /xi;
+
+  # default email whitelist
+  $self->{email_whitelist} = qr/
+    ^(?:
+        abuse|support|sales|info|helpdesk|contact|kontakt
+      | (?:post|host|domain)master
+      | undisclosed.*                     # yahoo.com etc(?)
+      | request-[a-f0-9]{16}              # live.com
+      | bounced?-                         # yahoo.com etc
+      | [a-f0-9]{8}(?:\.[a-f0-9]{8}|-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}) # gmail msgids?
+      | .+=.+=.+                          # gmail forward
+    )\@
+  /xi;
+}
+
+sub _get_emails {
+  my ($self, $pms, $opts, $from, $acl) = @_;
+
+  my @emails; # keep find order
+  my %seen;
+  my @tmp_email;
+  my $domain;
+
+  foreach my $hdr (split(/\//, $from)) {
+    my $parsed_emails = $self->_parse_emails($pms, $opts, $hdr);
+    foreach (@$parsed_emails) {
+      next if exists $seen{$_};
+      my @tmp_email = split('@', $_);
+      my $domain = $tmp_email[1];
+      if (defined($acl) and ($acl ne "all")) {
+        if (defined($self->{hashbl_acl}{$acl}{$domain}) and ($self->{hashbl_acl}{$acl}{$domain} eq 1)) {
+          push @emails, $_;
+          $seen{$_} = 1;
+        }
+      } else {
+        push @emails, $_;
+        $seen{$_} = 1;
+      }
+    }
+  }
 
-sub set_config {
-    my ($self, $conf) = @_;
-    my @cmds;
+  return \@emails;
 }
 
-sub parse_config {
-    my ($self, $opts) = @_;
-    return 0;
-}
+sub _parse_emails {
+  my ($self, $pms, $opts, $hdr) = @_;
 
-sub _parse_headers {
-    my ($self, $pms) = @_;
+  if (exists $pms->{hashbl_email_cache}{$hdr}) {
+    return $pms->{hashbl_email_cache}{$hdr};
+  }
 
-    if (not defined $pms->{hashbl_email_cache}) {
-        %{$pms->{hashbl_email_cache}{'headers'}} = ();
-    }
+  if ($hdr eq 'ALLFROM') {
+    my @emails = $pms->all_from_addrs();
+    return $pms->{hashbl_email_cache}{$hdr} = \@emails;
+  }
 
-    my @headers = ('EnvelopeFrom', 'Sender', 'From', 'Reply-To');
+  if (not defined $pms->{hashbl_whitelist}) {
+    %{$pms->{hashbl_whitelist}} = map { lc($_) => 1 }
+        ( $pms->get("X-Original-To:addr"),
+          $pms->get("Apparently-To:addr"),
+          $pms->get("Delivered-To:addr"),
+          $pms->get("Envelope-To:addr"),
+        );
+    if ( defined $pms->{hashbl_whitelist}{''} ) {
+      delete $pms->{hashbl_whitelist}{''};
+    }
+  }
 
-    foreach my $header (@headers) {
-        my $email = $pms->get($header . ':addr');
-        if ($email) {
-            dbg("Found email $email in header $header");
-            $pms->{hashbl_email_cache}{'headers'}{$email} = 1;
+  my $str = '';
+  if ($hdr eq 'ALL') {
+    $str = join("\n", $pms->get('ALL'));
+  } elsif ($hdr eq 'body') {
+    # get all <a href="mailto:", since they don't show up on stripped_body
+    my $uris = $pms->get_uri_detail_list();
+    while (my($uri, $info) = each %{$uris}) {
+      if (defined $info->{types}->{a} && !defined $info->{types}->{parsed}) {
+        if ($uri =~ /^mailto:(.+)/i) {
+          $str .= "$1\n";
         }
+      }
     }
+    my $body = join('', $pms->get_decoded_stripped_body_text_array());
+    if ($opts =~ /\bnouri\b/) {
+      # strip urls with possible emails inside
+      $body =~ s#<?https?://\S{0,255}(?:\@|%40)\S{0,255}# #gi;
+    }
+    if ($opts =~ /\bnoquote\b/) {
+      # strip emails contained in <>, not mailto:
+      # also strip ones followed by quote-like "wrote:" (but not fax: and tel: etc)
+      $body =~ s#<?(?<!mailto:)$self->{email_re}(?:>|\s{1,10}(?!(?:fa(?:x|csi)|tel|phone|e?-?mail))[a-z]{2,11}:)# #gi;
+    }
+    $str .= $body;
+  } else {
+    $str .= join("\n", $pms->get($hdr));
+  }
+
+  my @emails; # keep find order
+  my %seen;
 
-    return 1;
+  while ($str =~ /($self->{email_re})/g) {
+    next if exists $seen{$1};
+    push @emails, $1;
+  }
+
+  return $pms->{hashbl_email_cache}{$hdr} = \@emails;
 }
 
-sub _parse_body {
-    my ($self, $pms) = @_;
+sub check_hashbl_emails {
+  my ($self, $pms, $list, $opts, $from, $subtest, $acl) = @_;
 
-    # Parse body
-    if (not defined $pms->{hashbl_email_cache}) {
-        %{$pms->{hashbl_email_cache}{'body'}} = ();
-    }
+  return 0 if !$self->{hashbl_available};
+  return 0 if !$self->{email_re};
 
-    my %seen;
-    my @body_emails;
-    # get all <a href="mailto:", since they don't show up on stripped_body
-    my $parsed = $pms->get_uri_detail_list();
-    while (my($uri, $info) = each %{$parsed}) {
-        if (defined $info->{types}->{a} and not defined $info->{types}->{parsed}) {
-            if ($uri =~ /^(?:(?i)mailto):$self->{email_regex}/) {
-                my $email = lc($1);
-                push(@body_emails, $email) unless defined $seen{$email};
-                $seen{$email} = 1;
-                last if scalar @body_emails >= 20; # sanity
-            }
-        }
-    }
-    # scan stripped normalized body
-    # have to do this way since get_uri_detail_list doesn't know what mails are inside <>
-    my $body = $pms->get_decoded_stripped_body_text_array();
-    BODY: foreach (@$body) {
-        # strip urls with possible emails inside
-        s#<?https?://\S{0,255}(?:\@|%40)\S{0,255}# #gi;
-        # strip emails contained in <>, not mailto:
-        # also strip ones followed by quote-like "wrote:" (but not fax: and tel: etc)
-        s#<?(?<!mailto:)$self->{email_regex}(?:>|\s{1,10}(?!(?:fa(?:x|csi)|tel|phone|e?-?mail))[a-z]{2,11}:)# #gi;
-        while (/$self->{email_regex}/g) {
-            my $email = lc($1);
-            push(@body_emails, $email) unless defined $seen{$email};
-            $seen{$email} = 1;
-            last BODY if scalar @body_emails >= 40; # sanity
-        }
-    }
-    foreach my $email (@body_emails) {
-        dbg("Found email $email in body");
-        $pms->{hashbl_email_cache}{'body'}{$email} = 1;
-    }
-    
-    return 1;
-}
+  my $rulename = $pms->get_current_eval_rule_name();
 
-sub _got_hit {
-    my ($self, $pms, $rulename, $email) = @_;
+  if (!defined $list) {
+    info("HashBL: $rulename blocklist argument missing");
+    return 0;
+  }
 
-    $email =~ s/\@/[at]/g;
-    $pms->test_log($email);
-    $pms->got_hit($rulename, "", ruletype => 'eval');
-}
+  if ($subtest && !eval { $subtest = qr/$subtest/ }) {
+    info("HashBL: $rulename invalid subtest regex: $@");
+    return 0;
+  }
 
-sub _submit_email_query {
-    my ($self, $pms, $list, $type, $email) = @_;
-    my $rulename = $pms->get_current_eval_rule_name();
-    my ($hash, $lookup, $key);
-    if (uc($type) eq 'SHA1') {
-        $hash = sha1_hex($email);
-    }
-    elsif (uc($type) eq 'MD5') {
-        $hash = md5_hex($email);
-    }
-    $lookup = "$hash.$list.";
-    my $obj = { email => $email };
-    dbg("list: $list, type: $type, email: $email, hash: $hash, lookup: $lookup");
-    $key = "HASHBL_EMAIL:$lookup";
-    my $ent = {
-        key => $key,
-        zone => $list,
-        obj => $obj,
-        type => 'HASHBL',
-        rulename => $rulename,
-    };
+  # Defaults
+  $opts = 'sha1/notag/noquote/max=10/shuffle' if !$opts;
 
-    $ent = $pms->{async}->bgsend_and_start_lookup($lookup, 'A', undef, $ent, sub {
-        my ($ent2, $pkt) = @_;
-        $self->_finish_email_lookup($pms, $ent2, $pkt);
-    }, master_deadline => $pms->{master_deadline} );
+  $from = 'ALLFROM/Reply-To/body' if !$from;
 
-    return $ent;   
-}
+  # Find all emails
+  my $emails = $self->_get_emails($pms, $opts, $from, $acl);
+  if (!@$emails) {
+    dbg("$rulename: no emails found ($from)");
+    return 0;
+  } else {
+    dbg("$rulename: raw emails found: ".join(', ', @$emails));
+  }
 
-sub _finish_email_lookup {
-  my ($self, $pms, $ent, $pkt) = @_;
+  # Filter list
+  my $keep_case = $opts =~ /\bcase\b/i;
+  my $nodot = $opts =~ /\bnodot\b/i;
+  my $notag = $opts =~ /\bnotag\b/i;
+  my @filtered_emails; # keep order
+  my %seen;
+  foreach my $email (@$emails) {
+    next if exists $seen{$email};
+    if (($email =~ $self->{email_whitelist}) or defined ($pms->{hashbl_whitelist}{$email})) {
+      dbg("Address whitelisted: $email");
+      next;
+    }
+    if ($nodot || $notag) {
+      my ($username, $domain) = ($email =~ /(.*)(\@.*)/);
+      $username =~ tr/.//d if $nodot;
+      $username =~ s/\+.*// if $notag;
+      $email = $username.$domain;
+    }
+    push @filtered_emails, $keep_case ? $email : lc($email);
+    $seen{$email} = 1;
+  }
 
-  if (!$pkt) {
-      # $pkt will be undef if the DNS query was aborted (e.g. timed out)
-      dbg("_finish_email_lookup aborted: ",
-          $ent->{rulename}, $ent->{key});
-      return;
+  # Randomize order
+  if ($opts =~ /\bshuffle\b/) {
+    Mail::SpamAssassin::Util::fisher_yates_shuffle(\@filtered_emails);
   }
 
-  my $email = $ent->{obj}->{email};
+  # Truncate list
+  my $max = $opts =~ /\bmax=(\d+)\b/ ? $1 : 10;
+  $#filtered_emails = $max if scalar @filtered_emails > $max;
 
-  dbg("_finish_email_lookup: ", $ent->{rulename}, $ent->{key}, $email);
- 
-  my @answer = $pkt->answer;
-  foreach my $rr (@answer) {
-      if ($rr->address =~ /^127\./) {
-          $self->_got_hit($pms, $ent->{rulename}, $email);
-          $pms->register_async_rule_finish($ent->{rulename});
-      }
+  foreach my $email (@filtered_emails) {
+    $self->_submit_query($pms, $rulename, $email, $list, $opts, $subtest);
   }
+
+  return 0;
 }
 
-sub check_hashbl_emails {
-    my ($self, $pms, $list, $type) = @_;
+sub _hash {
+  my ($self, $opts, $value) = @_;
 
-    return 0 unless $self->{hashbl_available};
+  my $hashtype = $opts =~ /\b(raw|sha1|sha256|md5)\b/i ? lc($1) : 'sha1';
+  if ($hashtype eq 'sha1') {
+    return sha1_hex($value);
+  } elsif ($hashtype eq 'sha256') {
+    return sha256_hex($value);
+  } elsif ($hashtype eq 'md5') {
+    return md5_hex($value);
+  } else {
+    return $value;
+  }
+}
 
-    my $rulename = $pms->get_current_eval_rule_name();
+sub _submit_query {
+  my ($self, $pms, $rulename, $value, $list, $opts, $subtest) = @_;
 
-    # First we lookup all unique email addresses found in the headers
-    return 0 unless $self->_parse_headers($pms);
-    foreach my $email (keys %{$pms->{hashbl_email_cache}{'headers'}}) {
-        # Remove this from the body hash
-        delete $pms->{hashbl_email_cache}{'body'}{$email};
-        dbg("HEADER: $email");
-        $self->_submit_email_query($pms, $list, (($type) ? $type : 'SHA1'), $email);
-    }
+  my $hash = $self->_hash($opts, $value);
+  dbg("querying $value ($hash) from $list");
 
-    # Check any e-mail addresses found in the message body
-    return 0 unless $self->_parse_body($pms);
+  my $type = $list =~ s,/(A|TXT)$,,i ? uc($1) : 'A';
+  my $lookup = "$hash.$list";
 
-    my (@emails) = keys %{$pms->{hashbl_email_cache}{'body'}};
+  my $key = "HASHBL_EMAIL:$lookup";
+  my $ent = {
+    key => $key,
+    zone => $list,
+    rulename => $rulename,
+    type => "HASHBL",
+    hash => $hash,
+    value => $value,
+    subtest => $subtest,
+  };
+  $pms->{async}->bgsend_and_start_lookup($lookup, $type, undef, $ent,
+    sub { my ($ent, $pkt) = @_; $self->_finish_query($pms, $ent, $pkt); },
+    master_deadline => $pms->{master_deadline}
+  );
+}
 
-    # Randomize order and truncate the array to 10 items maximum
-    Mail::SpamAssassin::Util::fisher_yates_shuffle(\@emails);
-    $#emails = 9 if (scalar @emails > 10);
+sub _finish_query {
+  my ($self, $pms, $ent, $pkt) = @_;
 
-    foreach my $email (@emails) {
-        dbg("BODY: $email");
-        $self->_submit_email_query($pms, $list, (($type) ? $type : 'SHA1'), $email);
-    }
+  if (!$pkt) {
+    # $pkt will be undef if the DNS query was aborted (e.g. timed out)
+    dbg("lookup was aborted: $ent->{rulename} $ent->{key}");
+    return;
+  }
 
-    return 0;
+  my $dnsmatch = $ent->{subtest} ? $ent->{subtest} : qr/^127\./;
+  my @answer = $pkt->answer;
+  foreach my $rr (@answer) {
+    if ($rr->address =~ $dnsmatch) {
+      dbg("$ent->{rulename}: $ent->{zone} hit '$ent->{value}'");
+      $ent->{value} =~ s/\@/[at]/g;
+      $pms->test_log($ent->{value});
+      $pms->got_hit($ent->{rulename}, '', ruletype => 'eval');
+      return;
+    }
+  }
 }
 
+# Version features
+sub has_hashbl_ignore { 1 }
+
 1;

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm?rev=1856894&r1=1856893&r2=1856894&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm Wed Apr  3 18:18:17 2019
@@ -25,7 +25,11 @@ HashBL - query hashed (and unhashed) DNS
 
   header   HASHBL_EMAIL eval:check_hashbl_emails('ebl.msbl.org')
   describe HASHBL_EMAIL Message contains email address found on EBL
-  priority HASHBL_EMAIL -100 # required priority to launch async lookups
+
+  hashbl_acl_freemail gmail.com
+  header  HASHBL_OSENDR  eval:check_hashbl_emails('rbl.example.com/A', 'md5/max=10/shuffle', 'X-Original-Sender', '^127\.', 'freemail')
+  describe HASHBL_OSENDR Message contains email address found on HASHBL
+  tflags  HASHBL_OSENDR  net
 
   body     HASHBL_BTC eval:check_hashbl_bodyre('btcbl.foo.bar', 'sha1/max=10/shuffle', '\b([13][a-km-zA-HJ-NP-Z1-9]{25,34})\b')
   describe HASHBL_BTC Message contains BTC address found on BTCBL
@@ -89,13 +93,6 @@ Search body for matching regexp and quer
 have a single capture ( ) for the string ($1).  Optional subtest regexp to
 match DNS answer.  Note that eval rule type must be "body" or "rawbody".
 
-Default OPTS: sha1/max=10/shuffle
-
-=item hashbl_ignore string string2 ...
-
-Ignore (do not query) specified emails or captured strings.
-Both raw and hashed values are checked.
-
 =back
 
 =cut
@@ -163,6 +160,59 @@ sub set_config {
   $conf->{parser}->register_commands(\@cmds);
 }
 
+sub _parse_args {
+    my ($self, $acl) = @_;
+
+    if (not defined $acl) {
+      return ();
+    }
+    $acl =~ s/\s+//g;
+    if ($acl !~ /^[a-z0-9]{1,32}$/) {
+        warn("invalid acl name: $acl");
+        return ();
+    }
+    if ($acl eq 'all') {
+        return ();
+    }
+    if (defined $self->{hashbl_acl}{$acl}) {
+        warn("no such acl defined: $acl");
+        return ();
+    }
+}
+
+sub parse_config {
+    my ($self, $opt) = @_;
+
+    if ($opt->{key} =~ /^hashbl_acl_([a-z0-9]{1,32})$/i) {
+        $self->inhibit_further_callbacks();
+        return 1 unless $self->{hashbl_available};
+
+        my $acl = lc($1);
+        my @opts = split(/\s+/, $opt->{value});
+        foreach my $tmp (@opts)
+        {
+            if ($tmp =~ /^(\!)?(\S+)$/i) {
+                my $neg = $1;
+                my $value = lc($2);
+
+                if (defined $neg) {
+                    $self->{hashbl_acl}{$acl}{$value} = 0;
+                } else {
+                    next if $acl eq 'all';
+                    # exclusions overrides
+                    if ( not defined $self->{hashbl_acl}{$acl}{$value} ) {
+                      $self->{hashbl_acl}{$acl}{$value} = 1
+                    }
+                }
+            } else {
+                warn("invalid acl: $tmp");
+            }
+        }
+        return 1;
+    }
+    return 0;
+}
+
 sub finish_parsing_end {
   my ($self, $opts) = @_;
 
@@ -193,19 +243,44 @@ sub _init_email_re {
     $self->{main}->{registryboundaries}->{valid_tlds_re} # ends with valid tld
     )
   /xi;
+
+  # default email whitelist
+  $self->{email_whitelist} = qr/
+    ^(?:
+        abuse|support|sales|info|helpdesk|contact|kontakt
+      | (?:post|host|domain)master
+      | undisclosed.*                     # yahoo.com etc(?)
+      | request-[a-f0-9]{16}              # live.com
+      | bounced?-                         # yahoo.com etc
+      | [a-f0-9]{8}(?:\.[a-f0-9]{8}|-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}) # gmail msgids?
+      | .+=.+=.+                          # gmail forward
+    )\@
+  /xi;  
 }
 
 sub _get_emails {
-  my ($self, $pms, $opts, $from) = @_;
+  my ($self, $pms, $opts, $from, $acl) = @_;
 
   my @emails; # keep find order
   my %seen;
-  foreach my $hdr (split(/\W+/, $from)) {
+  my @tmp_email;
+  my $domain;
+
+  foreach my $hdr (split(/\//, $from)) {
     my $parsed_emails = $self->_parse_emails($pms, $opts, $hdr);
     foreach (@$parsed_emails) {
       next if exists $seen{$_};
-      push @emails, $_;
-      $seen{$_} = 1;
+      my @tmp_email = split('@', $_);
+      my $domain = $tmp_email[1];
+      if (defined($acl) and ($acl ne "all")) {
+        if (defined($self->{hashbl_acl}{$acl}{$domain}) and ($self->{hashbl_acl}{$acl}{$domain} eq 1)) {
+          push @emails, $_;
+          $seen{$_} = 1;
+        }
+      } else {
+        push @emails, $_;
+        $seen{$_} = 1;
+      }
     }
   }
 
@@ -224,6 +299,18 @@ sub _parse_emails {
     return $pms->{hashbl_email_cache}{$hdr} = \@emails;
   }
 
+  if (not defined $pms->{hashbl_whitelist}) {
+    %{$pms->{hashbl_whitelist}} = map { lc($_) => 1 }
+        ( $pms->get("X-Original-To:addr"),
+          $pms->get("Apparently-To:addr"),
+          $pms->get("Delivered-To:addr"),
+          $pms->get("Envelope-To:addr"),
+        );
+    if ( defined $pms->{hashbl_whitelist}{''} ) {
+      delete $pms->{hashbl_whitelist}{''};
+    }
+  }
+
   my $str = '';
   if ($hdr eq 'ALL') {
     $str = join("\n", $pms->get('ALL'));
@@ -254,6 +341,7 @@ sub _parse_emails {
 
   my @emails; # keep find order
   my %seen;
+
   while ($str =~ /($self->{email_re})/g) {
     next if exists $seen{$1};
     push @emails, $1;
@@ -263,7 +351,7 @@ sub _parse_emails {
 }
 
 sub check_hashbl_emails {
-  my ($self, $pms, $list, $opts, $from, $subtest) = @_;
+  my ($self, $pms, $list, $opts, $from, $subtest, $acl) = @_;
 
   return 0 if !$self->{hashbl_available};
   return 0 if !$self->{email_re};
@@ -286,7 +374,7 @@ sub check_hashbl_emails {
   $from = 'ALLFROM/Reply-To/body' if !$from;
 
   # Find all emails
-  my $emails = $self->_get_emails($pms, $opts, $from);
+  my $emails = $self->_get_emails($pms, $opts, $from, $acl);
   if (!@$emails) {
     dbg("$rulename: no emails found ($from)");
     return 0;
@@ -302,6 +390,10 @@ sub check_hashbl_emails {
   my %seen;
   foreach my $email (@$emails) {
     next if exists $seen{$email};
+    if (($email =~ $self->{email_whitelist}) or defined ($pms->{hashbl_whitelist}{$email})) {
+      dbg("Address whitelisted: $email");
+      next;
+    }
     if ($nodot || $notag) {
       my ($username, $domain) = ($email =~ /(.*)(\@.*)/);
       $username =~ tr/.//d if $nodot;