You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by gb...@apache.org on 2019/04/03 18:18:17 UTC
svn commit: r1856894 - in /spamassassin:
branches/3.4/lib/Mail/SpamAssassin/Plugin/HashBL.pm
trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm
Author: gbechis
Date: Wed Apr 3 18:18:17 2019
New Revision: 1856894
URL: http://svn.apache.org/viewvc?rev=1856894&view=rev
Log:
Add check_hashbl_emails from trunk
Add the possibility to specify an acl to be able
to check only some domains against an hashbl rbl
Modified:
spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/HashBL.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm
Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/HashBL.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/HashBL.pm?rev=1856894&r1=1856893&r2=1856894&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/HashBL.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/HashBL.pm Wed Apr 3 18:18:17 2019
@@ -1,6 +1,3 @@
-# Author: Steve Freegard <st...@fsl.com>
-# Copyright 2016 Steve Freegard
-#
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
@@ -21,32 +18,78 @@
=head1 NAME
-HashBL - seearch email addresses in HashBL blocklists
+HashBL - query hashed (and unhashed) DNS blocklists
=head1 SYNOPSIS
loadplugin Mail::SpamAssassin::Plugin::HashBL
- header HASHBL_EMAIL eval:check_hashbl_emails('ebl.msbl.org')
- describe HASHBL_EMAIL Message contains email address found on EBL
+
+ header HASHBL_EMAIL eval:check_hashbl_emails('ebl.msbl.org')
+ describe HASHBL_EMAIL Message contains email address found on EBL
+
+ hashbl_acl_freemail gmail.com
+ header HASHBL_OSENDR eval:check_hashbl_emails('rbl.example.com/A', 'md5/max=10/shuffle', 'X-Original-Sender', '^127\.', 'freemail')
+ describe HASHBL_OSENDR Message contains email address found on HASHBL
+ tflags HASHBL_OSENDR net
=head1 DESCRIPTION
-The Email Blocklist (EBL) contains email addresses used to receive responses to spam emails.
-These email addresses are sometimes called contact email addresses or
-drop boxes.
-The initial target of this blocklist was "Nigerian" 419 Advance Fee Fraud spam. As time passed and more types of spam that used drop boxes was identified,
-these drop boxes also were listed.
-The EBL now lists significant numbers of drop boxes used in spam sent
-by Chinese manufacturers of high-tech and light industrial products,
-SEO/web development companies, direct spam services, list sellers, and a number
-of fraudulent or outright illegal products sold by botnets.
+This plugin support multiple types of hashed or unhashed DNS blocklists.
+
+OPTS refers to multiple generic options:
+
+ raw do not hash data, query as is
+ md5 hash query with MD5
+ sha1 hash query with SHA1
+ sha256 hash query with SHA256
+ case keep case before hashing, default is to lowercase
+ max=x maximum number of queries
+ shuffle if max exceeded, random shuffle queries before truncating to limit
+
+Multiple options can be separated with slash or other non-word character.
+If OPTS is empty ('') or missing, default is used.
+
+HEADERS referes to slash separated list of Headers to process:
+
+ ALL all headers
+ ALLFROM all From headers as returned by $pms->all_from_addrs()
+ EnvelopeFrom message envelope from (Return-Path etc)
+ HeaderName any header as used with $pms->get()
+
+if HEADERS is empty ('') or missing, default is used.
+
+=over 4
+
+=item header RULE check_hashbl_emails('bl.example.com/A', 'OPTS', 'HEADERS/body', '^127\.')
+
+Check email addresses from DNS list, "body" can be specified along with
+headers to search body for emails. Optional subtest regexp to match DNS
+answer. Note that eval rule type must always be "header".
+
+DNS query type can be appended to list with /A (default) or /TXT.
+
+Additional supported OPTS:
+
+ nodot strip username dots from email
+ notag strip username tags from email
+ nouri ignore emails inside uris
+ noquote ignore emails inside < > or possible quotings
+
+Default OPTS: sha1/notag/noquote/max=10/shuffle
+
+Default HEADERS: ALLFROM/Reply-To/body
+
+For existing public email blacklist, see: http://msbl.org/ebl.html
+
+ header HASHBL_EBL check_hashbl_emails('ebl.msbl.org')
+ priority HASHBL_EBL -100 # required for async query
=cut
package Mail::SpamAssassin::Plugin::HashBL;
use strict;
use warnings;
-my $VERSION = 0.001;
+my $VERSION = 0.90;
use Mail::SpamAssassin::Plugin;
use Mail::SpamAssassin::PerMsgStatus;
@@ -60,24 +103,84 @@ use vars qw(@ISA $email_whitelist $skip_
sub dbg { Mail::SpamAssassin::Plugin::dbg ("HashBL: @_"); }
sub new {
- my ($class, $mailsa) = @_;
+ my ($class, $mailsa) = @_;
+
+ $class = ref($class) || $class;
+ my $self = $class->SUPER::new($mailsa);
+ bless ($self, $class);
+
+ # are network tests enabled?
+ if ($mailsa->{local_tests_only}) {
+ $self->{hashbl_available} = 0;
+ dbg("local tests only, disabling HashBL");
+ } else {
+ $self->{hashbl_available} = 1;
+ }
+
+ $self->register_eval_rule("check_hashbl_emails");
+ $self->set_config($mailsa->{conf});
+
+ return $self;
+}
+
+sub set_config {
+ my($self, $conf) = @_;
+ my @cmds = ();
- $class = ref($class) || $class;
- my $self = $class->SUPER::new($mailsa);
- bless ($self, $class);
+ $conf->{parser}->register_commands(\@cmds);
+}
+
+sub _parse_args {
+ my ($self, $acl) = @_;
- # are network tests enabled?
- if ($mailsa->{local_tests_only}) {
- $self->{hashbl_available} = 0;
- dbg("local tests only, disabling HashBL");
- } else {
- $self->{hashbl_available} = 1;
+ if (not defined $acl) {
+ return ();
+ }
+ $acl =~ s/\s+//g;
+ if ($acl !~ /^[a-z0-9]{1,32}$/) {
+ warn("invalid acl name: $acl");
+ return ();
+ }
+ if ($acl eq 'all') {
+ return ();
}
+ if (defined $self->{hashbl_acl}{$acl}) {
+ warn("no such acl defined: $acl");
+ return ();
+ }
+}
- $self->set_config($mailsa->{conf});
- $self->register_eval_rule("check_hashbl_emails");
+sub parse_config {
+ my ($self, $opt) = @_;
- return $self;
+ if ($opt->{key} =~ /^hashbl_acl_([a-z0-9]{1,32})$/i) {
+ $self->inhibit_further_callbacks();
+ return 1 unless $self->{hashbl_available};
+
+ my $acl = lc($1);
+ my @opts = split(/\s+/, $opt->{value});
+ foreach my $tmp (@opts)
+ {
+ if ($tmp =~ /^(\!)?(\S+)$/i) {
+ my $neg = $1;
+ my $value = lc($2);
+
+ if (defined $neg) {
+ $self->{hashbl_acl}{$acl}{$value} = 0;
+ } else {
+ next if $acl eq 'all';
+ # exclusions overrides
+ if ( not defined $self->{hashbl_acl}{$acl}{$value} ) {
+ $self->{hashbl_acl}{$acl}{$value} = 1
+ }
+ }
+ } else {
+ warn("invalid acl: $tmp");
+ }
+ }
+ return 1;
+ }
+ return 0;
}
sub finish_parsing_end {
@@ -99,187 +202,257 @@ sub _init_email_regex {
# Some regexp tips courtesy of http://www.regular-expressions.info/email.html
# full email regex v0.02
- $self->{email_regex} = qr/
- (?=.{0,64}\@) # limit userpart to 64 chars (and speed up searching?)
- (?<![a-z0-9!#\$%&'*+\/=?^_`{|}~-]) # start boundary
- ( # capture email
- [a-z0-9!#\$%&'*+\/=?^_`{|}~-]+ # no dot in beginning
- (?:\.[a-z0-9!#\$%&'*+\/=?^_`{|}~-]+)* # no consecutive dots, no ending dot
- \@
- (?:[a-z0-9](?:[a-z0-9-]{0,59}[a-z0-9])?\.){1,4} # max 4x61 char parts (should be enough?)
- $self->{main}->{registryboundaries}->{valid_tlds_re} # ends with valid tld
- )
- /xi;
-}
+ $self->{email_re} = qr/
+ (?=.{0,64}\@) # limit userpart to 64 chars (and speed up searching?)
+ (?<![a-z0-9!#\$%&'*+\/=?^_`{|}~-]) # start boundary
+ ( # capture email
+ [a-z0-9!#\$%&'*+\/=?^_`{|}~-]+ # no dot in beginning
+ (?:\.[a-z0-9!#\$%&'*+\/=?^_`{|}~-]+)* # no consecutive dots, no ending dot
+ \@
+ (?:[a-z0-9](?:[a-z0-9-]{0,59}[a-z0-9])?\.){1,4} # max 4x61 char parts (should be enough?)
+ $self->{main}->{registryboundaries}->{valid_tlds_re} # ends with valid tld
+ )
+ /xi;
+
+ # default email whitelist
+ $self->{email_whitelist} = qr/
+ ^(?:
+ abuse|support|sales|info|helpdesk|contact|kontakt
+ | (?:post|host|domain)master
+ | undisclosed.* # yahoo.com etc(?)
+ | request-[a-f0-9]{16} # live.com
+ | bounced?- # yahoo.com etc
+ | [a-f0-9]{8}(?:\.[a-f0-9]{8}|-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}) # gmail msgids?
+ | .+=.+=.+ # gmail forward
+ )\@
+ /xi;
+}
+
+sub _get_emails {
+ my ($self, $pms, $opts, $from, $acl) = @_;
+
+ my @emails; # keep find order
+ my %seen;
+ my @tmp_email;
+ my $domain;
+
+ foreach my $hdr (split(/\//, $from)) {
+ my $parsed_emails = $self->_parse_emails($pms, $opts, $hdr);
+ foreach (@$parsed_emails) {
+ next if exists $seen{$_};
+ my @tmp_email = split('@', $_);
+ my $domain = $tmp_email[1];
+ if (defined($acl) and ($acl ne "all")) {
+ if (defined($self->{hashbl_acl}{$acl}{$domain}) and ($self->{hashbl_acl}{$acl}{$domain} eq 1)) {
+ push @emails, $_;
+ $seen{$_} = 1;
+ }
+ } else {
+ push @emails, $_;
+ $seen{$_} = 1;
+ }
+ }
+ }
-sub set_config {
- my ($self, $conf) = @_;
- my @cmds;
+ return \@emails;
}
-sub parse_config {
- my ($self, $opts) = @_;
- return 0;
-}
+sub _parse_emails {
+ my ($self, $pms, $opts, $hdr) = @_;
-sub _parse_headers {
- my ($self, $pms) = @_;
+ if (exists $pms->{hashbl_email_cache}{$hdr}) {
+ return $pms->{hashbl_email_cache}{$hdr};
+ }
- if (not defined $pms->{hashbl_email_cache}) {
- %{$pms->{hashbl_email_cache}{'headers'}} = ();
- }
+ if ($hdr eq 'ALLFROM') {
+ my @emails = $pms->all_from_addrs();
+ return $pms->{hashbl_email_cache}{$hdr} = \@emails;
+ }
- my @headers = ('EnvelopeFrom', 'Sender', 'From', 'Reply-To');
+ if (not defined $pms->{hashbl_whitelist}) {
+ %{$pms->{hashbl_whitelist}} = map { lc($_) => 1 }
+ ( $pms->get("X-Original-To:addr"),
+ $pms->get("Apparently-To:addr"),
+ $pms->get("Delivered-To:addr"),
+ $pms->get("Envelope-To:addr"),
+ );
+ if ( defined $pms->{hashbl_whitelist}{''} ) {
+ delete $pms->{hashbl_whitelist}{''};
+ }
+ }
- foreach my $header (@headers) {
- my $email = $pms->get($header . ':addr');
- if ($email) {
- dbg("Found email $email in header $header");
- $pms->{hashbl_email_cache}{'headers'}{$email} = 1;
+ my $str = '';
+ if ($hdr eq 'ALL') {
+ $str = join("\n", $pms->get('ALL'));
+ } elsif ($hdr eq 'body') {
+ # get all <a href="mailto:", since they don't show up on stripped_body
+ my $uris = $pms->get_uri_detail_list();
+ while (my($uri, $info) = each %{$uris}) {
+ if (defined $info->{types}->{a} && !defined $info->{types}->{parsed}) {
+ if ($uri =~ /^mailto:(.+)/i) {
+ $str .= "$1\n";
}
+ }
}
+ my $body = join('', $pms->get_decoded_stripped_body_text_array());
+ if ($opts =~ /\bnouri\b/) {
+ # strip urls with possible emails inside
+ $body =~ s#<?https?://\S{0,255}(?:\@|%40)\S{0,255}# #gi;
+ }
+ if ($opts =~ /\bnoquote\b/) {
+ # strip emails contained in <>, not mailto:
+ # also strip ones followed by quote-like "wrote:" (but not fax: and tel: etc)
+ $body =~ s#<?(?<!mailto:)$self->{email_re}(?:>|\s{1,10}(?!(?:fa(?:x|csi)|tel|phone|e?-?mail))[a-z]{2,11}:)# #gi;
+ }
+ $str .= $body;
+ } else {
+ $str .= join("\n", $pms->get($hdr));
+ }
+
+ my @emails; # keep find order
+ my %seen;
- return 1;
+ while ($str =~ /($self->{email_re})/g) {
+ next if exists $seen{$1};
+ push @emails, $1;
+ }
+
+ return $pms->{hashbl_email_cache}{$hdr} = \@emails;
}
-sub _parse_body {
- my ($self, $pms) = @_;
+sub check_hashbl_emails {
+ my ($self, $pms, $list, $opts, $from, $subtest, $acl) = @_;
- # Parse body
- if (not defined $pms->{hashbl_email_cache}) {
- %{$pms->{hashbl_email_cache}{'body'}} = ();
- }
+ return 0 if !$self->{hashbl_available};
+ return 0 if !$self->{email_re};
- my %seen;
- my @body_emails;
- # get all <a href="mailto:", since they don't show up on stripped_body
- my $parsed = $pms->get_uri_detail_list();
- while (my($uri, $info) = each %{$parsed}) {
- if (defined $info->{types}->{a} and not defined $info->{types}->{parsed}) {
- if ($uri =~ /^(?:(?i)mailto):$self->{email_regex}/) {
- my $email = lc($1);
- push(@body_emails, $email) unless defined $seen{$email};
- $seen{$email} = 1;
- last if scalar @body_emails >= 20; # sanity
- }
- }
- }
- # scan stripped normalized body
- # have to do this way since get_uri_detail_list doesn't know what mails are inside <>
- my $body = $pms->get_decoded_stripped_body_text_array();
- BODY: foreach (@$body) {
- # strip urls with possible emails inside
- s#<?https?://\S{0,255}(?:\@|%40)\S{0,255}# #gi;
- # strip emails contained in <>, not mailto:
- # also strip ones followed by quote-like "wrote:" (but not fax: and tel: etc)
- s#<?(?<!mailto:)$self->{email_regex}(?:>|\s{1,10}(?!(?:fa(?:x|csi)|tel|phone|e?-?mail))[a-z]{2,11}:)# #gi;
- while (/$self->{email_regex}/g) {
- my $email = lc($1);
- push(@body_emails, $email) unless defined $seen{$email};
- $seen{$email} = 1;
- last BODY if scalar @body_emails >= 40; # sanity
- }
- }
- foreach my $email (@body_emails) {
- dbg("Found email $email in body");
- $pms->{hashbl_email_cache}{'body'}{$email} = 1;
- }
-
- return 1;
-}
+ my $rulename = $pms->get_current_eval_rule_name();
-sub _got_hit {
- my ($self, $pms, $rulename, $email) = @_;
+ if (!defined $list) {
+ info("HashBL: $rulename blocklist argument missing");
+ return 0;
+ }
- $email =~ s/\@/[at]/g;
- $pms->test_log($email);
- $pms->got_hit($rulename, "", ruletype => 'eval');
-}
+ if ($subtest && !eval { $subtest = qr/$subtest/ }) {
+ info("HashBL: $rulename invalid subtest regex: $@");
+ return 0;
+ }
-sub _submit_email_query {
- my ($self, $pms, $list, $type, $email) = @_;
- my $rulename = $pms->get_current_eval_rule_name();
- my ($hash, $lookup, $key);
- if (uc($type) eq 'SHA1') {
- $hash = sha1_hex($email);
- }
- elsif (uc($type) eq 'MD5') {
- $hash = md5_hex($email);
- }
- $lookup = "$hash.$list.";
- my $obj = { email => $email };
- dbg("list: $list, type: $type, email: $email, hash: $hash, lookup: $lookup");
- $key = "HASHBL_EMAIL:$lookup";
- my $ent = {
- key => $key,
- zone => $list,
- obj => $obj,
- type => 'HASHBL',
- rulename => $rulename,
- };
+ # Defaults
+ $opts = 'sha1/notag/noquote/max=10/shuffle' if !$opts;
- $ent = $pms->{async}->bgsend_and_start_lookup($lookup, 'A', undef, $ent, sub {
- my ($ent2, $pkt) = @_;
- $self->_finish_email_lookup($pms, $ent2, $pkt);
- }, master_deadline => $pms->{master_deadline} );
+ $from = 'ALLFROM/Reply-To/body' if !$from;
- return $ent;
-}
+ # Find all emails
+ my $emails = $self->_get_emails($pms, $opts, $from, $acl);
+ if (!@$emails) {
+ dbg("$rulename: no emails found ($from)");
+ return 0;
+ } else {
+ dbg("$rulename: raw emails found: ".join(', ', @$emails));
+ }
-sub _finish_email_lookup {
- my ($self, $pms, $ent, $pkt) = @_;
+ # Filter list
+ my $keep_case = $opts =~ /\bcase\b/i;
+ my $nodot = $opts =~ /\bnodot\b/i;
+ my $notag = $opts =~ /\bnotag\b/i;
+ my @filtered_emails; # keep order
+ my %seen;
+ foreach my $email (@$emails) {
+ next if exists $seen{$email};
+ if (($email =~ $self->{email_whitelist}) or defined ($pms->{hashbl_whitelist}{$email})) {
+ dbg("Address whitelisted: $email");
+ next;
+ }
+ if ($nodot || $notag) {
+ my ($username, $domain) = ($email =~ /(.*)(\@.*)/);
+ $username =~ tr/.//d if $nodot;
+ $username =~ s/\+.*// if $notag;
+ $email = $username.$domain;
+ }
+ push @filtered_emails, $keep_case ? $email : lc($email);
+ $seen{$email} = 1;
+ }
- if (!$pkt) {
- # $pkt will be undef if the DNS query was aborted (e.g. timed out)
- dbg("_finish_email_lookup aborted: ",
- $ent->{rulename}, $ent->{key});
- return;
+ # Randomize order
+ if ($opts =~ /\bshuffle\b/) {
+ Mail::SpamAssassin::Util::fisher_yates_shuffle(\@filtered_emails);
}
- my $email = $ent->{obj}->{email};
+ # Truncate list
+ my $max = $opts =~ /\bmax=(\d+)\b/ ? $1 : 10;
+ $#filtered_emails = $max if scalar @filtered_emails > $max;
- dbg("_finish_email_lookup: ", $ent->{rulename}, $ent->{key}, $email);
-
- my @answer = $pkt->answer;
- foreach my $rr (@answer) {
- if ($rr->address =~ /^127\./) {
- $self->_got_hit($pms, $ent->{rulename}, $email);
- $pms->register_async_rule_finish($ent->{rulename});
- }
+ foreach my $email (@filtered_emails) {
+ $self->_submit_query($pms, $rulename, $email, $list, $opts, $subtest);
}
+
+ return 0;
}
-sub check_hashbl_emails {
- my ($self, $pms, $list, $type) = @_;
+sub _hash {
+ my ($self, $opts, $value) = @_;
- return 0 unless $self->{hashbl_available};
+ my $hashtype = $opts =~ /\b(raw|sha1|sha256|md5)\b/i ? lc($1) : 'sha1';
+ if ($hashtype eq 'sha1') {
+ return sha1_hex($value);
+ } elsif ($hashtype eq 'sha256') {
+ return sha256_hex($value);
+ } elsif ($hashtype eq 'md5') {
+ return md5_hex($value);
+ } else {
+ return $value;
+ }
+}
- my $rulename = $pms->get_current_eval_rule_name();
+sub _submit_query {
+ my ($self, $pms, $rulename, $value, $list, $opts, $subtest) = @_;
- # First we lookup all unique email addresses found in the headers
- return 0 unless $self->_parse_headers($pms);
- foreach my $email (keys %{$pms->{hashbl_email_cache}{'headers'}}) {
- # Remove this from the body hash
- delete $pms->{hashbl_email_cache}{'body'}{$email};
- dbg("HEADER: $email");
- $self->_submit_email_query($pms, $list, (($type) ? $type : 'SHA1'), $email);
- }
+ my $hash = $self->_hash($opts, $value);
+ dbg("querying $value ($hash) from $list");
- # Check any e-mail addresses found in the message body
- return 0 unless $self->_parse_body($pms);
+ my $type = $list =~ s,/(A|TXT)$,,i ? uc($1) : 'A';
+ my $lookup = "$hash.$list";
- my (@emails) = keys %{$pms->{hashbl_email_cache}{'body'}};
+ my $key = "HASHBL_EMAIL:$lookup";
+ my $ent = {
+ key => $key,
+ zone => $list,
+ rulename => $rulename,
+ type => "HASHBL",
+ hash => $hash,
+ value => $value,
+ subtest => $subtest,
+ };
+ $pms->{async}->bgsend_and_start_lookup($lookup, $type, undef, $ent,
+ sub { my ($ent, $pkt) = @_; $self->_finish_query($pms, $ent, $pkt); },
+ master_deadline => $pms->{master_deadline}
+ );
+}
- # Randomize order and truncate the array to 10 items maximum
- Mail::SpamAssassin::Util::fisher_yates_shuffle(\@emails);
- $#emails = 9 if (scalar @emails > 10);
+sub _finish_query {
+ my ($self, $pms, $ent, $pkt) = @_;
- foreach my $email (@emails) {
- dbg("BODY: $email");
- $self->_submit_email_query($pms, $list, (($type) ? $type : 'SHA1'), $email);
- }
+ if (!$pkt) {
+ # $pkt will be undef if the DNS query was aborted (e.g. timed out)
+ dbg("lookup was aborted: $ent->{rulename} $ent->{key}");
+ return;
+ }
- return 0;
+ my $dnsmatch = $ent->{subtest} ? $ent->{subtest} : qr/^127\./;
+ my @answer = $pkt->answer;
+ foreach my $rr (@answer) {
+ if ($rr->address =~ $dnsmatch) {
+ dbg("$ent->{rulename}: $ent->{zone} hit '$ent->{value}'");
+ $ent->{value} =~ s/\@/[at]/g;
+ $pms->test_log($ent->{value});
+ $pms->got_hit($ent->{rulename}, '', ruletype => 'eval');
+ return;
+ }
+ }
}
+# Version features
+sub has_hashbl_ignore { 1 }
+
1;
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm?rev=1856894&r1=1856893&r2=1856894&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm Wed Apr 3 18:18:17 2019
@@ -25,7 +25,11 @@ HashBL - query hashed (and unhashed) DNS
header HASHBL_EMAIL eval:check_hashbl_emails('ebl.msbl.org')
describe HASHBL_EMAIL Message contains email address found on EBL
- priority HASHBL_EMAIL -100 # required priority to launch async lookups
+
+ hashbl_acl_freemail gmail.com
+ header HASHBL_OSENDR eval:check_hashbl_emails('rbl.example.com/A', 'md5/max=10/shuffle', 'X-Original-Sender', '^127\.', 'freemail')
+ describe HASHBL_OSENDR Message contains email address found on HASHBL
+ tflags HASHBL_OSENDR net
body HASHBL_BTC eval:check_hashbl_bodyre('btcbl.foo.bar', 'sha1/max=10/shuffle', '\b([13][a-km-zA-HJ-NP-Z1-9]{25,34})\b')
describe HASHBL_BTC Message contains BTC address found on BTCBL
@@ -89,13 +93,6 @@ Search body for matching regexp and quer
have a single capture ( ) for the string ($1). Optional subtest regexp to
match DNS answer. Note that eval rule type must be "body" or "rawbody".
-Default OPTS: sha1/max=10/shuffle
-
-=item hashbl_ignore string string2 ...
-
-Ignore (do not query) specified emails or captured strings.
-Both raw and hashed values are checked.
-
=back
=cut
@@ -163,6 +160,59 @@ sub set_config {
$conf->{parser}->register_commands(\@cmds);
}
+sub _parse_args {
+ my ($self, $acl) = @_;
+
+ if (not defined $acl) {
+ return ();
+ }
+ $acl =~ s/\s+//g;
+ if ($acl !~ /^[a-z0-9]{1,32}$/) {
+ warn("invalid acl name: $acl");
+ return ();
+ }
+ if ($acl eq 'all') {
+ return ();
+ }
+ if (defined $self->{hashbl_acl}{$acl}) {
+ warn("no such acl defined: $acl");
+ return ();
+ }
+}
+
+sub parse_config {
+ my ($self, $opt) = @_;
+
+ if ($opt->{key} =~ /^hashbl_acl_([a-z0-9]{1,32})$/i) {
+ $self->inhibit_further_callbacks();
+ return 1 unless $self->{hashbl_available};
+
+ my $acl = lc($1);
+ my @opts = split(/\s+/, $opt->{value});
+ foreach my $tmp (@opts)
+ {
+ if ($tmp =~ /^(\!)?(\S+)$/i) {
+ my $neg = $1;
+ my $value = lc($2);
+
+ if (defined $neg) {
+ $self->{hashbl_acl}{$acl}{$value} = 0;
+ } else {
+ next if $acl eq 'all';
+ # exclusions overrides
+ if ( not defined $self->{hashbl_acl}{$acl}{$value} ) {
+ $self->{hashbl_acl}{$acl}{$value} = 1
+ }
+ }
+ } else {
+ warn("invalid acl: $tmp");
+ }
+ }
+ return 1;
+ }
+ return 0;
+}
+
sub finish_parsing_end {
my ($self, $opts) = @_;
@@ -193,19 +243,44 @@ sub _init_email_re {
$self->{main}->{registryboundaries}->{valid_tlds_re} # ends with valid tld
)
/xi;
+
+ # default email whitelist
+ $self->{email_whitelist} = qr/
+ ^(?:
+ abuse|support|sales|info|helpdesk|contact|kontakt
+ | (?:post|host|domain)master
+ | undisclosed.* # yahoo.com etc(?)
+ | request-[a-f0-9]{16} # live.com
+ | bounced?- # yahoo.com etc
+ | [a-f0-9]{8}(?:\.[a-f0-9]{8}|-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}) # gmail msgids?
+ | .+=.+=.+ # gmail forward
+ )\@
+ /xi;
}
sub _get_emails {
- my ($self, $pms, $opts, $from) = @_;
+ my ($self, $pms, $opts, $from, $acl) = @_;
my @emails; # keep find order
my %seen;
- foreach my $hdr (split(/\W+/, $from)) {
+ my @tmp_email;
+ my $domain;
+
+ foreach my $hdr (split(/\//, $from)) {
my $parsed_emails = $self->_parse_emails($pms, $opts, $hdr);
foreach (@$parsed_emails) {
next if exists $seen{$_};
- push @emails, $_;
- $seen{$_} = 1;
+ my @tmp_email = split('@', $_);
+ my $domain = $tmp_email[1];
+ if (defined($acl) and ($acl ne "all")) {
+ if (defined($self->{hashbl_acl}{$acl}{$domain}) and ($self->{hashbl_acl}{$acl}{$domain} eq 1)) {
+ push @emails, $_;
+ $seen{$_} = 1;
+ }
+ } else {
+ push @emails, $_;
+ $seen{$_} = 1;
+ }
}
}
@@ -224,6 +299,18 @@ sub _parse_emails {
return $pms->{hashbl_email_cache}{$hdr} = \@emails;
}
+ if (not defined $pms->{hashbl_whitelist}) {
+ %{$pms->{hashbl_whitelist}} = map { lc($_) => 1 }
+ ( $pms->get("X-Original-To:addr"),
+ $pms->get("Apparently-To:addr"),
+ $pms->get("Delivered-To:addr"),
+ $pms->get("Envelope-To:addr"),
+ );
+ if ( defined $pms->{hashbl_whitelist}{''} ) {
+ delete $pms->{hashbl_whitelist}{''};
+ }
+ }
+
my $str = '';
if ($hdr eq 'ALL') {
$str = join("\n", $pms->get('ALL'));
@@ -254,6 +341,7 @@ sub _parse_emails {
my @emails; # keep find order
my %seen;
+
while ($str =~ /($self->{email_re})/g) {
next if exists $seen{$1};
push @emails, $1;
@@ -263,7 +351,7 @@ sub _parse_emails {
}
sub check_hashbl_emails {
- my ($self, $pms, $list, $opts, $from, $subtest) = @_;
+ my ($self, $pms, $list, $opts, $from, $subtest, $acl) = @_;
return 0 if !$self->{hashbl_available};
return 0 if !$self->{email_re};
@@ -286,7 +374,7 @@ sub check_hashbl_emails {
$from = 'ALLFROM/Reply-To/body' if !$from;
# Find all emails
- my $emails = $self->_get_emails($pms, $opts, $from);
+ my $emails = $self->_get_emails($pms, $opts, $from, $acl);
if (!@$emails) {
dbg("$rulename: no emails found ($from)");
return 0;
@@ -302,6 +390,10 @@ sub check_hashbl_emails {
my %seen;
foreach my $email (@$emails) {
next if exists $seen{$email};
+ if (($email =~ $self->{email_whitelist}) or defined ($pms->{hashbl_whitelist}{$email})) {
+ dbg("Address whitelisted: $email");
+ next;
+ }
if ($nodot || $notag) {
my ($username, $domain) = ($email =~ /(.*)(\@.*)/);
$username =~ tr/.//d if $nodot;