You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2022/03/06 11:49:44 UTC
svn commit: r1898649 - /spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm
Author: hege
Date: Sun Mar 6 11:49:43 2022
New Revision: 1898649
URL: http://svn.apache.org/viewvc?rev=1898649&view=rev
Log:
Bug 7923 - RFE: Making HashBL email_whitelist a configurable feature
Modified:
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm?rev=1898649&r1=1898648&r2=1898649&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/HashBL.pm Sun Mar 6 11:49:43 2022
@@ -101,6 +101,11 @@ For existing public email blacklist, see
priority HASHBL_EMAIL -100 # required priority to launch async lookups early
tflags HASHBL_EMAIL net
+Default regex for matching and capturing emails can be overridden with
+C<hashbl_email_regex>. Likewise, the default whitelist can be changed with
+C<hashbl_email_whitelist>. Only change if you know what you are doing, see
+module source for the defaults. Example: hashbl_email_regex \S+@\S+\.com
+
=over 4
=item header RULE check_hashbl_uris('bl.example.invalid/A', 'OPTS', '^127\.')
@@ -187,27 +192,66 @@ sub set_config {
}
});
- $conf->{parser}->register_commands(\@cmds);
-}
-
-sub _parse_args {
- my ($self, $acl) = @_;
-
- if (not defined $acl) {
- return ();
+ push (@cmds, { # queue the 'hashbl_email_regex' setting; @cmds is handed to register_commands() at the end of set_config
+ setting => 'hashbl_email_regex',
+ is_admin => 1,
+ type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
+ # Some regexp tips courtesy of http://www.regular-expressions.info/email.html
+ # full email regex v0.02
+ default => qr/(?i)
+ (?=.{0,64}\@) # limit userpart to 64 chars (and speed up searching?)
+ (?<![a-z0-9!#\$%&'*+\/=?^_`{|}~-]) # start boundary
+ ( # capture email
+ [a-z0-9!#\$%&'*+\/=?^_`{|}~-]+ # no dot in beginning
+ (?:\.[a-z0-9!#\$%&'*+\/=?^_`{|}~-]+)* # no consecutive dots, no ending dot
+ \@
+ (?:[a-z0-9](?:[a-z0-9-]{0,59}[a-z0-9])?\.){1,4} # max 4x61 char parts (should be enough?)
+ _TLDS_ # ends with valid tld, _TLDS_ is template which will be replaced in finish_parsing_end()
+ )
+ /x,
+ code => sub {
+ my ($self, $key, $value, $line) = @_;
+ if (!defined $value || $value eq '') { # setting was given without a value
+ return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
+ }
+ my ($rec, $err) = compile_regexp($value, 0); # compile the user-supplied pattern; a false $rec signals failure, $err carries the reason
+ if (!$rec) {
+ dbg("config: invalid hashbl_email_regex '$value': $err");
+ return $Mail::SpamAssassin::Conf::INVALID_VALUE;
+ }
+ $self->{hashbl_email_regex} = $rec; # presumably $self is the Conf object later read as {conf}->{hashbl_email_regex} - confirm
}
- $acl =~ s/\s+//g;
- if ($acl !~ /^[a-z0-9]{1,32}$/) {
- warn("invalid acl name: $acl");
- return ();
- }
- if ($acl eq 'all') {
- return ();
- }
- if (defined $self->{hashbl_acl}{$acl}) {
- warn("no such acl defined: $acl");
- return ();
+ });
+
+ push (@cmds, { # queue the 'hashbl_email_whitelist' setting; addresses matching it are skipped by check_hashbl_emails
+ setting => 'hashbl_email_whitelist',
+ is_admin => 1,
+ type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
+ default => qr/(?i)
+ ^(?:
+ abuse|support|sales|info|helpdesk|contact|kontakt
+ | (?:post|host|domain)master
+ | undisclosed.* # yahoo.com etc(?)
+ | request-[a-f0-9]{16} # live.com
+ | bounced?- # yahoo.com etc
+ | [a-f0-9]{8}(?:\.[a-f0-9]{8}|-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}) # gmail msgids?
+ | .+=.+=.+ # gmail forward
+ )\@
+ /x,
+ code => sub {
+ my ($self, $key, $value, $line) = @_;
+ if (!defined $value || $value eq '') { # reject a missing/empty value instead of falling through to compile_regexp
+ return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
+ }
+ my ($rec, $err) = compile_regexp($value, 0); # compile the user-supplied pattern; a false $rec signals failure, $err carries the reason
+ if (!$rec) {
+ dbg("config: invalid hashbl_email_whitelist '$value': $err");
+ return $Mail::SpamAssassin::Conf::INVALID_VALUE;
+ }
+ $self->{hashbl_email_whitelist} = $rec; # replaces the default whitelist pattern above
+ }
+ });
+
+ $conf->{parser}->register_commands(\@cmds);
}
sub parse_config {
@@ -240,6 +284,7 @@ sub parse_config {
}
return 1;
}
+
return 0;
}
@@ -250,44 +295,15 @@ sub finish_parsing_end {
# valid_tlds_re will be available at finish_parsing_end, compile it now,
# we only need to do it once and before possible forking
- if (!exists $self->{email_re}) {
- $self->_init_email_re();
- }
+ # replace _TLDS_ with valid list of TLDs
+ $opts->{conf}->{hashbl_email_regex} =~ s/_TLDS_/$self->{main}->{registryboundaries}->{valid_tlds_re}/g;
+ #dbg("hashbl_email_regex: $opts->{conf}->{hashbl_email_regex}");
+ $opts->{conf}->{hashbl_email_whitelist} =~ s/_TLDS_/$self->{main}->{registryboundaries}->{valid_tlds_re}/g;
+ #dbg("hashbl_email_whitelist: $opts->{conf}->{hashbl_email_whitelist}");
return 0;
}
-sub _init_email_re {
- my ($self) = @_;
-
- # Some regexp tips courtesy of http://www.regular-expressions.info/email.html
- # full email regex v0.02
- $self->{email_re} = qr/
- (?=.{0,64}\@) # limit userpart to 64 chars (and speed up searching?)
- (?<![a-z0-9!#\$%&'*+\/=?^_`{|}~-]) # start boundary
- ( # capture email
- [a-z0-9!#\$%&'*+\/=?^_`{|}~-]+ # no dot in beginning
- (?:\.[a-z0-9!#\$%&'*+\/=?^_`{|}~-]+)* # no consecutive dots, no ending dot
- \@
- (?:[a-z0-9](?:[a-z0-9-]{0,59}[a-z0-9])?\.){1,4} # max 4x61 char parts (should be enough?)
- $self->{main}->{registryboundaries}->{valid_tlds_re} # ends with valid tld
- )
- /xi;
-
- # default email whitelist
- $self->{email_whitelist} = qr/
- ^(?:
- abuse|support|sales|info|helpdesk|contact|kontakt
- | (?:post|host|domain)master
- | undisclosed.* # yahoo.com etc(?)
- | request-[a-f0-9]{16} # live.com
- | bounced?- # yahoo.com etc
- | [a-f0-9]{8}(?:\.[a-f0-9]{8}|-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}) # gmail msgids?
- | .+=.+=.+ # gmail forward
- )\@
- /xi;
-}
-
sub _get_emails {
my ($self, $pms, $opts, $from, $acl) = @_;
@@ -362,7 +378,7 @@ sub _parse_emails {
if ($opts =~ /\bnoquote\b/) {
# strip emails contained in <>, not mailto:
# also strip ones followed by quote-like "wrote:" (but not fax: and tel: etc)
- $body =~ s#<?(?<!mailto:)$self->{email_re}(?:>|\s{1,10}(?!(?:fa(?:x|csi)|tel|phone|e?-?mail))[a-z]{2,11}:)# #gi;
+ $body =~ s#<?(?<!mailto:)$pms->{conf}->{hashbl_email_regex}(?:>|\s{1,10}(?!(?:fa(?:x|csi)|tel|phone|e?-?mail))[a-z]{2,11}:)# #gi;
}
$str .= $body;
} else {
@@ -372,7 +388,7 @@ sub _parse_emails {
my @emails; # keep find order
my %seen;
- while ($str =~ /($self->{email_re})/g) {
+ while ($str =~ /($pms->{conf}->{hashbl_email_regex})/g) {
next if exists $seen{$1};
push @emails, $1;
}
@@ -385,7 +401,6 @@ sub check_hashbl_emails {
return 0 if !$self->{hashbl_available};
return 0 if !$pms->is_dns_available();
- return 0 if !$self->{email_re};
my $rulename = $pms->get_current_eval_rule_name();
@@ -430,7 +445,8 @@ sub check_hashbl_emails {
foreach my $email (@$emails) {
next if exists $seen{$email};
next if $email !~ /.*\@.*/;
- if (($email =~ $self->{email_whitelist}) or defined ($pms->{hashbl_whitelist}{$email})) {
+ if ($email =~ $pms->{conf}->{hashbl_email_whitelist}
+ || defined $pms->{hashbl_whitelist}{$email}) {
dbg("Address whitelisted: $email");
next;
}
@@ -703,5 +719,7 @@ sub has_hashbl_bodyre { 1 }
sub has_hashbl_emails { 1 }
sub has_hashbl_uris { 1 }
sub has_hashbl_ignore { 1 }
+sub has_hashbl_email_regex { 1 } # always true; presumably a capability flag for the hashbl_email_regex setting - confirm
+sub has_hashbl_email_whitelist { 1 } # always true; presumably a capability flag for the hashbl_email_whitelist setting - confirm
1;