You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2019/07/07 10:50:06 UTC

svn commit: r1862685 - /spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/HashBL.pm

Author: hege
Date: Sun Jul  7 10:50:05 2019
New Revision: 1862685

URL: http://svn.apache.org/viewvc?rev=1862685&view=rev
Log:
Sync with trunk version (check_hashbl_uris, hashbl_ignore), use compile_regexp, fix max=x truncating, logging cleanup

Modified:
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/HashBL.pm

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/HashBL.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/HashBL.pm?rev=1862685&r1=1862684&r2=1862685&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/HashBL.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/HashBL.pm Sun Jul  7 10:50:05 2019
@@ -6,7 +6,7 @@
 # (the "License"); you may not use this file except in compliance with
 # the License.  You may obtain a copy of the License at:
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+#   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
@@ -14,7 +14,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # </...@LICENSE>
-#
 
 =head1 NAME
 
@@ -36,6 +35,9 @@ HashBL - query hashed (and unhashed) DNS
   describe HASHBL_BTC Message contains BTC address found on BTCBL
   priority HASHBL_BTC -100 # required priority to launch async lookups
 
+  header   HASHBL_URI eval:check_hashbl_uris('rbl.foo.bar', 'sha1', '127.0.0.32')
+  describe HASHBL_URI Message contains uri found on rbl
+
 =head1 DESCRIPTION
 
 This plugin support multiple types of hashed or unhashed DNS blocklists.
@@ -50,7 +52,7 @@ OPTS refers to multiple generic options:
   max=x	   maximum number of queries
   shuffle  if max exceeded, random shuffle queries before truncating to limit
 
-Multiple options can be separated with slash or other non-word character. 
+Multiple options can be separated with slash or other non-word character.
 If OPTS is empty ('') or missing, default is used.
 
 HEADERS referes to slash separated list of Headers to process:
@@ -88,6 +90,19 @@ For existing public email blacklist, see
   header HASHBL_EBL check_hashbl_emails('ebl.msbl.org')
   priority HASHBL_EBL -100 # required for async query
 
+=over 4
+
+=item header RULE check_hashbl_uris('bl.example.com/A', 'OPTS', '^127\.')
+
+Check uris from DNS list, optional subtest regexp to match DNS
+answer.
+
+DNS query type can be appended to list with /A (default) or /TXT.
+
+Default OPTS: sha1/max=10/shuffle
+
+=back
+
 =item body RULE check_hashbl_bodyre('bl.example.com/A', 'OPTS', '\b(match)\b', '^127\.')
 
 Search body for matching regexp and query the string captured.  Regexp must
@@ -101,18 +116,21 @@ match DNS answer.  Note that eval rule t
 package Mail::SpamAssassin::Plugin::HashBL;
 use strict;
 use warnings;
-my $VERSION = 0.100;
 
-use Mail::SpamAssassin::Plugin;
-use Mail::SpamAssassin::PerMsgStatus;
-use Mail::SpamAssassin::Util;
-use Digest::SHA qw(sha1_hex);
+my $VERSION = 0.101;
+
 use Digest::MD5 qw(md5_hex);
+use Digest::SHA qw(sha1_hex sha256_hex);
 
-use vars qw(@ISA $email_whitelist $skip_replyto_envfrom);
-@ISA = qw(Mail::SpamAssassin::Plugin);
+use Mail::SpamAssassin::Plugin;
+use Mail::SpamAssassin::Util qw(compile_regexp);
 
-sub dbg { Mail::SpamAssassin::Plugin::dbg ("HashBL: @_"); }
+our @ISA = qw(Mail::SpamAssassin::Plugin);
+
+sub dbg {
+  my $msg = shift;
+  Mail::SpamAssassin::Plugin::dbg("HashBL: $msg", @_);
+}
 
 sub new {
   my ($class, $mailsa) = @_;
@@ -130,7 +148,8 @@ sub new {
   }
 
   $self->register_eval_rule("check_hashbl_emails");
-  $self->register_eval_rule("check_hashbl_bodyre");  
+  $self->register_eval_rule("check_hashbl_uris");
+  $self->register_eval_rule("check_hashbl_bodyre");
   $self->set_config($mailsa->{conf});
 
   return $self;
@@ -138,7 +157,23 @@ sub new {
 
 sub set_config {
   my($self, $conf) = @_;
-  my @cmds = ();
+  my @cmds;
+
+  push (@cmds, {
+    setting => 'hashbl_ignore',
+    is_admin => 1,
+    type => $Mail::SpamAssassin::Conf::CONF_TYPE_HASH_KEY_VALUE,
+    default => {},
+    code => sub {
+      my ($self, $key, $value, $line) = @_;
+      if (!defined $value || $value eq '') {
+        return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
+      }
+      foreach my $str (split (/\s+/, $value)) {
+        $self->{hashbl_ignore}->{lc $str} = 1;
+      }
+    }
+  });
 
   $conf->{parser}->register_commands(\@cmds);
 }
@@ -197,24 +232,24 @@ sub parse_config {
 }
 
 sub finish_parsing_end {
-    my ($self, $opts) = @_;
+  my ($self, $opts) = @_;
 
-    # valid_tlds_re will be available at finish_parsing_end, compile it now,
-    # we only need to do it once and before possible forking
-    if ($self->{hashbl_available} && !$self->{email_regex}) {
-      $self->_init_email_regex();
-    }
+  return 0 if !$self->{hashbl_available};
 
-    return 0;
-}
+  # valid_tlds_re will be available at finish_parsing_end, compile it now,
+  # we only need to do it once and before possible forking
+  if (!exists $self->{email_re}) {
+    $self->_init_email_re();
+  }
 
-sub _init_email_regex {
-    my ($self) = @_;
+  return 0;
+}
 
-    dbg("initializing email regex");
+sub _init_email_re {
+  my ($self) = @_;
 
-    # Some regexp tips courtesy of http://www.regular-expressions.info/email.html
-    # full email regex v0.02
+  # Some regexp tips courtesy of http://www.regular-expressions.info/email.html
+  # full email regex v0.02
   $self->{email_re} = qr/
     (?=.{0,64}\@)			# limit userpart to 64 chars (and speed up searching?)
     (?<![a-z0-9!#\$%&'*+\/=?^_`{|}~-])	# start boundary
@@ -342,13 +377,17 @@ sub check_hashbl_emails {
   my $rulename = $pms->get_current_eval_rule_name();
 
   if (!defined $list) {
-    Mail::SpamAssassin::Logger::info("HashBL: $rulename blocklist argument missing");
+    warn "HashBL: $rulename blocklist argument missing\n";
     return 0;
   }
 
-  if ($subtest && !eval { $subtest = qr/$subtest/ }) {
-    Mail::SpamAssassin::Logger::info("HashBL: $rulename invalid subtest regex: $@");
-    return 0;
+  if ($subtest) {
+    my ($rec, $err) = compile_regexp($subtest, 0);
+    if (!$rec) {
+      warn "HashBL: $rulename invalid subtest regex: $@\n";
+      return 0;
+    }
+    $subtest = $rec;
   }
 
   # Defaults
@@ -394,7 +433,7 @@ sub check_hashbl_emails {
 
   # Truncate list
   my $max = $opts =~ /\bmax=(\d+)\b/ ? $1 : 10;
-  $#filtered_emails = $max if scalar @filtered_emails > $max;
+  $#filtered_emails = $max-1 if scalar @filtered_emails > $max;
 
   foreach my $email (@filtered_emails) {
     $self->_submit_query($pms, $rulename, $email, $list, $opts, $subtest);
@@ -403,6 +442,73 @@ sub check_hashbl_emails {
   return 0;
 }
 
+sub check_hashbl_uris {
+  my ($self, $pms, $list, $opts, $subtest) = @_;
+  my %uri;
+  my %seen;
+  my @filtered_uris;
+
+  return 0 if !$self->{hashbl_available};
+
+  my $rulename = $pms->get_current_eval_rule_name();
+
+  if (!defined $list) {
+    warn "HashBL: $rulename blocklist argument missing\n";
+    return 0;
+  }
+
+  if ($subtest) {
+    my ($rec, $err) = compile_regexp($subtest, 0);
+    if (!$rec) {
+      warn "HashBL: $rulename invalid subtest regex: $@\n";
+      return 0;
+    }
+    $subtest = $rec;
+  }
+
+  # Defaults
+  $opts = 'sha1/max=10/shuffle' if !$opts;
+
+  # Filter list
+  my $keep_case = $opts =~ /\bcase\b/i;
+
+  if ($opts =~ /raw/) {
+    warn "HashBL: $rulename raw option invalid\n";
+    return 0;
+  }
+
+  my $uris = $pms->get_uri_detail_list();
+
+  while (my($uri, $info) = each %{$uris}) {
+    # we want to skip mailto: uris
+    next if ($uri =~ /^mailto:/i);
+    next if exists $seen{$uri};
+
+    # no hosts/domains were found via this uri, so skip
+    next unless ($info->{hosts});
+    if (($info->{types}->{a}) || ($info->{types}->{parsed})) {
+      # check url
+      push @filtered_uris, $keep_case ? $uri : lc($uri);
+    }
+    $seen{$uri} = 1;
+  }
+
+  # Randomize order
+  if ($opts =~ /\bshuffle\b/) {
+    Mail::SpamAssassin::Util::fisher_yates_shuffle(\@filtered_uris);
+  }
+
+  # Truncate list
+  my $max = $opts =~ /\bmax=(\d+)\b/ ? $1 : 10;
+  $#filtered_uris = $max-1 if scalar @filtered_uris > $max;
+
+  foreach my $furi (@filtered_uris) {
+    $self->_submit_query($pms, $rulename, $furi, $list, $opts, $subtest);
+  }
+
+  return 0;
+}
+
 sub check_hashbl_bodyre {
   my ($self, $pms, $bodyref, $list, $opts, $re, $subtest) = @_;
 
@@ -411,19 +517,29 @@ sub check_hashbl_bodyre {
   my $rulename = $pms->get_current_eval_rule_name();
 
   if (!defined $list) {
-    Mail::SpamAssassin::Logger::info("HashBL: $rulename blocklist argument missing");
+    warn "HashBL: $rulename blocklist argument missing\n";
     return 0;
   }
 
-  if (!$re || !eval { $re = qr/$re/ }) {
-    Mail::SpamAssassin::Logger::info("HashBL: $rulename invalid body regex: $@");
+  if (!$re) {
+    warn "HashBL: $rulename missing body regex\n";
     return 0;
   }
-
-  if ($subtest && !eval { $subtest = qr/$subtest/ }) {
-    Mail::SpamAssassin::Logger::info("HashBL: $rulename invalid subtext regex: $@");
+  my ($rec, $err) = compile_regexp($re, 0);
+  if (!$rec) {
+    warn "HashBL: $rulename invalid body regex: $@\n";
     return 0;
   }
+  $re = $rec;
+
+  if ($subtest) {
+    my ($rec, $err) = compile_regexp($subtest, 0);
+    if (!$rec) {
+      warn "HashBL: $rulename invalid subtest regex: $@\n";
+      return 0;
+    }
+    $subtest = $rec;
+  }
 
   # Defaults
   $opts = 'sha1/max=10/shuffle' if !$opts;
@@ -469,7 +585,7 @@ sub check_hashbl_bodyre {
 
   # Truncate list
   my $max = $opts =~ /\bmax=(\d+)\b/ ? $1 : 10;
-  $#matches = $max if scalar @matches > $max;
+  $#matches = $max-1 if scalar @matches > $max;
 
   foreach my $match (@matches) {
     $self->_submit_query($pms, $rulename, $match, $list, $opts, $subtest);
@@ -496,9 +612,19 @@ sub _hash {
 sub _submit_query {
   my ($self, $pms, $rulename, $value, $list, $opts, $subtest) = @_;
 
+  if (exists $pms->{conf}->{hashbl_ignore}->{lc $value}) {
+    dbg("query skipped, ignored string: $value");
+    return 1;
+  }
+
   my $hash = $self->_hash($opts, $value);
   dbg("querying $value ($hash) from $list");
 
+  if (exists $pms->{conf}->{hashbl_ignore}->{$hash}) {
+    dbg("query skipped, ignored hash: $value");
+    return 1;
+  }
+
   my $type = $list =~ s,/(A|TXT)$,,i ? uc($1) : 'A';
   my $lookup = "$hash.$list";
 
@@ -536,7 +662,6 @@ sub _finish_query {
       $ent->{value} =~ s/\@/[at]/g;
       $pms->test_log($ent->{value});
       $pms->got_hit($ent->{rulename}, '', ruletype => 'eval');
-      $pms->register_async_rule_finish($ent->{rulename});
       return;
     }
   }
@@ -544,6 +669,6 @@ sub _finish_query {
 
 # Version features
 sub has_hashbl_bodyre { 1 }
-sub has_hashbl_ignore { 0 }
+sub has_hashbl_ignore { 1 }
 
 1;