You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by km...@apache.org on 2018/12/14 21:05:01 UTC
svn commit: r1848969 [2/2] - in /spamassassin/branches/3.4: ./ lib/Mail/SpamAssassin/ lib/Mail/SpamAssassin/Conf/ lib/Mail/SpamAssassin/Plugin/ t/

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Util.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Util.pm?rev=1848969&r1=1848968&r2=1848969&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Util.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Util.pm Fri Dec 14 21:05:01 2018
@@ -57,7 +57,8 @@ our @EXPORT_OK = qw(&local_tz &base64_de
                   &exit_status_str &proc_status_ok &am_running_on_windows
                   &reverse_ip_address &decode_dns_question_entry &touch_file
                   &get_my_locales &parse_rfc822_date &get_user_groups
-                  &secure_tmpfile &secure_tmpdir &uri_list_canonicalize);
+                  &secure_tmpfile &secure_tmpdir &uri_list_canonicalize
+                  &compile_regexp &qr_to_string);
 
 our $AM_TAINTED;
 
@@ -1097,7 +1098,8 @@ with Perl.
 sub first_available_module {
   my (@packages) = @_;
   foreach my $mod (@packages) {
-    if (eval 'require '.$mod.'; 1; ') {
+    next if $mod !~ /^[\w:]+$/; # be paranoid
+    if (eval 'require '.$mod.'; 1;') {
       return $mod;
     }
   }
@@ -1267,6 +1269,8 @@ sub secure_tmpdir {
 ## Replaced with Mail::SpamAssassin::RegistryBoundaries::uri_to_domain.
 ##
 
+###########################################################################
+
 *uri_list_canonify = \&uri_list_canonicalize;  # compatibility alias
 sub uri_list_canonicalize {
   my($redirector_patterns, @uris) = @_;
@@ -1729,6 +1733,157 @@ sub trap_sigalrm_fully {
 
 ###########################################################################
 
+# returns ($compiled_re, $error)
+# if any errors, $compiled_re = undef, $error has string
+# args:
+# - regexp
+# - strip_delimiters (default: 1) (value 2 means, try strip, but don't error)
+# - ignore_always_matching (default: 0)
+sub compile_regexp {
+  my ($re, $strip_delimiters, $ignore_always_matching) = @_;
+  local($1);
+
+  # Do not allow already compiled regexes or other funky refs
+  if (ref($re)) {
+    return (undef, 'ref passed');
+  }
+
+  # try stripping by default
+  $strip_delimiters = 1 if !defined $strip_delimiters;
+
+  # OK, try to remove any normal perl-style regexp delimiters at
+  # the start and end, and modifiers at the end if present,
+  # so we can validate those too.
+  my $origre = $re;
+  my $delim_end = '';
+
+  if ($strip_delimiters >= 1) {
+    # most common delimiter
+    if ($re =~ s{^/}{}) {
+      $delim_end = '/';
+    }
+    # symmetric delimiters
+    elsif ($re =~ s/^(?:m|qr)([\{\(\<\[])//) {
+      ($delim_end = $1) =~ tr/\{\(\<\[/\}\)\>\]/;
+    }
+    # any non-wordchar delimiter, but let's ignore backslash..
+    elsif ($re =~ s/^(?:m|qr)(\W)//) {
+      $delim_end = $1;
+      if ($delim_end eq '\\') {
+        return (undef, 'backslash delimiter not allowed');
+      }
+    }
+    elsif ($strip_delimiters != 2) {
+      return (undef, 'missing regexp delimiters');
+    }
+  }
+
+  # cut end delimiter, mods
+  my $mods;
+  if ($delim_end) {
+    # Ignore e because paranoid
+    if ($re =~ s/\Q${delim_end}\E([a-df-z]*)\z//) {
+      $mods = $1;
+    } else {
+      return (undef, 'invalid end delimiter/mods');
+    }
+  }
+
+  # paranoid check for eval exec (?{foo}), in case someone
+  # actually put "use re 'eval'" somewhere..
+  if ($re =~ /\(\?\??\{/) {
+    return (undef, 'eval (?{}) found');
+  }
+
+  # check unescaped delimiter, but only if it's not symmetric,
+  # those will fp on .{0,10} [xyz] etc, no need for so strict checks
+  # since these regexes don't end up in eval strings anyway
+  if ($delim_end && $delim_end !~ tr/\}\)\]//) {
+    # first we remove all escaped backslashes "\\"
+    my $dbs_stripped = $re;
+    $dbs_stripped =~ s/\\\\//g;
+    # now we can properly check if something is unescaped
+    if ($dbs_stripped =~ /(?<!\\)\Q${delim_end}\E/) {
+      return (undef, "unquoted delimiter '$delim_end' found");
+    }
+  }
+
+  if ($ignore_always_matching) {
+    if (my $err = is_always_matching_regexp($re)) {
+      return (undef, "always matching regexp: $err");
+    }
+  }
+
+  # now prepend the modifiers, in order to check if they're valid
+  if ($mods) {
+    $re = '(?'.$mods.')'.$re;
+  }
+
+  # no re "strict";  # since perl 5.21.8: Ranges of ASCII printables...
+  my $compiled_re;
+  $re = untaint_var($re);
+  my $ok = eval {
+    # don't dump deprecated warnings to user STDERR
+    # but die on any other warning for safety?
+    local $SIG{__WARN__} = sub {
+      if ($_[0] !~ /deprecated/i) {
+        die "$_[0]\n";
+      }
+    };
+    $compiled_re = qr/$re/; 1;
+  };
+  if ($ok && ref($compiled_re) eq 'Regexp') {
+    #$origre = untaint_var($origre);
+    #dbg("config: accepted regex '%s' => '%s'", $origre, $compiled_re);
+    return ($compiled_re, '');
+  } else {
+    my $err = $@ ne '' ? $@ : "errno=$!"; chomp $err;
+    $err =~ s/ at .*? line \d.*$//;
+    return (undef, $err);
+  }
+}
+
+sub is_always_matching_regexp {
+  my ($re) = @_;
+
+  if ($re eq '') {
+    return "empty";
+  }
+  elsif ($re =~ /(?<!\\)\|\|/) {
+    return "contains '||'";
+  }
+  elsif ($re =~ /^\|/) {
+    return "starts with '|'";
+  }
+  elsif ($re =~ /\|(?<!\\\|)$/) {
+    return "ends with '|'";
+  }
+
+  return undef;
+}
+
+# convert compiled regexp (?^i:foo) to string (?i)foo
+sub qr_to_string {
+  my ($re) = @_;
+
+  return undef unless ref($re) eq 'Regexp';
+  $re = "".$re; # stringify
+
+  local($1);
+  $re =~ s/^\(\?\^([a-z]*)://;
+  my $mods = $1;
+  $re =~ s/\)\z//;
+
+  return ($mods ? "(?$mods)$re" : $re);
+}
+
+###########################################################################
+
+###
+### regexp_remove_delimiters and make_qr DEPRECATED, to be removed
+### compile_regexp() should be used everywhere
+###
+
 # Removes any normal perl-style regexp delimiters at
 # the start and end, and modifiers at the end (if present).
 # If modifiers are found, they are inserted into the pattern using
@@ -1737,27 +1892,33 @@ sub trap_sigalrm_fully {
 sub regexp_remove_delimiters {
   my ($re) = @_;
 
+  warn("deprecated Util regexp_remove_delimiters() called\n");
+
   my $delim;
   if (!defined $re || $re eq '') {
-    warn "cannot remove delimiters from null regexp";
-    return;  # invalid
+    return undef;
   }
-  elsif ($re =~ s/^m\{//) {             # m{foo/bar}
+  elsif ($re =~ s/^m?\{//) {             # m{foo/bar}
     $delim = '}';
   }
-  elsif ($re =~ s/^m\(//) {             # m(foo/bar)
+  elsif ($re =~ s/^m?\[//) {             # m[foo/bar]
+    $delim = ']';
+  }
+  elsif ($re =~ s/^m?\(//) {             # m(foo/bar)
     $delim = ')';
   }
-  elsif ($re =~ s/^m<//) {              # m<foo/bar>
+  elsif ($re =~ s/^m?<//) {              # m<foo/bar>
     $delim = '>';
   }
-  elsif ($re =~ s/^m(\W)//) {           # m#foo/bar#
+  elsif ($re =~ s/^m?(\W)//) {           # m#foo/bar#
     $delim = $1;
   } else {                              # /foo\/bar/ or !foo/bar!
-    $re =~ s/^(\W)//; $delim = $1;
+    return undef; # invalid    
   }
 
-  $re =~ s/\Q${delim}\E([imsx]*)$// or warn "unbalanced re: $re";
+  if ($re !~ s/\Q${delim}\E([imsx]*)$//) {
+    return undef;
+  }
 
   my $mods = $1;
   if ($mods) {
@@ -1771,8 +1932,17 @@ sub regexp_remove_delimiters {
 
 sub make_qr {
   my ($re) = @_;
+
+  warn("deprecated Util make_qr() called\n");
+
   $re = regexp_remove_delimiters($re);
-  return qr/$re/;
+  return undef if !defined $re || $re eq '';
+  my $compiled_re;
+  if (eval { $compiled_re = qr/$re/; 1; } && ref($compiled_re) eq 'Regexp') {
+    return $compiled_re;
+  } else {
+    return undef;
+  }
 }
 
 ###########################################################################

Modified: spamassassin/branches/3.4/t/dnsbl.t
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/t/dnsbl.t?rev=1848969&r1=1848968&r2=1848969&view=diff
==============================================================================
--- spamassassin/branches/3.4/t/dnsbl.t (original)
+++ spamassassin/branches/3.4/t/dnsbl.t Fri Dec 14 21:05:01 2018
@@ -7,7 +7,7 @@ use Test::More;
 plan skip_all => "Long running tests disabled" unless conf_bool('run_long_tests');
 plan skip_all => "Net tests disabled" unless conf_bool('run_net_tests');
 plan skip_all => "Can't use Net::DNS Safely" unless can_use_net_dns_safely();
-plan tests => 23;
+plan tests => 17;
 
 # ---------------------------------------------------------------------------
 # bind configuration currently used to support this test
@@ -54,7 +54,6 @@ EOF
  q{ <dns:14.35.17.212.dnsbltest.spamassassin.org> [127.0.0.1] } => 'P_4',
  q{ <dns:226.149.120.193.dnsbltest.spamassassin.org> [127.0.0.1] } => 'P_5',
  q{ <dns:example.com.dnsbltest.spamassassin.org> [127.0.0.2] } => 'P_6',
- q{ <dns:134.88.73.210.sb.dnsbltest.spamassassin.org?type=TXT> } => 'P_7',
  q{,DNSBL_TEST_TOP,} => 'P_8',
  q{,DNSBL_TEST_WHITELIST,} => 'P_9',
  q{,DNSBL_TEST_DYNAMIC,} => 'P_10',
@@ -63,16 +62,11 @@ EOF
  q{,DNSBL_TXT_TOP,} => 'P_13',
  q{,DNSBL_TXT_RE,} => 'P_14',
  q{,DNSBL_RHS,} => 'P_15',
- q{,DNSBL_SB_TIME,} => 'P_16',
- q{,DNSBL_SB_FLOAT,} => 'P_17',
- q{,DNSBL_SB_STR,} => 'P_18',
 );
 
 %anti_patterns = (
  q{,DNSBL_TEST_MISS,} => 'P_19',
  q{,DNSBL_TXT_MISS,} => 'P_20',
- q{,DNSBL_SB_UNDEF,} => 'P_21',
- q{,DNSBL_SB_MISS,} => 'P_22',
  q{ launching DNS A query for 14.35.17.212.untrusted.dnsbltest.spamassassin.org. } => 'untrusted',
 );
 
@@ -136,28 +130,6 @@ header DNSBL_RHS	eval:check_rbl_from_hos
 describe DNSBL_RHS	DNSBL RHS match
 tflags DNSBL_RHS	net
 
-header __TEST_SENDERBASE	eval:check_rbl_txt('sb', 'sb.dnsbltest.spamassassin.org.')
-tflags __TEST_SENDERBASE	net
-
-header DNSBL_SB_TIME	eval:check_rbl_sub('sb', 'sb:S6 == 1060085863 && S6 < time')
-describe DNSBL_SB_TIME	DNSBL SenderBase time
-tflags DNSBL_SB_TIME	net
-
-header DNSBL_SB_FLOAT	eval:check_rbl_sub('sb', 'sb:S3 > 7.0 && S3 < 7.2')
-describe DNSBL_SB_FLOAT	DNSBL SenderBase floating point
-tflags DNSBL_SB_FLOAT	net
-
-header DNSBL_SB_STR	eval:check_rbl_sub('sb', 'sb:S1 eq \"Spammer Networks\" && S49 !~ /Y/ && index(S21, \".com\") > 0')
-describe DNSBL_SB_STR	DNSBL SenderBase strings
-tflags DNSBL_SB_STR	net
-
-header DNSBL_SB_UNDEF	eval:check_rbl_sub('sb', 'sb:S98 =~ /foo/ && S99 > 10')
-describe DNSBL_SB_UNDEF	DNSBL SenderBase undefined
-tflags DNSBL_SB_UNDEF	net
-
-header DNSBL_SB_MISS	eval:check_rbl_sub('sb', 'sb:S2 < 3.0')
-describe DNSBL_SB_MISS	DNSBL SenderBase miss
-tflags DNSBL_SB_MISS	net
 ");
 
 # The -D clobbers test performance but some patterns & antipatterns depend on debug output

Modified: spamassassin/branches/3.4/t/if_can.t
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/t/if_can.t?rev=1848969&r1=1848968&r2=1848969&view=diff
==============================================================================
--- spamassassin/branches/3.4/t/if_can.t (original)
+++ spamassassin/branches/3.4/t/if_can.t Fri Dec 14 21:05:01 2018
@@ -2,7 +2,7 @@
 
 use lib '.'; use lib 't';
 use SATest; sa_t_init("if_can");
-use Test::More tests => 13;
+use Test::More tests => 16;
 
 # ---------------------------------------------------------------------------
 
@@ -16,6 +16,9 @@ use Test::More tests => 13;
         q{ SHOULD_BE_CALLED5 }, 'should_be_called5',
         q{ SHOULD_BE_CALLED6 }, 'should_be_called6',
         q{ SHOULD_BE_CALLED7 }, 'should_be_called7',
+        q{ SHOULD_BE_CALLED8 }, 'should_be_called8',
+        q{ SHOULD_BE_CALLED9 }, 'should_be_called9',
+        q{ SHOULD_BE_CALLED10 }, 'should_be_called10',
 
 );
 %anti_patterns = (
@@ -51,6 +54,15 @@ tstlocalrules (q{
         if (!can(Mail::SpamAssassin::Plugin::Test::test_feature_xxxx_nosuch))
           body SHOULD_BE_CALLED7 /./
         endif
+        if can(Mail::SpamAssassin::Plugin::Test::test_feature_xxxx_true) && version > 0.00000
+          body SHOULD_BE_CALLED8 /./
+        endif
+        if !can(Mail::SpamAssassin::Plugin::Test::test_feature_xxxx_false  ) && !(! version > 0.00000)
+          body SHOULD_BE_CALLED9 /./
+        endif
+        if has(Mail::SpamAssassin::Plugin::Test::test_feature_xxxx_true) && (!can(Mail::SpamAssassin::Plugin::Test::test_feature_xxxx_nosuch))
+          body SHOULD_BE_CALLED10 /./
+        endif
 
         if !has(Mail::SpamAssassin::Plugin::Test::check_test_plugin)
           body SHOULD_NOT_BE_CALLED1 /./

Modified: spamassassin/branches/3.4/t/mimeheader.t
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/t/mimeheader.t?rev=1848969&r1=1848968&r2=1848969&view=diff
==============================================================================
--- spamassassin/branches/3.4/t/mimeheader.t (original)
+++ spamassassin/branches/3.4/t/mimeheader.t Fri Dec 14 21:05:01 2018
@@ -2,7 +2,7 @@
 
 use lib '.'; use lib 't';
 use SATest; sa_t_init("mimeheader");
-use Test::More tests => 4;
+use Test::More tests => 6;
 
 $ENV{'LANGUAGE'} = $ENV{'LC_ALL'} = 'C';             # a cheat, but we need the patterns to work
 
@@ -14,18 +14,33 @@ $ENV{'LANGUAGE'} = $ENV{'LC_ALL'} = 'C';
   q{ MIMEHEADER_TEST2 }, q{ test2 },
   q{ MATCH_NL_NONRAW }, q{ match_nl_nonraw },
   q{ MATCH_NL_RAW }, q{ match_nl_raw },
+  q{ MIMEHEADER_FOUND }, q{ unset_found },
 
 );
 
+%anti_patterns = (
+
+  q{ MIMEHEADER_NOTFOUND }, q{ unset_notfound },
+
+);
+
+tstpre(q{
+
+  loadplugin Mail::SpamAssassin::Plugin::MIMEHeader
+
+});
+
 tstprefs (q{
 
-  # loadplugin Mail::SpamAssassin::Plugin::MIMEHeader
   mimeheader MIMEHEADER_TEST1 content-type =~ /application\/msword/
   mimeheader MIMEHEADER_TEST2 content-type =~ m!APPLICATION/MSWORD!i
 
   mimeheader MATCH_NL_NONRAW       Content-Type =~ /msword; name/
   mimeheader MATCH_NL_RAW   Content-Type:raw =~ /msword;\n\tname/
 
+  mimeheader MIMEHEADER_NOTFOUND xyzzy =~ /foobar/
+  mimeheader MIMEHEADER_FOUND xyzzy =~ /foobar/ [if-unset: xyzfoobarxyz]
+
 	});
 
 sarun ("-L -t < data/nice/004", \&patterns_run_cb);

Modified: spamassassin/branches/3.4/t/regexp_valid.t
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/t/regexp_valid.t?rev=1848969&r1=1848968&r2=1848969&view=diff
==============================================================================
--- spamassassin/branches/3.4/t/regexp_valid.t (original)
+++ spamassassin/branches/3.4/t/regexp_valid.t Fri Dec 14 21:05:01 2018
@@ -18,55 +18,34 @@ if (-e 'test_dir') {            # runnin
 use strict;
 use lib '.'; use lib 't';
 use SATest; sa_t_init("regexp_valid");
+use Mail::SpamAssassin::Util qw(compile_regexp);
 
-use Test::More tests => 24;
-
-# initialize SpamAssassin
-use Mail::SpamAssassin;
-my $sa = create_saobj({'dont_copy_prefs' => 1});
-$sa->init(0); # parse rules
-
-
-# make a _copy_ of the STDERR file descriptor
-# (so we can restore it after redirecting it)
-open(OLDERR, ">&STDERR") || die "Cannot copy STDERR file handle";
-
-# create a file descriptior for logging STDERR
-# (we do not want warnings for regexps we know are invalid)
-my $fh = IO::File->new_tmpfile();
-open(LOGERR, ">&".fileno($fh)) || die "Cannot create LOGERR temp file";
-
-# quiet "used only once" warnings
-1 if *OLDERR;
-1 if *LOGERR;
-
+use Test::More tests => 41;
 
+my $showerr;
 sub tryone {
-  my $re = shift;
-  return $sa->{conf}->{parser}->is_regexp_valid('test', $re);
+  my ($re, $strip) = @_;
+  $strip = 1 if !defined $strip;
+  my ($rec, $err) = compile_regexp($re, $strip, 1);
+  if (!$rec && $showerr) { print STDERR "invalid regex '$re': $err\n"; }
+  return $rec;
 }
 
 # test valid regexps with this sub
 sub goodone {
-  my $re = shift;
-  open(STDERR, ">&=OLDERR") || die "Cannot reopen STDERR";
-  return tryone $re;
+  my ($re, $strip) = @_;
+  $showerr = 1;
+  return tryone($re, $strip);
 }
 
 # test invalid regexps with this sub
 sub badone {
-  my $re = shift;
-  open(STDERR, ">&=LOGERR") || die "Cannot reopen STDERR (for logging)";
-  return !tryone $re;
+  my ($re, $strip) = @_;
+  $showerr = 0;
+  return !tryone($re, $strip);
 }
 
 
-ok goodone qr/foo bar/;
-ok goodone qr/foo bar/i;
-ok goodone qr/foo bar/is;
-ok goodone qr/foo bar/im;
-ok goodone qr!foo bar!im;
-
 ok goodone 'qr/foo bar/';
 ok goodone 'qr/foo bar/im';
 ok goodone 'qr!foo bar!';
@@ -80,14 +59,38 @@ ok goodone 'm{foo bar}is';
 ok goodone 'm(foo bar)is';
 
 ok goodone 'm<foo bar>is';
-ok goodone 'foo bar';
-ok goodone 'foo/bar';
-ok badone 'foo(bar';
-ok badone 'foo(?{1})bar';
+ok goodone 'foo bar', 0;
+ok goodone 'foo/bar', 0;
+ok badone 'foo(bar', 0;
 
+ok badone 'foo(?{1})bar';
+ok badone 'foo(??{1})bar';
 ok badone '/foo(?{1})bar/';
+ok badone '/foo(??{1})bar/';
 ok badone 'm!foo(?{1})bar!';
-# ok badone '/test//';          # removed for bug 4700
-ok goodone '.*';
+
+ok goodone '/test\//';
+ok badone '/test//';  # removed for bug 4700 - and back from 7648
+ok badone 'm!test!xyz!i';
+ok badone '//';
+ok badone 'm!|foo!';
+ok goodone 'm!\|foo!';
+ok badone 'm{bar||y}';
+
+ok goodone 'm{test}}'; # it's actually bad, but no way to parse this with simple code
+ok goodone 'm}test}}'; # it's actually bad, but no way to parse this with simple code
+ok goodone 'm{test{}'; # it's good even though perl warns unescaped { is deprecated
+ok goodone 'm}test{}';
+ok goodone 'm{test.{0,10}}';
+ok goodone 'm}test.{0,10}}';
+ok goodone 'm[foo[bar]]';
+ok badone 'm[foo[bar\]]';
+ok goodone 'm(foo(?:bar)x)';
+ok badone 'm(foo\(?:bar)x)';
+ok goodone 'm/test # comment/x';
+ok badone 'm/test # comm/ent/x'; # well you shouldn't use comments anyway
+ok goodone 'm[test # \] foo []x';
+
+ok goodone '.*', 0;
 ok goodone 'm*<a[^<]{0,60} onMouseMove=(?:3D)?"window.status=(?:3D)?\'https?://*';
 

Modified: spamassassin/branches/3.4/t/stop_always_matching_regexps.t
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/t/stop_always_matching_regexps.t?rev=1848969&r1=1848968&r2=1848969&view=diff
==============================================================================
--- spamassassin/branches/3.4/t/stop_always_matching_regexps.t (original)
+++ spamassassin/branches/3.4/t/stop_always_matching_regexps.t Fri Dec 14 21:05:01 2018
@@ -13,20 +13,18 @@ BEGIN {
 
 use lib '.'; use lib 't';
 use SATest; sa_t_init("stop_always_matching_regexps");
-use Test::More tests => 13;
+use Test::More tests => 12;
 
 # ---------------------------------------------------------------------------
 
 use strict;
 require Mail::SpamAssassin;
-
-my $sa = create_saobj({'dont_copy_prefs' => 1});
-$sa->init(0);
-ok($sa);
+use Mail::SpamAssassin::Util qw(compile_regexp);
 
 sub is_caught {
   my ($re) = @_;
-  return $sa->{conf}->{parser}->is_always_matching_regexp($re, $re);
+  my ($rec, $err) = compile_regexp($re, 0, 1);
+  return !$rec;
 }
 
 ok !is_caught 'foo|bar';