You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by km...@apache.org on 2018/12/14 21:05:01 UTC
svn commit: r1848969 [1/2] - in /spamassassin/branches/3.4: ./ lib/Mail/SpamAssassin/ lib/Mail/SpamAssassin/Conf/ lib/Mail/SpamAssassin/Plugin/ t/

Author: kmcgrail
Date: Fri Dec 14 21:05:01 2018
New Revision: 1848969

URL: http://svn.apache.org/viewvc?rev=1848969&view=rev
Log:
Work on improving evaluation rules and preparing for 3.4.3

Modified:
    spamassassin/branches/3.4/MANIFEST
    spamassassin/branches/3.4/UPGRADE
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf.pm
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf/Parser.pm
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/Constants.pm
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/Dns.pm
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/Logger.pm
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/Message.pm
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Bayes.pm
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Check.pm
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/MIMEHeader.pm
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/P595Body.pm
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/PDFInfo.pm
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/ReplaceTags.pm
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Rule2XSBody.pm
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/URIDetail.pm
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/PluginHandler.pm
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/Util.pm
    spamassassin/branches/3.4/t/dnsbl.t
    spamassassin/branches/3.4/t/if_can.t
    spamassassin/branches/3.4/t/mimeheader.t
    spamassassin/branches/3.4/t/regexp_valid.t
    spamassassin/branches/3.4/t/stop_always_matching_regexps.t

Modified: spamassassin/branches/3.4/MANIFEST
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/MANIFEST?rev=1848969&r1=1848968&r2=1848969&view=diff
==============================================================================
--- spamassassin/branches/3.4/MANIFEST (original)
+++ spamassassin/branches/3.4/MANIFEST Fri Dec 14 21:05:01 2018
@@ -224,6 +224,7 @@ sql/userpref_mysql.sql
 sql/userpref_pg.sql
 sql/txrep_mysql.sql
 sql/txrep_pg.sql
+sql/txrep_sqlite.sql
 t/README
 t/SATest.pl
 t/SATest.pm

Modified: spamassassin/branches/3.4/UPGRADE
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/UPGRADE?rev=1848969&r1=1848968&r2=1848969&view=diff
==============================================================================
--- spamassassin/branches/3.4/UPGRADE (original)
+++ spamassassin/branches/3.4/UPGRADE Fri Dec 14 21:05:01 2018
@@ -21,6 +21,11 @@ Note for Users Upgrading to SpamAssassin
   to run sa-update" unless it can find list from config files.
 
 
+- Deprecated functions: Parser::is_delimited_regexp_valid(),
+  Parser::is_regexp_valid(), Util::regexp_remove_delimiters(),
+  Util::make_qr().  These all are combined into new Util::compile_regexp().
+
+
 Note for Users Upgrading to SpamAssassin 3.4.2
 ----------------------------------------------
 

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf.pm?rev=1848969&r1=1848968&r2=1848969&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf.pm Fri Dec 14 21:05:01 2018
@@ -82,12 +82,11 @@ use warnings;
 # use bytes;
 use re 'taint';
 
-use Mail::SpamAssassin::Util;
 use Mail::SpamAssassin::NetSet;
 use Mail::SpamAssassin::Constants qw(:sa :ip);
 use Mail::SpamAssassin::Conf::Parser;
 use Mail::SpamAssassin::Logger;
-use Mail::SpamAssassin::Util qw(untaint_var);
+use Mail::SpamAssassin::Util qw(untaint_var compile_regexp);
 use File::Spec;
 
 our @ISA = qw();
@@ -2733,24 +2732,23 @@ Example: http://chkpt.zdnet.com/chkpt/wh
   push (@cmds, {
     setting => 'redirector_pattern',
     is_priv => 1,
+    default => [],
+    type => $CONF_TYPE_STRINGLIST,
     code => sub {
       my ($self, $key, $value, $line) = @_;
+
+      $value =~ s/^\s+//;
       if ($value eq '') {
 	return $MISSING_REQUIRED_VALUE;
       }
-      elsif (!$self->{parser}->is_delimited_regexp_valid("redirector_pattern", $value)) {
+
+      my ($rec, $err) = compile_regexp($value, 1);
+      if (!$rec) {
+        dbg("config: invalid redirector_pattern '$value': $err");
 	return $INVALID_VALUE;
       }
 
-      # convert to qr// while including modifiers
-      local ($1,$2,$3);
-      $value =~ /^m?(\W)(.*)(?:\1|>|}|\)|\])(.*?)$/;
-      my $pattern = $2;
-      $pattern = "(?".$3.")".$pattern if $3;
-      $pattern = qr/$pattern/;
-
-      push @{$self->{main}->{conf}->{redirector_patterns}}, $pattern;
-      # dbg("config: adding redirector regex: " . $value);
+      push @{$self->{main}->{conf}->{redirector_patterns}}, $rec;
     }
   });
 
@@ -2983,11 +2981,9 @@ why the IP is listed, typically a hyperl
 Create a sub-test for 'set'.  If you want to look up a multi-meaning zone
 like relays.osirusoft.com, you can then query the results from that zone
 using the zone ID from the original query.  The sub-test may either be an
-IPv4 dotted address for RBLs that return multiple A records or a
+IPv4 dotted address for RBLs that return multiple A records, or a
 non-negative decimal number to specify a bitmask for RBLs that return a
-single A record containing a bitmask of results, a SenderBase test
-beginning with "sb:", or (if none of the preceding options seem to fit) a
-regular expression.
+single A record containing a bitmask of results, or a regular expression.
 
 Note: the set name must be exactly the same for as the main query rule,
 including selections like '-notfirsthop' appearing at the end of the set
@@ -3001,11 +2997,17 @@ name.
     is_priv => 1,
     code => sub {
       my ($self, $key, $value, $line) = @_;
-      local ($1,$2);
-      if ($value =~ /^(\S+)\s+(?:rbl)?eval:(.*)$/) {
-        my ($rulename, $fn) = ($1, $2);
-        dbg("config: header eval rule name is $rulename function is $fn");
-        if ($fn !~ /^\w+(\(.*\))?$/) {
+      local($1);
+      if ($value !~ s/^(\S+)\s+//) {
+        return $INVALID_VALUE;
+      }
+      my $rulename = $1;
+      if ($value eq '') {
+        return $MISSING_REQUIRED_VALUE;
+      }
+      if ($value =~ /^(?:rbl)?eval:(.*)$/) {
+        my $fn = $1;
+        if ($fn !~ /^\w+\(.*\)$/) {
           return $INVALID_VALUE;
         }
         if ($fn =~ /^check_(?:rbl|dns)/) {
@@ -3015,25 +3017,9 @@ name.
           $self->{parser}->add_test ($rulename, $fn, $TYPE_HEAD_EVALS);
         }
       }
-      elsif ($value =~ /^(\S+)\s+exists:(.*)$/) {
-        my ($rulename, $header_name) = ($1, $2);
-        # RFC 5322 section 3.6.8, ftext printable US-ASCII ch not including ":"
-        if ($header_name !~ /\S/) {
-	  return $MISSING_REQUIRED_VALUE;
-      # } elsif ($header_name !~ /^([!-9;-\176]+)$/) {
-        } elsif ($header_name !~ /^([^: \t]+)$/) {  # be generous
-          return $INVALID_HEADER_FIELD_NAME;
-        }
-        $self->{parser}->add_test ($rulename, "defined($header_name)",
-                                   $TYPE_HEAD_TESTS);
-        $self->{descriptions}->{$rulename} = "Found a $header_name header";
-      }
       else {
-	my @values = split(/\s+/, $value, 2);
-	if (@values != 2) {
-	  return $MISSING_REQUIRED_VALUE;
-	}
-        $self->{parser}->add_test (@values, $TYPE_HEAD_TESTS);
+        # Detailed parsing in add_test
+        $self->{parser}->add_test ($rulename, $value, $TYPE_HEAD_TESTS);
       }
     }
   });
@@ -3063,22 +3049,22 @@ Define a body eval test.  See above.
     is_priv => 1,
     code => sub {
       my ($self, $key, $value, $line) = @_;
-      local ($1,$2);
-      if ($value =~ /^(\S+)\s+eval:(.*)$/) {
-        my ($rulename, $fn) = ($1, $2);
-        dbg("config: body eval rule name is $rulename function is $fn");
-
-        if ($fn !~ /^\w+(\(.*\))?$/) {
+      local($1);
+      if ($value !~ s/^(\S+)\s+//) {
+        return $INVALID_VALUE;
+      }
+      my $rulename = $1;
+      if ($value eq '') {
+        return $MISSING_REQUIRED_VALUE;
+      }
+      if ($value =~ /^eval:(.*)$/) {
+        my $fn = $1;
+        if ($fn !~ /^\w+\(.*\)$/) {
           return $INVALID_VALUE;
         }
         $self->{parser}->add_test ($rulename, $fn, $TYPE_BODY_EVALS);
-      }
-      else {
-	my @values = split(/\s+/, $value, 2);
-	if (@values != 2) {
-	  return $MISSING_REQUIRED_VALUE;
-	}
-        $self->{parser}->add_test (@values, $TYPE_BODY_TESTS);
+      } else {
+        $self->{parser}->add_test ($rulename, $value, $TYPE_BODY_TESTS);
       }
     }
   });
@@ -3107,11 +3093,15 @@ points of the URI, and will also be fast
     is_priv => 1,
     code => sub {
       my ($self, $key, $value, $line) = @_;
-      my @values = split(/\s+/, $value, 2);
-      if (@values != 2) {
+      local($1);
+      if ($value !~ s/^(\S+)\s+//) {
+        return $INVALID_VALUE;
+      }
+      my $rulename = $1;
+      if ($value eq '') {
         return $MISSING_REQUIRED_VALUE;
       }
-      $self->{parser}->add_test (@values, $TYPE_URI_TESTS);
+      $self->{parser}->add_test ($rulename, $value, $TYPE_URI_TESTS);
     }
   });
 
@@ -3138,15 +3128,22 @@ Define a raw-body eval test.  See above.
     is_priv => 1,
     code => sub {
       my ($self, $key, $value, $line) = @_;
-      local ($1,$2);
-      if ($value =~ /^(\S+)\s+eval:(.*)$/) {
-        $self->{parser}->add_test ($1, $2, $TYPE_RAWBODY_EVALS);
+      local($1);
+      if ($value !~ s/^(\S+)\s+//) {
+        return $INVALID_VALUE;
+      }
+      my $rulename = $1;
+      if ($value eq '') {
+        return $MISSING_REQUIRED_VALUE;
+      }
+      if ($value =~ /^eval:(.*)$/) {
+        my $fn = $1;
+        if ($fn !~ /^\w+\(.*\)$/) {
+          return $INVALID_VALUE;
+        }
+        $self->{parser}->add_test ($rulename, $fn, $TYPE_RAWBODY_EVALS);
       } else {
-	my @values = split(/\s+/, $value, 2);
-	if (@values != 2) {
-	  return $MISSING_REQUIRED_VALUE;
-	}
-        $self->{parser}->add_test (@values, $TYPE_RAWBODY_TESTS);
+        $self->{parser}->add_test ($rulename, $value, $TYPE_RAWBODY_TESTS);
       }
     }
   });
@@ -3172,15 +3169,22 @@ Define a full message eval test.  See ab
     is_priv => 1,
     code => sub {
       my ($self, $key, $value, $line) = @_;
-      local ($1,$2);
-      if ($value =~ /^(\S+)\s+eval:(.*)$/) {
-        $self->{parser}->add_test ($1, $2, $TYPE_FULL_EVALS);
+      local($1);
+      if ($value !~ s/^(\S+)\s+//) {
+        return $INVALID_VALUE;
+      }
+      my $rulename = $1;
+      if ($value eq '') {
+        return $MISSING_REQUIRED_VALUE;
+      }
+      if ($value =~ /^eval:(.*)$/) {
+        my $fn = $1;
+        if ($fn !~ /^\w+\(.*\)$/) {
+          return $INVALID_VALUE;
+        }
+        $self->{parser}->add_test ($rulename, $fn, $TYPE_FULL_EVALS);
       } else {
-	my @values = split(/\s+/, $value, 2);
-	if (@values != 2) {
-	  return $MISSING_REQUIRED_VALUE;
-	}
-        $self->{parser}->add_test (@values, $TYPE_FULL_TESTS);
+        $self->{parser}->add_test ($rulename, $value, $TYPE_FULL_TESTS);
       }
     }
   });
@@ -3225,15 +3229,19 @@ ignore these for scoring.
     is_priv => 1,
     code => sub {
       my ($self, $key, $value, $line) = @_;
-      my @values = split(/\s+/, $value, 2);
-      if (@values != 2) {
+      local($1);
+      if ($value !~ s/^(\S+)\s+//) {
+        return $INVALID_VALUE;
+      }
+      my $rulename = $1;
+      if ($value eq '') {
         return $MISSING_REQUIRED_VALUE;
       }
-      if ($values[1] =~ /\*\s*\*/) {
+      if ($value =~ /\*\s*\*/) {
 	info("config: found invalid '**' or '* *' operator in meta command");
         return $INVALID_VALUE;
       }
-      $self->{parser}->add_test (@values, $TYPE_META_TESTS);
+      $self->{parser}->add_test ($rulename, $value, $TYPE_META_TESTS);
     }
   });
 
@@ -3933,12 +3941,15 @@ from SQL or LDAP, instead of passing the
     type => $CONF_TYPE_BOOL,
   });
 
-=item loadplugin PluginModuleName [/path/module.pm]
+=item loadplugin [Mail::SpamAssassin::Plugin::]ModuleName [/path/module.pm]
 
-Load a SpamAssassin plugin module.  The C<PluginModuleName> is the perl module
+Load a SpamAssassin plugin module.  The C<ModuleName> is the perl module
 name, used to create the plugin object itself.
 
-C</path/to/module.pm> is the file to load, containing the module's perl code;
+Module naming is strict, name must only contain alphanumeric characters or
+underscores.  File must have .pm extension.
+
+C</path/module.pm> is the file to load, containing the module's perl code;
 if it's specified as a relative path, it's considered to be relative to the
 current configuration file.  If it is omitted, the module will be loaded
 using perl's search path (the C<@INC> array).
@@ -3957,20 +3968,16 @@ See C<Mail::SpamAssassin::Plugin> for mo
       }
       my ($package, $path);
       local ($1,$2);
-      if ($value =~ /^(\S+)\s+(\S+)$/) {
+      if ($value =~ /^((?:\w+::){0,10}\w+)(?:\s+(\S+\.pm))?$/i) {
         ($package, $path) = ($1, $2);
-      } elsif ($value =~ /^\S+$/) {
-        ($package, $path) = ($value, undef);
       } else {
 	return $INVALID_VALUE;
       }
-      # is blindly untainting safe?  it is no worse than before
-      $_ = untaint_var($_)  for ($package,$path);
       $self->load_plugin ($package, $path);
     }
   });
 
-=item tryplugin PluginModuleName [/path/module.pm]
+=item tryplugin ModuleName [/path/module.pm]
 
 Same as C<loadplugin>, but silently ignored if the .pm file cannot be found in
 the filesystem.
@@ -3987,15 +3994,11 @@ the filesystem.
       }
       my ($package, $path);
       local ($1,$2);
-      if ($value =~ /^(\S+)\s+(\S+)$/) {
+      if ($value =~ /^((?:\w+::){0,10}\w+)(?:\s+(\S+\.pm))?$/i) {
         ($package, $path) = ($1, $2);
-      } elsif ($value =~ /^\S+$/) {
-        ($package, $path) = ($value, undef);
       } else {
 	return $INVALID_VALUE;
       }
-      # is blindly untainting safe?  it is no worse than before
-      $_ = untaint_var($_)  for ($package,$path);
       $self->load_plugin ($package, $path, 1);
     }
   });
@@ -4777,12 +4780,7 @@ sub maybe_body_only {
 
 sub load_plugin {
   my ($self, $package, $path, $silent) = @_;
-  if ($path) {
-    $path = $self->{parser}->fix_path_relative_to_current_file($path);
-  }
-  # it wouldn't hurt to do some checking on validity of $package
-  # and $path before untainting them
-  $self->{main}->{plugins}->load_plugin(untaint_var($package), $path, $silent);
+  $self->{main}->{plugins}->load_plugin($package, $path, $silent);
 }
 
 sub load_plugin_succeeded {
@@ -4963,6 +4961,7 @@ sub feature_bug6558_free { 1 }
 sub feature_edns { 1 }  # supports 'dns_options edns' config option
 sub feature_dns_query_restriction { 1 }  # supported config option
 sub feature_registryboundaries { 1 } # replaces deprecated registrarboundaries
+sub feature_compile_regexp { 1 } # Util::compile_regexp
 sub perl_min_version_5010000 { return $] >= 5.010000 }  # perl version check ("perl_version" not neatly backwards-compatible)
 
 ###########################################################################

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf/Parser.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf/Parser.pm?rev=1848969&r1=1848968&r2=1848969&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf/Parser.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf/Parser.pm Fri Dec 14 21:05:01 2018
@@ -137,7 +137,7 @@ package Mail::SpamAssassin::Conf::Parser
 use Mail::SpamAssassin::Conf;
 use Mail::SpamAssassin::Constants qw(:sa);
 use Mail::SpamAssassin::Logger;
-use Mail::SpamAssassin::Util qw(untaint_var);
+use Mail::SpamAssassin::Util qw(untaint_var compile_regexp);
 use Mail::SpamAssassin::NetSet;
 
 use strict;
@@ -147,6 +147,9 @@ use re 'taint';
 
 our @ISA = qw();
 
+my $RULENAME_RE = RULENAME_RE;
+my $ARITH_EXPRESSION_LEXER = ARITH_EXPRESSION_LEXER;
+
 ###########################################################################
 
 sub new {
@@ -506,13 +509,12 @@ sub handle_conditional {
   my ($self, $key, $value, $if_stack_ref, $skip_parsing_ref) = @_;
   my $conf = $self->{conf};
 
-  my $lexer = ARITH_EXPRESSION_LEXER;
-  my @tokens = ($value =~ m/($lexer)/og);
+  my @tokens = ($value =~ /($ARITH_EXPRESSION_LEXER)/og);
 
   my $eval = '';
   my $bad = 0;
   foreach my $token (@tokens) {
-    if ($token =~ /^(?:\W+|[+-]?\d+(?:\.\d+)?)$/) {
+    if ($token =~ /^(?:\W{1,5}|[+-]?\d+(?:\.\d+)?)$/) {
       # using tainted subr. argument may taint the whole expression, avoid
       my $u = untaint_var($token);
       $eval .= $u . " ";
@@ -536,17 +538,25 @@ sub handle_conditional {
       $eval .= $]." ";
     }
     elsif ($token =~ /^\w[\w\:]+$/) { # class name
-      my $u = untaint_var($token);
-      $eval .= '"' . $u . '" ';
+      # Strictly controlled form:
+      if ($token =~ /^(?:\w+::){0,10}\w+$/) {
+        my $u = untaint_var($token);
+        $eval .= "'$u'";
+      } else {
+        warn "config: illegal name '$token' in 'if $value'\n";
+        $bad++;
+        last;
+      }
     }
     else {
       $bad++;
       warn "config: unparseable chars in 'if $value': '$token'\n";
+      last;
     }
   }
 
   if ($bad) {
-    $self->lint_warn("bad 'if' line, in \"$self->{currentfile}\"", undef);
+    $self->lint_warn("config: bad 'if' line, in \"$self->{currentfile}\"", undef);
     return -1;
   }
 
@@ -572,7 +582,7 @@ sub cond_clause_plugin_loaded {
 
 sub cond_clause_can {
   my ($self, $method) = @_;
-  if ($self->{currentfile} =~ q!/user_prefs$! ) {
+  if ($self->{currentfile} =~ q!\buser_prefs$! ) {
     warn "config: 'if can $method' not available in user_prefs";
     return 0
   }
@@ -589,7 +599,7 @@ sub cond_clause_can_or_has {
 
   local($1,$2);
   if (!defined $method) {
-    $self->lint_warn("bad 'if' line, no argument to $fn_name(), ".
+    $self->lint_warn("config: bad 'if' line, no argument to $fn_name(), ".
                      "in \"$self->{currentfile}\"", undef);
   } elsif ($method =~ /^(.*)::([^:]+)$/) {
     no strict "refs";
@@ -597,7 +607,7 @@ sub cond_clause_can_or_has {
     return 1  if $module->can($meth) &&
                  ( $fn_name eq 'has' || &{$method}() );
   } else {
-    $self->lint_warn("bad 'if' line, cannot find '::' in $fn_name($method), ".
+    $self->lint_warn("config: bad 'if' line, cannot find '::' in $fn_name($method), ".
                      "in \"$self->{currentfile}\"", undef);
   }
   return;
@@ -876,39 +886,40 @@ sub finish_parsing {
 
     # eval type handling
     if (($type & 1) == 1) {
-      if (my ($function, $args) = ($text =~ m/(.*?)\s*\((.*?)\)\s*$/)) {
-        my ($packed, $argsref) =
-                $self->pack_eval_method($function, $args, $name, $text);
-
-        if (!$packed) {
-          # we've already warned about this
+      if (my ($function, $args) = ($text =~ /^(\w+)\((.*?)\)$/)) {
+        my $argsref = $self->pack_eval_args($args);
+        if (!defined $argsref) {
+          $self->lint_warn("syntax error for eval function $name: $text");
+          next;
         }
         elsif ($type == $Mail::SpamAssassin::Conf::TYPE_BODY_EVALS) {
-          $conf->{body_evals}->{$priority}->{$name} = $packed;
+          $conf->{body_evals}->{$priority}->{$name} = [ $function, [@$argsref] ];
         }
         elsif ($type == $Mail::SpamAssassin::Conf::TYPE_HEAD_EVALS) {
-          $conf->{head_evals}->{$priority}->{$name} = $packed;
+          $conf->{head_evals}->{$priority}->{$name} = [ $function, [@$argsref] ];
         }
         elsif ($type == $Mail::SpamAssassin::Conf::TYPE_RBL_EVALS) {
           # We don't do priorities for $Mail::SpamAssassin::Conf::TYPE_RBL_EVALS
           # we also use the arrayref instead of the packed string
-          $conf->{rbl_evals}->{$name} = [ $function, @$argsref ];
+          $conf->{rbl_evals}->{$name} = [ $function, [@$argsref] ];
         }
         elsif ($type == $Mail::SpamAssassin::Conf::TYPE_RAWBODY_EVALS) {
-          $conf->{rawbody_evals}->{$priority}->{$name} = $packed;
+          $conf->{rawbody_evals}->{$priority}->{$name} = [ $function, [@$argsref] ];
         }
         elsif ($type == $Mail::SpamAssassin::Conf::TYPE_FULL_EVALS) {
-          $conf->{full_evals}->{$priority}->{$name} = $packed;
+          $conf->{full_evals}->{$priority}->{$name} = [ $function, [@$argsref] ];
         }
         #elsif ($type == $Mail::SpamAssassin::Conf::TYPE_URI_EVALS) {
-        #  $conf->{uri_evals}->{$priority}->{$name} = $packed;
+        #  $conf->{uri_evals}->{$priority}->{$name} = [ $function, [@$argsref] ];
         #}
         else {
           $self->lint_warn("unknown type $type for $name: $text", $name);
+          next;
         }
       }
       else {
         $self->lint_warn("syntax error for eval function $name: $text", $name);
+        next;
       }
     }
     # non-eval tests
@@ -935,6 +946,7 @@ sub finish_parsing {
       }
       else {
         $self->lint_warn("unknown type $type for $name: $text", $name);
+        next;
       }
     }
   }
@@ -986,8 +998,7 @@ sub _meta_deps_recurse {
   return unless $rule;
 
   # Lex the rule into tokens using a rather simple RE method ...
-  my $lexer = ARITH_EXPRESSION_LEXER;
-  my @tokens = ($rule =~ m/$lexer/og);
+  my @tokens = ($rule =~ /($ARITH_EXPRESSION_LEXER)/og);
 
   # Go through each token in the meta rule
   my $conf_tests = $conf->{tests};
@@ -1086,40 +1097,36 @@ sub find_dup_rules {
   }
 }
 
+# Deprecated function
 sub pack_eval_method {
-  my ($self, $function, $args, $name, $text) = @_;
+  warn "deprecated function pack_eval_method() used\n";
+  return ('',undef);
+}
 
+sub pack_eval_args {
+  my ($self, $args) = @_;
+
+  return [] if $args =~ /^\s+$/;
+
+  # bug 4419: Parse quoted strings, unquoted alphanumerics/floats,
+  # unquoted IPv4 and IPv6 addresses, and unquoted common domain names.
+  # s// is used so that we can determine whether or not we successfully
+  # parsed ALL arguments.
   my @args;
-  if (defined $args) {
-    # bug 4419: Parse quoted strings, unquoted alphanumerics/floats,
-    # unquoted IPv4 and IPv6 addresses, and unquoted common domain names.
-    # s// is used so that we can determine whether or not we successfully
-    # parsed ALL arguments.
-    local($1,$2,$3);
-    while ($args =~ s/^\s* (?: (['"]) (.*?) \1 | ( [\d\.:A-Za-z-]+? ) )
-                       \s* (?: , \s* | $ )//x) {
-      if (defined $2) {
-        push @args, $2;
-      }
-      else {
-        push @args, $3;
-      }
-    }
+  local($1,$2,$3);
+  while ($args =~ s/^\s* (?: (['"]) (.*?) \1 | ( [\d\.:A-Za-z-]+? ) )
+                     \s* (?: , \s* | $ )//x) {
+    # DO NOT UNTAINT THESE ARGS
+    # The eval function that handles these should do that as necessary,
+    # we have no idea what acceptable arguments look like here.
+    push @args, defined $2 ? $2 : $3;
   }
 
   if ($args ne '') {
-    $self->lint_warn("syntax error (unparsable argument: $args) for eval function: $name: $text", $name);
-    return;
+    return undef;
   }
 
-  my $argstr = $function;
-  $argstr =~ s/\s+//gs;
-
-  if (@args > 0) {
-    $argstr .= ',' . join(', ',
-              map { my $s = $_; $s =~ s/\#/[HASH]/gs; 'q#' . $s . '#' } @args);
-  }
-  return ($argstr, \@args);
+  return \@args;
 }
 
 ###########################################################################
@@ -1181,7 +1188,7 @@ sub add_test {
   my $conf = $self->{conf};
 
   # Don't allow invalid names ...
-  if ($name !~ /^[_[:alpha:]]\w*$/) {
+  if ($name !~ /^${RULENAME_RE}$/) {
     $self->lint_warn("config: error: rule '$name' has invalid characters ".
 	   "(not Alphanumeric + Underscore + starting with a non-digit)\n", $name);
     return;
@@ -1204,29 +1211,68 @@ sub add_test {
     }
   }
 
+  # parameter to compile_regexp()
+  my $ignore_amre =
+    $self->{conf}->{lint_rules} ||
+    $self->{conf}->{ignore_always_matching_regexps};
+
   # all of these rule types are regexps
   if ($type == $Mail::SpamAssassin::Conf::TYPE_BODY_TESTS ||
       $type == $Mail::SpamAssassin::Conf::TYPE_FULL_TESTS ||
       $type == $Mail::SpamAssassin::Conf::TYPE_RAWBODY_TESTS ||
       $type == $Mail::SpamAssassin::Conf::TYPE_URI_TESTS)
   {
-    return unless $self->is_delimited_regexp_valid($name, $text);
+    my ($rec, $err) = compile_regexp($text, 1, $ignore_amre);
+    if (!$rec) {
+      $self->lint_warn("config: invalid regexp for $name '$text': $err", $name);
+      return;
+    }
+    $conf->{test_qrs}->{$name} = $rec;
   }
-  if ($type == $Mail::SpamAssassin::Conf::TYPE_HEAD_TESTS)
+  elsif ($type == $Mail::SpamAssassin::Conf::TYPE_HEAD_TESTS)
   {
+    local($1,$2,$3);
     # RFC 5322 section 3.6.8, ftext printable US-ASCII chars not including ":"
     # no re "strict";  # since perl 5.21.8: Ranges of ASCII printables...
-    if ($text =~ /^!?defined\([!-9;-\176]+\)$/) {
-      # fine, implements 'exists:'
+    if ($text =~ /^exists:(.*)/) {
+      my $hdr = $1;
+      # never evaled, so can be quite generous with the name
+      # check :addr etc header options
+      if ($hdr !~ /^[^:\s]+:?$/) {
+        $self->lint_warn("config: invalid head test $name header: $hdr");
+        return;
+      }
+      $hdr =~ s/:$//;
+      $conf->{test_opt_header}->{$name} = $hdr;
+      $conf->{test_opt_exists}->{$name} = 1;
     } else {
-      my ($pat) = ($text =~ /^\s*\S+\s*(?:\=|\!)\~\s*(\S.*?\S)\s*$/);
-      if ($pat) { $pat =~ s/\s+\[if-unset:\s+(.+)\]\s*$//; }
-      return unless $self->is_delimited_regexp_valid($name, $pat);
+      if ($text !~ /^([^:\s]+(?:\:|(?:\:[a-z]+){1,2})?)\s*([=!]~)\s*(.+)$/) {
+        $self->lint_warn("config: invalid head test $name: $text");
+        return;
+      }
+      my ($hdr, $op, $pat) = ($1, $2, $3);
+      $hdr =~ s/:$//;
+      if ($pat =~ s/\s+\[if-unset:\s+(.+)\]$//) {
+        $conf->{test_opt_unset}->{$name} = $1;
+      }
+      my ($rec, $err) = compile_regexp($pat, 1, $ignore_amre);
+      if (!$rec) {
+        $self->lint_warn("config: invalid regexp for $name '$pat': $err", $name);
+        return;
+      }
+      $conf->{test_qrs}->{$name} = $rec;
+      $conf->{test_opt_header}->{$name} = $hdr;
+      $conf->{test_opt_neg}->{$name} = 1 if $op eq '!~';
     }
   }
   elsif ($type == $Mail::SpamAssassin::Conf::TYPE_META_TESTS)
   {
-    return unless $self->is_meta_valid($name, $text);
+    if ($self->is_meta_valid($name, $text)) {
+      # Untaint now once and not repeatedly later
+      $text = untaint_var($text);
+    } else {
+      return;
+    }
   }
 
   $conf->{tests}->{$name} = $text;
@@ -1291,39 +1337,36 @@ sub is_meta_valid {
 
   # $meta is a degenerate translation of the rule, replacing all variables (i.e. rule names) with 0. 
   my $meta = '';
-  $rule = untaint_var($rule);  # must be careful below
-  # Bug #7557 code injection
-  if ( $rule =~ /\S(::|->)\S/ )  {
-    warn("is_meta_valid: Bogus rule $name: $rule") ;
+
+  # Paranoid check (Bug #7557)
+  if ($rule =~ /(?:\:\:|->)/)  {
+    warn("config: invalid meta $name rule: $rule") ;
     return 0;
   }
 
   # Lex the rule into tokens using a rather simple RE method ...
-  my $lexer = ARITH_EXPRESSION_LEXER;
-  my @tokens = ($rule =~ m/$lexer/og);
-  if (length($name) == 1) {
-    for (@tokens) {
-      print "$name $_\n "  or die "Error writing token: $!";
-    }
-  }
+  my @tokens = ($rule =~ /($ARITH_EXPRESSION_LEXER)/og);
+
   # Go through each token in the meta rule
   foreach my $token (@tokens) {
     # If the token is a syntactically legal rule name, make it zero
-    if ($token =~ /^[_[:alpha:]]\w+\z/s) {
+    if ($token =~ /^${RULENAME_RE}\z/s) {
       $meta .= "0 ";
     }
     # if it is a (decimal) number or a string of 1 or 2 punctuation
     # characters (i.e. operators) tack it onto the degenerate rule
-    elsif ( $token =~ /^(\d+(?:\.\d+)?|[[:punct:]]{1,2})\z/s ) {
+    elsif ($token =~ /^(\d+(?:\.\d+)?|[[:punct:]]{1,2})\z/s) {
       $meta .= "$token ";
     }
-    # WTF is it? Just warn, for now. Bug #7557
+    # Skip anything unknown (Bug #7557)
     else {
-      $self->lint_warn("config: Strange rule $name token: $token", $name);
-      $meta .= "$token ";
+      $self->lint_warn("config: invalid meta $name token: $token", $name);
+      return 0;
     }
   }
-  my $evalstr = 'my $x = ' . $meta . '; 1;';
+
+  $meta = untaint_var($meta); # was carefully checked
+  my $evalstr = 'my $x = '.$meta.'; 1;';
   if (eval $evalstr) {
     return 1;
   }
@@ -1334,94 +1377,21 @@ sub is_meta_valid {
   return 0;
 }
 
+# Deprecated functions, leave just in case..
 sub is_delimited_regexp_valid {
-  my ($self, $name, $re) = @_;
-
-  if (!$re || $re !~ /^\s*m?(\W).*(?:\1|>|}|\)|\])[a-z]*\s*$/) {
-    $re ||= '';
-    $self->lint_warn("config: invalid regexp for rule $name: $re: missing or invalid delimiters\n", $name);
-    return 0;
-  }
-  return $self->is_regexp_valid($name, $re);
+  my ($self, $rule, $re) = @_;
+  warn "deprecated is_delimited_regexp_valid() called, use compile_regexp()\n";
+  my ($rec, $err) = compile_regexp($re, 1, 1);
+  return $rec;
 }
-
 sub is_regexp_valid {
-  my ($self, $name, $re) = @_;
-
-  # OK, try to remove any normal perl-style regexp delimiters at
-  # the start and end, and modifiers at the end if present,
-  # so we can validate those too.
-  my $origre = $re;
-  my $safere = $re;
-  my $mods = '';
-  local ($1,$2);
-  if ($re =~ s/^m\{//) {
-    $re =~ s/\}([a-z]*)\z//; $mods = $1;
-  }
-  elsif ($re =~ s/^m\(//) {
-    $re =~ s/\)([a-z]*)\z//; $mods = $1;
-  }
-  elsif ($re =~ s/^m<//) {
-    $re =~ s/>([a-z]*)\z//; $mods = $1;
-  }
-  elsif ($re =~ s/^m(\W)//) {
-    $re =~ s/\Q$1\E([a-z]*)\z//; $mods = $1;
-  }
-  elsif ($re =~ s{^/(.*)/([a-z]*)\z}{$1}) {
-    $mods = $2;
-  }
-  else {
-    $safere = "m#".$re."#";
-  }
-
-  if ($self->{conf}->{lint_rules} ||
-      $self->{conf}->{ignore_always_matching_regexps})
-  {
-    my $msg = $self->is_always_matching_regexp($name, $re);
-
-    if (defined $msg) {
-      if ($self->{conf}->{lint_rules}) {
-        $self->lint_warn($msg, $name);
-      } else {
-        warn $msg;
-        return 0;
-      }
-    }
-  }
-
-  # now prepend the modifiers, in order to check if they're valid
-  if ($mods) {
-    $re = "(?" . $mods . ")" . $re;
-  }
-
-  # note: this MUST use m/...${re}.../ in some form or another, ie.
-  # interpolation of the $re variable into a code regexp, in order to test the
-  # security of the regexp.  simply using ("" =~ $re) will NOT do that, and
-  # will therefore open a hole!
-  { # no re "strict";  # since perl 5.21.8: Ranges of ASCII printables...
-    if (eval { ("" =~ m{$re}); 1; }) { return 1 }
-  }
-  my $err = $@ ne '' ? $@ : "errno=$!";  chomp $err;
-  $err =~ s/ at .*? line \d.*$//;
-  $self->lint_warn("config: invalid regexp for rule $name: $origre: $err\n", $name);
-  return 0;
+  my ($self, $rule, $re) = @_;
+  warn "deprecated is_regexp_valid() called, use compile_regexp()\n";
+  my ($rec, $err) = compile_regexp($re, 1, 1);
+  return $rec;
 }
-
-# check the pattern for some basic errors, and warn if found
 sub is_always_matching_regexp {
-  my ($self, $name, $re) = @_;
-
-  if ($re =~ /(?<!\\)\|\|/) {
-    return "config: regexp for rule $name always matches due to '||'";
-  }
-  elsif ($re =~ /^\|/) {
-    return "config: regexp for rule $name always matches due to " .
-      "pattern starting with '|'";
-  }
-  elsif ($re =~ /\|(?<!\\\|)$/) {
-    return "config: regexp for rule $name always matches due to " .
-      "pattern ending with '|'";
-  }
+  warn "deprecated is_always_matching_regexp() called\n";
   return;
 }
 

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Constants.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Constants.pm?rev=1848969&r1=1848968&r2=1848969&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Constants.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Constants.pm Fri Dec 14 21:05:01 2018
@@ -32,7 +32,7 @@ our(@BAYES_VARS, @IP_VARS, @SA_VARS, %EX
 
 # NOTE: Unless you need these to be available at BEGIN time, you're better with this out of a BEGIN block with a simple our statement.
 BEGIN { 
-    @IP_VARS = qw(
+  @IP_VARS = qw(
 	IP_IN_RESERVED_RANGE IP_PRIVATE LOCALHOST IPV4_ADDRESS IP_ADDRESS
   );
   @BAYES_VARS = qw(
@@ -43,7 +43,7 @@ BEGIN {
 	HARVEST_DNSBL_PRIORITY MBX_SEPARATOR
 	MAX_BODY_LINE_LENGTH MAX_HEADER_KEY_LENGTH MAX_HEADER_VALUE_LENGTH
 	MAX_HEADER_LENGTH ARITH_EXPRESSION_LEXER AI_TIME_UNKNOWN
-	CHARSETS_LIKELY_TO_FP_AS_CAPS MAX_URI_LENGTH
+	CHARSETS_LIKELY_TO_FP_AS_CAPS MAX_URI_LENGTH RULENAME_RE
   );
 
   %EXPORT_TAGS = (
@@ -402,4 +402,7 @@ use constant CHARSETS_LIKELY_TO_FP_AS_CA
 	  koi|jp|jis|euc|gb|big5|isoir|cp1251|windows-1251|georgianps|pt154|tis
 	)[-_a-z0-9]*}ix;
 
+# Allowed rulename format
+use constant RULENAME_RE => qr([_a-zA-Z][_a-zA-Z0-9]{0,127});
+
 1;

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Dns.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Dns.pm?rev=1848969&r1=1848968&r2=1848969&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Dns.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Dns.pm Fri Dec 14 21:05:01 2018
@@ -139,6 +139,12 @@ sub do_rbl_lookup {
 # TODO: these are constant so they should only be added once at startup
 sub register_rbl_subtest {
   my ($self, $rule, $set, $subtest) = @_;
+
+  if ($subtest =~ /^sb:/) {
+    warn("dns: ignored $rule, SenderBase rules are deprecated\n");
+    return 0;
+  }
+
   $self->{dnspost}->{$set}->{$subtest} = $rule;
 }
 
@@ -307,30 +313,6 @@ sub process_dnsbl_set {
       # test for exact equality, not a regexp (an IPv4 address)
       $self->dnsbl_hit($rule, $question, $answer)  if $subtest eq $rdatastr;
     }
-    # senderbase
-    elsif ($subtest =~ s/^sb://) {
-      # SB rules are not available to users
-      if ($self->{conf}->{user_defined_rules}->{$rule}) {
-        dbg("dns: skipping rule '$rule': not supported when user-defined");
-        next;
-      }
-
-      $rdatastr =~ s/^\d+-//;
-      my %sb = ($rdatastr =~ m/(?:^|\|)(\d+)=([^|]+)/g);
-      my $undef = 0;
-      while ($subtest =~ m/\bS(\d+)\b/g) {
-	if (!defined $sb{$1}) {
-	  $undef = 1;
-	  last;
-	}
-	$subtest =~ s/\bS(\d+)\b/\$sb{$1}/;
-      }
-
-      # untaint. (bug 3325)
-      $subtest = untaint_var($subtest);
-
-      $self->got_hit($rule, "SenderBase: ", ruletype => "dnsbl") if !$undef && eval $subtest;
-    }
     # bitmask
     elsif ($subtest =~ /^\d+$/) {
       # Bug 6803: response should be within 127.0.0.0/8, ignore otherwise

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Logger.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Logger.pm?rev=1848969&r1=1848968&r2=1848969&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Logger.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Logger.pm Fri Dec 14 21:05:01 2018
@@ -265,6 +265,8 @@ sub add {
   my $name = lc($params{method});
   my $class = ucfirst($name);
 
+  return 0 if $class !~ /^\w+$/; # be paranoid
+
   eval 'use Mail::SpamAssassin::Logger::'.$class.'; 1'
   or do {
     my $eval_stat = $@ ne '' ? $@ : "errno=$!";  chomp $eval_stat;

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Message.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Message.pm?rev=1848969&r1=1848968&r2=1848969&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Message.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Message.pm Fri Dec 14 21:05:01 2018
@@ -188,14 +188,34 @@ sub new {
     @message = split(/^/m, $message, -1);
   }
 
-  # Pull off mbox and mbx separators
-  # also deal with null messages
+  # Deal with null message
   if (!@message) {
     # bug 4884:
     # if we get here, it means that the input was null, so fake the message
     # content as a single newline...
     @message = ("\n");
-  } elsif ($message[0] =~ /^From\s+(?!:)/) {
+  }
+
+  # Bug 7648:
+  # Make sure the message is tainted. When linting, @testmsg is not, so this
+  # handles that. Perhaps 3rd party tools could call this with untainted
+  # messages? Tainting the message is important because it prevents certain
+  # exploits later.
+  if (Mail::SpamAssassin::Util::am_running_in_taint_mode() &&
+        grep { !Scalar::Util::tainted($_) } @message) {
+    local($_);
+    # To preserve newlines, no joining and splitting here, process each line
+    # directly as is.
+    foreach (@message) {
+      $_ = Mail::SpamAssassin::Util::taint_var($_);
+    }
+    if (grep { !Scalar::Util::tainted($_) } @message) {
+      die "Mail::SpamAssassin::Message failed to enforce message taintness";
+    }
+  }
+
+  # Pull off mbox and mbx separators
+  if ($message[0] =~ /^From\s+(?!:)/) {
     # careful not to confuse with obsolete syntax which allowed WSP before ':'
     # mbox formated mailbox
     $self->{'mbox_sep'} = shift @message;

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm?rev=1848969&r1=1848968&r2=1848969&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/PerMsgStatus.pm Fri Dec 14 21:05:01 2018
@@ -269,7 +269,6 @@ sub new {
     'master_deadline'   => $msg->{master_deadline},  # dflt inherited from msg
     'deadline_exceeded' => 0,  # time limit exceeded, skipping further tests
   };
-  #$self->{main}->{use_rule_subs} = 1;
 
   dbg("check: pms new, time limit in %.3f s",
       $self->{master_deadline} - time)  if $self->{master_deadline};

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Bayes.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Bayes.pm?rev=1848969&r1=1848968&r2=1848969&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Bayes.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Bayes.pm Fri Dec 14 21:05:01 2018
@@ -1645,8 +1645,14 @@ sub learner_new {
   my ($self) = @_;
 
   my $store;
-  my $module = untaint_var($self->{conf}->{bayes_store_module});
-  $module = 'Mail::SpamAssassin::BayesStore::DBM'  if !$module;
+  my $module = $self->{conf}->{bayes_store_module};
+  if (!$module) {
+    $module = 'Mail::SpamAssassin::BayesStore::DBM';
+  } elsif ($module =~ /^([_A-Za-z0-9:]+)$/) {
+    $module = untaint_var($module);
+  } else {
+    die "bayes: invalid module: $module\n";
+  }
 
   dbg("bayes: learner_new self=%s, bayes_store_module=%s", $self,$module);
   undef $self->{store};  # DESTROYs previous object, if any

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm?rev=1848969&r1=1848968&r2=1848969&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm Fri Dec 14 21:05:01 2018
@@ -29,7 +29,7 @@ package Mail::SpamAssassin::Plugin::Body
 
 use Mail::SpamAssassin::Plugin;
 use Mail::SpamAssassin::Logger;
-use Mail::SpamAssassin::Util qw(untaint_var);
+use Mail::SpamAssassin::Util qw(untaint_var qr_to_string);
 use Mail::SpamAssassin::Util::Progress;
 
 use Errno qw(ENOENT EACCES EEXIST);
@@ -152,8 +152,12 @@ NEXT_RULE:
   foreach my $name (keys %{$rules}) {
     $self->{show_progress} and $progress and $progress->update(++$count);
 
-    my $rule = $rules->{$name};
-    my $cachekey = join "#", $name, $rule;
+    #my $rule = $rules->{$name};
+    my $rule = qr_to_string($conf->{test_qrs}->{$name});
+    if (!defined $rule) {
+      die "zoom: error: regexp for $rule not found\n";
+    }
+    my $cachekey = $name.'#'.$rule;
 
     my $cent = $cached->{rule_bases}->{$cachekey};
     if (defined $cent) {
@@ -177,7 +181,7 @@ NEXT_RULE:
     }
 
     # ignore ReplaceTags rules
-    my $is_a_replacetags_rule = $conf->{rules_to_replace}->{$name};
+    my $is_a_replacetags_rule = $conf->{replace_rules}->{$name};
     my ($minlen, $lossy, @bases);
 
     if (!$is_a_replacetags_rule) {
@@ -407,11 +411,14 @@ sub simplify_and_qr_regexp {
   my $rule = shift;
 
   my $main = $self->{main};
-  $rule = Mail::SpamAssassin::Util::regexp_remove_delimiters($rule);
 
-  # remove the regexp modifiers, keep for later
+
   my $mods = '';
-  while ($rule =~ s/^\(\?([a-z]*)\)//) { $mods .= $1; }
+
+  # remove the regexp modifiers, keep for later
+  while ($rule =~ s/^\(\?([a-z]*)\)//) {
+    $mods .= $1;
+  }
 
   # modifier removal
   while ($rule =~ s/^\(\?-([a-z]*)\)//) {
@@ -685,7 +692,7 @@ sub extract_hints {
     $add_candidate->();
 
     if (!$longestexact) {
-      die "no long-enough string found in $rawrule";
+      die "no long-enough string found in $rawrule\n";
       # all unrolled versions must have a long string, otherwise
       # we cannot reliably match all variants of the rule
     } else {

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Check.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Check.pm?rev=1848969&r1=1848968&r2=1848969&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Check.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Check.pm Fri Dec 14 21:05:01 2018
@@ -28,6 +28,9 @@ use Mail::SpamAssassin::Constants qw(:sa
 
 our @ISA = qw(Mail::SpamAssassin::Plugin);
 
+my $ARITH_EXPRESSION_LEXER = ARITH_EXPRESSION_LEXER;
+my $RULENAME_RE = RULENAME_RE;
+
 # methods defined by the compiled ruleset; deleted in finish_tests()
 our @TEMPORARY_METHODS;
 
@@ -263,11 +266,15 @@ sub run_rbl_eval_tests {
 
     %{$pms->{test_log_msgs}} = ();        # clear test state
 
-    my ($function, @args) = @{$test};
+    my $function = $test->[0];
+    if (!exists $pms->{conf}->{eval_plugins}->{$function}) {
+      warn("rules: unknown eval '$function' for $rulename, ignoring RBL eval\n");
+      return 0;
+    }
 
     my $result;
     eval {
-      $result = $pms->$function($rulename, @args);  1;
+      $result = $pms->$function($rulename, @{$test->[1]});  1;
     } or do {
       my $eval_stat = $@ ne '' ? $@ : "errno=$!";  chomp $eval_stat;
       die "rules: $eval_stat\n"  if $eval_stat =~ /__alarm__ignore__/;
@@ -334,6 +341,7 @@ sub run_generic_tests {
     $self->push_evalstr_prefix($pms, '
         # start_rules_plugin_code '.$ruletype.' '.$priority.'
         my $scoresptr = $self->{conf}->{scores};
+        my $qrptr = $self->{conf}->{test_qrs};
     ');
     if (defined $opts{pre_loop_body}) {
       $opts{pre_loop_body}->($self, $pms, $conf, %nopts);
@@ -529,11 +537,9 @@ sub do_meta_tests {
     loop_body => sub
   {
     my ($self, $pms, $conf, $rulename, $rule, %opts) = @_;
-    $rule = untaint_var($rule);  # presumably checked
 
     # Lex the rule into tokens using a rather simple RE method ...
-    my $lexer = ARITH_EXPRESSION_LEXER;
-    my @tokens = ($rule =~ m/$lexer/og);
+    my @tokens = ($rule =~ /$ARITH_EXPRESSION_LEXER/og);
 
     # Set the rule blank to start
     $meta{$rulename} = "";
@@ -544,15 +550,12 @@ sub do_meta_tests {
     # Go through each token in the meta rule
     foreach my $token (@tokens) {
 
-      # Numbers can't be rule names
-      if ($token =~ tr{A-Za-z0-9_}{}c || substr($token,0,1) =~ tr{A-Za-z_}{}c) {
-        $meta{$rulename} .= "$token ";
-      }
-      else {  # token is a rule name
+      # ... rulename?
+      if ($token =~ /^${RULENAME_RE}\z/) {
         # the " || 0" formulation is to avoid "use of uninitialized value"
         # warnings; this is better than adding a 0 to a hash for every
         # rule referred to in a meta...
-        $meta{$rulename} .= "(\$h->{'$token'} || 0) ";
+        $meta{$rulename} .= "(\$h->{'$token'}||0) ";
       
         if (!exists $conf->{scores}->{$token}) {
           dbg("rules: meta test $rulename has undefined dependency '$token'");
@@ -571,6 +574,9 @@ sub do_meta_tests {
         # If the token is another meta rule, add it as a dependency
         push (@{ $rule_deps{$rulename} }, $token)
           if (exists $conf->{meta_tests}->{$opts{priority}}->{$token});
+      } else {
+        # ... number or operator
+        $meta{$rulename} .= "$token ";
       }
     }
   },
@@ -666,66 +672,30 @@ sub do_head_tests {
     args => [ ],
     loop_body => sub
   {
-    my ($self, $pms, $conf, $rulename, $rule, %opts) = @_;
-    my $def;
-    $rule = untaint_var($rule);  # presumably checked
-    my ($hdrname, $op, $op_infix, $pat);
-    if ($rule =~ /^\s* (\S+) \s* ([=!]~) \s* (\S .*? \S) \s*$/x) {
-      ($hdrname, $op, $pat) = ($1,$2,$3);  # e.g.: Subject =~ /patt/
-      $op_infix = 1;
-      if (!defined $pat) {
-        warn "rules: invalid rule: $rulename\n";
-        $pms->{rule_errors}++;
-        next;
-      }
-      if ($pat =~ s/\s+\[if-unset:\s+(.+)\]\s*$//) { $def = $1 }
-    } elsif ($rule =~ /^\s* (\S+) \s* \( \s* (\S+) \s* \) \s*$/x) {
-      # implements exists:name_of_header (and similar function or prefix ops)
-      ($hdrname, $op) = ($2,$1);  # e.g.: !defined(Subject)
+    my ($self, $pms, $conf, $rulename, $pat, %opts) = @_;
+    my ($op, $op_infix);
+    my $hdrname = $conf->{test_opt_header}->{$rulename};
+    if (exists $conf->{test_opt_exists}->{$rulename}) {
       $op_infix = 0;
-    } else {
-      warn "rules: unrecognized rule: $rulename\n";
-      $pms->{rule_errors}++;
-      next;
+      if (exists $conf->{test_opt_neg}->{$rulename}) {
+        $op = '!defined';
+      } else {
+        $op = 'defined';
+      }
+    }
+    else {
+      $op_infix = 1;
+      $op = $conf->{test_opt_neg}->{$rulename} ? '!~' : '=~';
     }
 
+    my $def = $conf->{test_opt_unset}->{$rulename};
     push(@{ $ordered{$hdrname . (!defined $def ? '' : "\t".$def)} },
          $rulename);
 
-    next if ($opts{doing_user_rules} &&
+    return if ($opts{doing_user_rules} &&
             !$self->is_user_rule_sub($rulename.'_head_test'));
 
-    # caller can set this member of the Mail::SpamAssassin object to
-    # override this; useful for profiling rule runtimes, although I think
-    # the HitFreqsRuleTiming.pm plugin is probably better nowadays anyway
-    if ($self->{main}->{use_rule_subs}) {
-      my $matching_string_unavailable = 0;
-      my $expr;
-      if ($op =~ /^!?[A-Za-z_]+$/) {  # function or its negation
-        $expr = $op . '($text)';
-        $matching_string_unavailable = 1;
-      } else {  # infix operator
-        $expr = '$text ' . $op . ' ' . $pat;
-        if ($op eq '=~' || $op eq '!~') {
-          $expr .= 'g';
-        } else {
-          $matching_string_unavailable = 1;
-        }
-      }
-      $self->add_temporary_method ($rulename.'_head_test', '{
-          my($self,$text) = @_;
-          '.$self->hash_line_for_rule($pms, $rulename).'
-	    while ('.$expr.') {
-            $self->got_hit(q{'.$rulename.'}, "", ruletype => "header");
-            '. $self->hit_rule_plugin_code($pms, $rulename, "header", "last",
-                                           $matching_string_unavailable) . '
-            }
-        }');
-    }
-    else {
-      # store for use below
-      $testcode{$rulename} = [$op_infix, $op, $pat];
-    }
+    $testcode{$rulename} = [$op_infix, $op, $pat];
   },
     pre_loop_body => sub
   {
@@ -746,15 +716,6 @@ sub do_head_tests {
                            (!defined($def) ? 'undef' : 'q{'.$def.'}') . ');
       ');
       foreach my $rulename (@{$v}) {
-        if ($self->{main}->{use_rule_subs}) {
-          $self->add_evalstr($pms, '
-            if ($scoresptr->{q{'.$rulename.'}}) {
-              '.$rulename.'_head_test($self, $hval);
-              '.$self->ran_rule_plugin_code($rulename, "header").'
-            }
-          ');
-        }
-        else {
           my $tc_ref = $testcode{$rulename};
           my ($op_infix, $op, $pat);
           ($op_infix, $op, $pat) = @$tc_ref  if defined $tc_ref;
@@ -772,9 +733,7 @@ sub do_head_tests {
             $matching_string_unavailable = 1;
           }
           else {  # infix operator
-            if (! ($op eq '=~' || $op eq '!~') ) { # not a pattern matching op.
-              $matching_string_unavailable = 1;
-            } elsif ( ($conf->{tflags}->{$rulename}||'') =~ /\bmultiple\b/ ) {
+            if (($conf->{tflags}->{$rulename}||'') =~ /\bmultiple\b/) {
               $posline = 'pos $hval = 0; $hits = 0;';
               $ifwhile = 'while';
               $hitdone = 'last';
@@ -783,7 +742,11 @@ sub do_head_tests {
               $max = untaint_var($max);
               $whlimit = ' && $hits++ < '.$max if $max;
             }
-            $expr = '$hval ' . $op . ' ' . $pat . $matchg;
+            if ($matchg) {
+              $expr = '$hval '.$op.' /$qrptr->{q{'.$rulename.'}}/g';
+            } else {
+              $expr = '$hval '.$op.' $qrptr->{q{'.$rulename.'}}';
+            }
           }
 
           $self->add_evalstr($pms, '
@@ -798,7 +761,6 @@ sub do_head_tests {
             '.$self->ran_rule_plugin_code($rulename, "header").'
           }
           ');
-        }
       }
       $self->pop_evalstr_prefix();
     }
@@ -820,7 +782,6 @@ sub do_body_tests {
     loop_body => sub
   {
     my ($self, $pms, $conf, $rulename, $pat, %opts) = @_;
-    $pat = untaint_var($pat);  # presumably checked
     my $sub = '';
     if (would_log('dbg', 'rules-all') == 2) {
       $sub .= '
@@ -838,7 +799,7 @@ sub do_body_tests {
       body_'.$loopid.': foreach my $l (@_) {
         pos $l = 0;
         '.$self->hash_line_for_rule($pms, $rulename).'
-        while ($l =~ '.$pat.'g'. ($max? ' && $hits++ < '.$max:'') .') { 
+        while ($l =~ /$qrptr->{q{'.$rulename.'}}/g'. ($max? ' && $hits++ < '.$max:'') .') { 
           $self->got_hit(q{'.$rulename.'}, "BODY: ", ruletype => "body"); 
           '. $self->hit_rule_plugin_code($pms, $rulename, 'body',
 					 "last body_".$loopid) . '
@@ -853,7 +814,7 @@ sub do_body_tests {
       $sub .= '
       foreach my $l (@_) {
         '.$self->hash_line_for_rule($pms, $rulename).'
-        if ($l =~ '.$pat.') { 
+        if ($l =~ $qrptr->{q{'.$rulename.'}}) { 
           $self->got_hit(q{'.$rulename.'}, "BODY: ", ruletype => "body"); 
           '. $self->hit_rule_plugin_code($pms, $rulename, "body", "last") .'
         }
@@ -861,30 +822,15 @@ sub do_body_tests {
       ';
     }
 
-    if ($self->{main}->{use_rule_subs}) {
-      $self->add_evalstr($pms, '
-        if ($scoresptr->{q{'.$rulename.'}}) {
-          '.$rulename.'_body_test($self,@_); 
-          '.$self->ran_rule_plugin_code($rulename, "body").'
-        }
-      ');
-    }
-    else {
-      $self->add_evalstr($pms, '
-        if ($scoresptr->{q{'.$rulename.'}}) {
-          '.$sub.'
-          '.$self->ran_rule_plugin_code($rulename, "body").'
-        }
-      ');
-    }
+    $self->add_evalstr($pms, '
+      if ($scoresptr->{q{'.$rulename.'}}) {
+        '.$sub.'
+        '.$self->ran_rule_plugin_code($rulename, "body").'
+      }
+    ');
 
-    next if ($opts{doing_user_rules} &&
+    return if ($opts{doing_user_rules} &&
             !$self->is_user_rule_sub($rulename.'_body_test'));
-
-    if ($self->{main}->{use_rule_subs}) {
-      $self->add_temporary_method ($rulename.'_body_test',
-        '{ my $self = shift; '.$sub.' }');
-    }
   }
   );
 }
@@ -902,7 +848,6 @@ sub do_uri_tests {
     loop_body => sub
   {
     my ($self, $pms, $conf, $rulename, $pat, %opts) = @_;
-    $pat = untaint_var($pat);  # presumably checked
     my $sub = '';
     if (would_log('dbg', 'rules-all') == 2) {
       $sub .= '
@@ -918,7 +863,7 @@ sub do_uri_tests {
       uri_'.$loopid.': foreach my $l (@_) {
         pos $l = 0;
         '.$self->hash_line_for_rule($pms, $rulename).'
-        while ($l =~ '.$pat.'g'. ($max? ' && $hits++ < '.$max:'') .') { 
+        while ($l =~ /$qrptr->{q{'.$rulename.'}}/g'. ($max? ' && $hits++ < '.$max:'') .') { 
            $self->got_hit(q{'.$rulename.'}, "URI: ", ruletype => "uri");
            '. $self->hit_rule_plugin_code($pms, $rulename, "uri",
 					  "last uri_".$loopid) . '
@@ -930,7 +875,7 @@ sub do_uri_tests {
       $sub .= '
       foreach my $l (@_) {
         '.$self->hash_line_for_rule($pms, $rulename).'
-        if ($l =~ '.$pat.') { 
+        if ($l =~ $qrptr->{q{'.$rulename.'}}) { 
            $self->got_hit(q{'.$rulename.'}, "URI: ", ruletype => "uri");
            '. $self->hit_rule_plugin_code($pms, $rulename, "uri", "last") .'
         }
@@ -938,30 +883,15 @@ sub do_uri_tests {
       ';
     }
 
-    if ($self->{main}->{use_rule_subs}) {
-      $self->add_evalstr($pms, '
-        if ($scoresptr->{q{'.$rulename.'}}) {
-          '.$rulename.'_uri_test($self, @_);
-          '.$self->ran_rule_plugin_code($rulename, "uri").'
-        }
-      ');
-    }
-    else {
-      $self->add_evalstr($pms, '
-        if ($scoresptr->{q{'.$rulename.'}}) {
-          '.$sub.'
-          '.$self->ran_rule_plugin_code($rulename, "uri").'
-        }
-      ');
-    }
+    $self->add_evalstr($pms, '
+      if ($scoresptr->{q{'.$rulename.'}}) {
+        '.$sub.'
+        '.$self->ran_rule_plugin_code($rulename, "uri").'
+      }
+    ');
 
-    next if ($opts{doing_user_rules} &&
+    return if ($opts{doing_user_rules} &&
             !$self->is_user_rule_sub($rulename.'_uri_test'));
-
-    if ($self->{main}->{use_rule_subs}) {
-      $self->add_temporary_method ($rulename.'_uri_test',
-        '{ my $self = shift; '.$sub.' }');
-    }
   }
   );
 }
@@ -979,7 +909,6 @@ sub do_rawbody_tests {
     loop_body => sub
   {
     my ($self, $pms, $conf, $rulename, $pat, %opts) = @_;
-    $pat = untaint_var($pat);  # presumably checked
     my $sub = '';
     if (would_log('dbg', 'rules-all') == 2) {
       $sub .= '
@@ -997,7 +926,7 @@ sub do_rawbody_tests {
       rawbody_'.$loopid.': foreach my $l (@_) {
         pos $l = 0;
         '.$self->hash_line_for_rule($pms, $rulename).'
-        while ($l =~ '.$pat.'g'. ($max? ' && $hits++ < '.$max:'') .') { 
+        while ($l =~ /$qrptr->{q{'.$rulename.'}}/g'. ($max? ' && $hits++ < '.$max:'') .') { 
            $self->got_hit(q{'.$rulename.'}, "RAW: ", ruletype => "rawbody");
            '. $self->hit_rule_plugin_code($pms, $rulename, "rawbody",
 					  "last rawbody_".$loopid) . '
@@ -1010,7 +939,7 @@ sub do_rawbody_tests {
       $sub .= '
       foreach my $l (@_) {
         '.$self->hash_line_for_rule($pms, $rulename).'
-        if ($l =~ '.$pat.') { 
+        if ($l =~ $qrptr->{q{'.$rulename.'}}) { 
            $self->got_hit(q{'.$rulename.'}, "RAW: ", ruletype => "rawbody");
            '. $self->hit_rule_plugin_code($pms, $rulename, "rawbody", "last") . '
         }
@@ -1018,30 +947,15 @@ sub do_rawbody_tests {
       ';
     }
 
-    if ($self->{main}->{use_rule_subs}) {
-      $self->add_evalstr($pms, '
-        if ($scoresptr->{q{'.$rulename.'}}) {
-           '.$rulename.'_rawbody_test($self, @_);
-           '.$self->ran_rule_plugin_code($rulename, "rawbody").'
-        }
-      ');
-    }
-    else {
-      $self->add_evalstr($pms, '
-        if ($scoresptr->{q{'.$rulename.'}}) {
-          '.$sub.'
-          '.$self->ran_rule_plugin_code($rulename, "rawbody").'
-        }
-      ');
-    }
+    $self->add_evalstr($pms, '
+      if ($scoresptr->{q{'.$rulename.'}}) {
+        '.$sub.'
+        '.$self->ran_rule_plugin_code($rulename, "rawbody").'
+      }
+    ');
 
-    next if ($opts{doing_user_rules} &&
+    return if ($opts{doing_user_rules} &&
             !$self->is_user_rule_sub($rulename.'_rawbody_test'));
-
-    if ($self->{main}->{use_rule_subs}) {
-      $self->add_temporary_method ($rulename.'_rawbody_test',
-        '{ my $self = shift; '.$sub.' }');
-    }
   }
   );
 }
@@ -1066,7 +980,6 @@ sub do_full_tests {
                 loop_body => sub
   {
     my ($self, $pms, $conf, $rulename, $pat, %opts) = @_;
-    $pat = untaint_var($pat);  # presumably checked
     my ($max) = ($pms->{conf}->{tflags}->{$rulename}||'') =~ /\bmaxhits=(\d+)\b/;
     $max = untaint_var($max);
     $self->add_evalstr($pms, '
@@ -1075,7 +988,7 @@ sub do_full_tests {
         '.$self->hash_line_for_rule($pms, $rulename).'
         dbg("rules-all: running full rule %s", q{'.$rulename.'});
         $hits = 0;
-        while ($$fullmsgref =~ '.$pat.'g'. ($max? ' && $hits++ < '.$max:'') .') {
+        while ($$fullmsgref =~ /$qrptr->{q{'.$rulename.'}}/g'. ($max? ' && $hits++ < '.$max:'') .') {
           $self->got_hit(q{'.$rulename.'}, "FULL: ", ruletype => "full");
           '. $self->hit_rule_plugin_code($pms, $rulename, "full", "last") . '
         }
@@ -1093,7 +1006,7 @@ sub do_head_eval_tests {
   return unless (defined($pms->{conf}->{head_evals}->{$priority}));
   dbg("rules: running head_eval tests; score so far=".$pms->{score});
   $self->run_eval_tests ($pms, $Mail::SpamAssassin::Conf::TYPE_HEAD_EVALS,
-			 $pms->{conf}->{head_evals}->{$priority}, '', $priority);
+			 'head_evals', '', $priority);
 }
 
 sub do_body_eval_tests {
@@ -1101,8 +1014,7 @@ sub do_body_eval_tests {
   return unless (defined($pms->{conf}->{body_evals}->{$priority}));
   dbg("rules: running body_eval tests; score so far=".$pms->{score});
   $self->run_eval_tests ($pms, $Mail::SpamAssassin::Conf::TYPE_BODY_EVALS,
-			 $pms->{conf}->{body_evals}->{$priority}, 'BODY: ',
-			 $priority, $bodystring);
+			 'body_evals', 'BODY: ', $priority, $bodystring);
 }
 
 sub do_rawbody_eval_tests {
@@ -1110,8 +1022,7 @@ sub do_rawbody_eval_tests {
   return unless (defined($pms->{conf}->{rawbody_evals}->{$priority}));
   dbg("rules: running rawbody_eval tests; score so far=".$pms->{score});
   $self->run_eval_tests ($pms, $Mail::SpamAssassin::Conf::TYPE_RAWBODY_EVALS,
-			 $pms->{conf}->{rawbody_evals}->{$priority}, 'RAW: ',
-			 $priority, $bodystring);
+			 'rawbody_evals', 'RAW: ', $priority, $bodystring);
 }
 
 sub do_full_eval_tests {
@@ -1119,12 +1030,11 @@ sub do_full_eval_tests {
   return unless (defined($pms->{conf}->{full_evals}->{$priority}));
   dbg("rules: running full_eval tests; score so far=".$pms->{score});
   $self->run_eval_tests($pms, $Mail::SpamAssassin::Conf::TYPE_FULL_EVALS,
-			$pms->{conf}->{full_evals}->{$priority}, '',
-			$priority, $fullmsgref);
+			'full_evals', '', $priority, $fullmsgref);
 }
 
 sub run_eval_tests {
-  my ($self, $pms, $testtype, $evalhash, $prepend2desc, $priority, @extraevalargs) = @_;
+  my ($self, $pms, $testtype, $evalname, $prepend2desc, $priority, @extraevalargs) = @_;
  
   my $master_deadline = $pms->{master_deadline};
   if ($pms->{deadline_exceeded}) {
@@ -1159,7 +1069,7 @@ sub run_eval_tests {
       && !$doing_user_rules)
   {
     my $method = "${package_name}::${methodname}";
-  # dbg("rules: run_eval_tests - calling previously compiled %s", $method);
+    #dbg("rules: run_eval_tests - calling previously compiled %s", $method);
     my $t = Mail::SpamAssassin::Timeout->new({ deadline => $master_deadline });
     my $err = $t->run(sub {
       no strict "refs";
@@ -1173,24 +1083,23 @@ sub run_eval_tests {
   }
 
   # look these up once in advance to save repeated lookups in loop below
+  my $evalhash = $conf->{$evalname}->{$priority};
   my $tflagsref = $conf->{tflags};
+  my $scoresref = $conf->{scores};
   my $eval_pluginsref = $conf->{eval_plugins};
   my $have_start_rules = $self->{main}->have_plugin("start_rules");
   my $have_ran_rule = $self->{main}->have_plugin("ran_rule");
 
   # the buffer for the evaluated code 
-  my $evalstr = q{ };
-  $evalstr .= q{ my $function; };
- 
+  my $evalstr = '';
+
   # conditionally include the dbg in the eval str
-  my $dbgstr = q{ };
+  my $dbgstr = '';
   if (would_log('dbg')) {
-    $dbgstr = q{
-      dbg("rules: ran eval rule $rulename ======> got hit ($result)");
-    };
+    $dbgstr = 'dbg("rules: ran eval rule $rulename ======> got hit ($result)");';
   }
 
-  while (my ($rulename, $test) = each %{$evalhash})  { 
+  while (my ($rulename, $test) = each %{$evalhash}) {
     if ($tflagsref->{$rulename}) {
       # If the rule is a net rule, and we are in a non-net scoreset, skip it.
       if ($tflagsref->{$rulename} =~ /\bnet\b/) {
@@ -1201,34 +1110,35 @@ sub run_eval_tests {
         next if (($scoreset & 2) == 0);
       }
     }
+
+    # skip if score zeroed
+    next if !$scoresref->{$rulename};
  
-    $test = untaint_var($test);  # presumably checked
-    my ($function, $argstr) = ($test,'');
-    if ($test =~ s/^([^,]+)(,.*)$//gs) {
-      ($function, $argstr) = ($1,$2);
+    my $function = untaint_var($test->[0]); # was validated with \w+
+    if (!$function) {
+      warn "rules: error: no eval function defined for $rulename";
+      next;
     }
 
-    if (!$function) {
-      warn "rules: error: no function defined for $rulename";
+    if (!exists $conf->{eval_plugins}->{$function}) {
+      warn("rules: error: unknown eval '$function' for $rulename\n");
       next;
     }
- 
+
     $evalstr .= '
-    if ($scoresptr->{q#'.$rulename.'#}) {
+    {
       $rulename = q#'.$rulename.'#;
       %{$self->{test_log_msgs}} = ();
-    ';
+';
  
     # only need to set current_rule_name for plugin evals
     if ($eval_pluginsref->{$function}) {
       # let plugins get the name of the rule that is currently being run,
       # and ensure their eval functions exist
       $evalstr .= '
-
-        $self->{current_rule_name} = $rulename;
-        $self->register_plugin_eval_glue(q#'.$function.'#);
-
-      ';
+      $self->{current_rule_name} = $rulename;
+      $self->register_plugin_eval_glue(q#'.$function.'#);
+';
     }
 
     # this stuff is quite slow, and totally superfluous if
@@ -1236,47 +1146,41 @@ sub run_eval_tests {
     if ($have_start_rules) {
       # XXX - should we use helper function here?
       $evalstr .= '
-
         $self->{main}->call_plugins("start_rules", {
                 permsgstatus => $self,
                 ruletype => "eval",
                 priority => '.$priority.'
               });
 
-      ';
+';
     }
- 
-    $evalstr .= '
 
+    $evalstr .= '
       eval {
-        $result = $self->' . $function . ' (@extraevalargs '. $argstr .' );  1;
+        $result = $self->'.$function.'(@extraevalargs, @{$testptr->{q#'.$rulename.'#}->[1]}); 1;
       } or do {
         $result = 0;
         die "rules: $@\n"  if $@ =~ /__alarm__ignore__/;
         $self->handle_eval_rule_errors($rulename);
       };
-
-    ';
+';
 
     if ($have_ran_rule) {
       # XXX - should we use helper function here?
       $evalstr .= '
-
         $self->{main}->call_plugins("ran_rule", {
             permsgstatus => $self, ruletype => "eval", rulename => $rulename
           });
-
-      ';
+';
     }
 
     $evalstr .= '
-
       if ($result) {
         $self->got_hit($rulename, $prepend2desc, ruletype => "eval", value => $result);
         '.$dbgstr.'
       }
     }
-    ';
+';
   }
 
   # don't free the eval ruleset here -- we need it in the compiled code!
@@ -1288,16 +1192,15 @@ sub run_eval_tests {
 {
   package $package_name;
 
-    sub ${methodname} {
-      my (\$self, \@extraevalargs) = \@_;
-
-      my \$scoresptr = \$self->{conf}->{scores};
-      my \$prepend2desc = q#$prepend2desc#;
-      my \$rulename;
-      my \$result;
+  sub ${methodname} {
+    my (\$self, \@extraevalargs) = \@_;
 
-      $evalstr
-    }
+    my \$testptr = \$self->{conf}->{$evalname}->{$priority};
+    my \$prepend2desc = q#$prepend2desc#;
+    my \$rulename;
+    my \$result;
+    $evalstr
+  }
 
   1;
 }

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm?rev=1848969&r1=1848968&r2=1848969&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm Fri Dec 14 21:05:01 2018
@@ -24,7 +24,7 @@ use re 'taint';
 
 use Mail::SpamAssassin::Plugin;
 use Mail::SpamAssassin::Locales;
-use Mail::SpamAssassin::Util qw(untaint_var);
+use Mail::SpamAssassin::Util qw(untaint_var compile_regexp);
 
 our @ISA = qw(Mail::SpamAssassin::Plugin);
 
@@ -57,13 +57,18 @@ sub new {
 
 sub html_tag_balance {
   my ($self, $pms, undef, $rawtag, $rawexpr) = @_;
-  $rawtag =~ /^([a-zA-Z0-9]+)$/; my $tag = $1;
-  $rawexpr =~ /^([\<\>\=\!\-\+ 0-9]+)$/; my $expr = $1;
+
+  return 0 if $rawtag !~ /^([a-zA-Z0-9]+)$/;
+  my $tag = $1;
 
   return 0 unless exists $pms->{html}{inside}{$tag};
 
+  return 0 if $rawexpr !~ /^([\<\>\=\!\-\+ 0-9]+)$/;
+  my $expr = untaint_var($1);
+
   $pms->{html}{inside}{$tag} =~ /^([\<\>\=\!\-\+ 0-9]+)$/;
-  my $val = $1;
+  my $val = untaint_var($1);
+
   return eval "\$val $expr";
 }
 
@@ -119,14 +124,14 @@ sub html_test {
 
 sub html_eval {
   my ($self, $pms, undef, $test, $rawexpr) = @_;
-  my $expr;
-  if ($rawexpr =~ /^[\<\>\=\!\-\+ 0-9]+$/) {
-    $expr = untaint_var($rawexpr);
-  }
+
+  return 0 if $rawexpr !~ /^([\<\>\=\!\-\+ 0-9]+)$/;
+  my $expr = untaint_var($1);
+
   # workaround bug 3320: wierd perl bug where additional, very explicit
   # untainting into a new var is required.
   my $tainted = $pms->{html}{$test};
-  return unless defined($tainted);
+  return 0 unless defined($tainted);
   my $val = $tainted;
 
   # just use the value in $val, don't copy it needlessly
@@ -135,8 +140,14 @@ sub html_eval {
 
 sub html_text_match {
   my ($self, $pms, undef, $text, $regexp) = @_;
-  for my $string (@{ $pms->{html}{$text} }) {
-    if (defined $string && $string =~ /${regexp}/) {
+  my ($rec, $err) = compile_regexp($regexp, 0);
+  if (!$rec) {
+    warn "htmleval: html_text_match invalid regexp '$regexp': $err";
+    return 0;
+  }
+  foreach my $string (@{$pms->{html}{$text}}) {
+    next unless defined $string;
+    if ($string =~ $rec) {
       return 1;
     }
   }

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/MIMEHeader.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/MIMEHeader.pm?rev=1848969&r1=1848968&r2=1848969&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/MIMEHeader.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/MIMEHeader.pm Fri Dec 14 21:05:01 2018
@@ -65,12 +65,15 @@ use re 'taint';
 use Mail::SpamAssassin::Plugin;
 use Mail::SpamAssassin::Conf;
 use Mail::SpamAssassin::Logger;
-use Mail::SpamAssassin::Util qw(untaint_var);
+use Mail::SpamAssassin::Util qw(untaint_var compile_regexp);
+use Mail::SpamAssassin::Constants qw(:sa);
 
 our @ISA = qw(Mail::SpamAssassin::Plugin);
 
 our @TEMPORARY_METHODS;
 
+my $RULENAME_RE = RULENAME_RE;
+
 # ---------------------------------------------------------------------------
 
 # constructor
@@ -101,27 +104,37 @@ sub set_config {
     is_priv => 1,
     code => sub {
       my ($self, $key, $value, $line) = @_;
-      local ($1,$2,$3,$4);
-      if ($value !~ /^(\S+)\s+(\S+)\s*([\=\!]\~)\s*(.+)$/) {
+      local ($1,$2,$3);
+      if ($value !~ s/^(${RULENAME_RE})\s+//) {
         return $Mail::SpamAssassin::Conf::INVALID_VALUE;
       }
-
-      # provide stricter syntax for rule name!?
       my $rulename = untaint_var($1);
-      my $hdrname = $2;
-      my $negated = ($3 eq '!~') ? 1 : 0;
-      my $pattern = $4;
-
-      return unless $self->{parser}->is_delimited_regexp_valid($rulename, $pattern);
-
-      $pattern = Mail::SpamAssassin::Util::make_qr($pattern);
-      return $Mail::SpamAssassin::Conf::INVALID_VALUE unless $pattern;
+      if ($value eq '') {
+        return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
+      }
+      # Take :raw to hdrname!
+      if ($value !~ /^([^:\s]+(?:\:(?:raw)?)?)\s*([=!]~)\s*(.+)$/) {
+        return $Mail::SpamAssassin::Conf::INVALID_VALUE;
+      }
+      my $hdrname = $1;
+      my $negated = $2 eq '!~' ? 1 : 0;
+      my $pattern = $3;
+      $hdrname =~ s/:$//;
+      my $if_unset = '';
+      if ($pattern =~ s/\s+\[if-unset:\s+(.+)\]$//) {
+         $if_unset = $1;
+      }
+      my ($rec, $err) = compile_regexp($pattern, 1);
+      if (!$rec) {
+        info("mimeheader: invalid regexp for $rulename '$pattern': $err");
+        return $Mail::SpamAssassin::Conf::INVALID_VALUE;
+      }
 
       $self->{mimeheader_tests}->{$rulename} = {
         hdr => $hdrname,
         negated => $negated,
-        if_unset => '',             # TODO!
-        pattern => $pattern
+        if_unset => $if_unset,
+        pattern => $rec
       };
 
       # now here's a hack; generate a fake eval rule function to
@@ -129,7 +142,6 @@ sub set_config {
       # TODO: we should have a more elegant way for new rule types to
       # be defined
       my $evalfn = "_mimeheader_eval_$rulename";
-      $evalfn =~ s/[^a-zA-Z0-9_]/_/gs;
 
       # don't redefine the subroutine if it already exists!
       # this causes lots of annoying warnings and such during things like
@@ -139,6 +151,7 @@ sub set_config {
       $self->{parser}->add_test($rulename, $evalfn."()",
                 $Mail::SpamAssassin::Conf::TYPE_BODY_EVALS);
 
+      # evalfn/rulename safe, sanitized by $RULENAME_RE
       my $evalcode = '
         sub Mail::SpamAssassin::Plugin::MIMEHeader::'.$evalfn.' {
           $_[0]->eval_hook_called($_[1], q{'.$rulename.'});
@@ -175,7 +188,7 @@ sub eval_hook_called {
 
 
   my $getraw;
-  if ($hdr =~ s/:raw$//i) {
+  if ($hdr =~ s/:raw$//) {
     $getraw = 1;
   } else {
     $getraw = 0;
@@ -188,9 +201,9 @@ sub eval_hook_called {
     } else {
       $val = $p->get_header($hdr);
     }
-    $val ||= $if_unset;
+    $val = $if_unset if !defined $val;
 
-    if ($val =~ ${pattern}) {
+    if ($val =~ $pattern) {
       return ($negated ? 0 : 1);
     }
   }

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/P595Body.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/P595Body.pm?rev=1848969&r1=1848968&r2=1848969&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/P595Body.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/P595Body.pm Fri Dec 14 21:05:01 2018
@@ -70,10 +70,12 @@ sub setup_test_set_pri {
 
   my $alternates = [];
   while (my ($rule, $pat) = each %{$conf->{body_tests}->{$pri}}) {
-    $pat = Mail::SpamAssassin::Util::regexp_remove_delimiters($pat);
-
     # ignore rules marked for ReplaceTags work!
-    next if ($conf->{rules_to_replace}->{$rule});
+    next if ($conf->{replace_rules}->{$rule});
+
+    #$pat = Mail::SpamAssassin::Util::regexp_remove_delimiters($pat);
+    $pat = qr_to_string($conf->{test_qrs}->{$rule});
+    next unless !$pat;
 
     # use the REGMARK feature:
     # see http://taint.org/2006/11/16/154546a.html#comment-1011

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/PDFInfo.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/PDFInfo.pm?rev=1848969&r1=1848968&r2=1848969&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/PDFInfo.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/PDFInfo.pm Fri Dec 14 21:05:01 2018
@@ -142,7 +142,7 @@ package Mail::SpamAssassin::Plugin::PDFI
 
 use Mail::SpamAssassin::Plugin;
 use Mail::SpamAssassin::Logger;
-use Mail::SpamAssassin::Util;
+use Mail::SpamAssassin::Util qw(compile_regexp);
 use strict;
 use warnings;
 # use bytes;
@@ -471,16 +471,15 @@ sub pdf_name_regex {
   return 0 if (exists $pms->{'pdfinfo'}->{'no_parts'});
   return 0 unless (exists $pms->{'pdfinfo'}->{"names_pdf"});
 
+  my ($rec, $err) = compile_regexp($re, 2);
+  if (!$rec) {
+    info("pdfinfo: invalid regexp '$re': $err");
+    return 0;
+  }
+
   my $hit = 0;
   foreach my $name (keys %{$pms->{'pdfinfo'}->{"names_pdf"}}) {
-    eval {
-        my $regex = Mail::SpamAssassin::Util::make_qr($re);
-        if ( $name =~ m/$regex/ ) {
-            $hit = 1;
-        }
-    };
-    dbg("pdfinfo: error in regex $re - $@") if $@;
-    if ($hit) {
+    if ($name =~ $rec) {
       dbg("pdfinfo: pdf_name_regex hit on $name");
       return 1;
     }
@@ -722,15 +721,13 @@ sub pdf_match_details {
   my $check_value = $pms->{pdfinfo}->{details}->{$detail};
   return unless $check_value;
 
-  my $hit = 0;
-  eval {
-      my $re = Mail::SpamAssassin::Util::make_qr($regex);
-      if ( $check_value =~ m/$re/ ) {
-          $hit = 1;
-      }
-  };
-  dbg("pdfinfo: error in regex $regex - $@") if $@;
-  if ($hit) {
+  my ($rec, $err) = compile_regexp($regex, 2);
+  if (!$rec) {
+    info("pdfinfo: invalid regexp '$regex': $err");
+    return 0;
+  }
+
+  if ($check_value =~ $rec) {
     dbg("pdfinfo: pdf_match_details $detail $regex matches $check_value");
     return 1;
   }

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/ReplaceTags.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/ReplaceTags.pm?rev=1848969&r1=1848968&r2=1848969&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/ReplaceTags.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/ReplaceTags.pm Fri Dec 14 21:05:01 2018
@@ -52,6 +52,7 @@ package Mail::SpamAssassin::Plugin::Repl
 use Mail::SpamAssassin;
 use Mail::SpamAssassin::Plugin;
 use Mail::SpamAssassin::Logger;
+use Mail::SpamAssassin::Util qw(compile_regexp qr_to_string);
 
 use strict;
 use warnings;
@@ -73,6 +74,16 @@ sub new {
   return $self;
 }
 
+sub finish_parsing_start {
+  my ($self, $opts) = @_;
+
+  # keeps track of replaced rules
+  # don't have $pms in finish_parsing_end() so init this..
+  $self->{replace_rules_done} = {};
+
+  return 1;
+}
+
 sub finish_parsing_end {
   my ($self, $opts) = @_;
 
@@ -82,94 +93,96 @@ sub finish_parsing_end {
   my $start = $conf->{replace_start};
   my $end = $conf->{replace_end};
 
-  # this is the version-specific code
-  for my $type (qw|body_tests rawbody_tests head_tests full_tests uri_tests|) {
-    for my $priority (keys %{$conf->{$type}}) {
-      while (my ($rule, $re) = each %{$conf->{$type}->{$priority}}) {
-        # skip if not listed by replace_rules
-        next unless $conf->{rules_to_replace}{$rule};
-
-        if (would_log('dbg', 'replacetags') > 1) {
-          dbg("replacetags: replacing $rule: $re");
-        }
-
-        my $passes = 0;
-        my $doagain;
+  foreach my $rule (keys %{$conf->{replace_rules}}) {
+    # process rules only once, mark to replace_rules_done,
+    # do NOT delete $conf->{replace_rules}, it's used by BodyRuleExtractor
+    next if exists $self->{replace_rules_done}->{$rule};
+    $self->{replace_rules_done}->{$rule} = 1;
+
+    if (!exists $conf->{test_qrs}->{$rule}) {
+      dbg("replacetags: replace requested for non-existing rule: $rule\n");
+      next;
+    }
 
-        do {
-          my $pre_name;
-          my $post_name;
-          my $inter_name;
-          $doagain = 0;
-
-          # get modifier tags
-          if ($re =~ s/${start}pre (.+?)${end}//) {
-            $pre_name = $1;
-          }
-          if ($re =~ s/${start}post (.+?)${end}//) {
-            $post_name = $1;
-          }
-          if ($re =~ s/${start}inter (.+?)${end}//) {
-            $inter_name = $1;
-          }
+    my $re = qr_to_string($conf->{test_qrs}->{$rule});
+    next unless defined $re;
+    my $origre = $re;
+
+    my $passes = 0;
+    my $doagain;
+
+    do {
+      my $pre_name;
+      my $post_name;
+      my $inter_name;
+      $doagain = 0;
+
+      # get modifier tags
+      if ($re =~ s/${start}pre (.+?)${end}//) {
+        $pre_name = $1;
+      }
+      if ($re =~ s/${start}post (.+?)${end}//) {
+        $post_name = $1;
+      }
+      if ($re =~ s/${start}inter (.+?)${end}//) {
+        $inter_name = $1;
+      }
 
-          # this will produce an array of tags to be replaced
-          # for two adjacent tags, an element of "" will be between the two
-          my @re = split(/(<[^<>]+>)/, $re);
-
-          if ($pre_name) {
-            my $pre = $conf->{replace_pre}->{$pre_name};
-            if ($pre) {
-              s{($start.+?$end)}{$pre$1}  for @re;
-            }
-          }
-          if ($post_name) {
-            my $post = $conf->{replace_post}->{$post_name};
-            if ($post) {
-              s{($start.+?$end)}{$1$post}g  for @re;
-            }
-          }
-          if ($inter_name) {
-            my $inter = $conf->{replace_inter}->{$inter_name};
-            if ($inter) {
-              s{^$}{$inter}  for @re;
-            }
-          }
-          for (my $i = 0; $i < @re; $i++) {
-            if ($re[$i] =~ m|$start(.+?)$end|g) {
-              my $tag_name = $1;
-              # if the tag exists, replace it with the corresponding phrase
-              if ($tag_name) {
-                my $replacement = $conf->{replace_tag}->{$tag_name};
-                if ($replacement) {
-                  $re[$i] =~ s|$start$tag_name$end|$replacement|g;
-                  $doagain = 1 if !$doagain && $replacement =~ /<[^>]+>/;
-                }
-              }
+      # this will produce an array of tags to be replaced
+      # for two adjacent tags, an element of "" will be between the two
+      my @re = split(/(<[^<>]+>)/, $re);
+
+      if ($pre_name) {
+        my $pre = $conf->{replace_pre}->{$pre_name};
+        if ($pre) {
+          s{($start.+?$end)}{$pre$1}  for @re;
+         }
+      }
+      if ($post_name) {
+        my $post = $conf->{replace_post}->{$post_name};
+        if ($post) {
+          s{($start.+?$end)}{$1$post}g  for @re;
+        }
+      }
+      if ($inter_name) {
+        my $inter = $conf->{replace_inter}->{$inter_name};
+        if ($inter) {
+          s{^$}{$inter}  for @re;
+        }
+      }
+      for (my $i = 0; $i < @re; $i++) {
+        if ($re[$i] =~ m|$start(.+?)$end|g) {
+          my $tag_name = $1;
+          # if the tag exists, replace it with the corresponding phrase
+          if ($tag_name) {
+            my $replacement = $conf->{replace_tag}->{$tag_name};
+            if ($replacement) {
+              $re[$i] =~ s|$start$tag_name$end|$replacement|g;
+              $doagain = 1 if !$doagain && $replacement =~ /<[^>]+>/;
             }
           }
+        }
+      }
 
-          $re = join('', @re);
-
-          # do the actual replacement
-          $conf->{$type}->{$priority}->{$rule} = $re;
+      $re = join('', @re);
 
-          if (would_log('dbg', 'replacetags') > 1) {
-            dbg("replacetags: replaced $rule: $re");
-          }
+      $passes++;
+    } while $doagain && $passes <= 5;
 
-          $passes++;
-        } while $doagain && $passes <= 5;
+    if ($re ne $origre) {
+      # do the actual replacement
+      my ($rec, $err) = compile_regexp($re, 0);
+      if (!$rec) {
+        info("replacetags: regexp compilation failed '$re': $err");
+        next;
       }
+      $conf->{test_qrs}->{$rule} = $rec;
+      #dbg("replacetags: replaced $rule: '$origre' => '$re'");
+      dbg("replacetags: replaced $rule");
+    } else {
+      dbg("replacetags: nothing was replaced in $rule");
     }
   }
-
-  # free this up, if possible
-  if (!$conf->{allow_user_rules}) {
-    delete $conf->{rules_to_replace};
-  }
-
-  dbg("replacetags: done replacing tags");
 }
 
 sub user_conf_parsing_end {
@@ -250,6 +263,7 @@ body, header, uri, full, rawbody tests a
   push(@cmds, {
     setting => 'replace_rules',
     is_priv => 1,
+    default => {},
     type => $Mail::SpamAssassin::Conf::CONF_TYPE_HASH_KEY_VALUE,
     code => sub {
       my ($self, $key, $value, $line) = @_;
@@ -259,8 +273,8 @@ body, header, uri, full, rawbody tests a
       unless ($value =~ /\S+/) {
         return $Mail::SpamAssassin::Conf::INVALID_VALUE;
       }
-      foreach my $rule (split(' ', $value)) {
-        $conf->{rules_to_replace}->{$rule} = 1;
+      foreach my $rule (split(/\s+/, $value)) {
+        $self->{replace_rules}->{$rule} = 1;
       }
     }
   });

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Rule2XSBody.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Rule2XSBody.pm?rev=1848969&r1=1848968&r2=1848969&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Rule2XSBody.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Rule2XSBody.pm Fri Dec 14 21:05:01 2018
@@ -38,6 +38,7 @@ package Mail::SpamAssassin::Plugin::Rule
 use Mail::SpamAssassin::Plugin;
 use Mail::SpamAssassin::Logger;
 use Mail::SpamAssassin::Plugin::OneLineBodyRuleType;
+use Mail::SpamAssassin::Util qw(qr_to_string);
 
 use strict;
 use warnings;
@@ -120,24 +121,25 @@ sub setup_test_set_pri {
 
   my $found = 0;
   foreach my $name (keys %{$rules}) {
-    my $rule = $rules->{$name};
+    #my $rule = $rules->{$name};
+    my $rule = qr_to_string($conf->{test_qrs}->{$name});
     my $comprule = $hasrules->{$longname{$name} || ''};
     $rule =~ s/\#/\[hash\]/gs;
 
-    if (!$comprule) { 
+    if (!$comprule) {
       # this is pretty common, based on rule complexity; don't warn
       # dbg "zoom: skipping rule $name, not in compiled ruleset";
       next;
     }
     if ($comprule ne $rule) {
-      dbg "zoom: skipping rule $name, code differs in compiled ruleset";
+      dbg "zoom: skipping rule $name, code differs in compiled ruleset '$comprule' '$rule'";
       next;
     }
 
     # ignore rules marked for ReplaceTags work!
     # TODO: we should be able to order the 'finish_parsing_end'
     # plugin calls to do this.
-    if ($conf->{rules_to_replace}->{$name}) {
+    if ($conf->{replace_rules}->{$name}) {
       dbg "zoom: skipping rule $name, ReplaceTags";
       next;
     }

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/URIDetail.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/URIDetail.pm?rev=1848969&r1=1848968&r2=1848969&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/URIDetail.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/URIDetail.pm Fri Dec 14 21:05:01 2018
@@ -68,7 +68,7 @@ Regular expressions should be delimited
 package Mail::SpamAssassin::Plugin::URIDetail;
 use Mail::SpamAssassin::Plugin;
 use Mail::SpamAssassin::Logger;
-use Mail::SpamAssassin::Util qw(untaint_var);
+use Mail::SpamAssassin::Util qw(untaint_var compile_regexp);
 
 use strict;
 use warnings;
@@ -122,22 +122,23 @@ sub set_config {
 	if ($target !~ /^(?:raw|type|cleaned|text|domain)$/) {
 	    return $Mail::SpamAssassin::Conf::INVALID_VALUE;
 	}
-	if ($conf->{parser}->is_delimited_regexp_valid($name, $pattern)) {
-	    $pattern = $pluginobj->make_qr($pattern);
-	}
-	else {
-	    return $Mail::SpamAssassin::Conf::INVALID_VALUE;
+
+	my ($rec, $err) = compile_regexp($pattern, 1);
+	if (!$rec) {
+	  dbg("config: uri_detail invalid regexp '$pattern': $err");
+	  return $Mail::SpamAssassin::Conf::INVALID_VALUE;
 	}
 
-	dbg("config: uri_detail adding ($target $op /$pattern/) to $name");
+	dbg("config: uri_detail adding ($target $op /$rec/) to $name");
         $conf->{parser}->{conf}->{uri_detail}->{$name}->{$target} =
-          [$op, $pattern];
+          [$op, $rec];
 	$added_criteria = 1;
       }
 
       if ($added_criteria) {
 	dbg("config: uri_detail added $name\n");
-	$conf->{parser}->add_test($name, 'check_uri_detail()', $Mail::SpamAssassin::Conf::TYPE_BODY_EVALS);
+	$conf->{parser}->add_test($name, 'check_uri_detail()',
+	  $Mail::SpamAssassin::Conf::TYPE_BODY_EVALS);
       } 
       else {
 	warn "config: failed to add invalid rule $name";
@@ -163,8 +164,8 @@ sub check_uri_detail {
 
     if (exists $rule->{raw}) {
       my($op,$patt) = @{$rule->{raw}};
-      if ( ($op eq '=~' && $raw =~ /$patt/) ||
-           ($op eq '!~' && $raw !~ /$patt/) ) {
+      if ( ($op eq '=~' && $raw =~ $patt) ||
+           ($op eq '!~' && $raw !~ $patt) ) {
         dbg("uri: raw matched: '%s' %s /%s/", $raw,$op,$patt);
       } else {
         next;
@@ -176,8 +177,8 @@ sub check_uri_detail {
       my($op,$patt) = @{$rule->{type}};
       my $match;
       for my $text (keys %{ $info->{types} }) {
-        if ( ($op eq '=~' && $text =~ /$patt/) ||
-             ($op eq '!~' && $text !~ /$patt/) ) { $match = $text; last }
+        if ( ($op eq '=~' && $text =~ $patt) ||
+             ($op eq '!~' && $text !~ $patt) ) { $match = $text; last }
       }
       next unless defined $match;
       dbg("uri: type matched: '%s' %s /%s/", $match,$op,$patt);
@@ -188,8 +189,8 @@ sub check_uri_detail {
       my($op,$patt) = @{$rule->{cleaned}};
       my $match;
       for my $text (@{ $info->{cleaned} }) {
-        if ( ($op eq '=~' && $text =~ /$patt/) ||
-             ($op eq '!~' && $text !~ /$patt/) ) { $match = $text; last }
+        if ( ($op eq '=~' && $text =~ $patt) ||
+             ($op eq '!~' && $text !~ $patt) ) { $match = $text; last }
       }
       next unless defined $match;
       dbg("uri: cleaned matched: '%s' %s /%s/", $match,$op,$patt);
@@ -200,8 +201,8 @@ sub check_uri_detail {
       my($op,$patt) = @{$rule->{text}};
       my $match;
       for my $text (@{ $info->{anchor_text} }) {
-        if ( ($op eq '=~' && $text =~ /$patt/) ||
-             ($op eq '!~' && $text !~ /$patt/) ) { $match = $text; last }
+        if ( ($op eq '=~' && $text =~ $patt) ||
+             ($op eq '!~' && $text !~ $patt) ) { $match = $text; last }
       }
       next unless defined $match;
       dbg("uri: text matched: '%s' %s /%s/", $match,$op,$patt);
@@ -212,8 +213,8 @@ sub check_uri_detail {
       my($op,$patt) = @{$rule->{domain}};
       my $match;
       for my $text (keys %{ $info->{domains} }) {
-        if ( ($op eq '=~' && $text =~ /$patt/) ||
-             ($op eq '!~' && $text !~ /$patt/) ) { $match = $text; last }
+        if ( ($op eq '=~' && $text =~ $patt) ||
+             ($op eq '!~' && $text !~ $patt) ) { $match = $text; last }
       }
       next unless defined $match;
       dbg("uri: domain matched: '%s' %s /%s/", $match,$op,$patt);
@@ -235,29 +236,5 @@ sub check_uri_detail {
 }
 
 # ---------------------------------------------------------------------------
-
-# turn "/foobar/i" into qr/(?i)foobar/
-sub make_qr {
-  my ($self, $pattern) = @_;
-
-  my $re_delim;
-  if ($pattern =~ s/^m(\W)//) {     # m!foo/bar!
-    $re_delim = $1;
-  } else {                          # /foo\/bar/ or !foo/bar!
-    $pattern =~ s/^(\W)//; $re_delim = $1;
-  }
-  if (!$re_delim) {
-    return;
-  }
-
-  $pattern =~ s/${re_delim}([imsx]*)$//;
-
-  my $mods = $1;
-  if ($mods) { $pattern = "(?".$mods.")".$pattern; }
-
-  return qr/$pattern/;
-}
-
-# ---------------------------------------------------------------------------
 
 1;

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/PluginHandler.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/PluginHandler.pm?rev=1848969&r1=1848968&r2=1848969&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/PluginHandler.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/PluginHandler.pm Fri Dec 14 21:05:01 2018
@@ -74,6 +74,13 @@ sub new {
 sub load_plugin {
   my ($self, $package, $path, $silent) = @_;
 
+  # Strict name checking
+  if ($package !~ /^(?:\w+::){0,10}\w+$/) {
+    warn "plugin: illegal plugin name, not loading: $package\n";
+    return;
+  }
+  $package = Mail::SpamAssassin::Util::untaint_var($package);
+
   # Don't load the same plugin twice!
   # Do this *before* calling ->new(), otherwise eval rules will be
   # registered on a nonexistent object
@@ -86,6 +93,13 @@ sub load_plugin {
 
   my $ret;
   if ($path) {
+    if ($path !~ /^\S+\.pm/i) {
+      warn "plugin: illegal plugin filename, not loading: $path";
+      return;
+    }
+
+    $path = $self->{main}->{conf}->{parser}->fix_path_relative_to_current_file($path);
+
     # bug 3717:
     # At least Perl 5.8.0 seems to confuse $cwd internally at some point -- we
     # need to use an absolute path here else we get a "File not found" error.