You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2019/08/05 06:47:21 UTC

svn commit: r1864416 - in /spamassassin: branches/3.4/lib/Mail/SpamAssassin/ branches/3.4/lib/Mail/SpamAssassin/Conf/ branches/3.4/lib/Mail/SpamAssassin/Plugin/ trunk/lib/Mail/SpamAssassin/ trunk/lib/Mail/SpamAssassin/Conf/ trunk/lib/Mail/SpamAssassin/...

Author: hege
Date: Mon Aug  5 06:47:21 2019
New Revision: 1864416

URL: http://svn.apache.org/viewvc?rev=1864416&view=rev
Log:
5% overall speedup from Check.pm regex //o, add IS_RULENAME constant

Modified:
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf/Parser.pm
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/Constants.pm
    spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Check.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Constants.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Check.pm

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf/Parser.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf/Parser.pm?rev=1864416&r1=1864415&r2=1864416&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf/Parser.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf/Parser.pm Mon Aug  5 06:47:21 2019
@@ -148,7 +148,6 @@ use re 'taint';
 our @ISA = qw();
 
 my $ARITH_EXPRESSION_LEXER = ARITH_EXPRESSION_LEXER;
-my $RULENAME_RE = RULENAME_RE;
 my $META_RULES_MATCHING_RE = META_RULES_MATCHING_RE;
 
 ###########################################################################
@@ -1197,7 +1196,7 @@ sub add_test {
   my $conf = $self->{conf};
 
   # Don't allow invalid names ...
-  if ($name !~ /^${RULENAME_RE}$/) {
+  if ($name !~ IS_RULENAME) {
     $self->lint_warn("config: error: rule '$name' has invalid characters ".
 	   "(not Alphanumeric + Underscore + starting with a non-digit)\n", $name);
     return;
@@ -1362,7 +1361,7 @@ sub is_meta_valid {
   # Go through each token in the meta rule
   foreach my $token (@tokens) {
     # If the token is a syntactically legal rule name, make it zero
-    if ($token =~ /^${RULENAME_RE}\z/s) {
+    if ($token =~ IS_RULENAME) {
       $meta .= "0 ";
     }
     # if it is a (decimal) number or a string of 1 or 2 punctuation

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Constants.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Constants.pm?rev=1864416&r1=1864415&r2=1864416&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Constants.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Constants.pm Mon Aug  5 06:47:21 2019
@@ -43,7 +43,7 @@ BEGIN {
 	HARVEST_DNSBL_PRIORITY MBX_SEPARATOR
 	MAX_BODY_LINE_LENGTH MAX_HEADER_KEY_LENGTH MAX_HEADER_VALUE_LENGTH
 	MAX_HEADER_LENGTH ARITH_EXPRESSION_LEXER AI_TIME_UNKNOWN
-	CHARSETS_LIKELY_TO_FP_AS_CAPS MAX_URI_LENGTH RULENAME_RE
+	CHARSETS_LIKELY_TO_FP_AS_CAPS MAX_URI_LENGTH RULENAME_RE IS_RULENAME
 	META_RULES_MATCHING_RE
   );
 
@@ -405,6 +405,8 @@ use constant CHARSETS_LIKELY_TO_FP_AS_CA
 
 # Allowed rulename format
 use constant RULENAME_RE => qr([_a-zA-Z][_a-zA-Z0-9]{0,127});
+# Exact match
+use constant IS_RULENAME => qr/^${\(RULENAME_RE)}$/;
 
 # meta function rules_matching(), takes argument RULENAME_RE with glob *? characters
 use constant META_RULES_MATCHING_RE => qr/(?<!_)\brules_matching\(\s*([_a-zA-Z*?][_a-zA-Z0-9*?]{0,127})\s*\)/;

Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Check.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Check.pm?rev=1864416&r1=1864415&r2=1864416&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Check.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Check.pm Mon Aug  5 06:47:21 2019
@@ -29,7 +29,6 @@ use Mail::SpamAssassin::Constants qw(:sa
 our @ISA = qw(Mail::SpamAssassin::Plugin);
 
 my $ARITH_EXPRESSION_LEXER = ARITH_EXPRESSION_LEXER;
-my $RULENAME_RE = RULENAME_RE;
 my $META_RULES_MATCHING_RE = META_RULES_MATCHING_RE;
 
 # methods defined by the compiled ruleset; deleted in finish_tests()
@@ -579,7 +578,7 @@ sub do_meta_tests {
     foreach my $token (@tokens) {
 
       # ... rulename?
-      if ($token =~ /^${RULENAME_RE}\z/) {
+      if ($token =~ IS_RULENAME) {
         # the " || 0" formulation is to avoid "use of uninitialized value"
         # warnings; this is better than adding a 0 to a hash for every
         # rule referred to in a meta...
@@ -771,9 +770,9 @@ sub do_head_tests {
               $whlimit = ' && $hits++ < '.$max if $max;
             }
             if ($matchg) {
-              $expr = '$hval '.$op.' /$qrptr->{q{'.$rulename.'}}/g';
+              $expr = '$hval '.$op.' /$qrptr->{q{'.$rulename.'}}/go';
             } else {
-              $expr = '$hval '.$op.' $qrptr->{q{'.$rulename.'}}';
+              $expr = '$hval '.$op.' /$qrptr->{q{'.$rulename.'}}/o';
             }
           }
 
@@ -827,8 +826,8 @@ sub do_body_tests {
       body_'.$loopid.': foreach my $l (@_) {
         pos $l = 0;
         '.$self->hash_line_for_rule($pms, $rulename).'
-        while ($l =~ /$qrptr->{q{'.$rulename.'}}/g'. ($max? ' && $hits++ < '.$max:'') .') { 
-          $self->got_hit(q{'.$rulename.'}, "BODY: ", ruletype => "body"); 
+        while ($l =~ /$qrptr->{q{'.$rulename.'}}/go'. ($max? ' && $hits++ < '.$max:'') .') {
+          $self->got_hit(q{'.$rulename.'}, "BODY: ", ruletype => "body");
           '. $self->hit_rule_plugin_code($pms, $rulename, 'body',
 					 "last body_".$loopid) . '
         }
@@ -842,8 +841,8 @@ sub do_body_tests {
       $sub .= '
       foreach my $l (@_) {
         '.$self->hash_line_for_rule($pms, $rulename).'
-        if ($l =~ $qrptr->{q{'.$rulename.'}}) { 
-          $self->got_hit(q{'.$rulename.'}, "BODY: ", ruletype => "body"); 
+        if ($l =~ /$qrptr->{q{'.$rulename.'}}/o) {
+          $self->got_hit(q{'.$rulename.'}, "BODY: ", ruletype => "body");
           '. $self->hit_rule_plugin_code($pms, $rulename, "body", "last") .'
         }
       }
@@ -891,7 +890,7 @@ sub do_uri_tests {
       uri_'.$loopid.': foreach my $l (@_) {
         pos $l = 0;
         '.$self->hash_line_for_rule($pms, $rulename).'
-        while ($l =~ /$qrptr->{q{'.$rulename.'}}/g'. ($max? ' && $hits++ < '.$max:'') .') { 
+        while ($l =~ /$qrptr->{q{'.$rulename.'}}/go'. ($max? ' && $hits++ < '.$max:'') .') {
            $self->got_hit(q{'.$rulename.'}, "URI: ", ruletype => "uri");
            '. $self->hit_rule_plugin_code($pms, $rulename, "uri",
 					  "last uri_".$loopid) . '
@@ -903,7 +902,7 @@ sub do_uri_tests {
       $sub .= '
       foreach my $l (@_) {
         '.$self->hash_line_for_rule($pms, $rulename).'
-        if ($l =~ $qrptr->{q{'.$rulename.'}}) { 
+        if ($l =~ /$qrptr->{q{'.$rulename.'}}/o) {
            $self->got_hit(q{'.$rulename.'}, "URI: ", ruletype => "uri");
            '. $self->hit_rule_plugin_code($pms, $rulename, "uri", "last") .'
         }
@@ -954,7 +953,7 @@ sub do_rawbody_tests {
       rawbody_'.$loopid.': foreach my $l (@_) {
         pos $l = 0;
         '.$self->hash_line_for_rule($pms, $rulename).'
-        while ($l =~ /$qrptr->{q{'.$rulename.'}}/g'. ($max? ' && $hits++ < '.$max:'') .') { 
+        while ($l =~ /$qrptr->{q{'.$rulename.'}}/go'. ($max? ' && $hits++ < '.$max:'') .') {
            $self->got_hit(q{'.$rulename.'}, "RAW: ", ruletype => "rawbody");
            '. $self->hit_rule_plugin_code($pms, $rulename, "rawbody",
 					  "last rawbody_".$loopid) . '
@@ -967,7 +966,7 @@ sub do_rawbody_tests {
       $sub .= '
       foreach my $l (@_) {
         '.$self->hash_line_for_rule($pms, $rulename).'
-        if ($l =~ $qrptr->{q{'.$rulename.'}}) { 
+        if ($l =~ /$qrptr->{q{'.$rulename.'}}/o) {
            $self->got_hit(q{'.$rulename.'}, "RAW: ", ruletype => "rawbody");
            '. $self->hit_rule_plugin_code($pms, $rulename, "rawbody", "last") . '
         }

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm?rev=1864416&r1=1864415&r2=1864416&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm Mon Aug  5 06:47:21 2019
@@ -143,7 +143,6 @@ use re 'taint';
 our @ISA = qw();
 
 my $ARITH_EXPRESSION_LEXER = ARITH_EXPRESSION_LEXER;
-my $RULENAME_RE = RULENAME_RE;
 my $META_RULES_MATCHING_RE = META_RULES_MATCHING_RE;
 
 ###########################################################################
@@ -1191,7 +1190,7 @@ sub add_test {
   my $conf = $self->{conf};
 
   # Don't allow invalid names ...
-  if ($name !~ /^${RULENAME_RE}$/) {
+  if ($name !~ IS_RULENAME) {
     $self->lint_warn("config: error: rule '$name' has invalid characters ".
 	   "(not Alphanumeric + Underscore + starting with a non-digit)\n", $name);
     return;
@@ -1369,7 +1368,7 @@ sub is_meta_valid {
   # Go through each token in the meta rule
   foreach my $token (@tokens) {
     # If the token is a syntactically legal rule name, make it zero
-    if ($token =~ /^${RULENAME_RE}\z/s) {
+    if ($token =~ IS_RULENAME) {
       $meta .= "0 ";
     }
     # if it is a (decimal) number or a string of 1 or 2 punctuation

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Constants.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Constants.pm?rev=1864416&r1=1864415&r2=1864416&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Constants.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Constants.pm Mon Aug  5 06:47:21 2019
@@ -44,7 +44,7 @@ BEGIN {
 	MBX_SEPARATOR
 	MAX_BODY_LINE_LENGTH MAX_HEADER_KEY_LENGTH MAX_HEADER_VALUE_LENGTH
 	MAX_HEADER_LENGTH ARITH_EXPRESSION_LEXER AI_TIME_UNKNOWN
-	CHARSETS_LIKELY_TO_FP_AS_CAPS MAX_URI_LENGTH RULENAME_RE
+	CHARSETS_LIKELY_TO_FP_AS_CAPS MAX_URI_LENGTH RULENAME_RE IS_RULENAME
 	META_RULES_MATCHING_RE
   );
 
@@ -415,6 +415,8 @@ use constant CHARSETS_LIKELY_TO_FP_AS_CA
 
 # Allowed rulename format
 use constant RULENAME_RE => qr([_a-zA-Z][_a-zA-Z0-9]{0,127});
+# Exact match
+use constant IS_RULENAME => qr/^${\(RULENAME_RE)}$/;
 
 # meta function rules_matching(), takes argument RULENAME_RE with glob *? characters
 use constant META_RULES_MATCHING_RE => qr/(?<!_)\brules_matching\(\s*([_a-zA-Z*?][_a-zA-Z0-9*?]{0,127})\s*\)/;

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Check.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Check.pm?rev=1864416&r1=1864415&r2=1864416&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Check.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Check.pm Mon Aug  5 06:47:21 2019
@@ -29,7 +29,6 @@ use Mail::SpamAssassin::Constants qw(:sa
 our @ISA = qw(Mail::SpamAssassin::Plugin);
 
 my $ARITH_EXPRESSION_LEXER = ARITH_EXPRESSION_LEXER;
-my $RULENAME_RE = RULENAME_RE;
 my $META_RULES_MATCHING_RE = META_RULES_MATCHING_RE;
 
 # methods defined by the compiled ruleset; deleted in finish_tests()
@@ -579,7 +578,7 @@ sub do_meta_tests {
     foreach my $token (@tokens) {
 
       # ... rulename?
-      if ($token =~ /^${RULENAME_RE}\z/) {
+      if ($token =~ IS_RULENAME) {
         # the " || 0" formulation is to avoid "use of uninitialized value"
         # warnings; this is better than adding a 0 to a hash for every
         # rule referred to in a meta...
@@ -771,9 +770,9 @@ sub do_head_tests {
               }
             }
             if ($matchg) {
-              $expr = '$hval '.$op.' /$qrptr->{q{'.$rulename.'}}/g';
+              $expr = '$hval '.$op.' /$qrptr->{q{'.$rulename.'}}/go';
             } else {
-              $expr = '$hval '.$op.' $qrptr->{q{'.$rulename.'}}';
+              $expr = '$hval '.$op.' /$qrptr->{q{'.$rulename.'}}/o';
             }
           }
 
@@ -830,8 +829,8 @@ sub do_body_tests {
       body_'.$loopid.': foreach my $l (@_) {
         pos $l = 0;
         '.$self->hash_line_for_rule($pms, $rulename).'
-        while ($l =~ /$qrptr->{q{'.$rulename.'}}/g'. ($max? ' && $hits++ < '.$max:'') .') {
-          $self->got_hit(q{'.$rulename.'}, "BODY: ", ruletype => "body"); 
+        while ($l =~ /$qrptr->{q{'.$rulename.'}}/go'. ($max? ' && $hits++ < '.$max:'') .') {
+          $self->got_hit(q{'.$rulename.'}, "BODY: ", ruletype => "body");
           '. $self->hit_rule_plugin_code($pms, $rulename, 'body',
 					 "last body_".$loopid) . '
         }
@@ -845,7 +844,7 @@ sub do_body_tests {
       $sub .= '
       foreach my $l (@_) {
         '.$self->hash_line_for_rule($pms, $rulename).'
-        if ($l =~ $qrptr->{q{'.$rulename.'}}) {
+        if ($l =~ /$qrptr->{q{'.$rulename.'}}/o) {
           $self->got_hit(q{'.$rulename.'}, "BODY: ", ruletype => "body");
           '. $self->hit_rule_plugin_code($pms, $rulename, "body", "last") .'
         }
@@ -895,7 +894,7 @@ sub do_uri_tests {
       uri_'.$loopid.': foreach my $l (@_) {
         pos $l = 0;
         '.$self->hash_line_for_rule($pms, $rulename).'
-        while ($l =~ /$qrptr->{q{'.$rulename.'}}/g'. ($max? ' && $hits++ < '.$max:'') .') {
+        while ($l =~ /$qrptr->{q{'.$rulename.'}}/go'. ($max? ' && $hits++ < '.$max:'') .') {
            $self->got_hit(q{'.$rulename.'}, "URI: ", ruletype => "uri");
            '. $self->hit_rule_plugin_code($pms, $rulename, "uri",
 					  "last uri_".$loopid) . '
@@ -907,7 +906,7 @@ sub do_uri_tests {
       $sub .= '
       foreach my $l (@_) {
         '.$self->hash_line_for_rule($pms, $rulename).'
-          if ($l =~ $qrptr->{q{'.$rulename.'}}) {
+          if ($l =~ /$qrptr->{q{'.$rulename.'}}/o) {
            $self->got_hit(q{'.$rulename.'}, "URI: ", ruletype => "uri");
            '. $self->hit_rule_plugin_code($pms, $rulename, "uri", "last") .'
         }
@@ -955,7 +954,7 @@ sub do_rawbody_tests {
       rawbody_'.$loopid.': foreach my $l (@_) {
         pos $l = 0;
         '.$self->hash_line_for_rule($pms, $rulename).'
-        while ($l =~ /$qrptr->{q{'.$rulename.'}}/g'. ($max? ' && $hits++ < '.$max:'') .') { 
+        while ($l =~ /$qrptr->{q{'.$rulename.'}}/go'. ($max? ' && $hits++ < '.$max:'') .') {
            $self->got_hit(q{'.$rulename.'}, "RAW: ", ruletype => "rawbody");
            '. $self->hit_rule_plugin_code($pms, $rulename, "rawbody",
 					  "last rawbody_".$loopid) . '
@@ -968,7 +967,7 @@ sub do_rawbody_tests {
       $sub .= '
       foreach my $l (@_) {
         '.$self->hash_line_for_rule($pms, $rulename).'
-        if ($l =~ $qrptr->{q{'.$rulename.'}}) { 
+        if ($l =~ /$qrptr->{q{'.$rulename.'}}/o) {
            $self->got_hit(q{'.$rulename.'}, "RAW: ", ruletype => "rawbody");
            '. $self->hit_rule_plugin_code($pms, $rulename, "rawbody", "last") . '
         }