You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2019/08/05 06:47:21 UTC
svn commit: r1864416 - in /spamassassin: branches/3.4/lib/Mail/SpamAssassin/
branches/3.4/lib/Mail/SpamAssassin/Conf/
branches/3.4/lib/Mail/SpamAssassin/Plugin/ trunk/lib/Mail/SpamAssassin/
trunk/lib/Mail/SpamAssassin/Conf/ trunk/lib/Mail/SpamAssassin/...
Author: hege
Date: Mon Aug 5 06:47:21 2019
New Revision: 1864416
URL: http://svn.apache.org/viewvc?rev=1864416&view=rev
Log:
5% overall speedup from Check.pm regex //o, add IS_RULENAME constant
Modified:
spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf/Parser.pm
spamassassin/branches/3.4/lib/Mail/SpamAssassin/Constants.pm
spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Check.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Constants.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Check.pm
Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf/Parser.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf/Parser.pm?rev=1864416&r1=1864415&r2=1864416&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf/Parser.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Conf/Parser.pm Mon Aug 5 06:47:21 2019
@@ -148,7 +148,6 @@ use re 'taint';
our @ISA = qw();
my $ARITH_EXPRESSION_LEXER = ARITH_EXPRESSION_LEXER;
-my $RULENAME_RE = RULENAME_RE;
my $META_RULES_MATCHING_RE = META_RULES_MATCHING_RE;
###########################################################################
@@ -1197,7 +1196,7 @@ sub add_test {
my $conf = $self->{conf};
# Don't allow invalid names ...
- if ($name !~ /^${RULENAME_RE}$/) {
+ if ($name !~ IS_RULENAME) {
$self->lint_warn("config: error: rule '$name' has invalid characters ".
"(not Alphanumeric + Underscore + starting with a non-digit)\n", $name);
return;
@@ -1362,7 +1361,7 @@ sub is_meta_valid {
# Go through each token in the meta rule
foreach my $token (@tokens) {
# If the token is a syntactically legal rule name, make it zero
- if ($token =~ /^${RULENAME_RE}\z/s) {
+ if ($token =~ IS_RULENAME) {
$meta .= "0 ";
}
# if it is a (decimal) number or a string of 1 or 2 punctuation
Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Constants.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Constants.pm?rev=1864416&r1=1864415&r2=1864416&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Constants.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Constants.pm Mon Aug 5 06:47:21 2019
@@ -43,7 +43,7 @@ BEGIN {
HARVEST_DNSBL_PRIORITY MBX_SEPARATOR
MAX_BODY_LINE_LENGTH MAX_HEADER_KEY_LENGTH MAX_HEADER_VALUE_LENGTH
MAX_HEADER_LENGTH ARITH_EXPRESSION_LEXER AI_TIME_UNKNOWN
- CHARSETS_LIKELY_TO_FP_AS_CAPS MAX_URI_LENGTH RULENAME_RE
+ CHARSETS_LIKELY_TO_FP_AS_CAPS MAX_URI_LENGTH RULENAME_RE IS_RULENAME
META_RULES_MATCHING_RE
);
@@ -405,6 +405,8 @@ use constant CHARSETS_LIKELY_TO_FP_AS_CA
# Allowed rulename format
use constant RULENAME_RE => qr([_a-zA-Z][_a-zA-Z0-9]{0,127});
+# Exact match: anchored at both ends (note: `$` also matches before a trailing newline, unlike \z)
+use constant IS_RULENAME => qr/^${\(RULENAME_RE)}$/;
# meta function rules_matching(), takes argument RULENAME_RE with glob *? characters
use constant META_RULES_MATCHING_RE => qr/(?<!_)\brules_matching\(\s*([_a-zA-Z*?][_a-zA-Z0-9*?]{0,127})\s*\)/;
Modified: spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Check.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Check.pm?rev=1864416&r1=1864415&r2=1864416&view=diff
==============================================================================
--- spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Check.pm (original)
+++ spamassassin/branches/3.4/lib/Mail/SpamAssassin/Plugin/Check.pm Mon Aug 5 06:47:21 2019
@@ -29,7 +29,6 @@ use Mail::SpamAssassin::Constants qw(:sa
our @ISA = qw(Mail::SpamAssassin::Plugin);
my $ARITH_EXPRESSION_LEXER = ARITH_EXPRESSION_LEXER;
-my $RULENAME_RE = RULENAME_RE;
my $META_RULES_MATCHING_RE = META_RULES_MATCHING_RE;
# methods defined by the compiled ruleset; deleted in finish_tests()
@@ -579,7 +578,7 @@ sub do_meta_tests {
foreach my $token (@tokens) {
# ... rulename?
- if ($token =~ /^${RULENAME_RE}\z/) {
+ if ($token =~ IS_RULENAME) {
# the " || 0" formulation is to avoid "use of uninitialized value"
# warnings; this is better than adding a 0 to a hash for every
# rule referred to in a meta...
@@ -771,9 +770,9 @@ sub do_head_tests {
$whlimit = ' && $hits++ < '.$max if $max;
}
if ($matchg) {
- $expr = '$hval '.$op.' /$qrptr->{q{'.$rulename.'}}/g';
+ $expr = '$hval '.$op.' /$qrptr->{q{'.$rulename.'}}/go';
} else {
- $expr = '$hval '.$op.' $qrptr->{q{'.$rulename.'}}';
+ $expr = '$hval '.$op.' /$qrptr->{q{'.$rulename.'}}/o';
}
}
@@ -827,8 +826,8 @@ sub do_body_tests {
body_'.$loopid.': foreach my $l (@_) {
pos $l = 0;
'.$self->hash_line_for_rule($pms, $rulename).'
- while ($l =~ /$qrptr->{q{'.$rulename.'}}/g'. ($max? ' && $hits++ < '.$max:'') .') {
- $self->got_hit(q{'.$rulename.'}, "BODY: ", ruletype => "body");
+ while ($l =~ /$qrptr->{q{'.$rulename.'}}/go'. ($max? ' && $hits++ < '.$max:'') .') {
+ $self->got_hit(q{'.$rulename.'}, "BODY: ", ruletype => "body");
'. $self->hit_rule_plugin_code($pms, $rulename, 'body',
"last body_".$loopid) . '
}
@@ -842,8 +841,8 @@ sub do_body_tests {
$sub .= '
foreach my $l (@_) {
'.$self->hash_line_for_rule($pms, $rulename).'
- if ($l =~ $qrptr->{q{'.$rulename.'}}) {
- $self->got_hit(q{'.$rulename.'}, "BODY: ", ruletype => "body");
+ if ($l =~ /$qrptr->{q{'.$rulename.'}}/o) {
+ $self->got_hit(q{'.$rulename.'}, "BODY: ", ruletype => "body");
'. $self->hit_rule_plugin_code($pms, $rulename, "body", "last") .'
}
}
@@ -891,7 +890,7 @@ sub do_uri_tests {
uri_'.$loopid.': foreach my $l (@_) {
pos $l = 0;
'.$self->hash_line_for_rule($pms, $rulename).'
- while ($l =~ /$qrptr->{q{'.$rulename.'}}/g'. ($max? ' && $hits++ < '.$max:'') .') {
+ while ($l =~ /$qrptr->{q{'.$rulename.'}}/go'. ($max? ' && $hits++ < '.$max:'') .') {
$self->got_hit(q{'.$rulename.'}, "URI: ", ruletype => "uri");
'. $self->hit_rule_plugin_code($pms, $rulename, "uri",
"last uri_".$loopid) . '
@@ -903,7 +902,7 @@ sub do_uri_tests {
$sub .= '
foreach my $l (@_) {
'.$self->hash_line_for_rule($pms, $rulename).'
- if ($l =~ $qrptr->{q{'.$rulename.'}}) {
+ if ($l =~ /$qrptr->{q{'.$rulename.'}}/o) {
$self->got_hit(q{'.$rulename.'}, "URI: ", ruletype => "uri");
'. $self->hit_rule_plugin_code($pms, $rulename, "uri", "last") .'
}
@@ -954,7 +953,7 @@ sub do_rawbody_tests {
rawbody_'.$loopid.': foreach my $l (@_) {
pos $l = 0;
'.$self->hash_line_for_rule($pms, $rulename).'
- while ($l =~ /$qrptr->{q{'.$rulename.'}}/g'. ($max? ' && $hits++ < '.$max:'') .') {
+ while ($l =~ /$qrptr->{q{'.$rulename.'}}/go'. ($max? ' && $hits++ < '.$max:'') .') {
$self->got_hit(q{'.$rulename.'}, "RAW: ", ruletype => "rawbody");
'. $self->hit_rule_plugin_code($pms, $rulename, "rawbody",
"last rawbody_".$loopid) . '
@@ -967,7 +966,7 @@ sub do_rawbody_tests {
$sub .= '
foreach my $l (@_) {
'.$self->hash_line_for_rule($pms, $rulename).'
- if ($l =~ $qrptr->{q{'.$rulename.'}}) {
+ if ($l =~ /$qrptr->{q{'.$rulename.'}}/o) {
$self->got_hit(q{'.$rulename.'}, "RAW: ", ruletype => "rawbody");
'. $self->hit_rule_plugin_code($pms, $rulename, "rawbody", "last") . '
}
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm?rev=1864416&r1=1864415&r2=1864416&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm Mon Aug 5 06:47:21 2019
@@ -143,7 +143,6 @@ use re 'taint';
our @ISA = qw();
my $ARITH_EXPRESSION_LEXER = ARITH_EXPRESSION_LEXER;
-my $RULENAME_RE = RULENAME_RE;
my $META_RULES_MATCHING_RE = META_RULES_MATCHING_RE;
###########################################################################
@@ -1191,7 +1190,7 @@ sub add_test {
my $conf = $self->{conf};
# Don't allow invalid names ...
- if ($name !~ /^${RULENAME_RE}$/) {
+ if ($name !~ IS_RULENAME) {
$self->lint_warn("config: error: rule '$name' has invalid characters ".
"(not Alphanumeric + Underscore + starting with a non-digit)\n", $name);
return;
@@ -1369,7 +1368,7 @@ sub is_meta_valid {
# Go through each token in the meta rule
foreach my $token (@tokens) {
# If the token is a syntactically legal rule name, make it zero
- if ($token =~ /^${RULENAME_RE}\z/s) {
+ if ($token =~ IS_RULENAME) {
$meta .= "0 ";
}
# if it is a (decimal) number or a string of 1 or 2 punctuation
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Constants.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Constants.pm?rev=1864416&r1=1864415&r2=1864416&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Constants.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Constants.pm Mon Aug 5 06:47:21 2019
@@ -44,7 +44,7 @@ BEGIN {
MBX_SEPARATOR
MAX_BODY_LINE_LENGTH MAX_HEADER_KEY_LENGTH MAX_HEADER_VALUE_LENGTH
MAX_HEADER_LENGTH ARITH_EXPRESSION_LEXER AI_TIME_UNKNOWN
- CHARSETS_LIKELY_TO_FP_AS_CAPS MAX_URI_LENGTH RULENAME_RE
+ CHARSETS_LIKELY_TO_FP_AS_CAPS MAX_URI_LENGTH RULENAME_RE IS_RULENAME
META_RULES_MATCHING_RE
);
@@ -415,6 +415,8 @@ use constant CHARSETS_LIKELY_TO_FP_AS_CA
# Allowed rulename format
use constant RULENAME_RE => qr([_a-zA-Z][_a-zA-Z0-9]{0,127});
+# Exact match: anchored at both ends (note: `$` also matches before a trailing newline, unlike \z)
+use constant IS_RULENAME => qr/^${\(RULENAME_RE)}$/;
# meta function rules_matching(), takes argument RULENAME_RE with glob *? characters
use constant META_RULES_MATCHING_RE => qr/(?<!_)\brules_matching\(\s*([_a-zA-Z*?][_a-zA-Z0-9*?]{0,127})\s*\)/;
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Check.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Check.pm?rev=1864416&r1=1864415&r2=1864416&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Check.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/Check.pm Mon Aug 5 06:47:21 2019
@@ -29,7 +29,6 @@ use Mail::SpamAssassin::Constants qw(:sa
our @ISA = qw(Mail::SpamAssassin::Plugin);
my $ARITH_EXPRESSION_LEXER = ARITH_EXPRESSION_LEXER;
-my $RULENAME_RE = RULENAME_RE;
my $META_RULES_MATCHING_RE = META_RULES_MATCHING_RE;
# methods defined by the compiled ruleset; deleted in finish_tests()
@@ -579,7 +578,7 @@ sub do_meta_tests {
foreach my $token (@tokens) {
# ... rulename?
- if ($token =~ /^${RULENAME_RE}\z/) {
+ if ($token =~ IS_RULENAME) {
# the " || 0" formulation is to avoid "use of uninitialized value"
# warnings; this is better than adding a 0 to a hash for every
# rule referred to in a meta...
@@ -771,9 +770,9 @@ sub do_head_tests {
}
}
if ($matchg) {
- $expr = '$hval '.$op.' /$qrptr->{q{'.$rulename.'}}/g';
+ $expr = '$hval '.$op.' /$qrptr->{q{'.$rulename.'}}/go';
} else {
- $expr = '$hval '.$op.' $qrptr->{q{'.$rulename.'}}';
+ $expr = '$hval '.$op.' /$qrptr->{q{'.$rulename.'}}/o';
}
}
@@ -830,8 +829,8 @@ sub do_body_tests {
body_'.$loopid.': foreach my $l (@_) {
pos $l = 0;
'.$self->hash_line_for_rule($pms, $rulename).'
- while ($l =~ /$qrptr->{q{'.$rulename.'}}/g'. ($max? ' && $hits++ < '.$max:'') .') {
- $self->got_hit(q{'.$rulename.'}, "BODY: ", ruletype => "body");
+ while ($l =~ /$qrptr->{q{'.$rulename.'}}/go'. ($max? ' && $hits++ < '.$max:'') .') {
+ $self->got_hit(q{'.$rulename.'}, "BODY: ", ruletype => "body");
'. $self->hit_rule_plugin_code($pms, $rulename, 'body',
"last body_".$loopid) . '
}
@@ -845,7 +844,7 @@ sub do_body_tests {
$sub .= '
foreach my $l (@_) {
'.$self->hash_line_for_rule($pms, $rulename).'
- if ($l =~ $qrptr->{q{'.$rulename.'}}) {
+ if ($l =~ /$qrptr->{q{'.$rulename.'}}/o) {
$self->got_hit(q{'.$rulename.'}, "BODY: ", ruletype => "body");
'. $self->hit_rule_plugin_code($pms, $rulename, "body", "last") .'
}
@@ -895,7 +894,7 @@ sub do_uri_tests {
uri_'.$loopid.': foreach my $l (@_) {
pos $l = 0;
'.$self->hash_line_for_rule($pms, $rulename).'
- while ($l =~ /$qrptr->{q{'.$rulename.'}}/g'. ($max? ' && $hits++ < '.$max:'') .') {
+ while ($l =~ /$qrptr->{q{'.$rulename.'}}/go'. ($max? ' && $hits++ < '.$max:'') .') {
$self->got_hit(q{'.$rulename.'}, "URI: ", ruletype => "uri");
'. $self->hit_rule_plugin_code($pms, $rulename, "uri",
"last uri_".$loopid) . '
@@ -907,7 +906,7 @@ sub do_uri_tests {
$sub .= '
foreach my $l (@_) {
'.$self->hash_line_for_rule($pms, $rulename).'
- if ($l =~ $qrptr->{q{'.$rulename.'}}) {
+ if ($l =~ /$qrptr->{q{'.$rulename.'}}/o) {
$self->got_hit(q{'.$rulename.'}, "URI: ", ruletype => "uri");
'. $self->hit_rule_plugin_code($pms, $rulename, "uri", "last") .'
}
@@ -955,7 +954,7 @@ sub do_rawbody_tests {
rawbody_'.$loopid.': foreach my $l (@_) {
pos $l = 0;
'.$self->hash_line_for_rule($pms, $rulename).'
- while ($l =~ /$qrptr->{q{'.$rulename.'}}/g'. ($max? ' && $hits++ < '.$max:'') .') {
+ while ($l =~ /$qrptr->{q{'.$rulename.'}}/go'. ($max? ' && $hits++ < '.$max:'') .') {
$self->got_hit(q{'.$rulename.'}, "RAW: ", ruletype => "rawbody");
'. $self->hit_rule_plugin_code($pms, $rulename, "rawbody",
"last rawbody_".$loopid) . '
@@ -968,7 +967,7 @@ sub do_rawbody_tests {
$sub .= '
foreach my $l (@_) {
'.$self->hash_line_for_rule($pms, $rulename).'
- if ($l =~ $qrptr->{q{'.$rulename.'}}) {
+ if ($l =~ /$qrptr->{q{'.$rulename.'}}/o) {
$self->got_hit(q{'.$rulename.'}, "RAW: ", ruletype => "rawbody");
'. $self->hit_rule_plugin_code($pms, $rulename, "rawbody", "last") . '
}