You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2018/10/07 08:08:06 UTC
svn commit: r1843051 - in /spamassassin/trunk/lib/Mail/SpamAssassin: Conf.pm
Conf/Parser.pm
Author: hege
Date: Sun Oct 7 08:08:06 2018
New Revision: 1843051
URL: http://svn.apache.org/viewvc?rev=1843051&view=rev
Log:
Deprecate ancient TieOneStringHash usage, it's an absolute performance pig. Deprecate pointless is_frequent. Some generic Parser.pm optimizations.
Modified:
spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm?rev=1843051&r1=1843050&r2=1843051&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm Sun Oct 7 08:08:06 2018
@@ -86,7 +86,6 @@ use Mail::SpamAssassin::NetSet;
use Mail::SpamAssassin::Constants qw(:sa :ip);
use Mail::SpamAssassin::Conf::Parser;
use Mail::SpamAssassin::Logger;
-use Mail::SpamAssassin::Util::TieOneStringHash;
use Mail::SpamAssassin::Util qw(untaint_var idn_to_ascii);
use File::Spec;
@@ -216,7 +215,6 @@ it from running.
push (@cmds, {
setting => 'score',
- is_frequent => 1,
code => sub {
my ($self, $key, $value, $line) = @_;
my($rule, @scores) = split(/\s+/, $value);
@@ -2607,7 +2605,6 @@ length to no more than 50 characters.
push (@cmds, {
command => 'describe',
setting => 'descriptions',
- is_frequent => 1,
type => $CONF_TYPE_HASH_KEY_VALUE,
});
@@ -3079,7 +3076,6 @@ name.
push (@cmds, {
setting => 'header',
- is_frequent => 1,
is_priv => 1,
code => sub {
my ($self, $key, $value, $line) = @_;
@@ -3141,7 +3137,6 @@ Define a body eval test. See above.
push (@cmds, {
setting => 'body',
- is_frequent => 1,
is_priv => 1,
code => sub {
my ($self, $key, $value, $line) = @_;
@@ -3216,7 +3211,6 @@ Define a raw-body eval test. See above.
push (@cmds, {
setting => 'rawbody',
- is_frequent => 1,
is_priv => 1,
code => sub {
my ($self, $key, $value, $line) = @_;
@@ -3303,7 +3297,6 @@ ignore these for scoring.
push (@cmds, {
setting => 'meta',
- is_frequent => 1,
is_priv => 1,
code => sub {
my ($self, $key, $value, $line) = @_;
@@ -3456,7 +3449,6 @@ it is documented there.
push (@cmds, {
setting => 'tflags',
- is_frequent => 1,
is_priv => 1,
type => $CONF_TYPE_HASH_KEY_VALUE,
});
@@ -4875,8 +4867,12 @@ sub new {
# keep descriptions in a slow but space-efficient single-string
# data structure
- tie %{$self->{descriptions}}, 'Mail::SpamAssassin::Util::TieOneStringHash'
- or warn "tie failed";
+ # NOTE: Deprecated usage of TieOneStringHash as of 10/2018, it's an
+ # absolute pig, doubling config parsing time, while benchmarks indicate
+ # no difference in resident memory size!
+ $self->{descriptions} = { };
+ #tie %{$self->{descriptions}}, 'Mail::SpamAssassin::Util::TieOneStringHash'
+ # or warn "tie failed";
# after parsing, tests are refiled into these hashes for each test type.
# this allows e.g. a full-text test to be rewritten as a body test in
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm?rev=1843051&r1=1843050&r2=1843051&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm Sun Oct 7 08:08:06 2018
@@ -123,11 +123,6 @@ Set to 1 if this setting can only be set
from spamd. (All settings can be used by local programs run directly by the
user.)
-=item is_frequent
-
-Set to 1 if this value occurs frequently in the config. this means it's looked
-up first for speed.
-
=back
=cut
@@ -159,8 +154,6 @@ sub new {
};
$self->{command_luts} = { };
- $self->{command_luts}->{frequent} = { };
- $self->{command_luts}->{remaining} = { };
bless ($self, $class);
$self;
@@ -194,20 +187,13 @@ sub build_command_luts {
my $conf = $self->{conf};
- my $set;
foreach my $cmd (@{$arrref}) {
- # first off, decide what set this is in.
- if ($cmd->{is_frequent}) { $set = 'frequent'; }
- else { $set = 'remaining'; }
-
- # next, its priority (used to ensure frequently-used params
- # are parsed first)
my $cmdname = $cmd->{command} || $cmd->{setting};
- $self->{command_luts}->{$set}->{$cmdname} = $cmd;
+ $self->{command_luts}->{$cmdname} = $cmd;
if ($cmd->{aliases} && scalar @{$cmd->{aliases}} > 0) {
foreach my $name (@{$cmd->{aliases}}) {
- $self->{command_luts}->{$set}->{$name} = $cmd;
+ $self->{command_luts}->{$name} = $cmd;
}
}
}
@@ -240,8 +226,7 @@ sub parse {
} # (eg. .utf8 or @euro)
# get fast-access handles on the command lookup tables
- my $lut_frequent = $self->{command_luts}->{frequent};
- my $lut_remaining = $self->{command_luts}->{remaining};
+ my $lut = $self->{command_luts};
my %migrated_keys = map { $_ => 1 }
@Mail::SpamAssassin::Conf::MIGRATED_SETTINGS;
@@ -296,8 +281,28 @@ sub parse {
my $parse_error; # undef by default, may be overridden
- # File/line number assertions
- if ($key eq 'file') {
+ # $key if/elsif blocks sorted by most commonly used
+ if ($key eq 'endif') {
+ my $lastcond = pop @if_stack;
+ if (!defined $lastcond) {
+ $parse_error = "config: found endif without matching conditional";
+ goto failed_line;
+ }
+
+ $skip_parsing = $lastcond->{skip_parsing};
+ next;
+ }
+ elsif ($key eq 'ifplugin') {
+ $self->handle_conditional ($key, "plugin ($value)",
+ \@if_stack, \$skip_parsing);
+ next;
+ }
+ elsif ($key eq 'if') {
+ $self->handle_conditional ($key, $value,
+ \@if_stack, \$skip_parsing);
+ next;
+ }
+ elsif ($key eq 'file') {
if ($value =~ /^start\s+(.+)$/) {
push (@curfile_stack, $self->{currentfile});
$self->{currentfile} = $1;
@@ -335,27 +340,12 @@ sub parse {
next;
}
}
-
- # now handle the commands.
elsif ($key eq 'include') {
$value = $self->fix_path_relative_to_current_file($value);
my $text = $conf->{main}->read_cf($value, 'included file');
unshift (@conf_lines, split (/\n/, $text));
next;
}
-
- elsif ($key eq 'ifplugin') {
- $self->handle_conditional ($key, "plugin ($value)",
- \@if_stack, \$skip_parsing);
- next;
- }
-
- elsif ($key eq 'if') {
- $self->handle_conditional ($key, $value,
- \@if_stack, \$skip_parsing);
- next;
- }
-
elsif ($key eq 'else') {
# TODO: if/else/else won't get flagged here :(
if (!@if_stack) {
@@ -367,18 +357,6 @@ sub parse {
next;
}
- # and the endif statement:
- elsif ($key eq 'endif') {
- my $lastcond = pop @if_stack;
- if (!defined $lastcond) {
- $parse_error = "config: found endif without matching conditional";
- goto failed_line;
- }
-
- $skip_parsing = $lastcond->{skip_parsing};
- next;
- }
-
# preprocessing? skip all other commands
next if $skip_parsing;
@@ -409,10 +387,7 @@ sub parse {
next;
}
- my $cmd = $lut_frequent->{$key}; # check the frequent command set
- if (!$cmd) {
- $cmd = $lut_remaining->{$key}; # no? try the rest
- }
+ my $cmd = $lut->{$key};
# we've either fallen through with no match, in which case this
# if() will fail, or we have a match.
@@ -435,22 +410,23 @@ sub parse {
}
my $ret = &{$cmd->{code}} ($conf, $cmd->{setting}, $value, $line);
+ next if !$ret;
- if ($ret && $ret eq $Mail::SpamAssassin::Conf::INVALID_VALUE)
+ if ($ret eq $Mail::SpamAssassin::Conf::INVALID_VALUE)
{
$parse_error = "config: SpamAssassin failed to parse line, ".
"\"$value\" is not valid for \"$key\", ".
"skipping: $line";
goto failed_line;
}
- elsif ($ret && $ret eq $Mail::SpamAssassin::Conf::INVALID_HEADER_FIELD_NAME)
+ elsif ($ret eq $Mail::SpamAssassin::Conf::INVALID_HEADER_FIELD_NAME)
{
$parse_error = "config: SpamAssassin failed to parse line, ".
"it does not specify a valid header field name, ".
"skipping: $line";
goto failed_line;
}
- elsif ($ret && $ret eq $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE)
+ elsif ($ret eq $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE)
{
$parse_error = "config: SpamAssassin failed to parse line, ".
"no value provided for \"$key\", ".
@@ -494,6 +470,7 @@ failed_line:
}
delete $self->{if_stack};
+ delete $self->{command_luts};
$self->lint_check();
$self->set_default_scores();
@@ -512,7 +489,7 @@ sub handle_conditional {
my $eval = '';
my $bad = 0;
foreach my $token (@tokens) {
- if ($token =~ /^(?:\W+|[+-]?\d+(?:\.\d+)?)$/) {
+ if ($token eq '(' || $token eq ')' || $token eq '!') {
# using tainted subr. argument may taint the whole expression, avoid
my $u = untaint_var($token);
$eval .= $u . " ";
@@ -521,6 +498,10 @@ sub handle_conditional {
# replace with a method call
$eval .= '$self->cond_clause_plugin_loaded';
}
+ elsif ($token =~ /^Mail::SpamAssassin::\w[\w\:]+$/) { # class name
+ my $u = untaint_var($token);
+ $eval .= '"' . $u . '" ';
+ }
elsif ($token eq 'can') {
# replace with a method call
$eval .= '$self->cond_clause_can';
@@ -535,6 +516,11 @@ sub handle_conditional {
elsif ($token eq 'perl_version') {
$eval .= $]." ";
}
+ elsif ($token =~ /^(?:\W+|[+-]?\d+(?:\.\d+)?)$/) {
+ # using tainted subr. argument may taint the whole expression, avoid
+ my $u = untaint_var($token);
+ $eval .= $u . " ";
+ }
elsif ($token =~ /^\w[\w\:]+$/) { # class name
my $u = untaint_var($token);
$eval .= '"' . $u . '" ';