You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2004/05/28 09:06:56 UTC

svn commit: rev 20529 - incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Conf

Author: jm
Date: Fri May 28 00:06:55 2004
New Revision: 20529

Added:
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm
Log:
Conf reorg continued; broke out parsing code into a separate parsing class.  also fixed some bugs that 'make test' hadn't shown up; however 'make test' is failing in t/spamd_allow_user_rules.t due to spamc coredumping when the -u switch is used ;)

Added: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm
==============================================================================
--- (empty file)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Conf/Parser.pm	Fri May 28 00:06:55 2004
@@ -0,0 +1,569 @@
+# <@LICENSE>
+# Copyright 2004 Apache Software Foundation
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# </...@LICENSE>
+
+=head1 NAME
+
+Mail::SpamAssassin::Conf::Parser - parse SpamAssassin configuration
+
+=head1 SYNOPSIS
+
+  (see Mail::SpamAssassin)
+  
+
+=head1 DESCRIPTION
+
+Mail::SpamAssassin is a module to identify spam using text analysis and
+several internet-based realtime blacklists.
+
+This class is used internally by SpamAssassin to parse its configuration files.
+Please refer to the C<Mail::SpamAssassin> documentation for public interfaces.
+
+=head1 METHODS
+
+=over 4
+
+=cut
+
+package Mail::SpamAssassin::Conf::Parser;
+
+use strict;
+use bytes;
+use Carp;
+
+use vars qw{
+  @ISA
+};
+
+@ISA = qw();
+
+###########################################################################
+
+sub new {
+  my $class = shift;
+  $class = ref($class) || $class;
+  my ($conf) = @_;
+
+  my $self = {
+    'conf'      => $conf
+  };
+
+  bless ($self, $class);
+  $self;
+}
+
+###########################################################################
+
+sub set_defaults_from_command_list {
+  my ($self) = @_;
+  my $conf = $self->{conf};
+  foreach my $cmd (@{$conf->{registered_commands}}) {
+    # note! exists, not defined -- we want to be able to set
+    # "undef" default values.
+    if (exists($cmd->{default})) {
+      $conf->{$cmd->{setting}} = $cmd->{default};
+    }
+  }
+}
+
+sub build_command_luts {
+  my ($self) = @_;
+
+  return if $self->{already_built_config_lookup};
+  $self->{already_built_config_lookup} = 1;
+
+  $self->{command_luts} = { };
+  $self->{command_luts}->{frequent} = { };
+  $self->{command_luts}->{remaining} = { };
+  my $conf = $self->{conf};
+
+  my $set;
+  foreach my $cmd (@{$conf->{registered_commands}})
+  {
+    # first off, decide what set this is in.
+    if ($cmd->{is_frequent}) { $set = 'frequent'; }
+    else { $set = 'remaining'; }
+
+    # next, its priority (used to ensure frequently-used params
+    # are parsed first)
+    my $cmdname = $cmd->{command} || $cmd->{setting};
+    foreach my $name ($cmdname, @{$cmd->{aliases}}) {
+      $self->{command_luts}->{$set}->{$name} = $cmd;
+    }
+  }
+}
+
+###########################################################################
+
+sub parse {
+  my ($self, undef, $scoresonly) = @_; # leave $rules in $_[1]
+
+  $self->{scoresonly} = $scoresonly;
+  my $conf = $self->{conf};
+
+  # Language selection:
+  # See http://www.gnu.org/manual/glibc-2.2.5/html_node/Locale-Categories.html
+  # and http://www.gnu.org/manual/glibc-2.2.5/html_node/Using-gettextized-software.html
+  my $lang = $ENV{'LANGUAGE'}; # LANGUAGE has the highest precedence but has a
+  if ($lang) {                 # special format: The user may specify more than
+    $lang =~ s/:.*$//;         # one language here, colon separated. We use the
+  }                            # first one only (lazy bums we are :o)
+  $lang ||= $ENV{'LC_ALL'};
+  $lang ||= $ENV{'LC_MESSAGES'};
+  $lang ||= $ENV{'LANG'};
+  $lang ||= 'C';               # Nothing set means C/POSIX
+
+  if ($lang =~ /^(C|POSIX)$/) {
+    $lang = 'en_US';           # Our default language
+  } else {
+    $lang =~ s/[@.+,].*$//;    # Strip codeset, modifier/audience, etc.
+  }                            # (eg. .utf8 or @euro)
+
+  # build and get fast-access handles on the command lookup tables
+  $self->build_command_luts();
+  my $lut_frequent = $self->{command_luts}->{frequent};
+  my $lut_remaining = $self->{command_luts}->{remaining};
+
+  $self->{currentfile} = '(no file)';
+  my $skip_parsing = 0;
+  my @curfile_stack = ();
+  my @if_stack = ();
+  my @conf_lines = split (/\n/, $_[1]);
+  my $line;
+
+  while (defined ($line = shift @conf_lines)) {
+    $line =~ s/(?<!\\)#.*$//; # remove comments
+    $line =~ s/^\s+|\s+$//g;  # remove leading and trailing spaces (including newlines)
+    next unless($line); # skip empty lines
+
+    # handle i18n
+    if ($line =~ s/^lang\s+(\S+)\s+//) { next if ($lang !~ /^$1/i); }
+
+    my($key, $value) = split(/\s+/, $line, 2);
+    $key = lc $key;
+    # convert all dashes in setting name to underscores.
+    $key =~ s/-/_/g;
+
+    # Do a better job untainting this info ...
+    $value = '' unless defined($value);
+    $value =~ /^(.*)$/;
+    $value = $1;
+
+    # File/line number assertions
+    if ($key eq 'file') {
+      if ($value =~ /^start\s+(.+)$/) {
+        push (@curfile_stack, $self->{currentfile});
+        $self->{currentfile} = $1;
+        next;
+      }
+
+      if ($value =~ /^end\s/) {
+        if (scalar @if_stack > 0) {
+          my $cond = pop @if_stack;
+
+          if ($cond->{type} eq 'ifplugin') {
+            warn "unclosed 'if' in ".
+                  $self->{currentfile}.": ifplugin ".$cond->{plugin}."\n";
+          } else {
+            die "unknown 'if' type: ".$cond->{type}."\n";
+          }
+
+          $conf->{errors}++;
+          @if_stack = ();
+        }
+        $skip_parsing = 0;
+
+        my $curfile = pop @curfile_stack;
+        if (defined $curfile) {
+          $self->{currentfile} = $curfile;
+        } else {
+          $self->{currentfile} = '(no file)';
+        }
+        next;
+      }
+    }
+
+    # now handle the commands.
+    if ($key eq 'include') {
+      $value = $self->fix_path_relative_to_current_file($value);
+      my $text = $conf->{main}->read_cf($value, 'included file');
+      unshift (@conf_lines, split (/\n/, $text));
+      next;
+    }
+
+    if ($key eq 'ifplugin') {
+      push (@if_stack, {
+          type => 'ifplugin',
+          plugin => $value,
+          skip_parsing => $skip_parsing
+        });
+
+      if ($conf->{plugins_loaded}->{$value}) {
+        # leave $skip_parsing as-is; we may not be parsing anyway in this block.
+        # in other words, support nested 'if's and 'require_version's
+      } else {
+        $skip_parsing = 1;
+      }
+      next;
+    }
+
+    # and the endif statement:
+    if ($key eq 'endif') {
+      my $lastcond = pop @if_stack;
+      $skip_parsing = $lastcond->{skip_parsing};
+      next;
+    }
+
+    if ($key eq 'require_version') {
+      # if it wasn't replaced during install, assume current version ...
+      next if ($value eq "\@\@VERSION\@\@");
+
+      my $ver = $Mail::SpamAssassin::VERSION;
+
+      # if we want to allow "require_version 3.0" be good for all
+      # "3.0.x" versions:
+      ## make sure it's a numeric value
+      #$value += 0.0;
+      ## convert 3.000000 -> 3.0, stay backwards compatible ...
+      #$ver =~ s/^(\d+)\.(\d{1,3}).*$/sprintf "%d.%d", $1, $2/e;
+      #$value =~ s/^(\d+)\.(\d{1,3}).*$/sprintf "%d.%d", $1, $2/e;
+
+      if ($ver ne $value) {
+        warn "configuration file \"$self->{currentfile}\" requires version ".
+                "$value of SpamAssassin, but this is code version ".
+                "$ver. Maybe you need to use ".
+                "the -C switch, or remove the old config files? ".
+                "Skipping this file";
+        $skip_parsing = 1;
+        $conf->{errors}++;
+      }
+      next;
+    }
+
+    # preprocessing? skip all other commands
+    next if $skip_parsing;
+
+    my $cmd = $lut_frequent->{$key}; # check the frequent command set
+    if (!$cmd) {
+      $cmd = $lut_remaining->{$key}; # no? try the rest
+    }
+
+    # we've either fallen through with no match, in which case this
+    # if() will fail, or we have a match.
+    if ($cmd) {
+      if ($self->{scoresonly}) {              # reading user config from spamd
+        if ($cmd->{is_priv} && !$conf->{allow_user_rules}) {
+          dbg ("config: not parsing, 'allow_user_rules' is 0: $line");
+          goto failed_line;
+        }
+        if ($cmd->{is_admin}) {
+          dbg ("config: not parsing, administrator setting: $line");
+          goto failed_line;
+        }
+      }
+
+      if (!$cmd->{code}) {
+        $self->setup_default_code_cb ($cmd);
+      }
+
+      my $ret = &{$cmd->{code}} ($conf, $cmd->{setting}, $value, $line);
+
+      if ($ret && $ret eq $Mail::SpamAssassin::Conf::INVALID_VALUE) {
+        warn "invalid value for \"$key\": $value\n";
+        $conf->{errors}++;
+      } else {
+        next;
+      }
+    }
+
+failed_line:
+
+    # last ditch: try to see if the plugins know what to do with it
+    if ($conf->{main}->call_plugins ("parse_config", {
+                key => $key,
+                value => $value,
+                line => $line,
+                conf => $conf,
+                user_config => $self->{scoresonly}
+            }))
+    {
+      # a plugin dealt with it successfully.
+      next;
+    }
+
+###########################################################################
+
+    my $msg = "Failed to parse line in SpamAssassin configuration, ".
+                        "skipping: $line";
+
+    if ($conf->{lint_rules}) {
+      warn $msg."\n";
+    } else {
+      dbg ($msg);
+    }
+    $conf->{errors}++;
+  }
+
+  $self->lint_check();
+  $self->set_default_scores();
+
+  delete $self->{scoresonly};
+}
+
+# Let's do some linting here ...
+# This is called from _parse(), BTW, so we can check for $conf->{tests}
+# easily before finish_parsing() is called and deletes it.
+#
+sub lint_check {
+  my ($self) = @_;
+  my $conf = $self->{conf};
+  my ($k, $v);
+
+  if ($conf->{lint_rules})
+  {
+    # Check for description and score issues in lint fashion
+    while ( ($k,$v) = each %{$conf->{descriptions}} ) {
+      if (length($v) > 50) {
+        warn "warning: description for $k is over 50 chars\n";
+        $conf->{errors}++;
+      }
+      if (!exists $conf->{tests}->{$k}) {
+        warn "warning: description exists for non-existent rule $k\n";
+        $conf->{errors}++;
+      }
+    }
+
+    while ( my($sk) = each %{$conf->{scores}} ) {
+      if (!exists $conf->{tests}->{$sk}) {
+        warn "warning: score set for non-existent rule $sk\n";
+        $conf->{errors}++;
+      }
+    }
+  }
+}
+
+# we should set a default score for all valid rules...  Do this here
+# instead of add_test because mostly 'score' occurs after the rule is
+# specified, so why set the scores to default, then set them again at
+# 'score'?
+# 
+sub set_default_scores {
+  my ($self) = @_;
+  my $conf = $self->{conf};
+  my ($k, $v);
+
+  while ( ($k,$v) = each %{$conf->{tests}} ) {
+    if ($conf->{lint_rules}) {
+      if (length($k) > 22 && $k !~ /^__/ && $k !~ /^T_/) {
+        warn "warning: rule '$k' is over 22 chars\n";
+        $conf->{errors}++;
+      }
+    }
+
+    if ( ! exists $conf->{scores}->{$k} ) {
+      # T_ rules (in a testing probationary period) get low, low scores
+      my $set_score = ($k =~/^T_/) ? 0.01 : 1.0;
+
+      $set_score = -$set_score if ( $conf->{tflags}->{$k} =~ /\bnice\b/ );
+      for my $index (0..3) {
+        $conf->{scoreset}->[$index]->{$k} = $set_score;
+      }
+    }
+  }
+}
+
+###########################################################################
+
+sub setup_default_code_cb {
+  my ($self, $cmd) = @_;
+  my $type = $cmd->{type};
+
+  if ($type == $Mail::SpamAssassin::Conf::CONF_TYPE_STRING) {
+    $cmd->{code} = \&set_string_value;
+  }
+  elsif ($type == $Mail::SpamAssassin::Conf::CONF_TYPE_BOOL) {
+    $cmd->{code} = \&set_bool_value;
+  }
+  elsif ($type == $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC) {
+    $cmd->{code} = \&set_numeric_value;
+  }
+  elsif ($type == $Mail::SpamAssassin::Conf::CONF_TYPE_HASH_KEY_VALUE) {
+    $cmd->{code} = \&set_hash_key_value;
+  }
+  elsif ($type == $Mail::SpamAssassin::Conf::CONF_TYPE_ADDRLIST) {
+    $cmd->{code} = \&set_addrlist_value;
+  }
+  elsif ($type == $Mail::SpamAssassin::Conf::CONF_TYPE_TEMPLATE) {
+    $cmd->{code} = \&set_template_append;
+  }
+  else {
+    die "unknown conf type $type!";
+  }
+}
+
+sub set_numeric_value {
+  my ($conf, $key, $value, $line) = @_;
+  $conf->{$key} = $value+0.0;
+}
+
+sub set_bool_value {
+  my ($conf, $key, $value, $line) = @_;
+  $conf->{$key} = $value+0;
+}
+
+sub set_string_value {
+  my ($conf, $key, $value, $line) = @_;
+  $conf->{$key} = $value;
+}
+
+sub set_hash_key_value {
+  my ($conf, $key, $value, $line) = @_;
+  my($k,$v) = split(/\s+/, $value, 2);
+  $conf->{$key}->{$k} = $v;
+}
+
+sub set_addrlist_value {
+  my ($conf, $key, $value, $line) = @_;
+  $conf->{parser}->add_to_addrlist ($key, split (' ', $value));
+}
+
+sub remove_addrlist_value {
+  my ($conf, $key, $value, $line) = @_;
+  $conf->{parser}->remove_from_addrlist ($key, split (' ', $value));
+}
+
+sub set_template_append {
+  my ($conf, $key, $value, $line) = @_;
+  if ( $value =~ /^"(.*?)"$/ ) { $value = $1; }
+  $conf->{$key} .= $value."\n";
+}
+
+sub set_template_clear {
+  my ($conf, $key, $value, $line) = @_;
+  $conf->{$key} = '';
+}
+
+###########################################################################
+
+sub add_test {
+  my ($self, $name, $text, $type) = @_;
+  my $conf = $self->{conf};
+
+  # Don't allow invalid names ...
+  if ($name !~ /^\w+$/) {
+    warn "error: rule '$name' has invalid characters (not Alphanumeric + Underscore)\n";
+    $conf->{errors}++;
+    return;
+  }
+
+  $conf->{tests}->{$name} = $text;
+  $conf->{test_types}->{$name} = $type;
+  $conf->{tflags}->{$name} ||= '';
+  $conf->{priority}->{$name} ||= 0;
+  $conf->{source_file}->{$name} = $self->{currentfile};
+
+  if ($self->{scoresonly}) {
+    $conf->{user_rules_to_compile}->{$type} = 1;
+  }
+}
+
+sub add_regression_test {
+  my ($self, $name, $ok_or_fail, $string) = @_;
+  my $conf = $self->{conf};
+
+  if ($conf->{regression_tests}->{$name}) {
+    push @{$conf->{regression_tests}->{$name}}, [$ok_or_fail, $string];
+  }
+  else {
+    # initialize the array, and create one element
+    $conf->{regression_tests}->{$name} = [ [$ok_or_fail, $string] ];
+  }
+}
+
+###########################################################################
+
+sub add_to_addrlist {
+  my ($self, $singlelist, @addrs) = @_;
+  my $conf = $self->{conf};
+
+  foreach my $addr (@addrs) {
+    $addr = lc $addr;
+    my $re = $addr;
+    $re =~ s/[\000\\\(]/_/gs;			# paranoia
+    $re =~ s/([^\*\?_a-zA-Z0-9])/\\$1/g;	# escape any possible metachars
+    $re =~ tr/?/./;				# "?" -> "."
+    $re =~ s/\*/\.\*/g;				# "*" -> "any string"
+    $conf->{$singlelist}->{$addr} = qr/^${re}$/;
+  }
+}
+
+sub add_to_addrlist_rcvd {
+  my ($self, $listname, $addr, $domain) = @_;
+  my $conf = $self->{conf};
+
+  $addr = lc $addr;
+  if ($conf->{$listname}->{$addr}) {
+    push @{$conf->{$listname}->{$addr}{domain}}, $domain;
+  }
+  else {
+    my $re = $addr;
+    $re =~ s/[\000\\\(]/_/gs;			# paranoia
+    $re =~ s/([^\*\?_a-zA-Z0-9])/\\$1/g;	# escape any possible metachars
+    $re =~ tr/?/./;				# "?" -> "."
+    $re =~ s/\*/\.\*/g;				# "*" -> "any string"
+    $conf->{$listname}->{$addr}{re} = qr/^${re}$/;
+    $conf->{$listname}->{$addr}{domain} = [ $domain ];
+  }
+}
+
+sub remove_from_addrlist {
+  my ($self, $singlelist, @addrs) = @_;
+  my $conf = $self->{conf};
+
+  foreach my $addr (@addrs) {
+    delete($conf->{$singlelist}->{$addr});
+  }
+}
+
+sub remove_from_addrlist_rcvd {
+  my ($self, $listname, @addrs) = @_;
+  my $conf = $self->{conf};
+
+  foreach my $addr (@addrs) {
+    delete($conf->{$listname}->{$addr});
+  }
+}
+
+###########################################################################
+
+sub fix_path_relative_to_current_file {
+  my ($self, $path) = @_;
+
+  if (!File::Spec->file_name_is_absolute ($path)) {
+    my ($vol, $dirs, $file) = File::Spec->splitpath ($self->{currentfile});
+    $path = File::Spec->catpath ($vol, $dirs, $path);
+    dbg ("plugin: fixed relative path: $path");
+  }
+  return $path;
+}
+
+###########################################################################
+
+sub dbg { Mail::SpamAssassin::dbg (@_); }
+sub sa_die { Mail::SpamAssassin::sa_die (@_); }
+
+###########################################################################
+
+1;