You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by qu...@apache.org on 2005/02/22 07:55:08 UTC
svn commit: r154808 - in spamassassin/trunk: MANIFEST
lib/Mail/SpamAssassin/Plugin/ReplaceTags.pm rules/25_replace.cf
rules/70_testing.cf rules/init.pre
Author: quinlan
Date: Mon Feb 21 22:55:06 2005
New Revision: 154808
URL: http://svn.apache.org/viewcvs?view=rev&rev=154808
Log:
bug 4094: add plugin from Felix Bauer to make fuzzy matching easier
Added:
spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/ReplaceTags.pm
spamassassin/trunk/rules/25_replace.cf
Modified:
spamassassin/trunk/MANIFEST
spamassassin/trunk/rules/70_testing.cf
spamassassin/trunk/rules/init.pre
Modified: spamassassin/trunk/MANIFEST
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/MANIFEST?view=diff&r1=154807&r2=154808
==============================================================================
--- spamassassin/trunk/MANIFEST (original)
+++ spamassassin/trunk/MANIFEST Mon Feb 21 22:55:06 2005
@@ -66,6 +66,7 @@
lib/Mail/SpamAssassin/Plugin/Pyzor.pm
lib/Mail/SpamAssassin/Plugin/Razor2.pm
lib/Mail/SpamAssassin/Plugin/RelayCountry.pm
+lib/Mail/SpamAssassin/Plugin/ReplaceTags.pm
lib/Mail/SpamAssassin/Plugin/SPF.pm
lib/Mail/SpamAssassin/Plugin/SpamCop.pm
lib/Mail/SpamAssassin/Plugin/Test.pm
Added: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/ReplaceTags.pm
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/ReplaceTags.pm?view=auto&rev=154808
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/ReplaceTags.pm (added)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/ReplaceTags.pm Mon Feb 21 22:55:06 2005
@@ -0,0 +1,181 @@
+# <@LICENSE>
+# Copyright 2004 Apache Software Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# </@LICENSE>
+
+=head1 NAME
+
+Mail::SpamAssassin::Plugin::ReplaceTags - tags for SpamAssassin rules
+
+This plugin lets regular expression rules contain named tags, each of which
+is replaced by a predefined regular expression fragment. Tags make
+complicated rules much easier to maintain.
+
+=head1 SYNOPSIS
+
+ loadplugin Mail::SpamAssassin::Plugin::ReplaceTags
+
+ replace_start <
+ replace_end >
+
+ replace_tag A [a@]
+ replace_tag G [gk]
+ replace_tag I [il\|\!1y\?\xcc\xcd\xce\xcf\xec\xed\xee\xef]
+ replace_tag R [r3]
+ replace_tag V [v\\\/wu]
+ replace_tag SP [\s~_-]
+
+ body VIAGRA_OBFU /(?!viagra)<V>+<SP>*<I>+<SP>*<A>+<SP>*<G>+<SP>*<R>+<SP>*<A>+/i
+ describe VIAGRA_OBFU Attempt to obfuscate "viagra"
+
+ replace_rules VIAGRA_OBFU
+
+=cut
+
+package Mail::SpamAssassin::Plugin::ReplaceTags;
+
+# Alias Mail::SpamAssassin::Plugin's dbg() and info() into this package so both can be called unqualified, without wrapper subs
+*dbg=\&Mail::SpamAssassin::Plugin::dbg;
+*info=\&Mail::SpamAssassin::Plugin::info;
+
+use Mail::SpamAssassin;
+use Mail::SpamAssassin::Plugin;
+
+use strict;
+use warnings;
+use bytes;
+
+use vars qw(@ISA);
+@ISA = qw(Mail::SpamAssassin::Plugin);
+
+# Constructor.
+#
+# Arguments:
+#   $class  - class name, or an existing object whose class is reused
+#   $mailsa - object handed to the parent constructor; its {conf} entry is
+#             passed on to set_config()
+#
+# Returns the new plugin object, blessed into $class, after set_config()
+# has been called on it.
+sub new {
+  my $class = shift;
+  my $mailsa = shift;
+
+  # allow new() to be invoked on an instance as well as on the class name
+  $class = ref($class) || $class;
+
+  # let the parent class build the object, then make sure it is ours
+  my $self = bless($class->SUPER::new($mailsa), $class);
+
+  my $conf = $mailsa->{conf};
+  $self->set_config($conf);
+
+  return $self;
+}
+
+# Plugin hook (judging by its name, run when config parsing has finished --
+# confirm against the plugin API).  Expands replacement tags in every rule
+# that was named on a replace_rules line.
+#
+# Arguments:
+#   $self - this plugin object (not used here)
+#   $opts - hash ref; $opts->{conf} is the configuration object holding the
+#           rule tables and the replace_* settings read below
+#
+# For each listed rule in the body, rawbody, head, full and uri test tables,
+# every "<start>NAME<end>" occurrence whose NAME was defined with replace_tag
+# is replaced by that tag's expression, and the result is stored back in
+# place.  Unknown tags are left untouched.  No meaningful return value.
+sub finish_parsing_end {
+  my ($self, $opts) = @_;
+  my $conf = $opts->{conf};
+
+  dbg("replacetags: replacing tags");
+
+  # fall back to the documented defaults if a delimiter is unset or empty
+  my $start = $conf->{replace_start};
+  $start = '<' unless defined $start && length $start;
+  my $end = $conf->{replace_end};
+  $end = '>' unless defined $end && length $end;
+
+  my $tags = $conf->{replace_tags} || {};
+  my $wanted = $conf->{rules_to_replace} || {};
+
+  # The delimiters are documented as plain strings, so they are quoted with
+  # \Q..\E; otherwise a delimiter such as "{" or "(" would be parsed as
+  # regex syntax.  A tag name may not contain the start delimiter, so that
+  # in a rule like "(?<!x)<A>" the "<" of the look-behind cannot swallow
+  # the real tag that follows it.
+  my $tag_re = qr/\Q$start\E((?:(?!\Q$start\E).)+?)\Q$end\E/;
+
+  # Replacement text may itself contain tags, so expansion is repeated until
+  # nothing changes.  The cap stops a self-referential tag from looping
+  # forever.
+  my $max_passes = 10;
+
+  # Maps one tag name to its replacement text.  Unknown names are returned
+  # with their delimiters so the rule text is left as it was.  "defined" is
+  # used rather than truth so that a tag named "0", or one whose expression
+  # is "0", still works.
+  my $replaced = 0;
+  my $expand = sub {
+    my ($name) = @_;
+    return $start . $name . $end unless defined $tags->{$name};
+    $replaced++;
+    return $tags->{$name};
+  };
+
+  for my $type (qw|body_tests rawbody_tests head_tests full_tests uri_tests|) {
+    my $by_priority = $conf->{$type} or next;
+
+    for my $priority (keys %$by_priority) {
+      # assigning to an existing key while iterating with each() is safe;
+      # no keys are added or removed here
+      while (my ($rule, $re) = each %{$by_priority->{$priority}}) {
+        # skip if not listed by replace_rules
+        next unless $wanted->{$rule};
+
+        dbg("replacetags: replacing $rule: $re");
+
+        # one substitution pass handles every tag in the rule; a pass is
+        # repeated only while the previous one actually replaced something
+        my $passes = 0;
+        do {
+          $replaced = 0;
+          $re =~ s/$tag_re/$expand->($1)/ge;
+        } while ($replaced && ++$passes < $max_passes);
+
+        # do the actual replacement
+        $by_priority->{$priority}->{$rule} = $re;
+        dbg("replacetags: replaced $rule: $re");
+      }
+    }
+  }
+
+  dbg("replacetags: done replacing tags");
+}
+
+# Registers this plugin's configuration commands (replace_tag,
+# replace_rules, replace_start and replace_end) with the parser found in
+# $conf->{parser}.
+#
+# Arguments:
+#   $self - this plugin object
+#   $conf - configuration object; the replace_tag and replace_rules handlers
+#           store their results in its {replace_tags} and {rules_to_replace}
+#           hashes
+sub set_config {
+  my ($self, $conf) = @_;
+  my @cmds;
+
+=head1 CONFIGURATION
+
+=over 4
+
+=item replace_tag tagname expression
+
+Assign a valid regular expression to tagname.
+
+Note: It is not recommended to put quantifiers inside the tag; it is better
+to put them inside the rule itself for greater flexibility.
+
+=cut
+
+  # The first whitespace-delimited word of the value is the tag name; the
+  # remainder, with surrounding whitespace trimmed, is its expression.  A
+  # value without both parts is ignored.
+  push @cmds, {
+    setting => 'replace_tag',
+    code => sub {
+      my ($cf, $key, $value, $line) = @_;
+      if ($value =~ m|^(\S+)\s+(.*?)\s*$|) {
+        my ($tag, $expr) = ($1, $2);
+        dbg("replacetags: replace_tag $tag -> $expr");
+        $conf->{replace_tags}->{$tag} = $expr;
+      }
+    }
+  };
+
+=item replace_rules list_of_tests
+
+Specify a list of symbolic test names (separated by whitespace) of tests which
+should be modified using replacement tags. Only simple regular expression
+body, header, uri, full, rawbody tests are supported.
+
+=cut
+
+  # every whitespace-separated name in the value is flagged for replacement
+  push @cmds, {
+    setting => 'replace_rules',
+    code => sub {
+      my ($cf, $key, $value, $line) = @_;
+      for my $name (split(' ', $value)) {
+        $conf->{rules_to_replace}->{$name} = 1;
+      }
+    }
+  };
+
+=item replace_start string
+
+=item replace_end string
+
+String(s) which indicate the start and end of a tag inside a rule. Only tags
+enclosed by the start and end strings are found and replaced.
+
+=cut
+
+  # both delimiters are plain string settings with single-character defaults
+  my $string_type = $Mail::SpamAssassin::Conf::CONF_TYPE_STRING;
+  push @cmds,
+    {
+      setting => 'replace_start',
+      default => '<',
+      type => $string_type,
+    },
+    {
+      setting => 'replace_end',
+      default => '>',
+      type => $string_type,
+    };
+
+  $conf->{parser}->register_commands(\@cmds);
+}
+
+1;
Added: spamassassin/trunk/rules/25_replace.cf
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/rules/25_replace.cf?view=auto&rev=154808
==============================================================================
--- spamassassin/trunk/rules/25_replace.cf (added)
+++ spamassassin/trunk/rules/25_replace.cf Mon Feb 21 22:55:06 2005
@@ -0,0 +1,59 @@
+# SpamAssassin - ReplaceTags configuration
+#
+# Please don't modify this file as your changes will be overwritten with
+# the next update. Use @@LOCAL_RULES_DIR@@/local.cf instead.
+# See 'perldoc Mail::SpamAssassin::Conf' for details.
+#
+# <@LICENSE>
+# Copyright 2004 Apache Software Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# </@LICENSE>
+#
+###########################################################################
+
+# Requires the Mail::SpamAssassin::Plugin::ReplaceTags plugin be loaded.
+
+ifplugin Mail::SpamAssassin::Plugin::ReplaceTags
+
+replace_tag A [gra\@\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xe4\xe3\xe2\xe0\xe1\xe2\xe3\xe4\xe5\xe60o]
+replace_tag B [b8]
+replace_tag C [kc\xc7\xe7@]
+replace_tag D [d\xd0]
+replace_tag E [e3\xc8\xc9\xca\xcb\xe8\xe9\xea\xeb\xa4]
+replace_tag F [f]
+replace_tag G [gk]
+replace_tag H [h]
+replace_tag I [il\|\!1y\?\xcc\xcd\xce\xcf\xec\xed\xee\xef]
+replace_tag J [j]
+replace_tag K [k]
+replace_tag L [il\|\!1\xa3]
+replace_tag M [m]
+replace_tag N [n\xd1\xf1]
+replace_tag O [go0\xd2\xd3\xd4\xd5\xd6\xd8\xf0\xf2\xf3\xf4\xf5\xf6\xf8]
+replace_tag P [p\xfek]
+replace_tag Q [q]
+replace_tag R [r]
+replace_tag S [sz\xa6\xa7]
+replace_tag T [t]
+replace_tag U [uv\xb5\xd9\xda\xdb\xdc\xfc\xfb\xfa\xf9\xfd]
+replace_tag V (?:[vu]|\\\/)
+replace_tag W [wv]
+replace_tag X [x\xd7]
+replace_tag Y [y\xff\xfd\xa5j]
+replace_tag Z [zs]
+replace_tag IMG (?:jpe?g|gif|png)
+replace_tag SP [\s\d\_\-\*\$\%\(\)\,\.\:\;\?\!\}\{\[\]\|\/\?\^\#\~\xa1\xb4\`\'\+]
+replace_tag CUR [\$\xa5\xa3\xa4\xa2]
+
+endif # Mail::SpamAssassin::Plugin::ReplaceTags
Modified: spamassassin/trunk/rules/70_testing.cf
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/rules/70_testing.cf?view=diff&r1=154807&r2=154808
==============================================================================
--- spamassassin/trunk/rules/70_testing.cf (original)
+++ spamassassin/trunk/rules/70_testing.cf Mon Feb 21 22:55:06 2005
@@ -428,3 +428,40 @@
# 0.058 0.0662 0.0000 1.000 0.40 0.01 T_AUTO_MARKET
body T_AUTO_MARKET /\bAutomated Marketing\b/i
+
+# use ReplaceTags
+ifplugin Mail::SpamAssassin::Plugin::ReplaceTags
+body T_INT_AFFORDABLE2 /(?!affordable)<A><F><F><O><R><D><A><B><L><E>/i
+body T_INT_ANXIETY2 /(?!anxiety)<A><N><X><I><E><T><Y>/i
+body T_INT_BILLION2 /(?!billion)<B><I><L><L><I><O><N>/i
+body T_INT_CELEBREX2 /(?!celebrex)<C><E><L><E><B><R><E><X>/i
+body T_INT_CIALIS2 /(?!cialis)<C><I><A><L><I><S>/i
+body T_INT_CREDIT2 /(?!credit)<C><R><E><D><I><T>/i
+body T_INT_ERECT2 /(?!erection)<E><R><E><C><T><I><O><N>/i
+body T_INT_FREE2 /(?!free)<F><R><E><E>/i
+body T_INT_HUNDREDS2 /(?!hundreds)<H><U><N><D><R><E><D><S>/i
+body T_INT_LEVITRA2 /(?!levitra)<L><E><V><I><T><R><A>/i
+body T_INT_MILF2 /(?!milf)<M><I><L><F>/i
+body T_INT_MILLION2 /(?!million)<M><I><L><L><I><O><N>/i
+body T_INT_MONEY2 /(?!money)<M><O><N><E><Y>/i
+body T_INT_MORTGAGE2 /(?!mortgage)<M><O><R><T><G><A><G><E>/i
+body T_INT_OBLIGATION2 /(?!obligation)<O><B><L><I><G><A><T><I><O><N>/i
+body T_INT_OFFERS2 /(?!offers)<O><F><F><E><R><S>/i
+body T_INT_PENIS2 /(?!penis)<P><E><N><I><S>/i
+body T_INT_PHARMACY2 /(?!pharmacy)<P><H><A><R><M><A><C><Y>/i
+body T_INT_PHENT2 /(?!phentermine)<P><H><E><N><T><E><R><M><I><N><E>/i
+body T_INT_PRESCRIPT2 /(?!prescription)<P><R><E><S><C><R><I><P><T><I><O><N>/i
+body T_INT_PROFIT2 /(?!profit)<P><R><O><F><I><T>/i
+body T_INT_REFINANCE2 /(?!refinance)<R><E><F><I><N><A><N><C><E>/i
+body T_INT_ROLEX2 /(?!rolex)<R><O><L><E><X>/i
+body T_INT_SOFTWARE2 /(?!software)<S><O><F><T><W><A><R><E>/i
+body T_INT_THOUSANDS2 /(?!thousands)<T><H><O><U><S><A><N><D><S>/i
+body T_INT_VALIUM2 /(?!valium)<V><A><L><I><U><M>/i
+body T_INT_VIAGRA2 /(?!viagra)<V><I><A><G><R><A>/i
+body T_INT_VICODIN2 /(?!vicodin)<V><I><C><O><D><I><N>/i
+body T_INT_VIOXX2 /(?!vioxx)<V><I><O><X><X>/i
+body T_INT_XANAX2 /(?!xanax)<X><A><N><A><X>/i
+body T_INT_PILLS2 /(?!pills)<P><I><L><L><S>/i
+body T_INT_PRICES2 /(?!prices)<P><R><I><C><E><S>/i
+replace_rules T_INT_AFFORDABLE2 T_INT_ANXIETY2 T_INT_BILLION2 T_INT_CELEBREX2 T_INT_CIALIS2 T_INT_CREDIT2 T_INT_ERECT2 T_INT_FREE2 T_INT_HUNDREDS2 T_INT_LEVITRA2 T_INT_MILF2 T_INT_MILLION2 T_INT_MONEY2 T_INT_MORTGAGE2 T_INT_OBLIGATION2 T_INT_OFFERS2 T_INT_PENIS2 T_INT_PHARMACY2 T_INT_PHENT2 T_INT_PRESCRIPT2 T_INT_PROFIT2 T_INT_REFINANCE2 T_INT_ROLEX2 T_INT_SOFTWARE2 T_INT_THOUSANDS2 T_INT_VALIUM2 T_INT_VIAGRA2 T_INT_VICODIN2 T_INT_VIOXX2 T_INT_XANAX2 T_INT_PILLS2 T_INT_PRICES2
+endif
Modified: spamassassin/trunk/rules/init.pre
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/rules/init.pre?view=diff&r1=154807&r2=154808
==============================================================================
--- spamassassin/trunk/rules/init.pre (original)
+++ spamassassin/trunk/rules/init.pre Mon Feb 21 22:55:06 2005
@@ -64,3 +64,7 @@
#
loadplugin Mail::SpamAssassin::Plugin::MIMEHeader
+# ReplaceTags
+#
+loadplugin Mail::SpamAssassin::Plugin::ReplaceTags
+