You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2005/12/14 01:41:05 UTC
svn commit: r356657 - in /spamassassin/trunk: MANIFEST MANIFEST.SKIP
build/listpromotable build/mkrules masses/parse-rules-for-masses
rules/20_html_tests.cf rules/active.list
Author: jm
Date: Tue Dec 13 16:41:01 2005
New Revision: 356657
URL: http://svn.apache.org/viewcvs?rev=356657&view=rev
Log:
build/mkrules now compiles 'good enough' rules to rules/72_active.cf, instead of copying all core rules into rules. It also picks rules from sandboxes as part of this. Not-promotable rules are left in rules/70_sandbox.cf. It also follows meta dependencies correctly to ensure that each rule is in a consistent state even without the 70_sandbox.cf file.
Modified:
spamassassin/trunk/MANIFEST
spamassassin/trunk/MANIFEST.SKIP
spamassassin/trunk/build/listpromotable
spamassassin/trunk/build/mkrules
spamassassin/trunk/masses/parse-rules-for-masses
spamassassin/trunk/rules/20_html_tests.cf
spamassassin/trunk/rules/active.list
Modified: spamassassin/trunk/MANIFEST
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/MANIFEST?rev=356657&r1=356656&r2=356657&view=diff
==============================================================================
--- spamassassin/trunk/MANIFEST (original)
+++ spamassassin/trunk/MANIFEST Tue Dec 13 16:41:01 2005
@@ -417,48 +417,27 @@
tools/triplets.pl
build/mkrules
rules/10_default_prefs.cf
-rules/20_advance_fee.cf
-rules/20_body_tests.cf
-rules/20_compensate.cf
rules/20_dnsbl_tests.cf
-rules/20_drugs.cf
-rules/20_fake_helo_tests.cf
-rules/20_head_tests.cf
rules/20_html_tests.cf
-rules/20_meta_tests.cf
rules/20_net_tests.cf
-rules/20_phrases.cf
-rules/20_porn.cf
-rules/20_ratware.cf
-rules/20_uri_tests.cf
rules/23_bayes.cf
rules/25_accessdb.cf
rules/25_antivirus.cf
-rules/25_body_tests_es.cf
-rules/25_body_tests_pl.cf
rules/25_dcc.cf
rules/25_domainkeys.cf
rules/25_hashcash.cf
rules/25_pyzor.cf
rules/25_razor2.cf
-rules/25_replace.cf
rules/25_spf.cf
rules/25_textcat.cf
rules/25_uribl.cf
-rules/30_text_de.cf
-rules/30_text_fr.cf
-rules/30_text_it.cf
-rules/30_text_nl.cf
-rules/30_text_pl.cf
-rules/30_text_pt_br.cf
-rules/50_scores.cf
rules/60_awl.cf
rules/60_whitelist.cf
-rules/60_whitelist_spf.cf
rules/60_whitelist_subject.cf
rules/70_broken_rules.cf
rules/70_sandbox.cf
rules/70_uribl.cf
+rules/72_active.cf
rules/sandbox-felicity.pm
rules/STATISTICS-set0.txt
rules/STATISTICS-set1.txt
@@ -472,3 +451,5 @@
rules/triplets.txt
rules/user_prefs.template
rules/v310.pre
+build/listpromotable
+rules/active.list
Modified: spamassassin/trunk/MANIFEST.SKIP
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/MANIFEST.SKIP?rev=356657&r1=356656&r2=356657&view=diff
==============================================================================
--- spamassassin/trunk/MANIFEST.SKIP (original)
+++ spamassassin/trunk/MANIFEST.SKIP Tue Dec 13 16:41:01 2005
@@ -114,3 +114,4 @@
^rules/70_sandbox.cf$
^build/automc/
^rulesrc/.*$
+^rules/active.list$
Modified: spamassassin/trunk/build/listpromotable
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/build/listpromotable?rev=356657&r1=356656&r2=356657&view=diff
==============================================================================
--- spamassassin/trunk/build/listpromotable (original)
+++ spamassassin/trunk/build/listpromotable Tue Dec 13 16:41:01 2005
@@ -112,9 +112,6 @@
next unless ($mailsa->{conf}->{descriptions}->{$name}
|| $mailsa->{conf}->{scores}->{$name});
- # ignore rules that are not marked as promotable
- next unless ($obj->{promo});
-
# "nopublish" tflags
my $tfs = $mailsa->{conf}->{tflags}->{$name};
if ($tfs) {
@@ -124,7 +121,14 @@
$notes = "tflags publish";
goto publish;
}
+ if ($tfs =~ /\buserconf\b/) {
+ $notes = "tflags userconf";
+ goto publish;
+ }
}
+
+ # ignore rules that are not marked as promotable
+ next unless ($obj->{promo});
# only rules from "rulesrc" dirs
my $src = $mailsa->{conf}->{source_file}->{$name};
Modified: spamassassin/trunk/build/mkrules
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/build/mkrules?rev=356657&r1=356656&r2=356657&view=diff
==============================================================================
--- spamassassin/trunk/build/mkrules (original)
+++ spamassassin/trunk/build/mkrules Tue Dec 13 16:41:01 2005
@@ -37,14 +37,16 @@
use vars qw(
@opt_srcs $opt_out $opt_sandboxout $opt_manifest
- $opt_manifestskip $opt_listpromotable
+ $opt_manifestskip $opt_listpromotable $opt_active
+ $opt_activeout
);
GetOptions("src=s" => \@opt_srcs,
"out=s",
"sandboxout=s",
+ "activeout=s",
+ "active=s",
"manifest=s",
"manifestskip=s",
- "listpromotable=s",
);
if (!@opt_srcs) {
@@ -64,19 +66,20 @@
$opt_manifestskip = "MANIFEST.SKIP";
}
+if (!$opt_active && -f "rules/active.list") {
+ $opt_active = "rules/active.list";
+}
+
die "no src" unless (@opt_srcs >= 1);
my $promolist;
-if ($opt_listpromotable) {
- my $list = do $opt_listpromotable;
- die "no listpromotable list" unless $list;
- $promolist = $list;
-}
-else {
+
die "no out" unless ($opt_out);
die "unreadable out" unless (-d $opt_out);
-}
+ die "unreadable active" unless (-f $opt_active);
+
$opt_sandboxout ||= "70_sandbox.cf";
+$opt_activeout ||= "72_active.cf";
# source files that need compilation, and their targets
my $needs_compile = { };
@@ -95,6 +98,8 @@
}, $src);
}
+my $rules = { };
+
my $file_manifest = { };
my $file_manifest_skip = [ ];
if ($opt_manifest) {
@@ -104,6 +109,9 @@
read_manifest_skip($opt_manifestskip);
}
+my $active_rules = { };
+read_active($opt_active);
+
# context for the rules compiler
my $seen_rules = { };
my $renamed_rules = { };
@@ -111,20 +119,26 @@
my $output_file_text = { };
my $files_to_lint = { };
-compile_sorted($needs_compile);
+# $COMMENTS is a "catch-all" "name", for lines that appear after the last line
+# that refers to a rule by name. Those lines are not published by themselves;
+# they'll be published to all pubfiles found in the file.
+#
+# It's assumed they are comments, because they generally are, but could be all
+# sorts of unparseable lines.
+my $COMMENTS = '!comments!';
+
+# another "fake name" for lines that should always be published. They'll
+# be published to all files, in duplicate. This should probably be improved
+# somehow, TODO.
+my $ALWAYS_PUBLISH = '!always_publish!';
+
+read_all_rules($needs_compile);
+compile_output_files();
lint_output_files();
+write_output_files();
-if ($opt_listpromotable) {
- list_promotable();
-}
-else {
- write_output_files();
-}
exit;
-sub list_promotable {
-}
-
# ---------------------------------------------------------------------------
sub lint_output_files {
@@ -146,8 +160,22 @@
dont_copy_prefs => 1,
config_text => $text
});
- my $res = $mailsa->lint_rules();
- return $res; # 0 means good
+
+ my $errors = 0;
+ $mailsa->{lint_callback} = sub {
+ my %opts = @_;
+
+ return if ($opts{msg} =~ /
+ (?:score\sset\sfor\snon-existent|description\sexists)
+ /x);
+
+ warn "lint: $opts{msg}";
+ if ($opts{iserror}) {
+ $errors++;
+ }
+ };
+
+ return $errors; # 0 means good
}
sub wanted {
@@ -169,10 +197,7 @@
my $f = "$current_src/$dir$filename";
my $t;
-
- if (!$opt_listpromotable) {
- $t = "$opt_out/$filename";
- }
+ $t = "$opt_out/$filename";
$needs_compile->{$f} = {
f => $f,
@@ -184,7 +209,7 @@
# compile all the source files found by the wanted() sub, in sorted
# order so that the order of precedence makes sense.
-sub compile_sorted {
+sub read_all_rules {
my ($sources) = @_;
# deal with the perl modules first, so that later linting w/ loadplugin will
@@ -211,7 +236,7 @@
plugin_file_compile($entry);
}
elsif ($entry->{dir} =~ /sandbox/) {
- rule_file_compile_sandbox($f, $t, $entry->{filename});
+ rule_file_compile($f, $t, $entry->{filename}, 1);
}
elsif ($entry->{dir} =~ /extra/) {
# 'extra' rulesets; not built by default (TODO)
@@ -220,38 +245,28 @@
else {
# rules in "core" and "lang" are always copied
if ($needs_rebuild) {
- rule_file_compile_core($f, $t, $entry->{filename});
+ rule_file_compile($f, $t, $entry->{filename}, 0);
}
}
}
}
-# implement the validation criteria from
-# http://wiki.apache.org/spamassassin/RulesProjPromotion .
-#
-# Rules are compiled from source dir to output dir. All rules in "core" are
-# always promoted (for backwards compatibility). In addition, rules in the
-# sandboxes will be promoted, if the rules source file contains a "publish
-# core" command prior to that rule. This command is added (by hand!) to the
-# source file by committers, as the rules pass the validation criteria.
-#
-# The compiler will copy the rules to the output directory. By default, the
-# filename is preserved; so a rule in a file called "20_foo.cf" in the source
-# directory will be output to the file "20_foo.cf".
-#
-# If the rule is not "publish"-tagged, it will be output as a testing rule
-# to "70_sandbox.cf".
-#
+###########################################################################
+
+# Rules are compiled from source dir to output dir.
+#
+# Rules in "rules/active.list" are promoted to "72_active.cf"; rules not
+# listed there are relegated to "70_sandbox.cf". There is code to allow
+# other filenames to be selected from the rulesrc .cf file, but I'm not
+# sure if it works anymore ;)
+#
# Rules will be autorenamed, if there's a collision between a new rule name and
# one that's already been output by the compiler in another source file. The
# autorenaming is very simple -- portions of the current source path are
# appended to the rule name, sanitised.
-my $COMMENTS;
-my $ALWAYS_PUBLISH;
-
-sub rule_file_compile_sandbox {
- my ($f, $t, $filename) = @_;
+sub rule_file_compile {
+ my ($f, $t, $filename, $issandbox) = @_;
open (IN, "<$f") or die "cannot read $f";
@@ -259,23 +274,14 @@
# full deal here, and it must be fast, since it's run on every
# "make" invocation
- my $rules = { };
my $rule_order = [ ];
- # $COMMENTS is a "catch-all" "name", for lines that appear after the last
- # line that refers to a rule by name. Those lines are not published by
- # themselves; they'll be published to all pubfiles found in the file.
- #
- # It's assumed they are comments, because they generally are, but could be
- # all sorts of unparseable lines.
- $COMMENTS = '!comments!';
-
my $lastrule = $COMMENTS;
- # another "fake name" for lines that should always be published, to an
- # output file with the same name as the input file.
- $ALWAYS_PUBLISH = '!always_publish!';
- $rules->{$ALWAYS_PUBLISH} = rule_entry_create();
+ if (!defined $rules->{$ALWAYS_PUBLISH}) {
+ $rules->{$ALWAYS_PUBLISH} = rule_entry_create();
+ }
+
my $ALWAYS = { $ALWAYS_PUBLISH => 1 };
# an "ifplugin" or "if" scope
@@ -314,7 +320,10 @@
my $val = $3;
my $origname = $name;
- $name = sandbox_rule_name_avoid_collisions($name, $f);
+ if ($issandbox) {
+ $name = sandbox_rule_name_avoid_collisions($name, $f);
+ }
+ # non-sandbox rules always use the same name
if (!$rules->{$name}) { $rules->{$name} = rule_entry_create(); }
$rules->{$name}->{origname} = $origname;
@@ -338,7 +347,9 @@
my $val = $3;
my $origname = $name;
- $name = sandbox_rule_name_avoid_collisions($name, $f);
+ if ($issandbox) {
+ $name = sandbox_rule_name_avoid_collisions($name, $f);
+ }
if (!$rules->{$name}) { $rules->{$name} = rule_entry_create(); }
$rules->{$name}->{origname} = $origname;
@@ -350,9 +361,8 @@
}
elsif ($command eq 'pubfile') {
if (!filename_in_manifest($opt_out.'/'.$val)) {
- my $sbout = $opt_out.'/'.$opt_sandboxout;
- warn "$val: WARNING: not listed in manifest file, using $sbout\n";
- $val = $sbout;
+ warn "$val: WARNING: not listed in manifest file, using default\n";
+ next; # don't set 'pubfile' below
}
}
@@ -378,7 +388,10 @@
else {
my $NAME = $ALWAYS_PUBLISH;
if ($current_conditional) {
+
$NAME .= $current_conditional . '!';
+ $NAME =~ s/\n//gs;
+
unless ($rules->{$NAME}) {
$rules->{$NAME} = rule_entry_create();
$ALWAYS->{$NAME} = 1;
@@ -397,7 +410,7 @@
}
# now append all the found text to the output file buffers
- copy_to_output_buffers($rule_order, $rules, $ALWAYS, $f, $filename);
+ copy_to_output_buffers($rule_order, $issandbox, $ALWAYS, $f, $filename);
# ok; file complete. now mark all those rules as "seen"; future
# refs to those rule names will trigger an autorename.
@@ -407,7 +420,7 @@
}
sub copy_to_output_buffers {
- my ($rule_order, $rules, $ALWAYS, $f, $filename) = @_;
+ my ($rule_order, $issandbox, $ALWAYS, $f, $filename) = @_;
my %already_done = ();
my $copied = 0;
@@ -422,135 +435,54 @@
next; # nothing to write!
}
- if ($opt_listpromotable) {
- promo_rule ($rules, $name, $text);
- }
- else {
- copy_rule ($rules, $name, $text, $filename);
- $copied++;
- }
- }
-
- if (!$opt_listpromotable) {
- print "$f: $copied sandbox rules copied\n";
- }
-}
-
-sub copy_rule {
- my ($rules, $name, $text, $filename) = @_;
-
- my $f = $rules->{$name}->{srcfile};
-
- my $pubfile;
- if ($rules->{$name}->{publish}) {
- $pubfile = ($rules->{$name}->{pubfile} || $filename);
- $pubfile = $opt_out.'/'.$pubfile;
- } else {
- $pubfile = $opt_out.'/'.$opt_sandboxout;
- }
- $output_files->{$pubfile} = 1;
+ my $srcfile = $rules->{$name}->{srcfile};
+ my $pubfile = pubfile_for_rule($rules, $name);
- if (!$output_file_text->{$pubfile}) {
- $output_file_text->{$pubfile} = output_file_header($f);
- }
+ $output_files->{$pubfile} = {
+ header => "",
+ # header => "# [compiled from '$srcfile']\n",
+ # don't use that header; we now have multiple srcfiles in each
+ # output file!
+ };
- # fix up any rule renamings we were supposed to do
- sed_renamed_rule_names(\$text);
+ # fix up any rule renamings we were supposed to do
+ sed_renamed_rule_names(\$text);
- my $cond = $rules->{$name}->{cond};
- if ($cond) {
- $output_file_text->{$pubfile} .= $cond.$text."endif\n";
- }
- else {
- $output_file_text->{$pubfile} .= $text;
- }
+ my $cond = $rules->{$name}->{cond};
+ if ($cond) {
+ $rules->{$name}->{output_text} = "\n".$cond.$text."endif\n";
+ } else {
+ $rules->{$name}->{output_text} = $text;
+ }
+ $rules->{$name}->{output_file} = $pubfile;
- # do we have any end-of-file comments? if so, add it
- my $cmts = $rules->{$COMMENTS}->{text};
- if ($cmts) {
- $output_file_text->{$pubfile} .= $cmts;
+ $copied++;
}
- $files_to_lint->{$pubfile} = 1;
+ print "$f: $copied ".
+ ($issandbox ? "sandbox" : "core")." rules copied\n";
}
-sub promo_rule {
- my ($rules, $name, $text) = @_;
-
- return unless $promolist->{$name};
- my $pent = $promolist->{$name};
- return unless $pent->{promo};
-
- $text =~ s/^\s+//s;
- $text =~ s/\s+$//s;
-
- print "\n## ".("-" x 70)."\n";
- print "## Promotable rule: $name\n";
- printf "## so=%5.3f spc=%5.3f hpc=%5.3f\n",
- $pent->{so}, $pent->{spc}, $pent->{hpc};
- print "## $rules->{$name}->{srcfile}\n";
- print "## $pent->{detailhref}\n\n";
- print $text,"\n";
-}
-
-sub rule_file_compile_core {
- my ($f, $t, $filename) = @_;
-
- return if $opt_listpromotable;
-
- my $pubfile = $opt_out.'/'.$filename;
- $output_files->{$pubfile} = 1;
-
- open (IN, "<$f") or die "cannot read $f";
- while (<IN>) {
- my $orig = $_;
-
- s/^#reuse/reuse/; # TODO - dirty hack. we need to fix this to just be
- # a keyword which the engine ignores, this is absurd!
-
- s/#.*$//g; s/^\s+//; s/\s+$//;
-
- # always publish non-sandbox lines verbatim. just note what
- # rules we've seen, and carry on
- $output_file_text->{$pubfile} .= $orig;
-
- # save "lang" declarations
- my $lang = '';
- if (s/^lang\s+(\S+)\s+//) {
- $lang = $1;
- }
-
- if (/^
- (header|rawbody|body|full|uri|meta|mimeheader|describe|
- tflags|reuse|score)
- \s+(\S+)\s+(.*)$
- /x)
- {
- # rule definitions
- my $type = $1;
- my $name = $2;
- my $val = $3;
+sub pubfile_for_rule {
+ my ($rules, $name) = @_;
- # just save the name, and ignore the rest; we're already publishing it
- $seen_rules->{$name} = 1;
+ my $pubfile;
+ if ($rules->{$name}->{publish}) {
+ $pubfile = $rules->{$name}->{pubfile};
+ if ($pubfile) {
+ $pubfile = $opt_out.'/'.$pubfile;
}
- elsif (/^
- (pubfile|publish)
- \s+(\S+)\s*(.*?)$
- /x)
- {
- # preprocessor directives
- my $command = $1;
- my $name = $2;
- my $val = $3;
+ }
- warn "$f: WARNING: cannot use 'publish' in non-sandbox files\n";
+ # default: "70_sandbox.cf" or "72_active.cf"
+ if (!$pubfile) {
+ if ($active_rules->{$name}) {
+ $pubfile = $opt_out.'/'.$opt_activeout;
+ } else {
+ $pubfile = $opt_out.'/'.$opt_sandboxout;
}
}
- close IN;
-
- # now append all the found text to the output file buffers
- print "$f: all lines copied\n";
+ return $pubfile;
}
sub plugin_file_compile {
@@ -570,6 +502,88 @@
}
}
+###########################################################################
+
+sub compile_output_files {
+ # create all known output files
+ foreach my $file (keys %$output_files) {
+ my $always_publish_text = $rules->{$ALWAYS_PUBLISH}->{output_text};
+
+ $output_file_text->{$file} = $output_files->{$file}->{header}.
+ $always_publish_text;
+ }
+
+ my @rulenames = sort keys %$rules;
+ my %seen = ();
+
+ # go through the rules looking for meta subrules we
+ # may have forgotten; this happens if a non-subrule is
+ # listed in active.list, the subrules will not be! fix them
+ # to appear in the same output file as the master rule.
+ foreach my $rule (@rulenames) {
+ fix_up_rule_dependencies($rule);
+ }
+
+ # now repeat, just for rules in the active set; their dependencies should
+ # always be likewise promoted into the active set, overriding the prev step.
+ foreach my $rule (@rulenames) {
+ my $pubfile = $rules->{$rule}->{output_file};
+ next unless ($pubfile && $pubfile =~ /\Q$opt_activeout\E/);
+ fix_up_rule_dependencies($rule);
+ }
+
+ # output the known rules that are not meta subrules.
+ foreach my $rule (@rulenames) {
+ next if ($rule =~ /^__/);
+ my $pubfile = $rules->{$rule}->{output_file};
+ my $text = $rules->{$rule}->{output_text};
+ next unless defined ($text);
+ $output_file_text->{$pubfile} .= "## ".$rule."\n".$text."\n";
+ }
+
+ # now output all subrules (in a slightly more compact form)
+ foreach my $rule (@rulenames) {
+ next unless ($rule =~ /^__/);
+ my $pubfile = $rules->{$rule}->{output_file};
+ my $text = $rules->{$rule}->{output_text};
+ next unless defined ($text);
+ $output_file_text->{$pubfile} .= $text;
+ }
+
+ # finally, finish off all output files
+ foreach my $file (keys %$output_files) {
+ # do we have any end-of-file comments? if so, add it
+ # off: results in comments being duplicated many times
+ # my $cmts = $rules->{$COMMENTS}->{text};
+ # if ($cmts) {
+ # $output_file_text->{$pubfile} .= $cmts;
+ # }
+
+ # and get them lint-checked!
+ $files_to_lint->{$file} = 1;
+ }
+}
+
+sub fix_up_rule_dependencies {
+ my $rule = shift;
+
+ # next if ($rule =~ /^__/);
+ my $pubfile = $rules->{$rule}->{output_file};
+ my $text = $rules->{$rule}->{output_text};
+ return unless $text;
+
+ while ($text =~ /^\s*meta\s+(.*)$/mg) {
+ my $line = $1;
+ while ($line =~ /\b([_A-Za-z0-9]+)\b/g) {
+ # force that subrule (if it exists) to output in the
+ # same pubfile
+ my $rule2 = $1;
+ next unless ($rules->{$rule2} && $rules->{$rule2}->{output_text});
+ $rules->{$rule2}->{output_file} = $pubfile;
+ }
+ }
+}
+
sub write_output_files {
foreach my $pubfile (sort keys %$output_files) {
if (-f $pubfile) {
@@ -593,6 +607,8 @@
}
}
+###########################################################################
+
sub rule_entry_create {
return {
text => '',
@@ -600,6 +616,8 @@
};
}
+###########################################################################
+
sub sandbox_rule_name_avoid_collisions {
my ($rule, $path) = @_;
my $new;
@@ -640,11 +658,7 @@
}
}
-sub output_file_header {
- my ($filename) = @_;
- my $now = scalar localtime time;
- return "# [compiled from '$filename' on $now]\n";
-}
+###########################################################################
sub read_manifest {
my ($mfest) = @_;
@@ -666,6 +680,16 @@
close IN;
}
+sub read_active {
+ my ($fname) = @_;
+ open (IN, "<$fname") or die "cannot read $fname";
+ while (<IN>) {
+ s/#.*$//; next if /^\s*$/;
+ /^(\S+)/ and $active_rules->{$1} = 1;
+ }
+ close IN;
+}
+
sub filename_in_manifest {
my ($fname) = @_;
return 1 if ($file_manifest->{$fname});
@@ -676,4 +700,12 @@
return 0;
}
+
+__DATA__
+
+TODO list for this script:
+
+- license blocks at the top of each rulesrc/*/*.cf file need to be
+ ignored when compiling, instead of being duplicated into the
+ compiled output files.
Modified: spamassassin/trunk/masses/parse-rules-for-masses
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/masses/parse-rules-for-masses?rev=356657&r1=356656&r2=356657&view=diff
==============================================================================
--- spamassassin/trunk/masses/parse-rules-for-masses (original)
+++ spamassassin/trunk/masses/parse-rules-for-masses Tue Dec 13 16:41:01 2005
@@ -76,7 +76,7 @@
my %langs = ();
foreach $file (sort @files) {
if ($skip_test_rules) {
- next if ($file =~ /7\d_/);
+ next if ($file =~ /70_/);
}
open (IN, "<$file");
while (<IN>)
Modified: spamassassin/trunk/rules/20_html_tests.cf
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/rules/20_html_tests.cf?rev=356657&r1=356656&r2=356657&view=diff
==============================================================================
--- spamassassin/trunk/rules/20_html_tests.cf (original)
+++ spamassassin/trunk/rules/20_html_tests.cf Tue Dec 13 16:41:01 2005
@@ -347,3 +347,7 @@
# bug 3070
rawbody HTML_TINY_FONT /\<.*font\-size\:[ \"]*[01][^0-9]+.*\>/i
describe HTML_TINY_FONT body contains 1 or 0-point font
+
+body __HIGHBITS /(?:[\x80-\xff].?){4}/
+# note: __HIGHBITS is used by HTML_CHARSET_FARAWAY
+
Modified: spamassassin/trunk/rules/active.list
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/rules/active.list?rev=356657&r1=356656&r2=356657&view=diff
==============================================================================
--- spamassassin/trunk/rules/active.list (original)
+++ spamassassin/trunk/rules/active.list Tue Dec 13 16:41:01 2005
@@ -1,4 +1,4 @@
-# active ruleset list generated on Mon Dec 12 17:12:08 2005
+# active ruleset list generated on Tue Dec 13 16:38:48 2005
# spam=2.8371 ham=0.0690 so=0.976
ADVANCE_FEE_2
@@ -15,9 +15,18 @@
# spam=0.0000 ham=0.3866 so=0.000
ALL_TRUSTED
+# tflags userconf
+AWL
+
# spam=1.3418 ham=0.0050 so=0.996
BODY_ENHANCEMENT2
+# tflags userconf
+CHARSET_FARAWAY
+
+# tflags userconf
+CHARSET_FARAWAY_HEADER
+
# spam=0.8825 ham=0.0138 so=0.985
DATE_IN_FUTURE_03_06
@@ -75,6 +84,9 @@
# spam=0.3132 ham=0.0000 so=1.000
EM_ROLEX
+# tflags userconf
+ENV_AND_HDR_SPF_MATCH
+
# spam=0.2770 ham=0.0000 so=1.000
FAKE_HELO_MAIL_COM_DOM
@@ -189,6 +201,33 @@
# spam=0.2923 ham=0.0063 so=0.979
GAPPY_SUBJECT
+# tflags userconf
+GTUBE
+
+# tflags userconf
+HASHCASH_20
+
+# tflags userconf
+HASHCASH_21
+
+# tflags userconf
+HASHCASH_22
+
+# tflags userconf
+HASHCASH_23
+
+# tflags userconf
+HASHCASH_24
+
+# tflags userconf
+HASHCASH_25
+
+# tflags userconf
+HASHCASH_2SPEND
+
+# tflags userconf
+HASHCASH_HIGH
+
# spam=0.6102 ham=0.0000 so=1.000
HEADER_SPAM
@@ -216,6 +255,9 @@
# spam=0.7028 ham=0.0038 so=0.995
HG_HORMONE
+# tflags userconf
+HTML_CHARSET_FARAWAY
+
# spam=2.7616 ham=0.0000 so=1.000
HTTP_77
@@ -255,6 +297,9 @@
# spam=0.2786 ham=0.0013 so=0.996
MIME_BOUND_MANY_HEX
+# tflags userconf
+MIME_CHARSET_FARAWAY
+
# spam=1.2325 ham=0.0615 so=0.952
MIME_HTML_MOSTLY
@@ -300,6 +345,12 @@
# spam=1.4883 ham=0.0025 so=0.998
NO_PRESCRIPTION
+# tflags userconf
+NO_RECEIVED
+
+# tflags userconf
+NO_RELAYS
+
# spam=0.3899 ham=0.0025 so=0.994
NUMERIC_HTTP_ADDR
@@ -384,6 +435,12 @@
# spam=1.4109 ham=0.0100 so=0.993
SORTED_RECIPS
+# tflags userconf
+SPF_HELO_PASS
+
+# tflags userconf
+SPF_PASS
+
# spam=0.3183 ham=0.0075 so=0.977
SPOOF_OURI
@@ -405,6 +462,15 @@
# spam=0.3877 ham=0.0000 so=1.000
SUBJECT_FUZZY_PENIS
+# tflags userconf
+SUBJECT_IN_BLACKLIST
+
+# tflags userconf
+SUBJECT_IN_WHITELIST
+
+# spam=21.3632 ham=0.0063 so=1.000
+SUBJ_ILLEGAL_CHARS
+
# spam=0.4457 ham=0.0000 so=1.000
T_DRUGS_HDIA
@@ -462,6 +528,9 @@
# spam=0.5966 ham=0.0314 so=0.950
UNIQUE_WORDS
+# tflags userconf
+UNPARSEABLE_RELAY
+
# spam=0.5256 ham=0.0000 so=1.000
UNRESOLVED_TEMPLATE
@@ -476,6 +545,33 @@
# spam=1.9425 ham=0.0013 so=0.999
URI_NO_WWW_INFO_CGI
+
+# tflags userconf
+USER_IN_ALL_SPAM_TO
+
+# tflags userconf
+USER_IN_BLACKLIST
+
+# tflags userconf
+USER_IN_BLACKLIST_TO
+
+# tflags userconf
+USER_IN_DEF_SPF_WL
+
+# tflags userconf
+USER_IN_DEF_WHITELIST
+
+# tflags userconf
+USER_IN_MORE_SPAM_TO
+
+# tflags userconf
+USER_IN_SPF_WHITELIST
+
+# tflags userconf
+USER_IN_WHITELIST
+
+# tflags userconf
+USER_IN_WHITELIST_TO
# spam=1.5009 ham=0.0653 so=0.958
US_DOLLARS_3