You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by pa...@apache.org on 2006/10/29 02:59:37 UTC

svn commit: r468796 [1/2] - in /spamassassin/branches/check_plugin: ./ build/ build/automc/ debian/ lib/ lib/Mail/SpamAssassin/ lib/Mail/SpamAssassin/Bayes/ lib/Mail/SpamAssassin/BayesStore/ lib/Mail/SpamAssassin/Conf/ lib/Mail/SpamAssassin/Message/ li...

Author: parker
Date: Sat Oct 28 17:59:34 2006
New Revision: 468796

URL: http://svn.apache.org/viewvc?view=rev&rev=468796
Log:
merge up to r468720 on svn trunk using "svn merge -r 462690:468720 https://svn.apache.org/repos/asf/spamassassin/trunk"

Added:
    spamassassin/branches/check_plugin/rules/20_advance_fee.cf
      - copied unchanged from r468720, spamassassin/trunk/rules/20_advance_fee.cf
    spamassassin/branches/check_plugin/rules/20_body_tests.cf
      - copied unchanged from r468720, spamassassin/trunk/rules/20_body_tests.cf
    spamassassin/branches/check_plugin/rules/20_compensate.cf
      - copied unchanged from r468720, spamassassin/trunk/rules/20_compensate.cf
    spamassassin/branches/check_plugin/rules/20_drugs.cf
      - copied unchanged from r468720, spamassassin/trunk/rules/20_drugs.cf
    spamassassin/branches/check_plugin/rules/20_fake_helo_tests.cf
      - copied unchanged from r468720, spamassassin/trunk/rules/20_fake_helo_tests.cf
    spamassassin/branches/check_plugin/rules/20_meta_tests.cf
      - copied unchanged from r468720, spamassassin/trunk/rules/20_meta_tests.cf
    spamassassin/branches/check_plugin/rules/20_phrases.cf
      - copied unchanged from r468720, spamassassin/trunk/rules/20_phrases.cf
    spamassassin/branches/check_plugin/rules/20_porn.cf
      - copied unchanged from r468720, spamassassin/trunk/rules/20_porn.cf
    spamassassin/branches/check_plugin/rules/20_ratware.cf
      - copied unchanged from r468720, spamassassin/trunk/rules/20_ratware.cf
    spamassassin/branches/check_plugin/rules/20_uri_tests.cf
      - copied unchanged from r468720, spamassassin/trunk/rules/20_uri_tests.cf
    spamassassin/branches/check_plugin/rules/25_body_tests_pl.cf
      - copied unchanged from r468720, spamassassin/trunk/rules/25_body_tests_pl.cf
    spamassassin/branches/check_plugin/rules/30_text_de.cf
      - copied unchanged from r468720, spamassassin/trunk/rules/30_text_de.cf
    spamassassin/branches/check_plugin/rules/30_text_fr.cf
      - copied unchanged from r468720, spamassassin/trunk/rules/30_text_fr.cf
    spamassassin/branches/check_plugin/rules/30_text_it.cf
      - copied unchanged from r468720, spamassassin/trunk/rules/30_text_it.cf
    spamassassin/branches/check_plugin/rules/30_text_nl.cf
      - copied unchanged from r468720, spamassassin/trunk/rules/30_text_nl.cf
    spamassassin/branches/check_plugin/rules/30_text_pl.cf
      - copied unchanged from r468720, spamassassin/trunk/rules/30_text_pl.cf
    spamassassin/branches/check_plugin/rules/30_text_pt_br.cf
      - copied unchanged from r468720, spamassassin/trunk/rules/30_text_pt_br.cf
    spamassassin/branches/check_plugin/rules/50_scores.cf
      - copied unchanged from r468720, spamassassin/trunk/rules/50_scores.cf
    spamassassin/branches/check_plugin/t/config_text.t
      - copied unchanged from r468720, spamassassin/trunk/t/config_text.t
    spamassassin/branches/check_plugin/t/dnsbl_sc_meta.t
      - copied unchanged from r468720, spamassassin/trunk/t/dnsbl_sc_meta.t
Removed:
    spamassassin/branches/check_plugin/BUGS
    spamassassin/branches/check_plugin/STATUS
Modified:
    spamassassin/branches/check_plugin/INSTALL
    spamassassin/branches/check_plugin/MANIFEST
    spamassassin/branches/check_plugin/Makefile.PL
    spamassassin/branches/check_plugin/build/README
    spamassassin/branches/check_plugin/build/automc/run_preflight
    spamassassin/branches/check_plugin/build/mkrules
    spamassassin/branches/check_plugin/debian/control
    spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/ArchiveIterator.pm
    spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Bayes/CombineChi.pm
    spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/BayesStore/DBM.pm
    spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Conf/Parser.pm
    spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Dns.pm
    spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Message.pm
    spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Message/Node.pm
    spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin.pm
    spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/BodyEval.pm
    spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm
    spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm
    spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm
    spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/MIMEHeader.pm
    spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/RelayEval.pm
    spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/URIEval.pm
    spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/PluginHandler.pm
    spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/SpamdForkScaling.pm
    spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Util/DependencyInfo.pm
    spamassassin/branches/check_plugin/lib/spamassassin-run.pod
    spamassassin/branches/check_plugin/masses/rule-qa/automc/gen_info_xml
    spamassassin/branches/check_plugin/masses/rule-qa/automc/ruleqa.cgi
    spamassassin/branches/check_plugin/masses/rule-qa/corpus-hourly
    spamassassin/branches/check_plugin/rules/20_dnsbl_tests.cf
    spamassassin/branches/check_plugin/rules/20_head_tests.cf
    spamassassin/branches/check_plugin/rules/20_html_tests.cf
    spamassassin/branches/check_plugin/rules/25_replace.cf
    spamassassin/branches/check_plugin/rules/active.list
    spamassassin/branches/check_plugin/rules/regression_tests.cf
    spamassassin/branches/check_plugin/sa-learn.raw
    spamassassin/branches/check_plugin/sa-update.raw
    spamassassin/branches/check_plugin/spamassassin.raw
    spamassassin/branches/check_plugin/spamassassin.spec
    spamassassin/branches/check_plugin/spamc/configure
    spamassassin/branches/check_plugin/spamc/configure.in
    spamassassin/branches/check_plugin/spamd/spamd.raw
    spamassassin/branches/check_plugin/t/bayesdbm.t
    spamassassin/branches/check_plugin/t/mimeheader.t
    spamassassin/branches/check_plugin/t/missing_hb_separator.t
    spamassassin/branches/check_plugin/t/mkrules.t

Modified: spamassassin/branches/check_plugin/INSTALL
URL: http://svn.apache.org/viewvc/spamassassin/branches/check_plugin/INSTALL?view=diff&rev=468796&r1=468795&r2=468796
==============================================================================
--- spamassassin/branches/check_plugin/INSTALL (original)
+++ spamassassin/branches/check_plugin/INSTALL Sat Oct 28 17:59:34 2006
@@ -207,6 +207,17 @@
     Debian: apt-get install libhtml-parser-perl
     Gentoo: emerge dev-perl/HTML-Parser
 
+  - Net::DNS (from CPAN)
+
+    Used for all DNS-based tests (SBL, XBL, SpamCop, DSBL, etc.),
+    to perform MX checks, when manually reporting spam to SpamCop,
+    and by sa-update to gather version information.
+
+    You need to make sure the Net::DNS version is sufficiently up-to-date:
+
+      - version 0.34 or higher on Unix systems
+      - version 0.46 or higher on Windows systems
+
 
 Optional Modules
 ----------------
@@ -235,20 +246,6 @@
     There seems to be a bug in libdb 4.1.25, which is
     distributed by default on some versions of Linux.  See
     http://wiki.apache.org/spamassassin/DbFileSleepBug for details.
-
-
-  - Net::DNS (from CPAN)
-
-    Used for all DNS-based tests (SBL, XBL, SpamCop, DSBL, etc.),
-    perform MX checks, used when manually reporting spam to SpamCop,
-    and used by sa-update to gather version information.  Recommended.
-
-    If this is installed and you are using network tests of any variety
-    (which is the default), then you need to make sure the Net::DNS
-    version is sufficiently up-to-date:
-
-      - version 0.34 or higher on Unix systems
-      - version 0.46 or higher on Windows systems
 
 
   - Net::SMTP (from CPAN)

Modified: spamassassin/branches/check_plugin/MANIFEST
URL: http://svn.apache.org/viewvc/spamassassin/branches/check_plugin/MANIFEST?view=diff&rev=468796&r1=468795&r2=468796
==============================================================================
--- spamassassin/branches/check_plugin/MANIFEST (original)
+++ spamassassin/branches/check_plugin/MANIFEST Sat Oct 28 17:59:34 2006
@@ -1,4 +1,3 @@
-BUGS
 CREDITS
 Changes
 INSTALL
@@ -10,7 +9,6 @@
 NOTICE
 PACKAGING
 README
-STATUS
 TRADEMARK
 UPGRADE
 USAGE
@@ -269,6 +267,7 @@
 t/cidrs.t
 t/config.dist
 t/config_errs.t
+t/config_text.t
 t/cpp_comments_in_spamc.t
 t/data/01_test_rules.cf
 t/data/etc/hello.txt
@@ -368,6 +367,7 @@
 t/debug.t
 t/desc_wrap.t
 t/dnsbl.t
+t/dnsbl_sc_meta.t
 t/get_headers.t
 t/gtube.t
 t/hashcash.t
@@ -481,6 +481,24 @@
 rules/25_spf.cf
 rules/25_textcat.cf
 rules/25_uribl.cf
+rules/20_advance_fee.cf
+rules/20_body_tests.cf
+rules/20_compensate.cf
+rules/20_drugs.cf
+rules/20_fake_helo_tests.cf
+rules/20_meta_tests.cf
+rules/20_phrases.cf
+rules/20_porn.cf
+rules/20_ratware.cf
+rules/20_uri_tests.cf
+rules/25_body_tests_pl.cf
+rules/30_text_de.cf
+rules/30_text_fr.cf
+rules/30_text_it.cf
+rules/30_text_nl.cf
+rules/30_text_pl.cf
+rules/30_text_pt_br.cf
+rules/50_scores.cf
 rules/60_awl.cf
 rules/60_shortcircuit.cf
 rules/60_whitelist.cf
@@ -488,7 +506,6 @@
 rules/60_whitelist_dkim.cf
 rules/60_whitelist_spf.cf
 rules/60_whitelist_subject.cf
-rules/70_sandbox.cf
 rules/72_active.cf
 rules/STATISTICS-set0.txt
 rules/STATISTICS-set1.txt
@@ -504,7 +521,6 @@
 rules/v312.pre
 rules/v320.pre
 rules/active.list
-rules/70_inactive.cf
 t/mkrules.t
 t/trust_path.t
 t/data/nice/dkim/AddedVtag_07

Modified: spamassassin/branches/check_plugin/Makefile.PL
URL: http://svn.apache.org/viewvc/spamassassin/branches/check_plugin/Makefile.PL?view=diff&rev=468796&r1=468795&r2=468796
==============================================================================
--- spamassassin/branches/check_plugin/Makefile.PL (original)
+++ spamassassin/branches/check_plugin/Makefile.PL Sat Oct 28 17:59:34 2006
@@ -208,6 +208,7 @@
         'Pod::Usage'    => 1.10,          # all versions prior to this do seem to be buggy
         'HTML::Parser'  => 3.43,          # the HTML code is based on this parser, older versions have utf-8 bugs
         'Text::Wrap'    => 98.112902,     # this version is shipped with 5.005_03, the oldest version known to work
+        'Net::DNS'      => (RUNNING_ON_WINDOWS ? 0.46 : 0.34), # bugs in older revs
         'Sys::Hostname' => 0,
         'Time::Local'   => 0,
         'Errno'         => 0,
@@ -245,34 +246,14 @@
 
         'rules/*.pm',
 
-        # at one stage, these were copied from rulesrc into the rules
-        # build dir, or were in SVN in this dir, but this no longer
-        # is the case.  Ensure they get cleaned on "make clean".
-        'rules/20_advance_fee.cf',
-        'rules/20_compensate.cf',
-        'rules/20_drugs.cf',
-        'rules/20_fake_helo_tests.cf',
-        'rules/20_phrases.cf',
-        'rules/20_porn.cf',
-        'rules/20_ratware.cf',
-        'rules/20_uri_tests.cf',
-        'rules/25_body_tests_es.cf',
-        'rules/25_body_tests_pl.cf',
-        'rules/30_text_de.cf',
-        'rules/30_text_fr.cf',
-        'rules/30_text_it.cf',
-        'rules/30_text_nl.cf',
-        'rules/30_text_pl.cf',
-        'rules/30_text_pt_br.cf',
-        'rules/50_scores.cf',
-        'rules/70_broken_rules.cf',
-
         # don't remove these. they are built from 'rulesrc' in SVN, but
         # in a distribution tarball, they're not
-        # 'rules/70_inactive.cf',
         # 'rules/70_sandbox.cf',
         # 'rules/72_active.cf',
 
+        # this file is no longer built, or used
+        'rules/70_inactive.cf',
+
       )
 
     },
@@ -292,6 +273,12 @@
     'NO_META' => 1,
 );
 
+# rules/72_active.cf is built from "rulesrc", but *must* exist before
+# WriteMakefile() is called due to shortcomings in MakeMaker.
+my @FILES_THAT_MUST_EXIST = qw(
+        rules/72_active.cf
+    );
+
 # That META.yml stuff was introduced with Perl 6.06_03, see
 # <http://archive.develooper.com/makemaker@perl.org/msg00922.html>
 # <http://archive.develooper.com/makemaker@perl.org/msg00984.html>
@@ -397,6 +384,10 @@
   exit 1;
 }
 
+foreach my $file (@FILES_THAT_MUST_EXIST) {
+  open (TOUCH, ">>$file") or die "cannot touch '$file'";
+  close TOUCH;
+}
 
 #######################################################################
 
@@ -1141,7 +1132,8 @@
 	$(PREPROCESS) $(FIXBYTES) $(FIXVARS) $(FIXBANG) -m$(PERM_RWX) -i$? -o$@
 
 build_rules: 
-	$(PERL) build/mkrules --src rulesrc --out rules --manifest MANIFEST --manifestskip MANIFEST.SKIP
+	-rm rules/70_inactive.cf
+	[ ! -d rulesrc ] || $(PERL) build/mkrules --src rulesrc --out rules --manifest MANIFEST --manifestskip MANIFEST.SKIP
 
 SPAMC_MAKEFILE  = spamc/Makefile
 MAKE_SPAMC      = $(MAKE) -f $(SPAMC_MAKEFILE)

Modified: spamassassin/branches/check_plugin/build/README
URL: http://svn.apache.org/viewvc/spamassassin/branches/check_plugin/build/README?view=diff&rev=468796&r1=468795&r2=468796
==============================================================================
--- spamassassin/branches/check_plugin/build/README (original)
+++ spamassassin/branches/check_plugin/build/README Sat Oct 28 17:59:34 2006
@@ -121,6 +121,10 @@
   a branch really) of whatever the latest branch revision to be the new
   base of the tag release.
 
+  Note that if you have to re-run this command later to fix a broken
+  release in progress, you need to "svn delete" the previous copy of the
+  tag first; otherwise it'll create a subdirectory instead of overwriting.
+
 - run "make distcheck" to ensure all files are included in the
   distribution that should be, and to ensure all files that are listed
   in the MANIFEST also exist in SVN.
@@ -256,19 +260,19 @@
         cd /www/spamassassin.apache.org
         webmake -F
 
-- update the 'dist' tree in the SpamAssassin website:
+- update the 'doc' tree in the SpamAssassin website:
 
         cd /www/spamassassin.apache.org
         svn up
         svn delete --force full/3.1.x
-        svn commit -m "removing old dist tree from website" full
+        svn commit -m "removing old doc tree from website" full
 
         cd [checkedoutdir]
         build/update_website_docs
 
         cd /www/spamassassin.apache.org
         svn add full/3.1.x
-        svn commit -m "updating new dist tree on website" full
+        svn commit -m "updating new doc tree on website" full
 
 - and check the entire website into SVN (see bug 4998 for reasons why).
   Note that you may need to resolve conflicts and ensure the contents of
@@ -276,6 +280,15 @@
 
         svn up
         svn commit -m "added new release to website"
+
+- update the tag used to point to "current release":
+
+    repo=https://svn.apache.org/repos/asf/spamassassin
+    svn delete -m "updating for new release" \
+	$repo/tags/spamassassin_current_release_3.1.x
+    svn copy -m "updating for new release" \
+	$repo/tags/spamassassin_release_3_1_7 \
+	$repo/tags/spamassassin_current_release_3.1.x
 
 - upload release tarball to CPAN at http://pause.cpan.org/:
 

Modified: spamassassin/branches/check_plugin/build/automc/run_preflight
URL: http://svn.apache.org/viewvc/spamassassin/branches/check_plugin/build/automc/run_preflight?view=diff&rev=468796&r1=468795&r2=468796
==============================================================================
--- spamassassin/branches/check_plugin/build/automc/run_preflight (original)
+++ spamassassin/branches/check_plugin/build/automc/run_preflight Sat Oct 28 17:59:34 2006
@@ -90,16 +90,18 @@
 # produce lots of noisy output to stop the buildbot from timing out on
 # mass-checks of large corpora.
 # store AICache data in /tmpfs/aicache.
+# ignore mails older than 6 months (use the nightly runs for those corpora,
+# it's too slow to mass-check them here).
 
 run "/local/bbmasstools/masschroot $perl ".
     "mass-check -c=tstrules --cache -j=1 ".
     "--noisy --deencap='petuniapress.com' ".
     "--cachedir=/tmpfs/aicache ".
+    "--after='6 months ago' ".
     $mass_check_args{$slavename}." ".
     "ham:detect:/home/bbmass/rawcor/*/ham/* ".
     "spam:detect:/home/bbmass/rawcor/*/spam/*";
 
-    # TODO: add --after="6 months ago"?
 
 exit;
 

Modified: spamassassin/branches/check_plugin/build/mkrules
URL: http://svn.apache.org/viewvc/spamassassin/branches/check_plugin/build/mkrules?view=diff&rev=468796&r1=468795&r2=468796
==============================================================================
--- spamassassin/branches/check_plugin/build/mkrules (original)
+++ spamassassin/branches/check_plugin/build/mkrules Sat Oct 28 17:59:34 2006
@@ -161,6 +161,7 @@
 my $ALWAYS_PUBLISH = '!always_publish!';
 
 read_all_rules($needs_compile);
+read_rules_from_output_dir();
 compile_output_files();
 lint_output_files();
 write_output_files();
@@ -391,10 +392,19 @@
       {
         $rules->{$name}->{found_definition} = 1;
       }
-      # userconf rules are always published in "active"
-      elsif (($type eq 'tflags') && ($val =~ /\buserconf\b/))
-      {
-        $rules->{$name}->{forceactive} = 1;
+      elsif ($type eq 'tflags') {
+        # userconf rules are always published in "active"
+        if ($val =~ /\buserconf\b/) {
+          $rules->{$name}->{forceactive} = 1;
+        }
+
+        # record for rulemetadata code
+        $val =~ s/\s+/ /gs;
+        if ($rules->{$name}->{tflags}) {
+          $rules->{$name}->{tflags} .= ' '.$val;
+        } else {
+          $rules->{$name}->{tflags} = $val;
+        }
       }
 
       $current_comments = '';
@@ -413,9 +423,13 @@
       my $val = $3;
 
       my $origname = $name;
-      if ($issandbox) {
-        $name = sandbox_rule_name_avoid_collisions($name, $f);
-      }
+
+      # note: if we call sandbox_rule_name_avoid_collisions(), it'll
+      # rename to 'T_RULENAME' -- which is exactly what we're trying
+      # to avoid in 'publish RULENAME' lines!  so don't call it here.
+      # if ($issandbox) {
+      # $name = sandbox_rule_name_avoid_collisions($name, $f);
+      # }
 
       if (!$rules->{$name}) { $rules->{$name} = rule_entry_create(); }
       $rules->{$name}->{origname} = $origname;
@@ -423,7 +437,7 @@
       if ($command eq 'publish') {
         # the 'publish' command defaults to "1", unless it explicitly
         # is set to "0".  iow: publish RULE_NAME [(0 | 1)]  [default: 1]
-        if (!defined $val) { $val = '1'; }
+        if (!defined $val || $val eq '') { $val = '1'; }
       }
       elsif ($command eq 'pubfile') {
         if (!filename_in_manifest($opt_out.'/'.$val)) {
@@ -433,6 +447,12 @@
       }
 
       $rules->{$name}->{$command} = $val;
+
+      # if we see "publish NAMEOFRULE", that means the rule is
+      # considered active
+      if ($rules->{$name}->{publish}) {
+        $rules->{$name}->{forceactive} = 1;
+      }
     }
     elsif (/^
         (if|ifplugin)
@@ -504,6 +524,16 @@
       /^\s*(\S+)/ and $name = $1;
       $name =~ s/\s+/ /gs;
 
+      my $forceactive = 1;
+      
+      # always send 'test' lines to the sandbox or inactive files
+      if (/^test\s*/) {
+        $forceactive = 0;
+
+        $name = $_;   # ensure we don't drag rules with us though!
+        $name =~ s/\s+/ /gs;
+      }
+
       my $cond;
       if ($current_conditional) {
         $name = $current_conditional; $name =~ s/\s+/ /gs;
@@ -519,7 +549,8 @@
       }
       $rules->{$name}->{cond} ||= $cond;
       $rules->{$name}->{issandbox} = $issandbox;
-      $rules->{$name}->{forceactive} = 1;
+      $rules->{$name}->{forceactive} = $forceactive;
+      # $rules->{$name}->{forceactive} = 1;
       $rules->{$name}->{iscommand} = 1;
 
       # warn "unknown line in rules file '$f', saving to default: $orig";
@@ -544,6 +575,54 @@
   }
 }
 
+# this is only run if we're generating rulemetadata!
+sub read_rules_from_output_dir {
+  return unless ($opt_rulemetadata);
+
+  foreach my $file (<$opt_out/*.cf>) {
+    next unless ($file =~ /\d\d_\S+\.cf$/);
+    next if ($file =~ /\/\Q$opt_activeout\E/);
+    next if ($file =~ /\/\Q$opt_sandboxout\E/);
+    next if ($file =~ /\/\Q$opt_inactiveout\E/);
+    read_output_file($file);
+  }
+}
+
+sub read_output_file {
+  my ($file) = @_;
+  open (IN, "<$file") or warn "cannot read $file";
+  while (<IN>) {
+    my $orig = $_;
+
+    s/^#reuse/reuse/;   # TODO - dirty hack.  we need to fix this to just be
+    # a keyword which the engine ignores, this is absurd! 
+
+    s/#.*$//g; s/^\s+//; s/\s+$//;
+
+    # drop comments/blank lines from output
+    next if (/^$/);
+
+    # save "lang" declarations
+    my $lang = '';
+    if (s/^lang\s+(\S+)\s+//) {
+      $lang = $1;
+    }
+
+    # right now, we only read tflags from output files
+    if (/^tflags\s+(\S+)\s+(.*)$/) {
+      my $name = $1;
+      my $val = $2;
+      $val =~ s/\s+/ /gs;
+      if ($rules->{$name}->{tflags}) {
+        $rules->{$name}->{tflags} .= ' '.$val;
+      } else {
+        $rules->{$name}->{tflags} = $val;
+      }
+    }
+  }
+  close IN;
+}
+
 sub copy_to_output_buffers {
   my ($rule_order, $issandbox, $f, $filename) = @_;
 
@@ -577,9 +656,6 @@
       $is_active++;
     }
 
-    # fix up any rule renamings we were supposed to do
-    sed_renamed_rule_names(\$text);
-
     my $cond = $rules->{$name}->{cond};
     my $pluginclass = $rules->{$name}->{ifplugin};
     if ($cond)
@@ -632,6 +708,9 @@
     $pubfile = $rules->{$name}->{pubfile};
     if ($pubfile) {
       $pubfile = $opt_out.'/'.$pubfile;
+    } else {
+      # "publish NAMEOFRULE" => send it to active
+      $pubfile = $opt_out.'/'.$opt_activeout;
     }
   }
 
@@ -769,19 +848,27 @@
   my ($rule) = @_;
 
   return '' unless ($opt_rulemetadata);
-  return '' unless ($rules->{$rule}->{found_definition});
 
   my $mod = 0;
-  my $srcfile = $rules->{$rule}->{srcfile};
-  my @s = stat $srcfile;
-  if (@s) {
-    $mod = $s[9];
+  my $srcfile = '';
+
+  if ($rules->{$rule}->{found_definition}) {
+    $srcfile = $rules->{$rule}->{srcfile} || '';
+    if ($srcfile) {
+      my @s = stat $srcfile;
+      if (@s) {
+        $mod = $s[9];
+      }
+    }
   }
 
+  my $tf = $rules->{$rule}->{tflags} || '';
+
   return "<rulemetadata>".
             "<name>$rule</name>".
             "<src>$srcfile</src>".
             "<srcmtime>$mod</srcmtime>".
+            "<tf>$tf</tf>".
           "</rulemetadata>\n";
 }
 
@@ -799,7 +886,11 @@
       # force that subrule (if it exists) to output in the
       # same pubfile
       my $rule2 = $1;
-      next unless ($rules->{$rule2} && $rules->{$rule2}->{output_text});
+
+      # deal with rules that changed name from "FOO" to "T_FOO"
+      sed_renamed_rule_names(\$rule2);
+      
+      next unless ($rules->{$rule2} && $rules->{$rule2}->{output_file});
 
       # don't do this if the subrule would be moved *out* of the
       # active file!
@@ -821,9 +912,11 @@
       warn "$pubfile: WARNING: not listed in manifest file\n";
     }
 
-    if ($output_file_text->{$pubfile}) {
+    my $text = $output_file_text->{$pubfile};
+    if ($text) {
       open (OUT, ">".$pubfile) or die "cannot write to output file '$pubfile'";
-      print OUT $output_file_text->{$pubfile};
+      sed_renamed_rule_names(\$text);
+      print OUT $text;
       close OUT or die "cannot close output file '$pubfile'";
       # print "$pubfile: written\n";        # too noisy
     }
@@ -856,6 +949,7 @@
 
   return $rule if $opt_listpromotable;
   return $rule if $active_rules->{$rule};
+  return $rule if $rules->{$rule}->{forceactive};
 
   if ($rule !~ /^(?:T_|__)/) {
     $new = "T_".$rule;
@@ -929,7 +1023,6 @@
   my ($fname) = @_;
   return 1 if ($file_manifest->{$fname});
   foreach my $skipre (@{$file_manifest_skip}) {
-    # warn "JMD $skipre $fname";
     return 1 if ($fname =~ $skipre);
   }
   return 0;

Modified: spamassassin/branches/check_plugin/debian/control
URL: http://svn.apache.org/viewvc/spamassassin/branches/check_plugin/debian/control?view=diff&rev=468796&r1=468795&r2=468796
==============================================================================
--- spamassassin/branches/check_plugin/debian/control (original)
+++ spamassassin/branches/check_plugin/debian/control Sat Oct 28 17:59:34 2006
@@ -7,8 +7,8 @@
 
 Package: spamassassin
 Architecture: any
-Depends: ${perl:Depends}, ${shlibs:Depends}, libdigest-sha1-perl, libhtml-parser-perl (>= 3.24), libstorable-perl | perl (>= 5.8.0)
-Recommends: libnet-dns-perl, libmail-spf-query-perl
+Depends: ${perl:Depends}, ${shlibs:Depends}, libdigest-sha1-perl, libhtml-parser-perl (>= 3.24), libnet-dns-perl | perl (>= 5.8.0)
+Recommends: libmail-spf-query-perl
 Suggests: razor (>= 2.40), pyzor, libtime-hires-perl
 Provides: spamassassin, spamc
 Description:  Spam detector and markup engine

Modified: spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/ArchiveIterator.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/ArchiveIterator.pm?view=diff&rev=468796&r1=468795&r2=468796
==============================================================================
--- spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/ArchiveIterator.pm (original)
+++ spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/ArchiveIterator.pm Sat Oct 28 17:59:34 2006
@@ -488,7 +488,7 @@
     }
   }
 
-  my @messages;
+  my $messages;
   if ($self->{opt_n}) {
     # OPT_N == 1 means don't bother sorting on message receive date
 
@@ -502,8 +502,10 @@
       splice(@{$self->{h}}, min ($self->{opt_head}, scalar @{$self->{h}}));
     }
 
-    @messages = ( @{$self->{s}}, @{$self->{h}} );
+    # for ease of memory, we'll play with pointers
+    $messages = $self->{s};
     undef $self->{s};
+    push(@{$messages}, @{$self->{h}});
     undef $self->{h};
   }
   else {
@@ -529,22 +531,22 @@
     if (@s && @h) {
       my $ratio = @s / @h;
       while (@s && @h) {
-	push @messages, (@s / @h > $ratio) ? (shift @s) : (shift @h);
+	push @{$messages}, (@s / @h > $ratio) ? (shift @s) : (shift @h);
       }
     }
     # push the rest onto the end
-    push @messages, @s, @h;
+    push @{$messages}, @s, @h;
   }
 
   # head or tail < 0 means crop the total list, negate the value appropriately
   if ($self->{opt_tail} < 0) {
-    splice(@messages, 0, $self->{opt_tail});
+    splice(@{$messages}, 0, $self->{opt_tail});
   }
   if ($self->{opt_head} < 0) {
-    splice(@messages, -$self->{opt_head});
+    splice(@{$messages}, -$self->{opt_head});
   }
 
-  return scalar(@messages), \@messages;
+  return scalar(@{$messages}), $messages;
 }
 
 sub mail_open {
@@ -569,7 +571,7 @@
 
 ############################################################################
 
-sub message_is_useful_by_date  {
+sub message_is_useful_by_date {
   my ($self, $date) = @_;
 
   return 0 unless $date;	# undef or 0 date = unusable
@@ -579,7 +581,7 @@
     return 1;
   }
   elsif (!$self->{opt_before}) {
-    # Just case about after
+    # Just care about after
     return $date > $self->{opt_after};
   }
   else {
@@ -587,6 +589,24 @@
   }
 }
 
+# additional check, based solely on a file's mod timestamp.  we cannot
+# make assumptions about --before, since the file may have been "touch"ed
+# since the last message was appended; but we can assume that too-old
+# files cannot contain messages newer than their modtime.
+sub message_is_useful_by_file_modtime {
+  my ($self, $date) = @_;
+
+  # better safe than sorry, if date is undef; let other stuff catch errors
+  return 1 unless $date;
+
+  if ($self->{opt_after}) {
+    return ($date > $self->{opt_after});
+  }
+  else {
+    return 1;       # --after not in use
+  }
+}
+
 ############################################################################
 
 # 0 850852128			atime
@@ -646,13 +666,16 @@
   my ($self, $class, $mail) = @_;
 
   $self->bump_scan_progress();
+
+  my @s = stat($mail);
+  return unless $self->message_is_useful_by_file_modtime($s[9]);
+
   if (!$self->{determine_receive_date}) {
     push(@{$self->{$class}}, index_pack(AI_TIME_UNKNOWN, $class, "f", $mail));
     return;
   }
 
   my $date;
-
   unless (defined $AICache and $date = $AICache->check($mail)) {
     my $header;
     if (!mail_open($mail)) {
@@ -706,6 +729,9 @@
       next;
     }
 
+    my @s = stat($file);
+    next unless $self->message_is_useful_by_file_modtime($s[9]);
+
     my $info = {};
     my $count;
 
@@ -807,6 +833,9 @@
       $self->{access_problem} = 1;
       next;
     }
+
+    my @s = stat($file);
+    next unless $self->message_is_useful_by_file_modtime($s[9]);
 
     my $info = {};
     my $count;

Modified: spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Bayes/CombineChi.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Bayes/CombineChi.pm?view=diff&rev=468796&r1=468795&r2=468796
==============================================================================
--- spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Bayes/CombineChi.pm (original)
+++ spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Bayes/CombineChi.pm Sat Oct 28 17:59:34 2006
@@ -41,7 +41,7 @@
 # the use of "s") of an original assumed expectation ... relative to how
 # strongly we want to consider our actual collected data."  Low 's' means
 # trust collected data more strongly.
-our $FW_S_CONSTANT = 0.100;
+our $FW_S_CONSTANT = 0.030;
 
 # (s . x) for the f(w) equation.
 our $FW_S_DOT_X = ($FW_X_CONSTANT * $FW_S_CONSTANT);

Modified: spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/BayesStore/DBM.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/BayesStore/DBM.pm?view=diff&rev=468796&r1=468795&r2=468796
==============================================================================
--- spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/BayesStore/DBM.pm (original)
+++ spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/BayesStore/DBM.pm Sat Oct 28 17:59:34 2006
@@ -590,7 +590,7 @@
     my $db_var = 'db_'.$dbname;
 
     if (exists $self->{$db_var}) {
-      dbg("bayes: untie-ing $db_var");
+      # dbg("bayes: untie-ing $db_var");
       untie %{$self->{$db_var}};
       delete $self->{$db_var};
     }
@@ -1515,7 +1515,7 @@
     my $db_var = 'db_'.$dbname;
 
     if (exists $self->{$db_var}) {
-      dbg("bayes: untie-ing $db_var");
+      # dbg("bayes: untie-ing $db_var");
       untie %{$self->{$db_var}};
       delete $self->{$db_var};
     }

Modified: spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Conf/Parser.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Conf/Parser.pm?view=diff&rev=468796&r1=468795&r2=468796
==============================================================================
--- spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Conf/Parser.pm (original)
+++ spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Conf/Parser.pm Sat Oct 28 17:59:34 2006
@@ -932,7 +932,9 @@
   }
   $conf->{priority}->{$name} ||= 0;
   $conf->{source_file}->{$name} = $self->{currentfile};
-  $conf->{if_stack}->{$name} = $self->get_if_stack_as_string();
+
+  # this no longer seems to be needed!
+  # $conf->{if_stack}->{$name} = $self->get_if_stack_as_string();
 
   if ($self->{scoresonly}) {
     $conf->{user_rules_to_compile}->{$type} = 1;

Modified: spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Dns.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Dns.pm?view=diff&rev=468796&r1=468795&r2=468796
==============================================================================
--- spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Dns.pm (original)
+++ spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Dns.pm Sat Oct 28 17:59:34 2006
@@ -205,7 +205,7 @@
     $self->{already_logged}->{$log} = 1;
   }
 
-  if (!defined $self->{tests_already_hit}->{$rule}) {
+  if (!$self->{tests_already_hit}->{$rule}) {
     $self->got_hit($rule, "RBL: ", ruletype => "dnsbl");
   }
 }
@@ -277,7 +277,7 @@
 
   my $rdatastr = $answer->rdatastr;
   while (my ($subtest, $rule) = each %{ $self->{dnspost}->{$set} }) {
-    next if defined $self->{tests_already_hit}->{$rule};
+    next if $self->{tests_already_hit}->{$rule};
 
     # exact substr (usually IP address)
     if ($subtest eq $rdatastr) {

Modified: spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Message.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Message.pm?view=diff&rev=468796&r1=468795&r2=468796
==============================================================================
--- spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Message.pm (original)
+++ spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Message.pm Sat Oct 28 17:59:34 2006
@@ -731,7 +731,7 @@
     }
 
     if (!$in_body) {
-      s/\s+$//;
+      # s/\s+$//;   # bug 5127: don't clean this up (yet)
       if (m/^[\041-\071\073-\176]+:/) {
         if ($header) {
           my ( $key, $value ) = split ( /:\s*/, $header, 2 );
@@ -741,7 +741,7 @@
 	next;
       }
       elsif (/^[ \t]/) {
-        $_ =~ s/^\s*//;
+        # $_ =~ s/^\s*//;   # bug 5127, again
         $header .= $_;
 	next;
       }

Modified: spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Message/Node.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Message/Node.pm?view=diff&rev=468796&r1=468795&r2=468796
==============================================================================
--- spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Message/Node.pm (original)
+++ spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Message/Node.pm Sat Oct 28 17:59:34 2006
@@ -170,7 +170,11 @@
       $self->{'raw_headers'}->{$key} = [];
     }
 
-    push @{ $self->{'headers'}->{$key} },     $self->_decode_header($raw_value);
+    my $dec_value = $raw_value;
+    $dec_value =~ s/\n[ \t]+/ /gs;
+    $dec_value =~ s/\s*$//s;
+    push @{ $self->{'headers'}->{$key} },     $self->_decode_header($dec_value);
+
     push @{ $self->{'raw_headers'}->{$key} }, $raw_value;
 
     return $self->{'headers'}->{$key}->[-1];

Modified: spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin.pm?view=diff&rev=468796&r1=468795&r2=468796
==============================================================================
--- spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin.pm (original)
+++ spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin.pm Sat Oct 28 17:59:34 2006
@@ -402,6 +402,13 @@
 Called via SpamAssassin::finish and should clear up any tests that a plugin
 has added to the namespace.
 
+In certain circumstances, plugins may find it useful to compile
+Perl functions from the ruleset on the fly.  It is important to
+remove these once the C<Mail::SpamAssassin> object is deleted,
+and this method gives the plugin the opportunity to do so.
+
+Each plugin is responsible for its own generated perl functions.
+
 =over 4
 
 =item conf
@@ -905,6 +912,23 @@
 
 The configuration file arguments will be passed in after the standard
 arguments.
+
+=head1 BACKWARDS COMPATIBILITY
+
+Note that if you write a plugin and need to determine if a particular
+helper method is supported on C<Mail::SpamAssassin::Plugin>, you
+can do this:
+
+    if ($self->can("name_of_method")) {
+      eval {
+        $self->name_of_method();        # etc.
+      }
+    } else {
+      # take fallback action
+    }
+
+The same applies for the public APIs on objects of other types, such as
+C<Mail::SpamAssassin::PerMsgStatus>.
 
 =head1 SEE ALSO
 

Modified: spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/BodyEval.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/BodyEval.pm?view=diff&rev=468796&r1=468795&r2=468796
==============================================================================
--- spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/BodyEval.pm (original)
+++ spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/BodyEval.pm Sat Oct 28 17:59:34 2006
@@ -39,43 +39,12 @@
   bless ($self, $class);
 
   # the important bit!
-  $self->register_eval_rule("check_unique_words");
   $self->register_eval_rule("multipart_alternative_difference");
   $self->register_eval_rule("multipart_alternative_difference_count");
   $self->register_eval_rule("check_blank_line_ratio");
   $self->register_eval_rule("tvd_vertical_words");
 
   return $self;
-}
-
-sub check_unique_words {
-  my ($self, $pms, $body, $m, $b) = @_;
-
-  if (!defined $pms->{unique_words_repeat}) {
-    $pms->{unique_words_repeat} = 0;
-    $pms->{unique_words_unique} = 0;
-    my %count;
-    for (@$body) {
-      # copy to avoid changing @$body
-      my $line = $_;
-      # from tokenize_line in Bayes.pm
-      $line =~ tr/-A-Za-z0-9,\@\*\!_'"\$.\241-\377 / /cs;
-      $line =~ s/(\w)(\.{3,6})(\w)/$1 $2 $3/gs;
-      $line =~ s/(\w)(\-{2,6})(\w)/$1 $2 $3/gs;
-      $line =~ s/(?:^|\.\s+)([A-Z])([^A-Z]+)(?:\s|$)/ ' '.(lc $1).$2.' '/ge;
-      for my $token (split(' ', $line)) {
-        $count{$token}++;
-      }
-    }
-    $pms->{unique_words_unique} = scalar grep { $_ == 1 } values(%count);
-    $pms->{unique_words_repeat} = scalar keys(%count) - $pms->{unique_words_unique};
-  }
-
-  # y = mx+b where y is number of unique words needed
-  my $unique = $pms->{unique_words_unique};
-  my $repeat = $pms->{unique_words_repeat};
-  my $y = ($unique + $repeat) * $m + $b;
-  return ($unique > $y);
 }
 
 sub multipart_alternative_difference {

Modified: spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm?view=diff&rev=468796&r1=468795&r2=468796
==============================================================================
--- spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm (original)
+++ spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm Sat Oct 28 17:59:34 2006
@@ -49,7 +49,6 @@
   $self->register_eval_rule("html_text_not_match");
   $self->register_eval_rule("html_range");
   $self->register_eval_rule("check_iframe_src");
-  $self->register_eval_rule("check_html_uri_only");
 
   return $self;
 }
@@ -197,24 +196,6 @@
   }
 
   return 0;
-}
-
-sub check_html_uri_only {
-  my ($self, $pms) = @_;
-
-  # Find out if there are any multipart/alternative parts in the message
-  my @ma = $pms->{msg}->find_parts(qr@^multipart/alternative\b@i);
-
-  # If there are no multipart/alternative sections, skip this test.
-  return if (!@ma);
-
-  # At this point, we're not actually checking the alternates, just the entire
-  # message.
-  foreach my $v ( values %{$pms->{html}->{uri_detail}} ) {
-    return 0 if (exists $v->{types}->{parsed});
-  }
-
-  return 1;
 }
 
 1;

Modified: spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm?view=diff&rev=468796&r1=468795&r2=468796
==============================================================================
--- spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm (original)
+++ spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm Sat Oct 28 17:59:34 2006
@@ -50,7 +50,6 @@
   $self->register_eval_rule("check_for_forged_eudoramail_received_headers");
   $self->register_eval_rule("check_for_forged_yahoo_received_headers");
   $self->register_eval_rule("check_for_forged_juno_received_headers");
-  $self->register_eval_rule("check_for_from_to_same");
   $self->register_eval_rule("check_for_matching_env_and_hdr_from");
   $self->register_eval_rule("sorted_recipients");
   $self->register_eval_rule("similar_recipients");
@@ -562,29 +561,6 @@
   return 0;   
 }
 
-# From and To have same address, but are not exactly the same and
-# neither contains intermediate spaces.
-sub check_for_from_to_same {
-  my ($self, $pms) = @_;
-
-  my $hdr_from = $pms->get('From');
-  my $hdr_to = $pms->get('To');
-  return 0 if (!length($hdr_from) || !length($hdr_to) ||
-	       $hdr_from eq $hdr_to);
-
-  my $addr_from = $pms->get('From:addr');
-  my $addr_to = $pms->get('To:addr');
-  # BUG: From:addr and To:addr sometimes contain whitespace
-  $addr_from =~ s/\s+//g;
-  $addr_to =~ s/\s+//g;
-  return 0 if (!length($addr_from) || !length($addr_to) ||
-	       $addr_from ne $addr_to);
-
-  if ($hdr_from =~ /^\s*\S+\s*$/ && $hdr_to =~ /^\s*\S+\s*$/) {
-    return 1;
-  }
-}
-
 sub check_for_matching_env_and_hdr_from {
   my ($self, $pms) =@_;
   # two blank headers match so don't bother checking
@@ -852,10 +828,10 @@
     foreach $rcvd (@local) {
       if ($rcvd =~ m/(\s.?\d+ \S\S\S \d+ \d+:\d+:\d+ \S+)/) {
 	my $date = $1;
-	dbg("eval: trying Received fetchmail header date for real time: $date");
+        dbg2("eval: trying Received fetchmail header date for real time: $date");
 	my $time = Mail::SpamAssassin::Util::parse_rfc822_date($date);
 	if (defined($time) && (time() >= $time)) {
-	  dbg("eval: time_t from date=$time, rcvd=$date");
+          dbg2("eval: time_t from date=$time, rcvd=$date");
 	  push @fetchmail_times, $time;
 	}
       }
@@ -872,10 +848,10 @@
   foreach $rcvd (@received) {
     if ($rcvd =~ m/(\s.?\d+ \S\S\S \d+ \d+:\d+:\d+ \S+)/) {
       my $date = $1;
-      dbg("eval: trying Received header date for real time: $date");
+      dbg2("eval: trying Received header date for real time: $date");
       my $time = Mail::SpamAssassin::Util::parse_rfc822_date($date);
       if (defined($time)) {
-	dbg("eval: time_t from date=$time, rcvd=$date");
+        dbg2("eval: time_t from date=$time, rcvd=$date");
 	push @header_times, $time;
       }
     }
@@ -1127,6 +1103,15 @@
   }
 
   return 0;
+}
+
+###########################################################################
+
+# support eval-test verbose debugs using "-Deval"
+sub dbg2 {
+  if (would_log('dbg', 'eval') == 2) {
+    dbg(@_);
+  }
 }
 
 1;

Modified: spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm?view=diff&rev=468796&r1=468795&r2=468796
==============================================================================
--- spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm (original)
+++ spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm Sat Oct 28 17:59:34 2006
@@ -159,10 +159,6 @@
     $pms->{mime_base64_encoded_text} = 1;
   }
 
-  if ($cte =~ /base64/ && !$name) {
-    $pms->{mime_base64_no_name} = 1;
-  }
-
   if ($charset =~ /iso-\S+-\S+\b/i &&
       $charset !~ /iso-(?:8859-\d{1,2}|2022-(?:jp|kr))\b/)
   {
@@ -231,7 +227,6 @@
   $pms->{mime_base64_encoded_text} = 0;
   # $pms->{mime_base64_illegal} = 0;
   # $pms->{mime_base64_latin} = 0;
-  $pms->{mime_base64_no_name} = 0;
   $pms->{mime_body_html_count} = 0;
   $pms->{mime_body_text_count} = 0;
   $pms->{mime_faraway_charset} = 0;

Modified: spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/MIMEHeader.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/MIMEHeader.pm?view=diff&rev=468796&r1=468795&r2=468796
==============================================================================
--- spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/MIMEHeader.pm (original)
+++ spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/MIMEHeader.pm Sat Oct 28 17:59:34 2006
@@ -46,8 +46,10 @@
 
 Header names are considered case-insensitive.
 
-The header values are normally cleaned up a little. Append C<:raw> to the
-header name to retrieve the raw, undecoded value instead.
+The header values are normally cleaned up a little; for example, whitespace
+around the newline character in "folded" headers will be replaced with a single
+space.  Append C<:raw> to the header name to retrieve the raw, undecoded value,
+including pristine whitespace, instead.
 
 =back
 
@@ -62,9 +64,11 @@
 use warnings;
 use bytes;
 
-use vars qw(@ISA);
+use vars qw(@ISA @TEMPORARY_METHODS);
 @ISA = qw(Mail::SpamAssassin::Plugin);
 
+@TEMPORARY_METHODS = (); 
+
 # ---------------------------------------------------------------------------
 
 # constructor
@@ -129,6 +133,7 @@
 
       $self->{parser}->add_test($rulename, $evalfn."()",
                 $Mail::SpamAssassin::Conf::TYPE_BODY_EVALS);
+
       my $evalcode = '
         sub Mail::SpamAssassin::Plugin::MIMEHeader::'.$evalfn.' {
           $_[0]->eval_hook_called($_[1], q{'.$rulename.'});
@@ -142,6 +147,8 @@
       }
 
       $pluginobj->register_eval_rule($evalfn);
+
+      push @TEMPORARY_METHODS, "Mail::SpamAssassin::Plugin::MIMEHeader::${evalfn}";
     }
   });
 
@@ -168,7 +175,12 @@
   }
 
   foreach my $p ($scanner->{msg}->find_parts(qr/./)) {
-    my $val = $p->get_header($hdr, $getraw);
+    my $val;
+    if ($getraw) {
+      $val = $p->raw_header($hdr);
+    } else {
+      $val = $p->get_header($hdr);
+    }
     $val ||= $if_unset;
 
     if ($val =~ ${pattern}) {
@@ -177,6 +189,17 @@
   }
 
   return ($negated ? 1 : 0);
+}
+
+# ---------------------------------------------------------------------------
+
+sub finish_tests {
+  my ($self, $params) = @_;
+
+  foreach my $method (@TEMPORARY_METHODS) {
+    undef &{$method};
+  }
+  @TEMPORARY_METHODS = ();      # clear for next time
 }
 
 # ---------------------------------------------------------------------------

Modified: spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/RelayEval.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/RelayEval.pm?view=diff&rev=468796&r1=468795&r2=468796
==============================================================================
--- spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/RelayEval.pm (original)
+++ spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/RelayEval.pm Sat Oct 28 17:59:34 2006
@@ -48,11 +48,9 @@
   $self->register_eval_rule("check_for_sender_no_reverse");
   $self->register_eval_rule("check_for_from_domain_in_received_headers");
   $self->register_eval_rule("check_for_forged_received_trail");
-  $self->register_eval_rule("check_for_forged_received_helo");
   $self->register_eval_rule("check_for_forged_received_ip_helo");
   $self->register_eval_rule("helo_ip_mismatch");
   $self->register_eval_rule("check_for_no_rdns_dotcom_helo");
-  $self->register_eval_rule("message_id_from_mta");
 
   return $self;
 }
@@ -162,7 +160,7 @@
       if ($pms->is_dns_available()) {
 	my $vrdns = $pms->lookup_ptr ($relay->{ip});
 	if (defined $vrdns && $vrdns ne $claimed) {
-	  dbg("eval: rdns/helo mismatch: helo=$relay->{helo} ".	
+	  dbg2("eval: rdns/helo mismatch: helo=$relay->{helo} ".	
 		"claimed-rdns=$claimed true-rdns=$vrdns");
 	  return 1;
 	  # TODO: instead, we should set a flag and check it later for
@@ -182,7 +180,7 @@
       }
 
       # otherwise there *is* a mismatch
-      dbg("eval: rdns/helo mismatch: helo=$relay->{helo} rdns=$claimed");
+      dbg2("eval: rdns/helo mismatch: helo=$relay->{helo} rdns=$claimed");
       return 1;
     }
   }
@@ -351,28 +349,13 @@
 
       # ok, let's catch the case where there's *no* reverse DNS there either
       if ($no_rdns) {
-	dbg("eval: Received: no rDNS for dotcom HELO: from=$from_host HELO=$helo_host");
+	dbg2("eval: Received: no rDNS for dotcom HELO: from=$from_host HELO=$helo_host");
 	$pms->{no_rdns_dotcom_helo} = 1;
       }
     }
   }
 } # _check_received_helos()
 
-# Message-ID for untrusted message was added by a trusted relay
-sub message_id_from_mta {
-  my ($self, $pms) = @_;
-
-  my $id = $pms->get('MESSAGEID');
-
-  if ($id && $pms->{num_relays_untrusted} > 0) {
-    for my $rcvd (@{$pms->{relays_untrusted}}[0], @{$pms->{relays_trusted}})
-    {
-      return 1 if $rcvd->{id} && (index(lc($id), lc($rcvd->{id})) != -1);
-    }
-  }
-  return 0;
-}
-
 # FORGED_RCVD_TRAIL
 sub check_for_forged_received_trail {
   my ($self, $pms) = @_;
@@ -380,13 +363,6 @@
   return ($pms->{mismatch_from} > 1);
 }
 
-# FORGED_RCVD_HELO
-sub check_for_forged_received_helo {
-  my ($self, $pms) = @_;
-  $self->_check_for_forged_received($pms) unless exists $pms->{mismatch_helo};
-  return ($pms->{mismatch_helo} > 0);
-}
-
 # FORGED_RCVD_IP_HELO
 sub check_for_forged_received_ip_helo {
   my ($self, $pms) = @_;
@@ -398,7 +374,6 @@
   my ($self, $pms) = @_;
 
   $pms->{mismatch_from} = 0;
-  $pms->{mismatch_helo} = 0;
   $pms->{mismatch_ip_helo} = 0;
 
   my $IP_PRIVATE = IP_PRIVATE;
@@ -433,7 +408,7 @@
     my $hlo = $helo[$i];
     my $by = $by[$i];
 
-    dbg("eval: forged-HELO: from=".(defined $frm ? $frm : "(undef)").
+    dbg2("eval: forged-HELO: from=".(defined $frm ? $frm : "(undef)").
 			" helo=".(defined $hlo ? $hlo : "(undef)").
 			" by=".(defined $by ? $by : "(undef)"));
 
@@ -442,15 +417,6 @@
 
     next unless ($by =~ /^\w+(?:[\w.-]+\.)+\w+$/);
 
-    if (defined($hlo) && defined($frm)
-		&& $hlo =~ /^\w+(?:[\w.-]+\.)+\w+$/
-		&& $frm =~ /^\w+(?:[\w.-]+\.)+\w+$/
-		&& $frm ne $hlo && !helo_forgery_whitelisted($frm, $hlo))
-    {
-      dbg("eval: forged-HELO: mismatch on HELO: '$hlo' != '$frm'");
-      $pms->{mismatch_helo}++;
-    }
-
     my $fip = $fromip[$i];
 
     if (defined($hlo) && defined($fip)) {
@@ -466,7 +432,7 @@
 		$hclassb ne $fclassb &&
 		!($hlo =~ /$IP_PRIVATE/o))
 	{
-	  dbg("eval: forged-HELO: massive mismatch on IP-addr HELO: '$hlo' != '$fip'");
+	  dbg2("eval: forged-HELO: massive mismatch on IP-addr HELO: '$hlo' != '$fip'");
 	  $pms->{mismatch_ip_helo}++;
 	}
       }
@@ -477,9 +443,18 @@
 		&& $prev =~ /^\w+(?:[\w.-]+\.)+\w+$/
 		&& $by ne $prev && !helo_forgery_whitelisted($by, $prev))
     {
-      dbg("eval: forged-HELO: mismatch on from: '$prev' != '$by'");
+      dbg2("eval: forged-HELO: mismatch on from: '$prev' != '$by'");
       $pms->{mismatch_from}++;
     }
+  }
+}
+
+###########################################################################
+
+# support eval-test verbose debugs using "-Deval"
+sub dbg2 {
+  if (would_log('dbg', 'eval') == 2) {
+    dbg(@_);
   }
 }
 

Modified: spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/URIEval.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/URIEval.pm?view=diff&rev=468796&r1=468795&r2=468796
==============================================================================
--- spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/URIEval.pm (original)
+++ spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Plugin/URIEval.pm Sat Oct 28 17:59:34 2006
@@ -38,7 +38,6 @@
   bless ($self, $class);
 
   # the important bit!
-  $self->register_eval_rule("check_domain_ratio");
   $self->register_eval_rule("check_for_http_redirector");
   $self->register_eval_rule("check_https_ip_mismatch");
 
@@ -46,16 +45,6 @@
 }
 
 ###########################################################################
-
-sub check_domain_ratio {
-  my ($self, $pms, $body, $ratio) = @_;
-  my $length = (length(join('', @{$body})) || 1);
-  if (!defined $pms->{uri_domain_count}) {
-    $pms->get_uri_list();
-  }
-  return 0 if !defined $pms->{uri_domain_count};
-  return (($pms->{uri_domain_count} / $length) > $ratio);
-}
 
 sub check_for_http_redirector {
   my ($self, $pms) = @_;

Modified: spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/PluginHandler.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/PluginHandler.pm?view=diff&rev=468796&r1=468795&r2=468796
==============================================================================
--- spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/PluginHandler.pm (original)
+++ spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/PluginHandler.pm Sat Oct 28 17:59:34 2006
@@ -138,7 +138,7 @@
   my ($self, $plugin) = @_;
   $plugin->{main} = $self->{main};
   push (@{$self->{plugins}}, $plugin);
-  dbg("plugin: registered $plugin");
+  # dbg("plugin: registered $plugin");
 
   # invalidate cache entries for any configuration-time hooks, in case
   # one has already been built; this plugin may implement that hook!

Modified: spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/SpamdForkScaling.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/SpamdForkScaling.pm?view=diff&rev=468796&r1=468795&r2=468796
==============================================================================
--- spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/SpamdForkScaling.pm (original)
+++ spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/SpamdForkScaling.pm Sat Oct 28 17:59:34 2006
@@ -426,7 +426,13 @@
     dbg("prefork: ordered $kid to accept");
 
     # now wait for it to say it's done that
-    return $self->wait_for_child_to_accept($sock);
+    my $ret = $self->wait_for_child_to_accept($kid, $sock);
+    if ($ret) {
+      return $ret;
+    } else {
+      # retry with another child
+      return $self->order_idle_child_to_accept();
+    }
 
   }
   else {
@@ -436,10 +442,11 @@
 }
 
 sub wait_for_child_to_accept {
-  my ($self, $sock) = @_;
+  my ($self, $kid, $sock) = @_;
 
   while (1) {
     my $state = $self->read_one_message_from_child_socket($sock);
+
     if ($state == PFSTATE_BUSY) {
       return 1;     # 1 == success
     }
@@ -447,7 +454,12 @@
       return undef;
     }
     else {
-      die "prefork: ordered child to accept, but child reported state '$state'";
+      warn "prefork: ordered child $kid to accept, but they reported state '$state', killing rogue";
+      $self->child_error_kill($kid, $sock);
+      $self->adapt_num_children();
+      sleep 1;
+
+      return undef;
     }
   }
 }

Modified: spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Util/DependencyInfo.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Util/DependencyInfo.pm?view=diff&rev=468796&r1=468795&r2=468796
==============================================================================
--- spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Util/DependencyInfo.pm (original)
+++ spamassassin/branches/check_plugin/lib/Mail/SpamAssassin/Util/DependencyInfo.pm Sat Oct 28 17:59:34 2006
@@ -40,6 +40,18 @@
   'desc' => 'HTML is used for an ever-increasing amount of email so this dependency
   is unavoidable.  Run "perldoc -q html" for additional information.',
 },
+{
+  module => 'Net::DNS',
+  version => ($^O =~ /^(mswin|dos|os2)/oi ? '0.46' : '0.34'),
+  desc => 'Used for all DNS-based tests (SBL, XBL, SpamCop, DSBL, etc.),
+  perform MX checks, and is also used when manually reporting spam to
+  SpamCop.
+
+  You need to make sure the Net::DNS version is sufficiently up-to-date:
+
+  - version 0.34 or higher on Unix systems
+  - version 0.46 or higher on Windows systems',
+},
 );
 
 my @OPTIONAL_MODULES = (
@@ -55,20 +67,6 @@
   desc => 'Used to store data on-disk, for the Bayes-style logic and
   auto-whitelist.  *Much* more efficient than the other standard Perl
   database packages.  Strongly recommended.',
-},
-{
-  module => 'Net::DNS',
-  version => ($^O =~ /^(mswin|dos|os2)/oi ? '0.46' : '0.34'),
-  desc => 'Used for all DNS-based tests (SBL, XBL, SpamCop, DSBL, etc.),
-  perform MX checks, and is also used when manually reporting spam to
-  SpamCop.  Recommended.
-
-  If this is installed and you are using network tests of any variety
-  (which is the default), then you need to make sure the Net::DNS
-  version is sufficiently up-to-date:
-
-  - version 0.34 or higher on Unix systems
-  - version 0.46 or higher on Windows systems',
 },
 {
   module => 'Net::SMTP',

Modified: spamassassin/branches/check_plugin/lib/spamassassin-run.pod
URL: http://svn.apache.org/viewvc/spamassassin/branches/check_plugin/lib/spamassassin-run.pod?view=diff&rev=468796&r1=468795&r2=468796
==============================================================================
--- spamassassin/branches/check_plugin/lib/spamassassin-run.pod (original)
+++ spamassassin/branches/check_plugin/lib/spamassassin-run.pod Sat Oct 28 17:59:34 2006
@@ -44,6 +44,7 @@
                                    Set user preferences file
  --siteconfigpath=path             Path for site configs
                                    (def: /etc/mail/spamassassin)
+ --cf='config line'                Additional line of configuration
  -x, --nocreate-prefs              Don't create user preferences file
  -e, --exit-code                   Exit with a non-zero exit code if the
                                    tested message was spam
@@ -237,6 +238,15 @@
 
 Use the specified path for locating site-specific configuration files.  Ignore
 the default directories (usually C</etc/mail/spamassassin> or similar).
+
+=item B<--cf='config line'>
+
+Add additional lines of configuration directly from the command line; these are
+parsed after the configuration files are read.  Multiple B<--cf> arguments can be
+used, and each will be considered a separate line of configuration.  For
+example:
+
+        spamassassin -t --cf="body NEWRULE /text/" --cf="score NEWRULE 3.0"
 
 =item B<-p> I<prefs>, B<--prefspath>=I<prefs>, B<--prefs-file>=I<prefs>
 

Modified: spamassassin/branches/check_plugin/masses/rule-qa/automc/gen_info_xml
URL: http://svn.apache.org/viewvc/spamassassin/branches/check_plugin/masses/rule-qa/automc/gen_info_xml?view=diff&rev=468796&r1=468795&r2=468796
==============================================================================
--- spamassassin/branches/check_plugin/masses/rule-qa/automc/gen_info_xml (original)
+++ spamassassin/branches/check_plugin/masses/rule-qa/automc/gen_info_xml Sat Oct 28 17:59:34 2006
@@ -4,6 +4,12 @@
 # need this to ensure that 'svn log' will include ALL changes
 my $svn_checkins_root = "http://svn.apache.org/repos/asf/spamassassin/";
 
+# we won't provide who-checked-in and commit-message details for changes
+# older than this.  Note, this is not in rev number terms; it's an
+# absolute count of revisions.
+# update: alternatively, since the zone's SVN is too old (!), use a date.
+# my $svn_log_limit = 500;
+
 my $full_rebuild = 0;
 if ($ARGV[0] && $ARGV[0] =~ /^-f/) {
   $full_rebuild = 1;
@@ -158,8 +164,13 @@
 }
 
 sub get_svn_log {
-  print "getting svn log...\n";
-  if (open (IN, "svn log --xml $svn_checkins_root |")) {
+  print "getting svn log... (".time.")\n";
+
+  my $limitdate = strftime ("%Y-%m-%d", localtime time-(24*60*60*30*12));
+
+  if (open (IN, "svn log -r 'HEAD:{$limitdate}' --xml $svn_checkins_root |"))
+  # if (open (IN, "svn log --limit $svn_log_limit --xml $svn_checkins_root |"))
+  {
     eval {
       my $xml = join('', <IN>);
       $svn_log = XMLin($xml);
@@ -172,7 +183,7 @@
   if (!$svn_log) {
     die "no svn log --xml";
   }
-  print "got ".(scalar @{$svn_log->{logentry}})." log entries\n";
+  print "got ".(scalar @{$svn_log->{logentry}})." log entries (".time.")\n";
 
   # use Data::Dumper; print Dumper($svn_log); die;
 }

Modified: spamassassin/branches/check_plugin/masses/rule-qa/automc/ruleqa.cgi
URL: http://svn.apache.org/viewvc/spamassassin/branches/check_plugin/masses/rule-qa/automc/ruleqa.cgi?view=diff&rev=468796&r1=468795&r2=468796
==============================================================================
--- spamassassin/branches/check_plugin/masses/rule-qa/automc/ruleqa.cgi (original)
+++ spamassassin/branches/check_plugin/masses/rule-qa/automc/ruleqa.cgi Sat Oct 28 17:59:34 2006
@@ -199,6 +199,8 @@
 
   # turn possibly-empty $self->{daterev} into a real date/rev combo (that exists)
   $self->{daterev} = $self->date_in_direction($self->{daterev}, 0);
+
+  $self->{daterev_md} = $self->get_daterev_metadata($self->{daterev});
 }
 
 # ---------------------------------------------------------------------------
@@ -980,6 +982,8 @@
 
   my $srcpath = $self->{srcpath};
   my $mtime = $self->{mtime};
+  my $no_net_rules = (!$self->{daterev_md}->{includes_net});
+
   if ($srcpath || $mtime) {
     my $rev = $self->get_rev_for_daterev($self->{daterev});
     my $md = $self->get_rule_metadata($rev);
@@ -1003,6 +1007,13 @@
              ($md->{$_}->{srcmtime} >= $target);
          } @rules;
     }
+
+    if ($no_net_rules) {    # bug 5047
+      @rules = grep {
+          !$md->{$_}->{tf} or
+             ($md->{$_}->{tf} !~ /\bnet\b/);
+         } @rules;
+    }
   }
 
   if ($self->{include_embedded_freqs_xml} == 0) {
@@ -1121,7 +1132,7 @@
   }
 
   my $outof = ($isspam ? $obj->{nspam} : $obj->{nham});
-  my $count = int (($percent/100.0) * $outof);
+  my $count = int ((($percent/100.0) * $outof) + 0.99); # round up
   return qq{
     $count\&nbsp;of\&nbsp;$outof\&nbsp;messages
   };

Modified: spamassassin/branches/check_plugin/masses/rule-qa/corpus-hourly
URL: http://svn.apache.org/viewvc/spamassassin/branches/check_plugin/masses/rule-qa/corpus-hourly?view=diff&rev=468796&r1=468795&r2=468796
==============================================================================
--- spamassassin/branches/check_plugin/masses/rule-qa/corpus-hourly (original)
+++ spamassassin/branches/check_plugin/masses/rule-qa/corpus-hourly Sat Oct 28 17:59:34 2006
@@ -356,6 +356,11 @@
       $flags .= " -c '$opt{rules_dir}'";
     }
 
+    # are we analyzing --net mass-check logs?  if so, use scoreset 1
+    if (join(" ", @ham) =~ /-net-/) {
+      $flags .= " -s 1" if $class eq "NET";
+    }
+
     if ($age eq "all") {
       my %spam;
       my %ham;

Modified: spamassassin/branches/check_plugin/rules/20_dnsbl_tests.cf
URL: http://svn.apache.org/viewvc/spamassassin/branches/check_plugin/rules/20_dnsbl_tests.cf?view=diff&rev=468796&r1=468795&r2=468796
==============================================================================
--- spamassassin/branches/check_plugin/rules/20_dnsbl_tests.cf (original)
+++ spamassassin/branches/check_plugin/rules/20_dnsbl_tests.cf Sat Oct 28 17:59:34 2006
@@ -167,25 +167,23 @@
 tflags DNS_FROM_RFC_DSN		net
 #reuse DNS_FROM_RFC_DSN
 
-header DNS_FROM_RFC_POST	eval:check_rbl_sub('rfci_envfrom', '127.0.0.3')
-describe DNS_FROM_RFC_POST	Envelope sender in postmaster.rfc-ignorant.org
-tflags DNS_FROM_RFC_POST	net
-#reuse DNS_FROM_RFC_POST
-
-header DNS_FROM_RFC_ABUSE	eval:check_rbl_sub('rfci_envfrom', '127.0.0.4')
-describe DNS_FROM_RFC_ABUSE	Envelope sender in abuse.rfc-ignorant.org
-tflags DNS_FROM_RFC_ABUSE	net
-#reuse DNS_FROM_RFC_ABUSE
-
-header DNS_FROM_RFC_WHOIS	eval:check_rbl_sub('rfci_envfrom', '127.0.0.5')
-describe DNS_FROM_RFC_WHOIS	Envelope sender in whois.rfc-ignorant.org
-tflags DNS_FROM_RFC_WHOIS	net
-#reuse DNS_FROM_RFC_WHOIS
-
 header DNS_FROM_RFC_BOGUSMX	eval:check_rbl_sub('rfci_envfrom', '127.0.0.8')
 describe DNS_FROM_RFC_BOGUSMX	Envelope sender in bogusmx.rfc-ignorant.org
 tflags DNS_FROM_RFC_BOGUSMX	net
 #reuse DNS_FROM_RFC_BOGUSMX
+
+# bug 4628: these rules are too unreliable to assign scores to
+header __DNS_FROM_RFC_POST      eval:check_rbl_sub('rfci_envfrom', '127.0.0.3')
+tflags __DNS_FROM_RFC_POST      net
+#reuse __DNS_FROM_RFC_POST DNS_FROM_RFC_POST
+
+header __DNS_FROM_RFC_ABUSE     eval:check_rbl_sub('rfci_envfrom', '127.0.0.4')
+tflags __DNS_FROM_RFC_ABUSE     net
+#reuse __DNS_FROM_RFC_ABUSE DNS_FROM_RFC_ABUSE
+
+header __DNS_FROM_RFC_WHOIS     eval:check_rbl_sub('rfci_envfrom', '127.0.0.5')
+tflags __DNS_FROM_RFC_WHOIS     net
+#reuse __DNS_FROM_RFC_WHOIS DNS_FROM_RFC_WHOIS
 
 # ---------------------------------------------------------------------------
 # CompleteWhois blacklists

Modified: spamassassin/branches/check_plugin/rules/20_head_tests.cf
URL: http://svn.apache.org/viewvc/spamassassin/branches/check_plugin/rules/20_head_tests.cf?view=diff&rev=468796&r1=468795&r2=468796
==============================================================================
--- spamassassin/branches/check_plugin/rules/20_head_tests.cf (original)
+++ spamassassin/branches/check_plugin/rules/20_head_tests.cf Sat Oct 28 17:59:34 2006
@@ -27,8 +27,551 @@
 
 ###########################################################################
 
+# partial messages; currently-theoretical attack
+# unsurprisingly this hits 0/0 right now.
+header FRAGMENTED_MESSAGE	Content-Type =~ /\bmessage\/partial/i
+describe FRAGMENTED_MESSAGE	Partial message
+tflags FRAGMENTED_MESSAGE       userconf
+
+# this is also mostly-theoretical, so allow 0 hits
+header HEAD_LONG                eval:check_msg_parse_flags('truncated_header')
+describe HEAD_LONG              Message headers are very long
+tflags HEAD_LONG                userconf
+
+###########################################################################
+
+header FROM_BLANK_NAME		From =~ /(?:\s|^)"" <\S+>/i
+describe FROM_BLANK_NAME	From: contains empty name
+
+###########################################################################
+# numeric address rules, these are written to avoid overlap with each other
+
+header __FROM_ENDS_IN_NUMS	From:addr =~ /\D\d{8,}\@/i
+
+header FROM_STARTS_WITH_NUMS	From:addr =~ /^\d{6,}\S+\@/i
+describe FROM_STARTS_WITH_NUMS	From: starts with many numbers
+
+# don't match US/Canada phone numbers: 10 digits optionally preceded by a "1"
+header __FROM_ALL_NUMS		From:addr =~ /^(?:\d{1,9}|[02-9]\d{10}|\d{12,})@/
+
+###########################################################################
+
+header FROM_OFFERS		From:addr =~ /\@\S*offers(?![eo]n\b)/i
+describe FROM_OFFERS		From address is "at something-offers"
+
+header FROM_NO_USER		From =~ /(?:^\@|<\@| \@[^\)<]*$|<>)/ [if-unset: unset@unset.unset]
+describe FROM_NO_USER		From: has no local-part before @ sign
+
+# also 100% valid
+header FAKED_UNDISC_RECIPS	To =~ /undisclosed[_ ]*recipient(?:s[^:]|[^s])/i
+describe FAKED_UNDISC_RECIPS	Faked To "Undisclosed-Recipients"
+
+header PLING_QUERY		Subject =~ /\?.*!|!.*\?/
+describe PLING_QUERY		Subject has exclamation mark and question mark
+
+
+header MSGID_SPAM_99X9XX99	MESSAGEID =~ /^<\d\d\d\d\d\d[a-z]\d[a-z][a-z]\d\d\$[a-z][a-z][a-z]\d\d\d\d\d\$\d\d\d\d\d\d\d\d\@/
+describe MSGID_SPAM_99X9XX99	Spam tool Message-Id: (99x9xx99 variant)
+
+header MSGID_SPAM_ALPHA_NUM	MESSAGEID =~ /<[A-Z]{7}-000[0-9]{10}\@[a-z]*>/
+describe MSGID_SPAM_ALPHA_NUM	Spam tool Message-Id: (alpha-numeric variant)
+
+header MSGID_SPAM_CAPS		Message-ID =~ /^\s*<?[A-Z]+\@(?!(?:mailcity|whowhere)\.com)/
+describe MSGID_SPAM_CAPS	Spam tool Message-Id: (caps variant)
+
+header MSGID_SPAM_LETTERS	Message-Id =~ /<[a-z]{5,}\@(\S+\.)+\S+>/
+describe MSGID_SPAM_LETTERS	Spam tool Message-Id: (letters variant)
+
+
+header MSGID_NO_HOST            MESSAGEID =~ /\@>(?:$|\s)/m
+describe MSGID_NO_HOST 		Message-Id has no hostname
+
+# negative lookahead exempts this MUA from circa 1997-2000 
+# X-Mailer: Microsoft Outlook Express 4.71.1712.3
+# Message-ID: <01...@andrew>
+header __MSGID_DOLLARS_OK	MESSAGEID =~ /<[0-9a-f]{4,}\$[0-9a-f]{4,}\$[0-9a-f]{4,}\@\S+>/m
+header __MSGID_DOLLARS_MAYBE	MESSAGEID =~ /<\w{4,}\$\w{4,}\$(?!localhost)\w{4,}\@\S+>/mi
+meta MSGID_DOLLARS_RANDOM	__MSGID_DOLLARS_MAYBE && !__MSGID_DOLLARS_OK
+
+# bit of a ratware rule, but catches a bit more than just the one ratware
+header __MSGID_RANDY		Message-ID =~ /<[a-z\d][a-z\d\$-]{10,29}[a-z\d]\@[a-z\d][a-z\d.]{3,12}[a-z\d]>/
+# heuristic to eliminate most good Message-ID formats
+header __MSGID_OK_HEX		Message-ID =~ /\b[a-f\d]{8}\b/
+header __MSGID_OK_DIGITS	Message-ID =~ /\d{10}/
+header __MSGID_OK_HOST		Message-ID =~ /\@(?:\D{2,}|(?:\d{1,3}\.){3}\d{1,3})>/
+meta MSGID_RANDY	(__MSGID_RANDY && !(__MSGID_OK_HEX || __MSGID_OK_DIGITS || __MSGID_OK_HOST))
+describe MSGID_RANDY		Message-Id has pattern used in spam
+
+# bug 3395
+header MSGID_YAHOO_CAPS		Message-ID =~ /<[A-Z]+\@yahoo.com>/
+describe MSGID_YAHOO_CAPS	Message-ID has ALLCAPS@yahoo.com
+
+###########################################################################
+
+header   __AT_AOL_MSGID		MESSAGEID =~ /\@aol\.com\b/i
+header   __FROM_AOL_COM		From =~ /\@aol\.com\b/i
+meta     FORGED_MSGID_AOL	(__AT_AOL_MSGID && !__FROM_AOL_COM)
+describe FORGED_MSGID_AOL	Message-ID is forged, (aol.com)
+
+header   __AT_EXCITE_MSGID	MESSAGEID =~ /\@excite\.com\b/i
+header   __MY_RCVD_EXCITE	Received =~ /\.excite\.com\b/i
+meta     FORGED_MSGID_EXCITE	(__AT_EXCITE_MSGID && !__MY_RCVD_EXCITE)
+describe FORGED_MSGID_EXCITE	Message-ID is forged, (excite.com)
+
+header   __AT_HOTMAIL_MSGID	MESSAGEID =~ /\@hotmail\.com\b/i
+header   __FROM_HOTMAIL_COM	From =~ /\@hotmail\.com\b/i
+meta     FORGED_MSGID_HOTMAIL	(__AT_HOTMAIL_MSGID && (!__FROM_HOTMAIL_COM && !__FROM_MSN_COM && !__FROM_YAHOO_COM))
+describe FORGED_MSGID_HOTMAIL	Message-ID is forged, (hotmail.com)
+
+header   __AT_MSN_MSGID		MESSAGEID =~ /\@msn\.com\b/i
+header   __FROM_MSN_COM		From =~ /\@msn\.com\b/i
+meta     FORGED_MSGID_MSN	(__AT_MSN_MSGID && (!__FROM_MSN_COM && !__FROM_HOTMAIL_COM && !__FROM_YAHOO_COM))
+describe FORGED_MSGID_MSN	Message-ID is forged, (msn.com)
+
+header   __AT_YAHOO_MSGID	MESSAGEID =~ /\@yahoo\.com\b/i
+header   __FROM_YAHOO_COM	From =~ /\@yahoo\.com\b/i
+meta     FORGED_MSGID_YAHOO	(__AT_YAHOO_MSGID && !__FROM_YAHOO_COM)
+describe FORGED_MSGID_YAHOO	Message-ID is forged, (yahoo.com)
+
+###########################################################################
+
+header __MSGID_BEFORE_RECEIVED	ALL =~ /\nMessage-Id:.*\nReceived:/si
+header __MSGID_BEFORE_OKAY	Message-Id =~ /\@[a-z0-9.-]+\.(?:yahoo|wanadoo)(?:\.[a-z]{2,3}){1,2}>/
+meta MSGID_FROM_MTA_HEADER	(__MSGID_BEFORE_RECEIVED && !__MSGID_BEFORE_OKAY)
+describe MSGID_FROM_MTA_HEADER	Message-Id was added by a relay
+
+header MSGID_FROM_MTA_HOTMAIL	Message-Id =~ /<MC\d{1,2}-F{1,2}\w{21,22}\@\S*hotmail\.com>/
+describe MSGID_FROM_MTA_HOTMAIL	Message-Id was added by a hotmail.com relay
+
+header MSGID_LONG		MESSAGEID =~ /<.{160,}>|<.{140,}\@|\@.{55,}>/m
+describe MSGID_LONG		Message-ID is unusually long
+
+header MSGID_SHORT		MESSAGEID =~ /^.{1,15}$|<.{0,4}\@/m
+describe MSGID_SHORT		Message-ID is unusually short
+
+header MSGID_MULTIPLE_AT	MESSAGEID =~ /<[^>]*\@[^>]*\@/
+describe MSGID_MULTIPLE_AT	Message-ID contains multiple '@' characters
+
+###########################################################################
+
+header DATE_SPAMWARE_Y2K	Date =~ /^[A-Z][a-z]{2}, \d\d [A-Z][a-z]{2} [0-6]\d \d\d:\d\d:\d\d [A-Z]{3}$/
+describe DATE_SPAMWARE_Y2K	Date header uses unusual Y2K formatting
+
+# as noted on the dev@ list, ":60" is valid for seconds when there's a leap
+# second (12/31/2005 for instance), so let's accept that as valid.  ISO 8601
+# apparently allows for it.
+# WRT the tests, remember that ok and fail are reversed -- so valid dates
+# should be "fail" and invalid dates should be "ok".
+header INVALID_DATE		Date !~ /^\s*(?:(?i:Mon|Tue|Wed|Thu|Fri|Sat|Sun),\s)?\s*(?:[12]\d|3[01]|0?[1-9])\s+(?i:Jan|Feb|Ma[ry]|Apr|Ju[nl]|Aug|Sep|Oct|Nov|Dec)\s+(?:19[7-9]\d|2\d{3})\s+(?:[01]?\d|2[0-3])\:[0-5]\d(?::(?:[0-5]\d|60))?\s+(?:[AP]M\s+)?(?:[+-][0-9]{4}|UT|[A-Z]{2,3}T)(?:\s+\(.*\))?\s*$/ [if-unset: Wed, 31 Jul 2002 16:41:57 +0200]
+describe INVALID_DATE		Invalid Date: header (not RFC 2822)
+test INVALID_DATE fail    Sat, 31 Dec 2005 23:59:60 -0500
+test INVALID_DATE fail    Wed, 31 Jul 2002 16:41:57 +0200
+test INVALID_DATE ok      Sat, 31 Dec 2005 24:00:00 -0500
+test INVALID_DATE ok      Sat, 31 Dec 2005 23:00:00
+test INVALID_DATE ok      Thurs, 31 Jul 2002 16:41:57 +0200
+
+# allow +1300, NZ timezone
+header INVALID_DATE_TZ_ABSURD	Date =~ /[-+](?:1[4-9]\d\d|[2-9]\d\d\d)$/
+describe INVALID_DATE_TZ_ABSURD	Invalid Date: header (timezone does not exist)
+
+header INVALID_TZ_CST		ALL =~ /[+-]\d\d[30]0(?<!-0600|-0500|\+0800|\+0930|\+1030)\s+(?:\bCST\b|\(CST\))/
+describe INVALID_TZ_CST		Invalid date in header (wrong CST timezone)
+
+header INVALID_TZ_EST		ALL =~ /[+-]\d\d[30]0(?<!-0500|-0300|\+1000|\+1100)\s+(?:\bEST\b|\(EST\))/
+describe INVALID_TZ_EST		Invalid date in header (wrong EST timezone)
+
+
+###########################################################################
+# MIME encoding with spam characteristics
+
+header __SUBJECT_NEEDS_MIME	Subject =~ /[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\xff]/
+header __SUBJECT_ENCODED_QP	Subject:raw =~ /=\?\S+\?Q\?/i
+header __SUBJECT_ENCODED_B64	Subject:raw =~ /=\?\S+\?B\?/i
+
+
+
+header __FROM_NEEDS_MIME	From =~ /[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\xff]/
+header __FROM_ENCODED_QP	From:raw =~ /=\?\S+\?Q\?/i
+header __FROM_ENCODED_B64	From:raw =~ /=\?\S+\?B\?/i
+
+
+meta FROM_EXCESS_BASE64		__FROM_ENCODED_B64 && !__FROM_NEEDS_MIME
+describe FROM_EXCESS_BASE64	From: base64 encoded unnecessarily
+
+
+###########################################################################
+# ADV tags in various languages
+
+header ENGLISH_UCE_SUBJECT	Subject =~ /^[^0-9a-z]*adv(?:ert)?\b/i
+describe ENGLISH_UCE_SUBJECT	Subject contains an English UCE tag
+
+# alan premselaar <al...@12inch.com>, see SpamAssassin-talk list 2003-03
+# quinlan: 2003-03-23 here are more generic Japanese iso-2022-jp codes
+# ("not yet acceptance" or "email") + "announcement"
+# FWIW, according to Peter Evans, this should be sufficient to catch the
+# UCE tag and a common attempt at evasion (using the "sue" instead of
+# "mi" Chinese character).  2006-10-12: updated by bug 4021.
+header JAPANESE_UCE_SUBJECT     Subject =~ /\e\$B.*(?:L\$>5Bz|EE;R%a!<%k)(?:8x|9-)9p/
+describe JAPANESE_UCE_SUBJECT	Subject contains a Japanese UCE tag
+
+# check body for "shou nin daku kou koku" UCE tag (bug 4021)
+body __JAPANESE_UCE_BODY        /(?:L\$>5Bz|EE;R%a!<%k)(?:8x|9-)9p/
+
+meta JAPANESE_UCE_BODY (__ISO_2022_JP_DELIM && __JAPANESE_UCE_BODY)
+describe JAPANESE_UCE_BODY      Body contains Japanese UCE tag
+
+# quinlan: "advertisement" in Russian KOI8-R
+# (no longer common, but worth noting in future)
+#header RUSSIAN_UCE_SUBJECT	Subject =~ /\xf0\xe5\xea\xeb\xe0\xec\xf3/
+#describe RUSSIAN_UCE_SUBJECT	Subject contains a Russian UCE tag
+
+# Korean UCE Subject: lines are usually 8-bit, but are occasionally encoded
+# with quoted-printable or base64.
+#
+# \xbc\xba\xc0\xce means "adult"
+# \xb1\xa4\xb0\xed means "advertisement"
+# \xc1\xa4\xba\xb8 means "information"
+# \xc8\xab\xba\xb8 means "publicity"
+#
+# Each two byte sequence is one Korean letter; the spaces and periods are
+# sometimes used to obscure the words.  \xb1\xa4\xb0\xed is the most common
+# tag and is sometimes very obscured so we look harder.
+#
+header KOREAN_UCE_SUBJECT	Subject =~ /[({[<][. ]*(?-i:\xbc\xba[. ]*\xc0\xce[. ]*)?(?-i:\xb1\xa4(?:[. ]*|[\x00-\x7f]{0,3})\xb0\xed|\xc1\xa4[. ]*\xba\xb8|\xc8\xab[. ]*\xba\xb8)[. ]*[)}\]>]/
+describe KOREAN_UCE_SUBJECT	Subject: contains Korean unsolicited email tag
+
+###########################################################################
+
+# two reliable signatures
+header __DOUBLE_IP_SPAM_1	Received =~ /from \[\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\] by \d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} with/
+header __DOUBLE_IP_SPAM_2	Received =~ /from\s+\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\s+by\s+\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3};/
+# loose match
+header __DOUBLE_IP_LOOSE	Received =~ /(?:\b(?:from|by)\b.{1,4}\b\d{1,3}[._-]\d{1,3}[._-]\d{1,3}[._-]\d{1,3}(?<!127\.0\.0\.1)\b.{0,4}){2}/i
+# spam signature
+meta RCVD_DOUBLE_IP_SPAM	(__DOUBLE_IP_SPAM_1 || __DOUBLE_IP_SPAM_2)
+describe RCVD_DOUBLE_IP_SPAM	Bulk email fingerprint (double IP) found
+# other matches
+meta RCVD_DOUBLE_IP_LOOSE	(__DOUBLE_IP_LOOSE && !RCVD_DOUBLE_IP_SPAM)
+describe RCVD_DOUBLE_IP_LOOSE   Received: by and from look like IP addresses
+
+header FORGED_TELESP_RCVD	Received =~ /\.(?!br).. \(\d+-\d+-\d+-\d+\.dsl\.telesp\.net\.br /
+describe FORGED_TELESP_RCVD	Contains forged hostname for a DSL IP in Brazil
+
+# forgery meta-rules: more reliable than their inputs
+meta CONFIRMED_FORGED		(__FORGED_RCVD_TRAIL && (__FORGED_AOL_RCVD || __FORGED_HOTMAIL_RCVD || __FORGED_EUDORAMAIL_RCVD || FORGED_YAHOO_RCVD || __FORGED_JUNO_RCVD || FORGED_GW05_RCVD))
+describe CONFIRMED_FORGED	Received headers are forged
+
+meta MULTI_FORGED		((__FORGED_AOL_RCVD + __FORGED_HOTMAIL_RCVD + __FORGED_EUDORAMAIL_RCVD + FORGED_YAHOO_RCVD + __FORGED_JUNO_RCVD + FORGED_GW05_RCVD) > 1)
+describe MULTI_FORGED		Received headers indicate multiple forgeries
+
+header NONEXISTENT_CHARSET	Content-Type =~ /charset=.?DEFAULT/
+describe NONEXISTENT_CHARSET	Character set doesn't exist
+
+
+
+header MISSING_DATE             Date =~ /^UNSET$/ [if-unset: UNSET]
+describe MISSING_DATE           Missing Date: header
+
+header __HAS_SUBJECT		exists:Subject
+meta MISSING_SUBJECT		!__HAS_SUBJECT
+describe MISSING_SUBJECT	Missing Subject: header
+
+header GAPPY_SUBJECT		Subject =~ /\b(?:[a-z]([-_. =~\/:,*!\@\#\$\%\^&+;\"\'<>\\])\1{0,2}){4}/i
+describe GAPPY_SUBJECT		Subject: contains G.a.p.p.y-T.e.x.t
+
+### header existence tests (description is added automatically)
+
+# X-Fix example: NTMail fixed non RFC822 compliant EMail message
+#
+# X-PMFLAGS is all caps
+#
+# Headers that seem to only be used by a single spamming software and
+# are found together in the same message:
+# 1. X-MailingID and X-ServerHost
+# 2. X-Stormpost-To and X-List-Unsubscribe
+#
+# not spammish: X-EM-Registration, X-EM-Version, X-Antiabuse, X-List-Host,
+# X-Message-Id
+# bad FP rate: Comment, Date-warning
+
+header PREVENT_NONDELIVERY	exists:Prevent-NonDelivery-Report
+describe PREVENT_NONDELIVERY	Message has Prevent-NonDelivery-Report header
+
+header X_IP			exists:X-IP
+describe X_IP			Message has X-IP header
+
+header   __HAS_MIMEOLE          exists:X-MimeOLE
+header   __HAS_MSMAIL_PRI       exists:X-MSMail-Priority
+header   __HAS_SQUIRRELMAIL_IN_MAILER	X-Mailer =~ /SquirrelMail\b/
+meta     MISSING_MIMEOLE	(__HAS_MSMAIL_PRI && !__HAS_MIMEOLE && !__HAS_SQUIRRELMAIL_IN_MAILER)
+describe MISSING_MIMEOLE	Message has X-MSMail-Priority, but no X-MimeOLE
+
+header __HAS_X_MAILER		exists:X-Mailer
+
+header __IS_EXCH		X-MimeOLE =~ /Produced By Microsoft Exchange V/
+
+header SUBJ_AS_SEEN		Subject =~ /\bAs Seen/i
+describe SUBJ_AS_SEEN		Subject contains "As Seen"
+
+header SUBJ_DOLLARS             Subject =~ /^\$[0-9.,]+\b/
+describe SUBJ_DOLLARS           Subject starts with dollar amount
+
+
+
+
+
+
+
+header SUBJ_YOUR_DEBT		Subject =~ /Your (?:Bills|Debt|Credit)/i
+describe SUBJ_YOUR_DEBT		Subject contains "Your Bills" or similar
+
+header SUBJ_YOUR_FAMILY		Subject =~ /Your Family/i
+describe SUBJ_YOUR_FAMILY	Subject contains "Your Family"
+
+
+# the real services never HELO as 'foo.com', instead 'mail.foo.com' or
+# something like that.  Note: be careful when expanding this... legit dotcom
+# HELOers include: hotmail.com, drizzle.com, lockergnome.com.
+header RCVD_FAKE_HELO_DOTCOM    Received =~ /^from (?:msn|yahoo|yourwebsite|lycos|excite|cs|aol|localhost|koreanmail|allexecs|mydomain|juno|eudoramail|compuserve|desertmail|excite|caramail)\.com \(/m
+describe RCVD_FAKE_HELO_DOTCOM  Received contains a faked HELO hostname
+
+header SUBJECT_DIET		Subject =~ /\bLose .*(?:pounds|lbs|weight)/i
+describe SUBJECT_DIET		Subject talks about losing pounds
+
+header EXTRA_MPART_TYPE         Content-Type =~ /(?:\s*multipart\/)?.* type=/i
+describe EXTRA_MPART_TYPE       Header has extraneous Content-type:...type= entry
+
+header TO_RECIP_MARKER          To =~ /\#recipient\#/
+describe TO_RECIP_MARKER        To header contains 'recipient' marker
+
+# MIME boundary tests; spam tools use distinctive patterns.
+header MIME_BOUND_DD_DIGITS	Content-Type =~ /boundary=\"--\d+\"/
+describe MIME_BOUND_DD_DIGITS	Spam tool pattern in MIME boundary
+header MIME_BOUND_DIGITS_7	Content-Type =~ /boundary=\d{9}\.\d{13}/
+describe MIME_BOUND_DIGITS_7	Spam tool pattern in MIME boundary
+header MIME_BOUND_DIGITS_15	Content-Type =~ /boundary=\"\d{15,}\"/
+describe MIME_BOUND_DIGITS_15	Spam tool pattern in MIME boundary
+header MIME_BOUND_MANY_HEX	Content-Type =~ /boundary="[\da-f]{8}(?:-[\da-f]{4}){3}-[\da-f]{12}"/
+describe MIME_BOUND_MANY_HEX	Spam tool pattern in MIME boundary
+header __NEXTPART_ALL		Content-Type =~ /NextPart/
+header __NEXTPART_NORMAL	Content-Type =~ /="(?:----_?=_)?NextPart_[\dA-F]{3}(_[\dA-F]{3,8})?_[\dA-F]{8}\.[\dA-F]{8}"/
+meta MIME_BOUND_NEXTPART	(__NEXTPART_ALL && !__NEXTPART_NORMAL)
+describe MIME_BOUND_NEXTPART	Spam tool pattern in MIME boundary
+
+# note: the first alternation is anchored for speed
+header TO_MALFORMED             To !~ /(?:^|[^\S"])(?:(?:\"[^\"]+\"|\S+)\@\S+\.\S+|^\s*.+:\s*;|^\s*\"[^\"]+\":\s*;|^\s*\([^\)]*\)\s*$|<\S+(?:\!\S+){1,}>|^\s*$)/ [if-unset: unset@unset.unset]
+describe TO_MALFORMED           To: has a malformed address
+
+header __CD                     exists:Content-Disposition
+header __CT                     exists:Content-Type
+header __CTE                    exists:Content-Transfer-Encoding
+header __MIME_VERSION           exists:MIME-Version
+header __CT_TEXT_PLAIN          Content-Type =~ /^text\/plain\b/i
+meta MIME_HEADER_CTYPE_ONLY     (!__CD && !__CTE && __CT && !__MIME_VERSION && !__CT_TEXT_PLAIN)
+describe MIME_HEADER_CTYPE_ONLY 'Content-Type' found without required MIME headers
+
+header WITH_LC_SMTP		Received =~ /\swith\ssmtp;\s/
+describe WITH_LC_SMTP		Received line contains spam-sign (lowercase smtp)
+
+
+header SUBJ_BUY                 Subject =~ /^buy/i
+describe SUBJ_BUY               Subject line starts with Buy or Buying
+
+# seems to be ratware
+header RCVD_AM_PM		Received =~ /; [A-Z][a-z][a-z], \d{1,2} \d{4} \d{1,2}:\d\d:\d\d [AP]M [+-]\d{4}/
+describe RCVD_AM_PM		Received headers forged (AM/PM)
+
+header __USER_AGENT_MSN             X-Mailer =~ /^MSN Explorer /
+
+# host no longer exists according to administrator
+header FAKE_OUTBLAZE_RCVD	Received =~ /\.mr\.outblaze\.com/
+describe FAKE_OUTBLAZE_RCVD	Received header contains faked 'mr.outblaze.com'
+
+header SUBJ_2_NUM_PARENS        Subject =~ /^\(\d+\).*\(\d+\)\s*$/
+describe SUBJ_2_NUM_PARENS      Subject contains common spam sign (2 numbers)
+
+# thanks to David Ritz for passing this on; ready for post-3.0.0
+header UNCLOSED_BRACKET		ALL =~ /\[\d+\r?\n/s
+describe UNCLOSED_BRACKET	Headers contain an unclosed bracket
+
+header FROM_DOMAIN_NOVOWEL	From =~ /\@\S*[bcdfghjklmnpqrstvwxz]{7}/i
+describe FROM_DOMAIN_NOVOWEL	From: domain has series of non-vowel letters
+
+header FROM_LOCAL_NOVOWEL	From =~ /[bcdfghjklmnpqrstvwxz]{7}\S*\@/i
+describe FROM_LOCAL_NOVOWEL	From: localpart has series of non-vowel letters
+
+header FROM_LOCAL_HEX		From =~ /[0-9a-f]{11}\S*\@/i
+describe FROM_LOCAL_HEX		From: localpart has long hexadecimal sequence
+
+header FROM_LOCAL_DIGITS	From =~ /\d{11}\S*\@/i
+describe FROM_LOCAL_DIGITS	From: localpart has long digit sequence
+
+header __TOCC_EXISTS		exists:ToCc
+
+header X_PRIORITY_CC		ALL =~ /\nX-Priority:[^\n]{0,80}\nCc:/si
+describe X_PRIORITY_CC		Cc: after X-Priority: (bulk email fingerprint)
+
+# catch non-RFC2047 compliant messages
+# Apple Mail has a bug where headers will have whitespace around the encoded
+# text, so try to ignore that
+header BAD_ENC_HEADER		ALL =~ /=\?[^?\s]+\?[^?\s]\?\s*[^?]+\s(?!\?=)/
+describe BAD_ENC_HEADER		Message has bad MIME encoding in the header
+
+###########################################################################
+
+ifplugin Mail::SpamAssassin::Plugin::HeaderEval
+
+header __FORGED_AOL_RCVD	        eval:check_for_fake_aol_relay_in_rcvd()
+
+header CHARSET_FARAWAY_HEADER	eval:check_for_faraway_charset_in_headers()
+describe CHARSET_FARAWAY_HEADER	A foreign language charset used in headers
+tflags CHARSET_FARAWAY_HEADER	userconf
+
+    ###################################################################
+
+# illegal characters that should be MIME encoded
+# might want to exempt users using languages that don't use Latin
+# alphabets, but do it in the eval
+
+header SUBJ_ILLEGAL_CHARS	eval:check_illegal_chars('Subject','0.00','2')
+describe SUBJ_ILLEGAL_CHARS	Subject: has too many raw illegal characters
+
+header FROM_ILLEGAL_CHARS	eval:check_illegal_chars('From','0.20','2')
+describe FROM_ILLEGAL_CHARS	From: has too many raw illegal characters
+
+header HEAD_ILLEGAL_CHARS	eval:check_illegal_chars('ALL','0.010','2')
+describe HEAD_ILLEGAL_CHARS	Headers have too many raw illegal characters
+
+    ###################################################################
+
+# a forged Hotmail message; host HELO'd as hotmail.com, but it wasn't
+header __FORGED_HOTMAIL_RCVD	eval:check_for_forged_hotmail_received_headers()
+
+# this, by comparison is more common: from was @hotmail.com, but it wasn't
+header FORGED_HOTMAIL_RCVD2	eval:check_for_no_hotmail_received_headers()
+describe FORGED_HOTMAIL_RCVD2 hotmail.com 'From' address, but no 'Received:'
+
+header __FORGED_EUDORAMAIL_RCVD	eval:check_for_forged_eudoramail_received_headers()
+
+header FORGED_YAHOO_RCVD	eval:check_for_forged_yahoo_received_headers()
+describe FORGED_YAHOO_RCVD	'From' yahoo.com does not match 'Received' headers
+
+header __FORGED_JUNO_RCVD		eval:check_for_forged_juno_received_headers()
+
+header FORGED_GW05_RCVD		eval:check_for_forged_gw05_received_headers()
+describe FORGED_GW05_RCVD	Forged 'by gw05' 'Received:' header found
+
+
+header SORTED_RECIPS		eval:sorted_recipients()
+describe SORTED_RECIPS		Recipient list is sorted by address
+
+header SUSPICIOUS_RECIPS	eval:similar_recipients('0.65','undef')
+describe SUSPICIOUS_RECIPS	Similar addresses in recipient list
+
+# this is a quite common false positive, as it's legal to remove a To but leave
+# a CC. so don't score it high.
+header MISSING_HEADERS		eval:check_for_missing_to_header()
+describe MISSING_HEADERS	Missing To: header
+
+# this variant is local, using the Received hdr itself...
+header ROUND_THE_WORLD_LOCAL	eval:check_for_round_the_world_received_helo()
+describe ROUND_THE_WORLD_LOCAL	Received: says mail sent around the world (HELO)
+
+header DATE_IN_PAST_03_06	eval:check_for_shifted_date('-6', '-3')
+describe DATE_IN_PAST_03_06	Date: is 3 to 6 hours before Received: date
+
+header DATE_IN_PAST_06_12	eval:check_for_shifted_date('-12', '-6')
+describe DATE_IN_PAST_06_12	Date: is 6 to 12 hours before Received: date
+
+header DATE_IN_PAST_12_24	eval:check_for_shifted_date('-24', '-12')
+describe DATE_IN_PAST_12_24	Date: is 12 to 24 hours before Received: date
+
+header DATE_IN_PAST_24_48	eval:check_for_shifted_date('-48', '-24')
+describe DATE_IN_PAST_24_48	Date: is 24 to 48 hours before Received: date
+
+
+header DATE_IN_PAST_96_XX	eval:check_for_shifted_date('undef', '-96')
+describe DATE_IN_PAST_96_XX	Date: is 96 hours or more before Received: date
+
+header DATE_IN_FUTURE_03_06	eval:check_for_shifted_date('3', '6')
+describe DATE_IN_FUTURE_03_06	Date: is 3 to 6 hours after Received: date
+
+header DATE_IN_FUTURE_06_12	eval:check_for_shifted_date('6', '12')
+describe DATE_IN_FUTURE_06_12	Date: is 6 to 12 hours after Received: date
+
+header DATE_IN_FUTURE_12_24	eval:check_for_shifted_date('12', '24')
+describe DATE_IN_FUTURE_12_24	Date: is 12 to 24 hours after Received: date
+
+header DATE_IN_FUTURE_24_48	eval:check_for_shifted_date('24', '48')
+describe DATE_IN_FUTURE_24_48	Date: is 24 to 48 hours after Received: date
+
+header DATE_IN_FUTURE_48_96	eval:check_for_shifted_date('48', '96')
+describe DATE_IN_FUTURE_48_96	Date: is 48 to 96 hours after Received: date
+
+header DATE_IN_FUTURE_96_XX	eval:check_for_shifted_date('96', 'undef')
+describe DATE_IN_FUTURE_96_XX	Date: is 96 hours or more after Received: date
+
+header UNRESOLVED_TEMPLATE	eval:check_unresolved_template()
+describe UNRESOLVED_TEMPLATE	Headers contain an unresolved template
+
+header SUBJ_ALL_CAPS		eval:subject_is_all_caps()
+describe SUBJ_ALL_CAPS		Subject is all capitals
+
+
+header LOCALPART_IN_SUBJECT	eval:check_for_to_in_subject('user')
+describe LOCALPART_IN_SUBJECT	Local part of To: address appears in Subject
+
+header MSGID_OUTLOOK_INVALID	eval:check_outlook_message_id()
+describe MSGID_OUTLOOK_INVALID	Message-Id is fake (in Outlook Express format)
+
+header HEADER_COUNT_CTYPE	eval:check_header_count_range('Content-Type','2','999')
+describe HEADER_COUNT_CTYPE	Multiple Content-Type headers found
+
+endif
+
+###########################################################################
+
+ifplugin Mail::SpamAssassin::Plugin::MIMEEval
+
+header MISSING_HB_SEP		eval:check_msg_parse_flags('missing_head_body_separator')
+describe MISSING_HB_SEP		Missing blank line between message header and body
+tflags MISSING_HB_SEP		userconf
+
+endif
+
+###########################################################################
+
+ifplugin Mail::SpamAssassin::Plugin::RelayEval
+
+header UNPARSEABLE_RELAY        eval:check_relays_unparseable()
+tflags UNPARSEABLE_RELAY        userconf
+describe UNPARSEABLE_RELAY      Informational: message has unparseable relay lines
+
+
+
+header RCVD_HELO_IP_MISMATCH	eval:helo_ip_mismatch()
+describe RCVD_HELO_IP_MISMATCH	Received: HELO and IP do not match, but should
+
+header RCVD_NUMERIC_HELO	eval:check_for_numeric_helo()
+describe RCVD_NUMERIC_HELO	Received: contains an IP address used for HELO
+
+header RCVD_ILLEGAL_IP		eval:check_for_illegal_ip()
+describe RCVD_ILLEGAL_IP	Received: contains illegal IP address
+
+# not used directly right now due to FPs; but CONFIRMED_FORGED turns it
+# into a 1.0 S/O rule anyway, so that's not a problem ;)
+# 2.626   3.6340   1.5251    0.704   0.34    1.44  FORGED_RCVD_TRAIL
+# 0.956   3.3890   0.0000    1.000   0.98    4.30  CONFIRMED_FORGED
+header __FORGED_RCVD_TRAIL	eval:check_for_forged_received_trail()
+
+header NO_RDNS_DOTCOM_HELO	eval:check_for_no_rdns_dotcom_helo()
+describe NO_RDNS_DOTCOM_HELO	Host HELO'd as a big ISP, but had no rDNS
+
+endif
+
 ifplugin Mail::SpamAssassin::Plugin::HeaderEval
 
 header __ENV_AND_HDR_FROM_MATCH	eval:check_for_matching_env_and_hdr_from()
 
 endif
+