You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2006/10/25 18:15:35 UTC

svn commit: r467701 [1/2] - in /spamassassin/branches/jm_re2c_hacks: ./ build/ build/automc/ lib/ lib/Mail/SpamAssassin/ lib/Mail/SpamAssassin/Bayes/ lib/Mail/SpamAssassin/BayesStore/ lib/Mail/SpamAssassin/Conf/ lib/Mail/SpamAssassin/Message/ lib/Mail/...

Author: jm
Date: Wed Oct 25 09:15:31 2006
New Revision: 467701

URL: http://svn.apache.org/viewvc?view=rev&rev=467701
Log:
merged up to r467692 on svn trunk using 'svn merge -r453533:467692 https://svn.apache.org/repos/asf/spamassassin/trunk'

Added:
    spamassassin/branches/jm_re2c_hacks/rules/20_advance_fee.cf
      - copied unchanged from r467692, spamassassin/trunk/rules/20_advance_fee.cf
    spamassassin/branches/jm_re2c_hacks/rules/20_body_tests.cf
      - copied unchanged from r467692, spamassassin/trunk/rules/20_body_tests.cf
    spamassassin/branches/jm_re2c_hacks/rules/20_compensate.cf
      - copied unchanged from r467692, spamassassin/trunk/rules/20_compensate.cf
    spamassassin/branches/jm_re2c_hacks/rules/20_drugs.cf
      - copied unchanged from r467692, spamassassin/trunk/rules/20_drugs.cf
    spamassassin/branches/jm_re2c_hacks/rules/20_fake_helo_tests.cf
      - copied unchanged from r467692, spamassassin/trunk/rules/20_fake_helo_tests.cf
    spamassassin/branches/jm_re2c_hacks/rules/20_meta_tests.cf
      - copied unchanged from r467692, spamassassin/trunk/rules/20_meta_tests.cf
    spamassassin/branches/jm_re2c_hacks/rules/20_phrases.cf
      - copied unchanged from r467692, spamassassin/trunk/rules/20_phrases.cf
    spamassassin/branches/jm_re2c_hacks/rules/20_porn.cf
      - copied unchanged from r467692, spamassassin/trunk/rules/20_porn.cf
    spamassassin/branches/jm_re2c_hacks/rules/20_ratware.cf
      - copied unchanged from r467692, spamassassin/trunk/rules/20_ratware.cf
    spamassassin/branches/jm_re2c_hacks/rules/20_uri_tests.cf
      - copied unchanged from r467692, spamassassin/trunk/rules/20_uri_tests.cf
    spamassassin/branches/jm_re2c_hacks/rules/25_body_tests_pl.cf
      - copied unchanged from r467692, spamassassin/trunk/rules/25_body_tests_pl.cf
    spamassassin/branches/jm_re2c_hacks/rules/30_text_de.cf
      - copied unchanged from r467692, spamassassin/trunk/rules/30_text_de.cf
    spamassassin/branches/jm_re2c_hacks/rules/30_text_fr.cf
      - copied unchanged from r467692, spamassassin/trunk/rules/30_text_fr.cf
    spamassassin/branches/jm_re2c_hacks/rules/30_text_it.cf
      - copied unchanged from r467692, spamassassin/trunk/rules/30_text_it.cf
    spamassassin/branches/jm_re2c_hacks/rules/30_text_nl.cf
      - copied unchanged from r467692, spamassassin/trunk/rules/30_text_nl.cf
    spamassassin/branches/jm_re2c_hacks/rules/30_text_pl.cf
      - copied unchanged from r467692, spamassassin/trunk/rules/30_text_pl.cf
    spamassassin/branches/jm_re2c_hacks/rules/30_text_pt_br.cf
      - copied unchanged from r467692, spamassassin/trunk/rules/30_text_pt_br.cf
    spamassassin/branches/jm_re2c_hacks/rules/50_scores.cf
      - copied unchanged from r467692, spamassassin/trunk/rules/50_scores.cf
    spamassassin/branches/jm_re2c_hacks/t/config_text.t
      - copied unchanged from r467692, spamassassin/trunk/t/config_text.t
    spamassassin/branches/jm_re2c_hacks/t/dnsbl_sc_meta.t
      - copied unchanged from r467692, spamassassin/trunk/t/dnsbl_sc_meta.t
Removed:
    spamassassin/branches/jm_re2c_hacks/BUGS
    spamassassin/branches/jm_re2c_hacks/STATUS
Modified:
    spamassassin/branches/jm_re2c_hacks/MANIFEST
    spamassassin/branches/jm_re2c_hacks/Makefile.PL
    spamassassin/branches/jm_re2c_hacks/build/README
    spamassassin/branches/jm_re2c_hacks/build/automc/run_preflight
    spamassassin/branches/jm_re2c_hacks/build/mkrules
    spamassassin/branches/jm_re2c_hacks/build/update_website_docs
    spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/ArchiveIterator.pm
    spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Bayes/CombineChi.pm
    spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/BayesStore/DBM.pm
    spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Conf/Parser.pm
    spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Dns.pm
    spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Message.pm
    spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Message/Node.pm
    spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/PerMsgStatus.pm
    spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin.pm
    spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyEval.pm
    spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm
    spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm
    spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm
    spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm
    spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/MIMEHeader.pm
    spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/RelayEval.pm
    spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/URIEval.pm
    spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/PluginHandler.pm
    spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/SpamdForkScaling.pm
    spamassassin/branches/jm_re2c_hacks/lib/spamassassin-run.pod
    spamassassin/branches/jm_re2c_hacks/masses/rule-qa/automc/gen_info_xml
    spamassassin/branches/jm_re2c_hacks/masses/rule-qa/automc/ruleqa.cgi
    spamassassin/branches/jm_re2c_hacks/masses/rule-qa/corpus-hourly
    spamassassin/branches/jm_re2c_hacks/rules/20_dnsbl_tests.cf
    spamassassin/branches/jm_re2c_hacks/rules/20_head_tests.cf
    spamassassin/branches/jm_re2c_hacks/rules/20_html_tests.cf
    spamassassin/branches/jm_re2c_hacks/rules/25_replace.cf
    spamassassin/branches/jm_re2c_hacks/rules/active.list
    spamassassin/branches/jm_re2c_hacks/rules/regression_tests.cf
    spamassassin/branches/jm_re2c_hacks/rules/rule2xs.pre
    spamassassin/branches/jm_re2c_hacks/sa-learn.raw
    spamassassin/branches/jm_re2c_hacks/sa-update.raw
    spamassassin/branches/jm_re2c_hacks/spamassassin.raw
    spamassassin/branches/jm_re2c_hacks/spamc/configure
    spamassassin/branches/jm_re2c_hacks/spamc/configure.in
    spamassassin/branches/jm_re2c_hacks/spamd/spamd.raw
    spamassassin/branches/jm_re2c_hacks/t/bayesdbm.t
    spamassassin/branches/jm_re2c_hacks/t/mimeheader.t
    spamassassin/branches/jm_re2c_hacks/t/missing_hb_separator.t
    spamassassin/branches/jm_re2c_hacks/t/mkrules.t

Modified: spamassassin/branches/jm_re2c_hacks/MANIFEST
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/MANIFEST?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/MANIFEST (original)
+++ spamassassin/branches/jm_re2c_hacks/MANIFEST Wed Oct 25 09:15:31 2006
@@ -1,4 +1,3 @@
-BUGS
 CREDITS
 Changes
 INSTALL
@@ -10,7 +9,6 @@
 NOTICE
 PACKAGING
 README
-STATUS
 TRADEMARK
 UPGRADE
 USAGE
@@ -269,6 +267,7 @@
 t/cidrs.t
 t/config.dist
 t/config_errs.t
+t/config_text.t
 t/cpp_comments_in_spamc.t
 t/data/01_test_rules.cf
 t/data/etc/hello.txt
@@ -368,6 +367,7 @@
 t/debug.t
 t/desc_wrap.t
 t/dnsbl.t
+t/dnsbl_sc_meta.t
 t/get_headers.t
 t/gtube.t
 t/hashcash.t
@@ -481,6 +481,24 @@
 rules/25_spf.cf
 rules/25_textcat.cf
 rules/25_uribl.cf
+rules/20_advance_fee.cf
+rules/20_body_tests.cf
+rules/20_compensate.cf
+rules/20_drugs.cf
+rules/20_fake_helo_tests.cf
+rules/20_meta_tests.cf
+rules/20_phrases.cf
+rules/20_porn.cf
+rules/20_ratware.cf
+rules/20_uri_tests.cf
+rules/25_body_tests_pl.cf
+rules/30_text_de.cf
+rules/30_text_fr.cf
+rules/30_text_it.cf
+rules/30_text_nl.cf
+rules/30_text_pl.cf
+rules/30_text_pt_br.cf
+rules/50_scores.cf
 rules/60_awl.cf
 rules/60_shortcircuit.cf
 rules/60_whitelist.cf
@@ -488,7 +506,6 @@
 rules/60_whitelist_dkim.cf
 rules/60_whitelist_spf.cf
 rules/60_whitelist_subject.cf
-rules/70_sandbox.cf
 rules/72_active.cf
 rules/STATISTICS-set0.txt
 rules/STATISTICS-set1.txt
@@ -504,7 +521,6 @@
 rules/v312.pre
 rules/v320.pre
 rules/active.list
-rules/70_inactive.cf
 t/mkrules.t
 t/trust_path.t
 t/data/nice/dkim/AddedVtag_07

Modified: spamassassin/branches/jm_re2c_hacks/Makefile.PL
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/Makefile.PL?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/Makefile.PL (original)
+++ spamassassin/branches/jm_re2c_hacks/Makefile.PL Wed Oct 25 09:15:31 2006
@@ -245,34 +245,14 @@
 
         'rules/*.pm',
 
-        # at one stage, these were copied from rulesrc into the rules
-        # build dir, or were in SVN in this dir, but this no longer
-        # is the case.  Ensure they get cleaned on "make clean".
-        'rules/20_advance_fee.cf',
-        'rules/20_compensate.cf',
-        'rules/20_drugs.cf',
-        'rules/20_fake_helo_tests.cf',
-        'rules/20_phrases.cf',
-        'rules/20_porn.cf',
-        'rules/20_ratware.cf',
-        'rules/20_uri_tests.cf',
-        'rules/25_body_tests_es.cf',
-        'rules/25_body_tests_pl.cf',
-        'rules/30_text_de.cf',
-        'rules/30_text_fr.cf',
-        'rules/30_text_it.cf',
-        'rules/30_text_nl.cf',
-        'rules/30_text_pl.cf',
-        'rules/30_text_pt_br.cf',
-        'rules/50_scores.cf',
-        'rules/70_broken_rules.cf',
-
         # don't remove these. they are built from 'rulesrc' in SVN, but
         # in a distribution tarball, they're not
-        # 'rules/70_inactive.cf',
         # 'rules/70_sandbox.cf',
         # 'rules/72_active.cf',
 
+        # this file is no longer built, or used
+        'rules/70_inactive.cf',
+
       )
 
     },
@@ -1141,7 +1121,8 @@
 	$(PREPROCESS) $(FIXBYTES) $(FIXVARS) $(FIXBANG) -m$(PERM_RWX) -i$? -o$@
 
 build_rules: 
-	$(PERL) build/mkrules --src rulesrc --out rules --manifest MANIFEST --manifestskip MANIFEST.SKIP
+	-rm rules/70_inactive.cf
+	[ ! -d rulesrc ] || $(PERL) build/mkrules --src rulesrc --out rules --manifest MANIFEST --manifestskip MANIFEST.SKIP
 
 SPAMC_MAKEFILE  = spamc/Makefile
 MAKE_SPAMC      = $(MAKE) -f $(SPAMC_MAKEFILE)

Modified: spamassassin/branches/jm_re2c_hacks/build/README
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/build/README?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/build/README (original)
+++ spamassassin/branches/jm_re2c_hacks/build/README Wed Oct 25 09:15:31 2006
@@ -1,4 +1,3 @@
-
 SPAMASSASSIN DEVELOPMENT SNAPSHOT PROCEDURE
 ===========================================
 
@@ -40,6 +39,14 @@
 
     PATH=$HOME/sabuildtools/perl584/bin:$HOME/sabuildtools/bin:$PATH
 
+- Run "svn status" to ensure you're building/releasing from a pristine
+  checkout:
+
+    svn status
+
+  (It should return no text; any files marked "M" have been locally
+  modified, and should be "svn revert"ed before you continue.)
+
 - edit lib/Mail/SpamAssassin.pm and comment the $IS_DEVEL_BUILD
   line.   Ensure the correct version number is present in $VERSION
   and $EXTRA_VERSION.
@@ -86,15 +93,15 @@
 
 - Check in the updated Changes file.
 
-    svn commit -m "preparing to release X.Y.Z" Changes
+    svn commit -m "preparing to release X.Y.Z"
 
 - SVN tag the release files.  This is done using "svn copy".
   For a maintainance release (x.y.1, x.y.2):
 
     repo=https://svn.apache.org/repos/asf/spamassassin
-    svn copy -m "creating tag for release 3.0.1" \
-	$repo/branches/3.0 \
-	$repo/tags/spamassassin_release_3_0_1
+    svn copy -m "creating tag for release 3.1.7" \
+	$repo/branches/3.1 \
+	$repo/tags/spamassassin_release_3_1_7
 
   For a trunk release (x.y.0):
 
@@ -114,6 +121,10 @@
   a branch really) of whatever the latest branch revision to be the new
   base of the tag release.
 
+  Note that if you have to re-run this command later to fix a broken
+  release in progress, you need to "svn delete" the previous copy of the
+  tag, otherwise it'll create a subdirectory instead of overwriting.
+
 - run "make distcheck" to ensure all files are included in the
   distribution that should be, and to ensure all files that are listed
   in the MANIFEST also exist in SVN.
@@ -136,25 +147,43 @@
 
 - test the tar.gz and zip files!  redo until they work!! ;)
 
-- Propose a release, and post the URL and md5sums/sha1sums to the
-  dev list.  Once you've got 2 committer +1's, in addition to your own,
-  carry on:
+- Write the release announcement mail!  This is a simple matter of copying
+  the previous release's announcement, updating the version numbers and
+  links, fixing the MD5 and SHA1 checksums in this mail, and summarising
+  the important changes from the Changes file.
+
+    cp build/announcements/3.1.7.txt build/announcements/3.1.8.txt
+    svn add !$
+    vi !$
+
+- (for any rc, prerelease, or full release) Place the tarballs in a
+  discreet location (discreet means not linked from downloads, but
+  included in the vote mail) and request a vote on the development mailing
+  list to make the release.  Post the URL, md5sums/sha1sums, and proposed
+  release announcement mail.  
+  
+  Three +1 votes are required to make the release official.  The release
+  manager (that's you) may vote as well.  Once there are three or more +1
+  votes, you may proceed.
+
+- (for a full release) You need 3 +1's from PMC members.
 
 - SVN commit the release files, including 'Changes':
 
 	svn commit -m "X.Y.Z RELEASED"
 
-- Now, start the new development codebase.  For minor updates of a 2.x
-  tree (e.g. 2.x1, 2.x2), you don't need to branch; for major updates
-  (2.x0) you should use a new development branch, off the trunk.
+- [X.Y.0 RELEASES ONLY]: Now, start the new development codebase.  For
+  minor updates of a 2.x tree (e.g. 2.x.1, 2.x.2), you don't need to
+  branch; for major updates (2.x.0) you need to create a new development
+  branch, off the trunk.
 
     repo=https://svn.apache.org/repos/asf/spamassassin
-    svn copy $repo/tags/spamassassin_release_3_0_0 \
-           $repo/branches/b3_0_0
+    svn copy $repo/tags/spamassassin_release_3_1_0 \
+           $repo/branches/b3_1_0
 
   "trunk" is SVN's concept of head.  Typically, our branches are named
-  for their minor version number.  In the example above, b3_0_0 is the
-  branch for the stable 3.0.x releases.
+  for their minor version number.  In the example above, b3_1_0 is the
+  branch for the stable 3.1.x releases.
 
 - In the new development codeline, edit lib/Mail/SpamAssassin.pm, bump the
   $VERSION line to the correct version, and uncomment the $IS_DEVEL_BUILD
@@ -169,15 +198,6 @@
 
 	(where X.Y.N is the new version number)
 
-- (for any rc, prerelease, or full release) Place the tarballs in a
-  discreet location (discreet means not linked from downloads, but
-  included in the vote mail) and request a vote on the development
-  mailing list to make the release, three +1 votes are required to make
-  the release official.  The release manager (that's you) may vote as
-  well.  Once there are three or more +1 votes, you may proceed.
-
-- (for a full release) You need 3 +1's from PMC members.
-
 - !WARNING! After the next step, the version number will be considered
   "burned". The number is locked for this particular code.  The same
   number cannot be used for a future different release.  So make sure
@@ -240,44 +260,66 @@
         cd /www/spamassassin.apache.org
         webmake -F
 
-- update the 'dist' tree in the SpamAssassin website:
+- update the 'doc' tree in the SpamAssassin website:
 
         cd /www/spamassassin.apache.org
         svn up
         svn delete --force full/3.1.x
-        svn commit -m "removing old dist tree"
+        svn commit -m "removing old doc tree from website" full
 
         cd [checkedoutdir]
         build/update_website_docs
 
         cd /www/spamassassin.apache.org
         svn add full/3.1.x
-        svn commit -m "updating new dist tree" full
-        
+        svn commit -m "updating new doc tree on website" full
+
 - and check the entire website into SVN (see bug 4998 for reasons why).
   Note that you may need to resolve conflicts and ensure the contents of
   'full/3.1.x' are all added successfully:
 
-        svn commit -m "added new release"
+        svn up
+        svn commit -m "added new release to website"
+
+- update the tag used to point to "current release":
+
+    repo=https://svn.apache.org/repos/asf/spamassassin
+    svn delete -m "updating for new release" \
+	$repo/tags/spamassassin_current_release_3.1.x
+    svn copy -m "updating for new release" \
+	$repo/tags/spamassassin_release_3_1_7 \
+	$repo/tags/spamassassin_current_release_3.1.x
+
+- upload release tarball to CPAN at http://pause.cpan.org/:
+
+        https://pause.perl.org/pause/authenquery?ACTION=add_uri
+
+  (Note that recently, PAUSE has started indexing sub-modules under
+  Mail::SpamAssassin::, and it can't deal with our multi-maintainer
+  setup.  You may receive mail indicating that "indexing failed" after
+  the upload; as long as the main Mail::SpamAssassin module was indexed
+  correctly, this is fine.  However, it would help if you could visit
 
-- upload to CPAN at http://pause.cpan.org/
+        https://pause.perl.org/pause/authenquery?ACTION=share_perms
 
-  ( https://pause.perl.org/pause/authenquery?ACTION=add_uri )
+  select 3.1 ("Make someone else co-maintainer"), and ensure that the
+  other releasers (JMASON, DOS, FELICITY, others?) all have permissions as
+  'co-maintainer' on the full set of your listed modules in the
+  "Mail::SpamAssassin::" namespace.)
 
-- Before doing the next step, run through the Changes file, and write up a
-  quick summary of the important changes in human-readable format.  This
-  should be less than 600 chars to fit into Freshmeat's format, and
-  to be easily understandable.
+- announce on the users, dev, and announce mailing lists using the
+  previously-prepared release announcement.
 
-- announce to Freshmeat at http://freshmeat.net/
+- Before doing the next step, run through the release summary mail, and
+  write up a really short summary of the important changes in
+  human-readable format.  This should be less than 600 chars to fit into
+  Freshmeat's format, and to be easily understandable.
 
-  ( http://freshmeat.net/add-release/14876/ may work )
+- announce to Freshmeat at http://freshmeat.net/:
 
-- announce on SpamAssassin-Users, SpamAssassin-Dev, and
-  SpamAssassin-Announce.  Be sure to include the MD5 checksums in this
-  mail, so paranoid folks can check the tarball's integrity.
+        http://freshmeat.net/add-release/14876/
 
-- Approve the posting to SpamAssassin-Announce (the list admins will get a
+- Approve the posting to the announce list (the list admins will get a
   mail indicating how to do this.)
 
 // vim:tw=74:

Modified: spamassassin/branches/jm_re2c_hacks/build/automc/run_preflight
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/build/automc/run_preflight?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/build/automc/run_preflight (original)
+++ spamassassin/branches/jm_re2c_hacks/build/automc/run_preflight Wed Oct 25 09:15:31 2006
@@ -90,16 +90,18 @@
 # produce lots of noisy output to stop the buildbot from timing out on
 # mass-checks of large corpora.
 # store AICache data in /tmpfs/aicache.
+# ignore mails older than 6 months (use the nightly runs for those corpora,
+# it's too slow to mass-check them here).
 
 run "/local/bbmasstools/masschroot $perl ".
     "mass-check -c=tstrules --cache -j=1 ".
     "--noisy --deencap='petuniapress.com' ".
     "--cachedir=/tmpfs/aicache ".
+    "--after='6 months ago' ".
     $mass_check_args{$slavename}." ".
     "ham:detect:/home/bbmass/rawcor/*/ham/* ".
     "spam:detect:/home/bbmass/rawcor/*/spam/*";
 
-    # TODO: add --after="6 months ago"?
 
 exit;
 

Modified: spamassassin/branches/jm_re2c_hacks/build/mkrules
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/build/mkrules?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/build/mkrules (original)
+++ spamassassin/branches/jm_re2c_hacks/build/mkrules Wed Oct 25 09:15:31 2006
@@ -161,6 +161,7 @@
 my $ALWAYS_PUBLISH = '!always_publish!';
 
 read_all_rules($needs_compile);
+read_rules_from_output_dir();
 compile_output_files();
 lint_output_files();
 write_output_files();
@@ -391,10 +392,19 @@
       {
         $rules->{$name}->{found_definition} = 1;
       }
-      # userconf rules are always published in "active"
-      elsif (($type eq 'tflags') && ($val =~ /\buserconf\b/))
-      {
-        $rules->{$name}->{forceactive} = 1;
+      elsif ($type eq 'tflags') {
+        # userconf rules are always published in "active"
+        if ($val =~ /\buserconf\b/) {
+          $rules->{$name}->{forceactive} = 1;
+        }
+
+        # record for rulemetadata code
+        $val =~ s/\s+/ /gs;
+        if ($rules->{$name}->{tflags}) {
+          $rules->{$name}->{tflags} .= ' '.$val;
+        } else {
+          $rules->{$name}->{tflags} = $val;
+        }
       }
 
       $current_comments = '';
@@ -413,9 +423,13 @@
       my $val = $3;
 
       my $origname = $name;
-      if ($issandbox) {
-        $name = sandbox_rule_name_avoid_collisions($name, $f);
-      }
+
+      # note: if we call sandbox_rule_name_avoid_collisions(), it'll
+      # rename to 'T_RULENAME' -- which is exactly what we're trying
+      # to avoid in 'publish RULENAME' lines!  so don't call it here.
+      # if ($issandbox) {
+      # $name = sandbox_rule_name_avoid_collisions($name, $f);
+      # }
 
       if (!$rules->{$name}) { $rules->{$name} = rule_entry_create(); }
       $rules->{$name}->{origname} = $origname;
@@ -423,7 +437,7 @@
       if ($command eq 'publish') {
         # the 'publish' command defaults to "1", unless it explicitly
         # is set to "0".  iow: publish RULE_NAME [(0 | 1)]  [default: 1]
-        if (!defined $val) { $val = '1'; }
+        if (!defined $val || $val eq '') { $val = '1'; }
       }
       elsif ($command eq 'pubfile') {
         if (!filename_in_manifest($opt_out.'/'.$val)) {
@@ -433,6 +447,12 @@
       }
 
       $rules->{$name}->{$command} = $val;
+
+      # if we see "publish NAMEOFRULE", that means the rule is
+      # considered active
+      if ($rules->{$name}->{publish}) {
+        $rules->{$name}->{forceactive} = 1;
+      }
     }
     elsif (/^
         (if|ifplugin)
@@ -504,6 +524,16 @@
       /^\s*(\S+)/ and $name = $1;
       $name =~ s/\s+/ /gs;
 
+      my $forceactive = 1;
+      
+      # always send 'test' lines to the sandbox or inactive files
+      if (/^test\s*/) {
+        $forceactive = 0;
+
+        $name = $_;   # ensure we don't drag rules with us though!
+        $name =~ s/\s+/ /gs;
+      }
+
       my $cond;
       if ($current_conditional) {
         $name = $current_conditional; $name =~ s/\s+/ /gs;
@@ -519,7 +549,8 @@
       }
       $rules->{$name}->{cond} ||= $cond;
       $rules->{$name}->{issandbox} = $issandbox;
-      $rules->{$name}->{forceactive} = 1;
+      $rules->{$name}->{forceactive} = $forceactive;
+      # $rules->{$name}->{forceactive} = 1;
       $rules->{$name}->{iscommand} = 1;
 
       # warn "unknown line in rules file '$f', saving to default: $orig";
@@ -544,6 +575,54 @@
   }
 }
 
+# this is only run if we're generating rulemetadata!
+sub read_rules_from_output_dir {
+  return unless ($opt_rulemetadata);
+
+  foreach my $file (<$opt_out/*.cf>) {
+    next unless ($file =~ /\d\d_\S+\.cf$/);
+    next if ($file =~ /\/\Q$opt_activeout\E/);
+    next if ($file =~ /\/\Q$opt_sandboxout\E/);
+    next if ($file =~ /\/\Q$opt_inactiveout\E/);
+    read_output_file($file);
+  }
+}
+
+sub read_output_file {
+  my ($file) = @_;
+  open (IN, "<$file") or warn "cannot read $file";
+  while (<IN>) {
+    my $orig = $_;
+
+    s/^#reuse/reuse/;   # TODO - dirty hack.  we need to fix this to just be
+    # a keyword which the engine ignores, this is absurd! 
+
+    s/#.*$//g; s/^\s+//; s/\s+$//;
+
+    # drop comments/blank lines from output
+    next if (/^$/);
+
+    # save "lang" declarations
+    my $lang = '';
+    if (s/^lang\s+(\S+)\s+//) {
+      $lang = $1;
+    }
+
+    # right now, we only read tflags from output files
+    if (/^tflags\s+(\S+)\s+(.*)$/) {
+      my $name = $1;
+      my $val = $2;
+      $val =~ s/\s+/ /gs;
+      if ($rules->{$name}->{tflags}) {
+        $rules->{$name}->{tflags} .= ' '.$val;
+      } else {
+        $rules->{$name}->{tflags} = $val;
+      }
+    }
+  }
+  close IN;
+}
+
 sub copy_to_output_buffers {
   my ($rule_order, $issandbox, $f, $filename) = @_;
 
@@ -577,9 +656,6 @@
       $is_active++;
     }
 
-    # fix up any rule renamings we were supposed to do
-    sed_renamed_rule_names(\$text);
-
     my $cond = $rules->{$name}->{cond};
     my $pluginclass = $rules->{$name}->{ifplugin};
     if ($cond)
@@ -632,6 +708,9 @@
     $pubfile = $rules->{$name}->{pubfile};
     if ($pubfile) {
       $pubfile = $opt_out.'/'.$pubfile;
+    } else {
+      # "publish NAMEOFRULE" => send it to active
+      $pubfile = $opt_out.'/'.$opt_activeout;
     }
   }
 
@@ -769,19 +848,27 @@
   my ($rule) = @_;
 
   return '' unless ($opt_rulemetadata);
-  return '' unless ($rules->{$rule}->{found_definition});
 
   my $mod = 0;
-  my $srcfile = $rules->{$rule}->{srcfile};
-  my @s = stat $srcfile;
-  if (@s) {
-    $mod = $s[9];
+  my $srcfile = '';
+
+  if ($rules->{$rule}->{found_definition}) {
+    $srcfile = $rules->{$rule}->{srcfile} || '';
+    if ($srcfile) {
+      my @s = stat $srcfile;
+      if (@s) {
+        $mod = $s[9];
+      }
+    }
   }
 
+  my $tf = $rules->{$rule}->{tflags} || '';
+
   return "<rulemetadata>".
             "<name>$rule</name>".
             "<src>$srcfile</src>".
             "<srcmtime>$mod</srcmtime>".
+            "<tf>$tf</tf>".
           "</rulemetadata>\n";
 }
 
@@ -799,7 +886,11 @@
       # force that subrule (if it exists) to output in the
       # same pubfile
       my $rule2 = $1;
-      next unless ($rules->{$rule2} && $rules->{$rule2}->{output_text});
+
+      # deal with rules that changed name from "FOO" to "T_FOO"
+      sed_renamed_rule_names(\$rule2);
+      
+      next unless ($rules->{$rule2} && $rules->{$rule2}->{output_file});
 
       # don't do this if the subrule would be moved *out* of the
       # active file!
@@ -821,9 +912,11 @@
       warn "$pubfile: WARNING: not listed in manifest file\n";
     }
 
-    if ($output_file_text->{$pubfile}) {
+    my $text = $output_file_text->{$pubfile};
+    if ($text) {
       open (OUT, ">".$pubfile) or die "cannot write to output file '$pubfile'";
-      print OUT $output_file_text->{$pubfile};
+      sed_renamed_rule_names(\$text);
+      print OUT $text;
       close OUT or die "cannot close output file '$pubfile'";
       # print "$pubfile: written\n";        # too noisy
     }
@@ -856,6 +949,7 @@
 
   return $rule if $opt_listpromotable;
   return $rule if $active_rules->{$rule};
+  return $rule if $rules->{$rule}->{forceactive};
 
   if ($rule !~ /^(?:T_|__)/) {
     $new = "T_".$rule;
@@ -929,7 +1023,6 @@
   my ($fname) = @_;
   return 1 if ($file_manifest->{$fname});
   foreach my $skipre (@{$file_manifest_skip}) {
-    # warn "JMD $skipre $fname";
     return 1 if ($fname =~ $skipre);
   }
   return 0;

Modified: spamassassin/branches/jm_re2c_hacks/build/update_website_docs
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/build/update_website_docs?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/build/update_website_docs (original)
+++ spamassassin/branches/jm_re2c_hacks/build/update_website_docs Wed Oct 25 09:15:31 2006
@@ -38,3 +38,5 @@
   rm -f docbak
 )
 
+chmod -R g+w $fulldir/dist
+

Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/ArchiveIterator.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/ArchiveIterator.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/ArchiveIterator.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/ArchiveIterator.pm Wed Oct 25 09:15:31 2006
@@ -488,7 +488,7 @@
     }
   }
 
-  my @messages;
+  my $messages;
   if ($self->{opt_n}) {
     # OPT_N == 1 means don't bother sorting on message receive date
 
@@ -502,8 +502,10 @@
       splice(@{$self->{h}}, min ($self->{opt_head}, scalar @{$self->{h}}));
     }
 
-    @messages = ( @{$self->{s}}, @{$self->{h}} );
+    # for ease of memory, we'll play with pointers
+    $messages = $self->{s};
     undef $self->{s};
+    push(@{$messages}, @{$self->{h}});
     undef $self->{h};
   }
   else {
@@ -529,22 +531,22 @@
     if (@s && @h) {
       my $ratio = @s / @h;
       while (@s && @h) {
-	push @messages, (@s / @h > $ratio) ? (shift @s) : (shift @h);
+	push @{$messages}, (@s / @h > $ratio) ? (shift @s) : (shift @h);
       }
     }
     # push the rest onto the end
-    push @messages, @s, @h;
+    push @{$messages}, @s, @h;
   }
 
   # head or tail < 0 means crop the total list, negate the value appropriately
   if ($self->{opt_tail} < 0) {
-    splice(@messages, 0, $self->{opt_tail});
+    splice(@{$messages}, 0, $self->{opt_tail});
   }
   if ($self->{opt_head} < 0) {
-    splice(@messages, -$self->{opt_head});
+    splice(@{$messages}, -$self->{opt_head});
   }
 
-  return scalar(@messages), \@messages;
+  return scalar(@{$messages}), $messages;
 }
 
 sub mail_open {
@@ -569,7 +571,7 @@
 
 ############################################################################
 
-sub message_is_useful_by_date  {
+sub message_is_useful_by_date {
   my ($self, $date) = @_;
 
   return 0 unless $date;	# undef or 0 date = unusable
@@ -579,7 +581,7 @@
     return 1;
   }
   elsif (!$self->{opt_before}) {
-    # Just case about after
+    # Just care about after
     return $date > $self->{opt_after};
   }
   else {
@@ -587,6 +589,24 @@
   }
 }
 
+# additional check, based solely on a file's mod timestamp.  we cannot
+# make assumptions about --before, since the file may have been "touch"ed
+# since the last message was appended; but we can assume that too-old
+# files cannot contain messages newer than their modtime.
+sub message_is_useful_by_file_modtime {
+  my ($self, $date) = @_;
+
+  # better safe than sorry, if date is undef; let other stuff catch errors
+  return 1 unless $date;
+
+  if ($self->{opt_after}) {
+    return ($date > $self->{opt_after});
+  }
+  else {
+    return 1;       # --after not in use
+  }
+}
+
 ############################################################################
 
 # 0 850852128			atime
@@ -646,13 +666,16 @@
   my ($self, $class, $mail) = @_;
 
   $self->bump_scan_progress();
+
+  my @s = stat($mail);
+  return unless $self->message_is_useful_by_file_modtime($s[9]);
+
   if (!$self->{determine_receive_date}) {
     push(@{$self->{$class}}, index_pack(AI_TIME_UNKNOWN, $class, "f", $mail));
     return;
   }
 
   my $date;
-
   unless (defined $AICache and $date = $AICache->check($mail)) {
     my $header;
     if (!mail_open($mail)) {
@@ -706,6 +729,9 @@
       next;
     }
 
+    my @s = stat($file);
+    next unless $self->message_is_useful_by_file_modtime($s[9]);
+
     my $info = {};
     my $count;
 
@@ -807,6 +833,9 @@
       $self->{access_problem} = 1;
       next;
     }
+
+    my @s = stat($file);
+    next unless $self->message_is_useful_by_file_modtime($s[9]);
 
     my $info = {};
     my $count;

Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Bayes/CombineChi.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Bayes/CombineChi.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Bayes/CombineChi.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Bayes/CombineChi.pm Wed Oct 25 09:15:31 2006
@@ -41,7 +41,7 @@
 # the use of "s") of an original assumed expectation ... relative to how
 # strongly we want to consider our actual collected data."  Low 's' means
 # trust collected data more strongly.
-our $FW_S_CONSTANT = 0.100;
+our $FW_S_CONSTANT = 0.030;
 
 # (s . x) for the f(w) equation.
 our $FW_S_DOT_X = ($FW_X_CONSTANT * $FW_S_CONSTANT);

Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/BayesStore/DBM.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/BayesStore/DBM.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/BayesStore/DBM.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/BayesStore/DBM.pm Wed Oct 25 09:15:31 2006
@@ -590,7 +590,7 @@
     my $db_var = 'db_'.$dbname;
 
     if (exists $self->{$db_var}) {
-      dbg("bayes: untie-ing $db_var");
+      # dbg("bayes: untie-ing $db_var");
       untie %{$self->{$db_var}};
       delete $self->{$db_var};
     }
@@ -1515,7 +1515,7 @@
     my $db_var = 'db_'.$dbname;
 
     if (exists $self->{$db_var}) {
-      dbg("bayes: untie-ing $db_var");
+      # dbg("bayes: untie-ing $db_var");
       untie %{$self->{$db_var}};
       delete $self->{$db_var};
     }

Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Conf/Parser.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Conf/Parser.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Conf/Parser.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Conf/Parser.pm Wed Oct 25 09:15:31 2006
@@ -932,7 +932,9 @@
   }
   $conf->{priority}->{$name} ||= 0;
   $conf->{source_file}->{$name} = $self->{currentfile};
-  $conf->{if_stack}->{$name} = $self->get_if_stack_as_string();
+
+  # this no longer seems to be needed!
+  # $conf->{if_stack}->{$name} = $self->get_if_stack_as_string();
 
   if ($self->{scoresonly}) {
     $conf->{user_rules_to_compile}->{$type} = 1;

Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Dns.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Dns.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Dns.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Dns.pm Wed Oct 25 09:15:31 2006
@@ -205,7 +205,7 @@
     $self->{already_logged}->{$log} = 1;
   }
 
-  if (!defined $self->{tests_already_hit}->{$rule}) {
+  if (!$self->{tests_already_hit}->{$rule}) {
     $self->got_hit($rule, "RBL: ", ruletype => "dnsbl");
   }
 }
@@ -277,7 +277,7 @@
 
   my $rdatastr = $answer->rdatastr;
   while (my ($subtest, $rule) = each %{ $self->{dnspost}->{$set} }) {
-    next if defined $self->{tests_already_hit}->{$rule};
+    next if $self->{tests_already_hit}->{$rule};
 
     # exact substr (usually IP address)
     if ($subtest eq $rdatastr) {

Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Message.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Message.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Message.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Message.pm Wed Oct 25 09:15:31 2006
@@ -731,7 +731,7 @@
     }
 
     if (!$in_body) {
-      s/\s+$//;
+      # s/\s+$//;   # bug 5127: don't clean this up (yet)
       if (m/^[\041-\071\073-\176]+:/) {
         if ($header) {
           my ( $key, $value ) = split ( /:\s*/, $header, 2 );
@@ -741,7 +741,7 @@
 	next;
       }
       elsif (/^[ \t]/) {
-        $_ =~ s/^\s*//;
+        # $_ =~ s/^\s*//;   # bug 5127, again
         $header .= $_;
 	next;
       }

Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Message/Node.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Message/Node.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Message/Node.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Message/Node.pm Wed Oct 25 09:15:31 2006
@@ -170,7 +170,11 @@
       $self->{'raw_headers'}->{$key} = [];
     }
 
-    push @{ $self->{'headers'}->{$key} },     $self->_decode_header($raw_value);
+    my $dec_value = $raw_value;
+    $dec_value =~ s/\n[ \t]+/ /gs;
+    $dec_value =~ s/\s*$//s;
+    push @{ $self->{'headers'}->{$key} },     $self->_decode_header($dec_value);
+
     push @{ $self->{'raw_headers'}->{$key} }, $raw_value;
 
     return $self->{'headers'}->{$key}->[-1];

Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/PerMsgStatus.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/PerMsgStatus.pm Wed Oct 25 09:15:31 2006
@@ -1374,44 +1374,6 @@
 
 sub finish_tests {
   my ($conf) = @_;
-
-  foreach my $priority (keys %{$conf->{priorities}}) {
-    # clean up priority value so it can be used in a subroutine name
-    my $clean_priority;
-    ($clean_priority = $priority) =~ s/-/neg/;
-
-    if (defined &{'_head_tests_'.$clean_priority}) {
-      undef &{'_head_tests_'.$clean_priority};
-    }
-    foreach my $rulename (keys %{$conf->{head_tests}->{$priority}}) {
-      undef &{$rulename.'_head_test'};
-    }
-    if (defined &{'_body_tests_'.$clean_priority}) {
-      undef &{'_body_tests_'.$clean_priority};
-    }
-    foreach my $rulename (keys %{$conf->{body_tests}->{$priority}}) {
-      undef &{$rulename.'_body_test'};
-    }
-    if (defined &{'_body_uri_tests_'.$clean_priority}) {
-      undef &{'_body_uri_tests_'.$clean_priority};
-    }
-    foreach my $rulename (keys %{$conf->{uri_tests}->{$priority}}) {
-      undef &{$rulename.'_uri_test'};
-    }
-    if (defined &{'_rawbody_tests_'.$clean_priority}) {
-      undef &{'_rawbody_tests_'.$clean_priority};
-    }
-    foreach my $rulename (keys %{$conf->{rawbody_tests}->{$priority}}) {
-      undef &{$rulename.'_rawbody_test'};
-    }
-    if (defined &{'_full_tests_'.$clean_priority}) {
-      undef &{'_full_tests_'.$clean_priority};
-    }
-    if (defined &{'_meta_tests_'.$clean_priority}) {
-      undef &{'_meta_tests_'.$clean_priority};
-    }
-  }
-
   foreach my $method (@TEMPORARY_METHODS) {
     if (defined &{$method}) {
       undef &{$method};
@@ -1842,6 +1804,7 @@
 	  }
 	}
       ';
+      push (@TEMPORARY_METHODS, $rulename.'_head_test');
     }
     else {
       # store for use below
@@ -1911,8 +1874,10 @@
     $self->{rule_errors}++;
   }
   else {
+    my $method = '_head_tests_'.$clean_priority;
+    push @TEMPORARY_METHODS, $method;
     no strict "refs";
-    &{'Mail::SpamAssassin::PerMsgStatus::_head_tests_'.$clean_priority}($self);
+    &{$method}($self);
     use strict "refs";
   }
 }
@@ -2039,6 +2004,7 @@
       $evalstr2 .= '
 	sub '.$rulename.'_body_test { my $self = shift; '.$sub.' }
       ';
+      push (@TEMPORARY_METHODS, $rulename.'_body_test');
     }
 
     $evalstr2 .= '
@@ -2080,8 +2046,10 @@
     $self->{rule_errors}++;
   }
   else {
+    my $method = '_body_tests_'.$clean_priority;
+    push @TEMPORARY_METHODS, $method;
     no strict "refs";
-    &{'Mail::SpamAssassin::PerMsgStatus::_body_tests_'.$clean_priority}($self, @$textary);
+    &{$method}($self,@$textary);
     use strict "refs";
   }
 }
@@ -2492,6 +2460,7 @@
       $evalstr2 .= '
         sub '.$rulename.'_uri_test { my $self = shift; '.$sub.' }
       ';
+      push (@TEMPORARY_METHODS, $rulename.'_uri_test');
     }
   }
 
@@ -2525,8 +2494,10 @@
     $self->{rule_errors}++;
   }
   else {
+    my $method = '_body_uri_tests_'.$clean_priority;
+    push @TEMPORARY_METHODS, $method;
     no strict "refs";
-    &{'Mail::SpamAssassin::PerMsgStatus::_body_uri_tests_'.$clean_priority}($self, @uris);
+    &{$method}($self, @uris);
     use strict "refs";
   }
 }
@@ -2617,6 +2588,7 @@
       $evalstr2 .= '
 	sub '.$rulename.'_rawbody_test { my $self = shift; '.$sub.' }
       ';
+      push (@TEMPORARY_METHODS, $rulename.'_rawbody_test');
     }
   }
 
@@ -2650,8 +2622,10 @@
     $self->{rule_errors}++;
   }
   else {
+    my $method = '_rawbody_tests_'.$clean_priority;
+    push @TEMPORARY_METHODS, $method;
     no strict "refs";
-    &{'Mail::SpamAssassin::PerMsgStatus::_rawbody_tests_'.$clean_priority}($self, @$textary);
+    &{$method}($self, @$textary);
     use strict "refs";
   }
 }
@@ -2724,8 +2698,10 @@
     warn "rules: failed to compile full tests, skipping:\n" . "\t($@)\n";
     $self->{rule_errors}++;
   } else {
+    my $method = '_full_tests_'.$clean_priority;
+    push @TEMPORARY_METHODS, $method;
     no strict "refs";
-    &{'Mail::SpamAssassin::PerMsgStatus::_full_tests_'.$clean_priority}($self, $fullmsgref);
+    &{$method}($self, $fullmsgref);
     use strict "refs";
   }
 }
@@ -2939,8 +2915,10 @@
     $self->{rule_errors}++;
   }
   else {
+    my $method = '_meta_tests_'.$clean_priority;
+    push @TEMPORARY_METHODS, $method;
     no strict "refs";
-    &{'Mail::SpamAssassin::PerMsgStatus::_meta_tests_'.$clean_priority}($self);
+    &{$method}($self);
     use strict "refs";
   }
 }    # do_meta_tests()
@@ -3112,13 +3090,12 @@
 
   eval $evalstr;
 
-  push (@TEMPORARY_METHODS, $methodname);
-
   if ($@) {
     warn "rules: failed to compile eval tests, skipping some: $@\n";
     $self->{rule_errors}++;
   }
   else {
+    push (@TEMPORARY_METHODS, $methodname);
     no strict "refs";
     &{'Mail::SpamAssassin::PerMsgStatus::'.$methodname}($self,@extraevalargs);
     use strict "refs";

Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin.pm Wed Oct 25 09:15:31 2006
@@ -779,6 +779,30 @@
   $self->{main}->{conf}->register_eval_rule ($self, $nameofsub);
 }
 
+=item $plugin->register_generated_rule_method ($nameofsub)
+
+In certain circumstances, plugins may find it useful to compile
+perl functions from the ruleset, on the fly.  It is important to
+remove these once the C<Mail::SpamAssassin> object is deleted,
+however, and this API allows this.
+
+Once the method C<$nameofsub> has been generated, call this API
+with the name of the method (including full package scope).
+This indicates that it's a temporary piece of generated code,
+built from the SpamAssassin ruleset, and when 
+C<Mail::SpamAssassin::finish()> is called, the method will
+be destroyed.
+
+This API was added in SpamAssassin 3.2.0.
+
+=cut
+
+sub register_generated_rule_method {
+  my $self = shift;
+  my ($nameofsub) = @_;
+  push(@Mail::SpamAssassin::PerMsgStatus::TEMPORARY_METHODS, $nameofsub);
+}
+
 =item $plugin->inhibit_further_callbacks()
 
 Tells the plugin handler to inhibit calling into other plugins in the plugin
@@ -885,6 +909,23 @@
 
 The configuration file arguments will be passed in after the standard
 arguments.
+
+=head1 BACKWARDS COMPATIBILITY
+
+Note that if you write a plugin and need to determine if a particular
+helper method is supported on C<Mail::SpamAssassin::Plugin>, you
+can do this:
+
+    if ($self->can("name_of_method")) {
+      eval {
+        $self->name_of_method();        # etc.
+      }
+    } else {
+      # take fallback action
+    }
+
+The same applies for the public APIs on objects of other types, such as
+C<Mail::SpamAssassin::PerMsgStatus>.
 
 =head1 SEE ALSO
 

Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyEval.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyEval.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyEval.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyEval.pm Wed Oct 25 09:15:31 2006
@@ -39,43 +39,12 @@
   bless ($self, $class);
 
   # the important bit!
-  $self->register_eval_rule("check_unique_words");
   $self->register_eval_rule("multipart_alternative_difference");
   $self->register_eval_rule("multipart_alternative_difference_count");
   $self->register_eval_rule("check_blank_line_ratio");
   $self->register_eval_rule("tvd_vertical_words");
 
   return $self;
-}
-
-sub check_unique_words {
-  my ($self, $pms, $body, $m, $b) = @_;
-
-  if (!defined $pms->{unique_words_repeat}) {
-    $pms->{unique_words_repeat} = 0;
-    $pms->{unique_words_unique} = 0;
-    my %count;
-    for (@$body) {
-      # copy to avoid changing @$body
-      my $line = $_;
-      # from tokenize_line in Bayes.pm
-      $line =~ tr/-A-Za-z0-9,\@\*\!_'"\$.\241-\377 / /cs;
-      $line =~ s/(\w)(\.{3,6})(\w)/$1 $2 $3/gs;
-      $line =~ s/(\w)(\-{2,6})(\w)/$1 $2 $3/gs;
-      $line =~ s/(?:^|\.\s+)([A-Z])([^A-Z]+)(?:\s|$)/ ' '.(lc $1).$2.' '/ge;
-      for my $token (split(' ', $line)) {
-        $count{$token}++;
-      }
-    }
-    $pms->{unique_words_unique} = scalar grep { $_ == 1 } values(%count);
-    $pms->{unique_words_repeat} = scalar keys(%count) - $pms->{unique_words_unique};
-  }
-
-  # y = mx+b where y is number of unique words needed
-  my $unique = $pms->{unique_words_unique};
-  my $repeat = $pms->{unique_words_repeat};
-  my $y = ($unique + $repeat) * $m + $b;
-  return ($unique > $y);
 }
 
 sub multipart_alternative_difference {

Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm Wed Oct 25 09:15:31 2006
@@ -44,6 +44,7 @@
 my $BASES_CAN_USE_ALTERNATIONS = 0;    # /(foo|bar|baz)/
 my $BASES_CAN_USE_QUANTIFIERS = 0;     # /foo.*bar/ or /foo*bar/ or /foooo?bar/
 my $BASES_CAN_USE_CHAR_CLASSES = 0;    # /fo[opqr]bar/
+my $SPLIT_OUT_ALTERNATIONS = 1;        # /(foo|bar|baz)/ => ["foo", "bar", "baz"]
 
 ###########################################################################
 
@@ -54,6 +55,7 @@
   my $self = $class->SUPER::new($mailsaobject);
   bless ($self, $class);
 
+  # $self->test();
   return $self;
 }
 
@@ -75,7 +77,10 @@
 
   if ($rawf) {
     $rawf =~ /^(.*)$/;
-    $f = $1;       # untaint; allow anything here, it's from %ENV and safe
+    $f = $1;        # untaint; allow anything here, it's from %ENV and safe
+  }
+  else {
+    return;         # TODO: comment this for Rabin-Karp
   }
 
   $self->extract_set($f, $conf, $conf->{body_tests}, 'body');
@@ -120,22 +125,50 @@
     # TODO: need cleaner way to do this
     next if ($conf->{rules_to_replace}->{$name});
 
-    my $base  = $self->extract_base($rule, 0);
-    my $base2 = $self->extract_base($rule, 1);
-
-    my $len   = $base  ? $self->count_regexp_statements($base) : 0;
-    my $len2  = $base2 ? $self->count_regexp_statements($base2) : 0;
-
-    if ($base2 && (!$base || ($len2 > $len))) {
-      $base = $base2;
-      $len = $len2;
+    my @bases1 = ();
+    my @bases2 = ();
+    eval {  # catch die()s
+      @bases1 = $self->extract_hints($rule, 0);
+    };
+    eval {
+      @bases2 = $self->extract_hints($rule, 1);
+    };
+
+    # if any of the extracted hints in a set are too short, the entire
+    # set is invalid; this is because each set of N hints represents just
+    # 1 regexp.
+    my $minlen1;
+    foreach my $str (@bases1) {
+      my $len = length $str;
+      if ($len < $min_chars) { $minlen1 = undef; @bases1 = (); last; }
+      elsif (!defined($minlen1) || $len < $minlen1) { $minlen1 = $len; }
+    }
+    my $minlen2;
+    foreach my $str (@bases2) {
+      my $len = length $str;
+      if ($len < $min_chars) { $minlen2 = undef; @bases2 = (); last; }
+      elsif (!defined($minlen2) || $len < $minlen2) { $minlen2 = $len; }
     }
 
-    if (!$base || $len < $min_chars) { $base = undef; }
+    if (defined $minlen1 && !defined $minlen2) {
+      # keep using @bases1
+    }
+    elsif (!defined $minlen1 && defined $minlen2) {
+      # change to using @bases2; $minlen1 must follow, it is tested below
+      @bases1 = @bases2; $minlen1 = $minlen2;
+    }
+    elsif (defined $minlen1 && defined $minlen2) {
+      # both are valid; use the end with the longer hints
+      if ($minlen2 > $minlen1) {
+        @bases1 = @bases2; $minlen1 = $minlen2;
+      }
+    }
 
-    if ($base) {
+    if ($minlen1 && @bases1) {
       # dbg("zoom: YES <base>$base</base> <origrule>$rule</origrule>");
-      push @good_bases, { base => $base, orig => $rule, name => $name };
+      foreach my $base (@bases1) {
+        push @good_bases, { base => $base, orig => $rule, name => $name };
+      }
       $yes++;
     }
     else {
@@ -156,7 +189,7 @@
   # returned as two hits, correctly.  So we only have to be smart about the
   # full-subsumption case; overlapping is taken care of for us, by re2c.
   #
-  # TODO: there's a bug here.  Since the code in extract_base() has been
+  # TODO: there's a bug here.  Since the code in extract_hints() has been
   # modified to support more complex regexps, we can no longer simply assume
   # that if pattern A is not contained in pattern B, that means that pattern B
   # doesn't subsume it.  Consider, for example, A="foo*bar" and
@@ -250,7 +283,7 @@
 # /time to refinance|refinanc\w{1,3}\b.{0,16}\bnow\b/i
 #     => should understand alternations; tricky
 
-sub extract_base {
+sub extract_hints {
   my $self = shift;
   my $rule = shift;
   my $is_reversed = shift;
@@ -289,8 +322,8 @@
     $rule =~ s/\(\?i\)//gs;
   }
   else {
-    return if $rule =~ /\(\?i\)/;
-    return if $mods =~ /i/;
+    die "case-i" if $rule =~ /\(\?i\)/;
+    die "case-i" if $mods =~ /i/;
   }
 
   # remove /m and /s modifiers
@@ -313,8 +346,8 @@
 
   # if there are anchors, give up; we can't get much 
   # faster than these anyway
-  return if $rule =~ /^\(?(?:\^|\\A)/;
-  return if $rule =~ /(?:\$|\\Z)\)?$/;
+  die "anchors" if $rule =~ /^\(?(?:\^|\\A)/;
+  die "anchors" if $rule =~ /(?:\$|\\Z)\)?$/;
 
   # simplify (?:..) to (..)
   $rule =~ s/\(\?:/\(/g;
@@ -350,7 +383,8 @@
               .\{
             ).*$//gsx;
 
-  $BASES_CAN_USE_ALTERNATIONS or $rule =~ s/(?<!\\)(?:
+  ($BASES_CAN_USE_ALTERNATIONS||$SPLIT_OUT_ALTERNATIONS) or
+            $rule =~ s/(?<!\\)(?:
               \(|
               \)
             ).*$//gsx;
@@ -417,7 +451,7 @@
               [^\[]*\]
             )/sx)
     {
-      return;
+      die "pattern starts with a class in a group";
     }
 
     # kill quantifiers right at the start of the string.
@@ -445,10 +479,10 @@
 
 
   # return for things we know we can't handle.
-  if (!$BASES_CAN_USE_ALTERNATIONS) {
+  if (!($BASES_CAN_USE_ALTERNATIONS||$SPLIT_OUT_ALTERNATIONS)) {
     if ($rule =~ /\|/) {
       # /time to refinance|refinanc\w{1,3}\b.{0,16}\bnow\b/i
-      return;
+      die "alternations";
     }
   }
 
@@ -456,41 +490,50 @@
     # count (...braces...) to ensure the numbers match up
     my @c = ($rule =~ /(?<!\\)\(/g); my $brace_i = scalar @c;
        @c = ($rule =~ /(?<!\\)\)/g); my $brace_o = scalar @c;
-    if ($brace_i != $brace_o) { return; }
+    if ($brace_i != $brace_o) { die "brace mismatch"; }
   }
 
   # do the same for [charclasses]
   {
     my @c = ($rule =~ /(?<!\\)\[/g); my $brace_i = scalar @c;
        @c = ($rule =~ /(?<!\\)\]/g); my $brace_o = scalar @c;
-    if ($brace_i != $brace_o) { return; }
+    if ($brace_i != $brace_o) { die "charclass mismatch"; }
   }
 
   # and {quantifiers}
   {
     my @c = ($rule =~ /(?<!\\)\{/g); my $brace_i = scalar @c;
        @c = ($rule =~ /(?<!\\)\}/g); my $brace_o = scalar @c;
-    if ($brace_i != $brace_o) { return; }
+    if ($brace_i != $brace_o) { die "quantifier mismatch"; }
   }
 
   # lookaheads that are just too far for the re2c parser
   # r your .{0,40}account .{0,40}security
   if ($rule =~ /\.\{(\d+),?(\d+?)\}/ and ($1+$2 > 20)) {
-    return;
+    die "too far lookahead";
   }
 
   # re2xs doesn't like escaped brackets
   if ($rule =~ /\\:/) {
-    return;
+    die "escaped bracket";
+  }
+
+  my @rules;
+  if ($SPLIT_OUT_ALTERNATIONS && $rule =~ /\|/) {
+    @rules = $self->split_alt($rule);
+  }
+  else {
+    @rules = ($rule);
   }
 
-  # finally, reassemble a usable regexp
+  # finally, reassemble a usable regexp / set of regexps
   if ($mods ne '') {
     $mods = "(?$mods)";
   }
-  $rule = $mods . $rule;
 
-  return $rule;
+  return map {
+    $mods.$_;
+  } @rules;
 }
 
 sub count_regexp_statements {
@@ -536,5 +579,114 @@
 
   return $rule;
 }
+
+###########################################################################
+
+sub split_alt {
+  # Returns the alternation-free patterns that $re expands to.
+  my $self = shift;
+  my $re = shift;
+
+  # wrap in "(...)" so a top-level /foo|baz/ counts as an implied group
+  my @expanded = $self->_split_alt_recurse(0, "($re)");
+  return @expanded;
+}
+
+sub _split_alt_recurse {
+  # Expands the innermost "(a|b)" group of $re into one pattern per
+  # alternative, recursing until no group is left; returns the unique
+  # flattened patterns.  die()s once $depth exceeds 5.
+  my ($self, $depth, $re) = @_;
+
+  $depth++;
+  die "recursed too far in alternation splitting" if ($depth > 5);
+
+  # trim unnecessary group markers, e.g. /f(oo)/ => /foo/
+  $re =~ s/\(([^\(\)\|]*)\)/$1/gs;
+
+  # identify the smallest nested (...|...) scope.  use the list-context
+  # result instead of $1..$3: after a failed match those still hold the
+  # captures of an earlier successful match
+  my ($pre, $alts, $post) = ($re =~ m{
+      ^(.*)
+      (?<!\\)\(([^\(\)]*?\|[^\(\)]*?)\)
+      (.*)$
+    }xs);
+
+  if (!defined $post) {
+    $re =~ s/\(([^\(\)\|]*)\)/$1/gs;
+    return ($re);       # didn't match; no groups
+  }
+
+  # and expand it; the -1 limit keeps a trailing empty alternative, so
+  # /(foo|)/ yields "foo" and "" just as /(|foo)/ does
+  my @out = ();
+  foreach my $str (split (/(?<!\\)\|/, $alts, -1)) {
+    $str = $pre.$str.$post;
+    # are there unresolved groups left?
+    if ($str =~ /(?<!\\)[\(\|\)]/) {
+      push @out, $self->_split_alt_recurse($depth, $str);
+    } else {
+      push @out, $str;
+    }
+  }
+
+  # uniq, keeping first-seen order
+  my %seen;
+  @out = grep { !$seen{$_}++ } @out;
+
+  return @out;
+}
+
+###########################################################################
+
+sub test {
+  # developer self-check: runs each split_alt() case, then exits the process
+  my ($self) = @_;
+  my @cases = (
+    ["foo", "/foo/"], ["(foo)", "/foo/"], ["foo(bar)baz", "/foobarbaz/"],
+    ["(foo|bar)", "/foo/ /bar/"], ["foo|bar", "/foo/ /bar/"],
+    ["foo (bar|baz) argh", "/foo bar argh/ /foo baz argh/"],
+    ["foo (bar|baz|bl(arg|at)) cough", "/foo bar cough/ /foo baz cough/ /foo blarg cough/ /foo blat cough/"],
+    ["(s(otc|tco)k)", "/sotck/ /stcok/"],
+  );
+  $self->test_split_alt(@$_) foreach @cases;
+  exit;
+}
+
+sub test_split_alt {
+  # Compares split_alt($in) with $out, a "/a/ /b/"-style list of expected
+  # patterns; prints the outcome and returns 1 on success, 0 on failure.
+  my ($self, $in, $out) = @_;
+  my @got = $self->split_alt($in);
+  # drop the outermost slashes, then break on the "/ /" separators
+  (my $list = $out) =~ s/^\///;
+  $list =~ s/\/$//;
+  my @want = split(/\/ \//, $list);
+
+  my $failed = 0;
+  if (@want == @got) {
+    my %seen;
+    $seen{$_} = 1 foreach @got;
+    foreach my $w (grep { !$seen{$_} } @want) {
+      warn "FAIL: '$w' not found";
+      $failed++;
+    }
+  }
+  else {
+    warn "FAIL: results count don't match";
+    $failed++;
+  }
+
+  if (!$failed) {
+    print "ok\n";
+    return 1;
+  }
+  print "want: /".join('/ /', @want)."/\n";
+  print "got:  /".join('/ /', @got)."/\n";
+  return 0;
+}
+
+###########################################################################
 
 1;

Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm Wed Oct 25 09:15:31 2006
@@ -49,7 +49,6 @@
   $self->register_eval_rule("html_text_not_match");
   $self->register_eval_rule("html_range");
   $self->register_eval_rule("check_iframe_src");
-  $self->register_eval_rule("check_html_uri_only");
 
   return $self;
 }
@@ -197,24 +196,6 @@
   }
 
   return 0;
-}
-
-sub check_html_uri_only {
-  my ($self, $pms) = @_;
-
-  # Find out if there are any multipart/alternative parts in the message
-  my @ma = $pms->{msg}->find_parts(qr@^multipart/alternative\b@i);
-
-  # If there are no multipart/alternative sections, skip this test.
-  return if (!@ma);
-
-  # At this point, we're not actually checking the alternates, just the entire
-  # message.
-  foreach my $v ( values %{$pms->{html}->{uri_detail}} ) {
-    return 0 if (exists $v->{types}->{parsed});
-  }
-
-  return 1;
 }
 
 1;

Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm Wed Oct 25 09:15:31 2006
@@ -50,7 +50,6 @@
   $self->register_eval_rule("check_for_forged_eudoramail_received_headers");
   $self->register_eval_rule("check_for_forged_yahoo_received_headers");
   $self->register_eval_rule("check_for_forged_juno_received_headers");
-  $self->register_eval_rule("check_for_from_to_same");
   $self->register_eval_rule("check_for_matching_env_and_hdr_from");
   $self->register_eval_rule("sorted_recipients");
   $self->register_eval_rule("similar_recipients");
@@ -562,29 +561,6 @@
   return 0;   
 }
 
-# From and To have same address, but are not exactly the same and
-# neither contains intermediate spaces.
-sub check_for_from_to_same {
-  my ($self, $pms) = @_;
-
-  my $hdr_from = $pms->get('From');
-  my $hdr_to = $pms->get('To');
-  return 0 if (!length($hdr_from) || !length($hdr_to) ||
-	       $hdr_from eq $hdr_to);
-
-  my $addr_from = $pms->get('From:addr');
-  my $addr_to = $pms->get('To:addr');
-  # BUG: From:addr and To:addr sometimes contain whitespace
-  $addr_from =~ s/\s+//g;
-  $addr_to =~ s/\s+//g;
-  return 0 if (!length($addr_from) || !length($addr_to) ||
-	       $addr_from ne $addr_to);
-
-  if ($hdr_from =~ /^\s*\S+\s*$/ && $hdr_to =~ /^\s*\S+\s*$/) {
-    return 1;
-  }
-}
-
 sub check_for_matching_env_and_hdr_from {
   my ($self, $pms) =@_;
   # two blank headers match so don't bother checking
@@ -852,10 +828,10 @@
     foreach $rcvd (@local) {
       if ($rcvd =~ m/(\s.?\d+ \S\S\S \d+ \d+:\d+:\d+ \S+)/) {
 	my $date = $1;
-	dbg("eval: trying Received fetchmail header date for real time: $date");
+        dbg2("eval: trying Received fetchmail header date for real time: $date");
 	my $time = Mail::SpamAssassin::Util::parse_rfc822_date($date);
 	if (defined($time) && (time() >= $time)) {
-	  dbg("eval: time_t from date=$time, rcvd=$date");
+          dbg2("eval: time_t from date=$time, rcvd=$date");
 	  push @fetchmail_times, $time;
 	}
       }
@@ -872,10 +848,10 @@
   foreach $rcvd (@received) {
     if ($rcvd =~ m/(\s.?\d+ \S\S\S \d+ \d+:\d+:\d+ \S+)/) {
       my $date = $1;
-      dbg("eval: trying Received header date for real time: $date");
+      dbg2("eval: trying Received header date for real time: $date");
       my $time = Mail::SpamAssassin::Util::parse_rfc822_date($date);
       if (defined($time)) {
-	dbg("eval: time_t from date=$time, rcvd=$date");
+        dbg2("eval: time_t from date=$time, rcvd=$date");
 	push @header_times, $time;
       }
     }
@@ -1127,6 +1103,15 @@
   }
 
   return 0;
+}
+
+###########################################################################
+
+# support eval-test verbose debugs using "-Deval"
+sub dbg2 {
+  # forward to dbg() only when would_log('dbg', 'eval') returns 2
+  my $level = would_log('dbg', 'eval');
+  dbg(@_) if ($level == 2);
 }
 
 1;

Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm Wed Oct 25 09:15:31 2006
@@ -159,10 +159,6 @@
     $pms->{mime_base64_encoded_text} = 1;
   }
 
-  if ($cte =~ /base64/ && !$name) {
-    $pms->{mime_base64_no_name} = 1;
-  }
-
   if ($charset =~ /iso-\S+-\S+\b/i &&
       $charset !~ /iso-(?:8859-\d{1,2}|2022-(?:jp|kr))\b/)
   {
@@ -231,7 +227,6 @@
   $pms->{mime_base64_encoded_text} = 0;
   # $pms->{mime_base64_illegal} = 0;
   # $pms->{mime_base64_latin} = 0;
-  $pms->{mime_base64_no_name} = 0;
   $pms->{mime_body_html_count} = 0;
   $pms->{mime_body_text_count} = 0;
   $pms->{mime_faraway_charset} = 0;

Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/MIMEHeader.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/MIMEHeader.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/MIMEHeader.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/MIMEHeader.pm Wed Oct 25 09:15:31 2006
@@ -46,8 +46,10 @@
 
 Header names are considered case-insensitive.
 
-The header values are normally cleaned up a little. Append C<:raw> to the
-header name to retrieve the raw, undecoded value instead.
+The header values are normally cleaned up a little; for example, whitespace
+around the newline character in "folded" headers will be replaced with a single
+space.  Append C<:raw> to the header name to retrieve the raw, undecoded value,
+including pristine whitespace, instead.
 
 =back
 
@@ -129,6 +131,7 @@
 
       $self->{parser}->add_test($rulename, $evalfn."()",
                 $Mail::SpamAssassin::Conf::TYPE_BODY_EVALS);
+
       my $evalcode = '
         sub Mail::SpamAssassin::Plugin::MIMEHeader::'.$evalfn.' {
           $_[0]->eval_hook_called($_[1], q{'.$rulename.'});
@@ -142,6 +145,9 @@
       }
 
       $pluginobj->register_eval_rule($evalfn);
+
+      $pluginobj->register_generated_rule_method(
+        'Mail::SpamAssassin::Plugin::MIMEHeader::'.$evalfn);
     }
   });
 
@@ -168,7 +174,12 @@
   }
 
   foreach my $p ($scanner->{msg}->find_parts(qr/./)) {
-    my $val = $p->get_header($hdr, $getraw);
+    my $val;
+    if ($getraw) {
+      $val = $p->raw_header($hdr);
+    } else {
+      $val = $p->get_header($hdr);
+    }
     $val ||= $if_unset;
 
     if ($val =~ ${pattern}) {

Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/RelayEval.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/RelayEval.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/RelayEval.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/RelayEval.pm Wed Oct 25 09:15:31 2006
@@ -48,11 +48,9 @@
   $self->register_eval_rule("check_for_sender_no_reverse");
   $self->register_eval_rule("check_for_from_domain_in_received_headers");
   $self->register_eval_rule("check_for_forged_received_trail");
-  $self->register_eval_rule("check_for_forged_received_helo");
   $self->register_eval_rule("check_for_forged_received_ip_helo");
   $self->register_eval_rule("helo_ip_mismatch");
   $self->register_eval_rule("check_for_no_rdns_dotcom_helo");
-  $self->register_eval_rule("message_id_from_mta");
 
   return $self;
 }
@@ -162,7 +160,7 @@
       if ($pms->is_dns_available()) {
 	my $vrdns = $pms->lookup_ptr ($relay->{ip});
 	if (defined $vrdns && $vrdns ne $claimed) {
-	  dbg("eval: rdns/helo mismatch: helo=$relay->{helo} ".	
+	  dbg2("eval: rdns/helo mismatch: helo=$relay->{helo} ".	
 		"claimed-rdns=$claimed true-rdns=$vrdns");
 	  return 1;
 	  # TODO: instead, we should set a flag and check it later for
@@ -182,7 +180,7 @@
       }
 
       # otherwise there *is* a mismatch
-      dbg("eval: rdns/helo mismatch: helo=$relay->{helo} rdns=$claimed");
+      dbg2("eval: rdns/helo mismatch: helo=$relay->{helo} rdns=$claimed");
       return 1;
     }
   }
@@ -351,28 +349,13 @@
 
       # ok, let's catch the case where there's *no* reverse DNS there either
       if ($no_rdns) {
-	dbg("eval: Received: no rDNS for dotcom HELO: from=$from_host HELO=$helo_host");
+	dbg2("eval: Received: no rDNS for dotcom HELO: from=$from_host HELO=$helo_host");
 	$pms->{no_rdns_dotcom_helo} = 1;
       }
     }
   }
 } # _check_received_helos()
 
-# Message-ID for untrusted message was added by a trusted relay
-sub message_id_from_mta {
-  my ($self, $pms) = @_;
-
-  my $id = $pms->get('MESSAGEID');
-
-  if ($id && $pms->{num_relays_untrusted} > 0) {
-    for my $rcvd (@{$pms->{relays_untrusted}}[0], @{$pms->{relays_trusted}})
-    {
-      return 1 if $rcvd->{id} && (index(lc($id), lc($rcvd->{id})) != -1);
-    }
-  }
-  return 0;
-}
-
 # FORGED_RCVD_TRAIL
 sub check_for_forged_received_trail {
   my ($self, $pms) = @_;
@@ -380,13 +363,6 @@
   return ($pms->{mismatch_from} > 1);
 }
 
-# FORGED_RCVD_HELO
-sub check_for_forged_received_helo {
-  my ($self, $pms) = @_;
-  $self->_check_for_forged_received($pms) unless exists $pms->{mismatch_helo};
-  return ($pms->{mismatch_helo} > 0);
-}
-
 # FORGED_RCVD_IP_HELO
 sub check_for_forged_received_ip_helo {
   my ($self, $pms) = @_;
@@ -398,7 +374,6 @@
   my ($self, $pms) = @_;
 
   $pms->{mismatch_from} = 0;
-  $pms->{mismatch_helo} = 0;
   $pms->{mismatch_ip_helo} = 0;
 
   my $IP_PRIVATE = IP_PRIVATE;
@@ -433,7 +408,7 @@
     my $hlo = $helo[$i];
     my $by = $by[$i];
 
-    dbg("eval: forged-HELO: from=".(defined $frm ? $frm : "(undef)").
+    dbg2("eval: forged-HELO: from=".(defined $frm ? $frm : "(undef)").
 			" helo=".(defined $hlo ? $hlo : "(undef)").
 			" by=".(defined $by ? $by : "(undef)"));
 
@@ -442,15 +417,6 @@
 
     next unless ($by =~ /^\w+(?:[\w.-]+\.)+\w+$/);
 
-    if (defined($hlo) && defined($frm)
-		&& $hlo =~ /^\w+(?:[\w.-]+\.)+\w+$/
-		&& $frm =~ /^\w+(?:[\w.-]+\.)+\w+$/
-		&& $frm ne $hlo && !helo_forgery_whitelisted($frm, $hlo))
-    {
-      dbg("eval: forged-HELO: mismatch on HELO: '$hlo' != '$frm'");
-      $pms->{mismatch_helo}++;
-    }
-
     my $fip = $fromip[$i];
 
     if (defined($hlo) && defined($fip)) {
@@ -466,7 +432,7 @@
 		$hclassb ne $fclassb &&
 		!($hlo =~ /$IP_PRIVATE/o))
 	{
-	  dbg("eval: forged-HELO: massive mismatch on IP-addr HELO: '$hlo' != '$fip'");
+	  dbg2("eval: forged-HELO: massive mismatch on IP-addr HELO: '$hlo' != '$fip'");
 	  $pms->{mismatch_ip_helo}++;
 	}
       }
@@ -477,9 +443,18 @@
 		&& $prev =~ /^\w+(?:[\w.-]+\.)+\w+$/
 		&& $by ne $prev && !helo_forgery_whitelisted($by, $prev))
     {
-      dbg("eval: forged-HELO: mismatch on from: '$prev' != '$by'");
+      dbg2("eval: forged-HELO: mismatch on from: '$prev' != '$by'");
       $pms->{mismatch_from}++;
     }
+  }
+}
+
+###########################################################################
+
+# support eval-test verbose debugs using "-Deval"
+sub dbg2 {
+  if (would_log('dbg', 'eval') == 2) {
+    dbg(@_);
   }
 }
 

Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/URIEval.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/URIEval.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/URIEval.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/URIEval.pm Wed Oct 25 09:15:31 2006
@@ -38,7 +38,6 @@
   bless ($self, $class);
 
   # the important bit!
-  $self->register_eval_rule("check_domain_ratio");
   $self->register_eval_rule("check_for_http_redirector");
   $self->register_eval_rule("check_https_ip_mismatch");
 
@@ -46,16 +45,6 @@
 }
 
 ###########################################################################
-
-sub check_domain_ratio {
-  my ($self, $pms, $body, $ratio) = @_;
-  my $length = (length(join('', @{$body})) || 1);
-  if (!defined $pms->{uri_domain_count}) {
-    $pms->get_uri_list();
-  }
-  return 0 if !defined $pms->{uri_domain_count};
-  return (($pms->{uri_domain_count} / $length) > $ratio);
-}
 
 sub check_for_http_redirector {
   my ($self, $pms) = @_;

Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/PluginHandler.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/PluginHandler.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/PluginHandler.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/PluginHandler.pm Wed Oct 25 09:15:31 2006
@@ -138,7 +138,7 @@
   my ($self, $plugin) = @_;
   $plugin->{main} = $self->{main};
   push (@{$self->{plugins}}, $plugin);
-  dbg("plugin: registered $plugin");
+  # dbg("plugin: registered $plugin");
 
   # invalidate cache entries for any configuration-time hooks, in case
   # one has already been built; this plugin may implement that hook!

Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/SpamdForkScaling.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/SpamdForkScaling.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/SpamdForkScaling.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/SpamdForkScaling.pm Wed Oct 25 09:15:31 2006
@@ -426,7 +426,13 @@
     dbg("prefork: ordered $kid to accept");
 
     # now wait for it to say it's done that
-    return $self->wait_for_child_to_accept($sock);
+    my $ret = $self->wait_for_child_to_accept($kid, $sock);
+    if ($ret) {
+      return $ret;
+    } else {
+      # retry with another child
+      return $self->order_idle_child_to_accept();
+    }
 
   }
   else {
@@ -436,10 +442,11 @@
 }
 
 sub wait_for_child_to_accept {
-  my ($self, $sock) = @_;
+  my ($self, $kid, $sock) = @_;
 
   while (1) {
     my $state = $self->read_one_message_from_child_socket($sock);
+
     if ($state == PFSTATE_BUSY) {
       return 1;     # 1 == success
     }
@@ -447,7 +454,12 @@
       return undef;
     }
     else {
-      die "prefork: ordered child to accept, but child reported state '$state'";
+      warn "prefork: ordered child $kid to accept, but they reported state '$state', killing rogue";
+      $self->child_error_kill($kid, $sock);
+      $self->adapt_num_children();
+      sleep 1;
+
+      return undef;
     }
   }
 }

Modified: spamassassin/branches/jm_re2c_hacks/lib/spamassassin-run.pod
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/spamassassin-run.pod?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/spamassassin-run.pod (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/spamassassin-run.pod Wed Oct 25 09:15:31 2006
@@ -44,6 +44,7 @@
                                    Set user preferences file
  --siteconfigpath=path             Path for site configs
                                    (def: /etc/mail/spamassassin)
+ --cf='config line'                Additional line of configuration
  -x, --nocreate-prefs              Don't create user preferences file
  -e, --exit-code                   Exit with a non-zero exit code if the
                                    tested message was spam
@@ -237,6 +238,15 @@
 
 Use the specified path for locating site-specific configuration files.  Ignore
 the default directories (usually C</etc/mail/spamassassin> or similar).
+
+=item B<--cf='config line'>
+
+Add lines of configuration directly from the command line; these are parsed
+after the configuration files are read.  Multiple B<--cf> arguments can be
+used, and each will be considered a separate line of configuration.  For
+example:
+
+        spamassassin -t --cf="body NEWRULE /text/" --cf="score NEWRULE 3.0"
 
 =item B<-p> I<prefs>, B<--prefspath>=I<prefs>, B<--prefs-file>=I<prefs>
 

Modified: spamassassin/branches/jm_re2c_hacks/masses/rule-qa/automc/gen_info_xml
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/rule-qa/automc/gen_info_xml?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/rule-qa/automc/gen_info_xml (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/rule-qa/automc/gen_info_xml Wed Oct 25 09:15:31 2006
@@ -4,6 +4,12 @@
 # need this to ensure that 'svn log' will include ALL changes
 my $svn_checkins_root = "http://svn.apache.org/repos/asf/spamassassin/";
 
+# we won't provide who-checked-in and commit-message details for changes
+# older than this.  Note, this is not in rev number terms; it's an
+# absolute count of revisions.
+# update: alternatively, since the zone's SVN is too old (!), use a date.
+# my $svn_log_limit = 500;
+
 my $full_rebuild = 0;
 if ($ARGV[0] && $ARGV[0] =~ /^-f/) {
   $full_rebuild = 1;
@@ -158,8 +164,13 @@
 }
 
 sub get_svn_log {
-  print "getting svn log...\n";
-  if (open (IN, "svn log --xml $svn_checkins_root |")) {
+  print "getting svn log... (".time.")\n";
+
+  my $limitdate = strftime ("%Y-%m-%d", localtime time-(24*60*60*30*12));
+
+  if (open (IN, "svn log -r 'HEAD:{$limitdate}' --xml $svn_checkins_root |"))
+  # if (open (IN, "svn log --limit $svn_log_limit --xml $svn_checkins_root |"))
+  {
     eval {
       my $xml = join('', <IN>);
       $svn_log = XMLin($xml);
@@ -172,7 +183,7 @@
   if (!$svn_log) {
     die "no svn log --xml";
   }
-  print "got ".(scalar @{$svn_log->{logentry}})." log entries\n";
+  print "got ".(scalar @{$svn_log->{logentry}})." log entries (".time.")\n";
 
   # use Data::Dumper; print Dumper($svn_log); die;
 }

Modified: spamassassin/branches/jm_re2c_hacks/masses/rule-qa/automc/ruleqa.cgi
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/rule-qa/automc/ruleqa.cgi?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/rule-qa/automc/ruleqa.cgi (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/rule-qa/automc/ruleqa.cgi Wed Oct 25 09:15:31 2006
@@ -199,6 +199,8 @@
 
   # turn possibly-empty $self->{daterev} into a real date/rev combo (that exists)
   $self->{daterev} = $self->date_in_direction($self->{daterev}, 0);
+
+  $self->{daterev_md} = $self->get_daterev_metadata($self->{daterev});
 }
 
 # ---------------------------------------------------------------------------
@@ -980,6 +982,8 @@
 
   my $srcpath = $self->{srcpath};
   my $mtime = $self->{mtime};
+  my $no_net_rules = (!$self->{daterev_md}->{includes_net});
+
   if ($srcpath || $mtime) {
     my $rev = $self->get_rev_for_daterev($self->{daterev});
     my $md = $self->get_rule_metadata($rev);
@@ -1003,6 +1007,13 @@
              ($md->{$_}->{srcmtime} >= $target);
          } @rules;
     }
+
+    if ($no_net_rules) {    # bug 5047
+      @rules = grep {
+          !$md->{$_}->{tf} or
+             ($md->{$_}->{tf} !~ /\bnet\b/);
+         } @rules;
+    }
   }
 
   if ($self->{include_embedded_freqs_xml} == 0) {
@@ -1121,7 +1132,7 @@
   }
 
   my $outof = ($isspam ? $obj->{nspam} : $obj->{nham});
-  my $count = int (($percent/100.0) * $outof);
+  my $count = int ((($percent/100.0) * $outof) + 0.99); # round up
   return qq{
     $count\&nbsp;of\&nbsp;$outof\&nbsp;messages
   };

Modified: spamassassin/branches/jm_re2c_hacks/masses/rule-qa/corpus-hourly
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/rule-qa/corpus-hourly?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/rule-qa/corpus-hourly (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/rule-qa/corpus-hourly Wed Oct 25 09:15:31 2006
@@ -356,6 +356,11 @@
       $flags .= " -c '$opt{rules_dir}'";
     }
 
+    # are we analyzing --net mass-check logs?  if so, use scoreset 1
+    if (join(" ", @ham) =~ /-net-/) {
+      $flags .= " -s 1" if $class eq "NET";
+    }
+
     if ($age eq "all") {
       my %spam;
       my %ham;

Modified: spamassassin/branches/jm_re2c_hacks/rules/20_dnsbl_tests.cf
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/rules/20_dnsbl_tests.cf?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/rules/20_dnsbl_tests.cf (original)
+++ spamassassin/branches/jm_re2c_hacks/rules/20_dnsbl_tests.cf Wed Oct 25 09:15:31 2006
@@ -172,9 +172,9 @@
 tflags DNS_FROM_RFC_POST	net
 #reuse DNS_FROM_RFC_POST
 
-header DNS_FROM_RFC_ABUSE	eval:check_rbl_sub('rfci_envfrom', '127.0.0.4')
-describe DNS_FROM_RFC_ABUSE	Envelope sender in abuse.rfc-ignorant.org
-tflags DNS_FROM_RFC_ABUSE	net
+header DNS_FROM_RFC_ABUSE       eval:check_rbl_sub('rfci_envfrom', '127.0.0.4')
+describe DNS_FROM_RFC_ABUSE     Envelope sender in abuse.rfc-ignorant.org
+tflags DNS_FROM_RFC_ABUSE       net
 #reuse DNS_FROM_RFC_ABUSE
 
 header DNS_FROM_RFC_WHOIS	eval:check_rbl_sub('rfci_envfrom', '127.0.0.5')