You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2006/10/25 18:15:35 UTC
svn commit: r467701 [1/2] - in /spamassassin/branches/jm_re2c_hacks: ./
build/ build/automc/ lib/ lib/Mail/SpamAssassin/
lib/Mail/SpamAssassin/Bayes/ lib/Mail/SpamAssassin/BayesStore/
lib/Mail/SpamAssassin/Conf/ lib/Mail/SpamAssassin/Message/ lib/Mail/...
Author: jm
Date: Wed Oct 25 09:15:31 2006
New Revision: 467701
URL: http://svn.apache.org/viewvc?view=rev&rev=467701
Log:
merged up to r467692 on svn trunk using 'svn merge -r453533:467692 https://svn.apache.org/repos/asf/spamassassin/trunk'
Added:
spamassassin/branches/jm_re2c_hacks/rules/20_advance_fee.cf
- copied unchanged from r467692, spamassassin/trunk/rules/20_advance_fee.cf
spamassassin/branches/jm_re2c_hacks/rules/20_body_tests.cf
- copied unchanged from r467692, spamassassin/trunk/rules/20_body_tests.cf
spamassassin/branches/jm_re2c_hacks/rules/20_compensate.cf
- copied unchanged from r467692, spamassassin/trunk/rules/20_compensate.cf
spamassassin/branches/jm_re2c_hacks/rules/20_drugs.cf
- copied unchanged from r467692, spamassassin/trunk/rules/20_drugs.cf
spamassassin/branches/jm_re2c_hacks/rules/20_fake_helo_tests.cf
- copied unchanged from r467692, spamassassin/trunk/rules/20_fake_helo_tests.cf
spamassassin/branches/jm_re2c_hacks/rules/20_meta_tests.cf
- copied unchanged from r467692, spamassassin/trunk/rules/20_meta_tests.cf
spamassassin/branches/jm_re2c_hacks/rules/20_phrases.cf
- copied unchanged from r467692, spamassassin/trunk/rules/20_phrases.cf
spamassassin/branches/jm_re2c_hacks/rules/20_porn.cf
- copied unchanged from r467692, spamassassin/trunk/rules/20_porn.cf
spamassassin/branches/jm_re2c_hacks/rules/20_ratware.cf
- copied unchanged from r467692, spamassassin/trunk/rules/20_ratware.cf
spamassassin/branches/jm_re2c_hacks/rules/20_uri_tests.cf
- copied unchanged from r467692, spamassassin/trunk/rules/20_uri_tests.cf
spamassassin/branches/jm_re2c_hacks/rules/25_body_tests_pl.cf
- copied unchanged from r467692, spamassassin/trunk/rules/25_body_tests_pl.cf
spamassassin/branches/jm_re2c_hacks/rules/30_text_de.cf
- copied unchanged from r467692, spamassassin/trunk/rules/30_text_de.cf
spamassassin/branches/jm_re2c_hacks/rules/30_text_fr.cf
- copied unchanged from r467692, spamassassin/trunk/rules/30_text_fr.cf
spamassassin/branches/jm_re2c_hacks/rules/30_text_it.cf
- copied unchanged from r467692, spamassassin/trunk/rules/30_text_it.cf
spamassassin/branches/jm_re2c_hacks/rules/30_text_nl.cf
- copied unchanged from r467692, spamassassin/trunk/rules/30_text_nl.cf
spamassassin/branches/jm_re2c_hacks/rules/30_text_pl.cf
- copied unchanged from r467692, spamassassin/trunk/rules/30_text_pl.cf
spamassassin/branches/jm_re2c_hacks/rules/30_text_pt_br.cf
- copied unchanged from r467692, spamassassin/trunk/rules/30_text_pt_br.cf
spamassassin/branches/jm_re2c_hacks/rules/50_scores.cf
- copied unchanged from r467692, spamassassin/trunk/rules/50_scores.cf
spamassassin/branches/jm_re2c_hacks/t/config_text.t
- copied unchanged from r467692, spamassassin/trunk/t/config_text.t
spamassassin/branches/jm_re2c_hacks/t/dnsbl_sc_meta.t
- copied unchanged from r467692, spamassassin/trunk/t/dnsbl_sc_meta.t
Removed:
spamassassin/branches/jm_re2c_hacks/BUGS
spamassassin/branches/jm_re2c_hacks/STATUS
Modified:
spamassassin/branches/jm_re2c_hacks/MANIFEST
spamassassin/branches/jm_re2c_hacks/Makefile.PL
spamassassin/branches/jm_re2c_hacks/build/README
spamassassin/branches/jm_re2c_hacks/build/automc/run_preflight
spamassassin/branches/jm_re2c_hacks/build/mkrules
spamassassin/branches/jm_re2c_hacks/build/update_website_docs
spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/ArchiveIterator.pm
spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Bayes/CombineChi.pm
spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/BayesStore/DBM.pm
spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Conf/Parser.pm
spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Dns.pm
spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Message.pm
spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Message/Node.pm
spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/PerMsgStatus.pm
spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin.pm
spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyEval.pm
spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm
spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm
spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm
spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm
spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/MIMEHeader.pm
spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/RelayEval.pm
spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/URIEval.pm
spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/PluginHandler.pm
spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/SpamdForkScaling.pm
spamassassin/branches/jm_re2c_hacks/lib/spamassassin-run.pod
spamassassin/branches/jm_re2c_hacks/masses/rule-qa/automc/gen_info_xml
spamassassin/branches/jm_re2c_hacks/masses/rule-qa/automc/ruleqa.cgi
spamassassin/branches/jm_re2c_hacks/masses/rule-qa/corpus-hourly
spamassassin/branches/jm_re2c_hacks/rules/20_dnsbl_tests.cf
spamassassin/branches/jm_re2c_hacks/rules/20_head_tests.cf
spamassassin/branches/jm_re2c_hacks/rules/20_html_tests.cf
spamassassin/branches/jm_re2c_hacks/rules/25_replace.cf
spamassassin/branches/jm_re2c_hacks/rules/active.list
spamassassin/branches/jm_re2c_hacks/rules/regression_tests.cf
spamassassin/branches/jm_re2c_hacks/rules/rule2xs.pre
spamassassin/branches/jm_re2c_hacks/sa-learn.raw
spamassassin/branches/jm_re2c_hacks/sa-update.raw
spamassassin/branches/jm_re2c_hacks/spamassassin.raw
spamassassin/branches/jm_re2c_hacks/spamc/configure
spamassassin/branches/jm_re2c_hacks/spamc/configure.in
spamassassin/branches/jm_re2c_hacks/spamd/spamd.raw
spamassassin/branches/jm_re2c_hacks/t/bayesdbm.t
spamassassin/branches/jm_re2c_hacks/t/mimeheader.t
spamassassin/branches/jm_re2c_hacks/t/missing_hb_separator.t
spamassassin/branches/jm_re2c_hacks/t/mkrules.t
Modified: spamassassin/branches/jm_re2c_hacks/MANIFEST
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/MANIFEST?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/MANIFEST (original)
+++ spamassassin/branches/jm_re2c_hacks/MANIFEST Wed Oct 25 09:15:31 2006
@@ -1,4 +1,3 @@
-BUGS
CREDITS
Changes
INSTALL
@@ -10,7 +9,6 @@
NOTICE
PACKAGING
README
-STATUS
TRADEMARK
UPGRADE
USAGE
@@ -269,6 +267,7 @@
t/cidrs.t
t/config.dist
t/config_errs.t
+t/config_text.t
t/cpp_comments_in_spamc.t
t/data/01_test_rules.cf
t/data/etc/hello.txt
@@ -368,6 +367,7 @@
t/debug.t
t/desc_wrap.t
t/dnsbl.t
+t/dnsbl_sc_meta.t
t/get_headers.t
t/gtube.t
t/hashcash.t
@@ -481,6 +481,24 @@
rules/25_spf.cf
rules/25_textcat.cf
rules/25_uribl.cf
+rules/20_advance_fee.cf
+rules/20_body_tests.cf
+rules/20_compensate.cf
+rules/20_drugs.cf
+rules/20_fake_helo_tests.cf
+rules/20_meta_tests.cf
+rules/20_phrases.cf
+rules/20_porn.cf
+rules/20_ratware.cf
+rules/20_uri_tests.cf
+rules/25_body_tests_pl.cf
+rules/30_text_de.cf
+rules/30_text_fr.cf
+rules/30_text_it.cf
+rules/30_text_nl.cf
+rules/30_text_pl.cf
+rules/30_text_pt_br.cf
+rules/50_scores.cf
rules/60_awl.cf
rules/60_shortcircuit.cf
rules/60_whitelist.cf
@@ -488,7 +506,6 @@
rules/60_whitelist_dkim.cf
rules/60_whitelist_spf.cf
rules/60_whitelist_subject.cf
-rules/70_sandbox.cf
rules/72_active.cf
rules/STATISTICS-set0.txt
rules/STATISTICS-set1.txt
@@ -504,7 +521,6 @@
rules/v312.pre
rules/v320.pre
rules/active.list
-rules/70_inactive.cf
t/mkrules.t
t/trust_path.t
t/data/nice/dkim/AddedVtag_07
Modified: spamassassin/branches/jm_re2c_hacks/Makefile.PL
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/Makefile.PL?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/Makefile.PL (original)
+++ spamassassin/branches/jm_re2c_hacks/Makefile.PL Wed Oct 25 09:15:31 2006
@@ -245,34 +245,14 @@
'rules/*.pm',
- # at one stage, these were copied from rulesrc into the rules
- # build dir, or were in SVN in this dir, but this no longer
- # is the case. Ensure they get cleaned on "make clean".
- 'rules/20_advance_fee.cf',
- 'rules/20_compensate.cf',
- 'rules/20_drugs.cf',
- 'rules/20_fake_helo_tests.cf',
- 'rules/20_phrases.cf',
- 'rules/20_porn.cf',
- 'rules/20_ratware.cf',
- 'rules/20_uri_tests.cf',
- 'rules/25_body_tests_es.cf',
- 'rules/25_body_tests_pl.cf',
- 'rules/30_text_de.cf',
- 'rules/30_text_fr.cf',
- 'rules/30_text_it.cf',
- 'rules/30_text_nl.cf',
- 'rules/30_text_pl.cf',
- 'rules/30_text_pt_br.cf',
- 'rules/50_scores.cf',
- 'rules/70_broken_rules.cf',
-
# don't remove these. they are built from 'rulesrc' in SVN, but
# in a distribution tarball, they're not
- # 'rules/70_inactive.cf',
# 'rules/70_sandbox.cf',
# 'rules/72_active.cf',
+ # this file is no longer built, or used
+ 'rules/70_inactive.cf',
+
)
},
@@ -1141,7 +1121,8 @@
$(PREPROCESS) $(FIXBYTES) $(FIXVARS) $(FIXBANG) -m$(PERM_RWX) -i$? -o$@
build_rules:
- $(PERL) build/mkrules --src rulesrc --out rules --manifest MANIFEST --manifestskip MANIFEST.SKIP
+ -rm rules/70_inactive.cf
+ [ ! -d rulesrc ] || $(PERL) build/mkrules --src rulesrc --out rules --manifest MANIFEST --manifestskip MANIFEST.SKIP
SPAMC_MAKEFILE = spamc/Makefile
MAKE_SPAMC = $(MAKE) -f $(SPAMC_MAKEFILE)
Modified: spamassassin/branches/jm_re2c_hacks/build/README
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/build/README?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/build/README (original)
+++ spamassassin/branches/jm_re2c_hacks/build/README Wed Oct 25 09:15:31 2006
@@ -1,4 +1,3 @@
-
SPAMASSASSIN DEVELOPMENT SNAPSHOT PROCEDURE
===========================================
@@ -40,6 +39,14 @@
PATH=$HOME/sabuildtools/perl584/bin:$HOME/sabuildtools/bin:$PATH
+- Run "svn status" to ensure you're building/releasing from a pristine
+ checkout:
+
+ svn status
+
+ (It should return no text; any files marked "M" have been locally
+ modified, and should be "svn revert"ed before you continue.)
+
- edit lib/Mail/SpamAssassin.pm and comment the $IS_DEVEL_BUILD
line. Ensure the correct version number is present in $VERSION
and $EXTRA_VERSION.
@@ -86,15 +93,15 @@
- Check in the updated Changes file.
- svn commit -m "preparing to release X.Y.Z" Changes
+ svn commit -m "preparing to release X.Y.Z"
- SVN tag the release files. This is done using "svn copy".
For a maintenance release (x.y.1, x.y.2):
repo=https://svn.apache.org/repos/asf/spamassassin
- svn copy -m "creating tag for release 3.0.1" \
- $repo/branches/3.0 \
- $repo/tags/spamassassin_release_3_0_1
+ svn copy -m "creating tag for release 3.1.7" \
+ $repo/branches/3.1 \
+ $repo/tags/spamassassin_release_3_1_7
For a trunk release (x.y.0):
@@ -114,6 +121,10 @@
a branch really) of whatever the latest branch revision to be the new
base of the tag release.
+ Note that if you have to re-run this command later to fix a broken
+ release in progress, you need to "svn delete" the previous copy of the
+ tag, otherwise it'll create a subdirectory instead of overwriting.
+
- run "make distcheck" to ensure all files are included in the
distribution that should be, and to ensure all files that are listed
in the MANIFEST also exist in SVN.
@@ -136,25 +147,43 @@
- test the tar.gz and zip files! redo until they work!! ;)
-- Propose a release, and post the URL and md5sums/sha1sums to the
- dev list. Once you've got 2 committer +1's, in addition to your own,
- carry on:
+- Write the release announcement mail! This is a simple matter of copying
+ the previous release's announcement, updating the version numbers and
+ links, fixing the MD5 and SHA1 checksums in this mail, and summarising
+ the important changes from the Changes file.
+
+ cp build/announcements/3.1.7.txt build/announcements/3.1.8.txt
+ svn add !$
+ vi !$
+
+- (for any rc, prerelease, or full release) Place the tarballs in a
+ discreet location (discreet means not linked from downloads, but
+ included in the vote mail) and request a vote on the development mailing
+ list to make the release. Post the URL, md5sums/sha1sums, and proposed
+ release announcement mail.
+
+ Three +1 votes are required to make the release official. The release
+ manager (that's you) may vote as well. Once there are three or more +1
+ votes, you may proceed.
+
+- (for a full release) You need 3 +1's from PMC members.
- SVN commit the release files, including 'Changes':
svn commit -m "X.Y.Z RELEASED"
-- Now, start the new development codebase. For minor updates of a 2.x
- tree (e.g. 2.x1, 2.x2), you don't need to branch; for major updates
- (2.x0) you should use a new development branch, off the trunk.
+- [X.Y.0 RELEASES ONLY]: Now, start the new development codebase. For
+ minor updates of a 2.x tree (e.g. 2.x.1, 2.x.2), you don't need to
+ branch; for major updates (2.x.0) you need to create a new development
+ branch, off the trunk.
repo=https://svn.apache.org/repos/asf/spamassassin
- svn copy $repo/tags/spamassassin_release_3_0_0 \
- $repo/branches/b3_0_0
+ svn copy $repo/tags/spamassassin_release_3_1_0 \
+ $repo/branches/b3_1_0
"trunk" is SVN's concept of head. Typically, our branches are named
- for their minor version number. In the example above, b3_0_0 is the
- branch for the stable 3.0.x releases.
+ for their minor version number. In the example above, b3_1_0 is the
+ branch for the stable 3.1.x releases.
- In the new development codeline, edit lib/Mail/SpamAssassin.pm, bump the
$VERSION line to the correct version, and uncomment the $IS_DEVEL_BUILD
@@ -169,15 +198,6 @@
(where X.Y.N is the new version number)
-- (for any rc, prerelease, or full release) Place the tarballs in a
- discreet location (discreet means not linked from downloads, but
- included in the vote mail) and request a vote on the development
- mailing list to make the release, three +1 votes are required to make
- the release official. The release manager (that's you) may vote as
- well. Once there are three or more +1 votes, you may proceed.
-
-- (for a full release) You need 3 +1's from PMC members.
-
- !WARNING! After the next step, the version number will be considered
"burned". The number is locked for this particular code. The same
number cannot be used for a future different release. So make sure
@@ -240,44 +260,66 @@
cd /www/spamassassin.apache.org
webmake -F
-- update the 'dist' tree in the SpamAssassin website:
+- update the 'doc' tree in the SpamAssassin website:
cd /www/spamassassin.apache.org
svn up
svn delete --force full/3.1.x
- svn commit -m "removing old dist tree"
+ svn commit -m "removing old doc tree from website" full
cd [checkedoutdir]
build/update_website_docs
cd /www/spamassassin.apache.org
svn add full/3.1.x
- svn commit -m "updating new dist tree" full
-
+ svn commit -m "updating new doc tree on website" full
+
- and check the entire website into SVN (see bug 4998 for reasons why).
Note that you may need to resolve conflicts and ensure the contents of
'full/3.1.x' are all added successfully:
- svn commit -m "added new release"
+ svn up
+ svn commit -m "added new release to website"
+
+- update the tag used to point to "current release":
+
+ repo=https://svn.apache.org/repos/asf/spamassassin
+ svn delete -m "updating for new release" \
+ $repo/tags/spamassassin_current_release_3.1.x
+ svn copy -m "updating for new release" \
+ $repo/tags/spamassassin_release_3_1_7 \
+ $repo/tags/spamassassin_current_release_3.1.x
+
+- upload release tarball to CPAN at http://pause.cpan.org/:
+
+ https://pause.perl.org/pause/authenquery?ACTION=add_uri
+
+ (Note that recently, PAUSE has started indexing sub-modules under
+ Mail::SpamAssassin::, and it can't deal with our multi-maintainer
+ setup. You may receive mail indicating that "indexing failed" after
+ the upload; as long as the main Mail::SpamAssassin module was indexed
+ correctly, this is fine. However, it would help if you could visit
-- upload to CPAN at http://pause.cpan.org/
+ https://pause.perl.org/pause/authenquery?ACTION=share_perms
- ( https://pause.perl.org/pause/authenquery?ACTION=add_uri )
+ select 3.1 ("Make someone else co-maintainer"), and ensure that the
+ other releasers (JMASON, DOS, FELICITY, others?) all have permissions as
+ 'co-maintainer' on the full set of your listed modules in the
+ "Mail::SpamAssassin::" namespace.)
-- Before doing the next step, run through the Changes file, and write up a
- quick summary of the important changes in human-readable format. This
- should be less than 600 chars to fit into Freshmeat's format, and
- to be easily understandable.
+- announce on the users, dev, and announce mailing lists using the
+ previously-prepared release announcement.
-- announce to Freshmeat at http://freshmeat.net/
+- Before doing the next step, run through the release summary mail, and
+ write up a really short summary of the important changes in
+ human-readable format. This should be less than 600 chars to fit into
+ Freshmeat's format, and to be easily understandable.
- ( http://freshmeat.net/add-release/14876/ may work )
+- announce to Freshmeat at http://freshmeat.net/:
-- announce on SpamAssassin-Users, SpamAssassin-Dev, and
- SpamAssassin-Announce. Be sure to include the MD5 checksums in this
- mail, so paranoid folks can check the tarball's integrity.
+ http://freshmeat.net/add-release/14876/
-- Approve the posting to SpamAssassin-Announce (the list admins will get a
+- Approve the posting to the announce list (the list admins will get a
mail indicating how to do this.)
// vim:tw=74:
Modified: spamassassin/branches/jm_re2c_hacks/build/automc/run_preflight
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/build/automc/run_preflight?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/build/automc/run_preflight (original)
+++ spamassassin/branches/jm_re2c_hacks/build/automc/run_preflight Wed Oct 25 09:15:31 2006
@@ -90,16 +90,18 @@
# produce lots of noisy output to stop the buildbot from timing out on
# mass-checks of large corpora.
# store AICache data in /tmpfs/aicache.
+# ignore mails older than 6 months (use the nightly runs for those corpora,
+# it's too slow to mass-check them here).
run "/local/bbmasstools/masschroot $perl ".
"mass-check -c=tstrules --cache -j=1 ".
"--noisy --deencap='petuniapress.com' ".
"--cachedir=/tmpfs/aicache ".
+ "--after='6 months ago' ".
$mass_check_args{$slavename}." ".
"ham:detect:/home/bbmass/rawcor/*/ham/* ".
"spam:detect:/home/bbmass/rawcor/*/spam/*";
- # TODO: add --after="6 months ago"?
exit;
Modified: spamassassin/branches/jm_re2c_hacks/build/mkrules
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/build/mkrules?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/build/mkrules (original)
+++ spamassassin/branches/jm_re2c_hacks/build/mkrules Wed Oct 25 09:15:31 2006
@@ -161,6 +161,7 @@
my $ALWAYS_PUBLISH = '!always_publish!';
read_all_rules($needs_compile);
+read_rules_from_output_dir();
compile_output_files();
lint_output_files();
write_output_files();
@@ -391,10 +392,19 @@
{
$rules->{$name}->{found_definition} = 1;
}
- # userconf rules are always published in "active"
- elsif (($type eq 'tflags') && ($val =~ /\buserconf\b/))
- {
- $rules->{$name}->{forceactive} = 1;
+ elsif ($type eq 'tflags') {
+ # userconf rules are always published in "active"
+ if ($val =~ /\buserconf\b/) {
+ $rules->{$name}->{forceactive} = 1;
+ }
+
+ # record for rulemetadata code
+ $val =~ s/\s+/ /gs;
+ if ($rules->{$name}->{tflags}) {
+ $rules->{$name}->{tflags} .= ' '.$val;
+ } else {
+ $rules->{$name}->{tflags} = $val;
+ }
}
$current_comments = '';
@@ -413,9 +423,13 @@
my $val = $3;
my $origname = $name;
- if ($issandbox) {
- $name = sandbox_rule_name_avoid_collisions($name, $f);
- }
+
+ # note: if we call sandbox_rule_name_avoid_collisions(), it'll
+ # rename to 'T_RULENAME' -- which is exactly what we're trying
+ # to avoid in 'publish RULENAME' lines! so don't call it here.
+ # if ($issandbox) {
+ # $name = sandbox_rule_name_avoid_collisions($name, $f);
+ # }
if (!$rules->{$name}) { $rules->{$name} = rule_entry_create(); }
$rules->{$name}->{origname} = $origname;
@@ -423,7 +437,7 @@
if ($command eq 'publish') {
# the 'publish' command defaults to "1", unless it explicitly
# is set to "0". iow: publish RULE_NAME [(0 | 1)] [default: 1]
- if (!defined $val) { $val = '1'; }
+ if (!defined $val || $val eq '') { $val = '1'; }
}
elsif ($command eq 'pubfile') {
if (!filename_in_manifest($opt_out.'/'.$val)) {
@@ -433,6 +447,12 @@
}
$rules->{$name}->{$command} = $val;
+
+ # if we see "publish NAMEOFRULE", that means the rule is
+ # considered active
+ if ($rules->{$name}->{publish}) {
+ $rules->{$name}->{forceactive} = 1;
+ }
}
elsif (/^
(if|ifplugin)
@@ -504,6 +524,16 @@
/^\s*(\S+)/ and $name = $1;
$name =~ s/\s+/ /gs;
+ my $forceactive = 1;
+
+ # always send 'test' lines to the sandbox or inactive files
+ if (/^test\s*/) {
+ $forceactive = 0;
+
+ $name = $_; # ensure we don't drag rules with us though!
+ $name =~ s/\s+/ /gs;
+ }
+
my $cond;
if ($current_conditional) {
$name = $current_conditional; $name =~ s/\s+/ /gs;
@@ -519,7 +549,8 @@
}
$rules->{$name}->{cond} ||= $cond;
$rules->{$name}->{issandbox} = $issandbox;
- $rules->{$name}->{forceactive} = 1;
+ $rules->{$name}->{forceactive} = $forceactive;
+ # $rules->{$name}->{forceactive} = 1;
$rules->{$name}->{iscommand} = 1;
# warn "unknown line in rules file '$f', saving to default: $orig";
@@ -544,6 +575,54 @@
}
}
+# this is only run if we're generating rulemetadata!
+sub read_rules_from_output_dir {
+ return unless ($opt_rulemetadata);
+
+ foreach my $file (<$opt_out/*.cf>) {
+ next unless ($file =~ /\d\d_\S+\.cf$/);
+ next if ($file =~ /\/\Q$opt_activeout\E/);
+ next if ($file =~ /\/\Q$opt_sandboxout\E/);
+ next if ($file =~ /\/\Q$opt_inactiveout\E/);
+ read_output_file($file);
+ }
+}
+
+sub read_output_file {
+ my ($file) = @_;
+ open (IN, "<$file") or warn "cannot read $file";
+ while (<IN>) {
+ my $orig = $_;
+
+ s/^#reuse/reuse/; # TODO - dirty hack. we need to fix this to just be
+ # a keyword which the engine ignores, this is absurd!
+
+ s/#.*$//g; s/^\s+//; s/\s+$//;
+
+ # drop comments/blank lines from output
+ next if (/^$/);
+
+ # save "lang" declarations
+ my $lang = '';
+ if (s/^lang\s+(\S+)\s+//) {
+ $lang = $1;
+ }
+
+ # right now, we only read tflags from output files
+ if (/^tflags\s+(\S+)\s+(.*)$/) {
+ my $name = $1;
+ my $val = $2;
+ $val =~ s/\s+/ /gs;
+ if ($rules->{$name}->{tflags}) {
+ $rules->{$name}->{tflags} .= ' '.$val;
+ } else {
+ $rules->{$name}->{tflags} = $val;
+ }
+ }
+ }
+ close IN;
+}
+
sub copy_to_output_buffers {
my ($rule_order, $issandbox, $f, $filename) = @_;
@@ -577,9 +656,6 @@
$is_active++;
}
- # fix up any rule renamings we were supposed to do
- sed_renamed_rule_names(\$text);
-
my $cond = $rules->{$name}->{cond};
my $pluginclass = $rules->{$name}->{ifplugin};
if ($cond)
@@ -632,6 +708,9 @@
$pubfile = $rules->{$name}->{pubfile};
if ($pubfile) {
$pubfile = $opt_out.'/'.$pubfile;
+ } else {
+ # "publish NAMEOFRULE" => send it to active
+ $pubfile = $opt_out.'/'.$opt_activeout;
}
}
@@ -769,19 +848,27 @@
my ($rule) = @_;
return '' unless ($opt_rulemetadata);
- return '' unless ($rules->{$rule}->{found_definition});
my $mod = 0;
- my $srcfile = $rules->{$rule}->{srcfile};
- my @s = stat $srcfile;
- if (@s) {
- $mod = $s[9];
+ my $srcfile = '';
+
+ if ($rules->{$rule}->{found_definition}) {
+ $srcfile = $rules->{$rule}->{srcfile} || '';
+ if ($srcfile) {
+ my @s = stat $srcfile;
+ if (@s) {
+ $mod = $s[9];
+ }
+ }
}
+ my $tf = $rules->{$rule}->{tflags} || '';
+
return "<rulemetadata>".
"<name>$rule</name>".
"<src>$srcfile</src>".
"<srcmtime>$mod</srcmtime>".
+ "<tf>$tf</tf>".
"</rulemetadata>\n";
}
@@ -799,7 +886,11 @@
# force that subrule (if it exists) to output in the
# same pubfile
my $rule2 = $1;
- next unless ($rules->{$rule2} && $rules->{$rule2}->{output_text});
+
+ # deal with rules that changed name from "FOO" to "T_FOO"
+ sed_renamed_rule_names(\$rule2);
+
+ next unless ($rules->{$rule2} && $rules->{$rule2}->{output_file});
# don't do this if the subrule would be moved *out* of the
# active file!
@@ -821,9 +912,11 @@
warn "$pubfile: WARNING: not listed in manifest file\n";
}
- if ($output_file_text->{$pubfile}) {
+ my $text = $output_file_text->{$pubfile};
+ if ($text) {
open (OUT, ">".$pubfile) or die "cannot write to output file '$pubfile'";
- print OUT $output_file_text->{$pubfile};
+ sed_renamed_rule_names(\$text);
+ print OUT $text;
close OUT or die "cannot close output file '$pubfile'";
# print "$pubfile: written\n"; # too noisy
}
@@ -856,6 +949,7 @@
return $rule if $opt_listpromotable;
return $rule if $active_rules->{$rule};
+ return $rule if $rules->{$rule}->{forceactive};
if ($rule !~ /^(?:T_|__)/) {
$new = "T_".$rule;
@@ -929,7 +1023,6 @@
my ($fname) = @_;
return 1 if ($file_manifest->{$fname});
foreach my $skipre (@{$file_manifest_skip}) {
- # warn "JMD $skipre $fname";
return 1 if ($fname =~ $skipre);
}
return 0;
Modified: spamassassin/branches/jm_re2c_hacks/build/update_website_docs
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/build/update_website_docs?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/build/update_website_docs (original)
+++ spamassassin/branches/jm_re2c_hacks/build/update_website_docs Wed Oct 25 09:15:31 2006
@@ -38,3 +38,5 @@
rm -f docbak
)
+chmod -R g+w $fulldir/dist
+
Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/ArchiveIterator.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/ArchiveIterator.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/ArchiveIterator.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/ArchiveIterator.pm Wed Oct 25 09:15:31 2006
@@ -488,7 +488,7 @@
}
}
- my @messages;
+ my $messages;
if ($self->{opt_n}) {
# OPT_N == 1 means don't bother sorting on message receive date
@@ -502,8 +502,10 @@
splice(@{$self->{h}}, min ($self->{opt_head}, scalar @{$self->{h}}));
}
- @messages = ( @{$self->{s}}, @{$self->{h}} );
+ # for ease of memory, we'll play with pointers
+ $messages = $self->{s};
undef $self->{s};
+ push(@{$messages}, @{$self->{h}});
undef $self->{h};
}
else {
@@ -529,22 +531,22 @@
if (@s && @h) {
my $ratio = @s / @h;
while (@s && @h) {
- push @messages, (@s / @h > $ratio) ? (shift @s) : (shift @h);
+ push @{$messages}, (@s / @h > $ratio) ? (shift @s) : (shift @h);
}
}
# push the rest onto the end
- push @messages, @s, @h;
+ push @{$messages}, @s, @h;
}
# head or tail < 0 means crop the total list, negate the value appropriately
if ($self->{opt_tail} < 0) {
- splice(@messages, 0, $self->{opt_tail});
+ splice(@{$messages}, 0, $self->{opt_tail});
}
if ($self->{opt_head} < 0) {
- splice(@messages, -$self->{opt_head});
+ splice(@{$messages}, -$self->{opt_head});
}
- return scalar(@messages), \@messages;
+ return scalar(@{$messages}), $messages;
}
sub mail_open {
@@ -569,7 +571,7 @@
############################################################################
-sub message_is_useful_by_date {
+sub message_is_useful_by_date {
my ($self, $date) = @_;
return 0 unless $date; # undef or 0 date = unusable
@@ -579,7 +581,7 @@
return 1;
}
elsif (!$self->{opt_before}) {
- # Just case about after
+ # Just care about after
return $date > $self->{opt_after};
}
else {
@@ -587,6 +589,24 @@
}
}
+# additional check, based solely on a file's mod timestamp. we cannot
+# make assumptions about --before, since the file may have been "touch"ed
+# since the last message was appended; but we can assume that too-old
+# files cannot contain messages newer than their modtime.
+sub message_is_useful_by_file_modtime {
+ my ($self, $date) = @_;
+
+ # better safe than sorry, if date is undef; let other stuff catch errors
+ return 1 unless $date;
+
+ if ($self->{opt_after}) {
+ return ($date > $self->{opt_after});
+ }
+ else {
+ return 1; # --after not in use
+ }
+}
+
############################################################################
# 0 850852128 atime
@@ -646,13 +666,16 @@
my ($self, $class, $mail) = @_;
$self->bump_scan_progress();
+
+ my @s = stat($mail);
+ return unless $self->message_is_useful_by_file_modtime($s[9]);
+
if (!$self->{determine_receive_date}) {
push(@{$self->{$class}}, index_pack(AI_TIME_UNKNOWN, $class, "f", $mail));
return;
}
my $date;
-
unless (defined $AICache and $date = $AICache->check($mail)) {
my $header;
if (!mail_open($mail)) {
@@ -706,6 +729,9 @@
next;
}
+ my @s = stat($file);
+ next unless $self->message_is_useful_by_file_modtime($s[9]);
+
my $info = {};
my $count;
@@ -807,6 +833,9 @@
$self->{access_problem} = 1;
next;
}
+
+ my @s = stat($file);
+ next unless $self->message_is_useful_by_file_modtime($s[9]);
my $info = {};
my $count;
Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Bayes/CombineChi.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Bayes/CombineChi.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Bayes/CombineChi.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Bayes/CombineChi.pm Wed Oct 25 09:15:31 2006
@@ -41,7 +41,7 @@
# the use of "s") of an original assumed expectation ... relative to how
# strongly we want to consider our actual collected data." Low 's' means
# trust collected data more strongly.
-our $FW_S_CONSTANT = 0.100;
+our $FW_S_CONSTANT = 0.030;
# (s . x) for the f(w) equation.
our $FW_S_DOT_X = ($FW_X_CONSTANT * $FW_S_CONSTANT);
Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/BayesStore/DBM.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/BayesStore/DBM.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/BayesStore/DBM.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/BayesStore/DBM.pm Wed Oct 25 09:15:31 2006
@@ -590,7 +590,7 @@
my $db_var = 'db_'.$dbname;
if (exists $self->{$db_var}) {
- dbg("bayes: untie-ing $db_var");
+ # dbg("bayes: untie-ing $db_var");
untie %{$self->{$db_var}};
delete $self->{$db_var};
}
@@ -1515,7 +1515,7 @@
my $db_var = 'db_'.$dbname;
if (exists $self->{$db_var}) {
- dbg("bayes: untie-ing $db_var");
+ # dbg("bayes: untie-ing $db_var");
untie %{$self->{$db_var}};
delete $self->{$db_var};
}
Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Conf/Parser.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Conf/Parser.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Conf/Parser.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Conf/Parser.pm Wed Oct 25 09:15:31 2006
@@ -932,7 +932,9 @@
}
$conf->{priority}->{$name} ||= 0;
$conf->{source_file}->{$name} = $self->{currentfile};
- $conf->{if_stack}->{$name} = $self->get_if_stack_as_string();
+
+ # this no longer seems to be needed!
+ # $conf->{if_stack}->{$name} = $self->get_if_stack_as_string();
if ($self->{scoresonly}) {
$conf->{user_rules_to_compile}->{$type} = 1;
Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Dns.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Dns.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Dns.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Dns.pm Wed Oct 25 09:15:31 2006
@@ -205,7 +205,7 @@
$self->{already_logged}->{$log} = 1;
}
- if (!defined $self->{tests_already_hit}->{$rule}) {
+ if (!$self->{tests_already_hit}->{$rule}) {
$self->got_hit($rule, "RBL: ", ruletype => "dnsbl");
}
}
@@ -277,7 +277,7 @@
my $rdatastr = $answer->rdatastr;
while (my ($subtest, $rule) = each %{ $self->{dnspost}->{$set} }) {
- next if defined $self->{tests_already_hit}->{$rule};
+ next if $self->{tests_already_hit}->{$rule};
# exact substr (usually IP address)
if ($subtest eq $rdatastr) {
Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Message.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Message.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Message.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Message.pm Wed Oct 25 09:15:31 2006
@@ -731,7 +731,7 @@
}
if (!$in_body) {
- s/\s+$//;
+ # s/\s+$//; # bug 5127: don't clean this up (yet)
if (m/^[\041-\071\073-\176]+:/) {
if ($header) {
my ( $key, $value ) = split ( /:\s*/, $header, 2 );
@@ -741,7 +741,7 @@
next;
}
elsif (/^[ \t]/) {
- $_ =~ s/^\s*//;
+ # $_ =~ s/^\s*//; # bug 5127, again
$header .= $_;
next;
}
Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Message/Node.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Message/Node.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Message/Node.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Message/Node.pm Wed Oct 25 09:15:31 2006
@@ -170,7 +170,11 @@
$self->{'raw_headers'}->{$key} = [];
}
- push @{ $self->{'headers'}->{$key} }, $self->_decode_header($raw_value);
+ my $dec_value = $raw_value;
+ $dec_value =~ s/\n[ \t]+/ /gs;
+ $dec_value =~ s/\s*$//s;
+ push @{ $self->{'headers'}->{$key} }, $self->_decode_header($dec_value);
+
push @{ $self->{'raw_headers'}->{$key} }, $raw_value;
return $self->{'headers'}->{$key}->[-1];
Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/PerMsgStatus.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/PerMsgStatus.pm Wed Oct 25 09:15:31 2006
@@ -1374,44 +1374,6 @@
sub finish_tests {
my ($conf) = @_;
-
- foreach my $priority (keys %{$conf->{priorities}}) {
- # clean up priority value so it can be used in a subroutine name
- my $clean_priority;
- ($clean_priority = $priority) =~ s/-/neg/;
-
- if (defined &{'_head_tests_'.$clean_priority}) {
- undef &{'_head_tests_'.$clean_priority};
- }
- foreach my $rulename (keys %{$conf->{head_tests}->{$priority}}) {
- undef &{$rulename.'_head_test'};
- }
- if (defined &{'_body_tests_'.$clean_priority}) {
- undef &{'_body_tests_'.$clean_priority};
- }
- foreach my $rulename (keys %{$conf->{body_tests}->{$priority}}) {
- undef &{$rulename.'_body_test'};
- }
- if (defined &{'_body_uri_tests_'.$clean_priority}) {
- undef &{'_body_uri_tests_'.$clean_priority};
- }
- foreach my $rulename (keys %{$conf->{uri_tests}->{$priority}}) {
- undef &{$rulename.'_uri_test'};
- }
- if (defined &{'_rawbody_tests_'.$clean_priority}) {
- undef &{'_rawbody_tests_'.$clean_priority};
- }
- foreach my $rulename (keys %{$conf->{rawbody_tests}->{$priority}}) {
- undef &{$rulename.'_rawbody_test'};
- }
- if (defined &{'_full_tests_'.$clean_priority}) {
- undef &{'_full_tests_'.$clean_priority};
- }
- if (defined &{'_meta_tests_'.$clean_priority}) {
- undef &{'_meta_tests_'.$clean_priority};
- }
- }
-
foreach my $method (@TEMPORARY_METHODS) {
if (defined &{$method}) {
undef &{$method};
@@ -1842,6 +1804,7 @@
}
}
';
+ push (@TEMPORARY_METHODS, $rulename.'_head_test');
}
else {
# store for use below
@@ -1911,8 +1874,10 @@
$self->{rule_errors}++;
}
else {
+ my $method = '_head_tests_'.$clean_priority;
+ push @TEMPORARY_METHODS, $method;
no strict "refs";
- &{'Mail::SpamAssassin::PerMsgStatus::_head_tests_'.$clean_priority}($self);
+ &{$method}($self);
use strict "refs";
}
}
@@ -2039,6 +2004,7 @@
$evalstr2 .= '
sub '.$rulename.'_body_test { my $self = shift; '.$sub.' }
';
+ push (@TEMPORARY_METHODS, $rulename.'_body_test');
}
$evalstr2 .= '
@@ -2080,8 +2046,10 @@
$self->{rule_errors}++;
}
else {
+ my $method = '_body_tests_'.$clean_priority;
+ push @TEMPORARY_METHODS, $method;
no strict "refs";
- &{'Mail::SpamAssassin::PerMsgStatus::_body_tests_'.$clean_priority}($self, @$textary);
+ &{$method}($self,@$textary);
use strict "refs";
}
}
@@ -2492,6 +2460,7 @@
$evalstr2 .= '
sub '.$rulename.'_uri_test { my $self = shift; '.$sub.' }
';
+ push (@TEMPORARY_METHODS, $rulename.'_uri_test');
}
}
@@ -2525,8 +2494,10 @@
$self->{rule_errors}++;
}
else {
+ my $method = '_body_uri_tests_'.$clean_priority;
+ push @TEMPORARY_METHODS, $method;
no strict "refs";
- &{'Mail::SpamAssassin::PerMsgStatus::_body_uri_tests_'.$clean_priority}($self, @uris);
+ &{$method}($self, @uris);
use strict "refs";
}
}
@@ -2617,6 +2588,7 @@
$evalstr2 .= '
sub '.$rulename.'_rawbody_test { my $self = shift; '.$sub.' }
';
+ push (@TEMPORARY_METHODS, $rulename.'_rawbody_test');
}
}
@@ -2650,8 +2622,10 @@
$self->{rule_errors}++;
}
else {
+ my $method = '_rawbody_tests_'.$clean_priority;
+ push @TEMPORARY_METHODS, $method;
no strict "refs";
- &{'Mail::SpamAssassin::PerMsgStatus::_rawbody_tests_'.$clean_priority}($self, @$textary);
+ &{$method}($self, @$textary);
use strict "refs";
}
}
@@ -2724,8 +2698,10 @@
warn "rules: failed to compile full tests, skipping:\n" . "\t($@)\n";
$self->{rule_errors}++;
} else {
+ my $method = '_full_tests_'.$clean_priority;
+ push @TEMPORARY_METHODS, $method;
no strict "refs";
- &{'Mail::SpamAssassin::PerMsgStatus::_full_tests_'.$clean_priority}($self, $fullmsgref);
+ &{$method}($self, $fullmsgref);
use strict "refs";
}
}
@@ -2939,8 +2915,10 @@
$self->{rule_errors}++;
}
else {
+ my $method = '_meta_tests_'.$clean_priority;
+ push @TEMPORARY_METHODS, $method;
no strict "refs";
- &{'Mail::SpamAssassin::PerMsgStatus::_meta_tests_'.$clean_priority}($self);
+ &{$method}($self);
use strict "refs";
}
} # do_meta_tests()
@@ -3112,13 +3090,12 @@
eval $evalstr;
- push (@TEMPORARY_METHODS, $methodname);
-
if ($@) {
warn "rules: failed to compile eval tests, skipping some: $@\n";
$self->{rule_errors}++;
}
else {
+ push (@TEMPORARY_METHODS, $methodname);
no strict "refs";
&{'Mail::SpamAssassin::PerMsgStatus::'.$methodname}($self,@extraevalargs);
use strict "refs";
Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin.pm Wed Oct 25 09:15:31 2006
@@ -779,6 +779,30 @@
$self->{main}->{conf}->register_eval_rule ($self, $nameofsub);
}
+=item $plugin->register_generated_rule_method ($nameofsub)
+
+In certain circumstances, plugins may find it useful to compile
+perl functions from the ruleset, on the fly. It is important to
+remove these once the C<Mail::SpamAssassin> object is deleted,
+however, and this API allows this.
+
+Once the method C<$nameofsub> has been generated, call this API
+with the name of the method (including full package scope).
+This indicates that it's a temporary piece of generated code,
+built from the SpamAssassin ruleset, and when
+C<Mail::SpamAssassin::finish()> is called, the method will
+be destroyed.
+
+This API was added in SpamAssassin 3.2.0.
+
+=cut
+
+sub register_generated_rule_method { # record $nameofsub as a temporary, generated method
+ my ($self, $nameofsub) = @_;
+ push @Mail::SpamAssassin::PerMsgStatus::TEMPORARY_METHODS, # PerMsgStatus::finish_tests() undefines each sub named in this list
+ $nameofsub;
+}
+
=item $plugin->inhibit_further_callbacks()
Tells the plugin handler to inhibit calling into other plugins in the plugin
@@ -885,6 +909,23 @@
The configuration file arguments will be passed in after the standard
arguments.
+
+=head1 BACKWARDS COMPATIBILITY
+
+Note that if you write a plugin and need to determine if a particular
+helper method is supported on C<Mail::SpamAssassin::Plugin>, you
+can do this:
+
+ if ($self->can("name_of_method")) {
+ eval {
+ $self->name_of_method(); # etc.
+ }
+ } else {
+ # take fallback action
+ }
+
+The same applies for the public APIs on objects of other types, such as
+C<Mail::SpamAssassin::PerMsgStatus>.
=head1 SEE ALSO
Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyEval.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyEval.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyEval.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyEval.pm Wed Oct 25 09:15:31 2006
@@ -39,43 +39,12 @@
bless ($self, $class);
# the important bit!
- $self->register_eval_rule("check_unique_words");
$self->register_eval_rule("multipart_alternative_difference");
$self->register_eval_rule("multipart_alternative_difference_count");
$self->register_eval_rule("check_blank_line_ratio");
$self->register_eval_rule("tvd_vertical_words");
return $self;
-}
-
-sub check_unique_words {
- my ($self, $pms, $body, $m, $b) = @_;
-
- if (!defined $pms->{unique_words_repeat}) {
- $pms->{unique_words_repeat} = 0;
- $pms->{unique_words_unique} = 0;
- my %count;
- for (@$body) {
- # copy to avoid changing @$body
- my $line = $_;
- # from tokenize_line in Bayes.pm
- $line =~ tr/-A-Za-z0-9,\@\*\!_'"\$.\241-\377 / /cs;
- $line =~ s/(\w)(\.{3,6})(\w)/$1 $2 $3/gs;
- $line =~ s/(\w)(\-{2,6})(\w)/$1 $2 $3/gs;
- $line =~ s/(?:^|\.\s+)([A-Z])([^A-Z]+)(?:\s|$)/ ' '.(lc $1).$2.' '/ge;
- for my $token (split(' ', $line)) {
- $count{$token}++;
- }
- }
- $pms->{unique_words_unique} = scalar grep { $_ == 1 } values(%count);
- $pms->{unique_words_repeat} = scalar keys(%count) - $pms->{unique_words_unique};
- }
-
- # y = mx+b where y is number of unique words needed
- my $unique = $pms->{unique_words_unique};
- my $repeat = $pms->{unique_words_repeat};
- my $y = ($unique + $repeat) * $m + $b;
- return ($unique > $y);
}
sub multipart_alternative_difference {
Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm Wed Oct 25 09:15:31 2006
@@ -44,6 +44,7 @@
my $BASES_CAN_USE_ALTERNATIONS = 0; # /(foo|bar|baz)/
my $BASES_CAN_USE_QUANTIFIERS = 0; # /foo.*bar/ or /foo*bar/ or /foooo?bar/
my $BASES_CAN_USE_CHAR_CLASSES = 0; # /fo[opqr]bar/
+my $SPLIT_OUT_ALTERNATIONS = 1; # /(foo|bar|baz)/ => ["foo", "bar", "baz"]
###########################################################################
@@ -54,6 +55,7 @@
my $self = $class->SUPER::new($mailsaobject);
bless ($self, $class);
+ # $self->test();
return $self;
}
@@ -75,7 +77,10 @@
if ($rawf) {
$rawf =~ /^(.*)$/;
- $f = $1; # untaint; allow anything here, it's from %ENV and safe
+ $f = $1; # untaint; allow anything here, it's from %ENV and safe
+ }
+ else {
+ return; # TODO: comment this for Rabin-Karp
}
$self->extract_set($f, $conf, $conf->{body_tests}, 'body');
@@ -120,22 +125,50 @@
# TODO: need cleaner way to do this
next if ($conf->{rules_to_replace}->{$name});
- my $base = $self->extract_base($rule, 0);
- my $base2 = $self->extract_base($rule, 1);
-
- my $len = $base ? $self->count_regexp_statements($base) : 0;
- my $len2 = $base2 ? $self->count_regexp_statements($base2) : 0;
-
- if ($base2 && (!$base || ($len2 > $len))) {
- $base = $base2;
- $len = $len2;
+ my @bases1 = ();
+ my @bases2 = ();
+ eval { # catch die()s
+ @bases1 = $self->extract_hints($rule, 0);
+ };
+ eval {
+ @bases2 = $self->extract_hints($rule, 1);
+ };
+
+ # if any of the extracted hints in a set are too short, the entire
+ # set is invalid; this is because each set of N hints represents just
+ # 1 regexp.
+ my $minlen1;
+ foreach my $str (@bases1) {
+ my $len = length $str;
+ if ($len < $min_chars) { $minlen1 = undef; @bases1 = (); last; }
+ elsif (!defined($minlen1) || $len < $minlen1) { $minlen1 = $len; }
+ }
+ my $minlen2;
+ foreach my $str (@bases2) {
+ my $len = length $str;
+ if ($len < $min_chars) { $minlen2 = undef; @bases2 = (); last; }
+ elsif (!defined($minlen2) || $len < $minlen2) { $minlen2 = $len; }
}
- if (!$base || $len < $min_chars) { $base = undef; }
+ if (defined $minlen1 && !defined $minlen2) {
+ # keep using @bases1
+ }
+ elsif (!defined $minlen1 && defined $minlen2) {
+ # change to using @bases2
+ @bases1 = @bases2;
+ }
+ elsif (defined $minlen1 && defined $minlen2) {
+ # both are valid; use the end with the longer hints
+ if ($minlen2 > $minlen1) {
+ @bases1 = @bases2;
+ }
+ }
- if ($base) {
+ if ($minlen1 && @bases1) {
# dbg("zoom: YES <base>$base</base> <origrule>$rule</origrule>");
- push @good_bases, { base => $base, orig => $rule, name => $name };
+ foreach my $base (@bases1) {
+ push @good_bases, { base => $base, orig => $rule, name => $name };
+ }
$yes++;
}
else {
@@ -156,7 +189,7 @@
# returned as two hits, correctly. So we only have to be smart about the
# full-subsumption case; overlapping is taken care of for us, by re2c.
#
- # TODO: there's a bug here. Since the code in extract_base() has been
+ # TODO: there's a bug here. Since the code in extract_hints() has been
# modified to support more complex regexps, we can no longer simply assume
# that if pattern A is not contained in pattern B, that means that pattern B
# doesn't subsume it. Consider, for example, A="foo*bar" and
@@ -250,7 +283,7 @@
# /time to refinance|refinanc\w{1,3}\b.{0,16}\bnow\b/i
# => should understand alternations; tricky
-sub extract_base {
+sub extract_hints {
my $self = shift;
my $rule = shift;
my $is_reversed = shift;
@@ -289,8 +322,8 @@
$rule =~ s/\(\?i\)//gs;
}
else {
- return if $rule =~ /\(\?i\)/;
- return if $mods =~ /i/;
+ die "case-i" if $rule =~ /\(\?i\)/;
+ die "case-i" if $mods =~ /i/;
}
# remove /m and /s modifiers
@@ -313,8 +346,8 @@
# if there are anchors, give up; we can't get much
# faster than these anyway
- return if $rule =~ /^\(?(?:\^|\\A)/;
- return if $rule =~ /(?:\$|\\Z)\)?$/;
+ die "anchors" if $rule =~ /^\(?(?:\^|\\A)/;
+ die "anchors" if $rule =~ /(?:\$|\\Z)\)?$/;
# simplify (?:..) to (..)
$rule =~ s/\(\?:/\(/g;
@@ -350,7 +383,8 @@
.\{
).*$//gsx;
- $BASES_CAN_USE_ALTERNATIONS or $rule =~ s/(?<!\\)(?:
+ ($BASES_CAN_USE_ALTERNATIONS||$SPLIT_OUT_ALTERNATIONS) or
+ $rule =~ s/(?<!\\)(?:
\(|
\)
).*$//gsx;
@@ -417,7 +451,7 @@
[^\[]*\]
)/sx)
{
- return;
+ die "pattern starts with a class in a group";
}
# kill quantifiers right at the start of the string.
@@ -445,10 +479,10 @@
# return for things we know we can't handle.
- if (!$BASES_CAN_USE_ALTERNATIONS) {
+ if (!($BASES_CAN_USE_ALTERNATIONS||$SPLIT_OUT_ALTERNATIONS)) {
if ($rule =~ /\|/) {
# /time to refinance|refinanc\w{1,3}\b.{0,16}\bnow\b/i
- return;
+ die "alternations";
}
}
@@ -456,41 +490,50 @@
# count (...braces...) to ensure the numbers match up
my @c = ($rule =~ /(?<!\\)\(/g); my $brace_i = scalar @c;
@c = ($rule =~ /(?<!\\)\)/g); my $brace_o = scalar @c;
- if ($brace_i != $brace_o) { return; }
+ if ($brace_i != $brace_o) { die "brace mismatch"; }
}
# do the same for [charclasses]
{
my @c = ($rule =~ /(?<!\\)\[/g); my $brace_i = scalar @c;
@c = ($rule =~ /(?<!\\)\]/g); my $brace_o = scalar @c;
- if ($brace_i != $brace_o) { return; }
+ if ($brace_i != $brace_o) { die "charclass mismatch"; }
}
# and {quantifiers}
{
my @c = ($rule =~ /(?<!\\)\{/g); my $brace_i = scalar @c;
@c = ($rule =~ /(?<!\\)\}/g); my $brace_o = scalar @c;
- if ($brace_i != $brace_o) { return; }
+ if ($brace_i != $brace_o) { die "quantifier mismatch"; }
}
# lookaheads that are just too far for the re2c parser
# r your .{0,40}account .{0,40}security
if ($rule =~ /\.\{(\d+),?(\d+?)\}/ and ($1+$2 > 20)) {
- return;
+ die "too far lookahead";
}
# re2xs doesn't like escaped brackets
if ($rule =~ /\\:/) {
- return;
+ die "escaped bracket";
+ }
+
+ my @rules;
+ if ($SPLIT_OUT_ALTERNATIONS && $rule =~ /\|/) {
+ @rules = $self->split_alt($rule);
+ }
+ else {
+ @rules = ($rule);
}
- # finally, reassemble a usable regexp
+ # finally, reassemble a usable regexp / set of regexps
if ($mods ne '') {
$mods = "(?$mods)";
}
- $rule = $mods . $rule;
- return $rule;
+ return map {
+ $mods.$_;
+ } @rules;
}
sub count_regexp_statements {
@@ -536,5 +579,114 @@
return $rule;
}
+
+###########################################################################
+
+sub split_alt {
+ my ($self, $re) = @_;
+ # expand the alternations in $re into a list of alternation-free patterns, e.g. "foo (bar|baz) argh" => ("foo bar argh", "foo baz argh")
+ # warn "JMD in $re";
+ # use "($re)" instead of "$re" to handle /foo|baz/ -- implied group
+ my @res = $self->_split_alt_recurse(0, '('.$re.')'); # 0 = starting recursion depth
+ # warn "JMD out ".join('/ /', @res);
+ return @res;
+}
+
+sub _split_alt_recurse {
+ my ($self, $depth, $re) = @_;
+ # expand the innermost (a|b|...) group of $re into one pattern per alternative, recursing until no groups remain; returns the de-duplicated list
+ $depth++;
+ die "recursed too far in alternation splitting" if ($depth > 5); # guard against runaway expansion
+
+ # trim unnecessary group markers, e.g. /f(oo)/ => /foo/
+ $re =~ s/\(([^\(\)\|]*)\)/$1/gs;
+
+ # identify the smallest nested (...|...) scope
+ my ($pre, $alts, $post) = ($re =~ m{
+ ^(.*)
+ (?<!\\)\(([^\(\)]*?\|[^\(\)]*?)\)
+ (.*)$
+ }xs);
+
+ # the captures are taken from the match's own return list, so a failed
+ # match leaves all three undefined instead of exposing stale $1..$3
+ # values left behind by an earlier successful match.
+
+ if (!defined $post) {
+ $re =~ s/\(([^\(\)\|]*)\)/$1/gs;
+ return ($re); # didn't match; no groups
+ }
+
+ # and expand it
+ my @out = ();
+ foreach my $str (split (/(?<!\\)\|/, $alts)) {
+ $str = $pre.$str.$post;
+ # are there unresolved groups left?
+ if ($str =~ /(?<!\\)[\(\|\)]/) {
+ push @out, $self->_split_alt_recurse($depth, $str);
+ } else {
+ push @out, $str;
+ }
+ }
+
+ { # uniq
+ my %u=(); @out = grep {defined} map {
+ if (exists $u{$_}) { undef; } else { $u{$_}=undef;$_; }
+ } @out; undef %u;
+ }
+
+ return @out;
+}
+
+###########################################################################
+
+sub test {
+ my ($self) = @_;
+ # developer self-test for split_alt(): each call passes an input regexp and the expected results written as "/a/ /b/"
+ $self->test_split_alt("foo", "/foo/");
+ $self->test_split_alt("(foo)", "/foo/");
+ $self->test_split_alt("foo(bar)baz", "/foobarbaz/");
+ $self->test_split_alt("(foo|bar)", "/foo/ /bar/");
+ $self->test_split_alt("foo|bar", "/foo/ /bar/");
+ $self->test_split_alt("foo (bar|baz) argh", "/foo bar argh/ /foo baz argh/");
+ $self->test_split_alt("foo (bar|baz|bl(arg|at)) cough", "/foo bar cough/ /foo baz cough/ /foo blarg cough/ /foo blat cough/");
+ $self->test_split_alt("(s(otc|tco)k)", "/sotck/ /stcok/");
+ exit; # terminates the whole process once the checks have run
+}
+
+sub test_split_alt {
+ my ($self, $in, $out) = @_;
+ # run split_alt($in) and compare against $out, written as "/a/ /b/"; prints "ok" and returns 1 on success, else prints want/got and returns 0
+ my @got = $self->split_alt($in);
+ $out =~ s/^\///; # strip the leading "/"
+ $out =~ s/\/$//; # strip the trailing "/"
+ my @want = split(/\/ \//, $out); # the remaining "/ /" separators delimit the expected patterns
+
+ my $failed = 0;
+ if (scalar @want != scalar @got) {
+ warn "FAIL: results count don't match";
+ $failed++;
+ }
+ else {
+ my %got = map { $_ => 1 } @got; # order-independent membership check
+ foreach my $w (@want) {
+ if (!$got{$w}) {
+ warn "FAIL: '$w' not found";
+ $failed++;
+ }
+ }
+ }
+
+ if ($failed) {
+ print "want: /".join('/ /', @want)."/\n";
+ print "got: /".join('/ /', @got)."/\n";
+ return 0;
+ } else {
+ print "ok\n";
+ return 1;
+ }
+}
+
+###########################################################################
1;
Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/HTMLEval.pm Wed Oct 25 09:15:31 2006
@@ -49,7 +49,6 @@
$self->register_eval_rule("html_text_not_match");
$self->register_eval_rule("html_range");
$self->register_eval_rule("check_iframe_src");
- $self->register_eval_rule("check_html_uri_only");
return $self;
}
@@ -197,24 +196,6 @@
}
return 0;
-}
-
-sub check_html_uri_only {
- my ($self, $pms) = @_;
-
- # Find out if there are any multipart/alternative parts in the message
- my @ma = $pms->{msg}->find_parts(qr@^multipart/alternative\b@i);
-
- # If there are no multipart/alternative sections, skip this test.
- return if (!@ma);
-
- # At this point, we're not actually checking the alternates, just the entire
- # message.
- foreach my $v ( values %{$pms->{html}->{uri_detail}} ) {
- return 0 if (exists $v->{types}->{parsed});
- }
-
- return 1;
}
1;
Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/HeaderEval.pm Wed Oct 25 09:15:31 2006
@@ -50,7 +50,6 @@
$self->register_eval_rule("check_for_forged_eudoramail_received_headers");
$self->register_eval_rule("check_for_forged_yahoo_received_headers");
$self->register_eval_rule("check_for_forged_juno_received_headers");
- $self->register_eval_rule("check_for_from_to_same");
$self->register_eval_rule("check_for_matching_env_and_hdr_from");
$self->register_eval_rule("sorted_recipients");
$self->register_eval_rule("similar_recipients");
@@ -562,29 +561,6 @@
return 0;
}
-# From and To have same address, but are not exactly the same and
-# neither contains intermediate spaces.
-sub check_for_from_to_same {
- my ($self, $pms) = @_;
-
- my $hdr_from = $pms->get('From');
- my $hdr_to = $pms->get('To');
- return 0 if (!length($hdr_from) || !length($hdr_to) ||
- $hdr_from eq $hdr_to);
-
- my $addr_from = $pms->get('From:addr');
- my $addr_to = $pms->get('To:addr');
- # BUG: From:addr and To:addr sometimes contain whitespace
- $addr_from =~ s/\s+//g;
- $addr_to =~ s/\s+//g;
- return 0 if (!length($addr_from) || !length($addr_to) ||
- $addr_from ne $addr_to);
-
- if ($hdr_from =~ /^\s*\S+\s*$/ && $hdr_to =~ /^\s*\S+\s*$/) {
- return 1;
- }
-}
-
sub check_for_matching_env_and_hdr_from {
my ($self, $pms) =@_;
# two blank headers match so don't bother checking
@@ -852,10 +828,10 @@
foreach $rcvd (@local) {
if ($rcvd =~ m/(\s.?\d+ \S\S\S \d+ \d+:\d+:\d+ \S+)/) {
my $date = $1;
- dbg("eval: trying Received fetchmail header date for real time: $date");
+ dbg2("eval: trying Received fetchmail header date for real time: $date");
my $time = Mail::SpamAssassin::Util::parse_rfc822_date($date);
if (defined($time) && (time() >= $time)) {
- dbg("eval: time_t from date=$time, rcvd=$date");
+ dbg2("eval: time_t from date=$time, rcvd=$date");
push @fetchmail_times, $time;
}
}
@@ -872,10 +848,10 @@
foreach $rcvd (@received) {
if ($rcvd =~ m/(\s.?\d+ \S\S\S \d+ \d+:\d+:\d+ \S+)/) {
my $date = $1;
- dbg("eval: trying Received header date for real time: $date");
+ dbg2("eval: trying Received header date for real time: $date");
my $time = Mail::SpamAssassin::Util::parse_rfc822_date($date);
if (defined($time)) {
- dbg("eval: time_t from date=$time, rcvd=$date");
+ dbg2("eval: time_t from date=$time, rcvd=$date");
push @header_times, $time;
}
}
@@ -1127,6 +1103,15 @@
}
return 0;
+}
+
+###########################################################################
+
+# support eval-test verbose debugs using "-Deval": arguments are passed on to dbg() only when would_log('dbg', 'eval') returns 2
+sub dbg2 {
+ if (would_log('dbg', 'eval') == 2) { # NOTE(review): presumably 2 means the 'eval' facility was requested explicitly -- confirm against the logger
+ dbg(@_);
+ }
}
1;
Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/MIMEEval.pm Wed Oct 25 09:15:31 2006
@@ -159,10 +159,6 @@
$pms->{mime_base64_encoded_text} = 1;
}
- if ($cte =~ /base64/ && !$name) {
- $pms->{mime_base64_no_name} = 1;
- }
-
if ($charset =~ /iso-\S+-\S+\b/i &&
$charset !~ /iso-(?:8859-\d{1,2}|2022-(?:jp|kr))\b/)
{
@@ -231,7 +227,6 @@
$pms->{mime_base64_encoded_text} = 0;
# $pms->{mime_base64_illegal} = 0;
# $pms->{mime_base64_latin} = 0;
- $pms->{mime_base64_no_name} = 0;
$pms->{mime_body_html_count} = 0;
$pms->{mime_body_text_count} = 0;
$pms->{mime_faraway_charset} = 0;
Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/MIMEHeader.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/MIMEHeader.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/MIMEHeader.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/MIMEHeader.pm Wed Oct 25 09:15:31 2006
@@ -46,8 +46,10 @@
Header names are considered case-insensitive.
-The header values are normally cleaned up a little. Append C<:raw> to the
-header name to retrieve the raw, undecoded value instead.
+The header values are normally cleaned up a little; for example, whitespace
+around the newline character in "folded" headers will be replaced with a single
+space. Append C<:raw> to the header name to retrieve the raw, undecoded value,
+including pristine whitespace, instead.
=back
@@ -129,6 +131,7 @@
$self->{parser}->add_test($rulename, $evalfn."()",
$Mail::SpamAssassin::Conf::TYPE_BODY_EVALS);
+
my $evalcode = '
sub Mail::SpamAssassin::Plugin::MIMEHeader::'.$evalfn.' {
$_[0]->eval_hook_called($_[1], q{'.$rulename.'});
@@ -142,6 +145,9 @@
}
$pluginobj->register_eval_rule($evalfn);
+
+ $pluginobj->register_generated_rule_method(
+ 'Mail::SpamAssassin::Plugin::MIMEHeader::'.$evalfn);
}
});
@@ -168,7 +174,12 @@
}
foreach my $p ($scanner->{msg}->find_parts(qr/./)) {
- my $val = $p->get_header($hdr, $getraw);
+ my $val;
+ if ($getraw) {
+ $val = $p->raw_header($hdr);
+ } else {
+ $val = $p->get_header($hdr);
+ }
$val ||= $if_unset;
if ($val =~ ${pattern}) {
Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/RelayEval.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/RelayEval.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/RelayEval.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/RelayEval.pm Wed Oct 25 09:15:31 2006
@@ -48,11 +48,9 @@
$self->register_eval_rule("check_for_sender_no_reverse");
$self->register_eval_rule("check_for_from_domain_in_received_headers");
$self->register_eval_rule("check_for_forged_received_trail");
- $self->register_eval_rule("check_for_forged_received_helo");
$self->register_eval_rule("check_for_forged_received_ip_helo");
$self->register_eval_rule("helo_ip_mismatch");
$self->register_eval_rule("check_for_no_rdns_dotcom_helo");
- $self->register_eval_rule("message_id_from_mta");
return $self;
}
@@ -162,7 +160,7 @@
if ($pms->is_dns_available()) {
my $vrdns = $pms->lookup_ptr ($relay->{ip});
if (defined $vrdns && $vrdns ne $claimed) {
- dbg("eval: rdns/helo mismatch: helo=$relay->{helo} ".
+ dbg2("eval: rdns/helo mismatch: helo=$relay->{helo} ".
"claimed-rdns=$claimed true-rdns=$vrdns");
return 1;
# TODO: instead, we should set a flag and check it later for
@@ -182,7 +180,7 @@
}
# otherwise there *is* a mismatch
- dbg("eval: rdns/helo mismatch: helo=$relay->{helo} rdns=$claimed");
+ dbg2("eval: rdns/helo mismatch: helo=$relay->{helo} rdns=$claimed");
return 1;
}
}
@@ -351,28 +349,13 @@
# ok, let's catch the case where there's *no* reverse DNS there either
if ($no_rdns) {
- dbg("eval: Received: no rDNS for dotcom HELO: from=$from_host HELO=$helo_host");
+ dbg2("eval: Received: no rDNS for dotcom HELO: from=$from_host HELO=$helo_host");
$pms->{no_rdns_dotcom_helo} = 1;
}
}
}
} # _check_received_helos()
-# Message-ID for untrusted message was added by a trusted relay
-sub message_id_from_mta {
- my ($self, $pms) = @_;
-
- my $id = $pms->get('MESSAGEID');
-
- if ($id && $pms->{num_relays_untrusted} > 0) {
- for my $rcvd (@{$pms->{relays_untrusted}}[0], @{$pms->{relays_trusted}})
- {
- return 1 if $rcvd->{id} && (index(lc($id), lc($rcvd->{id})) != -1);
- }
- }
- return 0;
-}
-
# FORGED_RCVD_TRAIL
sub check_for_forged_received_trail {
my ($self, $pms) = @_;
@@ -380,13 +363,6 @@
return ($pms->{mismatch_from} > 1);
}
-# FORGED_RCVD_HELO
-sub check_for_forged_received_helo {
- my ($self, $pms) = @_;
- $self->_check_for_forged_received($pms) unless exists $pms->{mismatch_helo};
- return ($pms->{mismatch_helo} > 0);
-}
-
# FORGED_RCVD_IP_HELO
sub check_for_forged_received_ip_helo {
my ($self, $pms) = @_;
@@ -398,7 +374,6 @@
my ($self, $pms) = @_;
$pms->{mismatch_from} = 0;
- $pms->{mismatch_helo} = 0;
$pms->{mismatch_ip_helo} = 0;
my $IP_PRIVATE = IP_PRIVATE;
@@ -433,7 +408,7 @@
my $hlo = $helo[$i];
my $by = $by[$i];
- dbg("eval: forged-HELO: from=".(defined $frm ? $frm : "(undef)").
+ dbg2("eval: forged-HELO: from=".(defined $frm ? $frm : "(undef)").
" helo=".(defined $hlo ? $hlo : "(undef)").
" by=".(defined $by ? $by : "(undef)"));
@@ -442,15 +417,6 @@
next unless ($by =~ /^\w+(?:[\w.-]+\.)+\w+$/);
- if (defined($hlo) && defined($frm)
- && $hlo =~ /^\w+(?:[\w.-]+\.)+\w+$/
- && $frm =~ /^\w+(?:[\w.-]+\.)+\w+$/
- && $frm ne $hlo && !helo_forgery_whitelisted($frm, $hlo))
- {
- dbg("eval: forged-HELO: mismatch on HELO: '$hlo' != '$frm'");
- $pms->{mismatch_helo}++;
- }
-
my $fip = $fromip[$i];
if (defined($hlo) && defined($fip)) {
@@ -466,7 +432,7 @@
$hclassb ne $fclassb &&
!($hlo =~ /$IP_PRIVATE/o))
{
- dbg("eval: forged-HELO: massive mismatch on IP-addr HELO: '$hlo' != '$fip'");
+ dbg2("eval: forged-HELO: massive mismatch on IP-addr HELO: '$hlo' != '$fip'");
$pms->{mismatch_ip_helo}++;
}
}
@@ -477,9 +443,18 @@
&& $prev =~ /^\w+(?:[\w.-]+\.)+\w+$/
&& $by ne $prev && !helo_forgery_whitelisted($by, $prev))
{
- dbg("eval: forged-HELO: mismatch on from: '$prev' != '$by'");
+ dbg2("eval: forged-HELO: mismatch on from: '$prev' != '$by'");
$pms->{mismatch_from}++;
}
+ }
+}
+
+###########################################################################
+
+# support eval-test verbose debugs using "-Deval"
+sub dbg2 {
+ if (would_log('dbg', 'eval') == 2) {
+ dbg(@_);
}
}
Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/URIEval.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/URIEval.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/URIEval.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/URIEval.pm Wed Oct 25 09:15:31 2006
@@ -38,7 +38,6 @@
bless ($self, $class);
# the important bit!
- $self->register_eval_rule("check_domain_ratio");
$self->register_eval_rule("check_for_http_redirector");
$self->register_eval_rule("check_https_ip_mismatch");
@@ -46,16 +45,6 @@
}
###########################################################################
-
-sub check_domain_ratio {
- my ($self, $pms, $body, $ratio) = @_;
- my $length = (length(join('', @{$body})) || 1);
- if (!defined $pms->{uri_domain_count}) {
- $pms->get_uri_list();
- }
- return 0 if !defined $pms->{uri_domain_count};
- return (($pms->{uri_domain_count} / $length) > $ratio);
-}
sub check_for_http_redirector {
my ($self, $pms) = @_;
Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/PluginHandler.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/PluginHandler.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/PluginHandler.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/PluginHandler.pm Wed Oct 25 09:15:31 2006
@@ -138,7 +138,7 @@
my ($self, $plugin) = @_;
$plugin->{main} = $self->{main};
push (@{$self->{plugins}}, $plugin);
- dbg("plugin: registered $plugin");
+ # dbg("plugin: registered $plugin");
# invalidate cache entries for any configuration-time hooks, in case
# one has already been built; this plugin may implement that hook!
Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/SpamdForkScaling.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/SpamdForkScaling.pm?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/SpamdForkScaling.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/SpamdForkScaling.pm Wed Oct 25 09:15:31 2006
@@ -426,7 +426,13 @@
dbg("prefork: ordered $kid to accept");
# now wait for it to say it's done that
- return $self->wait_for_child_to_accept($sock);
+ my $ret = $self->wait_for_child_to_accept($kid, $sock);
+ if ($ret) {
+ return $ret;
+ } else {
+ # retry with another child
+ return $self->order_idle_child_to_accept();
+ }
}
else {
@@ -436,10 +442,11 @@
}
sub wait_for_child_to_accept {
- my ($self, $sock) = @_;
+ my ($self, $kid, $sock) = @_;
while (1) {
my $state = $self->read_one_message_from_child_socket($sock);
+
if ($state == PFSTATE_BUSY) {
return 1; # 1 == success
}
@@ -447,7 +454,12 @@
return undef;
}
else {
- die "prefork: ordered child to accept, but child reported state '$state'";
+ warn "prefork: ordered child $kid to accept, but they reported state '$state', killing rogue";
+ $self->child_error_kill($kid, $sock);
+ $self->adapt_num_children();
+ sleep 1;
+
+ return undef;
}
}
}
Modified: spamassassin/branches/jm_re2c_hacks/lib/spamassassin-run.pod
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/spamassassin-run.pod?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/spamassassin-run.pod (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/spamassassin-run.pod Wed Oct 25 09:15:31 2006
@@ -44,6 +44,7 @@
Set user preferences file
--siteconfigpath=path Path for site configs
(def: /etc/mail/spamassassin)
+ --cf='config line' Additional line of configuration
-x, --nocreate-prefs Don't create user preferences file
-e, --exit-code Exit with a non-zero exit code if the
tested message was spam
@@ -237,6 +238,15 @@
Use the specified path for locating site-specific configuration files. Ignore
the default directories (usually C</etc/mail/spamassassin> or similar).
+
+=item B<--cf='config line'>
+
+Add additional lines of configuration directly from the command-line, parsed
+after the configuration files are read. Multiple B<--cf> arguments can be
+used, and each will be considered a separate line of configuration. For
+example:
+
+ spamassassin -t --cf="body NEWRULE /text/" --cf="score NEWRULE 3.0"
=item B<-p> I<prefs>, B<--prefspath>=I<prefs>, B<--prefs-file>=I<prefs>
Modified: spamassassin/branches/jm_re2c_hacks/masses/rule-qa/automc/gen_info_xml
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/rule-qa/automc/gen_info_xml?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/rule-qa/automc/gen_info_xml (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/rule-qa/automc/gen_info_xml Wed Oct 25 09:15:31 2006
@@ -4,6 +4,12 @@
# need this to ensure that 'svn log' will include ALL changes
my $svn_checkins_root = "http://svn.apache.org/repos/asf/spamassassin/";
+# we won't provide who-checked-in and commit-message details for changes
+# older than this. Note, this is not in rev number terms; it's an
+# absolute count of revisions.
+# update: alternatively, since the zone's SVN is too old (!), use a date.
+# my $svn_log_limit = 500;
+
my $full_rebuild = 0;
if ($ARGV[0] && $ARGV[0] =~ /^-f/) {
$full_rebuild = 1;
@@ -158,8 +164,13 @@
}
sub get_svn_log {
- print "getting svn log...\n";
- if (open (IN, "svn log --xml $svn_checkins_root |")) {
+ print "getting svn log... (".time.")\n";
+
+ my $limitdate = strftime ("%Y-%m-%d", localtime time-(24*60*60*30*12));
+
+ if (open (IN, "svn log -r 'HEAD:{$limitdate}' --xml $svn_checkins_root |"))
+ # if (open (IN, "svn log --limit $svn_log_limit --xml $svn_checkins_root |"))
+ {
eval {
my $xml = join('', <IN>);
$svn_log = XMLin($xml);
@@ -172,7 +183,7 @@
if (!$svn_log) {
die "no svn log --xml";
}
- print "got ".(scalar @{$svn_log->{logentry}})." log entries\n";
+ print "got ".(scalar @{$svn_log->{logentry}})." log entries (".time.")\n";
# use Data::Dumper; print Dumper($svn_log); die;
}
Modified: spamassassin/branches/jm_re2c_hacks/masses/rule-qa/automc/ruleqa.cgi
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/rule-qa/automc/ruleqa.cgi?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/rule-qa/automc/ruleqa.cgi (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/rule-qa/automc/ruleqa.cgi Wed Oct 25 09:15:31 2006
@@ -199,6 +199,8 @@
# turn possibly-empty $self->{daterev} into a real date/rev combo (that exists)
$self->{daterev} = $self->date_in_direction($self->{daterev}, 0);
+
+ $self->{daterev_md} = $self->get_daterev_metadata($self->{daterev});
}
# ---------------------------------------------------------------------------
@@ -980,6 +982,8 @@
my $srcpath = $self->{srcpath};
my $mtime = $self->{mtime};
+ my $no_net_rules = (!$self->{daterev_md}->{includes_net});
+
if ($srcpath || $mtime) {
my $rev = $self->get_rev_for_daterev($self->{daterev});
my $md = $self->get_rule_metadata($rev);
@@ -1003,6 +1007,13 @@
($md->{$_}->{srcmtime} >= $target);
} @rules;
}
+
+ if ($no_net_rules) { # bug 5047
+ @rules = grep {
+ !$md->{$_}->{tf} or
+ ($md->{$_}->{tf} !~ /\bnet\b/);
+ } @rules;
+ }
}
if ($self->{include_embedded_freqs_xml} == 0) {
@@ -1121,7 +1132,7 @@
}
my $outof = ($isspam ? $obj->{nspam} : $obj->{nham});
- my $count = int (($percent/100.0) * $outof);
+ my $count = int ((($percent/100.0) * $outof) + 0.99); # round up
return qq{
$count\ of\ $outof\ messages
};
Modified: spamassassin/branches/jm_re2c_hacks/masses/rule-qa/corpus-hourly
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/rule-qa/corpus-hourly?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/rule-qa/corpus-hourly (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/rule-qa/corpus-hourly Wed Oct 25 09:15:31 2006
@@ -356,6 +356,11 @@
$flags .= " -c '$opt{rules_dir}'";
}
+ # are we analyzing --net mass-check logs? if so, use scoreset 1
+ if (join(" ", @ham) =~ /-net-/) {
+ $flags .= " -s 1" if $class eq "NET";
+ }
+
if ($age eq "all") {
my %spam;
my %ham;
Modified: spamassassin/branches/jm_re2c_hacks/rules/20_dnsbl_tests.cf
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/rules/20_dnsbl_tests.cf?view=diff&rev=467701&r1=467700&r2=467701
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/rules/20_dnsbl_tests.cf (original)
+++ spamassassin/branches/jm_re2c_hacks/rules/20_dnsbl_tests.cf Wed Oct 25 09:15:31 2006
@@ -172,9 +172,9 @@
tflags DNS_FROM_RFC_POST net
#reuse DNS_FROM_RFC_POST
-header DNS_FROM_RFC_ABUSE eval:check_rbl_sub('rfci_envfrom', '127.0.0.4')
-describe DNS_FROM_RFC_ABUSE Envelope sender in abuse.rfc-ignorant.org
-tflags DNS_FROM_RFC_ABUSE net
+header DNS_FROM_RFC_ABUSE eval:check_rbl_sub('rfci_envfrom', '127.0.0.4')
+describe DNS_FROM_RFC_ABUSE Envelope sender in whois.rfc-ignorant.org
+tflags DNS_FROM_RFC_ABUSE net
#reuse DNS_FROM_RFC_ABUSE
header DNS_FROM_RFC_WHOIS eval:check_rbl_sub('rfci_envfrom', '127.0.0.5')