You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2006/11/24 18:29:43 UTC
svn commit: r478926 [1/2] - in /spamassassin/branches/jm_re2c_hacks: ./
build/ build/automc/ build/buildbot/ build/mkupdates/ lib/Mail/
lib/Mail/SpamAssassin/ lib/Mail/SpamAssassin/Plugin/ masses/
masses/rule-qa/ masses/rule-qa/automc/ rules/ t/
Author: jm
Date: Fri Nov 24 09:29:40 2006
New Revision: 478926
URL: http://svn.apache.org/viewvc?view=rev&rev=478926
Log:
merged to svn trunk's head with: 'svn merge -r475397:478919 https://svn.apache.org/repos/asf/spamassassin/trunk'
Added:
spamassassin/branches/jm_re2c_hacks/build/parse-rules-for-masses
- copied unchanged from r478919, spamassassin/trunk/build/parse-rules-for-masses
spamassassin/branches/jm_re2c_hacks/masses/rule-qa/automc/ruleqa.css
- copied unchanged from r478919, spamassassin/trunk/masses/rule-qa/automc/ruleqa.css
Removed:
spamassassin/branches/jm_re2c_hacks/masses/parse-rules-for-masses
Modified:
spamassassin/branches/jm_re2c_hacks/MANIFEST
spamassassin/branches/jm_re2c_hacks/MANIFEST.SKIP
spamassassin/branches/jm_re2c_hacks/README
spamassassin/branches/jm_re2c_hacks/build/automc/etc-apache-local-conf-httpd.conf
spamassassin/branches/jm_re2c_hacks/build/buildbot/master.cfg
spamassassin/branches/jm_re2c_hacks/build/mkupdates/run_part2
spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin.pm
spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/ArchiveIterator.pm
spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm
spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/Check.pm
spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/P595Body.pm
spamassassin/branches/jm_re2c_hacks/masses/Makefile
spamassassin/branches/jm_re2c_hacks/masses/find-extremes
spamassassin/branches/jm_re2c_hacks/masses/generate-translation
spamassassin/branches/jm_re2c_hacks/masses/hit-frequencies
spamassassin/branches/jm_re2c_hacks/masses/logs-to-c
spamassassin/branches/jm_re2c_hacks/masses/mass-check
spamassassin/branches/jm_re2c_hacks/masses/mk-roc-graphs
spamassassin/branches/jm_re2c_hacks/masses/rewrite-cf-with-new-scores
spamassassin/branches/jm_re2c_hacks/masses/rule-qa/automc/ruleqa.cgi
spamassassin/branches/jm_re2c_hacks/masses/rule-qa/corpus-nightly
spamassassin/branches/jm_re2c_hacks/masses/score-ranges-from-freqs
spamassassin/branches/jm_re2c_hacks/rules/20_head_tests.cf
spamassassin/branches/jm_re2c_hacks/rules/20_ratware.cf
spamassassin/branches/jm_re2c_hacks/rules/50_scores.cf
spamassassin/branches/jm_re2c_hacks/rules/active.list
spamassassin/branches/jm_re2c_hacks/rules/v320.pre
spamassassin/branches/jm_re2c_hacks/sa-compile.raw
spamassassin/branches/jm_re2c_hacks/sa-learn.raw
spamassassin/branches/jm_re2c_hacks/sa-update.raw
spamassassin/branches/jm_re2c_hacks/t/meta.t
Modified: spamassassin/branches/jm_re2c_hacks/MANIFEST
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/MANIFEST?view=diff&rev=478926&r1=478925&r2=478926
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/MANIFEST (original)
+++ spamassassin/branches/jm_re2c_hacks/MANIFEST Fri Nov 24 09:29:40 2006
@@ -16,12 +16,10 @@
build/convert_pods_to_doc
build/get_version
build/md5sum.pl
+build/mkrules
build/preprocessor
build/sha1sum.pl
-contrib/check_spamd
-contrib/mbox-to-check
-contrib/run-corpora
-contrib/run-masses
+build/parse-rules-for-masses
ldap/README
ldap/README.testing
ldap/sa_test.ldif
@@ -112,76 +110,6 @@
lib/Mail/SpamAssassin/Util/RegistrarBoundaries.pm
lib/Mail/SpamAssassin/Util/TieOneStringHash.pm
lib/spamassassin-run.pod
-masses/CORPUS_POLICY
-masses/CORPUS_SUBMIT
-masses/CORPUS_SUBMIT_NIGHTLY
-masses/Makefile
-masses/README
-masses/README.perceptron
-masses/compare-models
-masses/config
-masses/config.set0
-masses/config.set1
-masses/config.set2
-masses/config.set3
-masses/corpora/README
-masses/corpora/fuzzy-hash-maildir
-masses/corpora/mass-find-nonspam
-masses/corpora/remove-tests-from-logs
-masses/corpora/uniq-mailbox
-masses/corpora/uniq-maildirs
-masses/corpora/mk-corpus-link-farm
-masses/cpucount
-masses/evolve_metarule/README
-masses/evolve_metarule/evolve_metarule.c
-masses/evolve_metarule/preproc.pl
-masses/extract-message-from-mbox
-masses/extract-results
-masses/find-extremes
-masses/fp-fn-statistics
-masses/fp-fn-to-tcr
-masses/freqdiff
-masses/generate-corpus
-masses/generate-translation
-masses/graphs/gen-score-freqs-gnuplot-table
-masses/graphs/gnuplot-score-graph
-masses/hit-frequencies
-masses/lint-rules-from-freqs
-masses/logdiff
-masses/logs-to-c
-masses/mass-check
-masses/mass-check.cf
-masses/mboxget
-masses/mk-baseline-results
-masses/mk-roc-graphs
-masses/model-statistics
-masses/overlap
-masses/parse-rules-for-masses
-masses/perceptron.c
-masses/plugins/01_rule_timing.cf
-masses/plugins/HitFreqsRuleTiming.pm
-masses/post-ga-analysis.pl
-masses/remove-ids-from-mclog
-masses/rewrite-cf-with-new-scores
-masses/rule-dev/maildir-scan-headers
-masses/rule-qa/README.nightly
-masses/rule-qa/corpus-hourly
-masses/rule-qa/corpus-nightly
-masses/rule-qa/corpus-tagtime
-masses/rule-qa/corpus.example
-masses/rule-qa/get-rulemetadata-for-revision
-masses/rule-qa/markup-rules-file-with-freqs
-masses/rule-qa/post-bugs-for-retired-tests
-masses/rule-qa/rule-hits-over-time
-masses/runGA
-masses/score-ranges-from-freqs
-masses/tenpass/10pass-compute-tcr
-masses/tenpass/10pass-run
-masses/tenpass/README
-masses/tenpass/compute-current-tcr
-masses/tenpass/split-log-into-buckets
-masses/tenpass/split-log-into-buckets-random
-masses/validate-model
procmailrc.example
sa-learn.raw
sa-update.raw
@@ -455,17 +383,6 @@
t/whitelist_subject.t
t/whitelist_to.t
t/zz_cleanup.t
-tools/README.speedtest
-tools/check_whitelist
-tools/convert_awl_dbm_to_sql
-tools/desc_length.pl
-tools/mboxsplit
-tools/sa-stats.pl
-tools/speedtest
-tools/split_corpora
-tools/sysreport
-tools/test_extract
-build/mkrules
rules/10_default_prefs.cf
rules/20_dnsbl_tests.cf
rules/20_head_tests.cf
Modified: spamassassin/branches/jm_re2c_hacks/MANIFEST.SKIP
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/MANIFEST.SKIP?view=diff&rev=478926&r1=478925&r2=478926
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/MANIFEST.SKIP (original)
+++ spamassassin/branches/jm_re2c_hacks/MANIFEST.SKIP Fri Nov 24 09:29:40 2006
@@ -34,41 +34,10 @@
^debian/
^doc/
^lib/Mail/SpamAssassin/Plugin/NetCache\.pm$
+^lib/Mail/SpamAssassin/Util/MemoryDump\.pm$
^lm/
^made-doc-stamp$
^Mail-SpamAssassin-.*$
-^masses/analysis$
-^masses/badrules$
-^masses/bayes-testing/
-^masses/commands.sh$
-^masses/copy-logs-to-deimos$
-^masses/download-trapped-spam$
-^masses/dprof.nonspam$
-^masses/dprof.spam$
-^masses/evolve$
-^masses/freqs$
-^masses/galib245$
-^masses/goodresults$
-^masses/local-scripts-.*$
-^masses/logs$
-^masses/nonspam.dogma$
-^masses/nonspam.local$
-^masses/nonspam\..*$
-^masses/old-random-search$
-^masses/overnight.*$
-^masses/perceptron$
-^masses/pgapack$
-^masses/results?\..*$
-^masses/RUNME$
-^masses/spam.dogma$
-^masses/spam.local$
-^masses/spamtrap$
-^masses/spam\..*$
-^masses/start_evolving.sh$
-^masses/stop_evolving.sh$
-^masses/tmon.nonspam$
-^masses/tmon.spam$
-^masses/uniq-scores$
^pm_to_blib$
^pod2html?-?.*$
^presentation$
@@ -101,15 +70,13 @@
^todo$
^wordfreqs/
~$
-^masses/tmp/
-^masses/spamassassin/
+^masses/
+^contrib/
+^tools/
^pod2ref
-^masses/rule-qa/automc/
^sa-update$
^sa-compile$
^build/describe-to-po-file$
-^masses/enable-all-evolved-rules$
-^masses/evolve_metarule/Makefile$
^rules/70_sandbox.cf$
^build/automc/
^rulesrc/.*$
Modified: spamassassin/branches/jm_re2c_hacks/README
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/README?view=diff&rev=478926&r1=478925&r2=478926
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/README (original)
+++ spamassassin/branches/jm_re2c_hacks/README Fri Nov 24 09:29:40 2006
@@ -216,6 +216,22 @@
'/usr/share/spamassassin/user_prefs.template'
+In addition, the "Distributed Configuration Files" location is overridden
+by a "Local State Directory", used to store an updated copy of the
+ruleset:
+
+ __prefix__ __local_state_dir__
+ -------------------------------------------------------------------------
+ /usr /var/lib/spamassassin/__version__
+ /usr/local /var/lib/spamassassin/__version__
+ /opt/$DIR /var/opt/spamassassin/__version__
+ $DIR $DIR/var/spamassassin/__version__
+
+This is normally written to by the "sa-update" script. "__version__" is
+replaced by a representation of the version number, so that multiple
+versions of SpamAssassin will not interfere with each other's rulesets.
+
+
After installation, try "perldoc Mail::SpamAssassin::Conf" to see what
can be set. Common first-time tweaks include:
Modified: spamassassin/branches/jm_re2c_hacks/build/automc/etc-apache-local-conf-httpd.conf
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/build/automc/etc-apache-local-conf-httpd.conf?view=diff&rev=478926&r1=478925&r2=478926
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/build/automc/etc-apache-local-conf-httpd.conf (original)
+++ spamassassin/branches/jm_re2c_hacks/build/automc/etc-apache-local-conf-httpd.conf Fri Nov 24 09:29:40 2006
@@ -108,6 +108,8 @@
ServerAdmin webmaster@spamassassin.org
userdir disabled
+ Alias /ruleqa.css /home/automc/svn/spamassassin/masses/rule-qa/automc/ruleqa.css
+
# debugging:
# RewriteLogLevel 9
# RewriteLog /var/apache2/logs/rewrite_log
@@ -120,6 +122,8 @@
# index: straight through
RewriteRule ^/$ /ruleqa.cgi [PT,L]
+
+ RewriteRule ^/ruleqa.css$ /ruleqa.css [PT,L]
# satisfy local files
RewriteCond /var/www/ruleqa.spamassassin.org/%{REQUEST_FILENAME} !-d
Modified: spamassassin/branches/jm_re2c_hacks/build/buildbot/master.cfg
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/build/buildbot/master.cfg?view=diff&rev=478926&r1=478925&r2=478926
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/build/buildbot/master.cfg (original)
+++ spamassassin/branches/jm_re2c_hacks/build/buildbot/master.cfg Fri Nov 24 09:29:40 2006
@@ -277,9 +277,9 @@
mode="problem",
sendToInterestedUsers=True))
-from buildbot.status import words
-c['status'].append(words.IRC(host="irc.us.freenode.net", nick="buildbot-bot",
- channels=["#spamassassin"]))
+# from buildbot.status import words
+# c['status'].append(words.IRC(host="irc.us.freenode.net", nick="buildbot-bot",
+ # channels=["#spamassassin"]))
# from buildbot.status import words
# c['status'].append(words.IRC(host="irc.example.com", nick="bb",
Modified: spamassassin/branches/jm_re2c_hacks/build/mkupdates/run_part2
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/build/mkupdates/run_part2?view=diff&rev=478926&r1=478925&r2=478926
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/build/mkupdates/run_part2 (original)
+++ spamassassin/branches/jm_re2c_hacks/build/mkupdates/run_part2 Fri Nov 24 09:29:40 2006
@@ -81,22 +81,16 @@
make install || exit $?
-presdir=$tmpdir/etc/mail/spamassassin
rulesdir=$tmpdir/share/spamassassin
(
cd $rulesdir
- # include ".pre" files for the default distro plugins, like
- # Mail::SpamAssassin::Plugin::BodyEval, Mail::SpamAssassin::Plugin::Bayes
- # etc. (bug 5171)
- cp -p $presdir/*.pre .
-
# Use this to include plugin .pm files:
- # tar cvf - *.cf *.pm *.pre || exit $?
+ # tar cvf - *.cf *.pm || exit $?
# or this, to ban code from the updates:
- tar cvf - *.cf *.pre || exit $?
+ tar cvf - *.cf || exit $?
) | gzip -9 > $tmpdir/update.tgz || exit $?
Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin.pm?view=diff&rev=478926&r1=478925&r2=478926
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin.pm Fri Nov 24 09:29:40 2006
@@ -236,6 +236,12 @@
If set to 1, no tests that require internet access will be performed. (default:
0)
+=item ignore_site_cf_files
+
+If set to 1, any rule files found in the C<site_rules_filename> directory will
+be ignored. *.pre files (used for loading plugins) found in the
+C<site_rules_filename> directory will still be used. (default: 0)
+
=item dont_copy_prefs
If set to 1, the user preferences file will not be created if it doesn't
@@ -1412,7 +1418,7 @@
$self->{languages_filename} = $self->find_rule_support_file("languages");
}
- if ($siterules) {
+ if ($siterules && !$self->{ignore_site_cf_files}) {
$self->{config_text} .= $self->read_cf($siterules, 'site rules dir');
}
Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/ArchiveIterator.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/ArchiveIterator.pm?view=diff&rev=478926&r1=478925&r2=478926
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/ArchiveIterator.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/ArchiveIterator.pm Fri Nov 24 09:29:40 2006
@@ -106,6 +106,13 @@
Only use the first N ham and N spam (or if the value is -N, only use the first
N total messages regardless of class).
+This setting can be specified separately for ham and spam target classes.
+If multiple targets for one class are specified with different
+options, the last target's options will be used.
+
+If the value is negative, and multiple targets are specified with different
+options, the last spam target's setting will be used.
+
=item opt_tail
Only use the last N ham and N spam (or if the value is -N, only use the last
@@ -115,17 +122,36 @@
specifies a subset of the C<opt_tail> selection to use; in other words, the
C<opt_tail> splice is applied first.
+This setting can be specified separately for ham and spam target classes.
+If multiple targets for one class are specified with different
+options, the last target's options will be used.
+
+If the value is negative, and multiple targets are specified with different
+options, the last spam target's setting will be used.
+
+=item opt_scanprob
+
+Randomly select messages to scan, with a probability of N, where N ranges
+from 0.0 (no messages scanned) to 1.0 (all messages scanned). Default
+is 1.0.
+
+This setting can be specified separately for each target.
+
=item opt_before
Only use messages which are received after the given time_t value.
Negative values are an offset from the current time, e.g. -86400 =
last 24 hours; or as parsed by Time::ParseDate (e.g. '-6 months')
+This setting can be specified separately for each target.
+
=item opt_after
Same as opt_before, except the messages are only used if after the given
time_t value.
+This setting can be specified separately for each target.
+
=item opt_want_date
Set to 1 (default) if you want the received date to be filled in
@@ -136,7 +162,8 @@
=item opt_cache
Set to 0 (default) if you don't want to use cached information to help speed
-up ArchiveIterator. Set to 1 to enable.
+up ArchiveIterator. Set to 1 to enable. This setting requires that
+C<opt_cachedir> also be set.
=item opt_cachedir
@@ -183,11 +210,6 @@
if (!defined $self) { $self = { }; }
bless ($self, $class);
- $self->{opt_head} = 0 unless (defined $self->{opt_head});
- $self->{opt_tail} = 0 unless (defined $self->{opt_tail});
- $self->{opt_want_date} = 1 unless (defined $self->{opt_want_date});
- $self->{opt_cache} = 0 unless (defined $self->{opt_cache});
-
# If any of these options are set, we need to figure out the message's
# receive date at scan time. opt_n == 0, opt_after, opt_before
$self->{determine_receive_date} = !$self->{opt_n} ||
@@ -226,8 +248,13 @@
C<.bz2> will be properly uncompressed via call to C<gzip -dc> and C<bzip2 -dc>
respectively.
-The target_paths array is expected to be one element per path in the following
-format: class:format:raw_location
+Each element of the target_paths array is expected to be either a string in
+the following format: C<class:format:raw_location>, or a hash reference containing
+key-value option pairs and a 'target' key with a value in that format.
+
+The key-value option pairs that can be used are: opt_head, opt_tail,
+opt_scanprob, opt_after, opt_before. See the constructor method's
+documentation for more information on their effects.
run() returns 0 if there was an error (can't open a file, etc,) and 1 if there
were no errors.
@@ -419,15 +446,30 @@
############################################################################
+# TODO: this needs POD since mass-check uses it?
sub message_array {
my ($self, $targets) = @_;
+ my %class_opts = ();
+
foreach my $target (@${targets}) {
if (!defined $target) {
warn "archive-iterator: invalid (undef) value in target list";
next;
}
+ my %opts = ();
+ if (ref $target eq 'HASH') {
+ # e.g. { target => $target, opt_foo => 1, opt_bar => 0.4 ... }
+ foreach my $k (keys %{$target}) {
+ next unless ($k =~ /^opt_/);
+ my $v = $target->{$k};
+ next unless defined $v;
+ $opts{$k} = $v;
+ }
+ $target = $target->{target};
+ }
+
my ($class, $format, $rawloc) = split(/:/, $target, 3);
# "class"
@@ -444,6 +486,15 @@
# use ham by default, things like "spamassassin" can't specify the type
$class = substr($class, 0, 1) || 'h';
+ # keep a copy of the most recent message-selection options for
+ # each class
+ $class_opts{$class} = \%opts;
+
+ foreach my $k (keys %opts) {
+ $self->{$k} = $opts{$k};
+ }
+ $self->set_default_message_selection_opts();
+
my @locations = $self->fix_globs($rawloc);
foreach my $location (@locations) {
@@ -488,20 +539,13 @@
}
}
+ $self->top_and_tail_messages($self->{h}, $class_opts{h});
+ $self->top_and_tail_messages($self->{s}, $class_opts{s});
+
my $messages;
if ($self->{opt_n}) {
# OPT_N == 1 means don't bother sorting on message receive date
- # head or tail > 0 means crop each list
- if ($self->{opt_tail} > 0) {
- splice(@{$self->{s}}, 0, -$self->{opt_tail});
- splice(@{$self->{h}}, 0, -$self->{opt_tail});
- }
- if ($self->{opt_head} > 0) {
- splice(@{$self->{s}}, min ($self->{opt_head}, scalar @{$self->{s}}));
- splice(@{$self->{h}}, min ($self->{opt_head}, scalar @{$self->{h}}));
- }
-
# for ease of memory, we'll play with pointers
$messages = $self->{s};
undef $self->{s};
@@ -512,21 +556,11 @@
# OPT_N == 0 means sort on message receive date
# Sort the spam and ham groups by date
- my @s = sort { $a cmp $b } @{$self->{s}};
+ my @s = @{$self->{s}};
undef $self->{s};
- my @h = sort { $a cmp $b } @{$self->{h}};
+ my @h = @{$self->{h}};
undef $self->{h};
- # head or tail > 0 means crop each list
- if ($self->{opt_tail} > 0) {
- splice(@s, 0, -$self->{opt_tail});
- splice(@h, 0, -$self->{opt_tail});
- }
- if ($self->{opt_head} > 0) {
- splice(@s, min ($self->{opt_head}, scalar @s));
- splice(@h, min ($self->{opt_head}, scalar @h));
- }
-
# interleave ordered spam and ham
if (@s && @h) {
my $ratio = @s / @h;
@@ -569,6 +603,52 @@
return 1;
}
+sub set_default_message_selection_opts {
+ my ($self) = @_;
+ $self->{opt_head} = 0 unless (defined $self->{opt_head});
+ $self->{opt_tail} = 0 unless (defined $self->{opt_tail});
+ $self->{opt_scanprob} = 1.0 unless (defined $self->{opt_scanprob});
+ $self->{opt_want_date} = 1 unless (defined $self->{opt_want_date});
+ $self->{opt_cache} = 0 unless (defined $self->{opt_cache});
+}
+
+sub top_and_tail_messages {
+ my ($self, $ary, $opts) = @_;
+
+ foreach my $k (keys %{$opts}) {
+ $self->{$k} = $opts->{$k};
+ }
+ $self->set_default_message_selection_opts();
+
+ if ($self->{opt_n}) {
+ # OPT_N == 1 means don't bother sorting on message receive date
+
+ # head or tail > 0 means crop each list
+ if ($self->{opt_tail} > 0) {
+ splice(@{$ary}, 0, -$self->{opt_tail});
+ }
+ if ($self->{opt_head} > 0) {
+ splice(@{$ary}, min ($self->{opt_head}, scalar @{$ary}));
+ }
+ }
+ else {
+ # OPT_N == 0 means sort on message receive date
+
+ # Sort the spam and ham groups by date
+ my @s = sort { $a cmp $b } @{$ary};
+
+ # head or tail > 0 means crop each list
+ if ($self->{opt_tail} > 0) {
+ splice(@s, 0, -$self->{opt_tail});
+ }
+ if ($self->{opt_head} > 0) {
+ splice(@s, min ($self->{opt_head}, scalar @s));
+ }
+
+ @{$ary} = @s;
+ }
+}
+
############################################################################
sub message_is_useful_by_date {
@@ -607,6 +687,16 @@
}
}
+sub scanprob_says_scan {
+ my ($self) = @_;
+ if (defined $self->{opt_scanprob} && $self->{opt_scanprob} < 1.0) {
+ if ( int( rand( 1 / $self->{opt_scanprob} ) ) != 0 ) {
+ return 0;
+ }
+ }
+ return 1;
+}
+
############################################################################
# 0 850852128 atime
@@ -694,6 +784,7 @@
}
return if !$self->message_is_useful_by_date($date);
+ return if !$self->scanprob_says_scan();
push(@{$self->{$class}}, index_pack($date, $class, "f", $mail));
}
@@ -791,6 +882,7 @@
if ($self->{determine_receive_date}) {
next if !$self->message_is_useful_by_date($v);
}
+ next if !$self->scanprob_says_scan();
push(@{$self->{$class}}, index_pack($v, $class, "m", "$file.$k"));
}
@@ -898,6 +990,7 @@
if ($self->{determine_receive_date}) {
next if !$self->message_is_useful_by_date($v);
}
+ next if !$self->scanprob_says_scan();
push(@{$self->{$class}}, index_pack($v, $class, "b", "$file.$k"));
}
Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm?view=diff&rev=478926&r1=478925&r2=478926
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm Fri Nov 24 09:29:40 2006
@@ -34,6 +34,8 @@
use warnings;
use bytes;
+use re qw(regmust); # added in blead, 2006-11-16
+
use vars qw(@ISA);
@ISA = qw(Mail::SpamAssassin::Plugin);
@@ -107,7 +109,7 @@
# may be a good long string of text at the end of the rule.
# require this many chars in a base string, for it to be viable
- my $min_chars = 4;
+ my $min_chars = 3;
foreach my $name (keys %{$rules}) {
my $rule = $rules->{$name};
@@ -116,16 +118,31 @@
# TODO: need cleaner way to do this
next if ($conf->{rules_to_replace}->{$name});
- my @bases1 = ();
- my @bases2 = ();
- eval { # catch die()s
- @bases1 = $self->extract_hints($rule, 0);
- };
- $@ and dbg("giving up on that direction: $@");
- eval {
- @bases2 = $self->extract_hints($rule, 1);
- };
- $@ and dbg("giving up on that direction: $@");
+ my ($qr, $mods) = $self->simplify_and_qr_regexp($rule);
+ my ($anchored, $floating) = regmust(qr/$qr/);
+ my @bases1 = (quotemeta $anchored);
+ my @bases2 = (quotemeta $floating);
+ # my @bases1 = ();
+ # my @bases2 = ();
+
+ my $len1 = 0;
+ my $len2 = 0;
+ if ($anchored) { $len1 = length($anchored); }
+ if ($floating) { $len2 = length($floating); }
+
+ # fall back to using our own code, since the regexp is too
+ # complex (probably alternations involved).
+ if ((!$anchored || $len1 < $min_chars) && (!$floating || $len2 < $min_chars))
+ {
+ eval { # catch die()s
+ @bases1 = $self->extract_hints($qr, $mods, 0);
+ };
+ $@ and dbg("giving up on that direction: $@");
+ eval { # catch die()s
+ @bases2 = $self->extract_hints($qr, $mods, 1);
+ };
+ $@ and dbg("giving up on that direction: $@");
+ }
# if any of the extracted hints in a set are too short, the entire
# set is invalid; this is because each set of N hints represents just
@@ -154,6 +171,7 @@
# both are valid; use the end with the longer hints
if ($minlen2 > $minlen1) {
@bases1 = @bases2;
+ $minlen1 = $minlen2;
}
}
@@ -256,13 +274,11 @@
# /time to refinance|refinanc\w{1,3}\b.{0,16}\bnow\b/i
# => should understand alternations; tricky
-sub extract_hints {
+sub simplify_and_qr_regexp {
my $self = shift;
my $rule = shift;
- my $is_reversed = shift;
my $main = $self->{main};
- my $orig = $rule;
$rule = Mail::SpamAssassin::Util::regexp_remove_delimiters($rule);
# remove the regexp modifiers, keep for later
@@ -320,6 +336,17 @@
# remove the "?=" trick
# (?=[dehklnswxy])(horny|nasty|hot|wild|young|....etc...)
$rule =~ s/\(\?\=\[[^\]]+\]\)//gs;
+ ($rule, $mods);
+}
+
+sub extract_hints {
+ my $self = shift;
+ my $rule = shift;
+ my $mods = shift;
+ my $is_reversed = shift;
+
+ my $main = $self->{main};
+ my $orig = $rule;
# if there are anchors, give up; we can't get much
# faster than these anyway
Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/Check.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/Check.pm?view=diff&rev=478926&r1=478925&r2=478926
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/Check.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/Check.pm Fri Nov 24 09:29:40 2006
@@ -40,6 +40,8 @@
return $self;
}
+###########################################################################
+
sub check_main {
my ($self, $args) = @_;
@@ -68,14 +70,17 @@
next unless ($pms->{conf}->{priorities}->{$priority} > 0);
# if shortcircuiting is hit, we skip all other priorities...
- last if $self->shortcircuited_p();
+ last if $self->{main}->call_plugins("have_shortcircuited", { permsgstatus => $pms });
dbg("check: running tests for priority: $priority");
# only harvest the dnsbl queries once priority HARVEST_DNSBL_PRIORITY
# has been reached and then only run once
- if ($priority >= HARVEST_DNSBL_PRIORITY && $needs_dnsbl_harvest_p
- && !$self->shortcircuited_p($pms)) {
+ if ($priority >= HARVEST_DNSBL_PRIORITY
+ && $needs_dnsbl_harvest_p
+ && !$self->{main}->call_plugins("have_shortcircuited",
+ { permsgstatus => $pms }))
+ {
# harvest the DNS results
$pms->harvest_dnsbl_queries();
$needs_dnsbl_harvest_p = 0;
@@ -91,7 +96,7 @@
$self->do_head_eval_tests($pms, $priority);
$self->do_body_tests($pms, $priority, $decoded);
- $self->do_body_uri_tests($pms, $priority, @uris);
+ $self->do_uri_tests($pms, $priority, @uris);
$self->do_body_eval_tests($pms, $priority, $decoded);
$self->do_rawbody_tests($pms, $priority, $bodytext);
@@ -110,7 +115,9 @@
# sanity check, it is possible that no rules >= HARVEST_DNSBL_PRIORITY ran so the harvest
# may not have run yet. Check, and if so, go ahead and harvest here.
if ($needs_dnsbl_harvest_p) {
- if (!$self->shortcircuited_p($pms)) {
+ if (!$self->{main}->call_plugins("have_shortcircuited",
+ { permsgstatus => $pms }))
+ {
# harvest the DNS results
$pms->harvest_dnsbl_queries();
}
@@ -143,6 +150,8 @@
@TEMPORARY_METHODS = (); # clear for next time
}
+###########################################################################
+
sub run_rbl_eval_tests {
my ($self, $pms) = @_;
my ($rulename, $pat, @args);
@@ -174,42 +183,125 @@
}
}
-sub do_meta_tests {
- my ($self, $pms, $priority) = @_;
-
- # XXX - why not just make the plugin call?
- return if $self->shortcircuited_p($pms);
+###########################################################################
- dbg("rules: running meta tests; score so far=" . $pms->{score} );
- my $conf = $pms->{conf};
+sub run_generic_tests {
+ my ($self, $pms, $priority, %opts) = @_;
+
+ return if $self->{main}->call_plugins("have_shortcircuited",
+ { permsgstatus => $pms });
- my $doing_user_rules =
- $conf->{user_rules_to_compile}->{$Mail::SpamAssassin::Conf::TYPE_META_TESTS};
+ my $ruletype = $opts{type};
+ dbg("rules: running ".$ruletype." tests; score so far=".$pms->{score});
+ $pms->{test_log_msgs} = (); # clear test state
+
+ my $conf = $pms->{conf};
+ my $doing_user_rules = $conf->{user_rules_to_compile}->{$opts{consttype}};
# clean up priority value so it can be used in a subroutine name
my $clean_priority;
($clean_priority = $priority) =~ s/-/neg/;
-
my $package_name = __PACKAGE__;
+ my $methodname = $package_name."::_".$ruletype."_tests_".$clean_priority;
- # speedup code provided by Matt Sergeant
- if (defined &{"${package_name}::_meta_tests_${clean_priority}"}
- && !$doing_user_rules) {
+ if (defined &{$methodname} && !$doing_user_rules) {
no strict "refs";
- &{"${package_name}::_meta_tests_${clean_priority}"}($pms);
+run_compiled_method:
+ $methodname->($pms, @{$opts{args}});
use strict "refs";
return;
}
- my (%rule_deps, %meta, $rulename);
- my $evalstr = '';
+ # build up the eval string...
+ $self->{evalstr} = $self->start_rules_plugin_code($ruletype, $priority);
+ $self->{evalstr2} = '';
+
+ # use %nopts for named parameter-passing; it's more friendly to future-proof
+ # subclassing, since new parameters can be added without breaking third-party
+ # subclassed implementations of this plugin.
+ my %nopts = (
+ ruletype => $ruletype,
+ doing_user_rules => $doing_user_rules,
+ priority => $priority,
+ clean_priority => $clean_priority
+ );
+
+ if (defined $opts{pre_loop_body}) {
+ $opts{pre_loop_body}->($self, $pms, $conf, %nopts);
+ }
+ while (my($rulename, $test) = each %{$opts{testhash}->{$priority}}) {
+ $opts{loop_body}->($self, $pms, $conf, $rulename, $test, %nopts);
+ }
+ if (defined $opts{post_loop_body}) {
+ $opts{post_loop_body}->($self, $pms, $conf, %nopts);
+ }
+
+ # clear out a previous version of this fn
+ undef &{$methodname};
+ $self->free_ruleset_source($pms, $ruletype, $priority);
+
+ my $evalstr = $self->{evalstr};
+
+ # generate the loop that goes through each line...
+ $evalstr = <<"EOT";
+ {
+ package $package_name;
+
+ $self->{evalstr2}
+
+ sub $methodname {
+ my \$self = shift;
+ $evalstr;
+ }
+
+ 1;
+ }
+EOT
+
+ delete $self->{evalstr};
+ delete $self->{evalstr2}; # free up some RAM before we eval()
+
+ ## dbg ("rules: eval code to compile: $evalstr");
+ eval $evalstr;
+ if ($@) {
+ warn("rules: failed to compile $ruletype tests, skipping:\n\t($@)\n");
+ $pms->{rule_errors}++;
+ }
+ else {
+ goto run_compiled_method;
+ }
+}
+
+sub add_evalstr {
+ my ($self, $str) = @_;
+ $self->{evalstr} .= $str;
+}
+
+sub add_evalstr2 {
+ my ($self, $str) = @_;
+ $self->{evalstr2} .= $str;
+}
+
+sub add_temporary_method {
+ my ($self, $methodname, $methodbody) = @_;
+ $self->add_evalstr2 (' sub '.$methodname.' { '.$methodbody.' } ');
+ push (@TEMPORARY_METHODS, $methodname);
+}
- # Get the list of meta tests
- my @metas = keys %{$conf->{meta_tests}->{$priority}};
+###########################################################################
+
+sub do_meta_tests {
+ my ($self, $pms, $priority) = @_;
+ my (%rule_deps, %meta, $rulename);
- # Go through each rule and figure out what we need to do
- foreach $rulename (@metas) {
- my $rule = $conf->{meta_tests}->{$priority}->{$rulename};
+ $self->run_generic_tests ($pms, $priority,
+ consttype => $Mail::SpamAssassin::Conf::TYPE_META_TESTS,
+ type => 'meta',
+ testhash => $pms->{conf}->{meta_tests},
+ args => [ ],
+ loop_body => sub
+ {
+ my ($self, $pms, $conf, $rulename, $rule, %opts) = @_;
my $token;
# Lex the rule into tokens using a rather simple RE method ...
@@ -254,150 +346,97 @@
# If the token is another meta rule, add it as a dependency
push (@{ $rule_deps{$rulename} }, $token)
- if (exists $conf->{meta_tests}->{$priority}->{$token});
+ if (exists $conf->{meta_tests}->{$opts{priority}}->{$token});
}
}
- }
-
- # Sort by length of dependencies list. It's more likely we'll get
- # the dependencies worked out this way.
- @metas = sort { @{ $rule_deps{$a} } <=> @{ $rule_deps{$b} } } @metas;
-
- my $count;
- my $tflags = $conf->{tflags};
-
- # Now go ahead and setup the eval string
- do {
- $count = $#metas;
- my %metas = map { $_ => 1 } @metas; # keep a small cache for fast lookups
-
- # Go through each meta rule we haven't done yet
- for (my $i = 0 ; $i <= $#metas ; $i++) {
+ },
+ pre_loop_body => sub
+ {
+ my ($self, $pms, $conf, %opts) = @_;
+ $self->add_evalstr ('
+ my $r;
+ my $h = $self->{tests_already_hit};
+ ');
+ },
+ post_loop_body => sub
+ {
+ my ($self, $pms, $conf, %opts) = @_;
- # If we depend on meta rules that haven't run yet, skip it
- next if (grep( $metas{$_}, @{ $rule_deps{ $metas[$i] } }));
+ # Sort by length of dependencies list. It's more likely we'll get
+ # the dependencies worked out this way.
+ my @metas = sort { @{ $rule_deps{$a} } <=> @{ $rule_deps{$b} } }
+ keys %{$conf->{meta_tests}->{$opts{priority}}};
+
+ my $count;
+ my $tflags = $conf->{tflags};
+
+ # Now go ahead and setup the eval string
+ do {
+ $count = $#metas;
+ my %metas = map { $_ => 1 } @metas; # keep a small cache for fast lookups
+
+ # Go through each meta rule we haven't done yet
+ for (my $i = 0 ; $i <= $#metas ; $i++) {
+
+ # If we depend on meta rules that haven't run yet, skip it
+ next if (grep( $metas{$_}, @{ $rule_deps{ $metas[$i] } }));
+
+ # If we depend on network tests, call ensure_rules_are_complete()
+ # to block until they are
+ my $alldeps = join ' ', grep {
+ ($tflags->{$_}||'') =~ /\bnet\b/
+ } split (' ', $conf->{meta_dependencies}->{ $metas[$i] } );
+
+ if ($alldeps ne '') {
+ $self->add_evalstr ('
+ $self->ensure_rules_are_complete(q{'.$metas[$i].'}, qw{'.$alldeps.'});
+ ');
+ }
- # If we depend on network tests, call ensure_rules_are_complete()
- # to block until they are
- my $alldeps = join ' ', grep {
- ($tflags->{$_}||'') =~ /\bnet\b/
- } split (' ', $conf->{meta_dependencies}->{ $metas[$i] } );
+ # Add this meta rule to the eval line
+ $self->add_evalstr ('
+ $r = '.$meta{$metas[$i]}.';
+ if ($r) { $self->got_hit(q#'.$metas[$i].'#, "", ruletype => "meta", value => $r); }
+ ');
- if ($alldeps ne '') {
- $evalstr .= ' $pms->ensure_rules_are_complete(q{'.$metas[$i].'}, qw{'.$alldeps.'});';
+ splice @metas, $i--, 1; # remove this rule from our list
}
+ } while ($#metas != $count && $#metas > -1); # run until we can't go anymore
- # Add this meta rule to the eval line
- $evalstr .= '
- $r = '.$meta{$metas[$i]}.';
- if ($r) { $pms->got_hit(q#'.$metas[$i].'#, "", ruletype => "meta", value => $r); }
- ';
-
- splice @metas, $i--, 1; # remove this rule from our list
- }
- } while ($#metas != $count && $#metas > -1); # run until we can't go anymore
-
- # If there are any rules left, we can't solve the dependencies so complain
- my %metas = map { $_ => 1 } @metas; # keep a small cache for fast lookups
- foreach $rulename (@metas) {
- $pms->{rule_errors}++; # flag to --lint that there was an error ...
- my $msg =
- "rules: excluding meta test $rulename, unsolved meta dependencies: " .
- join(", ", grep($metas{$_}, @{ $rule_deps{$rulename} }));
- if ($self->{main}->{lint_rules}) {
- warn $msg."\n";
- }
- else {
- info($msg);
+ # If there are any rules left, we can't solve the dependencies so complain
+ my %metas = map { $_ => 1 } @metas; # keep a small cache for fast lookups
+ foreach $rulename (@metas) {
+ $pms->{rule_errors}++; # flag to --lint that there was an error ...
+ my $msg =
+ "rules: excluding meta test $rulename, unsolved meta dependencies: " .
+ join(", ", grep($metas{$_}, @{ $rule_deps{$rulename} }));
+ if ($self->{main}->{lint_rules}) {
+ warn $msg."\n";
+ }
+ else {
+ info($msg);
+ }
}
}
-
- no strict "subs";
- undef &{"${package_name}::_meta_tests_${clean_priority}"};
- use strict "subs";
- $self->free_ruleset_source($pms, 'meta', $priority);
-
- return unless ($evalstr);
-
- # setup the environment for meta tests
- $evalstr = <<"EOT";
-{
- package $package_name;
-
- sub _meta_tests_$clean_priority {
- # note: cannot set \$^W here on perl 5.6.1 at least, it
- # crashes meta tests.
-
- my (\$pms) = \@_;
- my \$r;
-
- my \$h = \$pms->{tests_already_hit};
-
- $evalstr;
- }
-
- 1;
+ );
}
-EOT
- eval $evalstr;
-
- if ($@) {
- warn "rules: failed to run meta tests, skipping some: $@\n";
- $pms->{rule_errors}++;
- }
- else {
- my $method = "${package_name}::_meta_tests_${clean_priority}";
- push @TEMPORARY_METHODS, $method;
- no strict "refs";
- &{$method}($pms);
- use strict "refs";
- }
-} # do_meta_tests()
+###########################################################################
sub do_head_tests {
my ($self, $pms, $priority) = @_;
- local ($_);
-
- # XXX - why not just do the plugin call?
- return if $self->shortcircuited_p($pms);
-
- # note: we do this only once for all head pattern tests. Only
- # eval tests need to use stuff in here.
- $pms->{test_log_msgs} = (); # clear test state
-
- dbg("rules: running header regexp tests; score so far=".$pms->{score});
-
- my $doing_user_rules =
- $pms->{conf}->{user_rules_to_compile}->{$Mail::SpamAssassin::Conf::TYPE_HEAD_TESTS};
-
- # clean up priority value so it can be used in a subroutine name
- my $clean_priority;
- ($clean_priority = $priority) =~ s/-/neg/;
-
- my $package_name = __PACKAGE__;
-
- # speedup code provided by Matt Sergeant
- if (defined &{"${package_name}::_head_tests_${clean_priority}"}
- && !$doing_user_rules) {
- no strict "refs";
- &{"${package_name}::_head_tests_${clean_priority}"}($pms);
- use strict "refs";
- return;
- }
-
- my $conf = $pms->{conf};
- my $tflags = $conf->{tflags};
- my $use_rule_subs = $self->{main}->{use_rule_subs};
-
- my $evalstr = $self->start_rules_plugin_code("header", $priority);
- my $evalstr2 = '';
-
# hash to hold the rules, "header\tdefault value" => rulename
my %ordered = ();
my %testcode = ();
- while (my($rulename, $rule) = each %{$conf->{head_tests}->{$priority}}) {
+ $self->run_generic_tests ($pms, $priority,
+ consttype => $Mail::SpamAssassin::Conf::TYPE_HEAD_TESTS,
+ type => 'head',
+ testhash => $pms->{conf}->{head_tests},
+ args => [ ],
+ loop_body => sub
+ {
+ my ($self, $pms, $conf, $rulename, $rule, %opts) = @_;
my $def = '';
my ($hdrname, $testtype, $pat) =
$rule =~ /^\s*(\S+)\s*(\=|\!)\~\s*(\S.*?\S)\s*$/;
@@ -415,58 +454,68 @@
push(@{$ordered{"$hdrname\t$def"}}, $rulename);
- if ($doing_user_rules) {
- next if (!$self->is_user_rule_sub ($rulename.'_head_test'));
- }
+ next if ($opts{doing_user_rules} &&
+ !$self->is_user_rule_sub($rulename.'_head_test'));
- if ($use_rule_subs) {
- $evalstr2 .= '
- sub '.$rulename.'_head_test {
+ # caller can set this member of the Mail::SpamAssassin object to
+ # override this; useful for profiling rule runtimes, although I think
+ # the HitFreqsRuleTiming.pm plugin is probably better nowadays anyway
+ if ($self->{main}->{use_rule_subs}) {
+ $self->add_temporary_method ($rulename.'_head_test', '{
my($self,$text) = @_;
'.$self->hash_line_for_rule($pms, $rulename).'
while ($text '.$testtype.'~ '.$pat.'g) {
$self->got_hit(q#'.$rulename.'#, "", ruletype => "header");
'. $self->hit_rule_plugin_code($pms, $rulename, "header", "last") . '
}
- }
- ';
- push (@TEMPORARY_METHODS, $rulename.'_head_test');
+ }');
}
else {
# store for use below
$testcode{$rulename} = $testtype.'~ '.$pat;
}
- }
+ },
+ pre_loop_body => sub
+ {
+ my ($self, $pms, $conf, %opts) = @_;
+ $self->add_evalstr ('
+ my $hval;
+ ');
+ },
+ post_loop_body => sub
+ {
+ my ($self, $pms, $conf, %opts) = @_;
+ # setup the function to run the rules
+ while(my($k,$v) = each %ordered) {
+ my($hdrname, $def) = split(/\t/, $k, 2);
+ $self->add_evalstr ('
+ $hval = $self->get(q#'.$hdrname.'#, q#'.$def.'#);
+ ');
+ foreach my $rulename (@{$v}) {
+ if ($self->{main}->{use_rule_subs}) {
+ $self->add_evalstr ('
+ if ($scoresptr->{q#'.$rulename.'#}) {
+ '.$rulename.'_head_test($self, $hval);
+ '.$self->ran_rule_plugin_code($rulename, "header").'
+ }
+ ');
+ }
+ else {
+ my $testcode = $testcode{$rulename};
- # setup the function to run the rules
- while(my($k,$v) = each %ordered) {
- my($hdrname, $def) = split(/\t/, $k, 2);
- $evalstr .= ' $hval = $self->get(q#'.$hdrname.'#, q#'.$def.'#);';
- foreach my $rulename (@{$v}) {
- if ($use_rule_subs) {
- $evalstr .= '
- if ($scoresptr->{q#'.$rulename.'#}) {
- '.$rulename.'_head_test($self, $hval);
- '.$self->ran_rule_plugin_code($rulename, "header").'
+ my $posline = '';
+ my $ifwhile = 'if';
+ my $hitdone = '';
+ my $matchg = '';
+ if (($conf->{tflags}->{$rulename}||'') =~ /\bmultiple\b/)
+ {
+ $posline = 'pos $hval = 0;';
+ $ifwhile = 'while';
+ $hitdone = 'last';
+ $matchg = 'g';
}
- ';
- }
- else {
- my $testcode = $testcode{$rulename};
-
- my $posline = '';
- my $ifwhile = 'if';
- my $hitdone = '';
- my $matchg = '';
- if (($tflags->{$rulename}||'') =~ /\bmultiple\b/)
- {
- $posline = 'pos $hval = 0;';
- $ifwhile = 'while';
- $hitdone = 'last';
- $matchg = 'g';
- }
- $evalstr .= '
+ $self->add_evalstr ('
if ($scoresptr->{q#'.$rulename.'#}) {
'.$posline.'
'.$self->hash_line_for_rule($pms, $rulename).'
@@ -476,105 +525,34 @@
}
'.$self->ran_rule_plugin_code($rulename, "header").'
}
- ';
+ ');
+ }
}
}
}
-
- # clear out a previous version of this fn, if already defined
- no strict "subs";
- undef &{"${package_name}::_head_tests_${clean_priority}"};
- use strict "subs";
- $self->free_ruleset_source($pms, 'head', $priority);
-
- return unless ($evalstr);
-
- $evalstr = <<"EOT";
-{
- package $package_name;
-
- $evalstr2
-
- sub _head_tests_$clean_priority {
- my (\$self) = \@_;
- my \$hval;
-
- $evalstr;
- }
-
- 1;
+ );
}
-EOT
- eval $evalstr;
-
- if ($@) {
- warn "rules: failed to run header tests, skipping some: $@\n";
- $pms->{rule_errors}++;
- }
- else {
- my $method = "${package_name}::_head_tests_${clean_priority}";
- push @TEMPORARY_METHODS, $method;
- no strict "refs";
- &{$method}($pms);
- use strict "refs";
- }
-}
+###########################################################################
sub do_body_tests {
my ($self, $pms, $priority, $textary) = @_;
-
- # XXX - why not just make the plugin call directly?
- return if $self->shortcircuited_p($pms);
-
- dbg("rules: running body-text per-line regexp tests; score so far=".$pms->{score});
-
- my $conf = $self->{conf};
- my $doing_user_rules =
- $conf->{user_rules_to_compile}->{$Mail::SpamAssassin::Conf::TYPE_BODY_TESTS};
-
- # clean up priority value so it can be used in a subroutine name
- my $clean_priority;
- ($clean_priority = $priority) =~ s/-/neg/;
-
- my $package_name = __PACKAGE__;
-
- $pms->{test_log_msgs} = (); # clear test state
-
- if (defined &{"${package_name}::_body_tests_${clean_priority}"}
- && !$doing_user_rules) {
- no strict "refs";
- &{"${package_name}::_body_tests_${clean_priority}"}($pms, @$textary);
- use strict "refs";
- return;
- }
-
- # caller can set this member of the Mail::SpamAssassin object to
- # override this; useful for profiling rule runtimes, although I think
- # the HitFreqsRuleTiming.pm plugin is probably better nowadays anyway
- my $use_rule_subs = $self->{main}->{use_rule_subs};
-
- # build up the eval string...
- my $evalstr = $self->start_rules_plugin_code("body", $priority);
- my $evalstr2 = '';
my $loopid = 0;
- $evalstr .= '
-
- $self->{main}->call_plugins("run_body_hack", {
- permsgstatus => $self, ruletype => "body",
- priority => '.$priority.', lines => \@_
- });
-
- ';
-
- while (my($rulename, $pat) = each %{$pms->{conf}->{body_tests}->{$priority}}) {
+ $self->run_generic_tests ($pms, $priority,
+ consttype => $Mail::SpamAssassin::Conf::TYPE_BODY_TESTS,
+ type => 'body',
+ testhash => $pms->{conf}->{body_tests},
+ args => [ @$textary ],
+ loop_body => sub
+ {
+ my ($self, $pms, $conf, $rulename, $pat, %opts) = @_;
my $sub;
my $sub_one_line;
my $need_one_line = ($pms->{conf}->{generate_body_one_line_sub}->{$rulename});
- if (($pms->{conf}->{tflags}->{$rulename}||'') =~ /\bmultiple\b/)
+ if (($conf->{tflags}->{$rulename}||'') =~ /\bmultiple\b/)
{
# support multiple matches
$loopid++;
@@ -584,7 +562,7 @@
'.$self->hash_line_for_rule($pms, $rulename).'
while ($l =~ '.$pat.'g) {
$self->got_hit(q{'.$rulename.'}, "BODY: ", ruletype => "body");
- '. $self->hit_rule_plugin_code($pms, $rulename, "body",
+ '. $self->hit_rule_plugin_code($pms, $rulename, 'body',
"last body_".$loopid) . '
}
}
@@ -630,117 +608,65 @@
}
- if ($use_rule_subs) {
- $evalstr .= '
+ if ($self->{main}->{use_rule_subs}) {
+ $self->add_evalstr ('
if ($scoresptr->{q{'.$rulename.'}}) {
'.$rulename.'_body_test($self,@_);
'.$self->ran_rule_plugin_code($rulename, "body").'
}
- ';
+ ');
}
else {
- $evalstr .= '
+ $self->add_evalstr ('
if ($scoresptr->{q{'.$rulename.'}}) {
'.$sub.'
'.$self->ran_rule_plugin_code($rulename, "body").'
}
- ';
+ ');
}
- if ($doing_user_rules) {
- next if (!$self->is_user_rule_sub ($rulename.'_body_test'));
- }
+ next if ($opts{doing_user_rules} &&
+ !$self->is_user_rule_sub($rulename.'_body_test'));
- if ($use_rule_subs) {
- $evalstr2 .= '
- sub '.$rulename.'_body_test { my $self = shift; '.$sub.' }
- ';
- push (@TEMPORARY_METHODS, $rulename.'_body_test');
+ if ($self->{main}->{use_rule_subs}) {
+ $self->add_temporary_method ($rulename.'_body_test',
+ '{ my $self = shift; '.$sub.' }');
}
if ($need_one_line) {
- $evalstr2 .= '
- sub '.$rulename.'_one_line_body_test { '.$sub_one_line.' }
- ';
- push (@TEMPORARY_METHODS, $rulename.'_one_line_body_test');
+ $self->add_temporary_method ($rulename.'_one_line_body_test',
+ '{ my $self = shift; '.$sub_one_line.' }');
}
-
- }
-
- # clear out a previous version of this fn
- undef &{"${package_name}::_body_tests_${clean_priority}"};
- $self->free_ruleset_source($pms, 'body', $priority);
-
- return unless ($evalstr);
-
- # generate the loop that goes through each line...
- $evalstr = <<"EOT";
-{
- package $package_name;
-
- $evalstr2
-
- sub _body_tests_$clean_priority {
- my \$self = shift;
-
- $evalstr;
}
+ pre_loop_body => sub
+ {
+ my ($self, $pms, $conf, %opts) = @_;
+ $self->add_evalstr ('
+
+ $self->{main}->call_plugins("run_body_hack", {
+ permsgstatus => $self, ruletype => "body",
+ priority => '.$opts{priority}.', lines => \@_
+ });
- 1;
+ ');
+ });
}
-EOT
- eval $evalstr;
-
- if ($@) {
- warn("rules: failed to compile body tests, skipping:\n" . "\t($@)\n");
- $pms->{rule_errors}++;
- }
- else {
- my $method = "${package_name}::_body_tests_${clean_priority}";
- no strict "refs";
- &{$method}($pms, @$textary);
- use strict "refs";
- }
-}
+###########################################################################
-sub do_body_uri_tests {
+sub do_uri_tests {
my ($self, $pms, $priority, @uris) = @_;
-
- # XXX - why not just do the direct plugin call?
- return if $self->shortcircuited_p($pms);
-
- dbg("uri: running uri tests; score so far=".$pms->{score});
-
- my $doing_user_rules =
- $pms->{conf}->{user_rules_to_compile}->{$Mail::SpamAssassin::Conf::TYPE_URI_TESTS};
-
- # clean up priority value so it can be used in a subroutine name
- my $clean_priority;
- ($clean_priority = $priority) =~ s/-/neg/;
-
- my $package_name = __PACKAGE__;
-
- $pms->{test_log_msgs} = (); # clear test state
-
- if (defined &{"${package_name}::_body_uri_tests_${clean_priority}"}
- && !$doing_user_rules) {
- no strict "refs";
- &{"${package_name}::_body_uri_tests_${clean_priority}"}($pms, @uris);
- use strict "refs";
- return;
- }
-
- my $use_rule_subs = $self->{main}->{use_rule_subs};
-
- # otherwise build up the eval string...
- my $evalstr = $self->start_rules_plugin_code("uri", $priority);
- my $evalstr2 = '';
my $loopid = 0;
-
- while (my($rulename, $pat) = each %{$pms->{conf}{uri_tests}->{$priority}}) {
+ $self->run_generic_tests ($pms, $priority,
+ consttype => $Mail::SpamAssassin::Conf::TYPE_URI_TESTS,
+ type => 'uri',
+ testhash => $pms->{conf}->{uri_tests},
+ args => [ @uris ],
+ loop_body => sub
+ {
+ my ($self, $pms, $conf, $rulename, $pat, %opts) = @_;
my $sub;
- if (($pms->{conf}->{tflags}->{$rulename}||'') =~ /\bmultiple\b/) {
+ if (($conf->{tflags}->{$rulename}||'') =~ /\bmultiple\b/) {
$loopid++;
$sub = '
uri_'.$loopid.': foreach my $l (@_) {
@@ -765,109 +691,47 @@
';
}
- if ($use_rule_subs) {
- # XXX - why isn't it _body_uri_test??
- $evalstr .= '
+ if ($self->{main}->{use_rule_subs}) {
+ $self->add_evalstr ('
if ($scoresptr->{q{'.$rulename.'}}) {
'.$rulename.'_uri_test($self, @_);
'.$self->ran_rule_plugin_code($rulename, "uri").'
}
- ';
+ ');
}
else {
- $evalstr .= '
+ $self->add_evalstr ('
if ($scoresptr->{q{'.$rulename.'}}) {
'.$sub.'
'.$self->ran_rule_plugin_code($rulename, "uri").'
}
- ';
+ ');
}
- if ($doing_user_rules) {
- next if (!$self->is_user_rule_sub($rulename.'_uri_test'));
- }
+ next if ($opts{doing_user_rules} &&
+ !$self->is_user_rule_sub($rulename.'_uri_test'));
- if ($use_rule_subs) {
- # XXX - why isn't it _body_uri_test??
- $evalstr2 .= '
- sub '.$rulename.'_uri_test { my $self = shift; '.$sub.' }
- ';
- push (@TEMPORARY_METHODS, $rulename.'_uri_test');
+ if ($self->{main}->{use_rule_subs}) {
+ $self->add_temporary_method ($rulename.'_uri_test',
+ '{ my $self = shift; '.$sub.' }');
}
}
-
- # clear out a previous version of this fn
- undef &{"${package_name}::_body_uri_tests_${clean_priority}"};
- $self->free_ruleset_source($pms, 'uri', $priority);
-
- return unless ($evalstr);
-
- # generate the loop that goes through each line...
- $evalstr = <<"EOT";
-{
- package $package_name;
-
- $evalstr2
-
- sub _body_uri_tests_$clean_priority {
- my \$self = shift;
- $evalstr;
- }
-
- 1;
+ );
}
-EOT
- eval $evalstr;
-
- if ($@) {
- warn("rules: failed to compile URI tests, skipping:\n" . "\t($@)\n");
- $pms->{rule_errors}++;
- }
- else {
- my $method = "${package_name}::_body_uri_tests_${clean_priority}";
- push @TEMPORARY_METHODS, $method;
- no strict "refs";
- &{$method}($pms, @uris);
- use strict "refs";
- }
-}
+###########################################################################
sub do_rawbody_tests {
my ($self, $pms, $priority, $textary) = @_;
-
- # XXX - why not just do the plugin call here??
- return if $self->shortcircuited_p($pms);
-
- dbg("rules: running raw-body-text per-line regexp tests; score so far=".$pms->{score});
-
- my $doing_user_rules =
- $pms->{conf}->{user_rules_to_compile}->{$Mail::SpamAssassin::Conf::TYPE_RAWBODY_TESTS};
-
- # clean up priority value so it can be used in a subroutine name
- my $clean_priority;
- ($clean_priority = $priority) =~ s/-/neg/;
-
- my $package_name = __PACKAGE__;
-
- $pms->{test_log_msgs} = (); # clear test state
- dbg("rules: in middle of raw-body-text");
- if (defined &{"${package_name}::_rawbody_tests_${clean_priority}"}
- && !$doing_user_rules) {
- no strict "refs";
- &{"${package_name}::_rawbody_tests_${clean_priority}"}($pms, @$textary);
- use strict "refs";
- return;
- }
-
- my $use_rule_subs = $self->{main}->{use_rule_subs};
-
- # build up the eval string...
- my $evalstr = $self->start_rules_plugin_code("rawbody", $priority);
- my $evalstr2 = '';
my $loopid = 0;
-
- while (my($rulename, $pat) = each %{$pms->{conf}{rawbody_tests}->{$priority}}) {
+ $self->run_generic_tests ($pms, $priority,
+ consttype => $Mail::SpamAssassin::Conf::TYPE_RAWBODY_TESTS,
+ type => 'rawbody',
+ testhash => $pms->{conf}->{rawbody_tests},
+ args => [ @$textary ],
+ loop_body => sub
+ {
+ my ($self, $pms, $conf, $rulename, $pat, %opts) = @_;
my $sub;
if (($pms->{conf}->{tflags}->{$rulename}||'') =~ /\bmultiple\b/)
{
@@ -897,104 +761,55 @@
';
}
- if ($use_rule_subs) {
- $evalstr .= '
+ if ($self->{main}->{use_rule_subs}) {
+ $self->add_evalstr ('
if ($scoresptr->{q{'.$rulename.'}}) {
'.$rulename.'_rawbody_test($self, @_);
'.$self->ran_rule_plugin_code($rulename, "rawbody").'
}
- ';
+ ');
}
else {
- $evalstr .= '
+ $self->add_evalstr ('
if ($scoresptr->{q{'.$rulename.'}}) {
'.$sub.'
'.$self->ran_rule_plugin_code($rulename, "rawbody").'
}
- ';
+ ');
}
- if ($doing_user_rules) {
- next if (!$self->is_user_rule_sub($rulename.'_rawbody_test'));
- }
+ next if ($opts{doing_user_rules} &&
+ !$self->is_user_rule_sub($rulename.'_rawbody_test'));
- if ($use_rule_subs) {
- $evalstr2 .= '
- sub '.$rulename.'_rawbody_test { my $self = shift; '.$sub.' }
- ';
- push (@TEMPORARY_METHODS, $rulename.'_rawbody_test');
+ if ($self->{main}->{use_rule_subs}) {
+ $self->add_temporary_method ($rulename.'_rawbody_test',
+ '{ my $self = shift; '.$sub.' }');
}
}
-
- # clear out a previous version of this fn
- undef &{"${package_name}::_rawbody_tests_${clean_priority}"};
- $self->free_ruleset_source($pms, 'rawbody', $priority);
-
- return unless ($evalstr);
-
- # generate the loop that goes through each line...
- $evalstr = <<"EOT";
-{
- package $package_name;
-
- $evalstr2
-
- sub _rawbody_tests_$clean_priority {
- my \$self = shift;
- $evalstr;
- }
-
- 1;
+ );
}
-EOT
- eval $evalstr;
-
- if ($@) {
- warn("rules: failed to compile body tests, skipping:\n" . "\t($@)\n");
- $pms->{rule_errors}++;
- }
- else {
- my $method = "${package_name}::_rawbody_tests_${clean_priority}";
- push @TEMPORARY_METHODS, $method;
- no strict "refs";
- &{$method}($pms, @$textary);
- use strict "refs";
- }
-}
+###########################################################################
sub do_full_tests {
my ($self, $pms, $priority, $fullmsgref) = @_;
-
- # XXX - why not just do the plugin call directly?
- return if $self->shortcircuited_p($pms);
-
- dbg("rules: running full-text regexp tests; score so far=".$pms->{score});
-
- my $doing_user_rules =
- $pms->{conf}->{user_rules_to_compile}->{$Mail::SpamAssassin::Conf::TYPE_FULL_TESTS};
-
- # clean up priority value so it can be used in a subroutine name
- my $clean_priority;
- ($clean_priority = $priority) =~ s/-/neg/;
-
- my $package_name = __PACKAGE__;
-
- $pms->{test_log_msgs} = (); # clear test state
-
- if (defined &{"${package_name}::_full_tests_${clean_priority}"}
- && !$doing_user_rules) {
- no strict "refs";
- &{"${package_name}::_full_tests_${clean_priority}"}($pms, $fullmsgref);
- use strict "refs";
- return;
- }
-
- # build up the eval string...
- my $evalstr = $self->start_rules_plugin_code("full", $priority);
-
- while (my($rulename, $pat) = each %{$pms->{conf}{full_tests}->{$priority}}) {
- $evalstr .= '
+ my $loopid = 0;
+ $self->run_generic_tests ($pms, $priority,
+ consttype => $Mail::SpamAssassin::Conf::TYPE_FULL_TESTS,
+ type => 'full',
+ testhash => $pms->{conf}->{full_tests},
+ args => [ $fullmsgref ],
+ pre_loop_body => sub
+ {
+ my ($self, $pms, $conf, %opts) = @_;
+ $self->add_evalstr ('
+ my $fullmsgref = shift;
+ ');
+ },
+ loop_body => sub
+ {
+ my ($self, $pms, $conf, $rulename, $pat, %opts) = @_;
+ $self->add_evalstr ('
if ($scoresptr->{q{'.$rulename.'}}) {
pos $$fullmsgref = 0;
'.$self->hash_line_for_rule($pms, $rulename).'
@@ -1004,43 +819,13 @@
}
'.$self->ran_rule_plugin_code($rulename, "full").'
}
- ';
- }
-
- undef &{"${package_name}::_full_tests_${clean_priority}"};
- $self->free_ruleset_source($pms, 'full', $priority);
-
- return unless ($evalstr);
-
- # and compile it.
- $evalstr = <<"EOT";
- {
- package $package_name;
-
- sub _full_tests_$clean_priority {
- my (\$self, \$fullmsgref) = \@_;
- study \$\$fullmsgref;
- $evalstr
- }
-
- 1;
- }
-EOT
-
- eval $evalstr;
-
- if ($@) {
- warn "rules: failed to compile full tests, skipping:\n" . "\t($@)\n";
- $pms->{rule_errors}++;
- } else {
- my $method = "${package_name}::_full_tests_${clean_priority}";
- push @TEMPORARY_METHODS, $method;
- no strict "refs";
- &{$method}($pms, $fullmsgref);
- use strict "refs";
+ ');
}
+ );
}
+###########################################################################
+
sub do_head_eval_tests {
my ($self, $pms, $priority) = @_;
return unless (defined($pms->{conf}->{head_evals}->{$priority}));
@@ -1075,8 +860,8 @@
sub run_eval_tests {
my ($self, $pms, $testtype, $evalhash, $prepend2desc, $priority, @extraevalargs) = @_;
- # XXX - why not just call the plugin directly?
- return if $self->shortcircuited_p($pms);
+ return if $self->{main}->call_plugins("have_shortcircuited",
+ { permsgstatus => $pms });
my $conf = $pms->{conf};
my $doing_user_rules = $conf->{user_rules_to_compile}->{$testtype};
@@ -1097,7 +882,8 @@
# Some of the rules are scoreset specific, so we need additional
# subroutines to handle those
if (defined &{"${package_name}::${methodname}"}
- && !$doing_user_rules) {
+ && !$doing_user_rules)
+ {
no strict "refs";
&{"${package_name}::${methodname}"}($pms,@extraevalargs);
use strict "refs";
@@ -1246,15 +1032,9 @@
}
}
+###########################################################################
# Helper Functions
-# NOTE: don't call this have_shortcircuited since it creates a nasty recursion loop
-sub shortcircuited_p {
- my ($self, $pms) = @_;
- return 1 if $self->{main}->call_plugins("have_shortcircuited", { permsgstatus => $pms
- });
-}
-
sub hash_line_for_rule {
my ($self, $pms, $rulename) = @_;
return "\n".'#line 1 "'.
@@ -1274,7 +1054,7 @@
my $evalstr = '
- # start_rules_plugin_code '.$ruletype.'
+ # start_rules_plugin_code '.$ruletype.' '.$pri.'
my $scoresptr = $self->{conf}->{scores};
';
@@ -1350,5 +1130,7 @@
delete $pms->{conf}->{$type.'_tests'}->{$pri};
}
}
+
+###########################################################################
1;
Modified: spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/P595Body.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/P595Body.pm?view=diff&rev=478926&r1=478925&r2=478926
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/P595Body.pm (original)
+++ spamassassin/branches/jm_re2c_hacks/lib/Mail/SpamAssassin/Plugin/P595Body.pm Fri Nov 24 09:29:40 2006
@@ -68,15 +68,20 @@
my ($self, $conf, $test_set, $ruletype) = @_;
foreach my $pri (keys %{$test_set}) {
my $nicepri = $pri; $nicepri =~ s/-/neg/g;
- $self->setup_test_set_pri($conf, $test_set->{$pri}, $ruletype.'_'.$nicepri);
+ $self->setup_test_set_pri($conf, $test_set->{$pri}, $ruletype.'_'.$nicepri, $pri);
}
}
sub setup_test_set_pri {
- my ($self, $conf, $rules, $ruletype) = @_;
+ my ($self, $conf, $rules, $ruletype, $pri) = @_;
my $alternates = [];
my $trie_rules = {};
+
+ # while (my ($rule, $pat) = each %{$pms->{conf}->{body_tests}->{$priority}}) {
+ # push @{$alternates}, $pat;
+ # }
+
foreach my $base (keys %{$conf->{base_string}->{$ruletype}})
{
push @{$alternates}, $base;
Modified: spamassassin/branches/jm_re2c_hacks/masses/Makefile
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/Makefile?view=diff&rev=478926&r1=478925&r2=478926
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/Makefile (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/Makefile Fri Nov 24 09:29:40 2006
@@ -18,8 +18,8 @@
perceptron.o: tmp/rules.pl tmp/tests.h tmp/scores.h
$(CC) $(CFLAGS) -c -o perceptron.o perceptron.c
-tmp/rules.pl: tmp/.created parse-rules-for-masses
- perl parse-rules-for-masses -d $(RULES) -s $(SCORESET)
+tmp/rules.pl: tmp/.created ../build/parse-rules-for-masses
+ perl ../build/parse-rules-for-masses -d $(RULES) -s $(SCORESET)
tmp/tests.h: tmp/.created tmp/ranges.data logs-to-c
perl logs-to-c --cffile=$(RULES) --scoreset=$(SCORESET)
Modified: spamassassin/branches/jm_re2c_hacks/masses/find-extremes
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/find-extremes?view=diff&rev=478926&r1=478925&r2=478926
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/find-extremes (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/find-extremes Fri Nov 24 09:29:40 2006
@@ -348,7 +348,7 @@
sub readscores {
- system ("./parse-rules-for-masses") and
+ system ("../build/parse-rules-for-masses") and
die "Couldn't do parse-rules-for-masses: $?; stopped";
require "./tmp/rules.pl";
}
Modified: spamassassin/branches/jm_re2c_hacks/masses/generate-translation
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/generate-translation?view=diff&rev=478926&r1=478925&r2=478926
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/generate-translation (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/generate-translation Fri Nov 24 09:29:40 2006
@@ -82,7 +82,7 @@
sub read_rules {
my ($cffile) = @_;
- system("$FindBin::Bin/parse-rules-for-masses -d \"$cffile\"")
+ system("$FindBin::Bin/../build/parse-rules-for-masses -d \"$cffile\"")
and die "unable to parse rules\n";
require "$FindBin::Bin/tmp/rules.pl"
or die "unable to read tmp/rules.pl\n";
Modified: spamassassin/branches/jm_re2c_hacks/masses/hit-frequencies
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/hit-frequencies?view=diff&rev=478926&r1=478925&r2=478926
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/hit-frequencies (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/hit-frequencies Fri Nov 24 09:29:40 2006
@@ -786,7 +786,7 @@
sub readscores {
my($cffile) = @_;
- if (system ("$FindBin::Bin/parse-rules-for-masses -d \"$cffile\" -s $opt_s")) {
+ if (system ("$FindBin::Bin/../build/parse-rules-for-masses -d \"$cffile\" -s $opt_s")) {
warn "parse-rules-for-masses failed!";
}
eval {
Modified: spamassassin/branches/jm_re2c_hacks/masses/logs-to-c
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/logs-to-c?view=diff&rev=478926&r1=478925&r2=478926
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/logs-to-c (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/logs-to-c Fri Nov 24 09:29:40 2006
@@ -200,7 +200,7 @@
sub readscores {
print "Reading scores from \"$opt_cffile\"...\n";
- system ("./parse-rules-for-masses -d \"$opt_cffile\" -s $opt_scoreset") and die;
+ system ("../build/parse-rules-for-masses -d \"$opt_cffile\" -s $opt_scoreset") and die;
require "./tmp/rules.pl";
%allrules = %rules; # ensure it stays global
}
Modified: spamassassin/branches/jm_re2c_hacks/masses/mass-check
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/mass-check?view=diff&rev=478926&r1=478925&r2=478926
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/mass-check (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/mass-check Fri Nov 24 09:29:40 2006
@@ -46,6 +46,7 @@
were encapsulated by servers matching the regexp RE
(default = extract all SpamAssassin-encapsulated mails)
--lint check rules for syntax before running
+ --cf='config line' Additional line of configuration
client/server mode options
--server host:port
@@ -78,11 +79,16 @@
-n no date sorting or spam/ham interleaving
--cache use cache information when selecting messages
--cachedir=dir write cache info for --cache in this directory tree
+ --all don't skip big messages
+
+ message selection options, can be specified for each target
--after=N only test mails received after time_t N (negative values
are an offset from current time, e.g. -86400 = last day)
or after date as parsed by Time::ParseDate (e.g. '-6 months')
--before=N same as --after, except received times are before time_t N
- --all don't skip big messages
+ --scanprob=N probability of scanning a message, range 0.0 - 1.0 (default: 1.0)
+
+ message selection options, can be specified for each target class
--head=N only check first N ham and N spam (N messages if -n used)
--tail=N only check last N ham and N spam (N messages if -n used)
@@ -117,8 +123,8 @@
$opt_mid $opt_net $opt_nosort $opt_progress $opt_showdots
$opt_spamlog $opt_tail $opt_rules $opt_restart $opt_loguris
$opt_logmem $opt_after $opt_before $opt_rewrite $opt_deencap
- $opt_learn $opt_reuse $opt_lint $opt_cache $opt_noisy
- $total_messages $statusevery $opt_cachedir
+ $opt_learn $opt_reuse $opt_lint $opt_cache $opt_noisy $opt_cf
+ $total_messages $statusevery $opt_cachedir $opt_scanprob
$opt_client $opt_cs_max $opt_cs_timeout $opt_cs_paths_only
$opt_server %postdata %real $svn_revision
$tmpfd %reuse %orig_conf %reuse_conf $reuse_rules_loaded_p);
@@ -148,16 +154,20 @@
$opt_spamlog = "spam.log";
$opt_learn = 0;
$reuse_rules_loaded_p = 0;
+$opt_cf = [];
my @ORIG_ARGV = @ARGV;
GetOptions("c=s", "p=s", "f=s", "j=i", "n", "o", "all", "bayes", "debug:s",
"hamlog=s", "head=i", "loghits", "mh", "mid", "ms", "net",
"progress", "rewrite:s", "showdots", "spamlog=s", "tail=i",
- "rules=s", "restart=i", "after=s", "before=s", "loguris",
+ "rules=s", "restart=i", "loguris",
"deencap=s", "logmem", "learn=i", "reuse", "lint", "cache",
- "cachedir=s", "noisy",
+ "cachedir=s", "noisy", "scanprob=f",
"server=s", "cs_max=i", "cs_timeout=i", "cs_paths_only",
"client=s",
+ "before=s" => \&deal_with_before_after,
+ "after=s" => \&deal_with_before_after,
+ 'cf=s' => \@{$opt_cf},
"dir" => sub { $opt_format = "dir"; },
"file" => sub { $opt_format = "file"; },
"mbox" => sub { $opt_format = "mbox"; },
@@ -177,7 +187,7 @@
# some people specify paths relatively, whereas this needs an absolute path,
# so "do the right thing"(tm).
my $abs_opt_c = File::Spec->rel2abs($opt_c);
- system("cd $FindBin::Bin; perl parse-rules-for-masses -d $abs_opt_c");
+ system("cd $FindBin::Bin; perl ../build/parse-rules-for-masses -d $abs_opt_c");
}
require $rules_path;
@@ -207,6 +217,7 @@
'local_tests_only' => $opt_net ? 0 : 1,
'only_these_rules' => $opt_rules,
'ignore_safety_expire_timeout' => 1,
+ 'post_config_text' => join("\n", @{$opt_cf})."\n",
PREFIX => '',
DEF_RULES_DIR => $opt_c,
LOCAL_RULES_DIR => '',
@@ -241,6 +252,7 @@
'local_tests_only' => $opt_net ? 0 : 1,
'only_these_rules' => $opt_rules,
'ignore_safety_expire_timeout' => 1,
+ 'post_config_text' => join("\n", @{$opt_cf})."\n",
PREFIX => '',
DEF_RULES_DIR => $opt_c,
LOCAL_RULES_DIR => '',
@@ -308,34 +320,21 @@
open(REWRITE, "> $rewrite") || die "open of $rewrite failed: $!";
}
- # Deal with --before and --after
- foreach my $time ($opt_before, $opt_after) {
- if ($time && $time =~ /^-\d+$/) {
- $time = time + $time;
- }
- elsif ($time && $time !~ /^-?\d+$/) {
- if (HAS_TIME_PARSEDATE) {
- $time = Time::ParseDate::parsedate($time, GMT => 1, PREFER_PAST => 1);
- }
- else {
- die "You need Time::ParseDate if you use either the --before or --after option.";
- }
- }
- }
-
- if ($opt_before && $opt_after && $opt_after >= $opt_before) {
- die "--before ($opt_before) <= --after ($opt_after) -- conflict!";
- }
-
# ArchiveIterator options for non-client mode
$AIopts->{'opt_n'} = $opt_n;
$AIopts->{'opt_head'} = $opt_head;
$AIopts->{'opt_tail'} = $opt_tail;
+ $AIopts->{'opt_scanprob'} = $opt_scanprob;
$AIopts->{'opt_cache'} = $opt_cache;
$AIopts->{'opt_cachedir'} = $opt_cachedir;
$AIopts->{'opt_after'} = $opt_after;
$AIopts->{'opt_before'} = $opt_before;
$AIopts->{'scan_progress_sub'} = \&showdots_blip;
+
+ # ensure that scanprob stuff is predictable and reproducible
+ if (defined $opt_scanprob && $opt_scanprob < 1.0) {
+ srand(1);
+ }
}
else {
# ArchiveIterator options for client mode -- tends to be simple
@@ -447,12 +446,23 @@
sub target {
my ($target) = @_;
+
+ # message-selection options; these can now be specified separately
+ # for each target
+ my %selopts = (
+ opt_head => $opt_head,
+ opt_tail => $opt_tail,
+ opt_scanprob => $opt_scanprob,
+ opt_after => $opt_after,
+ opt_before => $opt_before
+ );
+
if (!defined($opt_format)) {
- push(@targets, $target);
+ push(@targets, { %selopts, target => $target });
}
else {
$opt_o = 1;
- push(@targets, "spam:$opt_format:$target");
+ push(@targets, { %selopts, target => "spam:$opt_format:$target" });
}
}
@@ -736,6 +746,8 @@
}
}
+# use Mail::SpamAssassin::Util::MemoryDump; Mail::SpamAssassin::Util::MemoryDump::MEMDEBUG(); use Mail::SpamAssassin::Util::MemoryDump; Mail::SpamAssassin::Util::MemoryDump::MEMDEBUG_dump_obj($status); #JMD
+
if (defined $status) { $status->finish(); }
$ma->finish();
undef $ma; # clean 'em up
@@ -1828,5 +1840,32 @@
sub aidbg {
if (would_log("dbg", "mass-check") == 2) {
dbg (@_);
+ }
+}
+
+sub deal_with_before_after {
+ my($which, $time) = @_;
+
+ if ($time && $time =~ /^-\d+$/) {
+ $time = time + $time;
+ }
+ elsif ($time && $time !~ /^-?\d+$/) {
+ if (HAS_TIME_PARSEDATE) {
+ $time = Time::ParseDate::parsedate($time, GMT => 1, PREFER_PAST => 1);
+ }
+ else {
+ die "You need Time::ParseDate if you use either the --before or --after option.";
+ }
+ }
+
+ if ($which eq 'before') {
+ $opt_before = $time;
+ }
+ else {
+ $opt_after = $time;
+ }
+
+ if ($opt_before && $opt_after && $opt_after >= $opt_before) {
+ die "--before ($opt_before) <= --after ($opt_after) -- conflict!";
}
}
Modified: spamassassin/branches/jm_re2c_hacks/masses/mk-roc-graphs
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/mk-roc-graphs?view=diff&rev=478926&r1=478925&r2=478926
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/mk-roc-graphs (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/mk-roc-graphs Fri Nov 24 09:29:40 2006
@@ -122,7 +122,7 @@
sub readscores {
warn "Reading scores from \"$opt_cffile\"...\n";
- system ("./parse-rules-for-masses -d \"$opt_cffile\" -s $opt_scoreset") and die;
+ system ("../build/parse-rules-for-masses -d \"$opt_cffile\" -s $opt_scoreset") and die;
require "./tmp/rules.pl";
%allrules = %rules; # ensure it stays global
}
Modified: spamassassin/branches/jm_re2c_hacks/masses/rewrite-cf-with-new-scores
URL: http://svn.apache.org/viewvc/spamassassin/branches/jm_re2c_hacks/masses/rewrite-cf-with-new-scores?view=diff&rev=478926&r1=478925&r2=478926
==============================================================================
--- spamassassin/branches/jm_re2c_hacks/masses/rewrite-cf-with-new-scores (original)
+++ spamassassin/branches/jm_re2c_hacks/masses/rewrite-cf-with-new-scores Fri Nov 24 09:29:40 2006
@@ -81,7 +81,7 @@
sub read_rules {
- system ("./parse-rules-for-masses -s $scoreset") and die;
+ system ("../build/parse-rules-for-masses -s $scoreset") and die;
if (-e "tmp/rules.pl") {
# note: the spaces need to stay in front of the require to work around
# a RPM 4.1 problem