You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2021/04/20 07:35:52 UTC
svn commit: r1888999 - in /spamassassin/trunk/masses:
evolve_metarule/preproc.pl fp-fn-statistics freqdiff hit-frequencies
logdiff logs-to-c mass-check mk-roc-graphs overlap post-ga-analysis.pl
tenpass/compute-current-tcr
Author: hege
Date: Tue Apr 20 07:35:51 2021
New Revision: 1888999
URL: http://svn.apache.org/viewvc?rev=1888999&view=rev
Log:
Support compacted/deduplicated RULE(hitcount) format for mass-check logs
Modified:
spamassassin/trunk/masses/evolve_metarule/preproc.pl
spamassassin/trunk/masses/fp-fn-statistics
spamassassin/trunk/masses/freqdiff
spamassassin/trunk/masses/hit-frequencies
spamassassin/trunk/masses/logdiff
spamassassin/trunk/masses/logs-to-c
spamassassin/trunk/masses/mass-check
spamassassin/trunk/masses/mk-roc-graphs
spamassassin/trunk/masses/overlap
spamassassin/trunk/masses/post-ga-analysis.pl
spamassassin/trunk/masses/tenpass/compute-current-tcr
Modified: spamassassin/trunk/masses/evolve_metarule/preproc.pl
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/evolve_metarule/preproc.pl?rev=1888999&r1=1888998&r2=1888999&view=diff
==============================================================================
--- spamassassin/trunk/masses/evolve_metarule/preproc.pl (original)
+++ spamassassin/trunk/masses/evolve_metarule/preproc.pl Tue Apr 20 07:35:51 2021
@@ -67,7 +67,17 @@ while (<HAM>) {
my (undef,undef,undef, $test_str, undef) = split /\s/;
# Extract the relevant rule hits and sort them by column number.
- my @hits = sort map { $rules{$_} } grep { exists $rules{$_} } split /,/, $test_str;
+ my @tests;
+ foreach my $r (split(/,/, $test_str)) {
+ my $hits = 1;
+ # Support compacted RULE(hitcount) format
+ if ($r =~ s/\((\d+)\)$//) {
+ $hits = $1;
+ }
+ next unless exists $rules{$r};
+ push @tests, $r for (1 .. $hits);
+ }
+ my @hits = sort map { $rules{$_} } @tests;
# Count the number of occurrences and size of this pattern.
$ham_patterns{join (' ', @hits)}++;
@@ -95,7 +105,17 @@ while (<SPAM>) {
my (undef,undef,undef, $test_str, undef) = split /\s/;
# Extract the relevant rule hits and sort them by column number.
- my @hits = sort map { $rules{$_} } grep { exists $rules{$_} } split /,/, $test_str;
+ my @tests;
+ foreach my $r (split(/,/, $test_str)) {
+ my $hits = 1;
+ # Support compacted RULE(hitcount) format
+ if ($r =~ s/\((\d+)\)$//) {
+ $hits = $1;
+ }
+ next unless exists $rules{$r};
+ push @tests, $r for (1 .. $hits);
+ }
+ my @hits = sort map { $rules{$_} } @tests;
# Count the number of occurrences and size of this pattern.
$spam_patterns{join (' ', @hits)}++;
Modified: spamassassin/trunk/masses/fp-fn-statistics
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/fp-fn-statistics?rev=1888999&r1=1888998&r2=1888999&view=diff
==============================================================================
--- spamassassin/trunk/masses/fp-fn-statistics (original)
+++ spamassassin/trunk/masses/fp-fn-statistics Tue Apr 20 07:35:51 2021
@@ -173,8 +173,16 @@ sub readlogs {
next unless ($caught eq 'Y' || $caught eq '.') && $rules;
# get tests, but ignore unknown tests and subrules
- my @tests = grep { defined $scores{$_} && !$allrules{$_}->{issubrule} }
- split(/,/, $rules);
+ my @tests;
+ foreach my $r (split(/,/, $rules)) {
+ my $hits = 1;
+ # Support compacted RULE(hitcount) format
+ if ($r =~ s/\((\d+)\)$//) {
+ $hits = $1;
+ }
+ next unless (defined $scores{$r} && !$allrules{$r}->{issubrule});
+ push @tests, $r for (1 .. $hits);
+ }
# run handler
log_line_count($isspam, $count, \@tests, $msgline);
Modified: spamassassin/trunk/masses/freqdiff
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/freqdiff?rev=1888999&r1=1888998&r2=1888999&view=diff
==============================================================================
--- spamassassin/trunk/masses/freqdiff (original)
+++ spamassassin/trunk/masses/freqdiff Tue Apr 20 07:35:51 2021
@@ -157,9 +157,15 @@ sub read_argv {
}
# "mass-check" format
elsif (/^[Y.]\s+-?\d+\s+\S+\s+(\S+)/) {
+ my $test_str = $1;
$type = 2;
- foreach (split(/,/, $1)) {
- $freq{$_}++;
+ foreach my $r (split(/,/, $test_str)) {
+ my $hits = 1;
+ # Support compacted RULE(hitcount) format
+ if ($r =~ s/\((\d+)\)$//) {
+ $hits = $1;
+ }
+ $freq{$r} += $hits;
}
}
# "scores" format
Modified: spamassassin/trunk/masses/hit-frequencies
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/hit-frequencies?rev=1888999&r1=1888998&r2=1888999&view=diff
==============================================================================
--- spamassassin/trunk/masses/hit-frequencies (original)
+++ spamassassin/trunk/masses/hit-frequencies Tue Apr 20 07:35:51 2021
@@ -730,6 +730,18 @@ sub readlogs {
';
}
+ $evalstr .= '
+ my @rules;
+ foreach my $r (split(/,/, $rules)) {
+ my $hits = 1;
+ # Support compacted RULE(hitcount) format
+ if ($r =~ s/\((\d+)\)$//) {
+ $hits = $1;
+ }
+ push @rules, $r for (1 .. $hits);
+ }
+ ';
+
my $hmapstr = '';
my $smapstr = '';
if ($isspam) {
@@ -747,7 +759,7 @@ sub readlogs {
}
$evalstr .= '
- foreach my $r (split(/,/, $rules)) {
+ foreach my $r (@rules) {
$freq_spam{$r}++ unless $freq_mesg{$r}++;
'.$hmapstr.$smapstr.'
}
@@ -768,7 +780,7 @@ sub readlogs {
}
$evalstr .= '
- foreach my $r (split(/,/, $rules)) {
+ foreach my $r (@rules) {
$freq_ham{$r}++ unless $freq_mesg{$r}++;
'.$hmapstr.$smapstr.'
}
Modified: spamassassin/trunk/masses/logdiff
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/logdiff?rev=1888999&r1=1888998&r2=1888999&view=diff
==============================================================================
--- spamassassin/trunk/masses/logdiff (original)
+++ spamassassin/trunk/masses/logdiff Tue Apr 20 07:35:51 2021
@@ -36,7 +36,15 @@ sub fixfile {
}
my ($scorepath, $rules, $meta) = ($1,$2,$3);
- my @rules = split(/,/, $rules);
+ my @rules;
+ foreach my $r (split(/,/, $rules)) {
+ my $hits = 1;
+ # Support compacted RULE(hitcount) format
+ if ($r =~ s/\((\d+)\)$//) {
+ $hits = $1;
+ }
+ push @rules, $r for (1 .. $hits);
+ }
@rules = sort grep {
$_ !~ /^AWL$/
} @rules;
Modified: spamassassin/trunk/masses/logs-to-c
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/logs-to-c?rev=1888999&r1=1888998&r2=1888999&view=diff
==============================================================================
--- spamassassin/trunk/masses/logs-to-c (original)
+++ spamassassin/trunk/masses/logs-to-c Tue Apr 20 07:35:51 2021
@@ -144,8 +144,16 @@ sub readlogs {
(undef, $rules) = split(/ /, $restofline, 3);
# get tests, but ignore unknown tests and subrules
- my @tests = grep { defined $scores{$_} && !$allrules{$_}->{issubrule} }
- split(/,/, $rules);
+ my @tests;
+ foreach my $r (split(/,/, $rules)) {
+ my $hits = 1;
+ # Support compacted RULE(hitcount) format
+ if ($r =~ s/\((\d+)\)$//) {
+ $hits = $1;
+ }
+ next unless (defined $scores{$r} && !$allrules{$r}->{issubrule});
+ push @tests, $r for (1 .. $hits);
+ }
if ($isspam) {
$num_spam++;
Modified: spamassassin/trunk/masses/mass-check
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/mass-check?rev=1888999&r1=1888998&r2=1888999&view=diff
==============================================================================
--- spamassassin/trunk/masses/mass-check (original)
+++ spamassassin/trunk/masses/mass-check Tue Apr 20 07:35:51 2021
@@ -698,7 +698,8 @@ sub wanted {
# Amavis X-Spam-Status rules include score and are enclosed in []
# Amavis: [RULENAME=0.01,RULENAME_2=0.01]
# Spamassassin: RULENAME,RULENAME_2
- s/[\[\]]//, s/=.*// foreach (@previous);
+ # .. also support compact RULE(hits), no need to count hits here
+ s/[\[\]]//, s/=.*//, s/\(\d+\)$// foreach (@previous);
$ma->{metadata}->{reuse_tests_hit} = { map {$_ => 1} @previous };
$reusing = 1;
}
@@ -826,11 +827,19 @@ sub wanted {
# don't bother adjusting scores for reuse
$score = $status->get_score();
# list of tests hit
+ my %tests;
+ foreach ((
+ split(/,/, $status->get_names_of_tests_hit()),
+ split(/,/, $status->get_names_of_subtests_hit())
+ )) {
+ $tests{$_}++;
+ }
my @tests;
- push @tests, split(/,/, $status->get_names_of_tests_hit());
- push @tests, split(/,/, $status->get_names_of_subtests_hit());
-
- $tests = join(",", sort(@tests));
+ foreach (sort keys %tests) {
+ # Use compact RULE(hitcount) format
+ push @tests, $tests{$_} > 1 ? "$_($tests{$_})" : $_;
+ }
+ $tests = join(",", @tests);
$extra = join(",", @extra);
}
Modified: spamassassin/trunk/masses/mk-roc-graphs
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/mk-roc-graphs?rev=1888999&r1=1888998&r2=1888999&view=diff
==============================================================================
--- spamassassin/trunk/masses/mk-roc-graphs (original)
+++ spamassassin/trunk/masses/mk-roc-graphs Tue Apr 20 07:35:51 2021
@@ -106,8 +106,16 @@ sub readlogs {
next unless ($caught eq 'Y' || $caught eq '.') && $rules;
# get tests, but ignore unknown tests and subrules
- my @tests = grep { defined $scores{$_} && !$allrules{$_}->{issubrule} }
- split(/,/, $rules);
+ my @tests;
+ foreach my $r (split(/,/, $rules)) {
+ my $hits = 1;
+ # Support compacted RULE(hitcount) format
+ if ($r =~ s/\((\d+)\)$//) {
+ $hits = $1;
+ }
+ next unless (defined $scores{$r} && !$allrules{$r}->{issubrule});
+ push @tests, $r for (1 .. $hits);
+ }
# run handler
$log_line->($isspam, $count, \@tests);
Modified: spamassassin/trunk/masses/overlap
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/overlap?rev=1888999&r1=1888998&r2=1888999&view=diff
==============================================================================
--- spamassassin/trunk/masses/overlap (original)
+++ spamassassin/trunk/masses/overlap Tue Apr 20 07:35:51 2021
@@ -105,8 +105,17 @@ sub read_file {
while(<FILE>) {
next if /^#/;
if (/^[Y.]\s+-?\d+\s+\S+\s+(\S+)/) {
- my @tests = split(/,/, $1);
- @tests = grep { !/^T_/ } @tests if $opt_t;
+ my $test_str = $1;
+ my @tests;
+ foreach my $r (split(/,/, $test_str)) {
+ next if ($opt_t && $r =~ /^T_/); # skip test rules
+ my $hits = 1;
+ # Support compacted RULE(hitcount) format
+ if ($r =~ s/\((\d+)\)$//) {
+ $hits = $1;
+ }
+ push @tests, $r for (1 .. $hits);
+ }
my $i = 0;
for my $a (@tests) {
$solo{$a}++;
Modified: spamassassin/trunk/masses/post-ga-analysis.pl
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/post-ga-analysis.pl?rev=1888999&r1=1888998&r2=1888999&view=diff
==============================================================================
--- spamassassin/trunk/masses/post-ga-analysis.pl (original)
+++ spamassassin/trunk/masses/post-ga-analysis.pl Tue Apr 20 07:35:51 2021
@@ -26,7 +26,16 @@ while(<SPAM>)
{
next if /^#/;
/.\s+[-0-9]*\s+[^\s]+\s+([^\s]*)(\s+?:(?:bayes|time)=\S+)\s*?$/;
- my @rules=split /,/,$1;
+ my $test_str = $1;
+ my @rules;
+ foreach my $r (split(/,/, $test_str)) {
+ my $hits = 1;
+ # Support compacted RULE(hitcount) format
+ if ($r =~ s/\((\d+)\)$//) {
+ $hits = $1;
+ }
+ push @rules, $r for (1 .. $hits);
+ }
my $score = 0.0;
foreach $rule (@rules)
{
@@ -53,8 +62,17 @@ while(<NONSPAM>)
next if /^#/;
/.\s+[-0-9]*\s+[^\s]+\s+([^\s]*)\s*$/;
next unless defined($1);
+ my $test_str = $1;
+ my @rules;
+ foreach my $r (split(/,/, $test_str)) {
+ my $hits = 1;
+ # Support compacted RULE(hitcount) format
+ if ($r =~ s/\((\d+)\)$//) {
+ $hits = $1;
+ }
+ push @rules, $r for (1 .. $hits);
+ }
- my @rules=split /,/,$1;
my $score = 0.0;
foreach $rule (@rules)
{
Modified: spamassassin/trunk/masses/tenpass/compute-current-tcr
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/tenpass/compute-current-tcr?rev=1888999&r1=1888998&r2=1888999&view=diff
==============================================================================
--- spamassassin/trunk/masses/tenpass/compute-current-tcr (original)
+++ spamassassin/trunk/masses/tenpass/compute-current-tcr Tue Apr 20 07:35:51 2021
@@ -24,7 +24,16 @@ while(<SPAM>)
next if /^\#/;
/.\s+[-0-9]*\s+[^\s]+\s+([^\s]*)\s*$/;
my $testshit = $1; $testshit ||= '';
- my @rules=split /,/,$testshit;
+
+ my @rules;
+ foreach my $r (split(/,/, $testshit)) {
+ my $hits = 1;
+ # Support compacted RULE(hitcount) format
+ if ($r =~ s/\((\d+)\)$//) {
+ $hits = $1;
+ }
+ push @rules, $r for (1 .. $hits);
+ }
my $score = 0.0;
foreach $rule (@rules) {
@@ -46,7 +55,16 @@ while(<HAM>)
next if /^\#/;
/.\s+[-0-9]*\s+[^\s]+\s+([^\s]*)\s*$/;
my $testshit = $1; $testshit ||= '';
- my @rules=split /,/,$testshit;
+
+ my @rules;
+ foreach my $r (split(/,/, $testshit)) {
+ my $hits = 1;
+ # Support compacted RULE(hitcount) format
+ if ($r =~ s/\((\d+)\)$//) {
+ $hits = $1;
+ }
+ push @rules, $r for (1 .. $hits);
+ }
my $score = 0.0;
foreach $rule (@rules) {
mass-check log file format change
Posted by Henrik K <he...@hege.li>.
FYI, mass-check ham/spam.logs now use the compact RULE(hitcount) hit logging
by default, to save space with those pesky __LOWER_E(157)'s.
Keep this in mind in case you use your own log parser or such.
On Tue, Apr 20, 2021 at 07:35:52AM -0000, hege@apache.org wrote:
> Author: hege
> Date: Tue Apr 20 07:35:51 2021
> New Revision: 1888999
>
> URL: http://svn.apache.org/viewvc?rev=1888999&view=rev
> Log:
> Support compacted/deduplicated RULE(hitcount) format for mass-check logs
>
> Modified:
> spamassassin/trunk/masses/evolve_metarule/preproc.pl
> spamassassin/trunk/masses/fp-fn-statistics
> spamassassin/trunk/masses/freqdiff
> spamassassin/trunk/masses/hit-frequencies
> spamassassin/trunk/masses/logdiff
> spamassassin/trunk/masses/logs-to-c
> spamassassin/trunk/masses/mass-check
> spamassassin/trunk/masses/mk-roc-graphs
> spamassassin/trunk/masses/overlap
> spamassassin/trunk/masses/post-ga-analysis.pl
> spamassassin/trunk/masses/tenpass/compute-current-tcr
>
> Modified: spamassassin/trunk/masses/evolve_metarule/preproc.pl
> URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/evolve_metarule/preproc.pl?rev=1888999&r1=1888998&r2=1888999&view=diff
> ==============================================================================
> --- spamassassin/trunk/masses/evolve_metarule/preproc.pl (original)
> +++ spamassassin/trunk/masses/evolve_metarule/preproc.pl Tue Apr 20 07:35:51 2021
> @@ -67,7 +67,17 @@ while (<HAM>) {
> my (undef,undef,undef, $test_str, undef) = split /\s/;
>
> # Extract the relevant rule hits and sort them by column number.
> - my @hits = sort map { $rules{$_} } grep { exists $rules{$_} } split /,/, $test_str;
> + my @tests;
> + foreach my $r (split(/,/, $test_str)) {
> + my $hits = 1;
> + # Support compacted RULE(hitcount) format
> + if ($r =~ s/\((\d+)\)$//) {
> + $hits = $1;
> + }
> + next unless exists $rules{$r};
> + push @tests, $r for (1 .. $hits);
> + }
> + my @hits = sort map { $rules{$_} } @tests;
>
> # Count the number of occurrences and size of this pattern.
> $ham_patterns{join (' ', @hits)}++;
> @@ -95,7 +105,17 @@ while (<SPAM>) {
> my (undef,undef,undef, $test_str, undef) = split /\s/;
>
> # Extract the relevant rule hits and sort them by column number.
> - my @hits = sort map { $rules{$_} } grep { exists $rules{$_} } split /,/, $test_str;
> + my @tests;
> + foreach my $r (split(/,/, $test_str)) {
> + my $hits = 1;
> + # Support compacted RULE(hitcount) format
> + if ($r =~ s/\((\d+)\)$//) {
> + $hits = $1;
> + }
> + next unless exists $rules{$r};
> + push @tests, $r for (1 .. $hits);
> + }
> + my @hits = sort map { $rules{$_} } @tests;
>
> # Count the number of occurrences and size of this pattern.
> $spam_patterns{join (' ', @hits)}++;
>
> Modified: spamassassin/trunk/masses/fp-fn-statistics
> URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/fp-fn-statistics?rev=1888999&r1=1888998&r2=1888999&view=diff
> ==============================================================================
> --- spamassassin/trunk/masses/fp-fn-statistics (original)
> +++ spamassassin/trunk/masses/fp-fn-statistics Tue Apr 20 07:35:51 2021
> @@ -173,8 +173,16 @@ sub readlogs {
> next unless ($caught eq 'Y' || $caught eq '.') && $rules;
>
> # get tests, but ignore unknown tests and subrules
> - my @tests = grep { defined $scores{$_} && !$allrules{$_}->{issubrule} }
> - split(/,/, $rules);
> + my @tests;
> + foreach my $r (split(/,/, $rules)) {
> + my $hits = 1;
> + # Support compacted RULE(hitcount) format
> + if ($r =~ s/\((\d+)\)$//) {
> + $hits = $1;
> + }
> + next unless (defined $scores{$r} && !$allrules{$r}->{issubrule});
> + push @tests, $r for (1 .. $hits);
> + }
>
> # run handler
> log_line_count($isspam, $count, \@tests, $msgline);
>
> Modified: spamassassin/trunk/masses/freqdiff
> URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/freqdiff?rev=1888999&r1=1888998&r2=1888999&view=diff
> ==============================================================================
> --- spamassassin/trunk/masses/freqdiff (original)
> +++ spamassassin/trunk/masses/freqdiff Tue Apr 20 07:35:51 2021
> @@ -157,9 +157,15 @@ sub read_argv {
> }
> # "mass-check" format
> elsif (/^[Y.]\s+-?\d+\s+\S+\s+(\S+)/) {
> + my $test_str = $1;
> $type = 2;
> - foreach (split(/,/, $1)) {
> - $freq{$_}++;
> + foreach my $r (split(/,/, $test_str)) {
> + my $hits = 1;
> + # Support compacted RULE(hitcount) format
> + if ($r =~ s/\((\d+)\)$//) {
> + $hits = $1;
> + }
> + $freq{$r} += $hits;
> }
> }
> # "scores" format
>
> Modified: spamassassin/trunk/masses/hit-frequencies
> URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/hit-frequencies?rev=1888999&r1=1888998&r2=1888999&view=diff
> ==============================================================================
> --- spamassassin/trunk/masses/hit-frequencies (original)
> +++ spamassassin/trunk/masses/hit-frequencies Tue Apr 20 07:35:51 2021
> @@ -730,6 +730,18 @@ sub readlogs {
> ';
> }
>
> + $evalstr .= '
> + my @rules;
> + foreach my $r (split(/,/, $rules)) {
> + my $hits = 1;
> + # Support compacted RULE(hitcount) format
> + if ($r =~ s/\((\d+)\)$//) {
> + $hits = $1;
> + }
> + push @rules, $r for (1 .. $hits);
> + }
> + ';
> +
> my $hmapstr = '';
> my $smapstr = '';
> if ($isspam) {
> @@ -747,7 +759,7 @@ sub readlogs {
> }
>
> $evalstr .= '
> - foreach my $r (split(/,/, $rules)) {
> + foreach my $r (@rules) {
> $freq_spam{$r}++ unless $freq_mesg{$r}++;
> '.$hmapstr.$smapstr.'
> }
> @@ -768,7 +780,7 @@ sub readlogs {
> }
>
> $evalstr .= '
> - foreach my $r (split(/,/, $rules)) {
> + foreach my $r (@rules) {
> $freq_ham{$r}++ unless $freq_mesg{$r}++;
> '.$hmapstr.$smapstr.'
> }
>
> Modified: spamassassin/trunk/masses/logdiff
> URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/logdiff?rev=1888999&r1=1888998&r2=1888999&view=diff
> ==============================================================================
> --- spamassassin/trunk/masses/logdiff (original)
> +++ spamassassin/trunk/masses/logdiff Tue Apr 20 07:35:51 2021
> @@ -36,7 +36,15 @@ sub fixfile {
> }
>
> my ($scorepath, $rules, $meta) = ($1,$2,$3);
> - my @rules = split(/,/, $rules);
> + my @rules;
> + foreach my $r (split(/,/, $rules)) {
> + my $hits = 1;
> + # Support compacted RULE(hitcount) format
> + if ($r =~ s/\((\d+)\)$//) {
> + $hits = $1;
> + }
> + push @rules, $r for (1 .. $hits);
> + }
> @rules = sort grep {
> $_ !~ /^AWL$/
> } @rules;
>
> Modified: spamassassin/trunk/masses/logs-to-c
> URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/logs-to-c?rev=1888999&r1=1888998&r2=1888999&view=diff
> ==============================================================================
> --- spamassassin/trunk/masses/logs-to-c (original)
> +++ spamassassin/trunk/masses/logs-to-c Tue Apr 20 07:35:51 2021
> @@ -144,8 +144,16 @@ sub readlogs {
> (undef, $rules) = split(/ /, $restofline, 3);
>
> # get tests, but ignore unknown tests and subrules
> - my @tests = grep { defined $scores{$_} && !$allrules{$_}->{issubrule} }
> - split(/,/, $rules);
> + my @tests;
> + foreach my $r (split(/,/, $rules)) {
> + my $hits = 1;
> + # Support compacted RULE(hitcount) format
> + if ($r =~ s/\((\d+)\)$//) {
> + $hits = $1;
> + }
> + next unless (defined $scores{$r} && !$allrules{$r}->{issubrule});
> + push @tests, $r for (1 .. $hits);
> + }
>
> if ($isspam) {
> $num_spam++;
>
> Modified: spamassassin/trunk/masses/mass-check
> URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/mass-check?rev=1888999&r1=1888998&r2=1888999&view=diff
> ==============================================================================
> --- spamassassin/trunk/masses/mass-check (original)
> +++ spamassassin/trunk/masses/mass-check Tue Apr 20 07:35:51 2021
> @@ -698,7 +698,8 @@ sub wanted {
> # Amavis X-Spam-Status rules include score and are enclosed in []
> # Amavis: [RULENAME=0.01,RULENAME_2=0.01]
> # Spamassassin: RULENAME,RULENAME_2
> - s/[\[\]]//, s/=.*// foreach (@previous);
> + # .. also support compact RULE(hits), no need to count hits here
> + s/[\[\]]//, s/=.*//, s/\(\d+\)$// foreach (@previous);
> $ma->{metadata}->{reuse_tests_hit} = { map {$_ => 1} @previous };
> $reusing = 1;
> }
> @@ -826,11 +827,19 @@ sub wanted {
> # don't bother adjusting scores for reuse
> $score = $status->get_score();
> # list of tests hit
> + my %tests;
> + foreach ((
> + split(/,/, $status->get_names_of_tests_hit()),
> + split(/,/, $status->get_names_of_subtests_hit())
> + )) {
> + $tests{$_}++;
> + }
> my @tests;
> - push @tests, split(/,/, $status->get_names_of_tests_hit());
> - push @tests, split(/,/, $status->get_names_of_subtests_hit());
> -
> - $tests = join(",", sort(@tests));
> + foreach (sort keys %tests) {
> + # Use compact RULE(hitcount) format
> + push @tests, $tests{$_} > 1 ? "$_($tests{$_})" : $_;
> + }
> + $tests = join(",", @tests);
> $extra = join(",", @extra);
> }
>
>
> Modified: spamassassin/trunk/masses/mk-roc-graphs
> URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/mk-roc-graphs?rev=1888999&r1=1888998&r2=1888999&view=diff
> ==============================================================================
> --- spamassassin/trunk/masses/mk-roc-graphs (original)
> +++ spamassassin/trunk/masses/mk-roc-graphs Tue Apr 20 07:35:51 2021
> @@ -106,8 +106,16 @@ sub readlogs {
> next unless ($caught eq 'Y' || $caught eq '.') && $rules;
>
> # get tests, but ignore unknown tests and subrules
> - my @tests = grep { defined $scores{$_} && !$allrules{$_}->{issubrule} }
> - split(/,/, $rules);
> + my @tests;
> + foreach my $r (split(/,/, $rules)) {
> + my $hits = 1;
> + # Support compacted RULE(hitcount) format
> + if ($r =~ s/\((\d+)\)$//) {
> + $hits = $1;
> + }
> + next unless (defined $scores{$r} && !$allrules{$r}->{issubrule});
> + push @tests, $r for (1 .. $hits);
> + }
>
> # run handler
> $log_line->($isspam, $count, \@tests);
>
> Modified: spamassassin/trunk/masses/overlap
> URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/overlap?rev=1888999&r1=1888998&r2=1888999&view=diff
> ==============================================================================
> --- spamassassin/trunk/masses/overlap (original)
> +++ spamassassin/trunk/masses/overlap Tue Apr 20 07:35:51 2021
> @@ -105,8 +105,17 @@ sub read_file {
> while(<FILE>) {
> next if /^#/;
> if (/^[Y.]\s+-?\d+\s+\S+\s+(\S+)/) {
> - my @tests = split(/,/, $1);
> - @tests = grep { !/^T_/ } @tests if $opt_t;
> + my $test_str = $1;
> + my @tests;
> + foreach my $r (split(/,/, $test_str)) {
> + next if ($opt_t && $r =~ /^T_/); # skip test rules
> + my $hits = 1;
> + # Support compacted RULE(hitcount) format
> + if ($r =~ s/\((\d+)\)$//) {
> + $hits = $1;
> + }
> + push @tests, $r for (1 .. $hits);
> + }
> my $i = 0;
> for my $a (@tests) {
> $solo{$a}++;
>
> Modified: spamassassin/trunk/masses/post-ga-analysis.pl
> URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/post-ga-analysis.pl?rev=1888999&r1=1888998&r2=1888999&view=diff
> ==============================================================================
> --- spamassassin/trunk/masses/post-ga-analysis.pl (original)
> +++ spamassassin/trunk/masses/post-ga-analysis.pl Tue Apr 20 07:35:51 2021
> @@ -26,7 +26,16 @@ while(<SPAM>)
> {
> next if /^#/;
> /.\s+[-0-9]*\s+[^\s]+\s+([^\s]*)(\s+?:(?:bayes|time)=\S+)\s*?$/;
> - my @rules=split /,/,$1;
> + my $test_str = $1;
> + my @rules;
> + foreach my $r (split(/,/, $test_str)) {
> + my $hits = 1;
> + # Support compacted RULE(hitcount) format
> + if ($r =~ s/\((\d+)\)$//) {
> + $hits = $1;
> + }
> + push @rules, $r for (1 .. $hits);
> + }
> my $score = 0.0;
> foreach $rule (@rules)
> {
> @@ -53,8 +62,17 @@ while(<NONSPAM>)
> next if /^#/;
> /.\s+[-0-9]*\s+[^\s]+\s+([^\s]*)\s*$/;
> next unless defined($1);
> + my $test_str = $1;
> + my @rules;
> + foreach my $r (split(/,/, $test_str)) {
> + my $hits = 1;
> + # Support compacted RULE(hitcount) format
> + if ($r =~ s/\((\d+)\)$//) {
> + $hits = $1;
> + }
> + push @rules, $r for (1 .. $hits);
> + }
>
> - my @rules=split /,/,$1;
> my $score = 0.0;
> foreach $rule (@rules)
> {
>
> Modified: spamassassin/trunk/masses/tenpass/compute-current-tcr
> URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/tenpass/compute-current-tcr?rev=1888999&r1=1888998&r2=1888999&view=diff
> ==============================================================================
> --- spamassassin/trunk/masses/tenpass/compute-current-tcr (original)
> +++ spamassassin/trunk/masses/tenpass/compute-current-tcr Tue Apr 20 07:35:51 2021
> @@ -24,7 +24,16 @@ while(<SPAM>)
> next if /^\#/;
> /.\s+[-0-9]*\s+[^\s]+\s+([^\s]*)\s*$/;
> my $testshit = $1; $testshit ||= '';
> - my @rules=split /,/,$testshit;
> +
> + my @rules;
> + foreach my $r (split(/,/, $testshit)) {
> + my $hits = 1;
> + # Support compacted RULE(hitcount) format
> + if ($r =~ s/\((\d+)\)$//) {
> + $hits = $1;
> + }
> + push @rules, $r for (1 .. $hits);
> + }
>
> my $score = 0.0;
> foreach $rule (@rules) {
> @@ -46,7 +55,16 @@ while(<HAM>)
> next if /^\#/;
> /.\s+[-0-9]*\s+[^\s]+\s+([^\s]*)\s*$/;
> my $testshit = $1; $testshit ||= '';
> - my @rules=split /,/,$testshit;
> +
> + my @rules;
> + foreach my $r (split(/,/, $testshit)) {
> + my $hits = 1;
> + # Support compacted RULE(hitcount) format
> + if ($r =~ s/\((\d+)\)$//) {
> + $hits = $1;
> + }
> + push @rules, $r for (1 .. $hits);
> + }
>
> my $score = 0.0;
> foreach $rule (@rules) {
>