You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2021/04/20 07:35:52 UTC

svn commit: r1888999 - in /spamassassin/trunk/masses: evolve_metarule/preproc.pl fp-fn-statistics freqdiff hit-frequencies logdiff logs-to-c mass-check mk-roc-graphs overlap post-ga-analysis.pl tenpass/compute-current-tcr

Author: hege
Date: Tue Apr 20 07:35:51 2021
New Revision: 1888999

URL: http://svn.apache.org/viewvc?rev=1888999&view=rev
Log:
Support compacted/deduplicated RULE(hitcount) format for mass-check logs

Modified:
    spamassassin/trunk/masses/evolve_metarule/preproc.pl
    spamassassin/trunk/masses/fp-fn-statistics
    spamassassin/trunk/masses/freqdiff
    spamassassin/trunk/masses/hit-frequencies
    spamassassin/trunk/masses/logdiff
    spamassassin/trunk/masses/logs-to-c
    spamassassin/trunk/masses/mass-check
    spamassassin/trunk/masses/mk-roc-graphs
    spamassassin/trunk/masses/overlap
    spamassassin/trunk/masses/post-ga-analysis.pl
    spamassassin/trunk/masses/tenpass/compute-current-tcr

Modified: spamassassin/trunk/masses/evolve_metarule/preproc.pl
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/evolve_metarule/preproc.pl?rev=1888999&r1=1888998&r2=1888999&view=diff
==============================================================================
--- spamassassin/trunk/masses/evolve_metarule/preproc.pl (original)
+++ spamassassin/trunk/masses/evolve_metarule/preproc.pl Tue Apr 20 07:35:51 2021
@@ -67,7 +67,17 @@ while (<HAM>) {
 	my (undef,undef,undef, $test_str, undef) = split /\s/;
 
 	# Extract the relevant rule hits and sort them by column number.
-	my @hits = sort map { $rules{$_} } grep { exists $rules{$_} } split /,/, $test_str;
+	my @tests;
+	foreach my $r (split(/,/, $test_str)) {
+          my $hits = 1;
+          # Support compacted RULE(hitcount) format
+          if ($r =~ s/\((\d+)\)$//) {
+            $hits = $1;
+          }
+          next unless exists $rules{$r};
+          push @tests, $r for (1 .. $hits);
+        }
+	my @hits = sort map { $rules{$_} } @tests;
 
 	# Count the number of occurrences and size of this pattern.
 	$ham_patterns{join (' ', @hits)}++;
@@ -95,7 +105,17 @@ while (<SPAM>) {
 	my (undef,undef,undef, $test_str, undef) = split /\s/;
 
 	# Extract the relevant rule hits and sort them by column number.
-	my @hits = sort map { $rules{$_} } grep { exists $rules{$_} } split /,/, $test_str;
+	my @tests;
+	foreach my $r (split(/,/, $test_str)) {
+          my $hits = 1;
+          # Support compacted RULE(hitcount) format
+          if ($r =~ s/\((\d+)\)$//) {
+            $hits = $1;
+          }
+          next unless exists $rules{$r};
+          push @tests, $r for (1 .. $hits);
+        }
+	my @hits = sort map { $rules{$_} } @tests;
 
 	# Count the number of occurrences and size of this pattern.
 	$spam_patterns{join (' ', @hits)}++;

Modified: spamassassin/trunk/masses/fp-fn-statistics
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/fp-fn-statistics?rev=1888999&r1=1888998&r2=1888999&view=diff
==============================================================================
--- spamassassin/trunk/masses/fp-fn-statistics (original)
+++ spamassassin/trunk/masses/fp-fn-statistics Tue Apr 20 07:35:51 2021
@@ -173,8 +173,16 @@ sub readlogs {
       next unless ($caught eq 'Y' || $caught eq '.') && $rules;
 
       # get tests, but ignore unknown tests and subrules
-      my @tests = grep { defined $scores{$_} && !$allrules{$_}->{issubrule} }
-	split(/,/, $rules);
+      my @tests;
+      foreach my $r (split(/,/, $rules)) {
+        my $hits = 1;
+        # Support compacted RULE(hitcount) format
+        if ($r =~ s/\((\d+)\)$//) {
+          $hits = $1;
+        }
+        next unless (defined $scores{$r} && !$allrules{$r}->{issubrule});
+        push @tests, $r for (1 .. $hits);
+      }
 
       # run handler
       log_line_count($isspam, $count, \@tests, $msgline);

Modified: spamassassin/trunk/masses/freqdiff
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/freqdiff?rev=1888999&r1=1888998&r2=1888999&view=diff
==============================================================================
--- spamassassin/trunk/masses/freqdiff (original)
+++ spamassassin/trunk/masses/freqdiff Tue Apr 20 07:35:51 2021
@@ -157,9 +157,15 @@ sub read_argv {
 	}
 	# "mass-check" format
 	elsif (/^[Y.]\s+-?\d+\s+\S+\s+(\S+)/) {
+	    my $test_str = $1;
 	    $type = 2;
-	    foreach (split(/,/, $1)) {
-		$freq{$_}++;
+	    foreach my $r (split(/,/, $test_str)) {
+	        my $hits = 1;
+	        # Support compacted RULE(hitcount) format
+	        if ($r =~ s/\((\d+)\)$//) {
+	          $hits = $1;
+	        }
+		$freq{$r} += $hits;
 	    }
 	}
 	# "scores" format

Modified: spamassassin/trunk/masses/hit-frequencies
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/hit-frequencies?rev=1888999&r1=1888998&r2=1888999&view=diff
==============================================================================
--- spamassassin/trunk/masses/hit-frequencies (original)
+++ spamassassin/trunk/masses/hit-frequencies Tue Apr 20 07:35:51 2021
@@ -730,6 +730,18 @@ sub readlogs {
       ';
     }
 
+    $evalstr .= '
+      my @rules;
+      foreach my $r (split(/,/, $rules)) {
+        my $hits = 1;
+        # Support compacted RULE(hitcount) format
+        if ($r =~ s/\((\d+)\)$//) {
+          $hits = $1;
+        }
+        push @rules, $r for (1 .. $hits);
+      }
+    ';
+
     my $hmapstr = '';
     my $smapstr = '';
     if ($isspam) {
@@ -747,7 +759,7 @@ sub readlogs {
       }
 
       $evalstr .= '
-        foreach my $r (split(/,/, $rules)) {
+        foreach my $r (@rules) {
           $freq_spam{$r}++ unless $freq_mesg{$r}++;
           '.$hmapstr.$smapstr.'
         }
@@ -768,7 +780,7 @@ sub readlogs {
       }
 
       $evalstr .= '
-        foreach my $r (split(/,/, $rules)) {
+        foreach my $r (@rules) {
           $freq_ham{$r}++ unless $freq_mesg{$r}++;
           '.$hmapstr.$smapstr.'
         }

Modified: spamassassin/trunk/masses/logdiff
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/logdiff?rev=1888999&r1=1888998&r2=1888999&view=diff
==============================================================================
--- spamassassin/trunk/masses/logdiff (original)
+++ spamassassin/trunk/masses/logdiff Tue Apr 20 07:35:51 2021
@@ -36,7 +36,15 @@ sub fixfile {
     }
 
     my ($scorepath, $rules, $meta) = ($1,$2,$3);
-    my @rules = split(/,/, $rules);
+    my @rules;
+    foreach my $r (split(/,/, $rules)) {
+      my $hits = 1;
+      # Support compacted RULE(hitcount) format
+      if ($r =~ s/\((\d+)\)$//) {
+        $hits = $1;
+      }
+      push @rules, $r for (1 .. $hits);
+    }
     @rules = sort grep {
         $_ !~ /^AWL$/
     } @rules;

Modified: spamassassin/trunk/masses/logs-to-c
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/logs-to-c?rev=1888999&r1=1888998&r2=1888999&view=diff
==============================================================================
--- spamassassin/trunk/masses/logs-to-c (original)
+++ spamassassin/trunk/masses/logs-to-c Tue Apr 20 07:35:51 2021
@@ -144,8 +144,16 @@ sub readlogs {
       (undef, $rules) = split(/ /, $restofline, 3);
 
       # get tests, but ignore unknown tests and subrules
-      my @tests = grep { defined $scores{$_} && !$allrules{$_}->{issubrule} }
-	split(/,/, $rules);
+      my @tests;
+      foreach my $r (split(/,/, $rules)) {
+        my $hits = 1;
+        # Support compacted RULE(hitcount) format
+        if ($r =~ s/\((\d+)\)$//) {
+          $hits = $1;
+        }
+        next unless (defined $scores{$r} && !$allrules{$r}->{issubrule});
+        push @tests, $r for (1 .. $hits);
+      }
 
       if ($isspam) {
         $num_spam++;

Modified: spamassassin/trunk/masses/mass-check
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/mass-check?rev=1888999&r1=1888998&r2=1888999&view=diff
==============================================================================
--- spamassassin/trunk/masses/mass-check (original)
+++ spamassassin/trunk/masses/mass-check Tue Apr 20 07:35:51 2021
@@ -698,7 +698,8 @@ sub wanted {
       # Amavis X-Spam-Status rules include score and are enclosed in []
       # Amavis: [RULENAME=0.01,RULENAME_2=0.01]
       # Spamassassin: RULENAME,RULENAME_2
-      s/[\[\]]//, s/=.*// foreach (@previous);
+      # .. also support compact RULE(hits), no need to count hits here
+      s/[\[\]]//, s/=.*//, s/\(\d+\)$// foreach (@previous);
       $ma->{metadata}->{reuse_tests_hit} = { map {$_ => 1} @previous };
       $reusing = 1;
     }
@@ -826,11 +827,19 @@ sub wanted {
     # don't bother adjusting scores for reuse
     $score = $status->get_score();
     # list of tests hit
+    my %tests;
+    foreach ((
+      split(/,/, $status->get_names_of_tests_hit()),
+      split(/,/, $status->get_names_of_subtests_hit())
+    )) {
+      $tests{$_}++;
+    }
     my @tests;
-    push @tests, split(/,/, $status->get_names_of_tests_hit());
-    push @tests, split(/,/, $status->get_names_of_subtests_hit());
-
-    $tests = join(",", sort(@tests));
+    foreach (sort keys %tests) {
+      # Use compact RULE(hitcount) format
+      push @tests, $tests{$_} > 1 ? "$_($tests{$_})" : $_;
+    }
+    $tests = join(",", @tests);
     $extra = join(",", @extra);
   }
 

Modified: spamassassin/trunk/masses/mk-roc-graphs
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/mk-roc-graphs?rev=1888999&r1=1888998&r2=1888999&view=diff
==============================================================================
--- spamassassin/trunk/masses/mk-roc-graphs (original)
+++ spamassassin/trunk/masses/mk-roc-graphs Tue Apr 20 07:35:51 2021
@@ -106,8 +106,16 @@ sub readlogs {
       next unless ($caught eq 'Y' || $caught eq '.') && $rules;
 
       # get tests, but ignore unknown tests and subrules
-      my @tests = grep { defined $scores{$_} && !$allrules{$_}->{issubrule} }
-	split(/,/, $rules);
+      my @tests;
+      foreach my $r (split(/,/, $rules)) {
+        my $hits = 1;
+        # Support compacted RULE(hitcount) format
+        if ($r =~ s/\((\d+)\)$//) {
+          $hits = $1;
+        }
+        next unless (defined $scores{$r} && !$allrules{$r}->{issubrule});
+        push @tests, $r for (1 .. $hits);
+      }
 
       # run handler
       $log_line->($isspam, $count, \@tests);

Modified: spamassassin/trunk/masses/overlap
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/overlap?rev=1888999&r1=1888998&r2=1888999&view=diff
==============================================================================
--- spamassassin/trunk/masses/overlap (original)
+++ spamassassin/trunk/masses/overlap Tue Apr 20 07:35:51 2021
@@ -105,8 +105,17 @@ sub read_file {
     while(<FILE>) {
 	next if /^#/;
 	if (/^[Y.]\s+-?\d+\s+\S+\s+(\S+)/) {
-	    my @tests = split(/,/, $1);
-	    @tests = grep { !/^T_/ } @tests if $opt_t;
+	    my $test_str = $1;
+	    my @tests;
+	    foreach my $r (split(/,/, $test_str)) {
+              next if ($opt_t && $r =~ /^T_/); # skip test rules
+              my $hits = 1;
+              # Support compacted RULE(hitcount) format
+              if ($r =~ s/\((\d+)\)$//) {
+                $hits = $1;
+              }
+              push @tests, $r for (1 .. $hits);
+            }
 	    my $i = 0;
 	    for my $a (@tests) {
 		$solo{$a}++;

Modified: spamassassin/trunk/masses/post-ga-analysis.pl
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/post-ga-analysis.pl?rev=1888999&r1=1888998&r2=1888999&view=diff
==============================================================================
--- spamassassin/trunk/masses/post-ga-analysis.pl (original)
+++ spamassassin/trunk/masses/post-ga-analysis.pl Tue Apr 20 07:35:51 2021
@@ -26,7 +26,16 @@ while(<SPAM>)
 {
     next if /^#/;
     /.\s+[-0-9]*\s+[^\s]+\s+([^\s]*)(\s+?:(?:bayes|time)=\S+)\s*?$/;
-    my @rules=split /,/,$1;
+    my $test_str = $1;
+    my @rules;
+    foreach my $r (split(/,/, $test_str)) {
+      my $hits = 1;
+      # Support compacted RULE(hitcount) format
+      if ($r =~ s/\((\d+)\)$//) {
+        $hits = $1;
+      }
+      push @rules, $r for (1 .. $hits);
+    }
     my $score = 0.0;
     foreach $rule (@rules)
     {
@@ -53,8 +62,17 @@ while(<NONSPAM>)
     next if /^#/;
     /.\s+[-0-9]*\s+[^\s]+\s+([^\s]*)\s*$/;
     next unless defined($1);
+    my $test_str = $1;
+    my @rules;
+    foreach my $r (split(/,/, $test_str)) {
+      my $hits = 1;
+      # Support compacted RULE(hitcount) format
+      if ($r =~ s/\((\d+)\)$//) {
+        $hits = $1;
+      }
+      push @rules, $r for (1 .. $hits);
+    }
 
-    my @rules=split /,/,$1;
     my $score = 0.0;
     foreach $rule (@rules)
     {

Modified: spamassassin/trunk/masses/tenpass/compute-current-tcr
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/tenpass/compute-current-tcr?rev=1888999&r1=1888998&r2=1888999&view=diff
==============================================================================
--- spamassassin/trunk/masses/tenpass/compute-current-tcr (original)
+++ spamassassin/trunk/masses/tenpass/compute-current-tcr Tue Apr 20 07:35:51 2021
@@ -24,7 +24,16 @@ while(<SPAM>)
     next if /^\#/;
     /.\s+[-0-9]*\s+[^\s]+\s+([^\s]*)\s*$/;
     my $testshit = $1; $testshit ||= '';
-    my @rules=split /,/,$testshit;
+
+    my @rules;
+    foreach my $r (split(/,/, $testshit)) {
+      my $hits = 1;
+      # Support compacted RULE(hitcount) format
+      if ($r =~ s/\((\d+)\)$//) {
+        $hits = $1;
+      }
+      push @rules, $r for (1 .. $hits);
+    }
 
     my $score = 0.0;
     foreach $rule (@rules) {
@@ -46,7 +55,16 @@ while(<HAM>)
     next if /^\#/;
     /.\s+[-0-9]*\s+[^\s]+\s+([^\s]*)\s*$/;
     my $testshit = $1; $testshit ||= '';
-    my @rules=split /,/,$testshit;
+
+    my @rules;
+    foreach my $r (split(/,/, $testshit)) {
+      my $hits = 1;
+      # Support compacted RULE(hitcount) format
+      if ($r =~ s/\((\d+)\)$//) {
+        $hits = $1;
+      }
+      push @rules, $r for (1 .. $hits);
+    }
 
     my $score = 0.0;
     foreach $rule (@rules) {



mass-check log file format change

Posted by Henrik K <he...@hege.li>.
FYI, mass-check ham.log/spam.log files now use the compact RULE(hitcount) hit
logging format by default, to save space with those pesky __LOWER_E(157)'s.

Keep this in mind in case you use your own log parser or such.



On Tue, Apr 20, 2021 at 07:35:52AM -0000, hege@apache.org wrote:
> Author: hege
> Date: Tue Apr 20 07:35:51 2021
> New Revision: 1888999
> 
> URL: http://svn.apache.org/viewvc?rev=1888999&view=rev
> Log:
> Support compacted/deduplicated RULE(hitcount) format for mass-check logs
> 
> Modified:
>     spamassassin/trunk/masses/evolve_metarule/preproc.pl
>     spamassassin/trunk/masses/fp-fn-statistics
>     spamassassin/trunk/masses/freqdiff
>     spamassassin/trunk/masses/hit-frequencies
>     spamassassin/trunk/masses/logdiff
>     spamassassin/trunk/masses/logs-to-c
>     spamassassin/trunk/masses/mass-check
>     spamassassin/trunk/masses/mk-roc-graphs
>     spamassassin/trunk/masses/overlap
>     spamassassin/trunk/masses/post-ga-analysis.pl
>     spamassassin/trunk/masses/tenpass/compute-current-tcr
> 
> Modified: spamassassin/trunk/masses/evolve_metarule/preproc.pl
> URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/evolve_metarule/preproc.pl?rev=1888999&r1=1888998&r2=1888999&view=diff
> ==============================================================================
> --- spamassassin/trunk/masses/evolve_metarule/preproc.pl (original)
> +++ spamassassin/trunk/masses/evolve_metarule/preproc.pl Tue Apr 20 07:35:51 2021
> @@ -67,7 +67,17 @@ while (<HAM>) {
>  	my (undef,undef,undef, $test_str, undef) = split /\s/;
>  
>  	# Extract the relevant rule hits and sort them by column number.
> -	my @hits = sort map { $rules{$_} } grep { exists $rules{$_} } split /,/, $test_str;
> +	my @tests;
> +	foreach my $r (split(/,/, $test_str)) {
> +          my $hits = 1;
> +          # Support compacted RULE(hitcount) format
> +          if ($r =~ s/\((\d+)\)$//) {
> +            $hits = $1;
> +          }
> +          next unless exists $rules{$r};
> +          push @tests, $r for (1 .. $hits);
> +        }
> +	my @hits = sort map { $rules{$_} } @tests;
>  
>  	# Count the number of occurrences and size of this pattern.
>  	$ham_patterns{join (' ', @hits)}++;
> @@ -95,7 +105,17 @@ while (<SPAM>) {
>  	my (undef,undef,undef, $test_str, undef) = split /\s/;
>  
>  	# Extract the relevant rule hits and sort them by column number.
> -	my @hits = sort map { $rules{$_} } grep { exists $rules{$_} } split /,/, $test_str;
> +	my @tests;
> +	foreach my $r (split(/,/, $test_str)) {
> +          my $hits = 1;
> +          # Support compacted RULE(hitcount) format
> +          if ($r =~ s/\((\d+)\)$//) {
> +            $hits = $1;
> +          }
> +          next unless exists $rules{$r};
> +          push @tests, $r for (1 .. $hits);
> +        }
> +	my @hits = sort map { $rules{$_} } @tests;
>  
>  	# Count the number of occurrences and size of this pattern.
>  	$spam_patterns{join (' ', @hits)}++;
> 
> Modified: spamassassin/trunk/masses/fp-fn-statistics
> URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/fp-fn-statistics?rev=1888999&r1=1888998&r2=1888999&view=diff
> ==============================================================================
> --- spamassassin/trunk/masses/fp-fn-statistics (original)
> +++ spamassassin/trunk/masses/fp-fn-statistics Tue Apr 20 07:35:51 2021
> @@ -173,8 +173,16 @@ sub readlogs {
>        next unless ($caught eq 'Y' || $caught eq '.') && $rules;
>  
>        # get tests, but ignore unknown tests and subrules
> -      my @tests = grep { defined $scores{$_} && !$allrules{$_}->{issubrule} }
> -	split(/,/, $rules);
> +      my @tests;
> +      foreach my $r (split(/,/, $rules)) {
> +        my $hits = 1;
> +        # Support compacted RULE(hitcount) format
> +        if ($r =~ s/\((\d+)\)$//) {
> +          $hits = $1;
> +        }
> +        next unless (defined $scores{$r} && !$allrules{$r}->{issubrule});
> +        push @tests, $r for (1 .. $hits);
> +      }
>  
>        # run handler
>        log_line_count($isspam, $count, \@tests, $msgline);
> 
> Modified: spamassassin/trunk/masses/freqdiff
> URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/freqdiff?rev=1888999&r1=1888998&r2=1888999&view=diff
> ==============================================================================
> --- spamassassin/trunk/masses/freqdiff (original)
> +++ spamassassin/trunk/masses/freqdiff Tue Apr 20 07:35:51 2021
> @@ -157,9 +157,15 @@ sub read_argv {
>  	}
>  	# "mass-check" format
>  	elsif (/^[Y.]\s+-?\d+\s+\S+\s+(\S+)/) {
> +	    my $test_str = $1;
>  	    $type = 2;
> -	    foreach (split(/,/, $1)) {
> -		$freq{$_}++;
> +	    foreach my $r (split(/,/, $test_str)) {
> +	        my $hits = 1;
> +	        # Support compacted RULE(hitcount) format
> +	        if ($r =~ s/\((\d+)\)$//) {
> +	          $hits = $1;
> +	        }
> +		$freq{$r} += $hits;
>  	    }
>  	}
>  	# "scores" format
> 
> Modified: spamassassin/trunk/masses/hit-frequencies
> URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/hit-frequencies?rev=1888999&r1=1888998&r2=1888999&view=diff
> ==============================================================================
> --- spamassassin/trunk/masses/hit-frequencies (original)
> +++ spamassassin/trunk/masses/hit-frequencies Tue Apr 20 07:35:51 2021
> @@ -730,6 +730,18 @@ sub readlogs {
>        ';
>      }
>  
> +    $evalstr .= '
> +      my @rules;
> +      foreach my $r (split(/,/, $rules)) {
> +        my $hits = 1;
> +        # Support compacted RULE(hitcount) format
> +        if ($r =~ s/\((\d+)\)$//) {
> +          $hits = $1;
> +        }
> +        push @rules, $r for (1 .. $hits);
> +      }
> +    ';
> +
>      my $hmapstr = '';
>      my $smapstr = '';
>      if ($isspam) {
> @@ -747,7 +759,7 @@ sub readlogs {
>        }
>  
>        $evalstr .= '
> -        foreach my $r (split(/,/, $rules)) {
> +        foreach my $r (@rules) {
>            $freq_spam{$r}++ unless $freq_mesg{$r}++;
>            '.$hmapstr.$smapstr.'
>          }
> @@ -768,7 +780,7 @@ sub readlogs {
>        }
>  
>        $evalstr .= '
> -        foreach my $r (split(/,/, $rules)) {
> +        foreach my $r (@rules) {
>            $freq_ham{$r}++ unless $freq_mesg{$r}++;
>            '.$hmapstr.$smapstr.'
>          }
> 
> Modified: spamassassin/trunk/masses/logdiff
> URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/logdiff?rev=1888999&r1=1888998&r2=1888999&view=diff
> ==============================================================================
> --- spamassassin/trunk/masses/logdiff (original)
> +++ spamassassin/trunk/masses/logdiff Tue Apr 20 07:35:51 2021
> @@ -36,7 +36,15 @@ sub fixfile {
>      }
>  
>      my ($scorepath, $rules, $meta) = ($1,$2,$3);
> -    my @rules = split(/,/, $rules);
> +    my @rules;
> +    foreach my $r (split(/,/, $rules)) {
> +      my $hits = 1;
> +      # Support compacted RULE(hitcount) format
> +      if ($r =~ s/\((\d+)\)$//) {
> +        $hits = $1;
> +      }
> +      push @rules, $r for (1 .. $hits);
> +    }
>      @rules = sort grep {
>          $_ !~ /^AWL$/
>      } @rules;
> 
> Modified: spamassassin/trunk/masses/logs-to-c
> URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/logs-to-c?rev=1888999&r1=1888998&r2=1888999&view=diff
> ==============================================================================
> --- spamassassin/trunk/masses/logs-to-c (original)
> +++ spamassassin/trunk/masses/logs-to-c Tue Apr 20 07:35:51 2021
> @@ -144,8 +144,16 @@ sub readlogs {
>        (undef, $rules) = split(/ /, $restofline, 3);
>  
>        # get tests, but ignore unknown tests and subrules
> -      my @tests = grep { defined $scores{$_} && !$allrules{$_}->{issubrule} }
> -	split(/,/, $rules);
> +      my @tests;
> +      foreach my $r (split(/,/, $rules)) {
> +        my $hits = 1;
> +        # Support compacted RULE(hitcount) format
> +        if ($r =~ s/\((\d+)\)$//) {
> +          $hits = $1;
> +        }
> +        next unless (defined $scores{$r} && !$allrules{$r}->{issubrule});
> +        push @tests, $r for (1 .. $hits);
> +      }
>  
>        if ($isspam) {
>          $num_spam++;
> 
> Modified: spamassassin/trunk/masses/mass-check
> URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/mass-check?rev=1888999&r1=1888998&r2=1888999&view=diff
> ==============================================================================
> --- spamassassin/trunk/masses/mass-check (original)
> +++ spamassassin/trunk/masses/mass-check Tue Apr 20 07:35:51 2021
> @@ -698,7 +698,8 @@ sub wanted {
>        # Amavis X-Spam-Status rules include score and are enclosed in []
>        # Amavis: [RULENAME=0.01,RULENAME_2=0.01]
>        # Spamassassin: RULENAME,RULENAME_2
> -      s/[\[\]]//, s/=.*// foreach (@previous);
> +      # .. also support compact RULE(hits), no need to count hits here
> +      s/[\[\]]//, s/=.*//, s/\(\d+\)$// foreach (@previous);
>        $ma->{metadata}->{reuse_tests_hit} = { map {$_ => 1} @previous };
>        $reusing = 1;
>      }
> @@ -826,11 +827,19 @@ sub wanted {
>      # don't bother adjusting scores for reuse
>      $score = $status->get_score();
>      # list of tests hit
> +    my %tests;
> +    foreach ((
> +      split(/,/, $status->get_names_of_tests_hit()),
> +      split(/,/, $status->get_names_of_subtests_hit())
> +    )) {
> +      $tests{$_}++;
> +    }
>      my @tests;
> -    push @tests, split(/,/, $status->get_names_of_tests_hit());
> -    push @tests, split(/,/, $status->get_names_of_subtests_hit());
> -
> -    $tests = join(",", sort(@tests));
> +    foreach (sort keys %tests) {
> +      # Use compact RULE(hitcount) format
> +      push @tests, $tests{$_} > 1 ? "$_($tests{$_})" : $_;
> +    }
> +    $tests = join(",", @tests);
>      $extra = join(",", @extra);
>    }
>  
> 
> Modified: spamassassin/trunk/masses/mk-roc-graphs
> URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/mk-roc-graphs?rev=1888999&r1=1888998&r2=1888999&view=diff
> ==============================================================================
> --- spamassassin/trunk/masses/mk-roc-graphs (original)
> +++ spamassassin/trunk/masses/mk-roc-graphs Tue Apr 20 07:35:51 2021
> @@ -106,8 +106,16 @@ sub readlogs {
>        next unless ($caught eq 'Y' || $caught eq '.') && $rules;
>  
>        # get tests, but ignore unknown tests and subrules
> -      my @tests = grep { defined $scores{$_} && !$allrules{$_}->{issubrule} }
> -	split(/,/, $rules);
> +      my @tests;
> +      foreach my $r (split(/,/, $rules)) {
> +        my $hits = 1;
> +        # Support compacted RULE(hitcount) format
> +        if ($r =~ s/\((\d+)\)$//) {
> +          $hits = $1;
> +        }
> +        next unless (defined $scores{$r} && !$allrules{$r}->{issubrule});
> +        push @tests, $r for (1 .. $hits);
> +      }
>  
>        # run handler
>        $log_line->($isspam, $count, \@tests);
> 
> Modified: spamassassin/trunk/masses/overlap
> URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/overlap?rev=1888999&r1=1888998&r2=1888999&view=diff
> ==============================================================================
> --- spamassassin/trunk/masses/overlap (original)
> +++ spamassassin/trunk/masses/overlap Tue Apr 20 07:35:51 2021
> @@ -105,8 +105,17 @@ sub read_file {
>      while(<FILE>) {
>  	next if /^#/;
>  	if (/^[Y.]\s+-?\d+\s+\S+\s+(\S+)/) {
> -	    my @tests = split(/,/, $1);
> -	    @tests = grep { !/^T_/ } @tests if $opt_t;
> +	    my $test_str = $1;
> +	    my @tests;
> +	    foreach my $r (split(/,/, $test_str)) {
> +              next if ($opt_t && $r =~ /^T_/); # skip test rules
> +              my $hits = 1;
> +              # Support compacted RULE(hitcount) format
> +              if ($r =~ s/\((\d+)\)$//) {
> +                $hits = $1;
> +              }
> +              push @tests, $r for (1 .. $hits);
> +            }
>  	    my $i = 0;
>  	    for my $a (@tests) {
>  		$solo{$a}++;
> 
> Modified: spamassassin/trunk/masses/post-ga-analysis.pl
> URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/post-ga-analysis.pl?rev=1888999&r1=1888998&r2=1888999&view=diff
> ==============================================================================
> --- spamassassin/trunk/masses/post-ga-analysis.pl (original)
> +++ spamassassin/trunk/masses/post-ga-analysis.pl Tue Apr 20 07:35:51 2021
> @@ -26,7 +26,16 @@ while(<SPAM>)
>  {
>      next if /^#/;
>      /.\s+[-0-9]*\s+[^\s]+\s+([^\s]*)(\s+?:(?:bayes|time)=\S+)\s*?$/;
> -    my @rules=split /,/,$1;
> +    my $test_str = $1;
> +    my @rules;
> +    foreach my $r (split(/,/, $test_str)) {
> +      my $hits = 1;
> +      # Support compacted RULE(hitcount) format
> +      if ($r =~ s/\((\d+)\)$//) {
> +        $hits = $1;
> +      }
> +      push @rules, $r for (1 .. $hits);
> +    }
>      my $score = 0.0;
>      foreach $rule (@rules)
>      {
> @@ -53,8 +62,17 @@ while(<NONSPAM>)
>      next if /^#/;
>      /.\s+[-0-9]*\s+[^\s]+\s+([^\s]*)\s*$/;
>      next unless defined($1);
> +    my $test_str = $1;
> +    my @rules;
> +    foreach my $r (split(/,/, $test_str)) {
> +      my $hits = 1;
> +      # Support compacted RULE(hitcount) format
> +      if ($r =~ s/\((\d+)\)$//) {
> +        $hits = $1;
> +      }
> +      push @rules, $r for (1 .. $hits);
> +    }
>  
> -    my @rules=split /,/,$1;
>      my $score = 0.0;
>      foreach $rule (@rules)
>      {
> 
> Modified: spamassassin/trunk/masses/tenpass/compute-current-tcr
> URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/tenpass/compute-current-tcr?rev=1888999&r1=1888998&r2=1888999&view=diff
> ==============================================================================
> --- spamassassin/trunk/masses/tenpass/compute-current-tcr (original)
> +++ spamassassin/trunk/masses/tenpass/compute-current-tcr Tue Apr 20 07:35:51 2021
> @@ -24,7 +24,16 @@ while(<SPAM>)
>      next if /^\#/;
>      /.\s+[-0-9]*\s+[^\s]+\s+([^\s]*)\s*$/;
>      my $testshit = $1; $testshit ||= '';
> -    my @rules=split /,/,$testshit;
> +
> +    my @rules;
> +    foreach my $r (split(/,/, $testshit)) {
> +      my $hits = 1;
> +      # Support compacted RULE(hitcount) format
> +      if ($r =~ s/\((\d+)\)$//) {
> +        $hits = $1;
> +      }
> +      push @rules, $r for (1 .. $hits);
> +    }
>  
>      my $score = 0.0;
>      foreach $rule (@rules) {
> @@ -46,7 +55,16 @@ while(<HAM>)
>      next if /^\#/;
>      /.\s+[-0-9]*\s+[^\s]+\s+([^\s]*)\s*$/;
>      my $testshit = $1; $testshit ||= '';
> -    my @rules=split /,/,$testshit;
> +
> +    my @rules;
> +    foreach my $r (split(/,/, $testshit)) {
> +      my $hits = 1;
> +      # Support compacted RULE(hitcount) format
> +      if ($r =~ s/\((\d+)\)$//) {
> +        $hits = $1;
> +      }
> +      push @rules, $r for (1 .. $hits);
> +    }
>  
>      my $score = 0.0;
>      foreach $rule (@rules) {
>