You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jh...@apache.org on 2010/09/19 19:45:53 UTC
svn commit: r998704 - /spamassassin/trunk/masses/hit-frequencies
Author: jhardin
Date: Sun Sep 19 17:45:52 2010
New Revision: 998704
URL: http://svn.apache.org/viewvc?rev=998704&view=rev
Log:
Make the lower cutoff for overlap reporting dynamic, based on the highest overlap ratio found (e.g. overlaps below 20% are reported when the highest overlap is itself no more than 20%), to assist analysis of ham hits on very-high-S/O rules.
Modified:
spamassassin/trunk/masses/hit-frequencies
Modified: spamassassin/trunk/masses/hit-frequencies
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/hit-frequencies?rev=998704&r1=998703&r2=998704&view=diff
==============================================================================
--- spamassassin/trunk/masses/hit-frequencies (original)
+++ spamassassin/trunk/masses/hit-frequencies Sun Sep 19 17:45:52 2010
@@ -851,10 +851,22 @@ sub _print_overlap_ratios {
}
my %other_type_rules = %$hash_other_type;
+ my $minratio = -1;
foreach my $ratio (sort { $b <=> $a } keys %$hash1) {
$ratio ||= 0;
- last if ($ratio < 20); # 20% cutoff
+ if ($minratio < 0) {
+ # Adapt reporting cutoff point to actual ratios.
+ # The first (highest) ratio encountered determines reporting cutoff point:
+ # > 20, cutoff is 20% (100%-20%, the vast majority of rules), else
+ # > 10, cutoff is 5% (20%-5%), else
+ # cutoff is 1% (10%-1%)
+ # This will help FP analysis of very-high-S/O rules
+ if ($ratio > 20) {$minratio = 20;}
+ elsif ($ratio > 10) {$minratio = 5;}
+ else {$minratio = 1;}
+ }
+ last if ($ratio < $minratio); # don't report overlaps below cutoff
my $rules = _prettify_overlap_rules($r1, $hash1->{$ratio});
next if ($rules eq '');