You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jh...@apache.org on 2010/09/19 19:45:53 UTC

svn commit: r998704 - /spamassassin/trunk/masses/hit-frequencies

Author: jhardin
Date: Sun Sep 19 17:45:52 2010
New Revision: 998704

URL: http://svn.apache.org/viewvc?rev=998704&view=rev
Log:
Make the lower cutoff for overlap reporting dynamic, based on the highest overlap found (e.g. overlaps below 20% are now reported when the highest overlap is itself below 20%), to assist analysis of ham hits on very-high-S/O rules.

Modified:
    spamassassin/trunk/masses/hit-frequencies

Modified: spamassassin/trunk/masses/hit-frequencies
URL: http://svn.apache.org/viewvc/spamassassin/trunk/masses/hit-frequencies?rev=998704&r1=998703&r2=998704&view=diff
==============================================================================
--- spamassassin/trunk/masses/hit-frequencies (original)
+++ spamassassin/trunk/masses/hit-frequencies Sun Sep 19 17:45:52 2010
@@ -851,10 +851,22 @@ sub _print_overlap_ratios {
   }
 
   my %other_type_rules = %$hash_other_type;
+  my $minratio = -1;
 
   foreach my $ratio (sort { $b <=> $a } keys %$hash1) {
     $ratio ||= 0;
-    last if ($ratio < 20);     # 20% cutoff
+    if ($minratio < 0) {
+        # Adapt reporting cutoff point to actual ratios.
+        # The first (highest) ratio encountered determines reporting cutoff point:
+        # > 20, cutoff is 20% (100%-20%, the vast majority of rules), else
+        # > 10, cutoff is 5% (20%-5%), else
+        # cutoff is 1% (10%-1%)
+        # This will help FP analysis of very-high-S/O rules
+        if ($ratio > 20) {$minratio = 20;}
+        elsif ($ratio > 10) {$minratio = 5;}
+        else {$minratio = 1;}
+    }
+    last if ($ratio < $minratio);       # don't report overlaps below cutoff
     my $rules = _prettify_overlap_rules($r1, $hash1->{$ratio});
     next if ($rules eq '');