You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by qu...@apache.org on 2004/05/06 00:09:55 UTC

svn commit: rev 10534 - incubator/spamassassin/trunk/masses

Author: quinlan
Date: Wed May  5 15:09:54 2004
New Revision: 10534

Modified:
   incubator/spamassassin/trunk/masses/hit-frequencies
Log:
Need a normalization factor for the unwanted half of the ranking, since the
number of distinct ranks will differ between wanted and unwanted.


Modified: incubator/spamassassin/trunk/masses/hit-frequencies
==============================================================================
--- incubator/spamassassin/trunk/masses/hit-frequencies	(original)
+++ incubator/spamassassin/trunk/masses/hit-frequencies	Wed May  5 15:09:54 2004
@@ -136,6 +136,8 @@
 # variables for wanted/unwanted RANK
 my %wanted;
 my %unwanted;
+my %wranks;
+my %uranks;
 
 foreach my $test (keys %freq_spam, keys %freq_ham) {
   next unless (exists $rules{$test});           # only valid tests
@@ -214,6 +216,9 @@
     # basic wanted/unwanted ranking
     $wanted{$test} = $isnice ? $fn : $fs;
     $unwanted{$test} = $isnice ? $fs : $fn;
+    # count number of ranks of each type
+    $wranks{$wanted{$test}} = 1;
+    $uranks{$unwanted{$test}} = 1;
   }
 }
 
@@ -222,22 +227,22 @@
   my @wanted = sort { $wanted{$a} <=> $wanted{$b} } keys %wanted;
   my @unwanted = sort { $unwanted{$b} <=> $unwanted{$a} } keys %wanted;
 
-  my $position;
-  my $last;
-
-  $position = 0;
-  $last = undef;
+  # first half of ranking is the wanted rank
+  my $position = 0;
+  my $last = undef;
   for my $test (@wanted) {
     $position++ if defined $last && $last != $wanted{$test};
     $ranking{$test} += $position;
     $last = $wanted{$test}
   }
 
+  # second half of ranking is the unwanted rank
+  my $normalize = (scalar keys %wranks) / (scalar keys %uranks);
   $position = 0;
   $last = undef;
   for my $test (@unwanted) {
     $position++ if defined $last && $last != $unwanted{$test};
-    $ranking{$test} += $position;
+    $ranking{$test} += ($position * $normalize);
     $last = $unwanted{$test};
     $rank_hi = $ranking{$test} if ($ranking{$test} > $rank_hi);
     $rank_lo = $ranking{$test} if ($ranking{$test} < $rank_lo);