You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2004/04/29 08:57:51 UTC
svn commit: rev 10420 - incubator/spamassassin/trunk/masses
Author: jm
Date: Wed Apr 28 23:57:50 2004
New Revision: 10420
Modified:
incubator/spamassassin/trunk/masses/hit-frequencies
Log:
some leftovers of dev code
Modified: incubator/spamassassin/trunk/masses/hit-frequencies
==============================================================================
--- incubator/spamassassin/trunk/masses/hit-frequencies (original)
+++ incubator/spamassassin/trunk/masses/hit-frequencies Wed Apr 28 23:57:50 2004
@@ -27,7 +27,7 @@
sub usage {
die "hit-frequencies [-c rules dir] [-f] [-m RE] [-M RE] [-X RE] [-l LC]
- [-s SC] [-a] [-p] [-x] [-g] [spam log] [ham log]
+ [-s SC] [-a] [-p] [-x] [spam log] [ham log]
-c p use p as the rules directory
-f falses. count only false-negative or false-positive matches
@@ -40,7 +40,6 @@
-a display all tests
-p percentages. implies -x
-x extended output, with S/O ratio and scores
- -g use Information Gain ranking
-s SC which scoreset to use
options -l and -L are mutually exclusive.
@@ -68,7 +67,6 @@
my $num_spam = 0;
my $num_ham = 0;
my %ranking = ();
-my %infogain = ();
my $ok_lang = '';
readscores($cffile);
@@ -208,22 +206,24 @@
my $safe_px0cch = ($px0cch || 0.0000001);
my $safe_px1ccs = ($px1ccs || 0.0000001);
my $safe_px1cch = ($px1cch || 0.0000001);
- my $infogain = ( $px0ccs * log2($safe_px0ccs / $safe_px0_dot_pccs) ) +
+ $rank = ( $px0ccs * log2($safe_px0ccs / $safe_px0_dot_pccs) ) +
( $px0cch * log2($safe_px0cch / $safe_px0_dot_pcch) ) +
( $px1ccs * log2($safe_px1ccs / $safe_px1_dot_pccs) ) +
( $px1cch * log2($safe_px1cch / $safe_px1_dot_pcch) );
- $ranking{$test} = $infogain;
+ $ranking{$test} = $rank;
+ $rank_hi = $rank if ($rank > $rank_hi);
+ $rank_lo = $rank if ($rank < $rank_lo);
}
}
-# {
-# # now normalise the rankings to [0, 1]
-# $rank_hi -= $rank_lo;
-# foreach $test (@tests) {
-# $ranking{$test} = $rank_hi == 0 ? 0.001 : ($ranking{$test} - $rank_lo) / $rank_hi;
-# }
-# }
+{
+ # now normalise the rankings to [0, 1]
+ $rank_hi -= $rank_lo;
+ foreach $test (@tests) {
+ $ranking{$test} = $rank_hi == 0 ? 0.001 : ($ranking{$test} - $rank_lo) / ($rank_hi);
+ }
+}
foreach $test (sort { $ranking{$b} <=> $ranking{$a} } @tests) {
next unless (exists $rules{$test}); # only valid tests