You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by fe...@apache.org on 2004/04/26 00:38:15 UTC
svn commit: rev 10265 - incubator/spamassassin/trunk/lib/Mail/SpamAssassin
Author: felicity
Date: Sun Apr 25 15:38:14 2004
New Revision: 10265
Modified:
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
Log:
Make the expiry percentage, period, and max exponent configuration variables instead of strictly hardcoded values.
Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore.pm Sun Apr 25 15:38:14 2004
@@ -103,6 +103,9 @@
# database below this number of entries. 100k entries is roughly
# equivalent to a 5Mb database file.
$self->{expiry_max_db_size} = $conf->{bayes_expiry_max_db_size};
+ $self->{expiry_pct} = $conf->{bayes_expiry_pct};
+ $self->{expiry_period} = $conf->{bayes_expiry_period};
+ $self->{expiry_max_exponent} = $conf->{bayes_expiry_max_exponent};
$self->{bayes}->read_db_configs();
}
@@ -251,8 +254,8 @@
}
# How many tokens do we want to keep?
- my $goal_reduction = int($self->{expiry_max_db_size} * 0.75); # expire to 75% of max_db
- dbg("bayes: expiry check keep size, 75% of max: $goal_reduction");
+ my $goal_reduction = int($self->{expiry_max_db_size} * $self->{expiry_pct});
+ dbg("bayes: expiry check keep size, ".$self->{expiry_pct}*100."% of max: $goal_reduction");
# Make sure we keep at least 100000 tokens in the DB
if ( $goal_reduction < 100000 ) {
$goal_reduction = 100000;
@@ -284,7 +287,7 @@
# count and the current goal removal count.
my $ratio = ($vars[9] == 0 || $vars[9] > $goal_reduction) ? $vars[9]/$goal_reduction : $goal_reduction/$vars[9];
- dbg("bayes: First pass? Current: ".time().", Last: ".$vars[4].", atime: ".$vars[8].", count: ".$vars[9].", newdelta: $newdelta, ratio: $ratio");
+ dbg("bayes: First pass? Current: ".time().", Last: ".$vars[4].", atime: ".$vars[8].", count: ".$vars[9].", newdelta: $newdelta, ratio: $ratio, period: ".$self->{expiry_period});
## ESTIMATION PHASE
#
@@ -292,21 +295,23 @@
#
# - last expire was more than 30 days ago
# assume mail flow stays roughly the same month to month, recompute if it's > 1 month
- # - last atime delta was under 12hrs
+ # - last atime delta was under expiry period
# if we're expiring often max_db_size should go up, but let's recompute just to check
# - last reduction count was < 1000 tokens
# ditto
- # - new estimated atime delta is under 12hrs
+ # - new estimated atime delta is under expiry period
# ditto
# - difference of last reduction to current goal reduction is > 50%
# if the two values are out of balance, estimating atime is going to be funky, recompute
#
- if ( (time() - $vars[4] > 86400*30) || ($vars[8] < 43200) || ($vars[9] < 1000)
- || ($newdelta < 43200) || ($ratio > 1.5) ) {
+ if ( (time() - $vars[4] > 86400*30) || ($vars[8] < $self->{expiry_period}) || ($vars[9] < 1000)
+ || ($newdelta < $self->{expiry_period}) || ($ratio > 1.5) ) {
dbg("bayes: Can't use estimation method for expiry, something fishy, calculating optimal atime delta (first pass)");
- my $start = 43200; # exponential search starting at ...? 1/2 day, 1, 2, 4, 8, 16, ...
- my $max_expire_mult = 512; # $max_expire_mult * $start = max expire time (256 days), power of 2.
+ my $start = $self->{expiry_period}; # exponential search starting at ...? 1/2 day, 1, 2, 4, 8, 16, ...
+ my $max_expire_mult = 2**$self->{expiry_max_exponent}; # $max_expire_mult * $start = max expire time (256 days), power of 2.
+
+ dbg("bayes: expiry max exponent: ".$self->{expiry_max_exponent});
my %delta = $self->calculate_expire_delta($vars[10], $start, $max_expire_mult);
Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm Sun Apr 25 15:38:14 2004
@@ -241,6 +241,9 @@
$self->{bayes_use_hapaxes} = 1;
$self->{bayes_use_chi2_combining} = 1;
$self->{bayes_expiry_max_db_size} = 150000;
+ $self->{bayes_expiry_pct} = 0.75;
+ $self->{bayes_expiry_period} = 43200;
+ $self->{bayes_expiry_max_exponent} = 9;
$self->{bayes_auto_expire} = 1;
$self->{bayes_journal_max_size} = 102400;
$self->{bayes_ignore_headers} = [ ];