You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by pa...@apache.org on 2004/05/03 04:05:13 UTC
svn commit: rev 10502 - in incubator/spamassassin/trunk/lib/Mail/SpamAssassin: . BayesStore
Author: parker
Date: Sun May 2 19:05:12 2004
New Revision: 10502
Modified:
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/DBM.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/SQL.pm
Log:
Implement tok_touch_all to update useful tokens in one call
Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm Sun May 2 19:05:12 2004
@@ -1196,6 +1196,8 @@
my $tinfo_spammy = $permsgstatus->{bayes_token_info_spammy} = [];
my $tinfo_hammy = $permsgstatus->{bayes_token_info_hammy} = [];
+ my @touch_tokens;
+
for (sort {
abs($pw{$b} - 0.5) <=> abs($pw{$a} - 0.5)
} keys %pw)
@@ -1218,10 +1220,13 @@
push (@sorted, $pw);
# update the atime on this token, it proved useful
- $self->{store}->tok_touch ($_, $msgatime);
+ push(@touch_tokens, $_);
dbg ("bayes token '$raw_token' => $pw");
}
+
+ # we don't really care about the return value here
+ $self->{store}->tok_touch_all(\@touch_tokens, $msgatime);
if (!@sorted || (REQUIRE_SIGNIFICANT_TOKENS_TO_SCORE > 0 &&
$#sorted <= REQUIRE_SIGNIFICANT_TOKENS_TO_SCORE))
Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore.pm Sun May 2 19:05:12 2004
@@ -697,6 +697,22 @@
die "tok_touch: not implemanted\n";
}
+=head2 tok_touch_all
+
+public instance (Boolean) tok_touch_all (\@ $tokens,
+ Time $atime)
+
+Description:
+This method does a mass update of the given list of tokens C<$tokens>, if the existing token
+atime is < C<$atime>.
+
+=cut
+
+sub tok_touch_all {
+ my ($self, $tokens, $atime) = @_;
+ die "tok_touch_all: not implemanted\n";
+}
+
=head2 cleanup
public instance (Boolean) cleanup ()
Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/DBM.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/DBM.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/DBM.pm Sun May 2 19:05:12 2004
@@ -961,6 +961,17 @@
$self->defer_update ("t $atime $encoded_tok");
}
+sub tok_touch_all {
+ my ($self, $tokens, $atime) = @_;
+
+ foreach my $token (@{$tokens}) {
+ # we can't store the SHA1 binary value in the journal, so convert it
+ # to a printable (hex) value that can be converted back later
+ my $encoded_tok = unpack("H*", $token);
+ $self->defer_update ("t $atime $encoded_tok");
+ }
+}
+
sub defer_update {
my ($self, $str) = @_;
$self->{string_to_journal} .= "$str\n";
Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/SQL.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/SQL.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/SQL.pm Sun May 2 19:05:12 2004
@@ -993,7 +993,67 @@
return 1;
}
+=head2 tok_touch_all
+
+public instance (Boolean) tok_touch_all (\@ $tokens,
+ String $atime)
+
+Description:
+This method does a mass update of the given list of tokens C<$tokens>, if the existing token
+atime is < C<$atime>.
+
+The assumption is that the tokens already exist in the database.
+
+We should never be touching more than N_SIGNIFICANT_TOKENS, so we can make
+some assumptions about how to handle the data (i.e., no need to batch like we
+do in tok_get_all)
+
=cut
+
+sub tok_touch_all {
+ my ($self, $tokens, $atime) = @_;
+
+ return 0 unless (defined($self->{_dbh}));
+
+ my $sql = "UPDATE bayes_token SET atime = ? WHERE id = ? AND token IN (";
+
+ my @bindings = ($atime, $self->{_userid});
+ foreach my $token (@{$tokens}) {
+ $sql .= "?,";
+ push(@bindings, $token);
+ }
+ chop($sql); # get rid of trailing ,
+
+ $sql .= ") AND atime < ?";
+ push(@bindings, $atime);
+
+ my $rows = $self->{_dbh}->do($sql, undef, @bindings);
+
+ unless (defined($rows)) {
+ dbg("bayes: tok_touch: SQL Error: ".$self->{_dbh}->errstr());
+ return 0;
+ }
+
+ # if we didn't update a row then no need to update newest_token_age
+ return 1 if ($rows eq '0E0');
+
+ # need to check newest_token_age
+ # no need to check oldest_token_age since we would only update if the
+ # atime was newer than what is in the database
+ $sql = "UPDATE bayes_vars
+ SET newest_token_age = ?
+ WHERE id = ?
+ AND newest_token_age < ?";
+
+ $rows = $self->{_dbh}->do($sql, undef, $atime, $self->{_userid}, $atime);
+
+ unless (defined($rows)) {
+ dbg("bayes: tok_touch: SQL Error: ".$self->{_dbh}->errstr());
+ return 0;
+ }
+
+ return 1;
+}
=head2 cleanup