You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by pa...@apache.org on 2004/05/03 04:05:13 UTC

svn commit: rev 10502 - in incubator/spamassassin/trunk/lib/Mail/SpamAssassin: . BayesStore

Author: parker
Date: Sun May  2 19:05:12 2004
New Revision: 10502

Modified:
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/DBM.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/SQL.pm
Log:
Implement tok_touch_all to update useful tokens in one call

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm	Sun May  2 19:05:12 2004
@@ -1196,6 +1196,8 @@
   my $tinfo_spammy = $permsgstatus->{bayes_token_info_spammy} = [];
   my $tinfo_hammy = $permsgstatus->{bayes_token_info_hammy} = [];
 
+  my @touch_tokens;
+
   for (sort {
               abs($pw{$b} - 0.5) <=> abs($pw{$a} - 0.5)
             } keys %pw)
@@ -1218,10 +1220,13 @@
     push (@sorted, $pw);
 
     # update the atime on this token, it proved useful
-    $self->{store}->tok_touch ($_, $msgatime);
+    push(@touch_tokens, $_);
 
     dbg ("bayes token '$raw_token' => $pw");
   }
+
+  # we don't really care about the return value here
+  $self->{store}->tok_touch_all(\@touch_tokens, $msgatime);
 
   if (!@sorted || (REQUIRE_SIGNIFICANT_TOKENS_TO_SCORE > 0 && 
 	$#sorted <= REQUIRE_SIGNIFICANT_TOKENS_TO_SCORE))

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore.pm	Sun May  2 19:05:12 2004
@@ -697,6 +697,22 @@
   die "tok_touch: not implemanted\n";
 }
 
+=head2 tok_touch_all
+
+public instance (Boolean) tok_touch_all (\@ $tokens,
+                                         Time $atime)
+
+Description:
+This method updates the atime of every token in the list C<$tokens> in a single
+call, but only for tokens whose existing atime is < C<$atime>.
+
+=cut
+
+sub tok_touch_all {
+  # stub: ignores ($self, $tokens, $atime); the DBM and SQL stores define their own
+  die "tok_touch_all: not implemanted\n";
+}
+
 =head2 cleanup
 
 public instance (Boolean) cleanup ()

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/DBM.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/DBM.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/DBM.pm	Sun May  2 19:05:12 2004
@@ -961,6 +961,17 @@
   $self->defer_update ("t $atime $encoded_tok");
 }
 
+sub tok_touch_all {
+  my ($self, $tokens, $atime) = @_;
+
+  # The SHA1 token is a binary value and cannot be stored in the journal
+  # as-is, so hex-encode it into a printable value that can be converted
+  # back later; one "t <atime> <hex token>" entry is deferred per token.
+  for my $raw_tok (@$tokens) {
+    $self->defer_update ("t $atime " . unpack("H*", $raw_tok));
+  }
+}
+
 sub defer_update {
   my ($self, $str) = @_;
   $self->{string_to_journal} .= "$str\n";

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/SQL.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/SQL.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/SQL.pm	Sun May  2 19:05:12 2004
@@ -993,7 +993,67 @@
   return 1;
 }
 
+=head2 tok_touch_all
+
+public instance (Boolean) tok_touch_all (\@ $tokens,
+                                         String $atime)
+
+Description:
+This method updates the atime of every token in the list C<$tokens> in a single
+call, but only for tokens whose existing atime is < C<$atime>.
+
+The assumption is that the tokens already exist in the database.
+
+We should never be touching more than N_SIGNIFICANT_TOKENS, so we can make
+some assumptions about how to handle the data (ie no need to batch like we
+do in tok_get_all)
+
 =cut
+
+sub tok_touch_all {
+  my ($self, $tokens, $atime) = @_;
+
+  # returns 0 when there is no db handle or a statement fails, 1 otherwise
+  return 0 unless (defined($self->{_dbh}));
+
+  # nothing to touch; an empty list would also yield invalid SQL ("IN )")
+  return 1 unless (defined($tokens) && @{$tokens});
+
+  # one "?" placeholder per token; the values are bound in placeholder order:
+  # new atime, user id, each token, and finally the atime cutoff
+  my $sql = "UPDATE bayes_token SET atime = ? WHERE id = ? AND token IN ("
+          . join(",", ("?") x scalar(@{$tokens})) . ") AND atime < ?";
+
+  my @bindings = ($atime, $self->{_userid}, @{$tokens}, $atime);
+
+  my $rows = $self->{_dbh}->do($sql, undef, @bindings);
+
+  unless (defined($rows)) {
+    dbg("bayes: tok_touch_all: SQL Error: ".$self->{_dbh}->errstr());
+    return 0;
+  }
+
+  # DBI reports "no rows affected" as the string '0E0'; if we didn't update
+  # a row then there is no need to update newest_token_age
+  return 1 if ($rows eq '0E0');
+
+  # need to check newest_token_age
+  # no need to check oldest_token_age since we would only update if the
+  # atime was newer than what is in the database
+  $sql = "UPDATE bayes_vars
+             SET newest_token_age = ?
+           WHERE id = ?
+             AND newest_token_age < ?";
+
+  $rows = $self->{_dbh}->do($sql, undef, $atime, $self->{_userid}, $atime);
+
+  unless (defined($rows)) {
+    dbg("bayes: tok_touch_all: SQL Error: ".$self->{_dbh}->errstr());
+    return 0;
+  }
+
+  return 1;
+}
 
 =head2 cleanup