You are viewing a plain text version of this content. The canonical link for it is here.
Posted to users@spamassassin.apache.org by Justin Mason <jm...@jmason.org> on 2008/12/26 16:53:01 UTC

Re: Bayes-SQL improvements

so you're basically sharding the bayes_token tables... could you open
a bug on the SpamAssassin bugzilla with this patch?  thanks!

--j.

Thorsten Meinl writes:
>--nextPart9059977.pG6Pp7397c
>Content-Type: multipart/mixed;
>  boundary="Boundary-01=_oj8UJ9IS8rBJw6M"
>Content-Transfer-Encoding: 7bit
>Content-Disposition: inline
>
>--Boundary-01=_oj8UJ9IS8rBJw6M
>Content-Type: text/plain;
>  charset="iso-8859-15"
>Content-Transfer-Encoding: quoted-printable
>Content-Disposition: inline
>
>Hi all,
>
>We have an installation of Spamassassin that serves about 2000 users. Their=
>=20
>Bayes-data is stored inside a Postgres database which is of fairly large=20
>size, the bayes_token table holds about 100 million rows. This often leads =
>to=20
>high loads on the machine, especially if bayes_expire is running. Therefore=
> I=20
>wrote a patch to Spamassassin (3.2.4) that splits the bayes_token table int=
>o=20
>several tables. Which user is contained in which table is looked up from=20
>bayes_vars which has an additional column "token_table". New users are=20
>automatically assigned to one table by using their name's CRC32 checksum=20
>(could have been any other but this one was easiest as it gives an int whic=
>h=20
>can be used to derive a simple number for the token table). This patch led=
>=20
>to considerably lower loads on the machine and bayes_expire now only takes=
>=20
>about 5 hours instead of the previous 20 when using 10 tables instead of 1.
>The patch is attached, if the developers feel that it is worth integrating=
>=20
>into the distribution, they are free to do so.
>
>Cheers,
>
>Thorsten
>
>--Boundary-01=_oj8UJ9IS8rBJw6M
>Content-Type: text/x-diff;
>  charset="iso-8859-15";
>  name="spamassassin-3.2.4-r1-token-table.patch"
>Content-Transfer-Encoding: quoted-printable
>Content-Disposition: attachment;
>	filename="spamassassin-3.2.4-r1-token-table.patch"
>
>diff -ur Mail-SpamAssassin-3.2.4.orig/lib/Mail/SpamAssassin/BayesStore/MySQ=
>L.pm Mail-SpamAssassin-3.2.4/lib/Mail/SpamAssassin/BayesStore/MySQL.pm
>=2D-- Mail-SpamAssassin-3.2.4.orig/lib/Mail/SpamAssassin/BayesStore/MySQL.p=
>m	2008-01-05 22:10:35.000000000 +0100
>+++ Mail-SpamAssassin-3.2.4/lib/Mail/SpamAssassin/BayesStore/MySQL.pm	2008-=
>12-21 20:29:57.265157170 +0100
>@@ -75,7 +75,7 @@
>   my $too_old =3D $vars[10] - $newdelta; # tooold =3D newest - delta
>=20
>   # if token atime > newest, reset to newest ...
>=2D  my $sql =3D "UPDATE bayes_token SET atime =3D ?
>+  my $sql =3D "UPDATE $$self{_token_table} SET atime =3D ?
>               WHERE id  =3D ?
>                 AND atime > ?";
>=20
>@@ -89,7 +89,7 @@
>   }
>=20
>   # Check to make sure the expire won't remove too many tokens
>=2D  $sql =3D "SELECT count(token) FROM bayes_token
>+  $sql =3D "SELECT count(token) FROM $$self{_token_table}
>            WHERE id =3D ?
>              AND atime < ?";
>=20
>@@ -124,7 +124,7 @@
>   }
>   else {
>     # Do the expire
>=2D    $sql =3D "DELETE from bayes_token
>+    $sql =3D "DELETE from $$self{_token_table}
>              WHERE id =3D ?
>                AND atime < ?";
>=20
>@@ -146,7 +146,7 @@
>                                 last_atime_delta =3D ?,
>                                 last_expire_reduce =3D ?,
>                                 oldest_token_age =3D (SELECT min(atime)
>=2D                                                      FROM bayes_token
>+                                                      FROM $$self{_token_t=
>able}
>                                                      WHERE id =3D ?)
> 				WHERE id =3D ?";
>=20
>@@ -415,7 +415,7 @@
>=20
>   # shortcut, will only update atime for the token if the atime is less th=
>an
>   # what we are updating to
>=2D  my $sql =3D "UPDATE bayes_token
>+  my $sql =3D "UPDATE $$self{_token_table}
>                 SET atime =3D ?
>               WHERE id =3D ?
>                 AND token =3D ?
>@@ -477,7 +477,7 @@
>=20
>   return 1 unless (scalar(@{$tokens}));
>=20
>=2D  my $sql =3D "UPDATE bayes_token SET atime =3D ? WHERE id =3D ? AND tok=
>en IN (";
>+  my $sql =3D "UPDATE $$self{_token_table} SET atime =3D ? WHERE id =3D ? =
>AND token IN (";
>=20
>   my @bindings =3D ($atime, $self->{_userid});
>   foreach my $token (@{$tokens}) {
>@@ -538,7 +538,7 @@
>   # cleanup was needed, go ahead and clear the cleanup flag
>   $self->{needs_cleanup} =3D 0;
>=20
>=2D  my $sql =3D "DELETE from bayes_token
>+  my $sql =3D "DELETE from $$self{_token_table}
>               WHERE id =3D ?
>                 AND spam_count <=3D 0
>                 AND ham_count <=3D 0";
>@@ -616,7 +616,7 @@
>     return 0;
>   }
>=20
>=2D  $rows =3D $self->{_dbh}->do("DELETE FROM bayes_token WHERE id =3D ?",
>+  $rows =3D $self->{_dbh}->do("DELETE FROM $$self{_token_table} WHERE id =
>=3D ?",
> 			    undef,
> 			    $self->{_userid});
>   unless (defined($rows)) {
>@@ -785,7 +785,7 @@
>     # counts may have both reached 0
>     $self->{needs_cleanup} =3D 1;
>=20
>=2D    my $sql =3D "UPDATE bayes_token SET spam_count =3D GREATEST(spam_cou=
>nt + ?, 0),
>+    my $sql =3D "UPDATE $$self{_token_table} SET spam_count =3D GREATEST(s=
>pam_count + ?, 0),
>                                       ham_count =3D GREATEST(ham_count + ?=
>, 0)
>                 WHERE id =3D ?
>                   AND token =3D ?";
>@@ -810,7 +810,7 @@
>     }
>   }
>   else {
>=2D    my $sql =3D "INSERT INTO bayes_token
>+    my $sql =3D "INSERT INTO $$self{_token_table}
>                (id, token, spam_count, ham_count, atime)
>                VALUES (?,?,?,?,?)
>                ON DUPLICATE KEY UPDATE spam_count =3D GREATEST(spam_count =
>+ ?, 0),
>@@ -918,7 +918,7 @@
>     # counts may have both reached 0
>     $self->{needs_cleanup} =3D 1;
>=20
>=2D    my $sql =3D "UPDATE bayes_token SET spam_count =3D GREATEST(spam_cou=
>nt + ?, 0),
>+    my $sql =3D "UPDATE $$self{_token_table} SET spam_count =3D GREATEST(s=
>pam_count + ?, 0),
>                                       ham_count =3D GREATEST(ham_count + ?=
>, 0)
>                 WHERE id =3D ?
>                   AND token =3D ?";
>@@ -952,7 +952,7 @@
>     }
>   }
>   else {
>=2D    my $sql =3D "INSERT INTO bayes_token
>+    my $sql =3D "INSERT INTO $$self{_token_table}
>                (id, token, spam_count, ham_count, atime)
>                VALUES (?,?,?,?,?)
>                ON DUPLICATE KEY UPDATE spam_count =3D GREATEST(spam_count =
>+ ?, 0),
>diff -ur Mail-SpamAssassin-3.2.4.orig/lib/Mail/SpamAssassin/BayesStore/PgSQ=
>L.pm Mail-SpamAssassin-3.2.4/lib/Mail/SpamAssassin/BayesStore/PgSQL.pm
>=2D-- Mail-SpamAssassin-3.2.4.orig/lib/Mail/SpamAssassin/BayesStore/PgSQL.p=
>m	2008-01-05 22:10:35.000000000 +0100
>+++ Mail-SpamAssassin-3.2.4/lib/Mail/SpamAssassin/BayesStore/PgSQL.pm	2008-=
>12-21 20:29:57.275157321 +0100
>@@ -77,7 +77,7 @@
>   my $too_old =3D $vars[10] - $newdelta; # tooold =3D newest - delta
>=20
>   # if token atime > newest, reset to newest ...
>=2D  my $sql =3D "UPDATE bayes_token SET atime =3D ?
>+  my $sql =3D "UPDATE $$self{_token_table} SET atime =3D ?
>               WHERE id  =3D ?
>                 AND atime > ?";
>=20
>@@ -91,7 +91,7 @@
>   }
>=20
>   # Check to make sure the expire won't remove too many tokens
>=2D  $sql =3D "SELECT count(token) FROM bayes_token
>+  $sql =3D "SELECT count(token) FROM $$self{_token_table}
>            WHERE id =3D ?
>              AND atime < ?";
>=20
>@@ -126,7 +126,7 @@
>   }
>   else {
>     # Do the expire
>=2D    $sql =3D "DELETE from bayes_token
>+    $sql =3D "DELETE from $$self{_token_table}
>              WHERE id =3D ?
>                AND atime < ?";
>=20
>@@ -148,7 +148,7 @@
>                                 last_atime_delta =3D ?,
>                                 last_expire_reduce =3D ?,
>                                 oldest_token_age =3D (SELECT min(atime)
>=2D                                                      FROM bayes_token
>+                                                      FROM $$self{_token_t=
>able}
>                                                      WHERE id =3D ?)
> 				WHERE id =3D ?";
>=20
>@@ -358,7 +358,7 @@
>   return (0,0,0) unless (defined($self->{_dbh}));
>=20
>   my $sql =3D "SELECT spam_count, ham_count, atime
>=2D               FROM bayes_token
>+               FROM $$self{_token_table}
>               WHERE id =3D ?
>                 AND token =3D ?";
>=20
>@@ -415,7 +415,7 @@
>   my $bunch_end;
>=20
>   my $multi_sql =3D "SELECT token, spam_count, ham_count, atime
>=2D                     FROM bayes_token
>+                     FROM $$self{_token_table}
>                     WHERE id =3D ?
>                       AND token IN ";
>=20
>@@ -558,7 +558,7 @@
>=20
>   # shortcut, will only update atime for the token if the atime is less th=
>an
>   # what we are updating to
>=2D  my $sql =3D "UPDATE bayes_token
>+  my $sql =3D "UPDATE $$self{_token_table}
>                 SET atime =3D ?
>               WHERE id =3D ?
>                 AND token =3D ?
>@@ -644,7 +644,7 @@
>=20
>   return 1 unless (scalar(@{$tokens}));
>=20
>=2D  my $sql =3D "UPDATE bayes_token SET atime =3D ? WHERE id =3D ? AND tok=
>en IN (";
>+  my $sql =3D "UPDATE $$self{_token_table} SET atime =3D ? WHERE id =3D ? =
>AND token IN (";
>=20
>   my @bindings;
>   foreach my $token (sort @{$tokens}) {
>@@ -770,7 +770,7 @@
>   # cleanup was needed, go ahead and clear the cleanup flag
>   $self->{needs_cleanup} =3D 0;
>=20
>=2D  my $sql =3D "DELETE from bayes_token
>+  my $sql =3D "DELETE from $$self{_token_table}
>               WHERE id =3D ?
>                 AND spam_count <=3D 0
>                 AND ham_count <=3D 0";
>@@ -849,7 +849,7 @@
>     return 0;
>   }
>=20
>=2D  $rows =3D $self->{_dbh}->do("DELETE FROM bayes_token WHERE id =3D ?",
>+  $rows =3D $self->{_dbh}->do("DELETE FROM $$self{_token_table} WHERE id =
>=3D ?",
> 			    undef,
> 			    $self->{_userid});
>   unless (defined($rows)) {
>diff -ur Mail-SpamAssassin-3.2.4.orig/lib/Mail/SpamAssassin/BayesStore/SQL.=
>pm Mail-SpamAssassin-3.2.4/lib/Mail/SpamAssassin/BayesStore/SQL.pm
>=2D-- Mail-SpamAssassin-3.2.4.orig/lib/Mail/SpamAssassin/BayesStore/SQL.pm	=
>2008-01-05 22:10:35.000000000 +0100
>+++ Mail-SpamAssassin-3.2.4/lib/Mail/SpamAssassin/BayesStore/SQL.pm	2008-12=
>=2D21 20:29:57.275157321 +0100
>@@ -36,6 +36,7 @@
> use Mail::SpamAssassin::BayesStore;
> use Mail::SpamAssassin::Logger;
> use Digest::SHA1 qw(sha1);
>+use String::CRC32;
>=20
> use vars qw( @ISA );
>=20
>@@ -238,7 +239,7 @@
>   return %delta unless (defined($self->{_dbh}));
>=20
>   my $sql =3D "SELECT count(*)
>=2D               FROM bayes_token
>+               FROM $$self{_token_table}
>               WHERE id =3D ?
>                 AND atime < ?";
>=20
>@@ -290,7 +291,7 @@
>   my $too_old =3D $vars[10] - $newdelta; # tooold =3D newest - delta
>=20
>   # if token atime > newest, reset to newest ...
>=2D  my $sql =3D "UPDATE bayes_token SET atime =3D ?
>+  my $sql =3D "UPDATE $$self{_token_table} SET atime =3D ?
>               WHERE id  =3D ?
>                 AND atime > ?";
>=20
>@@ -303,7 +304,7 @@
>   }
>=20
>   # Check to make sure the expire won't remove too many tokens
>=2D  $sql =3D "SELECT count(token) FROM bayes_token
>+  $sql =3D "SELECT count(token) FROM $$self{_token_table}=20
>            WHERE id =3D ?
>              AND atime < ?";
>=20
>@@ -336,7 +337,7 @@
>   }
>   else {
>     # Do the expire
>=2D    $sql =3D "DELETE from bayes_token
>+    $sql =3D "DELETE from $$self{_token_table}=20
>              WHERE id =3D ?
>                AND atime < ?";
>=20
>@@ -628,7 +629,7 @@
>   my $token_select =3D $self->_token_select_string();
>=20
>   my $sql =3D "SELECT $token_select, spam_count, ham_count, atime
>=2D               FROM bayes_token
>+               FROM $$self{_token_table}=20
>               WHERE id =3D ?
>                 AND (spam_count > 0 OR ham_count > 0)";
>=20
>@@ -806,7 +807,7 @@
>   return (0,0,0) unless (defined($self->{_dbh}));
>=20
>   my $sql =3D "SELECT spam_count, ham_count, atime
>=2D               FROM bayes_token
>+               FROM $$self{_token_table}=20
>               WHERE id =3D ?
>                 AND token =3D ?";
>=20
>@@ -861,7 +862,7 @@
>   my $token_select =3D $self->_token_select_string();
>=20
>   my $multi_sql =3D "SELECT $token_select, spam_count, ham_count, atime
>=2D                     FROM bayes_token
>+                     FROM $$self{_token_table}
>                     WHERE id =3D ?
>                       AND token IN ";
>=20
>@@ -1058,7 +1059,7 @@
>=20
>   # shortcut, will only update atime for the token if the atime is less th=
>an
>   # what we are updating to
>=2D  my $sql =3D "UPDATE bayes_token
>+  my $sql =3D "UPDATE $$self{_token_table}=20
>                 SET atime =3D ?
>               WHERE id =3D ?
>                 AND token =3D ?
>@@ -1117,7 +1118,7 @@
>=20
>   return 1 unless (scalar(@{$tokens}));
>=20
>=2D  my $sql =3D "UPDATE bayes_token SET atime =3D ? WHERE id =3D ? AND tok=
>en IN (";
>+  my $sql =3D "UPDATE $$self{_token_table} SET atime =3D ? WHERE id =3D ? =
>AND token IN (";
>=20
>   my @bindings =3D ($atime, $self->{_userid});
>   foreach my $token (@{$tokens}) {
>@@ -1176,7 +1177,7 @@
>   # cleanup was needed, go ahead and clear the cleanup flag
>   $self->{needs_cleanup} =3D 0;
>=20
>=2D  my $sql =3D "DELETE from bayes_token
>+  my $sql =3D "DELETE from $$self{_token_table}
>               WHERE id =3D ?
>                 AND spam_count =3D 0
>                 AND ham_count =3D 0";
>@@ -1299,7 +1300,7 @@
>     return 0;
>   }
>=20
>=2D  $rows =3D $self->{_dbh}->do("DELETE FROM bayes_token WHERE id =3D ?",
>+  $rows =3D $self->{_dbh}->do("DELETE FROM $$self{_token_table} WHERE id =
>=3D ?",
> 			    undef,
> 			    $self->{_userid});
>   unless (defined($rows)) {
>@@ -1338,7 +1339,7 @@
>   my $token_select =3D $self->_token_select_string();
>=20
>   my $token_sql =3D "SELECT spam_count, ham_count, atime, $token_select
>=2D                     FROM bayes_token
>+                     FROM $$self{_token_table}
>                     WHERE id =3D ?
>                       AND (spam_count > 0 OR ham_count > 0)";
>=20
>@@ -1745,7 +1746,7 @@
>     }
>   }
>=20
>=2D  my $sqlselect =3D "SELECT id FROM bayes_vars WHERE username =3D ?";
>+  my $sqlselect =3D "SELECT id, token_table FROM bayes_vars WHERE username=
> =3D ?";
>=20
>   my $sthselect =3D $self->{_dbh}->prepare_cached($sqlselect);
>=20
>@@ -1761,24 +1762,32 @@
>     return 0;
>   }
>=20
>=2D  my ($id) =3D $sthselect->fetchrow_array();
>+  my ($id, $token_table) =3D $sthselect->fetchrow_array();
>=20
>   if ($id) {
>     $self->{_userid} =3D $id;
>=2D    dbg("bayes: Using userid: ".$self->{_userid});
>+    $self->{_token_table} =3D $token_table;
>+    dbg("bayes: Using userid: ".$self->{_userid}.", token table: ".$self->=
>{_token_table});
>     $sthselect->finish();
>     return 1;
>   }
>=20
>+  $self->{_token_table} =3D "bayes_token";
>   # Do not create an entry for this user unless we were specifically asked=
> to
>   return 0 unless ($create_entry_p);
>=20
>   # For now let the database setup the other variables as defaults
>=2D  my $sqlinsert =3D "INSERT INTO bayes_vars (username) VALUES (?)";
>+  my $sqlinsert =3D "INSERT INTO bayes_vars (username, token_table) VALUES=
> (?, ?)";
>+  if ($self->{bayes}->{conf}->{bayes_sql_token_table_count}
>+    && $self->{bayes}->{conf}->{bayes_sql_token_table_count} > 1) {
>+      $self->{_token_table} =3D "bayes_token_"
>+        . (crc32($self->{_username}) % $self->{bayes}->{conf}->{token_tabl=
>e_count});
>+  }
>=20
>   my $rows =3D $self->{_dbh}->do($sqlinsert,
> 			       undef,
>=2D			       $self->{_username});
>+			       $self->{_username},
>+			       $self->{_token_table});
>   unless (defined($rows)) {
>     dbg("bayes: _initialize_db: SQL error: ".$self->{_dbh}->errstr());
>     return 0;
>@@ -1843,7 +1852,7 @@
>     # if we are unable to find an entry.
>     return 1 if ($spam_count < 0 || $ham_count < 0);
>=20
>=2D    my $sql =3D "INSERT INTO bayes_token
>+    my $sql =3D "INSERT INTO $$self{_token_table}
>                (id, token, spam_count, ham_count, atime)
>                VALUES (?,?,?,?,?)";
>=20
>@@ -1930,7 +1939,7 @@
>       my $sql;
>       my @args;
>       if ($update_atime_p) {
>=2D	$sql =3D "UPDATE bayes_token
>+	$sql =3D "UPDATE $$self{_token_table}
>                    SET spam_count =3D spam_count + ?,
>                        atime =3D ?
>                  WHERE id =3D ?
>@@ -1940,7 +1949,7 @@
> 	$updated_atime_p =3D 1; # note the fact that we did do it
>       }
>       else {
>=2D	$sql =3D "UPDATE bayes_token
>+	$sql =3D "UPDATE $$self{_token_table}
>                    SET spam_count =3D spam_count + ?
>                  WHERE id =3D ?
>                    AND token =3D ?
>@@ -1960,7 +1969,7 @@
>       my $sql;
>       my @args;
>       if ($update_atime_p && !$updated_atime_p) {
>=2D	$sql =3D "UPDATE bayes_token
>+	$sql =3D "UPDATE $$self{_token_table}
>                    SET ham_count =3D ham_count + ?,
>                        atime =3D ?
>                  WHERE id =3D ?
>@@ -1970,7 +1979,7 @@
> 	$updated_atime_p =3D 1; # note the fact that we did do it
>       }
>       else {
>=2D	$sql =3D "UPDATE bayes_token
>+	$sql =3D "UPDATE $$self{_token_table}
>                    SET ham_count =3D ham_count + ?
>                  WHERE id =3D ?
>                    AND token =3D ?
>@@ -2034,7 +2043,7 @@
>   my $atime_inserted_p =3D 0;
>   my $new_tokens =3D 0;
>=20
>=2D  my $insertsql =3D "INSERT INTO bayes_token
>+  my $insertsql =3D "INSERT INTO $$self{_token_table}
>                    (id, token, spam_count, ham_count, atime)
>                    VALUES (?,?,?,?,?)";
>=20
>@@ -2101,7 +2110,7 @@
> 	my $sql;
> 	my @args;
> 	if ($update_atime_p) {
>=2D	  $sql =3D "UPDATE bayes_token
>+	  $sql =3D "UPDATE $$self{_token_table}
>                      SET spam_count =3D spam_count + ?,
>                          atime =3D ?
>                    WHERE id =3D ?
>@@ -2111,7 +2120,7 @@
> 	  $atime_updated_p =3D 1;
> 	}
> 	else {
>=2D	  $sql =3D "UPDATE bayes_token
>+	  $sql =3D "UPDATE $$self{_token_table}
>                      SET spam_count =3D spam_count + ?
>                    WHERE id =3D ?
>                      AND token =3D ?
>@@ -2131,7 +2140,7 @@
> 	my @args;
> 	# if $spam_count then we already updated the atime
> 	if ($update_atime_p && !$spam_count) {=20
>=2D	  $sql =3D "UPDATE bayes_token
>+	  $sql =3D "UPDATE $$self{_token_table}
>                      SET ham_count =3D ham_count + ?,
>                          atime =3D ?
>                    WHERE id =3D ?
>@@ -2141,7 +2150,7 @@
> 	  $atime_updated_p =3D 1;
> 	}
> 	else {
>=2D	  $sql =3D "UPDATE bayes_token
>+	  $sql =3D "UPDATE $$self{_token_table}
>                      SET ham_count =3D ham_count + ?
>                    WHERE id =3D ?
>                      AND token =3D ?
>@@ -2219,7 +2228,7 @@
>=20
>   return 0 unless (defined($self->{_dbh}));
>=20
>=2D  my $sql =3D "SELECT min(atime) FROM bayes_token
>+  my $sql =3D "SELECT min(atime) FROM $$self{_token_table}
>               WHERE id =3D ?";
>=20
>   my $sth =3D $self->{_dbh}->prepare_cached($sql);
>@@ -2260,7 +2269,7 @@
>   return 0 unless (defined($self->{_dbh}));
>=20
>   my $sql =3D "SELECT count(*)
>=2D               FROM bayes_token
>+               FROM $$self{_token_table}
>               WHERE id =3D ?
>                 AND spam_count + ham_count =3D 1";
>=20
>@@ -2302,7 +2311,7 @@
>   return 0 unless (defined($self->{_dbh}));
>=20
>   my $sql =3D "SELECT count(*)
>=2D               FROM bayes_token
>+               FROM $$self{_token_table}
>               WHERE id =3D ?
>                 AND (spam_count >=3D 0 AND spam_count < 8)
>                 AND (ham_count >=3D 0 AND ham_count < 8)
>diff -ur Mail-SpamAssassin-3.2.4.orig/lib/Mail/SpamAssassin/Conf.pm Mail-Sp=
>amAssassin-3.2.4/lib/Mail/SpamAssassin/Conf.pm
>=2D-- Mail-SpamAssassin-3.2.4.orig/lib/Mail/SpamAssassin/Conf.pm	2008-01-05=
> 22:11:03.000000000 +0100
>+++ Mail-SpamAssassin-3.2.4/lib/Mail/SpamAssassin/Conf.pm	2008-12-21 20:29:=
>57.275157321 +0100
>@@ -2683,6 +2683,27 @@
>     type =3D> $CONF_TYPE_BOOL
>   });
>=20
>+=3Ditem bayes_sql_token_table_count
>+
>+Used by BayesStore::SQL storage implementation.
>+
>+With this option you can spread the token information over several tables =
>if
>+the single table will otherwise get too large.
>+
>+After setting this to a value > 1 you must create the tables
>+bayes_token_0 through bayes_token_[number - 1] by yourself! They
>+have the same structure as the original bayes_token table.
>+
>+=3Dcut
>+
>+  push (@cmds, {
>+    setting =3D> 'bayes_sql_token_table_count',
>+    is_admin =3D> 1,
>+    default =3D> 1,
>+    type =3D> $CONF_TYPE_NUMERIC
>+  });
>+
>+
> =3Ditem user_scores_dsn DBI:databasetype:databasename:hostname:port
>=20
> If you load user scores from an SQL database, this will set the DSN
>diff -ur Mail-SpamAssassin-3.2.4.orig/sql/README.bayes Mail-SpamAssassin-3.=
>2.4/sql/README.bayes
>=2D-- Mail-SpamAssassin-3.2.4.orig/sql/README.bayes	2008-01-05 22:12:32.000=
>000000 +0100
>+++ Mail-SpamAssassin-3.2.4/sql/README.bayes	2008-12-21 20:30:16.305444389 =
>+0100
>@@ -66,6 +66,15 @@
> share bayesian filter data.  You can also use this config option to
> trick sa-learn to learn data as a specific user.
>=20
>+If your token table will get really large you may spread the bayes
>+tokens over several tables. You can set this in the config file with
>+
>+bayes_sql_token_table_count        number
>+
>+After setting this to a value > 1 you must create the tables
>+bayes_token_0 through bayes_token_[number - 1] by yourself! They
>+have the same structure as the original bayes_token table.
>+
>=20
> Requirements
> ------------
>diff -ur Mail-SpamAssassin-3.2.4.orig/sql/bayes_mysql.sql Mail-SpamAssassin=
>=2D3.2.4/sql/bayes_mysql.sql
>=2D-- Mail-SpamAssassin-3.2.4.orig/sql/bayes_mysql.sql	2008-01-05 22:12:32.=
>000000000 +0100
>+++ Mail-SpamAssassin-3.2.4/sql/bayes_mysql.sql	2008-12-21 20:30:16.3354448=
>41 +0100
>@@ -42,6 +42,7 @@
>   last_expire_reduce int(11) NOT NULL default '0',
>   oldest_token_age int(11) NOT NULL default '2147483647',
>   newest_token_age int(11) NOT NULL default '0',
>+  token_table varchar(20) NOT NULL default 'bayes_token',
>   PRIMARY KEY  (id),
>   UNIQUE bayes_vars_idx1 (username)
> ) TYPE=3DMyISAM;
>diff -ur Mail-SpamAssassin-3.2.4.orig/sql/bayes_pg.sql Mail-SpamAssassin-3.=
>2.4/sql/bayes_pg.sql
>=2D-- Mail-SpamAssassin-3.2.4.orig/sql/bayes_pg.sql	2008-01-05 22:12:32.000=
>000000 +0100
>+++ Mail-SpamAssassin-3.2.4/sql/bayes_pg.sql	2008-12-21 20:30:16.345444992 =
>+0100
>@@ -43,6 +43,7 @@
>   last_expire_reduce integer NOT NULL default '0',
>   oldest_token_age integer NOT NULL default '2147483647',
>   newest_token_age integer NOT NULL default '0',
>+  token_table varchar(20) NOT NULL default 'bayes_token',
>   PRIMARY KEY  (id)
> ) WITHOUT OIDS;
>=20
>@@ -65,25 +66,29 @@
>                                       inspam_count INTEGER,
>                                       inham_count INTEGER,
>                                       inatime INTEGER)
>=2DRETURNS VOID AS '=20
>+RETURNS VOID AS $$=20
> DECLARE
>   _token BYTEA;
>   new_tokens INTEGER :=3D 0;
>+  tt VARCHAR(20);
> BEGIN
>+  SELECT token_table INTO tt FROM bayes_vars WHERE (id =3D inuserid);
>+
>   for i in array_lower(intokenary, 1) .. array_upper(intokenary, 1)
>   LOOP
>     _token :=3D intokenary[i];
>=2D    UPDATE bayes_token
>=2D       SET spam_count =3D greatest_int(spam_count + inspam_count, 0),
>=2D           ham_count =3D greatest_int(ham_count + inham_count, 0),
>=2D           atime =3D greatest_int(atime, inatime)
>=2D     WHERE id =3D inuserid=20
>=2D       AND token =3D _token;
>+    EXECUTE 'UPDATE ' || tt || '
>+               SET spam_count =3D greatest_int(spam_count + ' || inspam_co=
>unt || ', 0),
>+                   ham_count =3D greatest_int(ham_count + ' || inham_count=
> || ', 0),
>+                   atime =3D greatest_int(atime, ' || inatime || ')
>+               WHERE id =3D inuserid=20
>+                   AND token =3D ' || quote_literal(token) || ';';
>     IF NOT FOUND THEN=20
>       -- we do not insert negative counts, just return true
>       IF NOT (inspam_count < 0 OR inham_count < 0) THEN
>=2D        INSERT INTO bayes_token (id, token, spam_count, ham_count, atime=
>)=20
>=2D        VALUES (inuserid, _token, inspam_count, inham_count, inatime);=20
>+        EXECUTE 'INSERT INTO ' || tt | ' (id, token, spam_count, ham_count=
>, atime)=20
>+                 VALUES (' || inuserid || ', ' || quote_literal(_token) ||=
> ', '=20
>+		           || inspam_count  || ', ' || inham_count || ', ' || inatime ||=
> ');';
>         IF FOUND THEN
>           new_tokens :=3D new_tokens + 1;
>         END IF;
>@@ -109,4 +114,4 @@
>   END IF;
>   RETURN;
> END;=20
>=2D' LANGUAGE 'plpgsql';=20
>+$$ LANGUAGE 'plpgsql';=20
>
>--Boundary-01=_oj8UJ9IS8rBJw6M--
>
>--nextPart9059977.pG6Pp7397c
>Content-Type: application/pgp-signature; name=signature.asc 
>Content-Description: This is a digitally signed message part.
>
>-----BEGIN PGP SIGNATURE-----
>Version: GnuPG v2.0.9 (GNU/Linux)
>
>iEYEABECAAYFAklTyOgACgkQ4nZ+DekCLflELgCgzvU3N5KYRfflp4p2gTShvWfU
>YrgAmgMicjQly8ylIxCLDXqfIYl7jAvz
>=1z97
>-----END PGP SIGNATURE-----
>
>--nextPart9059977.pG6Pp7397c--
>