You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by pa...@apache.org on 2004/04/27 16:27:57 UTC
svn commit: rev 10311 - in incubator/spamassassin/trunk: . lib/Mail/SpamAssassin lib/Mail/SpamAssassin/BayesStore tools
Author: parker
Date: Tue Apr 27 07:27:56 2004
New Revision: 10311
Removed:
incubator/spamassassin/trunk/tools/convert_bayes_dbm_to_sql
Modified:
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/DBM.pm
incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/SQL.pm
incubator/spamassassin/trunk/sa-learn.raw
Log:
Bug 3049: Bayes database backup/restore
Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm Tue Apr 27 07:27:56 2004
@@ -1329,6 +1329,7 @@
if (!$self->{main}->{learn_caller_will_untie}) {
$self->{store}->untie_db();
}
+ return 1;
}
# Stolen from Archive Iteraator ... Should probably end up in M::SA::Util
Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore.pm Tue Apr 27 07:27:56 2004
@@ -741,6 +741,54 @@
die "perform_upgrade: not implemented\n";
}
+=head2 clear_database
+
+public instance (Boolean) clear_database ()
+
+Description:
+This method deletes all records for a particular user.
+
+Callers should be aware that any errors returned by this method
+could cause the database to be inconsistent for the given user.
+
+=cut
+
+sub clear_database {
+ my ($self) = @_;
+ die "clear_database: not implemented\n";
+}
+
+=head2 backup_database
+
+public instance (Boolean) backup_database ()
+
+Description:
+This method will dump the user's database in a machine-readable format.
+
+=cut
+
+sub backup_database {
+ my ($self) = @_;
+ die "backup_database: not implemented\n";
+}
+
+=head2 restore_database
+
+public instance (Boolean) restore_database (String $filename, Boolean $showdots)
+
+Description:
+This method restores a database from the given filename, C<$filename>.
+
+Callers should be aware that any errors returned by this method
+could cause the database to be inconsistent for the given user.
+
+=cut
+
+sub restore_database {
+ my ($self, $filename, $showdots) = @_;
+ die "restore_database: not implemented\n";
+}
+
sub dbg { Mail::SpamAssassin::dbg (@_); }
sub sa_die { Mail::SpamAssassin::sa_die (@_); }
Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/DBM.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/DBM.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/DBM.pm Tue Apr 27 07:27:56 2004
@@ -443,8 +443,6 @@
$self->{already_tied} = 0;
$self->{db_version} = undef;
-
- return 1;
}
###########################################################################
@@ -1256,6 +1254,275 @@
return 0;
}
+
+sub clear_database {
+ my ($self) = @_;
+
+ return 0 unless ($self->tie_db_writable());
+
+ my $path = $self->{bayes}->{main}->sed_path ($self->{bayes}->{main}->{conf}->{bayes_path});
+
+ foreach my $dbname (@DBNAMES, 'journal') {
+ my $name = $path.'_'.$dbname;
+ unlink $name;
+ dbg("bayes: clear_database: removing $dbname");
+ }
+
+ $self->untie_db();
+
+ return 1;
+}
+
+sub backup_database {
+ my ($self) = @_;
+
+ # we tie writable because we want the upgrade code to kick in if needed
+ return 0 unless ($self->tie_db_writable());
+
+ my @vars = $self->get_storage_variables();
+
+ print "v\t$vars[6]\tdb_version # this must be the first line!!!\n";
+ print "v\t$vars[1]\tnum_spam\n";
+ print "v\t$vars[2]\tnum_nonspam\n";
+
+ while (my ($tok, $packed) = each %{$self->{db_toks}}) {
+ next if ($tok =~ MAGIC_RE); # skip magic tokens
+
+ my ($ts, $th, $atime) = $self->tok_unpack($packed);
+
+ print "t\t$ts\t$th\t$atime\t$tok\n";
+ }
+
+ while (my ($msgid, $flag) = each %{$self->{db_seen}}) {
+ print "s\t$flag\t$msgid\n";
+ }
+
+ $self->untie_db();
+
+ return 1;
+}
+
+sub restore_database {
+ my ($self, $filename, $showdots) = @_;
+
+ if (!open(DUMPFILE, '<', $filename)) {
+ dbg("bayes: Unable to open backup file $filename: $!");
+ return 0;
+ }
+
+ if (!$self->tie_db_writable()) {
+ dbg("bayes: failed to tie db writable");
+ return 0;
+ }
+
+ my $main = $self->{bayes}->{main};
+ my $path = $main->sed_path ($main->{conf}->{bayes_path});
+
+ # use a temporary PID-based suffix just in case another one was
+ # created previously by an interrupted expire
+ my $tmpsuffix = "convert$$";
+ my $tmptoksdbname = $path.'_toks.'.$tmpsuffix;
+ my $tmpseendbname = $path.'_seen.'.$tmpsuffix;
+ my $toksdbname = $path.'_toks';
+ my $seendbname = $path.'_seen';
+
+ my %new_toks;
+ my %new_seen;
+ my $umask = umask 0;
+ unless (tie %new_toks, "DB_File", $tmptoksdbname, O_RDWR|O_CREAT|O_EXCL,
+ (oct ($main->{conf}->{bayes_file_mode}) & 0666)) {
+ dbg("bayes: Failed to tie temp toks db: $!");
+ $self->untie_db();
+ return 0;
+ }
+ unless (tie %new_seen, "DB_File", $tmpseendbname, O_RDWR|O_CREAT|O_EXCL,
+ (oct ($main->{conf}->{bayes_file_mode}) & 0666)) {
+ dbg("bayes: Failed to tie temp seen db: $!");
+ untie %new_toks;
+ unlink $tmptoksdbname;
+ $self->untie_db();
+ return 0;
+ }
+ umask $umask;
+
+ my $line_count = 0;
+ my $db_version;
+ my $token_count = 0;
+ my $num_spam = 0;
+ my $num_ham = 0;
+ my $error_p = 0;
+ my $newest_token_age = 0;
+ # Kinda weird I know, but we need a nice big value and we know there will be
+ # no tokens > time() since we reset atime if > time(), so use that with a
+ # little buffer just in case.
+ my $oldest_token_age = time() + 100000;
+
+ my $line = <DUMPFILE>;
+ $line_count++;
+
+ # We require the database version line to be the first in the file so we can
+ # figure out how to properly deal with the file. If it is not the first
+ # line then fail
+ if ($line =~ m/^v\s+(\d+)\s+db_version/) {
+ $db_version = $1;
+ }
+ else {
+ dbg("bayes: Database Version must be the first line in the backup file, correct and re-run.");
+ untie %new_toks;
+ untie %new_seen;
+ unlink $tmptoksdbname;
+ unlink $tmpseendbname;
+ $self->untie_db();
+ return 0;
+ }
+
+ while (my $line = <DUMPFILE>) {
+ chomp($line);
+ $line_count++;
+
+ if ($line_count % 1000 == 0) {
+ print STDERR "." if ($showdots);
+ }
+
+ my @parsed_line = split(/\s+/, $line, 5);
+
+ if ($parsed_line[0] eq 'v') { # variable line
+ my $value = $parsed_line[1] + 0;
+ if ($parsed_line[2] eq 'num_spam') {
+ $num_spam = $value;
+ }
+ elsif ($parsed_line[2] eq 'num_nonspam') {
+ $num_ham = $value;
+ }
+ else {
+ dbg("bayes: restore_database: Skipping unknown line: $line");
+ }
+ }
+ elsif ($parsed_line[0] eq 't') { # token line
+ my $spam_count = $parsed_line[1] + 0;
+ my $ham_count = $parsed_line[2] + 0;
+ my $atime = $parsed_line[3] + 0;
+ my $token = $parsed_line[4];
+
+ my $token_warn_p = 0;
+ my @warnings;
+
+ if ($spam_count < 0) {
+ $spam_count = 0;
+ push(@warnings,'Spam Count < 0, resetting');
+ $token_warn_p = 1;
+ }
+ if ($ham_count < 0) {
+ $ham_count = 0;
+ push(@warnings,'Ham Count < 0, resetting');
+ $token_warn_p = 1;
+ }
+
+ if ($spam_count == 0 && $ham_count == 0) {
+ dbg("bayes: Token has zero spam and ham count, skipping.");
+ next;
+ }
+
+ if ($atime > time()) {
+ $atime = time();
+ push(@warnings,'atime > current time, resetting');
+ $token_warn_p = 1;
+ }
+
+ if ($token_warn_p) {
+ dbg("bayes: Token ($token) has the following warnings:\n".join("\n",@warnings));
+ }
+ $new_toks{$token} = $self->tok_pack($spam_count, $ham_count, $atime);
+ if ($atime < $oldest_token_age) {
+ $oldest_token_age = $atime;
+ }
+ if ($atime > $newest_token_age) {
+ $newest_token_age = $atime;
+ }
+ $token_count++;
+ }
+ elsif ($parsed_line[0] eq 's') { # seen line
+ my $flag = $parsed_line[1];
+ my $msgid = $parsed_line[2];
+
+ unless ($flag eq 'h' || $flag eq 's') {
+ dbg("bayes: Unknown seen flag ($flag) for line: $line, skipping");
+ next;
+ }
+ $new_seen{$msgid} = $flag;
+ }
+ else {
+ dbg("bayes: Skipping unknown line: $line");
+ next;
+ }
+ }
+ close(DUMPFILE);
+
+ print STDERR "\n" if ($showdots);
+
+ unless ($num_spam) {
+ dbg("bayes: Unable to find num spam, please check file.");
+ $error_p = 1;
+ }
+
+ unless ($num_ham) {
+ dbg("bayes: Unable to find num ham, please check file.");
+ $error_p = 1;
+ }
+
+ if ($error_p) {
+ dbg("bayes: Error(s) while attempting to load $filename, correct and Re-Run");
+
+ untie %new_toks;
+ untie %new_seen;
+ unlink $tmptoksdbname;
+ unlink $tmpseendbname;
+ $self->untie_db();
+ return 0;
+ }
+
+ # set the calculated magic tokens
+ $new_toks{$DB_VERSION_MAGIC_TOKEN} = 2;
+ $new_toks{$NTOKENS_MAGIC_TOKEN} = $token_count;
+ $new_toks{$NSPAM_MAGIC_TOKEN} = $num_spam;
+ $new_toks{$NHAM_MAGIC_TOKEN} = $num_ham;
+ $new_toks{$NEWEST_TOKEN_AGE_MAGIC_TOKEN} = $newest_token_age;
+ $new_toks{$OLDEST_TOKEN_AGE_MAGIC_TOKEN} = $oldest_token_age;
+
+ # go ahead and zero out these, chances are good that they are bogus anyway.
+ $new_toks{$LAST_EXPIRE_MAGIC_TOKEN} = 0;
+ $new_toks{$LAST_JOURNAL_SYNC_MAGIC_TOKEN} = 0;
+ $new_toks{$LAST_ATIME_DELTA_MAGIC_TOKEN} = 0;
+ $new_toks{$LAST_EXPIRE_REDUCE_MAGIC_TOKEN} = 0;
+
+ local $SIG{'INT'} = 'IGNORE';
+ local $SIG{'TERM'} = 'IGNORE';
+ local $SIG{'HUP'} = 'IGNORE' if (!Mail::SpamAssassin::Util::am_running_on_windows());
+
+ untie %new_toks;
+ untie %new_seen;
+ $self->untie_db();
+
+ # Here is where something can go horribly wrong and screw up the bayes
+ # database files. If we are able to copy one and not the other then it
+ # will leave the database in an inconsistent state. Since this is an
+ # edge case, and they're trying to replace the DB anyway we should be ok.
+ unless (rename($tmptoksdbname, $toksdbname)) {
+ dbg("bayes: Error while renaming $tmptoksdbname to $toksdbname: $!");
+ return 0;
+ }
+ unless (rename($tmpseendbname, $seendbname)) {
+ dbg("bayes: Error while renaming $tmpseendbname to $seendbname: $!");
+ dbg("bayes: Database now in inconsistent state.");
+ return 0;
+ }
+
+ dbg("bayes: Parsed $line_count lines.");
+ dbg("bayes: Created database with $token_count tokens based on $num_spam Spam Messages and $num_ham Ham Messages.");
+
+ return 1;
+}
+
###########################################################################
Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/SQL.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/SQL.pm (original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/SQL.pm Tue Apr 27 07:27:56 2004
@@ -129,6 +129,8 @@
return 0 unless (HAS_DBI);
+ return 1 if ($self->{_dbh}); # already connected
+
my $main = $self->{bayes}->{main};
$self->read_db_configs();
@@ -183,6 +185,7 @@
return unless (defined($self->{_dbh}));
$self->{_dbh}->disconnect();
+ $self->{_dbh} = undef;
}
=head2 calculate_expire_delta
@@ -942,6 +945,342 @@
return 1;
}
+
+=head2 clear_database
+
+public instance (Boolean) clear_database ()
+
+Description:
+This method deletes all records for a particular user.
+
+Callers should be aware that any errors returned by this method
+could cause the database to be inconsistent for the given user.
+
+=cut
+
+sub clear_database {
+ my ($self) = @_;
+
+ $self->tie_db_writable();
+
+ return 0 unless (defined($self->{_dbh}));
+
+ my $rows = $self->{_dbh}->do("DELETE FROM bayes_vars WHERE username = ?",
+ undef,
+ $self->{_username});
+ unless (defined($rows)) {
+ dbg("SQL Error removing user (bayes_vars) data: ".$self->{_dbh}->errstr());
+ return 0;
+ }
+
+ $rows = $self->{_dbh}->do("DELETE FROM bayes_seen WHERE username = ?",
+ undef,
+ $self->{_username});
+ unless (defined($rows)) {
+ dbg("SQL Error removing seen data: ".$self->{_dbh}->errstr());
+ return 0;
+ }
+
+ $rows = $self->{_dbh}->do("DELETE FROM bayes_token WHERE username = ?",
+ undef,
+ $self->{_username});
+ unless (defined($rows)) {
+ dbg("SQL Error removing token data: ".$self->{_dbh}->errstr());
+ return 0;
+ }
+
+ return 1;
+}
+
+=head2 backup_database
+
+public instance (Boolean) backup_database ()
+
+Description:
+This method will dump the user's database in a machine-readable format.
+
+=cut
+
+sub backup_database {
+ my ($self) = @_;
+
+ return 0 unless ($self->tie_db_readonly());
+
+ return 0 unless (defined($self->{_dbh}));
+
+ my @vars = $self->get_storage_variables();
+
+ print "v\t$vars[6]\tdb_version # this must be the first line!!!\n";
+ print "v\t$vars[1]\tnum_spam\n";
+ print "v\t$vars[2]\tnum_nonspam\n";
+
+ my $token_sql = "SELECT spam_count, ham_count, atime, token
+ FROM bayes_token
+ WHERE username = ?
+ AND (spam_count > 0 OR ham_count > 0)
+ ORDER BY token";
+
+ my $seen_sql = "SELECT flag, msgid
+ FROM bayes_seen
+ WHERE username = ?";
+
+ my $sth = $self->{_dbh}->prepare($token_sql);
+
+ unless (defined ($sth)) {
+ dbg("bayes: backup_database: SQL Error: ".$self->{_dbh}->errstr());
+ return 0;
+ }
+
+ my $rc = $sth->execute($self->{_username});
+
+ unless ($rc) {
+ dbg("bayes: backup_database: SQL Error: ".$self->{_dbh}->errstr());
+ return 0;
+ }
+
+ while (my @values = $sth->fetchrow_array()) {
+ print "t\t" . join("\t",@values) . "\n";
+ }
+
+ $sth->finish();
+
+ $sth = $self->{_dbh}->prepare($seen_sql);
+
+ unless (defined ($sth)) {
+ dbg("bayes: backup_database: SQL Error: ".$self->{_dbh}->errstr());
+ return 0;
+ }
+
+ $rc = $sth->execute($self->{_username});
+
+ unless ($rc) {
+ dbg("bayes: backup_database: SQL Error: ".$self->{_dbh}->errstr());
+ return 0;
+ }
+
+ while (my @values = $sth->fetchrow_array()) {
+ print "s\t" . join("\t",@values) . "\n";
+ }
+
+ $sth->finish();
+
+ $self->untie_db();
+
+ return 1;
+}
+
+=head2 restore_database
+
+public instance (Boolean) restore_database (String $filename, Boolean $showdots)
+
+Description:
+This method restores a database from the given filename, C<$filename>.
+
+Callers should be aware that any errors returned by this method
+could cause the database to be inconsistent for the given user.
+
+=cut
+
+sub restore_database {
+ my ($self, $filename, $showdots) = @_;
+
+ if (!open(DUMPFILE, '<', $filename)) {
+ dbg("bayes: Unable to open backup file $filename: $!");
+ return 0;
+ }
+
+ return 0 unless ($self->tie_db_writable());
+
+ return 0 unless (defined($self->{_dbh}));
+
+ # This is the critical phase (moving sql around), so don't allow it
+ # to be interrupted.
+ local $SIG{'INT'} = 'IGNORE';
+ local $SIG{'HUP'} = 'IGNORE' if (!Mail::SpamAssassin::Util::am_running_on_windows());
+ local $SIG{'TERM'} = 'IGNORE';
+
+ unless ($self->clear_database()) {
+ dbg("bayes: Database now in inconsistent state for ".$self->{_username});
+ return 0;
+ }
+
+ my $token_count = 0;
+ my $db_version;
+ my $num_spam = 0;
+ my $num_ham = 0;
+ my $error_p = 0;
+ my $line_count = 0;
+
+ my $line = <DUMPFILE>;
+ $line_count++;
+ # We require the database version line to be the first in the file so we can figure out how
+ # to properly deal with the file. If it is not the first line then fail
+ if ($line =~ m/^v\s+(\d+)\s+db_version/) {
+ $db_version = $1;
+ }
+ else {
+ dbg("bayes: Database Version must be the first line in the backup file, correct and re-run.");
+ return 0;
+ }
+
+ my $tokensql = "INSERT INTO bayes_token
+ (username, token, spam_count, ham_count, atime)
+ VALUES (?,?,?,?,?)";
+
+ my $tokensth = $self->{_dbh}->prepare_cached($tokensql);
+
+ my $seensql = "INSERT INTO bayes_seen (username, msgid, flag)
+ VALUES (?, ?, ?)";
+
+ my $seensth = $self->{_dbh}->prepare_cached($seensql);
+
+ unless (defined($seensth)) {
+ dbg("SQL Error: ".$self->{_dbh}->errstr());
+ dbg("bayes: Database now in inconsistent state for ".$self->{_username});
+ return 0;
+ }
+
+ while (my $line = <DUMPFILE>) {
+ chomp($line);
+ $line_count++;
+
+ if ($line_count % 1000 == 0) {
+ print STDERR "." if ($showdots);
+ }
+
+ my @parsed_line = split(/\s+/, $line, 5);
+
+ if ($parsed_line[0] eq 'v') { # variable line
+ my $value = $parsed_line[1] + 0;
+ if ($parsed_line[2] eq 'num_spam') {
+ $num_spam = $value;
+ }
+ elsif ($parsed_line[2] eq 'num_nonspam') {
+ $num_ham = $value;
+ }
+ else {
+ dbg("bayes: restore_database: Skipping unknown line: $line");
+ }
+ }
+ elsif ($parsed_line[0] eq 't') { # token line
+ my $spam_count = $parsed_line[1] + 0;
+ my $ham_count = $parsed_line[2] + 0;
+ my $atime = $parsed_line[3] + 0;
+ my $token = $parsed_line[4];
+
+ my $token_warn_p = 0;
+ my @warnings;
+
+ if ($spam_count < 0) {
+ $spam_count = 0;
+ push(@warnings,'Spam Count < 0, resetting');
+ $token_warn_p = 1;
+ }
+ if ($ham_count < 0) {
+ $ham_count = 0;
+ push(@warnings,'Ham Count < 0, resetting');
+ $token_warn_p = 1;
+ }
+
+ if ($spam_count == 0 && $ham_count == 0) {
+ dbg("bayes: Token has zero spam and ham count, skipping.");
+ next;
+ }
+
+ if ($atime > time()) {
+ $atime = time();
+ push(@warnings,'atime > current time, resetting');
+ $token_warn_p = 1;
+ }
+
+ if ($token_warn_p) {
+ dbg("bayes: Token ($token) has the following warnings:\n".join("\n",@warnings));
+ }
+
+ my $rc = $tokensth->execute($self->{_username},
+ $token,
+ $spam_count,
+ $ham_count,
+ $atime);
+ unless ($rc) {
+ dbg("bayes: Error inserting token for line: $line\nSQL Error: ".$self->errstr());
+ $error_p = 1;
+ }
+ $token_count++;
+ }
+ elsif ($parsed_line[0] eq 's') { # seen line
+ my $flag = $parsed_line[1];
+ my $msgid = $parsed_line[2];
+
+ unless ($flag eq 'h' || $flag eq 's') {
+ dbg("bayes: Unknown seen flag ($flag) for line: $line, skipping");
+ next;
+ }
+
+ my $rc = $seensth->execute($self->{_username},
+ $msgid,
+ $flag);
+ unless ($rc) {
+ dbg("bayes: Error inserting msgid in seen table for line: $line\nSQL Error: ".$self->errstr());
+ $error_p = 1;
+ }
+ }
+ else {
+ dbg("bayes: Skipping unknown line: $line");
+ next;
+ }
+ }
+ close(DUMPFILE);
+
+ print STDERR "\n" if ($showdots);
+
+ unless ($num_spam) {
+ dbg("bayes: Unable to find num spam, please check file.");
+ $error_p = 1;
+ }
+
+ unless ($num_ham) {
+ dbg("bayes: Unable to find num ham, please check file.");
+ $error_p = 1;
+ }
+
+ if ($error_p) {
+ dbg("bayes: Error(s) while attempting to load $filename, correct and Re-Run");
+
+ $self->clear_database();
+
+ dbg("bayes: Database now in inconsistent state for ".$self->{_username});
+ return 0;
+ }
+
+ # There is a race condition here which is why we suggest that the user
+ # turn off SA for the duration of a restore operation. If something comes
+ # along and calls initialize_db() before this little bit of code runs then
+ # this insert will fail, but at least we'll now wipe out the bayes_token
+ # entries for this user so that we are in a somewhat ok state.
+ my $varsupdatesql = "INSERT INTO bayes_vars (username, spam_count, ham_count)
+ VALUES(?,?,?)";
+
+ my $rows = $self->{_dbh}->do($varsupdatesql,
+ undef,
+ $self->{_username}, $num_spam, $num_ham);
+
+ unless (defined($rows)) {
+ dbg("bayes: Error inserting user variables (bayes_vars).");
+ dbg("bayes: SQL Error:".$self->{_dbh}->errstr());
+ $self->clear_database();
+ dbg("bayes; Database now in inconsistent state for ".$self->{_username});
+ return 0;
+ }
+
+ dbg("bayes: Parsed $line_count lines.");
+ dbg("bayes: Created database with $token_count tokens based on $num_spam Spam Messages and $num_ham Ham Messages.");
+
+ $self->untie_db();
+
+ return 1;
+}
+
=head1 Private Methods
Modified: incubator/spamassassin/trunk/sa-learn.raw
==============================================================================
--- incubator/spamassassin/trunk/sa-learn.raw (original)
+++ incubator/spamassassin/trunk/sa-learn.raw Tue Apr 27 07:27:56 2004
@@ -119,6 +119,10 @@
'dump:s' => \$opt{'dump'},
'import' => \$opt{'import'},
+ 'clear' => \$opt{'clear'},
+ 'backup' => \$opt{'backup'},
+ 'restore=s' => \$opt{'restore'},
+
'dir' => sub { $opt{'old_format'} = 'dir'; },
'file' => sub { $opt{'old_format'} = 'file'; },
'mbox' => sub { $opt{'format'} = 'mbox'; },
@@ -148,10 +152,13 @@
&& !defined $forget
&& !defined $opt{'dump'}
&& !defined $opt{'import'}
+ && !defined $opt{'clear'}
+ && !defined $opt{'backup'}
+ && !defined $opt{'restore'}
&& !defined $opt{'folders'} )
{
usage( 0,
-"Please select either --spam, --ham, --folders, --forget, --sync, --import or --dump"
+"Please select either --spam, --ham, --folders, --forget, --sync, --import,\n--dump, --clear, --backup or --restore"
);
}
@@ -235,6 +242,44 @@
exit( !$ret );
}
+if (defined $opt{'clear'}) {
+ unless ($spamtest->{bayes_scanner}->{store}->clear_database()) {
+ $spamtest->finish_learner();
+ die "ERROR: Bayes clear returned an error, please re-run with -D for more information\n";
+ }
+
+ $spamtest->finish_learner();
+ exit 0;
+}
+
+if (defined $opt{'backup'}) {
+ unless ($spamtest->{bayes_scanner}->{store}->backup_database()) {
+ $spamtest->finish_learner();
+ die "ERROR: Bayes backup returned an error, please re-run with -D for more information\n";
+ }
+
+ $spamtest->finish_learner();
+ exit 0;
+}
+
+if (defined $opt{'restore'}) {
+
+ my $filename = $opt{'restore'};
+
+ unless ($filename) {
+ $spamtest->finish_learner();
+ die "ERROR: You must specify a filename to restore.\n";
+ }
+
+ unless ($spamtest->{bayes_scanner}->{store}->restore_database($filename, $opt{'showdots'})) {
+ $spamtest->finish_learner();
+ die "ERROR: Bayes restore returned an error, please re-run with -D for more information\n";
+ }
+
+ $spamtest->finish_learner();
+ exit 0;
+}
+
if ( !$spamtest->{conf}->{use_bayes} ) {
warn "ERROR: configuration specifies 'use_bayes 0', sa-learn disabled\n";
exit 1;
@@ -461,6 +506,10 @@
databases after scan
-L, --local Operate locally, no network accesses
--import Upgrade data from an earlier database version
+ --clear Wipe out existing database
+ --backup Backup, to STDOUT, existing database
+ --restore <filename> Restore a database from filename
+
-C path, --configpath=path, --config-file=path Path to standard configuration dir
-p prefs, --prefspath=file, --prefs-file=file Set user preferences file
--siteconfigpath=path Path for site configs (def: /etc/mail/spamassassin)
@@ -741,6 +790,25 @@
Can also use the B<--regexp> I<RE> option to specify which tokens to
display based on a regular expression.
+
+=item B<--clear>
+
+Clear an existing Bayes database by removing all traces of the database.
+
+WARNING: This is destructive and should be used with care.
+
+=item B<--backup>
+
+Performs a dump of the Bayes database in machine/human readable format.
+
+The dump will include token and seen data. It is suitable for input back
+into the --restore command.
+
+=item B<--restore>=I<filename>
+
+Performs a restore of the Bayes database defined by I<filename>.
+
+WARNING: This is a destructive operation, previous Bayes data will be wiped out.
=item B<-h>, B<--help>