You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by pa...@apache.org on 2004/04/27 16:27:57 UTC

svn commit: rev 10311 - in incubator/spamassassin/trunk: . lib/Mail/SpamAssassin lib/Mail/SpamAssassin/BayesStore tools

Author: parker
Date: Tue Apr 27 07:27:56 2004
New Revision: 10311

Removed:
   incubator/spamassassin/trunk/tools/convert_bayes_dbm_to_sql
Modified:
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/DBM.pm
   incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/SQL.pm
   incubator/spamassassin/trunk/sa-learn.raw
Log:
Bug 3049: Bayes database backup/restore

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/Bayes.pm	Tue Apr 27 07:27:56 2004
@@ -1329,6 +1329,7 @@
   if (!$self->{main}->{learn_caller_will_untie}) {
     $self->{store}->untie_db();
   }
+  return 1;
 }
 
 # Stolen from Archive Iteraator ...  Should probably end up in M::SA::Util

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore.pm	Tue Apr 27 07:27:56 2004
@@ -741,6 +741,54 @@
   die "perform_upgrade: not implemented\n";
 }
 
+=head2 clear_database
+
+public instance (Boolean) clear_database ()
+
+Description:
+This method deletes all records for a particular user.
+
+Callers should be aware that any errors returned by this method
+could cause the database to be inconsistent for the given user.
+
+=cut
+
+sub clear_database {
+  my ($self) = @_;
+  die "clear_database: not implemented\n";
+}
+
+=head2 backup_database
+
+public instance (Boolean) backup_database ()
+
+Description:
+This method will dump the user's database in a machine-readable format.
+
+=cut
+
+sub backup_database {
+  my ($self) = @_;
+  die "backup_database: not implemented\n";
+}
+
+=head2 restore_database
+
+public instance (Boolean) restore_database (String $filename, Boolean $showdots)
+
+Description:
+This method restores a database from the given filename, C<$filename>.
+
+Callers should be aware that any errors returned by this method
+could cause the database to be inconsistent for the given user.
+
+=cut
+
+sub restore_database {
+  my ($self, $filename, $showdots) = @_;
+  die "restore_database: not implemented\n";
+}
+
 sub dbg { Mail::SpamAssassin::dbg (@_); }
 sub sa_die { Mail::SpamAssassin::sa_die (@_); }
 

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/DBM.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/DBM.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/DBM.pm	Tue Apr 27 07:27:56 2004
@@ -443,8 +443,6 @@
 
   $self->{already_tied} = 0;
   $self->{db_version} = undef;
-
-  return 1;
 }
 
 ###########################################################################
@@ -1256,6 +1254,275 @@
 
   return 0;
 }
+
+sub clear_database {
+  my ($self) = @_;
+
+  return 0 unless ($self->tie_db_writable());
+
+  my $path = $self->{bayes}->{main}->sed_path ($self->{bayes}->{main}->{conf}->{bayes_path});
+
+  foreach my $dbname (@DBNAMES, 'journal') {
+    my $name = $path.'_'.$dbname;
+    unlink $name;
+    dbg("bayes: clear_database: removing $dbname");
+  }
+
+  $self->untie_db();
+
+  return 1;
+}
+
+sub backup_database {
+  my ($self) = @_;
+
+  # we tie writable because we want the upgrade code to kick in if needed
+  return 0 unless ($self->tie_db_writable());
+
+  my @vars = $self->get_storage_variables();
+
+  print "v\t$vars[6]\tdb_version # this must be the first line!!!\n";
+  print "v\t$vars[1]\tnum_spam\n";
+  print "v\t$vars[2]\tnum_nonspam\n";
+
+  while (my ($tok, $packed) = each %{$self->{db_toks}}) {
+    next if ($tok =~ MAGIC_RE); # skip magic tokens
+
+    my ($ts, $th, $atime) = $self->tok_unpack($packed);
+
+    print "t\t$ts\t$th\t$atime\t$tok\n";
+  }
+
+  while (my ($msgid, $flag) = each %{$self->{db_seen}}) {
+    print "s\t$flag\t$msgid\n";
+  }
+
+  $self->untie_db();
+
+  return 1;
+}
+
+sub restore_database {
+  my ($self, $filename, $showdots) = @_;
+
+  if (!open(DUMPFILE, '<', $filename)) {
+    dbg("bayes: Unable to open backup file $filename: $!");
+    return 0;
+  }
+   
+  if (!$self->tie_db_writable()) {
+    dbg("bayes: failed to tie db writable");
+    return 0;
+  }
+
+  my $main = $self->{bayes}->{main};
+  my $path = $main->sed_path ($main->{conf}->{bayes_path});
+
+  # use a temporary PID-based suffix just in case another one was
+  # created previously by an interrupted expire
+  my $tmpsuffix = "convert$$";
+  my $tmptoksdbname = $path.'_toks.'.$tmpsuffix;
+  my $tmpseendbname = $path.'_seen.'.$tmpsuffix;
+  my $toksdbname = $path.'_toks';
+  my $seendbname = $path.'_seen';
+
+  my %new_toks;
+  my %new_seen;
+  my $umask = umask 0;
+  unless (tie %new_toks, "DB_File", $tmptoksdbname, O_RDWR|O_CREAT|O_EXCL,
+	  (oct ($main->{conf}->{bayes_file_mode}) & 0666)) {
+    dbg("bayes: Failed to tie temp toks db: $!");
+    $self->untie_db();
+    return 0;
+  }
+  unless (tie %new_seen, "DB_File", $tmpseendbname, O_RDWR|O_CREAT|O_EXCL,
+	  (oct ($main->{conf}->{bayes_file_mode}) & 0666)) {
+    dbg("bayes: Failed to tie temp seen db: $!");
+    untie %new_toks;
+    unlink $tmptoksdbname;
+    $self->untie_db();
+    return 0;
+  }
+  umask $umask;
+
+  my $line_count = 0;
+  my $db_version;
+  my $token_count = 0;
+  my $num_spam = 0;
+  my $num_ham = 0;
+  my $error_p = 0;
+  my $newest_token_age = 0;
+  # Kinda weird I know, but we need a nice big value and we know there will be
+  # no tokens > time() since we reset atime if > time(), so use that with a
+  # little buffer just in case.
+  my $oldest_token_age = time() + 100000;
+
+  my $line = <DUMPFILE>;
+  $line_count++;
+
+  # We require the database version line to be the first in the file so we can
+  # figure out how to properly deal with the file.  If it is not the first
+  # line then fail
+  if ($line =~ m/^v\s+(\d+)\s+db_version/) {
+    $db_version = $1;
+  }
+  else {
+    dbg("bayes: Database Version must be the first line in the backup file, correct and re-run.");
+    untie %new_toks;
+    untie %new_seen;
+    unlink $tmptoksdbname;
+    unlink $tmpseendbname;
+    $self->untie_db();
+    return 0;
+  }
+
+  while (my $line = <DUMPFILE>) {
+    chomp($line);
+    $line_count++;
+
+    if ($line_count % 1000 == 0) {
+      print STDERR "." if ($showdots);
+    }
+
+    my @parsed_line = split(/\s+/, $line, 5);
+
+    if ($parsed_line[0] eq 'v') { # variable line
+      my $value = $parsed_line[1] + 0;
+      if ($parsed_line[2] eq 'num_spam') {
+	$num_spam = $value;
+      }
+      elsif ($parsed_line[2] eq 'num_nonspam') {
+	$num_ham = $value;
+      }
+      else {
+	dbg("bayes: restore_database: Skipping unknown line: $line");
+      }
+    }
+    elsif ($parsed_line[0] eq 't') { # token line
+      my $spam_count = $parsed_line[1] + 0;
+      my $ham_count = $parsed_line[2] + 0;
+      my $atime = $parsed_line[3] + 0;
+      my $token = $parsed_line[4];
+
+      my $token_warn_p = 0;
+      my @warnings;
+
+      if ($spam_count < 0) {
+	$spam_count = 0;
+	push(@warnings,'Spam Count < 0, resetting');
+	$token_warn_p = 1;
+      }
+      if ($ham_count < 0) {
+	$ham_count = 0;
+	push(@warnings,'Ham Count < 0, resetting');
+	$token_warn_p = 1;
+      }
+
+      if ($spam_count == 0 && $ham_count == 0) {
+	dbg("bayes: Token has zero spam and ham count, skipping.");
+	next;
+      }
+
+      if ($atime > time()) {
+	$atime = time();
+	push(@warnings,'atime > current time, resetting');
+	$token_warn_p = 1;
+      }
+
+      if ($token_warn_p) {
+	dbg("bayes: Token ($token) has the following warnings:\n".join("\n",@warnings));
+      }
+      $new_toks{$token} = $self->tok_pack($spam_count, $ham_count, $atime);
+      if ($atime < $oldest_token_age) {
+	$oldest_token_age = $atime;
+      }
+      if ($atime > $newest_token_age) {
+	$newest_token_age = $atime;
+      }
+      $token_count++;
+    }
+    elsif ($parsed_line[0] eq 's') { # seen line
+      my $flag = $parsed_line[1];
+      my $msgid = $parsed_line[2];
+
+      unless ($flag eq 'h' || $flag eq 's') {
+	dbg("bayes: Unknown seen flag ($flag) for line: $line, skipping");
+	next;
+      }
+      $new_seen{$msgid} = $flag;
+    }
+    else {
+      dbg("bayes: Skipping unknown line: $line");
+      next;
+    }
+  }
+  close(DUMPFILE);
+
+  print STDERR "\n" if ($showdots);
+
+  unless ($num_spam) {
+    dbg("bayes: Unable to find num spam, please check file.");
+    $error_p = 1;
+  }
+
+  unless ($num_ham) {
+    dbg("bayes: Unable to find num ham, please check file.");
+    $error_p = 1;
+  }
+
+  if ($error_p) {
+    dbg("bayes: Error(s) while attempting to load $filename, correct and Re-Run");
+
+    untie %new_toks;
+    untie %new_seen;
+    unlink $tmptoksdbname;
+    unlink $tmpseendbname;
+    $self->untie_db();
+    return 0;
+  }
+
+  # set the calculated magic tokens
+  $new_toks{$DB_VERSION_MAGIC_TOKEN} = 2;
+  $new_toks{$NTOKENS_MAGIC_TOKEN} = $token_count;
+  $new_toks{$NSPAM_MAGIC_TOKEN} = $num_spam;
+  $new_toks{$NHAM_MAGIC_TOKEN} = $num_ham;
+  $new_toks{$NEWEST_TOKEN_AGE_MAGIC_TOKEN} = $newest_token_age;
+  $new_toks{$OLDEST_TOKEN_AGE_MAGIC_TOKEN} = $oldest_token_age;
+
+  # go ahead and zero out these, chances are good that they are bogus anyway.
+  $new_toks{$LAST_EXPIRE_MAGIC_TOKEN} = 0;
+  $new_toks{$LAST_JOURNAL_SYNC_MAGIC_TOKEN} = 0;
+  $new_toks{$LAST_ATIME_DELTA_MAGIC_TOKEN} = 0;
+  $new_toks{$LAST_EXPIRE_REDUCE_MAGIC_TOKEN} = 0;
+
+  local $SIG{'INT'} = 'IGNORE';
+  local $SIG{'TERM'} = 'IGNORE';
+  local $SIG{'HUP'} = 'IGNORE' if (!Mail::SpamAssassin::Util::am_running_on_windows());
+
+  untie %new_toks;
+  untie %new_seen;
+  $self->untie_db();
+
+  # Here is where something can go horribly wrong and screw up the bayes
+  # database files.  If we are able to copy one and not the other then it
+  # will leave the database in an inconsistent state.  Since this is an
+  # edge case, and they're trying to replace the DB anyway we should be ok.
+  unless (rename($tmptoksdbname, $toksdbname)) {
+    dbg("bayes: Error while renaming $tmptoksdbname to $toksdbname: $!");
+    return 0;
+  }
+  unless (rename($tmpseendbname, $seendbname)) {
+    dbg("bayes: Error while renaming $tmpseendbname to $seendbname: $!");
+    dbg("bayes: Database now in inconsistent state.");
+    return 0;
+  }
+
+  dbg("bayes: Parsed $line_count lines.");
+  dbg("bayes: Created database with $token_count tokens based on $num_spam Spam Messages and $num_ham Ham Messages.");
+
+  return 1;
+}
+
 
 ###########################################################################
 

Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/SQL.pm
==============================================================================
--- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/SQL.pm	(original)
+++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/BayesStore/SQL.pm	Tue Apr 27 07:27:56 2004
@@ -129,6 +129,8 @@
 
   return 0 unless (HAS_DBI);
 
+  return 1 if ($self->{_dbh}); # already connected
+
   my $main = $self->{bayes}->{main};
 
   $self->read_db_configs();
@@ -183,6 +185,7 @@
   return unless (defined($self->{_dbh}));
 
   $self->{_dbh}->disconnect();
+  $self->{_dbh} = undef;
 }
 
 =head2 calculate_expire_delta
@@ -942,6 +945,342 @@
 
   return 1;
 }
+
+=head2 clear_database
+
+public instance (Boolean) clear_database ()
+
+Description:
+This method deletes all records for a particular user.
+
+Callers should be aware that any errors returned by this method
+could cause the database to be inconsistent for the given user.
+
+=cut
+
+sub clear_database {
+  my ($self) = @_;
+
+  $self->tie_db_writable();
+
+  return 0 unless (defined($self->{_dbh}));
+
+  my $rows = $self->{_dbh}->do("DELETE FROM bayes_vars WHERE username = ?",
+			       undef,
+			       $self->{_username});
+  unless (defined($rows)) {
+    dbg("SQL Error removing user (bayes_vars) data: ".$self->{_dbh}->errstr());
+    return 0;
+  }
+
+  $rows = $self->{_dbh}->do("DELETE FROM bayes_seen WHERE username = ?",
+			    undef,
+			    $self->{_username});
+  unless (defined($rows)) {
+    dbg("SQL Error removing seen data: ".$self->{_dbh}->errstr());
+    return 0;
+  }
+
+  $rows = $self->{_dbh}->do("DELETE FROM bayes_token WHERE username = ?",
+			    undef,
+			    $self->{_username});
+  unless (defined($rows)) {
+    dbg("SQL Error removing token data: ".$self->{_dbh}->errstr());
+    return 0;
+  }
+
+  return 1;
+}
+
+=head2 backup_database
+
+public instance (Boolean) backup_database ()
+
+Description:
+This method will dump the user's database in a machine-readable format.
+
+=cut
+
+sub backup_database {
+  my ($self) = @_;
+
+  return 0 unless ($self->tie_db_readonly());
+
+  return 0 unless (defined($self->{_dbh}));
+
+  my @vars = $self->get_storage_variables();
+
+  print "v\t$vars[6]\tdb_version # this must be the first line!!!\n";
+  print "v\t$vars[1]\tnum_spam\n";
+  print "v\t$vars[2]\tnum_nonspam\n";
+
+  my $token_sql = "SELECT spam_count, ham_count, atime, token
+                     FROM bayes_token
+                    WHERE username = ?
+                      AND (spam_count > 0 OR ham_count > 0)
+                    ORDER BY token";
+
+  my $seen_sql = "SELECT flag, msgid
+                    FROM bayes_seen
+                   WHERE username = ?";
+
+  my $sth = $self->{_dbh}->prepare($token_sql);
+
+  unless (defined ($sth)) {
+    dbg("bayes: backup_database: SQL Error: ".$self->{_dbh}->errstr());
+    return 0;
+  }
+
+  my $rc = $sth->execute($self->{_username});
+
+  unless ($rc) {
+    dbg("bayes: backup_database: SQL Error: ".$self->{_dbh}->errstr());
+    return 0;
+  }
+
+  while (my @values = $sth->fetchrow_array()) {
+    print "t\t" . join("\t",@values) . "\n";
+  }
+
+  $sth->finish();
+
+  $sth = $self->{_dbh}->prepare($seen_sql);
+
+  unless (defined ($sth)) {
+    dbg("bayes: backup_database: SQL Error: ".$self->{_dbh}->errstr());
+    return 0;
+  }
+
+  $rc = $sth->execute($self->{_username});
+
+  unless ($rc) {
+    dbg("bayes: backup_database: SQL Error: ".$self->{_dbh}->errstr());
+    return 0;
+  }
+
+  while (my @values = $sth->fetchrow_array()) {
+    print "s\t" . join("\t",@values) . "\n";
+  }
+
+  $sth->finish();
+
+  $self->untie_db();
+
+  return 1;
+}
+
+=head2 restore_database
+
+public instance (Boolean) restore_database (String $filename, Boolean $showdots)
+
+Description:
+This method restores a database from the given filename, C<$filename>.
+
+Callers should be aware that any errors returned by this method
+could cause the database to be inconsistent for the given user.
+
+=cut
+
+sub restore_database {
+  my ($self, $filename, $showdots) = @_;
+
+  if (!open(DUMPFILE, '<', $filename)) {
+    dbg("bayes: Unable to open backup file $filename: $!");
+    return 0;
+  }
+
+  return 0 unless ($self->tie_db_writable());
+
+  return 0 unless (defined($self->{_dbh}));
+
+  # This is the critical phase (moving sql around), so don't allow it
+  # to be interrupted.
+  local $SIG{'INT'} = 'IGNORE';
+  local $SIG{'HUP'} = 'IGNORE' if (!Mail::SpamAssassin::Util::am_running_on_windows());
+  local $SIG{'TERM'} = 'IGNORE';
+
+  unless ($self->clear_database()) {
+    dbg("bayes: Database now in inconsistent state for ".$self->{_username});
+    return 0;
+  }
+
+  my $token_count = 0;
+  my $db_version;
+  my $num_spam = 0;
+  my $num_ham = 0;
+  my $error_p = 0;
+  my $line_count = 0;
+
+  my $line = <DUMPFILE>;
+  $line_count++;
+  # We require the database version line to be the first in the file so we can figure out how
+  # to properly deal with the file.  If it is not the first line then fail
+  if ($line =~ m/^v\s+(\d+)\s+db_version/) {
+    $db_version = $1;
+  }
+  else {
+    dbg("bayes: Database Version must be the first line in the backup file, correct and re-run.");
+    return 0;
+  }
+
+  my $tokensql = "INSERT INTO bayes_token
+                    (username, token, spam_count, ham_count, atime)
+                  VALUES (?,?,?,?,?)";
+
+  my $tokensth = $self->{_dbh}->prepare_cached($tokensql);
+
+  my $seensql = "INSERT INTO bayes_seen (username, msgid, flag)
+                   VALUES (?, ?, ?)";
+
+  my $seensth = $self->{_dbh}->prepare_cached($seensql);
+
+  unless (defined($seensth)) {
+    dbg("SQL Error: ".$self->{_dbh}->errstr());
+    dbg("bayes: Database now in inconsistent state for ".$self->{_username});
+    return 0;
+  }
+
+  while (my $line = <DUMPFILE>) {
+    chomp($line);
+    $line_count++;
+
+    if ($line_count % 1000 == 0) {
+      print STDERR "." if ($showdots);
+    }
+
+    my @parsed_line = split(/\s+/, $line, 5);
+
+    if ($parsed_line[0] eq 'v') { # variable line
+      my $value = $parsed_line[1] + 0;
+      if ($parsed_line[2] eq 'num_spam') {
+	$num_spam = $value;
+      }
+      elsif ($parsed_line[2] eq 'num_nonspam') {
+	$num_ham = $value;
+      }
+      else {
+	dbg("bayes: restore_database: Skipping unknown line: $line");
+      }
+    }
+    elsif ($parsed_line[0] eq 't') { # token line
+      my $spam_count = $parsed_line[1] + 0;
+      my $ham_count = $parsed_line[2] + 0;
+      my $atime = $parsed_line[3] + 0;
+      my $token = $parsed_line[4];
+
+      my $token_warn_p = 0;
+      my @warnings;
+
+      if ($spam_count < 0) {
+	$spam_count = 0;
+	push(@warnings,'Spam Count < 0, resetting');
+	$token_warn_p = 1;
+      }
+      if ($ham_count < 0) {
+	$ham_count = 0;
+	push(@warnings,'Ham Count < 0, resetting');
+	$token_warn_p = 1;
+      }
+
+      if ($spam_count == 0 && $ham_count == 0) {
+	dbg("bayes: Token has zero spam and ham count, skipping.");
+	next;
+      }
+
+      if ($atime > time()) {
+	$atime = time();
+	push(@warnings,'atime > current time, resetting');
+	$token_warn_p = 1;
+      }
+
+      if ($token_warn_p) {
+	dbg("bayes: Token ($token) has the following warnings:\n".join("\n",@warnings));
+      }
+
+      my $rc = $tokensth->execute($self->{_username},
+				  $token,
+				  $spam_count,
+				  $ham_count,
+				  $atime);
+      unless ($rc) {
+	dbg("bayes: Error inserting token for line: $line\nSQL Error: ".$self->errstr());
+	$error_p = 1;
+      }
+      $token_count++;
+    }
+    elsif ($parsed_line[0] eq 's') { # seen line
+      my $flag = $parsed_line[1];
+      my $msgid = $parsed_line[2];
+
+      unless ($flag eq 'h' || $flag eq 's') {
+	dbg("bayes: Unknown seen flag ($flag) for line: $line, skipping");
+	next;
+      }
+
+      my $rc = $seensth->execute($self->{_username},
+				 $msgid,
+				 $flag);
+      unless ($rc) {
+	dbg("bayes: Error inserting msgid in seen table for line: $line\nSQL Error: ".$self->errstr());
+	$error_p = 1;
+      }
+    }
+    else {
+      dbg("bayes: Skipping unknown line: $line");
+      next;
+    }
+  }
+  close(DUMPFILE);
+
+  print STDERR "\n" if ($showdots);
+
+  unless ($num_spam) {
+    dbg("bayes: Unable to find num spam, please check file.");
+    $error_p = 1;
+  }
+
+  unless ($num_ham) {
+    dbg("bayes: Unable to find num ham, please check file.");
+    $error_p = 1;
+  }
+
+  if ($error_p) {
+    dbg("bayes: Error(s) while attempting to load $filename, correct and Re-Run");
+
+    $self->clear_database();
+
+    dbg("bayes: Database now in inconsistent state for ".$self->{_username});
+    return 0;
+  }
+
+  # There is a race condition here which is why we suggest that the user
+  # turn off SA for the duration of a restore operation.  If something comes
+  # along and calls initialize_db() before this little bit of code runs then
+  # this insert will fail, but at least we'll now wipe out the bayes_token
+  # entries for this user so that we are in a somewhat ok state.
+  my $varsupdatesql = "INSERT INTO bayes_vars (username, spam_count, ham_count)
+                       VALUES(?,?,?)";
+  
+  my $rows = $self->{_dbh}->do($varsupdatesql,
+			       undef,
+			       $self->{_username}, $num_spam, $num_ham);
+  
+  unless (defined($rows)) {
+    dbg("bayes: Error inserting user variables (bayes_vars).");
+    dbg("bayes: SQL Error:".$self->{_dbh}->errstr());
+    $self->clear_database();
+    dbg("bayes; Database now in inconsistent state for ".$self->{_username});
+    return 0;
+  }
+
+  dbg("bayes: Parsed $line_count lines.");
+  dbg("bayes: Created database with $token_count tokens based on $num_spam Spam Messages and $num_ham Ham Messages.");
+
+  $self->untie_db();
+
+  return 1;
+}
+
 
 =head1 Private Methods
 

Modified: incubator/spamassassin/trunk/sa-learn.raw
==============================================================================
--- incubator/spamassassin/trunk/sa-learn.raw	(original)
+++ incubator/spamassassin/trunk/sa-learn.raw	Tue Apr 27 07:27:56 2004
@@ -119,6 +119,10 @@
   'dump:s' => \$opt{'dump'},
   'import' => \$opt{'import'},
 
+  'clear' => \$opt{'clear'},
+  'backup' => \$opt{'backup'},
+  'restore=s' => \$opt{'restore'},
+
   'dir'    => sub { $opt{'old_format'} = 'dir'; },
   'file'   => sub { $opt{'old_format'} = 'file'; },
   'mbox'   => sub { $opt{'format'}     = 'mbox'; },
@@ -148,10 +152,13 @@
   && !defined $forget
   && !defined $opt{'dump'}
   && !defined $opt{'import'}
+  && !defined $opt{'clear'}
+  && !defined $opt{'backup'}
+  && !defined $opt{'restore'}
   && !defined $opt{'folders'} )
 {
   usage( 0,
-"Please select either --spam, --ham, --folders, --forget, --sync, --import or --dump"
+"Please select either --spam, --ham, --folders, --forget, --sync, --import,\n--dump, --clear, --backup or --restore"
   );
 }
 
@@ -235,6 +242,44 @@
   exit( !$ret );
 }
 
+if (defined $opt{'clear'}) {
+  unless ($spamtest->{bayes_scanner}->{store}->clear_database()) {
+    $spamtest->finish_learner();
+    die "ERROR: Bayes clear returned an error, please re-run with -D for more information\n";
+  }
+
+  $spamtest->finish_learner();
+  exit 0;
+}
+
+if (defined $opt{'backup'}) {
+  unless ($spamtest->{bayes_scanner}->{store}->backup_database()) {
+    $spamtest->finish_learner();
+    die "ERROR: Bayes backup returned an error, please re-run with -D for more information\n";
+  }
+
+  $spamtest->finish_learner();
+  exit 0;
+}
+
+if (defined $opt{'restore'}) {
+
+  my $filename = $opt{'restore'};
+
+  unless ($filename) {
+    $spamtest->finish_learner();
+    die "ERROR: You must specify a filename to restore.\n";
+  }
+
+  unless ($spamtest->{bayes_scanner}->{store}->restore_database($filename, $opt{'showdots'})) {
+    $spamtest->finish_learner();
+    die "ERROR: Bayes restore returned an error, please re-run with -D for more information\n";
+  }
+
+  $spamtest->finish_learner();
+  exit 0;
+}
+
 if ( !$spamtest->{conf}->{use_bayes} ) {
   warn "ERROR: configuration specifies 'use_bayes 0', sa-learn disabled\n";
   exit 1;
@@ -461,6 +506,10 @@
                                    databases after scan
  -L, --local			   Operate locally, no network accesses
  --import                          Upgrade data from an earlier database version
+ --clear                           Wipe out existing database
+ --backup                          Backup, to STDOUT, existing database
+ --restore <filename>              Restore a database from filename
+
  -C path, --configpath=path, --config-file=path   Path to standard configuration dir
  -p prefs, --prefspath=file, --prefs-file=file    Set user preferences file
  --siteconfigpath=path             Path for site configs (def: /etc/mail/spamassassin)
@@ -741,6 +790,25 @@
 
 Can also use the B<--regexp> I<RE> option to specify which tokens to
 display based on a regular expression.
+
+=item B<--clear>
+
+Clear an existing Bayes database by removing all traces of the database.
+
+WARNING: This is destructive and should be used with care.
+
+=item B<--backup>
+
+Performs a dump of the Bayes database in machine/human readable format.
+
+The dump will include token and seen data.  It is suitable for input back
+into the --restore command.
+
+=item B<--restore>=I<filename>
+
+Performs a restore of the Bayes database defined by I<filename>.
+
+WARNING: This is a destructive operation, previous Bayes data will be wiped out.
 
 =item B<-h>, B<--help>