You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by Apache Wiki <wi...@apache.org> on 2005/12/18 00:15:17 UTC

[Spamassassin Wiki] Update of "StatsPlugin" by JamesKeating

Dear Wiki user,

You have subscribed to a wiki page or wiki category on "Spamassassin Wiki" for change notification.

The following page has been changed by JamesKeating:
http://wiki.apache.org/spamassassin/StatsPlugin

New page:
= Stats Plugin =

NOTE: This module requires !SpamAssassin 3.1+

Stats Plugin provides real-time statistics genereated by SpamAssassin.  Statistics are stored inside of a MySQL table (defined as stats)  which contains the following information:

$day $username $domain $ham $spam $total

Each entry is rotated daily, meaning when the day changes a new entry is placed into the table with the new date and the following fields are set to zero: ham, spam, total.  The table also contains an entry which retains totals for the system for each day.  This entry has the username and domain replaced by the string "$TOTALS".

== To Begin Using ==

To begin using the Stats Plugin you will need to create a table for the plugin to write to. Here is the necessary schema: [[BR]]

'''Warning: This has only been used with MySQL 4.0.24!''' [[BR]]

{{{
CREATE TABLE stats (
  day date NOT NULL default'',
  username varchar(100) NOT NULL default '',
  domain varchar(100) default '',
  spam int(20) default '0',
  ham int(20) default '0',
  total int(20) default '0',
  PRIMARY KEY (day,username,domain)
) TYPE=MyISAM
}}}

Once the table has been created you will need to add the following options into your local.cf or other configuration file: [[BR]]

''' You will need to replace the following variables with settings for you configuration ''' [[BR]]

{{{
# Configure SQL for statistical storage
use_stats                       1
user_stats_dsn                  DBI:mysql:spamassassin_beta:sql_hostname
user_stats_sql_username         sql_username
user_stats_sql_password         sql_password
user_stats_sql_table            sql_table
}}}

Then the plugin will need to be added to your init.pre or other .pre file:
{{{

# Stats Plugin - store stats in a MySQL DB
#
loadplugin Mail::SpamAssassin::Plugin::Stats

}}}

A test will also need to be added to the system to ensure that it is ran every time.  Please notice the test is set to have an extremely high priority, even higher than AWL.  This ensures that it runs last of all:

{{{
# Copyright 2005 James Keating
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
                                         

ifplugin Mail::SpamAssassin::Plugin::Stats

full RECORD_STATS_BODY                  eval:record_stats()
describe RECORD_STATS_BODY              Message has logged via Stats plugin
priority RECORD_STATS_BODY              1001    

endif   # Mail::SpamAssassin::Plugin::Stats
}}}


Finally, we need to put the plugin inside of the SpamAssassin plugin directory:

{{{
=head1 NAME

package Mail::SpamAssassin::Plugin::Stats - Keep Real Time SpamAssassin Stastics 

=head1 SYNOPSIS

  loadplugin Mail::SpamAssassin::Plugin::Stats

  user_stats_dsn                  DBI:mysql:spamassassin_beta:newlatta.donet.com
  user_stats_sql_username         spam_beta
  user_stats_sql_password         c3pLM6
  user_stats_sql_table            stats

=head1 DESCRIPTION

This SpamAssassin plugin records real-time, user-level statistics.  The statistics are stored inside of a MySQL database and are rotated daily.  Each entry inside the table contains the current date, user, domain, number of hams (legitimate mail), number of spams (unsolicited mail) and total number of messages for that individual.  There is also a total field which also rotates daily.  It contains the total number of hams, spams, and messages that the system has seen for that day.  This entry is notated with the $TOTALS username and $TOTALS domain name.

=cut
package Mail::SpamAssassin::Plugin::Stats;

use strict;
use warnings;
use bytes;
use Mail::SpamAssassin;
use Mail::SpamAssassin::Logger;

use vars qw(@ISA);
@ISA = qw(Mail::SpamAssassin::Plugin);


sub new {

  my ($class, $mailsa) = @_;
  $class = ref($class) || $class;
  my $self = $class->SUPER::new($mailsa);
  bless ($self, $class);

  $self->register_eval_rule ("record_stats");
  $self->set_config($mailsa->{conf});

  $self;
}

sub set_config {

  my ($self, $conf) = @_;
  my @cmds = ();

  push (@cmds, {
        setting => 'use_stats',
        default => 1,
        type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
  });

  push (@cmds, {
        setting => 'user_stats_dsn',
        type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
  });

  push (@cmds, {
        setting => 'user_stats_sql_username',
        type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
  });
  
  push (@cmds, {
        setting => 'user_stats_sql_password',
        type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
  });
  
  push (@cmds, {
        setting => 'user_stats_sql_table',
        type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
  });
  
  $conf->{parser}->register_commands(\@cmds);
}


sub record_stats {
  my ($self, $pms) = @_;

  return 0 unless ($pms->{conf}->{use_stats});

  dbg("stats: Executing stats-plugin");

  my $dsn = $self->{main}->{conf}->{user_stats_dsn};
  if (!defined($dsn)) {
        dbg("stats: no DSN specified; HALT!");
        return 1;
  }

  require DBI;

  my $main = $self->{main};
  my $dbuser = $main->{conf}->{user_stats_sql_username};
  my $dbpass = $main->{conf}->{user_stats_sql_password};
  my $table = $main->{conf}->{user_stats_sql_table};

  my $f_spam = 'spam';
  my $f_ham = 'ham';
  my $f_total = 'total';
  my $f_username = 'username';
  my $f_domain = 'domain';
  my $f_day = 'day';
  my $isspam;
  my $user;
  my $domain;
  my $tot_user = '$TOTALS';
  my $tot_domain = '$TOTALS';

  my $username = $self->{main}->{username};
  $username = lc($username);
  my $score = $pms->{score};
  my $required_score = $main->{conf}->{required_score};

  dbg("stats: Splitting $username based on @");
  ($user,$domain) =  split /@/,$username;
  if (!defined($domain)) {
        $domain = '';
  }

  dbg("stats: User: $user Domain: $domain");
  dbg("stats: Message Score: $score out of $required_score");

  if ($score >= $required_score ) {
        $isspam = 1;
  }
  else {
        $isspam = 0;
  }

  dbg("stats: IsSpam is $isspam");

  my $dbh = DBI->connect($dsn, $dbuser, $dbpass, {'PrintError' => 0});
  my $user_exists = &get_current_entry($user, $domain, $dbh, $table, $f_username, $f_domain, $f_day);
  my $total_exists = &get_current_entry($tot_user, $tot_domain, $dbh, $table, $f_username, $f_domain, $f_day);

  if ($dbh) {
        &execute_stats($user, $domain, $dbh, $table, $f_spam, $f_ham, $f_total, $f_username, $f_domain, $f_day, $isspam, $user_exists);
        &execute_stats($tot_user, $tot_domain, $dbh, $table, $f_spam, $f_ham, $f_total, $f_username, $f_domain, $f_day, $isspam, $total_exists);
        $dbh->disconnect();
  }
  else {
        die "stats: SQL error: " . DBI->errstr . "\n";
  }
0;
}


sub get_current_entry {
  my ($user, $domain, $dbh, $table, $f_username, $f_domain, $f_day) = @_;
  my $sql = "SELECT $f_username from $table where $f_username = '$user' and $f_domain = '$domain' and $f_day = curdate()";
  dbg("stats: Executing $sql");
  my $sth = $dbh->prepare($sql);
        if ($sth) {
                my $rv = $sth->execute();
                dbg("stats: rv contains $rv");
                if ($rv eq "0E0") {
                        dbg("stats: Entry does not exist for $user\@$domain");
                        return 0;
                }
                else {
                        dbg("stats: Entry already exists for $user\@$domain");
                        return 1;
                }
        }
        else {
                die "stats: SQL error: " . $dbh->errstr . "\n";
        }
}


sub execute_stats {
  my ($user, $domain, $dbh, $table, $f_spam, $f_ham, $f_total, $f_username, $f_domain, $f_day, $isspam, $exists) = @_;
  my $sql;

  if ($exists) {
        $sql = "UPDATE $table SET $f_total = $f_total + 1 WHERE $f_username = '$user' and $f_domain = '$domain' and $f_day = curdate()";
  }
  else {
        $sql = "INSERT into $table ($f_day,$f_username,$f_domain,$f_spam,$f_ham,$f_total) VALUES (curdate(),'$user','$domain',0,0,1)";
  }

  dbg("stats: config: SQL executing $sql");
  my $sth = $dbh->prepare($sql);

  if ($sth) {
        my $rv = $sth->execute();

        if ($rv) {
                dbg("stats: Stats change for $user\@$domain");
                if ($isspam) {
                        $sql = "UPDATE $table SET $f_spam = $f_spam + 1 WHERE $f_username = '$user' and $f_domain = '$domain' and $f_day = curdate()";
                }
                else {
                        $sql = "UPDATE $table SET $f_ham = $f_ham + 1 WHERE $f_username = '$user' and $f_domain = '$domain' and $f_day = curdate()";
                }
                $sth = $dbh->prepare($sql);
                if ($sth) {
                        $rv = $sth->execute();
                        if ($rv) {
                                dbg("stats: Updated Spam Type $sql")
                        }
                        else {
                                die "stats: SQL error $sql\n".$sth->errstr."\n";
                        }
                }
                else {
                        die "stats: SQL error $sql\n".$sth->errstr."\n";
                }
        }
        else {
                die "stats: SQL error $sql\n".$sth->errstr."\n";
        }
  }
  else {
        die "stats: SQL error: " . $dbh->errstr . "\n";
  }

}


1;

}}}