You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by ax...@apache.org on 2016/03/10 22:45:19 UTC
svn commit: r1734462 - in /spamassassin/trunk/tools: dallas-sa-stats.pl
sare-sa-stats.pl
Author: axb
Date: Thu Mar 10 21:45:18 2016
New Revision: 1734462
URL: http://svn.apache.org/viewvc?rev=1734462&view=rev
Log: (empty)
Added:
spamassassin/trunk/tools/sare-sa-stats.pl
Removed:
spamassassin/trunk/tools/dallas-sa-stats.pl
Added: spamassassin/trunk/tools/sare-sa-stats.pl
URL: http://svn.apache.org/viewvc/spamassassin/trunk/tools/sare-sa-stats.pl?rev=1734462&view=auto
==============================================================================
--- spamassassin/trunk/tools/sare-sa-stats.pl (added)
+++ spamassassin/trunk/tools/sare-sa-stats.pl Thu Mar 10 21:45:18 2016
@@ -0,0 +1,333 @@
+#!/usr/bin/perl
+
+# @hourly /usr/local/bin/sa-stats.pl --web --n=25 > /var/www/html/spamstat/index.html
+
+
+# -------------------------------------------------------------
+# file: sa-stats.pl (SARE release)
+# created: 2005-01-31
+# updated: 2007-01-30
+# version: 1.03
+# author: Dallas Engelken <da...@uribl.com>
+# desc: Generates Top Spam/Ham Rules fired for SA 3.1.x installations.
+#
+# IMPORTANT NOTES
+#
+# SA 3.0.x log files do not have user=<user> in
+# the report: log entries, so this does not work with 3.0.
+# See http://www.rulesemporium.com/programs/sa-stats.txt for
+# a SA 3.0.x version ( no per-domain / per-user support )
+#
+# If your top 5 does not contain URIBL_BLACK, see
+# http://www.uribl.com/usage.shtml
+# -------------------------------------------------------------
+
+# Per User and Per Domain Statistics...
+# -------------------------------------------------------------
+#
+# ./sa-stats -r postmaster
+# - this gives all stats for any "postmaster" user,
+# regardless of which domain the mail was for. Handy if
+# you have a lot of domain aliases.
+#
+# ./sa-stats -r @domain
+# - this would give all stats for the domain specified.
+# make sure you include the '@' sign before the
+# domain or the script will assume you wanted a user
+# name instead.
+#
+# ./sa-stats -r user@domain.com
+# - this would give all stats for a specific email address.
+# this assumes you pass 'spamc -u <fullemail>' vs.
+# 'spamc -u <userpart>'. If you do the latter, you simply
+# want to call -r <userpart> instead.
+#
+# -------------------------------------------------------------
+
+use Getopt::Long;
+use Pod::Usage;
+
+my ($LOG_DIR,$FILE,$TOPRULES,$PRINT_TO_WEB,$HELP,$RECIP);
+
+# Command-line options; all of them are optional.
+GetOptions (
+    'logdir|l=s'   => \$LOG_DIR,
+    'filename|f=s' => \$FILE,
+    'recip|r=s'    => \$RECIP,
+    'num|n=i'      => \$TOPRULES,
+    'web|w'        => \$PRINT_TO_WEB,
+    'help|h'       => \$HELP
+);
+
+if ($HELP) {
+    print "usage: $0 [-l <dir>] [-f <file>] [-r <recip>] [-n <num>] [-w]\n";
+    print "\t--logdir|-l <dir>\tDirectory containing spamd logs\n";
+    print "\t--filename|-f <file>\tFile names or regex to look for in the logdir\n";
+    print "\t--recip|-r <recip>\tOnly count mail for this user, \@domain or user\@domain\n";
+    print "\t--num|-n <num>\tNumber of top rules to display\n";
+    print "\t--web|-w\tMake it web friendly output\n";
+    print "\t--help|-h\tPrints this help\n";
+    exit;
+}
+
+# Defaults for anything not given on the command line.
+if (!defined $TOPRULES) { $TOPRULES=20 }
+if (!defined $LOG_DIR) { $LOG_DIR="/var/log" }
+if (!defined $FILE) { $FILE='^maillog$' } # regex
+
+# LEAVE THE REST ALONE UNLESS YOU KNOW WHAT YOU ARE DOING...
+################################################################
+
+# Running totals, filled in by calcstats() and reported by summarize().
+my $NUM_EMAIL=0; my $NUM_SPAM=0; my $NUM_HAM=0;            # message counts
+my $EMAIL_HITS=0; my $SPAM_HITS=0; my $HAM_HITS=0;         # summed scores
+my %SPAM_RULES=(); my %HAM_RULES=();                       # rule name => times fired
+my $TOTAL_SPAM_RULES=0; my $TOTAL_HAM_RULES=0;             # total rule hits
+my $ALSPAM=0; my $ALHAM=0; my $ALNO=0;                     # autolearn outcomes
+my $HAM_SEC=0; my $SPAM_SEC=0; my $EMAIL_SEC=0;            # summed scan times
+
+my $footer = '</div><div id="footer"><p>CGI by <a href="mailto:dallase@nmgi.com">Dallas Engelken</a></p></div>';
+
+# Collect every entry of the log directory whose name matches the
+# (case-insensitive) --filename regex. Fail loudly if the directory cannot
+# be read, instead of silently reporting all-zero statistics.
+opendir(my $dh, $LOG_DIR) or die "Cannot open log directory $LOG_DIR: $!\n";
+my @logs = grep { /$FILE/i } readdir $dh;
+closedir $dh;
+
+foreach my $log (@logs) {
+    calcstats($LOG_DIR."/".$log);
+}
+
+summarize();
+exit;
+
+#############################
+
+# calcstats($log)
+#
+# Reads one log file and, for every line that carries a spamd "result:"
+# entry with scantime=, size=, user= and autolearn= fields, adds the message
+# to the file-level totals (message counts, summed scores, summed scan
+# times, autolearn counters and the per-rule hit tables).
+#
+# When a --recip filter ($RECIP) is set, only messages whose user= value
+# matches it are counted:
+#   "name"        - compared against the user part
+#   "@domain"     - compared against the domain part
+#   "user@domain" - compared against the full address
+#
+# Prints a notice and returns if $log is missing or is a directory; warns
+# and returns if it cannot be opened. Returns nothing useful.
+sub calcstats {
+
+    my $log=shift;
+
+    if (!-e $log || -d $log) {
+        print "$log not found..\n";
+        return;
+    }
+
+    # 3-arg open on a lexical handle: the file name can never be mistaken
+    # for an open mode, and a failure is reported instead of ignored.
+    my $fh;
+    if (!open($fh, '<', $log)) {
+        warn "$log could not be opened: $!\n";
+        return;
+    }
+
+    while (my $line = <$fh>) {
+
+        # for user=, it may be %domain or $GLOBAL or @GLOBAL or user@domain..
+        next unless $line =~ /.*result:\s+(\w|\.)\s+(\-?\d+)\s+\-\s+(.*)\s+scantime\=([\d\.]+)\,size\=(\d+).*user=([^\,]+).*autolearn=(\w+)/;
+
+        # $5 (size) is matched but not used by any statistic.
+        my ($result,$score,$rules,$time,$recip,$learn) = ($1,$2,$3,$4,$6,$7);
+
+        my ($user,$domain);
+
+        if ($recip =~ m/^[\%\@](.+)/) {
+            # domain-wide entry (%domain / @GLOBAL): there is no user part
+            $user = '';
+            $domain = '@'.$1;
+        }
+        elsif ($recip =~ m/(.+)\@(.+)/) {
+            $user=$1;
+            $domain='@'.$2;
+        }
+        else {
+            # bare user name: file it under a placeholder domain
+            $user=$recip;
+            $domain='@localhost';
+        }
+
+        my $email = $user.$domain;
+
+        if ($RECIP) {
+            next if ($RECIP !~ m/\@/ && $RECIP ne $user);
+            next if ($RECIP =~ m/^[\%\@](.+)/ && $RECIP ne $domain);
+            next if ($RECIP =~ m/(.+)\@(.+)/ && $RECIP ne $email);
+        }
+
+        # The result flag is a single character; "Y" marks spam.
+        my $spam = ($result eq "Y") ? 1 : 0;
+
+        if ($learn =~ /ham/) {
+            $ALHAM++;
+        }
+        elsif ($learn =~ /spam/) {
+            $ALSPAM++;
+        }
+        else {
+            $ALNO++;
+        }
+
+        # Tally every rule that fired; ++ on a missing key starts at 1.
+        foreach my $r (split(/\,/,$rules)) {
+            if ($spam) {
+                $TOTAL_SPAM_RULES++;
+                $SPAM_RULES{$r}++;
+            }
+            else {
+                $TOTAL_HAM_RULES++;
+                $HAM_RULES{$r}++;
+            }
+        }
+
+        if ($spam) {
+            $NUM_SPAM++;
+            $SPAM_HITS += $score;
+            $SPAM_SEC  += $time;
+        }
+        else {
+            $NUM_HAM++;
+            $HAM_HITS += $score;
+            $HAM_SEC  += $time;
+        }
+        $NUM_EMAIL++;
+        $EMAIL_HITS += $score;
+        $EMAIL_SEC  += $time;
+    }
+    close($fh);
+
+}
+
+
+# _rule_percent($part,$total)
+#
+# Returns $part as a percentage of $total, formatted with two decimals.
+# Yields "0.00" when $part is undefined/zero or $total is zero, so a rule
+# that never fired for one class, or a log with no mail of that class,
+# cannot trigger a division by zero.
+sub _rule_percent {
+    my ($part,$total) = @_;
+    return sprintf("%.2f", ($part && $total) ? ($part/$total)*100 : 0);
+}
+
+# _print_top_rules($label,$ranked)
+#
+# Prints one "TOP <label> RULES FIRED" table. $ranked is a reference to the
+# rule => count hash that determines rank and the COUNT column; the
+# percentage columns always use the combined/spam/ham tables. Rows are
+# ordered by descending count, ties broken by rule name, and limited to
+# $TOPRULES rows when $TOPRULES is greater than zero.
+sub _print_top_rules {
+    my ($label,$ranked) = @_;
+
+    print "TOP $label RULES FIRED";
+    print " FOR $RECIP" if ($RECIP);
+    print "\n";
+
+    hr();
+    printf("%4s\t%-24s\t%5s %8s %7s %7s\n","RANK","RULE NAME","COUNT","\%OFMAIL","\%OFSPAM","\%OFHAM");
+    hr();
+
+    my $count=0;
+    foreach my $key (sort { $ranked->{$b} <=> $ranked->{$a} || $a cmp $b } keys %$ranked) {
+        # a rule may be absent from one of the two tables
+        my $spam_n = $SPAM_RULES{$key} || 0;
+        my $ham_n  = $HAM_RULES{$key}  || 0;
+        my $perc1 = _rule_percent($spam_n+$ham_n,$NUM_EMAIL);
+        my $perc2 = _rule_percent($spam_n,$NUM_SPAM);
+        my $perc3 = _rule_percent($ham_n,$NUM_HAM);
+        printf("%4d\t%-24s\t%5s\t%6.2f\t%6.2f\t%6.2f\n",$count+1,$key,$ranked->{$key},$perc1,$perc2,$perc3);
+        $count++;
+        last if ($count >= $TOPRULES && $TOPRULES > 0);
+    }
+    hr();
+    br();
+}
+
+# summarize()
+#
+# Prints the report built from the file-level totals: message counts with
+# autolearn counts, average score and average scan time, the total scan
+# time in hours, then the top spam rules and top ham rules tables. With
+# --web the output is preceded by a Content-type header and wrapped in
+# <pre>, and followed by $footer when that is non-empty.
+sub summarize {
+
+    # Averages stay 0 when no message of the given class was seen.
+    my ($avgspamhits,$avghamhits,$avgemailhits) = (0,0,0);
+    my ($avgspamtime,$avghamtime,$avgemailtime) = (0,0,0);
+
+    print "Content-type: text/html\n\n" if ($PRINT_TO_WEB);
+    print "<pre>" if ($PRINT_TO_WEB);
+
+    if ($NUM_SPAM > 0) {
+        $avgspamhits= sprintf("%.2f",$SPAM_HITS/$NUM_SPAM);
+        $avgspamtime= sprintf("%.2f",$SPAM_SEC/$NUM_SPAM);
+    }
+
+    if ($NUM_HAM > 0) {
+        $avghamhits= sprintf("%.2f",$HAM_HITS/$NUM_HAM);
+        $avghamtime= sprintf("%.2f",$HAM_SEC/$NUM_HAM);
+    }
+
+    if ($NUM_EMAIL > 0) {
+        $avgemailhits= sprintf("%.2f",$EMAIL_HITS/$NUM_EMAIL);
+        $avgemailtime= sprintf("%.2f",$EMAIL_SEC/$NUM_EMAIL);
+    }
+
+    print "\n\n";
+
+    if ($RECIP) {
+        print "SPAM STATS FOR $RECIP\n";
+        print "-" x 60 . "\n";
+    }
+
+    # "Autolearn" on the Email line counts messages learned as spam or ham.
+    my $ALTOT=$ALSPAM+$ALHAM;
+    printf("Email: %8s Autolearn: %5s AvgScore: %6.2f AvgScanTime: %5.2f sec\n",$NUM_EMAIL,$ALTOT,$avgemailhits,$avgemailtime);
+    printf("Spam: %8s Autolearn: %5s AvgScore: %6.2f AvgScanTime: %5.2f sec\n",$NUM_SPAM,$ALSPAM,$avgspamhits,$avgspamtime);
+    printf("Ham: %8s Autolearn: %5s AvgScore: %6.2f AvgScanTime: %5.2f sec\n",$NUM_HAM,$ALHAM,$avghamhits,$avghamtime);
+
+    br();
+    printf "Time Spent Running SA: %7.2f hours\n",$EMAIL_SEC/60/60;
+    printf "Time Spent Processing Spam: %7.2f hours\n",$SPAM_SEC/60/60;
+    printf "Time Spent Processing Ham: %7.2f hours\n",$HAM_SEC/60/60;
+
+    br();
+
+    _print_top_rules("SPAM",\%SPAM_RULES);
+    _print_top_rules("HAM",\%HAM_RULES);
+
+    print "</pre>\n" if ($PRINT_TO_WEB);
+    print $footer if ($PRINT_TO_WEB && $footer ne "");
+    print "\n";
+}
+
+#######################
+# hr: print a horizontal rule. In web mode this is an HTML <hr> tag with no
+# trailing newline; otherwise a line of 70 dashes followed by a newline.
+sub hr {
+    my $rule = $PRINT_TO_WEB
+        ? '<hr size=1 width=50% align=left>'
+        : ('-' x 70) . "\n";
+    print $rule;
+}
+#######################
+# br: print a line break, as an HTML <br> tag in web mode and as a plain
+# newline otherwise.
+sub br {
+    my $break = $PRINT_TO_WEB ? '<br>' : "\n";
+    print $break;
+}
+
+
+
+
+
+