You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2004/02/08 03:15:14 UTC

svn commit: rev 6575 - incubator/spamassassin/trunk/tools

Author: jm
Date: Sat Feb  7 18:15:13 2004
New Revision: 6575

Added:
   incubator/spamassassin/trunk/tools/bayes_dump_to_trusted_networks   (contents, props changed)
Log:
added tool to generate 'trusted_networks' lines from a Bayes db dump

Added: incubator/spamassassin/trunk/tools/bayes_dump_to_trusted_networks
==============================================================================
--- (empty file)
+++ incubator/spamassassin/trunk/tools/bayes_dump_to_trusted_networks	Sat Feb  7 18:15:13 2004
@@ -0,0 +1,139 @@
+#!/usr/bin/perl
+
+=head1 NAME
+
+bayes_dump_to_trusted_networks - generate configuration from Bayes database
+
+=head1 SYNOPSIS
+
+  sa-learn --dump | bayes_dump_to_trusted_networks [opts] > trust.cf
+  bayes_dump_to_trusted_networks bayes.dump [opts] > trust.cf
+
+options:
+
+  --minham n
+  --rdns
+
+=head1 DESCRIPTION
+
+This tool uses a dump of your Bayes database to determine which
+IP addresses are 'trustworthy', and therefore should be listed in
+'trusted_networks' lines in your configuration.
+
+This will reduce unnecessary DNSBL lookups, will whitelist mails from
+trustworthy sources, and allows several SpamAssassin rules to operate more
+effectively.
+
+A 'trustworthy' IP is one that is trusted not to B<forge> emails; in other
+words, it's not a subverted machine running a proxy, or one under spammer
+control.
+
+As such, any IP that has relayed 3 or more ham mails is considered
+trustworthy.  It doesn't matter if it has ever relayed spam mails to you, since
+large ISP smarthost relays will have done so -- relaying both ham and spam from
+their customer pool.  (The important thing is that it relayed the mail without
+forging sender address information.)
+
+=head1 OPTIONS
+
+=over 4
+
+=item --minham n
+
+Require C<n> or more ham messages before considering an IP a candidate
+for trust.  Default: 3.
+
+=item --rdns
+
+Annotate with reverse-DNS for that IP address.  Slows things down,
+but easier to read.
+
+=back
+
+=cut
+
+# Package globals holding the two precompiled patterns defined below.
+use vars qw{
+  $IP_ADDRESS $IP_IN_RESERVED_RANGE
+};
+
+# an IP address, in IPv4, IPv4-mapped-in-IPv6, or IPv6 format.  NOTE: cannot
+# just refer to $IPV4_ADDRESS, due to perl bug reported in nesting qr//s. :(
+#
+# The pattern is compiled with /i and /x, accepts an optional leading
+# "IPv6:" tag, and is delimited by word boundaries (\b) at both ends.  It
+# contains no capture groups of its own; callers wrap it in parentheses when
+# they need the matched text.
+#
+$IP_ADDRESS = qr/\b (?:IPv6:|) (?: (?:0*:0*:ffff:(?:0*:|)|) # IPv4-mapped-in-IPv6
+                    (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)\.
+                    (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)\.
+                    (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)\.
+                    (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)
+                  | # an IPv6 address, seems to always be at least 6 words
+                    [a-f0-9]{0,4} \:[a-f0-9]{0,4}
+                    \:[a-f0-9]{0,4} \:[a-f0-9]{0,4}
+                    \:[a-f0-9]{0,4} \:[a-f0-9]{0,4} (?:\:[a-f0-9]{0,4})*
+                  )\b/ix;
+
+# Matches the leading octet(s) of a dotted-quad IPv4 string that falls in one
+# of the private-use or reserved blocks listed below.  The pattern is anchored
+# at the start of the string and requires a literal '.' after the listed
+# prefix, so it only recognises text that begins directly with the IPv4
+# octets (no "IPv6:" tag or IPv4-mapped prefix in front).
+#
+# NOTE(review): the "Reserved" /8 entries look like a snapshot of the IANA
+# IPv4 registry from when this was written; many of these blocks may have
+# been allocated since -- verify against the current registry before use.
+$IP_IN_RESERVED_RANGE = qr{^(?:
+  192\.168|                        # 192.168/16:              Private Use
+  10|                              # 10/8:                    Private Use
+  172\.(?:1[6-9]|2[0-9]|3[01])|    # 172.16-172.31/16:        Private Use
+  169\.254|                        # 169.254/16:              Private Use (APIPA)
+  127|                             # 127/8:                   Private Use (local host)
+  [01257]|                         # 000-002/8, 005/8, 007/8: Reserved
+  2[37]|                           # 023/8, 027/8:            Reserved
+  3[179]|                          # 031/8, 037/8, 039/8:     Reserved
+  4[12]|                           # 041/8, 042/8:            Reserved
+  5[89]|                           # 058/8, 059/8:            Reserved
+  60|                              # 060/8:                   Reserved
+  7[0-9]|                          # 070-079/8:               Reserved
+  9[0-9]|                          #  -
+  1[01][0-9]|                      #  -
+  12[0-6]|                         # 126/8:                   Reserved
+  197|                             # 197/8:                   Reserved
+  22[23]|                          # 222/8, 223/8:            Reserved
+  24[0-9]|                         # 240-
+  25[0-5]                         # 255/8:                   Reserved
+)\.}x;
+
+use Getopt::Long;
+
+# Abort with a one-line usage summary.  The message ends in a newline, so
+# die() prints it verbatim without appending a file/line suffix.
+sub usage {
+  my $synopsis =
+    'usage: bayes_dump_to_trusted_networks [--minham n] [--rdns] [file]';
+  die "\n$synopsis\n";
+}
+
+# Command-line switches; filled in by GetOptions() below and read by the
+# main loop.
+use vars qw( $opt_minham $opt_rdns $opt_help );
+
+my $parsed_ok = GetOptions(
+  'minham:i' => \$opt_minham,
+  'rdns'     => \$opt_rdns,
+  'help'     => \$opt_help,
+);
+
+# an unparseable command line and an explicit --help both end in usage()
+usage() if (!$parsed_ok || $opt_help);
+
+# defaults: no reverse-DNS annotation; 3 ham mails needed to qualify.
+# Any false value (unset, or 0) is replaced by the default.
+$opt_rdns   = 0 unless $opt_rdns;
+$opt_minham = 3 unless $opt_minham;
+
+# Main loop: read the Bayes dump (named file(s), or STDIN) one line at a
+# time and print a 'trusted_networks' line for every relay IP that passes
+# the filters below.  Each line is split on whitespace into five fields;
+# only the ham count and the token are used.
+while (<>) {
+  my ($prob, $nspam, $nham, $atime, $tok) = split;
+
+  # only select IP-address Received tokens (short or blank lines carry no
+  # token at all and are skipped here too)
+  next unless (defined $tok && $tok =~ /^H\*r:ip\*(${IP_ADDRESS})/);
+  my $ip = $1;
+
+  next unless ($nham >= $opt_minham);   # has relayed >= n ham mails
+
+  # $IP_ADDRESS also accepts an "IPv6:" tag and IPv4-mapped-in-IPv6 forms,
+  # but the reserved-range pattern is anchored on a bare dotted quad.
+  # Reduce those forms first so private addresses cannot slip past it.
+  $ip =~ s/^IPv6://i;
+  $ip =~ s/^0*:0*:ffff:(?:0*:)?(?=\d+\.\d+\.\d+\.\d+\z)//i;
+  next if ($ip =~ /$IP_IN_RESERVED_RANGE/);
+
+  # sanitise!  $ip is interpolated into a shell command below, so keep only
+  # characters a numeric address can contain.  Hex digits are allowed;
+  # replacing them would corrupt IPv6 addresses.
+  $ip =~ s/[^0-9a-fA-F\.\:]/_/g;
+
+  my $rdns = '';
+  if ($opt_rdns) {
+    my $answer = `host $ip 2>&1`;
+    $answer = '' unless defined $answer;
+
+    # An IP can have several PTR records, and failures produce free-form
+    # text.  Fold whatever came back onto a single line so nothing spills
+    # out of the '#' comment into the generated configuration.
+    my @names = ($answer =~ /^.* domain name pointer (\S+?)\.?\s*$/gm);
+    my $note = @names ? join(', ', @names) : $answer;
+    $note =~ s/\s+/ /g;
+    $note =~ s/^ //;
+    $note =~ s/ $//;
+    $rdns = "\t# $note" if ($note ne '');
+  }
+
+  print "trusted_networks $ip$rdns\n";
+}
+
+