You are viewing a plain text version of this content. The canonical link for it is here.
Posted to users@spamassassin.apache.org by Derek Harding <de...@innovyx.com> on 2005/08/08 23:05:03 UTC
Re: [SPAM] RE: GeoCities Link-only spam
On Mon, 2005-08-08 at 15:53 -0500, salist@floridacpu.com wrote:
> >
> > It allows rules such as:
> > uricountry URICOUNTRY_CN CN
> > header URICOUNTRY_CN eval:check_uricountry('URICOUNTRY_CN')
> > describe URICOUNTRY_CN Contains a URI hosted in China
> > tflags URICOUNTRY_CN net
> > score URICOUNTRY_CN 2.0
> >
> > Derek
>
>
> Oh yes, that type code would be very nice to have indeed for people like
> me who can't outright RBL them. Do you also have code for Korea even? But
> dare I ask too much. :-) I could give it a score of 4 or so... and up it
> even more when spammer simpletons start thinking they are on to the latest
> greatest China spam idea. :-)
The code will work for any country. Just write a rule for that country.
Here's what's needed in your local.cf
loadplugin Mail::SpamAssassin::Plugin::URICountry
uricountry URICOUNTRY_CN CN
header URICOUNTRY_CN eval:check_uricountry('URICOUNTRY_CN')
describe URICOUNTRY_CN Contains a URI hosted in China
tflags URICOUNTRY_CN net
score URICOUNTRY_CN 2.0
uricountry URICOUNTRY_KR KR
header URICOUNTRY_KR eval:check_uricountry('URICOUNTRY_KR')
describe URICOUNTRY_KR Contains a URI hosted in Korea
tflags URICOUNTRY_KR net
score URICOUNTRY_KR 2.0
uricountry URICOUNTRY_BR BR
header URICOUNTRY_BR eval:check_uricountry('URICOUNTRY_BR')
describe URICOUNTRY_BR Contains a URI hosted in Brazil
tflags URICOUNTRY_BR net
score URICOUNTRY_BR 2.0
Derek
-- code for the plugin follows --
=head1 NAME
URICountry - fire rules based on the country code of each URI host found
in a message
=head1 SYNOPSIS
loadplugin Mail::SpamAssassin::Plugin::URICountry
=head1 REQUIREMENT
This plugin requires the IP::Country::Fast module from CPAN.
=cut
# Plugin package; this is the name given to "loadplugin" in the SYNOPSIS.
package Mail::SpamAssassin::Plugin::URICountry;
use Mail::SpamAssassin::Plugin;
use strict;
use bytes;
# Inherit from the SpamAssassin plugin base class.
use vars qw(@ISA);
@ISA = qw(Mail::SpamAssassin::Plugin);
# Constructor: builds the object through the parent class constructor and
# registers the "check_uricountry" eval rule.
#
# Arguments: the class name (or an existing instance) and the main
# SpamAssassin object, which is passed to the parent constructor unchanged.
# Returns the blessed plugin object.
sub new {
    my ($class, $mailsaobject) = @_;

    # Allow invocation on an instance as well as on the class name.
    $class = ref($class) || $class;

    my $self = bless($class->SUPER::new($mailsaobject), $class);
    $self->register_eval_rule("check_uricountry");

    return $self;
}
# Placeholder body for the "check_uricountry" eval rule. It always reports
# "no match"; hits for uricountry rules are raised from parsed_metadata()
# via got_hit() instead.
sub check_uricountry {
    my ($plugin, $msg_status, $rule) = @_;    # accepted but unused
    return 0;
}
# Plugin hook that does the real work for uricountry rules.
#
# Reads the per-message status object from $opts->{permsgstatus}, maps the
# host of every URI in the message to a country code with
# IP::Country::Fast, and calls got_hit() on the status object for each
# configured "uricountry" rule whose country code was seen.
#
# Always returns 1, including when there are no rules or when
# IP::Country::Fast cannot be loaded (the plugin then simply does nothing).
sub parsed_metadata {
    my ($self, $opts) = @_;
    my $scanner = $opts->{permsgstatus};

    # Nothing to do when no uricountry rules were configured: bail out
    # before loading the lookup module or looking at any URI.
    my $rules = $scanner->{conf}->{uricountry};
    return 1 unless $rules && %{$rules};

    # Load the module lazily so that a missing dependency only disables
    # this plugin. Test eval's own result rather than $@, and also guard
    # against new() handing back nothing.
    my $reg;
    my $loaded = eval {
        require IP::Country::Fast;
        $reg = IP::Country::Fast->new();
        1;
    };
    if (!$loaded || !$reg) {
        dbg ("failed to load 'IP::Country::Fast', skipping");
        return 1;
    }

    # Collect the distinct hosts referenced by the message.
    my %domlist = ();
    foreach my $uri ($scanner->get_uri_list()) {
        my $dom = my_uri_to_domain($uri);
        # my_uri_to_domain() returns nothing for URIs it rejects; log an
        # empty host in that case instead of interpolating undef.
        dbg("debug: URICountry $uri in " . (defined $dom ? $dom : ''));
        $domlist{$dom} = 1 if $dom;
    }

    # Build a list of the countries for URIs in the message.
    # "XX" stands in for hosts where inet_atocc() returned nothing.
    my %countries = ();
    foreach my $dom (keys %domlist) {
        my $cc = $reg->inet_atocc($dom) || "XX";
        dbg("debug: URICountry $dom in $cc");
        $countries{lc($cc)} = 1;
    }

    # Now check if any match any defined rules (case-insensitively).
    foreach my $rule (keys %{$rules}) {
        my $country = lc($rules->{$rule});
        if ($countries{$country}) {
            dbg ("debug: URICountry hit rule: $country");
            $scanner->got_hit($rule, "");
        }
    }

    return 1;
}
# Configuration hook for lines of the form
#     uricountry RULENAME COUNTRYCODE
#
# $opts->{key} is the setting name, $opts->{value} the rest of the line and
# $opts->{conf} the configuration object the mapping is stored in (under
# {uricountry}{RULENAME}).
#
# Returns 1 after storing a well-formed uricountry line and inhibiting
# further callbacks; returns 0 for any other key or a malformed value.
sub parse_config {
    my ($self, $opts) = @_;

    # Only the "uricountry" setting belongs to this plugin.
    return 0 unless $opts->{key} eq 'uricountry';

    # Expect exactly two whitespace-separated tokens: rule name, country.
    return 0 unless $opts->{value} =~ /^(\S+)\s+(\S+)\s*$/;
    my ($rule, $cc) = ($1, $2);

    dbg("debug: URICountry: registering $rule");
    $opts->{conf}->{uricountry}->{$rule} = $cc;
    $self->inhibit_further_callbacks();

    return 1;
}
# Reduce a URI to its lower-cased host name or dotted-quad IP address.
#
# Taken from the one in Util.pm, but we don't want to drop the hostname:
# doing so often leaves us with no A record.
#
# Returns nothing (undef in scalar context) for an undefined argument, for
# javascript: URIs, for URIs whose host part still contains a "%", and for
# host names rejected by RegistrarBoundaries::is_domain_valid().
sub my_uri_to_domain {
    my ($uri) = @_;

    return unless defined $uri;

    # Javascript is not going to help us, so return.
    return if ($uri =~ /^javascript:/i);

    $uri =~ s,#.*$,,gs;              # drop fragment
    $uri =~ s#^[a-z]+:/{0,2}##gsi;   # drop the protocol
    $uri =~ s,^[^/]*\@,,gs;          # username/passwd
    $uri =~ s,[/\?\&].*$,,gs;        # path/cgi params
    $uri =~ s,:\d+$,,gs;             # port

    # Skip undecoded URIs; we'll see the decoded version as well.
    return if $uri =~ /\%/;

    # keep IPs intact
    if ($uri !~ /^\d+\.\d+\.\d+\.\d+$/) {
        # Trimming the host down to its registered domain is deliberately
        # disabled (see the header comment); the call is kept for reference:
        # $uri = Mail::SpamAssassin::Util::RegistrarBoundaries::trim_domain($uri);

        # ignore invalid domains
        return unless
            Mail::SpamAssassin::Util::RegistrarBoundaries::is_domain_valid($uri);
    }

    # $uri is now the full host name (or IP address) only
    return lc $uri;
}
# Local shorthand: forward all arguments to SpamAssassin's debug logger.
sub dbg {
    return Mail::SpamAssassin::dbg(@_);
}
1;
-- end code --
Re: [SPAM] RE: GeoCities Link-only spam
Posted by "Eric A. Hall" <eh...@ehsco.com>.
On 8/22/2005 3:50 PM, Eric A. Hall wrote:
> IP::Country uses Whois lookups instead though, and UDP/DNS lookups are
> going to be faster than chained TCP/Whois queries.
> I'll play with the plugin and see what kind of times and load I get
After some poking around: IP::Country::Fast uses a pre-built mapping database
instead of issuing lookups (IP::Country::Slow) or caching lookups
(IP::Country::Medium). The pre-built database is stored in the ".gif" files
in /usr/lib/perl5/site_perl/5.8.6/IP/Country/Fast/ on my system, and
presumably this stuff gets repackaged when IP allocations change. This
means keeping the package synched, of course, but it does seem to be
somewhat faster and requires less overhead.
BTW, lookups for dead domain names are really slow and block the rest of
the message processing.
--
Eric A. Hall http://www.ehsco.com/
Internet Core Protocols http://www.oreilly.com/catalog/coreprot/
Re: [SPAM] RE: GeoCities Link-only spam
Posted by "Eric A. Hall" <eh...@ehsco.com>.
On 8/22/2005 3:34 PM, Derek Harding wrote:
> On Sun, 2005-08-21 at 20:05 -0400, Eric A. Hall wrote:
>
>>What's the benefit of using this instead of the uridnsbl plugin? The code
>>below will look for the IP address behind a URI and then query the
>>cn-kr.blackholes.us RBL to see if that addr is in China:
>
> This one doesn't require a DNS lookup which makes it faster.
IP::Country uses Whois lookups instead though, and UDP/DNS lookups are
going to be faster than chained TCP/Whois queries.
> blackholes.us only covers a limited set.
Just an example for discussion purposes (worth noting that their main web
site is down too). http://countries.nerd.dk/more.html is another one
I'll play with the plugin and see what kind of times and load I get
--
Eric A. Hall http://www.ehsco.com/
Internet Core Protocols http://www.oreilly.com/catalog/coreprot/
Re: [SPAM] RE: GeoCities Link-only spam
Posted by Derek Harding <de...@innovyx.com>.
On Sun, 2005-08-21 at 20:05 -0400, Eric A. Hall wrote:
> What's the benefit of using this instead of the uridnsbl plugin? The code
> below will look for the IP address behind a URI and then query the
> cn-kr.blackholes.us RBL to see if that addr is in China:
This one doesn't require a DNS lookup which makes it faster. Also it can
work for just about any country, blackholes.us only covers a limited
set.
Derek
Re: [SPAM] RE: GeoCities Link-only spam
Posted by "Eric A. Hall" <eh...@ehsco.com>.
On 8/8/2005 5:05 PM, Derek Harding wrote:
>>>It allows rules such as:
>>>uricountry URICOUNTRY_CN CN
>>>header URICOUNTRY_CN eval:check_uricountry('URICOUNTRY_CN')
>>>describe URICOUNTRY_CN Contains a URI hosted in China
>>>tflags URICOUNTRY_CN net
>>>score URICOUNTRY_CN 2.0
What's the benefit of using this instead of the uridnsbl plugin? The code
below will look for the IP address behind a URI and then query the
cn-kr.blackholes.us RBL to see if that addr is in China:
uridnsbl URIBL_CNKR cn-kr.blackholes.us TXT
body URIBL_CNKR eval:check_uridnsbl('URIBL_CNKR')
tflags URIBL_CNKR net
score URIBL_CNKR 2.0
I'm sure there's a difference but I guess I'm not seeing it
--
Eric A. Hall http://www.ehsco.com/
Internet Core Protocols http://www.oreilly.com/catalog/coreprot/
RE: [SPAM] RE: GeoCities Link-only spam
Posted by Greg Allen <sa...@floridacpu.com>.
Sorry, I misunderstood at first what you had there. You are checking uri...
This is good, but it might be even better to have a check for connecting IP
(or use in conjunction with the uri) since the spammers can auto-flip
websites from UK, to china, Korea, RU, etc within a few seconds. I am not a
coder, so I may not be able to help much here...but I can take a whack at
it.
Basically, what I would like to see would be a check of the connecting IP
against a China (or whatever) RBL. If the connecting IP matches an IP in the
China RBL we could add a point value. Country RBL source preferably from
something like here. http://www.blackholes.us
Some RBL's already post code like this on their websites, but I am not sure
those would be usable with ALL RBL systems.
Basically, something like this...but with China, Korea, etc. to place in
local.cf
---start example----
header X_RBL_INTERSIL_NET eval:check_rbl('INTERSIL_NET',
'blackholes.intersil.net')
describe X_RBL_INTERSIL_NET Sender IP has a bad track record
tflags X_RBL_INTERSIL_NET net
score X_RBL_INTERSIL_NET .5
header RCVD_IN_DYNABLOCK eval:check_rbl('sorbs-notfirsthop',
'dnsbl.sorbs.net.', '127.0.0.10')
describe RCVD_IN_DYNABLOCK Sent directly from dynamic IP address
tflags RCVD_IN_DYNABLOCK net
score RCVD_IN_DYNABLOCK .5
---end example----
Actually, something similar to the above syntax might work for the bad
countries, if I just go through the time to type several of them in and test
them. I am just not sure yet. :-)
-----Original Message-----
From: Derek Harding [mailto:derek@innovyx.com]
Sent: Monday, August 08, 2005 5:05 PM
To: salist@floridacpu.com
Cc: Users@Spamassassin. Apache. Org
Subject: Re: [SPAM] RE: GeoCities Link-only spam
On Mon, 2005-08-08 at 15:53 -0500, salist@floridacpu.com wrote:
> >
> > It allows rules such as:
> > uricountry URICOUNTRY_CN CN
> > header URICOUNTRY_CN eval:check_uricountry('URICOUNTRY_CN')
> > describe URICOUNTRY_CN Contains a URI hosted in China
> > tflags URICOUNTRY_CN net
> > score URICOUNTRY_CN 2.0
> >
> > Derek
>
>
> Oh yes, that type code would be very nice to have indeed for people like
> me who can't outright RBL them. Do you also have code for Korea even? But
> dare I ask too much. :-) I could give it a score of 4 or so... and up it
> even more when spammer simpletons start thinking they are on to the latest
> greatest China spam idea. :-)
The code will work for any country. Just write a rule for that country.
Here's what's needed in your local.cf
loadplugin Mail::SpamAssassin::Plugin::URICountry
uricountry URICOUNTRY_CN CN
header URICOUNTRY_CN eval:check_uricountry('URICOUNTRY_CN')
describe URICOUNTRY_CN Contains a URI hosted in China
tflags URICOUNTRY_CN net
score URICOUNTRY_CN 2.0
uricountry URICOUNTRY_KR KR
header URICOUNTRY_KR eval:check_uricountry('URICOUNTRY_KR')
describe URICOUNTRY_KR Contains a URI hosted in Korea
tflags URICOUNTRY_KR net
score URICOUNTRY_KR 2.0
uricountry URICOUNTRY_BR BR
header URICOUNTRY_BR eval:check_uricountry('URICOUNTRY_BR')
describe URICOUNTRY_BR Contains a URI hosted in Brazil
tflags URICOUNTRY_BR net
score URICOUNTRY_BR 2.0
Derek
-- code for the plugin follows --
=head1 NAME
URICountry - fire rules based on the country code of each URI host found
in a message
=head1 SYNOPSIS
loadplugin Mail::SpamAssassin::Plugin::URICountry
=head1 REQUIREMENT
This plugin requires the IP::Country::Fast module from CPAN.
=cut
# Plugin package; this is the name given to "loadplugin" in the SYNOPSIS.
package Mail::SpamAssassin::Plugin::URICountry;
use Mail::SpamAssassin::Plugin;
use strict;
use bytes;
# Inherit from the SpamAssassin plugin base class.
use vars qw(@ISA);
@ISA = qw(Mail::SpamAssassin::Plugin);
# Constructor: builds the object through the parent class constructor and
# registers the "check_uricountry" eval rule.
#
# Arguments: the class name (or an existing instance) and the main
# SpamAssassin object, which is passed to the parent constructor unchanged.
# Returns the blessed plugin object.
sub new {
    my ($class, $mailsaobject) = @_;

    # Allow invocation on an instance as well as on the class name.
    $class = ref($class) || $class;

    my $self = bless($class->SUPER::new($mailsaobject), $class);
    $self->register_eval_rule("check_uricountry");

    return $self;
}
# Placeholder body for the "check_uricountry" eval rule. It always reports
# "no match"; hits for uricountry rules are raised from parsed_metadata()
# via got_hit() instead.
sub check_uricountry {
    my ($plugin, $msg_status, $rule) = @_;    # accepted but unused
    return 0;
}
# Plugin hook that does the real work for uricountry rules.
#
# Reads the per-message status object from $opts->{permsgstatus}, maps the
# host of every URI in the message to a country code with
# IP::Country::Fast, and calls got_hit() on the status object for each
# configured "uricountry" rule whose country code was seen.
#
# Always returns 1, including when there are no rules or when
# IP::Country::Fast cannot be loaded (the plugin then simply does nothing).
sub parsed_metadata {
    my ($self, $opts) = @_;
    my $scanner = $opts->{permsgstatus};

    # Nothing to do when no uricountry rules were configured: bail out
    # before loading the lookup module or looking at any URI.
    my $rules = $scanner->{conf}->{uricountry};
    return 1 unless $rules && %{$rules};

    # Load the module lazily so that a missing dependency only disables
    # this plugin. Test eval's own result rather than $@, and also guard
    # against new() handing back nothing.
    my $reg;
    my $loaded = eval {
        require IP::Country::Fast;
        $reg = IP::Country::Fast->new();
        1;
    };
    if (!$loaded || !$reg) {
        dbg ("failed to load 'IP::Country::Fast', skipping");
        return 1;
    }

    # Collect the distinct hosts referenced by the message.
    my %domlist = ();
    foreach my $uri ($scanner->get_uri_list()) {
        my $dom = my_uri_to_domain($uri);
        # my_uri_to_domain() returns nothing for URIs it rejects; log an
        # empty host in that case instead of interpolating undef.
        dbg("debug: URICountry $uri in " . (defined $dom ? $dom : ''));
        $domlist{$dom} = 1 if $dom;
    }

    # Build a list of the countries for URIs in the message.
    # "XX" stands in for hosts where inet_atocc() returned nothing.
    my %countries = ();
    foreach my $dom (keys %domlist) {
        my $cc = $reg->inet_atocc($dom) || "XX";
        dbg("debug: URICountry $dom in $cc");
        $countries{lc($cc)} = 1;
    }

    # Now check if any match any defined rules (case-insensitively).
    foreach my $rule (keys %{$rules}) {
        my $country = lc($rules->{$rule});
        if ($countries{$country}) {
            dbg ("debug: URICountry hit rule: $country");
            $scanner->got_hit($rule, "");
        }
    }

    return 1;
}
# Configuration hook for lines of the form
#     uricountry RULENAME COUNTRYCODE
#
# $opts->{key} is the setting name, $opts->{value} the rest of the line and
# $opts->{conf} the configuration object the mapping is stored in (under
# {uricountry}{RULENAME}).
#
# Returns 1 after storing a well-formed uricountry line and inhibiting
# further callbacks; returns 0 for any other key or a malformed value.
sub parse_config {
    my ($self, $opts) = @_;

    # Only the "uricountry" setting belongs to this plugin.
    return 0 unless $opts->{key} eq 'uricountry';

    # Expect exactly two whitespace-separated tokens: rule name, country.
    return 0 unless $opts->{value} =~ /^(\S+)\s+(\S+)\s*$/;
    my ($rule, $cc) = ($1, $2);

    dbg("debug: URICountry: registering $rule");
    $opts->{conf}->{uricountry}->{$rule} = $cc;
    $self->inhibit_further_callbacks();

    return 1;
}
# Reduce a URI to its lower-cased host name or dotted-quad IP address.
#
# Taken from the one in Util.pm, but we don't want to drop the hostname:
# doing so often leaves us with no A record.
#
# Returns nothing (undef in scalar context) for an undefined argument, for
# javascript: URIs, for URIs whose host part still contains a "%", and for
# host names rejected by RegistrarBoundaries::is_domain_valid().
sub my_uri_to_domain {
    my ($uri) = @_;

    return unless defined $uri;

    # Javascript is not going to help us, so return.
    return if ($uri =~ /^javascript:/i);

    $uri =~ s,#.*$,,gs;              # drop fragment
    $uri =~ s#^[a-z]+:/{0,2}##gsi;   # drop the protocol
    $uri =~ s,^[^/]*\@,,gs;          # username/passwd
    $uri =~ s,[/\?\&].*$,,gs;        # path/cgi params
    $uri =~ s,:\d+$,,gs;             # port

    # Skip undecoded URIs; we'll see the decoded version as well.
    return if $uri =~ /\%/;

    # keep IPs intact
    if ($uri !~ /^\d+\.\d+\.\d+\.\d+$/) {
        # Trimming the host down to its registered domain is deliberately
        # disabled (see the header comment); the call is kept for reference:
        # $uri = Mail::SpamAssassin::Util::RegistrarBoundaries::trim_domain($uri);

        # ignore invalid domains
        return unless
            Mail::SpamAssassin::Util::RegistrarBoundaries::is_domain_valid($uri);
    }

    # $uri is now the full host name (or IP address) only
    return lc $uri;
}
# Local shorthand: forward all arguments to SpamAssassin's debug logger.
sub dbg {
    return Mail::SpamAssassin::dbg(@_);
}
1;
-- end code --