You are viewing a plain text version of this content. The canonical link for it is here.
Posted to users@spamassassin.apache.org by Derek Harding <de...@innovyx.com> on 2005/08/08 23:05:03 UTC

Re: [SPAM] RE: GeoCities Link-only spam

On Mon, 2005-08-08 at 15:53 -0500, salist@floridacpu.com wrote:
> >
> > It allows rules such as:
> > uricountry      URICOUNTRY_CN   CN
> > header          URICOUNTRY_CN   eval:check_uricountry('URICOUNTRY_CN')
> > describe        URICOUNTRY_CN   Contains a URI hosted in China
> > tflags          URICOUNTRY_CN   net
> > score URICOUNTRY_CN 2.0
> >
> > Derek
> 
> 
> Oh yes, that type code would be very nice to have indeed for people like
> me who can't outright RBL them. Do you also have code for Korea even? But
> dare I ask too much. :-) I could give it a score of 4 or so... and up it
> even more when spammer simpletons start thinking they are on to the latest
> greatest China spam idea. :-)

The code will work for any country. Just write a rule for that country.

Here's what's needed in your local.cf

loadplugin Mail::SpamAssassin::Plugin::URICountry

uricountry      URICOUNTRY_CN   CN
header          URICOUNTRY_CN   eval:check_uricountry('URICOUNTRY_CN')
describe        URICOUNTRY_CN   Contains a URI hosted in China
tflags          URICOUNTRY_CN   net
score URICOUNTRY_CN 2.0

uricountry      URICOUNTRY_KR   KR
header          URICOUNTRY_KR   eval:check_uricountry('URICOUNTRY_KR')
describe        URICOUNTRY_KR   Contains a URI hosted in Korea
tflags          URICOUNTRY_KR   net
score URICOUNTRY_KR 2.0

uricountry      URICOUNTRY_BR   BR
header          URICOUNTRY_BR   eval:check_uricountry('URICOUNTRY_BR')
describe        URICOUNTRY_BR   Contains a URI hosted in Brazil
tflags          URICOUNTRY_BR   net
score URICOUNTRY_BR 2.0

Derek


-- code for the plugin follows --
=head1 NAME

URICountry - fire rules based on the country code of the host behind
each URI found in a message

=head1 SYNOPSIS

  loadplugin     Mail::SpamAssassin::Plugin::URICountry

=head1 REQUIREMENT

This plugin requires the IP::Country::Fast module from CPAN.

=cut

package Mail::SpamAssassin::Plugin::URICountry;

use Mail::SpamAssassin::Plugin;
use strict;
use bytes;

use vars qw(@ISA);
@ISA = qw(Mail::SpamAssassin::Plugin);

# Constructor.  Builds the plugin object through the parent class's
# constructor and registers "check_uricountry" as an eval rule, so that
# eval:check_uricountry(...) can be used in rule definitions.
sub new {
  my ($proto, $mailsaobject) = @_;

  # Accept either a class name or an existing instance as the invocant.
  my $class = ref($proto) || $proto;

  my $self = bless($class->SUPER::new($mailsaobject), $class);
  $self->register_eval_rule("check_uricountry");

  return $self;
}

# Eval-rule entry point registered by the constructor.  It is only a
# placeholder and never fires: hits for uricountry rules are recorded
# from parsed_metadata() through got_hit() instead.
sub check_uricountry {
  return 0;
}

# Plugin hook (this is not the eval rule).  Receives the per-message
# scanner in $opts->{permsgstatus}, collects the host of every URI in the
# message, maps each host to a country code with IP::Country::Fast and
# calls got_hit() for every configured uricountry rule whose country was
# seen.  Always returns 1.
#
# NOTE(review): inet_atocc() is handed host names as well as IP addresses;
# resolving a name presumably involves a blocking DNS lookup -- confirm
# against the IP::Country documentation.
sub parsed_metadata {
  my ($self, $opts) = @_;
  my $scanner = $opts->{permsgstatus};

  # Nothing to do (and no reason to load the country database or walk the
  # URI list) when no uricountry rules have been configured.
  my $rules = $scanner->{conf}->{uricountry};
  return 1 unless ref($rules) eq 'HASH' && %$rules;

  # IP::Country::Fast is optional; without it the plugin silently does
  # nothing.  The trailing "1" lets us tell success from failure without
  # depending on the state of $@.
  my $reg;
  my $loaded = eval {
    require IP::Country::Fast;
    $reg = IP::Country::Fast->new();
    1;
  };
  if (!$loaded || !$reg) {
    dbg ("failed to load 'IP::Country::Fast', skipping");
    return 1;
  }

  # Unique set of hosts referenced by the message.
  my %domlist = ();
  foreach my $uri ($scanner->get_uri_list()) {
    my $dom = my_uri_to_domain($uri);

    # my_uri_to_domain() returns nothing for URIs without a usable host;
    # skip those before $dom is interpolated into a debug message.
    if (!$dom) {
      dbg("debug: URICountry $uri skipped, no usable host");
      next;
    }

    dbg("debug: URICountry $uri in $dom");
    $domlist{$dom} = 1;
  }
  return 1 unless %domlist;

  # Build a list of the countries for URIs in the message.  Hosts that
  # cannot be mapped are filed under the pseudo country code "XX".
  my %countries = ();
  foreach my $dom (keys(%domlist)) {
    my $cc = $reg->inet_atocc($dom) || "XX";
    dbg("debug: URICountry $dom in $cc");
    $countries{lc($cc)} = 1;
  }

  # Now check if any match any defined rules (case-insensitively).
  foreach my $rule (keys(%$rules)) {
    my $country = lc($rules->{$rule});
    if ($countries{$country}) {
      dbg ("debug: URICountry hit rule: $country");
      $scanner->got_hit($rule, "");
    }
  }

  return 1;
}

# Configuration hook.  Handles lines of the form
#   uricountry RULENAME COUNTRYCODE
# by storing the country code in $opts->{conf}->{uricountry}->{RULENAME}.
# Returns 1 (after inhibiting further callbacks) when the line was
# consumed, and 0 for any other key or for a value that is not exactly
# two whitespace-separated tokens.
sub parse_config {
  my ($self, $opts) = @_;

  return 0 unless $opts->{key} eq 'uricountry';

  # In list context the match yields ($1, $2), or an empty list on failure.
  my ($rulename, $country) = $opts->{value} =~ /^(\S+)\s+(\S+)\s*$/
    or return 0;

  dbg("debug: URICountry: registering $rulename");
  $opts->{conf}->{uricountry}->{$rulename} = $country;
  $self->inhibit_further_callbacks();

  return 1;
}

# Reduce a URI to the host it points at, lower-cased.  Returns nothing
# (undef in scalar context) when no usable host can be extracted.
#
# Taken from the one in Util.pm, but we don't want to drop the hostname:
# doing so often leaves us with no A record.
sub my_uri_to_domain {
  my ($uri) = @_;

  return unless defined $uri;

  # Javascript is not going to help us, so return.
  return if ($uri =~ /^javascript:/i);

  $uri =~ s,#.*$,,gs;                   # drop fragment
  $uri =~ s#^[a-z]+:/{0,2}##gsi;        # drop the protocol
  $uri =~ s,^[^/]*\@,,gs;               # username/passwd
  $uri =~ s,[/\?\&].*$,,gs;             # path/cgi params
  $uri =~ s,:\d+$,,gs;                  # port

  return if $uri =~ /\%/;         # skip undecoded URIs.
  # we'll see the decoded version as well

  # keep IPs intact
  if ($uri !~ /^\d+\.\d+\.\d+\.\d+$/) {
    # The hostname part is deliberately NOT trimmed with
    # RegistrarBoundaries::trim_domain() here (see the header comment).

    # ignore invalid domains
    return unless
      (Mail::SpamAssassin::Util::RegistrarBoundaries::is_domain_valid($uri));
  }

  # $uri is now the full host name (or IP address) only
  return lc $uri;
}

# Local shorthand: pass all arguments straight on to Mail::SpamAssassin::dbg.
sub dbg {
  return Mail::SpamAssassin::dbg(@_);
}

1;
-- end code --


Re: [SPAM] RE: GeoCities Link-only spam

Posted by "Eric A. Hall" <eh...@ehsco.com>.
On 8/22/2005 3:50 PM, Eric A. Hall wrote:

> IP::Country use Whois lookups instead though, and UDP/DNS lookups are
> going to be faster than chained TCP/Whois queries.

> I'll play with the plugin and see what kind of times and load I get

Some poking around, IP::Country::Fast uses a pre-built mapping database
instead of issuing lookups (IP::Country::Slow) or caching lookups
(IP::Country::Medium). The pre-built database is stored in the ".gif" files
in /usr/lib/perl5/site_perl/5.8.6/IP/Country/Fast/ on my system, and
presumably this stuff gets repackaged when IP allocations change. This
means keeping the package synched, of course, but it does seem to be
somewhat faster and requires less overhead.

BTW, lookups for dead domain names  are really slow and block the rest of
the message processing.

-- 
Eric A. Hall                                        http://www.ehsco.com/
Internet Core Protocols          http://www.oreilly.com/catalog/coreprot/

Re: [SPAM] RE: GeoCities Link-only spam

Posted by "Eric A. Hall" <eh...@ehsco.com>.
On 8/22/2005 3:34 PM, Derek Harding wrote:
> On Sun, 2005-08-21 at 20:05 -0400, Eric A. Hall wrote:
> 
>>What's the benefit of using this instead of the uridnsbl plugin? The code
>>below will look for the IP address behind a URI and then query the
>>cn-kr.blackholes.us RBL to see if that addr is in China:
> 
> This one doesn't require a DNS lookup which makes it faster.

IP::Country uses Whois lookups instead though, and UDP/DNS lookups are
going to be faster than chained TCP/Whois queries.

> blackholes.us only covers a limited set.

Just an example for discussion purposes (worth noting that their main web
site is down too). http://countries.nerd.dk/more.html is another one

I'll play with the plugin and see what kind of times and load I get

-- 
Eric A. Hall                                        http://www.ehsco.com/
Internet Core Protocols          http://www.oreilly.com/catalog/coreprot/

Re: [SPAM] RE: GeoCities Link-only spam

Posted by Derek Harding <de...@innovyx.com>.
On Sun, 2005-08-21 at 20:05 -0400, Eric A. Hall wrote:
> What's the benefit of using this instead of the uridnsbl plugin? The code
> below will look for the IP address behind a URI and then query the
> cn-kr.blackholes.us RBL to see if that addr is in China:

This one doesn't require a DNS lookup which makes it faster. Also it can
work for just about any country, blackholes.us only covers a limited
set.

Derek





Re: [SPAM] RE: GeoCities Link-only spam

Posted by "Eric A. Hall" <eh...@ehsco.com>.
On 8/8/2005 5:05 PM, Derek Harding wrote:

>>>It allows rules such as:
>>>uricountry      URICOUNTRY_CN   CN
>>>header          URICOUNTRY_CN   eval:check_uricountry('URICOUNTRY_CN')
>>>describe        URICOUNTRY_CN   Contains a URI hosted in China
>>>tflags          URICOUNTRY_CN   net
>>>score URICOUNTRY_CN 2.0

What's the benefit of using this instead of the uridnsbl plugin? The code
below will look for the IP address behind a URI and then query the
cn-kr.blackholes.us RBL to see if that addr is in China:

  uridnsbl        URIBL_CNKR      cn-kr.blackholes.us TXT
  body            URIBL_CNKR      eval:check_uridnsbl('URIBL_CNKR')
  tflags          URIBL_CNKR      net
  score           URIBL_CNKR      2.0

I'm sure there's a difference but I guess I'm not seeing it

-- 
Eric A. Hall                                        http://www.ehsco.com/
Internet Core Protocols          http://www.oreilly.com/catalog/coreprot/

RE: [SPAM] RE: GeoCities Link-only spam

Posted by Greg Allen <sa...@floridacpu.com>.
Sorry, I misunderstood at first what you had there. You are checking uri...

This is good, but it might be even better to have a check for connecting IP
(or use in conjunction with the uri) since the spammers can auto-flip
websites from UK, to china, Korea, RU, etc within a few seconds. I am not a
coder, so I may not be able to help much here...but I can take a whack at
it.

Basically, what I would like to see would be a check of the connecting IP
against a China (or whatever) RBL. If the connecting IP matches an IP in the
China RBL we could add a point value. Country RBL source preferably from
something like here. http://www.blackholes.us

Some RBL's already post code like this on their websites, but I am not sure
those would be usable with ALL RBL systems.

Basically, something like this...but with China, Korea, etc. to place in
local.cf


---start example----

header X_RBL_INTERSIL_NET	eval:check_rbl('INTERSIL_NET',
'blackholes.intersil.net')
describe X_RBL_INTERSIL_NET  	Sender IP has a bad track record
tflags X_RBL_INTERSIL_NET	net
score X_RBL_INTERSIL_NET .5

header RCVD_IN_DYNABLOCK        eval:check_rbl('sorbs-notfirsthop',
'dnsbl.sorbs.net.', '127.0.0.10')
describe RCVD_IN_DYNABLOCK      Sent directly from dynamic IP address
tflags RCVD_IN_DYNABLOCK        net
score RCVD_IN_DYNABLOCK .5

---end example----


Actually, something similar to the above syntax might work for the bad
countries, if I just go through the time to type several of them in and test
them. I am just not sure yet.  :-)






-----Original Message-----
From: Derek Harding [mailto:derek@innovyx.com]
Sent: Monday, August 08, 2005 5:05 PM
To: salist@floridacpu.com
Cc: Users@Spamassassin. Apache. Org
Subject: Re: [SPAM] RE: GeoCities Link-only spam


On Mon, 2005-08-08 at 15:53 -0500, salist@floridacpu.com wrote:
> >
> > It allows rules such as:
> > uricountry      URICOUNTRY_CN   CN
> > header          URICOUNTRY_CN   eval:check_uricountry('URICOUNTRY_CN')
> > describe        URICOUNTRY_CN   Contains a URI hosted in China
> > tflags          URICOUNTRY_CN   net
> > score URICOUNTRY_CN 2.0
> >
> > Derek
>
>
> Oh yes, that type code would be very nice to have indeed for people like
> me who can't outright RBL them. Do you also have code for Korea even? But
> dare I ask too much. :-) I could give it a score of 4 or so... and up it
> even more when spammer simpletons start thinking they are on to the latest
> greatest China spam idea. :-)

The code will work for any country. Just write a rule for that country.

Here's what's needed in your local.cf

loadplugin Mail::SpamAssassin::Plugin::URICountry

uricountry      URICOUNTRY_CN   CN
header          URICOUNTRY_CN   eval:check_uricountry('URICOUNTRY_CN')
describe        URICOUNTRY_CN   Contains a URI hosted in China
tflags          URICOUNTRY_CN   net
score URICOUNTRY_CN 2.0

uricountry      URICOUNTRY_KR   KR
header          URICOUNTRY_KR   eval:check_uricountry('URICOUNTRY_KR')
describe        URICOUNTRY_KR   Contains a URI hosted in Korea
tflags          URICOUNTRY_KR   net
score URICOUNTRY_KR 2.0

uricountry      URICOUNTRY_BR   BR
header          URICOUNTRY_BR   eval:check_uricountry('URICOUNTRY_BR')
describe        URICOUNTRY_BR   Contains a URI hosted in Brazil
tflags          URICOUNTRY_BR   net
score URICOUNTRY_BR 2.0

Derek


-- code for the plugin follows --
=head1 NAME

URICountry - fire rules based on the country code of the host behind
each URI found in a message

=head1 SYNOPSIS

  loadplugin     Mail::SpamAssassin::Plugin::URICountry

=head1 REQUIREMENT

This plugin requires the IP::Country::Fast module from CPAN.

=cut

package Mail::SpamAssassin::Plugin::URICountry;

use Mail::SpamAssassin::Plugin;
use strict;
use bytes;

use vars qw(@ISA);
@ISA = qw(Mail::SpamAssassin::Plugin);

# Constructor.  Builds the plugin object through the parent class's
# constructor and registers "check_uricountry" as an eval rule, so that
# eval:check_uricountry(...) can be used in rule definitions.
sub new {
  my ($proto, $mailsaobject) = @_;

  # Accept either a class name or an existing instance as the invocant.
  my $class = ref($proto) || $proto;

  my $self = bless($class->SUPER::new($mailsaobject), $class);
  $self->register_eval_rule("check_uricountry");

  return $self;
}

# Eval-rule entry point registered by the constructor.  It is only a
# placeholder and never fires: hits for uricountry rules are recorded
# from parsed_metadata() through got_hit() instead.
sub check_uricountry {
  return 0;
}

# Plugin hook (this is not the eval rule).  Receives the per-message
# scanner in $opts->{permsgstatus}, collects the host of every URI in the
# message, maps each host to a country code with IP::Country::Fast and
# calls got_hit() for every configured uricountry rule whose country was
# seen.  Always returns 1.
#
# NOTE(review): inet_atocc() is handed host names as well as IP addresses;
# resolving a name presumably involves a blocking DNS lookup -- confirm
# against the IP::Country documentation.
sub parsed_metadata {
  my ($self, $opts) = @_;
  my $scanner = $opts->{permsgstatus};

  # Nothing to do (and no reason to load the country database or walk the
  # URI list) when no uricountry rules have been configured.
  my $rules = $scanner->{conf}->{uricountry};
  return 1 unless ref($rules) eq 'HASH' && %$rules;

  # IP::Country::Fast is optional; without it the plugin silently does
  # nothing.  The trailing "1" lets us tell success from failure without
  # depending on the state of $@.
  my $reg;
  my $loaded = eval {
    require IP::Country::Fast;
    $reg = IP::Country::Fast->new();
    1;
  };
  if (!$loaded || !$reg) {
    dbg ("failed to load 'IP::Country::Fast', skipping");
    return 1;
  }

  # Unique set of hosts referenced by the message.
  my %domlist = ();
  foreach my $uri ($scanner->get_uri_list()) {
    my $dom = my_uri_to_domain($uri);

    # my_uri_to_domain() returns nothing for URIs without a usable host;
    # skip those before $dom is interpolated into a debug message.
    if (!$dom) {
      dbg("debug: URICountry $uri skipped, no usable host");
      next;
    }

    dbg("debug: URICountry $uri in $dom");
    $domlist{$dom} = 1;
  }
  return 1 unless %domlist;

  # Build a list of the countries for URIs in the message.  Hosts that
  # cannot be mapped are filed under the pseudo country code "XX".
  my %countries = ();
  foreach my $dom (keys(%domlist)) {
    my $cc = $reg->inet_atocc($dom) || "XX";
    dbg("debug: URICountry $dom in $cc");
    $countries{lc($cc)} = 1;
  }

  # Now check if any match any defined rules (case-insensitively).
  foreach my $rule (keys(%$rules)) {
    my $country = lc($rules->{$rule});
    if ($countries{$country}) {
      dbg ("debug: URICountry hit rule: $country");
      $scanner->got_hit($rule, "");
    }
  }

  return 1;
}

# Configuration hook.  Handles lines of the form
#   uricountry RULENAME COUNTRYCODE
# by storing the country code in $opts->{conf}->{uricountry}->{RULENAME}.
# Returns 1 (after inhibiting further callbacks) when the line was
# consumed, and 0 for any other key or for a value that is not exactly
# two whitespace-separated tokens.
sub parse_config {
  my ($self, $opts) = @_;

  return 0 unless $opts->{key} eq 'uricountry';

  # In list context the match yields ($1, $2), or an empty list on failure.
  my ($rulename, $country) = $opts->{value} =~ /^(\S+)\s+(\S+)\s*$/
    or return 0;

  dbg("debug: URICountry: registering $rulename");
  $opts->{conf}->{uricountry}->{$rulename} = $country;
  $self->inhibit_further_callbacks();

  return 1;
}

# Reduce a URI to the host it points at, lower-cased.  Returns nothing
# (undef in scalar context) when no usable host can be extracted.
#
# Taken from the one in Util.pm, but we don't want to drop the hostname:
# doing so often leaves us with no A record.
sub my_uri_to_domain {
  my ($uri) = @_;

  return unless defined $uri;

  # Javascript is not going to help us, so return.
  return if ($uri =~ /^javascript:/i);

  $uri =~ s,#.*$,,gs;                   # drop fragment
  $uri =~ s#^[a-z]+:/{0,2}##gsi;        # drop the protocol
  $uri =~ s,^[^/]*\@,,gs;               # username/passwd
  $uri =~ s,[/\?\&].*$,,gs;             # path/cgi params
  $uri =~ s,:\d+$,,gs;                  # port

  return if $uri =~ /\%/;         # skip undecoded URIs.
  # we'll see the decoded version as well

  # keep IPs intact
  if ($uri !~ /^\d+\.\d+\.\d+\.\d+$/) {
    # The hostname part is deliberately NOT trimmed with
    # RegistrarBoundaries::trim_domain() here (see the header comment).

    # ignore invalid domains
    return unless
      (Mail::SpamAssassin::Util::RegistrarBoundaries::is_domain_valid($uri));
  }

  # $uri is now the full host name (or IP address) only
  return lc $uri;
}

# Local shorthand: pass all arguments straight on to Mail::SpamAssassin::dbg.
sub dbg {
  return Mail::SpamAssassin::dbg(@_);
}

1;
-- end code --