You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by qu...@apache.org on 2004/07/23 05:33:38 UTC
svn commit: rev 23171 - in spamassassin/trunk: . lib/Mail lib/Mail/SpamAssassin

Author: quinlan
Date: Thu Jul 22 20:33:38 2004
New Revision: 23171

Modified:
   spamassassin/trunk/INSTALL
   spamassassin/trunk/lib/Mail/SpamAssassin.pm
   spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
   spamassassin/trunk/lib/Mail/SpamAssassin/Reporter.pm
   spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
   spamassassin/trunk/spamassassin.raw
Log:
bug 3621: add SpamCop reporting feature


Modified: spamassassin/trunk/INSTALL
==============================================================================
--- spamassassin/trunk/INSTALL	(original)
+++ spamassassin/trunk/INSTALL	Thu Jul 22 20:33:38 2004
@@ -230,8 +230,14 @@
 
   - Net::DNS        (from CPAN)
 
-    Used to check the RBL, RSS, DUL etc. and perform MX checks.
-    Recommended.
+    Used for all DNS-based tests (SBL, XBL, SpamCop, DSBL, etc.), to
+    perform MX checks, and when manually reporting spam to SpamCop.
+    Recommended.
+
+
+  - Net::SMTP        (from CPAN)
+
+    Used when manually reporting spam to SpamCop.
 
 
   - Mail::SPF::Query     (from CPAN)

Modified: spamassassin/trunk/lib/Mail/SpamAssassin.pm
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin.pm	(original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin.pm	Thu Jul 22 20:33:38 2004
@@ -615,9 +615,10 @@
 
 =item $f->report_as_spam ($mail, $options)
 
-Report a mail, encapsulated in a C<Mail::SpamAssassin::Message> object, as human-verified spam.
-This will submit the mail message to live, collaborative, spam-blocker
-databases, allowing other users to block this message.
+Report a mail, encapsulated in a C<Mail::SpamAssassin::Message> object, as
+human-verified spam.  This will submit the mail message to live,
+collaborative, spam-blocker databases, allowing other users to block this
+message.
 
 It will also submit the mail to SpamAssassin's Bayesian learner.
 
@@ -626,20 +627,21 @@
 
 =over 4
 
-=item dont_report_to_razor
-
-Inhibits reporting of the spam to Razor; useful if you know it's already
-been listed there.
-
 =item dont_report_to_dcc
 
-Inhibits reporting of the spam to DCC; useful if you know it's already
-been listed there.
+Inhibits reporting of the spam to DCC.
 
 =item dont_report_to_pyzor
 
-Inhibits reporting of the spam to Pyzor; useful if you know it's already
-been listed there.
+Inhibits reporting of the spam to Pyzor.
+
+=item dont_report_to_razor
+
+Inhibits reporting of the spam to Razor.
+
+=item dont_report_to_spamcop
+
+Inhibits reporting of the spam to SpamCop.
 
 =back
 
@@ -665,9 +667,10 @@
 
 =item $f->revoke_as_spam ($mail, $options)
 
-Revoke a mail, encapsulated in a C<Mail::SpamAssassin::Message> object, as human-verified ham
-(non-spam).  This will revoke the mail message from live, collaborative,
-spam-blocker databases, allowing other users to block this message.
+Revoke a mail, encapsulated in a C<Mail::SpamAssassin::Message> object, as
+human-verified ham (non-spam).  This will revoke the mail message from live,
+collaborative, spam-blocker databases, so that other users no longer block
+this message.
 
 It will also submit the mail to SpamAssassin's Bayesian learner as nonspam.
 

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm	(original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm	Thu Jul 22 20:33:38 2004
@@ -1067,6 +1067,48 @@
     }
   });
 
+=item spamcop_from_address add@ress.com   (default: none)
+
+This address is used during manual reports to SpamCop as the From:
+address.  You can use your normal email address.  If this is not set, a
+guess will be used as the From: address in SpamCop reports.
+
+=cut
+
+  push (@cmds, {
+    setting => 'spamcop_from_address',
+    default => '',
+    type => $CONF_TYPE_STRING,
+    code => sub {
+      my ($self, $key, $value, $line) = @_;
+      if ($value =~ /([^<\s]+\@[^>\s]+)/) {
+        $self->{spamcop_from_address} = $1;
+      }
+    },
+  });
+
+=item spamcop_to_address add@ress.com   (default: generic reporting address)
+
+Your customized SpamCop report submission address.  You need to obtain
+this address by registering at C<http://www.spamcop.net/>.  If this is
+not set, SpamCop reports will go to a generic reporting address for
+SpamAssassin users and your reports will probably have less weight in
+the SpamCop system.
+
+=cut
+
+  push (@cmds, {
+    setting => 'spamcop_to_address',
+    default => 'spamassassin-submit@spam.spamcop.net',
+    type => $CONF_TYPE_STRING,
+    code => sub {
+      my ($self, $key, $value, $line) = @_;
+      if ($value =~ /([^<\s]+\@[^>\s]+)/) {
+        $self->{spamcop_to_address} = $1;
+      }
+    },
+  });
+
 =item trusted_networks ip.add.re.ss[/mask] ...   (default: none)
 
 What networks or hosts are 'trusted' in your setup.   B<Trusted> in this case

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Reporter.pm
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Reporter.pm	(original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Reporter.pm	Thu Jul 22 20:33:38 2004
@@ -22,6 +22,8 @@
 use bytes;
 use Carp;
 use POSIX ":sys_wait_h";
+use constant HAS_NET_DNS => eval { require Net::DNS; };
+use constant HAS_NET_SMTP => eval { require Net::SMTP; };
 
 use vars qw{
   @ISA $VERSION
@@ -58,16 +60,6 @@
 
   my $text = $self->{main}->remove_spamassassin_markup ($self->{msg});
 
-  if (!$self->{options}->{dont_report_to_razor} && $self->is_razor_available()) {
-    if ($self->razor_report($text)) {
-      $available = 1;
-      dbg ("SpamAssassin: spam reported to Razor.");
-      $return = 0;
-    }
-    else {
-      dbg ("SpamAssassin: could not report spam to Razor.");
-    }
-  }
   if (!$self->{options}->{dont_report_to_dcc} && $self->is_dcc_available()) {
     if ($self->dcc_report($text)) {
       $available = 1;
@@ -88,6 +80,26 @@
       dbg ("SpamAssassin: could not report spam to Pyzor.");
     }
   }
+  if (!$self->{options}->{dont_report_to_razor} && $self->is_razor_available()) {
+    if ($self->razor_report($text)) {
+      $available = 1;
+      dbg ("SpamAssassin: spam reported to Razor.");
+      $return = 0;
+    }
+    else {
+      dbg ("SpamAssassin: could not report spam to Razor.");
+    }
+  }
+  if (!$self->{options}->{dont_report_to_spamcop} && $self->is_spamcop_available()) {
+    if ($self->spamcop_report($text)) {
+      $available = 1;
+      dbg ("SpamAssassin: spam reported to SpamCop.");
+      $return = 0;
+    }
+    else {
+      dbg ("SpamAssassin: could not report spam to SpamCop.");
+    }
+  }
 
   $self->delete_fulltext_tmpfile();
 
@@ -364,6 +376,122 @@
 
   return 1;
 }
+
+sub smtp_dbg {
+  # Debug-log an SMTP command and the reply currently held by $smtp.
+  my ($command, $smtp) = @_;
+  dbg("SpamCop -> sent $command");
+  my $code = $smtp->code();
+  my $message = $smtp->message();
+  my $debug = '';  # start defined: chomp/interpolation must never see undef
+  $debug .= $code if $code;
+  $debug .= ($code ? " " : "") . $message if $message;
+  chomp $debug;
+  dbg("SpamCop -> received $debug");
+  return 1;  # always true, so callers can chain it with && after each step
+}
+
+sub spamcop_report {
+  my ($self, $original) = @_;
+  # Mail $original to SpamCop; returns 1 if an MX accepts the report, else 0.
+  # check date
+  my $header = $original;
+  $header =~ s/\r?\n\r?\n.*//s;  # drop everything from the first blank line
+  my $date = Mail::SpamAssassin::Util::receive_date($header);
+  if ($date && $date < time - 3*86400) {
+    warn ("SpamCop -> message older than 3 days, not reporting\n");
+    return 0;
+  }
+
+  # message variables
+  my $boundary = "----------=_" . sprintf("%08X.%08X",time,int(rand(2**32)));
+  while ($original =~ /^--\Q${boundary}\E/m) { # delimiters start with "--"
+    $boundary .= "/".sprintf("%08X",int(rand(2**32)));
+  }
+  my $description = "spam report via " . Mail::SpamAssassin::Version();
+  my $trusted = $self->{msg}->{metadata}->{relays_trusted_str} || '';
+  my $untrusted = $self->{msg}->{metadata}->{relays_untrusted_str} || '';
+  my $user = $self->{main}->{'username'} || 'unknown';
+  my $host = Mail::SpamAssassin::Util::fq_hostname() || 'unknown';
+  my $from = $self->{conf}->{spamcop_from_address} || "$user\@$host";
+  my $name = (Mail::SpamAssassin::Util::portable_getpwuid($>))[6] || "Unknown";
+
+  # message data
+  my %head = (
+              'To' => $self->{conf}->{spamcop_to_address},
+              'From' => "\"$name\" <$from>",
+              'Subject' => 'report spam',
+              'Date' => Mail::SpamAssassin::Util::time_to_rfc822_date(),
+              'Message-Id' =>
+                sprintf("<%08X.%08X@%s>",time,int(rand(2**32)),$host),
+              'MIME-Version' => '1.0',
+              'Content-Type' => "multipart/mixed; boundary=\"$boundary\"",
+              );
+
+  # truncate message
+  if (length($original) > 64*1024) {
+    substr($original,(64*1024)) = "\n[truncated by SpamAssassin]\n";
+  }
+
+  my $body = <<"EOM";
+This is a multi-part message in MIME format.
+
+--$boundary
+Content-Type: message/rfc822; x-spam-type=report
+Content-Description: $description
+Content-Disposition: attachment
+Content-Transfer-Encoding: 8bit
+X-Spam-Relays-Trusted: $trusted
+X-Spam-Relays-Untrusted: $untrusted
+
+$original
+--$boundary--
+
+EOM
+
+  # compose message
+  my $message;
+  while (my ($k, $v) = each %head) {
+    $message .= "$k: $v\n";
+  }
+  $message .= "\n" . $body;
+
+  # send message
+  my $failed_before;  # set once any exchange has failed
+  my $mx = $head{To};
+  my $hello = Mail::SpamAssassin::Util::fq_hostname() || $from;
+  $mx =~ s/.*\@//;
+  $hello =~ s/.*\@//;
+  for my $rr (Net::DNS::mx($mx)) {
+    my $exchange = Mail::SpamAssassin::Util::untaint_hostname($rr->exchange);
+    next unless $exchange;
+    my $failure;  # per exchange, so an earlier SMTP reply is never re-reported
+    if (my $smtp = Net::SMTP->new($exchange,
+                                  Hello => $hello,
+                                  Port => 25, # change to 587 before 3.0.0-final
+                                  Timeout => 10))
+    {
+      if ($smtp->mail($from) && smtp_dbg("FROM $from", $smtp) &&
+          $smtp->recipient($head{To}) && smtp_dbg("TO $head{To}", $smtp) &&
+          $smtp->data($message) && smtp_dbg("DATA", $smtp) &&
+          $smtp->quit() && smtp_dbg("QUIT", $smtp))
+      {
+        # tell user we succeeded after first attempt if we previously failed
+        warn("SpamCop -> report to $exchange succeeded\n") if $failed_before;
+        return 1;
+      }
+      my $code = $smtp->code();
+      my $text = $smtp->message();
+      $failure = "$code $text" if ($code && $text);
+    }
+    $failure ||= "Net::SMTP error";
+    chomp $failure;
+    warn("SpamCop -> report to $exchange failed: $failure\n");
+    $failed_before = 1;
+  }
+  return 0;
+}
+
 ###########################################################################
 
 sub dbg { Mail::SpamAssassin::dbg (@_); }
@@ -371,10 +499,20 @@
 sub delete_fulltext_tmpfile { Mail::SpamAssassin::PerMsgStatus::delete_fulltext_tmpfile(@_) }
 
 # Use the Dns versions ...  At least something only needs 1 copy of code ...
-sub is_pyzor_available { Mail::SpamAssassin::PerMsgStatus::is_pyzor_available(@_); }
-sub is_dcc_available { Mail::SpamAssassin::PerMsgStatus::is_dcc_available(@_); }
+sub is_dcc_available {  # forwards all arguments to the PerMsgStatus version
+  return Mail::SpamAssassin::PerMsgStatus::is_dcc_available(@_);
+}
+sub is_pyzor_available {  # forwards all arguments to the PerMsgStatus version
+  return Mail::SpamAssassin::PerMsgStatus::is_pyzor_available(@_);
+}
 sub is_razor_available {
   Mail::SpamAssassin::PerMsgStatus::is_razor2_available(@_);
+}
+sub is_spamcop_available {
+  my $self = shift;
+  # True only if both modules loaded and the SpamCop rule has a true score.
+  return HAS_NET_DNS && HAS_NET_SMTP
+    && $self->{conf}{scores}{'RCVD_IN_BL_SPAMCOP_NET'};
 }
 
 sub enter_helper_run_mode { Mail::SpamAssassin::PerMsgStatus::enter_helper_run_mode(@_); }

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm	(original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm	Thu Jul 22 20:33:38 2004
@@ -194,6 +194,25 @@
   }
 }
 
+sub untaint_hostname {
+  my ($host) = @_;
+  # Returns undef (empty list) for undef input and '' for an empty string.
+  return unless defined($host);
+  return '' if ($host eq '');
+
+  # from RFC 1035, but allowing domains starting with numbers
+  my $label = q/[A-Za-z\d](?:[A-Za-z\d-]{0,61}[A-Za-z\d])?/;
+  my $domain = qq<$label(?:\\.$label)*>;  # qq<> turns "\\." into regex "\."
+  # A match returns the captured copy; a mismatch warns and returns the input.
+  if (length($host) <= 255 && $host =~ /^($domain)$/) {
+    return $1;
+  }
+  else {
+    warn "security: cannot untaint hostname: \"$host\"\n";
+    return $host;
+  }
+}
+
 # This sub takes a scalar or a reference to an array, hash, scalar or another
 # reference and recursively untaints all its values (and keys if it's a
 # reference to a hash). It should be used with caution as blindly untainting
@@ -438,6 +457,20 @@
 
   s/\=\r?\n//gs;
   s/\=([0-9a-fA-F]{2})/chr(hex($1))/ge;
+  return $_;
+}
+
+sub base64_encode {
+  local $_ = shift;
+  # Prefer MIME::Base64; otherwise fall back to a uuencode-based encoder.
+  if (HAS_MIME_BASE64) {
+    return MIME::Base64::encode_base64($_);
+  }
+  my $pad = (3 - length($_) % 3) % 3;  # '=' count comes from input length
+  $_ = pack("u57", $_);
+  s/^.//mg;
+  tr| -_`|A-Za-z0-9+/A|;
+  s/.{$pad}$/'=' x $pad/e if $pad;  # pad only; keep genuine trailing 'A's
   return $_;
 }
 

Modified: spamassassin/trunk/spamassassin.raw
==============================================================================
--- spamassassin/trunk/spamassassin.raw	(original)
+++ spamassassin/trunk/spamassassin.raw	Thu Jul 22 20:33:38 2004
@@ -515,39 +515,42 @@
 
 =item B<-r>, B<--report>
 
-Report this message as verified spam.  This will submit the mail message
-read from STDIN to various spam-blocker databases.  Currently, these are
-Vipul's Razor ( http://razor.sourceforge.net/ ), the Distributed Checksum
-Clearinghouse ( http://www.rhyolite.com/anti-spam/dcc/ ), and Pyzor (
-http://pyzor.sourceforge.net/ ).
-
-If the message contains SpamAssassin markup, this will be stripped out
-automatically before submission.  The support modules for DCC, Razor
-and/or Pyzor must be installed for spam to be reported to each service.
+Report this message as manually-verified spam.  This will submit the mail
+message read from STDIN to various spam-blocker databases.  Currently,
+these are the Distributed Checksum Clearinghouse
+C<http://www.rhyolite.com/anti-spam/dcc/>, Pyzor
+C<http://pyzor.sourceforge.net/>, Vipul's Razor
+C<http://razor.sourceforge.net/>, and SpamCop C<http://www.spamcop.net/>.
+
+If the message contains SpamAssassin markup, the markup will be stripped
+out automatically before submission.  The support modules for DCC, Pyzor,
+and Razor must be installed for spam to be reported to each service.
+SpamCop reports will have greater effect if you register and set the
+C<spamcop_to_address> option.
 
 The message will also be submitted to SpamAssassin's learning systems;
-currently this is the internal Bayesian statistical-filtering system (the BAYES
-rules).   (Note that if you I<only> want to perform statistical learning, and
-do not want to report mail to a third-party server, you should use the
-C<sa-learn> command directly instead.)
+currently this is the internal Bayesian statistical-filtering system (the
+BAYES rules).  (Note that if you I<only> want to perform statistical
+learning, and do not want to report mail to third parties, you should use
+the C<sa-learn> command directly instead.)
 
 =item B<-k>, B<--revoke>
 
 Revoke this message.  This will revoke the mail message read from STDIN from
 various spam-blocker databases.  Currently, these are Vipul's Razor.
 
-Revocation support for the Distributed Checksum Clearinghouse, and Pyzor
-is not currently available.
+Revocation support for the Distributed Checksum Clearinghouse, Pyzor, and
+SpamCop is not currently available.
 
-If the message contains SpamAssassin markup, this will be stripped out
-automatically before submission.  The support modules for Razor must be
-installed for spam to be revoked from the service.
+If the message contains SpamAssassin markup, the markup will be stripped
+out automatically before submission.  The support modules for Razor must
+be installed for spam to be revoked from the service.
 
 The message will also be submitted as 'ham' (non-spam) to SpamAssassin's
-learning systems; currently this is the internal Bayesian statistical-filtering
-system (the BAYES rules).   (Note that if you I<only> want to perform
-statistical learning, and do not want to report mail to a third-party server,
-you should use the C<sa-learn> command directly instead.)
+learning systems; currently this is the internal Bayesian
+statistical-filtering system (the BAYES rules).  (Note that if you I<only>
+want to perform statistical learning, and do not want to report mail to
+third parties, you should use the C<sa-learn> command directly instead.)
 
 =item B<--lint>