You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by qu...@apache.org on 2005/05/04 05:39:33 UTC
svn commit: r168067 - in /spamassassin/trunk: MANIFEST
lib/Mail/SpamAssassin/Util.pm t/uri_text.t
Author: quinlan
Date: Tue May 3 20:39:32 2005
New Revision: 168067
URL: http://svn.apache.org/viewcvs?rev=168067&view=rev
Log:
new t test for grabbing URIs from text
Added:
spamassassin/trunk/t/uri_text.t (with props)
Modified:
spamassassin/trunk/MANIFEST
spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
Modified: spamassassin/trunk/MANIFEST
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/MANIFEST?rev=168067&r1=168066&r2=168067&view=diff
==============================================================================
--- spamassassin/trunk/MANIFEST (original)
+++ spamassassin/trunk/MANIFEST Tue May 3 20:39:32 2005
@@ -413,6 +413,7 @@
t/stripmarkup.t
t/test_dir
t/uri.t
+t/uri_text.t
t/utf8.t
t/whitelist_addrs.t
t/whitelist_from.t
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm?rev=168067&r1=168066&r2=168067&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Util.pm Tue May 3 20:39:32 2005
@@ -950,10 +950,14 @@
}
# www.foo.biz -> http://www.foo.biz
- # unschemed URI? assume a default of "http://" as most
- # HTML-displaying MUAs would
+ # unschemed URIs: assume default of "http://" as most MUAs do
if ($nuri !~ /^[-_a-z0-9]+:/i) {
- $nuri =~ s/^/http:\/\//g;
+ if ($nuri =~ /^ftp\./) {
+ $nuri =~ s/^/ftp:\/\//g;
+ }
+ else {
+ $nuri =~ s/^/http:\/\//g;
+ }
}
# http://www.foo.biz?id=3 -> http://www.foo.biz/?id=3
Added: spamassassin/trunk/t/uri_text.t
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/t/uri_text.t?rev=168067&view=auto
==============================================================================
--- spamassassin/trunk/t/uri_text.t (added)
+++ spamassassin/trunk/t/uri_text.t Tue May 3 20:39:32 2005
@@ -0,0 +1,147 @@
+#!/usr/bin/perl -w
+
+# test URIs as grabbed from text/plain messages
+
+BEGIN {
+ if (-e 't/test_dir') { # if we are running "t/uri_text.t", kluge around ...
+ chdir 't';
+ }
+
+ if (-e 'test_dir') { # running from test directory, not ..
+ unshift(@INC, '../blib/lib');
+ }
+}
+
+my $prefix = '.';
+if (-e 'test_dir') { # running from test directory, not ..
+ $prefix = '..';
+}
+
+use strict;
+use SATest; sa_t_init("uri_text");
+use Test;
+use Mail::SpamAssassin;
+use IO::File;
+use vars qw(%patterns %anti_patterns);
+
+# settings
+plan tests => 2;
+
+# initialize SpamAssassin
+my $sa = Mail::SpamAssassin->new({
+ rules_filename => "$prefix/t/log/test_rules_copy",
+ site_rules_filename => "$prefix/t/log/test_default.cf",
+ userprefs_filename => "$prefix/masses/spamassassin/user_prefs",
+ local_tests_only => 1,
+ debug => 0,
+ dont_copy_prefs => 1,
+});
+$sa->init(0); # parse rules
+
+# load tests and write mail
+my $mail = 'log/uri_text.eml';
+%patterns = ();
+%anti_patterns = ();
+write_mail();
+
+# test message
+my $fh = IO::File->new_tmpfile();
+open(STDERR, ">&=".fileno($fh)) || die "Cannot reopen STDERR";
+ok(sarun("-t --debug=uri < log/uri_text.eml"));
+seek($fh, 0, 0);
+my $error = do {
+ local $/;
+ <$fh>;
+};
+
+# run patterns and anti-patterns
+my $failures = 0;
+for my $pattern (keys %patterns) {
+ if ($error !~ /\Q${pattern}\E/) {
+ print "did not find $pattern\n";
+ $failures++;
+ }
+}
+for my $anti_pattern (keys %anti_patterns) {
+ if ($error =~ /\Q${anti_pattern}\E/) {
+ print "did find $anti_pattern\n";
+ $failures++;
+ }
+}
+ok(!$failures);
+
+# function to write test email
+sub write_mail {
+ if (open(MAIL, ">$mail")) {
+ print MAIL <<'EOF';
+Message-ID: <cl...@example.com>
+Date: Mon, 07 Oct 2002 09:00:00 +0000
+From: Sender <se...@example.com>
+MIME-Version: 1.0
+To: Recipient <re...@example.com>
+Subject: this is a trivial message
+Content-Type: text/plain
+Content-Transfer-Encoding: 7bit
+
+EOF
+ while (<DATA>) {
+ if (/^(.*?)\t+(.*?)\s*$/) {
+ my $string = $1;
+ my @patterns = split(' ', $2);
+ if ($string && @patterns) {
+ print MAIL "$string\n";
+ for my $pattern (@patterns) {
+ if ($pattern =~ /^\!(.*)/) {
+ $anti_patterns{$1} = 1;
+ }
+ else {
+ $patterns{$pattern} = 1;
+ }
+ }
+ }
+ }
+ }
+ close(MAIL);
+ }
+ else {
+ die "can't open output file: $!";
+ }
+}
+
+# <line> : <string><tabs><matches>
+# <string> : string in the body
+# <tabs> : one or more tabs
+# <matches> : patterns expected to be found in URI output; a pattern preceded by !
+# is an antipattern (must not be found); patterns are separated by whitespace
+__DATA__
+www5.poh6feib.com poh6feib
+vau6yaer.com vau6yaer
+www5.poh6feib.info poh6feib
+Haegh3de.co.uk Haegh3de
+
+ftp.yeinaix3.co.uk ftp://ftp.yeinaix3.co.uk !http://ftp.yeinaix3.co.uk
+ftp5.riexai5r.co.uk http://ftp5.riexai5r.co.uk !ftp://ftp5.riexai5r.co.uk
+
+10.1.1.1 !10.1.1.1
+10.1.2.1/ !10.1.2.1
+http://10.1.3.1/ 10.1.3.1
+
+quau0wig.quau0wig !quau0wig
+foo.Cahl1goo.php !Cahl1goo
+www5.mi1coozu.php !mi1coozu
+www.mezeel0P.php !mezeel0P
+bar.neih6fee.com.php !neih6fee
+www.zai6Vuwi.com.bar !zai6Vuwi
+
+=www.deiJ1pha.com www.deiJ1pha.com
+@www.Te0xohxu.com www.Te0xohxu.com
+.www.kuiH5sai.com www.kuiH5sai.com
+
+a=www.zaiNgoo7.com www.zaiNgoo7.com
+b@www.vohWais0.com mailto:b@www.vohWais0.com !http://www.vohWais0.com
+c.www.moSaoga8.com www.moSaoga8.com
+
+foo @ cae8kaip.com mailto:foo@cae8kaip.com
+xyz..geifoza0.com !geifoza0
+
+joe@koja3fui.koja3fui !koja3fui
Propchange: spamassassin/trunk/t/uri_text.t
------------------------------------------------------------------------------
svn:executable = *