You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2022/04/21 18:40:43 UTC
svn commit: r1900117 - in /spamassassin/branches/trunk-welcomelist: ./ MANIFEST lib/Mail/SpamAssassin/ArchiveIterator.pm rules/active.list t/bayesbdb.t t/bayesdbm.t t/bayessql.t t/data/nice.mbox
Author: hege
Date: Thu Apr 21 18:40:43 2022
New Revision: 1900117
URL: http://svn.apache.org/viewvc?rev=1900117&view=rev
Log:
Merge from trunk r1900116 to trunk-welcomelist
Added:
spamassassin/branches/trunk-welcomelist/t/data/nice.mbox
- copied unchanged from r1900116, spamassassin/trunk/t/data/nice.mbox
Modified:
spamassassin/branches/trunk-welcomelist/ (props changed)
spamassassin/branches/trunk-welcomelist/MANIFEST
spamassassin/branches/trunk-welcomelist/lib/Mail/SpamAssassin/ArchiveIterator.pm
spamassassin/branches/trunk-welcomelist/rules/active.list
spamassassin/branches/trunk-welcomelist/t/bayesbdb.t
spamassassin/branches/trunk-welcomelist/t/bayesdbm.t
spamassassin/branches/trunk-welcomelist/t/bayessql.t
Propchange: spamassassin/branches/trunk-welcomelist/
------------------------------------------------------------------------------
Merged /spamassassin/trunk:r1900096-1900116
Modified: spamassassin/branches/trunk-welcomelist/MANIFEST
URL: http://svn.apache.org/viewvc/spamassassin/branches/trunk-welcomelist/MANIFEST?rev=1900117&r1=1900116&r2=1900117&view=diff
==============================================================================
--- spamassassin/branches/trunk-welcomelist/MANIFEST (original)
+++ spamassassin/branches/trunk-welcomelist/MANIFEST Thu Apr 21 18:40:43 2022
@@ -385,6 +385,7 @@ t/data/nice/spf5-received-spf-crlf
t/data/nice/spf6-received-spf-crlf2
t/data/nice/unicode1
t/data/nice/unicode2
+t/data/nice.mbox
t/data/phishing/openphish-feed.txt
t/data/phishing/phishtank-feed.csv
t/data/reporterplugin.pm
Modified: spamassassin/branches/trunk-welcomelist/lib/Mail/SpamAssassin/ArchiveIterator.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/trunk-welcomelist/lib/Mail/SpamAssassin/ArchiveIterator.pm?rev=1900117&r1=1900116&r2=1900117&view=diff
==============================================================================
--- spamassassin/branches/trunk-welcomelist/lib/Mail/SpamAssassin/ArchiveIterator.pm (original)
+++ spamassassin/branches/trunk-welcomelist/lib/Mail/SpamAssassin/ArchiveIterator.pm Thu Apr 21 18:40:43 2022
@@ -415,7 +415,7 @@ sub _run_mailbox {
my @msg;
my $header;
- my $fh = $self->_mail_open($where, 1);
+ my $fh = $self->_mail_open($file, 1);
return unless $fh;
my $opt_max_size = $self->{opt_max_size};
@@ -466,7 +466,7 @@ sub _run_mbx {
my @msg;
my $header;
- my $fh = $self->_mail_open($where, 1);
+ my $fh = $self->_mail_open($file, 1);
return unless $fh;
my $opt_max_size = $self->{opt_max_size};
@@ -1001,7 +1001,7 @@ sub _scan_mailbox {
foreach my $file (@files) {
$self->_bump_scan_progress();
- if ($file =~ /\.(?:gz|bz2|xz)$/i) {
+ if ($file =~ /\.(?:gz|bz2|xz|lz[o4]?)$/i) {
warn "archive-iterator: compressed mbox folders are not supported at this time\n";
next;
}
@@ -1129,7 +1129,7 @@ sub _scan_mbx {
foreach my $file (@files) {
$self->_bump_scan_progress();
- if ($folder =~ /\.(?:gz|bz2|xz)$/i) {
+ if ($folder =~ /\.(?:gz|bz2|xz|lz[o4]?)$/i) {
warn "archive-iterator: compressed mbx folders are not supported at this time\n";
next;
}
Modified: spamassassin/branches/trunk-welcomelist/rules/active.list
URL: http://svn.apache.org/viewvc/spamassassin/branches/trunk-welcomelist/rules/active.list?rev=1900117&r1=1900116&r2=1900117&view=diff
==============================================================================
--- spamassassin/branches/trunk-welcomelist/rules/active.list (original)
+++ spamassassin/branches/trunk-welcomelist/rules/active.list Thu Apr 21 18:40:43 2022
@@ -1,6 +1,6 @@
# DO NOT EDIT: file generated by build/mkupdates/listpromotable
# active ruleset list, automatically generated from https://ruleqa.spamassassin.org/
-# with results from: last-net: net-darxus net-ena-week0 net-ena-week1 net-ena-week2 net-ena-week3 net-ena-week4 net-giovanni-ham net-giovanni-spam net-giovanni-spammy net-hege net-llanga net-mmiroslaw-mails-ham net-mmiroslaw-mails-spam net-pds net-spamsponge net-thendrikx; day 1: darxus ena-week0 ena-week1 ena-week2 ena-week3 ena-week4 giovanni-ham giovanni-spam giovanni-spammy hege llanga mmiroslaw-mails-ham mmiroslaw-mails-spam pds spamsponge thendrikx; day 2: darxus ena-week0 ena-week1 ena-week2 ena-week3 ena-week4 giovanni-ham giovanni-spam giovanni-spammy hege llanga mmiroslaw-mails-ham mmiroslaw-mails-spam spamsponge thendrikx; day 3: darxus ena-week0 ena-week1 ena-week2 ena-week3 ena-week4 giovanni-ham giovanni-spam giovanni-spammy hege llanga mmiroslaw-mails-ham mmiroslaw-mails-spam pds spamsponge thendrikx; day 4: darxus ena-week0 ena-week1 ena-week2 ena-week3 ena-week4 giovanni-ham giovanni-spam giovanni-spammy hege llanga mmiroslaw-mails-ham mmiroslaw-mails-spam pds spamsponge thendrikx; day 5: darxus ena-week0 ena-week1 ena-week2 ena-week3 ena-week4 giovanni-ham giovanni-spam giovanni-spammy grenier hege jhardin llanga mmiroslaw-mails-ham mmiroslaw-mails-spam pds spamsponge thendrikx
+# with results from: last-net: net-darxus net-ena-week0 net-ena-week1 net-ena-week2 net-ena-week3 net-ena-week4 net-giovanni-ham net-giovanni-spam net-giovanni-spammy net-hege net-llanga net-mmiroslaw-mails-ham net-mmiroslaw-mails-spam net-pds net-spamsponge net-thendrikx; day 1: darxus ena-week0 ena-week1 ena-week2 ena-week3 ena-week4 giovanni-ham giovanni-spam giovanni-spammy hege llanga mmiroslaw-mails-ham mmiroslaw-mails-spam pds spamsponge thendrikx; day 2: darxus ena-week0 ena-week1 ena-week2 ena-week3 ena-week4 giovanni-ham giovanni-spam giovanni-spammy hege llanga mmiroslaw-mails-ham mmiroslaw-mails-spam pds spamsponge thendrikx; day 3: darxus ena-week0 ena-week1 ena-week2 ena-week3 ena-week4 giovanni-ham giovanni-spam giovanni-spammy hege llanga mmiroslaw-mails-ham mmiroslaw-mails-spam spamsponge thendrikx; day 4: darxus ena-week0 ena-week1 ena-week2 ena-week3 ena-week4 giovanni-ham giovanni-spam giovanni-spammy hege llanga mmiroslaw-mails-ham mmiroslaw-mails-spam pds spamsponge thendrikx; day 5: darxus ena-week0 ena-week1 ena-week2 ena-week3 ena-week4 giovanni-ham giovanni-spam giovanni-spammy hege llanga mmiroslaw-mails-ham mmiroslaw-mails-spam pds spamsponge thendrikx
# tflags publish
AC_BR_BONANZA
@@ -597,9 +597,6 @@ FROM_SUSPICIOUS_NTLD_FP
FROM_UNBAL1
# good enough
-FROM_UNBAL2
-
-# good enough
FROM_WSP_TRAIL
# tflags net
@@ -680,9 +677,6 @@ FUZZY_UNSUBSCRIBE
# tflags publish
FUZZY_WALLET
-# good enough
-GAPPY_GENITALIA
-
# tflags publish
GAPPY_SALES_LEADS_FREEM
@@ -1110,6 +1104,9 @@ PDS_BTC_NTLD
PDS_DBL_URL_TNB_RUNON
# tflags net
+PDS_FROM_2_EMAILS
+
+# tflags net
PDS_HELO_SPF_FAIL
# good enough
@@ -1577,9 +1574,6 @@ STATIC_XPRIO_OLE
# tflags publish
STOCK_TIP
-# good enough
-STY_INVIS_DIRECT
-
# tflags userconf
SUBJECT_IN_BLACKLIST
@@ -1619,10 +1613,10 @@ TARINGANET_IMG_NOT_RCVD_TN
# tflags publish
TEQF_USR_IMAGE
-# tflags net
+# tflags publish
TEQF_USR_MSGID_HEX
-# tflags net
+# tflags publish
TEQF_USR_MSGID_MALF
# tflags publish
@@ -1691,6 +1685,9 @@ TVD_PH_7
# good enough
TVD_PH_BODY_META
+# good enough
+TVD_RCVD_SPACE_BRACKET
+
# tflags net
TVD_SPACE_ENCODED
Modified: spamassassin/branches/trunk-welcomelist/t/bayesbdb.t
URL: http://svn.apache.org/viewvc/spamassassin/branches/trunk-welcomelist/t/bayesbdb.t?rev=1900117&r1=1900116&r2=1900117&view=diff
==============================================================================
--- spamassassin/branches/trunk-welcomelist/t/bayesbdb.t (original)
+++ spamassassin/branches/trunk-welcomelist/t/bayesbdb.t Thu Apr 21 18:40:43 2022
@@ -1,6 +1,6 @@
#!/usr/bin/perl -T
-use Data::Dumper;
+use File::Find qw(find);
use lib '.'; use lib 't';
use SATest; sa_t_init("bayesbdb");
@@ -16,7 +16,7 @@ plan skip_all => "BerkeleyDB is unavaila
plan skip_all => "BerkeleyDB >= 4.6 is required" unless $BerkeleyDB::db_version >= 4.6;
}
-plan tests => 42;
+plan tests => 46;
tstprefs ("
@@ -155,15 +155,26 @@ tstprefs ("
# our own checking callback and keep using the existing ok_all_patterns call
%patterns = ( 1 => 'Acted on message' );
+$wanted_examined = count_files("data/spam");
ok(salearnrun("--spam data/spam", \&check_examined));
ok_all_patterns();
+$wanted_examined = count_files("data/nice");
ok(salearnrun("--ham data/nice", \&check_examined));
ok_all_patterns();
+$wanted_examined = count_files("data/welcomelists");
ok(salearnrun("--ham data/welcomelists", \&check_examined));
ok_all_patterns();
+$wanted_examined = 3;
+ok(salearnrun("--ham --mbox data/nice.mbox", \&check_examined));
+ok_all_patterns();
+
+$wanted_examined = 3;
+ok(salearnrun("--ham --mbox < data/nice.mbox", \&check_examined));
+ok_all_patterns();
+
%patterns = ( 'non-token data: bayes db version' => 'db version' );
ok(salearnrun("--dump magic", \&patterns_run_cb));
ok_all_patterns();
@@ -243,7 +254,17 @@ sub check_examined {
$_ = join ('', <IN>);
}
- if ($_ =~ /(?:Forgot|Learned) tokens from \d+ message\(s\) \(\d+ message\(s\) examined\)/) {
- $found{'Acted on message'}++;
+ if ($_ =~ /(?:Forgot|Learned) tokens from \d+ message\(s\) \((\d+) message\(s\) examined\)/) {
+ #print STDERR "examined $1 messages\n";
+ if (defined $wanted_examined && $wanted_examined == $1) {
+ $found{'Acted on message'}++;
+ }
}
}
+
+sub count_files {
+ my $cnt = 0;
+ find({wanted => sub { $cnt++ if -f $_; }, no_chdir => 1}, $_[0]);
+ return $cnt;
+}
+
Modified: spamassassin/branches/trunk-welcomelist/t/bayesdbm.t
URL: http://svn.apache.org/viewvc/spamassassin/branches/trunk-welcomelist/t/bayesdbm.t?rev=1900117&r1=1900116&r2=1900117&view=diff
==============================================================================
--- spamassassin/branches/trunk-welcomelist/t/bayesdbm.t (original)
+++ spamassassin/branches/trunk-welcomelist/t/bayesdbm.t Thu Apr 21 18:40:43 2022
@@ -1,6 +1,6 @@
#!/usr/bin/perl -T
-use Data::Dumper;
+use File::Find qw(find);
use lib '.'; use lib 't';
use SATest; sa_t_init("bayesdbm");
@@ -10,7 +10,7 @@ use Test::More;
plan skip_all => "Long running tests disabled" unless conf_bool('run_long_tests');
plan skip_all => "DB_File is unavailable" unless HAS_DB_FILE;
-plan tests => 48;
+plan tests => 52;
tstprefs ("
bayes_learn_to_journal 0
@@ -175,15 +175,26 @@ tstprefs ("
# our own checking callback and keep using the existing ok_all_patterns call
%patterns = ( 1 => 'Acted on message' );
+$wanted_examined = count_files("data/spam");
ok(salearnrun("--spam data/spam", \&check_examined));
ok_all_patterns();
+$wanted_examined = count_files("data/nice");
ok(salearnrun("--ham data/nice", \&check_examined));
ok_all_patterns();
+$wanted_examined = count_files("data/welcomelists");
ok(salearnrun("--ham data/welcomelists", \&check_examined));
ok_all_patterns();
+$wanted_examined = 3;
+ok(salearnrun("--ham --mbox data/nice.mbox", \&check_examined));
+ok_all_patterns();
+
+$wanted_examined = 3;
+ok(salearnrun("--ham --mbox < data/nice.mbox", \&check_examined));
+ok_all_patterns();
+
%patterns = ( 'non-token data: bayes db version' => 'db version' );
ok(salearnrun("--dump magic", \&patterns_run_cb));
ok_all_patterns();
@@ -263,9 +274,17 @@ sub check_examined {
$_ = join ('', <IN>);
}
- if ($_ =~ /(?:Forgot|Learned) tokens from \d+ message\(s\) \(\d+ message\(s\) examined\)/) {
- $found{'Acted on message'}++;
+ if ($_ =~ /(?:Forgot|Learned) tokens from \d+ message\(s\) \((\d+) message\(s\) examined\)/) {
+ #print STDERR "examined $1 messages\n";
+ if (defined $wanted_examined && $wanted_examined == $1) {
+ $found{'Acted on message'}++;
+ }
}
}
+sub count_files {
+ my $cnt = 0;
+ find({wanted => sub { $cnt++ if -f $_; }, no_chdir => 1}, $_[0]);
+ return $cnt;
+}
Modified: spamassassin/branches/trunk-welcomelist/t/bayessql.t
URL: http://svn.apache.org/viewvc/spamassassin/branches/trunk-welcomelist/t/bayessql.t?rev=1900117&r1=1900116&r2=1900117&view=diff
==============================================================================
--- spamassassin/branches/trunk-welcomelist/t/bayessql.t (original)
+++ spamassassin/branches/trunk-welcomelist/t/bayessql.t Thu Apr 21 18:40:43 2022
@@ -1,5 +1,6 @@
#!/usr/bin/perl -T
+use File::Find qw(find);
use lib '.'; use lib 't';
use SATest; sa_t_init("bayessql");
@@ -14,8 +15,8 @@ plan skip_all => "DBI is unavailable on
plan skip_all => "Bayes SQL tests are disabled or DBD::SQLite not found" unless (SQLITE || SQL);
my $tests = 0;
-$tests += 53 if (SQLITE);
-$tests += 53 if (SQL);
+$tests += 57 if (SQLITE);
+$tests += 57 if (SQL);
plan tests => $tests;
diag "Note: Failure may be due to an incorrect config.";
@@ -339,15 +340,26 @@ tstprefs ("
# our own checking callback and keep using the existing ok_all_patterns call
%patterns = ( 1 => 'Acted on message' );
+$wanted_examined = count_files("data/spam");
ok(salearnrun("--spam data/spam", \&check_examined));
ok_all_patterns();
+$wanted_examined = count_files("data/nice");
ok(salearnrun("--ham data/nice", \&check_examined));
ok_all_patterns();
+$wanted_examined = count_files("data/welcomelists");
ok(salearnrun("--ham data/welcomelists", \&check_examined));
ok_all_patterns();
+$wanted_examined = 3;
+ok(salearnrun("--ham --mbox data/nice.mbox", \&check_examined));
+ok_all_patterns();
+
+$wanted_examined = 3;
+ok(salearnrun("--ham --mbox < data/nice.mbox", \&check_examined));
+ok_all_patterns();
+
%patterns = ( 'non-token data: bayes db version' => 'db version' );
ok(salearnrun("--dump magic", \&patterns_run_cb));
ok_all_patterns();
@@ -450,11 +462,20 @@ sub check_examined {
$_ = join ('', <IN>);
}
- if ($_ =~ /(?:Forgot|Learned) tokens from \d+ message\(s\) \(\d+ message\(s\) examined\)/) {
- $found{'Acted on message'}++;
+ if ($_ =~ /(?:Forgot|Learned) tokens from \d+ message\(s\) \((\d+) message\(s\) examined\)/) {
+ #print STDERR "examined $1 messages\n";
+ if (defined $wanted_examined && $wanted_examined == $1) {
+ $found{'Acted on message'}++;
+ }
}
}
+sub count_files {
+ my $cnt = 0;
+ find({wanted => sub { $cnt++ if -f $_; }, no_chdir => 1}, $_[0]);
+ return $cnt;
+}
+
# WARNING! Do not use this as an example, this breaks abstraction
# and is here strictly to help the regression tests.
sub database_clear_p {