You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2022/04/21 18:40:43 UTC

svn commit: r1900117 - in /spamassassin/branches/trunk-welcomelist: ./ MANIFEST lib/Mail/SpamAssassin/ArchiveIterator.pm rules/active.list t/bayesbdb.t t/bayesdbm.t t/bayessql.t t/data/nice.mbox

Author: hege
Date: Thu Apr 21 18:40:43 2022
New Revision: 1900117

URL: http://svn.apache.org/viewvc?rev=1900117&view=rev
Log:
Merge from trunk r1900116 to trunk-welcomelist

Added:
    spamassassin/branches/trunk-welcomelist/t/data/nice.mbox
      - copied unchanged from r1900116, spamassassin/trunk/t/data/nice.mbox
Modified:
    spamassassin/branches/trunk-welcomelist/   (props changed)
    spamassassin/branches/trunk-welcomelist/MANIFEST
    spamassassin/branches/trunk-welcomelist/lib/Mail/SpamAssassin/ArchiveIterator.pm
    spamassassin/branches/trunk-welcomelist/rules/active.list
    spamassassin/branches/trunk-welcomelist/t/bayesbdb.t
    spamassassin/branches/trunk-welcomelist/t/bayesdbm.t
    spamassassin/branches/trunk-welcomelist/t/bayessql.t

Propchange: spamassassin/branches/trunk-welcomelist/
------------------------------------------------------------------------------
  Merged /spamassassin/trunk:r1900096-1900116

Modified: spamassassin/branches/trunk-welcomelist/MANIFEST
URL: http://svn.apache.org/viewvc/spamassassin/branches/trunk-welcomelist/MANIFEST?rev=1900117&r1=1900116&r2=1900117&view=diff
==============================================================================
--- spamassassin/branches/trunk-welcomelist/MANIFEST (original)
+++ spamassassin/branches/trunk-welcomelist/MANIFEST Thu Apr 21 18:40:43 2022
@@ -385,6 +385,7 @@ t/data/nice/spf5-received-spf-crlf
 t/data/nice/spf6-received-spf-crlf2
 t/data/nice/unicode1
 t/data/nice/unicode2
+t/data/nice.mbox
 t/data/phishing/openphish-feed.txt
 t/data/phishing/phishtank-feed.csv
 t/data/reporterplugin.pm

Modified: spamassassin/branches/trunk-welcomelist/lib/Mail/SpamAssassin/ArchiveIterator.pm
URL: http://svn.apache.org/viewvc/spamassassin/branches/trunk-welcomelist/lib/Mail/SpamAssassin/ArchiveIterator.pm?rev=1900117&r1=1900116&r2=1900117&view=diff
==============================================================================
--- spamassassin/branches/trunk-welcomelist/lib/Mail/SpamAssassin/ArchiveIterator.pm (original)
+++ spamassassin/branches/trunk-welcomelist/lib/Mail/SpamAssassin/ArchiveIterator.pm Thu Apr 21 18:40:43 2022
@@ -415,7 +415,7 @@ sub _run_mailbox {
   my @msg;
   my $header;
 
-  my $fh = $self->_mail_open($where, 1);
+  my $fh = $self->_mail_open($file, 1);
   return unless $fh;
 
   my $opt_max_size = $self->{opt_max_size};
@@ -466,7 +466,7 @@ sub _run_mbx {
   my @msg;
   my $header;
 
-  my $fh = $self->_mail_open($where, 1);
+  my $fh = $self->_mail_open($file, 1);
   return unless $fh;
 
   my $opt_max_size = $self->{opt_max_size};
@@ -1001,7 +1001,7 @@ sub _scan_mailbox {
 
   foreach my $file (@files) {
     $self->_bump_scan_progress();
-    if ($file =~ /\.(?:gz|bz2|xz)$/i) {
+    if ($file =~ /\.(?:gz|bz2|xz|lz[o4]?)$/i) {
       warn "archive-iterator: compressed mbox folders are not supported at this time\n";
       next;
     }
@@ -1129,7 +1129,7 @@ sub _scan_mbx {
   foreach my $file (@files) {
     $self->_bump_scan_progress();
 
-    if ($folder =~ /\.(?:gz|bz2|xz)$/i) {
+    if ($folder =~ /\.(?:gz|bz2|xz|lz[o4]?)$/i) {
       warn "archive-iterator: compressed mbx folders are not supported at this time\n";
       next;
     }

Modified: spamassassin/branches/trunk-welcomelist/rules/active.list
URL: http://svn.apache.org/viewvc/spamassassin/branches/trunk-welcomelist/rules/active.list?rev=1900117&r1=1900116&r2=1900117&view=diff
==============================================================================
--- spamassassin/branches/trunk-welcomelist/rules/active.list (original)
+++ spamassassin/branches/trunk-welcomelist/rules/active.list Thu Apr 21 18:40:43 2022
@@ -1,6 +1,6 @@
 # DO NOT EDIT: file generated by build/mkupdates/listpromotable
 # active ruleset list, automatically generated from https://ruleqa.spamassassin.org/
-# with results from: last-net: net-darxus net-ena-week0 net-ena-week1 net-ena-week2 net-ena-week3 net-ena-week4 net-giovanni-ham net-giovanni-spam net-giovanni-spammy net-hege net-llanga net-mmiroslaw-mails-ham net-mmiroslaw-mails-spam net-pds net-spamsponge net-thendrikx; day 1: darxus ena-week0 ena-week1 ena-week2 ena-week3 ena-week4 giovanni-ham giovanni-spam giovanni-spammy hege llanga mmiroslaw-mails-ham mmiroslaw-mails-spam pds spamsponge thendrikx; day 2: darxus ena-week0 ena-week1 ena-week2 ena-week3 ena-week4 giovanni-ham giovanni-spam giovanni-spammy hege llanga mmiroslaw-mails-ham mmiroslaw-mails-spam spamsponge thendrikx; day 3: darxus ena-week0 ena-week1 ena-week2 ena-week3 ena-week4 giovanni-ham giovanni-spam giovanni-spammy hege llanga mmiroslaw-mails-ham mmiroslaw-mails-spam pds spamsponge thendrikx; day 4: darxus ena-week0 ena-week1 ena-week2 ena-week3 ena-week4 giovanni-ham giovanni-spam giovanni-spammy hege llanga mmiroslaw-mails-ham mmiroslaw-mails-spam pds spamsponge thendrikx; day 5: darxus ena-week0 ena-week1 ena-week2 ena-week3 ena-week4 giovanni-ham giovanni-spam giovanni-spammy grenier hege jhardin llanga mmiroslaw-mails-ham mmiroslaw-mails-spam pds spamsponge thendrikx
+# with results from: last-net: net-darxus net-ena-week0 net-ena-week1 net-ena-week2 net-ena-week3 net-ena-week4 net-giovanni-ham net-giovanni-spam net-giovanni-spammy net-hege net-llanga net-mmiroslaw-mails-ham net-mmiroslaw-mails-spam net-pds net-spamsponge net-thendrikx; day 1: darxus ena-week0 ena-week1 ena-week2 ena-week3 ena-week4 giovanni-ham giovanni-spam giovanni-spammy hege llanga mmiroslaw-mails-ham mmiroslaw-mails-spam pds spamsponge thendrikx; day 2: darxus ena-week0 ena-week1 ena-week2 ena-week3 ena-week4 giovanni-ham giovanni-spam giovanni-spammy hege llanga mmiroslaw-mails-ham mmiroslaw-mails-spam pds spamsponge thendrikx; day 3: darxus ena-week0 ena-week1 ena-week2 ena-week3 ena-week4 giovanni-ham giovanni-spam giovanni-spammy hege llanga mmiroslaw-mails-ham mmiroslaw-mails-spam spamsponge thendrikx; day 4: darxus ena-week0 ena-week1 ena-week2 ena-week3 ena-week4 giovanni-ham giovanni-spam giovanni-spammy hege llanga mmiroslaw-mails-ham mmiroslaw-mails-spam pds spamsponge thendrikx; day 5: darxus ena-week0 ena-week1 ena-week2 ena-week3 ena-week4 giovanni-ham giovanni-spam giovanni-spammy hege llanga mmiroslaw-mails-ham mmiroslaw-mails-spam pds spamsponge thendrikx
 
 # tflags publish
 AC_BR_BONANZA
@@ -597,9 +597,6 @@ FROM_SUSPICIOUS_NTLD_FP
 FROM_UNBAL1
 
 # good enough
-FROM_UNBAL2
-
-# good enough
 FROM_WSP_TRAIL
 
 # tflags net
@@ -680,9 +677,6 @@ FUZZY_UNSUBSCRIBE
 # tflags publish
 FUZZY_WALLET
 
-# good enough
-GAPPY_GENITALIA
-
 # tflags publish
 GAPPY_SALES_LEADS_FREEM
 
@@ -1110,6 +1104,9 @@ PDS_BTC_NTLD
 PDS_DBL_URL_TNB_RUNON
 
 # tflags net
+PDS_FROM_2_EMAILS
+
+# tflags net
 PDS_HELO_SPF_FAIL
 
 # good enough
@@ -1577,9 +1574,6 @@ STATIC_XPRIO_OLE
 # tflags publish
 STOCK_TIP
 
-# good enough
-STY_INVIS_DIRECT
-
 # tflags userconf
 SUBJECT_IN_BLACKLIST
 
@@ -1619,10 +1613,10 @@ TARINGANET_IMG_NOT_RCVD_TN
 # tflags publish
 TEQF_USR_IMAGE
 
-# tflags net
+# tflags publish
 TEQF_USR_MSGID_HEX
 
-# tflags net
+# tflags publish
 TEQF_USR_MSGID_MALF
 
 # tflags publish
@@ -1691,6 +1685,9 @@ TVD_PH_7
 # good enough
 TVD_PH_BODY_META
 
+# good enough
+TVD_RCVD_SPACE_BRACKET
+
 # tflags net
 TVD_SPACE_ENCODED
 

Modified: spamassassin/branches/trunk-welcomelist/t/bayesbdb.t
URL: http://svn.apache.org/viewvc/spamassassin/branches/trunk-welcomelist/t/bayesbdb.t?rev=1900117&r1=1900116&r2=1900117&view=diff
==============================================================================
--- spamassassin/branches/trunk-welcomelist/t/bayesbdb.t (original)
+++ spamassassin/branches/trunk-welcomelist/t/bayesbdb.t Thu Apr 21 18:40:43 2022
@@ -1,6 +1,6 @@
 #!/usr/bin/perl -T
 
-use Data::Dumper;
+use File::Find qw(find);
 use lib '.'; use lib 't';
 use SATest; sa_t_init("bayesbdb");
 
@@ -16,7 +16,7 @@ plan skip_all => "BerkeleyDB is unavaila
   plan skip_all => "BerkeleyDB >= 4.6 is required" unless $BerkeleyDB::db_version >= 4.6;
 }
 
-plan tests => 42;
+plan tests => 46;
 
 
 tstprefs ("
@@ -155,15 +155,26 @@ tstprefs ("
 # our own checking callback and keep using the existing ok_all_patterns call
 %patterns = ( 1 => 'Acted on message' );
 
+$wanted_examined = count_files("data/spam");
 ok(salearnrun("--spam data/spam", \&check_examined));
 ok_all_patterns();
 
+$wanted_examined = count_files("data/nice");
 ok(salearnrun("--ham data/nice", \&check_examined));
 ok_all_patterns();
 
+$wanted_examined = count_files("data/welcomelists");
 ok(salearnrun("--ham data/welcomelists", \&check_examined));
 ok_all_patterns();
 
+$wanted_examined = 3;
+ok(salearnrun("--ham --mbox data/nice.mbox", \&check_examined));
+ok_all_patterns();
+
+$wanted_examined = 3;
+ok(salearnrun("--ham --mbox < data/nice.mbox", \&check_examined));
+ok_all_patterns();
+
 %patterns = ( 'non-token data: bayes db version' => 'db version' );
 ok(salearnrun("--dump magic", \&patterns_run_cb));
 ok_all_patterns();
@@ -243,7 +254,17 @@ sub check_examined {
     $_ = join ('', <IN>);
   }
 
-  if ($_ =~ /(?:Forgot|Learned) tokens from \d+ message\(s\) \(\d+ message\(s\) examined\)/) {
-    $found{'Acted on message'}++;
+  if ($_ =~ /(?:Forgot|Learned) tokens from \d+ message\(s\) \((\d+) message\(s\) examined\)/) {
+    #print STDERR "examined $1 messages\n";
+    if (defined $wanted_examined && $wanted_examined == $1) {
+      $found{'Acted on message'}++;
+    }
   }
 }
+
+sub count_files {
+  my $cnt = 0;
+  find({wanted => sub { $cnt++ if -f $_; }, no_chdir => 1}, $_[0]);
+  return $cnt;
+}
+

Modified: spamassassin/branches/trunk-welcomelist/t/bayesdbm.t
URL: http://svn.apache.org/viewvc/spamassassin/branches/trunk-welcomelist/t/bayesdbm.t?rev=1900117&r1=1900116&r2=1900117&view=diff
==============================================================================
--- spamassassin/branches/trunk-welcomelist/t/bayesdbm.t (original)
+++ spamassassin/branches/trunk-welcomelist/t/bayesdbm.t Thu Apr 21 18:40:43 2022
@@ -1,6 +1,6 @@
 #!/usr/bin/perl -T
 
-use Data::Dumper;
+use File::Find qw(find);
 use lib '.'; use lib 't';
 use SATest; sa_t_init("bayesdbm");
 
@@ -10,7 +10,7 @@ use Test::More;
 
 plan skip_all => "Long running tests disabled" unless conf_bool('run_long_tests');
 plan skip_all => "DB_File is unavailable" unless HAS_DB_FILE;
-plan tests => 48;
+plan tests => 52;
 
 tstprefs ("
   bayes_learn_to_journal 0
@@ -175,15 +175,26 @@ tstprefs ("
 # our own checking callback and keep using the existing ok_all_patterns call
 %patterns = ( 1 => 'Acted on message' );
 
+$wanted_examined = count_files("data/spam");
 ok(salearnrun("--spam data/spam", \&check_examined));
 ok_all_patterns();
 
+$wanted_examined = count_files("data/nice");
 ok(salearnrun("--ham data/nice", \&check_examined));
 ok_all_patterns();
 
+$wanted_examined = count_files("data/welcomelists");
 ok(salearnrun("--ham data/welcomelists", \&check_examined));
 ok_all_patterns();
 
+$wanted_examined = 3;
+ok(salearnrun("--ham --mbox data/nice.mbox", \&check_examined));
+ok_all_patterns();
+
+$wanted_examined = 3;
+ok(salearnrun("--ham --mbox < data/nice.mbox", \&check_examined));
+ok_all_patterns();
+
 %patterns = ( 'non-token data: bayes db version' => 'db version' );
 ok(salearnrun("--dump magic", \&patterns_run_cb));
 ok_all_patterns();
@@ -263,9 +274,17 @@ sub check_examined {
     $_ = join ('', <IN>);
   }
 
-  if ($_ =~ /(?:Forgot|Learned) tokens from \d+ message\(s\) \(\d+ message\(s\) examined\)/) {
-    $found{'Acted on message'}++;
+  if ($_ =~ /(?:Forgot|Learned) tokens from \d+ message\(s\) \((\d+) message\(s\) examined\)/) {
+    #print STDERR "examined $1 messages\n";
+    if (defined $wanted_examined && $wanted_examined == $1) {
+      $found{'Acted on message'}++;
+    }
   }
 }
 
+sub count_files {
+  my $cnt = 0;
+  find({wanted => sub { $cnt++ if -f $_; }, no_chdir => 1}, $_[0]);
+  return $cnt;
+}
 

Modified: spamassassin/branches/trunk-welcomelist/t/bayessql.t
URL: http://svn.apache.org/viewvc/spamassassin/branches/trunk-welcomelist/t/bayessql.t?rev=1900117&r1=1900116&r2=1900117&view=diff
==============================================================================
--- spamassassin/branches/trunk-welcomelist/t/bayessql.t (original)
+++ spamassassin/branches/trunk-welcomelist/t/bayessql.t Thu Apr 21 18:40:43 2022
@@ -1,5 +1,6 @@
 #!/usr/bin/perl -T
 
+use File::Find qw(find);
 use lib '.'; use lib 't';
 use SATest; sa_t_init("bayessql");
 
@@ -14,8 +15,8 @@ plan skip_all => "DBI is unavailable on
 plan skip_all => "Bayes SQL tests are disabled or DBD::SQLite not found" unless (SQLITE || SQL);
 
 my $tests = 0;
-$tests += 53 if (SQLITE);
-$tests += 53 if (SQL);
+$tests += 57 if (SQLITE);
+$tests += 57 if (SQL);
 plan tests => $tests;
 
 diag "Note: Failure may be due to an incorrect config.";
@@ -339,15 +340,26 @@ tstprefs ("
 # our own checking callback and keep using the existing ok_all_patterns call
 %patterns = ( 1 => 'Acted on message' );
 
+$wanted_examined = count_files("data/spam");
 ok(salearnrun("--spam data/spam", \&check_examined));
 ok_all_patterns();
 
+$wanted_examined = count_files("data/nice");
 ok(salearnrun("--ham data/nice", \&check_examined));
 ok_all_patterns();
 
+$wanted_examined = count_files("data/welcomelists");
 ok(salearnrun("--ham data/welcomelists", \&check_examined));
 ok_all_patterns();
 
+$wanted_examined = 3;
+ok(salearnrun("--ham --mbox data/nice.mbox", \&check_examined));
+ok_all_patterns();
+
+$wanted_examined = 3;
+ok(salearnrun("--ham --mbox < data/nice.mbox", \&check_examined));
+ok_all_patterns();
+
 %patterns = ( 'non-token data: bayes db version' => 'db version' );
 ok(salearnrun("--dump magic", \&patterns_run_cb));
 ok_all_patterns();
@@ -450,11 +462,20 @@ sub check_examined {
     $_ = join ('', <IN>);
   }
 
-  if ($_ =~ /(?:Forgot|Learned) tokens from \d+ message\(s\) \(\d+ message\(s\) examined\)/) {
-    $found{'Acted on message'}++;
+  if ($_ =~ /(?:Forgot|Learned) tokens from \d+ message\(s\) \((\d+) message\(s\) examined\)/) {
+    #print STDERR "examined $1 messages\n";
+    if (defined $wanted_examined && $wanted_examined == $1) {
+      $found{'Acted on message'}++;
+    }
   }
 }
 
+sub count_files {
+  my $cnt = 0;
+  find({wanted => sub { $cnt++ if -f $_; }, no_chdir => 1}, $_[0]);
+  return $cnt;
+}
+
 # WARNING! Do not use this as an example, this breaks abstraction
 # and is here strictly to help the regression tests.
 sub database_clear_p {