You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2022/04/03 07:34:21 UTC

svn commit: r1899525 - in /spamassassin/trunk/lib/Mail/SpamAssassin: Conf.pm PerMsgStatus.pm

Author: hege
Date: Sun Apr  3 07:34:21 2022
New Revision: 1899525

URL: http://svn.apache.org/viewvc?rev=1899525&view=rev
Log:
Bug 7905/7906: Rewrote autolearn logic. A meta rule's points are now split between head and body in proportion to how many head/body rules it depends on (not recursive; only first-level dependencies are checked). If a meta rule has no head/body dependencies, nothing is added. Network rules are no longer treated differently.

Modified:
    spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm?rev=1899525&r1=1899524&r2=1899525&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm Sun Apr  3 07:34:21 2022
@@ -5187,56 +5187,9 @@ sub get_description_for_rule {
 
 ###########################################################################
 
-sub maybe_header_only {
-  my($self,$rulename) = @_;
-  my $type = $self->{test_types}->{$rulename};
-
-  if (index($rulename, 'AUTOLEARNTEST') == 0) {
-    dbg("config: auto-learn: $rulename - Test type is $self->{test_types}->{$rulename}.");
-  }
- 
-  return 0 if (!defined ($type));
-
-  if (($type == $TYPE_HEAD_TESTS) || ($type == $TYPE_HEAD_EVALS)) {
-    return 1;
-
-  } elsif ($type == $TYPE_META_TESTS) {
-    if (($self->{tflags}->{$rulename}||'') =~ /\bnet\b/) {
-      return 0;
-    } else {
-      return 1;
-    }
-  }
-
-  return 0;
-}
-
-sub maybe_body_only {
-  my($self,$rulename) = @_;
-  my $type = $self->{test_types}->{$rulename};
-
-  if (index($rulename, 'AUTOLEARNTEST') == 0) {
-    dbg("config: auto-learn: $rulename - Test type is $self->{test_types}->{$rulename}.");
-  }
-
-  return 0 if (!defined ($type));
-
-  if (($type == $TYPE_BODY_TESTS) || ($type == $TYPE_BODY_EVALS)
-        || ($type == $TYPE_URI_TESTS) || ($type == $TYPE_URI_EVALS))
-  {
-    # some rawbody go off of headers...
-    return 1;
-
-  } elsif ($type == $TYPE_META_TESTS) {
-    if (($self->{tflags}->{$rulename}||'') =~ /\bnet\b/) {
-      return 0;
-    } else {
-      return 1;
-    }
-  }
-
-  return 0;
-}
+# Deprecated since Bug 7905/7906
+sub maybe_header_only { warn "Deprecated Conf::maybe_header_only() called"; }
+sub maybe_body_only { warn "Deprecated Conf::maybe_body_only() called"; }
 
 ###########################################################################
 

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm?rev=1899525&r1=1899524&r2=1899525&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm Sun Apr  3 07:34:21 2022
@@ -724,6 +724,21 @@ sub get_autolearn_force_names {
   return $names;
 }
 
+sub _get_autolearn_testtype {
+  my ($self, $test) = @_;
+  return '' unless defined $test;
+  return 'head' if $test == $Mail::SpamAssassin::Conf::TYPE_HEAD_TESTS
+                || $test == $Mail::SpamAssassin::Conf::TYPE_HEAD_EVALS;
+  return 'body' if $test == $Mail::SpamAssassin::Conf::TYPE_BODY_TESTS
+                || $test == $Mail::SpamAssassin::Conf::TYPE_BODY_EVALS
+                || $test == $Mail::SpamAssassin::Conf::TYPE_RAWBODY_TESTS
+                || $test == $Mail::SpamAssassin::Conf::TYPE_RAWBODY_EVALS
+                || $test == $Mail::SpamAssassin::Conf::TYPE_URI_TESTS
+                || $test == $Mail::SpamAssassin::Conf::TYPE_URI_EVALS;
+  return 'meta' if $test == $Mail::SpamAssassin::Conf::TYPE_META_TESTS;
+  return '';
+}
+
 sub _get_autolearn_points {
   my ($self) = @_;
 
@@ -731,11 +746,13 @@ sub _get_autolearn_points {
   # ensure it only gets computed once, even if we return early
   $self->{autolearn_points} = 0;
 
+  my $conf = $self->{conf};
+
   # This function needs to use use sum($score[scoreset % 2]) not just {score}.
   # otherwise we shift what we autolearn on and it gets really weird.  - tvd
-  my $orig_scoreset = $self->{conf}->get_score_set();
+  my $orig_scoreset = $conf->get_score_set();
   my $new_scoreset = $orig_scoreset;
-  my $scores = $self->{conf}->{scores};
+  my $scores = $conf->{scores};
 
   if (($orig_scoreset & 2) == 0) { # we don't need to recompute
     dbg("learn: auto-learn: currently using scoreset $orig_scoreset");
@@ -743,10 +760,10 @@ sub _get_autolearn_points {
   else {
     $new_scoreset = $orig_scoreset & ~2;
     dbg("learn: auto-learn: currently using scoreset $orig_scoreset, recomputing score based on scoreset $new_scoreset");
-    $scores = $self->{conf}->{scoreset}->[$new_scoreset];
+    $scores = $conf->{scoreset}->[$new_scoreset];
   }
 
-  my $tflags = $self->{conf}->{tflags};
+  my $tflags = $conf->{tflags};
   my $points = 0;
 
   # Just in case this function is called multiple times, clear out the
@@ -767,7 +784,7 @@ sub _get_autolearn_points {
       # Use the original scoreset since it'll be 0 in sets 0 and 1.
       if ($tflags->{$test} =~ /\blearn\b/) {
 	# we're guaranteed that the score will be defined
-        $self->{learned_points} += $self->{conf}->{scoreset}->[$orig_scoreset]->{$test};
+        $self->{learned_points} += $conf->{scoreset}->[$orig_scoreset]->{$test};
 	next;
       }
 
@@ -784,14 +801,41 @@ sub _get_autolearn_points {
 
     # Go ahead and add points to the proper locations
     # Changed logic because in testing, I was getting both head and body. Bug 5503
-    if ($self->{conf}->maybe_header_only ($test)) {
+    # Cleanup logic, Bug 7905/7906
+    my $type = $self->_get_autolearn_testtype($conf->{test_types}->{$test});
+    if ($type eq 'head') {
       $self->{head_only_points} += $scores->{$test};
-      dbg("learn: auto-learn: adding head_only points $scores->{$test}");
-    } elsif ($self->{conf}->maybe_body_only ($test)) {
+      dbg("learn: auto-learn: adding header points $scores->{$test} ($test)");
+    }
+    elsif ($type eq 'body') {
       $self->{body_only_points} += $scores->{$test};
-      dbg("learn: auto-learn: adding body_only points $scores->{$test}");
-    } else {
-      dbg("learn: auto-learn: not considered head or body scores: $scores->{$test}");
+      dbg("learn: auto-learn: adding body points $scores->{$test} ($test)");
+    }
+    elsif ($type eq 'meta') {
+      if ($conf->{meta_dependencies}->{$test}) {
+        my $dep_head = 0;
+        my $dep_body = 0;
+        foreach my $deptest (@{$conf->{meta_dependencies}->{$test}}) {
+          my $deptype = $self->_get_autolearn_testtype($conf->{test_types}->{$deptest});
+          if ($deptype eq 'head') { $dep_head++; }
+          elsif ($deptype eq 'body') { $dep_body++; }
+        }
+        if ($dep_head || $dep_body) {
+          my $dep_total = $dep_head + $dep_body;
+          my $p_head = sprintf "%0.3f", $scores->{$test} * ($dep_head / $dep_total);
+          my $p_body = sprintf "%0.3f", $scores->{$test} * ($dep_body / $dep_total);
+          $self->{head_only_points} += $p_head;
+          $self->{body_only_points} += $p_body;
+          dbg("learn: auto-learn: adding $p_head header and $p_body body points, $dep_head/$dep_body ratio ($test)");
+        } else {
+          dbg("learn: auto-learn: not considered as header or body points, no header/body deps ($test)");
+        }
+      } else {
+          dbg("learn: auto-learn: not considered as header or body points, no meta deps ($test)");
+      }
+    }
+    else {
+      dbg("learn: auto-learn: not considered as header or body points, ignored ruletype ($test)");
     }
 
     $points += $scores->{$test};