You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2022/04/03 07:34:21 UTC
svn commit: r1899525 - in /spamassassin/trunk/lib/Mail/SpamAssassin: Conf.pm PerMsgStatus.pm
Author: hege
Date: Sun Apr 3 07:34:21 2022
New Revision: 1899525
URL: http://svn.apache.org/viewvc?rev=1899525&view=rev
Log:
Bug 7905/7906: Rewrote autolearn logic. Meta points are now split between head/body, according to how many head/body rules it depends on (not recursive, just first deps are checked). If there are no head/body deps, nothing is added. No discrimination of network rules anymore.
Modified:
spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm?rev=1899525&r1=1899524&r2=1899525&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Conf.pm Sun Apr 3 07:34:21 2022
@@ -5187,56 +5187,9 @@ sub get_description_for_rule {
###########################################################################
-sub maybe_header_only {
- my($self,$rulename) = @_;
- my $type = $self->{test_types}->{$rulename};
-
- if (index($rulename, 'AUTOLEARNTEST') == 0) {
- dbg("config: auto-learn: $rulename - Test type is $self->{test_types}->{$rulename}.");
- }
-
- return 0 if (!defined ($type));
-
- if (($type == $TYPE_HEAD_TESTS) || ($type == $TYPE_HEAD_EVALS)) {
- return 1;
-
- } elsif ($type == $TYPE_META_TESTS) {
- if (($self->{tflags}->{$rulename}||'') =~ /\bnet\b/) {
- return 0;
- } else {
- return 1;
- }
- }
-
- return 0;
-}
-
-sub maybe_body_only {
- my($self,$rulename) = @_;
- my $type = $self->{test_types}->{$rulename};
-
- if (index($rulename, 'AUTOLEARNTEST') == 0) {
- dbg("config: auto-learn: $rulename - Test type is $self->{test_types}->{$rulename}.");
- }
-
- return 0 if (!defined ($type));
-
- if (($type == $TYPE_BODY_TESTS) || ($type == $TYPE_BODY_EVALS)
- || ($type == $TYPE_URI_TESTS) || ($type == $TYPE_URI_EVALS))
- {
- # some rawbody go off of headers...
- return 1;
-
- } elsif ($type == $TYPE_META_TESTS) {
- if (($self->{tflags}->{$rulename}||'') =~ /\bnet\b/) {
- return 0;
- } else {
- return 1;
- }
- }
-
- return 0;
-}
+# Deprecated since Bug 7905/7906
+sub maybe_header_only { warn "Deprecated Conf::maybe_header_only() called"; }
+sub maybe_body_only { warn "Deprecated Conf::maybe_body_only() called"; }
###########################################################################
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm?rev=1899525&r1=1899524&r2=1899525&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm Sun Apr 3 07:34:21 2022
@@ -724,6 +724,21 @@ sub get_autolearn_force_names {
return $names;
}
+sub _get_autolearn_testtype {
+ my ($self, $test) = @_;
+ return '' unless defined $test;
+ return 'head' if $test == $Mail::SpamAssassin::Conf::TYPE_HEAD_TESTS
+ || $test == $Mail::SpamAssassin::Conf::TYPE_HEAD_EVALS;
+ return 'body' if $test == $Mail::SpamAssassin::Conf::TYPE_BODY_TESTS
+ || $test == $Mail::SpamAssassin::Conf::TYPE_BODY_EVALS
+ || $test == $Mail::SpamAssassin::Conf::TYPE_RAWBODY_TESTS
+ || $test == $Mail::SpamAssassin::Conf::TYPE_RAWBODY_EVALS
+ || $test == $Mail::SpamAssassin::Conf::TYPE_URI_TESTS
+ || $test == $Mail::SpamAssassin::Conf::TYPE_URI_EVALS;
+ return 'meta' if $test == $Mail::SpamAssassin::Conf::TYPE_META_TESTS;
+ return '';
+}
+
sub _get_autolearn_points {
my ($self) = @_;
@@ -731,11 +746,13 @@ sub _get_autolearn_points {
# ensure it only gets computed once, even if we return early
$self->{autolearn_points} = 0;
+ my $conf = $self->{conf};
+
# This function needs to use sum($score[scoreset % 2]) not just {score}.
# otherwise we shift what we autolearn on and it gets really weird. - tvd
- my $orig_scoreset = $self->{conf}->get_score_set();
+ my $orig_scoreset = $conf->get_score_set();
my $new_scoreset = $orig_scoreset;
- my $scores = $self->{conf}->{scores};
+ my $scores = $conf->{scores};
if (($orig_scoreset & 2) == 0) { # we don't need to recompute
dbg("learn: auto-learn: currently using scoreset $orig_scoreset");
@@ -743,10 +760,10 @@ sub _get_autolearn_points {
else {
$new_scoreset = $orig_scoreset & ~2;
dbg("learn: auto-learn: currently using scoreset $orig_scoreset, recomputing score based on scoreset $new_scoreset");
- $scores = $self->{conf}->{scoreset}->[$new_scoreset];
+ $scores = $conf->{scoreset}->[$new_scoreset];
}
- my $tflags = $self->{conf}->{tflags};
+ my $tflags = $conf->{tflags};
my $points = 0;
# Just in case this function is called multiple times, clear out the
@@ -767,7 +784,7 @@ sub _get_autolearn_points {
# Use the original scoreset since it'll be 0 in sets 0 and 1.
if ($tflags->{$test} =~ /\blearn\b/) {
# we're guaranteed that the score will be defined
- $self->{learned_points} += $self->{conf}->{scoreset}->[$orig_scoreset]->{$test};
+ $self->{learned_points} += $conf->{scoreset}->[$orig_scoreset]->{$test};
next;
}
@@ -784,14 +801,41 @@ sub _get_autolearn_points {
# Go ahead and add points to the proper locations
# Changed logic because in testing, I was getting both head and body. Bug 5503
- if ($self->{conf}->maybe_header_only ($test)) {
+ # Cleanup logic, Bug 7905/7906
+ my $type = $self->_get_autolearn_testtype($conf->{test_types}->{$test});
+ if ($type eq 'head') {
$self->{head_only_points} += $scores->{$test};
- dbg("learn: auto-learn: adding head_only points $scores->{$test}");
- } elsif ($self->{conf}->maybe_body_only ($test)) {
+ dbg("learn: auto-learn: adding header points $scores->{$test} ($test)");
+ }
+ elsif ($type eq 'body') {
$self->{body_only_points} += $scores->{$test};
- dbg("learn: auto-learn: adding body_only points $scores->{$test}");
- } else {
- dbg("learn: auto-learn: not considered head or body scores: $scores->{$test}");
+ dbg("learn: auto-learn: adding body points $scores->{$test} ($test)");
+ }
+ elsif ($type eq 'meta') {
+ if ($conf->{meta_dependencies}->{$test}) {
+ my $dep_head = 0;
+ my $dep_body = 0;
+ foreach my $deptest (@{$conf->{meta_dependencies}->{$test}}) {
+ my $deptype = $self->_get_autolearn_testtype($conf->{test_types}->{$deptest});
+ if ($deptype eq 'head') { $dep_head++; }
+ elsif ($deptype eq 'body') { $dep_body++; }
+ }
+ if ($dep_head || $dep_body) {
+ my $dep_total = $dep_head + $dep_body;
+ my $p_head = sprintf "%0.3f", $scores->{$test} * ($dep_head / $dep_total);
+ my $p_body = sprintf "%0.3f", $scores->{$test} * ($dep_body / $dep_total);
+ $self->{head_only_points} += $p_head;
+ $self->{body_only_points} += $p_body;
+ dbg("learn: auto-learn: adding $p_head header and $p_body body points, $dep_head/$dep_body ratio ($test)");
+ } else {
+ dbg("learn: auto-learn: not considered as header or body points, no header/body deps ($test)");
+ }
+ } else {
+ dbg("learn: auto-learn: not considered as header or body points, no meta deps ($test)");
+ }
+ }
+ else {
+ dbg("learn: auto-learn: not considered as header or body points, ignored ruletype ($test)");
}
$points += $scores->{$test};