You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by km...@apache.org on 2014/02/11 18:26:52 UTC
svn commit: r1567225 [7/15] - in /spamassassin/site/full/3.4.x: ./ doc/
Added: spamassassin/site/full/3.4.x/doc/Mail_SpamAssassin_PerMsgStatus.html
URL: http://svn.apache.org/viewvc/spamassassin/site/full/3.4.x/doc/Mail_SpamAssassin_PerMsgStatus.html?rev=1567225&view=auto
==============================================================================
--- spamassassin/site/full/3.4.x/doc/Mail_SpamAssassin_PerMsgStatus.html (added)
+++ spamassassin/site/full/3.4.x/doc/Mail_SpamAssassin_PerMsgStatus.html Tue Feb 11 17:26:49 2014
@@ -0,0 +1,814 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<title>Mail::SpamAssassin::PerMsgStatus - per-message status</title>
+<link rev="made" href="mailto:root@twm2005-dev.thoughtworthy.com" />
+</head>
+
+<body style="background-color: white">
+
+<p><a name="__index__"></a></p>
+<!-- INDEX BEGIN -->
+
+<ul>
+
+ <li><a href="#name">NAME</a></li>
+ <li><a href="#synopsis">SYNOPSIS</a></li>
+ <li><a href="#description">DESCRIPTION</a></li>
+ <li><a href="#methods">METHODS</a></li>
+ <li><a href="#see_also">SEE ALSO</a></li>
+</ul>
+<!-- INDEX END -->
+
+<hr />
+<p>
+</p>
+<h1><a name="name">NAME</a></h1>
+<p>Mail::SpamAssassin::PerMsgStatus - per-message status (spam or not-spam)</p>
+<p>
+</p>
+<hr />
+<h1><a name="synopsis">SYNOPSIS</a></h1>
+<pre>
+ my $spamtest = new Mail::SpamAssassin ({
+ 'rules_filename' => '/etc/spamassassin.rules',
+ 'userprefs_filename' => $ENV{HOME}.'/.spamassassin/user_prefs'
+ });
+ my $mail = $spamtest->parse();</pre>
+<pre>
+ my $status = $spamtest->check ($mail);</pre>
+<pre>
+ my $rewritten_mail;
+ if ($status->is_spam()) {
+ $rewritten_mail = $status->rewrite_mail ();
+ }
+ ...</pre>
+<p>
+</p>
+<hr />
+<h1><a name="description">DESCRIPTION</a></h1>
+<p>The Mail::SpamAssassin <a href="#item_check"><code>check()</code></a> method returns an object of this
+class. This object encapsulates all the per-message state.</p>
+<p>
+</p>
+<hr />
+<h1><a name="methods">METHODS</a></h1>
+<dl>
+<dt><strong><a name="item_check">$status->check ()</a></strong><br />
+</dt>
+<dd>
+Runs the SpamAssassin rules against the message pointed to by the object.
+</dd>
+<p></p>
+<dt><strong><a name="item_learn">$status-><code>learn()</code></a></strong><br />
+</dt>
+<dd>
+After a mail message has been checked, this method can be called. If the score
+is outside a certain range around the threshold, i.e. if the message is judged
+more-or-less definitely spam or definitely non-spam, it will be fed into
+SpamAssassin's learning systems (currently the naive Bayesian classifier),
+so that future similar mails will be caught.
+</dd>
+<p></p>
+<dt><strong><a name="item_get_autolearn_points">$score = $status-><code>get_autolearn_points()</code></a></strong><br />
+</dt>
+<dd>
+Return the message's score as computed for auto-learning. Certain tests are
+ignored:
+</dd>
+<dd>
+<pre>
+ - rules with tflags set to 'learn' (the Bayesian rules)</pre>
+</dd>
+<dd>
+<pre>
+ - rules with tflags set to 'userconf' (user white/black-listing rules, etc)</pre>
+</dd>
+<dd>
+<pre>
+ - rules with tflags set to 'noautolearn'</pre>
+</dd>
+<dd>
+<p>Also note that auto-learning occurs using scores from either scoreset 0 or 1,
+depending on what scoreset is used during message check. It is likely that the
+message check and auto-learn scores will be different.</p>
+</dd>
+<p></p>
+<dt><strong><a name="item_get_head_only_points">$score = $status-><code>get_head_only_points()</code></a></strong><br />
+</dt>
+<dd>
+Return the message's score as computed for auto-learning, ignoring
+all rules except for header-based ones.
+</dd>
+<p></p>
+<dt><strong><a name="item_get_learned_points">$score = $status-><code>get_learned_points()</code></a></strong><br />
+</dt>
+<dd>
+Return the message's score as computed for auto-learning, ignoring
+all rules except for learning-based ones.
+</dd>
+<p></p>
+<dt><strong><a name="item_get_body_only_points">$score = $status-><code>get_body_only_points()</code></a></strong><br />
+</dt>
+<dd>
+Return the message's score as computed for auto-learning, ignoring
+all rules except for body-based ones.
+</dd>
+<p></p>
+<dt><strong><a name="item_get_autolearn_force_status">$score = $status-><code>get_autolearn_force_status()</code></a></strong><br />
+</dt>
+<dd>
+Return whether a message's score included any rules that are flagged as
+autolearn_force.
+
+</dd>
+<dd>
+<pre>
+
+=cut</pre>
+</dd>
+<dd>
+<p>sub get_autolearn_force_status {
+ my ($self) = @_;
+ $self->_get_autolearn_points();
+ return $self->{autolearn_force};
+}</p>
+</dd>
+<p></p>
+<dt><strong><a name="item_get_autolearn_force_names">$rule_names = $status-><code>get_autolearn_force_names()</code></a></strong><br />
+</dt>
+<dd>
+Return a comma-separated list of the names of any rules flagged as
+autolearn_force that were included in the message's score (an empty string if there are none).
+
+</dd>
+<dd>
+<pre>
+
+=cut</pre>
+</dd>
+<dd>
+<p>sub get_autolearn_force_names {
+ my ($self) = @_;
+ my ($names);</p>
+</dd>
+<dd>
+<pre>
+ $self->_get_autolearn_points();
+ $names = $self->{autolearn_force_names};
+
+ if (defined $names) {
+ #remove trailing comma
+ $names =~ s/,$//;
+ } else {
+ $names = "";
+ }</pre>
+</dd>
+<dd>
+<pre>
+ return $names;
+}</pre>
+</dd>
+<dd>
+<p>sub _get_autolearn_points {
+ my ($self) = @_;</p>
+</dd>
+<dd>
+<pre>
+ return if (exists $self->{autolearn_points});
+ # ensure it only gets computed once, even if we return early
+ $self->{autolearn_points} = 0;</pre>
+</dd>
+<dd>
+<pre>
+ # This function needs to use sum($score[scoreset % 2]) not just {score}.
+ # otherwise we shift what we autolearn on and it gets really weird. - tvd
+ my $orig_scoreset = $self->{conf}->get_score_set();
+ my $new_scoreset = $orig_scoreset;
+ my $scores = $self->{conf}->{scores};</pre>
+</dd>
+<dd>
+<pre>
+ if (($orig_scoreset & 2) == 0) { # we don't need to recompute
+ dbg("learn: auto-learn: currently using scoreset $orig_scoreset");
+ }
+ else {
+ $new_scoreset = $orig_scoreset & ~2;
+ dbg("learn: auto-learn: currently using scoreset $orig_scoreset, recomputing score based on scoreset $new_scoreset");
+ $scores = $self->{conf}->{scoreset}->[$new_scoreset];
+ }</pre>
+</dd>
+<dd>
+<pre>
+ my $tflags = $self->{conf}->{tflags};
+ my $points = 0;</pre>
+</dd>
+<dd>
+<pre>
+ # Just in case this function is called multiple times, clear out the
+ # previous calculated values
+ $self->{learned_points} = 0;
+ $self->{body_only_points} = 0;
+ $self->{head_only_points} = 0;
+ $self->{autolearn_force} = 0;</pre>
+</dd>
+<dd>
+<pre>
+ foreach my $test (@{$self->{test_names_hit}}) {
+ # According to the documentation, noautolearn, userconf, and learn
+ # rules are ignored for autolearning.
+ if (exists $tflags->{$test}) {
+ next if $tflags->{$test} =~ /\bnoautolearn\b/;
+ next if $tflags->{$test} =~ /\buserconf\b/;</pre>
+</dd>
+<dd>
+<pre>
+ # Keep track of the learn points for an additional autolearn check.
+ # Use the original scoreset since it'll be 0 in sets 0 and 1.
+ if ($tflags->{$test} =~ /\blearn\b/) {
+ # we're guaranteed that the score will be defined
+ $self->{learned_points} += $self->{conf}->{scoreset}->[$orig_scoreset]->{$test};
+ next;
+ }
+
+ #IF ANY RULES ARE AUTOLEARN FORCE, SET THAT FLAG
+ if ($tflags->{$test} =~ /\bautolearn_force\b/) {
+ $self->{autolearn_force}++;
+ #ADD RULE NAME TO LIST
+ $self->{autolearn_force_names}.="$test,";
+ }
+ }</pre>
+</dd>
+<dd>
+<pre>
+ # ignore tests with 0 score (or undefined) in this scoreset
+ next if !$scores->{$test};</pre>
+</dd>
+<dd>
+<pre>
+ # Go ahead and add points to the proper locations
+ # Changed logic because in testing, I was getting both head and body. Bug 5503
+ if ($self->{conf}->maybe_header_only ($test)) {
+ $self->{head_only_points} += $scores->{$test};
+ dbg("learn: auto-learn: adding head_only points $scores->{$test}");
+ } elsif ($self->{conf}->maybe_body_only ($test)) {
+ $self->{body_only_points} += $scores->{$test};
+ dbg("learn: auto-learn: adding body_only points $scores->{$test}");
+ } else {
+ dbg("learn: auto-learn: not considered head or body scores: $scores->{$test}");
+ }</pre>
+</dd>
+<dd>
+<pre>
+ $points += $scores->{$test};
+ }</pre>
+</dd>
+<dd>
+<pre>
+ # Figure out the final value we'll use for autolearning
+ $points = (sprintf "%0.3f", $points) + 0;
+ dbg("learn: auto-learn: message score: ".$self->{score}.", computed score for autolearn: $points");</pre>
+</dd>
+<dd>
+<pre>
+ $self->{autolearn_points} = $points;
+}</pre>
+</dd>
+<dd>
+<p>###########################################################################</p>
+</dd>
+<p></p>
+<dt><strong><a name="item_is_spam">$isspam = $status->is_spam ()</a></strong><br />
+</dt>
+<dd>
+After a mail message has been checked, this method can be called. It will
+return 1 for mail determined likely to be spam, 0 if it does not seem
+spam-like.
+</dd>
+<p></p>
+<dt><strong><a name="item_get_names_of_tests_hit">$list = $status->get_names_of_tests_hit ()</a></strong><br />
+</dt>
+<dd>
+After a mail message has been checked, this method can be called. It will
+return a comma-separated string, listing all the symbolic test names
+of the tests which were triggered by the mail.
+</dd>
+<p></p>
+<dt><strong><a name="item_get_names_of_subtests_hit">$list = $status->get_names_of_subtests_hit ()</a></strong><br />
+</dt>
+<dd>
+After a mail message has been checked, this method can be called. It will
+return a comma-separated string, listing all the symbolic test names of the
+meta-rule sub-tests which were triggered by the mail. Sub-tests are the
+normally-hidden rules, which score 0 and have names beginning with two
+underscores, used in meta rules.
+</dd>
+<p></p>
+<dt><strong><a name="item_get_score">$num = $status->get_score ()</a></strong><br />
+</dt>
+<dd>
+After a mail message has been checked, this method can be called. It will
+return the message's score.
+</dd>
+<p></p>
+<dt><strong><a name="item_get_required_score">$num = $status->get_required_score ()</a></strong><br />
+</dt>
+<dd>
+After a mail message has been checked, this method can be called. It will
+return the score required for a mail to be considered spam.
+</dd>
+<p></p>
+<dt><strong><a name="item_get_autolearn_status">$num = $status->get_autolearn_status ()</a></strong><br />
+</dt>
+<dd>
+After a mail message has been checked, this method can be called. It will
+return one of the following strings depending on whether the mail was
+auto-learned or not: ``ham'', ``no'', ``spam'', ``disabled'', ``failed'', ``unavailable''.
+</dd>
+<dd>
+<p>If the message is flagged with autolearn_force, the returned string will also include that status
+and the rules hit. For example: ``autolearn_force=yes (AUTOLEARNTEST_BODY)''</p>
+</dd>
+<p></p>
+<dt><strong><a name="item_get_report">$report = $status->get_report ()</a></strong><br />
+</dt>
+<dd>
+Deliver a ``spam report'' on the checked mail message. This contains details of
+how many spam detection rules it triggered.
+</dd>
+<dd>
+<p>The report is returned as a multi-line string, with the lines separated by
+<code>\n</code> characters.</p>
+</dd>
+<p></p>
+<dt><strong><a name="item_get_content_preview">$preview = $status->get_content_preview ()</a></strong><br />
+</dt>
+<dd>
+Give a ``preview'' of the content.
+</dd>
+<dd>
+<p>This is returned as a multi-line string, with the lines separated by <code>\n</code>
+characters, containing a fully-decoded, safe, plain-text sample of the first
+few lines of the message body.</p>
+</dd>
+<p></p>
+<dt><strong><a name="item_get_message">$msg = $status-><code>get_message()</code></a></strong><br />
+</dt>
+<dd>
+Return the object representing the message being scanned.
+</dd>
+<p></p>
+<dt><strong><a name="item_rewrite_mail">$status->rewrite_mail ()</a></strong><br />
+</dt>
+<dd>
+Rewrite the mail message. This will at minimum add headers, and at
+maximum MIME-encapsulate the message text, to reflect its spam or not-spam
+status. The function will return a scalar of the rewritten message.
+</dd>
+<dd>
+<p>The actual modifications depend on the configuration (see
+<code>Mail::SpamAssassin::Conf</code> for more information).</p>
+</dd>
+<dd>
+<p>The possible modifications are as follows:</p>
+</dd>
+<dl>
+<dt><strong><a name="item_to_3a_2c_from_3a_and_subject_3a_modification_on_sp">To:, From: and Subject: modification on spam mails</a></strong><br />
+</dt>
+<dd>
+Depending on the configuration, the To: and From: lines can have a
+user-defined RFC 2822 comment appended for spam mail. The subject line
+may have a user-defined string prepended to it for spam mail.
+</dd>
+<p></p>
+<dt><strong><a name="item_x_2dspam_2d_2a_headers_for_all_mails">X-Spam-* headers for all mails</a></strong><br />
+</dt>
+<dd>
+Depending on the configuration, zero or more headers with names
+beginning with <code>X-Spam-</code> will be added to mail depending on whether
+it is spam or ham.
+</dd>
+<p></p>
+<dt><strong><a name="item_spam_message_with_report_safe">spam message with report_safe</a></strong><br />
+</dt>
+<dd>
+If report_safe is set to true (1), then spam messages are encapsulated
+into their own message/rfc822 MIME attachment without any modifications
+being made.
+</dd>
+<dd>
+<p>If report_safe is set to false (0), then the message will only have the
+above headers added/modified.</p>
+</dd>
+<p></p></dl>
+<dt><strong><a name="item_action_depends_on_tags">$status->action_depends_on_tags($tags, $code, @args)</a></strong><br />
+</dt>
+<dd>
+Enqueue the supplied subroutine reference <code>$code</code>, to become runnable when
+all the specified tags become available. The <code>$tags</code> may be a simple
+scalar - a tag name, or a listref of tag names. The subroutine <code>&$code</code>
+when called will be passed a <code>permessagestatus</code> object as its first argument,
+followed by the supplied (optional) list <code>@args</code> .
+</dd>
+<p></p>
+<dt><strong><a name="item_set_tag">$status->set_tag($tagname, $value)</a></strong><br />
+</dt>
+<dd>
+Set a template tag, as used in <code>add_header</code>, report templates, etc.
+This API is intended for use by plugins. Tag names will be converted
+to an all-uppercase representation internally.
+</dd>
+<dd>
+<p><code>$value</code> can be a simple scalar (string or number), or a reference to an
+array, in which case the public method get_tag will join array elements
+using a space as a separator, returning a single string for backward
+compatibility.</p>
+</dd>
+<dd>
+<p><code>$value</code> can also be a subroutine reference, which will be evaluated
+each time the template is expanded. The first argument passed by get_tag
+to a called subroutine will be a PerMsgStatus object (this module's object),
+followed by optional arguments provided by a caller to get_tag.</p>
+</dd>
+<dd>
+<p>Note that perl supports closures, which means that variables set in the
+caller's scope can be accessed inside this <code>sub</code>. For example:</p>
+</dd>
+<dd>
+<pre>
+ my $text = "hello world!";
+ $status->set_tag("FOO", sub {
+ my $pms = shift;
+ return $text;
+ });</pre>
+</dd>
+<dd>
+<p>See <code>Mail::SpamAssassin::Conf</code>'s <code>TEMPLATE TAGS</code> section for more details
+on how template tags are used.</p>
+</dd>
+<dd>
+<p><code>undef</code> will be returned if a tag by that name has not been defined.</p>
+</dd>
+<p></p>
+<dt><strong><a name="item_get_tag">$string = $status-><code>get_tag($tagname)</code></a></strong><br />
+</dt>
+<dd>
+Get the current value of a template tag, as used in <code>add_header</code>, report
+templates, etc. This API is intended for use by plugins. Tag names will be
+converted to an all-uppercase representation internally. See
+<code>Mail::SpamAssassin::Conf</code>'s <code>TEMPLATE TAGS</code> section for more details on
+tags.
+</dd>
+<dd>
+<p><code>undef</code> will be returned if a tag by that name has not been defined.</p>
+</dd>
+<p></p>
+<dt><strong><a name="item_get_tag_raw">$string = $status->get_tag_raw($tagname, @args)</a></strong><br />
+</dt>
+<dd>
+Similar to <a href="#item_get_tag"><code>get_tag</code></a>, but keeps a tag name unchanged (does not uppercase it),
+and does not convert arrayref tag values into a single string.
+</dd>
+<p></p>
+<dt><strong><a name="item_set_spamd_result_item">$status-><code>set_spamd_result_item($subref)</code></a></strong><br />
+</dt>
+<dd>
+Set an entry for the spamd result log line. <code>$subref</code> should be a code
+reference for a subroutine which will return a string in <code>'name=VALUE'</code>
+format, similar to the other entries in the spamd result line:
+</dd>
+<dd>
+<pre>
+ Jul 17 14:10:47 radish spamd[16670]: spamd: result: Y 22 - ALL_NATURAL,
+ DATE_IN_FUTURE_03_06,DIET_1,DRUGS_ERECTILE,DRUGS_PAIN,
+ TEST_FORGED_YAHOO_RCVD,TEST_INVALID_DATE,TEST_NOREALNAME,
+ TEST_NORMAL_HTTP_TO_IP,UNDISC_RECIPS scantime=0.4,size=3138,user=jm,
+ uid=1000,required_score=5.0,rhost=localhost,raddr=127.0.0.1,
+ rport=33153,mid=<9PS291LhupY>,autolearn=spam</pre>
+</dd>
+<dd>
+<p><code>name</code> and <code>VALUE</code> must not contain <code>=</code> or <code>,</code> characters, as it
+is important that these log lines are easy to parse.</p>
+</dd>
+<dd>
+<p>The code reference will be called by spamd after the message has been scanned,
+and the <a href="#item_check"><code>PerMsgStatus::check()</code></a> method has returned.</p>
+</dd>
+<p></p>
+<dt><strong><a name="item_finish">$status->finish ()</a></strong><br />
+</dt>
+<dd>
+Indicate that this <code>$status</code> object is finished with, and can be destroyed.
+</dd>
+<dd>
+<p>If you are using SpamAssassin in a persistent environment, or checking many
+mail messages from one <code>Mail::SpamAssassin</code> factory, this method should be
+called to ensure Perl's garbage collection will clean up old status objects.</p>
+</dd>
+<p></p>
+<dt><strong><a name="item_get_current_eval_rule_name">$name = $status-><code>get_current_eval_rule_name()</code></a></strong><br />
+</dt>
+<dd>
+Return the name of the currently-running eval rule. <code>undef</code> is
+returned if no eval rule is currently being run. Useful for plugins
+to determine the current rule name while inside an eval test function
+call.
+</dd>
+<p></p>
+<dt><strong><a name="item_get_decoded_body_text_array">$status->get_decoded_body_text_array ()</a></strong><br />
+</dt>
+<dd>
+Returns the message body, with <strong>base64</strong> or <strong>quoted-printable</strong> encodings
+decoded, and non-text parts or non-inline attachments stripped.
+</dd>
+<dd>
+<p>It is returned as an array of strings, with each string representing
+one newline-separated line of the body.</p>
+</dd>
+<p></p>
+<dt><strong><a name="item_get_decoded_stripped_body_text_array">$status->get_decoded_stripped_body_text_array ()</a></strong><br />
+</dt>
+<dd>
+Returns the message body, decoded (as described in
+get_decoded_body_text_array()), with HTML rendered, and with whitespace
+normalized.
+</dd>
+<dd>
+<p>It will always render text/html, and will use a heuristic to determine if other
+text/* parts should be considered text/html.</p>
+</dd>
+<dd>
+<p>It is returned as an array of strings, with each string representing one
+'paragraph'. Paragraphs, in plain-text mails, are double-newline-separated
+blocks of multi-line text.</p>
+</dd>
+<p></p>
+<dt><strong><a name="item_get">$status->get (header_name [, default_value])</a></strong><br />
+</dt>
+<dd>
+Returns a message header, pseudo-header, real name or address.
+<code>header_name</code> is the name of a mail header, such as 'Subject', 'To',
+etc. If <code>default_value</code> is given, it will be used if the requested
+<code>header_name</code> does not exist.
+</dd>
+<dd>
+<p>Appending <code>:raw</code> to the header name will inhibit decoding of quoted-printable
+or base-64 encoded strings.</p>
+</dd>
+<dd>
+<p>Appending a modifier <code>:addr</code> to a header field name will cause everything
+except the first email address to be removed from the header field. It is
+mainly applicable to header fields 'From', 'Sender', 'To', 'Cc' along with
+their 'Resent-*' counterparts, and the 'Return-Path'. For example, all of
+the following will result in ``example@foo'':</p>
+</dd>
+<dl>
+<dt><strong><a name="item_example_40foo">example@foo</a></strong><br />
+</dt>
+<dt><strong><a name="item_foo">example@foo (Foo Blah)</a></strong><br />
+</dt>
+<dt><strong><a name="item_example_40foo_2c_example_40bar">example@foo, example@bar</a></strong><br />
+</dt>
+<dt><strong>display: example@foo (Foo Blah), example@bar ;</strong><br />
+</dt>
+<dt><strong><a name="item_foo_blah__3cexample_40foo_3e">Foo Blah <example@foo></a></strong><br />
+</dt>
+<dt><strong><a name="item__22foo_blah_22__3cexample_40foo_3e">``Foo Blah'' <example@foo></a></strong><br />
+</dt>
+<dt><strong><a name="item__22_27foo_blah_27_22__3cexample_40foo_3e">``'Foo Blah''' <example@foo></a></strong><br />
+</dt>
+</dl>
+<p>Appending a modifier <code>:name</code> to a header field name will cause everything
+except the first display name to be removed from the header field. It is
+mainly applicable to header fields containing a single mail address: 'From',
+'Sender', along with their 'Resent-From' and 'Resent-Sender' counterparts.
+For example, all of the following will result in ``Foo Blah''. One level of
+single quotes is stripped too, as it is often seen.</p>
+<dl>
+<dt><strong>example@foo (Foo Blah)</strong><br />
+</dt>
+<dt><strong>example@foo (Foo Blah), example@bar</strong><br />
+</dt>
+<dt><strong>display: example@foo (Foo Blah), example@bar ;</strong><br />
+</dt>
+<dt><strong>Foo Blah <example@foo></strong><br />
+</dt>
+<dt><strong>``Foo Blah'' <example@foo></strong><br />
+</dt>
+<dt><strong>``'Foo Blah''' <example@foo></strong><br />
+</dt>
+</dl>
+<p>There are several special pseudo-headers that can be specified:</p>
+<dl>
+<dt><strong><a name="item_all_can_be_used_to_mean_the_text_of_all_the_messag"><code>ALL</code> can be used to mean the text of all the message's headers.</a></strong><br />
+</dt>
+<dt><strong><a name="item_all_2dtrusted_can_be_used_to_mean_the_text_of_all_"><code>ALL-TRUSTED</code> can be used to mean the text of all the message's headers
+that could only have been added by trusted relays.</a></strong><br />
+</dt>
+<dt><strong><a name="item_all_2dinternal_can_be_used_to_mean_the_text_of_all"><code>ALL-INTERNAL</code> can be used to mean the text of all the message's headers
+that could only have been added by internal relays.</a></strong><br />
+</dt>
+<dt><strong><a name="item_all_2duntrusted_can_be_used_to_mean_the_text_of_al"><code>ALL-UNTRUSTED</code> can be used to mean the text of all the message's
+headers that may have been added by untrusted relays. To make this
+pseudo-header more useful for header rules the 'Received' header that was added
+by the last trusted relay is included, even though it can be trusted.</a></strong><br />
+</dt>
+<dt><strong><a name="item_all_2dexternal_can_be_used_to_mean_the_text_of_all"><code>ALL-EXTERNAL</code> can be used to mean the text of all the message's headers
+that may have been added by external relays. Like <code>ALL-UNTRUSTED</code> the
+'Received' header added by the last internal relay is included.</a></strong><br />
+</dt>
+<dt><strong><a name="item_tocc_can_be_used_to_mean_the_contents_of_both_the_"><code>ToCc</code> can be used to mean the contents of both the 'To' and 'Cc'
+headers.</a></strong><br />
+</dt>
+<dt><strong><a name="item_envelopefrom_is_the_address_used_in_the__27mail_fr"><code>EnvelopeFrom</code> is the address used in the 'MAIL FROM:' phase of the SMTP
+transaction that delivered this message, if this data has been made available
+by the SMTP server.</a></strong><br />
+</dt>
+<dt><strong><a name="item_messageid_is_a_symbol_meaning_all_message_2did_27s"><code>MESSAGEID</code> is a symbol meaning all Message-Id's found in the message;
+some mailing list software moves the real 'Message-Id' to 'Resent-Message-Id'
+or 'X-Message-Id', then uses its own one in the 'Message-Id' header. The value
+returned for this symbol is the text from all 3 headers, separated by newlines.</a></strong><br />
+</dt>
+<dt><strong><a name="item_x_2dspam_2drelays_2duntrusted_is_the_generated_met"><code>X-Spam-Relays-Untrusted</code> is the generated metadata of untrusted relays
+the message has passed through</a></strong><br />
+</dt>
+<dt><strong><a name="item_x_2dspam_2drelays_2dtrusted_is_the_generated_metad"><code>X-Spam-Relays-Trusted</code> is the generated metadata of trusted relays
+the message has passed through</a></strong><br />
+</dt>
+</dl>
+<dt><strong><a name="item_get_uri_list">$status->get_uri_list ()</a></strong><br />
+</dt>
+<dd>
+Returns an array of all unique URIs found in the message. It takes
+a combination of the URIs found in the rendered (decoded and HTML
+stripped) body and the URIs found when parsing the HTML in the message.
+Will also set $status->{uri_list} (the array as returned by this function).
+</dd>
+<dd>
+<p>The returned array will include the ``raw'' URI as well as
+``slightly cooked'' versions. For example, the single URI
+'http://%77&#00119;%77.example.com/' will get turned into:
+( 'http://%77&#00119;%77.example.com/', 'http://www.example.com/' )</p>
+</dd>
+<p></p>
+<dt><strong><a name="item_get_uri_detail_list">$status->get_uri_detail_list ()</a></strong><br />
+</dt>
+<dd>
+Returns a hash reference of all unique URIs found in the message and
+various data about where the URIs were found in the message. It takes a
+combination of the URIs found in the rendered (decoded and HTML stripped)
+body and the URIs found when parsing the HTML in the message. Will also
+set $status->{uri_detail_list} (the hash reference as returned by this
+function). This function will also set $status->{uri_domain_count} (count of
+unique domains).
+</dd>
+<dd>
+<p>The hash format looks something like this:</p>
+</dd>
+<dd>
+<pre>
+ raw_uri => {
+ types => { a => 1, img => 1, parsed => 1 },
+ cleaned => [ canonified_uri ],
+ anchor_text => [ "click here", "no click here" ],
+ domains => { domain1 => 1, domain2 => 1 },
+ }</pre>
+</dd>
+<dd>
+<p><code>raw_uri</code> is whatever the URI was in the message itself
+(http://spamassassin.apache%2Eorg/).</p>
+</dd>
+<dd>
+<p><code>types</code> is a hash of the HTML tags (lowercase) which referenced
+the raw_uri. <em>parsed</em> is a faked type which specifies that the
+raw_uri was seen in the rendered text.</p>
+</dd>
+<dd>
+<p><code>cleaned</code> is an array of the raw and canonified version of the raw_uri
+(http://spamassassin.apache%2Eorg/, <a href="http://spamassassin.apache.org/">http://spamassassin.apache.org/</a>).</p>
+</dd>
+<dd>
+<p><code>anchor_text</code> is an array of the anchor text (text between <a> and
+</a>), if any, which linked to the URI.</p>
+</dd>
+<dd>
+<p><code>domains</code> is a hash of the domains found in the canonified URIs.</p>
+</dd>
+<dd>
+<p><code>hosts</code> is a hash of unstripped hostnames found in the canonified URIs
+as hash keys, with their domain part stored as a value of each hash entry.</p>
+</dd>
+<p></p>
+<dt><strong><a name="item_clear_test_state">$status-><code>clear_test_state()</code></a></strong><br />
+</dt>
+<dd>
+Clear test state, including test log messages from <code>$status->test_log()</code>.
+</dd>
+<p></p>
+<dt><strong><a name="item_got_hit">$status->got_hit ($rulename, $desc_prepend [, name => value, ...])</a></strong><br />
+</dt>
+<dd>
+Register a hit against a rule in the ruleset.
+</dd>
+<dd>
+<p>There are two mandatory arguments. These are <code>$rulename</code>, the name of the rule
+that fired, and <code>$desc_prepend</code>, which is a short string that will be
+prepended to the rule's <code>describe</code> string in output reports.</p>
+</dd>
+<dd>
+<p>In addition, callers can supplement that with the following optional
+data:</p>
+</dd>
+<dl>
+<dt><strong><a name="item_score__3d_3e__24num">score => $num</a></strong><br />
+</dt>
+<dd>
+Optional: the score to use for the rule hit. If unspecified,
+the value from the <code>Mail::SpamAssassin::Conf</code> object's <code>{scores}</code>
+hash will be used (a configured score), and in its absence the
+<code>defscore</code> option value.
+</dd>
+<p></p>
+<dt><strong><a name="item_defscore__3d_3e__24num">defscore => $num</a></strong><br />
+</dt>
+<dd>
+Optional: the score to use for the rule hit if neither the
+option <code>score</code> is provided, nor a configured score value is provided.
+</dd>
+<p></p>
+<dt><strong><a name="item_value__3d_3e__24num">value => $num</a></strong><br />
+</dt>
+<dd>
+Optional: the value to assign to the rule; the default value is <code>1</code>.
+<em>tflags multiple</em> rules use values of greater than 1 to indicate
+multiple hits. This value is accessible to meta rules.
+</dd>
+<p></p>
+<dt><strong><a name="item_ruletype__3d_3e__24type">ruletype => $type</a></strong><br />
+</dt>
+<dd>
+Optional, but recommended: the rule type string. This is used in the
+<code>hit_rule</code> plugin call, called by this method. If unset, <em>'unknown'</em> is
+used.
+</dd>
+<p></p>
+<dt><strong><a name="item_tflags__3d_3e__24string">tflags => $string</a></strong><br />
+</dt>
+<dd>
+Optional: a string, i.e. a space-separated list of additional tflags
+to be appended to an existing list of flags in $self->{conf}->{tflags},
+such as: ``nice noautolearn multiple''. No syntax checks are performed.
+</dd>
+<p></p>
+<dt><strong><a name="item_description__3d_3e__24string">description => $string</a></strong><br />
+</dt>
+<dd>
+Optional: a custom rule description string. This is used in the
+<code>hit_rule</code> plugin call, called by this method. If unset, the static
+description is used.
+</dd>
+<p></p></dl>
+<p>Backward compatibility: the two mandatory arguments have been part of this API
+since SpamAssassin 2.x. The optional <em>name=>value</em> pairs, however, are a
+new addition in SpamAssassin 3.2.0.</p>
+<dt><strong><a name="item_create_fulltext_tmpfile">$status->create_fulltext_tmpfile (fulltext_ref)</a></strong><br />
+</dt>
+<dd>
+This function creates a temporary file containing the passed scalar
+reference data (typically the full/pristine text of the message).
+This is typically used by external programs like pyzor and dccproc, to
+avoid hangs due to buffering issues. Methods that need this should
+call $self-><a href="#item_create_fulltext_tmpfile"><code>create_fulltext_tmpfile($fulltext)</code></a> to retrieve the temporary
+filename; it will be created if it has not already been.
+</dd>
+<dd>
+<p>Note: This can only be called once until $status-><a href="#item_delete_fulltext_tmpfile"><code>delete_fulltext_tmpfile()</code></a> is
+called.</p>
+</dd>
+<p></p>
+<dt><strong><a name="item_delete_fulltext_tmpfile">$status->delete_fulltext_tmpfile ()</a></strong><br />
+</dt>
+<dd>
+Will clean up after a $status-><a href="#item_create_fulltext_tmpfile"><code>create_fulltext_tmpfile()</code></a> call. Deletes the
+temporary file and uncaches the filename.
+</dd>
+<p></p>
+<dt><strong><a name="item_all_from_addrs_domains">all_from_addrs_domains</a></strong><br />
+</dt>
+<dd>
+This function returns all the various from addresses in a message using <code>all_from_addrs()</code>
+and then returns only the domain names.
+</dd>
+<p></p></dl>
+<p>
+</p>
+<hr />
+<h1><a name="see_also">SEE ALSO</a></h1>
+<p><code>Mail::SpamAssassin</code>
+<code>spamassassin</code>
+
+</p>
+
+</body>
+
+</html>
Added: spamassassin/site/full/3.4.x/doc/Mail_SpamAssassin_PerMsgStatus.txt
URL: http://svn.apache.org/viewvc/spamassassin/site/full/3.4.x/doc/Mail_SpamAssassin_PerMsgStatus.txt?rev=1567225&view=auto
==============================================================================
--- spamassassin/site/full/3.4.x/doc/Mail_SpamAssassin_PerMsgStatus.txt (added)
+++ spamassassin/site/full/3.4.x/doc/Mail_SpamAssassin_PerMsgStatus.txt Tue Feb 11 17:26:49 2014
@@ -0,0 +1,450 @@
+NAME
+ Mail::SpamAssassin::PerMsgStatus - per-message status (spam or not-spam)
+
+SYNOPSIS
+ my $spamtest = new Mail::SpamAssassin ({
+ 'rules_filename' => '/etc/spamassassin.rules',
+ 'userprefs_filename' => $ENV{HOME}.'/.spamassassin/user_prefs'
+ });
+ my $mail = $spamtest->parse();
+
+ my $status = $spamtest->check ($mail);
+
+ my $rewritten_mail;
+ if ($status->is_spam()) {
+ $rewritten_mail = $status->rewrite_mail ();
+ }
+ ...
+
+DESCRIPTION
+ The Mail::SpamAssassin "check()" method returns an object of this class.
+ This object encapsulates all the per-message state.
+
+METHODS
+ $status->check ()
+ Runs the SpamAssassin rules against the message pointed to by the
+ object.
+
+ $status->learn()
+ After a mail message has been checked, this method can be called. If
+ the score is outside a certain range around the threshold, ie. if
+ the message is judged more-or-less definitely spam or definitely
+ non-spam, it will be fed into SpamAssassin's learning systems
+ (currently the naive Bayesian classifier), so that future similar
+ mails will be caught.
+
+ $score = $status->get_autolearn_points()
+ Return the message's score as computed for auto-learning. Certain
+ tests are ignored:
+
+ - rules with tflags set to 'learn' (the Bayesian rules)
+
+ - rules with tflags set to 'userconf' (user white/black-listing rules, etc)
+
+ - rules with tflags set to 'noautolearn'
+
+ Also note that auto-learning occurs using scores from either
+ scoreset 0 or 1, depending on what scoreset is used during message
+ check. It is likely that the message check and auto-learn scores
+ will be different.
+
+ $score = $status->get_head_only_points()
+ Return the message's score as computed for auto-learning, ignoring
+ all rules except for header-based ones.
+
+ $score = $status->get_learned_points()
+ Return the message's score as computed for auto-learning, ignoring
+ all rules except for learning-based ones.
+
+ $score = $status->get_body_only_points()
+ Return the message's score as computed for auto-learning, ignoring
+ all rules except for body-based ones.
+
+ $score = $status->get_autolearn_force_status()
+ Return whether a message's score included any rules that are flagged
+ as autolearn_force.
+
+ $rule_names = $status->get_autolearn_force_names()
+ Return a comma-separated list of rule names if a message's
+ score included any rules that are flagged as autolearn_force.
+
+ $isspam = $status->is_spam ()
+ After a mail message has been checked, this method can be called. It
+ will return 1 for mail determined likely to be spam, 0 if it does
+ not seem spam-like.
+
+ $list = $status->get_names_of_tests_hit ()
+ After a mail message has been checked, this method can be called. It
+ will return a comma-separated string, listing all the symbolic test
+ names of the tests which were triggered by the mail.
+
+ $list = $status->get_names_of_subtests_hit ()
+ After a mail message has been checked, this method can be called. It
+ will return a comma-separated string, listing all the symbolic test
+ names of the meta-rule sub-tests which were triggered by the mail.
+ Sub-tests are the normally-hidden rules, which score 0 and have
+ names beginning with two underscores, used in meta rules.
+
+ $num = $status->get_score ()
+ After a mail message has been checked, this method can be called. It
+ will return the message's score.
+
+ $num = $status->get_required_score ()
+ After a mail message has been checked, this method can be called. It
+ will return the score required for a mail to be considered spam.
+
+ $num = $status->get_autolearn_status ()
+ After a mail message has been checked, this method can be called. It
+ will return one of the following strings depending on whether the
+ mail was auto-learned or not: "ham", "no", "spam", "disabled",
+ "failed", "unavailable".
+
+ If the message hit rules flagged with autolearn_force, it will also
+ include the status and the rules hit. For example:
+ "autolearn_force=yes (AUTOLEARNTEST_BODY)"
+
+ $report = $status->get_report ()
+ Deliver a "spam report" on the checked mail message. This contains
+ details of how many spam detection rules it triggered.
+
+ The report is returned as a multi-line string, with the lines
+ separated by "\n" characters.
+
+ $preview = $status->get_content_preview ()
+ Give a "preview" of the content.
+
+ This is returned as a multi-line string, with the lines separated by
+ "\n" characters, containing a fully-decoded, safe, plain-text sample
+ of the first few lines of the message body.
+
+ $msg = $status->get_message()
+ Return the object representing the message being scanned.
+
+ $status->rewrite_mail ()
+ Rewrite the mail message. This will at minimum add headers, and at
+ maximum MIME-encapsulate the message text, to reflect its spam or
+ not-spam status. The function will return a scalar of the rewritten
+ message.
+
+ The actual modifications depend on the configuration (see
+ "Mail::SpamAssassin::Conf" for more information).
+
+ The possible modifications are as follows:
+
+ To:, From: and Subject: modification on spam mails
+ Depending on the configuration, the To: and From: lines can have
+ a user-defined RFC 2822 comment appended for spam mail. The
+ subject line may have a user-defined string prepended to it for
+ spam mail.
+
+ X-Spam-* headers for all mails
+ Depending on the configuration, zero or more headers with names
+ beginning with "X-Spam-" will be added to mail depending on
+ whether it is spam or ham.
+
+ spam message with report_safe
+ If report_safe is set to true (1), then spam messages are
+ encapsulated into their own message/rfc822 MIME attachment
+ without any modifications being made.
+
+ If report_safe is set to false (0), then the message will only
+ have the above headers added/modified.
+
+ $status->action_depends_on_tags($tags, $code, @args)
+ Enqueue the supplied subroutine reference $code, to become runnable
+ when all the specified tags become available. The $tags may be a
+ simple scalar - a tag name, or a listref of tag names. The
+ subroutine &$code when called will be passed a "permessagestatus"
+ object as its first argument, followed by the supplied (optional)
+ list @args .
+
+ $status->set_tag($tagname, $value)
+ Set a template tag, as used in "add_header", report templates, etc.
+ This API is intended for use by plugins. Tag names will be converted
+ to an all-uppercase representation internally.
+
+ $value can be a simple scalar (string or number), or a reference to
+ an array, in which case the public method get_tag will join array
+ elements using a space as a separator, returning a single string for
+ backward compatibility.
+
+ $value can also be a subroutine reference, which will be evaluated
+ each time the template is expanded. The first argument passed by
+ get_tag to a called subroutine will be a PerMsgStatus object (this
+ module's object), followed by optional arguments provided by a caller
+ to get_tag.
+
+ Note that perl supports closures, which means that variables set in
+ the caller's scope can be accessed inside this "sub". For example:
+
+ my $text = "hello world!";
+ $status->set_tag("FOO", sub {
+ my $pms = shift;
+ return $text;
+ });
+
+ See "Mail::SpamAssassin::Conf"'s "TEMPLATE TAGS" section for more
+ details on how template tags are used.
+
+ "undef" will be returned if a tag by that name has not been defined.
+
+ $string = $status->get_tag($tagname)
+ Get the current value of a template tag, as used in "add_header",
+ report templates, etc. This API is intended for use by plugins. Tag
+ names will be converted to an all-uppercase representation
+ internally. See "Mail::SpamAssassin::Conf"'s "TEMPLATE TAGS" section
+ for more details on tags.
+
+ "undef" will be returned if a tag by that name has not been defined.
+
+ $string = $status->get_tag_raw($tagname, @args)
+ Similar to "get_tag", but keeps a tag name unchanged (does not
+ uppercase it), and does not convert arrayref tag values into a
+ single string.
+
+ $status->set_spamd_result_item($subref)
+ Set an entry for the spamd result log line. $subref should be a code
+ reference for a subroutine which will return a string in
+ 'name=VALUE' format, similar to the other entries in the spamd
+ result line:
+
+ Jul 17 14:10:47 radish spamd[16670]: spamd: result: Y 22 - ALL_NATURAL,
+ DATE_IN_FUTURE_03_06,DIET_1,DRUGS_ERECTILE,DRUGS_PAIN,
+ TEST_FORGED_YAHOO_RCVD,TEST_INVALID_DATE,TEST_NOREALNAME,
+ TEST_NORMAL_HTTP_TO_IP,UNDISC_RECIPS scantime=0.4,size=3138,user=jm,
+ uid=1000,required_score=5.0,rhost=localhost,raddr=127.0.0.1,
+ rport=33153,mid=<9PS291LhupY>,autolearn=spam
+
+ "name" and "VALUE" must not contain "=" or "," characters, as it is
+ important that these log lines are easy to parse.
+
+ The code reference will be called by spamd after the message has
+ been scanned, and the "PerMsgStatus::check()" method has returned.
+
+ $status->finish ()
+ Indicate that this $status object is finished with, and can be
+ destroyed.
+
+ If you are using SpamAssassin in a persistent environment, or
+ checking many mail messages from one "Mail::SpamAssassin" factory,
+ this method should be called to ensure Perl's garbage collection
+ will clean up old status objects.
+
+ $name = $status->get_current_eval_rule_name()
+ Return the name of the currently-running eval rule. "undef" is
+ returned if no eval rule is currently being run. Useful for plugins
+ to determine the current rule name while inside an eval test
+ function call.
+
+ $status->get_decoded_body_text_array ()
+ Returns the message body, with base64 or quoted-printable encodings
+ decoded, and non-text parts or non-inline attachments stripped.
+
+ It is returned as an array of strings, with each string representing
+ one newline-separated line of the body.
+
+ $status->get_decoded_stripped_body_text_array ()
+ Returns the message body, decoded (as described in
+ get_decoded_body_text_array()), with HTML rendered, and with
+ whitespace normalized.
+
+ It will always render text/html, and will use a heuristic to
+ determine if other text/* parts should be considered text/html.
+
+ It is returned as an array of strings, with each string representing
+ one 'paragraph'. Paragraphs, in plain-text mails, are
+ double-newline-separated blocks of multi-line text.
+
+ $status->get (header_name [, default_value])
+ Returns a message header, pseudo-header, real name or address.
+ "header_name" is the name of a mail header, such as 'Subject', 'To',
+ etc. If "default_value" is given, it will be used if the requested
+ "header_name" does not exist.
+
+ Appending ":raw" to the header name will inhibit decoding of
+ quoted-printable or base-64 encoded strings.
+
+ Appending a modifier ":addr" to a header field name will cause
+ everything except the first email address to be removed from the
+ header field. It is mainly applicable to header fields 'From',
+ 'Sender', 'To', 'Cc' along with their 'Resent-*' counterparts, and
+ the 'Return-Path'. For example, all of the following will result in
+ "example@foo":
+
+ example@foo
+ example@foo (Foo Blah)
+ example@foo, example@bar
+ display: example@foo (Foo Blah), example@bar ;
+ Foo Blah <example@foo>
+ "Foo Blah" <example@foo>
+ "'Foo Blah'" <example@foo>
+
+ Appending a modifier ":name" to a header field name will cause
+ everything except the first display name to be removed from the
+ header field. It is mainly applicable to header fields containing a
+ single mail address: 'From', 'Sender', along with their
+ 'Resent-From' and 'Resent-Sender' counterparts. For example, all of
+ the following will result in "Foo Blah". One level of single quotes
+ is stripped too, as it is often seen.
+
+ example@foo (Foo Blah)
+ example@foo (Foo Blah), example@bar
+ display: example@foo (Foo Blah), example@bar ;
+ Foo Blah <example@foo>
+ "Foo Blah" <example@foo>
+ "'Foo Blah'" <example@foo>
+
+ There are several special pseudo-headers that can be specified:
+
+ "ALL" can be used to mean the text of all the message's headers.
+ "ALL-TRUSTED" can be used to mean the text of all the message's
+ headers that could only have been added by trusted relays.
+ "ALL-INTERNAL" can be used to mean the text of all the message's
+ headers that could only have been added by internal relays.
+ "ALL-UNTRUSTED" can be used to mean the text of all the message's
+ headers that may have been added by untrusted relays. To make this
+ pseudo-header more useful for header rules the 'Received' header
+ that was added by the last trusted relay is included, even though it
+ can be trusted.
+ "ALL-EXTERNAL" can be used to mean the text of all the message's
+ headers that may have been added by external relays. Like
+ "ALL-UNTRUSTED" the 'Received' header added by the last internal
+ relay is included.
+ "ToCc" can be used to mean the contents of both the 'To' and 'Cc'
+ headers.
+ "EnvelopeFrom" is the address used in the 'MAIL FROM:' phase of the
+ SMTP transaction that delivered this message, if this data has been
+ made available by the SMTP server.
+ "MESSAGEID" is a symbol meaning all Message-Id's found in the
+ message; some mailing list software moves the real 'Message-Id' to
+ 'Resent-Message-Id' or 'X-Message-Id', then uses its own one in the
+ 'Message-Id' header. The value returned for this symbol is the text
+ from all 3 headers, separated by newlines.
+ "X-Spam-Relays-Untrusted" is the generated metadata of untrusted
+ relays the message has passed through
+ "X-Spam-Relays-Trusted" is the generated metadata of trusted relays
+ the message has passed through
+
+ $status->get_uri_list ()
+ Returns an array of all unique URIs found in the message. It takes a
+ combination of the URIs found in the rendered (decoded and HTML
+ stripped) body and the URIs found when parsing the HTML in the
+ message. Will also set $status->{uri_list} (the array as returned by
+ this function).
+
+ The returned array will include the "raw" URI as well as "slightly
+ cooked" versions. For example, the single URI
+ 'http://%77w%77.example.com/' will get turned into: (
+ 'http://%77w%77.example.com/', 'http://www.example.com/' )
+
+ $status->get_uri_detail_list ()
+ Returns a hash reference of all unique URIs found in the message and
+ various data about where the URIs were found in the message. It
+ takes a combination of the URIs found in the rendered (decoded and
+ HTML stripped) body and the URIs found when parsing the HTML in the
+ message. Will also set $status->{uri_detail_list} (the hash
+ reference as returned by this function). This function will also set
+ $status->{uri_domain_count} (count of unique domains).
+
+ The hash format looks something like this:
+
+ raw_uri => {
+ types => { a => 1, img => 1, parsed => 1 },
+ cleaned => [ canonified_uri ],
+ anchor_text => [ "click here", "no click here" ],
+ domains => { domain1 => 1, domain2 => 1 },
+ }
+
+ "raw_uri" is whatever the URI was in the message itself
+ (http://spamassassin.apache%2Eorg/).
+
+ "types" is a hash of the HTML tags (lowercase) which referenced the
+ raw_uri. *parsed* is a faked type which specifies that the raw_uri
+ was seen in the rendered text.
+
+ "cleaned" is an array of the raw and canonified version of the
+ raw_uri (http://spamassassin.apache%2Eorg/,
+ http://spamassassin.apache.org/).
+
+ "anchor_text" is an array of the anchor text (text between <a> and
+ </a>), if any, which linked to the URI.
+
+ "domains" is a hash of the domains found in the canonified URIs.
+
+ "hosts" is a hash of unstripped hostnames found in the canonified
+ URIs as hash keys, with their domain part stored as a value of each
+ hash entry.
+
+ $status->clear_test_state()
+ Clear test state, including test log messages from
+ "$status->test_log()".
+
+ $status->got_hit ($rulename, $desc_prepend [, name => value, ...])
+ Register a hit against a rule in the ruleset.
+
+ There are two mandatory arguments. These are $rulename, the name of
+ the rule that fired, and $desc_prepend, which is a short string that
+ will be prepended to the rule's "describe" string in output reports.
+
+ In addition, callers can supplement that with the following optional
+ data:
+
+ score => $num
+ Optional: the score to use for the rule hit. If unspecified, the
+ value from the "Mail::SpamAssassin::Conf" object's "{scores}"
+ hash will be used (a configured score), and in its absence the
+ "defscore" option value.
+
+ defscore => $num
+ Optional: the score to use for the rule hit if neither the
+ option "score" is provided, nor a configured score value is
+ provided.
+
+ value => $num
+ Optional: the value to assign to the rule; the default value is
+ 1. *tflags multiple* rules use values of greater than 1 to
+ indicate multiple hits. This value is accessible to meta rules.
+
+ ruletype => $type
+ Optional, but recommended: the rule type string. This is used in
+ the "hit_rule" plugin call, called by this method. If unset,
+ *'unknown'* is used.
+
+ tflags => $string
+ Optional: a string, i.e. a space-separated list of additional
+ tflags to be appended to an existing list of flags in
+ $self->{conf}->{tflags}, such as: "nice noautolearn multiple".
+ No syntax checks are performed.
+
+ description => $string
+ Optional: a custom rule description string. This is used in the
+ "hit_rule" plugin call, called by this method. If unset, the
+ static description is used.
+
+ Backward compatibility: the two mandatory arguments have been part
+ of this API since SpamAssassin 2.x. The optional *name => value*
+ pairs, however, are a new addition in SpamAssassin 3.2.0.
+
+ $status->create_fulltext_tmpfile (fulltext_ref)
+ This function creates a temporary file containing the passed scalar
+ reference data (typically the full/pristine text of the message).
+ This is typically used by external programs like pyzor and dccproc,
+ to avoid hangs due to buffering issues. Methods that need this,
+ should call $self->create_fulltext_tmpfile($fulltext) to retrieve
+ the temporary filename; it will be created if it has not already
+ been.
+
+ Note: This can only be called once until
+ $status->delete_fulltext_tmpfile() is called.
+
+ $status->delete_fulltext_tmpfile ()
+ Will clean up after a $status->create_fulltext_tmpfile() call.
+ Deletes the temporary file and uncaches the filename.
+
+ all_from_addrs_domains
+ This function returns all the various from addresses in a message
+ using all_from_addrs() and then returns only the domain names.
+
+SEE ALSO
+ "Mail::SpamAssassin" "spamassassin"
+
Added: spamassassin/site/full/3.4.x/doc/Mail_SpamAssassin_PersistentAddrList.html
URL: http://svn.apache.org/viewvc/spamassassin/site/full/3.4.x/doc/Mail_SpamAssassin_PersistentAddrList.html?rev=1567225&view=auto
==============================================================================
--- spamassassin/site/full/3.4.x/doc/Mail_SpamAssassin_PersistentAddrList.html (added)
+++ spamassassin/site/full/3.4.x/doc/Mail_SpamAssassin_PersistentAddrList.html Tue Feb 11 17:26:49 2014
@@ -0,0 +1,112 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<title>Mail::SpamAssassin::PersistentAddrList - persistent address list base class</title>
+<link rev="made" href="mailto:root@twm2005-dev.thoughtworthy.com" />
+</head>
+
+<body style="background-color: white">
+
+<p><a name="__index__"></a></p>
+<!-- INDEX BEGIN -->
+
+<ul>
+
+ <li><a href="#name">NAME</a></li>
+ <li><a href="#synopsis">SYNOPSIS</a></li>
+ <li><a href="#description">DESCRIPTION</a></li>
+ <li><a href="#methods">METHODS</a></li>
+</ul>
+<!-- INDEX END -->
+
+<hr />
+<p>
+</p>
+<h1><a name="name">NAME</a></h1>
+<p>Mail::SpamAssassin::PersistentAddrList - persistent address list base class</p>
+<p>
+</p>
+<hr />
+<h1><a name="synopsis">SYNOPSIS</a></h1>
+<pre>
+ my $factory = PersistentAddrListSubclass->new();
+ $spamtest->set_persistent_addr_list_factory ($factory);
+ ... call into SpamAssassin classes...</pre>
+<p>SpamAssassin will call:</p>
+<pre>
+ my $addrlist = $factory->new_checker($spamtest);
+ $entry = $addrlist->get_addr_entry ($addr);
+ ...</pre>
+<p>
+</p>
+<hr />
+<h1><a name="description">DESCRIPTION</a></h1>
+<p>All persistent address list implementations, used by the auto-whitelist
+code to track known-good email addresses, use this as a base class.</p>
+<p>See <code>Mail::SpamAssassin::DBBasedAddrList</code> for an example.</p>
+<p>
+</p>
+<hr />
+<h1><a name="methods">METHODS</a></h1>
+<dl>
+<dt><strong><a name="item_new">$factory = PersistentAddrListSubclass->new();</a></strong><br />
+</dt>
+<dd>
+This creates a factory object, which SpamAssassin will call to create
+a new checker object for the persistent address list.
+</dd>
+<p></p>
+<dt><strong><a name="item_new_checker">my $addrlist = $factory->new_checker();</a></strong><br />
+</dt>
+<dd>
+Create a new address-list checker object from the factory. Called by the
+SpamAssassin classes.
+</dd>
+<p></p>
+<dt><strong><a name="item_get_addr_entry">$entry = $addrlist->get_addr_entry ($addr);</a></strong><br />
+</dt>
+<dd>
+Given an email address <code>$addr</code>, return an entry object with the details of
+that address.
+</dd>
+<dd>
+<p>The entry object is a reference to a hash, which must contain at least
+two keys: <code>count</code>, which is the count of times that address has been
+encountered before; and <code>totscore</code>, which is the total of all scores for
+messages associated with that address. From these two fields, an average
+score will be calculated, and the score for the current message will be
+regressed towards that mean message score.</p>
+</dd>
+<dd>
+<p>The hash can contain whatever other data your back-end needs to store,
+under other keys.</p>
+</dd>
+<dd>
+<p>The method should never return <code>undef</code>, or a hash that does not contain
+a <code>count</code> key and a <code>totscore</code> key.</p>
+</dd>
+<p></p>
+<dt><strong><a name="item_add_score">$entry = $addrlist->add_score($entry, $score);</a></strong><br />
+</dt>
+<dd>
+This method should add the given score to the whitelist database for the
+given entry, and then return the new entry.
+</dd>
+<p></p>
+<dt><strong><a name="item_remove_entry">$entry = $addrlist->remove_entry ($entry);</a></strong><br />
+</dt>
+<dd>
+This method should remove the given entry from the whitelist database.
+</dd>
+<p></p>
+<dt><strong><a name="item_finish">$entry = $addrlist->finish ();</a></strong><br />
+</dt>
+<dd>
+Clean up, if necessary. Called by SpamAssassin when it has finished
+checking, or adding to, the auto-whitelist database.
+</dd>
+<p></p></dl>
+
+</body>
+
+</html>
Added: spamassassin/site/full/3.4.x/doc/Mail_SpamAssassin_PersistentAddrList.txt
URL: http://svn.apache.org/viewvc/spamassassin/site/full/3.4.x/doc/Mail_SpamAssassin_PersistentAddrList.txt?rev=1567225&view=auto
==============================================================================
--- spamassassin/site/full/3.4.x/doc/Mail_SpamAssassin_PersistentAddrList.txt (added)
+++ spamassassin/site/full/3.4.x/doc/Mail_SpamAssassin_PersistentAddrList.txt Tue Feb 11 17:26:49 2014
@@ -0,0 +1,59 @@
+NAME
+ Mail::SpamAssassin::PersistentAddrList - persistent address list base
+ class
+
+SYNOPSIS
+ my $factory = PersistentAddrListSubclass->new();
+ $spamtest->set_persistent_addr_list_factory ($factory);
+ ... call into SpamAssassin classes...
+
+ SpamAssassin will call:
+
+ my $addrlist = $factory->new_checker($spamtest);
+ $entry = $addrlist->get_addr_entry ($addr);
+ ...
+
+DESCRIPTION
+ All persistent address list implementations, used by the auto-whitelist
+ code to track known-good email addresses, use this as a base class.
+
+ See "Mail::SpamAssassin::DBBasedAddrList" for an example.
+
+METHODS
+ $factory = PersistentAddrListSubclass->new();
+ This creates a factory object, which SpamAssassin will call to
+ create a new checker object for the persistent address list.
+
+ my $addrlist = $factory->new_checker();
+ Create a new address-list checker object from the factory. Called by
+ the SpamAssassin classes.
+
+ $entry = $addrlist->get_addr_entry ($addr);
+ Given an email address $addr, return an entry object with the
+ details of that address.
+
+ The entry object is a reference to a hash, which must contain at
+ least two keys: "count", which is the count of times that address
+ has been encountered before; and "totscore", which is the total of
+ all scores for messages associated with that address. From these two
+ fields, an average score will be calculated, and the score for the
+ current message will be regressed towards that mean message score.
+
+ The hash can contain whatever other data your back-end needs to
+ store, under other keys.
+
+ The method should never return "undef", or a hash that does not
+ contain a "count" key and a "totscore" key.
+
+ $entry = $addrlist->add_score($entry, $score);
+ This method should add the given score to the whitelist database for
+ the given entry, and then return the new entry.
+
+ $entry = $addrlist->remove_entry ($entry);
+ This method should remove the given entry from the whitelist
+ database.
+
+ $entry = $addrlist->finish ();
+ Clean up, if necessary. Called by SpamAssassin when it has finished
+ checking, or adding to, the auto-whitelist database.
+