You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by km...@apache.org on 2014/02/11 18:26:52 UTC

svn commit: r1567225 [7/15] - in /spamassassin/site/full/3.4.x: ./ doc/

Added: spamassassin/site/full/3.4.x/doc/Mail_SpamAssassin_PerMsgStatus.html
URL: http://svn.apache.org/viewvc/spamassassin/site/full/3.4.x/doc/Mail_SpamAssassin_PerMsgStatus.html?rev=1567225&view=auto
==============================================================================
--- spamassassin/site/full/3.4.x/doc/Mail_SpamAssassin_PerMsgStatus.html (added)
+++ spamassassin/site/full/3.4.x/doc/Mail_SpamAssassin_PerMsgStatus.html Tue Feb 11 17:26:49 2014
@@ -0,0 +1,814 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<title>Mail::SpamAssassin::PerMsgStatus - per-message status</title>
+<link rev="made" href="mailto:root@twm2005-dev.thoughtworthy.com" />
+</head>
+
+<body style="background-color: white">
+
+<p><a name="__index__"></a></p>
+<!-- INDEX BEGIN -->
+
+<ul>
+
+	<li><a href="#name">NAME</a></li>
+	<li><a href="#synopsis">SYNOPSIS</a></li>
+	<li><a href="#description">DESCRIPTION</a></li>
+	<li><a href="#methods">METHODS</a></li>
+	<li><a href="#see_also">SEE ALSO</a></li>
+</ul>
+<!-- INDEX END -->
+
+<hr />
+<p>
+</p>
+<h1><a name="name">NAME</a></h1>
+<p>Mail::SpamAssassin::PerMsgStatus - per-message status (spam or not-spam)</p>
+<p>
+</p>
+<hr />
+<h1><a name="synopsis">SYNOPSIS</a></h1>
+<pre>
+  my $spamtest = new Mail::SpamAssassin ({
+    'rules_filename'      =&gt; '/etc/spamassassin.rules',
+    'userprefs_filename'  =&gt; $ENV{HOME}.'/.spamassassin/user_prefs'
+  });
+  my $mail = $spamtest-&gt;parse();</pre>
+<pre>
+  my $status = $spamtest-&gt;check ($mail);</pre>
+<pre>
+  my $rewritten_mail;
+  if ($status-&gt;is_spam()) {
+    $rewritten_mail = $status-&gt;rewrite_mail ();
+  }
+  ...</pre>
+<p>
+</p>
+<hr />
+<h1><a name="description">DESCRIPTION</a></h1>
+<p>The Mail::SpamAssassin <a href="#item_check"><code>check()</code></a> method returns an object of this
+class.  This object encapsulates all the per-message state.</p>
+<p>
+</p>
+<hr />
+<h1><a name="methods">METHODS</a></h1>
+<dl>
+<dt><strong><a name="item_check">$status-&gt;check ()</a></strong><br />
+</dt>
+<dd>
+Runs the SpamAssassin rules against the message pointed to by the object.
+</dd>
+<p></p>
+<dt><strong><a name="item_learn">$status-&gt;<code>learn()</code></a></strong><br />
+</dt>
+<dd>
+After a mail message has been checked, this method can be called.  If the score
+is outside a certain range around the threshold, i.e. if the message is judged
+more-or-less definitely spam or definitely non-spam, it will be fed into
+SpamAssassin's learning systems (currently the naive Bayesian classifier),
+so that future similar mails will be caught.
+</dd>
+<p></p>
+<dt><strong><a name="item_get_autolearn_points">$score = $status-&gt;<code>get_autolearn_points()</code></a></strong><br />
+</dt>
+<dd>
+Return the message's score as computed for auto-learning.  Certain tests are
+ignored:
+</dd>
+<dd>
+<pre>
+  - rules with tflags set to 'learn' (the Bayesian rules)</pre>
+</dd>
+<dd>
+<pre>
+  - rules with tflags set to 'userconf' (user white/black-listing rules, etc)</pre>
+</dd>
+<dd>
+<pre>
+  - rules with tflags set to 'noautolearn'</pre>
+</dd>
+<dd>
+<p>Also note that auto-learning occurs using scores from either scoreset 0 or 1,
+depending on what scoreset is used during message check.  It is likely that the
+message check and auto-learn scores will be different.</p>
+</dd>
+<p></p>
+<dt><strong><a name="item_get_head_only_points">$score = $status-&gt;<code>get_head_only_points()</code></a></strong><br />
+</dt>
+<dd>
+Return the message's score as computed for auto-learning, ignoring
+all rules except for header-based ones.
+</dd>
+<p></p>
+<dt><strong><a name="item_get_learned_points">$score = $status-&gt;<code>get_learned_points()</code></a></strong><br />
+</dt>
+<dd>
+Return the message's score as computed for auto-learning, ignoring
+all rules except for learning-based ones.
+</dd>
+<p></p>
+<dt><strong><a name="item_get_body_only_points">$score = $status-&gt;<code>get_body_only_points()</code></a></strong><br />
+</dt>
+<dd>
+Return the message's score as computed for auto-learning, ignoring
+all rules except for body-based ones.
+</dd>
+<p></p>
+<dt><strong><a name="item_get_autolearn_force_status">$score = $status-&gt;<code>get_autolearn_force_status()</code></a></strong><br />
+</dt>
+<dd>
+Return whether a message's score included any rules that are flagged as 
+autolearn_force.
+
+</dd>
+<dd>
+<pre>
+
+=cut</pre>
+</dd>
+<dd>
+<p>sub get_autolearn_force_status {
+  my ($self) = @_;
+  $self-&gt;_get_autolearn_points();
+  return $self-&gt;{autolearn_force};
+}</p>
+</dd>
+<p></p>
+<dt><strong><a name="item_get_autolearn_force_names">$rule_names = $status-&gt;<code>get_autolearn_force_names()</code></a></strong><br />
+</dt>
+<dd>
+Return a comma-separated list of rule names if a message's
+score included any rules that are flagged as autolearn_force.
+
+</dd>
+<dd>
+<pre>
+
+=cut</pre>
+</dd>
+<dd>
+<p>sub get_autolearn_force_names {
+  my ($self) = @_;
+  my ($names);</p>
+</dd>
+<dd>
+<pre>
+  $self-&gt;_get_autolearn_points();
+  $names = $self-&gt;{autolearn_force_names};
+ 
+  if (defined $names) { 
+    #remove trailing comma
+    $names =~ s/,$//;
+  } else {
+    $names = &quot;&quot;;
+  }</pre>
+</dd>
+<dd>
+<pre>
+  return $names;
+}</pre>
+</dd>
+<dd>
+<p>sub _get_autolearn_points {
+  my ($self) = @_;</p>
+</dd>
+<dd>
+<pre>
+  return if (exists $self-&gt;{autolearn_points});
+  # ensure it only gets computed once, even if we return early
+  $self-&gt;{autolearn_points} = 0;</pre>
+</dd>
+<dd>
+<pre>
+  # This function needs to use use sum($score[scoreset % 2]) not just {score}.
+  # otherwise we shift what we autolearn on and it gets really wierd.  - tvd
+  my $orig_scoreset = $self-&gt;{conf}-&gt;get_score_set();
+  my $new_scoreset = $orig_scoreset;
+  my $scores = $self-&gt;{conf}-&gt;{scores};</pre>
+</dd>
+<dd>
+<pre>
+  if (($orig_scoreset &amp; 2) == 0) { # we don't need to recompute
+    dbg(&quot;learn: auto-learn: currently using scoreset $orig_scoreset&quot;);
+  }
+  else {
+    $new_scoreset = $orig_scoreset &amp; ~2;
+    dbg(&quot;learn: auto-learn: currently using scoreset $orig_scoreset, recomputing score based on scoreset $new_scoreset&quot;);
+    $scores = $self-&gt;{conf}-&gt;{scoreset}-&gt;[$new_scoreset];
+  }</pre>
+</dd>
+<dd>
+<pre>
+  my $tflags = $self-&gt;{conf}-&gt;{tflags};
+  my $points = 0;</pre>
+</dd>
+<dd>
+<pre>
+  # Just in case this function is called multiple times, clear out the
+  # previous calculated values
+  $self-&gt;{learned_points} = 0;
+  $self-&gt;{body_only_points} = 0;
+  $self-&gt;{head_only_points} = 0;
+  $self-&gt;{autolearn_force} = 0;</pre>
+</dd>
+<dd>
+<pre>
+  foreach my $test (@{$self-&gt;{test_names_hit}}) {
+    # According to the documentation, noautolearn, userconf, and learn
+    # rules are ignored for autolearning.
+    if (exists $tflags-&gt;{$test}) {
+      next if $tflags-&gt;{$test} =~ /\bnoautolearn\b/;
+      next if $tflags-&gt;{$test} =~ /\buserconf\b/;</pre>
+</dd>
+<dd>
+<pre>
+      # Keep track of the learn points for an additional autolearn check.
+      # Use the original scoreset since it'll be 0 in sets 0 and 1.
+      if ($tflags-&gt;{$test} =~ /\blearn\b/) {
+        # we're guaranteed that the score will be defined
+        $self-&gt;{learned_points} += $self-&gt;{conf}-&gt;{scoreset}-&gt;[$orig_scoreset]-&gt;{$test};
+        next;
+      }
+    
+      #IF ANY RULES ARE AUTOLEARN FORCE, SET THAT FLAG  
+      if ($tflags-&gt;{$test} =~ /\bautolearn_force\b/) {
+        $self-&gt;{autolearn_force}++;
+        #ADD RULE NAME TO LIST
+        $self-&gt;{autolearn_force_names}.=&quot;$test,&quot;;
+      }
+    }</pre>
+</dd>
+<dd>
+<pre>
+    # ignore tests with 0 score (or undefined) in this scoreset
+    next if !$scores-&gt;{$test};</pre>
+</dd>
+<dd>
+<pre>
+    # Go ahead and add points to the proper locations 
+    # Changed logic because in testing, I was getting both head and body. Bug 5503
+    if ($self-&gt;{conf}-&gt;maybe_header_only ($test)) {
+      $self-&gt;{head_only_points} += $scores-&gt;{$test};
+      dbg(&quot;learn: auto-learn: adding head_only points $scores-&gt;{$test}&quot;);
+    } elsif ($self-&gt;{conf}-&gt;maybe_body_only ($test)) {
+      $self-&gt;{body_only_points} += $scores-&gt;{$test};
+      dbg(&quot;learn: auto-learn: adding body_only points $scores-&gt;{$test}&quot;);
+    } else {
+      dbg(&quot;learn: auto-learn: not considered head or body scores: $scores-&gt;{$test}&quot;);
+    }</pre>
+</dd>
+<dd>
+<pre>
+    $points += $scores-&gt;{$test};
+  }</pre>
+</dd>
+<dd>
+<pre>
+  # Figure out the final value we'll use for autolearning
+  $points = (sprintf &quot;%0.3f&quot;, $points) + 0;
+  dbg(&quot;learn: auto-learn: message score: &quot;.$self-&gt;{score}.&quot;, computed score for autolearn: $points&quot;);</pre>
+</dd>
+<dd>
+<pre>
+  $self-&gt;{autolearn_points} = $points;
+}</pre>
+</dd>
+<dd>
+<p>###########################################################################</p>
+</dd>
+<p></p>
+<dt><strong><a name="item_is_spam">$isspam = $status-&gt;is_spam ()</a></strong><br />
+</dt>
+<dd>
+After a mail message has been checked, this method can be called.  It will
+return 1 for mail determined likely to be spam, 0 if it does not seem
+spam-like.
+</dd>
+<p></p>
+<dt><strong><a name="item_get_names_of_tests_hit">$list = $status-&gt;get_names_of_tests_hit ()</a></strong><br />
+</dt>
+<dd>
+After a mail message has been checked, this method can be called. It will
+return a comma-separated string, listing all the symbolic test names
+of the tests which were triggered by the mail.
+</dd>
+<p></p>
+<dt><strong><a name="item_get_names_of_subtests_hit">$list = $status-&gt;get_names_of_subtests_hit ()</a></strong><br />
+</dt>
+<dd>
+After a mail message has been checked, this method can be called.  It will
+return a comma-separated string, listing all the symbolic test names of the
+meta-rule sub-tests which were triggered by the mail.  Sub-tests are the
+normally-hidden rules, which score 0 and have names beginning with two
+underscores, used in meta rules.
+</dd>
+<p></p>
+<dt><strong><a name="item_get_score">$num = $status-&gt;get_score ()</a></strong><br />
+</dt>
+<dd>
+After a mail message has been checked, this method can be called.  It will
+return the message's score.
+</dd>
+<p></p>
+<dt><strong><a name="item_get_required_score">$num = $status-&gt;get_required_score ()</a></strong><br />
+</dt>
+<dd>
+After a mail message has been checked, this method can be called.  It will
+return the score required for a mail to be considered spam.
+</dd>
+<p></p>
+<dt><strong><a name="item_get_autolearn_status">$num = $status-&gt;get_autolearn_status ()</a></strong><br />
+</dt>
+<dd>
+After a mail message has been checked, this method can be called.  It will
+return one of the following strings depending on whether the mail was
+auto-learned or not: ``ham'', ``no'', ``spam'', ``disabled'', ``failed'', ``unavailable''.
+</dd>
+<dd>
+<p>If the message is flagged with autolearn_force, the returned string will also
+include the status and the rules hit.  For example: ``autolearn_force=yes (AUTOLEARNTEST_BODY)''</p>
+</dd>
+<p></p>
+<dt><strong><a name="item_get_report">$report = $status-&gt;get_report ()</a></strong><br />
+</dt>
+<dd>
+Deliver a ``spam report'' on the checked mail message.  This contains details of
+how many spam detection rules it triggered.
+</dd>
+<dd>
+<p>The report is returned as a multi-line string, with the lines separated by
+<code>\n</code> characters.</p>
+</dd>
+<p></p>
+<dt><strong><a name="item_get_content_preview">$preview = $status-&gt;get_content_preview ()</a></strong><br />
+</dt>
+<dd>
+Give a ``preview'' of the content.
+</dd>
+<dd>
+<p>This is returned as a multi-line string, with the lines separated by <code>\n</code>
+characters, containing a fully-decoded, safe, plain-text sample of the first
+few lines of the message body.</p>
+</dd>
+<p></p>
+<dt><strong><a name="item_get_message">$msg = $status-&gt;<code>get_message()</code></a></strong><br />
+</dt>
+<dd>
+Return the object representing the message being scanned.
+</dd>
+<p></p>
+<dt><strong><a name="item_rewrite_mail">$status-&gt;rewrite_mail ()</a></strong><br />
+</dt>
+<dd>
+Rewrite the mail message.  This will at minimum add headers, and at
+maximum MIME-encapsulate the message text, to reflect its spam or not-spam
+status.  The function will return a scalar of the rewritten message.
+</dd>
+<dd>
+<p>The actual modifications depend on the configuration (see
+<code>Mail::SpamAssassin::Conf</code> for more information).</p>
+</dd>
+<dd>
+<p>The possible modifications are as follows:</p>
+</dd>
+<dl>
+<dt><strong><a name="item_to_3a_2c_from_3a_and_subject_3a_modification_on_sp">To:, From: and Subject: modification on spam mails</a></strong><br />
+</dt>
+<dd>
+Depending on the configuration, the To: and From: lines can have a
+user-defined RFC 2822 comment appended for spam mail. The subject line
+may have a user-defined string prepended to it for spam mail.
+</dd>
+<p></p>
+<dt><strong><a name="item_x_2dspam_2d_2a_headers_for_all_mails">X-Spam-* headers for all mails</a></strong><br />
+</dt>
+<dd>
+Depending on the configuration, zero or more headers with names
+beginning with <code>X-Spam-</code> will be added to mail depending on whether
+it is spam or ham.
+</dd>
+<p></p>
+<dt><strong><a name="item_spam_message_with_report_safe">spam message with report_safe</a></strong><br />
+</dt>
+<dd>
+If report_safe is set to true (1), then spam messages are encapsulated
+into their own message/rfc822 MIME attachment without any modifications
+being made.
+</dd>
+<dd>
+<p>If report_safe is set to false (0), then the message will only have the
+above headers added/modified.</p>
+</dd>
+<p></p></dl>
+<dt><strong><a name="item_action_depends_on_tags">$status-&gt;action_depends_on_tags($tags, $code, @args)</a></strong><br />
+</dt>
+<dd>
+Enqueue the supplied subroutine reference <code>$code</code>, to become runnable when
+all the specified tags become available. The <code>$tags</code> may be a simple
+scalar - a tag name, or a listref of tag names. The subroutine <code>&amp;$code</code>
+when called will be passed a <code>permessagestatus</code> object as its first argument,
+followed by the supplied (optional) list <code>@args</code> .
+</dd>
+<p></p>
+<dt><strong><a name="item_set_tag">$status-&gt;set_tag($tagname, $value)</a></strong><br />
+</dt>
+<dd>
+Set a template tag, as used in <code>add_header</code>, report templates, etc.
+This API is intended for use by plugins.  Tag names will be converted
+to an all-uppercase representation internally.
+</dd>
+<dd>
+<p><code>$value</code> can be a simple scalar (string or number), or a reference to an
+array, in which case the public method get_tag will join array elements
+using a space as a separator, returning a single string for backward
+compatibility.</p>
+</dd>
+<dd>
+<p><code>$value</code> can also be a subroutine reference, which will be evaluated
+each time the template is expanded. The first argument passed by get_tag
+to a called subroutine will be a PerMsgStatus object (this module's object),
+followed by optional arguments provided by a caller to get_tag.</p>
+</dd>
+<dd>
+<p>Note that perl supports closures, which means that variables set in the
+caller's scope can be accessed inside this <code>sub</code>. For example:</p>
+</dd>
+<dd>
+<pre>
+    my $text = &quot;hello world!&quot;;
+    $status-&gt;set_tag(&quot;FOO&quot;, sub {
+              my $pms = shift;
+              return $text;
+            });</pre>
+</dd>
+<dd>
+<p>See <code>Mail::SpamAssassin::Conf</code>'s <code>TEMPLATE TAGS</code> section for more details
+on how template tags are used.</p>
+</dd>
+<dd>
+<p><code>undef</code> will be returned if a tag by that name has not been defined.</p>
+</dd>
+<p></p>
+<dt><strong><a name="item_get_tag">$string = $status-&gt;<code>get_tag($tagname)</code></a></strong><br />
+</dt>
+<dd>
+Get the current value of a template tag, as used in <code>add_header</code>, report
+templates, etc. This API is intended for use by plugins.  Tag names will be
+converted to an all-uppercase representation internally.  See
+<code>Mail::SpamAssassin::Conf</code>'s <code>TEMPLATE TAGS</code> section for more details on
+tags.
+</dd>
+<dd>
+<p><code>undef</code> will be returned if a tag by that name has not been defined.</p>
+</dd>
+<p></p>
+<dt><strong><a name="item_get_tag_raw">$string = $status-&gt;get_tag_raw($tagname, @args)</a></strong><br />
+</dt>
+<dd>
+Similar to <a href="#item_get_tag"><code>get_tag</code></a>, but keeps a tag name unchanged (does not uppercase it),
+and does not convert arrayref tag values into a single string.
+</dd>
+<p></p>
+<dt><strong><a name="item_set_spamd_result_item">$status-&gt;<code>set_spamd_result_item($subref)</code></a></strong><br />
+</dt>
+<dd>
+Set an entry for the spamd result log line.  <code>$subref</code> should be a code
+reference for a subroutine which will return a string in <code>'name=VALUE'</code>
+format, similar to the other entries in the spamd result line:
+</dd>
+<dd>
+<pre>
+  Jul 17 14:10:47 radish spamd[16670]: spamd: result: Y 22 - ALL_NATURAL,
+  DATE_IN_FUTURE_03_06,DIET_1,DRUGS_ERECTILE,DRUGS_PAIN,
+  TEST_FORGED_YAHOO_RCVD,TEST_INVALID_DATE,TEST_NOREALNAME,
+  TEST_NORMAL_HTTP_TO_IP,UNDISC_RECIPS scantime=0.4,size=3138,user=jm,
+  uid=1000,required_score=5.0,rhost=localhost,raddr=127.0.0.1,
+  rport=33153,mid=&lt;9PS291LhupY&gt;,autolearn=spam</pre>
+</dd>
+<dd>
+<p><code>name</code> and <code>VALUE</code> must not contain <code>=</code> or <code>,</code> characters, as it
+is important that these log lines are easy to parse.</p>
+</dd>
+<dd>
+<p>The code reference will be called by spamd after the message has been scanned,
+and the <a href="#item_check"><code>PerMsgStatus::check()</code></a> method has returned.</p>
+</dd>
+<p></p>
+<dt><strong><a name="item_finish">$status-&gt;finish ()</a></strong><br />
+</dt>
+<dd>
+Indicate that this <code>$status</code> object is finished with, and can be destroyed.
+</dd>
+<dd>
+<p>If you are using SpamAssassin in a persistent environment, or checking many
+mail messages from one <code>Mail::SpamAssassin</code> factory, this method should be
+called to ensure Perl's garbage collection will clean up old status objects.</p>
+</dd>
+<p></p>
+<dt><strong><a name="item_get_current_eval_rule_name">$name = $status-&gt;<code>get_current_eval_rule_name()</code></a></strong><br />
+</dt>
+<dd>
+Return the name of the currently-running eval rule.  <code>undef</code> is
+returned if no eval rule is currently being run.  Useful for plugins
+to determine the current rule name while inside an eval test function
+call.
+</dd>
+<p></p>
+<dt><strong><a name="item_get_decoded_body_text_array">$status-&gt;get_decoded_body_text_array ()</a></strong><br />
+</dt>
+<dd>
+Returns the message body, with <strong>base64</strong> or <strong>quoted-printable</strong> encodings
+decoded, and non-text parts or non-inline attachments stripped.
+</dd>
+<dd>
+<p>It is returned as an array of strings, with each string representing
+one newline-separated line of the body.</p>
+</dd>
+<p></p>
+<dt><strong><a name="item_get_decoded_stripped_body_text_array">$status-&gt;get_decoded_stripped_body_text_array ()</a></strong><br />
+</dt>
+<dd>
+Returns the message body, decoded (as described in
+get_decoded_body_text_array()), with HTML rendered, and with whitespace
+normalized.
+</dd>
+<dd>
+<p>It will always render text/html, and will use a heuristic to determine if other
+text/* parts should be considered text/html.</p>
+</dd>
+<dd>
+<p>It is returned as an array of strings, with each string representing one
+'paragraph'.  Paragraphs, in plain-text mails, are double-newline-separated
+blocks of multi-line text.</p>
+</dd>
+<p></p>
+<dt><strong><a name="item_get">$status-&gt;get (header_name [, default_value])</a></strong><br />
+</dt>
+<dd>
+Returns a message header, pseudo-header, real name or address.
+<code>header_name</code> is the name of a mail header, such as 'Subject', 'To',
+etc.  If <code>default_value</code> is given, it will be used if the requested
+<code>header_name</code> does not exist.
+</dd>
+<dd>
+<p>Appending <code>:raw</code> to the header name will inhibit decoding of quoted-printable
+or base-64 encoded strings.</p>
+</dd>
+<dd>
+<p>Appending a modifier <code>:addr</code> to a header field name will cause everything
+except the first email address to be removed from the header field.  It is
+mainly applicable to header fields 'From', 'Sender', 'To', 'Cc' along with
+their 'Resent-*' counterparts, and the 'Return-Path'. For example, all of
+the following will result in ``example@foo'':</p>
+</dd>
+<dl>
+<dt><strong><a name="item_example_40foo">example@foo</a></strong><br />
+</dt>
+<dt><strong><a name="item_foo">example@foo (Foo Blah)</a></strong><br />
+</dt>
+<dt><strong><a name="item_example_40foo_2c_example_40bar">example@foo, example@bar</a></strong><br />
+</dt>
+<dt><strong>display: example@foo (Foo Blah), example@bar ;</strong><br />
+</dt>
+<dt><strong><a name="item_foo_blah__3cexample_40foo_3e">Foo Blah &lt;example@foo&gt;</a></strong><br />
+</dt>
+<dt><strong><a name="item__22foo_blah_22__3cexample_40foo_3e">``Foo Blah'' &lt;example@foo&gt;</a></strong><br />
+</dt>
+<dt><strong><a name="item__22_27foo_blah_27_22__3cexample_40foo_3e">``'Foo Blah''' &lt;example@foo&gt;</a></strong><br />
+</dt>
+</dl>
+<p>Appending a modifier <code>:name</code> to a header field name will cause everything
+except the first display name to be removed from the header field. It is
+mainly applicable to header fields containing a single mail address: 'From',
+'Sender', along with their 'Resent-From' and 'Resent-Sender' counterparts.
+For example, all of the following will result in ``Foo Blah''. One level of
+single quotes is stripped too, as it is often seen.</p>
+<dl>
+<dt><strong>example@foo (Foo Blah)</strong><br />
+</dt>
+<dt><strong>example@foo (Foo Blah), example@bar</strong><br />
+</dt>
+<dt><strong>display: example@foo (Foo Blah), example@bar ;</strong><br />
+</dt>
+<dt><strong>Foo Blah &lt;example@foo&gt;</strong><br />
+</dt>
+<dt><strong>``Foo Blah'' &lt;example@foo&gt;</strong><br />
+</dt>
+<dt><strong>``'Foo Blah''' &lt;example@foo&gt;</strong><br />
+</dt>
+</dl>
+<p>There are several special pseudo-headers that can be specified:</p>
+<dl>
+<dt><strong><a name="item_all_can_be_used_to_mean_the_text_of_all_the_messag"><code>ALL</code> can be used to mean the text of all the message's headers.</a></strong><br />
+</dt>
+<dt><strong><a name="item_all_2dtrusted_can_be_used_to_mean_the_text_of_all_"><code>ALL-TRUSTED</code> can be used to mean the text of all the message's headers
+that could only have been added by trusted relays.</a></strong><br />
+</dt>
+<dt><strong><a name="item_all_2dinternal_can_be_used_to_mean_the_text_of_all"><code>ALL-INTERNAL</code> can be used to mean the text of all the message's headers
+that could only have been added by internal relays.</a></strong><br />
+</dt>
+<dt><strong><a name="item_all_2duntrusted_can_be_used_to_mean_the_text_of_al"><code>ALL-UNTRUSTED</code> can be used to mean the text of all the message's
+headers that may have been added by untrusted relays.  To make this
+pseudo-header more useful for header rules the 'Received' header that was added
+by the last trusted relay is included, even though it can be trusted.</a></strong><br />
+</dt>
+<dt><strong><a name="item_all_2dexternal_can_be_used_to_mean_the_text_of_all"><code>ALL-EXTERNAL</code> can be used to mean the text of all the message's headers
+that may have been added by external relays.  Like <code>ALL-UNTRUSTED</code> the
+'Received' header added by the last internal relay is included.</a></strong><br />
+</dt>
+<dt><strong><a name="item_tocc_can_be_used_to_mean_the_contents_of_both_the_"><code>ToCc</code> can be used to mean the contents of both the 'To' and 'Cc'
+headers.</a></strong><br />
+</dt>
+<dt><strong><a name="item_envelopefrom_is_the_address_used_in_the__27mail_fr"><code>EnvelopeFrom</code> is the address used in the 'MAIL FROM:' phase of the SMTP
+transaction that delivered this message, if this data has been made available
+by the SMTP server.</a></strong><br />
+</dt>
+<dt><strong><a name="item_messageid_is_a_symbol_meaning_all_message_2did_27s"><code>MESSAGEID</code> is a symbol meaning all Message-Id's found in the message;
+some mailing list software moves the real 'Message-Id' to 'Resent-Message-Id'
+or 'X-Message-Id', then uses its own one in the 'Message-Id' header.  The value
+returned for this symbol is the text from all 3 headers, separated by newlines.</a></strong><br />
+</dt>
+<dt><strong><a name="item_x_2dspam_2drelays_2duntrusted_is_the_generated_met"><code>X-Spam-Relays-Untrusted</code> is the generated metadata of untrusted relays
+the message has passed through</a></strong><br />
+</dt>
+<dt><strong><a name="item_x_2dspam_2drelays_2dtrusted_is_the_generated_metad"><code>X-Spam-Relays-Trusted</code> is the generated metadata of trusted relays
+the message has passed through</a></strong><br />
+</dt>
+</dl>
+<dt><strong><a name="item_get_uri_list">$status-&gt;get_uri_list ()</a></strong><br />
+</dt>
+<dd>
+Returns an array of all unique URIs found in the message.  It takes
+a combination of the URIs found in the rendered (decoded and HTML
+stripped) body and the URIs found when parsing the HTML in the message.
+Will also set $status-&gt;{uri_list} (the array as returned by this function).
+</dd>
+<dd>
+<p>The returned array will include the ``raw'' URI as well as
+``slightly cooked'' versions.  For example, the single URI
+'http://%77&amp;#00119;%77.example.com/' will get turned into:
+( 'http://%77&amp;#00119;%77.example.com/', 'http://www.example.com/' )</p>
+</dd>
+<p></p>
+<dt><strong><a name="item_get_uri_detail_list">$status-&gt;get_uri_detail_list ()</a></strong><br />
+</dt>
+<dd>
+Returns a hash reference of all unique URIs found in the message and
+various data about where the URIs were found in the message.  It takes a
+combination of the URIs found in the rendered (decoded and HTML stripped)
+body and the URIs found when parsing the HTML in the message.  Will also
+set $status-&gt;{uri_detail_list} (the hash reference as returned by this
+function).  This function will also set $status-&gt;{uri_domain_count} (count of
+unique domains).
+</dd>
+<dd>
+<p>The hash format looks something like this:</p>
+</dd>
+<dd>
+<pre>
+  raw_uri =&gt; {
+    types =&gt; { a =&gt; 1, img =&gt; 1, parsed =&gt; 1 },
+    cleaned =&gt; [ canonified_uri ],
+    anchor_text =&gt; [ &quot;click here&quot;, &quot;no click here&quot; ],
+    domains =&gt; { domain1 =&gt; 1, domain2 =&gt; 1 },
+  }</pre>
+</dd>
+<dd>
+<p><code>raw_uri</code> is whatever the URI was in the message itself
+(http://spamassassin.apache%2Eorg/).</p>
+</dd>
+<dd>
+<p><code>types</code> is a hash of the HTML tags (lowercase) which referenced
+the raw_uri.  <em>parsed</em> is a faked type which specifies that the
+raw_uri was seen in the rendered text.</p>
+</dd>
+<dd>
+<p><code>cleaned</code> is an array of the raw and canonified version of the raw_uri
+(http://spamassassin.apache%2Eorg/, <a href="http://spamassassin.apache.org/">http://spamassassin.apache.org/</a>).</p>
+</dd>
+<dd>
+<p><code>anchor_text</code> is an array of the anchor text (text between &lt;a&gt; and
+&lt;/a&gt;), if any, which linked to the URI.</p>
+</dd>
+<dd>
+<p><code>domains</code> is a hash of the domains found in the canonified URIs.</p>
+</dd>
+<dd>
+<p><code>hosts</code> is a hash of unstripped hostnames found in the canonified URIs
+as hash keys, with their domain part stored as a value of each hash entry.</p>
+</dd>
+<p></p>
+<dt><strong><a name="item_clear_test_state">$status-&gt;<code>clear_test_state()</code></a></strong><br />
+</dt>
+<dd>
+Clear test state, including test log messages from <code>$status-&gt;test_log()</code>.
+</dd>
+<p></p>
+<dt><strong><a name="item_got_hit">$status-&gt;got_hit ($rulename, $desc_prepend [, name =&gt; value, ...])</a></strong><br />
+</dt>
+<dd>
+Register a hit against a rule in the ruleset.
+</dd>
+<dd>
+<p>There are two mandatory arguments. These are <code>$rulename</code>, the name of the rule
+that fired, and <code>$desc_prepend</code>, which is a short string that will be
+prepended to the rule's <code>describe</code> string in output reports.</p>
+</dd>
+<dd>
+<p>In addition, callers can supplement that with the following optional
+data:</p>
+</dd>
+<dl>
+<dt><strong><a name="item_score__3d_3e__24num">score =&gt; $num</a></strong><br />
+</dt>
+<dd>
+Optional: the score to use for the rule hit.  If unspecified,
+the value from the <code>Mail::SpamAssassin::Conf</code> object's <code>{scores}</code>
+hash will be used (a configured score), and in its absence the
+<code>defscore</code> option value.
+</dd>
+<p></p>
+<dt><strong><a name="item_defscore__3d_3e__24num">defscore =&gt; $num</a></strong><br />
+</dt>
+<dd>
+Optional: the score to use for the rule hit if neither the
+option <code>score</code> is provided, nor a configured score value is provided.
+</dd>
+<p></p>
+<dt><strong><a name="item_value__3d_3e__24num">value =&gt; $num</a></strong><br />
+</dt>
+<dd>
+Optional: the value to assign to the rule; the default value is <code>1</code>.
+<em>tflags multiple</em> rules use values of greater than 1 to indicate
+multiple hits.  This value is accessible to meta rules.
+</dd>
+<p></p>
+<dt><strong><a name="item_ruletype__3d_3e__24type">ruletype =&gt; $type</a></strong><br />
+</dt>
+<dd>
+Optional, but recommended: the rule type string.  This is used in the
+<code>hit_rule</code> plugin call, called by this method.  If unset, <em>'unknown'</em> is
+used.
+</dd>
+<p></p>
+<dt><strong><a name="item_tflags__3d_3e__24string">tflags =&gt; $string</a></strong><br />
+</dt>
+<dd>
+Optional: a string, i.e. a space-separated list of additional tflags
+to be appended to an existing list of flags in $self-&gt;{conf}-&gt;{tflags},
+such as: ``nice noautolearn multiple''. No syntax checks are performed.
+</dd>
+<p></p>
+<dt><strong><a name="item_description__3d_3e__24string">description =&gt; $string</a></strong><br />
+</dt>
+<dd>
+Optional: a custom rule description string.  This is used in the
+<code>hit_rule</code> plugin call, called by this method. If unset, the static
+description is used.
+</dd>
+<p></p></dl>
+<p>Backward compatibility: the two mandatory arguments have been part of this API
+since SpamAssassin 2.x.  The optional <em>name=&gt;value</em> pairs, however, are a
+new addition in SpamAssassin 3.2.0.</p>
+<dt><strong><a name="item_create_fulltext_tmpfile">$status-&gt;create_fulltext_tmpfile (fulltext_ref)</a></strong><br />
+</dt>
+<dd>
+This function creates a temporary file containing the passed scalar
+reference data (typically the full/pristine text of the message).
+This is typically used by external programs like pyzor and dccproc, to
+avoid hangs due to buffering issues.   Methods that need this, should
+call $self-&gt;<a href="#item_create_fulltext_tmpfile"><code>create_fulltext_tmpfile($fulltext)</code></a> to retrieve the temporary
+filename; it will be created if it has not already been.
+</dd>
+<dd>
+<p>Note: This can only be called once until $status-&gt;<a href="#item_delete_fulltext_tmpfile"><code>delete_fulltext_tmpfile()</code></a> is
+called.</p>
+</dd>
+<p></p>
+<dt><strong><a name="item_delete_fulltext_tmpfile">$status-&gt;delete_fulltext_tmpfile ()</a></strong><br />
+</dt>
+<dd>
+Will clean up after a $status-&gt;<a href="#item_create_fulltext_tmpfile"><code>create_fulltext_tmpfile()</code></a> call.  Deletes the
+temporary file and uncaches the filename.
+</dd>
+<p></p>
+<dt><strong><a name="item_all_from_addrs_domains">all_from_addrs_domains</a></strong><br />
+</dt>
+<dd>
+This function returns all the various from addresses in a message using <code>all_from_addrs()</code> 
+and then returns only the domain names.
+</dd>
+<p></p></dl>
+<p>
+</p>
+<hr />
+<h1><a name="see_also">SEE ALSO</a></h1>
+<p><code>Mail::SpamAssassin</code>
+<code>spamassassin</code>
+
+</p>
+
+</body>
+
+</html>

Added: spamassassin/site/full/3.4.x/doc/Mail_SpamAssassin_PerMsgStatus.txt
URL: http://svn.apache.org/viewvc/spamassassin/site/full/3.4.x/doc/Mail_SpamAssassin_PerMsgStatus.txt?rev=1567225&view=auto
==============================================================================
--- spamassassin/site/full/3.4.x/doc/Mail_SpamAssassin_PerMsgStatus.txt (added)
+++ spamassassin/site/full/3.4.x/doc/Mail_SpamAssassin_PerMsgStatus.txt Tue Feb 11 17:26:49 2014
@@ -0,0 +1,450 @@
+NAME
+    Mail::SpamAssassin::PerMsgStatus - per-message status (spam or not-spam)
+
+SYNOPSIS
+      my $spamtest = new Mail::SpamAssassin ({
+        'rules_filename'      => '/etc/spamassassin.rules',
+        'userprefs_filename'  => $ENV{HOME}.'/.spamassassin/user_prefs'
+      });
+      my $mail = $spamtest->parse();
+
+      my $status = $spamtest->check ($mail);
+
+      my $rewritten_mail;
+      if ($status->is_spam()) {
+        $rewritten_mail = $status->rewrite_mail ();
+      }
+      ...
+
+DESCRIPTION
+    The Mail::SpamAssassin "check()" method returns an object of this class.
+    This object encapsulates all the per-message state.
+
+METHODS
+    $status->check ()
+        Runs the SpamAssassin rules against the message pointed to by the
+        object.
+
+    $status->learn()
+        After a mail message has been checked, this method can be called. If
+        the score is outside a certain range around the threshold, i.e. if
+        the message is judged more-or-less definitely spam or definitely
+        non-spam, it will be fed into SpamAssassin's learning systems
+        (currently the naive Bayesian classifier), so that future similar
+        mails will be caught.
+
+    $score = $status->get_autolearn_points()
+        Return the message's score as computed for auto-learning. Certain
+        tests are ignored:
+
+          - rules with tflags set to 'learn' (the Bayesian rules)
+
+          - rules with tflags set to 'userconf' (user white/black-listing rules, etc)
+
+          - rules with tflags set to 'noautolearn'
+
+        Also note that auto-learning occurs using scores from either
+        scoreset 0 or 1, depending on what scoreset is used during message
+        check. It is likely that the message check and auto-learn scores
+        will be different.
+
+    $score = $status->get_head_only_points()
+        Return the message's score as computed for auto-learning, ignoring
+        all rules except for header-based ones.
+
+    $score = $status->get_learned_points()
+        Return the message's score as computed for auto-learning, ignoring
+        all rules except for learning-based ones.
+
+    $score = $status->get_body_only_points()
+        Return the message's score as computed for auto-learning, ignoring
+        all rules except for body-based ones.
+
+    $score = $status->get_autolearn_force_status()
+        Return whether a message's score included any rules that are flagged
+        as autolearn_force.
+
+    $rule_names = $status->get_autolearn_force_names()
+        Return a comma-separated list of rule names if a message's
+        score included any rules that are flagged as autolearn_force.
+
+    $isspam = $status->is_spam ()
+        After a mail message has been checked, this method can be called. It
+        will return 1 for mail determined likely to be spam, 0 if it does
+        not seem spam-like.
+
+    $list = $status->get_names_of_tests_hit ()
+        After a mail message has been checked, this method can be called. It
+        will return a comma-separated string, listing all the symbolic test
+        names of the tests which were triggered by the mail.
+
+    $list = $status->get_names_of_subtests_hit ()
+        After a mail message has been checked, this method can be called. It
+        will return a comma-separated string, listing all the symbolic test
+        names of the meta-rule sub-tests which were triggered by the mail.
+        Sub-tests are the normally-hidden rules, which score 0 and have
+        names beginning with two underscores, used in meta rules.
+
+    $num = $status->get_score ()
+        After a mail message has been checked, this method can be called. It
+        will return the message's score.
+
+    $num = $status->get_required_score ()
+        After a mail message has been checked, this method can be called. It
+        will return the score required for a mail to be considered spam.
+
+    $num = $status->get_autolearn_status ()
+        After a mail message has been checked, this method can be called. It
+        will return one of the following strings depending on whether the
+        mail was auto-learned or not: "ham", "no", "spam", "disabled",
+        "failed", "unavailable".
+
+        If the message is flagged with autolearn_force, it will also
+        include the status and the rules hit. For example:
+        "autolearn_force=yes (AUTOLEARNTEST_BODY)"
+
+    $report = $status->get_report ()
+        Deliver a "spam report" on the checked mail message. This contains
+        details of how many spam detection rules it triggered.
+
+        The report is returned as a multi-line string, with the lines
+        separated by "\n" characters.
+
+    $preview = $status->get_content_preview ()
+        Give a "preview" of the content.
+
+        This is returned as a multi-line string, with the lines separated by
+        "\n" characters, containing a fully-decoded, safe, plain-text sample
+        of the first few lines of the message body.
+
+    $msg = $status->get_message()
+        Return the object representing the message being scanned.
+
+    $status->rewrite_mail ()
+        Rewrite the mail message. This will at minimum add headers, and at
+        maximum MIME-encapsulate the message text, to reflect its spam or
+        not-spam status. The function will return a scalar of the rewritten
+        message.
+
+        The actual modifications depend on the configuration (see
+        "Mail::SpamAssassin::Conf" for more information).
+
+        The possible modifications are as follows:
+
+        To:, From: and Subject: modification on spam mails
+            Depending on the configuration, the To: and From: lines can have
+            a user-defined RFC 2822 comment appended for spam mail. The
+            subject line may have a user-defined string prepended to it for
+            spam mail.
+
+        X-Spam-* headers for all mails
+            Depending on the configuration, zero or more headers with names
+            beginning with "X-Spam-" will be added to mail depending on
+            whether it is spam or ham.
+
+        spam message with report_safe
+            If report_safe is set to true (1), then spam messages are
+            encapsulated into their own message/rfc822 MIME attachment
+            without any modifications being made.
+
+            If report_safe is set to false (0), then the message will only
+            have the above headers added/modified.
+
+    $status->action_depends_on_tags($tags, $code, @args)
+        Enqueue the supplied subroutine reference $code, to become runnable
+        when all the specified tags become available. The $tags may be a
+        simple scalar - a tag name, or a listref of tag names. The
+        subroutine &$code when called will be passed a "permessagestatus"
+        object as its first argument, followed by the supplied (optional)
+        list @args .
+
+    $status->set_tag($tagname, $value)
+        Set a template tag, as used in "add_header", report templates, etc.
+        This API is intended for use by plugins. Tag names will be converted
+        to an all-uppercase representation internally.
+
+        $value can be a simple scalar (string or number), or a reference to
+        an array, in which case the public method get_tag will join array
+        elements using a space as a separator, returning a single string for
+        backward compatibility.
+
+        $value can also be a subroutine reference, which will be evaluated
+        each time the template is expanded. The first argument passed by
+        get_tag to a called subroutine will be a PerMsgStatus object (this
+        module's object), followed by optional arguments provided by a caller
+        to get_tag.
+
+        Note that perl supports closures, which means that variables set in
+        the caller's scope can be accessed inside this "sub". For example:
+
+            my $text = "hello world!";
+            $status->set_tag("FOO", sub {
+                      my $pms = shift;
+                      return $text;
+                    });
+
+        See "Mail::SpamAssassin::Conf"'s "TEMPLATE TAGS" section for more
+        details on how template tags are used.
+
+        "undef" will be returned if a tag by that name has not been defined.
+
+    $string = $status->get_tag($tagname)
+        Get the current value of a template tag, as used in "add_header",
+        report templates, etc. This API is intended for use by plugins. Tag
+        names will be converted to an all-uppercase representation
+        internally. See "Mail::SpamAssassin::Conf"'s "TEMPLATE TAGS" section
+        for more details on tags.
+
+        "undef" will be returned if a tag by that name has not been defined.
+
+    $string = $status->get_tag_raw($tagname, @args)
+        Similar to "get_tag", but keeps a tag name unchanged (does not
+        uppercase it), and does not convert arrayref tag values into a
+        single string.
+
+    $status->set_spamd_result_item($subref)
+        Set an entry for the spamd result log line. $subref should be a code
+        reference for a subroutine which will return a string in
+        'name=VALUE' format, similar to the other entries in the spamd
+        result line:
+
+          Jul 17 14:10:47 radish spamd[16670]: spamd: result: Y 22 - ALL_NATURAL,
+          DATE_IN_FUTURE_03_06,DIET_1,DRUGS_ERECTILE,DRUGS_PAIN,
+          TEST_FORGED_YAHOO_RCVD,TEST_INVALID_DATE,TEST_NOREALNAME,
+          TEST_NORMAL_HTTP_TO_IP,UNDISC_RECIPS scantime=0.4,size=3138,user=jm,
+          uid=1000,required_score=5.0,rhost=localhost,raddr=127.0.0.1,
+          rport=33153,mid=<9PS291LhupY>,autolearn=spam
+
+        "name" and "VALUE" must not contain "=" or "," characters, as it is
+        important that these log lines are easy to parse.
+
+        The code reference will be called by spamd after the message has
+        been scanned, and the "PerMsgStatus::check()" method has returned.
+
+    $status->finish ()
+        Indicate that this $status object is finished with, and can be
+        destroyed.
+
+        If you are using SpamAssassin in a persistent environment, or
+        checking many mail messages from one "Mail::SpamAssassin" factory,
+        this method should be called to ensure Perl's garbage collection
+        will clean up old status objects.
+
+    $name = $status->get_current_eval_rule_name()
+        Return the name of the currently-running eval rule. "undef" is
+        returned if no eval rule is currently being run. Useful for plugins
+        to determine the current rule name while inside an eval test
+        function call.
+
+    $status->get_decoded_body_text_array ()
+        Returns the message body, with base64 or quoted-printable encodings
+        decoded, and non-text parts or non-inline attachments stripped.
+
+        It is returned as an array of strings, with each string representing
+        one newline-separated line of the body.
+
+    $status->get_decoded_stripped_body_text_array ()
+        Returns the message body, decoded (as described in
+        get_decoded_body_text_array()), with HTML rendered, and with
+        whitespace normalized.
+
+        It will always render text/html, and will use a heuristic to
+        determine if other text/* parts should be considered text/html.
+
+        It is returned as an array of strings, with each string representing
+        one 'paragraph'. Paragraphs, in plain-text mails, are
+        double-newline-separated blocks of multi-line text.
+
+    $status->get (header_name [, default_value])
+        Returns a message header, pseudo-header, real name or address.
+        "header_name" is the name of a mail header, such as 'Subject', 'To',
+        etc. If "default_value" is given, it will be used if the requested
+        "header_name" does not exist.
+
+        Appending ":raw" to the header name will inhibit decoding of
+        quoted-printable or base-64 encoded strings.
+
+        Appending a modifier ":addr" to a header field name will cause
+        everything except the first email address to be removed from the
+        header field. It is mainly applicable to header fields 'From',
+        'Sender', 'To', 'Cc' along with their 'Resent-*' counterparts, and
+        the 'Return-Path'. For example, all of the following will result in
+        "example@foo":
+
+        example@foo
+        example@foo (Foo Blah)
+        example@foo, example@bar
+        display: example@foo (Foo Blah), example@bar ;
+        Foo Blah <example@foo>
+        "Foo Blah" <example@foo>
+        "'Foo Blah'" <example@foo>
+
+        Appending a modifier ":name" to a header field name will cause
+        everything except the first display name to be removed from the
+        header field. It is mainly applicable to header fields containing a
+        single mail address: 'From', 'Sender', along with their
+        'Resent-From' and 'Resent-Sender' counterparts. For example, all of
+        the following will result in "Foo Blah". One level of single quotes
+        is stripped too, as it is often seen.
+
+        example@foo (Foo Blah)
+        example@foo (Foo Blah), example@bar
+        display: example@foo (Foo Blah), example@bar ;
+        Foo Blah <example@foo>
+        "Foo Blah" <example@foo>
+        "'Foo Blah'" <example@foo>
+
+        There are several special pseudo-headers that can be specified:
+
+        "ALL" can be used to mean the text of all the message's headers.
+        "ALL-TRUSTED" can be used to mean the text of all the message's
+        headers that could only have been added by trusted relays.
+        "ALL-INTERNAL" can be used to mean the text of all the message's
+        headers that could only have been added by internal relays.
+        "ALL-UNTRUSTED" can be used to mean the text of all the message's
+        headers that may have been added by untrusted relays. To make this
+        pseudo-header more useful for header rules the 'Received' header
+        that was added by the last trusted relay is included, even though it
+        can be trusted.
+        "ALL-EXTERNAL" can be used to mean the text of all the message's
+        headers that may have been added by external relays. Like
+        "ALL-UNTRUSTED" the 'Received' header added by the last internal
+        relay is included.
+        "ToCc" can be used to mean the contents of both the 'To' and 'Cc'
+        headers.
+        "EnvelopeFrom" is the address used in the 'MAIL FROM:' phase of the
+        SMTP transaction that delivered this message, if this data has been
+        made available by the SMTP server.
+        "MESSAGEID" is a symbol meaning all Message-Id's found in the
+        message; some mailing list software moves the real 'Message-Id' to
+        'Resent-Message-Id' or 'X-Message-Id', then uses its own one in the
+        'Message-Id' header. The value returned for this symbol is the text
+        from all 3 headers, separated by newlines.
+        "X-Spam-Relays-Untrusted" is the generated metadata of untrusted
+        relays the message has passed through.
+        "X-Spam-Relays-Trusted" is the generated metadata of trusted relays
+        the message has passed through.
+
+    $status->get_uri_list ()
+        Returns an array of all unique URIs found in the message. It takes a
+        combination of the URIs found in the rendered (decoded and HTML
+        stripped) body and the URIs found when parsing the HTML in the
+        message. Will also set $status->{uri_list} (the array as returned by
+        this function).
+
+        The returned array will include the "raw" URI as well as "slightly
+        cooked" versions. For example, the single URI
+        'http://%77&#00119;%77.example.com/' will get turned into: (
+        'http://%77&#00119;%77.example.com/', 'http://www.example.com/' )
+
+    $status->get_uri_detail_list ()
+        Returns a hash reference of all unique URIs found in the message and
+        various data about where the URIs were found in the message. It
+        takes a combination of the URIs found in the rendered (decoded and
+        HTML stripped) body and the URIs found when parsing the HTML in the
+        message. Will also set $status->{uri_detail_list} (the hash
+        reference as returned by this function). This function will also set
+        $status->{uri_domain_count} (count of unique domains).
+
+        The hash format looks something like this:
+
+          raw_uri => {
+            types => { a => 1, img => 1, parsed => 1 },
+            cleaned => [ canonified_uri ],
+            anchor_text => [ "click here", "no click here" ],
+            domains => { domain1 => 1, domain2 => 1 },
+          }
+
+        "raw_uri" is whatever the URI was in the message itself
+        (http://spamassassin.apache%2Eorg/).
+
+        "types" is a hash of the HTML tags (lowercase) which referenced the
+        raw_uri. *parsed* is a faked type which specifies that the raw_uri
+        was seen in the rendered text.
+
+        "cleaned" is an array of the raw and canonified version of the
+        raw_uri (http://spamassassin.apache%2Eorg/,
+        http://spamassassin.apache.org/).
+
+        "anchor_text" is an array of the anchor text (text between <a> and
+        </a>), if any, which linked to the URI.
+
+        "domains" is a hash of the domains found in the canonified URIs.
+
+        "hosts" is a hash of unstripped hostnames found in the canonified
+        URIs as hash keys, with their domain part stored as a value of each
+        hash entry.
+
+    $status->clear_test_state()
+        Clear test state, including test log messages from
+        "$status->test_log()".
+
+    $status->got_hit ($rulename, $desc_prepend [, name => value, ...])
+        Register a hit against a rule in the ruleset.
+
+        There are two mandatory arguments. These are $rulename, the name of
+        the rule that fired, and $desc_prepend, which is a short string that
+        will be prepended to the rule's "describe" string in output reports.
+
+        In addition, callers can supplement that with the following optional
+        data:
+
+        score => $num
+            Optional: the score to use for the rule hit. If unspecified, the
+            value from the "Mail::SpamAssassin::Conf" object's "{scores}"
+            hash will be used (a configured score), and in its absence the
+            "defscore" option value.
+
+        defscore => $num
+            Optional: the score to use for the rule hit if neither the
+            option "score" is provided, nor a configured score value is
+            provided.
+
+        value => $num
+            Optional: the value to assign to the rule; the default value is
+            1. *tflags multiple* rules use values of greater than 1 to
+            indicate multiple hits. This value is accessible to meta rules.
+
+        ruletype => $type
+            Optional, but recommended: the rule type string. This is used in
+            the "hit_rule" plugin call, called by this method. If unset,
+            *'unknown'* is used.
+
+        tflags => $string
+            Optional: a string, i.e. a space-separated list of additional
+            tflags to be appended to an existing list of flags in
+            $self->{conf}->{tflags}, such as: "nice noautolearn multiple".
+            No syntax checks are performed.
+
+        description => $string
+            Optional: a custom rule description string. This is used in the
+            "hit_rule" plugin call, called by this method. If unset, the
+            static description is used.
+
+        Backward compatibility: the two mandatory arguments have been part
+        of this API since SpamAssassin 2.x. The optional *name => value*
+        pairs, however, are a new addition in SpamAssassin 3.2.0.
+
+    $status->create_fulltext_tmpfile (fulltext_ref)
+        This function creates a temporary file containing the passed scalar
+        reference data (typically the full/pristine text of the message).
+        This is typically used by external programs like pyzor and dccproc,
+        to avoid hangs due to buffering issues. Methods that need this,
+        should call $self->create_fulltext_tmpfile($fulltext) to retrieve
+        the temporary filename; it will be created if it has not already
+        been.
+
+        Note: This can only be called once until
+        $status->delete_fulltext_tmpfile() is called.
+
+    $status->delete_fulltext_tmpfile ()
+        Will clean up after a $status->create_fulltext_tmpfile() call.
+        Deletes the temporary file and uncaches the filename.
+
+    all_from_addrs_domains
+        This function returns all the various from addresses in a message
+        using all_from_addrs() and then returns only the domain names.
+
+SEE ALSO
+    "Mail::SpamAssassin" "spamassassin"
+

Added: spamassassin/site/full/3.4.x/doc/Mail_SpamAssassin_PersistentAddrList.html
URL: http://svn.apache.org/viewvc/spamassassin/site/full/3.4.x/doc/Mail_SpamAssassin_PersistentAddrList.html?rev=1567225&view=auto
==============================================================================
--- spamassassin/site/full/3.4.x/doc/Mail_SpamAssassin_PersistentAddrList.html (added)
+++ spamassassin/site/full/3.4.x/doc/Mail_SpamAssassin_PersistentAddrList.html Tue Feb 11 17:26:49 2014
@@ -0,0 +1,112 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<title>Mail::SpamAssassin::PersistentAddrList - persistent address list base class</title>
+<link rev="made" href="mailto:root@twm2005-dev.thoughtworthy.com" />
+</head>
+
+<body style="background-color: white">
+
+<p><a name="__index__"></a></p>
+<!-- INDEX BEGIN -->
+
+<ul>
+
+	<li><a href="#name">NAME</a></li>
+	<li><a href="#synopsis">SYNOPSIS</a></li>
+	<li><a href="#description">DESCRIPTION</a></li>
+	<li><a href="#methods">METHODS</a></li>
+</ul>
+<!-- INDEX END -->
+
+<hr />
+<p>
+</p>
+<h1><a name="name">NAME</a></h1>
+<p>Mail::SpamAssassin::PersistentAddrList - persistent address list base class</p>
+<p>
+</p>
+<hr />
+<h1><a name="synopsis">SYNOPSIS</a></h1>
+<pre>
+  my $factory = PersistentAddrListSubclass-&gt;new();
+  $spamtest-&gt;set_persistent_addr_list_factory ($factory);
+  ... call into SpamAssassin classes...</pre>
+<p>SpamAssassin will call:</p>
+<pre>
+  my $addrlist = $factory-&gt;new_checker($spamtest);
+  $entry = $addrlist-&gt;get_addr_entry ($addr);
+  ...</pre>
+<p>
+</p>
+<hr />
+<h1><a name="description">DESCRIPTION</a></h1>
+<p>All persistent address list implementations, used by the auto-whitelist
+code to track known-good email addresses, use this as a base class.</p>
+<p>See <code>Mail::SpamAssassin::DBBasedAddrList</code> for an example.</p>
+<p>
+</p>
+<hr />
+<h1><a name="methods">METHODS</a></h1>
+<dl>
+<dt><strong><a name="item_new">$factory = PersistentAddrListSubclass-&gt;new();</a></strong><br />
+</dt>
+<dd>
+This creates a factory object, which SpamAssassin will call to create
+a new checker object for the persistent address list.
+</dd>
+<p></p>
+<dt><strong><a name="item_new_checker">my $addrlist = $factory-&gt;new_checker();</a></strong><br />
+</dt>
+<dd>
+Create a new address-list checker object from the factory. Called by the
+SpamAssassin classes.
+</dd>
+<p></p>
+<dt><strong><a name="item_get_addr_entry">$entry = $addrlist-&gt;get_addr_entry ($addr);</a></strong><br />
+</dt>
+<dd>
+Given an email address <code>$addr</code>, return an entry object with the details of
+that address.
+</dd>
+<dd>
+<p>The entry object is a reference to a hash, which must contain at least
+two keys: <code>count</code>, which is the count of times that address has been
+encountered before; and <code>totscore</code>, which is the total of all scores for
+messages associated with that address.  From these two fields, an average
+score will be calculated, and the score for the current message will be
+regressed towards that mean message score.</p>
+</dd>
+<dd>
+<p>The hash can contain whatever other data your back-end needs to store,
+under other keys.</p>
+</dd>
+<dd>
+<p>The method should never return <code>undef</code>, or a hash that does not contain
+a <code>count</code> key and a <code>totscore</code> key.</p>
+</dd>
+<p></p>
+<dt><strong><a name="item_add_score">$entry = $addrlist-&gt;add_score($entry, $score);</a></strong><br />
+</dt>
+<dd>
+This method should add the given score to the whitelist database for the
+given entry, and then return the new entry.
+</dd>
+<p></p>
+<dt><strong><a name="item_remove_entry">$entry = $addrlist-&gt;remove_entry ($entry);</a></strong><br />
+</dt>
+<dd>
+This method should remove the given entry from the whitelist database.
+</dd>
+<p></p>
+<dt><strong><a name="item_finish">$entry = $addrlist-&gt;finish ();</a></strong><br />
+</dt>
+<dd>
+Clean up, if necessary.  Called by SpamAssassin when it has finished
+checking, or adding to, the auto-whitelist database.
+</dd>
+<p></p></dl>
+
+</body>
+
+</html>

Added: spamassassin/site/full/3.4.x/doc/Mail_SpamAssassin_PersistentAddrList.txt
URL: http://svn.apache.org/viewvc/spamassassin/site/full/3.4.x/doc/Mail_SpamAssassin_PersistentAddrList.txt?rev=1567225&view=auto
==============================================================================
--- spamassassin/site/full/3.4.x/doc/Mail_SpamAssassin_PersistentAddrList.txt (added)
+++ spamassassin/site/full/3.4.x/doc/Mail_SpamAssassin_PersistentAddrList.txt Tue Feb 11 17:26:49 2014
@@ -0,0 +1,59 @@
+NAME
+    Mail::SpamAssassin::PersistentAddrList - persistent address list base
+    class
+
+SYNOPSIS
+      my $factory = PersistentAddrListSubclass->new();
+      $spamtest->set_persistent_addr_list_factory ($factory);
+      ... call into SpamAssassin classes...
+
+    SpamAssassin will call:
+
+      my $addrlist = $factory->new_checker($spamtest);
+      $entry = $addrlist->get_addr_entry ($addr);
+      ...
+
+DESCRIPTION
+    All persistent address list implementations, used by the auto-whitelist
+    code to track known-good email addresses, use this as a base class.
+
+    See "Mail::SpamAssassin::DBBasedAddrList" for an example.
+
+METHODS
+    $factory = PersistentAddrListSubclass->new();
+        This creates a factory object, which SpamAssassin will call to
+        create a new checker object for the persistent address list.
+
+    my $addrlist = $factory->new_checker();
+        Create a new address-list checker object from the factory. Called by
+        the SpamAssassin classes.
+
+    $entry = $addrlist->get_addr_entry ($addr);
+        Given an email address $addr, return an entry object with the
+        details of that address.
+
+        The entry object is a reference to a hash, which must contain at
+        least two keys: "count", which is the count of times that address
+        has been encountered before; and "totscore", which is the total of
+        all scores for messages associated with that address. From these two
+        fields, an average score will be calculated, and the score for the
+        current message will be regressed towards that mean message score.
+
+        The hash can contain whatever other data your back-end needs to
+        store, under other keys.
+
+        The method should never return "undef", or a hash that does not
+        contain a "count" key and a "totscore" key.
+
+    $entry = $addrlist->add_score($entry, $score);
+        This method should add the given score to the whitelist database for
+        the given entry, and then return the new entry.
+
+    $entry = $addrlist->remove_entry ($entry);
+        This method should remove the given entry from the whitelist
+        database.
+
+    $entry = $addrlist->finish ();
+        Clean up, if necessary. Called by SpamAssassin when it has finished
+        checking, or adding to, the auto-whitelist database.
+