You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by jm...@apache.org on 2007/01/17 15:52:18 UTC

svn commit: r497030 - in /spamassassin/trunk: ./ MANIFEST spamc/libspamc.c spamc/libspamc.h spamc/spamc.c spamc/spamc.pod spamd/PROTOCOL spamd/spamd.raw t/spamc_headers.t

Author: jm
Date: Wed Jan 17 06:52:17 2007
New Revision: 497030

URL: http://svn.apache.org/viewvc?view=rev&rev=497030
Log:
bug 5296: add spamc --headers switch, which scans messages and transmits back just rewritten headers.  This is more bandwidth-efficient than the normal mode of scanning, but only works for 'report_safe 0'.  Bump spamc/spamd's protocol version to 1.4, to reflect new HEADERS verb.   update spamd/PROTOCOL for current protocol.  add 'sa-compile' to the SVN ignored-files list.

Added:
    spamassassin/trunk/t/spamc_headers.t   (with props)
Modified:
    spamassassin/trunk/   (props changed)
    spamassassin/trunk/MANIFEST
    spamassassin/trunk/spamc/libspamc.c
    spamassassin/trunk/spamc/libspamc.h
    spamassassin/trunk/spamc/spamc.c
    spamassassin/trunk/spamc/spamc.pod
    spamassassin/trunk/spamd/PROTOCOL
    spamassassin/trunk/spamd/spamd.raw

Propchange: spamassassin/trunk/
------------------------------------------------------------------------------
--- svn:ignore (original)
+++ svn:ignore Wed Jan 17 06:52:17 2007
@@ -17,6 +17,7 @@
 sa-filter
 sa-learn
 sa-update
+sa-compile
 site_perl
 spamassassin
 testmails

Modified: spamassassin/trunk/MANIFEST
URL: http://svn.apache.org/viewvc/spamassassin/trunk/MANIFEST?view=diff&rev=497030&r1=497029&r2=497030
==============================================================================
--- spamassassin/trunk/MANIFEST (original)
+++ spamassassin/trunk/MANIFEST Wed Jan 17 06:52:17 2007
@@ -478,3 +478,4 @@
 t/basic_obj_api.t
 t/tainted_msg.t
 t/data/taintcheckplugin.pm
+t/spamc_headers.t

Modified: spamassassin/trunk/spamc/libspamc.c
URL: http://svn.apache.org/viewvc/spamassassin/trunk/spamc/libspamc.c?view=diff&rev=497030&r1=497029&r2=497030
==============================================================================
--- spamassassin/trunk/spamc/libspamc.c (original)
+++ spamassassin/trunk/spamc/libspamc.c Wed Jan 17 06:52:17 2007
@@ -139,7 +139,7 @@
  */
 
 /* Set the protocol version that this spamc speaks */
-static const char *PROTOCOL_VERSION = "SPAMC/1.3";
+static const char *PROTOCOL_VERSION = "SPAMC/1.4";
 
 /* "private" part of struct message.
  * we use this instead of the struct message directly, so that we
@@ -148,6 +148,7 @@
 struct libspamc_private_message
 {
     int flags;			/* copied from "flags" arg to message_read() */
+    int alloced_size;           /* allocated space for the "out" buffer */
 };
 
 int libspamc_timeout = 0;
@@ -730,6 +731,7 @@
 	return EX_OSERR;
     }
     m->priv->flags = flags;
+    m->priv->alloced_size = 0;
 
     if (flags & SPAMC_PING) {
       _clear_message(m);
@@ -1065,6 +1067,55 @@
 #endif
 }
 
+/* Append the original message body to m->out (the rewritten headers).
+ * Copies at most the space left in the buffer (alloced_size - 1 - out_len).
+ * Returns EX_OK, or EX_SOFTWARE if m->raw has no end-of-headers blank line.
+ */
+int
+_append_original_body (struct message *m, int flags)
+{
+    char *cp, *cpend, *bodystart;
+    int bodylen, outspaceleft, towrite;
+
+#define CRNLCRNL        "\r\n\r\n"
+#define CRNLCRNL_LEN    4
+#define NLNL            "\n\n"
+#define NLNL_LEN        2
+
+    cpend = m->raw + m->raw_len;
+    bodystart = NULL;
+    for (cp = m->raw; cp < cpend; cp++) {
+        if (*cp == '\r' && cpend - cp >= CRNLCRNL_LEN &&
+                            !strncmp(cp, CRNLCRNL, CRNLCRNL_LEN))
+        {
+            bodystart = cp + CRNLCRNL_LEN;
+            break;
+        }
+        else if (*cp == '\n' && cpend - cp >= NLNL_LEN &&
+                           !strncmp(cp, NLNL, NLNL_LEN))
+        {
+            bodystart = cp + NLNL_LEN;
+            break;
+        }
+    }
+
+    if (bodystart == NULL) {
+        libspamc_log(flags, LOG_ERR, "failed to find end-of-headers");
+        return EX_SOFTWARE;
+    }
+
+    /* clamp: a negative count would become a huge size_t in memcpy */
+    bodylen = cpend - bodystart;
+    outspaceleft = (m->priv->alloced_size-1) - m->out_len;
+    if (outspaceleft < 0) outspaceleft = 0;
+    towrite = (bodylen < outspaceleft ? bodylen : outspaceleft);
+
+    /* memcpy, not strncpy: the body may contain NUL bytes */
+    memcpy (m->out + m->out_len, bodystart, towrite);
+    m->out_len += towrite;
+    return EX_OK;
+}
+
 int message_filter(struct transport *tp, const char *username,
                    int flags, struct message *m)
 {
@@ -1118,7 +1169,8 @@
     }
 
     m->is_spam = EX_TOOBIG;
-    if ((m->outbuf = malloc(m->max_len + EXPANSION_ALLOWANCE + 1)) == NULL) {
+    m->priv->alloced_size = m->max_len + EXPANSION_ALLOWANCE + 1;
+    if ((m->outbuf = malloc(m->priv->alloced_size)) == NULL) {
 	failureval = EX_OSERR;
 	goto failure;
     }
@@ -1136,6 +1188,8 @@
 	strcpy(buf, "SYMBOLS ");
     else if (flags & SPAMC_PING)
 	strcpy(buf, "PING ");
+    else if (flags & SPAMC_HEADERS)
+	strcpy(buf, "HEADERS ");
     else
 	strcpy(buf, "PROCESS ");
 
@@ -1293,20 +1347,17 @@
 
 	if (flags & SPAMC_USE_SSL) {
 	    len = full_read_ssl(ssl, (unsigned char *) m->out + m->out_len,
-				m->max_len + EXPANSION_ALLOWANCE + 1 -
-				m->out_len,
-				m->max_len + EXPANSION_ALLOWANCE + 1 -
-				m->out_len);
+				m->priv->alloced_size - m->out_len,
+				m->priv->alloced_size - m->out_len);
 	}
 	else {
 	    len = full_read(sock, 0, m->out + m->out_len,
-			    m->max_len + EXPANSION_ALLOWANCE + 1 - m->out_len,
-			    m->max_len + EXPANSION_ALLOWANCE + 1 -
-			    m->out_len);
+			    m->priv->alloced_size - m->out_len,
+			    m->priv->alloced_size - m->out_len);
 	}
 
 
-	if (len + m->out_len > m->max_len + EXPANSION_ALLOWANCE) {
+	if (len + m->out_len > (m->priv->alloced_size-1)) {
 	    failureval = EX_TOOBIG;
 	    goto failure;
 	}
@@ -1326,6 +1377,12 @@
 	goto failure;
     }
 
+    if (flags & SPAMC_HEADERS) {
+        if (_append_original_body(m, flags) != EX_OK) {
+            goto failure;
+        }
+    }
+
     return EX_OK;
 
   failure:
@@ -1344,7 +1401,6 @@
     return failureval;
 }
 
-
 int message_process(struct transport *trans, char *username, int max_size,
 		    int in_fd, int out_fd, const int flags)
 {
@@ -1426,7 +1482,8 @@
     }
 
     m->is_spam = EX_TOOBIG;
-    if ((m->outbuf = malloc(m->max_len + EXPANSION_ALLOWANCE + 1)) == NULL) {
+    m->priv->alloced_size = m->max_len + EXPANSION_ALLOWANCE + 1;
+    if ((m->outbuf = malloc(m->priv->alloced_size)) == NULL) {
 	failureval = EX_OSERR;
 	goto failure;
     }

Modified: spamassassin/trunk/spamc/libspamc.h
URL: http://svn.apache.org/viewvc/spamassassin/trunk/spamc/libspamc.h?view=diff&rev=497030&r1=497029&r2=497030
==============================================================================
--- spamassassin/trunk/spamc/libspamc.h (original)
+++ spamassassin/trunk/spamc/libspamc.h Wed Jan 17 06:52:17 2007
@@ -125,6 +125,9 @@
 /* Nov 30, 2006 jm: add -z, zlib support */
 #define SPAMC_USE_ZLIB        (1<<16)
 
+/* Jan 16, 2007 jm: get markup headers from spamd */
+#define SPAMC_HEADERS         (1<<15)
+
 #define SPAMC_MESSAGE_CLASS_SPAM 1
 #define SPAMC_MESSAGE_CLASS_HAM  2
 

Modified: spamassassin/trunk/spamc/spamc.c
URL: http://svn.apache.org/viewvc/spamassassin/trunk/spamc/spamc.c?view=diff&rev=497030&r1=497029&r2=497030
==============================================================================
--- spamassassin/trunk/spamc/spamc.c (original)
+++ spamassassin/trunk/spamc/spamc.c Wed Jan 17 06:52:17 2007
@@ -182,6 +182,7 @@
     usg("  -r, --full-spam     Print full report for messages identified as\n"
         "                      spam.\n");
     usg("  -R, --full          Print full report for all messages.\n");
+    usg("  --headers           Rewrite only the message headers.\n");
     usg("  -E, --exitcode      Filter as normal, and set an exit code.\n");
 
     usg("  -x, --no-safe-fallback\n"
@@ -214,9 +215,9 @@
           struct transport *ptrn)
 {
 #ifndef _WIN32
-    const char *opts = "-BcrRd:e:fyp:t:s:u:L:C:xzSHU:ElhVKF:0:1:";
+    const char *opts = "-BcrRd:e:fyp:t:s:u:L:C:xzSHU:ElhVKF:0:1:2";
 #else
-    const char *opts = "-BcrRd:fyp:t:s:u:L:C:xzSHElhVKF:0:1:";
+    const char *opts = "-BcrRd:fyp:t:s:u:L:C:xzSHElhVKF:0:1:2";
 #endif
     int opt;
     int ret = EX_OK;
@@ -241,6 +242,7 @@
        { "tests", no_argument, 0, 'y' },
        { "full-spam", no_argument, 0, 'r' },
        { "full", no_argument, 0, 'R' },
+       { "headers", no_argument, 0, 2 },
        { "exitcode", no_argument, 0, 'E' },
        { "no-safe-fallback", no_argument, 0, 'x' },
        { "log-to-stderr", no_argument, 0, 'l' },
@@ -457,6 +459,11 @@
                 ptrn->retry_sleep = atoi(spamc_optarg);
                 break;
             }
+            case 2:
+            {
+                flags |= SPAMC_HEADERS;
+                break;
+            }
         }
     }
 
@@ -473,7 +480,7 @@
 	    ret = EX_USAGE;
 	}
         if (flags & SPAMC_PING) {
-	    libspamc_log(flags, LOG_ERR, "Learning excludes ping");
+            libspamc_log(flags, LOG_ERR, "Learning excludes ping");
 	    ret = EX_USAGE;
 	}
 	if (flags & SPAMC_REPORT_IFSPAM) {

Modified: spamassassin/trunk/spamc/spamc.pod
URL: http://svn.apache.org/viewvc/spamassassin/trunk/spamc/spamc.pod?view=diff&rev=497030&r1=497029&r2=497030
==============================================================================
--- spamassassin/trunk/spamc/spamc.pod (original)
+++ spamassassin/trunk/spamc/spamc.pod Wed Jan 17 06:52:17 2007
@@ -229,6 +229,17 @@
 C<Compress::Zlib> perl module on the server side; an error will be returned
 otherwise.
 
+=item B<--headers>
+
+Perform a scan, but instead of allowing any part of the message (header and
+body) to be rewritten, limit rewriting to only the message headers. This is
+much more efficient in bandwidth usage, since the response message transmitted
+back from the spamd server will not include the body.
+
+Note that this only makes sense if you are using C<report_safe 0> in the
+scanning configuration on the remote end; with C<report_safe 1>, it is
+likely to result in corrupt messages.
+
 =back
 
 =head1 CONFIGURATION FILE

Modified: spamassassin/trunk/spamd/PROTOCOL
URL: http://svn.apache.org/viewvc/spamassassin/trunk/spamd/PROTOCOL?view=diff&rev=497030&r1=497029&r2=497030
==============================================================================
--- spamassassin/trunk/spamd/PROTOCOL (original)
+++ spamassassin/trunk/spamd/PROTOCOL Wed Jan 17 06:52:17 2007
@@ -25,7 +25,7 @@
 After each side is done writing, it shuts down its side of the connection.
 
 The first line from spamc is the command for spamd to execute (PROCESS a
-message is the command in protocol<=1.2) followed by the protocol version.
+message is the command in protocol<=1.3) followed by the protocol version.
 
 There may be additional headers following the command, which are as yet
 undefined.  Servers should ignore these, and keep looking for headers which
@@ -45,7 +45,7 @@
 Commands
 --------
 
-The following commands are defined as of protocol 1.2:
+The following commands are defined as of protocol 1.4:
 
 CHECK         --  Just check if the passed message is spam or not and reply as
                   described below
@@ -70,6 +70,9 @@
                   with that message.  This includes setting or removing a local
                   or a remote database (learning, reporting, forgetting, revoking).
 
+HEADERS       --  Same as PROCESS, but return only modified headers, not body
+                  (new in protocol 1.4)
+
 
 CHECK command returns just a header (terminated by "\r\n\r\n") with the first
 line as for PROCESS (ie a response code and message), and then a header called
@@ -94,7 +97,8 @@
 rule names, separated by commas.  Note that some versions of the protocol
 terminate this line with "\r\n", and some do not, due to an oversight; so
 clients should be flexible on whether or not a CR-LF pair follows
-the symbol text, and how many CR-LFs there are.
+the symbol text, and how many CR-LFs there are.  From protocol version 1.3
+onwards, this line is never terminated with "\r\n".
 
 
 REPORT command returns the same as CHECK, followed immediately by the report
@@ -122,7 +126,7 @@
 SKIP) no additional input is expected. It returns a simple confirmation
 response, like this:
 
-	SPAMD/1.2 0 PONG\r\n
+	SPAMD/1.4 0 PONG\r\n
 
 This facility may be useful for monitoring programs which wish to check that
 the daemon is alive and providing at least a basic response within a reasonable
@@ -153,15 +157,22 @@
 Set: local\r\n
 Remove: remote\r\n
 
+HEADERS returns the same as PROCESS, up to and including the double-newline
+separator between message headers and body -- but stops there.  It was
+added in SpamAssassin 3.2.0.  Note that this requires protocol version
+1.4.
+
+
+
 Headers
 -------
 
-The following optional headers are defined as of protocol 1.2:
+The following optional headers are defined as of protocol 1.4:
 
 Content-length
 
     Length of a request or response body, in bytes (generally a requirement
-    as of protocol 1.2)
+    as of protocol version 1.2 onwards)
 
 Spam
 
@@ -177,9 +188,10 @@
 
     An optional header, sent by the client to the server, whose value may
     consist of the string "zlib", indicating that the message body transmitted
-    by the client is compressed using Zlib compression.  This is new in
-    SpamAssassin 3.2.0.
+    by the client is compressed using Zlib compression.  (This is new in
+    SpamAssassin 3.2.0.)
 
 As-yet-undefined headers should not be treated as errors, and instead
-should be ignored.
+should be ignored.  Multiple headers can appear in requests and responses
+(this was not clearly defined until protocol version 1.3).
 

Modified: spamassassin/trunk/spamd/spamd.raw
URL: http://svn.apache.org/viewvc/spamassassin/trunk/spamd/spamd.raw?view=diff&rev=497030&r1=497029&r2=497030
==============================================================================
--- spamassassin/trunk/spamd/spamd.raw (original)
+++ spamassassin/trunk/spamd/spamd.raw Wed Jan 17 06:52:17 2007
@@ -1252,14 +1252,15 @@
   # If we get the PROCESS command, the client is going to send a
   # message that we need to filter.
 
-  elsif (/(PROCESS|CHECK|SYMBOLS|REPORT|REPORT_IFSPAM) SPAMC\/(.*)/) {
+  elsif (/(PROCESS|CHECK|SYMBOLS|REPORT|HEADERS|REPORT_IFSPAM) SPAMC\/(.*)/) {
     my $method = $1;
+    my $version = $2;
     eval {
       Mail::SpamAssassin::Util::trap_sigalrm_fully(sub {
                           die "child processing timeout";
                         });
       alarm $timeout_child if ($timeout_child);
-      check($method, $2, $start, $remote_hostname, $remote_hostaddr);
+      check($method, $version, $start, $remote_hostname, $remote_hostaddr);
     };
     alarm 0;
 
@@ -1300,7 +1301,7 @@
   # Looks like a client is just seeing if we're alive.
 
   elsif (/PING SPAMC\/(.*)/) {
-    syswrite( $client, "SPAMD/1.2 $resphash{EX_OK} PONG\r\n" );
+    syswrite( $client, "SPAMD/1.4 $resphash{EX_OK} PONG\r\n" );
   }
 
   # If it was none of the above, then we don't know what it was.
@@ -1517,14 +1518,21 @@
 
   my $spamhdr = "Spam: $response_spam_status ; $msg_score / $msg_threshold";
 
-  if ( $method eq 'PROCESS' ) {
+  if ( $method eq 'PROCESS' || $method eq 'HEADERS' ) {
 
     $status->set_tag('REMOTEHOSTNAME', $remote_hostname);
     $status->set_tag('REMOTEHOSTADDR', $remote_hostaddr);
 
     # Build the message to send back and measure it
     my $msg_resp        = $status->rewrite_mail();
+
+    if ($method eq 'HEADERS') {
+      # just the headers; delete everything after first \r\n\r\n
+      $msg_resp =~ s/(\r?\n\r?\n).*$/$1/gs;
+    }
+
     my $msg_resp_length = length($msg_resp);
+
     if ( $version >= 1.3 )    # Spamc protocol 1.3 means multi hdrs are OK
     {
       syswrite_full_buffer( $client, "SPAMD/1.1 $resphash{$resp} $resp\r\n" .

Added: spamassassin/trunk/t/spamc_headers.t
URL: http://svn.apache.org/viewvc/spamassassin/trunk/t/spamc_headers.t?view=auto&rev=497030
==============================================================================
--- spamassassin/trunk/t/spamc_headers.t (added)
+++ spamassassin/trunk/t/spamc_headers.t Wed Jan 17 06:52:17 2007
@@ -0,0 +1,28 @@
+#!/usr/bin/perl
+# Checks that 'spamc --headers' yields rewritten headers plus the original body.
+use lib '.'; use lib 't';
+use SATest; sa_t_init("spamc_headers");
+
+use Test; plan tests => ($NO_SPAMC_EXE ? 0 : 5);
+exit if $NO_SPAMC_EXE;
+
+# ---------------------------------------------------------------------------
+
+%patterns = (
+
+  q{ Message-Id: <78...@yahoo.com> } => 'msgid',
+  q{ X-Spam-Status: Yes, } => 'xss',
+  q{ TEST_NOREALNAME} => 'noreal',
+  q{ subscription cancelable at anytime } => 'body',
+
+);
+
+%anti_patterns = (
+
+);
+# spamd runs with report_safe 0, so only the headers are rewritten
+start_spamd("-L --cf='report_safe 0'");
+ok (spamcrun ("--headers < data/spam/009", \&patterns_run_cb));
+ok_all_patterns();
+stop_spamd();
+

Propchange: spamassassin/trunk/t/spamc_headers.t
------------------------------------------------------------------------------
    svn:executable = *



Re: svn commit: r497030 - in /spamassassin/trunk: ./ MANIFEST spamc/libspamc.c spamc/libspamc.h spamc/spamc.c spamc/spamc.pod spamd/PROTOCOL spamd/spamd.raw t/spamc_headers.t

Posted by Michael Parker <pa...@pobox.com>.
jm@apache.org wrote:

> +ok (spamcrun ("-A < data/spam/009", \&patterns_run_cb));

Need to update the test case to remove -A and go with --headers

Michael