You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@httpd.apache.org by sf...@apache.org on 2012/04/13 10:09:47 UTC

svn commit: r1325647 - in /httpd/mod_mbox/branches/convert-charsets/module-2.0: mod_mbox.h mod_mbox_cte.c

Author: sf
Date: Fri Apr 13 08:09:47 2012
New Revision: 1325647

URL: http://svn.apache.org/viewvc?rev=1325647&view=rev
Log:
discard whitespace between adjacent encoded words, as required by RFC 2047

- avoid a lot of reallocating and copying in mbox_cte_decode_header() and
  mbox_cte_decode_rfc2047()
- make mbox_cte_decode_rfc2047() static

Modified:
    httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox.h
    httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox_cte.c

Modified: httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox.h
URL: http://svn.apache.org/viewvc/httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox.h?rev=1325647&r1=1325646&r2=1325647&view=diff
==============================================================================
--- httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox.h (original)
+++ httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox.h Fri Apr 13 08:09:47 2012
@@ -27,6 +27,7 @@
 #include "util_varbuf.h"
 
 #include "apr_date.h"
+#include "apr_lib.h"
 #include "apr_strings.h"
 #include "apr_dbm.h"
 #include "apr_hash.h"
@@ -121,7 +122,6 @@ apr_size_t mbox_cte_decode_qp(char *p);
 apr_size_t mbox_cte_decode_b64(char *src);
 apr_size_t mbox_cte_escape_html(apr_pool_t *p, const char *s,
                                 apr_size_t len, char **body);
-char *mbox_cte_decode_rfc2047(apr_pool_t *p, char *src, struct ap_varbuf *vb);
 char *mbox_cte_decode_header(apr_pool_t *p, char *src);
 apr_status_t mbox_cte_convert_to_utf8(apr_pool_t *p, const char *charset,
                                       const char *src, apr_size_t len,

Modified: httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox_cte.c
URL: http://svn.apache.org/viewvc/httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox_cte.c?rev=1325647&r1=1325646&r2=1325647&view=diff
==============================================================================
--- httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox_cte.c (original)
+++ httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox_cte.c Fri Apr 13 08:09:47 2012
@@ -294,34 +294,28 @@ out:
  * These strings complies to the following syntax :
  * =?charset?mode?data?= rest
  */
-char *mbox_cte_decode_rfc2047(apr_pool_t *p, char *src, struct ap_varbuf *vb)
+static char *mbox_cte_decode_rfc2047(apr_pool_t *p, char *src, struct ap_varbuf *vb)
 {
     char *charset, *mode, *data, *rest;
     int i;
     apr_status_t rv;
+    apr_size_t data_len;
 
-    /* Look for charset */
-    charset = strstr(src, "=?");
-    if (!charset) {
+    if (strncmp(src, "=?", 2) != 0)
         return src;
-    }
-    *charset = 0;
-    charset += strlen("=?");
 
     /* Encoding mode (first '?' after charset) */
-    mode = strstr(charset, "?");
-    if (!mode) {
+    mode = strstr(src + 2, "?");
+    if (!mode || mode == src) {
         return src;
     }
-    *mode = 0;
     mode++;
 
     /* Fetch data */
     data = strstr(mode, "?");
-    if (!data) {
+    if (!data || data != mode + 1) {
         return src;
     }
-    *data = 0;
     data++;
 
     /* Look for the end bound */
@@ -329,11 +323,10 @@ char *mbox_cte_decode_rfc2047(apr_pool_t
     if (!rest) {
         return src;
     }
-    *rest = 0;
+    *rest = '\0';
 
     /* Quoted-Printable decoding : mode 'q' */
     if ((*mode == 'q') || (*mode == 'Q')) {
-        apr_size_t data_len;
         int i;
 
         /* In QP header encoding, spaces are encoded either in =20 (as
@@ -347,65 +340,80 @@ char *mbox_cte_decode_rfc2047(apr_pool_t
         }
 
         data_len = mbox_cte_decode_qp(data);
-        data[data_len] = 0;
     }
     else if ((*mode == 'b') || (*mode == 'B')) {
-        apr_size_t data_len;
-
         data_len = mbox_cte_decode_b64(data);
-        data[data_len] = 0;
+    }
+    else {
+        /* XXX we may have modified data above */
+        *rest = '?';
+        return src;
     }
 
     /* Convert charset to uppercase */
+    charset = apr_pstrmemdup(p, src, mode - src - 1);
     for (i = 0; i < strlen(charset); i++) {
         charset[i] = toupper(charset[i]);
     }
 
     /* Charset conversion */
-    vb->strlen = 0;
-    rv = mbox_cte_convert_to_utf8(p, charset, data, strlen(data), vb);
+    rv = mbox_cte_convert_to_utf8(p, charset, data, data_len, vb);
     if (rv != APR_SUCCESS) {
         ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf,
                      "conversion from %s to utf-8 failed", charset);
-        return data;
+        *rest = '?';
+        return src;
     }
-    return vb->buf;
+    return rest + strlen("?=");;
 }
 
 /* MIME header decoding (see RFC 2047). */
 char *mbox_cte_decode_header(apr_pool_t *p, char *src)
 {
-    char *start, *end, *part;
-    char *result = "";
+    char *start, *end, *cont;
     struct ap_varbuf vb;
+    int seen_encoded_word = 0;
     ap_varbuf_init(p, &vb, 100);
     vb.strlen = 0;
 
     do {
-        char c;
-
         start = strstr(src, "=?");
-        if (!start) {
-            result = apr_psprintf(p, "%s%s", result, src);
-            return result;
-        }
+        if (!start)
+            return apr_pstrcat(p, vb.buf, src, NULL);
 
         end = strstr(start, "?=");
-        if (!end) {
-            result = apr_psprintf(p, "%s%s", result, src);
-            return result;
-        }
-
-        c = *start;
-        *start = 0;
-        result = apr_psprintf(p, "%s%s", result, src);
-        *start = c;
+        if (!end)
+            return apr_pstrcat(p, vb.buf, src, NULL);
 
-        part = mbox_cte_decode_rfc2047(p, start, &vb);
-        result = apr_psprintf(p, "%s%s", result, part);
+        if (start != src) {
+            if (seen_encoded_word) {
+                /* space between consecutive encoded words must be discarded */
+                char *p = src;
+                while (p < start && apr_isspace(*p))
+                    p++;
+                if (p == start)
+                    src = start;
+		/* XXX: this is wrong if the next encoded word fails to decode */
+            }
+            if (start != src) {
+                ap_varbuf_strmemcat(&vb, src, start - src);
+                seen_encoded_word = 0;
+            }
+        }
 
-        src = end + 2;
+        cont = mbox_cte_decode_rfc2047(p, start, &vb);
+        if (cont == start) {
+            /* decoding failed, copy original string */
+            end += 2;
+            ap_varbuf_strmemcat(&vb, start, end - start);
+            src = end;
+        }
+        else {
+            src = cont;
+            seen_encoded_word = 1;
+        }
     } while (src && *src);
 
-    return result;
+    /* vb.buf is pool memory */
+    return vb.buf;
 }