You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@httpd.apache.org by sf...@apache.org on 2012/04/14 11:30:49 UTC

svn commit: r1326076 - in /httpd/mod_mbox/trunk: ./ module-2.0/mbox_parse.c module-2.0/mbox_parse.h module-2.0/mbox_thread.c module-2.0/mod_mbox.c module-2.0/mod_mbox.h module-2.0/mod_mbox_cte.c module-2.0/mod_mbox_mime.c module-2.0/mod_mbox_out.c

Author: sf
Date: Sat Apr 14 09:30:48 2012
New Revision: 1326076

URL: http://svn.apache.org/viewvc?rev=1326076&view=rev
Log:
Merge branch convert-charsets

Now, mails are converted to UTF-8.
Minimum httpd version is 2.3.15

Modified:
    httpd/mod_mbox/trunk/   (props changed)
    httpd/mod_mbox/trunk/module-2.0/mbox_parse.c
    httpd/mod_mbox/trunk/module-2.0/mbox_parse.h
    httpd/mod_mbox/trunk/module-2.0/mbox_thread.c
    httpd/mod_mbox/trunk/module-2.0/mod_mbox.c
    httpd/mod_mbox/trunk/module-2.0/mod_mbox.h
    httpd/mod_mbox/trunk/module-2.0/mod_mbox_cte.c
    httpd/mod_mbox/trunk/module-2.0/mod_mbox_mime.c
    httpd/mod_mbox/trunk/module-2.0/mod_mbox_out.c

Propchange: httpd/mod_mbox/trunk/
------------------------------------------------------------------------------
  Merged /httpd/mod_mbox/branches/convert-charsets:r1226496-1326075

Modified: httpd/mod_mbox/trunk/module-2.0/mbox_parse.c
URL: http://svn.apache.org/viewvc/httpd/mod_mbox/trunk/module-2.0/mbox_parse.c?rev=1326076&r1=1326075&r2=1326076&view=diff
==============================================================================
--- httpd/mod_mbox/trunk/module-2.0/mbox_parse.c (original)
+++ httpd/mod_mbox/trunk/module-2.0/mbox_parse.c Sat Apr 14 09:30:48 2012
@@ -59,6 +59,7 @@ typedef struct mb_dbm_data
     const char *subject;
     const char *references;
     const char *content_type;
+    const char *charset;
     const char *boundary;
 } mb_dbm_data;
 
@@ -67,10 +68,8 @@ typedef struct mb_dbm_data
  */
 void mbox_fillbuf(MBOX_BUFF *fb)
 {
-    int len;
-
     if (fb->fd) {
-        len = strlen(fb->b);
+        int len = strlen(fb->b);
 
         /* We are backed by a file descriptor.
          * Read a new set of characters in.
@@ -507,6 +506,9 @@ static void normalize_message(request_re
     if (!m->content_type || !*m->content_type)
         m->content_type = "text/plain";
 
+    if (m->charset && !*m->charset)
+        m->charset = NULL;
+
     apr_time_exp_gmt(&time_exp, m->date);
 
     m->str_date = (char *) apr_pcalloc(r->pool, APR_RFC822_DATE_LEN);
@@ -571,6 +573,7 @@ static apr_status_t fetch_msgc(apr_pool_
     fetch_cstring(pool, msgc->subject, msgValue.dptr, pos, tlen);
     fetch_cstring(pool, msgc->references, msgValue.dptr, pos, tlen);
     fetch_cstring(pool, msgc->content_type, msgValue.dptr, pos, tlen);
+    fetch_cstring(pool, msgc->charset, msgValue.dptr, pos, tlen);
     fetch_cstring(pool, msgc->boundary, msgValue.dptr, pos, tlen);
 
     return APR_SUCCESS;
@@ -627,6 +630,7 @@ static apr_status_t store_msgc(apr_pool_
         sstrlen(msgc->subject) + sizeof(tlen) +
         sstrlen(msgc->references) + sizeof(tlen) +
         sstrlen(msgc->content_type) + sizeof(tlen) +
+        sstrlen(msgc->charset) + sizeof(tlen) +
         sstrlen(msgc->boundary) + sizeof(tlen);
 
     value = apr_palloc(pool, vlen);
@@ -648,6 +652,7 @@ static apr_status_t store_msgc(apr_pool_
     store_cstring(msgc->subject, value, pos, tlen);
     store_cstring(msgc->references, value, pos, tlen);
     store_cstring(msgc->content_type, value, pos, tlen);
+    store_cstring(msgc->charset, value, pos, tlen);
     store_cstring(msgc->boundary, value, pos, tlen);
 
     msgValue.dptr = (char *) value;
@@ -807,11 +812,12 @@ apr_status_t mbox_generate_index(request
 
                 temp = apr_table_get(table, "Content-Type");
                 if (temp) {
-                    char *p, *boundary, *dup;
+                    char *p, *boundary, *dup, *charset;
                     dup = apr_pstrdup(tpool, temp);
                     boundary = mbox_strcasestr(dup, "boundary=");
+                    charset = mbox_strcasestr(dup, "charset=");
                     if (boundary) {
-                        msgc.boundary += sizeof("boundary=") - 1;
+                        boundary += strlen("boundary=");
                         if (boundary[0] == '"') {
                             ++boundary;
                             if ((p = strstr(boundary, "\""))) {
@@ -824,7 +830,22 @@ apr_status_t mbox_generate_index(request
                             }
                         }
                     }
+                    if (charset) {
+                        charset += strlen("charset=");
+                        if (charset[0] == '"') {
+                            ++charset;
+                            if ((p = strstr(charset, "\""))) {
+                                *p = '\0';
+                            }
+                        }
+                        else {
+                            if ((p = strstr(charset, ";"))) {
+                                *p = '\0';
+                            }
+                        }
+                    }
                     msgc.boundary = boundary;
+                    msgc.charset = charset;
                     p = strstr(dup, ";");
                     if (p) {
                         *p = '\0';
@@ -913,6 +934,7 @@ MBOX_LIST *mbox_load_index(request_rec *
         curMsg->from = apr_pstrdup(r->pool, msgc.from);
         curMsg->subject = apr_pstrdup(r->pool, msgc.subject);
         curMsg->content_type = apr_pstrdup(r->pool, msgc.content_type);
+        curMsg->charset = apr_pstrdup(r->pool, msgc.charset);
         curMsg->boundary = apr_pstrdup(r->pool, msgc.boundary);
         curMsg->date = msgc.date;
         curMsg->raw_ref = apr_pstrdup(r->pool, msgc.references);
@@ -980,6 +1002,7 @@ Message *mbox_fetch_index(request_rec *r
     curMsg->from = apr_pstrdup(r->pool, msgc.from);
     curMsg->subject = apr_pstrdup(r->pool, msgc.subject);
     curMsg->content_type = apr_pstrdup(r->pool, msgc.content_type);
+    curMsg->charset = apr_pstrdup(r->pool, msgc.charset);
     curMsg->boundary = apr_pstrdup(r->pool, msgc.boundary);
     curMsg->date = msgc.date;
     curMsg->raw_ref = apr_pstrdup(r->pool, msgc.references);
@@ -1033,13 +1056,13 @@ static apr_table_t *fetch_first_headers(
 {
     apr_status_t status;
     apr_table_t *table = NULL;
+    MBOX_BUFF b;
 #ifdef APR_HAS_MMAP
     apr_finfo_t fi;
+    const char *temp;
 #else
     char buf[HUGE_STRING_LEN + 1];
 #endif
-    MBOX_BUFF b;
-    const char *temp;
 
 #ifdef APR_HAS_MMAP
     status = apr_file_name_get(&temp, f);

Modified: httpd/mod_mbox/trunk/module-2.0/mbox_parse.h
URL: http://svn.apache.org/viewvc/httpd/mod_mbox/trunk/module-2.0/mbox_parse.h?rev=1326076&r1=1326075&r2=1326076&view=diff
==============================================================================
--- httpd/mod_mbox/trunk/module-2.0/mbox_parse.h (original)
+++ httpd/mod_mbox/trunk/module-2.0/mbox_parse.h Sat Apr 14 09:30:48 2012
@@ -118,6 +118,7 @@ typedef struct mbox_mime_message
     char *boundary;
 
     char *content_type;
+    char *charset;
     char *content_encoding;
     char *content_disposition;
     char *content_name;
@@ -142,6 +143,7 @@ struct Message_Struct
     char *rfc822_date;
 
     char *content_type;
+    char *charset;
     char *boundary;
     mbox_cte_e cte;
 

Modified: httpd/mod_mbox/trunk/module-2.0/mbox_thread.c
URL: http://svn.apache.org/viewvc/httpd/mod_mbox/trunk/module-2.0/mbox_thread.c?rev=1326076&r1=1326075&r2=1326076&view=diff
==============================================================================
--- httpd/mod_mbox/trunk/module-2.0/mbox_thread.c (original)
+++ httpd/mod_mbox/trunk/module-2.0/mbox_thread.c Sat Apr 14 09:30:48 2012
@@ -59,13 +59,13 @@ static int is_reply(Message *m)
  */
 static char *strip_subject(apr_pool_t *p, Message *m)
 {
-    char *newVal = m->subject, *match = m->subject, *tmp = 0;
+    char *newVal, *match = m->subject, *tmp = NULL;
 
     /* Match the following cases: Re:, RE:, RE[1]:, Re: Re[2]: Re: */
     while (match && *match) {
         /* When we don't have a match, tmp contains the "real" subject. */
         tmp = newVal = match;
-        match = 0;
+        match = NULL;
         if (*newVal == 'R' && (*++newVal == 'e' || *newVal == 'E')) {
             /* Note to self.  In pure compressed syntax, the famous dangling
              * else occurs.  Oh, well. */

Modified: httpd/mod_mbox/trunk/module-2.0/mod_mbox.c
URL: http://svn.apache.org/viewvc/httpd/mod_mbox/trunk/module-2.0/mod_mbox.c?rev=1326076&r1=1326075&r2=1326076&view=diff
==============================================================================
--- httpd/mod_mbox/trunk/module-2.0/mod_mbox.c (original)
+++ httpd/mod_mbox/trunk/module-2.0/mod_mbox.c Sat Apr 14 09:30:48 2012
@@ -177,28 +177,29 @@ char *mbox_wrap_text(char *str)
     return str;
 }
 
-char *mbox_ascii_escape(apr_pool_t *p, const char *s)
+/* Escape control chars */
+char *mbox_cntrl_escape(apr_pool_t *p, char *s)
 {
     int i, j;
     char *x;
 
     /* first, count the number of extra characters */
     for (i = 0, j = 0; s[i] != '\0'; i++)
-        if (!apr_isascii(s[i]))
+        if (apr_iscntrl(s[i]))
             j += 5;
 
     if (j == 0)
-        return apr_pstrmemdup(p, s, i);
+        return s;
 
     x = apr_palloc(p, i + j + 1);
-    for (i = 0, j = 0; s[i] != '\0'; i++, j++)
-        if (!apr_isascii(s[i])) {
-            char *esc = apr_psprintf(p, "&#%3.3d;", (unsigned char)s[i]);
-            memcpy(&x[j], esc, 6);
+    for (i = 0, j = 0; s[i] != '\0'; i++, j++) {
+        if (apr_iscntrl(s[i])) {
+            snprintf(&x[j], 7, "&#%3.3d;", (unsigned char)s[i]);
             j += 5;
         }
         else
             x[j] = s[i];
+    }
 
     x[j] = '\0';
     return x;

Modified: httpd/mod_mbox/trunk/module-2.0/mod_mbox.h
URL: http://svn.apache.org/viewvc/httpd/mod_mbox/trunk/module-2.0/mod_mbox.h?rev=1326076&r1=1326075&r2=1326076&view=diff
==============================================================================
--- httpd/mod_mbox/trunk/module-2.0/mod_mbox.h (original)
+++ httpd/mod_mbox/trunk/module-2.0/mod_mbox.h Sat Apr 14 09:30:48 2012
@@ -24,8 +24,10 @@
 #include "http_protocol.h"
 #include "http_request.h"
 #include "util_script.h"
+#include "util_varbuf.h"
 
 #include "apr_date.h"
+#include "apr_lib.h"
 #include "apr_strings.h"
 #include "apr_dbm.h"
 #include "apr_hash.h"
@@ -120,17 +122,20 @@ apr_size_t mbox_cte_decode_qp(char *p);
 apr_size_t mbox_cte_decode_b64(char *src);
 apr_size_t mbox_cte_escape_html(apr_pool_t *p, const char *s,
                                 apr_size_t len, char **body);
-char *mbox_cte_decode_rfc2047(apr_pool_t *p, char *src);
 char *mbox_cte_decode_header(apr_pool_t *p, char *src);
+apr_status_t mbox_cte_convert_to_utf8(apr_pool_t *p, const char *charset,
+                                      const char *src, apr_size_t len,
+                                      struct ap_varbuf *vb);
 
 /* MIME decoding functions */
-mbox_mime_message_t *mbox_mime_decode_multipart(apr_pool_t *p,
+mbox_mime_message_t *mbox_mime_decode_multipart(request_rec *r, apr_pool_t *p,
                                                 char *body, char *ct,
+                                                char *charset,
                                                 mbox_cte_e cte,
                                                 char *boundary);
 char *mbox_mime_decode_body(apr_pool_t *p, mbox_cte_e cte, char *body,
                             apr_size_t len, apr_size_t *ret_len);
-char *mbox_mime_get_body(apr_pool_t *p, mbox_mime_message_t *m);
+char *mbox_mime_get_body(request_rec *r, apr_pool_t *p, mbox_mime_message_t *m);
 void mbox_mime_display_static_structure(request_rec *r,
                                         mbox_mime_message_t *m,
                                         char *link);
@@ -139,18 +144,18 @@ void mbox_mime_display_xml_structure(req
 
 /* Utility functions */
 char *mbox_wrap_text(char *str);
-char *mbox_ascii_escape(apr_pool_t *p, const char *s);
+char *mbox_cntrl_escape(apr_pool_t *p, char *s);
 const char *get_base_path(request_rec *r);
 const char *get_base_uri(request_rec *r);
 const char *get_base_name(request_rec *r);
 
-#if AP_MODULE_MAGIC_AT_LEAST(20081231,0)
+/* XXX This should enforce that the result is valid UTF-8 */
 #define ESCAPE_OR_BLANK(pool, s) \
-(s ? ap_escape_html2(pool, s, 1) : "")
-#else
-#define ESCAPE_OR_BLANK(pool, s) \
-(s ? mbox_ascii_escape(pool, ap_escape_html(pool, s)) : "")
-#endif
+(s ? mbox_cntrl_escape(pool, ap_escape_html(pool, s)) : "")
+
+/* XXX This should enforce that the result is valid UTF-8 */
+#define ESCAPE_AND_CONV_HDR(pool, s) \
+(s ? mbox_cntrl_escape(pool, ap_escape_html(pool, mbox_cte_decode_header(pool, s))) : "")
 
 #define URI_ESCAPE_OR_BLANK(pool, s) \
 (s ? ap_escape_uri(pool, s) : "")

Modified: httpd/mod_mbox/trunk/module-2.0/mod_mbox_cte.c
URL: http://svn.apache.org/viewvc/httpd/mod_mbox/trunk/module-2.0/mod_mbox_cte.c?rev=1326076&r1=1326075&r2=1326076&view=diff
==============================================================================
--- httpd/mod_mbox/trunk/module-2.0/mod_mbox_cte.c (original)
+++ httpd/mod_mbox/trunk/module-2.0/mod_mbox_cte.c Sat Apr 14 09:30:48 2012
@@ -80,6 +80,7 @@ const char *mbox_cte_to_char(mbox_cte_e 
 
 /* Unlike the original ap_escape_html, this one is also binary
  * safe.
+ * The result is always NUL-terminated
  */
 apr_size_t mbox_cte_escape_html(apr_pool_t *p, const char *s,
                                 apr_size_t len, char **body)
@@ -107,7 +108,7 @@ apr_size_t mbox_cte_escape_html(apr_pool
     /* Otherwise, we have some extra characters to insert : allocate
        enough space for them, and process the data. */
     else {
-        x = apr_palloc(p, i + j);
+        x = apr_palloc(p, i + j + 1);
 
         for (i = 0, j = 0; i < len; i++, j++) {
             if (s[i] == '<') {
@@ -126,6 +127,7 @@ apr_size_t mbox_cte_escape_html(apr_pool
                 x[j] = s[i];
             }
         }
+	x[j] = '\0';
     }
 
     *body = x;
@@ -244,53 +246,94 @@ apr_size_t mbox_cte_decode_qp(char *p)
     return len;
 }
 
+apr_status_t mbox_cte_convert_to_utf8(apr_pool_t *p, const char *charset,
+                                      const char *src, apr_size_t len,
+                                      struct ap_varbuf *vb)
+{
+    apr_xlate_t *convset;
+    apr_status_t rv;
+    apr_size_t outbytes_left, inbytes_left = len;
+    char *dst;
+    if (len <= 0)
+        return APR_SUCCESS;
+    rv = apr_xlate_open(&convset, "UTF-8", charset, p);
+    if (rv != APR_SUCCESS) {
+        ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf,
+                     "could not open convset '%s'", charset);
+        return rv;
+    }
+    ap_log_error(APLOG_MARK, APLOG_TRACE6, rv, ap_server_conf,
+                 "using convset %s", charset);
+
+    while (inbytes_left > 0) {
+        ap_varbuf_grow(vb, vb->strlen + inbytes_left + 8);
+        dst = vb->buf + vb->strlen;
+        outbytes_left = vb->avail - vb->strlen;
+        rv = apr_xlate_conv_buffer(convset, src + len - inbytes_left, &inbytes_left,
+                                   dst, &outbytes_left);
+        if (rv != APR_SUCCESS) {
+            *dst = '\0';
+            goto out;
+        }
+        vb->strlen = vb->avail - outbytes_left;
+    }
+    ap_varbuf_grow(vb, vb->strlen + 8);
+    outbytes_left = vb->avail - vb->strlen;
+    dst = vb->buf + vb->strlen;
+    rv = apr_xlate_conv_buffer(convset, NULL, NULL, dst, &outbytes_left);
+    if (rv != APR_SUCCESS) {
+        *dst = '\0';
+        goto out;
+    }
+    vb->strlen = vb->avail - outbytes_left;
+    vb->buf[vb->strlen] = '\0';
+
+out:
+    apr_xlate_close(convset);
+    return rv;
+}
+
 /* This function performs the decoding of strings like :
  * =?UTF-8?B?QnJhbmtvIMSMaWJlag==?=
  *
  * These strings complies to the following syntax :
  * =?charset?mode?data?= rest
+ *
+ * Appends decoded string to vb, returns
+ * position where to continue parsing.
  */
-char *mbox_cte_decode_rfc2047(apr_pool_t *p, char *src)
+static char *mbox_cte_decode_rfc2047(apr_pool_t *p, char *src, struct ap_varbuf *vb)
 {
-    apr_xlate_t *xlate;
-
     char *charset, *mode, *data, *rest;
     int i;
+    apr_status_t rv;
+    apr_size_t data_len;
 
-    /* Look for charset */
-    charset = strstr(src, "=?");
-    if (!charset) {
+    if (strncmp(src, "=?", 2) != 0)
         return src;
-    }
-    *charset = 0;
-    charset += strlen("=?");
+    charset = src + strlen("=?");
 
     /* Encoding mode (first '?' after charset) */
     mode = strstr(charset, "?");
     if (!mode) {
         return src;
     }
-    *mode = 0;
     mode++;
 
     /* Fetch data */
     data = strstr(mode, "?");
-    if (!data) {
+    if (!data || data != mode + 1)
         return src;
-    }
-    *data = 0;
     data++;
 
     /* Look for the end bound */
     rest = strstr(data, "?=");
-    if (!rest) {
+    if (!rest)
         return src;
-    }
-    *rest = 0;
+    data = apr_pstrmemdup(p, data, rest - data);
 
     /* Quoted-Printable decoding : mode 'q' */
     if ((*mode == 'q') || (*mode == 'Q')) {
-        apr_size_t data_len;
         int i;
 
         /* In QP header encoding, spaces are encoded either in =20 (as
@@ -304,77 +347,78 @@ char *mbox_cte_decode_rfc2047(apr_pool_t
         }
 
         data_len = mbox_cte_decode_qp(data);
-        data[data_len] = 0;
     }
     else if ((*mode == 'b') || (*mode == 'B')) {
-        apr_size_t data_len;
-
         data_len = mbox_cte_decode_b64(data);
-        data[data_len] = 0;
+    }
+    else {
+        return src;
     }
 
     /* Convert charset to uppercase */
+    charset = apr_pstrmemdup(p, charset, mode - charset - 1);
     for (i = 0; i < strlen(charset); i++) {
         charset[i] = toupper(charset[i]);
     }
 
     /* Charset conversion */
-    if (apr_xlate_open(&xlate, "UTF-8", charset, p) == APR_SUCCESS) {
-        apr_size_t inbytes_left, outbytes_left;
-        apr_size_t outbuf_len = strlen(data);
-
-        char *new_data;
-
-        /* Allocate some memory for our resulting data, and initialize
-           counters. */
-        new_data = apr_palloc(p, outbuf_len);
-        inbytes_left = strlen(data);
-        outbytes_left = strlen(data);
-
-        /* Convert */
-        //      apr_xlate_conv_buffer(xlate, data, &inbytes_left,
-        //                  new_data, &outbytes_left);
-
-        //      new_data[outbuf_len - outbytes_left] = 0;
-        //      data = new_data;
-
-        apr_xlate_close(xlate);
+    rv = mbox_cte_convert_to_utf8(p, charset, data, data_len, vb);
+    if (rv != APR_SUCCESS) {
+        ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf,
+                     "conversion from %s to utf-8 failed", charset);
+        *rest = '?';
+        return src;
     }
-
-    return data;
+    return rest + strlen("?=");;
 }
 
 /* MIME header decoding (see RFC 2047). */
 char *mbox_cte_decode_header(apr_pool_t *p, char *src)
 {
-    char *start, *end, *part;
-    char *result = "";
+    char *start, *cont;
+    struct ap_varbuf vb;
+    int seen_encoded_word = 0;
+    if (src == NULL || *src == '\0')
+        return "";
+    ap_varbuf_init(p, &vb, 0);
+    vb.strlen = 0;
 
     do {
-        char c;
-
         start = strstr(src, "=?");
         if (!start) {
-            result = apr_psprintf(p, "%s%s", result, src);
-            return result;
+            if (vb.strlen == 0)
+                return src;
+            return apr_pstrcat(p, vb.buf, src, NULL);
         }
 
-        end = strstr(start, "?=");
-        if (!end) {
-            result = apr_psprintf(p, "%s%s", result, src);
-            return result;
+        if (start != src) {
+            if (seen_encoded_word) {
+                /* space between consecutive encoded words must be discarded */
+                char *p = src;
+                while (p < start && apr_isspace(*p))
+                    p++;
+                if (p == start)
+                    src = start;
+                /* XXX: this is wrong if the next encoded word fails to decode */
+            }
+            if (start != src) {
+                ap_varbuf_strmemcat(&vb, src, start - src);
+                seen_encoded_word = 0;
+            }
         }
 
-        c = *start;
-        *start = 0;
-        result = apr_psprintf(p, "%s%s", result, src);
-        *start = c;
-
-        part = mbox_cte_decode_rfc2047(p, start);
-        result = apr_psprintf(p, "%s%s", result, part);
-
-        src = end + 2;
+        cont = mbox_cte_decode_rfc2047(p, start, &vb);
+        if (cont == start) {
+            /* decoding failed, copy start delimiter and continue */
+            ap_varbuf_strmemcat(&vb, start, 2);
+            src = start + 2;
+        }
+        else {
+            src = cont;
+            seen_encoded_word = 1;
+        }
     } while (src && *src);
 
-    return result;
+    /* vb.buf is pool memory */
+    return vb.buf;
 }

Modified: httpd/mod_mbox/trunk/module-2.0/mod_mbox_mime.c
URL: http://svn.apache.org/viewvc/httpd/mod_mbox/trunk/module-2.0/mod_mbox_mime.c?rev=1326076&r1=1326075&r2=1326076&view=diff
==============================================================================
--- httpd/mod_mbox/trunk/module-2.0/mod_mbox_mime.c (original)
+++ httpd/mod_mbox/trunk/module-2.0/mod_mbox_mime.c Sat Apr 14 09:30:48 2012
@@ -18,11 +18,99 @@
  */
 
 #include "mod_mbox.h"
+#include <apr_lib.h>
 
 #ifdef APLOG_USE_MODULE
 APLOG_USE_MODULE(mbox);
 #endif
 
+/**
+ * find certain header line, return copy of first part (up to first ";")
+ * @param p pool to allocate from
+ * @param name name of the header
+ * @param string input: pointer to pointer string where to find the header;
+ *        output: pointer to the ";" or "\n" after the copied value
+ * @param end pointer where to stop searching
+ * @note string must be NUL-terminated (but the NUL may be after *end)
+ * @return copy of the header value or NULL if not found
+ */
+static char *mbox_mime_get_header(apr_pool_t *p, const char *name,
+                                  char **string, const char *end)
+{
+    char *ptr;
+    int namelen = strlen(name);
+    for (ptr = *string;
+         ptr && *ptr && ptr < end ;
+         ptr = ap_strchr(ptr + 1, '\n'))
+    {
+        int l;
+        if (strncasecmp(ptr, name, namelen) != 0)
+            continue;
+        ptr += namelen;
+        if (*ptr != ':')
+            continue;
+        ptr++;
+        while (*ptr == ' ')
+            ptr++;
+        if (ptr >= end)
+            break;
+        l = strcspn(ptr, ";\n");
+        *string = ptr + l;
+        while (apr_isspace(ptr[l]) && l > 0)
+            l--;
+        return apr_pstrndup(p, ptr, l);
+    }
+    return NULL;
+}
+
+/**
+ * find value for parameter with certain name
+ * @param p pool to allocate from
+ * @param name name of the attribute
+ * @param string string with name=value pairs separated by ";",
+ *        value may be a quoted string delimited by double quotes
+ * @param end pointer where to stop searching
+ * @note string must be NUL-terminated (but the NUL may be after *end)
+ * @return copy of the value, NULL if not found
+ */
+static char *mbox_mime_get_parameter(apr_pool_t *p, const char *name,
+                                     const char *string, const char *end)
+{
+    const char *ptr = string;
+    int namelen = strlen(name);
+    while (ptr && *ptr && ptr < end) {
+        int have_match = 0;
+        const char *val_end;
+        while (*ptr && apr_isspace(*ptr))
+            ptr++;
+        if (strncasecmp(ptr, name, namelen) == 0) {
+            ptr += strlen(name);
+            while (*ptr && apr_isspace(*ptr) && ptr < end)
+                ptr++;
+            if (*ptr == '=') {
+                have_match = 1;
+                ptr++;
+                if (ptr >= end)
+                    break;
+                while (*ptr && apr_isspace(*ptr) && ptr < end)
+                    ptr++;
+            }
+        }
+        if (!have_match)
+            ptr += strcspn(ptr, "= \t");
+        if (*ptr == '"')
+            val_end = ap_strchr_c(++ptr, '"');
+        else
+            val_end = ptr + strcspn(ptr, ";\n ");
+        if (!val_end || val_end > end)
+            val_end = end;
+        if (have_match)
+            return apr_pstrmemdup(p, ptr, val_end - ptr);
+        ptr = val_end + 1;
+    }
+    return NULL;
+}
+
 static apr_status_t cleanup_mime_msg(void *data)
 {
     mbox_mime_message_t *mail = data;
@@ -33,15 +121,17 @@ static apr_status_t cleanup_mime_msg(voi
 /* Decode a multipart (or not) email. In order to support multiple
  * levels of MIME parts, this function is recursive.
  */
-mbox_mime_message_t *mbox_mime_decode_multipart(apr_pool_t *p, char *body,
-                                                char *ct, mbox_cte_e cte,
-                                                char *boundary)
+mbox_mime_message_t *mbox_mime_decode_multipart(request_rec *r, apr_pool_t *p, char *body,
+                                                char *ct, char *charset,
+                                                mbox_cte_e cte, char *boundary)
 {
     mbox_mime_message_t *mail;
-    char *tmp = NULL, *k = NULL, *end_bound = NULL;
+    char *tmp = NULL, *end_bound = NULL;
     char *headers_bound = NULL;
 
     if (!body) {
+        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
+                      "mbox_mime_decode_multipart: no body");
         return NULL;
     }
 
@@ -49,6 +139,8 @@ mbox_mime_message_t *mbox_mime_decode_mu
     if (!ct) {
         headers_bound = ap_strstr(body, "\n\n");
         if (!headers_bound) {
+            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
+                          "no '\\n\\n' header separator found");
             return NULL;
         }
     }
@@ -56,142 +148,59 @@ mbox_mime_message_t *mbox_mime_decode_mu
         headers_bound = body;
     }
 
-    /* If no Content-Type is provided, it means that we are parsing a
-       sub-part of the multipart message. The Content-Type header
-       should then be the first line of the part. If not, use
-       text/plain as default for the sub-part. */
-    tmp = ap_strstr(body, "Content-Type: ");
-    if (!ct && (!tmp || tmp > headers_bound)) {
-        ct = "text/plain";
-    }
-
     mail = apr_pcalloc(p, sizeof(mbox_mime_message_t));
     /* make sure the memory allocated by realloc() below is cleaned up */
     apr_pool_cleanup_register(p, mail, cleanup_mime_msg, apr_pool_cleanup_null);
 
-    /* If no Content-Type is given, we have to look for it. */
     if (!ct) {
-        tmp += sizeof("Content-Type: ") - 1;
-        k = strchr(tmp, ';');
-
-        /* Isolate the Content-Type string (between 'Content-Type: '
-           and ';' or end of line */
-        if (k && k < headers_bound) {
-            *k = 0;
+        /* If no Content-Type is provided, it means that we are parsing a
+         * sub-part of the multipart message. The Content-Type header
+         * should then be the first line of the part. If not, use
+         * text/plain as default for the sub-part.
+         */
+        tmp = body;
+        ct = mbox_mime_get_header(p, "Content-Type", &tmp, headers_bound);
+        if (!ct) {
+            ct = "text/plain";
         }
         else {
-            k = tmp;
-            while (*k) {
-                if (isspace(*k)) {
-                    *k = 0;
-                    break;
-                }
-                k++;
-            }
-        }
-
-        /* Copy the Content-Type and reset *k */
-        mail->content_type = apr_pstrdup(p, tmp);
-        *k = ';';
-
-        /* If available, get MIME part name */
-        tmp = ap_strstr(body, "name=");
-        if (tmp && tmp < headers_bound) {
-            char c = '\0';
-            tmp += sizeof("name=") - 1;
-            k = tmp;
-
-            while (*k) {
-                if (isspace(*k) || *k == ';') {
-                    c = *k;
-                    *k = 0;
-                    break;
-                }
-                k++;
-            }
-
-            /* Check for double quotes */
-            if ((*tmp == '"') && (tmp[strlen(tmp) - 1] == '"')) {
-                mail->content_name =
-                    apr_pstrndup(p, tmp + 1, strlen(tmp) - 2);
-            }
-            else {
-                mail->content_name = apr_pstrdup(p, tmp);
-            }
-
-            *k = c;
+            if (!charset)
+                charset = mbox_mime_get_parameter(p, "charset", tmp, headers_bound);
+            mail->content_name = mbox_mime_get_parameter(p, "name", tmp, headers_bound);
         }
+        mail->content_type = ct;
     }
     else {
         mail->content_type = ct;
+        if (!charset)
+            charset = mbox_mime_get_parameter(p, "charset", ct, ct + strlen(ct));
     }
+    mail->charset = charset;
 
     /* Now we have a Content-Type. Look for other useful header information */
 
     /* Check Content-Disposition if the match is within the headers */
-    tmp = ap_strstr(body, "Content-Disposition: ");
-    if (tmp && tmp < headers_bound) {
-        char c = '\0';
-        tmp += sizeof("Content-Disposition: ") - 1;
-        k = tmp;
-
-        while (*k) {
-            if (isspace(*k) || *k == ';') {
-                c = *k;
-                *k = 0;
-                break;
-            }
-            k++;
-        }
-
-        /* Copy the Content-Disposition and reset *k */
-        mail->content_disposition = apr_pstrdup(p, tmp);
-        *k = c;
-    }
-    else {
-        mail->content_disposition = apr_pstrdup(p, "inline");
-    }
+    tmp = body;
+    mail->content_disposition = mbox_mime_get_header(p, "Content-Disposition", &tmp, headers_bound);
+    if (!mail->content_disposition)
+        mail->content_disposition = "inline";
 
     /* Check Content-Transfer-Encoding, if needed */
     if (cte == CTE_NONE) {
-        tmp = ap_strstr(body, "Content-Transfer-Encoding: ");
-        if (tmp && tmp < headers_bound) {
-            char c = '\0';
-            tmp += sizeof("Content-Transfer-Encoding: ") - 1;
-            k = tmp;
-
-            while (*k) {
-                if (isspace(*k) || *k == ';') {
-                    c = *k;
-                    *k = 0;
-                    break;
-                }
-                k++;
-            }
-
-            /* Copy the Content-Disposition and reset *k */
+        tmp = body;
+        tmp = mbox_mime_get_header(p, "Content-Transfer-Encoding", &tmp, headers_bound);
+        if (tmp)
             mail->cte = mbox_parse_cte_header(tmp);
-            *k = c;
-        }
     }
     else {
         mail->cte = cte;
     }
 
-    /* Now we have all the headers we need. Start processing the
-       body. If the Content-Type was given at call time, the body
-       starts where it's given. Otherwise it's after the headers
-       (first new empty line) */
-
-    if (ct) {
+    /* Now we have all the headers we need. Start processing the body */
+    if (headers_bound == body)
         mail->body = body;
-    }
-    else {
-        mail->body = ap_strstr(body, "\n\n");
-        if (mail->body != NULL) {
-            mail->body += 2;
-        }
-    }
+    else
+        mail->body = headers_bound + 2; /* skip double new line */
 
     /* If the mail is a multipart message, search for the boundary,
        and process its sub parts by recursive calls. */
@@ -201,30 +210,16 @@ mbox_mime_message_t *mbox_mime_decode_mu
 
         /* If the boundary was not given, we must look for it in the headers */
         if (!boundary) {
-            tmp = ap_strstr(body, "boundary=\"");
-            if (!tmp) {
+            boundary = mbox_mime_get_parameter(p, "boundary", body, headers_bound);
+            if (!boundary) {
+                ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
+                              "invalid multipart message: no boundary defined");
                 return NULL;
             }
-
-            tmp += sizeof("boundary=\"") - 1;
-            k = tmp;
-
-            while (*k) {
-                if (*k == '"') {
-                    *k = 0;
-                    break;
-                }
-                k++;
-            }
-
-            mail->boundary = apr_pstrdup(p, tmp);
-            *k = '"';
-        }
-
-        /* Otherwise, the boundary is as given to us */
-        else {
-            mail->boundary = boundary;
         }
+        mail->boundary = boundary;
+        ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r,
+                      "decoding multipart message: boundary %s", boundary);
 
         /* Now we have our boundary string. We must : look for it once
            (begining of MIME part) and then look for the end boundary :
@@ -264,8 +259,10 @@ mbox_mime_message_t *mbox_mime_decode_mu
             /* Allocate a new pointer for the sub part, and parse it. */
             mail->sub =
                 realloc(mail->sub, ++count * sizeof(struct mimemsg *));
+            ap_log_rerror(APLOG_MARK, APLOG_TRACE2, 0, r,
+                          "decoding part %d", count);
             mail->sub[count - 1] =
-                mbox_mime_decode_multipart(p, search, NULL, CTE_NONE, NULL);
+                mbox_mime_decode_multipart(r, p, search, NULL, NULL, CTE_NONE, NULL);
 
             /* If the boudary is found again, it means we have another
                MIME part in the same multipart message. Set the new
@@ -286,6 +283,9 @@ mbox_mime_message_t *mbox_mime_decode_mu
 
         /* Finally reset the end-body pointer. */
         //      *tmp = '-';
+        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
+                      "done decoding multipart message (boundary %s)",
+                      boundary);
     }
 
     /* If the parsed body is not multipart or is a MIME part, the body
@@ -330,43 +330,74 @@ char *mbox_mime_decode_body(apr_pool_t *
     return new_body;
 }
 
-
 /* This function returns the relevant MIME part from a message. For
  * the moment, it just returns the first text/ MIME part available.
  */
-char *mbox_mime_get_body(apr_pool_t *p, mbox_mime_message_t *m)
+char *mbox_mime_get_body(request_rec *r, apr_pool_t *p, mbox_mime_message_t *m)
 {
     int i;
 
     /* If the message structure or the message body is empty, just
        return NULL */
     if (!m || !m->body) {
+        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
+                      "mbox_mime_get_body: %s",
+                      m == NULL ? "no message???" : "no body");
         return MBOX_FETCH_ERROR_STR;
     }
 
     if (strncasecmp(m->content_type, "text/", strlen("text/")) == 0) {
         char *new_body;
-
-        new_body =
-            mbox_mime_decode_body(p, m->cte, m->body, m->body_len, NULL);
+        apr_size_t new_len;
+        new_body = mbox_mime_decode_body(p, m->cte, m->body, m->body_len,
+                                         &new_len);
         if (!new_body) {
+            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
+                          "mbox_mime_get_body: could not decode body");
             return MBOX_FETCH_ERROR_STR;
         }
 
-        m->body_len =
-            mbox_cte_escape_html(p, new_body, m->body_len, &(m->body));
+        if (m->charset) {
+            struct ap_varbuf vb;
+            apr_status_t rv;
+            ap_varbuf_init(p, &vb, 0);
+            vb.strlen = 0;
+            ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0, r,
+                          "mbox_mime_get_body: converting %d bytes from %s",
+                          new_len, m->charset);
+            if ((rv = mbox_cte_convert_to_utf8(p, m->charset, new_body, new_len, &vb))
+                == APR_SUCCESS) {
+                new_body = vb.buf;
+                new_len = vb.strlen + 1;
+            }
+            else {
+                ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf,
+                             "conversion from '%s' to utf-8 failed", m->charset);
+            }
+            ap_log_rerror(APLOG_MARK, APLOG_TRACE4, 0, r,
+                          "mbox_mime_get_body: conversion done");
+        }
 
-        return apr_pstrndup(p, m->body, m->body_len);
+        mbox_cte_escape_html(p, new_body, new_len, &new_body);
+        return new_body;
     }
 
     if (!m->sub) {
+        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
+                      "mbox_mime_get_body: message not text/* and no sub parts");
         return MBOX_FETCH_ERROR_STR;
     }
 
     for (i = 0; i < m->sub_count; i++) {
-        return mbox_mime_get_body(p, m->sub[i]);
+        /* XXX this loop always returns on its first iteration; it should check the result of mbox_mime_get_body() before returning */
+        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
+                      "mbox_mime_get_body: choosing m->sub[%d]", i);
+        return mbox_mime_get_body(r, p, m->sub[i]);
     }
 
+    ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
+                  "m->sub != NULL but m->subcount == 0 ???");
+
     return MBOX_FETCH_ERROR_STR;
 }
 
@@ -390,10 +421,12 @@ void mbox_mime_display_static_structure(
     }
 
     if (m->content_name) {
-        ap_rprintf(r, "%s", m->content_name);
+        ap_rprintf(r, "%s (%s)",
+                   ESCAPE_OR_BLANK(r->pool, m->content_name),
+                   ESCAPE_OR_BLANK(r->pool, m->content_type));
     }
     else {
-        ap_rprintf(r, "Unnamed %s", m->content_type);
+        ap_rprintf(r, "Unnamed %s", ESCAPE_OR_BLANK(r->pool, m->content_type));
     }
 
     if (m->body_len) {

Modified: httpd/mod_mbox/trunk/module-2.0/mod_mbox_out.c
URL: http://svn.apache.org/viewvc/httpd/mod_mbox/trunk/module-2.0/mod_mbox_out.c?rev=1326076&r1=1326075&r2=1326076&view=diff
==============================================================================
--- httpd/mod_mbox/trunk/module-2.0/mod_mbox_out.c (original)
+++ httpd/mod_mbox/trunk/module-2.0/mod_mbox_out.c Sat Apr 14 09:30:48 2012
@@ -49,9 +49,9 @@ static void display_atom_entry(request_r
     char *c;
 
     ap_rputs("<entry>\n", r);
-    ap_rprintf(r, "<title>%s</title>\n", ESCAPE_OR_BLANK(pool, m->subject));
+    ap_rprintf(r, "<title>%s</title>\n", ESCAPE_AND_CONV_HDR(pool, m->subject));
     ap_rprintf(r, "<author><name>%s</name></author>\n",
-               ESCAPE_OR_BLANK(pool, m->from));
+               ESCAPE_AND_CONV_HDR(pool, m->from));
 
     ap_rprintf(r, "<link rel=\"alternate\" href=\"%s%s/%s\"/>\n",
                ap_construct_url(r->pool, r->uri, r),
@@ -79,12 +79,13 @@ static void display_atom_entry(request_r
 
     load_message(pool, f, m);
     /* Parse multipart information */
-    m->mime_msg = mbox_mime_decode_multipart(pool, m->raw_body,
+    m->mime_msg = mbox_mime_decode_multipart(r, pool, m->raw_body,
                                              m->content_type,
+                                             m->charset,
                                              m->cte, m->boundary);
 
     ap_rprintf(r, "%s",
-               mbox_ascii_escape(pool, mbox_wrap_text(mbox_mime_get_body(pool, m->mime_msg))));
+               mbox_cntrl_escape(pool, mbox_wrap_text(mbox_mime_get_body(r, pool, m->mime_msg))));
 
     ap_rputs("\n</pre>\n</div>\n</content>\n", r);
     ap_rputs("</entry>\n", r);
@@ -381,7 +382,11 @@ apr_status_t mbox_static_index_boxlist(r
     return APR_SUCCESS;
 }
 
-/* Antispam protection */
+/* Antispam protection.
+ * The proper order of operations is:
+ * first mbox_cte_decode_header(), then email_antispam(), and
+ * finally ESCAPE_OR_BLANK().
+ */
 static char *email_antispam(char *email)
 {
     char *tmp;
@@ -418,22 +423,22 @@ static void display_static_msglist_entry
 {
     mbox_dir_cfg_t *conf;
 
-    char *from;
+    char *tmp;
     int i;
 
     conf = ap_get_module_config(r->per_dir_config, &mbox_module);
 
-    from = ESCAPE_OR_BLANK(r->pool, m->str_from);
-    from = mbox_cte_decode_header(r->pool, from);
-    if (conf->antispam) {
-        from = email_antispam(from);
-    }
-
     /* Message author */
     ap_rputs("   <tr>\n", r);
 
+    tmp = mbox_cte_decode_header(r->pool, m->str_from);
+    if (conf->antispam) {
+        tmp = email_antispam(tmp);
+    }
+    tmp = ESCAPE_OR_BLANK(r->pool, tmp);
+
     if (linked) {
-        ap_rprintf(r, "    <td class=\"author\">%s</td>\n", from);
+        ap_rprintf(r, "    <td class=\"author\">%s</td>\n", tmp);
     }
     else {
         ap_rputs("    <td class=\"author\"></td>\n", r);
@@ -450,7 +455,7 @@ static void display_static_msglist_entry
                    URI_ESCAPE_OR_BLANK(r->pool, m->msgID));
     }
 
-    ap_rprintf(r, "%s", ESCAPE_OR_BLANK(r->pool, m->subject));
+    ap_rprintf(r, "%s", ESCAPE_AND_CONV_HDR(r->pool, m->subject));
     if (linked) {
         ap_rputs("</a>", r);
     }
@@ -482,17 +487,17 @@ static void display_xml_msglist_entry(re
     if (conf->antispam) {
         from = email_antispam(from);
     }
+    from = ESCAPE_OR_BLANK(r->pool, from);
 
     ap_rprintf(r, " <message linked=\"%d\" depth=\"%d\" id=\"%s\">\n",
                linked, depth, ESCAPE_OR_BLANK(r->pool, m->msgID));
 
-    ap_rprintf(r, "  <from><![CDATA[%s]]></from>\n",
-               ESCAPE_OR_BLANK(r->pool, from));
+    ap_rprintf(r, "  <from><![CDATA[%s]]></from>\n", from);
     ap_rprintf(r, "  <date><![CDATA[%s]]></date>\n",
                ESCAPE_OR_BLANK(r->pool, m->str_date));
 
     ap_rprintf(r, "  <subject><![CDATA[%s]]></subject>\n",
-               ESCAPE_OR_BLANK(r->pool, m->subject));
+               ESCAPE_AND_CONV_HDR(r->pool, m->subject));
     ap_rprintf(r, " </message>\n");
 }
 
@@ -991,8 +996,9 @@ int mbox_raw_message(request_rec *r, apr
 
     /* First, parse the MIME structure, and look for the correct
        subpart */
-    m->mime_msg = mbox_mime_decode_multipart(r->pool, m->raw_body,
+    m->mime_msg = mbox_mime_decode_multipart(r, r->pool, m->raw_body,
                                              m->content_type,
+                                             m->charset,
                                              m->cte, m->boundary);
 
     mime_part = m->mime_msg;
@@ -1111,7 +1117,7 @@ int mbox_static_message(request_rec *r, 
     Message *m;
 
     const char *baseURI;
-    char *from, **context, *msgID, *escaped_msgID;
+    char *from, **context, *msgID, *escaped_msgID, *subject;
 
     conf = ap_get_module_config(r->per_dir_config, &mbox_module);
     baseURI = get_base_uri(r);
@@ -1132,12 +1138,13 @@ int mbox_static_message(request_rec *r, 
     }
 
     /* Parse multipart information */
-    m->mime_msg = mbox_mime_decode_multipart(r->pool, m->raw_body,
+    m->mime_msg = mbox_mime_decode_multipart(r, r->pool, m->raw_body,
                                              m->content_type,
+                                             m->charset,
                                              m->cte, m->boundary);
 
-    send_page_header(r,
-                     ESCAPE_OR_BLANK(r->pool, m->subject),
+    subject = ESCAPE_AND_CONV_HDR(r->pool, m->subject);
+    send_page_header(r, subject,
                      apr_psprintf(r->pool, "%s mailing list archives",
                                   get_base_name(r)),
                      0);
@@ -1153,11 +1160,11 @@ int mbox_static_message(request_rec *r, 
                "List index</a></h5>", get_base_path(r));
 
     /* Display context message list */
-    from = ESCAPE_OR_BLANK(r->pool, m->from);
-    from = mbox_cte_decode_header(r->pool, from);
+    from = mbox_cte_decode_header(r->pool, m->from);
     if (conf->antispam) {
         from = email_antispam(from);
     }
+    from = ESCAPE_OR_BLANK(r->pool, from);
 
     ap_rputs("  <table class=\"static\" id=\"msgview\">\n", r);
 
@@ -1186,7 +1193,7 @@ int mbox_static_message(request_rec *r, 
     ap_rprintf(r, "   <tr class=\"subject\">\n"
                "    <td class=\"left\">Subject</td>\n"
                "    <td class=\"right\">%s</td>\n"
-               "   </tr>\n", ESCAPE_OR_BLANK(r->pool, m->subject));
+               "   </tr>\n", subject);
 
     ap_rprintf(r, "   <tr class=\"date\">\n"
                "    <td class=\"left\">Date</td>\n"
@@ -1196,7 +1203,7 @@ int mbox_static_message(request_rec *r, 
     /* Message body */
     ap_rputs("   <tr class=\"contents\"><td colspan=\"2\"><pre>\n", r);
     ap_rprintf(r, "%s",
-               mbox_wrap_text(mbox_mime_get_body(r->pool, m->mime_msg)));
+               mbox_wrap_text(mbox_mime_get_body(r, r->pool, m->mime_msg)));
     ap_rputs("</pre></td></tr>\n", r);
 
     /* MIME structure */
@@ -1228,8 +1235,7 @@ apr_status_t mbox_xml_message(request_re
 {
     mbox_dir_cfg_t *conf;
     Message *m;
-
-    char *from, *msgID;
+    char *from, *subj, *msgID;
 
     conf = ap_get_module_config(r->per_dir_config, &mbox_module);
 
@@ -1243,8 +1249,9 @@ apr_status_t mbox_xml_message(request_re
     }
 
     /* Parse multipart information */
-    m->mime_msg = mbox_mime_decode_multipart(r->pool, m->raw_body,
+    m->mime_msg = mbox_mime_decode_multipart(r, r->pool, m->raw_body,
                                              m->content_type,
+                                             m->charset,
                                              m->cte, m->boundary);
 
     ap_rputs("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", r);
@@ -1254,6 +1261,7 @@ apr_status_t mbox_xml_message(request_re
         from = email_antispam(from);
     }
     from = ESCAPE_OR_BLANK(r->pool, from);
+    subj = ESCAPE_AND_CONV_HDR(r->pool, m->subject);
 
     ap_rprintf(r, "<mail id=\"%s\">\n"
                " <from><![CDATA[%s]]></from>\n"
@@ -1261,12 +1269,11 @@ apr_status_t mbox_xml_message(request_re
                " <date><![CDATA[%s]]></date>\n"
                " <contents><![CDATA[",
                URI_ESCAPE_OR_BLANK(r->pool, m->msgID),
-               from,
-               ESCAPE_OR_BLANK(r->pool, m->subject),
+               from, subj,
                ESCAPE_OR_BLANK(r->pool, m->rfc822_date));
 
     ap_rprintf(r, "%s",
-               mbox_ascii_escape(r->pool, mbox_wrap_text(mbox_mime_get_body(r->pool, m->mime_msg))));
+               mbox_cntrl_escape(r->pool, mbox_wrap_text(mbox_mime_get_body(r, r->pool, m->mime_msg))));
     ap_rputs("]]></contents>\n", r);
     ap_rputs(" <mime>\n", r);
     mbox_mime_display_xml_structure(r, m->mime_msg,