You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@httpd.apache.org by sf...@apache.org on 2012/04/14 11:30:49 UTC
svn commit: r1326076 - in /httpd/mod_mbox/trunk: ./ module-2.0/mbox_parse.c
module-2.0/mbox_parse.h module-2.0/mbox_thread.c module-2.0/mod_mbox.c
module-2.0/mod_mbox.h module-2.0/mod_mbox_cte.c module-2.0/mod_mbox_mime.c
module-2.0/mod_mbox_out.c
Author: sf
Date: Sat Apr 14 09:30:48 2012
New Revision: 1326076
URL: http://svn.apache.org/viewvc?rev=1326076&view=rev
Log:
Merge branch convert-charsets
Now, mails are converted to UTF-8.
Minimum httpd version is 2.3.15
Modified:
httpd/mod_mbox/trunk/ (props changed)
httpd/mod_mbox/trunk/module-2.0/mbox_parse.c
httpd/mod_mbox/trunk/module-2.0/mbox_parse.h
httpd/mod_mbox/trunk/module-2.0/mbox_thread.c
httpd/mod_mbox/trunk/module-2.0/mod_mbox.c
httpd/mod_mbox/trunk/module-2.0/mod_mbox.h
httpd/mod_mbox/trunk/module-2.0/mod_mbox_cte.c
httpd/mod_mbox/trunk/module-2.0/mod_mbox_mime.c
httpd/mod_mbox/trunk/module-2.0/mod_mbox_out.c
Propchange: httpd/mod_mbox/trunk/
------------------------------------------------------------------------------
Merged /httpd/mod_mbox/branches/convert-charsets:r1226496-1326075
Modified: httpd/mod_mbox/trunk/module-2.0/mbox_parse.c
URL: http://svn.apache.org/viewvc/httpd/mod_mbox/trunk/module-2.0/mbox_parse.c?rev=1326076&r1=1326075&r2=1326076&view=diff
==============================================================================
--- httpd/mod_mbox/trunk/module-2.0/mbox_parse.c (original)
+++ httpd/mod_mbox/trunk/module-2.0/mbox_parse.c Sat Apr 14 09:30:48 2012
@@ -59,6 +59,7 @@ typedef struct mb_dbm_data
const char *subject;
const char *references;
const char *content_type;
+ const char *charset;
const char *boundary;
} mb_dbm_data;
@@ -67,10 +68,8 @@ typedef struct mb_dbm_data
*/
void mbox_fillbuf(MBOX_BUFF *fb)
{
- int len;
-
if (fb->fd) {
- len = strlen(fb->b);
+ int len = strlen(fb->b);
/* We are backed by a file descriptor.
* Read a new set of characters in.
@@ -507,6 +506,9 @@ static void normalize_message(request_re
if (!m->content_type || !*m->content_type)
m->content_type = "text/plain";
+ if (m->charset && !*m->charset)
+ m->charset = NULL;
+
apr_time_exp_gmt(&time_exp, m->date);
m->str_date = (char *) apr_pcalloc(r->pool, APR_RFC822_DATE_LEN);
@@ -571,6 +573,7 @@ static apr_status_t fetch_msgc(apr_pool_
fetch_cstring(pool, msgc->subject, msgValue.dptr, pos, tlen);
fetch_cstring(pool, msgc->references, msgValue.dptr, pos, tlen);
fetch_cstring(pool, msgc->content_type, msgValue.dptr, pos, tlen);
+ fetch_cstring(pool, msgc->charset, msgValue.dptr, pos, tlen);
fetch_cstring(pool, msgc->boundary, msgValue.dptr, pos, tlen);
return APR_SUCCESS;
@@ -627,6 +630,7 @@ static apr_status_t store_msgc(apr_pool_
sstrlen(msgc->subject) + sizeof(tlen) +
sstrlen(msgc->references) + sizeof(tlen) +
sstrlen(msgc->content_type) + sizeof(tlen) +
+ sstrlen(msgc->charset) + sizeof(tlen) +
sstrlen(msgc->boundary) + sizeof(tlen);
value = apr_palloc(pool, vlen);
@@ -648,6 +652,7 @@ static apr_status_t store_msgc(apr_pool_
store_cstring(msgc->subject, value, pos, tlen);
store_cstring(msgc->references, value, pos, tlen);
store_cstring(msgc->content_type, value, pos, tlen);
+ store_cstring(msgc->charset, value, pos, tlen);
store_cstring(msgc->boundary, value, pos, tlen);
msgValue.dptr = (char *) value;
@@ -807,11 +812,12 @@ apr_status_t mbox_generate_index(request
temp = apr_table_get(table, "Content-Type");
if (temp) {
- char *p, *boundary, *dup;
+ char *p, *boundary, *dup, *charset;
dup = apr_pstrdup(tpool, temp);
boundary = mbox_strcasestr(dup, "boundary=");
+ charset = mbox_strcasestr(dup, "charset=");
if (boundary) {
- msgc.boundary += sizeof("boundary=") - 1;
+ boundary += strlen("boundary=");
if (boundary[0] == '"') {
++boundary;
if ((p = strstr(boundary, "\""))) {
@@ -824,7 +830,22 @@ apr_status_t mbox_generate_index(request
}
}
}
+ if (charset) {
+ charset += strlen("charset=");
+ if (charset[0] == '"') {
+ ++charset;
+ if ((p = strstr(charset, "\""))) {
+ *p = '\0';
+ }
+ }
+ else {
+ if ((p = strstr(charset, ";"))) {
+ *p = '\0';
+ }
+ }
+ }
msgc.boundary = boundary;
+ msgc.charset = charset;
p = strstr(dup, ";");
if (p) {
*p = '\0';
@@ -913,6 +934,7 @@ MBOX_LIST *mbox_load_index(request_rec *
curMsg->from = apr_pstrdup(r->pool, msgc.from);
curMsg->subject = apr_pstrdup(r->pool, msgc.subject);
curMsg->content_type = apr_pstrdup(r->pool, msgc.content_type);
+ curMsg->charset = apr_pstrdup(r->pool, msgc.charset);
curMsg->boundary = apr_pstrdup(r->pool, msgc.boundary);
curMsg->date = msgc.date;
curMsg->raw_ref = apr_pstrdup(r->pool, msgc.references);
@@ -980,6 +1002,7 @@ Message *mbox_fetch_index(request_rec *r
curMsg->from = apr_pstrdup(r->pool, msgc.from);
curMsg->subject = apr_pstrdup(r->pool, msgc.subject);
curMsg->content_type = apr_pstrdup(r->pool, msgc.content_type);
+ curMsg->charset = apr_pstrdup(r->pool, msgc.charset);
curMsg->boundary = apr_pstrdup(r->pool, msgc.boundary);
curMsg->date = msgc.date;
curMsg->raw_ref = apr_pstrdup(r->pool, msgc.references);
@@ -1033,13 +1056,13 @@ static apr_table_t *fetch_first_headers(
{
apr_status_t status;
apr_table_t *table = NULL;
+ MBOX_BUFF b;
#ifdef APR_HAS_MMAP
apr_finfo_t fi;
+ const char *temp;
#else
char buf[HUGE_STRING_LEN + 1];
#endif
- MBOX_BUFF b;
- const char *temp;
#ifdef APR_HAS_MMAP
status = apr_file_name_get(&temp, f);
Modified: httpd/mod_mbox/trunk/module-2.0/mbox_parse.h
URL: http://svn.apache.org/viewvc/httpd/mod_mbox/trunk/module-2.0/mbox_parse.h?rev=1326076&r1=1326075&r2=1326076&view=diff
==============================================================================
--- httpd/mod_mbox/trunk/module-2.0/mbox_parse.h (original)
+++ httpd/mod_mbox/trunk/module-2.0/mbox_parse.h Sat Apr 14 09:30:48 2012
@@ -118,6 +118,7 @@ typedef struct mbox_mime_message
char *boundary;
char *content_type;
+ char *charset;
char *content_encoding;
char *content_disposition;
char *content_name;
@@ -142,6 +143,7 @@ struct Message_Struct
char *rfc822_date;
char *content_type;
+ char *charset;
char *boundary;
mbox_cte_e cte;
Modified: httpd/mod_mbox/trunk/module-2.0/mbox_thread.c
URL: http://svn.apache.org/viewvc/httpd/mod_mbox/trunk/module-2.0/mbox_thread.c?rev=1326076&r1=1326075&r2=1326076&view=diff
==============================================================================
--- httpd/mod_mbox/trunk/module-2.0/mbox_thread.c (original)
+++ httpd/mod_mbox/trunk/module-2.0/mbox_thread.c Sat Apr 14 09:30:48 2012
@@ -59,13 +59,13 @@ static int is_reply(Message *m)
*/
static char *strip_subject(apr_pool_t *p, Message *m)
{
- char *newVal = m->subject, *match = m->subject, *tmp = 0;
+ char *newVal, *match = m->subject, *tmp = NULL;
/* Match the following cases: Re:, RE:, RE[1]:, Re: Re[2]: Re: */
while (match && *match) {
/* When we don't have a match, tmp contains the "real" subject. */
tmp = newVal = match;
- match = 0;
+ match = NULL;
if (*newVal == 'R' && (*++newVal == 'e' || *newVal == 'E')) {
/* Note to self. In pure compressed syntax, the famous dangling
* else occurs. Oh, well. */
Modified: httpd/mod_mbox/trunk/module-2.0/mod_mbox.c
URL: http://svn.apache.org/viewvc/httpd/mod_mbox/trunk/module-2.0/mod_mbox.c?rev=1326076&r1=1326075&r2=1326076&view=diff
==============================================================================
--- httpd/mod_mbox/trunk/module-2.0/mod_mbox.c (original)
+++ httpd/mod_mbox/trunk/module-2.0/mod_mbox.c Sat Apr 14 09:30:48 2012
@@ -177,28 +177,29 @@ char *mbox_wrap_text(char *str)
return str;
}
-char *mbox_ascii_escape(apr_pool_t *p, const char *s)
+/* Escape control chars */
+char *mbox_cntrl_escape(apr_pool_t *p, char *s)
{
int i, j;
char *x;
/* first, count the number of extra characters */
for (i = 0, j = 0; s[i] != '\0'; i++)
- if (!apr_isascii(s[i]))
+ if (apr_iscntrl(s[i]))
j += 5;
if (j == 0)
- return apr_pstrmemdup(p, s, i);
+ return s;
x = apr_palloc(p, i + j + 1);
- for (i = 0, j = 0; s[i] != '\0'; i++, j++)
- if (!apr_isascii(s[i])) {
- char *esc = apr_psprintf(p, "&#%3.3d;", (unsigned char)s[i]);
- memcpy(&x[j], esc, 6);
+ for (i = 0, j = 0; s[i] != '\0'; i++, j++) {
+ if (apr_iscntrl(s[i])) {
+ snprintf(&x[j], 7, "&#%3.3d;", (unsigned char)s[i]);
j += 5;
}
else
x[j] = s[i];
+ }
x[j] = '\0';
return x;
Modified: httpd/mod_mbox/trunk/module-2.0/mod_mbox.h
URL: http://svn.apache.org/viewvc/httpd/mod_mbox/trunk/module-2.0/mod_mbox.h?rev=1326076&r1=1326075&r2=1326076&view=diff
==============================================================================
--- httpd/mod_mbox/trunk/module-2.0/mod_mbox.h (original)
+++ httpd/mod_mbox/trunk/module-2.0/mod_mbox.h Sat Apr 14 09:30:48 2012
@@ -24,8 +24,10 @@
#include "http_protocol.h"
#include "http_request.h"
#include "util_script.h"
+#include "util_varbuf.h"
#include "apr_date.h"
+#include "apr_lib.h"
#include "apr_strings.h"
#include "apr_dbm.h"
#include "apr_hash.h"
@@ -120,17 +122,20 @@ apr_size_t mbox_cte_decode_qp(char *p);
apr_size_t mbox_cte_decode_b64(char *src);
apr_size_t mbox_cte_escape_html(apr_pool_t *p, const char *s,
apr_size_t len, char **body);
-char *mbox_cte_decode_rfc2047(apr_pool_t *p, char *src);
char *mbox_cte_decode_header(apr_pool_t *p, char *src);
+apr_status_t mbox_cte_convert_to_utf8(apr_pool_t *p, const char *charset,
+ const char *src, apr_size_t len,
+ struct ap_varbuf *vb);
/* MIME decoding functions */
-mbox_mime_message_t *mbox_mime_decode_multipart(apr_pool_t *p,
+mbox_mime_message_t *mbox_mime_decode_multipart(request_rec *r, apr_pool_t *p,
char *body, char *ct,
+ char *charset,
mbox_cte_e cte,
char *boundary);
char *mbox_mime_decode_body(apr_pool_t *p, mbox_cte_e cte, char *body,
apr_size_t len, apr_size_t *ret_len);
-char *mbox_mime_get_body(apr_pool_t *p, mbox_mime_message_t *m);
+char *mbox_mime_get_body(request_rec *r, apr_pool_t *p, mbox_mime_message_t *m);
void mbox_mime_display_static_structure(request_rec *r,
mbox_mime_message_t *m,
char *link);
@@ -139,18 +144,18 @@ void mbox_mime_display_xml_structure(req
/* Utility functions */
char *mbox_wrap_text(char *str);
-char *mbox_ascii_escape(apr_pool_t *p, const char *s);
+char *mbox_cntrl_escape(apr_pool_t *p, char *s);
const char *get_base_path(request_rec *r);
const char *get_base_uri(request_rec *r);
const char *get_base_name(request_rec *r);
-#if AP_MODULE_MAGIC_AT_LEAST(20081231,0)
+/* XXX This should enforce that the result is valid UTF-8 */
#define ESCAPE_OR_BLANK(pool, s) \
-(s ? ap_escape_html2(pool, s, 1) : "")
-#else
-#define ESCAPE_OR_BLANK(pool, s) \
-(s ? mbox_ascii_escape(pool, ap_escape_html(pool, s)) : "")
-#endif
+(s ? mbox_cntrl_escape(pool, ap_escape_html(pool, s)) : "")
+
+/* XXX This should enforce that the result is valid UTF-8 */
+#define ESCAPE_AND_CONV_HDR(pool, s) \
+(s ? mbox_cntrl_escape(pool, ap_escape_html(pool, mbox_cte_decode_header(pool, s))) : "")
#define URI_ESCAPE_OR_BLANK(pool, s) \
(s ? ap_escape_uri(pool, s) : "")
Modified: httpd/mod_mbox/trunk/module-2.0/mod_mbox_cte.c
URL: http://svn.apache.org/viewvc/httpd/mod_mbox/trunk/module-2.0/mod_mbox_cte.c?rev=1326076&r1=1326075&r2=1326076&view=diff
==============================================================================
--- httpd/mod_mbox/trunk/module-2.0/mod_mbox_cte.c (original)
+++ httpd/mod_mbox/trunk/module-2.0/mod_mbox_cte.c Sat Apr 14 09:30:48 2012
@@ -80,6 +80,7 @@ const char *mbox_cte_to_char(mbox_cte_e
/* Unlike the original ap_escape_html, this one is also binary
* safe.
+ * The result is always NUL-terminated
*/
apr_size_t mbox_cte_escape_html(apr_pool_t *p, const char *s,
apr_size_t len, char **body)
@@ -107,7 +108,7 @@ apr_size_t mbox_cte_escape_html(apr_pool
/* Otherwise, we have some extra characters to insert : allocate
enough space for them, and process the data. */
else {
- x = apr_palloc(p, i + j);
+ x = apr_palloc(p, i + j + 1);
for (i = 0, j = 0; i < len; i++, j++) {
if (s[i] == '<') {
@@ -126,6 +127,7 @@ apr_size_t mbox_cte_escape_html(apr_pool
x[j] = s[i];
}
}
+ x[j] = '\0';
}
*body = x;
@@ -244,53 +246,94 @@ apr_size_t mbox_cte_decode_qp(char *p)
return len;
}
+apr_status_t mbox_cte_convert_to_utf8(apr_pool_t *p, const char *charset,
+ const char *src, apr_size_t len,
+ struct ap_varbuf *vb)
+{
+ apr_xlate_t *convset;
+ apr_status_t rv;
+ apr_size_t outbytes_left, inbytes_left = len;
+ char *dst;
+ if (len <= 0)
+ return APR_SUCCESS;
+ rv = apr_xlate_open(&convset, "UTF-8", charset, p);
+ if (rv != APR_SUCCESS) {
+ ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf,
+ "could not open convset '%s'", charset);
+ return rv;
+ }
+ ap_log_error(APLOG_MARK, APLOG_TRACE6, rv, ap_server_conf,
+ "using convset %s", charset);
+
+ while (inbytes_left > 0) {
+ ap_varbuf_grow(vb, vb->strlen + inbytes_left + 8);
+ dst = vb->buf + vb->strlen;
+ outbytes_left = vb->avail - vb->strlen;
+ rv = apr_xlate_conv_buffer(convset, src + len - inbytes_left, &inbytes_left,
+ dst, &outbytes_left);
+ if (rv != APR_SUCCESS) {
+ *dst = '\0';
+ goto out;
+ }
+ vb->strlen = vb->avail - outbytes_left;
+ }
+ ap_varbuf_grow(vb, vb->strlen + 8);
+ outbytes_left = vb->avail - vb->strlen;
+ dst = vb->buf + vb->strlen;
+ rv = apr_xlate_conv_buffer(convset, NULL, NULL, dst, &outbytes_left);
+ if (rv != APR_SUCCESS) {
+ *dst = '\0';
+ goto out;
+ }
+ vb->strlen = vb->avail - outbytes_left;
+ vb->buf[vb->strlen] = '\0';
+
+out:
+ apr_xlate_close(convset);
+ return rv;
+}
+
/* This function performs the decoding of strings like :
* =?UTF-8?B?QnJhbmtvIMSMaWJlag==?=
*
* These strings complies to the following syntax :
* =?charset?mode?data?= rest
+ *
+ * Appends decoded string to vb, returns
+ * position where to continue parsing.
*/
-char *mbox_cte_decode_rfc2047(apr_pool_t *p, char *src)
+static char *mbox_cte_decode_rfc2047(apr_pool_t *p, char *src, struct ap_varbuf *vb)
{
- apr_xlate_t *xlate;
-
char *charset, *mode, *data, *rest;
int i;
+ apr_status_t rv;
+ apr_size_t data_len;
- /* Look for charset */
- charset = strstr(src, "=?");
- if (!charset) {
+ if (strncmp(src, "=?", 2) != 0)
return src;
- }
- *charset = 0;
- charset += strlen("=?");
+ charset = src + strlen("=?");
/* Encoding mode (first '?' after charset) */
mode = strstr(charset, "?");
if (!mode) {
return src;
}
- *mode = 0;
mode++;
/* Fetch data */
data = strstr(mode, "?");
- if (!data) {
+ if (!data || data != mode + 1)
return src;
- }
- *data = 0;
data++;
/* Look for the end bound */
rest = strstr(data, "?=");
- if (!rest) {
+ if (!rest)
return src;
- }
- *rest = 0;
+ data = apr_pstrmemdup(p, data, rest - data);
/* Quoted-Printable decoding : mode 'q' */
if ((*mode == 'q') || (*mode == 'Q')) {
- apr_size_t data_len;
int i;
/* In QP header encoding, spaces are encoded either in =20 (as
@@ -304,77 +347,78 @@ char *mbox_cte_decode_rfc2047(apr_pool_t
}
data_len = mbox_cte_decode_qp(data);
- data[data_len] = 0;
}
else if ((*mode == 'b') || (*mode == 'B')) {
- apr_size_t data_len;
-
data_len = mbox_cte_decode_b64(data);
- data[data_len] = 0;
+ }
+ else {
+ return src;
}
/* Convert charset to uppercase */
+ charset = apr_pstrmemdup(p, charset, mode - charset - 1);
for (i = 0; i < strlen(charset); i++) {
charset[i] = toupper(charset[i]);
}
/* Charset conversion */
- if (apr_xlate_open(&xlate, "UTF-8", charset, p) == APR_SUCCESS) {
- apr_size_t inbytes_left, outbytes_left;
- apr_size_t outbuf_len = strlen(data);
-
- char *new_data;
-
- /* Allocate some memory for our resulting data, and initialize
- counters. */
- new_data = apr_palloc(p, outbuf_len);
- inbytes_left = strlen(data);
- outbytes_left = strlen(data);
-
- /* Convert */
- // apr_xlate_conv_buffer(xlate, data, &inbytes_left,
- // new_data, &outbytes_left);
-
- // new_data[outbuf_len - outbytes_left] = 0;
- // data = new_data;
-
- apr_xlate_close(xlate);
+ rv = mbox_cte_convert_to_utf8(p, charset, data, data_len, vb);
+ if (rv != APR_SUCCESS) {
+ ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf,
+ "conversion from %s to utf-8 failed", charset);
+ *rest = '?';
+ return src;
}
-
- return data;
+ return rest + strlen("?=");;
}
/* MIME header decoding (see RFC 2047). */
char *mbox_cte_decode_header(apr_pool_t *p, char *src)
{
- char *start, *end, *part;
- char *result = "";
+ char *start, *cont;
+ struct ap_varbuf vb;
+ int seen_encoded_word = 0;
+ if (src == NULL || *src == '\0')
+ return "";
+ ap_varbuf_init(p, &vb, 0);
+ vb.strlen = 0;
do {
- char c;
-
start = strstr(src, "=?");
if (!start) {
- result = apr_psprintf(p, "%s%s", result, src);
- return result;
+ if (vb.strlen == 0)
+ return src;
+ return apr_pstrcat(p, vb.buf, src, NULL);
}
- end = strstr(start, "?=");
- if (!end) {
- result = apr_psprintf(p, "%s%s", result, src);
- return result;
+ if (start != src) {
+ if (seen_encoded_word) {
+ /* space between consecutive encoded words must be discarded */
+ char *p = src;
+ while (p < start && apr_isspace(*p))
+ p++;
+ if (p == start)
+ src = start;
+ /* XXX: this is wrong if the next encoded word fails to decode */
+ }
+ if (start != src) {
+ ap_varbuf_strmemcat(&vb, src, start - src);
+ seen_encoded_word = 0;
+ }
}
- c = *start;
- *start = 0;
- result = apr_psprintf(p, "%s%s", result, src);
- *start = c;
-
- part = mbox_cte_decode_rfc2047(p, start);
- result = apr_psprintf(p, "%s%s", result, part);
-
- src = end + 2;
+ cont = mbox_cte_decode_rfc2047(p, start, &vb);
+ if (cont == start) {
+ /* decoding failed, copy start delimiter and continue */
+ ap_varbuf_strmemcat(&vb, start, 2);
+ src = start + 2;
+ }
+ else {
+ src = cont;
+ seen_encoded_word = 1;
+ }
} while (src && *src);
- return result;
+ /* vb.buf is pool memory */
+ return vb.buf;
}
Modified: httpd/mod_mbox/trunk/module-2.0/mod_mbox_mime.c
URL: http://svn.apache.org/viewvc/httpd/mod_mbox/trunk/module-2.0/mod_mbox_mime.c?rev=1326076&r1=1326075&r2=1326076&view=diff
==============================================================================
--- httpd/mod_mbox/trunk/module-2.0/mod_mbox_mime.c (original)
+++ httpd/mod_mbox/trunk/module-2.0/mod_mbox_mime.c Sat Apr 14 09:30:48 2012
@@ -18,11 +18,99 @@
*/
#include "mod_mbox.h"
+#include <apr_lib.h>
#ifdef APLOG_USE_MODULE
APLOG_USE_MODULE(mbox);
#endif
+/**
+ * find certain header line, return copy of first part (up to first ";")
+ * @param p pool to allocate from
+ * @param name name of the header
+ * @param string input: pointer to pointer string where to find the header;
+ * output: pointer to the ";" or "\n" after the copied value
+ * @param end pointer where to stop searching
+ * @note string must be NUL-terminated (but the NUL may be after *end)
+ * @return copy of the header value or NULL if not found
+ */
+static char *mbox_mime_get_header(apr_pool_t *p, const char *name,
+ char **string, const char *end)
+{
+ char *ptr;
+ int namelen = strlen(name);
+ for (ptr = *string;
+ ptr && *ptr && ptr < end ;
+ ptr = ap_strchr(ptr + 1, '\n'))
+ {
+ int l;
+ if (strncasecmp(ptr, name, namelen) != 0)
+ continue;
+ ptr += namelen;
+ if (*ptr != ':')
+ continue;
+ ptr++;
+ while (*ptr == ' ')
+ ptr++;
+ if (ptr >= end)
+ break;
+ l = strcspn(ptr, ";\n");
+ *string = ptr + l;
+ while (apr_isspace(ptr[l]) && l > 0)
+ l--;
+ return apr_pstrndup(p, ptr, l);
+ }
+ return NULL;
+}
+
+/**
+ * find value for parameter with certain name
+ * @param p pool to allocate from
+ * @param name name of the attribute
+ * @param string string with name=value pairs separated by ";",
+ * value may be a quoted string delimited by double quotes
+ * @param end pointer where to stop searching
+ * @note string must be NUL-terminated (but the NUL may be after *end)
+ * @return copy of the value, NULL if not found
+ */
+static char *mbox_mime_get_parameter(apr_pool_t *p, const char *name,
+ const char *string, const char *end)
+{
+ const char *ptr = string;
+ int namelen = strlen(name);
+ while (ptr && *ptr && ptr < end) {
+ int have_match = 0;
+ const char *val_end;
+ while (*ptr && apr_isspace(*ptr))
+ ptr++;
+ if (strncasecmp(ptr, name, namelen) == 0) {
+ ptr += strlen(name);
+ while (*ptr && apr_isspace(*ptr) && ptr < end)
+ ptr++;
+ if (*ptr == '=') {
+ have_match = 1;
+ ptr++;
+ if (ptr >= end)
+ break;
+ while (*ptr && apr_isspace(*ptr) && ptr < end)
+ ptr++;
+ }
+ }
+ if (!have_match)
+ ptr += strcspn(ptr, "= \t");
+ if (*ptr == '"')
+ val_end = ap_strchr_c(++ptr, '"');
+ else
+ val_end = ptr + strcspn(ptr, ";\n ");
+ if (!val_end || val_end > end)
+ val_end = end;
+ if (have_match)
+ return apr_pstrmemdup(p, ptr, val_end - ptr);
+ ptr = val_end + 1;
+ }
+ return NULL;
+}
+
static apr_status_t cleanup_mime_msg(void *data)
{
mbox_mime_message_t *mail = data;
@@ -33,15 +121,17 @@ static apr_status_t cleanup_mime_msg(voi
/* Decode a multipart (or not) email. In order to support multiple
* levels of MIME parts, this function is recursive.
*/
-mbox_mime_message_t *mbox_mime_decode_multipart(apr_pool_t *p, char *body,
- char *ct, mbox_cte_e cte,
- char *boundary)
+mbox_mime_message_t *mbox_mime_decode_multipart(request_rec *r, apr_pool_t *p, char *body,
+ char *ct, char *charset,
+ mbox_cte_e cte, char *boundary)
{
mbox_mime_message_t *mail;
- char *tmp = NULL, *k = NULL, *end_bound = NULL;
+ char *tmp = NULL, *end_bound = NULL;
char *headers_bound = NULL;
if (!body) {
+ ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
+ "mbox_mime_decode_multipart: no body");
return NULL;
}
@@ -49,6 +139,8 @@ mbox_mime_message_t *mbox_mime_decode_mu
if (!ct) {
headers_bound = ap_strstr(body, "\n\n");
if (!headers_bound) {
+ ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
+ "no '\\n\\n' header separator found");
return NULL;
}
}
@@ -56,142 +148,59 @@ mbox_mime_message_t *mbox_mime_decode_mu
headers_bound = body;
}
- /* If no Content-Type is provided, it means that we are parsing a
- sub-part of the multipart message. The Content-Type header
- should then be the first line of the part. If not, use
- text/plain as default for the sub-part. */
- tmp = ap_strstr(body, "Content-Type: ");
- if (!ct && (!tmp || tmp > headers_bound)) {
- ct = "text/plain";
- }
-
mail = apr_pcalloc(p, sizeof(mbox_mime_message_t));
/* make sure the memory allocated by realloc() below is cleaned up */
apr_pool_cleanup_register(p, mail, cleanup_mime_msg, apr_pool_cleanup_null);
- /* If no Content-Type is given, we have to look for it. */
if (!ct) {
- tmp += sizeof("Content-Type: ") - 1;
- k = strchr(tmp, ';');
-
- /* Isolate the Content-Type string (between 'Content-Type: '
- and ';' or end of line */
- if (k && k < headers_bound) {
- *k = 0;
+ /* If no Content-Type is provided, it means that we are parsing a
+ * sub-part of the multipart message. The Content-Type header
+ * should then be the first line of the part. If not, use
+ * text/plain as default for the sub-part.
+ */
+ tmp = body;
+ ct = mbox_mime_get_header(p, "Content-Type", &tmp, headers_bound);
+ if (!ct) {
+ ct = "text/plain";
}
else {
- k = tmp;
- while (*k) {
- if (isspace(*k)) {
- *k = 0;
- break;
- }
- k++;
- }
- }
-
- /* Copy the Content-Type and reset *k */
- mail->content_type = apr_pstrdup(p, tmp);
- *k = ';';
-
- /* If available, get MIME part name */
- tmp = ap_strstr(body, "name=");
- if (tmp && tmp < headers_bound) {
- char c = '\0';
- tmp += sizeof("name=") - 1;
- k = tmp;
-
- while (*k) {
- if (isspace(*k) || *k == ';') {
- c = *k;
- *k = 0;
- break;
- }
- k++;
- }
-
- /* Check for double quotes */
- if ((*tmp == '"') && (tmp[strlen(tmp) - 1] == '"')) {
- mail->content_name =
- apr_pstrndup(p, tmp + 1, strlen(tmp) - 2);
- }
- else {
- mail->content_name = apr_pstrdup(p, tmp);
- }
-
- *k = c;
+ if (!charset)
+ charset = mbox_mime_get_parameter(p, "charset", tmp, headers_bound);
+ mail->content_name = mbox_mime_get_parameter(p, "name", tmp, headers_bound);
}
+ mail->content_type = ct;
}
else {
mail->content_type = ct;
+ if (!charset)
+ charset = mbox_mime_get_parameter(p, "charset", ct, ct + strlen(ct));
}
+ mail->charset = charset;
/* Now we have a Content-Type. Look for other useful header information */
/* Check Content-Disposition if the match is within the headers */
- tmp = ap_strstr(body, "Content-Disposition: ");
- if (tmp && tmp < headers_bound) {
- char c = '\0';
- tmp += sizeof("Content-Disposition: ") - 1;
- k = tmp;
-
- while (*k) {
- if (isspace(*k) || *k == ';') {
- c = *k;
- *k = 0;
- break;
- }
- k++;
- }
-
- /* Copy the Content-Disposition and reset *k */
- mail->content_disposition = apr_pstrdup(p, tmp);
- *k = c;
- }
- else {
- mail->content_disposition = apr_pstrdup(p, "inline");
- }
+ tmp = body;
+ mail->content_disposition = mbox_mime_get_header(p, "Content-Disposition", &tmp, headers_bound);
+ if (!mail->content_disposition)
+ mail->content_disposition = "inline";
/* Check Content-Transfer-Encoding, if needed */
if (cte == CTE_NONE) {
- tmp = ap_strstr(body, "Content-Transfer-Encoding: ");
- if (tmp && tmp < headers_bound) {
- char c = '\0';
- tmp += sizeof("Content-Transfer-Encoding: ") - 1;
- k = tmp;
-
- while (*k) {
- if (isspace(*k) || *k == ';') {
- c = *k;
- *k = 0;
- break;
- }
- k++;
- }
-
- /* Copy the Content-Disposition and reset *k */
+ tmp = body;
+ tmp = mbox_mime_get_header(p, "Content-Transfer-Encoding", &tmp, headers_bound);
+ if (tmp)
mail->cte = mbox_parse_cte_header(tmp);
- *k = c;
- }
}
else {
mail->cte = cte;
}
- /* Now we have all the headers we need. Start processing the
- body. If the Content-Type was given at call time, the body
- starts where it's given. Otherwise it's after the headers
- (first new empty line) */
-
- if (ct) {
+ /* Now we have all the headers we need. Start processing the body */
+ if (headers_bound == body)
mail->body = body;
- }
- else {
- mail->body = ap_strstr(body, "\n\n");
- if (mail->body != NULL) {
- mail->body += 2;
- }
- }
+ else
+ mail->body = headers_bound + 2; /* skip double new line */
/* If the mail is a multipart message, search for the boundary,
and process its sub parts by recursive calls. */
@@ -201,30 +210,16 @@ mbox_mime_message_t *mbox_mime_decode_mu
/* If the boundary was not given, we must look for it in the headers */
if (!boundary) {
- tmp = ap_strstr(body, "boundary=\"");
- if (!tmp) {
+ boundary = mbox_mime_get_parameter(p, "boundary", body, headers_bound);
+ if (!boundary) {
+ ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
+ "invalid multipart message: no boundary defined");
return NULL;
}
-
- tmp += sizeof("boundary=\"") - 1;
- k = tmp;
-
- while (*k) {
- if (*k == '"') {
- *k = 0;
- break;
- }
- k++;
- }
-
- mail->boundary = apr_pstrdup(p, tmp);
- *k = '"';
- }
-
- /* Otherwise, the boundary is as given to us */
- else {
- mail->boundary = boundary;
}
+ mail->boundary = boundary;
+ ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r,
+ "decoding multipart message: boundary %s", boundary);
/* Now we have our boundary string. We must : look for it once
(begining of MIME part) and then look for the end boundary :
@@ -264,8 +259,10 @@ mbox_mime_message_t *mbox_mime_decode_mu
/* Allocate a new pointer for the sub part, and parse it. */
mail->sub =
realloc(mail->sub, ++count * sizeof(struct mimemsg *));
+ ap_log_rerror(APLOG_MARK, APLOG_TRACE2, 0, r,
+ "decoding part %d", count);
mail->sub[count - 1] =
- mbox_mime_decode_multipart(p, search, NULL, CTE_NONE, NULL);
+ mbox_mime_decode_multipart(r, p, search, NULL, NULL, CTE_NONE, NULL);
/* If the boudary is found again, it means we have another
MIME part in the same multipart message. Set the new
@@ -286,6 +283,9 @@ mbox_mime_message_t *mbox_mime_decode_mu
/* Finally reset the end-body pointer. */
// *tmp = '-';
+ ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
+ "done decoding multipart message (boundary %s)",
+ boundary);
}
/* If the parsed body is not multipart or is a MIME part, the body
@@ -330,43 +330,74 @@ char *mbox_mime_decode_body(apr_pool_t *
return new_body;
}
-
/* This function returns the relevant MIME part from a message. For
* the moment, it just returns the first text/ MIME part available.
*/
-char *mbox_mime_get_body(apr_pool_t *p, mbox_mime_message_t *m)
+char *mbox_mime_get_body(request_rec *r, apr_pool_t *p, mbox_mime_message_t *m)
{
int i;
/* If the message structure or the message body is empty, just
return NULL */
if (!m || !m->body) {
+ ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
+ "mbox_mime_get_body: %s",
+ m == NULL ? "no message???" : "no body");
return MBOX_FETCH_ERROR_STR;
}
if (strncasecmp(m->content_type, "text/", strlen("text/")) == 0) {
char *new_body;
-
- new_body =
- mbox_mime_decode_body(p, m->cte, m->body, m->body_len, NULL);
+ apr_size_t new_len;
+ new_body = mbox_mime_decode_body(p, m->cte, m->body, m->body_len,
+ &new_len);
if (!new_body) {
+ ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
+ "mbox_mime_get_body: could not decode body");
return MBOX_FETCH_ERROR_STR;
}
- m->body_len =
- mbox_cte_escape_html(p, new_body, m->body_len, &(m->body));
+ if (m->charset) {
+ struct ap_varbuf vb;
+ apr_status_t rv;
+ ap_varbuf_init(p, &vb, 0);
+ vb.strlen = 0;
+ ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0, r,
+ "mbox_mime_get_body: converting %d bytes from %s",
+ new_len, m->charset);
+ if ((rv = mbox_cte_convert_to_utf8(p, m->charset, new_body, new_len, &vb))
+ == APR_SUCCESS) {
+ new_body = vb.buf;
+ new_len = vb.strlen + 1;
+ }
+ else {
+ ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf,
+ "conversion from '%s' to utf-8 failed", m->charset);
+ }
+ ap_log_rerror(APLOG_MARK, APLOG_TRACE4, 0, r,
+ "mbox_mime_get_body: conversion done");
+ }
- return apr_pstrndup(p, m->body, m->body_len);
+ mbox_cte_escape_html(p, new_body, new_len, &new_body);
+ return new_body;
}
if (!m->sub) {
+ ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
+ "mbox_mime_get_body: message not text/* and no sub parts");
return MBOX_FETCH_ERROR_STR;
}
for (i = 0; i < m->sub_count; i++) {
- return mbox_mime_get_body(p, m->sub[i]);
+ /* XXX this loop is bullshit, should check result of mbox_mime_get_body() */
+ ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
+ "mbox_mime_get_body: choosing m->sub[%d]", i);
+ return mbox_mime_get_body(r, p, m->sub[i]);
}
+ ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
+ "m->sub != NULL but m->subcount == 0 ???");
+
return MBOX_FETCH_ERROR_STR;
}
@@ -390,10 +421,12 @@ void mbox_mime_display_static_structure(
}
if (m->content_name) {
- ap_rprintf(r, "%s", m->content_name);
+ ap_rprintf(r, "%s (%s)",
+ ESCAPE_OR_BLANK(r->pool, m->content_name),
+ ESCAPE_OR_BLANK(r->pool, m->content_type));
}
else {
- ap_rprintf(r, "Unnamed %s", m->content_type);
+ ap_rprintf(r, "Unnamed %s", ESCAPE_OR_BLANK(r->pool, m->content_type));
}
if (m->body_len) {
Modified: httpd/mod_mbox/trunk/module-2.0/mod_mbox_out.c
URL: http://svn.apache.org/viewvc/httpd/mod_mbox/trunk/module-2.0/mod_mbox_out.c?rev=1326076&r1=1326075&r2=1326076&view=diff
==============================================================================
--- httpd/mod_mbox/trunk/module-2.0/mod_mbox_out.c (original)
+++ httpd/mod_mbox/trunk/module-2.0/mod_mbox_out.c Sat Apr 14 09:30:48 2012
@@ -49,9 +49,9 @@ static void display_atom_entry(request_r
char *c;
ap_rputs("<entry>\n", r);
- ap_rprintf(r, "<title>%s</title>\n", ESCAPE_OR_BLANK(pool, m->subject));
+ ap_rprintf(r, "<title>%s</title>\n", ESCAPE_AND_CONV_HDR(pool, m->subject));
ap_rprintf(r, "<author><name>%s</name></author>\n",
- ESCAPE_OR_BLANK(pool, m->from));
+ ESCAPE_AND_CONV_HDR(pool, m->from));
ap_rprintf(r, "<link rel=\"alternate\" href=\"%s%s/%s\"/>\n",
ap_construct_url(r->pool, r->uri, r),
@@ -79,12 +79,13 @@ static void display_atom_entry(request_r
load_message(pool, f, m);
/* Parse multipart information */
- m->mime_msg = mbox_mime_decode_multipart(pool, m->raw_body,
+ m->mime_msg = mbox_mime_decode_multipart(r, pool, m->raw_body,
m->content_type,
+ m->charset,
m->cte, m->boundary);
ap_rprintf(r, "%s",
- mbox_ascii_escape(pool, mbox_wrap_text(mbox_mime_get_body(pool, m->mime_msg))));
+ mbox_cntrl_escape(pool, mbox_wrap_text(mbox_mime_get_body(r, pool, m->mime_msg))));
ap_rputs("\n</pre>\n</div>\n</content>\n", r);
ap_rputs("</entry>\n", r);
@@ -381,7 +382,11 @@ apr_status_t mbox_static_index_boxlist(r
return APR_SUCCESS;
}
-/* Antispam protection */
+/* Antispam protection,
+ * proper order is:
+ * apply mbox_cte_decode_header(), then email_antispam(), then
+ * ESCAPE_OR_BLANK()
+ */
static char *email_antispam(char *email)
{
char *tmp;
@@ -418,22 +423,22 @@ static void display_static_msglist_entry
{
mbox_dir_cfg_t *conf;
- char *from;
+ char *tmp;
int i;
conf = ap_get_module_config(r->per_dir_config, &mbox_module);
- from = ESCAPE_OR_BLANK(r->pool, m->str_from);
- from = mbox_cte_decode_header(r->pool, from);
- if (conf->antispam) {
- from = email_antispam(from);
- }
-
/* Message author */
ap_rputs(" <tr>\n", r);
+ tmp = mbox_cte_decode_header(r->pool, m->str_from);
+ if (conf->antispam) {
+ tmp = email_antispam(tmp);
+ }
+ tmp = ESCAPE_OR_BLANK(r->pool, tmp);
+
if (linked) {
- ap_rprintf(r, " <td class=\"author\">%s</td>\n", from);
+ ap_rprintf(r, " <td class=\"author\">%s</td>\n", tmp);
}
else {
ap_rputs(" <td class=\"author\"></td>\n", r);
@@ -450,7 +455,7 @@ static void display_static_msglist_entry
URI_ESCAPE_OR_BLANK(r->pool, m->msgID));
}
- ap_rprintf(r, "%s", ESCAPE_OR_BLANK(r->pool, m->subject));
+ ap_rprintf(r, "%s", ESCAPE_AND_CONV_HDR(r->pool, m->subject));
if (linked) {
ap_rputs("</a>", r);
}
@@ -482,17 +487,17 @@ static void display_xml_msglist_entry(re
if (conf->antispam) {
from = email_antispam(from);
}
+ from = ESCAPE_OR_BLANK(r->pool, from);
ap_rprintf(r, " <message linked=\"%d\" depth=\"%d\" id=\"%s\">\n",
linked, depth, ESCAPE_OR_BLANK(r->pool, m->msgID));
- ap_rprintf(r, " <from><![CDATA[%s]]></from>\n",
- ESCAPE_OR_BLANK(r->pool, from));
+ ap_rprintf(r, " <from><![CDATA[%s]]></from>\n", from);
ap_rprintf(r, " <date><![CDATA[%s]]></date>\n",
ESCAPE_OR_BLANK(r->pool, m->str_date));
ap_rprintf(r, " <subject><![CDATA[%s]]></subject>\n",
- ESCAPE_OR_BLANK(r->pool, m->subject));
+ ESCAPE_AND_CONV_HDR(r->pool, m->subject));
ap_rprintf(r, " </message>\n");
}
@@ -991,8 +996,9 @@ int mbox_raw_message(request_rec *r, apr
/* First, parse the MIME structure, and look for the correct
subpart */
- m->mime_msg = mbox_mime_decode_multipart(r->pool, m->raw_body,
+ m->mime_msg = mbox_mime_decode_multipart(r, r->pool, m->raw_body,
m->content_type,
+ m->charset,
m->cte, m->boundary);
mime_part = m->mime_msg;
@@ -1111,7 +1117,7 @@ int mbox_static_message(request_rec *r,
Message *m;
const char *baseURI;
- char *from, **context, *msgID, *escaped_msgID;
+ char *from, **context, *msgID, *escaped_msgID, *subject;
conf = ap_get_module_config(r->per_dir_config, &mbox_module);
baseURI = get_base_uri(r);
@@ -1132,12 +1138,13 @@ int mbox_static_message(request_rec *r,
}
/* Parse multipart information */
- m->mime_msg = mbox_mime_decode_multipart(r->pool, m->raw_body,
+ m->mime_msg = mbox_mime_decode_multipart(r, r->pool, m->raw_body,
m->content_type,
+ m->charset,
m->cte, m->boundary);
- send_page_header(r,
- ESCAPE_OR_BLANK(r->pool, m->subject),
+ subject = ESCAPE_AND_CONV_HDR(r->pool, m->subject);
+ send_page_header(r, subject,
apr_psprintf(r->pool, "%s mailing list archives",
get_base_name(r)),
0);
@@ -1153,11 +1160,11 @@ int mbox_static_message(request_rec *r,
"List index</a></h5>", get_base_path(r));
/* Display context message list */
- from = ESCAPE_OR_BLANK(r->pool, m->from);
- from = mbox_cte_decode_header(r->pool, from);
+ from = mbox_cte_decode_header(r->pool, m->from);
if (conf->antispam) {
from = email_antispam(from);
}
+ from = ESCAPE_OR_BLANK(r->pool, from);
ap_rputs(" <table class=\"static\" id=\"msgview\">\n", r);
@@ -1186,7 +1193,7 @@ int mbox_static_message(request_rec *r,
ap_rprintf(r, " <tr class=\"subject\">\n"
" <td class=\"left\">Subject</td>\n"
" <td class=\"right\">%s</td>\n"
- " </tr>\n", ESCAPE_OR_BLANK(r->pool, m->subject));
+ " </tr>\n", subject);
ap_rprintf(r, " <tr class=\"date\">\n"
" <td class=\"left\">Date</td>\n"
@@ -1196,7 +1203,7 @@ int mbox_static_message(request_rec *r,
/* Message body */
ap_rputs(" <tr class=\"contents\"><td colspan=\"2\"><pre>\n", r);
ap_rprintf(r, "%s",
- mbox_wrap_text(mbox_mime_get_body(r->pool, m->mime_msg)));
+ mbox_wrap_text(mbox_mime_get_body(r, r->pool, m->mime_msg)));
ap_rputs("</pre></td></tr>\n", r);
/* MIME structure */
@@ -1228,8 +1235,7 @@ apr_status_t mbox_xml_message(request_re
{
mbox_dir_cfg_t *conf;
Message *m;
-
- char *from, *msgID;
+ char *from, *subj, *msgID;
conf = ap_get_module_config(r->per_dir_config, &mbox_module);
@@ -1243,8 +1249,9 @@ apr_status_t mbox_xml_message(request_re
}
/* Parse multipart information */
- m->mime_msg = mbox_mime_decode_multipart(r->pool, m->raw_body,
+ m->mime_msg = mbox_mime_decode_multipart(r, r->pool, m->raw_body,
m->content_type,
+ m->charset,
m->cte, m->boundary);
ap_rputs("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", r);
@@ -1254,6 +1261,7 @@ apr_status_t mbox_xml_message(request_re
from = email_antispam(from);
}
from = ESCAPE_OR_BLANK(r->pool, from);
+ subj = ESCAPE_AND_CONV_HDR(r->pool, m->subject);
ap_rprintf(r, "<mail id=\"%s\">\n"
" <from><![CDATA[%s]]></from>\n"
@@ -1261,12 +1269,11 @@ apr_status_t mbox_xml_message(request_re
" <date><![CDATA[%s]]></date>\n"
" <contents><![CDATA[",
URI_ESCAPE_OR_BLANK(r->pool, m->msgID),
- from,
- ESCAPE_OR_BLANK(r->pool, m->subject),
+ from, subj,
ESCAPE_OR_BLANK(r->pool, m->rfc822_date));
ap_rprintf(r, "%s",
- mbox_ascii_escape(r->pool, mbox_wrap_text(mbox_mime_get_body(r->pool, m->mime_msg))));
+ mbox_cntrl_escape(r->pool, mbox_wrap_text(mbox_mime_get_body(r, r->pool, m->mime_msg))));
ap_rputs("]]></contents>\n", r);
ap_rputs(" <mime>\n", r);
mbox_mime_display_xml_structure(r, m->mime_msg,