You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@httpd.apache.org by sf...@apache.org on 2012/04/13 10:09:47 UTC
svn commit: r1325647 - in /httpd/mod_mbox/branches/convert-charsets/module-2.0: mod_mbox.h mod_mbox_cte.c
Author: sf
Date: Fri Apr 13 08:09:47 2012
New Revision: 1325647
URL: http://svn.apache.org/viewvc?rev=1325647&view=rev
Log:
- discard spaces between adjacent encoded words, as required by RFC 2047
- avoid a lot of re-allocating and copying in mbox_cte_decode_header() and
mbox_cte_decode_rfc2047()
- make mbox_cte_decode_rfc2047() static
Modified:
httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox.h
httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox_cte.c
Modified: httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox.h
URL: http://svn.apache.org/viewvc/httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox.h?rev=1325647&r1=1325646&r2=1325647&view=diff
==============================================================================
--- httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox.h (original)
+++ httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox.h Fri Apr 13 08:09:47 2012
@@ -27,6 +27,7 @@
#include "util_varbuf.h"
#include "apr_date.h"
+#include "apr_lib.h"
#include "apr_strings.h"
#include "apr_dbm.h"
#include "apr_hash.h"
@@ -121,7 +122,6 @@ apr_size_t mbox_cte_decode_qp(char *p);
apr_size_t mbox_cte_decode_b64(char *src);
apr_size_t mbox_cte_escape_html(apr_pool_t *p, const char *s,
apr_size_t len, char **body);
-char *mbox_cte_decode_rfc2047(apr_pool_t *p, char *src, struct ap_varbuf *vb);
char *mbox_cte_decode_header(apr_pool_t *p, char *src);
apr_status_t mbox_cte_convert_to_utf8(apr_pool_t *p, const char *charset,
const char *src, apr_size_t len,
Modified: httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox_cte.c
URL: http://svn.apache.org/viewvc/httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox_cte.c?rev=1325647&r1=1325646&r2=1325647&view=diff
==============================================================================
--- httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox_cte.c (original)
+++ httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox_cte.c Fri Apr 13 08:09:47 2012
@@ -294,34 +294,28 @@ out:
* These strings complies to the following syntax :
* =?charset?mode?data?= rest
*/
-char *mbox_cte_decode_rfc2047(apr_pool_t *p, char *src, struct ap_varbuf *vb)
+static char *mbox_cte_decode_rfc2047(apr_pool_t *p, char *src, struct ap_varbuf *vb)
{
char *charset, *mode, *data, *rest;
int i;
apr_status_t rv;
+ apr_size_t data_len;
- /* Look for charset */
- charset = strstr(src, "=?");
- if (!charset) {
+ if (strncmp(src, "=?", 2) != 0)
return src;
- }
- *charset = 0;
- charset += strlen("=?");
/* Encoding mode (first '?' after charset) */
- mode = strstr(charset, "?");
- if (!mode) {
+ mode = strstr(src + 2, "?");
+ if (!mode || mode == src) {
return src;
}
- *mode = 0;
mode++;
/* Fetch data */
data = strstr(mode, "?");
- if (!data) {
+ if (!data || data != mode + 1) {
return src;
}
- *data = 0;
data++;
/* Look for the end bound */
@@ -329,11 +323,10 @@ char *mbox_cte_decode_rfc2047(apr_pool_t
if (!rest) {
return src;
}
- *rest = 0;
+ *rest = '\0';
/* Quoted-Printable decoding : mode 'q' */
if ((*mode == 'q') || (*mode == 'Q')) {
- apr_size_t data_len;
int i;
/* In QP header encoding, spaces are encoded either in =20 (as
@@ -347,65 +340,80 @@ char *mbox_cte_decode_rfc2047(apr_pool_t
}
data_len = mbox_cte_decode_qp(data);
- data[data_len] = 0;
}
else if ((*mode == 'b') || (*mode == 'B')) {
- apr_size_t data_len;
-
data_len = mbox_cte_decode_b64(data);
- data[data_len] = 0;
+ }
+ else {
+ /* XXX we may have modified data above */
+ *rest = '?';
+ return src;
}
/* Convert charset to uppercase */
+ charset = apr_pstrmemdup(p, src, mode - src - 1);
for (i = 0; i < strlen(charset); i++) {
charset[i] = toupper(charset[i]);
}
/* Charset conversion */
- vb->strlen = 0;
- rv = mbox_cte_convert_to_utf8(p, charset, data, strlen(data), vb);
+ rv = mbox_cte_convert_to_utf8(p, charset, data, data_len, vb);
if (rv != APR_SUCCESS) {
ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf,
"conversion from %s to utf-8 failed", charset);
- return data;
+ *rest = '?';
+ return src;
}
- return vb->buf;
+ return rest + strlen("?=");;
}
/* MIME header decoding (see RFC 2047). */
char *mbox_cte_decode_header(apr_pool_t *p, char *src)
{
- char *start, *end, *part;
- char *result = "";
+ char *start, *end, *cont;
struct ap_varbuf vb;
+ int seen_encoded_word = 0;
ap_varbuf_init(p, &vb, 100);
vb.strlen = 0;
do {
- char c;
-
start = strstr(src, "=?");
- if (!start) {
- result = apr_psprintf(p, "%s%s", result, src);
- return result;
- }
+ if (!start)
+ return apr_pstrcat(p, vb.buf, src, NULL);
end = strstr(start, "?=");
- if (!end) {
- result = apr_psprintf(p, "%s%s", result, src);
- return result;
- }
-
- c = *start;
- *start = 0;
- result = apr_psprintf(p, "%s%s", result, src);
- *start = c;
+ if (!end)
+ return apr_pstrcat(p, vb.buf, src, NULL);
- part = mbox_cte_decode_rfc2047(p, start, &vb);
- result = apr_psprintf(p, "%s%s", result, part);
+ if (start != src) {
+ if (seen_encoded_word) {
+ /* space between consecutive encoded words must be discarded */
+ char *p = src;
+ while (p < start && apr_isspace(*p))
+ p++;
+ if (p == start)
+ src = start;
+ /* XXX: this is wrong if the next encoded word fails to decode */
+ }
+ if (start != src) {
+ ap_varbuf_strmemcat(&vb, src, start - src);
+ seen_encoded_word = 0;
+ }
+ }
- src = end + 2;
+ cont = mbox_cte_decode_rfc2047(p, start, &vb);
+ if (cont == start) {
+ /* decoding failed, copy original string */
+ end += 2;
+ ap_varbuf_strmemcat(&vb, start, end - start);
+ src = end;
+ }
+ else {
+ src = cont;
+ seen_encoded_word = 1;
+ }
} while (src && *src);
- return result;
+ /* vb.buf is pool memory */
+ return vb.buf;
}