You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@httpd.apache.org by wr...@apache.org on 2002/04/05 06:22:46 UTC
cvs commit: httpd-2.0/server util.c
wrowe 02/04/04 20:22:46
Modified: server util.c
Log:
One for 2.0.34 - to fix the problem of utf-8 configuration files saved
from MeSsed up editors that believe leading zero width no-break space
(byte order mark) bytes signify a utf-8 file. Someone point me at an RFC ... gheesh.
FWIW; fixes bug report 10125
Revision Changes Path
1.124 +23 -0 httpd-2.0/server/util.c
Index: util.c
===================================================================
RCS file: /home/cvs/httpd-2.0/server/util.c,v
retrieving revision 1.123
retrieving revision 1.124
diff -u -r1.123 -r1.124
--- util.c 21 Mar 2002 10:10:40 -0000 1.123
+++ util.c 5 Apr 2002 04:22:45 -0000 1.124
@@ -916,6 +916,29 @@
return APR_EBADF;
}
+#ifdef WIN32
+ /* Some twisted character [no pun intended] at MS decided that a
+ * zero width no-break space (U+FEFF, the byte order mark) as the lead
+ * wide character would be ideal for describing Unicode text files.
+ * This was further convoluted by another MSism: that the same character
+ * encoded as utf-8, EF BB BF, would signify utf-8 text files.
+ *
+ * Since MS configuration files already carry utf-8 encoded Unicode
+ * path, file and resource names, we already have the correct
+ * WinNT encoding. But at least eat the stupid three bytes up front.
+ */
+ {
+ unsigned char buf[4];
+ apr_size_t len = 3;
+ status = apr_file_read(file, buf, &len);
+ if ((status != APR_SUCCESS) || (len < 3)
+ || memcmp(buf, "\xEF\xBB\xBF", 3) != 0) {
+ apr_off_t zero = 0;
+ apr_file_seek(file, APR_SET, &zero);
+ }
+ }
+#endif
+
new_cfg = apr_palloc(p, sizeof(*new_cfg));
new_cfg->param = file;
new_cfg->name = apr_pstrdup(p, name);