You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@httpd.apache.org by co...@hyperreal.org on 1999/12/09 19:10:43 UTC
cvs commit: apache-1.3/htdocs/manual/mod mod_mime.html
coar 99/12/09 10:10:42
Modified: src CHANGES
. STATUS
src/modules/standard mod_mime.c
htdocs/manual/mod mod_mime.html
Log:
Add the AddCharset functionality to mod_mime.
Submitted by: Youichirou Koga <y-...@isoternet.org>
Reviewed by: Ken Coar, Martin Kraemer
Revision Changes Path
1.1478 +3 -0 apache-1.3/src/CHANGES
Index: CHANGES
===================================================================
RCS file: /home/cvs/apache-1.3/src/CHANGES,v
retrieving revision 1.1477
retrieving revision 1.1478
diff -u -r1.1477 -r1.1478
--- CHANGES 1999/12/09 17:19:28 1.1477
+++ CHANGES 1999/12/09 18:10:24 1.1478
@@ -1,4 +1,7 @@
Changes with Apache 1.3.10
+ *) Enhance mod_mime with an AddCharset directive to properly handle
+ that negotiation dimension.
+ [Youichirou Koga <y-...@isoternet.org>]
*) OS: Added first cut at support for IBM's OS/390.
[Ovies Brabson <ov...@us.ibm.com>]
1.764 +1 -4 apache-1.3/STATUS
Index: STATUS
===================================================================
RCS file: /home/cvs/apache-1.3/STATUS,v
retrieving revision 1.763
retrieving revision 1.764
diff -u -r1.763 -r1.764
--- STATUS 1999/12/09 17:21:35 1.763
+++ STATUS 1999/12/09 18:10:30 1.764
@@ -1,5 +1,5 @@
1.3 STATUS:
- Last modified at [$Date: 1999/12/09 17:21:35 $]
+ Last modified at [$Date: 1999/12/09 18:10:30 $]
Release:
@@ -42,9 +42,6 @@
Available Patches:
- * Youichirou Koga's patch to add AddCharset to mod_mime.
- Message-ID: <384E91DF.D92FABC2.Golux.Com>
- Status: Ken +1, Martin +1(after minor fix)
* Andrew Ford's patch (1999/12/05) to add absolute times to mod_expires
Message-ID: <m3...@icarus.demon.co.uk>
1.50 +336 -11 apache-1.3/src/modules/standard/mod_mime.c
Index: mod_mime.c
===================================================================
RCS file: /home/cvs/apache-1.3/src/modules/standard/mod_mime.c,v
retrieving revision 1.49
retrieving revision 1.50
diff -u -r1.49 -r1.50
--- mod_mime.c 1999/04/20 17:27:51 1.49
+++ mod_mime.c 1999/12/09 18:10:34 1.50
@@ -75,6 +75,7 @@
typedef struct {
table *forced_types; /* Additional AddTyped stuff */
table *encoding_types; /* Added with AddEncoding... */
+ table *charset_types; /* Added with AddCharset... */
table *language_types; /* Added with AddLanguage... */
table *handlers; /* Added with AddHandler... */
array_header *handlers_remove; /* List of handlers to remove */
@@ -84,6 +85,24 @@
char *default_language; /* Language if no AddLanguage ext found */
} mime_dir_config;
+typedef struct param_s { /* one "attribute=value" media-type parameter, kept in a singly linked list */
+ char *attr; /* text found to the left of '=' */
+ char *val; /* text found to the right of '='; a quoted value keeps its quotes */
+ struct param_s *next; /* next parameter in the list, NULL for the last one */
+} param;
+
+typedef struct { /* result of analyze_ct(): a media type split into its parts */
+ char *type; /* text before the '/' */
+ char *subtype; /* text between the '/' and the first ';' (or end of string) */
+ param *param; /* head of the parameter list, NULL when there are no parameters */
+} content_type;
+
+/*
+ * Characters that are not allowed inside a token; is_token() rejects any
+ * character found in this table.  The table is only ever searched, never
+ * written, so it is declared const.
+ */
+static const char tspecial[] = "()<>@,;:\\\"/[]?=";
+
module MODULE_VAR_EXPORT mime_module;
static void *create_mime_dir_config(pool *p, char *dummy)
@@ -93,6 +112,7 @@
new->forced_types = ap_make_table(p, 4);
new->encoding_types = ap_make_table(p, 4);
+ new->charset_types = ap_make_table(p, 4);
new->language_types = ap_make_table(p, 4);
new->handlers = ap_make_table(p, 4);
new->handlers_remove = ap_make_array(p, 4, sizeof(handlers_info));
@@ -119,9 +139,11 @@
}
new->forced_types = ap_overlay_tables(p, add->forced_types,
- base->forced_types);
+ base->forced_types);
new->encoding_types = ap_overlay_tables(p, add->encoding_types,
base->encoding_types);
+ new->charset_types = ap_overlay_tables(p, add->charset_types,
+ base->charset_types);
new->language_types = ap_overlay_tables(p, add->language_types,
base->language_types);
new->handlers = ap_overlay_tables(p, add->handlers,
@@ -135,17 +157,18 @@
return new;
}
-static const char *add_type(cmd_parms *cmd, mime_dir_config * m, char *ct,
+static const char *add_type(cmd_parms *cmd, mime_dir_config *m, char *ct,
char *ext)
{
if (*ext == '.')
- ++ext;
+ ++ext;
+
ap_str_tolower(ct);
ap_table_setn(m->forced_types, ext, ct);
return NULL;
}
-static const char *add_encoding(cmd_parms *cmd, mime_dir_config * m, char *enc,
+static const char *add_encoding(cmd_parms *cmd, mime_dir_config *m, char *enc,
char *ext)
{
if (*ext == '.')
@@ -155,17 +178,29 @@
return NULL;
}
-static const char *add_language(cmd_parms *cmd, mime_dir_config * m, char *lang,
+/*
+ * Handler for the AddCharset directive: record charset for one extension.
+ *
+ * A single leading '.' on ext is ignored.  charset is passed through
+ * ap_str_tolower() and then stored in m->charset_types under the
+ * extension.  Always returns NULL.
+ */
+static const char *add_charset(cmd_parms *cmd, mime_dir_config *m,
+                               char *charset, char *ext)
+{
+    char *key = (*ext == '.') ? ext + 1 : ext;
+
+    ap_str_tolower(charset);
+    ap_table_setn(m->charset_types, key, charset);
+    return NULL;
+}
+
+static const char *add_language(cmd_parms *cmd, mime_dir_config *m, char *lang,
char *ext)
{
- if (*ext == '.')
- ++ext;
+ if (*ext == '.') {
+ ++ext;
+ }
ap_str_tolower(lang);
ap_table_setn(m->language_types, ext, lang);
return NULL;
}
-static const char *add_handler(cmd_parms *cmd, mime_dir_config * m, char *hdlr,
+static const char *add_handler(cmd_parms *cmd, mime_dir_config *m, char *hdlr,
char *ext)
{
if (*ext == '.')
@@ -209,6 +244,8 @@
"a mime type followed by one or more file extensions"},
{"AddEncoding", add_encoding, NULL, OR_FILEINFO, ITERATE2,
"an encoding (e.g., gzip), followed by one or more file extensions"},
+ {"AddCharset", add_charset, NULL, OR_FILEINFO, ITERATE2,
+ "a charset (e.g., iso-2022-jp), followed by one or more file extensions"},
{"AddLanguage", add_language, NULL, OR_FILEINFO, ITERATE2,
"a language (e.g., fr), followed by one or more file extensions"},
{"AddHandler", add_handler, NULL, OR_FILEINFO, ITERATE2,
@@ -275,6 +312,248 @@
ap_cfg_closefile(f);
}
+/*
+ * Trim leading and trailing white space (space, tab, newline) from s.
+ *
+ * The string is edited in place: trailing white space is cut off by
+ * writing a '\0' after the last character that is kept.  The returned
+ * pointer addresses the first non-blank character, so it may lie past
+ * the start of s.  NULL is returned unchanged.  A string made up only of
+ * white space yields a pointer to its terminating '\0'; nothing in front
+ * of s is read or written.
+ */
+static char *zap_sp(char *s)
+{
+    char *end;
+
+    if (s == NULL) {
+        return (NULL);
+    }
+
+    /* delete prefixed white space */
+    while (*s == ' ' || *s == '\t' || *s == '\n') {
+        s++;
+    }
+
+    /*
+     * delete postfixed white space: walk back from the terminator.
+     * end > s is tested before end[-1] is examined, so the scan never
+     * leaves the string even when only white space was supplied.
+     */
+    end = s + strlen(s);
+    while (end > s
+           && (end[-1] == ' ' || end[-1] == '\t' || end[-1] == '\n')) {
+        end--;
+    }
+    /* only store when something was actually trimmed */
+    if (*end != '\0') {
+        *end = '\0';
+    }
+    return (s);
+}
+
+/*
+ * Token character test.
+ *
+ * Returns 1 when c is an ASCII graphic character that does not occur in
+ * tspecial, and -1 otherwise (callers compare the result with "> 0").
+ */
+static int is_token(char c)
+{
+    if (!isascii(c)) {
+        return -1;
+    }
+    if (!isgraph(c)) {
+        return -1;
+    }
+    if (strchr(tspecial, c) != NULL) {
+        return -1;
+    }
+    return 1;
+}
+
+/*
+ * Quoted-string text test.
+ *
+ * Returns 1 when c is an ASCII character other than '"', '\\' and '\n',
+ * and -1 otherwise.
+ */
+static int is_qtext(char c)
+{
+    if (!isascii(c)) {
+        return -1;
+    }
+    switch (c) {
+    case '"':
+    case '\\':
+    case '\n':
+        return -1;
+    default:
+        return 1;
+    }
+}
+
+/*
+ * Quoted-pair test: does s start with a backslash followed by an ASCII
+ * character?
+ *
+ * Returns 1 if so, -1 otherwise.  A backslash that is the last character
+ * of the string is NOT a quoted pair: the caller advances by two
+ * characters on success, which would otherwise step over the terminating
+ * '\0'.  A NULL s also yields -1.
+ */
+static int is_quoted_pair(char *s)
+{
+    /* s[1] is only examined once s[0] is known to be a backslash */
+    if (s == NULL || s[0] != '\\' || s[1] == '\0') {
+        return (-1);
+    }
+    return (isascii((int) s[1]) ? 1 : -1);
+}
+
+/*
+ * Split a media type string of the form
+ *     type "/" subtype *( ";" attribute "=" value )
+ * into its components.
+ *
+ *   p   pool handed to ap_palloc/ap_pstrdup/ap_pstrndup for the result
+ *   s   string to analyze; it is duplicated first and only the duplicate
+ *       is cut up
+ *
+ * Returns the filled-in content_type, or NULL after logging a warning
+ * when the type, the subtype or a parameter cannot be extracted.
+ * Parameters are appended in the order in which they appear.  A value is
+ * either a run of token characters or a quoted string; a quoted value is
+ * stored together with its quotes.
+ */
+static content_type *analyze_ct(pool *p, char *s)
+{
+    char *tp, *mp, *cp;
+    char *attribute, *value;
+    int quoted = 0;
+
+    content_type *ctp;
+    param *pp, *npp;
+
+    /* initialize ctp */
+    ctp = (content_type *) ap_palloc(p, sizeof(content_type));
+    ctp->type = NULL;
+    ctp->subtype = NULL;
+    ctp->param = NULL;
+
+    tp = ap_pstrdup(p, s);
+
+    /* mp marks the start of the piece being collected, cp scans ahead */
+    mp = tp;
+    cp = mp;
+
+    /* getting a type */
+    if (!(cp = strchr(mp, '/'))) {
+        ap_log_error(APLOG_MARK, APLOG_WARNING, NULL,
+                     "mod_mime: analyze_ct: cannot get media type from '%s'",
+                     mp);
+        return (NULL);
+    }
+    ctp->type = ap_pstrndup(p, mp, cp - mp);
+    ctp->type = zap_sp(ctp->type);
+    if (ctp->type == NULL || *(ctp->type) == '\0' ||
+        strchr(ctp->type, ';') || strchr(ctp->type, ' ') ||
+        strchr(ctp->type, '\t')) {
+        ap_log_error(APLOG_MARK, APLOG_WARNING, NULL,
+                     "Cannot get media type.");
+        return (NULL);
+    }
+
+    /* getting a subtype */
+    cp++;
+    mp = cp;
+
+    for (; *cp != ';' && *cp != '\0'; cp++);
+    ctp->subtype = ap_pstrndup(p, mp, cp - mp);
+    ctp->subtype = zap_sp(ctp->subtype);
+    if ((ctp->subtype == NULL) || (*(ctp->subtype) == '\0') ||
+        strchr(ctp->subtype, ' ') || strchr(ctp->subtype, '\t')) {
+        ap_log_error(APLOG_MARK, APLOG_WARNING, NULL,
+                     "Cannot get media subtype.");
+        return (NULL);
+    }
+    cp = zap_sp(cp);
+    if (cp == NULL || *cp == '\0') {
+        /* no ';' after the subtype: there are no parameters */
+        return (ctp);
+    }
+
+    /* getting parameters */
+    cp++;
+    cp = zap_sp(cp);
+    if (cp == NULL || *cp == '\0') {
+        ap_log_error(APLOG_MARK, APLOG_WARNING, NULL,
+                     "Cannot get media parameter.");
+        return (NULL);
+    }
+    mp = cp;
+    attribute = NULL;
+    value = NULL;
+
+    /*
+     * attribute == NULL: still scanning the name to the left of '='.
+     * attribute != NULL: scanning the value that belongs to it.
+     */
+    while (cp != NULL && *cp != '\0') {
+        if (attribute == NULL) {
+            if (is_token((int) *cp) > 0) {
+                cp++;
+                continue;
+            }
+            else if (*cp == ' ' || *cp == '\t' || *cp == '\n') {
+                cp++;
+                continue;
+            }
+            else if (*cp == '=') {
+                attribute = ap_pstrndup(p, mp, cp - mp);
+                attribute = zap_sp(attribute);
+                if (attribute == NULL || *attribute == '\0') {
+                    ap_log_error(APLOG_MARK, APLOG_WARNING, NULL,
+                                 "Cannot get media parameter.");
+                    return (NULL);
+                }
+                cp++;
+                cp = zap_sp(cp);
+                if (cp == NULL || *cp == '\0') {
+                    ap_log_error(APLOG_MARK, APLOG_WARNING, NULL,
+                                 "Cannot get media parameter.");
+                    return (NULL);
+                }
+                mp = cp;
+                continue;
+            }
+            else {
+                ap_log_error(APLOG_MARK, APLOG_WARNING, NULL,
+                             "Cannot get media parameter.");
+                return (NULL);
+            }
+        }
+        else {
+            /* the first character of the value decides quoted vs. token */
+            if (mp == cp) {
+                if (*cp == '"') {
+                    quoted = 1;
+                    cp++;
+                }
+                else {
+                    quoted = 0;
+                }
+            }
+            if (quoted > 0) {
+                while (quoted && *cp != '\0') {
+                    if (is_qtext((int) *cp) > 0) {
+                        cp++;
+                    }
+                    else if (is_quoted_pair(cp) > 0 && *(cp + 1) != '\0') {
+                        /*
+                         * The explicit look at cp[1] keeps the two
+                         * character step from passing the terminator
+                         * when the string ends in a backslash.
+                         */
+                        cp += 2;
+                    }
+                    else if (*cp == '"') {
+                        cp++;
+                        while (*cp == ' ' || *cp == '\t' || *cp == '\n') {
+                            cp++;
+                        }
+                        /* only ';' or the end may follow the closing quote */
+                        if (*cp != ';' && *cp != '\0') {
+                            ap_log_error(APLOG_MARK, APLOG_WARNING, NULL,
+                                         "Cannot get media parameter.");
+                            return(NULL);
+                        }
+                        quoted = 0;
+                    }
+                    else {
+                        ap_log_error(APLOG_MARK, APLOG_WARNING, NULL,
+                                     "Cannot get media parameter.");
+                        return (NULL);
+                    }
+                }
+            }
+            else {
+                while (1) {
+                    if (is_token((int) *cp) > 0) {
+                        cp++;
+                    }
+                    else if (*cp == '\0' || *cp == ';') {
+                        break;
+                    }
+                    else {
+                        ap_log_error(APLOG_MARK, APLOG_WARNING, NULL,
+                                     "Cannot get media parameter.");
+                        return (NULL);
+                    }
+                }
+            }
+            value = ap_pstrndup(p, mp, cp - mp);
+            value = zap_sp(value);
+            if (value == NULL || *value == '\0') {
+                ap_log_error(APLOG_MARK, APLOG_WARNING, NULL,
+                             "Cannot get media parameter.");
+                return (NULL);
+            }
+
+            pp = ap_palloc(p, sizeof(param));
+            pp->attr = attribute;
+            pp->val = value;
+            pp->next = NULL;
+
+            /* append at the tail so parameters keep their original order */
+            if (ctp->param == NULL) {
+                ctp->param = pp;
+            }
+            else {
+                npp = ctp->param;
+                while (npp->next) {
+                    npp = npp->next;
+                }
+                npp->next = pp;
+            }
+            quoted = 0;
+            attribute = NULL;
+            value = NULL;
+            if (*cp == '\0') {
+                break;
+            }
+            /* step over the ';' and start on the next attribute */
+            cp++;
+            mp = cp;
+        }
+    }
+    return (ctp);
+}
+
static int find_ct(request_rec *r)
{
const char *fn = strrchr(r->filename, '/');
@@ -283,6 +562,7 @@
char *ext;
const char *orighandler = r->handler;
const char *type;
+ const char *charset = NULL;
if (S_ISDIR(r->finfo.st_mode)) {
r->content_type = DIR_MAGIC_TYPE;
@@ -294,8 +574,9 @@
* pointer to getword, causing a SEGV ..
*/
- if (fn == NULL)
- fn = r->filename;
+ if (fn == NULL) {
+ fn = r->filename;
+ }
/* Parse filename extensions, which can be in any order */
while ((ext = ap_getword(r->pool, &fn, '.')) && *ext) {
@@ -308,6 +589,12 @@
found = 1;
}
+ /* Add charset to Content-Type */
+ if ((type = ap_table_get(conf->charset_types, ext))) {
+ charset = type;
+ found = 1;
+ }
+
/* Check for Content-Language */
if ((type = ap_table_get(conf->language_types, ext))) {
const char **new;
@@ -347,8 +634,46 @@
r->content_languages = NULL;
r->content_encoding = NULL;
r->handler = orighandler;
- }
+ charset = NULL;
+ }
+ }
+ if (r->content_type) {
+ content_type *ctp;
+ char *ct;
+ int override = 0;
+
+ ct = (char *) ap_palloc(r->pool,
+ sizeof(char) * (strlen(r->content_type) + 1));
+ strcpy(ct, r->content_type);
+
+ if ((ctp = analyze_ct(r->pool, ct))) {
+ param *pp = ctp->param;
+ r->content_type = ap_pstrcat(r->pool, ctp->type, "/",
+ ctp->subtype, NULL);
+ while (pp != NULL) {
+ if (charset && !strcmp(pp->attr, "charset")) {
+ if (!override) {
+ r->content_type = ap_pstrcat(r->pool, r->content_type,
+ "; charset=", charset,
+ NULL);
+ override = 1;
+ }
+ }
+ else {
+ r->content_type = ap_pstrcat(r->pool, r->content_type,
+ "; ", pp->attr,
+ "=", pp->val,
+ NULL);
+ }
+ pp = pp->next;
+ }
+ if (charset && !override) {
+ r->content_type = ap_pstrcat(r->pool, r->content_type,
+ "; charset=", charset,
+ NULL);
+ }
+ }
}
/* Set default language, if none was specified by the extensions
1.33 +60 -5 apache-1.3/htdocs/manual/mod/mod_mime.html
Index: mod_mime.html
===================================================================
RCS file: /home/cvs/apache-1.3/htdocs/manual/mod/mod_mime.html,v
retrieving revision 1.32
retrieving revision 1.33
diff -u -r1.32 -r1.33
--- mod_mime.html 1999/04/29 19:55:53 1.32
+++ mod_mime.html 1999/12/09 18:10:39 1.33
@@ -29,12 +29,12 @@
<P>
-The directives <A HREF="#addencoding">AddEncoding</A>, <A
-HREF="#addhandler">AddHandler</A>, <A
-HREF="#addlanguage">AddLanguage</A> and <A HREF="#addtype">AddType</A>
+The directives <a href="#addcharset">AddCharset</a>,
+<A HREF="#addencoding">AddEncoding</A>, <A HREF="#addhandler">AddHandler</A>,
+<A HREF="#addlanguage">AddLanguage</A> and <A HREF="#addtype">AddType</A>
are all used to map file extensions onto the meta-information for that
-file. Respectively they set the content-encoding, handler,
-content-language and MIME-type (content-type) of documents. The
+file. Respectively they set the character set, content-encoding, handler,
+content-language, and MIME-type (content-type) of documents. The
directive <A HREF="#typesconfig">TypesConfig</A> is used to specify a
file which also maps extensions onto MIME types. The directives <A
HREF="#forcetype">ForceType</A> and <A
@@ -86,6 +86,7 @@
<H2>Directives</H2>
<UL>
+<li><a href="#addcharset">AddCharset</a></li>
<LI><A HREF="#addencoding">AddEncoding</A>
<LI><A HREF="#addhandler">AddHandler</A>
<LI><A HREF="#addlanguage">AddLanguage</A>
@@ -98,6 +99,60 @@
</UL>
<HR>
+<H2><A NAME="addcharset">AddCharset</A></H2>
+<A HREF="directive-dict.html#Syntax" REL="Help"
+><STRONG>Syntax:</STRONG></A> AddCharset <i>charset extension
+ [extension...]</i><br>
+<A HREF="directive-dict.html#Context" REL="Help"
+><STRONG>Context:</STRONG></A> server config, virtual host, directory, .htaccess<BR>
+<A
+ HREF="directive-dict.html#Override"
+ REL="Help"
+><STRONG>Override:</STRONG></A> FileInfo<BR>
+<A
+ HREF="directive-dict.html#Status"
+ REL="Help"
+><STRONG>Status:</STRONG></A> Base<BR>
+<A
+ HREF="directive-dict.html#Module"
+ REL="Help"
+><STRONG>Module:</STRONG></A> mod_mime
+<br>
+<A HREF="directive-dict.html#Compatibility" REL="Help"
+><STRONG>Compatibility:</STRONG></A> AddCharset is only available in Apache
+1.3.10 and later
+
+<P>
+The AddCharset directive maps the given filename extensions to the
+specified content charset. <i>charset</i> is the MIME charset
+parameter for files whose names contain <i>extension</i>. This mapping is
+added to any already in force, overriding any mappings that already
+exist for the same <i>extension</i>.
+</P>
+<P>
+Example:
+<pre>
+ AddLanguage ja .ja
+ AddCharset EUC-JP .euc
+ AddCharset ISO-2022-JP .jis
+ AddCharset SHIFT_JIS .sjis
+</pre>
+
+<P>
+Then the document <samp>xxxx.ja.jis</samp> will be treated as being a
+Japanese document whose charset is ISO-2022-JP (as will the document
+<samp>xxxx.jis.ja</samp>). Although the content charset is reported to
+the client, the browser is unlikely to use this information. The
+AddCharset directive is more useful for
+<A HREF="../content-negotiation.html">content negotiation</A>, where
+the server returns one of several documents based on the client's
+charset preference.
+</P>
+<P>
+<STRONG>See also</STRONG>: <A HREF="mod_negotiation.html">mod_negotiation</A>
+</P>
+
+<hr>
<H2><A NAME="addencoding">AddEncoding</A></H2>
<!--%plaintext <?INDEX {\tt AddEncoding} directive> -->