You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@httpd.apache.org by co...@hyperreal.org on 1999/12/09 19:10:43 UTC

cvs commit: apache-1.3/htdocs/manual/mod mod_mime.html

coar        99/12/09 10:10:42

  Modified:    src      CHANGES
               .        STATUS
               src/modules/standard mod_mime.c
               htdocs/manual/mod mod_mime.html
  Log:
  	Add the AddCharset functionality to mod_mime.
  
  Submitted by:	Youichirou Koga <y-...@isoternet.org>
  Reviewed by:	Ken Coar, Martin Kraemer
  
  Revision  Changes    Path
  1.1478    +3 -0      apache-1.3/src/CHANGES
  
  Index: CHANGES
  ===================================================================
  RCS file: /home/cvs/apache-1.3/src/CHANGES,v
  retrieving revision 1.1477
  retrieving revision 1.1478
  diff -u -r1.1477 -r1.1478
  --- CHANGES	1999/12/09 17:19:28	1.1477
  +++ CHANGES	1999/12/09 18:10:24	1.1478
  @@ -1,4 +1,7 @@
   Changes with Apache 1.3.10
  +  *) Enhance mod_mime with an AddCharset directive to properly handle
  +     the charset dimension of content negotiation.
  +     [Youichirou Koga <y-...@isoternet.org>]
   
     *) OS: Added first cut at support for IBM's OS/390.
        [Ovies Brabson <ov...@us.ibm.com>]
  
  
  
  1.764     +1 -4      apache-1.3/STATUS
  
  Index: STATUS
  ===================================================================
  RCS file: /home/cvs/apache-1.3/STATUS,v
  retrieving revision 1.763
  retrieving revision 1.764
  diff -u -r1.763 -r1.764
  --- STATUS	1999/12/09 17:21:35	1.763
  +++ STATUS	1999/12/09 18:10:30	1.764
  @@ -1,5 +1,5 @@
     1.3 STATUS:
  -  Last modified at [$Date: 1999/12/09 17:21:35 $]
  +  Last modified at [$Date: 1999/12/09 18:10:30 $]
   
   Release:
   
  @@ -42,9 +42,6 @@
   
   
   Available Patches:
  -    * Youichirou Koga's patch to add AddCharset to mod_mime.
  -	Message-ID: <384E91DF.D92FABC2.Golux.Com>
  -	Status: Ken +1, Martin +1(after minor fix)
   
       * Andrew Ford's patch (1999/12/05) to add absolute times to mod_expires
   	Message-ID: <m3...@icarus.demon.co.uk>
  
  
  
  1.50      +336 -11   apache-1.3/src/modules/standard/mod_mime.c
  
  Index: mod_mime.c
  ===================================================================
  RCS file: /home/cvs/apache-1.3/src/modules/standard/mod_mime.c,v
  retrieving revision 1.49
  retrieving revision 1.50
  diff -u -r1.49 -r1.50
  --- mod_mime.c	1999/04/20 17:27:51	1.49
  +++ mod_mime.c	1999/12/09 18:10:34	1.50
  @@ -75,6 +75,7 @@
   typedef struct {
       table *forced_types;        /* Additional AddTyped stuff */
       table *encoding_types;      /* Added with AddEncoding... */
  +    table *charset_types;	/* Added with AddCharset... */
       table *language_types;      /* Added with AddLanguage... */
       table *handlers;            /* Added with AddHandler...  */
       array_header *handlers_remove;     /* List of handlers to remove */
  @@ -84,6 +85,24 @@
       char *default_language;     /* Language if no AddLanguage ext found */
   } mime_dir_config;
   
  +typedef struct param_s {
  +    char *attr;
  +    char *val;
  +    struct param_s *next;
  +} param;
  +
  +typedef struct {
  +    char *type;
  +    char *subtype;
  +    param *param;
  +} content_type;
  +
  +static char tspecial[] = {
  +    '(', ')', '<', '>', '@', ',', ';', ':',
  +    '\\', '"', '/', '[', ']', '?', '=',
  +    '\0'
  +};
  +
   module MODULE_VAR_EXPORT mime_module;
   
   static void *create_mime_dir_config(pool *p, char *dummy)
  @@ -93,6 +112,7 @@
   
       new->forced_types = ap_make_table(p, 4);
       new->encoding_types = ap_make_table(p, 4);
  +    new->charset_types = ap_make_table(p, 4);
       new->language_types = ap_make_table(p, 4);
       new->handlers = ap_make_table(p, 4);
       new->handlers_remove = ap_make_array(p, 4, sizeof(handlers_info));
  @@ -119,9 +139,11 @@
       }
   
       new->forced_types = ap_overlay_tables(p, add->forced_types,
  -                                       base->forced_types);
  +					 base->forced_types);
       new->encoding_types = ap_overlay_tables(p, add->encoding_types,
                                            base->encoding_types);
  +    new->charset_types = ap_overlay_tables(p, add->charset_types,
  +					   base->charset_types);
       new->language_types = ap_overlay_tables(p, add->language_types,
                                            base->language_types);
       new->handlers = ap_overlay_tables(p, add->handlers,
  @@ -135,17 +157,18 @@
       return new;
   }
   
  -static const char *add_type(cmd_parms *cmd, mime_dir_config * m, char *ct,
  +static const char *add_type(cmd_parms *cmd, mime_dir_config *m, char *ct,
                               char *ext)
   {
       if (*ext == '.')
  -        ++ext;
  +	++ext;
  +	
       ap_str_tolower(ct);
       ap_table_setn(m->forced_types, ext, ct);
       return NULL;
   }
   
  -static const char *add_encoding(cmd_parms *cmd, mime_dir_config * m, char *enc,
  +static const char *add_encoding(cmd_parms *cmd, mime_dir_config *m, char *enc,
                                   char *ext)
   {
       if (*ext == '.')
  @@ -155,17 +178,29 @@
       return NULL;
   }
   
  -static const char *add_language(cmd_parms *cmd, mime_dir_config * m, char *lang,
  +static const char *add_charset(cmd_parms *cmd, mime_dir_config *m,
  +			       char *charset, char *ext)
  +{
  +    if (*ext == '.') {
  +	++ext;
  +    }
  +    ap_str_tolower(charset);
  +    ap_table_setn(m->charset_types, ext, charset);
  +    return NULL;
  +}
  +
  +static const char *add_language(cmd_parms *cmd, mime_dir_config *m, char *lang,
                                   char *ext)
   {
  -    if (*ext == '.')
  -        ++ext;
  +    if (*ext == '.') {
  +	++ext;
  +    }
       ap_str_tolower(lang);
       ap_table_setn(m->language_types, ext, lang);
       return NULL;
   }
   
  -static const char *add_handler(cmd_parms *cmd, mime_dir_config * m, char *hdlr,
  +static const char *add_handler(cmd_parms *cmd, mime_dir_config *m, char *hdlr,
                                  char *ext)
   {
       if (*ext == '.')
  @@ -209,6 +244,8 @@
        "a mime type followed by one or more file extensions"},
       {"AddEncoding", add_encoding, NULL, OR_FILEINFO, ITERATE2,
        "an encoding (e.g., gzip), followed by one or more file extensions"},
  +    {"AddCharset", add_charset, NULL, OR_FILEINFO, ITERATE2,
  +     "a charset (e.g., iso-2022-jp), followed by one or more file extensions"},
       {"AddLanguage", add_language, NULL, OR_FILEINFO, ITERATE2,
        "a language (e.g., fr), followed by one or more file extensions"},
       {"AddHandler", add_handler, NULL, OR_FILEINFO, ITERATE2,
  @@ -275,6 +312,248 @@
       ap_cfg_closefile(f);
   }
   
  +static char *zap_sp(char *s)
  +{
  +    char *tp;
  +
  +    if (s == NULL) {
  +	return (NULL);
  +    }
  +    if (*s == '\0') {
  +	return (s);
  +    }
  +
  +    /* delete prefixed white space */
  +    for (; *s == ' ' || *s == '\t' || *s == '\n'; s++);
  +
  +    /* delete postfixed white space */
  +    for (tp = s; *tp != '\0'; tp++);
  +    for (tp--; tp != s && (*tp == ' ' || *tp == '\t' || *tp == '\n'); tp--) {
  +	*tp = '\0';
  +    }
  +    return (s);
  +}
  +
  +static int is_token(char c)
  +{
  +    int res;
  +
  +    res = (isascii(c) && isgraph(c)
  +	   && (strchr(tspecial, c) == NULL)) ? 1 : -1;
  +    return res;
  +}
  +
  +static int is_qtext(char c)
  +{
  +    int res;
  +
  +    res = (isascii(c) && (c != '"') && (c != '\\') && (c != '\n')) ? 1 : -1;
  +    return res;
  +}
  +
  +static int is_quoted_pair(char *s)
  +{
  +    int res = -1;
  +    int c;
  +
  +    if (((s + 1) != NULL) && (*s == '\\')) {
  +	c = (int) *(s + 1);
  +	if (isascii(c)) {
  +	    res = 1;
  +	}
  +    }
  +    return (res);
  +}
  +
  +static content_type *analyze_ct(pool *p, char *s)
  +{
  +    char *tp, *mp, *cp;
  +    char *attribute, *value;
  +    int quoted = 0;
  +
  +    content_type *ctp;
  +    param *pp, *npp;
  +
  +    /* initialize ctp */
  +    ctp = (content_type *) ap_palloc(p, sizeof(content_type));
  +    ctp->type = NULL;
  +    ctp->subtype = NULL;
  +    ctp->param = NULL;
  +
  +    tp = ap_pstrdup(p, s);
  +
  +    mp = tp;
  +    cp = mp;
  +
  +    /* getting a type */
  +    if (!(cp = strchr(mp, '/'))) {
  +	ap_log_error(APLOG_MARK, APLOG_WARNING, NULL,
  +		     "mod_mime: analyze_ct: cannot get media type from '%s'",
  +		     mp);
  +	return (NULL);
  +    }
  +    ctp->type = ap_pstrndup(p, mp, cp - mp);
  +    ctp->type = zap_sp(ctp->type);
  +    if (ctp->type == NULL || *(ctp->type) == '\0' ||
  +	strchr(ctp->type, ';') || strchr(ctp->type, ' ') ||
  +	strchr(ctp->type, '\t')) {
  +	ap_log_error(APLOG_MARK, APLOG_WARNING, NULL,
  +		     "Cannot get media subtype.");
  +	return (NULL);
  +    }
  +
  +    /* getting a subtype */
  +    cp++;
  +    mp = cp;
  +
  +    for (; *cp != ';' && *cp != '\0'; cp++);
  +    ctp->subtype = ap_pstrndup(p, mp, cp - mp);
  +    ctp->subtype = zap_sp(ctp->subtype);
  +    if ((ctp->subtype == NULL) || (*(ctp->subtype) == '\0') ||
  +	strchr(ctp->subtype, ' ') || strchr(ctp->subtype, '\t')) {
  +	ap_log_error(APLOG_MARK, APLOG_WARNING, NULL,
  +		     "Cannot get media subtype.");
  +	return (NULL);
  +    }
  +    cp = zap_sp(cp);
  +    if (cp == NULL || *cp == '\0') {
  +	return (ctp);
  +    }
  +
  +    /* getting parameters */
  +    cp++;
  +    cp = zap_sp(cp);
  +    if (cp == NULL || *cp == '\0') {
  +	ap_log_error(APLOG_MARK, APLOG_WARNING, NULL,
  +		     "Cannot get media parameter.");
  +	return (NULL);
  +    }
  +    mp = cp;
  +    attribute = NULL;
  +    value = NULL;
  +
  +    while (cp != NULL && *cp != '\0') {
  +	if (attribute == NULL) {
  +	    if (is_token((int) *cp) > 0) {
  +		cp++;
  +		continue;
  +	    }
  +	    else if (*cp == ' ' || *cp == '\t' || *cp == '\n') {
  +		cp++;
  +		continue;
  +	    }
  +	    else if (*cp == '=') {
  +		attribute = ap_pstrndup(p, mp, cp - mp);
  +		attribute = zap_sp(attribute);
  +		if (attribute == NULL || *attribute == '\0') {
  +		    ap_log_error(APLOG_MARK, APLOG_WARNING, NULL,
  +				 "Cannot get media parameter.");
  +		    return (NULL);
  +		}
  +		cp++;
  +		cp = zap_sp(cp);
  +		if (cp == NULL || *cp == '\0') {
  +		    ap_log_error(APLOG_MARK, APLOG_WARNING, NULL,
  +				 "Cannot get media parameter.");
  +		    return (NULL);
  +		}
  +		mp = cp;
  +		continue;
  +	    }
  +	    else {
  +		ap_log_error(APLOG_MARK, APLOG_WARNING, NULL,
  +			     "Cannot get media parameter.");
  +		return (NULL);
  +	    }
  +	}
  +	else {
  +	    if (mp == cp) {
  +		if (*cp == '"') {
  +		    quoted = 1;
  +		    cp++;
  +		}
  +		else {
  +		    quoted = 0;
  +		}
  +	    }
  +	    if (quoted > 0) {
  +		while (quoted && *cp != '\0') {
  +		    if (is_qtext((int) *cp) > 0) {
  +			cp++;
  +		    }
  +		    else if (is_quoted_pair(cp) > 0) {
  +			cp += 2;
  +		    }
  +		    else if (*cp == '"') {
  +			cp++;
  +			while (*cp == ' ' || *cp == '\t' || *cp == '\n') {
  +			    cp++;
  +			}
  +			if (*cp != ';' && *cp != '\0') {
  +			    ap_log_error(APLOG_MARK, APLOG_WARNING, NULL,
  +					 "Cannot get media parameter.");
  +			    return(NULL);
  +			}
  +			quoted = 0;
  +		    }
  +		    else {
  +			ap_log_error(APLOG_MARK, APLOG_WARNING, NULL,
  +				     "Cannot get media parameter.");
  +			return (NULL);
  +		    }
  +		}
  +	    }
  +	    else {
  +		while (1) {
  +		    if (is_token((int) *cp) > 0) {
  +			cp++;
  +		    }
  +		    else if (*cp == '\0' || *cp == ';') {
  +			break;
  +		    }
  +		    else {
  +			ap_log_error(APLOG_MARK, APLOG_WARNING, NULL,
  +				     "Cannot get media parameter.");
  +			return (NULL);
  +		    }
  +		}
  +	    }
  +	    value = ap_pstrndup(p, mp, cp - mp);
  +	    value = zap_sp(value);
  +	    if (value == NULL || *value == '\0') {
  +		ap_log_error(APLOG_MARK, APLOG_WARNING, NULL,
  +			     "Cannot get media parameter.");
  +		return (NULL);
  +	    }
  +
  +	    pp = ap_palloc(p, sizeof(param));
  +	    pp->attr = attribute;
  +	    pp->val = value;
  +	    pp->next = NULL;
  +
  +	    if (ctp->param == NULL) {
  +		ctp->param = pp;
  +	    }
  +	    else {
  +		npp = ctp->param;
  +		while (npp->next) {
  +		    npp = npp->next;
  +		}
  +		npp->next = pp;
  +	    }
  +	    quoted = 0;
  +	    attribute = NULL;
  +	    value = NULL;
  +	    if (*cp == '\0') {
  +		break;
  +	    }
  +	    cp++;
  +	    mp = cp;
  +	}
  +    }
  +    return (ctp);
  +}
  +
   static int find_ct(request_rec *r)
   {
       const char *fn = strrchr(r->filename, '/');
  @@ -283,6 +562,7 @@
       char *ext;
       const char *orighandler = r->handler;
       const char *type;
  +    const char *charset = NULL;
   
       if (S_ISDIR(r->finfo.st_mode)) {
           r->content_type = DIR_MAGIC_TYPE;
  @@ -294,8 +574,9 @@
        * pointer to getword, causing a SEGV ..
        */
   
  -    if (fn == NULL)
  -        fn = r->filename;
  +    if (fn == NULL) {
  +	fn = r->filename;
  +    }
   
       /* Parse filename extensions, which can be in any order */
       while ((ext = ap_getword(r->pool, &fn, '.')) && *ext) {
  @@ -308,6 +589,12 @@
               found = 1;
           }
   
  +	/* Add charset to Content-Type */
  +	if ((type = ap_table_get(conf->charset_types, ext))) {
  +	    charset = type;
  +	    found = 1;
  +	}
  +
           /* Check for Content-Language */
           if ((type = ap_table_get(conf->language_types, ext))) {
               const char **new;
  @@ -347,8 +634,46 @@
               r->content_languages = NULL;
               r->content_encoding = NULL;
               r->handler = orighandler;
  -        }
  +	    charset = NULL;
  +	}
  +    }
   
  +    if (r->content_type) {
  +	content_type *ctp;
  +	char *ct;
  +	int override = 0;
  +
  +	ct = (char *) ap_palloc(r->pool,
  +				sizeof(char) * (strlen(r->content_type) + 1));
  +	strcpy(ct, r->content_type);
  +
  +	if ((ctp = analyze_ct(r->pool, ct))) {
  +	    param *pp = ctp->param;
  +	    r->content_type = ap_pstrcat(r->pool, ctp->type, "/",
  +					 ctp->subtype, NULL);
  +	    while (pp != NULL) {
  +		if (charset && !strcmp(pp->attr, "charset")) {
  +		    if (!override) {
  +			r->content_type = ap_pstrcat(r->pool, r->content_type,
  +						     "; charset=", charset,
  +						     NULL);
  +			override = 1;
  +		    }
  +		}
  +		else {
  +		    r->content_type = ap_pstrcat(r->pool, r->content_type,
  +						 "; ", pp->attr,
  +						 "=", pp->val,
  +						 NULL);
  +		}
  +		pp = pp->next;
  +	    }
  +	    if (charset && !override) {
  +		r->content_type = ap_pstrcat(r->pool, r->content_type,
  +					     "; charset=", charset,
  +					     NULL);
  +	    }
  +	}
       }
   
       /* Set default language, if none was specified by the extensions
  
  
  
  1.33      +60 -5     apache-1.3/htdocs/manual/mod/mod_mime.html
  
  Index: mod_mime.html
  ===================================================================
  RCS file: /home/cvs/apache-1.3/htdocs/manual/mod/mod_mime.html,v
  retrieving revision 1.32
  retrieving revision 1.33
  diff -u -r1.32 -r1.33
  --- mod_mime.html	1999/04/29 19:55:53	1.32
  +++ mod_mime.html	1999/12/09 18:10:39	1.33
  @@ -29,12 +29,12 @@
   
   <P>
   
  -The directives <A HREF="#addencoding">AddEncoding</A>, <A
  -HREF="#addhandler">AddHandler</A>, <A
  -HREF="#addlanguage">AddLanguage</A> and <A HREF="#addtype">AddType</A>
  +The directives <a href="#addcharset">AddCharset</a>,
  +<A HREF="#addencoding">AddEncoding</A>, <A HREF="#addhandler">AddHandler</A>,
  +<A HREF="#addlanguage">AddLanguage</A> and <A HREF="#addtype">AddType</A>
   are all used to map file extensions onto the meta-information for that
  -file.  Respectively they set the content-encoding, handler,
  -content-language and MIME-type (content-type) of documents.  The
  +file.  Respectively they set the character set, content-encoding, handler,
  +content-language, and MIME-type (content-type) of documents.  The
   directive <A HREF="#typesconfig">TypesConfig</A> is used to specify a
   file which also maps extensions onto MIME types. The directives <A
   HREF="#forcetype">ForceType</A> and <A
  @@ -86,6 +86,7 @@
   
   <H2>Directives</H2>
   <UL>
  +<li><a href="#addcharset">AddCharset</a></li>
   <LI><A HREF="#addencoding">AddEncoding</A>
   <LI><A HREF="#addhandler">AddHandler</A>
   <LI><A HREF="#addlanguage">AddLanguage</A>
  @@ -98,6 +99,60 @@
   </UL>
   <HR>
   
  +<H2><A NAME="addcharset">AddCharset</A></H2>
  +<A HREF="directive-dict.html#Syntax" REL="Help"
  +><STRONG>Syntax:</STRONG></A> AddCharset <i>charset extension
  + [extension...]</i><br>
  +<A HREF="directive-dict.html#Context" REL="Help"
  +><STRONG>Context:</STRONG></A> server config, virtual host, directory, .htaccess<BR>
  +<A
  + HREF="directive-dict.html#Override"
  + REL="Help"
  +><STRONG>Override:</STRONG></A> FileInfo<BR>
  +<A
  + HREF="directive-dict.html#Status"
  + REL="Help"
  +><STRONG>Status:</STRONG></A> Base<BR>
  +<A
  + HREF="directive-dict.html#Module"
  + REL="Help"
  +><STRONG>Module:</STRONG></A> mod_mime
  +<br>
  +<A HREF="directive-dict.html#Compatibility" REL="Help"
  +><STRONG>Compatibility:</STRONG></A> AddCharset is only available in Apache
  +1.3.10 and later
  +
  +<P>
  +The AddCharset directive maps the given filename extensions to the
  +specified content charset. <i>charset</i> is the MIME charset
  +parameter applied to files whose names contain <i>extension</i>.  This
  +mapping is added to any already in force, overriding any mappings that
  +already exist for the same <i>extension</i>.
  +</P>
  +<P>
  +Example:</P>
  +<pre>
  +    AddLanguage ja .ja
  +    AddCharset EUC-JP .euc
  +    AddCharset ISO-2022-JP .jis
  +    AddCharset SHIFT_JIS .sjis
  +</pre>
  +
  +<P>
  +Then the document <samp>xxxx.ja.jis</samp> will be treated as being a
  +Japanese document whose charset is ISO-2022-JP (as will the document
  +<samp>xxxx.jis.ja</samp>). Although the content charset is reported to
  +the client, the browser is unlikely to use this information. The
  +AddCharset directive is more useful for
  +<A HREF="../content-negotiation.html">content negotiation</A>, where
  +the server returns one of several documents based on the client's
  +charset preference.
  +</P>
  +<P>
  +<STRONG>See also</STRONG>: <A HREF="mod_negotiation.html">mod_negotiation</A>
  +</P>
  +
  +<hr>
   
   <H2><A NAME="addencoding">AddEncoding</A></H2>
   <!--%plaintext &lt;?INDEX {\tt AddEncoding} directive&gt; -->