You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@httpd.apache.org by Rob Hartill <ro...@imdb.com> on 1996/04/14 18:50:35 UTC
charset= and qc= handling into .var files (patch included) (fwd)
Not acked....
To: apache-bugs@apache.org
Message-Id: <KH...@astral.msk.su>
Organization: Olahm Ha-Yetzirah
Date: Sun, 14 Apr 1996 20:16:16 +0400 (MSD)
X-Mailer: Mail/@ [v2.42 FreeBSD]
From: =?KOI8-R?Q?=E1=CE=C4=D2=C5=CA_=FE=C5=D2=CE=CF=D7?=
(aka Andrey A. Chernov, Black Mage) <ac...@astral.msk.su>
Return-Receipt-To: ache@astral.msk.su
X-Class: Fast
Precedence: special-delivery
Subject: charset= and qc= handling into .var files (patch included)
Lines: 222
Mime-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
One of the main problems for Russian-language WWW sites is choosing
the right charset for the client. Russians have at least 4 different
Russian charsets in active use (I know that the same problem exists for
Japanese too, for example). This problem can be solved by using "Accept-Charset:"
on the client side and finding the proper document on the server side
(according to the latest IETF-HTTP-V11 draft). The .var mechanism in Apache is
well suited to this, but when I try to use
URI: a.html-koi8-r
Content-Type=text/html; charset=KOI8-R
in a .var file, I notice that Apache 1.0.3 ignores the charset parameter
completely, so I made this patch, which takes "charset=" into account for .var
files, checks the client's "Accept-Charset:" field and uses the charset quality
parameter "qc" according to the latest IETF-HTTP-V11 draft. Please apply
this patch or at least tell me how I can improve things.
Thanks in advance.
*** mod_negotiation.c.orig Sat Feb 17 11:32:09 1996
--- mod_negotiation.c Sun Apr 7 00:25:05 1996
***************
*** 139,145 ****
--- 139,147 ----
typedef struct accept_rec {
char *type_name;
+ char *charset;
float quality;
+ float qc;
float max_bytes;
float level;
} accept_rec;
***************
*** 168,175 ****
--- 170,179 ----
char *file_name;
char *content_encoding;
char *content_language;
+ char *charset;
float level; /* Auxiliary to content-type... */
float qs;
+ float qc;
float bytes;
int lang_index;
int is_pseudo_html; /* text/html, *or* the INCLUDES_MAGIC_TYPEs */
***************
*** 194,199 ****
--- 198,204 ----
array_header *accepts; /* accept_recs */
array_header *accept_encodings; /* accept_recs */
array_header *accept_langs; /* accept_recs */
+ array_header *accept_charsets; /* accept_recs */
array_header *avail_vars; /* available variants */
} negotiation_state;
***************
*** 208,218 ****
--- 213,225 ----
mime_info->file_name = "";
mime_info->content_encoding = "";
mime_info->content_language = "";
+ mime_info->charset = "";
mime_info->is_pseudo_html = 0.0;
mime_info->level = 0.0;
mime_info->level_matched = 0.0;
mime_info->qs = 0.0;
+ mime_info->qc = 0.0;
mime_info->quality = 0.0;
mime_info->bytes = 0;
mime_info->lang_index = -1;
***************
*** 225,231 ****
--- 232,240 ----
void set_mime_fields (var_rec *var, accept_rec *mime_info)
{
var->type_name = mime_info->type_name;
+ var->charset = mime_info->charset;
var->qs = mime_info->quality;
+ var->qc = mime_info->qc;
var->quality = mime_info->quality; /* Initial quality is just qs */
var->level = mime_info->level;
***************
*** 295,301 ****
--- 304,312 ----
char *get_entry (pool *p, accept_rec *result, char *accept_line)
{
result->quality = 1.0;
+ result->qc = 1.0;
result->max_bytes = 0.0;
+ result->charset = "";
result->level = 0.0;
/* Note that this handles what I gather is the "old format",
***************
*** 351,361 ****
--- 362,382 ----
if (parm[0] == 'q'
&& (parm[1] == '\0' || (parm[1] == 's' && parm[2] == '\0')))
result->quality = atof(cp);
+ else if (parm[0] == 'q' && parm[1] == 'c' && parm[2] == '\0')
+ result->qc = atof(cp);
else if (parm[0] == 'm' && parm[1] == 'x' &&
parm[2] == 'b' && parm[3] == '\0')
result->max_bytes = atof(cp);
else if (parm[0] == 'l' && !strcmp (&parm[1], "evel"))
result->level = atof(cp);
+ else if (parm[0] == 'c' && !strcmp (&parm[1], "harset")) {
+ result->charset = cp;
+ if ((cp = strchr (result->charset, '\n')) != NULL)
+ *cp = '\0';
+ if ((cp = strrchr (result->charset, '"')) != NULL)
+ *cp = '\0';
+ str_tolower (result->charset);
+ }
}
if (*accept_line == ',') ++accept_line;
***************
*** 403,408 ****
--- 424,431 ----
do_header_line (r->pool, table_get (hdrs, "Accept-encoding"));
new->accept_langs =
do_header_line (r->pool, table_get (hdrs, "Accept-language"));
+ new->accept_charsets =
+ do_header_line (r->pool, table_get (hdrs, "Accept-charset"));
new->avail_vars = make_array (r->pool, 40, sizeof (var_rec));
return new;
***************
*** 421,428 ****
--- 444,453 ----
new_accept->type_name = CGI_MAGIC_TYPE;
new_accept->quality = prefer_scripts ? 1e-20 : 1e20;
+ new_accept->qc = 1.0;
new_accept->level = 0.0;
new_accept->max_bytes = 0.0;
+ new_accept->charset = "";
if (neg->accepts->nelts > 1) return;
***************
*** 430,437 ****
--- 455,464 ----
new_accept->type_name = "*/*";
new_accept->quality = 1.0;
+ new_accept->qc = 1.0;
new_accept->level = 0.0;
new_accept->max_bytes = 0.0;
+ new_accept->charset = "";
}
/*****************************************************************
***************
*** 735,740 ****
--- 762,792 ----
return OK;
}
+ int charset_match (negotiation_state *neg, var_rec *avail)
+ {
+ accept_rec *accs;
+ char *charset;
+ int i;
+
+ /* If no Accept-Charset is present, everything is acceptable */
+
+ if (!neg->accept_charsets->nelts)
+ return 1;
+
+ charset = avail->charset;
+ if (!*charset)
+ charset = "iso-8859-1";
+
+ accs = (accept_rec *)neg->accept_charsets->elts;
+
+ for (i = 0; i < neg->accept_charsets->nelts; ++i) {
+ if (!strcmp (charset, accs[i].type_name))
+ return 1;
+ }
+
+ return 0;
+ }
+
/* This code implements a piece of the tie-breaking algorithm between
* variants of equal quality. This piece is the treatment of variants
* of the same base media type, but different levels. What we want to
***************
*** 966,972 ****
for (j = 0; j < neg->avail_vars->nelts; ++j) {
var_rec *variant = &avail_recs[j];
! float q = type->quality * variant->quality;
/* If we've already rejected this variant, don't waste time */
--- 1018,1024 ----
for (j = 0; j < neg->avail_vars->nelts; ++j) {
var_rec *variant = &avail_recs[j];
! float q = type->quality * variant->quality * variant->qc;
/* If we've already rejected this variant, don't waste time */
***************
*** 977,982 ****
--- 1029,1036 ----
*/
if (!mime_match(type, variant)) continue;
+
+ if (!charset_match(neg, variant)) continue;
/* Check maxbytes */
--
Andrey A. Chernov : And I rest so composedly, /Now, in my bed,
ache@astral.msk.su : That any beholder /Might fancy me dead -
http://dt.demos.su/~ache : Might start at beholding me, /Thinking me dead.
RELCOM Team,FreeBSD Team : E.A.Poe From "For Annie" 1849
----- End of forwarded message from =?KOI8-R?Q?=E1=CE=C4=D2=C5=CA_=FE=C5=D2=CE=CF=D7?= -----
--
Rob Hartill (robh@imdb.com)
The Internet Movie Database (IMDb) http://www.imdb.com/
...more movie info than you can poke a stick at.