You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@httpd.apache.org by Randy Terbush <ra...@zyzzyva.com> on 1996/07/21 21:55:23 UTC
FreeBSD port and charset support
> > Strewth. It's a pity that the changes made weren't forwarded to
> > new-httpd so people could see if they merited inclusion in the real
> > 1.1.1 (perhaps they were, in which case NULL). I'd venture to say
>
> They were. In fact, the person responsible for those changes (they're not
> related to the FreeBSD project, I don't think) has sent them to us at
> least twice. As far as I know, we keep ignoring them.
Is anyone opposed to me including the following patches for this
charset support feature?
*** conf/srm.conf-dist.orig Fri Jul 5 06:41:53 1996
--- conf/srm.conf-dist Fri Jul 5 06:41:54 1996
***************
*** 122,127 ****
--- 122,134 ----
LanguagePriority en fr de
+ # GuessCharset allows you to do charset guessing for clients which
+ # forget to specify Accept-Charset header field. Guessing based on
+ # User-Agent header field pattern.
+ # Format: GuessCharset user-agent_pattern accept-charset_value
+ # user-agent_pattern may contain '*' and '?' shell meta-characters
+ # Example: GuessCharset "Mozilla/* (X11;*" "koi8-r; q=0.8"
+
# Redirect allows you to tell clients about documents which used to exist in
# your server's namespace, but do not anymore. This allows you to tell the
# clients where to look for the relocated document.
*** src/mod_negotiation.c.orig Sun Jun 9 05:12:21 1996
--- src/mod_negotiation.c Fri Jul 5 06:45:29 1996
***************
*** 71,76 ****
--- 71,77 ----
typedef struct {
array_header *language_priority;
+ table *charset_patterns; /* Added with GuessCharset... */
} neg_dir_config;
module negotiation_module;
***************
*** 81,86 ****
--- 82,88 ----
(neg_dir_config *) palloc (p, sizeof (neg_dir_config));
new->language_priority = make_array (p, 4, sizeof (char *));
+ new->charset_patterns = make_table (p, 4);
return new;
}
***************
*** 94,99 ****
--- 96,103 ----
/* give priority to the config in the subdirectory */
new->language_priority = append_arrays (p, add->language_priority,
base->language_priority);
+ new->charset_patterns = overlay_tables (p, add->charset_patterns,
+ base->charset_patterns);
return new;
}
***************
*** 114,119 ****
--- 118,129 ----
return NULL;
}
+ char *set_guess_charset (cmd_parms *cmd, neg_dir_config *m, char *pattern, char *charset)
+ {
+ table_set (m->charset_patterns, pattern, charset);
+ return NULL;
+ }
+
int do_cache_negotiated_docs (server_rec *s)
{
return (get_module_config (s->module_config, &negotiation_module) != NULL);
***************
*** 124,129 ****
--- 134,140 ----
NULL },
{ "LanguagePriority", set_language_priority, NULL, OR_FILEINFO, ITERATE,
NULL },
+ { "GuessCharset", set_guess_charset, NULL, OR_FILEINFO, TAKE2, NULL },
{ NULL }
};
***************
*** 139,145 ****
--- 150,158 ----
typedef struct accept_rec {
char *type_name;
+ char *charset;
float quality;
+ float qc;
float max_bytes;
float level;
} accept_rec;
***************
*** 168,175 ****
--- 181,190 ----
char *file_name;
char *content_encoding;
char *content_language;
+ char *charset;
float level; /* Auxiliary to content-type... */
float qs;
+ float qc;
float bytes;
int lang_index;
int is_pseudo_html; /* text/html, *or* the INCLUDES_MAGIC_TYPEs */
***************
*** 195,200 ****
--- 210,216 ----
array_header *accepts; /* accept_recs */
array_header *accept_encodings; /* accept_recs */
array_header *accept_langs; /* accept_recs */
+ array_header *accept_charsets; /* accept_recs */
array_header *avail_vars; /* available variants */
} negotiation_state;
***************
*** 209,219 ****
--- 225,237 ----
mime_info->file_name = "";
mime_info->content_encoding = "";
mime_info->content_language = "";
+ mime_info->charset = "";
mime_info->is_pseudo_html = 0;
mime_info->level = 0.0;
mime_info->level_matched = 0.0;
mime_info->qs = 0.0;
+ mime_info->qc = 0.0;
mime_info->quality = 0.0;
mime_info->bytes = 0;
mime_info->lang_index = -1;
***************
*** 227,233 ****
--- 245,253 ----
void set_mime_fields (var_rec *var, accept_rec *mime_info)
{
var->type_name = mime_info->type_name;
+ var->charset = mime_info->charset;
var->qs = mime_info->quality;
+ var->qc = mime_info->qc;
var->quality = mime_info->quality; /* Initial quality is just qs */
var->level = mime_info->level;
***************
*** 251,257 ****
--- 271,279 ----
char *get_entry (pool *p, accept_rec *result, char *accept_line)
{
result->quality = 1.0;
+ result->qc = 1.0;
result->max_bytes = 0.0;
+ result->charset = "";
result->level = 0.0;
/* Note that this handles what I gather is the "old format",
***************
*** 307,317 ****
--- 329,349 ----
if (parm[0] == 'q'
&& (parm[1] == '\0' || (parm[1] == 's' && parm[2] == '\0')))
result->quality = atof(cp);
+ else if (parm[0] == 'q' && parm[1] == 'c' && parm[2] == '\0')
+ result->qc = atof(cp);
else if (parm[0] == 'm' && parm[1] == 'x' &&
parm[2] == 'b' && parm[3] == '\0')
result->max_bytes = atof(cp);
else if (parm[0] == 'l' && !strcmp (&parm[1], "evel"))
result->level = atof(cp);
+ else if (parm[0] == 'c' && !strcmp (&parm[1], "harset")) {
+ result->charset = cp;
+ if ((cp = strchr (result->charset, '\n')) != NULL)
+ *cp = '\0';
+ if ((cp = strrchr (result->charset, '"')) != NULL)
+ *cp = '\0';
+ str_tolower (result->charset);
+ }
}
if (*accept_line == ',') ++accept_line;
***************
*** 344,349 ****
--- 376,409 ----
* Handling header lines from clients...
*/
+ char *get_accept_charset (request_rec *r)
+ {
+ char *s;
+
+ if ((s = table_get (r->headers_in, "Accept-charset")) == NULL) {
+ neg_dir_config *conf =
+ (neg_dir_config *) get_module_config (r->per_dir_config,
+ &negotiation_module);
+ if (conf != NULL) {
+ char *agent = table_get (r->headers_in, "User-Agent");
+
+ if (agent != NULL) {
+ table *t = conf->charset_patterns;
+ table_entry *elts = (table_entry *)t->elts;
+ int i;
+
+ for (i = 0; i < t->nelts; ++i) {
+ if (!strcmp_match (agent, elts[i].key)) {
+ s = elts[i].val;
+ break;
+ }
+ }
+ }
+ }
+ }
+ return s;
+ }
+
negotiation_state *parse_accept_headers (request_rec *r)
{
negotiation_state *new =
***************
*** 359,364 ****
--- 419,426 ----
do_header_line (r->pool, table_get (hdrs, "Accept-encoding"));
new->accept_langs =
do_header_line (r->pool, table_get (hdrs, "Accept-language"));
+ new->accept_charsets =
+ do_header_line (r->pool, get_accept_charset (r));
new->avail_vars = make_array (r->pool, 40, sizeof (var_rec));
return new;
***************
*** 377,384 ****
--- 439,448 ----
new_accept->type_name = CGI_MAGIC_TYPE;
new_accept->quality = prefer_scripts ? 1e-20 : 1e20;
+ new_accept->qc = 1.0;
new_accept->level = 0.0;
new_accept->max_bytes = 0.0;
+ new_accept->charset = "";
if (neg->accepts->nelts > 1) return;
***************
*** 386,393 ****
--- 450,459 ----
new_accept->type_name = "*/*";
new_accept->quality = 1.0;
+ new_accept->qc = 1.0;
new_accept->level = 0.0;
new_accept->max_bytes = 0.0;
+ new_accept->charset = "";
}
/*****************************************************************
***************
*** 712,717 ****
--- 778,810 ----
return OK;
}
+ float charset_quality (negotiation_state *neg, var_rec *avail)
+ {
+ accept_rec *accs;
+ char *charset;
+ int i;
+
+ /* If no Accept-Charset is present, everything is acceptable */
+
+ if (!neg->accept_charsets->nelts)
+ return 1.0;
+
+ charset = avail->charset;
+ if (!*charset)
+ charset = "iso-8859-1"; /* default */
+
+ accs = (accept_rec *)neg->accept_charsets->elts;
+
+ for (i = 0; i < neg->accept_charsets->nelts; ++i)
+ if (!strcmp (charset, accs[i].type_name))
+ return accs[i].quality;
+
+ if (!strcmp (charset, "iso-8859-1"))
+ return 1.0;
+
+ return 0.0;
+ }
+
/* This code implements a piece of the tie-breaking algorithm between
* variants of equal quality. This piece is the treatment of variants
* of the same base media type, but different levels. What we want to
***************
*** 943,954 ****
for (j = 0; j < neg->avail_vars->nelts; ++j) {
var_rec *variant = &avail_recs[j];
! float q = type->quality * variant->quality;
/* If we've already rejected this variant, don't waste time */
if (q == 0.0) continue;
/* If media types don't match, forget it.
* (This includes the level check).
*/
--- 1036,1051 ----
for (j = 0; j < neg->avail_vars->nelts; ++j) {
var_rec *variant = &avail_recs[j];
! float q = type->quality * variant->quality * variant->qc;
/* If we've already rejected this variant, don't waste time */
if (q == 0.0) continue;
+ q *= charset_quality(neg, variant);
+
+ if (q == 0.0) continue;
+
/* If media types don't match, forget it.
* (This includes the level check).
*/
*** src/util_script.c.orig Mon Jun 3 16:04:20 1996
--- src/util_script.c Fri Jul 5 06:41:54 1996
***************
*** 61,66 ****
--- 61,68 ----
#include "http_request.h" /* for sub_req_lookup_uri() */
#include "util_script.h"
+ extern char *get_accept_charset (request_rec *r);
+
/*
* Various utility functions which are common to a whole lot of
* script-type extensions mechanisms, and might as well be gathered
***************
*** 130,136 ****
conn_rec *c = r->connection;
const char *rem_logname;
! char port[40],*env_path;
array_header *hdrs_arr = table_elts (r->headers_in);
table_entry *hdrs = (table_entry *)hdrs_arr->elts;
--- 132,138 ----
conn_rec *c = r->connection;
const char *rem_logname;
! char port[40],*env_path,*accept_charset;
array_header *hdrs_arr = table_elts (r->headers_in);
table_entry *hdrs = (table_entry *)hdrs_arr->elts;
***************
*** 155,163 ****
--- 157,169 ----
table_set (e, "CONTENT_LENGTH", hdrs[i].val);
else if (!strcasecmp (hdrs[i].key, "Authorization"))
continue;
+ else if (!strcasecmp (hdrs[i].key, "Accept-charset"))
+ continue; /* do it later */
else
table_set (e, http2env (r->pool, hdrs[i].key), hdrs[i].val);
}
+ if ((accept_charset = get_accept_charset (r)) != NULL)
+ table_set (e, "HTTP_ACCEPT_CHARSET", accept_charset);
sprintf(port, "%d", s->port);
Re: FreeBSD port and charset support
Posted by Brian Behlendorf <br...@organic.com>.
On Sun, 21 Jul 1996, Randy Terbush wrote:
> > > Strewth. It's a pity that the changes made weren't forwarded to
> > > new-httpd so people could see if they merited inclusion in the real
> > > 1.1.1 (perhaps they were, in which case NULL). I'd venture to say
> >
> > They were. In fact, the person responsible for those changes (they're not
> > related to the FreeBSD project, I don't think) has sent them to us at
> > least twice. As far as I know, we keep ignoring them.
>
> Is anyone opposed to me including the following patches for this
> charset support feature?
Yes, from a design perspective:
> + # GuessCharset allows you to do charset guessing for clients which
> + # forget to specify Accept-Charset header field. Guessing based on
> + # User-Agent header field pattern.
> + # Format: GuessCharset user-agent_pattern accept-charset_value
> + # user-agent_pattern may contain '*' and '?' shell meta-characters
> + # Example: GuessCharset "Mozilla/* (X11;*" "koi8-r; q=0.8"
> +
Okay, so let me get this straight. Because certain browsers which
understand multiple charsets are not sending charset information to the
server properly (in proper Accept-Charset patterns), we need to guess what
it'll be? This is the same rathole, I suppose, as the content
type negotiation situation, but it's more like the (im)possibility of
trying to determine if a particular plug-in is installed. For example,
the above "GuessCharset" directive example means "if you're using X
Newtscape, you can probably understand the KOI8-R charset". Bullshit,
it's exactly like saying all MacNetscapes are running with the Shockwave
plug-in installed.
We should definitely support charset in negotiation algorithms - and I
thought we did, but this patch may be fixing that if not.
I guess if we want to make this just another variable that could be set
using my "BrowserMatch" proposal from Sunday, fine.
Brian