You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@httpd.apache.org by ia...@apache.org on 2008/09/10 05:54:00 UTC
svn commit: r693697 - in /httpd/sandbox/mod_valid_utf8: ./ README
mod_valid_utf8.c
Author: ianh
Date: Tue Sep 9 20:53:59 2008
New Revision: 693697
URL: http://svn.apache.org/viewvc?rev=693697&view=rev
Log:
initial check in.
this filter validates that the incoming request contains valid UTF8 characters.
Added:
httpd/sandbox/mod_valid_utf8/
httpd/sandbox/mod_valid_utf8/README (with props)
httpd/sandbox/mod_valid_utf8/mod_valid_utf8.c (with props)
Added: httpd/sandbox/mod_valid_utf8/README
URL: http://svn.apache.org/viewvc/httpd/sandbox/mod_valid_utf8/README?rev=693697&view=auto
==============================================================================
--- httpd/sandbox/mod_valid_utf8/README (added)
+++ httpd/sandbox/mod_valid_utf8/README Tue Sep 9 20:53:59 2008
@@ -0,0 +1,3 @@
+mod valid utf8:
+
+verifies an incoming request has valid UTF8 characters. If it doesn't it refuses to process it.
Propchange: httpd/sandbox/mod_valid_utf8/README
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: httpd/sandbox/mod_valid_utf8/README
------------------------------------------------------------------------------
svn:keywords = Id Author Revision LastChangedBy LastChangedDate HeadURL
Added: httpd/sandbox/mod_valid_utf8/mod_valid_utf8.c
URL: http://svn.apache.org/viewvc/httpd/sandbox/mod_valid_utf8/mod_valid_utf8.c?rev=693697&view=auto
==============================================================================
--- httpd/sandbox/mod_valid_utf8/mod_valid_utf8.c (added)
+++ httpd/sandbox/mod_valid_utf8/mod_valid_utf8.c Tue Sep 9 20:53:59 2008
@@ -0,0 +1,198 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Written by Ian Holsman
+ *
+ */
+
+#include "apr_strings.h"
+#include "apr_lib.h"
+#include "apr_hash.h"
+#include "apr_optional.h"
+
+#define APR_WANT_STRFUNC
+#include "apr_want.h"
+
+#include "ap_config.h"
+#include "mod_log_config.h"
+#include "httpd.h"
+#include "http_core.h"
+#include "http_config.h"
+#include "http_log.h"
+#include "http_connection.h"
+#include "http_protocol.h"
+
+module AP_MODULE_DECLARE_DATA valid_utf8_module;
+
+static const char utf8_filter_name[] = "UTF8VALIDATOR";
+
+
+typedef struct utf8_ctx {
+ apr_bucket_brigade *bb;
+} utf8_ctx;
+
+static char* validate_buffer( apr_pool_t *p, const char* inbuf, apr_size_t* length)
+{
+ apr_size_t i=0;
+ apr_size_t j=0;
+ apr_size_t k=0;
+ apr_size_t len=*length;
+
+ char *buffer = apr_palloc( p, len+1);
+ /* step 1. remove the encodings */
+ while (i < len ) {
+ if (inbuf[i] == '%' ) {
+ if ((i < len -2 ) && ishexnumber( inbuf[i+1]) && ishexnumber( inbuf[i+2])) {
+ int num;
+ if (isnumber( inbuf[i+1] ) ) {
+ num = inbuf[i+1]-'0';
+ }
+ else {
+ num = 10 + inbuf[i+1]-'A';
+ }
+ num *= 16;
+ if (isnumber( inbuf[i+2] ) ) {
+ num += inbuf[i+2]-'0';
+ }
+ else {
+ num += 10 + inbuf[i+2]-'A';
+ }
+ buffer[j++] = num;
+ i += 3;
+ }
+ else {
+ buffer[j++]=inbuf[i++];
+ }
+
+ }
+ else {
+ buffer[j++]=inbuf[i++];
+ }
+ }
+ *length=j;
+ buffer[j]=0;
+ /* step 2. check for validity */
+ while (k<j) {
+ int ch1 = buffer[k];
+ int ch2 = buffer[k+1];
+
+ if (ch1 >= '\xf5' ) {
+ return NULL;
+ }
+ if ( ch1 == '\xc0' || ch1 == '\xc1' ) {
+ if (ch2 != 0 ) {
+ return NULL;
+ }
+ }
+ if ( ch1 == '\xe0' ) {
+ if (ch2 < '\xa0' ) {
+ return NULL;
+ }
+ }
+ if ( ch1 == '\xf0' ) {
+ if (ch2 < '\x90' ) {
+ return NULL;
+ }
+ }
+ k++;
+ }
+
+ return (char*)buffer;
+
+}
+
+static apr_status_t utf8_in_filter(ap_filter_t *f,
+ apr_bucket_brigade *bb,
+ ap_input_mode_t mode,
+ apr_read_type_e block,
+ apr_off_t readbytes) {
+ apr_status_t rv;
+ utf8_ctx *ctx = f->ctx;
+
+ if (!ctx) {
+ ctx = apr_pcalloc( f->c->pool, sizeof(*ctx));
+ ctx->bb = apr_brigade_create( f->c->pool, f->c->bucket_alloc );
+ }
+ rv = ap_get_brigade(f->next, ctx->bb, mode, block, readbytes);
+ if ( rv != APR_SUCCESS ) {
+ return rv;
+ }
+
+ while ( !APR_BRIGADE_EMPTY(ctx->bb) ) {
+ apr_size_t length;
+ char *buffer;
+ const char *data;
+ apr_bucket *cpy;
+ int i ;
+ apr_bucket *b;
+
+ b = APR_BRIGADE_FIRST(ctx->bb );
+ APR_BUCKET_REMOVE(b);
+ if ( APR_BUCKET_IS_EOS(b) ) {
+ APR_BRIGADE_INSERT_TAIL( bb, b);
+ break;
+ }
+ length = b->length;
+
+ rv = apr_bucket_read( b, &data, &length, APR_BLOCK_READ);
+ if ( rv != APR_SUCCESS ) {
+ return rv;
+ }
+
+ buffer = validate_buffer(f->c->pool, data, &length);
+ if ( !buffer ) {
+ ap_log_cerror( APLOG_MARK, APLOG_ERR, rv,f->c, "Unable to validate:%*s", b->length, data);
+ return APR_EGENERAL;
+ }
+ ap_log_cerror( APLOG_MARK, APLOG_DEBUG, rv,f->c, "LEN:%d %*s", length, length, buffer );
+ /* we throw away our buffer, as fixing up the content length and messing with the data makes this a much harder problem */
+ cpy = apr_bucket_pool_create( data , b->length, f->c->pool, f->c->bucket_alloc);
+ APR_BRIGADE_INSERT_TAIL(bb, cpy);
+
+ }
+
+ return APR_SUCCESS;
+
+
+ }
+
+static int utf8_pre_conn(conn_rec *c, void *csd) {
+ ap_add_input_filter(utf8_filter_name, NULL, NULL, c);
+
+ return OK;
+}
+
+static void register_hooks(apr_pool_t *p)
+{
+
+ ap_hook_pre_connection(utf8_pre_conn, NULL, NULL, APR_HOOK_MIDDLE);
+
+ ap_register_input_filter(utf8_filter_name, utf8_in_filter, NULL,
+ AP_FTYPE_NETWORK - 1);
+
+}
+
+module AP_MODULE_DECLARE_DATA valid_utf8_module =
+{
+ STANDARD20_MODULE_STUFF,
+ NULL, /* create per-dir config */
+ NULL, /* merge per-dir config */
+ NULL, /* server config */
+ NULL, /* merge server config */
+ NULL, /* command apr_table_t */
+ register_hooks /* register hooks */
+};
Propchange: httpd/sandbox/mod_valid_utf8/mod_valid_utf8.c
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: httpd/sandbox/mod_valid_utf8/mod_valid_utf8.c
------------------------------------------------------------------------------
svn:keywords = Id Author Revision LastChangedBy LastChangedDate HeadURL
Re: svn commit: r693697 - in /httpd/sandbox/mod_valid_utf8: ./ README
mod_valid_utf8.c
Posted by Ruediger Pluem <rp...@apache.org>.
On 09/10/2008 05:54 AM, ianh@apache.org wrote:
> Author: ianh
> Date: Tue Sep 9 20:53:59 2008
> New Revision: 693697
>
> URL: http://svn.apache.org/viewvc?rev=693697&view=rev
> Log:
> initial check in.
> this filter validates that the incoming request contains valid UTF8 characters.
>
> Added:
> httpd/sandbox/mod_valid_utf8/
> httpd/sandbox/mod_valid_utf8/README (with props)
> httpd/sandbox/mod_valid_utf8/mod_valid_utf8.c (with props)
>
> Added: httpd/sandbox/mod_valid_utf8/mod_valid_utf8.c
> URL: http://svn.apache.org/viewvc/httpd/sandbox/mod_valid_utf8/mod_valid_utf8.c?rev=693697&view=auto
> ==============================================================================
> +
> +static apr_status_t utf8_in_filter(ap_filter_t *f,
> + apr_bucket_brigade *bb,
> + ap_input_mode_t mode,
> + apr_read_type_e block,
> + apr_off_t readbytes) {
> + apr_status_t rv;
> + utf8_ctx *ctx = f->ctx;
> +
> + if (!ctx) {
> + ctx = apr_pcalloc( f->c->pool, sizeof(*ctx));
> + ctx->bb = apr_brigade_create( f->c->pool, f->c->bucket_alloc );
> + }
> + rv = ap_get_brigade(f->next, ctx->bb, mode, block, readbytes);
> + if ( rv != APR_SUCCESS ) {
> + return rv;
> + }
> +
> + while ( !APR_BRIGADE_EMPTY(ctx->bb) ) {
> + apr_size_t length;
> + char *buffer;
> + const char *data;
> + apr_bucket *cpy;
> + int i ;
> + apr_bucket *b;
> +
> + b = APR_BRIGADE_FIRST(ctx->bb );
> + APR_BUCKET_REMOVE(b);
> + if ( APR_BUCKET_IS_EOS(b) ) {
> + APR_BRIGADE_INSERT_TAIL( bb, b);
> + break;
> + }
> + length = b->length;
> +
> + rv = apr_bucket_read( b, &data, &length, APR_BLOCK_READ);
> + if ( rv != APR_SUCCESS ) {
> + return rv;
> + }
> +
> + buffer = validate_buffer(f->c->pool, data, &length);
What do you do when a multibyte character is split over multiple buckets?
Regards
RĂ¼diger
Re: svn commit: r693697 - in /httpd/sandbox/mod_valid_utf8: ./ README
mod_valid_utf8.c
Posted by Ian Holsman <li...@holsman.net>.
Thanks for the feedback.
I'll fix the code soon.
Nick Kew wrote:
> On Wed, 10 Sep 2008 03:54:00 -0000
> ianh@apache.org wrote:
>
>
>> Author: ianh
>> Date: Tue Sep 9 20:53:59 2008
>> New Revision: 693697
>>
>> URL: http://svn.apache.org/viewvc?rev=693697&view=rev
>> Log:
>> initial check in.
>> this filter validates that the incoming request contains valid UTF8
>> characters.
>>
>
> Why?
>
> Last time I looked, incoming charsets were indeed a problem area,
> but a browser submitting an HTML form would de-facto use the
> same charset as the form. Not necessarily utf-8.
>
> Not to mention the many other use cases for sending non-utf8 data.
>
>
>> +static char* validate_buffer( apr_pool_t *p, const char* inbuf,
>> apr_size_t* length) +{
>>
>
> Looks like a potential util_ function. Cousin to both apr_uri
> and apr_xlate.
>
>
>> + if (inbuf[i] == '%' ) {
>> + if ((i < len -2 ) && ishexnumber( inbuf[i+1]) &&
>> ishexnumber( inbuf[i+2])) {
>>
>
> ... is all very well, but
>
>
>> + else {
>> + buffer[j++]=inbuf[i++];
>> + }
>>
>
> Shouldn't that at least be marked /* FIXME */
> where you potentially let through the chars you're
> supposed to block ?
>
>
>
>> + ap_hook_pre_connection(utf8_pre_conn, NULL, NULL,
>> APR_HOOK_MIDDLE); +
>> + ap_register_input_filter(utf8_filter_name, utf8_in_filter, NULL,
>> + AP_FTYPE_NETWORK - 1);
>>
>
> Huh? Isn't that before mod_ssl, let alone mod_deflate, mod_charset?
> And no excape path for binary data either. It'll cripple any server
> with it loaded!
>
>
>
Re: svn commit: r693697 - in /httpd/sandbox/mod_valid_utf8: ./ README
mod_valid_utf8.c
Posted by "William A. Rowe, Jr." <wr...@rowe-clan.net>.
Nick Kew wrote:
>
> Looks like a potential util_ function. Cousin to both apr_uri
> and apr_xlate.
xlate() rejects such nonsense, so apr shouldn't need it. It's only
an issue in other ecosystems behind httpd, such as dodging the recent
Tomcat vulnerability.
Re: svn commit: r693697 - in /httpd/sandbox/mod_valid_utf8: ./ README
mod_valid_utf8.c
Posted by "William A. Rowe, Jr." <wr...@rowe-clan.net>.
Nick Kew wrote:
>
> Last time I looked, incoming charsets were indeed a problem area,
> but a browser submitting an HTML form would de-facto use the
> same charset as the form. Not necessarily utf-8.
It's worthwhile to add some logic to 'step out of the way' when the
charset is inferred as other-than utf-8.
Re: svn commit: r693697 - in /httpd/sandbox/mod_valid_utf8: ./
README mod_valid_utf8.c
Posted by Nick Kew <ni...@webthing.com>.
On Wed, 10 Sep 2008 03:54:00 -0000
ianh@apache.org wrote:
> Author: ianh
> Date: Tue Sep 9 20:53:59 2008
> New Revision: 693697
>
> URL: http://svn.apache.org/viewvc?rev=693697&view=rev
> Log:
> initial check in.
> this filter validates that the incoming request contains valid UTF8
> characters.
Why?
Last time I looked, incoming charsets were indeed a problem area,
but a browser submitting an HTML form would de-facto use the
same charset as the form. Not necessarily utf-8.
Not to mention the many other use cases for sending non-utf8 data.
> +static char* validate_buffer( apr_pool_t *p, const char* inbuf,
> apr_size_t* length) +{
Looks like a potential util_ function. Cousin to both apr_uri
and apr_xlate.
> + if (inbuf[i] == '%' ) {
> + if ((i < len -2 ) && ishexnumber( inbuf[i+1]) &&
> ishexnumber( inbuf[i+2])) {
... is all very well, but
> + else {
> + buffer[j++]=inbuf[i++];
> + }
Shouldn't that at least be marked /* FIXME */
where you potentially let through the chars you're
supposed to block ?
> + ap_hook_pre_connection(utf8_pre_conn, NULL, NULL,
> APR_HOOK_MIDDLE); +
> + ap_register_input_filter(utf8_filter_name, utf8_in_filter, NULL,
> + AP_FTYPE_NETWORK - 1);
Huh? Isn't that before mod_ssl, let alone mod_deflate, mod_charset?
And no excape path for binary data either. It'll cripple any server
with it loaded!
--
Nick Kew
Application Development with Apache - the Apache Modules Book
http://www.apachetutor.org/