You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@httpd.apache.org by ia...@apache.org on 2008/09/10 05:54:00 UTC

svn commit: r693697 - in /httpd/sandbox/mod_valid_utf8: ./ README mod_valid_utf8.c

Author: ianh
Date: Tue Sep  9 20:53:59 2008
New Revision: 693697

URL: http://svn.apache.org/viewvc?rev=693697&view=rev
Log:
initial check in.
this filter validates that the incoming request contains valid UTF8 characters.

Added:
    httpd/sandbox/mod_valid_utf8/
    httpd/sandbox/mod_valid_utf8/README   (with props)
    httpd/sandbox/mod_valid_utf8/mod_valid_utf8.c   (with props)

Added: httpd/sandbox/mod_valid_utf8/README
URL: http://svn.apache.org/viewvc/httpd/sandbox/mod_valid_utf8/README?rev=693697&view=auto
==============================================================================
--- httpd/sandbox/mod_valid_utf8/README (added)
+++ httpd/sandbox/mod_valid_utf8/README Tue Sep  9 20:53:59 2008
@@ -0,0 +1,3 @@
+mod valid utf8:
+
+verifies an incoming request has valid UTF8 characters. If it doesn't it refuses to process it.

Propchange: httpd/sandbox/mod_valid_utf8/README
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: httpd/sandbox/mod_valid_utf8/README
------------------------------------------------------------------------------
    svn:keywords = Id Author Revision LastChangedBy LastChangedDate HeadURL

Added: httpd/sandbox/mod_valid_utf8/mod_valid_utf8.c
URL: http://svn.apache.org/viewvc/httpd/sandbox/mod_valid_utf8/mod_valid_utf8.c?rev=693697&view=auto
==============================================================================
--- httpd/sandbox/mod_valid_utf8/mod_valid_utf8.c (added)
+++ httpd/sandbox/mod_valid_utf8/mod_valid_utf8.c Tue Sep  9 20:53:59 2008
@@ -0,0 +1,198 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Written by Ian Holsman
+ *
+ */
+
+#include "apr_strings.h"
+#include "apr_lib.h"
+#include "apr_hash.h"
+#include "apr_optional.h"
+
+#define APR_WANT_STRFUNC
+#include "apr_want.h"
+
+#include "ap_config.h"
+#include "mod_log_config.h"
+#include "httpd.h"
+#include "http_core.h"
+#include "http_config.h"
+#include "http_log.h"
+#include "http_connection.h"
+#include "http_protocol.h"
+
+module AP_MODULE_DECLARE_DATA valid_utf8_module;
+
+static const char utf8_filter_name[] = "UTF8VALIDATOR";
+
+
+typedef struct utf8_ctx {
+    apr_bucket_brigade *bb;
+} utf8_ctx;
+
+static char* validate_buffer( apr_pool_t *p,  const char* inbuf, apr_size_t* length)
+{
+    apr_size_t i=0;
+    apr_size_t j=0;
+    apr_size_t k=0;
+    apr_size_t len=*length;
+
+    char *buffer = apr_palloc( p, len+1);
+    /* step 1. remove the encodings */
+    while (i < len ) {
+        if (inbuf[i] == '%' ) {
+            if ((i < len -2 ) && ishexnumber( inbuf[i+1]) && ishexnumber( inbuf[i+2])) {
+                int num;
+                if (isnumber( inbuf[i+1] ) ) {
+                    num = inbuf[i+1]-'0';
+                }
+                else {
+                    num = 10 + inbuf[i+1]-'A';
+                }
+                num *= 16;
+                if (isnumber( inbuf[i+2] ) ) {
+                    num += inbuf[i+2]-'0';
+                }
+                else {
+                    num += 10 + inbuf[i+2]-'A';
+                }
+                buffer[j++] = num;
+                i += 3;
+            }
+            else {
+                buffer[j++]=inbuf[i++];
+            }
+
+        }
+        else {
+            buffer[j++]=inbuf[i++];
+        }
+    }
+    *length=j;
+    buffer[j]=0;
+    /* step 2. check for validity */
+    while (k<j) {
+        int ch1 = buffer[k];
+        int ch2 = buffer[k+1];
+
+        if (ch1  >= '\xf5' )  {
+            return NULL;
+        }
+        if ( ch1 == '\xc0' || ch1  == '\xc1' ) {
+            if (ch2 != 0 ) {
+                return NULL;
+            }
+        }
+        if (  ch1 == '\xe0' ) {
+            if (ch2 < '\xa0' ) {
+                return NULL;
+            }
+        }
+        if ( ch1 == '\xf0' ) {
+            if (ch2 < '\x90' ) {
+                return NULL;
+            }
+        }
+        k++;
+    }
+
+    return (char*)buffer;
+
+}
+
+static apr_status_t utf8_in_filter(ap_filter_t *f,
+                                    apr_bucket_brigade *bb,
+                                    ap_input_mode_t mode,
+                                    apr_read_type_e block,
+                                    apr_off_t readbytes) {
+    apr_status_t rv;
+    utf8_ctx *ctx = f->ctx;
+
+    if (!ctx) {
+        ctx = apr_pcalloc( f->c->pool, sizeof(*ctx));
+        ctx->bb = apr_brigade_create( f->c->pool, f->c->bucket_alloc );
+    }
+    rv = ap_get_brigade(f->next, ctx->bb, mode, block, readbytes);
+    if ( rv != APR_SUCCESS ) {
+        return rv;
+    }
+
+    while ( !APR_BRIGADE_EMPTY(ctx->bb) ) {
+        apr_size_t length;
+        char *buffer;
+        const char *data;
+        apr_bucket *cpy;
+        int i ;
+        apr_bucket *b;
+        
+        b = APR_BRIGADE_FIRST(ctx->bb );
+        APR_BUCKET_REMOVE(b);
+        if ( APR_BUCKET_IS_EOS(b) ) {
+            APR_BRIGADE_INSERT_TAIL( bb, b);
+            break;
+        }
+        length = b->length;
+
+        rv = apr_bucket_read( b, &data, &length, APR_BLOCK_READ);
+        if ( rv != APR_SUCCESS ) {
+            return rv;
+        }
+
+        buffer = validate_buffer(f->c->pool, data, &length);
+        if ( !buffer ) {
+            ap_log_cerror( APLOG_MARK, APLOG_ERR, rv,f->c, "Unable to validate:%*s", b->length, data);
+            return APR_EGENERAL;
+        }
+        ap_log_cerror( APLOG_MARK, APLOG_DEBUG, rv,f->c, "LEN:%d %*s", length, length, buffer );
+        /* we throw away our buffer, as fixing up the content length and messing with the data makes this a much harder problem */
+        cpy = apr_bucket_pool_create( data , b->length, f->c->pool, f->c->bucket_alloc);
+        APR_BRIGADE_INSERT_TAIL(bb, cpy);
+
+    }
+
+    return APR_SUCCESS;
+
+
+    }
+
+static int utf8_pre_conn(conn_rec *c, void *csd) {
+    ap_add_input_filter(utf8_filter_name, NULL, NULL, c);
+
+    return OK;
+}
+
+static void register_hooks(apr_pool_t *p)
+{
+
+    ap_hook_pre_connection(utf8_pre_conn, NULL, NULL, APR_HOOK_MIDDLE);
+
+    ap_register_input_filter(utf8_filter_name, utf8_in_filter, NULL,
+                             AP_FTYPE_NETWORK - 1);
+
+}
+
+module AP_MODULE_DECLARE_DATA valid_utf8_module =
+{
+    STANDARD20_MODULE_STUFF,
+    NULL,                       /* create per-dir config */
+    NULL,                       /* merge per-dir config */
+    NULL,                       /* server config */
+    NULL,                       /* merge server config */
+    NULL,                       /* command apr_table_t */
+    register_hooks              /* register hooks */
+};

Propchange: httpd/sandbox/mod_valid_utf8/mod_valid_utf8.c
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: httpd/sandbox/mod_valid_utf8/mod_valid_utf8.c
------------------------------------------------------------------------------
    svn:keywords = Id Author Revision LastChangedBy LastChangedDate HeadURL



Re: svn commit: r693697 - in /httpd/sandbox/mod_valid_utf8: ./ README mod_valid_utf8.c

Posted by Ruediger Pluem <rp...@apache.org>.

On 09/10/2008 05:54 AM, ianh@apache.org wrote:
> Author: ianh
> Date: Tue Sep  9 20:53:59 2008
> New Revision: 693697
> 
> URL: http://svn.apache.org/viewvc?rev=693697&view=rev
> Log:
> initial check in.
> this filter validates that the incoming request contains valid UTF8 characters.
> 
> Added:
>     httpd/sandbox/mod_valid_utf8/
>     httpd/sandbox/mod_valid_utf8/README   (with props)
>     httpd/sandbox/mod_valid_utf8/mod_valid_utf8.c   (with props)

> 
> Added: httpd/sandbox/mod_valid_utf8/mod_valid_utf8.c
> URL: http://svn.apache.org/viewvc/httpd/sandbox/mod_valid_utf8/mod_valid_utf8.c?rev=693697&view=auto
> ==============================================================================

> +
> +static apr_status_t utf8_in_filter(ap_filter_t *f,
> +                                    apr_bucket_brigade *bb,
> +                                    ap_input_mode_t mode,
> +                                    apr_read_type_e block,
> +                                    apr_off_t readbytes) {
> +    apr_status_t rv;
> +    utf8_ctx *ctx = f->ctx;
> +
> +    if (!ctx) {
> +        ctx = apr_pcalloc( f->c->pool, sizeof(*ctx));
> +        ctx->bb = apr_brigade_create( f->c->pool, f->c->bucket_alloc );
> +    }
> +    rv = ap_get_brigade(f->next, ctx->bb, mode, block, readbytes);
> +    if ( rv != APR_SUCCESS ) {
> +        return rv;
> +    }
> +
> +    while ( !APR_BRIGADE_EMPTY(ctx->bb) ) {
> +        apr_size_t length;
> +        char *buffer;
> +        const char *data;
> +        apr_bucket *cpy;
> +        int i ;
> +        apr_bucket *b;
> +        
> +        b = APR_BRIGADE_FIRST(ctx->bb );
> +        APR_BUCKET_REMOVE(b);
> +        if ( APR_BUCKET_IS_EOS(b) ) {
> +            APR_BRIGADE_INSERT_TAIL( bb, b);
> +            break;
> +        }
> +        length = b->length;
> +
> +        rv = apr_bucket_read( b, &data, &length, APR_BLOCK_READ);
> +        if ( rv != APR_SUCCESS ) {
> +            return rv;
> +        }
> +
> +        buffer = validate_buffer(f->c->pool, data, &length);

What do you do when a multibyte character is split over multiple buckets?

Regards

RĂ¼diger

Re: svn commit: r693697 - in /httpd/sandbox/mod_valid_utf8: ./ README mod_valid_utf8.c

Posted by Ian Holsman <li...@holsman.net>.
Thanks for the feedback.

I'll fix the code soon.

Nick Kew wrote:
> On Wed, 10 Sep 2008 03:54:00 -0000
> ianh@apache.org wrote:
>
>   
>> Author: ianh
>> Date: Tue Sep  9 20:53:59 2008
>> New Revision: 693697
>>
>> URL: http://svn.apache.org/viewvc?rev=693697&view=rev
>> Log:
>> initial check in.
>> this filter validates that the incoming request contains valid UTF8
>> characters.
>>     
>
> Why?
>
> Last time I looked, incoming charsets were indeed a problem area,
> but a browser submitting an HTML form would de-facto use the
> same charset as the form.  Not necessarily utf-8.
>
> Not to mention the many other use cases for sending non-utf8 data.
>
>   
>> +static char* validate_buffer( apr_pool_t *p,  const char* inbuf,
>> apr_size_t* length) +{
>>     
>
> Looks like a potential util_ function.  Cousin to both apr_uri
> and apr_xlate.
>
>   
>> +        if (inbuf[i] == '%' ) {
>> +            if ((i < len -2 ) && ishexnumber( inbuf[i+1]) &&
>> ishexnumber( inbuf[i+2])) {
>>     
>
> ... is all very well, but
>
>   
>> +            else {
>> +                buffer[j++]=inbuf[i++];
>> +            }
>>     
>
> Shouldn't that at least be marked /* FIXME */
> where you potentially let through the chars you're
> supposed to block ?
>
>
>   
>> +    ap_hook_pre_connection(utf8_pre_conn, NULL, NULL,
>> APR_HOOK_MIDDLE); +
>> +    ap_register_input_filter(utf8_filter_name, utf8_in_filter, NULL,
>> +                             AP_FTYPE_NETWORK - 1);
>>     
>
> Huh?  Isn't that before mod_ssl, let alone mod_deflate, mod_charset?
> And no excape path for binary data either.  It'll cripple any server
> with it loaded!
>
>
>   


Re: svn commit: r693697 - in /httpd/sandbox/mod_valid_utf8: ./ README mod_valid_utf8.c

Posted by "William A. Rowe, Jr." <wr...@rowe-clan.net>.
Nick Kew wrote:
> 
> Looks like a potential util_ function.  Cousin to both apr_uri
> and apr_xlate.

xlate() rejects such nonsense, so apr shouldn't need it.  It's only
an issue in other ecosystems behind httpd, such as dodging the recent
Tomcat vulnerability.

Re: svn commit: r693697 - in /httpd/sandbox/mod_valid_utf8: ./ README mod_valid_utf8.c

Posted by "William A. Rowe, Jr." <wr...@rowe-clan.net>.
Nick Kew wrote:
> 
> Last time I looked, incoming charsets were indeed a problem area,
> but a browser submitting an HTML form would de-facto use the
> same charset as the form.  Not necessarily utf-8.

It's worthwhile to add some logic to 'step out of the way' when the
charset is inferred as other-than utf-8.

Re: svn commit: r693697 - in /httpd/sandbox/mod_valid_utf8: ./ README mod_valid_utf8.c

Posted by Nick Kew <ni...@webthing.com>.
On Wed, 10 Sep 2008 03:54:00 -0000
ianh@apache.org wrote:

> Author: ianh
> Date: Tue Sep  9 20:53:59 2008
> New Revision: 693697
> 
> URL: http://svn.apache.org/viewvc?rev=693697&view=rev
> Log:
> initial check in.
> this filter validates that the incoming request contains valid UTF8
> characters.

Why?

Last time I looked, incoming charsets were indeed a problem area,
but a browser submitting an HTML form would de-facto use the
same charset as the form.  Not necessarily utf-8.

Not to mention the many other use cases for sending non-utf8 data.

> +static char* validate_buffer( apr_pool_t *p,  const char* inbuf,
> apr_size_t* length) +{

Looks like a potential util_ function.  Cousin to both apr_uri
and apr_xlate.

> +        if (inbuf[i] == '%' ) {
> +            if ((i < len -2 ) && ishexnumber( inbuf[i+1]) &&
> ishexnumber( inbuf[i+2])) {

... is all very well, but

> +            else {
> +                buffer[j++]=inbuf[i++];
> +            }

Shouldn't that at least be marked /* FIXME */
where you potentially let through the chars you're
supposed to block ?


> +    ap_hook_pre_connection(utf8_pre_conn, NULL, NULL,
> APR_HOOK_MIDDLE); +
> +    ap_register_input_filter(utf8_filter_name, utf8_in_filter, NULL,
> +                             AP_FTYPE_NETWORK - 1);

Huh?  Isn't that before mod_ssl, let alone mod_deflate, mod_charset?
And no excape path for binary data either.  It'll cripple any server
with it loaded!


-- 
Nick Kew

Application Development with Apache - the Apache Modules Book
http://www.apachetutor.org/