You are viewing a plain text version of this content. The canonical link for it is here.
Posted to apache-bugdb@apache.org by Andrew Wansink <an...@sharinga.com> on 2001/03/02 07:15:09 UTC

general/7342: This is a small parser for host headers

>Number:         7342
>Category:       general
>Synopsis:       This is a small parser for host headers
>Confidential:   no
>Severity:       non-critical
>Priority:       medium
>Responsible:    apache
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          change-request
>Submitter-Id:   apache
>Arrival-Date:   Thu Mar 01 22:20:00 PST 2001
>Closed-Date:
>Last-Modified:
>Originator:     andy@sharinga.com
>Release:        1.3.17
>Organization:
apache
>Environment:
Linux 2.2.18, gcc-2.95.2
>Description:
Here is a table-driven parser for Host headers that is much more flexible
than the current if/else parsing structure.


--- apache_1.3.17/src/main/http_vhost.c	Wed Jan 24 01:14:06 2001
+++ apache_1.3.17.new_host_header_parser/src/main/http_vhost.c	Fri Mar  2 17:17:53 2001
@@ -703,49 +703,102 @@
     const char *src;
     char *dst;
 
+    enum {
+        E_PERIOD    = 0,
+        E_SLASH     = 1,
+        E_BACKSLASH = 2,
+        E_COLON     = 3,
+        E_DIGIT     = 4,
+        E_OTHER     = 5,
+        E_NULL      = 6,
+
+        A_COPY      = 0,
+        A_BAD       = 1,
+        A_LAST      = 2,
+
+        NUM_EVENTS  = 7,
+        NUM_STATES  = 5
+    };
+
+
+
+    static const unsigned transitions[NUM_STATES][NUM_EVENTS] = {
+// #        '.'        '/'       '\'       ':'       digit     other     null
+/* 0 */ {    0,        0,        0,        0,        1,        1,        0    },
+/* 1 */ {    2,        0,        0,        3,        1,        1,        0    },
+/* 2 */ {    0,        0,        0,        3,        1,        1,        0    },
+/* 3 */ {    0,        0,        0,        0,        4,        0,        0    },
+/* 4 */ {    0,        0,        0,        0,        4,        0,        0    }
+};
+
+
+    static const unsigned actions[NUM_STATES][NUM_EVENTS] = {
+// #      '.'      '/'      '\'       ':'       digit     other     null
+/* 0 */ { A_BAD,   A_BAD,   A_BAD,    A_BAD,    A_COPY,   A_COPY,   A_BAD  },
+/* 1 */ { A_COPY,  A_BAD,   A_BAD,    A_COPY,   A_COPY,   A_COPY,   A_LAST },
+/* 2 */ { A_BAD,   A_BAD,   A_BAD,    A_COPY,   A_COPY,   A_COPY,   A_LAST },
+/* 3 */ { A_BAD,   A_BAD,   A_BAD,    A_BAD,    A_COPY,   A_BAD,    A_BAD  },
+/* 4 */ { A_BAD,   A_BAD,   A_BAD,    A_BAD,    A_COPY,   A_BAD,    A_LAST }
+};
+
+    unsigned event;
+    unsigned action;
+    unsigned previous_state;
+    unsigned parser_state = 0;
+
     /* check and copy the host part */
     src = r->hostname;
     dst = host;
-    while (*src) {
-	if (*src == '.') {
-	    *dst++ = *src++;
-	    if (*src == '.')
-		goto bad;
-	    else
-		continue;
-	}
-	if (*src == '/' || *src == '\\') {
-	    goto bad;
-	}
-        if (*src == ':') {
-            /* check the port part */
-            while (*++src) {
-                if (!ap_isdigit(*src)) {
-                    goto bad;
-                }
+
+    while (1) {
+        switch(*src) {
+
+        case '/':    event = E_SLASH;     break;
+        case ':':    event = E_COLON;     break;
+        case '.':    event = E_PERIOD;    break;
+        case '\0':   event = E_NULL;      break;
+        case '\\':   event = E_BACKSLASH; break;
+
+        default:
+            if (isdigit(*src)){
+                event = E_DIGIT;
             }
-            if (src[-1] == ':')
-                goto bad;
             else
-                break;
+                event = E_OTHER;
         }
-	*dst++ = *src++;
-    }
-    /* strip trailing gubbins */
-    if (dst > host && dst[-1] == '.') {
-	dst[-1] = '\0';
-    } else {
-	dst[0] = '\0';
-    }
 
-    r->hostname = host;
-    return;
+        previous_state = parser_state;
+
+        parser_state = transitions[previous_state][event];
+        action       = actions    [previous_state][event];
 
-bad:
-    r->status = HTTP_BAD_REQUEST;
-    ap_log_rerror(APLOG_MARK, APLOG_NOERRNO|APLOG_ERR, r,
-		  "Client sent malformed Host header");
-    return;
+        switch(action) {
+
+        case A_COPY:
+            *dst++ = *src++;
+            break;
+
+        case A_LAST:
+            if(previous_state == 2) {
+                dst[-1] = '\0';
+            }
+            else dst[0] = '\0';
+                
+            r->hostname = host;
+            return;
+
+        case A_BAD:
+            r->status = HTTP_BAD_REQUEST;
+            ap_log_rerror(APLOG_MARK, APLOG_NOERRNO|APLOG_ERR, r,
+                            "Client sent malformed Host header");
+            return;
+        }
+    }
 }
 
 
>How-To-Repeat:

>Fix:

>Release-Note:
>Audit-Trail:
>Unformatted:
 [In order for any reply to be added to the PR database, you need]
 [to include <ap...@Apache.Org> in the Cc line and make sure the]
 [subject line starts with the report component and number, with ]
 [or without any 'Re:' prefixes (such as "general/1098:" or      ]
 ["Re: general/1098:").  If the subject doesn't match this       ]
 [pattern, your message will be misfiled and ignored.  The       ]
 ["apbugs" address is not added to the Cc line of messages from  ]
 [the database automatically because of the potential for mail   ]
 [loops.  If you do not include this Cc, your reply may be ig-   ]
 [nored unless you are responding to an explicit request from a  ]
 [developer.  Reply only with text; DO NOT SEND ATTACHMENTS!     ]