You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@httpd.apache.org by sf...@apache.org on 2012/12/30 02:23:25 UTC

svn commit: r1426877 - in /httpd/httpd/trunk: CHANGES include/ap_mmn.h include/http_core.h include/httpd.h modules/http/http_filters.c server/core.c server/protocol.c server/util.c server/vhost.c

Author: sf
Date: Sun Dec 30 01:23:24 2012
New Revision: 1426877

URL: http://svn.apache.org/viewvc?rev=1426877&view=rev
Log:
Add an option to enforce stricter HTTP conformance

This is a first stab, the checks will likely have to be revised.
For now, we check

 * if the request line contains control characters
 * if the request uri has fragment or username/password
 * that the request method is standard or registered with RegisterHttpMethod
 * that the request protocol is of the form HTTP/[1-9]+.[0-9]+,
   or missing for 0.9
 * if there is garbage in the request line after the protocol
 * if any request header contains control characters
 * if any request header has an empty name
 * for the host name in the URL or Host header:
   - if an IPv4 dotted decimal address: Reject octal or hex values, require
     exactly four parts
   - if a DNS host name: Reject non-alphanumeric characters besides '.' and
     '-'. As a side effect, this rejects multiple Host headers.
 * if any response header contains control characters
 * if any response header has an empty name
 * that the Location response header (if present) has a valid scheme and is
   absolute

If we have a host name both from the URL and the Host header, we replace the
Host header with the value from the URL to enforce RFC conformance.

There is a log-only mode, but the loglevels of the logged messages need some
thought/work. Currently, the  checks for incoming data log for 'core' and the
checks for outgoing data log for 'http'. Maybe we need a way to configure the
loglevels separately from the core/http loglevels.


Modified:
    httpd/httpd/trunk/CHANGES
    httpd/httpd/trunk/include/ap_mmn.h
    httpd/httpd/trunk/include/http_core.h
    httpd/httpd/trunk/include/httpd.h
    httpd/httpd/trunk/modules/http/http_filters.c
    httpd/httpd/trunk/server/core.c
    httpd/httpd/trunk/server/protocol.c
    httpd/httpd/trunk/server/util.c
    httpd/httpd/trunk/server/vhost.c

Modified: httpd/httpd/trunk/CHANGES
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/CHANGES?rev=1426877&r1=1426876&r2=1426877&view=diff
==============================================================================
--- httpd/httpd/trunk/CHANGES [utf-8] (original)
+++ httpd/httpd/trunk/CHANGES [utf-8] Sun Dec 30 01:23:24 2012
@@ -1,6 +1,9 @@
                                                          -*- coding: utf-8 -*-
 Changes with Apache 2.5.0
 
+  *) core, http: Extend HttpProtocol with an option to enforce stricter HTTP
+     conformance or to only log the found problems. [Stefan Fritsch]
+
   *) core: Correctly parse an IPv6 literal host specification in an absolute
      URL in the request line. [Stefan Fritsch]
 

Modified: httpd/httpd/trunk/include/ap_mmn.h
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/include/ap_mmn.h?rev=1426877&r1=1426876&r2=1426877&view=diff
==============================================================================
--- httpd/httpd/trunk/include/ap_mmn.h (original)
+++ httpd/httpd/trunk/include/ap_mmn.h Sun Dec 30 01:23:24 2012
@@ -410,6 +410,8 @@
  * 20120724.9 (2.5.0-dev)  Add ap_bin2hex()
  * 20121222.0 (2.5.0-dev)  Remove min_http_version/max_http_version from
  *                         core_server_config again, add http09_enable
+ * 20121222.1 (2.5.0-dev)  Add http_conformance to core_server_config,
+ *                         add ap_has_cntrl()
  */
 
 #define MODULE_MAGIC_COOKIE 0x41503235UL /* "AP25" */
@@ -417,7 +419,7 @@
 #ifndef MODULE_MAGIC_NUMBER_MAJOR
 #define MODULE_MAGIC_NUMBER_MAJOR 20121222
 #endif
-#define MODULE_MAGIC_NUMBER_MINOR 0                   /* 0...n */
+#define MODULE_MAGIC_NUMBER_MINOR 1                   /* 0...n */
 
 /**
  * Determine if the server's current MODULE_MAGIC_NUMBER is at least a

Modified: httpd/httpd/trunk/include/http_core.h
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/include/http_core.h?rev=1426877&r1=1426876&r2=1426877&view=diff
==============================================================================
--- httpd/httpd/trunk/include/http_core.h (original)
+++ httpd/httpd/trunk/include/http_core.h Sun Dec 30 01:23:24 2012
@@ -671,6 +671,11 @@ typedef struct {
 #define AP_HTTP09_DISABLE 2
     char http09_enable;
 
+#define AP_HTTP_CONFORMANCE_UNSET     0
+#define AP_HTTP_CONFORMANCE_LIBERAL   1
+#define AP_HTTP_CONFORMANCE_STRICT    2
+#define AP_HTTP_CONFORMANCE_LOGONLY   4
+    char http_conformance;
 } core_server_config;
 
 /* for AddOutputFiltersByType in core.c */

Modified: httpd/httpd/trunk/include/httpd.h
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/include/httpd.h?rev=1426877&r1=1426876&r2=1426877&view=diff
==============================================================================
--- httpd/httpd/trunk/include/httpd.h (original)
+++ httpd/httpd/trunk/include/httpd.h Sun Dec 30 01:23:24 2012
@@ -2265,6 +2265,15 @@ AP_DECLARE(void) ap_get_loadavg(ap_loada
 AP_DECLARE(void) ap_bin2hex(const void *src, apr_size_t srclen, char *dest)
                  AP_FN_ATTR_NONNULL_ALL;
 
+/**
+ * Check if string contains a control character
+ * @param str the string to check
+ * @param srclen length of the data
+ * @return 1 if yes, 0 if no control characters
+ */
+AP_DECLARE(int) ap_has_cntrl(const char *str)
+                AP_FN_ATTR_NONNULL_ALL;
+
 #define AP_NORESTART APR_OS_START_USEERR + 1
 
 #ifdef __cplusplus

Modified: httpd/httpd/trunk/modules/http/http_filters.c
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/modules/http/http_filters.c?rev=1426877&r1=1426876&r2=1426877&view=diff
==============================================================================
--- httpd/httpd/trunk/modules/http/http_filters.c (original)
+++ httpd/httpd/trunk/modules/http/http_filters.c Sun Dec 30 01:23:24 2012
@@ -639,14 +639,91 @@ static long get_chunk_size(char *b)
     return chunksize;
 }
 
+struct check_header_ctx {
+    request_rec *r;
+    int error;
+};
+
+/* check a single header, to be used with apr_table_do() */
+static int check_header(void *arg, const char *name, const char *val)
+{
+    struct check_header_ctx *ctx = arg;
+    if (name[0] == '\0') {
+        ctx->error = 1;
+        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, ctx->r, APLOGNO()
+                      "Empty response header name, aborting request");
+        return 0;
+    }
+    if (ap_has_cntrl(name)) {
+        ctx->error = 1;
+        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, ctx->r, APLOGNO()
+                      "Response header name '%s' contains control "
+                      "characters, aborting request",
+                      name);
+        return 0;
+    }
+    if (ap_has_cntrl(val)) {
+        ctx->error = 1;
+        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, ctx->r, APLOGNO()
+                      "Response header '%s' contains control characters, "
+                      "aborting request: %s",
+                      name, val);
+        return 0;
+    }
+    return 1;
+}
+
+/**
+ * Check headers for HTTP conformance
+ * @return 1 if ok, 0 if bad
+ */
+static APR_INLINE int check_headers(request_rec *r)
+{
+    const char *loc;
+    struct check_header_ctx ctx = { r, 0 };
+
+    apr_table_do(check_header, &ctx, r->headers_out, NULL);
+    if (ctx.error)
+        return 0; /* problem has been logged by check_header() */
+
+    if ((loc = apr_table_get(r->headers_out, "Location")) != NULL) {
+        const char *scheme_end = ap_strchr_c(loc, ':');
+        const char *s = loc;
+
+        /*
+         * Check that the URI has a valid scheme and is absolute
+         * XXX Should we do a full uri parse here?
+         */
+        if (scheme_end == NULL || scheme_end == loc)
+            goto bad;
+
+        do {
+            if ((!apr_isalnum(*s) && *s != '.' && *s != '+' && *s != '-')
+                || !apr_isascii(*s) ) {
+                goto bad;
+            }
+        } while (++s < scheme_end);
+
+        if (scheme_end[1] != '/' || scheme_end[2] != '/')
+            goto bad;
+    }
+
+    return 1;
+
+bad:
+    ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO()
+                  "Bad Location header in response: '%s', aborting request",
+                  loc);
+    return 0;
+}
+
 typedef struct header_struct {
     apr_pool_t *pool;
     apr_bucket_brigade *bb;
 } header_struct;
 
 /* Send a single HTTP header field to the client.  Note that this function
- * is used in calls to table_do(), so their interfaces are co-dependent.
- * In other words, don't change this one without checking table_do in alloc.c.
+ * is used in calls to apr_table_do(), so don't change its interface.
  * It returns true unless there was a write error of some kind.
  */
 static int form_header_field(header_struct *h,
@@ -1132,6 +1209,7 @@ AP_CORE_DECLARE_NONSTD(apr_status_t) ap_
     header_filter_ctx *ctx = f->ctx;
     const char *ctype;
     ap_bucket_error *eb = NULL;
+    core_server_config *conf;
 
     AP_DEBUG_ASSERT(!r->main);
 
@@ -1187,6 +1265,15 @@ AP_CORE_DECLARE_NONSTD(apr_status_t) ap_
                                            r->headers_out);
     }
 
+    conf = ap_get_core_module_config(r->server->module_config);
+    if (conf->http_conformance & AP_HTTP_CONFORMANCE_STRICT) {
+        int ok = check_headers(r);
+        if (!ok && !(conf->http_conformance & AP_HTTP_CONFORMANCE_LOGONLY)) {
+            ap_die(HTTP_INTERNAL_SERVER_ERROR, r);
+            return AP_FILTER_ERROR;
+        }
+    }
+
     /*
      * Remove the 'Vary' header field if the client can't handle it.
      * Since this will have nasty effects on HTTP/1.1 caches, force

Modified: httpd/httpd/trunk/server/core.c
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/server/core.c?rev=1426877&r1=1426876&r2=1426877&view=diff
==============================================================================
--- httpd/httpd/trunk/server/core.c (original)
+++ httpd/httpd/trunk/server/core.c Sun Dec 30 01:23:24 2012
@@ -510,6 +510,9 @@ static void *merge_core_server_configs(a
     if (virt->http09_enable != AP_HTTP09_UNSET)
         conf->http09_enable = virt->http09_enable;
 
+    if (virt->http_conformance != AP_HTTP_CONFORMANCE_UNSET)
+        conf->http_conformance = virt->http_conformance;
+
     /* no action for virt->accf_map, not allowed per-vhost */
 
     if (virt->protocol)
@@ -3712,10 +3715,27 @@ static const char *set_http_protocol(cmd
         else if (strcmp(arg, "1.0") == 0)
             conf->http09_enable = AP_HTTP09_DISABLE;
         else
-            return "HttpProtocol min must be one of '0.9' and '1.0'";
-	return NULL;
+            return "HttpProtocol 'min' must be one of '0.9' and '1.0'";
+        return NULL;
+    }
+
+    if (strcmp(arg, "strict") == 0)
+        conf->http_conformance = AP_HTTP_CONFORMANCE_STRICT;
+    else if (strcmp(arg, "strict,log-only") == 0)
+        conf->http_conformance = AP_HTTP_CONFORMANCE_STRICT|
+                                 AP_HTTP_CONFORMANCE_LOGONLY;
+    else if (strcmp(arg, "liberal") == 0)
+        conf->http_conformance = AP_HTTP_CONFORMANCE_LIBERAL;
+    else
+        return "HttpProtocol accepts 'min=0.9', 'min=1.0', 'liberal', "
+               "'strict', 'strict,log-only'";
+
+    if ((conf->http_conformance & AP_HTTP_CONFORMANCE_STRICT) &&
+        (conf->http_conformance & AP_HTTP_CONFORMANCE_LIBERAL)) {
+        return "HttpProtocol 'strict' and 'liberal' are mutually exclusive";
     }
-    return "HttpProtocol must be min=0.9|1.0";
+
+    return NULL;
 }
 
 static const char *set_http_method(cmd_parms *cmd, void *conf, const char *arg)
@@ -4230,8 +4250,9 @@ AP_INIT_TAKE1("EnableExceptionHook", ap_
 #endif
 AP_INIT_TAKE1("TraceEnable", set_trace_enable, NULL, RSRC_CONF,
               "'on' (default), 'off' or 'extended' to trace request body content"),
-AP_INIT_TAKE1("HttpProtocol", set_http_protocol, NULL, RSRC_CONF,
-              "'min=0.9' (default) or 'min=1.0' to allow/deny HTTP/0.9"),
+AP_INIT_ITERATE("HttpProtocol", set_http_protocol, NULL, RSRC_CONF,
+              "'min=0.9' (default) or 'min=1.0' to allow/deny HTTP/0.9; "
+              "'liberal', 'strict', 'strict,log-only'"),
 AP_INIT_ITERATE("RegisterHttpMethod", set_http_method, NULL, RSRC_CONF,
                 "Registers non-standard HTTP methods"),
 { NULL }

Modified: httpd/httpd/trunk/server/protocol.c
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/server/protocol.c?rev=1426877&r1=1426876&r2=1426877&view=diff
==============================================================================
--- httpd/httpd/trunk/server/protocol.c (original)
+++ httpd/httpd/trunk/server/protocol.c Sun Dec 30 01:23:24 2012
@@ -566,6 +566,9 @@ static int read_request_line(request_rec
     apr_size_t len;
     int num_blank_lines = 0;
     int max_blank_lines = r->server->limit_req_fields;
+    core_server_config *conf = ap_get_core_module_config(r->server->module_config);
+    int strict = conf->http_conformance & AP_HTTP_CONFORMANCE_STRICT;
+    int enforce_strict = !(conf->http_conformance & AP_HTTP_CONFORMANCE_LOGONLY);
 
     if (max_blank_lines <= 0) {
         max_blank_lines = DEFAULT_LIMIT_REQUEST_FIELDS;
@@ -644,8 +647,6 @@ static int read_request_line(request_rec
         pro = ll;
         len = strlen(ll);
     } else {
-        core_server_config *conf;
-        conf = ap_get_core_module_config(r->server->module_config);
         r->assbackwards = 1;
         pro = "HTTP/0.9";
         len = 8;
@@ -670,12 +671,82 @@ static int read_request_line(request_rec
         && apr_isdigit(pro[7])) {
         r->proto_num = HTTP_VERSION(pro[5] - '0', pro[7] - '0');
     }
-    else if (3 == sscanf(r->protocol, "%4s/%u.%u", http, &major, &minor)
-             && (strcasecmp("http", http) == 0)
-             && (minor < HTTP_VERSION(1, 0)) ) /* don't allow HTTP/0.1000 */
-        r->proto_num = HTTP_VERSION(major, minor);
-    else
-        r->proto_num = HTTP_VERSION(1, 0);
+    else {
+        int nmatch, pos;
+        nmatch = sscanf(r->protocol, "%4s/%u.%u%n", http, &major, &minor, &pos);
+        if (strict) {
+            /*
+             * According to the GNU sscanf man page, there are implementations
+             * that increment the number of matches for %n. Therefore we check
+             * nmatch with "<" instead of "!=".
+             */
+            if (nmatch < 3 || (strcmp("HTTP", http) != 0)
+                || (minor >= HTTP_VERSION(1, 0)) ) { /* don't allow HTTP/0.1000 */
+                ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO()
+                              "Invalid protocol: %s", r->protocol);
+                if (enforce_strict) {
+                    r->status = HTTP_NOT_IMPLEMENTED;
+                    return 0;
+                }
+            }
+            if (r->protocol[pos] != '\0') {
+                while (r->protocol[pos] == ' ')
+                    pos++;
+                if (r->protocol[pos] != '\0') {
+                    ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO()
+                                  "Garbage after request line: ... %s",
+                                  r->protocol);
+                    if (enforce_strict) {
+                        r->status = HTTP_BAD_REQUEST;
+                        return 0;
+                    }
+                }
+            }
+            r->proto_num = HTTP_VERSION(major, minor);
+        }
+        else if (nmatch >= 3 && (strcasecmp("http", http) == 0)
+                 && (minor < HTTP_VERSION(1, 0)) ) { /* don't allow HTTP/0.1000 */
+            r->proto_num = HTTP_VERSION(major, minor);
+        }
+        else {
+            r->proto_num = HTTP_VERSION(1, 0);
+        }
+    }
+
+    if (strict) {
+        int err = 0;
+        if (ap_has_cntrl(r->the_request)) {
+            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO()
+                          "Request line must not contain control characters");
+            err = HTTP_BAD_REQUEST;
+        }
+        if (r->parsed_uri.fragment) {
+            /* RFC3986 3.5: no fragment */
+            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO()
+                          "URI must not contain a fragment");
+            err = HTTP_BAD_REQUEST;
+        }
+        else if (r->parsed_uri.user || r->parsed_uri.password) {
+            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO()
+                          "URI must not contain a username/password");
+            err = HTTP_BAD_REQUEST;
+        }
+        else if (r->method_number == M_INVALID) {
+            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO()
+                          "Invalid HTTP method string: %s", r->method);
+            err = HTTP_NOT_IMPLEMENTED;
+        }
+        else if (r->assbackwards == 0 && r->proto_num < HTTP_VERSION(1, 0)) {
+            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO()
+                          "HTTP/0.x does not take a protocol");
+            err = HTTP_BAD_REQUEST;
+        }
+
+        if (err && enforce_strict) {
+            r->status = err;
+            return 0;
+        }
+    }
 
     return 1;
 }
@@ -719,6 +790,7 @@ AP_DECLARE(void) ap_get_mime_headers_cor
     apr_size_t len;
     int fields_read = 0;
     char *tmp_field;
+    core_server_config *conf = ap_get_core_module_config(r->server->module_config);
 
     /*
      * Read header lines until we get the empty separator line, a read error,
@@ -867,6 +939,33 @@ AP_DECLARE(void) ap_get_mime_headers_cor
                     *tmp_field-- = '\0';
                 }
 
+                if (conf->http_conformance & AP_HTTP_CONFORMANCE_STRICT) {
+                    int err = 0;
+
+                    if (*last_field == '\0') {
+                        err = HTTP_BAD_REQUEST;
+                        ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, APLOGNO()
+                                      "Empty request header field name not allowed");
+                    }
+                    else if (ap_has_cntrl(last_field)) {
+                        err = HTTP_BAD_REQUEST;
+                        ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, APLOGNO()
+                                      "[HTTP strict] Request header field name contains "
+                                      "control character: %.*s",
+                                      (int)LOG_NAME_MAX_LEN, last_field);
+                    }
+                    else if (ap_has_cntrl(value)) {
+                        err = HTTP_BAD_REQUEST;
+                        ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, APLOGNO()
+                                      "Request header field '%.*s' contains"
+                                      "control character", (int)LOG_NAME_MAX_LEN,
+                                      last_field);
+                    }
+                    if (err && !(conf->http_conformance & AP_HTTP_CONFORMANCE_LOGONLY)) {
+                        r->status = err;
+                        return;
+                    }
+                }
                 apr_table_addn(r->headers_in, last_field, value);
 
                 /* reset the alloc_len so that we'll allocate a new
@@ -916,7 +1015,7 @@ request_rec *ap_read_request(conn_rec *c
     request_rec *r;
     apr_pool_t *p;
     const char *expect;
-    int access_status = HTTP_OK;
+    int access_status;
     apr_bucket_brigade *tmp_bb;
     apr_socket_t *csd;
     apr_interval_time_t cur_timeout;
@@ -973,9 +1072,11 @@ request_rec *ap_read_request(conn_rec *c
 
     /* Get the request... */
     if (!read_request_line(r, tmp_bb)) {
-        if (r->status == HTTP_REQUEST_URI_TOO_LARGE
-            || r->status == HTTP_BAD_REQUEST
-            || r->status == HTTP_VERSION_NOT_SUPPORTED) {
+        switch (r->status) {
+        case HTTP_REQUEST_URI_TOO_LARGE:
+        case HTTP_BAD_REQUEST:
+        case HTTP_VERSION_NOT_SUPPORTED:
+        case HTTP_NOT_IMPLEMENTED:
             if (r->status == HTTP_REQUEST_URI_TOO_LARGE) {
                 ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, APLOGNO(00565)
                               "request failed: URI too long (longer than %d)",
@@ -990,19 +1091,17 @@ request_rec *ap_read_request(conn_rec *c
             ap_run_log_transaction(r);
             apr_brigade_destroy(tmp_bb);
             goto traceout;
-        }
-        else if (r->status == HTTP_REQUEST_TIME_OUT) {
+        case HTTP_REQUEST_TIME_OUT:
             ap_update_child_status(conn->sbh, SERVER_BUSY_LOG, r);
-            if (!r->connection->keepalives) {
+            if (!r->connection->keepalives)
                 ap_run_log_transaction(r);
-            }
             apr_brigade_destroy(tmp_bb);
             goto traceout;
+        default:
+            apr_brigade_destroy(tmp_bb);
+            r = NULL;
+            goto traceout;
         }
-
-        apr_brigade_destroy(tmp_bb);
-        r = NULL;
-        goto traceout;
     }
 
     /* We may have been in keep_alive_timeout mode, so toggle back
@@ -1063,6 +1162,7 @@ request_rec *ap_read_request(conn_rec *c
      * now read. may update status.
      */
     ap_update_vhost_from_headers(r);
+    access_status = r->status;
 
     /* Toggle to the Host:-based vhost's timeout mode to fetch the
      * request body and send the response body, if needed.

Modified: httpd/httpd/trunk/server/util.c
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/server/util.c?rev=1426877&r1=1426876&r2=1426877&view=diff
==============================================================================
--- httpd/httpd/trunk/server/util.c (original)
+++ httpd/httpd/trunk/server/util.c Sun Dec 30 01:23:24 2012
@@ -1970,6 +1970,16 @@ AP_DECLARE(void) ap_bin2hex(const void *
     *dest = '\0';
 }
 
+AP_DECLARE(int) ap_has_cntrl(const char *str)
+{
+    while (*str) {
+        if (apr_iscntrl(*str))
+            return 1;
+        str++;
+    }
+    return 0;
+}
+
 AP_DECLARE(int) ap_is_directory(apr_pool_t *p, const char *path)
 {
     apr_finfo_t finfo;

Modified: httpd/httpd/trunk/server/vhost.c
URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/server/vhost.c?rev=1426877&r1=1426876&r2=1426877&view=diff
==============================================================================
--- httpd/httpd/trunk/server/vhost.c (original)
+++ httpd/httpd/trunk/server/vhost.c Sun Dec 30 01:23:24 2012
@@ -735,6 +735,59 @@ static apr_status_t fix_hostname_non_v6(
     return APR_SUCCESS;
 }
 
+/*
+ * If strict mode ever becomes the default, this should be folded into
+ * fix_hostname_non_v6()
+ */
+static apr_status_t strict_hostname_check(request_rec *r, char *host,
+                                          int logonly)
+{
+    char *ch;
+    int is_dotted_decimal = 1, leading_zeroes = 0, dots = 0;
+
+    for (ch = host; *ch; ch++) {
+        if (!apr_isascii(*ch)) {
+            goto bad;
+        }
+        else if (apr_isalpha(*ch) || *ch == '-') {
+            is_dotted_decimal = 0;
+        }
+        else if (ch[0] == '.') {
+            dots++;
+            if (ch[1] == '0' && apr_isdigit(ch[2]))
+                leading_zeroes = 1;
+        }
+        else if (!apr_isdigit(*ch)) {
+           /* also takes care of multiple Host headers by denying commas */
+            goto bad;
+        }
+    }
+    if (is_dotted_decimal) {
+        if (host[0] == '.' || (host[0] == '0' && apr_isdigit(host[1])))
+            leading_zeroes = 1;
+        if (leading_zeroes || dots != 3) {
+            /* RFC 3986 7.4 */
+            goto bad;
+        }
+    }
+    else {
+        /* The top-level domain must start with a letter (RFC 1123 2.1) */
+        while (ch > host && *ch != '.')
+            ch--;
+        if (ch[0] == '.' && ch[1] != '\0' && !apr_isalpha(ch[1]))
+            goto bad;
+    }
+    return APR_SUCCESS;
+
+bad:
+    ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO()
+                  "[strict] Invalid host name '%s'%s%.6s",
+                  host, *ch ? ", problem near: " : "", ch);
+    if (logonly)
+        return APR_SUCCESS;
+    return APR_EINVAL;
+}
+
 /* Lowercase and remove any trailing dot and/or :port from the hostname,
  * and check that it is sane.
  *
@@ -748,13 +801,17 @@ static apr_status_t fix_hostname_non_v6(
  * Instead we just check for filesystem metacharacters: directory
  * separators / and \ and sequences of more than one dot.
  */
-static void fix_hostname(request_rec *r, const char *host_header)
+static int fix_hostname(request_rec *r, const char *host_header,
+                        unsigned http_conformance)
 {
     const char *src;
     char *host, *scope_id;
     apr_port_t port;
     apr_status_t rv;
     const char *c;
+    int is_v6literal = 0;
+    int strict = http_conformance & AP_HTTP_CONFORMANCE_STRICT;
+    int strict_logonly = http_conformance & AP_HTTP_CONFORMANCE_LOGONLY;
 
     src = host_header ? host_header : r->hostname;
 
@@ -763,7 +820,7 @@ static void fix_hostname(request_rec *r,
      * XXX name'. Can this happen?
      */
     if (!*src) {
-        return;
+        return is_v6literal;
     }
 
     /* apr_parse_addr_port will interpret a bare integer as a port
@@ -772,8 +829,16 @@ static void fix_hostname(request_rec *r,
     for (c = src; apr_isdigit(*c); ++c);
     if (!*c) {
         /* pure integer */
+        if (strict) {
+            /* RFC 3986 7.4 */
+            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO()
+                         "[strict] purely numeric host names not allowed: %s",
+                         src);
+            if (!strict_logonly)
+                goto bad_nolog;
+        }
         r->hostname = src;
-        return;
+        return is_v6literal;
     }
 
     if (host_header) {
@@ -790,9 +855,7 @@ static void fix_hostname(request_rec *r,
             r->parsed_uri.port_str = apr_itoa(r->pool, (int)port);
         }
         if (host_header[0] == '[')
-            rv = fix_hostname_v6_literal(r, host);
-        else
-            rv = fix_hostname_non_v6(r, host);
+            is_v6literal = 1;
     }
     else {
         /*
@@ -801,24 +864,32 @@ static void fix_hostname(request_rec *r,
          */
         host = apr_pstrdup(r->pool, r->hostname);
         if (ap_strchr(host, ':') != NULL)
-            rv = fix_hostname_v6_literal(r, host);
-        else
-            rv = fix_hostname_non_v6(r, host);
+            is_v6literal = 1;
+    }
+
+    if (is_v6literal) {
+        rv = fix_hostname_v6_literal(r, host);
+    }
+    else {
+        rv = fix_hostname_non_v6(r, host);
+        if (strict && rv == APR_SUCCESS)
+            rv = strict_hostname_check(r, host, strict_logonly);
     }
     if (rv != APR_SUCCESS)
         goto bad;
+
     r->hostname = host;
-    return;
+    return is_v6literal;
 
 bad:
-    r->status = HTTP_BAD_REQUEST;
     ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00550)
                   "Client sent malformed Host header: %s",
-                  r->hostname);
-    return;
+                  src);
+bad_nolog:
+    r->status = HTTP_BAD_REQUEST;
+    return is_v6literal;
 }
 
-
 /* return 1 if host matches ServerName or ServerAliases */
 static int matches_aliases(server_rec *s, const char *host)
 {
@@ -1028,23 +1099,79 @@ static void check_serverpath(request_rec
     }
 }
 
+static APR_INLINE const char *construct_host_header(request_rec *r,
+                                                    int is_v6literal)
+{
+    struct iovec iov[5];
+    apr_size_t nvec = 0;
+    /*
+     * We cannot use ap_get_server_name/port here, because we must
+     * ignore UseCanonicalName/Port.
+     */
+    if (is_v6literal) {
+        iov[nvec].iov_base = "[";
+        iov[nvec].iov_len = 1;
+        nvec++;
+    }
+    iov[nvec].iov_base = (void *)r->hostname;
+    iov[nvec].iov_len = strlen(r->hostname);
+    nvec++;
+    if (is_v6literal) {
+        iov[nvec].iov_base = "]";
+        iov[nvec].iov_len = 1;
+        nvec++;
+    }
+    if (r->parsed_uri.port_str) {
+        iov[nvec].iov_base = ":";
+        iov[nvec].iov_len = 1;
+        nvec++;
+        iov[nvec].iov_base = r->parsed_uri.port_str;
+        iov[nvec].iov_len = strlen(r->parsed_uri.port_str);
+        nvec++;
+    }
+    return apr_pstrcatv(r->pool, iov, nvec, NULL);
+}
 
 AP_DECLARE(void) ap_update_vhost_from_headers(request_rec *r)
 {
-    const char *host_header;
+    core_server_config *conf = ap_get_core_module_config(r->server->module_config);
+    const char *host_header = apr_table_get(r->headers_in, "Host");
+    int is_v6literal, have_hostname_from_url = 0;
 
     if (r->hostname) {
         /*
          * If there was a host part in the Request-URI, ignore the 'Host'
          * header.
          */
-        fix_hostname(r, NULL);
+        have_hostname_from_url = 1;
+        is_v6literal = fix_hostname(r, NULL, conf->http_conformance);
     }
-    else if ((host_header = apr_table_get(r->headers_in, "Host")) != NULL ) {
-        fix_hostname(r, host_header);
+    else if (host_header != NULL) {
+        is_v6literal = fix_hostname(r, host_header, conf->http_conformance);
     }
     if (r->status != HTTP_OK)
         return;
+
+    if (conf->http_conformance & AP_HTTP_CONFORMANCE_STRICT) {
+        /*
+         * If we have both hostname from an absoluteURI and a Host header,
+         * we must ignore the Host header (RFC 2616 5.2).
+         * To enforce this, we reset the Host header to the value from the
+         * request line.
+         */
+        if (have_hostname_from_url && host_header != NULL) {
+            const char *info = "Would replace";
+            const char *new = construct_host_header(r, is_v6literal);
+            if (!(conf->http_conformance & AP_HTTP_CONFORMANCE_LOGONLY)) {
+                apr_table_set(r->headers_in, "Host", r->hostname);
+                info = "Replacing";
+            }
+            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO()
+                          "%s Host header '%s' with host from request uri: "
+                          "'%s'", info, host_header, new);
+        }
+    }
+
     /* check if we tucked away a name_chain */
     if (r->connection->vhost_lookup_data) {
         if (r->hostname)



Re: svn commit: r1426877 - in /httpd/httpd/trunk: CHANGES include/ap_mmn.h include/http_core.h include/httpd.h modules/http/http_filters.c server/core.c server/protocol.c server/util.c server/vhost.c

Posted by Stefan Fritsch <sf...@sfritsch.de>.
Hi Roy,

On Sunday 30 December 2012, Roy T. Fielding wrote:
> Thanks for this work, but I don't consider HTTP conformance to be
> an option.  These are checks we should be making while parsing
> the received message, not as a separate pass, and in many cases
> they are required to result in a 400, 500, or 502 response.

Some of the new checks are bound to cause problems with existing sites 
and/or clients. I think we need to offer some soft migration path. 
Like

1) make the checks an option, defaulting to off
2) change the default to on
3) make the checks mandatory

I guess the time between the steps depends on the feedback we get from 
users. I expect one year may be reasonable. Of course, we may choose 
to make some checks mandatory earlier, if they are very unlikely to 
break things.

For some parts, the new checks are done during the normal parsing. But 
in order to allow them to be switched off, some separation was 
necessary.

> I am trying to get HTTPbis ready for last call this week.
> After that, I will be looking into making the changes in httpd,
> and I won't be using a configurable option.  I suggest we just
> remove that part and iterate on these checks as we go.
>
> The current HTTP/1.1 drafts are at
> 
>  
> http://svn.tools.ietf.org/svn/wg/httpbis/draft-ietf-httpbis/latest
> /
> 
> and the message parsing requirements are in p1-messaging.html

Thanks, I hadn't looked at this previously, but it seems to be mostly 
in line to what I have implemented. HTTPbis mandates some more 
restrictions that should be checked, too.
 
> BTW, the protocol version is now restricted to uppercase and
> one digit per major and minor, so we can simplify that check
> to a very specific "HTTP/[0-9].[0-9]".
> 
> And, yes, it is possible to have a valid empty Host because
> HTTP can be used with a proxy for any URI (including URNs).

OK, I will change the protocol parsing and remove the comment about 
empty Host headers.

> ....Roy
> 
> On Dec 29, 2012, at 5:23 PM, sf@apache.org wrote:
> > Author: sf
> > Date: Sun Dec 30 01:23:24 2012
> > New Revision: 1426877
> > 
> > URL: http://svn.apache.org/viewvc?rev=1426877&view=rev
> > Log:
> > Add an option to enforce stricter HTTP conformance
> > 
> > This is a first stab, the checks will likely have to be revised.

Re: svn commit: r1426877 - in /httpd/httpd/trunk: CHANGES include/ap_mmn.h include/http_core.h include/httpd.h modules/http/http_filters.c server/core.c server/protocol.c server/util.c server/vhost.c

Posted by "Roy T. Fielding" <fi...@gbiv.com>.
Hi Stefan,

Thanks for this work, but I don't consider HTTP conformance to be
an option.  These are checks we should be making while parsing
the received message, not as a separate pass, and in many cases
they are required to result in a 400, 500, or 502 response.

I am trying to get HTTPbis ready for last call this week.
After that, I will be looking into making the changes in httpd,
and I won't be using a configurable option.  I suggest we just
remove that part and iterate on these checks as we go.

The current HTTP/1.1 drafts are at

  http://svn.tools.ietf.org/svn/wg/httpbis/draft-ietf-httpbis/latest/

and the message parsing requirements are in p1-messaging.html

BTW, the protocol version is now restricted to uppercase and
one digit per major and minor, so we can simplify that check
to a very specific "HTTP/[0-9].[0-9]".

And, yes, it is possible to have a valid empty Host because
HTTP can be used with a proxy for any URI (including URNs).

....Roy

On Dec 29, 2012, at 5:23 PM, sf@apache.org wrote:

> Author: sf
> Date: Sun Dec 30 01:23:24 2012
> New Revision: 1426877
> 
> URL: http://svn.apache.org/viewvc?rev=1426877&view=rev
> Log:
> Add an option to enforce stricter HTTP conformance
> 
> This is a first stab, the checks will likely have to be revised.
> For now, we check
> 
> * if the request line contains control characters
> * if the request uri has fragment or username/password
> * that the request method is standard or registered with RegisterHttpMethod
> * that the request protocol is of the form HTTP/[1-9]+.[0-9]+,
>   or missing for 0.9
> * if there is garbage in the request line after the protocol
> * if any request header contains control characters
> * if any request header has an empty name
> * for the host name in the URL or Host header:
>   - if an IPv4 dotted decimal address: Reject octal or hex values, require
>     exactly four parts
>   - if a DNS host name: Reject non-alphanumeric characters besides '.' and
>     '-'. As a side effect, this rejects multiple Host headers.
> * if any response header contains control characters
> * if any response header has an empty name
> * that the Location response header (if present) has a valid scheme and is
>   absolute
> 
> If we have a host name both from the URL and the Host header, we replace the
> Host header with the value from the URL to enforce RFC conformance.
> 
> There is a log-only mode, but the loglevels of the logged messages need some
> thought/work. Currently, the  checks for incoming data log for 'core' and the
> checks for outgoing data log for 'http'. Maybe we need a way to configure the
> loglevels separately from the core/http loglevels.



Re: svn commit: r1426877 - in /httpd/httpd/trunk: CHANGES include/ap_mmn.h include/http_core.h include/httpd.h modules/http/http_filters.c server/core.c server/protocol.c server/util.c server/vhost.c

Posted by William A Rowe Jr <wr...@rowe-clan.net>.
This has been the object of some debate, read Lisa's errata rejection of ID
1081 and 1353...

https://www.rfc-editor.org/errata/rfc1123



On Sep 16, 2017 10:00, "Eric Covener" <co...@gmail.com> wrote:

On Sat, Sep 16, 2017 at 9:48 AM, Yann Ylavic <yl...@gmail.com> wrote:
> On Sat, Sep 16, 2017 at 3:37 AM, Eric Covener <co...@gmail.com> wrote:
>> On Sat, Dec 29, 2012 at 8:23 PM,  <sf...@apache.org> wrote:
>>>
>>> +/*
>>> + * If strict mode ever becomes the default, this should be folded into
>>> + * fix_hostname_non_v6()
>>> + */
>>> +static apr_status_t strict_hostname_check(request_rec *r, char *host,
>>> +                                          int logonly)
>>> +{
>>> +    char *ch;
>>> +    int is_dotted_decimal = 1, leading_zeroes = 0, dots = 0;
>>> +
>>> +    for (ch = host; *ch; ch++) {
>>> +        if (!apr_isascii(*ch)) {
>>> +            goto bad;
>>> +        }
>>> +        else if (apr_isalpha(*ch) || *ch == '-') {
>>> +            is_dotted_decimal = 0;
>>> +        }
>>> +        else if (ch[0] == '.') {
>>> +            dots++;
>>> +            if (ch[1] == '0' && apr_isdigit(ch[2]))
>>> +                leading_zeroes = 1;
>>> +        }
>>> +        else if (!apr_isdigit(*ch)) {
>>> +           /* also takes care of multiple Host headers by denying
commas */
>>> +            goto bad;
>>> +        }
>>> +    }
>>> +    if (is_dotted_decimal) {
>>> +        if (host[0] == '.' || (host[0] == '0' && apr_isdigit(host[1])))
>>> +            leading_zeroes = 1;
>>> +        if (leading_zeroes || dots != 3) {
>>> +            /* RFC 3986 7.4 */
>>> +            goto bad;
>>> +        }
>>> +    }
>>> +    else {
>>> +        /* The top-level domain must start with a letter (RFC 1123
2.1) */
>>> +        while (ch > host && *ch != '.')
>>> +            ch--;
>>> +        if (ch[0] == '.' && ch[1] != '\0' && !apr_isalpha(ch[1]))
>>> +            goto bad;
>>> +    }
>>> +    return APR_SUCCESS;
>>> +
>>> +bad:
>>> +    ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO()
>>> +                  "[strict] Invalid host name '%s'%s%.6s",
>>> +                  host, *ch ? ", problem near: " : "", ch);
>>> +    if (logonly)
>>> +        return APR_SUCCESS;
>>> +    return APR_EINVAL;
>>> +}
>>
>> (sorry for the necromancy of this very old commit)
>>
>> Re: the 1123 2.1 reference a dozen lines from the end of the function:
>> RFC 1123 2.1 seems to say the opposite. Just a bug or something over
>> my head?
>>
>>   2.1  Host Names and Numbers
>>
>>       The syntax of a legal Internet host name was specified in RFC-952
>>       [DNS:4].  One aspect of host name syntax is hereby changed: the
>>       restriction on the first character is relaxed to allow either a
>>       letter or a digit.  Host software MUST support this more liberal
>>       syntax.
>
> RFC 1123 2.1 seems to be about the first character of the host, while
> the code checks the first one of the TLD. Are there TLDs starting with
> a digit?

I see, thanks.  The basis in 1123 is a bit later in 2.1 but doesn't
really seem normative:

If a dotted-decimal number can be entered without such
           identifying delimiters, then a full syntactic check must be
           made, because a segment of a host domain name is now allowed
           to begin with a digit and could legally be entirely numeric
           (see Section 6.1.2.4).  However, a valid host name can never
           have the dotted-decimal form #.#.#.#, since at least the
           highest-level component label will be alphabetic.

The 6.1.2.4 reference is likely an error because that is about compression.

It seems like we'd reject "1foo" but accept "1foo.com", but i am not
sure if this warrants an exception or reconsidering the check.

(In the case that had me looking, a high TCP port was used as the
hostname AND port in the Host header so it is clearly someone elses
bug at the core)

--
Eric Covener
covener@gmail.com

Re: svn commit: r1426877 - in /httpd/httpd/trunk: CHANGES include/ap_mmn.h include/http_core.h include/httpd.h modules/http/http_filters.c server/core.c server/protocol.c server/util.c server/vhost.c

Posted by Eric Covener <co...@gmail.com>.
On Sat, Sep 16, 2017 at 9:48 AM, Yann Ylavic <yl...@gmail.com> wrote:
> On Sat, Sep 16, 2017 at 3:37 AM, Eric Covener <co...@gmail.com> wrote:
>> On Sat, Dec 29, 2012 at 8:23 PM,  <sf...@apache.org> wrote:
>>>
>>> +/*
>>> + * If strict mode ever becomes the default, this should be folded into
>>> + * fix_hostname_non_v6()
>>> + */
>>> +static apr_status_t strict_hostname_check(request_rec *r, char *host,
>>> +                                          int logonly)
>>> +{
>>> +    char *ch;
>>> +    int is_dotted_decimal = 1, leading_zeroes = 0, dots = 0;
>>> +
>>> +    for (ch = host; *ch; ch++) {
>>> +        if (!apr_isascii(*ch)) {
>>> +            goto bad;
>>> +        }
>>> +        else if (apr_isalpha(*ch) || *ch == '-') {
>>> +            is_dotted_decimal = 0;
>>> +        }
>>> +        else if (ch[0] == '.') {
>>> +            dots++;
>>> +            if (ch[1] == '0' && apr_isdigit(ch[2]))
>>> +                leading_zeroes = 1;
>>> +        }
>>> +        else if (!apr_isdigit(*ch)) {
>>> +           /* also takes care of multiple Host headers by denying commas */
>>> +            goto bad;
>>> +        }
>>> +    }
>>> +    if (is_dotted_decimal) {
>>> +        if (host[0] == '.' || (host[0] == '0' && apr_isdigit(host[1])))
>>> +            leading_zeroes = 1;
>>> +        if (leading_zeroes || dots != 3) {
>>> +            /* RFC 3986 7.4 */
>>> +            goto bad;
>>> +        }
>>> +    }
>>> +    else {
>>> +        /* The top-level domain must start with a letter (RFC 1123 2.1) */
>>> +        while (ch > host && *ch != '.')
>>> +            ch--;
>>> +        if (ch[0] == '.' && ch[1] != '\0' && !apr_isalpha(ch[1]))
>>> +            goto bad;
>>> +    }
>>> +    return APR_SUCCESS;
>>> +
>>> +bad:
>>> +    ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO()
>>> +                  "[strict] Invalid host name '%s'%s%.6s",
>>> +                  host, *ch ? ", problem near: " : "", ch);
>>> +    if (logonly)
>>> +        return APR_SUCCESS;
>>> +    return APR_EINVAL;
>>> +}
>>
>> (sorry for the necromancy of this very old commit)
>>
>> Re: the 1123 2.1 reference a dozen lines from the end of the function:
>> RFC 1123 2.1 seems to say the opposite. Just a bug or something over
>> my head?
>>
>>   2.1  Host Names and Numbers
>>
>>       The syntax of a legal Internet host name was specified in RFC-952
>>       [DNS:4].  One aspect of host name syntax is hereby changed: the
>>       restriction on the first character is relaxed to allow either a
>>       letter or a digit.  Host software MUST support this more liberal
>>       syntax.
>
> RFC 1123 2.1 seems to be about the first character of the host, while
> the code checks the first one of the TLD. Are there TLDs starting with
> a digit?

I see, thanks.  The basis in 1123 is a bit later in 2.1 but doesn't
really seem normative:

If a dotted-decimal number can be entered without such
           identifying delimiters, then a full syntactic check must be
           made, because a segment of a host domain name is now allowed
           to begin with a digit and could legally be entirely numeric
           (see Section 6.1.2.4).  However, a valid host name can never
           have the dotted-decimal form #.#.#.#, since at least the
           highest-level component label will be alphabetic.

The 6.1.2.4 reference is likely an error because that is about compression.

It seems like we'd reject "1foo" but accept "1foo.com", but i am not
sure if this warrants an exception or reconsidering the check.

(In the case that had me looking, a high TCP port was used as the
hostname AND port in the Host header so it is clearly someone elses
bug at the core)

-- 
Eric Covener
covener@gmail.com

Re: svn commit: r1426877 - in /httpd/httpd/trunk: CHANGES include/ap_mmn.h include/http_core.h include/httpd.h modules/http/http_filters.c server/core.c server/protocol.c server/util.c server/vhost.c

Posted by Yann Ylavic <yl...@gmail.com>.
On Sat, Sep 16, 2017 at 3:37 AM, Eric Covener <co...@gmail.com> wrote:
> On Sat, Dec 29, 2012 at 8:23 PM,  <sf...@apache.org> wrote:
>>
>> +/*
>> + * If strict mode ever becomes the default, this should be folded into
>> + * fix_hostname_non_v6()
>> + */
>> +static apr_status_t strict_hostname_check(request_rec *r, char *host,
>> +                                          int logonly)
>> +{
>> +    char *ch;
>> +    int is_dotted_decimal = 1, leading_zeroes = 0, dots = 0;
>> +
>> +    for (ch = host; *ch; ch++) {
>> +        if (!apr_isascii(*ch)) {
>> +            goto bad;
>> +        }
>> +        else if (apr_isalpha(*ch) || *ch == '-') {
>> +            is_dotted_decimal = 0;
>> +        }
>> +        else if (ch[0] == '.') {
>> +            dots++;
>> +            if (ch[1] == '0' && apr_isdigit(ch[2]))
>> +                leading_zeroes = 1;
>> +        }
>> +        else if (!apr_isdigit(*ch)) {
>> +           /* also takes care of multiple Host headers by denying commas */
>> +            goto bad;
>> +        }
>> +    }
>> +    if (is_dotted_decimal) {
>> +        if (host[0] == '.' || (host[0] == '0' && apr_isdigit(host[1])))
>> +            leading_zeroes = 1;
>> +        if (leading_zeroes || dots != 3) {
>> +            /* RFC 3986 7.4 */
>> +            goto bad;
>> +        }
>> +    }
>> +    else {
>> +        /* The top-level domain must start with a letter (RFC 1123 2.1) */
>> +        while (ch > host && *ch != '.')
>> +            ch--;
>> +        if (ch[0] == '.' && ch[1] != '\0' && !apr_isalpha(ch[1]))
>> +            goto bad;
>> +    }
>> +    return APR_SUCCESS;
>> +
>> +bad:
>> +    ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO()
>> +                  "[strict] Invalid host name '%s'%s%.6s",
>> +                  host, *ch ? ", problem near: " : "", ch);
>> +    if (logonly)
>> +        return APR_SUCCESS;
>> +    return APR_EINVAL;
>> +}
>
> (sorry for the necromancy of this very old commit)
>
> Re: the 1123 2.1 reference a dozen lines from the end of the function:
> RFC 1123 2.1 seems to say the opposite. Just a bug or something over
> my head?
>
>   2.1  Host Names and Numbers
>
>       The syntax of a legal Internet host name was specified in RFC-952
>       [DNS:4].  One aspect of host name syntax is hereby changed: the
>       restriction on the first character is relaxed to allow either a
>       letter or a digit.  Host software MUST support this more liberal
>       syntax.

RFC 1123 2.1 seems to be about the first character of the host, while
the code checks the first one of the TLD. Are there TLDs starting with
a digit?

Re: svn commit: r1426877 - in /httpd/httpd/trunk: CHANGES include/ap_mmn.h include/http_core.h include/httpd.h modules/http/http_filters.c server/core.c server/protocol.c server/util.c server/vhost.c

Posted by Eric Covener <co...@gmail.com>.
On Sat, Dec 29, 2012 at 8:23 PM,  <sf...@apache.org> wrote:
> Author: sf
> Date: Sun Dec 30 01:23:24 2012
> New Revision: 1426877
>
> URL: http://svn.apache.org/viewvc?rev=1426877&view=rev
> Log:
> Add an option to enforce stricter HTTP conformance
>
> Modified: httpd/httpd/trunk/server/vhost.c

> URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/server/vhost.c?rev=1426877&r1=1426876&r2=1426877&view=diff
> ==============================================================================
> --- httpd/httpd/trunk/server/vhost.c (original)
> +++ httpd/httpd/trunk/server/vhost.c Sun Dec 30 01:23:24 2012
> @@ -735,6 +735,59 @@ static apr_status_t fix_hostname_non_v6(
>      return APR_SUCCESS;
>  }
>
> +/*
> + * If strict mode ever becomes the default, this should be folded into
> + * fix_hostname_non_v6()
> + */
> +static apr_status_t strict_hostname_check(request_rec *r, char *host,
> +                                          int logonly)
> +{
> +    char *ch;
> +    int is_dotted_decimal = 1, leading_zeroes = 0, dots = 0;
> +
> +    for (ch = host; *ch; ch++) {
> +        if (!apr_isascii(*ch)) {
> +            goto bad;
> +        }
> +        else if (apr_isalpha(*ch) || *ch == '-') {
> +            is_dotted_decimal = 0;
> +        }
> +        else if (ch[0] == '.') {
> +            dots++;
> +            if (ch[1] == '0' && apr_isdigit(ch[2]))
> +                leading_zeroes = 1;
> +        }
> +        else if (!apr_isdigit(*ch)) {
> +           /* also takes care of multiple Host headers by denying commas */
> +            goto bad;
> +        }
> +    }
> +    if (is_dotted_decimal) {
> +        if (host[0] == '.' || (host[0] == '0' && apr_isdigit(host[1])))
> +            leading_zeroes = 1;
> +        if (leading_zeroes || dots != 3) {
> +            /* RFC 3986 7.4 */
> +            goto bad;
> +        }
> +    }
> +    else {
> +        /* The top-level domain must start with a letter (RFC 1123 2.1) */
> +        while (ch > host && *ch != '.')
> +            ch--;
> +        if (ch[0] == '.' && ch[1] != '\0' && !apr_isalpha(ch[1]))
> +            goto bad;
> +    }
> +    return APR_SUCCESS;
> +
> +bad:
> +    ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO()
> +                  "[strict] Invalid host name '%s'%s%.6s",
> +                  host, *ch ? ", problem near: " : "", ch);
> +    if (logonly)
> +        return APR_SUCCESS;
> +    return APR_EINVAL;
> +}

(sorry for the necromancy of this very old commit)

Re: the 1123 2.1 reference a dozen lines from the end of the function:
RFC 1123 2.1 seems to say the opposite. Just a bug or something over
my head?

  2.1  Host Names and Numbers

      The syntax of a legal Internet host name was specified in RFC-952
      [DNS:4].  One aspect of host name syntax is hereby changed: the
      restriction on the first character is relaxed to allow either a
      letter or a digit.  Host software MUST support this more liberal
      syntax.