You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafficserver.apache.org by zw...@apache.org on 2020/02/05 17:53:26 UTC

[trafficserver] branch 9.0.x updated (bb2054b -> ae3136c)

This is an automated email from the ASF dual-hosted git repository.

zwoop pushed a change to branch 9.0.x
in repository https://gitbox.apache.org/repos/asf/trafficserver.git.


    from bb2054b  Free TSMgmtString after using it.
     new 8b00e56  Fixed how we handle uknown schemes
     new ae3136c  Change header validation

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 proxy/hdrs/HTTP.cc    |  24 ++++++------
 proxy/hdrs/HdrTest.cc | 101 +++++++++++++++++++++++++++++++++++++++-----------
 proxy/hdrs/URL.cc     |  49 +++++++++++++-----------
 3 files changed, 119 insertions(+), 55 deletions(-)


[trafficserver] 02/02: Change header validation

Posted by zw...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

zwoop pushed a commit to branch 9.0.x
in repository https://gitbox.apache.org/repos/asf/trafficserver.git

commit ae3136ce7c62a4e5d21f6889c9cd69adc55a95c1
Author: ZeddYu Lu <ze...@gmail.com>
AuthorDate: Tue Feb 4 11:20:13 2020 -0800

    Change header validation
    
    (cherry picked from commit 5830bc72611e85e7a31098ce86710242f29076dc)
---
 proxy/hdrs/HTTP.cc | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/proxy/hdrs/HTTP.cc b/proxy/hdrs/HTTP.cc
index 7c18f91..7f0dc82 100644
--- a/proxy/hdrs/HTTP.cc
+++ b/proxy/hdrs/HTTP.cc
@@ -1114,20 +1114,18 @@ http_parser_parse_req(HTTPParser *parser, HdrHeap *heap, HTTPHdrImpl *hh, const
 
     end                    = real_end;
     parser->m_parsing_http = false;
-
-    ParseResult ret =
-      mime_parser_parse(&parser->m_mime_parser, heap, hh->m_fields_impl, start, end, must_copy_strings, eof, max_hdr_field_size);
-    // If we're done with the main parse do some validation
-    if (ret == PARSE_RESULT_DONE) {
-      ret = validate_hdr_host(hh); // check HOST header
-    }
-    if (ret == PARSE_RESULT_DONE) {
-      ret = validate_hdr_content_length(heap, hh);
-    }
-    return ret;
   }
 
-  return mime_parser_parse(&parser->m_mime_parser, heap, hh->m_fields_impl, start, end, must_copy_strings, eof, max_hdr_field_size);
+  ParseResult ret =
+    mime_parser_parse(&parser->m_mime_parser, heap, hh->m_fields_impl, start, end, must_copy_strings, eof, max_hdr_field_size);
+  // If we're done with the main parse do some validation
+  if (ret == PARSE_RESULT_DONE) {
+    ret = validate_hdr_host(hh); // check HOST header
+  }
+  if (ret == PARSE_RESULT_DONE) {
+    ret = validate_hdr_content_length(heap, hh);
+  }
+  return ret;
 }
 
 ParseResult
@@ -1179,7 +1177,7 @@ validate_hdr_content_length(HdrHeap *heap, HTTPHdrImpl *hh)
     if (mime_hdr_field_find(hh->m_fields_impl, MIME_FIELD_TRANSFER_ENCODING, MIME_LEN_TRANSFER_ENCODING) != nullptr) {
       // Delete all Content-Length headers
       Debug("http", "Transfer-Encoding header and Content-Length headers the request, removing all Content-Length headers");
-      mime_hdr_field_delete(heap, hh->m_fields_impl, content_length_field);
+      mime_hdr_field_delete(heap, hh->m_fields_impl, content_length_field, true);
       return PARSE_RESULT_DONE;
     }
 


[trafficserver] 01/02: Fixed how we handle uknown schemes

Posted by zw...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

zwoop pushed a commit to branch 9.0.x
in repository https://gitbox.apache.org/repos/asf/trafficserver.git

commit 8b00e56de8beb4b26544f3283f69017950cf5d17
Author: Bryan Call <bc...@apache.org>
AuthorDate: Thu Jan 30 10:42:00 2020 -0800

    Fixed how we handle uknown schemes
    
    (cherry picked from commit c20a369fa93b22b492e6aed8bee1da5fa4af1cab)
---
 proxy/hdrs/HdrTest.cc | 101 +++++++++++++++++++++++++++++++++++++++-----------
 proxy/hdrs/URL.cc     |  49 +++++++++++++-----------
 2 files changed, 108 insertions(+), 42 deletions(-)

diff --git a/proxy/hdrs/HdrTest.cc b/proxy/hdrs/HdrTest.cc
index b0f84d3..cb767dc 100644
--- a/proxy/hdrs/HdrTest.cc
+++ b/proxy/hdrs/HdrTest.cc
@@ -230,44 +230,100 @@ HdrTest::test_url()
     // Start with an easy one...
     "http://trafficserver.apache.org/index.html",
 
-    // "cheese://bogosity",         This fails, but it's not clear it should work...
-
-    "some.place", "some.place/", "http://some.place", "http://some.place/", "http://some.place/path",
-    "http://some.place/path;params", "http://some.place/path;params?query", "http://some.place/path;params?query#fragment",
-    "http://some.place/path?query#fragment", "http://some.place/path#fragment",
+    "cheese://bogosity",
+
+    "some.place",
+    "some.place/",
+    "http://some.place",
+    "http://some.place/",
+    "http://some.place/path",
+    "http://some.place/path;params",
+    "http://some.place/path;params?query",
+    "http://some.place/path;params?query#fragment",
+    "http://some.place/path?query#fragment",
+    "http://some.place/path#fragment",
 
-    "some.place:80", "some.place:80/", "http://some.place:80", "http://some.place:80/",
+    "some.place:80",
+    "some.place:80/",
+    "http://some.place:80",
+    "http://some.place:80/",
 
-    "foo@some.place:80", "foo@some.place:80/", "http://foo@some.place:80", "http://foo@some.place:80/",
+    "foo@some.place:80",
+    "foo@some.place:80/",
+    "http://foo@some.place:80",
+    "http://foo@some.place:80/",
 
-    "foo:bar@some.place:80", "foo:bar@some.place:80/", "http://foo:bar@some.place:80", "http://foo:bar@some.place:80/",
+    "foo:bar@some.place:80",
+    "foo:bar@some.place:80/",
+    "http://foo:bar@some.place:80",
+    "http://foo:bar@some.place:80/",
 
     // Some address stuff
-    "http://172.16.28.101", "http://172.16.28.101:8080", "http://[::]", "http://[::1]", "http://[fc01:172:16:28::101]",
-    "http://[fc01:172:16:28::101]:80", "http://[fc01:172:16:28:BAAD:BEEF:DEAD:101]",
-    "http://[fc01:172:16:28:BAAD:BEEF:DEAD:101]:8080", "http://172.16.28.101/some/path", "http://172.16.28.101:8080/some/path",
-    "http://[::1]/some/path", "http://[fc01:172:16:28::101]/some/path", "http://[fc01:172:16:28::101]:80/some/path",
-    "http://[fc01:172:16:28:BAAD:BEEF:DEAD:101]/some/path", "http://[fc01:172:16:28:BAAD:BEEF:DEAD:101]:8080/some/path",
-    "http://172.16.28.101/", "http://[fc01:172:16:28:BAAD:BEEF:DEAD:101]:8080/",
-
-    "foo:bar@some.place", "foo:bar@some.place/", "http://foo:bar@some.place", "http://foo:bar@some.place/",
-    "http://foo:bar@[::1]:8080/", "http://foo@[::1]",
-
-    "mms://sm02.tsqa.example.com/0102rally.asf", "pnm://foo:bar@some.place:80/path;params?query#fragment",
-    "rtsp://foo:bar@some.place:80/path;params?query#fragment", "rtspu://foo:bar@some.place:80/path;params?query#fragment",
+    "http://172.16.28.101",
+    "http://172.16.28.101:8080",
+    "http://[::]",
+    "http://[::1]",
+    "http://[fc01:172:16:28::101]",
+    "http://[fc01:172:16:28::101]:80",
+    "http://[fc01:172:16:28:BAAD:BEEF:DEAD:101]",
+    "http://[fc01:172:16:28:BAAD:BEEF:DEAD:101]:8080",
+    "http://172.16.28.101/some/path",
+    "http://172.16.28.101:8080/some/path",
+    "http://[::1]/some/path",
+    "http://[fc01:172:16:28::101]/some/path",
+    "http://[fc01:172:16:28::101]:80/some/path",
+    "http://[fc01:172:16:28:BAAD:BEEF:DEAD:101]/some/path",
+    "http://[fc01:172:16:28:BAAD:BEEF:DEAD:101]:8080/some/path",
+    "http://172.16.28.101/",
+    "http://[fc01:172:16:28:BAAD:BEEF:DEAD:101]:8080/",
+
+    // "foo:@some.place", TODO - foo:@some.place is change to foo@some.place in the test
+    "foo:bar@some.place",
+    "foo:bar@some.place/",
+    "http://foo:bar@some.place",
+    "http://foo:bar@some.place/",
+    "http://foo:bar@[::1]:8080/",
+    "http://foo@[::1]",
+
+    "mms://sm02.tsqa.example.com/0102rally.asf",
+    "pnm://foo:bar@some.place:80/path;params?query#fragment",
+    "rtsp://foo:bar@some.place:80/path;params?query#fragment",
+    "rtspu://foo:bar@some.place:80/path;params?query#fragment",
     "/finance/external/cbsm/*http://cbs.marketwatch.com/archive/19990713/news/current/net.htx?source=blq/yhoo&dist=yhoo",
-    "http://a.b.com/xx.jpg?newpath=http://bob.dave.com"};
+    "http://a.b.com/xx.jpg?newpath=http://bob.dave.com",
+
+    "ht-tp://a.b.com",
+    "ht+tp://a.b.com",
+    "ht.tp://a.b.com",
+
+    "h1ttp://a.b.com",
+    "http1://a.b.com",
+  };
 
   static const char *bad[] = {
     "http://[1:2:3:4:5:6:7:8:9]",
     "http://1:2:3:4:5:6:7:8:A:B",
     "http://bob.com[::1]",
     "http://[::1].com",
+
     "http://foo:bar:baz@bob.com/",
     "http://foo:bar:baz@[::1]:8080/",
+
     "http://]",
     "http://:",
+
     "http:/",
+    "http:/foo.bar.com/",
+    "~http://invalid.char.in.scheme/foo",
+    "http~://invalid.char.in.scheme/foo",
+    "ht~tp://invalid.char.in.scheme/foo",
+    "1http://first.char.not.alpha",
+    "some.domain.com/http://invalid.domain/foo",
+    ":",
+    "://",
+
+    // maybe this should be a valid URL
+    "a.b.com/xx.jpg?newpath=http://bob.dave.com",
   };
 
   int err, failed;
@@ -287,6 +343,7 @@ HdrTest::test_url()
     url.create(nullptr);
     err = url.parse(&start, end);
     if (err < 0) {
+      printf("Failed to parse url '%s'\n", start);
       failed = 1;
       break;
     }
@@ -335,6 +392,8 @@ HdrTest::test_url()
       failed = 1;
       printf("Successfully parsed invalid url '%s'", x);
       break;
+    } else {
+      printf("   bad URL - PARSE FAILED: '%s'\n", bad[i]);
     }
   }
 
diff --git a/proxy/hdrs/URL.cc b/proxy/hdrs/URL.cc
index 8c94eb5..ad100a5 100644
--- a/proxy/hdrs/URL.cc
+++ b/proxy/hdrs/URL.cc
@@ -1117,34 +1117,41 @@ url_parse_scheme(HdrHeap *heap, URLImpl *url, const char **start, const char *en
   const char *scheme_end   = nullptr;
   int scheme_wks_idx;
 
+  // Skip over spaces
   while (' ' == *cur && ++cur < end) {
-    ;
   }
+
   if (cur < end) {
     scheme_start = scheme_end = cur;
-    // special case 'http:' for performance
-    if ((end - cur >= 5) && (((cur[0] ^ 'h') | (cur[1] ^ 't') | (cur[2] ^ 't') | (cur[3] ^ 'p') | (cur[4] ^ ':')) == 0)) {
-      scheme_end = cur + 4; // point to colon
-      url_scheme_set(heap, url, scheme_start, URL_WKSIDX_HTTP, 4, copy_strings_p);
-    } else if ('/' != *cur) {
-      // For forward transparent mode, the URL for the method can just be a path,
-      // so don't scan that for a scheme, as we could find a false positive if there
-      // is a URL in the parameters (which is legal).
+
+    // If the URL is more complex then a path, parse to see if there is a scheme
+    if ('/' != *cur) {
+      // Search for a : it could be part of a scheme or a username:password
       while (':' != *cur && ++cur < end) {
-        ;
       }
-      if (cur < end) { // found a colon
-        scheme_wks_idx = hdrtoken_tokenize(scheme_start, cur - scheme_start, &scheme_wks);
-
-        /*  Distinguish between a scheme only and a username by looking past the colon. If it is missing
-            or it's a slash, presume scheme. Otherwise it's a username with a password.
-        */
-        if ((scheme_wks_idx > 0 && hdrtoken_wks_to_token_type(scheme_wks) == HDRTOKEN_TYPE_SCHEME) || // known scheme
-            (cur >= end - 1 || cur[1] == '/')) // no more data or slash past colon
-        {
-          scheme_end = cur;
-          url_scheme_set(heap, url, scheme_start, scheme_wks_idx, scheme_end - scheme_start, copy_strings_p);
+
+      // If there is a :// then there is a scheme
+      if (cur + 2 < end && cur[1] == '/' && cur[2] == '/') { // found "://"
+        scheme_end     = cur;
+        scheme_wks_idx = hdrtoken_tokenize(scheme_start, scheme_end - scheme_start, &scheme_wks);
+
+        if (!(scheme_wks_idx > 0 && hdrtoken_wks_to_token_type(scheme_wks) == HDRTOKEN_TYPE_SCHEME)) {
+          // Unknown scheme, validate the scheme
+
+          // RFC 3986 Section 3.1
+          // These are the valid characters in a scheme:
+          //   scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+          // return an error if there is another character in the scheme
+          if (!ParseRules::is_alpha(*scheme_start)) {
+            return PARSE_RESULT_ERROR;
+          }
+          for (cur = scheme_start + 1; cur < scheme_end; ++cur) {
+            if (!(ParseRules::is_alnum(*cur) != 0 || *cur == '+' || *cur == '-' || *cur == '.')) {
+              return PARSE_RESULT_ERROR;
+            }
+          }
         }
+        url_scheme_set(heap, url, scheme_start, scheme_wks_idx, scheme_end - scheme_start, copy_strings_p);
       }
     }
     *start = scheme_end;