You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafficserver.apache.org by bc...@apache.org on 2020/02/05 02:03:48 UTC
[trafficserver] branch 8.0.x updated: Fixed how we handle uknown
schemes
This is an automated email from the ASF dual-hosted git repository.
bcall pushed a commit to branch 8.0.x
in repository https://gitbox.apache.org/repos/asf/trafficserver.git
The following commit(s) were added to refs/heads/8.0.x by this push:
new 6affb92 Fixed how we handle uknown schemes
6affb92 is described below
commit 6affb92d8c85896e8b60abb9f684af8109280a0a
Author: Bryan Call <bc...@apache.org>
AuthorDate: Thu Jan 30 10:42:00 2020 -0800
Fixed how we handle uknown schemes
(cherry picked from commit c20a369fa93b22b492e6aed8bee1da5fa4af1cab)
---
proxy/hdrs/HdrTest.cc | 101 +++++++++++++++++++++++++++++++++++++++-----------
proxy/hdrs/URL.cc | 49 +++++++++++++-----------
2 files changed, 108 insertions(+), 42 deletions(-)
diff --git a/proxy/hdrs/HdrTest.cc b/proxy/hdrs/HdrTest.cc
index 49107ca..8046e69 100644
--- a/proxy/hdrs/HdrTest.cc
+++ b/proxy/hdrs/HdrTest.cc
@@ -343,44 +343,100 @@ HdrTest::test_url()
// Start with an easy one...
"http://trafficserver.apache.org/index.html",
- // "cheese://bogosity", This fails, but it's not clear it should work...
-
- "some.place", "some.place/", "http://some.place", "http://some.place/", "http://some.place/path",
- "http://some.place/path;params", "http://some.place/path;params?query", "http://some.place/path;params?query#fragment",
- "http://some.place/path?query#fragment", "http://some.place/path#fragment",
+ "cheese://bogosity",
+
+ "some.place",
+ "some.place/",
+ "http://some.place",
+ "http://some.place/",
+ "http://some.place/path",
+ "http://some.place/path;params",
+ "http://some.place/path;params?query",
+ "http://some.place/path;params?query#fragment",
+ "http://some.place/path?query#fragment",
+ "http://some.place/path#fragment",
- "some.place:80", "some.place:80/", "http://some.place:80", "http://some.place:80/",
+ "some.place:80",
+ "some.place:80/",
+ "http://some.place:80",
+ "http://some.place:80/",
- "foo@some.place:80", "foo@some.place:80/", "http://foo@some.place:80", "http://foo@some.place:80/",
+ "foo@some.place:80",
+ "foo@some.place:80/",
+ "http://foo@some.place:80",
+ "http://foo@some.place:80/",
- "foo:bar@some.place:80", "foo:bar@some.place:80/", "http://foo:bar@some.place:80", "http://foo:bar@some.place:80/",
+ "foo:bar@some.place:80",
+ "foo:bar@some.place:80/",
+ "http://foo:bar@some.place:80",
+ "http://foo:bar@some.place:80/",
// Some address stuff
- "http://172.16.28.101", "http://172.16.28.101:8080", "http://[::]", "http://[::1]", "http://[fc01:172:16:28::101]",
- "http://[fc01:172:16:28::101]:80", "http://[fc01:172:16:28:BAAD:BEEF:DEAD:101]",
- "http://[fc01:172:16:28:BAAD:BEEF:DEAD:101]:8080", "http://172.16.28.101/some/path", "http://172.16.28.101:8080/some/path",
- "http://[::1]/some/path", "http://[fc01:172:16:28::101]/some/path", "http://[fc01:172:16:28::101]:80/some/path",
- "http://[fc01:172:16:28:BAAD:BEEF:DEAD:101]/some/path", "http://[fc01:172:16:28:BAAD:BEEF:DEAD:101]:8080/some/path",
- "http://172.16.28.101/", "http://[fc01:172:16:28:BAAD:BEEF:DEAD:101]:8080/",
-
- "foo:bar@some.place", "foo:bar@some.place/", "http://foo:bar@some.place", "http://foo:bar@some.place/",
- "http://foo:bar@[::1]:8080/", "http://foo@[::1]",
-
- "mms://sm02.tsqa.example.com/0102rally.asf", "pnm://foo:bar@some.place:80/path;params?query#fragment",
- "rtsp://foo:bar@some.place:80/path;params?query#fragment", "rtspu://foo:bar@some.place:80/path;params?query#fragment",
+ "http://172.16.28.101",
+ "http://172.16.28.101:8080",
+ "http://[::]",
+ "http://[::1]",
+ "http://[fc01:172:16:28::101]",
+ "http://[fc01:172:16:28::101]:80",
+ "http://[fc01:172:16:28:BAAD:BEEF:DEAD:101]",
+ "http://[fc01:172:16:28:BAAD:BEEF:DEAD:101]:8080",
+ "http://172.16.28.101/some/path",
+ "http://172.16.28.101:8080/some/path",
+ "http://[::1]/some/path",
+ "http://[fc01:172:16:28::101]/some/path",
+ "http://[fc01:172:16:28::101]:80/some/path",
+ "http://[fc01:172:16:28:BAAD:BEEF:DEAD:101]/some/path",
+ "http://[fc01:172:16:28:BAAD:BEEF:DEAD:101]:8080/some/path",
+ "http://172.16.28.101/",
+ "http://[fc01:172:16:28:BAAD:BEEF:DEAD:101]:8080/",
+
+ // "foo:@some.place", TODO - foo:@some.place is change to foo@some.place in the test
+ "foo:bar@some.place",
+ "foo:bar@some.place/",
+ "http://foo:bar@some.place",
+ "http://foo:bar@some.place/",
+ "http://foo:bar@[::1]:8080/",
+ "http://foo@[::1]",
+
+ "mms://sm02.tsqa.example.com/0102rally.asf",
+ "pnm://foo:bar@some.place:80/path;params?query#fragment",
+ "rtsp://foo:bar@some.place:80/path;params?query#fragment",
+ "rtspu://foo:bar@some.place:80/path;params?query#fragment",
"/finance/external/cbsm/*http://cbs.marketwatch.com/archive/19990713/news/current/net.htx?source=blq/yhoo&dist=yhoo",
- "http://a.b.com/xx.jpg?newpath=http://bob.dave.com"};
+ "http://a.b.com/xx.jpg?newpath=http://bob.dave.com",
+
+ "ht-tp://a.b.com",
+ "ht+tp://a.b.com",
+ "ht.tp://a.b.com",
+
+ "h1ttp://a.b.com",
+ "http1://a.b.com",
+ };
static const char *bad[] = {
"http://[1:2:3:4:5:6:7:8:9]",
"http://1:2:3:4:5:6:7:8:A:B",
"http://bob.com[::1]",
"http://[::1].com",
+
"http://foo:bar:baz@bob.com/",
"http://foo:bar:baz@[::1]:8080/",
+
"http://]",
"http://:",
+
"http:/",
+ "http:/foo.bar.com/",
+ "~http://invalid.char.in.scheme/foo",
+ "http~://invalid.char.in.scheme/foo",
+ "ht~tp://invalid.char.in.scheme/foo",
+ "1http://first.char.not.alpha",
+ "some.domain.com/http://invalid.domain/foo",
+ ":",
+ "://",
+
+ // maybe this should be a valid URL
+ "a.b.com/xx.jpg?newpath=http://bob.dave.com",
};
int err, failed;
@@ -400,6 +456,7 @@ HdrTest::test_url()
url.create(nullptr);
err = url.parse(&start, end);
if (err < 0) {
+ printf("Failed to parse url '%s'\n", start);
failed = 1;
break;
}
@@ -448,6 +505,8 @@ HdrTest::test_url()
failed = 1;
printf("Successfully parsed invalid url '%s'", x);
break;
+ } else {
+ printf(" bad URL - PARSE FAILED: '%s'\n", bad[i]);
}
}
diff --git a/proxy/hdrs/URL.cc b/proxy/hdrs/URL.cc
index 8f94329..d38a1f5 100644
--- a/proxy/hdrs/URL.cc
+++ b/proxy/hdrs/URL.cc
@@ -1120,34 +1120,41 @@ url_parse_scheme(HdrHeap *heap, URLImpl *url, const char **start, const char *en
const char *scheme_end = nullptr;
int scheme_wks_idx;
+ // Skip over spaces
while (' ' == *cur && ++cur < end) {
- ;
}
+
if (cur < end) {
scheme_start = scheme_end = cur;
- // special case 'http:' for performance
- if ((end - cur >= 5) && (((cur[0] ^ 'h') | (cur[1] ^ 't') | (cur[2] ^ 't') | (cur[3] ^ 'p') | (cur[4] ^ ':')) == 0)) {
- scheme_end = cur + 4; // point to colon
- url_scheme_set(heap, url, scheme_start, URL_WKSIDX_HTTP, 4, copy_strings_p);
- } else if ('/' != *cur) {
- // For forward transparent mode, the URL for the method can just be a path,
- // so don't scan that for a scheme, as we could find a false positive if there
- // is a URL in the parameters (which is legal).
+
+ // If the URL is more complex then a path, parse to see if there is a scheme
+ if ('/' != *cur) {
+ // Search for a : it could be part of a scheme or a username:password
while (':' != *cur && ++cur < end) {
- ;
}
- if (cur < end) { // found a colon
- scheme_wks_idx = hdrtoken_tokenize(scheme_start, cur - scheme_start, &scheme_wks);
-
- /* Distinguish between a scheme only and a username by looking past the colon. If it is missing
- or it's a slash, presume scheme. Otherwise it's a username with a password.
- */
- if ((scheme_wks_idx > 0 && hdrtoken_wks_to_token_type(scheme_wks) == HDRTOKEN_TYPE_SCHEME) || // known scheme
- (cur >= end - 1 || cur[1] == '/')) // no more data or slash past colon
- {
- scheme_end = cur;
- url_scheme_set(heap, url, scheme_start, scheme_wks_idx, scheme_end - scheme_start, copy_strings_p);
+
+ // If there is a :// then there is a scheme
+ if (cur + 2 < end && cur[1] == '/' && cur[2] == '/') { // found "://"
+ scheme_end = cur;
+ scheme_wks_idx = hdrtoken_tokenize(scheme_start, scheme_end - scheme_start, &scheme_wks);
+
+ if (!(scheme_wks_idx > 0 && hdrtoken_wks_to_token_type(scheme_wks) == HDRTOKEN_TYPE_SCHEME)) {
+ // Unknown scheme, validate the scheme
+
+ // RFC 3986 Section 3.1
+ // These are the valid characters in a scheme:
+ // scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+ // return an error if there is another character in the scheme
+ if (!ParseRules::is_alpha(*scheme_start)) {
+ return PARSE_RESULT_ERROR;
+ }
+ for (cur = scheme_start + 1; cur < scheme_end; ++cur) {
+ if (!(ParseRules::is_alnum(*cur) != 0 || *cur == '+' || *cur == '-' || *cur == '.')) {
+ return PARSE_RESULT_ERROR;
+ }
+ }
}
+ url_scheme_set(heap, url, scheme_start, scheme_wks_idx, scheme_end - scheme_start, copy_strings_p);
}
}
*start = scheme_end;