You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafficserver.apache.org by zw...@apache.org on 2022/02/04 17:27:25 UTC
[trafficserver] 03/03: Add option to mostly strictly check URL characters (#8012)
This is an automated email from the ASF dual-hosted git repository.
zwoop pushed a commit to branch 9.1.x
in repository https://gitbox.apache.org/repos/asf/trafficserver.git
commit 9533e29802b1fb7b22a2f80c94e817765117c4b8
Author: Susan Hinrichs <sh...@verizonmedia.com>
AuthorDate: Mon Jul 12 18:01:21 2021 -0500
Add option to mostly strictly check URL characters (#8012)
(cherry picked from commit 284fe0a0393eca07640f8236f60a6f46f8f132e9)
---
doc/admin-guide/files/records.config.en.rst | 6 +-
mgmt/RecordsConfig.cc | 2 +-
proxy/hdrs/HTTP.cc | 2 +-
proxy/hdrs/HTTP.h | 8 +--
proxy/hdrs/HdrTSOnly.cc | 2 +-
proxy/hdrs/URL.cc | 28 ++++++++-
proxy/hdrs/URL.h | 2 +-
proxy/hdrs/unit_tests/test_URL.cc | 45 ++++++++++++++-
proxy/http/HttpConfig.cc | 2 +-
.../headers/gold/bad_good_request_http1.gold | 8 +++
.../headers/good_request_after_bad.test.py | 66 +++++++++++++++++++++-
11 files changed, 156 insertions(+), 15 deletions(-)
diff --git a/doc/admin-guide/files/records.config.en.rst b/doc/admin-guide/files/records.config.en.rst
index 25ffffc..7cce115 100644
--- a/doc/admin-guide/files/records.config.en.rst
+++ b/doc/admin-guide/files/records.config.en.rst
@@ -1196,8 +1196,10 @@ mptcp
.. ts:cv:: CONFIG proxy.config.http.strict_uri_parsing INT 0
- Enables (``1``) or disables (``0``) |TS| to return a 400 Bad Request
- if client's request URI includes character which is not RFC 3986 compliant
+ Takes a value between 0 and 2. ``0`` disables strict_uri_parsing. Any character may appear
+ in the URI. ``1`` causes |TS| to return 400 Bad Request
+ if the client's request URI includes a character which is not RFC 3986 compliant. ``2`` directs |TS|
+ to reject the client's request if it contains whitespace or non-printable characters.
.. ts:cv:: CONFIG proxy.config.http.errors.log_error_pages INT 1
:reloadable:
diff --git a/mgmt/RecordsConfig.cc b/mgmt/RecordsConfig.cc
index 415e008..e289739 100644
--- a/mgmt/RecordsConfig.cc
+++ b/mgmt/RecordsConfig.cc
@@ -356,7 +356,7 @@ static const RecordElement RecordsConfig[] =
,
{RECT_CONFIG, "proxy.config.http.post.check.content_length.enabled", RECD_INT, "1", RECU_DYNAMIC, RR_NULL, RECC_INT, "[0-1]", RECA_NULL}
,
- {RECT_CONFIG, "proxy.config.http.strict_uri_parsing", RECD_INT, "0", RECU_DYNAMIC, RR_NULL, RECC_INT, "[0-1]", RECA_NULL}
+ {RECT_CONFIG, "proxy.config.http.strict_uri_parsing", RECD_INT, "0", RECU_DYNAMIC, RR_NULL, RECC_INT, "[0-2]", RECA_NULL}
,
// # Send http11 requests
// #
diff --git a/proxy/hdrs/HTTP.cc b/proxy/hdrs/HTTP.cc
index c0b16cf..73cd163 100644
--- a/proxy/hdrs/HTTP.cc
+++ b/proxy/hdrs/HTTP.cc
@@ -873,7 +873,7 @@ http_parser_clear(HTTPParser *parser)
ParseResult
http_parser_parse_req(HTTPParser *parser, HdrHeap *heap, HTTPHdrImpl *hh, const char **start, const char *end,
- bool must_copy_strings, bool eof, bool strict_uri_parsing, size_t max_request_line_size,
+ bool must_copy_strings, bool eof, int strict_uri_parsing, size_t max_request_line_size,
size_t max_hdr_field_size)
{
if (parser->m_parsing_http) {
diff --git a/proxy/hdrs/HTTP.h b/proxy/hdrs/HTTP.h
index cbfa4d8..83763c7 100644
--- a/proxy/hdrs/HTTP.h
+++ b/proxy/hdrs/HTTP.h
@@ -439,7 +439,7 @@ const char *http_hdr_reason_lookup(unsigned status);
void http_parser_init(HTTPParser *parser);
void http_parser_clear(HTTPParser *parser);
ParseResult http_parser_parse_req(HTTPParser *parser, HdrHeap *heap, HTTPHdrImpl *hh, const char **start, const char *end,
- bool must_copy_strings, bool eof, bool strict_uri_parsing, size_t max_request_line_size,
+ bool must_copy_strings, bool eof, int strict_uri_parsing, size_t max_request_line_size,
size_t max_hdr_field_size);
ParseResult validate_hdr_host(HTTPHdrImpl *hh);
ParseResult validate_hdr_content_length(HdrHeap *heap, HTTPHdrImpl *hh);
@@ -630,11 +630,11 @@ public:
void mark_early_data(bool flag = true) const;
bool is_early_data() const;
- ParseResult parse_req(HTTPParser *parser, const char **start, const char *end, bool eof, bool strict_uri_parsing = false,
+ ParseResult parse_req(HTTPParser *parser, const char **start, const char *end, bool eof, int strict_uri_parsing = 0,
size_t max_request_line_size = UINT16_MAX, size_t max_hdr_field_size = 131070);
ParseResult parse_resp(HTTPParser *parser, const char **start, const char *end, bool eof);
- ParseResult parse_req(HTTPParser *parser, IOBufferReader *r, int *bytes_used, bool eof, bool strict_uri_parsing = false,
+ ParseResult parse_req(HTTPParser *parser, IOBufferReader *r, int *bytes_used, bool eof, int strict_uri_parsing = 0,
size_t max_request_line_size = UINT16_MAX, size_t max_hdr_field_size = UINT16_MAX);
ParseResult parse_resp(HTTPParser *parser, IOBufferReader *r, int *bytes_used, bool eof);
@@ -1154,7 +1154,7 @@ HTTPHdr::is_early_data() const
-------------------------------------------------------------------------*/
inline ParseResult
-HTTPHdr::parse_req(HTTPParser *parser, const char **start, const char *end, bool eof, bool strict_uri_parsing,
+HTTPHdr::parse_req(HTTPParser *parser, const char **start, const char *end, bool eof, int strict_uri_parsing,
size_t max_request_line_size, size_t max_hdr_field_size)
{
ink_assert(valid());
diff --git a/proxy/hdrs/HdrTSOnly.cc b/proxy/hdrs/HdrTSOnly.cc
index 3575464..3a32c3b 100644
--- a/proxy/hdrs/HdrTSOnly.cc
+++ b/proxy/hdrs/HdrTSOnly.cc
@@ -45,7 +45,7 @@
-------------------------------------------------------------------------*/
ParseResult
-HTTPHdr::parse_req(HTTPParser *parser, IOBufferReader *r, int *bytes_used, bool eof, bool strict_uri_parsing,
+HTTPHdr::parse_req(HTTPParser *parser, IOBufferReader *r, int *bytes_used, bool eof, int strict_uri_parsing,
size_t max_request_line_size, size_t max_hdr_field_size)
{
const char *start;
diff --git a/proxy/hdrs/URL.cc b/proxy/hdrs/URL.cc
index 70fc177..e7fa712 100644
--- a/proxy/hdrs/URL.cc
+++ b/proxy/hdrs/URL.cc
@@ -1191,14 +1191,38 @@ url_is_strictly_compliant(const char *start, const char *end)
return true;
}
+/**
+ * This method will return TRUE if the URI is mostly compliant with
+ * RFC 3986 and FALSE if not. Specifically, it denies white
+ * space and unprintable characters.
+ */
+bool
+url_is_mostly_compliant(const char *start, const char *end)
+{
+ for (const char *i = start; i < end; ++i) {
+ if (isspace(*i)) {
+ Debug("http", "Whitespace character [0x%.2X] found in URL", (unsigned char)*i);
+ return false;
+ }
+ if (!isprint(*i)) {
+ Debug("http", "Non-printable character [0x%.2X] found in URL", (unsigned char)*i);
+ return false;
+ }
+ }
+ return true;
+}
+
} // namespace UrlImpl
using namespace UrlImpl;
ParseResult
-url_parse(HdrHeap *heap, URLImpl *url, const char **start, const char *end, bool copy_strings_p, bool strict_uri_parsing,
+url_parse(HdrHeap *heap, URLImpl *url, const char **start, const char *end, bool copy_strings_p, int strict_uri_parsing,
bool verify_host_characters)
{
- if (strict_uri_parsing && !url_is_strictly_compliant(*start, end)) {
+ if (strict_uri_parsing == 1 && !url_is_strictly_compliant(*start, end)) {
+ return PARSE_RESULT_ERROR;
+ }
+ if (strict_uri_parsing == 2 && !url_is_mostly_compliant(*start, end)) {
return PARSE_RESULT_ERROR;
}
diff --git a/proxy/hdrs/URL.h b/proxy/hdrs/URL.h
index d31107e..baa3531 100644
--- a/proxy/hdrs/URL.h
+++ b/proxy/hdrs/URL.h
@@ -209,7 +209,7 @@ void url_fragment_set(HdrHeap *heap, URLImpl *url, const char *value, int length
constexpr bool USE_STRICT_URI_PARSING = true;
ParseResult url_parse(HdrHeap *heap, URLImpl *url, const char **start, const char *end, bool copy_strings,
- bool strict_uri_parsing = false, bool verify_host_characters = true);
+ int strict_uri_parsing = false, bool verify_host_characters = true);
constexpr bool COPY_STRINGS = true;
diff --git a/proxy/hdrs/unit_tests/test_URL.cc b/proxy/hdrs/unit_tests/test_URL.cc
index 2fef10a..a39f7c0 100644
--- a/proxy/hdrs/unit_tests/test_URL.cc
+++ b/proxy/hdrs/unit_tests/test_URL.cc
@@ -56,7 +56,8 @@ TEST_CASE("ValidateURL", "[proxy][validurl]")
namespace UrlImpl
{
bool url_is_strictly_compliant(const char *start, const char *end);
-}
+bool url_is_mostly_compliant(const char *start, const char *end);
+} // namespace UrlImpl
using namespace UrlImpl;
TEST_CASE("ParseRulesStrictURI", "[proxy][parseuri]")
@@ -69,6 +70,9 @@ TEST_CASE("ParseRulesStrictURI", "[proxy][parseuri]")
{"/path/data?key=value#id", true},
{"/ABCDEFGHIJKLMNOPQRSTUVWXYZ", true},
{"/abcdefghijklmnopqrstuvwxyz", true},
+ {"/abcde fghijklmnopqrstuvwxyz", false},
+ {"/abcde\tfghijklmnopqrstuvwxyz", false},
+ {"/abcdefghijklmnopqrstuvwxyz", false},
{"/0123456789", true},
{":/?#[]@", true},
{"!$&'()*+,;=", true},
@@ -95,6 +99,45 @@ TEST_CASE("ParseRulesStrictURI", "[proxy][parseuri]")
}
}
+TEST_CASE("ParseRulesMostlyStrictURI", "[proxy][parseuri]")
+{
+ const struct {
+ const char *const uri;
+ bool valid;
+ } http_mostly_strict_uri_parsing_test_case[] = {{"//index.html", true},
+ {"/home", true},
+ {"/path/data?key=value#id", true},
+ {"/ABCDEFGHIJKLMNOPQRSTUVWXYZ", true},
+ {"/abcdefghijklmnopqrstuvwxyz", true},
+ {"/abcde fghijklmnopqrstuvwxyz", false},
+ {"/abcde\tfghijklmnopqrstuvwxyz", false},
+ {"/abcdefghijklmnopqrstuvwxyz", false},
+ {"/0123456789", true},
+ {":/?#[]@", true},
+ {"!$&'()*+,;=", true},
+ {"-._~", true},
+ {"%", true},
+ {"\n", false},
+ {"\"", true},
+ {"<", true},
+ {">", true},
+ {"\\", true},
+ {"^", true},
+ {"`", true},
+ {"{", true},
+ {"|", true},
+ {"}", true},
+ {"é", false}}; // Non-ASCII byte, rejected as non-printable
+
+ for (auto i : http_mostly_strict_uri_parsing_test_case) {
+ const char *const uri = i.uri;
+ if (url_is_mostly_compliant(uri, uri + strlen(uri)) != i.valid) {
+ std::printf("Mostly strictly parse URI: \"%s\", expected %s, but not\n", uri, (i.valid ? "true" : "false"));
+ CHECK(false);
+ }
+ }
+}
+
struct url_parse_test_case {
const std::string input_uri;
const std::string expected_printed_url;
diff --git a/proxy/http/HttpConfig.cc b/proxy/http/HttpConfig.cc
index 9f12e92..439541a 100644
--- a/proxy/http/HttpConfig.cc
+++ b/proxy/http/HttpConfig.cc
@@ -1629,7 +1629,7 @@ HttpConfig::reconfigure()
params->referer_filter_enabled = INT_TO_BOOL(m_master.referer_filter_enabled);
params->referer_format_redirect = INT_TO_BOOL(m_master.referer_format_redirect);
- params->strict_uri_parsing = INT_TO_BOOL(m_master.strict_uri_parsing);
+ params->strict_uri_parsing = m_master.strict_uri_parsing;
params->oride.down_server_timeout = m_master.oride.down_server_timeout;
diff --git a/tests/gold_tests/headers/gold/bad_good_request_http1.gold b/tests/gold_tests/headers/gold/bad_good_request_http1.gold
new file mode 100644
index 0000000..5f31ffa
--- /dev/null
+++ b/tests/gold_tests/headers/gold/bad_good_request_http1.gold
@@ -0,0 +1,8 @@
+``HTTP/1.0 400 Invalid HTTP Request
+``Server: ATS/``
+``Content-Length: 219
+``
+<TITLE>Bad Request</TITLE>
+``<H1>Bad Request</H1>
+``Description: Could not process this request.
+``
diff --git a/tests/gold_tests/headers/good_request_after_bad.test.py b/tests/gold_tests/headers/good_request_after_bad.test.py
index cfefaf6..f65d542 100644
--- a/tests/gold_tests/headers/good_request_after_bad.test.py
+++ b/tests/gold_tests/headers/good_request_after_bad.test.py
@@ -27,9 +27,17 @@ ts = Test.MakeATSProcess("ts", enable_cache=True)
Test.ContinueOnFail = True
ts.Disk.records_config.update({'proxy.config.diags.debug.tags': 'http',
'proxy.config.diags.debug.enabled': 0,
+ 'proxy.config.http.strict_uri_parsing': 1
})
-Test.ContinueOnFail = True
+ts2 = Test.MakeATSProcess("ts2", enable_cache=True)
+
+ts2.Disk.records_config.update({'proxy.config.diags.debug.tags': 'http',
+ 'proxy.config.diags.debug.enabled': 0,
+ 'proxy.config.http.strict_uri_parsing': 2
+ })
+
+
server = Test.MakeOriginServer("server")
request_header = {"headers": "GET / HTTP/1.1\r\nHost: www.example.com\r\n\r\n", "timestamp": "1469733493.993", "body": ""}
response_header = {
@@ -41,6 +49,15 @@ server.addResponse("sessionlog.json", request_header, response_header)
ts.Disk.remap_config.AddLine(
'map / http://127.0.0.1:{0}'.format(server.Variables.Port)
)
+ts.Disk.remap_config.AddLine(
+ 'map /bob<> http://127.0.0.1:{0}'.format(server.Variables.Port)
+)
+ts2.Disk.remap_config.AddLine(
+ 'map / http://127.0.0.1:{0}'.format(server.Variables.Port)
+)
+ts2.Disk.remap_config.AddLine(
+ 'map /bob<> http://127.0.0.1:{0}'.format(server.Variables.Port)
+)
trace_out = Test.Disk.File("trace_curl.txt")
@@ -51,6 +68,12 @@ tr.Processes.Default.StartBefore(Test.Processes.ts)
tr.Processes.Default.Command = 'printf "GET / HTTP/1.1\r\nHost: bob\r\n\r\n" | nc 127.0.0.1 {}'.format(ts.Variables.port)
tr.Processes.Default.ReturnCode = 0
+tr = Test.AddTestRun("Good control")
+tr.Processes.Default.StartBefore(server)
+tr.Processes.Default.StartBefore(Test.Processes.ts2)
+tr.Processes.Default.Command = 'printf "GET / HTTP/1.1\r\nHost: bob\r\n\r\n" | nc 127.0.0.1 {}'.format(ts2.Variables.port)
+tr.Processes.Default.ReturnCode = 0
+
tr = Test.AddTestRun("space after header name")
tr.Processes.Default.Command = 'printf "GET / HTTP/1.1\r\nHost : bob\r\n\r\nGET / HTTP/1.1\r\nHost: boa\r\n\r\n" | nc 127.0.0.1 {}'.format(
ts.Variables.port)
@@ -110,3 +133,44 @@ tr.Processes.Default.Command = 'printf "GET / HTTP/1.1\r\nHost:bob\r\n \r\nGET /
ts.Variables.port)
tr.Processes.Default.ReturnCode = 0
tr.Processes.Default.Streams.stdout = 'gold/bad_good_request.gold'
+
+tr = Test.AddTestRun("Catch bad URL characters")
+tr.Processes.Default.Command = 'printf "GET /bob<> HTTP/1.1\r\nhost: bob\r\n\r\nGET / HTTP/1.1\r\nHost: boa\r\n\r\n" | nc 127.0.0.1 {}'.format(
+ ts.Variables.port)
+tr.Processes.Default.ReturnCode = 0
+# Since the request line is messed up, ATS will reply with HTTP/1.0
+tr.Processes.Default.Streams.stdout = 'gold/bad_good_request_http1.gold'
+
+tr = Test.AddTestRun("Catch whitespace in URL")
+tr.Processes.Default.Command = 'printf "GET /bob foo HTTP/1.1\r\nhost: bob\r\n\r\nGET / HTTP/1.1\r\nHost: boa\r\n\r\n" | nc 127.0.0.1 {}'.format(
+ ts.Variables.port)
+tr.Processes.Default.ReturnCode = 0
+# Since the request line is messed up, ATS will reply with HTTP/1.0
+tr.Processes.Default.Streams.stdout = 'gold/bad_good_request_http1.gold'
+
+tr = Test.AddTestRun("Extra characters in protocol")
+tr.Processes.Default.Command = 'printf "GET / HTP/1.1\r\nhost: bob\r\n\r\nGET / HTTP/1.1\r\nHost: boa\r\n\r\n" | nc 127.0.0.1 {}'.format(
+ ts.Variables.port)
+tr.Processes.Default.ReturnCode = 0
+# Since the request line is messed up, ATS will reply with HTTP/1.0
+tr.Processes.Default.Streams.stdout = 'gold/bad_good_request_http1.gold'
+
+tr = Test.AddTestRun("Characters that are strict but not case 2 bad")
+tr.Processes.Default.Command = 'printf "GET /bob<> HTTP/1.1\r\nhost: bob\r\n\r\nGET / HTTP/1.1\r\nHost: boa\r\n\r\n" | nc 127.0.0.1 {}'.format(
+ ts2.Variables.port)
+tr.Processes.Default.ReturnCode = 0
+tr.Processes.Default.Streams.All = Testers.ContainsExpression("HTTP/1.1 200 OK", "Success")
+
+tr = Test.AddTestRun("Catch whitespace in URL")
+tr.Processes.Default.Command = 'printf "GET /bob foo HTTP/1.1\r\nhost: bob\r\n\r\nGET / HTTP/1.1\r\nHost: boa\r\n\r\n" | nc 127.0.0.1 {}'.format(
+ ts2.Variables.port)
+tr.Processes.Default.ReturnCode = 0
+# Since the request line is messed up, ATS will reply with HTTP/1.0
+tr.Processes.Default.Streams.stdout = 'gold/bad_good_request_http1.gold'
+
+tr = Test.AddTestRun("Extra characters in protocol")
+tr.Processes.Default.Command = 'printf "GET / HTP/1.1\r\nhost: bob\r\n\r\nGET / HTTP/1.1\r\nHost: boa\r\n\r\n" | nc 127.0.0.1 {}'.format(
+ ts2.Variables.port)
+tr.Processes.Default.ReturnCode = 0
+# Since the request line is messed up, ATS will reply with HTTP/1.0
+tr.Processes.Default.Streams.stdout = 'gold/bad_good_request_http1.gold'