You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafficserver.apache.org by zw...@apache.org on 2022/02/04 17:27:25 UTC

[trafficserver] 03/03: Add option to mostly strictly check URL characters (#8012)

This is an automated email from the ASF dual-hosted git repository.

zwoop pushed a commit to branch 9.1.x
in repository https://gitbox.apache.org/repos/asf/trafficserver.git

commit 9533e29802b1fb7b22a2f80c94e817765117c4b8
Author: Susan Hinrichs <sh...@verizonmedia.com>
AuthorDate: Mon Jul 12 18:01:21 2021 -0500

    Add option to mostly strictly check URL characters (#8012)
    
    (cherry picked from commit 284fe0a0393eca07640f8236f60a6f46f8f132e9)
---
 doc/admin-guide/files/records.config.en.rst        |  6 +-
 mgmt/RecordsConfig.cc                              |  2 +-
 proxy/hdrs/HTTP.cc                                 |  2 +-
 proxy/hdrs/HTTP.h                                  |  8 +--
 proxy/hdrs/HdrTSOnly.cc                            |  2 +-
 proxy/hdrs/URL.cc                                  | 28 ++++++++-
 proxy/hdrs/URL.h                                   |  2 +-
 proxy/hdrs/unit_tests/test_URL.cc                  | 45 ++++++++++++++-
 proxy/http/HttpConfig.cc                           |  2 +-
 .../headers/gold/bad_good_request_http1.gold       |  8 +++
 .../headers/good_request_after_bad.test.py         | 66 +++++++++++++++++++++-
 11 files changed, 156 insertions(+), 15 deletions(-)

diff --git a/doc/admin-guide/files/records.config.en.rst b/doc/admin-guide/files/records.config.en.rst
index 25ffffc..7cce115 100644
--- a/doc/admin-guide/files/records.config.en.rst
+++ b/doc/admin-guide/files/records.config.en.rst
@@ -1196,8 +1196,10 @@ mptcp
 
 .. ts:cv:: CONFIG proxy.config.http.strict_uri_parsing INT 0
 
-   Enables (``1``) or disables (``0``) |TS| to return a 400 Bad Request
-   if client's request URI includes character which is not RFC 3986 compliant
+   Takes a value between 0 and 2.  ``0`` disables strict_uri_parsing.  Any character can appear
+   in the URI.  ``1`` causes |TS| to return a 400 Bad Request
+   if the client's request URI includes a character which is not RFC 3986 compliant. ``2`` directs |TS|
+   to reject the client's request if it contains whitespace or non-printable characters.
 
 .. ts:cv:: CONFIG proxy.config.http.errors.log_error_pages INT 1
    :reloadable:
diff --git a/mgmt/RecordsConfig.cc b/mgmt/RecordsConfig.cc
index 415e008..e289739 100644
--- a/mgmt/RecordsConfig.cc
+++ b/mgmt/RecordsConfig.cc
@@ -356,7 +356,7 @@ static const RecordElement RecordsConfig[] =
   ,
   {RECT_CONFIG, "proxy.config.http.post.check.content_length.enabled", RECD_INT, "1", RECU_DYNAMIC, RR_NULL, RECC_INT, "[0-1]", RECA_NULL}
   ,
-  {RECT_CONFIG, "proxy.config.http.strict_uri_parsing", RECD_INT, "0", RECU_DYNAMIC, RR_NULL, RECC_INT, "[0-1]", RECA_NULL}
+  {RECT_CONFIG, "proxy.config.http.strict_uri_parsing", RECD_INT, "0", RECU_DYNAMIC, RR_NULL, RECC_INT, "[0-2]", RECA_NULL}
   ,
   //       # Send http11 requests
   //       #
diff --git a/proxy/hdrs/HTTP.cc b/proxy/hdrs/HTTP.cc
index c0b16cf..73cd163 100644
--- a/proxy/hdrs/HTTP.cc
+++ b/proxy/hdrs/HTTP.cc
@@ -873,7 +873,7 @@ http_parser_clear(HTTPParser *parser)
 
 ParseResult
 http_parser_parse_req(HTTPParser *parser, HdrHeap *heap, HTTPHdrImpl *hh, const char **start, const char *end,
-                      bool must_copy_strings, bool eof, bool strict_uri_parsing, size_t max_request_line_size,
+                      bool must_copy_strings, bool eof, int strict_uri_parsing, size_t max_request_line_size,
                       size_t max_hdr_field_size)
 {
   if (parser->m_parsing_http) {
diff --git a/proxy/hdrs/HTTP.h b/proxy/hdrs/HTTP.h
index cbfa4d8..83763c7 100644
--- a/proxy/hdrs/HTTP.h
+++ b/proxy/hdrs/HTTP.h
@@ -439,7 +439,7 @@ const char *http_hdr_reason_lookup(unsigned status);
 void http_parser_init(HTTPParser *parser);
 void http_parser_clear(HTTPParser *parser);
 ParseResult http_parser_parse_req(HTTPParser *parser, HdrHeap *heap, HTTPHdrImpl *hh, const char **start, const char *end,
-                                  bool must_copy_strings, bool eof, bool strict_uri_parsing, size_t max_request_line_size,
+                                  bool must_copy_strings, bool eof, int strict_uri_parsing, size_t max_request_line_size,
                                   size_t max_hdr_field_size);
 ParseResult validate_hdr_host(HTTPHdrImpl *hh);
 ParseResult validate_hdr_content_length(HdrHeap *heap, HTTPHdrImpl *hh);
@@ -630,11 +630,11 @@ public:
   void mark_early_data(bool flag = true) const;
   bool is_early_data() const;
 
-  ParseResult parse_req(HTTPParser *parser, const char **start, const char *end, bool eof, bool strict_uri_parsing = false,
+  ParseResult parse_req(HTTPParser *parser, const char **start, const char *end, bool eof, int strict_uri_parsing = 0,
                         size_t max_request_line_size = UINT16_MAX, size_t max_hdr_field_size = 131070);
   ParseResult parse_resp(HTTPParser *parser, const char **start, const char *end, bool eof);
 
-  ParseResult parse_req(HTTPParser *parser, IOBufferReader *r, int *bytes_used, bool eof, bool strict_uri_parsing = false,
+  ParseResult parse_req(HTTPParser *parser, IOBufferReader *r, int *bytes_used, bool eof, int strict_uri_parsing = 0,
                         size_t max_request_line_size = UINT16_MAX, size_t max_hdr_field_size = UINT16_MAX);
   ParseResult parse_resp(HTTPParser *parser, IOBufferReader *r, int *bytes_used, bool eof);
 
@@ -1154,7 +1154,7 @@ HTTPHdr::is_early_data() const
   -------------------------------------------------------------------------*/
 
 inline ParseResult
-HTTPHdr::parse_req(HTTPParser *parser, const char **start, const char *end, bool eof, bool strict_uri_parsing,
+HTTPHdr::parse_req(HTTPParser *parser, const char **start, const char *end, bool eof, int strict_uri_parsing,
                    size_t max_request_line_size, size_t max_hdr_field_size)
 {
   ink_assert(valid());
diff --git a/proxy/hdrs/HdrTSOnly.cc b/proxy/hdrs/HdrTSOnly.cc
index 3575464..3a32c3b 100644
--- a/proxy/hdrs/HdrTSOnly.cc
+++ b/proxy/hdrs/HdrTSOnly.cc
@@ -45,7 +45,7 @@
   -------------------------------------------------------------------------*/
 
 ParseResult
-HTTPHdr::parse_req(HTTPParser *parser, IOBufferReader *r, int *bytes_used, bool eof, bool strict_uri_parsing,
+HTTPHdr::parse_req(HTTPParser *parser, IOBufferReader *r, int *bytes_used, bool eof, int strict_uri_parsing,
                    size_t max_request_line_size, size_t max_hdr_field_size)
 {
   const char *start;
diff --git a/proxy/hdrs/URL.cc b/proxy/hdrs/URL.cc
index 70fc177..e7fa712 100644
--- a/proxy/hdrs/URL.cc
+++ b/proxy/hdrs/URL.cc
@@ -1191,14 +1191,38 @@ url_is_strictly_compliant(const char *start, const char *end)
   return true;
 }
 
+/**
+ *  This method will return TRUE if the uri is mostly compliant with
+ *  RFC 3986 and it will return FALSE if not. Specifically, it denies white
+ *  space and unprintable characters.
+ */
+bool
+url_is_mostly_compliant(const char *start, const char *end)
+{
+  for (const char *i = start; i < end; ++i) {
+    if (isspace(*i)) {
+      Debug("http", "Whitespace character [0x%.2X] found in URL", (unsigned char)*i);
+      return false;
+    }
+    if (!isprint(*i)) {
+      Debug("http", "Non-printable character [0x%.2X] found in URL", (unsigned char)*i);
+      return false;
+    }
+  }
+  return true;
+}
+
 } // namespace UrlImpl
 using namespace UrlImpl;
 
 ParseResult
-url_parse(HdrHeap *heap, URLImpl *url, const char **start, const char *end, bool copy_strings_p, bool strict_uri_parsing,
+url_parse(HdrHeap *heap, URLImpl *url, const char **start, const char *end, bool copy_strings_p, int strict_uri_parsing,
           bool verify_host_characters)
 {
-  if (strict_uri_parsing && !url_is_strictly_compliant(*start, end)) {
+  if (strict_uri_parsing == 1 && !url_is_strictly_compliant(*start, end)) {
+    return PARSE_RESULT_ERROR;
+  }
+  if (strict_uri_parsing == 2 && !url_is_mostly_compliant(*start, end)) {
     return PARSE_RESULT_ERROR;
   }
 
diff --git a/proxy/hdrs/URL.h b/proxy/hdrs/URL.h
index d31107e..baa3531 100644
--- a/proxy/hdrs/URL.h
+++ b/proxy/hdrs/URL.h
@@ -209,7 +209,7 @@ void url_fragment_set(HdrHeap *heap, URLImpl *url, const char *value, int length
 constexpr bool USE_STRICT_URI_PARSING = true;
 
 ParseResult url_parse(HdrHeap *heap, URLImpl *url, const char **start, const char *end, bool copy_strings,
-                      bool strict_uri_parsing = false, bool verify_host_characters = true);
+                      int strict_uri_parsing = 0, bool verify_host_characters = true);
 
 constexpr bool COPY_STRINGS = true;
 
diff --git a/proxy/hdrs/unit_tests/test_URL.cc b/proxy/hdrs/unit_tests/test_URL.cc
index 2fef10a..a39f7c0 100644
--- a/proxy/hdrs/unit_tests/test_URL.cc
+++ b/proxy/hdrs/unit_tests/test_URL.cc
@@ -56,7 +56,8 @@ TEST_CASE("ValidateURL", "[proxy][validurl]")
 namespace UrlImpl
 {
 bool url_is_strictly_compliant(const char *start, const char *end);
-}
+bool url_is_mostly_compliant(const char *start, const char *end);
+} // namespace UrlImpl
 using namespace UrlImpl;
 
 TEST_CASE("ParseRulesStrictURI", "[proxy][parseuri]")
@@ -69,6 +70,9 @@ TEST_CASE("ParseRulesStrictURI", "[proxy][parseuri]")
                                            {"/path/data?key=value#id", true},
                                            {"/ABCDEFGHIJKLMNOPQRSTUVWXYZ", true},
                                            {"/abcdefghijklmnopqrstuvwxyz", true},
+                                           {"/abcde fghijklmnopqrstuvwxyz", false},
+                                           {"/abcde\tfghijklmnopqrstuvwxyz", false},
+                                           {"/abcde\x7F" "fghijklmnopqrstuvwxyz", false},
                                            {"/0123456789", true},
                                            {":/?#[]@", true},
                                            {"!$&'()*+,;=", true},
@@ -95,6 +99,45 @@ TEST_CASE("ParseRulesStrictURI", "[proxy][parseuri]")
   }
 }
 
+TEST_CASE("ParseRulesMostlyStrictURI", "[proxy][parseuri]")
+{
+  const struct {
+    const char *const uri;
+    bool valid;
+  } http_mostly_strict_uri_parsing_test_case[] = {{"//index.html", true},
+                                                  {"/home", true},
+                                                  {"/path/data?key=value#id", true},
+                                                  {"/ABCDEFGHIJKLMNOPQRSTUVWXYZ", true},
+                                                  {"/abcdefghijklmnopqrstuvwxyz", true},
+                                                  {"/abcde fghijklmnopqrstuvwxyz", false},
+                                                  {"/abcde\tfghijklmnopqrstuvwxyz", false},
+                                                  {"/abcde\x7F" "fghijklmnopqrstuvwxyz", false},
+                                                  {"/0123456789", true},
+                                                  {":/?#[]@", true},
+                                                  {"!$&'()*+,;=", true},
+                                                  {"-._~", true},
+                                                  {"%", true},
+                                                  {"\n", false},
+                                                  {"\"", true},
+                                                  {"<", true},
+                                                  {">", true},
+                                                  {"\\", true},
+                                                  {"^", true},
+                                                  {"`", true},
+                                                  {"{", true},
+                                                  {"|", true},
+                                                  {"}", true},
+                                                  {"é", false}}; // Non-ASCII (not printable ASCII)
+
+  for (auto i : http_mostly_strict_uri_parsing_test_case) {
+    const char *const uri = i.uri;
+    if (url_is_mostly_compliant(uri, uri + strlen(uri)) != i.valid) {
+      std::printf("Mostly strictly parse URI: \"%s\", expected %s, but not\n", uri, (i.valid ? "true" : "false"));
+      CHECK(false);
+    }
+  }
+}
+
 struct url_parse_test_case {
   const std::string input_uri;
   const std::string expected_printed_url;
diff --git a/proxy/http/HttpConfig.cc b/proxy/http/HttpConfig.cc
index 9f12e92..439541a 100644
--- a/proxy/http/HttpConfig.cc
+++ b/proxy/http/HttpConfig.cc
@@ -1629,7 +1629,7 @@ HttpConfig::reconfigure()
   params->referer_filter_enabled  = INT_TO_BOOL(m_master.referer_filter_enabled);
   params->referer_format_redirect = INT_TO_BOOL(m_master.referer_format_redirect);
 
-  params->strict_uri_parsing = INT_TO_BOOL(m_master.strict_uri_parsing);
+  params->strict_uri_parsing = m_master.strict_uri_parsing;
 
   params->oride.down_server_timeout = m_master.oride.down_server_timeout;
 
diff --git a/tests/gold_tests/headers/gold/bad_good_request_http1.gold b/tests/gold_tests/headers/gold/bad_good_request_http1.gold
new file mode 100644
index 0000000..5f31ffa
--- /dev/null
+++ b/tests/gold_tests/headers/gold/bad_good_request_http1.gold
@@ -0,0 +1,8 @@
+``HTTP/1.0 400 Invalid HTTP Request
+``Server: ATS/``
+``Content-Length: 219
+``
+<TITLE>Bad Request</TITLE>
+``<H1>Bad Request</H1>
+``Description: Could not process this request.
+``
diff --git a/tests/gold_tests/headers/good_request_after_bad.test.py b/tests/gold_tests/headers/good_request_after_bad.test.py
index cfefaf6..f65d542 100644
--- a/tests/gold_tests/headers/good_request_after_bad.test.py
+++ b/tests/gold_tests/headers/good_request_after_bad.test.py
@@ -27,9 +27,17 @@ ts = Test.MakeATSProcess("ts", enable_cache=True)
 Test.ContinueOnFail = True
 ts.Disk.records_config.update({'proxy.config.diags.debug.tags': 'http',
                                'proxy.config.diags.debug.enabled': 0,
+                               'proxy.config.http.strict_uri_parsing': 1
                                })
 
-Test.ContinueOnFail = True
+ts2 = Test.MakeATSProcess("ts2", enable_cache=True)
+
+ts2.Disk.records_config.update({'proxy.config.diags.debug.tags': 'http',
+                                'proxy.config.diags.debug.enabled': 0,
+                                'proxy.config.http.strict_uri_parsing': 2
+                                })
+
+
 server = Test.MakeOriginServer("server")
 request_header = {"headers": "GET / HTTP/1.1\r\nHost: www.example.com\r\n\r\n", "timestamp": "1469733493.993", "body": ""}
 response_header = {
@@ -41,6 +49,15 @@ server.addResponse("sessionlog.json", request_header, response_header)
 ts.Disk.remap_config.AddLine(
     'map / http://127.0.0.1:{0}'.format(server.Variables.Port)
 )
+ts.Disk.remap_config.AddLine(
+    'map /bob<> http://127.0.0.1:{0}'.format(server.Variables.Port)
+)
+ts2.Disk.remap_config.AddLine(
+    'map / http://127.0.0.1:{0}'.format(server.Variables.Port)
+)
+ts2.Disk.remap_config.AddLine(
+    'map /bob<> http://127.0.0.1:{0}'.format(server.Variables.Port)
+)
 
 trace_out = Test.Disk.File("trace_curl.txt")
 
@@ -51,6 +68,12 @@ tr.Processes.Default.StartBefore(Test.Processes.ts)
 tr.Processes.Default.Command = 'printf "GET / HTTP/1.1\r\nHost: bob\r\n\r\n" | nc  127.0.0.1 {}'.format(ts.Variables.port)
 tr.Processes.Default.ReturnCode = 0
 
+tr = Test.AddTestRun("Good control")
+tr.Processes.Default.StartBefore(server)
+tr.Processes.Default.StartBefore(Test.Processes.ts2)
+tr.Processes.Default.Command = 'printf "GET / HTTP/1.1\r\nHost: bob\r\n\r\n" | nc  127.0.0.1 {}'.format(ts2.Variables.port)
+tr.Processes.Default.ReturnCode = 0
+
 tr = Test.AddTestRun("space after header name")
 tr.Processes.Default.Command = 'printf "GET / HTTP/1.1\r\nHost : bob\r\n\r\nGET / HTTP/1.1\r\nHost: boa\r\n\r\n" | nc  127.0.0.1 {}'.format(
     ts.Variables.port)
@@ -110,3 +133,44 @@ tr.Processes.Default.Command = 'printf "GET / HTTP/1.1\r\nHost:bob\r\n \r\nGET /
     ts.Variables.port)
 tr.Processes.Default.ReturnCode = 0
 tr.Processes.Default.Streams.stdout = 'gold/bad_good_request.gold'
+
+tr = Test.AddTestRun("Catch bad URL characters")
+tr.Processes.Default.Command = 'printf "GET /bob<> HTTP/1.1\r\nhost: bob\r\n\r\nGET / HTTP/1.1\r\nHost: boa\r\n\r\n" | nc  127.0.0.1 {}'.format(
+    ts.Variables.port)
+tr.Processes.Default.ReturnCode = 0
+# Since the request line is messed up ATS will reply with HTTP/1.0
+tr.Processes.Default.Streams.stdout = 'gold/bad_good_request_http1.gold'
+
+tr = Test.AddTestRun("Catch whitespace in URL")
+tr.Processes.Default.Command = 'printf "GET /bob foo HTTP/1.1\r\nhost: bob\r\n\r\nGET / HTTP/1.1\r\nHost: boa\r\n\r\n" | nc  127.0.0.1 {}'.format(
+    ts.Variables.port)
+tr.Processes.Default.ReturnCode = 0
+# Since the request line is messed up ATS will reply with HTTP/1.0
+tr.Processes.Default.Streams.stdout = 'gold/bad_good_request_http1.gold'
+
+tr = Test.AddTestRun("Extra characters in protocol")
+tr.Processes.Default.Command = 'printf "GET / HTP/1.1\r\nhost: bob\r\n\r\nGET / HTTP/1.1\r\nHost: boa\r\n\r\n" | nc  127.0.0.1 {}'.format(
+    ts.Variables.port)
+tr.Processes.Default.ReturnCode = 0
+# Since the request line is messed up ATS will reply with HTTP/1.0
+tr.Processes.Default.Streams.stdout = 'gold/bad_good_request_http1.gold'
+
+tr = Test.AddTestRun("Characters that are strict but not case 2 bad")
+tr.Processes.Default.Command = 'printf "GET /bob<> HTTP/1.1\r\nhost: bob\r\n\r\nGET / HTTP/1.1\r\nHost: boa\r\n\r\n" | nc  127.0.0.1 {}'.format(
+    ts2.Variables.port)
+tr.Processes.Default.ReturnCode = 0
+tr.Processes.Default.Streams.All = Testers.ContainsExpression("HTTP/1.1 200 OK", "Success")
+
+tr = Test.AddTestRun("Catch whitespace in URL")
+tr.Processes.Default.Command = 'printf "GET /bob foo HTTP/1.1\r\nhost: bob\r\n\r\nGET / HTTP/1.1\r\nHost: boa\r\n\r\n" | nc  127.0.0.1 {}'.format(
+    ts2.Variables.port)
+tr.Processes.Default.ReturnCode = 0
+# Since the request line is messed up ATS will reply with HTTP/1.0
+tr.Processes.Default.Streams.stdout = 'gold/bad_good_request_http1.gold'
+
+tr = Test.AddTestRun("Extra characters in protocol")
+tr.Processes.Default.Command = 'printf "GET / HTP/1.1\r\nhost: bob\r\n\r\nGET / HTTP/1.1\r\nHost: boa\r\n\r\n" | nc  127.0.0.1 {}'.format(
+    ts2.Variables.port)
+tr.Processes.Default.ReturnCode = 0
+# Since the request line is messed up ATS will reply with HTTP/1.0
+tr.Processes.Default.Streams.stdout = 'gold/bad_good_request_http1.gold'