You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafficserver.apache.org by ma...@apache.org on 2022/08/08 23:17:57 UTC

[trafficserver] 01/01: Add control char check in MIME Parser

This is an automated email from the ASF dual-hosted git repository.

masaori pushed a commit to branch asf-master-0809-3
in repository https://gitbox.apache.org/repos/asf/trafficserver.git

commit 4e4738123a49257c9ac912beab8e10dd8f0f9479
Author: Masaori Koshiba <ma...@apache.org>
AuthorDate: Mon Apr 4 10:54:25 2022 +0900

    Add control char check in MIME Parser
---
 include/tscore/ParseRules.h                        | 14 ++++++-
 proxy/hdrs/MIME.cc                                 | 15 +++++++
 proxy/hdrs/unit_tests/test_Hdrs.cc                 | 48 ++++++++++++++++++++++
 .../gold/invalid_character_in_te_value.gold        | 23 +++++++++++
 .../headers/good_request_after_bad.test.py         |  2 +-
 tests/gold_tests/logging/gold/field-json-test.gold |  5 ++-
 tests/gold_tests/logging/log-field-json.test.py    |  5 +++
 7 files changed, 107 insertions(+), 5 deletions(-)

diff --git a/include/tscore/ParseRules.h b/include/tscore/ParseRules.h
index 4ce4ed3d2..92bd687da 100644
--- a/include/tscore/ParseRules.h
+++ b/include/tscore/ParseRules.h
@@ -128,7 +128,7 @@ public:
   static CTypeResult is_empty(char c);            // wslfcr,#
   static CTypeResult is_alnum(char c);            // 0-9,A-Z,a-z
   static CTypeResult is_space(char c);            // ' ' HT,VT,NP,CR,LF
-  static CTypeResult is_control(char c);          // 0-31 127
+  static CTypeResult is_control(char c);          // 0x00-0x08, 0x0a-0x1f, 0x7f
   static CTypeResult is_mime_sep(char c);         // ()<>,;\"/[]?{} \t
   static CTypeResult is_http_field_name(char c);  // not : or mime_sep except for @
   static CTypeResult is_http_field_value(char c); // not CR, LF, comma, or "
@@ -667,14 +667,24 @@ ParseRules::is_space(char c)
 #endif
 }
 
+/**
+   Return true if @c is a control char except HTAB(0x09) and SP(0x20).
+   If you need to check @c is HTAB or SP, use `ParseRules::is_ws`.
+ */
 inline CTypeResult
 ParseRules::is_control(char c)
 {
 #ifndef COMPILE_PARSE_RULES
   return (parseRulesCType[(unsigned char)c] & is_control_BIT);
 #else
-  if (((unsigned char)c) < 32 || ((unsigned char)c) == 127)
+  if (c == CHAR_HT || c == CHAR_SP) {
+    return false;
+  }
+
+  if (((unsigned char)c) < 0x20 || c == 0x7f) {
     return true;
+  }
+
   return false;
 #endif
 }
diff --git a/proxy/hdrs/MIME.cc b/proxy/hdrs/MIME.cc
index 1a2dc05db..902fd62c4 100644
--- a/proxy/hdrs/MIME.cc
+++ b/proxy/hdrs/MIME.cc
@@ -2621,6 +2621,21 @@ mime_parser_parse(MIMEParser *parser, HdrHeap *heap, MIMEHdrImpl *mh, const char
 
     int field_name_wks_idx = hdrtoken_tokenize(field_name.data(), field_name.size());
 
+    if (field_name_wks_idx < 0) {
+      for (auto i : field_name) {
+        if (ParseRules::is_control(i)) {
+          return PARSE_RESULT_ERROR;
+        }
+      }
+    }
+
+    // RFC 9110 Section 5.5. Field Values
+    for (char i : field_value) {
+      if (ParseRules::is_control(i)) {
+        return PARSE_RESULT_ERROR;
+      }
+    }
+
     ///////////////////////////////////////////
     // build and insert the new field object //
     ///////////////////////////////////////////
diff --git a/proxy/hdrs/unit_tests/test_Hdrs.cc b/proxy/hdrs/unit_tests/test_Hdrs.cc
index 40448aea5..7d9c39eef 100644
--- a/proxy/hdrs/unit_tests/test_Hdrs.cc
+++ b/proxy/hdrs/unit_tests/test_Hdrs.cc
@@ -531,6 +531,54 @@ TEST_CASE("HdrTest", "[proxy][hdrtest]")
   mime_init();
   http_init();
 
+  SECTION("Field Char Check")
+  {
+    static struct {
+      std::string_view line;
+      ParseResult expected;
+    } test_cases[] = {
+      ////
+      // Field Name
+      {"Content-Length: 10\r\n", PARSE_RESULT_CONT},
+      {"Content-Length\x0b: 10\r\n", PARSE_RESULT_ERROR},
+      ////
+      // Field Value
+      // SP
+      {"Content-Length: 10\r\n", PARSE_RESULT_CONT},
+      // HTAB
+      {"Foo: ab\td/cd\r\n", PARSE_RESULT_CONT},
+      // VCHAR
+      {"Foo: ab\x21/cd\r\n", PARSE_RESULT_CONT},
+      {"Foo: ab\x7e/cd\r\n", PARSE_RESULT_CONT},
+      // DEL
+      {"Foo: ab\x7f/cd\r\n", PARSE_RESULT_ERROR},
+      // obs-text
+      {"Foo: ab\x80/cd\r\n", PARSE_RESULT_CONT},
+      {"Foo: ab\xff/cd\r\n", PARSE_RESULT_CONT},
+      // control char
+      {"Content-Length: 10\x0b\r\n", PARSE_RESULT_ERROR},
+      {"Content-Length:\x0b 10\r\n", PARSE_RESULT_ERROR},
+      {"Foo: ab\x1d/cd\r\n", PARSE_RESULT_ERROR},
+    };
+
+    MIMEHdr hdr;
+    MIMEParser parser;
+    mime_parser_init(&parser);
+
+    for (const auto &t : test_cases) {
+      mime_parser_clear(&parser);
+
+      const char *start = t.line.data();
+      const char *end   = start + t.line.size();
+
+      int r = hdr.parse(&parser, &start, end, false, false, false);
+      if (r != t.expected) {
+        std::printf("Expected %s is %s, but not", t.line.data(), t.expected == PARSE_RESULT_ERROR ? "invalid" : "valid");
+        CHECK(false);
+      }
+    }
+  }
+
   SECTION("Test parse date")
   {
     static struct {
diff --git a/tests/gold_tests/headers/gold/invalid_character_in_te_value.gold b/tests/gold_tests/headers/gold/invalid_character_in_te_value.gold
new file mode 100644
index 000000000..30a27d819
--- /dev/null
+++ b/tests/gold_tests/headers/gold/invalid_character_in_te_value.gold
@@ -0,0 +1,23 @@
+HTTP/1.1 400 Invalid HTTP Request
+Date:``
+Connection: close
+Server:``
+Cache-Control: no-store
+Content-Type: text/html
+Content-Language: en
+Content-Length:``
+
+<HTML>
+<HEAD>
+<TITLE>Bad Request</TITLE>
+</HEAD>
+
+<BODY BGCOLOR="white" FGCOLOR="black">
+<H1>Bad Request</H1>
+<HR>
+
+<FONT FACE="Helvetica,Arial"><B>
+Description: Could not process this request.
+</B></FONT>
+<HR>
+</BODY>
diff --git a/tests/gold_tests/headers/good_request_after_bad.test.py b/tests/gold_tests/headers/good_request_after_bad.test.py
index 1e5ca87c0..cbc3d9320 100644
--- a/tests/gold_tests/headers/good_request_after_bad.test.py
+++ b/tests/gold_tests/headers/good_request_after_bad.test.py
@@ -96,7 +96,7 @@ tr = Test.AddTestRun("Another unsupported Transfer Encoding value")
 tr.Processes.Default.Command = 'printf "GET / HTTP/1.1\r\nhost: bob\r\ntransfer-encoding: \x08chunked\r\n\r\nGET / HTTP/1.1\r\nHost: boa\r\n\r\n" | nc  127.0.0.1 {}'.format(
     ts.Variables.port)
 tr.Processes.Default.ReturnCode = 0
-tr.Processes.Default.Streams.stdout = 'gold/bad_te_value.gold'
+tr.Processes.Default.Streams.stdout = 'gold/invalid_character_in_te_value.gold'
 
 tr = Test.AddTestRun("Extra characters in content-length")
 tr.Processes.Default.Command = 'printf "GET / HTTP/1.1\r\nhost: bob\r\ncontent-length:+3\r\n\r\nGET / HTTP/1.1\r\nHost: boa\r\n\r\n" | nc  127.0.0.1 {}'.format(
diff --git a/tests/gold_tests/logging/gold/field-json-test.gold b/tests/gold_tests/logging/gold/field-json-test.gold
index 8bdc8c20e..fb54cb9a5 100644
--- a/tests/gold_tests/logging/gold/field-json-test.gold
+++ b/tests/gold_tests/logging/gold/field-json-test.gold
@@ -1,3 +1,4 @@
 {"foo":"ab\td\/ef","foo-slice":"\td"}
-{"foo":"ab\u001fd\/ef","foo-slice":"\u001fd"}
-{"foo":"abc\u007fde","foo-slice":"c"}
+{"foo":"-","foo-slice":""}
+{"foo":"-","foo-slice":""}
+{"foo":"ab\u00c2\u0080d\/ef","foo-slice":"\u00c2\u0080d"}
diff --git a/tests/gold_tests/logging/log-field-json.test.py b/tests/gold_tests/logging/log-field-json.test.py
index cd2110bb7..9e1bb1dff 100644
--- a/tests/gold_tests/logging/log-field-json.test.py
+++ b/tests/gold_tests/logging/log-field-json.test.py
@@ -100,6 +100,11 @@ tr.Processes.Default.Command = 'curl --verbose --header "Host: test-3" --header
     ts.Variables.port)
 tr.Processes.Default.ReturnCode = 0
 
+tr = Test.AddTestRun()
+tr.Processes.Default.Command = 'curl --verbose --header "Host: test-2" --header "Foo: ab\x80d/ef" http://localhost:{0}/test-4' .format(
+    ts.Variables.port)
+tr.Processes.Default.ReturnCode = 0
+
 # Wait for log file to appear, then wait one extra second to make sure TS is done writing it.
 test_run = Test.AddTestRun()
 test_run.Processes.Default.Command = (