You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by cs...@apache.org on 2023/08/16 09:35:36 UTC

[impala] branch master updated: IMPALA-12341: Fix http header parsing issue in thrift http server

This is an automated email from the ASF dual-hosted git repository.

csringhofer pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 9adfe0587 IMPALA-12341: Fix http header parsing issue in thrift http server
9adfe0587 is described below

commit 9adfe0587cfaf3364c8a3672b1c60b198ef35a89
Author: Gergely Farkas <gf...@cloudera.com>
AuthorDate: Tue Aug 8 16:42:06 2023 +0200

    IMPALA-12341: Fix http header parsing issue in thrift http server
    
    This change fixes the following http header parsing bug in
    THttpServer: The THRIFT_strncasecmp() function used in the
    THttpServer::parseHeader() function returns true even if the name
    of the header being processed is a prefix of the header constant
    that is defined in the condition.
    For example: In the original implementation when processing the
    http header "auth: anyValue", we run into the code fragment where
    the Authorization header content is processed, because the condition
    THRIFT_strncasecmp("auth: anyValue", "Authorization", 4) == 0)
    is true, since the first 4 characters of the two strings are the same.
    This can break authentication if the http request has a header
    with a name that is a prefix to the word "Authorization".
    If the length of the checked header is included in the condition,
    this problem is avoided, so this fix refactors the if conditions,
    so that this check is present everywhere.
    
    Tested with new custom cluster tests.
    
    Change-Id: I754639cfe3eab0016d09f71ded4821caa357bf87
    Reviewed-on: http://gerrit.cloudera.org:8080/20301
    Reviewed-by: Csaba Ringhofer <cs...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/transport/THttpServer.cpp                   | 49 ++++++++++++++--------
 be/src/transport/THttpServer.h                     |  8 ++++
 .../apache/impala/customcluster/LdapHS2Test.java   | 24 ++++++++++-
 3 files changed, 63 insertions(+), 18 deletions(-)

diff --git a/be/src/transport/THttpServer.cpp b/be/src/transport/THttpServer.cpp
index 8498ac1f2..ce76d3e38 100644
--- a/be/src/transport/THttpServer.cpp
+++ b/be/src/transport/THttpServer.cpp
@@ -129,6 +129,23 @@ THttpServer::~THttpServer() {
 const std::string THttpServer::HEADER_REQUEST_ID = "X-Request-Id";
 const std::string THttpServer::HEADER_IMPALA_SESSION_ID = "X-Impala-Session-Id";
 const std::string THttpServer::HEADER_IMPALA_QUERY_ID = "X-Impala-Query-Id";
+const std::string THttpServer::HEADER_SAML2_TOKEN_RESPONSE_PORT = "X-Hive-Token-Response-Port";
+const std::string THttpServer::HEADER_SAML2_CLIENT_IDENTIFIER = "X-Hive-Client-Identifier";
+const std::string THttpServer::HEADER_TRANSFER_ENCODING = "Transfer-Encoding";
+const std::string THttpServer::HEADER_CONTENT_LENGTH = "Content-length";
+const std::string THttpServer::HEADER_X_FORWARDED_FOR = "X-Forwarded-For";
+const std::string THttpServer::HEADER_AUTHORIZATION = "Authorization";
+const std::string THttpServer::HEADER_COOKIE = "Cookie";
+const std::string THttpServer::HEADER_EXPECT = "Expect";
+
+// Checks whether the name of the http header given in the 'header' parameter,
+// with length 'header_name_len', matches the constant given in 'header_constant_str'
+// parameter.
+inline bool MatchesHeader(const char* header, const string& header_constant_str,
+    const size_t header_name_len) {
+  return (header_name_len == header_constant_str.length() &&
+    THRIFT_strncasecmp(header, header_constant_str.c_str(), header_name_len) == 0);
+}
 
 void THttpServer::parseHeader(char* header) {
   char* colon = strchr(header, ':');
@@ -138,49 +155,47 @@ void THttpServer::parseHeader(char* header) {
   size_t sz = colon - header;
   char* value = colon + 1;
 
-  static const char* SAML2_TOKEN_RESPONSE_PORT = "X-Hive-Token-Response-Port";
-  static const char* SAML2_CLIENT_IDENTIFIER = "X-Hive-Client-Identifier";
-  if (THRIFT_strncasecmp(header, "Transfer-Encoding", sz) == 0) {
+  if (MatchesHeader(header, HEADER_TRANSFER_ENCODING, sz)) {
     if (THRIFT_strcasestr(value, "chunked") != NULL) {
       chunked_ = true;
     }
-  } else if (THRIFT_strncasecmp(header, "Content-length", sz) == 0) {
+  } else if (MatchesHeader(header, HEADER_CONTENT_LENGTH, sz)) {
     chunked_ = false;
     contentLength_ = atoi(value);
-  } else if (THRIFT_strncasecmp(header, "X-Forwarded-For", sz) == 0) {
+  } else if (MatchesHeader(header, HEADER_X_FORWARDED_FOR, sz)) {
     origin_ = value;
   } else if ((has_ldap_ || has_kerberos_ || has_saml_ || has_jwt_)
-      && THRIFT_strncasecmp(header, "Authorization", sz) == 0) {
+      && MatchesHeader(header, HEADER_AUTHORIZATION, sz)) {
     auth_value_ = string(value);
-  } else if (use_cookies_ && THRIFT_strncasecmp(header, "Cookie", sz) == 0) {
+  } else if (use_cookies_ && MatchesHeader(header, HEADER_COOKIE, sz)) {
     cookie_value_ = string(value);
-  } else if (THRIFT_strncasecmp(header, "Expect", sz) == 0) {
+  } else if (MatchesHeader(header, HEADER_EXPECT, sz)) {
     if (THRIFT_strcasestr(value, "100-continue")){
       continue_ = true;
     }
   } else if (has_saml_
-        && THRIFT_strncasecmp(header, SAML2_TOKEN_RESPONSE_PORT, sz) == 0) {
+        && MatchesHeader(header, HEADER_SAML2_TOKEN_RESPONSE_PORT, sz)) {
     saml_port_ = atoi(value);
     string port_str = string(value);
     StripWhiteSpace(&port_str);
     DCHECK(wrapped_request_ != nullptr);
-    wrapped_request_->headers[SAML2_TOKEN_RESPONSE_PORT] = port_str;
+    wrapped_request_->headers[HEADER_SAML2_TOKEN_RESPONSE_PORT] = port_str;
   } else if (has_saml_
-        && THRIFT_strncasecmp(header, SAML2_CLIENT_IDENTIFIER, sz) == 0) {
+        && MatchesHeader(header, HEADER_SAML2_CLIENT_IDENTIFIER, sz)) {
     DCHECK(wrapped_request_ != nullptr);
     string client_id = string(value);
     StripWhiteSpace(&client_id);
-    wrapped_request_->headers[SAML2_CLIENT_IDENTIFIER] = client_id;
+    wrapped_request_->headers[HEADER_SAML2_CLIENT_IDENTIFIER] = client_id;
   } else if (check_trusted_auth_header_
-      && THRIFT_strncasecmp(header, FLAGS_trusted_auth_header.c_str(), sz) == 0) {
+      && MatchesHeader(header, FLAGS_trusted_auth_header, sz)) {
     found_trusted_auth_header_ = true;
-  } else if (THRIFT_strncasecmp(header, HEADER_REQUEST_ID.c_str(), sz) == 0) {
+  } else if (MatchesHeader(header, HEADER_REQUEST_ID, sz)) {
     header_x_request_id_ = string(value);
     StripWhiteSpace(&header_x_request_id_);
-  } else if (THRIFT_strncasecmp(header, HEADER_IMPALA_SESSION_ID.c_str(), sz) == 0) {
+  } else if (MatchesHeader(header, HEADER_IMPALA_SESSION_ID, sz)) {
     header_x_session_id_ = string(value);
     StripWhiteSpace(&header_x_session_id_);
-  } else if (THRIFT_strncasecmp(header, HEADER_IMPALA_QUERY_ID.c_str(), sz) == 0) {
+  } else if (MatchesHeader(header, HEADER_IMPALA_QUERY_ID, sz)) {
     header_x_query_id_ = string(value);
     StripWhiteSpace(&header_x_query_id_);
   }
@@ -321,7 +336,7 @@ void THttpServer::headersDone() {
         fallback_to_other_auths = false;
         // Final SAML message in browser mode, check bearer and replace it with a cookie.
         DCHECK(wrapped_request_ != nullptr);
-        wrapped_request_->headers["Authorization"] = auth_value_;
+        wrapped_request_->headers[HEADER_AUTHORIZATION] = auth_value_;
         if (callbacks_.validate_saml2_bearer_fn()) {
           // During EE tests it makes things easier to return 401-Unauthorized here.
           // This hack can be removed once there is a Python client that
diff --git a/be/src/transport/THttpServer.h b/be/src/transport/THttpServer.h
index b623e6491..acd877443 100644
--- a/be/src/transport/THttpServer.h
+++ b/be/src/transport/THttpServer.h
@@ -159,6 +159,14 @@ protected:
   static const std::string HEADER_IMPALA_SESSION_ID;
   // Impala query id specified by the Impala backend.  Used for tracing HTTP requests.
   static const std::string HEADER_IMPALA_QUERY_ID;
+  static const std::string HEADER_SAML2_TOKEN_RESPONSE_PORT;
+  static const std::string HEADER_SAML2_CLIENT_IDENTIFIER;
+  static const std::string HEADER_TRANSFER_ENCODING;
+  static const std::string HEADER_CONTENT_LENGTH;
+  static const std::string HEADER_X_FORWARDED_FOR;
+  static const std::string HEADER_AUTHORIZATION;
+  static const std::string HEADER_COOKIE;
+  static const std::string HEADER_EXPECT;
 
   void readHeaders();
   virtual void parseHeader(char* header);
diff --git a/fe/src/test/java/org/apache/impala/customcluster/LdapHS2Test.java b/fe/src/test/java/org/apache/impala/customcluster/LdapHS2Test.java
index fc031d4b9..164b2e65b 100644
--- a/fe/src/test/java/org/apache/impala/customcluster/LdapHS2Test.java
+++ b/fe/src/test/java/org/apache/impala/customcluster/LdapHS2Test.java
@@ -24,6 +24,7 @@ import static org.junit.Assert.fail;
 
 import java.io.File;
 import java.util.HashMap;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 
@@ -233,7 +234,7 @@ public class LdapHS2Test {
       }
     }
 
-    // Attempt to authenticate with a different mechanism. SHould fail, but won't
+    // Attempt to authenticate with a different mechanism. Should fail, but won't
     // increment the total-basic-auth-failure metric because its not considered a 'Basic'
     // auth attempt.
     headers.put("Authorization", "Negotiate VGVzdDFMZGFwOjEyMzQ1");
@@ -282,6 +283,27 @@ public class LdapHS2Test {
         assertEquals(e.getMessage(), "HTTP Response code: 401");
       }
     }
+
+    // Authenticate as 'Test1Ldap' with password '12345' - the request
+    // contains an empty 'a' header to test the fix of IMPALA-12341
+    // should succeed
+    Map<String, String> orderedHeaders = new LinkedHashMap<>();
+    orderedHeaders.put("Authorization", "Basic VGVzdDFMZGFwOjEyMzQ1");
+    // This empty "a" header breaks authentication in builds prior to IMPALA-12341
+    orderedHeaders.put("a", "");
+    transport.setCustomHeaders(orderedHeaders);
+    transport.open();
+
+    // Open a session which will get username 'Test1Ldap'.
+    TOpenSessionReq openReq6 = new TOpenSessionReq();
+    TOpenSessionResp openResp6 = client.OpenSession(openReq);
+    // One successful authentication.
+    verifyMetrics(10, numFailures);
+    // Running a query should succeed.
+    TOperationHandle operationHandle6 = execAndFetch(
+            client, openResp.getSessionHandle(), "select logged_in_user()", "Test1Ldap");
+    // Two more successful authentications - for the Exec() and the Fetch().
+    verifyMetrics(12, numFailures);
   }
 
   /**