You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafficserver.apache.org by zw...@apache.org on 2021/03/29 19:19:46 UTC

[trafficserver] branch 9.1.x updated: For TSHttpHdrEffectiveUrlBufGet(), include scheme for request to server URL. (#7545)

This is an automated email from the ASF dual-hosted git repository.

zwoop pushed a commit to branch 9.1.x
in repository https://gitbox.apache.org/repos/asf/trafficserver.git


The following commit(s) were added to refs/heads/9.1.x by this push:
     new 1d7910f  For TSHttpHdrEffectiveUrlBufGet(), include scheme for request to server URL. (#7545)
1d7910f is described below

commit 1d7910fd6f808ae984ea109f3fc5dd775cec00fc
Author: Walt Karas <wk...@verizonmedia.com>
AuthorDate: Thu Feb 25 16:58:34 2021 -0600

    For TSHttpHdrEffectiveUrlBufGet(), include scheme for request to server URL. (#7545)
    
    (cherry picked from commit 3736f61fce9d675c2c4f6968d430af9eeba26992)
---
 proxy/hdrs/HTTP.cc                         | 13 ++++---
 proxy/hdrs/HTTP.h                          | 17 +++++----
 proxy/hdrs/URL.cc                          | 58 ++++++++++++++++++++----------
 proxy/hdrs/URL.h                           | 44 ++++++++++++++---------
 proxy/http/remap/NextHopConsistentHash.cc  |  2 +-
 src/traffic_server/InkAPI.cc               |  4 +--
 tests/gold_tests/pluginTest/tsapi/log.gold | 12 +++----
 7 files changed, 89 insertions(+), 61 deletions(-)

diff --git a/proxy/hdrs/HTTP.cc b/proxy/hdrs/HTTP.cc
index fb703be..cde8e98 100644
--- a/proxy/hdrs/HTTP.cc
+++ b/proxy/hdrs/HTTP.cc
@@ -1753,7 +1753,7 @@ class UrlPrintHack
 };
 
 char *
-HTTPHdr::url_string_get(Arena *arena, int *length, bool normalized)
+HTTPHdr::url_string_get(Arena *arena, int *length)
 {
   char *zret = nullptr;
   UrlPrintHack hack(this);
@@ -1763,14 +1763,13 @@ HTTPHdr::url_string_get(Arena *arena, int *length, bool normalized)
     // even uglier but it's less so than duplicating this entire method to
     // change that one thing.
 
-    zret = (arena == USE_HDR_HEAP_MAGIC) ? m_url_cached.string_get_ref(length, normalized) :
-                                           m_url_cached.string_get(arena, length, normalized);
+    zret = (arena == USE_HDR_HEAP_MAGIC) ? m_url_cached.string_get_ref(length) : m_url_cached.string_get(arena, length);
   }
   return zret;
 }
 
 int
-HTTPHdr::url_print(char *buff, int length, int *offset, int *skip, bool normalized)
+HTTPHdr::url_print(char *buff, int length, int *offset, int *skip, unsigned normalization_flags)
 {
   ink_release_assert(offset);
   ink_release_assert(skip);
@@ -1778,18 +1777,18 @@ HTTPHdr::url_print(char *buff, int length, int *offset, int *skip, bool normaliz
   int zret = 0;
   UrlPrintHack hack(this);
   if (hack.is_valid()) {
-    zret = m_url_cached.print(buff, length, offset, skip, normalized);
+    zret = m_url_cached.print(buff, length, offset, skip, normalization_flags);
   }
   return zret;
 }
 
 int
-HTTPHdr::url_printed_length()
+HTTPHdr::url_printed_length(unsigned normalization_flags)
 {
   int zret = -1;
   UrlPrintHack hack(this);
   if (hack.is_valid()) {
-    zret = m_url_cached.length_get();
+    zret = m_url_cached.length_get(normalization_flags);
   }
   return zret;
 }
diff --git a/proxy/hdrs/HTTP.h b/proxy/hdrs/HTTP.h
index 0ae1077..3da31a0 100644
--- a/proxy/hdrs/HTTP.h
+++ b/proxy/hdrs/HTTP.h
@@ -546,9 +546,8 @@ public:
       and invoking @c URL::string_get if the host is in a header
       field and not explicitly in the URL.
    */
-  char *url_string_get(Arena *arena    = nullptr, ///< Arena to use, or @c malloc if NULL.
-                       int *length     = nullptr, ///< Store string length here.
-                       bool normalized = false    ///< Scheme and host normalized to lower case letters.
+  char *url_string_get(Arena *arena = nullptr, ///< Arena to use, or @c malloc if NULL.
+                       int *length  = nullptr  ///< Store string length here.
   );
   /** Get a string with the effective URL in it.
       This is automatically allocated if needed in the request heap.
@@ -562,17 +561,17 @@ public:
       Output is not null terminated.
       @return 0 on failure, non-zero on success.
    */
-  int url_print(char *buff,             ///< Output buffer
-                int length,             ///< Length of @a buffer
-                int *offset,            ///< [in,out] ???
-                int *skip,              ///< [in,out] ???
-                bool normalized = false ///< host/scheme normalized to lower case
+  int url_print(char *buff,                                       ///< Output buffer
+                int length,                                       ///< Length of @a buffer
+                int *offset,                                      ///< [in,out] ???
+                int *skip,                                        ///< [in,out] ???
+                unsigned normalization_flags = URLNormalize::NONE ///< host/scheme normalized to lower case
   );
 
   /** Return the length of the URL that url_print() will create.
       @return -1 on failure, non-negative on success.
    */
-  int url_printed_length();
+  int url_printed_length(unsigned normalizaion_flags = URLNormalize::NONE);
 
   /** Get the URL path.
       This is a reference, not allocated.
diff --git a/proxy/hdrs/URL.cc b/proxy/hdrs/URL.cc
index 4f7b4e8..70fc177 100644
--- a/proxy/hdrs/URL.cc
+++ b/proxy/hdrs/URL.cc
@@ -612,40 +612,41 @@ url_clear_string_ref(URLImpl *url)
 }
 
 char *
-url_string_get_ref(HdrHeap *heap, URLImpl *url, int *length, bool normalized)
+url_string_get_ref(HdrHeap *heap, URLImpl *url, int *length, unsigned normalization_flags)
 {
   if (!url) {
     return nullptr;
   }
 
-  if (url->m_ptr_printed_string && url->m_clean) {
+  if (url->m_ptr_printed_string && url->m_clean && (normalization_flags == url->m_normalization_flags)) {
     if (length) {
       *length = url->m_len_printed_string;
     }
     return const_cast<char *>(url->m_ptr_printed_string);
   } else { // either not clean or never printed
-    int len = url_length_get(url);
+    int len = url_length_get(url, normalization_flags);
     char *buf;
     int index  = 0;
     int offset = 0;
 
     /* stuff alloc'd here gets gc'd on HdrHeap::destroy() */
     buf = heap->allocate_str(len + 1);
-    url_print(url, buf, len, &index, &offset, normalized);
+    url_print(url, buf, len, &index, &offset, normalization_flags);
     buf[len] = '\0';
 
     if (length) {
       *length = len;
     }
-    url->m_clean              = true; // reset since we have url_print()'ed again
-    url->m_len_printed_string = len;
-    url->m_ptr_printed_string = buf;
+    url->m_clean               = true; // reset since we have url_print()'ed again
+    url->m_len_printed_string  = len;
+    url->m_ptr_printed_string  = buf;
+    url->m_normalization_flags = normalization_flags;
     return buf;
   }
 }
 
 char *
-url_string_get(URLImpl *url, Arena *arena, int *length, HdrHeap *heap, bool normalized)
+url_string_get(URLImpl *url, Arena *arena, int *length, HdrHeap *heap)
 {
   int len = url_length_get(url);
   char *buf;
@@ -655,7 +656,7 @@ url_string_get(URLImpl *url, Arena *arena, int *length, HdrHeap *heap, bool norm
 
   buf = arena ? arena->str_alloc(len) : static_cast<char *>(ats_malloc(len + 1));
 
-  url_print(url, buf, len, &index, &offset, normalized);
+  url_print(url, buf, len, &index, &offset);
   buf[len] = '\0';
 
   /* see string_get_ref() */
@@ -801,15 +802,19 @@ url_type_get(URLImpl *url)
   -------------------------------------------------------------------------*/
 
 int
-url_length_get(URLImpl *url)
+url_length_get(URLImpl *url, unsigned normalization_flags)
 {
   int length = 0;
 
   if (url->m_ptr_scheme) {
-    if ((url->m_scheme_wks_idx >= 0) && (hdrtoken_index_to_wks(url->m_scheme_wks_idx) == URL_SCHEME_FILE)) {
-      length += url->m_len_scheme + 1; // +1 for ":"
-    } else {
-      length += url->m_len_scheme + 3; // +3 for "://"
+    length += url->m_len_scheme + 3; // +3 for "://"
+
+  } else if (normalization_flags & URLNormalize::IMPLIED_SCHEME) {
+    if (URL_TYPE_HTTP == url->m_url_type) {
+      length += URL_LEN_HTTP + 3;
+
+    } else if (URL_TYPE_HTTPS == url->m_url_type) {
+      length += URL_LEN_HTTPS + 3;
     }
   }
 
@@ -1582,15 +1587,30 @@ url_parse_http_regex(HdrHeap *heap, URLImpl *url, const char **start, const char
  ***********************************************************************/
 
 int
-url_print(URLImpl *url, char *buf_start, int buf_length, int *buf_index_inout, int *buf_chars_to_skip_inout, bool normalize)
+url_print(URLImpl *url, char *buf_start, int buf_length, int *buf_index_inout, int *buf_chars_to_skip_inout,
+          unsigned normalization_flags)
 {
 #define TRY(x) \
   if (!x)      \
   return 0
 
+  bool scheme_added = false;
   if (url->m_ptr_scheme) {
-    TRY((normalize ? mime_mem_print_lc : mime_mem_print)(url->m_ptr_scheme, url->m_len_scheme, buf_start, buf_length,
-                                                         buf_index_inout, buf_chars_to_skip_inout));
+    TRY(((normalization_flags & URLNormalize::LC_SCHEME_HOST) ? mime_mem_print_lc : mime_mem_print)(
+      url->m_ptr_scheme, url->m_len_scheme, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
+    scheme_added = true;
+
+  } else if (normalization_flags & URLNormalize::IMPLIED_SCHEME) {
+    if (URL_TYPE_HTTP == url->m_url_type) {
+      TRY(mime_mem_print(URL_SCHEME_HTTP, URL_LEN_HTTP, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
+      scheme_added = true;
+
+    } else if (URL_TYPE_HTTPS == url->m_url_type) {
+      TRY(mime_mem_print(URL_SCHEME_HTTPS, URL_LEN_HTTPS, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
+      scheme_added = true;
+    }
+  }
+  if (scheme_added) {
     TRY(mime_mem_print("://", 3, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
   }
 
@@ -1612,8 +1632,8 @@ url_print(URLImpl *url, char *buf_start, int buf_length, int *buf_index_inout, i
     if (bracket_p) {
       TRY(mime_mem_print("[", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
     }
-    TRY((normalize ? mime_mem_print_lc : mime_mem_print)(url->m_ptr_host, url->m_len_host, buf_start, buf_length, buf_index_inout,
-                                                         buf_chars_to_skip_inout));
+    TRY(((normalization_flags & URLNormalize::LC_SCHEME_HOST) ? mime_mem_print_lc : mime_mem_print)(
+      url->m_ptr_host, url->m_len_host, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
     if (bracket_p) {
       TRY(mime_mem_print("]", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
     }
diff --git a/proxy/hdrs/URL.h b/proxy/hdrs/URL.h
index ab8b36c..d31107e 100644
--- a/proxy/hdrs/URL.h
+++ b/proxy/hdrs/URL.h
@@ -76,7 +76,8 @@ struct URLImpl : public HdrHeapObjImpl {
   uint32_t m_clean : 1;
   /// Whether the URI had an absolutely empty path, not even an initial '/'.
   uint32_t m_path_is_empty : 1;
-  // 8 bytes + 2 bits, will result in padding
+  uint32_t m_normalization_flags : 2; // Only valid if both m_clean and m_ptr_printed_sting are non-zero.
+  // 8 bytes + 4 bits, will result in padding
 
   // Marshaling Functions
   int marshal(MarshalXlate *str_xlate, int num_xlate);
@@ -166,13 +167,22 @@ URLImpl *url_copy(URLImpl *s_url, HdrHeap *s_heap, HdrHeap *d_heap, bool inherit
 void url_copy_onto(URLImpl *s_url, HdrHeap *s_heap, URLImpl *d_url, HdrHeap *d_heap, bool inherit_strs = true);
 void url_copy_onto_as_server_url(URLImpl *s_url, HdrHeap *s_heap, URLImpl *d_url, HdrHeap *d_heap, bool inherit_strs = true);
 
-int url_print(URLImpl *u, char *buf, int bufsize, int *bufindex, int *dumpoffset, bool normalized = false);
+// Normalization flag masks.
+namespace URLNormalize
+{
+unsigned const NONE           = 0;
+unsigned const IMPLIED_SCHEME = 1; // If scheme missing, add scheme implied by URL type.
+unsigned const LC_SCHEME_HOST = 2; // Force scheme and host to lower case if necessary.
+};                                 // namespace URLNormalize
+
+int url_print(URLImpl *u, char *buf, int bufsize, int *bufindex, int *dumpoffset,
+              unsigned normalization_flags = URLNormalize::NONE);
 void url_describe(HdrHeapObjImpl *raw, bool recurse);
 
-int url_length_get(URLImpl *url);
-char *url_string_get(URLImpl *url, Arena *arena, int *length, HdrHeap *heap, bool normalized = false);
+int url_length_get(URLImpl *url, unsigned normalization_flags = URLNormalize::NONE);
+char *url_string_get(URLImpl *url, Arena *arena, int *length, HdrHeap *heap);
 void url_clear_string_ref(URLImpl *url);
-char *url_string_get_ref(HdrHeap *heap, URLImpl *url, int *length, bool normalized = false);
+char *url_string_get_ref(HdrHeap *heap, URLImpl *url, int *length, unsigned normalization_flags = URLNormalize::NONE);
 void url_called_set(URLImpl *url);
 char *url_string_get_buf(URLImpl *url, char *dstbuf, int dstbuf_size, int *length);
 
@@ -245,14 +255,14 @@ public:
   // Note that URL::destroy() is inherited from HdrHeapSDKHandle.
   void nuke_proxy_stuff();
 
-  int print(char *buf, int bufsize, int *bufindex, int *dumpoffset, bool normalized = false) const;
+  int print(char *buf, int bufsize, int *bufindex, int *dumpoffset, unsigned normalization_flags = URLNormalize::NONE) const;
 
-  int length_get() const;
+  int length_get(unsigned normalization_flags = URLNormalize::NONE) const;
 
   void clear_string_ref();
 
-  char *string_get(Arena *arena, int *length = nullptr, bool normalized = false) const;
-  char *string_get_ref(int *length = nullptr, bool normalized = false) const;
+  char *string_get(Arena *arena, int *length = nullptr) const;
+  char *string_get_ref(int *length = nullptr, unsigned normalization_flags = URLNormalize::NONE) const;
   char *string_get_buf(char *dstbuf, int dsbuf_size, int *length = nullptr) const;
   void hash_get(CryptoHash *hash, cache_generation_t generation = -1) const;
   void host_hash_get(CryptoHash *hash) const;
@@ -415,37 +425,37 @@ URL::nuke_proxy_stuff()
   -------------------------------------------------------------------------*/
 
 inline int
-URL::print(char *buf, int bufsize, int *bufindex, int *dumpoffset, bool normalized) const
+URL::print(char *buf, int bufsize, int *bufindex, int *dumpoffset, unsigned normalization_flags) const
 {
   ink_assert(valid());
-  return url_print(m_url_impl, buf, bufsize, bufindex, dumpoffset, normalized);
+  return url_print(m_url_impl, buf, bufsize, bufindex, dumpoffset, normalization_flags);
 }
 
 /*-------------------------------------------------------------------------
   -------------------------------------------------------------------------*/
 
 inline int
-URL::length_get() const
+URL::length_get(unsigned normalization_flags) const
 {
   ink_assert(valid());
-  return url_length_get(m_url_impl);
+  return url_length_get(m_url_impl, normalization_flags);
 }
 
 /*-------------------------------------------------------------------------
   -------------------------------------------------------------------------*/
 
 inline char *
-URL::string_get(Arena *arena_or_null_for_malloc, int *length, bool normalized) const
+URL::string_get(Arena *arena_or_null_for_malloc, int *length) const
 {
   ink_assert(valid());
-  return url_string_get(m_url_impl, arena_or_null_for_malloc, length, m_heap, normalized);
+  return url_string_get(m_url_impl, arena_or_null_for_malloc, length, m_heap);
 }
 
 inline char *
-URL::string_get_ref(int *length, bool normalized) const
+URL::string_get_ref(int *length, unsigned normalization_flags) const
 {
   ink_assert(valid());
-  return url_string_get_ref(m_heap, m_url_impl, length, normalized);
+  return url_string_get_ref(m_heap, m_url_impl, length, normalization_flags);
 }
 
 inline void
diff --git a/proxy/http/remap/NextHopConsistentHash.cc b/proxy/http/remap/NextHopConsistentHash.cc
index c14c971..d1d2b0f 100644
--- a/proxy/http/remap/NextHopConsistentHash.cc
+++ b/proxy/http/remap/NextHopConsistentHash.cc
@@ -134,7 +134,7 @@ NextHopConsistentHash::getHashKey(uint64_t sm_id, HttpRequestData *hrdata, ATSHa
   // calculate a hash using the selected config.
   switch (hash_key) {
   case NH_URL_HASH_KEY:
-    url_string_ref = url->string_get_ref(&len, true);
+    url_string_ref = url->string_get_ref(&len, URLNormalize::LC_SCHEME_HOST);
     if (url_string_ref && len > 0) {
       h->update(url_string_ref, len);
       NH_Debug(NH_DEBUG_TAG, "[%" PRIu64 "] url hash string: %s", sm_id, url_string_ref);
diff --git a/src/traffic_server/InkAPI.cc b/src/traffic_server/InkAPI.cc
index 770477b..fae6fc8 100644
--- a/src/traffic_server/InkAPI.cc
+++ b/src/traffic_server/InkAPI.cc
@@ -5220,7 +5220,7 @@ TSHttpHdrEffectiveUrlBufGet(TSMBuffer hdr_buf, TSMLoc hdr_loc, char *buf, int64_
     return TS_ERROR;
   }
 
-  int url_length = buf_handle->url_printed_length();
+  int url_length = buf_handle->url_printed_length(URLNormalize::LC_SCHEME_HOST | URLNormalize::IMPLIED_SCHEME);
 
   sdk_assert(url_length >= 0);
 
@@ -5233,7 +5233,7 @@ TSHttpHdrEffectiveUrlBufGet(TSMBuffer hdr_buf, TSMLoc hdr_loc, char *buf, int64_
     int index  = 0;
     int offset = 0;
 
-    buf_handle->url_print(buf, size, &index, &offset, true);
+    buf_handle->url_print(buf, size, &index, &offset, URLNormalize::LC_SCHEME_HOST | URLNormalize::IMPLIED_SCHEME);
   }
 
   return TS_SUCCESS;
diff --git a/tests/gold_tests/pluginTest/tsapi/log.gold b/tests/gold_tests/pluginTest/tsapi/log.gold
index b12f447..c157e81 100644
--- a/tests/gold_tests/pluginTest/tsapi/log.gold
+++ b/tests/gold_tests/pluginTest/tsapi/log.gold
@@ -40,14 +40,14 @@ TSUrlPortGet():  SERVER_PORT
 Global: event=TS_EVENT_HTTP_SEND_REQUEST_HDR
 TSHttpTxnEffectiveUrlStringGet():  http://127.0.0.1:SERVER_PORT/
 Request To Server:
-TSHttpHdrEffectiveUrlBufGet():  127.0.0.1:SERVER_PORT/
+TSHttpHdrEffectiveUrlBufGet():  http://127.0.0.1:SERVER_PORT/
 TSUrlSchemeGet():  http
 TSUrlRawSchemeGet():  failed to get raw URL scheme
 TSUrlPortGet():  80
 Transaction: event=TS_EVENT_HTTP_SEND_REQUEST_HDR
 TSHttpTxnEffectiveUrlStringGet():  http://127.0.0.1:SERVER_PORT/
 Request To Server:
-TSHttpHdrEffectiveUrlBufGet():  127.0.0.1:SERVER_PORT/
+TSHttpHdrEffectiveUrlBufGet():  http://127.0.0.1:SERVER_PORT/
 TSUrlSchemeGet():  http
 TSUrlRawSchemeGet():  failed to get raw URL scheme
 TSUrlPortGet():  80
@@ -81,14 +81,14 @@ TSUrlPortGet():  SERVER_PORT
 Global: event=TS_EVENT_HTTP_SEND_REQUEST_HDR
 TSHttpTxnEffectiveUrlStringGet():  http://127.0.0.1:SERVER_PORT/xYz
 Request To Server:
-TSHttpHdrEffectiveUrlBufGet():  127.0.0.1:SERVER_PORT/xYz
+TSHttpHdrEffectiveUrlBufGet():  http://127.0.0.1:SERVER_PORT/xYz
 TSUrlSchemeGet():  http
 TSUrlRawSchemeGet():  failed to get raw URL scheme
 TSUrlPortGet():  80
 Transaction: event=TS_EVENT_HTTP_SEND_REQUEST_HDR
 TSHttpTxnEffectiveUrlStringGet():  http://127.0.0.1:SERVER_PORT/xYz
 Request To Server:
-TSHttpHdrEffectiveUrlBufGet():  127.0.0.1:SERVER_PORT/xYz
+TSHttpHdrEffectiveUrlBufGet():  http://127.0.0.1:SERVER_PORT/xYz
 TSUrlSchemeGet():  http
 TSUrlRawSchemeGet():  failed to get raw URL scheme
 TSUrlPortGet():  80
@@ -122,14 +122,14 @@ TSUrlPortGet():  SERVER_PORT
 Global: event=TS_EVENT_HTTP_SEND_REQUEST_HDR
 TSHttpTxnEffectiveUrlStringGet():  http://127.0.0.1:SERVER_PORT/
 Request To Server:
-TSHttpHdrEffectiveUrlBufGet():  127.0.0.1:SERVER_PORT/
+TSHttpHdrEffectiveUrlBufGet():  http://127.0.0.1:SERVER_PORT/
 TSUrlSchemeGet():  http
 TSUrlRawSchemeGet():  failed to get raw URL scheme
 TSUrlPortGet():  80
 Transaction: event=TS_EVENT_HTTP_SEND_REQUEST_HDR
 TSHttpTxnEffectiveUrlStringGet():  http://127.0.0.1:SERVER_PORT/
 Request To Server:
-TSHttpHdrEffectiveUrlBufGet():  127.0.0.1:SERVER_PORT/
+TSHttpHdrEffectiveUrlBufGet():  http://127.0.0.1:SERVER_PORT/
 TSUrlSchemeGet():  http
 TSUrlRawSchemeGet():  failed to get raw URL scheme
 TSUrlPortGet():  80