You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafficserver.apache.org by am...@apache.org on 2012/05/28 07:15:23 UTC

[4/5] git commit: Fixes for handling Host field.

Fixes for handling Host field.


Project: http://git-wip-us.apache.org/repos/asf/trafficserver/repo
Commit: http://git-wip-us.apache.org/repos/asf/trafficserver/commit/ebc4c1c3
Tree: http://git-wip-us.apache.org/repos/asf/trafficserver/tree/ebc4c1c3
Diff: http://git-wip-us.apache.org/repos/asf/trafficserver/diff/ebc4c1c3

Branch: refs/heads/master
Commit: ebc4c1c3cd4d9b475a5d0e9980ee34120d7e1ffe
Parents: 86631b8
Author: Alan M. Carroll <am...@network-geographics.com>
Authored: Sun May 27 21:58:16 2012 -0500
Committer: Alan M. Carroll <am...@network-geographics.com>
Committed: Sun May 27 21:58:16 2012 -0500

----------------------------------------------------------------------
 lib/ts/TsBuffer.h          |   30 ++++-
 proxy/hdrs/HTTP.cc         |   18 ++--
 proxy/hdrs/HdrTest.cc      |   21 ++++
 proxy/hdrs/MIME.cc         |   56 ++++++++++
 proxy/hdrs/MIME.h          |   11 ++
 proxy/hdrs/URL.cc          |  229 ++++++---------------------------------
 proxy/http/HttpTransact.cc |   17 ++--
 7 files changed, 162 insertions(+), 220 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/trafficserver/blob/ebc4c1c3/lib/ts/TsBuffer.h
----------------------------------------------------------------------
diff --git a/lib/ts/TsBuffer.h b/lib/ts/TsBuffer.h
index d784c02..dbcd72e 100644
--- a/lib/ts/TsBuffer.h
+++ b/lib/ts/TsBuffer.h
@@ -40,6 +40,10 @@ namespace ts {
   struct ConstBuffer;
   /** A chunk of writable memory.
       A convenience class because we pass this kind of pair frequently.
+
+      @note The default construct leaves the object
+      uninitialized. This is for performance reasons. To construct an
+      empty @c Buffer use @c Buffer(0).
    */
   struct Buffer {
     typedef Buffer self; ///< Self reference type.
@@ -52,13 +56,18 @@ namespace ts {
     /// Elements are in uninitialized state.
     Buffer();
 
-    /// Construct from pointer and size.
+    /** Construct from pointer and size.
+	@note Due to ambiguity issues do not call this with
+	two arguments if the first argument is 0.
+     */
     Buffer(
       char* ptr, ///< Pointer to buffer.
       size_t n = 0 ///< Size of buffer.
     );
     /** Construct from two pointers.
 	@note This presumes a half open range, (start, end]
+	@note Due to ambiguity issues do not invoke this with
+	@a start == 0.
     */
     Buffer(
 	   char* start, ///< First valid character.
@@ -121,6 +130,10 @@ namespace ts {
 
   /** A chunk of read only memory.
       A convenience class because we pass this kind of pair frequently.
+
+      @note The default construct leaves the object
+      uninitialized. This is for performance reasons. To construct an
+      empty @c Buffer use @c Buffer(0).
    */
   struct ConstBuffer {
     typedef ConstBuffer self; ///< Self reference type.
@@ -133,13 +146,18 @@ namespace ts {
     /// Elements are in uninitialized state.
     ConstBuffer();
 
-    /// Construct from pointer and size.
+    /** Construct from pointer and size.
+	@note Due to ambiguity issues do not call this with
+	two arguments if the first argument is 0.
+     */
     ConstBuffer(
       char const * ptr, ///< Pointer to buffer.
       size_t n = 0///< Size of buffer.
     );
     /** Construct from two pointers.
 	@note This presumes a half open range (start, end]
+	@note Due to ambiguity issues do not invoke this with
+	@a start == 0.
     */
     ConstBuffer(
 	   char const* start, ///< First valid character.
@@ -238,10 +256,10 @@ namespace ts {
         This is convenient when tokenizing and @a p points at the token
         separator.
 
-        @note If @a *p is not a character in the buffer then @a this
-        is not changed and an empty buffer is returned. This means the
-        caller can simply pass the result of @c find and check for an
-        empty buffer returned to detect no more separators.
+        @note If @a *p is in the buffer then @a this is not changed
+        and an empty buffer is returned. This means the caller can
+        simply pass the result of @c find and check for an empty
+        buffer returned to detect no more separators.
 
         @return A buffer containing data up to but not including @a p.
     */

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/ebc4c1c3/proxy/hdrs/HTTP.cc
----------------------------------------------------------------------
diff --git a/proxy/hdrs/HTTP.cc b/proxy/hdrs/HTTP.cc
index 2ccad25..5b3d0ed 100644
--- a/proxy/hdrs/HTTP.cc
+++ b/proxy/hdrs/HTTP.cc
@@ -1462,6 +1462,8 @@ void
 HTTPHdr::_fill_target_cache() const
 {
   URL* url = this->url_get();
+  char const* port_ptr;
+
   m_target_in_url = false;
   m_port_in_header = false;
   m_host_mime = NULL;
@@ -1471,18 +1473,12 @@ HTTPHdr::_fill_target_cache() const
     m_port = url->port_get();
     m_port_in_header = 0 != url->port_get_raw();
     m_host_mime = NULL;
-  } else if (0 != (m_host_mime = const_cast<HTTPHdr*>(this)->field_find(MIME_FIELD_HOST, MIME_LEN_HOST))) {
-    // Check for port in the host.
-    char const* colon = static_cast<char const*>(memchr(m_host_mime->m_ptr_value, ':', m_host_mime->m_len_value));
-    
-    if (colon) {
+  } else if (0 != (m_host_mime = const_cast<HTTPHdr*>(this)->get_host_port_values(0, &m_host_length, &port_ptr, 0))) {
+    if (port_ptr) {
       m_port = 0;
-      m_host_length = colon - m_host_mime->m_ptr_value; // Length of just the host in the value.
-      for ( ++colon ; is_digit(*colon) ; ++colon )
-        m_port = m_port * 10 + *colon - '0';
-      m_port_in_header = 0 != m_port;
-    } else {
-      m_host_length = m_host_mime->m_len_value;
+      for ( ; is_digit(*port_ptr) ; ++port_ptr )
+        m_port = m_port * 10 + *port_ptr - '0';
+      m_port_in_header = (0 != m_port);
     }
     m_port = url_canonicalize_port(url->m_url_impl->m_url_type, m_port);
   }

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/ebc4c1c3/proxy/hdrs/HdrTest.cc
----------------------------------------------------------------------
diff --git a/proxy/hdrs/HdrTest.cc b/proxy/hdrs/HdrTest.cc
index 6fa7628..2e4f2a5 100644
--- a/proxy/hdrs/HdrTest.cc
+++ b/proxy/hdrs/HdrTest.cc
@@ -407,6 +407,7 @@ HdrTest::test_url()
     "http://172.16.28.101/",
     "http://[fc01:172:16:28:BAAD:BEEF:DEAD:101]:8080/",
 
+
     "foo:bar@some.place",
     "foo:bar@some.place/",
     "http://foo:bar@some.place",
@@ -423,12 +424,17 @@ HdrTest::test_url()
   static int nstrs = sizeof(strs) / sizeof(strs[0]);
 
   static char const* bad[] = {
+/*
     "http://[1:2:3:4:5:6:7:8:9]",
     "http://1:2:3:4:5:6:7:8:A:B",
     "http://bob.com[::1]",
     "http://[::1].com"
     "http://foo:bar:baz@bob.com/",
     "http://foo:bar:baz@[::1]:8080/",
+    "http://]",
+    "http://:",
+*/
+    "http:/"
   };
   static int nbad = sizeof(bad) / sizeof(bad[0]);
 
@@ -499,6 +505,21 @@ HdrTest::test_url()
     }
   }
 
+#if 0
+  if (!failed) {
+    Note("URL performance test start");
+    for (int j = 0 ; j < 100000 ; ++j) {
+      for (i = 0 ; i < nstrs ; ++i) {
+        char const* x = strs[i];
+        url.create(NULL);
+        err = url.parse(x, strlen(x));
+        url.destroy();
+      }
+    }
+    Note("URL performance test end");
+  }
+#endif
+
   return (failures_to_status("test_url", failed));
 }
 

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/ebc4c1c3/proxy/hdrs/MIME.cc
----------------------------------------------------------------------
diff --git a/proxy/hdrs/MIME.cc b/proxy/hdrs/MIME.cc
index b175ff6..30d2fd8 100644
--- a/proxy/hdrs/MIME.cc
+++ b/proxy/hdrs/MIME.cc
@@ -2102,6 +2102,62 @@ mime_field_value_append(HdrHeap *heap, MIMEHdrImpl *mh, MIMEField *field, const
     mh->recompute_cooked_stuff(field);
 }
 
+MIMEField* MIMEHdr::get_host_port_values(
+  char const** host_ptr, ///< Pointer to host.
+  int* host_len, ///< Length of host.
+  char const** port_ptr, ///< Pointer to port.
+  int* port_len
+  )
+{
+  MIMEField* field = this->field_find(MIME_FIELD_HOST, MIME_LEN_HOST);
+  if (host_ptr)
+    *host_ptr = 0;
+  if (host_len)
+    *host_len = 0;
+  if (port_ptr)
+    *port_ptr = 0;
+  if (port_len)
+    port_len = 0;
+
+  if (field) {
+    ts::ConstBuffer b(field->m_ptr_value, field->m_len_value);
+    ts::ConstBuffer host(0), port(0);
+
+    if (b) {
+      char const* x;
+
+      if ('[' == *b) {
+        x = static_cast<char const*>(memchr(b._ptr, ']', b._size));
+        if (x && b.contains(x+1) && ':' == x[1]) {
+          host = b.splitOn(x+1);
+          port = b;
+        } else {
+          host = b;
+        }
+      } else {
+        x = static_cast<char const*>(memrchr(b._ptr, ':', b._size));
+        if (x) {
+          host = b.splitOn(x);
+          port = b;
+        } else {
+          host = b;
+        }
+      }
+      
+      if (host) {
+        if (host_ptr) *host_ptr = host._ptr;
+        if (host_len) *host_len = static_cast<int>(host._size);
+      }
+      if (port) {
+        if (port_ptr) *port_ptr = port._ptr;
+        if (port_len) *port_len = static_cast<int>(port._size);
+      }
+    } else {
+      field = 0; // no value in field, signal fail.
+    }
+  }
+  return field;
+}
 
 /***********************************************************************
  *                                                                     *

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/ebc4c1c3/proxy/hdrs/MIME.h
----------------------------------------------------------------------
diff --git a/proxy/hdrs/MIME.h b/proxy/hdrs/MIME.h
index 058e88c..9664e27 100644
--- a/proxy/hdrs/MIME.h
+++ b/proxy/hdrs/MIME.h
@@ -933,6 +933,17 @@ public:
   int32_t get_cooked_cc_min_fresh();
   bool get_cooked_pragma_no_cache();
 
+  /** Get the value of the host field.
+      This parses the host field for brackets and port value.
+      @return The mime HOST field if it has a value, @c NULL otherwise.
+  */
+  MIMEField* get_host_port_values(
+			    char const** host_ptr, ///< [out] Pointer to host.
+			    int* host_len, ///< [out] Length of host.
+			    char const** port_ptr, ///< [out] Pointer to port.
+			    int* port_len ///< [out] Length of port.
+			    );
+
   void set_cooked_cc_need_revalidate_once();
   void unset_cooked_cc_need_revalidate_once();
 

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/ebc4c1c3/proxy/hdrs/URL.cc
----------------------------------------------------------------------
diff --git a/proxy/hdrs/URL.cc b/proxy/hdrs/URL.cc
index 17683c2..9085b4b 100644
--- a/proxy/hdrs/URL.cc
+++ b/proxy/hdrs/URL.cc
@@ -1213,206 +1213,49 @@ eof:
   @endverbatim
 
 */
-MIMEParseResult
-url_parse_internet(HdrHeap * heap, URLImpl * url, const char **start, const char *end, bool copy_strings)
-{
-  const char *cur;
-  const char *user_start = NULL;
-  const char *user_end = NULL;
-  const char *pass_start = NULL;
-  const char *pass_end = NULL;
-  const char *host_start = NULL;
-  const char *host_end = NULL;
-  const char *port_start = NULL;
-  const char *port_end = NULL;
-  const unsigned char user_offset = 0x3d;
-  const unsigned char user_mask = ((((unsigned char) ':') + user_offset) &
-                                   (((unsigned char) '@') + user_offset) & (((unsigned char) '/') + user_offset));
-  const unsigned char pass_offset = 0x10;
-  const unsigned char pass_mask = ((((unsigned char) '@') + pass_offset) & (((unsigned char) '/') + pass_offset));
-
-  cur = *start;
-
-  // fast case starts with "://"
-  if ((end - cur > 3) && (((cur[0] ^ ':') | (cur[1] ^ '/') | (cur[2] ^ '/')) == 0)) {
-    cur += 3;
-    goto parse_user1;
-  }
-  if (*cur != ':') {
-    goto parse_user1;
-  }
-  GETNEXT(eof);
-  if (*cur != '/') {
-    goto parse_user1;
-  }
-  GETNEXT(eof);
-  if (*cur != '/') {
-    goto parse_user1;
-  }
-  GETNEXT(eof);
-
-parse_user1:
-  user_start = cur;
-parse_user2:
-  if (((((unsigned char) *cur) + user_offset) & user_mask) == user_mask) {
-    if (*cur == ':') {
-      user_end = cur;
-      GETNEXT(done);
-      goto parse_pass1;
-    }
-    if (*cur == '@') {
-      user_end = cur;
-      GETNEXT(done);
-      goto parse_host1;
-    }
-    if (*cur == '/') {
-      host_start = user_start;
-      host_end = cur;
-      user_start = NULL;
-
-      GETNEXT(done);
-      goto done;
-    }
-  } else {
-    ink_debug_assert((*cur != ':') && (*cur != '@') && (*cur != '/'));
-  }
-  GETNEXT(done);
-  goto parse_user2;
-
-parse_pass1:
-  pass_start = cur;
-parse_pass2:
-  if (((((unsigned char) *cur) + pass_offset) & pass_mask) == pass_mask) {
-    if (*cur == '/') {
-      host_start = user_start;
-      host_end = user_end;
-      user_start = NULL;
-      user_end = NULL;
-
-      port_start = pass_start;
-      port_end = cur;
-      pass_start = NULL;
-
-      GETNEXT(done);
-      goto done;
-    }
-    if (*cur == '@') {
-      pass_end = cur;
-      GETNEXT(done);
-      goto parse_host1;
-    }
-  } else {
-    ink_debug_assert((*cur != '/') && (*cur != '@'));
-  }
-
-  GETNEXT(done);
-  goto parse_pass2;
-
-parse_host1:
-  host_start = cur;
-parse_host2:
-  if (*cur == ':') {
-    host_end = cur;
-    GETNEXT(done);
-    goto parse_port1;
-  }
-  if (*cur == '/') {
-    host_end = cur;
-    GETNEXT(done);
-    goto done;
-  }
-  GETNEXT(done);
-  goto parse_host2;
-
-parse_port1:
-  port_start = cur;
-parse_port2:
-  if (*cur == '/') {
-    port_end = cur;
-    GETNEXT(done);
-    goto done;
-  }
-  GETNEXT(done);
-  goto parse_port2;
-
-done:
-  if (!host_start) {
-    host_start = user_start;
-    host_end = user_end;
-    user_start = NULL;
-    user_end = NULL;
-
-    port_start = pass_start;
-    port_end = pass_end;
-    pass_start = NULL;
-    pass_end = NULL;
-  }
-
-  if (user_start) {
-    if (!user_end)
-      user_end = cur;
-    url_user_set(heap, url, user_start, user_end - user_start, copy_strings);
-  }
-  if (pass_start) {
-    if (!pass_end)
-      pass_end = cur;
-    url_password_set(heap, url, pass_start, pass_end - pass_start, copy_strings);
-  }
-  if (host_start) {
-    if (!host_end)
-      host_end = cur;
-    url_host_set(heap, url, host_start, host_end - host_start, copy_strings);
-  }
-  if (port_start) {
-    if (!port_end)
-      port_end = cur;
-    url_port_set(heap, url, port_start, port_end - port_start, copy_strings);
-  }
-
-  *start = cur;
-  return PARSE_DONE;
-
-eof:
-  return PARSE_ERROR;
-}
 
 MIMEParseResult
-amc_url_parse_internet(HdrHeap* heap, URLImpl* url,
+url_parse_internet(HdrHeap* heap, URLImpl* url,
                        char const ** start, char const *end,
                        bool copy_strings_p)
 {
-  ts::ConstBuffer cur(*start, end);
-  char const* point; // last point of interest.
+  char const* cur = *start;
+  char const* base; // Base for host/port field.
   char const* bracket = 0; // marker for open bracket, if any.
   ts::ConstBuffer user(0), passw(0), host(0), port(0);
   static size_t const MAX_COLON = 8; // max # of valid colons.
   size_t n_colon = 0;
-  char const* last_colon; // pointer to last colon seen.
+  char const* last_colon = 0; // pointer to last colon seen.
 
   // Do a quick check for "://"
-  if (cur._size > 3 &&
+  if (end - cur > 3 &&
       (((':' ^ *cur) | ('/' ^ cur[1]) | ('/' ^ cur[2])) == 0)) {
     cur += 3;
-  } else if (':' == *cur && ((++cur)._size == 0 ||
-                             ('/' == *cur && ((++cur)._size == 0 ||
-                                              ('/' == *cur && (++cur)._size == 0))))) {
+  } else if (':' == *cur && (++cur >= end ||
+                             ('/' == *cur && (++cur >= end ||
+                                              ('/' == *cur && ++cur >= end))))) {
     return PARSE_ERROR;
   }
+  base = cur;
   // skipped leading stuff, start real parsing.
-  point = cur._ptr;
-  while (cur._size) {
+  while (cur < end) {
     // Note: Each case is responsible for incrementing @a cur if
     // appropriate!
-    char c = *cur;
-    switch (c) {
+    switch (*cur) {
     case ']' : // address close
       if (0 == bracket || n_colon >= MAX_COLON)
         return PARSE_ERROR;
-      host.set(bracket+1, cur._ptr);
       ++cur;
-      // This must constitute the entire host so the next character must be
-      // missing (EOS), slash, or colon.
-      if (cur._size == 0 || '/' == *cur) { // done which is OK
+      /* We keep the brackets because there are too many other places
+         that depend on them and it's too painful to keep track if
+         they should be used. I thought about being clever with
+         stripping brackets from non-IPv6 content but that gets ugly
+         as well. Just not worth it.
+       */
+      host.set(bracket, cur);
+      // Spec requires This constitute the entire host so the next
+      // character must be missing (EOS), slash, or colon.
+      if (cur >= end || '/' == *cur) { // done which is OK
         last_colon = 0;
         break;
       } else if (':' != *cur) { // otherwise it must be a colon
@@ -1426,31 +1269,31 @@ amc_url_parse_internet(HdrHeap* heap, URLImpl* url,
     case ':' : // track colons, fail if too many.
       if (++n_colon > MAX_COLON)
         return PARSE_ERROR;
-      last_colon = cur._ptr;
+      last_colon = cur;
       ++cur;
       break;
     case '@' : // user/password marker.
       if (user || n_colon > 1)
         return PARSE_ERROR; // we already got one, or too many colons.
       if (n_colon) {
-        user.set(point, last_colon);
-        passw.set(last_colon+1, cur._ptr);
+        user.set(base, last_colon);
+        passw.set(last_colon+1, cur);
         n_colon= 0;
         last_colon = 0;
       } else {
-        user.set(point, cur._ptr);
+        user.set(base, cur);
       }
       ++cur;
-      point = cur._ptr; // interesting stuff is past '@'
+      base = cur;
       break;
     case '[' : // address open
-      if (bracket || point != cur._ptr) // must be first char in field
+      if (bracket || base != cur) // must be first char in field
         return PARSE_ERROR;
-      bracket = cur._ptr; // location and flag.
+      bracket = cur; // location and flag.
       ++cur;
       break;
     case '/' : // we're done with this phase.
-      cur._size = 0; // cause loop exit
+      end = cur; // cause loop exit
       break;
     default:
       ++cur;
@@ -1469,9 +1312,9 @@ amc_url_parse_internet(HdrHeap* heap, URLImpl* url,
   // @a host not set means no brackets to mark explicit host.
   if (!host) {
     if (1 == n_colon || MAX_COLON == n_colon) { // presume port.
-      host.set(point, last_colon);
+      host.set(base, last_colon);
     } else { // it's all host.
-      host.set(point, cur._ptr);
+      host.set(base, cur);
       last_colon = 0; // prevent port setting.
     }
   }
@@ -1480,13 +1323,13 @@ amc_url_parse_internet(HdrHeap* heap, URLImpl* url,
   
   if (last_colon) {
     ink_debug_assert(n_colon);
-    port.set(last_colon+1, cur._ptr);
+    port.set(last_colon+1, cur);
     if (!port._size)
       return PARSE_ERROR; // colon w/o port value.
     url_port_set(heap, url, port._ptr, port._size, copy_strings_p);
   }
-  if ('/' == *cur) ++cur._ptr;
-  *start = cur._ptr;
+  if ('/' == *cur) ++cur; // must do this after filling in host/port.
+  *start = cur;
   return PARSE_DONE;
 }
 /*-------------------------------------------------------------------------
@@ -1509,7 +1352,7 @@ url_parse_http(HdrHeap * heap, URLImpl * url, const char **start, const char *en
   const char *fragment_end = NULL;
   char mask;
 
-  err = amc_url_parse_internet(heap, url, start, end, copy_strings);
+  err = url_parse_internet(heap, url, start, end, copy_strings);
   if (err < 0)
     return err;
 
@@ -1668,7 +1511,7 @@ url_print(URLImpl * url, char *buf_start, int buf_length, int *buf_index_inout,
     // Force brackets for IPv6. Note colon must occur in first 5 characters.
     // But it can be less (e.g. "::1").
     int n = url->m_len_host;
-    bool bracket_p = (0 != memchr(url->m_ptr_host, ':', n > 5 ? 5 : n));
+    bool bracket_p = '[' != *url->m_ptr_host && (0 != memchr(url->m_ptr_host, ':', n > 5 ? 5 : n));
     if (bracket_p)
       TRY(mime_mem_print("[", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
     TRY(mime_mem_print(url->m_ptr_host, url->m_len_host,

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/ebc4c1c3/proxy/http/HttpTransact.cc
----------------------------------------------------------------------
diff --git a/proxy/http/HttpTransact.cc b/proxy/http/HttpTransact.cc
index 6a1e69e..43eff68 100644
--- a/proxy/http/HttpTransact.cc
+++ b/proxy/http/HttpTransact.cc
@@ -1797,18 +1797,15 @@ HttpTransact::DecideCacheLookup(State* s)
       // We could a) have 6000 alts (barf, puke, vomit) or b) use the original
       // host header in the url before doing all cache actions (lookups, writes, etc.)
       if (s->txn_conf->maintain_pristine_host_hdr) {
+        char const* host_hdr;
+        char const* port_hdr;
+        int host_len, port_len;
         // So, the host header will have the original host header.
-        int host_len;
-        const char *host_hdr = incoming_request->value_get(MIME_FIELD_HOST, MIME_LEN_HOST, &host_len);
-
-        if (host_hdr) {
-          char *tmp;
+        if (incoming_request->get_host_port_values(&host_hdr, &host_len, &port_hdr, &port_len)) {
           int port = 0;
-          tmp = (char *) memchr(host_hdr, ':', host_len);
-          if (tmp) {
-            s->cache_info.lookup_url->host_set(host_hdr, (tmp - host_hdr));
-
-            port = ink_atoi(tmp + 1, host_len - (tmp + 1 - host_hdr));
+          if (port_hdr) {
+            s->cache_info.lookup_url->host_set(host_hdr, host_len);
+            port = ink_atoi(port_hdr, port_len);
           } else {
             s->cache_info.lookup_url->host_set(host_hdr, host_len);
           }