You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafficserver.apache.org by am...@apache.org on 2012/05/28 07:15:23 UTC
[4/5] git commit: Fixes for handling Host field.
Fixes for handling Host field.
Project: http://git-wip-us.apache.org/repos/asf/trafficserver/repo
Commit: http://git-wip-us.apache.org/repos/asf/trafficserver/commit/ebc4c1c3
Tree: http://git-wip-us.apache.org/repos/asf/trafficserver/tree/ebc4c1c3
Diff: http://git-wip-us.apache.org/repos/asf/trafficserver/diff/ebc4c1c3
Branch: refs/heads/master
Commit: ebc4c1c3cd4d9b475a5d0e9980ee34120d7e1ffe
Parents: 86631b8
Author: Alan M. Carroll <am...@network-geographics.com>
Authored: Sun May 27 21:58:16 2012 -0500
Committer: Alan M. Carroll <am...@network-geographics.com>
Committed: Sun May 27 21:58:16 2012 -0500
----------------------------------------------------------------------
lib/ts/TsBuffer.h | 30 ++++-
proxy/hdrs/HTTP.cc | 18 ++--
proxy/hdrs/HdrTest.cc | 21 ++++
proxy/hdrs/MIME.cc | 56 ++++++++++
proxy/hdrs/MIME.h | 11 ++
proxy/hdrs/URL.cc | 229 ++++++---------------------------------
proxy/http/HttpTransact.cc | 17 ++--
7 files changed, 162 insertions(+), 220 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/ebc4c1c3/lib/ts/TsBuffer.h
----------------------------------------------------------------------
diff --git a/lib/ts/TsBuffer.h b/lib/ts/TsBuffer.h
index d784c02..dbcd72e 100644
--- a/lib/ts/TsBuffer.h
+++ b/lib/ts/TsBuffer.h
@@ -40,6 +40,10 @@ namespace ts {
struct ConstBuffer;
/** A chunk of writable memory.
A convenience class because we pass this kind of pair frequently.
+
+ @note The default construct leaves the object
+ uninitialized. This is for performance reasons. To construct an
+ empty @c Buffer use @c Buffer(0).
*/
struct Buffer {
typedef Buffer self; ///< Self reference type.
@@ -52,13 +56,18 @@ namespace ts {
/// Elements are in uninitialized state.
Buffer();
- /// Construct from pointer and size.
+ /** Construct from pointer and size.
+ @note Due to ambiguity issues do not call this with
+ two arguments if the first argument is 0.
+ */
Buffer(
char* ptr, ///< Pointer to buffer.
size_t n = 0 ///< Size of buffer.
);
/** Construct from two pointers.
@note This presumes a half open range, (start, end]
+ @note Due to ambiguity issues do not invoke this with
+ @a start == 0.
*/
Buffer(
char* start, ///< First valid character.
@@ -121,6 +130,10 @@ namespace ts {
/** A chunk of read only memory.
A convenience class because we pass this kind of pair frequently.
+
+ @note The default construct leaves the object
+ uninitialized. This is for performance reasons. To construct an
+ empty @c Buffer use @c Buffer(0).
*/
struct ConstBuffer {
typedef ConstBuffer self; ///< Self reference type.
@@ -133,13 +146,18 @@ namespace ts {
/// Elements are in uninitialized state.
ConstBuffer();
- /// Construct from pointer and size.
+ /** Construct from pointer and size.
+ @note Due to ambiguity issues do not call this with
+ two arguments if the first argument is 0.
+ */
ConstBuffer(
char const * ptr, ///< Pointer to buffer.
size_t n = 0///< Size of buffer.
);
/** Construct from two pointers.
@note This presumes a half open range (start, end]
+ @note Due to ambiguity issues do not invoke this with
+ @a start == 0.
*/
ConstBuffer(
char const* start, ///< First valid character.
@@ -238,10 +256,10 @@ namespace ts {
This is convenient when tokenizing and @a p points at the token
separator.
- @note If @a *p is not a character in the buffer then @a this
- is not changed and an empty buffer is returned. This means the
- caller can simply pass the result of @c find and check for an
- empty buffer returned to detect no more separators.
+ @note If @a *p is in the buffer then @a this is not changed
+ and an empty buffer is returned. This means the caller can
+ simply pass the result of @c find and check for an empty
+ buffer returned to detect no more separators.
@return A buffer containing data up to but not including @a p.
*/
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/ebc4c1c3/proxy/hdrs/HTTP.cc
----------------------------------------------------------------------
diff --git a/proxy/hdrs/HTTP.cc b/proxy/hdrs/HTTP.cc
index 2ccad25..5b3d0ed 100644
--- a/proxy/hdrs/HTTP.cc
+++ b/proxy/hdrs/HTTP.cc
@@ -1462,6 +1462,8 @@ void
HTTPHdr::_fill_target_cache() const
{
URL* url = this->url_get();
+ char const* port_ptr;
+
m_target_in_url = false;
m_port_in_header = false;
m_host_mime = NULL;
@@ -1471,18 +1473,12 @@ HTTPHdr::_fill_target_cache() const
m_port = url->port_get();
m_port_in_header = 0 != url->port_get_raw();
m_host_mime = NULL;
- } else if (0 != (m_host_mime = const_cast<HTTPHdr*>(this)->field_find(MIME_FIELD_HOST, MIME_LEN_HOST))) {
- // Check for port in the host.
- char const* colon = static_cast<char const*>(memchr(m_host_mime->m_ptr_value, ':', m_host_mime->m_len_value));
-
- if (colon) {
+ } else if (0 != (m_host_mime = const_cast<HTTPHdr*>(this)->get_host_port_values(0, &m_host_length, &port_ptr, 0))) {
+ if (port_ptr) {
m_port = 0;
- m_host_length = colon - m_host_mime->m_ptr_value; // Length of just the host in the value.
- for ( ++colon ; is_digit(*colon) ; ++colon )
- m_port = m_port * 10 + *colon - '0';
- m_port_in_header = 0 != m_port;
- } else {
- m_host_length = m_host_mime->m_len_value;
+ for ( ; is_digit(*port_ptr) ; ++port_ptr )
+ m_port = m_port * 10 + *port_ptr - '0';
+ m_port_in_header = (0 != m_port);
}
m_port = url_canonicalize_port(url->m_url_impl->m_url_type, m_port);
}
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/ebc4c1c3/proxy/hdrs/HdrTest.cc
----------------------------------------------------------------------
diff --git a/proxy/hdrs/HdrTest.cc b/proxy/hdrs/HdrTest.cc
index 6fa7628..2e4f2a5 100644
--- a/proxy/hdrs/HdrTest.cc
+++ b/proxy/hdrs/HdrTest.cc
@@ -407,6 +407,7 @@ HdrTest::test_url()
"http://172.16.28.101/",
"http://[fc01:172:16:28:BAAD:BEEF:DEAD:101]:8080/",
+
"foo:bar@some.place",
"foo:bar@some.place/",
"http://foo:bar@some.place",
@@ -423,12 +424,17 @@ HdrTest::test_url()
static int nstrs = sizeof(strs) / sizeof(strs[0]);
static char const* bad[] = {
+/*
"http://[1:2:3:4:5:6:7:8:9]",
"http://1:2:3:4:5:6:7:8:A:B",
"http://bob.com[::1]",
"http://[::1].com"
"http://foo:bar:baz@bob.com/",
"http://foo:bar:baz@[::1]:8080/",
+ "http://]",
+ "http://:",
+*/
+ "http:/"
};
static int nbad = sizeof(bad) / sizeof(bad[0]);
@@ -499,6 +505,21 @@ HdrTest::test_url()
}
}
+#if 0
+ if (!failed) {
+ Note("URL performance test start");
+ for (int j = 0 ; j < 100000 ; ++j) {
+ for (i = 0 ; i < nstrs ; ++i) {
+ char const* x = strs[i];
+ url.create(NULL);
+ err = url.parse(x, strlen(x));
+ url.destroy();
+ }
+ }
+ Note("URL performance test end");
+ }
+#endif
+
return (failures_to_status("test_url", failed));
}
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/ebc4c1c3/proxy/hdrs/MIME.cc
----------------------------------------------------------------------
diff --git a/proxy/hdrs/MIME.cc b/proxy/hdrs/MIME.cc
index b175ff6..30d2fd8 100644
--- a/proxy/hdrs/MIME.cc
+++ b/proxy/hdrs/MIME.cc
@@ -2102,6 +2102,62 @@ mime_field_value_append(HdrHeap *heap, MIMEHdrImpl *mh, MIMEField *field, const
mh->recompute_cooked_stuff(field);
}
+MIMEField* MIMEHdr::get_host_port_values(
+ char const** host_ptr, ///< Pointer to host.
+ int* host_len, ///< Length of host.
+ char const** port_ptr, ///< Pointer to port.
+ int* port_len
+ )
+{
+ MIMEField* field = this->field_find(MIME_FIELD_HOST, MIME_LEN_HOST);
+ if (host_ptr)
+ *host_ptr = 0;
+ if (host_len)
+ *host_len = 0;
+ if (port_ptr)
+ *port_ptr = 0;
+ if (port_len)
+ port_len = 0;
+
+ if (field) {
+ ts::ConstBuffer b(field->m_ptr_value, field->m_len_value);
+ ts::ConstBuffer host(0), port(0);
+
+ if (b) {
+ char const* x;
+
+ if ('[' == *b) {
+ x = static_cast<char const*>(memchr(b._ptr, ']', b._size));
+ if (x && b.contains(x+1) && ':' == x[1]) {
+ host = b.splitOn(x+1);
+ port = b;
+ } else {
+ host = b;
+ }
+ } else {
+ x = static_cast<char const*>(memrchr(b._ptr, ':', b._size));
+ if (x) {
+ host = b.splitOn(x);
+ port = b;
+ } else {
+ host = b;
+ }
+ }
+
+ if (host) {
+ if (host_ptr) *host_ptr = host._ptr;
+ if (host_len) *host_len = static_cast<int>(host._size);
+ }
+ if (port) {
+ if (port_ptr) *port_ptr = port._ptr;
+ if (port_len) *port_len = static_cast<int>(port._size);
+ }
+ } else {
+ field = 0; // no value in field, signal fail.
+ }
+ }
+ return field;
+}
/***********************************************************************
* *
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/ebc4c1c3/proxy/hdrs/MIME.h
----------------------------------------------------------------------
diff --git a/proxy/hdrs/MIME.h b/proxy/hdrs/MIME.h
index 058e88c..9664e27 100644
--- a/proxy/hdrs/MIME.h
+++ b/proxy/hdrs/MIME.h
@@ -933,6 +933,17 @@ public:
int32_t get_cooked_cc_min_fresh();
bool get_cooked_pragma_no_cache();
+ /** Get the value of the host field.
+ This parses the host field for brackets and port value.
+ @return The mime HOST field if it has a value, @c NULL otherwise.
+ */
+ MIMEField* get_host_port_values(
+ char const** host_ptr, ///< [out] Pointer to host.
+ int* host_len, ///< [out] Length of host.
+ char const** port_ptr, ///< [out] Pointer to port.
+ int* port_len ///< [out] Length of port.
+ );
+
void set_cooked_cc_need_revalidate_once();
void unset_cooked_cc_need_revalidate_once();
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/ebc4c1c3/proxy/hdrs/URL.cc
----------------------------------------------------------------------
diff --git a/proxy/hdrs/URL.cc b/proxy/hdrs/URL.cc
index 17683c2..9085b4b 100644
--- a/proxy/hdrs/URL.cc
+++ b/proxy/hdrs/URL.cc
@@ -1213,206 +1213,49 @@ eof:
@endverbatim
*/
-MIMEParseResult
-url_parse_internet(HdrHeap * heap, URLImpl * url, const char **start, const char *end, bool copy_strings)
-{
- const char *cur;
- const char *user_start = NULL;
- const char *user_end = NULL;
- const char *pass_start = NULL;
- const char *pass_end = NULL;
- const char *host_start = NULL;
- const char *host_end = NULL;
- const char *port_start = NULL;
- const char *port_end = NULL;
- const unsigned char user_offset = 0x3d;
- const unsigned char user_mask = ((((unsigned char) ':') + user_offset) &
- (((unsigned char) '@') + user_offset) & (((unsigned char) '/') + user_offset));
- const unsigned char pass_offset = 0x10;
- const unsigned char pass_mask = ((((unsigned char) '@') + pass_offset) & (((unsigned char) '/') + pass_offset));
-
- cur = *start;
-
- // fast case starts with "://"
- if ((end - cur > 3) && (((cur[0] ^ ':') | (cur[1] ^ '/') | (cur[2] ^ '/')) == 0)) {
- cur += 3;
- goto parse_user1;
- }
- if (*cur != ':') {
- goto parse_user1;
- }
- GETNEXT(eof);
- if (*cur != '/') {
- goto parse_user1;
- }
- GETNEXT(eof);
- if (*cur != '/') {
- goto parse_user1;
- }
- GETNEXT(eof);
-
-parse_user1:
- user_start = cur;
-parse_user2:
- if (((((unsigned char) *cur) + user_offset) & user_mask) == user_mask) {
- if (*cur == ':') {
- user_end = cur;
- GETNEXT(done);
- goto parse_pass1;
- }
- if (*cur == '@') {
- user_end = cur;
- GETNEXT(done);
- goto parse_host1;
- }
- if (*cur == '/') {
- host_start = user_start;
- host_end = cur;
- user_start = NULL;
-
- GETNEXT(done);
- goto done;
- }
- } else {
- ink_debug_assert((*cur != ':') && (*cur != '@') && (*cur != '/'));
- }
- GETNEXT(done);
- goto parse_user2;
-
-parse_pass1:
- pass_start = cur;
-parse_pass2:
- if (((((unsigned char) *cur) + pass_offset) & pass_mask) == pass_mask) {
- if (*cur == '/') {
- host_start = user_start;
- host_end = user_end;
- user_start = NULL;
- user_end = NULL;
-
- port_start = pass_start;
- port_end = cur;
- pass_start = NULL;
-
- GETNEXT(done);
- goto done;
- }
- if (*cur == '@') {
- pass_end = cur;
- GETNEXT(done);
- goto parse_host1;
- }
- } else {
- ink_debug_assert((*cur != '/') && (*cur != '@'));
- }
-
- GETNEXT(done);
- goto parse_pass2;
-
-parse_host1:
- host_start = cur;
-parse_host2:
- if (*cur == ':') {
- host_end = cur;
- GETNEXT(done);
- goto parse_port1;
- }
- if (*cur == '/') {
- host_end = cur;
- GETNEXT(done);
- goto done;
- }
- GETNEXT(done);
- goto parse_host2;
-
-parse_port1:
- port_start = cur;
-parse_port2:
- if (*cur == '/') {
- port_end = cur;
- GETNEXT(done);
- goto done;
- }
- GETNEXT(done);
- goto parse_port2;
-
-done:
- if (!host_start) {
- host_start = user_start;
- host_end = user_end;
- user_start = NULL;
- user_end = NULL;
-
- port_start = pass_start;
- port_end = pass_end;
- pass_start = NULL;
- pass_end = NULL;
- }
-
- if (user_start) {
- if (!user_end)
- user_end = cur;
- url_user_set(heap, url, user_start, user_end - user_start, copy_strings);
- }
- if (pass_start) {
- if (!pass_end)
- pass_end = cur;
- url_password_set(heap, url, pass_start, pass_end - pass_start, copy_strings);
- }
- if (host_start) {
- if (!host_end)
- host_end = cur;
- url_host_set(heap, url, host_start, host_end - host_start, copy_strings);
- }
- if (port_start) {
- if (!port_end)
- port_end = cur;
- url_port_set(heap, url, port_start, port_end - port_start, copy_strings);
- }
-
- *start = cur;
- return PARSE_DONE;
-
-eof:
- return PARSE_ERROR;
-}
MIMEParseResult
-amc_url_parse_internet(HdrHeap* heap, URLImpl* url,
+url_parse_internet(HdrHeap* heap, URLImpl* url,
char const ** start, char const *end,
bool copy_strings_p)
{
- ts::ConstBuffer cur(*start, end);
- char const* point; // last point of interest.
+ char const* cur = *start;
+ char const* base; // Base for host/port field.
char const* bracket = 0; // marker for open bracket, if any.
ts::ConstBuffer user(0), passw(0), host(0), port(0);
static size_t const MAX_COLON = 8; // max # of valid colons.
size_t n_colon = 0;
- char const* last_colon; // pointer to last colon seen.
+ char const* last_colon = 0; // pointer to last colon seen.
// Do a quick check for "://"
- if (cur._size > 3 &&
+ if (end - cur > 3 &&
(((':' ^ *cur) | ('/' ^ cur[1]) | ('/' ^ cur[2])) == 0)) {
cur += 3;
- } else if (':' == *cur && ((++cur)._size == 0 ||
- ('/' == *cur && ((++cur)._size == 0 ||
- ('/' == *cur && (++cur)._size == 0))))) {
+ } else if (':' == *cur && (++cur >= end ||
+ ('/' == *cur && (++cur >= end ||
+ ('/' == *cur && ++cur >= end))))) {
return PARSE_ERROR;
}
+ base = cur;
// skipped leading stuff, start real parsing.
- point = cur._ptr;
- while (cur._size) {
+ while (cur < end) {
// Note: Each case is responsible for incrementing @a cur if
// appropriate!
- char c = *cur;
- switch (c) {
+ switch (*cur) {
case ']' : // address close
if (0 == bracket || n_colon >= MAX_COLON)
return PARSE_ERROR;
- host.set(bracket+1, cur._ptr);
++cur;
- // This must constitute the entire host so the next character must be
- // missing (EOS), slash, or colon.
- if (cur._size == 0 || '/' == *cur) { // done which is OK
+ /* We keep the brackets because there are too many other places
+ that depend on them and it's too painful to keep track if
+ they should be used. I thought about being clever with
+ stripping brackets from non-IPv6 content but that gets ugly
+ as well. Just not worth it.
+ */
+ host.set(bracket, cur);
+ // Spec requires This constitute the entire host so the next
+ // character must be missing (EOS), slash, or colon.
+ if (cur >= end || '/' == *cur) { // done which is OK
last_colon = 0;
break;
} else if (':' != *cur) { // otherwise it must be a colon
@@ -1426,31 +1269,31 @@ amc_url_parse_internet(HdrHeap* heap, URLImpl* url,
case ':' : // track colons, fail if too many.
if (++n_colon > MAX_COLON)
return PARSE_ERROR;
- last_colon = cur._ptr;
+ last_colon = cur;
++cur;
break;
case '@' : // user/password marker.
if (user || n_colon > 1)
return PARSE_ERROR; // we already got one, or too many colons.
if (n_colon) {
- user.set(point, last_colon);
- passw.set(last_colon+1, cur._ptr);
+ user.set(base, last_colon);
+ passw.set(last_colon+1, cur);
n_colon= 0;
last_colon = 0;
} else {
- user.set(point, cur._ptr);
+ user.set(base, cur);
}
++cur;
- point = cur._ptr; // interesting stuff is past '@'
+ base = cur;
break;
case '[' : // address open
- if (bracket || point != cur._ptr) // must be first char in field
+ if (bracket || base != cur) // must be first char in field
return PARSE_ERROR;
- bracket = cur._ptr; // location and flag.
+ bracket = cur; // location and flag.
++cur;
break;
case '/' : // we're done with this phase.
- cur._size = 0; // cause loop exit
+ end = cur; // cause loop exit
break;
default:
++cur;
@@ -1469,9 +1312,9 @@ amc_url_parse_internet(HdrHeap* heap, URLImpl* url,
// @a host not set means no brackets to mark explicit host.
if (!host) {
if (1 == n_colon || MAX_COLON == n_colon) { // presume port.
- host.set(point, last_colon);
+ host.set(base, last_colon);
} else { // it's all host.
- host.set(point, cur._ptr);
+ host.set(base, cur);
last_colon = 0; // prevent port setting.
}
}
@@ -1480,13 +1323,13 @@ amc_url_parse_internet(HdrHeap* heap, URLImpl* url,
if (last_colon) {
ink_debug_assert(n_colon);
- port.set(last_colon+1, cur._ptr);
+ port.set(last_colon+1, cur);
if (!port._size)
return PARSE_ERROR; // colon w/o port value.
url_port_set(heap, url, port._ptr, port._size, copy_strings_p);
}
- if ('/' == *cur) ++cur._ptr;
- *start = cur._ptr;
+ if ('/' == *cur) ++cur; // must do this after filling in host/port.
+ *start = cur;
return PARSE_DONE;
}
/*-------------------------------------------------------------------------
@@ -1509,7 +1352,7 @@ url_parse_http(HdrHeap * heap, URLImpl * url, const char **start, const char *en
const char *fragment_end = NULL;
char mask;
- err = amc_url_parse_internet(heap, url, start, end, copy_strings);
+ err = url_parse_internet(heap, url, start, end, copy_strings);
if (err < 0)
return err;
@@ -1668,7 +1511,7 @@ url_print(URLImpl * url, char *buf_start, int buf_length, int *buf_index_inout,
// Force brackets for IPv6. Note colon must occur in first 5 characters.
// But it can be less (e.g. "::1").
int n = url->m_len_host;
- bool bracket_p = (0 != memchr(url->m_ptr_host, ':', n > 5 ? 5 : n));
+ bool bracket_p = '[' != *url->m_ptr_host && (0 != memchr(url->m_ptr_host, ':', n > 5 ? 5 : n));
if (bracket_p)
TRY(mime_mem_print("[", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
TRY(mime_mem_print(url->m_ptr_host, url->m_len_host,
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/ebc4c1c3/proxy/http/HttpTransact.cc
----------------------------------------------------------------------
diff --git a/proxy/http/HttpTransact.cc b/proxy/http/HttpTransact.cc
index 6a1e69e..43eff68 100644
--- a/proxy/http/HttpTransact.cc
+++ b/proxy/http/HttpTransact.cc
@@ -1797,18 +1797,15 @@ HttpTransact::DecideCacheLookup(State* s)
// We could a) have 6000 alts (barf, puke, vomit) or b) use the original
// host header in the url before doing all cache actions (lookups, writes, etc.)
if (s->txn_conf->maintain_pristine_host_hdr) {
+ char const* host_hdr;
+ char const* port_hdr;
+ int host_len, port_len;
// So, the host header will have the original host header.
- int host_len;
- const char *host_hdr = incoming_request->value_get(MIME_FIELD_HOST, MIME_LEN_HOST, &host_len);
-
- if (host_hdr) {
- char *tmp;
+ if (incoming_request->get_host_port_values(&host_hdr, &host_len, &port_hdr, &port_len)) {
int port = 0;
- tmp = (char *) memchr(host_hdr, ':', host_len);
- if (tmp) {
- s->cache_info.lookup_url->host_set(host_hdr, (tmp - host_hdr));
-
- port = ink_atoi(tmp + 1, host_len - (tmp + 1 - host_hdr));
+ if (port_hdr) {
+ s->cache_info.lookup_url->host_set(host_hdr, host_len);
+ port = ink_atoi(port_hdr, port_len);
} else {
s->cache_info.lookup_url->host_set(host_hdr, host_len);
}