You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafficserver.apache.org by am...@apache.org on 2011/08/12 15:28:32 UTC
svn commit: r1157098 - in /trafficserver/traffic/trunk/proxy/hdrs: MIME.cc
MIME.h
Author: amc
Date: Fri Aug 12 13:28:31 2011
New Revision: 1157098
URL: http://svn.apache.org/viewvc?rev=1157098&view=rev
Log:
TS-466: Multiline headers
Modified:
trafficserver/traffic/trunk/proxy/hdrs/MIME.cc
trafficserver/traffic/trunk/proxy/hdrs/MIME.h
Modified: trafficserver/traffic/trunk/proxy/hdrs/MIME.cc
URL: http://svn.apache.org/viewvc/trafficserver/traffic/trunk/proxy/hdrs/MIME.cc?rev=1157098&r1=1157097&r2=1157098&view=diff
==============================================================================
--- trafficserver/traffic/trunk/proxy/hdrs/MIME.cc (original)
+++ trafficserver/traffic/trunk/proxy/hdrs/MIME.cc Fri Aug 12 13:28:31 2011
@@ -2310,7 +2310,8 @@ _mime_scanner_init(MIMEScanner * scanner
scanner->m_line = NULL;
scanner->m_line_size = 0;
scanner->m_line_length = 0;
- scanner->m_state = MIME_SCANNER_STATE_START;
+ scanner->m_state = MIME_PARSE_BEFORE;
+
}
//////////////////////////////////////////////////////
@@ -2380,259 +2381,117 @@ mime_scanner_get(MIMEScanner * S,
const char **raw_input_s,
const char *raw_input_e,
const char **output_s,
- const char **output_e, bool * output_shares_raw_input, bool raw_input_eof, int raw_input_scan_type)
-{
+ const char **output_e, bool * output_shares_raw_input,
+ bool raw_input_eof, ///< All data has been received for this header.
+ int raw_input_scan_type
+) {
const char *raw_input_c, *lf_ptr;
+ MIMEParseResult zret = PARSE_CONT;
ink_debug_assert((raw_input_s != NULL) && (*raw_input_s != NULL));
ink_debug_assert(raw_input_e != NULL);
raw_input_c = *raw_input_s;
- // first try fastpath
- if ((S->m_line_length == 0) && (S->m_state == MIME_SCANNER_STATE_START)) {
- if ((raw_input_c<raw_input_e) && (*raw_input_c> '\r')) {
- ++raw_input_c;
- lf_ptr = (const char *) memchr(raw_input_c, '\n', raw_input_e - raw_input_c);
+ while (PARSE_CONT == zret && raw_input_c < raw_input_e) {
+ ptrdiff_t runway = raw_input_e - raw_input_c; // remaining input.
+ switch (S->m_state) {
+ case MIME_PARSE_BEFORE: // waiting to find a field.
+ // If we find leading CR LF then it's the last line of the header.
+ if (ParseRules::is_cr(*raw_input_c)
+ && runway >= 2
+ && ParseRules::is_lf(raw_input_c[1])
+ ) {
+ raw_input_c += 2;
+ zret = PARSE_OK;
+ } else {
+ S->m_state = MIME_PARSE_INSIDE;
+ }
+ break;
+ case MIME_PARSE_INSIDE:
+ lf_ptr = static_cast<char const*>(memchr(raw_input_c, ParseRules::CHAR_LF, runway));
if (lf_ptr) {
raw_input_c = lf_ptr + 1;
- if ((raw_input_scan_type == MIME_SCANNER_TYPE_LINE) || ((raw_input_c < raw_input_e) && (!is_ws(*raw_input_c)))) {
- *output_s = *raw_input_s;
- *output_e = raw_input_c;
- *output_shares_raw_input = true;
- *raw_input_s = raw_input_c; // consume input data
- return PARSE_OK;
+ if (MIME_SCANNER_TYPE_LINE == raw_input_scan_type) {
+ zret = PARSE_OK;
+ S->m_state = MIME_PARSE_BEFORE;
+ } else {
+ S->m_state = MIME_PARSE_AFTER;
}
- }
- } else if ((raw_input_e >= raw_input_c + 2) &&
- ParseRules::is_cr(*raw_input_c) && ParseRules::is_lf(*(raw_input_c + 1))) {
- raw_input_c += 2;
- *output_s = *raw_input_s;
- *output_e = raw_input_c;
- *output_shares_raw_input = true;
- *raw_input_s = raw_input_c; // consume input data
- return PARSE_OK;
- }
- }
- // fastpath conditions didn't match -- fall through to general case
-
- raw_input_c = *raw_input_s;
-
- int data_size;
-
- *output_s = NULL;
- *output_e = NULL;
-
- //////////////////////////////////////////////////////////////////////
- // enter with data in [*raw_input_s .. raw_input_e] & scan the data //
- // according to the scanning state --- if exiting the scanner and //
- // not in SCANNING_DONE, save data btw *raw_input_s & raw_input_e, //
- // and return for more data. //
- //////////////////////////////////////////////////////////////////////
-
-
-loop:
- ink_debug_assert(raw_input_e >= raw_input_c);
-
- switch (S->m_state) {
- ////////////////////////////////////////////////////////////////////
- // STATE_START --- seen 0 characters, need to look for presence //
- // of leading CRLF or LF because this is a special line, and //
- // should not lead to continuation line processing after it. //
- ////////////////////////////////////////////////////////////////////
-
- case MIME_SCANNER_STATE_START:
- {
- if (raw_input_c >= raw_input_e)
- break; // out of data
- if (ParseRules::is_cr(*raw_input_c)) {
- ++raw_input_c;
- S->m_state = MIME_SCANNER_STATE_CHAR_2;
- } else if (ParseRules::is_lf(*raw_input_c)) {
- ++raw_input_c;
- S->m_state = MIME_SCANNER_STATE_DONE; // no cont after blank line
- break;
- } else {
- ++raw_input_c;
- S->m_state = MIME_SCANNER_STATE_SCANNING_FOR_LF;
- goto loop;
- }
- }
-
- ////////////////////////////////////////////////////////////////////
- // STATE_CHAR_2 --- first character was a CR, and we want to see //
- // if this character is an LF. We do this because a line //
- // starting with CRLF is special, and should not lead to //
- // continuation line processing after it. //
- ////////////////////////////////////////////////////////////////////
-
- case MIME_SCANNER_STATE_CHAR_2:
- {
- if (raw_input_c >= raw_input_e)
- break; // out of data
- if (ParseRules::is_lf(*raw_input_c)) {
- ++raw_input_c;
- S->m_state = MIME_SCANNER_STATE_DONE;
- break;
} else {
- ++raw_input_c;
- S->m_state = MIME_SCANNER_STATE_SCANNING_FOR_LF;
- }
- }
-
- //////////////////////////////////////////////////////////////////////
- // STATE_SCANNING_FOR_LF --- the line did not start with LF or CRLF //
- // so, we now just gobble up characters until we find final LF //
- //////////////////////////////////////////////////////////////////////
-
- case MIME_SCANNER_STATE_SCANNING_FOR_LF:
- {
- lf_ptr = (const char *)
- memchr(raw_input_c, '\n', raw_input_e - raw_input_c);
-
- // if found LF, eat through it, else eat till end of buffer
- raw_input_c = (lf_ptr ? lf_ptr + 1 : raw_input_e);
- if (!lf_ptr)
- break; // out of data before LF
-
- // no continuation lines when MIME_SCANNER_TYPE_LINE
- if (raw_input_scan_type == MIME_SCANNER_TYPE_LINE) {
- S->m_state = MIME_SCANNER_STATE_DONE;
- break;
- }
- S->m_state = MIME_SCANNER_STATE_SCANNING_FOR_CONTINUATION;
- }
-
- ////////////////////////////////////////////////////////////////////
- // STATE_SCANNING_FOR_CONTINUATION --- we have found the trailing //
- // LF, but we can't return the line yet, because if the next //
- // line starts with a space, we need to glue the next line //
- // onto this line as a continuation line. //
- // //
- // Note that we DO need to handle the case of EOS specially //
- // here, because if we don't, on EOS we'll conclude that //
- // the line is actually not complete yet, and will return //
- // PARSE_ERROR instead of a successful line. We can't //
- // require that all lines are followed by characters to //
- // disambiguate continuation lines. //
- ////////////////////////////////////////////////////////////////////
-
- case MIME_SCANNER_STATE_SCANNING_FOR_CONTINUATION:
- {
- if (raw_input_c >= raw_input_e) // out of data
- {
- if (raw_input_eof)
- S->m_state = MIME_SCANNER_STATE_DONE;
- break;
+ raw_input_c = raw_input_e; // grab all that's available.
}
-
- if (!is_ws(*raw_input_c)) // peek at character, if not WS, no cont line
- {
- S->m_state = MIME_SCANNER_STATE_DONE;
- break; // done with line
+ break;
+ case MIME_PARSE_AFTER:
+ // After a LF. Might be the end or a continuation.
+ if (ParseRules::is_ws(*raw_input_c)) {
+ S->m_state = MIME_PARSE_INSIDE; // back inside the field.
} else {
- S->m_state = MIME_SCANNER_STATE_EATING_WS;
- break; // save away pre-WS data
- }
- }
-
- ////////////////////////////////////////////////////////////////////
- // MIME_SCANNER_STATE_EATING_WS --- the next character after the //
- // final LF was indeed whitespace, so we need to consume all //
- // the whitespace characters up to the first non-whitespace. //
- // //
- // Before we were called, the previous line had already been //
- // copied into the line buffer, so when we break out of this //
- // state, all the non whitespace data can be glued onto the //
- // stuff in the line buffer. //
- ////////////////////////////////////////////////////////////////////
-
- case MIME_SCANNER_STATE_EATING_WS:
- {
- while ((raw_input_c < raw_input_e) && is_ws(*raw_input_c))
- ++raw_input_c;
-
- *raw_input_s = raw_input_c; // eat up input characters
- if (raw_input_c < raw_input_e) // now treat line normal line
- {
- S->m_state = MIME_SCANNER_STATE_SCANNING_FOR_LF;
- goto loop;
+ S->m_state = MIME_PARSE_BEFORE; // field terminated.
+ zret = PARSE_OK;
}
- break; // out of data
+ break;
}
-
- default:
- ink_release_assert(0);
}
- ///////////////////////////////////////////////////////////////////////
- // we get here if we are out of data, or we are done with a line, or //
- // we are beginning to eat continuation-line whitespace and want to //
- // save away the current line data. //
- // //
- // if we are done scanning, and have no pre-existing buffered data, //
- // we can use the raw input data directly as the next parser line, //
- // otherwise we append the data to the scanner buffer. //
- ///////////////////////////////////////////////////////////////////////
+ ptrdiff_t data_size = raw_input_c - *raw_input_s;
- data_size = (int) (raw_input_c - *raw_input_s);
-
- if ((S->m_state == MIME_SCANNER_STATE_DONE) && (S->m_line_length == 0)) {
- *output_s = *raw_input_s;
- *output_e = raw_input_c;
- *output_shares_raw_input = true;
- } else {
- if (data_size) {
- mime_scanner_append(S, *raw_input_s, data_size);
- if (S->m_state == MIME_SCANNER_STATE_EATING_WS) {
- if (S->m_line_length && (S->m_line[S->m_line_length - 1] == ParseRules::CHAR_LF)) {
- --S->m_line_length;
- if (S->m_line_length && (S->m_line[S->m_line_length - 1] == ParseRules::CHAR_CR))
- --S->m_line_length;
+ if (data_size && S->m_line_length) {
+ // If we're already accumulating, continue to do so if we have data.
+ mime_scanner_append(S, *raw_input_s, data_size);
+ data_size = 0;
+ }
+
+ if (PARSE_CONT == zret) {
+ // data ran out before we got a clear final result.
+ // There are a number of things we need to check that may adjust
+ // that result. It's less complex to do this cleanup here than to
+ // handle it in the parser state machine.
+ if (raw_input_eof) {
+ // Should never return PARSE_CONT if we've hit EOF.
+ if (0 == data_size) {
+ // all input previously consumed. If we're between fields, that's cool.
+ if (MIME_PARSE_INSIDE != S->m_state) {
+ S->m_state = MIME_PARSE_BEFORE; // probably not needed...
+ zret = PARSE_DONE;
+ } else {
+ zret = PARSE_ERROR; // unterminated field.
}
+ } else if (MIME_PARSE_AFTER == S->m_state) {
+ // Special case it seems - need to accept the final field
+ // even if there's no header terminating CR LF. We check for
+ // absolute end of input because otherwise this might be
+ // a multiline field where we haven't seen the next leading space.
+ S->m_state = MIME_PARSE_BEFORE;
+ zret = PARSE_OK;
+ } else {
+ // Partial input, no field / line CR LF
+ zret = PARSE_ERROR; // Unterminated field.
}
+ } else if (data_size) {
+ // Inside a field but more data is expected. Save what we've got.
+ mime_scanner_append(S, *raw_input_s, data_size);
}
+ }
- *output_s = S->m_line;
- *output_e = *output_s + S->m_line_length;
- *output_shares_raw_input = false;
- }
-
- ///////////////////////////////////////////////////////////
- // we either have: //
- // a full line ready: PARSE_OK //
- // a partial line ready, but not at eof: PARSE_CONT //
- // a partial line ready, but at eof: PARSE_ERROR //
- // zero bytes ready, but are out of data: PARSE_DONE //
- ///////////////////////////////////////////////////////////
-
- *raw_input_s = raw_input_c; // consume input data
-
-#ifdef DEBUG
- ink_debug_assert(*output_e - *output_s >= 0);
- checksum_block(*output_s, (int) (*output_e - *output_s));
-#endif
-
- if (S->m_state == MIME_SCANNER_STATE_DONE) // got LF, line ready
- {
- S->m_line_length = 0;
- S->m_state = MIME_SCANNER_STATE_START;
- return PARSE_OK;
- } else {
- if (*raw_input_s < raw_input_e)
- goto loop;
- else if (!raw_input_eof) // no LF yet, need more data
- return PARSE_CONT;
- else // ack! no LF but EOF!
- {
- if (S->m_line_length > 0)
- return PARSE_ERROR;
- else
- return PARSE_DONE;
+ // adjust out arguments.
+ if (PARSE_CONT != zret) {
+ if (0 != S->m_line_length) {
+ *output_s = S->m_line;
+ *output_e = *output_s + S->m_line_length;
+ *output_shares_raw_input = false;
+ S->m_line_length = 0;
+ S->m_line = 0;
+ } else {
+ *output_s = *raw_input_s;
+ *output_e = raw_input_c;
+ *output_shares_raw_input = true;
}
}
+
+ *raw_input_s = raw_input_c; // mark input consumed.
+ return zret;
}
-
/*-------------------------------------------------------------------------
-------------------------------------------------------------------------*/
Modified: trafficserver/traffic/trunk/proxy/hdrs/MIME.h
URL: http://svn.apache.org/viewvc/trafficserver/traffic/trunk/proxy/hdrs/MIME.h?rev=1157098&r1=1157097&r2=1157098&view=diff
==============================================================================
--- trafficserver/traffic/trunk/proxy/hdrs/MIME.h (original)
+++ trafficserver/traffic/trunk/proxy/hdrs/MIME.h Fri Aug 12 13:28:31 2011
@@ -52,12 +52,12 @@ enum
UNDEFINED_COUNT = -1
};
-#define MIME_SCANNER_STATE_START 0
-#define MIME_SCANNER_STATE_CHAR_2 1
-#define MIME_SCANNER_STATE_SCANNING_FOR_LF 2
-#define MIME_SCANNER_STATE_SCANNING_FOR_CONTINUATION 3
-#define MIME_SCANNER_STATE_EATING_WS 4
-#define MIME_SCANNER_STATE_DONE 5
+/// State of the MIME scanner's parsing state machine (stored in MIMEScanner::m_state).
+enum MimeParseState {
+ MIME_PARSE_BEFORE, ///< Before a field: initial state, waiting for a field to begin.
+ MIME_PARSE_INSIDE, ///< Inside a field: scanning for the line feed that ends the line.
+ MIME_PARSE_AFTER, ///< After a field's line feed: next character decides continuation or end.
+};
#define MIME_SCANNER_TYPE_LINE 0
#define MIME_SCANNER_TYPE_FIELD 1
@@ -259,7 +259,8 @@ struct MIMEScanner
//int m_type; // what kind of scanner: raw line, or field (this has never been used)
int m_line_length; // size of real live data in buffer
int m_line_size; // total allocated size of buffer
- int m_state; // state of scanning state machine
+// int m_state; // state of scanning state machine
+ MimeParseState m_state; ///< Parsing machine state.
};