Posted to commits@couchdb.apache.org by ja...@apache.org on 2012/11/12 21:48:52 UTC

[10/12] update mochiweb to 2.3.2

http://git-wip-us.apache.org/repos/asf/couchdb/blob/6fdb9e07/src/mochiweb/src/mochiweb_headers.erl
----------------------------------------------------------------------
diff --git a/src/mochiweb/src/mochiweb_headers.erl b/src/mochiweb/src/mochiweb_headers.erl
new file mode 100644
index 0000000..b49cf9e
--- /dev/null
+++ b/src/mochiweb/src/mochiweb_headers.erl
@@ -0,0 +1,420 @@
+%% @author Bob Ippolito <bo...@mochimedia.com>
+%% @copyright 2007 Mochi Media, Inc.
+
+%% @doc Case-preserving (but case-insensitive) HTTP header dictionary.
+
+-module(mochiweb_headers).
+-author('bob@mochimedia.com').
+-export([empty/0, from_list/1, insert/3, enter/3, get_value/2, lookup/2]).
+-export([delete_any/2, get_primary_value/2, get_combined_value/2]).
+-export([default/3, enter_from_list/2, default_from_list/2]).
+-export([to_list/1, make/1]).
+-export([from_binary/1]).
+
+%% @type headers().
+%% @type key() = atom() | binary() | string().
+%% @type value() = atom() | binary() | string() | integer().
+
+%% @spec empty() -> headers()
+%% @doc Create an empty headers structure.
+empty() ->
+    gb_trees:empty().
+
+%% @spec make(headers() | [{key(), value()}]) -> headers()
+%% @doc Construct a headers() from the given list.
+make(L) when is_list(L) ->
+    from_list(L);
+%% Assume a non-list is already a mochiweb_headers structure.
+make(T) ->
+    T.
+
+%% @spec from_binary(iolist()) -> headers()
+%% @doc Transforms a raw HTTP header into a mochiweb headers structure.
+%%
+%%      The given raw HTTP header can be one of the following:
+%%
+%%      1) A string or a binary representing a full HTTP header ending with
+%%         double CRLF.
+%%         Examples:
+%%         ```
+%%         "Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n"
+%%         <<"Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n">>'''
+%%
+%%      2) A list of binaries or strings where each element represents a raw
+%%         HTTP header line ending with a single CRLF.
+%%         Examples:
+%%         ```
+%%         [<<"Content-Length: 47\r\n">>, <<"Content-Type: text/plain\r\n">>]
+%%         ["Content-Length: 47\r\n", "Content-Type: text/plain\r\n"]
+%%         ["Content-Length: 47\r\n", <<"Content-Type: text/plain\r\n">>]'''
+%%
+from_binary(RawHttpHeader) when is_binary(RawHttpHeader) ->
+    from_binary(RawHttpHeader, []);
+from_binary(RawHttpHeaderList) ->
+    from_binary(list_to_binary([RawHttpHeaderList, "\r\n"])).
+
+from_binary(RawHttpHeader, Acc) ->
+    case erlang:decode_packet(httph, RawHttpHeader, []) of
+        {ok, {http_header, _, H, _, V}, Rest} ->
+            from_binary(Rest, [{H, V} | Acc]);
+        _ ->
+            make(Acc)
+    end.
+
+%% @spec from_list([{key(), value()}]) -> headers()
+%% @doc Construct a headers() from the given list.
+from_list(List) ->
+    lists:foldl(fun ({K, V}, T) -> insert(K, V, T) end, empty(), List).
+
+%% @spec enter_from_list([{key(), value()}], headers()) -> headers()
+%% @doc Insert pairs into the headers, replacing any values for existing keys.
+enter_from_list(List, T) ->
+    lists:foldl(fun ({K, V}, T1) -> enter(K, V, T1) end, T, List).
+
+%% @spec default_from_list([{key(), value()}], headers()) -> headers()
+%% @doc Insert pairs into the headers for keys that do not already exist.
+default_from_list(List, T) ->
+    lists:foldl(fun ({K, V}, T1) -> default(K, V, T1) end, T, List).
+
+%% @spec to_list(headers()) -> [{key(), string()}]
+%% @doc Return the contents of the headers. The key of each pair will be
+%%      the exact key that was first inserted (e.g. it may be an atom or a
+%%      binary; case is preserved).
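+%%
+%%      For example (illustrative values):
+%%      ```
+%%      H = make([{"X-Foo", "1"}, {"x-foo", "2"}]),
+%%      [{"X-Foo", "1, 2"}] = to_list(H)'''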
+to_list(T) ->
+    F = fun ({K, {array, L}}, Acc) ->
+                L1 = lists:reverse(L),
+                lists:foldl(fun (V, Acc1) -> [{K, V} | Acc1] end, Acc, L1);
+            (Pair, Acc) ->
+                [Pair | Acc]
+        end,
+    lists:reverse(lists:foldl(F, [], gb_trees:values(T))).
+
+%% @spec get_value(key(), headers()) -> string() | undefined
+%% @doc Return the value of the given header using a case insensitive search.
+%%      undefined will be returned for keys that are not present.
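+%%
+%%      For example (illustrative values):
+%%      ```
+%%      H = make([{"Content-Length", "47"}]),
+%%      "47" = get_value("content-length", H),
+%%      undefined = get_value("etag", H)'''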
+get_value(K, T) ->
+    case lookup(K, T) of
+        {value, {_, V}} ->
+            expand(V);
+        none ->
+            undefined
+    end.
+
+%% @spec get_primary_value(key(), headers()) -> string() | undefined
+%% @doc Return the value of the given header up to the first semicolon using
+%%      a case insensitive search. undefined will be returned for keys
+%%      that are not present.
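+%%
+%%      For example (an illustrative Content-Type value):
+%%      ```
+%%      H = make([{"Content-Type", "text/html; charset=utf-8"}]),
+%%      "text/html" = get_primary_value("content-type", H)'''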
+get_primary_value(K, T) ->
+    case get_value(K, T) of
+        undefined ->
+            undefined;
+        V ->
+            lists:takewhile(fun (C) -> C =/= $; end, V)
+    end.
+
+%% @spec get_combined_value(key(), headers()) -> string() | undefined
+%% @doc Return the value from the given header using a case insensitive search.
+%%      If the value of the header is a comma-separated list whose values
+%%      are all identical, that single value will be returned. undefined
+%%      will be returned for keys that are not present, or when the values
+%%      in the list are not all the same.
+%%
+%%      NOTE: This is not intended as a general-purpose accessor. If you
+%%            need all of the values in the combined header, use
+%%            `tokenize_header_value/1' instead.
+%%
+%%      Section 4.2 of RFC 2616 (HTTP/1.1) states that multiple
+%%      message-header fields with the same field-name MAY be present in a
+%%      message if and only if the entire field-value for that header field
+%%      is defined as a comma-separated list [i.e., #(values)].
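+%%
+%%      For example (illustrative values):
+%%      ```
+%%      H = make([{"Allow", "GET, GET, GET"}, {"Vary", "accept, cookie"}]),
+%%      "GET" = get_combined_value("allow", H),
+%%      undefined = get_combined_value("vary", H)'''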
+get_combined_value(K, T) ->
+    case get_value(K, T) of
+        undefined ->
+            undefined;
+        V ->
+            case sets:to_list(sets:from_list(tokenize_header_value(V))) of
+                [Val] ->
+                    Val;
+                _ ->
+                    undefined
+            end
+    end.
+
+%% @spec lookup(key(), headers()) -> {value, {key(), string()}} | none
+%% @doc Return the case preserved key and value for the given header using
+%%      a case insensitive search. none will be returned for keys that are
+%%      not present.
+lookup(K, T) ->
+    case gb_trees:lookup(normalize(K), T) of
+        {value, {K0, V}} ->
+            {value, {K0, expand(V)}};
+        none ->
+            none
+    end.
+
+%% @spec default(key(), value(), headers()) -> headers()
+%% @doc Insert the pair into the headers if it does not already exist.
+default(K, V, T) ->
+    K1 = normalize(K),
+    V1 = any_to_list(V),
+    try gb_trees:insert(K1, {K, V1}, T)
+    catch
+        error:{key_exists, _} ->
+            T
+    end.
+
+%% @spec enter(key(), value(), headers()) -> headers()
+%% @doc Insert the pair into the headers, replacing any pre-existing key.
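+%%
+%%      For example (illustrative values; contrast with default/3, which
+%%      keeps the existing value):
+%%      ```
+%%      H = make([{"Host", "example.com"}]),
+%%      "other.com" = get_value("host", enter("Host", "other.com", H)),
+%%      "example.com" = get_value("host", default("Host", "other.com", H))'''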
+enter(K, V, T) ->
+    K1 = normalize(K),
+    V1 = any_to_list(V),
+    gb_trees:enter(K1, {K, V1}, T).
+
+%% @spec insert(key(), value(), headers()) -> headers()
+%% @doc Insert the pair into the headers, merging with any pre-existing key.
+%%      A merge is done with Value = V0 ++ ", " ++ V1.
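+%%
+%%      For example (illustrative values):
+%%      ```
+%%      H0 = make([{"Accept", "text/html"}]),
+%%      H1 = insert("Accept", "text/plain", H0),
+%%      "text/html, text/plain" = get_value("accept", H1)'''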
+insert(K, V, T) ->
+    K1 = normalize(K),
+    V1 = any_to_list(V),
+    try gb_trees:insert(K1, {K, V1}, T)
+    catch
+        error:{key_exists, _} ->
+            {K0, V0} = gb_trees:get(K1, T),
+            V2 = merge(K1, V1, V0),
+            gb_trees:update(K1, {K0, V2}, T)
+    end.
+
+%% @spec delete_any(key(), headers()) -> headers()
+%% @doc Delete the header corresponding to key if it is present.
+delete_any(K, T) ->
+    K1 = normalize(K),
+    gb_trees:delete_any(K1, T).
+
+%% Internal API
+
+tokenize_header_value(undefined) ->
+    undefined;
+tokenize_header_value(V) ->
+    reversed_tokens(trim_and_reverse(V, false), [], []).
+
+trim_and_reverse([S | Rest], Reversed) when S=:=$ ; S=:=$\n; S=:=$\t ->
+    trim_and_reverse(Rest, Reversed);
+trim_and_reverse(V, false) ->
+    trim_and_reverse(lists:reverse(V), true);
+trim_and_reverse(V, true) ->
+    V.
+
+reversed_tokens([], [], Acc) ->
+    Acc;
+reversed_tokens([], Token, Acc) ->
+    [Token | Acc];
+reversed_tokens("\"" ++ Rest, [], Acc) ->
+    case extract_quoted_string(Rest, []) of
+        {String, NewRest} ->
+            reversed_tokens(NewRest, [], [String | Acc]);
+        undefined ->
+            undefined
+    end;
+reversed_tokens("\"" ++ _Rest, _Token, _Acc) ->
+    undefined;
+reversed_tokens([C | Rest], [], Acc) when C=:=$ ;C=:=$\n;C=:=$\t;C=:=$, ->
+    reversed_tokens(Rest, [], Acc);
+reversed_tokens([C | Rest], Token, Acc) when C=:=$ ;C=:=$\n;C=:=$\t;C=:=$, ->
+    reversed_tokens(Rest, [], [Token | Acc]);
+reversed_tokens([C | Rest], Token, Acc) ->
+    reversed_tokens(Rest, [C | Token], Acc);
+reversed_tokens(_, _, _) ->
+    undefined.
+
+extract_quoted_string([], _Acc) ->
+    undefined;
+extract_quoted_string("\"\\" ++ Rest, Acc) ->
+    extract_quoted_string(Rest, "\"" ++ Acc);
+extract_quoted_string("\"" ++ Rest, Acc) ->
+    {Acc, Rest};
+extract_quoted_string([C | Rest], Acc) ->
+    extract_quoted_string(Rest, [C | Acc]).
+
+expand({array, L}) ->
+    mochiweb_util:join(lists:reverse(L), ", ");
+expand(V) ->
+    V.
+
+merge("set-cookie", V1, {array, L}) ->
+    {array, [V1 | L]};
+merge("set-cookie", V1, V0) ->
+    {array, [V1, V0]};
+merge(_, V1, V0) ->
+    V0 ++ ", " ++ V1.
+
+normalize(K) when is_list(K) ->
+    string:to_lower(K);
+normalize(K) when is_atom(K) ->
+    normalize(atom_to_list(K));
+normalize(K) when is_binary(K) ->
+    normalize(binary_to_list(K)).
+
+any_to_list(V) when is_list(V) ->
+    V;
+any_to_list(V) when is_atom(V) ->
+    atom_to_list(V);
+any_to_list(V) when is_binary(V) ->
+    binary_to_list(V);
+any_to_list(V) when is_integer(V) ->
+    integer_to_list(V).
+
+%%
+%% Tests.
+%%
+-ifdef(TEST).
+-include_lib("eunit/include/eunit.hrl").
+
+make_test() ->
+    Identity = make([{hdr, foo}]),
+    ?assertEqual(
+       Identity,
+       make(Identity)).
+
+enter_from_list_test() ->
+    H = make([{hdr, foo}]),
+    ?assertEqual(
+       [{baz, "wibble"}, {hdr, "foo"}],
+       to_list(enter_from_list([{baz, wibble}], H))),
+    ?assertEqual(
+       [{hdr, "bar"}],
+       to_list(enter_from_list([{hdr, bar}], H))),
+    ok.
+
+default_from_list_test() ->
+    H = make([{hdr, foo}]),
+    ?assertEqual(
+       [{baz, "wibble"}, {hdr, "foo"}],
+       to_list(default_from_list([{baz, wibble}], H))),
+    ?assertEqual(
+       [{hdr, "foo"}],
+       to_list(default_from_list([{hdr, bar}], H))),
+    ok.
+
+get_primary_value_test() ->
+    H = make([{hdr, foo}, {baz, <<"wibble;taco">>}]),
+    ?assertEqual(
+       "foo",
+       get_primary_value(hdr, H)),
+    ?assertEqual(
+       undefined,
+       get_primary_value(bar, H)),
+    ?assertEqual(
+       "wibble",
+       get_primary_value(<<"baz">>, H)),
+    ok.
+
+get_combined_value_test() ->
+    H = make([{hdr, foo}, {baz, <<"wibble,taco">>}, {content_length, "123, 123"},
+              {test, " 123,  123,     123  , 123,123 "},
+              {test2, "456,  123,     123  , 123"},
+              {test3, "123"}, {test4, " 123, "}]),
+    ?assertEqual(
+       "foo",
+       get_combined_value(hdr, H)),
+    ?assertEqual(
+       undefined,
+       get_combined_value(bar, H)),
+    ?assertEqual(
+       undefined,
+       get_combined_value(<<"baz">>, H)),
+    ?assertEqual(
+       "123",
+       get_combined_value(<<"content_length">>, H)),
+    ?assertEqual(
+       "123",
+       get_combined_value(<<"test">>, H)),
+    ?assertEqual(
+       undefined,
+       get_combined_value(<<"test2">>, H)),
+    ?assertEqual(
+       "123",
+       get_combined_value(<<"test3">>, H)),
+    ?assertEqual(
+       "123",
+       get_combined_value(<<"test4">>, H)),
+    ok.
+
+set_cookie_test() ->
+    H = make([{"set-cookie", foo}, {"set-cookie", bar}, {"set-cookie", baz}]),
+    ?assertEqual(
+       [{"set-cookie", "foo"}, {"set-cookie", "bar"}, {"set-cookie", "baz"}],
+       to_list(H)),
+    ok.
+
+headers_test() ->
+    H = ?MODULE:make([{hdr, foo}, {"Hdr", "bar"}, {'Hdr', 2}]),
+    [{hdr, "foo, bar, 2"}] = ?MODULE:to_list(H),
+    H1 = ?MODULE:insert(taco, grande, H),
+    [{hdr, "foo, bar, 2"}, {taco, "grande"}] = ?MODULE:to_list(H1),
+    H2 = ?MODULE:make([{"Set-Cookie", "foo"}]),
+    [{"Set-Cookie", "foo"}] = ?MODULE:to_list(H2),
+    H3 = ?MODULE:insert("Set-Cookie", "bar", H2),
+    [{"Set-Cookie", "foo"}, {"Set-Cookie", "bar"}] = ?MODULE:to_list(H3),
+    "foo, bar" = ?MODULE:get_value("set-cookie", H3),
+    {value, {"Set-Cookie", "foo, bar"}} = ?MODULE:lookup("set-cookie", H3),
+    undefined = ?MODULE:get_value("shibby", H3),
+    none = ?MODULE:lookup("shibby", H3),
+    H4 = ?MODULE:insert("content-type",
+                        "application/x-www-form-urlencoded; charset=utf8",
+                        H3),
+    "application/x-www-form-urlencoded" = ?MODULE:get_primary_value(
+                                             "content-type", H4),
+    H4 = ?MODULE:delete_any("nonexistent-header", H4),
+    H3 = ?MODULE:delete_any("content-type", H4),
+    HB = <<"Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n">>,
+    H_HB = ?MODULE:from_binary(HB),
+    H_HB = ?MODULE:from_binary(binary_to_list(HB)),
+    "47" = ?MODULE:get_value("Content-Length", H_HB),
+    "text/plain" = ?MODULE:get_value("Content-Type", H_HB),
+    L_H_HB = ?MODULE:to_list(H_HB),
+    2 = length(L_H_HB),
+    true = lists:member({'Content-Length', "47"}, L_H_HB),
+    true = lists:member({'Content-Type', "text/plain"}, L_H_HB),
+    HL = [ <<"Content-Length: 47\r\n">>, <<"Content-Type: text/plain\r\n">> ],
+    HL2 = [ "Content-Length: 47\r\n", <<"Content-Type: text/plain\r\n">> ],
+    HL3 = [ <<"Content-Length: 47\r\n">>, "Content-Type: text/plain\r\n" ],
+    H_HL = ?MODULE:from_binary(HL),
+    H_HL = ?MODULE:from_binary(HL2),
+    H_HL = ?MODULE:from_binary(HL3),
+    "47" = ?MODULE:get_value("Content-Length", H_HL),
+    "text/plain" = ?MODULE:get_value("Content-Type", H_HL),
+    L_H_HL = ?MODULE:to_list(H_HL),
+    2 = length(L_H_HL),
+    true = lists:member({'Content-Length', "47"}, L_H_HL),
+    true = lists:member({'Content-Type', "text/plain"}, L_H_HL),
+    [] = ?MODULE:to_list(?MODULE:from_binary(<<>>)),
+    [] = ?MODULE:to_list(?MODULE:from_binary(<<"">>)),
+    [] = ?MODULE:to_list(?MODULE:from_binary(<<"\r\n">>)),
+    [] = ?MODULE:to_list(?MODULE:from_binary(<<"\r\n\r\n">>)),
+    [] = ?MODULE:to_list(?MODULE:from_binary("")),
+    [] = ?MODULE:to_list(?MODULE:from_binary([<<>>])),
+    [] = ?MODULE:to_list(?MODULE:from_binary([<<"">>])),
+    [] = ?MODULE:to_list(?MODULE:from_binary([<<"\r\n">>])),
+    [] = ?MODULE:to_list(?MODULE:from_binary([<<"\r\n\r\n">>])),
+    ok.
+
+tokenize_header_value_test() ->
+    ?assertEqual(["a quote in a \"quote\"."],
+                 tokenize_header_value("\"a quote in a \\\"quote\\\".\"")),
+    ?assertEqual(["abc"], tokenize_header_value("abc")),
+    ?assertEqual(["abc", "def"], tokenize_header_value("abc def")),
+    ?assertEqual(["abc", "def"], tokenize_header_value("abc , def")),
+    ?assertEqual(["abc", "def"], tokenize_header_value(",abc ,, def,,")),
+    ?assertEqual(["abc def"], tokenize_header_value("\"abc def\"      ")),
+    ?assertEqual(["abc, def"], tokenize_header_value("\"abc, def\"")),
+    ?assertEqual(["\\a\\$"], tokenize_header_value("\"\\a\\$\"")),
+    ?assertEqual(["abc def", "foo, bar", "12345", ""],
+                 tokenize_header_value("\"abc def\" \"foo, bar\" , 12345, \"\"")),
+    ?assertEqual(undefined,
+                 tokenize_header_value(undefined)),
+    ?assertEqual(undefined,
+                 tokenize_header_value("unmatched quote\"")),
+    ?assertEqual(undefined,
+                 tokenize_header_value("\"unmatched quote")).
+
+-endif.

http://git-wip-us.apache.org/repos/asf/couchdb/blob/6fdb9e07/src/mochiweb/src/mochiweb_html.erl
----------------------------------------------------------------------
diff --git a/src/mochiweb/src/mochiweb_html.erl b/src/mochiweb/src/mochiweb_html.erl
new file mode 100644
index 0000000..965c846
--- /dev/null
+++ b/src/mochiweb/src/mochiweb_html.erl
@@ -0,0 +1,1332 @@
+%% @author Bob Ippolito <bo...@mochimedia.com>
+%% @copyright 2007 Mochi Media, Inc.
+
+%% @doc Loosely tokenizes and generates parse trees for HTML 4.
+-module(mochiweb_html).
+-export([tokens/1, parse/1, parse_tokens/1, to_tokens/1, escape/1,
+         escape_attr/1, to_html/1]).
+
+%% This is a macro to placate syntax highlighters.
+-define(QUOTE, $\").
+-define(SQUOTE, $\').
+-define(ADV_COL(S, N),
+        S#decoder{column=N+S#decoder.column,
+                  offset=N+S#decoder.offset}).
+-define(INC_COL(S),
+        S#decoder{column=1+S#decoder.column,
+                  offset=1+S#decoder.offset}).
+-define(INC_LINE(S),
+        S#decoder{column=1,
+                  line=1+S#decoder.line,
+                  offset=1+S#decoder.offset}).
+-define(INC_CHAR(S, C),
+        case C of
+            $\n ->
+                S#decoder{column=1,
+                          line=1+S#decoder.line,
+                          offset=1+S#decoder.offset};
+            _ ->
+                S#decoder{column=1+S#decoder.column,
+                          offset=1+S#decoder.offset}
+        end).
+
+-define(IS_WHITESPACE(C),
+        (C =:= $\s orelse C =:= $\t orelse C =:= $\r orelse C =:= $\n)).
+-define(IS_LITERAL_SAFE(C),
+        ((C >= $A andalso C =< $Z) orelse (C >= $a andalso C =< $z)
+         orelse (C >= $0 andalso C =< $9))).
+-define(PROBABLE_CLOSE(C),
+        (C =:= $> orelse ?IS_WHITESPACE(C))).
+
+-record(decoder, {line=1,
+                  column=1,
+                  offset=0}).
+
+%% @type html_node() = {string(), [html_attr()], [html_node() | string()]}
+%% @type html_attr() = {string(), string()}
+%% @type html_token() = html_data() | start_tag() | end_tag() | inline_html() | html_comment() | html_doctype()
+%% @type html_data() = {data, string(), Whitespace::boolean()}
+%% @type start_tag() = {start_tag, Name, [html_attr()], Singleton::boolean()}
+%% @type end_tag() = {end_tag, Name}
+%% @type html_comment() = {comment, Comment}
+%% @type html_doctype() = {doctype, [Doctype]}
+%% @type inline_html() = {'=', iolist()}
+
+%% External API.
+
+%% @spec parse(string() | binary()) -> html_node()
+%% @doc Tokenize and then transform the token stream into an HTML tree.
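+%%
+%%      For example (an illustrative document):
+%%      ```
+%%      {<<"html">>, [], [{<<"body">>, [], [<<"hi">>]}]} =
+%%          mochiweb_html:parse(<<"<html><body>hi</body></html>">>)'''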
+parse(Input) ->
+    parse_tokens(tokens(Input)).
+
+%% @spec parse_tokens([html_token()]) -> html_node()
+%% @doc Transform the output of tokens(Doc) into an HTML tree.
+parse_tokens(Tokens) when is_list(Tokens) ->
+    %% Skip over doctype, processing instructions
+    F = fun (X) ->
+                case X of
+                    {start_tag, _, _, false} ->
+                        false;
+                    _ ->
+                        true
+                end
+        end,
+    [{start_tag, Tag, Attrs, false} | Rest] = lists:dropwhile(F, Tokens),
+    {Tree, _} = tree(Rest, [norm({Tag, Attrs})]),
+    Tree.
+
+%% @spec tokens(StringOrBinary) -> [html_token()]
+%% @doc Transform the input UTF-8 HTML into a token stream.
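+%%
+%%      For example (an illustrative fragment):
+%%      ```
+%%      [{start_tag, <<"p">>, [], false},
+%%       {data, <<"hi">>, false},
+%%       {end_tag, <<"p">>}] = mochiweb_html:tokens(<<"<p>hi</p>">>)'''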
+tokens(Input) ->
+    tokens(iolist_to_binary(Input), #decoder{}, []).
+
+%% @spec to_tokens(html_node()) -> [html_token()]
+%% @doc Convert an html_node() tree to a list of tokens.
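+%%
+%%      For example:
+%%      ```
+%%      [{start_tag, <<"p">>, [], false},
+%%       {end_tag, <<"p">>}] = mochiweb_html:to_tokens({p, [], []})'''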
+to_tokens({Tag0}) ->
+    to_tokens({Tag0, [], []});
+to_tokens(T={'=', _}) ->
+    [T];
+to_tokens(T={doctype, _}) ->
+    [T];
+to_tokens(T={comment, _}) ->
+    [T];
+to_tokens({Tag0, Acc}) ->
+    %% This is only allowed in sub-tags: {p, [{"class", "foo"}]}
+    to_tokens({Tag0, [], Acc});
+to_tokens({Tag0, Attrs, Acc}) ->
+    Tag = to_tag(Tag0),
+    case is_singleton(Tag) of 
+        true ->
+            to_tokens([], [{start_tag, Tag, Attrs, true}]);
+        false ->
+            to_tokens([{Tag, Acc}], [{start_tag, Tag, Attrs, false}])
+    end.
+
+%% @spec to_html([html_token()] | html_node()) -> iolist()
+%% @doc Convert a list of html_token() to an HTML document.
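+%%
+%%      For example (an illustrative node):
+%%      ```
+%%      <<"<p class=\"x\">hi</p>">> = iolist_to_binary(
+%%          mochiweb_html:to_html({p, [{class, <<"x">>}], [<<"hi">>]}))'''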
+to_html(Node) when is_tuple(Node) ->
+    to_html(to_tokens(Node));
+to_html(Tokens) when is_list(Tokens) ->
+    to_html(Tokens, []).
+
+%% @spec escape(string() | atom() | binary()) -> binary()
+%% @doc Escape a string such that it's safe for HTML (amp; lt; gt;).
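+%%
+%%      For example:
+%%      ```
+%%      <<"1 &amp; 2 &lt; 3">> = mochiweb_html:escape(<<"1 & 2 < 3">>)'''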
+escape(B) when is_binary(B) ->
+    escape(binary_to_list(B), []);
+escape(A) when is_atom(A) ->
+    escape(atom_to_list(A), []);
+escape(S) when is_list(S) ->
+    escape(S, []).
+
+%% @spec escape_attr(string() | binary() | atom() | integer() | float()) -> binary()
+%% @doc Escape a string such that it's safe for HTML attrs
+%%      (amp; lt; gt; quot;).
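+%%
+%%      For example:
+%%      ```
+%%      <<"say &quot;hi&quot;">> = mochiweb_html:escape_attr("say \"hi\"")'''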
+escape_attr(B) when is_binary(B) ->
+    escape_attr(binary_to_list(B), []);
+escape_attr(A) when is_atom(A) ->
+    escape_attr(atom_to_list(A), []);
+escape_attr(S) when is_list(S) ->
+    escape_attr(S, []);
+escape_attr(I) when is_integer(I) ->
+    escape_attr(integer_to_list(I), []);
+escape_attr(F) when is_float(F) ->
+    escape_attr(mochinum:digits(F), []).
+
+to_html([], Acc) ->
+    lists:reverse(Acc);
+to_html([{'=', Content} | Rest], Acc) ->
+    to_html(Rest, [Content | Acc]);
+to_html([{pi, Bin} | Rest], Acc) ->
+    Open = [<<"<?">>,
+            Bin,
+            <<"?>">>],
+    to_html(Rest, [Open | Acc]);
+to_html([{pi, Tag, Attrs} | Rest], Acc) ->
+    Open = [<<"<?">>,
+            Tag,
+            attrs_to_html(Attrs, []),
+            <<"?>">>],
+    to_html(Rest, [Open | Acc]);
+to_html([{comment, Comment} | Rest], Acc) ->
+    to_html(Rest, [[<<"<!--">>, Comment, <<"-->">>] | Acc]);
+to_html([{doctype, Parts} | Rest], Acc) ->
+    Inside = doctype_to_html(Parts, Acc),
+    to_html(Rest, [[<<"<!DOCTYPE">>, Inside, <<">">>] | Acc]);
+to_html([{data, Data, _Whitespace} | Rest], Acc) ->
+    to_html(Rest, [escape(Data) | Acc]);
+to_html([{start_tag, Tag, Attrs, Singleton} | Rest], Acc) ->
+    Open = [<<"<">>,
+            Tag,
+            attrs_to_html(Attrs, []),
+            case Singleton of
+                true -> <<" />">>;
+                false -> <<">">>
+            end],
+    to_html(Rest, [Open | Acc]);
+to_html([{end_tag, Tag} | Rest], Acc) ->
+    to_html(Rest, [[<<"</">>, Tag, <<">">>] | Acc]).
+
+doctype_to_html([], Acc) ->
+    lists:reverse(Acc);
+doctype_to_html([Word | Rest], Acc) ->
+    case lists:all(fun (C) -> ?IS_LITERAL_SAFE(C) end,
+                   binary_to_list(iolist_to_binary(Word))) of
+        true ->
+            doctype_to_html(Rest, [[<<" ">>, Word] | Acc]);
+        false ->
+            doctype_to_html(Rest, [[<<" \"">>, escape_attr(Word), ?QUOTE] | Acc])
+    end.
+
+attrs_to_html([], Acc) ->
+    lists:reverse(Acc);
+attrs_to_html([{K, V} | Rest], Acc) ->
+    attrs_to_html(Rest,
+                  [[<<" ">>, escape(K), <<"=\"">>,
+                    escape_attr(V), <<"\"">>] | Acc]).
+
+escape([], Acc) ->
+    list_to_binary(lists:reverse(Acc));
+escape("<" ++ Rest, Acc) ->
+    escape(Rest, lists:reverse("&lt;", Acc));
+escape(">" ++ Rest, Acc) ->
+    escape(Rest, lists:reverse("&gt;", Acc));
+escape("&" ++ Rest, Acc) ->
+    escape(Rest, lists:reverse("&amp;", Acc));
+escape([C | Rest], Acc) ->
+    escape(Rest, [C | Acc]).
+
+escape_attr([], Acc) ->
+    list_to_binary(lists:reverse(Acc));
+escape_attr("<" ++ Rest, Acc) ->
+    escape_attr(Rest, lists:reverse("&lt;", Acc));
+escape_attr(">" ++ Rest, Acc) ->
+    escape_attr(Rest, lists:reverse("&gt;", Acc));
+escape_attr("&" ++ Rest, Acc) ->
+    escape_attr(Rest, lists:reverse("&amp;", Acc));
+escape_attr([?QUOTE | Rest], Acc) ->
+    escape_attr(Rest, lists:reverse("&quot;", Acc));
+escape_attr([C | Rest], Acc) ->
+    escape_attr(Rest, [C | Acc]).
+
+to_tag(A) when is_atom(A) ->
+    norm(atom_to_list(A));
+to_tag(L) ->
+    norm(L).
+
+to_tokens([], Acc) ->
+    lists:reverse(Acc);
+to_tokens([{Tag, []} | Rest], Acc) ->
+    to_tokens(Rest, [{end_tag, to_tag(Tag)} | Acc]);
+to_tokens([{Tag0, [{T0} | R1]} | Rest], Acc) ->
+    %% Allow {br}
+    to_tokens([{Tag0, [{T0, [], []} | R1]} | Rest], Acc);
+to_tokens([{Tag0, [T0={'=', _C0} | R1]} | Rest], Acc) ->
+    %% Allow {'=', iolist()}
+    to_tokens([{Tag0, R1} | Rest], [T0 | Acc]);
+to_tokens([{Tag0, [T0={comment, _C0} | R1]} | Rest], Acc) ->
+    %% Allow {comment, iolist()}
+    to_tokens([{Tag0, R1} | Rest], [T0 | Acc]);
+to_tokens([{Tag0, [T0={pi, _S0} | R1]} | Rest], Acc) ->
+    %% Allow {pi, binary()}
+    to_tokens([{Tag0, R1} | Rest], [T0 | Acc]);
+to_tokens([{Tag0, [T0={pi, _S0, _A0} | R1]} | Rest], Acc) ->
+    %% Allow {pi, binary(), list()}
+    to_tokens([{Tag0, R1} | Rest], [T0 | Acc]);
+to_tokens([{Tag0, [{T0, A0=[{_, _} | _]} | R1]} | Rest], Acc) ->
+    %% Allow {p, [{"class", "foo"}]}
+    to_tokens([{Tag0, [{T0, A0, []} | R1]} | Rest], Acc);
+to_tokens([{Tag0, [{T0, C0} | R1]} | Rest], Acc) ->
+    %% Allow {p, "content"} and {p, <<"content">>}
+    to_tokens([{Tag0, [{T0, [], C0} | R1]} | Rest], Acc);
+to_tokens([{Tag0, [{T0, A1, C0} | R1]} | Rest], Acc) when is_binary(C0) ->
+    %% Allow {"p", [{"class", "foo"}], <<"content">>}
+    to_tokens([{Tag0, [{T0, A1, binary_to_list(C0)} | R1]} | Rest], Acc);
+to_tokens([{Tag0, [{T0, A1, C0=[C | _]} | R1]} | Rest], Acc)
+  when is_integer(C) ->
+    %% Allow {"p", [{"class", "foo"}], "content"}
+    to_tokens([{Tag0, [{T0, A1, [C0]} | R1]} | Rest], Acc);
+to_tokens([{Tag0, [{T0, A1, C1} | R1]} | Rest], Acc) ->
+    %% Native {"p", [{"class", "foo"}], ["content"]}
+    Tag = to_tag(Tag0),
+    T1 = to_tag(T0),
+    case is_singleton(norm(T1)) of
+        true ->
+            to_tokens([{Tag, R1} | Rest], [{start_tag, T1, A1, true} | Acc]);
+        false ->
+            to_tokens([{T1, C1}, {Tag, R1} | Rest],
+                      [{start_tag, T1, A1, false} | Acc])
+    end;
+to_tokens([{Tag0, [L | R1]} | Rest], Acc) when is_list(L) ->
+    %% List text
+    Tag = to_tag(Tag0),
+    to_tokens([{Tag, R1} | Rest], [{data, iolist_to_binary(L), false} | Acc]);
+to_tokens([{Tag0, [B | R1]} | Rest], Acc) when is_binary(B) ->
+    %% Binary text
+    Tag = to_tag(Tag0),
+    to_tokens([{Tag, R1} | Rest], [{data, B, false} | Acc]).
+
+tokens(B, S=#decoder{offset=O}, Acc) ->
+    case B of
+        <<_:O/binary>> ->
+            lists:reverse(Acc);
+        _ ->
+            {Tag, S1} = tokenize(B, S),
+            case parse_flag(Tag) of
+                script ->
+                    {Tag2, S2} = tokenize_script(B, S1),
+                    tokens(B, S2, [Tag2, Tag | Acc]);
+                textarea ->
+                    {Tag2, S2} = tokenize_textarea(B, S1),
+                    tokens(B, S2, [Tag2, Tag | Acc]);
+                none ->
+                    tokens(B, S1, [Tag | Acc])
+            end
+    end.
+
+parse_flag({start_tag, B, _, false}) ->
+    case string:to_lower(binary_to_list(B)) of
+        "script" ->
+            script;
+        "textarea" ->
+            textarea;
+        _ ->
+            none
+    end;
+parse_flag(_) ->
+    none.
+
+tokenize(B, S=#decoder{offset=O}) ->
+    case B of
+        <<_:O/binary, "<!--", _/binary>> ->
+            tokenize_comment(B, ?ADV_COL(S, 4));
+        <<_:O/binary, "<!DOCTYPE", _/binary>> ->
+            tokenize_doctype(B, ?ADV_COL(S, 10));
+        <<_:O/binary, "<![CDATA[", _/binary>> ->
+            tokenize_cdata(B, ?ADV_COL(S, 9));
+        <<_:O/binary, "<?php", _/binary>> ->
+            {Body, S1} = raw_qgt(B, ?ADV_COL(S, 2)),
+            {{pi, Body}, S1};
+        <<_:O/binary, "<?", _/binary>> ->
+            {Tag, S1} = tokenize_literal(B, ?ADV_COL(S, 2)),
+            {Attrs, S2} = tokenize_attributes(B, S1),
+            S3 = find_qgt(B, S2),
+            {{pi, Tag, Attrs}, S3};
+        <<_:O/binary, "&", _/binary>> ->
+            tokenize_charref(B, ?INC_COL(S));
+        <<_:O/binary, "</", _/binary>> ->
+            {Tag, S1} = tokenize_literal(B, ?ADV_COL(S, 2)),
+            {S2, _} = find_gt(B, S1),
+            {{end_tag, Tag}, S2};
+        <<_:O/binary, "<", C, _/binary>> 
+                when ?IS_WHITESPACE(C); not ?IS_LITERAL_SAFE(C) ->
+            %% This isn't really strict HTML
+            {{data, Data, _Whitespace}, S1} = tokenize_data(B, ?INC_COL(S)),
+            {{data, <<$<, Data/binary>>, false}, S1};
+        <<_:O/binary, "<", _/binary>> ->
+            {Tag, S1} = tokenize_literal(B, ?INC_COL(S)),
+            {Attrs, S2} = tokenize_attributes(B, S1),
+            {S3, HasSlash} = find_gt(B, S2),
+            Singleton = HasSlash orelse is_singleton(Tag),
+            {{start_tag, Tag, Attrs, Singleton}, S3};
+        _ ->
+            tokenize_data(B, S)
+    end.
+
+tree_data([{data, Data, Whitespace} | Rest], AllWhitespace, Acc) ->
+    tree_data(Rest, (Whitespace andalso AllWhitespace), [Data | Acc]);
+tree_data(Rest, AllWhitespace, Acc) ->
+    {iolist_to_binary(lists:reverse(Acc)), AllWhitespace, Rest}.
+
+tree([], Stack) ->
+    {destack(Stack), []};
+tree([{end_tag, Tag} | Rest], Stack) ->
+    case destack(norm(Tag), Stack) of
+        S when is_list(S) ->
+            tree(Rest, S);
+        Result ->
+            {Result, []}
+    end;
+tree([{start_tag, Tag, Attrs, true} | Rest], S) ->
+    tree(Rest, append_stack_child(norm({Tag, Attrs}), S));
+tree([{start_tag, Tag, Attrs, false} | Rest], S) ->
+    tree(Rest, stack(norm({Tag, Attrs}), S));
+tree([T={pi, _Raw} | Rest], S) ->
+    tree(Rest, append_stack_child(T, S));
+tree([T={pi, _Tag, _Attrs} | Rest], S) ->
+    tree(Rest, append_stack_child(T, S));
+tree([T={comment, _Comment} | Rest], S) ->
+    tree(Rest, append_stack_child(T, S));
+tree(L=[{data, _Data, _Whitespace} | _], S) ->
+    case tree_data(L, true, []) of
+        {_, true, Rest} ->
+            tree(Rest, S);
+        {Data, false, Rest} ->
+            tree(Rest, append_stack_child(Data, S))
+    end;
+tree([{doctype, _} | Rest], Stack) ->
+    tree(Rest, Stack).
+
+norm({Tag, Attrs}) ->
+    {norm(Tag), [{norm(K), iolist_to_binary(V)} || {K, V} <- Attrs], []};
+norm(Tag) when is_binary(Tag) ->
+    Tag;
+norm(Tag) ->
+    list_to_binary(string:to_lower(Tag)).
+
+stack(T1={TN, _, _}, Stack=[{TN, _, _} | _Rest])
+  when TN =:= <<"li">> orelse TN =:= <<"option">> ->
+    [T1 | destack(TN, Stack)];
+stack(T1={TN0, _, _}, Stack=[{TN1, _, _} | _Rest])
+  when (TN0 =:= <<"dd">> orelse TN0 =:= <<"dt">>) andalso
+       (TN1 =:= <<"dd">> orelse TN1 =:= <<"dt">>) ->
+    [T1 | destack(TN1, Stack)];
+stack(T1, Stack) ->
+    [T1 | Stack].
+
+append_stack_child(StartTag, [{Name, Attrs, Acc} | Stack]) ->
+    [{Name, Attrs, [StartTag | Acc]} | Stack].
+
+destack(<<"br">>, Stack) ->
+    %% This is an ugly hack to make dumb_br_test() pass;
+    %% it ensures that br can never have children.
+    Stack;
+destack(TagName, Stack) when is_list(Stack) ->
+    F = fun (X) ->
+                case X of
+                    {TagName, _, _} ->
+                        false;
+                    _ ->
+                        true
+                end
+        end,
+    case lists:splitwith(F, Stack) of
+        {_, []} ->
+            %% If we're parsing something like XML we might find
+            %% a <link>tag</link> that is normally a singleton
+            %% in HTML but isn't here
+            case {is_singleton(TagName), Stack} of
+                {true, [{T0, A0, Acc0} | Post0]} ->
+                    case lists:splitwith(F, Acc0) of
+                        {_, []} ->
+                            %% Actually was a singleton
+                            Stack;
+                        {Pre, [{T1, A1, Acc1} | Post1]} ->
+                            [{T0, A0, [{T1, A1, Acc1 ++ lists:reverse(Pre)} | Post1]}
+                             | Post0]
+                    end;
+                _ ->
+                    %% No match, no state change
+                    Stack
+            end;
+        {_Pre, [_T]} ->
+            %% Unfurl the whole stack, we're done
+            destack(Stack);
+        {Pre, [T, {T0, A0, Acc0} | Post]} ->
+            %% Unfurl up to the tag, then accumulate it
+            [{T0, A0, [destack(Pre ++ [T]) | Acc0]} | Post]
+    end.
+
+destack([{Tag, Attrs, Acc}]) ->
+    {Tag, Attrs, lists:reverse(Acc)};
+destack([{T1, A1, Acc1}, {T0, A0, Acc0} | Rest]) ->
+    destack([{T0, A0, [{T1, A1, lists:reverse(Acc1)} | Acc0]} | Rest]).
+
+is_singleton(<<"br">>) -> true;
+is_singleton(<<"hr">>) -> true;
+is_singleton(<<"img">>) -> true;
+is_singleton(<<"input">>) -> true;
+is_singleton(<<"base">>) -> true;
+is_singleton(<<"meta">>) -> true;
+is_singleton(<<"link">>) -> true;
+is_singleton(<<"area">>) -> true;
+is_singleton(<<"param">>) -> true;
+is_singleton(<<"col">>) -> true;
+is_singleton(_) -> false.
+
+tokenize_data(B, S=#decoder{offset=O}) ->
+    tokenize_data(B, S, O, true).
+
+tokenize_data(B, S=#decoder{offset=O}, Start, Whitespace) ->
+    case B of
+        <<_:O/binary, C, _/binary>> when (C =/= $< andalso C =/= $&) ->
+            tokenize_data(B, ?INC_CHAR(S, C), Start,
+                          (Whitespace andalso ?IS_WHITESPACE(C)));
+        _ ->
+            Len = O - Start,
+            <<_:Start/binary, Data:Len/binary, _/binary>> = B,
+            {{data, Data, Whitespace}, S}
+    end.
+
+tokenize_attributes(B, S) ->
+    tokenize_attributes(B, S, []).
+
+tokenize_attributes(B, S=#decoder{offset=O}, Acc) ->
+    case B of
+        <<_:O/binary>> ->
+            {lists:reverse(Acc), S};
+        <<_:O/binary, C, _/binary>> when (C =:= $> orelse C =:= $/) ->
+            {lists:reverse(Acc), S};
+        <<_:O/binary, "?>", _/binary>> ->
+            {lists:reverse(Acc), S};
+        <<_:O/binary, C, _/binary>> when ?IS_WHITESPACE(C) ->
+            tokenize_attributes(B, ?INC_CHAR(S, C), Acc);
+        _ ->
+            {Attr, S1} = tokenize_literal(B, S),
+            {Value, S2} = tokenize_attr_value(Attr, B, S1),
+            tokenize_attributes(B, S2, [{Attr, Value} | Acc])
+    end.
+
+tokenize_attr_value(Attr, B, S) ->
+    S1 = skip_whitespace(B, S),
+    O = S1#decoder.offset,
+    case B of
+        <<_:O/binary, "=", _/binary>> ->
+            S2 = skip_whitespace(B, ?INC_COL(S1)),
+            tokenize_quoted_or_unquoted_attr_value(B, S2);
+        _ ->
+            {Attr, S1}
+    end.
+
+tokenize_quoted_or_unquoted_attr_value(B, S=#decoder{offset=O}) ->
+    case B of
+        <<_:O/binary>> ->
+            { [], S };
+        <<_:O/binary, Q, _/binary>> when Q =:= ?QUOTE orelse
+                                         Q =:= ?SQUOTE ->
+            tokenize_quoted_attr_value(B, ?INC_COL(S), [], Q);
+        <<_:O/binary, _/binary>> ->
+            tokenize_unquoted_attr_value(B, S, [])
+    end.
+
+tokenize_quoted_attr_value(B, S=#decoder{offset=O}, Acc, Q) ->
+    case B of
+        <<_:O/binary>> ->
+            { iolist_to_binary(lists:reverse(Acc)), S };
+        <<_:O/binary, $&, _/binary>> ->
+            {{data, Data, false}, S1} = tokenize_charref(B, ?INC_COL(S)),
+            tokenize_quoted_attr_value(B, S1, [Data|Acc], Q);
+        <<_:O/binary, Q, _/binary>> ->
+            { iolist_to_binary(lists:reverse(Acc)), ?INC_COL(S) };
+        <<_:O/binary, C, _/binary>> ->
+            tokenize_quoted_attr_value(B, ?INC_COL(S), [C|Acc], Q)
+    end.
+
+tokenize_unquoted_attr_value(B, S=#decoder{offset=O}, Acc) ->
+    case B of
+        <<_:O/binary>> ->
+            { iolist_to_binary(lists:reverse(Acc)), S };
+        <<_:O/binary, $&, _/binary>> ->
+            {{data, Data, false}, S1} = tokenize_charref(B, ?INC_COL(S)),
+            tokenize_unquoted_attr_value(B, S1, [Data|Acc]);
+        <<_:O/binary, $/, $>, _/binary>> ->
+            { iolist_to_binary(lists:reverse(Acc)), S };
+        <<_:O/binary, C, _/binary>> when ?PROBABLE_CLOSE(C) ->
+            { iolist_to_binary(lists:reverse(Acc)), S };
+        <<_:O/binary, C, _/binary>> ->
+            tokenize_unquoted_attr_value(B, ?INC_COL(S), [C|Acc])
+    end.
+
+skip_whitespace(B, S=#decoder{offset=O}) ->
+    case B of
+        <<_:O/binary, C, _/binary>> when ?IS_WHITESPACE(C) ->
+            skip_whitespace(B, ?INC_CHAR(S, C));
+        _ ->
+            S
+    end.
+
+tokenize_literal(Bin, S=#decoder{offset=O}) ->
+    case Bin of
+        <<_:O/binary, C, _/binary>> when C =:= $>
+                                    orelse C =:= $/
+                                    orelse C =:= $= ->
+            %% Handle case where tokenize_literal would consume
+            %% 0 chars. http://github.com/mochi/mochiweb/pull/13
+            {[C], ?INC_COL(S)};
+        _ ->
+            tokenize_literal(Bin, S, [])
+    end.
+
+tokenize_literal(Bin, S=#decoder{offset=O}, Acc) ->
+    case Bin of
+        <<_:O/binary, $&, _/binary>> ->
+            {{data, Data, false}, S1} = tokenize_charref(Bin, ?INC_COL(S)),
+            tokenize_literal(Bin, S1, [Data | Acc]);
+        <<_:O/binary, C, _/binary>> when not (?IS_WHITESPACE(C)
+                                              orelse C =:= $>
+                                              orelse C =:= $/
+                                              orelse C =:= $=) ->
+            tokenize_literal(Bin, ?INC_COL(S), [C | Acc]);
+        _ ->
+            {iolist_to_binary(string:to_lower(lists:reverse(Acc))), S}
+    end.
+
+raw_qgt(Bin, S=#decoder{offset=O}) ->
+    raw_qgt(Bin, S, O).
+
+raw_qgt(Bin, S=#decoder{offset=O}, Start) ->
+    case Bin of
+        <<_:O/binary, "?>", _/binary>> ->
+            Len = O - Start,
+            <<_:Start/binary, Raw:Len/binary, _/binary>> = Bin,
+            {Raw, ?ADV_COL(S, 2)};
+        <<_:O/binary, C, _/binary>> ->
+            raw_qgt(Bin, ?INC_CHAR(S, C), Start);
+        <<_:O/binary>> ->
+            <<_:Start/binary, Raw/binary>> = Bin,
+            {Raw, S}
+    end.
+
+find_qgt(Bin, S=#decoder{offset=O}) ->
+    case Bin of
+        <<_:O/binary, "?>", _/binary>> ->
+            ?ADV_COL(S, 2);
+        <<_:O/binary, ">", _/binary>> ->
+			?ADV_COL(S, 1);
+        <<_:O/binary, "/>", _/binary>> ->
+			?ADV_COL(S, 2);
+        %% tokenize_attributes takes care of this state:
+        %% <<_:O/binary, C, _/binary>> ->
+        %%     find_qgt(Bin, ?INC_CHAR(S, C));
+        <<_:O/binary>> ->
+            S
+    end.
+
+find_gt(Bin, S) ->
+    find_gt(Bin, S, false).
+
+find_gt(Bin, S=#decoder{offset=O}, HasSlash) ->
+    case Bin of
+        <<_:O/binary, $/, _/binary>> ->
+            find_gt(Bin, ?INC_COL(S), true);
+        <<_:O/binary, $>, _/binary>> ->
+            {?INC_COL(S), HasSlash};
+        <<_:O/binary, C, _/binary>> ->
+            find_gt(Bin, ?INC_CHAR(S, C), HasSlash);
+        _ ->
+            {S, HasSlash}
+    end.
+
+tokenize_charref(Bin, S=#decoder{offset=O}) ->
+    try
+        tokenize_charref(Bin, S, O)
+    catch
+        throw:invalid_charref ->
+            {{data, <<"&">>, false}, S}
+    end.
+
+tokenize_charref(Bin, S=#decoder{offset=O}, Start) ->
+    case Bin of
+        <<_:O/binary>> ->
+            throw(invalid_charref);
+        <<_:O/binary, C, _/binary>> when ?IS_WHITESPACE(C)
+                                         orelse C =:= ?SQUOTE
+                                         orelse C =:= ?QUOTE
+                                         orelse C =:= $/
+                                         orelse C =:= $> ->
+            throw(invalid_charref);
+        <<_:O/binary, $;, _/binary>> ->
+            Len = O - Start,
+            <<_:Start/binary, Raw:Len/binary, _/binary>> = Bin,
+            Data = case mochiweb_charref:charref(Raw) of
+                       undefined ->
+                           throw(invalid_charref);
+                       Unichar when is_integer(Unichar) ->
+                           mochiutf8:codepoint_to_bytes(Unichar);
+                       Unichars when is_list(Unichars) ->
+                           unicode:characters_to_binary(Unichars)
+                   end,
+            {{data, Data, false}, ?INC_COL(S)};
+        _ ->
+            tokenize_charref(Bin, ?INC_COL(S), Start)
+    end.
+
+tokenize_doctype(Bin, S) ->
+    tokenize_doctype(Bin, S, []).
+
+tokenize_doctype(Bin, S=#decoder{offset=O}, Acc) ->
+    case Bin of
+        <<_:O/binary>> ->
+            {{doctype, lists:reverse(Acc)}, S};
+        <<_:O/binary, $>, _/binary>> ->
+            {{doctype, lists:reverse(Acc)}, ?INC_COL(S)};
+        <<_:O/binary, C, _/binary>> when ?IS_WHITESPACE(C) ->
+            tokenize_doctype(Bin, ?INC_CHAR(S, C), Acc);
+        _ ->
+            {Word, S1} = tokenize_word_or_literal(Bin, S),
+            tokenize_doctype(Bin, S1, [Word | Acc])
+    end.
+
+tokenize_word_or_literal(Bin, S=#decoder{offset=O}) ->
+    case Bin of
+        <<_:O/binary, C, _/binary>> when C =:= ?QUOTE orelse C =:= ?SQUOTE ->
+            tokenize_word(Bin, ?INC_COL(S), C);
+        <<_:O/binary, C, _/binary>> when not ?IS_WHITESPACE(C) ->
+            %% Sanity check for whitespace
+            tokenize_literal(Bin, S)
+    end.
+
+tokenize_word(Bin, S, Quote) ->
+    tokenize_word(Bin, S, Quote, []).
+
+tokenize_word(Bin, S=#decoder{offset=O}, Quote, Acc) ->
+    case Bin of
+        <<_:O/binary>> ->
+            {iolist_to_binary(lists:reverse(Acc)), S};
+        <<_:O/binary, Quote, _/binary>> ->
+            {iolist_to_binary(lists:reverse(Acc)), ?INC_COL(S)};
+        <<_:O/binary, $&, _/binary>> ->
+            {{data, Data, false}, S1} = tokenize_charref(Bin, ?INC_COL(S)),
+            tokenize_word(Bin, S1, Quote, [Data | Acc]);
+        <<_:O/binary, C, _/binary>> ->
+            tokenize_word(Bin, ?INC_CHAR(S, C), Quote, [C | Acc])
+    end.
+
+tokenize_cdata(Bin, S=#decoder{offset=O}) ->
+    tokenize_cdata(Bin, S, O).
+
+tokenize_cdata(Bin, S=#decoder{offset=O}, Start) ->
+    case Bin of
+        <<_:O/binary, "]]>", _/binary>> ->
+            Len = O - Start,
+            <<_:Start/binary, Raw:Len/binary, _/binary>> = Bin,
+            {{data, Raw, false}, ?ADV_COL(S, 3)};
+        <<_:O/binary, C, _/binary>> ->
+            tokenize_cdata(Bin, ?INC_CHAR(S, C), Start);
+        _ ->
+            <<_:O/binary, Raw/binary>> = Bin,
+            {{data, Raw, false}, S}
+    end.
+
+tokenize_comment(Bin, S=#decoder{offset=O}) ->
+    tokenize_comment(Bin, S, O).
+
+tokenize_comment(Bin, S=#decoder{offset=O}, Start) ->
+    case Bin of
+        <<_:O/binary, "-->", _/binary>> ->
+            Len = O - Start,
+            <<_:Start/binary, Raw:Len/binary, _/binary>> = Bin,
+            {{comment, Raw}, ?ADV_COL(S, 3)};
+        <<_:O/binary, C, _/binary>> ->
+            tokenize_comment(Bin, ?INC_CHAR(S, C), Start);
+        <<_:Start/binary, Raw/binary>> ->
+            {{comment, Raw}, S}
+    end.
+
+tokenize_script(Bin, S=#decoder{offset=O}) ->
+    tokenize_script(Bin, S, O).
+
+tokenize_script(Bin, S=#decoder{offset=O}, Start) ->
+    case Bin of
+        %% Just a look-ahead, we want the end_tag separately
+        <<_:O/binary, $<, $/, SS, CC, RR, II, PP, TT, ZZ, _/binary>>
+        when (SS =:= $s orelse SS =:= $S) andalso
+             (CC =:= $c orelse CC =:= $C) andalso
+             (RR =:= $r orelse RR =:= $R) andalso
+             (II =:= $i orelse II =:= $I) andalso
+             (PP =:= $p orelse PP =:= $P) andalso
+             (TT=:= $t orelse TT =:= $T) andalso
+             ?PROBABLE_CLOSE(ZZ) ->
+            Len = O - Start,
+            <<_:Start/binary, Raw:Len/binary, _/binary>> = Bin,
+            {{data, Raw, false}, S};
+        <<_:O/binary, C, _/binary>> ->
+            tokenize_script(Bin, ?INC_CHAR(S, C), Start);
+        <<_:Start/binary, Raw/binary>> ->
+            {{data, Raw, false}, S}
+    end.
+
+tokenize_textarea(Bin, S=#decoder{offset=O}) ->
+    tokenize_textarea(Bin, S, O).
+
+tokenize_textarea(Bin, S=#decoder{offset=O}, Start) ->
+    case Bin of
+        %% Just a look-ahead, we want the end_tag separately
+        <<_:O/binary, $<, $/, TT, EE, XX, TT2, AA, RR, EE2, AA2, ZZ, _/binary>>
+        when (TT =:= $t orelse TT =:= $T) andalso
+             (EE =:= $e orelse EE =:= $E) andalso
+             (XX =:= $x orelse XX =:= $X) andalso
+             (TT2 =:= $t orelse TT2 =:= $T) andalso
+             (AA =:= $a orelse AA =:= $A) andalso
+             (RR =:= $r orelse RR =:= $R) andalso
+             (EE2 =:= $e orelse EE2 =:= $E) andalso
+             (AA2 =:= $a orelse AA2 =:= $A) andalso
+             ?PROBABLE_CLOSE(ZZ) ->
+            Len = O - Start,
+            <<_:Start/binary, Raw:Len/binary, _/binary>> = Bin,
+            {{data, Raw, false}, S};
+        <<_:O/binary, C, _/binary>> ->
+            tokenize_textarea(Bin, ?INC_CHAR(S, C), Start);
+        <<_:Start/binary, Raw/binary>> ->
+            {{data, Raw, false}, S}
+    end.
+
+
+%%
+%% Tests
+%%
+-ifdef(TEST).
+-include_lib("eunit/include/eunit.hrl").
+
+to_html_test() ->
+    ?assertEqual(
+       <<"<html><head><title>hey!</title></head><body><p class=\"foo\">what's up<br /></p><div>sucka</div>RAW!<!-- comment! --></body></html>">>,
+       iolist_to_binary(
+         to_html({html, [],
+                  [{<<"head">>, [],
+                    [{title, <<"hey!">>}]},
+                   {body, [],
+                    [{p, [{class, foo}], [<<"what's">>, <<" up">>, {br}]},
+                     {'div', <<"sucka">>},
+                     {'=', <<"RAW!">>},
+                     {comment, <<" comment! ">>}]}]}))),
+    ?assertEqual(
+       <<"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">">>,
+       iolist_to_binary(
+         to_html({doctype,
+                  [<<"html">>, <<"PUBLIC">>,
+                   <<"-//W3C//DTD XHTML 1.0 Transitional//EN">>,
+                   <<"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">>]}))),
+    ?assertEqual(
+       <<"<html><?xml:namespace prefix=\"o\" ns=\"urn:schemas-microsoft-com:office:office\"?></html>">>,
+       iolist_to_binary(
+         to_html({<<"html">>,[],
+                  [{pi, <<"xml:namespace">>,
+                    [{<<"prefix">>,<<"o">>},
+                     {<<"ns">>,<<"urn:schemas-microsoft-com:office:office">>}]}]}))),
+    ok.
+
+escape_test() ->
+    ?assertEqual(
+       <<"&amp;quot;\"word &gt;&lt;&lt;up!&amp;quot;">>,
+       escape(<<"&quot;\"word ><<up!&quot;">>)),
+    ?assertEqual(
+       <<"&amp;quot;\"word &gt;&lt;&lt;up!&amp;quot;">>,
+       escape("&quot;\"word ><<up!&quot;")),
+    ?assertEqual(
+       <<"&amp;quot;\"word &gt;&lt;&lt;up!&amp;quot;">>,
+       escape('&quot;\"word ><<up!&quot;')),
+    ok.
+
+escape_attr_test() ->
+    ?assertEqual(
+       <<"&amp;quot;&quot;word &gt;&lt;&lt;up!&amp;quot;">>,
+       escape_attr(<<"&quot;\"word ><<up!&quot;">>)),
+    ?assertEqual(
+       <<"&amp;quot;&quot;word &gt;&lt;&lt;up!&amp;quot;">>,
+       escape_attr("&quot;\"word ><<up!&quot;")),
+    ?assertEqual(
+       <<"&amp;quot;&quot;word &gt;&lt;&lt;up!&amp;quot;">>,
+       escape_attr('&quot;\"word ><<up!&quot;')),
+    ?assertEqual(
+       <<"12345">>,
+       escape_attr(12345)),
+    ?assertEqual(
+       <<"1.5">>,
+       escape_attr(1.5)),
+    ok.
+
+tokens_test() ->
+    ?assertEqual(
+       [{start_tag, <<"foo">>, [{<<"bar">>, <<"baz">>},
+                                {<<"wibble">>, <<"wibble">>},
+                                {<<"alice">>, <<"bob">>}], true}],
+       tokens(<<"<foo bar=baz wibble='wibble' alice=\"bob\"/>">>)),
+    ?assertEqual(
+       [{start_tag, <<"foo">>, [{<<"bar">>, <<"baz">>},
+                                {<<"wibble">>, <<"wibble">>},
+                                {<<"alice">>, <<"bob">>}], true}],
+       tokens(<<"<foo bar=baz wibble='wibble' alice=bob/>">>)),
+    ?assertEqual(
+       [{comment, <<"[if lt IE 7]>\n<style type=\"text/css\">\n.no_ie { display: none; }\n</style>\n<![endif]">>}],
+       tokens(<<"<!--[if lt IE 7]>\n<style type=\"text/css\">\n.no_ie { display: none; }\n</style>\n<![endif]-->">>)),
+    ?assertEqual(
+       [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
+        {data, <<" A= B <= C ">>, false},
+        {end_tag, <<"script">>}],
+       tokens(<<"<script type=\"text/javascript\"> A= B <= C </script>">>)),
+    ?assertEqual(
+       [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
+        {data, <<" A= B <= C ">>, false},
+        {end_tag, <<"script">>}],
+       tokens(<<"<script type =\"text/javascript\"> A= B <= C </script>">>)),
+    ?assertEqual(
+       [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
+        {data, <<" A= B <= C ">>, false},
+        {end_tag, <<"script">>}],
+       tokens(<<"<script type = \"text/javascript\"> A= B <= C </script>">>)),
+    ?assertEqual(
+       [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
+        {data, <<" A= B <= C ">>, false},
+        {end_tag, <<"script">>}],
+       tokens(<<"<script type= \"text/javascript\"> A= B <= C </script>">>)),
+    ?assertEqual(
+       [{start_tag, <<"textarea">>, [], false},
+        {data, <<"<html></body>">>, false},
+        {end_tag, <<"textarea">>}],
+       tokens(<<"<textarea><html></body></textarea>">>)),
+    ?assertEqual(
+       [{start_tag, <<"textarea">>, [], false},
+        {data, <<"<html></body></textareaz>">>, false}],
+       tokens(<<"<textarea ><html></body></textareaz>">>)),
+    ?assertEqual(
+       [{pi, <<"xml:namespace">>,
+         [{<<"prefix">>,<<"o">>},
+          {<<"ns">>,<<"urn:schemas-microsoft-com:office:office">>}]}],
+       tokens(<<"<?xml:namespace prefix=\"o\" ns=\"urn:schemas-microsoft-com:office:office\"?>">>)),
+    ?assertEqual(
+       [{pi, <<"xml:namespace">>,
+         [{<<"prefix">>,<<"o">>},
+          {<<"ns">>,<<"urn:schemas-microsoft-com:office:office">>}]}],
+       tokens(<<"<?xml:namespace prefix=o ns=urn:schemas-microsoft-com:office:office \n?>">>)),
+    ?assertEqual(
+       [{pi, <<"xml:namespace">>,
+         [{<<"prefix">>,<<"o">>},
+          {<<"ns">>,<<"urn:schemas-microsoft-com:office:office">>}]}],
+       tokens(<<"<?xml:namespace prefix=o ns=urn:schemas-microsoft-com:office:office">>)),
+    ?assertEqual(
+       [{data, <<"<">>, false}],
+       tokens(<<"&lt;">>)),
+    ?assertEqual(
+       [{data, <<"not html ">>, false},
+        {data, <<"< at all">>, false}],
+       tokens(<<"not html < at all">>)),
+    ok.
+
+parse_test() ->
+    D0 = <<"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">
+<html>
+ <head>
+   <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">
+   <title>Foo</title>
+   <link rel=\"stylesheet\" type=\"text/css\" href=\"/static/rel/dojo/resources/dojo.css\" media=\"screen\">
+   <link rel=\"stylesheet\" type=\"text/css\" href=\"/static/foo.css\" media=\"screen\">
+   <!--[if lt IE 7]>
+   <style type=\"text/css\">
+     .no_ie { display: none; }
+   </style>
+   <![endif]-->
+   <link rel=\"icon\" href=\"/static/images/favicon.ico\" type=\"image/x-icon\">
+   <link rel=\"shortcut icon\" href=\"/static/images/favicon.ico\" type=\"image/x-icon\">
+ </head>
+ <body id=\"home\" class=\"tundra\"><![CDATA[&lt;<this<!-- is -->CDATA>&gt;]]></body>
+</html>">>,
+    ?assertEqual(
+       {<<"html">>, [],
+        [{<<"head">>, [],
+          [{<<"meta">>,
+            [{<<"http-equiv">>,<<"Content-Type">>},
+             {<<"content">>,<<"text/html; charset=UTF-8">>}],
+            []},
+           {<<"title">>,[],[<<"Foo">>]},
+           {<<"link">>,
+            [{<<"rel">>,<<"stylesheet">>},
+             {<<"type">>,<<"text/css">>},
+             {<<"href">>,<<"/static/rel/dojo/resources/dojo.css">>},
+             {<<"media">>,<<"screen">>}],
+            []},
+           {<<"link">>,
+            [{<<"rel">>,<<"stylesheet">>},
+             {<<"type">>,<<"text/css">>},
+             {<<"href">>,<<"/static/foo.css">>},
+             {<<"media">>,<<"screen">>}],
+            []},
+           {comment,<<"[if lt IE 7]>\n   <style type=\"text/css\">\n     .no_ie { display: none; }\n   </style>\n   <![endif]">>},
+           {<<"link">>,
+            [{<<"rel">>,<<"icon">>},
+             {<<"href">>,<<"/static/images/favicon.ico">>},
+             {<<"type">>,<<"image/x-icon">>}],
+            []},
+           {<<"link">>,
+            [{<<"rel">>,<<"shortcut icon">>},
+             {<<"href">>,<<"/static/images/favicon.ico">>},
+             {<<"type">>,<<"image/x-icon">>}],
+            []}]},
+         {<<"body">>,
+          [{<<"id">>,<<"home">>},
+           {<<"class">>,<<"tundra">>}],
+          [<<"&lt;<this<!-- is -->CDATA>&gt;">>]}]},
+       parse(D0)),
+    ?assertEqual(
+       {<<"html">>,[],
+        [{pi, <<"xml:namespace">>,
+          [{<<"prefix">>,<<"o">>},
+           {<<"ns">>,<<"urn:schemas-microsoft-com:office:office">>}]}]},
+       parse(
+         <<"<html><?xml:namespace prefix=\"o\" ns=\"urn:schemas-microsoft-com:office:office\"?></html>">>)),
+    ?assertEqual(
+       {<<"html">>, [],
+        [{<<"dd">>, [], [<<"foo">>]},
+         {<<"dt">>, [], [<<"bar">>]}]},
+       parse(<<"<html><dd>foo<dt>bar</html>">>)),
+    %% Singleton sadness
+    ?assertEqual(
+       {<<"html">>, [],
+        [{<<"link">>, [], []},
+         <<"foo">>,
+         {<<"br">>, [], []},
+         <<"bar">>]},
+       parse(<<"<html><link>foo<br>bar</html>">>)),
+    ?assertEqual(
+       {<<"html">>, [],
+        [{<<"link">>, [], [<<"foo">>,
+                           {<<"br">>, [], []},
+                           <<"bar">>]}]},
+       parse(<<"<html><link>foo<br>bar</link></html>">>)),
+    %% Case insensitive tags
+    ?assertEqual(
+       {<<"html">>, [],
+        [{<<"head">>, [], [<<"foo">>,
+                           {<<"br">>, [], []},
+                           <<"BAR">>]},
+         {<<"body">>, [{<<"class">>, <<"">>}, {<<"bgcolor">>, <<"#Aa01fF">>}], []}
+        ]},
+       parse(<<"<html><Head>foo<bR>BAR</head><body Class=\"\" bgcolor=\"#Aa01fF\"></BODY></html>">>)),
+    ok.
+
+exhaustive_is_singleton_test() ->
+    T = mochiweb_cover:clause_lookup_table(?MODULE, is_singleton),
+    [?assertEqual(V, is_singleton(K)) || {K, V} <- T].
+
+tokenize_attributes_test() ->
+    ?assertEqual(
+       {<<"foo">>,
+        [{<<"bar">>, <<"b\"az">>},
+         {<<"wibble">>, <<"wibble">>},
+         {<<"taco", 16#c2, 16#a9>>, <<"bell">>},
+         {<<"quux">>, <<"quux">>}],
+        []},
+       parse(<<"<foo bar=\"b&quot;az\" wibble taco&copy;=bell quux">>)),
+    ok.
+
+tokens2_test() ->
+    D0 = <<"<channel><title>from __future__ import *</title><link>http://bob.pythonmac.org</link><description>Bob's Rants</description></channel>">>,
+    ?assertEqual(
+       [{start_tag,<<"channel">>,[],false},
+        {start_tag,<<"title">>,[],false},
+        {data,<<"from __future__ import *">>,false},
+        {end_tag,<<"title">>},
+        {start_tag,<<"link">>,[],true},
+        {data,<<"http://bob.pythonmac.org">>,false},
+        {end_tag,<<"link">>},
+        {start_tag,<<"description">>,[],false},
+        {data,<<"Bob's Rants">>,false},
+        {end_tag,<<"description">>},
+        {end_tag,<<"channel">>}],
+       tokens(D0)),
+    ok.
+
+to_tokens_test() ->
+    ?assertEqual(
+       [{start_tag, <<"p">>, [{class, 1}], false},
+        {end_tag, <<"p">>}],
+       to_tokens({p, [{class, 1}], []})),
+    ?assertEqual(
+       [{start_tag, <<"p">>, [], false},
+        {end_tag, <<"p">>}],
+       to_tokens({p})),
+    ?assertEqual(
+       [{'=', <<"data">>}],
+       to_tokens({'=', <<"data">>})),
+    ?assertEqual(
+       [{comment, <<"comment">>}],
+       to_tokens({comment, <<"comment">>})),
+    %% This is only allowed in sub-tags:
+    %% {p, [{"class", "foo"}]} as {p, [{"class", "foo"}], []}
+    %% On the outside it's always treated as follows:
+    %% {p, [], [{"class", "foo"}]} as {p, [], [{"class", "foo"}]}
+    ?assertEqual(
+       [{start_tag, <<"html">>, [], false},
+        {start_tag, <<"p">>, [{class, 1}], false},
+        {end_tag, <<"p">>},
+        {end_tag, <<"html">>}],
+       to_tokens({html, [{p, [{class, 1}]}]})),
+    ok.
+
+parse2_test() ->
+    D0 = <<"<channel><title>from __future__ import *</title><link>http://bob.pythonmac.org<br>foo</link><description>Bob's Rants</description></channel>">>,
+    ?assertEqual(
+       {<<"channel">>,[],
+        [{<<"title">>,[],[<<"from __future__ import *">>]},
+         {<<"link">>,[],[
+                         <<"http://bob.pythonmac.org">>,
+                         {<<"br">>,[],[]},
+                         <<"foo">>]},
+         {<<"description">>,[],[<<"Bob's Rants">>]}]},
+       parse(D0)),
+    ok.
+
+parse_tokens_test() ->
+    D0 = [{doctype,[<<"HTML">>,<<"PUBLIC">>,<<"-//W3C//DTD HTML 4.01 Transitional//EN">>]},
+          {data,<<"\n">>,true},
+          {start_tag,<<"html">>,[],false}],
+    ?assertEqual(
+       {<<"html">>, [], []},
+       parse_tokens(D0)),
+    D1 = D0 ++ [{end_tag, <<"html">>}],
+    ?assertEqual(
+       {<<"html">>, [], []},
+       parse_tokens(D1)),
+    D2 = D0 ++ [{start_tag, <<"body">>, [], false}],
+    ?assertEqual(
+       {<<"html">>, [], [{<<"body">>, [], []}]},
+       parse_tokens(D2)),
+    D3 = D0 ++ [{start_tag, <<"head">>, [], false},
+                {end_tag, <<"head">>},
+                {start_tag, <<"body">>, [], false}],
+    ?assertEqual(
+       {<<"html">>, [], [{<<"head">>, [], []}, {<<"body">>, [], []}]},
+       parse_tokens(D3)),
+    D4 = D3 ++ [{data,<<"\n">>,true},
+                {start_tag,<<"div">>,[{<<"class">>,<<"a">>}],false},
+                {start_tag,<<"a">>,[{<<"name">>,<<"#anchor">>}],false},
+                {end_tag,<<"a">>},
+                {end_tag,<<"div">>},
+                {start_tag,<<"div">>,[{<<"class">>,<<"b">>}],false},
+                {start_tag,<<"div">>,[{<<"class">>,<<"c">>}],false},
+                {end_tag,<<"div">>},
+                {end_tag,<<"div">>}],
+    ?assertEqual(
+       {<<"html">>, [],
+        [{<<"head">>, [], []},
+         {<<"body">>, [],
+          [{<<"div">>, [{<<"class">>, <<"a">>}], [{<<"a">>, [{<<"name">>, <<"#anchor">>}], []}]},
+           {<<"div">>, [{<<"class">>, <<"b">>}], [{<<"div">>, [{<<"class">>, <<"c">>}], []}]}
+          ]}]},
+       parse_tokens(D4)),
+    D5 = [{start_tag,<<"html">>,[],false},
+          {data,<<"\n">>,true},
+          {data,<<"boo">>,false},
+          {data,<<"hoo">>,false},
+          {data,<<"\n">>,true},
+          {end_tag,<<"html">>}],
+    ?assertEqual(
+       {<<"html">>, [], [<<"\nboohoo\n">>]},
+       parse_tokens(D5)),
+    D6 = [{start_tag,<<"html">>,[],false},
+          {data,<<"\n">>,true},
+          {data,<<"\n">>,true},
+          {end_tag,<<"html">>}],
+    ?assertEqual(
+       {<<"html">>, [], []},
+       parse_tokens(D6)),
+    D7 = [{start_tag,<<"html">>,[],false},
+          {start_tag,<<"ul">>,[],false},
+          {start_tag,<<"li">>,[],false},
+          {data,<<"word">>,false},
+          {start_tag,<<"li">>,[],false},
+          {data,<<"up">>,false},
+          {end_tag,<<"li">>},
+          {start_tag,<<"li">>,[],false},
+          {data,<<"fdsa">>,false},
+          {start_tag,<<"br">>,[],true},
+          {data,<<"asdf">>,false},
+          {end_tag,<<"ul">>},
+          {end_tag,<<"html">>}],
+    ?assertEqual(
+       {<<"html">>, [],
+        [{<<"ul">>, [],
+          [{<<"li">>, [], [<<"word">>]},
+           {<<"li">>, [], [<<"up">>]},
+           {<<"li">>, [], [<<"fdsa">>,{<<"br">>, [], []}, <<"asdf">>]}]}]},
+       parse_tokens(D7)),
+    ok.
+
+destack_test() ->
+    {<<"a">>, [], []} =
+        destack([{<<"a">>, [], []}]),
+    {<<"a">>, [], [{<<"b">>, [], []}]} =
+        destack([{<<"b">>, [], []}, {<<"a">>, [], []}]),
+    {<<"a">>, [], [{<<"b">>, [], [{<<"c">>, [], []}]}]} =
+     destack([{<<"c">>, [], []}, {<<"b">>, [], []}, {<<"a">>, [], []}]),
+    [{<<"a">>, [], [{<<"b">>, [], [{<<"c">>, [], []}]}]}] =
+     destack(<<"b">>,
+             [{<<"c">>, [], []}, {<<"b">>, [], []}, {<<"a">>, [], []}]),
+    [{<<"b">>, [], [{<<"c">>, [], []}]}, {<<"a">>, [], []}] =
+     destack(<<"c">>,
+             [{<<"c">>, [], []}, {<<"b">>, [], []},{<<"a">>, [], []}]),
+    ok.
+
+doctype_test() ->
+    ?assertEqual(
+       {<<"html">>,[],[{<<"head">>,[],[]}]},
+       mochiweb_html:parse("<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">"
+                           "<html><head></head></body></html>")),
+    %% http://code.google.com/p/mochiweb/issues/detail?id=52
+    ?assertEqual(
+       {<<"html">>,[],[{<<"head">>,[],[]}]},
+       mochiweb_html:parse("<html>"
+                           "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">"
+                           "<head></head></body></html>")),
+    %% http://github.com/mochi/mochiweb/pull/13
+    ?assertEqual(
+       {<<"html">>,[],[{<<"head">>,[],[]}]},
+       mochiweb_html:parse("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\"/>"
+                           "<html>"
+                           "<head></head></body></html>")),
+    ok.
+
+dumb_br_test() ->
+    %% http://code.google.com/p/mochiweb/issues/detail?id=71
+    ?assertEqual(
+       {<<"div">>,[],[{<<"br">>, [], []}, {<<"br">>, [], []}, <<"z">>]},
+       mochiweb_html:parse("<div><br/><br/>z</br/></br/></div>")),
+    ?assertEqual(
+       {<<"div">>,[],[{<<"br">>, [], []}, {<<"br">>, [], []}, <<"z">>]},
+       mochiweb_html:parse("<div><br><br>z</br/></br/></div>")),
+    ?assertEqual(
+       {<<"div">>,[],[{<<"br">>, [], []}, {<<"br">>, [], []}, <<"z">>, {<<"br">>, [], []}, {<<"br">>, [], []}]},
+       mochiweb_html:parse("<div><br><br>z<br/><br/></div>")),
+    ?assertEqual(
+       {<<"div">>,[],[{<<"br">>, [], []}, {<<"br">>, [], []}, <<"z">>]},
+       mochiweb_html:parse("<div><br><br>z</br></br></div>")).
+
+
+php_test() ->
+    %% http://code.google.com/p/mochiweb/issues/detail?id=71
+    ?assertEqual(
+       [{pi, <<"php\n">>}],
+       mochiweb_html:tokens(
+         "<?php\n?>")),
+    ?assertEqual(
+       {<<"div">>, [], [{pi, <<"php\n">>}]},
+       mochiweb_html:parse(
+         "<div><?php\n?></div>")),
+    ok.
+
+parse_unquoted_attr_test() ->
+    D0 = <<"<html><img src=/images/icon.png/></html>">>,
+    ?assertEqual(
+        {<<"html">>,[],[
+            { <<"img">>, [ { <<"src">>, <<"/images/icon.png">> } ], [] }
+        ]},
+        mochiweb_html:parse(D0)),
+
+    D1 = <<"<html><img src=/images/icon.png></img></html>">>,
+        ?assertEqual(
+            {<<"html">>,[],[
+                { <<"img">>, [ { <<"src">>, <<"/images/icon.png">> } ], [] }
+            ]},
+            mochiweb_html:parse(D1)),
+
+    D2 = <<"<html><img src=/images/icon&gt;.png width=100></img></html>">>,
+        ?assertEqual(
+            {<<"html">>,[],[
+                { <<"img">>, [ { <<"src">>, <<"/images/icon>.png">> }, { <<"width">>, <<"100">> } ], [] }
+            ]},
+            mochiweb_html:parse(D2)),
+    ok.
+
+parse_quoted_attr_test() ->
+    D0 = <<"<html><img src='/images/icon.png'></html>">>,
+    ?assertEqual(
+        {<<"html">>,[],[
+            { <<"img">>, [ { <<"src">>, <<"/images/icon.png">> } ], [] }
+        ]},
+        mochiweb_html:parse(D0)),
+
+    D1 = <<"<html><img src=\"/images/icon.png'></html>">>,
+    ?assertEqual(
+        {<<"html">>,[],[
+            { <<"img">>, [ { <<"src">>, <<"/images/icon.png'></html>">> } ], [] }
+        ]},
+        mochiweb_html:parse(D1)),
+
+    D2 = <<"<html><img src=\"/images/icon&gt;.png\"></html>">>,
+    ?assertEqual(
+        {<<"html">>,[],[
+            { <<"img">>, [ { <<"src">>, <<"/images/icon>.png">> } ], [] }
+        ]},
+        mochiweb_html:parse(D2)),
+
+    %% Quoted attributes can contain whitespace and newlines
+    D3 = <<"<html><a href=\"#\" onclick=\"javascript: test(1,\ntrue);\"></html>">>,
+    ?assertEqual(
+        {<<"html">>,[],[
+            { <<"a">>, [ { <<"href">>, <<"#">> }, {<<"onclick">>, <<"javascript: test(1,\ntrue);">>} ], [] }
+        ]},
+        mochiweb_html:parse(D3)),     
+    ok.
+
+parse_missing_attr_name_test() ->
+    D0 = <<"<html =black></html>">>,
+    ?assertEqual(
+        {<<"html">>, [ { <<"=">>, <<"=">> }, { <<"black">>, <<"black">> } ], [] },
+       mochiweb_html:parse(D0)),
+    ok.
+
+parse_broken_pi_test() ->
+	D0 = <<"<html><?xml:namespace prefix = o ns = \"urn:schemas-microsoft-com:office:office\" /></html>">>,
+	?assertEqual(
+		{<<"html">>, [], [
+			{ pi, <<"xml:namespace">>, [ { <<"prefix">>, <<"o">> },
+			                             { <<"ns">>, <<"urn:schemas-microsoft-com:office:office">> } ] }
+		] },
+		mochiweb_html:parse(D0)),
+	ok.
+
+parse_funny_singletons_test() ->
+	D0 = <<"<html><input><input>x</input></input></html>">>,
+	?assertEqual(
+		{<<"html">>, [], [
+			{ <<"input">>, [], [] },
+			{ <<"input">>, [], [ <<"x">> ] }
+		] },
+		mochiweb_html:parse(D0)),
+	ok.
+
+to_html_singleton_test() ->
+    D0 = <<"<link />">>,
+    T0 = {<<"link">>,[],[]},
+    ?assertEqual(D0, iolist_to_binary(to_html(T0))),
+
+    D1 = <<"<head><link /></head>">>,
+    T1 = {<<"head">>,[],[{<<"link">>,[],[]}]},
+    ?assertEqual(D1, iolist_to_binary(to_html(T1))),
+
+    D2 = <<"<head><link /><link /></head>">>,
+    T2 = {<<"head">>,[],[{<<"link">>,[],[]}, {<<"link">>,[],[]}]},
+    ?assertEqual(D2, iolist_to_binary(to_html(T2))),
+
+    %% Singleton tags are always rendered childless: any children in the
+    %% tree are dropped on output.
+    D3 = <<"<head><link /></head>">>,
+    T3 = {<<"head">>,[],[{<<"link">>,[],[<<"funny">>]}]},
+    ?assertEqual(D3, iolist_to_binary(to_html(T3))),
+
+    D4 = <<"<link />">>,
+    T4 = {<<"link">>,[],[<<"funny">>]},
+    ?assertEqual(D4, iolist_to_binary(to_html(T4))),
+
+    ok.
+
+parse_amp_test_() ->
+    [?_assertEqual(
+       {<<"html">>,[],
+        [{<<"body">>,[{<<"onload">>,<<"javascript:A('1&2')">>}],[]}]},
+       mochiweb_html:parse("<html><body onload=\"javascript:A('1&2')\"></body></html>")),
+     ?_assertEqual(
+        {<<"html">>,[],
+         [{<<"body">>,[{<<"onload">>,<<"javascript:A('1& 2')">>}],[]}]},
+        mochiweb_html:parse("<html><body onload=\"javascript:A('1& 2')\"></body></html>")),
+     ?_assertEqual(
+        {<<"html">>,[],
+         [{<<"body">>,[],[<<"& ">>]}]},
+        mochiweb_html:parse("<html><body>& </body></html>")),
+     ?_assertEqual(
+        {<<"html">>,[],
+         [{<<"body">>,[],[<<"&">>]}]},
+        mochiweb_html:parse("<html><body>&</body></html>"))].
+
+parse_unescaped_lt_test() ->
+    D1 = <<"<div> < < <a href=\"/\">Back</a></div>">>,
+    ?assertEqual(
+        {<<"div">>, [], [<<" < < ">>, {<<"a">>, [{<<"href">>, <<"/">>}], 
+                                       [<<"Back">>]}]},
+        mochiweb_html:parse(D1)),
+
+    D2 = <<"<div> << <a href=\"/\">Back</a></div>">>,
+    ?assertEqual(
+        {<<"div">>, [], [<<" << ">>, {<<"a">>, [{<<"href">>, <<"/">>}], 
+                                      [<<"Back">>]}]},
+    mochiweb_html:parse(D2)).
+
+-endif.

http://git-wip-us.apache.org/repos/asf/couchdb/blob/6fdb9e07/src/mochiweb/src/mochiweb_http.erl
----------------------------------------------------------------------
diff --git a/src/mochiweb/src/mochiweb_http.erl b/src/mochiweb/src/mochiweb_http.erl
new file mode 100644
index 0000000..c9acda8
--- /dev/null
+++ b/src/mochiweb/src/mochiweb_http.erl
@@ -0,0 +1,256 @@
+%% @author Bob Ippolito <bo...@mochimedia.com>
+%% @copyright 2007 Mochi Media, Inc.
+
+%% @doc HTTP server.
+
+-module(mochiweb_http).
+-author('bob@mochimedia.com').
+-export([start/1, start_link/1, stop/0, stop/1]).
+-export([loop/2]).
+-export([after_response/2, reentry/1]).
+-export([parse_range_request/1, range_skip_length/2]).
+
+-define(REQUEST_RECV_TIMEOUT, 300000).   %% timeout waiting for request line
+-define(HEADERS_RECV_TIMEOUT, 30000).    %% timeout waiting for headers
+
+-define(MAX_HEADERS, 1000).
+-define(DEFAULTS, [{name, ?MODULE},
+                   {port, 8888}]).
+
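+%% Normalize user options: the supplied request handler is wrapped as an
+%% {?MODULE, loop, [HttpLoop]} MFA so the acceptor pool re-enters loop/2,
+%% and missing name/port options are filled in from ?DEFAULTS.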
+parse_options(Options) ->
+    {loop, HttpLoop} = proplists:lookup(loop, Options),
+    Loop = {?MODULE, loop, [HttpLoop]},
+    Options1 = [{loop, Loop} | proplists:delete(loop, Options)],
+    mochilists:set_defaults(?DEFAULTS, Options1).
+
+stop() ->
+    mochiweb_socket_server:stop(?MODULE).
+
+stop(Name) ->
+    mochiweb_socket_server:stop(Name).
+
+%% @spec start(Options) -> ServerRet
+%%     Options = [option()]
+%%     Option = {name, atom()} | {ip, string() | tuple()} | {backlog, integer()}
+%%              | {nodelay, boolean()} | {acceptor_pool_size, integer()}
+%%              | {ssl, boolean()} | {profile_fun, undefined | (Props) -> ok}
+%%              | {link, false}
+%% @doc Start a mochiweb server.
+%%      profile_fun is used to profile accept timing.
+%%      After each accept, if defined, profile_fun is called with a proplist of a subset of the mochiweb_socket_server state and timing information.
+%%      The proplist is as follows: [{name, Name}, {port, Port}, {active_sockets, ActiveSockets}, {timing, Timing}].
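+%%
+%%      A minimal sketch (the port and handler fun below are illustrative,
+%%      not defaults):
+%%      ```
+%%      {ok, _Pid} = mochiweb_http:start(
+%%                     [{port, 8080},
+%%                      {loop, fun (Req) -> Req:ok({"text/plain", [], "ok"}) end}])
+%%      '''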
+%% @end
+start(Options) ->
+    mochiweb_socket_server:start(parse_options(Options)).
+
+start_link(Options) ->
+    mochiweb_socket_server:start_link(parse_options(Options)).
+
+loop(Socket, Body) ->
+    ok = mochiweb_socket:setopts(Socket, [{packet, http}]),
+    request(Socket, Body).
+
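+%% Wait for a request line in {packet, http} mode. Stray blank lines
+%% between keep-alive requests arrive as http_error tuples and are
+%% skipped; anything else unrecognized is answered with a 400.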
+request(Socket, Body) ->
+    ok = mochiweb_socket:setopts(Socket, [{active, once}]),
+    receive
+        {Protocol, _, {http_request, Method, Path, Version}} when Protocol == http orelse Protocol == ssl ->
+            ok = mochiweb_socket:setopts(Socket, [{packet, httph}]),
+            headers(Socket, {Method, Path, Version}, [], Body, 0);
+        {Protocol, _, {http_error, "\r\n"}} when Protocol == http orelse Protocol == ssl ->
+            request(Socket, Body);
+        {Protocol, _, {http_error, "\n"}} when Protocol == http orelse Protocol == ssl ->
+            request(Socket, Body);
+        {tcp_closed, _} ->
+            mochiweb_socket:close(Socket),
+            exit(normal);
+        {ssl_closed, _} ->
+            mochiweb_socket:close(Socket),
+            exit(normal);
+        _Other ->
+            handle_invalid_request(Socket)
+    after ?REQUEST_RECV_TIMEOUT ->
+        mochiweb_socket:close(Socket),
+        exit(normal)
+    end.
+
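+%% Build a continuation for handlers that finish a request out of band:
+%% calling it with the Req runs the usual post-response bookkeeping and
+%% resumes the keep-alive loop via after_response/2.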
+reentry(Body) ->
+    fun (Req) ->
+            ?MODULE:after_response(Body, Req)
+    end.
+
+headers(Socket, Request, Headers, _Body, ?MAX_HEADERS) ->
+    %% Too many headers sent, bad request.
+    ok = mochiweb_socket:setopts(Socket, [{packet, raw}]),
+    handle_invalid_request(Socket, Request, Headers);
+headers(Socket, Request, Headers, Body, HeaderCount) ->
+    ok = mochiweb_socket:setopts(Socket, [{active, once}]),
+    receive
+        {Protocol, _, http_eoh} when Protocol == http orelse Protocol == ssl ->
+            Req = new_request(Socket, Request, Headers),
+            call_body(Body, Req),
+            ?MODULE:after_response(Body, Req);
+        {Protocol, _, {http_header, _, Name, _, Value}} when Protocol == http orelse Protocol == ssl ->
+            headers(Socket, Request, [{Name, Value} | Headers], Body,
+                    1 + HeaderCount);
+        {tcp_closed, _} ->
+            mochiweb_socket:close(Socket),
+            exit(normal);
+        _Other ->
+            handle_invalid_request(Socket, Request, Headers)
+    after ?HEADERS_RECV_TIMEOUT ->
+        mochiweb_socket:close(Socket),
+        exit(normal)
+    end.
+
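+%% Dispatch to the user-supplied handler, which may be an {M, F, A} or
+%% {M, F} tuple or a fun of arity 1; the Req is always the first argument.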
+call_body({M, F, A}, Req) ->
+    erlang:apply(M, F, [Req | A]);
+call_body({M, F}, Req) ->
+    M:F(Req);
+call_body(Body, Req) ->
+    Body(Req).
+
+-spec handle_invalid_request(term()) -> no_return().
+handle_invalid_request(Socket) ->
+    handle_invalid_request(Socket, {'GET', {abs_path, "/"}, {0,9}}, []),
+    exit(normal).
+
+-spec handle_invalid_request(term(), term(), term()) -> no_return().
+handle_invalid_request(Socket, Request, RevHeaders) ->
+    Req = new_request(Socket, Request, RevHeaders),
+    Req:respond({400, [], []}),
+    mochiweb_socket:close(Socket),
+    exit(normal).
+
+new_request(Socket, Request, RevHeaders) ->
+    ok = mochiweb_socket:setopts(Socket, [{packet, raw}]),
+    mochiweb:new_request({Socket, Request, lists:reverse(RevHeaders)}).
+
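+%% After the handler returns: close the socket if the request demands it,
+%% otherwise clean up per-request state and loop for the next request.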
+after_response(Body, Req) ->
+    Socket = Req:get(socket),
+    case Req:should_close() of
+        true ->
+            mochiweb_socket:close(Socket),
+            exit(normal);
+        false ->
+            Req:cleanup(),
+            erlang:garbage_collect(),
+            ?MODULE:loop(Socket, Body)
+    end.
+
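+%% Parse the value of a Range request header into a list of
+%% {Start, End} | {Start, none} | {none, Suffix} specs, e.g.
+%%   parse_range_request("bytes=20-30,50-,-200")
+%%     =:= [{20, 30}, {50, none}, {none, 200}]
+%% "bytes=0-" (the whole entity) yields undefined, and malformed input
+%% yields fail.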
+parse_range_request("bytes=0-") ->
+    undefined;
+parse_range_request(RawRange) when is_list(RawRange) ->
+    try
+        "bytes=" ++ RangeString = RawRange,
+        Ranges = string:tokens(RangeString, ","),
+        lists:map(fun ("-" ++ V)  ->
+                          {none, list_to_integer(V)};
+                      (R) ->
+                          case string:tokens(R, "-") of
+                              [S1, S2] ->
+                                  {list_to_integer(S1), list_to_integer(S2)};
+                              [S] ->
+                                  {list_to_integer(S), none}
+                          end
+                  end,
+                  Ranges)
+    catch
+        _:_ ->
+            fail
+    end.
+
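+%% Convert one range spec into an absolute {Skip, Length} pair for a body
+%% of Size bytes, e.g. with Size = 60:
+%%   range_skip_length({none, 20}, 60) =:= {40, 20}   %% last 20 bytes
+%%   range_skip_length({30, none}, 60) =:= {30, 30}   %% byte 30 onwards
+%% An out-of-range suffix spec falls back to the whole body; other bad
+%% specs return invalid_range.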
+range_skip_length(Spec, Size) ->
+    case Spec of
+        {none, R} when R =< Size, R >= 0 ->
+            {Size - R, R};
+        {none, _OutOfRange} ->
+            {0, Size};
+        {R, none} when R >= 0, R < Size ->
+            {R, Size - R};
+        {_OutOfRange, none} ->
+            invalid_range;
+        {Start, End} when 0 =< Start, Start =< End, End < Size ->
+            {Start, End - Start + 1};
+        {Start, End} when 0 =< Start, Start =< End, End >= Size ->
+            {Start, Size - Start};
+        {_OutOfRange, _End} ->
+            invalid_range
+    end.
+
+%%
+%% Tests
+%%
+-ifdef(TEST).
+-include_lib("eunit/include/eunit.hrl").
+
+range_test() ->
+    %% valid, single ranges
+    ?assertEqual([{20, 30}], parse_range_request("bytes=20-30")),
+    ?assertEqual([{20, none}], parse_range_request("bytes=20-")),
+    ?assertEqual([{none, 20}], parse_range_request("bytes=-20")),
+
+    %% trivial single range
+    ?assertEqual(undefined, parse_range_request("bytes=0-")),
+
+    %% invalid, single ranges
+    ?assertEqual(fail, parse_range_request("")),
+    ?assertEqual(fail, parse_range_request("garbage")),
+    ?assertEqual(fail, parse_range_request("bytes=-20-30")),
+
+    %% valid, multiple range
+    ?assertEqual(
+       [{20, 30}, {50, 100}, {110, 200}],
+       parse_range_request("bytes=20-30,50-100,110-200")),
+    ?assertEqual(
+       [{20, none}, {50, 100}, {none, 200}],
+       parse_range_request("bytes=20-,50-100,-200")),
+
+    %% no ranges
+    ?assertEqual([], parse_range_request("bytes=")),
+    ok.
+
+range_skip_length_test() ->
+    Body = <<"012345678901234567890123456789012345678901234567890123456789">>,
+    BodySize = byte_size(Body), %% 60
+    BodySize = 60,
+
+    %% these values assume BodySize =:= 60
+    ?assertEqual({1,9}, range_skip_length({1,9}, BodySize)), %% 1-9
+    ?assertEqual({10,10}, range_skip_length({10,19}, BodySize)), %% 10-19
+    ?assertEqual({40, 20}, range_skip_length({none, 20}, BodySize)), %% -20
+    ?assertEqual({30, 30}, range_skip_length({30, none}, BodySize)), %% 30-
+
+    %% valid edge cases for range_skip_length
+    ?assertEqual({BodySize, 0}, range_skip_length({none, 0}, BodySize)),
+    ?assertEqual({0, BodySize}, range_skip_length({none, BodySize}, BodySize)),
+    ?assertEqual({0, BodySize}, range_skip_length({0, none}, BodySize)),
+    BodySizeLess1 = BodySize - 1,
+    ?assertEqual({BodySizeLess1, 1},
+                 range_skip_length({BodySize - 1, none}, BodySize)),
+    ?assertEqual({BodySizeLess1, 1},
+                 range_skip_length({BodySize - 1, BodySize+5}, BodySize)),
+    ?assertEqual({BodySizeLess1, 1},
+                 range_skip_length({BodySize - 1, BodySize}, BodySize)),
+
+    %% out of range, return whole thing
+    ?assertEqual({0, BodySize},
+                 range_skip_length({none, BodySize + 1}, BodySize)),
+    ?assertEqual({0, BodySize},
+                 range_skip_length({none, -1}, BodySize)),
+    ?assertEqual({0, BodySize},
+                 range_skip_length({0, BodySize + 1}, BodySize)),
+
+    %% invalid ranges
+    ?assertEqual(invalid_range,
+                 range_skip_length({-1, 30}, BodySize)),
+    ?assertEqual(invalid_range,
+                 range_skip_length({-1, BodySize + 1}, BodySize)),
+    ?assertEqual(invalid_range,
+                 range_skip_length({BodySize, 40}, BodySize)),
+    ?assertEqual(invalid_range,
+                 range_skip_length({-1, none}, BodySize)),
+    ?assertEqual(invalid_range,
+                 range_skip_length({BodySize, none}, BodySize)),
+    ok.
+
+-endif.

http://git-wip-us.apache.org/repos/asf/couchdb/blob/6fdb9e07/src/mochiweb/src/mochiweb_io.erl
----------------------------------------------------------------------
diff --git a/src/mochiweb/src/mochiweb_io.erl b/src/mochiweb/src/mochiweb_io.erl
new file mode 100644
index 0000000..8454b43
--- /dev/null
+++ b/src/mochiweb/src/mochiweb_io.erl
@@ -0,0 +1,43 @@
+%% @author Bob Ippolito <bo...@mochimedia.com>
+%% @copyright 2007 Mochi Media, Inc.
+
+%% @doc Utilities for dealing with IO devices (open files).
+
+-module(mochiweb_io).
+-author('bob@mochimedia.com').
+
+-export([iodevice_stream/3, iodevice_stream/2]).
+-export([iodevice_foldl/4, iodevice_foldl/3]).
+-export([iodevice_size/1]).
+-define(READ_SIZE, 8192).
+
+iodevice_foldl(F, Acc, IoDevice) ->
+    iodevice_foldl(F, Acc, IoDevice, ?READ_SIZE).
+
+iodevice_foldl(F, Acc, IoDevice, BufferSize) ->
+    case file:read(IoDevice, BufferSize) of
+        eof ->
+            Acc;
+        {ok, Data} ->
+            iodevice_foldl(F, F(Data, Acc), IoDevice, BufferSize)
+    end.
+
+iodevice_stream(Callback, IoDevice) ->
+    iodevice_stream(Callback, IoDevice, ?READ_SIZE).
+
+iodevice_stream(Callback, IoDevice, BufferSize) ->
+    F = fun (Data, ok) -> Callback(Data) end,
+    ok = iodevice_foldl(F, ok, IoDevice, BufferSize).
+
+iodevice_size(IoDevice) ->
+    {ok, Size} = file:position(IoDevice, eof),
+    {ok, 0} = file:position(IoDevice, bof),
+    Size.
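+
+%% Usage sketch (the path is illustrative): stream a file through a
+%% callback in ?READ_SIZE chunks. The callback must return ok, since
+%% iodevice_stream folds it with ok as the accumulator.
+%%
+%%   {ok, Fd} = file:open("/tmp/example.bin", [read, binary, raw]),
+%%   ok = iodevice_stream(fun (Chunk) ->
+%%                            io:format("~p bytes~n", [byte_size(Chunk)])
+%%                        end, Fd),
+%%   ok = file:close(Fd).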
+
+
+%%
+%% Tests
+%%
+-ifdef(TEST).
+-include_lib("eunit/include/eunit.hrl").
+-endif.

http://git-wip-us.apache.org/repos/asf/couchdb/blob/6fdb9e07/src/mochiweb/src/mochiweb_mime.erl
----------------------------------------------------------------------
diff --git a/src/mochiweb/src/mochiweb_mime.erl b/src/mochiweb/src/mochiweb_mime.erl
new file mode 100644
index 0000000..7d9f249
--- /dev/null
+++ b/src/mochiweb/src/mochiweb_mime.erl
@@ -0,0 +1,415 @@
+%% @author Bob Ippolito <bo...@mochimedia.com>
+%% @copyright 2007 Mochi Media, Inc.
+
+%% @doc Gives a good MIME type guess based on file extension.
+
+-module(mochiweb_mime).
+-author('bob@mochimedia.com').
+-export([from_extension/1]).
+
+%% @spec from_extension(S::string()) -> string() | undefined
+%% @doc Given a filename extension (e.g. ".html") return a guess for the MIME
+%%      type such as "text/html". Will return the atom undefined if no good
+%%      guess is available.
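+%%
+%%      ```
+%%      "text/html" = mochiweb_mime:from_extension(".html"),
+%%      undefined = mochiweb_mime:from_extension(".wtf")
+%%      '''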
+
+from_extension(".stl") ->
+    "application/SLA";
+from_extension(".stp") ->
+    "application/STEP";
+from_extension(".step") ->
+    "application/STEP";
+from_extension(".dwg") ->
+    "application/acad";
+from_extension(".ez") ->
+    "application/andrew-inset";
+from_extension(".ccad") ->
+    "application/clariscad";
+from_extension(".drw") ->
+    "application/drafting";
+from_extension(".tsp") ->
+    "application/dsptype";
+from_extension(".dxf") ->
+    "application/dxf";
+from_extension(".xls") ->
+    "application/excel";
+from_extension(".unv") ->
+    "application/i-deas";
+from_extension(".jar") ->
+    "application/java-archive";
+from_extension(".hqx") ->
+    "application/mac-binhex40";
+from_extension(".cpt") ->
+    "application/mac-compactpro";
+from_extension(".pot") ->
+    "application/vnd.ms-powerpoint";
+from_extension(".ppt") ->
+    "application/vnd.ms-powerpoint";
+from_extension(".dms") ->
+    "application/octet-stream";
+from_extension(".lha") ->
+    "application/octet-stream";
+from_extension(".lzh") ->
+    "application/octet-stream";
+from_extension(".oda") ->
+    "application/oda";
+from_extension(".ogg") ->
+    "application/ogg";
+from_extension(".ogm") ->
+    "application/ogg";
+from_extension(".pdf") ->
+    "application/pdf";
+from_extension(".pgp") ->
+    "application/pgp";
+from_extension(".ai") ->
+    "application/postscript";
+from_extension(".eps") ->
+    "application/postscript";
+from_extension(".ps") ->
+    "application/postscript";
+from_extension(".prt") ->
+    "application/pro_eng";
+from_extension(".rtf") ->
+    "application/rtf";
+from_extension(".smi") ->
+    "application/smil";
+from_extension(".smil") ->
+    "application/smil";
+from_extension(".sol") ->
+    "application/solids";
+from_extension(".vda") ->
+    "application/vda";
+from_extension(".xlm") ->
+    "application/vnd.ms-excel";
+from_extension(".cod") ->
+    "application/vnd.rim.cod";
+from_extension(".pgn") ->
+    "application/x-chess-pgn";
+from_extension(".cpio") ->
+    "application/x-cpio";
+from_extension(".csh") ->
+    "application/x-csh";
+from_extension(".deb") ->
+    "application/x-debian-package";
+from_extension(".dcr") ->
+    "application/x-director";
+from_extension(".dir") ->
+    "application/x-director";
+from_extension(".dxr") ->
+    "application/x-director";
+from_extension(".gz") ->
+    "application/x-gzip";
+from_extension(".hdf") ->
+    "application/x-hdf";
+from_extension(".ipx") ->
+    "application/x-ipix";
+from_extension(".ips") ->
+    "application/x-ipscript";
+from_extension(".js") ->
+    "application/x-javascript";
+from_extension(".skd") ->
+    "application/x-koan";
+from_extension(".skm") ->
+    "application/x-koan";
+from_extension(".skp") ->
+    "application/x-koan";
+from_extension(".skt") ->
+    "application/x-koan";
+from_extension(".latex") ->
+    "application/x-latex";
+from_extension(".lsp") ->
+    "application/x-lisp";
+from_extension(".scm") ->
+    "application/x-lotusscreencam";
+from_extension(".mif") ->
+    "application/x-mif";
+from_extension(".com") ->
+    "application/x-msdos-program";
+from_extension(".exe") ->
+    "application/octet-stream";
+from_extension(".cdf") ->
+    "application/x-netcdf";
+from_extension(".nc") ->
+    "application/x-netcdf";
+from_extension(".pl") ->
+    "application/x-perl";
+from_extension(".pm") ->
+    "application/x-perl";
+from_extension(".rar") ->
+    "application/x-rar-compressed";
+from_extension(".sh") ->
+    "application/x-sh";
+from_extension(".shar") ->
+    "application/x-shar";
+from_extension(".swf") ->
+    "application/x-shockwave-flash";
+from_extension(".sit") ->
+    "application/x-stuffit";
+from_extension(".sv4cpio") ->
+    "application/x-sv4cpio";
+from_extension(".sv4crc") ->
+    "application/x-sv4crc";
+from_extension(".tar.gz") ->
+    "application/x-tar-gz";
+from_extension(".tgz") ->
+    "application/x-tar-gz";
+from_extension(".tar") ->
+    "application/x-tar";
+from_extension(".tcl") ->
+    "application/x-tcl";
+from_extension(".texi") ->
+    "application/x-texinfo";
+from_extension(".texinfo") ->
+    "application/x-texinfo";
+from_extension(".man") ->
+    "application/x-troff-man";
+from_extension(".me") ->
+    "application/x-troff-me";
+from_extension(".ms") ->
+    "application/x-troff-ms";
+from_extension(".roff") ->
+    "application/x-troff";
+from_extension(".t") ->
+    "application/x-troff";
+from_extension(".tr") ->
+    "application/x-troff";
+from_extension(".ustar") ->
+    "application/x-ustar";
+from_extension(".src") ->
+    "application/x-wais-source";
+from_extension(".zip") ->
+    "application/zip";
+from_extension(".tsi") ->
+    "audio/TSP-audio";
+from_extension(".au") ->
+    "audio/basic";
+from_extension(".snd") ->
+    "audio/basic";
+from_extension(".kar") ->
+    "audio/midi";
+from_extension(".mid") ->
+    "audio/midi";
+from_extension(".midi") ->
+    "audio/midi";
+from_extension(".mp2") ->
+    "audio/mpeg";
+from_extension(".mp3") ->
+    "audio/mpeg";
+from_extension(".mpga") ->
+    "audio/mpeg";
+from_extension(".aif") ->
+    "audio/x-aiff";
+from_extension(".aifc") ->
+    "audio/x-aiff";
+from_extension(".aiff") ->
+    "audio/x-aiff";
+from_extension(".m3u") ->
+    "audio/x-mpegurl";
+from_extension(".wax") ->
+    "audio/x-ms-wax";
+from_extension(".wma") ->
+    "audio/x-ms-wma";
+from_extension(".rpm") ->
+    "audio/x-pn-realaudio-plugin";
+from_extension(".ram") ->
+    "audio/x-pn-realaudio";
+from_extension(".rm") ->
+    "audio/x-pn-realaudio";
+from_extension(".ra") ->
+    "audio/x-realaudio";
+from_extension(".wav") ->
+    "audio/x-wav";
+from_extension(".pdb") ->
+    "chemical/x-pdb";
+from_extension(".ras") ->
+    "image/cmu-raster";
+from_extension(".gif") ->
+    "image/gif";
+from_extension(".ief") ->
+    "image/ief";
+from_extension(".jpe") ->
+    "image/jpeg";
+from_extension(".jpeg") ->
+    "image/jpeg";
+from_extension(".jpg") ->
+    "image/jpeg";
+from_extension(".jp2") ->
+    "image/jp2";
+from_extension(".png") ->
+    "image/png";
+from_extension(".tif") ->
+    "image/tiff";
+from_extension(".tiff") ->
+    "image/tiff";
+from_extension(".pnm") ->
+    "image/x-portable-anymap";
+from_extension(".pbm") ->
+    "image/x-portable-bitmap";
+from_extension(".pgm") ->
+    "image/x-portable-graymap";
+from_extension(".ppm") ->
+    "image/x-portable-pixmap";
+from_extension(".rgb") ->
+    "image/x-rgb";
+from_extension(".xbm") ->
+    "image/x-xbitmap";
+from_extension(".xwd") ->
+    "image/x-xwindowdump";
+from_extension(".iges") ->
+    "model/iges";
+from_extension(".igs") ->
+    "model/iges";
+from_extension(".mesh") ->
+    "model/mesh";
+from_extension(".") ->
+    "";
+from_extension(".msh") ->
+    "model/mesh";
+from_extension(".silo") ->
+    "model/mesh";
+from_extension(".vrml") ->
+    "model/vrml";
+from_extension(".wrl") ->
+    "model/vrml";
+from_extension(".css") ->
+    "text/css";
+from_extension(".htm") ->
+    "text/html";
+from_extension(".html") ->
+    "text/html";
+from_extension(".asc") ->
+    "text/plain";
+from_extension(".c") ->
+    "text/plain";
+from_extension(".cc") ->
+    "text/plain";
+from_extension(".f90") ->
+    "text/plain";
+from_extension(".f") ->
+    "text/plain";
+from_extension(".hh") ->
+    "text/plain";
+from_extension(".m") ->
+    "text/plain";
+from_extension(".txt") ->
+    "text/plain";
+from_extension(".rtx") ->
+    "text/richtext";
+from_extension(".sgm") ->
+    "text/sgml";
+from_extension(".sgml") ->
+    "text/sgml";
+from_extension(".tsv") ->
+    "text/tab-separated-values";
+from_extension(".jad") ->
+    "text/vnd.sun.j2me.app-descriptor";
+from_extension(".etx") ->
+    "text/x-setext";
+from_extension(".xml") ->
+    "application/xml";
+from_extension(".dl") ->
+    "video/dl";
+from_extension(".fli") ->
+    "video/fli";
+from_extension(".flv") ->
+    "video/x-flv";
+from_extension(".gl") ->
+    "video/gl";
+from_extension(".mp4") ->
+    "video/mp4";
+from_extension(".mpe") ->
+    "video/mpeg";
+from_extension(".mpeg") ->
+    "video/mpeg";
+from_extension(".mpg") ->
+    "video/mpeg";
+from_extension(".mov") ->
+    "video/quicktime";
+from_extension(".qt") ->
+    "video/quicktime";
+from_extension(".viv") ->
+    "video/vnd.vivo";
+from_extension(".vivo") ->
+    "video/vnd.vivo";
+from_extension(".asf") ->
+    "video/x-ms-asf";
+from_extension(".asx") ->
+    "video/x-ms-asx";
+from_extension(".wmv") ->
+    "video/x-ms-wmv";
+from_extension(".wmx") ->
+    "video/x-ms-wmx";
+from_extension(".wvx") ->
+    "video/x-ms-wvx";
+from_extension(".avi") ->
+    "video/x-msvideo";
+from_extension(".movie") ->
+    "video/x-sgi-movie";
+from_extension(".mime") ->
+    "www/mime";
+from_extension(".ice") ->
+    "x-conference/x-cooltalk";
+from_extension(".vrm") ->
+    "x-world/x-vrml";
+from_extension(".spx") ->
+    "audio/ogg";
+from_extension(".xhtml") ->
+    "application/xhtml+xml";
+from_extension(".bz2") ->
+    "application/x-bzip2";
+from_extension(".doc") ->
+    "application/msword";
+from_extension(".z") ->
+    "application/x-compress";
+from_extension(".ico") ->
+    "image/x-icon";
+from_extension(".bmp") ->
+    "image/bmp";
+from_extension(".m4a") ->
+    "audio/mpeg";
+from_extension(".csv") ->
+    "text/csv";
+from_extension(".eot") ->
+    "application/vnd.ms-fontobject";
+from_extension(".m4v") ->
+    "video/mp4";
+from_extension(".svg") ->
+    "image/svg+xml";
+from_extension(".svgz") ->
+    "image/svg+xml";
+from_extension(".ttc") ->
+    "application/x-font-ttf";
+from_extension(".ttf") ->
+    "application/x-font-ttf";
+from_extension(".vcf") ->
+    "text/x-vcard";
+from_extension(".webm") ->
+    "video/web";
+from_extension(".webp") ->
+    "image/web";
+from_extension(".woff") ->
+    "application/x-font-woff";
+from_extension(".otf") ->
+    "font/opentype";
+from_extension(_) ->
+    undefined.
+
+%%
+%% Tests
+%%
+-ifdef(TEST).
+-include_lib("eunit/include/eunit.hrl").
+
+exhaustive_from_extension_test() ->
+    T = mochiweb_cover:clause_lookup_table(?MODULE, from_extension),
+    [?assertEqual(V, from_extension(K)) || {K, V} <- T].
+
+from_extension_test() ->
+    ?assertEqual("text/html",
+                 from_extension(".html")),
+    ?assertEqual(undefined,
+                 from_extension("")),
+    ?assertEqual(undefined,
+                 from_extension(".wtf")),
+    ok.
+
+-endif.