You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by rn...@apache.org on 2010/07/26 19:21:32 UTC

svn commit: r979368 [2/3] - in /couchdb/trunk: etc/couchdb/ share/www/script/ share/www/script/jspec/ share/www/script/test/ src/couchdb/ src/mochiweb/

Modified: couchdb/trunk/src/mochiweb/mochiweb.erl
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/mochiweb/mochiweb.erl?rev=979368&r1=979367&r2=979368&view=diff
==============================================================================
--- couchdb/trunk/src/mochiweb/mochiweb.erl (original)
+++ couchdb/trunk/src/mochiweb/mochiweb.erl Mon Jul 26 17:21:30 2010
@@ -9,7 +9,6 @@
 -export([start/0, stop/0]).
 -export([new_request/1, new_response/1]).
 -export([all_loaded/0, all_loaded/1, reload/0]).
--export([test/0]).
 
 %% @spec start() -> ok
 %% @doc Start the MochiWeb server.
@@ -24,21 +23,6 @@ stop() ->
     application:stop(crypto),
     Res.
 
-%% @spec test() -> ok
-%% @doc Run all of the tests for MochiWeb.
-test() ->
-    mochiweb_util:test(),
-    mochiweb_headers:test(),
-    mochiweb_cookies:test(),
-    mochihex:test(),
-    mochinum:test(),
-    mochijson:test(),
-    mochiweb_charref:test(),
-    mochiweb_html:test(),
-    mochifmt:test(),
-    test_request(),
-    ok.
-
 reload() ->
     [c:l(Module) || Module <- all_loaded()].
 
@@ -96,11 +80,6 @@ new_response({Request, Code, Headers}) -
 
 %% Internal API
 
-test_request() ->
-    R = mochiweb_request:new(z, z, "/foo/bar/baz%20wibble+quux?qs=2", z, []),
-    "/foo/bar/baz wibble quux" = R:get(path),
-    ok.
-
 ensure_started(App) ->
     case application:start(App) of
         ok ->
@@ -108,3 +87,203 @@ ensure_started(App) ->
         {error, {already_started, App}} ->
             ok
     end.
+
+
+%%
+%% Tests
+%%
+-include_lib("eunit/include/eunit.hrl").
+-ifdef(TEST).
+
+-record(treq, {path, body= <<>>, xreply= <<>>}).  % one test request: path, request body, expected reply body
+
+ssl_cert_opts() ->  % Build the certfile/keyfile options used when the test server runs over SSL.
+    EbinDir = filename:dirname(code:which(?MODULE)),  % directory of the file code:which reports for this module
+    CertDir = filename:join([EbinDir, "..", "support", "test-materials"]),  % certs are looked up relative to that directory
+    CertFile = filename:join(CertDir, "test_ssl_cert.pem"),
+    KeyFile = filename:join(CertDir, "test_ssl_key.pem"),
+    [{certfile, CertFile}, {keyfile, KeyFile}].
+
+with_server(Transport, ServerFun, ClientFun) ->  % Run ClientFun(Transport, Port) against a temporary mochiweb_http server.
+    ServerOpts0 = [{ip, "127.0.0.1"}, {port, 0}, {loop, ServerFun}],  % the port actually used is read back from the server below
+    ServerOpts = case Transport of
+        plain ->
+            ServerOpts0;
+        ssl ->
+            ServerOpts0 ++ [{ssl, true}, {ssl_opts, ssl_cert_opts()}]
+    end,
+    {ok, Server} = mochiweb_http:start(ServerOpts),
+    Port = mochiweb_socket_server:get(Server, port),
+    Res = (catch ClientFun(Transport, Port)),  % catch so the server is still stopped when the client crashes
+    mochiweb_http:stop(Server),
+    Res.  % ClientFun's result, or the caught exception term
+
+request_test() ->  % get(path) must drop the query string and turn the escaped space and the plus sign into spaces
+    R = mochiweb_request:new(z, z, "/foo/bar/baz%20wibble+quux?qs=2", z, []),
+    "/foo/bar/baz wibble quux" = R:get(path),
+    ok.
+
+single_http_GET_test() ->  % 1 GET, plain transport
+    do_GET(plain, 1).
+
+single_https_GET_test() ->  % 1 GET, ssl transport
+    do_GET(ssl, 1).
+
+multiple_http_GET_test() ->  % 3 GETs on one connection, plain transport
+    do_GET(plain, 3).
+
+multiple_https_GET_test() ->  % 3 GETs on one connection, ssl transport
+    do_GET(ssl, 3).
+
+hundred_http_GET_test() ->  % 100 GETs on one connection, plain transport
+    do_GET(plain, 100).
+
+hundred_https_GET_test() ->  % 100 GETs on one connection, ssl transport
+    do_GET(ssl, 100).
+
+single_128_http_POST_test() ->  % 1 POST with a 128-byte body, plain transport
+    do_POST(plain, 128, 1).
+
+single_128_https_POST_test() ->  % 1 POST with a 128-byte body, ssl transport
+    do_POST(ssl, 128, 1).
+
+single_2k_http_POST_test() ->  % 1 POST with a 2048-byte body, plain transport
+    do_POST(plain, 2048, 1).
+
+single_2k_https_POST_test() ->  % 1 POST with a 2048-byte body, ssl transport
+    do_POST(ssl, 2048, 1).
+
+single_100k_http_POST_test() ->  % 1 POST with a 102400-byte body, plain transport
+    do_POST(plain, 102400, 1).
+
+single_100k_https_POST_test() ->  % 1 POST with a 102400-byte body, ssl transport
+    do_POST(ssl, 102400, 1).
+
+multiple_100k_http_POST_test() ->  % 3 POSTs of 102400 bytes each on one connection, plain transport
+    do_POST(plain, 102400, 3).
+
+multiple_100K_https_POST_test() ->  % 3 POSTs of 102400 bytes each on one connection, ssl transport
+    do_POST(ssl, 102400, 3).
+
+hundred_128_http_POST_test() ->  % 100 POSTs of 128 bytes each on one connection, plain transport
+    do_POST(plain, 128, 100).
+
+hundred_128_https_POST_test() ->  % 100 POSTs of 128 bytes each on one connection, ssl transport
+    do_POST(ssl, 128, 100).
+
+do_GET(Transport, Times) ->  % Send Times GET requests over one connection and check that each reply echoes its path.
+    PathPrefix = "/whatever/",
+    ReplyPrefix = "You requested: ",
+    ServerFun = fun (Req) ->  % server side: answer with ReplyPrefix followed by the requested path, as text/plain
+                        Reply = ReplyPrefix ++ Req:get(path),
+                        Req:ok({"text/plain", Reply})
+                end,
+    TestReqs = [begin  % one #treq{} per request: /whatever/1 .. /whatever/Times
+                    Path = PathPrefix ++ integer_to_list(N),
+                    ExpectedReply = list_to_binary(ReplyPrefix ++ Path),  % binary, because the client socket is opened in binary mode
+                    #treq{path=Path, xreply=ExpectedReply}
+                end || N <- lists:seq(1, Times)],
+    ClientFun = new_client_fun('GET', TestReqs),
+    ok = with_server(Transport, ServerFun, ClientFun),  % a caught client crash is returned as a term and fails this match
+    ok.
+
+do_POST(Transport, Size, Times) ->  % Send Times POSTs of Size bytes over one connection and check each body is echoed back.
+    ServerFun = fun (Req) ->  % server side: read the request body and return it unchanged with status 201
+                        Body = Req:recv_body(),
+                        Headers = [{"Content-Type", "application/octet-stream"}],
+                        Req:respond({201, Headers, Body})
+                end,
+    TestReqs = [begin  % one #treq{} per request: /stuff/1 .. /stuff/Times
+                    Path = "/stuff/" ++ integer_to_list(N),
+                    Body = crypto:rand_bytes(Size),  % fresh random payload for every request
+                    #treq{path=Path, body=Body, xreply=Body}  % the expected reply is the payload itself
+                end || N <- lists:seq(1, Times)],
+    ClientFun = new_client_fun('POST', TestReqs),
+    ok = with_server(Transport, ServerFun, ClientFun),  % a caught client crash is returned as a term and fails this match
+    ok.
+
+new_client_fun(Method, TestReqs) ->  % Build the fun(Transport, Port) that with_server/3 calls to run the client side.
+    fun (Transport, Port) ->
+            client_request(Transport, Port, Method, TestReqs)
+    end.
+
+client_request(Transport, Port, Method, TestReqs) ->  % Connect to 127.0.0.1:Port and run all TestReqs over that one socket.
+    Opts = [binary, {active, false}, {packet, http}],  % passive binary socket, starting in http packet mode
+    SockFun = case Transport of  % wrap the socket in a fun so client_request/3 does not care about the transport
+        plain ->
+            {ok, Socket} = gen_tcp:connect("127.0.0.1", Port, Opts),
+            fun (recv) ->  % recv with length 0: whatever the current packet mode delivers next
+                    gen_tcp:recv(Socket, 0);
+                ({recv, Length}) ->
+                    gen_tcp:recv(Socket, Length);
+                ({send, Data}) ->
+                    gen_tcp:send(Socket, Data);
+                ({setopts, L}) ->
+                    inet:setopts(Socket, L)
+            end;
+        ssl ->
+            {ok, Socket} = ssl:connect("127.0.0.1", Port, [{ssl_imp, new} | Opts]),
+            fun (recv) ->  % same four operations as the plain branch, routed through the ssl module
+                    ssl:recv(Socket, 0);
+                ({recv, Length}) ->
+                    ssl:recv(Socket, Length);
+                ({send, Data}) ->
+                    ssl:send(Socket, Data);
+                ({setopts, L}) ->
+                    ssl:setopts(Socket, L)
+            end
+    end,
+    client_request(SockFun, Method, TestReqs).
+
+client_request(SockFun, _Method, []) ->  % all requests sent: the next recv must report that the server closed the connection
+    {the_end, {error, closed}} = {the_end, SockFun(recv)},  % the tag makes a badmatch identify this step
+    ok;
+client_request(SockFun, Method,
+               [#treq{path=Path, body=Body, xreply=ExReply} | Rest]) ->  % send one request, verify its reply, recurse on Rest
+    Request = [atom_to_list(Method), " ", Path, " HTTP/1.1\r\n",
+               client_headers(Body, Rest =:= []),  % the last request carries Connection: close
+               "\r\n",
+               Body],
+    ok = SockFun({send, Request}),
+    case Method of  % expected status line depends on the method: 200 for GET, 201 for POST
+        'GET' ->
+            {ok, {http_response, {1,1}, 200, "OK"}} = SockFun(recv);
+        'POST' ->
+            {ok, {http_response, {1,1}, 201, "Created"}} = SockFun(recv)
+    end,
+    ok = SockFun({setopts, [{packet, httph}]}),  % switch to header parsing
+    {ok, {http_header, _, 'Server', _, "MochiWeb" ++ _}} = SockFun(recv),  % headers must arrive in exactly this order
+    {ok, {http_header, _, 'Date', _, _}} = SockFun(recv),
+    {ok, {http_header, _, 'Content-Type', _, _}} = SockFun(recv),
+    {ok, {http_header, _, 'Content-Length', _, ConLenStr}} = SockFun(recv),
+    ContentLength = list_to_integer(ConLenStr),
+    {ok, http_eoh} = SockFun(recv),  % no further headers are allowed before the body
+    ok = SockFun({setopts, [{packet, raw}]}),  % read the body as raw bytes
+    {payload, ExReply} = {payload, drain_reply(SockFun, ContentLength, <<>>)},  % the body must equal the expected reply
+    ok = SockFun({setopts, [{packet, http}]}),  % back to http mode for the next status line
+    client_request(SockFun, Method, Rest).
+
+client_headers(Body, IsLastRequest) ->  % Return the request header lines as an iolist (without the terminating blank line).
+    ["Host: localhost\r\n",
+     case Body of
+        <<>> ->  % empty body: no Content-Type or Content-Length header is sent
+            "";
+        _ ->
+            ["Content-Type: application/octet-stream\r\n",
+             "Content-Length: ", integer_to_list(byte_size(Body)), "\r\n"]
+     end,
+     case IsLastRequest of
+         true ->  % ask for the connection to be closed after the final request
+             "Connection: close\r\n";
+         false ->
+             ""
+     end].
+
+drain_reply(_SockFun, 0, Acc) ->  % nothing left to read: return what was accumulated
+    Acc;
+drain_reply(SockFun, Length, Acc) ->  % Read Length more bytes from the socket and append them to Acc.
+    Sz = erlang:min(Length, 1024),  % read in chunks of at most 1024 bytes
+    {ok, B} = SockFun({recv, Sz}),  % assumes recv with an explicit length returns exactly Sz bytes
+    drain_reply(SockFun, Length - Sz, <<Acc/bytes, B/bytes>>).
+
+-endif.

Added: couchdb/trunk/src/mochiweb/mochiweb_acceptor.erl
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/mochiweb/mochiweb_acceptor.erl?rev=979368&view=auto
==============================================================================
--- couchdb/trunk/src/mochiweb/mochiweb_acceptor.erl (added)
+++ couchdb/trunk/src/mochiweb/mochiweb_acceptor.erl Mon Jul 26 17:21:30 2010
@@ -0,0 +1,48 @@
+%% @author Bob Ippolito <bo...@mochimedia.com>
+%% @copyright 2010 Mochi Media, Inc.
+
+%% @doc MochiWeb acceptor.
+
+-module(mochiweb_acceptor).
+-author('bob@mochimedia.com').
+
+-include("internal.hrl").
+
+-export([start_link/3, init/3]).
+
+start_link(Server, Listen, Loop) ->  % Spawn a linked process that runs init(Server, Listen, Loop).
+    proc_lib:spawn_link(?MODULE, init, [Server, Listen, Loop]).
+
+init(Server, Listen, Loop) ->  % Accept one connection on Listen, notify Server, then hand the socket to Loop.
+    T1 = now(),  % start time, used to report how long the accept took
+    case catch mochiweb_socket:accept(Listen) of
+        {ok, Socket} ->
+            gen_server:cast(Server, {accepted, self(), timer:now_diff(now(), T1)}),  % third element: microseconds spent in accept
+            call_loop(Loop, Socket);
+        {error, closed} ->  % closed, timeout and esslaccept end this acceptor with reason normal
+            exit(normal);
+        {error, timeout} ->
+            exit(normal);
+        {error, esslaccept} ->
+            exit(normal);
+        Other ->  % any other result, including a caught exception, is logged and exits abnormally
+            error_logger:error_report(
+              [{application, mochiweb},
+               "Accept failed error",
+               lists:flatten(io_lib:format("~p", [Other]))]),
+            exit({error, accept_failed})
+    end.
+
+call_loop({M, F}, Socket) ->  % Loop given as {Module, Function}: call it with the socket as the only argument
+    M:F(Socket);
+call_loop({M, F, A}, Socket) ->  % Loop given as {Module, Function, Args}: the socket is prepended to Args
+    erlang:apply(M, F, [Socket | A]);
+call_loop(Loop, Socket) ->  % anything else is called as a fun of one argument
+    Loop(Socket).
+
+%%
+%% Tests
+%%
+-include_lib("eunit/include/eunit.hrl").
+-ifdef(TEST).
+-endif.

Modified: couchdb/trunk/src/mochiweb/mochiweb_app.erl
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/mochiweb/mochiweb_app.erl?rev=979368&r1=979367&r2=979368&view=diff
==============================================================================
--- couchdb/trunk/src/mochiweb/mochiweb_app.erl (original)
+++ couchdb/trunk/src/mochiweb/mochiweb_app.erl Mon Jul 26 17:21:30 2010
@@ -18,3 +18,10 @@ start(_Type, _StartArgs) ->
 %% @doc application stop callback for mochiweb.
 stop(_State) ->
     ok.
+
+%%
+%% Tests
+%%
+-include_lib("eunit/include/eunit.hrl").
+-ifdef(TEST).
+-endif.

Modified: couchdb/trunk/src/mochiweb/mochiweb_charref.erl
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/mochiweb/mochiweb_charref.erl?rev=979368&r1=979367&r2=979368&view=diff
==============================================================================
--- couchdb/trunk/src/mochiweb/mochiweb_charref.erl (original)
+++ couchdb/trunk/src/mochiweb/mochiweb_charref.erl Mon Jul 26 17:21:30 2010
@@ -3,7 +3,7 @@
 
 %% @doc Converts HTML 4 charrefs and entities to codepoints.
 -module(mochiweb_charref).
--export([charref/1, test/0]).
+-export([charref/1]).
 
 %% External API.
 
@@ -27,16 +27,6 @@ charref([$# | L]) ->
 charref(L) ->
     entity(L).
 
-%% @spec test() -> ok
-%% @doc Run tests for mochiweb_charref.
-test() ->
-    1234 = charref("#1234"),
-    255 = charref("#xfF"),
-    255 = charref("#XFf"),
-    38 = charref("amp"),
-    undefined = charref("not_an_entity"),
-    ok.
-
 %% Internal API.
 
 entity("nbsp") -> 160;
@@ -293,3 +283,26 @@ entity("rsaquo") -> 8250;
 entity("euro") -> 8364;
 entity(_) -> undefined.
 
+
+%%
+%% Tests
+%%
+-include_lib("eunit/include/eunit.hrl").
+-ifdef(TEST).
+
+exhaustive_entity_test() ->  % Call entity/1 with every key of the table built from entity's own clauses.
+    T = mochiweb_cover:clause_lookup_table(?MODULE, entity),  % {Input, Output} pairs read from this module's compiled code
+    [?assertEqual(V, entity(K)) || {K, V} <- T].
+
+charref_test() ->  % charref/1 on numeric references, named entities and invalid input
+    1234 = charref("#1234"),  % decimal reference
+    255 = charref("#xfF"),  % hex reference, mixed-case digits
+    255 = charref(<<"#XFf">>),  % binary input and an upper-case X prefix
+    38 = charref("amp"),  % named entity
+    38 = charref(<<"amp">>),
+    undefined = charref("not_an_entity"),  % unknown names and malformed numbers give undefined
+    undefined = charref("#not_an_entity"),
+    undefined = charref("#xnot_an_entity"),
+    ok.
+
+-endif.

Modified: couchdb/trunk/src/mochiweb/mochiweb_cookies.erl
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/mochiweb/mochiweb_cookies.erl?rev=979368&r1=979367&r2=979368&view=diff
==============================================================================
--- couchdb/trunk/src/mochiweb/mochiweb_cookies.erl (original)
+++ couchdb/trunk/src/mochiweb/mochiweb_cookies.erl Mon Jul 26 17:21:30 2010
@@ -4,7 +4,7 @@
 %% @doc HTTP Cookie parsing and generating (RFC 2109, RFC 2965).
 
 -module(mochiweb_cookies).
--export([parse_cookie/1, cookie/3, cookie/2, test/0]).
+-export([parse_cookie/1, cookie/3, cookie/2]).
 
 -define(QUOTE, $\").
 
@@ -130,13 +130,6 @@ parse_cookie("") ->
 parse_cookie(Cookie) ->
     parse_cookie(Cookie, []).
 
-%% @spec test() -> ok
-%% @doc Run tests for mochiweb_cookies.
-test() ->
-    parse_cookie_test(),
-    cookie_test(),
-    ok.
-
 %% Internal API
 
 parse_cookie([], Acc) ->
@@ -198,24 +191,6 @@ skip_past_separator([$, | Rest]) ->
 skip_past_separator([_ | Rest]) ->
     skip_past_separator(Rest).
 
-parse_cookie_test() ->
-    %% RFC example
-    C1 = "$Version=\"1\"; Customer=\"WILE_E_COYOTE\"; $Path=\"/acme\";
-    Part_Number=\"Rocket_Launcher_0001\"; $Path=\"/acme\";
-    Shipping=\"FedEx\"; $Path=\"/acme\"",
-    [
-     {"Customer","WILE_E_COYOTE"},
-     {"Part_Number","Rocket_Launcher_0001"},
-     {"Shipping","FedEx"}
-    ] = parse_cookie(C1),
-    %% Potential edge cases
-    [{"foo", "x"}] = parse_cookie("foo=\"\\x\""),
-    [] = parse_cookie("="),
-    [{"foo", ""}, {"bar", ""}] = parse_cookie("  foo ; bar  "),
-    [{"foo", ""}, {"bar", ""}] = parse_cookie("foo=;bar="),
-    [{"foo", "\";"}, {"bar", ""}] = parse_cookie("foo = \"\\\";\";bar "),
-    [{"foo", "\";bar"}] = parse_cookie("foo=\"\\\";bar").
-
 any_to_list(V) when is_list(V) ->
     V;
 any_to_list(V) when is_atom(V) ->
@@ -225,6 +200,81 @@ any_to_list(V) when is_binary(V) ->
 any_to_list(V) when is_integer(V) ->
     integer_to_list(V).
 
+%%
+%% Tests
+%%
+-include_lib("eunit/include/eunit.hrl").
+-ifdef(TEST).
+
+quote_test() ->  % quote/1 must reject ":wq" with cookie_quoting_required and return "foo" for the atom foo
+    %% ?assertError is not compatible with cover; the 'of' branch makes a missing error fail
+    ok = try quote(":wq") of Quoted -> {no_error_raised, Quoted}
+         catch error:{cookie_quoting_required, ":wq"} -> ok
+         end,
+    ?assertEqual(
+       "foo",
+       quote(foo)),
+    ok.
+
+parse_cookie_test() ->  % parse_cookie/1 on the RFC example and a set of edge cases
+    %% RFC example
+    C1 = "$Version=\"1\"; Customer=\"WILE_E_COYOTE\"; $Path=\"/acme\";
+    Part_Number=\"Rocket_Launcher_0001\"; $Path=\"/acme\";
+    Shipping=\"FedEx\"; $Path=\"/acme\"",
+    ?assertEqual(  % the $Version and $Path attributes do not appear in the result
+       [{"Customer","WILE_E_COYOTE"},
+        {"Part_Number","Rocket_Launcher_0001"},
+        {"Shipping","FedEx"}],
+       parse_cookie(C1)),
+    %% Potential edge cases
+    ?assertEqual(  % backslash escape inside a quoted value
+       [{"foo", "x"}],
+       parse_cookie("foo=\"\\x\"")),
+    ?assertEqual(  % a lone = yields no cookie
+       [],
+       parse_cookie("=")),
+    ?assertEqual(  % names without = get an empty value; surrounding spaces are dropped
+       [{"foo", ""}, {"bar", ""}],
+       parse_cookie("  foo ; bar  ")),
+    ?assertEqual(
+       [{"foo", ""}, {"bar", ""}],
+       parse_cookie("foo=;bar=")),
+    ?assertEqual(  % an escaped quote and a semicolon inside a quoted value
+       [{"foo", "\";"}, {"bar", ""}],
+       parse_cookie("foo = \"\\\";\";bar ")),
+    ?assertEqual(  % unterminated quoted value runs to the end of the input
+       [{"foo", "\";bar"}],
+       parse_cookie("foo=\"\\\";bar")),
+    ?assertEqual(  % empty input
+       [],
+       parse_cookie([])),
+    ?assertEqual(  % a comma also separates cookies
+       [{"foo", "bar"}, {"baz", "wibble"}],
+       parse_cookie("foo=bar , baz=wibble ")),
+    ok.
+
+domain_test() ->  % cookie/3 with the domain and http_only options
+    ?assertEqual(
+       {"Set-Cookie",
+        "Customer=WILE_E_COYOTE; "
+        "Version=1; "
+        "Domain=acme.com; "
+        "HttpOnly"},  % Domain comes before HttpOnly although the options are passed in the other order
+       cookie("Customer", "WILE_E_COYOTE",
+              [{http_only, true}, {domain, "acme.com"}])),
+    ok.
+
+local_time_test() ->  % cookie/3 with max_age but no local_time option must still emit an Expires field
+    {"Set-Cookie", S} = cookie("Customer", "WILE_E_COYOTE",
+                               [{max_age, 111}, {secure, true}]),
+    ?assertMatch(  % only the field names and order are checked; the Expires value itself is not
+       ["Customer=WILE_E_COYOTE",
+        " Version=1",
+        " Expires=" ++ _,
+        " Max-Age=111",
+        " Secure"],
+       string:tokens(S, ";")),
+    ok.
 
 cookie_test() ->
     C1 = {"Set-Cookie",
@@ -238,8 +288,8 @@ cookie_test() ->
     C1 = cookie(<<"Customer">>, <<"WILE_E_COYOTE">>, [{path, <<"/acme">>}]),
 
     {"Set-Cookie","=NoKey; Version=1"} = cookie("", "NoKey", []),
-
-        LocalTime = calendar:universal_time_to_local_time({{2007, 5, 15}, {13, 45, 33}}),
+    {"Set-Cookie","=NoKey; Version=1"} = cookie("", "NoKey"),
+    LocalTime = calendar:universal_time_to_local_time({{2007, 5, 15}, {13, 45, 33}}),
     C2 = {"Set-Cookie",
           "Customer=WILE_E_COYOTE; "
           "Version=1; "
@@ -255,3 +305,5 @@ cookie_test() ->
     C3 = cookie("Customer", "WILE_E_COYOTE",
                 [{max_age, 86417}, {local_time, LocalTime}]),
     ok.
+
+-endif.

Added: couchdb/trunk/src/mochiweb/mochiweb_cover.erl
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/mochiweb/mochiweb_cover.erl?rev=979368&view=auto
==============================================================================
--- couchdb/trunk/src/mochiweb/mochiweb_cover.erl (added)
+++ couchdb/trunk/src/mochiweb/mochiweb_cover.erl Mon Jul 26 17:21:30 2010
@@ -0,0 +1,75 @@
+%% @author Bob Ippolito <bo...@mochimedia.com>
+%% @copyright 2010 Mochi Media, Inc.
+
+%% @doc Workarounds for various cover deficiencies.
+-module(mochiweb_cover).
+-export([get_beam/1, get_abstract_code/1,
+         get_clauses/2, clause_lookup_table/1]).
+-export([clause_lookup_table/2]).
+
+%% Internal
+
+get_beam(Module) ->  % Return the object code binary for Module.
+    {Module, Beam, _Path} = code:get_object_code(Module),  % any other return value fails this match
+    Beam.
+
+get_abstract_code(Beam) ->  % Return the list of abstract forms stored in Beam's abstract_code chunk.
+    {ok, {_Module,
+          [{abstract_code,
+            {raw_abstract_v1, L}}]}} = beam_lib:chunks(Beam, [abstract_code]),  % badmatch unless the chunk holds raw_abstract_v1 code
+    L.
+
+get_clauses(Function, Code) ->  % Return the clause list of the function named Function in the abstract forms Code.
+    [L] = [Clauses || {function, _, FName, _, Clauses}
+                          <- Code, FName =:= Function],  % arity is ignored; exactly one function of that name must exist
+    L.
+
+clause_lookup_table(Module, Function) ->  % Build the {Input, Output} table for Module:Function from its compiled code.
+    clause_lookup_table(
+      get_clauses(Function,
+                  get_abstract_code(get_beam(Module)))).
+
+clause_lookup_table(Clauses) ->  % Turn a list of abstract clauses into {Input, Output} pairs, keeping clause order.
+    lists:foldr(fun clause_fold/2, [], Clauses).  % foldr plus prepending in clause_fold/2 preserves the original order
+
+clause_fold({clause, _,  % only clauses with one argument, no guards and a single body expression are considered
+             [InTerm],
+             _Guards=[],
+             [OutTerm]},
+            Acc) ->
+    try [{erl_parse:normalise(InTerm), erl_parse:normalise(OutTerm)} | Acc]  % convert both abstract terms to plain Erlang terms
+    catch error:_ -> Acc  % normalise failed (e.g. the clause uses a variable): leave the clause out of the table
+    end;
+clause_fold(_, Acc) ->  % every other clause shape is skipped
+    Acc.
+
+%%
+%% Tests
+%%
+-include_lib("eunit/include/eunit.hrl").
+-ifdef(TEST).
+foo_table(a) -> b;  % fixture for the tests below: literal clauses that should end up in the lookup table
+foo_table("a") -> <<"b">>;
+foo_table(123) -> {4, 3, 2};
+foo_table([list]) -> [];
+foo_table([list1, list2]) -> [list1, list2, list3];
+foo_table(ignored) -> some, code, ignored;  % body has several expressions, so the table must leave it out
+foo_table(Var) -> Var.  % variable clause, also expected to be left out
+
+foo_table_test() ->  % every {Input, Output} pair in the table must agree with calling foo_table/1 directly
+    T = clause_lookup_table(?MODULE, foo_table),
+    [?assertEqual(V, foo_table(K)) || {K, V} <- T].
+
+clause_lookup_table_test() ->  % the table for foo_table/1 holds exactly its five single-expression literal clauses, in order
+    ?assertEqual(b, foo_table(a)),
+    ?assertEqual(ignored, foo_table(ignored)),  % the skipped clauses still work when called directly
+    ?assertEqual('Var', foo_table('Var')),
+    ?assertEqual(
+       [{a, b},
+        {"a", <<"b">>},
+        {123, {4, 3, 2}},
+        {[list], []},
+        {[list1, list2], [list1, list2, list3]}],
+       clause_lookup_table(?MODULE, foo_table)).
+
+-endif.

Modified: couchdb/trunk/src/mochiweb/mochiweb_echo.erl
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/mochiweb/mochiweb_echo.erl?rev=979368&r1=979367&r2=979368&view=diff
==============================================================================
--- couchdb/trunk/src/mochiweb/mochiweb_echo.erl (original)
+++ couchdb/trunk/src/mochiweb/mochiweb_echo.erl Mon Jul 26 17:21:30 2010
@@ -18,9 +18,9 @@ start() ->
                                   {loop, {?MODULE, loop}}]).
 
 loop(Socket) ->
-    case gen_tcp:recv(Socket, 0, 30000) of
+    case mochiweb_socket:recv(Socket, 0, 30000) of
         {ok, Data} ->
-            case gen_tcp:send(Socket, Data) of
+            case mochiweb_socket:send(Socket, Data) of
                 ok ->
                     loop(Socket);
                 _ ->
@@ -29,3 +29,10 @@ loop(Socket) ->
         _Other ->
             exit(normal)
     end.
+
+%%
+%% Tests
+%%
+-include_lib("eunit/include/eunit.hrl").
+-ifdef(TEST).
+-endif.

Modified: couchdb/trunk/src/mochiweb/mochiweb_headers.erl
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/mochiweb/mochiweb_headers.erl?rev=979368&r1=979367&r2=979368&view=diff
==============================================================================
--- couchdb/trunk/src/mochiweb/mochiweb_headers.erl (original)
+++ couchdb/trunk/src/mochiweb/mochiweb_headers.erl Mon Jul 26 17:21:30 2010
@@ -10,66 +10,11 @@
 -export([default/3, enter_from_list/2, default_from_list/2]).
 -export([to_list/1, make/1]).
 -export([from_binary/1]).
--export([test/0]).
 
 %% @type headers().
 %% @type key() = atom() | binary() | string().
 %% @type value() = atom() | binary() | string() | integer().
 
-%% @spec test() -> ok
-%% @doc Run tests for this module.
-test() ->
-    H = ?MODULE:make([{hdr, foo}, {"Hdr", "bar"}, {'Hdr', 2}]),
-    [{hdr, "foo, bar, 2"}] = ?MODULE:to_list(H),
-    H1 = ?MODULE:insert(taco, grande, H),
-    [{hdr, "foo, bar, 2"}, {taco, "grande"}] = ?MODULE:to_list(H1),
-    H2 = ?MODULE:make([{"Set-Cookie", "foo"}]),
-    [{"Set-Cookie", "foo"}] = ?MODULE:to_list(H2),
-    H3 = ?MODULE:insert("Set-Cookie", "bar", H2),
-    [{"Set-Cookie", "foo"}, {"Set-Cookie", "bar"}] = ?MODULE:to_list(H3),
-    "foo, bar" = ?MODULE:get_value("set-cookie", H3),
-    {value, {"Set-Cookie", "foo, bar"}} = ?MODULE:lookup("set-cookie", H3),
-    undefined = ?MODULE:get_value("shibby", H3),
-    none = ?MODULE:lookup("shibby", H3),
-    H4 = ?MODULE:insert("content-type",
-                        "application/x-www-form-urlencoded; charset=utf8",
-                        H3),
-    "application/x-www-form-urlencoded" = ?MODULE:get_primary_value(
-                                             "content-type", H4),
-    H4 = ?MODULE:delete_any("nonexistent-header", H4),
-    H3 = ?MODULE:delete_any("content-type", H4),
-    HB = <<"Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n">>,
-    H_HB = ?MODULE:from_binary(HB),
-    H_HB = ?MODULE:from_binary(binary_to_list(HB)),
-    "47" = ?MODULE:get_value("Content-Length", H_HB),
-    "text/plain" = ?MODULE:get_value("Content-Type", H_HB),
-    L_H_HB = ?MODULE:to_list(H_HB),
-    2 = length(L_H_HB),
-    true = lists:member({'Content-Length', "47"}, L_H_HB),
-    true = lists:member({'Content-Type', "text/plain"}, L_H_HB),
-    HL = [ <<"Content-Length: 47\r\n">>, <<"Content-Type: text/plain\r\n">> ],
-    HL2 = [ "Content-Length: 47\r\n", <<"Content-Type: text/plain\r\n">> ],
-    HL3 = [ <<"Content-Length: 47\r\n">>, "Content-Type: text/plain\r\n" ],
-    H_HL = ?MODULE:from_binary(HL),
-    H_HL = ?MODULE:from_binary(HL2),
-    H_HL = ?MODULE:from_binary(HL3),
-    "47" = ?MODULE:get_value("Content-Length", H_HL),
-    "text/plain" = ?MODULE:get_value("Content-Type", H_HL),
-    L_H_HL = ?MODULE:to_list(H_HL),
-    2 = length(L_H_HL),
-    true = lists:member({'Content-Length', "47"}, L_H_HL),
-    true = lists:member({'Content-Type', "text/plain"}, L_H_HL),
-    [] = ?MODULE:to_list(?MODULE:from_binary(<<>>)),
-    [] = ?MODULE:to_list(?MODULE:from_binary(<<"">>)),
-    [] = ?MODULE:to_list(?MODULE:from_binary(<<"\r\n">>)),
-    [] = ?MODULE:to_list(?MODULE:from_binary(<<"\r\n\r\n">>)),
-    [] = ?MODULE:to_list(?MODULE:from_binary("")),
-    [] = ?MODULE:to_list(?MODULE:from_binary([<<>>])),
-    [] = ?MODULE:to_list(?MODULE:from_binary([<<"">>])),
-    [] = ?MODULE:to_list(?MODULE:from_binary([<<"\r\n">>])),
-    [] = ?MODULE:to_list(?MODULE:from_binary([<<"\r\n\r\n">>])),
-    ok.
-
 %% @spec empty() -> headers()
 %% @doc Create an empty headers structure.
 empty() ->
@@ -83,35 +28,34 @@ make(L) when is_list(L) ->
 make(T) when is_tuple(T) ->
     T.
 
-%% @spec from_binary(RawHttpHeader()) -> headers() 
-%% @type RawHttpHeader() -> string() | binary() | [ string() | binary() ]
-%%
+%% @spec from_binary(iolist()) -> headers()
 %% @doc Transforms a raw HTTP header into a mochiweb headers structure.
 %%
 %%      The given raw HTTP header can be one of the following:
 %%
-%%      1) A string or a binary representing a full HTTP header ending with 
+%%      1) A string or a binary representing a full HTTP header ending with
 %%         double CRLF.
 %%         Examples:
+%%         ```
 %%         "Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n"
-%%         <<"Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n">>
+%%         <<"Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n">>'''
 %%
-%%      2) A list of binaries or strings where each element represents a raw 
+%%      2) A list of binaries or strings where each element represents a raw
 %%         HTTP header line ending with a single CRLF.
 %%         Examples:
-%%         [ <<"Content-Length: 47\r\n">>, <<"Content-Type: text/plain\r\n">> ]
-%%         [ "Content-Length: 47\r\n", "Content-Type: text/plain\r\n" ]
-%%         [ "Content-Length: 47\r\n", <<"Content-Type: text/plain\r\n">> ]
+%%         ```
+%%         [<<"Content-Length: 47\r\n">>, <<"Content-Type: text/plain\r\n">>]
+%%         ["Content-Length: 47\r\n", "Content-Type: text/plain\r\n"]
+%%         ["Content-Length: 47\r\n", <<"Content-Type: text/plain\r\n">>]'''
 %%
 from_binary(RawHttpHeader) when is_binary(RawHttpHeader) ->
     from_binary(RawHttpHeader, []);
-
 from_binary(RawHttpHeaderList) ->
     from_binary(list_to_binary([RawHttpHeaderList, "\r\n"])).
 
 from_binary(RawHttpHeader, Acc) ->
     case erlang:decode_packet(httph, RawHttpHeader, []) of
-        { ok, {http_header, _, H, _, V}, Rest } ->
+        {ok, {http_header, _, H, _, V}, Rest} ->
             from_binary(Rest, [{H, V} | Acc]);
         _ ->
             make(Acc)
@@ -248,4 +192,108 @@ any_to_list(V) when is_binary(V) ->
 any_to_list(V) when is_integer(V) ->
     integer_to_list(V).
 
+%%
+%% Tests.
+%%
+-include_lib("eunit/include/eunit.hrl").
+-ifdef(TEST).
+
+make_test() ->  % make/1 applied to an existing headers structure returns it unchanged
+    Identity = make([{hdr, foo}]),
+    ?assertEqual(
+       Identity,
+       make(Identity)).
+
+enter_from_list_test() ->  % enter_from_list/2 adds new headers and overwrites existing ones
+    H = make([{hdr, foo}]),
+    ?assertEqual(  % new key is added next to the existing one
+       [{baz, "wibble"}, {hdr, "foo"}],
+       to_list(enter_from_list([{baz, wibble}], H))),
+    ?assertEqual(  % existing key: the value is replaced
+       [{hdr, "bar"}],
+       to_list(enter_from_list([{hdr, bar}], H))),
+    ok.
+
+default_from_list_test() ->  % default_from_list/2 adds missing headers but never overwrites existing ones
+    H = make([{hdr, foo}]),
+    ?assertEqual(  % missing key is added
+       [{baz, "wibble"}, {hdr, "foo"}],
+       to_list(default_from_list([{baz, wibble}], H))),
+    ?assertEqual(  % existing key keeps its original value
+       [{hdr, "foo"}],
+       to_list(default_from_list([{hdr, bar}], H))),
+    ok.
+
+get_primary_value_test() ->  % get_primary_value/2 returns the part of a header value before the first semicolon
+    H = make([{hdr, foo}, {baz, <<"wibble;taco">>}]),
+    ?assertEqual(  % value without a semicolon is returned whole
+       "foo",
+       get_primary_value(hdr, H)),
+    ?assertEqual(  % missing header
+       undefined,
+       get_primary_value(bar, H)),
+    ?assertEqual(  % binary key; everything from the semicolon on is dropped
+       "wibble",
+       get_primary_value(<<"baz">>, H)),
+    ok.
+
+set_cookie_test() ->  % repeated set-cookie headers stay separate entries in to_list/1, in insertion order
+    H = make([{"set-cookie", foo}, {"set-cookie", bar}, {"set-cookie", baz}]),
+    ?assertEqual(
+       [{"set-cookie", "foo"}, {"set-cookie", "bar"}, {"set-cookie", "baz"}],
+       to_list(H)),
+    ok.
+
+headers_test() ->  % end-to-end checks of make, insert, lookup, delete_any and from_binary through the exported API
+    H = ?MODULE:make([{hdr, foo}, {"Hdr", "bar"}, {'Hdr', 2}]),  % same header name given as atom and string, in different case
+    [{hdr, "foo, bar, 2"}] = ?MODULE:to_list(H),  % the values are merged under the first key seen
+    H1 = ?MODULE:insert(taco, grande, H),
+    [{hdr, "foo, bar, 2"}, {taco, "grande"}] = ?MODULE:to_list(H1),
+    H2 = ?MODULE:make([{"Set-Cookie", "foo"}]),
+    [{"Set-Cookie", "foo"}] = ?MODULE:to_list(H2),
+    H3 = ?MODULE:insert("Set-Cookie", "bar", H2),
+    [{"Set-Cookie", "foo"}, {"Set-Cookie", "bar"}] = ?MODULE:to_list(H3),  % to_list keeps Set-Cookie values as separate entries
+    "foo, bar" = ?MODULE:get_value("set-cookie", H3),  % lookups are case-insensitive and return the joined value
+    {value, {"Set-Cookie", "foo, bar"}} = ?MODULE:lookup("set-cookie", H3),
+    undefined = ?MODULE:get_value("shibby", H3),
+    none = ?MODULE:lookup("shibby", H3),
+    H4 = ?MODULE:insert("content-type",
+                        "application/x-www-form-urlencoded; charset=utf8",
+                        H3),
+    "application/x-www-form-urlencoded" = ?MODULE:get_primary_value(
+                                             "content-type", H4),
+    H4 = ?MODULE:delete_any("nonexistent-header", H4),  % deleting an absent header leaves the structure unchanged
+    H3 = ?MODULE:delete_any("content-type", H4),
+    HB = <<"Content-Length: 47\r\nContent-Type: text/plain\r\n\r\n">>,  % full header block ending in a double CRLF
+    H_HB = ?MODULE:from_binary(HB),
+    H_HB = ?MODULE:from_binary(binary_to_list(HB)),  % string input must give the same result as binary input
+    "47" = ?MODULE:get_value("Content-Length", H_HB),
+    "text/plain" = ?MODULE:get_value("Content-Type", H_HB),
+    L_H_HB = ?MODULE:to_list(H_HB),
+    2 = length(L_H_HB),
+    true = lists:member({'Content-Length', "47"}, L_H_HB),  % checked with lists:member, so the order is not asserted
+    true = lists:member({'Content-Type', "text/plain"}, L_H_HB),
+    HL = [ <<"Content-Length: 47\r\n">>, <<"Content-Type: text/plain\r\n">> ],  % list-of-lines input: binaries, strings or a mix
+    HL2 = [ "Content-Length: 47\r\n", <<"Content-Type: text/plain\r\n">> ],
+    HL3 = [ <<"Content-Length: 47\r\n">>, "Content-Type: text/plain\r\n" ],
+    H_HL = ?MODULE:from_binary(HL),
+    H_HL = ?MODULE:from_binary(HL2),
+    H_HL = ?MODULE:from_binary(HL3),
+    "47" = ?MODULE:get_value("Content-Length", H_HL),
+    "text/plain" = ?MODULE:get_value("Content-Type", H_HL),
+    L_H_HL = ?MODULE:to_list(H_HL),
+    2 = length(L_H_HL),
+    true = lists:member({'Content-Length', "47"}, L_H_HL),
+    true = lists:member({'Content-Type', "text/plain"}, L_H_HL),
+    [] = ?MODULE:to_list(?MODULE:from_binary(<<>>)),  % empty and CRLF-only inputs all give an empty headers structure
+    [] = ?MODULE:to_list(?MODULE:from_binary(<<"">>)),
+    [] = ?MODULE:to_list(?MODULE:from_binary(<<"\r\n">>)),
+    [] = ?MODULE:to_list(?MODULE:from_binary(<<"\r\n\r\n">>)),
+    [] = ?MODULE:to_list(?MODULE:from_binary("")),
+    [] = ?MODULE:to_list(?MODULE:from_binary([<<>>])),
+    [] = ?MODULE:to_list(?MODULE:from_binary([<<"">>])),
+    [] = ?MODULE:to_list(?MODULE:from_binary([<<"\r\n">>])),
+    [] = ?MODULE:to_list(?MODULE:from_binary([<<"\r\n\r\n">>])),
+    ok.
 
+-endif.

Modified: couchdb/trunk/src/mochiweb/mochiweb_html.erl
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/mochiweb/mochiweb_html.erl?rev=979368&r1=979367&r2=979368&view=diff
==============================================================================
--- couchdb/trunk/src/mochiweb/mochiweb_html.erl (original)
+++ couchdb/trunk/src/mochiweb/mochiweb_html.erl Mon Jul 26 17:21:30 2010
@@ -4,9 +4,9 @@
 %% @doc Loosely tokenizes and generates parse trees for HTML 4.
 -module(mochiweb_html).
 -export([tokens/1, parse/1, parse_tokens/1, to_tokens/1, escape/1,
-         escape_attr/1, to_html/1, test/0]).
+         escape_attr/1, to_html/1]).
 
-% This is a macro to placate syntax highlighters..
+%% This is a macro to placate syntax highlighters..
 -define(QUOTE, $\").
 -define(SQUOTE, $\').
 -define(ADV_COL(S, N),
@@ -35,6 +35,8 @@
 -define(IS_LITERAL_SAFE(C),
         ((C >= $A andalso C =< $Z) orelse (C >= $a andalso C =< $z)
          orelse (C >= $0 andalso C =< $9))).
+-define(PROBABLE_CLOSE(C),
+        (C =:= $> orelse ?IS_WHITESPACE(C))).
 
 -record(decoder, {line=1,
                   column=1,
@@ -89,6 +91,7 @@ to_tokens(T={doctype, _}) ->
 to_tokens(T={comment, _}) ->
     [T];
 to_tokens({Tag0, Acc}) ->
+    %% This is only allowed in sub-tags: {p, [{"class", "foo"}]}
     to_tokens({Tag0, [], Acc});
 to_tokens({Tag0, Attrs, Acc}) ->
     Tag = to_tag(Tag0),
@@ -124,40 +127,6 @@ escape_attr(I) when is_integer(I) ->
 escape_attr(F) when is_float(F) ->
     escape_attr(mochinum:digits(F), []).
 
-%% @spec test() -> ok
-%% @doc Run tests for mochiweb_html.
-test() ->
-    test_destack(),
-    test_tokens(),
-    test_tokens2(),
-    test_parse(),
-    test_parse2(),
-    test_parse_tokens(),
-    test_escape(),
-    test_escape_attr(),
-    test_to_html(),
-    ok.
-
-
-%% Internal API
-
-test_to_html() ->
-    Expect = <<"<html><head><title>hey!</title></head><body><p class=\"foo\">what's up<br /></p><div>sucka</div><!-- comment! --></body></html>">>,
-    Expect = iolist_to_binary(
-               to_html({html, [],
-                        [{<<"head">>, [],
-                          [{title, <<"hey!">>}]},
-                         {body, [],
-                          [{p, [{class, foo}], [<<"what's">>, <<" up">>, {br}]},
-                           {'div', <<"sucka">>},
-                           {comment, <<" comment! ">>}]}]})),
-    Expect1 = <<"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">">>,
-    Expect1 = iolist_to_binary(
-                to_html({doctype,
-                         [<<"html">>, <<"PUBLIC">>,
-                          <<"-//W3C//DTD XHTML 1.0 Transitional//EN">>,
-                          <<"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">>]})),
-    ok.
 to_html([], Acc) ->
     lists:reverse(Acc);
 to_html([{'=', Content} | Rest], Acc) ->
@@ -205,16 +174,6 @@ attrs_to_html([{K, V} | Rest], Acc) ->
                   [[<<" ">>, escape(K), <<"=\"">>,
                     escape_attr(V), <<"\"">>] | Acc]).
 
-test_escape() ->
-    <<"&amp;quot;\"word &lt;&lt;up!&amp;quot;">> =
-        escape(<<"&quot;\"word <<up!&quot;">>),
-    ok.
-
-test_escape_attr() ->
-    <<"&amp;quot;&quot;word &lt;&lt;up!&amp;quot;">> =
-        escape_attr(<<"&quot;\"word <<up!&quot;">>),
-    ok.
-
 escape([], Acc) ->
     list_to_binary(lists:reverse(Acc));
 escape("<" ++ Rest, Acc) ->
@@ -257,6 +216,9 @@ to_tokens([{Tag0, [T0={'=', _C0} | R1]} 
 to_tokens([{Tag0, [T0={comment, _C0} | R1]} | Rest], Acc) ->
     %% Allow {comment, iolist()}
     to_tokens([{Tag0, R1} | Rest], [T0 | Acc]);
+to_tokens([{Tag0, [T0={pi, _S0, _A0} | R1]} | Rest], Acc) ->
+    %% Allow {pi, binary(), list()}
+    to_tokens([{Tag0, R1} | Rest], [T0 | Acc]);
 to_tokens([{Tag0, [{T0, A0=[{_, _} | _]} | R1]} | Rest], Acc) ->
     %% Allow {p, [{"class", "foo"}]}
     to_tokens([{Tag0, [{T0, A0, []} | R1]} | Rest], Acc);
@@ -290,39 +252,6 @@ to_tokens([{Tag0, [B | R1]} | Rest], Acc
     Tag = to_tag(Tag0),
     to_tokens([{Tag, R1} | Rest], [{data, B, false} | Acc]).
 
-test_tokens() ->
-    [{start_tag, <<"foo">>, [{<<"bar">>, <<"baz">>},
-                             {<<"wibble">>, <<"wibble">>},
-                             {<<"alice">>, <<"bob">>}], true}] =
-        tokens(<<"<foo bar=baz wibble='wibble' alice=\"bob\"/>">>),
-    [{start_tag, <<"foo">>, [{<<"bar">>, <<"baz">>},
-                             {<<"wibble">>, <<"wibble">>},
-                             {<<"alice">>, <<"bob">>}], true}] =
-        tokens(<<"<foo bar=baz wibble='wibble' alice=bob/>">>),
-    [{comment, <<"[if lt IE 7]>\n<style type=\"text/css\">\n.no_ie { display: none; }\n</style>\n<![endif]">>}] =
-        tokens(<<"<!--[if lt IE 7]>\n<style type=\"text/css\">\n.no_ie { display: none; }\n</style>\n<![endif]-->">>),
-    [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
-     {data, <<" A= B <= C ">>, false},
-     {end_tag, <<"script">>}] =
-        tokens(<<"<script type=\"text/javascript\"> A= B <= C </script>">>),
-    [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
-     {data, <<" A= B <= C ">>, false},
-     {end_tag, <<"script">>}] =
-        tokens(<<"<script type =\"text/javascript\"> A= B <= C </script>">>),
-    [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
-     {data, <<" A= B <= C ">>, false},
-     {end_tag, <<"script">>}] =
-        tokens(<<"<script type = \"text/javascript\"> A= B <= C </script>">>),
-    [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
-     {data, <<" A= B <= C ">>, false},
-     {end_tag, <<"script">>}] =
-        tokens(<<"<script type= \"text/javascript\"> A= B <= C </script>">>),
-    [{start_tag, <<"textarea">>, [], false},
-     {data, <<"<html></body>">>, false},
-     {end_tag, <<"textarea">>}] =
-        tokens(<<"<textarea><html></body></textarea>">>),
-    ok.
-
 tokens(B, S=#decoder{offset=O}, Acc) ->
     case B of
         <<_:O/binary>> ->
@@ -374,7 +303,8 @@ tokenize(B, S=#decoder{offset=O}) ->
             {{end_tag, Tag}, S2};
         <<_:O/binary, "<", C, _/binary>> when ?IS_WHITESPACE(C) ->
             %% This isn't really strict HTML
-            tokenize_data(B, ?INC_COL(S));
+            {{data, Data, _Whitespace}, S1} = tokenize_data(B, ?INC_COL(S)),
+            {{data, <<$<, Data/binary>>, false}, S1};
         <<_:O/binary, "<", _/binary>> ->
             {Tag, S1} = tokenize_literal(B, ?INC_COL(S)),
             {Attrs, S2} = tokenize_attributes(B, S1),
@@ -385,149 +315,6 @@ tokenize(B, S=#decoder{offset=O}) ->
             tokenize_data(B, S)
     end.
 
-test_parse() ->
-    D0 = <<"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">
-<html>
- <head>
-   <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">
-   <title>Foo</title>
-   <link rel=\"stylesheet\" type=\"text/css\" href=\"/static/rel/dojo/resources/dojo.css\" media=\"screen\">
-   <link rel=\"stylesheet\" type=\"text/css\" href=\"/static/foo.css\" media=\"screen\">
-   <!--[if lt IE 7]>
-   <style type=\"text/css\">
-     .no_ie { display: none; }
-   </style>
-   <![endif]-->
-   <link rel=\"icon\" href=\"/static/images/favicon.ico\" type=\"image/x-icon\">
-   <link rel=\"shortcut icon\" href=\"/static/images/favicon.ico\" type=\"image/x-icon\">
- </head>
- <body id=\"home\" class=\"tundra\"><![CDATA[&lt;<this<!-- is -->CDATA>&gt;]]></body>
-</html>">>,
-    Expect = {<<"html">>, [],
-              [{<<"head">>, [],
-                [{<<"meta">>,
-                  [{<<"http-equiv">>,<<"Content-Type">>},
-                   {<<"content">>,<<"text/html; charset=UTF-8">>}],
-                  []},
-                 {<<"title">>,[],[<<"Foo">>]},
-                 {<<"link">>,
-                  [{<<"rel">>,<<"stylesheet">>},
-                   {<<"type">>,<<"text/css">>},
-                   {<<"href">>,<<"/static/rel/dojo/resources/dojo.css">>},
-                   {<<"media">>,<<"screen">>}],
-                  []},
-                 {<<"link">>,
-                  [{<<"rel">>,<<"stylesheet">>},
-                   {<<"type">>,<<"text/css">>},
-                   {<<"href">>,<<"/static/foo.css">>},
-                   {<<"media">>,<<"screen">>}],
-                  []},
-                 {comment,<<"[if lt IE 7]>\n   <style type=\"text/css\">\n     .no_ie { display: none; }\n   </style>\n   <![endif]">>},
-                 {<<"link">>,
-                  [{<<"rel">>,<<"icon">>},
-                   {<<"href">>,<<"/static/images/favicon.ico">>},
-                   {<<"type">>,<<"image/x-icon">>}],
-                  []},
-                 {<<"link">>,
-                  [{<<"rel">>,<<"shortcut icon">>},
-                   {<<"href">>,<<"/static/images/favicon.ico">>},
-                   {<<"type">>,<<"image/x-icon">>}],
-                  []}]},
-               {<<"body">>,
-                [{<<"id">>,<<"home">>},
-                 {<<"class">>,<<"tundra">>}],
-                [<<"&lt;<this<!-- is -->CDATA>&gt;">>]}]},
-    Expect = parse(D0),
-    ok.
-
-test_tokens2() ->
-    D0 = <<"<channel><title>from __future__ import *</title><link>http://bob.pythonmac.org</link><description>Bob's Rants</description></channel>">>,
-    Expect = [{start_tag,<<"channel">>,[],false},
-              {start_tag,<<"title">>,[],false},
-              {data,<<"from __future__ import *">>,false},
-              {end_tag,<<"title">>},
-              {start_tag,<<"link">>,[],true},
-              {data,<<"http://bob.pythonmac.org">>,false},
-              {end_tag,<<"link">>},
-              {start_tag,<<"description">>,[],false},
-              {data,<<"Bob's Rants">>,false},
-              {end_tag,<<"description">>},
-              {end_tag,<<"channel">>}],
-    Expect = tokens(D0),
-    ok.
-
-test_parse2() ->
-    D0 = <<"<channel><title>from __future__ import *</title><link>http://bob.pythonmac.org<br>foo</link><description>Bob's Rants</description></channel>">>,
-    Expect = {<<"channel">>,[],
-              [{<<"title">>,[],[<<"from __future__ import *">>]},
-               {<<"link">>,[],[
-                               <<"http://bob.pythonmac.org">>,
-                               {<<"br">>,[],[]},
-                               <<"foo">>]},
-               {<<"description">>,[],[<<"Bob's Rants">>]}]},
-    Expect = parse(D0),
-    ok.
-
-test_parse_tokens() ->
-    D0 = [{doctype,[<<"HTML">>,<<"PUBLIC">>,<<"-//W3C//DTD HTML 4.01 Transitional//EN">>]},
-          {data,<<"\n">>,true},
-          {start_tag,<<"html">>,[],false}],
-    {<<"html">>, [], []} = parse_tokens(D0),
-    D1 = D0 ++ [{end_tag, <<"html">>}],
-    {<<"html">>, [], []} = parse_tokens(D1),
-    D2 = D0 ++ [{start_tag, <<"body">>, [], false}],
-    {<<"html">>, [], [{<<"body">>, [], []}]} = parse_tokens(D2),
-    D3 = D0 ++ [{start_tag, <<"head">>, [], false},
-                {end_tag, <<"head">>},
-                {start_tag, <<"body">>, [], false}],
-    {<<"html">>, [], [{<<"head">>, [], []}, {<<"body">>, [], []}]} = parse_tokens(D3),
-    D4 = D3 ++ [{data,<<"\n">>,true},
-                {start_tag,<<"div">>,[{<<"class">>,<<"a">>}],false},
-                {start_tag,<<"a">>,[{<<"name">>,<<"#anchor">>}],false},
-                {end_tag,<<"a">>},
-                {end_tag,<<"div">>},
-                {start_tag,<<"div">>,[{<<"class">>,<<"b">>}],false},
-                {start_tag,<<"div">>,[{<<"class">>,<<"c">>}],false},
-                {end_tag,<<"div">>},
-                {end_tag,<<"div">>}],
-    {<<"html">>, [],
-     [{<<"head">>, [], []},
-      {<<"body">>, [],
-       [{<<"div">>, [{<<"class">>, <<"a">>}], [{<<"a">>, [{<<"name">>, <<"#anchor">>}], []}]},
-        {<<"div">>, [{<<"class">>, <<"b">>}], [{<<"div">>, [{<<"class">>, <<"c">>}], []}]}
-       ]}]} = parse_tokens(D4),
-    D5 = [{start_tag,<<"html">>,[],false},
-          {data,<<"\n">>,true},
-          {data,<<"boo">>,false},
-          {data,<<"hoo">>,false},
-          {data,<<"\n">>,true},
-          {end_tag,<<"html">>}],
-    {<<"html">>, [], [<<"\nboohoo\n">>]} = parse_tokens(D5),
-    D6 = [{start_tag,<<"html">>,[],false},
-          {data,<<"\n">>,true},
-          {data,<<"\n">>,true},
-          {end_tag,<<"html">>}],
-    {<<"html">>, [], []} = parse_tokens(D6),
-    D7 = [{start_tag,<<"html">>,[],false},
-          {start_tag,<<"ul">>,[],false},
-          {start_tag,<<"li">>,[],false},
-          {data,<<"word">>,false},
-          {start_tag,<<"li">>,[],false},
-          {data,<<"up">>,false},
-          {end_tag,<<"li">>},
-          {start_tag,<<"li">>,[],false},
-          {data,<<"fdsa">>,false},
-          {start_tag,<<"br">>,[],true},
-          {data,<<"asdf">>,false},
-          {end_tag,<<"ul">>},
-          {end_tag,<<"html">>}],
-    {<<"html">>, [],
-     [{<<"ul">>, [],
-       [{<<"li">>, [], [<<"word">>]},
-        {<<"li">>, [], [<<"up">>]},
-        {<<"li">>, [], [<<"fdsa">>,{<<"br">>, [], []}, <<"asdf">>]}]}]} = parse_tokens(D7),
-    ok.
-
 tree_data([{data, Data, Whitespace} | Rest], AllWhitespace, Acc) ->
     tree_data(Rest, (Whitespace andalso AllWhitespace), [Data | Acc]);
 tree_data(Rest, AllWhitespace, Acc) ->
@@ -556,7 +343,9 @@ tree(L=[{data, _Data, _Whitespace} | _],
             tree(Rest, S);
         {Data, false, Rest} ->
             tree(Rest, append_stack_child(Data, S))
-    end.
+    end;
+tree([{doctype, _} | Rest], Stack) ->
+    tree(Rest, Stack).
 
 norm({Tag, Attrs}) ->
     {norm(Tag), [{norm(K), iolist_to_binary(V)} || {K, V} <- Attrs], []};
@@ -565,21 +354,6 @@ norm(Tag) when is_binary(Tag) ->
 norm(Tag) ->
     list_to_binary(string:to_lower(Tag)).
 
-test_destack() ->
-    {<<"a">>, [], []} =
-        destack([{<<"a">>, [], []}]),
-    {<<"a">>, [], [{<<"b">>, [], []}]} =
-        destack([{<<"b">>, [], []}, {<<"a">>, [], []}]),
-    {<<"a">>, [], [{<<"b">>, [], [{<<"c">>, [], []}]}]} =
-     destack([{<<"c">>, [], []}, {<<"b">>, [], []}, {<<"a">>, [], []}]),
-    [{<<"a">>, [], [{<<"b">>, [], [{<<"c">>, [], []}]}]}] =
-     destack(<<"b">>,
-             [{<<"c">>, [], []}, {<<"b">>, [], []}, {<<"a">>, [], []}]),
-    [{<<"b">>, [], [{<<"c">>, [], []}]}, {<<"a">>, [], []}] =
-     destack(<<"c">>,
-             [{<<"c">>, [], []}, {<<"b">>, [], []},{<<"a">>, [], []}]),
-    ok.
-
 stack(T1={TN, _, _}, Stack=[{TN, _, _} | _Rest])
   when TN =:= <<"li">> orelse TN =:= <<"option">> ->
     [T1 | destack(TN, Stack)];
@@ -719,9 +493,10 @@ find_qgt(Bin, S=#decoder{offset=O}) ->
     case Bin of
         <<_:O/binary, "?>", _/binary>> ->
             ?ADV_COL(S, 2);
-        <<_:O/binary, C, _/binary>> ->
-            find_qgt(Bin, ?INC_CHAR(S, C));
-        _ ->
+        %% tokenize_attributes takes care of this state:
+        %% <<_:O/binary, C, _/binary>> ->
+        %%     find_qgt(Bin, ?INC_CHAR(S, C));
+        <<_:O/binary>> ->
             S
     end.
 
@@ -766,7 +541,7 @@ tokenize_charref(Bin, S=#decoder{offset=
                            <<_:Start1/binary, R:Len1/binary, _/binary>> = Bin,
                            R;
                        Unichar ->
-                           list_to_binary(xmerl_ucs:to_utf8(Unichar))
+                           mochiutf8:codepoint_to_bytes(Unichar)
                    end,
             {{data, Data, false}, ?INC_COL(S)};
         _ ->
@@ -791,11 +566,10 @@ tokenize_doctype(Bin, S=#decoder{offset=
 
 tokenize_word_or_literal(Bin, S=#decoder{offset=O}) ->
     case Bin of
-        <<_:O/binary, C, _/binary>> when ?IS_WHITESPACE(C) ->
-            {error, {whitespace, [C], S}};
         <<_:O/binary, C, _/binary>> when C =:= ?QUOTE orelse C =:= ?SQUOTE ->
             tokenize_word(Bin, ?INC_COL(S), C);
-        _ ->
+        <<_:O/binary, C, _/binary>> when not ?IS_WHITESPACE(C) ->
+            %% Sanity check for whitespace
             tokenize_literal(Bin, S, [])
     end.
 
@@ -852,13 +626,14 @@ tokenize_script(Bin, S=#decoder{offset=O
 tokenize_script(Bin, S=#decoder{offset=O}, Start) ->
     case Bin of
         %% Just a look-ahead, we want the end_tag separately
-        <<_:O/binary, $<, $/, SS, CC, RR, II, PP, TT, _/binary>>
+        <<_:O/binary, $<, $/, SS, CC, RR, II, PP, TT, ZZ, _/binary>>
         when (SS =:= $s orelse SS =:= $S) andalso
              (CC =:= $c orelse CC =:= $C) andalso
              (RR =:= $r orelse RR =:= $R) andalso
              (II =:= $i orelse II =:= $I) andalso
              (PP =:= $p orelse PP =:= $P) andalso
-             (TT=:= $t orelse TT =:= $T) ->
+             (TT=:= $t orelse TT =:= $T) andalso
+             ?PROBABLE_CLOSE(ZZ) ->
             Len = O - Start,
             <<_:Start/binary, Raw:Len/binary, _/binary>> = Bin,
             {{data, Raw, false}, S};
@@ -874,7 +649,7 @@ tokenize_textarea(Bin, S=#decoder{offset
 tokenize_textarea(Bin, S=#decoder{offset=O}, Start) ->
     case Bin of
         %% Just a look-ahead, we want the end_tag separately
-        <<_:O/binary, $<, $/, TT, EE, XX, TT2, AA, RR, EE2, AA2, _/binary>>
+        <<_:O/binary, $<, $/, TT, EE, XX, TT2, AA, RR, EE2, AA2, ZZ, _/binary>>
         when (TT =:= $t orelse TT =:= $T) andalso
              (EE =:= $e orelse EE =:= $E) andalso
              (XX =:= $x orelse XX =:= $X) andalso
@@ -882,7 +657,8 @@ tokenize_textarea(Bin, S=#decoder{offset
              (AA =:= $a orelse AA =:= $A) andalso
              (RR =:= $r orelse RR =:= $R) andalso
              (EE2 =:= $e orelse EE2 =:= $E) andalso
-             (AA2 =:= $a orelse AA2 =:= $A) ->
+             (AA2 =:= $a orelse AA2 =:= $A) andalso
+             ?PROBABLE_CLOSE(ZZ) ->
             Len = O - Start,
             <<_:Start/binary, Raw:Len/binary, _/binary>> = Bin,
             {{data, Raw, false}, S};
@@ -891,3 +667,395 @@ tokenize_textarea(Bin, S=#decoder{offset
         <<_:Start/binary, Raw/binary>> ->
             {{data, Raw, false}, S}
     end.
+
+
+%%
+%% Tests
+%%
+-include_lib("eunit/include/eunit.hrl").
+-ifdef(TEST).
+
+to_html_test() ->
+    ?assertEqual(
+       <<"<html><head><title>hey!</title></head><body><p class=\"foo\">what's up<br /></p><div>sucka</div>RAW!<!-- comment! --></body></html>">>,
+       iolist_to_binary(
+         to_html({html, [],
+                  [{<<"head">>, [],
+                    [{title, <<"hey!">>}]},
+                   {body, [],
+                    [{p, [{class, foo}], [<<"what's">>, <<" up">>, {br}]},
+                     {'div', <<"sucka">>},
+                     {'=', <<"RAW!">>},
+                     {comment, <<" comment! ">>}]}]}))),
+    ?assertEqual(
+       <<"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">">>,
+       iolist_to_binary(
+         to_html({doctype,
+                  [<<"html">>, <<"PUBLIC">>,
+                   <<"-//W3C//DTD XHTML 1.0 Transitional//EN">>,
+                   <<"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">>]}))),
+    ?assertEqual(
+       <<"<html><?xml:namespace prefix=\"o\" ns=\"urn:schemas-microsoft-com:office:office\"?></html>">>,
+       iolist_to_binary(
+         to_html({<<"html">>,[],
+                  [{pi, <<"xml:namespace">>,
+                    [{<<"prefix">>,<<"o">>},
+                     {<<"ns">>,<<"urn:schemas-microsoft-com:office:office">>}]}]}))),
+    ok.
+
+escape_test() ->
+    ?assertEqual(
+       <<"&amp;quot;\"word &gt;&lt;&lt;up!&amp;quot;">>,
+       escape(<<"&quot;\"word ><<up!&quot;">>)),
+    ?assertEqual(
+       <<"&amp;quot;\"word &gt;&lt;&lt;up!&amp;quot;">>,
+       escape("&quot;\"word ><<up!&quot;")),
+    ?assertEqual(
+       <<"&amp;quot;\"word &gt;&lt;&lt;up!&amp;quot;">>,
+       escape('&quot;\"word ><<up!&quot;')),
+    ok.
+
+escape_attr_test() ->
+    ?assertEqual(
+       <<"&amp;quot;&quot;word &gt;&lt;&lt;up!&amp;quot;">>,
+       escape_attr(<<"&quot;\"word ><<up!&quot;">>)),
+    ?assertEqual(
+       <<"&amp;quot;&quot;word &gt;&lt;&lt;up!&amp;quot;">>,
+       escape_attr("&quot;\"word ><<up!&quot;")),
+    ?assertEqual(
+       <<"&amp;quot;&quot;word &gt;&lt;&lt;up!&amp;quot;">>,
+       escape_attr('&quot;\"word ><<up!&quot;')),
+    ?assertEqual(
+       <<"12345">>,
+       escape_attr(12345)),
+    ?assertEqual(
+       <<"1.5">>,
+       escape_attr(1.5)),
+    ok.
+
+tokens_test() ->
+    ?assertEqual(
+       [{start_tag, <<"foo">>, [{<<"bar">>, <<"baz">>},
+                                {<<"wibble">>, <<"wibble">>},
+                                {<<"alice">>, <<"bob">>}], true}],
+       tokens(<<"<foo bar=baz wibble='wibble' alice=\"bob\"/>">>)),
+    ?assertEqual(
+       [{start_tag, <<"foo">>, [{<<"bar">>, <<"baz">>},
+                                {<<"wibble">>, <<"wibble">>},
+                                {<<"alice">>, <<"bob">>}], true}],
+       tokens(<<"<foo bar=baz wibble='wibble' alice=bob/>">>)),
+    ?assertEqual(
+       [{comment, <<"[if lt IE 7]>\n<style type=\"text/css\">\n.no_ie { display: none; }\n</style>\n<![endif]">>}],
+       tokens(<<"<!--[if lt IE 7]>\n<style type=\"text/css\">\n.no_ie { display: none; }\n</style>\n<![endif]-->">>)),
+    ?assertEqual(
+       [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
+        {data, <<" A= B <= C ">>, false},
+        {end_tag, <<"script">>}],
+       tokens(<<"<script type=\"text/javascript\"> A= B <= C </script>">>)),
+    ?assertEqual(
+       [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
+        {data, <<" A= B <= C ">>, false},
+        {end_tag, <<"script">>}],
+       tokens(<<"<script type =\"text/javascript\"> A= B <= C </script>">>)),
+    ?assertEqual(
+       [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
+        {data, <<" A= B <= C ">>, false},
+        {end_tag, <<"script">>}],
+       tokens(<<"<script type = \"text/javascript\"> A= B <= C </script>">>)),
+    ?assertEqual(
+       [{start_tag, <<"script">>, [{<<"type">>, <<"text/javascript">>}], false},
+        {data, <<" A= B <= C ">>, false},
+        {end_tag, <<"script">>}],
+       tokens(<<"<script type= \"text/javascript\"> A= B <= C </script>">>)),
+    ?assertEqual(
+       [{start_tag, <<"textarea">>, [], false},
+        {data, <<"<html></body>">>, false},
+        {end_tag, <<"textarea">>}],
+       tokens(<<"<textarea><html></body></textarea>">>)),
+    ?assertEqual(
+       [{start_tag, <<"textarea">>, [], false},
+        {data, <<"<html></body></textareaz>">>, false}],
+       tokens(<<"<textarea ><html></body></textareaz>">>)),
+    ?assertEqual(
+       [{pi, <<"xml:namespace">>,
+         [{<<"prefix">>,<<"o">>},
+          {<<"ns">>,<<"urn:schemas-microsoft-com:office:office">>}]}],
+       tokens(<<"<?xml:namespace prefix=\"o\" ns=\"urn:schemas-microsoft-com:office:office\"?>">>)),
+    ?assertEqual(
+       [{pi, <<"xml:namespace">>,
+         [{<<"prefix">>,<<"o">>},
+          {<<"ns">>,<<"urn:schemas-microsoft-com:office:office">>}]}],
+       tokens(<<"<?xml:namespace prefix=o ns=urn:schemas-microsoft-com:office:office \n?>">>)),
+    ?assertEqual(
+       [{pi, <<"xml:namespace">>,
+         [{<<"prefix">>,<<"o">>},
+          {<<"ns">>,<<"urn:schemas-microsoft-com:office:office">>}]}],
+       tokens(<<"<?xml:namespace prefix=o ns=urn:schemas-microsoft-com:office:office">>)),
+    ?assertEqual(
+       [{data, <<"<">>, false}],
+       tokens(<<"&lt;">>)),
+    ?assertEqual(
+       [{data, <<"not html ">>, false},
+        {data, <<"< at all">>, false}],
+       tokens(<<"not html < at all">>)),
+    ok.
+
+parse_test() ->
+    D0 = <<"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">
+<html>
+ <head>
+   <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">
+   <title>Foo</title>
+   <link rel=\"stylesheet\" type=\"text/css\" href=\"/static/rel/dojo/resources/dojo.css\" media=\"screen\">
+   <link rel=\"stylesheet\" type=\"text/css\" href=\"/static/foo.css\" media=\"screen\">
+   <!--[if lt IE 7]>
+   <style type=\"text/css\">
+     .no_ie { display: none; }
+   </style>
+   <![endif]-->
+   <link rel=\"icon\" href=\"/static/images/favicon.ico\" type=\"image/x-icon\">
+   <link rel=\"shortcut icon\" href=\"/static/images/favicon.ico\" type=\"image/x-icon\">
+ </head>
+ <body id=\"home\" class=\"tundra\"><![CDATA[&lt;<this<!-- is -->CDATA>&gt;]]></body>
+</html>">>,
+    ?assertEqual(
+       {<<"html">>, [],
+        [{<<"head">>, [],
+          [{<<"meta">>,
+            [{<<"http-equiv">>,<<"Content-Type">>},
+             {<<"content">>,<<"text/html; charset=UTF-8">>}],
+            []},
+           {<<"title">>,[],[<<"Foo">>]},
+           {<<"link">>,
+            [{<<"rel">>,<<"stylesheet">>},
+             {<<"type">>,<<"text/css">>},
+             {<<"href">>,<<"/static/rel/dojo/resources/dojo.css">>},
+             {<<"media">>,<<"screen">>}],
+            []},
+           {<<"link">>,
+            [{<<"rel">>,<<"stylesheet">>},
+             {<<"type">>,<<"text/css">>},
+             {<<"href">>,<<"/static/foo.css">>},
+             {<<"media">>,<<"screen">>}],
+            []},
+           {comment,<<"[if lt IE 7]>\n   <style type=\"text/css\">\n     .no_ie { display: none; }\n   </style>\n   <![endif]">>},
+           {<<"link">>,
+            [{<<"rel">>,<<"icon">>},
+             {<<"href">>,<<"/static/images/favicon.ico">>},
+             {<<"type">>,<<"image/x-icon">>}],
+            []},
+           {<<"link">>,
+            [{<<"rel">>,<<"shortcut icon">>},
+             {<<"href">>,<<"/static/images/favicon.ico">>},
+             {<<"type">>,<<"image/x-icon">>}],
+            []}]},
+         {<<"body">>,
+          [{<<"id">>,<<"home">>},
+           {<<"class">>,<<"tundra">>}],
+          [<<"&lt;<this<!-- is -->CDATA>&gt;">>]}]},
+       parse(D0)),
+    ?assertEqual(
+       {<<"html">>,[],
+        [{pi, <<"xml:namespace">>,
+          [{<<"prefix">>,<<"o">>},
+           {<<"ns">>,<<"urn:schemas-microsoft-com:office:office">>}]}]},
+       parse(
+         <<"<html><?xml:namespace prefix=\"o\" ns=\"urn:schemas-microsoft-com:office:office\"?></html>">>)),
+    ?assertEqual(
+       {<<"html">>, [],
+        [{<<"dd">>, [], [<<"foo">>]},
+         {<<"dt">>, [], [<<"bar">>]}]},
+       parse(<<"<html><dd>foo<dt>bar</html>">>)),
+    %% Singleton sadness
+    ?assertEqual(
+       {<<"html">>, [],
+        [{<<"link">>, [], []},
+         <<"foo">>,
+         {<<"br">>, [], []},
+         <<"bar">>]},
+       parse(<<"<html><link>foo<br>bar</html>">>)),
+    ?assertEqual(
+       {<<"html">>, [],
+        [{<<"link">>, [], [<<"foo">>,
+                           {<<"br">>, [], []},
+                           <<"bar">>]}]},
+       parse(<<"<html><link>foo<br>bar</link></html>">>)),
+    ok.
+
+exhaustive_is_singleton_test() ->
+    T = mochiweb_cover:clause_lookup_table(?MODULE, is_singleton),
+    [?assertEqual(V, is_singleton(K)) || {K, V} <- T].
+
+tokenize_attributes_test() ->
+    ?assertEqual(
+       {<<"foo">>,
+        [{<<"bar">>, <<"b\"az">>},
+         {<<"wibble">>, <<"wibble">>},
+         {<<"taco", 16#c2, 16#a9>>, <<"bell">>},
+         {<<"quux">>, <<"quux">>}],
+        []},
+       parse(<<"<foo bar=\"b&quot;az\" wibble taco&copy;=bell quux">>)),
+    ok.
+
+tokens2_test() ->
+    D0 = <<"<channel><title>from __future__ import *</title><link>http://bob.pythonmac.org</link><description>Bob's Rants</description></channel>">>,
+    ?assertEqual(
+       [{start_tag,<<"channel">>,[],false},
+        {start_tag,<<"title">>,[],false},
+        {data,<<"from __future__ import *">>,false},
+        {end_tag,<<"title">>},
+        {start_tag,<<"link">>,[],true},
+        {data,<<"http://bob.pythonmac.org">>,false},
+        {end_tag,<<"link">>},
+        {start_tag,<<"description">>,[],false},
+        {data,<<"Bob's Rants">>,false},
+        {end_tag,<<"description">>},
+        {end_tag,<<"channel">>}],
+       tokens(D0)),
+    ok.
+
+to_tokens_test() ->
+    ?assertEqual(
+       [{start_tag, <<"p">>, [{class, 1}], false},
+        {end_tag, <<"p">>}],
+       to_tokens({p, [{class, 1}], []})),
+    ?assertEqual(
+       [{start_tag, <<"p">>, [], false},
+        {end_tag, <<"p">>}],
+       to_tokens({p})),
+    ?assertEqual(
+       [{'=', <<"data">>}],
+       to_tokens({'=', <<"data">>})),
+    ?assertEqual(
+       [{comment, <<"comment">>}],
+       to_tokens({comment, <<"comment">>})),
+    %% The {Tag, Attrs} shorthand is only recognized in sub-tags, where
+    %% {p, [{"class", "foo"}]} is read as {p, [{"class", "foo"}], []}.
+    %% On the outside the second element is always the list of children:
+    %% {p, [{"class", "foo"}]} is read as {p, [], [{"class", "foo"}]}.
+    ?assertEqual(
+       [{start_tag, <<"html">>, [], false},
+        {start_tag, <<"p">>, [{class, 1}], false},
+        {end_tag, <<"p">>},
+        {end_tag, <<"html">>}],
+       to_tokens({html, [{p, [{class, 1}]}]})),
+    ok.
+
+parse2_test() ->
+    D0 = <<"<channel><title>from __future__ import *</title><link>http://bob.pythonmac.org<br>foo</link><description>Bob's Rants</description></channel>">>,
+    ?assertEqual(
+       {<<"channel">>,[],
+        [{<<"title">>,[],[<<"from __future__ import *">>]},
+         {<<"link">>,[],[
+                         <<"http://bob.pythonmac.org">>,
+                         {<<"br">>,[],[]},
+                         <<"foo">>]},
+         {<<"description">>,[],[<<"Bob's Rants">>]}]},
+       parse(D0)),
+    ok.
+
+parse_tokens_test() ->
+    D0 = [{doctype,[<<"HTML">>,<<"PUBLIC">>,<<"-//W3C//DTD HTML 4.01 Transitional//EN">>]},
+          {data,<<"\n">>,true},
+          {start_tag,<<"html">>,[],false}],
+    ?assertEqual(
+       {<<"html">>, [], []},
+       parse_tokens(D0)),
+    D1 = D0 ++ [{end_tag, <<"html">>}],
+    ?assertEqual(
+       {<<"html">>, [], []},
+       parse_tokens(D1)),
+    D2 = D0 ++ [{start_tag, <<"body">>, [], false}],
+    ?assertEqual(
+       {<<"html">>, [], [{<<"body">>, [], []}]},
+       parse_tokens(D2)),
+    D3 = D0 ++ [{start_tag, <<"head">>, [], false},
+                {end_tag, <<"head">>},
+                {start_tag, <<"body">>, [], false}],
+    ?assertEqual(
+       {<<"html">>, [], [{<<"head">>, [], []}, {<<"body">>, [], []}]},
+       parse_tokens(D3)),
+    D4 = D3 ++ [{data,<<"\n">>,true},
+                {start_tag,<<"div">>,[{<<"class">>,<<"a">>}],false},
+                {start_tag,<<"a">>,[{<<"name">>,<<"#anchor">>}],false},
+                {end_tag,<<"a">>},
+                {end_tag,<<"div">>},
+                {start_tag,<<"div">>,[{<<"class">>,<<"b">>}],false},
+                {start_tag,<<"div">>,[{<<"class">>,<<"c">>}],false},
+                {end_tag,<<"div">>},
+                {end_tag,<<"div">>}],
+    ?assertEqual(
+       {<<"html">>, [],
+        [{<<"head">>, [], []},
+         {<<"body">>, [],
+          [{<<"div">>, [{<<"class">>, <<"a">>}], [{<<"a">>, [{<<"name">>, <<"#anchor">>}], []}]},
+           {<<"div">>, [{<<"class">>, <<"b">>}], [{<<"div">>, [{<<"class">>, <<"c">>}], []}]}
+          ]}]},
+       parse_tokens(D4)),
+    D5 = [{start_tag,<<"html">>,[],false},
+          {data,<<"\n">>,true},
+          {data,<<"boo">>,false},
+          {data,<<"hoo">>,false},
+          {data,<<"\n">>,true},
+          {end_tag,<<"html">>}],
+    ?assertEqual(
+       {<<"html">>, [], [<<"\nboohoo\n">>]},
+       parse_tokens(D5)),
+    D6 = [{start_tag,<<"html">>,[],false},
+          {data,<<"\n">>,true},
+          {data,<<"\n">>,true},
+          {end_tag,<<"html">>}],
+    ?assertEqual(
+       {<<"html">>, [], []},
+       parse_tokens(D6)),
+    D7 = [{start_tag,<<"html">>,[],false},
+          {start_tag,<<"ul">>,[],false},
+          {start_tag,<<"li">>,[],false},
+          {data,<<"word">>,false},
+          {start_tag,<<"li">>,[],false},
+          {data,<<"up">>,false},
+          {end_tag,<<"li">>},
+          {start_tag,<<"li">>,[],false},
+          {data,<<"fdsa">>,false},
+          {start_tag,<<"br">>,[],true},
+          {data,<<"asdf">>,false},
+          {end_tag,<<"ul">>},
+          {end_tag,<<"html">>}],
+    ?assertEqual(
+       {<<"html">>, [],
+        [{<<"ul">>, [],
+          [{<<"li">>, [], [<<"word">>]},
+           {<<"li">>, [], [<<"up">>]},
+           {<<"li">>, [], [<<"fdsa">>,{<<"br">>, [], []}, <<"asdf">>]}]}]},
+       parse_tokens(D7)),
+    ok.
+
+destack_test() ->
+    %% destack/1 collapses the stack to one tree; destack/2 closes up to a tag.
+    ?assertEqual({<<"a">>, [], []}, destack([{<<"a">>, [], []}])),
+    ?assertEqual({<<"a">>, [], [{<<"b">>, [], []}]},
+                 destack([{<<"b">>, [], []}, {<<"a">>, [], []}])),
+    ?assertEqual({<<"a">>, [], [{<<"b">>, [], [{<<"c">>, [], []}]}]},
+                 destack([{<<"c">>, [], []}, {<<"b">>, [], []}, {<<"a">>, [], []}])),
+    ?assertEqual([{<<"a">>, [], [{<<"b">>, [], [{<<"c">>, [], []}]}]}],
+                 destack(<<"b">>,
+                         [{<<"c">>, [], []}, {<<"b">>, [], []}, {<<"a">>, [], []}])),
+    ?assertEqual([{<<"b">>, [], [{<<"c">>, [], []}]}, {<<"a">>, [], []}],
+                 destack(<<"c">>,
+                         [{<<"c">>, [], []}, {<<"b">>, [], []}, {<<"a">>, [], []}])),
+    ok.
+
+doctype_test() ->
+    ?assertEqual(
+       {<<"html">>,[],[{<<"head">>,[],[]}]},
+       mochiweb_html:parse("<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">"
+                           "<html><head></head></body></html>")),
+    %% http://code.google.com/p/mochiweb/issues/detail?id=52
+    ?assertEqual(
+       {<<"html">>,[],[{<<"head">>,[],[]}]},
+       mochiweb_html:parse("<html>"
+                           "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">"
+                           "<head></head></body></html>")),
+    ok.
+
+-endif.

Modified: couchdb/trunk/src/mochiweb/mochiweb_http.erl
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/mochiweb/mochiweb_http.erl?rev=979368&r1=979367&r2=979368&view=diff
==============================================================================
--- couchdb/trunk/src/mochiweb/mochiweb_http.erl (original)
+++ couchdb/trunk/src/mochiweb/mochiweb_http.erl Mon Jul 26 17:21:30 2010
@@ -8,31 +8,22 @@
 -export([start/0, start/1, stop/0, stop/1]).
 -export([loop/2, default_body/1]).
 -export([after_response/2, reentry/1]).
+-export([parse_range_request/1, range_skip_length/2]).
 
--define(IDLE_TIMEOUT, 30000).
+-define(REQUEST_RECV_TIMEOUT, 300000).   % timeout waiting for request line
+-define(HEADERS_RECV_TIMEOUT, 30000). % timeout waiting for headers
 
 -define(MAX_HEADERS, 1000).
 -define(DEFAULTS, [{name, ?MODULE},
                    {port, 8888}]).
 
-set_default({Prop, Value}, PropList) ->
-    case proplists:is_defined(Prop, PropList) of
-        true ->
-            PropList;
-        false ->
-            [{Prop, Value} | PropList]
-    end.
-
-set_defaults(Defaults, PropList) ->
-    lists:foldl(fun set_default/2, PropList, Defaults).
-
 parse_options(Options) ->
     {loop, HttpLoop} = proplists:lookup(loop, Options),
     Loop = fun (S) ->
                    ?MODULE:loop(S, HttpLoop)
            end,
     Options1 = [{loop, Loop} | proplists:delete(loop, Options)],
-    set_defaults(?DEFAULTS, Options1).
+    mochilists:set_defaults(?DEFAULTS, Options1).
 
 stop() ->
     mochiweb_socket_server:stop(?MODULE).
@@ -95,20 +86,26 @@ default_body(Req) ->
     default_body(Req, Req:get(method), Req:get(path)).
 
 loop(Socket, Body) ->
-    inet:setopts(Socket, [{packet, http}]),
+    mochiweb_socket:setopts(Socket, [{packet, http}]),
     request(Socket, Body).
 
 request(Socket, Body) ->
-    case gen_tcp:recv(Socket, 0, ?IDLE_TIMEOUT) of
+    case mochiweb_socket:recv(Socket, 0, ?REQUEST_RECV_TIMEOUT) of
         {ok, {http_request, Method, Path, Version}} ->
+            mochiweb_socket:setopts(Socket, [{packet, httph}]),
             headers(Socket, {Method, Path, Version}, [], Body, 0);
         {error, {http_error, "\r\n"}} ->
             request(Socket, Body);
         {error, {http_error, "\n"}} ->
             request(Socket, Body);
+        {error, closed} ->
+            mochiweb_socket:close(Socket),
+            exit(normal);
+        {error, timeout} ->
+            mochiweb_socket:close(Socket),
+            exit(normal);
         _Other ->
-            gen_tcp:close(Socket),
-            exit(normal)
+            handle_invalid_request(Socket)
     end.
 
 reentry(Body) ->
@@ -118,35 +115,161 @@ reentry(Body) ->
 
 headers(Socket, Request, Headers, _Body, ?MAX_HEADERS) ->
     %% Too many headers sent, bad request.
-    inet:setopts(Socket, [{packet, raw}]),
-    Req = mochiweb:new_request({Socket, Request,
-                                lists:reverse(Headers)}),
-    Req:respond({400, [], []}),
-    gen_tcp:close(Socket),
-    exit(normal);
+    mochiweb_socket:setopts(Socket, [{packet, raw}]),
+    handle_invalid_request(Socket, Request, Headers);
 headers(Socket, Request, Headers, Body, HeaderCount) ->
-    case gen_tcp:recv(Socket, 0, ?IDLE_TIMEOUT) of
+    case mochiweb_socket:recv(Socket, 0, ?HEADERS_RECV_TIMEOUT) of
         {ok, http_eoh} ->
-            inet:setopts(Socket, [{packet, raw}]),
+            mochiweb_socket:setopts(Socket, [{packet, raw}]),
             Req = mochiweb:new_request({Socket, Request,
                                         lists:reverse(Headers)}),
-            Body(Req),
+            call_body(Body, Req),
             ?MODULE:after_response(Body, Req);
         {ok, {http_header, _, Name, _, Value}} ->
             headers(Socket, Request, [{Name, Value} | Headers], Body,
                     1 + HeaderCount);
+        {error, closed} ->
+            mochiweb_socket:close(Socket),
+            exit(normal);
         _Other ->
-            gen_tcp:close(Socket),
-            exit(normal)
+            handle_invalid_request(Socket, Request, Headers)
     end.
 
+call_body({M, F}, Req) ->   % Body given as a {Module, Function} pair: call M:F(Req)
+    M:F(Req);
+call_body(Body, Req) ->   % any other Body is applied directly as a fun of one argument
+    Body(Req).
+
+handle_invalid_request(Socket) ->   % no parsed request line: substitute a placeholder GET / request and no headers
+    handle_invalid_request(Socket, {'GET', {abs_path, "/"}, {0,9}}, []).
+
+handle_invalid_request(Socket, Request, RevHeaders) ->   % RevHeaders is in reverse order; it is reversed below
+    mochiweb_socket:setopts(Socket, [{packet, raw}]),   % switch the socket to raw packets before responding
+    Req = mochiweb:new_request({Socket, Request,
+                                lists:reverse(RevHeaders)}),
+    Req:respond({400, [], []}),   % reply with status 400, passing empty lists for the other two fields
+    mochiweb_socket:close(Socket),
+    exit(normal).   % terminate the calling process with reason normal
+
 after_response(Body, Req) ->
     Socket = Req:get(socket),
     case Req:should_close() of
         true ->
-            gen_tcp:close(Socket),
+            mochiweb_socket:close(Socket),
             exit(normal);
         false ->
             Req:cleanup(),
             ?MODULE:loop(Socket, Body)
     end.
+
+parse_range_request("bytes=0-") ->   % whole-body range: reported as undefined instead of a range list
+    undefined;
+parse_range_request(RawRange) when is_list(RawRange) ->   % returns a list of {Start | none, End | none} tuples, or fail
+    try
+        "bytes=" ++ RangeString = RawRange,   % badmatch on any other prefix, turned into fail below
+        Ranges = string:tokens(RangeString, ","),   % one token per comma-separated range spec
+        lists:map(fun ("-" ++ V)  ->   % leading dash (suffix form -N) gives {none, N}
+                          {none, list_to_integer(V)};
+                      (R) ->
+                          case string:tokens(R, "-") of
+                              [S1, S2] ->   % two numbers (A-B) give {A, B}
+                                  {list_to_integer(S1), list_to_integer(S2)};
+                              [S] ->   % a single number (A-) gives {A, none}
+                                  {list_to_integer(S), none}
+                          end
+                  end,
+                  Ranges)
+    catch
+        _:_ ->   % any exception raised while parsing yields the atom fail
+            fail
+    end.
+
+range_skip_length(Spec, Size) ->   % maps one parsed range to {Skip, Length}, or to the atom invalid_range
+    case Spec of
+        {none, R} when R =< Size, R >= 0 ->   % suffix form: the last R bytes
+            {Size - R, R};
+        {none, _OutOfRange} ->   % suffix length outside 0..Size: whole body
+            {0, Size};
+        {R, none} when R >= 0, R < Size ->   % open-ended form: from offset R to the end
+            {R, Size - R};
+        {_OutOfRange, none} ->   % start offset negative or not below Size
+            invalid_range;
+        {Start, End} when 0 =< Start, Start =< End, End < Size ->   % inclusive Start..End, hence the + 1
+            {Start, End - Start + 1};
+        {_OutOfRange, _End} ->   % every remaining spec is rejected
+            invalid_range
+    end.
+
+%%
+%% Tests
+%%
+-include_lib("eunit/include/eunit.hrl").
+-ifdef(TEST).
+
+range_test() ->   % exercises parse_range_request/1 on valid, trivial, invalid and empty inputs
+    %% valid, single ranges
+    ?assertEqual([{20, 30}], parse_range_request("bytes=20-30")),
+    ?assertEqual([{20, none}], parse_range_request("bytes=20-")),
+    ?assertEqual([{none, 20}], parse_range_request("bytes=-20")),
+
+    %% trivial single range (the whole body) is reported as undefined
+    ?assertEqual(undefined, parse_range_request("bytes=0-")),
+
+    %% invalid, single ranges
+    ?assertEqual(fail, parse_range_request("")),
+    ?assertEqual(fail, parse_range_request("garbage")),
+    ?assertEqual(fail, parse_range_request("bytes=-20-30")),
+
+    %% valid, multiple ranges
+    ?assertEqual(
+       [{20, 30}, {50, 100}, {110, 200}],
+       parse_range_request("bytes=20-30,50-100,110-200")),
+    ?assertEqual(
+       [{20, none}, {50, 100}, {none, 200}],
+       parse_range_request("bytes=20-,50-100,-200")),
+
+    %% no ranges: an empty spec parses to an empty list
+    ?assertEqual([], parse_range_request("bytes=")),
+    ok.
+
+range_skip_length_test() ->   % exercises range_skip_length/2 against a 60 byte body
+    Body = <<"012345678901234567890123456789012345678901234567890123456789">>,
+    BodySize = byte_size(Body), %% 60
+    BodySize = 60,   % match asserts the size that the expected values below rely on
+
+    %% these values assume BodySize =:= 60
+    ?assertEqual({1,9}, range_skip_length({1,9}, BodySize)), %% 1-9
+    ?assertEqual({10,10}, range_skip_length({10,19}, BodySize)), %% 10-19
+    ?assertEqual({40, 20}, range_skip_length({none, 20}, BodySize)), %% -20
+    ?assertEqual({30, 30}, range_skip_length({30, none}, BodySize)), %% 30-
+
+    %% valid edge cases for range_skip_length
+    ?assertEqual({BodySize, 0}, range_skip_length({none, 0}, BodySize)),
+    ?assertEqual({0, BodySize}, range_skip_length({none, BodySize}, BodySize)),
+    ?assertEqual({0, BodySize}, range_skip_length({0, none}, BodySize)),
+    BodySizeLess1 = BodySize - 1,
+    ?assertEqual({BodySizeLess1, 1},
+                 range_skip_length({BodySize - 1, none}, BodySize)),
+
+    %% suffix length out of range, return whole thing
+    ?assertEqual({0, BodySize},
+                 range_skip_length({none, BodySize + 1}, BodySize)),
+    ?assertEqual({0, BodySize},
+                 range_skip_length({none, -1}, BodySize)),
+
+    %% invalid ranges
+    ?assertEqual(invalid_range,
+                 range_skip_length({-1, 30}, BodySize)),
+    ?assertEqual(invalid_range,
+                 range_skip_length({0, BodySize + 1}, BodySize)),
+    ?assertEqual(invalid_range,
+                 range_skip_length({-1, BodySize + 1}, BodySize)),
+    ?assertEqual(invalid_range,
+                 range_skip_length({BodySize, 40}, BodySize)),
+    ?assertEqual(invalid_range,
+                 range_skip_length({-1, none}, BodySize)),
+    ?assertEqual(invalid_range,
+                 range_skip_length({BodySize, none}, BodySize)),
+    ok.
+
+-endif.

Added: couchdb/trunk/src/mochiweb/mochiweb_io.erl
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/mochiweb/mochiweb_io.erl?rev=979368&view=auto
==============================================================================
--- couchdb/trunk/src/mochiweb/mochiweb_io.erl (added)
+++ couchdb/trunk/src/mochiweb/mochiweb_io.erl Mon Jul 26 17:21:30 2010
@@ -0,0 +1,46 @@
+%% @author Bob Ippolito <bob@mochimedia.com>
+%% @copyright 2007 Mochi Media, Inc.
+
+%% @doc Utilities for dealing with IO devices (open files).
+
+-module(mochiweb_io).
+-author('bob@mochimedia.com').
+
+-export([iodevice_stream/3, iodevice_stream/2]).
+-export([iodevice_foldl/4, iodevice_foldl/3]).
+-export([iodevice_size/1]).
+-define(READ_SIZE, 8192).
+
+iodevice_foldl(F, Acc, IoDevice) ->   % same as iodevice_foldl/4 with the default ?READ_SIZE buffer
+    iodevice_foldl(F, Acc, IoDevice, ?READ_SIZE).
+
+iodevice_foldl(F, Acc, IoDevice, BufferSize) ->   % folds F(Data, Acc) over successive reads from IoDevice
+    case file:read(IoDevice, BufferSize) of   % requests BufferSize per read; an {error, _} result is not matched here
+        eof ->   % nothing left to read: the accumulator is the result
+            Acc;
+        {ok, Data} ->   % fold this chunk in, then continue with the next read
+            iodevice_foldl(F, F(Data, Acc), IoDevice, BufferSize)
+    end.
+
+iodevice_stream(Callback, IoDevice) ->   % same as iodevice_stream/3 with the default ?READ_SIZE buffer
+    iodevice_stream(Callback, IoDevice, ?READ_SIZE).
+
+iodevice_stream(Callback, IoDevice, BufferSize) ->   % calls Callback(Data) once per chunk read from IoDevice
+    F = fun (Data, ok) -> Callback(Data) end,   % accumulator must stay ok, so Callback has to return ok
+    ok = iodevice_foldl(F, ok, IoDevice, BufferSize).   % the final match asserts the last result was ok too
+
+iodevice_size(IoDevice) ->   % returns the size by seeking; the device is left positioned at the start
+    {ok, Size} = file:position(IoDevice, eof),   % the offset of the end of file is the size
+    {ok, 0} = file:position(IoDevice, bof),   % rewind; the position held before the call is not restored
+    Size.
+
+
+%%
+%% Tests
+%%
+-include_lib("eunit/include/eunit.hrl").
+-ifdef(TEST).
+
+
+
+-endif.

Added: couchdb/trunk/src/mochiweb/mochiweb_mime.erl
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/mochiweb/mochiweb_mime.erl?rev=979368&view=auto
==============================================================================
--- couchdb/trunk/src/mochiweb/mochiweb_mime.erl (added)
+++ couchdb/trunk/src/mochiweb/mochiweb_mime.erl Mon Jul 26 17:21:30 2010
@@ -0,0 +1,94 @@
+%% @author Bob Ippolito <bob@mochimedia.com>
+%% @copyright 2007 Mochi Media, Inc.
+
+%% @doc Gives a good MIME type guess based on file extension.
+
+-module(mochiweb_mime).
+-author('bob@mochimedia.com').
+-export([from_extension/1]).
+
+%% @spec from_extension(S::string()) -> string() | undefined
+%% @doc Given a filename extension (e.g. ".html") return a guess for the MIME
+%%      type such as "text/html". Will return the atom undefined if no good
+%%      guess is available.
+from_extension(".html") ->
+    "text/html";
+from_extension(".xhtml") ->
+    "application/xhtml+xml";
+from_extension(".xml") ->
+    "application/xml";
+from_extension(".css") ->
+    "text/css";
+from_extension(".js") ->
+    "application/x-javascript";
+from_extension(".jpg") ->
+    "image/jpeg";
+from_extension(".gif") ->
+    "image/gif";
+from_extension(".png") ->
+    "image/png";
+from_extension(".swf") ->
+    "application/x-shockwave-flash";
+from_extension(".zip") ->
+    "application/zip";
+from_extension(".bz2") ->
+    "application/x-bzip2";
+from_extension(".gz") ->
+    "application/x-gzip";
+from_extension(".tar") ->
+    "application/x-tar";
+from_extension(".tgz") ->   % mapped to the same type as .gz
+    "application/x-gzip";
+from_extension(".txt") ->
+    "text/plain";
+from_extension(".doc") ->
+    "application/msword";
+from_extension(".pdf") ->
+    "application/pdf";
+from_extension(".xls") ->
+    "application/vnd.ms-excel";
+from_extension(".rtf") ->
+    "application/rtf";
+from_extension(".mov") ->
+    "video/quicktime";
+from_extension(".mp3") ->
+    "audio/mpeg";
+from_extension(".z") ->
+    "application/x-compress";
+from_extension(".wav") ->
+    "audio/x-wav";
+from_extension(".ico") ->
+    "image/x-icon";
+from_extension(".bmp") ->
+    "image/bmp";
+from_extension(".m4a") ->   % mapped to the same type as .mp3
+    "audio/mpeg";
+from_extension(".m3u") ->
+    "audio/x-mpegurl";
+from_extension(".exe") ->
+    "application/octet-stream";
+from_extension(".csv") ->
+    "text/csv";
+from_extension(_) ->   % anything not listed above; matching is exact, so case and the leading dot matter
+    undefined.
+
+%%
+%% Tests
+%%
+-include_lib("eunit/include/eunit.hrl").
+-ifdef(TEST).
+
+exhaustive_from_extension_test() ->   % checks from_extension/1 against every {K, V} pair in the lookup table
+    T = mochiweb_cover:clause_lookup_table(?MODULE, from_extension),   % NOTE(review): presumably one {Pattern, Result} pair per clause; confirm in mochiweb_cover
+    [?assertEqual(V, from_extension(K)) || {K, V} <- T].
+
+from_extension_test() ->   % spot checks: one known extension, the empty string, one unknown extension
+    ?assertEqual("text/html",
+                 from_extension(".html")),
+    ?assertEqual(undefined,
+                 from_extension("")),   % the empty string falls through to the catch-all clause
+    ?assertEqual(undefined,
+                 from_extension(".wtf")),
+    ok.
+
+-endif.