You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by ja...@apache.org on 2012/11/12 21:48:52 UTC
[2/12] update mochiweb to 2.3.2
http://git-wip-us.apache.org/repos/asf/couchdb/blob/6fdb9e07/src/mochiweb/mochijson.erl
----------------------------------------------------------------------
diff --git a/src/mochiweb/mochijson.erl b/src/mochiweb/mochijson.erl
deleted file mode 100644
index 2e3d145..0000000
--- a/src/mochiweb/mochijson.erl
+++ /dev/null
@@ -1,531 +0,0 @@
-%% @author Bob Ippolito <bo...@mochimedia.com>
-%% @copyright 2006 Mochi Media, Inc.
-
-%% @doc Yet another JSON (RFC 4627) library for Erlang.
--module(mochijson).
--author('bob@mochimedia.com').
--export([encoder/1, encode/1]).
--export([decoder/1, decode/1]).
--export([binary_encoder/1, binary_encode/1]).
--export([binary_decoder/1, binary_decode/1]).
-
-% This is a macro to placate syntax highlighters..
--define(Q, $\").
--define(ADV_COL(S, N), S#decoder{column=N+S#decoder.column}).
--define(INC_COL(S), S#decoder{column=1+S#decoder.column}).
--define(INC_LINE(S), S#decoder{column=1, line=1+S#decoder.line}).
-
-%% @type iolist() = [char() | binary() | iolist()]
-%% @type iodata() = iolist() | binary()
-%% @type json_string() = atom | string() | binary()
-%% @type json_number() = integer() | float()
-%% @type json_array() = {array, [json_term()]}
-%% @type json_object() = {struct, [{json_string(), json_term()}]}
-%% @type json_term() = json_string() | json_number() | json_array() |
-%% json_object()
-%% @type encoding() = utf8 | unicode
-%% @type encoder_option() = {input_encoding, encoding()} |
-%% {handler, function()}
-%% @type decoder_option() = {input_encoding, encoding()} |
-%% {object_hook, function()}
-%% @type bjson_string() = binary()
-%% @type bjson_number() = integer() | float()
-%% @type bjson_array() = [bjson_term()]
-%% @type bjson_object() = {struct, [{bjson_string(), bjson_term()}]}
-%% @type bjson_term() = bjson_string() | bjson_number() | bjson_array() |
-%% bjson_object()
-%% @type binary_encoder_option() = {handler, function()}
-%% @type binary_decoder_option() = {object_hook, function()}
-
--record(encoder, {input_encoding=unicode,
- handler=null}).
-
--record(decoder, {input_encoding=utf8,
- object_hook=null,
- line=1,
- column=1,
- state=null}).
-
-%% @spec encoder([encoder_option()]) -> function()
-%% @doc Create an encoder/1 with the given options.
-encoder(Options) ->
- State = parse_encoder_options(Options, #encoder{}),
- fun (O) -> json_encode(O, State) end.
-
-%% @spec encode(json_term()) -> iolist()
-%% @doc Encode the given as JSON to an iolist.
-encode(Any) ->
- json_encode(Any, #encoder{}).
-
-%% @spec decoder([decoder_option()]) -> function()
-%% @doc Create a decoder/1 with the given options.
-decoder(Options) ->
- State = parse_decoder_options(Options, #decoder{}),
- fun (O) -> json_decode(O, State) end.
-
-%% @spec decode(iolist()) -> json_term()
-%% @doc Decode the given iolist to Erlang terms.
-decode(S) ->
- json_decode(S, #decoder{}).
-
-%% @spec binary_decoder([binary_decoder_option()]) -> function()
-%% @doc Create a binary_decoder/1 with the given options.
-binary_decoder(Options) ->
- mochijson2:decoder(Options).
-
-%% @spec binary_encoder([binary_encoder_option()]) -> function()
-%% @doc Create a binary_encoder/1 with the given options.
-binary_encoder(Options) ->
- mochijson2:encoder(Options).
-
-%% @spec binary_encode(bjson_term()) -> iolist()
-%% @doc Encode the given as JSON to an iolist, using lists for arrays and
-%% binaries for strings.
-binary_encode(Any) ->
- mochijson2:encode(Any).
-
-%% @spec binary_decode(iolist()) -> bjson_term()
-%% @doc Decode the given iolist to Erlang terms, using lists for arrays and
-%% binaries for strings.
-binary_decode(S) ->
- mochijson2:decode(S).
-
-%% Internal API
-
-parse_encoder_options([], State) ->
- State;
-parse_encoder_options([{input_encoding, Encoding} | Rest], State) ->
- parse_encoder_options(Rest, State#encoder{input_encoding=Encoding});
-parse_encoder_options([{handler, Handler} | Rest], State) ->
- parse_encoder_options(Rest, State#encoder{handler=Handler}).
-
-parse_decoder_options([], State) ->
- State;
-parse_decoder_options([{input_encoding, Encoding} | Rest], State) ->
- parse_decoder_options(Rest, State#decoder{input_encoding=Encoding});
-parse_decoder_options([{object_hook, Hook} | Rest], State) ->
- parse_decoder_options(Rest, State#decoder{object_hook=Hook}).
-
-json_encode(true, _State) ->
- "true";
-json_encode(false, _State) ->
- "false";
-json_encode(null, _State) ->
- "null";
-json_encode(I, _State) when is_integer(I) ->
- integer_to_list(I);
-json_encode(F, _State) when is_float(F) ->
- mochinum:digits(F);
-json_encode(L, State) when is_list(L); is_binary(L); is_atom(L) ->
- json_encode_string(L, State);
-json_encode({array, Props}, State) when is_list(Props) ->
- json_encode_array(Props, State);
-json_encode({struct, Props}, State) when is_list(Props) ->
- json_encode_proplist(Props, State);
-json_encode(Bad, #encoder{handler=null}) ->
- exit({json_encode, {bad_term, Bad}});
-json_encode(Bad, State=#encoder{handler=Handler}) ->
- json_encode(Handler(Bad), State).
-
-json_encode_array([], _State) ->
- "[]";
-json_encode_array(L, State) ->
- F = fun (O, Acc) ->
- [$,, json_encode(O, State) | Acc]
- end,
- [$, | Acc1] = lists:foldl(F, "[", L),
- lists:reverse([$\] | Acc1]).
-
-json_encode_proplist([], _State) ->
- "{}";
-json_encode_proplist(Props, State) ->
- F = fun ({K, V}, Acc) ->
- KS = case K of
- K when is_atom(K) ->
- json_encode_string_utf8(atom_to_list(K));
- K when is_integer(K) ->
- json_encode_string(integer_to_list(K), State);
- K when is_list(K); is_binary(K) ->
- json_encode_string(K, State)
- end,
- VS = json_encode(V, State),
- [$,, VS, $:, KS | Acc]
- end,
- [$, | Acc1] = lists:foldl(F, "{", Props),
- lists:reverse([$\} | Acc1]).
-
-json_encode_string(A, _State) when is_atom(A) ->
- json_encode_string_unicode(xmerl_ucs:from_utf8(atom_to_list(A)));
-json_encode_string(B, _State) when is_binary(B) ->
- json_encode_string_unicode(xmerl_ucs:from_utf8(B));
-json_encode_string(S, #encoder{input_encoding=utf8}) ->
- json_encode_string_utf8(S);
-json_encode_string(S, #encoder{input_encoding=unicode}) ->
- json_encode_string_unicode(S).
-
-json_encode_string_utf8(S) ->
- [?Q | json_encode_string_utf8_1(S)].
-
-json_encode_string_utf8_1([C | Cs]) when C >= 0, C =< 16#7f ->
- NewC = case C of
- $\\ -> "\\\\";
- ?Q -> "\\\"";
- _ when C >= $\s, C < 16#7f -> C;
- $\t -> "\\t";
- $\n -> "\\n";
- $\r -> "\\r";
- $\f -> "\\f";
- $\b -> "\\b";
- _ when C >= 0, C =< 16#7f -> unihex(C);
- _ -> exit({json_encode, {bad_char, C}})
- end,
- [NewC | json_encode_string_utf8_1(Cs)];
-json_encode_string_utf8_1(All=[C | _]) when C >= 16#80, C =< 16#10FFFF ->
- [?Q | Rest] = json_encode_string_unicode(xmerl_ucs:from_utf8(All)),
- Rest;
-json_encode_string_utf8_1([]) ->
- "\"".
-
-json_encode_string_unicode(S) ->
- [?Q | json_encode_string_unicode_1(S)].
-
-json_encode_string_unicode_1([C | Cs]) ->
- NewC = case C of
- $\\ -> "\\\\";
- ?Q -> "\\\"";
- _ when C >= $\s, C < 16#7f -> C;
- $\t -> "\\t";
- $\n -> "\\n";
- $\r -> "\\r";
- $\f -> "\\f";
- $\b -> "\\b";
- _ when C >= 0, C =< 16#10FFFF -> unihex(C);
- _ -> exit({json_encode, {bad_char, C}})
- end,
- [NewC | json_encode_string_unicode_1(Cs)];
-json_encode_string_unicode_1([]) ->
- "\"".
-
-dehex(C) when C >= $0, C =< $9 ->
- C - $0;
-dehex(C) when C >= $a, C =< $f ->
- C - $a + 10;
-dehex(C) when C >= $A, C =< $F ->
- C - $A + 10.
-
-hexdigit(C) when C >= 0, C =< 9 ->
- C + $0;
-hexdigit(C) when C =< 15 ->
- C + $a - 10.
-
-unihex(C) when C < 16#10000 ->
- <<D3:4, D2:4, D1:4, D0:4>> = <<C:16>>,
- Digits = [hexdigit(D) || D <- [D3, D2, D1, D0]],
- [$\\, $u | Digits];
-unihex(C) when C =< 16#10FFFF ->
- N = C - 16#10000,
- S1 = 16#d800 bor ((N bsr 10) band 16#3ff),
- S2 = 16#dc00 bor (N band 16#3ff),
- [unihex(S1), unihex(S2)].
-
-json_decode(B, S) when is_binary(B) ->
- json_decode(binary_to_list(B), S);
-json_decode(L, S) ->
- {Res, L1, S1} = decode1(L, S),
- {eof, [], _} = tokenize(L1, S1#decoder{state=trim}),
- Res.
-
-decode1(L, S=#decoder{state=null}) ->
- case tokenize(L, S#decoder{state=any}) of
- {{const, C}, L1, S1} ->
- {C, L1, S1};
- {start_array, L1, S1} ->
- decode_array(L1, S1#decoder{state=any}, []);
- {start_object, L1, S1} ->
- decode_object(L1, S1#decoder{state=key}, [])
- end.
-
-make_object(V, #decoder{object_hook=null}) ->
- V;
-make_object(V, #decoder{object_hook=Hook}) ->
- Hook(V).
-
-decode_object(L, S=#decoder{state=key}, Acc) ->
- case tokenize(L, S) of
- {end_object, Rest, S1} ->
- V = make_object({struct, lists:reverse(Acc)}, S1),
- {V, Rest, S1#decoder{state=null}};
- {{const, K}, Rest, S1} when is_list(K) ->
- {colon, L2, S2} = tokenize(Rest, S1),
- {V, L3, S3} = decode1(L2, S2#decoder{state=null}),
- decode_object(L3, S3#decoder{state=comma}, [{K, V} | Acc])
- end;
-decode_object(L, S=#decoder{state=comma}, Acc) ->
- case tokenize(L, S) of
- {end_object, Rest, S1} ->
- V = make_object({struct, lists:reverse(Acc)}, S1),
- {V, Rest, S1#decoder{state=null}};
- {comma, Rest, S1} ->
- decode_object(Rest, S1#decoder{state=key}, Acc)
- end.
-
-decode_array(L, S=#decoder{state=any}, Acc) ->
- case tokenize(L, S) of
- {end_array, Rest, S1} ->
- {{array, lists:reverse(Acc)}, Rest, S1#decoder{state=null}};
- {start_array, Rest, S1} ->
- {Array, Rest1, S2} = decode_array(Rest, S1#decoder{state=any}, []),
- decode_array(Rest1, S2#decoder{state=comma}, [Array | Acc]);
- {start_object, Rest, S1} ->
- {Array, Rest1, S2} = decode_object(Rest, S1#decoder{state=key}, []),
- decode_array(Rest1, S2#decoder{state=comma}, [Array | Acc]);
- {{const, Const}, Rest, S1} ->
- decode_array(Rest, S1#decoder{state=comma}, [Const | Acc])
- end;
-decode_array(L, S=#decoder{state=comma}, Acc) ->
- case tokenize(L, S) of
- {end_array, Rest, S1} ->
- {{array, lists:reverse(Acc)}, Rest, S1#decoder{state=null}};
- {comma, Rest, S1} ->
- decode_array(Rest, S1#decoder{state=any}, Acc)
- end.
-
-tokenize_string(IoList=[C | _], S=#decoder{input_encoding=utf8}, Acc)
- when is_list(C); is_binary(C); C >= 16#7f ->
- List = xmerl_ucs:from_utf8(iolist_to_binary(IoList)),
- tokenize_string(List, S#decoder{input_encoding=unicode}, Acc);
-tokenize_string("\"" ++ Rest, S, Acc) ->
- {lists:reverse(Acc), Rest, ?INC_COL(S)};
-tokenize_string("\\\"" ++ Rest, S, Acc) ->
- tokenize_string(Rest, ?ADV_COL(S, 2), [$\" | Acc]);
-tokenize_string("\\\\" ++ Rest, S, Acc) ->
- tokenize_string(Rest, ?ADV_COL(S, 2), [$\\ | Acc]);
-tokenize_string("\\/" ++ Rest, S, Acc) ->
- tokenize_string(Rest, ?ADV_COL(S, 2), [$/ | Acc]);
-tokenize_string("\\b" ++ Rest, S, Acc) ->
- tokenize_string(Rest, ?ADV_COL(S, 2), [$\b | Acc]);
-tokenize_string("\\f" ++ Rest, S, Acc) ->
- tokenize_string(Rest, ?ADV_COL(S, 2), [$\f | Acc]);
-tokenize_string("\\n" ++ Rest, S, Acc) ->
- tokenize_string(Rest, ?ADV_COL(S, 2), [$\n | Acc]);
-tokenize_string("\\r" ++ Rest, S, Acc) ->
- tokenize_string(Rest, ?ADV_COL(S, 2), [$\r | Acc]);
-tokenize_string("\\t" ++ Rest, S, Acc) ->
- tokenize_string(Rest, ?ADV_COL(S, 2), [$\t | Acc]);
-tokenize_string([$\\, $u, C3, C2, C1, C0 | Rest], S, Acc) ->
- % coalesce UTF-16 surrogate pair?
- C = dehex(C0) bor
- (dehex(C1) bsl 4) bor
- (dehex(C2) bsl 8) bor
- (dehex(C3) bsl 12),
- tokenize_string(Rest, ?ADV_COL(S, 6), [C | Acc]);
-tokenize_string([C | Rest], S, Acc) when C >= $\s; C < 16#10FFFF ->
- tokenize_string(Rest, ?ADV_COL(S, 1), [C | Acc]).
-
-tokenize_number(IoList=[C | _], Mode, S=#decoder{input_encoding=utf8}, Acc)
- when is_list(C); is_binary(C); C >= 16#7f ->
- List = xmerl_ucs:from_utf8(iolist_to_binary(IoList)),
- tokenize_number(List, Mode, S#decoder{input_encoding=unicode}, Acc);
-tokenize_number([$- | Rest], sign, S, []) ->
- tokenize_number(Rest, int, ?INC_COL(S), [$-]);
-tokenize_number(Rest, sign, S, []) ->
- tokenize_number(Rest, int, S, []);
-tokenize_number([$0 | Rest], int, S, Acc) ->
- tokenize_number(Rest, frac, ?INC_COL(S), [$0 | Acc]);
-tokenize_number([C | Rest], int, S, Acc) when C >= $1, C =< $9 ->
- tokenize_number(Rest, int1, ?INC_COL(S), [C | Acc]);
-tokenize_number([C | Rest], int1, S, Acc) when C >= $0, C =< $9 ->
- tokenize_number(Rest, int1, ?INC_COL(S), [C | Acc]);
-tokenize_number(Rest, int1, S, Acc) ->
- tokenize_number(Rest, frac, S, Acc);
-tokenize_number([$., C | Rest], frac, S, Acc) when C >= $0, C =< $9 ->
- tokenize_number(Rest, frac1, ?ADV_COL(S, 2), [C, $. | Acc]);
-tokenize_number([E | Rest], frac, S, Acc) when E == $e; E == $E ->
- tokenize_number(Rest, esign, ?INC_COL(S), [$e, $0, $. | Acc]);
-tokenize_number(Rest, frac, S, Acc) ->
- {{int, lists:reverse(Acc)}, Rest, S};
-tokenize_number([C | Rest], frac1, S, Acc) when C >= $0, C =< $9 ->
- tokenize_number(Rest, frac1, ?INC_COL(S), [C | Acc]);
-tokenize_number([E | Rest], frac1, S, Acc) when E == $e; E == $E ->
- tokenize_number(Rest, esign, ?INC_COL(S), [$e | Acc]);
-tokenize_number(Rest, frac1, S, Acc) ->
- {{float, lists:reverse(Acc)}, Rest, S};
-tokenize_number([C | Rest], esign, S, Acc) when C == $-; C == $+ ->
- tokenize_number(Rest, eint, ?INC_COL(S), [C | Acc]);
-tokenize_number(Rest, esign, S, Acc) ->
- tokenize_number(Rest, eint, S, Acc);
-tokenize_number([C | Rest], eint, S, Acc) when C >= $0, C =< $9 ->
- tokenize_number(Rest, eint1, ?INC_COL(S), [C | Acc]);
-tokenize_number([C | Rest], eint1, S, Acc) when C >= $0, C =< $9 ->
- tokenize_number(Rest, eint1, ?INC_COL(S), [C | Acc]);
-tokenize_number(Rest, eint1, S, Acc) ->
- {{float, lists:reverse(Acc)}, Rest, S}.
-
-tokenize([], S=#decoder{state=trim}) ->
- {eof, [], S};
-tokenize([L | Rest], S) when is_list(L) ->
- tokenize(L ++ Rest, S);
-tokenize([B | Rest], S) when is_binary(B) ->
- tokenize(xmerl_ucs:from_utf8(B) ++ Rest, S);
-tokenize("\r\n" ++ Rest, S) ->
- tokenize(Rest, ?INC_LINE(S));
-tokenize("\n" ++ Rest, S) ->
- tokenize(Rest, ?INC_LINE(S));
-tokenize([C | Rest], S) when C == $\s; C == $\t ->
- tokenize(Rest, ?INC_COL(S));
-tokenize("{" ++ Rest, S) ->
- {start_object, Rest, ?INC_COL(S)};
-tokenize("}" ++ Rest, S) ->
- {end_object, Rest, ?INC_COL(S)};
-tokenize("[" ++ Rest, S) ->
- {start_array, Rest, ?INC_COL(S)};
-tokenize("]" ++ Rest, S) ->
- {end_array, Rest, ?INC_COL(S)};
-tokenize("," ++ Rest, S) ->
- {comma, Rest, ?INC_COL(S)};
-tokenize(":" ++ Rest, S) ->
- {colon, Rest, ?INC_COL(S)};
-tokenize("null" ++ Rest, S) ->
- {{const, null}, Rest, ?ADV_COL(S, 4)};
-tokenize("true" ++ Rest, S) ->
- {{const, true}, Rest, ?ADV_COL(S, 4)};
-tokenize("false" ++ Rest, S) ->
- {{const, false}, Rest, ?ADV_COL(S, 5)};
-tokenize("\"" ++ Rest, S) ->
- {String, Rest1, S1} = tokenize_string(Rest, ?INC_COL(S), []),
- {{const, String}, Rest1, S1};
-tokenize(L=[C | _], S) when C >= $0, C =< $9; C == $- ->
- case tokenize_number(L, sign, S, []) of
- {{int, Int}, Rest, S1} ->
- {{const, list_to_integer(Int)}, Rest, S1};
- {{float, Float}, Rest, S1} ->
- {{const, list_to_float(Float)}, Rest, S1}
- end.
-
-
-%%
-%% Tests
-%%
--include_lib("eunit/include/eunit.hrl").
--ifdef(TEST).
-
-%% testing constructs borrowed from the Yaws JSON implementation.
-
-%% Create an object from a list of Key/Value pairs.
-
-obj_new() ->
- {struct, []}.
-
-is_obj({struct, Props}) ->
- F = fun ({K, _}) when is_list(K) ->
- true;
- (_) ->
- false
- end,
- lists:all(F, Props).
-
-obj_from_list(Props) ->
- Obj = {struct, Props},
- case is_obj(Obj) of
- true -> Obj;
- false -> exit(json_bad_object)
- end.
-
-%% Test for equivalence of Erlang terms.
-%% Due to arbitrary order of construction, equivalent objects might
-%% compare unequal as erlang terms, so we need to carefully recurse
-%% through aggregates (tuples and objects).
-
-equiv({struct, Props1}, {struct, Props2}) ->
- equiv_object(Props1, Props2);
-equiv({array, L1}, {array, L2}) ->
- equiv_list(L1, L2);
-equiv(N1, N2) when is_number(N1), is_number(N2) -> N1 == N2;
-equiv(S1, S2) when is_list(S1), is_list(S2) -> S1 == S2;
-equiv(true, true) -> true;
-equiv(false, false) -> true;
-equiv(null, null) -> true.
-
-%% Object representation and traversal order is unknown.
-%% Use the sledgehammer and sort property lists.
-
-equiv_object(Props1, Props2) ->
- L1 = lists:keysort(1, Props1),
- L2 = lists:keysort(1, Props2),
- Pairs = lists:zip(L1, L2),
- true = lists:all(fun({{K1, V1}, {K2, V2}}) ->
- equiv(K1, K2) and equiv(V1, V2)
- end, Pairs).
-
-%% Recursively compare tuple elements for equivalence.
-
-equiv_list([], []) ->
- true;
-equiv_list([V1 | L1], [V2 | L2]) ->
- equiv(V1, V2) andalso equiv_list(L1, L2).
-
-e2j_vec_test() ->
- test_one(e2j_test_vec(utf8), 1).
-
-issue33_test() ->
- %% http://code.google.com/p/mochiweb/issues/detail?id=33
- Js = {struct, [{"key", [194, 163]}]},
- Encoder = encoder([{input_encoding, utf8}]),
- "{\"key\":\"\\u00a3\"}" = lists:flatten(Encoder(Js)).
-
-test_one([], _N) ->
- %% io:format("~p tests passed~n", [N-1]),
- ok;
-test_one([{E, J} | Rest], N) ->
- %% io:format("[~p] ~p ~p~n", [N, E, J]),
- true = equiv(E, decode(J)),
- true = equiv(E, decode(encode(E))),
- test_one(Rest, 1+N).
-
-e2j_test_vec(utf8) ->
- [
- {1, "1"},
- {3.1416, "3.14160"}, % text representation may truncate, trail zeroes
- {-1, "-1"},
- {-3.1416, "-3.14160"},
- {12.0e10, "1.20000e+11"},
- {1.234E+10, "1.23400e+10"},
- {-1.234E-10, "-1.23400e-10"},
- {10.0, "1.0e+01"},
- {123.456, "1.23456E+2"},
- {10.0, "1e1"},
- {"foo", "\"foo\""},
- {"foo" ++ [5] ++ "bar", "\"foo\\u0005bar\""},
- {"", "\"\""},
- {"\"", "\"\\\"\""},
- {"\n\n\n", "\"\\n\\n\\n\""},
- {"\\", "\"\\\\\""},
- {"\" \b\f\r\n\t\"", "\"\\\" \\b\\f\\r\\n\\t\\\"\""},
- {obj_new(), "{}"},
- {obj_from_list([{"foo", "bar"}]), "{\"foo\":\"bar\"}"},
- {obj_from_list([{"foo", "bar"}, {"baz", 123}]),
- "{\"foo\":\"bar\",\"baz\":123}"},
- {{array, []}, "[]"},
- {{array, [{array, []}]}, "[[]]"},
- {{array, [1, "foo"]}, "[1,\"foo\"]"},
-
- % json array in a json object
- {obj_from_list([{"foo", {array, [123]}}]),
- "{\"foo\":[123]}"},
-
- % json object in a json object
- {obj_from_list([{"foo", obj_from_list([{"bar", true}])}]),
- "{\"foo\":{\"bar\":true}}"},
-
- % fold evaluation order
- {obj_from_list([{"foo", {array, []}},
- {"bar", obj_from_list([{"baz", true}])},
- {"alice", "bob"}]),
- "{\"foo\":[],\"bar\":{\"baz\":true},\"alice\":\"bob\"}"},
-
- % json object in a json array
- {{array, [-123, "foo", obj_from_list([{"bar", {array, []}}]), null]},
- "[-123,\"foo\",{\"bar\":[]},null]"}
- ].
-
--endif.
http://git-wip-us.apache.org/repos/asf/couchdb/blob/6fdb9e07/src/mochiweb/mochijson2.erl
----------------------------------------------------------------------
diff --git a/src/mochiweb/mochijson2.erl b/src/mochiweb/mochijson2.erl
deleted file mode 100644
index bdf6d77..0000000
--- a/src/mochiweb/mochijson2.erl
+++ /dev/null
@@ -1,849 +0,0 @@
-%% @author Bob Ippolito <bo...@mochimedia.com>
-%% @copyright 2007 Mochi Media, Inc.
-
-%% @doc Yet another JSON (RFC 4627) library for Erlang. mochijson2 works
-%% with binaries as strings, arrays as lists (without an {array, _})
-%% wrapper and it only knows how to decode UTF-8 (and ASCII).
-%%
-%% JSON terms are decoded as follows (javascript -> erlang):
-%% <ul>
-%% <li>{"key": "value"} ->
-%% {struct, [{<<"key">>, <<"value">>}]}</li>
-%% <li>["array", 123, 12.34, true, false, null] ->
-%% [<<"array">>, 123, 12.34, true, false, null]
-%% </li>
-%% </ul>
-%% <ul>
-%% <li>Strings in JSON decode to UTF-8 binaries in Erlang</li>
-%% <li>Objects decode to {struct, PropList}</li>
-%% <li>Numbers decode to integer or float</li>
-%% <li>true, false, null decode to their respective terms.</li>
-%% </ul>
-%% The encoder will accept the same format that the decoder will produce,
-%% but will also allow additional cases for leniency:
-%% <ul>
-%% <li>atoms other than true, false, null will be considered UTF-8
-%% strings (even as a proplist key)
-%% </li>
-%% <li>{json, IoList} will insert IoList directly into the output
-%% with no validation
-%% </li>
-%% <li>{array, Array} will be encoded as Array
-%% (legacy mochijson style)
-%% </li>
-%% <li>A non-empty raw proplist will be encoded as an object as long
-%% as the first pair does not have an atom key of json, struct,
-%% or array
-%% </li>
-%% </ul>
-
--module(mochijson2).
--author('bob@mochimedia.com').
--export([encoder/1, encode/1]).
--export([decoder/1, decode/1]).
-
-% This is a macro to placate syntax highlighters..
--define(Q, $\").
--define(ADV_COL(S, N), S#decoder{offset=N+S#decoder.offset,
- column=N+S#decoder.column}).
--define(INC_COL(S), S#decoder{offset=1+S#decoder.offset,
- column=1+S#decoder.column}).
--define(INC_LINE(S), S#decoder{offset=1+S#decoder.offset,
- column=1,
- line=1+S#decoder.line}).
--define(INC_CHAR(S, C),
- case C of
- $\n ->
- S#decoder{column=1,
- line=1+S#decoder.line,
- offset=1+S#decoder.offset};
- _ ->
- S#decoder{column=1+S#decoder.column,
- offset=1+S#decoder.offset}
- end).
--define(IS_WHITESPACE(C),
- (C =:= $\s orelse C =:= $\t orelse C =:= $\r orelse C =:= $\n)).
-
-%% @type iolist() = [char() | binary() | iolist()]
-%% @type iodata() = iolist() | binary()
-%% @type json_string() = atom | binary()
-%% @type json_number() = integer() | float()
-%% @type json_array() = [json_term()]
-%% @type json_object() = {struct, [{json_string(), json_term()}]}
-%% @type json_iolist() = {json, iolist()}
-%% @type json_term() = json_string() | json_number() | json_array() |
-%% json_object() | json_iolist()
-
--record(encoder, {handler=null,
- utf8=false}).
-
--record(decoder, {object_hook=null,
- offset=0,
- line=1,
- column=1,
- state=null}).
-
-%% @spec encoder([encoder_option()]) -> function()
-%% @doc Create an encoder/1 with the given options.
-%% @type encoder_option() = handler_option() | utf8_option()
-%% @type utf8_option() = boolean(). Emit unicode as utf8 (default - false)
-encoder(Options) ->
- State = parse_encoder_options(Options, #encoder{}),
- fun (O) -> json_encode(O, State) end.
-
-%% @spec encode(json_term()) -> iolist()
-%% @doc Encode the given as JSON to an iolist.
-encode(Any) ->
- json_encode(Any, #encoder{}).
-
-%% @spec decoder([decoder_option()]) -> function()
-%% @doc Create a decoder/1 with the given options.
-decoder(Options) ->
- State = parse_decoder_options(Options, #decoder{}),
- fun (O) -> json_decode(O, State) end.
-
-%% @spec decode(iolist()) -> json_term()
-%% @doc Decode the given iolist to Erlang terms.
-decode(S) ->
- json_decode(S, #decoder{}).
-
-%% Internal API
-
-parse_encoder_options([], State) ->
- State;
-parse_encoder_options([{handler, Handler} | Rest], State) ->
- parse_encoder_options(Rest, State#encoder{handler=Handler});
-parse_encoder_options([{utf8, Switch} | Rest], State) ->
- parse_encoder_options(Rest, State#encoder{utf8=Switch}).
-
-parse_decoder_options([], State) ->
- State;
-parse_decoder_options([{object_hook, Hook} | Rest], State) ->
- parse_decoder_options(Rest, State#decoder{object_hook=Hook}).
-
-json_encode(true, _State) ->
- <<"true">>;
-json_encode(false, _State) ->
- <<"false">>;
-json_encode(null, _State) ->
- <<"null">>;
-json_encode(I, _State) when is_integer(I) ->
- integer_to_list(I);
-json_encode(F, _State) when is_float(F) ->
- mochinum:digits(F);
-json_encode(S, State) when is_binary(S); is_atom(S) ->
- json_encode_string(S, State);
-json_encode([{K, _}|_] = Props, State) when (K =/= struct andalso
- K =/= array andalso
- K =/= json) ->
- json_encode_proplist(Props, State);
-json_encode({struct, Props}, State) when is_list(Props) ->
- json_encode_proplist(Props, State);
-json_encode(Array, State) when is_list(Array) ->
- json_encode_array(Array, State);
-json_encode({array, Array}, State) when is_list(Array) ->
- json_encode_array(Array, State);
-json_encode({json, IoList}, _State) ->
- IoList;
-json_encode(Bad, #encoder{handler=null}) ->
- exit({json_encode, {bad_term, Bad}});
-json_encode(Bad, State=#encoder{handler=Handler}) ->
- json_encode(Handler(Bad), State).
-
-json_encode_array([], _State) ->
- <<"[]">>;
-json_encode_array(L, State) ->
- F = fun (O, Acc) ->
- [$,, json_encode(O, State) | Acc]
- end,
- [$, | Acc1] = lists:foldl(F, "[", L),
- lists:reverse([$\] | Acc1]).
-
-json_encode_proplist([], _State) ->
- <<"{}">>;
-json_encode_proplist(Props, State) ->
- F = fun ({K, V}, Acc) ->
- KS = json_encode_string(K, State),
- VS = json_encode(V, State),
- [$,, VS, $:, KS | Acc]
- end,
- [$, | Acc1] = lists:foldl(F, "{", Props),
- lists:reverse([$\} | Acc1]).
-
-json_encode_string(A, State) when is_atom(A) ->
- L = atom_to_list(A),
- case json_string_is_safe(L) of
- true ->
- [?Q, L, ?Q];
- false ->
- json_encode_string_unicode(xmerl_ucs:from_utf8(L), State, [?Q])
- end;
-json_encode_string(B, State) when is_binary(B) ->
- case json_bin_is_safe(B) of
- true ->
- [?Q, B, ?Q];
- false ->
- json_encode_string_unicode(xmerl_ucs:from_utf8(B), State, [?Q])
- end;
-json_encode_string(I, _State) when is_integer(I) ->
- [?Q, integer_to_list(I), ?Q];
-json_encode_string(L, State) when is_list(L) ->
- case json_string_is_safe(L) of
- true ->
- [?Q, L, ?Q];
- false ->
- json_encode_string_unicode(L, State, [?Q])
- end.
-
-json_string_is_safe([]) ->
- true;
-json_string_is_safe([C | Rest]) ->
- case C of
- ?Q ->
- false;
- $\\ ->
- false;
- $\b ->
- false;
- $\f ->
- false;
- $\n ->
- false;
- $\r ->
- false;
- $\t ->
- false;
- C when C >= 0, C < $\s; C >= 16#7f, C =< 16#10FFFF ->
- false;
- C when C < 16#7f ->
- json_string_is_safe(Rest);
- _ ->
- false
- end.
-
-json_bin_is_safe(<<>>) ->
- true;
-json_bin_is_safe(<<C, Rest/binary>>) ->
- case C of
- ?Q ->
- false;
- $\\ ->
- false;
- $\b ->
- false;
- $\f ->
- false;
- $\n ->
- false;
- $\r ->
- false;
- $\t ->
- false;
- C when C >= 0, C < $\s; C >= 16#7f ->
- false;
- C when C < 16#7f ->
- json_bin_is_safe(Rest)
- end.
-
-json_encode_string_unicode([], _State, Acc) ->
- lists:reverse([$\" | Acc]);
-json_encode_string_unicode([C | Cs], State, Acc) ->
- Acc1 = case C of
- ?Q ->
- [?Q, $\\ | Acc];
- %% Escaping solidus is only useful when trying to protect
- %% against "</script>" injection attacks which are only
- %% possible when JSON is inserted into a HTML document
- %% in-line. mochijson2 does not protect you from this, so
- %% if you do insert directly into HTML then you need to
- %% uncomment the following case or escape the output of encode.
- %%
- %% $/ ->
- %% [$/, $\\ | Acc];
- %%
- $\\ ->
- [$\\, $\\ | Acc];
- $\b ->
- [$b, $\\ | Acc];
- $\f ->
- [$f, $\\ | Acc];
- $\n ->
- [$n, $\\ | Acc];
- $\r ->
- [$r, $\\ | Acc];
- $\t ->
- [$t, $\\ | Acc];
- C when C >= 0, C < $\s ->
- [unihex(C) | Acc];
- C when C >= 16#7f, C =< 16#10FFFF, State#encoder.utf8 ->
- [xmerl_ucs:to_utf8(C) | Acc];
- C when C >= 16#7f, C =< 16#10FFFF, not State#encoder.utf8 ->
- [unihex(C) | Acc];
- C when C < 16#7f ->
- [C | Acc];
- _ ->
- exit({json_encode, {bad_char, C}})
- end,
- json_encode_string_unicode(Cs, State, Acc1).
-
-hexdigit(C) when C >= 0, C =< 9 ->
- C + $0;
-hexdigit(C) when C =< 15 ->
- C + $a - 10.
-
-unihex(C) when C < 16#10000 ->
- <<D3:4, D2:4, D1:4, D0:4>> = <<C:16>>,
- Digits = [hexdigit(D) || D <- [D3, D2, D1, D0]],
- [$\\, $u | Digits];
-unihex(C) when C =< 16#10FFFF ->
- N = C - 16#10000,
- S1 = 16#d800 bor ((N bsr 10) band 16#3ff),
- S2 = 16#dc00 bor (N band 16#3ff),
- [unihex(S1), unihex(S2)].
-
-json_decode(L, S) when is_list(L) ->
- json_decode(iolist_to_binary(L), S);
-json_decode(B, S) ->
- {Res, S1} = decode1(B, S),
- {eof, _} = tokenize(B, S1#decoder{state=trim}),
- Res.
-
-decode1(B, S=#decoder{state=null}) ->
- case tokenize(B, S#decoder{state=any}) of
- {{const, C}, S1} ->
- {C, S1};
- {start_array, S1} ->
- decode_array(B, S1);
- {start_object, S1} ->
- decode_object(B, S1)
- end.
-
-make_object(V, #decoder{object_hook=null}) ->
- V;
-make_object(V, #decoder{object_hook=Hook}) ->
- Hook(V).
-
-decode_object(B, S) ->
- decode_object(B, S#decoder{state=key}, []).
-
-decode_object(B, S=#decoder{state=key}, Acc) ->
- case tokenize(B, S) of
- {end_object, S1} ->
- V = make_object({struct, lists:reverse(Acc)}, S1),
- {V, S1#decoder{state=null}};
- {{const, K}, S1} ->
- {colon, S2} = tokenize(B, S1),
- {V, S3} = decode1(B, S2#decoder{state=null}),
- decode_object(B, S3#decoder{state=comma}, [{K, V} | Acc])
- end;
-decode_object(B, S=#decoder{state=comma}, Acc) ->
- case tokenize(B, S) of
- {end_object, S1} ->
- V = make_object({struct, lists:reverse(Acc)}, S1),
- {V, S1#decoder{state=null}};
- {comma, S1} ->
- decode_object(B, S1#decoder{state=key}, Acc)
- end.
-
-decode_array(B, S) ->
- decode_array(B, S#decoder{state=any}, []).
-
-decode_array(B, S=#decoder{state=any}, Acc) ->
- case tokenize(B, S) of
- {end_array, S1} ->
- {lists:reverse(Acc), S1#decoder{state=null}};
- {start_array, S1} ->
- {Array, S2} = decode_array(B, S1),
- decode_array(B, S2#decoder{state=comma}, [Array | Acc]);
- {start_object, S1} ->
- {Array, S2} = decode_object(B, S1),
- decode_array(B, S2#decoder{state=comma}, [Array | Acc]);
- {{const, Const}, S1} ->
- decode_array(B, S1#decoder{state=comma}, [Const | Acc])
- end;
-decode_array(B, S=#decoder{state=comma}, Acc) ->
- case tokenize(B, S) of
- {end_array, S1} ->
- {lists:reverse(Acc), S1#decoder{state=null}};
- {comma, S1} ->
- decode_array(B, S1#decoder{state=any}, Acc)
- end.
-
-tokenize_string(B, S=#decoder{offset=O}) ->
- case tokenize_string_fast(B, O) of
- {escape, O1} ->
- Length = O1 - O,
- S1 = ?ADV_COL(S, Length),
- <<_:O/binary, Head:Length/binary, _/binary>> = B,
- tokenize_string(B, S1, lists:reverse(binary_to_list(Head)));
- O1 ->
- Length = O1 - O,
- <<_:O/binary, String:Length/binary, ?Q, _/binary>> = B,
- {{const, String}, ?ADV_COL(S, Length + 1)}
- end.
-
-tokenize_string_fast(B, O) ->
- case B of
- <<_:O/binary, ?Q, _/binary>> ->
- O;
- <<_:O/binary, $\\, _/binary>> ->
- {escape, O};
- <<_:O/binary, C1, _/binary>> when C1 < 128 ->
- tokenize_string_fast(B, 1 + O);
- <<_:O/binary, C1, C2, _/binary>> when C1 >= 194, C1 =< 223,
- C2 >= 128, C2 =< 191 ->
- tokenize_string_fast(B, 2 + O);
- <<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 224, C1 =< 239,
- C2 >= 128, C2 =< 191,
- C3 >= 128, C3 =< 191 ->
- tokenize_string_fast(B, 3 + O);
- <<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 240, C1 =< 244,
- C2 >= 128, C2 =< 191,
- C3 >= 128, C3 =< 191,
- C4 >= 128, C4 =< 191 ->
- tokenize_string_fast(B, 4 + O);
- _ ->
- throw(invalid_utf8)
- end.
-
-tokenize_string(B, S=#decoder{offset=O}, Acc) ->
- case B of
- <<_:O/binary, ?Q, _/binary>> ->
- {{const, iolist_to_binary(lists:reverse(Acc))}, ?INC_COL(S)};
- <<_:O/binary, "\\\"", _/binary>> ->
- tokenize_string(B, ?ADV_COL(S, 2), [$\" | Acc]);
- <<_:O/binary, "\\\\", _/binary>> ->
- tokenize_string(B, ?ADV_COL(S, 2), [$\\ | Acc]);
- <<_:O/binary, "\\/", _/binary>> ->
- tokenize_string(B, ?ADV_COL(S, 2), [$/ | Acc]);
- <<_:O/binary, "\\b", _/binary>> ->
- tokenize_string(B, ?ADV_COL(S, 2), [$\b | Acc]);
- <<_:O/binary, "\\f", _/binary>> ->
- tokenize_string(B, ?ADV_COL(S, 2), [$\f | Acc]);
- <<_:O/binary, "\\n", _/binary>> ->
- tokenize_string(B, ?ADV_COL(S, 2), [$\n | Acc]);
- <<_:O/binary, "\\r", _/binary>> ->
- tokenize_string(B, ?ADV_COL(S, 2), [$\r | Acc]);
- <<_:O/binary, "\\t", _/binary>> ->
- tokenize_string(B, ?ADV_COL(S, 2), [$\t | Acc]);
- <<_:O/binary, "\\u", C3, C2, C1, C0, Rest/binary>> ->
- C = erlang:list_to_integer([C3, C2, C1, C0], 16),
- if C > 16#D7FF, C < 16#DC00 ->
- %% coalesce UTF-16 surrogate pair
- <<"\\u", D3, D2, D1, D0, _/binary>> = Rest,
- D = erlang:list_to_integer([D3,D2,D1,D0], 16),
- [CodePoint] = xmerl_ucs:from_utf16be(<<C:16/big-unsigned-integer,
- D:16/big-unsigned-integer>>),
- Acc1 = lists:reverse(xmerl_ucs:to_utf8(CodePoint), Acc),
- tokenize_string(B, ?ADV_COL(S, 12), Acc1);
- true ->
- Acc1 = lists:reverse(xmerl_ucs:to_utf8(C), Acc),
- tokenize_string(B, ?ADV_COL(S, 6), Acc1)
- end;
- <<_:O/binary, C1, _/binary>> when C1 < 128 ->
- tokenize_string(B, ?INC_CHAR(S, C1), [C1 | Acc]);
- <<_:O/binary, C1, C2, _/binary>> when C1 >= 194, C1 =< 223,
- C2 >= 128, C2 =< 191 ->
- tokenize_string(B, ?ADV_COL(S, 2), [C2, C1 | Acc]);
- <<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 224, C1 =< 239,
- C2 >= 128, C2 =< 191,
- C3 >= 128, C3 =< 191 ->
- tokenize_string(B, ?ADV_COL(S, 3), [C3, C2, C1 | Acc]);
- <<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 240, C1 =< 244,
- C2 >= 128, C2 =< 191,
- C3 >= 128, C3 =< 191,
- C4 >= 128, C4 =< 191 ->
- tokenize_string(B, ?ADV_COL(S, 4), [C4, C3, C2, C1 | Acc]);
- _ ->
- throw(invalid_utf8)
- end.
-
-tokenize_number(B, S) ->
- case tokenize_number(B, sign, S, []) of
- {{int, Int}, S1} ->
- {{const, list_to_integer(Int)}, S1};
- {{float, Float}, S1} ->
- {{const, list_to_float(Float)}, S1}
- end.
-
-tokenize_number(B, sign, S=#decoder{offset=O}, []) ->
- case B of
- <<_:O/binary, $-, _/binary>> ->
- tokenize_number(B, int, ?INC_COL(S), [$-]);
- _ ->
- tokenize_number(B, int, S, [])
- end;
-tokenize_number(B, int, S=#decoder{offset=O}, Acc) ->
- case B of
- <<_:O/binary, $0, _/binary>> ->
- tokenize_number(B, frac, ?INC_COL(S), [$0 | Acc]);
- <<_:O/binary, C, _/binary>> when C >= $1 andalso C =< $9 ->
- tokenize_number(B, int1, ?INC_COL(S), [C | Acc])
- end;
-tokenize_number(B, int1, S=#decoder{offset=O}, Acc) ->
- case B of
- <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 ->
- tokenize_number(B, int1, ?INC_COL(S), [C | Acc]);
- _ ->
- tokenize_number(B, frac, S, Acc)
- end;
-tokenize_number(B, frac, S=#decoder{offset=O}, Acc) ->
- case B of
- <<_:O/binary, $., C, _/binary>> when C >= $0, C =< $9 ->
- tokenize_number(B, frac1, ?ADV_COL(S, 2), [C, $. | Acc]);
- <<_:O/binary, E, _/binary>> when E =:= $e orelse E =:= $E ->
- tokenize_number(B, esign, ?INC_COL(S), [$e, $0, $. | Acc]);
- _ ->
- {{int, lists:reverse(Acc)}, S}
- end;
-tokenize_number(B, frac1, S=#decoder{offset=O}, Acc) ->
- case B of
- <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 ->
- tokenize_number(B, frac1, ?INC_COL(S), [C | Acc]);
- <<_:O/binary, E, _/binary>> when E =:= $e orelse E =:= $E ->
- tokenize_number(B, esign, ?INC_COL(S), [$e | Acc]);
- _ ->
- {{float, lists:reverse(Acc)}, S}
- end;
-tokenize_number(B, esign, S=#decoder{offset=O}, Acc) ->
- case B of
- <<_:O/binary, C, _/binary>> when C =:= $- orelse C=:= $+ ->
- tokenize_number(B, eint, ?INC_COL(S), [C | Acc]);
- _ ->
- tokenize_number(B, eint, S, Acc)
- end;
-tokenize_number(B, eint, S=#decoder{offset=O}, Acc) ->
- case B of
- <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 ->
- tokenize_number(B, eint1, ?INC_COL(S), [C | Acc])
- end;
-tokenize_number(B, eint1, S=#decoder{offset=O}, Acc) ->
- case B of
- <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 ->
- tokenize_number(B, eint1, ?INC_COL(S), [C | Acc]);
- _ ->
- {{float, lists:reverse(Acc)}, S}
- end.
-
-tokenize(B, S=#decoder{offset=O}) ->
- case B of
- <<_:O/binary, C, _/binary>> when ?IS_WHITESPACE(C) ->
- tokenize(B, ?INC_CHAR(S, C));
- <<_:O/binary, "{", _/binary>> ->
- {start_object, ?INC_COL(S)};
- <<_:O/binary, "}", _/binary>> ->
- {end_object, ?INC_COL(S)};
- <<_:O/binary, "[", _/binary>> ->
- {start_array, ?INC_COL(S)};
- <<_:O/binary, "]", _/binary>> ->
- {end_array, ?INC_COL(S)};
- <<_:O/binary, ",", _/binary>> ->
- {comma, ?INC_COL(S)};
- <<_:O/binary, ":", _/binary>> ->
- {colon, ?INC_COL(S)};
- <<_:O/binary, "null", _/binary>> ->
- {{const, null}, ?ADV_COL(S, 4)};
- <<_:O/binary, "true", _/binary>> ->
- {{const, true}, ?ADV_COL(S, 4)};
- <<_:O/binary, "false", _/binary>> ->
- {{const, false}, ?ADV_COL(S, 5)};
- <<_:O/binary, "\"", _/binary>> ->
- tokenize_string(B, ?INC_COL(S));
- <<_:O/binary, C, _/binary>> when (C >= $0 andalso C =< $9)
- orelse C =:= $- ->
- tokenize_number(B, S);
- <<_:O/binary>> ->
- trim = S#decoder.state,
- {eof, S}
- end.
-%%
-%% Tests
-%%
--include_lib("eunit/include/eunit.hrl").
--ifdef(TEST).
-
-
-%% testing constructs borrowed from the Yaws JSON implementation.
-
-%% Create an object from a list of Key/Value pairs.
-
-obj_new() ->
- {struct, []}.
-
-is_obj({struct, Props}) ->
- F = fun ({K, _}) when is_binary(K) -> true end,
- lists:all(F, Props).
-
-obj_from_list(Props) ->
- Obj = {struct, Props},
- ?assert(is_obj(Obj)),
- Obj.
-
-%% Test for equivalence of Erlang terms.
-%% Due to arbitrary order of construction, equivalent objects might
-%% compare unequal as erlang terms, so we need to carefully recurse
-%% through aggregates (tuples and objects).
-
-equiv({struct, Props1}, {struct, Props2}) ->
- equiv_object(Props1, Props2);
-equiv(L1, L2) when is_list(L1), is_list(L2) ->
- equiv_list(L1, L2);
-equiv(N1, N2) when is_number(N1), is_number(N2) -> N1 == N2;
-equiv(B1, B2) when is_binary(B1), is_binary(B2) -> B1 == B2;
-equiv(A, A) when A =:= true orelse A =:= false orelse A =:= null -> true.
-
-%% Object representation and traversal order is unknown.
-%% Use the sledgehammer and sort property lists.
-
-equiv_object(Props1, Props2) ->
- L1 = lists:keysort(1, Props1),
- L2 = lists:keysort(1, Props2),
- Pairs = lists:zip(L1, L2),
- true = lists:all(fun({{K1, V1}, {K2, V2}}) ->
- equiv(K1, K2) and equiv(V1, V2)
- end, Pairs).
-
-%% Recursively compare tuple elements for equivalence.
-
-equiv_list([], []) ->
- true;
-equiv_list([V1 | L1], [V2 | L2]) ->
- equiv(V1, V2) andalso equiv_list(L1, L2).
-
-decode_test() ->
- [1199344435545.0, 1] = decode(<<"[1199344435545.0,1]">>),
- <<16#F0,16#9D,16#9C,16#95>> = decode([34,"\\ud835","\\udf15",34]).
-
-e2j_vec_test() ->
- test_one(e2j_test_vec(utf8), 1).
-
-test_one([], _N) ->
- %% io:format("~p tests passed~n", [N-1]),
- ok;
-test_one([{E, J} | Rest], N) ->
- %% io:format("[~p] ~p ~p~n", [N, E, J]),
- true = equiv(E, decode(J)),
- true = equiv(E, decode(encode(E))),
- test_one(Rest, 1+N).
-
-e2j_test_vec(utf8) ->
- [
- {1, "1"},
- {3.1416, "3.14160"}, %% text representation may truncate, trail zeroes
- {-1, "-1"},
- {-3.1416, "-3.14160"},
- {12.0e10, "1.20000e+11"},
- {1.234E+10, "1.23400e+10"},
- {-1.234E-10, "-1.23400e-10"},
- {10.0, "1.0e+01"},
- {123.456, "1.23456E+2"},
- {10.0, "1e1"},
- {<<"foo">>, "\"foo\""},
- {<<"foo", 5, "bar">>, "\"foo\\u0005bar\""},
- {<<"">>, "\"\""},
- {<<"\n\n\n">>, "\"\\n\\n\\n\""},
- {<<"\" \b\f\r\n\t\"">>, "\"\\\" \\b\\f\\r\\n\\t\\\"\""},
- {obj_new(), "{}"},
- {obj_from_list([{<<"foo">>, <<"bar">>}]), "{\"foo\":\"bar\"}"},
- {obj_from_list([{<<"foo">>, <<"bar">>}, {<<"baz">>, 123}]),
- "{\"foo\":\"bar\",\"baz\":123}"},
- {[], "[]"},
- {[[]], "[[]]"},
- {[1, <<"foo">>], "[1,\"foo\"]"},
-
- %% json array in a json object
- {obj_from_list([{<<"foo">>, [123]}]),
- "{\"foo\":[123]}"},
-
- %% json object in a json object
- {obj_from_list([{<<"foo">>, obj_from_list([{<<"bar">>, true}])}]),
- "{\"foo\":{\"bar\":true}}"},
-
- %% fold evaluation order
- {obj_from_list([{<<"foo">>, []},
- {<<"bar">>, obj_from_list([{<<"baz">>, true}])},
- {<<"alice">>, <<"bob">>}]),
- "{\"foo\":[],\"bar\":{\"baz\":true},\"alice\":\"bob\"}"},
-
- %% json object in a json array
- {[-123, <<"foo">>, obj_from_list([{<<"bar">>, []}]), null],
- "[-123,\"foo\",{\"bar\":[]},null]"}
- ].
-
-%% test utf8 encoding
-encoder_utf8_test() ->
- %% safe conversion case (default)
- [34,"\\u0001","\\u0442","\\u0435","\\u0441","\\u0442",34] =
- encode(<<1,"\321\202\320\265\321\201\321\202">>),
-
- %% raw utf8 output (optional)
- Enc = mochijson2:encoder([{utf8, true}]),
- [34,"\\u0001",[209,130],[208,181],[209,129],[209,130],34] =
- Enc(<<1,"\321\202\320\265\321\201\321\202">>).
-
-input_validation_test() ->
- Good = [
- {16#00A3, <<?Q, 16#C2, 16#A3, ?Q>>}, %% pound
- {16#20AC, <<?Q, 16#E2, 16#82, 16#AC, ?Q>>}, %% euro
- {16#10196, <<?Q, 16#F0, 16#90, 16#86, 16#96, ?Q>>} %% denarius
- ],
- lists:foreach(fun({CodePoint, UTF8}) ->
- Expect = list_to_binary(xmerl_ucs:to_utf8(CodePoint)),
- Expect = decode(UTF8)
- end, Good),
-
- Bad = [
- %% 2nd, 3rd, or 4th byte of a multi-byte sequence w/o leading byte
- <<?Q, 16#80, ?Q>>,
- %% missing continuations, last byte in each should be 80-BF
- <<?Q, 16#C2, 16#7F, ?Q>>,
- <<?Q, 16#E0, 16#80,16#7F, ?Q>>,
- <<?Q, 16#F0, 16#80, 16#80, 16#7F, ?Q>>,
- %% we don't support code points > 10FFFF per RFC 3629
- <<?Q, 16#F5, 16#80, 16#80, 16#80, ?Q>>,
- %% escape characters trigger a different code path
- <<?Q, $\\, $\n, 16#80, ?Q>>
- ],
- lists:foreach(
- fun(X) ->
- ok = try decode(X) catch invalid_utf8 -> ok end,
- %% could be {ucs,{bad_utf8_character_code}} or
- %% {json_encode,{bad_char,_}}
- {'EXIT', _} = (catch encode(X))
- end, Bad).
-
-inline_json_test() ->
- ?assertEqual(<<"\"iodata iodata\"">>,
- iolist_to_binary(
- encode({json, [<<"\"iodata">>, " iodata\""]}))),
- ?assertEqual({struct, [{<<"key">>, <<"iodata iodata">>}]},
- decode(
- encode({struct,
- [{key, {json, [<<"\"iodata">>, " iodata\""]}}]}))),
- ok.
-
-big_unicode_test() ->
- UTF8Seq = list_to_binary(xmerl_ucs:to_utf8(16#0001d120)),
- ?assertEqual(
- <<"\"\\ud834\\udd20\"">>,
- iolist_to_binary(encode(UTF8Seq))),
- ?assertEqual(
- UTF8Seq,
- decode(iolist_to_binary(encode(UTF8Seq)))),
- ok.
-
-custom_decoder_test() ->
- ?assertEqual(
- {struct, [{<<"key">>, <<"value">>}]},
- (decoder([]))("{\"key\": \"value\"}")),
- F = fun ({struct, [{<<"key">>, <<"value">>}]}) -> win end,
- ?assertEqual(
- win,
- (decoder([{object_hook, F}]))("{\"key\": \"value\"}")),
- ok.
-
-atom_test() ->
- %% JSON native atoms
- [begin
- ?assertEqual(A, decode(atom_to_list(A))),
- ?assertEqual(iolist_to_binary(atom_to_list(A)),
- iolist_to_binary(encode(A)))
- end || A <- [true, false, null]],
- %% Atom to string
- ?assertEqual(
- <<"\"foo\"">>,
- iolist_to_binary(encode(foo))),
- ?assertEqual(
- <<"\"\\ud834\\udd20\"">>,
- iolist_to_binary(encode(list_to_atom(xmerl_ucs:to_utf8(16#0001d120))))),
- ok.
-
-key_encode_test() ->
- %% Some forms are accepted as keys that would not be strings in other
- %% cases
- ?assertEqual(
- <<"{\"foo\":1}">>,
- iolist_to_binary(encode({struct, [{foo, 1}]}))),
- ?assertEqual(
- <<"{\"foo\":1}">>,
- iolist_to_binary(encode({struct, [{<<"foo">>, 1}]}))),
- ?assertEqual(
- <<"{\"foo\":1}">>,
- iolist_to_binary(encode({struct, [{"foo", 1}]}))),
- ?assertEqual(
- <<"{\"foo\":1}">>,
- iolist_to_binary(encode([{foo, 1}]))),
- ?assertEqual(
- <<"{\"foo\":1}">>,
- iolist_to_binary(encode([{<<"foo">>, 1}]))),
- ?assertEqual(
- <<"{\"foo\":1}">>,
- iolist_to_binary(encode([{"foo", 1}]))),
- ?assertEqual(
- <<"{\"\\ud834\\udd20\":1}">>,
- iolist_to_binary(
- encode({struct, [{[16#0001d120], 1}]}))),
- ?assertEqual(
- <<"{\"1\":1}">>,
- iolist_to_binary(encode({struct, [{1, 1}]}))),
- ok.
-
-unsafe_chars_test() ->
- Chars = "\"\\\b\f\n\r\t",
- [begin
- ?assertEqual(false, json_string_is_safe([C])),
- ?assertEqual(false, json_bin_is_safe(<<C>>)),
- ?assertEqual(<<C>>, decode(encode(<<C>>)))
- end || C <- Chars],
- ?assertEqual(
- false,
- json_string_is_safe([16#0001d120])),
- ?assertEqual(
- false,
- json_bin_is_safe(list_to_binary(xmerl_ucs:to_utf8(16#0001d120)))),
- ?assertEqual(
- [16#0001d120],
- xmerl_ucs:from_utf8(
- binary_to_list(
- decode(encode(list_to_atom(xmerl_ucs:to_utf8(16#0001d120))))))),
- ?assertEqual(
- false,
- json_string_is_safe([16#110000])),
- ?assertEqual(
- false,
- json_bin_is_safe(list_to_binary(xmerl_ucs:to_utf8([16#110000])))),
- %% solidus can be escaped but isn't unsafe by default
- ?assertEqual(
- <<"/">>,
- decode(<<"\"\\/\"">>)),
- ok.
-
-int_test() ->
- ?assertEqual(0, decode("0")),
- ?assertEqual(1, decode("1")),
- ?assertEqual(11, decode("11")),
- ok.
-
-large_int_test() ->
- ?assertEqual(<<"-2147483649214748364921474836492147483649">>,
- iolist_to_binary(encode(-2147483649214748364921474836492147483649))),
- ?assertEqual(<<"2147483649214748364921474836492147483649">>,
- iolist_to_binary(encode(2147483649214748364921474836492147483649))),
- ok.
-
-float_test() ->
- ?assertEqual(<<"-2147483649.0">>, iolist_to_binary(encode(-2147483649.0))),
- ?assertEqual(<<"2147483648.0">>, iolist_to_binary(encode(2147483648.0))),
- ok.
-
-handler_test() ->
- ?assertEqual(
- {'EXIT',{json_encode,{bad_term,{}}}},
- catch encode({})),
- F = fun ({}) -> [] end,
- ?assertEqual(
- <<"[]">>,
- iolist_to_binary((encoder([{handler, F}]))({}))),
- ok.
-
--endif.
http://git-wip-us.apache.org/repos/asf/couchdb/blob/6fdb9e07/src/mochiweb/mochilists.erl
----------------------------------------------------------------------
diff --git a/src/mochiweb/mochilists.erl b/src/mochiweb/mochilists.erl
deleted file mode 100644
index 8981e7b..0000000
--- a/src/mochiweb/mochilists.erl
+++ /dev/null
@@ -1,104 +0,0 @@
-%% @copyright Copyright (c) 2010 Mochi Media, Inc.
-%% @author David Reid <dr...@mochimedia.com>
-
-%% @doc Utility functions for dealing with proplists.
-
--module(mochilists).
--author("David Reid <dr...@mochimedia.com>").
--export([get_value/2, get_value/3, is_defined/2, set_default/2, set_defaults/2]).
-
-%% @spec set_default({Key::term(), Value::term()}, Proplist::list()) -> list()
-%%
-%% @doc Return new Proplist with {Key, Value} set if not is_defined(Key, Proplist).
-set_default({Key, Value}, Proplist) ->
- case is_defined(Key, Proplist) of
- true ->
- Proplist;
- false ->
- [{Key, Value} | Proplist]
- end.
-
-%% @spec set_defaults([{Key::term(), Value::term()}], Proplist::list()) -> list()
-%%
-%% @doc Return new Proplist with {Key, Value} set if not is_defined(Key, Proplist).
-set_defaults(DefaultProps, Proplist) ->
- lists:foldl(fun set_default/2, Proplist, DefaultProps).
-
-
-%% @spec is_defined(Key::term(), Proplist::list()) -> bool()
-%%
-%% @doc Returns true if Propist contains at least one entry associated
-%% with Key, otherwise false is returned.
-is_defined(Key, Proplist) ->
- lists:keyfind(Key, 1, Proplist) =/= false.
-
-
-%% @spec get_value(Key::term(), Proplist::list()) -> term() | undefined
-%%
-%% @doc Return the value of <code>Key</code> or undefined
-get_value(Key, Proplist) ->
- get_value(Key, Proplist, undefined).
-
-%% @spec get_value(Key::term(), Proplist::list(), Default::term()) -> term()
-%%
-%% @doc Return the value of <code>Key</code> or <code>Default</code>
-get_value(_Key, [], Default) ->
- Default;
-get_value(Key, Proplist, Default) ->
- case lists:keyfind(Key, 1, Proplist) of
- false ->
- Default;
- {Key, Value} ->
- Value
- end.
-
-%%
-%% Tests
-%%
--include_lib("eunit/include/eunit.hrl").
--ifdef(TEST).
-
-set_defaults_test() ->
- ?assertEqual(
- [{k, v}],
- set_defaults([{k, v}], [])),
- ?assertEqual(
- [{k, v}],
- set_defaults([{k, vee}], [{k, v}])),
- ?assertEqual(
- lists:sort([{kay, vee}, {k, v}]),
- lists:sort(set_defaults([{k, vee}, {kay, vee}], [{k, v}]))),
- ok.
-
-set_default_test() ->
- ?assertEqual(
- [{k, v}],
- set_default({k, v}, [])),
- ?assertEqual(
- [{k, v}],
- set_default({k, vee}, [{k, v}])),
- ok.
-
-get_value_test() ->
- ?assertEqual(
- undefined,
- get_value(foo, [])),
- ?assertEqual(
- undefined,
- get_value(foo, [{bar, baz}])),
- ?assertEqual(
- bar,
- get_value(foo, [{foo, bar}])),
- ?assertEqual(
- default,
- get_value(foo, [], default)),
- ?assertEqual(
- default,
- get_value(foo, [{bar, baz}], default)),
- ?assertEqual(
- bar,
- get_value(foo, [{foo, bar}], default)),
- ok.
-
--endif.
-
http://git-wip-us.apache.org/repos/asf/couchdb/blob/6fdb9e07/src/mochiweb/mochilogfile2.erl
----------------------------------------------------------------------
diff --git a/src/mochiweb/mochilogfile2.erl b/src/mochiweb/mochilogfile2.erl
deleted file mode 100644
index c34ee73..0000000
--- a/src/mochiweb/mochilogfile2.erl
+++ /dev/null
@@ -1,140 +0,0 @@
-%% @author Bob Ippolito <bo...@mochimedia.com>
-%% @copyright 2010 Mochi Media, Inc.
-
-%% @doc Write newline delimited log files, ensuring that if a truncated
-%% entry is found on log open then it is fixed before writing. Uses
-%% delayed writes and raw files for performance.
--module(mochilogfile2).
--author('bob@mochimedia.com').
-
--export([open/1, write/2, close/1, name/1]).
-
-%% @spec open(Name) -> Handle
-%% @doc Open the log file Name, creating or appending as necessary. All data
-%% at the end of the file will be truncated until a newline is found, to
-%% ensure that all records are complete.
-open(Name) ->
- {ok, FD} = file:open(Name, [raw, read, write, delayed_write, binary]),
- fix_log(FD),
- {?MODULE, Name, FD}.
-
-%% @spec name(Handle) -> string()
-%% @doc Return the path of the log file.
-name({?MODULE, Name, _FD}) ->
- Name.
-
-%% @spec write(Handle, IoData) -> ok
-%% @doc Write IoData to the log file referenced by Handle.
-write({?MODULE, _Name, FD}, IoData) ->
- ok = file:write(FD, [IoData, $\n]),
- ok.
-
-%% @spec close(Handle) -> ok
-%% @doc Close the log file referenced by Handle.
-close({?MODULE, _Name, FD}) ->
- ok = file:sync(FD),
- ok = file:close(FD),
- ok.
-
-fix_log(FD) ->
- {ok, Location} = file:position(FD, eof),
- Seek = find_last_newline(FD, Location),
- {ok, Seek} = file:position(FD, Seek),
- ok = file:truncate(FD),
- ok.
-
-%% Seek backwards to the last valid log entry
-find_last_newline(_FD, N) when N =< 1 ->
- 0;
-find_last_newline(FD, Location) ->
- case file:pread(FD, Location - 1, 1) of
- {ok, <<$\n>>} ->
- Location;
- {ok, _} ->
- find_last_newline(FD, Location - 1)
- end.
-
-%%
-%% Tests
-%%
--include_lib("eunit/include/eunit.hrl").
--ifdef(TEST).
-name_test() ->
- D = mochitemp:mkdtemp(),
- FileName = filename:join(D, "open_close_test.log"),
- H = open(FileName),
- ?assertEqual(
- FileName,
- name(H)),
- close(H),
- file:delete(FileName),
- file:del_dir(D),
- ok.
-
-open_close_test() ->
- D = mochitemp:mkdtemp(),
- FileName = filename:join(D, "open_close_test.log"),
- OpenClose = fun () ->
- H = open(FileName),
- ?assertEqual(
- true,
- filelib:is_file(FileName)),
- ok = close(H),
- ?assertEqual(
- {ok, <<>>},
- file:read_file(FileName)),
- ok
- end,
- OpenClose(),
- OpenClose(),
- file:delete(FileName),
- file:del_dir(D),
- ok.
-
-write_test() ->
- D = mochitemp:mkdtemp(),
- FileName = filename:join(D, "write_test.log"),
- F = fun () ->
- H = open(FileName),
- write(H, "test line"),
- close(H),
- ok
- end,
- F(),
- ?assertEqual(
- {ok, <<"test line\n">>},
- file:read_file(FileName)),
- F(),
- ?assertEqual(
- {ok, <<"test line\ntest line\n">>},
- file:read_file(FileName)),
- file:delete(FileName),
- file:del_dir(D),
- ok.
-
-fix_log_test() ->
- D = mochitemp:mkdtemp(),
- FileName = filename:join(D, "write_test.log"),
- file:write_file(FileName, <<"first line good\nsecond line bad">>),
- F = fun () ->
- H = open(FileName),
- write(H, "test line"),
- close(H),
- ok
- end,
- F(),
- ?assertEqual(
- {ok, <<"first line good\ntest line\n">>},
- file:read_file(FileName)),
- file:write_file(FileName, <<"first line bad">>),
- F(),
- ?assertEqual(
- {ok, <<"test line\n">>},
- file:read_file(FileName)),
- F(),
- ?assertEqual(
- {ok, <<"test line\ntest line\n">>},
- file:read_file(FileName)),
- ok.
-
--endif.
http://git-wip-us.apache.org/repos/asf/couchdb/blob/6fdb9e07/src/mochiweb/mochinum.erl
----------------------------------------------------------------------
diff --git a/src/mochiweb/mochinum.erl b/src/mochiweb/mochinum.erl
deleted file mode 100644
index 3c96b13..0000000
--- a/src/mochiweb/mochinum.erl
+++ /dev/null
@@ -1,354 +0,0 @@
-%% @copyright 2007 Mochi Media, Inc.
-%% @author Bob Ippolito <bo...@mochimedia.com>
-
-%% @doc Useful numeric algorithms for floats that cover some deficiencies
-%% in the math module. More interesting is digits/1, which implements
-%% the algorithm from:
-%% http://www.cs.indiana.edu/~burger/fp/index.html
-%% See also "Printing Floating-Point Numbers Quickly and Accurately"
-%% in Proceedings of the SIGPLAN '96 Conference on Programming Language
-%% Design and Implementation.
-
--module(mochinum).
--author("Bob Ippolito <bo...@mochimedia.com>").
--export([digits/1, frexp/1, int_pow/2, int_ceil/1]).
-
-%% IEEE 754 Float exponent bias
--define(FLOAT_BIAS, 1022).
--define(MIN_EXP, -1074).
--define(BIG_POW, 4503599627370496).
-
-%% External API
-
-%% @spec digits(number()) -> string()
-%% @doc Returns a string that accurately represents the given integer or float
-%% using a conservative amount of digits. Great for generating
-%% human-readable output, or compact ASCII serializations for floats.
-digits(N) when is_integer(N) ->
- integer_to_list(N);
-digits(0.0) ->
- "0.0";
-digits(Float) ->
- {Frac1, Exp1} = frexp_int(Float),
- [Place0 | Digits0] = digits1(Float, Exp1, Frac1),
- {Place, Digits} = transform_digits(Place0, Digits0),
- R = insert_decimal(Place, Digits),
- case Float < 0 of
- true ->
- [$- | R];
- _ ->
- R
- end.
-
-%% @spec frexp(F::float()) -> {Frac::float(), Exp::float()}
-%% @doc Return the fractional and exponent part of an IEEE 754 double,
-%% equivalent to the libc function of the same name.
-%% F = Frac * pow(2, Exp).
-frexp(F) ->
- frexp1(unpack(F)).
-
-%% @spec int_pow(X::integer(), N::integer()) -> Y::integer()
-%% @doc Moderately efficient way to exponentiate integers.
-%% int_pow(10, 2) = 100.
-int_pow(_X, 0) ->
- 1;
-int_pow(X, N) when N > 0 ->
- int_pow(X, N, 1).
-
-%% @spec int_ceil(F::float()) -> integer()
-%% @doc Return the ceiling of F as an integer. The ceiling is defined as
-%% F when F == trunc(F);
-%% trunc(F) when F < 0;
-%% trunc(F) + 1 when F > 0.
-int_ceil(X) ->
- T = trunc(X),
- case (X - T) of
- Pos when Pos > 0 -> T + 1;
- _ -> T
- end.
-
-
-%% Internal API
-
-int_pow(X, N, R) when N < 2 ->
- R * X;
-int_pow(X, N, R) ->
- int_pow(X * X, N bsr 1, case N band 1 of 1 -> R * X; 0 -> R end).
-
-insert_decimal(0, S) ->
- "0." ++ S;
-insert_decimal(Place, S) when Place > 0 ->
- L = length(S),
- case Place - L of
- 0 ->
- S ++ ".0";
- N when N < 0 ->
- {S0, S1} = lists:split(L + N, S),
- S0 ++ "." ++ S1;
- N when N < 6 ->
- %% More places than digits
- S ++ lists:duplicate(N, $0) ++ ".0";
- _ ->
- insert_decimal_exp(Place, S)
- end;
-insert_decimal(Place, S) when Place > -6 ->
- "0." ++ lists:duplicate(abs(Place), $0) ++ S;
-insert_decimal(Place, S) ->
- insert_decimal_exp(Place, S).
-
-insert_decimal_exp(Place, S) ->
- [C | S0] = S,
- S1 = case S0 of
- [] ->
- "0";
- _ ->
- S0
- end,
- Exp = case Place < 0 of
- true ->
- "e-";
- false ->
- "e+"
- end,
- [C] ++ "." ++ S1 ++ Exp ++ integer_to_list(abs(Place - 1)).
-
-
-digits1(Float, Exp, Frac) ->
- Round = ((Frac band 1) =:= 0),
- case Exp >= 0 of
- true ->
- BExp = 1 bsl Exp,
- case (Frac =/= ?BIG_POW) of
- true ->
- scale((Frac * BExp * 2), 2, BExp, BExp,
- Round, Round, Float);
- false ->
- scale((Frac * BExp * 4), 4, (BExp * 2), BExp,
- Round, Round, Float)
- end;
- false ->
- case (Exp =:= ?MIN_EXP) orelse (Frac =/= ?BIG_POW) of
- true ->
- scale((Frac * 2), 1 bsl (1 - Exp), 1, 1,
- Round, Round, Float);
- false ->
- scale((Frac * 4), 1 bsl (2 - Exp), 2, 1,
- Round, Round, Float)
- end
- end.
-
-scale(R, S, MPlus, MMinus, LowOk, HighOk, Float) ->
- Est = int_ceil(math:log10(abs(Float)) - 1.0e-10),
- %% Note that the scheme implementation uses a 326 element look-up table
- %% for int_pow(10, N) where we do not.
- case Est >= 0 of
- true ->
- fixup(R, S * int_pow(10, Est), MPlus, MMinus, Est,
- LowOk, HighOk);
- false ->
- Scale = int_pow(10, -Est),
- fixup(R * Scale, S, MPlus * Scale, MMinus * Scale, Est,
- LowOk, HighOk)
- end.
-
-fixup(R, S, MPlus, MMinus, K, LowOk, HighOk) ->
- TooLow = case HighOk of
- true ->
- (R + MPlus) >= S;
- false ->
- (R + MPlus) > S
- end,
- case TooLow of
- true ->
- [(K + 1) | generate(R, S, MPlus, MMinus, LowOk, HighOk)];
- false ->
- [K | generate(R * 10, S, MPlus * 10, MMinus * 10, LowOk, HighOk)]
- end.
-
-generate(R0, S, MPlus, MMinus, LowOk, HighOk) ->
- D = R0 div S,
- R = R0 rem S,
- TC1 = case LowOk of
- true ->
- R =< MMinus;
- false ->
- R < MMinus
- end,
- TC2 = case HighOk of
- true ->
- (R + MPlus) >= S;
- false ->
- (R + MPlus) > S
- end,
- case TC1 of
- false ->
- case TC2 of
- false ->
- [D | generate(R * 10, S, MPlus * 10, MMinus * 10,
- LowOk, HighOk)];
- true ->
- [D + 1]
- end;
- true ->
- case TC2 of
- false ->
- [D];
- true ->
- case R * 2 < S of
- true ->
- [D];
- false ->
- [D + 1]
- end
- end
- end.
-
-unpack(Float) ->
- <<Sign:1, Exp:11, Frac:52>> = <<Float:64/float>>,
- {Sign, Exp, Frac}.
-
-frexp1({_Sign, 0, 0}) ->
- {0.0, 0};
-frexp1({Sign, 0, Frac}) ->
- Exp = log2floor(Frac),
- <<Frac1:64/float>> = <<Sign:1, ?FLOAT_BIAS:11, (Frac-1):52>>,
- {Frac1, -(?FLOAT_BIAS) - 52 + Exp};
-frexp1({Sign, Exp, Frac}) ->
- <<Frac1:64/float>> = <<Sign:1, ?FLOAT_BIAS:11, Frac:52>>,
- {Frac1, Exp - ?FLOAT_BIAS}.
-
-log2floor(Int) ->
- log2floor(Int, 0).
-
-log2floor(0, N) ->
- N;
-log2floor(Int, N) ->
- log2floor(Int bsr 1, 1 + N).
-
-
-transform_digits(Place, [0 | Rest]) ->
- transform_digits(Place, Rest);
-transform_digits(Place, Digits) ->
- {Place, [$0 + D || D <- Digits]}.
-
-
-frexp_int(F) ->
- case unpack(F) of
- {_Sign, 0, Frac} ->
- {Frac, ?MIN_EXP};
- {_Sign, Exp, Frac} ->
- {Frac + (1 bsl 52), Exp - 53 - ?FLOAT_BIAS}
- end.
-
-%%
-%% Tests
-%%
--include_lib("eunit/include/eunit.hrl").
--ifdef(TEST).
-
-int_ceil_test() ->
- ?assertEqual(1, int_ceil(0.0001)),
- ?assertEqual(0, int_ceil(0.0)),
- ?assertEqual(1, int_ceil(0.99)),
- ?assertEqual(1, int_ceil(1.0)),
- ?assertEqual(-1, int_ceil(-1.5)),
- ?assertEqual(-2, int_ceil(-2.0)),
- ok.
-
-int_pow_test() ->
- ?assertEqual(1, int_pow(1, 1)),
- ?assertEqual(1, int_pow(1, 0)),
- ?assertEqual(1, int_pow(10, 0)),
- ?assertEqual(10, int_pow(10, 1)),
- ?assertEqual(100, int_pow(10, 2)),
- ?assertEqual(1000, int_pow(10, 3)),
- ok.
-
-digits_test() ->
- ?assertEqual("0",
- digits(0)),
- ?assertEqual("0.0",
- digits(0.0)),
- ?assertEqual("1.0",
- digits(1.0)),
- ?assertEqual("-1.0",
- digits(-1.0)),
- ?assertEqual("0.1",
- digits(0.1)),
- ?assertEqual("0.01",
- digits(0.01)),
- ?assertEqual("0.001",
- digits(0.001)),
- ?assertEqual("1.0e+6",
- digits(1000000.0)),
- ?assertEqual("0.5",
- digits(0.5)),
- ?assertEqual("4503599627370496.0",
- digits(4503599627370496.0)),
- %% small denormalized number
- %% 4.94065645841246544177e-324 =:= 5.0e-324
- <<SmallDenorm/float>> = <<0,0,0,0,0,0,0,1>>,
- ?assertEqual("5.0e-324",
- digits(SmallDenorm)),
- ?assertEqual(SmallDenorm,
- list_to_float(digits(SmallDenorm))),
- %% large denormalized number
- %% 2.22507385850720088902e-308
- <<BigDenorm/float>> = <<0,15,255,255,255,255,255,255>>,
- ?assertEqual("2.225073858507201e-308",
- digits(BigDenorm)),
- ?assertEqual(BigDenorm,
- list_to_float(digits(BigDenorm))),
- %% small normalized number
- %% 2.22507385850720138309e-308
- <<SmallNorm/float>> = <<0,16,0,0,0,0,0,0>>,
- ?assertEqual("2.2250738585072014e-308",
- digits(SmallNorm)),
- ?assertEqual(SmallNorm,
- list_to_float(digits(SmallNorm))),
- %% large normalized number
- %% 1.79769313486231570815e+308
- <<LargeNorm/float>> = <<127,239,255,255,255,255,255,255>>,
- ?assertEqual("1.7976931348623157e+308",
- digits(LargeNorm)),
- ?assertEqual(LargeNorm,
- list_to_float(digits(LargeNorm))),
- %% issue #10 - mochinum:frexp(math:pow(2, -1074)).
- ?assertEqual("5.0e-324",
- digits(math:pow(2, -1074))),
- ok.
-
-frexp_test() ->
- %% zero
- ?assertEqual({0.0, 0}, frexp(0.0)),
- %% one
- ?assertEqual({0.5, 1}, frexp(1.0)),
- %% negative one
- ?assertEqual({-0.5, 1}, frexp(-1.0)),
- %% small denormalized number
- %% 4.94065645841246544177e-324
- <<SmallDenorm/float>> = <<0,0,0,0,0,0,0,1>>,
- ?assertEqual({0.5, -1073}, frexp(SmallDenorm)),
- %% large denormalized number
- %% 2.22507385850720088902e-308
- <<BigDenorm/float>> = <<0,15,255,255,255,255,255,255>>,
- ?assertEqual(
- {0.99999999999999978, -1022},
- frexp(BigDenorm)),
- %% small normalized number
- %% 2.22507385850720138309e-308
- <<SmallNorm/float>> = <<0,16,0,0,0,0,0,0>>,
- ?assertEqual({0.5, -1021}, frexp(SmallNorm)),
- %% large normalized number
- %% 1.79769313486231570815e+308
- <<LargeNorm/float>> = <<127,239,255,255,255,255,255,255>>,
- ?assertEqual(
- {0.99999999999999989, 1024},
- frexp(LargeNorm)),
- %% issue #10 - mochinum:frexp(math:pow(2, -1074)).
- ?assertEqual(
- {0.5, -1073},
- frexp(math:pow(2, -1074))),
- ok.
-
--endif.
http://git-wip-us.apache.org/repos/asf/couchdb/blob/6fdb9e07/src/mochiweb/mochitemp.erl
----------------------------------------------------------------------
diff --git a/src/mochiweb/mochitemp.erl b/src/mochiweb/mochitemp.erl
deleted file mode 100644
index bb23d2a..0000000
--- a/src/mochiweb/mochitemp.erl
+++ /dev/null
@@ -1,310 +0,0 @@
-%% @author Bob Ippolito <bo...@mochimedia.com>
-%% @copyright 2010 Mochi Media, Inc.
-
-%% @doc Create temporary files and directories. Requires crypto to be started.
-
--module(mochitemp).
--export([gettempdir/0]).
--export([mkdtemp/0, mkdtemp/3]).
--export([rmtempdir/1]).
-%% -export([mkstemp/4]).
--define(SAFE_CHARS, {$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m,
- $n, $o, $p, $q, $r, $s, $t, $u, $v, $w, $x, $y, $z,
- $A, $B, $C, $D, $E, $F, $G, $H, $I, $J, $K, $L, $M,
- $N, $O, $P, $Q, $R, $S, $T, $U, $V, $W, $X, $Y, $Z,
- $0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $_}).
--define(TMP_MAX, 10000).
-
--include_lib("kernel/include/file.hrl").
-
-%% TODO: An ugly wrapper over the mktemp tool with open_port and sadness?
-%% We can't implement this race-free in Erlang without the ability
-%% to issue O_CREAT|O_EXCL. I suppose we could hack something with
-%% mkdtemp, del_dir, open.
-%% mkstemp(Suffix, Prefix, Dir, Options) ->
-%% ok.
-
-rmtempdir(Dir) ->
- case file:del_dir(Dir) of
- {error, eexist} ->
- ok = rmtempdirfiles(Dir),
- ok = file:del_dir(Dir);
- ok ->
- ok
- end.
-
-rmtempdirfiles(Dir) ->
- {ok, Files} = file:list_dir(Dir),
- ok = rmtempdirfiles(Dir, Files).
-
-rmtempdirfiles(_Dir, []) ->
- ok;
-rmtempdirfiles(Dir, [Basename | Rest]) ->
- Path = filename:join([Dir, Basename]),
- case filelib:is_dir(Path) of
- true ->
- ok = rmtempdir(Path);
- false ->
- ok = file:delete(Path)
- end,
- rmtempdirfiles(Dir, Rest).
-
-mkdtemp() ->
- mkdtemp("", "tmp", gettempdir()).
-
-mkdtemp(Suffix, Prefix, Dir) ->
- mkdtemp_n(rngpath_fun(Suffix, Prefix, Dir), ?TMP_MAX).
-
-
-
-mkdtemp_n(RngPath, 1) ->
- make_dir(RngPath());
-mkdtemp_n(RngPath, N) ->
- try make_dir(RngPath())
- catch throw:{error, eexist} ->
- mkdtemp_n(RngPath, N - 1)
- end.
-
-make_dir(Path) ->
- case file:make_dir(Path) of
- ok ->
- ok;
- E={error, eexist} ->
- throw(E)
- end,
- %% Small window for a race condition here because dir is created 777
- ok = file:write_file_info(Path, #file_info{mode=8#0700}),
- Path.
-
-rngpath_fun(Prefix, Suffix, Dir) ->
- fun () ->
- filename:join([Dir, Prefix ++ rngchars(6) ++ Suffix])
- end.
-
-rngchars(0) ->
- "";
-rngchars(N) ->
- [rngchar() | rngchars(N - 1)].
-
-rngchar() ->
- rngchar(crypto:rand_uniform(0, tuple_size(?SAFE_CHARS))).
-
-rngchar(C) ->
- element(1 + C, ?SAFE_CHARS).
-
-%% @spec gettempdir() -> string()
-%% @doc Get a usable temporary directory using the first of these that is a directory:
-%% $TMPDIR, $TMP, $TEMP, "/tmp", "/var/tmp", "/usr/tmp", ".".
-gettempdir() ->
- gettempdir(gettempdir_checks(), fun normalize_dir/1).
-
-gettempdir_checks() ->
- [{fun os:getenv/1, ["TMPDIR", "TMP", "TEMP"]},
- {fun gettempdir_identity/1, ["/tmp", "/var/tmp", "/usr/tmp"]},
- {fun gettempdir_cwd/1, [cwd]}].
-
-gettempdir_identity(L) ->
- L.
-
-gettempdir_cwd(cwd) ->
- {ok, L} = file:get_cwd(),
- L.
-
-gettempdir([{_F, []} | RestF], Normalize) ->
- gettempdir(RestF, Normalize);
-gettempdir([{F, [L | RestL]} | RestF], Normalize) ->
- case Normalize(F(L)) of
- false ->
- gettempdir([{F, RestL} | RestF], Normalize);
- Dir ->
- Dir
- end.
-
-normalize_dir(False) when False =:= false orelse False =:= "" ->
- %% Erlang doesn't have an unsetenv, wtf.
- false;
-normalize_dir(L) ->
- Dir = filename:absname(L),
- case filelib:is_dir(Dir) of
- false ->
- false;
- true ->
- Dir
- end.
-
-%%
-%% Tests
-%%
--include_lib("eunit/include/eunit.hrl").
--ifdef(TEST).
-pushenv(L) ->
- [{K, os:getenv(K)} || K <- L].
-popenv(L) ->
- F = fun ({K, false}) ->
- %% Erlang doesn't have an unsetenv, wtf.
- os:putenv(K, "");
- ({K, V}) ->
- os:putenv(K, V)
- end,
- lists:foreach(F, L).
-
-gettempdir_fallback_test() ->
- ?assertEqual(
- "/",
- gettempdir([{fun gettempdir_identity/1, ["/--not-here--/"]},
- {fun gettempdir_identity/1, ["/"]}],
- fun normalize_dir/1)),
- ?assertEqual(
- "/",
- %% simulate a true os:getenv unset env
- gettempdir([{fun gettempdir_identity/1, [false]},
- {fun gettempdir_identity/1, ["/"]}],
- fun normalize_dir/1)),
- ok.
-
-gettempdir_identity_test() ->
- ?assertEqual(
- "/",
- gettempdir([{fun gettempdir_identity/1, ["/"]}], fun normalize_dir/1)),
- ok.
-
-gettempdir_cwd_test() ->
- {ok, Cwd} = file:get_cwd(),
- ?assertEqual(
- normalize_dir(Cwd),
- gettempdir([{fun gettempdir_cwd/1, [cwd]}], fun normalize_dir/1)),
- ok.
-
-rngchars_test() ->
- crypto:start(),
- ?assertEqual(
- "",
- rngchars(0)),
- ?assertEqual(
- 10,
- length(rngchars(10))),
- ok.
-
-rngchar_test() ->
- ?assertEqual(
- $a,
- rngchar(0)),
- ?assertEqual(
- $A,
- rngchar(26)),
- ?assertEqual(
- $_,
- rngchar(62)),
- ok.
-
-mkdtemp_n_failonce_test() ->
- crypto:start(),
- D = mkdtemp(),
- Path = filename:join([D, "testdir"]),
- %% Toggle the existence of a dir so that it fails
- %% the first time and succeeds the second.
- F = fun () ->
- case filelib:is_dir(Path) of
- true ->
- file:del_dir(Path);
- false ->
- file:make_dir(Path)
- end,
- Path
- end,
- try
- %% Fails the first time
- ?assertThrow(
- {error, eexist},
- mkdtemp_n(F, 1)),
- %% Reset state
- file:del_dir(Path),
- %% Succeeds the second time
- ?assertEqual(
- Path,
- mkdtemp_n(F, 2))
- after rmtempdir(D)
- end,
- ok.
-
-mkdtemp_n_fail_test() ->
- {ok, Cwd} = file:get_cwd(),
- ?assertThrow(
- {error, eexist},
- mkdtemp_n(fun () -> Cwd end, 1)),
- ?assertThrow(
- {error, eexist},
- mkdtemp_n(fun () -> Cwd end, 2)),
- ok.
-
-make_dir_fail_test() ->
- {ok, Cwd} = file:get_cwd(),
- ?assertThrow(
- {error, eexist},
- make_dir(Cwd)),
- ok.
-
-mkdtemp_test() ->
- crypto:start(),
- D = mkdtemp(),
- ?assertEqual(
- true,
- filelib:is_dir(D)),
- ?assertEqual(
- ok,
- file:del_dir(D)),
- ok.
-
-rmtempdir_test() ->
- crypto:start(),
- D1 = mkdtemp(),
- ?assertEqual(
- true,
- filelib:is_dir(D1)),
- ?assertEqual(
- ok,
- rmtempdir(D1)),
- D2 = mkdtemp(),
- ?assertEqual(
- true,
- filelib:is_dir(D2)),
- ok = file:write_file(filename:join([D2, "foo"]), <<"bytes">>),
- D3 = mkdtemp("suffix", "prefix", D2),
- ?assertEqual(
- true,
- filelib:is_dir(D3)),
- ok = file:write_file(filename:join([D3, "foo"]), <<"bytes">>),
- ?assertEqual(
- ok,
- rmtempdir(D2)),
- ?assertEqual(
- {error, enoent},
- file:consult(D3)),
- ?assertEqual(
- {error, enoent},
- file:consult(D2)),
- ok.
-
-gettempdir_env_test() ->
- Env = pushenv(["TMPDIR", "TEMP", "TMP"]),
- FalseEnv = [{"TMPDIR", false}, {"TEMP", false}, {"TMP", false}],
- try
- popenv(FalseEnv),
- popenv([{"TMPDIR", "/"}]),
- ?assertEqual(
- "/",
- os:getenv("TMPDIR")),
- ?assertEqual(
- "/",
- gettempdir()),
- {ok, Cwd} = file:get_cwd(),
- popenv(FalseEnv),
- popenv([{"TMP", Cwd}]),
- ?assertEqual(
- normalize_dir(Cwd),
- gettempdir())
- after popenv(Env)
- end,
- ok.
-
--endif.
http://git-wip-us.apache.org/repos/asf/couchdb/blob/6fdb9e07/src/mochiweb/mochiutf8.erl
----------------------------------------------------------------------
diff --git a/src/mochiweb/mochiutf8.erl b/src/mochiweb/mochiutf8.erl
deleted file mode 100644
index 206e118..0000000
--- a/src/mochiweb/mochiutf8.erl
+++ /dev/null
@@ -1,316 +0,0 @@
-%% @copyright 2010 Mochi Media, Inc.
-%% @author Bob Ippolito <bo...@mochimedia.com>
-
-%% @doc Algorithm to convert any binary to a valid UTF-8 sequence by ignoring
-%% invalid bytes.
-
--module(mochiutf8).
--export([valid_utf8_bytes/1, codepoint_to_bytes/1, bytes_to_codepoints/1]).
--export([bytes_foldl/3, codepoint_foldl/3, read_codepoint/1, len/1]).
-
-%% External API
-
--type unichar_low() :: 0..16#d7ff.
--type unichar_high() :: 16#e000..16#10ffff.
--type unichar() :: unichar_low() | unichar_high().
-
--spec codepoint_to_bytes(unichar()) -> binary().
-%% @doc Convert a unicode codepoint to UTF-8 bytes.
-codepoint_to_bytes(C) when (C >= 16#00 andalso C =< 16#7f) ->
- %% U+0000 - U+007F - 7 bits
- <<C>>;
-codepoint_to_bytes(C) when (C >= 16#080 andalso C =< 16#07FF) ->
- %% U+0080 - U+07FF - 11 bits
- <<0:5, B1:5, B0:6>> = <<C:16>>,
- <<2#110:3, B1:5,
- 2#10:2, B0:6>>;
-codepoint_to_bytes(C) when (C >= 16#0800 andalso C =< 16#FFFF) andalso
- (C < 16#D800 orelse C > 16#DFFF) ->
- %% U+0800 - U+FFFF - 16 bits (excluding UTC-16 surrogate code points)
- <<B2:4, B1:6, B0:6>> = <<C:16>>,
- <<2#1110:4, B2:4,
- 2#10:2, B1:6,
- 2#10:2, B0:6>>;
-codepoint_to_bytes(C) when (C >= 16#010000 andalso C =< 16#10FFFF) ->
- %% U+10000 - U+10FFFF - 21 bits
- <<0:3, B3:3, B2:6, B1:6, B0:6>> = <<C:24>>,
- <<2#11110:5, B3:3,
- 2#10:2, B2:6,
- 2#10:2, B1:6,
- 2#10:2, B0:6>>.
-
--spec codepoints_to_bytes([unichar()]) -> binary().
-%% @doc Convert a list of codepoints to a UTF-8 binary.
-codepoints_to_bytes(L) ->
- <<<<(codepoint_to_bytes(C))/binary>> || C <- L>>.
-
--spec read_codepoint(binary()) -> {unichar(), binary(), binary()}.
-read_codepoint(Bin = <<2#0:1, C:7, Rest/binary>>) ->
- %% U+0000 - U+007F - 7 bits
- <<B:1/binary, _/binary>> = Bin,
- {C, B, Rest};
-read_codepoint(Bin = <<2#110:3, B1:5,
- 2#10:2, B0:6,
- Rest/binary>>) ->
- %% U+0080 - U+07FF - 11 bits
- case <<B1:5, B0:6>> of
- <<C:11>> when C >= 16#80 ->
- <<B:2/binary, _/binary>> = Bin,
- {C, B, Rest}
- end;
-read_codepoint(Bin = <<2#1110:4, B2:4,
- 2#10:2, B1:6,
- 2#10:2, B0:6,
- Rest/binary>>) ->
- %% U+0800 - U+FFFF - 16 bits (excluding UTC-16 surrogate code points)
- case <<B2:4, B1:6, B0:6>> of
- <<C:16>> when (C >= 16#0800 andalso C =< 16#FFFF) andalso
- (C < 16#D800 orelse C > 16#DFFF) ->
- <<B:3/binary, _/binary>> = Bin,
- {C, B, Rest}
- end;
-read_codepoint(Bin = <<2#11110:5, B3:3,
- 2#10:2, B2:6,
- 2#10:2, B1:6,
- 2#10:2, B0:6,
- Rest/binary>>) ->
- %% U+10000 - U+10FFFF - 21 bits
- case <<B3:3, B2:6, B1:6, B0:6>> of
- <<C:21>> when (C >= 16#010000 andalso C =< 16#10FFFF) ->
- <<B:4/binary, _/binary>> = Bin,
- {C, B, Rest}
- end.
-
--spec codepoint_foldl(fun((unichar(), _) -> _), _, binary()) -> _.
-codepoint_foldl(F, Acc, <<>>) when is_function(F, 2) ->
- Acc;
-codepoint_foldl(F, Acc, Bin) ->
- {C, _, Rest} = read_codepoint(Bin),
- codepoint_foldl(F, F(C, Acc), Rest).
-
--spec bytes_foldl(fun((binary(), _) -> _), _, binary()) -> _.
-bytes_foldl(F, Acc, <<>>) when is_function(F, 2) ->
- Acc;
-bytes_foldl(F, Acc, Bin) ->
- {_, B, Rest} = read_codepoint(Bin),
- bytes_foldl(F, F(B, Acc), Rest).
-
--spec bytes_to_codepoints(binary()) -> [unichar()].
-bytes_to_codepoints(B) ->
- lists:reverse(codepoint_foldl(fun (C, Acc) -> [C | Acc] end, [], B)).
-
--spec len(binary()) -> non_neg_integer().
-len(<<>>) ->
- 0;
-len(B) ->
- {_, _, Rest} = read_codepoint(B),
- 1 + len(Rest).
-
--spec valid_utf8_bytes(B::binary()) -> binary().
-%% @doc Return only the bytes in B that represent valid UTF-8. Uses
-%% the following recursive algorithm: skip one byte if B does not
-%% follow UTF-8 syntax (a 1-4 byte encoding of some number),
-%% skip sequence of 2-4 bytes if it represents an overlong encoding
-%% or bad code point (surrogate U+D800 - U+DFFF or > U+10FFFF).
-valid_utf8_bytes(B) when is_binary(B) ->
- binary_skip_bytes(B, invalid_utf8_indexes(B)).
-
-%% Internal API
-
--spec binary_skip_bytes(binary(), [non_neg_integer()]) -> binary().
-%% @doc Return B, but skipping the 0-based indexes in L.
-binary_skip_bytes(B, []) ->
- B;
-binary_skip_bytes(B, L) ->
- binary_skip_bytes(B, L, 0, []).
-
-%% @private
--spec binary_skip_bytes(binary(), [non_neg_integer()], non_neg_integer(), iolist()) -> binary().
-binary_skip_bytes(B, [], _N, Acc) ->
- iolist_to_binary(lists:reverse([B | Acc]));
-binary_skip_bytes(<<_, RestB/binary>>, [N | RestL], N, Acc) ->
- binary_skip_bytes(RestB, RestL, 1 + N, Acc);
-binary_skip_bytes(<<C, RestB/binary>>, L, N, Acc) ->
- binary_skip_bytes(RestB, L, 1 + N, [C | Acc]).
-
--spec invalid_utf8_indexes(binary()) -> [non_neg_integer()].
-%% @doc Return the 0-based indexes in B that are not valid UTF-8.
-invalid_utf8_indexes(B) ->
- invalid_utf8_indexes(B, 0, []).
-
-%% @private.
--spec invalid_utf8_indexes(binary(), non_neg_integer(), [non_neg_integer()]) -> [non_neg_integer()].
-invalid_utf8_indexes(<<C, Rest/binary>>, N, Acc) when C < 16#80 ->
- %% U+0000 - U+007F - 7 bits
- invalid_utf8_indexes(Rest, 1 + N, Acc);
-invalid_utf8_indexes(<<C1, C2, Rest/binary>>, N, Acc)
- when C1 band 16#E0 =:= 16#C0,
- C2 band 16#C0 =:= 16#80 ->
- %% U+0080 - U+07FF - 11 bits
- case ((C1 band 16#1F) bsl 6) bor (C2 band 16#3F) of
- C when C < 16#80 ->
- %% Overlong encoding.
- invalid_utf8_indexes(Rest, 2 + N, [1 + N, N | Acc]);
- _ ->
- %% Upper bound U+07FF does not need to be checked
- invalid_utf8_indexes(Rest, 2 + N, Acc)
- end;
-invalid_utf8_indexes(<<C1, C2, C3, Rest/binary>>, N, Acc)
- when C1 band 16#F0 =:= 16#E0,
- C2 band 16#C0 =:= 16#80,
- C3 band 16#C0 =:= 16#80 ->
- %% U+0800 - U+FFFF - 16 bits
- case ((((C1 band 16#0F) bsl 6) bor (C2 band 16#3F)) bsl 6) bor
- (C3 band 16#3F) of
- C when (C < 16#800) orelse (C >= 16#D800 andalso C =< 16#DFFF) ->
- %% Overlong encoding or surrogate.
- invalid_utf8_indexes(Rest, 3 + N, [2 + N, 1 + N, N | Acc]);
- _ ->
- %% Upper bound U+FFFF does not need to be checked
- invalid_utf8_indexes(Rest, 3 + N, Acc)
- end;
-invalid_utf8_indexes(<<C1, C2, C3, C4, Rest/binary>>, N, Acc)
- when C1 band 16#F8 =:= 16#F0,
- C2 band 16#C0 =:= 16#80,
- C3 band 16#C0 =:= 16#80,
- C4 band 16#C0 =:= 16#80 ->
- %% U+10000 - U+10FFFF - 21 bits
- case ((((((C1 band 16#0F) bsl 6) bor (C2 band 16#3F)) bsl 6) bor
- (C3 band 16#3F)) bsl 6) bor (C4 band 16#3F) of
- C when (C < 16#10000) orelse (C > 16#10FFFF) ->
- %% Overlong encoding or invalid code point.
- invalid_utf8_indexes(Rest, 4 + N, [3 + N, 2 + N, 1 + N, N | Acc]);
- _ ->
- invalid_utf8_indexes(Rest, 4 + N, Acc)
- end;
-invalid_utf8_indexes(<<_, Rest/binary>>, N, Acc) ->
- %% Invalid char
- invalid_utf8_indexes(Rest, 1 + N, [N | Acc]);
-invalid_utf8_indexes(<<>>, _N, Acc) ->
- lists:reverse(Acc).
-
-%%
-%% Tests
-%%
--include_lib("eunit/include/eunit.hrl").
--ifdef(TEST).
-
-binary_skip_bytes_test() ->
- ?assertEqual(<<"foo">>,
- binary_skip_bytes(<<"foo">>, [])),
- ?assertEqual(<<"foobar">>,
- binary_skip_bytes(<<"foo bar">>, [3])),
- ?assertEqual(<<"foo">>,
- binary_skip_bytes(<<"foo bar">>, [3, 4, 5, 6])),
- ?assertEqual(<<"oo bar">>,
- binary_skip_bytes(<<"foo bar">>, [0])),
- ok.
-
-invalid_utf8_indexes_test() ->
- ?assertEqual(
- [],
- invalid_utf8_indexes(<<"unicode snowman for you: ", 226, 152, 131>>)),
- ?assertEqual(
- [0],
- invalid_utf8_indexes(<<128>>)),
- ?assertEqual(
- [57,59,60,64,66,67],
- invalid_utf8_indexes(<<"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; (",
- 167, 65, 170, 186, 73, 83, 80, 166, 87, 186, 217, 41, 41>>)),
- ok.
-
-codepoint_to_bytes_test() ->
- %% U+0000 - U+007F - 7 bits
- %% U+0080 - U+07FF - 11 bits
- %% U+0800 - U+FFFF - 16 bits (excluding UTC-16 surrogate code points)
- %% U+10000 - U+10FFFF - 21 bits
- ?assertEqual(
- <<"a">>,
- codepoint_to_bytes($a)),
- ?assertEqual(
- <<16#c2, 16#80>>,
- codepoint_to_bytes(16#80)),
- ?assertEqual(
- <<16#df, 16#bf>>,
- codepoint_to_bytes(16#07ff)),
- ?assertEqual(
- <<16#ef, 16#bf, 16#bf>>,
- codepoint_to_bytes(16#ffff)),
- ?assertEqual(
- <<16#f4, 16#8f, 16#bf, 16#bf>>,
- codepoint_to_bytes(16#10ffff)),
- ok.
-
-bytes_foldl_test() ->
- ?assertEqual(
- <<"abc">>,
- bytes_foldl(fun (B, Acc) -> <<Acc/binary, B/binary>> end, <<>>, <<"abc">>)),
- ?assertEqual(
- <<"abc", 226, 152, 131, 228, 184, 173, 194, 133, 244,143,191,191>>,
- bytes_foldl(fun (B, Acc) -> <<Acc/binary, B/binary>> end, <<>>,
- <<"abc", 226, 152, 131, 228, 184, 173, 194, 133, 244,143,191,191>>)),
- ok.
-
-bytes_to_codepoints_test() ->
- ?assertEqual(
- "abc" ++ [16#2603, 16#4e2d, 16#85, 16#10ffff],
- bytes_to_codepoints(<<"abc", 226, 152, 131, 228, 184, 173, 194, 133, 244,143,191,191>>)),
- ok.
-
-codepoint_foldl_test() ->
- ?assertEqual(
- "cba",
- codepoint_foldl(fun (C, Acc) -> [C | Acc] end, [], <<"abc">>)),
- ?assertEqual(
- [16#10ffff, 16#85, 16#4e2d, 16#2603 | "cba"],
- codepoint_foldl(fun (C, Acc) -> [C | Acc] end, [],
- <<"abc", 226, 152, 131, 228, 184, 173, 194, 133, 244,143,191,191>>)),
- ok.
-
-len_test() ->
- ?assertEqual(
- 29,
- len(<<"unicode snowman for you: ", 226, 152, 131, 228, 184, 173, 194, 133, 244, 143, 191, 191>>)),
- ok.
-
-codepoints_to_bytes_test() ->
- ?assertEqual(
- iolist_to_binary(lists:map(fun codepoint_to_bytes/1, lists:seq(1, 1000))),
- codepoints_to_bytes(lists:seq(1, 1000))),
- ok.
-
-valid_utf8_bytes_test() ->
- ?assertEqual(
- <<"invalid U+11ffff: ">>,
- valid_utf8_bytes(<<"invalid U+11ffff: ", 244, 159, 191, 191>>)),
- ?assertEqual(
- <<"U+10ffff: ", 244, 143, 191, 191>>,
- valid_utf8_bytes(<<"U+10ffff: ", 244, 143, 191, 191>>)),
- ?assertEqual(
- <<"overlong 2-byte encoding (a): ">>,
- valid_utf8_bytes(<<"overlong 2-byte encoding (a): ", 2#11000001, 2#10100001>>)),
- ?assertEqual(
- <<"overlong 2-byte encoding (!): ">>,
- valid_utf8_bytes(<<"overlong 2-byte encoding (!): ", 2#11000000, 2#10100001>>)),
- ?assertEqual(
- <<"mu: ", 194, 181>>,
- valid_utf8_bytes(<<"mu: ", 194, 181>>)),
- ?assertEqual(
- <<"bad coding bytes: ">>,
- valid_utf8_bytes(<<"bad coding bytes: ", 2#10011111, 2#10111111, 2#11111111>>)),
- ?assertEqual(
- <<"low surrogate (unpaired): ">>,
- valid_utf8_bytes(<<"low surrogate (unpaired): ", 237, 176, 128>>)),
- ?assertEqual(
- <<"high surrogate (unpaired): ">>,
- valid_utf8_bytes(<<"high surrogate (unpaired): ", 237, 191, 191>>)),
- ?assertEqual(
- <<"unicode snowman for you: ", 226, 152, 131>>,
- valid_utf8_bytes(<<"unicode snowman for you: ", 226, 152, 131>>)),
- ?assertEqual(
- <<"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; (AISPW))">>,
- valid_utf8_bytes(<<"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; (",
- 167, 65, 170, 186, 73, 83, 80, 166, 87, 186, 217, 41, 41>>)),
- ok.
-
--endif.