You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by da...@apache.org on 2014/02/12 07:21:51 UTC

[19/50] [abbrv] mochiweb commit: updated refs/heads/import-master to 3a54dbf

reject attempts to upload invalid UTF-8 JSON.  Closes COUCHDB-345

This patch requires JSON to be encoded using UTF-8.  In the future we will
accept other encodings.  Thanks Joan Touzet and James Dumay for the bug reports
and Curt Arnold for patches and discussion.


git-svn-id: https://svn.apache.org/repos/asf/couchdb/trunk@818249 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/couchdb-mochiweb/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-mochiweb/commit/bfea5817
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-mochiweb/tree/bfea5817
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-mochiweb/diff/bfea5817

Branch: refs/heads/import-master
Commit: bfea5817c326aae749d95b45ef499929f3ac666d
Parents: 962088b
Author: Adam Kocoloski <ko...@apache.org>
Authored: Wed Sep 23 20:43:35 2009 +0000
Committer: Adam Kocoloski <ko...@apache.org>
Committed: Wed Sep 23 20:43:35 2009 +0000

----------------------------------------------------------------------
 mochijson2.erl | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-mochiweb/blob/bfea5817/mochijson2.erl
----------------------------------------------------------------------
diff --git a/mochijson2.erl b/mochijson2.erl
index 8b6adb1..ee19458 100644
--- a/mochijson2.erl
+++ b/mochijson2.erl
@@ -345,10 +345,24 @@ tokenize_string_fast(B, O) ->
     case B of
         <<_:O/binary, ?Q, _/binary>> ->
             O;
-        <<_:O/binary, C, _/binary>> when C =/= $\\ ->
+        <<_:O/binary, $\\, _/binary>> ->
+            {escape, O};
+        <<_:O/binary, C1, _/binary>> when C1 < 128 ->
             tokenize_string_fast(B, 1 + O);
+        <<_:O/binary, C1, C2, _/binary>> when C1 >= 194, C1 =< 223,
+                C2 >= 128, C2 =< 191 ->
+            tokenize_string_fast(B, 2 + O);
+        <<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 224, C1 =< 239,
+                C2 >= 128, C2 =< 191,
+                C3 >= 128, C3 =< 191 ->
+            tokenize_string_fast(B, 3 + O);
+        <<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 240, C1 =< 244,
+                C2 >= 128, C2 =< 191,
+                C3 >= 128, C3 =< 191,
+                C4 >= 128, C4 =< 191 ->
+            tokenize_string_fast(B, 4 + O);
         _ ->
-            {escape, O}
+            throw(invalid_utf8)
     end.
 
 tokenize_string(B, S=#decoder{offset=O}, Acc) ->