You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by ko...@apache.org on 2009/09/23 22:43:35 UTC

svn commit: r818249 - in /couchdb/trunk: THANKS share/www/script/test/view_errors.js src/couchdb/couch_httpd.erl src/mochiweb/mochijson2.erl

Author: kocolosk
Date: Wed Sep 23 20:43:35 2009
New Revision: 818249

URL: http://svn.apache.org/viewvc?rev=818249&view=rev
Log:
Reject attempts to upload invalid UTF-8 JSON.  Closes COUCHDB-345.

This patch requires JSON to be encoded using UTF-8.  In the future we will
accept other encodings.  Thanks to Joan Touzet and James Dumay for the bug
reports, and to Curt Arnold for patches and discussion.

Modified:
    couchdb/trunk/THANKS
    couchdb/trunk/share/www/script/test/view_errors.js
    couchdb/trunk/src/couchdb/couch_httpd.erl
    couchdb/trunk/src/mochiweb/mochijson2.erl

Modified: couchdb/trunk/THANKS
URL: http://svn.apache.org/viewvc/couchdb/trunk/THANKS?rev=818249&r1=818248&r2=818249&view=diff
==============================================================================
--- couchdb/trunk/THANKS (original)
+++ couchdb/trunk/THANKS Wed Sep 23 20:43:35 2009
@@ -35,5 +35,6 @@
  * Sebastian Cohnen <se...@gmx.net>
  * Sven Helmberger <sv...@gmx.de>
  * Dan Walters <da...@danwalters.net>
+ * Curt Arnold <ca...@apache.org>
 
 For a list of authors see the `AUTHORS` file.

Modified: couchdb/trunk/share/www/script/test/view_errors.js
URL: http://svn.apache.org/viewvc/couchdb/trunk/share/www/script/test/view_errors.js?rev=818249&r1=818248&r2=818249&view=diff
==============================================================================
--- couchdb/trunk/share/www/script/test/view_errors.js (original)
+++ couchdb/trunk/share/www/script/test/view_errors.js Wed Sep 23 20:43:35 2009
@@ -55,7 +55,7 @@
           map : "function(doc){emit(doc.integer)}"
         })
       });
-      T(JSON.parse(xhr.responseText).error == "invalid_json");
+      T(JSON.parse(xhr.responseText).error == "bad_request");
 
       // views should ignore Content-Type, like the rest of CouchDB
       var xhr = CouchDB.request("POST", "/test_suite_db/_temp_view", {

Modified: couchdb/trunk/src/couchdb/couch_httpd.erl
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/couchdb/couch_httpd.erl?rev=818249&r1=818248&r2=818249&view=diff
==============================================================================
--- couchdb/trunk/src/couchdb/couch_httpd.erl (original)
+++ couchdb/trunk/src/couchdb/couch_httpd.erl Wed Sep 23 20:43:35 2009
@@ -181,6 +181,9 @@
     catch
         throw:{http_head_abort, Resp0} ->
             {ok, Resp0};
+        throw:{invalid_json, S} ->
+            ?LOG_ERROR("attempted upload of invalid JSON ~s", [S]),
+            send_error(HttpReq, {bad_request, "invalid UTF-8 JSON"});
         exit:normal ->
             exit(normal);
         throw:Error ->

Modified: couchdb/trunk/src/mochiweb/mochijson2.erl
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/mochiweb/mochijson2.erl?rev=818249&r1=818248&r2=818249&view=diff
==============================================================================
--- couchdb/trunk/src/mochiweb/mochijson2.erl (original)
+++ couchdb/trunk/src/mochiweb/mochijson2.erl Wed Sep 23 20:43:35 2009
@@ -345,10 +345,24 @@
     case B of
         <<_:O/binary, ?Q, _/binary>> ->
             O;
-        <<_:O/binary, C, _/binary>> when C =/= $\\ ->
+        <<_:O/binary, $\\, _/binary>> ->
+            {escape, O};
+        <<_:O/binary, C1, _/binary>> when C1 < 128 ->
             tokenize_string_fast(B, 1 + O);
+        <<_:O/binary, C1, C2, _/binary>> when C1 >= 194, C1 =< 223,
+                C2 >= 128, C2 =< 191 ->
+            tokenize_string_fast(B, 2 + O);
+        <<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 224, C1 =< 239,
+                C2 >= 128, C2 =< 191,
+                C3 >= 128, C3 =< 191 ->
+            tokenize_string_fast(B, 3 + O);
+        <<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 240, C1 =< 244,
+                C2 >= 128, C2 =< 191,
+                C3 >= 128, C3 =< 191,
+                C4 >= 128, C4 =< 191 ->
+            tokenize_string_fast(B, 4 + O);
         _ ->
-            {escape, O}
+            throw(invalid_utf8)
     end.
 
 tokenize_string(B, S=#decoder{offset=O}, Acc) ->