You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@thrift.apache.org by je...@apache.org on 2015/10/14 00:17:54 UTC

thrift git commit: THRIFT-2412 UTF-8 sent by PHP as JSON is not understood Client: D Author: Phongphan Phuttha

Repository: thrift
Updated Branches:
  refs/heads/master 673c44bc1 -> bb272dbcd


THRIFT-2412 UTF-8 sent by PHP as JSON is not understood
Client: D
Author: Phongphan Phuttha <ph...@acm.org>

This closes #650


Project: http://git-wip-us.apache.org/repos/asf/thrift/repo
Commit: http://git-wip-us.apache.org/repos/asf/thrift/commit/bb272dbc
Tree: http://git-wip-us.apache.org/repos/asf/thrift/tree/bb272dbc
Diff: http://git-wip-us.apache.org/repos/asf/thrift/diff/bb272dbc

Branch: refs/heads/master
Commit: bb272dbcd5956f727a604f9643bc87b8fd5e1ed4
Parents: 673c44b
Author: Jens Geyer <je...@apache.org>
Authored: Wed Oct 14 00:17:28 2015 +0200
Committer: Jens Geyer <je...@apache.org>
Committed: Wed Oct 14 00:17:28 2015 +0200

----------------------------------------------------------------------
 lib/d/src/thrift/protocol/json.d | 64 ++++++++++++++++++++++++++++++++---
 1 file changed, 59 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/thrift/blob/bb272dbc/lib/d/src/thrift/protocol/json.d
----------------------------------------------------------------------
diff --git a/lib/d/src/thrift/protocol/json.d b/lib/d/src/thrift/protocol/json.d
index ed8f9c0..223d3a3 100644
--- a/lib/d/src/thrift/protocol/json.d
+++ b/lib/d/src/thrift/protocol/json.d
@@ -26,6 +26,7 @@ import std.range;
 import std.string : format;
 import std.traits : isIntegral;
 import std.typetuple : allSatisfy, TypeTuple;
+import std.utf : toUTF8;
 import thrift.protocol.base;
 import thrift.transport.base;
 
@@ -492,12 +493,15 @@ private:
     return readSyntaxChar(reader_, ch);
   }
 
-  ubyte readJsonEscapeChar() {
-    readJsonSyntaxChar(ZERO_CHAR);
-    readJsonSyntaxChar(ZERO_CHAR);
+  wchar readJsonEscapeChar() {
     auto a = reader_.read();
     auto b = reader_.read();
-    return cast(ubyte)((hexVal(a[0]) << 4) + hexVal(b[0]));
+    auto c = reader_.read();
+    auto d = reader_.read();
+    return cast(ushort)(
+          (hexVal(a[0]) << 12) + (hexVal(b[0]) << 8) +
+          (hexVal(c[0]) << 4) + hexVal(d[0])
+        );
   }
 
   string readJsonString(bool skipContext = false) {
@@ -506,6 +510,7 @@ private:
     readJsonSyntaxChar(STRING_DELIMITER);
     auto buffer = appender!string();
 
+    wchar[] wchs;
     int bytesRead;
     while (true) {
       auto ch = reader_.read();
@@ -521,7 +526,18 @@ private:
       if (ch == BACKSLASH) {
         ch = reader_.read();
         if (ch == ESCAPE_CHAR) {
-          ch = readJsonEscapeChar();
+          auto wch = readJsonEscapeChar();
+          if (wch >= 0xD800 && wch <= 0xDBFF) {
+            wchs ~= wch;
+          } else if (wch >= 0xDC00 && wch <= 0xDFFF && wchs.length == 0) {
+            throw new TProtocolException("Missing UTF-16 high surrogate.",
+                                         TProtocolException.Type.INVALID_DATA);
+          } else {
+            wchs ~= wch;
+            buffer.put(wchs.toUTF8);
+            wchs = [];
+          }
+          continue;
         } else {
           auto pos = countUntil(kEscapeChars[], ch[0]);
           if (pos == -1) {
@@ -531,9 +547,17 @@ private:
           ch = kEscapeCharVals[pos];
         }
       }
+      if (wchs.length != 0) {
+        throw new TProtocolException("Missing UTF-16 low surrogate.",
+                                     TProtocolException.Type.INVALID_DATA);
+      }
       buffer.put(ch[0]);
     }
 
+    if (wchs.length != 0) {
+      throw new TProtocolException("Missing UTF-16 low surrogate.",
+                                   TProtocolException.Type.INVALID_DATA);
+    }
     return buffer.data;
   }
 
@@ -772,6 +796,36 @@ unittest {
 }
 
 unittest {
+  import std.exception;
+  import thrift.transport.memory;
+
+  auto buf = new TMemoryBuffer(cast(ubyte[])"\"\\u0e01 \\ud835\\udd3e\"");
+  auto json = tJsonProtocol(buf);
+  auto str = json.readString();
+  enforce(str == "ก 𝔾");
+}
+
+unittest {
+  // A high surrogate (\ud835) with no following low surrogate must raise TProtocolException.
+  import std.exception;
+  import thrift.transport.memory;
+
+  auto buf = new TMemoryBuffer(cast(ubyte[])"\"\\u0e01 \\ud835\"");
+  auto json = tJsonProtocol(buf);
+  assertThrown!TProtocolException(json.readString());
+}
+
+unittest {
+  // A low surrogate (\udd3e) with no preceding high surrogate must raise TProtocolException.
+  import std.exception;
+  import thrift.transport.memory;
+
+  auto buf = new TMemoryBuffer(cast(ubyte[])"\"\\u0e01 \\udd3e\"");
+  auto json = tJsonProtocol(buf);
+  assertThrown!TProtocolException(json.readString());
+}
+
+unittest {
   import thrift.internal.test.protocol;
   testContainerSizeLimit!(TJsonProtocol!())();
   testStringSizeLimit!(TJsonProtocol!())();