You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@thrift.apache.org by je...@apache.org on 2015/10/29 21:09:51 UTC

thrift git commit: THRIFT-3403 Fix: JSON string reader doesn't recognize UTF-16 surrogate pairs (Client: C#; Patch: Phongphan Phuttha)

Repository: thrift
Updated Branches:
  refs/heads/master 27378fa34 -> 11b515cd2


THRIFT-3403 Fix: JSON string reader doesn't recognize UTF-16 surrogate pairs
Client: C#
Patch: Phongphan Phuttha <ph...@acm.org>

This closes #668


Project: http://git-wip-us.apache.org/repos/asf/thrift/repo
Commit: http://git-wip-us.apache.org/repos/asf/thrift/commit/11b515cd
Tree: http://git-wip-us.apache.org/repos/asf/thrift/tree/11b515cd
Diff: http://git-wip-us.apache.org/repos/asf/thrift/diff/11b515cd

Branch: refs/heads/master
Commit: 11b515cd29292358305ace4ce20d7e626c7e7f42
Parents: 27378fa
Author: Phongphan Phuttha <ph...@acm.org>
Authored: Fri Oct 30 01:31:44 2015 +0700
Committer: Jens Geyer <je...@apache.org>
Committed: Thu Oct 29 22:09:19 2015 +0200

----------------------------------------------------------------------
 lib/csharp/src/Protocol/TJSONProtocol.cs | 37 +++++++++++++++++++++++++--
 lib/csharp/test/JSON/Program.cs          | 13 ++++++++++
 2 files changed, 48 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/thrift/blob/11b515cd/lib/csharp/src/Protocol/TJSONProtocol.cs
----------------------------------------------------------------------
diff --git a/lib/csharp/src/Protocol/TJSONProtocol.cs b/lib/csharp/src/Protocol/TJSONProtocol.cs
index 9d51c74..5e6589e 100644
--- a/lib/csharp/src/Protocol/TJSONProtocol.cs
+++ b/lib/csharp/src/Protocol/TJSONProtocol.cs
@@ -725,6 +725,7 @@ namespace Thrift.Protocol
         private byte[] ReadJSONString(bool skipContext)
         {
             MemoryStream buffer = new MemoryStream();
+            List<char> codeunits = new List<char>();
 
 
             if (!skipContext)
@@ -769,9 +770,41 @@ namespace Thrift.Protocol
                                   (HexVal((byte)tempBuffer[1]) << 8) +
                                   (HexVal((byte)tempBuffer[2]) << 4) +
                                    HexVal(tempBuffer[3]));
-                var tmp = utf8Encoding.GetBytes(new char[] { (char)wch });
-                buffer.Write(tmp, 0, tmp.Length);
+                if (Char.IsHighSurrogate((char)wch))
+                {
+                    if (codeunits.Count > 0)
+                    {
+                        throw new TProtocolException(TProtocolException.INVALID_DATA,
+                                                        "Expected low surrogate char");
+                    }
+                    codeunits.Add((char)wch);
+                }
+                else if (Char.IsLowSurrogate((char)wch))
+                {
+                    if (codeunits.Count == 0)
+                    {
+                        throw new TProtocolException(TProtocolException.INVALID_DATA,
+                                                        "Expected high surrogate char");
+                    }
+                    codeunits.Add((char)wch);
+                    var tmp = utf8Encoding.GetBytes(codeunits.ToArray());
+                    buffer.Write(tmp, 0, tmp.Length);
+                    codeunits.Clear();
+                }
+                else
+                {
+                    var tmp = utf8Encoding.GetBytes(new char[] { (char)wch });
+                    buffer.Write(tmp, 0, tmp.Length);
+                }
             }
+
+
+            if (codeunits.Count > 0)
+            {
+                throw new TProtocolException(TProtocolException.INVALID_DATA,
+                                                "Expected low surrogate char");
+            }
+
             return buffer.ToArray();
         }
 

http://git-wip-us.apache.org/repos/asf/thrift/blob/11b515cd/lib/csharp/test/JSON/Program.cs
----------------------------------------------------------------------
diff --git a/lib/csharp/test/JSON/Program.cs b/lib/csharp/test/JSON/Program.cs
index 9823221..f61388a 100644
--- a/lib/csharp/test/JSON/Program.cs
+++ b/lib/csharp/test/JSON/Program.cs
@@ -34,6 +34,7 @@ namespace JSONTest
         {
             TestThrift2365();  // JSON binary decodes too much data
             TestThrift2336();  // hex encoding using \uXXXX where 0xXXXX > 0xFF
+            TestThrift3403(); // JSON escaped unicode surrogate pair support.
         }
 
 
@@ -78,5 +79,17 @@ namespace JSONTest
             var prot = new TJSONProtocol(trans);
             Debug.Assert(prot.ReadString() == RUSSIAN_TEXT, "reading JSON with hex-encoded chars > 8 bit");
         }
+
+        public static void TestThrift3403()
+        {
+            string GCLEF_TEXT = "\ud834\udd1e";
+            const string GCLEF_JSON = "\"\\ud834\\udd1e\"";
+
+            // parse and check
+            var stm = new MemoryStream(Encoding.UTF8.GetBytes(GCLEF_JSON));
+            var trans = new TStreamTransport(stm, null);
+            var prot = new TJSONProtocol(trans);
+            Debug.Assert(prot.ReadString() == GCLEF_TEXT, "reading JSON with surrogate pair hex-encoded chars");
+        }
     }
 }