You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@thrift.apache.org by ns...@apache.org on 2015/11/10 16:34:23 UTC

[3/3] thrift git commit: THRIFT-2779: Always write unescaped JSON unicode string. Client: PHP Patch: Phongphan Phuttha

THRIFT-2779: Always write unescaped JSON unicode string.
Client: PHP
Patch: Phongphan Phuttha

This closes #666


Project: http://git-wip-us.apache.org/repos/asf/thrift/repo
Commit: http://git-wip-us.apache.org/repos/asf/thrift/commit/90ea4f64
Tree: http://git-wip-us.apache.org/repos/asf/thrift/tree/90ea4f64
Diff: http://git-wip-us.apache.org/repos/asf/thrift/diff/90ea4f64

Branch: refs/heads/master
Commit: 90ea4f64c124404f346c66de47f84fc9ced63132
Parents: c04fb00
Author: Phongphan Phuttha <ph...@acm.org>
Authored: Fri Oct 30 00:00:10 2015 +0700
Committer: Nobuaki Sukegawa <ns...@apache.org>
Committed: Wed Nov 11 00:32:55 2015 +0900

----------------------------------------------------------------------
 lib/php/lib/Thrift/Protocol/TJSONProtocol.php   | 40 +++++++++++++++++++-
 lib/php/test/Test/Thrift/Fixtures.php           |  3 ++
 .../Test/Thrift/Protocol/TestTJSONProtocol.php  | 21 +++++++++-
 3 files changed, 62 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/thrift/blob/90ea4f64/lib/php/lib/Thrift/Protocol/TJSONProtocol.php
----------------------------------------------------------------------
diff --git a/lib/php/lib/Thrift/Protocol/TJSONProtocol.php b/lib/php/lib/Thrift/Protocol/TJSONProtocol.php
index 6c93b09..6d8e81f 100644
--- a/lib/php/lib/Thrift/Protocol/TJSONProtocol.php
+++ b/lib/php/lib/Thrift/Protocol/TJSONProtocol.php
@@ -215,6 +215,44 @@ class TJSONProtocol extends TProtocol
         return dechex($val);
     }
 
+    /**
+     * Tells whether the running PHP supports the JSON_UNESCAPED_UNICODE
+     * flag of json_encode(), which was introduced in PHP 5.4.
+     *
+     * @return bool true on PHP 5.4 or newer, false otherwise
+     */
+    private function hasJSONUnescapedUnicode()
+    {
+        // PHP_VERSION_ID is major * 10000 + minor * 100 + release,
+        // so 50400 is the first 5.4 release.
+        return PHP_VERSION_ID >= 50400;
+    }
+
+    /**
+     * JSON-encodes a value while leaving non-ASCII characters as raw UTF-8
+     * instead of \uXXXX escapes.
+     *
+     * On PHP >= 5.4 this is json_encode() with JSON_UNESCAPED_UNICODE. On
+     * older versions the escapes produced by plain json_encode() are
+     * converted back to UTF-8 by hand (requires the mbstring extension).
+     *
+     * @param mixed $str value to encode
+     * @return string JSON text
+     */
+    private function unescapedUnicode($str)
+    {
+        if ($this->hasJSONUnescapedUnicode()) {
+            return json_encode($str, JSON_UNESCAPED_UNICODE);
+        }
+
+        $json = json_encode($str);
+
+        /*
+         * Walk the escapes left to right in a single pass. Every backslash
+         * escape is consumed as a unit, so an escaped backslash followed by
+         * plain text ("\\u0041") is never mistaken for the escape \u0041.
+         *
+         * Alternatives, in order:
+         *   1. surrogate pair (character outside the Basic Multilingual Plane)
+         *      High surrogate: 0xD800 - 0xDBFF, low surrogate: 0xDC00 - 0xDFFF
+         *   2. single \uXXXX (character within the Basic Multilingual Plane)
+         *   3. any other escape (\\, \", \/, \n, ...), kept as is
+         */
+        return preg_replace_callback(
+            '/\\\\(?:u(d[89ab][0-9a-f]{2})\\\\u(d[c-f][0-9a-f]{2})|u([0-9a-f]{4})|.)/is',
+            function ($matches) {
+                if (isset($matches[3])) {
+                    $code = hexdec($matches[3]);
+                    // Control characters must stay escaped to keep the JSON
+                    // valid; a lone surrogate has no UTF-8 representation.
+                    if ($code < 0x20 || ($code >= 0xD800 && $code <= 0xDFFF)) {
+                        return $matches[0];
+                    }
+                    $utf16 = $matches[3];
+                } elseif (isset($matches[2])) {
+                    $utf16 = $matches[1].$matches[2];
+                } else {
+                    // Not a unicode escape: leave untouched.
+                    return $matches[0];
+                }
+
+                return mb_convert_encoding(pack('H*', $utf16), 'UTF-8', 'UTF-16BE');
+            },
+            $json
+        );
+    }
+
     private function writeJSONString($b)
     {
         $this->context_->write();
@@ -223,7 +261,7 @@ class TJSONProtocol extends TProtocol
             $this->trans_->write(self::QUOTE);
         }
 
-        $this->trans_->write(json_encode($b));
+        $this->trans_->write($this->unescapedUnicode($b));
 
         if (is_numeric($b) && $this->context_->escapeNum()) {
             $this->trans_->write(self::QUOTE);

http://git-wip-us.apache.org/repos/asf/thrift/blob/90ea4f64/lib/php/test/Test/Thrift/Fixtures.php
----------------------------------------------------------------------
diff --git a/lib/php/test/Test/Thrift/Fixtures.php b/lib/php/test/Test/Thrift/Fixtures.php
index d9d487f..2c60a08 100644
--- a/lib/php/test/Test/Thrift/Fixtures.php
+++ b/lib/php/test/Test/Thrift/Fixtures.php
@@ -46,6 +46,9 @@ class Fixtures
     self::$testArgs['testString3'] =
       "string that ends in double-backslash \\\\";
 
+    self::$testArgs['testUnicodeStringWithNonBMP'] =
+      "สวัสดี/𝒯";
+
     self::$testArgs['testDouble'] = 3.1415926535898;
 
 	// TODO: add testBinary() call

http://git-wip-us.apache.org/repos/asf/thrift/blob/90ea4f64/lib/php/test/Test/Thrift/Protocol/TestTJSONProtocol.php
----------------------------------------------------------------------
diff --git a/lib/php/test/Test/Thrift/Protocol/TestTJSONProtocol.php b/lib/php/test/Test/Thrift/Protocol/TestTJSONProtocol.php
index 7ba3441..a4ca9d5 100755
--- a/lib/php/test/Test/Thrift/Protocol/TestTJSONProtocol.php
+++ b/lib/php/test/Test/Thrift/Protocol/TestTJSONProtocol.php
@@ -200,7 +200,12 @@ class TestTJSONProtocol extends \PHPUnit_Framework_TestCase
     $actual = $this->transport->read( BUFSIZ );
     $expected = TestTJSONProtocol_Fixtures::$testArgsJSON['testStringMap'];
 
-    $this->assertEquals( $expected, $actual );
+    /*
+     * The $actual returns unescaped string.
+     * It is required to decode then encode it again
+     * to get the expected escaped unicode.
+     */
+    $this->assertEquals( $expected, json_encode(json_decode($actual)) );
   }
 
   public function testSet_Write()
@@ -308,6 +313,18 @@ class TestTJSONProtocol extends \PHPUnit_Framework_TestCase
     $this->assertEquals( $expected, $actual );
   }
 
+  /**
+   * Writing a string that mixes BMP and non-BMP characters must produce
+   * the JSON stored in the 'testUnicodeStringWithNonBMP' fixture.
+   */
+  public function testString4_Write()
+  {
+    $request = new \ThriftTest\ThriftTest_testString_args();
+    $request->thing = Fixtures::$testArgs['testUnicodeStringWithNonBMP'];
+    $request->write( $this->protocol );
+
+    $this->assertEquals(
+      TestTJSONProtocol_Fixtures::$testArgsJSON['testUnicodeStringWithNonBMP'],
+      $this->transport->read( BUFSIZ )
+    );
+  }
+
   public function testDouble_Read()
   {
     $this->transport->write(
@@ -528,6 +545,8 @@ class TestTJSONProtocol_Fixtures
 
     self::$testArgsJSON['testString3'] = '{"1":{"str":"string that ends in double-backslash \\\\\\\\"}}';
 
+    self::$testArgsJSON['testUnicodeStringWithNonBMP'] = '{"1":{"str":"สวัสดี\/𝒯"}}';
+
     self::$testArgsJSON['testDouble'] = '{"1":{"dbl":3.1415926535898}}';
 
     self::$testArgsJSON['testByte'] = '{"1":{"i8":1}}';