You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by mh...@apache.org on 2018/03/01 19:24:48 UTC

asterixdb git commit: [ASTERIXDB-2303][API] Fix Supplementary Chars Printing

Repository: asterixdb
Updated Branches:
  refs/heads/master 49a8a3aca -> 503c62200


[ASTERIXDB-2303][API] Fix Supplementary Chars Printing

- user model changes: no
- storage format changes: no
- interface changes: no

Details:
- Properly print supplementary chars as utf8
  by converting their java surrogates to a string.
- Add test case.

Change-Id: I59e825c11ff750d5b651fb86712023c52e98367e
Reviewed-on: https://asterix-gerrit.ics.uci.edu/2429
Tested-by: Jenkins <je...@fulliautomatix.ics.uci.edu>
Contrib: Jenkins <je...@fulliautomatix.ics.uci.edu>
Integration-Tests: Jenkins <je...@fulliautomatix.ics.uci.edu>
Reviewed-by: Michael Blow <mb...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/asterixdb/repo
Commit: http://git-wip-us.apache.org/repos/asf/asterixdb/commit/503c6220
Tree: http://git-wip-us.apache.org/repos/asf/asterixdb/tree/503c6220
Diff: http://git-wip-us.apache.org/repos/asf/asterixdb/diff/503c6220

Branch: refs/heads/master
Commit: 503c622005800f47c2182a258c8aa0ee42d3b936
Parents: 49a8a3a
Author: Murtadha Hubail <mh...@apache.org>
Authored: Thu Mar 1 08:04:45 2018 +0300
Committer: Murtadha Hubail <mh...@apache.org>
Committed: Thu Mar 1 11:24:21 2018 -0800

----------------------------------------------------------------------
 .../string/utf8/utf8.1.query.sqlpp              | 19 +++++++++++++++
 .../runtimets/results/string/utf8/utf8.1.adm    |  1 +
 .../resources/runtimets/testsuite_sqlpp.xml     |  5 ++++
 .../data/nontagged/printers/PrintTools.java     | 25 ++++++++++++++++++++
 4 files changed, 50 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/asterixdb/blob/503c6220/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/utf8/utf8.1.query.sqlpp
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/utf8/utf8.1.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/utf8/utf8.1.query.sqlpp
new file mode 100644
index 0000000..88909ef
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/utf8/utf8.1.query.sqlpp
@@ -0,0 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+select value "\uD83D\uDE22\uD83D\uDE22\uD83D\uDC89\uD83D\uDC89 = 😢😢💉💉. Coffee ☕‼️😃. حسنا";
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/503c6220/asterixdb/asterix-app/src/test/resources/runtimets/results/string/utf8/utf8.1.adm
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/utf8/utf8.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/utf8/utf8.1.adm
new file mode 100644
index 0000000..89c6334
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/utf8/utf8.1.adm
@@ -0,0 +1 @@
+"😢😢💉💉 = 😢😢💉💉. Coffee ☕‼️😃. حسنا"
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/503c6220/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
index 4265163..9fc0b4b 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
@@ -6445,6 +6445,11 @@
         <output-dir compare="Text">varlen-encoding</output-dir>
       </compilation-unit>
     </test-case>
+    <test-case FilePath="string">
+      <compilation-unit name="utf8">
+        <output-dir compare="Text">utf8</output-dir>
+      </compilation-unit>
+    </test-case>
   </test-group>
   <test-group name="subquery">
     <test-case FilePath="subquery">

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/503c6220/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/PrintTools.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/PrintTools.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/PrintTools.java
index b1039a5..8d05f0f 100644
--- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/PrintTools.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/PrintTools.java
@@ -357,6 +357,13 @@ public class PrintTools {
                                     break;
                             }
                             break;
+                        case 3:
+                            // special treatment for surrogates
+                            if (Character.isHighSurrogate(c)) {
+                                position += writeSupplementaryChar(os, b, maxPosition, position, c, sz);
+                                sz = 0;
+                            }
+                            break;
                     }
                     while (sz > 0) {
                         os.write(b[position]);
@@ -378,4 +385,22 @@ public class PrintTools {
         os.write(HexPrinter.hex(c & 0x0f, HexPrinter.CASE.LOWER_CASE));
     }
 
+    /**
+     * Writes, as UTF-8, the supplementary char formed by a high surrogate and the low surrogate following it.
+     * The charset is explicit because the no-arg getBytes() uses the platform default, which may not be UTF-8.
+     * @return the number of bytes both surrogates occupy in {@code src}
+     * @throws IOException if writing to {@code os} fails
+     */
+    private static int writeSupplementaryChar(OutputStream os, byte[] src, int limit, int highSurrogatePos,
+            char highSurrogate, int highSurrogateSize) throws IOException {
+        final int lowSurrogatePos = highSurrogatePos + highSurrogateSize;
+        if (lowSurrogatePos >= limit) {
+            throw new IllegalStateException("malformed utf8 input");
+        }
+        final char lowSurrogate = UTF8StringUtil.charAt(src, lowSurrogatePos);
+        os.write(new String(new char[] { highSurrogate, lowSurrogate })
+                .getBytes(java.nio.charset.StandardCharsets.UTF_8));
+        return highSurrogateSize + UTF8StringUtil.charSize(src, lowSurrogatePos);
+    }
+
 }