You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2023/01/10 11:43:12 UTC

[doris] branch master updated: [ehancement](nereids) let parser support utf8 identifier (#15721)

This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 7767931aca [ehancement](nereids) let parser support utf8 identifier (#15721)
7767931aca is described below

commit 7767931aca13d03a6c9bd002d68ad3101afb0eb7
Author: AKIRA <33...@users.noreply.github.com>
AuthorDate: Tue Jan 10 19:43:04 2023 +0800

    [ehancement](nereids) let parser support utf8 identifier (#15721)
    
    After this PR, the following SQL statements can also be parsed:
    - SELECT k1 AS 测试 FROM  test;
    - SELECT k1 AS テスト FROM test;
---
 .../antlr4/org/apache/doris/nereids/DorisLexer.g4  |  5 +++-
 .../data/nereids_syntax_p0/utf8_id_test.out        | 16 ++++++++++
 .../suites/nereids_syntax_p0/utf8_id_test.groovy   | 34 ++++++++++++++++++++++
 3 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
index b4c7eb18a3..69683b982b 100644
--- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
+++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
@@ -484,7 +484,10 @@ fragment DIGIT
     ;
 
 fragment LETTER
-    : [A-Z]
+    : [a-zA-Z$_] // ASCII identifier-start characters (the "java letters" below 0x7F)
+    |   // any character above 0x7F except high surrogates (U+D800-U+DBFF), kept only if the predicate below accepts it
+    ~[\u0000-\u007F\uD800-\uDBFF]
+     {Character.isJavaIdentifierStart(_input.LA(-1))}?
     ;
 
 SIMPLE_COMMENT
diff --git a/regression-test/data/nereids_syntax_p0/utf8_id_test.out b/regression-test/data/nereids_syntax_p0/utf8_id_test.out
new file mode 100644
index 0000000000..44042ab70e
--- /dev/null
+++ b/regression-test/data/nereids_syntax_p0/utf8_id_test.out
@@ -0,0 +1,16 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql_1 --
+1
+2
+3
+
+-- !sql_2 --
+1
+2
+3
+
+-- !sql_3 --
+1
+2
+3
+
diff --git a/regression-test/suites/nereids_syntax_p0/utf8_id_test.groovy b/regression-test/suites/nereids_syntax_p0/utf8_id_test.groovy
new file mode 100644
index 0000000000..5134513b8b
--- /dev/null
+++ b/regression-test/suites/nereids_syntax_p0/utf8_id_test.groovy
@@ -0,0 +1,34 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_nereids_utf8_operation") { // exercises non-ASCII (UTF-8) column aliases in SELECT statements
+    sql "use test_query_db"
+    sql "set enable_nereids_planner=true" // presumably routes the queries below through the Nereids planner -- confirm
+    sql "set enable_fallback_to_original_planner=false" // presumably surfaces Nereids failures instead of falling back -- confirm
+
+    order_qt_sql_1 """
+        SELECT k1 AS 测试 FROM  test;
+    """
+
+    order_qt_sql_2 """
+        SELECT k1 AS テスト FROM test;        
+    """
+
+    order_qt_sql_3 """
+        SELECT k1 AS Å FROM test;
+    """
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org