You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2015/11/12 04:59:45 UTC

[21/55] [abbrv] hive git commit: HIVE-12207 : Query fails when non-ascii characters are used in string literals (Aleksei Statkevich via Ashutosh Chauhan)

HIVE-12207 : Query fails when non-ascii characters are used in string literals (Aleksei Statkevich via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan <ha...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a8eb4aef
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a8eb4aef
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a8eb4aef

Branch: refs/heads/spark
Commit: a8eb4aef496568fccbde4898d42c2c14875f7c03
Parents: d06b69f
Author: Aleksei Statkevich <me...@gmail.com>
Authored: Sat Oct 17 23:37:00 2015 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Thu Nov 5 14:16:31 2015 -0800

----------------------------------------------------------------------
 .../calcite/translator/RexNodeConverter.java    | 13 ++++++++---
 .../queries/clientpositive/non_ascii_literal1.q |  1 +
 .../queries/clientpositive/non_ascii_literal2.q |  5 +++++
 .../clientpositive/non_ascii_literal1.q.out     |  9 ++++++++
 .../clientpositive/non_ascii_literal2.q.out     | 23 ++++++++++++++++++++
 5 files changed, 48 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/a8eb4aef/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
index d315497..631a4ca 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
@@ -38,11 +38,14 @@ import org.apache.calcite.rex.RexBuilder;
 import org.apache.calcite.rex.RexCall;
 import org.apache.calcite.rex.RexNode;
 import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.sql.SqlCollation;
 import org.apache.calcite.sql.SqlIntervalQualifier;
 import org.apache.calcite.sql.SqlOperator;
 import org.apache.calcite.sql.fun.SqlCastFunction;
 import org.apache.calcite.sql.parser.SqlParserPos;
 import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.calcite.util.ConversionUtil;
+import org.apache.calcite.util.NlsString;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.common.type.Decimal128;
@@ -301,6 +304,10 @@ public class RexNodeConverter {
   private static final BigInteger MIN_LONG_BI = BigInteger.valueOf(Long.MIN_VALUE),
       MAX_LONG_BI = BigInteger.valueOf(Long.MAX_VALUE);
 
+  private static NlsString asUnicodeString(String text) {
+    return new NlsString(text, ConversionUtil.NATIVE_UTF16_CHARSET_NAME, SqlCollation.IMPLICIT);
+  }
+
   protected RexNode convert(ExprNodeConstantDesc literal) throws CalciteSemanticException {
     RexBuilder rexBuilder = cluster.getRexBuilder();
     RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory();
@@ -377,16 +384,16 @@ public class RexNodeConverter {
       if (value instanceof HiveChar) {
         value = ((HiveChar) value).getValue();
       }
-      calciteLiteral = rexBuilder.makeLiteral((String) value);
+      calciteLiteral = rexBuilder.makeCharLiteral(asUnicodeString((String) value));
       break;
     case VARCHAR:
       if (value instanceof HiveVarchar) {
         value = ((HiveVarchar) value).getValue();
       }
-      calciteLiteral = rexBuilder.makeLiteral((String) value);
+      calciteLiteral = rexBuilder.makeCharLiteral(asUnicodeString((String) value));
       break;
     case STRING:
-      calciteLiteral = rexBuilder.makeLiteral((String) value);
+      calciteLiteral = rexBuilder.makeCharLiteral(asUnicodeString((String) value));
       break;
     case DATE:
       Calendar cal = new GregorianCalendar();

http://git-wip-us.apache.org/repos/asf/hive/blob/a8eb4aef/ql/src/test/queries/clientpositive/non_ascii_literal1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/non_ascii_literal1.q b/ql/src/test/queries/clientpositive/non_ascii_literal1.q
new file mode 100644
index 0000000..9573653
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/non_ascii_literal1.q
@@ -0,0 +1 @@
+select concat("Абвгде", "谢谢") from src limit 1;

http://git-wip-us.apache.org/repos/asf/hive/blob/a8eb4aef/ql/src/test/queries/clientpositive/non_ascii_literal2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/non_ascii_literal2.q b/ql/src/test/queries/clientpositive/non_ascii_literal2.q
new file mode 100644
index 0000000..6b25273
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/non_ascii_literal2.q
@@ -0,0 +1,5 @@
+create table non_ascii_literal2 as
+select "谢谢" as col1, "Абвгде" as col2;
+
+select * from non_ascii_literal2
+where col2 = "Абвгде";

http://git-wip-us.apache.org/repos/asf/hive/blob/a8eb4aef/ql/src/test/results/clientpositive/non_ascii_literal1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/non_ascii_literal1.q.out b/ql/src/test/results/clientpositive/non_ascii_literal1.q.out
new file mode 100644
index 0000000..5b28f4e
--- /dev/null
+++ b/ql/src/test/results/clientpositive/non_ascii_literal1.q.out
@@ -0,0 +1,9 @@
+PREHOOK: query: select concat("Абвгде", "谢谢") from src limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select concat("Абвгде", "谢谢") from src limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+Абвгде谢谢

http://git-wip-us.apache.org/repos/asf/hive/blob/a8eb4aef/ql/src/test/results/clientpositive/non_ascii_literal2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/non_ascii_literal2.q.out b/ql/src/test/results/clientpositive/non_ascii_literal2.q.out
new file mode 100644
index 0000000..7e19143
--- /dev/null
+++ b/ql/src/test/results/clientpositive/non_ascii_literal2.q.out
@@ -0,0 +1,23 @@
+PREHOOK: query: create table non_ascii_literal2 as
+select "谢谢" as col1, "Абвгде" as col2
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: database:default
+PREHOOK: Output: default@non_ascii_literal2
+POSTHOOK: query: create table non_ascii_literal2 as
+select "谢谢" as col1, "Абвгде" as col2
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@non_ascii_literal2
+PREHOOK: query: select * from non_ascii_literal2
+where col2 = "Абвгде"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@non_ascii_literal2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from non_ascii_literal2
+where col2 = "Абвгде"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@non_ascii_literal2
+#### A masked pattern was here ####
+谢谢	Абвгде