You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pr...@apache.org on 2015/12/03 05:22:02 UTC

hive git commit: HIVE-11312: ORC format: where clause with CHAR data type not returning any rows (Prasanth Jayachandran reviewed by Sergey Shelukhin)

Repository: hive
Updated Branches:
  refs/heads/branch-2.0 bbe23403c -> 971046a55


HIVE-11312: ORC format: where clause with CHAR data type not returning any rows (Prasanth Jayachandran reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/971046a5
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/971046a5
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/971046a5

Branch: refs/heads/branch-2.0
Commit: 971046a5548ca842e5cb6b82e754b6bff3361c90
Parents: bbe2340
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Wed Dec 2 22:21:47 2015 -0600
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Wed Dec 2 22:21:47 2015 -0600

----------------------------------------------------------------------
 .../hive/ql/parse/TypeCheckProcFactory.java     | 18 +++-
 .../clientpositive/orc_ppd_str_conversion.q     | 17 ++++
 .../results/clientpositive/orc_ppd_char.q.out   |  4 +-
 .../clientpositive/orc_ppd_str_conversion.q.out | 99 ++++++++++++++++++++
 .../clientpositive/parquet_ppd_char.q.out       | 10 ++
 5 files changed, 144 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/971046a5/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
index 7f5d72a..9d8b352 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
@@ -32,8 +32,7 @@ import java.util.Set;
 import java.util.Stack;
 
 import org.apache.commons.lang.StringUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+import org.apache.hadoop.hive.common.type.HiveChar;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
 import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
@@ -82,6 +81,8 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hive.common.util.DateUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import com.google.common.collect.Lists;
 
@@ -1052,6 +1053,19 @@ public class TypeCheckProcFactory {
               children.set(constIdx, new ExprNodeConstantDesc(value));
             }
           }
+
+          // if column type is char and constant type is string, then convert the constant to char
+          // type with padded spaces.
+          final PrimitiveTypeInfo colTypeInfo = TypeInfoFactory
+              .getPrimitiveTypeInfo(columnType);
+          if (constType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME) &&
+              colTypeInfo instanceof CharTypeInfo) {
+            final Object originalValue = ((ExprNodeConstantDesc) children.get(constIdx)).getValue();
+            final String constValue = originalValue.toString();
+            final int length = TypeInfoUtils.getCharacterLengthForType(colTypeInfo);
+            final HiveChar newValue = new HiveChar(constValue, length);
+            children.set(constIdx, new ExprNodeConstantDesc(colTypeInfo, newValue));
+          }
         }
         if (genericUDF instanceof GenericUDFOPOr) {
           // flatten OR

http://git-wip-us.apache.org/repos/asf/hive/blob/971046a5/ql/src/test/queries/clientpositive/orc_ppd_str_conversion.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/orc_ppd_str_conversion.q b/ql/src/test/queries/clientpositive/orc_ppd_str_conversion.q
new file mode 100644
index 0000000..768e3f7
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/orc_ppd_str_conversion.q
@@ -0,0 +1,17 @@
+set hive.cbo.enable=false;
+
+create table orc_test( col1 varchar(15), col2 char(10)) stored as orc;
+create table text_test( col1 varchar(15), col2 char(10));
+
+insert into orc_test values ('val1', '1');
+insert overwrite table text_test select * from orc_test;
+
+explain select * from text_test where col2='1';
+select * from text_test where col2='1';
+
+set hive.optimize.index.filter=false;
+select * from orc_test where col2='1';
+
+set hive.optimize.index.filter=true;
+select * from orc_test where col2='1';
+

http://git-wip-us.apache.org/repos/asf/hive/blob/971046a5/ql/src/test/results/clientpositive/orc_ppd_char.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/orc_ppd_char.q.out b/ql/src/test/results/clientpositive/orc_ppd_char.q.out
index 79838c1..8af2c5a 100644
--- a/ql/src/test/results/clientpositive/orc_ppd_char.q.out
+++ b/ql/src/test/results/clientpositive/orc_ppd_char.q.out
@@ -100,7 +100,7 @@ POSTHOOK: query: select sum(hash(*)) from newtypesorc where c="apple "
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@newtypesorc
 #### A masked pattern was here ####
-NULL
+-252951929000
 PREHOOK: query: select sum(hash(*)) from newtypesorc where c="apple "
 PREHOOK: type: QUERY
 PREHOOK: Input: default@newtypesorc
@@ -109,7 +109,7 @@ POSTHOOK: query: select sum(hash(*)) from newtypesorc where c="apple "
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@newtypesorc
 #### A masked pattern was here ####
-NULL
+-252951929000
 PREHOOK: query: select sum(hash(*)) from newtypesorc where c in ("apple", "carrot")
 PREHOOK: type: QUERY
 PREHOOK: Input: default@newtypesorc

http://git-wip-us.apache.org/repos/asf/hive/blob/971046a5/ql/src/test/results/clientpositive/orc_ppd_str_conversion.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/orc_ppd_str_conversion.q.out b/ql/src/test/results/clientpositive/orc_ppd_str_conversion.q.out
new file mode 100644
index 0000000..0e6dc59
--- /dev/null
+++ b/ql/src/test/results/clientpositive/orc_ppd_str_conversion.q.out
@@ -0,0 +1,99 @@
+PREHOOK: query: create table orc_test( col1 varchar(15), col2 char(10)) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_test
+POSTHOOK: query: create table orc_test( col1 varchar(15), col2 char(10)) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_test
+PREHOOK: query: create table text_test( col1 varchar(15), col2 char(10))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@text_test
+POSTHOOK: query: create table text_test( col1 varchar(15), col2 char(10))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@text_test
+PREHOOK: query: insert into orc_test values ('val1', '1')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@orc_test
+POSTHOOK: query: insert into orc_test values ('val1', '1')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@orc_test
+POSTHOOK: Lineage: orc_test.col1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: orc_test.col2 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: insert overwrite table text_test select * from orc_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_test
+PREHOOK: Output: default@text_test
+POSTHOOK: query: insert overwrite table text_test select * from orc_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_test
+POSTHOOK: Output: default@text_test
+POSTHOOK: Lineage: text_test.col1 SIMPLE [(orc_test)orc_test.FieldSchema(name:col1, type:varchar(15), comment:null), ]
+POSTHOOK: Lineage: text_test.col2 SIMPLE [(orc_test)orc_test.FieldSchema(name:col2, type:char(10), comment:null), ]
+PREHOOK: query: explain select * from text_test where col2='1'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from text_test where col2='1'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: text_test
+            Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (col2 = '1         ') (type: boolean)
+              Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: col1 (type: varchar(15)), '1         ' (type: char(10))
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from text_test where col2='1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@text_test
+#### A masked pattern was here ####
+POSTHOOK: query: select * from text_test where col2='1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@text_test
+#### A masked pattern was here ####
+val1	1         
+PREHOOK: query: select * from orc_test where col2='1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_test
+#### A masked pattern was here ####
+POSTHOOK: query: select * from orc_test where col2='1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_test
+#### A masked pattern was here ####
+val1	1         
+PREHOOK: query: select * from orc_test where col2='1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_test
+#### A masked pattern was here ####
+POSTHOOK: query: select * from orc_test where col2='1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_test
+#### A masked pattern was here ####
+val1	1         

http://git-wip-us.apache.org/repos/asf/hive/blob/971046a5/ql/src/test/results/clientpositive/parquet_ppd_char.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_ppd_char.q.out b/ql/src/test/results/clientpositive/parquet_ppd_char.q.out
index defaa9d..c1565f8 100644
--- a/ql/src/test/results/clientpositive/parquet_ppd_char.q.out
+++ b/ql/src/test/results/clientpositive/parquet_ppd_char.q.out
@@ -142,6 +142,11 @@ POSTHOOK: query: select * from newtypestbl where c="apple "
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@newtypestbl
 #### A masked pattern was here ####
+apple     	bee	0.220	1970-02-20
+apple     	bee	0.220	1970-02-20
+apple     	bee	0.220	1970-02-20
+apple     	bee	0.220	1970-02-20
+apple     	bee	0.220	1970-02-20
 PREHOOK: query: select * from newtypestbl where c="apple "
 PREHOOK: type: QUERY
 PREHOOK: Input: default@newtypestbl
@@ -150,6 +155,11 @@ POSTHOOK: query: select * from newtypestbl where c="apple "
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@newtypestbl
 #### A masked pattern was here ####
+apple     	bee	0.220	1970-02-20
+apple     	bee	0.220	1970-02-20
+apple     	bee	0.220	1970-02-20
+apple     	bee	0.220	1970-02-20
+apple     	bee	0.220	1970-02-20
 PREHOOK: query: select * from newtypestbl where c in ("apple", "carrot")
 PREHOOK: type: QUERY
 PREHOOK: Input: default@newtypestbl