You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pr...@apache.org on 2015/12/03 05:22:02 UTC
hive git commit: HIVE-11312: ORC format: where clause with CHAR data
type not returning any rows (Prasanth Jayachandran reviewed by Sergey
Shelukhin)
Repository: hive
Updated Branches:
refs/heads/branch-2.0 bbe23403c -> 971046a55
HIVE-11312: ORC format: where clause with CHAR data type not returning any rows (Prasanth Jayachandran reviewed by Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/971046a5
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/971046a5
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/971046a5
Branch: refs/heads/branch-2.0
Commit: 971046a5548ca842e5cb6b82e754b6bff3361c90
Parents: bbe2340
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Wed Dec 2 22:21:47 2015 -0600
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Wed Dec 2 22:21:47 2015 -0600
----------------------------------------------------------------------
.../hive/ql/parse/TypeCheckProcFactory.java | 18 +++-
.../clientpositive/orc_ppd_str_conversion.q | 17 ++++
.../results/clientpositive/orc_ppd_char.q.out | 4 +-
.../clientpositive/orc_ppd_str_conversion.q.out | 99 ++++++++++++++++++++
.../clientpositive/parquet_ppd_char.q.out | 10 ++
5 files changed, 144 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/971046a5/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
index 7f5d72a..9d8b352 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
@@ -32,8 +32,7 @@ import java.util.Set;
import java.util.Stack;
import org.apache.commons.lang.StringUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
@@ -82,6 +81,8 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import org.apache.hadoop.io.NullWritable;
import org.apache.hive.common.util.DateUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import com.google.common.collect.Lists;
@@ -1052,6 +1053,19 @@ public class TypeCheckProcFactory {
children.set(constIdx, new ExprNodeConstantDesc(value));
}
}
+
+ // if column type is char and constant type is string, then convert the constant to char
+ // type with padded spaces.
+ final PrimitiveTypeInfo colTypeInfo = TypeInfoFactory
+ .getPrimitiveTypeInfo(columnType);
+ if (constType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME) &&
+ colTypeInfo instanceof CharTypeInfo) {
+ final Object originalValue = ((ExprNodeConstantDesc) children.get(constIdx)).getValue();
+ final String constValue = originalValue.toString();
+ final int length = TypeInfoUtils.getCharacterLengthForType(colTypeInfo);
+ final HiveChar newValue = new HiveChar(constValue, length);
+ children.set(constIdx, new ExprNodeConstantDesc(colTypeInfo, newValue));
+ }
}
if (genericUDF instanceof GenericUDFOPOr) {
// flatten OR
http://git-wip-us.apache.org/repos/asf/hive/blob/971046a5/ql/src/test/queries/clientpositive/orc_ppd_str_conversion.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/orc_ppd_str_conversion.q b/ql/src/test/queries/clientpositive/orc_ppd_str_conversion.q
new file mode 100644
index 0000000..768e3f7
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/orc_ppd_str_conversion.q
@@ -0,0 +1,17 @@
+set hive.cbo.enable=false;
+
+create table orc_test( col1 varchar(15), col2 char(10)) stored as orc;
+create table text_test( col1 varchar(15), col2 char(10));
+
+insert into orc_test values ('val1', '1');
+insert overwrite table text_test select * from orc_test;
+
+explain select * from text_test where col2='1';
+select * from text_test where col2='1';
+
+set hive.optimize.index.filter=false;
+select * from orc_test where col2='1';
+
+set hive.optimize.index.filter=true;
+select * from orc_test where col2='1';
+
http://git-wip-us.apache.org/repos/asf/hive/blob/971046a5/ql/src/test/results/clientpositive/orc_ppd_char.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/orc_ppd_char.q.out b/ql/src/test/results/clientpositive/orc_ppd_char.q.out
index 79838c1..8af2c5a 100644
--- a/ql/src/test/results/clientpositive/orc_ppd_char.q.out
+++ b/ql/src/test/results/clientpositive/orc_ppd_char.q.out
@@ -100,7 +100,7 @@ POSTHOOK: query: select sum(hash(*)) from newtypesorc where c="apple "
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypesorc
#### A masked pattern was here ####
-NULL
+-252951929000
PREHOOK: query: select sum(hash(*)) from newtypesorc where c="apple "
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypesorc
@@ -109,7 +109,7 @@ POSTHOOK: query: select sum(hash(*)) from newtypesorc where c="apple "
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypesorc
#### A masked pattern was here ####
-NULL
+-252951929000
PREHOOK: query: select sum(hash(*)) from newtypesorc where c in ("apple", "carrot")
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypesorc
http://git-wip-us.apache.org/repos/asf/hive/blob/971046a5/ql/src/test/results/clientpositive/orc_ppd_str_conversion.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/orc_ppd_str_conversion.q.out b/ql/src/test/results/clientpositive/orc_ppd_str_conversion.q.out
new file mode 100644
index 0000000..0e6dc59
--- /dev/null
+++ b/ql/src/test/results/clientpositive/orc_ppd_str_conversion.q.out
@@ -0,0 +1,99 @@
+PREHOOK: query: create table orc_test( col1 varchar(15), col2 char(10)) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_test
+POSTHOOK: query: create table orc_test( col1 varchar(15), col2 char(10)) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_test
+PREHOOK: query: create table text_test( col1 varchar(15), col2 char(10))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@text_test
+POSTHOOK: query: create table text_test( col1 varchar(15), col2 char(10))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@text_test
+PREHOOK: query: insert into orc_test values ('val1', '1')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@orc_test
+POSTHOOK: query: insert into orc_test values ('val1', '1')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@orc_test
+POSTHOOK: Lineage: orc_test.col1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: orc_test.col2 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: insert overwrite table text_test select * from orc_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_test
+PREHOOK: Output: default@text_test
+POSTHOOK: query: insert overwrite table text_test select * from orc_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_test
+POSTHOOK: Output: default@text_test
+POSTHOOK: Lineage: text_test.col1 SIMPLE [(orc_test)orc_test.FieldSchema(name:col1, type:varchar(15), comment:null), ]
+POSTHOOK: Lineage: text_test.col2 SIMPLE [(orc_test)orc_test.FieldSchema(name:col2, type:char(10), comment:null), ]
+PREHOOK: query: explain select * from text_test where col2='1'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from text_test where col2='1'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: text_test
+ Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (col2 = '1 ') (type: boolean)
+ Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: col1 (type: varchar(15)), '1 ' (type: char(10))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from text_test where col2='1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@text_test
+#### A masked pattern was here ####
+POSTHOOK: query: select * from text_test where col2='1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@text_test
+#### A masked pattern was here ####
+val1 1
+PREHOOK: query: select * from orc_test where col2='1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_test
+#### A masked pattern was here ####
+POSTHOOK: query: select * from orc_test where col2='1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_test
+#### A masked pattern was here ####
+val1 1
+PREHOOK: query: select * from orc_test where col2='1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_test
+#### A masked pattern was here ####
+POSTHOOK: query: select * from orc_test where col2='1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_test
+#### A masked pattern was here ####
+val1 1
http://git-wip-us.apache.org/repos/asf/hive/blob/971046a5/ql/src/test/results/clientpositive/parquet_ppd_char.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_ppd_char.q.out b/ql/src/test/results/clientpositive/parquet_ppd_char.q.out
index defaa9d..c1565f8 100644
--- a/ql/src/test/results/clientpositive/parquet_ppd_char.q.out
+++ b/ql/src/test/results/clientpositive/parquet_ppd_char.q.out
@@ -142,6 +142,11 @@ POSTHOOK: query: select * from newtypestbl where c="apple "
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
+apple bee 0.220 1970-02-20
+apple bee 0.220 1970-02-20
+apple bee 0.220 1970-02-20
+apple bee 0.220 1970-02-20
+apple bee 0.220 1970-02-20
PREHOOK: query: select * from newtypestbl where c="apple "
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl
@@ -150,6 +155,11 @@ POSTHOOK: query: select * from newtypestbl where c="apple "
POSTHOOK: type: QUERY
POSTHOOK: Input: default@newtypestbl
#### A masked pattern was here ####
+apple bee 0.220 1970-02-20
+apple bee 0.220 1970-02-20
+apple bee 0.220 1970-02-20
+apple bee 0.220 1970-02-20
+apple bee 0.220 1970-02-20
PREHOOK: query: select * from newtypestbl where c in ("apple", "carrot")
PREHOOK: type: QUERY
PREHOOK: Input: default@newtypestbl