You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2015/07/08 03:57:08 UTC

[15/31] hive git commit: HIVE-11054: Handle varchar/char partition columns in vectorization (Gopal V, reviewed by Gunther Hagleitner)

HIVE-11054: Handle varchar/char partition columns in vectorization (Gopal V, reviewed by Gunther Hagleitner)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7b98e37a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7b98e37a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7b98e37a

Branch: refs/heads/llap
Commit: 7b98e37a8ca24321c2f9d74e5ab76b0585ca3a3a
Parents: 196a682
Author: Gopal V <go...@apache.org>
Authored: Mon Jul 6 16:25:35 2015 -0700
Committer: Gopal V <go...@apache.org>
Committed: Mon Jul 6 16:25:35 2015 -0700

----------------------------------------------------------------------
 .../ql/exec/vector/VectorizedRowBatchCtx.java   |  2 +-
 .../clientpositive/vectorization_part_varchar.q |  7 ++
 .../vectorization_part_varchar.q.out            | 72 ++++++++++++++++++++
 3 files changed, 80 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/7b98e37a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
index db382cd..82d4a8f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
@@ -591,7 +591,7 @@ public class VectorizedRowBatchCtx {
         case CHAR:
         case VARCHAR: {
           BytesColumnVector bcv = (BytesColumnVector) batch.cols[colIndex];
-          String sVal = (String) value;
+          String sVal = value.toString();
           if (sVal == null) {
             bcv.noNulls = false;
             bcv.isNull[0] = true;

http://git-wip-us.apache.org/repos/asf/hive/blob/7b98e37a/ql/src/test/queries/clientpositive/vectorization_part_varchar.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vectorization_part_varchar.q b/ql/src/test/queries/clientpositive/vectorization_part_varchar.q
new file mode 100644
index 0000000..e78f9f5
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vectorization_part_varchar.q
@@ -0,0 +1,7 @@
+SET hive.vectorized.execution.enabled=true;
+CREATE TABLE alltypesorc_part_varchar(ctinyint tinyint, csmallint smallint, cint int, cbigint bigint, cfloat float, cdouble double, cstring1 string, cstring2 string, ctimestamp1 timestamp, ctimestamp2 timestamp, cboolean1 boolean, cboolean2 boolean) partitioned by (ds varchar(4)) STORED AS ORC;
+insert overwrite table alltypesorc_part_varchar partition (ds='2011') select * from alltypesorc limit 100;
+insert overwrite table alltypesorc_part_varchar partition (ds='2012') select * from alltypesorc limit 100;
+
+select count(cdouble), cint from alltypesorc_part_varchar where ds='2011' group by cint limit 10;
+select count(*) from alltypesorc_part_varchar A join alltypesorc_part_varchar B on A.ds=B.ds;

http://git-wip-us.apache.org/repos/asf/hive/blob/7b98e37a/ql/src/test/results/clientpositive/vectorization_part_varchar.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vectorization_part_varchar.q.out b/ql/src/test/results/clientpositive/vectorization_part_varchar.q.out
new file mode 100644
index 0000000..c351de7
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vectorization_part_varchar.q.out
@@ -0,0 +1,72 @@
+PREHOOK: query: CREATE TABLE alltypesorc_part_varchar(ctinyint tinyint, csmallint smallint, cint int, cbigint bigint, cfloat float, cdouble double, cstring1 string, cstring2 string, ctimestamp1 timestamp, ctimestamp2 timestamp, cboolean1 boolean, cboolean2 boolean) partitioned by (ds varchar(4)) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@alltypesorc_part_varchar
+POSTHOOK: query: CREATE TABLE alltypesorc_part_varchar(ctinyint tinyint, csmallint smallint, cint int, cbigint bigint, cfloat float, cdouble double, cstring1 string, cstring2 string, ctimestamp1 timestamp, ctimestamp2 timestamp, cboolean1 boolean, cboolean2 boolean) partitioned by (ds varchar(4)) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@alltypesorc_part_varchar
+PREHOOK: query: insert overwrite table alltypesorc_part_varchar partition (ds='2011') select * from alltypesorc limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@alltypesorc_part_varchar@ds=2011
+POSTHOOK: query: insert overwrite table alltypesorc_part_varchar partition (ds='2011') select * from alltypesorc limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@alltypesorc_part_varchar@ds=2011
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2011).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2011).cboolean1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2011).cboolean2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean2, type:boolean, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2011).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2011).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2011).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2011).csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2011).cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2011).cstring2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring2, type:string, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2011).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2011).ctimestamp2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2011).ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+PREHOOK: query: insert overwrite table alltypesorc_part_varchar partition (ds='2012') select * from alltypesorc limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@alltypesorc_part_varchar@ds=2012
+POSTHOOK: query: insert overwrite table alltypesorc_part_varchar partition (ds='2012') select * from alltypesorc limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@alltypesorc_part_varchar@ds=2012
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2012).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2012).cboolean1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2012).cboolean2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean2, type:boolean, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2012).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2012).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2012).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2012).csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2012).cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2012).cstring2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring2, type:string, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2012).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2012).ctimestamp2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_part_varchar PARTITION(ds=2012).ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+PREHOOK: query: select count(cdouble), cint from alltypesorc_part_varchar where ds='2011' group by cint limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc_part_varchar
+PREHOOK: Input: default@alltypesorc_part_varchar@ds=2011
+#### A masked pattern was here ####
+POSTHOOK: query: select count(cdouble), cint from alltypesorc_part_varchar where ds='2011' group by cint limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc_part_varchar
+POSTHOOK: Input: default@alltypesorc_part_varchar@ds=2011
+#### A masked pattern was here ####
+100	528534767
+PREHOOK: query: select count(*) from alltypesorc_part_varchar A join alltypesorc_part_varchar B on A.ds=B.ds
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc_part_varchar
+PREHOOK: Input: default@alltypesorc_part_varchar@ds=2011
+PREHOOK: Input: default@alltypesorc_part_varchar@ds=2012
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from alltypesorc_part_varchar A join alltypesorc_part_varchar B on A.ds=B.ds
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc_part_varchar
+POSTHOOK: Input: default@alltypesorc_part_varchar@ds=2011
+POSTHOOK: Input: default@alltypesorc_part_varchar@ds=2012
+#### A masked pattern was here ####
+20000