You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2016/07/28 18:53:43 UTC
[19/25] hive git commit: HIVE-14045: (Vectorization) Add missing case for BINARY in VectorizationContext.getNormalizedName method (Matt McCline, reviewed by Jason Dere)
HIVE-14045: (Vectorization) Add missing case for BINARY in VectorizationContext.getNormalizedName method (Matt McCline, reviewed by Jason Dere)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c5b308e8
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c5b308e8
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c5b308e8
Branch: refs/heads/branch-2.1
Commit: c5b308e8d4726f54655bd7727910732708b1cbab
Parents: 66a2ded
Author: Matt McCline <mm...@hortonworks.com>
Authored: Wed Jun 22 06:47:44 2016 -0700
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Thu Jul 28 11:43:27 2016 -0700
----------------------------------------------------------------------
.../exec/vector/VectorExpressionDescriptor.java | 6 +-
.../ql/exec/vector/VectorizationContext.java | 4 +
.../clientpositive/vector_binary_join_groupby.q | 8 +-
.../tez/vector_binary_join_groupby.q.out | 92 +++++++++++++++++++-
.../vector_binary_join_groupby.q.out | 90 ++++++++++++++++++-
5 files changed, 190 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/c5b308e8/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
index 7b3f781..217af3f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
@@ -75,6 +75,7 @@ public class VectorExpressionDescriptor {
TIMESTAMP (0x080),
INTERVAL_YEAR_MONTH (0x100),
INTERVAL_DAY_TIME (0x200),
+ BINARY (0x400),
DATETIME_FAMILY (DATE.value | TIMESTAMP.value),
INTERVAL_FAMILY (INTERVAL_YEAR_MONTH.value | INTERVAL_DAY_TIME.value),
INT_INTERVAL_YEAR_MONTH (INT_FAMILY.value | INTERVAL_YEAR_MONTH.value),
@@ -109,6 +110,8 @@ public class VectorExpressionDescriptor {
return CHAR;
} else if (VectorizationContext.varcharTypePattern.matcher(lower).matches()) {
return VARCHAR;
+ } else if (lower.equals("binary")) {
+ return BINARY;
} else if (VectorizationContext.decimalTypePattern.matcher(lower).matches()) {
return DECIMAL;
} else if (lower.equals("timestamp")) {
@@ -163,7 +166,8 @@ public class VectorExpressionDescriptor {
return "Decimal";
} else if (argType == STRING ||
argType == CHAR ||
- argType == VARCHAR) {
+ argType == VARCHAR ||
+ argType == BINARY) {
return "String";
} else {
return "None";
http://git-wip-us.apache.org/repos/asf/hive/blob/c5b308e8/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index a999625..57873d6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -2297,6 +2297,8 @@ public class VectorizationContext {
case VARCHAR:
//Return the VARCHAR type as is, it includes maximum length.
return hiveTypeName;
+ case BINARY:
+ return "Binary";
case DATE:
return "Date";
case TIMESTAMP:
@@ -2324,6 +2326,8 @@ public class VectorizationContext {
return "Char";
case VARCHAR:
return "VarChar";
+ case BINARY:
+ return "Binary";
case DATE:
return "Date";
case TIMESTAMP:
http://git-wip-us.apache.org/repos/asf/hive/blob/c5b308e8/ql/src/test/queries/clientpositive/vector_binary_join_groupby.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_binary_join_groupby.q b/ql/src/test/queries/clientpositive/vector_binary_join_groupby.q
index 1a9d280..1d99e34 100644
--- a/ql/src/test/queries/clientpositive/vector_binary_join_groupby.q
+++ b/ql/src/test/queries/clientpositive/vector_binary_join_groupby.q
@@ -45,7 +45,7 @@ SELECT sum(hash(*))
FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin;
SELECT sum(hash(*))
-FROM hundredorc t1 JOIN hundredorc t2 ON t2.bin = t2.bin;
+FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin;
EXPLAIN
SELECT count(*), bin
@@ -55,3 +55,9 @@ GROUP BY bin;
SELECT count(*), bin
FROM hundredorc
GROUP BY bin;
+
+-- HIVE-14045: Involve a binary vector scratch column for small table result (Native Vector MapJoin).
+
+EXPLAIN
+SELECT t1.i, t1.bin, t2.bin
+FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/c5b308e8/ql/src/test/results/clientpositive/tez/vector_binary_join_groupby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_binary_join_groupby.q.out b/ql/src/test/results/clientpositive/tez/vector_binary_join_groupby.q.out
index 8cbb4b1..6fbbf91 100644
--- a/ql/src/test/results/clientpositive/tez/vector_binary_join_groupby.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_binary_join_groupby.q.out
@@ -194,18 +194,17 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Map Join MAPJOIN[16][bigTable=?] in task 'Map 1' is a cross product
PREHOOK: query: SELECT sum(hash(*))
-FROM hundredorc t1 JOIN hundredorc t2 ON t2.bin = t2.bin
+FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
PREHOOK: type: QUERY
PREHOOK: Input: default@hundredorc
#### A masked pattern was here ####
POSTHOOK: query: SELECT sum(hash(*))
-FROM hundredorc t1 JOIN hundredorc t2 ON t2.bin = t2.bin
+FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
POSTHOOK: type: QUERY
POSTHOOK: Input: default@hundredorc
#### A masked pattern was here ####
--107801098240
+-27832781952
PREHOOK: query: EXPLAIN
SELECT count(*), bin
FROM hundredorc
@@ -315,3 +314,88 @@ POSTHOOK: Input: default@hundredorc
3 xylophone band
2 yard duty
3 zync studies
+PREHOOK: query: -- HIVE-14045: Involve a binary vector scratch column for small table result (Native Vector MapJoin).
+
+EXPLAIN
+SELECT t1.i, t1.bin, t2.bin
+FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i
+PREHOOK: type: QUERY
+POSTHOOK: query: -- HIVE-14045: Involve a binary vector scratch column for small table result (Native Vector MapJoin).
+
+EXPLAIN
+SELECT t1.i, t1.bin, t2.bin
+FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: i is not null (type: boolean)
+ Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i (type: int), bin (type: binary)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col3
+ input vertices:
+ 1 Map 2
+ Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE
+ HybridGraceHashJoin: true
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: binary), _col3 (type: binary)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: i is not null (type: boolean)
+ Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i (type: int), bin (type: binary)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: binary)
+ Execution mode: vectorized
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
http://git-wip-us.apache.org/repos/asf/hive/blob/c5b308e8/ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out b/ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out
index d9c027a..dc1fcd7 100644
--- a/ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out
+++ b/ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out
@@ -190,18 +190,17 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Map Join MAPJOIN[18][bigTable=?] in task 'Stage-2:MAPRED' is a cross product
PREHOOK: query: SELECT sum(hash(*))
-FROM hundredorc t1 JOIN hundredorc t2 ON t2.bin = t2.bin
+FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
PREHOOK: type: QUERY
PREHOOK: Input: default@hundredorc
#### A masked pattern was here ####
POSTHOOK: query: SELECT sum(hash(*))
-FROM hundredorc t1 JOIN hundredorc t2 ON t2.bin = t2.bin
+FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
POSTHOOK: type: QUERY
POSTHOOK: Input: default@hundredorc
#### A masked pattern was here ####
--107801098240
+-27832781952
PREHOOK: query: EXPLAIN
SELECT count(*), bin
FROM hundredorc
@@ -303,3 +302,86 @@ POSTHOOK: Input: default@hundredorc
3 xylophone band
2 yard duty
3 zync studies
+PREHOOK: query: -- HIVE-14045: Involve a binary vector scratch column for small table result (Native Vector MapJoin).
+
+EXPLAIN
+SELECT t1.i, t1.bin, t2.bin
+FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i
+PREHOOK: type: QUERY
+POSTHOOK: query: -- HIVE-14045: Involve a binary vector scratch column for small table result (Native Vector MapJoin).
+
+EXPLAIN
+SELECT t1.i, t1.bin, t2.bin
+FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-4 is a root stage
+ Stage-3 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-4
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ $hdt$_0:t1
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ $hdt$_0:t1
+ TableScan
+ alias: t1
+ Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: i is not null (type: boolean)
+ Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i (type: int), bin (type: binary)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: i is not null (type: boolean)
+ Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i (type: int), bin (type: binary)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: binary), _col3 (type: binary)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Local Work:
+ Map Reduce Local Work
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+