You are viewing a plain text version of this content. The canonical link for it was a hyperlink that is not preserved in this plain text version.
Posted to commits@hive.apache.org by mm...@apache.org on 2016/07/28 18:53:43 UTC

[19/25] hive git commit: HIVE-14045: (Vectorization) Add missing case for BINARY in VectorizationContext.getNormalizedName method (Matt McCline, reviewed by Jason Dere)

HIVE-14045: (Vectorization) Add missing case for BINARY in VectorizationContext.getNormalizedName method (Matt McCline, reviewed by Jason Dere)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c5b308e8
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c5b308e8
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c5b308e8

Branch: refs/heads/branch-2.1
Commit: c5b308e8d4726f54655bd7727910732708b1cbab
Parents: 66a2ded
Author: Matt McCline <mm...@hortonworks.com>
Authored: Wed Jun 22 06:47:44 2016 -0700
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Thu Jul 28 11:43:27 2016 -0700

----------------------------------------------------------------------
 .../exec/vector/VectorExpressionDescriptor.java |  6 +-
 .../ql/exec/vector/VectorizationContext.java    |  4 +
 .../clientpositive/vector_binary_join_groupby.q |  8 +-
 .../tez/vector_binary_join_groupby.q.out        | 92 +++++++++++++++++++-
 .../vector_binary_join_groupby.q.out            | 90 ++++++++++++++++++-
 5 files changed, 190 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/c5b308e8/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
index 7b3f781..217af3f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
@@ -75,6 +75,7 @@ public class VectorExpressionDescriptor {
     TIMESTAMP               (0x080),
     INTERVAL_YEAR_MONTH     (0x100),
     INTERVAL_DAY_TIME       (0x200),
+    BINARY                  (0x400),
     DATETIME_FAMILY         (DATE.value | TIMESTAMP.value),
     INTERVAL_FAMILY         (INTERVAL_YEAR_MONTH.value | INTERVAL_DAY_TIME.value),
     INT_INTERVAL_YEAR_MONTH     (INT_FAMILY.value | INTERVAL_YEAR_MONTH.value),
@@ -109,6 +110,8 @@ public class VectorExpressionDescriptor {
         return CHAR;
       } else if (VectorizationContext.varcharTypePattern.matcher(lower).matches()) {
         return VARCHAR;
+      } else if (lower.equals("binary")) {
+        return BINARY;
       } else if (VectorizationContext.decimalTypePattern.matcher(lower).matches()) {
         return DECIMAL;
       } else if (lower.equals("timestamp")) {
@@ -163,7 +166,8 @@ public class VectorExpressionDescriptor {
         return "Decimal";
       } else if (argType == STRING ||
                  argType == CHAR ||
-                 argType == VARCHAR) {
+                 argType == VARCHAR ||
+                 argType == BINARY) {
         return "String";
       } else {
         return "None";

http://git-wip-us.apache.org/repos/asf/hive/blob/c5b308e8/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index a999625..57873d6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -2297,6 +2297,8 @@ public class VectorizationContext {
     case VARCHAR:
       //Return the VARCHAR type as is, it includes maximum length.
       return hiveTypeName;
+    case BINARY:
+      return "Binary";
     case DATE:
       return "Date";
     case TIMESTAMP:
@@ -2324,6 +2326,8 @@ public class VectorizationContext {
       return "Char";
     case VARCHAR:
       return "VarChar";
+    case BINARY:
+      return "Binary";
     case DATE:
       return "Date";
     case TIMESTAMP:

http://git-wip-us.apache.org/repos/asf/hive/blob/c5b308e8/ql/src/test/queries/clientpositive/vector_binary_join_groupby.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_binary_join_groupby.q b/ql/src/test/queries/clientpositive/vector_binary_join_groupby.q
index 1a9d280..1d99e34 100644
--- a/ql/src/test/queries/clientpositive/vector_binary_join_groupby.q
+++ b/ql/src/test/queries/clientpositive/vector_binary_join_groupby.q
@@ -45,7 +45,7 @@ SELECT sum(hash(*))
 FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin;
 
 SELECT sum(hash(*))
-FROM hundredorc t1 JOIN hundredorc t2 ON t2.bin = t2.bin;
+FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin;
 
 EXPLAIN 
 SELECT count(*), bin
@@ -55,3 +55,9 @@ GROUP BY bin;
 SELECT count(*), bin
 FROM hundredorc
 GROUP BY bin;
+
+-- HIVE-14045: Involve a binary vector scratch column for small table result (Native Vector MapJoin).
+
+EXPLAIN
+SELECT t1.i, t1.bin, t2.bin
+FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/c5b308e8/ql/src/test/results/clientpositive/tez/vector_binary_join_groupby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_binary_join_groupby.q.out b/ql/src/test/results/clientpositive/tez/vector_binary_join_groupby.q.out
index 8cbb4b1..6fbbf91 100644
--- a/ql/src/test/results/clientpositive/tez/vector_binary_join_groupby.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_binary_join_groupby.q.out
@@ -194,18 +194,17 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Map Join MAPJOIN[16][bigTable=?] in task 'Map 1' is a cross product
 PREHOOK: query: SELECT sum(hash(*))
-FROM hundredorc t1 JOIN hundredorc t2 ON t2.bin = t2.bin
+FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
 PREHOOK: type: QUERY
 PREHOOK: Input: default@hundredorc
 #### A masked pattern was here ####
 POSTHOOK: query: SELECT sum(hash(*))
-FROM hundredorc t1 JOIN hundredorc t2 ON t2.bin = t2.bin
+FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@hundredorc
 #### A masked pattern was here ####
--107801098240
+-27832781952
 PREHOOK: query: EXPLAIN 
 SELECT count(*), bin
 FROM hundredorc
@@ -315,3 +314,88 @@ POSTHOOK: Input: default@hundredorc
 3	xylophone band
 2	yard duty
 3	zync studies
+PREHOOK: query: -- HIVE-14045: Involve a binary vector scratch column for small table result (Native Vector MapJoin).
+
+EXPLAIN
+SELECT t1.i, t1.bin, t2.bin
+FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i
+PREHOOK: type: QUERY
+POSTHOOK: query: -- HIVE-14045: Involve a binary vector scratch column for small table result (Native Vector MapJoin).
+
+EXPLAIN
+SELECT t1.i, t1.bin, t2.bin
+FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: i is not null (type: boolean)
+                    Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: i (type: int), bin (type: binary)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
+                        outputColumnNames: _col0, _col1, _col3
+                        input vertices:
+                          1 Map 2
+                        Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE
+                        HybridGraceHashJoin: true
+                        Select Operator
+                          expressions: _col0 (type: int), _col1 (type: binary), _col3 (type: binary)
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE
+                          File Output Operator
+                            compressed: false
+                            Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE
+                            table:
+                                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: i is not null (type: boolean)
+                    Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: i (type: int), bin (type: binary)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: binary)
+            Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+

http://git-wip-us.apache.org/repos/asf/hive/blob/c5b308e8/ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out b/ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out
index d9c027a..dc1fcd7 100644
--- a/ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out
+++ b/ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out
@@ -190,18 +190,17 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Map Join MAPJOIN[18][bigTable=?] in task 'Stage-2:MAPRED' is a cross product
 PREHOOK: query: SELECT sum(hash(*))
-FROM hundredorc t1 JOIN hundredorc t2 ON t2.bin = t2.bin
+FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
 PREHOOK: type: QUERY
 PREHOOK: Input: default@hundredorc
 #### A masked pattern was here ####
 POSTHOOK: query: SELECT sum(hash(*))
-FROM hundredorc t1 JOIN hundredorc t2 ON t2.bin = t2.bin
+FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@hundredorc
 #### A masked pattern was here ####
--107801098240
+-27832781952
 PREHOOK: query: EXPLAIN 
 SELECT count(*), bin
 FROM hundredorc
@@ -303,3 +302,86 @@ POSTHOOK: Input: default@hundredorc
 3	xylophone band
 2	yard duty
 3	zync studies
+PREHOOK: query: -- HIVE-14045: Involve a binary vector scratch column for small table result (Native Vector MapJoin).
+
+EXPLAIN
+SELECT t1.i, t1.bin, t2.bin
+FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i
+PREHOOK: type: QUERY
+POSTHOOK: query: -- HIVE-14045: Involve a binary vector scratch column for small table result (Native Vector MapJoin).
+
+EXPLAIN
+SELECT t1.i, t1.bin, t2.bin
+FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-4 is a root stage
+  Stage-3 depends on stages: Stage-4
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-4
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $hdt$_0:t1 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $hdt$_0:t1 
+          TableScan
+            alias: t1
+            Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: i is not null (type: boolean)
+              Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: i (type: int), bin (type: binary)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+                HashTable Sink Operator
+                  keys:
+                    0 _col0 (type: int)
+                    1 _col0 (type: int)
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: i is not null (type: boolean)
+              Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: i (type: int), bin (type: binary)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE
+                Map Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  keys:
+                    0 _col0 (type: int)
+                    1 _col0 (type: int)
+                  outputColumnNames: _col0, _col1, _col3
+                  Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: int), _col1 (type: binary), _col3 (type: binary)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+