You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/03/30 07:19:49 UTC
[3/3] hive git commit: HIVE-19032: Vectorization: Disable GROUP BY
aggregations with DISTINCT (Matt McCline, reviewed by Gopal Vijayaraghavan)
HIVE-19032: Vectorization: Disable GROUP BY aggregations with DISTINCT (Matt McCline, reviewed by Gopal Vijayaraghavan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/470a2f99
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/470a2f99
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/470a2f99
Branch: refs/heads/master
Commit: 470a2f998494ef2a78e1424efae70d9eb1b17447
Parents: 1974397
Author: Matt McCline <mm...@hortonworks.com>
Authored: Fri Mar 30 02:19:24 2018 -0500
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Fri Mar 30 02:19:24 2018 -0500
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 4 +
.../hive/ql/optimizer/physical/Vectorizer.java | 6 +-
.../test/queries/clientpositive/groupby_cube1.q | 1 +
.../clientpositive/groupby_grouping_id1.q | 2 +
.../clientpositive/groupby_grouping_id2.q | 1 +
.../clientpositive/groupby_grouping_id3.q | 2 +
.../clientpositive/groupby_grouping_sets1.q | 1 +
.../clientpositive/groupby_grouping_sets2.q | 1 +
.../clientpositive/groupby_grouping_sets3.q | 1 +
.../clientpositive/groupby_grouping_sets4.q | 1 +
.../clientpositive/groupby_grouping_sets5.q | 1 +
.../clientpositive/groupby_grouping_sets6.q | 2 +
.../groupby_grouping_sets_grouping.q | 1 +
.../queries/clientpositive/groupby_rollup1.q | 1 +
.../queries/clientpositive/groupby_sort_11.q | 3 +
.../queries/clientpositive/groupby_sort_8.q | 3 +
.../clientpositive/vector_groupby_cube1.q | 2 +-
.../vector_groupby_grouping_sets_grouping.q | 29 +
.../clientpositive/vector_groupby_sort_11.q | 50 +
.../clientpositive/vector_groupby_sort_8.q | 24 +
.../clientpositive/groupby_sort_11.q.out | 4 +-
.../clientpositive/llap/vector_count.q.out | 30 +-
.../llap/vector_groupby_cube1.q.out | 501 +++++++++-
.../vector_groupby_grouping_sets_grouping.q.out | 463 +++++++++
.../llap/vector_groupby_rollup1.q.out | 94 +-
.../llap/vector_groupby_sort_11.q.out | 996 +++++++++++++++++++
.../llap/vector_groupby_sort_8.q.out | 186 ++++
.../llap/vectorization_limit.q.out | 952 ++++++++++++++++++
.../llap/vectorized_distinct_gby.q.out | 362 +++++++
.../parquet_vectorization_limit.q.out | 34 +-
.../results/clientpositive/vector_count.q.out | 29 +-
.../clientpositive/vectorization_limit.q.out | 34 +-
.../vectorized_distinct_gby.q.out | 69 +-
33 files changed, 3601 insertions(+), 289 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 937ea79..a42ae80 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -352,6 +352,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
vector_decimal_udf.q,\
vector_decimal_udf2.q,\
vector_distinct_2.q,\
+ vectorized_distinct_gby.q,\
vector_elt.q,\
vector_groupby4.q,\
vector_groupby6.q,\
@@ -420,6 +421,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
vectorization_8.q,\
vectorization_9.q,\
vectorization_decimal_date.q,\
+ vectorization_limit.q,\
vectorization_nested_udf.q,\
vectorization_not.q,\
vectorization_part.q,\
@@ -747,6 +749,8 @@ minillaplocal.query.files=\
vector_groupby_grouping_sets_limit.q,\
vector_groupby_grouping_window.q,\
vector_groupby_rollup1.q,\
+ vector_groupby_sort_11.q,\
+ vector_groupby_sort_8.q,\
vector_if_expr_2.q,\
vector_join30.q,\
vector_join_filters.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 33830b3..a822a4b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -2876,13 +2876,13 @@ public class Vectorizer implements PhysicalPlanResolver {
setExpressionIssue("Aggregation Function", "UDF " + udfName + " not supported");
return false;
}
- /*
+
// The planner seems to pull this one out.
if (aggDesc.getDistinct()) {
setExpressionIssue("Aggregation Function", "DISTINCT not supported");
- return new Pair<Boolean,Boolean>(false, false);
+ return false;
}
- */
+
ArrayList<ExprNodeDesc> parameters = aggDesc.getParameters();
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_cube1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_cube1.q b/ql/src/test/queries/clientpositive/groupby_cube1.q
index fd2f0de..92456d0 100644
--- a/ql/src/test/queries/clientpositive/groupby_cube1.q
+++ b/ql/src/test/queries/clientpositive/groupby_cube1.q
@@ -1,3 +1,4 @@
+SET hive.vectorized.execution.enabled=false;
set hive.mapred.mode=nonstrict;
set hive.map.aggr=true;
set hive.groupby.skewindata=false;
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_grouping_id1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_id1.q b/ql/src/test/queries/clientpositive/groupby_grouping_id1.q
index 9948ce9..7068d21 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_id1.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_id1.q
@@ -1,3 +1,5 @@
+SET hive.vectorized.execution.enabled=false;
+
CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE;
LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1;
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_id2.q b/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
index cc7f9e4..ba755c4 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
@@ -1,4 +1,5 @@
set hive.explain.user=false;
+SET hive.vectorized.execution.enabled=false;
set hive.fetch.task.conversion=none;
set hive.cli.print.header=true;
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_grouping_id3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_id3.q b/ql/src/test/queries/clientpositive/groupby_grouping_id3.q
index 955dbe0..29b2f15 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_id3.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_id3.q
@@ -1,3 +1,5 @@
+SET hive.vectorized.execution.enabled=false;
+
CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE;
LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1;
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_grouping_sets1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets1.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets1.q
index c22c97f..86c5246 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_sets1.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets1.q
@@ -1,4 +1,5 @@
set hive.explain.user=false;
+SET hive.vectorized.execution.enabled=false;
set hive.fetch.task.conversion=none;
set hive.cli.print.header=true;
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_grouping_sets2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets2.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets2.q
index 90e6325..1934321 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_sets2.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets2.q
@@ -1,4 +1,5 @@
set hive.explain.user=false;
+SET hive.vectorized.execution.enabled=false;
set hive.cli.print.header=true;
set hive.mapred.mode=nonstrict;
set hive.new.job.grouping.set.cardinality=2;
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_grouping_sets3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets3.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets3.q
index 16421e8..81267dc 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_sets3.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets3.q
@@ -1,4 +1,5 @@
set hive.explain.user=false;
+SET hive.vectorized.execution.enabled=false;
set hive.fetch.task.conversion=none;
set hive.cli.print.header=true;
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_grouping_sets4.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets4.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets4.q
index 1074a3b..fa62992 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_sets4.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets4.q
@@ -1,4 +1,5 @@
set hive.explain.user=false;
+SET hive.vectorized.execution.enabled=false;
set hive.mapred.mode=nonstrict;
set hive.merge.mapfiles = false;
set hive.merge.mapredfiles = false;
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_grouping_sets5.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets5.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets5.q
index 570d464..829a0c2 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_sets5.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets5.q
@@ -1,4 +1,5 @@
set hive.mapred.mode=nonstrict;
+SET hive.vectorized.execution.enabled=false;
set hive.merge.mapfiles = false;
set hive.merge.mapredfiles = false;
-- Set merging to false above to make the explain more readable
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_grouping_sets6.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets6.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets6.q
index e537bce..515dce3 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_sets6.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets6.q
@@ -1,4 +1,6 @@
set hive.mapred.mode=nonstrict;
+SET hive.vectorized.execution.enabled=false;
+
CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE;
LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1;
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q
index 7157106..3f437a4 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q
@@ -1,3 +1,4 @@
+SET hive.vectorized.execution.enabled=false;
-- SORT_QUERY_RESULTS
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_rollup1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_rollup1.q b/ql/src/test/queries/clientpositive/groupby_rollup1.q
index f30ace0..94f533c 100644
--- a/ql/src/test/queries/clientpositive/groupby_rollup1.q
+++ b/ql/src/test/queries/clientpositive/groupby_rollup1.q
@@ -1,3 +1,4 @@
+SET hive.vectorized.execution.enabled=false;
set hive.mapred.mode=nonstrict;
set hive.map.aggr=true;
set hive.groupby.skewindata=false;
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_sort_11.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_sort_11.q b/ql/src/test/queries/clientpositive/groupby_sort_11.q
index 3b6c172..c56789d 100644
--- a/ql/src/test/queries/clientpositive/groupby_sort_11.q
+++ b/ql/src/test/queries/clientpositive/groupby_sort_11.q
@@ -1,7 +1,10 @@
+SET hive.vectorized.execution.enabled=false;
set hive.mapred.mode=nonstrict;
set hive.exec.reducers.max = 1;
set hive.map.groupby.sorted=true;
+-- SORT_QUERY_RESULTS
+
CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string)
CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_sort_8.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_sort_8.q b/ql/src/test/queries/clientpositive/groupby_sort_8.q
index 3f81a69..2c20b29 100644
--- a/ql/src/test/queries/clientpositive/groupby_sort_8.q
+++ b/ql/src/test/queries/clientpositive/groupby_sort_8.q
@@ -1,7 +1,10 @@
+SET hive.vectorized.execution.enabled=false;
set hive.mapred.mode=nonstrict;
set hive.exec.reducers.max = 10;
set hive.map.groupby.sorted=true;
+-- SORT_QUERY_RESULTS
+
CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string)
CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/vector_groupby_cube1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_groupby_cube1.q b/ql/src/test/queries/clientpositive/vector_groupby_cube1.q
index 1f7b467..d6bab2c 100644
--- a/ql/src/test/queries/clientpositive/vector_groupby_cube1.q
+++ b/ql/src/test/queries/clientpositive/vector_groupby_cube1.q
@@ -1,4 +1,4 @@
-SET hive.vectorized.execution.enabled=false;
+SET hive.vectorized.execution.enabled=true;
SET hive.vectorized.execution.reduce.enabled=true;
set hive.mapred.mode=nonstrict;
set hive.map.aggr=true;
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q b/ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q
index a0e874d..5a5757d 100644
--- a/ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q
+++ b/ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q
@@ -99,6 +99,35 @@ having grouping(key) = 1 OR grouping(value) = 1
order by x desc, case when x = 1 then key end;
explain vectorization detail
+select key, value, grouping(key), grouping(value)
+from T1
+group by key, value;
+
+select key, value, grouping(key), grouping(value)
+from T1
+group by key, value;
+
+explain vectorization detail
+select key, value, grouping(value)
+from T1
+group by key, value;
+
+select key, value, grouping(value)
+from T1
+group by key, value;
+
+explain vectorization detail
+select key, value
+from T1
+group by key, value
+having grouping(key) = 0;
+
+select key, value
+from T1
+group by key, value
+having grouping(key) = 0;
+
+explain vectorization detail
select key, value, `grouping__id`, grouping(key, value)
from T1
group by cube(key, value);
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/vector_groupby_sort_11.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_groupby_sort_11.q b/ql/src/test/queries/clientpositive/vector_groupby_sort_11.q
new file mode 100644
index 0000000..012fe82
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_groupby_sort_11.q
@@ -0,0 +1,50 @@
+SET hive.vectorized.execution.enabled=true;
+SET hive.vectorized.execution.reduce.enabled=true;
+set hive.mapred.mode=nonstrict;
+set hive.exec.reducers.max = 1;
+set hive.map.groupby.sorted=true;
+
+-- SORT_QUERY_RESULTS
+
+CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+
+-- perform an insert to make sure there are 2 files
+INSERT OVERWRITE TABLE T1 PARTITION (ds='1')
+SELECT * from src where key < 10;
+
+-- The plan is optimized to perform partial aggregation on the mapper
+EXPLAIN VECTORIZATION DETAIL
+select count(distinct key) from T1;
+select count(distinct key) from T1;
+
+-- The plan is optimized to perform partial aggregation on the mapper
+EXPLAIN VECTORIZATION DETAIL
+select count(distinct key), count(1), count(key), sum(distinct key) from T1;
+select count(distinct key), count(1), count(key), sum(distinct key) from T1;
+
+-- The plan is not changed in the presence of a grouping key
+EXPLAIN VECTORIZATION DETAIL
+select count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key;
+select count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key;
+
+-- The plan is not changed in the presence of a grouping key
+EXPLAIN VECTORIZATION DETAIL
+select key, count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key;
+select key, count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key;
+
+-- The plan is not changed in the presence of a grouping key expression
+EXPLAIN VECTORIZATION DETAIL
+select count(distinct key+key) from T1;
+select count(distinct key+key) from T1;
+
+EXPLAIN VECTORIZATION DETAIL
+select count(distinct 1) from T1;
+select count(distinct 1) from T1;
+
+set hive.map.aggr=false;
+
+-- no plan change if map aggr is turned off
+EXPLAIN VECTORIZATION DETAIL
+select count(distinct key) from T1;
+select count(distinct key) from T1;
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/vector_groupby_sort_8.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_groupby_sort_8.q b/ql/src/test/queries/clientpositive/vector_groupby_sort_8.q
new file mode 100644
index 0000000..b0c5699
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_groupby_sort_8.q
@@ -0,0 +1,24 @@
+SET hive.vectorized.execution.enabled=true;
+SET hive.vectorized.execution.reduce.enabled=true;
+set hive.mapred.mode=nonstrict;
+set hive.exec.reducers.max = 10;
+set hive.map.groupby.sorted=true;
+
+-- SORT_QUERY_RESULTS
+
+CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1 PARTITION (ds='1');
+
+-- perform an insert to make sure there are 2 files
+INSERT OVERWRITE TABLE T1 PARTITION (ds='1') select key, val from T1 where ds = '1';
+
+-- The plan is not converted to a map-side, since although the sorting columns and grouping
+-- columns match, the user is issuing a distinct.
+-- However, after HIVE-4310, partial aggregation is performed on the mapper
+EXPLAIN VECTORIZATION DETAIL
+select count(distinct key) from T1;
+select count(distinct key) from T1;
+
+DROP TABLE T1;
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/results/clientpositive/groupby_sort_11.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_sort_11.q.out b/ql/src/test/results/clientpositive/groupby_sort_11.q.out
index 23c89f9..cbdc526 100644
--- a/ql/src/test/results/clientpositive/groupby_sort_11.q.out
+++ b/ql/src/test/results/clientpositive/groupby_sort_11.q.out
@@ -211,12 +211,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@t1@ds=1
#### A masked pattern was here ####
-1 3 3 0.0
1 1 1 2.0
1 1 1 4.0
-1 3 3 5.0
1 1 1 8.0
1 1 1 9.0
+1 3 3 0.0
+1 3 3 5.0
PREHOOK: query: EXPLAIN select key, count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key
PREHOOK: type: QUERY
POSTHOOK: query: EXPLAIN select key, count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/results/clientpositive/llap/vector_count.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_count.q.out b/ql/src/test/results/clientpositive/llap/vector_count.q.out
index 400d930..ce35eb8 100644
--- a/ql/src/test/results/clientpositive/llap/vector_count.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_count.q.out
@@ -68,26 +68,12 @@ STAGE PLANS:
TableScan
alias: abcd
Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Select Operator
expressions: a (type: int), b (type: int), c (type: int), d (type: int)
outputColumnNames: a, b, c, d
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 2, 3]
Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(DISTINCT b), count(DISTINCT c), sum(d)
- Group By Vectorization:
- aggregators: VectorUDAFCount(col 1:int) -> bigint, VectorUDAFCount(col 2:int) -> bigint, VectorUDAFSumLong(col 3:int) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 0:int, col 1:int, col 2:int
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1, 2]
keys: a (type: int), b (type: int), c (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
@@ -96,24 +82,16 @@ STAGE PLANS:
key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
sort order: +++
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: No DISTINCT columns IS false
Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: NONE
value expressions: _col5 (type: bigint)
- Execution mode: vectorized, llap
+ Execution mode: llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: []
- featureSupportInUse: []
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+ vectorized: false
Reducer 2
Execution mode: llap
Reduce Vectorization:
@@ -200,7 +178,7 @@ STAGE PLANS:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- notVectorizedReason: GROUPBY operator: Aggregations with > 1 parameter are not supported count([Column[a], Column[b]])
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
vectorized: false
Reducer 2
Execution mode: llap
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out
index f1ed146..3bfbda0 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out
@@ -21,8 +21,8 @@ POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
- enabled: false
- enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -41,12 +41,27 @@ STAGE PLANS:
TableScan
alias: t1
Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Select Operator
expressions: key (type: string), val (type: string)
outputColumnNames: key, val
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1]
Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
+ Group By Vectorization:
+ aggregators: VectorUDAFCountStar(*) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
keys: key (type: string), val (type: string), 0L (type: bigint)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
@@ -55,15 +70,58 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ keyColumnNums: [0, 1, 2]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: [3]
Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: bigint)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: key:string, val:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint]
Reducer 2
- Execution mode: llap
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aaa
+ reduceColumnSortOrder: +++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 4
+ dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:string, col 1:string, col 2:bigint
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
mode: mergepartial
outputColumnNames: _col0, _col1, _col3
@@ -72,9 +130,16 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2]
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -94,8 +159,8 @@ POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
SELECT key, val, count(1) FROM T1 GROUP BY CUBE(key, val)
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
- enabled: false
- enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -114,12 +179,27 @@ STAGE PLANS:
TableScan
alias: t1
Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Select Operator
expressions: key (type: string), val (type: string)
outputColumnNames: key, val
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1]
Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
+ Group By Vectorization:
+ aggregators: VectorUDAFCountStar(*) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
keys: key (type: string), val (type: string), 0L (type: bigint)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
@@ -128,15 +208,58 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ keyColumnNums: [0, 1, 2]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: [3]
Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: bigint)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: key:string, val:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint]
Reducer 2
- Execution mode: llap
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aaa
+ reduceColumnSortOrder: +++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 4
+ dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:string, col 1:string, col 2:bigint
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
mode: mergepartial
outputColumnNames: _col0, _col1, _col3
@@ -145,9 +268,16 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2]
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -193,8 +323,8 @@ POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
SELECT key, val, GROUPING__ID, count(1) FROM T1 GROUP BY key, val with cube
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
- enabled: false
- enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -213,12 +343,27 @@ STAGE PLANS:
TableScan
alias: t1
Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Select Operator
expressions: key (type: string), val (type: string)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1]
Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
+ Group By Vectorization:
+ aggregators: VectorUDAFCountStar(*) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
@@ -227,15 +372,58 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ keyColumnNums: [0, 1, 2]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: [3]
Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: bigint)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: key:string, val:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint]
Reducer 2
- Execution mode: llap
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aaa
+ reduceColumnSortOrder: +++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 4
+ dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:string, col 1:string, col 2:bigint
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3
@@ -243,9 +431,16 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), _col3 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2, 3]
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -291,8 +486,8 @@ POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
SELECT key, count(distinct val) FROM T1 GROUP BY key with cube
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
- enabled: false
- enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -328,8 +523,19 @@ STAGE PLANS:
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Execution mode: llap
LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+ vectorized: false
Reducer 2
Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: GROUPBY operator: DISTINCT not supported
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: count(DISTINCT KEY._col2:0._col0)
@@ -377,8 +583,8 @@ POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
- enabled: false
- enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -398,12 +604,27 @@ STAGE PLANS:
TableScan
alias: t1
Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Select Operator
expressions: key (type: string), val (type: string)
outputColumnNames: key, val
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1]
Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
+ Group By Vectorization:
+ aggregators: VectorUDAFCountStar(*) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
keys: key (type: string), val (type: string), 0L (type: bigint)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
@@ -412,15 +633,59 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
sort order: +++
Map-reduce partition columns: rand() (type: double)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumnNums: [0, 1, 2]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumnNums: [4]
+ valueColumnNums: [3]
Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: bigint)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: key:string, val:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint]
Reducer 2
- Execution mode: llap
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aaa
+ reduceColumnSortOrder: +++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 4
+ dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: PARTIALS
+ keyExpressions: col 0:string, col 1:string, col 2:bigint
+ native: false
+ vectorProcessingMode: STREAMING
+ projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
mode: partials
outputColumnNames: _col0, _col1, _col2, _col3
@@ -429,13 +694,41 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumnNums: [0, 1, 2]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumnNums: [0, 1]
+ valueColumnNums: [3]
Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: bigint)
Reducer 3
- Execution mode: llap
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aaa
+ reduceColumnSortOrder: +++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 4
+ dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: FINAL
+ keyExpressions: col 0:string, col 1:string, col 2:bigint
+ native: false
+ vectorProcessingMode: STREAMING
+ projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
mode: final
outputColumnNames: _col0, _col1, _col3
@@ -444,9 +737,16 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2]
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -492,8 +792,8 @@ POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
SELECT key, count(distinct val) FROM T1 GROUP BY key with cube
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
- enabled: false
- enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -530,8 +830,19 @@ STAGE PLANS:
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Execution mode: llap
LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+ vectorized: false
Reducer 2
Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: GROUPBY operator: DISTINCT not supported
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: count(DISTINCT KEY._col2:0._col0)
@@ -547,6 +858,11 @@ STAGE PLANS:
value expressions: _col2 (type: bigint)
Reducer 3
Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: GROUPBY operator: DISTINCT not supported
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
@@ -614,8 +930,8 @@ INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube
INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
- enabled: false
- enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-2 is a root stage
@@ -643,12 +959,27 @@ STAGE PLANS:
TableScan
alias: t1
Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Select Operator
expressions: key (type: string), val (type: string)
outputColumnNames: key, val
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1]
Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
+ Group By Vectorization:
+ aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 4:int) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
keys: key (type: string), val (type: string), 0L (type: bigint)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
@@ -657,14 +988,33 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
sort order: +++
Map-reduce partition columns: rand() (type: double)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumnNums: [0, 1, 2]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumnNums: [4]
+ valueColumnNums: [3]
Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: bigint)
Select Operator
expressions: key (type: string), val (type: string)
outputColumnNames: key, val
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1]
Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(1)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumLong(ConstantVectorExpression(val 1) -> 6:int) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
keys: key (type: string), val (type: string), 0L (type: bigint)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
@@ -673,15 +1023,59 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
sort order: +++
Map-reduce partition columns: rand() (type: double)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumnNums: [0, 1, 2]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumnNums: [4]
+ valueColumnNums: [3]
Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: bigint)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: key:string, val:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint, bigint, bigint]
Reducer 2
- Execution mode: llap
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aaa
+ reduceColumnSortOrder: +++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 4
+ dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: PARTIALS
+ keyExpressions: col 0:string, col 1:string, col 2:bigint
+ native: false
+ vectorProcessingMode: STREAMING
+ projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
mode: partials
outputColumnNames: _col0, _col1, _col2, _col3
@@ -690,10 +1084,22 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumnNums: [0, 1, 2]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumnNums: [0, 1]
+ valueColumnNums: [3]
Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: bigint)
Reducer 3
Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
@@ -729,6 +1135,11 @@ STAGE PLANS:
value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
Reducer 4
Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
@@ -743,10 +1154,31 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 5
- Execution mode: llap
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aaa
+ reduceColumnSortOrder: +++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 4
+ dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: PARTIALS
+ keyExpressions: col 0:string, col 1:string, col 2:bigint
+ native: false
+ vectorProcessingMode: STREAMING
+ projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
mode: partials
outputColumnNames: _col0, _col1, _col2, _col3
@@ -755,10 +1187,22 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumnNums: [0, 1, 2]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumnNums: [0, 1]
+ valueColumnNums: [3]
Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: bigint)
Reducer 6
Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
@@ -794,6 +1238,11 @@ STAGE PLANS:
value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
Reducer 7
Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)