You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2020/07/17 15:12:36 UTC
[hive] 01/02: Revert "Allow PPD when subject is not a column with
grouping sets present (Zhihua Deng, reviewed by Jesus Camacho Rodriguez)"
This is an automated email from the ASF dual-hosted git repository.
jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
commit 86781643c3d90887c04551aecf5b5d54b86fbe48
Author: Jesus Camacho Rodriguez <jc...@apache.org>
AuthorDate: Fri Jul 17 08:11:39 2020 -0700
Revert "Allow PPD when subject is not a column with grouping sets present (Zhihua Deng, reviewed by Jesus Camacho Rodriguez)"
This reverts commit 44aa72f096639d7b1a52ef18887016af98bd6999.
---
.../apache/hadoop/hive/ql/ppd/OpProcFactory.java | 44 +-
.../groupby_grouping_sets_pushdown1.q | 54 +-
.../llap/groupby_grouping_sets_pushdown1.q.out | 802 ---------------------
3 files changed, 23 insertions(+), 877 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
index 56d3e90..6c66260 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
@@ -792,40 +792,40 @@ public final class OpProcFactory {
return null;
}
- private void getGBYKeyPosFromExpr(ExprNodeDesc expr, List<ExprNodeDesc> groupByKeys,
- List<Integer> gbyKeyPos) {
- for (int i = 0; i < groupByKeys.size(); i++) {
- if (groupByKeys.get(i).isSame(expr)) {
- gbyKeyPos.add(i);
- return;
- }
- }
- if (expr.getChildren() != null) {
- for (int i = 0; i < expr.getChildren().size(); i++) {
- getGBYKeyPosFromExpr(expr.getChildren().get(i), groupByKeys, gbyKeyPos);
- }
- }
- }
-
private boolean canPredPushdown(ExprNodeDesc expr, List<ExprNodeDesc> groupByKeys,
FastBitSet[] bitSets, int groupingSetPosition) {
- List<Integer> gbyKeyPos = new ArrayList<Integer>();
- getGBYKeyPosFromExpr(expr, groupByKeys, gbyKeyPos);
- // gbyKeyPos can be empty, e.g. when the expr is a boolean constant; in that case let the expr push down
- for (Integer pos : gbyKeyPos) {
+ List<ExprNodeDesc> columns = new ArrayList<ExprNodeDesc>();
+ extractCols(expr, columns);
+ for (ExprNodeDesc col : columns) {
+ int index = groupByKeys.indexOf(col);
+ assert index >= 0;
for (FastBitSet bitset : bitSets) {
int keyPos = bitset.nextClearBit(0);
- while (keyPos < groupingSetPosition && keyPos != pos) {
+ while (keyPos < groupingSetPosition && keyPos != index) {
keyPos = bitset.nextClearBit(keyPos + 1);
}
- // If the gbyKey has not been found in the grouping sets, the expr should not be pushed down
- if (keyPos != pos) {
+ // If the column has not been found in the grouping sets, the expr should not be pushed down
+ if (keyPos != index) {
return false;
}
}
}
return true;
}
+
+ // Extract columns from expression
+ private void extractCols(ExprNodeDesc expr, List<ExprNodeDesc> columns) {
+ if (expr instanceof ExprNodeColumnDesc) {
+ columns.add(expr);
+ }
+
+ if (expr instanceof ExprNodeGenericFuncDesc) {
+ List<ExprNodeDesc> children = expr.getChildren();
+ for (int i = 0; i < children.size(); ++i) {
+ extractCols(children.get(i), columns);
+ }
+ }
+ }
}
/**
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q
index cbfe58c..ce2c68c 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q
@@ -39,56 +39,4 @@ SELECT * FROM (
SELECT a, b, sum(s)
FROM T1
GROUP BY a, b GROUPING SETS ((a), (a, b))
-) t WHERE b IS NULL;
-
-EXPLAIN EXTENDED SELECT * FROM (
-SELECT upper(a) x, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((a), (a, b))
-) t WHERE x in ("AAA", "BBB");
-
-SELECT * FROM (
-SELECT upper(a) x, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((a), (a, b))
-) t WHERE x in ('AAA', 'BBB');
-
-EXPLAIN EXTENDED SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((a), (a, b))
-HAVING upper(a) = 'AAA' AND 1 != 1;
-
-SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((a), (a, b))
-HAVING upper(a) = 'AAA' AND 1 != 1;
-
-EXPLAIN EXTENDED SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((), (a), (a, b))
-HAVING upper(a) = 'AAA' AND sum(s) > 100;
-
-SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((), (a), (a, b))
-HAVING upper(a) = 'AAA' AND sum(s) > 100;
-
-EXPLAIN EXTENDED SELECT upper(a), b, sum(s)
-FROM T1
-GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b))
-HAVING upper(a) = 'AAA' AND sum(s) > 100;
-
-SELECT upper(a), b, sum(s)
-FROM T1
-GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b))
-HAVING upper(a) = 'AAA' AND sum(s) > 100;
-
-EXPLAIN EXTENDED SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((b), (a, b))
-HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB';
-
-SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((b), (a, b))
-HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB';
+) t WHERE b IS NULL;
\ No newline at end of file
diff --git a/ql/src/test/results/clientpositive/llap/groupby_grouping_sets_pushdown1.q.out b/ql/src/test/results/clientpositive/llap/groupby_grouping_sets_pushdown1.q.out
index 81fdd06..2d71757 100644
--- a/ql/src/test/results/clientpositive/llap/groupby_grouping_sets_pushdown1.q.out
+++ b/ql/src/test/results/clientpositive/llap/groupby_grouping_sets_pushdown1.q.out
@@ -643,805 +643,3 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
#### A masked pattern was here ####
aaa NULL 123456
-PREHOOK: query: EXPLAIN EXTENDED SELECT * FROM (
-SELECT upper(a) x, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((a), (a, b))
-) t WHERE x in ("AAA", "BBB")
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-#### A masked pattern was here ####
-POSTHOOK: query: EXPLAIN EXTENDED SELECT * FROM (
-SELECT upper(a) x, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((a), (a, b))
-) t WHERE x in ("AAA", "BBB")
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-#### A masked pattern was here ####
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: t1
- filterExpr: (upper(a)) IN ('AAA', 'BBB') (type: boolean)
- Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: (upper(a)) IN ('AAA', 'BBB') (type: boolean)
- Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: sum(s)
- keys: a (type: string), b (type: string), 0L (type: bigint)
- minReductionHashAggr: 0.0
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- bucketingVersion: 2
- key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
- null sort order: zzz
- numBuckets: -1
- sort order: +++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
- Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
- tag: -1
- value expressions: _col3 (type: bigint)
- auto parallelism: true
- Execution mode: vectorized, llap
- LLAP IO: no inputs
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: t1
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- bucketing_version 2
- column.name.delimiter ,
- columns a,b,s
- columns.types string:string:bigint
-#### A masked pattern was here ####
- name default.t1
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucketing_version 2
- column.name.delimiter ,
- columns a,b,s
- columns.comments
- columns.types string:string:bigint
-#### A masked pattern was here ####
- name default.t1
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.t1
- name: default.t1
- Truncated Path -> Alias:
- /t1 [t1]
- Reducer 2
- Execution mode: vectorized, llap
- Needs Tagging: false
- Reduce Operator Tree:
- Group By Operator
- aggregations: sum(VALUE._col0)
- keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
- pruneGroupingSetId: true
- Select Operator
- expressions: upper(_col0) (type: string), _col1 (type: string), _col3 (type: bigint)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- bucketingVersion: 2
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- bucketing_version -1
- columns _col0,_col1,_col2
- columns.types string:string:bigint
- escape.delim \
- hive.serialization.extend.additional.nesting.levels true
- serialization.escape.crlf true
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT * FROM (
-SELECT upper(a) x, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((a), (a, b))
-) t WHERE x in ('AAA', 'BBB')
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT * FROM (
-SELECT upper(a) x, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((a), (a, b))
-) t WHERE x in ('AAA', 'BBB')
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-#### A masked pattern was here ####
-AAA bbb 123456
-AAA NULL 123456
-PREHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((a), (a, b))
-HAVING upper(a) = 'AAA' AND 1 != 1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-#### A masked pattern was here ####
-POSTHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((a), (a, b))
-HAVING upper(a) = 'AAA' AND 1 != 1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-#### A masked pattern was here ####
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: t1
- Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: false (type: boolean)
- Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: sum(s)
- keys: a (type: string), b (type: string), 0L (type: bigint)
- minReductionHashAggr: 0.0
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- bucketingVersion: 2
- key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
- null sort order: zzz
- numBuckets: -1
- sort order: +++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
- Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
- tag: -1
- value expressions: _col3 (type: bigint)
- auto parallelism: true
- Execution mode: vectorized, llap
- LLAP IO: no inputs
- Path -> Alias:
- nullscan://null/default.t1/part_ [t1]
- Path -> Partition:
- nullscan://null/default.t1/part_
- Partition
- input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- bucketing_version 2
- column.name.delimiter ,
- columns a,b,s
- columns.types string:string:bigint
-#### A masked pattern was here ####
- name default.t1
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe
- serde: org.apache.hadoop.hive.serde2.NullStructSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucketing_version 2
- column.name.delimiter ,
- columns a,b,s
- columns.comments
- columns.types string:string:bigint
-#### A masked pattern was here ####
- name default.t1
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.t1
- name: default.t1
- Truncated Path -> Alias:
- nullscan://null/default.t1/part_ [t1]
- Reducer 2
- Execution mode: vectorized, llap
- Needs Tagging: false
- Reduce Operator Tree:
- Group By Operator
- aggregations: sum(VALUE._col0)
- keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
- pruneGroupingSetId: true
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- bucketingVersion: 2
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- bucketing_version -1
- columns _col0,_col1,_col2
- columns.types string:string:bigint
- escape.delim \
- hive.serialization.extend.additional.nesting.levels true
- serialization.escape.crlf true
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((a), (a, b))
-HAVING upper(a) = 'AAA' AND 1 != 1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((a), (a, b))
-HAVING upper(a) = 'AAA' AND 1 != 1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-#### A masked pattern was here ####
-PREHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((), (a), (a, b))
-HAVING upper(a) = 'AAA' AND sum(s) > 100
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-#### A masked pattern was here ####
-POSTHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((), (a), (a, b))
-HAVING upper(a) = 'AAA' AND sum(s) > 100
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-#### A masked pattern was here ####
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: t1
- Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
- GatherStats: false
- Select Operator
- expressions: a (type: string), b (type: string), s (type: bigint)
- outputColumnNames: a, b, s
- Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: sum(s)
- keys: a (type: string), b (type: string), 0L (type: bigint)
- minReductionHashAggr: 0.0
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- isSamplingPred: false
- predicate: (upper(_col0) = 'AAA') (type: boolean)
- Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- bucketingVersion: 2
- key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
- null sort order: zzz
- numBuckets: -1
- sort order: +++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
- Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
- tag: -1
- value expressions: _col3 (type: bigint)
- auto parallelism: true
- Execution mode: vectorized, llap
- LLAP IO: no inputs
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: t1
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- bucketing_version 2
- column.name.delimiter ,
- columns a,b,s
- columns.types string:string:bigint
-#### A masked pattern was here ####
- name default.t1
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucketing_version 2
- column.name.delimiter ,
- columns a,b,s
- columns.comments
- columns.types string:string:bigint
-#### A masked pattern was here ####
- name default.t1
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.t1
- name: default.t1
- Truncated Path -> Alias:
- /t1 [t1]
- Reducer 2
- Execution mode: vectorized, llap
- Needs Tagging: false
- Reduce Operator Tree:
- Group By Operator
- aggregations: sum(VALUE._col0)
- keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
- pruneGroupingSetId: true
- Filter Operator
- isSamplingPred: false
- predicate: (_col3 > 100L) (type: boolean)
- Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- bucketingVersion: 2
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- bucketing_version -1
- columns _col0,_col1,_col2
- columns.types string:string:bigint
- escape.delim \
- hive.serialization.extend.additional.nesting.levels true
- serialization.escape.crlf true
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((), (a), (a, b))
-HAVING upper(a) = 'AAA' AND sum(s) > 100
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((), (a), (a, b))
-HAVING upper(a) = 'AAA' AND sum(s) > 100
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-#### A masked pattern was here ####
-aaa bbb 123456
-aaa NULL 123456
-PREHOOK: query: EXPLAIN EXTENDED SELECT upper(a), b, sum(s)
-FROM T1
-GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b))
-HAVING upper(a) = 'AAA' AND sum(s) > 100
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-#### A masked pattern was here ####
-POSTHOOK: query: EXPLAIN EXTENDED SELECT upper(a), b, sum(s)
-FROM T1
-GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b))
-HAVING upper(a) = 'AAA' AND sum(s) > 100
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-#### A masked pattern was here ####
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: t1
- filterExpr: (upper(a) = 'AAA') (type: boolean)
- Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: (upper(a) = 'AAA') (type: boolean)
- Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: sum(s)
- keys: upper(a) (type: string), b (type: string), 0L (type: bigint)
- minReductionHashAggr: 0.0
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 287 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- bucketingVersion: 2
- key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
- null sort order: zzz
- numBuckets: -1
- sort order: +++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
- Statistics: Num rows: 1 Data size: 287 Basic stats: COMPLETE Column stats: COMPLETE
- tag: -1
- value expressions: _col3 (type: bigint)
- auto parallelism: true
- Execution mode: vectorized, llap
- LLAP IO: no inputs
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: t1
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- bucketing_version 2
- column.name.delimiter ,
- columns a,b,s
- columns.types string:string:bigint
-#### A masked pattern was here ####
- name default.t1
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucketing_version 2
- column.name.delimiter ,
- columns a,b,s
- columns.comments
- columns.types string:string:bigint
-#### A masked pattern was here ####
- name default.t1
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.t1
- name: default.t1
- Truncated Path -> Alias:
- /t1 [t1]
- Reducer 2
- Execution mode: vectorized, llap
- Needs Tagging: false
- Reduce Operator Tree:
- Group By Operator
- aggregations: sum(VALUE._col0)
- keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 1 Data size: 287 Basic stats: COMPLETE Column stats: COMPLETE
- pruneGroupingSetId: true
- Select Operator
- expressions: _col1 (type: string), _col3 (type: bigint)
- outputColumnNames: _col1, _col3
- Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- isSamplingPred: false
- predicate: (_col3 > 100L) (type: boolean)
- Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: 'AAA' (type: string), _col1 (type: string), _col3 (type: bigint)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- bucketingVersion: 2
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- bucketing_version -1
- columns _col0,_col1,_col2
- columns.types string:string:bigint
- escape.delim \
- hive.serialization.extend.additional.nesting.levels true
- serialization.escape.crlf true
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT upper(a), b, sum(s)
-FROM T1
-GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b))
-HAVING upper(a) = 'AAA' AND sum(s) > 100
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT upper(a), b, sum(s)
-FROM T1
-GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b))
-HAVING upper(a) = 'AAA' AND sum(s) > 100
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-#### A masked pattern was here ####
-AAA bbb 123456
-AAA NULL 123456
-PREHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((b), (a, b))
-HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB'
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-#### A masked pattern was here ####
-POSTHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((b), (a, b))
-HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB'
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-#### A masked pattern was here ####
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: t1
- filterExpr: (upper(b) = 'BBB') (type: boolean)
- Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
- GatherStats: false
- Filter Operator
- isSamplingPred: false
- predicate: (upper(b) = 'BBB') (type: boolean)
- Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: sum(s)
- keys: a (type: string), b (type: string), 0L (type: bigint)
- minReductionHashAggr: 0.0
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- isSamplingPred: false
- predicate: _col0 is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- bucketingVersion: 2
- key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
- null sort order: zzz
- numBuckets: -1
- sort order: +++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
- Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
- tag: -1
- value expressions: _col3 (type: bigint)
- auto parallelism: true
- Execution mode: vectorized, llap
- LLAP IO: no inputs
- Path -> Alias:
-#### A masked pattern was here ####
- Path -> Partition:
-#### A masked pattern was here ####
- Partition
- base file name: t1
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- bucketing_version 2
- column.name.delimiter ,
- columns a,b,s
- columns.types string:string:bigint
-#### A masked pattern was here ####
- name default.t1
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucketing_version 2
- column.name.delimiter ,
- columns a,b,s
- columns.comments
- columns.types string:string:bigint
-#### A masked pattern was here ####
- name default.t1
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.t1
- name: default.t1
- Truncated Path -> Alias:
- /t1 [t1]
- Reducer 2
- Execution mode: vectorized, llap
- Needs Tagging: false
- Reduce Operator Tree:
- Group By Operator
- aggregations: sum(VALUE._col0)
- keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
- pruneGroupingSetId: true
- Filter Operator
- isSamplingPred: false
- predicate: (_col3 > 100L) (type: boolean)
- Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- bucketingVersion: 2
- compressed: false
- GlobalTableId: 0
-#### A masked pattern was here ####
- NumFilesPerFileSink: 1
- Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
-#### A masked pattern was here ####
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- properties:
- bucketing_version -1
- columns _col0,_col1,_col2
- columns.types string:string:bigint
- escape.delim \
- hive.serialization.extend.additional.nesting.levels true
- serialization.escape.crlf true
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- TotalFiles: 1
- GatherStats: false
- MultiFileSpray: false
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((b), (a, b))
-HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB'
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT a, b, sum(s)
-FROM T1
-GROUP BY a, b GROUPING SETS ((b), (a, b))
-HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB'
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-#### A masked pattern was here ####
-aaa bbb 123456