Posted to commits@hive.apache.org by jc...@apache.org on 2020/07/17 15:04:27 UTC
[hive] branch master updated: Allow PPD when subject is not a column with grouping sets present (Zhihua Deng, reviewed by Jesus Camacho Rodriguez)
This is an automated email from the ASF dual-hosted git repository.
jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 44aa72f Allow PPD when subject is not a column with grouping sets present (Zhihua Deng, reviewed by Jesus Camacho Rodriguez)
44aa72f is described below
commit 44aa72f096639d7b1a52ef18887016af98bd6999
Author: dengzh <de...@gmail.com>
AuthorDate: Fri Jul 17 23:04:17 2020 +0800
Allow PPD when subject is not a column with grouping sets present (Zhihua Deng, reviewed by Jesus Camacho Rodriguez)
Closes apache/hive#1255
---
.../apache/hadoop/hive/ql/ppd/OpProcFactory.java | 44 +-
.../groupby_grouping_sets_pushdown1.q | 54 +-
.../llap/groupby_grouping_sets_pushdown1.q.out | 802 +++++++++++++++++++++
3 files changed, 877 insertions(+), 23 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
index 6c66260..56d3e90 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
@@ -792,40 +792,40 @@ public final class OpProcFactory {
return null;
}
+ private void getGBYKeyPosFromExpr(ExprNodeDesc expr, List<ExprNodeDesc> groupByKeys,
+ List<Integer> gbyKeyPos) {
+ for (int i = 0; i < groupByKeys.size(); i++) {
+ if (groupByKeys.get(i).isSame(expr)) {
+ gbyKeyPos.add(i);
+ return;
+ }
+ }
+ if (expr.getChildren() != null) {
+ for (int i = 0; i < expr.getChildren().size(); i++) {
+ getGBYKeyPosFromExpr(expr.getChildren().get(i), groupByKeys, gbyKeyPos);
+ }
+ }
+ }
+
private boolean canPredPushdown(ExprNodeDesc expr, List<ExprNodeDesc> groupByKeys,
FastBitSet[] bitSets, int groupingSetPosition) {
- List<ExprNodeDesc> columns = new ArrayList<ExprNodeDesc>();
- extractCols(expr, columns);
- for (ExprNodeDesc col : columns) {
- int index = groupByKeys.indexOf(col);
- assert index >= 0;
+ List<Integer> gbyKeyPos = new ArrayList<Integer>();
+ getGBYKeyPosFromExpr(expr, groupByKeys, gbyKeyPos);
+ // gbyKeyPos can be empty, e.g. when the expr is a boolean constant; in that case let the expr push down
+ for (Integer pos : gbyKeyPos) {
for (FastBitSet bitset : bitSets) {
int keyPos = bitset.nextClearBit(0);
- while (keyPos < groupingSetPosition && keyPos != index) {
+ while (keyPos < groupingSetPosition && keyPos != pos) {
keyPos = bitset.nextClearBit(keyPos + 1);
}
- // If the column has not be found in grouping sets, the expr should not be pushed down
- if (keyPos != index) {
+ // If the gbyKey is not present in some grouping set, the expr should not be pushed down
+ if (keyPos != pos) {
return false;
}
}
}
return true;
}
-
- // Extract columns from expression
- private void extractCols(ExprNodeDesc expr, List<ExprNodeDesc> columns) {
- if (expr instanceof ExprNodeColumnDesc) {
- columns.add(expr);
- }
-
- if (expr instanceof ExprNodeGenericFuncDesc) {
- List<ExprNodeDesc> children = expr.getChildren();
- for (int i = 0; i < children.size(); ++i) {
- extractCols(children.get(i), columns);
- }
- }
- }
}
/**
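For readers following the change: the patch replaces a column-only lookup (extractCols plus groupByKeys.indexOf) with a recursive match of predicate subtrees against whole group-by key expressions, and then reuses the existing grouping-set check over the collected key positions. Below is a minimal, self-contained sketch of that check, using java.util.BitSet in place of Hive's FastBitSet; the class and method names here are illustrative, not part of the patch.

import java.util.Arrays;
import java.util.BitSet;
import java.util.List;

public class GroupingSetPpdSketch {

    // Mirrors the loop in canPredPushdown above: a predicate may be pushed
    // below the group-by only if every group-by key position it references
    // is present in every grouping set. In this encoding a clear bit at
    // position i (below groupingSetPosition) means key i participates in
    // that grouping set, which matches the nextClearBit scan in the patch.
    static boolean canPushDown(List<Integer> gbyKeyPos, BitSet[] bitSets,
                               int groupingSetPosition) {
        for (int pos : gbyKeyPos) {
            for (BitSet bitset : bitSets) {
                int keyPos = bitset.nextClearBit(0);
                while (keyPos < groupingSetPosition && keyPos != pos) {
                    keyPos = bitset.nextClearBit(keyPos + 1);
                }
                if (keyPos != pos) {
                    return false; // this grouping set nulls out the key
                }
            }
        }
        return true;
    }

    public static void main(String[] args) {
        // GROUP BY a, b GROUPING SETS ((a), (a, b)): two keys, a=0 and b=1.
        BitSet onlyA = new BitSet();
        onlyA.set(1);                // (a): key b is absent (bit set)
        BitSet aAndB = new BitSet(); // (a, b): both keys present
        BitSet[] sets = { onlyA, aAndB };

        // A predicate such as upper(a) IN ('AAA', 'BBB') references only
        // key position 0, which every grouping set keeps -> pushable.
        System.out.println(canPushDown(Arrays.asList(0), sets, 2)); // true
        // A predicate on b (position 1) is not, since (a) drops b.
        System.out.println(canPushDown(Arrays.asList(1), sets, 2)); // false
    }
}

Run against the (a)/(a, b) sets used in the tests, a predicate over key a alone is pushable while one over b is not, which is exactly why the earlier WHERE b IS NULL test cannot push its filter below the group-by.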
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q
index ce2c68c..cbfe58c 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q
@@ -39,4 +39,56 @@ SELECT * FROM (
SELECT a, b, sum(s)
FROM T1
GROUP BY a, b GROUPING SETS ((a), (a, b))
-) t WHERE b IS NULL;
\ No newline at end of file
+) t WHERE b IS NULL;
+
+EXPLAIN EXTENDED SELECT * FROM (
+SELECT upper(a) x, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+) t WHERE x in ("AAA", "BBB");
+
+SELECT * FROM (
+SELECT upper(a) x, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+) t WHERE x in ('AAA', 'BBB');
+
+EXPLAIN EXTENDED SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+HAVING upper(a) = 'AAA' AND 1 != 1;
+
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+HAVING upper(a) = 'AAA' AND 1 != 1;
+
+EXPLAIN EXTENDED SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((), (a), (a, b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100;
+
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((), (a), (a, b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100;
+
+EXPLAIN EXTENDED SELECT upper(a), b, sum(s)
+FROM T1
+GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100;
+
+SELECT upper(a), b, sum(s)
+FROM T1
+GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100;
+
+EXPLAIN EXTENDED SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((b), (a, b))
+HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB';
+
+SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((b), (a, b))
+HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB';
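The new queries above exercise predicates whose subject is an expression over a group-by key (upper(a), upper(b)) rather than a bare column. A minimal sketch of the position-matching recursion they rely on, mirroring getGBYKeyPosFromExpr; the toy Expr type stands in for Hive's ExprNodeDesc and isSame is reduced to string comparison, both assumptions for illustration only.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class KeyPositionSketch {

    // Toy expression node; a stand-in for Hive's ExprNodeDesc.
    static final class Expr {
        final String repr;          // canonical form used for matching
        final List<Expr> children;
        Expr(String repr, Expr... children) {
            this.repr = repr;
            this.children = Arrays.asList(children);
        }
        boolean isSame(Expr other) { return repr.equals(other.repr); }
    }

    // Mirrors getGBYKeyPosFromExpr: record the position of the first
    // group-by key matching this node; otherwise recurse into children.
    static void collectKeyPositions(Expr expr, List<Expr> groupByKeys,
                                    List<Integer> out) {
        for (int i = 0; i < groupByKeys.size(); i++) {
            if (groupByKeys.get(i).isSame(expr)) {
                out.add(i);
                return;
            }
        }
        for (Expr child : expr.children) {
            collectKeyPositions(child, groupByKeys, out);
        }
    }

    public static void main(String[] args) {
        // GROUP BY upper(a), b -- the first key is itself an expression.
        Expr upperA = new Expr("upper(a)", new Expr("a"));
        Expr b = new Expr("b");
        List<Expr> keys = Arrays.asList(upperA, b);

        // Predicate upper(a) = 'AAA': the whole subtree upper(a) matches
        // key 0, so no column-only lookup is needed.
        Expr pred = new Expr("=", new Expr("upper(a)", new Expr("a")),
                             new Expr("'AAA'"));
        List<Integer> pos = new ArrayList<>();
        collectKeyPositions(pred, keys, pos);
        System.out.println(pos);    // [0]
    }
}

Because the whole subtree upper(a) resolves to key position 0, the predicate can be pushed to the TableScan, as the plans in the golden output below confirm.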
diff --git a/ql/src/test/results/clientpositive/llap/groupby_grouping_sets_pushdown1.q.out b/ql/src/test/results/clientpositive/llap/groupby_grouping_sets_pushdown1.q.out
index 2d71757..81fdd06 100644
--- a/ql/src/test/results/clientpositive/llap/groupby_grouping_sets_pushdown1.q.out
+++ b/ql/src/test/results/clientpositive/llap/groupby_grouping_sets_pushdown1.q.out
@@ -643,3 +643,805 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
#### A masked pattern was here ####
aaa NULL 123456
+PREHOOK: query: EXPLAIN EXTENDED SELECT * FROM (
+SELECT upper(a) x, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+) t WHERE x in ("AAA", "BBB")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN EXTENDED SELECT * FROM (
+SELECT upper(a) x, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+) t WHERE x in ("AAA", "BBB")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ filterExpr: (upper(a)) IN ('AAA', 'BBB') (type: boolean)
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (upper(a)) IN ('AAA', 'BBB') (type: boolean)
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(s)
+ keys: a (type: string), b (type: string), 0L (type: bigint)
+ minReductionHashAggr: 0.0
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ null sort order: zzz
+ numBuckets: -1
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ value expressions: _col3 (type: bigint)
+ auto parallelism: true
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: t1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns a,b,s
+ columns.types string:string:bigint
+#### A masked pattern was here ####
+ name default.t1
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucketing_version 2
+ column.name.delimiter ,
+ columns a,b,s
+ columns.comments
+ columns.types string:string:bigint
+#### A masked pattern was here ####
+ name default.t1
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t1
+ name: default.t1
+ Truncated Path -> Alias:
+ /t1 [t1]
+ Reducer 2
+ Execution mode: vectorized, llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ pruneGroupingSetId: true
+ Select Operator
+ expressions: upper(_col0) (type: string), _col1 (type: string), _col3 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ bucketingVersion: 2
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ bucketing_version -1
+ columns _col0,_col1,_col2
+ columns.types string:string:bigint
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT * FROM (
+SELECT upper(a) x, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+) t WHERE x in ('AAA', 'BBB')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM (
+SELECT upper(a) x, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+) t WHERE x in ('AAA', 'BBB')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+AAA bbb 123456
+AAA NULL 123456
+PREHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+HAVING upper(a) = 'AAA' AND 1 != 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+HAVING upper(a) = 'AAA' AND 1 != 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: false (type: boolean)
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(s)
+ keys: a (type: string), b (type: string), 0L (type: bigint)
+ minReductionHashAggr: 0.0
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ null sort order: zzz
+ numBuckets: -1
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ value expressions: _col3 (type: bigint)
+ auto parallelism: true
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Path -> Alias:
+ nullscan://null/default.t1/part_ [t1]
+ Path -> Partition:
+ nullscan://null/default.t1/part_
+ Partition
+ input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns a,b,s
+ columns.types string:string:bigint
+#### A masked pattern was here ####
+ name default.t1
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe
+ serde: org.apache.hadoop.hive.serde2.NullStructSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucketing_version 2
+ column.name.delimiter ,
+ columns a,b,s
+ columns.comments
+ columns.types string:string:bigint
+#### A masked pattern was here ####
+ name default.t1
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t1
+ name: default.t1
+ Truncated Path -> Alias:
+ nullscan://null/default.t1/part_ [t1]
+ Reducer 2
+ Execution mode: vectorized, llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ pruneGroupingSetId: true
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ bucketingVersion: 2
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ bucketing_version -1
+ columns _col0,_col1,_col2
+ columns.types string:string:bigint
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+HAVING upper(a) = 'AAA' AND 1 != 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((a), (a, b))
+HAVING upper(a) = 'AAA' AND 1 != 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+PREHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((), (a), (a, b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((), (a), (a, b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Select Operator
+ expressions: a (type: string), b (type: string), s (type: bigint)
+ outputColumnNames: a, b, s
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(s)
+ keys: a (type: string), b (type: string), 0L (type: bigint)
+ minReductionHashAggr: 0.0
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ isSamplingPred: false
+ predicate: (upper(_col0) = 'AAA') (type: boolean)
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ null sort order: zzz
+ numBuckets: -1
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ value expressions: _col3 (type: bigint)
+ auto parallelism: true
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: t1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns a,b,s
+ columns.types string:string:bigint
+#### A masked pattern was here ####
+ name default.t1
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucketing_version 2
+ column.name.delimiter ,
+ columns a,b,s
+ columns.comments
+ columns.types string:string:bigint
+#### A masked pattern was here ####
+ name default.t1
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t1
+ name: default.t1
+ Truncated Path -> Alias:
+ /t1 [t1]
+ Reducer 2
+ Execution mode: vectorized, llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ pruneGroupingSetId: true
+ Filter Operator
+ isSamplingPred: false
+ predicate: (_col3 > 100L) (type: boolean)
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ bucketingVersion: 2
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ bucketing_version -1
+ columns _col0,_col1,_col2
+ columns.types string:string:bigint
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((), (a), (a, b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((), (a), (a, b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+aaa bbb 123456
+aaa NULL 123456
+PREHOOK: query: EXPLAIN EXTENDED SELECT upper(a), b, sum(s)
+FROM T1
+GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN EXTENDED SELECT upper(a), b, sum(s)
+FROM T1
+GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ filterExpr: (upper(a) = 'AAA') (type: boolean)
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (upper(a) = 'AAA') (type: boolean)
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(s)
+ keys: upper(a) (type: string), b (type: string), 0L (type: bigint)
+ minReductionHashAggr: 0.0
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 287 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ null sort order: zzz
+ numBuckets: -1
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ Statistics: Num rows: 1 Data size: 287 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ value expressions: _col3 (type: bigint)
+ auto parallelism: true
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: t1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns a,b,s
+ columns.types string:string:bigint
+#### A masked pattern was here ####
+ name default.t1
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucketing_version 2
+ column.name.delimiter ,
+ columns a,b,s
+ columns.comments
+ columns.types string:string:bigint
+#### A masked pattern was here ####
+ name default.t1
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t1
+ name: default.t1
+ Truncated Path -> Alias:
+ /t1 [t1]
+ Reducer 2
+ Execution mode: vectorized, llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 1 Data size: 287 Basic stats: COMPLETE Column stats: COMPLETE
+ pruneGroupingSetId: true
+ Select Operator
+ expressions: _col1 (type: string), _col3 (type: bigint)
+ outputColumnNames: _col1, _col3
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ isSamplingPred: false
+ predicate: (_col3 > 100L) (type: boolean)
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 'AAA' (type: string), _col1 (type: string), _col3 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ bucketingVersion: 2
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ bucketing_version -1
+ columns _col0,_col1,_col2
+ columns.types string:string:bigint
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT upper(a), b, sum(s)
+FROM T1
+GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT upper(a), b, sum(s)
+FROM T1
+GROUP BY upper(a), b GROUPING SETS ((upper(a)), (upper(a), b))
+HAVING upper(a) = 'AAA' AND sum(s) > 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+AAA bbb 123456
+AAA NULL 123456
+PREHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((b), (a, b))
+HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN EXTENDED SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((b), (a, b))
+HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ filterExpr: (upper(b) = 'BBB') (type: boolean)
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate: (upper(b) = 'BBB') (type: boolean)
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(s)
+ keys: a (type: string), b (type: string), 0L (type: bigint)
+ minReductionHashAggr: 0.0
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ isSamplingPred: false
+ predicate: _col0 is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ bucketingVersion: 2
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ null sort order: zzz
+ numBuckets: -1
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ tag: -1
+ value expressions: _col3 (type: bigint)
+ auto parallelism: true
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: t1
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ bucketing_version 2
+ column.name.delimiter ,
+ columns a,b,s
+ columns.types string:string:bigint
+#### A masked pattern was here ####
+ name default.t1
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucketing_version 2
+ column.name.delimiter ,
+ columns a,b,s
+ columns.comments
+ columns.types string:string:bigint
+#### A masked pattern was here ####
+ name default.t1
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.t1
+ name: default.t1
+ Truncated Path -> Alias:
+ /t1 [t1]
+ Reducer 2
+ Execution mode: vectorized, llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ pruneGroupingSetId: true
+ Filter Operator
+ isSamplingPred: false
+ predicate: (_col3 > 100L) (type: boolean)
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ bucketingVersion: 2
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ bucketing_version -1
+ columns _col0,_col1,_col2
+ columns.types string:string:bigint
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((b), (a, b))
+HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT a, b, sum(s)
+FROM T1
+GROUP BY a, b GROUPING SETS ((b), (a, b))
+HAVING sum(s) > 100 and a IS NOT NULL AND upper(b) = 'BBB'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+aaa bbb 123456