You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2019/04/27 01:31:45 UTC
[hive] 03/04: HIVE-21633: Estimate range for value generated by
aggregate function in statistics annotation (Jesus Camacho Rodriguez,
reviewed by Vineet Garg)
This is an automated email from the ASF dual-hosted git repository.
jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
commit 4cab80d8a8007c3cc332aa33279d8c7ebde48ed2
Author: Jesus Camacho Rodriguez <jc...@apache.org>
AuthorDate: Thu Apr 18 13:32:38 2019 -0700
HIVE-21633: Estimate range for value generated by aggregate function in statistics annotation (Jesus Camacho Rodriguez, reviewed by Vineet Garg)
Close apache/hive#603
---
.../stats/annotation/StatsRulesProcFactory.java | 78 ++++++++++++++++++++++
.../clientpositive/groupby_grouping_window.q.out | 12 ++--
.../clientpositive/llap/subquery_scalar.q.out | 8 +--
.../clientpositive/llap/subquery_select.q.out | 12 ++--
.../llap/vector_groupby_grouping_window.q.out | 12 ++--
.../perf/tez/constraints/query78.q.out | 14 ++--
.../results/clientpositive/perf/tez/query78.q.out | 16 ++---
7 files changed, 115 insertions(+), 37 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index 6a1c210..0258e36 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -84,7 +84,12 @@ import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper;
import org.apache.hadoop.hive.ql.plan.mapper.StatsSource;
import org.apache.hadoop.hive.ql.stats.OperatorStats;
import org.apache.hadoop.hive.ql.stats.StatsUtils;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMax;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMin;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFResolver;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
@@ -1479,6 +1484,7 @@ public class StatsRulesProcFactory {
// if UDAFs are present, new columns needs to be added
if (!aggDesc.isEmpty() && stats != null) {
List<ColStatistics> aggColStats = Lists.newArrayList();
+ int idx = 0;
for (ColumnInfo ci : rs.getSignature()) {
// if the columns in row schema is not contained in column
@@ -1492,6 +1498,7 @@ public class StatsRulesProcFactory {
cs.setCountDistint(stats.getNumRows());
cs.setNumNulls(0);
cs.setAvgColLen(StatsUtils.getAvgColLenOf(conf, ci.getObjectInspector(), colType));
+ computeAggregateColumnMinMax(cs, conf, aggDesc.get(idx++), colType, parentStats);
aggColStats.add(cs);
}
}
@@ -1524,6 +1531,77 @@ public class StatsRulesProcFactory {
return null;
}
+  /**
+   * Sets, when it can be derived, the min / max range of an aggregate output column from the range of
+   * its single input: count -> [0, n]; min / max -> input range; sum -> [min, (min + max) * 0.5 * n].
+   */
+  private static void computeAggregateColumnMinMax(ColStatistics cs, HiveConf conf, AggregationDesc agg, String aggType,
+      Statistics parentStats) throws SemanticException {
+    if (agg.getParameters() != null && agg.getParameters().size() == 1) {
+      ColStatistics parentCS = StatsUtils.getColStatisticsFromExpression(
+          conf, parentStats, agg.getParameters().get(0));
+      if (parentCS != null && parentCS.getRange() != null &&
+          parentCS.getRange().minValue != null && parentCS.getRange().maxValue != null) {
+        // Number of values fed to the aggregate; clamped because numNulls > numRows would make it negative.
+        long valuesCount = Math.max(0L, agg.getDistinct() ? parentCS.getCountDistint()
+            : parentStats.getNumRows() - parentCS.getNumNulls());
+        Range range = parentCS.getRange();
+        // Get the aggregate function matching the name in the query.
+        GenericUDAFResolver udaf =
+            FunctionRegistry.getGenericUDAFResolver(agg.getGenericUDAFName());
+        if (udaf instanceof GenericUDAFCount) {
+          cs.setRange(new Range(0, valuesCount));
+        } else if (udaf instanceof GenericUDAFMax || udaf instanceof GenericUDAFMin) {
+          cs.setRange(new Range(range.minValue, range.maxValue));
+        } else if (udaf instanceof GenericUDAFSum) {
+          switch (aggType) {
+          case serdeConstants.TINYINT_TYPE_NAME:
+          case serdeConstants.SMALLINT_TYPE_NAME:
+          case serdeConstants.DATE_TYPE_NAME:
+          case serdeConstants.INT_TYPE_NAME:
+          case serdeConstants.BIGINT_TYPE_NAME:
+            long maxValueLong = range.maxValue.longValue();
+            long minValueLong = range.minValue.longValue();
+            // Only for a legal (min <= max) and non-negative input range, so minValue can bound the sum from below
+            if (minValueLong <= maxValueLong && minValueLong >= 0) {
+              // min = minValue, max = (minValue + maxValue) * 0.5 * valuesCount
+              cs.setRange(new Range(
+                  minValueLong,
+                  StatsUtils.safeMult(
+                      StatsUtils.safeMult(StatsUtils.safeAdd(minValueLong, maxValueLong), 0.5),
+                      valuesCount)));
+            }
+            break;
+          case serdeConstants.FLOAT_TYPE_NAME:
+          case serdeConstants.DOUBLE_TYPE_NAME:
+            double maxValueDouble = range.maxValue.doubleValue();
+            double minValueDouble = range.minValue.doubleValue();
+            // Only for a legal (min <= max) and non-negative input range, so minValue can bound the sum from below
+            if (minValueDouble <= maxValueDouble && minValueDouble >= 0) {
+              // min = minValue, max = (minValue + maxValue) * 0.5 * valuesCount
+              cs.setRange(new Range(
+                  minValueDouble,
+                  (minValueDouble + maxValueDouble) * 0.5 * valuesCount));
+            }
+            break;
+          default:
+            if (aggType.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
+              BigDecimal maxValueBD = new BigDecimal(range.maxValue.toString());
+              BigDecimal minValueBD = new BigDecimal(range.minValue.toString());
+              // Only for a legal (min <= max) and non-negative input range, so minValue can bound the sum from below
+              if (minValueBD.compareTo(maxValueBD) <= 0 && minValueBD.compareTo(BigDecimal.ZERO) >= 0) {
+                // min = minValue, max = (minValue + maxValue) * 0.5 * valuesCount
+                cs.setRange(new Range(
+                    minValueBD,
+                    minValueBD.add(maxValueBD).multiply(new BigDecimal("0.5")).multiply(BigDecimal.valueOf(valuesCount))));
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
private long getParentNumRows(GroupByOperator op, List<ExprNodeDesc> gbyKeys, HiveConf conf) {
if(gbyKeys == null || gbyKeys.isEmpty()) {
return op.getParentOperators().get(0).getStatistics().getNumRows();
diff --git a/ql/src/test/results/clientpositive/groupby_grouping_window.q.out b/ql/src/test/results/clientpositive/groupby_grouping_window.q.out
index e6cc459..7f687da 100644
--- a/ql/src/test/results/clientpositive/groupby_grouping_window.q.out
+++ b/ql/src/test/results/clientpositive/groupby_grouping_window.q.out
@@ -75,7 +75,7 @@ STAGE PLANS:
pruneGroupingSetId: true
Filter Operator
predicate: (_col3 > 0) (type: boolean)
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -91,14 +91,14 @@ STAGE PLANS:
key expressions: _col0 (type: int), _col3 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: int)
Execution mode: vectorized
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey1 (type: int)
outputColumnNames: _col0, _col2, _col3
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -119,14 +119,14 @@ STAGE PLANS:
window function: GenericUDAFRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), rank_window_0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
index 0d8ff14..5817f98 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
@@ -1500,7 +1500,7 @@ STAGE PLANS:
keys:
0 _col0 (type: string)
1 _col0 (type: string)
- Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
minReductionHashAggr: 0.0
@@ -1537,16 +1537,16 @@ STAGE PLANS:
Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (_col1 > 100) (type: boolean)
- Statistics: Num rows: 4 Data size: 500 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE
Stage: Stage-0
Fetch Operator
diff --git a/ql/src/test/results/clientpositive/llap/subquery_select.q.out b/ql/src/test/results/clientpositive/llap/subquery_select.q.out
index fc70407..d58905c 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_select.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_select.q.out
@@ -5057,16 +5057,16 @@ STAGE PLANS:
Statistics: Num rows: 13 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (_col2 > 0L) (type: boolean)
- Statistics: Num rows: 4 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 13 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col1 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -5534,16 +5534,16 @@ STAGE PLANS:
Statistics: Num rows: 13 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (_col2 > 0L) (type: boolean)
- Statistics: Num rows: 4 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 13 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col1 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE
Reducer 6
Execution mode: llap
Reduce Operator Tree:
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out
index 3a9ea79..5e391bf 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out
@@ -151,7 +151,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterLongColGreaterLongScalar(col 2:int, val 0)
predicate: (_col3 > 0) (type: boolean)
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col3 (type: int)
sort order: ++
@@ -163,7 +163,7 @@ STAGE PLANS:
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
partitionColumns: 0:int
valueColumns: 1:int
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: int)
Reducer 3
Execution mode: vectorized, llap
@@ -188,7 +188,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 2, 1]
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
Input definition
@@ -222,7 +222,7 @@ STAGE PLANS:
outputTypes: [int, int, int, int]
partitionExpressions: [col 0:int]
streamingColumns: [3]
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), rank_window_0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
@@ -230,13 +230,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 2, 1, 3]
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query78.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query78.q.out
index 888d335..792540f 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/query78.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query78.q.out
@@ -158,18 +158,18 @@ Stage-0
File Output Operator [FS_269]
Limit [LIM_268] (rows=100 width=484)
Number of rows:100
- Select Operator [SEL_267] (rows=203549242538 width=483)
+ Select Operator [SEL_267] (rows=1831943309558 width=483)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"]
<-Reducer 5 [SIMPLE_EDGE]
SHUFFLE [RS_73]
- Select Operator [SEL_72] (rows=203549242538 width=719)
+ Select Operator [SEL_72] (rows=1831943309558 width=719)
Output:["_col0","_col1","_col6","_col7","_col8","_col9","_col10","_col11","_col12"]
- Merge Join Operator [MERGEJOIN_220] (rows=203549242538 width=703)
+ Merge Join Operator [MERGEJOIN_220] (rows=1831943309558 width=703)
Conds:RS_69._col0, _col1=RS_266._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col7","_col8","_col11","_col12","_col13"]
<-Reducer 12 [ONE_TO_ONE_EDGE] vectorized
FORWARD [RS_266]
PartitionCols:_col0, _col1
- Filter Operator [FIL_265] (rows=13513323 width=239)
+ Filter Operator [FIL_265] (rows=40539971 width=239)
predicate:(_col2 > 0L)
Group By Operator [GBY_264] (rows=40539971 width=239)
Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1
@@ -228,7 +228,7 @@ Stage-0
<-Reducer 4 [SIMPLE_EDGE]
SHUFFLE [RS_69]
PartitionCols:_col0, _col1
- Merge Join Operator [MERGEJOIN_219] (rows=7613716536 width=471)
+ Merge Join Operator [MERGEJOIN_219] (rows=22841150061 width=471)
Conds:RS_244._col1=RS_256._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col7","_col8"]
<-Reducer 3 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_244]
@@ -287,9 +287,9 @@ Stage-0
<-Reducer 9 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_256]
PartitionCols:_col0
- Select Operator [SEL_255] (rows=33694814 width=235)
+ Select Operator [SEL_255] (rows=101084444 width=235)
Output:["_col0","_col1","_col2","_col3"]
- Filter Operator [FIL_254] (rows=33694814 width=239)
+ Filter Operator [FIL_254] (rows=101084444 width=239)
predicate:(_col2 > 0L)
Select Operator [SEL_253] (rows=101084444 width=239)
Output:["_col1","_col2","_col3","_col4"]
diff --git a/ql/src/test/results/clientpositive/perf/tez/query78.q.out b/ql/src/test/results/clientpositive/perf/tez/query78.q.out
index e66d6f5..9ce2cdb 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query78.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query78.q.out
@@ -158,20 +158,20 @@ Stage-0
File Output Operator [FS_276]
Limit [LIM_275] (rows=100 width=484)
Number of rows:100
- Select Operator [SEL_274] (rows=203549242531 width=483)
+ Select Operator [SEL_274] (rows=1831943309424 width=483)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"]
<-Reducer 5 [SIMPLE_EDGE]
SHUFFLE [RS_76]
- Select Operator [SEL_75] (rows=203549242531 width=719)
+ Select Operator [SEL_75] (rows=1831943309424 width=719)
Output:["_col0","_col1","_col2","_col6","_col7","_col8","_col9","_col10","_col11","_col12"]
- Merge Join Operator [MERGEJOIN_223] (rows=203549242531 width=715)
+ Merge Join Operator [MERGEJOIN_223] (rows=1831943309424 width=715)
Conds:RS_72._col1=RS_273._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col11","_col12","_col13","_col14","_col15"]
<-Reducer 12 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_273]
PartitionCols:_col0
- Select Operator [SEL_272] (rows=33694814 width=247)
+ Select Operator [SEL_272] (rows=101084444 width=247)
Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
- Filter Operator [FIL_271] (rows=33694814 width=239)
+ Filter Operator [FIL_271] (rows=101084444 width=239)
predicate:(_col2 > 0L)
Select Operator [SEL_270] (rows=101084444 width=239)
Output:["_col1","_col2","_col3","_col4"]
@@ -234,7 +234,7 @@ Stage-0
<-Reducer 4 [SIMPLE_EDGE]
SHUFFLE [RS_72]
PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_222] (rows=3053485049 width=471)
+ Merge Join Operator [MERGEJOIN_222] (rows=9160455599 width=471)
Conds:RS_248._col1, _col0=RS_260._col1, _col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col7","_col8","_col9"]
<-Reducer 3 [ONE_TO_ONE_EDGE] vectorized
FORWARD [RS_248]
@@ -295,9 +295,9 @@ Stage-0
<-Reducer 9 [ONE_TO_ONE_EDGE] vectorized
FORWARD [RS_260]
PartitionCols:_col1, _col0
- Select Operator [SEL_259] (rows=13513323 width=239)
+ Select Operator [SEL_259] (rows=40539971 width=239)
Output:["_col0","_col1","_col2","_col3","_col4"]
- Filter Operator [FIL_258] (rows=13513323 width=239)
+ Filter Operator [FIL_258] (rows=40539971 width=239)
predicate:(_col2 > 0L)
Group By Operator [GBY_257] (rows=40539971 width=239)
Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1