You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jd...@apache.org on 2016/04/20 00:25:45 UTC
[53/58] [abbrv] hive git commit: HIVE-13287: Add logic to estimate
stats for IN operator (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
HIVE-13287: Add logic to estimate stats for IN operator (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3fec161d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3fec161d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3fec161d
Branch: refs/heads/llap
Commit: 3fec161dad40860d493dff203f9da3925226bb8e
Parents: 833a7d1
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Wed Mar 23 21:00:41 2016 +0000
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Fri Apr 15 12:01:26 2016 +0100
----------------------------------------------------------------------
.../stats/annotation/StatsRulesProcFactory.java | 107 ++++++++++++++++++-
.../clientpositive/filter_cond_pushdown.q.out | 6 +-
.../groupby_multi_single_reducer3.q.out | 8 +-
.../llap/dynamic_partition_pruning_2.q.out | 30 +++---
ql/src/test/results/clientpositive/pcs.q.out | 6 +-
.../results/clientpositive/perf/query17.q.out | 8 +-
.../results/clientpositive/perf/query29.q.out | 8 +-
.../results/clientpositive/perf/query46.q.out | 10 +-
.../results/clientpositive/perf/query89.q.out | 4 +-
.../results/clientpositive/pointlookup.q.out | 12 +--
.../results/clientpositive/pointlookup2.q.out | 16 +--
.../results/clientpositive/pointlookup3.q.out | 8 +-
.../spark/groupby_multi_single_reducer3.q.out | 8 +-
.../tez/dynamic_partition_pruning_2.q.out | 30 +++---
14 files changed, 180 insertions(+), 81 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index c4fc5ca..320dc10 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -24,6 +24,7 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
+import java.util.Set;
import java.util.Stack;
import org.apache.hadoop.hive.conf.HiveConf;
@@ -53,6 +54,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicListDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
@@ -64,6 +66,7 @@ import org.apache.hadoop.hive.ql.plan.Statistics;
import org.apache.hadoop.hive.ql.stats.StatsUtils;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualNS;
@@ -76,19 +79,24 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
public class StatsRulesProcFactory {
private static final Logger LOG = LoggerFactory.getLogger(StatsRulesProcFactory.class.getName());
private static final boolean isDebugEnabled = LOG.isDebugEnabled();
+
/**
* Collect basic statistics like number of rows, data size and column level statistics from the
* table. Also sets the state of the available statistics. Basic and column statistics can have
@@ -299,7 +307,7 @@ public class StatsRulesProcFactory {
private long evaluateExpression(Statistics stats, ExprNodeDesc pred,
AnnotateStatsProcCtx aspCtx, List<String> neededCols,
- FilterOperator fop, long evaluatedRowCount) throws CloneNotSupportedException {
+ FilterOperator fop, long evaluatedRowCount) throws CloneNotSupportedException, SemanticException {
long newNumRows = 0;
Statistics andStats = null;
@@ -338,6 +346,9 @@ public class StatsRulesProcFactory {
evaluatedRowCount = newNumRows;
}
}
+ } else if (udf instanceof GenericUDFIn) {
+ // for IN clause
+ newNumRows = evaluateInExpr(stats, pred, aspCtx, neededCols, fop);
} else if (udf instanceof GenericUDFOPNot) {
newNumRows = evaluateNotExpr(stats, pred, aspCtx, neededCols, fop);
} else if (udf instanceof GenericUDFOPNotNull) {
@@ -375,9 +386,97 @@ public class StatsRulesProcFactory {
return newNumRows;
}
+    /**
+     * Estimates the number of rows that satisfy an IN predicate.
+     *
+     * <p>Two shapes are handled: {@code col IN (c1, c2, ...)} and
+     * {@code struct(col1, col2, ...) IN (const struct(...), ...)}. For every column the distinct
+     * constants it is compared against are collected, and the per-column selectivity is taken as
+     * (distinct constants in the IN list) / (distinct values of the column), capped at 1. When the
+     * distinct-value count of a column is unavailable, a selectivity of 0.5 is assumed for it.
+     * Per-column selectivities are multiplied together, so the result never exceeds the input
+     * row count.
+     *
+     * <p>If the predicate does not have one of the shapes above (left-hand side is not a column
+     * reference, a value is not a constant, a struct constant does not line up with the struct of
+     * columns) or a referenced column is not in {@code neededCols}, half of the input rows is
+     * returned as the default estimate.
+     *
+     * @param stats statistics of the rows reaching the filter
+     * @param pred the IN predicate; must be an {@link ExprNodeGenericFuncDesc}
+     * @param aspCtx annotation context (not used by this estimate)
+     * @param neededCols columns needed by the operator, or null to skip the membership check
+     * @param fop the filter operator being annotated (not used by this estimate)
+     * @return the estimated number of rows passing the predicate
+     */
+    private long evaluateInExpr(Statistics stats, ExprNodeDesc pred, AnnotateStatsProcCtx aspCtx,
+        List<String> neededCols, FilterOperator fop) throws SemanticException {
+
+      final long numRows = stats.getNumRows();
+      // Estimate used whenever the predicate cannot be analyzed
+      final long defaultEstimate = numRows / 2;
+
+      ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) pred;
+
+      // 1. It is an IN operator, check if it uses STRUCT
+      List<ExprNodeDesc> children = fd.getChildren();
+      List<ColStatistics> columnStats = Lists.newArrayList();
+      List<Set<ExprNodeDescEqualityWrapper>> values = Lists.newArrayList();
+      ExprNodeDesc columnsChild = children.get(0);
+      final boolean multiColumn = columnsChild instanceof ExprNodeGenericFuncDesc
+          && ((ExprNodeGenericFuncDesc) columnsChild).getGenericUDF() instanceof GenericUDFStruct;
+      if (multiColumn) {
+        for (ExprNodeDesc columnChild : columnsChild.getChildren()) {
+          if (!addInColumn(stats, columnChild, neededCols, columnStats, values)) {
+            return defaultEstimate;
+          }
+        }
+      } else if (!addInColumn(stats, columnsChild, neededCols, columnStats, values)) {
+        return defaultEstimate;
+      }
+
+      // 2. Extract the distinct values each column is compared against
+      for (int i = 1; i < children.size(); i++) {
+        ExprNodeDesc child = children.get(i);
+        // If value is not a constant, we bail out
+        if (!(child instanceof ExprNodeConstantDesc)) {
+          return defaultEstimate;
+        }
+        if (multiColumn) {
+          ExprNodeConstantDesc constantChild = (ExprNodeConstantDesc) child;
+          TypeInfo constantType = constantChild.getTypeInfo();
+          Object constantValue =
+              constantChild.getWritableObjectInspector().getWritableConstantValue();
+          // A constant that is not a struct cannot be split per column; bail out
+          if (!(constantType instanceof StructTypeInfo) || !(constantValue instanceof List)) {
+            return defaultEstimate;
+          }
+          List<?> items = (List<?>) constantValue;
+          List<TypeInfo> structTypes =
+              ((StructTypeInfo) constantType).getAllStructFieldTypeInfos();
+          // The struct constant must have exactly one field per column on the left-hand side
+          if (structTypes.size() != values.size() || items.size() != structTypes.size()) {
+            return defaultEstimate;
+          }
+          for (int j = 0; j < structTypes.size(); j++) {
+            ExprNodeConstantDesc constant =
+                new ExprNodeConstantDesc(structTypes.get(j), items.get(j));
+            values.get(j).add(new ExprNodeDescEqualityWrapper(constant));
+          }
+        } else {
+          values.get(0).add(new ExprNodeDescEqualityWrapper(child));
+        }
+      }
+
+      // 3. Calculate IN selectivity
+      double factor = 1d;
+      for (int i = 0; i < columnStats.size(); i++) {
+        ColStatistics colStats = columnStats.get(i);
+        long dvs = colStats == null ? 0 : colStats.getCountDistint();
+        // Assuming uniformly distributed values, each distinct constant selects 1/dvs of the
+        // rows: (num of distinct vals for col in IN clause) / (num of distinct vals for col).
+        // Capped at 1 because the IN list may hold more values than the column has.
+        // Without a usable distinct-value count, assume half of the rows qualify.
+        double columnFactor =
+            dvs <= 0 ? 0.5d : Math.min(1d, (double) values.get(i).size() / dvs);
+        factor *= columnFactor;
+      }
+      return Math.round(numRows * factor);
+    }
+
+    /**
+     * Registers one left-hand-side operand of an IN predicate: appends its column statistics
+     * (null when none are available) to {@code columnStats} and a fresh, empty value set to
+     * {@code values}.
+     *
+     * @return false, leaving both lists untouched, if the operand is not a column reference or
+     *         if its column is missing from a non-null {@code neededCols}; true otherwise
+     */
+    private boolean addInColumn(Statistics stats, ExprNodeDesc operand, List<String> neededCols,
+        List<ColStatistics> columnStats, List<Set<ExprNodeDescEqualityWrapper>> values)
+        throws SemanticException {
+      // If column is not a column reference, we bail out
+      if (!(operand instanceof ExprNodeColumnDesc)) {
+        return false;
+      }
+      final String columnName = ((ExprNodeColumnDesc) operand).getColumn();
+      // if column name is not contained in needed column list then it
+      // is a partition column. We do not need to evaluate partition columns
+      // in filter expression since it will be taken care by partition pruner
+      if (neededCols != null && !neededCols.contains(columnName)) {
+        return false;
+      }
+      columnStats.add(stats.getColumnStatisticsFromColName(columnName));
+      values.add(Sets.<ExprNodeDescEqualityWrapper>newHashSet());
+      return true;
+    }
+
private long evaluateNotExpr(Statistics stats, ExprNodeDesc pred,
AnnotateStatsProcCtx aspCtx, List<String> neededCols, FilterOperator fop)
- throws CloneNotSupportedException {
+ throws CloneNotSupportedException, SemanticException {
long numRows = stats.getNumRows();
@@ -676,7 +775,7 @@ public class StatsRulesProcFactory {
private long evaluateChildExpr(Statistics stats, ExprNodeDesc child,
AnnotateStatsProcCtx aspCtx, List<String> neededCols,
- FilterOperator fop, long evaluatedRowCount) throws CloneNotSupportedException {
+ FilterOperator fop, long evaluatedRowCount) throws CloneNotSupportedException, SemanticException {
long numRows = stats.getNumRows();
@@ -761,7 +860,7 @@ public class StatsRulesProcFactory {
} else if (udf instanceof GenericUDFOPNull) {
return evaluateColEqualsNullExpr(stats, genFunc);
} else if (udf instanceof GenericUDFOPAnd || udf instanceof GenericUDFOPOr
- || udf instanceof GenericUDFOPNot) {
+ || udf instanceof GenericUDFIn || udf instanceof GenericUDFOPNot) {
return evaluateExpression(stats, genFunc, aspCtx, neededCols, fop, evaluatedRowCount);
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out b/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out
index f48a5a4..132b590 100644
--- a/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out
@@ -442,14 +442,14 @@ STAGE PLANS:
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (((_col1) IN ('2008-04-08', '2008-04-10') and (_col1) IN ('2008-04-08', '2008-04-09') and (_col3 = '2008-04-10')) or (_col3 = '2008-04-08')) (type: boolean)
- Statistics: Num rows: 343 Data size: 3643 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 344 Data size: 3654 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 343 Data size: 3643 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 344 Data size: 3654 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 343 Data size: 3643 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 344 Data size: 3654 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out b/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out
index 5362390..c5488de 100644
--- a/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out
+++ b/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out
@@ -72,7 +72,7 @@ STAGE PLANS:
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((VALUE._col0) IN ('val_100', 'val_200', 'val_300') and (KEY._col0) IN (100, 150, 200)) (type: boolean)
- Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
keys: KEY._col0 (type: string)
@@ -93,7 +93,7 @@ STAGE PLANS:
name: default.e1
Filter Operator
predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean)
- Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
keys: KEY._col0 (type: string)
@@ -404,7 +404,7 @@ STAGE PLANS:
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((VALUE._col0) IN ('val_100', 'val_200', 'val_300') and (KEY._col0) IN (100, 150, 200)) (type: boolean)
- Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
keys: KEY._col0 (type: string)
@@ -425,7 +425,7 @@ STAGE PLANS:
name: default.e1
Filter Operator
predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean)
- Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
keys: KEY._col0 (type: string)
http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning_2.q.out b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning_2.q.out
index 6f93b6a..db3b85d 100644
--- a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning_2.q.out
@@ -208,31 +208,31 @@ STAGE PLANS:
Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((label) IN ('foo', 'bar') and id is not null) (type: boolean)
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: id (type: int), label (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Select Operator
expressions: _col0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Dynamic Partitioning Event Operator
Target column: dim_shops_id (int)
Target Input: agg
Partition key expr: dim_shops_id
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Target Vertex: Map 1
Execution mode: llap
LLAP IO: no inputs
@@ -382,16 +382,16 @@ STAGE PLANS:
Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((label) IN ('foo', 'bar') and id is not null) (type: boolean)
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: id (type: int), label (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -757,31 +757,31 @@ STAGE PLANS:
Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((label) IN ('foo', 'bar') and id is not null) (type: boolean)
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: id (type: int), label (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Select Operator
expressions: _col0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Dynamic Partitioning Event Operator
Target column: dim_shops_id (int)
Target Input: agg
Partition key expr: dim_shops_id
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Target Vertex: Map 1
Execution mode: llap
LLAP IO: no inputs
http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/pcs.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pcs.q.out b/ql/src/test/results/clientpositive/pcs.q.out
index a1382f1..d6d2431 100644
--- a/ql/src/test/results/clientpositive/pcs.q.out
+++ b/ql/src/test/results/clientpositive/pcs.q.out
@@ -921,17 +921,17 @@ STAGE PLANS:
Filter Operator
isSamplingPred: false
predicate: (struct(_col2,_col0,_col8)) IN (const struct('2000-04-08',1,'2000-04-09'), const struct('2000-04-09',2,'2000-04-08')) (type: boolean)
- Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col2 (type: string), _col6 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/perf/query17.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query17.q.out b/ql/src/test/results/clientpositive/perf/query17.q.out
index f98ed99..1b5a640 100644
--- a/ql/src/test/results/clientpositive/perf/query17.q.out
+++ b/ql/src/test/results/clientpositive/perf/query17.q.out
@@ -71,9 +71,9 @@ Stage-0
<-Map 15 [SIMPLE_EDGE]
SHUFFLE [RS_37]
PartitionCols:_col0
- Select Operator [SEL_17] (rows=36524 width=1119)
+ Select Operator [SEL_17] (rows=36525 width=1119)
Output:["_col0"]
- Filter Operator [FIL_95] (rows=36524 width=1119)
+ Filter Operator [FIL_95] (rows=36525 width=1119)
predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null)
TableScan [TS_15] (rows=73049 width=1119)
default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_quarter_name"]
@@ -85,9 +85,9 @@ Stage-0
<-Map 14 [SIMPLE_EDGE]
SHUFFLE [RS_34]
PartitionCols:_col0
- Select Operator [SEL_14] (rows=36524 width=1119)
+ Select Operator [SEL_14] (rows=36525 width=1119)
Output:["_col0"]
- Filter Operator [FIL_94] (rows=36524 width=1119)
+ Filter Operator [FIL_94] (rows=36525 width=1119)
predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null)
TableScan [TS_12] (rows=73049 width=1119)
default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_quarter_name"]
http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/perf/query29.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query29.q.out b/ql/src/test/results/clientpositive/perf/query29.q.out
index 0f4116a..39aca92 100644
--- a/ql/src/test/results/clientpositive/perf/query29.q.out
+++ b/ql/src/test/results/clientpositive/perf/query29.q.out
@@ -52,7 +52,7 @@ Stage-0
<-Reducer 7 [SIMPLE_EDGE]
SHUFFLE [RS_42]
PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_102] (rows=44193 width=1119)
+ Merge Join Operator [MERGEJOIN_102] (rows=44194 width=1119)
Conds:RS_39._col3=RS_40._col0(Inner),Output:["_col1","_col5","_col10","_col14","_col24","_col25"]
<-Map 16 [SIMPLE_EDGE]
SHUFFLE [RS_40]
@@ -66,14 +66,14 @@ Stage-0
<-Reducer 6 [SIMPLE_EDGE]
SHUFFLE [RS_39]
PartitionCols:_col3
- Merge Join Operator [MERGEJOIN_101] (rows=40176 width=1119)
+ Merge Join Operator [MERGEJOIN_101] (rows=40177 width=1119)
Conds:RS_36._col11=RS_37._col0(Inner),Output:["_col1","_col3","_col5","_col10","_col14"]
<-Map 15 [SIMPLE_EDGE]
SHUFFLE [RS_37]
PartitionCols:_col0
- Select Operator [SEL_17] (rows=36524 width=1119)
+ Select Operator [SEL_17] (rows=36525 width=1119)
Output:["_col0"]
- Filter Operator [FIL_94] (rows=36524 width=1119)
+ Filter Operator [FIL_94] (rows=36525 width=1119)
predicate:((d_year) IN (2000, 2001, 2002) and d_date_sk is not null)
TableScan [TS_15] (rows=73049 width=1119)
default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"]
http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/perf/query46.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query46.q.out b/ql/src/test/results/clientpositive/perf/query46.q.out
index 2bd87aa..11804c9 100644
--- a/ql/src/test/results/clientpositive/perf/query46.q.out
+++ b/ql/src/test/results/clientpositive/perf/query46.q.out
@@ -83,7 +83,7 @@ Stage-0
<-Reducer 4 [SIMPLE_EDGE]
SHUFFLE [RS_24]
PartitionCols:_col3
- Merge Join Operator [MERGEJOIN_87] (rows=24305 width=1119)
+ Merge Join Operator [MERGEJOIN_87] (rows=24306 width=1119)
Conds:RS_21._col2=RS_22._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7"]
<-Map 12 [SIMPLE_EDGE]
SHUFFLE [RS_22]
@@ -97,7 +97,7 @@ Stage-0
<-Reducer 3 [SIMPLE_EDGE]
SHUFFLE [RS_21]
PartitionCols:_col2
- Merge Join Operator [MERGEJOIN_86] (rows=22096 width=1119)
+ Merge Join Operator [MERGEJOIN_86] (rows=22097 width=1119)
Conds:RS_18._col4=RS_19._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"]
<-Map 11 [SIMPLE_EDGE]
SHUFFLE [RS_19]
@@ -111,7 +111,7 @@ Stage-0
<-Reducer 2 [SIMPLE_EDGE]
SHUFFLE [RS_18]
PartitionCols:_col4
- Merge Join Operator [MERGEJOIN_85] (rows=20088 width=1119)
+ Merge Join Operator [MERGEJOIN_85] (rows=20089 width=1119)
Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
<-Map 1 [SIMPLE_EDGE]
SHUFFLE [RS_15]
@@ -125,9 +125,9 @@ Stage-0
<-Map 10 [SIMPLE_EDGE]
SHUFFLE [RS_16]
PartitionCols:_col0
- Select Operator [SEL_5] (rows=18262 width=1119)
+ Select Operator [SEL_5] (rows=18263 width=1119)
Output:["_col0"]
- Filter Operator [FIL_79] (rows=18262 width=1119)
+ Filter Operator [FIL_79] (rows=18263 width=1119)
predicate:((d_dow) IN (6, 0) and (d_year) IN (1998, 1999, 2000) and d_date_sk is not null)
TableScan [TS_3] (rows=73049 width=1119)
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_dow"]
http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/perf/query89.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query89.q.out b/ql/src/test/results/clientpositive/perf/query89.q.out
index 75f7385..de91d9b 100644
--- a/ql/src/test/results/clientpositive/perf/query89.q.out
+++ b/ql/src/test/results/clientpositive/perf/query89.q.out
@@ -117,9 +117,9 @@ Stage-0
<-Map 9 [SIMPLE_EDGE]
SHUFFLE [RS_16]
PartitionCols:_col0
- Select Operator [SEL_8] (rows=36524 width=1119)
+ Select Operator [SEL_8] (rows=36525 width=1119)
Output:["_col0","_col2"]
- Filter Operator [FIL_49] (rows=36524 width=1119)
+ Filter Operator [FIL_49] (rows=36525 width=1119)
predicate:((d_year) IN (2000) and d_date_sk is not null)
TableScan [TS_6] (rows=73049 width=1119)
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/pointlookup.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pointlookup.q.out b/ql/src/test/results/clientpositive/pointlookup.q.out
index 460cc74..78dd7bc 100644
--- a/ql/src/test/results/clientpositive/pointlookup.q.out
+++ b/ql/src/test/results/clientpositive/pointlookup.q.out
@@ -111,14 +111,14 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3')) (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -177,14 +177,14 @@ STAGE PLANS:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3')) (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/pointlookup2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pointlookup2.q.out b/ql/src/test/results/clientpositive/pointlookup2.q.out
index 869e4cd..6fc6e7f 100644
--- a/ql/src/test/results/clientpositive/pointlookup2.q.out
+++ b/ql/src/test/results/clientpositive/pointlookup2.q.out
@@ -1169,7 +1169,7 @@ STAGE PLANS:
Filter Operator
isSamplingPred: false
predicate: (struct(_col2,_col4)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean)
- Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
@@ -1197,7 +1197,7 @@ STAGE PLANS:
key expressions: _col4 (type: int), _col5 (type: string), _col2 (type: string)
null sort order: aaa
sort order: +++
- Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
tag: -1
value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
auto parallelism: false
@@ -1231,13 +1231,13 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -1590,7 +1590,7 @@ STAGE PLANS:
Filter Operator
isSamplingPred: false
predicate: (struct(_col0,_col3)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
- Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
@@ -1618,7 +1618,7 @@ STAGE PLANS:
key expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
null sort order: aaa
sort order: +++
- Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: NONE
tag: -1
value expressions: _col2 (type: string), _col4 (type: int), _col5 (type: string)
auto parallelism: false
@@ -1652,13 +1652,13 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/pointlookup3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pointlookup3.q.out b/ql/src/test/results/clientpositive/pointlookup3.q.out
index e98ba76..2b25b39 100644
--- a/ql/src/test/results/clientpositive/pointlookup3.q.out
+++ b/ql/src/test/results/clientpositive/pointlookup3.q.out
@@ -1337,7 +1337,7 @@ STAGE PLANS:
Filter Operator
isSamplingPred: false
predicate: (struct(_col2,_col4)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean)
- Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
@@ -1365,7 +1365,7 @@ STAGE PLANS:
key expressions: _col4 (type: int), _col5 (type: string), _col2 (type: string)
null sort order: aaa
sort order: +++
- Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
tag: -1
value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string), _col6 (type: string), _col7 (type: string)
auto parallelism: false
@@ -1399,13 +1399,13 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out b/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out
index 7bb3ff2..982d719 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out
@@ -78,7 +78,7 @@ STAGE PLANS:
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((VALUE._col0) IN ('val_100', 'val_200', 'val_300') and (KEY._col0) IN (100, 150, 200)) (type: boolean)
- Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
keys: KEY._col0 (type: string)
@@ -99,7 +99,7 @@ STAGE PLANS:
name: default.e1
Filter Operator
predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean)
- Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
keys: KEY._col0 (type: string)
@@ -422,7 +422,7 @@ STAGE PLANS:
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((VALUE._col0) IN ('val_100', 'val_200', 'val_300') and (KEY._col0) IN (100, 150, 200)) (type: boolean)
- Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
keys: KEY._col0 (type: string)
@@ -443,7 +443,7 @@ STAGE PLANS:
name: default.e1
Filter Operator
predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean)
- Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
keys: KEY._col0 (type: string)
http://git-wip-us.apache.org/repos/asf/hive/blob/3fec161d/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out b/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out
index e129795..71b7ee3 100644
--- a/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out
+++ b/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out
@@ -206,31 +206,31 @@ STAGE PLANS:
Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((label) IN ('foo', 'bar') and id is not null) (type: boolean)
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: id (type: int), label (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Select Operator
expressions: _col0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Dynamic Partitioning Event Operator
Target column: dim_shops_id (int)
Target Input: agg
Partition key expr: dim_shops_id
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Target Vertex: Map 1
Reducer 2
Reduce Operator Tree:
@@ -374,16 +374,16 @@ STAGE PLANS:
Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((label) IN ('foo', 'bar') and id is not null) (type: boolean)
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: id (type: int), label (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Reducer 2
Reduce Operator Tree:
@@ -735,31 +735,31 @@ STAGE PLANS:
Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((label) IN ('foo', 'bar') and id is not null) (type: boolean)
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: id (type: int), label (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Select Operator
expressions: _col0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Group By Operator
keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Dynamic Partitioning Event Operator
Target column: dim_shops_id (int)
Target Input: agg
Partition key expr: dim_shops_id
- Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Target Vertex: Map 1
Reducer 2
Reduce Operator Tree: