You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2016/10/25 11:14:57 UTC
[3/3] hive git commit: HIVE-15029: Add logic to estimate stats for
BETWEEN operator (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
HIVE-15029: Add logic to estimate stats for BETWEEN operator (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e2653db3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e2653db3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e2653db3
Branch: refs/heads/master
Commit: e2653db377ab7fff34563d348364fd0c92f359c6
Parents: 749e831
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Fri Oct 21 12:30:06 2016 +0100
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Tue Oct 25 07:13:09 2016 -0400
----------------------------------------------------------------------
.../stats/annotation/StatsRulesProcFactory.java | 34 +++++-
.../clientpositive/llap/explainuser_4.q.out | 78 ++++++------
.../llap/orc_predicate_pushdown.q.out | 20 +--
.../llap/parquet_predicate_pushdown.q.out | 44 +++----
.../llap/tez_dynpart_hashjoin_1.q.out | 120 +++++++++---------
.../llap/tez_vector_dynpart_hashjoin_1.q.out | 122 +++++++++----------
.../llap/vector_between_columns.q.out | 6 +-
.../clientpositive/llap/vector_between_in.q.out | 34 +++---
.../results/clientpositive/perf/query12.q.out | 2 +-
.../results/clientpositive/perf/query13.q.out | 24 ++--
.../results/clientpositive/perf/query20.q.out | 4 +-
.../results/clientpositive/perf/query21.q.out | 8 +-
.../results/clientpositive/perf/query22.q.out | 4 +-
.../results/clientpositive/perf/query25.q.out | 8 +-
.../results/clientpositive/perf/query28.q.out | 36 +++---
.../results/clientpositive/perf/query29.q.out | 4 +-
.../results/clientpositive/perf/query32.q.out | 8 +-
.../results/clientpositive/perf/query34.q.out | 12 +-
.../results/clientpositive/perf/query40.q.out | 8 +-
.../results/clientpositive/perf/query48.q.out | 18 +--
.../results/clientpositive/perf/query51.q.out | 8 +-
.../results/clientpositive/perf/query54.q.out | 4 +-
.../results/clientpositive/perf/query58.q.out | 12 +-
.../results/clientpositive/perf/query64.q.out | 8 +-
.../results/clientpositive/perf/query65.q.out | 8 +-
.../results/clientpositive/perf/query66.q.out | 8 +-
.../results/clientpositive/perf/query67.q.out | 4 +-
.../results/clientpositive/perf/query68.q.out | 4 +-
.../results/clientpositive/perf/query70.q.out | 8 +-
.../results/clientpositive/perf/query73.q.out | 12 +-
.../results/clientpositive/perf/query79.q.out | 4 +-
.../results/clientpositive/perf/query80.q.out | 12 +-
.../results/clientpositive/perf/query82.q.out | 14 +--
.../results/clientpositive/perf/query85.q.out | 42 +++----
.../results/clientpositive/perf/query87.q.out | 12 +-
.../results/clientpositive/perf/query90.q.out | 16 +--
.../results/clientpositive/perf/query94.q.out | 4 +-
.../results/clientpositive/perf/query95.q.out | 4 +-
.../results/clientpositive/perf/query97.q.out | 8 +-
.../results/clientpositive/perf/query98.q.out | 4 +-
.../spark/vector_between_in.q.out | 34 +++---
.../clientpositive/tez/explainanalyze_4.q.out | 80 ++++++------
.../results/clientpositive/udf_between.q.out | 8 +-
.../clientpositive/vector_between_columns.q.out | 6 +-
44 files changed, 475 insertions(+), 443 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e2653db3/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index ab07fb6..aa1e509 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -71,6 +71,7 @@ import org.apache.hadoop.hive.ql.plan.Statistics;
import org.apache.hadoop.hive.ql.stats.StatsUtils;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
@@ -89,6 +90,7 @@ import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -354,6 +356,9 @@ public class StatsRulesProcFactory {
} else if (udf instanceof GenericUDFIn) {
// for IN clause
newNumRows = evaluateInExpr(stats, pred, aspCtx, neededCols, fop);
+ } else if (udf instanceof GenericUDFBetween) {
+ // for BETWEEN clause
+ newNumRows = evaluateBetweenExpr(stats, pred, aspCtx, neededCols, fop);
} else if (udf instanceof GenericUDFOPNot) {
newNumRows = evaluateNotExpr(stats, pred, aspCtx, neededCols, fop);
} else if (udf instanceof GenericUDFOPNotNull) {
@@ -480,6 +485,32 @@ public class StatsRulesProcFactory {
return Math.round( (double)numRows * factor * inFactor);
}
+ private long evaluateBetweenExpr(Statistics stats, ExprNodeDesc pred, AnnotateStatsProcCtx aspCtx,
+ List<String> neededCols, FilterOperator fop) throws SemanticException, CloneNotSupportedException {
+ final ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) pred;
+ final boolean invert = Boolean.TRUE.equals(
+ ((ExprNodeConstantDesc) fd.getChildren().get(0)).getValue()); // boolean invert (not)
+ final ExprNodeDesc comparisonExpression = fd.getChildren().get(1); // expression
+ final ExprNodeDesc leftExpression = fd.getChildren().get(2); // left expression
+ final ExprNodeDesc rightExpression = fd.getChildren().get(3); // right expression
+
+ // We transform the BETWEEN clause to an AND clause (with NOT on top if invert is true).
+ // This is more straightforward, as the evaluateExpression method will deal with
+ // generating the final row count relying on the basic comparator evaluation methods
+ final ExprNodeDesc leftComparator = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
+ new GenericUDFOPEqualOrGreaterThan(), Lists.newArrayList(comparisonExpression, leftExpression));
+ final ExprNodeDesc rightComparator = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
+ new GenericUDFOPEqualOrLessThan(), Lists.newArrayList(comparisonExpression, rightExpression));
+ ExprNodeDesc newExpression = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
+ new GenericUDFOPAnd(), Lists.newArrayList(leftComparator, rightComparator));
+ if (invert) {
+ newExpression = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
+ new GenericUDFOPNot(), Lists.newArrayList(newExpression));
+ }
+
+ return evaluateExpression(stats, newExpression, aspCtx, neededCols, fop, 0);
+ }
+
private long evaluateNotExpr(Statistics stats, ExprNodeDesc pred,
AnnotateStatsProcCtx aspCtx, List<String> neededCols, FilterOperator fop)
throws CloneNotSupportedException, SemanticException {
@@ -866,7 +897,8 @@ public class StatsRulesProcFactory {
} else if (udf instanceof GenericUDFOPNull) {
return evaluateColEqualsNullExpr(stats, genFunc);
} else if (udf instanceof GenericUDFOPAnd || udf instanceof GenericUDFOPOr
- || udf instanceof GenericUDFIn || udf instanceof GenericUDFOPNot) {
+ || udf instanceof GenericUDFIn || udf instanceof GenericUDFBetween
+ || udf instanceof GenericUDFOPNot) {
return evaluateExpression(stats, genFunc, aspCtx, neededCols, fop, evaluatedRowCount);
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/e2653db3/ql/src/test/results/clientpositive/llap/explainuser_4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/explainuser_4.q.out b/ql/src/test/results/clientpositive/llap/explainuser_4.q.out
index 0978ddd..e83d6d8 100644
--- a/ql/src/test/results/clientpositive/llap/explainuser_4.q.out
+++ b/ql/src/test/results/clientpositive/llap/explainuser_4.q.out
@@ -28,27 +28,27 @@ Stage-0
Stage-1
Reducer 3 llap
File Output Operator [FS_12]
- Select Operator [SEL_11] (rows=9759 width=620)
+ Select Operator [SEL_11] (rows=2166 width=620)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"]
<-Reducer 2 [SIMPLE_EDGE] llap
SHUFFLE [RS_10]
- Merge Join Operator [MERGEJOIN_17] (rows=9759 width=620)
+ Merge Join Operator [MERGEJOIN_17] (rows=2166 width=620)
Conds:RS_6._col2=RS_7._col2(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"]
<-Map 1 [SIMPLE_EDGE] llap
SHUFFLE [RS_6]
PartitionCols:_col2
- Select Operator [SEL_2] (rows=6144 width=251)
+ Select Operator [SEL_2] (rows=1365 width=251)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"]
- Filter Operator [FIL_15] (rows=6144 width=251)
+ Filter Operator [FIL_15] (rows=1365 width=251)
predicate:cint BETWEEN 1000000 AND 3000000
TableScan [TS_0] (rows=12288 width=251)
default@alltypesorc,a,Tbl:COMPLETE,Col:COMPLETE,Output:["ctinyint","csmallint","cint","cbigint","cfloat","cdouble","cstring1","cstring2","ctimestamp1","ctimestamp2","cboolean1","cboolean2"]
<-Map 4 [SIMPLE_EDGE] llap
SHUFFLE [RS_7]
PartitionCols:_col2
- Select Operator [SEL_5] (rows=4586 width=251)
+ Select Operator [SEL_5] (rows=1019 width=251)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"]
- Filter Operator [FIL_16] (rows=4586 width=251)
+ Filter Operator [FIL_16] (rows=1019 width=251)
predicate:(cint BETWEEN 1000000 AND 3000000 and cbigint is not null)
TableScan [TS_3] (rows=12288 width=251)
default@alltypesorc,b,Tbl:COMPLETE,Col:COMPLETE,Output:["ctinyint","csmallint","cint","cbigint","cfloat","cdouble","cstring1","cstring2","ctimestamp1","ctimestamp2","cboolean1","cboolean2"]
@@ -113,23 +113,23 @@ Stage-0
SHUFFLE [RS_11]
Group By Operator [GBY_10] (rows=1 width=8)
Output:["_col0"],aggregations:["count()"]
- Merge Join Operator [MERGEJOIN_19] (rows=9759 width=8)
+ Merge Join Operator [MERGEJOIN_19] (rows=2166 width=8)
Conds:RS_6._col0=RS_7._col0(Inner)
<-Map 1 [SIMPLE_EDGE] llap
SHUFFLE [RS_6]
PartitionCols:_col0
- Select Operator [SEL_2] (rows=6144 width=2)
+ Select Operator [SEL_2] (rows=1365 width=2)
Output:["_col0"]
- Filter Operator [FIL_17] (rows=6144 width=2)
+ Filter Operator [FIL_17] (rows=1365 width=2)
predicate:cint BETWEEN 1000000 AND 3000000
TableScan [TS_0] (rows=12288 width=2)
default@alltypesorc,a,Tbl:COMPLETE,Col:COMPLETE,Output:["cint"]
<-Map 4 [SIMPLE_EDGE] llap
SHUFFLE [RS_7]
PartitionCols:_col0
- Select Operator [SEL_5] (rows=4586 width=8)
+ Select Operator [SEL_5] (rows=1019 width=8)
Output:["_col0"]
- Filter Operator [FIL_18] (rows=4586 width=8)
+ Filter Operator [FIL_18] (rows=1019 width=8)
predicate:(cint BETWEEN 1000000 AND 3000000 and cbigint is not null)
TableScan [TS_3] (rows=12288 width=8)
default@alltypesorc,b,Tbl:COMPLETE,Col:COMPLETE,Output:["cint","cbigint"]
@@ -182,34 +182,34 @@ Stage-0
Stage-1
Reducer 4 llap
File Output Operator [FS_16]
- Select Operator [SEL_15] (rows=2765 width=12)
+ Select Operator [SEL_15] (rows=615 width=12)
Output:["_col0","_col1"]
<-Reducer 3 [SIMPLE_EDGE] llap
SHUFFLE [RS_14]
- Group By Operator [GBY_12] (rows=2765 width=12)
+ Group By Operator [GBY_12] (rows=615 width=12)
Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
<-Reducer 2 [SIMPLE_EDGE] llap
SHUFFLE [RS_11]
PartitionCols:_col0
- Group By Operator [GBY_10] (rows=2765 width=12)
+ Group By Operator [GBY_10] (rows=615 width=12)
Output:["_col0","_col1"],aggregations:["count()"],keys:_col0
- Merge Join Operator [MERGEJOIN_21] (rows=9759 width=4)
+ Merge Join Operator [MERGEJOIN_21] (rows=2166 width=4)
Conds:RS_6._col1=RS_7._col0(Inner),Output:["_col0"]
<-Map 1 [SIMPLE_EDGE] llap
SHUFFLE [RS_6]
PartitionCols:_col1
- Select Operator [SEL_2] (rows=6144 width=5)
+ Select Operator [SEL_2] (rows=1365 width=5)
Output:["_col0","_col1"]
- Filter Operator [FIL_19] (rows=6144 width=5)
+ Filter Operator [FIL_19] (rows=1365 width=5)
predicate:cint BETWEEN 1000000 AND 3000000
TableScan [TS_0] (rows=12288 width=5)
default@alltypesorc,a,Tbl:COMPLETE,Col:COMPLETE,Output:["csmallint","cint"]
<-Map 5 [SIMPLE_EDGE] llap
SHUFFLE [RS_7]
PartitionCols:_col0
- Select Operator [SEL_5] (rows=4586 width=8)
+ Select Operator [SEL_5] (rows=1019 width=8)
Output:["_col0"]
- Filter Operator [FIL_20] (rows=4586 width=8)
+ Filter Operator [FIL_20] (rows=1019 width=8)
predicate:(cint BETWEEN 1000000 AND 3000000 and cbigint is not null)
TableScan [TS_3] (rows=12288 width=8)
default@alltypesorc,b,Tbl:COMPLETE,Col:COMPLETE,Output:["cint","cbigint"]
@@ -269,27 +269,27 @@ Stage-0
Stage-1
Reducer 3 llap
File Output Operator [FS_12]
- Select Operator [SEL_11] (rows=6758 width=215)
+ Select Operator [SEL_11] (rows=1501 width=215)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"]
<-Reducer 2 [SIMPLE_EDGE] llap
SHUFFLE [RS_10]
- Map Join Operator [MAPJOIN_17] (rows=6758 width=215)
+ Map Join Operator [MAPJOIN_17] (rows=1501 width=215)
Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"]
<-Map 4 [CUSTOM_SIMPLE_EDGE] llap
PARTITION_ONLY_SHUFFLE [RS_7]
PartitionCols:_col2
- Select Operator [SEL_5] (rows=6144 width=215)
+ Select Operator [SEL_5] (rows=1365 width=215)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"]
- Filter Operator [FIL_16] (rows=6144 width=215)
+ Filter Operator [FIL_16] (rows=1365 width=215)
predicate:(cint BETWEEN 1000000 AND 3000000 and cbigint is not null)
TableScan [TS_3] (rows=12288 width=215)
default@alltypesorc,b,Tbl:COMPLETE,Col:NONE,Output:["ctinyint","csmallint","cint","cbigint","cfloat","cdouble","cstring1","cstring2","ctimestamp1","ctimestamp2","cboolean1","cboolean2"]
<-Map 1 [CUSTOM_SIMPLE_EDGE] llap
PARTITION_ONLY_SHUFFLE [RS_6]
PartitionCols:_col2
- Select Operator [SEL_2] (rows=6144 width=215)
+ Select Operator [SEL_2] (rows=1365 width=215)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"]
- Filter Operator [FIL_15] (rows=6144 width=215)
+ Filter Operator [FIL_15] (rows=1365 width=215)
predicate:cint BETWEEN 1000000 AND 3000000
TableScan [TS_0] (rows=12288 width=215)
default@alltypesorc,a,Tbl:COMPLETE,Col:NONE,Output:["ctinyint","csmallint","cint","cbigint","cfloat","cdouble","cstring1","cstring2","ctimestamp1","ctimestamp2","cboolean1","cboolean2"]
@@ -354,23 +354,23 @@ Stage-0
SHUFFLE [RS_11]
Group By Operator [GBY_10] (rows=1 width=8)
Output:["_col0"],aggregations:["count()"]
- Map Join Operator [MAPJOIN_19] (rows=6758 width=215)
+ Map Join Operator [MAPJOIN_19] (rows=1501 width=215)
Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),HybridGraceHashJoin:true
<-Map 4 [CUSTOM_SIMPLE_EDGE] llap
PARTITION_ONLY_SHUFFLE [RS_7]
PartitionCols:_col0
- Select Operator [SEL_5] (rows=6144 width=215)
+ Select Operator [SEL_5] (rows=1365 width=215)
Output:["_col0"]
- Filter Operator [FIL_18] (rows=6144 width=215)
+ Filter Operator [FIL_18] (rows=1365 width=215)
predicate:(cint BETWEEN 1000000 AND 3000000 and cbigint is not null)
TableScan [TS_3] (rows=12288 width=215)
default@alltypesorc,b,Tbl:COMPLETE,Col:NONE,Output:["cint","cbigint"]
<-Map 1 [CUSTOM_SIMPLE_EDGE] llap
PARTITION_ONLY_SHUFFLE [RS_6]
PartitionCols:_col0
- Select Operator [SEL_2] (rows=6144 width=215)
+ Select Operator [SEL_2] (rows=1365 width=215)
Output:["_col0"]
- Filter Operator [FIL_17] (rows=6144 width=215)
+ Filter Operator [FIL_17] (rows=1365 width=215)
predicate:cint BETWEEN 1000000 AND 3000000
TableScan [TS_0] (rows=12288 width=215)
default@alltypesorc,a,Tbl:COMPLETE,Col:NONE,Output:["cint"]
@@ -423,34 +423,34 @@ Stage-0
Stage-1
Reducer 4 llap
File Output Operator [FS_16]
- Select Operator [SEL_15] (rows=3379 width=215)
+ Select Operator [SEL_15] (rows=750 width=215)
Output:["_col0","_col1"]
<-Reducer 3 [SIMPLE_EDGE] llap
SHUFFLE [RS_14]
- Group By Operator [GBY_12] (rows=3379 width=215)
+ Group By Operator [GBY_12] (rows=750 width=215)
Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0
<-Reducer 2 [SIMPLE_EDGE] llap
SHUFFLE [RS_11]
PartitionCols:_col0
- Group By Operator [GBY_10] (rows=6758 width=215)
+ Group By Operator [GBY_10] (rows=1501 width=215)
Output:["_col0","_col1"],aggregations:["count()"],keys:_col0
- Map Join Operator [MAPJOIN_21] (rows=6758 width=215)
+ Map Join Operator [MAPJOIN_21] (rows=1501 width=215)
Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),HybridGraceHashJoin:true,Output:["_col0"]
<-Map 5 [CUSTOM_SIMPLE_EDGE] llap
PARTITION_ONLY_SHUFFLE [RS_7]
PartitionCols:_col0
- Select Operator [SEL_5] (rows=6144 width=215)
+ Select Operator [SEL_5] (rows=1365 width=215)
Output:["_col0"]
- Filter Operator [FIL_20] (rows=6144 width=215)
+ Filter Operator [FIL_20] (rows=1365 width=215)
predicate:(cint BETWEEN 1000000 AND 3000000 and cbigint is not null)
TableScan [TS_3] (rows=12288 width=215)
default@alltypesorc,b,Tbl:COMPLETE,Col:NONE,Output:["cint","cbigint"]
<-Map 1 [CUSTOM_SIMPLE_EDGE] llap
PARTITION_ONLY_SHUFFLE [RS_6]
PartitionCols:_col1
- Select Operator [SEL_2] (rows=6144 width=215)
+ Select Operator [SEL_2] (rows=1365 width=215)
Output:["_col0","_col1"]
- Filter Operator [FIL_19] (rows=6144 width=215)
+ Filter Operator [FIL_19] (rows=1365 width=215)
predicate:cint BETWEEN 1000000 AND 3000000
TableScan [TS_0] (rows=12288 width=215)
default@alltypesorc,a,Tbl:COMPLETE,Col:NONE,Output:["csmallint","cint"]
@@ -475,8 +475,8 @@ order by c1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
+-13036 1
-8915 1
-3799 1
10782 1
--13036 1
NULL 6
http://git-wip-us.apache.org/repos/asf/hive/blob/e2653db3/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out b/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out
index db0baee..48a86cf 100644
--- a/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out
@@ -608,15 +608,15 @@ STAGE PLANS:
Statistics: Num rows: 232 Data size: 24150 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (s is not null and (s like 'bob%') and (not (t) IN (-1, -2, -3)) and t BETWEEN 25 AND 30) (type: boolean)
- Statistics: Num rows: 29 Data size: 3018 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: t (type: tinyint), s (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 29 Data size: 3018 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint), _col1 (type: string)
sort order: ++
- Statistics: Num rows: 29 Data size: 3018 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE
Execution mode: llap
LLAP IO: all inputs
Reducer 2
@@ -625,10 +625,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 29 Data size: 3018 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 29 Data size: 3018 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -674,15 +674,15 @@ STAGE PLANS:
Statistics: Num rows: 232 Data size: 24150 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (s is not null and (s like 'bob%') and (not (t) IN (-1, -2, -3)) and t BETWEEN 25 AND 30) (type: boolean)
- Statistics: Num rows: 29 Data size: 3018 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: t (type: tinyint), s (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 29 Data size: 3018 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint), _col1 (type: string)
sort order: ++
- Statistics: Num rows: 29 Data size: 3018 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE
Execution mode: llap
LLAP IO: all inputs
Reducer 2
@@ -691,10 +691,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 29 Data size: 3018 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 29 Data size: 3018 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 624 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/e2653db3/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out b/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out
index 6541772..3254fb4 100644
--- a/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out
@@ -544,15 +544,15 @@ STAGE PLANS:
Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (s is not null and (s like 'bob%') and (not (t) IN (-1, -2, -3)) and t BETWEEN 25 AND 30) (type: boolean)
- Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: t (type: tinyint), s (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint), _col1 (type: string)
sort order: ++
- Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -561,10 +561,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -610,15 +610,15 @@ STAGE PLANS:
Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (s is not null and (s like 'bob%') and (not (t) IN (-1, -2, -3)) and t BETWEEN 25 AND 30) (type: boolean)
- Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: t (type: tinyint), s (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint), _col1 (type: string)
sort order: ++
- Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -627,10 +627,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 131 Data size: 1441 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -741,15 +741,15 @@ STAGE PLANS:
Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and si BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean)
- Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col3 (type: string)
sort order: -
- Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double)
Execution mode: llap
@@ -760,13 +760,13 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 3
- Statistics: Num rows: 3 Data size: 33 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 3 Data size: 33 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -820,15 +820,15 @@ STAGE PLANS:
Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and si BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean)
- Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col3 (type: string)
sort order: -
- Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double)
Execution mode: llap
@@ -839,13 +839,13 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 5 Data size: 55 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 3
- Statistics: Num rows: 3 Data size: 33 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 3 Data size: 33 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/e2653db3/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out
index 5c8db64..25c6f15 100644
--- a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out
@@ -36,16 +36,16 @@ STAGE PLANS:
Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: cint BETWEEN 1000000 AND 3000000 (type: boolean)
- Statistics: Num rows: 6144 Data size: 1546640 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1365 Data size: 343800 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 6144 Data size: 1546640 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1365 Data size: 343800 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: int)
sort order: +
Map-reduce partition columns: _col2 (type: int)
- Statistics: Num rows: 6144 Data size: 1546640 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1365 Data size: 343800 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean)
Execution mode: llap
LLAP IO: all inputs
@@ -56,16 +56,16 @@ STAGE PLANS:
Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (cint BETWEEN 1000000 AND 3000000 and cbigint is not null) (type: boolean)
- Statistics: Num rows: 4586 Data size: 1154510 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1019 Data size: 256780 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 4586 Data size: 1154510 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1019 Data size: 256780 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: int)
sort order: +
Map-reduce partition columns: _col2 (type: int)
- Statistics: Num rows: 4586 Data size: 1154510 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1019 Data size: 256780 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean)
Execution mode: llap
LLAP IO: all inputs
@@ -79,11 +79,11 @@ STAGE PLANS:
0 _col2 (type: int)
1 _col2 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23
- Statistics: Num rows: 9759 Data size: 6050580 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2166 Data size: 1342920 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: int)
sort order: +
- Statistics: Num rows: 9759 Data size: 6050580 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2166 Data size: 1342920 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: tinyint), _col13 (type: smallint), _col14 (type: int), _col15 (type: bigint), _col16 (type: float), _col17 (type: double), _col18 (type: string), _col19 (type: string), _col20 (type: timestamp), _col21 (type: timestamp), _col22 (type: boolean), _col23 (type: boolean)
Reducer 3
Execution mode: llap
@@ -91,10 +91,10 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: double), VALUE._col5 (type: string), VALUE._col6 (type: string), VALUE._col7 (type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: tinyint), VALUE._col12 (type: smallint), VALUE._col13 (type: int), VALUE._col14 (type: bigint), VALUE._col15 (type: float), VALUE._col16 (type: double), VALUE._col17 (type: string), VALUE._col18 (type: string), VALUE._col19 (type: timestamp), VALUE._col20 (type: timestamp), VALUE._col21 (type: boolean), VALUE._col22 (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23
- Statistics: Num rows: 9759 Data size: 6050580 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2166 Data size: 1342920 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 9759 Data size: 6050580 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2166 Data size: 1342920 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -168,16 +168,16 @@ STAGE PLANS:
Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: cint BETWEEN 1000000 AND 3000000 (type: boolean)
- Statistics: Num rows: 6144 Data size: 18348 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1365 Data size: 4080 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: cint (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 6144 Data size: 18348 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1365 Data size: 4080 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 6144 Data size: 18348 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1365 Data size: 4080 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: all inputs
Map 4
@@ -187,16 +187,16 @@ STAGE PLANS:
Statistics: Num rows: 12288 Data size: 110088 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (cint BETWEEN 1000000 AND 3000000 and cbigint is not null) (type: boolean)
- Statistics: Num rows: 4586 Data size: 41088 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1019 Data size: 9144 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: cint (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 4586 Data size: 41088 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1019 Data size: 9144 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 4586 Data size: 41088 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1019 Data size: 9144 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: all inputs
Reducer 2
@@ -208,7 +208,7 @@ STAGE PLANS:
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- Statistics: Num rows: 9759 Data size: 78072 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2166 Data size: 17328 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
mode: hash
@@ -296,16 +296,16 @@ STAGE PLANS:
Statistics: Num rows: 12288 Data size: 73396 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: cint BETWEEN 1000000 AND 3000000 (type: boolean)
- Statistics: Num rows: 6144 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1365 Data size: 8160 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: csmallint (type: smallint), cint (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6144 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1365 Data size: 8160 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col1 (type: int)
sort order: +
Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 6144 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1365 Data size: 8160 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: smallint)
Execution mode: llap
LLAP IO: all inputs
@@ -316,16 +316,16 @@ STAGE PLANS:
Statistics: Num rows: 12288 Data size: 110088 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (cint BETWEEN 1000000 AND 3000000 and cbigint is not null) (type: boolean)
- Statistics: Num rows: 4586 Data size: 41088 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1019 Data size: 9144 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: cint (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 4586 Data size: 41088 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1019 Data size: 9144 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 4586 Data size: 41088 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1019 Data size: 9144 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: all inputs
Reducer 2
@@ -338,18 +338,18 @@ STAGE PLANS:
0 _col1 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 9759 Data size: 39036 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2166 Data size: 8664 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: _col0 (type: smallint)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2765 Data size: 33180 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 615 Data size: 7380 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: smallint)
sort order: +
Map-reduce partition columns: _col0 (type: smallint)
- Statistics: Num rows: 2765 Data size: 33180 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 615 Data size: 7380 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
Reducer 3
Execution mode: llap
@@ -359,11 +359,11 @@ STAGE PLANS:
keys: KEY._col0 (type: smallint)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2765 Data size: 33180 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 615 Data size: 7380 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col1 (type: bigint)
sort order: +
- Statistics: Num rows: 2765 Data size: 33180 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 615 Data size: 7380 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: smallint)
Reducer 4
Execution mode: llap
@@ -371,10 +371,10 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: smallint), KEY.reducesinkkey0 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2765 Data size: 33180 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 615 Data size: 7380 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 2765 Data size: 33180 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 615 Data size: 7380 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -449,16 +449,16 @@ STAGE PLANS:
Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: cint BETWEEN 1000000 AND 3000000 (type: boolean)
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: int)
sort order: +
Map-reduce partition columns: _col2 (type: int)
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean)
Execution mode: llap
LLAP IO: all inputs
@@ -469,16 +469,16 @@ STAGE PLANS:
Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (cint BETWEEN 1000000 AND 3000000 and cbigint is not null) (type: boolean)
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: int)
sort order: +
Map-reduce partition columns: _col2 (type: int)
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean)
Execution mode: llap
LLAP IO: all inputs
@@ -494,12 +494,12 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23
input vertices:
1 Map 4
- Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE
HybridGraceHashJoin: true
Reduce Output Operator
key expressions: _col2 (type: int)
sort order: +
- Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: tinyint), _col13 (type: smallint), _col14 (type: int), _col15 (type: bigint), _col16 (type: float), _col17 (type: double), _col18 (type: string), _col19 (type: string), _col20 (type: timestamp), _col21 (type: timestamp), _col22 (type: boolean), _col23 (type: boolean)
Reducer 3
Execution mode: llap
@@ -507,10 +507,10 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: double), VALUE._col5 (type: string), VALUE._col6 (type: string), VALUE._col7 (type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11 (type: tinyint), VALUE._col12 (type: smallint), VALUE._col13 (type: int), VALUE._col14 (type: bigint), VALUE._col15 (type: float), VALUE._col16 (type: double), VALUE._col17 (type: string), VALUE._col18 (type: string), VALUE._col19 (type: timestamp), VALUE._col20 (type: timestamp), VALUE._col21 (type: boolean), VALUE._col22 (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23
- Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -584,16 +584,16 @@ STAGE PLANS:
Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: cint BETWEEN 1000000 AND 3000000 (type: boolean)
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cint (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
Execution mode: llap
LLAP IO: all inputs
Map 4
@@ -603,16 +603,16 @@ STAGE PLANS:
Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (cint BETWEEN 1000000 AND 3000000 and cbigint is not null) (type: boolean)
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cint (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
Execution mode: llap
LLAP IO: all inputs
Reducer 2
@@ -626,7 +626,7 @@ STAGE PLANS:
1 KEY.reducesinkkey0 (type: int)
input vertices:
1 Map 4
- Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE
HybridGraceHashJoin: true
Group By Operator
aggregations: count()
@@ -715,16 +715,16 @@ STAGE PLANS:
Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: cint BETWEEN 1000000 AND 3000000 (type: boolean)
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: csmallint (type: smallint), cint (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: int)
sort order: +
Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: smallint)
Execution mode: llap
LLAP IO: all inputs
@@ -735,16 +735,16 @@ STAGE PLANS:
Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (cint BETWEEN 1000000 AND 3000000 and cbigint is not null) (type: boolean)
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cint (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
Execution mode: llap
LLAP IO: all inputs
Reducer 2
@@ -759,19 +759,19 @@ STAGE PLANS:
outputColumnNames: _col0
input vertices:
1 Map 5
- Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE
HybridGraceHashJoin: true
Group By Operator
aggregations: count()
keys: _col0 (type: smallint)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: smallint)
sort order: +
Map-reduce partition columns: _col0 (type: smallint)
- Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Reducer 3
Execution mode: llap
@@ -781,11 +781,11 @@ STAGE PLANS:
keys: KEY._col0 (type: smallint)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 750 Data size: 161305 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: smallint)
sort order: +
- Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 750 Data size: 161305 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Reducer 4
Execution mode: llap
@@ -793,10 +793,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 750 Data size: 161305 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 750 Data size: 161305 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat