You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kr...@apache.org on 2022/06/30 12:22:40 UTC
[hive] branch master updated: HIVE-26365: Remove column statistics collection task from merge statement plan (Krisztian Kasa, reviewed by Peter Vary, Alessandro Solimando, Aman Sinha)
This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new c4c73c0435 HIVE-26365: Remove column statistics collection task from merge statement plan (Krisztian Kasa, reviewed by Peter Vary, Alessandro Solimando, Aman Sinha)
c4c73c0435 is described below
commit c4c73c0435b34b332f65af77244db89ced12b44f
Author: Krisztian Kasa <kk...@cloudera.com>
AuthorDate: Thu Jun 30 14:22:28 2022 +0200
HIVE-26365: Remove column statistics collection task from merge statement plan (Krisztian Kasa, reviewed by Peter Vary, Alessandro Solimando, Aman Sinha)
---
.../hive/ql/parse/MergeSemanticAnalyzer.java | 11 +-
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 8 +-
.../ql/parse/UpdateDeleteSemanticAnalyzer.java | 5 +
.../clientnegative/merge_constraint_notnull.q.out | 6 +-
.../clientpositive/llap/acid_no_buckets.q.out | 288 +++--------
.../llap/dynamic_semijoin_reduction_3.q.out | 274 +++--------
.../llap/enforce_constraint_notnull.q.out | 289 +----------
.../llap/insert_into_default_keyword.q.out | 82 +---
.../clientpositive/llap/semijoin_hint.q.out | 208 ++------
.../results/clientpositive/llap/sqlmerge.q.out | 129 +----
.../clientpositive/llap/sqlmerge_stats.q.out | 537 ++-------------------
11 files changed, 229 insertions(+), 1608 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java
index eda54a5f07..4532c416f7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/MergeSemanticAnalyzer.java
@@ -49,6 +49,9 @@ import org.apache.hadoop.hive.ql.session.SessionState;
* they are actually inserts) and then doing some patch up to make them work as merges instead.
*/
public class MergeSemanticAnalyzer extends RewriteSemanticAnalyzer {
+ private int numWhenMatchedUpdateClauses;
+ private int numWhenMatchedDeleteClauses;
+
MergeSemanticAnalyzer(QueryState queryState) throws SemanticException {
super(queryState);
}
@@ -163,8 +166,9 @@ public class MergeSemanticAnalyzer extends RewriteSemanticAnalyzer {
* Update and Delete may be in any order. (Insert is always last)
*/
String extraPredicate = null;
- int numWhenMatchedUpdateClauses = 0, numWhenMatchedDeleteClauses = 0;
int numInsertClauses = 0;
+ numWhenMatchedUpdateClauses = 0;
+ numWhenMatchedDeleteClauses = 0;
boolean hintProcessed = false;
for (ASTNode whenClause : whenClauses) {
switch (getWhenClauseOperation(whenClause).getType()) {
@@ -706,4 +710,9 @@ public class MergeSemanticAnalyzer extends RewriteSemanticAnalyzer {
protected boolean allowOutputMultipleTimes() {
return conf.getBoolVar(HiveConf.ConfVars.SPLIT_UPDATE) || conf.getBoolVar(HiveConf.ConfVars.MERGE_SPLIT_UPDATE);
}
+
+ @Override
+ protected boolean enableColumnStatsCollecting() {
+ return numWhenMatchedUpdateClauses == 0 && numWhenMatchedDeleteClauses == 0;
+ }
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 9e4886d84d..48e0cf9b2e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -7957,11 +7957,11 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
// and it is an insert overwrite or insert into table
if (conf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER)
&& conf.getBoolVar(ConfVars.HIVESTATSCOLAUTOGATHER)
+ && enableColumnStatsCollecting()
&& destinationTable != null
&& (!destinationTable.isNonNative() || destinationTable.getStorageHandler().commitInMoveTask())
&& !destTableIsTemporary && !destTableIsMaterialization
- && ColumnStatsAutoGatherContext.canRunAutogatherStats(fso)
- && !(this instanceof UpdateDeleteSemanticAnalyzer)) {
+ && ColumnStatsAutoGatherContext.canRunAutogatherStats(fso)) {
if (destType == QBMetaData.DEST_TABLE) {
genAutoColumnStatsGatheringPipeline(destinationTable, partSpec, input,
qb.getParseInfo().isInsertIntoTable(destinationTable.getDbName(), destinationTable.getTableName()),
@@ -7979,6 +7979,10 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
return output;
}
+ protected boolean enableColumnStatsCollecting() {
+ return true;
+ }
+
private Path getCtasLocation(CreateTableDesc tblDesc) throws SemanticException {
Path location;
try {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
index deca32eb42..1bfe69bc2d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
@@ -409,4 +409,9 @@ public class UpdateDeleteSemanticAnalyzer extends RewriteSemanticAnalyzer {
protected boolean allowOutputMultipleTimes() {
return conf.getBoolVar(HiveConf.ConfVars.SPLIT_UPDATE);
}
+
+ @Override
+ protected boolean enableColumnStatsCollecting() {
+ return false;
+ }
}
diff --git a/ql/src/test/results/clientnegative/merge_constraint_notnull.q.out b/ql/src/test/results/clientnegative/merge_constraint_notnull.q.out
index 5b853dc913..86aed6ba0e 100644
--- a/ql/src/test/results/clientnegative/merge_constraint_notnull.q.out
+++ b/ql/src/test/results/clientnegative/merge_constraint_notnull.q.out
@@ -65,9 +65,7 @@ Caused by: org.apache.hadoop.hive.ql.exec.errors.DataConstraintViolationError: E
[Masked Vertex killed due to OTHER_VERTEX_FAILURE]
[Masked Vertex killed due to OTHER_VERTEX_FAILURE]
[Masked Vertex killed due to OTHER_VERTEX_FAILURE]
-[Masked Vertex killed due to OTHER_VERTEX_FAILURE]
-[Masked Vertex killed due to OTHER_VERTEX_FAILURE]
-DAG did not succeed due to VERTEX_FAILURE. failedVertices:1 killedVertices:7
+DAG did not succeed due to VERTEX_FAILURE. failedVertices:1 killedVertices:5
FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.tez.TezTask. Vertex failed, vertexName=Reducer 2, vertexId=vertex_#ID#, diagnostics=[Task failed, taskId=task_#ID#, diagnostics=[TaskAttempt 0 failed, info=[Error: Error while running task ( failure ) : attempt_#ID#:java.lang.RuntimeException: org.apache.hadoop.hive.ql.exec.errors.DataConstraintViolationError: Either CHECK or NOT NULL constraint violated!
#### A masked pattern was here ####
Caused by: org.apache.hadoop.hive.ql.exec.errors.DataConstraintViolationError: Either CHECK or NOT NULL constraint violated!
@@ -76,4 +74,4 @@ Caused by: org.apache.hadoop.hive.ql.exec.errors.DataConstraintViolationError: E
#### A masked pattern was here ####
Caused by: org.apache.hadoop.hive.ql.exec.errors.DataConstraintViolationError: Either CHECK or NOT NULL constraint violated!
#### A masked pattern was here ####
-]], Vertex did not succeed due to OWN_TASK_FAILURE, failedTasks:1 killedTasks:0, Vertex vertex_#ID# [Reducer 2] killed/failed due to:OWN_TASK_FAILURE][Masked Vertex killed due to OTHER_VERTEX_FAILURE][Masked Vertex killed due to OTHER_VERTEX_FAILURE][Masked Vertex killed due to OTHER_VERTEX_FAILURE][Masked Vertex killed due to OTHER_VERTEX_FAILURE][Masked Vertex killed due to OTHER_VERTEX_FAILURE][Masked Vertex killed due to OTHER_VERTEX_FAILURE][Masked Vertex killed due to OTHER_VERTEX_ [...]
+]], Vertex did not succeed due to OWN_TASK_FAILURE, failedTasks:1 killedTasks:0, Vertex vertex_#ID# [Reducer 2] killed/failed due to:OWN_TASK_FAILURE][Masked Vertex killed due to OTHER_VERTEX_FAILURE][Masked Vertex killed due to OTHER_VERTEX_FAILURE][Masked Vertex killed due to OTHER_VERTEX_FAILURE][Masked Vertex killed due to OTHER_VERTEX_FAILURE][Masked Vertex killed due to OTHER_VERTEX_FAILURE]DAG did not succeed due to VERTEX_FAILURE. failedVertices:1 killedVertices:5
diff --git a/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out b/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out
index 65f3c3e587..9989aba616 100644
--- a/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out
+++ b/ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out
@@ -1868,13 +1868,11 @@ STAGE PLANS:
Stage: Stage-5
Tez
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
Reducer 5 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 6 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 7 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 9 <- Map 8 (SIMPLE_EDGE)
+ Reducer 7 <- Map 6 (SIMPLE_EDGE)
Vertices:
Map 1
Map Operator Tree:
@@ -1914,7 +1912,7 @@ STAGE PLANS:
partitionColumnCount: 2
partitionColumns: ds:string, hr:string
scratchColumnTypeNames: []
- Map 8
+ Map 6
Map Operator Tree:
TableScan Vectorization:
native: true
@@ -2004,70 +2002,6 @@ STAGE PLANS:
className: VectorFileSinkOperator
native: false
Reducer 5
- Execution mode: vectorized, llap
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
- reduceColumnNullOrder: zz
- reduceColumnSortOrder: ++
- allNative: false
- usesVectorUDFAdaptor: true
- vectorized: true
- rowBatchContext:
- dataColumnCount: 11
- dataColumns: KEY._col0:string, KEY._col1:string, VALUE._col0:int, VALUE._col1:struct<count:bigint,sum:double,input:int>, VALUE._col2:bigint, VALUE._col3:bigint, VALUE._col4:binary, VALUE._col5:int, VALUE._col6:struct<count:bigint,sum:double,input:int>, VALUE._col7:bigint, VALUE._col8:binary
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Operator Tree:
- Group By Vectorization:
- aggregators: VectorUDAFMaxLong(col 2:int) -> int, VectorUDAFAvgFinal(col 3:struct<count:bigint,sum:double,input:int>) -> double, VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFCountMerge(col 5:bigint) -> bigint, VectorUDAFComputeBitVectorFinal(col 6:binary) -> binary, VectorUDAFMaxLong(col 7:int) -> int, VectorUDAFAvgFinal(col 8:struct<count:bigint,sum:double,input:int>) -> double, VectorUDAFCountMerge(col 9:bigint) -> bigint, VectorUDAFComputeBitVectorFinal( [...]
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:string, col 1:string
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [11, 13, 15, 16, 19, 6, 20, 22, 24, 25, 28, 10, 0, 1]
- selectExpressions: ConstantVectorExpression(val STRING) -> 11:string, VectorCoalesce(columns [2, 12])(children: col 2:int, ConstantVectorExpression(val 0) -> 12:int) -> 13:int, VectorCoalesce(columns [3, 14])(children: col 3:double, ConstantVectorExpression(val 0.0) -> 14:double) -> 15:double, LongColSubtractLongColumn(col 4:bigint, col 5:bigint) -> 16:bigint, VectorCoalesce(columns [17, 18])(children: VectorUDFAdaptor(ndv_compute_bit_vector(_col6)) -> 17:bigint, Co [...]
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Reducer 6
- Execution mode: vectorized, llap
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
- reduceColumnNullOrder: zz
- reduceColumnSortOrder: ++
- allNative: false
- usesVectorUDFAdaptor: true
- vectorized: true
- rowBatchContext:
- dataColumnCount: 11
- dataColumns: KEY._col0:string, KEY._col1:string, VALUE._col0:int, VALUE._col1:struct<count:bigint,sum:double,input:int>, VALUE._col2:bigint, VALUE._col3:bigint, VALUE._col4:binary, VALUE._col5:int, VALUE._col6:struct<count:bigint,sum:double,input:int>, VALUE._col7:bigint, VALUE._col8:binary
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Operator Tree:
- Group By Vectorization:
- aggregators: VectorUDAFMaxLong(col 2:int) -> int, VectorUDAFAvgFinal(col 3:struct<count:bigint,sum:double,input:int>) -> double, VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFCountMerge(col 5:bigint) -> bigint, VectorUDAFComputeBitVectorFinal(col 6:binary) -> binary, VectorUDAFMaxLong(col 7:int) -> int, VectorUDAFAvgFinal(col 8:struct<count:bigint,sum:double,input:int>) -> double, VectorUDAFCountMerge(col 9:bigint) -> bigint, VectorUDAFComputeBitVectorFinal( [...]
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:string, col 1:string
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [11, 13, 15, 16, 19, 6, 20, 22, 24, 25, 28, 10, 0, 1]
- selectExpressions: ConstantVectorExpression(val STRING) -> 11:string, VectorCoalesce(columns [2, 12])(children: col 2:int, ConstantVectorExpression(val 0) -> 12:int) -> 13:int, VectorCoalesce(columns [3, 14])(children: col 3:double, ConstantVectorExpression(val 0.0) -> 14:double) -> 15:double, LongColSubtractLongColumn(col 4:bigint, col 5:bigint) -> 16:bigint, VectorCoalesce(columns [17, 18])(children: VectorUDFAdaptor(ndv_compute_bit_vector(_col6)) -> 17:bigint, Co [...]
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Reducer 7
Execution mode: llap
Reduce Vectorization:
enabled: true
@@ -2075,7 +2009,7 @@ STAGE PLANS:
notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type STRUCT not supported
vectorized: false
Reduce Operator Tree:
- Reducer 9
+ Reducer 7
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
@@ -2836,15 +2770,13 @@ STAGE PLANS:
Stage: Stage-5
Tez
Edges:
- Reducer 11 <- Map 10 (SIMPLE_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
Reducer 5 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+ Reducer 6 <- Reducer 2 (SIMPLE_EDGE)
Reducer 7 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
- Reducer 9 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 9 <- Map 8 (SIMPLE_EDGE)
Vertices:
Map 1
Map Operator Tree:
@@ -2884,7 +2816,7 @@ STAGE PLANS:
partitionColumnCount: 2
partitionColumns: ds:string, hr:string
scratchColumnTypeNames: []
- Map 10
+ Map 8
Map Operator Tree:
TableScan Vectorization:
native: true
@@ -2923,66 +2855,6 @@ STAGE PLANS:
partitionColumnCount: 2
partitionColumns: ds:string, hr:string
scratchColumnTypeNames: []
- Reducer 11
- Execution mode: vectorized, llap
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
- reduceColumnNullOrder: zzzz
- reduceColumnSortOrder: ++++
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 4
- dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:string, KEY._col3:string
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Operator Tree:
- Group By Vectorization:
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:string, col 1:string, col 2:string, col 3:string
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: []
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [2, 3, 0, 1]
- Reduce Sink Vectorization:
- className: VectorReduceSinkMultiKeyOperator
- keyColumns: 0:string, 1:string, 2:string, 3:string
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [2]
- Group By Vectorization:
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 2:string
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: []
- App Master Event Vectorization:
- className: VectorAppMasterEventOperator
- native: true
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [3]
- Group By Vectorization:
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 3:string
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: []
- App Master Event Vectorization:
- className: VectorAppMasterEventOperator
- native: true
Reducer 2
MergeJoin Vectorization:
enabled: false
@@ -3047,7 +2919,7 @@ STAGE PLANS:
dataColumnCount: 4
dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:string, VALUE._col2:string
partitionColumnCount: 0
- scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint]
+ scratchColumnTypeNames: []
Reduce Operator Tree:
Select Vectorization:
className: VectorSelectOperator
@@ -3056,57 +2928,7 @@ STAGE PLANS:
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 2, 3]
- Group By Vectorization:
- aggregators: VectorUDAFMaxLong(StringLength(col 0:string) -> 4:int) -> int, VectorUDAFAvgLong(VectorCoalesce(columns [5, 6])(children: StringLength(col 0:string) -> 5:int, ConstantVectorExpression(val 0) -> 6:int) -> 7:int) -> struct<count:bigint,sum:double,input:int>, VectorUDAFCount(ConstantVectorExpression(val 1) -> 8:int) -> bigint, VectorUDAFCount(col 0:string) -> bigint, VectorUDAFComputeBitVectorString(col 0:string) -> binary, VectorUDAFMaxLong(StringLength [...]
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 2:string, col 3:string
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
- Reduce Sink Vectorization:
- className: VectorReduceSinkMultiKeyOperator
- keyColumns: 0:string, 1:string
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumns: 2:int, 3:struct<count:bigint,sum:double,input:int>, 4:bigint, 5:bigint, 6:binary, 7:int, 8:struct<count:bigint,sum:double,input:int>, 9:bigint, 10:binary
Reducer 6
- Execution mode: vectorized, llap
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
- reduceColumnNullOrder: zz
- reduceColumnSortOrder: ++
- allNative: false
- usesVectorUDFAdaptor: true
- vectorized: true
- rowBatchContext:
- dataColumnCount: 11
- dataColumns: KEY._col0:string, KEY._col1:string, VALUE._col0:int, VALUE._col1:struct<count:bigint,sum:double,input:int>, VALUE._col2:bigint, VALUE._col3:bigint, VALUE._col4:binary, VALUE._col5:int, VALUE._col6:struct<count:bigint,sum:double,input:int>, VALUE._col7:bigint, VALUE._col8:binary
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Operator Tree:
- Group By Vectorization:
- aggregators: VectorUDAFMaxLong(col 2:int) -> int, VectorUDAFAvgFinal(col 3:struct<count:bigint,sum:double,input:int>) -> double, VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFCountMerge(col 5:bigint) -> bigint, VectorUDAFComputeBitVectorFinal(col 6:binary) -> binary, VectorUDAFMaxLong(col 7:int) -> int, VectorUDAFAvgFinal(col 8:struct<count:bigint,sum:double,input:int>) -> double, VectorUDAFCountMerge(col 9:bigint) -> bigint, VectorUDAFComputeBitVectorFinal( [...]
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:string, col 1:string
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [11, 13, 15, 16, 19, 6, 20, 22, 24, 25, 28, 10, 0, 1]
- selectExpressions: ConstantVectorExpression(val STRING) -> 11:string, VectorCoalesce(columns [2, 12])(children: col 2:int, ConstantVectorExpression(val 0) -> 12:int) -> 13:int, VectorCoalesce(columns [3, 14])(children: col 3:double, ConstantVectorExpression(val 0.0) -> 14:double) -> 15:double, LongColSubtractLongColumn(col 4:bigint, col 5:bigint) -> 16:bigint, VectorCoalesce(columns [17, 18])(children: VectorUDFAdaptor(ndv_compute_bit_vector(_col6)) -> 17:bigint, Co [...]
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Reducer 7
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
@@ -3120,7 +2942,7 @@ STAGE PLANS:
dataColumnCount: 4
dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:string, VALUE._col2:string
partitionColumnCount: 0
- scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint]
+ scratchColumnTypeNames: []
Reduce Operator Tree:
Select Vectorization:
className: VectorSelectOperator
@@ -3129,64 +2951,74 @@ STAGE PLANS:
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 2, 3]
- Group By Vectorization:
- aggregators: VectorUDAFMaxLong(StringLength(col 0:string) -> 4:int) -> int, VectorUDAFAvgLong(VectorCoalesce(columns [5, 6])(children: StringLength(col 0:string) -> 5:int, ConstantVectorExpression(val 0) -> 6:int) -> 7:int) -> struct<count:bigint,sum:double,input:int>, VectorUDAFCount(ConstantVectorExpression(val 1) -> 8:int) -> bigint, VectorUDAFCount(col 0:string) -> bigint, VectorUDAFComputeBitVectorString(col 0:string) -> binary, VectorUDAFMaxLong(StringLength [...]
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 2:string, col 3:string
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
- Reduce Sink Vectorization:
- className: VectorReduceSinkMultiKeyOperator
- keyColumns: 0:string, 1:string
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumns: 2:int, 3:struct<count:bigint,sum:double,input:int>, 4:bigint, 5:bigint, 6:binary, 7:int, 8:struct<count:bigint,sum:double,input:int>, 9:bigint, 10:binary
- Reducer 8
+ Reducer 7
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
+ notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type STRUCT not supported
+ vectorized: false
+ Reduce Operator Tree:
+ Reducer 9
Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
- reduceColumnNullOrder: zz
- reduceColumnSortOrder: ++
+ reduceColumnNullOrder: zzzz
+ reduceColumnSortOrder: ++++
allNative: false
- usesVectorUDFAdaptor: true
+ usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
- dataColumnCount: 11
- dataColumns: KEY._col0:string, KEY._col1:string, VALUE._col0:int, VALUE._col1:struct<count:bigint,sum:double,input:int>, VALUE._col2:bigint, VALUE._col3:bigint, VALUE._col4:binary, VALUE._col5:int, VALUE._col6:struct<count:bigint,sum:double,input:int>, VALUE._col7:bigint, VALUE._col8:binary
+ dataColumnCount: 4
+ dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:string, KEY._col3:string
partitionColumnCount: 0
scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Vectorization:
- aggregators: VectorUDAFMaxLong(col 2:int) -> int, VectorUDAFAvgFinal(col 3:struct<count:bigint,sum:double,input:int>) -> double, VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFCountMerge(col 5:bigint) -> bigint, VectorUDAFComputeBitVectorFinal(col 6:binary) -> binary, VectorUDAFMaxLong(col 7:int) -> int, VectorUDAFAvgFinal(col 8:struct<count:bigint,sum:double,input:int>) -> double, VectorUDAFCountMerge(col 9:bigint) -> bigint, VectorUDAFComputeBitVectorFinal( [...]
className: VectorGroupByOperator
groupByMode: MERGEPARTIAL
- keyExpressions: col 0:string, col 1:string
+ keyExpressions: col 0:string, col 1:string, col 2:string, col 3:string
native: false
vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ projectedOutputColumnNums: []
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [11, 13, 15, 16, 19, 6, 20, 22, 24, 25, 28, 10, 0, 1]
- selectExpressions: ConstantVectorExpression(val STRING) -> 11:string, VectorCoalesce(columns [2, 12])(children: col 2:int, ConstantVectorExpression(val 0) -> 12:int) -> 13:int, VectorCoalesce(columns [3, 14])(children: col 3:double, ConstantVectorExpression(val 0.0) -> 14:double) -> 15:double, LongColSubtractLongColumn(col 4:bigint, col 5:bigint) -> 16:bigint, VectorCoalesce(columns [17, 18])(children: VectorUDFAdaptor(ndv_compute_bit_vector(_col6)) -> 17:bigint, Co [...]
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Reducer 9
- Execution mode: llap
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true
- notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type STRUCT not supported
- vectorized: false
- Reduce Operator Tree:
+ projectedOutputColumnNums: [2, 3, 0, 1]
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ keyColumns: 0:string, 1:string, 2:string, 3:string
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [2]
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 2:string
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: []
+ App Master Event Vectorization:
+ className: VectorAppMasterEventOperator
+ native: true
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [3]
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 3:string
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: []
+ App Master Event Vectorization:
+ className: VectorAppMasterEventOperator
+ native: true
Stage: Stage-6
diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out
index 1027fb9358..5a04a178ad 100644
--- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out
@@ -53,16 +53,14 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 11 <- Reducer 10 (BROADCAST_EDGE)
- Reducer 10 <- Map 1 (CUSTOM_SIMPLE_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE)
+ Map 9 <- Reducer 8 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
Reducer 5 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE)
+ Reducer 6 <- Reducer 2 (SIMPLE_EDGE)
Reducer 7 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE)
- Reducer 9 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 8 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -98,7 +96,7 @@ STAGE PLANS:
value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
Execution mode: vectorized, llap
LLAP IO: all inputs
- Map 11
+ Map 9
Map Operator Tree:
TableScan
alias: t
@@ -120,19 +118,6 @@ STAGE PLANS:
value expressions: _col1 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
- Reducer 10
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1)
- mode: final
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -271,41 +256,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.acidtbl
Write Type: INSERT
- Select Operator
- expressions: _col0 (type: int), _col1 (type: int)
- outputColumnNames: a, b
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: min(a), max(a), count(1), count(a), compute_bit_vector_hll(a), min(b), max(b), count(b), compute_bit_vector_hll(b)
- minReductionHashAggr: 0.99
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary)
Reducer 6
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 7
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
@@ -321,41 +272,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.acidtbl
Write Type: INSERT
- Select Operator
- expressions: _col0 (type: int), _col1 (type: int)
- outputColumnNames: a, b
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: min(a), max(a), count(1), count(a), compute_bit_vector_hll(a), min(b), max(b), count(b), compute_bit_vector_hll(b)
- minReductionHashAggr: 0.99
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary)
- Reducer 8
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 9
+ Reducer 7
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -379,6 +296,19 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.merge_tmp_table
+ Reducer 8
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
Stage: Stage-6
Dependency Collection
@@ -442,10 +372,6 @@ STAGE PLANS:
Stage: Stage-10
Stats Work
Basic Stats Work:
- Column Stats Desc:
- Columns: a, b
- Column Types: int, int
- Table: default.acidtbl
Stage: Stage-4
Move Operator
@@ -704,15 +630,13 @@ STAGE PLANS:
#### A masked pattern was here ####
Edges:
Map 1 <- Union 2 (CONTAINS)
- Map 11 <- Union 2 (CONTAINS)
- Reducer 10 <- Reducer 3 (SIMPLE_EDGE)
- Reducer 3 <- Map 12 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE)
+ Map 9 <- Union 2 (CONTAINS)
+ Reducer 3 <- Map 10 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
Reducer 5 <- Reducer 3 (SIMPLE_EDGE)
Reducer 6 <- Reducer 3 (SIMPLE_EDGE)
- Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
+ Reducer 7 <- Reducer 3 (SIMPLE_EDGE)
Reducer 8 <- Reducer 3 (SIMPLE_EDGE)
- Reducer 9 <- Reducer 8 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -737,29 +661,7 @@ STAGE PLANS:
value expressions: _col1 (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
- Map 11
- Map Operator Tree:
- TableScan
- alias: nonacidorctbl
- filterExpr: (b > 0) (type: boolean)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (b > 0) (type: boolean)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: a (type: int), b (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- null sort order: z
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: int)
- Execution mode: vectorized, llap
- LLAP IO: all inputs
- Map 12
+ Map 10
Map Operator Tree:
TableScan
alias: t
@@ -781,30 +683,28 @@ STAGE PLANS:
value expressions: _col1 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
- Reducer 10
- Execution mode: llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- keys: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (_col1 > 1L) (type: boolean)
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: nonacidorctbl
+ filterExpr: (b > 0) (type: boolean)
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: cardinality_violation(_col0) (type: int)
- outputColumnNames: _col0
+ Filter Operator
+ predicate: (b > 0) (type: boolean)
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
+ Select Operator
+ expressions: a (type: int), b (type: int)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.merge_tmp_table
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
Reducer 3
Execution mode: llap
Reduce Operator Tree:
@@ -943,41 +843,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.acidtbl
Write Type: INSERT
- Select Operator
- expressions: _col0 (type: int), _col1 (type: int)
- outputColumnNames: a, b
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: min(a), max(a), count(1), count(a), compute_bit_vector_hll(a), min(b), max(b), count(b), compute_bit_vector_hll(b)
- minReductionHashAggr: 0.99
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary)
Reducer 7
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 8
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
@@ -993,40 +859,30 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.acidtbl
Write Type: INSERT
- Select Operator
- expressions: _col0 (type: int), _col1 (type: int)
- outputColumnNames: a, b
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: min(a), max(a), count(1), count(a), compute_bit_vector_hll(a), min(b), max(b), count(b), compute_bit_vector_hll(b)
- minReductionHashAggr: 0.99
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary)
- Reducer 9
- Execution mode: vectorized, llap
+ Reducer 8
+ Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8)
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col1 > 1L) (type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cardinality_violation(_col0) (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.merge_tmp_table
Union 2
Vertex: Union 2
@@ -1092,10 +948,6 @@ STAGE PLANS:
Stage: Stage-10
Stats Work
Basic Stats Work:
- Column Stats Desc:
- Columns: a, b
- Column Types: int, int
- Table: default.acidtbl
Stage: Stage-4
Move Operator
diff --git a/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out b/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out
index 4a6324d784..1fb7f5d1de 100644
--- a/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out
+++ b/ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out
@@ -4549,13 +4549,11 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
Reducer 5 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE)
- Reducer 7 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE)
+ Reducer 6 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -4576,7 +4574,7 @@ STAGE PLANS:
value expressions: _col1 (type: string), _col2 (type: string)
Execution mode: vectorized, llap
LLAP IO: all inputs
- Map 9
+ Map 7
Map Operator Tree:
TableScan
alias: t
@@ -4717,41 +4715,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.masking_test_n4
Write Type: INSERT
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
- outputColumnNames: key, a1, value
- Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: min(key), max(key), count(1), count(key), compute_bit_vector_hll(key), max(length(a1)), avg(COALESCE(length(a1),0)), count(a1), compute_bit_vector_hll(a1), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector_hll(value)
- minReductionHashAggr: 0.99
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct<count:bigint,sum:double,input:int>), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct<count:bigint,sum:double,input:int>), _col11 (type: bigint), _col12 (type: binary)
Reducer 6
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE( [...]
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 7
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
@@ -4767,40 +4731,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.masking_test_n4
Write Type: INSERT
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
- outputColumnNames: key, a1, value
- Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: min(key), max(key), count(1), count(key), compute_bit_vector_hll(key), max(length(a1)), avg(COALESCE(length(a1),0)), count(a1), compute_bit_vector_hll(a1), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector_hll(value)
- minReductionHashAggr: 0.99
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct<count:bigint,sum:double,input:int>), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct<count:bigint,sum:double,input:int>), _col11 (type: bigint), _col12 (type: binary)
- Reducer 8
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE( [...]
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-5
Dependency Collection
@@ -4864,10 +4794,6 @@ STAGE PLANS:
Stage: Stage-9
Stats Work
Basic Stats Work:
- Column Stats Desc:
- Columns: key, a1, value
- Column Types: int, string, string
- Table: default.masking_test_n4
PREHOOK: query: explain MERGE INTO masking_test_n4 as t using nonacid_n2 as s ON t.key = s.key
WHEN MATCHED AND s.key < 5 THEN DELETE
@@ -4908,14 +4834,12 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
Reducer 5 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE)
+ Reducer 6 <- Reducer 2 (SIMPLE_EDGE)
Reducer 7 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE)
- Reducer 9 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -4936,7 +4860,7 @@ STAGE PLANS:
value expressions: _col1 (type: string), _col2 (type: string)
Execution mode: vectorized, llap
LLAP IO: all inputs
- Map 10
+ Map 8
Map Operator Tree:
TableScan
alias: t
@@ -5098,41 +5022,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.masking_test_n4
Write Type: INSERT
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
- outputColumnNames: key, a1, value
- Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: min(key), max(key), count(1), count(key), compute_bit_vector_hll(key), max(length(a1)), avg(COALESCE(length(a1),0)), count(a1), compute_bit_vector_hll(a1), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector_hll(value)
- minReductionHashAggr: 0.99
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct<count:bigint,sum:double,input:int>), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct<count:bigint,sum:double,input:int>), _col11 (type: bigint), _col12 (type: binary)
Reducer 6
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE( [...]
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 7
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
@@ -5148,41 +5038,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.masking_test_n4
Write Type: INSERT
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
- outputColumnNames: key, a1, value
- Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: min(key), max(key), count(1), count(key), compute_bit_vector_hll(key), max(length(a1)), avg(COALESCE(length(a1),0)), count(a1), compute_bit_vector_hll(a1), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector_hll(value)
- minReductionHashAggr: 0.99
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct<count:bigint,sum:double,input:int>), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct<count:bigint,sum:double,input:int>), _col11 (type: bigint), _col12 (type: binary)
- Reducer 8
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE( [...]
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 9
+ Reducer 7
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -5269,10 +5125,6 @@ STAGE PLANS:
Stage: Stage-10
Stats Work
Basic Stats Work:
- Column Stats Desc:
- Columns: key, a1, value
- Column Types: int, string, string
- Table: default.masking_test_n4
Stage: Stage-4
Move Operator
@@ -5321,11 +5173,10 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
- Reducer 6 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 5 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -5346,7 +5197,7 @@ STAGE PLANS:
value expressions: _col1 (type: string), _col2 (type: string)
Execution mode: vectorized, llap
LLAP IO: all inputs
- Map 7
+ Map 6
Map Operator Tree:
TableScan
alias: t
@@ -5462,41 +5313,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.masking_test_n4
Write Type: INSERT
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
- outputColumnNames: key, a1, value
- Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: min(key), max(key), count(1), count(key), compute_bit_vector_hll(key), max(length(a1)), avg(COALESCE(length(a1),0)), count(a1), compute_bit_vector_hll(a1), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector_hll(value)
- minReductionHashAggr: 0.99
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct<count:bigint,sum:double,input:int>), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct<count:bigint,sum:double,input:int>), _col11 (type: bigint), _col12 (type: binary)
Reducer 5
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE( [...]
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 6
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -5553,10 +5370,6 @@ STAGE PLANS:
Stage: Stage-6
Stats Work
Basic Stats Work:
- Column Stats Desc:
- Columns: key, a1, value
- Column Types: int, string, string
- Table: default.masking_test_n4
Stage: Stage-2
Move Operator
@@ -5607,13 +5420,11 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Reducer 2 (SIMPLE_EDGE)
Reducer 6 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
- Reducer 8 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -5634,7 +5445,7 @@ STAGE PLANS:
value expressions: _col1 (type: string), _col2 (type: string)
Execution mode: vectorized, llap
LLAP IO: all inputs
- Map 9
+ Map 7
Map Operator Tree:
TableScan
alias: t
@@ -5767,41 +5578,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.masking_test_n4
Write Type: INSERT
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
- outputColumnNames: key, a1, value
- Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: min(key), max(key), count(1), count(key), compute_bit_vector_hll(key), max(length(a1)), avg(COALESCE(length(a1),0)), count(a1), compute_bit_vector_hll(a1), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector_hll(value)
- minReductionHashAggr: 0.99
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct<count:bigint,sum:double,input:int>), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct<count:bigint,sum:double,input:int>), _col11 (type: bigint), _col12 (type: binary)
Reducer 5
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE( [...]
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 6
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
@@ -5817,41 +5594,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.masking_test_n4
Write Type: INSERT
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
- outputColumnNames: key, a1, value
- Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: min(key), max(key), count(1), count(key), compute_bit_vector_hll(key), max(length(a1)), avg(COALESCE(length(a1),0)), count(a1), compute_bit_vector_hll(a1), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector_hll(value)
- minReductionHashAggr: 0.99
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct<count:bigint,sum:double,input:int>), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct<count:bigint,sum:double,input:int>), _col11 (type: bigint), _col12 (type: binary)
- Reducer 7
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE( [...]
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 8
+ Reducer 6
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -5923,10 +5666,6 @@ STAGE PLANS:
Stage: Stage-8
Stats Work
Basic Stats Work:
- Column Stats Desc:
- Columns: key, a1, value
- Column Types: int, string, string
- Table: default.masking_test_n4
Stage: Stage-3
Move Operator
diff --git a/ql/src/test/results/clientpositive/llap/insert_into_default_keyword.q.out b/ql/src/test/results/clientpositive/llap/insert_into_default_keyword.q.out
index d6bf75e4ea..4eadd740e2 100644
--- a/ql/src/test/results/clientpositive/llap/insert_into_default_keyword.q.out
+++ b/ql/src/test/results/clientpositive/llap/insert_into_default_keyword.q.out
@@ -3031,14 +3031,12 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
Reducer 5 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE)
+ Reducer 6 <- Reducer 2 (SIMPLE_EDGE)
Reducer 7 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE)
- Reducer 9 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -3059,7 +3057,7 @@ STAGE PLANS:
value expressions: _col1 (type: string)
Execution mode: vectorized, llap
LLAP IO: all inputs
- Map 10
+ Map 8
Map Operator Tree:
TableScan
alias: t
@@ -3225,41 +3223,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.acidtable
Write Type: INSERT
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
- outputColumnNames: key, a1, value
- Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: min(key), max(key), count(1), count(key), compute_bit_vector_hll(key), max(length(a1)), avg(COALESCE(length(a1),0)), count(a1), compute_bit_vector_hll(a1), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector_hll(value)
- minReductionHashAggr: 0.4
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct<count:bigint,sum:double,input:int>), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct<count:bigint,sum:double,input:int>), _col11 (type: bigint), _col12 (type: binary)
Reducer 6
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE( [...]
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
- Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 7
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
@@ -3275,41 +3239,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.acidtable
Write Type: INSERT
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
- outputColumnNames: key, a1, value
- Statistics: Num rows: 1 Data size: 175 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: min(key), max(key), count(1), count(key), compute_bit_vector_hll(key), max(length(a1)), avg(COALESCE(length(a1),0)), count(a1), compute_bit_vector_hll(a1), max(length(value)), avg(COALESCE(length(value),0)), count(value), compute_bit_vector_hll(value)
- minReductionHashAggr: 0.4
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct<count:bigint,sum:double,input:int>), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct<count:bigint,sum:double,input:int>), _col11 (type: bigint), _col12 (type: binary)
- Reducer 8
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE( [...]
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
- Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 9
+ Reducer 7
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -3396,10 +3326,6 @@ STAGE PLANS:
Stage: Stage-10
Stats Work
Basic Stats Work:
- Column Stats Desc:
- Columns: key, a1, value
- Column Types: int, string, string
- Table: default.acidtable
Stage: Stage-4
Move Operator
diff --git a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out
index ba21a07965..93264fa09f 100644
--- a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out
+++ b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out
@@ -3360,16 +3360,14 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Reducer 11 (BROADCAST_EDGE)
- Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE)
+ Map 1 <- Reducer 9 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
Reducer 5 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE)
+ Reducer 6 <- Reducer 2 (SIMPLE_EDGE)
Reducer 7 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 8 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 9 <- Reducer 8 (CUSTOM_SIMPLE_EDGE)
+ Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -3390,7 +3388,7 @@ STAGE PLANS:
value expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
- Map 10
+ Map 8
Map Operator Tree:
TableScan
alias: s
@@ -3419,19 +3417,6 @@ STAGE PLANS:
value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
Execution mode: vectorized, llap
LLAP IO: all inputs
- Reducer 11
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1)
- mode: final
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -3565,41 +3550,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.acidtbl
Write Type: INSERT
- Select Operator
- expressions: _col0 (type: int), 7 (type: int)
- outputColumnNames: a, b
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: min(a), max(a), count(1), count(a), compute_bit_vector_hll(a), min(b), max(b), count(b), compute_bit_vector_hll(b)
- minReductionHashAggr: 0.99
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary)
Reducer 6
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 7
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -3623,7 +3574,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.merge_tmp_table
- Reducer 8
+ Reducer 7
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
@@ -3639,40 +3590,19 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.acidtbl
Write Type: INSERT
- Select Operator
- expressions: _col0 (type: int), _col1 (type: int)
- outputColumnNames: a, b
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: min(a), max(a), count(1), count(a), compute_bit_vector_hll(a), min(b), max(b), count(b), compute_bit_vector_hll(b)
- minReductionHashAggr: 0.99
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary)
Reducer 9
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
Stage: Stage-6
Dependency Collection
@@ -3750,10 +3680,6 @@ STAGE PLANS:
Stage: Stage-11
Stats Work
Basic Stats Work:
- Column Stats Desc:
- Columns: a, b
- Column Types: int, int
- Table: default.acidtbl
PREHOOK: query: explain merge /*+ semi(s, a, t, 1000)*/ into acidTbl as t using nonAcidOrcTbl s ON t.a = s.a
WHEN MATCHED AND s.a > 8 THEN DELETE
@@ -3794,16 +3720,14 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Reducer 11 (BROADCAST_EDGE)
- Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE)
+ Map 1 <- Reducer 9 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
Reducer 5 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE)
+ Reducer 6 <- Reducer 2 (SIMPLE_EDGE)
Reducer 7 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 8 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 9 <- Reducer 8 (CUSTOM_SIMPLE_EDGE)
+ Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -3824,7 +3748,7 @@ STAGE PLANS:
value expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
- Map 10
+ Map 8
Map Operator Tree:
TableScan
alias: s
@@ -3853,19 +3777,6 @@ STAGE PLANS:
value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
Execution mode: vectorized, llap
LLAP IO: all inputs
- Reducer 11
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1000)
- mode: final
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -3999,41 +3910,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.acidtbl
Write Type: INSERT
- Select Operator
- expressions: _col0 (type: int), 7 (type: int)
- outputColumnNames: a, b
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: min(a), max(a), count(1), count(a), compute_bit_vector_hll(a), min(b), max(b), count(b), compute_bit_vector_hll(b)
- minReductionHashAggr: 0.99
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary)
Reducer 6
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 7
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -4057,7 +3934,7 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.merge_tmp_table
- Reducer 8
+ Reducer 7
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
@@ -4073,40 +3950,19 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.acidtbl
Write Type: INSERT
- Select Operator
- expressions: _col0 (type: int), _col1 (type: int)
- outputColumnNames: a, b
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: min(a), max(a), count(1), count(a), compute_bit_vector_hll(a), min(b), max(b), count(b), compute_bit_vector_hll(b)
- minReductionHashAggr: 0.99
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary)
Reducer 9
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1000)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
Stage: Stage-6
Dependency Collection
@@ -4184,8 +4040,4 @@ STAGE PLANS:
Stage: Stage-11
Stats Work
Basic Stats Work:
- Column Stats Desc:
- Columns: a, b
- Column Types: int, int
- Table: default.acidtbl
diff --git a/ql/src/test/results/clientpositive/llap/sqlmerge.q.out b/ql/src/test/results/clientpositive/llap/sqlmerge.q.out
index e0745afb45..a5da3447dd 100644
--- a/ql/src/test/results/clientpositive/llap/sqlmerge.q.out
+++ b/ql/src/test/results/clientpositive/llap/sqlmerge.q.out
@@ -53,14 +53,12 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
Reducer 5 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE)
+ Reducer 6 <- Reducer 2 (SIMPLE_EDGE)
Reducer 7 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE)
- Reducer 9 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -81,7 +79,7 @@ STAGE PLANS:
value expressions: _col1 (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
- Map 10
+ Map 8
Map Operator Tree:
TableScan
alias: t
@@ -241,41 +239,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.acidtbl_n0
Write Type: INSERT
- Select Operator
- expressions: _col0 (type: int), _col1 (type: int)
- outputColumnNames: a, b
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: min(a), max(a), count(1), count(a), compute_bit_vector_hll(a), min(b), max(b), count(b), compute_bit_vector_hll(b)
- minReductionHashAggr: 0.99
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary)
Reducer 6
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 7
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
@@ -291,41 +255,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.acidtbl_n0
Write Type: INSERT
- Select Operator
- expressions: _col0 (type: int), _col1 (type: int)
- outputColumnNames: a, b
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: min(a), max(a), count(1), count(a), compute_bit_vector_hll(a), min(b), max(b), count(b), compute_bit_vector_hll(b)
- minReductionHashAggr: 0.99
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary)
- Reducer 8
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 9
+ Reducer 7
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -412,10 +342,6 @@ STAGE PLANS:
Stage: Stage-10
Stats Work
Basic Stats Work:
- Column Stats Desc:
- Columns: a, b
- Column Types: int, int
- Table: default.acidtbl_n0
Stage: Stage-4
Move Operator
@@ -635,12 +561,11 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Reducer 2 (SIMPLE_EDGE)
Reducer 6 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 7 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -661,7 +586,7 @@ STAGE PLANS:
value expressions: _col1 (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
- Map 8
+ Map 7
Map Operator Tree:
TableScan
alias: t
@@ -792,41 +717,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.acidtbl_n0
Write Type: INSERT
- Select Operator
- expressions: _col0 (type: int), _col1 (type: int)
- outputColumnNames: a, b
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: min(a), max(a), count(1), count(a), compute_bit_vector_hll(a), min(b), max(b), count(b), compute_bit_vector_hll(b)
- minReductionHashAggr: 0.99
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary)
Reducer 5
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 6
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
@@ -842,7 +733,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.acidtbl_n0
Write Type: UPDATE
- Reducer 7
+ Reducer 6
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -914,10 +805,6 @@ STAGE PLANS:
Stage: Stage-8
Stats Work
Basic Stats Work:
- Column Stats Desc:
- Columns: a, b
- Column Types: int, int
- Table: default.acidtbl_n0
Stage: Stage-3
Move Operator
diff --git a/ql/src/test/results/clientpositive/llap/sqlmerge_stats.q.out b/ql/src/test/results/clientpositive/llap/sqlmerge_stats.q.out
index 25f114d223..080b7ce1af 100644
--- a/ql/src/test/results/clientpositive/llap/sqlmerge_stats.q.out
+++ b/ql/src/test/results/clientpositive/llap/sqlmerge_stats.q.out
@@ -139,13 +139,11 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Reducer 2 (SIMPLE_EDGE)
Reducer 6 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
- Reducer 8 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -166,7 +164,7 @@ STAGE PLANS:
value expressions: _col1 (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
- Map 9
+ Map 7
Map Operator Tree:
TableScan
alias: t
@@ -297,41 +295,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.t
Write Type: INSERT
- Select Operator
- expressions: _col0 (type: int), _col1 (type: int)
- outputColumnNames: a, b
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: min(a), max(a), count(1), count(a), compute_bit_vector_hll(a), min(b), max(b), count(b), compute_bit_vector_hll(b)
- minReductionHashAggr: 0.4
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary)
Reducer 5
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 6
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
@@ -347,41 +311,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.t
Write Type: INSERT
- Select Operator
- expressions: _col0 (type: int), _col1 (type: int)
- outputColumnNames: a, b
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: min(a), max(a), count(1), count(a), compute_bit_vector_hll(a), min(b), max(b), count(b), compute_bit_vector_hll(b)
- minReductionHashAggr: 0.4
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary)
- Reducer 7
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 8
+ Reducer 6
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -453,10 +383,6 @@ STAGE PLANS:
Stage: Stage-8
Stats Work
Basic Stats Work:
- Column Stats Desc:
- Columns: a, b
- Column Types: int, int
- Table: default.t
Stage: Stage-3
Move Operator
@@ -773,13 +699,11 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Reducer 2 (SIMPLE_EDGE)
Reducer 6 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
- Reducer 8 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -804,7 +728,7 @@ STAGE PLANS:
value expressions: _col1 (type: int), _col2 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
- Map 9
+ Map 7
Map Operator Tree:
TableScan
alias: u
@@ -931,41 +855,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.t2
Write Type: INSERT
- Select Operator
- expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
- outputColumnNames: a, b, c
- Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: min(a), max(a), count(1), count(a), compute_bit_vector_hll(a), min(b), max(b), count(b), compute_bit_vector_hll(b), min(c), max(c), count(c), compute_bit_vector_hll(c)
- minReductionHashAggr: 0.5
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary)
Reducer 5
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDF [...]
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
- Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 6
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
@@ -981,41 +871,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.t2
Write Type: INSERT
- Select Operator
- expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
- outputColumnNames: a, b, c
- Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: min(a), max(a), count(1), count(a), compute_bit_vector_hll(a), min(b), max(b), count(b), compute_bit_vector_hll(b), min(c), max(c), count(c), compute_bit_vector_hll(c)
- minReductionHashAggr: 0.5
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary)
- Reducer 7
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDF [...]
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
- Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 8
+ Reducer 6
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -1087,10 +943,6 @@ STAGE PLANS:
Stage: Stage-8
Stats Work
Basic Stats Work:
- Column Stats Desc:
- Columns: a, b, c
- Column Types: int, int, int
- Table: default.t2
Stage: Stage-3
Move Operator
@@ -1166,13 +1018,11 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Reducer 2 (SIMPLE_EDGE)
Reducer 6 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
- Reducer 8 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1197,7 +1047,7 @@ STAGE PLANS:
value expressions: _col1 (type: int), _col2 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
- Map 9
+ Map 7
Map Operator Tree:
TableScan
alias: u
@@ -1324,41 +1174,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.t2
Write Type: INSERT
- Select Operator
- expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
- outputColumnNames: a, b, c
- Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: min(a), max(a), count(1), count(a), compute_bit_vector_hll(a), min(b), max(b), count(b), compute_bit_vector_hll(b), min(c), max(c), count(c), compute_bit_vector_hll(c)
- minReductionHashAggr: 0.5
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary)
Reducer 5
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDF [...]
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
- Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 6
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
@@ -1374,41 +1190,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.t2
Write Type: INSERT
- Select Operator
- expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
- outputColumnNames: a, b, c
- Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: min(a), max(a), count(1), count(a), compute_bit_vector_hll(a), min(b), max(b), count(b), compute_bit_vector_hll(b), min(c), max(c), count(c), compute_bit_vector_hll(c)
- minReductionHashAggr: 0.5
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary)
- Reducer 7
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDF [...]
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
- Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 8
+ Reducer 6
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -1480,10 +1262,6 @@ STAGE PLANS:
Stage: Stage-8
Stats Work
Basic Stats Work:
- Column Stats Desc:
- Columns: a, b, c
- Column Types: int, int, int
- Table: default.t2
Stage: Stage-3
Move Operator
@@ -1559,13 +1337,11 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Reducer 2 (SIMPLE_EDGE)
Reducer 6 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
- Reducer 8 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1590,7 +1366,7 @@ STAGE PLANS:
value expressions: _col1 (type: int), _col2 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
- Map 9
+ Map 7
Map Operator Tree:
TableScan
alias: u
@@ -1717,41 +1493,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.t2
Write Type: INSERT
- Select Operator
- expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
- outputColumnNames: a, b, c
- Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: min(a), max(a), count(1), count(a), compute_bit_vector_hll(a), min(b), max(b), count(b), compute_bit_vector_hll(b), min(c), max(c), count(c), compute_bit_vector_hll(c)
- minReductionHashAggr: 0.5
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary)
Reducer 5
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDF [...]
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
- Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 6
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
@@ -1767,41 +1509,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.t2
Write Type: INSERT
- Select Operator
- expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
- outputColumnNames: a, b, c
- Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: min(a), max(a), count(1), count(a), compute_bit_vector_hll(a), min(b), max(b), count(b), compute_bit_vector_hll(b), min(c), max(c), count(c), compute_bit_vector_hll(c)
- minReductionHashAggr: 0.5
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary)
- Reducer 7
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDF [...]
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
- Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 8
+ Reducer 6
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -1873,10 +1581,6 @@ STAGE PLANS:
Stage: Stage-8
Stats Work
Basic Stats Work:
- Column Stats Desc:
- Columns: a, b, c
- Column Types: int, int, int
- Table: default.t2
Stage: Stage-3
Move Operator
@@ -1952,13 +1656,11 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Reducer 2 (SIMPLE_EDGE)
Reducer 6 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
- Reducer 8 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -1983,7 +1685,7 @@ STAGE PLANS:
value expressions: _col1 (type: int), _col2 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
- Map 9
+ Map 7
Map Operator Tree:
TableScan
alias: u
@@ -2110,41 +1812,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.t2
Write Type: INSERT
- Select Operator
- expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
- outputColumnNames: a, b, c
- Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: min(a), max(a), count(1), count(a), compute_bit_vector_hll(a), min(b), max(b), count(b), compute_bit_vector_hll(b), min(c), max(c), count(c), compute_bit_vector_hll(c)
- minReductionHashAggr: 0.5
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary)
Reducer 5
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDF [...]
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
- Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 6
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
@@ -2160,41 +1828,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.t2
Write Type: INSERT
- Select Operator
- expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
- outputColumnNames: a, b, c
- Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: min(a), max(a), count(1), count(a), compute_bit_vector_hll(a), min(b), max(b), count(b), compute_bit_vector_hll(b), min(c), max(c), count(c), compute_bit_vector_hll(c)
- minReductionHashAggr: 0.5
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary)
- Reducer 7
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 488 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (type: bigint), UDF [...]
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
- Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 8
+ Reducer 6
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -2266,10 +1900,6 @@ STAGE PLANS:
Stage: Stage-8
Stats Work
Basic Stats Work:
- Column Stats Desc:
- Columns: a, b, c
- Column Types: int, int, int
- Table: default.t2
Stage: Stage-3
Move Operator
@@ -2395,11 +2025,10 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
- Reducer 6 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 5 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -2420,7 +2049,7 @@ STAGE PLANS:
value expressions: _col1 (type: int)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
- Map 7
+ Map 6
Map Operator Tree:
TableScan
alias: t
@@ -2537,41 +2166,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.t3
Write Type: INSERT
- Select Operator
- expressions: _col0 (type: int), _col1 (type: int)
- outputColumnNames: a, b
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: min(a), max(a), count(1), count(a), compute_bit_vector_hll(a), min(b), max(b), count(b), compute_bit_vector_hll(b)
- minReductionHashAggr: 0.4
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary)
Reducer 5
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 6
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -2628,10 +2223,6 @@ STAGE PLANS:
Stage: Stage-6
Stats Work
Basic Stats Work:
- Column Stats Desc:
- Columns: a, b
- Column Types: int, int
- Table: default.t3
Stage: Stage-2
Move Operator
@@ -2749,13 +2340,11 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Reducer 2 (SIMPLE_EDGE)
Reducer 6 <- Reducer 2 (SIMPLE_EDGE)
- Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
- Reducer 8 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -2776,7 +2365,7 @@ STAGE PLANS:
value expressions: _col1 (type: int)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
- Map 9
+ Map 7
Map Operator Tree:
TableScan
alias: t
@@ -2907,41 +2496,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.t4
Write Type: INSERT
- Select Operator
- expressions: _col0 (type: int), _col1 (type: int)
- outputColumnNames: a, b
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: min(a), max(a), count(1), count(a), compute_bit_vector_hll(a), min(b), max(b), count(b), compute_bit_vector_hll(b)
- minReductionHashAggr: 0.4
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary)
Reducer 5
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 6
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
@@ -2957,41 +2512,7 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.t4
Write Type: INSERT
- Select Operator
- expressions: _col0 (type: int), _col1 (type: int)
- outputColumnNames: a, b
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: min(a), max(a), count(1), count(a), compute_bit_vector_hll(a), min(b), max(b), count(b), compute_bit_vector_hll(b)
- minReductionHashAggr: 0.4
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: int), _col7 (type: bigint), _col8 (type: binary)
- Reducer 7
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), min(VALUE._col5), max(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'LONG' (type: string), UDFToLong(_col5) (type: bigint), UDFToLong(_col6) (type: bigint), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 8
+ Reducer 6
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -3063,10 +2584,6 @@ STAGE PLANS:
Stage: Stage-8
Stats Work
Basic Stats Work:
- Column Stats Desc:
- Columns: a, b
- Column Types: int, int
- Table: default.t4
Stage: Stage-3
Move Operator