Posted to commits@hive.apache.org by dj...@apache.org on 2018/08/12 08:47:51 UTC
hive git commit: HIVE-20354 : Semijoin hints don't work with merge statements (Deepak Jaiswal, reviewed by Eugene Koifman)
Repository: hive
Updated Branches:
refs/heads/master 28b24dbf5 -> 4a30574d3
HIVE-20354 : Semijoin hints don't work with merge statements (Deepak Jaiswal, reviewed by Eugene Koifman)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4a30574d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4a30574d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4a30574d
Branch: refs/heads/master
Commit: 4a30574d38fc71771b3abffa225285dedf77c56a
Parents: 28b24db
Author: Deepak Jaiswal <dj...@apache.org>
Authored: Sun Aug 12 01:47:42 2018 -0700
Committer: Deepak Jaiswal <dj...@apache.org>
Committed: Sun Aug 12 01:47:42 2018 -0700
----------------------------------------------------------------------
.../apache/hadoop/hive/ql/parse/HiveParser.g | 4 +-
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 32 +-
.../ql/parse/UpdateDeleteSemanticAnalyzer.java | 61 +-
.../test/queries/clientpositive/semijoin_hint.q | 21 +
.../clientpositive/llap/semijoin_hint.q.out | 714 +++++++++++++++++++
5 files changed, 804 insertions(+), 28 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/4a30574d/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index f4d12ae..15d4edf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -2967,8 +2967,8 @@ mergeStatement
@init { pushMsg("MERGE statement", state); }
@after { popMsg(state); }
:
- KW_MERGE KW_INTO tableName (KW_AS? identifier)? KW_USING joinSourcePart KW_ON expression whenClauses ->
- ^(TOK_MERGE ^(TOK_TABREF tableName identifier?) joinSourcePart expression whenClauses)
+ KW_MERGE QUERY_HINT? KW_INTO tableName (KW_AS? identifier)? KW_USING joinSourcePart KW_ON expression whenClauses
+ -> ^(TOK_MERGE ^(TOK_TABREF tableName identifier?) joinSourcePart expression QUERY_HINT? whenClauses)
;
/*
Allow 0,1 or 2 WHEN MATCHED clauses and 0 or 1 WHEN NOT MATCHED
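
Note on the grammar change: the optional QUERY_HINT token is now accepted right after KW_MERGE and threaded into the TOK_MERGE tree after the ON expression, so downstream walkers see children in the order TOK_TABREF, join source, ON expression, optional QUERY_HINT, then the WHEN clauses. A minimal probe of that layout (hypothetical helper, not part of the patch; ASTNode and HiveParser are the patch's own types):

    import org.apache.hadoop.hive.ql.parse.ASTNode;
    import org.apache.hadoop.hive.ql.parse.HiveParser;

    // Hypothetical probe mirroring the new TOK_MERGE child layout:
    //   0: TOK_TABREF, 1: join source, 2: ON expression,
    //   3: optional QUERY_HINT, then the WHEN clauses.
    public final class MergeHintProbe {
      public static boolean hasQueryHint(ASTNode mergeTree) {
        ASTNode maybeHint = (ASTNode) mergeTree.getChild(3);
        return maybeHint != null && maybeHint.getType() == HiveParser.QUERY_HINT;
      }
    }

UpdateDeleteSemanticAnalyzer below performs exactly this check before deciding where the WHEN clauses start.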
http://git-wip-us.apache.org/repos/asf/hive/blob/4a30574d/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index a63aabe..2ee562a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -1569,19 +1569,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
qbp.setSelExprForClause(ctx_1.dest, ast);
int posn = 0;
- if (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.QUERY_HINT) {
- ParseDriver pd = new ParseDriver();
- String queryHintStr = ast.getChild(0).getText();
- if (LOG.isDebugEnabled()) {
- LOG.debug("QUERY HINT: "+queryHintStr);
- }
- try {
- ASTNode hintNode = pd.parseHint(queryHintStr);
- qbp.setHints(hintNode);
- posn++;
- } catch (ParseException e) {
- throw new SemanticException("failed to parse query hint: "+e.getMessage(), e);
- }
+ if (((ASTNode) ast.getChild(0)).getType() == HiveParser.QUERY_HINT) {
+ posn = processQueryHint((ASTNode)ast.getChild(0), qbp, posn);
}
if ((ast.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM)) {
@@ -1881,6 +1870,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
case HiveParser.TOK_CTE:
processCTE(qb, ast);
break;
+ case HiveParser.QUERY_HINT:
+ processQueryHint(ast, qbp, 0);
default:
skipRecursion = false;
break;
@@ -1899,6 +1890,21 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
return phase1Result;
}
+ private int processQueryHint(ASTNode ast, QBParseInfo qbp, int posn) throws SemanticException{
+ ParseDriver pd = new ParseDriver();
+ String queryHintStr = ast.getText();
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("QUERY HINT: "+queryHintStr);
+ }
+ try {
+ ASTNode hintNode = pd.parseHint(queryHintStr);
+ qbp.setHints(hintNode);
+ } catch (ParseException e) {
+ throw new SemanticException("failed to parse query hint: "+e.getMessage(), e);
+ }
+ return posn + 1;
+ }
+
/**
* This is phase1 of supporting specifying schema in insert statement
* insert into foo(z,y) select a,b from bar;
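
The change above extracts the previously inline hint-parsing block into a shared processQueryHint helper, so the existing select-clause path and the new top-level QUERY_HINT case in phase 1 use one implementation. The helper's core is ParseDriver.parseHint, which re-parses the raw hint text into its own AST before it is stored on the QBParseInfo. A standalone sketch of that contract (the demo class and sample hint string are mine):

    import org.apache.hadoop.hive.ql.parse.ASTNode;
    import org.apache.hadoop.hive.ql.parse.ParseDriver;

    // Standalone sketch of the parseHint contract that processQueryHint wraps:
    // raw hint text in, parsed hint AST out (or ParseException on bad syntax).
    public final class HintParseDemo {
      public static void main(String[] args) throws Exception {
        ParseDriver pd = new ParseDriver();
        ASTNode hintNode = pd.parseHint("semi(s, a, t, 1000)");
        System.out.println(hintNode.dump()); // print the parsed hint tree
      }
    }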
http://git-wip-us.apache.org/repos/asf/hive/blob/4a30574d/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
index 8df2904..0d80ed3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
@@ -888,11 +888,20 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
ASTNode onClause = (ASTNode) tree.getChild(2);
String onClauseAsText = getMatchedText(onClause);
+ int whenClauseBegins = 3;
+ boolean hasHint = false;
+ // query hint
+ ASTNode qHint = (ASTNode) tree.getChild(3);
+ if (qHint.getType() == HiveParser.QUERY_HINT) {
+ hasHint = true;
+ whenClauseBegins++;
+ }
Table targetTable = getTargetTable(target);
validateTargetTable(targetTable);
- List<ASTNode> whenClauses = findWhenClauses(tree);
+ List<ASTNode> whenClauses = findWhenClauses(tree, whenClauseBegins);
StringBuilder rewrittenQueryStr = new StringBuilder("FROM\n");
+
rewrittenQueryStr.append(Indent).append(getFullTableNameForSQL(target));
if(isAliased(target)) {
rewrittenQueryStr.append(" ").append(targetName);
@@ -912,6 +921,12 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
rewrittenQueryStr.append('\n');
rewrittenQueryStr.append(Indent).append("ON ").append(onClauseAsText).append('\n');
+ // Add the hint if any
+ String hintStr = null;
+ if (hasHint) {
+ hintStr = " /*+ " + qHint.getText() + " */ ";
+ }
+
/**
* We allow at most 2 WHEN MATCHED clause, in which case 1 must be Update the other Delete
* If we have both update and delete, the 1st one (in SQL code) must have "AND <extra predicate>"
@@ -921,22 +936,29 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
String extraPredicate = null;
int numWhenMatchedUpdateClauses = 0, numWhenMatchedDeleteClauses = 0;
int numInsertClauses = 0;
+ boolean hintProcessed = false;
for(ASTNode whenClause : whenClauses) {
switch (getWhenClauseOperation(whenClause).getType()) {
case HiveParser.TOK_INSERT:
numInsertClauses++;
- handleInsert(whenClause, rewrittenQueryStr, target, onClause, targetTable, targetName, onClauseAsText);
+ handleInsert(whenClause, rewrittenQueryStr, target, onClause,
+ targetTable, targetName, onClauseAsText, hintProcessed ? null : hintStr);
+ hintProcessed = true;
break;
case HiveParser.TOK_UPDATE:
numWhenMatchedUpdateClauses++;
- String s = handleUpdate(whenClause, rewrittenQueryStr, target, onClauseAsText, targetTable, extraPredicate);
+ String s = handleUpdate(whenClause, rewrittenQueryStr, target,
+ onClauseAsText, targetTable, extraPredicate, hintProcessed ? null : hintStr);
+ hintProcessed = true;
if(numWhenMatchedUpdateClauses + numWhenMatchedDeleteClauses == 1) {
extraPredicate = s;//i.e. it's the 1st WHEN MATCHED
}
break;
case HiveParser.TOK_DELETE:
numWhenMatchedDeleteClauses++;
- String s1 = handleDelete(whenClause, rewrittenQueryStr, target, onClauseAsText, targetTable, extraPredicate);
+ String s1 = handleDelete(whenClause, rewrittenQueryStr, target,
+ onClauseAsText, targetTable, extraPredicate, hintProcessed ? null : hintStr);
+ hintProcessed = true;
if(numWhenMatchedUpdateClauses + numWhenMatchedDeleteClauses == 1) {
extraPredicate = s1;//i.e. it's the 1st WHEN MATCHED
}
@@ -956,6 +978,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
if(numWhenMatchedDeleteClauses + numWhenMatchedUpdateClauses == 2 && extraPredicate == null) {
throw new SemanticException(ErrorMsg.MERGE_PREDIACTE_REQUIRED, ctx.getCmd());
}
+
boolean validating = handleCardinalityViolation(rewrittenQueryStr, target, onClauseAsText,
targetTable, numWhenMatchedDeleteClauses == 0 && numWhenMatchedUpdateClauses == 0);
ReparseResult rr = parseRewrittenQuery(rewrittenQueryStr, ctx.getCmd());
@@ -987,6 +1010,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
//here means the last branch of the multi-insert is Cardinality Validation
rewrittenCtx.addDestNamePrefix(rewrittenTree.getChildCount() - 1, Context.DestClausePrefix.INSERT);
}
+
try {
useSuper = true;
super.analyze(rewrittenTree, rewrittenCtx);
@@ -1153,13 +1177,17 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
*/
private String handleUpdate(ASTNode whenMatchedUpdateClause, StringBuilder rewrittenQueryStr,
ASTNode target, String onClauseAsString, Table targetTable,
- String deleteExtraPredicate) throws SemanticException {
+ String deleteExtraPredicate, String hintStr) throws SemanticException {
assert whenMatchedUpdateClause.getType() == HiveParser.TOK_MATCHED;
assert getWhenClauseOperation(whenMatchedUpdateClause).getType() == HiveParser.TOK_UPDATE;
String targetName = getSimpleTableName(target);
rewrittenQueryStr.append("INSERT INTO ").append(getFullTableNameForSQL(target));
addPartitionColsToInsert(targetTable.getPartCols(), rewrittenQueryStr);
- rewrittenQueryStr.append(" -- update clause\n select ").append(targetName).append(".ROW__ID");
+ rewrittenQueryStr.append(" -- update clause\n select ");
+ if (hintStr != null) {
+ rewrittenQueryStr.append(hintStr);
+ }
+ rewrittenQueryStr.append(targetName).append(".ROW__ID");
ASTNode setClause = (ASTNode)getWhenClauseOperation(whenMatchedUpdateClause).getChild(0);
//columns being updated -> update expressions; "setRCols" (last param) is null because we use actual expressions
@@ -1211,7 +1239,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
* @param updateExtraPredicate - see notes at caller
*/
private String handleDelete(ASTNode whenMatchedDeleteClause, StringBuilder rewrittenQueryStr, ASTNode target,
- String onClauseAsString, Table targetTable, String updateExtraPredicate) throws SemanticException {
+ String onClauseAsString, Table targetTable, String updateExtraPredicate, String hintStr) throws SemanticException {
assert whenMatchedDeleteClause.getType() == HiveParser.TOK_MATCHED;
assert getWhenClauseOperation(whenMatchedDeleteClause).getType() == HiveParser.TOK_DELETE;
List<FieldSchema> partCols = targetTable.getPartCols();
@@ -1219,7 +1247,11 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
rewrittenQueryStr.append("INSERT INTO ").append(getFullTableNameForSQL(target));
addPartitionColsToInsert(partCols, rewrittenQueryStr);
- rewrittenQueryStr.append(" -- delete clause\n select ").append(targetName).append(".ROW__ID ");
+ rewrittenQueryStr.append(" -- delete clause\n select ");
+ if (hintStr != null) {
+ rewrittenQueryStr.append(hintStr);
+ }
+ rewrittenQueryStr.append(targetName).append(".ROW__ID ");
addPartitionColsToSelect(partCols, rewrittenQueryStr, target);
rewrittenQueryStr.append("\n WHERE ").append(onClauseAsString);
String extraPredicate = getWhenClausePredicate(whenMatchedDeleteClause);
@@ -1291,10 +1323,10 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
/**
* Collect WHEN clauses from Merge statement AST
*/
- private List<ASTNode> findWhenClauses(ASTNode tree) throws SemanticException {
+ private List<ASTNode> findWhenClauses(ASTNode tree, int start) throws SemanticException {
assert tree.getType() == HiveParser.TOK_MERGE;
List<ASTNode> whenClauses = new ArrayList<>();
- for(int idx = 3; idx < tree.getChildCount(); idx++) {
+ for(int idx = start; idx < tree.getChildCount(); idx++) {
ASTNode whenClause = (ASTNode)tree.getChild(idx);
assert whenClause.getType() == HiveParser.TOK_MATCHED ||
whenClause.getType() == HiveParser.TOK_NOT_MATCHED :
@@ -1333,7 +1365,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
*/
private void handleInsert(ASTNode whenNotMatchedClause, StringBuilder rewrittenQueryStr, ASTNode target,
ASTNode onClause, Table targetTable,
- String targetTableNameInSourceQuery, String onClauseAsString) throws SemanticException {
+ String targetTableNameInSourceQuery, String onClauseAsString, String hintStr) throws SemanticException {
assert whenNotMatchedClause.getType() == HiveParser.TOK_NOT_MATCHED;
assert getWhenClauseOperation(whenNotMatchedClause).getType() == HiveParser.TOK_INSERT;
List<FieldSchema> partCols = targetTable.getPartCols();
@@ -1347,8 +1379,11 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
OnClauseAnalyzer oca = new OnClauseAnalyzer(onClause, targetTable, targetTableNameInSourceQuery,
conf, onClauseAsString);
oca.analyze();
- rewrittenQueryStr.append(" -- insert clause\n select ")
- .append(valuesClause).append("\n WHERE ").append(oca.getPredicate());
+ rewrittenQueryStr.append(" -- insert clause\n select ");
+ if (hintStr != null) {
+ rewrittenQueryStr.append(hintStr);
+ }
+ rewrittenQueryStr.append(valuesClause).append("\n WHERE ").append(oca.getPredicate());
String extraPredicate = getWhenClausePredicate(whenNotMatchedClause);
if(extraPredicate != null) {
//we have WHEN NOT MATCHED AND <boolean expr> THEN INSERT
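
Why the hintProcessed flag: MERGE is rewritten into a single multi-insert statement, and the hint only needs to appear once in the rewritten text, so it is spliced into the SELECT of whichever WHEN branch is handled first and suppressed for the rest. A rough, abridged sketch (assembled by hand, not actual generated output; the cardinality-check branch is omitted) of the rewritten shape for the hinted MERGE in the new test, where the delete clause comes first:

    // Rough, hand-written sketch of the rewritten multi-insert; only the
    // first branch (the delete clause in this test) carries the hint.
    String rewritten =
        "FROM\n"
      + "  default.acidTbl t\n"
      + "  RIGHT OUTER JOIN\n"
      + "  default.nonAcidOrcTbl s\n"
      + "  ON t.a = s.a\n"
      + "INSERT INTO default.acidTbl -- delete clause\n"
      + " select /*+ semi(s, a, t, 1000) */ t.ROW__ID\n"
      + " WHERE t.a = s.a AND s.a > 8\n"
      + "INSERT INTO default.acidTbl -- update clause\n"
      + " select t.ROW__ID, t.a, 7\n"
      + " WHERE t.a = s.a AND NOT(s.a > 8)\n"
      + "INSERT INTO default.acidTbl -- insert clause\n"
      + " select s.a, s.b\n"
      + " WHERE t.a IS NULL";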
http://git-wip-us.apache.org/repos/asf/hive/blob/4a30574d/ql/src/test/queries/clientpositive/semijoin_hint.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/semijoin_hint.q b/ql/src/test/queries/clientpositive/semijoin_hint.q
index de176af..cdf23b7 100644
--- a/ql/src/test/queries/clientpositive/semijoin_hint.q
+++ b/ql/src/test/queries/clientpositive/semijoin_hint.q
@@ -100,3 +100,24 @@ explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join src
-- This should NOT create a semijoin
explain select /*+ semi(k, str, v, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1);
+
+
+-- Make sure hints work with merge
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.explain.user=false;
+set hive.merge.cardinality.check=true;
+
+create table acidTbl(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true');
+create table nonAcidOrcTbl(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false');
+
+--without hint, the semijoin is still made, note the difference in bloom filter entries.
+explain merge into acidTbl as t using nonAcidOrcTbl s ON t.a = s.a
+WHEN MATCHED AND s.a > 8 THEN DELETE
+WHEN MATCHED THEN UPDATE SET b = 7
+WHEN NOT MATCHED THEN INSERT VALUES(s.a, s.b);
+-- with hint, the bloom filter entries become 1000 due to hint.
+explain merge /*+ semi(s, a, t, 1000)*/ into acidTbl as t using nonAcidOrcTbl s ON t.a = s.a
+WHEN MATCHED AND s.a > 8 THEN DELETE
+WHEN MATCHED THEN UPDATE SET b = 7
+WHEN NOT MATCHED THEN INSERT VALUES(s.a, s.b);
\ No newline at end of file
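
The new tests pin down the hint's observable effect: without it the semijoin reduction is still created, but the plan's bloom_filter aggregation uses expectedEntries=1 (derived from stats on the tiny table); with semi(s, a, t, 1000) it becomes expectedEntries=1000, taken from the hint's last argument. A self-contained sketch (hypothetical helper, not Hive code) of reading that argument out of the raw hint text:

    // Hypothetical helper: pull the expected-entries argument (the 4th)
    // out of a raw semijoin hint such as "semi(s, a, t, 1000)".
    public final class SemiHintArgs {
      public static void main(String[] args) {
        String hint = "semi(s, a, t, 1000)";
        String inner = hint.substring(hint.indexOf('(') + 1, hint.lastIndexOf(')'));
        String[] parts = inner.split(",");
        // order per the test: source alias, source column, target alias, entries
        long expectedEntries = Long.parseLong(parts[3].trim());
        System.out.println(expectedEntries); // 1000
      }
    }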
http://git-wip-us.apache.org/repos/asf/hive/blob/4a30574d/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out
index 679916d..9ee70ed 100644
--- a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out
+++ b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out
@@ -2836,3 +2836,717 @@ STAGE PLANS:
Processor Tree:
ListSink
+PREHOOK: query: create table acidTbl(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@acidTbl
+POSTHOOK: query: create table acidTbl(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@acidTbl
+PREHOOK: query: create table nonAcidOrcTbl(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@nonAcidOrcTbl
+POSTHOOK: query: create table nonAcidOrcTbl(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@nonAcidOrcTbl
+PREHOOK: query: explain merge into acidTbl as t using nonAcidOrcTbl s ON t.a = s.a
+WHEN MATCHED AND s.a > 8 THEN DELETE
+WHEN MATCHED THEN UPDATE SET b = 7
+WHEN NOT MATCHED THEN INSERT VALUES(s.a, s.b)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain merge into acidTbl as t using nonAcidOrcTbl s ON t.a = s.a
+WHEN MATCHED AND s.a > 8 THEN DELETE
+WHEN MATCHED THEN UPDATE SET b = 7
+WHEN NOT MATCHED THEN INSERT VALUES(s.a, s.b)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-4 is a root stage
+ Stage-5 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-5
+ Stage-6 depends on stages: Stage-0
+ Stage-2 depends on stages: Stage-5
+ Stage-7 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-5
+ Stage-8 depends on stages: Stage-3
+ Stage-1 depends on stages: Stage-5
+ Stage-9 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-4
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Reducer 9 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 5 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 6 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
+ Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (a BETWEEN DynamicValue(RS_3_s_a_min) AND DynamicValue(RS_3_s_a_max) and in_bloom_filter(a, DynamicValue(RS_3_s_a_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: a (type: int)
+ sort order: +
+ Map-reduce partition columns: a (type: int)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ value expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ Execution mode: vectorized, llap
+ LLAP IO: may be used (ACID table)
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: a (type: int)
+ sort order: +
+ Map-reduce partition columns: a (type: int)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: b (type: int)
+ Select Operator
+ expressions: a (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Right Outer Join 0 to 1
+ keys:
+ 0 a (type: int)
+ 1 a (type: int)
+ outputColumnNames: _col0, _col4, _col5, _col6
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col0 = _col5) and (_col5 > 8)) (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ sort order: +
+ Map-reduce partition columns: UDFToInteger(_col0) (type: int)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col0 = _col5) and (_col5 <= 8)) (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ sort order: +
+ Map-reduce partition columns: UDFToInteger(_col0) (type: int)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Filter Operator
+ predicate: (_col0 = _col5) (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col4
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Filter Operator
+ predicate: _col0 is null (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col5 (type: int), _col6 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: int)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.acidtbl
+ Write Type: DELETE
+ Reducer 4
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: int), 7 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.acidtbl
+ Write Type: UPDATE
+ Reducer 5
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col1 > 1L) (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cardinality_violation(_col0) (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.merge_tmp_table
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: val
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(val, 'hll')
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 6
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.acidtbl
+ Write Type: INSERT
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int)
+ outputColumnNames: a, b
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+ Reducer 7
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 9
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
+
+ Stage: Stage-5
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.acidtbl
+ Write Type: DELETE
+
+ Stage: Stage-6
+ Stats Work
+ Basic Stats Work:
+
+ Stage: Stage-2
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.acidtbl
+ Write Type: UPDATE
+
+ Stage: Stage-7
+ Stats Work
+ Basic Stats Work:
+
+ Stage: Stage-3
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.merge_tmp_table
+
+ Stage: Stage-8
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: val
+ Column Types: int
+ Table: default.merge_tmp_table
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.acidtbl
+ Write Type: INSERT
+
+ Stage: Stage-9
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: a, b
+ Column Types: int, int
+ Table: default.acidtbl
+
+PREHOOK: query: explain merge /*+ semi(s, a, t, 1000)*/ into acidTbl as t using nonAcidOrcTbl s ON t.a = s.a
+WHEN MATCHED AND s.a > 8 THEN DELETE
+WHEN MATCHED THEN UPDATE SET b = 7
+WHEN NOT MATCHED THEN INSERT VALUES(s.a, s.b)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain merge /*+ semi(s, a, t, 1000)*/ into acidTbl as t using nonAcidOrcTbl s ON t.a = s.a
+WHEN MATCHED AND s.a > 8 THEN DELETE
+WHEN MATCHED THEN UPDATE SET b = 7
+WHEN NOT MATCHED THEN INSERT VALUES(s.a, s.b)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-4 is a root stage
+ Stage-5 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-5
+ Stage-6 depends on stages: Stage-0
+ Stage-2 depends on stages: Stage-5
+ Stage-7 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-5
+ Stage-8 depends on stages: Stage-3
+ Stage-1 depends on stages: Stage-5
+ Stage-9 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-4
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Reducer 9 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 5 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 6 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
+ Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (a BETWEEN DynamicValue(RS_3_s_a_min) AND DynamicValue(RS_3_s_a_max) and in_bloom_filter(a, DynamicValue(RS_3_s_a_bloom_filter))) (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: a (type: int)
+ sort order: +
+ Map-reduce partition columns: a (type: int)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ value expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ Execution mode: vectorized, llap
+ LLAP IO: may be used (ACID table)
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: a (type: int)
+ sort order: +
+ Map-reduce partition columns: a (type: int)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: b (type: int)
+ Select Operator
+ expressions: a (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Right Outer Join 0 to 1
+ keys:
+ 0 a (type: int)
+ 1 a (type: int)
+ outputColumnNames: _col0, _col4, _col5, _col6
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col0 = _col5) and (_col5 > 8)) (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ sort order: +
+ Map-reduce partition columns: UDFToInteger(_col0) (type: int)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col0 = _col5) and (_col5 <= 8)) (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), _col0 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ sort order: +
+ Map-reduce partition columns: UDFToInteger(_col0) (type: int)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: int)
+ Filter Operator
+ predicate: (_col0 = _col5) (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col4
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
+ Filter Operator
+ predicate: _col0 is null (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col5 (type: int), _col6 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: int)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.acidtbl
+ Write Type: DELETE
+ Reducer 4
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: int), 7 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.acidtbl
+ Write Type: UPDATE
+ Reducer 5
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col1 > 1L) (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cardinality_violation(_col0) (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.merge_tmp_table
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: val
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(val, 'hll')
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 6
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.acidtbl
+ Write Type: INSERT
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int)
+ outputColumnNames: a, b
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+ Reducer 7
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 9
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1000)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
+
+ Stage: Stage-5
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.acidtbl
+ Write Type: DELETE
+
+ Stage: Stage-6
+ Stats Work
+ Basic Stats Work:
+
+ Stage: Stage-2
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.acidtbl
+ Write Type: UPDATE
+
+ Stage: Stage-7
+ Stats Work
+ Basic Stats Work:
+
+ Stage: Stage-3
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.merge_tmp_table
+
+ Stage: Stage-8
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: val
+ Column Types: int
+ Table: default.merge_tmp_table
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.acidtbl
+ Write Type: INSERT
+
+ Stage: Stage-9
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: a, b
+ Column Types: int, int
+ Table: default.acidtbl
+